From 5cc9b7c2556310441f0a58b37161dc606c6f136e Mon Sep 17 00:00:00 2001 From: zhangning Date: Sat, 19 Apr 2025 21:57:05 +0800 Subject: [PATCH] =?UTF-8?q?=E5=90=88=E5=85=A5mindspeed080=E5=9F=BA?= =?UTF-8?q?=E7=BA=BF=E7=89=88=E6=9C=AC=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- model/train/yoco_moe/.gitignore | 5 + model/train/yoco_moe/LICENSE | 290 +++ model/train/yoco_moe/OWNERS | 36 + model/train/yoco_moe/README.md | 948 ++++++++ model/train/yoco_moe/SECURITYNOTE.md | 116 + .../Third_Party_Open_Source_Software_Notice | 141 ++ .../train/yoco_moe/ci/access_control_test.py | 43 + model/train/yoco_moe/docs/LOGO.png | Bin 0 -> 17565 bytes model/train/yoco_moe/docs/RELEASENOTE.md | 54 + model/train/yoco_moe/docs/faq/data_helpers.md | 62 + .../docs/faq/megatron070_grad_norm_nan.md | 51 + .../yoco_moe/docs/faq/torch_extensions.md | 22 + .../docs/features/Automatic_Parallelism.md | 157 ++ .../features/activation-function-recompute.md | 94 + .../yoco_moe/docs/features/adaptive-memory.md | 71 + .../docs/features/adaptive-recompute.md | 46 + model/train/yoco_moe/docs/features/alibi.md | 33 + model/train/yoco_moe/docs/features/ampipe.md | 59 + .../docs/features/async-ddp-param-gather.md | 41 + .../train/yoco_moe/docs/features/async-ddp.md | 27 + .../yoco_moe/docs/features/auto_tuning.md | 73 + .../docs/features/automated-pipeline.md | 44 + .../docs/features/automatic_parallelism_mm.md | 179 ++ .../communication-over-computation.md | 82 + .../features/context_parallelism_kv_cache.md | 66 + .../docs/features/conv3d_sequence_paralle.md | 36 + .../yoco_moe/docs/features/data-parallel.md | 31 + .../deepspeed-moe-efficient-moe.md | 52 + .../deepspeed-moe-token-rearrange.md | 28 + .../features/deepspeed_moe/deepspeed-moe.md | 79 + .../docs/features/deepspeed_moe/moe.png | Bin 0 -> 49794 bytes .../yoco_moe/docs/features/dist-train.md | 53 + .../docs/features/distributed-optimizer.md | 15 + 
.../train/yoco_moe/docs/features/eod-reset.md | 56 + .../yoco_moe/docs/features/flash-attention.md | 35 + .../features/fused_ema_adamw_optimizer.md | 26 + .../yoco_moe/docs/features/fusion-attn-v2.md | 15 + .../yoco_moe/docs/features/generate-mask.md | 57 + .../docs/features/hccl-group-buffer-set.md | 33 + .../docs/features/hccl-replace-gloo.md | 39 + .../docs/features/hierarchical-alltoallv.md | 37 + .../docs/features/hybrid-context-parallel.md | 45 + model/train/yoco_moe/docs/features/mc2.md | 63 + ...atron-moe-adaptive-recompute-activation.md | 22 + .../megatron-moe-allgather-dispatcher.md | 32 + .../megatron-moe-alltoall-dispatcher.md | 25 + .../megatron_moe/megatron-moe-bmm-fused.md | 26 + .../features/megatron_moe/megatron-moe-gmm.md | 41 + .../megatron_moe/megatron-moe-tp-extend-ep.md | 28 + .../features/moe-experts-pipeline-degree.md | 31 + .../moe-token-permute-and-unpermute.md | 23 + .../docs/features/multi_parameter_pipeline.md | 47 + ...meter_pipeline_and_variable_seq_lengths.md | 49 + .../features/nanopipe-pipeline-parallel.md | 74 + .../train/yoco_moe/docs/features/nd-matmul.md | 27 + .../yoco_moe/docs/features/noop-layers.md | 30 + .../yoco_moe/docs/features/norm-recompute.md | 33 + .../docs/features/npu_deterministic.md | 21 + .../yoco_moe/docs/features/npu_matmul_add.md | 29 + .../yoco_moe/docs/features/ops_flops_cal.md | 31 + .../docs/features/pipeline-experts.md | 80 + .../docs/features/pipeline-parallel.md | 30 + .../yoco_moe/docs/features/recomputation.md | 32 + .../recompute_independent_pipelining.md | 33 + .../docs/features/reuse-fp32-param.md | 46 + .../ring-attention-context-parallel.md | 47 + .../train/yoco_moe/docs/features/rms_norm.md | 15 + .../docs/features/rotary-embedding.md | 25 + .../docs/features/sequence-parallel.md | 33 + .../yoco_moe/docs/features/shared-experts.md | 28 + .../yoco_moe/docs/features/smart_swap.md | 39 + .../yoco_moe/docs/features/swap_attention.md | 51 + model/train/yoco_moe/docs/features/swiglu.md | 17 + 
.../docs/features/tensor-parallel-2d.md | 156 ++ .../yoco_moe/docs/features/tensor-parallel.md | 35 + .../docs/features/ulysses-context-parallel.md | 31 + .../unaligned-ulysses-context-parallel.md | 57 + .../docs/features/unaligned_linear.md | 35 + .../docs/features/variable_seq_lengths.md | 40 + .../features/virtual-pipeline-parallel.md | 43 + model/train/yoco_moe/docs/ops/README.md | 30 + model/train/yoco_moe/docs/ops/ffn.md | 230 ++ .../yoco_moe/docs/ops/fusion_attention.md | 146 ++ model/train/yoco_moe/docs/ops/gmm.md | 112 + model/train/yoco_moe/docs/ops/lcal_coc.md | 395 ++++ .../docs/ops/npu_all_to_all_all_gather_bmm.md | 190 ++ .../docs/ops/npu_apply_fused_ema_adamw.md | 91 + .../ops/npu_bmm_reduce_scatter_all_to_all.md | 187 ++ .../docs/ops/npu_dropout_add_layer_norm.md | 127 ++ .../docs/ops/npu_fused_moe_token_permute.md | 121 + .../docs/ops/npu_fused_moe_token_unpermute.md | 179 ++ .../ops/npu_grouped_mat_mul_all_reduce.md | 135 ++ .../yoco_moe/docs/ops/npu_groupmatmul_add.md | 35 + .../train/yoco_moe/docs/ops/npu_matmul_add.md | 24 + .../ops/npu_mm_all_reduce_add_rms_norm.md | 151 ++ .../ops/npu_mm_all_reduce_add_rms_norm_.md | 151 ++ .../docs/ops/npu_ring_attention_update.md | 96 + .../docs/ops/npu_rotary_position_embedding.md | 107 + model/train/yoco_moe/docs/ops/quant_gmm.md | 95 + model/train/yoco_moe/docs/ops/rms_norm.md | 43 + model/train/yoco_moe/docs/ops/swiglu.md | 42 + .../yoco_moe/docs/ops/weight_quant_gmm.md | 86 + model/train/yoco_moe/mindspeed/__init__.py | 0 model/train/yoco_moe/mindspeed/arguments.py | 1117 ++++++++++ .../mindspeed/auto_tuning/__init__.py | 0 .../mindspeed/auto_tuning/auto_tuning.py | 152 ++ .../mindspeed/auto_tuning/config/__init__.py | 0 .../config/generate_profiling_configs.py | 103 + .../auto_tuning/config/model_config.py | 156 ++ .../auto_tuning/config/search_config.py | 60 + .../auto_tuning/mindspeed_adaptor.py | 167 ++ .../mindspeed/auto_tuning/module/__init__.py | 0 .../module/communication/__init__.py | 0 
.../module/communication/communication.py | 107 + .../communication/communication_model.py | 196 ++ .../communication/communication_model_cp.py | 228 ++ .../communication/communication_model_dp.py | 226 ++ .../communication/communication_model_ep.py | 119 + .../communication/communication_model_mc2.py | 57 + .../communication/communication_model_pp.py | 99 + .../communication/communication_model_tp.py | 96 + .../communication/communication_profile.py | 70 + .../mindspeed/auto_tuning/module/hardware.py | 50 + .../auto_tuning/module/memory/__init__.py | 0 .../module/memory/dynamic_mem_modeling.py | 349 +++ .../module/memory/memory_modeling.py | 55 + .../auto_tuning/module/memory/model_param.py | 16 + .../module/memory/static_mem_modeling.py | 257 +++ .../auto_tuning/module/model_performance.py | 159 ++ .../auto_tuning/module/operator/__init__.py | 0 .../auto_tuning/module/operator/operator.py | 314 +++ .../module/operator/operator_base_block.py | 91 + .../module/operator/operator_change_block.py | 163 ++ .../operator/operator_change_block_cp.py | 126 ++ .../operator/operator_change_block_ep.py | 195 ++ .../module/operator/operator_database.py | 195 ++ .../module/operator/operator_elemental.py | 178 ++ .../module/operator/operator_note_cal.py | 115 + .../module/operator/operator_profile_get.py | 96 + .../module/operator/operator_re_profile.py | 202 ++ .../operator/operator_shape_analysis.py | 357 +++ .../module/operator/operator_shape_cal.py | 178 ++ .../auto_tuning/module/parse/__init__.py | 0 .../module/parse/profiling_parse/__init__.py | 0 .../profiling_communication_parse.py | 284 +++ .../parse/profiling_parse/profiling_config.py | 259 +++ .../profiling_parse/profiling_constant.py | 52 + .../profiling_parse/profiling_memory_parse.py | 143 ++ .../profiling_parse/profiling_meta_parse.py | 95 + .../profiling_parse/profiling_node_parse.py | 94 + .../profiling_operator_parse.py | 39 + .../parse/profiling_parse/profiling_parse.py | 200 ++ 
.../module/parse/recompute_module_info.py | 12 + .../module/parse/recompute_parser.py | 286 +++ .../auto_tuning/module/search/__init__.py | 0 .../module/search/recompute_solver.py | 304 +++ .../module/search/search_engine.py | 272 +++ .../module/search/stage_1_prune.py | 127 ++ .../mindspeed/auto_tuning/utils/__init__.py | 0 .../mindspeed/auto_tuning/utils/dtype.py | 7 + .../mindspeed/auto_tuning/utils/file_utils.py | 9 + .../mindspeed/auto_tuning/utils/logger.py | 41 + .../mindspeed/auto_tuning/utils/mem_utils.py | 29 + .../auto_tuning/utils/restricted_unpickler.py | 18 + .../auto_tuning/utils/runner/__init__.py | 0 .../auto_tuning/utils/runner/irunner.py | 22 + .../utils/runner/model_executor.py | 267 +++ .../utils/runner/torchrun_runner.py | 71 + .../mindspeed/auto_tuning/utils/singleton.py | 14 + .../mindspeed/auto_tuning/utils/utils.py | 34 + .../train/yoco_moe/mindspeed/checkpointing.py | 412 ++++ .../train/yoco_moe/mindspeed/core/__init__.py | 0 .../mindspeed/core/auto_parallel/__init__.py | 245 ++ .../core/auto_parallel/auto_parallel_apply.py | 156 ++ .../auto_parallel/auto_parallel_memory.py | 168 ++ .../core/auto_parallel/auto_parallel_model.py | 462 ++++ .../auto_parallel/auto_parallel_optimizer.py | 151 ++ .../auto_parallel/auto_parallel_profiling.py | 399 ++++ .../auto_parallel/auto_parallel_rectify.py | 424 ++++ .../core/auto_parallel/mm_search/help.py | 51 + .../mm_search/memory_modeling.py | 77 + .../core/auto_parallel/mm_search/optimizer.py | 161 ++ .../mm_search/pp_layer_search.py | 218 ++ .../core/auto_parallel/mm_search/profiling.py | 270 +++ .../core/auto_parallel/mm_search/schedules.py | 26 + .../core/auto_parallel/mm_search/solver.py | 132 ++ .../FlashAttentionScoreGrad_910B.pth | Bin 0 -> 6466 bytes .../FlashAttentionScore_910B.pth | Bin 0 -> 6402 bytes .../noise_predict_ckpt/MatMul_910B.pth | Bin 0 -> 6706 bytes .../noise_predict_ckpt/RmsNormGrad_910B.pth | Bin 0 -> 6210 bytes .../noise_predict_ckpt/RmsNorm_910B.pth | Bin 0 -> 6146 bytes 
.../core/context_parallel/__init__.py | 0 .../adaptive_context_parallel.py | 371 ++++ .../context_parallel_kv_cache.py | 124 ++ .../context_parallel/ring_context_parallel.py | 939 ++++++++ .../ring_context_parallel_for_ampipe.py | 503 +++++ .../ulysses_context_parallel.py | 745 +++++++ .../context_parallel/unaligned_cp/__init__.py | 0 .../context_parallel/unaligned_cp/mapping.py | 623 ++++++ .../mindspeed/core/context_parallel/utils.py | 661 ++++++ .../distributed_data_parallel.py | 455 ++++ .../mindspeed/core/datasets/gpt_dataset.py | 98 + .../mindspeed/core/distributed/__init__.py | 0 .../core/distributed/layerzero/__init__.py | 4 + .../distributed/layerzero/comm/hookwrap.py | 69 + .../core/distributed/layerzero/config.py | 415 ++++ .../core/distributed/layerzero/constants.py | 18 + .../core/distributed/layerzero/debug/sum.py | 77 + .../layerzero/megatron_adaptor/__init__.py | 2 + .../megatron_adaptor/optimizer/clip.py | 95 + .../megatron_adaptor/optimizer/misc.py | 47 + .../optimizer/sharded_grad_scaler.py | 387 ++++ .../megatron_adaptor/optimizer/zero.py | 277 +++ .../distributed/layerzero/runtime/_forward.py | 566 +++++ .../distributed/layerzero/runtime/_grad.py | 94 + .../layerzero/runtime/_initialize.py | 134 ++ .../layerzero/runtime/_root_forward.py | 69 + .../distributed/layerzero/runtime/_shard.py | 277 +++ .../distributed/layerzero/runtime/_utils.py | 194 ++ .../distributed/layerzero/runtime/hook.py | 78 + .../distributed/layerzero/state/__init__.py | 0 .../core/distributed/layerzero/state/fqn.py | 28 + .../layerzero/state/mga_checkpoint.py | 293 +++ .../layerzero/state/optim_state.py | 154 ++ .../layerzero/state/scripts/__init__.py | 0 .../state/scripts/convert_to_megatron.py | 110 + .../state/scripts/layerzero_checkpointer.py | 404 ++++ .../distributed/layerzero/state/state_dict.py | 135 ++ .../distributed/layerzero/zero3/__init__.py | 8 + .../layerzero/zero3/_common_utils.py | 367 +++ .../layerzero/zero3/_exec_order_utils.py | 153 ++ 
.../layerzero/zero3/_init_utils.py | 665 ++++++ .../distributed/layerzero/zero3/_limiter.py | 25 + .../layerzero/zero3/_traversal_utils.py | 107 + .../layerzero/zero3/_wrap_utils.py | 129 ++ .../core/distributed/layerzero/zero3/api.py | 34 + .../distributed/layerzero/zero3/flat_param.py | 1938 ++++++++++++++++ .../core/distributed/layerzero/zero3/fsdp.py | 364 +++ .../core/distributed/layerzero/zero3/wrap.py | 62 + .../core/distributed/param_and_grad_buffer.py | 277 +++ .../mindspeed/core/fusions/__init__.py | 0 .../core/fusions/fused_bias_swiglu.py | 17 + .../core/fusions/fused_layer_norm.py | 25 + .../mindspeed/core/fusions/fused_softmax.py | 52 + .../core/fusions/npu_moe_token_permute.py | 19 + .../core/fusions/npu_moe_token_unpermute.py | 25 + .../mindspeed/core/fusions/rms_norm.py | 51 + .../core/fusions/rotary_pos_embedding.py | 0 .../mindspeed/core/memory/__init__.py | 0 .../core/memory/adaptive_memory/__init__.py | 0 .../adaptive_memory/adaptive_memory_apply.py | 159 ++ .../adaptive_memory/adaptive_memory_cache.py | 277 +++ .../adaptive_memory_function.py | 136 ++ .../adaptive_memory/adaptive_memory_opt.py | 212 ++ .../adaptive_memory/adaptive_memory_policy.py | 185 ++ .../adaptive_memory_prefetch.py | 438 ++++ .../adaptive_memory_profiling.py | 327 +++ .../adaptive_memory/adaptive_memory_solver.py | 438 ++++ .../adaptive_memory_swap_manager.py | 450 ++++ .../adaptive_memory/adaptive_memory_tool.py | 219 ++ .../core/memory/adaptive_memory/cpu_binder.py | 109 + .../memory/adaptive_recomputing/__init__.py | 0 .../adaptive_recompute.py | 782 +++++++ .../adaptive_recompute_apply.py | 200 ++ .../adaptive_recompute_solver.py | 574 +++++ .../pluggable_allocator_adpator.py | 24 + .../memory/adaptive_recomputing/prefetch.py | 328 +++ .../adaptive_recomputing/swap_manager.py | 226 ++ .../adaptive_recomputing/swappable_tensor.py | 88 + .../core/memory/auto_pipeline/autopipeline.py | 371 ++++ .../auto_pipeline/autopipeline_apply.py | 53 + 
.../auto_pipeline/autopipeline_solver.py | 501 +++++ .../yoco_moe/mindspeed/core/memory/common.py | 11 + .../memory_fragmentation/malloc_recorder.py | 18 + .../memory_fragmentation/memory_recorder.py | 45 + .../optimizer_init_precise.py | 22 + .../pluggable_allocator_adpator.py | 34 + .../core/memory/smart_swap/__init__.py | 0 .../mindspeed/core/memory/smart_swap/hooks.py | 242 ++ .../memory/smart_swap/policy_generator.py | 320 +++ .../core/memory/smart_swap/swap_adaptor.py | 43 + .../core/memory/smart_swap/swap_arranger.py | 205 ++ .../memory/smart_swap/swap_cpp_adaptor.py | 1206 ++++++++++ .../core/memory/smart_swap/swap_engine.py | 294 +++ .../core/memory/smart_swap/swap_manager.py | 235 ++ .../smart_swap/swap_megatron_adaptor.py | 72 + .../memory/smart_swap/swap_policy_config.py | 49 + .../core/memory/smart_swap/swap_utils.py | 39 + .../core/mindspeed_parallel_group.py | 75 + .../embeddings/language_model_embedding.py | 34 + .../common/embeddings/rotary_pos_embedding.py | 386 ++++ .../core/models/gpt/gpt_layer_specs.py | 101 + .../mindspeed/core/models/gpt/gpt_model.py | 28 + .../yoco_moe/mindspeed/core/parallel_state.py | 1223 ++++++++++ .../auto_pipeline_perf/autopipeline_perf.py | 401 ++++ .../auto_pipeline_perf/data_samplers.py | 71 + .../auto_pipeline_perf/global_vars.py | 16 + .../optimpipeline_solver.py | 304 +++ .../schedulepipeline_solver.py | 445 ++++ .../auto_pipeline_perf/schedules.py | 274 +++ .../auto_pipeline_perf/transformer.py | 17 + .../core/pipeline_parallel/__init__.py | 0 .../pipeline_parallel/flexible_schedules.py | 1771 +++++++++++++++ .../multiparameter_schedules.py | 965 ++++++++ .../pipeline_parallel/p2p_communication.py | 471 ++++ .../pipeline_parallel/ripipe_schedules.py | 784 +++++++ .../core/pipeline_parallel/schedules.py | 184 ++ .../mindspeed/core/simple_parallel_cfg.py | 19 + .../yoco_moe/mindspeed/core/singleton_meta.py | 27 + .../core/tensor_parallel/__init__.py | 0 .../tensor_parallel/ascend_turbo/__init__.py | 0 
.../ascend_turbo/ascend_turbo_cfg.py | 40 + .../ascend_turbo/initialize.py | 94 + .../ascend_turbo/mc2_linears_seq_parallel.py | 352 +++ .../tensor_parallel/checkpoint_manager.py | 62 + .../tensor_parallel/comm_autograd_function.py | 246 ++ .../core/tensor_parallel/comm_group_api.py | 174 ++ .../core/tensor_parallel/comm_utils.py | 271 +++ .../core/tensor_parallel/cross_entropy.py | 38 + .../mindspeed/core/tensor_parallel/layers.py | 1969 +++++++++++++++++ .../core/tensor_parallel/lcal_coc/__init__.py | 1 + .../coc_parallel_linears_all_reduce.py | 73 + .../coc_parallel_linears_all_reduce_fused.py | 43 + .../coc_parallel_linears_sequence_parallel.py | 242 ++ ...arallel_linears_sequence_parallel_fused.py | 153 ++ .../tensor_parallel/lcal_coc/coc_utils.py | 245 ++ .../lcal_coc/matmul_soc_friendly.py | 136 ++ .../tensor_parallel/lcal_coc/min_comm_cfg.py | 224 ++ .../rewrite_parallel_linears_all_reduce.py | 59 + ...rite_parallel_linears_sequence_parallel.py | 82 + .../tensor_parallel/lcal_coc/user_config.py | 110 + .../mindspeed/core/tensor_parallel/mapping.py | 36 + .../mindspeed/core/tensor_parallel/random.py | 300 +++ .../tensor_parallel/tp_2d/layernorm_2d.py | 179 ++ .../linear_2d_moe_split_along_first_dim.py | 298 +++ .../tp_2d/linear_2d_split_along_first_dim.py | 499 +++++ .../tensor_parallel/tp_2d/norm_factory.py | 73 + .../tp_2d/parallel_linear_2d.py | 204 ++ .../core/tensor_parallel/tp_2d/rms_norm_2d.py | 98 + .../unaligned_layers/__init__.py | 0 .../unaligned_layers/adaptor.py | 143 ++ .../unaligned_column_parallel_linear.py | 250 +++ .../unaligned_row_parallel_linear.py | 206 ++ .../unaligned_layers/unaligned_utils.py | 304 +++ .../core/tensor_parallel_x_union_cp.py | 87 + .../core/tensor_parallel_y_union_cp.py | 88 + .../train/yoco_moe/mindspeed/core/training.py | 544 +++++ .../mindspeed/core/transformer/__init__.py | 0 .../mindspeed/core/transformer/attention.py | 496 +++++ .../custom_layers/transformer_engine.py | 53 + 
.../core/transformer/dot_product_attention.py | 309 +++ .../mindspeed/core/transformer/mlp.py | 150 ++ .../mindspeed/core/transformer/module.py | 17 + .../core/transformer/moe/__init__.py | 0 .../core/transformer/moe/comm_utils.py | 257 +++ .../mindspeed/core/transformer/moe/experts.py | 197 ++ .../core/transformer/moe/grouped_gemm_util.py | 195 ++ ..._mlp_with_comp_and_comm_overlap_all2all.py | 298 +++ ...lp_with_comp_and_comm_overlap_allgather.py | 187 ++ .../mindspeed/core/transformer/moe/layers.py | 638 ++++++ .../core/transformer/moe/moe_layer.py | 143 ++ .../moe/moe_layer_overlap_all2all.py | 605 +++++ .../moe/moe_layer_overlap_allgather.py | 229 ++ .../core/transformer/moe/moe_utils.py | 376 ++++ .../mindspeed/core/transformer/moe/router.py | 97 + .../core/transformer/moe/token_dispatcher.py | 1262 +++++++++++ .../core/transformer/moe/tp_2d/__init__.py | 1 + .../transformer/moe/tp_2d/grouped_mlp_2d.py | 595 +++++ .../moe_allgather_token_dispatcher_2d.py | 279 +++ .../transformer/moe/tp_2d/moe_layer_2d.py | 62 + .../moe/tp_2d/sequential_mlp_2d.py | 92 + .../transformer/moe/tp_2d/topk_router_2d.py | 81 + .../moe/unpermute_without_activation.py | 135 ++ .../mindspeed/core/transformer/transformer.py | 289 +++ .../core/transformer/transformer_block.py | 223 ++ .../core/transformer/transformer_config.py | 187 ++ .../mindspeed/core/weight_grad_store.py | 241 ++ .../mindspeed/features_manager/__init__.py | 9 + .../mindspeed/features_manager/feature.py | 38 + .../features_manager/functional/__init__.py | 0 .../functional/profiler_default_feature.py | 10 + .../tensor_parallel/__init__.py | 0 .../unaligned_linear_feature.py | 58 + .../yoco_moe/mindspeed/functional/__init__.py | 0 .../mindspeed/functional/profiler/__init__.py | 0 .../functional/profiler/profiler_info.py | 19 + model/train/yoco_moe/mindspeed/initialize.py | 65 + .../yoco_moe/mindspeed/megatron_adaptor.py | 1145 ++++++++++ .../yoco_moe/mindspeed/model/__init__.py | 0 
.../yoco_moe/mindspeed/model/alibi_mask.py | 79 + .../mindspeed/model/language_model.py | 161 ++ .../yoco_moe/mindspeed/model/transformer.py | 1814 +++++++++++++++ .../train/yoco_moe/mindspeed/moe/__init__.py | 0 .../yoco_moe/mindspeed/moe/ampipe/__init__.py | 0 .../yoco_moe/mindspeed/moe/ampipe/ampipe.py | 327 +++ .../mindspeed/moe/ampipe/ampipe_args.py | 150 ++ .../moe/ampipe/ampipe_async_communication.py | 172 ++ .../ampipe_bias_dropout_add_ln_computer.py | 57 + .../mindspeed/moe/ampipe/ampipe_fa.py | 55 + .../moe/ampipe/ampipe_fa_computer.py | 132 ++ .../moe/ampipe/ampipe_moe_gating_computer.py | 63 + .../moe/ampipe/ampipe_moe_mlp_computer.py | 229 ++ .../moe/ampipe/ampipe_post_mlp_computer.py | 63 + .../mindspeed/moe/async_comm_utils.py | 170 ++ model/train/yoco_moe/mindspeed/moe/config.py | 43 + model/train/yoco_moe/mindspeed/moe/experts.py | 39 + model/train/yoco_moe/mindspeed/moe/gate.py | 306 +++ .../mindspeed/moe/mixtral_parallel_mlpbm.py | 93 + model/train/yoco_moe/mindspeed/moe/moe.py | 105 + .../train/yoco_moe/mindspeed/moe/moe_layer.py | 148 ++ .../yoco_moe/mindspeed/moe/pipe_experts.py | 422 ++++ model/train/yoco_moe/mindspeed/moe/utils.py | 210 ++ .../mindspeed/multi_modal/conv3d/__init__.py | 0 .../conv3d/conv3d_depth_parallel.py | 205 ++ .../multi_modal/dist_train/__init__.py | 1 + .../multi_modal/dist_train/checkpointing.py | 48 + .../dist_train/communication/__init__.py | 2 + .../communication/dist_communication.py | 230 ++ .../communication/dist_ranks_match.py | 116 + .../multi_modal/dist_train/config/__init__.py | 5 + .../dist_train/config/dist_train_config.py | 322 +++ .../inner_data_parallel/__init__.py | 0 .../inner_data_parallel.py | 43 + .../inner_data_parallel/mappings.py | 83 + .../dist_train/inner_data_parallel/utils.py | 26 + .../multi_modal/dist_train/parallel_state.py | 1260 +++++++++++ .../dist_train/pipeline_parallel/__init__.py | 1 + .../pipeline_parallel/dist_schedules.py | 524 +++++ .../multi_modal/dist_train/training.py | 21 + 
.../yoco_moe/mindspeed/op_builder/__init__.py | 27 + .../op_builder/adaptive_cp_builder.py | 11 + .../adaptive_recomputing_builder.py | 37 + .../mindspeed/op_builder/algorithm_builder.py | 11 + .../mindspeed/op_builder/atb_builder.py | 49 + .../yoco_moe/mindspeed/op_builder/builder.py | 77 + .../mindspeed/op_builder/ffn_builder.py | 373 ++++ .../op_builder/fused_ema_adamw_builder.py | 31 + .../op_builder/fusion_attention_v2_builder.py | 41 + .../mindspeed/op_builder/gmm_builder.py | 352 +++ .../op_builder/groupmatmul_add_builder.py | 15 + .../mindspeed/op_builder/lcal_builder.py | 18 + .../op_builder/matmul_add_builder.py | 15 + .../memory_fragmentation_builder.py | 47 + .../npu_all_to_all_all_gather_bmm_builder.py | 244 ++ ...u_bmm_reduce_scatter_all_to_all_builder.py | 147 ++ .../npu_dropout_add_layer_norm_builder.py | 26 + .../npu_grouped_mat_mul_all_reduce_builder.py | 37 + ...lace_mm_all_reduce_add_rms_norm_builder.py | 110 + .../npu_mm_all_reduce_add_rms_norm_builder.py | 324 +++ .../npu_moe_token_permute_builder.py | 27 + .../npu_moe_token_unpermute_builder.py | 27 + .../npu_ring_attention_update_builder.py | 27 + .../npu_rotary_position_embedding_builder.py | 26 + .../mindspeed/op_builder/quant_gmm_builder.py | 69 + .../mindspeed/op_builder/rms_norm_builder.py | 13 + .../op_builder/smart_swap_builder.py | 44 + .../mindspeed/op_builder/swiglu_builder.py | 12 + .../op_builder/weight_quant_gmm_builder.py | 61 + .../train/yoco_moe/mindspeed/ops/__init__.py | 0 .../algorithm/adaptive_cp/adaptive_cp.cpp | 454 ++++ .../ops/csrc/algorithm/algorithm.cpp | 24 + .../ops/csrc/atb/groupmatmul_add.cpp | 71 + .../mindspeed/ops/csrc/atb/inc/atb_adapter.h | 44 + .../mindspeed/ops/csrc/atb/lcal_coc.cpp | 283 +++ .../mindspeed/ops/csrc/atb/matmul_add.cpp | 72 + .../mindspeed/ops/csrc/atb/rms_norm.cpp | 145 ++ .../mindspeed/ops/csrc/atb/swiglu.cpp | 136 ++ .../ops/csrc/atb/utils/atb_adapter.cpp | 140 ++ .../yoco_moe/mindspeed/ops/csrc/cann/ffn.cpp | 96 + 
.../ops/csrc/cann/fusion_attention_v2.cpp | 458 ++++ .../yoco_moe/mindspeed/ops/csrc/cann/gmm.cpp | 309 +++ .../ops/csrc/cann/inc/aclnn_common.h | 646 ++++++ .../mindspeed/ops/csrc/cann/inc/mc2_utils.h | 73 + .../cann/npu_all_to_all_all_gather_bmm.cpp | 97 + .../csrc/cann/npu_apply_fused_ema_adamw.cpp | 85 + .../npu_bmm_reduce_scatter_all_to_all.cpp | 87 + .../csrc/cann/npu_dropout_add_layer_norm.cpp | 181 ++ .../cann/npu_grouped_mat_mul_all_reduce.cpp | 179 ++ .../cann/npu_mm_all_reduce_add_rms_norm.cpp | 89 + .../cann/npu_mm_all_reduce_add_rms_norm_.cpp | 89 + .../ops/csrc/cann/npu_moe_token_permute.cpp | 117 + .../ops/csrc/cann/npu_moe_token_unpermute.cpp | 133 ++ .../csrc/cann/npu_ring_attention_update.cpp | 60 + .../cann/npu_rotary_position_embedding.cpp | 159 ++ .../mindspeed/ops/csrc/cann/quant_gmm.cpp | 97 + .../ops/csrc/cann/weight_quant_gmm.cpp | 83 + .../ops/csrc/flop_counter/flop_counter.cpp | 363 +++ .../ops/csrc/flop_counter/flop_counter.h | 44 + .../NpuCachingCustomAllocator.cpp | 219 ++ .../NpuCachingCustomAllocator.h | 1593 +++++++++++++ .../CachingAllocatorConfig.cpp | 183 ++ .../CachingAllocatorConfig.h | 41 + .../memory_fragmentation/Decorator.cpp | 25 + .../memory_fragmentation/Decorator.h | 16 + .../DeviceCachingAllocator.cpp | 1444 ++++++++++++ .../DeviceCachingAllocator.h | 174 ++ .../memory_fragmentation/EventPool.cpp | 31 + .../memory_fragmentation/EventPool.h | 27 + .../PluggableAllocator.cpp | 233 ++ .../memory_fragmentation/PluggableAllocator.h | 58 + .../PluggableAllocatorFunctions.cpp | 314 +++ .../memory_fragmentation/Recorder.cpp | 317 +++ .../memory_fragmentation/Recorder.h | 146 ++ .../memory_fragmentation/common.cpp | 142 ++ .../memory_fragmentation/common.h | 399 ++++ .../memory_fragmentation/test.py | 36 + .../smart_swap/CachingAllocatorConfig.cpp | 88 + .../smart_swap/CachingAllocatorConfig.h | 58 + .../smart_swap/DeviceCachingAllocator.cpp | 1703 ++++++++++++++ .../smart_swap/DeviceCachingAllocator.h | 207 ++ 
.../smart_swap/EventPool.cpp | 45 + .../smart_swap/EventPool.h | 36 + .../smart_swap/NPUSwapManager.cpp | 1442 ++++++++++++ .../smart_swap/NPUSwapManager.h | 501 +++++ .../smart_swap/NPUVmmApi.h | 431 ++++ .../smart_swap/PluggableAllocator.cpp | 218 ++ .../smart_swap/PluggableAllocator.h | 66 + .../smart_swap/SwapBindings.cpp | 237 ++ .../smart_swap/SwapException.h | 39 + .../pluggable_allocator/smart_swap/common.cpp | 54 + .../pluggable_allocator/smart_swap/common.h | 262 +++ .../pluggable_allocator/smart_swap/swap_log.h | 123 + .../mindspeed/ops/dropout_add_layer_norm.py | 49 + .../mindspeed/ops/dropout_add_rms_norm.py | 47 + model/train/yoco_moe/mindspeed/ops/ffn.py | 69 + .../mindspeed/ops/fusion_attention_v2.py | 114 + model/train/yoco_moe/mindspeed/ops/gmm.py | 175 ++ .../yoco_moe/mindspeed/ops/lcal_functional.py | 84 + .../ops/npu_all_to_all_all_gather_bmm.py | 58 + .../ops/npu_apply_fused_ema_adamw.py | 37 + .../ops/npu_bmm_reduce_scatter_all_to_all.py | 55 + .../ops/npu_grouped_mat_mul_all_reduce.py | 27 + .../mindspeed/ops/npu_groupmatmul_add.py | 13 + .../yoco_moe/mindspeed/ops/npu_matmul_add.py | 39 + .../ops/npu_mm_all_reduce_add_rms_norm.py | 67 + .../ops/npu_mm_all_reduce_add_rms_norm_.py | 67 + .../mindspeed/ops/npu_moe_token_permute.py | 20 + .../mindspeed/ops/npu_moe_token_unpermute.py | 22 + .../ops/npu_ring_attention_update.py | 25 + .../ops/npu_rotary_position_embedding.py | 12 + .../train/yoco_moe/mindspeed/ops/quant_gmm.py | 58 + .../mindspeed/ops/weight_quant_gmm.py | 48 + .../yoco_moe/mindspeed/optimizer/__init__.py | 0 .../yoco_moe/mindspeed/optimizer/adamw.py | 166 ++ .../mindspeed/optimizer/distrib_optimizer.py | 1200 ++++++++++ .../yoco_moe/mindspeed/optimizer/ema_adamw.py | 200 ++ .../yoco_moe/mindspeed/optimizer/optimizer.py | 332 +++ model/train/yoco_moe/mindspeed/patch_utils.py | 120 + .../yoco_moe/mindspeed/run/gpt_dataset.patch | 17 + .../yoco_moe/mindspeed/run/helpers.patch | 30 + .../yoco_moe/mindspeed/run/initialize.patch | 13 
+ model/train/yoco_moe/mindspeed/run/run.py | 128 ++ .../yoco_moe/mindspeed/tokenizer/__init__.py | 17 + .../yoco_moe/mindspeed/tokenizer/tokenizer.py | 156 ++ model/train/yoco_moe/mindspeed/training.py | 355 +++ model/train/yoco_moe/mindspeed/utils.py | 726 ++++++ .../yoco_moe/mindspeed/yaml_arguments.py | 25 + model/train/yoco_moe/requirements.txt | 23 + model/train/yoco_moe/setup.py | 99 + model/train/yoco_moe/sources/images/FBW.png | Bin 0 -> 62176 bytes .../sources/images/activation_function_a.png | Bin 0 -> 26411 bytes .../sources/images/activation_function_b.png | Bin 0 -> 31088 bytes .../sources/images/adaptive_memory_a.png.png | Bin 0 -> 117068 bytes .../sources/images/adaptive_recompute_a.png | Bin 0 -> 95774 bytes .../sources/images/adaptive_recompute_b.png | Bin 0 -> 69240 bytes .../images/algo_tp_comm_optimize_a.png | Bin 0 -> 55803 bytes .../images/algo_tp_comm_optimize_b.png | Bin 0 -> 20116 bytes model/train/yoco_moe/sources/images/alibi.png | Bin 0 -> 52715 bytes .../train/yoco_moe/sources/images/ampipe.png | Bin 0 -> 58092 bytes .../yoco_moe/sources/images/async_ddp.png | Bin 0 -> 19147 bytes .../images/async_ddp_param_gather_a.png | Bin 0 -> 113103 bytes .../images/async_ddp_param_gather_b.png | Bin 0 -> 134747 bytes .../images/async_ddp_param_gather_c.png | Bin 0 -> 155110 bytes .../sources/images/auto_parallel_1.png | Bin 0 -> 104831 bytes .../sources/images/auto_parallel_2.png | Bin 0 -> 60877 bytes .../sources/images/auto_parallel_mm_1.PNG | Bin 0 -> 106292 bytes .../sources/images/auto_parallel_mm_2.PNG | Bin 0 -> 177550 bytes .../sources/images/auto_pipeline_parallel.png | Bin 0 -> 30474 bytes .../images/conv3d_sequence_parallel.png | Bin 0 -> 161292 bytes .../sources/images/flash_attention.png | Bin 0 -> 91918 bytes .../images/hierarchical_alltoallv_1.png | Bin 0 -> 17640 bytes .../images/hierarchical_alltoallv_2.png | Bin 0 -> 19435 bytes model/train/yoco_moe/sources/images/logo.png | Bin 0 -> 13650 bytes 
.../yoco_moe/sources/images/megatron_tp.png | Bin 0 -> 35137 bytes .../sources/images/moe_dynamic_padding_a.png | Bin 0 -> 43548 bytes .../sources/images/moe_dynamic_padding_b.png | Bin 0 -> 79984 bytes .../sources/images/moe_dynamic_padding_c.png | Bin 0 -> 81597 bytes .../sources/images/moe_dynamic_padding_d.png | Bin 0 -> 80959 bytes .../sources/images/moe_dynamic_padding_e.png | Bin 0 -> 348661 bytes .../images/moe_experts_pipeline_degree.png | Bin 0 -> 12380 bytes .../yoco_moe/sources/images/nanopipe.png | Bin 0 -> 36925 bytes .../yoco_moe/sources/images/nanopipe_v2.png | Bin 0 -> 100512 bytes .../yoco_moe/sources/images/nd_matmul.png | Bin 0 -> 60917 bytes .../yoco_moe/sources/images/pipedream1F1B.png | Bin 0 -> 86908 bytes .../sources/images/pipeline_experts.png | Bin 0 -> 32611 bytes .../sources/images/reuse_fp32_param_a.png | Bin 0 -> 48348 bytes .../sources/images/reuse_fp32_param_b.png | Bin 0 -> 46566 bytes .../yoco_moe/sources/images/ripipe_a.png | Bin 0 -> 46665 bytes .../yoco_moe/sources/images/ripipe_b.png | Bin 0 -> 52574 bytes .../sources/images/sequence-parallel.png | Bin 0 -> 50795 bytes .../sources/images/shared-experts.png | Bin 0 -> 176825 bytes .../sources/images/smart_swap_flowchart.png | Bin 0 -> 161939 bytes .../sources/images/swap_attention.png | Bin 0 -> 80471 bytes .../sources/images/swap_attention1.png | Bin 0 -> 10187 bytes .../sources/images/swap_attention2.png | Bin 0 -> 16018 bytes .../sources/images/swap_attention3.png | Bin 0 -> 21523 bytes .../sources/images/tensor-parallel-2d.png | Bin 0 -> 35860 bytes .../sources/images/virtual-pipeline.PNG | Bin 0 -> 116627 bytes model/train/yoco_moe/tests_extend/README.md | 24 + model/train/yoco_moe/tests_extend/commons.py | 29 + model/train/yoco_moe/tests_extend/conftest.py | 46 + .../gpt_175b/pretrain_gpt3_175B_128k.sh | 105 + .../gpt_175b/pretrain_gpt3_175B_32k_cp2.sh | 105 + .../gpt_175b/pretrain_gpt3_175B_32k_cp4.sh | 105 + .../gpt_175b/pretrain_gpt3_175B_8k.sh | 98 + 
.../gpt_moe/pretrain_gpt_moe_128k.sh | 109 + .../gpt_moe/pretrain_gpt_moe_32k_cp2.sh | 109 + .../gpt_moe/pretrain_gpt_moe_32k_cp4.sh | 109 + .../gpt_moe/pretrain_gpt_moe_8k.sh | 106 + .../llama2/pretrain_llama2_70b_phd_128k.sh | 107 + .../llama2/pretrain_llama2_70b_phd_32k_cp2.sh | 107 + .../llama2/pretrain_llama2_70b_phd_32k_cp4.sh | 107 + .../llama2/pretrain_llama2_70b_phd_4k.sh | 101 + .../mfu_model/pretrain_gpt_moe_128k.sh | 110 + .../mfu_model/pretrain_llama2_70b_128k.sh | 108 + .../gpt_175b/pretrain_gpt3_175B_128k.sh | 106 + .../gpt_175b/pretrain_gpt3_175B_32k.sh | 103 + .../gpt_175b/pretrain_gpt3_175B_8k.sh | 100 + .../gpt_moe/pretrain_gpt_moe_128k.sh | 115 + .../gpt_moe/pretrain_gpt_moe_32k.sh | 111 + .../perf_model/gpt_moe/pretrain_gpt_moe_8k.sh | 106 + .../llama2/pretrain_llama2_70b_128k.sh | 109 + .../llama2/pretrain_llama2_70b_32k.sh | 106 + .../llama2/pretrain_llama2_70b_4k.sh | 101 + .../system_tests/core_model/README.md | 134 ++ .../system_tests/core_model/gpt-usecase.yaml | 218 ++ .../gpt-usecase_adaptive_memory.yaml | 15 + .../core_model/gpt-usecase_fp32.yaml | 15 + .../core_model/pretrain_gpt_usecase.py | 148 ++ .../core_model/pretrain_gpt_usecase.sh | 116 + .../pretrain_gpt_usecase_adaptive_memory.sh | 116 + .../core_model/pretrain_gpt_usecase_fp32.sh | 139 ++ .../tests_extend/system_tests/env_npu.sh | 23 + .../feature_precision_guarding/README.md | 103 + .../fpg_llama_usecase.yaml | 179 ++ .../llama_param_cvt.py | 228 ++ .../llama_param_cvt.sh | 21 + .../pretrain_fpg_llama.sh | 105 + .../pretrain_gpt_fpg.py | 192 ++ .../feature_tests/deepseek_mla.sh | 143 ++ .../feature_tests/deepseek_moe.sh | 120 + .../feature_tests/deepspeed_moe.sh | 115 + .../system_tests/feature_tests/hybrid.sh | 109 + .../feature_tests/megatron_moe.sh | 116 + .../system_tests/feature_tests/nanopipe.sh | 105 + .../feature_tests/ring_attention.sh | 109 + .../system_tests/feature_tests/tp2d.sh | 109 + .../tp2d_grouped_mlp_megatron_moe.sh | 107 + 
.../tp2d_sequential_mlp_megatron_moe.sh | 106 + .../system_tests/feature_tests/ulysses.sh | 118 + .../feature_tests/ulysses_allgather.sh | 124 ++ .../feature_tests/unaligned_tpsp.sh | 97 + .../gpt/pretrain_gpt_deepspeed_moe_128k.sh | 122 + .../gpt/pretrain_gpt_megatron_moe_8k.sh | 133 ++ ...pretrain_llama2_70B_128k_tp2_cp4_hybrid.sh | 111 + ...etrain_llama2_70B_128k_tp2_cp4_megatron.sh | 110 + ...70B_32k_tp2_pp2_vpp1_cp2_ulysses_resume.sh | 122 + ...pretrain_llama2_70B_4k_tp2_pp2_vpp1_dp2.sh | 100 + .../pretrain_llama2_70B_8k_tp8_x4_y2_2d.sh | 105 + .../llama2/pretrain_llama_pp2_dp4.sh | 94 + .../system_tests/llama3/README.md | 117 + .../llama3/pretrain_llama3_8b_ptd.sh | 97 + .../pretrain_llama3_8b_ptd_adaptive_cp.sh | 102 + .../system_tests/mixtral/README.md | 106 + .../system_tests/mixtral/pretrain_mixtral.sh | 133 ++ .../pretrain_multi_parameter_pipeline_test.py | 197 ++ .../pretrain_multi_parameter_pipeline_test.sh | 93 + .../system_tests/opensora1.0/16x256x256.py | 31 + .../opensora1.0/pretrain_opensora.py | 232 ++ .../opensora1.0/pretrain_opensora.sh | 89 + .../system_tests/pretrain_base.sh | 84 + .../system_tests/pretrain_skip_train.sh | 68 + .../system_tests/yaml_args_example/README.md | 18 + .../yaml_args_example/example.yaml | 534 +++++ .../yaml_args_example/pretrain_yaml_args.sh | 62 + .../tests_extend/tools/data_handler.py | 544 +++++ .../tests_extend/tools/preprocess_data.py | 185 ++ .../tests_extend/unit_tests/__init__.py | 0 .../tests_extend/unit_tests/common.py | 348 +++ .../test_activation_function_recompute.py | 39 + .../features/adaptive_memory/__init__.py | 0 .../test_adaptive_memory_apply.py | 371 ++++ .../test_adaptive_memory_cache.py | 118 + .../test_adaptive_memory_policy.py | 698 ++++++ .../test_adaptive_memory_prefetch.py | 151 ++ .../test_adaptive_memory_profiling.py | 328 +++ .../test_adaptive_memory_solver.py | 388 ++++ ...st_adaptive_recompute_allocator_adpator.py | 51 + .../test_adaptive_recompute_apply.py | 93 + 
.../test_adaptive_recompute_hook.py | 136 ++ .../test_adaptive_recomputing.py | 435 ++++ .../test_swap_function.py | 89 + .../features/algorithm/test_reuse_dptr.py | 55 + .../algorithm/test_reuse_optimizer.py | 158 ++ .../features/ampipe/test_ampipe_forward.py | 341 +++ .../ampipe/test_attn_with_cp_for_ampipe.py | 175 ++ .../mc2/test_mc2columnparallellinear.py | 96 + .../features/mc2/test_mc2rowparallellinear.py | 107 + .../mc2/test_mcore_mc2columnparallellinear.py | 134 ++ .../test_caching_allocator_config.py | 6 + .../test_device_caching_allocator.py | 7 + .../test_malloc_recorder.py | 18 + .../test_memory_recorder.py | 21 + .../test_pluggable_allocator_api.py | 50 + .../features/moe/test_moe_pipeline_experts.py | 213 ++ .../features/moe/test_moe_token_rearrange.py | 80 + .../dist_train/dist_train_config_utils.py | 47 + .../dist_train/test_dist_communication.py | 259 +++ .../dist_train/test_dist_config.py | 347 +++ .../dist_train/test_dist_ranks_match.py | 112 + .../multi_model/test_fused_ema_adamw.py | 141 ++ .../test_sequence_parallel_conv3d.py | 32 + .../test_checkpoint_function.py | 106 + .../test_checkpoint_manager.py | 54 + .../features/smart_swap/test_smart_swap.py | 136 ++ .../swap_attention/test_swap_attention.py | 298 +++ .../unit_tests/features/test_noop_layers.py | 281 +++ .../features/test_norm_recompute.py | 61 + .../test_replace_gloo_communication.py | 270 +++ .../unit_tests/features/test_tflops.py | 152 ++ .../features/test_weight_grad_store.py | 158 ++ .../unaligned_linear/test_unaligned_linear.py | 113 + .../unit_tests/megatron/test_cp_ep_loss.py | 79 + .../unit_tests/megatron/test_cross_entropy.py | 73 + .../megatron/test_distrib_optimizer.py | 118 + .../megatron/test_dot_product_attention.py | 64 + .../megatron/test_overlap_grad_reduce.py | 86 + .../megatron/test_overlap_param_gather.py | 161 ++ .../test_parallel_state_ep_with_cp.py | 69 + .../unit_tests/megatron/test_recompute.py | 75 + .../test_adaptive_context_parallel.py | 280 +++ 
.../test_hybrid_context_parallel.py | 171 ++ .../core/context_parallel/test_mapping.py | 231 ++ .../test_ringattn_context_parallel.py | 353 +++ .../test_ringattn_context_parallel_eodbase.py | 162 ++ .../test_ringattn_context_parallel_tnd.py | 235 ++ .../test_ulysses_context_parallel.py | 204 ++ .../core/tensor_parallel/test_layernorm2d.py | 146 ++ .../core/tensor_parallel/test_rms_norm2d.py | 143 ++ .../mindspeed/test_register_patches.py | 115 + .../ops/atb/test_groupmatmul_add.py | 53 + .../unit_tests/ops/atb/test_lcal_coc.py | 178 ++ .../unit_tests/ops/atb/test_matmul_add.py | 23 + .../test_npu_all_to_all_all_gather_bmm.py | 184 ++ .../cann/test_npu_apply_fused_ema_adamw.py | 92 + .../test_npu_bmm_reduce_scatter_all_to_all.py | 146 ++ .../cann/test_npu_dropout_add_layer_norm.py | 71 + .../unit_tests/ops/cann/test_npu_ffn.py | 160 ++ ...u_fused_moe_token_permute_and_unpermute.py | 110 + .../test_npu_fused_rotary_pos_embedding.py | 44 + .../ops/cann/test_npu_fusion_attention.py | 36 + .../unit_tests/ops/cann/test_npu_gmm.py | 128 ++ .../test_npu_grouped_mat_mul_all_reduce.py | 78 + ...st_npu_kernel_rotary_position_embedding.py | 62 + .../test_npu_mm_all_reduce_add_rms_norm.py | 88 + .../test_npu_mm_all_reduce_add_rms_norm_.py | 86 + .../unit_tests/ops/cann/test_npu_quant_gmm.py | 71 + .../cann/test_npu_ring_attention_update.py | 66 + .../unit_tests/ops/cann/test_npu_rms_norm.py | 58 + .../unit_tests/ops/cann/test_npu_swiglu.py | 55 + .../ops/cann/test_npu_weight_quant_gmm.py | 71 + 771 files changed, 117685 insertions(+) create mode 100644 model/train/yoco_moe/.gitignore create mode 100644 model/train/yoco_moe/LICENSE create mode 100644 model/train/yoco_moe/OWNERS create mode 100644 model/train/yoco_moe/README.md create mode 100644 model/train/yoco_moe/SECURITYNOTE.md create mode 100644 model/train/yoco_moe/Third_Party_Open_Source_Software_Notice create mode 100644 model/train/yoco_moe/ci/access_control_test.py create mode 100644 model/train/yoco_moe/docs/LOGO.png 
create mode 100644 model/train/yoco_moe/docs/RELEASENOTE.md create mode 100644 model/train/yoco_moe/docs/faq/data_helpers.md create mode 100644 model/train/yoco_moe/docs/faq/megatron070_grad_norm_nan.md create mode 100644 model/train/yoco_moe/docs/faq/torch_extensions.md create mode 100644 model/train/yoco_moe/docs/features/Automatic_Parallelism.md create mode 100644 model/train/yoco_moe/docs/features/activation-function-recompute.md create mode 100644 model/train/yoco_moe/docs/features/adaptive-memory.md create mode 100644 model/train/yoco_moe/docs/features/adaptive-recompute.md create mode 100644 model/train/yoco_moe/docs/features/alibi.md create mode 100644 model/train/yoco_moe/docs/features/ampipe.md create mode 100644 model/train/yoco_moe/docs/features/async-ddp-param-gather.md create mode 100644 model/train/yoco_moe/docs/features/async-ddp.md create mode 100644 model/train/yoco_moe/docs/features/auto_tuning.md create mode 100644 model/train/yoco_moe/docs/features/automated-pipeline.md create mode 100644 model/train/yoco_moe/docs/features/automatic_parallelism_mm.md create mode 100644 model/train/yoco_moe/docs/features/communication-over-computation.md create mode 100644 model/train/yoco_moe/docs/features/context_parallelism_kv_cache.md create mode 100644 model/train/yoco_moe/docs/features/conv3d_sequence_paralle.md create mode 100644 model/train/yoco_moe/docs/features/data-parallel.md create mode 100644 model/train/yoco_moe/docs/features/deepspeed_moe/deepspeed-moe-efficient-moe.md create mode 100644 model/train/yoco_moe/docs/features/deepspeed_moe/deepspeed-moe-token-rearrange.md create mode 100644 model/train/yoco_moe/docs/features/deepspeed_moe/deepspeed-moe.md create mode 100644 model/train/yoco_moe/docs/features/deepspeed_moe/moe.png create mode 100644 model/train/yoco_moe/docs/features/dist-train.md create mode 100644 model/train/yoco_moe/docs/features/distributed-optimizer.md create mode 100644 model/train/yoco_moe/docs/features/eod-reset.md create 
mode 100644 model/train/yoco_moe/docs/features/flash-attention.md create mode 100644 model/train/yoco_moe/docs/features/fused_ema_adamw_optimizer.md create mode 100644 model/train/yoco_moe/docs/features/fusion-attn-v2.md create mode 100644 model/train/yoco_moe/docs/features/generate-mask.md create mode 100644 model/train/yoco_moe/docs/features/hccl-group-buffer-set.md create mode 100644 model/train/yoco_moe/docs/features/hccl-replace-gloo.md create mode 100644 model/train/yoco_moe/docs/features/hierarchical-alltoallv.md create mode 100644 model/train/yoco_moe/docs/features/hybrid-context-parallel.md create mode 100644 model/train/yoco_moe/docs/features/mc2.md create mode 100644 model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-adaptive-recompute-activation.md create mode 100644 model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-allgather-dispatcher.md create mode 100644 model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-alltoall-dispatcher.md create mode 100644 model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-bmm-fused.md create mode 100644 model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-gmm.md create mode 100644 model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-tp-extend-ep.md create mode 100644 model/train/yoco_moe/docs/features/moe-experts-pipeline-degree.md create mode 100644 model/train/yoco_moe/docs/features/moe-token-permute-and-unpermute.md create mode 100644 model/train/yoco_moe/docs/features/multi_parameter_pipeline.md create mode 100644 model/train/yoco_moe/docs/features/multi_parameter_pipeline_and_variable_seq_lengths.md create mode 100644 model/train/yoco_moe/docs/features/nanopipe-pipeline-parallel.md create mode 100644 model/train/yoco_moe/docs/features/nd-matmul.md create mode 100644 model/train/yoco_moe/docs/features/noop-layers.md create mode 100644 model/train/yoco_moe/docs/features/norm-recompute.md create mode 100644 model/train/yoco_moe/docs/features/npu_deterministic.md 
create mode 100644 model/train/yoco_moe/docs/features/npu_matmul_add.md create mode 100644 model/train/yoco_moe/docs/features/ops_flops_cal.md create mode 100644 model/train/yoco_moe/docs/features/pipeline-experts.md create mode 100644 model/train/yoco_moe/docs/features/pipeline-parallel.md create mode 100644 model/train/yoco_moe/docs/features/recomputation.md create mode 100644 model/train/yoco_moe/docs/features/recompute_independent_pipelining.md create mode 100644 model/train/yoco_moe/docs/features/reuse-fp32-param.md create mode 100644 model/train/yoco_moe/docs/features/ring-attention-context-parallel.md create mode 100644 model/train/yoco_moe/docs/features/rms_norm.md create mode 100644 model/train/yoco_moe/docs/features/rotary-embedding.md create mode 100644 model/train/yoco_moe/docs/features/sequence-parallel.md create mode 100644 model/train/yoco_moe/docs/features/shared-experts.md create mode 100644 model/train/yoco_moe/docs/features/smart_swap.md create mode 100644 model/train/yoco_moe/docs/features/swap_attention.md create mode 100644 model/train/yoco_moe/docs/features/swiglu.md create mode 100644 model/train/yoco_moe/docs/features/tensor-parallel-2d.md create mode 100644 model/train/yoco_moe/docs/features/tensor-parallel.md create mode 100644 model/train/yoco_moe/docs/features/ulysses-context-parallel.md create mode 100644 model/train/yoco_moe/docs/features/unaligned-ulysses-context-parallel.md create mode 100644 model/train/yoco_moe/docs/features/unaligned_linear.md create mode 100644 model/train/yoco_moe/docs/features/variable_seq_lengths.md create mode 100644 model/train/yoco_moe/docs/features/virtual-pipeline-parallel.md create mode 100644 model/train/yoco_moe/docs/ops/README.md create mode 100644 model/train/yoco_moe/docs/ops/ffn.md create mode 100644 model/train/yoco_moe/docs/ops/fusion_attention.md create mode 100644 model/train/yoco_moe/docs/ops/gmm.md create mode 100644 model/train/yoco_moe/docs/ops/lcal_coc.md create mode 100644 
model/train/yoco_moe/docs/ops/npu_all_to_all_all_gather_bmm.md create mode 100644 model/train/yoco_moe/docs/ops/npu_apply_fused_ema_adamw.md create mode 100644 model/train/yoco_moe/docs/ops/npu_bmm_reduce_scatter_all_to_all.md create mode 100644 model/train/yoco_moe/docs/ops/npu_dropout_add_layer_norm.md create mode 100644 model/train/yoco_moe/docs/ops/npu_fused_moe_token_permute.md create mode 100644 model/train/yoco_moe/docs/ops/npu_fused_moe_token_unpermute.md create mode 100644 model/train/yoco_moe/docs/ops/npu_grouped_mat_mul_all_reduce.md create mode 100644 model/train/yoco_moe/docs/ops/npu_groupmatmul_add.md create mode 100644 model/train/yoco_moe/docs/ops/npu_matmul_add.md create mode 100644 model/train/yoco_moe/docs/ops/npu_mm_all_reduce_add_rms_norm.md create mode 100644 model/train/yoco_moe/docs/ops/npu_mm_all_reduce_add_rms_norm_.md create mode 100644 model/train/yoco_moe/docs/ops/npu_ring_attention_update.md create mode 100644 model/train/yoco_moe/docs/ops/npu_rotary_position_embedding.md create mode 100644 model/train/yoco_moe/docs/ops/quant_gmm.md create mode 100644 model/train/yoco_moe/docs/ops/rms_norm.md create mode 100644 model/train/yoco_moe/docs/ops/swiglu.md create mode 100644 model/train/yoco_moe/docs/ops/weight_quant_gmm.md create mode 100644 model/train/yoco_moe/mindspeed/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/arguments.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/auto_tuning.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/config/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/config/generate_profiling_configs.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/config/model_config.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/config/search_config.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/mindspeed_adaptor.py create mode 100644 
model/train/yoco_moe/mindspeed/auto_tuning/module/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/communication/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model_cp.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model_dp.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model_ep.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model_mc2.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model_pp.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model_tp.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_profile.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/hardware.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/memory/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/memory/dynamic_mem_modeling.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/memory/memory_modeling.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/memory/model_param.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/memory/static_mem_modeling.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/model_performance.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/operator/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator.py create mode 100644 
model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_base_block.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_change_block.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_change_block_cp.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_change_block_ep.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_database.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_elemental.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_note_cal.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_profile_get.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_re_profile.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_shape_analysis.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_shape_cal.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/parse/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_communication_parse.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_config.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_constant.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_memory_parse.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_meta_parse.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_node_parse.py create mode 
100644 model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_operator_parse.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_parse.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/parse/recompute_module_info.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/parse/recompute_parser.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/search/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/search/recompute_solver.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/search/search_engine.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/module/search/stage_1_prune.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/utils/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/utils/dtype.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/utils/file_utils.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/utils/logger.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/utils/mem_utils.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/utils/restricted_unpickler.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/utils/runner/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/utils/runner/irunner.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/utils/runner/model_executor.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/utils/runner/torchrun_runner.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/utils/singleton.py create mode 100644 model/train/yoco_moe/mindspeed/auto_tuning/utils/utils.py create mode 100644 model/train/yoco_moe/mindspeed/checkpointing.py create mode 100644 model/train/yoco_moe/mindspeed/core/__init__.py create mode 100644 
model/train/yoco_moe/mindspeed/core/auto_parallel/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/auto_parallel_apply.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/auto_parallel_memory.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/auto_parallel_model.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/auto_parallel_optimizer.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/auto_parallel_profiling.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/auto_parallel_rectify.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/help.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/memory_modeling.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/optimizer.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/pp_layer_search.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/profiling.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/schedules.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/solver.py create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/noise_predict_ckpt/FlashAttentionScoreGrad_910B.pth create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/noise_predict_ckpt/FlashAttentionScore_910B.pth create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/noise_predict_ckpt/MatMul_910B.pth create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/noise_predict_ckpt/RmsNormGrad_910B.pth create mode 100644 model/train/yoco_moe/mindspeed/core/auto_parallel/noise_predict_ckpt/RmsNorm_910B.pth create mode 100644 model/train/yoco_moe/mindspeed/core/context_parallel/__init__.py create mode 100644 
model/train/yoco_moe/mindspeed/core/context_parallel/adaptive_context_parallel.py create mode 100644 model/train/yoco_moe/mindspeed/core/context_parallel/context_parallel_kv_cache.py create mode 100644 model/train/yoco_moe/mindspeed/core/context_parallel/ring_context_parallel.py create mode 100644 model/train/yoco_moe/mindspeed/core/context_parallel/ring_context_parallel_for_ampipe.py create mode 100644 model/train/yoco_moe/mindspeed/core/context_parallel/ulysses_context_parallel.py create mode 100644 model/train/yoco_moe/mindspeed/core/context_parallel/unaligned_cp/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/context_parallel/unaligned_cp/mapping.py create mode 100644 model/train/yoco_moe/mindspeed/core/context_parallel/utils.py create mode 100644 model/train/yoco_moe/mindspeed/core/data_parallel/distributed_data_parallel.py create mode 100644 model/train/yoco_moe/mindspeed/core/datasets/gpt_dataset.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/comm/hookwrap.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/config.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/constants.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/debug/sum.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/megatron_adaptor/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/megatron_adaptor/optimizer/clip.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/megatron_adaptor/optimizer/misc.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/megatron_adaptor/optimizer/sharded_grad_scaler.py create mode 100644 
model/train/yoco_moe/mindspeed/core/distributed/layerzero/megatron_adaptor/optimizer/zero.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_forward.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_grad.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_initialize.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_root_forward.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_shard.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_utils.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/hook.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/fqn.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/mga_checkpoint.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/optim_state.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/scripts/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/scripts/convert_to_megatron.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/scripts/layerzero_checkpointer.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/state_dict.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_common_utils.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_exec_order_utils.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_init_utils.py create mode 100644 
model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_limiter.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_traversal_utils.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_wrap_utils.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/api.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/flat_param.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/fsdp.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/wrap.py create mode 100644 model/train/yoco_moe/mindspeed/core/distributed/param_and_grad_buffer.py create mode 100644 model/train/yoco_moe/mindspeed/core/fusions/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/fusions/fused_bias_swiglu.py create mode 100644 model/train/yoco_moe/mindspeed/core/fusions/fused_layer_norm.py create mode 100644 model/train/yoco_moe/mindspeed/core/fusions/fused_softmax.py create mode 100644 model/train/yoco_moe/mindspeed/core/fusions/npu_moe_token_permute.py create mode 100644 model/train/yoco_moe/mindspeed/core/fusions/npu_moe_token_unpermute.py create mode 100644 model/train/yoco_moe/mindspeed/core/fusions/rms_norm.py create mode 100644 model/train/yoco_moe/mindspeed/core/fusions/rotary_pos_embedding.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_apply.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_cache.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_function.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_opt.py create mode 100644 
model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_policy.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_prefetch.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_profiling.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_solver.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_swap_manager.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_tool.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/cpu_binder.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/adaptive_recompute.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/adaptive_recompute_apply.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/adaptive_recompute_solver.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/pluggable_allocator_adpator.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/prefetch.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/swap_manager.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/swappable_tensor.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/auto_pipeline/autopipeline.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/auto_pipeline/autopipeline_apply.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/auto_pipeline/autopipeline_solver.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/common.py create mode 100644 
model/train/yoco_moe/mindspeed/core/memory/memory_fragmentation/malloc_recorder.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/memory_fragmentation/memory_recorder.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/memory_fragmentation/optimizer_init_precise.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/memory_fragmentation/pluggable_allocator_adpator.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/smart_swap/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/smart_swap/hooks.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/smart_swap/policy_generator.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/smart_swap/swap_adaptor.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/smart_swap/swap_arranger.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/smart_swap/swap_cpp_adaptor.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/smart_swap/swap_engine.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/smart_swap/swap_manager.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/smart_swap/swap_megatron_adaptor.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/smart_swap/swap_policy_config.py create mode 100644 model/train/yoco_moe/mindspeed/core/memory/smart_swap/swap_utils.py create mode 100644 model/train/yoco_moe/mindspeed/core/mindspeed_parallel_group.py create mode 100644 model/train/yoco_moe/mindspeed/core/models/common/embeddings/language_model_embedding.py create mode 100644 model/train/yoco_moe/mindspeed/core/models/common/embeddings/rotary_pos_embedding.py create mode 100644 model/train/yoco_moe/mindspeed/core/models/gpt/gpt_layer_specs.py create mode 100644 model/train/yoco_moe/mindspeed/core/models/gpt/gpt_model.py create mode 100644 model/train/yoco_moe/mindspeed/core/parallel_state.py create mode 100644 
model/train/yoco_moe/mindspeed/core/performance/auto_pipeline_perf/autopipeline_perf.py create mode 100644 model/train/yoco_moe/mindspeed/core/performance/auto_pipeline_perf/data_samplers.py create mode 100644 model/train/yoco_moe/mindspeed/core/performance/auto_pipeline_perf/global_vars.py create mode 100644 model/train/yoco_moe/mindspeed/core/performance/auto_pipeline_perf/optimpipeline_solver.py create mode 100644 model/train/yoco_moe/mindspeed/core/performance/auto_pipeline_perf/schedulepipeline_solver.py create mode 100644 model/train/yoco_moe/mindspeed/core/performance/auto_pipeline_perf/schedules.py create mode 100644 model/train/yoco_moe/mindspeed/core/performance/auto_pipeline_perf/transformer.py create mode 100644 model/train/yoco_moe/mindspeed/core/pipeline_parallel/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/pipeline_parallel/flexible_schedules.py create mode 100644 model/train/yoco_moe/mindspeed/core/pipeline_parallel/multiparameter_schedules.py create mode 100644 model/train/yoco_moe/mindspeed/core/pipeline_parallel/p2p_communication.py create mode 100644 model/train/yoco_moe/mindspeed/core/pipeline_parallel/ripipe_schedules.py create mode 100644 model/train/yoco_moe/mindspeed/core/pipeline_parallel/schedules.py create mode 100644 model/train/yoco_moe/mindspeed/core/simple_parallel_cfg.py create mode 100644 model/train/yoco_moe/mindspeed/core/singleton_meta.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/ascend_turbo/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/ascend_turbo/ascend_turbo_cfg.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/ascend_turbo/initialize.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/ascend_turbo/mc2_linears_seq_parallel.py create mode 100644 
model/train/yoco_moe/mindspeed/core/tensor_parallel/checkpoint_manager.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/comm_autograd_function.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/comm_group_api.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/comm_utils.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/cross_entropy.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/layers.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/coc_parallel_linears_all_reduce.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/coc_parallel_linears_all_reduce_fused.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/coc_parallel_linears_sequence_parallel.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/coc_parallel_linears_sequence_parallel_fused.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/coc_utils.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/matmul_soc_friendly.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/min_comm_cfg.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/rewrite_parallel_linears_all_reduce.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/rewrite_parallel_linears_sequence_parallel.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/user_config.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/mapping.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/random.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/tp_2d/layernorm_2d.py create mode 100644 
model/train/yoco_moe/mindspeed/core/tensor_parallel/tp_2d/linear_2d_moe_split_along_first_dim.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/tp_2d/linear_2d_split_along_first_dim.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/tp_2d/norm_factory.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/tp_2d/parallel_linear_2d.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/tp_2d/rms_norm_2d.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/adaptor.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/unaligned_column_parallel_linear.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/unaligned_row_parallel_linear.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/unaligned_utils.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel_x_union_cp.py create mode 100644 model/train/yoco_moe/mindspeed/core/tensor_parallel_y_union_cp.py create mode 100644 model/train/yoco_moe/mindspeed/core/training.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/attention.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/custom_layers/transformer_engine.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/dot_product_attention.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/mlp.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/module.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/comm_utils.py create mode 100644 
model/train/yoco_moe/mindspeed/core/transformer/moe/experts.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/grouped_gemm_util.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/grouped_mlp_with_comp_and_comm_overlap_all2all.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/grouped_mlp_with_comp_and_comm_overlap_allgather.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/layers.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/moe_layer.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/moe_layer_overlap_all2all.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/moe_layer_overlap_allgather.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/moe_utils.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/router.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/token_dispatcher.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/tp_2d/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/tp_2d/grouped_mlp_2d.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/tp_2d/moe_allgather_token_dispatcher_2d.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/tp_2d/moe_layer_2d.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/tp_2d/sequential_mlp_2d.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/tp_2d/topk_router_2d.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/moe/unpermute_without_activation.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/transformer.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/transformer_block.py create mode 100644 model/train/yoco_moe/mindspeed/core/transformer/transformer_config.py create mode 100644 
model/train/yoco_moe/mindspeed/core/weight_grad_store.py create mode 100644 model/train/yoco_moe/mindspeed/features_manager/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/features_manager/feature.py create mode 100644 model/train/yoco_moe/mindspeed/features_manager/functional/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/features_manager/functional/profiler_default_feature.py create mode 100644 model/train/yoco_moe/mindspeed/features_manager/tensor_parallel/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/features_manager/tensor_parallel/unaligned_linear_feature.py create mode 100644 model/train/yoco_moe/mindspeed/functional/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/functional/profiler/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/functional/profiler/profiler_info.py create mode 100644 model/train/yoco_moe/mindspeed/initialize.py create mode 100644 model/train/yoco_moe/mindspeed/megatron_adaptor.py create mode 100644 model/train/yoco_moe/mindspeed/model/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/model/alibi_mask.py create mode 100644 model/train/yoco_moe/mindspeed/model/language_model.py create mode 100644 model/train/yoco_moe/mindspeed/model/transformer.py create mode 100644 model/train/yoco_moe/mindspeed/moe/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/moe/ampipe/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/moe/ampipe/ampipe.py create mode 100644 model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_args.py create mode 100644 model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_async_communication.py create mode 100644 model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_bias_dropout_add_ln_computer.py create mode 100644 model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_fa.py create mode 100644 model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_fa_computer.py create mode 100644 model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_moe_gating_computer.py 
create mode 100644 model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_moe_mlp_computer.py create mode 100644 model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_post_mlp_computer.py create mode 100644 model/train/yoco_moe/mindspeed/moe/async_comm_utils.py create mode 100644 model/train/yoco_moe/mindspeed/moe/config.py create mode 100644 model/train/yoco_moe/mindspeed/moe/experts.py create mode 100644 model/train/yoco_moe/mindspeed/moe/gate.py create mode 100644 model/train/yoco_moe/mindspeed/moe/mixtral_parallel_mlpbm.py create mode 100644 model/train/yoco_moe/mindspeed/moe/moe.py create mode 100644 model/train/yoco_moe/mindspeed/moe/moe_layer.py create mode 100644 model/train/yoco_moe/mindspeed/moe/pipe_experts.py create mode 100644 model/train/yoco_moe/mindspeed/moe/utils.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/conv3d/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/conv3d/conv3d_depth_parallel.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/checkpointing.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/communication/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/communication/dist_communication.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/communication/dist_ranks_match.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/config/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/config/dist_train_config.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/inner_data_parallel/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/inner_data_parallel/inner_data_parallel.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/inner_data_parallel/mappings.py create mode 100644 
model/train/yoco_moe/mindspeed/multi_modal/dist_train/inner_data_parallel/utils.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/parallel_state.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/pipeline_parallel/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/pipeline_parallel/dist_schedules.py create mode 100644 model/train/yoco_moe/mindspeed/multi_modal/dist_train/training.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/adaptive_cp_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/adaptive_recomputing_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/algorithm_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/atb_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/ffn_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/fused_ema_adamw_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/fusion_attention_v2_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/gmm_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/groupmatmul_add_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/lcal_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/matmul_add_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/memory_fragmentation_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/npu_all_to_all_all_gather_bmm_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/npu_bmm_reduce_scatter_all_to_all_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/npu_dropout_add_layer_norm_builder.py create mode 100644 
model/train/yoco_moe/mindspeed/op_builder/npu_grouped_mat_mul_all_reduce_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/npu_inplace_mm_all_reduce_add_rms_norm_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/npu_mm_all_reduce_add_rms_norm_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/npu_moe_token_permute_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/npu_moe_token_unpermute_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/npu_ring_attention_update_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/npu_rotary_position_embedding_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/quant_gmm_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/rms_norm_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/smart_swap_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/swiglu_builder.py create mode 100644 model/train/yoco_moe/mindspeed/op_builder/weight_quant_gmm_builder.py create mode 100644 model/train/yoco_moe/mindspeed/ops/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/algorithm/adaptive_cp/adaptive_cp.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/algorithm/algorithm.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/atb/groupmatmul_add.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/atb/inc/atb_adapter.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/atb/lcal_coc.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/atb/matmul_add.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/atb/rms_norm.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/atb/swiglu.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/atb/utils/atb_adapter.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/ffn.cpp create mode 100644 
model/train/yoco_moe/mindspeed/ops/csrc/cann/fusion_attention_v2.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/gmm.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/inc/aclnn_common.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/inc/mc2_utils.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_all_to_all_all_gather_bmm.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_apply_fused_ema_adamw.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_bmm_reduce_scatter_all_to_all.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_dropout_add_layer_norm.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_grouped_mat_mul_all_reduce.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_mm_all_reduce_add_rms_norm.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_mm_all_reduce_add_rms_norm_.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_moe_token_permute.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_moe_token_unpermute.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_ring_attention_update.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_rotary_position_embedding.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/quant_gmm.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/cann/weight_quant_gmm.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/flop_counter/flop_counter.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/flop_counter/flop_counter.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/adaptive_recomputing/NpuCachingCustomAllocator.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/adaptive_recomputing/NpuCachingCustomAllocator.h create mode 100644 
model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/CachingAllocatorConfig.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/CachingAllocatorConfig.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Decorator.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Decorator.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/DeviceCachingAllocator.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/DeviceCachingAllocator.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/EventPool.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/EventPool.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocator.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocator.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocatorFunctions.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Recorder.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Recorder.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/common.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/common.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/test.py create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/CachingAllocatorConfig.cpp create mode 
100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/CachingAllocatorConfig.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/DeviceCachingAllocator.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/DeviceCachingAllocator.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/EventPool.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/EventPool.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/NPUSwapManager.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/NPUSwapManager.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/NPUVmmApi.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/PluggableAllocator.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/PluggableAllocator.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/SwapBindings.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/SwapException.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/common.cpp create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/common.h create mode 100644 model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/swap_log.h create mode 100644 model/train/yoco_moe/mindspeed/ops/dropout_add_layer_norm.py create mode 100644 model/train/yoco_moe/mindspeed/ops/dropout_add_rms_norm.py create mode 100644 model/train/yoco_moe/mindspeed/ops/ffn.py create mode 100644 model/train/yoco_moe/mindspeed/ops/fusion_attention_v2.py create mode 100644 model/train/yoco_moe/mindspeed/ops/gmm.py create mode 100644 
model/train/yoco_moe/mindspeed/ops/lcal_functional.py create mode 100644 model/train/yoco_moe/mindspeed/ops/npu_all_to_all_all_gather_bmm.py create mode 100644 model/train/yoco_moe/mindspeed/ops/npu_apply_fused_ema_adamw.py create mode 100644 model/train/yoco_moe/mindspeed/ops/npu_bmm_reduce_scatter_all_to_all.py create mode 100644 model/train/yoco_moe/mindspeed/ops/npu_grouped_mat_mul_all_reduce.py create mode 100644 model/train/yoco_moe/mindspeed/ops/npu_groupmatmul_add.py create mode 100644 model/train/yoco_moe/mindspeed/ops/npu_matmul_add.py create mode 100644 model/train/yoco_moe/mindspeed/ops/npu_mm_all_reduce_add_rms_norm.py create mode 100644 model/train/yoco_moe/mindspeed/ops/npu_mm_all_reduce_add_rms_norm_.py create mode 100644 model/train/yoco_moe/mindspeed/ops/npu_moe_token_permute.py create mode 100644 model/train/yoco_moe/mindspeed/ops/npu_moe_token_unpermute.py create mode 100644 model/train/yoco_moe/mindspeed/ops/npu_ring_attention_update.py create mode 100644 model/train/yoco_moe/mindspeed/ops/npu_rotary_position_embedding.py create mode 100644 model/train/yoco_moe/mindspeed/ops/quant_gmm.py create mode 100644 model/train/yoco_moe/mindspeed/ops/weight_quant_gmm.py create mode 100644 model/train/yoco_moe/mindspeed/optimizer/__init__.py create mode 100644 model/train/yoco_moe/mindspeed/optimizer/adamw.py create mode 100644 model/train/yoco_moe/mindspeed/optimizer/distrib_optimizer.py create mode 100644 model/train/yoco_moe/mindspeed/optimizer/ema_adamw.py create mode 100644 model/train/yoco_moe/mindspeed/optimizer/optimizer.py create mode 100644 model/train/yoco_moe/mindspeed/patch_utils.py create mode 100644 model/train/yoco_moe/mindspeed/run/gpt_dataset.patch create mode 100644 model/train/yoco_moe/mindspeed/run/helpers.patch create mode 100644 model/train/yoco_moe/mindspeed/run/initialize.patch create mode 100644 model/train/yoco_moe/mindspeed/run/run.py create mode 100644 model/train/yoco_moe/mindspeed/tokenizer/__init__.py create mode 100644 
model/train/yoco_moe/mindspeed/tokenizer/tokenizer.py create mode 100644 model/train/yoco_moe/mindspeed/training.py create mode 100644 model/train/yoco_moe/mindspeed/utils.py create mode 100644 model/train/yoco_moe/mindspeed/yaml_arguments.py create mode 100644 model/train/yoco_moe/requirements.txt create mode 100644 model/train/yoco_moe/setup.py create mode 100644 model/train/yoco_moe/sources/images/FBW.png create mode 100644 model/train/yoco_moe/sources/images/activation_function_a.png create mode 100644 model/train/yoco_moe/sources/images/activation_function_b.png create mode 100644 model/train/yoco_moe/sources/images/adaptive_memory_a.png.png create mode 100644 model/train/yoco_moe/sources/images/adaptive_recompute_a.png create mode 100644 model/train/yoco_moe/sources/images/adaptive_recompute_b.png create mode 100644 model/train/yoco_moe/sources/images/algo_tp_comm_optimize_a.png create mode 100644 model/train/yoco_moe/sources/images/algo_tp_comm_optimize_b.png create mode 100644 model/train/yoco_moe/sources/images/alibi.png create mode 100644 model/train/yoco_moe/sources/images/ampipe.png create mode 100644 model/train/yoco_moe/sources/images/async_ddp.png create mode 100644 model/train/yoco_moe/sources/images/async_ddp_param_gather_a.png create mode 100644 model/train/yoco_moe/sources/images/async_ddp_param_gather_b.png create mode 100644 model/train/yoco_moe/sources/images/async_ddp_param_gather_c.png create mode 100644 model/train/yoco_moe/sources/images/auto_parallel_1.png create mode 100644 model/train/yoco_moe/sources/images/auto_parallel_2.png create mode 100644 model/train/yoco_moe/sources/images/auto_parallel_mm_1.PNG create mode 100644 model/train/yoco_moe/sources/images/auto_parallel_mm_2.PNG create mode 100644 model/train/yoco_moe/sources/images/auto_pipeline_parallel.png create mode 100644 model/train/yoco_moe/sources/images/conv3d_sequence_parallel.png create mode 100644 model/train/yoco_moe/sources/images/flash_attention.png create mode 100644 
model/train/yoco_moe/sources/images/hierarchical_alltoallv_1.png create mode 100644 model/train/yoco_moe/sources/images/hierarchical_alltoallv_2.png create mode 100644 model/train/yoco_moe/sources/images/logo.png create mode 100644 model/train/yoco_moe/sources/images/megatron_tp.png create mode 100644 model/train/yoco_moe/sources/images/moe_dynamic_padding_a.png create mode 100644 model/train/yoco_moe/sources/images/moe_dynamic_padding_b.png create mode 100644 model/train/yoco_moe/sources/images/moe_dynamic_padding_c.png create mode 100644 model/train/yoco_moe/sources/images/moe_dynamic_padding_d.png create mode 100644 model/train/yoco_moe/sources/images/moe_dynamic_padding_e.png create mode 100644 model/train/yoco_moe/sources/images/moe_experts_pipeline_degree.png create mode 100644 model/train/yoco_moe/sources/images/nanopipe.png create mode 100644 model/train/yoco_moe/sources/images/nanopipe_v2.png create mode 100644 model/train/yoco_moe/sources/images/nd_matmul.png create mode 100644 model/train/yoco_moe/sources/images/pipedream1F1B.png create mode 100644 model/train/yoco_moe/sources/images/pipeline_experts.png create mode 100644 model/train/yoco_moe/sources/images/reuse_fp32_param_a.png create mode 100644 model/train/yoco_moe/sources/images/reuse_fp32_param_b.png create mode 100644 model/train/yoco_moe/sources/images/ripipe_a.png create mode 100644 model/train/yoco_moe/sources/images/ripipe_b.png create mode 100644 model/train/yoco_moe/sources/images/sequence-parallel.png create mode 100644 model/train/yoco_moe/sources/images/shared-experts.png create mode 100644 model/train/yoco_moe/sources/images/smart_swap_flowchart.png create mode 100644 model/train/yoco_moe/sources/images/swap_attention.png create mode 100644 model/train/yoco_moe/sources/images/swap_attention1.png create mode 100644 model/train/yoco_moe/sources/images/swap_attention2.png create mode 100644 model/train/yoco_moe/sources/images/swap_attention3.png create mode 100644 
model/train/yoco_moe/sources/images/tensor-parallel-2d.png create mode 100644 model/train/yoco_moe/sources/images/virtual-pipeline.PNG create mode 100644 model/train/yoco_moe/tests_extend/README.md create mode 100644 model/train/yoco_moe/tests_extend/commons.py create mode 100644 model/train/yoco_moe/tests_extend/conftest.py create mode 100644 model/train/yoco_moe/tests_extend/model_tests/benchmark_model/gpt_175b/pretrain_gpt3_175B_128k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/benchmark_model/gpt_175b/pretrain_gpt3_175B_32k_cp2.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/benchmark_model/gpt_175b/pretrain_gpt3_175B_32k_cp4.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/benchmark_model/gpt_175b/pretrain_gpt3_175B_8k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/benchmark_model/gpt_moe/pretrain_gpt_moe_128k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/benchmark_model/gpt_moe/pretrain_gpt_moe_32k_cp2.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/benchmark_model/gpt_moe/pretrain_gpt_moe_32k_cp4.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/benchmark_model/gpt_moe/pretrain_gpt_moe_8k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/benchmark_model/llama2/pretrain_llama2_70b_phd_128k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/benchmark_model/llama2/pretrain_llama2_70b_phd_32k_cp2.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/benchmark_model/llama2/pretrain_llama2_70b_phd_32k_cp4.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/benchmark_model/llama2/pretrain_llama2_70b_phd_4k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/mfu_model/pretrain_gpt_moe_128k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/mfu_model/pretrain_llama2_70b_128k.sh create mode 100644 
model/train/yoco_moe/tests_extend/model_tests/perf_model/gpt_175b/pretrain_gpt3_175B_128k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/perf_model/gpt_175b/pretrain_gpt3_175B_32k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/perf_model/gpt_175b/pretrain_gpt3_175B_8k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/perf_model/gpt_moe/pretrain_gpt_moe_128k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/perf_model/gpt_moe/pretrain_gpt_moe_32k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/perf_model/gpt_moe/pretrain_gpt_moe_8k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/perf_model/llama2/pretrain_llama2_70b_128k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/perf_model/llama2/pretrain_llama2_70b_32k.sh create mode 100644 model/train/yoco_moe/tests_extend/model_tests/perf_model/llama2/pretrain_llama2_70b_4k.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/core_model/README.md create mode 100644 model/train/yoco_moe/tests_extend/system_tests/core_model/gpt-usecase.yaml create mode 100644 model/train/yoco_moe/tests_extend/system_tests/core_model/gpt-usecase_adaptive_memory.yaml create mode 100644 model/train/yoco_moe/tests_extend/system_tests/core_model/gpt-usecase_fp32.yaml create mode 100644 model/train/yoco_moe/tests_extend/system_tests/core_model/pretrain_gpt_usecase.py create mode 100644 model/train/yoco_moe/tests_extend/system_tests/core_model/pretrain_gpt_usecase.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/core_model/pretrain_gpt_usecase_adaptive_memory.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/core_model/pretrain_gpt_usecase_fp32.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/env_npu.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_precision_guarding/README.md create mode 100644 
model/train/yoco_moe/tests_extend/system_tests/feature_precision_guarding/fpg_llama_usecase.yaml create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_precision_guarding/llama_param_cvt.py create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_precision_guarding/llama_param_cvt.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_precision_guarding/pretrain_fpg_llama.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_precision_guarding/pretrain_gpt_fpg.py create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/deepseek_mla.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/deepseek_moe.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/deepspeed_moe.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/hybrid.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/megatron_moe.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/nanopipe.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/ring_attention.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/tp2d.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/tp2d_grouped_mlp_megatron_moe.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/tp2d_sequential_mlp_megatron_moe.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/ulysses.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/ulysses_allgather.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/feature_tests/unaligned_tpsp.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/gpt/pretrain_gpt_deepspeed_moe_128k.sh create mode 100644 
model/train/yoco_moe/tests_extend/system_tests/gpt/pretrain_gpt_megatron_moe_8k.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/llama2/pretrain_llama2_70B_128k_tp2_cp4_hybrid.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/llama2/pretrain_llama2_70B_128k_tp2_cp4_megatron.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/llama2/pretrain_llama2_70B_32k_tp2_pp2_vpp1_cp2_ulysses_resume.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/llama2/pretrain_llama2_70B_4k_tp2_pp2_vpp1_dp2.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/llama2/pretrain_llama2_70B_8k_tp8_x4_y2_2d.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/llama2/pretrain_llama_pp2_dp4.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/llama3/README.md create mode 100644 model/train/yoco_moe/tests_extend/system_tests/llama3/pretrain_llama3_8b_ptd.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/llama3/pretrain_llama3_8b_ptd_adaptive_cp.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/mixtral/README.md create mode 100644 model/train/yoco_moe/tests_extend/system_tests/mixtral/pretrain_mixtral.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/multi_modal/multi_parameter_pipeline/pretrain_multi_parameter_pipeline_test.py create mode 100644 model/train/yoco_moe/tests_extend/system_tests/multi_modal/multi_parameter_pipeline/pretrain_multi_parameter_pipeline_test.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/opensora1.0/16x256x256.py create mode 100644 model/train/yoco_moe/tests_extend/system_tests/opensora1.0/pretrain_opensora.py create mode 100644 model/train/yoco_moe/tests_extend/system_tests/opensora1.0/pretrain_opensora.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/pretrain_base.sh create mode 100644 model/train/yoco_moe/tests_extend/system_tests/pretrain_skip_train.sh 
create mode 100644 model/train/yoco_moe/tests_extend/system_tests/yaml_args_example/README.md create mode 100644 model/train/yoco_moe/tests_extend/system_tests/yaml_args_example/example.yaml create mode 100644 model/train/yoco_moe/tests_extend/system_tests/yaml_args_example/pretrain_yaml_args.sh create mode 100644 model/train/yoco_moe/tests_extend/tools/data_handler.py create mode 100644 model/train/yoco_moe/tests_extend/tools/preprocess_data.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/__init__.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/common.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/activation_function_recomputing/test_activation_function_recompute.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/adaptive_memory/__init__.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/adaptive_memory/test_adaptive_memory_apply.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/adaptive_memory/test_adaptive_memory_cache.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/adaptive_memory/test_adaptive_memory_policy.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/adaptive_memory/test_adaptive_memory_prefetch.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/adaptive_memory/test_adaptive_memory_profiling.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/adaptive_memory/test_adaptive_memory_solver.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/adaptive_recomputing/test_adaptive_recompute_allocator_adpator.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/adaptive_recomputing/test_adaptive_recompute_apply.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/adaptive_recomputing/test_adaptive_recompute_hook.py create mode 100644 
model/train/yoco_moe/tests_extend/unit_tests/features/adaptive_recomputing/test_adaptive_recomputing.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/adaptive_recomputing/test_swap_function.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/algorithm/test_reuse_dptr.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/algorithm/test_reuse_optimizer.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/ampipe/test_ampipe_forward.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/ampipe/test_attn_with_cp_for_ampipe.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/mc2/test_mc2columnparallellinear.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/mc2/test_mc2rowparallellinear.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/mc2/test_mcore_mc2columnparallellinear.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/memory_fragmentation/test_caching_allocator_config.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/memory_fragmentation/test_device_caching_allocator.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/memory_fragmentation/test_malloc_recorder.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/memory_fragmentation/test_memory_recorder.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/memory_fragmentation/test_pluggable_allocator_api.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/moe/test_moe_pipeline_experts.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/moe/test_moe_token_rearrange.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/multi_model/dist_train/dist_train_config_utils.py create mode 100644 
model/train/yoco_moe/tests_extend/unit_tests/features/multi_model/dist_train/test_dist_communication.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/multi_model/dist_train/test_dist_config.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/multi_model/dist_train/test_dist_ranks_match.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/multi_model/test_fused_ema_adamw.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/multi_model/test_sequence_parallel_conv3d.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/recompute_independent_pipelining/test_checkpoint_function.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/recompute_independent_pipelining/test_checkpoint_manager.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/smart_swap/test_smart_swap.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/swap_attention/test_swap_attention.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/test_noop_layers.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/test_norm_recompute.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/test_replace_gloo_communication.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/test_tflops.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/test_weight_grad_store.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/features/unaligned_linear/test_unaligned_linear.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/megatron/test_cp_ep_loss.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/megatron/test_cross_entropy.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/megatron/test_distrib_optimizer.py create mode 100644 
model/train/yoco_moe/tests_extend/unit_tests/megatron/test_dot_product_attention.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/megatron/test_overlap_grad_reduce.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/megatron/test_overlap_param_gather.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/megatron/test_parallel_state_ep_with_cp.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/megatron/test_recompute.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/mindspeed/core/context_parallel/test_adaptive_context_parallel.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/mindspeed/core/context_parallel/test_hybrid_context_parallel.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/mindspeed/core/context_parallel/test_mapping.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/mindspeed/core/context_parallel/test_ringattn_context_parallel.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/mindspeed/core/context_parallel/test_ringattn_context_parallel_eodbase.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/mindspeed/core/context_parallel/test_ringattn_context_parallel_tnd.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/mindspeed/core/context_parallel/test_ulysses_context_parallel.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/mindspeed/core/tensor_parallel/test_layernorm2d.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/mindspeed/core/tensor_parallel/test_rms_norm2d.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/mindspeed/test_register_patches.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/atb/test_groupmatmul_add.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/atb/test_lcal_coc.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/atb/test_matmul_add.py create 
mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_all_to_all_all_gather_bmm.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_apply_fused_ema_adamw.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_bmm_reduce_scatter_all_to_all.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_dropout_add_layer_norm.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_ffn.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_fused_moe_token_permute_and_unpermute.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_fused_rotary_pos_embedding.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_fusion_attention.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_gmm.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_grouped_mat_mul_all_reduce.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_kernel_rotary_position_embedding.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_mm_all_reduce_add_rms_norm.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_mm_all_reduce_add_rms_norm_.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_quant_gmm.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_ring_attention_update.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_rms_norm.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_swiglu.py create mode 100644 model/train/yoco_moe/tests_extend/unit_tests/ops/cann/test_npu_weight_quant_gmm.py diff --git a/model/train/yoco_moe/.gitignore b/model/train/yoco_moe/.gitignore new file mode 100644 index 
000000000..ce476ed08 --- /dev/null +++ b/model/train/yoco_moe/.gitignore @@ -0,0 +1,5 @@ +*.pyc +.idea/ +.vscode/ +.pytest_cache/ +*.egg-info/ diff --git a/model/train/yoco_moe/LICENSE b/model/train/yoco_moe/LICENSE new file mode 100644 index 000000000..762244cf2 --- /dev/null +++ b/model/train/yoco_moe/LICENSE @@ -0,0 +1,290 @@ +The following applies to all files unless otherwise noted; +Copyright (c) 2024, Bytedance Inc. +Copyright (c) 2023, Huawei Technologies Co., Ltd +Copyright (c) 2022, NVIDIA CORPORATION. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +-- + +This repository also contains code from Microsoft (from their DeepSpeed +project). Files from these organization(s) have notices at the top of each file. +Below are licenses used in those files, as indicated. + + +----------------------------- LICENSE FOR Microsoft code, Facebook, huggingface and Google Research code ----------------------------- + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +------------- LICENSE FOR various code from Facebook -------------- + +MIT License + +Copyright (c) Facebook, Inc. and its affiliates. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +------------- LICENSE FOR Microsoft Swin transformer code -------------- + +MIT License + +Copyright (c) Microsoft Corporation. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE diff --git a/model/train/yoco_moe/OWNERS b/model/train/yoco_moe/OWNERS new file mode 100644 index 000000000..d8969ab07 --- /dev/null +++ b/model/train/yoco_moe/OWNERS @@ -0,0 +1,36 @@ +approvers: +- gp513 +- qingfenxiaochong +- clinglai +- bingobb +- jializheng +reviewers: +- gp513 +- qingfenxiaochong +- pxc911 +- sanjaychan +- jializheng +- zhizaidicengshehua +- li-baokui +- arch-devil +- fishhhqi +- ryanyeee +- glhyy +- bingobb +- mingzhenwang1 +- wangzw1022 +- aijgnem +- clinglai +- ckirchhoff +- liu-tianlong +- akstt +- yu-xinjie62 +- yangyuan667 +- veronica-lam +- kong-deyuan +- zhao-yifan27 +- wang-xiaochao23 +- harygo2 +- ChenDonYY +- wenjiang2357 +- z0020952 diff --git a/model/train/yoco_moe/README.md b/model/train/yoco_moe/README.md new file mode 100644 index 000000000..b6213b8f9 --- /dev/null +++ b/model/train/yoco_moe/README.md @@ -0,0 +1,948 @@ +

+ +

+ + + + Badge + + + Documentation + + + + +

+ +# 简介 + +MindSpeed 是针对华为[昇腾设备](https://www.hiascend.com/)的大模型加速库。 + +大模型训练是一种非常复杂的过程,涉及到许多技术和挑战,其中大模型训练需要大量的显存资源是一个难题,对计算卡提出了不小的挑战。 +为了在单个计算卡显存资源不足时,可以通过多张计算卡进行计算,业界出现了类似 Megatron、DeepSpeed 等第三方大模型加速库,对模型、输入数据等进行切分并分配到不同的计算卡上,最后再通过集合通信对结果进行汇总。 + +昇腾提供 MindSpeed 加速库,使能客户大模型业务快速迁移至昇腾设备,并且支持昇腾专有算法,确保开箱可用。 + +--- +# 安装 + +### 1. 安装依赖 + +☀️ 在安装 **MindSpeed** 之前,请参考[版本配套表](#版本配套表),安装最新昇腾软件栈。 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
依赖软件软件安装指南
昇腾NPU驱动驱动固件安装指南
昇腾NPU固件
Toolkit(开发套件)CANN 软件安装指南
Kernel(算子包)
NNAL(Ascend Transformer Boost加速库)
PyTorchAscend Extension for PyTorch 配置与安装
torch_npu插件
apex
+ + + + +### 2. 安装 MindSpeed + +☀️ 下载源码安装: + + ```shell + git clone -b core_r0.8.0 https://gitee.com/ascend/MindSpeed.git + pip install -e MindSpeed + ``` + +如需使用Ascend Transformer Boost(ATB)加速库算子,请先安装 CANN-NNAL 并初始化添加环境,例如: + ```shell +# CANN-NNAL默认安装路径为:/usr/local/Ascend/nnal +# 运行CANN-NNAL默认安装路径下atb文件夹中的环境配置脚本set_env.sh +source /usr/local/Ascend/nnal/atb/set_env.sh + ``` + +### 3. 获取 Megatron-LM 并指定分支 + +☀️ 获取并切换 Megatron-LM 版本至 core_r0.8.0 的release版本,如下所示: + ```shell + git clone https://github.com/NVIDIA/Megatron-LM.git + cd Megatron-LM + git checkout core_r0.8.0 + ``` + +# 快速上手 + +1. 仅仅一行代码就可以轻松使能 MindSpeed 的各项功能。以 GPT 模型为例:在 Megatron-LM 目录下修改`pretrain_gpt.py`文件,在`import torch`下新增一行:`import mindspeed.megatron_adaptor`,即如下修改: + + ```diff + import os + import torch + +import mindspeed.megatron_adaptor + from functools import partial + from typing import Union + ``` + +2. 在 Megatron-LM 目录下,准备好训练数据,并在示例脚本中填写对应路径,然后执行。 + ```shell + bash examples/gpt3/train_gpt3_175b_distributed.sh + ``` +--- +# 自定义优化级别 +MindSpeed 提供了多层次的优化解决方案,并划分为三个层级,用户可根据实际需求灵活启用任意层级。高层级兼容低层级的能力,确保了整个系统的稳定性和扩展性。 +用户可以通过设置启动脚本中的 `--optimization-level {层级}` 参数来自定义开启的优化层级。该参数支持以下配置: + + + + + + + + + + + + + + + + + + + + + + + + + +
层级层级名称介绍
0 基础兼容层提供Megatron-LM框架对NPU的支持,确保无缝集成。该层包含基础功能集patch,保证可靠性和稳定性,为高级优化奠定基础。
1 亲和性增强层🔥兼容L0能力,集成高性能融合算子库,结合昇腾亲和的计算优化,充分释放昇腾算力,显著提升计算效率。
2 自研加速算法层🔥🔥默认值。该模式兼容了L1, L0能力,并集成了昇腾多项自主研发核心技术成果,可提供全面的性能优化。
+ + +# 特性介绍 +MindSpeed 特性由七大模块组成,分别为:Megatron特性支持、并行策略特性、内存优化特性、亲和计算特性、通信优化特性、关键场景特性以及多模态特性。其中【Released】表示是否商用发布,原型特性为非商用发布。 + +- 特性的介绍中说明了对应特性的应用场景及使用说明。一般而言,在脚本中加入相关参数即可轻松使用对应特性。🛰️ + +- 当前大模型训练主要使用bf16数据类型,以下特性若无特殊声明原则上兼容fp16, 如遇到fp16场景下相关问题请联系 MindSpeed 团队或提交issue, 我们会快速响应。🛰️ +## Megatron特性支持 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
特性名称介绍McoreLegacy
Megatron 数据并行link
Megatron 张量并行link
Megatron 流水并行link
Megatron 虚拟流水并行link
Megatron 分布式优化器link
Megatron 序列并行link
Megatron 异步DDPlink
Megatron 权重更新通信隐藏 link
Megatron 重计算link
+ + +## 并行策略特性 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
特性名称介绍McoreLegacyReleased
Ascend Ulysses 长序列并行link
Ascend Ring Attention 长序列并行link
Ascend 混合长序列并行link
Ascend 自定义空操作层link
+ +## 内存优化特性 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
特性名称介绍McoreLegacyReleased
Ascend 自适应选择重计算 link
Ascend 激活函数重计算 link
Ascend 重计算流水线独立调度 link
Ascend Mask归一link
Ascend BF16 参数副本复用link
Ascend swap_attentionlink
Ascend Norm重计算link
Ascend Hccl Buffer 自适应link
+ + +## 亲和计算特性 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
特性名称介绍McoreLegacyReleased
Ascend rms_norm 融合算子 link
Ascend swiglu 融合算子 link
Ascend rotary_embedding 融合算子 link
Ascend flash attentionlink
Ascend Moe Token Permute and Unpermute 融合算子link
Ascend npu_matmul_add_fp32 梯度累加融合算子link
Ascend Moe BMM通算融合算子link
Ascend 计算通信并行优化link
Ascend MC2link
Ascend fusion_attention_v2 link
+ + +## 通信优化特性 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
特性名称介绍McoreLegacyReleased
Ascend nano-pipe流水线并行 link
Ascend Gloo 存档落盘优化 link
Ascend 高维张量并行 link
+ +## Mcore MoE特性 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
特性名称介绍McoreLegacyReleased
Ascend Megatron MoE GMM link
Ascend Megatron MoE Allgather Dispatcher 性能优化 link
Ascend Megatron MoE Alltoall Dispatcher 性能优化 link
Ascend Megatron MoE TP拓展EP link
Ascend 共享专家 link
Ascend Megatron MoE 负载感知内存均衡算法 link
Ascend Megatron MoE 分层通信 link
Ascend Megatron MoE 大专家流水 link
+ + +## DeepSpeed MoE特性 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
特性名称介绍McoreLegacyReleased
DeepSpeed MoE link
Ascend DeepSpeed MoE token 重排性能优化 link
Ascend DeepSpeed MoE dropless 性能优化 link
Ascend MLP 通信隐藏 link
Ascend Ampipe流水通信隐藏 link
+ +## 关键场景特性 + + + + + + + + + + + + + + + + + + + + + + + + + +
特性名称介绍McoreLegacyReleased
Ascend EOD Reset训练场景 link
Ascend alibi link
+ +## 多模态特性 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
特性名称介绍McoreLegacyReleased
Ascend fused ema adamw优化器 link
Ascend PP支持动态形状link
Ascend PP支持多参数传递link
Ascend PP支持多参数传递和动态形状link
+ +## 其它特性 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
特性名称介绍McoreLegacyReleased
Ascend TFLOPS计算 link
Ascend Auto Tuning 并行策略自动搜索系统 link
Ascend 确定性计算 link
+ + +## 自定义算子 + + +部分自定义算子设置为公开接口,公开接口设置说明请参照 MindSpeed 安全声明中的[公开接口声明](SECURITYNOTE.md#公开接口声明),具体对外接口细节参照以下算子对应的手册链接。 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
自定义算子名称介绍Released
npu_dropout_add_layer_norm link
npu_rotary_position_embedding link
fusion_attention link
rms_norm link
swiglu link
npu_mm_all_reduce_add_rms_norm link
npu_mm_all_reduce_add_rms_norm_ link
npu_gmm link
npu_grouped_mat_mul_all_reduce link
lcal_coc link
ffn link
npu_fused_moe_token_permute link
npu_fused_moe_token_unpermute link
npu_ring_attention_update link
npu_matmul_add_fp32 link
npu_groupmatmul_add_fp32 link
npu_all_to_all_all_gather_bmm link
npu_bmm_reduce_scatter_all_to_all link
quant_gmm link
npu_apply_fused_ema_adamw link
+ +--- +# MindSpeed 中采集Profile数据 + +📝 MindSpeed 支持命令式开启Profile采集数据,命令配置介绍如下: + +| 配置命令 | 命令含义 | +|-------------------------|-----------------------------------------------------------------------------------| +| --profile | 打开profile开关 | +| --profile-step-start | 配置开始采集步,未配置时默认为10, 配置举例: --profile-step-start 30 | +| --profile-step-end | 配置结束采集步,未配置时默认为12, 配置举例: --profile-step-end 35 | +| --profile-level | 配置采集等级,未配置时默认为level0, 可选配置: level0, level1, level2, 配置举例: --profile-level level1 | +| --profile-with-cpu | 打开cpu信息采集开关 | +| --profile-with-stack | 打开stack信息采集开关 | +| --profile-with-memory | 打开memory信息采集开关,配置本开关时需打开--profile-with-cpu | +| --profile-record-shapes | 打开shapes信息采集开关 | +| --profile-save-path | 配置采集信息保存路径, 未配置时默认为./profile_dir, 配置举例: --profile-save-path ./result_dir | +| --profile-ranks | 配置待采集的ranks,未配置时默认为-1,表示采集所有rank的profiling数据,配置举例: --profile-ranks 0 1 2 3, 需注意: 该配置值为每个rank在单机/集群中的全局值 | + +--- +# 版本配套表 + +💡 **PyTorch Extension**版本号采用`{PyTorch版本}-{昇腾版本}`命名规则,前者为**PyTorch Extension**匹配的PyTorch版本,后者用于匹配CANN版本,详细匹配如下: + +| MindSpeed版本 | Megatron版本 | PyTorch版本 | torch_npu版本 | CANN版本 | Python版本 | 硬件型态 | +|-------------------------|-----------------|------------- |-------------|---------|----------------------------------------|----------| +| master(主线) | Core 0.8.0 | 2.1.0 | 在研版本 | 在研版本 | Python3.8.x, Python3.9.x, Python3.10.x | Atlas 200T A2 Box16, Atlas 800T A2, Atlas 900 A2 PODc | +| core_r0.7.0(主线) | Core 0.7.0 | 2.1.0 | 在研版本 | 在研版本 | Python3.8.x, Python3.9.x, Python3.10.x | Atlas 200T A2 Box16, Atlas 800T A2, Atlas 900 A2 PODc | +| core_r0.6.0(主线) | Core 0.6.0 | 2.1.0 | 在研版本 | 在研版本 | Python3.8.x, Python3.9.x, Python3.10.x | Atlas 200T A2 Box16, Atlas 800T A2, Atlas 900 A2 PODc | +| 1.0.0_core_r0.7.0(商用) | Core 0.7.0 | 2.1.0 | 6.0.0 | 8.0.0 | Python3.8.x, Python3.9.x, Python3.10.x | Atlas 200T A2 Box16, Atlas 800T A2, Atlas 900 A2 PODc | +| 1.0.0_core_r0.6.0(商用) | Core 0.6.0 | 2.1.0 | 6.0.0 | 8.0.0 | Python3.8.x, Python3.9.x, Python3.10.x | Atlas 
200T A2 Box16, Atlas 800T A2, Atlas 900 A2 PODc | +| 1.0.RC3_core_r0.7.0(商用) | Core 0.7.0 | 2.1.0 | 6.0.RC3 | 8.0.RC3 | Python3.8.x, Python3.9.x, Python3.10.x | Atlas 200T A2 Box16, Atlas 800T A2, Atlas 900 A2 PODc | +| 1.0.RC3_core_r0.6.0(商用) | Core 0.6.0 | 2.1.0 | 6.0.RC3 | 8.0.RC3 | Python3.8.x, Python3.9.x, Python3.10.x | Atlas 200T A2 Box16, Atlas 800T A2, Atlas 900 A2 PODc | +| 1.0.RC2(商用) | Core 0.6.0 | 2.1.0 | 6.0.RC2 | 8.0.RC2 | Python3.8.x, Python3.9.x, Python3.10.x | Atlas 200T A2 Box16, Atlas 800T A2, Atlas 900 A2 PODc | +| 1.0.RC1(商用) | commitid bcce6f | 2.1.0 | 6.0.RC1 | 8.0.RC1 | Python3.8.x, Python3.9.x, Python3.10.x | Atlas 200T A2 Box16, Atlas 800T A2, Atlas 900 A2 PODc | + +[昇腾辅助软件](https://gitee.com/ascend/pytorch#%E6%98%87%E8%85%BE%E8%BE%85%E5%8A%A9%E8%BD%AF%E4%BB%B6)中有更多关于PyTorch和CANN的版本信息。 + +# 分支维护策略 + +🛠️ MindSpeed 版本分支的维护阶段如下: + +| **状态** | **时间** | **说明** | +| ------------------- | -------- |----------------------------------------------------------------------| +| 计划 🕐 | 1-3 个月 | 计划特性 | +| 开发 🕔 | 3 个月 | 开发特性 | +| 维护 🕚 | 6-12 个月| 合入所有已解决的问题并发布版本,针对不同的MindSpeed 版本采取不同的维护策略,常规版本和长期支持版本维护周期分别为6个月和12个月 | +| 无维护 🕛 | 0-3 个月 | 合入所有已解决的问题,无专职维护人员,无版本发布 | +| 生命周期终止(EOL)🚫 | N/A | 分支不再接受任何修改 | + +🛠️ MindSpeed 版本维护策略: + +| **MindSpeed版本** | **维护策略** | **当前状态** | **发布时间** | **后续状态** | **EOL日期** | +|---------------------|-----------|---------|------------|--------------------|-----------| +| 1.0.0_core_r0.7.0 | 常规版本 | 开发 | 2024/12/30 | 预计2025/6/30起无维护 | | +| 1.0.0_core_r0.6.0 | 常规版本 | 开发 | 2024/12/30 | 预计2025/6/30起无维护 | | +| 1.0.RC3_core_r0.7.0 | 常规版本 | 维护 | 2024/09/30 | 预计2025/3/30起无维护 | | +| 1.0.RC3_core_r0.6.0 | 常规版本 | 维护 | 2024/09/30 | 预计2025/3/30起无维护 | | +| 1.0.RC2 | 常规版本 | 维护 | 2024/06/30 | 预计2024/12/30起无维护 | | +| 1.0.RC1 | 常规版本 | 停止维护 | 2024/03/30 | 2024/9/30起无维护 | | + +--- + +# 常见问题 + +| 现象 | 介绍 | +|------------------------------------|---------------------------------------| +| Data helpers 数据预处理出错 ❗ | [link](docs/faq/data_helpers.md) | +| 
Torch extensions 编译卡住 ❗ | [link](docs/faq/torch_extensions.md) | +| megatron0.7.0版本长稳测试出现grad norm为nan ❗| [link](docs/faq/megatron070_grad_norm_nan.md) | +| Gloo建链失败Gloo connectFullMesh failed with ... ❗| [link](docs/features/hccl-replace-gloo.md) | + +# 技术文章 +- [MindSpeed 加速百万级超长序列大模型训练](https://mp.weixin.qq.com/s/8q4MxCkosLn0yoneuxzynw) 🚀🚀 +- [MindSpeed 加速万亿MoE大模型训练](https://mp.weixin.qq.com/s/HQRzYzSUNNMonv5d1AP0OQ) 🚀🚀 +- [大模型训练内存优化难?MindSpeed 帮你来支招](https://mp.weixin.qq.com/s/lwjVgM67hwsgtOKp06zYPg) 🚀🚀 + +# 安全声明 + +⚠️ [MindSpeed 安全声明](SECURITYNOTE.md) + +--- + +# 致谢 + +🔎 MindSpeed-Core 由华为公司的下列部门联合贡献 : + +华为公司: + +- 昇腾计算产品部 +- 计算算法部 +- 计算软件平台部 +- 计算技术开发部 +- 公共开发部:NAIE +- 网络技术实验室 + +此外,MindSpeed-Core 感谢以下团队对项目的贡献: + +- 微信基础架构团队 + +感谢来自社区的每一个PR,欢迎贡献 MindSpeed-Core! diff --git a/model/train/yoco_moe/SECURITYNOTE.md b/model/train/yoco_moe/SECURITYNOTE.md new file mode 100644 index 000000000..d909e54e1 --- /dev/null +++ b/model/train/yoco_moe/SECURITYNOTE.md @@ -0,0 +1,116 @@ +# MindSpeed 安全声明 + +## 系统安全加固 + +- 用户可在运行系统配置时开启 ASLR(级别2)以提高系统安全性,保护系统随机化开启。 +可参考以下方式进行配置: + + ``` + echo 2 > /proc/sys/kernel/randomize_va_space + ``` + +## 运行用户建议 + +- 基于安全性考虑,建议您在执行任何命令时,不建议使用root等管理员类型账户执行,遵循权限最小化原则。 + +## 文件权限控制 + +- 建议用户在主机(包括宿主机)及容器中设置运行系统umask值为0027及以上,保障新增文件夹默认最高权限为750,新增文件默认最高权限为640。 +- 建议用户对训练所需文件、训练过程中保存的文件、用户个人的隐私数据、商业资产等敏感文件做好权限控制等安全措施,例如多用户共享数据集场景下的数据集文件写权限控制等,设定的权限建议参考[附录A 文件(夹)各场景权限管控推荐最大值](#A-文件(夹)各场景权限管控推荐最大值)进行设置。 +- MindSpeed 中各类融合算子通过调用 PyTorch 中的 cpp_extension 特性进行编译,编译结果会默认缓存到 `~/.cache/torch_extensions` 目录下,建议用户根据自身需要,参考[附录A 文件(夹)各场景权限管控推荐最大值](#A-文件(夹)各场景权限管控推荐最大值)对生成文件做好权限控制。 +- 原生 Megatron-LM 以及 PyTorch 框架运行中所生成的文件权限依赖系统设定,如 Megatron-LM 生成的数据集索引文件、torch.save 接口保存的文件等。建议当前执行脚本的用户根据自身需要,对生成文件做好权限控制,设定的权限可参考[附录A 文件(夹)各场景权限管控推荐最大值](#A-文件(夹)各场景权限管控推荐最大值)进行设置。 +- 运行时 CANN 可能会缓存算子编译文件,存储在运行目录下的`kernel_meta_*`文件夹内,加快后续训练的运行速度,用户可根据需要自行对生成后的相关文件进行权限控制。 +- 用户安装和使用过程需要做好权限控制,建议参考[附录A 
文件(夹)各场景权限管控推荐最大值](#A-文件(夹)各场景权限管控推荐最大值)文件权限参考进行设置。如需要保存安装/卸载日志,可在安装/卸载命令后面加上参数 `--log <FILE>`, 注意对`<FILE>`文件及目录做好权限管控。 + +## 数据安全声明 + +- MindSpeed 依赖 CANN 的基础能力实现 AOE 性能调优、算子 dump、日志记录等功能,用户需要关注上述功能生成文件的权限控制。 + +## 运行安全声明 + +- 建议用户结合运行环境资源状况编写对应训练脚本。若训练脚本与资源状况不匹配,如数据集加载内存大小超出内存容量限制、训练脚本在本地生成数据超过磁盘空间大小等情况,可能引发错误并导致进程意外退出。 +- MindSpeed 在运行异常时会退出进程并打印报错信息,建议根据报错提示定位具体错误原因,包括设定算子同步执行、查看 CANN 日志、解析生成的 Core Dump 文件等方式。 + +## 公网地址声明 +- MindSpeed代码中包含公网地址声明如下表所示: + +| 类型 | 开源代码地址 | 文件名 | 公网IP地址/公网URL地址/域名/邮箱地址 | 用途说明 | +| :------------: |:------------------------------------------------------------------------------------------:|:----------------------------------------------------------:| :----------------------------------------------------------: |:-----------------------------------------:| +| 开源引入 | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/sharded_moe.py | mindspeed/moe/gate.py | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/sharded_moe.py | deepspeed moe源码地址 | +| 开源引入 | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/sharded_moe.py | mindspeed/moe/gate.py | https://arxiv.org/pdf/2006.16668.pdf | 开源引入TopKGate类实现 | +| 开源引入 | https://github.com/tensorflow/mesh/blob/master/mesh_tensorflow/transformer/moe.py | mindspeed/moe/gate.py | https://arxiv.org/pdf/2202.08906.pdf | 开源引入apply_z_loss实现 | +| 开源引入 | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/sharded_moe.py | mindspeed/moe/moe_layer.py | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/sharded_moe.py | deepspeed moe源码地址 | +| 开源引入 | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/sharded_moe.py | mindspeed/moe/moe_layer.py | https://arxiv.org/pdf/2006.16668.pdf | 开源引入MOELayer类实现 | +| 开源引入 | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/layer.py | mindspeed/moe/mixtral_parallel_mlpbm.py | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/layer.py | deepspeed moe源码地址 | +| 开源引入 | 
https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/layer.py | mindspeed/moe/moe.py | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/layer.py | deepspeed moe源码地址 | +| 开源引入 | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/sharded_moe.py | mindspeed/moe/utils.py | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/sharded_moe.py | deepspeed moe源码地址 | +| 开源引入 | https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/transformer/moe/moe_utils.py | mindspeed/moe/utils.py | https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/transformer/moe/moe_utils.py | megatron moe源码地址 | +| 开源引入 | https://github.com/pytorch/pytorch/pull/40762 | mindspeed/moe/utils.py | https://github.com/pytorch/pytorch/pull/40762 | alltoall实现源码 | +| 开源引入 | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/sharded_moe.py | mindspeed/moe/utils.py | https://arxiv.org/pdf/2006.16668.pdf | einsum论文地址 | +| 开源引入 | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/experts.py | mindspeed/moe/experts.py | https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/experts.py | deepspeed moe源码地址 | +| 开源引入 | https://github.com/HazyResearch/flash-attention | docs/features/flash-attention.md | https://arxiv.org/pdf/2205.14135 | flash-attention说明文档 | +| 开源引入 | https://github.com/nvidia/megatron-lm | docs/features/virtual-pipeline-parallel.md | https://people.eecs.berkeley.edu/~matei/papers/2021/sc_megatron_lm.pdf | virtual-pipeline-parallel说明文档 | +| 开源引入 | https://github.com/feifeibear/long-context-attention | docs/features/hybrid-context-parallel.md | https://arxiv.org/abs/2405.07719 | hybrid-context-parallel说明文档 | +| 开源引入 | https://github.com/feifeibear/long-context-attention | docs/features/ring-attention-context-parallel.md | https://arxiv.org/pdf/2310.01889 | ring-attention-context-parallel说明文档 | +| 开源引入 | https://github.com/ofirpress/attention_with_linear_biases | docs/features/alibi.md | 
https://arxiv.org/pdf/2108.12409 | alibi说明文档 | +| 开源引入 | https://github.com/NVIDIA/Megatron-LM | docs/features/sequence-parallel.md | https://arxiv.org/pdf/2205.05198 | sequence-parallel说明文档 | +| 开源引入 | https://github.com/NVIDIA/Megatron-LM | docs/features/pipeline-parallel.md | https://arxiv.org/pdf/1806.03377 | pipeline-parallel说明文档 | +| 开源引入 | https://github.com/NVIDIA/Megatron-LM/pull/598 | docs/faq/data_helpers.md | https://github.com/NVIDIA/Megatron-LM/pull/598 | data_helpers说明文档 | +| 开源引入 | https://pytorch.org/docs/stable/distributed.html | mindspeed/core/parallel_state.py | https://pytorch.org/docs/stable/distributed.html | torch.distributed相关接口注意事项 | +| 开源引入 | https://github.com/pytorch/pytorch/pull/40762 | mindspeed/moe/utils.py | https://github.com/pytorch/pytorch/pull/40762 | _AllToAll自动反向参考 | +| 开源引入 | https://github.com/NVIDIA/Megatron-LM | mindspeed/optimizer/distrib_optimizer.py | https://github.com/NVIDIA/Megatron-LM/blob/main/docs/source/distrib_optimizer.md | distributed_optimizer_zero3_init文档字符串参数说明 | +| 开源引入 | https://github.com/InternLM/InternEvo | mindspeed/docs/features/ring-attention-context-parallel.md | https://arxiv.org/pdf/2406.18485 | ring-attention-context-parallel说明文档 | +| 开源引入 | https://github.com/sail-sg/zero-bubble-pipeline-parallelism | mindspeed/docs/features/nanopipe-pipeline-parallel.md | https://arxiv.org/abs/2401.10241 | nanopipe-pipeline-parallel说明文档 | +| 开源引入 | https://github.com/iclr24-3434/AMPipe.git | mindspeed/docs/features/ampipe.md | https://openreview.net/pdf?id=yLgr02IsXY | ampipe说明文档 | +| 开源引入 | https://gitee.com/ascend/pytorch | mindspeed/docs/features/adaptive-recompute.md | https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha001/apiref/envref/envref_07_0053.html | 环境变量`PYTORCH_NPU_ALLOC_CONF`说明文档 | +| 开源引入 | https://github.com/deepseek-ai/DeepSeek-MoE | mindspeed/docs/features/shared-experts.md | https://arxiv.org/pdf/2401.06066 | 共享专家说明文档 | +| 开源引入 | https://gitee.com/ascend/MindSpeed | 
mindspeed/setup.py | https://gitee.com/ascend/MindSpeed | MindSpeed源码地址 | +| 开源引入 | https://gitee.com/ascend/MindSpeed/release | mindspeed/setup.py | https://gitee.com/ascend/MindSpeed/release | MindSpeed源码地址 | +| 开源引入 | https://packaging.python.org/en/latest/single_source_version.html | mindspeed/setup.py | https://packaging.python.org/en/latest/single_source_version.html | MindSpeed版本管理 | +| 开源引入 | https://github.com/NVIDIA/TransformerEngine/pull/719 | mindspeed/core/data_parallel/distributed_data_parallel.py | https://github.com/NVIDIA/TransformerEngine/pull/719 | use_distributed_optimizer实现源码 | + +## 公开接口声明 + +-MindSpeed已更新其接口策略,现在除了对原生megatron在昇腾设备的无缝支持,还新增了针对融合算子的公开接口。用户在使用时,可以直接调用这些新增的融合算子接口,以充分利用MindSpeed在特定计算任务上的优化能力。 +#### 判断函数是否为公开接口: +如果一个函数被定义在__all__中,并且在MindSpeed/tree/{分支}/docs 中进行了对外接口的文档记录,则该接口为公开接口,可以依赖其作为公共函数。该对外接口的具体使用方法以及场景请参照docs中的接口使用手册说明。如果需要依赖一个在文档中未记录的函数,请在MindSpeed主页开启Issue向我们确认该函数是否为公开接口、是否是因意外暴露、或者可能在未来被移除。 + +## 通信安全加固 + +[通信安全加固说明](https://gitee.com/ascend/pytorch/blob/master/SECURITYNOTE.md#%E9%80%9A%E4%BF%A1%E5%AE%89%E5%85%A8%E5%8A%A0%E5%9B%BA +) + +## 通信矩阵 +[通信矩阵说明](https://gitee.com/ascend/pytorch/blob/master/SECURITYNOTE.md#%E9%80%9A%E4%BF%A1%E7%9F%A9%E9%98%B5%E4%BF%A1%E6%81%AF) + +### 特殊场景 +| 场景 | 使用方法 | 端口 | 可能的风险 | +|-------------------------------------| ------------------------------------------------ | ---------- | ---------- | +| 用户下载并使用HuggingFace的开源数据集 | 调用`load_dataset`函数,并填写目标开源数据集路径 | 随机端口 | 数据集可能包含敏感或不合法内容,导致合规问题。数据集中可能存在质量问题,如标签错误或数据偏差,影响数据预处理。| +| 使用`from_pretrained`信任特定代码,使用相关模型的实现 | 调用`from_pretrained`函数,设置`trust_remote_code=True` | 随机端口 |如果 trust_remote_code=True,下载的代码可能包含恶意逻辑或后门,威胁系统安全。但同时已设置local_files_only=True,程序仅会运行本地的文件来规避风险。| +| 调用auto_tuning进行训练任务时,新增端口 | torchrun拉起训练端口 auto_tuning通过此端口指定MindSpeed拉起特定配置采集Profiling信息 | [1024, 65535]内 |业务需要,无风险 | + + +## 附录 + +### A-文件(夹)各场景权限管控推荐最大值 + +| 类型 | linux权限参考最大值 | +| -------------- | --------------- | +| 用户主目录 | 750(rwxr-x---) | +| 程序文件(含脚本文件、库文件等) | 
550(r-xr-x---) | +| 程序文件目录 | 550(r-xr-x---) | +| 配置文件 | 640(rw-r-----) | +| 配置文件目录 | 750(rwxr-x---) | +| 日志文件(记录完毕或者已经归档) | 440(r--r-----) | +| 日志文件(正在记录) | 640(rw-r-----) | +| 日志文件目录 | 750(rwxr-x---) | +| Debug文件 | 640(rw-r-----) | +| Debug文件目录 | 750(rwxr-x---) | +| 临时文件目录 | 750(rwxr-x---) | +| 维护升级文件目录 | 770(rwxrwx---) | +| 业务数据文件 | 640(rw-r-----) | +| 业务数据文件目录 | 750(rwxr-x---) | +| 密钥组件、私钥、证书、密文文件目录 | 700(rwx------) | +| 密钥组件、私钥、证书、加密密文 | 600(rw-------) | +| 加解密接口、加解密脚本 | 500(r-x------) | diff --git a/model/train/yoco_moe/Third_Party_Open_Source_Software_Notice b/model/train/yoco_moe/Third_Party_Open_Source_Software_Notice new file mode 100644 index 000000000..c5d32948c --- /dev/null +++ b/model/train/yoco_moe/Third_Party_Open_Source_Software_Notice @@ -0,0 +1,141 @@ +OPEN SOURCE SOFTWARE NOTICE + +Please note we provide an open source software notice along with this product and/or this product firmware (in the following just “this product”). The open source software licenses are granted by the respective right holders. And the open source licenses prevail all other license information with regard to the respective open source software contained in the product, including but not limited to End User Software Licensing Agreement. This notice is provided on behalf of Huawei Technologies Co. Ltd. and any of its local subsidiaries which may have provided this product to you in your local country. + +Warranty Disclaimer +THE OPEN SOURCE SOFTWARE IN THIS PRODUCT IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE APPLICABLE LICENSES FOR MORE DETAILS. 
+ +Copyright Notice and License Texts +Software: pytorch v2.1.0 +Copyright notice: +Copyright (c) 2016- Facebook, Inc +Copyright (c) 2014- Facebook, Inc +Copyright (c) 2011-2014 Idiap Research Institute +Copyright (c) 2012-2014 Deepmind Technologies +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) +Copyright (c) 2006 Idiap Research Institute +Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) +Copyright (c) 2016-present, Facebook Inc. +Copyright (c) 2016 Facebook Inc. +Copyright (c) 2015 Google Inc. +Copyright (c) 2015 Yangqing Jia +Copyright 2019-2020 Kakao Brain +Copyright (c) 2022 Cruise LLC. +Copyright (c) 2013, 2014, 2015, the respective contributors +Copyright (c) 2015, 2016 the respective contributors +Copyright (c) 2014, The Regents of the University of California (Regents) +Copyright (c) 2014, the respective contributors +Copyright (c) 2018, Steven Moshier +Copyright (c) 2001-2002 Enthought, Inc. 2003-2019, SciPy Developers +Copyright (c) 1997-2011 by Secret Labs AB +Copyright (c) 1995-2011 by Fredrik Lundh +Copyright (c) 2010-2022 by Alex Clark and contributors +Copyright (c) 2006 The Android Open Source Project +Copyright (c) Facebook, Inc. and its affiliates +Copyright (c) Meta Platforms, Inc. and affiliates +Copyright 2004-present Facebook +Copyright (c) 2017 by Contributors +Copyright (c) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura +Copyright (c) 2022 Apple Inc. +Copyright (c) 2023 Apple Inc. 
+Copyright 2005 Robert Kern (robert.kern@gmail.com) +copyright 2019 The TensorFlow Authors +Copyright (c) 2018 MathInf GmbH, Thomas Viehmann +Copyright (c) 2014 Indiana University (c) +Copyright John Maddock 2006 +Copyright (c) 2012 Massachusetts Institute of Technology +Copyright (c) 2012 Giovanni Garberoglio Interdisciplinary Laboratory for Computational Science (LISC) Fondazione Bruno Kessler and University of Trento +Copyright (c) 2018 Marat Dukhan +Copyright (c) 2017-2018 Facebook Inc. +Copyright (c) 2017 Georgia Institute of Technology +Copyright 2015 Google Inc. +Copyright (c) 2011-2021, NVIDIA CORPORATION. +Copyright (c) 2022, Tri Dao +Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. +Copyright (c) 2017 - 2022 NVIDIA CORPORATION & AFFILIATES. +Copyright (c) 2017 The Android Open Source Project +Copyright (c) 2016-present, Facebook, Inc. +Copyright (c) 2005-2020 Rich Felker +Copyright Malte Skarupke 2017 +Copyright 2008 Google Inc. +Copyright (c) 2011 - 2012 Andrzej Krzemienski +Copyright (c) 2001-2019 Free Software Foundation, Inc. +Copyright (c) 1994 Hewlett-Packard Company +Copyright (c) 1996-1998 Silicon Graphics Computer Systems, Inc. +Copyright (c) Bjorn Fahller +Copyright Michael Park, 2015-2017 +Copyright (c) 2017-present, Facebook, Inc. +Copyright (c) 2018-present, Facebook, Inc. +Copyright (c) 2008-2015 The Khronos Group Inc. +Copyright 2016 Facebook +Copyright (c) 2016, NVIDIA CORPORATION +Copyright (c) 2008 - 2012 The Khronos Group Inc. +Copyright (c) 2008-2013 The Khronos Group Inc. +Copyright (c) 2008-2012 The Khronos Group Inc. +Copyright (c) 2016-2017, ARM Limited and Contributors +Copyright (c) 2014-2015 The Khronos Group Inc. +Copyright (c) 2015-2017 The Khronos Group Inc. +Copyright (c) Facebook Inc. 
and Microsoft Corporation +Copyright (c) 2014-2017 The Regents of the University of California (Regents) +Copyright (c) 2014-2017, the respective contributors +Copyright (c) 2017 Microsoft +Copyright 2015 The Gemmlowp Authors +Copyright (c) 2011-2019 Stephan Brumme +Copyright 2006, Google Inc. +Copyright (c) Meta Platforms, Inc. and its affiliates +Copyright (c) 2008 - 2009 NVIDIA Corporation +Copyright (c) 2007-2009 Scientific Computing and Imaging Institute, University of Utah +Copyright (c) 2006, Laurent Montel, montel@kde.org +Copyright 2013 Conrad Steenberg conrad.steenberg@gmail.com +copyright 2022, PyTorch +copyright 2023, PyTorch +Copyright (c) 2005-2022 NVIDIA Corporation Built +copyright PyTorch Contributors +Copyright (c) 2018 Alex Rogozhnikov +Copyright (c) 2016 Microsoft +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +Copyright (c) 2014, 2015, the respective contributors +Copyright (c) 2005-2017, NumPy Developers (c) Parameter containing Float +Copyright 2005, Google Inc. +Copyright 2019 Kakao Brain +Copyright 2013-2014 RAD Game +Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC +Copyright 2016 Martin Raiber +Copyright (c) 2003-2017 Josef Weidendorfer +Copyright (c) 2000-2017 Julian Seward +Copyright (c) Edward Z. Yang ezyang@mit.edu +Copyright (c) 2005-2010 ActiveState Software Inc. +Copyright (c) 2013 Eddy Petrisor +Copyright (c) 2010 ActiveState Software Inc. +Copyright (c) 2001-2014 Python Software Foundation +Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software Foundation +Copyright Python Software Foundation +Copyright 2022 Cruise LLC +Copyright (c) 2014 Matthew Rocklin +Copyright (c) 2015 Melissa E. O'Neill +Copyright (c) 2019 NumPy Developers +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. +Copyright 2013 Mark Dickinson + +License: BSD 3-Clause License +Copyright (c) , , +All rights reserved. 
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Written Offer +This product contains software whose rights holders license it on the terms of the GNU General Public License, version 2 (GPLv2) and/or other open source software licenses. We will provide you and any third party with the source code of the software licensed under an open source software license if you send us a written request by mail or email to the following addresses: +foss@huawei.com +detailing the name of the product and the firmware version for which you need the source code and indicating how we can contact you. 
+ +Please note you need to make a payment before you obtain the complete Corresponding Source Code from us. For how much you will pay and how we will deliver the complete Corresponding Source Code to you, we will further discuss it by mail or email. +This offer is valid to anyone in receipt of this information. + +THIS OFFER IS VALID FOR THREE YEARS FROM THE MOMENT WE DISTRIBUTED THE PRODUCT OR FIRMWARE. \ No newline at end of file diff --git a/model/train/yoco_moe/ci/access_control_test.py b/model/train/yoco_moe/ci/access_control_test.py new file mode 100644 index 000000000..58a90dfc6 --- /dev/null +++ b/model/train/yoco_moe/ci/access_control_test.py @@ -0,0 +1,43 @@ +import os +import stat +import sys +import unittest +from pathlib import Path +import xmlrunner + + +# ============================= +# ST test, run with shell +# ============================= +def success_check(res): + if res != 0: + sys.exit(1) + + +def success_check_ut(res): + if len(res.failures) + len(res.errors) != 0: + sys.exit(1) + + +class ST_Test: + def __init__(self): + self.shell_file_list = [] + + + def run_shell(self): + for shell_file in self.shell_file_list: + success_check(os.system("sh {}".format(shell_file))) + +# =============================================== +# UT test, run with pytest, waiting for more ... 
+# =============================================== + + +if __name__ == "__main__": + st_test = ST_Test() + st_test.run_shell() + test_loader = unittest.TestLoader() + discover = test_loader.discover(start_dir="./", pattern="test*.py") + + runner = unittest.TextTestRunner() + success_check_ut(runner.run(discover)) diff --git a/model/train/yoco_moe/docs/LOGO.png b/model/train/yoco_moe/docs/LOGO.png new file mode 100644 index 0000000000000000000000000000000000000000..46b994b2ada90fd3f421120a3c6edf58ea0bf56b GIT binary patch literal 17565 zcmeIaby!qi`!_nYf(R%fjY>!(DInb`HKcS7-62B?sI-(cO4raK-2);pNHe6Qba$OS zexK+0o$FlZdC%YHdf)lOz1e%OwbxqvUUz)%&+R8QRXKcI3S1Bfgs&hktpNgIXanb` z_ppI~#|ufH00(k+8C`cxCo6X^Q&&rngoTrtC7ptUskNnsrKyFF%MZ)fKr2Rwmae<5 zin5TolLLn->KP7i2WOx)2=rRa+u79I&eEOE%+ea-DDrHtsqGmZ#6skm4xb8_inElZ z4MhH(tEJ{URW0*(cIJW>&%{LOUV94x9XMFJo6>nZ*gLujd5b*z+piFCj=IeGjPCCv z?sg*2{uz|6iW;4ildC12AjeB~b1p7!IsrirD;_>xODl5$E;c%DE^Z#qmoGWF1leD5 z3vuxY@$u08>-P+Jp{s?JkcPDEzg`FYCGyP1-Q8J;lhez~i^GeD!^zc}^QEAmASV|$ zCpR}c@C3V?kE6S(H@l_qq(Jmw8%4{3l0dxLWoO{Pe4LOl1qwLO5){98QE6?uVh~FaZ3mYO7p%Hkd*#+ zTWKeA4+l#}_kXvw_@A~f|7%;6I5;>1ZrLn z+i`>037bLs80Xl84IiJyjY+L);9>O)2S!L%eJ_uG`pS*s{9UGAHeL0vrimp1`J zLTe+GnbeTmAob_9J)|##qgifmU9RB6CfEo0xQ#a$E%Su^zWKL}X35}3enAmuR5Dp;ipHJYsTw;!k4#svi;K(?cjZJ<((ge!v>c&!c1}MUobA(tH{xW(o#@pt5!6 z4?Igj(6yi z_ZT6+ zGaidMj8}r>X-`?MN!~u`9<#$PC-Y}Hy z;6pe0589i>NH1ykr{6)KEFzJt&uk|wtU2Ufaz!jAOH@%-ILg?|-5)F@Wy%8h{244H_F#$q?heS!~K z99=y5-F`5MHX?abgr4vzIO81gg_`?FX(VV_Sl{b`jTodKBe7}Y6Ces7`H9Q-gF-M) zCr$S|3`Q0;+MGoQ>?1#c?in{<)XFsC&A^xtVZP0`>>-mX+1RtxNF<4!}_nsXmpf*vUfBW zk9mHG-eZlxOu>FZ^~(FtjC#$?@ULoXEMoeenj@&gjyCzP=Dm#%n|me+K|0yVBQA<1 z9`&a*+&c^nTr)ocd1vl-wM|LA!306EJ8^AI0Ra&s7}=0>=~d)F@C9=*6=s#|-Rf$O zvA5h!1!vDRz3gmf)u5BryjBVoPGhEyJ7tn#R@C`&=Ec1-YgWFcAM=d~9VCijisi95 z-$MLEeWo(*NZt{vM;x7ThgY2EBHd(J(DGU1`h=k|7tobVPo54-biMB6Lo!6&NvZ3u z#>j2@N0Pk4pH_y*`nNLw9@x`Ayy8z4g|%IuaDP-vD0#T=KR0vBfg3axDd*@HNQ6zH z)xgPC~!&iuA7Sb2v3gUJ)rj^&*hoj*`{y__cy$y_qkdNrg>3^o*g 
zd;LO$2((XrkF^kMMZ8%>)gzGHo;(2*qeIO%BcAo;W>((oPCk>_=0in?; z`GvhpBJ5;&?=lK{dcP4LdVOYkFO!!Ot1O&2<%#I`qjuhI@LM%V&x#AN?}5%AgpiC$=#GGL0&_uO9olsxpvQf`2e4%7y$HW0gtUj_ z6@S>XM7R{SfC@iIq%lQ^xZbivyOHX=PsJpPV%+AU5k(pg=l5I2@)!S4yaT_ZzjhwgI#A zCM2=^J@0b|OcRV9nW%^%F}Ln72DtEd4(kcpTaLBR1E;sjBy~U1rjL+Hq>W(V@mPB- zLF22b(el?z`2$oZZpV9{1A5Z%hOByV0RbUrES1O7lHQWFT|UpleG5l!piGwUlh|T} znJ_*fo&A7e^C^qWLAJ*EYLYL3K<$fSGb>R;mUR+KqSa*&Smp>RIC$Gr0l9bf@NAP! zjI~~Gxi>v^6syPTE$#!r&Og93&Fd@mDd8CX10`4(?E>bnCW|aYl#7Xts-!9WRK>U= zF2;VF-@>%f5UbL>-=7r#o6eM_*a0*|Ry{-U)~hv$Vh32u*h+qim#r%e?ieo~(+S$~H+Cl)kFm?0&V)zreVf&h zn>jD7Qex<2>`M?PQhQb*JKU;5GZ98P=b14X=`hOFaG%KFa&%SOAgG58p&Bz zqA*hP{A|nZ=@t&q_5T5z50e zG!U@w@WOsrOMkj_r*)F2Fkxa);rx)cdImTlYnH(dVbPf)gAs+B$lptHAFj7^x*icf zq$%<$|B{R5-yGdaiRoHqH5lfF;8>_zzABk?+Je@!axfb@=<{c_Sua?t2$9x!SFYE7 zX&ybhge9g};OQOfeuw&HUPKDQNki9o_5?g327VuO>_1O7Z)_b}SN13@2p)`Tjn3KU zx-0w8-Aq=N+K+P9?m+CdHnq6m*RrI;b9l8D|B|L>?%u?r|4T!(!txv*4;tkB*G$rtTT!t@ErL zr7buW`V-4^StnB(B5VU4TKJsT%R5%>3x8yEYhRsOH8@WT=8ZQRDCvB$TU~=&y>{L5KniPT z1RVUhNZnyi@jtYR@{~@nC6#KpJjAuhlk*--BJ?QSh<-Nj#N3}{_(#h{r@7G*5u9>o1 zQ`_q*Wdq-d*cy2uVIPVp{1jzSF0N}?U2$n}BdGL{>a}~+q^bZQR0516y4f71#OO?R zr~4Gd6WuyZNsqN{L~>?5a=;w=MFBLG8E?O*DhE7Di%hFKwi4JMdGK}lVQ1K@IzbwX z-ad1}cUr!Es)0V7evgFb>p~-S&~j>@fTD1umOHS{#YLlKC}dr|D*L>v9bLd?onUfusfzTPeD@TR@7n$^(q;mHFz+ zuM;d}>4Zch`D=9C7=J3V_&-Z+7|A3mhu!zJ*~6SJ#4ZFk(|3GLLyA19yWKd=BiO=y z*ZmYa>^x6Jl)B52Q#YD$dT2TF1thYK)?F5F43&gSkr))tm?K&d@9MTh9Dn^9(i}>y zFRst=?Z_$Uv(UbY} zkXq!*C&K*N&TRn*{%}%@IwjHL-KniY}tk1wHR>t)UD>i2)Qx( zCf(fO7Y~nY*;}jtkt1y$@ z^?Helo=s%p=i?vl^9&$p0; zqlQ%&(AeYtnVVSwfU-qX#m*7QZ>n+9Ga41E*{N-b!ZbvS+<3mM57(I5=4@JtOXWxm z#CUYyxH=y5_s+W*19CVPmOWVDlgul<*kdI%nL!}DsJf>RGwkZfM+J|O8KmLnvIO^a zbrZ&;>DIn=McJk`>E}2CwPN)XWtNH7=`e{H*n^YL3hzsP}M4mrhrHCqwsS@0q-%@&^;I z-+fI9tyISkrObZOOM4dm(nf{G)Yz4QuV#stBQ^Ai?8<1)4hc>(>QBa| zdF@I3vYzU^2>FHjhL}A()Guee4wKRBPX?0So^S55#6p(ukb%pPOLofck zYO|-i(Hh#Ykd(mNd6@w?tyzx(lHBLHaJsqcZsvoOb0LDil`KnY5kGxvDqnnpaW7aW 
zd#{NcP?{d0cCy6o;qV_QI3tbSeoyy=h8xx$!M?IR1bCN5dN+5hPI5nC3re+OS9s;u zJ|EY3Yz=J+$HgAgYlL;b{NAG=x$)YvXYdpupX(Ab{ZR6vjyevz6uBlPbrw)eH*&XO zT|s9(zoHzvUA*jeD+PGx0bdBgxMbD(0WrG`1ij@4)QK5iQtekc6wOLvr&tt4rTjzx z3Oy!_w>`wI`qLnFNf_DffoDvSv#X$G*5=(Pd^7iSnomz+LK!2Wsi|{9>@naNOvxm^ zo=o!c;;+BJu<+t9g;3S~LLP|Jx(xM79OF-ezbwjD{!yd%Qw;_@`a7#_b@Di#IFD?< z-5kd5J&VqBxc2YL#E&c}?{QO{W&Q;p$(HW$lHm-(&aPK+dyyvcjWCw;`Qi|UUE_s8 z154EQ_u2BBi$tAR%b(4B&S$(4H%@sb(fL$ zDj39@!m*G3x^OSR7^MnYHs-)^Snj+l-)|37!O$~3DZY{~mdwBXN{N7!)7j;r*@8<- z!->G3pQJ^(Qf&7~Ab;N)?_RvE%+L+28xXkB+jZPvn-o@VExjtc%p7CMs=_YYpz6!fRR`+P9S^gUp3ybi4HvQfb}bRyQP z-7$rm^|=KSc&~(a8255@wm)v;G|LMBFruiKHDmSY2wDl9`*!kFmXb09~WV{Rm4=0vU_QJsG1-r1= zaNDE@thc%4_=@wx`0e!T{E~}#8WpO!GkVDm^MgE!f^3vSi#A%G*#>wlM%-v?wx1td zl|Vbr(z7V!D9#u{87pYNzq_g9PtBIV_2(7THr6s_g~}45j6jF{<%~&2aQ(sIxKe(b z^-T(azl$#4s!V@qbOwc(UHqwambdX9WtM;8T0lCV6Ie`65PyFVtC4?PRGtG8*kd3$ zE~fb|%M~Yu=)=$MmyDOQ2fgDl5qf&~?yRSJUfd|i^y_q{Z+?HwaT|~wvj6_H+-I~c z^JGr*b{BboY2J%rX;()q&-J?uxpsNupSS;0g!R}Hns>SS@qnTA_>%?ZgbtWvGBRPz z0_M9S`NvITH~x@zBYtfXzUg;J+jN#u2RQs}=w&1_Ab~71lg*Or-NNeW8k}LA9*7L(RHlZSS%@_$9O`fbnSH{x4g` z;VKIa{k-8%y*xf1Beh*Ub9J!u-*`_qwmh&&<90_XzxGrC9^nwTU(Jy4K;&?f%~)^=eSRwB)z( z`z07KVOyekcfTjP#SN3B3&)Gb3x2!HzF>5DL-gY#3u+Hfo50`HQ-(+i6wp&==u-_oR@xZ#KT+cR z>w@H~arx)ZVy$lro=Ni@de3Y*)YzVvO*q2!gQ0xE6_jUkj=2O8cS$uS zb^BnmgKXaCsQ%P6yk0z_^rR= zPdLFV4!hNx3#&@|ni!_$@OzPchhLbq=EdpZ4ZrJ(Wr6MLw&{-jkP7_s5AqgVypFTh zr(#!sDrk>NsLDLo;am}{y>DDBA zwmsb+hKTjidA{2EFsrg3{#J>Cu9JnKW(1^CaVOlCHFvM1=6xWVmsoAzPfXSh4Q>O( zF4_Tf5Us2QnbQ)yoi83*9H31Cu8N>*x=%!&ovGbVuQoCC`HZ!&qIPT1OR?9>)187M zD1&vOJ2CVnyVm*B;#u*Na#EzSQYka3$;Yht2UojFW~%lHql;X1Rp!7~HZR)Nn!c^5 z>yC+Hdl~``%l1BHTFWbr_|TC$LFYgwA$n}rfWkaP1Bk+27~X*-Jd$DK`MH<5E)=?@|eM`-;(`S zn^r`D+ETxSuHqFb-V;e>K|}(Cgte>Que{{rTuVAtb|gL3hTl+%&BzM+?vPmY*aqIz zFr81woDeQBbWq{4w+spst9cJg z$7*;?sFukpDlfNEahWcil|U@shF^{H@sJJ^9t39K+z=>>_0r@+R|~iB4)p zf}g9E0)EwfJH>TnK?|n6;R&K`0y8cOfsayFU+Xq5Y#oejIbl{*X>6U5C#@UbcHB?h zx$q0}F_aHTs7{TedGVpCi?PDucH}MFFwLmzM2~h*ywG-bV+4zvCW~>oZpc#?&MzLs 
zhzAN2eOFV83)=mn&;8My$CfP)IshCq<1)G3U)&-1W@(B2M?lNkZ~x`RxJLE2#k204 zciR>O@v_%LIa)&x6(|<-i+-zOBtSQbmQRq6DIE-m%5L>6<7gd%xk}87fkf#<>JENm z(cF8b{DND*gQJ(>Sl?8J1KP_Ik+zaxPi)s(o+FISV$DoDiuJ?~Xp9rI}bm zki#Uk_jEMV<#j$N|J^#E;J-$Fb>3a7^&`f3#ayZ`WEy&=@>FME#Jj|H@c0h!zqeqXqzMmgyTiGn=3sIP{o0g=z;y2qB%oBC>g zFY_18s7A%Jk1T;mvjrC~k~QHbDD6ysz>oC}lDw%rbBPG1IK$>HAj0XtC~lv0K)Zo#?UFucUpoi*+}?W=02pzhAwE~8DD~J>G{p}NF0&F7 z2a++z!~^n4;YK;ierj=D269kwg0gBo#Uc=u=?HIW%vg5Cq5p9?k>rS1xfqCy;uWV{ z-LCZt{hP#%@{D|O(~!SYg9*4jZ$}053_cYCfQyx!*W`A}*R>)bAb(Bq2{*Xv{cint zfZx(J%2MGwuLUzE1su6_y7`Wu0p2C?IZ%jQRntbi6&vk}#l=JAD!j-k79Rt>eeyG$ zKRogn(z;mhM_@>enY$mn@8LK4S*tc*@a=Lz`Mw6PAc6CRKmwS_lo;yqssLTZcmCzq zn?CpMxy_Rq-k&qt$6NEBvV?94+YfB=(Ez6rA9q?J3ovn$7kZg<1JgJy(6%+n_yk>o z{!>}H({X#wX&+5^-KnH5rh;t{kwKmnt7X>HGu>92Zk(j#H-L*34+^G=lq6C`q zwOq0K8^I6%BvlSw4Zi!rWgN1rUbZt0>iTD`@lZUicx8X1X|O*Y2KZh=eoiPP@de&x zhLr!Qx(wmb*ldrTuTVH7P{Po}5pQ}mc8Qdq)G&I4>|vONa>P^?Nywm6!A_-SqKfcKI(>vo4Q0;Q6C*rpB~SY(@>iPlCkP*$*2)eOT{);TE>+W+(66NpJCF0G`Vp*kj4a zbH&2!oXhjqwtJg#6L&gcC~vz3XMHK&>Z23e;BtP8an1*jKp+>_BzfdUu6_vxq`A^M zb8SexE*F*Ef7PQs18^VGJr!;8bJ4KDHIlm8bsLxVF@|w-5-$u5{VkKy0A)LEmFWQ6 z=J9{rjYCRXs#hAq+um3dbN}}7vGKe2LwW6S*sp-_zlM@4s1WJmdw>~>Afk=`ah=_L|JM1o4dFz4Rl&Jnuh!12SerR&~*(=3wIZa zRsIAPKbmh}PkdMx-kQuo7fePXSG`_^H-R^hJhp%jeoZ^1vk{0aIe9rsHc>E2R?}N^ z5m|EPbi7a}wtIS}p5}gpy(GixU%Awh+o~&Q1LdyQGiPU2EjvAX%D>I2Q%v8~Ua0q5 zby~aib1AHbl__8N&5QHMUDN#z0HLL4g9PqM7u6Z2-^-*lI`5syIN69`Y4Gi^qZ76M zm3lYf-U>|DSWj2)`*YjzG(1lqNBJgcKY;4;#^1N-#wE|kDJ*sU`S+ZIsik z&8!R_BjG{sKWHsP%o;CR<|$HT%;GPvlCx1^g5O{ykXQ@~XRmC_=#GK_n6zo+G#zlA6E=%-s=Mwqo;bN z*E$(%3%Va~-+46Wf!=Y5e)S_9Q{LRPe#^FvO9DATRAgQiN9i4D*N(ccY>=`r8yep7 z3?djpS+6s5Y7p9jNw{qxBt#d{C<$ks1hCZmECbNJt?=(!=46mCz~|g_4D)!i>u|-Uh|yFW;)_npVkg2g zJsVuvJ9H+638W}Y0MuU)IV2-^Qly${d~w<>awQ5dHS#Ch`{2oSpAIjEUcGm;_3~<5 zXezbWq#d+A@FfFlfD*4PlmRvjka(9k`7A4lf2+WYnFN)^$nulb*})z78&+E#?!76e z&F5_=B)S69uewoT39MaurQakOe#1lb8u~{L_k$-825AyT>JI(7wMRZH;J>`S zJTj8i{TbTckZL|N0IqK4Qn8d8rYv{M{y} 
zmT_w}L80zD_ixQZu5XoZ?D?T#kRvLwb>U?me;C_Ck*fzGVA7X%^Tj{Y3G>jxWVBS8 z%j@;^v{#|4-atG^Km~;NgSX5!8CS)4US>Ma=ZHhjM6b_}3vpQmySUAuKe~`;rHdZD z_3j=%0Yul&$}H*y2I8jy5@!Jm4*5Z~_=bP^sgGtyxuMoZJW@u>q7}Lr&4DpORH&L{ z>`9pb%kB2fM*^We*YeSfXcWRc65%}r|K2x-8+(sc811WKDwu_gBFs=rK{027u1Xz$ z*iP_jv#%_64G3OVgKsvokbn-(#m%$wHS&Bh=qsXfbu?{Loe!!r+;d@d_{dT&+^W$! ze`DTCq$4uu#(`V%UFfvA>Nme<$?lV3eR@<*n-P9VveC%2n%CRU>lS@0T8DlbMwI*od$Z+EI zeErh4!%5AgMD09MHmP_0SxQ9L9~57u`#VIb+)Ezg8VN$B@&yL56E!<6^1M?|KTOP% zY@&E9>W}h@6T7lgvHw_p&}f+5{!CW~1XP~<_DsTHvG1GnUTmL=Ur|5;`L6e_QK*W% z=f}AVKyu2$roBUN9s-`{)v)~8Mjtj7hIJO>;pS`$@EaP7nX8*hJfR>(;$2h*o-<{h2@gCbwpg05j zRmGd%)ufp(fmpBDdK7zZY*#`|C=Q5NCy6na>FcL2ODXx=0)8>WCIdZ?r| z=cDxDseXwFINj~fD992^5SSd2VG~7QaBO-GE+Q?EoZ^@*3z{7nUk!D=rB5VgV^)0{ ztQb709#1N|m|0N8QOs6=HbD`na3cP=ux-L&ak>2CaNMG#XZ6Y#OoceOTTM-YbZQJV z1KI*DuX)PM#dyzO3Ry>G;;o8S!<8R{WRlreT2QQQ5BF&^lu1w!^e`Wdu>Zu;&@W}4 zS@RtuixXmCgt2IW6`Zb=aLd+20KRfX*l{vXTndsqmL%uH*cN^za$1X7dWnqJw1<4< z7Bvs^&FpV13=brD$Z%49AQh3bJ7&y;?`)FQnRiw?MWScWr-%sGD=3?EoRp+Fvi0i4(Mv5h{yh1V`JjdfiuQc83f zP+y4p)<5Y20|;1b8`CaX+`^>2cYW0Tvhv#wFan9Izm-~&nNe|o$GKPkq4O##pzS^f zeS6`7V98yK>doDe9~xnsf2R`e*Ql3vs zpd8ez$+zw~ookSi3pi=vcYwfqlRuUQHr-S=x3OhDq;wyQpl~Z>BTV*miwf9&+C=so zPFw5L+kD!Q52On>GVv|aTDMIOuKP3k`pVnd42y9h07gXYV*B_T^mO= z!rZBKK(?!@IG5Z4k@>oC6kr0B(6lFx)AlF|+18D1X5HTf5@9|3-PJo@Wu|WwG(TrN z%73?as^#DIxgzBwOVk!qO|Lt#19SG~PToR*UbXskM{F>&|LeAEInrk+Q8e?v8~!*U z1rMMyUkVIp3fliHR&La`Sfm`VC!@$3>-n;#fb|^MWfLY{y`hme`9} z1F$9~d+Jlhpd?sAB*4s1qn^9^LQ}gRxd{n;x|Jn|i3%sH*I=8wFAC6Xc~@2+mMY`g zw~+d$0Mz9yHe6mU8SoT`>gh~Um8PYX)_tS?+7INEN2GO%55xKXT$%v8{j1NZ+|dwI z+b>m~!hV3?%K>jvGz}+0 z22o0uuXdXBYf$P$ubSfP8hvR7_McvsJWoFH9csnr99iziO85rkX|6^l0KG!&LA(fn zR4Ii>*bl4h>`oAaN#X7m_7Jy%Viq7EK2ns1due#O0q-OE3d>&M!JS!|V_Yxn+I|lh z8)dB#ki$bsq7Z(aXa8~&S^ME;lx1UcXWLOlT z=S`RC_%=cU$=T6!34Z~!$wpUHVp3|;Z7F?Xg%}YMlmLkY14-+AD(}$ z5HvO|88{-ZBBYlQ>s&#(5h2>*>P4V@}-_OKEk5%3rxE z54kHcylg`hDN~Q0)jx~QnI{JlM#;gjxUN%brSk{kx))E>l=#GF5Xct=9Y#Ar-(62D 
zw(Lc#Kj2Bnuwd4Xf&hH0N(5i+Dq}`bf)IzeeD;YX4s%h%eHV@|;$c5B$L%@D}cHgk|o{F|~nA40LWo@Q_FNLP6UrkQo4VR6-oJ4?8zw~hxb2@?L; zVz0hw21pJgb51`*5w3H3R!baOlxAW)j- zg&ppRwz-uIX~1_dyxhqN(dEdE|HK~i=!rb67U6k;zi$`ZA9Ex|Iz*F1fBURSyKES@ zinBKYCNK6-X)|`Jhkb~cuY<5U0~rR}CpRT31#*)TTU7Eu=CKvCG+bEwrPv6MeJ+}9 zmd~)Ld|3*1p&!4B5(;g(k(Y?oDi(Iy;G(CKBPWn%s3?bg>r!dLOB7+)7Xs>Z)Qe(0 zv?LfLJ{br1k0js1MWl@#nV)R!D)$Ee>2(^Q)iXIlKI9BC4Y`D+EBTIiDzen|D`!cd z5_w#_C1|Gr76_w7*fYik*uE}rLYid&zOk*NoPh$ySZlX-$(UU-R#wd3+wF~?L7ODi zs)n2pu3a=ov2@E)z_2tN^4Q@?qkuhQ)W`n9HXyBR($kYbj`Msa@a$J`+{g2D{oa~p zO0SdoFaGHOt<3zB|F+y(*rdOKW&Kmqy4z`hyhid&bye_va{{S>%%n9j$(M%_*#txG zn_z8i*+WzbVQp^PDXOM{-OzAX@LrMxgF&S~R8;TN6F$iAh1X^7ulFXym-0EY{0!X! ze}-hYnCIO@KB|E5<^qZr!O)RQE3BP*dhBKoUPZA+n~I3}ae3vP+j5Q1>Z;@JV`rPZ zA$p*(;ixyApVf3W9-3Dn^LpyDAY#hbb9gl_vA4z?eymp;1vy(ksoxh}@=PqD%#Yp> z9`NLQ`*SZRAn*n})Vqx$dVLC>Scl!$kVnyilA+Ej_OtJ>I$5?mi!TzLyin{v^896} zRd5jY#_a&djseHx=4J|A+@>tzf8-6Q%iJ@4WA`B;^S!gKX18Z(8>kYHNnB}$JoO>XyvM|p z9W%f1uWhUDh=hOYu|HQ6NX@D(Q@TnDh~6C1D!v(yJc)>k(jPY|XuRAcn)$ICUOB_9 zXD__fQ~;&BsRrb>OZ~A1z&j%%CDaD$bo)#!tl_T)oD@JN-I6!Tc&XP9h92?rqD{1^ zIC~EKx}{ldZ~Yw_&^(8!7oV6Ku~3^w1XTe_Do=XgW_AL7Hv%``Jq@T~0o*tytSIgu zssz|1r6O|`6C|!Xz5c}LDF_tkRZ~tUpZfp}1VU8`)7d5erGdHBE3#0T0hOYtf>m*C z^*o_$T+?Frdmzx?`r)nPi<5P0+q(5hnFXMa3UuH)K8ne={30Li;kM>Ey*3u`4xm#D zl3X<4E~@W#3`dHu6OM_RQ;bfG|8pO>NUDMFO!Gfi0Q}=|SU@FC2|5Uz13EVaZIC(y)JD-7)mcIk(V-dPria(<6;cYLY=I|%}PcPvq z43*!)2YVU;H!B>B1|mfL4+(l$sGj~d+z0*7=|><10Jz>hB>!n0wsGe+@e&vs0sh<_ zbJD-3hnn@xXDOfp=AW^SSX(LBCtLXfSuTzqppN)gfsv)9B`@vuHd^`7CGZg4A<0a* zsVeB6Fne^%1^0}Y`SJwoGl!hZSNy+^S4pw-YpHTI-4TJ$=*`G~>M8OzQGgFXJl`fMs3I4wT4rsE4 literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/docs/RELEASENOTE.md b/model/train/yoco_moe/docs/RELEASENOTE.md new file mode 100644 index 000000000..ae88b4472 --- /dev/null +++ b/model/train/yoco_moe/docs/RELEASENOTE.md @@ -0,0 +1,54 @@ +# MindSpeed 版本说明书 +- [MindSpeed 1.0](#FrameworkPTAdapter-5-0-RC1md) + - [用户须知](#用户须知md) + - [新增特性](#新增特性md) + - [特性修改](#特性修改md) + - 
[已修复问题](#已修复问题md) + - [已知问题](#已知问题md) + - [兼容性](#兼容性md) + + +## MindSpeed 1.0 + +### 用户须知 + +本框架基于NVIDIA主导的开源Megatron进行修改,采用插件化适配方式,延续原生的Megatron特性,使用NPU进行大模型加速训练;代码重用性好,支持现有的网络只修改设备类型或数据类型,即可迁移到NPU上使用。使能客户大模型业务快速迁移至昇腾设备,并且支持昇腾专有算法。 + +### 新增特性 + +**表 1** MindSpeed支持的版本特性列表 + +| 一级特性 | 二级特性 | 说明 | +| -------------- | --------------- | --------------- | +| Megatron原生特性 | 数据并行 | 支持数据并行训练策略 | +| | 张量并行 | 支持张量并行训练策略 | +| | 流水并行 | 支持流水并行训练策略 | +| | 张量并行 | 支持张量并行训练策略 | +| | 虚拟流水并行 | 支持虚拟流水并行训练策略 | +| | 序列并行 | 支持序列并行训练策略 | +| | 重计算 | 支持选择性重计算和完全重计算策略 | +| | 分布式优化器 | 支持分布式优化器策略,将优化器状态拆分到所有DP组间 | +| | 异步DDP | 支持异步DDP,在进行梯度更新时,将数据并行组中的通信和计算并行执行 | +| 昇腾专有算法 | TP 重计算通信优化 | 重计算通信算子消除,优化重计算层划分,实现大模型训练通信性能提升 | +| | 自适应选择重计算 | 支持通过自动调整训练内存大小来自动选择重新计算策略 | +| | 计算通信并行优化 | 通过将计算和通信任务分别拆分成更细粒度的子任务来实现相互的流水掩盖 | +| 昇腾自定义算子 | npu_dropout_add_layer_norm | 支持自定义算子npu_dropout_add_layer_norm调用 | + +### 特性修改 + +不涉及 + +### 已修复问题 + +不涉及 + +### 已知问题 + +| 已知问题 | 问题描述 | +| -------------- | --------------- | + +### 兼容性 + +A800-9010:CentOS 7.6/Ubuntu 18.04, 2.04/BC-Linux 7.6/Debian 9.9/Debian 10/OpenEuler 20.03 LTS + +A800-9000:CentOS 7.6/Ubuntu 18.04, 2.04/Euler 2.8, 2.10/Kylin v10/BC-Linux 7.6/OpenEuler 20.03 LTS/UOS 20 1020e \ No newline at end of file diff --git a/model/train/yoco_moe/docs/faq/data_helpers.md b/model/train/yoco_moe/docs/faq/data_helpers.md new file mode 100644 index 000000000..8312c1861 --- /dev/null +++ b/model/train/yoco_moe/docs/faq/data_helpers.md @@ -0,0 +1,62 @@ +# Data helpers overflow bug +## 问题现象 +在增大 gbs、iteration 等理论上不影响模型内存的参数后,出现OOM现象,或者在模型预处理数据集的阶段报如下错误: +```shell +Traceback (most recent call last): + File "pretrain_gpt.py", line 121, in + args_defaults={'tokenizer_type': 'GPT2BPETokenizer'} + File "/home/ma-user/modelarts/user-job-dir/GPT-3-kernel_ID2728_for_PyTorch_zgcl/megatron/training.py", line 150, in pretrain + process_non_loss_data_func) + File "/home/ma-user/modelarts/user-job-dir/GPT-3-kernel_ID2728_for_PyTorch_zgcl/megatron/training.py", line 689, in 
train + opt_param_scheduler) + File "/home/ma-user/modelarts/user-job-dir/GPT-3-kernel_ID2728_for_PyTorch_zgcl/megatron/training.py", line 417, in train_step + optimizer, fwd_bwd_timers, forward_only=False) + File "/home/ma-user/modelarts/user-job-dir/GPT-3-kernel_ID2728_for_PyTorch_zgcl/megatron/schedules.py", line 654, in forward_backward_pipelining_without_interleaving + timers, collect_non_loss_data) + File "/home/ma-user/modelarts/user-job-dir/GPT-3-kernel_ID2728_for_PyTorch_zgcl/megatron/schedules.py", line 118, in forward_step + output_tensor, loss_func = forward_step_func(data_iterator, model) + File "pretrain_gpt.py", line 84, in forward_step + data_iterator) + File "pretrain_gpt.py", line 45, in get_batch + data = next(data_iterator) + File "/home/ma-user/anaconda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 530, in __next__ + data = self._next_data() + File "/home/ma-user/anaconda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 570, in _next_data + data = self._dataset_fetcher.fetch(index) # may raise StopIteration + File "/home/ma-user/anaconda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch + return self.collate_fn(data) + File "/home/ma-user/anaconda/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 157, in default_collate + return elem_type({key: default_collate([d[key] for d in batch]) for key in elem}) + File "/home/ma-user/anaconda/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 157, in + return elem_type({key: default_collate([d[key] for d in batch]) for key in elem}) + File "/home/ma-user/anaconda/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 146, in default_collate + return default_collate([torch.as_tensor(b) for b in batch]) + File "/home/ma-user/anaconda/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 138, in default_collate + return torch.stack(batch, 0, out=out) +RuntimeError: stack 
expects each tensor to be equal size, but got [8193] at entry 0 and [8246] at entry 1 +``` + +## 问题根因 +在 `megatron/core/datasets/helpers.cpp` 文件里的 `build_sample_idx()` 函数中创建了 `sample_idx` 的 int32 数组去记录每个 sample 的 index, +而每个 sample 的 index 又是以 `doc_idx_index` 这个 int64 的变量去计算,在 `sample_idx[2 * sample_index] = doc_idx_index;` 这个赋值操作中存在溢出的可能。 +在数据集中的句子较短,而要求训练的步数 * Global Batch Size * Sequence Length 较大的情况下就会出现 `doc_idx_index` 超过 int32 的表达范围而导致最终的 index 溢出。 + +## 解决方案 + +#### 规避方案 +1. 减小模型训练步数 + + +#### 推荐方案 +1. 将相关变量修改为 int64 数据类型,具体可查看:[PR](https://github.com/NVIDIA/Megatron-LM/pull/598) + + > 可以在 Megatron-LM 目录下,运行`mindspeed -P`命令,自动完成修改。 + >```shell + > mindspeed -P + >``` +2. 删除 `megatron/core/datasets/` 下面的 `helpers.cpython-xx-xxx-linux-gnu.so` 文件。 +3. 删除已生成的数据集缓存文件夹,例如 `enwiki/my-t5_text_sentence/cache/GPTDataset_indices`。 + + +## 备注 +此问题为 Megatron-LM 原生问题,CPP 代码难以通过 monkey patch 的方式进行修改。已多次提交修复 PR,但似乎 Megatron-LM 较为封闭,无人管理且不接受来自社区的代码提交。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/faq/megatron070_grad_norm_nan.md b/model/train/yoco_moe/docs/faq/megatron070_grad_norm_nan.md new file mode 100644 index 000000000..cf4f2f97c --- /dev/null +++ b/model/train/yoco_moe/docs/faq/megatron070_grad_norm_nan.md @@ -0,0 +1,51 @@ +# megatron0.7.0版本长稳测试出现grad norm为nan +## 问题现象 +在megatron0.7.0版本中,采用mindspeed自定义`--tokenizer-type PretrainedFromHF`, 长稳测试一定步数后发现loss抖动异常最终出现grad norm为nan的问题,报错示例如下: +``` +2024-09-18 11:14:247 iteration 427/ 5000 consumed samples: 6832 elapsed time per iteration ( +ms): 209.8 | Learning rate: 1.229919E-06 | global batch size: 16 | Lm loss: 8.567080E+00 | loss scale: 1.0 | gr +ad norm: 35.518 | number of skipped iterations: О | number of nan iterations: 0 +[2024-09-18 11:14:25] iteration 428/ 5000] consumed samples: 6848 elapsed time per iteration ( +ms): 210.5 | Learning rate: 1.229826E-06 | global batch size: _ 16 | lm loss: 7.180392E+00 | loss scale: 1.0 | gr +ad norm: 36.838 ] number of skipped iterations: О | number of nan 
iterations: +Traceback (most recent call last): +File "pretrain_gpt.py”, line 247, in +pretrain( +File "/home/Megatron-LM/megatron/training/training.py”, Line 274, in pretrain +iteration, num floating point operations so far = train( +File "/home/Megatron-LM/megatron/training/training.py”, Line 1027, in train +train step(forward step func, +File "/home/Megatron-LM/megatron/training/training.py”, Line 550, in train_step +losses reduced = forward backward func( +File "/home/Megatron-LM/megatron/core/pipeline parallel/schedules.py”, line 1400, in forward backward +pipelining without interleaving +config.finalize model grads func( +File "/home/Megatron-LM/megatron/core/distributed/finalize model_grads.py”, Line 113, in finalize mode +l grads +model chunk.finish grad sync() +File "/home/Megatron-LM/megatron/core/distributed/distributed data parallel.py”, Line 248, in finish_g +rad sync +buffer.finish grad sync() +File "/home/Megatron-LM/megatron/core/distributed/param and_grad buffer.py”, Line 513, in finish_grad +sync +bucket.finish grad sync() +File "/home/Megatron-LM/megatron/core/distributed/param and_grad buffer.py”, Line 151, in finish_grad +sync +self.start grad sync() +File “/home/Megatron-LM/megatron/core/distributed/param and grad buffer.py”, Line 114, in start_grad_s +ync +assert not norm.isnan( ), ( +AssertionError: Rank 13: found NaN in local grad norm in backward pass before data-parallel communication collectie +ve. Device: 5, node: node-15-11 +``` + +## 问题根因 + +1. 问题场景使用的数据集生成时,增加了`--append-eod`参数,这会让每个数据sample末尾增加一个eos结束标志位; +2. megatron0.7.0对数据集提取过程增加了pad功能(在`class GPTDataset`类中),`PretrainedFromHF`模式下,会将pad标志位与eos标志位配成相同值(`pad_token_id == eos_token_id`)。loss_mask中会去掉pad标志位,但实际去掉的都是eos标志位。 +3. 
以上两个原因综合导致了grad norm为nan的问题,这个问题是megatron原生问题,相同配置下实测GPU中也会报错。 + + +## 解决方案 + +在`--tokenizer-type PretrainedFromHF`模式下,不使用`--append-eod`生成数据集 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/faq/torch_extensions.md b/model/train/yoco_moe/docs/faq/torch_extensions.md new file mode 100644 index 000000000..049febfef --- /dev/null +++ b/model/train/yoco_moe/docs/faq/torch_extensions.md @@ -0,0 +1,22 @@ +# Torch extensions 卡住 +## 问题现象 +在模型运行时,卡在如下场景,且等待十几分钟无反应。 +``` +Using ~/.cache/torch_extensions/py38_cpu as PyTorch extensions root... +Using ~/.cache/torch_extensions/py38_cpu as PyTorch extensions root... +Using ~/.cache/torch_extensions/py38_cpu as PyTorch extensions root... +Using ~/.cache/torch_extensions/py38_cpu as PyTorch extensions root... +Using ~/.cache/torch_extensions/py38_cpu as PyTorch extensions root... +Using ~/.cache/torch_extensions/py38_cpu as PyTorch extensions root... +Using ~/.cache/torch_extensions/py38_cpu as PyTorch extensions root... +Using ~/.cache/torch_extensions/py38_cpu as PyTorch extensions root... 
+``` + +## 问题根因 +此问题为 PyTorch extension 编译问题,编译开始前其中一个线程会生成 `.lock` 文件对编译文件夹进行锁定,其他线程会进行等待。 +如果因为其他原因导致编译的线程中途被强制结束,`.lock` 文件不会被清除,导致第二次编译开始时,所有的线程看到存在 `.lock` 文件,就都会开始进行等待。 + + +## 解决方案 + +删除 `~/.cache/torch_extensions/py38_cpu` 文件夹,再重新启动程序。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/Automatic_Parallelism.md b/model/train/yoco_moe/docs/features/Automatic_Parallelism.md new file mode 100644 index 000000000..2cfd65c87 --- /dev/null +++ b/model/train/yoco_moe/docs/features/Automatic_Parallelism.md @@ -0,0 +1,157 @@ +## Automatic Parallelism + +## 问题分析 + +当前主流的大模型并行训练方法有PP、TP、DP、SP、CP、Ulysses Parallel(UP)、VPP、EP等,在内存、计算、通信方面都有不同的优化,并可直接叠加使用。大模型端到端训练性能由模型结构、集群规模、并行配置、batch_size等因素共同决定,在调优时需要综合考虑。当前并行配置人工调优需要大量的专家经验、人工分析和实验调优,预计数天~数周,实验成本高。相似模型的最优并行配置也并不相同,仍需花费时间进行优化。随着搜索空间变大,依赖手工调优变得不可行。例如,llama65B模型在4*8的集群规模下,仅考虑PP、TP、DP、SP、VP、mbs六个维度,配置组合有812种,手工调优时间成本太高。因此,需要构建自动并行系统根据模型结构和集群规模给用户自动推荐一个性能较优的并行配置策略。 + +## 解决方案 + +针对该问题场景提出多维并行配置自动寻优算法,在给定模型结构、集群配置的条件下,用户仅需要在启动脚本中配置相关参数即可启动多维并行配置自动寻优,在规定时间内找到较优的并行配置推荐给用户。算法原理图如下: + +* **内存自适应感知的搜索空间构建**:考虑模型结构和集群信息约束,采用内存灰盒模型排除OOM并行配置,缩小搜索空间; +* **基于算子不确定性估计的高保序性Cost Model建模方法**:引入低保真数据(单算子调用)作为先验信息,结合算子整网性能数据构建算子执行耗时的不确定性模型,结合通信耗时根据并行策略合成得到端到端性能的概率分布模型。 +* **基于概率匹配的高效搜索算法**:基于Thompson Sampling方法探索并行策略,以高概率探索高价值并行配置,提高探索效率,灵活支持探索早停,提高易用性。 + + +![1](../../sources/images/auto_parallel_1.png) + +**并行配置的支持情况:** + +已支持搜索的并行配置维度: + +- [x] PP +- [x] TP +- [x] DP +- [x] CP +- [x] DeepSpeed-Ulysses +- [x] Megatron-SP +- [x] mbs + +正在支持的并行配置维度: + +- [ ] MOE +- [ ] VP +- [ ] 自适应重计算 + +## 使用方法 + +在使用多维自动并行特性时,**需使用python作为脚本启动器,在所有的节点上拉起脚本**,并配置多维自动并行相关的参数。相关参数及其含义如下表所示: + +| 参数名 | 参数含义 | +| ---------------- | ------------------------------------------------- | +| --auto-parallel | 多维自动并行特性总开关 | +| --nnodes | 集群中节点的个数 | +| --nproc-per-node | 每个节点中计算设备的个数 | +| --master-addr | 集群中主节点的IP地址 | +| --master-port | 用于通信的端口号,各节点需要配置相同的端口号 | +| --node-rank | 集群中节点的rank,主节点为0,其他节点为1,2,······ | +
+注:算法最长搜索时间为8小时,支持灵活提前退出,无需人工干预。 + +下面是基于llama7B模型的配置示例: + +```shell +#!/bin/bash +export CUDA_DEVICE_MAX_CONNECTIONS=1 +export NPU_ASD_ENABLE=0 +source /usr/local/Ascend/ascend-toolkit/set_env.sh + +MASTER_ADDR=localhost +MASTER_PORT=6001 +GPUS_PER_NODE=8 +NNODES=1 +NODE_RANK=0 + +LOAD_CHECKPOINT_PATH=./ckpt +SAVE_CHECKPOINT_PATH=./ckpt +DATA_PATH={your dataset path} +TOKENIZER_MODEL={your tokenizer model path} +TP=1 +PP=8 + +DISTRIBUTED_ARGS=" + --nproc_per_node $GPUS_PER_NODE \ + --nnodes $NNODES \ + --node_rank $NODE_RANK \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT +" + +GPT_ARGS=" + --tensor-model-parallel-size ${TP} \ + --pipeline-model-parallel-size ${PP} \ + --sequence-parallel \ + --num-layers 32 \ + --hidden-size 4096 \ + --ffn-hidden-size 11008 \ + --num-attention-heads 32 \ + --tokenizer-type Llama2Tokenizer \ + --tokenizer-model ${TOKENIZER_MODEL} \ + --seq-length 2048 \ + --max-position-embeddings 2048 \ + --micro-batch-size 4 \ + --global-batch-size 256 \ + --make-vocab-size-divisible-by 1 \ + --lr 1.0e-6 \ + --train-iters 5000 \ + --lr-decay-style cosine \ + --untie-embeddings-and-output-weights \ + --disable-bias-linear \ + --attention-dropout 0.0 \ + --init-method-std 0.01 \ + --hidden-dropout 0.0 \ + --position-embedding-type rope \ + --normalization RMSNorm \ + --use-fused-rmsnorm \ + --swiglu \ + --use-flash-attn \ + --no-masked-softmax-fusion \ + --attention-softmax-in-fp32 \ + --min-lr 1.0e-7 \ + --weight-decay 1e-1 \ + --lr-warmup-fraction 0.01 \ + --clip-grad 1.0 \ + --adam-beta1 0.9 \ + --initial-loss-scale 65536 \ + --adam-beta2 0.95 \ + --no-gradient-accumulation-fusion \ + --load ${LOAD_CHECKPOINT_PATH} \ + --no-load-optim \ + --no-load-rng \ + --fp16 +" + +DATA_ARGS=" + --data-path $DATA_PATH \ + --split 100,0,0 +" + +OUTPUT_ARGS=" + --log-interval 1 \ + --save-interval 10000 \ + --eval-interval 1000 \ + --eval-iters 0 \ +" + +SEARCH_ARGS=" + --auto-parallel \ + --nnodes $NNODES \ + --nproc-per-node $GPUS_PER_NODE \ + 
--master-addr $MASTER_ADDR \ + --master-port $MASTER_PORT \ + --node-rank $NODE_RANK \ +" + +python pretrain_gpt.py \ + $GPT_ARGS \ + $DATA_ARGS \ + $OUTPUT_ARGS \ + $SEARCH_ARGS \ + --distributed-backend nccl \ + | tee logs/search_llama_7b.txt +``` + +## 使用效果 + +![2](../../sources/images/auto_parallel_2.png) + diff --git a/model/train/yoco_moe/docs/features/activation-function-recompute.md b/model/train/yoco_moe/docs/features/activation-function-recompute.md new file mode 100644 index 000000000..976391342 --- /dev/null +++ b/model/train/yoco_moe/docs/features/activation-function-recompute.md @@ -0,0 +1,94 @@ +# 激活函数重计算 + +## 问题分析 + +现有的大模型训练框架中,重计算和反向计算是绑定在一起调度的,这严重限制了重计算的灵活性。在某些场景下,会限制重计算在模型性能上的优化。 + +比如在模型中存在某个流程: + +前向:gelu激活函数模块->后续模块A。 + +反向:后续模块A的反向(需要gelu输出的激活值)->gelu反向(与重计算绑定)。 + +gelu激活函数会产生大量的数据,但本身计算量很小。此时进行激活函数的重计算可以在性能劣化极少的代价下,减少内存占用。 +但在现有重计算框架下,如果对gelu激活函数模块做重计算,并不能节省gelu函数的输出。这是因为在反向时,模块A所需要的gelu输出的激活值,会早于gelu激活函数模块的重计算流程,所以前向必须保留激活函数的输出,导致激活函数的输出并不能节省下来。 + + +## 解决方案 + +本特性重新实现了一套重计算框架,可以将重计算灵活地插入到反向计算之前的任意位置。 + +反向(新框架): + +gelu函数重计算->后续模块A的反向。 + +此时,gelu函数的输出已经早于模块A的反向,在前向时就无须保留gelu函数的输出值。 + +## 解决思路 + +通过设计一种传入模块函数进行重计算的机制,在合适的时机,丢弃重计算模块输出的物理存储,保留逻辑视图。在反向时,在恰当时机,利用register_hook插入重计算流程,并利用传入的函数重新进行计算,得到结果。 + +例如,gelu在mlp中的位置如下图所示。反向计算需要前向产生的a,b,c, d。其中b, c的shape为(batch, seq, 4 * hidden_size),gelu为激活函数,其计算较少,故可将tensor c释放掉,反向在 4h->h 反向前重新计算。 + +![现有框架](../../sources/images/activation_function_a.png) + +在前向4h->h计算完毕后,将c释放,保留逻辑视图。在4h->h grad前,需要将c计算回来。这里使用给d打tensor_hook的方式来进行重计算的插入,如下图所示: + +![新框架](../../sources/images/activation_function_b.png) + +## 使用场景 + +主要用于训练场景,用户内存不足或要节省内存时。 + +## 使用方法 + +脚本中添加:`--recompute-activation-function` 可开启激活函数重计算。 + +添加:`--recompute-activation-function-num-layers ${num}` 可指定激活函数重计算的层数。 + +激活函数重计算可以与全重计算同时开启: + +1.同时开启时,仅支持 `--recompute-method` 为 `block` + +2.同时开启时,会按照指定的全重计算和激活函数重计算的层数做各自类型的重计算,即不会有一层既做全重计算又做激活函数重计算。 + +(注意点:执行优先级是先计算全重计算层,后计算激活函数重计算层。在流水线并行未开启的情况下,全重计算层数和激活函数重计算层数之和应该等于总层数。) + +3.暂不兼容自适应重计算特性。 + +## 
使用效果 +激活函数重计算在llama2-7B场景下,根据模型配置不同,收益也会发生改变。 +在不同参数场景下,激活函数重计算收益表现如下: +| 模型参数 | 设备数 | 内存收益 | +|-----------------------------------------------------------------------------|----------|-------------| +| seq-length=12288、micro-batch-size=2、TP=4、DP=2 | 8卡(单机) | 8.05G | +| seq-length=8096、micro-batch-size=2、TP=4、DP=2 | 8卡(单机) | 5.31G | +| seq-length=16384、micro-batch-size=2、TP=4、DP=2 | 8卡(单机) | 12.49G | +| seq-length=12288、micro-batch-size=1、TP=4、DP=2 | 8卡(单机) | 4.04G | +| seq-length=12288、micro-batch-size=2、TP=8、DP=1 | 8卡(单机) | 4.02G | + +## 扩展使用 + +本特性引入的 CheckpointWithoutOutput 类可以自定义对任何模块进行重计算,并且在合适的时机进行重计算恢复。 + +此处提供一个示例,可以灵活使用 CheckpointWithoutOutput 来对自定义的函数进行重计算: + +(注意:如要使用register_hook,需要确保张量有梯度) +```python +from mindspeed.core.tensor_parallel.random import CheckpointWithoutOutput + + +class Custom_module(torch.nn.Module): + def __init__(self): + ...... + + def forward(self, input): + self.activation_checkpoint_manager = CheckpointWithoutOutput() + function_output = self.activation_checkpoint_manager.checkpoint(self.custom_function, False, function_input1, function_input2, ...) + ...(after used output) + self.activation_checkpoint_manager.discard_output() + if module_output.requires_grad: + module_output.register_hook(self.activation_checkpoint_manager.recompute) + + return module_output +``` \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/adaptive-memory.md b/model/train/yoco_moe/docs/features/adaptive-memory.md new file mode 100644 index 000000000..e85267dbe --- /dev/null +++ b/model/train/yoco_moe/docs/features/adaptive-memory.md @@ -0,0 +1,71 @@ +# 自适应内存优化 + +## 问题分析 + +在大模型训练中,重计算特性可以有效的减少显存使用,但是策略较为固定,无法最大限度使用显存资源。 + +## 解决方案 + +为了在最大限度地利用NPU显存的同时,提高模型训练的性能,我们支持通过自动调整训练内存大小来自动选择计算策略。这一特性称为自适应内存优化。 + +### 解决思路 + +自适应内存优化设计主要包括:自适应无损swap、 策略生成、策略搜索、SwapManager功能以及内存管理等几部分。 + +- 策略生成依赖自适应无损swap去生成策略 + +- 策略搜索依赖SwapManager功能及时将tensor换到CPU,避免OOM导致训练中断。 + +自适应内存优化策略流程如下图所示: + +

+ +SwapManager功能需要内存管理适配PTA的NPUPluggableAllocator接口拦截OOM,让SwapManager功能可以介入,流程如下图所示: +

+ +## 使用场景 + +该特性主要用于训练场景,如果用户发现开启了全重计算功能后, NPU显存剩余较多,此时若想充分利用显存,从而提高训练性能,可以考虑开启该特性。 + +## 使用方法 + +在训练脚本中添加`--adaptive-memory-optimization` + +注意: +1. 当前自适应内存优化与全重计算、自适应选择重计算、预取特性swap-attention、 recompute-in-bubble等不兼容。 +2. 目前自适应内存优化已能够管理一部分使用torch.autograd.Function修饰的auto_function类 + + - 在调用auto_function的文件中 添加 `from mindspeed.core.memory.adaptive_memory.adaptive_memory_function import adapt_mem_func_wrapper` + - 将 `auto_function.apply(*args)` 修改为 `adapt_mem_func_wrapper(auto_function, *args)` + - 以mindspeed.moe.pipe_experts中的PipeExpert类的调用为例,在mindspeed.moe.moe_layer文件中添加`from mindspeed.core.memory.adaptive_memory.adaptive_memory_function import adapt_mem_func_wrapper`,将`expert_output = PipeExpert.apply(*args)`修改为`expert_output = adapt_mem_func_wrapper(PipeExpert, *args)` + +## 使用效果 + +这里的gpt-175B是经过裁剪后的 + +gpt-175B: + +| 特性 | 参数 | NPU卡数 | TFLOPs | 收益 | +|------------|---------------------------------------------------------------------------------------------------------------------------|----------|-------------| -------------| +| adaptive-memory-optimization | seq-length=8192、micro-batch-size=10、global-batch-size=40、TP=8、PP=1、DP=1、CP=1、NL=8、hidden-size=12288 | 8卡(单机) | 165.90 | - | +| 全重计算 | seq-length=8192、micro-batch-size=10、global-batch-size=40、TP=8、PP=1、DP=1、CP=1、NL=3、hidden-size=12288、recompute-num-layers=3 | 8卡(单机) | 145.93 | 13.68% | + + +| 特性 | 参数 | NPU卡数 | TFLOPs | 收益 | +|------------|---------------------------------------------------------------------------------------------------------------------------|----------|--------|--------| +| adaptive-memory-optimization | seq-length=8192、micro-batch-size=3、global-batch-size=9、TP=2、PP=4、DP=1、CP=1、NL=8、hidden-size=12288 | 8卡(单机) | 76.30 | - | +| 全重计算 | seq-length=8192、micro-batch-size=3、global-batch-size=9、TP=2、PP=4、DP=1、CP=1、NL=8、hidden-size=12288、recompute-num-layers=1 | 8卡(单机) | 66.50 | 14.17% | + +| 特性 | 参数 | NPU卡数 | TFLOPs | 收益 | 
+|------------|---------------------------------------------------------------------------------------------------------------------------------|----------|--------|--------| +| adaptive-memory-optimization | seq-length=8192、micro-batch-size=2、global-batch-size=8、TP=2、PP=4、VPP=2、DP=1、CP=1、NL=8、hidden-size=12288 | 8卡(单机) | 86.10 | - | +| 全重计算 | seq-length=8192、micro-batch-size=2、global-batch-size=8、TP=2、PP=4、VPP=2、DP=1、CP=1、NL=8、hidden-size=12288、recompute-num-layers=1 | 8卡(单机) | 75.10 | 14.65% | + +## 注意事项 + +1. 由于自适应内存优化与内存碎片优化两个特性都修改了PyTorch内存管理模块,这两个特性都打开会存在冲突,mindspeed进行了assert判断。 +2. 由于自适应内存优化依赖cpu的绑核,因此需要保证运行环境内含有npu-smi以及lspci命令。 +安装命令:yum install pciutils + + + diff --git a/model/train/yoco_moe/docs/features/adaptive-recompute.md b/model/train/yoco_moe/docs/features/adaptive-recompute.md new file mode 100644 index 000000000..a2f44bda4 --- /dev/null +++ b/model/train/yoco_moe/docs/features/adaptive-recompute.md @@ -0,0 +1,46 @@ +# 自适应选择重计算 + +## 问题分析 + +重计算特性可以有效地减少显存使用,但是策略较为固定,无法最大限度使用显存资源。 + +## 解决方案 + +为了在最大限度地利用计算设备显存的同时,提高模型训练的性能,我们支持通过自动调整训练内存大小来自动选择重计算策略。这一特性称为自适应选择重计算。 + +### 解决思路 + +自适应选择重计算设计主要包括重计算策略搜索、SwapManager 功能和内存管理三大部分。 + +其中重计算策略搜索依赖 SwapManager 功能及时将 tensor 换到 CPU,避免 OOM 导致训练中断。 + +自动选择重计算策略流程如下图所示: + +

+ +SwapManager 功能需要内存管理适配 PTA 的 NPUPluggableAllocator 接口拦截 OOM,让 SwapManager 功能可以介入,流程如下图所示: +

+ +## 使用场景 + +该特性主要用于训练场景,如果用户发现开启了全重计算功能后, NPU显存剩余较多,此时若想充分利用显存,从而提高训练性能,可以考虑开启该特性。 + +## 使用方法 + +1. 在训练脚本中添加`--adaptive-recompute-device-swap`。 +2. (可选)支持手动调整训练内存大小来自动选择重计算策略,请使用`--adaptive-recompute-device-size`进行设置来指定自适应选择重计算策略的训练内存大小(单位:MB)。内存>0为有效内存,最大内存限度为device最大内存。在该范围内自适应重计算才可以进行最优策略搜寻,不在有效内存范围内会使用读取到的device最大内存信息作为默认值。需要注意的是内存设定较小时,性能会与全重计算一致。该方式如果发生OOM,您需要重新选择一个新的内存值来重启模型训练。您也可以通过二分法的方式获得最优解,对该特性不熟悉请勿使用此选项。 +3. (可选)支持设置停止profiling的训练step,请使用`--adaptive-recompute-profiling-step`进行设置。该参数需要设置为>0的整数。默认在第10步停止profiling。若该值<=0,则采用默认值10,推荐设置该值>5。当step<5或者>总步数的1/10时,会有告警信息,但不影响正常训练,不会对性能和精度有任何影响。 +4. 此特性暂只适用于`--use-legacy-models`。 + +## 使用效果 + +相比全重计算,Llama2-7B场景下,性能提升约 16.29%,Llama2-13B 性能提升约12.05%。 + +## 注意事项 + +- 自适应选择重计算当前暂只适用于`--use-legacy-models`。 +- 当前自适应选择性重计算与全重计算、选择重计算、重计算独立调度流水线ripipe、激活函数重计算、预取特性swap-attention等特性均不兼容。 +- 由于自适应选择重计算特性修改了PyTorch内存管理模块,打开会存在冲突,mindspeed进行了assert判断。 +- 当使用`--adaptive-recompute-device-swap`时,用户可以通过环境变量`MIN_SWAP_TENSOR_SIZE`来指定允许被换出tensor的最小大小(最小可为1024),如不指定,则默认为1024 +- 当使用`--adaptive-recompute-device-swap`时,用户可以通过环境变量`SWAP_SIZE_MULTIPLE`来指定换出大小与malloc大小的比值(最小可为1),如不指定,则默认为1 +- 自适应重计算通过实现自己的allocator来实现对OOM的拦截,此allocator仍然支持PTA的环境变量`PYTORCH_NPU_ALLOC_CONF`,用户可以参考[此处](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha001/apiref/envref/envref_07_0053.html)来配置该环境变量。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/alibi.md b/model/train/yoco_moe/docs/features/alibi.md new file mode 100644 index 000000000..26356224c --- /dev/null +++ b/model/train/yoco_moe/docs/features/alibi.md @@ -0,0 +1,33 @@ +# Alibi 位置编码 + +## 问题分析 + +当大模型在训练和预测时的输入长度不一致时,模型的泛化能力会下降。若外推能力不佳,大模型在处理长文本或多轮对话时的效果就会受到限制。正弦位置编码的外推能力比较弱,RoPE(Rotary Position Embedding)的外推能力有一定提高但仍然有限。 + +## 解决方案 + +支持Alibi位置编码,提高模型外推能力。 + +### 解决思路: + +Alibi算法给attention score添加了一个预设的线性偏置矩阵(如下图所示),使模型能够理解输入之间的相对位置关系。由于位置信息直接作用于attention score上,位置差异性被突出,使模型具有较强的外推能力。 +

+ +[原文链接](https://arxiv.org/pdf/2108.12409) + + +## 使用方法 + +(1)对于不开启`--use-fusion-attn-v2`特性的情况,设置`--position-embedding-type alibi`即可调用该算法。 + +(2)对于开启`--use-fusion-attn-v2`特性的情况,需要设置`--position-embedding-type alibi`和`--alibi-fusion-attn-type 2`(支持0,2,3)。 +0表示生成alibi后传入,1暂不开放,2和3表示核内生成,3做pse的时候会做sqrt。 +如果要设置alibi为对角线对称取反,则需设置`alibi_diagonal_opposite`,反之(亦是默认情况,且与2和3时核内生成一致)无需进行设置。 + +(3)目前alibi位置编码已经支持ring-attention长序列并行,当前只支持mask为causal的场景,以及 `--alibi-fusion-attn-type` 为2,3的压缩模式。暂不支持ulysses长序列并行和混合长序列并行。 + +(4)开启`--use-fusion-attn-v2`特性和长序列并行时,alibi编码不支持开启dropout。 + +## 使用效果 + +模型外推能力提高。 diff --git a/model/train/yoco_moe/docs/features/ampipe.md b/model/train/yoco_moe/docs/features/ampipe.md new file mode 100644 index 000000000..c5d85b23f --- /dev/null +++ b/model/train/yoco_moe/docs/features/ampipe.md @@ -0,0 +1,59 @@ +# Ampipe流水通信隐藏 + +## 问题分析 + +MoE模型中引入了alltoall通信算子,用于在ep组中不同rank间交换token。在MoE层前向过程中,专家mlp部分前后各有一个alltoall通信算子,且计算与通信为串行执行,需要减少这部分通信的时间,提升训练性能。 + + +## 解决方案 + +ampipe将transformer模型中从attention到mlp部分的通信和计算的输入切分为多份,每一份数据之间互相独立不存在依赖,使得各个部分的计算和通信可以循环流水并行,同时调整计算和通信的算子执行顺序,实现计算和通信并行达到掩盖通信的目的。 + +![原理图](../../sources/images/ampipe.png) + +论文参考: +https://openreview.net/pdf?id=yLgr02IsXY + +## 解决思路 +1. 从attention的输入开始切分,q和attention_mask在seq序列维度进行切分, k, v保持完整输入,可以使得切分attention后再拼接结果等价。 +2. attention之后的dropout、残差、norm归一化以及MLP等计算在seq序列维度上均独立,切分后再拼接结果同样可以等价,所以在中间各个部分不需要拼接,直到所有计算完成后再拼接结果即可。 +3. 切分后重新编排各个切分副本循环流水的顺序,使得计算和通信并行。 +4. 针对主流的megatron的序列并行sequence-parallel以及长序列并行的context-parallel进行适配,可以实现sp开启时mlp部分的all-gather和reduce-scatter通信隐藏。 + +## 使用场景 + +在训练MoE模型时,可以开启ampipe特性。 +推荐在`--seq-length`序列长度较长时开启特性,可以获得更好的性能提升。 + +## 使用方法 + +1. 在训练脚本中添加`--ampipe-degree N`即可使能ampipe特性,N为切分数。 +2. 推荐开启`--ampipe-tp-sp-comm-overlap`,额外掩盖mlp中tp域内通信以达到最佳性能提升。 +3. 支持同时开启ampipe特性(包含1,2中两个特性开关)以及mlp通信隐藏特性`--use-pipe-experts`,单独或同时设置`--pipe-experts-multi-stream`和`--pipe-experts-multi-data N`来叠加使用“多流水线”和“多副本”的特性。 + +限制条件: +1. 
需要开启`--moe-model-type deepspeed_moe`以及`--use-flash-attn`的前提下使用特性 +2. 暂不支持`--use-ascend-mc2`、`--overlap-grad-reduce`、`--overlap-param-gather`以及nanopipe `--use-nanopipe`、ripipe `--recompute-in-bubble` `--recompute-in-advance`和自适应选择重计算。 +3. 需要保证设置的`--seq-length`即序列长度可以被`--ampipe-degree`整除,如果需要设置`--sequence-parallel`以及`--context-parallel-size > 1`,需要额外保证设置的`--seq-length`可以被tp和cp整除 +4. 同时开启ampipe特性以及mlp通信隐藏特性时,`--pipe-experts-multi-data N`多副本数量N必须被`--ampipe-degree M`ampipe切分数M整除且N>M,否则`--use-pipe-experts`不生效;同时额外设置`--pipe-experts-multi-stream`时,此限制可以放开至N>=M + +## 使用效果 + +使用该特性可以提升性能。 + +场景:双机16P, sequence_len = 128k, num_layers = 2, num_experts = 4, recompute_method = block, recompute_granularity = full, recompute_num_layers = 2, hidden_size = 12288, moe_router_topk = 2, ep = 2, tp = 8, dp = 1, cp = 2, pp = 1, sp = True + + +| 对比场景 | ampipe-degree | ampipe-tp-sp-comm-overlap | multi-stream | multi-data | 平均TFLOPs | 提升幅度 | +|:-----------------------:|:-------------:|:-------------------------:|:------------:|:----------:|:--------:|:-----:| +| baseline | 1 | 关 | 关 | 1 | 120.56 | / | +| pipe-experts(baseline2) | 1 | 关 | 开 | 2 | 124.85 | 3.56% | +| ampipe | 2 | 开 | 关 | 1 | 127.29 | 5.58% | +| ampipe&pipe-experts | 2 | 开 | 开 | 4 | 126.87 | 5.23% | + + +## 注意事项 + +- 在开启`--ampipe-degree N`时,若`N`过大,导致输入数据切分过细,会引入多余的 cast 和 add 算子,导致额外的开销,引起性能劣化。 目前仅推荐开启`--ampipe-degree 2`,在开启`--context-parallel-size` > 1的场景下,仅支持设置`--ampipe-degree 2`。 +- 推荐开启`--ampipe-tp-sp-comm-overlap`,尤其在开启`--sequence-parallel`时,可额外掩盖mlp中tp域内通信以达到最佳性能提升。 +- 与部分通信隐藏特性冲突,暂时不支持,参考使用方法中的限制条件。 diff --git a/model/train/yoco_moe/docs/features/async-ddp-param-gather.md b/model/train/yoco_moe/docs/features/async-ddp-param-gather.md new file mode 100644 index 000000000..c99c621c7 --- /dev/null +++ b/model/train/yoco_moe/docs/features/async-ddp-param-gather.md @@ -0,0 +1,41 @@ +# 权重更新通信隐藏 + +## 问题分析 + +大模型训练过程中,通常会使用数据并行。在进行梯度更新时,数据并行组中的通信要等反向计算完成后再进行。这样的串行执行顺序会造成计算和通信流存在一定的空闲等待时间,导致执行效率较低。 + +## 解决方案 + 
+通过计算和通信任务并行的方式来实现相互的流水掩盖。 + +### a. 仅打开 `--use-distributed-optimizer` +仅打开分布式优化器时(`--use-distributed-optimizer`),运行流程如下图所示,前向和反向计算完成后,会有独立的通信时间,进行梯度的reduce-scatter、计算权重、进行权重的all-gather,获得权重之后再进入下一轮的前向计算。 +

+ +### b. 打开 `--use-distributed-optimizer` 和 `--overlap-grad-reduce` +在打开`--use-distributed-optimizer`的同时打开`--overlap-grad-reduce`,运行流程如下图所示,对梯度的reduce-scatter过程与反向计算过程并行,从而节省了单独的reduce-scatter过程,提高了计算-通信并行效率。 +

+ +### c. 打开 `--use-distributed-optimizer` 和 `--overlap-grad-reduce` 和 `--overlap-param-gather` +在打开`--use-distributed-optimizer`和`--overlap-grad-reduce`的基础上进一步打开`--overlap-param-gather`,运行流程如下图所示,对权重的all-gather过程与下一轮的前向计算并行,从而节省了单独的all-gather过程。 +

+ +以上流程对比发现,打开`--overlap-param-gather`后,通信与计算完全并行,极大提高了计算-通信并行效率,进而提升了模型训练效率。 + +## 使用场景 + +在数据并行场景可以开启该特性。 + +## 使用方法 + +设置`--overlap-param-gather`即可调用该算法。 +确保同时开启了`--use-distributed-optimizer`和`--overlap-grad-reduce`。 + +## 使用效果 + +使用该特性可以提升性能。 + +## 注意事项 + +开启该特性后,attention层init的顺序会更正为先创建linear_qkv再创建linear_proj,这是为了修复Megatron的错误init顺序,该bug会导致当linear_qkv和linear_proj被分配在不同bucket时,overlap-param-gather可能会在权重未完成更新时进行下一轮前向计算。 +legacy下,`--overlap-param-gather`暂不支持和`--reuse-fp32-param`一起使用。 diff --git a/model/train/yoco_moe/docs/features/async-ddp.md b/model/train/yoco_moe/docs/features/async-ddp.md new file mode 100644 index 000000000..4901eb55e --- /dev/null +++ b/model/train/yoco_moe/docs/features/async-ddp.md @@ -0,0 +1,27 @@ +# 异步DDP + +## 问题分析 + +大模型训练过程中,通常会使用数据并行。在进行梯度更新时,数据并行组中的通信(未开启分布式优化器时为AllReduce,开启时为ReduceScatter)要等反向计算完成后再进行。这样的串行执行顺序会造成计算和通信流存在一定的空闲等待时间,导致执行效率较低。 + +## 解决方案 + +通过将计算和通信任务分别拆分成更细粒度的子任务来实现相互的流水掩盖。并行原理如下图所示: +

+ +### 解决思路: + +设置一个Bucket,存储反向计算的结果。每当Bucket存满时立刻执行桶中结果的通信任务,后续反向计算可以和这部分通信并行执行,从而增大计算和通信流的利用率,提高执行效率。 + +## 使用场景 + +使用该特性的前提是模型开启数据并行和虚拟流水并行,脚本中设置了`--num-layers-per-virtual-pipeline-stage N`。 + +## 使用方法 + +设置`--overlap-grad-reduce`即可调用该算法。 + +## 使用效果 + +开启该特性可以提升性能。 + diff --git a/model/train/yoco_moe/docs/features/auto_tuning.md b/model/train/yoco_moe/docs/features/auto_tuning.md new file mode 100644 index 000000000..cf97797e8 --- /dev/null +++ b/model/train/yoco_moe/docs/features/auto_tuning.md @@ -0,0 +1,73 @@ +# 开箱优化-大模型并行策略自动搜索 Auto Tuning 特性说明 + +## 问题背景 + +随着大模型并行训练可配置的参数越来越多,例如DP、TP(以及SP)、PP、ZERO、VPP、CP、EP、mbs、重计算等,内存和性能受到各种配置的影响变得越来越复杂,人工调优变得越来越困难。于是,业界开始尝试一些自动调优的方法,主要思路是基于网络模型的结构进行白盒或者灰盒建模,在建模的指导下结合一些profiling,进行配置参数的搜索。 + +但是,这些方法通常存在以下两个不足之处: + +- 白盒或灰盒的建模**对网络模型的结构进行了假设**,而很多用户都会对模型做出修改,这类建模难以捕捉到模型的变化。例如,仅仅是GQA/MQA的修改,就会让此类建模的内存出现偏差。 +- **profiling的规模和实际的负载规模相同**,当要进行大规模(如千卡)的训练时,profiling的开销会变得很大。 + +因此,我们设计并开发了一种Auto Tuning的特性,该特性和业界已有的自动调优方案相比,完全基于profiling的分析,无需对网络的结构做出假设,并且支持“以小仿大”,即以小规模的profiling预估更大集群上的较优训练配置。 + +## 解决方案 + +Auto Tuning特性完全依赖由profiling得出的黑盒建模,与网络结构的变化解耦,并且支持在小规模集群(如双机)上推测大规模集群的配置。 + +- **阶段1:** 用少量机器拉起auto tuning,该特性会裁剪网络大小,并生成多个profiling的配置,自动多次拉起。这些profiling主要是用作黑盒分析,例如分析配置变化时,哪些tensor会被切分,哪些算子的shape会如何变化,会增加或减少哪些算子等。profiling结束后会对结果文件进行解析,提取出后续黑盒建模需要的信息。 +- **阶段2:** 依据profiling结果进行黑盒建模。内存方面会自动分析各个tensor在不同配置下的切分情况,性能方面会推断算子随不同配置的增减和shape变化,并回归出机内和机间通信的效率。除了基础的性能和内存建模之外,还会分析各个候选重计算模块的性能和内存,从而可以在后续搜索中预估应该选择哪些模块做重计算,以及其对性能和内存的影响。 +- **阶段3:** 根据阶段2得出的建模,进行配置的搜索,给出每个配置下预期的性能和内存。这一步还会依赖一个算子性能知识库,从中查询不同shape的算子的性能。profiling中产生的、知识库尚未收录的算子都会被添加到算子性能知识库中。如果某个配置下算子性能知识库覆盖的算子比例小于阈值,则会额外拉起一组profiling,该profiling仍然可以以小仿大,通过同时缩小网络的规模和并行参数,从而得到相同shape的算子。如果算子性能知识库覆盖的算子比例不足以推测算子性能,则未覆盖到的少量算子会通过回归来估计性能。搜索结束后会推荐出内存充足的性能最好的三组配置。 + +已支持的模型: +- [x] llama2-7b +- [x] mixtral-8*7b +- [x] gpt3-15b + +已支持的特性: + +- [x] DP +- [x] TP +- [x] Megatron-SP +- [x] PP +- [x] ZeRO1 +- [x] VPP +- [x] CP (ring attention) +- [x] EP (Deepspeed-MOE) +- [x] MicroBatchSize 
+- [x] Token重排 +- [x] 重计算 +- [x] MC2 + +未来计划支持的特性: + +- [ ] ZeRO2 +- [ ] EP (Megatron-MOE) +- [ ] swap-attention +- [ ] 激活函数重计算 +- [ ] MoE All2All overlap comm + +## 使用方法 + +在训练脚本的参数列表中加入以下配置开启 Auto Tuning 特性: + +```bash +--auto-tuning \ # 开启 Auto Tuning 特性 +--auto-tuning-work-dir ./auto_tuning_dir \ # 工作目录,在此会保存profiling等文件 +--auto-tuning-ranks 16 \ # 需求搜索的卡数,最低16卡 +--auto-tuning-log-level debug \ # Auto Tuning log记录等级,可选warning, info, debug +--nnodes $NNODES \ # Profiling拉起的节点数,与基线训练脚本保持一致 +--nproc-per-node $GPUS_PER_NODE \ # 每个节点上运行的进程数,一般与单节点卡数相同,与基线训练脚本保持一致 +--master-addr $MASTER_ADDR \ # 主节点IP,与基线训练脚本保持一致 +--master-port 6005 \ # 主节点端口,设置一个与基线脚本不同的端口 +--node-rank $NODE_RANK \ # 与基线训练脚本保持一致 +``` + +## 环境变量 +以下环境变量为 Auto Tuning 控制阶段性 Profiling 所用环境变量开关,**仅为 Auto Tuning 内部使用**,**禁止**在正常训练流程中设置 + +**Auto Tuning会在一个隔离的进程环境中设置以下环境变量,不会export至用户环境中** +- "OOTB_OPTIMIZER_MODIFIED_ARGV_PATH=${WORK_dir}/auto_tuning_modified_argv.json": 修改Profiling拉起配置参数的文件位置 +- "OOTB_OPTIMIZER_PARSE_ARGS=TRUE": 获取硬件相关信息及模型参数 +- "OOTB_OPTIMIZER_PARSE_MODEL=TRUE": 获取模型结构 +- "OOTB_OPTIMIZER_PROFILING=TRUE": 获取完整Profiling信息及自适应重计算Profiling信息 diff --git a/model/train/yoco_moe/docs/features/automated-pipeline.md b/model/train/yoco_moe/docs/features/automated-pipeline.md new file mode 100644 index 000000000..89974e662 --- /dev/null +++ b/model/train/yoco_moe/docs/features/automated-pipeline.md @@ -0,0 +1,44 @@ +# PP自动并行算法 + +## 问题分析 + +流水线并行是将模型网络层切分成多个stage,再把stage映射到不同的设备上,使得不同设备并行计算神经网络的不同部分。流水线并行大大缓解了单卡内存瓶颈问题,并通过多卡之间的流水训练提高了硬件的利用率。流水线并行成为了当前大模型训练最常用的并行方式之一。然而当前流水线并行在内存消耗和性能方面并非最优,主要存在两大问题: + +1)内存不均衡:当前流水线常用调度模式(1F1B)下,靠近模型前面层的流水线stage的内存占用远多于后面的stage内存占用,并且内存占用差距有2~3倍,总体上可训的模型规模受限于PP-Stage 0的显存消耗。 + +2)流水线气泡:流水线1F1B调度策略在每个设备上交替进行小批次数据的前向后向计算,由于各流水设备之间计算负载不均衡或者网络通信的波动,导致设备与设备之间存在等待(流水线气泡),影响训练性能。 + +## 解决方案 
+本系统基于在线profiling+PP建模搜索,通过使能内存优化模块、性能优化模块分别最大化流水线并行训练的内存和性能。内存优化模块旨在通过自动寻找流水线并行中stage的最优层分布和细粒度重计算模块,均匀分配每个卡上的显存,优化存在显存瓶颈的PP-stages,降低峰值内存;性能优化模块采用mbs序列和前向反向调度序列自动寻优和多流异步通信机制,压缩流水线气泡,提升训练性能。 + +### 内存优化模块 +基于在线profiling+PP建模搜索,自动构建出最优的内存排布方案均衡化各个stage之间的内存开销,降低峰值内存的同时最小化端到端训练时间,具备较好的易用性和泛化性。具体而言,在层分布和细粒度重计算的联合搜索空间自动寻优内存排布方案: +① PP层分布切分:采用不均匀层切分策略,自动搜索最优层切分方式,均衡化每个卡消耗的显存,从而优化存在显存瓶颈的PP-stages,降低峰值内存。 +② 细粒度重计算:利用流水线气泡时间来做重计算,保证性能不劣化,通过自动寻优细粒度的重计算策略,进一步降低峰值内存。 + +### 性能优化模块 +在满足训练峰值内存开销不超过设备最大内存容量的条件下,通过自动寻找流水线并行中最优的mbs序列及前向反向调度序列,最小化端到端训练时间。 +① 动态mbs:在给定的gbs下,自动搜索最优mbs序列。通过小mbs加速流水线的启动与冷却,压缩气泡时间,稳态阶段自动寻找最高效的mbs进行计算,缩短稳态阶段计算时间,提升端到端训练性能。 +② 前反向调度:通过调整流水线并行过程中前反向计算的顺序,结合多流异步通信机制,压缩流水线稳态气泡,提升训练性能。 + +PP自动并行系统如下图所示: + +

+ + +## 使用场景 + +该系统主要用于开启流水线并行的训练场景,使用PP自动并行系统可有效优化内存不足或流水线气泡占比过大的问题。 +**使用条件:** +1. `--pipeline-model-parallel-size >= 2`; +2. 内存、性能优化模块不能同时使用。 + + +## 使用方法 + +(1)当内存不足时,可启用PP自动并行内存优化模块,请首先在训练脚本中添加 `--automated-pipeline` 标志启用功能。 +(2)当流水线气泡过大导致训练性能不优时,可启用PP自动并行性能优化模块,请首先在训练脚本中添加 `--automated-pipeline-perf` 标志启用功能。 + +## 使用效果 + +PP自动并行内存优化模块收益:LLaMA2-7B,LLaMA-13B,LLaMA2-70B等使用流水线并行PP配置训练的模型,叠加本算法后平均峰值内存减少11.5%,平均性能劣化小于1%。性能优化模块收益:LLaMA2-7B,LLaMA-13B,LLaMA3-8B等使用流水线并行PP配置训练的模型,叠加本算法后平均性能提升7.6%。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/automatic_parallelism_mm.md b/model/train/yoco_moe/docs/features/automatic_parallelism_mm.md new file mode 100644 index 000000000..9d29229d9 --- /dev/null +++ b/model/train/yoco_moe/docs/features/automatic_parallelism_mm.md @@ -0,0 +1,179 @@ +## Automatic Parallelism For Multi-Modal + +## 问题分析 +当前多模态大模型的并行训练方法越来越丰富,主要有TP\PP\DP\CP\VPP等,每种并行方法在计算、内存、通信上均有不同的优势。在当前的生产过程中,主要是使用基于专家经验的人工调优,一般需要数天甚至数周的时间。相似模型或者一个模型的不同训练阶段,最优的并行配置也并不相同。随着并行方法的不断丰富,并行搜索空间不断变大,使用人工调优的方法变得越来越不可行。因此需要构建一个面向多模态大模型的并行配置自动调优算法,可以自动化得根据集群资源、模型结构得出最优的并行方法。 + +## 解决方案 +针对多模态大模型结构丰富,训练阶段多样的特点,我们将网络进行切分和子图归并,然后使用基于黑盒Profiling的方法对多种并行配置采样,最后使用基于整数规划的方法进行非均匀的网络层切分: + +- 采样性能: +遵照多模态大模型原有的训练方法和调用逻辑,将模型进行切分和子图归并,然后使用少量资源进行block级别的性能采样,这样的采样方案兼顾了子图外推的灵活性和采样操作低开销的要求。 +- 端到端建模: +根据性能采样得到的子图性能、内存数据,使用白盒建模的方法得到网络的峰值内存,以及仿真得到的单步迭代时间。 +- 并行策略调优: +根据集群的资源和模型支持的并行策略,构建全量的并行策略搜索空间;针对每种并行策略,将PP非均匀层最优切分问题转化为整数规划问题,联合考虑PP流水调度、内存限制和重计算策略,优化目标为端到端时间最短。遍历所有可行的并行策略,得到最优的并行方案; + +![1](../../sources/images/auto_parallel_mm_1.png) + +## 使用方法 +在使用多维自动并行特性时,**需使用python作为脚本启动器,在所有的节点上拉起脚本**,并配置多维自动并行相关的参数。相关参数及其函数如下表所示: + +| 参数名 | 参数含义 | +| --------------------------- | -------------------------------------------------- | +| --auto-parallel-mm | 多维自动并行特性总开关 | +| --nnodes | 采样集群中节点的个数 | +| --nproc-per-node | 采样集群中每个节点计算设备的个数 | +| --master-addr | 采样集群中主节点的IP地址 | +| --master-port | 采样集群用于通信的端口号,各节点需要配置相同的端口 | +| --node-rank | 采样集群中节点的rank,主节点为0,其他节点为1,2,······ | +| 
--simulated-nnodes | 待训练集群的节点个数 | +| --simulated-nproc-per-node | 待训练集群每个节点的设备数 | + +下面是基于QWen2VL-72B模型的配置示例: +```shell +#!/bin/bash +export CUDA_DEVICE_MAX_CONNECTIONS=1 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export ASCEND_GLOBAL_LOG_LEVEL=3 +export TASK_QUEUE_ENABLE=2 +export COMBINED_ENABLE=1 +export CPU_AFFINITY_CONF=2 +export HCCL_CONNECT_TIMEOUT=1200 +export NPU_ASD_ENABLE=0 +export ASCEND_LAUNCH_BLOCKING=0 +export HOST_CACHE_CAPACITY=20 +export ACLNN_CACHE_LIMIT=100000 +export MULTI_STREAM_MEMORY_REUSE=2 +export PYTORCH_NPU_ALLOC_CONF="expandable_segments:True" +# 根据机器实际情况填写 +NPUS_PER_NODE=8 +MASTER_ADDR=localhost +MASTER_PORT=6010 +NODE_RANK=0 +NNODES=1 +WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) +echo $MASTER_ADDR +echo $NODE_ADDR +echo $NODE_RANK +echo $NNODES + + +MM_DATA="./examples/qwen2vl/data_72b.json" +MM_MODEL="./examples/qwen2vl/model_72b.json" +MM_TOOL="./mindspeed_mm/tools/tools.json" +LOAD_PATH="ckpt/Qwen2-VL-72B-Instruct" +SAVE_PATH="save_dir" + +TP=4 +PP=2 +CP=1 +SEQ_LEN=1024 +MBS=1 +GRAD_ACC_STEP=32 +DP=$(($WORLD_SIZE/$TP/$PP/$CP)) +GBS=$(($MBS*$GRAD_ACC_STEP*$DP)) + +DISTRIBUTED_ARGS=" + --nproc_per_node $NPUS_PER_NODE \ + --nnodes $NNODES \ + --node_rank $NODE_RANK \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT +" + +GPT_ARGS=" + --use-mcore-models \ + --tensor-model-parallel-size ${TP} \ + --pipeline-model-parallel-size ${PP} \ + --micro-batch-size ${MBS} \ + --global-batch-size ${GBS} \ + --num-layers 80 \ + --hidden-size 8192 \ + --ffn-hidden-size 29568 \ + --num-attention-heads 64 \ + --tokenizer-type NullTokenizer \ + --vocab-size 152064 \ + --seq-length 8192 \ + --max-position-embeddings 32768 \ + --make-vocab-size-divisible-by 1 \ + --init-method-std 0.01 \ + --normalization RMSNorm \ + --use-fused-rmsnorm \ + --swiglu \ + --use-fused-swiglu \ + --lr 1.0e-5 \ + --lr-decay-style cosine \ + --weight-decay 0 \ + --train-iters 5 \ + --lr-warmup-fraction 0.1 \ + --clip-grad 0.0 \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + 
--no-gradient-accumulation-fusion \ + --no-load-optim \ + --no-load-rng \ + --no-save-optim \ + --no-save-rng \ + --seed 42 \ + --bf16 \ + --load $LOAD_PATH \ + --variable-seq-lengths \ + --enable-one-logger \ + --use-distributed-optimizer \ + --reuse-fp32-param +" + +MM_ARGS=" + --mm-data $MM_DATA \ + --mm-model $MM_MODEL \ + --mm-tool $MM_TOOL +" + +SEARCH_ARGS=" + --auto-parallel-mm \ + --nnodes $NNODES \ + --nproc-per-node $NPUS_PER_NODE \ + --master-addr $MASTER_ADDR \ + --master-port $MASTER_PORT \ + --node-rank $NODE_RANK \ + --simulated-nnodes 8 \ + --simulated-nproc-per-node 16 \ +" + +OUTPUT_ARGS=" + --log-interval 1 \ + --save-interval 10000 \ + --eval-interval 10000 \ + --eval-iters 5000 \ + --save $SAVE_PATH \ +" +logfile=$(date +%Y%m%d)_$(date +%H%M%S) +mkdir -p logs + +python pretrain_qwen2vl.py \ + $GPT_ARGS \ + $MM_ARGS \ + $OUTPUT_ARGS \ + $SEARCH_ARGS \ + --distributed-backend nccl \ + | tee logs/train_${logfile}.log 2>&1 + +chmod 440 logs/train_${logfile}.log +``` + +## 调优效果 + +![2](../../sources/images/auto_parallel_mm_2.png) + + +## 搜索结果说明 + +搜索算法的调优结果会存放在执行目录下的 auto_parallel_search_optimal_config.json 文件中,以下表格是对搜索结果的解释 + +| 参数名 | 参数含义 | +| --------------------------- | -------------------------------------------------- | +| parallel_config | 并行配置,包含PP/TP/DP/MBS维度 | +| layer_placement | 层切分配置,其中包含ViT及LLM的PP层切分策略 | +| layer_recompute | 细粒度重计算层数,包含ViT及LLM的重计算层数 | +| e2e_time | 仿真的端到端时间 | +| throughput | 仿真的模型吞吐率 | diff --git a/model/train/yoco_moe/docs/features/communication-over-computation.md b/model/train/yoco_moe/docs/features/communication-over-computation.md new file mode 100644 index 000000000..eb2dd8bd5 --- /dev/null +++ b/model/train/yoco_moe/docs/features/communication-over-computation.md @@ -0,0 +1,82 @@ +# 计算通信并行 CoC (Communication Over Computation) + +## 问题分析 + 
+大模型训练过程中,其ColumnParallelLinear和RowParallelLinear部分的前反向均存在相互毗邻、顺序依赖的计算通信组合,计算为Matmul,而通信则为AllReduce(不开启序列并行)或AllGather和ReduceScatter(开启序列并行)。这些计算通信的组合因为存在顺序依赖(即后一个的输入是前一个输出),常常被串行执行,但这时候计算和通信流都存在一定的空闲等待时间,该过程的执行效率没有被最大化。 + +## 解决方案 + +通过将计算和通信任务分别拆分成更细粒度的子任务来实现相互的流水掩盖。 + +### 解决思路 + +#### Python脚本侧实现 +将张量进行进一步切分(2/4/8份),通过Python脚本的方式实现每个子tensor之间计算和通信的并行,从而增大计算和通信流的利用率; + + +#### 融合算子实现 +基于MTE远端内存访问能力,以融合大Kernel方式在算子实现的内部将计算和通信任务分别拆分成更细粒度的子任务来实现相互的流水掩盖; + +## 使用场景 +该特性目前主要用于训练场景,当Attention模块和MLP模块串行执行且计算通信存在顺序依赖与位置毗邻关系时适用。 + +使用Python脚本侧实现时,对Matmul左矩阵的m轴有一定要求,必须是切分数(2/4/8)的倍数,且不适用于计算与通信片段耗时相差较大的情况。需要注意的是,脚本侧实现在切分矩阵、切分数量较大时,容易出现host bound问题,从而不能得到预期的收益。支持ALL_REDUCE, ALL_GATHER, REDUCE_SCATTER三个通信场景,支持灵活设置先通信或先计算。 + +对于计算通信融合算子,目前已支持: +1. MATMUL_ALL_REDUCE融合算子(先计算后通信)及其确定性计算; +2. MATMUL_REDUCE_SCATTER融合算子(先计算后通信)及其确定性计算; +3. ALL_GATHER_MATMUL, ALL_GATHER_MATMUL_V2融合算子(先通信后计算)(V2版本接口支持ALL_GATHER中间结果获取); +4. 量化场景:MATMUL_ALL_REDUCE融合算子支持fp16格式的w8A16伪量化,粒度包含per tensor / per channel / per group; + +## 使用方法 + +当前计算通信并行有两种实现方法:python脚本使能、融合算子使能,两者选其一即可。两个方式都需要替换原Megatron框架中的ColumnParallelLinear和RowParallelLinear这两个class的forward函数,替换脚本已经根据MindSpeed指定Megatron版本进行编码和适配,位于mindspeed/core/tensor_parallel/lcal_coc/目录下。 + +请根据需要选择下列两种场景中的一个进行使用。 + +设置--use-ascend-coc使能计算通信并行功能,使用方式通过如下变量进行设置: + +### 1. 使用通过Python脚本使能的计算通信并行特性 + +```shell +--use-ascend-coc +--coc-parallel-num 2 # 或者4,或者8 +``` + +### 2. 使用通过融合算子使能的计算通信并行特性 +注意:计算通信并行融合算子需要安装ATB后才能使用! + +ATB安装方法: + +- 二进制包安装:安装CANN-NNAL包之后, source /usr/local/Ascend/nnal/atb/set_env.sh +```shell +--use-ascend-coc +--coc-fused-kernel # 注意:当前只支持TP=8的场景! 
+``` + +融合算子的环境变量拥有更高优先级,即当 coc-parallel-num > 1 且 使能coc-fused-kernel时,前者不会生效。 + + +## CFG自定义方法 + +用户可以自定义mindspeed/core/tensor_parallel/lcal_coc/user_config.py中的coc_cfgs字典,来达到自定义COC的部分配置。 + +【只对通过Python脚本使能的计算通信并行实现适用】 +'matmul_soc_friendly':是否对输入matmul的张量做transpose/padding操作,使其以NPU亲和的shape进入Matmul算子从而获得一定性能提升,默认为True; +'customized_coc': 自定义指定shape的matmul的COC切分份数,默认为{}。如果需要设置指定shape的matmul的CoC切分份数为1(不开COC)或与coc-parallel-num不同的值,可以按照这个例子设置: +'customized_coc': {"[16384, 5120, 1920]": 8, "[16384, 1920, 5120]": 1} + +【只对通过融合算子使能的计算通信并行实现适用】 +'enable_coc_in_column_backward': 是否在ColumnParallelLinear的反向中使用COC(ColumnParallelLinear的反向中本来就有非互相依赖的计算通信并行),默认为False; + +【对脚本实现和融合算子实现都适用】 +'recompute_all_gather': 是否在ColumnParallelLinear的反向中重新计算all gather,默认为True。若为False,则将从前向保存all gather结果到反向,会减少反向计算时间但是会增加训练过程中的峰值内存占用; + +## COC融合算子使用效果 + +在BLOOM 7B模型中获得端到端性能收益约3.20%,在BLOOM 176B模型中获得端到端性能收益约5.47%,在LLAMA2 70B模型中获得端到端性能收益约7.85%。精度相对误差控制在2%的范围内。 + +## 注意事项 + +暂不兼容 --use-ascend-mc2 特性 。 +当前暂未适配MoE模型。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/context_parallelism_kv_cache.md b/model/train/yoco_moe/docs/features/context_parallelism_kv_cache.md new file mode 100644 index 000000000..bcf2b6a47 --- /dev/null +++ b/model/train/yoco_moe/docs/features/context_parallelism_kv_cache.md @@ -0,0 +1,66 @@ +# Context Parallelism特性中的KV缓存优化 + +## 问题分析 +Context Parallelism特性在attention计算前将序列在sequence维度切分,在attention计算时,需要计算全量的序列,因此在attention计算时,会有CP的通信产生。 + +1. Ring CP在attention计算时通过send/recv方式循环接收其他卡的KV,最终保证Q能关注到全量KV,保持和不拆解情况的数学等价性。当前,前向计算完成后KV被丢弃,反向时需要再次send-recv拿到kv。 在短序列计算过程中,通信时间会大于计算时间,导致计算时间无法掩盖通信时间,从而导致性能变差。因此,需要针对计算时间不足以掩盖通信时间的情况进行优化,以满足该场景下的训练加速需求。 + +2. Ulysses CP方案在GQA模型下,开启TP后,每个rank通常会只有一个head,在这种情况下,使用All2All的通信量与AllGather通信量相同,而All2All方案在只有一个head的情况下,需要对KV进行repeat,在数据layout通常为sbh或sbnd的情况下,对h维做repeat,地址不连续,会导致算子存在效率问题,并且需要插入transpose等操作,而allgather直接操作s维,地址连续,无需额外操作。 + +3. 
Ulysses CP在有repeat产生的情况下,传入attention反向的Key和Value相较于repeat前的Key和Value内存扩大了CP倍,这将会导致内存的消耗增加出现out of memory的情况。 + +## 解决方案 + +1. 在Ring Attention长序列并行的基础上加入KV缓存功能,可选择进行(1)缓存所有K,V,(2)只缓存K以及(3)设置分层缓存的方式在长序列并行的前向中使前向计算接收的kv始终保留至反向计算,直接计算出梯度结果,减少通信时间。 + +2. 在GQA模型,一个head的情况下,Ulysses Attention长序列并行的基础上加入Allgather KV + All2All Q的方案,减少repeat操作以及transpose等内存非连续的开销,提高训练性能。 + +3. 在Ulysses使用All2All和AllGather方案加入KV缓存功能,可选择进行(1)缓存所有K,V,(2)只缓存K以及(3)设置分层缓存的方式在前向中将通信前的KV进行缓存始终保留至反向再进行重通信进行计算,节省内存。All2All方案只能在做了Repeat的情况下可以开启KV缓存。 + +### 解决思路: +1. Ring方案中序列被切分成CP份并行计算,在不同rank上计算出自己的K和V,同时send-recv其他rank的K和V。例如rank0上的K0/V0和K7V7发送给“下游”的rank,同时接收“上游”rank发送过来的K3/V3和K4/V4,每张卡重复执行相同的动作CP-1次,最终每个切分后的序列可以“关注”到全局的KV,计算得到完整attention结果。反向计算逻辑同理,初始时每个rank有自己的KV,在计算出自己的gradient后,之后步骤将接收到的K和V分块以及dK和dV发送给其他rank,同时接收其他rank的K、V分块以及dK和dV分块,并把接收到的K和V作为输入计算和更新梯度,实现计算和通信并行。 +反向过程关键的一点,rank间通信需要发送K、V、dK、dV四个数据块,一共要发送CP-1次,其中K和V在前向已经在各个rank间逐次接收发送,如果在前向过程中将K、V缓存,反向的通信时间将减半。在CP比较大时,缓存全部K、V对内存压力增大,通过支持缓存K、V的一部分,或者每经过N个Layer缓存一次,支持按需灵活配置。 + +2. 在GQA模型,一个head的情况下,使用AllGather KV的通信方式替换原有的Repeat-All2All KV方式获取全量的sequence,对Q仍然使用All2All方案。 + +3. Ulysses方案中,将在前向进行Repeat-All2All或者AllGather通信前的KV进行缓存带到反向,并使用通信后的KV进行计算确保计算的正确性,反向在拿到Repeat-All2All或者AllGather通信前的KV的时候,对KV进行Repeat-All2All或者AllGather重通信进行梯度计算。因为进行重通信会有性能损失,因此可以缓存K、V的一部分,或者每经过N个Layer缓存一次,灵活组合,在内存限制内达到最优的性能。 + +灵活缓存方案如下, +1. 支持配置缓存K、V的layer间隔:缓存部分K、V可通过考虑在不同layer之间进行缓存来实现,通过增加一个参数interval来控制缓存的间隔层数。例如interval=1时,那么就会在编号为0,2,4,...的layer中对K、V进行缓存,依次类推。缓存间隔支持从0开始,不超过rank上的layer数量,间隔默认值等于0。 + +2. 
支持缓存K、V的一部分:在每个layer上,可支持只缓存K(K和V的size一样),这种方法通过使用一个参数对其控制,当参数的值为half时,只对K缓存,配置full则缓存K和V,默认缓存K和V。此配置和按layer间隔配置缓存可同时开启,配置后的缓存效果叠加,互不冲突。 + +## 使用场景 + +训练过程中开启长序列并行的情况下。 + +需使用FlashAttention,目前已默认开启FlashAttention。 + +在Ring Attention中想要使用KV缓存获得收益,需要使得计算时间小于通信时间,理论上需要确保每个计算块分到的序列长度`c`满足`c < F/B`。其中`F`是每个device的FLOPS,`B`是每个device间的带宽。 + +在Ulysses Attention中,想要使用AllGather KV + All2All Q获得收益,需要使用GQA模型,并且需要在通信量相同的前提下,即KV仅有一个head的情况下。 + +在Ulysses Attention中,想要使用KV缓存获得收益,Repeat-All2All方案需要在使用repeat的情况下,才能获得内存收益,而AllGather KV + All2All Q开启CP即可以获得内存收益。 + +## 使用方法 + +| 重要参数 | 参数说明 | +|------------------------------------------------|----------------------------------------------------------| +| --context-parallel-kv-cache-policy [full/half] | 开启CP前向计算过程缓存KV及其级别,默认full缓存K和V,half缓存K | +| --context-parallel-cache-interval [int] | 设定执行CP前向计算过程缓存KV的layer间隔层数,默认为0,即每一个layer都需要缓存,根据用户需求配置。 | +| --use-ulysses-allgather-kv | 设定Ulysses Attention启用AllGather方案,默认为False,不启用。 | + +## 使用效果 + +在Ring Attention中计算时间无法掩盖通信时间的场景下,开启KV缓存特性会使得训练时间变短,提升训练性能,但会导致内存增加。 + +在Ulysses Attention中开启AllGather KV,在允许的场景下,会使得训练时间变短,提升训练性能。 + +在Ulysses Attention中开启KV缓存,在Repeat-All2All做了Repeat的情况下,内存使用会减少,但会导致性能下降。 AllGather情况下,内存使用会减少,但会导致性能下降。 + +## 注意事项: + +1. 开启--context-parallel-kv-cache-policy时需要同时开启Context Parallel,否则特性不支持。 +2. 开启--context-parallel-cache-interval时需要同时开启--context-parallel-kv-cache-policy并且interval的值需要小于layer的数量,否则特性不支持。 +3. 开启--use-ulysses-allgather-kv时需要开启Context Parallel且设置--context-parallel-algo ulysses_cp_algo,并且需要开启--group-query-attention,且KV每个rank的head数量为1, 否则特性不支持。 +4. 
开启--context-parallel-kv-cache-policy以及--context-parallel-algo ulysses_cp_algo的情况下,需要使KV做Repeat操作,否则特性不支持。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/conv3d_sequence_paralle.md b/model/train/yoco_moe/docs/features/conv3d_sequence_paralle.md new file mode 100644 index 000000000..dc4b99bc2 --- /dev/null +++ b/model/train/yoco_moe/docs/features/conv3d_sequence_paralle.md @@ -0,0 +1,36 @@ +# conv3d 序列并行 +## 问题分析 +在多模态、机器视觉等领域的模型结构中经常会采用conv3d模块用于特征图的三维卷积操作。在大模型中,卷积操作的耗时会随着特征图规模的增加而增加。
+由于特征图的每一个卷积区块的卷积过程是顺序执行的,而实际上各个区块的执行顺序并不存在先后顺序上的约束关系,因此在分布式训练中需要对三维卷积操作进行并行化处理来提高卷积速度。
+ +## 解决思路 +构造Conv3DSequenceParallel类,将输入特征图按照卷积核的depth维度进行切分后进行并行卷积。
+- **前向过程** :
+ 构造Conv3DSequenceParallel类,将输入特征图按照卷积核的depth维度进行切分,分发到不同的进程组中进行conv3d三维卷积操作,将卷积结果进行gather操作后输出到下游模块。
+- **反向过程** :
+ Conv3DSequenceParallel类会将下游反向得到的梯度进行split操作,实现梯度在depth维度上的切分,分发到并行的三维卷积模块上进行反向传播,再将并行的三维卷积模块的反向梯度进行gather操作后输出到上游模块。
+![](../../sources/images/conv3d_sequence_parallel.png) +## 使用场景 +训练含有conv3d(非padding模式)模块的模型。
+ +## 使用方法 +将原有的conv3d模块替换为Conv3DSequenceParallel并指定相关参数,以实现并行加速。
+Conv3DSequenceParallel模块接口如下:
+ +`Conv3DSequenceParallel(pg, in_channels, out_channels, kernel_size, stride, dilation, bias, param_async, dtype, sp_size)` +- `pg`:必选输入,数据类型为list(int),表示通信进程组。 +- `in_channels`:必选输入,数据类型为int,表示输入通道数。 +- `out_channels`:必选输入,数据类型为int,表示输出通道数。 +- `kernel_size`:可选属性,数据类型为tuple(int,int,int),默认值:(1, 1, 1),表示卷积核大小。 +- `stride`:可选属性,数据类型为tuple(int,int,int),默认值:(1, 1, 1),表示各个维度卷积步长大小。 +- `dilation`:可选属性,数据类型为float,默认值:1.0,表示扩张率。 +- `bias`:可选属性,数据类型为bool,默认值:True。表示是否开启偏置。 +- `param_async`:可选属性,数据类型为bool,默认值:False。表示是否开启参数异步通信。 +- `dtype`:可选属性,默认值:torch.bfloat16。表示数据类型。 +- `sp_size`:可选属性,数据类型为int,默认值:1。表示序列并行大小。 + +## 使用影响 +将逐卷积区域的卷积操作分发到进程组中进行并行化执行,提高三维卷积效率。
+ +## 注意事项 +Conv3DSequenceParallel模块并不支持padding模式,因此使用了padding的conv3d模块不能使用Conv3DSequenceParallel模块替换。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/data-parallel.md b/model/train/yoco_moe/docs/features/data-parallel.md new file mode 100644 index 000000000..7e34b53bd --- /dev/null +++ b/model/train/yoco_moe/docs/features/data-parallel.md @@ -0,0 +1,31 @@ +# 数据并行 + +## 问题分析 + +对于数据集过大的模型训练场景,其训练时间过长,要将数据集进行切分,让一个计算设备只处理一部分数据。 + +## 解决方案 + +数据并行将数据集切分为多个batch,并且均分给不同计算设备。每个计算设备只负责处理自己的batch。 +数据并行满足: +1.每个设备上模型的组网和参数相同。 +2.每个设备处理不同batch的数据。 + +### 解决思路 + +1.每个计算设备上都存储一份完整的模型副本。 +2.数据集被切分为多个batch,并且平均分给不同的计算设备,各计算设备处理不同的数据。 +3.反向计算完成得到梯度之后,需要通过all-reduce操作将各计算设备得到的梯度取平均后再发给各计算设备,保证各计算设备的参数保持一致。 + +## 使用场景 + +训练数据集过大,训练时间过长,且可用于训练的计算设备比较充足,可以存储多份完整模型,可以开启数据并行,来加快训练速度,减轻单设备的计算压力。 + +## 使用方法 + +框架中数据并行通过总设备数(world_size)、模型并行数(tensor_model_parallel_size)、流水线并行数(pipeline_model_parallel_size)、长序列并行数(context_parallel_size)计算得到。 +数据并行数(data_parallel_size) = world_size // (tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size) + +## 使用效果 + +利用多个设备,增加了总的显存占用量,但是加快了训练速度,减轻了单设备的计算压力。 diff --git a/model/train/yoco_moe/docs/features/deepspeed_moe/deepspeed-moe-efficient-moe.md b/model/train/yoco_moe/docs/features/deepspeed_moe/deepspeed-moe-efficient-moe.md new file mode 100644 index 000000000..558353721 --- /dev/null +++ b/model/train/yoco_moe/docs/features/deepspeed_moe/deepspeed-moe-efficient-moe.md @@ -0,0 +1,52 @@ +# MoE token dropless性能优化 + +### 问题分析 + +现有MoE无token丢弃方案在训练过程中实时all reduce通信全局最大专家容量作为所有rank的容量,引入频繁的all reduce开销。 + +### 优化方案 + +采用滑窗预测,无需每一个step都进行all reduce通信,all reduce通信频次降低为1/window_size,对于超过预测值场景,使用实际最大专家容量值替换预测capacity。 + +#### 优化思路: + +1. MoE模型训练过程中capacity具有一定的连续性,维护一个滑动窗口来保存近几次统计的capacity来预估下一个窗口的capacity。 +2. 在Gate中不再每一个step都直接进行all reduce统计全局最大capacity,而是各个进程先判断当前的capacity能否满足不丢弃token,通过reduce统计全局的判断信息,若都能满足则无需进行all reduce通信,否则进行all reduce通信取得实际max capacity. +

+ +### 使用场景 + +请在MoE模型中的无token丢弃训练场景下使用此优化特性,以提升训练速度。当训练脚本`--num-experts`等于`--moe-train-capacity-factor`即`专家数量`等于`专家容量因子`时,为无token丢弃场景。 + +### 使用方法 + +设置`--moe-no-drop`: 表示开启MoE无token丢弃训练模式,Top1 Gate &Top2 Gate均已支持, 请搭配aux loss/sinkhorn负载均衡方式使用,避免无token丢弃场景负载均衡情况劣化严重。 + +设置`--moe-dynamic-padding`: 表示开启MoE无token丢弃训练优化,需要搭配`--moe-no-drop`同时开启。 +附加功能 + +设置`--moe-use-sinkhorn`: 表示开启sinkhorn负载均衡功能 + + +### 使用效果 + +在保持精度的同时提升训练速度。 + +训练模型:Mixtral(4层) + +精度对比图如下: +

+ +top2 多种并行方式 提速效果: +

+ +top1 多种并行方式 提速效果: +

+ +同时开启此优化减少显存占用3%: +

+ +## 注意事项: + +暂不兼容 流水线并行特性,即需满足--pipeline_model_parallel_size <= 1。 + diff --git a/model/train/yoco_moe/docs/features/deepspeed_moe/deepspeed-moe-token-rearrange.md b/model/train/yoco_moe/docs/features/deepspeed_moe/deepspeed-moe-token-rearrange.md new file mode 100644 index 000000000..f2d12bce8 --- /dev/null +++ b/model/train/yoco_moe/docs/features/deepspeed_moe/deepspeed-moe-token-rearrange.md @@ -0,0 +1,28 @@ +# Token 重排性能优化 + +## 问题分析 + +DeepSpeed MoE的token重排采用了两个BatchMatmul实现,时间复杂度为o(s^2),而token重排进行计算时由于矩阵的稀疏性导致一些不必要的计算,存在优化空间。 + +## 解决方案 + +重排操作可以通过等价的pytorch API: index_select来实现,降低计算时间复杂度到o(s),从而提高训练性能。 + +### 解决思路: + +1. 重排过程:top1gating/top2gating 函数计算出每个专家选择的token的索引:expert_select_token_idx,shape为: [E*C],MoE前向过程中根据此索引通过index_select API实现token的重排; + +2. 反重排过程:top1gating/top2gating 函数同时需要计算每个token在各个专家输出的索引位置:token_rearrange_ec_idx,shape为:[S]。在MoE前向过程中,token经过专家输出后通过index_select API 从[E*C, M]的专家输出中恢复token的输出:[S, M],最后乘以token选择对应专家的权重,得到MoE layer的输出。 + +## 使用场景 + +进MoE层时实际序列长度8K以上。 + +## 使用方法 + +设置`--enable-token-rearrange-opt`,即可调用该算法。 + +## 使用效果 + +预期性能收益在2%~3%左右。 + diff --git a/model/train/yoco_moe/docs/features/deepspeed_moe/deepspeed-moe.md b/model/train/yoco_moe/docs/features/deepspeed_moe/deepspeed-moe.md new file mode 100644 index 000000000..60c7e4bd5 --- /dev/null +++ b/model/train/yoco_moe/docs/features/deepspeed_moe/deepspeed-moe.md @@ -0,0 +1,79 @@ +# Ascend DeepSpeed MoE 相关特性 + +## 整体方案: + +Mixture of Expert(MoE)是指混合专家模型功能。在大模型训练中使用该功能可以将常规的稠密大模型变成稀疏的MoE大模型,在计算量不显著增加的情况下大幅提升模型的参数量。 + +通过使用专家并行(Expert Parallelism,EP),把专家分配到多个计算设备上,减轻单个计算设备的显存压力,也就是说专家并行(Expert Parallelism,EP),对全量专家进行分组。 +如图所示,一个包含6个专家的MoE模型在EP=2时的专家分布情况。可以把专家并行理解成模型并行的一种形态(模型被切分成多份),但是输入的数据又是不同的(DP),因此token在经过Router之后,可能会选中别的卡上的专家,此时就需要将这些token发送过去,即EP进程组内需要通过All2All通信交换token。值得注意的是,该MoE模型在token选择专家时,如果超过容量会drop掉token。 + +![输入图片说明](moe.png) + +## 特性背景: + +支持moe模型及相关特性的兼容和适配,包含MoE基础模型、MoE适配序列并行(SP)、MoE适配长序列(CP)、MoE token重排性能优化。 + +1.Mindspeed新增MoE混合专家模型(Mixtral 
8*7B),支持使用MoE模型进行训练。 + +2.MoE支持序列并行(sequence parallel),支持MoE与序列并行同时开启,减少MoE模块计算,提升MoE模块的训练性能。 + +3.MoE适配长序列(context parallel)特性,支持MoE和CP特性同时开启。 + +4.MoE适配分布式优化器特性,支持MoE和分布式优化器同时开启,降低内存,减少OOM风险。 + +5.MoE token重排性能优化,减少token选择专家的gate计算量,提升训练性能。 + +## 使用场景 + +在需要处理大规模数据集和复杂任务的情况下,使用基于 moe 结构的大模型,以及其他SP、CP等特性。此特性暂只适配`--use-legacy-models`。 + +### 使用建议: + +MoE+SP MoE开启TP需要同时开启SP。 + +MoE+cp 建议处理长序列时开启。 + +MoE+分布式优化器 建议默认开启。 + +Token重排优化 建议默认开启。 + + +## 使用方法 + +MoE特性基础功能: + +| 重要参数 | 参数说明 | +|-------------------------------------|--------------| +| --moe-model-type deepspeed_moe | 使用mixtral模型 | +| --num-experts [int] | 专家数 | +| --expert-model-parallel-size [int] | 专家并行 | +| --expert-interval [int] | 专家层数间隔 | +| --moe-train-capacity-factor [float] | 专家容量因子 | +| --noisy-gate-policy | gate噪声策略 | +| --no-use-rts | 不使用随机token选择 | + + +MoE支持序列并行: + +|重要参数| 参数说明 | 注意事项 | +| ---- | ---- | ---- | +|--sequence-parallel |开启SP | MoE场景开启SP,由于影响数据分发,节省内存的同时可能引起性能波动| + +MoE适配长序列: + +| 重要参数| 参数说明 | +| ---- | ---- | +|--context-parallel-algo megatron_cp_algo |配置CP算法| + + +MoE适配分布式优化器: + +|重要参数| 参数说明 | +| ---- | ---- | +|--use-distributed-optimizer |开启分布式优化器特性| + +MoE特性token重排优化: + +|重要参数| 参数说明 | +| ---- | ---- | +|--enable-token-rearrange-opt |开启token重排| diff --git a/model/train/yoco_moe/docs/features/deepspeed_moe/moe.png b/model/train/yoco_moe/docs/features/deepspeed_moe/moe.png new file mode 100644 index 0000000000000000000000000000000000000000..c5cf063886329023e6c36bbc0e5f660bc46127a3 GIT binary patch literal 49794 zcmeFZbyQXByEe>PEJ~0rk?xX^Zj_Wxr9mkHMG%ybw2%hrMi2!-QY8f}KxrhUr9m1A zsrR0`_de(B-+BM}{(Q$b27@8Xx#oQ0j_bbe>zSc?I%+3R(4D}-!a8|DUD*H&3p)=B z3w<6R7e0}sx!i(<#f)`9`J9nA>PH5_i1F8>+~=edeIC!kn-@WqDw-d2p67n&@$u-~Q0J*EkL@AH)z<@K z`xRCtb3fB7cFdOcEmv3N@4W0}?EivIK@fxW?;mev=1#U+QB5?v;=g~gxo6J($A|0_ zr%+)Aon2pr{(YwyEES$;6gl(1e>(B?;(Z@xKD-cwQU9Mm+!{Va|I3I}gK~OBwnVl5 zJsx&N{V%rv^A5-j8%5BRd6`e1U;fWWhZ!(p|L43i$+EeB63Ly6`;W!Yi{GIAkMWbQ zMPR8EILF 
z)?0{9%=x22^r>{8*_3SVlkZ5PwAoQiq~1Tung_YON4V4*_z%%w4M96F(znqlIeZj~ zEblDV<&8v`YUP1b21cD(L-7}8;dF8x8#4_LkH*_6-Bxx|&7d67L|lcqAWit9B|6Ri zHD#VD+V(aq{Qzup&BOkipD6PZ^@Z68s&rsPw?2w0qJ@edZt4{5sG(H~c7(I>VCZD9 zV`EGN@T)n9F(&8&T*?0Kn0o03VwYIwgMw+Om+-fjrYC74#w7+)qjB*X5z$uGJq46gbWNL-s;Sw zQGsdt0go>xo5(O2=rSu#&gnhN>%fj0Ab?Avl3{CB;}>rd>BTRS6Z}FJS%2|R7=HN^ z&7%mCVanR%2fAnha`-yUq=UYgiW7Bdasrs3F^s18`)ea9l>!c}Hf6L1Jd^jG7TaYm zMg14eWbjLkejW!rW}K&Qk~W*WkLAQxFoqTG=))cpgobaQU^W*zr|*m+uYk#Lvoy-& zX$UrSmmgD{0D8x1>$0&rm3y=H}DVR$v zVlMFTplk0iDg`*|ZRgR*qOHPk();mMoXliLE}%8wHoTv-Z7;*>zEGRy(fIiY`z;tg z%!?rk+?ydskx8BTJeao{nt+=bF2X9up`gs8W)>y{t070%8NR@Hn6XjjIjy^K5pW~7 zp_GVxkN}&*82g;NkBxZiE_jQV#w|%)ZjD=y(EiN9_;6!J0`}}6xCq@st)&-l7W9!H zwnGDdO$YYWDN--~*{2T~?Ah^fg~5m@^+jZll{0gUq9wZs!NkV?_r)uI)MFVgoWCmi z9uX%NzP${@`%~XUSM)5YrqS}z|e!%3K06*j4 z31b2NVA+Mjj?Zd^!Qo{~ZnynjPCw&*eLmDjUGXH0SS;O%V;VC;n%DYkmZfC!zRjba zGhe&j_%Uv5aBC z;E%QLbNY)ydKcBuQm{|$D8}%|L5SIWijl4(apAmuK7pK7okt`5$;WnB6BO89Dt)h# zW=*d0Sl)d7mflE*5~0i!Wh(+tYw`v;fw)LP$~^JxGJJw6WpE#k(xODn6#_{<4X?}R zl?*?s91UdA`akj-cszOa&L|Ey3v0zFV3|j%$J!|Dx`Xi zH&b-+SX#CcvU*7jlzh0q?=p#2vPe`udU=ohn(9)-;VYLb{z?&v*Cgp(W~ur=HGA2T zts|?ch&=LMw_wpJQyh-X8n%U*$A=!>;({b){uvY@I4T7cl%{wDl1ebdQ&B|^Fg@gM z7vmY<7aNBQ*WF^=C|-zi(xkR+-)`xfYNtq}yCQnrKiRZhO+l!4Oi%V)TljML!j-$+ zqYXqjGn;zf7t#FTWKV0KRi6J`N1op!Z7}eC;Y0VLvriH4GXbS|tFVI&j_{4oxfh!< zI#X>$MKh))gv9!Sm{j70&1zRCmaI^2KgiTF3;o)0&IGu+%w+BSui|8;5#fJ(&Ij4KzEc z9l9f|2fv{4C!uOZ+m2SPt-Hls;yxHis*F;aNbbrrrCcSw>_T+PFj0H8t65pepGcG`Qb*~< zGBB*Lp~&%RQB*m-Nr&$f zt7C5!wpM-`$szbPLLjeFAY)<5M=;2u_{Ufj6OrlJrOxM%j9uc%sHXw;8?mixgUSunPVhulB!G?Y4&YB|S;@V+Dzx;l~lw4tmu z!@n{iPVOd1cdte+WFj3 z&Z7#{F7=@t3Es~|%Wn*5mNz+tR^6J|J`-8MSHiw=`u6+#(YClapF%soziFxWJ zlE2bD;nofhk-4DA16Sl#Iq?_4690T83%Sxgn+Em;vgrqa884J5=dlTtA$xDAGox_e zA}YlJN9Dm43E!WUmHwXZ#y|6gY)ae=&P%MH zO~OtJX8MIK7()q3#~5qqEJW*kL`TN9Z9}@~-HhM+V!3 zV>{tJg8#s8~hth&CZ$ay9T+X8JwH?+>Q|`iau`@@t}5 zm`kq@i>JFcooZ=q{h+VLfMZ2Pa;|Gg8F`+%l1*#k8I{Ak_+MQ|Yw|v~TOwnRNrAZQ 
zpxSm0{e-jYwaX=?h%oL11+x4)bGk*PyOA&pe5qgo%bJfr37=;LNZ0Rv5061$R1oGS z*lH@FTp~ws5JgPw(+z!S^tbnow>O^(&PW5t%WL6zz45So!^t0=BK*2rQG?(Trtcz8y4RRbvfLeUvr5-UDUVs&Xr}}LOS&}i z>nFG<-=renIIm^1yEa)hahEXSXyVPV>xVj@O=d40m|SaZ9*exdpeK0?U|XkRjAZEiaPHH#t zB;@vI5gdv2s1~l^hi|@)2s?Fwc?5#O2Zq2mnaCC7fb%)YC%xc)web1!V_PY5e5l`h zVsO&iIS_*(f{#V8zPrK|t?df zmJh>Y^v3cUamO=zbv0NulJ3dPnJ-oAO{e#0(6}6$iOLDQ#=G?AT6OrLsy1>Px9wZ=C+pb@mXwnfO~zeScPXzgl|oi214jZ4A^pPf7{uDsBb z*PVUtUQWQdUu2x*>T2L~3>iVI`wP#rB2W&H8(1#AUF<6i2{`<9dA2QOu*%)G(Y$lk zUNb0q)j^ZrV$Z%&*tUsiYi0B_tn{bX16D460n4uk+#q9;;}jh9{wDd5RE_M zj<7pyw1>u@({9u|jhsy28`~N2AhGgY;P^3E_Q5(byYdS{U8t?N+#7y#T_m$OMy+;Z zbF;hfl3?9thfKYzVN6EgRHQ^i?URlBTD7;A*EU)Or&K=-saet2oTVYuEzD!Cj@SF|Gp`=EFhs!r&_xgW?~Hn(oLSPo zi$i#;B*3dYlDo65$hhp>UzFlLcl}^@J;sY7rH-T2Yt<@(L5BL)V(+^%l_OUyvJZ~- zSKr-SfIuy<*)=gZKI`aN91`d!vk@$1gAv-<~|ol3K3FP{*E7#h?ZFxQbmD|$6% zJs@B9BG>8z;n*Ms7}M?iGA-6%u;6iTQKOYi`Mv45m|%i9GxSZot+HlDk+HqS;mQ(W z_gFKMt7o_&jNc=_aAUa4p`Ffib!@cijw->D@zN?>8++gu?J?;5jQgbd z$G%sWGG}85`itv-IcsIgnnre$n#HP371wQOOr6JCJ%4`<%dOxuWu8FrNuSpnq9L-M zSXxhhuH(|01R9DiTe8IL^%oP77fK<-7LQpJ-TU%*STzQUEepeCw%BYicG}amD>Z~K z&0fF9WAY;{?e`f-_Zs)7X-|_-YmQ=w_V%>j=&*WHkI?&V%h&$j==EDl;?FAbD5w@Zv))0*iV@*K5i z*#5?&@gV?{@6u4WG+6E7IJGD_(A1V$E8E?bqe5u=owVawnw&ynDZ-*k^UbzJHm;1; zx{v$sJkd@QA1t@mbWJFGl?Z$NnkZcCxkE>~la|T5x#gGZSBgi#%x9aT^D4NQTX9YFo4H|}Tj&ipBq990KZAPphw03uN4nFj!Q}nT@w0i67 z*EIHgeKU8MGA|$^_1HgP_3e&c?d|JpWiQ*x;+t=s7QNcYVRbh53ULVHRhY+pb}BYA zUlQ_7D>$1XPQ@8ZIrOK2H;=7fSg?eU*;Q;Q^JBb0VJ5(p9VG!@sX3Q$%Zz1hxpMq_ zl*~#{(d)MORK+9#OXj!{_w~JAzp6gO^jF;MU#1YWERvrUh&A<1xry3IcEBDTIr!Qq(89)tJPT2cIZ~ zkb6B?*&lm*Co*(p`W7)K`PyBW98Is2NUY-V(V;s;h$#sBIwPM}<)IE;yl3;-wx)9A zCgDR8rWNcP3s>|Irx7bZav6XO;fnv&I$-ZpV28|fS=$Rfksnd?Qaw-TOivQn0go?w z$AN#6I5-#^k4~53=+{!o3a!n{VnBGe7~EZPr`<6=Nnw}XdrYHoZu#^^ImHbq5p{DyfX(=gtd@pyl= zfoa188Ufyb)_Rxly8>8)VdtNyMKgw`VIRLWe6ADIoSdTG=mI-or53bJL7<+s)}=(k z;X3QZ1n9F*QaxI3o7QgaoibV)`5S0$mRLmy%}TeL^;Sq*KWT81cfs2Ku0=yJb_QK! 
zY^v<=?RHoWXS!@2!)MtS;yCd)Weigva=voakAEn>WhP7RhO-~908 z=^q#0-DIDTgt*-tTYf$S-|t|(x#~xGYpLs~bcl0rN$Sqn9g>;Wq-IFbTOLu250JLO zRizUD4+^^z*zr7uxLEv;+_HPvh=7z zWNqyOiPBNBy5gTebN`5)e152c%dR>(I}cLvIfn#0R@5lC%px_}uQ>H=^Ga9Rvv%s= z9RdcR=T@M4&a}>3xVYiSw_fn6zSg`3(C1F^I0_3FCK`jWtG2(ntar$)&M#!2ETKMQ zW>+WTlFJ<)6SRb^mYNo}6nTtTMxk-xLqIRnCKTbdh%H=yPra1%Q(Yydes4C7T2tsW zK|aFqCrZ2=Vq04*EXi@7YT7>Ga#6!-F5mDuaiW0bixfxCkiR+_yTL(qznJ4b3NOID zjN*1%Sm(=U87TJ}Z2 z8JJ#K2V}T@M+CjBpi+Q4Ia=o<@+I+1cuB*N8TO5NG{IAd)v2k3m5XXNOZ_jKio#{Q zR(d<7!WlpRSb{k8Aye)Sf1-^zxgL0wk(`IwD>cQYZ{_8mE?N8Am!K1G{#kvI)AQie z%*Bh2t~?&QP`MUl<9_zj63{>URc;lElkP(3M@sa7BKV zOs33Z#G3>3`d)0n;@)`R@u;ot)l5texTSVy$eo|8Zfg_gjPmu~B`wJ{93SoH>*qeS zQd~Gm@D!3+D*A+klco(skE-usd{tu=&&YDPe@Y5j)_A^y+&NK3BdUu|0Y7nY_(<DxaZrHD9ky7sT^-zT8x0$9C#kfX$|M-NZ^-r`of zg8BB^B?Rbz`4xx!gIw;P+#_eq-+Uat(XbqQE0rVo2eR;;5gc30rB}OIM||cod?n+W z!e`slZ1*;1oNl_&5X>W>sl>bM^b!UsKHgj(HWO@2|0__J2S=Wk$npKT_*tgv0Y(&7 z@P)hEyqPK*+^pC43Sr7_u|s?6so{6aMA&YBYXOMZvne3VWaON)F6Urw{qSS#t<3P1 zwpDWYFsbob;cY;{v_dwDHT)xL=x>1L`UKcLxe3ug5|K`?n^=CG@6Kh3y8-z<3zqyB zkJ*-3Y*3v5OUYN^1th24lWfVY<-1t4)_7L?agaYm_3fdur#l#{z{6WFnI;p|ye_>o zn~~%8RYaSDC$^)hE1cCP0OwD!)zPUu$T4ykvU@YiS5jwCW}M0vJOXPp+Yuw?tv0bX z`Jt)p4U}AK9idnFTc>Q#DhW{#>;eT}FwUwPXiGpZ?TMdc<;ydz5Qj9KshVxZ>U^po z`cj$aNs+gJH#)5lncl0!*p=r>jCp@B=Q1$K0@7v${D$&QRVZc)Swb(rc}oZM**B&& zo=&q7ZIgevt>)9-yOSZW14?+t%mdhhxnYaB2?I8`f`MP6yN5^8-?0aXe&a3Ya?bgX zBA6J`-t*)X0*odLSX#Q4mTnw|6jB3GZFf3Om-1gjx>#%x3gDm*8B+t)q-7A5|2?L1 zq5*(1nNn;Xe>J}KvGGV4k&-L-^lm7|4{6e=y~`=&A!w1>h2*4L-B}p9zq2~f!Bt|? 
zReX%>m!QLzWM)wMz0J+dDT$3VE@Yjq97A^Ke&uBNfZ8bxla43_=r#MN8G=u{R-v*2 zkE*?h*-fk}98N;r99x=TeW8&FzII5)<(rb(cx%8i1mI#`=f}5ynPqQd#3_-k3dhcQ z+yGV-6PVCX{nB~oGK}=C#rrsf)ReW>Kcfl^pXYe3j=gB_VI+`5e2Ofh=%X!~LFV>- zev%KmycPOkvG_RBCJvguKtMhcTk zR|eOb1)AOOC(W9e}5DV**G=Fh|DF`9SPCU7c2Bd(^&hJ`R4NQuMC#m%Mrz)aor#R^Emz( zmPxG(byt=dPiFKBIm)~+^;S09jTKS|6NqlWnoX^Ng23YDXO7S`**$^ih|d<-up*a` z6^W!jjN({ZDrv~^T<9rF+a1D&Ps6ee<9X5AB2i>9G{;H&rg!Hu?+`~T9969|T0tEx zeB~p)R+@Ob$(TCh0{~DBh^?D{oG%f;)E{hU@>|tabIwb84sF z2lgfx94F={m1Y93{m8CwC?TmfLesZbMe!nKPtDJd8uqyu!J*4rFZ2^H?1Bq{~DlBOxx7`w+*KDt`~ z`XmIY@EB6u} z|HYX|SwGC_^?AnQ@|0UU^ucK=ikKbM8OyFaE7f?soII%tf%Pl_GnU3spgaKc6Nt@b zk|x~+5sIRGm9 zP>28(nQby&b?>i*Onc+WS!&-(9S`)f#S$>_iz^?Lh2tN@P#q>8MiSd z-ftu0<{^Sw2daB7Za!?%-(=B~p1vc}hww$=ZZMG>G8Pe-^sA6PGM1c=)H`)9FmNT>fQ7I8dYeJmiW`JG8tmdzjMdqgK1oHz29_^ z)3@#fMcD0${`;!8zws|!=}Z-C)zMF62~J0VK}1*V32qIBcgfY{VLfdT$sc^s8&%gz ztlp-VNQTe>rD-pF_Kn9}2fg!UeezO7za}q}xTO#bz&%GdOWsUOIgvZ=)TI6FZBBAM zq>vC>@7?CsF!XTu<5MY^Qa8Pfi!LAZ$3dYRUS9oL={*MMBspAA0AZa{E&^eJagy znM52rwmMK|#E|HEx*p1JouoNeb8|-Hx)iXlm~uNJxD7OCUOc|N{)Ss4+;M82G0Z^C zy}X}q*u(;J{ybCcz z#Bob9gr7b`ZPT)kwoWc4t*?1W4=pwFDhph}2GQS3W12 zu$B7l-Gp4$V&)L-@!1a_U{`LlRhM5f#Qa6F>tK}b5Dze^4qXL?Tu3ODV4o%%1CQHp zvX)*91hYGeW-*Up2##Wq`L)P!RBqpT1xVx}vB%XH8acsV(kAEypgAwp_5DRI_n9c7 zN*Qg%XMM+qJL4Yt+oapetriC{30f(IzxjUv6J{-+`PnCk;G2$;<lXNds$)sO`$!Dh4c3tAr zC6D>TbNR+O>}GFYRqtw1@3gShwWo?zB&Q9jGkd}Kt3~Wi9_l^B3IFGo3+aT77h+K)9AdQiUg0%W-lzE%*O?O$g?(aqB z{(F(7ryUsm*3Px1h&0PTI9w8XqY`@Mfm0oGyHY@Hb<3d@kJ~4zOA{eX0i~-FnvWH! 
z;kFe)pNi_ax_~?6Hz{olkda;d-65ct_3o-4=fCiOTdA|P-+%pF@(|H)!}@sU9B&6$ z4?R=B?q%123Q+~ z_s|D7kD)SQ)~mj0|6{av;Cu&>>0?}GW4*lldo9HtUh3L{c1;ryhf=Kfvi=rxUMxNp z48*YHCVSZT0HD^0y@6O3%e~-r7fB@1`TsZqi-h4btyA>!euJk@gks0M_tMVrwUg-o z9Kz^}FXGVlX!K(z`ve`B%RpE8m6HULzIA**XllGxqlPLRUDV?RaFFtYku2};V5a2y z4DdrtT%qAY*W;C{#`N8m-SHC?x225lyvl?aZu@I%q5C92Pyuo`&zQ`6SsRSEK7~ zxvX6*5^R?eX#^~iDIQD*t4)>*hOUxRRYVmqOr1UV6~gfkO8ZmfEuaT;>TfY=Y{q=6 zyb%rkt?6F0;4hq%?1kFZ`qs}ks5w!5V3sfSZYltB*pE0#2$4uJWTb7d%vRWY&1RS~ zjKMoutcG0+`X5)mqCxoeHwQ&WhENv|i5Z-S+5T_tVfV5VdU?aoN^dx!HbmOXPxEPM zihjCJpLGKGFa~H_ISxBgvx8C!HbFCD?@&w8D)K5cXqGv>y9NC~^l){s^vU%s(-=%S zA~QI7W@p17zNT5c*deS(lK$X3Yp}(t^cSqB0k!YdZijCg-d}k6@+S3DgF}EDYfuzY zJBAKlXmp3D5yfWzAeCaxyg|sWg*1Xz_`JspPeG*Z^79e9NtH~2o>o!CCKno0apLGIp$1DRO9u^(ZtO=c|)dcXS|@hNuQF=BOz!a&HwvGqW^WH zCdyxDvDTHoRk^bvO%sJG4JK@;CPfwB*%|VanUva~fCRnHQ!J_Q3pznUtHZ{fR;H~88mXRoZj;&?T|li^k0~uiJ7@#Eo|be)Ms*`>iW5-9{clY^~kUzPs({j zHv$RK{q~O64w0nZCNwQrzNdu#U!WxpI?3{OO8=@eB&fFt&GsNsh>$=M(FmAk7PTv2 zrU%DC&+>-2(q({nsyTX~UqPB$3&A@d6LO67>7hOjewMSuG-wT!c z4eY-%wD|P}WgBQmT+*CIrRZ=qNzLb$M#JIu>1i*XrMR=P zLC)}hFG8QcQ3FCUe9D|hgF)@fi8G|Tkm&RwfYl)!>dnH>Jy6aSD?G;)dr1j;*jKbd zBlIWGAtX!>wI1$3JHqAbn7L~;POfTXPZN_gxxnvfrv2A6+0np7ig8jRly{GO-)Z0N z9|S&0uececF)eg?tf^c&7{)t9F^4-N*nY?U`r-l1C@ydY=?ihUoYZWZh_ba$XWy`% z7;+5$6dtUrfHUo9o;)4(R7-Sc$T2G=O?H3$rMU#sj2794vlj@a1ku-j!<3Nrx=O4S zEW9TlASB7-?ZR(ewO%(OUK$ZpZJSNhZ3a>3U~OzX214_O%~wui*KQqrc>Y77`O7Xh zJ2OAeU6dL>8b3zOWs&se2M?4qFw$6ilJ%EZubyJ4)>}yWX()G3`NJQcqKpHcohUSu z8JZ*N^{|LVBpAqtiG)j5=Kw$IRXAv;d-hyF6iI;BqXw)4N2|^reRCBjdu0C!^(mUF{v~$Y@kJPLVZ{{@L4->(FpS*WRkj zK7odiK$Bgraft7?%c9pH&&+CcG_6yx7q~Ia8mIs+Jv}!!70Q*#{cUy@uUf<$@GKF% zWS5Y~Ou{8P|9+<9T{R1+l#-^)HU{M<8cX82-`a6m+oE146+dcg`&ix$@ z4LgB8*|Wgi4@_>*46%h~x4Yf+VFnx-nRvT17CyW(o%HS=-s$jcL5nJ(>Ti*!gBy;- zq!k~(>mgH9#?{_1xv|tAyOaG`^Cls|G-#JZ9q$(@UP4>@-LEgCm#PQq0o^;o!yI_j!W@@29q{yi)+toD+!f? 
zTNkBfo^rPnFcCU3Gk@O#3c9J`_;B@Uhb1mu!Q-|TzDtEO{+UqcOBz#~4`+f-!nTVg zn)JCg&`qISs(3p0@UV3iPPY8Yo2`9zyhp;ks`}D0?G5h=lJJ;ZkTazJ7 zoIlOWboUWGT+HVf?;X)t`UV-Kx6kvqx=%?z^|V539?x$TRT^ftGb{S5j?9^24~2~u zwlb(FUx|H@$uGC@S^oA~IN-p0(5J)^2$lJ7uM5{Z7VnYBKg!s6RGhH2Qrqa>tb(gG zTC1k%cjRL?+j4v;D4h({fXzqziQyZ$dq5~&LugD?q{b4B8~zgOM-wGrB6dAY@< zclWwI^4pwUg|)NfN4s$q^uXiHH)RIxkg3B#dPjSB_t6&s@-*gJvvQB9ayIsQ z4Rt>Oe~BQcsq9zxLNOLd7n6LDzc7!xojiH;2dEG}m!(soLG2J6m*H8{g{lM7~SJ&`S%^(%FLhT?T@ew42Av5b31V{p>eyl7WRV22o5Ts##lN z)J|6Q+K|aL)f5{g&e5x5vbSBfHqCEvkkrwMg)w=ncx+OD%n>p*8JWnuhGL>yY@XH8 z{&@aiJmGCNHFN>ziN^=P$Y@KT8jdq>6;9V3Lt}ChSOJMKnd{^AZ_6_CEii~y$fAL{ zz6L*ApxR#LHEnuvGimXsMJ`lvG_d{Z^K=QB{}iqMPn z!vP$4{vf*>Sn_FBqmNIum|3DF8QedTHHp`1Hu}NGCUxFr!fZJf3yFn&h|U01gYG~k z(qczL-%@JFdtjtmapp@3LEV;t-FM>6^|7xy*xxGgKwD^$D5ai_9GfG-u_oJ{#ItFm z&DKcBCr!9?dYUR!Ye~0N=CH32JA`Cz#4aU`*|GuY6rQHxS}qEe^O$YR2`>PNq;NC6 z?Ff%0#Kew(3N(2O0u3lw#P)eK`rL4Tqr0PT6xr@gX=zIpLB}i;_iv& zp1l6iggpC)#5l!OtkUXy#wX^-nFi~IeC66sA#BwXqS|9RITz}#S3GOY3aT9*8^i%= z;)>#}m5=iG*VUGX1#H=?Wq1#VperKmwPG=~37I6aa|Y>;y8$#V2zRTp4i#ouBRpw$}gRbz6Gb zSk|nH>)A|?{puP)Th{8hc(+G0l0pF*%$~Oy7N00<4gx~|{HtrGiv4j%8?6@3+r~ps zC0*W{?{=4OEqcqUHS;rmwc%Lam8l^n$PhqA25X8ru$|TG6&9(#b)H&Eiiuva#5G0* z22D?uoXQ{G8R%5No@&Mxb5X_fIX>X($|EGb_>OUfjkWkEWuCj_?k7U@M${akAl7Ra zwNCxxT|Bqr!^XuvEAQ#*cEHl;vsve^&`Epdcdr_QkZnaQ;iKPy4*E8X{2RyIH7O{k zeW=Y(t2mWit@?ltMzl-MW@*mxij3X#^&{+R<2SEFW8-2aS9 zN`bc95Opqhdm`;-8U~7xhsOtV3aTL^Kt6hi91pXUUcu=qV{`kppdak};bh~}3%8wZ zPh|o3_|s|PU~&7{`XbKF75TaJd7rsdm!L9R5JDV$LJTa>oaovkpgIcN_Uiu(VT^@j|(>h(>(4)|yW1 z^ma)9)sVaU{hP!%)`t*mDqiJIVHzuZN=mHL61aH@UxvFul%1=4jje(dhr^=QD~gfv(^?^o@k&;W3@eIrE)MiP+EZK|YkZzn zV;lF{7dqd)h6nG&gAL)_hvTA4tn^oUbe)TLl=mXcwoHPO$ zDMiqliz`OiEo|*z$vSuLB-t}oy%Y-;q>9gt236B@zMmC6Zok>0vA-eStIqZ!tG9GC zmY%WQL$?71&b?f(-UpE8lYWVDi7NP4hys?xD5k}>L8~4(w8f>hg~>YEyL{2#tvvOO z9(~EJ`S1|6F)t9eglYSHjN77oTZ=sgQ3lgbNSVav^qxyzc#*@ctroI>A9g&>txdR@ z_O2&LlJ4hFpi=M`el6wR(@o*Nz{oIV+b6B^xf~8S!8uY}epE{Q)e%WWPM)(_Pj`13 
zf>%aSDQ%A~@ciyaB=B7l|J63RaU?)NFovO=N0U)C~}gQBFEW?#pYTeG<1sEM^fhTl-&=Qn*0#bU1Tbf ztMt<9t@Lg8QLSToiq!@Z&js|sW|n@iSp}z1fb804=aG{dzFZ#n$;(|8*EbgX8V`4{ z9W$s7{RfIq)~%t}S_Oytn+^oX5tdz`Y66|hl<{T9+mfAO zpT(?3RLBjq{pJE<6-BKWK;hAW2UAQBg))E_Z|yhn@A}P8>Hp17edBU)XiE-`(~ zuNdx z4MQRwSSz(8Ce=MWd_`)gU;xFFoq1t4LTmRJkZiVU+mQo1XI`Ddh0ksDr$D*M0w`zXO zp_nG#2uLs>ON|~Sdi_LO>dfZD2gCj;ag+18(M<)PNJVNm z-WsOp;@V`fRcnx^0SdeD*|8qEW_*5g+*db9(r=p?W%rkm)NMVsZak1F|JlQ8VaU(I0ta6{217M~FLL6|C@ah^JlmoJ2+^}P z6~1uvO=t}9>PQ|}$8 z^I@;5sXl66uUJZNpXsXLa$BkHKIhr@qVcOP_Wq8`l!G4q z6Yi55>=fV1{G_14wpQ7o5~%ib5oP#{-EMlIl+=6UGgqX171R_+b^W2|+8Yr~*b7s% z2ak+Ofu;=w;8ksQ@`F`SeR_2po23+IhQfgmlRDZMwK`o%mb0bu8|vAnA~xICG(V;A zh(Lg9i|~t68fy=nCd)P|$R};@#LM&}-(BPz7rhr%qm~U!ud1~lFqE~9_r>;+Ly?S= zxqmZ?VnP+R&BR897u)AC<{(@jxBHCZByUM4ty}S7r(V$#XW%#5P^b*;*;{%og&kZ zm-#ij$(5w4-jI-3{HleWF>Gls-05N`E34@eos1Ss@BeM zbc>`Ddk-Zp)g46-_MjG`x3ZEUD>@HdYvW-!M0xmM(4rnfk`@g(6_;X%P|L^wAiFSVTH$>2kF77V-8S_p78l;MF z6rjcIdHHOCR8`VzJ$vn5xjUX~T^>yEo(Ph2y}=S=kBXRhmmZ}y7ZwTfE=2&Rq=^YQ z+H*82wiudkP;04<<>>{>`H2s!_J?46+0s_gDevOxphoYG$qYR5QMu^!5L3=x>dDjf z=FD1BgEHR1$!p2(GQAiXEgA<5=z(f5*2D;*B<)q31E0f$t!f&RDT zOuQkpX3g8f?P21+FV2WI%_v&^A1e_^P7!q?kCK|DRf>r^CDMdLSo9bp%3SKkf+TLJ z|L=e7R>PJUCu&Q39_-Ok1CWbfHy2heC>T4qRUrICcdH;CE(z}K70-9fh3qiPI*JH)u>q*w=z#>WIu)f{@bAqyys25ZFjRxVZFtZ&#?(NCQczB*>CaJ0KyyXY}D zYa$$uoc;y@-?gnn8Y^#>{eaaEPd;LY!LeBpI5yk1{%tS!MECB|g|6@0OV#u)vNuO*6j}eLdz% zEEXxP%Dl^YNS54SAUQAm9|Lb=^b0kMzxxLB{O#BN#wh7XXWKM&2#)j_kjJFFSgoS~BhPPyrnvV^Li~{Nji=ma z*aa%WsVT8&tY{+*ADn1hn{3RKd-37?9ro-q!Q0Ejr>E}AtW+tPGc6?lksnH#KF;aA zkm|Gu#Q}D6uH`?#L=*ugYF1tR1eo;3>CcTPC6rZkL&+7JqZ-4gGR0(jbG8~$Ha|?V z&0b&C>X5>l@5bJprs%wK{sViq3~Dptje#iH#OtphrhO_z~U$LlNq| zKbqb+5pSsTPAlcr130VMXo)T_>iLsZggyVsD#gq9zbWx(%>7PCQPRBckB^T?st#|1 zB$9l7jSt>`;f?{`)Xt)11)4nWrd6XMzoRtW-!6CkT^op5uY2+$iezZJo{6_X&PuBh z)E&#OP26i7a#X`{Zcl~wHcH`eI={du;el#={SISSwa8j@pt96J0XCEbZo6*0q9dws zv^My6>Jk13!fui2Y1alI0sGht{x%3tIuV)MKZ3NDGXXV2?`fiPJU}xptrkv##*-#1 
zFkKxY-~F1!`hH>1HrV_Ijzv03rYpy@nRsHoIk?;&Xbr7XidOoA^_CaZo$h1qYE3|o z=%H2^w{2)Fn;LF)!4s#`;Pp@b@%nJ#3cJtb>9_h#Rk?cLc-)nn?(j|!c>NJE13J^6 z8l>m>$-JiI2&=QEq&9jYojZZ>8n8A4mNfP|I}O+yE5k30qsndV!P6RHtl>R*nx}2U zAO}%|cK*%^ihpMX=?5wLxvHlan&G@tIHDs}A$Da&k#~g!@waZZuqhT8=*K2xkpKg| z)^|y&mX21x##@-V;$fEC`TJj)j`3T{sVW@j+8$BC;pwMb$l>X9NMudBSH8V+S`hIl z>HocN(6{`PT{CQE>-Segq=`=54_JGLM{s8k-twb(2i=;8VvxoP+&70}pb$?h<;w6a z>+qh}E+AS72Il5VuCpX#}8Y%#zh{L_k#!)Zq8*dX5E{})y+-Sg=9(`R{9QNig z@EH5oHjezUY~dRbWc4dBh+H*JSN>EpgBSgn&m4mzpX2pEFCetRcQex9tA)6ID;LIx4K%%5D?-_)&Vr5uy27zq(#Wcy4>ss2ovE z8&kXShCl^6(W^F|7^I(Wk+1`DunvJYs|!v^r2q5QXGp@ z4M#Iy$R#J`NC$7i(Sr#H^x{oyAOnh7Cx|;jjO@{0`M3rf#NmB0e2>c?-2%DdsPKC; z2&@O16Rm>5%}jW3)`2{5DN2Ue`-ia2{_UU3slWYKa+dZ(R#ucGy6We}AQ7(@(Ku%w z16F)Ubm~fk<_qJ(*~d5+m)N1y!WBc%27ut-Mi^ev+1-N4DP{%L5O&1}_*`?k@^e0%7BqgiEs* zB{bpv9ak@8XJrpDQ_qP^o(|;hk!a9&kcUY&e7A#9|~2QpEy@M2?*O2;ez0fC87uAq&X-1A#Kn!zaSAi;W}YX`t} z=bMz}h-{l6@0_}gLG&@UMLl&s9?O2YO8Q_K`qWkdIo!s|AVxXW1&vfygxrtwgJmt# z#0~iUB83>7;3_W-5_+trD4gPIPF}`NDZH4Or}9UV;bw#tI+;W*8L@SBE#nK%u^D90 zJe=0RTQUl2#srPyzj7JqpshfZ{{bFyqDKY*Ql0;VqjrX^ztG6kv;ddSpHuuw%sTN+ z7nxfnfkK4rN0mUuix7)Vw%x3AEvJ)tmm1`)eiKj`q3_6Zo)mRZ;ufpZbk{IEM9H>2|{-z(gq`{TJaj zB@#KxsyBOY&msO5^T)rI?=cG51i7<9&++RzuVelzlE)lil0@i4Whef^Bx_vdnAGU$ z)YT_nyvXucbp}eJ&s<&*`Rk2m!7O@(7th$rZYH-KwLeAPoF#Yu+#f1uTXFpM@bxBw zy8px6Uq*G+zU#j*9fHyz4We|1D6L4BgmgD3jfAucA|Tx%2!f)tpdcwF-HL>?G}1`3 z&x!Z_Tk9F?|LncSezjjYh6BFe$(+}`t}~9~bN*9Tsa4bsL+AwNIF?+BHa!@QpK+lX zd8HBd@f@_>?C`*6^jLHKlbj^Txt*MW;j20YLjFduh*(A3myM)Z+WwC5)epYF{GVFU zNYF&N9SOVEThsJP5QAhGnav|8J)kFXJ|FO$#Qa@L5&b-@OZ4@y34wAdX}%uwt>pgBoEUqnBTqWHtGg~wB0HIx-N)~1ui>B-`+LyZ z)9wHt!gD<+mx65445-g=o1YJ424J;DVcUO?xX=#;sKo(H-4D#7o%P3PZ5dIc6*&(x z9+LlU6ev7Yk7s6_#v;bKh^nVP1*DH^zInY$XEWQ;LJNp*c0&P=h8%p>Scuh`nq>jZ z_hz*(6r}kLOfMmP1{8dC_9qQvj_8r1+*cT)Zemp)Ses6W*$_e|hfjkO&rM7AHp%1? 
zJ{`XAL9W>ocprxV_I0c{1<_ASJ4T;Cm{zC!Ac^nUJItlGtw#J2IBH?n@}3nUp-5m&BDOhpJ} zd%(v0%4OF1UQ1X?rvNs*9HzLOdSw{k`G37RIsl5f%RfAp$9ZA7WJ~{DkNLMc+xL+Q z3Wi<1|A~gpj5hKomMBfF6bHSM4&o?cC4QPo@52d&#uB@@pZaS_X+n6mqoi=KHD#g) z95eOhT`aMDej=h53z6e62KacaBA%S?76;s7eq^Zu>J=@n^_c?ocyhVc>9u!v^3%J( z>ShPhGfcR!eT~HTc6WlFCb<9Je0TWMyRlSPK?c0Ma<^`wTKwAZmPx)~@O4m$2X%La0#`lVgQ~Aie4|G+=Bp>I zaI#hKWI&X(^fU9_b{085v30R59Ujx4pE)^{GW}*-Ux1k`!U?+ZlOK9Ab4>_~8i(_D z;8B?kyjFmV!R{5h^zwd}|1G+thy3l$5vsd*!sE{9mJ%-jd-6;I@z(N!<7vwB?Z9`) z>BytXGT_a)Tg{~?6LB4~0VwVYVa@Tk8GtYGjK-Wn%!)Ygeo++`FbJA$GW`+y}8#S`%SLT$*!AiKet zUK|?b6K@P@HcZrh%!m!Qg#r-2aTIW6SRy9TqD&SdO_nI0zOV5g0Qr16xBe-Z0T#!r zfTIJbgR$hrs8UyM?ExOd+AsXmouml-dg++GV0n7srVUQPA3QgH1f+efF-(q;&^erV zGbgs_n$7x!rR97BH5Y22#-qlYx> z#e}c8DTXW?5HDnF7wVT(m-iKZnz%sePUF7qCq2xz-_!s~N&N3%DOU2h)H#T^wFtQg z1)2NS%RHQMr+O8=B#&&t@14Sv7~p@0w9M9rN*D_?lHCcLAG}}2ir!TIO+wf+!1JT3 z`UBYZ5l#9{lX#c+x) z(bbos8*ZpAD-+SdbVu197ssv2E%o}^B!Sq8;N1#)t;!4^ZYTwpcsPt^mucQtr&>9Q zXOpUZ)PMbS%}MInrG~9vrB$&K0Z{pst!KE;$e+)&(L!NzdE6>L>Om|#Cbt*?YU-{u zFaxuF$7l$79RA0V6uqavoIiqI_wo~&1E*{4On0_k3d9LxEW@EUZ0y4| zvcEba^})I!Mo#wWC@u?1p#ggS7IPB!=nS>G{i7NmVO~odkix*GXo%J}<<&_ppwoW_ zk~%5o7z+(GsUBL8nPpft@&BM>Rl??DW7&XWHucG4v0SrjwIKa&T@lsZ;*Z+Ku&g

v!xPa7gZ>cx~LuvTSFK zq7p*!CZ6hJtbylm+n06g0j|HcvK03_=nRg=-rK`V59rPUn-h^yR{?b3qL9c?I@&0 z`xuX{vVU9Eyz+2(z^#78HA(AtqDV3tlcEaq*8Zz;wnBAuE}qXg(CM$MdUhtxPy(Q3w3vTuaq9gQ$lA#P|GkZHt^HYWNp!u<(- zg=m?4Y;+s>&=5aguBWo)3(cN_#mgRJWO|goZ4tHne8h2JjC{_K($@Wg^akkA^A%1G zi(JC_nxgD*+E$CocI5M}`aRycs60Vj0`QeNv+g-R4`wyow)?cIMHEig!3lG9dMyCL z*JG&c1Zh7A$=2fApy`Is2d4(k$g3H!f=YdE?|vya%^{<{T%2sMM1dqLY!HmPIClb5 z@lzY~kP%sA$)K%RJ zcfVA}EG1hD(+W6zVNx8|;S|2?6**skF9Euar>l2e!Zdsse$H9<=4>Q@0a~Z4a0isb zkDBA5UV5YQ7kr|DCCM_hbx??-p$~F;;dR0nXfDz5J=8SZRE2*py`zBt4l79T2c;|G zqX2N-ry59c!ynwPCC$Z^LfZnLhe=R5|2|SK$%8+2W{O$Zhc397R3^XP+%ebWpZ|Z< z{Dq4Lz!+E3?()dkp+yJ!=E4Or6e5?p8=9Xs6Xps3y4_qRbpQ=07x82n zZbJQ)`~S=9glATJqw@dVvz*C*XG)_5H{pa#?|*xpP=RmxqT;wGcEN-#Oe3IW9C`Y1 z{YYf_#K0H`8#32l8B2x@Sov*7CnYTx1Cm$GHcQpylbX~?jo46TW#_|UccR>C=I=br z+TN_XzePe;FOs&tUEH_7f%;nxuY`Z!*(jd*qxhcicTc@NPO|t=NeWV_;3UT|jtRDk zfWBsWywZ~=^CxH?K)>A__LF=_lv}q#5oS*6X2`ZkU9f{!6s|$Phu6SQ+Pq`J-f9SI zp!cUgE0RDco|5JM_M94dYN|K6bK30D$gUdw`ME|0#@psA6_WE)eWb#{lR}iftR%T2 zxOXWD$rgII&}JXY&Hd53ful~y5jOCkC!)W`(UpXQNImsbjAmE1`;GF+<7t^`7aCdh z)I+fX=IhV3!`3OT+DsqhR=TO1TV3)SR*YwN^wEF1bcnf)cKcQRyB=8dc1-g}NGNq% z9!Ri@$}HmB7yM5`VyZ(BtI8*XC*Cd%%ciWF57rK2bbC6cnB236YHmB(@y2`IVvH$q zn>=4I_L8da{NHPfm(NFek0a#mZ`&pJ<~_EpRw-t<14k-i{cducJT4Q>P3&nJ`NX$E zoN^f4sGBbVUZxL`uZu50fBAwAgG|2nud0or=Ofd}=6y7rq&qJp^zDBOl%%oLHhz^f zS95%Bb^oU($M4Od%10M0Sgj&NNIf2$R%zNxSZjJcYqHEFtbSY*JFVdp-#o*u1oy0 z3cStPf}<&EF5yI!^QNWiPhWe!@A^Ud(A)*TyNB19G^JF5-9Qkhd{5#%<;sueg2BQ6 zY5^?17G~xqe0j#NZh!jH{FLG%UH7D16K?X|eMirMx@^?6n|)C=Nz`Ye3WPD%wK7(5 z0pTrW!kx_c?qS`TvzLTh$kyXW2~_*nh2|@TdPLcJPTEgyKgMAv7TR1O-xixwt|u&I zTX?@e8RCbPqPTpfnqT4nCF0>sm4qv|Yn@4Gl-jX37sKmFisZ>wro5d%}%%wh1{?4VfBuRruhBeu2ZC%0esv~4a11JDagWp`56&*G+rr?9%`bYy#Jhf?rQY9{3matFfH@zVcx0 z58gbVMI@6}OIY^M^b?a0%b+L0yjqzpC=xASGnn8T8}{RxT0j^64hlxxx_6Hh+ObM?cEr6!f2R8o$kliL~(%>BRaF~w9t z>&jZzzXvJ!JPIPq#iflKIl%k=`cik-n5W(r+2+$5R})n3ncPmvE)G+TH19RZ9$I~U zp+&K4)}x(PKkSQKK{VUr4E;dnVG2zt3G!C?Tn4faurYiN9cXiEAWaO`DbFS1t7nR{ 
z-}ysKBJ5_fdPZuc`HjNh>XMt;syTL=f;Y_%kbeLJM^()=pKsn}h zl)_$|>x~az6zj9XSzquf{HBN4{l+O?u^<28j_ZpGr99;@|?YAP;EO{HH8zV8)pPD7!4?ym9DU)8dQEIHBd2wa~x z?H|{1*I|l$MY%8(b(m~rflZ&c7%`^3V<527DIHUr?f2UsPQSj}@#I@co-4|A^FAIj zPrhVN>K_L(`7=<|7r%Wljv3M@<}#M;!n-DCZr92!dsXg($YHyRON(gko-|p2Z|=LV zVzL^$c-E3;bu2dX6&{WXR-&%H`im4{OF9}A48soo#JTh9qW9`e_ekcezILT;8AiWS z6HX?7IXnQXyXxABH{0;#mPB{~Y#9{?ckrwqgdGl5N{DTmYy3XB-A_B#=+E@cNg%OQ zGm{MzulY9Oa6@KhOw{d-W5UmPGQzxx1%qF(-RX)m?^m!lcb6T`u4(+a+8kSy$MRAN z?+IyIeKKl!jKH+zs&(i?lHB~?IG4LrJY+oOy1$rN^4heP*_snSSD>+Q+tBr*A^k=o zerX1q%KC)u>6;~`#*FZ6Ib|dK@OP1ryqu7Lt%l~dKhT4zTLOHLQuVmtZTT}^C}@UE6%1S%Z0keqhDd+9>{N8=fn4_LND4qk@w%fhqKQy$(#ke<-QR5ueq6at82x;@mZJIX_g`@h#KY z!Ul9{5LZpj(E}tx-Q|`jO=MHFu)|n~;&66y2xuN|ujHp>g=(lVvI{y|ZM-_k&U-c%f)41jBE6f=K=kFr7ViPe0Ri<)4mQeTo|C6qq2YwG({(w4>6HUO%XzJn(-~*iF1|V?rOv{qu{pbK)8-M&@uG{Nt`~j z6#>FK>N55>NcRF{zW*Pi`2WLoLjLCB9G4~@)u;m}pa~!B66e0Q&E(MiBKho}IbE~g!MqNdHEM3mcnPpUB>CA^vkwK>;eqVF$bq@Yg8NP#mh zf2&@TJv+2r6!E{cfN>@C>L~_R|fGNmxYA2b5sMU zdCg=ZR>wXOfI!o^m+1S0=LpE96d0dM-XFRxf8A*Ns49980KN@*(F|qqr!@nmCcPl` zp_PK^bYn15LlZ~@%qJD}`4af(1uEsHfK8j&+y)lQCs0VOUckBk@eYHeB)#i|yNG(c zh{}*L5~);L8FgG#hhmWyRE)3PH_>gli3Xbe=H}-l0@{_(ue^{E5kKBtV)_NG++pl{ zED;fxqq}cngaFCj`lQnm+G8WFTjaUmoSw$yt;PvF=>GfQC1DHom;H^gCrK<kmN z71prxPmtOtpcNT9ehjlO^dAc?-)2+HktH9))P6ny50Lmqd7(@Bp+p^So5jF@Z%*H*-I^p9Wd? 
z73D@*tU@fRn~As8sd&EN6n|2T5A!C%$V^Hq+4o1v!`)};a0}}yfer;)+7GEP} z{J3~RUm#+8hV8rAv>p}aEf;dWS|Hs~%K3ZhB>Pb1zBnJVXL!efVGZq5xJLdwVCNdl zYDy;<_>5VaJc51770AED(TP%i?0Fr6lMNc+oI)l@8aslJU!9r$^VU_D52xhvl z;OR#2=q`2NaGjIy`nA!xiW^!IGoh~6BJ};4_|i{q{c_ehGdHkvj!?Tdj%+WW>Qy;U zh?(x#X2}suM$vrY!!^*uQPvITtq}3P6@NiGW!o4uV&E!xvi4X;IHCMgqYimNm`(o` zB89CfE{-kziiRW^9GZIt_z8|+2`&{W6H97|y+hNkShz)F+M-xexb1So~D%fXfy7cRf^K|3rnb<}HdgW}SD0JKY z++X{UnU6mrWI_EB49n8f&+G44yQ>X-9Ly`Hmhywg2iv6qXZ~X=F)@WB znGd7l;*JGGuTyrq;o~ih)J|hD=x|TPD&1~=%czV;+R!qjHGJk|6_M$?7t7kM8!)y$ zY1R~x#I*JJ>fFMBlFMdUr*o-;!Pt7cJf)T8{k;YG6Vtl%%!gf(Dy>`Yp+OIPm{%Cu zB2oB1zk!ul2|CNc%wm8Sx1AM61SuUsEQ1v>(*$v7;25JK**z{t0RWRJoDJ-R4@T5m zQ93W-T0<{2^l{*EFE}3>a+5;BKwJJB8b@ zg^hD&GcH?@?$NxFlDa)z z=L4l|){9Ycby|3&tkI{xC^_Y~e|0_sSd{bx3Zi6emXeyEIX+p}`;$1|iQP6)h)3Rh zx5SO-73)13(Sy^(B!-8}7{-y7t6z!@boQ+7u3qkr3EajF%{l}_gY!a$Cl%UGtP(da z(VH#hg!J^)<1l?)&`{z{%c={?*pZgZw1YiLyjJgnb+GUAj-+>-G(>A1ULyw6dc+&7 zL1Y|(9L4xJkD*Z>auMB<lBT+Fpn7|s&8InC@%%l$)T_D0`4HTeu98PIk5$!E(HUgkAYmM| zjVQ|th}lfCw%mXMNp13u|5iZI*GDd{%q=um3DHpxBtOqw$OLR8>)nP#t_Er+6i_J; z;s#$pJvl-*_=L4pwa5d1))O`!DG;MOR$X(JyqAyWyMgL_?A5SwPv@W~i4FZu6_;EV z;i(KvzF^_?J9KYbESu`L-uhIq(;F`ivEsj*6Wp1eEUQVLvm`sf6G!k20403<26?Z> z(M<#EOT-&EsCo~#$~prVGJ8KameF>0@}PVEBqJovRYG^FzqmSxiy`_R4ChxX49n5YC4MWhz5jNNKZfJbOssY|-L~ohb zRk`$E5wT#PmSuOn8~-i~-$#4$o|cei_fWCP)P26Ql#83Ed^@SEv1OCv0*c%LGs6mI#Bn`=Yuf&aSCcQSg6sAFP+jWBh>>Lc@8K;a> zo|6}M5G`6#Eo#cb@nTLxd)&yvtR8429^kf}W|UQ=*Yv3=D&DrYe-~pyE)i={=5SH7 z2O=gn>(^JH*nXjdG?z$oqRNehyLy%1mxZADmO>)`pFi2IjsAp}PZiKg33WemetvJ= zjR^nRGDI1qEX`8w25GAF*=6k9r`3A7njQe4!2a8XacodwfL!CJxn3E2`A*{b3!p3& z@+-uL8@PHzzdx^VhyF0{K&FWFL}<-{g8vpw4r1>>sOfvZf2J*^!b_J8n{#WDGjf=z z?3n@HjrgwI)c8%)Ez7BP7WC{p+ItS)@sq_3GU#3OvsqsdPbHaz)Wt_LPL&mQMo_Ag zH?M-gV3YWU$Li%f?qLjEV0BNl+Kk2!>pnCD79~sctR; zs)mwU>{)d-0FqmiN(*>d##)epEK7>Vz-=YN05eQn-L8^BiZKy$Axsx2`$?y12{)Z-*7701 z(ca^}Nl;U_=U%LGTbK#LrsHj1f}ZR}qB2C;be-r&Ew$GQSJs{9fq_*EbO1*{Ns)Z3 
z2RjW3A*J@%C!V`mQVfwTVF%`EEsF`fxf?LBGY}I45V8(p+lD@C6^IB9lf1?Nm?d55ANgSoCuzVu}anj(i89lFXHZz@)g1ec`(V0C?ntRKX zfM%nxh^EG7t@aOEtq(f&bquSyOhmmw_~Y|K4n&kAwgLv3i}k06?`a(?N15U4rvE|m ztVnfE7(z=sd^7k2cvfz4wkvl(Si;wVTSjWQsDaiK%^;&GLg3E*&!jr3GiFY0yDJvI|<{s^#NFhrjg3iVluQhUZ5kodsJ)t6MvuenRw)&nmbaNpnBGBH)AN@wn+T z0eA>YUzE>*p-XB$IgGc^sQNl2qGU_CU2?Ec(2i~`s0;5UYzTuW*@ksd+$46w)@GW zEKjGY2_N)_ocAt7_tWL)_k>f5gSHM~Fhr|l#z&|s@JMcxeyfsWWDv1kRU0`X_yZ`ON4(I7Cy;NsngYe{QpS%oKac^Rfq-WWG z3!E(5at3!^nbng2HUYfaHpIy{-I(WYN*h1*T>a6XlHgoA3?(Iuq`z`JvY^N>5e2?U zJSGx3rMAOGv~!)$5&yt6hX2t;uRlgvz6XJQ)w|jtwh*jcpyOiq6uE}EZ5g?kn{?>g zl|c7k{}{6jV-SSMsgZ(tXi!0GBtMZM&FvS^ z^lkyWT7QX|bj~#E-7J*4gadoz4EW6eOjK}UqRE6~VZ2+N9(hB+uzmer-g+JSXgO+8 zHTP8b&Z=rEM!NX>0g_R8M@ym`fT4vjiDs*^A!z==B)y$de(w0tg&N=A8+rq=|6PDH z=Q8@w+kwVYWX@o2bAtOcZ^BySvuG5(yXy;}wzsUzT8>x`pL(F&he-g~<`HzKPUNum?NJld&dNlP! zs@}ihd!cMQi)uRWBaIopru*=gA&88-QJ&H(GG3MJOGPg}86q=q z|I1t7ZVeRJCy2XYrW{bbKfqkvPOeJ>FNHlCG`~z)^3?t~urV2QhDs$AT2l$3S+1lM z#LfY2iQ&%Ko-p`Tr`{Krb!7NQz{DQRGcb^lAk4fM3r}H8QR-2qC~qvXMqm89MnASG zXG-`*V9CvgB4csL^~_3Pxv=BRL0exnT?8|N9uQ1Kgz4VfXoUtxA@krK*a$eIxS{jr z60bKu;q(Q#bpO6%6||nKz(DqZ@krmiB;IP}6C<8P8|~&!lgGasOF5cA*=9wT01#GS$^mlVIFkpxy^~+i zc9eOB5+z1VnDb|nKr0^&GLsv7>j7XI#R4J@A7rqh$T(_S=_y!W0A+izNrOPXrr*5NnxCW`0&EkU_b*n;21T6 z3KKcyG6tmC#j`1OW)s(pAMK+3uI7pUq|0a8Mku8{)y<5Yme1bEOG%@_X)N>Vc5uYU zdI&vkaEGVy-F%7o=NQ3qOL`7}B}-<0k&6Iw4yf(Gws!uga1wK*Oy}lL}?c5T&sFb^%;F zfzir}XnCrb+%V1PDgM&^37X3JRH>>mLXd|-G(q`GBfd2I1#+45XdzB2$7db$^F#FCl}rqm zIkZrq(00SM+3=0b4@H842b6A0xe`7)Mwo(jj|X8$2+YKqAGNT9`;tclSa1;_AKR3# z8CmU1#BrQEg@E$%7gsuJEl_3@@qn7CHm|tu@Ll&rpBOwjk)X9Pn5Yu;jQ$c%e`9da zttTiq-(1PKff_-Jca=Hs$e9yr6TuXL2iy0&q^RL*h={#lLQ=%XM=C$td}nI0KgV|HZA`y5;3h9aC001! 
zl6)59+~osSDn?S@78xDpdMU`sntq>_-lrl*QO_Z$L}i?5UD-AIUBr@r5Q^3 z@6s$RP5Do1We2_pm&HED7qz+fuV%5KQZA3yOM+<`_0=Av%W%eudfb^Sx42{H>|ouC z1*Gx)qe=ALM8=Ld7W9Q4%!Rqh^F_1xv-(p%vk8)3T^$qR^9J)F_+$E)?)Lwgfrr$XOxKAMvYYTx&iNW1hw=r|LinI%ZPv-&yX=n%S14@_@8(1mNmrd z1Y+A=0{W0StOS7Ma{4|ju>234eIsnn^IEhi`##-5!-DYT01UFw zClH8Mb7`?QXwFa7ZDN&JDLJd2s-V4OK-w!v=4ES>CUie!@Qs>J_C$f@|H#hY;gm~v zuZ~vW0nhn5RhJySA1Uni4{cQzujAg4mwFn9pTVKa%a9&Mjv0cF3XPbO=pSWO#_?Dd zF~$SGe+C7da5PkDv6({ZwLWqF_ZljgLd8~gER{otyU2gN1LrG<>IU{HQ`JEuMCt8HWc3y zz-BeQ#UwSl`o0kCMFQW zoOizkRf_G#J6W0NO(#9apiwsMh-Yg{6E5zL?2woSI?(w5n8mlEa?mOeLVDHQ{7mAx z0~YL(15Y~cphaHU_?1-WplDf5Jo0%CDsgWzLC{`OO9fR2#^`&IR>hhl+e7s&5uE(j z!c`RT5e5~4rgbV$16wq;wM*L7XEMEXo}f;ge%TLYDA_CIetE1FI0c>I301lLU$vg* zcil{=dINNKt{|(M9Ee+gU8`&60%cM8t2XQ}idLQ2kJ14n!G!@M zS=OGjy{|m<83G^F;!kSl1{zx!kg}fIo#w}JO>*CqVS+#sAOAAS@v&QcFrO%H^7%NI zw>rfy`?-Je+(evzj&VNae!6Z^nHGKKL@q$SE2z@YfTzX{x%rW zkRs3^M~^CGkMXqqh47)an^CzuwJ?(l{B^{NW?KdN2r6jY+h$54`dfUir~TRK`RzGTg~loSwN(Rz(#os-hb(me$fsj2xV9sxo!H2NA8602+wq z(E^sw(?Cwrf!DaszY-*fVmT=KD*2$(l$flk&MK3y5Wmp-r!d>M=^|>7ej(My^s|QN zB+Qg%d@!q63SNR2d+RV|?JJ zAV@Wj%qoQrs!)z@;w`iWh{%riULEN%Got~;E#P#?>o#dF`BUe4<*5{#!1qO4xI;?Z zsLS8bB~C9_diA3{;T#CZunaGUX&aZ*|%~p8-w>j@=Aym_&mevys<4! 
zMit_a{rr#mOEIbK`G)kH^CM;oFPRYXouS8wm8^jF8*ZwLNDw?IWOAGAk#>pcEl$@TG#+oZ|2#f}2;&Y& z!v7xkdzC43bCx;NiDZusm`7%Xr7M>^H^y9MQBsZ;D6-2aa`A@~%&O_+Npr7b=@ubXo|xV z0KQdcFSrOP@J%pbXXu7cKU8IAXc`}_K!2RxM(Q%AM|*2;^()@>`$MA`SJI8g@ZO^M{bvvt_Wjpj5R%)bUmjy3 zW~zwR6Nt1MB~ql@<{Z&hhS8xU%P%+}DfUT=ZBioEw@ug_Cb2KogWVpjSCRk#MN<=n z#y^F}kB8V)GV%us6CF`(%|lP0$qy=l9d6eiXy^e8HV&U!hI$4OyPvuC(QexVq?3%tsbx6DO~FwHsDTr z+{BCqvl*IcJ;%)i58-h(;W|1xX*U`w>F))_7KJ6Pfa~e_U^w<2LLj&!?_0b$9?kbFR#RqV3 zVL=3ZU8=Vcbd=~`$RF3^ZCPw zlxp-M?=>Vv1AW-vb3;Xdh~x)Y-Iu=?-WMZnGnX%8C61HDuw0iNY1;fs$*aK znz*|>lU&9Ecty9(6AQmmUq>lpRSiN5I!m}B6L@xv4QiVQJiion&!=EOtU&3Y265Oo zQK|5M=5I1w_d_64FVA`8&r<^*P%w?P$`{)~ts=YbB}KBl=`gp+x>|pB(6be6dW^$e z^pB20{n4Mo!A<9KY$X9cd7!3ao<5@spow-U-RJ+h*H90ZaajX5nAu_$qAX*21@$6S znd}KM5VOg7MN+W4`3^Fs{;fuC*k;Myzf5oDJXbGud<;wVsJ~Smu{)EGI5q~90#~Qo zERmsNgE7l`Q-Fs2i@gjwD@+v%ijqJ(J=RpqG+It5XKM+k5FSl_{rW00Zy{I6sv`aG z@PhvUegsJ23JLr;6o~0ir_PVCGYoI~HLfJ_tHZG20N;{sxXc16l=f_ z*d^+E-IYcPzls4l8Oh0^pjNS4%#}7u6(;aHJ?MLxCh{<_JuYZ_fmWYwp-M%4xh6Sf zzH}#AQO)iJRS?DPfjD;YcWw)*)*+av(ASay{y6?EeJ}@en$)H1%1a@5wMS23_S-+W zb;?1Unyn6w)~04;W*PWT3KkTP|Jre?@m&_Aeiz%?a&Gn?ZOqx9LPv;ZDD|?RusyAJv9pv)Ppj| z;i8PqXcuJAC&a9IW6m-`a#3glIfkX?Q?K^+#!yGLOuw<~X!Ua?fXzthD%ZPbt0`

&Waq3J%H?cwBs z1;_2LK>?}3)s+24Qz;PzAO@etiTCM?@1lCGi%Wgk@cB80dG@D>Gu9IZh+JNi2EI;( zBh34l#s)L0Z12$v;}IZJ4Uw;=L_qv-+q@e-?0%(4?GTJQmzsB!*y)q@TF!q1$~A2F z%;4IYY1D^v7SE}t5hIl9f(FgIc}fD>9Jwfu$JLR0 zF3?JP3jj_~-I4i-*PYQVe!mFfoIt|eKgl#ViG8YSH2!vce%frrsUb&EC-=4=y`lFt zBxW43aryS}_jr|TH81ckD?6rJZjwZQ3$#k!e9!Nc+&*|7aQ_Fw23a-iEYeWhD={J; zHr0^Lg5*z@;7&8vH3@WVGy+K)!t7wYqewmvB8wp|NaY=gTuLNa)X-*IQJ+5#S`S?* z+IBd(Y9J*OLyk;tpK{^0Chb#hV;BGIQd=VU1)JOq27_z(92^yD#vdx^9;amwnKxYG zFb;_?ifa)}jwgv`Gyu{hu9MCqajQDrgRP)v%1g9<2Ub##lu~GrnM#|CLm!|@hr%R_ zV%3RNCwh9IHfe6dp)IFwH2cb!!xT z6ifjTY0&iiTxmDRtQAk@`eZN|ukIj=pJ%9kG_Q*~&O2w)pBkz>)qxNV;XQuqLuR?c zLTN=*!=?4*vD9Q8z}ix3lBPccGQZtD9eao{I;k1f(LIX@?Q)oD_)bp#-~NLwc5?o` z_vDsKzRX8Bp{zhD=h@gDt_xPOY%#3AoRHvdC1b=q48`Gx4z{}mKSnD8YeK=V+VDqt zb4JRQzr^R$w41s46^LN--ki~Gz5}h9yo$=Myn|l1DXhaUf~!YJ0|2R2WYL<+YL8l# zB}qkb8}S~6P@@rqCa@puT~4T#O)*67-Y()VAO8 z+v2c;UsEKsYXf>2zX4K@2fIP7YcQF_p5RY}Hk+aVap@w+r)5lObvb47Pi_g^u~Q5? ziR?o+fb(tiafWOYQKd6#Fr6>&xE`GjkeZ>VaS^oM%S5FANjy=lSryj^h~d}yN(Mm? zB)T-{p$(Y_FS8FMR->Jhsw;rI86fxpd2UI`4rptXHbve!!&tT^s22y|ukOGtP(3Wq zAcO8n!P~duvM>Vplj?V*hz4l!f09lV0T8WXgHGb~@{c7{nf#0$J5dg}(|YRaJ`sRI zaLND_!ZOjt|A)(hu5Whs-esA5{Rt601kVR~{y#e;dKX+RJ*1yU7N&1t(3!`;o3#SC z4c0O;YkXlE6*Ujb;L79#|JxEee+Y10UmH{j;3m9Q8`Fb)pgSyV)PgkE-h0IodL>b| z|0(2N;+2L!I2wL3DkuY<)Tlz{#}8Kk^hZWk@s(Ba+t&AXV0zK@LoEEaU{7lbmfBMW zb+|5cQE@==$4NNYz#srY4tDz}_4s$c3gd(zUCu@vG3PW}(FT0vF=oT!!18zmkD0}9 zv?mM_O>M0qI-tFD8nE0)(jrh||2=3u0x97SegdK3|NcRYh7Z)T$t%iBJwAJ5d_jY@ zL5PY1w5+dw)z!c#_3-?``>$(RccrQUCb)B!{o@33m5huO9cXzg2&6&5=R?qJsit=5 zz^{|o=(1t_=zRHnmdE9iiPSB*ejo29r!M%+?ZWS*Cs4b=>>R##JwUW+_dm5g-K%hqYp zfv?-`&>aLWRV**8{2>)Py?^*;g}(CoqAzRXuSF;P*>2gLA{_QN#sND7OLmxqzQ!Gj zAAbF5l2rb&UcDZ4)nUIxMjTLjuhjQOS;91mWem>_Q`r=o{60NiyZuoZO~&q{m!V%Q zeE-J(@AoGxTR(cg8n7Wxk!sh-uQHV@S+d{z=DBP=E52+pH(^s)Q(qZ6nxMnJQ!3AC z*~!PF;tjp_MW4lm^0E_uEK%}>NnpC8x$pl1#3Df!-cMo8_zN%l)-@Q6qVobWL;O9D z>eEhmE}l&OI$i&5I_KyBKI|tstAz+PB@{6p0;dHyuqXoB;Tn@oGPb?e8+>O?&rB|( 
zy&_=of4W3Khl`6dx0|VQl%ddPo$f=L+B?ay%)_(uh3x}<-S+80A?1Uwn=#pfm%O+u z0nCkg%UzGGfIRn$Lm<7;2H!_K*uc+vHYS`9&w%QGE#HW5*@9Y&2Z0!p&KPGI9@m>@ zWP=#%LPC`TKD?xKjI=uNaTuJhvF@T@zMsmvTYQ*yDyn5_vv=i?329-w4U&NmAS_RG z*}Q`F;z`jiAKt0NrBl(hf`w6VCwLz7v}oXgk>qVkz(tw-1LY5#aD_^oFU4#tSa-z@ zpI)e%vPhwq;NQ86)r;EAAb&D^_(<%?!uyhV?&a*E5tU;$NBf^5BykZ{LxAnj5h1fHu%; z(n+Wr(RB-x|!b70#x}WbP&mj4twAMhB_Nbc#Yy^_4ESPa|7fZ;*%Dl#Z{gjPJUd79xEDz9XDpU zsfb%V_iGeAbhc*`OMc=Ff=@`uyt*McGP}mgFyW*z7!N6hzF(ee?m+g{Q5feP42}r) z^G|n9U{zUP+9{4syM9M_@nTJ6Dac0n*zJ8|g=aqdQgq^^LLK0J{*6Lgm4amHTnBmS=HOU z)~M_u<5Lg)By|qx$9)_Ob|1SE{cx#5#WO76$L)VUStg(BQysZ%6s>3o%mOyVGpSj{ zkjZacOoA#n|Fh8cF^pC6+=;yP_u?}7p6ibxR|inc%RVfC4CXS3=P6Gxn|@;5z3ykS z#->>7LxEUCU^wOmH;Rfqyf3_j753j6VeS=~{%bZ`{R)ycY^^0m1|M(YQr(z`2^-Q1J9_{BjwFcK{!P(I0Yj!#~~oTz%>vbdH$o2 zfuH{i?<<6ga3KHs0?(zt)IzShYtRuHl^6V} z5t?g{fi}niqnkw_j;fGkKR+nB2_z2YNNNDshJ4^3Wbh75R?1kqLEse*d&0>k)Xw4T z^5z9+3NnC*d%HYM7UGWSLS9hsgJwkWl}zdYGWO#{9XWoH9|ez(FO23!KG1W2V;ZnS z%KzgEZ@IeiOB^mT{kJQG5yGfTTvl7iw85iU&_KcwxRnLPM5Os+G%ql60tj0eKDGWo zZy)?B5gs{bNvGIa9-R31yf}_`2G!o0)Ysh(7tCcsJ}~F@-w%}1)*Uakc!qr5&k{3q zU^FfL&(A~hHW+I8-yR1bm}X<-=z@-|x44myv-o>`8&V(VV!3qP6+z6C)X(iqa6cnE zdx9cNW4B8{9|ju9H&bVxK~oHwBS4+-ttJt|8}uO<$#4vFqQVLX(pAr&EyPvuY@42d zzVH=TNj#E!zUS@4D@Fn7<$Ntc+zL_9Gwg5*9$9GFtn#X)Q6ixbiuvwW5D8fTK1n;x z?0^d3wiS{$&L8Y00)ztD_So%nE)*op8lU2Ru968Lg-3r6M>GPOOT4Cr{$Ai~@mjC_ zV`!{oqZI{{-qS6EE@tetX&U;aqC>cH<^WT})!G^@lqaq*_{bB!Fk>LZq`hBE{tWr`rEkFXRDUf`WLW8ZtJErW zFj(rJFg;xKikTu0F8~Uv1)8(v(hP2?VH7+?87A)Xv_b6m-kAa{ zaT$L=gBgvCL%SK~y7SX6p#C9_wsAf9{Y#*Vjt`b8<0bw#cKz8h*LCw0XmKLw#Hm1n z@a+qk+93lE24wWJ?Vo+B24I7#_+A^srC*K|0&8ims12VWyfSL1nrS;M4{d+Q?En^B zlfm8h5sDBnG`a?gP4JoRe}m7K84E~~n~{QWDH(aO0BVZl&UUZl%4+s~zx!a`fdq1T z^sv-ij|7_7*Z_BXxvOw75-BsD9Q_$?Ao*-c0R_Hl5ZF|A`HVUO@frq8a;gbznEwL3 zDAou^ssaJL%GaB3g*=Pr(CNYI6wJp%J*DyF6C2pYmH6r@kgGK8zHfhoL^&9Y zQ$8=B45L6zp!G-O3QTM?e*gL7E|5#u1Yx|17*c^rD;O-egkjy2ZpZmZ5cUAtahD1L zU_Z&imevmwVs(b&5;{`@cLHxP?r|A>Y=&NK(!*Z4Ae^$hHB?qrlvDAZv;pOqc}AuMM`5+I}c-|B(GZR 
zNsH{S%Uysb+&8&JU6%r7P1VEVL^ra_?@fHh(myP1`;DJ7;{Y)Z3&X+k3QL2>&a+)- zBusSd=P8^HS;|`1;1hhOLbcf}I`!7^q=@?`c~(=}h0~4JlA&0udo$9jWmn)iujaCN zX0r^H(BPRbS#y~{ARl$f+lXo*VA3p&Me~tro{OrJz3S`UgSst=rX8}TFr^>nPjVOy)>rAo=i%Vtl?RFqp zjnV+J`sSr{Wl5(o_7~~VndeW+LjKX>QDqI%MMXHLa0DzHico7e&H*>R<9L|S(&>3m z5GS_UQ0DaZNelANr~v2kC}bMdl69bPNYv$Pznj;-G^g255&6Be4wh57Ucn#F@6-*t z@<9-yOB}tc-gG+m@C_(K$6Js}%seP3XWiwpULjjA5GjB)BUL&Pi?joLQVnTR6vk{f zSP~cWA0BQ^N>4w~c4R4{))5`K9h+ezvn^}_l_Ht zI2IUE@W_1zK0hw_5z5!}c=ajIx%v?#{cc}jTh=U4Bd4rMVeB4J+kV3KV9J#2Xnw!e<+|<^{`Jy=zSs%DRM6Nj)VqyL=8BTc73Q0C`qIt$z~li z!j4*>`tz_li25#&Z zK_&C@CW;oI-m zIoI<%t>3FH?jRWoTYM;Xol+=6&Wg1z=&_h_T81_prfQXkmwgnOO|5UDsxMg$JzFsq zeU8M=_@#z?jFq+$c1lg&l~{Cm8n)dLQWGA3hz<5RP(F8qexG~ci;&_w@gX1c?62+7 zSMj=X;MN1NmhS-88XJ3Q+87#4d_Qx%Vs1PQb>6ClQ|)s&=B{5VSlExTYk!Ey31@up zf$u5$V*zBX%$S{$svz6#J$FxtTfz2XTUE_Mm5)O3&Q%;RVGR&~T|YrD_CHO#<2xjx zVYNlvVPhzKavke6S?e6bUKUGVpG6{R9)fxGADg+TdOCp7*@L28R_tNDy12t?*hMqk zi)51m-yJ32;N&#zJ8&QdQLhlWKOgB)O1bD7BEVA@$EuB0S2H^a?E;?dJ?q%u3r>de z(xudX9h!Yo-C_!PFcZJArgy)^otN3ud@vX_^GwGj7y_Q?^6lpeG=S|7aif*PlaPBn zDnk8TOr)^+F4*)gH@fb-h#XojFQ7^`anUX9Y!~WA8TxfDY-gN$-0-M;Au_|c)oeVb zk^OTh`j+?8CgzE(JUqI2qTo`n{0(s?}xl%`CxkxOTLqG4FXuj_8v z?{A;mJi1H8m+)w{XOu#_e(~qj;?mP0ZZI}_F-G9TUowVEBSJo=z8F^yi=cIW7oA2PK9uI4oBN|5d*P6V}Rf90A)Qtv2{d<2Ma_)=2sK^@Bv zPgXee-jAsf+Jch}@BJM8H?64a!3~O#v-jm6W1Wu7cfY+`!L!zWB~tf~q`v>~avI5u zaT>dvpZZC1TNu%9b`zMiapH|WM`(+VFB3ZUQ6cd2Ln)yP-FGirC2dl-~>QIBqo({o1F>ZKSK7WaaV8$QOG zy~EU@?W*JNh&C^`y&-Ueqt7a-W8TXxiA39X)6wBLbk1HJzq{}@F$_gaC>#Ro0XW}v zUA;D3++hKe|A6R`6lcpk@S>3v0fC-!o zVdqR4&}CE=H%d4k7KTkpx$g}?E#awL2V0%e8vX3a*^tJeAJ};j3#6J+0d!VePpNS5 z)V1n^d+z;EYa z-`snU^1m*?)Y-2Oe#0|?i^`9==n*V!vY*lp$rkp**2O)DbNccH?(ypDgTA^gRx+|T z8*E3g`?y_RT9jqV48@%A^J+&soX)$vGxX8P!=4Y>xFosPg@+Wq?ZWv{T?V0BOtTbks0#xq<%mdC4!DnP z>5^=k>O8_sX>V!;nR@ru70zdN>bwoDh&%`@PNH)uQj;l#s86X%nWDD|Zy9~`tL>U+ z4??YN{{#*9UYs$L?NcXfk=-3$Mvp(X-^2*t+HT(Yy>3xCb_sgez}=3Bx@mbhb1y@L zf3u3pT8~oyGR)09T?qw+)3U{byenB|5VZ(#yBLH0ovx71Tj4U8^^Y-EmH6bZ5p~Vs 
zi2gG>IxOQs8rrcV)zsc&OQw4J;^pd|Uoc5))UEtQpA_4v)p4mxaz{snbeRAd9et72 ze$bJkSVfM^YW6R_n?E)jkKj;FTQFAtIY^@taU|ql59hRd^}kp z_X+|C^ySHjQs7CIlMM@Ac;1`7d2KeUC|`zCOwlPUhFMH>1j*kb%F4=TiW=WIQjl1) z;qE@C?Ph5WBoW_(KWB8h!dLrEw@QVwe~8SjT}xlIOa{6Z!W)I; zomD;fgQUJ#Y0XX@3H&j1t46n<=-{HHBelBh@NdJ`-kUn3@YS#p4nZv8AjSdUBq9uP zx1*82rB4uI?gk*_KgfRXjOw76lheTzO#1e3+YLG)_I1gQeysAfe4U2aTB$a9zbmFm z$6K5u_GxnZt(stb^{VmJj_j>Zwr5Ny-C%1uCO;JB9Vlm8LLUl?dEMZ#28=0xoL0VM z-?B@l&TqT^uAO$sAEoV_VALUHnq;DTGP0tyv2rYU?iXml>*{kA*h z554abLu^Y#KhwlGvj?C55gyGe>A8fI$I*W(Xr$(AxYi?VFRov+h@VvozN0q7*lvQ- z*qc2JU~*et_Dm?%X4lEBO0En!?vZ&+0~-6xQx{U2WRUAdr=4InPLYpKlrlF+J=YE> zwV}wIRN5B56Zmrl1!7T^F-xTw4=b$zd&bLNOyuC!DJ%o_mlX$%k@ukp1D6=eP~kqL z77E_?9{8FbS0o3K*oOPLL_;wG-T*+je@Zih#Z zCy3&Nq_Ec2U*7jPbA0Pro8NGMBFsTLd(wl`A)VAZUttn*=`4Ks9l5x;9k^R-XRY9w zz!SIA;tA2bB7z}JjVCtaPll+)js2DxYDeYItyD*jI11A(L19QV0?$#t&G|WGJ}R8x zusGv3dj%@d8+EPbK$SI z@iOQvYe+63dC^$ws*W<|3H^Fo@>EK}EP=fLbsMiEV>{okw{6&*V%qZW+X$ek!1$~f zJL!RF>`+~8C;b$tez6r0=CWWo8F_9`{}YLHxXNk**z_A|u*$T3QP(Ezb{a5-_!Hb; zP%*x_ogfyN#07KRMLsztq;V1~@~7uMxlc~U_~l5gOm;(kME1x^erZK9B)3&4O; z_*~18$B~Q{Fk@){`sLWu=EAvrb$R_kB=VB4O0Gcs2|!7{33YQ+giaHOd%G&0qOYpi5J3?@od6tC7C>zz^_^h-K^n1jIZA$-%O)8QvX7g zFFuwjMS4|)8^iLn#U3HlE4%{nWlQKSw2(TIS9k>=Vk20C&-~KhBw+Tt)x}ePZ*Y>; zr9R$j<@opu5)m2y-k&*nv8@ChciPt_Ms7~xWVkSDjmqT|JvYxh0g!h;NycY-awODf`hcu-$!^z z*^=OQY5U5yyFO2z>v|EnQ21F(2vzxi9pQRO8{9jR){~yVt3pI1mpC)O_9T~O2Q*;+ zK5=Mzt_UPnXTGP-72*V@nRrra)L&nM<>uezWm-~z}SDnAaCt{zKysA3m>31Q&g^l z?_K1$Z|}+6^($0}$<9uK&$+eBF&@cm`qfLtv*Ykyv331hBFY-j#e`q?L+vrRu3p@h zatx17F?_t1tIG`=mVKU@5@ML2Q7uWJTuI2Nl(+ zdD0y7BcqIHqNr@dRoD7XJa{}B&>)}AW`9a^u=pP>MWdV~jiNBJ?wz!8bx4mRw*>($in^uGowR850vh~RR~$3;xs7eH zPYm<)-$oQ}UOjxZ2LGJfLq_uh(kOg>d`EZ+94kODAx%<=xi*;kpF=5yMoALv;xVuZ zpoL4j=JOPk>V*aQf!O0xO5WpASs;8b^Bmm)K!#)U2+UB(9yH2!cJ(+_awOrG%KY<) z;@pKYQ#?3f5;><~c%uFGbw9STtZV$0pDrJF92b|xs$x;9MnTX9bG-#*SvID01MOY4o7ulR>V&4n=0{ z8_$<}yJENLt{$e7jaJ~Uu3h|Z6jfkJ9*ylLrPeC<%Zp9S($5XSzj!6Kk@ccpf3F> 
zi$_VLm(cr+78Rz?hcSMivZA@{TIHQ019`GGn9!=KAMQB!&B=DNwDFs&+E}9s9yI?2 z2V{iPm6)FDUvgCZ(v_2^v{OVhXI;P=>Tm<|sYe50Q`B$GgKDow9V|=Vyx}cT64eqg zUCf<7&o|CgPiICxRj}1d#^rJ2Sq8U)%egDB?lQO)%iHZ=ncw&FF3$w4{R8JeT-vl3+Dr!UrwRw&=SHFWpe<ZJ8&VUB}nxXpMY6lq?!=#*3uoPJnB4s`<&HlRSWoIMwDPd z?3l-R3clZ9Q!oCd@_6=90*7rnGq86;XQB-mJQ0;tG#o4|)k=UH@lg-JY6->&xtuaGY z{6&K4`-{TqD&Fp=V*4H(+YO({g#IECqz)K7`|2RwEQZRira#r$JoIidKv#)cbN=oc zmqlNxMVAlCc_%%{HQJXvrqeUUnf;=xF;kT^%`$g|^hOSxxu8 zSBVYH_evV2%9k32KfRg4-zDZAWIj3MJe3U^0P)Fl!~;-ViZ9qyW~ZXQV(7D!NY9M8 zH-ARl+bvq}IQxKb3H1ETO8p&X>Tg?wlGBzJ(x-AaF?+xFM3Pm+sMAlr6aGjJcBeHOm%qc>U zg#)TvAH0^6Wqn~l4_QH9ozgmZ<>!*xo(evP1&htGb(8+b$|OxNp6tpT{^!ZEc9i`3 zzcu`SXAK7?;iOuT0wL=lG((S6~12VPkUh1HtEf`mQ_HlU-|e>bzsd%jR?Lg}x&@pO7R| z1SadUstc<<=d^%l{^wlBN&Nw6NmU5{;De+1W0wA>6w`UuN&VHG%s)Dq9pPs^4Q4q$ zDuZ!)#UOLs+6m(W2A4USFGa@_WuCSkn|S- literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/docs/features/dist-train.md b/model/train/yoco_moe/docs/features/dist-train.md new file mode 100644 index 000000000..886367810 --- /dev/null +++ b/model/train/yoco_moe/docs/features/dist-train.md @@ -0,0 +1,53 @@ +# 问题分析 +多模态模型的训练中,由于不同模态模型对算力和内存需求的异构性,会产生以下问题: +- 不同模态模型的最优并行配置不同,全部使用同一种并行配置,造成负载不均衡、资源利用不充分; +- 多模型融合部署,造成静态内存占用偏高,训练内存资源利用率不佳。 + + +# 解决方案 +新增dist-train功能,通过对异构模型配置不同的计算资源和并行配置,减少冗余的静态资源和异构模型间的气泡,使能异构模型之间的运行速度达到最优匹配。 + + +# 使用方法 +在启动脚本中添加参数`--dist-train`。 +需要在MindSpeed-MM仓库中,对应模型目录下的`model.json`中添加`dist_config`字段,具体配置示例如下: +```json +{ + "dist_config": { + "model_name": "internvl2", // 多模态模型名称 + "use_multiparam_send_recv": false, // 模型间是否传递tensor列表 + "model_config": [ + { + "name": "vit", // 内部模型名称 + "model_index": 0, // 模型位于流水线中的序号 + "world_size": 1, // 模型使用卡数 + "tensor_model_parallel_size": 1, + "pipeline_model_parallel_size": 1, + "context_parallel_size": 1, + "forward_only": false // 是否不做反向计算 + }, + { + "name": "gpt", + "model_index": 1, + "world_size": 3, + "tensor_model_parallel_size": 1, + "pipeline_model_parallel_size": 3, + "context_parallel_size": 1, + "forward_only": 
false, + "main_dp": true // 配置该项时,代表DP数量以该模型为准,只在需要开启inner_dp时配置该项,且该配置唯一 + } + ] + } +} +``` + + +# 使用效果 +根据模型不同、参数量不同,效果各有差异,可以针对SPS、MFU等指标进行调优,均有收益。 + + +# 注意事项 +- 需要配合多模态模型仓库MindSpeed-MM使用,目前支持模型和对应的子模型名称:internvl2 - [vit, gpt], opensoraplan1.3 - [vae, dit]; +- 需要注意在配置并行策略时,若原生模型不支持某种并行策略,则dist-train配置中也不应该开启此种并行策略; +- 配置并行策略时,需要保证各个模型的DP数量保持一致,若配置有main_dp,则以main_dp的DP数量为准; +- 需要使用dist-train配套的权重转换脚本,和MindSpeed-MM中使用的权重互相转换。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/distributed-optimizer.md b/model/train/yoco_moe/docs/features/distributed-optimizer.md new file mode 100644 index 000000000..888a43fcf --- /dev/null +++ b/model/train/yoco_moe/docs/features/distributed-optimizer.md @@ -0,0 +1,15 @@ +# Megatron 分布式优化器 +## 问题分析 +数据并行(DP)场景下, 梯度all-reduce之前,不同的设备并行执行不同的工作。然而all-reduce以后, 所有设备都利用同样的梯度、参数以及优化器状态做相同的梯度更新操作,因此存在计算和存储冗余。 + +## 解决思路 +将重复的内存存储和计算步骤拆分给不同的设备,通过通信进行联通,最终在同时节省显存和计算时间的条件下,达到和原始DP相同的效果。 + +## 使用场景 +主要用于训练场景,当DP不为1时,将优化器状态拆分到所有DP组间,在对梯度进行reduce-scatter后,每个DP组分别进行部分权重更新,最后再用一个all-gather操作收集模型权重。 + +## 使用方法 +脚本中添加`--use-distributed-optimizer`开启分布式优化器。 + +## 使用影响 +降低显存开销。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/eod-reset.md b/model/train/yoco_moe/docs/features/eod-reset.md new file mode 100644 index 000000000..542f96d7f --- /dev/null +++ b/model/train/yoco_moe/docs/features/eod-reset.md @@ -0,0 +1,56 @@ +# 支持EOD Reset训练场景 + +## EOD Reset训练场景 +通常一个批次中输入进模型的文本序列是由多个文档(doc)拼接得到。在默认情况下,多个文档被视为同一序列,互相间的self attention没有掩盖。在特定情况下,多个文档间要求独立,文档间不能互相做self attention,在这种情况下attention mask和position ids需要在每个文档结束的位置(EOD)被重新设置。--reset-position-ids参数关闭时,整个序列计算位置编码;开启时,在每个文档内独立计算位置编码。 + +## 解决方案 +通过调用底层flash-attention算子的可变长模式,支持EOD Reset训练场景。同时在EOD Reset训练场景下,支持Ring Attention长序列并行,对超长序列场景进行加速。 + +## 使用方式 +### 1. Megatron代码修改 +1. 
在 Megatron-LM 目录下修改`pretrain_gpt.py`文件中的`get_batch`函数。 + ```diff + def get_batch(data_iterator): + """Generate a batch.""" + + - # TODO: this is pretty hacky, find a better way + - if (not mpu.is_pipeline_first_stage()) and (not mpu.is_pipeline_last_stage()): + - return None, None, None, None, None + + # get batches based on the TP rank you are on + batch = get_batch_on_this_tp_rank(data_iterator) + + # slice batch along sequence dimension for context parallelism + batch = get_batch_on_this_cp_rank(batch) + + + # TODO: this is pretty hacky, find a better way + + if (not mpu.is_pipeline_first_stage()) and (not mpu.is_pipeline_last_stage()): + + return None, None, None, None, None + + return batch.values() + ``` + +2. 在 Megatron-LM 目录下修改`pretrain_gpt.py`文件中的`is_dataset_built_on_rank`函数。 + + ```diff + def is_dataset_built_on_rank(): + - return (mpu.is_pipeline_first_stage() or mpu.is_pipeline_last_stage()) and mpu.get_tensor_model_parallel_rank() == 0 + + return mpu.get_tensor_model_parallel_rank() == 0 + ``` + +### 2. 数据准备 +首先确保每一个文档的末尾都添加了EOD Token。 + + +### 3. 参数设置 +前提,确保`--attention-mask-type`设置为`general`。 + +#### 不启用长序列并行(CP) +打开 `--reset-attention-mask`和`--reset-position-ids`选项 +#### 启用长序列并行 +首先确保`--context-parallel-size`大于`1`。 + +打开`--reset-attention-mask`和`--reset-position-ids`选项。 + +### 4. 
注意事项 +Ascend EOD Reset训练场景下mask-type为general时,Ring/Hybrid Attention比Ulysses下降较多,为正常现象。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/flash-attention.md b/model/train/yoco_moe/docs/features/flash-attention.md new file mode 100644 index 000000000..15555927c --- /dev/null +++ b/model/train/yoco_moe/docs/features/flash-attention.md @@ -0,0 +1,35 @@ +# flash attention + +## 问题分析 + +由于self-attention的时间计算复杂度和内存复杂度与序列长度成二次方关系,因此transformer在长序列上的处理时间、内存开销较大。近似的注意力方法可以优化这一问题,但会降低模型质量。 + +## 解决方案 + +加速注意力的关键在于优化IO访存,即降低片上内存的读/写次数。 + +### 解决思路: + +Flash Attention 是一种优化IO访存开销的精确注意力方法,原理如下图所示[1],通过Tiling切片、重计算、Kernel Fusion等方式来减少高带宽内存(片上内存)和SRAM之间的内存读/写次数。NPU上提供了相同解决方案,可参考[fusion attention 对外接口](../ops/fusion_attention.md) 。 + +a. Tiling切片:利用更高速的SRAM代替片上内存,但SRAM的内存容量较少,无法一次性完成所有数据的完整注意力计算,因此需要进行分块计算。 + +b. 重计算:放弃中间结果写回,需要使用时重新计算,用计算换访存。 + +c. Kernel Fusion:将多个操作融合为一个操作,基于Tiling利用一个kernel完成整个计算。 + +

+ +[原文链接](https://arxiv.org/pdf/2205.14135) + +## 使用场景 + +本方法适用于self-attention相关模型,尤其适用于长序列输入场景,开启长序列并行时该特性默认开启。 + +## 使用方法 + +设置`--use-flash-attn`即可调用该算法。 + +## 使用效果 + +在模型训练时间、模型质量等方面可以提升性能。 diff --git a/model/train/yoco_moe/docs/features/fused_ema_adamw_optimizer.md b/model/train/yoco_moe/docs/features/fused_ema_adamw_optimizer.md new file mode 100644 index 000000000..79c227954 --- /dev/null +++ b/model/train/yoco_moe/docs/features/fused_ema_adamw_optimizer.md @@ -0,0 +1,26 @@ +# fused_ema_adamw 优化器 +## 问题分析 +多模态领域在模型训练过程中往往会额外生成ema模型用于后续任务,因此需要在训练过程中生成和保存ema模型数据,fused_ema_adamw优化器可以在模型训练过程中额外维护一份ema模型参数,在权重保存时ema模型将自动保存到模型权重文件中。 + +## 解决思路 +在训练过程中,fused_ema_adamw优化器会为模型参数维护一份```ema_params```状态,并在每次优化器迭代过程中更新。ema_params状态更新公式如下:
+ + ema_params = ema_decay * ema_params + (1 - ema_decay) * model_params + +```model_params```为模型参数,```ema_decay```为超参数。其中,```ema_decay```可在训练脚本中使用'--ema-decay 数值'来指定,若脚本中未指定,则默认ema_decay为0.9999。
+ +## 使用场景 +主要用于需要保存ema模型用于后续任务的多模态训练场景。
+ +## 使用方法 +1.在脚本中添加`--optimizer-selection fused_ema_adamw`,可开启fused_ema_adamw优化器,优化器```ema_params```状态保存功能与ema模型权重保存功能会一同开启。
+2.在脚本中添加`--ema-decay 数值`,可指定ema_decay,如未指定,则默认为0.9999。
+ +## 使用影响 +1.由于fused_ema_adamw优化器在训练时需要额外维护```ema_params```状态,内存开销会有所增加。
+2.权重保存时,优化器的```ema_params```状态将会存储在distrib_optim.pt文件中。
+3.权重保存时,ema模型权重数据将会存储在model_optim_rng.pt文件中的```ema_model```字段中。
+ +## 注意事项 +1.fused_ema_adamw优化器不支持和参数副本复用特性同时开启,使用本优化器时请勿在训练脚本中添加`--reuse-fp32-param`参数。
+2.fused_ema_adamw优化器在训练时需要额外维护一份ema数据,内存开销会有所增加。不同的训练配置内存开销增加幅度不同,使用时请根据实际硬件内存变化情况,适当调整训练脚本或模型结构。 diff --git a/model/train/yoco_moe/docs/features/fusion-attn-v2.md b/model/train/yoco_moe/docs/features/fusion-attn-v2.md new file mode 100644 index 000000000..757c3f576 --- /dev/null +++ b/model/train/yoco_moe/docs/features/fusion-attn-v2.md @@ -0,0 +1,15 @@ +# fusion_attention_v2 + +## 使用场景 + +本方法为FlashAttention的V2版本,对V1版本进行了一定功能拓展,当前仅支持特定场景如[Alibi位置编码](./alibi.md),默认关闭。 + +其它场景原则上与V1版本无差异,不建议主动开启。算子说明详情见[接口说明](../ops/fusion_attention.md)。 + +## 使用方法 + +设置`--use-fusion-attn-v2`即可调用该算法。 + +## 使用效果 + +基础效果等同于Flash attention,特定场景如[Alibi位置编码](./alibi.md)需手动开启。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/generate-mask.md b/model/train/yoco_moe/docs/features/generate-mask.md new file mode 100644 index 000000000..3e8f08344 --- /dev/null +++ b/model/train/yoco_moe/docs/features/generate-mask.md @@ -0,0 +1,57 @@ +# MindSpeed Mask归一实现阐述 + +## 问题分析 +### 1. Megatron源码阐述 +[1] 各device通过 `pretrain_gpt.py#L93`-`def get_batch` 去获取各项数据,包括AttnMask。 + +[2] PP的首尾节点通过 `megatron/training/utils.py#L276`-`def get_batch_on_this_tp_rank` 去获取各项数据,包括AttnMask。其他节点直接返回None。 + +[3] TP的首节点通过 `megatron/core/datasets/gpt_dataset.py#L675`-`def _get_ltor_masks_and_position_ids` 生成AttnMask。 + +[4] TP其他节点,直接生成与首节点相同shape的empty矩阵,通过broadcast获取首节点生成的AttnMask。 + +Tips: 以上操作默认开启,生成的AttnMask全部为下三角形状,可以通过 `--no-create-attention-mask-in-dataloader` 关闭。 + +### 2. 
问题发现 +[1] 昇腾的FA需要外部生成AttnMask,所以除了基础下三角模式,需要额外接口生成自定义AttnMask。 + +[2] 非PP首尾节点的AttnMask为None,无法使能FA加速。 + +[3] AttnMask生成、拷贝及广播操作,在每个micro_step都会跟随get_batch重复。 + +[4] 长序列下,生成的AttnMask占用显存过大。 + + +## 解决方案 +### 解决思路 +[1] 提供统一AttnMask生成接口,同一进程复用一个全局变量AttnMask,避免重复生成和拷贝。 + +[2] 适配AttnMask压缩模式,减少显存占用。 + + +## 使用场景 +目前支持FA和非FA的Mask生成,传入AttnMask可以为None和下三角模式。 + +FA场景,当序列长度大于2048或使用ring_cp_algo时,默认走压缩模式。 + +其他场景使用完整Mask。 + + +## 使用方法 +[1] 针对以上问题和思路,在MindSpeed中,直接默认使能AttnMask,不再使用原生mask生成方式。 + +[2] 提供全局变量 `mindspeed/model/transformer.py`-`global _GLOBAL_ATTN_MASK`。 + +[3] 提供 `--sparse-mode` 传参,配合FA多种模式调用。`--sparse-mode`的不同模式信息可以参考[torch_npu.npu_fusion_attention算子文档](https://www.hiascend.com/document/detail/zh/Pytorch/60RC2/apiref/apilist/ptaoplist_001095.html)。 + +[4] 提供 `mindspeed/model/transformer.py`-`def set_attention_mask`、`def get_attention_mask`、`def generate_attention_mask` 三个额外接口,实现正常流程外的设置、获取和生成功能。 + +[5] 提供 `mindspeed/model/transformer.py`-`def parallel_transformer_forward_wrapper` 接口,避开get_batch的缺陷,在第一次正向的时候使能generate_attention_mask来生成mask。 + + +## 使用效果 +例如下三角模式,压缩模式下设sparse_mode=2,mask.shape固定为[2048,2048],将大幅提升性能并降低显存。 + + +## 注意事项 +当前FA场景仅支持下三角及Band模式,其他自定义AttnMask模式需要手动set_attention_mask,或修改get_attention_mask逻辑。 diff --git a/model/train/yoco_moe/docs/features/hccl-group-buffer-set.md b/model/train/yoco_moe/docs/features/hccl-group-buffer-set.md new file mode 100644 index 000000000..271e3e71b --- /dev/null +++ b/model/train/yoco_moe/docs/features/hccl-group-buffer-set.md @@ -0,0 +1,33 @@ +# Hccl Group Buffer Set + +## 问题背景 +当前 MindSpeed 的通信域 Buffer,只能通过环境变量 HCCL_BUFFSIZE 进行统一设置(默认为 200M ),但是往往不同的通信域所需的 Buffer 大小不能一概而论 + +## 解决方案 +### 1.手动配置 +对外呈现开关,使得用户可以根据自己需求自己设置通信域缓冲区大小 +### 2.自动配置(推荐) +使用自适应方案,MindSpeed 根据网络参数自适应通信域缓冲区大小 + +## 使用方法 +### 1.手动配置 +打开--hccl-group-buffer,并指定所需要设定的组以及大小(例如:dp:200;tp:300;exp:400),单位是 M 。 + +手动配置目前支持通信组: + +["dp", "dp_cp", "cp", "mp", "mp_exp", "tp", "pp", "embd", "tp_dp_cp", "tp_dp", "tp_cp", "tp_exp", + "exp", "dp_modulo_exp", 
"pp_new_stream", "cp2", "cp_ulysses", "cp_ring","cp_ring_intra", "cp_ring_intra_overlap", + "nd1_dim1", "ag_x_sd_rcv_overlap", "nd1_dim2", "ag_y_sd_rcv_overlap", "nd2_dim1", "nd2_dim2"] + +### 2.自动配置 +打开 --hccl-group-buffer-adaptive ,会自适应设置 tp、cp、pp 相关通信组大小; 需要注意的是,对于 ep 相关的通信组(exp、tp_exp、tp),用户可自行根据当前模型 MOE 负载不均衡的程度指定系数--hccl-ep-group-buffer-adaptive-factor 从而得到合适的通信域 Buffer,该系数代表当前负载不均衡的程度( 例如,设置--hccl-ep-group-buffer-adaptive-factor 大小为 1, 代表的是负载均衡情况下需要开启的buffer大小;设置为n,代表当前缓冲区大小是负载均衡情况下的 n 倍,n 配置过大有可能会导致OOM ) + +自动配置目前支持通信组: + +[ "cp", "mp", "mp-exp", "tp", "pp", "tp_cp", "tp_exp", "exp", "pp_new_stream", "cp2", "cp_ulysses", "cp_ring", "cp_ring_intra","cp_ring_intra_overlap"] + +## 使用效果 +llama 系列模型,开启自适应方案,性能不下降的同时节约显存;MOE 相关模型,开启自适应方案并设置合适的负载不均衡系数,性能不下降的同时节约显存。 + +## 使用限制 +本特性依赖 PTA:FrameworkPTAdapter 7.0.RC1.B020 (包含该版本)之后的版本 diff --git a/model/train/yoco_moe/docs/features/hccl-replace-gloo.md b/model/train/yoco_moe/docs/features/hccl-replace-gloo.md new file mode 100644 index 000000000..10836b439 --- /dev/null +++ b/model/train/yoco_moe/docs/features/hccl-replace-gloo.md @@ -0,0 +1,39 @@ +# Gloo 存档落盘优化 + +## 问题分析 +在大规模集群下,Gloo 通信存在规模限制和稳定性问题。一方面,容易出现 Gloo 通信组创建失败的情况;另一方面,与 Hccl 通信相比,Gloo 通信较慢。 + +对于Gloo通信组创建失败报错`Gloo connectFullMesh failed with ...`的问题,本质上是由于N张卡链接到主Master来完成建链,集群规模大时,Master处理能力不足,可能造成建链失败。可通过调整和网络建链相关参数进行规避(云上8k卡场景验证有效): + +``` +net.ipv4.tcp_max_syn_backlog = 65536 +net.core.netdev_max_backlog = 65536 +``` + +此外,MindSpeed设计了 Gloo 通信优化方案使用Hccl通信替代Gloo。 + +## 解决方案 + +### 解决思路 +[1] 采用 Hccl 通信组替换 Gloo 通信组,实现在原有功能基础上的替代。 + +[2] 采用切片方式减少单次通信的数据量,避免通信量过大导致的显存消耗。 + +## 使用场景 +当 Gloo 通信频繁出现建链失败时,模型启动效率较低,此时需要通过替换 Gloo 通信组来提升效率。 + +## 使用方法 +[1] 在训练脚本中加入 `--disable-gloo-group`,以启用该特性。 + +[2] 在脚本中定义 `--hccl-slice-size N`(可选),设置 DP 组保存和加载分布式优化器状态时的通信量大小。该参数的有效区间为 (0, bucket_size/dp],其中 bucket_size 为分布式优化器中每个桶的大小。建议在显存允许的情况下,尽量增大该参数,以提高通信效率。 + +## 使用效果 +### 通信效率分析 +理论上,分布式优化器状态保存和加载的通信效率在一定范围内随着 `hccl-slice-size` 增加而提升。 + + +### 显存增量分析 +开启该特性后,显存的增加量为 
`hccl-slice-size * (dp + 1) * 4B`。 diff --git a/model/train/yoco_moe/docs/features/hierarchical-alltoallv.md b/model/train/yoco_moe/docs/features/hierarchical-alltoallv.md new file mode 100644 index 000000000..ec853b898 --- /dev/null +++ b/model/train/yoco_moe/docs/features/hierarchical-alltoallv.md @@ -0,0 +1,37 @@ +# MoE Hierarchical Alltoallv分层通信 + +## 问题分析 + +大模型训练过程中,在MoE Layer中前向和反向均有两次alltoallv进行数据传输。在原本的实现中,多卡多机组成了一个规模较大的通信组,机间机内同时进行alltoallv通信,造成机间的冗余通信较多。 + +如下图所示,假设有两个节点,其中topk=3,节点1内的某个token选择了节点2上不同卡的3个专家,那么这个token通过alltoallv传输了3次,也就是topk次。 + +![输入图片说明](../../sources/images/hierarchical_alltoallv_1.png) + +## 解决方案 + +将规模较大的通信组分成两个相互正交的inner group和outer group。在outer group内,也就是ep组内,使用allgather收集token,再在inner group内,也就是tp组内,使用alltoallv传输token,提供分层通信的功能,节省topk倍的冗余通信,提升性能。 + +![输入图片说明](../../sources/images/hierarchical_alltoallv_2.png) + +## 使用场景 + +在多机情况下,deepseekv2类moe模型,开启tp_extend_ep特性,且需要提升性能的场景下。 + +## 使用方法 + +在启动脚本中添加参数--moe-hierarchical-alltoallv,即可开启分层通信开关。 + +## 使用效果 + +开启分层通信moe_hierarchical_alltoallv前后,5000step精度对齐,性能收益4.28% + +在内存优化特性memory level0开启的情况下,开启分层通信moe_hierarchical_alltoallv前后对比,5000step精度对齐,性能收益3.02% + +在内存优化特性memory level1开启的情况下,开启分层通信moe_hierarchical_alltoallv前后对比,5000step精度对齐,性能收益4.34% + +## 注意事项: + +1.仅支持在多机情况下,moe_tp_extend_ep和moe_alltoall_overlap_comm特性开启的情况下 + +2.Megatron和MindSpeed版本均为使用core_r0.8.0分支。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/hybrid-context-parallel.md b/model/train/yoco_moe/docs/features/hybrid-context-parallel.md new file mode 100644 index 000000000..57762f73b --- /dev/null +++ b/model/train/yoco_moe/docs/features/hybrid-context-parallel.md @@ -0,0 +1,45 @@ +# 混合长序列并行 + +## 问题分析 + +从生成性AI到科研模型,长序列训练正在变得非常重要。 在生成性AI领域,会话式AI、长文档摘要和视频生成等任务都需要在空间和时间层面对长上下文进行推理。 同样,章节和书籍级别的摘要(数万甚至数十万字)在会话式AI和摘要任务中也受到重视。现有的数据、张量和流水线等并行方法无法在序列维度进行切分。当序列维度(S)增长时,训练内存开销会以 $O$($S^2$) 的速度增长。因此需要针对长序列场景进行特定的优化解决长训练场景的训练需求。 + +目前流行的序列并行方案,Ulysses和Ring Attention存在各自的局限性。 + +Ulysses需要确保attention 
head数可以被序列并行维度整除,在GQA、MQA场景下序列并行的大小有限制,导致序列长度的扩展有限。 + +Ring Attention的并行维度不受attention head数限制,因此理论上序列长度可以无限拓展。但相比于Ulysses,Ring Attention不能充分利用通信和计算带宽,在序列块大小较低时性能劣于Ulysses。 + +## 解决方案 +对Ulysses和Ring Attention做融合,实现混合序列并行,以此解决两个方案各自缺陷。 + +## 使用场景 + +可兼容FlashAttention,目前已默认开启FlashAttention。 + +序列并行维度被分为Ulysses维度和ring attention维度,Ulysses维度和ring attention维度乘积即为序列并行维度。 + +## 使用方法 + +设置`--context-parallel-size`,默认为1,根据用户需求配置。 + +设置`--context-parallel-algo hybrid_cp_algo`,以使能混合序列并行。 + +设置`--ulysses-degree-in-cp`,需要确保`--context-parallel-size`可以被该参数整除且大于1。例如当设置`--context-parallel-size=8`时,可以设置`--ulysses-degree-in-cp=2`或`--ulysses-degree-in-cp=4`。 + +同时需要确保attention head数可以被`--ulysses-degree-in-cp`整除。 + +混合长序列并行支持Ring Attention长序列并行相关特性,包括send receive overlap功能、Mask计算类型配置。 + +## 使用效果 + +利用多个计算设备对输入序列进行并行切分,降低单设备的内存消耗,相比不开启序列并行单步耗时增加,相比重计算计算效率提升。 + +## 鸣谢 + +1. GitHub项目地址: +https://github.com/feifeibear/long-context-attention + +2. 论文预印本地址: +USP: A Unified Sequence Parallelism Approach for Long Context Generative AI +https://arxiv.org/abs/2405.07719 diff --git a/model/train/yoco_moe/docs/features/mc2.md b/model/train/yoco_moe/docs/features/mc2.md new file mode 100644 index 000000000..204116f1a --- /dev/null +++ b/model/train/yoco_moe/docs/features/mc2.md @@ -0,0 +1,63 @@ +# Ascend MC2 + +## 使用前提 +仅限于版本标识为`CANN 8.0.RC2`和`Ascend HDK 24.1.RC2`及其后续所有迭代版本的系统环境。 + +若在非指定版本中尝试采用该配置,可能触发系统级的异常行为,包括但不限于运行时错误。 + +## 问题分析 + +在开启了TP和SP的大模型训练场景下,存在matmul计算和all-reduce操作的强依赖关系(不开启SP),或存在matmul计算和all_gather/reduce_scatter操作的强依赖关系(开启SP)。当模型参数量较大时,此处通信量和计算量都较大,在串行执行时,会引入较长的等待闲置时间。 + +## 解决方案 + +针对存在强依赖关系的matmul计算和通信操作,昇腾自研MC2解决方案。 + +MC2通过融合算子的方式将matmul计算和集合通信操作进行融合,将较大的计算和通信任务切分成了较小的计算子任务和通信子任务,并通过流水的方式使得通信子任务和计算子任务可以互相掩盖,从而减少等待和闲置时间,提高利用率。 + +## 解决思路: + +MindSpeed在python脚本侧将原本串行的matmul和all_gather/reduce_scatter操作通过MC2融合算子接口进行了融合。 + +具体实现参见[link](../../mindspeed/core/tensor_parallel/ascend_turbo/mc2_linears_seq_parallel.py)。 + 
+MC2算子接口参见[link](https://www.hiascend.com/document/detail/zh/Pytorch/60RC1/apiref/apilist/ptaoplist_000449.html)。 + +## 使用场景 + +当开启了TP和SP时,建议开启MC2进一步优化。模型权重冻结和模型权重不冻结两个场景均支持。 + +### 说明 +可以通过设置`requires_grad`属性为`False`来实现权重冻结。 +```python +# 举例1:冻结所有参数 +for param in model.parameters(): + param.requires_grad = False +``` + +```python +# 举例2:除了output_layer,冻结所有ColumnParallelLinear和RowParallelLinear +from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear +for name, module in model.named_modules(): + if ('output_layer' not in name + and (isinstance(module, ColumnParallelLinear) or isinstance(module, RowParallelLinear))): + for param in module.parameters(): + param.requires_grad = False +``` + +## 使用方法 + +设置--use-ascend-mc2即可使能MC2算子。 + + +**同时需要确保开启**`--sequence-parallel`。 + +## 使用效果 + +在开启TP和SP的训练场景下,使用MC2可以减少内存开销并提高计算效率。 + +## 注意事项 + +1. MoE模型暂不支持开启MC2。 +2. 暂不兼容计算通信并行 CoC 特性 --use-ascend-coc 。 +3. 该特性不支持在 Atlas 900 A3 硬件上使用。 diff --git a/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-adaptive-recompute-activation.md b/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-adaptive-recompute-activation.md new file mode 100644 index 000000000..9f2aac771 --- /dev/null +++ b/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-adaptive-recompute-activation.md @@ -0,0 +1,22 @@ +# MoE 负载感知内存均衡算法 + +## 问题分析 + +MoE在预训练前期负载均衡 aux_loss 未起作用时,token 在专家层的分配不均会导致全局内存负载不均衡问题,并引入大量碎片内存,导致显存波动巨大,并呈现逐步递增的趋势,大集群训练下更容易出现OOM。 + +## 优化方案 + +根据模型设定参数(DP/TP/SeqLength/NumExpert等),设定token分布不均的判定阈值,当超过该阈值后执行重计算,避免产生内存不均衡的激活值。 + +## 使用限制 + +1. 使用时**建议**开启`--moe-router-load-balancing-type aux_loss`,这样会使得训练过程中token分布快速趋向于平衡。 +2. 开启全局重计算后该功能无效。 +3. 仅支持`--moe-token-dispatcher-type`是all-gather时可用。 +4. 
不兼容--recompute-in-bubble特性。 + +## 使用方法 + +- 必选参数:脚本中加入`--moe-adaptive-recompute-activation`即可。 + +- 可选参数:如果希望调节判定阈值,则修改`--moe-adaptive-recompute-activation-scale`即可,默认值为2.0,表示阈值为平均分配在每个专家上的序列的两倍长度。 diff --git a/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-allgather-dispatcher.md b/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-allgather-dispatcher.md new file mode 100644 index 000000000..46570ac22 --- /dev/null +++ b/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-allgather-dispatcher.md @@ -0,0 +1,32 @@ +# Allgather Dispatcher 分支优化 + +## 问题分析 +### 1. gather & scatter 算子替换 + +在 Megatron MoE 中的 Allgather 分支,存在使用 gather/scatter 操作。gather/scatter 功能为沿 dim 轴根据索引逐元素进行取值/赋值操作,此操作会有大量的随机地址,对性能造成巨大影响。 + +在 Megatron MoE 中对 gather/scatter 的调用主要是以下调用方式,通过对 index 做 expand 操作对维度进行扩展,再通过扩展后 index 对 hidden_states 进行逐元素取值/赋值。 + +```python +self.global_local_map = global_local_map.view(-1, 1).expand(-1, hidden_states.shape[-1]) +local_hidden_states = torch.gather(global_hidden_states, 0, self.global_local_map) +``` + +### 2. 异步通信 +在 Allgather dispatcher 分支中,会 permutate 函数开头分别对 hidden_states、max_ind、max_prob 三个数据做 allgather 通信,这些操作为串行操作,但各计算任务之间并非串行依赖关系。 + + +## 解决方案 +### 1. gather & scatter 算子替换 +由于 index 是通过 expand 进行扩展的,因此它的每一行中的内容都是一致,而我们没有必要使用 gather/scatter 进行逐元素的操作,可通过 index 算子以及 indexput 算子进行逐行操作,对 gather/scatter 进行等价替换。 + + +### 2. 异步通信 +通过对通信任务进行重新排序,并使用 async=True 参数进行异步下发,达到计算和通信并行的目的。 + +## 使用场景 + +在使用 mcore MoE 的场景下,开启了 `--moe-token-dispatcher-type allgather`。 + +## 使用方法 +开启参数 `--moe-permutation-async-comm`。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-alltoall-dispatcher.md b/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-alltoall-dispatcher.md new file mode 100644 index 000000000..6b30ad7e9 --- /dev/null +++ b/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-alltoall-dispatcher.md @@ -0,0 +1,25 @@ +# Alltoall Dispatcher 分支优化 + +## 问题分析 +### 1. 
repeat_interleave 并行 +在 Alltoall dispatcher 分支中,调用了 repeat_interleave 算子,此算子只使用了单个 block dim 在单个下发流上进行串行计算,且耗时较长,算子的输出也是在 alltoall、permute、alltoallv 之后才用到。 + +### 2. 计算通信并行 +在 alltoall 分支中的 permutation 函数最后会进行 allgather 操作,对所有 tokens 被切分的 H 维进行补全,然后再对数据分块进行专家计算。此项操作为串行操作,但各专家间的 tokens 并没有存在依赖关系,可修改为并行操作。 + + +## 解决方案 +### 1. repeat_interleave 并行 +通过新建一条下发流,将 repeat_interleave 算子调用分到新的流上,在 block dim 资源充足的情况下,可进行两个算子的并行计算,节省耗时。 + +### 2. 计算通信并行 +可按照每个专家需要的 tokens 进行切分,然后逐个对 tokens 进行 allgather 通信 + 专家计算,由于第一个专家计算只依赖第一个通信,专家之间无依赖关系,因此在做第一个专家计算的时候可同步进行第二专家的通信,达到计算和通信并行。 + +## 使用场景 +在使用 mcore MoE 的场景下,开启了 `--moe-token-dispatcher-type alltoall`。 + +## 使用方法 +开启参数 `--moe-permutation-async-comm`。 + +## 场景限制 +由于开启 `--moe-grouped-gemm` 后,专家计算被单一算子合并,因此计算通信并行优化会失效。 diff --git a/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-bmm-fused.md b/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-bmm-fused.md new file mode 100644 index 000000000..35cc98bcf --- /dev/null +++ b/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-bmm-fused.md @@ -0,0 +1,26 @@ +# Megatron MoE BMM + +## 问题分析 + +针对MoE的drop and pad场景,所有专家上tokens数量相同,使用bmm融合算子(融合前后的通信操作)替换gmm算子能达到更好的效果。 + +## 解决方案 + +通过调用bmm通算融合算子(alltoall_allgather_bmm和bmm_reducescatter_alltoall)替换gmm算子及前后的通信操作,达到加速效果。 + +## 使用方法 +在drop and pad场景 + +前置条件需要设置`--moe-grouped-gemm` + +设置`--moe-bmm-mc2`: 表示通过BMM的融合算子计算。 + +## 使用效果 +在ep=8的场景下,开启融合算子替换,性能提升2%左右。 + +## 使用限制 +1.仅支持megatron_moe的alltoall分支,且开启tp和ep。 + +2.仅支持昇腾Atlas A3 AI处理器。 + +3.不支持`--moe-tp-extend-ep`和`--moe-alltoall-overlap-comm`特性。 diff --git a/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-gmm.md b/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-gmm.md new file mode 100644 index 000000000..9d58ac545 --- /dev/null +++ b/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-gmm.md @@ -0,0 +1,41 @@ +# Megatron MoE Grouped GeMM + +## 问题分析 + +针对MoE单卡多专家计算,存在细碎的专家计算操作与通信,通过Grouped GeMM算子对多专家计算进行合并,提升MoE单卡多专家训练性能。 + +## 
解决方案 + +通过调用 gmm 融合算子,对多个专家计算进行融合,达到加速效果。 + +## 使用方法 + +设置`--moe-grouped-gemm`: 表示开启Grouped GeMM计算。 + +## 效果说明 + +典型场景: + +- EP变小导致单卡专家数量增大 & DeepSeek MoE专家数量较多等场景。 +- DeepSeek MoE finegrained expert单个专家较小 & FFN规模不大 & TP变大导致单卡切分的计算变小。 + +1. 随着FFN规模提升,计算不再细碎,单专家计算效率提升,Grouped GeMM 收益变小。 + +表1:grok模型FFN大小和性能加速对比 + +|ffn_hidden_size| 32768 | 16384| 8192| 4096| +|--|--|--|--|--| +|baseline|2280|1780|1537|1446| +|GeMM|2416|1719|1448|1331| +|性能提升|-5.30%|3.53%|6.12%|8.60%| + + +2. TP越大,EP越小,收益更大。 + +表2:Mixtral8*7B模型配置不同性能收益 + +|配置| tp4 ep2 16expert | tp4 ep2 8expert | tp2 ep4 16expert| tp2 ep4 8expert| +|--|--|--|--|--| +|baseline|27969|20127|11976|13981| +|GeMM|19415|17361|11049|14290| +|性能提升|44.06%|17.93%|8.39%|-2.19%| diff --git a/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-tp-extend-ep.md b/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-tp-extend-ep.md new file mode 100644 index 000000000..f6b8e6972 --- /dev/null +++ b/model/train/yoco_moe/docs/features/megatron_moe/megatron-moe-tp-extend-ep.md @@ -0,0 +1,28 @@ +# Megatron MoE TP拓展EP + +## 问题分析 + +开启TP+EP后,专家层TP组切分专家参数,MoE细粒度小专家场景TP切分后GMM算子效率下降严重。 + +## 解决方案 + +针对小专家场景TP切分后GMM算子效率下降问题,专家层TP组不切分专家参数,切分专家数量。 + +## 使用方法 + +打开`--moe-tp-extend-ep`启用该特性。 + +同时需要开启: +- `--moe-permutation-async-comm` +- `--moe-grouped-gemm`,目前仅支持Grouped MLP。 + +同时需要确保`--num-experts`能被`tp * ep`整除。 + +当前该特性不支持Moe Token drop and pad模式,即`--moe-expert-capacity-factor`需要为None。 + +## 适用场景 + +细粒度小专家,类DeepSeek-V2模型,每个专家的参数量较小。 + + + diff --git a/model/train/yoco_moe/docs/features/moe-experts-pipeline-degree.md b/model/train/yoco_moe/docs/features/moe-experts-pipeline-degree.md new file mode 100644 index 000000000..7b7bb81af --- /dev/null +++ b/model/train/yoco_moe/docs/features/moe-experts-pipeline-degree.md @@ -0,0 +1,31 @@ +# MoE Experts Pipeline Degree大专家流水 + +## 问题分析 + +该功能在面向megatron+mindspeed框架MoE类模型大专家的情况下,专家的计算时间和通信时间在每层神经网络中占比大,严重影响性能和内存。 + +## 解决方案 + 
+本方案中,将专家分组做流水,使专家计算内部的通信和计算相互掩盖,只有头尾未掩盖通信开销。在负载基本均衡的情况下,专家的allgather和reducescatter的未掩盖通信变成了1/moe_experts_pipeline_degree。(moe_experts_pipeline_degree表示流水次数) + +![输入图片说明](../../sources/images/moe_experts_pipeline_degree.png) + +## 使用场景 + +基于gpt-moe大专家模型,专家的计算时间和通信时间在每层神经网络中占比大的情况。 + +## 使用方法 + +在启动脚本中添加并合理配置 --moe-experts-pipeline-degree [int] + +其中,[int]是大专家的流水粒度,是大于1小于专家数num_experts,并且可以被专家数num_experts整除的整数。 + +## 使用效果 + +配置大专家流水moe_experts_pipeline_degree前后, 5000step精度对齐。性能收益2.11%,内存收益4.38% + +## 注意事项: + +1.仅支持tp>1, sp和moe_alltoall_overlap_comm特性开启的情况下 + +2.Megatron和MindSpeed版本均为使用core_r0.8.0分支。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/moe-token-permute-and-unpermute.md b/model/train/yoco_moe/docs/features/moe-token-permute-and-unpermute.md new file mode 100644 index 000000000..e8f87fb72 --- /dev/null +++ b/model/train/yoco_moe/docs/features/moe-token-permute-and-unpermute.md @@ -0,0 +1,23 @@ +# Moe Token Permute and Unpermute 融合优化 + +## 问题分析 +在MoE架构中,MoEAlltoAllTokenDispatcher调度器负责将token令牌分配给各个专家进行处理,并将处理后的结果重新组合回原始的token顺序。这个过程通常涉及到以下步骤: +Token路由:确定每个token应该由哪个专家处理。这可以通过专家门控机制(gating mechanism)来完成,门控机制为每个token选择最适合的专家。 +数据重排(Permute):将token按选择的专家进行分组,以便每个专家可以并行处理属于它的token。这通常涉及到对token的重排操作。 +专家处理:每个专家并行处理属于它的token。 +结果重组(Unpermute):处理完成后,需要将来自不同专家的结果重组回原始的token顺序。 +在上述流程中,数据重排和结果重组步骤是性能瓶颈之一。这是因为这两个步骤涉及到大量的数据移动,特别是在使用分布式训练时。 +## 解决方法 +为了优化这一过程,可以考虑将数据重排和结果重组步骤合并成一个操作。MindSpeed对将Moe Token Permute和Unpermute操作分别融合成一个算子,提升模型训练性能。算子接口分别见[link](../ops/npu_fused_moe_token_permute.md),[link](../ops/npu_fused_moe_token_unpermute.md)。 + +## 使用方法 +### 前提条件 +开启专家并行,并且`moe-token-dispatcher-type`设置为`alltoall` +`--expert-model-parallel-size M \ +--num-experts N \ +--moe-token-dispatcher-type alltoall` + +设置`--use-fused-moe-token-permute-and-unpermute`即可调用Moe Token Permute和Unpermute融合算子。 + +## 使用效果 +启用融合算子后,不仅能够有效节省内存资源,还能提升模型训练性能。 diff --git a/model/train/yoco_moe/docs/features/multi_parameter_pipeline.md 
b/model/train/yoco_moe/docs/features/multi_parameter_pipeline.md new file mode 100644 index 000000000..268284d39 --- /dev/null +++ b/model/train/yoco_moe/docs/features/multi_parameter_pipeline.md @@ -0,0 +1,47 @@ +# PP支持多参数传递 + +## 背景与挑战 + +在深度学习的大模型分布式训练中,流水线并行(Pipeline Parallelism, PP)是一种将模型分割为多个阶段并在不同设备上并发执行以提高效率的技术。然而,在多模态场景下引入多参数传递支持时,PP面临特定的挑战: + +- **通信部分的设计**:传统PP通常只涉及单一张量的传输,但在多参数传递的情况下,需要处理多个变量的传递。这不仅增加了通信复杂度,还要求对每个变量的shape、dtype等属性进行精确管理,这些属性往往与整体模型架构紧密相关,具有高度定制性。 +- **前向传播的变量传递**:在前向计算过程中,不仅要根据定义的shape正确传递多个变量,还要确保每个阶段接收到的数据格式符合预期,这对数据流的设计提出了更高的要求。 +- **反向传播的运算扩展**:对于反向传播,除了对首个输出进行梯度计算外,还需对其他所有输出进行相应的运算,确保整个训练过程的完整性和准确性。 + +## 解决方案 + +针对上述挑战,我们设计了以下解决方案,旨在使PP能够有效支持多参数传递: + +- **优化的通信机制**:开发了一套高效的通信机制,支持多种类型和格式的数据传输。针对每个阶段的具体需求定制化配置传输参数。 +- **增强的梯度计算逻辑**:改进了反向传播算法,使得系统可以自动识别并处理来自多个输出的梯度信息,保证每个输出都能参与到最终的权重更新中。 + +## 使用场景 + +本特性特别适用于以下场景: +- 需要处理大量多模态数据(如文本、图像、音频)的大型神经网络训练任务,并且流水线并行各个阶段传递多参数。 + +## 使用方法 + +**注意事项**: +- 用户需代码配置`args.pipeline_tensor_shapes`明确指定各阶段间传递的具体参数及其属性(如shape、dtype)。 +- args.pipeline_tensor_shapes配置参考`tests_extend/system_tests/multi_modal/multi_parameter_pipeline/pretrain_multi_parameter_pipeline_test.py` + + +**设置训练脚本参数** +- 支持PP场景 +```shell +# PP >= 2 +--pipeline-model-parallel-size ${PP} \ +--use-multiparameter-pipeline-model-parallel \ +``` +- 支持VPP场景 +```shell +# PP >= 2, num-layers-per-virtual-pipeline-stage不为None +--pipeline-model-parallel-size ${PP} \ +--num-layers-per-virtual-pipeline-stage 1 \ +--use-multiparameter-pipeline-model-parallel \ +``` + +## 使用效果 + +采用PP支持多参数传递后,用户可以在保持高通信效率的同时,更灵活地处理复杂的多模态数据。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/multi_parameter_pipeline_and_variable_seq_lengths.md b/model/train/yoco_moe/docs/features/multi_parameter_pipeline_and_variable_seq_lengths.md new file mode 100644 index 000000000..9722eb479 --- /dev/null +++ b/model/train/yoco_moe/docs/features/multi_parameter_pipeline_and_variable_seq_lengths.md @@ -0,0 +1,49 @@ +# PP支持多参数传递和动态形状 + +## 背景与挑战 + 
+在深度学习的大规模分布式训练中,流水线并行(Pipeline Parallelism, PP)通过将模型分割为多个阶段并在不同设备上并发执行来提高效率。然而,在处理复杂的多模态数据时,PP面临了新的挑战: + +- **对于多参数传递**:传统PP通常只涉及单一张量的传输,但在多参数传递的情况下,需要处理多个变量的传递,这不仅增加了通信复杂度,还要求对每个变量的shape、dtype等属性进行精确管理 。 +- **对于动态形状**:当输入数据的序列长度不固定时,传统的方法是将所有序列调整到统一长度,这导致了内存和计算资源的浪费 。 + +## 解决方案 + +为了应对这些挑战,开发了一系列优化措施: + +- **多参数传递**:开发了一套高效的通信机制,支持多种类型和格式的数据传输,并改进了反向传播算法,使得系统可以自动识别并处理来自多个输出的梯度信息 。 +- **动态形状**:引入对动态形状的支持,允许每个微批次中的序列保持其原始长度。这样可以通过在发送张量之前,提前通信张量的形状信息,在各个流水线阶段之间同步即将接收的数据形状,确保内存分配和预处理的准确性 。 + +## 使用场景 + +- **多参数传递**:适用于需要处理大量多模态数据的任务,如文本、图像和音频等大型神经网络训练任务,其中流水线并行的各个阶段都需要传递多参数 。 +- **动态形状**:非常适合于处理文本长度差异很大的任务,比如文档分类和机器翻译,同时也增强了模型的泛化能力 。 + +## 使用方法 + +**注意事项**: +- 用户需代码配置`args.pipeline_tensor_shapes`明确指定各阶段间传递的具体参数及其属性(如shape、dtype)。 +- args.pipeline_tensor_shapes配置参考`tests_extend/system_tests/multi_modal/multi_parameter_pipeline/pretrain_multi_parameter_pipeline_test.py` + + +**设置训练脚本参数** +- # 支持PP场景 +```shell +# 配置举例 +# PP >= 2 +--pipeline-model-parallel-size ${PP} \ +--use-multiparameter-pipeline-model-parallel \ +--variable-seq-lengths \ +``` +- # 支持VPP场景 +```shell +# 配置举例 +# PP >= 2, num-layers-per-virtual-pipeline-stage不为None +--pipeline-model-parallel-size ${PP} \ +--num-layers-per-virtual-pipeline-stage 1 \ +--use-multiparameter-pipeline-model-parallel \ +--variable-seq-lengths \ +``` + +## 使用效果 +同时支持在流水线并行中各阶段间传递多个参数和处理变长输入数据。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/nanopipe-pipeline-parallel.md b/model/train/yoco_moe/docs/features/nanopipe-pipeline-parallel.md new file mode 100644 index 000000000..fcc4a0989 --- /dev/null +++ b/model/train/yoco_moe/docs/features/nanopipe-pipeline-parallel.md @@ -0,0 +1,74 @@ +# nanopipe流水线并行 + +## 问题分析 + +流水线并行是AI大模型大规模分布式训练的关键组成部分之一,但其效率收到流水线中bubble的影响,为了提高吞吐量,需要降低其bubble比例。 + +## 解决方案 + +在大模型流水线调度中,反向的input梯度和weight梯度通常是一起调度计算的,然而,通过分析他们计算的依赖关系,可以发现其实只有input梯度的计算存在相互层间的依赖关系。因此,通过独立调度反向的input梯度和weight梯度的计算,我们可以减少流水线调度的bubble。 + +反向input梯度和weight梯度一起调度的Interleaved 1F1B如下图所示: + 
+![img](../../sources/images/virtual-pipeline.PNG) + +独立调度input梯度和weight梯度的nano-pipe如下图所示: + +![img](../../sources/images/nanopipe.png) + +独立调度weight计算展示图如下图所示: + +![img](../../sources/images/FBW.png) + +### 解决思路: + +* 分离weight梯度计算流程,通过修改RowParallelLinear和ColumnParallelLinear的backward实现,将对weight的梯度计算进行剥离,先存储在调度器的dw计算队列中。 +* 在需要对dw计算时,从调度器的dw计算队列中pop出一个计算,然后计算对应的梯度。 + +## 使用场景 + +在训练模型时,降低bubble的比例,从而提升计算效率,达到更好的流水线并行。此特性暂只适配`--use-legacy-models`。 + +## 使用方法 + +nanopipe依赖于vpp,设置`--num-layers-per-virtual-pipeline-stage N`。要求`--pipeline-model-parallel-size` > 2 +设置`--use-nanopipe`,默认为False,根据用户需求配置。 + +## 使用效果 + +提升计算效率,减少bubble占比。如下表所示: + +| device | TP | SP | PP | SEQ | hidden-size | Nano vs vpp收益 | +| :-----: | :----: | :----: | :-----:| :----: | :----: | :-----: | +| 单机 | 1 | 关 | 4 | 4096 | 4096 | 3.24% | +| 双机 | 4 | 开 | 4 | 8192 | 8192 | 1.02% | + +# nanoswap + +## 问题分析 + +使用nano时grad从前向到反向需要持续存储在npu上,生命周期过长,多次累加会增大npu内存的峰值。 + +## 解决方案 + +将过多的张量做offload动作存储到cpu上,在内存峰值过后再将其张量reload回npu上。 + +### 解决思路 + +在前向时将上一轮过多的张量offload到cpu,再在连续的反向运算中途reload回npu上,通过swap流控制不会让reload和offload出现顺序错误。 + +完整nanopipe-swap原理图如下图所示: + +![img](../../sources/images/nanopipe_v2.png) + +## 使用方法 + +基于nanopipe的基础上再开启`--use-nanopipe-swap`。 + +## 使用效果 + +优化设备内存峰值,如下表所示: + +| device | TP | SP | PP | SEQ | hidden-size | mc2 | Nano内存峰值 |swap内存峰值 | Nano vs swap内存峰值下降 | +| :-----: | :----: | :----: | :-----:| :----: | :----: | :-----: | :-----: | :-----: | :-----: | +| 单机 | 2 | 开 | 4 | 1024 | 4096 | 开 | 5520.62 | 5177.72 | 6.21% | \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/nd-matmul.md b/model/train/yoco_moe/docs/features/nd-matmul.md new file mode 100644 index 000000000..ca390c7fa --- /dev/null +++ b/model/train/yoco_moe/docs/features/nd-matmul.md @@ -0,0 +1,27 @@ +## ND_MatMul + +## 问题分析 + +传统的1d张量并行中,输入数据仅在张量并行组内简单复制,造成单卡静态内存较大;同时,attention和ffn的partial输出结果都需要做一次all_reduce,这一部分通信开销较大。 + +megatron_tp + +## 解决方案 + 
+针对attention和ffn中的矩阵乘,将矩阵乘的参数同时进行行和列切分,即mp=mp_row*mp_col,同时在一个张量并行组内将输入x列切mp份,每张卡只保留输入数据的1/mp,通过插入更小规模的all_gather和reduce_scatter通信算子保证计算的准确性。算法原理图如下: + +![nd_matmul](../../sources/images/nd_matmul.png) + +## 使用方法 + +设置`--use-nd-matmul`,打开ND_MatMul特性的总开关。 + +设置`--nd1-dim1-size`,默认为1,需要确保`--tensor-model-parallel-size`能够被`--nd1-dim1-size`整除。 + +设置`--nd2-dim1-size`,默认为1,需要确保`--tensor-model-parallel-size`能够被`--nd2-dim1-size`整除。 + +示例:`--tensor-model-parallel-size`为32,`--nd1-dim1-size`可以设置为2、4、8、16,`--nd2-dim1-size`可以设置为2、4、8、16,出于性能考虑(建议`--nd1-dim1-size`或者`--nd2-dim1-size`大于等于8),可配置`--nd1-dim1-size`为8、`--nd2-dim1-size`为4。 + +## 使用效果 + +降低单卡显存占用效果明显,在`--nd1-dim1-size`或者`--nd2-dim1-size`较大(>8)时,相比megatron TP性能提升。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/noop-layers.md b/model/train/yoco_moe/docs/features/noop-layers.md new file mode 100644 index 000000000..ca8af14b3 --- /dev/null +++ b/model/train/yoco_moe/docs/features/noop-layers.md @@ -0,0 +1,30 @@ +## Ascend 自定义空操作层 +## 问题分析 + +在神经网络训练过程中,初始层的嵌入(Embedding)操作及终端层的对数几率(Logits)计算通常属于计算密集型任务,这可能对整个网络的运行效率产生显著影响。具体而言: + +- **嵌入层(Embedding Layer)**:在处理文本或分类数据时,嵌入层将高维稀疏特征转换为低维稠密向量表示。此过程涉及索引查找和潜在的大规模矩阵乘法,特别是在自然语言处理应用中,词汇表可能包含数十万乃至数百万词条。高维度的查找与转换操作会消耗大量计算资源。 + +- **对数几率层(Logits Layer)**:位于网络末端的对数几率层通常是一个全连接层,其功能是将最后一层的隐藏状态映射到输出空间,为后续损失函数计算提供未归一化的预测值。如果分类任务具有大量类别,那么该层的权重矩阵将非常庞大,导致矩阵乘法运算成为性能瓶颈。 + +上述操作的计算复杂度随着输入特征数量和类别数量的增加而上升,可能导致训练速度降低,并且在计算资源有限的环境中形成性能瓶颈。 + +## 解决方案 + +为应对上述挑战,我们引入了“自定义空操作层”功能,允许用户通过指定特定层为“空操作层”(No-Op Layers)来动态调整模型在训练流水线中的计算负载。此机制有助于在多个计算节点间更均匀地分配工作负载,从而优化整体计算资源的利用。 + +## 使用场景 + +当用户遇到由于计算资源分配不均导致的性能瓶颈时,此功能尤为适用。通过对计算密集型任务进行重新分配,可以有效减少流水线中的空闲时间(即空泡),从而提高系统的吞吐量和效率。 + +## 使用方法 + +要启用此功能,用户需通过命令行参数设置目标层为无操作层。例如,原模型共126层,该模型参数为 `--num-layers 126`,即执行实际计算的层有126层。若在该模型首和尾各自添加1层空层,则该模型参数应设置为 `--num-layers 128 --noop-layers 0,127` 表示总共128层,首尾层(即第0层和第127层,层数从0开始计数)为不执行实际计算的空操作层,中间的126层为执行实际计算的层。 + +## 使用效果 + 
+通过实施自定义增加无操作层的策略,预期能够显著减少流水线中的空泡现象,从而优化计算流程并提升系统性能。这不仅有助于加速模型训练过程,还能最大化硬件资源的利用率。 + +## 注意事项 + +使用“Ascend 自定义空操作层”特性增加空层后总层数发生变化,需要根据包含空层的总层数重新调整流水线(虚拟流水线)的配置。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/norm-recompute.md b/model/train/yoco_moe/docs/features/norm-recompute.md new file mode 100644 index 000000000..8d80faebd --- /dev/null +++ b/model/train/yoco_moe/docs/features/norm-recompute.md @@ -0,0 +1,33 @@ +# Norm重计算 + +## 问题分析 + +大模型训练过程中,往往会面临的显存不足的问题。 + +## 解决方案 + +类似于激活函数重计算,本特性支持了Norm层的重计算。 + +## 解决思路 + +运用激活函数重计算特性中的 `checkpoint` 机制,对norm层进行重计算处理,具体细节如下文所示: +[原文链接](https://www.usenix.org/conference/atc24/presentation/yuan) + +## 使用场景 + +主要用于训练场景,用户内存不足或要进一步节省内存时。 + +## 使用方法 + +脚本中添加:`--recompute-norm` 可开启Norm重计算。此特性仅支持mcore分支。 + +添加:`--recompute-norm-num-layers ${num}` 可指定Norm重计算的层数。 + +Norm重计算兼容激活函数重计算、全重计算同时开启: + +1.同时开启时,仅支持 `--recompute-method` 为 `block`。 + +2.同时开启时,会按照指定的全重计算和Norm重计算的层数做各自类型的重计算,即不会有一层既做全重计算又做Norm重计算。 + +(注意:执行优先级是先计算全重计算层,后Norm重计算层。) + diff --git a/model/train/yoco_moe/docs/features/npu_deterministic.md b/model/train/yoco_moe/docs/features/npu_deterministic.md new file mode 100644 index 000000000..740f95df6 --- /dev/null +++ b/model/train/yoco_moe/docs/features/npu_deterministic.md @@ -0,0 +1,21 @@ +## Ascend 确定性计算 +## 问题分析 + +在训练过程中,各类随机因素会使得每次实验的训练过程并不完全一致,从而影响其LOSS曲线、性能曲线等无法完全重合。 +然而,在重复实验与对比实验中有时需要确定性的计算结果,保证结果的可复现性。 + +## 解决方案 + +为满足上述需求,引入了“确定性计算”功能,允许用户通过昇腾(Ascend)芯片确保多次训练结果的一致性,从而帮助性能调优、对照实验等工作。 + +## 使用场景 + +需要进行性能对比、特定场景复现时。 + +## 使用方法 + +要启用此功能,在脚本中加入`--npu-deterministic`即可。 + +## 使用效果 + +通过确定性计算功能,可保证同参数下多次实验具有相同的实验结果。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/npu_matmul_add.md b/model/train/yoco_moe/docs/features/npu_matmul_add.md new file mode 100644 index 000000000..ebd3a28ca --- /dev/null +++ b/model/train/yoco_moe/docs/features/npu_matmul_add.md @@ -0,0 +1,29 @@ +# matmul_add融合优化 + +## 问题分析 +模型训练中开启了梯度累加功能,但累加效率较慢,梯度累加中的 Add 算子占比较高。 + +## 解决方法 
+MindSpeed将matmul操作和add操作合并成一个融合算子。算子接口见[link](../ops/npu_matmul_add.md)。 + +## 使用场景 +llama、gpt大模型均使用。 + +## 使用方法 +先安装CANN-NNAL并初始化添加环境,例如: +CANN-NNAL默认安装路径 +source /usr/local/Ascend/nnal/atb/set_env.sh + +去掉`--no-gradient-accumulation-fusion`即可调用npu_matmul_add_fp32融合算子。 + +## 使用效果 +在显存未打满情况下,开启融合算子,llama2_70B_4k_tp2_pp2_vpp1_dp2性能可提升1.5%。 + +## 使用限制 +1.npu_matmul_add_fp32暂不支持mfu统计 + +2.融合算子与小算子之间存在精度差异,精度差异的原因是: +小算子dtype变化过程:`bf16*bf16=fp32->bf16->fp32+fp32=fp32` +融合算子dtype变化过程:`bf16*bf16=fp32+fp32=fp32` +差异点在于融合算子做了升精度的操作,故导致精度与小算子存在差异 + diff --git a/model/train/yoco_moe/docs/features/ops_flops_cal.md b/model/train/yoco_moe/docs/features/ops_flops_cal.md new file mode 100644 index 000000000..c06328b54 --- /dev/null +++ b/model/train/yoco_moe/docs/features/ops_flops_cal.md @@ -0,0 +1,31 @@ +# TFLOPS计算 + +## 问题分析 + +当前大模型在计算MFU时,依靠框架理论打印值TFLOPS/有效算力得到,但是理论值计算适用于一般模型,如果针对模型结构进行变动,将不再适用,同时HFU的计算目前需要手动计算。 + +## 解决方案 + +提供接口可以统计所有涉及MatMul计算的算子的浮点计算次数,同时能统计到模型正反向训练以及重计算的总浮点计算次数。 + +### 解决思路: + +目前支持的涉及MatMul的算子有MatMul、BatchMatMul、FlashAttention、MC2相关融合算子、coc相关融合算子、GEMM相关融合算子、matmul_add_fp32融合算子。 + +## 使用方法 + +对于开启此功能,设置`--op-cal-tflops`即可调用。 + +## 使用效果 + +通过打印值`actual throughput per NPU (TFLOP/s/NPU)`和`actual throughput per NPU with recompute (TFLOP/s/NPU)`可以方便计算MFU +和HFU。 + +## 注意事项 + +(1)由于此功能统计的是各卡的TFLOPS信息,在CP/EP/PP场景下,各卡计算量不同,因此在最后需要汇总各卡的信息进行平均,额外增加一个all_reduce通信。 + +(2)使用此功能由于会增加一个额外通信以及计算各算子的浮点计算次数,可能影响性能。 + +(3)由于在Ring Attention长序列并行方案中,在causal场景下,由于算法优化缘故,会有部分计算减少,因此会导致理论值和实际统计值不符合的现象,理论计算上FA +的计算减少值为`(CP-1)/2CP`。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/pipeline-experts.md b/model/train/yoco_moe/docs/features/pipeline-experts.md new file mode 100644 index 000000000..5d9883623 --- /dev/null +++ b/model/train/yoco_moe/docs/features/pipeline-experts.md @@ -0,0 +1,80 @@ +# MLP通信隐藏 + +## 问题分析 + +大模型训练过程中,通信和计算往往存在依赖关系,这样的串行执行顺序会造成计算和通信流存在一定程度的空闲等待时间,导致执行效率较低。 + +## 解决方案 + +对通信和计算算子做更为细粒度的切分,保证细粒度间的计算和通信任务不存在依赖关系,是创造可并行执行任务的前提。 + 
+再对算子调度/执行顺序进行编排,实现计算和通信的并行执行,在计算过程能掩盖中间部分的通信过程。 + +![原理图](../../sources/images/pipeline_experts.png) + +### a. MLP通信隐藏:`--use-pipe-experts` +开启后,将对每个experts进行细粒度切分,对前向和反向的执行顺序进行编排,实现通信和计算之间的掩盖,提高效率。 + +### b. 多流水线:`--pipe-experts-multi-stream` +需要在打开`--use-pipe-experts`的基础上开启使用。开启后,能够保证ep的alltoall通信和tp的allgather/reduce-scatter之间串行执行,避免集合通信出现链路冲突。 + +### c. 多副本:`--pipe-experts-multi-data N` +需要在打开`--use-pipe-experts`的基础上开启使用,`N`表示使用N份副本。开启后,能将输入数据切分为多个副本,将不同副本间的计算和通信类比为多个experts的计算和通信。 + +## 使用场景 + +在 local_experts 大于等于 2 时,可以考虑使用专家间的计算通信流水实现通信隐藏的目的。 + +在 local_experts 等于 1 时,即 ep = num_expert 时,可以考虑使用多副本间的计算通信流水实现通信隐藏的目的。 + +可开启多流水线`--pipe-experts-multi-stream`规避集合通信上出现的链路冲突。 + +## 使用方法 + +需要在保证开启了`--moe-model-type deepspeed_moe`的前提下,开启`--use-pipe-experts`才会生效。 +进一步,可以在`--use-pipe-experts`的前提下,单独或同时设置`--pipe-experts-multi-stream`和`--pipe-experts-multi-data N`来叠加使用“多流水线”和“多副本”的特性。 + +## 使用效果 + +使用该特性可以提升性能。 + +8机, world_size = 64, sequence_len = 128k, num_layers = 4, recompute_granularity = full, hidden_size = 12288, moe_router_topk = 2, ep = 4, tp = 8, dp = 2, cp = 4, pp = 1, sp = True + +场景1:num_experts = 4 (num_local_experts = 1) + +| pipe-experts | multi-stream | multi-data | 平均TFLOPs | 提升幅度 | +|:------------:|:------------:|:---------------:|:--------:|:------:| +| 关 | 关 | 关 = 1 (Default) | 104.88 | / | +| 开 | 关 | 开 = 2 | 108.01 | 2.99% | +| 开 | 关 | 开 = 4 | 110.96 | 5.80% | +| 开 | 开 | 开 = 2 | 110.21 | 5.08% | +| 开 | 开 | 开 = 4 | 111.43 | 6.25%★ | + +场景2:num_experts = 16 (num_local_experts = 4) + +| pipe-experts | multi-stream | multi-data | 平均TFLOPs | 提升幅度 | +|:------------:|:------------:|:---------------:|:--------:|:------:| +| 关 | 关 | 关 = 1 (Default) | 103.15 | / | +| 开 | 关 | 关 = 1 (Default) | 109.27 | 5.93% | +| 开 | 关 | 开 = 2 | 109.20 | 5.86% | +| 开 | 开 | 关 = 1 (Default) | 109.49 | 6.14%★ | +| 开 | 开 | 开 = 2 | 108.32 | 5.01% | + +场景3:num_experts = 8 (num_local_experts = 2) + +| pipe-experts | multi-stream | multi-data | 平均TFLOPs | 提升幅度 | 
+|:------------:|:------------:|:---------------:|:--------:|:-------:| +| 关 | 关 | 关 = 1 (Default) | 103.98 | / | +| 开 | 开 | 关 = 1 (Default) | 109.32 | 5.13%★ | +| 开 | 开 | 开 = 2 | 108.38 | 4.23% | + +## 注意事项 +1、在开启`--pipe-experts-multi-data N`时,若`N`过大,导致输入数据切分过细,会引入多余的 cast 和 add 算子,导致额外的开销,引起性能恶化。 +2、目前 8 机推荐在 num_local_experts = 1 时开启`--pipe-experts-multi-data 4`来获得最佳性能,在 num_local_experts > 1 +时,不推荐开启`--pipe-experts-multi-data N`。 +3、单机,当 num_local_experts 为 1 或 2 时,`N`推荐设置为 2,当 num_local_experts 为 4 及以上时,不推荐开启多副本。 +4、`--pipe-experts-multi-data N`特性主要被用来提供 num_local_experts 为 1 时无法进行 experts 间的细粒度切分的替代方案。 +5、虽然兼容 num_local_experts > 1 的场景,开启后可以进一步提高计算通信掩盖比例,但会新引入 cast 和 add +算子操作,当掩盖的收益不足以抵消新引入算子的拖慢时,就会导致性能恶化。 +6、在未开启SP`--sequence-parallel`时,无法开启多流水线`--pipe-experts-multi-stream`。 +7、未适配MoE token dropless特性。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/pipeline-parallel.md b/model/train/yoco_moe/docs/features/pipeline-parallel.md new file mode 100644 index 000000000..f46f7164d --- /dev/null +++ b/model/train/yoco_moe/docs/features/pipeline-parallel.md @@ -0,0 +1,30 @@ +# 流水线并行 + +## 问题分析 + +在大模型时代,单一设备无法存储整个模型。模型并行可以在训练过程中将模型加载到多个设备上。在朴素的模型并行中,设备需要等待前一阶段的计算结果,导致计算资源利用率严重不足。同时,设备需要储存计算的中间结果,存储开销大。 + +## 解决方案 + +采用流水线的思想,减少不同机器之间等待的时间。同时尽可能地缩短前向计算与反向计算之间的距离,以减少内存消耗 + +### 解决思路: + +* 将整个网络分阶段(stage),不同阶段在不同的设备上,前后阶段流水分批工作,通过一种“接力”的方式并行。 +* 开始训练时,会先进行预热。预热完成后,每进行一个前向运算,就安排一个后向运算。最后进行冷却,完成剩余阶段。如下图所示 + +![alt text](../../sources/images/pipedream1F1B.png) + +[原文链接](https://arxiv.org/pdf/1806.03377) +## 使用场景 + +在训练模型时,为了降低单个设备的存储开销,提升计算效率,将模型加载到多卡来进行流水线并行。 + +## 使用方法 + +设置`--pipeline-model-parallel-size`,默认为1,根据用户需求配置。 + +## 使用效果 + +提升计算效率,减少内存消耗 + diff --git a/model/train/yoco_moe/docs/features/recomputation.md b/model/train/yoco_moe/docs/features/recomputation.md new file mode 100644 index 000000000..eb0b5c282 --- /dev/null +++ b/model/train/yoco_moe/docs/features/recomputation.md @@ -0,0 +1,32 @@ +# Megatron 重计算 +## 问题分析 + 
+大模型训练过程中,通常要求保留前向计算的激活值用于后续的反向梯度计算,并且需要保存结果的数量会随着模型层数的增加线性增加,大大增加芯片的内存压力。 + +## 解决思路 + +在前向过程和loss计算时直接删除激活值,反向梯度计算需要用时再重新计算一遍激活值,从而有效缩短激活值的生命周期,缓解内存压力。 + +## 使用场景 +主要用于训练场景,重计算分为:选择性重计算和完全重计算。 + +选择性重计算(推荐使用):只重计算transformer中的core_attention部分,将占用较少内存存储空间且重计算开销较高的激活保留在内存中,并将占用较多内存存储空间但重新计算开销相对较低的激活重新计算。 + +完全重计算:对于内存非常有限场景,仅将输入保存,重新计算所有激活值。 + +## 使用方法 + +选择性重计算:脚本中添加`--recompute-activations`开启选择性重计算。 + +完全重计算:脚本中配置`--recompute-granularity full`开启完全重计算,开启完全重计算时使用`--recompute-method uniform/block` 确认具体重计算方式。 + +`--recompute-method uniform`:将Transformer层均匀划分组(每组大小`--recompute-num-layers`),按组存储输入和激活值。 + +`--recompute-method block`:将前`--recompute-num-layers`个transformer层重计算,剩余层不进行重计算。 + +同时配置`--recompute-activations` 、`--recompute-granularity full`生效选择性重计算。 + +当脚本配置了`--recompute-method block`、`--recompute-granularity full`、`--num-layers-per-virtual-pipeline-stage N`参数时,用户可以通过`--recompute-num-layers N`参数来配置每个vpp stage做多少层重计算,参数`--enable-recompute-layers-per-pp-rank`可用于修改此情况下`--recompute-num-layers N`参数的语义,新的语义表示无视vpp,按每个pp stage来配置重计算层数。 + +## 使用影响 +显存开销降低、性能降低。 diff --git a/model/train/yoco_moe/docs/features/recompute_independent_pipelining.md b/model/train/yoco_moe/docs/features/recompute_independent_pipelining.md new file mode 100644 index 000000000..0f2511a60 --- /dev/null +++ b/model/train/yoco_moe/docs/features/recompute_independent_pipelining.md @@ -0,0 +1,33 @@ +# 重计算流水线独立调度 +## 问题分析 + +在目前的流水线调度中,重计算由反向计算触发,与反向计算绑定在一起调度,意味着重计算需要等待下一个stage返回梯度才可以开始计算。然而重计算并不需要用到反向计算的梯度,这导致bubble的增多和性能的下降。 + +## 解决方案 + +为了将重计算和反向计算独立调度,需要将重计算的调度修改为由调度器主动触发,并修改调度器,将重计算作为一个调度单元加入到调度器中,这使我们获得了自由地插入或去除部分重计算的能力,进而可以在内存和性能方面做出优化。 + +### 解决思路 +通过torch的saved_tensors_hooks实现一种新的重计算方法,在反向计算前合适的时机主动触发或者直接去除部分重计算,从而实现对内存或性能的优化。 + +## 使用场景 + +在pipelining_with_interleaving调度中,若用户未开启重计算,则可以利用bubble主动插入重计算,以极小的性能代价换取内存峰值的降低,将需要保留激活值的前向计算块的个数减少到pp * vp。 +

+ +在pipelining_with_interleaving调度中,若用户已开启重计算,则可以通过解除重计算与后一个stage的反向计算间的依赖关系从而提前重计算,以及去除模型最后一层的重计算,实现计算性能的提升。 +

+ +## 使用方法 + +脚本中添加: --recompute-in-bubble 可开启利用bubble进行重计算功能,实现内存节省。 +使用条件:必须开启虚拟流水并行特性,使用此功能前不能开启重计算,recompute_num_layers参数需为None或0。 + +脚本中添加: --recompute-in-advance 可开启提前重计算以及去除不必要重计算功能,实现训练性能提升。 +使用条件:必须开启虚拟流水并行特性,使用此功能前需要开启重计算,且不支持recompute_method为uniform,recompute_num_layers不能为None或0。 + +#### 注意: + +1. --recompute-in-bubble 特性暂不兼容完全重计算uniform、完全重计算block、选择重计算、自适应选择重计算特性,不兼容moe场景下的--moe-adaptive-recompute-activation、--moe-layer-recompute特性。 +2. --recompute-in-advance 特性暂不兼容完全重计算uniform、选择重计算、自适应选择重计算特性。 +3. --recompute-in-bubble和--recompute-in-advance两者不可同时开启。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/reuse-fp32-param.md b/model/train/yoco_moe/docs/features/reuse-fp32-param.md new file mode 100644 index 000000000..c5ae38fb9 --- /dev/null +++ b/model/train/yoco_moe/docs/features/reuse-fp32-param.md @@ -0,0 +1,46 @@ +# 参数副本复用 + +## 问题分析 + +当前所有大模型训练客户场景都采用混精训练,导致在训练过程中持续保存了计算权重与状态权重。实际上,两者所使用的生命周期并没有重叠,故可通过数值变换的方式**等价**去除冗余。 + + +## 解决方案 + +基于大模型混合精度训练中BF16计算参数(负责前后向计算)及FP32参数副本(负责参数更新)不需同时存在的特点,和两者在数值上的对应关系,设计内存共用算法。 + + + +### 解决思路 + +具体算法步骤如下: +1. FP32 = BF16 + Residual; +2. 前向计算开始前将FP32转换为BF16并保存Residual; +3. 优化器更新前基于BF16和Residual恢复FP32参数并进行更新; +4. 使用int32加减法来等价模拟原始逻辑中FP32<->BF16的相互转换(IEEE 754向偶数舍入)。 + + + +参数副本复用流程如下图所示: +

+ +数值变化的详细逻辑如下图所示: +

+ +## 使用场景 + +1. 该特性主要用于使用BF16的训练场景。 + +## 使用方法 + +设置`--reuse-fp32-param`,即可调用该算法。 + +## 使用效果 + +1. 对于Float16OptimizerWithFloat16Params,整体能够节省`sizeof(bfloat16)*模型参数量`的静态内存,性能劣化在多个模型上测试小于1%。 +2. 对于开启分布式优化器的训练,整体能够节省`sizeof(bfloat16)*模型参数量 / DP`的静态内存,性能劣化在多个模型上测试小于1%。 + +## 注意事项 + +1. 使用legacy model训练时,`reuse_fp32_param`暂不支持和`--overlap-param-gather`一起使用。 +2. 使用fused_ema_adamw优化器时,不支持同时开启`reuse_fp32_param`。 diff --git a/model/train/yoco_moe/docs/features/ring-attention-context-parallel.md b/model/train/yoco_moe/docs/features/ring-attention-context-parallel.md new file mode 100644 index 000000000..becbb1eb4 --- /dev/null +++ b/model/train/yoco_moe/docs/features/ring-attention-context-parallel.md @@ -0,0 +1,47 @@ +# Ring Attention长序列并行 + +## 问题分析 + +从生成性AI到科研模型,长序列训练正在变得非常重要。 在生成性AI领域,会话式AI、长文档摘要和视频生成等任务都需要在空间和时间层面对长上下文进行推理。 同样,章节和书籍级别的摘要(数万甚至数十万字)在会话式AI和摘要任务中也非常重要。现有的数据、张量和流水线等并行方法无法在序列维度进行切分。当序列维度(S)增长时,训练内存开销会以 $O$($S^2$) 的速度增长。因此需要针对长序列场景进行特定的优化解决长训练场景的训练需求。 + +## 解决方案 + +支持Ring Attention长序列并行方案,以此解决序列维度扩展问题。具体细节参见原文: +> Ring Attention with Blockwise Transformers for Near-Infinite Context (https://arxiv.org/pdf/2310.01889) + +### 解决思路: + +Ring Attention借鉴了分块Softmax原理,在不需要获取整个序列的完整矩阵情况下进行分块attention计算。因此作者提出以分块方式执行自注意力和前馈网络计算,跨多个设备分布序列维度。具体地,该方法在进程之间构建注意力计算块的环状通信结构(Ring),每个进程具有一个切分后的本地QKV块。在计算完本地的attention后,通过向后发送和向前获取KV块,遍历进程设备环,以逐块的方式进行注意力和前馈网络计算。同时,本地的attention计算和KV块的通信理想情况下可以互相掩盖,从而消除了额外引入的通信开销。另外该方案在计算attention的过程中全程不需要数据拼接,支持的序列长度理论上可以无限拓展。 + +## 使用场景 + +当使用GPT类模型进行训练,同时数据进MoE层时实际序列长度8K以上。 + +不同于Ulysses方案,该方案不需要确保head_size被cp_size整除。 + +可兼容FlashAttention,目前已默认开启FlashAttention。 + +如果想要使得计算和通信可以互相掩盖,理论上需要确保每个计算块分到的序列长度$c \geq F/B$。其中F是每个device的FLOPS,B是每个device间的带宽。具体推导过程参见原文。在实践中,需要确保每个计算块分到的序列长度足够大,才能较好掩盖。 + + +## 使用方法 + +| 重要参数 | 参数说明 | +|-------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 
--context-parallel-size [int] | 开启CP对应的数量,默认为1,根据用户需求配置。 | +| --seq-length [int] | 输入序列的长度。 | +| --use-cp-send-recv-overlap | 建议开启,开启后支持send receive overlap功能。 | +| --attention-mask-type [general/causal] | 可选,设置Mask计算类型,默认是causal(倒三角)Mask计算,设置general代表全量计算。 | +| --context-parallel-algo megatron_cp_algo | 长序列并行算法选项,默认项为`ulysses_cp_algo`,当设置为`megatron_cp_algo`时开启Ring Attention。 | +| --megatron-cp-in-bnsd | 开启后,FA使用BNSD计算。 | + +## 使用效果 + +利用多个计算设备对输入序列进行并行切分,降低单设备的内存消耗,相比不开启序列并行单步耗时增加,相比重计算计算效率提升。 + + +## 注意事项: + +1. 开启Context Parallel时需要同时开启Flash Attention特性,否则特性不支持。 +2. 在使用GPT类模型进行训练的场景下,建议attention-mask-type设置为causal。 +3. 在8k的序列长度情况下,由于计算的时间缩短,cp功能分割之后的send receive的时间反而会长于计算时间,造成性能的下降,所以建议配置seq-length / context-parallel-size > 8k以获取最佳效果。具体公式参考:S/(T·alpha) >= 1/(W·beta),其中,S=seq-length / context-parallel-size, T表示芯片的理论算力,alpha表示计算效率,W表示理论通信带宽,beta表示带宽利用率。 diff --git a/model/train/yoco_moe/docs/features/rms_norm.md b/model/train/yoco_moe/docs/features/rms_norm.md new file mode 100644 index 000000000..9612bdc35 --- /dev/null +++ b/model/train/yoco_moe/docs/features/rms_norm.md @@ -0,0 +1,15 @@ +# rms_norm融合优化 +## 问题分析 +rms_norm常见于LLaMA、LLaMA2、Baichuan等LLM模型中用于归一化,由于torch侧没有提供rms_norm算子的接口,因此在模型中通常是以自定义的形式出现,这种形式的执行效率相对较低。 + +## 解决方法 +MindSpeed将rms_norm操作合并成一个算子,减少数据传输和临时存储。算子接口见[link](../ops/rms_norm.md)。 + +## 使用场景 +模型使用rms_norm作为归一化方式,脚本中设置了`--normalization RMSNorm`。 + +## 使用方法 +设置`--use-fused-rmsnorm`即可调用rms_norm融合算子。mcore分支下仅支持使能该融合算子。 + +## 使用效果 +开启融合算子可以节省内存,提升性能。 diff --git a/model/train/yoco_moe/docs/features/rotary-embedding.md b/model/train/yoco_moe/docs/features/rotary-embedding.md new file mode 100644 index 000000000..f3afd4d11 --- /dev/null +++ b/model/train/yoco_moe/docs/features/rotary-embedding.md @@ -0,0 +1,25 @@ +# Rotary Position Embedding 融合优化 + +## 问题分析 + +Rotary Position Embedding(RoPE)是一种大模型文本位置信息编码(Position Embedding)的解决方案。RoPE通过绝对位置编码的形式实现了相对位置信息的注入,融合了绝对和相对位置编码的优点,同时具备较好的长度外推性。目前RoPE方案已经被较多的大模型采用,例如LLaMA和GLM。 +
+然而,目前torch并没有针对RoPE做特定的实现和优化,在模型侧通常是通过自定义的方式实现,且Rotary Embedding的计算方式较为复杂,实现方式的计算和内存开销需要优化。 + +## 解决方案 +`torch_npu`侧将Rotary Embedding操作合并成一个算子,减少数据传输和临时储存,优化模型训练性能。MindSpeed调用`torch_npu`侧接口实现算子融合。 + +## 使用场景 + +模型侧使用了Rotary Embedding作为Position Embedding解决方案。 + +## 使用方法 + +首先确保`--position-embedding-type`选项设置为`rope`。 + +同时开启`--use-fused-rotary-pos-emb`选项,以启用融合算子。 + +## 使用效果 + +使用融合算子可以提升训练性能。 + diff --git a/model/train/yoco_moe/docs/features/sequence-parallel.md b/model/train/yoco_moe/docs/features/sequence-parallel.md new file mode 100644 index 000000000..84c7f294d --- /dev/null +++ b/model/train/yoco_moe/docs/features/sequence-parallel.md @@ -0,0 +1,33 @@ +# 序列并行 + +## 问题分析 + +张量模型并行可以降低显存占用,加快训练速度,但是它要求将模型各层划分为独立的、可管理的块,所以不适用于 LayerNorm 和 Dropout 等操作。虽然 LayerNorm 和 Dropout 等操作的计算成本很低,但它们确实需要大量冗余内存来存储激活。为了分摊张量并行中无法切分的显存和计算,引入了序列并行的方法。 + +## 解决方案 + +在张量模型并行的基础上,进一步对 LayerNorm 和 Dropout 模块的序列维度进行切分。 + +### 解决思路: + +将 LayerNorm 以及 Dropout 等操作的输入按序列维度进行了切分,使得各个设备上面只需要做一部分的 Dropout 和 LayerNorm 等操作即可。 + +为了方便理解,以下图为例:假设输入$X$的大小为$ s \times b \times h $,按照序列维度切分$X=[X_1^s,X_2^s]$,经过LayerNorm操作后的结果为$Y=[Y_1^s,Y_2^s]$,随后进行张量模型并行。 + +![image.png](../../sources/images/sequence-parallel.png) + +[原文链接](https://arxiv.org/pdf/2205.05198) + +## 使用场景 + +使用训练模型时,将模型加载到多卡,使用张量模型并行后显存依旧占用过高或超出了处理器显存限制,或者训练时间过长,可以开启序列并行来降低显存占用,加快训练速度。 + +## 使用方法 + +首先确保训练参数中加入`--tensor-model-parallel-size N`,设置张量模型并行。 + +同时添加`--sequence-parallel`,开启序列并行。 + +## 使用效果 + +利用多个设备,降低显存开销,加快训练速度。 diff --git a/model/train/yoco_moe/docs/features/shared-experts.md b/model/train/yoco_moe/docs/features/shared-experts.md new file mode 100644 index 000000000..91c510fab --- /dev/null +++ b/model/train/yoco_moe/docs/features/shared-experts.md @@ -0,0 +1,28 @@ +# 共享专家特性 + +## 方案介绍: + +随着混合专家模型MoE的演进,产生了路由专家和共享专家的概念。针对路由专家,输入数据会经过路由模块选择概率较高的专家进行计算;而对于共享专家,输入数据无需经过路由模块计算,所有数据都会经过共享专家计算。路由专家和共享专家的计算结果相加后作为MoE模块最终的计算结果。 + +通过将共享专家和路由专家结合,MOE模型能够在不同的输入情况下既关注到输入数据的共性也能关注到输入数据的差异性,从而提高模型的泛化能力。 + 
+共享专家如下图c所示(参考论文:https://arxiv.org/pdf/2401.06066 ): +![img](../../sources/images/shared-experts.png) + +## 使用场景 + +MoE场景下使用:`--moe-model-type megatron_moe` + +## 使用方法 + +共享专家相关命令和参数说明: + +| 命令参数 | 参数说明 | +|--------------------------|------------------------| +| `--n-shared-experts [int]` | 共享专家数量 | + +## 注意事项 + +1. 开启共享专家需要在mcore模式下,即没有设置`--use-legacy-models` + +2. 共享专家中间隐藏层大小的配置命令与路由专家相同:`--ffn-hidden-size [int]` diff --git a/model/train/yoco_moe/docs/features/smart_swap.md b/model/train/yoco_moe/docs/features/smart_swap.md new file mode 100644 index 000000000..7142eb6f7 --- /dev/null +++ b/model/train/yoco_moe/docs/features/smart_swap.md @@ -0,0 +1,39 @@ +# SmartSwap + +## 问题分析 + +在用户训练过程中产生的OOM问题,现有的内存方案主要为重计算和Swap两个方法。重计算会增加计算开销,而Swap路线需要用户自己编写和控制异步换入换出时机和内存管理,增加较多的使用成本。 + +## 解决方案 + +为了在最大限度地利用计算设备显存的同时,提高模型训练的性能,我们支持通过自适应迭代生成Swap策略,这一特性称为SmartSwap。 + +此功能通过数据采样,策略生成,策略执行等流程的循环迭代,选择有限次数验证下的最优策略。 +在迭代中分为3个阶段。 + +- WarmUp阶段,仅执行数据采样。采集Tensor生命周期信息供后续分析。此时OOM时会通过覆盖底层的内存异常,使得模型能够继续运行。 +- SearchPolicy阶段,执行数据采样和策略执行。 在策略生成中,包括候选内存过滤,内存策略生成,内存模拟排布等步骤。 +- Stable阶段,仅执行策略执行。在策略执行中,通过多流异步执行内存Swap,掩盖对计算流的耗时影响。 + +![smart_swap_flowchart](../../sources/images/smart_swap_flowchart.png) + +## 使用场景 + +1. OOM场景:当前训练配置下,出现OOM报错;可开启此功能,将OOM报错拦截,自动生成Swap策略,使训练在可用最大显存内运行。 +2. 非OOM场景:当前训练配置下,未出现OOM报错;可开启此功能,根据配置文件中的减少显存值,自动生成Swap策略,使训练在指定显存内运行。 +3. 重计算的替代场景:减少模型代码中的重计算生效范围,节省重计算过程。 + +## 使用方法 + +1. 在训练脚本中添加此功能的使能参数:`--smart-swap`。 +2. (可选)修改此功能的配置文件`mindspeed/core/memory/smart_swap/swap_policy_config.py`进行调试。 + +## 使用效果 + +1. 通过减少TP和PP数,获得性能收益;例如在llama2(8p,pp1,seqlen 8K,layer 32),将tp8改为tp1,性能收益25%; +2. 通过关闭或者部分关闭全重计算,获得性能收益;例如在llama2(8p,pp1,seqlen 16K,layer 40),将全重计算关闭,性能收益28%; + +## 注意事项 + +1. SmartSwap适配静态序列场景;暂未适配动态场景,例如MOE类场景。 +2. 
SmartSwap将占用Host内存,例如单机8卡,若每卡均换出`30 GB`到Host,则单机至少需要Host内存`8*30=240 GB`。 diff --git a/model/train/yoco_moe/docs/features/swap_attention.md b/model/train/yoco_moe/docs/features/swap_attention.md new file mode 100644 index 000000000..72003deb9 --- /dev/null +++ b/model/train/yoco_moe/docs/features/swap_attention.md @@ -0,0 +1,51 @@ +# swap-attention + +## 问题分析 + +大模型训练过程中,使用重计算功能可以大幅度减少内存,但会增加训练过程的计算时长,导致执行效率较低。 + +## 解决方案 + +新增swap-attention功能,利用设备内存和CPU内存来存放激活值,在梯度反传的同时从CPU内存预取激活值来减少重计算,充分利用H2D高带宽的优势以网补存、以网强算,提升MFU,加速大模型的训练。 + +![输入图片说明](../../sources/images/swap_attention.png) + +## 使用场景 + +### a. 优化性能: + +在需要开启全重计算的场景下,可以通过开启`--swap-attention`和`--recompute-num-layers [int]`替换全重计算,以达到提升性能的目的。 + +### b. 内存节省: + +对于不需要重计算的场景,只开启`--swap-attention`,可以在几乎不损耗性能的情况下,节省内存,以支持更大的模型的配置。 + + +## 使用方法 + +需要添加参数`--swap-attention`。使用前提是开启flash attention融合算子。 + +可选参数`--swap-modules`:参数类型为string,默认值为"input_norm,self_attention,post_attention_norm",可根据模型自行配置module,在mcore场景下默认仅预取self_attention module。 + +### a. 仅开启预取功能:`--swap-attention` + +开启后,将对每一层的attention层的激活值进行预取,提高计算效率。 + +![输入图片说明](../../sources/images/swap_attention1.png) + +### b. 开启预取功能并且指定重计算层数:`--swap-attention`和`--recompute-num-layers [int]` + +开启后,将对每一层的attention层的激活值进行预取,同时,对前[int]层的全连接层进行重计算。 + +![输入图片说明](../../sources/images/swap_attention2.png) + +## 使用效果 + +与完全重计算相比 ,有性能收益; +与不重计算相比,有内存收益; + +## 注意事项: + +1. `--recompute-num-layers [int]`中的[int]层数指的是每一个pp stage的层数。[int]的取值应该小于等于num-layers/pipeline-model-parallel-size. +2. 暂不兼容自适应选择重计算特性. +3. 
若出现性能严重劣化,可能是跨NUMA内存访问引起,可尝试通过进程绑核缓解,实现方法可参考[mstt绑核工具](https://gitee.com/ascend/mstt/tree/master/profiler/affinity_cpu_bind) diff --git a/model/train/yoco_moe/docs/features/swiglu.md b/model/train/yoco_moe/docs/features/swiglu.md new file mode 100644 index 000000000..8653eb7d7 --- /dev/null +++ b/model/train/yoco_moe/docs/features/swiglu.md @@ -0,0 +1,17 @@ +# swiglu融合优化 + +## 问题分析 +swiglu常见于LLaMA、LLaMA2、Baichuan等大模型中的激活层,由于torch侧没有提供swiglu算子的接口,因此在模型中通常是以小算子的形式出现,这种形式的执行效率相对较低。 + +## 解决方法 +MindSpeed将swiglu操作合并成一个融合算子,减少数据传输和临时存储。算子接口见[link](../ops/swiglu.md)。 + +## 使用场景 +模型使用swiglu作为MLP层激活函数,脚本中设置了`--swiglu`。 + +## 使用方法 +设置`--use-fused-swiglu`即可调用swiglu融合算子。mcore分支下仅支持使能该融合算子。 + +## 使用效果 +开启融合算子可以节省内存,提升性能。 + diff --git a/model/train/yoco_moe/docs/features/tensor-parallel-2d.md b/model/train/yoco_moe/docs/features/tensor-parallel-2d.md new file mode 100644 index 000000000..f88473c01 --- /dev/null +++ b/model/train/yoco_moe/docs/features/tensor-parallel-2d.md @@ -0,0 +1,156 @@ +# 高维张量并行 + +## 问题分析 + +大模型训练时,张量并行(TP)将模型参数切分到多个设备上以减少其内存的占用,在训练过程中为了更新参数梯度信息等,需要引入allreduce通信。当集群规模较大时,如果设置TP域很大时,其通信开销会变得很大,使得训练效率降低。 + +## 解决方案 + +为了提高大规模TP域通信效率,采用高维张量并行,其将激活值和参数同时切分到多个计算设备上,相对1D-TP降低了通信域、减少通信次数,从而减少通信时间,提升模型训练的性能。 + +### 解决思路 + +#### 2D张量并行策略 + +给定TP域大小,通过建立多通信域,在原Megatron(ColumnParallelLinear、RowParallelLinear)增加了一维的切分维度。将原tp通信域进行分解为两个子通信域tp_x和tp_y,需要满足`tp = tp_x * tp_y`。以MLP层为例,其实现过程如下: + +![img](../../sources/images/tensor-parallel-2d.png) + +#### 分布式normalization + +在transformer网络中,normalization会将每一层神经元的输入都转成均值方差都一样的,加快其收敛。在MLP和attention层分别进行2D张量并行时,其输入和输出都分别在first-dim和last-dim做了tp_x和tp_y的切分,如果继续使用原LayerNorm或者RMSNorm需要先将input进行沿first-dim进行all-gather(x)和沿last-dim进行all-gather(y)操作,才能保证input数据的完整性。为了提升这部分的性能,采用了分布式normalization。其处理流程如下: + +##### **步骤1:计算输入的总和** + +首先,计算输入张量$\mathbf{x}$ 在最后一个维度上的总和: + +$$ +e_x = \sum_{i=1}^{H} x_i +\ +$$ + +##### **步骤2:分布式归约操作(All-Reduce)** + +将步骤1中的总和 $e_x$ 在所有tp_y通信域进程中进行归约(求和),确保每个进程都拥有其通信域全局总和: +$$ +\ +e_x^{\text{global}} = 
\text{AllReduce}\left( e_x \right) = \sum_{p=1}^{P} \sum_{i=1}^{H} x_i^{(p)} +\ +$$ + +其中: +- $P$ 是分布式进程的数量。 +- $x_i^{(p)}$ 表示第 $p$ 个进程中第 $i$ 个元素的值。 + +##### **步骤3:计算输入元素的平方和** + +接下来,计算输入张量每个元素的平方和: + +$$ +s_x = \sum_{i=1}^{H} x_i^2 +$$ + +##### **步骤4:分布式归约操作(All-Reduce)** + +将步骤3中的平方和 $s_x$ 在所有tp_y通信域进程中进行归约(求和),确保每个进程都拥有其通信域全局平方和: + +$$ +s_x^{\text{global}} = \text{AllReduce}\left( s_x \right) = \sum_{p=1}^{P} \sum_{i=1}^{H} \left( x_i^{(p)} \right)^2 +$$ + +##### **步骤5:中心化输入数据** + +将输入数据 $\mathbf{x}$ 中心化,即减去平均值。平均值 $\mu$ 计算如下: + +$$ +\mu = \frac{e_x^{\text{global}}}{H} +$$ + +然后,中心化输入: + +$$ +x'_i = x_i - \mu \quad \forall i \in \{1, 2, \dots, H\} +$$ + +##### **步骤6:计算均值的平方** + +计算全局均值的平方: + +$$ +e_x'^2 = \mu^2 = \left( \frac{e_x^{\text{global}}}{H} \right)^2 +$$ + +##### **步骤7:计算归一化因子** + +计算归一化因子 $\gamma$,用于标准化输入数据。公式如下: + +$$ +\gamma = \frac{1}{\sqrt{ \left( \frac{s_x^{\text{global}}}{H} \right) - e_x'^2 + \epsilon }} +$$ + +这里: +- $\frac{s_x^{\text{global}}}{H}$ 是全局平方和的平均值。 +- $e_x'^2$ 是全局均值的平方。 +- $\epsilon$ 是一个小常数,防止分母为零,增加数值稳定性。 + +##### **步骤8:标准化输入数据** + +将中心化后的输入数据 $\mathbf{x}'$ 与归一化因子 $\gamma$ 相乘,得到标准化后的数据 $\mathbf{\hat{x}}$: + +$$ +\hat{x}_i = x'_i \cdot \gamma \quad \forall i \in \{1, 2, \dots, H\} +$$ + +##### **步骤9:应用权重和偏置** + +最后,将标准化后的数据与权重向量 $\mathbf{W}$ 相乘,并根据是否存在偏置向量 $\mathbf{b}$ 来决定最终输出。 + +- **如果存在偏置**: + +$$ +\text{output}_i = b_i + W_i \cdot \hat{x}_i \quad \forall i \in \{1, 2, \dots, H\} +$$ + +- **如果不存在偏置**: + +$$ +\text{output}_i = W_i \cdot \hat{x}_i \quad \forall i \in \{1, 2, \dots, H\} +$$ + + +## 使用场景 + +当TP通信域需要设置较大时,通信效率较低,需要通过分解通信域来提升其通信效率。 + +## 使用方法 + +在训练脚本的参数列表中加入 `--tp-2d`,开启2D张量并行,`--tp-x N1`和`--tp-y N2`分别设置其x轴、y轴的切分大小,其中需满足`tp = N1 * N2`(N1 > 1, N2 > 1)。 + +其他优化参数,用于辅助高维张量并行特性进行通信隐藏,需要开启tp-2d时生效: +- `--enable-overlap-ag-with-matmul`: 在linear层forward计算时,开启all-gather通信和matmul进行隐藏,以便加速 +- `--enable-overlap-matmul-with-rs`: 在linear层forward计算时,开启matmul计算和reduce-scatter通信进行隐藏,以便加速 +- `--coc-fused-kernel`: 
在linear层forward计算时,开启计算通信融合算子,将matmul计算与all-gather、reduce-scatter都进行算子级融合,实现进一步加速(该特性不与前两个特性兼容,依赖ATB加速库) +- `--enable-backward-overlap-ag-with-matmul`: 在linear层backward计算梯度时,开启all-gather通信和matmul进行隐藏,以便加速(该特性依赖ATB加速库) + +上述3个forward计算优化参数`--enable-overlap-ag-with-matmul`、`--enable-overlap-matmul-with-rs`、`--coc-fused-kernel`只能同时开启1个。 + +注意事项: + +Megatron-MOE支持情况如表1所示 + +**表1** 高维张量并行支持Megatron-MOE + +| Dispatcher | MLP | 支持情况 | 性能优化 | +|------------|----------------|------|------| +| AllGather | Sequential MLP | ✅ | ❌ | +| AllGather | Grouped MLP | ✅ | ❌ | +| All2All | Sequential MLP | ❌ | ❌ | +| All2All | Grouped MLP | ❌ | ❌ | + +当前高维张量并行特性不与`--sequence-parallel`、`--use-fused-rmsnorm`等特性相兼容,请根据实际情况调整配置。 + +## 使用效果 + +在llama3-405B模型训练时,tp=16情况下,开启2D张量并行,tp_x=8,tp_y=2,相比原Megatron 1D张量并行性能提升5%+。 +开启coc-fused-kernel和enable-backward-overlap-ag-with-matmul通信计算融合优化后,进一步提升性能5%+。 +其他场景下,由于计算效率和通信组的划分差异,需根据tp_x和tp_y实际调优情况进行配置,部分配置不能保证效率提升。 diff --git a/model/train/yoco_moe/docs/features/tensor-parallel.md b/model/train/yoco_moe/docs/features/tensor-parallel.md new file mode 100644 index 000000000..5632722d9 --- /dev/null +++ b/model/train/yoco_moe/docs/features/tensor-parallel.md @@ -0,0 +1,35 @@ +# 张量并行 + +## 问题分析 + +随着模型越来越庞大,其尺寸远远超出了处理器内存的限制,并且模型训练时间也变得很长。所以需要把一个模型切分,每个计算设备只负责一部分模型的存储与计算。 + +## 解决方案 + +张量并行将模型分成多份并存储在多个计算设备上,这样模型的权重和优化器状态可以分布在多个计算设备上,以此来克服单个计算设备显存无法容纳整个大型模型的问题。并且因为各计算设备只需要处理一部分的模型计算,训练速度也得到显著提高。这种分片策略叫做张量并行。 + +### 解决思路 + +#### 参数矩阵横切 + +1.参数矩阵横切策略按照参数矩阵的行来切分模型,该切分策略需要将输入矩阵也进行按列切分。 +2.横切策略前向时,先切分输入矩阵,对应部分的输入矩阵进入对应部分的模型进行前向计算,之后用all-reduce操作来将各部分模型计算结果累加得到最终前向计算结果。 +3.横切策略反向时,可以计算得出最终输出的梯度和各部分模型的输出梯度相等,先将最终输出的梯度传到各部分模型的输出张量,再用all-gather操作将切分后的输入矩阵的梯度拼接得到最初输入矩阵的梯度。 + +#### 参数矩阵纵切 + +1.参数矩阵纵切策略按照参数矩阵的列来切分模型,该切分策略输入矩阵无需进行切分。 +2.纵切策略前向时,先将输入矩阵送入各部分模型,各部分模型分别进行前向计算得到输出结果,之后用all-gather操作来将各部分模型输出结果拼接得到最终前向计算结果。 +3.纵切策略反向时,先将最终输出的梯度进行切分并将对应的部分传到对应部分模型的输出张量,之后用all-reduce操作将各部分模型的输入矩阵的梯度累加得到最初输入矩阵的梯度。 + +## 使用场景 +
+如果用户发现训练显存占用过高或超出了处理器显存限制,或者训练时间过长,可以开启张量并行来降低单设备显存占用,加快训练速度。 + +## 使用方法 + +在训练脚本的参数列表中加入 `--tensor-model-parallel-size N`,设置张量并行的size。 + +## 使用效果 + +利用多个设备,降低显存占用,加快训练速度。 diff --git a/model/train/yoco_moe/docs/features/ulysses-context-parallel.md b/model/train/yoco_moe/docs/features/ulysses-context-parallel.md new file mode 100644 index 000000000..2e9b101ef --- /dev/null +++ b/model/train/yoco_moe/docs/features/ulysses-context-parallel.md @@ -0,0 +1,31 @@ +# Ulysses长序列并行 + +## 问题分析 + +从生成式AI到科研模型,长序列训练正在变得非常重要。 在生成式AI领域,会话式AI、长文档摘要和视频生成等任务都需要在空间和时间层面对长上下文进行推理。 同样,章节和书籍级别的摘要(数万甚至数十万字)在会话式AI和摘要任务中也非常重要。现有的数据、张量和流水线等并行方法无法解决序列维度的扩展问题。 + +## 解决方案 + +支持 Ulysses长序列并行方案,以此解决序列维度扩展问题。 + +### 解决思路: + +Ulysses 将各个样本在序列维度上分割给参与的计算设备。然后,在 attention 计算之前,它对已分割的查询(Q)、键(K)和值(V)执行 all-to-all 通信操作,以使每个计算设备接收完整的序列,但仅用于注意力头的非重叠子集。这使得参与的计算设备可以并行计算不同的注意力头。最后,Ulysses 还使用另一个 all-to-all 来在注意力头上收集结果,同时重新在序列维度上进行分区。 + +## 使用场景 + +num_head 要能被 tp_size*cp_size 整除。 + +## 使用方法 + +设置`--context-parallel-size`,默认为1,根据用户需求配置。 +同时设置`--context-parallel-algo ulysses_cp_algo`。 + +## 使用效果 + +利用多个计算设备对输入序列进行并行切分,降低单设备的内存消耗,相比不开启序列并行单步耗时增加,相比重计算计算效率提升。 + +## 鸣谢 + +1.GitHub项目地址: +https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-ulysses \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/unaligned-ulysses-context-parallel.md b/model/train/yoco_moe/docs/features/unaligned-ulysses-context-parallel.md new file mode 100644 index 000000000..9eb16a9e1 --- /dev/null +++ b/model/train/yoco_moe/docs/features/unaligned-ulysses-context-parallel.md @@ -0,0 +1,57 @@ +# 非对齐Ulysses长序列并行 + +## 背景与挑战 + +随着生成式AI和科研模型领域的发展,长序列训练变得越来越重要。然而,传统的Ulysses设计要求序列长度(sequence length)必须能够被长序列并行大小(Context Parallel size, CP size)整除。这在处理动态或不规则输入时带来了限制,特别是在多模态应用中,输入数据的序列长度可能无法预测且经常变化。因此,需要一种机制来支持这些非对齐情况下的操作,以适应更广泛的应用场景。 + + +## 解决方案 + +为了解决传统Ulysses设计在处理非对齐序列长度时的局限性,“非对齐 Ulysses”机制通过引入一个抽象基类 `GatherSizeCalculator` 来提供计算 gather size 的接口。Gather size 通常指的是经过 (Ulysses 机制中的)all-to-all 通信后,输出张量在 
`gather_idx` 维度上的大小。该基类定义了任何具体实现都必须提供的 `calculate()` 方法,用于返回整数形式的 gather size 或者 None。 + +基于此接口,实现了两种具体的策略:`DefaultGatherSizeCalculator` 和 `DynamicGatherSizeCalculator`。前者默认返回 None,意味着使用对齐的Ulysses长序列并行;后者则根据当前批次的注意力掩码序列长度动态计算 gather size。这种设计使得系统能够灵活应对不同场景的需求,尤其是在多模态领域中处理 sequence length 不能被 CP size 整除的情况时尤为重要。 + +此外,在 `UlyssesContextAttention` 类中,允许用户注入一个 `gather_size_calculator` 实例,使得系统能够灵活地选择不同的 gather size 计算方法,从而适应不同场景的需求。 + +## 使用场景 + +“非对齐 Ulysses”功能适用于以下几种典型场景: + +- **多模态学习**:当处理图像、视频、文本等多种类型的数据时,由于不同类型数据的序列长度差异较大,难以统一到固定的CP size。 +- **实时数据分析**:在处理流数据时,数据到达的时间不确定,导致每次处理的序列长度也可能不同。 +- **个性化推荐系统**:用户行为数据的序列长度通常各不相同,这种情况下也需要支持非对齐的操作。 + +## 使用方法 + +为了利用“非对齐 Ulysses”功能,用户可以根据业务需求传入基于 `GatherSizeCalculator` 基类的自定义 Calculator,或者直接使用预定义的 `DynamicGatherSizeCalculator`。以下是基本步骤: + +1. 启动脚本中配置长序列并行大小大于1`--context-parallel-size [int]`。 同时配置`--context-parallel-algo ulysses_cp_algo`。 +2. 创建一个继承自 `GatherSizeCalculator` 的自定义计算器类,并实现 `calculate()` 方法。在初始化 `UlyssesContextAttention` 对象时,通过构造函数参数传入自定义的 `gather_size_calculator` 实例。 +3. 
如果不需要复杂的自定义逻辑,可以直接使用 `DynamicGatherSizeCalculator`,它会自动根据当前批次的注意力掩码序列长度计算 gather size。 + +```python +# 示例代码 +import megatron.core.parallel_state as ps +from mindspeed.core.context_parallel.ulysses_context_parallel import UlyssesContextAttention, GatherSizeCalculator, DynamicGatherSizeCalculator +from your_library import FlashSelfAttention + +# 自定义 GatherSizeCalculator +class CustomGatherSizeCalculator(GatherSizeCalculator): + def calculate(self, *args, **kwargs): + # 示例逻辑 + return kwargs.get("gather_size", None) + + +core_attention = FlashSelfAttention() +# 根据实际情况,使用预定义DynamicGatherSizeCalculator()或自定义CustomGatherSizeCalculator() +calculator = DynamicGatherSizeCalculator() +ulysses_attention = UlyssesContextAttention(core_attention, ps.get_context_parallel_group(), + gather_size_calculator=calculator) + +``` +*说明*: +“非对齐 Ulysses”长序列并行暂不兼容Ulysses长序列并行KV缓存优化,即启动脚本设置了--context-parallel-kv-cache-policy为full或者half,系统将自动切换回使用对齐的Ulysses长序列并行机制。 + +## 使用效果 + +通过引入“非对齐 Ulysses”,系统提升了对不同输入长度的适应能力。这不仅解决了传统 Ulysses 在处理动态或不规则输入序列时遇到的问题,而且保持了良好的扩展能力。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/unaligned_linear.md b/model/train/yoco_moe/docs/features/unaligned_linear.md new file mode 100644 index 000000000..b3df9e3d9 --- /dev/null +++ b/model/train/yoco_moe/docs/features/unaligned_linear.md @@ -0,0 +1,35 @@ +# unaligned linear 非对齐线性层 + +## 背景与挑战 + +类Megatron-LM框架已成为大模型训练的主流方案之一,TP(张量并行 Tensor Parallelism)是大模型训练的基本并行范式,该范式在部分场景仍存在不足,例如要求大模型的注意力头数、序列长度要能整除TP,不满足条件将在参数校验中抛出异常;本特性提供了一种注意力头数、序列长度不能整除TP的解决方案。 + +## 解决方案 + +- **序列长度不能整除TP**:和pad方案(将序列长度pad到TP的整数倍)不同,该方案通过序列分配策略来解决,小于 **(seq_len%tp_size)** 的tp卡分配 **(seq_len//tp_size+1)** 序列长度,其他分配 **(seq_len//tp_size)** 序列长度,例如seq_len=1026,tp_size=4, tp0和tp1分配的序列长度为257,tp2和tp3分配的序列长度为256; +- **注意力头数不能整除TP**:和上述方案类似,小于 **(num_attention_heads%tp_size)** 的tp卡分配 **(num_attention_heads//tp_size+1)** 个注意力头,其他卡分配 **(num_attention_heads//tp_size)** 注意力头,例如num_attention_heads=25,tp_size=4, 
tp0分配的注意力头为7个,tp1、tp2和tp3分配的注意力头均为6个;值得注意的是,模型的注意力相关权重TP切分和头数相关,假设hidden_size=3200, qkv_weight大小为(9600,3200)[MHA], dense_weight大小为(3200,3200), tp0的qkv权重大小为(2688,3200),dense权重大小为(3200,896), tp1、tp2和tp3的qkv权重大小为(2304, 3200),dense权重大小为(3200,768);需要注意,GQA的权重切分方案按num_query_groups比例分配。 + +## 使用场景 + +- 序列长度不能整除TP、注意力头数不能整除TP + +## 使用方法 + +在模型参数中添加 --unaligned-linear 参数 + +**注意事项:** +- 非对齐的线性层会引起各TP的负载不均衡 +- 该特性不支持mc2、2d张量并行、cp特性(要求注意力头数能被TP*CP整除)等 +- 特殊的模型结构需要特殊适配该特性,当前已适配MHA、GQA结构,暂不支持MOE、MLA等结构 + +**设置训练脚本参数** +```shell +# 开启非对齐线性层 +--unaligned-linear \ +``` + +## 使用效果 +- **补充功能场景**:补充注意力头数、序列长度不能被TP整除的场景。 +- **潜在性能影响**:各TP处理的注意力头数、序列长度不一致,负载不均衡,建议模型结构设计时考虑该情况。 + +综上所述,该特性是为了完善TP(张量并行)场景下的限制约束,特性本身会带来负载不均衡的性能影响,所以在模型设计和超参优化时注意这一影响。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/variable_seq_lengths.md b/model/train/yoco_moe/docs/features/variable_seq_lengths.md new file mode 100644 index 000000000..408692620 --- /dev/null +++ b/model/train/yoco_moe/docs/features/variable_seq_lengths.md @@ -0,0 +1,40 @@ +# PP支持动态形状 + +## 背景与挑战 + +在深度学习模型训练中,尤其是涉及多模态任务时,输入数据的序列长度往往不是固定的。对于采用流水线并行(Pipeline Parallelism, PP)策略的模型, +处理不同长度的序列通常需要将所有序列调整为统一长度,通过填充或截断来实现。这种做法虽然简化了数据处理和模型设计,但会导致计算资源和内存的浪费,特别是在处理较短序列时,因为需要大量的填充。 +**主要挑战:** +- **内存效率低下**:可能存在大量填充导致内存利用率低。 +- **计算效率低下**:对填充部分进行不必要的计算。 + +## 解决方案 + +为了应对上述挑战,我们引入了对动态形状的支持,允许每个微批次中的序列保持其原始长度。此功能通过在发送张量之前,提前通信张量的形状信息,在各个流水线阶段之间同步即将接收的数据形状,确保内存分配和预处理的准确性。 +## 使用场景 + +- **多变长度文本处理**:如文档分类、机器翻译等任务,其中文本长度差异很大。 +- **增强模型泛化能力**:让模型更好地适应各种长度的输入,从而提高其在实际应用中的表现。 + +## 使用方法 + +**注意事项:** +- 当采用流水线并行策略且序列长度固定时,启用该特性将增加不必要的通信开销,因此不建议使用。 +- 密切监控训练过程中的内存消耗,避免因序列长度变动引起的溢出问题。 + +**设置训练脚本参数** +```shell +# 开启流水线并行, PP >= 2 +--pipeline-model-parallel-size ${PP} \ +# 开启PP支持动态形状 +--variable-seq-lengths +``` + +## 使用效果 + +- **优化资源利用**:与传统方法中所有序列需填充至统一长度相比,本方案通过减少不必要的填充操作,有效节省内存空间,降低计算负载,提高整体性能。 +- **提高灵活性**:该特性赋予模型更强的适应性,使其能够高效处理各种长度的输入数据,进而增强了模型的泛化能力。这对于需要处理变长输入的任务(如文本分类、机器翻译等)尤为重要。 +- **更真实的数据表示**:保留了原始文本的真实长度,有助于模型更准确地捕捉文本特征。 
+- **潜在性能影响**:尽管有诸多优点,但在某些情况下(如开启流水线并行,并且原序列为等长或需被截断以保持一致长度时),启用该特性可能会增加复杂度并减慢训练速度。因此,在设计和部署时应综合考虑这些因素,确保系统整体性能最优化。 + +综上所述,PP支持动态形状是针对特定应用场景的一种有效优化手段,它能够在保证模型性能的同时,显著改善资源利用率和数据处理的灵活性。用户应根据实际情况权衡利弊,决定是否启用这一特性。 \ No newline at end of file diff --git a/model/train/yoco_moe/docs/features/virtual-pipeline-parallel.md b/model/train/yoco_moe/docs/features/virtual-pipeline-parallel.md new file mode 100644 index 000000000..5c02b425e --- /dev/null +++ b/model/train/yoco_moe/docs/features/virtual-pipeline-parallel.md @@ -0,0 +1,43 @@ +# 虚拟流水线并行 + +## 问题分析 + +Pipedream流水线并行切分粒度过大,运行过程中仍然有许多空泡(bubble),计算资源利用率仍有提高空间。 + +## 解决方案 + +将计算进一步细分,减少空泡。 + +### 解决思路: + +在设备数量不变的情况下,分出更多的流水线阶段,以更多的通信量,换取空泡比率降低。 + +![alt text](../../sources/images/virtual-pipeline.PNG) + +[原文链接](https://people.eecs.berkeley.edu/~matei/papers/2021/sc_megatron_lm.pdf) + +为了方便理解,举一个例子:假设模型层数为16,张量并行大小为1,流水线并行大小为4,虚拟流水线并行大小为2。模型会被分为 4 * 2 = 8 个阶段,每个阶段 16 / 8 = 2 个层。 + + Device 0: [1, 2] [9, 10] + Device 1: [3, 4] [11, 12] + Device 2: [5, 6] [13, 14] + Device 3: [7, 8] [15, 16] + +前向的顺序为 device 0 -> device 1 -> device 2 -> device 3 -> device 0 -> device 1 -> device 2 -> device 3 + +## 使用场景 + +想要进一步减小空泡比率,提升性能 + +## 使用方法 + +设置`--num-layers-per-virtual-pipeline-stage N`。表示每个阶段的层数。要求模型的总层数 L % N == 0。要求 `--pipeline-model-parallel-size` > 2。 + +## 使用效果 + +空泡比率进一步减小 + +## 注意事项 + +Megatron虚拟流水并行vpp影响权重切分方式,保存、加载权重时需保证vpp配置一致,才能正常加载; + diff --git a/model/train/yoco_moe/docs/ops/README.md b/model/train/yoco_moe/docs/ops/README.md new file mode 100644 index 000000000..e17e98284 --- /dev/null +++ b/model/train/yoco_moe/docs/ops/README.md @@ -0,0 +1,30 @@ +# How to run the ops? + +## previous installation ++ CANN ++ CANN-NNAL(Ascend-Transformer-Boost) ++ torch_npu + +## compile and install +### 1. set the environment variables + ```shell +# Default path, change it if needed. +source /usr/local/Ascend/ascend-toolkit/set_env.sh + ``` +#### if use Ascend-Transformer-Boost + ```shell +# Default path, change it if needed. 
+source /usr/local/Ascend/nnal/atb/set_env.sh + ``` + +### 2. include head files + ++ newest torch_npu ++ newest cann + +### 3. install scripts +```shell +python3 setup.py build +python3 setup.py bdist_wheel +pip3 install dist/*.whl --force-reinstall +``` diff --git a/model/train/yoco_moe/docs/ops/ffn.md b/model/train/yoco_moe/docs/ops/ffn.md new file mode 100644 index 000000000..261d765f0 --- /dev/null +++ b/model/train/yoco_moe/docs/ops/ffn.md @@ -0,0 +1,230 @@ +# ffn对外接口(只支持前向) + +npu_ffn(Tensor x, Tensor weight1, Tensor weight2, str activation, *, Tensor? expert_tokens=None, + Tensor? expert_tokens_index=None, Tensor? bias1=None, Tensor? bias2=None, Tensor? scale=None, + Tensor? offset=None, Tensor? deq_scale1=None, Tensor? deq_scale2=None, Tensor? antiquant_scale1=None, + Tensor? antiquant_scale2=None, Tensor? antiquant_offset1=None, Tensor? antiquant_offset2=None, + int? inner_precise=None, ScalarType? output_dtype=None) -> Tensor + +计算逻辑: + - **非量化场景:** + + $$ + y=activation(x * W1 + b1) * W2 + b2 + $$ + + - **量化场景:** + + $$ + y=((activation((x * W1 + b1) * deqScale1) * scale + offset) * W2 + b2) * deqScale2 + $$ + + - **伪量化场景:** + + $$ + y=activation(x * ((W1 + antiquantOffset1) * antiquantScale1) + b1) * ((W2 + antiquantOffset2) * antiquantScale2) + b2 + $$ + +**说明:** + 激活层为geglu/swiglu/reglu时,性能使能需要满足门槛要求,即整网中FFN结构所对应的小算子中vector耗时30us且占比10%以上的用例方可尝试FFN融合算子;或在不知道小算子性能的情况下,尝试使能FFN,若性能劣化则不使能FFN。 + +## 非量化场景: +输入: +- x:必选输入,公式中的输入x,数据类型int8, float16, bfloat16,支持输入的维度最少是2维[M, K1],最多是8维 +- weight1: 必选输入,专家的权重数据,公式中的W1,数据类型int4, int8, float16, bfloat16,输入在有/无专家时分别为[E, K1, N1]/[K1, N1] +- weight2: 必选输入,专家的权重数据,公式中的W2,数据类型int4, int8, float16, bfloat16,输入在有/无专家时分别为[E, K2, N2]/[K2, N2] + **说明:** + M表示token个数,对应transform中的BS(B(Batch)表示输入样本批量大小、S(Seq-Length)表示输入样本序列长度);K1表示第一组matmul的输入通道数,对应transform中的H(Head-Size)表示隐藏层的大小);N1表示第一组matmul的输出通道数;K2表示第二组matmul的输入通道数;N2表示第二组matmul的输出通道数,对应transform中的H;E表示有专家场景的专家数。 +- activation: 
必选输入,代表使用的激活函数,公式中的activation,当前支持fastgelu/gelu/relu/silu以及geglu/swiglu/reglu +- expert_tokens: 可选输入,数据类型int64 +- expert_tokens_index:可选输入,数据类型int64 + **说明:** + 不能同时输入expert_tokens和expert_tokens_index + expert_tokens,expert_tokens_index,若不为空时可支持的最大长度为256个 +- bias1: 可选输入,权重数据修正值,公式中的b1,数据类型int32, float16, float32,输入在有/无专家时分别为[E, N1]/[N1] +- bias2: 可选输入,权重数据修正值,公式中的b2,数据类型int32, float16, float32,输入在有/无专家时分别为[E, N2]/[N2] +- inner_precise:可选输入,表示高精度或者高性能选择,数据类型支持int64, 该参数仅对float16生效,bfloat16和int8不区分高精度和高性能。 + - innerPrecise为0时,代表开启高精度模式,算子内部采用float32数据类型计算 + - innerPrecise为1时,代表高性能模式 + +输出: +- y:必选输出,数据类型float16, bfloat16 + +## 全量化场景: +输入: +- x:必选输入,公式中的输入x,数据类型int8, float16, bfloat16,支持输入的维度最少是2维[M, K1],最多是8维 +- weight1: 必选输入,专家的权重数据,公式中的W1,数据类型int4, int8, float16, bfloat16,输入在有/无专家时分别为[E, K1, N1]/[K1, N1] +- weight2: 必选输入,专家的权重数据,公式中的W2,数据类型int4, int8, float16, bfloat16,输入在有/无专家时分别为[E, K2, N2]/[K2, N2] + **说明:** + M表示token个数,对应transform中的BS(B(Batch)表示输入样本批量大小、S(Seq-Length)表示输入样本序列长度);K1表示第一组matmul的输入通道数,对应transform中的H(Head-Size)表示隐藏层的大小);N1表示第一组matmul的输出通道数;K2表示第二组matmul的输入通道数;N2表示第二组matmul的输出通道数,对应transform中的H;E表示有专家场景的专家数。 +- activation: 必选输入,代表使用的激活函数,公式中的activation,当前支持fastgelu/gelu/relu/silu以及geglu/swiglu/reglu +- expert_tokens: 可选输入,数据类型int64 +- expert_tokens_index:可选输入,数据类型int64 + **说明:** + 不能同时输入expert_tokens和expert_tokens_index + expert_tokens,expert_tokens_index,若不为空时可支持的最大长度为256个 +- bias1: 可选输入,权重数据修正值,公式中的b1,数据类型int32, float16, float32,输入在有/无专家时分别为[E, N1]/[N1] +- bias2: 可选输入,权重数据修正值,公式中的b2,数据类型int32, float16, float32,输入在有/无专家时分别为[E, N2]/[N2] +- scale: 可选输入,量化参数,量化缩放系数,数据类型float32,per-tensor下输入在有/无专家时均为一维向量,输入元素个数在有/无专家时分别为[E]/[1];per-channel下输入在有/无专家时为二维向量/一维向量,输入元素个数在有/无专家时分别为[E, N1]/[N1] +- offset: 可选输入,量化参数,量化偏移量,数据类型float32,一维向量,输入元素个数在有/无专家时分别为[E]/[1] +- deq_scale1:可选输入,量化参数,第一组matmul的反量化缩放系数,数据类型uint64, int64, float32, bfloat16,输入在有/无专家时分别为[E, N1]/[N1] +- deq_scale2:可选输入,量化参数,第二组matmul的反量化缩放系数,数据类型uint64, int64, float32, bfloat16,输入在有/无专家时分别为[E, 
N2]/[N2] +- inner_precise:可选输入,表示高精度或者高性能选择,数据类型支持int64, 该参数仅对float16生效,bfloat16和int8不区分高精度和高性能。 + - innerPrecise为0时,代表开启高精度模式,算子内部采用float32数据类型计算 + - innerPrecise为1时,代表高性能模式 +- output_dtype:可选输入,表示输出y的数据类型,为空时输出y的数据类型为float16,不为空时支持float16, bfloat16 + +输出: +- y:必选输出,数据类型float16, bfloat16 + +## 伪量化场景: +输入: +- x:必选输入,公式中的输入x,数据类型int8, float16, bfloat16,支持输入的维度最少是2维[M, K1],最多是8维 +- weight1: 必选输入,专家的权重数据,公式中的W1,数据类型int4, int8, float16, bfloat16,输入在有/无专家时分别为[E, K1, N1]/[K1, N1] +- weight2: 必选输入,专家的权重数据,公式中的W2,数据类型int4, int8, float16, bfloat16,输入在有/无专家时分别为[E, K2, N2]/[K2, N2] + **说明:** + M表示token个数,对应transform中的BS(B(Batch)表示输入样本批量大小、S(Seq-Length)表示输入样本序列长度);K1表示第一组matmul的输入通道数,对应transform中的H(Head-Size)表示隐藏层的大小);N1表示第一组matmul的输出通道数;K2表示第二组matmul的输入通道数;N2表示第二组matmul的输出通道数,对应transform中的H;E表示有专家场景的专家数。 +- activation: 必选输入,代表使用的激活函数,公式中的activation,当前支持fastgelu/gelu/relu/silu以及geglu/swiglu/reglu +- expert_tokens: 可选输入,代表各专家的token数,数据类型int64 +- expert_tokens_index:可选输入,代表各专家的token数,数据类型int64 + **说明:** + 不能同时输入expert_tokens和expert_tokens_index + expert_tokens,expert_tokens_index,若不为空时可支持的最大长度为256个 +- bias1: 可选输入,权重数据修正值,公式中的b1,数据类型int32, float16, float32,输入在有/无专家时分别为[E, N1]/[N1] +- bias2: 可选输入,权重数据修正值,公式中的b2,数据类型int32, float16, float32,输入在有/无专家时分别为[E, N2]/[N2] +- antiquant_scale1: 可选输入,伪量化参数,第一组matmul的缩放系数,数据类型float16, bfloat16,per-channel下输入在有/无专家时分别为[E, N1]/[N1],per-in-group下输入在有/无专家时分别为[E, G, N1]/[G, N1] +- antiquant_scale2: 可选输入,伪量化参数,第二组matmul的缩放系数,数据类型float16, bfloat16,per-channel下输入在有/无专家时分别为[E, N2]/[N2],per-in-group下输入在有/无专家时分别为[E, G, N2]/[G, N2] +- antiquant_offset1: 可选输入,伪量化参数,第一组matmul的偏移量,数据类型float16, bfloat16,per-channel下输入在有/无专家时分别为[E, N1]/[N1],per-in-group下输入在有/无专家时分别为[E, G, N1]/[G, N1] +- antiquant_offset2: 可选输入,伪量化参数,第二组matmul的偏移量,数据类型float16, bfloat16,per-channel下输入在有/无专家时分别为[E, N2]/[N2],per-in-group下输入在有/无专家时分别为[E, G, N2]/[G, N2] + **说明:** + G表示伪量化per-in-group场景下,antiquantOffsetOptional、antiquantScaleOptional的组数。 +- 
inner_precise:可选输入,表示高精度或者高性能选择,数据类型支持int64, 该参数仅对float16生效,bfloat16和int8不区分高精度和高性能。 + - innerPrecise为0时,代表开启高精度模式,算子内部采用float32数据类型计算 + - innerPrecise为1时,代表高性能模式 + +输出: +- y:必选输出,数据类型float16, bfloat16 + +## 约束与限制 + +- 有专家时,专家数据的总数需要与x的M保持一致。 +- 激活层为geglu/swiglu/reglu时,仅支持无专家分组时的float16高性能场景(float16场景指类型为aclTensor的必选参数数据类型都为float16的场景),且N1=2\*K2。 +- 激活层为gelu/fastgelu/relu/silu时,支持有专家或无专家分组的float16高精度及高性能场景,bfloat16场景,量化场景及伪量化场景,且N1=K2。 +- 非量化场景不能输入量化参数和伪量化参数,量化场景不能输入伪量化参数,伪量化场景不能输入量化参数。 +- 量化场景参数类型:x为int8、weight为int8、bias为int32、scale为float32、offset为float32,其余参数类型根据y不同分两种情况: + - y为float16,deqScale支持数据类型:uint64、int64、float32。 + - y为bfloat16,deqScale支持数据类型:bfloat16。 + - 要求deqScale1与deqScale2的数据类型保持一致。 +- 量化场景支持scale的per-channel模式参数类型:x为int8、weight为int8、bias为int32、scale为float32、offset为float32,其余参数类型根据y不同分两种情况: + - y为float16,deqScale支持数据类型:uint64、int64。 + - y为bfloat16,deqScale支持数据类型:bfloat16。 + - 要求deqScale1与deqScale2的数据类型保持一致。 +- 伪量化场景支持两种不同参数类型: + - y为float16、x为float16、bias为float16,antiquantScale为float16、antiquantOffset为float16,weight支持数据类型int8和int4。 + - y为bfloat16、x为bfloat16、bias为float32,antiquantScale为bfloat16、antiquantOffset为bfloat16,weight支持数据类型int8和int4。 +- 当weight1/weight2的数据类型为int4时,其shape最后一维必须为偶数。 +- 伪量化场景,per-in-group下,antiquantScale1和antiquantOffset1中的K1需要能整除组数G,antiquantScale2和antiquantOffset2中的K2需要能整除组数G。 +- 伪量化场景,per-in-group下目前只支持weight是int4数据类型的场景。 +- innerPrecise参数在bfloat16非量化场景,只能配置为0;float16非量化场景,可以配置为0或者1;量化或者伪量化场景,0和1都可配置,但是配置后不生效。 + +## npu_ffn 接口的调用方式 +```python +import os +import torch +import torch_npu +import torchair as tng +from torchair.configs.compiler_config import CompilerConfig +from mindspeed.ops import ffn + +os.environ["ENABLE_ACLNN"] = "true" +config = CompilerConfig() +npu_backend = tng.get_npu_backend(compiler_config=config) + +ACTIVATION_TYPE = 'relu' + +EXPERT_TOKENS_MODE_NONE = 0 +EXPERT_TOKENS_MODE_TOKENS = 1 +EXPERT_TOKENS_MODE_TOKENS_INDEX = 2 + + +class FFNModel(torch.nn.Module): + def __init__(self): + super().__init__() 
+ + def forward(self, x, weight1, weight2, activation, expert_tokens=None, expert_tokens_index=None, + bias1=None, bias2=None, scale=None, offset=None, deq_scale1=None, deq_scale2=None, + antiquant_scale1=None, antiquant_scale2=None, antiquant_offset1=None, antiquant_offset2=None, + inner_precise=0): + return ffn.npu_ffn(x, weight1, weight2, activation, + expert_tokens=expert_tokens, expert_tokens_index=expert_tokens_index, + bias1=bias1, bias2=bias2, inner_precise=inner_precise) + + +def test_ffn(tokens_mode, is_graph_mode=True): + M = 512 + K1 = 256 + N1 = 1024 + K2 = N1 + N2 = K1 + + dtype = torch.float16 + bias_dtype = torch.float16 if dtype == torch.float16 else torch.float32 + + expert_tokens = None + expert_tokens_index = None + + if tokens_mode == EXPERT_TOKENS_MODE_NONE: + x = torch.empty(M, K1, dtype=dtype).uniform_(-1.0, 1.0) + weight1 = torch.empty(K1, N1, dtype=dtype).uniform_(-0.1, 0.1) + weight2 = torch.empty(K2, N2, dtype=dtype).uniform_(-0.1, 0.1) + bias1 = torch.empty(N1, dtype=bias_dtype).uniform_(-0.1, 0.1) + bias2 = torch.empty(N2, dtype=bias_dtype).uniform_(-0.1, 0.1) + elif tokens_mode == EXPERT_TOKENS_MODE_TOKENS: + E = 8 + x = torch.empty(M, K1, dtype=dtype).uniform_(-1.0, 1.0) + weight1 = torch.empty(E, K1, N1, dtype=dtype).uniform_(-0.1, 0.1) + weight2 = torch.empty(E, K2, N2, dtype=dtype).uniform_(-0.1, 0.1) + bias1 = torch.empty(E, N1, dtype=bias_dtype).uniform_(-0.1, 0.1) + bias2 = torch.empty(E, N2, dtype=bias_dtype).uniform_(-0.1, 0.1) + expert_tokens = [64, 64, 64, 64, 64, 64, 64, 64] + expert_tokens = torch.tensor(expert_tokens, dtype=torch.int64) + elif tokens_mode == EXPERT_TOKENS_MODE_TOKENS_INDEX: + E = 8 + x = torch.empty(M, K1, dtype=dtype).uniform_(-1.0, 1.0) + weight1 = torch.empty(E, K1, N1, dtype=dtype).uniform_(-0.1, 0.1) + weight2 = torch.empty(E, K2, N2, dtype=dtype).uniform_(-0.1, 0.1) + bias1 = torch.empty(E, N1, dtype=bias_dtype).uniform_(-0.1, 0.1) + bias2 = torch.empty(E, N2, dtype=bias_dtype).uniform_(-0.1, 0.1) 
+ expert_tokens_index = [64, 128, 192, 256, 320, 384, 448, 512] + expert_tokens_index = torch.tensor(expert_tokens_index, dtype=torch.int64) + + x = x.npu() + weight1 = weight1.npu() + weight2 = weight2.npu() + bias1 = bias1.npu() + bias2 = bias2.npu() + + if expert_tokens != None: + expert_tokens = expert_tokens.npu() + if expert_tokens_index != None: + expert_tokens_index = expert_tokens_index.npu() + + if is_graph_mode: + model = FFNModel().npu() + model = torch.compile(model, backend=npu_backend, dynamic=True) + y = model(x, weight1, weight2, ACTIVATION_TYPE, expert_tokens=expert_tokens, + expert_tokens_index=expert_tokens_index, bias1=bias1, bias2=bias2) + else: + y = ffn.npu_ffn(x, weight1, weight2, ACTIVATION_TYPE, expert_tokens=expert_tokens, + expert_tokens_index=expert_tokens_index, bias1=bias1, bias2=bias2) + print('y.shape:', y.shape) + + +if __name__ == '__main__': + test_ffn(EXPERT_TOKENS_MODE_NONE, True) + test_ffn(EXPERT_TOKENS_MODE_TOKENS, True) + test_ffn(EXPERT_TOKENS_MODE_TOKENS_INDEX, True) + test_ffn(EXPERT_TOKENS_MODE_NONE, False) + test_ffn(EXPERT_TOKENS_MODE_TOKENS, False) + test_ffn(EXPERT_TOKENS_MODE_TOKENS_INDEX, False) +``` \ No newline at end of file diff --git a/model/train/yoco_moe/docs/ops/fusion_attention.md b/model/train/yoco_moe/docs/ops/fusion_attention.md new file mode 100644 index 000000000..d12745fdf --- /dev/null +++ b/model/train/yoco_moe/docs/ops/fusion_attention.md @@ -0,0 +1,146 @@ +# fusion attention 对外接口 + +### 注意当前若要使用v2版本接口,需要开启`--use-fusion-attn-v2`特性 + +npu_fusion_attention( + query, key, value, head_num, + input_layout, *, pse=None, + padding_mask=None, atten_mask=None, + scale=1., keep_prob=1., pre_tokens=2147483647, + next_tokens=2147483647, inner_precise=0, prefix=None, + actual_seq_qlen=None, actual_seq_kvlen=None, + sparse_mode=0, gen_mask_parallel=True, + sync=False, pse_type=1, q_start_idx=None, + kv_start_idx=None) + +- 计算公式: + + 注意力的正向计算公式如下: + + - pse_type=1时,公式如下: + + $$ + attention\\_out = 
Dropout(Softmax(Mask(scale*(pse+query*key^T), atten\\_mask)), keep\\_prob)*value + $$ + + - pse_type=其他取值时,公式如下: + + $$ + attention\\_out=Dropout(Softmax(Mask(scale*(query*key^T) + pse),atten\\_mask),keep\\_prob)*value + $$ + +## 前向接口: +输入: +- query:必选输入,Device侧的Tensor,数据类型支持FLOAT16、BFLOAT16,数据格式支持ND。 +- key:必选输入,Device侧的Tensor,数据类型支持FLOAT16、BFLOAT16,数据格式支持ND。 +- value:必选输入,Device侧的Tensor,数据类型支持FLOAT16、BFLOAT16,数据格式支持ND。 +- atten_mask:可选输入,数据类型bool,缺省none。在softmax之前drop的mask。 +- pse:可选输入,Device侧的Tensor,可选参数,表示位置编码。数据类型支持FLOAT16、BFLOAT16,数据格式支持ND。非varlen场景支持四维输入,包含BNSS格式、BN1Skv格式、1NSS格式。如果非varlen场景Sq大于1024或varlen场景、每个batch的Sq与Skv等长且是sparse_mode为0、2、3的下三角掩码场景,可使能alibi位置编码压缩,此时只需要输入原始PSE最后1024行进行内存优化,即alibi_compress = ori_pse[:, :, -1024:, :],参数每个batch不相同时,输入BNHSkv(H=1024),每个batch相同时,输入1NHSkv(H=1024)。如果pse_type为2或3的话,需传入数据类型为float32的slope数据,slope数据支持BN或N两种shape。 +- padding_mask:可选输入,Device侧的Tensor,暂不支持该参数。 +- atten_mask:Device侧的Tensor,可选参数,取值为1代表该位不参与计算(不生效),为0代表该位参与计算,数据类型支持BOOL、UINT8,数据格式支持ND格式,输入shape类型支持BNSS格式、B1SS格式、11SS格式、SS格式。varlen场景只支持SS格式,SS分别是maxSq和maxSkv。 +- prefix:Host侧的int array,可选参数,代表prefix稀疏计算场景每个Batch的N值。数据类型支持INT64,数据格式支持ND。 +- actual_seq_qlen:Host侧的int array,可选参数,varlen场景时需要传入此参数。表示query每个S的累加和长度,数据类型支持INT64,数据格式支持ND。 + 比如真正的S长度列表为:2 2 2 2 2 则actual_seq_qlen传:2 4 6 8 10。 +- actual_seq_kvlen:Host侧的int array,可选参数,varlen场景时需要传入此参数。表示key/value每个S的累加和长度。数据类型支持INT64,数据格式支持ND。 + 比如真正的S长度列表为:2 2 2 2 2 则actual_seq_kvlen传:2 4 6 8 10。 +- sparse_mode:Host侧的int,表示sparse的模式,可选参数。数据类型支持:INT64,默认值为0,支持配置值为0、1、2、3、4、5、6、7、8。当整网的atten_mask都相同且shape小于2048*2048时,建议使用defaultMask模式,来减少内存使用, + 具体可参考昇腾社区说明https://www.hiascend.com/document/detail/zh/Pytorch/60RC1/apiref/apilist/ptaoplist_000448.html。 +- q_start_idx:Host侧的int array,可选参数,长度为1的int类型数组。pse_type配置为2或3时,表示内部生成alibi编码在Sq方向偏移的格数,正数表示0对角线向上移动。缺省值为0,表示不进行偏移。 +- kv_start_idx:Host侧的int array,可选参数,长度为1的int类型数组。pse_type配置为2或3时,表示内部生成alibi编码在Skv方向偏移的格数,正数表示0对角线向左移动。缺省值为0,表示不进行偏移。 + +输出: +(Tensor, Tensor, Tensor, Tensor, 
int, int, int) + +- 第1个输出为Tensor,计算公式的最终输出y,数据类型支持:FLOAT16、BFLOAT16。 +- 第2个输出为Tensor,Softmax 计算的Max中间结果,用于反向计算,数据类型支持:FLOAT。 +- 第3个输出为Tensor,Softmax计算的Sum中间结果,用于反向计算,数据类型支持:FLOAT。 +- 第4个输出为Tensor,保留参数,暂未使用。 +- 第5个输出为int,DSA生成dropoutmask中,Philox算法的seed。 +- 第6个输出为int,DSA生成dropoutmask中,Philox算法的offset。 +- 第7个输出为int,DSA生成dropoutmask的长度。 + +属性: +- scale:可选属性,Host侧的double,可选参数,代表缩放系数,作为计算流中Muls的scalar值,数据类型支持DOUBLE,默认值为1。 +- pse_type:可选属性,Host侧的int,数据类型支持INT64,默认值为1。支持范围0-3。 +- pse_type配置为0的时候,pse由外部传入,计算流程是先mul scale再add pse。 +- pse_type配置为1的时候,pse由外部传入,计算流程是先add pse再mul scale。 +- pse_type配置为2的时候,pse由内部生成,生成标准alibi位置信息。内部生成的alibi矩阵0线与Q@K^T的左上角对齐。 +- pse_type配置为3的时候,pse由内部生成,生成的alibi位置信息为标准的基础上再做sqrt开平方。内部生成的alibi矩阵0线与Q@K^T的左上角对齐。 +- head_num:必选属性,Host侧的int,代表head个数,数据类型支持INT64。 +- input_layout:必选属性,Host侧的string,代表输入query、key、value的数据排布格式,支持BSH、SBH、BSND、BNSD、TND(actual_seq_qlen/actual_seq_kvlen需传值);后续章节如无特殊说明,S表示query或key、value的sequence length,Sq表示query的sequence length,Skv表示key、value的sequence length,SS表示Sq*Skv +- keep_prob:可选属性,数据类型float,默认值为1.0。在 softmax 后的保留比例。 +- pre_tokens:可选属性,Host侧的int,用于稀疏计算的参数,可选参数,数据类型支持INT64,默认值为2147483647。 +- next_tokens:可选属性,Host侧的int,用于稀疏计算的参数,可选参数,数据类型支持INT64,默认值为2147483647。 +- inner_precise:可选属性,Host侧的int,用于提升精度,数据类型支持INT64,默认值为0。 +- gen_mask_parallel:debug参数,DSA生成dropout随机数向量mask的控制开关,默认值为True:同AICORE计算并行,False:同AICORE计算串行 +- sync:debug参数,DSA生成dropout随机数向量mask的控制开关,默认值为False:dropout mask异步生成,True:dropout mask同步生成 + +## 反向接口 +输入: +- grad:必选输入,数据类型float16, bfloat16,正向attention_out的梯度输入 + +输出: +- grad_query:必选输出,数据类型float16, bfloat16 +- grad_key:必选输出,数据类型float16, bfloat16 +- grad_value:必选输出,数据类型float16, bfloat16 + + +## 输入限制 +- 输入query、key、value的B:batchsize必须相等,取值范围1~2M。非varlen prefix场景B最大支持2K,varlen prefix场景B最大支持1K。 +- 输入query、key、value、pse的数据类型必须一致。pse_type=2或3的时候例外,此时pse需要传fp32的slope +- 输入query、key、value的input_layout必须一致。 +- 输入query的N和key/value的N 必须成比例关系,即Nq/Nkv必须是非0整数,Nq取值范围1~256。当Nq/Nkv > 1时,即为GQA,当Nkv=1时,即为MQA。 +- 
输入key/value的shape必须一致。 +- 输入query、key、value的S:sequence length,取值范围1~1M。 +- 输入query、key、value的D:head dim,取值范围1~512。 +- sparse_mode为1、2、3、4、5、6、7、8时,应传入对应正确的atten_mask,否则将导致计算结果错误。当atten_mask输入为None时,sparse_mode,pre_tokens,next_tokens参数不生效,固定为全计算。 +- sparse_mode配置为1、2、3、5、6时,用户配置的pre_tokens、next_tokens不会生效。 +- sparse_mode配置为0、4时,须保证atten_mask与pre_tokens、next_tokens的范围一致。 +- keep_prob的取值范围为(0, 1]。 +- varlen场景不支持非压缩prefix,即不支持sparse_mode=5;当Sq>Skv时,prefix的N值取值范围[0, Skv],当Sq<=Skv时,prefix的N值取值范围[Skv-Sq, Skv]。 +- sparse_mode=7或者8时,不支持可选输入pse。 +- varlen场景: + atten_mask输入不支持补pad,即atten_mask中不能存在某一行全1的场景。 + pse仅支持alibi场景,即shape为BNHSkv(H=1024)、1NHSkv(H=1024)两种场景。 +- 支持actual_seq_qlen中某个Batch上的S长度为0;如果存在S为0的情况,不支持pse输入, 假设真实的S长度为[2,2,0,2,2],则传入的actual_seq_qlen为[2,4,4,6,8];不支持某个batch中Sq不为0,但是Skv为0的场景。 + +## flash_attention_v2 类的调用方式 + +```python +import os +import torch +import torch_npu +import numpy as np +import math +from mindspeed.ops.fusion_attention_v2 import npu_fusion_attention + +batch_size, head_num, seq_size, head_dim = 1, 32, 2048, 128 +input_layout = "BNSD" +keep_prob = 1.0 +pre_tokens = 65536 +next_tokens = 1 +scale_value = 0.088 +q_scale = 1.0 + +input_shape = (batch_size, head_num, seq_size, head_dim) +softmax_shape = (batch_size, head_num, seq_size) +mask_shape = (1, 1, seq_size, seq_size) +dtype = torch.float16 +dx = (torch.rand(input_shape).to(dtype) - 0.5) +query = (torch.rand(input_shape).to(dtype) - 0.5) * 5 +key = (torch.rand(input_shape).to(dtype) - 0.5) * 5 +value = (torch.rand(input_shape).to(dtype) - 0.5) * 5 +atten_mask = np.tri(seq_size, k=-next_tokens) + np.tri(seq_size, k=-pre_tokens).transpose() +alibi_mask = torch.zeros((batch_size, head_num, seq_size, seq_size)).to(dtype) +atten_mask = torch.from_numpy(atten_mask).to(torch.bool) + +query.requires_grad = True +key.requires_grad = True +value.requires_grad = True + +#正向接口案例 +result = npu_fusion_attention(query.npu(), key.npu(), value.npu(), head_num, input_layout, atten_mask=atten_mask.npu(), 
scale=scale_value, keep_prob=keep_prob, pre_tokens=pre_tokens, next_tokens=next_tokens, pse_type=1) + +#反向接口案例 +result[0].backward(dx.npu()) +``` diff --git a/model/train/yoco_moe/docs/ops/gmm.md b/model/train/yoco_moe/docs/ops/gmm.md new file mode 100644 index 000000000..3a9597e9e --- /dev/null +++ b/model/train/yoco_moe/docs/ops/gmm.md @@ -0,0 +1,112 @@ +# gmm对外接口 + +npu_gmm(x, weight, *, bias=None, group_list=None, group_type=0, gemm_fusion=False, original_weight=None) + +npu_gmm_v2(x, weight, *, bias=None, group_list=None, group_type=0, gemm_fusion=False, original_weight=None) + +[npu_gmm_v2]相较于[npu_gmm]接口, group_list的含义不同, 在npu_gmm接口中group_list中数值为分组轴大小的cumsum结果(累积和),npu_gmm_v2接口中group_list中数值为分组轴上每组大小。两个接口的算子性能无差异,使用时可以根据整网中group_list的情况决定,如果前序算子输出的group_list为各group的大小,建议使用npu_gmm_v2接口,因为此时使用npu_gmm接口需要先调用torch.cumsum将group_list转为累积和的形式,带来额外开销。 + +## 前向接口: +输入: +- x:必选输入,为tensor,数据类型float16, bfloat16, float32 +- weight:必选输入,为tensor,数据类型float16, bfloat16, float32 +- bias:可选输入,为tensor,数据类型float16, float32, 默认值为none。训练场景下,仅支持bias为none +- group_list:可选输入,数据类型list[int64], tensor,默认值为none。不同接口中的数值定义不同,具体如上。 +- group_type:可选输入,数据类型int64,代表需要分组的轴,如矩阵乘为C[m,n]=A[m,k]xB[k,n],则groupType取值-1:不分组,0:m轴分组,1:n轴分组,2:k轴分组,默认值为0。 +- gemm_fusion:可选输入,为bool,数据类型True,False,用于反向累加梯度的时候使能GMM+ADD融合算子,默认值为False。 +- original_weight:可选输入,为tensor,数据类型float16, bfloat16, float32,用于获取view之前的weight的main_grad用于GMM+ADD中梯度累加功能,默认值为None。 + +输出: +- y:必选输出,数据类型float16, bfloat16, float32 + +约束与限制: +- npu_gmm接口中,group_list必须为非负单调非递减数列,且长度不能为1 +- npu_gmm_v2接口中,group_list必须为非负数列,长度不能为1,且数据类型仅支持tensor +- 不同group_type支持场景: + | group_type | 场景限制 | + | :---: | :---: | + | 0 | 1. weight中tensor需为3维,x,y中tensor需为2维
2. 必须传group_list,如果调用npu_gmm接口,则最后一个值与x中tensor的第一维相等,如果调用npu_gmm_v2接口,则数值的总和与x中tensor的第一维相等 | + | 2 | 1. x,weight中tensor需为2维,y中tensor需为2维
2. 必须传group_list,如果调用npu_gmm接口,则最后一个值与x中tensor的第一维相等,如果调用npu_gmm_v2接口,则数值的总和与x中tensor的第一维相等 | +- group_type不支持group_type=1的场景,其中昇腾310系列处理器支持转置的场景为group_type为0,x为单tensor,weight为单tensor,y为单tensor。 +- x和weight中每一组tensor的最后一维大小都应小于65536.$x_i$的最后一维指当属性transpose_x为false时$x_i$的K轴或当transpose_x为true时$x_i$的M轴。$weight_i$的最后一维指当属性transpose_weight为false时$weight_i$的N轴或当transpose_weight为true时$weight_i$的K轴。 +- x和weight中每一组tensor的每一维大小在32字节对齐后都应小于int32的最大值2147483647。 + +## 反向接口 +输入: +- grad:必选输入,为tensor,数据类型float16, bfloat16, float32 +- x:必选输入,为tensor,数据类型float16, bfloat16, float32 +- weight:必选输入,为tensor,数据类型float16, bfloat16, float32 +- group_list:可选输入,数据类型list[int64]、tensor,默认值为none。数据来自正向输入 + +输出: +- grad_x:必选输出,数据类型float16, bfloat16, float32 +- grad_weight:必选输出,数据类型float16, bfloat16, float32 +- grad_bias:当前不支持,默认为none + +## gmm 类的调用方式 + +```python +import os +import torch +import torch_npu +import numpy as np +import math +from mindspeed.ops import gmm + +num_expert, seq_len, hidden_dim = 8, 32, 256 +group_list = [1, 3, 6, 10, 15, 21, 28, 32] +group_type = 0 + +x_shape = (seq_len, hidden_dim) +weight_shape = (num_expert, hidden_dim, seq_len) +dtype = torch.float16 +x = (torch.rand(x_shape).to(dtype) - 0.5) +weight = (torch.rand(weight_shape).to(dtype) - 0.5) + +# 正向接口案例 +x.requires_grad = True +weight.requires_grad = True +result = gmm.npu_gmm(x.npu(), weight.npu(), bias=None, group_list=group_list, group_type=group_type) + +# 反向接口案例 +result.backward(torch.ones(result.shape).npu()) + +# weight转置案例 +weight_shape_trans = (num_expert, seq_len, hidden_dim) +weight_trans = (torch.rand(weight_shape_trans).to(dtype) - 0.5) +weight_trans.requires_grad = True +result = gmm.npu_gmm(x.npu(), weight_trans.transpose(-1,-2).npu(), bias=None, group_list=group_list, group_type=group_type) +``` + +```python +import os +import torch +import torch_npu +import numpy as np +import math +from mindspeed.ops import gmm + +num_expert, seq_len, hidden_dim = 8, 32, 256 +group_list = torch.tensor([1, 2, 
3, 4, 5, 6, 7, 4]) +group_type = 0 + +x_shape = (seq_len, hidden_dim) +weight_shape = (num_expert, hidden_dim, seq_len) +dtype = torch.float16 +x = (torch.rand(x_shape).to(dtype) - 0.5) +weight = (torch.rand(weight_shape).to(dtype) - 0.5) + +# 正向接口案例 +x.requires_grad = True +weight.requires_grad = True +result = gmm.npu_gmm_v2(x.npu(), weight.npu(), bias=None, group_list=group_list.npu(), group_type=group_type) + +# 反向接口案例 +result.backward(torch.ones(result.shape).npu()) + +# weight转置案例 +weight_shape_trans = (num_expert, seq_len, hidden_dim) +weight_trans = (torch.rand(weight_shape_trans).to(dtype) - 0.5) +weight_trans.requires_grad = True +result = gmm.npu_gmm_v2(x.npu(), weight_trans.transpose(-1,-2).npu(), bias=None, group_list=group_list.npu(), group_type=group_type) +``` \ No newline at end of file diff --git a/model/train/yoco_moe/docs/ops/lcal_coc.md b/model/train/yoco_moe/docs/ops/lcal_coc.md new file mode 100644 index 000000000..eee78e7d3 --- /dev/null +++ b/model/train/yoco_moe/docs/ops/lcal_coc.md @@ -0,0 +1,395 @@ +# LCAL_COC对外接口 + +## MATMUL_ALL_REDUCE接口 + +```python +from mindspeed.ops.lcal_functional import coc_ops + +coc_ops.matmul_all_reduce(input1, input2, output, bias) +``` + +### 接口功能 + +该接口对输入的左右矩阵进行Matmul操作,并对其结果进行All-Reduce通信,最后加上bias(如果bias不为None)。将最终结果赋值到output内存区域中。 + +### 接口输入输出 + +假设Matmul操作对应的shape为[m, k]和[k, n]: + +接口输入: +- input1:左矩阵(必选输入,数据类型float16/bfloat16,shape只支持二维,不支持转置,\[m,k\]); +- input2:右矩阵(必选输入,数据类型float16/bfloat16,shape只支持二维,支持转置,\[k,n\]/\[n,k\]); +- output:输出矩阵,需要提前申请内存作为接口的输入(必选输入,数据类型float16/bfloat16,shape只支持二维,\[m,n\]); +- bias:偏置向量(可选输入,数据类型float16/bfloat16,shape支持\[1, n\]); + +接口输出: +- 无 + +### 使用案例 + +```python +import torch +import torch_npu +import torch.multiprocessing as mp +import torch.distributed as dist +from torch_npu.contrib import transfer_to_npu +import megatron.core.parallel_state as ps + + +def initialize_model_parallel( + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + 
virtual_pipeline_model_parallel_size=None, + pipeline_model_parallel_split_rank=None, + context_parallel_size=1, +): + ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=tensor_model_parallel_size, + pipeline_model_parallel_size=pipeline_model_parallel_size, + virtual_pipeline_model_parallel_size=virtual_pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=pipeline_model_parallel_split_rank, + context_parallel_size=context_parallel_size, + ) + + +def test_coc_matmul_all_reduce(rank, world_size, master_ip, master_port): + torch_npu.npu.set_device(rank) + init_method = 'tcp://' + master_ip + ':' + master_port + dist.init_process_group(backend='hccl', rank=rank, world_size=world_size, init_method=init_method) + initialize_model_parallel(world_size) + from mindspeed.ops.lcal_functional import coc_ops + + m, k, n = 2048, 4096, 8192 + dtype = torch.float16 + input1 = torch.rand(m, k, dtype=dtype, device=torch.npu.current_device()) + input2 = torch.rand(k, n, dtype=dtype, device=torch.npu.current_device()) + bias = torch.rand(1, n, dtype=dtype, device=torch.npu.current_device()) + output = torch.zeros(m, n, dtype=dtype, device=torch.npu.current_device()) + coc_ops.matmul_all_reduce(input1, input2, output, bias) + torch.npu.synchronize() + print(output) + +if __name__ == "__main__": + world_size = 8 + master_ip = "127.0.0.1" + master_port = "50001" + mp.spawn(test_coc_matmul_all_reduce, args=(world_size, master_ip, master_port), nprocs=world_size) +``` + + +## ALL_GATHER_MATMUL接口 + +```python +from mindspeed.ops.lcal_functional import coc_ops + +coc_ops.all_gather_matmul(input1, input2, output, bias) +```` + +### 接口功能 + +该接口对输入的左矩阵进行All-Gather操作,然后将其与右矩阵做Matmul操作,最后加上bias(如果bias不为None)。将最终结果赋值到output内存区域中。 + +### 接口输入输出 + +假设Matmul操作对应的shape为[m, k]和[k, n](m必须为world_size的倍数): + +接口输入: +- input1:左矩阵(必选输入,数据类型float16/bfloat16,shape只支持二维,不支持转置,\[m // world_size,k\]); +- 
input2:右矩阵(必选输入,数据类型float16/bfloat16,shape只支持二维,支持转置,\[k,n\]/\[n,k\]); +- output:输出矩阵,需要提前申请内存作为接口的输入(必选输入,数据类型float16/bfloat16,shape只支持二维,\[m,n\]); +- bias:偏置向量(可选输入,数据类型float16/bfloat16,shape支持\[1, n\]); + +接口输出: +- 无 + +### 使用案例 + +```python +import torch +import torch_npu +import torch.multiprocessing as mp +import torch.distributed as dist +from torch_npu.contrib import transfer_to_npu +import megatron.core.parallel_state as ps + + +def initialize_model_parallel( + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + virtual_pipeline_model_parallel_size=None, + pipeline_model_parallel_split_rank=None, + context_parallel_size=1, +): + ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=tensor_model_parallel_size, + pipeline_model_parallel_size=pipeline_model_parallel_size, + virtual_pipeline_model_parallel_size=virtual_pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=pipeline_model_parallel_split_rank, + context_parallel_size=context_parallel_size, + ) + + +def test_coc_all_gather_matmul(rank, world_size, master_ip, master_port): + torch_npu.npu.set_device(rank) + init_method = 'tcp://' + master_ip + ':' + master_port + dist.init_process_group(backend='hccl', rank=rank, world_size=world_size, init_method=init_method) + initialize_model_parallel(world_size) + from mindspeed.ops.lcal_functional import coc_ops + + m, k, n = 2048, 4096, 8192 + dtype = torch.float16 + input1 = torch.rand(m // world_size, k, dtype=dtype, device=torch.npu.current_device()) + input2 = torch.rand(k, n, dtype=dtype, device=torch.npu.current_device()) + bias = torch.rand(1, n, dtype=dtype, device=torch.npu.current_device()) + output = torch.zeros(m, n, dtype=dtype, device=torch.npu.current_device()) + coc_ops.all_gather_matmul(input1, input2, output, bias) + torch.npu.synchronize() + print(output) + + +if __name__ == "__main__": + world_size = 8 + master_ip = "127.0.0.1" + master_port = "50001" + 
mp.spawn(test_coc_all_gather_matmul, args=(world_size, master_ip, master_port), nprocs=world_size) +``` + + +## ALL_GATHER_MATMUL_V2接口 + +```python +from mindspeed.ops.lcal_functional import coc_ops + +coc_ops.all_gather_matmul_v2(input1, input2, output, comm_output, bias) +``` + +### 接口功能 + +该接口对输入的左矩阵进行All-Gather操作,然后将其与右矩阵做Matmul操作,最后加上bias(如果bias不为None)。将最终结果赋值到output内存区域中,并将左矩阵进行All-Gather操作后得到的结果赋值到comm_output内存区域中。 + +### 接口输入输出 + +假设Matmul操作对应的shape为[m, k]和[k, n](m必须为world_size的倍数): + +接口输入: +- input1:左矩阵(必选输入,数据类型float16/bfloat16,shape只支持二维,不支持转置,\[m // world_size,k\]); +- input2:右矩阵(必选输入,数据类型float16/bfloat16,shape只支持二维,支持转置,\[k,n\]/\[n,k\]); +- output:输出矩阵,需要提前申请内存作为接口的输入(必选输入,数据类型float16/bfloat16,shape只支持二维,\[m,n\]); +- comm_output:输出矩阵,需要提前申请内存作为接口的输入(必选输入,数据类型float16/bfloat16,shape只支持二维,\[m,k\]); +- bias:偏置向量(可选输入,数据类型float16/bfloat16,shape支持\[1, n\]); + +接口输出: +- 无 + +### 使用案例 + +```python +import torch +import torch_npu +import torch.multiprocessing as mp +import torch.distributed as dist +from torch_npu.contrib import transfer_to_npu +import megatron.core.parallel_state as ps + + +def initialize_model_parallel( + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + virtual_pipeline_model_parallel_size=None, + pipeline_model_parallel_split_rank=None, + context_parallel_size=1, +): + ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=tensor_model_parallel_size, + pipeline_model_parallel_size=pipeline_model_parallel_size, + virtual_pipeline_model_parallel_size=virtual_pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=pipeline_model_parallel_split_rank, + context_parallel_size=context_parallel_size, + ) + + +def test_coc_all_gather_matmul_v2(rank, world_size, master_ip, master_port): + torch_npu.npu.set_device(rank) + init_method = 'tcp://' + master_ip + ':' + master_port + dist.init_process_group(backend='hccl', rank=rank, world_size=world_size, init_method=init_method) + 
initialize_model_parallel(world_size) + from mindspeed.ops.lcal_functional import coc_ops + + m, k, n = 2048, 4096, 8192 + dtype = torch.float16 + input1 = torch.rand(m // world_size, k, dtype=dtype, device=torch.npu.current_device()) + input2 = torch.rand(k, n, dtype=dtype, device=torch.npu.current_device()) + bias = torch.rand(1, n, dtype=dtype, device=torch.npu.current_device()) + output = torch.zeros(m, n, dtype=dtype, device=torch.npu.current_device()) + comm_output = torch.zeros(m, k, dtype=dtype, device=torch.npu.current_device()) + coc_ops.all_gather_matmul_v2(input1, input2, output, comm_output, bias) + torch.npu.synchronize() + print(output) + + +if __name__ == "__main__": + world_size = 8 + master_ip = "127.0.0.1" + master_port = "50001" + mp.spawn(test_coc_all_gather_matmul_v2, args=(world_size, master_ip, master_port), nprocs=world_size) +``` + +## MATMUL_REDUCE_SCATTER接口 + +```python +from mindspeed.ops.lcal_functional import coc_ops + +coc_ops.matmul_reduce_scatter(input1, input2, output, bias) +```` + +### 接口功能 + +该接口对输入的左右矩阵进行Matmul操作,并对其结果进行Reduce-Scatter通信,最后加上bias(如果bias不为None)。将最终结果赋值到output内存区域中。 + +### 接口输入输出 + +假设Matmul操作对应的shape为[m, k]和[k, n](m必须为world_size的倍数): + +接口输入: +- input1:左矩阵(必选输入,数据类型float16/bfloat16,shape只支持二维,不支持转置,\[m,k\]); +- input2:右矩阵(必选输入,数据类型float16/bfloat16,shape只支持二维,支持转置,\[k,n\]/\[n,k\]); +- output:输出矩阵,需要提前申请内存作为接口的输入(必选输入,数据类型float16/bfloat16,shape只支持二维,\[m // world_size,n\]); +- bias:偏置向量(可选输入,数据类型float16/bfloat16,shape支持\[1, n\]); + +接口输出: +- 无 + +### 使用方法 + +```python +import torch +import torch_npu +import torch.multiprocessing as mp +import torch.distributed as dist +from torch_npu.contrib import transfer_to_npu +import megatron.core.parallel_state as ps + + +def initialize_model_parallel( + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + virtual_pipeline_model_parallel_size=None, + pipeline_model_parallel_split_rank=None, + context_parallel_size=1, +): + ps.destroy_model_parallel() + 
ps.initialize_model_parallel( + tensor_model_parallel_size=tensor_model_parallel_size, + pipeline_model_parallel_size=pipeline_model_parallel_size, + virtual_pipeline_model_parallel_size=virtual_pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=pipeline_model_parallel_split_rank, + context_parallel_size=context_parallel_size, + ) + + +def test_coc_matmul_reduce_scatter(rank, world_size, master_ip, master_port): + torch_npu.npu.set_device(rank) + init_method = 'tcp://' + master_ip + ':' + master_port + dist.init_process_group(backend='hccl', rank=rank, world_size=world_size, init_method=init_method) + initialize_model_parallel(world_size) + from mindspeed.ops.lcal_functional import coc_ops + + m, k, n = 2048, 4096, 8192 + dtype = torch.float16 + input1 = torch.rand(m, k, dtype=dtype, device=torch.npu.current_device()) + input2 = torch.rand(k, n, dtype=dtype, device=torch.npu.current_device()) + bias = torch.rand(1, n, dtype=dtype, device=torch.npu.current_device()) + output = torch.zeros(m // world_size, n, dtype=dtype, device=torch.npu.current_device()) + coc_ops.matmul_reduce_scatter(input1, input2, output, bias) + torch.npu.synchronize() + print(output) + + +if __name__ == "__main__": + world_size = 8 + master_ip = "127.0.0.1" + master_port = "50001" + mp.spawn(test_coc_matmul_reduce_scatter, args=(world_size, master_ip, master_port), nprocs=world_size) +``` + + +## PURE_MATMUL接口 + +```python +from mindspeed.ops.lcal_functional import coc_ops + +coc_ops.pure_matmul(input1, input2, output, bias) +```` + +### 接口功能 + +该接口对输入的左右矩阵进行Lcal Matmul操作,最后加上bias(如果bias不为None)。将最终结果赋值到output内存区域中。 + +### 接口输入输出 + +假设Matmul操作对应的shape为[m, k]和[k, n]: + +接口输入: +- input1:左矩阵(必选输入,数据类型float16/bfloat16,shape只支持二维,不支持转置,\[m,k\]); +- input2:右矩阵(必选输入,数据类型float16/bfloat16,shape只支持二维,支持转置,\[k,n\]/\[n,k\]); +- output:输出矩阵,需要提前申请内存作为接口的输入(必选输入,数据类型float16/bfloat16,shape只支持二维,\[m,n\]); +- bias:偏置向量(可选输入,数据类型float16/bfloat16,shape支持\[1, n\]); + +接口输出: +- 无 + +### 使用方法 + 
+```python +import torch +import torch_npu +import torch.multiprocessing as mp +import torch.distributed as dist +from torch_npu.contrib import transfer_to_npu +import megatron.core.parallel_state as ps + + +def initialize_model_parallel( + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + virtual_pipeline_model_parallel_size=None, + pipeline_model_parallel_split_rank=None, + context_parallel_size=1, +): + ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=tensor_model_parallel_size, + pipeline_model_parallel_size=pipeline_model_parallel_size, + virtual_pipeline_model_parallel_size=virtual_pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=pipeline_model_parallel_split_rank, + context_parallel_size=context_parallel_size, + ) + + +def test_coc_pure_matmul(rank, world_size, master_ip, master_port): + torch_npu.npu.set_device(rank) + init_method = 'tcp://' + master_ip + ':' + master_port + dist.init_process_group(backend='hccl', rank=rank, world_size=world_size, init_method=init_method) + initialize_model_parallel(world_size) + from mindspeed.ops.lcal_functional import coc_ops + + m, k, n = 2048, 4096, 8192 + dtype = torch.float16 + input1 = torch.rand(m, k, dtype=dtype, device=torch.npu.current_device()) + input2 = torch.rand(k, n, dtype=dtype, device=torch.npu.current_device()) + bias = torch.rand(1, n, dtype=dtype, device=torch.npu.current_device()) + output = torch.zeros(m, n, dtype=dtype, device=torch.npu.current_device()) + coc_ops.pure_matmul(input1, input2, output, bias) + torch.npu.synchronize() + print(output) + + +if __name__ == "__main__": + world_size = 8 + master_ip = "127.0.0.1" + master_port = "50001" + mp.spawn(test_coc_pure_matmul, args=(world_size, master_ip, master_port), nprocs=world_size) +``` diff --git a/model/train/yoco_moe/docs/ops/npu_all_to_all_all_gather_bmm.md b/model/train/yoco_moe/docs/ops/npu_all_to_all_all_gather_bmm.md new file mode 100644 index 
000000000..da9a330e4 --- /dev/null +++ b/model/train/yoco_moe/docs/ops/npu_all_to_all_all_gather_bmm.md @@ -0,0 +1,190 @@ +# npu_alltoall_allgather_bmm对外接口 +``` +def npu_alltoall_allgather_bmm( + x: Tensor, + weight: Tensor, + group_ep: str, + group_ep_worldsize: int, + group_tp: str, + group_tp_worldsize: int, + *, + bias: Optional[Tensor] = None, + shard_type: Optional[int] = 0, + act_type: Optional[str] = "None", + need_allgather_out: Optional[bool] = False, + need_activation_feature: Optional[bool] = False +) -> (Tensor, Tensor, Tensor): + +``` + +计算逻辑: +bmm指BatchMatMul,AllToAllAllGatherBatchMatMul算子是实现AllToAll、AllGather集合通信与BatchMatMul计算并行的算子。 +大体计算流程为:AllToAll集合通信-->AllGather集合通信-->BatchMatMul-->激活(可选,可以没有) + +计算逻辑如下,其中y1Out y2OutOptional y3OutOptional为输出,x weight bias为输入,activating为激活函数(由act_type决定,当act_type为None时,表示不调用激活函数) +$$ + alltoallOut = AllToAll(x) +$$ +$$ + y2OutOptional = AllGather(alltoallOut) +$$ +$$ + y3OutOptional = BatchMatMul(y2OutOptional, weight, bias) +$$ +$$ + y1Out = activating(y3OutOptional) +$$ + +## 输入输出及属性说明: +输入: +- x:必选输入,Tensor,数据类型支持float16,bfloat16。该输入进行AllToAll、AllGather集合通信,必须为3维,数据格式支持ND,通信后结果作为BatchMatMul计算的左矩阵。 +- weight:必选输入,Tensor,数据类型支持float16, bfloat16,类型需与x保持一致,必须为3维,数据格式支持ND, BatchMatMul计算的右矩阵。 +- bias:可选输入,Tensor,数据类型支持float16, float32。x为float16时,bias需为float16;x为bfloat16时,bias需为float32,必须为两维或三维,数据格式支持ND。BatchMatMul计算的bias。 + +输出: +- y1Out:Tensor,数据类型支持float16, bfloat16,仅支持3维。最终计算结果,如果有激活函数则为激活函数的输出,否则为BatchMatMul的输出。数据类型与输入x保持一致。 +- y2OutOptional:Tensor,可选输出,数据类型支持float16, bfloat16,仅支持3维。AllGather的输出,数据类型与输入x保持一致。反向可能需要。 +- y3OutOptional:Tensor,可选输出,数据类型支持float16, bfloat16,仅支持3维。有激活函数时,BatchMatMul的输出,类型与输入x保持一致。 + +属性: +- group_ep:必选属性,str。ep通信域名称,专家并行的通信域。 +- group_ep_worldsize:必选属性,int。ep通信域size,支持2/4/8/16/32。 +- group_tp:必选属性,str。tp通信域名称,Tensor并行的通信域。 +- group_tp_worldsize:必选属性,int。tp通信域size,支持2/4/8/16/32。 +- shard_type:可选属性,int,默认值为0,0表示在H维度按tp域进行allgather,1表示在C维度上按tp域进行allgather。 +- 
act_type:可选属性,str,激活函数类型,默认值为None,表示无激活函数。支持GELU/Silu/FastGELU/Relu/None等。 +- need_allgather_out:是否需要输出allgather后的结果,默认False,表示不需要输出。 +- need_activation_feature:是否需要输出执行激活函数前的结果(BatchMatMul后),默认False,表示不需要输出。仅在act_type不为None的时候有意义。 + + +## 输入shape限制 +因为集合通信及BatchMatMul计算所需,输入输出shape需满足以下数学关系:(其中ep=group_ep_worldsize,tp=group_tp_worldsize) +按H轴进行AllGather场景,shard_type为0时: +- x: (E, C, H/tp) +- weight:(E/ep, H, M/tp) +- bias:支持两维或三维,三维时shape为:(E/ep, 1, M/tp),两维时shape为:(E/ep, M/tp) +- y1Out:(E/ep, ep\*C, M/tp) +- y2OutOptional:(E/ep, ep\*C, H) +- y3OutOptional:(E/ep, ep\*C, M/tp) +按C轴进行AllGather场景,shard_type为1时: +- x: (E, C/tp, H); +- weight:(E/ep, H, M/tp); +- bias:支持两维或三维,三维时shape为:(E/ep, 1, M/tp),两维时shape为:(E/ep, M/tp) +- y1Out:(E/ep, ep\*tp\*C/tp, M/tp); +- y2OutOptional:(E/ep, ep\*tp\*C/tp, H); +- y3OutOptional:(E/ep, ep\*tp\*C/tp, M/tp) + +数据关系说明: +- 比如x.size(0)等于E,weight.size(0)等于E/ep,则表示,x.size(0) = ep\*weight.size(0),x.size(0)是ep的整数倍;其他关系类似 +- E的取值范围为[2, 512],且E是ep的整数倍; +- H的取值范围为:[1, 65535],当shard_type为0时,H需为tp的整数倍; +- M/tp的取值为:[1, 65535]; +- E/ep的取值范围为:[1, 32]; +- ep、tp均仅支持2、4、8、16、32; +- group_ep和group_tp名称不能相同; +- C大于0,上限为算子device内存上限,当shard_type为1时,C需为tp的整数倍; +- 不支持跨超节点,只支持超节点内。 + +## npu_alltoall_allgather_bmm 的调用示例 +在终端调用命令如下: +``` +python3 -m torch.distributed.launch --nproc_per_node 8 --master_addr 127.0.0.1 --master_port 29500 demo_test.py +``` +注:master_addr和master_port参数需用户根据实际情况设置 + +demo_test.py的示例代码如下: +```python +import os +import pytest +import torch +import torch.distributed as dist +from torch.distributed.distributed_c10d import _get_default_group, ReduceOp +import torch_npu +from mindspeed.ops.npu_all_to_all_all_gather_bmm import npu_alltoall_allgather_bmm + +world_size = 8 +ep_size = 4 +tp_size = 2 +def setup_ep_tp(rank, tp_size, ep_size, backend_type): + # 初始化EP域 + print("device %d initialize ep group" % rank, flush=True) + for i in range(tp_size): + ep_ranks = [x + ep_size * i for x in range(ep_size)] + ep_group = 
dist.new_group(backend=backend_type, ranks=ep_ranks) + if rank in ep_ranks: + ep_group_tmp = ep_group + print("device %d initialize tp group" % rank, flush=True) + for i in range(ep_size): + tp_ranks = [x * ep_size + i for x in range(tp_size)] + tp_group = dist.new_group(backend=backend_type, ranks=tp_ranks) + if rank in tp_ranks: + tp_group_tmp = tp_group + return ep_group_tmp, tp_group_tmp + +def get_ep_tp_hcomm_info(rank, ep_size, tp_size): + ep_group, tp_group = setup_ep_tp(rank, tp_size, ep_size, "hccl") + if torch.__version__ > '2.0.1': + ep_hcomm_info = ep_group._get_backend(torch.device("npu")).get_hccl_comm_name(rank) + tp_hcomm_info = tp_group._get_backend(torch.device("npu")).get_hccl_comm_name(rank) + else: + ep_hcomm_info = ep_group.get_hccl_comm_name(rank) + tp_hcomm_info = tp_group.get_hccl_comm_name(rank) + return ep_hcomm_info, tp_hcomm_info + +if __name__ == '__main__': + dtype = torch.float16 + x_shard_type = 1 + out_y2_flag = True + out_y3_flag = False + act_type = "None" + transpose_weight = False + rank = int(os.environ["LOCAL_RANK"]) + torch_npu.npu.set_device(rank) + dist.init_process_group(backend="hccl", rank=rank, world_size=world_size) + ep_group, tp_group = get_ep_tp_hcomm_info(rank, ep_size, tp_size) + print(f'current device: {torch_npu.npu.current_device()}, local rank = {rank}, hcomm_info = {ep_group}, {tp_group}') + E, C, H, M = 4, 1024, 1024, 8192 + if x_shard_type == 0: + x_shape = (E, C, H / tp_size) + elif x_shard_type == 1: + x_shape = (E, C / tp_size, H) + else: + x_shape = (E / ep_size, tp_size * ep_size * C, M / tp_size) + weight_shape = (E / ep_size, H, M / tp_size) + if transpose_weight == True: + weight_shape = (E / ep_size, M / tp_size, H) + bias_shape = (E / ep_size, 1, M / tp_size) + + x_shape = tuple(int(item) for item in x_shape) + weight_shape = tuple(int(item) for item in weight_shape) + bias_shape = tuple(int(item) for item in bias_shape) + x = torch.rand(x_shape) + weight = torch.rand(weight_shape) + bias = 
torch.rand(bias_shape) + x_npu = x.npu().to(dtype) + weight_npu = weight.npu().to(dtype) + if transpose_weight == True: + print(f'!!!!before transpose, weight_npu.size()={weight_npu.size()}') + weight_npu = weight_npu.transpose(1, 2) + print(f'!!!!after transpose, weight_npu.size()={weight_npu.size()}') + print(f'!!!!after transpose, weight_npu.is_contiguous()={weight_npu.is_contiguous()}') + bias_npu = bias.npu().to(dtype) + # 赋值None可以验证bias为空的场景 + bias_npu = None + + y_npu = npu_alltoall_allgather_bmm(x_npu, + weight_npu, + ep_group, + ep_size, + tp_group, + tp_size, + bias=bias_npu, + shard_type=x_shard_type, + act_type=act_type, + need_allgather_out=out_y2_flag, + need_activation_feature=out_y3_flag) + if rank == 0: + for i, y in enumerate(y_npu[0]): + y.cpu().numpy().tofile(f"./y_{i}.bin") + +``` diff --git a/model/train/yoco_moe/docs/ops/npu_apply_fused_ema_adamw.md b/model/train/yoco_moe/docs/ops/npu_apply_fused_ema_adamw.md new file mode 100644 index 000000000..9a4a329f6 --- /dev/null +++ b/model/train/yoco_moe/docs/ops/npu_apply_fused_ema_adamw.md @@ -0,0 +1,91 @@ +# npu_apply_fused_ema_adamw 对外接口 + +## 接口原型 +``` +npu_apply_fused_ema_adamw(grad, var, m, v, s, step, lr, ema_decay, beta1, beta2, eps, mode, bias_correction, weight_decay)-> var, m, v, s +``` +npu_apply_fused_ema_adamw接口用于更新fused_ema_adamw优化器中的var(模型参数), m(一阶矩动量), v(二阶矩动量), s(ema模型参数)这四个参数。
+ +```python +# 接口内部计算逻辑示例如下 +def npu_apply_fused_ema_adamw(grad, var, m, v, s, step, lr, ema_decay, + beta1, beta2, eps, mode, bias_correction, + weight_decay): + beta1_correction = 1 - torch.pow(beta1, step) * bias_correction + beta2_correction = 1 - torch.pow(beta2, step) * bias_correction + grad_ = grad + weight_decay * var * (1 - mode) + m_ = beta1 * m + (1 - beta1) * grad_ + v_ = beta2 * v + (1 - beta2) * grad_ * grad_ + next_m = m_ / beta1_correction + next_v = v_ / beta2_correction + demon = torch.pow(next_v, 0.5) + eps + update = next_m / demon + weight_decay * var * mode + var_ = var - lr * update + s_ = ema_decay * s + (1 - ema_decay) * var_ + return var_, m_, v_, s_ +``` + +## 输入: +- `grad`:必选输入,数据类型为tensor(float32),表示模型参数的梯度。接受任意shape但需保持接口调用时`grad, var, m, v, s`五个入参shape一致。 +- `var`:必选输入,数据类型为tensor(float32),表示模型参数。接受任意shape但需保持接口调用时`grad, var, m, v, s`五个入参shape一致。 +- `m`:必选输入,数据类型为tensor(float32),表示一阶矩动量。接受任意shape但需保持接口调用时`grad, var, m, v, s`五个入参shape一致。 +- `v`:必选输入,数据类型为tensor(float32),表示二阶矩动量。接受任意shape但需保持接口调用时`grad, var, m, v, s`五个入参shape一致。 +- `s`:必选输入,数据类型为tensor(float32),表示ema模型参数。接受任意shape但需保持接口调用时`grad, var, m, v, s`五个入参shape一致。 +- `step`:必选输入,数据类型为tensor(int64),shape:(1,),表示当前为第几步。 +- `lr`:可选属性,数据类型为float32,默认值:1e-3。表示学习率。 +- `ema_decay`:可选属性,数据类型为float32,默认值:0.9999。表示ema衰减超参数。 +- `beta1`:可选属性,数据类型为float32,默认值:0.9。表示一阶矩动量的衰减率。 +- `beta2`:可选属性,数据类型为float32,默认值:0.999。表示二阶矩动量的衰减率。 +- `eps`:可选属性,数据类型为float32,默认值:1e-8。表示一个极小的数。 +- `mode`:可选属性,数据类型为int,默认值:1。取1表示以adamw模式计算,取0表示以adam模式计算。 +- `bias_correction`:可选属性,数据类型为bool,默认值:True。表示是否开启偏置修正。 +- `weight_decay`:可选属性,数据类型为float32,默认值:0.0。表示模型参数的衰减率。 + +支持的输入数据类型组合: + +| 参数名称 | 数据类型 | +|-----------------|-----------------| +| grad | tensor(float32) | +| var | tensor(float32) | +| m | tensor(float32) | +| v | tensor(float32) | +| s | tensor(float32) | +| step | tensor(int64) | +| lr | float32 | +| ema_decay | float32 | +| beta1 | float32 | +| beta2 | float32 | +| eps | float32 | +| mode | int | +| 
bias_correction | bool | +| weight_decay | float32 | + +## 输出: +- var:必选输出,数据类型为tensor(float32),shape和入参var一致,表示更新后的模型参数。 +- m:必选输出,数据类型为tensor(float32),shape和入参m一致,表示更新后的一阶矩动量。 +- v:必选输出,数据类型为tensor(float32),shape和入参v一致,表示更新后的二阶矩动量。 +- s:必选输出,数据类型为tensor(float32),shape和入参s一致,表示更新后的ema模型参数。 + +*** +## 调用示例: + +- 输入 grad, var, m, v, s, step, lr, ema_decay, beta1, beta2, eps, mode, bias_correction, weight_decay +- 输出 var, m, v, s + +```python +import torch +import torch_npu +from mindspeed.ops.npu_apply_fused_ema_adamw import npu_apply_fused_ema_adamw + + +grad = torch.full((10, 10), 0.5).to(torch.float32).npu() +var = torch.full((10, 10), 0.5).to(torch.float32).npu() +m = torch.full((10, 10), 0.9).to(torch.float32).npu() +v = torch.full((10, 10), 0.9).to(torch.float32).npu() +s = torch.full((10, 10), 0.5).to(torch.float32).npu() +step = torch.full((1, ), 1).to(torch.int64).npu() +lr, ema_decay, beta1, beta2, eps, mode, bias_correction, weight_decay= 1e-8, 0.9999, 0.9999, 0.9999, 1e-8, 1, True, 0.001 +var, m, v, s = npu_apply_fused_ema_adamw(grad, var, m, v, s, step, lr, ema_decay, beta1, beta2, eps, mode, + bias_correction, weight_decay) + +``` \ No newline at end of file diff --git a/model/train/yoco_moe/docs/ops/npu_bmm_reduce_scatter_all_to_all.md b/model/train/yoco_moe/docs/ops/npu_bmm_reduce_scatter_all_to_all.md new file mode 100644 index 000000000..8c4e3b6d6 --- /dev/null +++ b/model/train/yoco_moe/docs/ops/npu_bmm_reduce_scatter_all_to_all.md @@ -0,0 +1,187 @@ +# npu_bmm_reducescatter_alltoall对外接口 +``` +def npu_bmm_reducescatter_alltoall(x: Tensor, + weight: Tensor, + group_ep: str, + group_ep_worldsize: int, + group_tp: str, + group_tp_worldsize: int, + *, + bias: Optional[Tensor] = None, + shard_type: Optional[int] = 0) -> Tensor: +``` + +计算逻辑: +BatchMatMulReduceScatterAllToAll是实现BatchMatMul计算与ReduceScatter、AllToAll集合通信并行的算子。 +大体计算流程为:BatchMatMul计算-->转置(shard_type等于0时需要)-->ReduceScatter集合通信-->Add-->AllToAll集合通信 + +计算逻辑如下,其中out为最终输出,x weight bias为输入 +$$ + 
bmmOut = BatchMatMul(x,weight) +$$ +$$ + reduceScatterOut = ReduceScatter(bmmOut) +$$ +$$ + addOut = Add(reduceScatterOut, bias) +$$ +$$ + out = AllToAll(addOut) +$$ + +## 输入输出及属性说明: +输入: +- x:必选输入,Tensor,数据类型float16,bfloat16,必须为3维。BatchMatMul计算的左矩阵。 +- weight:必选输入,Tensor,数据类型float16, bfloat16,必须为3维,类型与x保持一致。BatchMatMul计算的右矩阵。 +- bias:可选输入,Tensor,数据类型float16, float32。x为float16时,bias需为float16;x为bfloat16时,bias需为float32。支持两维或三维。BatchMatMul计算的bias。(由于要进行ReduceScatter通信,因此需要在通信之后再Add)。 + +输出: +- out:Tensor,数据类型float16, bfloat16,必须为3维。最终计算结果,类型与输入x保持一致。 + +属性: +- group_ep:必选属性,str。ep通信域名称,专家并行的通信域。 +- group_ep_worldsize:必选属性,int。ep通信域size,支持2/4/8/16/32。 +- group_tp:必选属性,str。tp通信域名称,Tensor并行的通信域。 +- group_tp_worldsize:必选属性,int。tp通信域size,支持2/4/8/16/32。 +- shard_type:可选属性,int,默认值为0。0表示输出在H维度按tp分片,1表示输出在C维度按tp分片。 + + +## 输入限制 +因为集合通信及BatchMatMul计算所需,输入输出shape需满足以下数学关系:(其中ep=group_ep_worldsize,tp=group_tp_worldsize) + +按H轴进行ReduceScatter场景,即shard_type为0场景: +- x: (E/ep, ep\*C, M/tp) +- weight:(E/ep, M/tp, H) +- bias:(E/ep, 1, H/tp) 两维时为(E/ep, H/tp) +- out:(E, C, H/tp) + +按C轴进行ReduceScatter场景,即shard_type为1场景: +- x: (E/ep, ep\*tp\*C/tp, M/tp) +- weight:(E/ep, M/tp, H) +- bias:(E/ep, 1, H) 两维时为(E/ep, H) +- out:(E, C/tp, H) + +数据关系说明: +- 比如x.size(0)等于E/ep,out.size(0)等于E,则表示,out.size(0) = ep\*x.size(0),out.size(0)是ep的整数倍;其他关系类似 +- E的取值范围为[2, 512],且E是ep的整数倍; +- H的取值范围为:[1, 65535],当shard_type为0时,H需为tp的整数倍; +- M/tp的取值范围为:[1, 65535]; +- E/ep的取值范围为:[1, 32]; +- ep、tp均仅支持2、4、8、16、32; +- group_ep和group_tp名称不能相同; +- C大于0,上限为算子device内存上限,当shard_type为1时,C需为tp的整数倍; +- 不支持跨超节点,只支持超节点内。 + +## npu_bmm_reducescatter_alltoall 类的调用示例(待验证) +在终端调用命令如下: +``` +python3 -m torch.distributed.launch --nproc_per_node 8 --master_addr 127.0.0.1 --master_port 29500 demo_test.py +``` +注:master_addr和master_port参数需用户根据实际情况设置,8表示ep_size*tp_size,按实际修改 + +demo_test.py的示例代码如下: +```python +import os +import pytest +import torch +import torch.distributed as dist +from torch.distributed.distributed_c10d import
_get_default_group, ReduceOp +import torch_npu +from mindspeed.ops.npu_bmm_reduce_scatter_all_to_all import npu_bmm_reducescatter_alltoall + +world_size = 8 +ep_size = 4 +tp_size = 2 +def get_hcomm_info(n, i): + default_pg = _get_default_group() + if torch.__version__ > '2.0.1': + hcomm_info = default_pg._get_backend(torch.device('npu')).get_hccl_comm_name(i) + else: + hcomm_info = default_pg.get_hccl_comm_name(i) + return hcomm_info + +def setup_ep_tp(rank, tp_size, ep_size, backend_type): + # 初始化EP域 + print("device %d initialize ep group" % rank, flush=True) + for i in range(tp_size): + ep_ranks = [x + ep_size * i for x in range(ep_size)] + ep_group = dist.new_group(backend=backend_type, ranks=ep_ranks) + if rank in ep_ranks: + ep_group_tmp = ep_group + print("device %d initialize tp group" % rank, flush=True) + for i in range(ep_size): + tp_ranks = [x * ep_size + i for x in range(tp_size)] + tp_group = dist.new_group(backend=backend_type, ranks=tp_ranks) + if rank in tp_ranks: + tp_group_tmp = tp_group + return ep_group_tmp, tp_group_tmp + +def get_ep_tp_hcomm_info(rank, ep_size, tp_size): + ep_group, tp_group = setup_ep_tp(rank, tp_size, ep_size, "hccl") + if torch.__version__ > '2.0.1': + ep_hcomm_info = ep_group._get_backend(torch.device("npu")).get_hccl_comm_name(rank) + tp_hcomm_info = tp_group._get_backend(torch.device("npu")).get_hccl_comm_name(rank) + else: + ep_hcomm_info = ep_group.get_hccl_comm_name(rank) + tp_hcomm_info = tp_group.get_hccl_comm_name(rank) + return ep_hcomm_info, tp_hcomm_info + +def test_npu_bmm_reducescatter_alltoall(dtype, y_shard_type, transpose_weight): + rank = int(os.environ["LOCAL_RANK"]) + torch_npu.npu.set_device(rank) + dist.init_process_group(backend="hccl", rank=rank, world_size=world_size) + ep_group, tp_group = get_ep_tp_hcomm_info(rank, ep_size, tp_size) + hcomm_info = get_hcomm_info(world_size, rank) + print(f'current device: {torch_npu.npu.current_device()}, local rank = {rank}, hcomm_info = {ep_group}, {tp_group}') 
+ E, C, H, M = 4, 1024, 1024, 8192 + if y_shard_type == 0: + x_shape = (E / ep_size, ep_size * C, M / tp_size) + bias_shape = (E / ep_size, 1, H / tp_size) + else: + x_shape = (E / ep_size, tp_size * ep_size * C, M / tp_size) + bias_shape = (E / ep_size, 1, H) + weight_shape = (E / ep_size, M / tp_size, H) + if transpose_weight == True: + weight_shape = (E / ep_size, H, M / tp_size) + + x_shape = tuple(int(item) for item in x_shape) + weight_shape = tuple(int(item) for item in weight_shape) + bias_shape = tuple(int(item) for item in bias_shape) + x = torch.rand(x_shape) + weight = torch.rand(weight_shape) + bias = torch.rand(bias_shape) + x_npu = x.npu().to(dtype) + weight_npu = weight.npu().to(dtype) + if transpose_weight == True: + print(f'!!!!before transpose, weight_npu.size()={weight_npu.size()}') + weight_npu = weight_npu.transpose(1, 2) + print(f'!!!!after transpose, weight_npu.size()={weight_npu.size()}') + print(f'!!!!after transpose, weight_npu.is_contiguous()={weight_npu.is_contiguous()}') + bias_npu = bias.npu().to(dtype) + + y = npu_bmm_reducescatter_alltoall(x_npu, + weight_npu, + ep_group, + ep_size, + tp_group, + tp_size, + bias=bias_npu, + shard_type=y_shard_type) + print(f'y_shape = {y.size()}') + if y_shard_type == 0: + assert y.size() == (E, C, int(H / tp_size)) + else: + assert y.size() == (E, C, H) + return y + +if __name__ == '__main__': + dtype = torch.float16 + shard_type = 1 + transpose_weight = False + y_npu = test_npu_bmm_reducescatter_alltoall(dtype, shard_type, transpose_weight) + rank = int(os.environ["LOCAL_RANK"]) + if rank == 0: + for i, y in enumerate(y_npu): + y.cpu().numpy().tofile(f"./y_{i}.bin") + +``` diff --git a/model/train/yoco_moe/docs/ops/npu_dropout_add_layer_norm.md b/model/train/yoco_moe/docs/ops/npu_dropout_add_layer_norm.md new file mode 100644 index 000000000..5685b546a --- /dev/null +++ b/model/train/yoco_moe/docs/ops/npu_dropout_add_layer_norm.md @@ -0,0 +1,127 @@ +# npu_dropout_add_layer_norm 对外接口 +``` +# 计算逻辑 
+# norm_result = LayerNorm(Dropout(x0 x rowscale x layerscale) + residual) +def npu_dropout_add_layer_norm(x0, + residual, + weight, + bias, + dropout_p, + epsilon, + rowscale=None, + layerscale=None, + prenorm=False, + residual_in_fp32=False, + return_dropout_mask=False) + +# 计算逻辑 +# norm_result = RmsNorm(Dropout(x0 x rowscale x layerscale) + residual) +def npu_dropout_add_rms_norm(x0, + residual, + weight, + bias, + dropout_p, + epsilon, + rowscale=None, + layerscale=None, + prenorm=False, + residual_in_fp32=False, + return_dropout_mask=False) +``` + +输入: +- x0:必选输入,shape:(B,S,H)。 +- residual:必选输入,shape:(B,S,H),可输入None。表示残差。 +- weight:必选输入,shape:(H,)。表示归一化处理时的权重参数。 +- bias:必选输入,shape:(H,),数据类型与输入weight一致,可输入None。表示归一化处理时的偏置参数。 +- dropout_p:必选属性,数据类型float。表示Dropout舍弃概率,eval模式下p=0。 +- epsilon:必选属性,数据类型float。归一化处理时,添加到分母中的值,以提高数值稳定性。 +- rowscale:可选输入,shape:(B,S),数据类型与输入x0一致,默认值:None。表示矩阵按行缩放比例。 +- layerscale:可选输入,shape:(H,),数据类型与输入x0一致,默认值:None。表示矩阵按列缩放比例。 +- prenorm:可选属性,数据类型为bool,默认值:False。表示是否返回输出pre_norm_result。 +- residual_in_fp32:可选属性,数据类型为bool,默认值:False。仅在输入residual不为None时有意义。 +- return_dropout_mask:可选属性,数据类型为bool,默认值:False。表示是否返回输出drop_mask。 + +支持的输入数据类型组合: + +| x0 | residual | weight | rowscale | layerscale | +| ----- | ----- | ----- | ----- | ----- | +|fp32|fp32|fp32|fp32|fp32 | +|fp16|fp16|fp16|fp16|fp16 | +|bf16|bf16|bf16|bf16|bf16 | + + +输出: +- norm_result:必选输出,数据类型与输入x0一致。 +- pre_norm_result:可选输出,数据类型与输入residual一致。 +- mask_result:可选输出,数据类型为bool。 + +*** +## 案例一:npu_dropout_add_layer_norm 接口的调用方式 + +- 输入 x0 和 weight +- 结果只返回 norm_result + +```python +import torch +import torch_npu + +from mindspeed.ops.dropout_add_layer_norm import npu_dropout_add_layer_norm + + +batch, seq, hidden_size = 6, 60, 1024 +x0 = torch.randn((batch, seq, hidden_size), requires_grad=True).to(torch.float).npu() +weight = torch.randn((hidden_size, ), requires_grad=True).to(torch.float).npu() +residual, bias, rowscale, layerscale = None, None, None, None +dropout_p = 0.0 +epsilon = 
1e-5 +prenorm, residual_in_fp32, return_dropout_mask = False, True, False + +# 正向接口案例 +norm_result = npu_dropout_add_layer_norm(x0, residual, weight, bias, dropout_p, epsilon, + rowscale, layerscale, prenorm, residual_in_fp32, return_dropout_mask) + +g = torch.randn_like(norm_result) +norm_result.backward(g) +x0_grad = x0.grad +weight_grad = weight.grad + +``` + +## 案例二:npu_dropout_add_layer_norm 接口的调用方式 +- 输入 x0, residual, weight, rowscale, layerscale +- 结果返回 norm_result, pre_norm_result, mask_result + +```python +import torch +import torch_npu + +from mindspeed.ops.dropout_add_layer_norm import npu_dropout_add_layer_norm + + +batch, seq, hidden_size = 6, 60, 1024 +x0 = torch.randn((batch, seq, hidden_size), requires_grad=True).to(torch.float).npu() +residual = torch.randn((batch, seq, hidden_size), requires_grad=True).to(torch.float).npu() +weight = torch.randn((hidden_size, ), requires_grad=True).to(torch.float).npu() +bias = torch.randn((hidden_size, ), requires_grad=True).to(torch.float).npu() +rowscale = torch.randn((batch, seq, ), requires_grad=True).to(torch.float).npu() +layerscale = torch.randn((hidden_size, ), requires_grad=True).to(torch.float).npu() +dropout_p = 0.0 +epsilon = 1e-5 +prenorm, residual_in_fp32, return_dropout_mask = True, True, True + +# 正向接口案例 +norm_result, pre_norm_result, mask_result = npu_dropout_add_layer_norm(x0, residual, weight, + bias, dropout_p, epsilon, + rowscale, layerscale, prenorm, + residual_in_fp32, return_dropout_mask) + +g = torch.randn_like(norm_result) +norm_result.backward(g) +x0_grad = x0.grad +residual_grad = residual.grad +weight_grad = weight.grad +bias_grad = bias.grad +rowscale_grad = rowscale.grad +layerscale_grad = layerscale.grad +``` \ No newline at end of file diff --git a/model/train/yoco_moe/docs/ops/npu_fused_moe_token_permute.md b/model/train/yoco_moe/docs/ops/npu_fused_moe_token_permute.md new file mode 100644 index 000000000..9ca4166a9 --- /dev/null +++ 
b/model/train/yoco_moe/docs/ops/npu_fused_moe_token_permute.md @@ -0,0 +1,121 @@ +# npu_moe_token_permute对外接口 + +npu_moe_token_permute( + tokens: torch.Tensor, + indices: torch.Tensor, + num_out_tokens: int = None, + padded_mode: bool = False +) + +小算子等价计算逻辑: +```python +import torch + +def permute_with_padded_tokens(tokens, indices): + """Permute the tokens based on the indices, only used in padding mode. + The input indices shape is [num_expert, capacity], it indicates which tokens were selected by each expert separately. + Args: + tokens (torch.Tensor): The input token tensor. + indices (torch.Tensor): A tensor with shape [num_expert, capacity], indicating the selected tokens for each expert. + + Returns: + torch.Tensor: The permuted tensor. + torch.Tensor: The sorted_indices corresponding permuted tensor. + """ + permuted_tokens = tokens.index_select(dim=0, index=indices.view(-1)) + + return permuted_tokens, indices + + +def permute(tokens, indices, num_out_tokens: int = None, padded_mode: bool = False): + """Permute the tokens based on the indices. Token with the same index will be grouped together. + The input indices shape is [tokens, top_k], it indicates which experts were selected by each token separately. + Args: + tokens (torch.Tensor): The input token tensor. + indices (torch.Tensor): The token to expert indices tensor, should have a shape of [num_tokens] or [num_tokens, topk]. + num_out_tokens (int, optional): The effective output token count, when enabling the capacity factor, should equal the number of tokens not dropped. By default, set to None, meaning no tokens are dropped. + padded_mode (bool, optional): If True, indicating the indices are padded to [num_expert, capacity] to denote selected tokens per expert. Defaults to False. + + Returns: + torch.Tensor: The permuted tensor. + torch.Tensor: The sorted_indices corresponding permuted tensor. 
+ """ + if padded_mode: + return permute_with_padded_tokens(tokens, indices) + + if indices.dim() == 1: + topk = 1 + else: + topk = indices.size(1) + flatten_indices = indices.view(-1) + sorted_indices = torch.argsort(flatten_indices, stable=True) + sorted_indices1 = torch.argsort(sorted_indices, stable=True) + + if num_out_tokens is not None: + sorted_indices = sorted_indices[:num_out_tokens] + permuted_tokens = tokens.index_select(0, sorted_indices // topk) + return permuted_tokens, sorted_indices1 +``` + +## 前向接口: + +输入: + +- tokens:必选输入,2维Tensor,数据类型bfloat16(当前版本tokens仅支持bfloat16) +- indices: 必选输入,2维Tensor,数据类型int64 + +输出: + +- permuted_tokens:必选输出,2维Tensor,数据类型bfloat16(当前版本permuted_tokens仅支持bfloat16) +- sorted_indices:必选输出,1维Tensor,数据类型int32(当前版本sorted_indices仅支持int32) + +属性: + +- num_out_tokens:可选属性,数据类型int64_t,表示有效输出token数 +- padded_mode: 可选属性,数据类型int64_t,如果为 True,则表示索引被填充到 [num_expert,capacity] 以表示每个专家选择的token + + +## 反向接口: + +输入: + +- grad_permuted_tokens:必选输入,2维Tensor,数据类型bfloat16(当前版本grad_permuted_tokens仅支持bfloat16) +- sorted_indices:必选输入,1维Tensor,数据类型int32(当前版本sorted_indices仅支持int32) + +输出: + +- grad_tokens:必选输出,2维Tensor,数据类型bfloat16(当前版本grad_tokens仅支持bfloat16) + +属性: + +- num_topK:必选属性,数据类型int64_t,表示每条token输出的专家个数 +- padded_mode:可选属性,数据类型int64_t,如果为 True,则表示索引被填充到 [num_expert,capacity] 以表示每个专家选择的token + + +**备注**: +1. 目前仅支持padded_mode为False +2.
目前仅支持bfloat16 + + + +## 案例 + +```python +import torch +import torch_npu + +from mindspeed.ops.npu_moe_token_permute import npu_moe_token_permute + +dtype = torch.bfloat16 +tokens = torch.tensor([[1, 1, 1], [2, 2, 2], [3, 3, 3], [0, 0, 0]]).npu().to(dtype).requires_grad_(True) +indices = torch.tensor([[0, 4], [4, 3], [4, 2], [1, 1]]).npu() +num_out_tokens = indices.numel() +probs = torch.ones_like(indices) / 2 +probs = probs.npu().to(dtype) +# 正向接口案例 +permuted_tokens, sorted_indices = npu_moe_token_permute(tokens, indices, num_out_tokens) + +# 反向接口案例 +permuted_tokens.backward(torch.ones(permuted_tokens.shape).to(torch.bfloat16).npu()) + +``` \ No newline at end of file diff --git a/model/train/yoco_moe/docs/ops/npu_fused_moe_token_unpermute.md b/model/train/yoco_moe/docs/ops/npu_fused_moe_token_unpermute.md new file mode 100644 index 000000000..dfb1f7cb7 --- /dev/null +++ b/model/train/yoco_moe/docs/ops/npu_fused_moe_token_unpermute.md @@ -0,0 +1,179 @@ +# npu_moe_token_unpermute对外接口 + +npu_moe_token_unpermute( + permuted_tokens: torch.Tensor, + sorted_indices: torch.Tensor, + probs: torch.Tensor = None, + padded_mode: bool = False, + restore_shape: torch.Size = None, +) + +小算子等价计算逻辑: +```python +import torch + +def unpermute_with_padded_tokens( + permuted_tokens: torch.Tensor, + indices: torch.Tensor, + probs: torch.Tensor, + restore_shape: torch.Size, +) -> torch.Tensor: + """ + Unpermutes a padded permuted tokens based on sorted indices and merges the tokens with their corresponding probabilities. + + This function takes a tensor of permuted tokens and reorders them according to the provided indices. It also combines the tokens with their associated probabilities. + + Parameters: + permuted_tokens (torch.Tensor): A 2D tensor containing permuted tokens. + indices (torch.Tensor): A tensor with shape [num_expert, capacity], indicating the selected tokens for each expert. 
+ probs (torch.Tensor): A tensor with the same shape as indices, containing probabilities corresponding to each token. + restore_shape (torch.Size): The target shape for the unpermuted tokens tensor. + + Returns: + torch.Tensor: A tensor of unpermuted tokens, merged with their probabilities. + + """ + # Ensure permuted_tokens is 2D + assert permuted_tokens.dim() == 2, f"Got {permuted_tokens.dim()}D." + + # Reshape and expand probabilities and indices to match permuted_tokens + probs = probs.view(-1).unsqueeze(-1) + indices = indices.view(-1, 1).expand(-1, permuted_tokens.shape[1]) + assert ( + permuted_tokens.shape == indices.shape + ), "Shape mismatch between permuted_tokens and indices." + + # Combine tokens with their probabilities + combined_output = probs * permuted_tokens + + # Prepare a tensor of zeros with the desired output shape + empty_tokens = torch.zeros( + restore_shape, + dtype=combined_output.dtype, + device=combined_output.device, + requires_grad=True, + ) + + # Scatter the combined tokens back to their original positions + unpermuted_tokens = torch.scatter_add(empty_tokens, 0, indices, combined_output) + + return unpermuted_tokens + +def unpermute( + permuted_tokens: torch.Tensor, + sorted_indices: torch.Tensor, + probs: torch.Tensor = None, + padded_mode: bool = False, + restore_shape: torch.Size = None, +): + """Unpermute a tensor of permuted tokens based on sorted indices, and optionally merge the tokens with their corresponding probabilities. + + Args: + permuted_tokens (torch.Tensor): The tensor of permuted tokens to be unpermuted. + sorted_indices (torch.Tensor): The tensor of sorted indices used to unpermute the tokens. + probs (torch.Tensor, optional): The tensor of probabilities corresponding to the permuted tokens. If provided, the unpermuted tokens will be merged with their respective probabilities. + padded_mode (bool, optional): If True, indicating the indices are padded to [num_expert, capacity] to denote selected tokens per expert. 
Defaults to False. + restore_shape (torch.Size, optional): The input shape before permutation, only used in padding mode. Defaults to None. + + Returns: + torch.Tensor: The unpermuted tokens, optionally merged with probabilities. + """ + if padded_mode: + return unpermute_with_padded_tokens( + permuted_tokens, sorted_indices, probs, restore_shape=restore_shape + ) + + assert sorted_indices.numel() == permuted_tokens.size(0) + if probs is not None: + # Unpermute and merge the tokens with their probabilities + num_unpermuted_tokens = probs.numel() + topk = probs.size(1) + else: + # Unpermute the tokens without merge + num_unpermuted_tokens = permuted_tokens.size(0) + topk = 1 + + unpermuted_tokens = torch.zeros( + [num_unpermuted_tokens, permuted_tokens.shape[-1]], + dtype=permuted_tokens.dtype, + device=permuted_tokens.device, + ) + unpermuted_tokens.index_copy_(0, sorted_indices, permuted_tokens) + unpermuted_tokens = unpermuted_tokens.reshape(-1, topk, permuted_tokens.size(-1)) + if probs is not None: + unpermuted_tokens = unpermuted_tokens * probs.unsqueeze(-1) + unpermuted_tokens = unpermuted_tokens.sum(dim=1) + + return unpermuted_tokens +``` + +## 前向接口: + +输入: + +- permuted_tokens:必选输入,2维Tensor,数据类型bfloat16(当前版本permuted_tokens仅支持bfloat16) +- sorted_indices: 必选输入,1维Tensor,数据类型int32(当前版本sorted_indices仅支持int32) +- probs:可选输入,2维Tensor,数据类型bfloat16(当前版本probs仅支持bfloat16) + +输出: + +- unpermuted_tokens:必选输出,2维Tensor,数据类型bfloat16(当前版本unpermuted_tokens仅支持bfloat16) + +属性: + +- padded_mode: 可选属性,数据类型int64_t,如果为 True,则表示索引被填充到 [num_expert,capacity] 以表示每个专家选择的token + + +## 反向接口: + +输入: + +- permuted_tokens:必选输入,2维Tensor,数据类型bfloat16(当前版本permuted_tokens仅支持bfloat16) +- grad_unpermuted_tokens:必选输入,2维Tensor,数据类型bfloat16(当前版本grad_permuted_tokens仅支持bfloat16) +- sorted_indices: 必选输入,1维Tensor,数据类型int32(当前版本sorted_indices仅支持int32) +- probs:可选输入,2维Tensor,数据类型bfloat16(当前版本probs仅支持bfloat16) + +输出: + +- grad_permuted_tokens:必选输出,2维Tensor,数据类型bfloat16(当前版本grad_permuted_tokens仅支持bfloat16) 
+- grad_probs:必选输出,2维Tensor,数据类型bfloat16(当前版本grad_probs仅支持bfloat16) + +属性: + +- padded_mode:可选属性,数据类型int64_t,如果为 True,则表示索引被填充到 [num_expert,capacity] 以表示每个专家选择的token + + +**备注**: +1. 目前仅支持padded_mode为False +2. 目前仅支持bfloat16 + + +## 案例 + +```python +import torch +import torch_npu + +from mindspeed.ops.npu_moe_token_unpermute import npu_moe_token_unpermute + +dtype = torch.bfloat16 +permuted_tokens = torch.tensor([[1., 1., 1.], + [0., 0., 0.], + [0., 0., 0.], + [3., 3., 3.], + [2., 2., 2.], + [1., 1., 1.], + [2., 2., 2.], + [3., 3., 3.]]).npu().to(dtype).requires_grad_(True) +sorted_indices = torch.tensor([0, 6, 7, 5, 3, 1, 2, 4], dtype=torch.int32).npu() +indices = torch.tensor([[0, 4], [4, 3], [4, 2], [1, 1]]).npu() +probs = torch.ones_like(indices) / 2 +probs = probs.npu().to(dtype).requires_grad_(True) + +# 正向接口案例 +unpermuted_tokens = npu_moe_token_unpermute( + permuted_tokens, sorted_indices, probs=probs) + +# 反向接口案例 +unpermuted_tokens.backward(torch.ones(unpermuted_tokens.shape).to(torch.bfloat16).npu()) +``` \ No newline at end of file diff --git a/model/train/yoco_moe/docs/ops/npu_grouped_mat_mul_all_reduce.md b/model/train/yoco_moe/docs/ops/npu_grouped_mat_mul_all_reduce.md new file mode 100644 index 000000000..d4c522ab6 --- /dev/null +++ b/model/train/yoco_moe/docs/ops/npu_grouped_mat_mul_all_reduce.md @@ -0,0 +1,135 @@ +# npu_grouped_mat_mul_all_reduce对外接口 +``` +def npu_grouped_mat_mul_all_reduce(x: List[torch.Tensor], + weight: List[torch.Tensor], + hcomm: str, + *, + bias: Optional[List[torch.Tensor]] = None, + group_list: Optional[List[int]] = None, + split_item: Optional[int] = 0, + reduce_op: str = "sum", + comm_turn: int = 0) -> List[torch.Tensor] +``` + +计算逻辑: +GroupedMatMulAllReduce算子是GroupedMatmul算子的多卡通信版本。它可以实现分组矩阵计算,每组矩阵乘的维度大小可以不同,是一种灵活的组合方式。输入数据x和输出数据y均支持切分或不切分模式,可以根据参数split_item来确定是否切分。当x需要切分时,使用group_list参数来描述x的m轴切分配置。本算子增加了AllReduce集合通信操作,可以把矩阵乘任务切分到多张卡上并行计算,然后通过AllReduce集合通信操作把所有卡的计算结果加和到一起,最终完成整个任务。根据输入x、weight和输出y的Tensor数量,本算子可以支持如下四种场景: +-
x、weight、y的tensor数量均等于组数group_num,即每组的数据对应的tensor是独立的。 +- x的tensor数量为1, weight和y的tensor数量等于组数,此时需要通过group_list来说明x在m轴方向上的分组情况。如group_list[0]=10说明x矩阵的前10行参与第一组矩阵乘计算。 +- x、weight的tensor数量均等于组数group_num, y的tensor数量为1,此时每组矩阵乘的结果放在同一个输出tensor中连续存放。 +- x、y的tensor数量均为1,weight的tensor数量等于组数,属于前两种情况的组合。 + +计算公式为: +对于每一组矩阵乘任务i: $$y_i = x_i * weight_i + bias_i$$ +切分到n张卡上后,计算形式可表示为: + +$$ +y_i = [x_{i1}, x_{i2}, ..., x_{in}] * +\begin{bmatrix} +{weight_{i1}} \\ +{weight_{i2}} \\ +{...} \\ +{weight_{in}} +\end{bmatrix}+\sum^{n}{bias_i/n} +$$ + +## 前向接口: +输入: +- x:必选输入,List[Tensor],数据类型float16,bfloat16。支持的最大长度为64个。 +- weight:必选输入,List[Tensor],数据类型float16, bfloat16。支持的最大长度为64个。 +- bias:可选输入,List[Tensor],数据类型float16, float32。支持的最大长度为64个。对于实际无bias的场景,可以直接不指定bias参数或设置为None。 +- group_list:可选输入,Optional[List[int64]],缺省none。代表输入和输出M方向的matmul大小分布,支持的最大长度为64个。 + +输出: +- y:List[Tensor],数据类型float16, bfloat16。支持的最大长度为64个。 + +属性: +- split_item:可选属性,int64。代表输入和输出是否要做tensor切分,0代表输入和输出都不用切分;1代表输入需要切分,输出不需要切分;2代表输入不需要切分,输出需要切分;3代表输入和输出都需要切分。缺省0。 +- hcomm:必选属性,数据类型支持:string。表示通信域名称,Host侧标识列组的字符串。通过Hccl提供的接口获取。 +- reduce_op:可选属性,数据类型支持:string。reduce操作类型。**当前版本仅支持输入"sum"。** +- comm_turn:可选属性,int64。Host侧的整型,通信数据切分数,即总数据量/单次通信量。**当前版本仅支持输入0。** + + +## 反向接口 +无 + +## 输入限制 +- List支持的最大长度为64; +- 属性reduce_op仅支持输入"sum"; +- 属性comm_turn仅支持输入0; +- 通信支持2、4、8卡。 +- 当split_item取0或2时,x的tensor数量与weight的相同;当split_item取1或3时,x的tensor数量为1。 +- 当split_item取0或2时,group_list为空;当split_item取1或3时,group_list的长度与weight的tensor个数相同。 +- 若bias不是空,其tensor数量要与weight的相同。 +- 满足矩阵乘的m/k/n维度关系。 +- 支持输入元素类型: + 1. x中元素类型为float16,weight中元素类型为float16,bias中元素类型为float16,输出y中元素类型为float16; + + 2. 
x中元素类型为bfloat16,weight中元素类型为bfloat16,bias中元素类型为float32,输出y中元素类型为bfloat16; +- 输入中须包含通信域hcomm字符串,它需要从torch.distributed包中的接口获取。 +- 当前只支持Pytorch2.1版本。 + +## npu_grouped_mat_mul_all_reduce 类的调用方式 + +```python +import os +import torch +import torch_npu +import torch.multiprocessing as mp +import torch.distributed as dist +import numpy as np +import math +from mindspeed.ops.npu_grouped_mat_mul_all_reduce import npu_grouped_mat_mul_all_reduce + + +def get_hcomm_info(world_size, rank): + dist.init_process_group(backend='hccl', rank=rank, world_size=world_size, init_method='tcp://127.0.0.1:5001') + print(f'device_{rank} init_process_group success.') + if dist.is_available(): + from torch.distributed.distributed_c10d import _get_default_group + default_pg = _get_default_group() + if torch.__version__ > '2.0.1': + hcomm_info = default_pg._get_backend(torch.device('npu')).get_hccl_comm_name(rank) + else: + hcomm_info = default_pg.get_hccl_comm_name(rank) + print(f'device_{rank} get_hccl_comm_name success.') + return dist, hcomm_info + + +def run(rank, n_device): + torch_npu.npu.set_device(rank) + dist, hcomm_info = get_hcomm_info(n_device, rank) + + x1 = torch.randn(32, 256, dtype=torch.float16) + x1_npu = x1.npu() + w1 = torch.randn(256, 32, dtype=torch.float16) + w1_npu = w1.npu() + + x2 = torch.randn(64, 128, dtype=torch.float16) + x2_npu = x2.npu() + w2 = torch.randn(128, 64, dtype=torch.float16) + w2_npu = w2.npu() + + group_list = None + split_item = 0 + hccl_group = hcomm_info + reduce_op = "sum" + comm_turn = 0 + + # 正向接口案例 + x1_npu.requires_grad = False + x2_npu.requires_grad = False + w1_npu.requires_grad = False + w2_npu.requires_grad = False + y_npu = npu_grouped_mat_mul_all_reduce([x1_npu, x2_npu], [w1_npu, w2_npu], hccl_group, bias=[], + group_list=group_list, split_item=split_item, reduce_op=reduce_op, + comm_turn=comm_turn) + if rank == 0: + for i, y in enumerate(y_npu): + y.cpu().numpy().tofile(f"./y_{i}.bin") + + +if __name__ == '__main__': + n_device = 2 + 
mp.spawn(run, args=(n_device,), nprocs=n_device) + +``` diff --git a/model/train/yoco_moe/docs/ops/npu_groupmatmul_add.md b/model/train/yoco_moe/docs/ops/npu_groupmatmul_add.md new file mode 100644 index 000000000..d45cabcfa --- /dev/null +++ b/model/train/yoco_moe/docs/ops/npu_groupmatmul_add.md @@ -0,0 +1,35 @@ +# npu_groupmatmul_add_fp32对外接口(只支持前向) +输入: +- x:必选输入,维度[m,k], 数据类型float16, bf16 +- weight:必选输入,维度[k,n], 数据类型float16, bf16 +- groupNum:必选输入, 维度[NumGroup], 数据类型int64 +- output:必选输入,维度[w1,w2],数据类型float32 + +输出: +- output:必选输出,数据类型float32 + +## 案例 +```python +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +import pytest +import torch +import mindspeed.megatron_adaptor +import torch_npu +from mindspeed.ops.gmm import GMMFunction +from mindspeed.ops.npu_groupmatmul_add import npu_groupmatmul_add_fp32 + +x = torch.rand((1024, 4096), dtype=torch.float16).npu() +grads_out = torch.rand((1024, 2048), dtype=torch.float16).npu() +group_list = torch.tensor([500, 1024], dtype=torch.int64).npu() +out = torch.rand((8192, 2048), dtype=torch.float32).npu() +weight = torch.rand((2, 4096, 2048), dtype=torch.float16).npu() +# 分开算子计算结果 +_, dy, _ = GMMFunction.builder.load().npu_gmm_backward([grads_out], [x], [weight], group_list, 0) +out_single = out+dy[0].view(*out.shape) +#融合算子计算结果 +x = x.clone().detach() +grads_out = grads_out.clone().detach() +group_list = group_list.clone().detach() +out = out.clone().detach() +npu_groupmatmul_add_fp32(x, grads_out, group_list, out) +``` diff --git a/model/train/yoco_moe/docs/ops/npu_matmul_add.md b/model/train/yoco_moe/docs/ops/npu_matmul_add.md new file mode 100644 index 000000000..182423651 --- /dev/null +++ b/model/train/yoco_moe/docs/ops/npu_matmul_add.md @@ -0,0 +1,24 @@ +# npu_matmul_add_fp32对外接口(只支持前向) +输入: +- x:必选输入,数据类型float16, bf16 +- weight:必选输入,数据类型float16, bf16 +- C:必选输入,数据类型float32 + +输出: +- output:必选输出,数据类型float32 + +## 案例 +```python + import torch + import torch_npu + from 
mindspeed.ops.npu_matmul_add import npu_matmul_add_fp32 + + x = torch.rand((4096, 8192),dtype=torch.float16).npu() + weight = torch.rand((4096, 8192),dtype=torch.float16).npu() + C = torch.rand((8192, 8192),dtype=torch.float32).npu() + # 分开算子计算过程 + product = torch.mm(x.T, weight) + result = product + C + # 融合算子计算过程 + npu_matmul_add_fp32(weight, x, C) +``` diff --git a/model/train/yoco_moe/docs/ops/npu_mm_all_reduce_add_rms_norm.md b/model/train/yoco_moe/docs/ops/npu_mm_all_reduce_add_rms_norm.md new file mode 100644 index 000000000..935ec2e8e --- /dev/null +++ b/model/train/yoco_moe/docs/ops/npu_mm_all_reduce_add_rms_norm.md @@ -0,0 +1,151 @@ +# npu_mm_all_reduce_add_rms_norm对外接口 + +CLASS MatmulAllReduceAddRmsNorm() + +计算逻辑: +$$ +mmOut = allReduce(x1*x2 + bias) +$$ +$$ +y = mmOut + residual +$$ +$$ +normOut = \frac{y}{RMS(y)}*gamma, RMS(x) = \sqrt{\frac{1}{d}\sum_{i=1}^{d} y_{i}^{2} + epsilon} +$$ + +## 非量化场景: +输入: +- x1:必选输入,数据类型float16, bfloat16 +- x2:必选输入,数据类型float16, bfloat16 +- residual:必选输入,数据类型float16, bfloat16 +- gamma:必选输入,数据类型float16, bfloat16 +- hcom:必选输入,数据类型string, +- reduce_op:可选输入,数据类型为string,当前仅支持sum +- epsilon:可选输入,数据类型为float,缺省情况下为1e-06 +- bias:可选输入,数据类型float16, bfloat16 +- antiquant_scale:可选输入,该场景默认为nullptr +- antiquant_offset:可选输入,该场景默认为nullptr +- dequant_scale:可选输入,该场景默认为nullptr +- antiquant_group_size:可选输入,该场景默认为0 +- comm_turn:可选输入,数据类型为int,缺省情况下为0 + +输出: +- y:必选输出,数据类型float16, bfloat16 +- normOut:必选输出,数据类型float16, bfloat16 + +## 全量化场景 +输入: +- x1:必选输入,数据类型int8 +- x2:必选输入,数据类型int8 +- residual:必选输入,数据类型float16, bfloat16 +- gamma:必选输入,数据类型float16, bfloat16 +- hcom:必选输入,数据类型string, +- reduce_op:可选输入,数据类型为string,当前仅支持sum +- epsilon:可选输入,数据类型为float,缺省情况下为1e-06 +- bias:可选输入,数据类型int32 +- antiquant_scale:可选输入,该场景默认为nullptr +- antiquant_offset:可选输入,该场景默认为nullptr +- dequant_scale:可选输入,数据类型int64,uint64,bfloat16 +- antiquant_group_size:可选输入,该场景默认为0 +- comm_turn:可选输入,数据类型为int,缺省情况下为0 + +输出: +- y:必选输出,数据类型float16, bfloat16 +- normOut:必选输出,数据类型float16, 
bfloat16 + +## 伪量化场景 +输入: +- x1:必选输入,数据类型float16, bfloat16 +- x2:必选输入,数据类型int8 +- residual:必选输入,数据类型float16, bfloat16 +- gamma:必选输入,数据类型float16, bfloat16 +- hcom:必选输入,数据类型string, +- reduce_op:可选输入,数据类型为string,当前仅支持sum +- epsilon:可选输入,数据类型为float,缺省情况下为1e-06 +- bias:可选输入,数据类型float16, bfloat16 +- antiquant_scale:可选输入,数据类型float16, bfloat16 +- antiquant_offset:可选输入,数据类型float16, bfloat16 +- dequant_scale:可选输入,该场景默认为nullptr +- antiquant_group_size:可选输入,数据类型为int,缺省情况下为0 +- comm_turn:可选输入,数据类型为int,缺省情况下为0 + +输出: +- y:必选输出,数据类型float16, bfloat16 +- normOut:必选输出,数据类型float16, bfloat16 + +## 输入限制 +- ``x2`` 仅支持最后两轴转置情况下的非连续tensor传入,``x1``、``residual``、``gamma`` 等输入仅支持连续tensor +- 仅支持ND数据格式 +- ``x1`` 支持两维或者三维,其维度为 ``(b, s, k)`` 或者 ``(s, k)`` +- ``x2`` 仅支持两维,其维度为 ``(k, n)``,``x1`` 和 ``x2`` 的轴满足matmul算子入参要求,k轴相等 +- ``bias`` 在非空情况下为1维,其维度为 ``(n)`` +- ``residual`` 仅支持三维,其维度为 ``(b, s, n)``,当 ``x1`` 为两维时,``residual`` 的 ``(b * s)`` 等于 ``x1`` 的 ``s``,当 ``x1`` 为三维时,``residual`` 的 ``(b * s)`` 等于 ``x1`` 的 ``(b * s)``;``residual`` 的最后一维与``x2`` 的最后一维相等 +- ``gamma`` 仅支持一维,其维度为 ``(n)``,``gamma`` 的最后一维与 ``residual`` 的最后一维相等 +- ``reduce_op`` 仅支持 ``sum`` +- 昇腾Atlas A2 AI处理器支持1、2、4、8卡,并且仅支持hccs链路all mesh组网 +- 昇腾Atlas A2 AI处理器支持``(b * s)``,``n``为0的空tensor,不支持``k``为0的空tensor +- 非量化场景下,``x1``、``x2``、``bias``(若支持)、``residual``、``gamma`` 计算输入的数据类型要一致 +- 昇腾Atlas A2 AI处理器,在非量化场景下,``(b * s)``、``k``、``n``的范围为``[1, 2147483647]`` +- 全量化场景下,若输出 ``residual`` 类型为 ``FLOAT16``,``dequant_scale`` 的类型为 ``INT64``、``UINT64``(需通过 ``torch_npu.npu_trans_quant_param()`` 接口对 ``dequant_scale`` 进行处理);若输出 ``residual`` 类型为 ``BFLOAT16``,``dequant_scale`` 的类型为 ``BFLOAT16``。``dequant_scale`` 满足两种模式: + - ``per_tensor`` 模式:``(1,)`` + - ``per_channel`` 模式:``(1, n)`` 或 ``(n,)`` +- 全量化场景下,``x1``、``x2`` 数据类型为 ``int8``,``bias``(若支持)数据类型为 ``int32``,``residual``、``gamma``计算输入的数据类型要一致。 +- 全量化场景下,``m``大小不超过2147483647,``x1``与``x2``的最后一维大小不超过65535,``x1``的最后一维指``k``,``x2``的最后一维指转置时的``k``或非转置时的``n``。 +- 伪量化场景下,``m``的范围为``[1, 
2147483647]``,``k``、``n``的范围为``[1,65535]`` +- 伪量化场景下,``antiquant_scale`` 满足三种模式: + - ``per_tensor`` 模式:``(1,)`` + - ``per_channel`` 模式:``(1, n)`` 或 ``(n,)`` + - ``per_group`` 模式:``(ceil(k,antiquant_group_size),n)`` +- ``antiquant_offset`` 若非空,shape 与 ``antiquant_scale``一致。 +- 伪量化场景下,``x2`` 的数据类型需为 ``int8``,``x1``、``bias``(若支持)、``residual``、``gamma``、``antiquant_scale``、``antiquant_offset``计算输入的数据类型要一致。 +- 伪量化场景下,``antiquant_group_size`` 取值满足取值范围``[32, min(k-1, INT_MAX)]``且为32倍数。 +- 一个模型中的通算融合MC2算子,仅支持相同通信域。 + +## npu_mm_all_reduce_add_rms_norm 接口的调用方式 + +```python +import torch +import torch_npu +import torch.distributed as dist +import torch.multiprocessing as mp +from mindspeed.ops.npu_mm_all_reduce_add_rms_norm import npu_mm_all_reduce_add_rms_norm + + +def run_mm_all_reduce_add_rms_norm(rank, world_size, master_ip, master_port, x1_shape, x2_shape, residual_shape, + gamma_shape, dtype): + torch_npu.npu.set_device(rank) + init_method = 'tcp://' + master_ip + ':' + master_port + dist.init_process_group(backend='hccl', rank=rank, world_size=world_size, init_method=init_method) + from torch.distributed.distributed_c10d import _get_default_group + default_pg = _get_default_group() + if torch.__version__ > '2.0.1': + hcom_info = default_pg._get_backend(torch.device('npu')).get_hccl_comm_name(rank) + else: + hcom_info = default_pg.get_hccl_comm_name(rank) + + x1 = torch.randn(x1_shape, dtype=dtype).npu() + x2 = torch.randn(x2_shape, dtype=dtype).npu() + residual = torch.randn(residual_shape, dtype=dtype).npu() + gamma = torch.randn(gamma_shape, dtype=dtype).npu() + epsilon = 0.000001 + y, norm_out = npu_mm_all_reduce_add_rms_norm(x1=x1, x2=x2, residual=residual, gamma=gamma, hcom=hcom_info, + reduce_op='sum', epsilon=epsilon) + print("y:", y) + print("norm_out:", norm_out) + + +if __name__ == "__main__": + worksize = 8 + master_ip = "127.0.0.1" + master_port = '50001' + b, s, k, n = 4, 1024, 1024, 8192 + x1_shape = (b, s, k) + x2_shape = (k, n) + residual_shape = (b, s,
n) + gamma_shape = (n) + dtype = torch.float16 + + mp.spawn(run_mm_all_reduce_add_rms_norm, + args=(worksize, master_ip, master_port, x1_shape, x2_shape, residual_shape, gamma_shape, dtype), + nprocs=worksize) +``` \ No newline at end of file diff --git a/model/train/yoco_moe/docs/ops/npu_mm_all_reduce_add_rms_norm_.md b/model/train/yoco_moe/docs/ops/npu_mm_all_reduce_add_rms_norm_.md new file mode 100644 index 000000000..c6b2ea1da --- /dev/null +++ b/model/train/yoco_moe/docs/ops/npu_mm_all_reduce_add_rms_norm_.md @@ -0,0 +1,151 @@ +# npu_mm_all_reduce_add_rms_norm_对外接口 + +CLASS InplaceMatmulAllReduceAddRmsNorm() + +计算逻辑: +$$ +mmOut = allReduce(x1*x2 + bias) +$$ +$$ +y = mmOut + residual +$$ +$$ +normOut = \frac{y}{RMS(y)}*gamma, RMS(x) = \sqrt{\frac{1}{d}\sum_{i=1}^{d} y_{i}^{2} + epsilon} +$$ + +## 非量化场景: +输入: +- x1:必选输入,数据类型float16, bfloat16 +- x2:必选输入,数据类型float16, bfloat16 +- residual:必选输入,数据类型float16, bfloat16 +- gamma:必选输入,数据类型float16, bfloat16 +- hcom:必选输入,数据类型string, +- reduce_op:可选输入,数据类型为string,当前仅支持sum +- epsilon:可选输入,数据类型为float,缺省情况下为1e-06 +- bias:可选输入,数据类型float16, bfloat16 +- antiquant_scale:可选输入,该场景默认为nullptr +- antiquant_offset:可选输入,该场景默认为nullptr +- dequant_scale:可选输入,该场景默认为nullptr +- antiquant_group_size:可选输入,该场景默认为0 +- comm_turn:可选输入,数据类型为int,缺省情况下为0 + +输出: +- residual:必选输出,复用residual,数据类型float16, bfloat16 +- normOut:必选输出,数据类型float16, bfloat16 + +## 全量化场景 +输入: +- x1:必选输入,数据类型int8 +- x2:必选输入,数据类型int8 +- residual:必选输入,数据类型float16, bfloat16 +- gamma:必选输入,数据类型float16, bfloat16 +- hcom:必选输入,数据类型string, +- reduce_op:可选输入,数据类型为string,当前仅支持sum +- epsilon:可选输入,数据类型为float,缺省情况下为1e-06 +- bias:可选输入,数据类型int32 +- antiquant_scale:可选输入,该场景默认为nullptr +- antiquant_offset:可选输入,该场景默认为nullptr +- dequant_scale:可选输入,数据类型int64,uint64,bfloat16 +- antiquant_group_size:可选输入,该场景默认为0 +- comm_turn:可选输入,数据类型为int,缺省情况下为0 + +输出: +- residual:必选输出,复用residual,数据类型float16, bfloat16 +- normOut:必选输出,数据类型float16, bfloat16 + +## 伪量化场景 +输入: +- x1:必选输入,数据类型float16, bfloat16 +- 
x2:必选输入,数据类型int8 +- residual:必选输入,数据类型float16, bfloat16 +- gamma:必选输入,数据类型float16, bfloat16 +- hcom:必选输入,数据类型string, +- reduce_op:可选输入,数据类型为string,当前仅支持sum +- epsilon:可选输入,数据类型为float,缺省情况下为1e-06 +- bias:可选输入,数据类型float16, bfloat16 +- antiquant_scale:可选输入,数据类型float16, bfloat16 +- antiquant_offset:可选输入,数据类型float16, bfloat16 +- dequant_scale:可选输入,该场景默认为nullptr +- antiquant_group_size:可选输入,数据类型为int,缺省情况下为0 +- comm_turn:可选输入,数据类型为int,缺省情况下为0 + +输出: +- residual:必选输出,复用residual,数据类型float16, bfloat16 +- normOut:必选输出,数据类型float16, bfloat16 + +## 输入限制 +- ``x2`` 仅支持最后两轴转置情况下的非连续tensor传入,``x1``、``residual``、``gamma`` 等输入仅支持连续tensor +- 仅支持ND数据格式 +- ``x1`` 支持两维或者三维,其维度为 ``(b, s, k)`` 或者 ``(s, k)`` +- ``x2`` 仅支持两维,其维度为 ``(k, n)``,``x1`` 和 ``x2`` 的轴满足matmul算子入参要求,k轴相等 +- ``bias`` 在非空情况下为1维,其维度为 ``(n)`` +- ``residual`` 仅支持三维,其维度为 ``(b, s, n)``,当 ``x1`` 为两维时,``residual`` 的 ``(b * s)`` 等于 ``x1`` 的 ``s``,当 ``x1`` 为三维时,``residual`` 的 ``(b * s)`` 等于 ``x1`` 的 ``(b * s)``;``residual`` 的最后一维与``x2`` 的最后一维相等 +- ``gamma`` 仅支持一维,其维度为 ``(n)``,``gamma`` 的最后一维与 ``residual`` 的最后一维相等 +- ``reduce_op`` 仅支持 ``sum`` +- 昇腾Atlas A2 AI处理器支持1、2、4、8卡,并且仅支持hccs链路all mesh组网 +- 昇腾Atlas A2 AI处理器支持``(b * s)``,``n``为0的空tensor,不支持``k``为0的空tensor +- 非量化场景下,``x1``、``x2``、``bias``(若支持)、``residual``、``gamma`` 计算输入的数据类型要一致 +- 昇腾Atlas A2 AI处理器,在非量化场景下,``(b * s)``、``k``、``n``的范围为``[1, 2147483647]`` +- 全量化场景下,若输出 ``residual`` 类型为 ``FLOAT16``,``dequant_scale`` 的类型为 ``INT64``、``UINT64``(需通过 ``torch_npu.npu_trans_quant_param()`` 接口对 ``dequant_scale`` 进行处理);若输出 ``residual`` 类型为 ``BFLOAT16``,``dequant_scale`` 的类型为 ``BFLOAT16``。``dequant_scale`` 满足两种模式: + - ``per_tensor`` 模式:``(1,)`` + - ``per_channel`` 模式:``(1, n)`` 或 ``(n,)`` +- 全量化场景下,``x1``、``x2`` 数据类型为 ``int8``,``bias``(若支持)数据类型为 ``int32``,``residual``、``gamma``计算输入的数据类型要一致。 +- 全量化场景下,``m``大小不超过2147483647,``x1``与``x2``的最后一维大小不超过65535,``x1``的最后一维指``k``,``x2``的最后一维指转置时的``k``或非转置时的``n``。 +- 伪量化场景下,``m``的范围为``[1, 2147483647]``,``k``、``n``的范围为``[1,65535]`` +- 
伪量化场景下,``antiquant_scale`` 满足三种模式: + - ``per_tensor`` 模式:``(1,)`` + - ``per_channel`` 模式:``(1, n)`` 或 ``(n,)`` + - ``per_group`` 模式:``(ceil(k,antiquant_group_size),n)`` +- ``antiquant_offset`` 若非空,shape 与 ``antiquant_scale``一致。 +- 伪量化场景下,``x2`` 的数据类型需为 ``int8``,``x1``、``bias``(若支持)、``residual``、``gamma``、``antiquant_scale``、``antiquant_offset``计算输入的数据类型要一致。 +- 伪量化场景下,``antiquant_group_size`` 取值满足取值范围``[32, min(k-1, INT_MAX)]``且为32倍数。 +- 一个模型中的通算融合MC2算子,仅支持相同通信域。 + +## npu_mm_all_reduce_add_rms_norm_ 接口的调用方式 + +```python +import torch +import torch_npu +import torch.distributed as dist +import torch.multiprocessing as mp +from mindspeed.ops.npu_mm_all_reduce_add_rms_norm_ import npu_mm_all_reduce_add_rms_norm_ + + +def run_mm_all_reduce_add_rms_norm(rank, world_size, master_ip, master_port, x1_shape, x2_shape, residual_shape, + gamma_shape, dtype): + torch_npu.npu.set_device(rank) + init_method = 'tcp://' + master_ip + ':' + master_port + dist.init_process_group(backend='hccl', rank=rank, world_size=world_size, init_method=init_method) + from torch.distributed.distributed_c10d import _get_default_group + default_pg = _get_default_group() + if torch.__version__ > '2.0.1': + hcom_info = default_pg._get_backend(torch.device('npu')).get_hccl_comm_name(rank) + else: + hcom_info = default_pg.get_hccl_comm_name(rank) + + x1 = torch.randn(x1_shape, dtype=dtype).npu() + x2 = torch.randn(x2_shape, dtype=dtype).npu() + residual = torch.randn(residual_shape, dtype=dtype).npu() + gamma = torch.randn(gamma_shape, dtype=dtype).npu() + epsilon = 0.000001 + y, norm_out = npu_mm_all_reduce_add_rms_norm_(x1=x1, x2=x2, residual=residual, gamma=gamma, hcom=hcom_info, + reduce_op='sum', epsilon=epsilon) + print("y:", y) + print("norm_out:", norm_out) + + +if __name__ == "__main__": + worksize = 8 + master_ip = "127.0.0.1" + master_port = '50001' + b, s, k, n = 4, 1024, 1024, 8192 + x1_shape = (b, s, k) + x2_shape = (k, n) + residual_shape = (b, s, n) + gamma_shape = (n) + dtype =
torch.float16 + + mp.spawn(run_mm_all_reduce_add_rms_norm, + args=(worksize, master_ip, master_port, x1_shape, x2_shape, residual_shape, gamma_shape, dtype), + nprocs=worksize) +``` diff --git a/model/train/yoco_moe/docs/ops/npu_ring_attention_update.md b/model/train/yoco_moe/docs/ops/npu_ring_attention_update.md new file mode 100644 index 000000000..4eb9a364c --- /dev/null +++ b/model/train/yoco_moe/docs/ops/npu_ring_attention_update.md @@ -0,0 +1,96 @@ +# npu_ring_attention_update对外接口 + +npu_ring_attention_update( + prev_attn_out: torch.Tensor, + prev_softmax_max: torch.Tensor, + prev_softmax_sum: torch.Tensor, + cur_attn_out: torch.Tensor, + cur_softmax_max: torch.Tensor, + cur_softmax_sum: torch.Tensor, + actual_seq_qlen: torch.Tensor = None, + layout: str = "SBH", +) + +小算子等价计算逻辑: +```python +import torch + + +def forward_update(prev_attn_out, prev_softmax_max, prev_softmax_sum, + cur_attn_out, cur_softmax_max, cur_softmax_sum, actual_seq_qlen=None, layout='SBH'): + # update softmax_max + origin_dtype = prev_attn_out.dtype + softmax_max = torch.maximum(prev_softmax_max, cur_softmax_max) + prev_scale = torch.exp(prev_softmax_max - softmax_max) + cur_scale = torch.exp(cur_softmax_max - softmax_max) + + # update softmax_sum + prev_softmax_sum_scaled = prev_softmax_sum * prev_scale + cur_softmax_sum_scaled = cur_softmax_sum * cur_scale + softmax_sum = prev_softmax_sum_scaled + cur_softmax_sum_scaled + + # out updating scale + prev_out_scale = prev_softmax_sum_scaled / softmax_sum + cur_out_scale = cur_softmax_sum_scaled / softmax_sum + + # [b, n, s, 8] -> [s, b, h] + # SBH layout + n = prev_out_scale.shape[1] + h = prev_attn_out.shape[-1] + d = h // n + prev_out_scale = prev_out_scale[..., 0].unsqueeze(3).repeat(1, 1, 1, d) + prev_out_scale = rearrange(prev_out_scale, 'b n s d -> s b (n d)').contiguous() + cur_out_scale = cur_out_scale[..., 0].unsqueeze(3).repeat(1, 1, 1, d) + cur_out_scale = rearrange(cur_out_scale, 'b n s d -> s b (n d)').contiguous() + + # 
update output + attn_out = prev_attn_out * prev_out_scale + cur_attn_out * cur_out_scale + attn_out = attn_out.to(origin_dtype) + return attn_out, softmax_max, softmax_sum + +``` + +## 前向接口: + +输入: + +- prev_attn_out:必选输入,数据类型torch.bfloat16, torch.float, torch.float16 +- prev_softmax_max: 必选输入,数据类型torch.float +- prev_softmax_sum: 必选输入,数据类型torch.float +- cur_attn_out: 必选输入,数据类型torch.bfloat16, torch.float, torch.float16 +- cur_softmax_max: 必选输入,数据类型torch.float +- cur_softmax_sum: 必选输入,数据类型torch.float + + +输出: + +- attn_out:必选输出,数据类型torch.bfloat16, torch.float, torch.float16 +- softmax_max:必选输出,数据类型torch.float +- softmax_sum:必选输出,数据类型torch.float + +属性: + +- actual_seq_qlen:可选属性,数据类型torch.int64, 数据单调递增,layout为TND的时候使用 +- layout:必选属性,数据类型str + + + +## 案例 + +```python +import torch +import torch_npu +from mindspeed.ops.npu_ring_attention_update import npu_ring_attention_update + +prev_attn_out = torch.randn(2048, 1, 12, dtype=torch.bfloat16).npu() +prev_softmax_max = torch.randn(1, 12, 2048, 8, dtype=torch.float32).npu() +prev_softmax_sum = torch.randn(1, 12, 2048, 8, dtype=torch.float32).npu() +cur_attn_out = torch.randn(2048, 1, 12, dtype=torch.bfloat16).npu() +cur_softmax_max = torch.randn(1, 12, 2048, 8, dtype=torch.float32).npu() +cur_softmax_sum = torch.randn(1, 12, 2048, 8, dtype=torch.float32).npu() + +attn_out, softmax_max, softmax_sum = npu_ring_attention_update(prev_attn_out, prev_softmax_max, prev_softmax_sum, + cur_attn_out, cur_softmax_max, cur_softmax_sum) + + +``` \ No newline at end of file diff --git a/model/train/yoco_moe/docs/ops/npu_rotary_position_embedding.md b/model/train/yoco_moe/docs/ops/npu_rotary_position_embedding.md new file mode 100644 index 000000000..e1a3d38c2 --- /dev/null +++ b/model/train/yoco_moe/docs/ops/npu_rotary_position_embedding.md @@ -0,0 +1,107 @@ +# npu_rotary_position_embedding对外接口 + +npu_rotary_position_embedding(x, cos, sin, mode=0) + +小算子等价计算逻辑: +```python +import torch +from einops import rearrange + +# mode = 0 +def
rotate_half(x): + x1, x2 = torch.chunk(x, 2, dim=-1) + return torch.cat((-x2, x1), dim=-1) + +# mode = 1 +def rotate_interleaved(x): + x1 = x[..., ::2] + x2 = x[..., 1::2] + return rearrange(torch.stack((-x2, x1), dim=-1), "... d two -> ...(d two)", two=2) + +def fused_rotary_position_embedding(x, cos, sin, interleaved=False): + if not interleaved: + return x * cos + rotate_half(x) * sin + else: + return x * cos + rotate_interleaved(x) * sin +``` + +## 前向接口: + +输入: + +- x:必选输入,4维Tensor,数据类型float16, bfloat16, float32 +- cos: 必选输入,4维Tensor,数据类型float16, bfloat16, float32 +- sin: 必选输入,4维Tensor,数据类型float16, bfloat16, float32 + +输出: + +- y:必选输出,数据类型float16, bfloat16, float32 + +属性: + +- mode:可选属性,数据类型int64_t,用于选择计算模式,0: rotate_half(GPT-NeoX style); 1: rotate_interleaved(GPT-J style)。缺省为0。 + + +## 反向接口: + +输入: + +- dy:必选输入,4维Tensor,数据类型float16, bfloat16, float32 +- cos:必选输入,4维Tensor,数据类型float16, bfloat16, float32 +- sin:必选输入,4维Tensor,数据类型float16, bfloat16, float32 +- x: 可选输入,4维Tensor,数据类型float16, bfloat16, float32 + +输出: + +- dx:必选输出,4维Tensor,数据类型float16, bfloat16, float32 +- dcos:可选输出,4维Tensor,数据类型float16, bfloat16, float32 +- dsin:可选输出,4维Tensor,数据类型float16, bfloat16, float32 + +属性: + +- mode:可选属性,数据类型int64_t,用于选择计算模式,0: rotate_half(GPT-NeoX style); 1: rotate_interleaved(GPT-J style)。缺省为0。 + +## 输入约束: + +| 输入 | RotateHalf(mode: 0) | RotateInterleaved(mode: 1) | +| :-: | :- | :- | +| x | layout支持: BNSD、BSND、SBND;
D < 896,且为2的倍数;
B, N < 1000;
当需要计算cos/sin的反向梯度时,B*N <= 1024 | layout支持: BNSD、BSND、SBND;
B * N < 1000;
D < 896, 且D为2的倍数; | +| cos | 数据范围:[-1, 1];
对应x layout的支持情况:
x为BNSD: 11SD、B1SD、BNSD;
x为BSND: 1S1D、BS1D、BSND;
x为SBND: S11D、SB1D、SBND. | 数据范围:[-1, 1];
对应x layout的支持情况:
x为BNSD: 11SD;
x为BSND: 1S1D;
x为SBND: S11D.| +| sin | 同cos | 同cos | + +**备注**: +1. 输入不支持为None; +2. cos, sin如需计算反向梯度需要同时设置`requires_grad = True`, 若仅设置一个则均不计算; +3. RotaryHalf(mode=0)模式下,当输入layout是BNSD,且D为非32Bytes对齐时,建议不使用该融合算子(模型启动脚本中不开启`--use-fused-rotary-pos-emb`选项),否则可能出现性能下降。 + + +## 案例 + +```python +import os +import torch +import torch_npu +import numpy as np +import math +from mindspeed.ops.npu_rotary_position_embedding import npu_rotary_position_embedding + +x = np.random.uniform(-2, 2, (4, 8192, 4, 128)) +cos = np.random.uniform(-1, 1, (1, 8192, 1, 128)) +sin = np.random.uniform(-1, 1, (1, 8192, 1, 128)) + +x_npu = torch.from_numpy(x).float().npu() +cos_npu = torch.from_numpy(cos).float().npu() +sin_npu = torch.from_numpy(sin).float().npu() + +x_npu.requires_grad = True +cos_npu.requires_grad = True +sin_npu.requires_grad = True +# 正向接口案例 +result = npu_rotary_position_embedding(x_npu, cos_npu, sin_npu, 0) + +# 反向接口案例 +result.backward(torch.ones_like(result).npu()) +x_npu.grad +cos_npu.grad +sin_npu.grad +``` \ No newline at end of file diff --git a/model/train/yoco_moe/docs/ops/quant_gmm.md b/model/train/yoco_moe/docs/ops/quant_gmm.md new file mode 100644 index 000000000..4e545eb8a --- /dev/null +++ b/model/train/yoco_moe/docs/ops/quant_gmm.md @@ -0,0 +1,95 @@ +# quant_gmm对外接口 + +npu_quant_gmm(x, weight, scale, *, offset=None, per_token_scale=None, bias=None, group_list=None, +output_dtype=None, act_type=0) + +npu_quant_gmm_v2(x, weight, scale, *, offset=None, per_token_scale=None, bias=None, group_list=None, +output_dtype=None, act_type=0) + +[npu_quant_gmm_v2]相较于[npu_quant_gmm]接口, group_list的含义不同, 在npu_quant_gmm接口中group_list中数值为分组轴大小的cumsum结果(累积和),npu_quant_gmm_v2接口中group_list中数值为分组轴上每组大小。两个接口的算子性能无差异,使用时可以根据整网中group_list的情况决定,如果前序算子输出的group_list为各group的大小,建议使用npu_quant_gmm_v2接口,因为此时使用npu_quant_gmm接口需要先调用torch.cumsum将group_list转为累积和的形式,带来额外开销。 + +## 前向接口: + +输入: + +- x:必选输入,参数为tensor,数据类型int8; +- weight:必选输入,参数为tensor,数据类型int8; +- scale:必选输入,参数类型为tensor,数据类型int64,bfloat16,float32; +- 
offset:保留参数,当前未使能; +- per_token_scale:可选参数,参数类型为tensor,数据类型float32,默认值为none; +- bias:可选输入,参数类型为tensor,数据类型int32, 默认值为none; +- group_list:可选输入,参数类型为tensor,数据类型int64,默认值为none。不同接口中的数值定义不同,具体见上述接口说明中描述; +- output_dtype:可选输入,参数类型为torch.dtype,可选值为:torch.int8,torch.bfloat16,torch.float16,用于指定输出数据类型,默认值为None,此时输出类型为torch.float16; +- act_type:可选参数,参数类型为int,用于指定激活函数类型,默认值为0,支持的激活函数类型如下: + - 0:无激活函数; + - 1:relu; + - 2:gelu_tanh; + - 3:gelu_err_func(暂不支持); + - 4:fast_gelu; + - 5:silu。 + +输出: + +- y:必选输出,数据类型int8, float16, bfloat16。 + +约束与限制: + +- npu_quant_gmm接口中,group_list必须为非负单调非递减数列,且长度不能为1; +- npu_quant_gmm_v2接口中,group_list必须为非负数列,长度不能为1,且数据类型仅支持tensor; +- x和weight中每一组tensor的最后一维大小都应小于65536.$x_i$的最后一维指当属性transpose_x为false时$x_i$的K轴或当transpose_x为true时$x_i$的M轴。$weight_i$的最后一维指当属性transpose_weight为false时$weight_i$的N轴或当transpose_weight为true时$weight_i$的K轴; +- x和weight中每一组tensor的每一维大小在32字节对齐后都应小于int32的最大值2147483647; +- 当需要输出y数据类型为int8时,指定output_dtype为torch.int8,scale类型为int64,per_token_scale为空,此时只支持act_type=0,即无激活函数;该场景当前仅支持单算子模式,图模式不支持; +- 当需要输出y数据类型为bfloat16时,output_dtype为torch.bfloat16,scale类型为bfloat16; +- 当需要输出y数据类型为float16时,output_dtype为torch.float16或者默认参数None,scale类型为float32。 + +## gmm 类的调用方式 + +```python +import os +import torch +import torch_npu +import numpy as np +import math +from mindspeed.ops import quant_gmm + +num_expert, seq_len, hidden_dim, out_channel = 8, 32, 256, 128 +group_list = torch.tensor([1, 3, 6, 10, 15, 21, 28, 32], dtype=torch.int64).npu() + +x = torch.randint(-128, 128, (seq_len, hidden_dim), dtype=torch.int8).npu() +weight = torch.randint(-128, 128, (num_expert, hidden_dim, out_channel), dtype=torch.int8).npu() +scale = torch.rand(num_expert, out_channel, dtype=torch.float32).npu() +per_token_scale = torch.rand(seq_len, dtype=torch.float32).npu() + +result = quant_gmm.npu_quant_gmm(x, weight, scale, per_token_scale=per_token_scale, + bias=None, group_list=group_list, output_dtype=torch.float16) + +# weight转置案例 +weight_trans = torch.randint(-128, 128, 
(num_expert, out_channel, hidden_dim), dtype=torch.int8).npu() +result = quant_gmm.npu_quant_gmm(x, weight_trans.transpose(-1,-2), scale, per_token_scale=per_token_scale, + bias=None, group_list=group_list, output_dtype=torch.float16) +``` + +```python +import os +import torch +import torch_npu +import numpy as np +import math +from mindspeed.ops import quant_gmm + +num_expert, seq_len, hidden_dim, out_channel = 8, 32, 256, 128 +group_list = torch.tensor([1, 2, 3, 4, 5, 6, 7, 4], dtype=torch.int64).npu() + +x = torch.randint(-128, 128, (seq_len, hidden_dim), dtype=torch.int8).npu() +weight = torch.randint(-128, 128, (num_expert, hidden_dim, out_channel), dtype=torch.int8).npu() +scale = torch.rand(num_expert, out_channel, dtype=torch.float32).npu() +per_token_scale = torch.rand(seq_len, dtype=torch.float32).npu() + +result = quant_gmm.npu_quant_gmm_v2(x, weight, scale, per_token_scale=per_token_scale, + bias=None, group_list=group_list, output_dtype=torch.float16) + +# weight转置案例 +weight_trans = torch.randint(-128, 128, (num_expert, out_channel, hidden_dim), dtype=torch.int8).npu() +result = quant_gmm.npu_quant_gmm_v2(x, weight_trans.transpose(-1,-2), scale, per_token_scale=per_token_scale, + bias=None, group_list=group_list, output_dtype=torch.float16) +``` diff --git a/model/train/yoco_moe/docs/ops/rms_norm.md b/model/train/yoco_moe/docs/ops/rms_norm.md new file mode 100644 index 000000000..15598e74a --- /dev/null +++ b/model/train/yoco_moe/docs/ops/rms_norm.md @@ -0,0 +1,43 @@ +# rms_norm对外接口 +## 前向接口: +输入: +- x:必选输入,数据类型float16, bfloat16, float32 +- gamma:必选输入,数据类型float16, bfloat16, float32 + +输出: +- rstd:中间变量,数据类型float32 +- out:必选输出,数据类型float16, bfloat16, float32 + +属性: +- epsilon:可选属性,数据类型float,缺省1e-6。 + +## 反向接口: +输入: +- dy:必选输入,数据类型float16, bfloat16, float32 +- x:必选输入,数据类型float16, bfloat16, float32 +- rstd:必选输入,数据类型float32 +- gamma:必选输入,数据类型float16, bfloat16, float32 + +输出: +- dx:必选输出,数据类型float16, bfloat16, float32 +- dgamma:必选输出,数据类型float32 + +## 案例 +
+```python +import os +import torch +import torch_npu +import numpy as np +import math +from mindspeed.op_builder import RmsNormOpBuilder + +x = torch.rand([2, 4]).to(torch.float).npu() +gamma = torch.rand([4]).to(torch.float).npu() +# 正向接口案例 +mindspeed_ops = RmsNormOpBuilder().load() +out = mindspeed_ops.rms_norm(x, gamma) +# 反向接口案例 +out.backward(torch.ones(out.shape).npu()) + +``` diff --git a/model/train/yoco_moe/docs/ops/swiglu.md b/model/train/yoco_moe/docs/ops/swiglu.md new file mode 100644 index 000000000..a2dac8061 --- /dev/null +++ b/model/train/yoco_moe/docs/ops/swiglu.md @@ -0,0 +1,42 @@ +# swiglu对外接口 +## 前向接口: +输入: +- x:必选输入,数据类型float16, bfloat16, float32 + +输出: +- y:必选输出,数据类型float16, bfloat16, float32 + +属性: +- dim:可选属性,数据类型int32_t,缺省-1。 + +## 反向接口: +输入: +- dy:必选输入,数据类型float16, bfloat16, float32 +- x:必选输入,数据类型float16, bfloat16, float32 + +输出: +- dx:必选输出,数据类型float16, bfloat16, float32 + +属性: +- dim:可选属性,数据类型int32_t,缺省-1。 + +## 案例 +```python + import os + import torch + import torch_npu + import numpy as np + import math + from mindspeed.op_builder import SwigluOpBuilder + + x = np.random.uniform(-2, 2, (8192,1,3904)) + x = torch.from_numpy(x).float().npu() + y_grad = np.random.uniform(-2, 2, (8192,1,1952)) + y_grad = torch.from_numpy(y_grad).float().npu() + + x.requires_grad = True + # 正向接口案例 + mindspeed_ops = SwigluOpBuilder().load() + result = mindspeed_ops.swiglu(x, dim=-1) + # 反向接口案例 + result.backward(y_grad) \ No newline at end of file diff --git a/model/train/yoco_moe/docs/ops/weight_quant_gmm.md b/model/train/yoco_moe/docs/ops/weight_quant_gmm.md new file mode 100644 index 000000000..767cbba17 --- /dev/null +++ b/model/train/yoco_moe/docs/ops/weight_quant_gmm.md @@ -0,0 +1,86 @@ +# weight_quant_gmm对外接口 + +npu_weight_quant_gmm(x, weight, antiquant_scale, *, antiquant_offset=None, bias=None, group_list=None, act_type=0) + +npu_weight_quant_gmm_v2(x, weight, antiquant_scale, *, antiquant_offset=None, bias=None, group_list=None, act_type=0) + 
+[npu_weight_quant_gmm_v2]相较于[npu_weight_quant_gmm]接口,group_list的含义不同,在npu_weight_quant_gmm接口中group_list中数值为分组轴大小的cumsum结果(累积和),npu_weight_quant_gmm_v2接口中group_list中数值为分组轴上每组大小。两个接口的算子性能无差异,使用时可以根据整网中group_list的情况决定,如果前序算子输出的group_list为各group的大小,建议使用npu_weight_quant_gmm_v2接口,因为此时使用npu_weight_quant_gmm接口需要先调用torch.cumsum将group_list转为累积和的形式,带来额外开销。 + +## 前向接口: + +输入: + +- x:必选输入,参数为tensor,数据类型float16,bfloat16; +- weight:必选输入,参数为tensor,数据类型int8; +- antiquant_scale:必选输入,参数类型为tensor,数据类型float16,bfloat16; +- antiquant_offset:可选参数,参数类型为tensor,数据类型float16,bfloat16,默认值为none,当前不支持传none; +- bias:可选输入,参数类型为tensor,数据类型float16,float32,默认值为none; +- group_list:可选输入,参数类型为tensor,数据类型int64,默认值为none。不同接口中的数值定义不同,具体见上述接口说明中描述; +- act_type:可选参数,参数类型为int,用于指定激活函数类型,默认值为0,表示无激活函数,当前只支持默认值0; + +输出: + +- y:必选输出,数据类型float16,bfloat16。 + +约束与限制: + +- npu_weight_quant_gmm接口中,group_list必须为非负单调非递减数列,且长度不能为1; +- npu_weight_quant_gmm_v2接口中,group_list必须为非负数列,长度不能为1,且数据类型仅支持tensor; +- x和weight中每一组tensor的最后一维大小都应小于65536.$x_i$的最后一维指当属性transpose_x为false时$x_i$的K轴或当transpose_x为true时$x_i$的M轴。$weight_i$的最后一维指当属性transpose_weight为false时$weight_i$的N轴或当transpose_weight为true时$weight_i$的K轴; +- x和weight中每一组tensor的每一维大小在32字节对齐后都应小于int32的最大值2147483647; +- x,antiquant_scale,antiquant_offset,y的数据类型应保持一致; +- 当需要输出y数据类型为bfloat16时,bias类型为float32; +- 当需要输出y数据类型为float16时,bias类型为float16。 +- 暂不支持计算flops。 + +## gmm 类的调用方式 + +```python +import os +import torch +import torch_npu +import numpy as np +import math +from mindspeed.ops import weight_quant_gmm + +num_expert, seq_len, hidden_dim, out_channel = 8, 32, 256, 128 +group_list = torch.tensor([1, 3, 6, 10, 15, 21, 28, 32], dtype=torch.int64).npu() + +x = torch.rand(seq_len, hidden_dim, dtype=torch.float16).npu() +weight = torch.randint(-128, 128, (num_expert, hidden_dim, out_channel), dtype=torch.int8).npu() +antiquant_scale = torch.rand(num_expert, out_channel, dtype=torch.float16).npu() +antiquant_offset = torch.rand(num_expert, out_channel, dtype=torch.float16).npu() +
+result = weight_quant_gmm.npu_weight_quant_gmm(x, weight, antiquant_scale, antiquant_offset=antiquant_offset, + bias=None, group_list=group_list) + +# weight转置案例 +weight_trans = torch.randint(-128, 128, (num_expert, out_channel, hidden_dim), dtype=torch.int8).npu() +result = weight_quant_gmm.npu_weight_quant_gmm(x, weight_trans.transpose(-1,-2), antiquant_scale, + antiquant_offset=antiquant_offset, bias=None, group_list=group_list) +``` + +```python +import os +import torch +import torch_npu +import numpy as np +import math +from mindspeed.ops import weight_quant_gmm + +num_expert, seq_len, hidden_dim, out_channel = 8, 32, 256, 128 +group_list = torch.tensor([1, 2, 3, 4, 5, 6, 7, 4], dtype=torch.int64).npu() + +x = torch.rand(seq_len, hidden_dim, dtype=torch.float16).npu() +weight = torch.randint(-128, 128, (num_expert, hidden_dim, out_channel), dtype=torch.int8).npu() +antiquant_scale = torch.rand(num_expert, out_channel, dtype=torch.float16).npu() +antiquant_offset = torch.rand(num_expert, out_channel, dtype=torch.float16).npu() + +result = weight_quant_gmm.npu_weight_quant_gmm_v2(x, weight, antiquant_scale, antiquant_offset=antiquant_offset, + bias=None, group_list=group_list) + +# weight转置案例 +weight_trans = torch.randint(-128, 128, (num_expert, out_channel, hidden_dim), dtype=torch.int8).npu() +result = weight_quant_gmm.npu_weight_quant_gmm_v2(x, weight_trans.transpose(-1,-2), antiquant_scale, + antiquant_offset=antiquant_offset, bias=None, group_list=group_list) +``` diff --git a/model/train/yoco_moe/mindspeed/__init__.py b/model/train/yoco_moe/mindspeed/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/arguments.py b/model/train/yoco_moe/mindspeed/arguments.py new file mode 100644 index 000000000..576db4152 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/arguments.py @@ -0,0 +1,1117 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
+ +from dataclasses import make_dataclass, field +from functools import wraps +import argparse +import warnings +import torch +from mindspeed.features_manager import FEATURES_LIST + + +def extra_args_provider_decorator(extra_args_provider): + @wraps(extra_args_provider) + def wrapper(parser): + if extra_args_provider is not None: + parser = extra_args_provider(parser) + parser = process_args(parser) + return parser + + return wrapper + + +def parse_args_wrapper(parse_args): + @wraps(parse_args) + def wrapper(extra_args_provider=None, ignore_unknown_args=False): + decorated_provider = extra_args_provider_decorator(extra_args_provider) + return parse_args(decorated_provider, ignore_unknown_args) + + return wrapper + + +def process_args(parser): + parser.conflict_handler = 'resolve' + parser = _add_network_size_args(parser) + parser = _add_distributed_args(parser) + parser = _add_training_args(parser) + parser = _add_data_args(parser) + parser = _add_moe_args(parser) + parser = _add_cp_args(parser) + parser = _add_network_args(parser) + parser = _add_algorithm_args(parser) + parser = _add_automated_pipeline_args(parser) + parser = _add_alibi_args(parser) + parser = _add_ndmm_args(parser) + parser = _add_2d_tp_args(parser) + parser = _add_coc_args(parser) + parser = _add_profile_args(parser) + parser = _add_auto_parallel_args(parser) + parser = _add_deepseek_args(parser) + parser = _auto_tuning_args(parser) + parser = _add_auto_parallel_mm_args(parser) + parser = _add_hccl_group_buffer_args(parser) + parser = _add_layerzero_args(parser) + parser = _add_dist_train_args(parser) + + for feature in FEATURES_LIST: + feature.register_args(parser) + + return parser + + +def _add_deepseek_args(parser): + group = parser.add_argument_group(title='deepseek') + # deepseek moe arguments + group.add_argument('--n-shared-experts', type=int, default=None) + # mla arguments + group.add_argument('--multi-head-latent-attention', action='store_true', default=False, + help='Use Multi-head 
Latent Attention(MLA)') + group.add_argument('--q-lora-rank', type=int, default=None, help='The low rank of q') + group.add_argument('--kv-lora-rank', type=int, default=None, help='The low rank of k and v') + group.add_argument('--v-head-dim', type=int, default=None, help='The head dim of v') + group.add_argument('--qk-rope-head-dim', type=int, default=None, help='The qk head dim for rope') + group.add_argument('--qk-nope-head-dim', type=int, default=None, help='The qk head dim for only self-attn') + # yarn arguments + group.add_argument('--rope-scaling-type', type=str, default=None, choices=['yarn', ], + help='Set the rope scaling type, only support "yarn" type now') + group.add_argument('--rope-scaling-beta-fast', type=int, default=32, help='Yarn rope: rope beta fast') + group.add_argument('--rope-scaling-beta-slow', type=int, default=1, help='Yarn rope: rope beta slow') + group.add_argument('--rope-scaling-factor', type=float, default=1.0, help='Yarn rope: rope factor') + group.add_argument('--rope-scaling-mscale', type=float, default=1.0, help='Yarn rope: rope mscale') + group.add_argument('--rope-scaling-mscale-all-dim', type=float, default=0.0, help='Yarn rope: rope mscale all dim') + group.add_argument('--rope-scaling-original-max-position-embeddings', type=int, default=None, + help='Yarn rope: rope original max position embeddings') + group.add_argument('--moe-hierarchical-alltoallv', action='store_true', + help='Reduce communication cost between nodes') + + return parser + + +def _auto_tuning_args(parser): + group = parser.add_argument_group(title='auto_tuning') + + group.add_argument('--auto-tuning', action='store_true', help='enable auto tuning') + group.add_argument('--auto-tuning-work-dir', type=str, default='./auto_tuning_dir', + help="auto tuning working path.") + group.add_argument('--auto-tuning-ranks', type=int, default=16, help='the global size of auto tuning') + group.add_argument('--auto-tuning-log-level', type=str, default='info', 
def _add_profile_args(parser):
    """Register the 'profile' argument group on ``parser``.

    Args:
        parser: argparse parser (or compatible object) to extend.

    Returns:
        The same ``parser`` object, so calls can be chained.
    """
    group = parser.add_argument_group(title='profile')
    group.add_argument("--profile-level", type=str, default='level0',
                       choices=['level0', 'level1', 'level2'],
                       help="Profile level default level0.")
    group.add_argument("--profile-with-cpu", action='store_true', default=False,
                       help="Profile with cpu info.")
    # The two flags below *enable* collection; the help text previously read
    # "without", which described the opposite of what the flag does.
    group.add_argument("--profile-with-stack", action='store_true', default=False,
                       help="Profile with stack info.")
    group.add_argument("--profile-with-memory", action='store_true', default=False,
                       help="Profile with memory info.")
    group.add_argument("--profile-record-shapes", action='store_true', default=False,
                       help="Profile record shape info.")
    group.add_argument("--profile-save-path", type=str, default='./profile_dir',
                       help="Profile save path.")
    group.add_argument('--profile-ranks', nargs='+', type=int, default=[-1],
                       help='Global ranks to profile. The default value of -1 means to profile all ranks')
    return parser
megatron moe') + group.add_argument('--expert-interval', type=int, default=1, + help='Use experts in every "expert-interval" layers') + group.add_argument('--moe-train-capacity-factor', type=float, default=1.0, + help='The capacity of the MoE expert at training time') + group.add_argument('--noisy-gate-policy', type=str, default=None, choices=['Jitter', 'RSample', 'None'], + help="noisy gate policy, valid options are 'Jitter', 'RSample' or 'None'.") + group.add_argument('--enable-token-rearrange-opt', action='store_true', + help="Use this flag to enable token rearrange optimize") + group.add_argument('--no-use-rts', + action='store_false', default=False, + help='whether to use Random Token Selection.', + dest='use_rts') + group.add_argument("--moe-no-drop", action='store_true', + help="Use no drop policy in moe layer, no tokens will be discarded.") + group.add_argument("--moe-dynamic-padding", action='store_true', + help="Reducing AllReduce communication under the no drop policy through the sliding window mechanism.") + group.add_argument("--moe-use-sinkhorn", action='store_true', + help="Use sinkhorn load balancing in the gate.") + + # megatron mcore moe arguments + group.add_argument("--moe-tp-extend-ep", action='store_true', + help="use tp group to extend experts parallelism" + "instead of sharding weight tensor of experts in tp group") + group.add_argument("--moe-permutation-async-comm", action='store_true', + help="overlap moe permutation 3 all gather communications") + group.add_argument("--moe-adaptive-recompute-activation", action='store_true', + help="MoE adaptive recompute, avoiding memory imbalance in the early stage.") + group.add_argument('--moe-adaptive-recompute-activation-scale', type=float, default=2.0, + help='MoE adaptive recompute threshold factor.') + group.add_argument("--use-fused-moe-token-permute-and-unpermute", action='store_true', + help="Use fused moe permute and unpermute.") + group.add_argument("--gemm-gradient-accumulation-fusion", 
def _add_cp_args(parser):
    """Register the 'cp parallel' (context parallel) argument group on ``parser``.

    Args:
        parser: argparse parser (or compatible object) to extend.

    Returns:
        The same ``parser`` object, so calls can be chained.
    """
    group = parser.add_argument_group(title='cp parallel')
    group.add_argument('--context-parallel-algo', type=str, default='ulysses_cp_algo',
                       choices=['ulysses_cp_algo', 'megatron_cp_algo', 'hybrid_cp_algo', 'adaptive_cp_algo',
                                'hybrid_adaptive_cp_algo'],
                       help='context parallel algorithm')
    group.add_argument('--ulysses-degree-in-cp', type=int, default=None)
    group.add_argument('--cp-window-size', type=int, default=1)
    group.add_argument('--attention-mask-type', type=str, default='causal',
                       choices=['causal', 'general'], help='context parallel attention mask type')
    group.add_argument('--use-cp-send-recv-overlap', action='store_true',
                       help='use this flag to enable cp send-recv-overlap.')
    group.add_argument("--use-fused-ring-attention-update", action='store_true',
                       help="Use fused ring attention update.")
    group.add_argument("--megatron-cp-in-bnsd", action='store_true',
                       help="Megatron CP in bnsd.")
    group.add_argument('--attention-mask-on-cpu', action='store_true',
                       help='store full attention mask on CPU instead of NPU')
    # Adjacent string literals are joined verbatim, so each fragment below
    # carries its own trailing space to keep the rendered help readable.
    group.add_argument('--adaptive-cp-without-coarse', action='store_true',
                       help='does not coarse the attention mask in adaptive_cp feature, only recommended when full '
                            'sequence length is less than 8K and dynamic attention mask is not feasible')
    group.add_argument('--adaptive-cp-dynamic-attn-mask', action='store_true',
                       help='if the attention mask is dynamic across batches')
    group.add_argument('--adaptive-cp-only-reschedule', action='store_true',
                       help='not apply remapping but only rescheduling process in adaptive-cp feature')
    group.add_argument('--adaptive-cp-manually-set-mask-list', action='store_true',
                       help='manually set pre-cooked attention mask list')
    group.add_argument('--context-parallel-kv-cache-policy', type=str, default=None,
                       choices=['full', 'half'],
                       help='Selectivity cache K, V in process of cp. '
                            'Default is None, means not used cache K, V. '
                            'If para is full, cache all K, V. '
                            'If para is half, cache only K')
    group.add_argument('--context-parallel-cache-interval', type=int, default=0,
                       help='Set the interval of cache layers in cp. '
                            'Default is 0, means cache K, V in all layers.')
    group.add_argument('--use-ulysses-allgather-kv', action='store_true',
                       help='use this flag to enable allgather kv + repeat all2all q in ulysses cp.')
    return parser
def _add_training_args(parser):
    """Register the 'training' argument group on ``parser``.

    The group covers the flash-attention layout options, the recompute and
    swap switches, the pipe-experts and AMPipe settings, and the optimizer
    selection.

    Args:
        parser: argparse parser (or compatible object) to extend.

    Returns:
        The same ``parser`` object, so calls can be chained.
    """
    group = parser.add_argument_group(title='training')

    group.add_argument('--pre-tockens', type=int, default=65536,
                       help='pre-tockens is used by Flash attention')
    group.add_argument('--next-tockens', type=int, default=0,
                       help='next-tockens is used by Flash attention')
    group.add_argument('--shape-order', type=str, default='SBH',
                       choices=['SBH', 'BSH', 'BSND'],
                       help='input shape order used by Flash attention')
    group.add_argument('--sparse-mode', type=int, default=0,
                       help='To improve performance in different modes of attention mask')
    group.add_argument('--adaptive-recompute-device-size',
                       type=int, default=-1,
                       help='The memory size for adaptive selective recompute strategy. '
                            'The default is -1. If this parameter > 0, '
                            'will activate adaptive selective recompute. ')
    group.add_argument('--adaptive-recompute-profiling-step',
                       type=int, default=10,
                       help='The profiling step for adaptive selective recompute strategy. '
                            'The default is 10. If activate adaptive selective recompute, '
                            'will solve graph after step 10. ')
    group.add_argument('--adaptive-recompute-device-swap',
                       action='store_true', default=False,
                       help='switch to open adaptive recompute feature. '
                            'The default is False.')
    group.add_argument('--enable-recompute-layers-per-pp-rank',
                       action='store_true', default=False,
                       help='If enabled, --recompute-num-layers will mean the number of '
                            'layers recomputed in each pp rank. Otherwise it means the number '
                            'of layers recomputed in each vpp rank.')
    group.add_argument('--recompute-activation-function', action='store_true',
                       help='Recompute the activation function in MLP layers.')
    group.add_argument('--recompute-activation-function-num-layers', type=int, default=None,
                       help='Can be used together with "--recompute-method block." '
                            'and "--recompute-num-layers". ')
    group.add_argument('--recompute-norm', action='store_true',
                       help='Recompute norm in Transformer Layers')
    group.add_argument('--recompute-norm-num-layers', type=int, default=None,
                       help='Recompute norm num layers, can be used together with activation function recompute. ')
    group.add_argument('--recompute-in-bubble', action='store_true',
                       help='use bubble to do recompute to reduce memory')
    group.add_argument('--recompute-in-advance', action='store_true',
                       help='recompute early to reduce bubble and improve training.')
    group.add_argument('--jit-compile', action='store_true', default=False,
                       help='Setting jit compile mode to True')
    # Trailing space added: the two fragments used to render as
    # "feature.The default is False.".
    group.add_argument('--swap-attention', action='store_true', default=False,
                       help='switch to open swap-attention feature. '
                            'The default is False.')
    group.add_argument('--swap-modules', type=str, default="input_norm,self_attention,post_attention_norm",
                       help='Swap modules for model. Can be used together with "--swap-attention."')
    group.add_argument('--adaptive-memory-optimization', action='store_true', default=False,
                       help='Switch to open adaptive memory optimization feature, default is False.')
    group.add_argument('--use-fusion-attn-v2', action='store_true', default=False,
                       help='use fusion_attention ops version 2')
    group.add_argument('--pipe-experts-multi-data', type=int, default=1,
                       help='Use multi data to split the input tensor to implement masking when --use-pipe-experts. '
                            'The default is 1.')
    group.add_argument('--pipe-experts-multi-stream', action='store_true', default=False,
                       help='Use multi stream to avoid link collision in collective communication when --use-pipe-experts. '
                            'The default is False.')
    group.add_argument("--additional-config", help="additional model config file path")
    group.add_argument('--use-ema', action='store_true', default=False,
                       help='use ema when training')
    group.add_argument('--use-multiparameter-pipeline-model-parallel', action='store_true', default=False,
                       help='can transfer multi parameters from stage to stage in pipeline model parallel')
    group.add_argument('--ampipe-degree', type=int, default=1,
                       help='Set Attention MoE pipe(AMPipe) degree, 1 means not enable '
                            'AMPipe, greater than 1 means enable this feature.')
    group.add_argument('--ampipe-tp-sp-comm-overlap', action='store_true', default=False,
                       help='enable computation and tp or sp communication overlap in ampipe')
    group.add_argument('--op-cal-tflops', action='store_true', default=False,
                       help='use for cal mfu and hfu')
    group.add_argument('--npu-deterministic', action='store_true', default=False,
                       help='enable deterministic computing for npu')
    # The help now names all three accepted choices; it used to omit the
    # EMA variant.
    group.add_argument('--optimizer-selection', type=str, default='fused_adamw',
                       choices=['fused_adamw', 'fused_torch_adamw', 'fused_ema_adamw'],
                       help='Select from the former fused AdamW optimizer, Torch fused AdamW optimizer '
                            'and fused EMA AdamW optimizer')
    group.add_argument('--ema-decay', type=float, default=0.9999,
                       help='Set ema_decay of fused_ema_adamw optimizer.')
    return parser
def _add_algorithm_args(parser):
    """Register the optimization-level and communication options on ``parser``.

    The options are placed in an argument group titled 'training'.

    Args:
        parser: argparse parser (or compatible object) to extend.

    Returns:
        The same ``parser`` object, so calls can be chained.
    """
    group = parser.add_argument_group(title='training')
    # Each fragment ends with a space so the three levels do not run together
    # in the rendered help ("NPU,1: ..." before).
    group.add_argument('--optimization-level', type=int, choices=[0, 1, 2], default=2,
                       help='0: The minimum patch set for megatron to adapt to NPU, '
                            '1: Affinity optimization (fusion operator, etc.), '
                            '2: Advanced acceleration algorithm')
    group.add_argument('--reuse-fp32-param', action='store_true',
                       help='The distributed training optimizer frees up '
                            'param copies of FP32 to save memory.')

    group.add_argument('--optimize-send-recv-comm', action='store_true',
                       help='optimize send_recv communication in pipeline without interleaving.')
    group.add_argument('--optimize-vpp-send-recv-comm', action='store_true',
                       help='optimize send_recv communication in pipeline with interleaving.')
    group.add_argument('--enable-zero3', action='store_true', default=False,
                       help='Use this flag to enable zero3, including the segmentation of the '
                            'parameters, gradients, and optimizers of the row-parallel and '
                            'column-parallel models, as well as the overlap optimization of '
                            'the gradient reduce scatter and weight all gather.')
    return parser
def core_transformer_config_from_args_wrapper(fn):
    """Decorate a config factory so the returned config carries extra fields.

    After ``fn`` builds the config, the wrapper copies
    ``args.context_parallel_algo`` onto it, forces ``batch_p2p_comm`` to
    ``False`` and, when ``args.use_multiparameter_pipeline_model_parallel``
    is truthy, sets ``deallocate_pipeline_outputs`` to ``False``.

    Args:
        fn: callable taking ``args`` (plus any further positional/keyword
            arguments) and returning a mutable config object.

    Returns:
        The wrapped callable; it returns the same config object ``fn``
        produced, after the adjustments above.
    """
    @wraps(fn)
    def wrapper(args, *fn_args, **fn_kwargs):
        # Forward everything beyond ``args`` untouched so a factory that
        # accepts additional parameters keeps working once it is wrapped.
        config = fn(args, *fn_args, **fn_kwargs)
        config.context_parallel_algo = args.context_parallel_algo
        config.batch_p2p_comm = False
        if args.use_multiparameter_pipeline_model_parallel:
            config.deallocate_pipeline_outputs = False
        return config

    return wrapper
--use-legacy-models') + replace_model_type_for_deepspeed_moe = True + else: + if args.moe_model_type == 'deepspeed_moe': + raise AssertionError('deepspeed_moe only support with --use-legacy-models') + overlap_param_gather_without_mcore_models = False + if args.overlap_param_gather and args.use_legacy_models: + args.use_legacy_models = False + overlap_param_gather_without_mcore_models = True + + #validate optimizer + if args.optimizer_selection == 'fused_adamw': + print("[WARNING] The default AdamW optimizer is no longer recommended for new edition, Use the torch fused AdamW optimizer by argument --optimizer-selection fused_torch_adamw") + elif args.optimizer_selection == 'fused_ema_adamw': + if args.reuse_fp32_param: + raise AssertionError('fused_ema_adamw optimizer is not compatible with reuse_fp32_param') + + # validate mla + if args.multi_head_latent_attention: + if args.kv_lora_rank is None: + raise AssertionError('The parameter kv-lora-rank should be set when use multi_head_latent_attention.') + elif args.v_head_dim is None: + raise AssertionError('The parameter v-head-dim should be set when use multi_head_latent_attention.') + elif args.qk_rope_head_dim is None: + raise AssertionError( + 'The parameter qk-rope-head-dim should be set when use multi_head_latent_attention.') + elif args.qk_nope_head_dim is None: + raise AssertionError( + 'The parameter qk-nope-head-dim should be set when use multi_head_latent_attention.') + + # validate yarn + if args.rope_scaling_type == "yarn": + if args.rope_scaling_original_max_position_embeddings is None: + raise AssertionError('The parameter rope_scaling_original_max_position_embeddings should be set ' + 'when use yarn.') + + # alibi type [2, 3] is only support FA2 + if args.alibi_fusion_attn_type in [2, 3]: + args.use_fusion_attn_v2 = True + if args.use_fusion_attn_v2: + args.use_flash_attn = True + print("[WARNING] \"use_fusion_attn_v2\" is not recommended. 
This feature is not officially released.") + + # for vpp assert pp should > 2 + flag_num_layers_per_virtual_pipeline_stage = None + flag_overlap_p2p_comm = False + if args.num_layers_per_virtual_pipeline_stage is not None and args.pipeline_model_parallel_size == 2: + flag_num_layers_per_virtual_pipeline_stage = args.num_layers_per_virtual_pipeline_stage + args.num_layers_per_virtual_pipeline_stage = None + if args.overlap_p2p_comm: + flag_overlap_p2p_comm = True + + # skip validation for deepspeed_moe with CP + origin_use_legacy_models = args.use_legacy_models + if replace_model_type_for_deepspeed_moe: + args.use_legacy_models = False + origin_context_parallel_size = args.context_parallel_size + args.context_parallel_size = 1 + original_variable_seq_lengths = args.variable_seq_lengths + args = validate_args(args, defaults) + + args.variable_seq_lengths = original_variable_seq_lengths + args.context_parallel_size = origin_context_parallel_size + model_parallel_size = args.pipeline_model_parallel_size * args.tensor_model_parallel_size + args.data_parallel_size = args.world_size // (model_parallel_size * args.context_parallel_size) + + if args.dist_train: + from mindspeed.multi_modal.dist_train.config.dist_train_config import get_all_config + if args.tensor_model_parallel_size > 1 and seq_parallel_enabled: + args.sequence_parallel = True + if any(cfg.main_dp for cfg in get_all_config().values()): + from mindspeed.multi_modal.dist_train.inner_data_parallel.utils import get_global_data_parallel_size + args.data_parallel_size = get_global_data_parallel_size() + + if args.global_batch_size is None: + args.global_batch_size = args.micro_batch_size * args.data_parallel_size + if args.rank == 0: + print('Resetting global batch size to {}'.format( + args.global_batch_size), flush=True) + if args.optimize_vpp_send_recv_comm and args.num_layers_per_virtual_pipeline_stage is None: + raise AssertionError('--optimize-vpp-send-recv-comm can only be used with pipeline with 
interleaving.') + + if replace_model_type_for_deepspeed_moe: + args.use_legacy_models = origin_use_legacy_models + if args.enable_zero3: + print("[WARNING] zero3 currently does not support model save and load") + if args.use_ascend_mc2 or args.reuse_fp32_param or args.recompute_granularity is not None or args.use_pipe_experts: + raise AssertionError('zero3 cannot be used together with MC2(--use-ascend-mc2), ' + 'parameter copy reuse(--reuse-fp32-param),' + 'recompute(--recompute-granularity)' + 'and pipe_experts(use-pipe-experts)') + + # for vpp assert pp should > 2 + if flag_num_layers_per_virtual_pipeline_stage is not None and args.pipeline_model_parallel_size == 2: + args.num_layers_per_virtual_pipeline_stage = flag_num_layers_per_virtual_pipeline_stage + args.overlap_p2p_comm = flag_overlap_p2p_comm + if args.num_layers_per_virtual_pipeline_stage is not None: + assert args.num_layers % args.transformer_pipeline_model_parallel_size == 0, \ + 'number of layers should be divisible by the pipeline parallel size' + num_layers_per_pipeline_stage = args.num_layers // args.transformer_pipeline_model_parallel_size + assert num_layers_per_pipeline_stage % args.num_layers_per_virtual_pipeline_stage == 0, \ + 'number of layers per pipeline stage must be divisible number of layers per virtual pipeline stage' + args.virtual_pipeline_model_parallel_size = num_layers_per_pipeline_stage // \ + args.num_layers_per_virtual_pipeline_stage + + # num_layers_per_virtual_pipeline_stage should be meaningful + if args.num_layers_per_virtual_pipeline_stage is not None: + num_layers_per_pipeline_stage = args.num_layers // args.pipeline_model_parallel_size + assert num_layers_per_pipeline_stage // args.num_layers_per_virtual_pipeline_stage > 1, \ + 'considering args of num_layers and pipeline_model_parallel_size, vpp setting should be meaningful' + + # deepspeed dropless does not support pp + if args.moe_no_drop and args.pipeline_model_parallel_size > 1: + raise 
AssertionError("--moe-no-drop is not compatible with pp") + + if args.param_and_grad_buffer_pad and args.param_and_grad_buffer_pad <= 0: + raise AssertionError('--param-and-grad-buffer-pad must be greater than 0') + + if args.use_fused_rmsnorm: + if args.normalization != "RMSNorm": + raise AssertionError( + '--use-fused-rmsnorm must enable with ' + '--normalization=RMSNorm, but got normalization' + '={}.'.format(args.normalization)) + if args.use_nd_matmul: + raise AssertionError("ND_MatMul is not compatible with fused_rmsnorm.") + if args.use_fused_swiglu: + if not args.swiglu: + raise AssertionError( + '--use-fused-swiglu must enable with --swiglu, ' + 'but --swiglu={}.'.format(args.swiglu)) + if args.use_fused_rotary_pos_emb: + if args.position_embedding_type != 'rope': + raise AssertionError( + '--use-fused-rotary-pos-emb must enable with' + '--position-embedding-type=rope') + if args.alibi_fusion_attn_type is not None and args.alibi_fusion_attn_type not in [0, 2, 3]: + raise AssertionError('--alibi-fusion-attn-type only support for `0, 2, 3`') + if args.reuse_fp32_param and not args.bf16: + raise AssertionError('--reuse-fp32-param only support for `bf16`') + if args.use_pipe_experts: + if args.pipe_experts_multi_data <= 0: + raise AssertionError('--pipe-experts-multi-data must greater than 0') + if not args.sequence_parallel and args.pipe_experts_multi_stream: + raise AssertionError('--pipe-experts-multi-stream can only be used with --sequence-parallel.') + local_experts = args.num_experts // args.expert_model_parallel_size + if local_experts == 1 and args.pipe_experts_multi_data == 1: + print("[WARNING] if local_experts = num_experts // expert_model_parallel_size is equal to 1 " + "and --pipe-experts-multi-data is set to 1, " + "--use-pipe-experts will be turned off.") + args.use_pipe_experts = False + if args.moe_alltoall_overlap_comm and not args.moe_token_dispatcher_type == 'alltoall': + raise AssertionError('`--moe-alltoall-overlap-comm` only support with 
`--moe-token-dispatcher-type alltoall`.') + + if args.moe_adaptive_recompute_activation and args.moe_token_dispatcher_type == 'alltoall': + raise AssertionError('`--moe-adaptive-recompute-activation` only support with `--moe-token-dispatcher-type allgather`.') + + if args.moe_allgather_overlap_comm and not args.moe_token_dispatcher_type == 'allgather': + raise AssertionError('`--moe-allgather-overlap-comm` only support with `--moe-token-dispatcher-type allgather`.') + + if args.moe_alltoall_overlap_comm or args.moe_allgather_overlap_comm: + if not args.moe_permutation_async_comm: + raise AssertionError('`--moe-alltoall-overlap-comm` and `--moe-allgather-overlap-comm` only support with `--moe-permutation-async-comm`.') + if not args.moe_grouped_gemm: + raise AssertionError('`--moe-alltoall-overlap-comm` and `--moe-allgather-overlap-comm` only support with `--moe-grouped-gemm`.') + if not (args.moe_tp_extend_ep or args.moe_experts_pipeline_degree) and args.moe_alltoall_overlap_comm and args.tensor_model_parallel_size > 1: + raise AssertionError('`--moe-alltoall-overlap-comm` do not support tp for now. only support with moe_tp_extend_ep or moe_experts_pipeline_degree when tp > 1.') + if args.moe_experts_pipeline_degree: + if args.moe_experts_pipeline_degree < 2: + raise AssertionError("`--moe-experts-pipeline-degree` should be at least 2. 
") + if args.moe_experts_pipeline_degree > args.num_experts or args.num_experts % args.moe_experts_pipeline_degree != 0: + raise AssertionError("`--moe-experts-pipeline-degree` must smaller than `--num-experts` and `--num-experts` divided by `--moe-experts-pipeline-degree` is an integer.") + if args.moe_zero_memory != "disable": + raise AssertionError("`--moe-experts-pipeline-degree` is not compatible with `--moe-zero-memory`") + if not args.tensor_model_parallel_size or args.tensor_model_parallel_size <= 1: + raise AssertionError("`--moe-experts-pipeline-degree` only support when '--tensor-model-parallel-size' is bigger than 1.") + if args.expert_model_parallel_size > 1: + raise AssertionError("`--moe-experts-pipeline-degree` is not compatible with expert model parallel.") + if args.moe_tp_extend_ep: + raise AssertionError("`--moe-experts-pipeline-degree` is not compatible with `--moe-tp-extend-ep`.") + if args.moe_tp_extend_ep: + if args.num_experts % (args.tensor_model_parallel_size * args.expert_model_parallel_size) != 0: + raise AssertionError('`--moe-tp-extend-ep` only support when num_experts % ( tp * ep ) == 0') + if not (args.moe_permutation_async_comm and args.moe_grouped_gemm): + raise AssertionError('`--moe-tp-extend-ep` needs `--moe-permutation-async-comm` and `--moe-grouped-gemm`.') + if args.moe_expert_capacity_factor is not None: + raise AssertionError('`--moe-tp-extend-ep` only support when moe_expert_capacity_factor is None.') + if args.moe_hierarchical_alltoallv: + tp = args.tensor_model_parallel_size + ep = args.expert_model_parallel_size + if ((not args.moe_alltoall_overlap_comm) or (not args.moe_tp_extend_ep) or tp <= 1 or tp > torch.npu.device_count() or + ep * tp <= torch.npu.device_count() or args.world_size <= torch.npu.device_count()): + raise AssertionError( + '`--moe-hierarchical-alltoallv` must have `--moe-alltoall-overlap-comm` on and ' + '`--moe-tp-extend-ep` on and 1 < tp <= torch.npu.device_count() and cross-device communication') 
+ if args.moe_zero_memory_num_layers is not None: + num_layers_per_pipeline_stage = args.num_layers // args.pipeline_model_parallel_size + if args.moe_zero_memory_num_layers < 0 or args.moe_zero_memory_num_layers > num_layers_per_pipeline_stage: + raise AssertionError('`--moe-zero-memory-num-layers` must be between 0 and num layers per pipeline stage') + if args.moe_zero_memory == "disable": + raise AssertionError('`--moe-zero-memory` must be enabled when using `--moe-zero-memory-num-layers`') + if args.moe_zero_memory != "disable" and args.moe_allgather_overlap_comm: + raise AssertionError('`--moe-zero-memory` do not support `--moe-allgather-overlap-comm` for now.') + if args.moe_dynamic_padding and not args.moe_no_drop: + raise AssertionError('`--moe-dynamic-padding` only support for `--moe-no-drop`.') + if args.moe_permutation_async_comm and args.moe_model_type != 'megatron_moe': + raise AssertionError('`--moe-permutation-async-comm` only support for megatron core moe.') + if args.moe_bmm_mc2: + if args.moe_model_type != 'megatron_moe' or not args.moe_token_dispatcher_type == 'alltoall': + raise AssertionError('`--moe-bmm-mc2` only support for megatron core moe and dispatcher is alltoall.') + if not args.moe_grouped_gemm: + raise AssertionError('`--moe-bmm-mc2` only support when `--moe-grouped-gemm` is true.') + if args.moe_tp_extend_ep or args.moe_alltoall_overlap_comm: + raise AssertionError( + '`--moe-bmm-mc2` not support with `--moe-tp-extend-ep` and `--moe-alltoall-overlap-comm`.') + + if args.context_parallel_size > 1 and args.position_embedding_type == 'alibi': + assert args.context_parallel_algo == 'megatron_cp_algo', f"alibi only support megatron_cp_algo" + if args.context_parallel_size > 1 and args.context_parallel_algo == 'ulysses_cp_algo': + assert args.seq_length % args.context_parallel_size == 0, f"sequence length must be divisible by context_parallel_size" + head, remainder = divmod(args.num_attention_heads, args.context_parallel_size * 
args.tensor_model_parallel_size) + assert head >= 1 and remainder == 0, f"num_attention_heads must be divisible by context_parallel_size * tensor_model_parallel_size" + args.use_flash_attn = True + if args.context_parallel_size > 1 and args.context_parallel_algo == 'megatron_cp_algo': + assert args.seq_length % (2 * args.context_parallel_size) == 0, f"sequence length must be divisible by 2 * context_parallel_size" + if args.position_embedding_type == 'alibi': + assert args.alibi_fusion_attn_type in [2, 3] and args.attention_mask_type == 'causal', f"megatron_cp_algo only support alibi type in [2, 3] and attention_mask_type is causal" + + assert args.cp_window_size >= 1 and args.cp_window_size < args.context_parallel_size, f'cp_window_size should in range [1, context_parallel_size) when using double_ring_attention.' + n_window, remainder = divmod(args.context_parallel_size, args.cp_window_size) + assert n_window >= 1 and remainder == 0, f'context parallel size must be divisible by cp_window_size when using double ring attention.' 
+ args.use_flash_attn = True + if args.context_parallel_size > 1 and args.context_parallel_algo == 'hybrid_cp_algo': + assert args.ulysses_degree_in_cp is not None, "--ulysses-degree-in-cp must be specified in hybrid_cp_algo" + ring_degree, remainder = divmod(args.context_parallel_size, args.ulysses_degree_in_cp) + assert ring_degree > 1 and remainder == 0, "--ulysses-degree-in-cp must be devisible by --context-parallel-size" + args.ring_degree = ring_degree + + head, remainder = divmod(args.num_attention_heads, args.ulysses_degree_in_cp * args.tensor_model_parallel_size) + assert head >= 1 and remainder == 0, f"num_attention_heads must be divisible by ulysse-degree-in-cp * tensor_model_parallel_size in hybrid cp" + + assert args.seq_length % (2 * args.context_parallel_size) == 0, f"sequence length must be divisible by 2 * context_parallel_size in hybrid cp" + + assert args.cp_window_size >= 1 and args.cp_window_size < ring_degree, f'cp_window_size should be in range [1, ring_degree) when using double ring attention with hybrid context parallelism.' + n_window, remainder = divmod(ring_degree, args.cp_window_size) + assert n_window >= 1 and remainder == 0, f'ring_degree should be divisible by cp_window_size when using double ring with hybrid context parallelism.' 
+ args.use_flash_attn = True + + if args.context_parallel_size > 1 and args.context_parallel_algo == 'adaptive_cp_algo': + assert args.seq_length % args.context_parallel_size == 0, f"sequence length must be divisible by context_parallel_size" + args.use_flash_attn = True + if args.context_parallel_size > 1 and args.context_parallel_algo == 'hybrid_adaptive_cp_algo': + assert args.ulysses_degree_in_cp is not None, "--ulysses-degree-in-cp must be specified in hybrid_adaptive_cp_algo" + ring_degree, remainder = divmod(args.context_parallel_size, args.ulysses_degree_in_cp) + assert ring_degree > 1 and remainder == 0, "--ulysses-degree-in-cp must be devisible by --context-parallel-size" + head, remainder = divmod(args.num_attention_heads, args.ulysses_degree_in_cp * args.tensor_model_parallel_size) + assert head >= 1 and remainder == 0, f"num_attention_heads must be divisible by ulysse-degree-in-cp * tensor_model_parallel_size in hybrid cp" + assert args.seq_length % args.context_parallel_size == 0, f"sequence length must be divisible by context_parallel_size in hybrid cp" + args.use_flash_attn = True + + # Mandatory modification to SBH, subsequent abandonment of other formats such as BSH,BSND + if args.shape_order != 'SBH': + args.shape_order = 'SBH' + if overlap_param_gather_without_mcore_models: + args.use_legacy_models = True + if args.transformer_impl == 'transformer_engine': + args.transformer_impl = 'local' + if args.fp8: + raise AssertionError('NPU not supported FP8.') + if args.tp_comm_overlap: + args.tp_comm_overlap = False + if args.recompute_method == "uniform": + assert not args.recompute_activation_function, \ + 'uniform recomputation is not compatible ' \ + 'with activation function recomputation ' + assert not args.recompute_norm, \ + 'uniform recomputation is not compatible ' \ + 'with norm recomputation ' + if args.recompute_activation_function and args.recompute_granularity == "selective": + raise AssertionError('--recompute-activation-function is not 
compatible with selective recomputation') + adaptive_recompute_enable = args.adaptive_recompute_device_size > 0 or args.adaptive_recompute_device_swap + if args.recompute_norm and args.recompute_granularity == "selective": + raise AssertionError('--recompute-norm is not compatible with selective recomputation') + if args.recompute_norm and args.use_legacy_models: + raise AssertionError('--recompute-norm is only supported with mcore models') + if args.use_nanopipe and not args.use_legacy_models: + raise AssertionError('--use-nanopipe is not available with mcore models') + if args.adaptive_recompute_device_swap and not args.use_legacy_models: + raise AssertionError('--adaptive-recompute-device-swap is not available with mcore models') + if adaptive_recompute_enable: + assert args.recompute_granularity is None and args.recompute_method is None, \ + 'adaptive selective recompute is not compatible with ' \ + 'recompute_granularity and recompute_method. ' + assert not args.recompute_activation_function, \ + 'adaptive selective recompute is not compatible ' \ + 'with activation function recomputation ' + assert not args.swap_attention, 'adaptive selective recompute is not compatible with swap_attention feature' + assert not args.recompute_in_advance and not args.recompute_in_bubble, 'adaptive selective recompute ' \ + 'is not compatible with ripipe schedule' + assert not args.memory_fragmentation, \ + 'adaptive selective recompute is not compatible with memory fragmentation' + if args.memory_fragmentation: + assert not args.use_fused_rotary_pos_emb, \ + 'memory fragmentation is not compatible with use_fused_rotary_pos_emb' + if args.smart_swap: + assert not adaptive_recompute_enable, 'smart swap is not compatible with adaptive selective recompute' + assert not args.memory_fragmentation, 'smart swap is not compatible with memory fragmentation' + if args.adaptive_memory_optimization: + assert args.ampipe_degree <= 1, 'adaptive memory optimization is not compatible with 
ampipe' + assert not adaptive_recompute_enable, 'adaptive memory optimization is not compatible with adaptive recomputing' + assert args.recompute_granularity is None and args.recompute_method is None, \ + 'adaptive memory optimization is not compatible with recompute_granularity or recompute_method' + assert not args.recompute_activation_function, \ + 'adaptive memory optimization is not compatible with recompute_activation_function' + assert not args.swap_attention, 'adaptive memory optimization is not compatible with swap_attention feature' + assert not args.recompute_in_bubble, 'adaptive memory optimization is not compatible with recompute_in_bubble' + assert not args.memory_fragmentation, \ + 'adaptive memory optimization is not compatible with memory_fragmentation' + if args.use_flash_attn: + assert args.sparse_mode == 0 or args.sparse_mode == 2, f"Only supports sparse modes 0 and 2" + args.create_attention_mask_in_dataloader = False + if args.automated_pipeline: + if args.recompute_activation_function: + print("[WARNING] disable activation function recomputation when enabling automated pipeline") + args.recompute_activation_function = False + if args.recompute_granularity is not None or args.recompute_method is not None: + print("[WARNING] disable recompute granularity and recompute method when enabling automated pipeline") + args.recompute_granularity = None + args.recompute_method = None + if args.noop_layers: + print("[WARNING] disable noop_layers when enabling automated pipeline") + args.noop_layers = None + if args.automated_pipeline_perf: + if args.automated_pipeline: + print("[WARNING] disable automated pipeline when enabling automated pipeline performance version") + args.automated_pipeline = False + if args.num_layers_per_virtual_pipeline_stage is not None: + raise AssertionError('automated pipeline performance is temporarily incompatible with virtual pipeline') + if args.use_ascend_mc2: + if args.use_ascend_coc: + raise AssertionError('--mc2 and 
coc can not be used together') + if args.use_nd_matmul: + if args.normalization == 'LayerNorm': + raise AssertionError('ND_MatMul is temporarily incompatible with LayerNorm') + if args.load is not None or args.pretrained_checkpoint is not None: + raise AssertionError('ND_MatMul does not support loading weights for training temporarily') + if args.tensor_model_parallel_size % args.nd1_dim1_size != 0: + raise AssertionError('tensor_model_parallel_size must be divisible by nd1_dim1_size') + if args.tensor_model_parallel_size % args.nd2_dim1_size != 0: + raise AssertionError('tensor_model_parallel_size must be divisible by nd2_dim1_size') + + args.reduce_recompute_for_last_chunk = False + if args.recompute_in_advance: + args.reduce_recompute_for_last_chunk = True + if args.recompute_method == "uniform": + raise AssertionError('recompute_in_advance does not support uniform recompute_method') + if not args.recompute_num_layers and not args.adaptive_memory_optimization: + raise AssertionError('recompute_num_layers can not be None or 0 when using recompute_in_advance') + if args.pipeline_model_parallel_size <= 1 or args.num_layers_per_virtual_pipeline_stage is None: + raise AssertionError('recompute_in_advance only support pipelining with interleaving') + if args.num_layers_per_virtual_pipeline_stage != 1: + args.recompute_in_advance = False + if args.recompute_in_bubble: + if args.recompute_num_layers: + raise AssertionError('recompute_num_layers must be None or 0 when using recompute_in_bubble') + if args.pipeline_model_parallel_size <= 1 or args.num_layers_per_virtual_pipeline_stage is None: + raise AssertionError('recompute_in_bubble only support pipelining with interleaving') + if not args.swap_attention: + # Following is a trick to realize bubble recomputation. We first enable all recomputation, + # and then disable recomputation for all layers except the ones chosen for bubble recomputation. 
+ args.recompute_granularity = "full" + args.recompute_method = "block" + if args.enable_recompute_layers_per_pp_rank: + args.recompute_num_layers = args.num_layers // args.pipeline_model_parallel_size + else: + args.recompute_num_layers = args.num_layers_per_virtual_pipeline_stage + if isinstance(args.noop_layers, str): + noop_layers = set() + for x in args.noop_layers.split(','): + if int(x) >= args.num_layers or int(x) < 0: + raise AssertionError(f'each element in args.noop_layers({args.noop_layers}) should bigger or equal ' + f'to 0 and smaller than args.num_layers({args.num_layers})') + noop_layers.add(int(x)) + args.noop_layers = noop_layers + + if args.ampipe_degree > 1: + assert args.use_flash_attn, "ampipe only supports flash attention, please enable '--use-flash-attn'." + assert args.num_experts is not None, "ampipe only supports MoE model." + assert args.expert_model_parallel_size > 1, "ampipe only supports expert_model_parallel_size > 1" + assert args.moe_model_type == 'deepspeed_moe', "ampipe only supports deepspeed_moe." + assert not args.use_ascend_mc2, "ampipe does't supports ascend mc2 for now." + assert not args.add_bias_linear, "ampipe does't supports bias linear for now." + assert not args.overlap_grad_reduce, "ampipe does't supports overlap_grad_reduce for now." + assert not args.overlap_param_gather, "ampipe does't supports overlap_param_gather for now." + assert not args.use_nanopipe, "ampipe does't supports use_nanopipe for now." + assert not args.recompute_in_bubble, "ampipe does't supports ripipe recompute_in_bubble for now." + assert not args.recompute_in_advance, "ampipe does't supports ripipe recompute_in_advance for now." + assert not args.adaptive_recompute_device_swap, "ampipe does't supports ripipe recompute_in_advance for now." 
+ if args.sequence_parallel: + assert args.seq_length % (args.ampipe_degree * args.tensor_model_parallel_size) == 0, \ + "sequence length must be divisible by ampipe_degree * tensor_model_parallel_size" + if args.context_parallel_size > 1: + assert args.context_parallel_algo == 'megatron_cp_algo', "ampipe only supports megatron_cp_algo" + assert args.ampipe_degree == 2, "ampipe only supports ampipe_degree=2 when context_parallel_size>1" + slice_size, remainder = divmod(args.seq_length, 2 * args.ampipe_degree * args.context_parallel_size) + assert remainder == 0, \ + "sequence length must be divisible by 2 * ampipe_degree * context_parallel_size" + if args.sequence_parallel: + assert slice_size % (args.tensor_model_parallel_size) == 0, \ + "sequence length must be divisible by 2 * ampipe_degree * context_parallel_size * tensor_model_parallel_size" + if args.use_pipe_experts: + if args.pipe_experts_multi_data % args.ampipe_degree != 0: + print("[WARNING] if pipe_experts_multi_data isn't divisible by ampipe_degree " + "--use-pipe-experts will be turned off.") + args.use_pipe_experts = False + args.pipe_experts_multi_stream = False + args.pipe_experts_multi_data = 1 + if args.tp_2d: + if args.sequence_parallel: + raise AssertionError('2d tp does not support sequence parallel') + if args.use_fused_rmsnorm: + raise AssertionError('2d tp does not support fused rmsnorm') + if args.use_nanopipe: + raise AssertionError('tp-2d does not support nano-pipe') + if args.ampipe_degree > 1: + raise AssertionError('tp-2d does not support ampipe') + if args.context_parallel_algo not in ['megatron_cp_algo', 'ulysses_cp_algo']: + raise AssertionError('tp-2d now only support megatron_cp_algo or ulysses_cp_algo') + if args.use_ascend_coc: + raise AssertionError('tp-2d does not support ascend coc') + if args.tensor_model_parallel_size // args.tp_x != args.tp_y: + raise AssertionError('need satisfy tp = tp_x * tp_y') + if args.expert_model_parallel_size > 1: + if 
args.moe_token_dispatcher_type != "allgather": + raise AssertionError('2d tp only support allgather megatron-moe now') + + if args.expert_interval <= 0 or args.expert_interval > args.num_layers: + raise AssertionError("--expert-interval must be between 1 and num layers") + if args.moe_train_capacity_factor <= 0.0: + raise AssertionError("--moe-train-capacity-factor must be greater than 0.0") + + if args.gemm_gradient_accumulation_fusion: + if not args.moe_grouped_gemm: + raise AssertionError('`--gemm-gradient-accumulation-fusion` only support with `--moe-grouped-gemm`.') + + if args.use_legacy_models: + if args.overlap_param_gather and args.reuse_fp32_param: + raise AssertionError('In legacy, `overlap_param_gather` does not support `reuse_fp32_param`.') + + if args.fp16: + args.gradient_accumulation_fusion = False + warnings.warn("Unsupported gradient fp16 bf16 for gradient accumulation fusion") + + if args.reset_attention_mask and args.attention_mask_type == 'causal': + assert args.context_parallel_algo == 'megatron_cp_algo', 'accelerated eod reset mode only support ring attention' + + if args.context_parallel_kv_cache_policy: + if args.context_parallel_size == 1: + raise AssertionError( + 'context parallel size must larger than 1 when --context-parallel-kv-cache-policy is set.') + if not args.use_flash_attn: + raise AssertionError( + '--context-parallel-kv-cache-policy only support use flash attention.' + ) + + if args.context_parallel_cache_interval != 0: + if not args.context_parallel_kv_cache_policy: + raise AssertionError( + '--context-parallel-cache-interval only can be used when --context-parallel-kv-cache-policy is set.' + ) + if args.context_parallel_cache_interval >= args.num_layers: + raise AssertionError( + '--context-parallel-cache-interval should be smaller than the number of layers.' + ) + if args.context_parallel_cache_interval < 0: + raise AssertionError( + '--context-parallel-cache-interval cannot be negative number.' 
def add_parser_argument_choices_value(parser, argument_name, value):
    """Allow ``value`` as an additional choice of an already-registered option.

    Iterates ``parser._actions``. Entries that are argument groups are searched
    recursively; every action whose option strings contain ``argument_name``
    gets ``value`` added to its ``choices``.

    Args:
        parser: An ``argparse.ArgumentParser`` or argument group (any object
            exposing ``_actions``).
        argument_name: Option string to look for, e.g.
            ``"--position-embedding-type"``.
        value: The choice to register.
    """
    for action in parser._actions or ():
        if isinstance(action, argparse._ArgumentGroup):
            # The recursive call used to drop ``value`` and raised TypeError.
            add_parser_argument_choices_value(action, argument_name, value)
        elif isinstance(action, argparse.Action) and argument_name in action.option_strings:
            choices = action.choices
            if choices is None:
                # No choices restriction: argparse already accepts any value.
                continue
            if value in choices:
                # Keep repeated registrations idempotent.
                continue
            if isinstance(choices, list):
                choices.append(value)
            else:
                # Tuples/ranges/etc. cannot be appended to; rebuild as a list.
                action.choices = [*choices, value]


def _add_alibi_args(parser):
    """Register the ``alibi`` option group and allow ``alibi`` position embeddings.

    Adds ``'alibi'`` to the choices of ``--position-embedding-type`` (when that
    option is already registered on *parser*) and returns the same parser.
    """
    add_parser_argument_choices_value(parser, "--position-embedding-type", 'alibi')

    group = parser.add_argument_group(title='alibi')
    group.add_argument('--square-alibi-mask',
                       action='store_true',
                       default=False,
                       help='attention mask of alibi is squared')
    group.add_argument('--fill-neg-inf',
                       action='store_true',
                       default=False,
                       help='fill alibi with negative inf')

    group.add_argument('--alibi-fusion-attn-type',
                       type=int,
                       help='alibi pse type, support for 0,2,3')

    group.add_argument('--alibi-diagonal-opposite',
                       action='store_true',
                       default=False,
                       help='make alibi diagonal opposite')

    return parser


def _add_ndmm_args(parser):
    """Register the ``ndmm`` (ND matmul) option group on *parser* and return it."""
    group = parser.add_argument_group(title='ndmm')
    group.add_argument('--use-nd-matmul', action='store_true', default=False,
                       help='use use-nd-matmul to replace megatron-style tensor parallel')
    group.add_argument('--nd1-dim1-size', type=int, default=1,
                       help='Dim1 of the first nd matmul when use-3d-matmul is True')
    group.add_argument('--nd2-dim1-size', type=int, default=1,
                       help='Dim1 of the second nd matmul when use-3d-matmul is True')
    return parser
def _add_auto_parallel_args(parser):
    """Attach the ``auto_parallel`` command-line options to *parser*.

    Returns the same parser object so registrations can be chained.
    """
    option_table = (
        ('--auto-parallel', {'action': 'store_true',
                             'help': 'enable automatic parallelism with auto-parallel'}),
        ('--nnodes', {'type': int, 'default': 1,
                      'help': 'the number of node in the cluster'}),
        ('--nproc-per-node', {'type': int, 'default': 8,
                              'help': 'the number of NPU on each node'}),
        ('--master-addr', {'type': str, 'default': None,
                           'help': 'the ip-address of master node'}),
        ('--master-port', {'type': str, 'default': None,
                           'help': 'the ip-port of master node'}),
        ('--node-rank', {'type': int, 'default': 0,
                         'help': 'the rank of nodes in the cluster, starting from 0 and increment by 1'}),
        ('--profile-operator', {'action': 'store_true', 'help': ''}),
        ('--profile-memory', {'action': 'store_true', 'help': ''}),
        ('--prof-file', {'type': str, 'default': None, 'help': ''}),
    )
    auto_parallel_group = parser.add_argument_group(title='auto_parallel')
    for option_name, settings in option_table:
        auto_parallel_group.add_argument(option_name, **settings)
    return parser


def _add_auto_parallel_mm_args(parser):
    """Attach the ``auto_parallel_mm`` (multimodal search) switches to *parser*."""
    switch_help = (
        ('--auto-parallel-mm', 'enable multimode automated parallel policy search'),
        ('--auto-parallel-profile', 'multimode performance sampling'),
    )
    mm_group = parser.add_argument_group(title='auto_parallel_mm')
    for switch, help_text in switch_help:
        mm_group.add_argument(switch, action='store_true', default=False, help=help_text)
    return parser


def _add_2d_tp_args(parser):
    """Attach the ``2d-tp`` (two-dimensional tensor parallel) options to *parser*."""
    tp2d_group = parser.add_argument_group(title='2d-tp')

    def add_switch(switch, help_text):
        # Every boolean option of this group is a plain opt-in flag.
        tp2d_group.add_argument(switch, action='store_true', default=False, help=help_text)

    def add_dim(option_name, help_text):
        # Both mesh dimensions are integers that default to 1.
        tp2d_group.add_argument(option_name, type=int, default=1, help=help_text)

    add_switch('--tp-2d', 'use use-2d-tp to replace megatron-style tensor parallel')
    add_dim('--tp-x', 'the fist dim tensor parallel size for Linear')
    add_dim('--tp-y', 'the second dim tensor parallel size for Linear')
    add_switch('--enable-overlap-ag-with-matmul',
               'use enable-overlap-ag-with-matmul to overlap all-gather with matmul')
    add_switch('--enable-overlap-matmul-with-rs',
               'use enable-overlap-matmul-with-rs to overlap matmul with reduce-scatter')
    add_switch('--enable-backward-overlap-ag-with-matmul',
               'use enable-backward-overlap-ag-with-matmul to overlap all-gather with matmul in backward')
    return parser
def _add_hccl_group_buffer_args(parser):
    """Attach the ``hccl-group-buffer`` options to *parser* and return it."""
    option_table = (
        ('--hccl-group-buffer',
         {'type': str, 'default': None, 'help': 'the hccl buffer for group'}),
        ('--hccl-group-buffer-adaptive',
         {'action': 'store_true', 'default': False, 'help': 'the hccl buffer for group adaptively'}),
        ('--hccl-ep-group-buffer-adaptive-factor',
         {'type': float, 'default': -1.0, 'help': 'the ep group buffer factor'}),
    )
    hccl_group = parser.add_argument_group(title='hccl-group-buffer')
    for option_name, settings in option_table:
        hccl_group.add_argument(option_name, **settings)
    return parser
def auto_tuning(args: Namespace, working_dir: str) -> None:
    """Run the auto-tuning pipeline and log the recommended configurations.

    Stages, in order: parse args via the executor, profile static and dynamic
    memory, build the memory model, run the generated profiling configs,
    build the performance model, search, and log timings plus the results.

    Args:
        args: Parsed command-line namespace; only ``auto_tuning_log_level``
            is read here.
        working_dir: Directory holding the parse outputs, the per-node
            ``ootb_<node_rank>.pkl`` hand-off file and the profiling results.

    Raises:
        pickle.UnpicklingError: Re-raised after logging when a parse output
            cannot be unpickled.

    NOTE(review): block nesting below was reconstructed from syntax; in
    particular each ``executor.execute`` call is assumed to sit after its
    ``with`` block (file closed) and inside the ``if not os.path.exists``
    guard -- confirm against the original file.
    """
    init_logger(args.auto_tuning_log_level)
    logger = get_logger("main")
    start_time = time.time()
    executor = ModelExecutor(TorchRunRunner())

    # Force refresh model args just in case model has been modified after previous run.
    logger.info("<==========Begin to parse args==========>")
    executor.execute(working_dir, flag=ExecutorFlag.PARSE_ARGS)
    hardware_parse_path = os.path.join(working_dir, Hardware.HARDWARE_PARSE_FILENAME)
    args_parse_path = os.path.join(working_dir, ModelConfig.ARGS_PARSE_FILENAME)
    try:
        # Both files are read back through the restricted unpickler.
        with open(hardware_parse_path, mode="rb") as file:
            hardware: Hardware = restricted_loads(file)  # type: ignore
        with open(args_parse_path, mode="rb") as file:
            model_config: ModelConfig = restricted_loads(file)  # type: ignore
    except pickle.UnpicklingError as e:
        logger.error(f"Incorrect pickle format. UnpicklingError: {e}")
        raise e
    Hardware().load(hardware)
    model_config.disable_cp_flag = False
    logger.info("<==========Finished parsing args==========>")

    # Memory modeling
    MemoryModeling.set_model_cfg(model_config)
    static_list, dynamic_list = MemoryModeling.generate_mem_modeling_profiling_list()
    logger.info("<==========Begin to profile static memory==========>")
    for cfg, filename in static_list:
        # Skip configs whose output file already exists in working_dir.
        if not os.path.exists(os.path.join(working_dir, filename)):
            # Create/truncate the hand-off file with owner read/write permission only.
            flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
            mode = stat.S_IWUSR | stat.S_IRUSR
            pkl_filename = os.path.join(working_dir, f'ootb_{Hardware().node_rank}.pkl')
            with os.fdopen(os.open(pkl_filename, flags, mode=mode), 'wb') as f:
                pickle.dump(cfg, f)
            executor.execute(working_dir, output_filename=filename, cfg=cfg, flag=ExecutorFlag.PARSE_MODEL)
    logger.info("<==========Finished profiling static memory==========>")
    logger.info("<==========Begin to profile dynamic memory==========>")
    for cfg in dynamic_list:
        path = os.path.join(working_dir, get_prof_dir(cfg))
        if not os.path.exists(path):
            # Same hand-off file name as above; it is overwritten for every config.
            flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
            mode = stat.S_IWUSR | stat.S_IRUSR
            pkl_filename = os.path.join(working_dir, f'ootb_{Hardware().node_rank}.pkl')
            with os.fdopen(os.open(pkl_filename, flags, mode=mode), 'wb') as f:
                pickle.dump(cfg, f)
            executor.execute(working_dir, output_filename=path, cfg=cfg, flag=ExecutorFlag.PROFILE)
    logger.info("<==========Finished profiling dynamic memory==========>")
    MemoryModeling.modeling(working_dir)
    model_parser_end_time = time.time()
    logger.info("Model parser cost time: %sms", str((model_parser_end_time - start_time) * 1000))

    hardware_config = Hardware()
    profiling_cfg_list = generate_profiling_configs(model_config)

    logger.info("profile_cfgs (tp, pp, dp, cp, ep, #layers, seq_len):")
    logger.info(",".join(
        str((cfg.tp,
             cfg.pp,
             cfg.dp,
             cfg.cp,
             cfg.ep,
             cfg.num_layers,
             cfg.seq_length))
        for cfg in profiling_cfg_list))

    generate_profiling_config_end_time = time.time()

    # Each entry collected below is a [config, parsed profiling result] pair.
    profiling_results = []
    logger.info("<==========Begin profiling==========>")
    logger.info("This process will run the script and get some profiling results.")
    logger.info("Please wait for a while.")
    count = 1
    for profiling_cfg in profiling_cfg_list:
        # tracking the order of profiling all over the list
        logger.info('<==========the %s/%s loop==========>', str(count), str(len(profiling_cfg_list)))
        logger.info("profile_db_configs (tp, pp, dp, cp, ep, #layers, seq_len):")
        logger.info(str([profiling_cfg.tp,
                         profiling_cfg.pp,
                         profiling_cfg.dp,
                         profiling_cfg.cp,
                         profiling_cfg.ep,
                         profiling_cfg.num_layers,
                         profiling_cfg.seq_length]))
        res_dir = f"{working_dir}/{get_prof_dir(profiling_cfg)}"
        # Only profile when no result directory exists yet; existing results are reused.
        if not os.path.exists(res_dir):
            flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
            mode = stat.S_IWUSR | stat.S_IRUSR
            pkl_filename = os.path.join(working_dir, f'ootb_{Hardware().node_rank}.pkl')
            with os.fdopen(os.open(pkl_filename, flags, mode=mode), 'wb') as f:
                pickle.dump(profiling_cfg, f)
            executor.execute(working_dir, output_filename=res_dir, cfg=profiling_cfg, flag=ExecutorFlag.PROFILE)

        profiling_node_parse = GatherNodeProfiling(res_dir)
        profiling_res = profiling_node_parse.fuse_node_pkl()

        profiling_results.append([profiling_cfg, profiling_res])
        count += 1

    profiling_and_parser_end_time = time.time()

    # Performance Modeling
    model_performance = ModelPerformance(hardware_config, model_config, working_dir)
    model_performance.get_profiling_info(profiling_results)

    # ``unsampled_profiling`` is returned by the search but not used afterwards here.
    final_cfgs, unsampled_profiling = search_demo(model_config=model_config,
                                                  perf_obj_function=model_performance.performance,
                                                  working_dir=working_dir)
    logger.info("model config is that:\n%s", str(model_config))
    logger.info("hardware config is that:\n%s", str(hardware_config))

    search_cfg_end_time = time.time()
    # All durations are wall-clock differences converted from seconds to milliseconds.
    logger.info(">>>>>> Generate profiling config cost time: %sms",
                str((generate_profiling_config_end_time - model_parser_end_time) * 1000))
    logger.info(">>>>>> Profiling and parser cost time: %sms",
                str((profiling_and_parser_end_time - generate_profiling_config_end_time) * 1000))
    logger.info(">>>>>> Search_cfg cost time: %sms",
                str((search_cfg_end_time - profiling_and_parser_end_time) * 1000))
    logger.info(">>>>>> Total cost time: %sms",
                str((search_cfg_end_time - start_time) * 1000))

    logger.info("<==========Final config generated==========>")
    logger.info("The recommended configs are:")
    for i, final_cfg in enumerate(final_cfgs):
        # Falsy entries are skipped; the logged rank is the zero-based index.
        if final_cfg:
            logger.info("<==========Top #%s config==========>", str(i))
            if logger.getEffectiveLevel() == logging.DEBUG:
                logger.debug("\n%s", str(final_cfg))
            else:
                # Calls ModelConfig.__str__ explicitly instead of str(final_cfg).
                logger.info("\n%s", ModelConfig.__str__(final_cfg))
    logger.info("<==========Launch training==========>")
def generate_profiling_configs(model_cfg: ModelConfig) -> List[SearchConfig]:
    """Build the list of parallel configurations to profile for *model_cfg*.

    A base config is copied from ``model_cfg`` with cp=1, pp=1 and the
    profiling tp / sequence length; MoE models use 4 experts and ep=4.
    Variants are derived from it with ``dataclasses.replace``. Before
    returning, every config gets ``prepare_for_profiling()`` and
    ``num_layers`` set to its ``pp``.

    Args:
        model_cfg: Source model configuration; ``is_moe()`` and
            ``disable_cp_flag`` select which variants are generated.

    Returns:
        The configs in generation order, base config first.

    NOTE(review): block nesting was reconstructed from syntax; the 4cp, 2cp
    and roce-cp variants are assumed to all sit under the
    ``disable_cp_flag`` check -- confirm against the original file.
    """
    profile_cfgs: List[SearchConfig] = list()

    base_cfg = SearchConfig()
    base_cfg.copy_from_config(model_cfg)
    base_cfg.tensor_model_parallel_size = get_tp_for_profiling()
    base_cfg.context_parallel_size = 1
    base_cfg.pipeline_model_parallel_size = 1
    base_cfg.seq_length = get_seq_length_for_profiling(model_cfg)
    if model_cfg.is_moe():
        base_cfg.num_experts = 4
        base_cfg.expert_model_parallel_size = 4
    # Second tp value to profile: twice the base tp by default, overridden
    # below for base tp == 8 depending on the device type string.
    bi_tp = base_cfg.tp * 2
    if "910B" in Hardware().device_type and base_cfg.tp == 8:
        bi_tp = 4

    if "910_9" in Hardware().device_type and base_cfg.tp == 8:
        bi_tp = 16

    # base config
    # 4dp
    profile_cfgs.append(base_cfg)

    # 4dp mc2
    gen_cfg_mc2 = replace(base_cfg, use_ascend_mc2=True)
    profile_cfgs.append(gen_cfg_mc2)

    # 2dp 2tp
    gen_cfg = replace(base_cfg)
    gen_cfg.tensor_model_parallel_size = bi_tp
    if model_cfg.is_moe():
        gen_cfg.expert_model_parallel_size = 2
    profile_cfgs.append(gen_cfg)

    # 2dp 2tp mc2
    gen_cfg_mc2 = replace(gen_cfg, use_ascend_mc2=True)
    profile_cfgs.append(gen_cfg_mc2)

    # 2dp 2pp
    gen_cfg = replace(base_cfg)
    gen_cfg.pipeline_model_parallel_size = 2
    if model_cfg.is_moe():
        gen_cfg.expert_model_parallel_size = 2
    profile_cfgs.append(gen_cfg)

    # CP config
    if not model_cfg.disable_cp_flag:
        # Each cp variant is kept only if the per-cp-rank sequence length
        # (seq_length // cp) is at least 2 * 1024.
        # 4cp
        gen_cfg = replace(base_cfg)
        gen_cfg.context_parallel_size = 4
        if gen_cfg.seq_length // gen_cfg.cp >= 2 * 1024:
            profile_cfgs.append(gen_cfg)

        # 2cp
        gen_cfg = replace(base_cfg)
        gen_cfg.context_parallel_size = 2
        if model_cfg.is_moe():
            gen_cfg.expert_model_parallel_size = 2
        if gen_cfg.seq_length // gen_cfg.cp >= 2 * 1024:
            profile_cfgs.append(gen_cfg)

        # roce cp
        gen_cfg = replace(base_cfg)
        gen_cfg.context_parallel_size = 2
        gen_cfg.tensor_model_parallel_size = bi_tp
        if model_cfg.is_moe():
            gen_cfg.expert_model_parallel_size = 2
        if gen_cfg.seq_length // gen_cfg.cp >= 2 * 1024:
            profile_cfgs.append(gen_cfg)

    # MLP config
    if model_cfg.is_moe():
        gen_cfg = replace(base_cfg)
        gen_cfg.expert_model_parallel_size = 1
        gen_cfg.pipeline_model_parallel_size = 1
        profile_cfgs.append(gen_cfg)

        gen_cfg_pp2 = replace(gen_cfg)
        gen_cfg_pp2.pipeline_model_parallel_size = 2
        profile_cfgs.append(gen_cfg_pp2)

    # half-seq
    # NOTE(review): this config is built but never appended to profile_cfgs,
    # so it does not affect the returned list -- confirm whether an
    # ``append`` is missing or the variant was disabled on purpose.
    gen_cfg = replace(base_cfg)
    if model_cfg.is_moe():
        gen_cfg.expert_model_parallel_size = 1
    gen_cfg.seq_length = base_cfg.seq_length // 2
    if gen_cfg.seq_length < 2 * 1024:
        gen_cfg.seq_length = gen_cfg.seq_length * 4

    for cfg in profile_cfgs:
        cfg.prepare_for_profiling()
        # Layer count is set equal to the pipeline-parallel size of the config.
        cfg.num_layers = cfg.pp

    return profile_cfgs
+ # Parallel configs + tensor_model_parallel_size: int = None # type: ignore + context_parallel_size: int = None # type: ignore + pipeline_model_parallel_size: int = None # type: ignore + num_layers_per_virtual_pipeline_stage: Optional[int] = None + data_parallel_size: int = None # type: ignore + sequence_parallel: bool = None # type: ignore + use_distributed_optimizer: bool = None # type: ignore + global_batch_size: int = None # type: ignore + micro_batch_size: int = None # type: ignore + + # Model configs + num_layers: int = None # type: ignore + num_attention_heads: int = None # type: ignore + hidden_size: int = None # type: ignore + ffn_hidden_size: int = None # type: ignore + add_bias_linear: bool = None # type: ignore + swiglu: bool = None # type: ignore + fp16: bool = None # type: ignore + bf16: bool = None # type: ignore + use_ascend_mc2: bool = None # type: ignore + + # Data configs + seq_length: int = None # type: ignore + + # MoE configs + num_experts: Optional[int] = None + moe_router_topk: Optional[int] = None + moe_train_capacity_factor: Optional[float] = None + expert_model_parallel_size: Optional[int] = None + enable_token_rearrange_opt: bool = None # type: ignore + + # Memory configs + recompute_granularity: Optional[str] = None + recompute_method: Optional[str] = None + recompute_num_layers: Optional[int] = None + use_flash_attn: bool = None # type: ignore + adaptive_recompute_device_swap: bool = None # type: ignore + + # Train configs + train_iters: int = None # type: ignore + profile: bool = None # type: ignore + profile_step_start: int = None # type: ignore + profile_step_end: int = None # type: ignore + profile_ranks: List[int] = None # type: ignore + profile_level: str = None # type: ignore + profile_with_cpu: bool = None # type: ignore + profile_with_stack: bool = None # type: ignore + profile_with_memory: bool = None # type: ignore + profile_record_shapes: bool = None # type: ignore + + # World Size + global_world_size: int = None # type: 
ignore + + # JIT + jit_compile: bool = None # type: ignore + + # Flags + disable_cp_flag: bool = False + + def __str__(self) -> str: + rt = list() + rt.append(f"{'Data Parallel Size':<30}{str(self.dp):<40}") + rt.append(f"{'Tensor Parallel Size':<30}{str(self.tp):<40}") + rt.append(f"{'Pipeline Parallel Size':<30}{str(self.pp):<40}") + rt.append(f"{'Virtual Pipeline Size':<30}{str(self.vpp):<40}") + rt.append(f"{'Context Parallel Size':<30}{str(self.cp):<40}") + rt.append(f"{'Expert Parallel Size':<30}{str(self.ep):<40}") + rt.append(f"{'ZeRO1':<30}{str(self.zero1):<40}") + rt.append(f"{'MC2':<30}{str(self.use_ascend_mc2):<40}") + rt.append(f"{'Token Rearrange':<30}{str(self.enable_token_rearrange_opt):<40}") + rt.append(f"{'Micro Batch Size':<30}{str(self.mbs):<40}") + rt.append(f"{'Recompute layer':<30}{str(self.re_layer):<40}") + return "\n".join(rt) + + @property + def tp(self) -> int: + return self.tensor_model_parallel_size + + @property + def cp(self) -> int: + return self.context_parallel_size + + @property + def pp(self) -> int: + return self.pipeline_model_parallel_size + + @property + def layers_per_vpp(self) -> Optional[int]: + return self.num_layers_per_virtual_pipeline_stage + + @property + def vpp(self) -> Optional[int]: + if self.num_layers_per_virtual_pipeline_stage: + return self.num_layers // (self.pp * self.num_layers_per_virtual_pipeline_stage) + return None + + @property + def dp(self) -> int: + return self.data_parallel_size + + @property + def ep(self) -> Optional[int]: + return self.expert_model_parallel_size or 1 + + @property + def zero1(self) -> bool: + return self.use_distributed_optimizer + + @property + def gbs(self) -> int: + return self.global_batch_size + + @property + def mbs(self) -> int: + return self.micro_batch_size + + @property + def adaptive_recompute(self) -> bool: + return self.adaptive_recompute_device_swap + + @property + def re_layer(self) -> Optional[int]: + return self.recompute_num_layers + + @property + def 
num_micro_batches(self) -> int: + return self.global_batch_size // self.micro_batch_size + + @property + def dtype(self) -> DTYPE: + if self.fp16: + return DTYPE.fp16 + elif self.bf16: + return DTYPE.bf16 + return DTYPE.fp32 + + def is_full_recompute(self) -> bool: + return self.recompute_granularity is not None and \ + self.recompute_granularity == "full" and \ + self.recompute_method is not None and \ + self.recompute_method == "block" + + def is_moe(self) -> bool: + return self.num_experts is not None diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/config/search_config.py b/model/train/yoco_moe/mindspeed/auto_tuning/config/search_config.py new file mode 100644 index 000000000..405d92db9 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/config/search_config.py @@ -0,0 +1,60 @@ +from typing import Optional +from dataclasses import dataclass + +from mindspeed.auto_tuning.config.model_config import ModelConfig +from mindspeed.auto_tuning.module.hardware import Hardware + + +@dataclass +class SearchConfig(ModelConfig): + memory: Optional[float] = None + performance: Optional[float] = None + + def __str__(self) -> str: + rt = list() + if self.performance: + rt.append(f"{'Performance':<30}{str(self.performance):<40}") + if self.memory: + rt.append(f"{'Memory':<30}{str(self.memory):<40}") + return super().__str__() + "\n" + "\n".join(rt) + + def copy_from_config(self, cfg: ModelConfig) -> None: + for k, v in vars(cfg).items(): + if k in self.__dict__: + self.__dict__[k] = v + + def prepare_for_profiling(self) -> None: + self.use_distributed_optimizer = True + self.recompute_granularity = "full" + self.recompute_method = "block" + self.adaptive_recompute_device_swap = False + self.global_world_size = Hardware().num_devices + self.micro_batch_size = 1 + + self.normalize() + self.global_batch_size = self.dp * self.pp * self.mbs + + self.train_iters = 10 + self.profile = True + self.profile_step_start = 8 + self.profile_step_end = 9 + self.profile_ranks 
= list(range(Hardware().num_devices)) + self.profile_level = "level1" + self.profile_with_cpu = True + self.profile_with_stack = False + self.profile_with_memory = True + self.profile_record_shapes = True + + def normalize(self) -> None: + self.data_parallel_size = self.global_world_size // \ + (self.tp * self.cp * self.pp) + + if self.is_moe(): + self.enable_token_rearrange_opt = True + + if self.adaptive_recompute_device_swap: + self.recompute_granularity = None + self.recompute_method = None + self.recompute_num_layers = None + elif self.is_full_recompute(): + self.recompute_num_layers = self.num_layers // self.pp diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/mindspeed_adaptor.py b/model/train/yoco_moe/mindspeed/auto_tuning/mindspeed_adaptor.py new file mode 100644 index 000000000..8eaa89d64 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/mindspeed_adaptor.py @@ -0,0 +1,167 @@ +from typing import Dict, List, Optional, Tuple +import os +import stat +from argparse import Namespace + +import pickle +from torch.nn import Module +import torch.distributed as dist + +from mindspeed.auto_tuning.utils.logger import get_logger +from mindspeed.auto_tuning.utils.restricted_unpickler import restricted_loads +from mindspeed.auto_tuning.module.hardware import Hardware +from mindspeed.auto_tuning.module.memory.model_param import ModelParam +from mindspeed.auto_tuning.config.model_config import ModelConfig + + +_logger = get_logger("MindSpeedAdaptor") + + +class MindSpeedAdaptor: + + def __new__(cls): + raise NotImplementedError("MindSpeedAdaptor is a static class.") + + @staticmethod + def get_hardware(working_dir: str = str()) -> Hardware: + import acl + from .utils.mem_utils import mem_b_to_mb + + device_type = acl.get_soc_name() + + devices_per_node, _ = acl.rt.get_device_count() + + num_nodes = dist.get_world_size() // devices_per_node + device_rank = dist.get_rank() + node_rank = device_rank // devices_per_node + device_id = device_rank % 
devices_per_node + acl.rt.set_device(device_id) + _, memory_limit, _ = acl.rt.get_mem_info(1) + acl.rt.reset_device(device_id) + + host_ip = os.environ.get("MASTER_ADDR", None) + + if device_rank == 0: + import getpass + user_name = getpass.getuser() + + object_list = [user_name] + else: + object_list = [None] + + dist.broadcast_object_list(object_list) + user_name: str = object_list[0] # type: ignore + + hardware = Hardware() + hardware.device_type = device_type + hardware.host_ip = host_ip + hardware.user_name = user_name + hardware.memory_limit = mem_b_to_mb(memory_limit) - 2 * 1024 + hardware.devices_per_node = devices_per_node + hardware.num_nodes = num_nodes + hardware.node_rank = node_rank + + if working_dir and device_id == 0: + flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC + mode = stat.S_IWUSR | stat.S_IRUSR + hardware_filename = os.path.join(working_dir, Hardware.HARDWARE_PARSE_FILENAME) + with os.fdopen(os.open(hardware_filename, flags, mode=mode), 'wb') as f: + pickle.dump(hardware, f) + + return hardware + + @staticmethod + def get_model_args(args: Namespace, hardware: Hardware, working_dir: str) -> ModelConfig: + model_config = ModelConfig() + for arg_name, arg_value in vars(args).items(): + if arg_name in model_config.__dict__: + model_config.__dict__[arg_name] = arg_value + model_config.global_world_size = args.auto_tuning_ranks + + if dist.get_rank() % hardware.devices_per_node == 0: + flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC + mode = stat.S_IWUSR | stat.S_IRUSR + model_config_filename = os.path.join(working_dir, ModelConfig.ARGS_PARSE_FILENAME) + with os.fdopen(os.open(model_config_filename, flags, mode=mode), 'wb') as f: + pickle.dump(model_config, f) + + return model_config + + @staticmethod + def get_model_params(model: List[Module], + pipeline_model_parallel_rank: int, + hardware: Hardware, + output_path: str + ) -> List[ModelParam]: + model_params: List[ModelParam] = list() + + def traverse_module_layers(module: Module, prefix: str): + 
new_prefix = f"{prefix}{module.__class__.__name__}." + + if all(False for _ in module.children()): + for param_name, param in module.named_parameters(): + model_params.append(ModelParam(f"{new_prefix}{param_name}", param.numel())) + return + + for sub_module in module.children(): + traverse_module_layers(sub_module, new_prefix) + + for module in model: + traverse_module_layers(module, str()) + + total_model_params = [None] * dist.get_world_size() + dist.all_gather_object(total_model_params, (pipeline_model_parallel_rank, model_params)) + if dist.get_rank() % hardware.devices_per_node == 0: + flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC + mode = stat.S_IWUSR | stat.S_IRUSR + with os.fdopen(os.open(output_path, flags, mode=mode), 'wb') as f: + pickle.dump(total_model_params, f) + + return model_params + + @staticmethod + def set_argv(argv: List[str], input_path: str) -> List[str]: + with open(input_path, mode="rb") as file: + try: + modified_argv: Tuple[Dict[str, Optional[str]], Dict[str, Optional[str]]] = \ + restricted_loads(file) # type: ignore + except pickle.UnpicklingError as e: + _logger.warning(f"Incorrect pickle format. 
UnpicklingError: {e}") + raise e + + enabled_argv, disabled_argv = modified_argv + + for arg_name, arg_value in enabled_argv.items(): + # Flag args + if arg_name == "--profile-ranks" and arg_value: + argv.extend([arg_name, *[s.strip() for s in arg_value.strip("[]").split(",")]]) + continue + if arg_value is None: + try: + argv.index(arg_name) + except ValueError: + argv.append(arg_name) + # Non-flag args + else: + try: + argv[argv.index(arg_name) + 1] = arg_value + except ValueError: + argv.extend([arg_name, arg_value]) + + for arg_name, arg_value in disabled_argv.items(): + # Flag args + if arg_value is None: + try: + argv.pop(argv.index(arg_name)) + except ValueError: + continue + # Non-flag args + else: + try: + i = argv.index(arg_name) + argv.pop(i) + argv.pop(i) + except ValueError: + continue + + return argv diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/__init__.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/__init__.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication.py new file mode 100644 index 000000000..a0f01850e --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication.py @@ -0,0 +1,107 @@ +from mindspeed.auto_tuning.module.communication import communication_profile +from mindspeed.auto_tuning.module.communication.communication_model_tp import TpModel +from mindspeed.auto_tuning.module.communication.communication_model_cp import CpModel +from mindspeed.auto_tuning.module.communication.communication_model_dp import DpModel +from mindspeed.auto_tuning.module.communication.communication_model_pp import PpModel 
+from mindspeed.auto_tuning.module.communication.communication_model_ep import EpModel +from mindspeed.auto_tuning.module.communication.communication_model_mc2 import Mc2Model + + +class Communication(object): + """Communication modeling.""" + + def __init__(self, hardware=None, model_cfg=None): + self.hardware = hardware + self.model_cfg = model_cfg + + self.hccs_dev_num_910_9 = 384 + self.hccs_dev_num_910b = 8 + self.hccs_dev_num = 0 + if "910_9" in self.hardware.device_type: + self.hccs_dev_num = self.hccs_dev_num_910_9 + if "910B" in self.hardware.device_type: + self.hccs_dev_num = self.hccs_dev_num_910b + + self.tp_model = TpModel(self.hccs_dev_num) + self.cp_model = CpModel(self.hccs_dev_num) + self.dp_model = DpModel(self.hccs_dev_num) + self.pp_model = PpModel(self.hccs_dev_num) + self.ep_model = EpModel(self.hccs_dev_num) + self.mc2_model = Mc2Model(self.hccs_dev_num) + + self.config_list = [] + + def communication_modeling(self, profiling_results): + self.adapt_to_profile_info(profiling_results) + self.info_to_modeling() + + def adapt_to_profile_info(self, profiling_results): + for index, (config, model) in enumerate(profiling_results): + # Reads profile information in a group of configuration files. 
+ total_profile_time_info = communication_profile.TotalProfileTimeInfo() + + self.config_list.append(config) + + self.get_profile_info(model, total_profile_time_info, config, profiling_results, index) + # Now force to run only one floor + + if config.use_ascend_mc2: + self.mc2_model.get_comm_info_list( + total_profile_time_info.mc2_profile_time_info, config) + else: + self.tp_model.get_comm_info_list( + total_profile_time_info.tp_profile_time_info, config) + self.dp_model.get_comm_info_list( + total_profile_time_info.dp_profile_time_info, config) + self.cp_model.get_comm_info_list( + total_profile_time_info.cp_profile_time_info, config) + self.ep_model.get_comm_info_list( + total_profile_time_info.ep_profile_time_info, config) + self.pp_model.get_comm_info_list( + total_profile_time_info.pp_profile_time_info, config) + + def info_to_modeling(self): + self.tp_model.modeling() + self.tp_model.print_modeling(self.config_list) + self.mc2_model.modeling() + self.mc2_model.print_modeling(self.config_list) + self.dp_model.modeling() + self.dp_model.print_modeling(self.config_list) + self.cp_model.modeling() + self.cp_model.print_modeling(self.config_list) + self.ep_model.modeling() + self.ep_model.print_modeling(self.config_list) + self.pp_model.modeling() + self.pp_model.print_modeling(self.config_list) + + def get_profile_info(self, model, total_profile_time_info, config, profiling_results, index): + tensor_hcom_info = model.tensor_parallel_comm + data_hcom_info = model.data_parallel_comm + pipeline_hcom_info = model.pipeline_parallel_comm + context_hcom_info = model.context_parallel_comm + expert_hcom_info = model.expert_parallel_comm + if config.use_ascend_mc2: + self.mc2_model.get_communication_info_from_profile(total_profile_time_info.mc2_profile_time_info, + profiling_results, + index) + for stage_id, stage_id_tensor_hcom_info in enumerate(tensor_hcom_info): + # ["tp_x"] regression + if stage_id == 0 and len(tensor_hcom_info) > stage_id: + 
self.tp_model.get_communication_info_from_profile( + total_profile_time_info.tp_profile_time_info, tensor_hcom_info[stage_id]) + # para_list.cp_x regression + if stage_id == 0 and len(context_hcom_info) > stage_id: + self.cp_model.get_communication_info_from_profile( + total_profile_time_info.cp_profile_time_info, context_hcom_info[stage_id], model, config.cp) + if config.pp > 1: + if stage_id == 0 and len(pipeline_hcom_info) > stage_id: + self.pp_model.get_communication_info_from_profile( + total_profile_time_info.pp_profile_time_info, pipeline_hcom_info[stage_id], config.pp) + # para_list.dp_x regression + if stage_id == len(tensor_hcom_info) - 1 and len(data_hcom_info) > stage_id: + self.dp_model.get_communication_info_from_profile( + total_profile_time_info.dp_profile_time_info, data_hcom_info[stage_id]) + # para_list.ep_x regression + if stage_id == 0 and len(expert_hcom_info) > stage_id: + self.ep_model.get_communication_info_from_profile( + total_profile_time_info.ep_profile_time_info, expert_hcom_info[stage_id]) diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model.py new file mode 100644 index 000000000..e652f0728 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model.py @@ -0,0 +1,196 @@ +import abc +from mindspeed.auto_tuning.module.operator.operator_shape_cal import linear_regression +from mindspeed.auto_tuning.utils.logger import get_logger + + +class CommunicationList(): + def __init__(self): + self.roce_x_list = [] + self.roce_time_list = [] + self.hccs_x_list = [] + self.hccs_time_list = [] + self.cross_x_list = [] + self.cross_y_list = [] + self.cross_time_list = [] + + self.roce_w = 0 + self.roce_b = 0 + self.hccs_w = 0 + self.hccs_b = 0 + self.cross_list = (0, 0) + + def append_roce(self, iv_list, time): + self.roce_x_list.append([iv_list[0]]) + 
self.roce_time_list.append([time]) + self.hccs_x_list.append([None]) + self.hccs_time_list.append([None]) + self.cross_x_list.append([None]) + self.cross_y_list.append([None]) + self.cross_time_list.append([None]) + + def append_hccs(self, iv_list, time): + self.roce_x_list.append([None]) + self.roce_time_list.append([None]) + self.hccs_x_list.append([iv_list[0]]) + self.hccs_time_list.append([time]) + self.cross_x_list.append([None]) + self.cross_y_list.append([None]) + self.cross_time_list.append([None]) + + def append_cross(self, iv_list, time): + self.roce_x_list.append([None]) + self.roce_time_list.append([None]) + self.hccs_x_list.append([None]) + self.hccs_time_list.append([None]) + self.cross_x_list.append([iv_list[1]]) + self.cross_y_list.append([iv_list[2]]) + self.cross_time_list.append([time]) + + def cal_roce(self, iv_list): + return self.roce_w * iv_list[0] + self.roce_b + + def cal_hccs(self, iv_list): + return self.hccs_w * iv_list[0] + self.hccs_b + + def cal_cross(self, iv_list): + return self.hccs_w * iv_list[1] + self.hccs_b + self.roce_w * iv_list[2] + self.roce_b + + def modeling(self): + lists = ( + self.hccs_x_list, + self.hccs_time_list, + self.roce_x_list, + self.roce_time_list, + self.cross_x_list, + self.cross_y_list, + self.cross_time_list + ) + (hccs_x_cal, hccs_time_cal), (roce_x_cal, roce_time_cal), (cross_x_cal, + cross_time_cal) = self.get_hccs_roce_list(lists) + if roce_x_cal: + self.roce_w, self.roce_b = self.linear_x_y(roce_x_cal, roce_time_cal) + if hccs_x_cal: + self.hccs_w, self.hccs_b = self.linear_x_y(hccs_x_cal, hccs_time_cal) + + def get_hccs_roce_list(self, lists): + hccs_x_list = [] + hccs_y_list = [] + roce_x_list = [] + roce_y_list = [] + cross_x_list = [] + cross_y_list = [] + for i, x_index in enumerate(lists[0]): + if lists[0][i] != [None]: + hccs_x_list.append(lists[0][i]) + hccs_y_list.append(lists[1][i]) + elif lists[2][i] != [None]: + roce_x_list.append(lists[2][i]) + roce_y_list.append(lists[3][i]) + else: + 
cross_x_list.append([lists[4][i][0] / lists[5][i][0]]) + cross_y_list.append([lists[6][i][0] / lists[5][i][0]]) + hccs_lists = (hccs_x_list, hccs_y_list) + roce_lists = (roce_x_list, roce_y_list) + cross_lists = (cross_x_list, cross_y_list) + re_hccs_lists = self.add_origin_whith_single_point(hccs_lists) + re_roce_lists = self.add_origin_whith_single_point(roce_lists) + re_cross_lists = self.add_origin_whith_single_point(cross_lists) + + return re_hccs_lists, re_roce_lists, re_cross_lists + + @classmethod + def add_origin_whith_single_point(cls, lists): + last = None + for item in lists[0]: + if last: + if item != last: + last = None + break + else: + last = item + listres = lists + if last: + listres = [[], []] + listres[0].append(lists[0][0]) + listres[1].append(lists[1][0]) + if len(listres[0]) == 1: + listres[0].append([0]) + listres[1].append([0]) + return listres + + @classmethod + def linear_x_y(cls, list1, list2): + w, b = 0, 0 + if len(list1) > 0: + w, b = linear_regression(list1, list2) if list1 else (0, 0) + return w, b + + +class CommunicationModel: + def __init__(self, hccs_dev_num): + self.comm = CommunicationList() + self.main_domain = Domain(hccs_dev_num) + self.hccs_dev_num = hccs_dev_num + self.logger = get_logger("Communication") + + @abc.abstractmethod + def get_communication_info_from_profile(self, hcom_info_tage_id): + pass + + @abc.abstractmethod + def get_comm_info_list(self, profile_info): + pass + + @abc.abstractmethod + def modeling(self): + pass + + @abc.abstractmethod + def print_modeling(self): + pass + + +class Domain: + def __init__(self, hccs_dev_num): + self.max_domain = 0 + self.min_domain = 0 + self.roce_comm_exist = False + self.hccs_comm_exist = False + self.cross_comm_exist = False + self.hccs_dev_num = hccs_dev_num + + def is_hccs_domain(self): + return self.max_domain <= self.hccs_dev_num + + def is_cross_domain(self): + return self.min_domain < self.hccs_dev_num < self.max_domain + + def is_roce_domain(self): + return not 
(self.is_hccs_domain() or self.is_cross_domain())  # RoCE only when the group is neither pure-HCCS nor cross-domain, matching append_method_for_domain/cal_method_for_domain + + def append_method_for_domain(self): + if self.is_hccs_domain(): + self.hccs_comm_exist = True + return "append_hccs" + if self.is_cross_domain(): + self.cross_comm_exist = True + return "append_cross" + self.roce_comm_exist = True + return "append_roce" + + def append_time_in_domain(self, communication_list, iv_list, time): + method_for_domain = self.append_method_for_domain() + append_domain = getattr(communication_list, method_for_domain) + append_domain(iv_list, time) + + def cal_method_for_domain(self): + if self.is_hccs_domain(): + return "cal_hccs" + if self.is_cross_domain(): + return "cal_cross" + return "cal_roce" + + def cal_time_in_domain(self, communication_list, iv_list): + method_for_domain = self.cal_method_for_domain() + cal_domain = getattr(communication_list, method_for_domain) + return cal_domain(iv_list) diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model_cp.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model_cp.py new file mode 100644 index 000000000..9dbe0cb8d --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model_cp.py @@ -0,0 +1,228 @@ +from mindspeed.auto_tuning.module.communication.communication_model import CommunicationModel +_GLOBAL_ATTN_FORWARD_KERNEL_NAMES = [ + "aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore" +] +_GLOBAL_ATTN_BACKWARD_KERNEL_NAMES = [ + "aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad" +] + + +class CpModel(CommunicationModel): + def __init__(self, hccs_dev_num): + super(CpModel, self).__init__(hccs_dev_num) + # Profile Modeling Data Information Table + self.cp_vector_x = [] + self.cp_vector_time = [] + self.cp_attn_x = [] + self.cp_attn_time = [] + self.cp_attn_bw_x = [] + self.cp_attn_bw_time = [] + + self.cp_attn_w = 0 + self.cp_attn_b = 0 + self.cp_attn_bw_w = 0 + self.cp_attn_bw_b = 0
+ self.cp_vector_w = 0 + self.cp_vector_b = 0 + + def get_communication_info_from_profile(self, cp_profile_time_info, hcom_info_tage_id, model, cp): + cp_profile_time_info.total_comm_time += hcom_info_tage_id.total_time_ms + cp_profile_time_info.wait_comm_time += hcom_info_tage_id.wait_time_ms + cp_profile_time_info.attn_cp_time, cp_profile_time_info.attn_cpbw_time = \ + self.get_vectortime_from_profiling(model, cp) + cp_profile_time_info.vector_cp_time += hcom_info_tage_id.vector_time_ms + + def get_comm_info_list(self, cp_profile_time_info, config): + tp = config.tp + cp = config.cp + pp = config.pp + dp = config.dp + s = config.seq_length / 1000 + + # CP's communication volume is CP-1 times the forward KV, backward KV, and dKV per machine. + if cp > 1: + # Here we consider only the attention of communication hiding, with forward CP-1 and backward CP. + self.cp_attn_x.append([s / tp / cp * (cp - 1) / cp]) + self.cp_attn_time.append([cp_profile_time_info.attn_cp_time]) + self.cp_attn_bw_x.append([s / tp / cp]) + self.cp_attn_bw_time.append([cp_profile_time_info.attn_cpbw_time]) + self.cp_vector_time.append([cp_profile_time_info.vector_cp_time]) + if cp - 2 < 0: + self.cp_vector_x.append([0]) + else: + self.cp_vector_x.append([cp - 2]) + + comm_x = (cp - 1) * s / (tp * cp) * pp + comm_time = cp_profile_time_info.total_comm_time + + K = cp * tp / self.hccs_dev_num + comm_y = (K) * s / (tp * cp) * pp + comm_z = (K - 1) * s / (tp * cp) * pp + iv_list = [comm_x, comm_y, comm_z] + self.main_domain.max_domain = cp * tp + self.main_domain.min_domain = tp + self.main_domain.append_time_in_domain(self.comm, iv_list, comm_time) + + def modeling(self): + # traffic of model + self.comm.modeling() + + # overlap + self.cp_attn_w, self.cp_attn_b = self.comm.linear_x_y( + self.cp_attn_x, self.cp_attn_time) + self.cp_attn_bw_w, self.cp_attn_bw_b = self.comm.linear_x_y( + self.cp_attn_bw_x, self.cp_attn_bw_time) + self.cp_vector_w, self.cp_vector_b = self.comm.linear_x_y( + 
self.cp_vector_x, self.cp_vector_time) + + def print_modeling(self, config_list): + self.logger.debug(f"****************** cp(ms) ***********************") + if self.main_domain.roce_comm_exist: + self.logger.debug(f"roce") + tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<8}\t{7:<8}" + self.logger.debug(tplt.format('No', 'tp', 'dp', 'pp', 'cp', 'ep', 'cp_time', 'cp_x', + chr(12288))) + tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<8.2f}\t{7:<8}" + index = 0 + for i, _ in enumerate(config_list): + if config_list[i].cp > 1: + if self.comm.roce_x_list[index][0]: + self.logger.debug(tplt.format(i, config_list[i].tp, config_list[i].dp, config_list[i].pp, + config_list[i].cp, config_list[i].ep, + self.comm.roce_time_list[index][0], self.comm.roce_x_list[index][0], + chr(12288))) + index += 1 + self.logger.debug(f"--------------") + tplt = "{0:<9}\t{1:<9}" + self.logger.debug(tplt.format('cp_w,', 'cp_b', chr(12288))) + self.logger.debug(tplt.format(round(self.comm.roce_w, 3), round(self.comm.roce_b, 3), + chr(12288))) + self.logger.debug(f"-------------") + if self.main_domain.hccs_comm_exist: + self.logger.debug(f"hccs") + tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<8}\t{7:<8}" + self.logger.debug(tplt.format('No', 'tp', 'dp', 'pp', 'cp', 'ep', 'cp_time', 'cp_x', + chr(12288))) + tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<8.2f}\t{7:<8}" + index = 0 + for i, _ in enumerate(config_list): + if config_list[i].cp > 1: + if self.comm.hccs_x_list[index][0]: + self.logger.debug(tplt.format(i, config_list[i].tp, config_list[i].dp, config_list[i].pp, + config_list[i].cp, config_list[i].ep, + self.comm.hccs_time_list[index][0], self.comm.hccs_x_list[index][0], + chr(12288))) + index += 1 + self.logger.debug(f"-----------") + tplt = "{0:<9}\t{1:<9}" + self.logger.debug(tplt.format('cp_HCCS_w,', 'cp_HCCS_b', chr(12288))) + self.logger.debug(tplt.format(round(self.comm.hccs_w, 3), round(self.comm.hccs_b, 3), + chr(12288))) + 
self.logger.debug(f"-----------") + + if self.main_domain.cross_comm_exist: + self.logger.debug(f"cross") + tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<8}\t{7:<8}\t{8:<8}" + self.logger.debug(tplt.format('No', 'tp', 'dp', 'pp', 'cp', + 'ep', 'cp_time', 'cp_cross_x', 'cp_cross_y', chr(12288))) + tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<8.2f}\t{7:<8.2f}\t{8:<8.2f}" + index = 0 + for i, _ in enumerate(config_list): + if config_list[i].cp > 1: + if self.comm.cross_x_list[index][0]: + self.logger.debug(tplt.format(i, config_list[i].tp, config_list[i].dp, config_list[i].pp, + config_list[i].cp, config_list[i].ep, + self.comm.cross_time_list[index][0], + self.comm.cross_x_list[index][0], self.comm.cross_y_list[index][0], + chr(12288))) + index += 1 + self.logger.debug(f"-----------") + tplt = "{0:<9}\t{1:<9}" + self.logger.debug(tplt.format(round(self.comm.hccs_w, 3), round(self.comm.roce_w, 3), + chr(12288))) + self.logger.debug(f"-----------") + + tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<8}\t{7:<8}\t{8:<8}\t{9:<8}\t{10:<8}\t{11:<8}" + self.logger.debug(tplt.format('No', 'tp', 'dp', 'pp', 'cp', 'ep', 'attn_x', + 'attention', 'attn_bw_x', 'attn_bw', 'vector_x', 'vector_time', chr(12288))) + tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<8.2f}\t{7:<8.2f}\t{8:<8.2f}\t{9:<8.2f}\t{10:<8.2f}\t{11:<8.2f}" + index = 0 + for i, _ in enumerate(config_list): + if config_list[i].cp > 1: + self.logger.debug(tplt.format(i, config_list[i].tp, config_list[i].dp, config_list[i].pp, + config_list[i].cp, config_list[i].ep, + self.cp_attn_x[index][0], self.cp_attn_time[index][0], + self.cp_attn_bw_x[index][0], self.cp_attn_bw_time[index][0], + self.cp_vector_x[index][0], self.cp_vector_time[index][0], chr(12288))) + index += 1 + self.logger.debug(f"-----------") + tplt = "{0:<9}\t{1:<9}\t{2:<9}\t{3:<9}\t{4:<9}\t{5:<9}" + self.logger.debug(tplt.format('attn_w,', 'attn_b', 'attn_bw_w', + 'attn_bw_b', 'vector_w', 'vector_b', 
chr(12288))) + self.logger.debug(tplt.format(round(self.cp_attn_w, 3), round(self.cp_attn_b, 3), + round(self.cp_attn_bw_w, 3), round(self.cp_attn_bw_b, 3), + round(self.cp_vector_w, 3), round( + self.cp_vector_b, 3), + chr(12288))) + self.logger.debug(f"\n\n\n") + return + + + def get_vectortime_from_profiling(self, model, cp): + attn_list = [] + attn_re_list = [] + attn_gb_list = [] + profile_info = model + attention = 0.0 + attn_bw = 0.0 + for item in profile_info.forward.operator_info[0]: + if item.name in _GLOBAL_ATTN_FORWARD_KERNEL_NAMES and len(attn_list) < cp - 1: + attn_list.append(item) + attention += float(item.duration_us) + for item in profile_info.backward.operator_info[0]: + if item.name in _GLOBAL_ATTN_FORWARD_KERNEL_NAMES and len(attn_re_list) < cp - 1: + attn_re_list.append(item) + attention += float(item.duration_us) + if item.name in _GLOBAL_ATTN_BACKWARD_KERNEL_NAMES and len(attn_gb_list) < cp: + attn_gb_list.append(item) + attn_bw += float(item.duration_us) + # Attention, one of them is shadowed. attn_bw needs to be calculated. + attention = attention / 1000 + attn_bw = attn_bw / 1000 + return attention, attn_bw + + def performance(self, search_cfg): + tp = search_cfg.tensor_model_parallel_size + pp = search_cfg.pipeline_model_parallel_size + cp = search_cfg.context_parallel_size + s = search_cfg.seq_length / 1000 + cp_time = 0.0 + comm_x = (cp - 1) * s / (tp * cp) * pp + K = cp * tp / self.hccs_dev_num + comm_y = (K) * s / (tp * cp) * pp + comm_z = (K - 1) * s / (tp * cp) * pp + iv_list = [comm_x, comm_y, comm_z] + self.main_domain.max_domain = cp * tp + self.main_domain.min_domain = tp + if cp > 1: + comm_time = self.main_domain.cal_time_in_domain(self.comm, iv_list) + + attn_time = self.cp_attn_w * (s / tp / cp * (cp - 1) / cp) + self.cp_attn_b + attn_bw_time = self.cp_attn_bw_w * (s / tp / cp) + self.cp_attn_bw_b + # Attention and attn_bw need to be considered separately. 
+ cp_time1 = comm_time / 2 - attn_time * pp + if cp_time1 < 0: + cp_time1 = 0 + cp_time2 = comm_time / 2 - attn_bw_time * pp + if cp_time2 < 0: + cp_time2 = 0 + cp_time = cp_time1 + cp_time2 + if cp > 2: + cp_vector_time = self.cp_vector_w * (cp - 2) + self.cp_vector_b + cp_time = cp_time - cp_vector_time + self.logger.debug('cp_time:{}, attn_time:{}, attn_bw_time:{}, ' + 'cp_vector_time:{}'.format(cp_time, attn_time, attn_bw_time, cp_vector_time)) + if cp_time < 0: + cp_time = 0.0 + self.logger.debug(f'The communication time of the CP is the waiting time.') + return cp_time diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model_dp.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model_dp.py new file mode 100644 index 000000000..621056631 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/module/communication/communication_model_dp.py @@ -0,0 +1,226 @@ +from mindspeed.auto_tuning.module.communication.communication_model \ + import CommunicationModel, CommunicationList, Domain + + +class DpModel(CommunicationModel): + def __init__(self, hccs_dev_num): + super(DpModel, self).__init__(hccs_dev_num) + # Profile modeling data table + + self.attention = CommunicationList() + self.attention_reducescatter = CommunicationList() + self.attention_allgather = CommunicationList() + + self.mlp_domain = Domain(hccs_dev_num) + self.zero_comm = CommunicationList() + self.zero = CommunicationList() + self.zero_reducescatter = CommunicationList() + self.zero_allgather = CommunicationList() + + def get_communication_info_from_profile(self, dp_profile_time_info, hcom_info_tage_id): + dp_profile_time_info.total_comm_time += hcom_info_tage_id.total_time_ms + dp_profile_time_info.total_mlpzero_time += hcom_info_tage_id.mlp_zero_time_ms + dp_profile_time_info.total_otherzero_time += hcom_info_tage_id.total_time_ms - hcom_info_tage_id.mlp_zero_time_ms + dp_profile_time_info.mlp_ag_time += 
hcom_info_tage_id.mlp_ag_time_ms + dp_profile_time_info.mlp_rs_time += hcom_info_tage_id.mlp_rs_time_ms + dp_profile_time_info.other_ag_time += hcom_info_tage_id.other_ag_time_ms + dp_profile_time_info.other_rs_time += hcom_info_tage_id.other_rs_time_ms + + def get_comm_info_list(self, dp_profile_time_info, config): + tp = config.tp + cp = config.cp + dp = config.dp + ep = config.ep + pp = config.pp + zero = config.zero1 + experts = config.num_experts if config.num_experts else 1 + + # attention + if dp * cp > 1: + comm_x = (dp * cp - 1) / (tp * pp) + K = dp * cp * tp / self.hccs_dev_num + comm_y = (K) / (tp * pp) + comm_z = (K - 1) / (tp * pp) + iv_list = [comm_x, comm_y, comm_z] + comm_time = dp_profile_time_info.total_otherzero_time + reducescatter_time = dp_profile_time_info.other_rs_time + allgather_time = dp_profile_time_info.other_ag_time + dp_total_time = dp_profile_time_info.total_comm_time + self.main_domain.max_domain = dp * cp * tp + self.main_domain.min_domain = cp * tp + self.main_domain.append_time_in_domain(self.attention, iv_list, comm_time) + self.main_domain.append_time_in_domain(self.attention_reducescatter, iv_list, reducescatter_time) + self.main_domain.append_time_in_domain(self.attention_allgather, iv_list, allgather_time) + self.main_domain.append_time_in_domain(self.comm, iv_list, dp_total_time) + # MLP + mlp_x = experts * (dp * cp / ep - 1) / tp / pp + comm_time = dp_profile_time_info.total_mlpzero_time + reducescatter_time = dp_profile_time_info.mlp_rs_time + allgather_time = dp_profile_time_info.mlp_ag_time + mlp_x = experts * (dp * cp / ep - 1) / tp / pp + K = dp * cp * tp / ep / self.hccs_dev_num + mlp_y = experts * (K) / (tp * pp) + mlp_z = experts * (K - 1) / (tp * pp) + iv_list = [mlp_x, mlp_y, mlp_z] + self.mlp_domain.max_domain = dp * cp * tp + self.mlp_domain.min_domain = cp * tp * ep + self.mlp_domain.append_time_in_domain(self.zero, iv_list, comm_time) + self.mlp_domain.append_time_in_domain(self.zero_reducescatter, iv_list, 
def print_modeling_unit(self, config_list, info_list, domain):
    """Write the profiled samples and fitted coefficients of one group to the debug log.

    Args:
        config_list: Profiled configurations; only entries with
            ``dp * cp > 1`` own a sample, and samples are stored in that order.
        info_list: Four communication lists, indexed as
            ``[total dp time, group time, reduce-scatter, all-gather]``.
        domain: Object whose ``roce_comm_exist`` / ``hccs_comm_exist`` /
            ``cross_comm_exist`` flags select which sections are printed.
    """
    log = self.logger.debug
    pad = chr(12288)

    def sampled(x_attr):
        """Yield (config number, config, sample index) for every non-empty sample."""
        index = 0
        for number, cfg in enumerate(config_list):
            if cfg.dp * cfg.cp > 1:
                if getattr(info_list[1], x_attr)[index][0]:
                    yield number, cfg, index
                # Every config with dp * cp > 1 consumes one slot, printed or not.
                index += 1

    def print_single_link(link, w_title, b_title):
        """Print the sample table and coefficient table of the roce or hccs link."""
        row = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<8}\t{7:<8}\t{8:<8}\t{9:<8}\t{10:<8}"
        x_attr = f"{link}_x_list"
        time_attr = f"{link}_time_list"
        log(f" {link}")
        # info_list[2] is reduce-scatter and info_list[3] is all-gather, so the
        # headers are listed in that order (they were swapped before).
        log(row.format('No', 'tp', 'dp', 'pp', 'cp', 'ep', 'dp_time',
                       'x', 'time', 'rs_time', 'ag_time', pad))
        for number, cfg, index in sampled(x_attr):
            log(row.format(number, cfg.tp, cfg.dp, cfg.pp, cfg.cp, cfg.ep,
                           round(getattr(info_list[0], time_attr)[index][0], 2),
                           round(getattr(info_list[1], x_attr)[index][0], 3),
                           round(getattr(info_list[1], time_attr)[index][0], 2),
                           round(getattr(info_list[2], time_attr)[index][0], 3),
                           round(getattr(info_list[3], time_attr)[index][0], 2),
                           pad))
        log("-----------")
        coef = "{0:<9}\t{1:<9}\t{2:<9}\t{3:<9}\t{4:<9}\t{5:<9}"
        log(coef.format(w_title, b_title, 'rs_w', 'rs_b', 'ag_w', 'ag_b', pad))
        # Always read the coefficients from info_list so the MLP table does not
        # report the attention intercept.
        log(coef.format(round(getattr(info_list[1], f"{link}_w"), 2),
                        round(getattr(info_list[1], f"{link}_b"), 2),
                        round(getattr(info_list[2], f"{link}_w"), 2),
                        round(getattr(info_list[2], f"{link}_b"), 2),
                        round(getattr(info_list[3], f"{link}_w"), 2),
                        round(getattr(info_list[3], f"{link}_b"), 2), pad))
        log("----------------------")

    if domain.roce_comm_exist:
        print_single_link('roce', 'time_w', 'time_b')
    if domain.hccs_comm_exist:
        print_single_link('hccs', 'dp_w', 'dp_b')
    if domain.cross_comm_exist:
        row = ("{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<8}\t{7:<8}\t{8:<8}"
               "\t{9:<8}\t{10:<8}\t{11:<8}")
        log(" cross")
        log(row.format('No', 'tp', 'dp', 'pp', 'cp', 'ep', 'dp_time',
                       'dp_x', 'dp_y', 'total_time', 'rs_time', 'ag_time', pad))
        for number, cfg, index in sampled('cross_x_list'):
            log(row.format(number, cfg.tp, cfg.dp, cfg.pp, cfg.cp, cfg.ep,
                           round(info_list[0].cross_time_list[index][0], 2),
                           round(info_list[1].cross_x_list[index][0], 3),
                           round(info_list[1].cross_y_list[index][0], 3),
                           round(info_list[1].cross_time_list[index][0], 2),
                           round(info_list[2].cross_time_list[index][0], 3),
                           round(info_list[3].cross_time_list[index][0], 3),
                           pad))
        log("----------------------")
def get_comm_info_list(self, ep_profile_time_info, config):
    """Record one expert-parallel profiling sample for later fitting.

    Nothing is recorded unless ``config.ep`` is set and greater than 1.

    Args:
        ep_profile_time_info: Profile summary; its ``min_time`` is stored as
            the measured communication time.
        config: Profiled configuration providing ``tp``, ``cp``, ``ep``,
            ``pp``, ``seq_length`` and ``num_experts``.
    """
    tp, cp, ep, pp = config.tp, config.cp, config.ep, config.pp
    seq_k = config.seq_length / 1000
    expert_num = config.num_experts if config.num_experts else 1

    if not (ep and ep > 1):
        return

    volume = expert_num * seq_k
    # K: ratio of the ep * tp group size to the number of HCCS devices.
    K = ep * tp / self.hccs_dev_num
    features = [
        volume * (ep - 1) * pp / ep / tp / cp,
        volume * (K) * pp / ep / tp / cp,
        volume * (K - 1) / K * pp / ep / tp / cp,
    ]
    self.main_domain.max_domain = ep * tp
    self.main_domain.min_domain = tp
    self.main_domain.append_time_in_domain(self.comm, features, ep_profile_time_info.min_time)
def performance(self, search_cfg):
    """Predict the expert-parallel communication time of a search configuration.

    An unset (``None``) or zero ``expert_model_parallel_size`` is treated as
    1, matching how ``DpModel.performance`` normalises the same field.
    Previously the feature arithmetic ran before the ``ep`` guard and raised
    on such values.

    Args:
        search_cfg: Configuration providing ``tensor_model_parallel_size``,
            ``pipeline_model_parallel_size``, ``context_parallel_size``,
            ``expert_model_parallel_size``, ``seq_length`` and ``num_experts``.

    Returns:
        The time computed by ``main_domain.cal_time_in_domain`` when
        ``ep > 1``, otherwise ``0.0``.
    """
    tp = search_cfg.tensor_model_parallel_size
    pp = search_cfg.pipeline_model_parallel_size
    cp = search_cfg.context_parallel_size
    ep = search_cfg.expert_model_parallel_size if search_cfg.expert_model_parallel_size else 1
    s = search_cfg.seq_length / 1000
    experts = search_cfg.num_experts if search_cfg.num_experts else 1

    # The domain bounds are refreshed even when no EP communication happens,
    # as before.
    self.main_domain.max_domain = ep * tp
    self.main_domain.min_domain = tp
    if ep <= 1:
        return 0.0

    comm_x = experts * s * (ep - 1) * pp / ep / tp / cp
    K = ep * tp / self.hccs_dev_num
    comm_y = experts * s * (K) * pp / ep / tp / cp
    comm_z = experts * s * (K - 1) / K * pp / ep / tp / cp
    iv_list = [comm_x, comm_y, comm_z]
    return self.main_domain.cal_time_in_domain(self.comm, iv_list)
def modeling(self):
    """Fit the MC2 slope as ``sum(time) / sum(x)`` over the recorded HCCS samples.

    ``self.comm.hccs_w`` is left unchanged when there are no samples or the
    x values sum to zero; dividing in that case used to raise
    ``ZeroDivisionError``.
    """
    x_list = self.comm.hccs_x_list
    time_list = self.comm.hccs_time_list
    sum_x = sum(sample[0] for sample in x_list)
    sum_time = sum(time_list[index][0] for index, _ in enumerate(x_list))
    if sum_x == 0:
        # No MC2 samples were profiled: nothing to fit.
        return
    self.comm.hccs_w = sum_time / sum_x
def get_communication_info_from_profile(self, pp_profile_time_info, hcom_info_tage_id, pp):
    """Store the mean elapsed time of the first ``pp - 1`` PP communications.

    Args:
        pp_profile_time_info: Receives the result in ``each_pp_time``.
        hcom_info_tage_id: Profile data whose ``details[i]`` is a mapping;
            the first value of each mapping must contain ``'Elapse Time(ms)'``.
        pp: Pipeline-parallel size. With ``pp <= 1`` there is nothing to
            average, so ``each_pp_time`` is left untouched (the division by
            ``pp - 1`` previously could raise ``ZeroDivisionError``).
    """
    hops = pp - 1
    if hops <= 0:
        return
    total_pp_time = 0
    for i in range(hops):
        # Only the first entry of each detail mapping is used.
        first_entry = next(iter(hcom_info_tage_id.details[i].values()))
        total_pp_time += first_entry['Elapse Time(ms)']
    pp_profile_time_info.each_pp_time = total_pp_time / hops
consider cross modeling. + comm_time = pp_profile_time_info.each_pp_time + self.main_domain.max_domain = pp * dp * cp * tp + self.main_domain.min_domain = pp * dp * cp * tp + if pp > 1: + self.main_domain.append_time_in_domain(self.comm, iv_list, comm_time) + # PPtime indicates the time consumed by each PP communication. + + def modeling(self): + self.comm.modeling() + if self.comm.hccs_w == 0: + self.comm.hccs_w = self.comm.roce_w + + def print_modeling(self, config_list): + self.logger.debug(f"****************** pp(ms) ***********************") + if self.main_domain.roce_comm_exist: + tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<1}\t{7:<8}\t{8:<8}" + self.logger.debug(tplt.format('No', 'tp', 'dp', 'pp', 'vp', + 'cp', 'ep', 'pp_x', 'pp_time', chr(12288))) + index = 0 + for i, _ in enumerate(config_list): + if config_list[i].pp > 1: + if self.comm.roce_x_list[index][0]: + self.logger.debug(tplt.format(i, config_list[i].tp, config_list[i].dp, config_list[i].pp, + str(config_list[i].layers_per_vpp), config_list[i].cp, config_list[i].ep, + round(self.comm.roce_x_list[index][0], 3), round( + self.comm.roce_time_list[index][0], 2), + chr(12288))) + index += 1 + self.logger.debug(f"-----------") + tplt = "{0:<9}\t{1:<9}" + self.logger.debug(tplt.format('pp_w', 'pp_b', chr(12288))) + self.logger.debug(tplt.format(round(self.comm.roce_w, 3), + round(self.comm.roce_b, 3), chr(12288))) + self.logger.debug(f"-----------") + if self.main_domain.hccs_comm_exist: + tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<1}\t{7:<8}\t{8:<8}" + self.logger.debug(tplt.format('No', 'tp', 'dp', 'pp', 'vp', 'cp', + 'ep', 'pp_HCCS_x', 'pp_HCCS_time', chr(12288))) + index = 0 + for i, _ in enumerate(config_list): + if config_list[i].pp > 1: + if self.comm.hccs_x_list[index][0]: + self.logger.debug(tplt.format(i, config_list[i].tp, config_list[i].dp, config_list[i].pp, + str(config_list[i].layers_per_vpp), config_list[i].cp, config_list[i].ep, + round( + 
def performance(self, search_cfg):
    """Predict the pipeline-parallel communication time of a search configuration.

    Args:
        search_cfg: Configuration providing ``tensor_model_parallel_size``,
            ``data_parallel_size``, ``pipeline_model_parallel_size``,
            ``context_parallel_size``, ``num_layers`` and
            ``num_layers_per_virtual_pipeline_stage``.

    Returns:
        ``0.0`` when ``pp <= 1``; otherwise the per-communication time from
        ``main_domain.cal_time_in_domain`` scaled by ``2 * (pp * vp - 1) * 2``.
    """
    tp = search_cfg.tensor_model_parallel_size
    dp = search_cfg.data_parallel_size
    pp = search_cfg.pipeline_model_parallel_size
    layers_per_vpp = search_cfg.num_layers_per_virtual_pipeline_stage
    if layers_per_vpp:
        vp = search_cfg.num_layers // (pp * layers_per_vpp)
    else:
        vp = 1
    cp = search_cfg.context_parallel_size

    # PP does not need to consider cross modeling, hence the two zero features.
    features = [(1 / (vp * tp * cp)), 0, 0]
    world = pp * dp * cp * tp
    self.main_domain.max_domain = world
    self.main_domain.min_domain = world

    if not pp > 1:
        return 0.0
    # Multiply send and receive by 2.
    round_trip = self.main_domain.cal_time_in_domain(self.comm, features) * 2
    return round_trip * (pp * vp - 1) * 2
+ comm_time = (total_time - wait_time) * 14 / 18 / pp + self.tp_comm_overlap_time_list.append([overlap_time * 2 / 3 / pp]) + else: + # When PP is enabled, there are 15 communications in the TP domain, and one loss communication needs to + # be excluded. + comm_time = (total_time - wait_time) * 14 / 15 / pp + self.tp_comm_overlap_time_list.append([overlap_time / pp]) + self.comm.append_hccs([comm_x], comm_time) + self.tp_comm_total_time_list.append([total_time]) + self.tp_comm_wait_time_list.append([wait_time]) + + def modeling(self): + self.comm.hccs_w, self.comm.hccs_b = self.comm.linear_x_y( + self.comm.hccs_x_list, self.comm.hccs_time_list) + self.tp_hccs_overlap_w, self.tp_hccs_overlap_b = self.comm.linear_x_y( + self.comm.hccs_x_list, self.tp_comm_overlap_time_list) + return + + def print_modeling(self, config_list): + self.logger.debug(f"******************profile info list***********************") + tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<8}\t{7:<8}\t{8:<8}\t{9:<8}\t{10:<8}\t{11:<8}" + self.logger.debug(f"****************** tp(ms) ***********************") + self.logger.debug(tplt.format('No', 'tp', 'dp', 'pp', 'cp', 'ep', 'tp_time', 'tp_x', 'overlap_time', 'total_time', + 'wait_time', chr(12288))) + + index = 0 + for i, _ in enumerate(config_list): + if config_list[i].use_ascend_mc2: + continue + self.logger.debug(tplt.format(i, config_list[i].tp, config_list[i].dp, config_list[i].pp, config_list[i].cp, + config_list[i].ep, + round(self.comm.hccs_time_list[index][0], 2), + round(self.comm.hccs_x_list[index][0], 3), + round(self.tp_comm_overlap_time_list[index][0], 2), + round(self.tp_comm_total_time_list[index][0], 2), + round(self.tp_comm_wait_time_list[index][0], 2), + chr(12288))) + index += 1 + self.logger.debug(f"-----------") + tplt = "{0:<9}\t{1:<9}\t{2:<9}\t{3:<9}" + self.logger.debug(tplt.format('tp_w', 'tp_b', 'overlap_w', 'overlap_b', chr(12288))) + self.logger.debug(tplt.format(round(self.comm.hccs_w, 3), 
def performance(self, search_cfg):
    """Predict the tensor-parallel communication time that is not overlapped.

    Args:
        search_cfg: Configuration providing ``tensor_model_parallel_size``,
            ``context_parallel_size`` and ``seq_length``.

    Returns:
        ``0`` when ``tp <= 1``; otherwise the fitted communication time minus
        the fitted overlap time, with negative results replaced by ``0``.
    """
    tp = search_cfg.tensor_model_parallel_size
    cp = search_cfg.context_parallel_size
    s = search_cfg.seq_length / 1000

    if not tp > 1:
        return 0

    raw_time = self.comm.hccs_w * (s / (tp * cp)) + self.comm.hccs_b
    hidden_time = self.tp_hccs_overlap_w * s / (tp * cp) + self.tp_hccs_overlap_b
    exposed_time = raw_time - hidden_time
    # A negative value means the overlap model hides everything.
    return 0 if exposed_time < 0 else exposed_time
class Hardware(metaclass=Singleton):
    """
    Hardware description used by auto tuning.

    Holds device, intra-node and inter-node parameters; the values assigned
    in ``__init__`` are defaults that ``load`` can overwrite from another
    instance.
    """
    HARDWARE_PARSE_FILENAME = "auto_tuning_hardware.json"

    def __init__(self) -> None:
        # device / host identification
        self.device_type: str = "910"
        self.host_ip: str = "localhost"
        self.user_name: str = "root"

        # compute and memory parameters
        self.cube_performance: float = 363.7248
        self.vector_performance: float = 11.3664
        self.cube_utilization_ratio: float = 0.742
        self.cube_time_ratio: float = 0.62
        self.memory_limit: float = 60.0 * 1024

        # intra-node config
        self.devices_per_node: int = 8
        self.intra_node_bandwidth: int = 196
        self.intra_node_bandwidth_utilization_ratio: float = 0.65

        # inter-node config
        self.num_nodes: int = 2
        self.node_rank: int = 0
        self.inter_node_bandwidth: int = 25
        self.inter_node_bandwidth_utilization_ratio: float = 0.7

    def __str__(self):
        """Return a fixed-width, one-field-per-line summary of the topology."""
        fields = (
            ('Device Type', self.device_type),
            ('Host IP', self.host_ip),
            ('Devices Per Node', self.devices_per_node),
            ('Number Nodes', self.num_nodes),
            ('Node rank', self.node_rank),
        )
        return '\n'.join(f"{label:<30}{str(value):<40}" for label, value in fields)

    @property
    def num_devices(self) -> int:
        """Total device count: devices per node times number of nodes."""
        return self.devices_per_node * self.num_nodes

    def load(self, hardware: Hardware) -> None:
        """Copy every attribute this instance already has from ``hardware``.

        Attributes that exist only on ``hardware`` are ignored.
        """
        incoming = hardware.__dict__
        own = self.__dict__
        for name in own.keys():
            if name in incoming:
                own[name] = incoming[name]
"MemModule", + [ + "checkpoint_activation_layer", + "checkpoint_activation_embedding", + "checkpoint_activation_loss", + "forward_peak", + "loss_peak", + "backward_peak", + "optimizer_peak" + ] +) + + +class DynamicMemModeling: + BASELINE_SEQLEN = 4096 + + @no_type_check + def __init__(self, model_cfg: ModelConfig) -> None: + self.model_cfg = model_cfg + self._logger = get_logger("dynamic_mem") + self.ckpt_act_layer: float = None + self.ckpt_act_embedding: float = None + self.ckpt_act_tp_b_embedding: float = None + self.ckpt_act_loss: float = None + self.forward_peak: float = None + self.tp_b_forward_peak: float = None + self.backward_peak: float = None + self.tp_b_backward_peak: float = None + self.loss_peak: float = None + self.tp_b_loss_peak: float = None + self.optimizer_peak: float = None + self.tp_b_optimizer_peak: float = None + self.seq_b_optimizer_peak: float = None + + @staticmethod + def _cal_peak_mem_per_stage(mem_module, + cfg: SearchConfig, + schedule: str, + nlayer: int, + stage_id: int + ) -> float: + checkpoint_activation_layer, \ + checkpoint_activation_embedding, \ + checkpoint_activation_loss, \ + forward_peak, \ + loss_peak, \ + backward_peak, \ + _ = mem_module + + if schedule == "1f1b": + if not cfg.vpp: + num_warmup = cfg.pp - stage_id + num_embd = cfg.pp + else: + num_warmup = cfg.pp * (cfg.vpp + 1) - 1 - 2 * stage_id + num_embd = cfg.pp * 2 - 1 + + estimated_forward_peak = checkpoint_activation_layer * nlayer * (num_warmup - 1) + \ + checkpoint_activation_layer * (nlayer - 1 + 1) + \ + forward_peak + + estimated_backward_peak = checkpoint_activation_layer * nlayer * num_warmup + \ + backward_peak + + if stage_id == 0: + estimated_forward_peak += checkpoint_activation_embedding * num_embd + estimated_backward_peak += checkpoint_activation_embedding * num_embd + + if stage_id == cfg.pp - 1: + estimated_forward_peak += checkpoint_activation_loss + estimated_backward_peak += checkpoint_activation_loss + + estimated_loss_peak = 
checkpoint_activation_layer * nlayer * num_warmup + \ + checkpoint_activation_loss * (num_warmup - 1) + \ + loss_peak + else: + estimated_loss_peak = 0 + + peak_mem = max(estimated_forward_peak, + estimated_backward_peak, + estimated_loss_peak) + else: + peak_mem = 0 + + return peak_mem + + def generate_dynamic_mem_profiling_list(self) -> List[SearchConfig]: + result: List[SearchConfig] = list() + + baseline_cfg = SearchConfig() + baseline_cfg.copy_from_config(self.model_cfg) + baseline_cfg.tensor_model_parallel_size = 4 + baseline_cfg.context_parallel_size = 1 + baseline_cfg.pipeline_model_parallel_size = 1 + baseline_cfg.num_layers = 1 + baseline_cfg.seq_length = self.BASELINE_SEQLEN + if self.model_cfg.is_moe(): + baseline_cfg.num_experts = 4 + baseline_cfg.expert_model_parallel_size = 1 + result.append(baseline_cfg) + + tp8_cfg = replace(baseline_cfg, + tensor_model_parallel_size=8) + result.append(tp8_cfg) + + seq8k_cfg = replace(baseline_cfg, + seq_length=2 * self.BASELINE_SEQLEN) + result.append(seq8k_cfg) + + for cfg in result: + cfg.prepare_for_profiling() + + return result + + def model_dynamic_mem(self, working_dir: str) -> None: + def _get_profiling(cfg: SearchConfig) -> ProfilingModelInfo: + profiling_path = os.path.join(working_dir, get_prof_dir(cfg)) + profiling_node_parse = GatherNodeProfiling(profiling_path) + return profiling_node_parse.fuse_node_pkl() + + baseline_cfg, tp8_cfg, seq8k_cfg = \ + self.generate_dynamic_mem_profiling_list() + + tp4seq4k_prof = _get_profiling(baseline_cfg) + tp8seq4k_prof = _get_profiling(tp8_cfg) + tp4seq8k_prof = _get_profiling(seq8k_cfg) + + self._get_ckpt_act_layer_modeling(baseline_cfg, tp4seq4k_prof) + self._get_ckpt_act_embedding_modeling(baseline_cfg, + tp8_cfg, + tp4seq4k_prof, + tp8seq4k_prof) + self._get_ckpt_act_loss_modeling(baseline_cfg, tp4seq4k_prof) + self._get_forward_peak_modeling(baseline_cfg, + tp8_cfg, + tp4seq4k_prof, + tp8seq4k_prof) + self._get_backward_peak_modeling(baseline_cfg, + tp8_cfg, + 
def cal_dynamic_mem(self,
                    cfg: SearchConfig
                    ) -> Tuple[List[float], float]:
    """
    Estimate the dynamic memory of every pipeline stage for ``cfg``.

    :param cfg: search configuration; ``pp``, ``layers_per_vpp`` and ``mbs``
        are read here.
    :return: ``(per_stage_peaks, optimizer_peak)`` where ``per_stage_peaks``
        has one entry per pipeline stage, scaled by ``cfg.mbs``, and
        ``optimizer_peak`` is the last field of the computed memory module.
    """
    mem_module = self._cal_mem_module(cfg)

    # Layers per stage: an explicit per-virtual-stage count wins over the
    # even split of the model's layers across pipeline stages.
    nlayer = self.model_cfg.num_layers // cfg.pp
    if cfg.layers_per_vpp:
        nlayer = cfg.layers_per_vpp

    # mbs in profiling cfg equals 1, hence the division by 1.
    dynamic_mem_stages: List[float] = [
        self._cal_peak_mem_per_stage(mem_module, cfg, "1f1b", nlayer, stage_id) * (cfg.mbs / 1)
        for stage_id in range(cfg.pp)
    ]
    return dynamic_mem_stages, mem_module[-1]
SearchConfig, + bi_tp_cfg: SearchConfig, + base_prof: ProfilingModelInfo, + bi_tp_prof: ProfilingModelInfo) -> None: + base_embd = base_prof.forward.start_memory[0][0] - \ + base_prof.embedding.start_memory[0][0] + bi_tp_embd = bi_tp_prof.forward.start_memory[0][0] - \ + bi_tp_prof.embedding.start_memory[0][0] + self.ckpt_act_tp_b_embedding = bi_tp_embd * \ + (bi_tp_cfg.tp // base_cfg.tp) - \ + base_embd + self.ckpt_act_embedding = base_embd * base_cfg.tp - \ + self.ckpt_act_tp_b_embedding * (base_cfg.tp - 1) + + def _get_ckpt_act_loss_modeling(self, + base_cfg: SearchConfig, + base_prof: ProfilingModelInfo) -> None: + self.ckpt_act_loss = base_cfg.tp * \ + (base_prof.backward.start_memory[0][0] - + base_prof.loss.start_memory[0][0]) + + def _get_forward_peak_modeling(self, + base_cfg: SearchConfig, + bi_tp_cfg: SearchConfig, + base_prof: ProfilingModelInfo, + bi_tp_prof: ProfilingModelInfo) -> None: + base_forward_peak = base_prof.forward.peak_memory[0][0] - \ + base_prof.loss.start_memory[0][0] + bi_tp_forward_peak = bi_tp_prof.forward.peak_memory[0][0] - \ + bi_tp_prof.loss.start_memory[0][0] + self.tp_b_forward_peak = bi_tp_forward_peak * \ + (bi_tp_cfg.tp // base_cfg.tp) - \ + base_forward_peak + self.forward_peak = base_forward_peak * base_cfg.tp - \ + self.tp_b_forward_peak * (base_cfg.tp - 1) + + def _get_backward_peak_modeling(self, + base_cfg: SearchConfig, + bi_tp_cfg: SearchConfig, + base_prof: ProfilingModelInfo, + bi_tp_prof: ProfilingModelInfo) -> None: + base_backward_peak = base_prof.backward.peak_memory[0][0] - \ + base_prof.backward.start_memory[0][0] + bi_tp_backward_peak = bi_tp_prof.backward.peak_memory[0][0] - \ + bi_tp_prof.backward.start_memory[0][0] + self.tp_b_backward_peak = bi_tp_backward_peak * \ + (bi_tp_cfg.tp // base_cfg.tp) - \ + base_backward_peak + self.backward_peak = base_backward_peak * base_cfg.tp - \ + self.tp_b_backward_peak * (base_cfg.tp - 1) + + def _get_loss_peak_modeling(self, + base_cfg: SearchConfig, + bi_tp_cfg: 
SearchConfig, + base_prof: ProfilingModelInfo, + bi_tp_prof: ProfilingModelInfo) -> None: + base_loss_peak = base_prof.loss.peak_memory[0][0] - \ + base_prof.loss.start_memory[0][0] + bi_tp_loss_peak = bi_tp_prof.loss.peak_memory[0][0] - \ + bi_tp_prof.loss.start_memory[0][0] + self.tp_b_loss_peak = bi_tp_loss_peak * \ + (bi_tp_cfg.tp // base_cfg.tp) - \ + base_loss_peak + self.loss_peak = base_loss_peak * base_cfg.tp - \ + self.tp_b_loss_peak * (base_cfg.tp - 1) + + def _get_optimizer_peak_modeling( + self, + base_res: ProfileResult, + bi_seq_res: ProfileResult, + bi_tp_res: ProfileResult + ) -> None: + base_cfg, base_prof = base_res + bi_seq_cfg, bi_seq_prof = bi_seq_res + bi_tp_cfg, bi_tp_prof = bi_tp_res + base_optimizer_peak = base_prof.optimizer.peak_memory[0][0] - \ + base_prof.optimizer.start_memory[0][0] + bi_seq_optimizer_peak = bi_seq_prof.optimizer.peak_memory[0][0] - \ + bi_seq_prof.optimizer.start_memory[0][0] + bi_tp_optimizer_peak = bi_tp_prof.optimizer.peak_memory[0][0] - \ + bi_tp_prof.optimizer.start_memory[0][0] + self.seq_b_optimizer_peak = (base_optimizer_peak * + (bi_seq_cfg.seq_length // base_cfg.seq_length) - + bi_seq_optimizer_peak) * base_cfg.tp + self.tp_b_optimizer_peak = bi_tp_optimizer_peak * \ + (bi_tp_cfg.tp // base_cfg.tp) - \ + base_optimizer_peak + self.optimizer_peak = base_optimizer_peak * base_cfg.tp - \ + self.tp_b_optimizer_peak * (base_cfg.tp - 1) + + def _cal_mem_module(self, cfg: SearchConfig) -> MemModule: + seq_length = self.model_cfg.seq_length + nseq = seq_length // cfg.cp // self.BASELINE_SEQLEN + tp = cfg.tp + tp_w = cfg.tp - 1 + + checkpoint_activation_layer = self.ckpt_act_layer * nseq / tp + + checkpoint_activation_embedding = \ + (self.ckpt_act_embedding + + tp_w * self.ckpt_act_tp_b_embedding) * nseq / tp + + checkpoint_activation_loss = self.ckpt_act_loss * nseq / tp + + forward_peak = \ + (self.forward_peak + + tp_w * self.tp_b_forward_peak) * nseq / tp + + loss_peak = \ + (self.loss_peak + + tp_w * 
class MemoryModeling:
    """
    Class-level facade over the static and dynamic memory models.

    The class cannot be instantiated; all state lives in class attributes
    that are populated once by :meth:`set_model_cfg`.
    """
    _static_modeling: StaticMemModeling = None  # type: ignore
    _dynamic_modeling: DynamicMemModeling = None  # type: ignore
    _logger: Logger = None  # type: ignore

    def __new__(cls):
        # Instantiation is refused on purpose: use the classmethods.
        raise NotImplementedError("MemoryModeling is a static class.")

    @classmethod
    def set_model_cfg(cls, model_cfg: ModelConfig) -> None:
        """
        Create both memory models for ``model_cfg``.

        :raises ValueError: if both models have already been created.
        """
        if cls._static_modeling and cls._dynamic_modeling:
            raise ValueError("ModelConfig has yet been set.")
        cls._static_modeling = StaticMemModeling(model_cfg)
        cls._dynamic_modeling = DynamicMemModeling(model_cfg)
        cls._logger = get_logger("memory")

    @classmethod
    def generate_mem_modeling_profiling_list(cls) -> Tuple[List[Tuple[SearchConfig, str]], List[SearchConfig]]:
        """Return ``(static_profiling_list, dynamic_profiling_list)``."""
        static_list = cls._static_modeling.generate_static_mem_profiling_list()
        dynamic_list = cls._dynamic_modeling.generate_dynamic_mem_profiling_list()
        return static_list, dynamic_list

    @classmethod
    def modeling(cls, working_dir: str) -> None:
        """Fit the static model, then the dynamic model, from ``working_dir``."""
        cls._static_modeling.model_static_mem(working_dir)
        cls._dynamic_modeling.model_dynamic_mem(working_dir)

    @classmethod
    def estimate(cls, cfg: SearchConfig) -> Tuple[float, float]:
        """
        Estimate memory usage for ``cfg``.

        :return: ``(peak, optimizer_peak)`` where ``optimizer_peak`` is the
            largest per-stage static memory plus the optimizer peak, and
            ``peak`` is the larger of that value and the largest per-stage
            static + dynamic memory.
        """
        log = cls._logger.debug
        log("==========Memory Estimate Summary==========")

        static_mem = cls._static_modeling.cal_static_mem(cfg)
        dynamic_mem, optimizer_peak = \
            cls._dynamic_modeling.cal_dynamic_mem(cfg)

        peak_stage_mem = float(0)
        for stage_id in range(cfg.pp):
            static_part = static_mem[stage_id]
            dynamic_part = dynamic_mem[stage_id]
            stage_mem = static_part + dynamic_part
            peak_stage_mem = max(peak_stage_mem, stage_mem)
            log(f"== stage_id: {stage_id} ==\n"
                f"static memory: {static_part} MB\n"
                f"dynamic peak memory: {dynamic_part} MB\n"
                f"peak memory: {stage_mem} MB")

        # Worst stage when the optimizer peak sits on top of static memory.
        optimizer_peak = max([m + optimizer_peak for m in static_mem])
        log(f"optimizer peak memory: {optimizer_peak} MB")
        log("==========Memory Estimate Summary End==========")

        return max(peak_stage_mem, optimizer_peak), optimizer_peak
@dataclass(frozen=True)
class ModelParam:
    """Immutable (name, parameter count) record for one model parameter."""
    name: str
    num_parameters: int

    @staticmethod
    def cmp(left: object, right: object) -> int:
        """
        Compare two objects as model parameters.

        :return: ``1`` when both are ``ModelParam`` and equal, ``-1`` when
            both are ``ModelParam`` with the same name but a different
            parameter count, ``0`` in every other case (including when
            either argument is not a ``ModelParam``).
        """
        both_params = isinstance(left, ModelParam) and isinstance(right, ModelParam)
        if not both_params:
            return 0
        if left == right:
            return 1
        return -1 if left.name == right.name else 0
@staticmethod
def _diff_params(left: List[ModelParam],
                 right: List[ModelParam]
                 ) -> List[ModelParam]:
    """
    Compute ``right - left`` over two ordered parameter lists.

    Both lists are walked in order, with a single cursor into ``left``:
    1. A ``right`` entry equal to the current ``left`` entry is dropped
       and the cursor advances.

    2. A ``right`` entry with the same name as the current ``left`` entry
       but a different parameter count yields an entry carrying the count
       difference (right minus left) and the cursor advances.

    3. Any other ``right`` entry is appended unchanged and the cursor
       stays where it is.

    4. ``left`` entries remaining after ``right`` is exhausted are
       appended with their parameter count negated.
    """
    diff: List[ModelParam] = []

    pending = iter(left)
    current = next(pending, None)
    for candidate in right:
        verdict = ModelParam.cmp(current, candidate)
        if verdict == 1:
            # Identical on both sides: contributes nothing.
            current = next(pending, None)
            continue
        if verdict == -1 and current:
            delta = candidate.num_parameters - current.num_parameters
            diff.append(ModelParam(current.name, delta))
            current = next(pending, None)
            continue
        diff.append(candidate)

    # Whatever is left over exists only on the left-hand side.
    while current:
        diff.append(ModelParam(current.name, -current.num_parameters))
        current = next(pending, None)

    return diff
def model_static_mem(self, working_dir: str) -> None:
    """
    Build the static (parameter) memory model from the profiling dumps.

    Reads the LAYER1 / PP4 / EXPERT2 (MoE only) / TP2 dump files from
    ``working_dir`` and derives, by list differences:

    * ``params_first_embedding`` - PP4 rank 0 minus PP4 rank 1
    * ``params_last_layernorm_and_embedding`` - PP4 last rank minus rank 1
    * ``params_per_experts`` - EXPERT2 rank 1 minus PP4 rank 1 (empty list
      when the model is not MoE)
    * ``params_per_layer_wo_experts`` - PP4 rank 1 minus the expert params
    * ``params_pp_affected`` - first LAYER1 entry minus the sum of the above
    * ``params_tp_unaffected`` - names of parameters that are identical,
      position by position, in the PP4 and TP2 dumps

    :param working_dir: directory that contains the dump files.
    """
    def _decode(filename: str) -> Any:
        filepath = os.path.join(working_dir, filename)
        with open(filepath, mode="rb") as file:
            decode = restricted_loads(file)
        return decode

    def _get_pp_params(filename: str) -> List[List[ModelParam]]:
        # One slot per pipeline rank of the 4-stage profiling run; the
        # first non-empty entry seen for a rank wins.
        params = [None] * 4
        for pp_rank, model_params in _decode(filename):
            if not params[pp_rank]:
                params[pp_rank] = model_params
        return params  # type: ignore

    total_pp4_params = _get_pp_params(self.PP4_FILENAME)
    # Rank 1 is a middle stage: one layer (with experts), no embeddings.
    per_layer_w_experts_params = total_pp4_params[1]
    self.params_first_embedding = \
        self._diff_params(per_layer_w_experts_params,
                          total_pp4_params[0])
    self.params_last_layernorm_and_embedding = \
        self._diff_params(per_layer_w_experts_params,
                          total_pp4_params[-1])

    if self.model_cfg.is_moe():
        total_expert2_params = _get_pp_params(self.EXPERT2_FILENAME)
        self.params_per_experts = \
            self._diff_params(per_layer_w_experts_params,
                              total_expert2_params[1])
    else:
        self.params_per_experts = list()
    self.params_per_layer_wo_experts = \
        self._diff_params(self.params_per_experts,
                          per_layer_w_experts_params)

    total_layer1_params: List[List[ModelParam]] = \
        [p for _, p in _decode(self.LAYER1_FILENAME)]
    layer1_params = total_layer1_params[0]
    self.params_pp_affected = \
        self._diff_params(self.params_first_embedding +
                          self.params_per_layer_wo_experts +
                          self.params_per_experts +
                          self.params_last_layernorm_and_embedding,
                          layer1_params)

    total_tp2_params = _get_pp_params(self.TP2_FILENAME)
    total_pp4_params_concat = list(chain.from_iterable(total_pp4_params))
    total_tp2_params_concat = list(chain.from_iterable(total_tp2_params))
    if len(total_pp4_params_concat) != len(total_tp2_params_concat):
        # Positional comparison below only covers the common prefix.
        self._logger.debug(
            f"PP4 and TP2 dumps differ in length: "
            f"{len(total_pp4_params_concat)} vs {len(total_tp2_params_concat)}")
    # zip() stops at the shorter list, so a TP2 dump with fewer entries no
    # longer raises IndexError as positional indexing did.
    for pp4_param, tp2_param in zip(total_pp4_params_concat,
                                    total_tp2_params_concat):
        if pp4_param == tp2_param:
            self.params_tp_unaffected.add(pp4_param.name)

    def _dump(params) -> str:
        # One parameter per line, as in the original debug output.
        return "\n".join([str(p) for p in params])

    self._logger.debug("\n== first embedding params:\n" +
                       _dump(self.params_first_embedding) +
                       "\n== layer_wo_experts params:\n" +
                       _dump(self.params_per_layer_wo_experts) +
                       "\n== experts params:\n" +
                       _dump(self.params_per_experts) +
                       "\n== last layer norm and embedding params:\n" +
                       _dump(self.params_last_layernorm_and_embedding) +
                       "\n== pp affected params:\n" +
                       _dump(self.params_pp_affected) +
                       "\n== not tp affected params:\n" +
                       _dump(self.params_tp_unaffected))
+ expert_params_per_stage, + 1 + 1, + 2) + static_mem_stages.append(static_mem_per_stage) + return static_mem_stages + + def _cal_num_params_per_stage(self, + stage_id: int, + cfg: SearchConfig + ) -> Tuple[int, int]: + def _cal_num_params(param: ModelParam, ep: int = 1): + if param.name in self.params_tp_unaffected: + return param.num_parameters + else: + return param.num_parameters // ep // cfg.tp + + num_layers = self.model_cfg.num_layers + + non_expert_params = 0 + for param in self.params_per_layer_wo_experts: + non_expert_params += _cal_num_params(param) + non_expert_params *= num_layers // cfg.pp + + expert_params = 0 + if cfg.num_experts and cfg.ep: + for param in self.params_per_experts: + expert_params += _cal_num_params(param, ep=cfg.ep) + expert_params *= (num_layers * cfg.num_experts) // cfg.pp + + if stage_id == 0: + for param in self.params_first_embedding: + non_expert_params += _cal_num_params(param) + if stage_id == cfg.pp - 1: + for param in self.params_last_layernorm_and_embedding: + non_expert_params += _cal_num_params(param) + + if cfg.pp == 1: + for param in self.params_pp_affected: + non_expert_params += _cal_num_params(param) + + return non_expert_params, expert_params diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/model_performance.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/model_performance.py new file mode 100644 index 000000000..728afb74f --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/module/model_performance.py @@ -0,0 +1,159 @@ +import math +from mindspeed.auto_tuning.module.hardware import Hardware +from mindspeed.auto_tuning.config.model_config import ModelConfig +from mindspeed.auto_tuning.module.communication.communication import Communication +from mindspeed.auto_tuning.module.operator.operator import OperatorPerformance +from mindspeed.auto_tuning.module.operator.operator_re_profile import search_operator +from mindspeed.auto_tuning.utils.logger import get_logger + + +class 
class ModelPerformance(object):
    """
    Model Performance modeling

    Combines the communication models and the operator model to estimate
    the time of one training step for a given search configuration.
    """

    def __init__(self, hardware=None, model_cfg: ModelConfig = None, working_dir: str = None):
        self.communication = Communication(hardware, model_cfg)
        self.operator = OperatorPerformance(model_cfg, working_dir=working_dir)
        self.hardware = hardware
        self.logger = get_logger("ModelPerformance")

    def get_profiling_info(self, profiling_results):
        """
        Feed profiling results into the communication and operator models.

        The communication model receives every result; the operator model
        only receives results whose config (``item[0]``) does not enable
        ``use_ascend_mc2``.
        """
        self.communication.communication_modeling(profiling_results)
        profiling_wo_mc2 = [item for item in profiling_results
                            if not item[0].use_ascend_mc2]
        self.operator.model_operator_timer(profiling_wo_mc2)

    def performance(self, search_cfg, working_dir, profile_count, re_profile_flag=False):
        """
        Estimate the end-to-end step time of ``search_cfg``.

        :param search_cfg: configuration being evaluated.
        :param working_dir: forwarded to :meth:`operator_performance`.
        :param profile_count: forwarded to :meth:`operator_performance`.
        :param re_profile_flag: forwarded to :meth:`operator_performance`.
        :return: ``(total_time, unsampled_profiling, use_mc2)`` where
            ``use_mc2`` is True when the MC2 model predicts a strictly
            smaller time than the plain TP model.
        """
        tp = search_cfg.tensor_model_parallel_size
        dp = search_cfg.data_parallel_size
        pp = search_cfg.pipeline_model_parallel_size
        vp = search_cfg.num_layers // (pp * search_cfg.num_layers_per_virtual_pipeline_stage) \
            if search_cfg.num_layers_per_virtual_pipeline_stage else 1
        cp = search_cfg.context_parallel_size
        ep = search_cfg.expert_model_parallel_size if search_cfg.expert_model_parallel_size else 1
        model_cfg = self.communication.model_cfg
        num_layers = model_cfg.num_layers
        global_batch_size = model_cfg.global_batch_size
        model_micro_batch_size = model_cfg.micro_batch_size
        search_micro_batch_size = search_cfg.micro_batch_size
        operator_time, unsampled_profiling = self.operator_performance(
            search_cfg, working_dir, profile_count, re_profile_flag
        )

        # Time for each micro-batch in each layer.
        mc2_time = self.communication.mc2_model.performance(search_cfg)
        tp_time = self.communication.tp_model.performance(search_cfg)

        self.logger.debug(f"mc2_time:{mc2_time} tp_time:{tp_time}")
        # Whichever of MC2 / plain TP is predicted faster is used.
        use_mc2 = mc2_time < tp_time
        tp_time = min(mc2_time, tp_time)

        cp_time = self.communication.cp_model.performance(search_cfg)
        dp_time = self.communication.dp_model.performance(search_cfg)
        pp_time = self.communication.pp_model.performance(search_cfg)
        ep_time = self.communication.ep_model.performance(search_cfg)

        micro_batch_num = global_batch_size / (dp * search_micro_batch_size)
        # total layer number,total global_batch_size
        layer_num = math.ceil(micro_batch_num * (num_layers / pp))
        search_model_mbs_ratio = search_micro_batch_size / model_micro_batch_size
        communication_time = (tp_time + cp_time + ep_time) * search_model_mbs_ratio * layer_num
        total_operator_time = operator_time * layer_num
        total_time = total_operator_time + communication_time

        total_communication_time = communication_time + pp_time * search_model_mbs_ratio + dp_time
        self.logger.debug('global_batch_size : {}, num_layers : {}, search_micro_batch_size : {}, operator_time : {}, '
                          'layer_num : {}'.format(global_batch_size, num_layers, search_micro_batch_size,
                                                  operator_time, layer_num))
        # Pipeline bubble: inflate the compute + per-layer communication
        # time, then add the PP and DP communication on top.
        bubble_ratio = (pp - 1) / (micro_batch_num * vp + pp - 1)
        total_time = total_time / (1 - bubble_ratio)
        bubble_time = total_time * bubble_ratio
        total_time = total_time + pp_time * search_model_mbs_ratio + dp_time

        self.logger.debug("****************** total_time(ms) ***********************")
        tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:<8}\t{7:<10}\t{8:<8}\t{9:<8}"
        self.logger.debug(tplt.format('tp', 'dp', 'pp', 'vp', 'cp', 'ep', 'operator_time',
                                      'comm_time', 'bubble_time', 'total_time'))
        tplt = "{0:<1}\t{1:<1}\t{2:<1}\t{3:<1}\t{4:<1}\t{5:<1}\t{6:8.2f}\t{7:8.2f}\t{8:8.2f}\t{9:8.2f}"
        self.logger.debug(tplt.format(tp, dp, pp, vp, cp, ep, total_operator_time,
                                      total_communication_time, bubble_time, total_time))
        tplt = "{0:<4}\t{1:<4}\t{2:<4}\t{3:<4}\t{4:<4}\t{5:<4}"
        self.logger.debug("******* each layer mbs communication time(ms) ********")
        self.logger.debug(tplt.format('tp_time', 'dp_time', 'pp_time',
                                      'bubble', 'cp_time', 'ep_time'))
        tplt = "{0:4.2f}\t{1:4.2f}\t{2:4.2f}\t{3:4.2f}\t{4:4.2f}\t{5:4.2f}"
        self.logger.debug(tplt.format(tp_time, dp_time, pp_time,
                                      bubble_time, cp_time, ep_time))
        self.logger.debug("end-to-end, each*(global_batch_size / (dp *pp))* num_layers")
        tplt = "{0:<4}\t{1:<4}\t{2:<4}\t{3:<4}\t{4:<4}\t{5:<4}"
        self.logger.debug(tplt.format('tp_time', 'dp_time', 'pp_time',
                                      'bubble', 'cp_time', 'ep_time'))
        tplt = "{0:4.0f}\t{1:4.2f}\t{2:4.2f}\t{3:4.2f}\t{4:4.2f}\t{5:4.2f}"
        self.logger.debug(tplt.format(tp_time * layer_num * search_model_mbs_ratio, dp_time,
                                      pp_time, bubble_time, cp_time * layer_num * search_model_mbs_ratio,
                                      ep_time * layer_num * search_model_mbs_ratio))
        return total_time, unsampled_profiling, use_mc2

    def operator_performance(self, search_cfg, working_dir, profile_count,
                             re_profile_flag=False):
        """
        Estimate the operator time of one micro-batch for ``search_cfg``.

        :param search_cfg: configuration being evaluated.
        :param working_dir: passed to ``search_operator`` when re-profiling.
        :param profile_count: sequence whose first item is compared against
            the re-profiling cap; passed on to ``search_operator``.
        :param re_profile_flag: allow a re-profiling round when more
            operators are missing than were found.
        :return: ``(operator_time, unsampled_profiling_info)``; the summed
            durations are multiplied by 0.001 before being returned.
        """
        cp = search_cfg.context_parallel_size
        # Same default as in performance(): no expert parallelism means 1,
        # so the division below never sees None.
        ep = search_cfg.expert_model_parallel_size or 1
        communication = self.communication
        model_config = communication.model_cfg
        unsampled_profiling_info = []
        operators, cp_exist_list, cp_diff_list, ep_exist_list, ep_diff_list, operator_not_found_list = \
            self.operator.cal_operator_timer(search_cfg)

        found_count = (len(operators) + len(cp_exist_list) + len(cp_diff_list) +
                       len(ep_exist_list) + len(ep_diff_list))
        self.logger.debug("Total number of operators have been found is {0}".format(found_count))
        # "missing / found > 1" written without the division, so an empty
        # result set can no longer raise ZeroDivisionError.
        if re_profile_flag and profile_count[0] < 6 and len(operator_not_found_list) > found_count:
            # Only needed for re-profiling, so Hardware() is resolved here.
            scal_flag = model_config.global_world_size > Hardware().num_devices
            unsampled_profiling_info = search_operator(working_dir, search_cfg, communication,
                                                       profile_count, scal_flag)
            operators, cp_exist_list, cp_diff_list, ep_exist_list, ep_diff_list, operator_not_found_list = \
                self.operator.cal_operator_timer(search_cfg)

        operator_time = sum((operator.duration for operator in operators), 0.0)

        # Context-parallel operators: the "exist" set counts once when cp > 1,
        # the "diff" set once per additional cp rank beyond 2.
        if cp > 1:
            operator_time += sum((operator.duration for operator in cp_exist_list), 0.0)
        if cp > 2:
            operator_time += sum((operator.duration for operator in cp_diff_list), 0.0) * (cp - 2)

        num_experts = model_config.num_experts
        if num_experts:
            ep_each_exist_time = 0.0
            if num_experts > 0:
                # NOTE(review): the halving is applied once to the summed
                # duration (nesting reconstructed from a whitespace-collapsed
                # source) -- confirm against the upstream file.
                ep_each_exist_time = sum((operator.duration for operator in ep_exist_list), 0.0) / 2
            # NOTE(review): ep_diff_list was summed but never used in the
            # original formula; the dead computation was dropped.
            operator_time = operator_time + (num_experts / ep - 1) * ep_each_exist_time

        # Convert to the total operator time for one micro_batch on a single node.
        operator_time = (operator_time * 0.001)
        return operator_time, unsampled_profiling_info
def model_operator_timer(self, profiling_results):
    """
    Model shape and duration based on the profiling result. Currently, all operator only takes one micro_batch,
    no matter whether pp is enabled.

    Side effects visible in this method: ``self.dict_model`` is reset to an empty dict, the
    profile data list and the base/cp/ep blocks are (re)populated, operator history is written
    through ``get_history_db`` and the per-operator models are rebuilt by ``get_operator_model``.
    Progress is reported through ``self._logger``. Returns nothing.

    :param profiling_results: forwarded unchanged to
        ``self.origin_profile_data_list.get_origin_profile_data``.
    """
    self.dict_model = dict()
    # 1. get original data
    self.origin_profile_data_list.get_origin_profile_data(profiling_results)
    # 2. get base_block
    self.base_block.get_block(self.origin_profile_data_list.data_list)
    # 3. get change block
    self.cp_block.get_block(self.origin_profile_data_list, self.base_block)
    # The EP block is only built when the first profiled configuration reports a truthy num_experts.
    if self.origin_profile_data_list.data_list[0].config_info.num_experts:
        self.ep_block.get_block(self.origin_profile_data_list, self.base_block)

    st_time = time.time()
    # Round 3: re-sort the Note data table, grouping it by the newly generated index_name.
    operator_note_list = OperatorNoteList()
    operator_note_list.get_operator_note(self)

    self.get_history_db(operator_note_list.operator_note_list)
    self._logger.info(f'-----------------------------------')
    # Round 4: build the shape model from operator_note_model and compute operator_model_dao.
    self.get_operator_model(operator_note_list.operator_note_dict)

    self._logger.info("get operator_base_dao successful")
    self._logger.info("total number of operator_note_dict: {}, dict_model {}, base_block {}, cp_block {}, "
                      "ep_block {}".format(len(operator_note_list.operator_note_dict), len(self.dict_model),
                                           len(self.base_block.fw) + len(self.base_block.bw),
                                           len(self.cp_block.fw) + len(self.cp_block.bw) + len(self.cp_block.re),
                                           len(self.ep_block.fw) + len(self.ep_block.bw) + len(self.ep_block.re)))
    # Only the note/history/model stage above is timed (st_time is taken after the blocks are built).
    self._logger.info(f'total time: {time.time() - st_time}')
    self._logger.info(f'---------------------------【Add operator to db】---------------------------')
def get_operator_model(self, operator_note_dict):
    """
    Fit the per-operator models for every block and store the shape formulas on the blocks.

    For each of the five operator groups (base, cp exist, cp diff, ep exist, ep diff) the
    operators are first passed to ``get_operator_model_dao`` and then to
    ``get_dict_base_shape``; the latter's result is stored on the owning block as
    ``exist_cal_list`` or ``diff_cal_list``.

    :param operator_note_dict: passed through unchanged to both helpers.
    """

    def _fit(operators):
        # Same two-step sequence the original repeated for each group.
        self.get_operator_model_dao(operators, operator_note_dict)
        return self.get_dict_base_shape(operators, operator_note_dict)

    base = self.base_block
    base.exist_cal_list = _fit(base.fw + base.bw)

    cp = self.cp_block
    cp.exist_cal_list = _fit(cp.fw + cp.bw + cp.re)
    cp.diff_cal_list = _fit(cp.diff_list.fw + cp.diff_list.bw + cp.diff_list.re)

    ep = self.ep_block
    ep.exist_cal_list = _fit(ep.fw + ep.bw + ep.re)
    ep.diff_cal_list = _fit(ep.diff_list.fw + ep.diff_list.bw + ep.diff_list.re)
def getmodel_by_accelerator_core_and_index_name(self, accelerator_core, index_name):
    """
    Look up the fitted model entry for an operator position on a given accelerator core.

    :param accelerator_core: value compared against each entry's ``'accelerator_core'`` key.
    :param index_name: key into ``self.dict_model``.
    :return: the entry whose ``'accelerator_core'`` matches; if none matches, an info message
        is logged and the first entry registered for ``index_name`` is returned.
    :raises KeyError: if ``self.dict_model`` holds no entries for ``index_name``. Previously
        this case surfaced as a ``TypeError`` (iterating ``None``) or an ``IndexError``.
    """
    candidates = self.dict_model.get(index_name)
    if not candidates:
        raise KeyError(f"no operator model registered for index_name {index_name!r}")
    for dict_shape in candidates:
        if dict_shape['accelerator_core'] == accelerator_core:
            return dict_shape
    # Best-effort fallback kept from the original: use the first model for this position.
    self._logger.info("can not find the accelerator_core!")
    return candidates[0]
+ input_shape = cal_new_shape_tce(operator_base.input_cal, search_cfg) + output_shape = cal_new_shape_tce(operator_base.output_cal, search_cfg) + # 1. search duration through operator_history based on input_shape and types + operators = self.db.operator_history_dao.get_by_types_and_input_shape(operator_base.types, input_shape) + if len(operators) > 0: + operator_list_re.append(Operator(name=operator_base.index_name, types=operator_base.types, + accelerator_core=operator_base.accelerator_core, + input_shape=input_shape, + output_shape=output_shape, + duration=operators[0].duration)) + + # 2. Predict the results based on the tp --- duration modeling results. + else: + operator_not_found.append([OperatorHistory(types=operator_base.types, + accelerator_core=operator_base.accelerator_core, + input_shape=input_shape, + output_shape=output_shape, + duration=0, + device=Hardware().device_type, + jit=int(self.model_config.jit_compile), + cann="8.0.RC2.alpha002", + driver="24.1.rc2.b030", + dtype=self.model_config.dtype.value[0]), + operator_base.index_name]) + + operator_not_found_total_num = len(operator_not_found) + if operator_not_found_total_num / operator_total_num > ratio and re_profiling_flag: + return operator_list_re, operator_not_found + + else: + # If the proportion of missing operators is relatively low, by default, supplement the operators using + # linear interpolation. 
def cal_operator_timer(self, search_cfg: SearchConfig) -> tuple:
    """
    External interface: estimate the operators of every block for ``search_cfg``.

    Each block's ``*_cal_list`` is passed to ``cal_operator_timer_bymodel``. Operators that
    call reports as not found are collected for the base block always, for the cp blocks
    only when ``search_cfg.cp > 1`` and for the ep blocks only when ``search_cfg.ep > 1``;
    the collected list is handed to ``self.db.insert_not_found_list``.

    :param search_cfg: configuration to estimate; ``cp`` and ``ep`` are read here.
    :return: ``(operator_list, cp_exist_list, cp_diff_list, ep_exist_list, ep_diff_list,
        operator_not_found)``.
    """
    if len(self.base_block.fw) == 0:
        # NOTE(review): the last three slots are ints, not lists, although the normal path
        # returns lists there. Kept as-is because callers are outside this block; confirm
        # what they expect before changing it.
        return [], [], [], 1, 1, 1

    operator_not_found = []

    def _cp_enabled():
        return search_cfg.cp > 1

    def _ep_enabled():
        return search_cfg.ep and search_cfg.ep > 1

    def _estimate(cal_list, enabled):
        # Estimate one block; record its misses only when its parallel mode is enabled.
        if len(cal_list) == 0:
            return []
        found, missing = self.cal_operator_timer_bymodel(cal_list, search_cfg)
        if enabled():
            operator_not_found.extend(missing)
        return found

    # Obtain all operators of a model layer.
    operator_list, operator_not_found_list = self.cal_operator_timer_bymodel(self.base_block.exist_cal_list,
                                                                             search_cfg)
    operator_not_found.extend(operator_not_found_list)

    cp_exist_list = _estimate(self.cp_block.exist_cal_list, _cp_enabled)
    cp_diff_list = _estimate(self.cp_block.diff_cal_list, _cp_enabled)
    ep_exist_list = _estimate(self.ep_block.exist_cal_list, _ep_enabled)
    # Fix: the diff estimate was previously computed from the ep *exist* list, so
    # ep_diff_list merely duplicated ep_exist_list.
    ep_diff_list = _estimate(self.ep_block.diff_cal_list, _ep_enabled)

    self.db.insert_not_found_list(operator_not_found)
    return operator_list, cp_exist_list, cp_diff_list, ep_exist_list, ep_diff_list, operator_not_found
@staticmethod
def longest_common_subsequence(list1, list2):
    """
    Return the longest common subsequence of two operator lists, compared by ``.type``.

    The returned list holds the matching elements of ``list1`` (not ``list2``) in order.
    Ties are resolved as before: on a mismatch the result obtained by dropping the last
    element of ``list1`` wins only if it is strictly longer, otherwise the result obtained
    by dropping the last element of ``list2`` is used.

    Only two rows of the DP table are kept and a new list is built only on a match, instead
    of storing a list copy in every one of the ``(m + 1) * (n + 1)`` cells.

    :param list1: sequence of objects exposing ``.type``; result elements come from here.
    :param list2: sequence of objects exposing ``.type``.
    :return: a new list (empty if either input is empty).
    """
    n = len(list2)
    # prev[j] is the LCS of the already-processed prefix of list1 and list2[:j].
    prev = [[] for _ in range(n + 1)]
    for item in list1:
        curr = [[]]
        for j in range(1, n + 1):
            if item.type == list2[j - 1].type:
                # Concatenation creates a fresh list, so shared cells are never mutated.
                best = prev[j - 1] + [item]
            elif len(prev[j]) > len(curr[j - 1]):
                best = prev[j]
            else:
                best = curr[j - 1]
            curr.append(best)
        prev = curr
    return prev[n]
@staticmethod
def get_operator_longest_common_subsequence(list1, list2):
    """
    Return the longest common subsequence of two operator lists, compared by ``.type``.

    This used to be a line-for-line copy of ``Block.longest_common_subsequence``; it now
    delegates to that method so the two cannot drift apart. The result contains the
    matching elements of ``list1`` in order.

    :param list1: sequence of objects exposing ``.type``; result elements come from here.
    :param list2: sequence of objects exposing ``.type``.
    :return: list of the common elements taken from ``list1``.
    """
    return Block.longest_common_subsequence(list1, list2)
# calculate the recompute list, 1 for forward, 2 for backward
def get_re_block(self, list1, list2):
    """
    Split ``list1`` into (recompute, backward) parts by matching it against ``list2``.

    ``list1`` is scanned once. An element whose ``.type`` equals the type of the next
    not-yet-matched element of ``list2`` goes to the first list and consumes that element;
    every other element goes to the second list.

    :return: tuple ``(list_re, list_bw)``.
    """
    recomputed, backward_only = [], []
    cursor, limit = 0, len(list2)
    for op in list1:
        if cursor < limit and op.type == list2[cursor].type:
            recomputed.append(op)
            cursor += 1
        else:
            backward_only.append(op)
    return recomputed, backward_only
def attract_list(self, disperses_list, list1, i):
    """
    Move ``index_name`` values from the positions in ``disperses_list`` onto the run of
    elements ending at position ``i``.

    Walking ``disperses_list`` from its last entry backwards and ``list1`` from ``i``
    downwards in lock-step, the source element's ``index_name`` is cleared to ``''`` and
    written to the destination element, for as long as both elements have the same
    ``.type``. The walk stops at the first type mismatch or when either side is exhausted.

    :return: ``list1`` (the same object, modified in place).
    """
    target = i
    for source in reversed(disperses_list):
        if target < 0:
            break
        dst_op = list1[target]
        src_op = list1[source]
        if dst_op.type != src_op.type:
            break
        moved_name = src_op.index_name
        src_op.index_name = ''
        dst_op.index_name = moved_name
        target -= 1
    return list1
def comp_with_get_diff_list(self, list1, list2, index_id):
    """
    Align ``list1`` with ``list2`` and describe the elements of ``list1`` left unmatched.

    After ``reset_index_name`` has run, every element whose ``index_name`` is still ``''``
    yields one ``DictShape`` carrying its name, type, accelerator_core and index_name.
    When ``index_id`` is not ``-1`` such elements are first given the index_name
    ``str(counter) + type``, with the counter starting at ``index_id`` and incremented per
    unmatched element; with ``-1`` the index_name stays empty.

    :return: list of ``DictShape`` objects, one per unmatched element, in order.
    """
    # Align first.
    aligned, _first_match = self.reset_index_name(list1, list2)
    next_id = index_id
    unmatched = []
    for op in aligned:
        if op.index_name != '':
            continue
        shape = DictShape()
        if next_id != -1:
            op.index_name = str(next_id) + op.type
            next_id += 1
        else:
            op.index_name = ''
        shape.name = op.name
        shape.type = op.type
        shape.accelerator_core = op.accelerator_core
        shape.index_name = op.index_name
        unmatched.append(shape)
    return unmatched
def get_block(self, origin_profile_data_list, base_block):
    """
    Build the EP exist/diff blocks from the profiling data.

    Runs ``get_profile``, then ``get_exist_block`` and ``get_diff_block`` with the index
    base ``2000``, and finally drops the last entry of ``self.diff_list.bw``.

    The final drop is now skipped when ``self.diff_list.bw`` is empty. ``get_diff_block``
    returns early (after logging a warning) when no list_2 data is available, and the
    unconditional ``pop()`` then raised ``IndexError``.

    :param origin_profile_data_list: forwarded to ``get_profile``.
    :param base_block: forwarded to ``get_exist_block``.
    """
    change_operator_list = self.get_profile(origin_profile_data_list)
    index_id = 2000
    self.get_exist_block(change_operator_list, base_block, index_id)
    self.get_diff_block(change_operator_list, index_id)
    # NOTE(review): why the trailing backward operator is discarded is not visible here.
    if self.diff_list.bw:
        self.diff_list.bw.pop()
def get_change_operator(self, change_profile_list, change_operator_list):
    """
    Convert the ``list_2`` and ``list_4`` profile buckets into operator buckets.

    For each bucket, in that order, ``change_profilelist_into_dictshapelist`` is called with
    the bucket of ``change_profile_list`` as source and the same-named bucket of
    ``change_operator_list`` as destination.
    """
    for bucket in ('list_2', 'list_4'):
        self.change_profilelist_into_dictshapelist(getattr(change_profile_list, bucket),
                                                   getattr(change_operator_list, bucket))
def get_exist_block(self, change_operator_list, base_block, index_id):
    """
    Derive this block's forward/backward (and recompute) operators from the list_2 bucket.

    ``self.fw`` and ``self.bw`` are set to the result of ``comp_with_get_diff_list`` for the
    list_2 forward/backward operators against ``base_block``; backward ids start 500 above
    ``index_id``. If the backward result is longer than the forward one, it is further
    split by ``get_re_block`` into ``self.re`` and ``self.bw``.
    """
    bucket = change_operator_list.list_2
    self.fw = self.comp_with_get_diff_list(bucket.fw, base_block.fw, index_id)
    self.bw = self.comp_with_get_diff_list(bucket.bw, base_block.bw, index_id + 500)
    # recompute
    if len(self.fw) < len(self.bw):
        recomputed, backward = self.get_re_block(self.bw, self.fw)
        self.re = recomputed
        self.bw = backward
def get_diff_list_without_index(self, list1, list2):
    """Return the elements of ``list1`` that are not part of its common subsequence with ``list2``.

    The common subsequence is obtained from
    ``self.get_operator_longest_common_subsequence(list1, list2)`` and the
    two sequences are then walked in lockstep, comparing the ``type``
    attribute. Elements of ``list1`` that do not line up with the next
    common element are collected.

    Args:
        list1: sequence of objects exposing a ``type`` attribute.
        list2: sequence handed to the common-subsequence helper.

    Returns:
        A new list with the unmatched elements of ``list1`` in order.
    """
    list_comm = self.get_operator_longest_common_subsequence(list1, list2)
    m, n = len(list1), len(list_comm)
    diff_list = []
    i, j = 0, 0
    while i < m and j < n:
        if list1[i].type == list_comm[j].type:
            i += 1
            j += 1
        else:
            diff_list.append(list1[i])
            i += 1
    # NOTE(review): once the common subsequence is exhausted only ONE
    # leftover element of list1 is kept; anything after it is dropped.
    # This looks like it may have been meant to be a `while`, but the
    # behaviour is preserved here -- confirm against the callers before
    # changing it.
    if i < m:
        diff_list.append(list1[i])
    return diff_list
class DataBase:
    """Three SQLite-backed operator stores kept under one working directory.

    Every store is accessed through an ``OperatorHistoryDAO`` and uses the
    schema registered on ``BaseHistory``:

    * ``operator_history_dao``   -> ``operator_history.db``
    * ``operator_different_dao`` -> ``operator_different.db``
    * ``operator_profiling_dao`` -> ``operator_profiling.db``
    """

    def __init__(self, working_dir: str):
        def open_store(db_file):
            # Connect, wrap in a DAO, then make sure the tables exist.
            connection = DBConnection(f'sqlite:///{os.path.join(working_dir, db_file)}')
            dao = OperatorHistoryDAO(connection)
            BaseHistory.metadata.create_all(connection.engine)
            return dao

        self.operator_history_dao = open_store("operator_history.db")
        self.operator_different_dao = open_store("operator_different.db")
        self.operator_profiling_dao = open_store("operator_profiling.db")

    def insert_not_found_list(self, operator_list):
        """Store the first element of every entry of ``operator_list`` in the "different" database.

        Each entry is indexed with ``[0]`` and the resulting object must
        provide ``convert_to_dict()``.
        """
        rows = [entry[0].convert_to_dict() for entry in operator_list]
        self.operator_different_dao.insert_history(rows)
def convert_to_dict(self):
    """Return this row as a plain ``dict`` keyed by column name.

    The auto-increment ``id`` column is not included. Key order follows
    the column order used by the insert statement in this file.
    """
    columns = (
        'types', 'accelerator_core', 'input_shape', 'output_shape', 'duration',
        'device', 'jit', 'cann', 'driver', 'dtype', 'reverse1',
    )
    return {column: getattr(self, column) for column in columns}
OperatorHistoryDAO(object): + def __init__(self, db_connection): + self.db_connection = db_connection + + def insert_history(self, data_list): + def insert_data(session, dict_list): + for data in dict_list: + update_query = text(''' + UPDATE operator_history + SET duration = (duration + :duration) / 2 + WHERE types = :types AND accelerator_core = :accelerator_core AND input_shape = :input_shape AND + output_shape = :output_shape AND device = :device AND jit = :jit AND cann = :cann AND + driver = :driver AND dtype = :dtype + ''') + result = session.execute(update_query, data) + if result.rowcount == 0: + query = text(''' + INSERT INTO operator_history + (types, accelerator_core, input_shape, output_shape, duration, device, jit, cann, driver, dtype, reverse1) + SELECT :types, :accelerator_core, :input_shape, :output_shape, :duration, :device, :jit, :cann, :driver, :dtype, :reverse1 + WHERE NOT EXISTS( + SELECT 1 FROM operator_history WHERE + types = :types AND accelerator_core = :accelerator_core AND input_shape = :input_shape AND + output_shape = :output_shape AND device = :device AND jit = :jit AND cann = :cann AND + driver = :driver AND dtype = :dtype + ) + ''') + session.execute(query, data) + session.commit() + + self.db_connection.execute(insert_data, data_list) + + def get_by_types_and_input_shape(self, types, input_shape): + def get(session, key1, key2): + results = session.query(OperatorHistory).filter_by(types=key1, input_shape=key2).all() + objects = [OperatorHistory(types=result.types, + accelerator_core=result.accelerator_core, + input_shape=result.input_shape, + output_shape=result.output_shape, + duration=result.duration, + device=result.device, + jit=result.jit, + cann=result.cann, + driver=result.driver, + dtype=result.dtype) for result in results] + return objects + + return self.db_connection.execute(get, types, input_shape) + + def get_by_types_and_accelerator_core(self, accelerator_core, types): + def get(session, key1, key2): + results = 
class Operator(object):
    """Plain record describing one profiled operator."""

    def __init__(self, name, types, accelerator_core, input_shape, output_shape, duration):
        self.name, self.types = name, types
        self.accelerator_core = accelerator_core
        self.input_shape, self.output_shape = input_shape, output_shape
        self.duration = duration

    def print_to_string(self):
        """Print a one-line summary to stdout.

        Despite the name nothing is returned; ``types`` and
        ``accelerator_core`` are not part of the printed summary.
        """
        summary = (
            f"name: {self.name}, input_shape: {self.input_shape}, "
            f"output_shape: {self.output_shape}, duration: {self.duration}"
        )
        print(summary)
def print_list_fw(self):
    """Log the forward operator list at DEBUG level, one line per operator.

    The previous call passed the labels and values as separate positional
    arguments to ``debug`` without any ``%`` placeholders in the message.
    The standard ``logging`` machinery applies ``msg % args`` when the
    record is emitted, so that form fails to format and the operator line
    is never written. The values are now passed as lazy ``%s`` arguments.
    """
    self._logger.debug("fw")
    for item in self.fw:
        self._logger.debug("name %s type %s index_name %s", item.name, item.type, item.index_name)
class OperatorList(ProfileList):
    """Profile list that additionally tracks a recompute (``re``) operator list."""

    def __init__(self):
        super(OperatorList, self).__init__()
        self.fw = []
        self.bw = []
        self.re = []
        self._logger = get_logger('operator_list')

    def print_list(self):
        """Log the forward, backward and recompute lists, in that order."""
        self.print_list_fw()
        self.print_list_bw()
        self.print_list_re()

    def print_list_fw(self):
        """Log the forward operators at DEBUG level."""
        self._debug_items("fw", self.fw)

    def print_list_bw(self):
        """Log the backward operators at DEBUG level."""
        self._debug_items("bw", self.bw)

    def print_list_re(self):
        """Log the recompute operators at DEBUG level."""
        self._debug_items("re", self.re)

    def _debug_items(self, label, items):
        """Log ``label`` followed by one formatted line per item.

        The original calls passed labels and values as bare positional
        arguments with no ``%`` placeholders in the message; a standard
        ``logging`` logger cannot format that, so the lines were lost.
        Lazy ``%s`` arguments are used instead.
        """
        self._logger.debug(label)
        for item in items:
            self._logger.debug("name %s type %s index_name %s", item.name, item.type, item.index_name)
class DictNoteShape(DictShape):
    """Operator record annotated with the run configuration it was profiled under."""

    def __init__(self):
        super(DictNoteShape, self).__init__()
        # Parallel sizes and run settings copied from the config.
        self.tp = self.cp = self.ep = 0
        self.jit = 0
        self.num_experts = 0
        self.seq_length = 0
        # Per-operator data copied from the profiling item.
        self.type = ""
        self.input_shape = ""
        self.output_shape = ""
        self.duration = 0.0
        self.flops = 0.0

    def change_profile_into_dictshape(self, item, config_info):
        """Populate this record from one profiled operator and its run configuration.

        Args:
            item: profiling entry; ``name``, ``type``, ``accelerator_core``,
                ``input_shapes``, ``output_shapes`` and ``duration_us`` are read.
            config_info: run configuration; ``tp``, ``cp``, ``ep``, ``jit``,
                ``num_experts`` and ``seq_length`` are read.
        """
        # Shapes are stored without double quotes; the same cleaned
        # strings feed the flops estimate.
        clean_input = item.input_shapes.replace('"', '')
        clean_output = item.output_shapes.replace('"', '')
        flops = cal_operator_flops(clean_input, clean_output, item.type)

        self.name, self.type = item.name, item.type
        self.accelerator_core = item.accelerator_core
        self.index_name = ''
        self.tp, self.cp, self.ep = config_info.tp, config_info.cp, config_info.ep
        self.jit = config_info.jit
        # A falsy expert count (None or 0) is stored as 1.
        self.num_experts = config_info.num_experts or 1
        self.seq_length = config_info.seq_length
        self.input_shape, self.output_shape = clean_input, clean_output
        self.duration = float(item.duration_us)
        self.flops = flops
def get_note_dict(self):
    """Group every forward and backward operator by its ``index_name``.

    Fills ``self.operator_note_dict`` in place: each key is an
    ``index_name`` and each value the list of operators seen under it.
    An operator is not appended when an equal (``==``) operator is
    already stored under the same key.
    """
    registry = self.operator_note_dict
    for note in self.operator_note_list:
        for operator in note.fw + note.bw:
            key = operator.index_name
            if key not in registry:
                registry[key] = [operator]
                continue
            bucket = registry[key]
            if not any(stored == operator for stored in bucket):
                bucket.append(operator)
config.num_layers_per_virtual_pipeline_stage else 1 + self.cp = config.context_parallel_size + self.ep = config.expert_model_parallel_size or 1 + self.jit = 1 if config.jit_compile else 0 + self.seq_length = config.seq_length + self.num_experts = config.num_experts + + def __str__(self): + return (f"tp:{self.tp}, dp:{self.dp}, pp:{self.pp}, vp:{self.vp}, cp:{self.cp}, ep:{self.ep}, jit:{self.jit}, " + f"seq_length:{self.seq_length}, num_experts:{self.num_experts}") + + +class OriginalProfileData(object): + def __init__(self, config): + self.config_info = ConfigInfo(config) + self.profile_list = ProfileList() + + +class OriginalProfileDataList(object): + def __init__(self): + self.data_list = [] + + def get_origin_profile_data(self, profiling_results): + for config, model in profiling_results: + origin_profile_data = OriginalProfileData(config) + + profile_list_fw = self.get_profinfo_list_from_profiling(model.forward.operator_info[-1], + forwardflag=1) + profile_list_bw = self.get_profinfo_list_from_profiling(model.backward.operator_info[-1], + forwardflag=0) + origin_profile_data.profile_list.fw = copy.deepcopy(profile_list_fw) + origin_profile_data.profile_list.bw = copy.deepcopy(profile_list_bw) + + self.data_list.append(origin_profile_data) + + @staticmethod + def get_profinfo_list_from_profiling(items, forwardflag): + operator_info_list = [] + alltoall_flag = 0 + cp_flag1 = 0 + cp_flag = 0 + for (index, item) in enumerate(items): + # Mark forward network part for CP + if forwardflag == 1: + if "ConcatD" in item.name and index < (len(items) - 2): + if "hcom_send" in items[index + 1].name or "hcom_send" in items[index + 2].name: + cp_flag1 = 1 + if cp_flag1 == 1: + if "MatMul" in item.name: + cp_flag1 = 0 + continue + item.name = "cp_for_flag_" + item.name + # Mark the backward part for CP + if forwardflag == 0: + # Mark froward network part for CP re-computation + if cp_flag == 0 and "ConcatD" in item.name and index < (len(items) - 2): + if "hcom_send" in 
items[index + 1].name or "hcom_send" in items[index + 2].name: + cp_flag1 = 2 + if cp_flag1 == 2: + if "MatMul" in item.name: + cp_flag1 = 0 + continue + item.name = "cp_re_flag_" + item.name + # Mark backward network part for CP + if cp_flag == 0 and "Concat" in item.name and index < (len(items) - 2): + if "ZerosLike" in items[index + 1].name: + cp_flag = 1 + if cp_flag == 1: + if "Mul" in item.name: + cp_flag = 0 + if cp_flag == 1: + item.name = "cp_back_flag_" + item.name + + # Mark EP part + if "alltoall" in item.name: + alltoall_flag = alltoall_flag + 1 + if alltoall_flag % 2 == 1: + item.name = "ep_flag_" + item.name + + if ( + not ("hcom" in item.name) and item.input_shapes != 'N/A' + and item.input_shapes.replace('"', '').replace(';', '') != '' + ): + operator_info_list.append(item) + setattr(item, "index_name", '') + + return operator_info_list diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_re_profile.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_re_profile.py new file mode 100644 index 000000000..0a00d7676 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_re_profile.py @@ -0,0 +1,202 @@ +import os +import stat +import random +import pickle + +from mindspeed.auto_tuning.utils.logger import get_logger +from mindspeed.auto_tuning.utils.runner.model_executor import ExecutorFlag, ModelExecutor +from mindspeed.auto_tuning.module.parse.profiling_parse.profiling_node_parse import GatherNodeProfiling +from mindspeed.auto_tuning.utils.runner.torchrun_runner import TorchRunRunner +from mindspeed.auto_tuning.config.search_config import SearchConfig +from mindspeed.auto_tuning.utils.utils import get_prof_dir +from mindspeed.auto_tuning.module.operator.operator import OperatorPerformance +from mindspeed.auto_tuning.module.operator.operator_database import OperatorHistory + + +logger = get_logger('operator_re_profile') + + +def search_operator(working_dir, search_cfg, 
communication, profile_count, + scale_flag=False): + # After a certain amount of profiling, the rest operators have not been found will be predicted using + # regression method. + executor = ModelExecutor(TorchRunRunner()) + profiling_results = [] + search_cfg_list = [search_cfg] + model_config = communication.model_cfg + seed = 1234 + random.seed(seed) + unsampled_profiling_info = [] + if len(search_cfg_list) > 9: + sampled_profiling_info = random.sample(search_cfg_list, min(9, len(search_cfg_list))) + unsampled_profiling_info = list(set(search_cfg_list) - set(sampled_profiling_info)) + else: + sampled_profiling_info = [search_cfg] + for profiling_config in sampled_profiling_info: + if scale_flag: + profiling_config = scale_para(model_config, communication, profiling_config) + re_profiling_config = SearchConfig() + re_profiling_config.copy_from_config(model_config) + re_profiling_config.num_layers = profiling_config.pipeline_model_parallel_size + re_profiling_config.seq_length = profiling_config.seq_length + re_profiling_config.tensor_model_parallel_size = profiling_config.tensor_model_parallel_size + re_profiling_config.pipeline_model_parallel_size = profiling_config.pipeline_model_parallel_size + re_profiling_config.data_parallel_size = profiling_config.data_parallel_size + re_profiling_config.context_parallel_size = profiling_config.context_parallel_size + re_profiling_config.expert_model_parallel_size = profiling_config.expert_model_parallel_size + re_profiling_config.prepare_for_profiling() + + from mindspeed.auto_tuning.module.hardware import Hardware + res_dir = os.path.join(working_dir, get_prof_dir(re_profiling_config, re_profile=True)) + if not os.path.exists(res_dir): + profile_count[0] += 1 + flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC + mode = stat.S_IWUSR | stat.S_IRUSR + pkl_filename = os.path.join(working_dir, f'ootb_{Hardware().node_rank}.pkl') + with os.fdopen(os.open(pkl_filename, flags, mode=mode), 'wb') as f: + 
def generate_scale_config(model_config):
    """Return a copy of ``model_config`` overwritten with a fixed large-scale setup.

    The copy comes from ``model_config.copy()``; the fields below are then
    overwritten in order and ``normalize()`` is called on the copy before
    it is returned.
    """
    overrides = (
        ("num_layers", 256),
        # parameter need to be adjusted
        ("tensor_model_parallel_size", 64),
        ("num_attention_heads", 512),
        ("hidden_size", 65536),
        ("ffn_hidden_size", 229376),
        ("context_parallel_size", 32),
        ("seq_length", 131072),
        ("max_position_embeddings", 131072),
        ("expert_model_parallel_size", 32),
        ("num_experts", 32),
        ("pipeline_model_parallel_size", 16),
    )
    scale_config = model_config.copy()
    for field, value in overrides:
        setattr(scale_config, field, value)
    scale_config.normalize()
    return scale_config
hidden_size + scale_ffn_hidden_size = ffn_hidden_size + scale_num_experts = num_experts + scale_sequence_length = sequence_length + scale_space = scale_tp * scale_cp * scale_pp + if pp >= 2: + scale_pp //= pp_scale_factor + scale_num_layers //= num_layers + scale_space = scale_tp * scale_cp * scale_pp + logger.debug(f"Search configs is\n{search_cfg}") + + while scale_space > num_devices_ootb: + logger.debug(f'the scale space is {scale_space}, the scale_tp is {scale_tp}, the scale_cp is {scale_cp}, ' + f'the scale_pp is {scale_pp}, the scale_ep is {scale_ep}') + if scale_cp >= 4: + scale_cp //= scale_factor + scale_sequence_length //= scale_factor + scale_space = scale_tp * scale_cp * scale_pp + continue + if scale_tp >= 4: + scale_tp //= scale_factor + scale_num_attention_heads //= scale_factor + scale_hidden_size //= scale_factor + scale_ffn_hidden_size //= scale_factor + scale_space = scale_tp * scale_cp * scale_pp + continue + + scale_dp = num_devices_ootb // (scale_tp * scale_cp * scale_pp) + while scale_dp * scale_cp < scale_ep: + scale_ep //= scale_factor + scale_num_experts //= scale_factor + + # set up config group + before_scale = SearchConfig() + before_scale.copy_from_config(model_config) + before_scale.tensor_model_parallel_size = scale_tp + before_scale.context_parallel_size = scale_cp + before_scale.pipeline_model_parallel_size = scale_pp + before_scale.num_layers = scale_num_layers + before_scale.num_attention_heads = scale_num_attention_heads + before_scale.expert_model_parallel_size = scale_ep + before_scale.hidden_size = scale_hidden_size + before_scale.ffn_hidden_size = scale_ffn_hidden_size + before_scale.num_experts = scale_num_experts + before_scale.seq_length = scale_sequence_length + before_scale.data_parallel_size = scale_dp + return before_scale diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_shape_analysis.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_shape_analysis.py new file 
class DataEp:
    """Shape record for one parallel configuration.

    ``separate_ep`` in this module creates instances and fills ``tp``,
    ``cp``, ``ep``, ``seq_length``, ``input_shape`` and ``output_shape``.
    ``seq_length`` used to exist only after that external assignment; it
    is now declared here so every instance has the full attribute set.
    """

    def __init__(self):
        self.tp = 0
        self.cp = 0
        self.ep = 0
        # Declared up front; previously only set from outside the class.
        self.seq_length = 0
        self.input_shape = ""
        self.output_shape = ""
+ if flag == 0: + diff_idx_input = [0] * count_num(input_list.get(str(ep1))) + diff_idx_output = [0] * count_num(output_list.get(str(ep1))) + input_cal_tmp, diff_idx_input = analyze_shape_arr_new(input_list, ep_arr, diff_idx_input, 2) + output_cal_tmp, diff_idx_output = analyze_shape_arr_new(output_list, ep_arr, diff_idx_output, 2) + if len(input_list) != 1: + flag = 1 + else: + input_cal_tmp = modify_by_index(input_list, diff_idx_input, ep_arr, mode=1) + output_cal_tmp = modify_by_index(output_list, diff_idx_output, ep_arr, mode=1) + tmp = DataEp() + tmp.tp = tp1 + tmp.cp = cp1 + tmp.ep = ep1 + tmp.seq_length = seq_length1 + tmp.input_shape = input_cal_tmp + tmp.output_shape = output_cal_tmp + result.append(tmp) + return result + + +def separate_cp_tp(results): + input_shape_dic = {} + output_shape_dic = {} + index_visit = [False] * len(results) + diff_idx_input = [] + diff_idx_output = [] + flag = 0 + for i, _ in enumerate(results): + input_list = {} + output_list = {} + if index_visit[i]: + continue + index_visit[i] = True + result1 = results[i] + cp1 = str(result1.cp) + tp1 = str(result1.tp) + seq_length1 = str(result1.seq_length) + input_list[tp1] = result1.input_shape + output_list[tp1] = result1.output_shape + for j in range(i + 1, len(results)): + if index_visit[j]: + continue + result2 = results[j] + cp2 = str(result2.cp) + tp2 = str(result2.tp) + seq_length2 = str(result2.seq_length) + if cp1 != cp2 or seq_length1 != seq_length2: + continue + index_visit[j] = True + input_list[tp2] = result2.input_shape + output_list[tp2] = result2.output_shape + # calculate linear relationship + tp_arr = list(input_list.keys()) + if set(input_list.keys()) == {'8', '4'}: + for index_i, sublist in enumerate(input_list.get('4')): + for j, value in enumerate(sublist): + check_value = isinstance(value, float) and '.1' in str(value) + if (check_value and index_i < len(input_list.get('8')) + and j < len(input_list.get('4')[index_i])): + input_list.get('8')[index_i][j] = value + 
# The first occurrence of cp is recorded, other cp operator shapes directly modify the relevant dimension + if flag == 0: + arr_in = input_list.get(str(tp1)) + arr_out = output_list.get(str(tp1)) + diff_idx_input = [0] * count_num(arr_in) + diff_idx_output = [0] * count_num(arr_out) + input_cal_tmp, diff_idx_input = analyze_shape_arr_new(input_list, tp_arr, diff_idx_input, 0) + output_cal_tmp, diff_idx_output = analyze_shape_arr_new(output_list, tp_arr, diff_idx_output, 0) + if len(input_list) != 1: + flag = 1 + else: + input_cal_tmp = modify_by_index(input_list, diff_idx_input, tp_arr, mode=2) + output_cal_tmp = modify_by_index(output_list, diff_idx_output, tp_arr, mode=2) + input_shape_dic[cp1] = input_cal_tmp + output_shape_dic[cp1] = output_cal_tmp + if set(input_shape_dic.keys()) == {'4', '2'}: + for i, sublist in enumerate(input_shape_dic.get('2')): + for j, value in enumerate(sublist): + check_value = isinstance(value, float) and '.4' in str(value) + if (check_value and + i < len(input_shape_dic.get('4')) and j < len(input_shape_dic.get('4')[i])): + input_shape_dic.get('4')[i][j] = value + # calculate linear relationship + cp_arr = list(input_shape_dic.keys()) + input_cal_arr, diff_idx_input = analyze_shape_arr_new(input_shape_dic, cp_arr, diff_idx_input, 1) + output_cal_arr, diff_idx_output = analyze_shape_arr_new(output_shape_dic, cp_arr, diff_idx_output, 1) + + return input_cal_arr, output_cal_arr + + +def analyze_shape_arr_new(input_shape_list, tp_arr, diff, mode=0): + # Data cleaning, removing some invalid data. + input_shape_list, tp_arr = normal_list(input_shape_list, tp_arr) + + # Initialize the result array, initializing values for each position in the shape, defaulting value means unchanged. 
+ result_arr = input_shape_list.get(str(tp_arr[0])) + + # Compare the differences in shape between different TPs, and find the index of the differing columns + diff_idx, diff_arr = analyze_shape_list(input_shape_list, str(tp_arr[0])) + w_arr = [] + num = count_num(result_arr) + if len(diff_idx) != 0 and len(diff) < num: + diff = [0] * num + for i in diff_idx: + if mode == 0: + diff[i] |= 1 + elif mode == 1: + diff[i] += 1 + elif mode == 2: + diff[i] = 1 + """ + tp cp ep + 1 1 1 + Only cut by TP with a suffix of 0.4, only CP is 0.2, only EP is 0.1. + CP + EP binary corresponds to 0.3. + """ + for index, _ in enumerate(diff_idx): + # Calculate and record the pattern of changes based on the different data, with the default tp * shape_x + i = diff_idx[index] + if mode == 2: + w = cal_shape_change_with_ep(diff_arr[index], tp_arr) + else: + w = cal_shape_change_with_tp_cp(diff_arr[index], tp_arr) + flag = 0 + dis = float(float(w) - int(w)) + w = modify_special(w) + if abs(dis - 0.1) < 0.001: + flag = 1 + if diff[i] == 1: + if mode == 0: + if flag == 0: + # Only cut by TP 0.4 + w_arr.append(float(w) + 0.4) + elif flag == 1: + # tp + ep 0.5 + w_arr.append(float(int(w)) + 0.5) + elif mode == 1: + if flag == 0: + # Only cut by CP 0.2 + w_arr.append(float(w) + 0.2) + elif flag == 1: + # cp + ep 0.3 + w_arr.append(float(int(w)) + 0.3) + elif mode == 2: + # ep with suffix 0.1 + w_arr.append(float(w) + 0.1) + elif diff[i] == 2: + if flag == 0: + # tp + cp 0.6 + w_arr.append(float(int(w)) + 0.6) + elif flag == 1: + # tp + cp + ep 0.7 + w_arr.append(float(int(w)) + 0.7) + else: + logger.warning("error") + result_arr = convert_w_to_result_arr(result_arr, diff_idx, w_arr) + return result_arr, diff + + +def get_default_shape_change(param): + rows = param.split(';') + arr = [] + for row in rows: + nums = [] + for num in row.split(','): + if num != '': + nums.append(int(num)) + arr.append(nums) + return arr + + +def analyze_shape_list(input_shape_list, row1_value): + diff_index = [] # 
Save different column indices + diff_arr = [] # Save different data + # Compare the sublist within each list. + column_index = 0 + + for i in range(len(input_shape_list[row1_value])): + for index_n in range(len(input_shape_list[row1_value][i])): + tmp_list = [] + tmp_list_float = [] + for value in input_shape_list.values(): + tmp_list.append(int(value[i][index_n])) + tmp_list_float.append(value[i][index_n]) + if len(set(tmp_list)) != 1: + diff_arr.append(tmp_list_float) + diff_index.append(column_index) + column_index += 1 + + return diff_index, diff_arr + + +def cal_shape_change_with_tp_cp(y_arr, x_arr): + w_arr = [] + size = len(x_arr) + h = float(y_arr[0] - int(y_arr[0])) + for index in range(0, size): + if abs(h) < 0.001: + h = float(y_arr[index] - int(y_arr[index])) + w_arr.append(int(y_arr[index]) * int(x_arr[index])) + + return w_arr[0] + h + + +def cal_shape_change_with_ep(y_arr, x_arr): + w_arr = [] + size = len(x_arr) + h = float(y_arr[0] - int(y_arr[0])) + for index in range(0, size): + if abs(h) < 0.001: + h = float(y_arr[index] - int(y_arr[index])) + w_arr.append(int(y_arr[index]) / float(x_arr[index])) + + return w_arr[0] + h + + +def convert_w_to_result_arr(result_arr, index_arr, w_arr): + result_list = [] + column_index = 0 + index_index = 0 + for inner_arr in result_arr: + result = [] + for item in inner_arr: + if index_index < len(index_arr) and column_index == index_arr[index_index]: + result.append(float(w_arr[index_index])) + index_index = index_index + 1 + else: + result.append(float(item)) + column_index = column_index + 1 + result_list.append(result) + if len(inner_arr) == 0: + column_index = column_index + 1 + return result_list + + +def check_array_format(arr1, arr2): + if len(arr1) != len(arr2): + return False + for i, _ in enumerate(arr1): + if isinstance(arr1[i], list) and isinstance(arr2[i], list): + if not check_array_format(arr1[i], arr2[i]): + return False + return True + + +def normal_list(input_shape_list, tp_arr): + 
new_input_shape_list = {} + new_tp_arr = [] + if len(input_shape_list) > 0 and len(tp_arr) > 0: + new_input_shape_list[str(tp_arr[0])] = input_shape_list[str(tp_arr[0])] + new_tp_arr.append(tp_arr[0]) + for index in range(1, len(tp_arr)): + if check_array_format(input_shape_list[str(tp_arr[0])], input_shape_list[str(tp_arr[index])]): + new_input_shape_list[str(tp_arr[index])] = input_shape_list[str(tp_arr[index])] + new_tp_arr.append(tp_arr[index]) + else: + logger.warning(f'Incorrect input_shape_list or tp_arr: {input_shape_list}, {tp_arr}') + + return new_input_shape_list, new_tp_arr + + +def modify_special(w): + result = int(w) + if result == 9016: + result = 9024 + elif result == 1127: + result = 1128 + + return result + + +def count_num(arr): + cnt = 0 + for i in arr: + for _ in i: + cnt += 1 + return cnt + + +def modify_by_index(shape_list, index_diff, tp_arr, mode=0): + # Data cleaning, to remove invalid data elements, such as data that doesn't match the shape + input_shape_list, tp_arr = normal_list(shape_list, tp_arr) + + input_list = shape_list[str(tp_arr[0])] + result_list = [] + i_diff = 0 + column_index = 0 + for arr in input_list: + result = [] + for item in arr: + ans = 0.0 + if column_index < len(index_diff) and index_diff[column_index] == 1: + # 修改 + if mode == 1: + ans = float(int(item) / float(tp_arr[0])) + 0.1 + elif mode == 2: + ans = float(int(item) * float(tp_arr[0])) + 0.4 + i_diff += 1 + else: + ans = float(item) + result.append(float(ans)) + column_index += 1 + result_list.append(result) + + return result_list diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_shape_cal.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_shape_cal.py new file mode 100644 index 000000000..3488b6dee --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/module/operator/operator_shape_cal.py @@ -0,0 +1,178 @@ +import ast +import math +import numpy as np +from sklearn.linear_model import 
LinearRegression + +from mindspeed.auto_tuning.config.search_config import SearchConfig + + +def cal_new_shape_new(cal_arr, search_cfg: SearchConfig): + tp = search_cfg.tp + cp = search_cfg.cp + ep = search_cfg.ep or 1 + mbs = search_cfg.mbs + num_experts = search_cfg.num_experts or 1 + cal_arr = ast.literal_eval(cal_arr) + result_arr = [] + base = 0.0001 + mbs_flag = False + if mbs > 1: + mbs_flag = True + for inner_arr in cal_arr: + result = [] + for item in inner_arr: + dis = item - float(int(item)) + if abs(dis - 0) <= base: + result.append(int(item)) + elif abs(dis - 0.1) <= base: + result.append(math.ceil(int(item) * ep / num_experts)) + elif abs(dis - 0.2) <= base and mbs_flag: + result.append(math.ceil(int(item) * mbs / cp)) + elif abs(dis - 0.2) <= base: + result.append(math.ceil(int(item) / cp)) + elif abs(dis - 0.3) <= base and mbs_flag: + result.append(math.ceil(int(item) * mbs / cp * ep / num_experts)) + elif abs(dis - 0.3) <= base: + result.append(math.ceil(int(item) / cp * ep / num_experts)) + elif abs(dis - 0.4) <= base: + result.append(math.ceil(int(item) / tp)) + elif abs(dis - 0.5) <= base: + result.append(math.ceil(int(item) / tp * ep / num_experts)) + elif abs(dis - 0.6) <= base and mbs_flag: + result.append(math.ceil(int(item) * mbs / tp / cp)) + elif abs(dis - 0.6) <= base: + result.append(math.ceil(int(item) / tp / cp)) + elif abs(dis - 0.7) <= base and mbs_flag: + result.append(math.ceil(int(item) * mbs / tp / cp * ep / num_experts)) + elif abs(dis - 0.7) <= base: + result.append(math.ceil(int(item) / tp / cp * ep / num_experts)) + result_arr.append(result) + return result_arr + + +def cal_new_shape_tce(cal_arr, search_cfg: SearchConfig): + result_cal_arr = cal_new_shape_new(cal_arr, search_cfg) + result_str = ';'.join([','.join(map(str, arr)) if arr else '' for arr in result_cal_arr]) + return result_str + + +def mul_shape(shape): + result = 1 + for item in shape: + if item != 0: + result *= item + return result + + +def 
model_operator_with_shape(history_result_list): + if len(history_result_list) <= 0: + return 0, 0 + x_arr = [] + y_arr = [] + for history in history_result_list: + x_arr.append([cal_operator_flops(history.input_shape, history.output_shape, history.types)]) + y_arr.append([history.duration]) + shape_model_w, shape_model_b = linear_regression(x_arr, y_arr) + return shape_model_w, shape_model_b + + +def cal_operator_flops(input_shape, output_shape, types): + input_shape_arr_before = [] + output_shape_arr = [] + if len(input_shape) < 1 or input_shape == ';': + return 1 + for str_num in input_shape.split(';')[0].split(','): + if str_num == '': + return 1 + else: + input_shape_arr_before.append(int(str_num)) + if len(output_shape) < 1 or output_shape == ';': + return 1 + for str_num in output_shape.split(';')[0].split(','): + if str_num == '': + return 1 + else: + output_shape_arr.append(int(str_num)) + # other operator flops + x_item = mul_shape(input_shape_arr_before) + + # FLOPs(BatchMatMul) = b*x*y*n; [b, x, n] * [b, n, y] == [b, x, y] + if types in ['BatchMatMul']: + x_item = mul_shape(output_shape_arr) + if input_shape_arr_before[1] in output_shape_arr: + x_item *= input_shape_arr_before[2] + else: + x_item *= input_shape_arr_before[1] + + # FLOPs(MatMul) = x*y*n; [x, n] * [n, y] == [x, y] + if types in ['MatMul', 'MatMulCommon']: + input_shape_arr_after = [int(str_num) for str_num in input_shape.split(';')[1].split(',')] + x_item = 2 * mul_shape(output_shape_arr) + if input_shape_arr_before[0] in output_shape_arr: + x_item *= input_shape_arr_before[1] + else: + x_item *= input_shape_arr_before[0] + # The input matrix A needs to be transposed, resulting in additional FLOPs. + if output_shape_arr[0] != input_shape_arr_before[0]: + x_item += 2 * mul_shape(input_shape_arr_before) + # The input matrix B needs to be transposed, resulting in additional FLOPs. 
+ if output_shape_arr[1] != input_shape_arr_after[1]: + x_item += 2 * mul_shape(input_shape_arr_after) + + if types in ['Mul', 'MulAiCore', 'ConcatD']: + x_item = 0 + str_arr = input_shape.split(';') + for arr in str_arr: + if len(arr) > 0: + int_arr = [int(str_num) for str_num in arr.split(',')] + x_item += mul_shape(int_arr) + + if types in ['Slice', 'SliceAiCore']: + x_item = 0 + str_arr = output_shape.split(';') + for arr in str_arr: + if len(arr) > 0: + int_arr = [int(str_num) for str_num in arr.split(',')] + x_item += mul_shape(int_arr) + + if types in ['FlashAttentionScore', 'FlashAttentionScoreGrad']: + x_item = mul_shape(input_shape_arr_before) + input_shape_arr_after_flash = [] + for str_num in input_shape.split(';')[1].split(','): + if str_num != '': + input_shape_arr_after_flash.append(int(str_num)) + x_tmp = input_shape_arr_after_flash[0] * x_item + x_item += x_tmp + + return x_item + + +def cal_operator_duration_with_shape(shape_model_w, shape_model_b, flops): + result_duration = float(shape_model_w) * flops + float(shape_model_b) + if result_duration < 0: + return 0 + return result_duration + + +def model_operator_with_tp(operator_notes_index_list): + """ + For operators with the same TP and index-name, the duration decreases linearly with TP, duration ~ w / tp. + Calculate the proportion of TP as a1 and the proportion of CP as a2. + The final result is d = model_w_tp / TP + model_w_cp / CP. 
+ """ + result_tp = 0 + for operator_notes_index in operator_notes_index_list: + result_tp = result_tp + operator_notes_index.tp * operator_notes_index.duration + model_w_tp = result_tp / len(operator_notes_index_list) + + return model_w_tp, 0 + + +def linear_regression(x, y): + x = np.array(x) + y = np.array(y) + model = LinearRegression() + model.fit(x, y) + w = model.coef_[0] + b = model.intercept_ + return w[0], b[0] diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/__init__.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/__init__.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_communication_parse.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_communication_parse.py new file mode 100644 index 000000000..bfc42ce06 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_communication_parse.py @@ -0,0 +1,284 @@ +from mindspeed.auto_tuning.module.parse.profiling_parse.profiling_config import ( + TensorParallelCommunication, + DataParallelCommunication, + PipelineParallelCommunication, + ContextParallelCommunication, + ExpertParallelCommunication, + ProfilingConfig +) +from mindspeed.auto_tuning.module.parse.profiling_parse.profiling_constant import NumberConstant, SpecialKeyName + + +class AnalyseCommunicationMsg(ProfilingConfig): + """ Analyse communication massage. 
""" + + def __init__(self, search_cfg, communication_details, kernel_details): + super(AnalyseCommunicationMsg, self).__init__(search_cfg) + self.collective_hcom = communication_details.get('collective', {}) + self.p2p_hcom = communication_details.get('p2p', {}) + self.kernel_details = kernel_details + self.tensor_parallel_comm = TensorParallelCommunication() + self.pipeline_parallel_comm = PipelineParallelCommunication() + self.data_parallel_comm = DataParallelCommunication() + self.context_parallel_comm = ContextParallelCommunication() + self.expert_parallel_comm = ExpertParallelCommunication() + self.pp_stream_id = None + self.tp_stream_id = None + self.overlap_record = {} + self.overlap_list = [] + + @classmethod + def is_send_or_recv_op(cls, op_name: str) -> bool: + return 'send' in op_name or 'receive' in op_name + + def get_hcom_and_hcom_overlap(self, index, info): + current_name = self.kernel_details[index][SpecialKeyName.NAME] + next_name = self.kernel_details[index + 1][SpecialKeyName.NAME] + if current_name in self.overlap_list or next_name in self.overlap_list: + return + + if index + 1 >= len(self.kernel_details): + return + + hcom_time1 = float(info[SpecialKeyName.DURATION_US]) + hcom_time2 = float(self.kernel_details[index + 1][SpecialKeyName.DURATION_US]) + shorter_hcom = current_name if hcom_time1 <= hcom_time2 else next_name + self.overlap_list.append(shorter_hcom) + + def get_compute_and_hcom_overlap(self, index, info): + overlap_record = {} + overlap_list = [] + overlap_time = float(info[SpecialKeyName.DURATION_US]) + op1 = self.kernel_details[index + 1] + op2 = self.kernel_details[index + 2] if index + 2 < len(self.kernel_details) else None + op1_name = op1[SpecialKeyName.NAME] + hcom1_duration = float(op1[SpecialKeyName.DURATION_US]) + + if op2 and op2[SpecialKeyName.ACCELERATOR_CORE] == 'HCCL': + op2_name = op2[SpecialKeyName.NAME] + hcom2_duration = float(op2[SpecialKeyName.DURATION_US]) + + if hcom2_duration <= hcom1_duration: + 
overlap_list.append(op2_name) + overlap_record[op1_name] = min(overlap_time, hcom1_duration) + else: + overlap_list.append(op1_name) + overlap_record[op1_name] = min(overlap_time, hcom2_duration) + else: + overlap_record[op1_name] = min(overlap_time, hcom1_duration) + + return overlap_record, overlap_list + + def is_compute_and_hcom_overlap(self, index, row): + if index + 1 >= len(self.kernel_details): + return False + op1 = self.kernel_details[index + 1] + if op1[SpecialKeyName.ACCELERATOR_CORE] != 'HCCL' or row[SpecialKeyName.ACCELERATOR_CORE] == 'HCCL': + return False + start_time = float(row[SpecialKeyName.START_TIME_US]) + duration = float(row[SpecialKeyName.DURATION_US]) + op1_start_time = float(op1[SpecialKeyName.START_TIME_US]) + return op1_start_time < start_time + duration + + def is_hcom_hcom_overlap(self, index, row): + if index + 1 >= len(self.kernel_details): + return False + op1 = self.kernel_details[index + 1] + if row[SpecialKeyName.ACCELERATOR_CORE] != 'HCCL' or op1[SpecialKeyName.ACCELERATOR_CORE] != 'HCCL': + return False + start_time = float(row[SpecialKeyName.START_TIME_US]) + duration = float(row[SpecialKeyName.DURATION_US]) + op1_start_time = float(op1[SpecialKeyName.START_TIME_US]) + return op1_start_time < start_time + duration + + def analyse_parallel_comm(self): + self._analyse_communication_overlap() + min_expert_time = None + for name, info in self.collective_hcom.items(): + if 'hcom' not in name: + continue + if self.is_send_or_recv_op(name): + self._analyse_pp_comm(name, info) + continue + if 'alltoall' in name: + min_expert_time = self._analyse_ep_comm(name, info, min_expert_time) + continue + if self.search_cfg.tp > 1: + self._analyse_tp_comm(name, info) + self._analyse_dp_comm(name, info) + if self.search_cfg.pp > 1 and self.search_cfg.cp > 1: + self.pp_stream_id = self._analyse_pp_cp_process_id() + else: + self.pp_stream_id = None + for name, info in self.p2p_hcom.items(): + if 'hcom' not in name: + continue + hcom_name = 
name.split('@')[0] + stream_id = hcom_name.split('_')[3] + if (self.pp_stream_id and self.pp_stream_id == stream_id) or self.search_cfg.cp == 1: + self._analyse_pp_comm(name, info) + else: + self._analyse_cp_comm(name, info) + + self._get_zero1_hcom() + if min_expert_time: + self.expert_parallel_comm.min_comm_time_ms = len(self.expert_parallel_comm.details) * min_expert_time + self.expert_parallel_comm.wait_time_ms = self.expert_parallel_comm.total_time_ms - \ + self.expert_parallel_comm.min_comm_time_ms + + def get_tp_comm(self): + return self.tensor_parallel_comm + + def get_pp_comm(self): + return self.pipeline_parallel_comm + + def get_dp_comm(self): + return self.data_parallel_comm + + def get_cp_comm(self): + return self.context_parallel_comm + + def get_ep_comm(self): + return self.expert_parallel_comm + + def is_tp_communication(self, name): + return "reduceScatter" in name or "allGather" in name + + def _accumulate_communication_stats(self, comm_obj, name, info): + if isinstance(comm_obj, TensorParallelCommunication) and not self.is_tp_communication(name): + comm_obj.details.append({name: info}) + return + comm_obj.total_time_ms += info[SpecialKeyName.ELAPSE_TIME_MS] + comm_obj.wait_time_ms += (info[SpecialKeyName.WAIT_TIME_MS] + info[SpecialKeyName.IDLE_TIME_MS]) + hcom_name = name.split('@')[0] + if isinstance(comm_obj, TensorParallelCommunication): + if hcom_name in self.overlap_record: + comm_obj.overlap_time_ms += self.overlap_record[hcom_name] / NumberConstant.CONVERSION_TIME + comm_obj.fixed_wait_time_ms += (info[SpecialKeyName.WAIT_TIME_MS] + info[SpecialKeyName.IDLE_TIME_MS]) + else: + comm_obj.fixed_time_ms += info[SpecialKeyName.ELAPSE_TIME_MS] + elif hcom_name in self.overlap_record: + comm_obj.overlap_time_ms += self.overlap_record[hcom_name] / NumberConstant.CONVERSION_TIME + comm_obj.details.append({name: info}) + + def _analyse_pp_cp_process_id(self): + pp_and_cp_send_id = [] + pp_and_cp_receive_id = [] + pp_stream_id = None + for name, _ 
in self.p2p_hcom.items(): + if 'hcom' not in name: + continue + hcom_name = name.split('@')[0] + stream_id = hcom_name.split('_')[3] + if 'send' in name: + if len(pp_and_cp_receive_id) > 1 and stream_id in pp_and_cp_receive_id: + pp_stream_id = stream_id + if stream_id not in pp_and_cp_send_id: + pp_and_cp_send_id.append(stream_id) + elif 'receive' in name: + if len(pp_and_cp_send_id) > 1 and stream_id in pp_and_cp_send_id: + pp_stream_id = stream_id + if stream_id not in pp_and_cp_receive_id: + pp_and_cp_receive_id.append(stream_id) + if pp_stream_id is not None: + break + return pp_stream_id + + def _dp_comm_with_mlp_and_attention(self, mlp_process_id, process_id, name, info): + if mlp_process_id and process_id == mlp_process_id: + self.data_parallel_comm.mlp_zero_time_ms += info[SpecialKeyName.ELAPSE_TIME_MS] + if 'allGather' in name: + self.data_parallel_comm.mlp_ag_time_ms += info[SpecialKeyName.ELAPSE_TIME_MS] + if 'reduceScatter' in name: + self.data_parallel_comm.mlp_rs_time_ms += info[SpecialKeyName.ELAPSE_TIME_MS] + else: + self.data_parallel_comm.other_zero_time_ms += info[SpecialKeyName.ELAPSE_TIME_MS] + if 'allGather' in name: + self.data_parallel_comm.other_ag_time_ms += info[SpecialKeyName.ELAPSE_TIME_MS] + if 'reduceScatter' in name: + self.data_parallel_comm.other_rs_time_ms += info[SpecialKeyName.ELAPSE_TIME_MS] + + def _get_zero1_hcom(self): + mlp_process_id = None + if not self.data_parallel_comm.details: + return + if 'allGather' in list(self.data_parallel_comm.details[-1].keys())[0] \ + and (self.search_cfg.cp * self.search_cfg.dp / self.search_cfg.ep != 1): + mlp_process_id = list(self.data_parallel_comm.details[-1].keys())[0].split('_')[3] + for hcom in self.data_parallel_comm.details: + for name, info in hcom.items(): + process_id = name.split('_')[3] + if 'allReduce' in name and self.search_cfg.zero1: + continue + self._dp_comm_with_mlp_and_attention(mlp_process_id, process_id, name, info) + + def _analyse_tp_comm(self, name, info): + 
hcom_name = name.split('@')[0] + if hcom_name in self.overlap_list: + return + if ('reduceScatter' in hcom_name or 'broadcast' in hcom_name) and not self.tp_stream_id: + self.tp_stream_id = name.split('_')[3] + if self.search_cfg.tp > 1 and self.tp_stream_id and name.split('_')[3] == self.tp_stream_id: + self._accumulate_communication_stats(self.tensor_parallel_comm, name, info) + + def _analyse_pp_comm(self, name, info): + self._accumulate_communication_stats(self.pipeline_parallel_comm, name, info) + + def _analyse_dp_comm(self, name, info): + hcom_name = name.split('@')[0] + stream_id = hcom_name.split('_')[3] + if stream_id != self.tp_stream_id and hcom_name.split('_')[1] in ["reduceScatter", "allGather"]: + self._accumulate_communication_stats(self.data_parallel_comm, name, info) + + def _analyse_cp_comm(self, name, info): + self._accumulate_communication_stats(self.context_parallel_comm, name, info) + + cp_vector_time = self._analyse_cp_vector_time() + self.context_parallel_comm.vector_time_ms = cp_vector_time + + def _analyse_ep_comm(self, name, info, min_expert_time): + if not min_expert_time: + min_expert_time = info[SpecialKeyName.ELAPSE_TIME_MS] + else: + min_expert_time = min(min_expert_time, info[SpecialKeyName.ELAPSE_TIME_MS]) + self.expert_parallel_comm.total_time_ms += info[SpecialKeyName.ELAPSE_TIME_MS] + self.expert_parallel_comm.details.append({name: info}) + return min_expert_time + + def _analyse_communication_overlap(self): + for index, row in enumerate(self.kernel_details): + if "Name" not in row or "Type" not in row: + continue + if self.is_compute_and_hcom_overlap(index, row): + per_overlap_record, per_overlap_list = self.get_compute_and_hcom_overlap(index, row) + self.overlap_record = {**self.overlap_record, **per_overlap_record} + self.overlap_list.extend(per_overlap_list) + elif self.is_hcom_hcom_overlap(index, row): + self.get_hcom_and_hcom_overlap(index, row) + + def _cp_vector_operator_overlap(self, index, row): + if index >= 
len(self.kernel_details) - 1: + return False + is_hccl = row[SpecialKeyName.ACCELERATOR_CORE] == 'HCCL' + is_ai_vector_core = self.kernel_details[index + 1][SpecialKeyName.ACCELERATOR_CORE] == 'AI_VECTOR_CORE' + is_time_overlap = float(self.kernel_details[index + 1][SpecialKeyName.START_TIME_US]) < float( + row[SpecialKeyName.START_TIME_US]) + float(row[SpecialKeyName.DURATION_US]) + is_overlap = is_hccl and is_ai_vector_core and is_time_overlap + if is_overlap and self.is_send_or_recv_op(row[SpecialKeyName.NAME]): + return True + return False + + def _analyse_cp_vector_time(self): + is_cp_vector = False + total_cp_vector = 0 + for index, row in enumerate(self.kernel_details): + if "Name" not in row or "Type" not in row: + continue + is_ai_vector_core = row[SpecialKeyName.ACCELERATOR_CORE] == 'AI_VECTOR_CORE' + if is_cp_vector and is_ai_vector_core and 'Grad' not in row[SpecialKeyName.NAME]: + total_cp_vector += float(row[SpecialKeyName.DURATION_US]) / NumberConstant.CONVERSION_TIME + elif is_cp_vector and row[SpecialKeyName.ACCELERATOR_CORE] != 'AI_VECTOR_CORE': + is_cp_vector = False + if self._cp_vector_operator_overlap(index, row): + is_cp_vector = True + return total_cp_vector diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_config.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_config.py new file mode 100644 index 000000000..5850c8db9 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_config.py @@ -0,0 +1,259 @@ +from copy import deepcopy +from typing import List +from mindspeed.auto_tuning.module.hardware import Hardware +from mindspeed.auto_tuning.module.parse.profiling_parse.profiling_constant import NumberConstant + + +class ProfilingConfig: + """ + Basic parameters of profiling + """ + + def __init__(self, search_cfg=None, args=None): + self.search_cfg = deepcopy(search_cfg) + self.per_micro_layer = search_cfg.num_layers 
def search_first_operator_idx_for_per_layer_enable_pp_last_stage(self, fw_norm_index, bw_norm_index):
    """Locate the per-micro-batch operator boundaries on the last pipeline stage.

    Args:
        fw_norm_index: flat sequence of forward norm-operator indices, read by position.
        bw_norm_index: flat sequence of backward norm-operator indices, read by position.

    Returns:
        A 5-tuple ``(fw_layer_start, bw_layer_end, recompute_fw,
        fw_per_micro_opt_num, bw_per_micro_opt_num)``. The first three are lists
        of one-element lists. ``recompute_fw`` stays empty unless
        ``self.search_cfg.is_full_recompute`` is truthy.
    """
    fw_layer_start = []
    bw_layer_end = []
    recompute_fw = []
    warm_micro_num = self._calculate_warm_micro_num()
    # Cursors into bw_norm_index / fw_norm_index; advanced once per micro batch.
    bw_idx = 0
    fw_idx = 0
    for micro in range(self.micro_num):
        # Which group of (vpp * pp) consecutive micro batches this one falls into.
        i = micro // (self.vpp * self.search_cfg.pp)
        fw_layer_start.append([fw_norm_index[fw_idx]])
        fw_idx = self._calculate_fw_idx(fw_idx, i, micro)
        bw_idx = self._calculate_bw_idx(bw_idx, i, micro)
        # bw_idx already points past this micro batch, so step back one entry.
        bw_layer_end.append([bw_norm_index[bw_idx - 1]])
        # NOTE(review): the nesting of the two inner branches below was reconstructed
        # from a whitespace-collapsed patch - confirm against the upstream file.
        if self.search_cfg.is_full_recompute:
            if warm_micro_num <= micro + 1:
                recompute_fw.append([fw_norm_index[fw_idx]])
                fw_idx += NumberConstant.FW_NORM_OP_NUM_ENABLE_PP_OTHER_STAGE
            if micro == self.micro_num - 1:
                # The loop variable reuses the name `i` from above; the step added to
                # fw_idx grows with each iteration (i * constant).
                for i in range(warm_micro_num - 1):
                    fw_idx += i * NumberConstant.FW_NORM_OP_NUM_ENABLE_PP_OTHER_STAGE
                    recompute_fw.append([fw_norm_index[fw_idx]])
    if self.vpp > 1:
        # Reads fw_layer_start[1], so at least two micro batches are required here.
        fw_per_micro_opt_num = fw_layer_start[1][0] - fw_layer_start[0][0]
    else:
        fw_per_micro_opt_num = fw_norm_index[2] - fw_norm_index[0]
    bw_per_micro_opt_num = bw_norm_index[2] - bw_norm_index[0]
    return fw_layer_start, bw_layer_end, recompute_fw, fw_per_micro_opt_num, bw_per_micro_opt_num
def search_first_operator_idx_for_per_layer_enable_pp(self, fw_norm_index, bw_norm_index):
    """Dispatch to the stage-specific boundary search used when pipeline parallelism is on.

    The last pipeline stage (``stage_id == pp - 1``) has its own search routine;
    every other stage shares one.

    Returns:
        Whatever the selected routine returns.
    """
    on_last_stage = self.stage_id == self.search_cfg.pp - 1
    locate = (
        self.search_first_operator_idx_for_per_layer_enable_pp_last_stage
        if on_last_stage
        else self.search_first_operator_idx_for_per_layer_enable_pp_other_stage
    )
    return locate(fw_norm_index, bw_norm_index)
i:bw_micro_rms_num * (i + 1)] + for i in range(self.micro_num)] + fw_per_micro_opt_num = fw_norm_index[0][2] - fw_norm_index[0][0] + bw_per_micro_opt_num = bw_norm_index[0][2] - bw_norm_index[0][0] + + for micro in range(self.micro_num): + fw_layer_start.append([fw_norm_index[micro][0]]) + bw_layer_end.append([bw_norm_index[micro][-1]]) + recompute_fw.append([fw_norm_index[micro][3]]) + else: + fw_per_micro_opt_num = fw_norm_index[2] - fw_norm_index[0] + bw_per_micro_opt_num = bw_norm_index[2] - bw_norm_index[0] + + for micro in range(self.micro_num): + fw_layer_start.append([fw_norm_index[3 * micro]]) + bw_layer_end.append([bw_norm_index[3 * (micro + 1) - 1]]) + return fw_layer_start, bw_layer_end, recompute_fw, fw_per_micro_opt_num, bw_per_micro_opt_num + + def _calculate_warm_micro_num(self): + if self.vpp != 1: + return self.search_cfg.pp * (self.vpp - 1) + 1 + (self.search_cfg.pp - self.stage_id - 1) * 2 + else: + return self.search_cfg.pp - self.stage_id + + def _calculate_fw_idx(self, fw_idx, i, micro): + if i * (self.vpp * self.search_cfg.pp) <= micro < i * ( + self.vpp * self.search_cfg.pp) + self.search_cfg.pp and self.vpp > 1: + fw_idx += NumberConstant.FW_NORM_OP_NUM_ENABLE_PP_OTHER_STAGE + else: + fw_idx += NumberConstant.FW_NORM_OP_NUM_ENABLE_PP_LAST_STAGE + return fw_idx + + def _calculate_bw_idx(self, bw_idx, i, micro): + if i * (self.vpp * self.search_cfg.pp) <= micro < i * ( + self.vpp * self.search_cfg.pp) + self.search_cfg.pp or self.vpp == 1: + bw_idx += NumberConstant.FW_NORM_OP_NUM_ENABLE_PP_LAST_STAGE + else: + bw_idx += NumberConstant.FW_NORM_OP_NUM_ENABLE_PP_OTHER_STAGE + return bw_idx + + +class ProfilingLayerInfo: + def __init__(self): + self.time = [] + self.start_memory = [] + self.peak_memory = [] + self.reserved_memory = [] + self.operator_info = [] + self.communication_info = [] + + def extend_attr(self, new_layer): + for attr_name in self.__dict__.keys(): + obj_attr = getattr(self, attr_name) + if isinstance(obj_attr, list): + 
class ProfilingModelInfo:
    """Container for everything parsed from one profiling run.

    Holds one ``ProfilingLayerInfo`` per model phase plus flat lists of memory
    and communication measurements.
    """

    def __init__(self):
        # Per-phase layer records.
        self.embedding = ProfilingLayerInfo()
        self.forward = ProfilingLayerInfo()
        self.loss = ProfilingLayerInfo()
        self.backward = ProfilingLayerInfo()
        self.optimizer = ProfilingLayerInfo()
        # Memory measurements.
        self.hccl_memory = []
        self.cann_and_driver_memory = []
        # Communication measurements, one list per parallel dimension.
        self.communication_matrix = []
        self.context_parallel_comm = []
        self.pipeline_parallel_comm = []
        self.data_parallel_comm = []
        self.tensor_parallel_comm = []
        self.expert_parallel_comm = []
        # Scalars: these are never merged by extend_stage_info.
        self.search_cfg = None
        self.stage_id = 0
        self.mc2_total_time = []
        self.matmul_total_time = []

    def extend_stage_info(self, new_model):
        """Append the records of ``new_model`` onto this instance, in place.

        List attributes are extended with the same-named attribute of
        ``new_model`` (nothing is added when it is missing).
        ``ProfilingLayerInfo`` attributes are merged through their own
        ``extend_attr``. Every other attribute is left untouched.
        """
        for field_name, current in vars(self).items():
            if isinstance(current, list):
                current.extend(getattr(new_model, field_name, []))
            elif isinstance(current, ProfilingLayerInfo):
                current.extend_attr(getattr(new_model, field_name, None))
class DataParallelCommunication(BaseParallelCommunication):
    """
    Data parallel communication

    Extends the base record with six timing fields, all initialised to 0.0:
    three for the MLP part and three for everything else.
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): "ag" / "rs" presumably stand for all-gather / reduce-scatter - confirm.
        # MLP-related timings.
        self.mlp_zero_time_ms: float = 0.0
        self.mlp_ag_time_ms: float = 0.0
        self.mlp_rs_time_ms: float = 0.0
        # Timings for the remaining (non-MLP) parameters.
        self.other_zero_time_ms: float = 0.0
        self.other_ag_time_ms: float = 0.0
        self.other_rs_time_ms: float = 0.0
class OperatorDetails:
    """Plain record describing a single operator row.

    The constructor stores its arguments unchanged; no conversion or validation
    is performed, so each attribute holds exactly the value the caller passed.
    """

    def __init__(self, name, type_, input_shapes, output_shapes, duration_us, wait_time_us, accelerator_core):
        # What the operator is.
        self.name: str = name
        self.type: str = type_
        # Tensor shapes as reported by the profiler.
        self.input_shapes: str = input_shapes
        self.output_shapes: str = output_shapes
        # Timing columns, stored as received.
        self.duration_us: float = duration_us
        self.wait_time_us: float = wait_time_us
        # Which core the operator ran on.
        self.accelerator_core: str = accelerator_core
def update_norm_indices(self):
    """Recompute and store the forward/backward memory index tables.

    The norm-operator positions come from ``self._analyse_norm_op()``. They are
    handed to the pipeline-enabled search when ``search_cfg.pp > 1`` and to the
    pipeline-disabled search otherwise. The recompute list returned by the
    search is discarded; the other four results are stored on ``self``.
    """
    fw_candidates, bw_candidates = self._analyse_norm_op()
    if self.search_cfg.pp > 1:
        locate = self.search_first_operator_idx_for_per_layer_enable_pp
    else:
        locate = self.search_first_operator_idx_for_per_layer_disable_pp
    (self.fw_memory_indices,
     self.bw_memory_indices,
     _recompute_fw,
     self.fw_memory_per_micro_opt_num,
     self.bw_memory_per_micro_opt_num) = locate(fw_candidates, bw_candidates)
def analyse_forward(self):
    """Compute the start and peak allocated memory of the forward pass per micro batch.

    Side effect: each ``self.fw_memory_indices[micro]`` gets one extra entry
    appended, namely its last entry plus ``fw_memory_per_micro_opt_num - 1``.
    That entry is the exclusive end of the scanned row range.

    Returns:
        ``(start_memory, peak_memory)``, two lists with one float per micro batch.
    """
    start_by_micro = [0.0] * self.micro_num
    peak_by_micro = [0.0] * self.micro_num
    for micro in range(self.micro_num):
        span = self.fw_memory_indices[micro]
        span.append(span[-1] + self.fw_memory_per_micro_opt_num - 1)
        first_row, end_row = span[0], span[-1]
        start = float(self._memory_details[first_row][SpecialKeyName.ALLOCATED_MEMORY])
        peak = peak_by_micro[micro]
        # compare_memory keeps a non-zero start value and tracks the running maximum.
        for record in self._memory_details[first_row:end_row]:
            start, peak = self.compare_memory(record, start, peak)
        start_by_micro[micro] = start
        peak_by_micro[micro] = peak

    return start_by_micro, peak_by_micro
range(self.micro_num)] + for micro in range(self.micro_num): + self.bw_memory_indices[micro].insert(0, + self.bw_memory_indices[micro][-1] - self.bw_memory_per_micro_opt_num) + bw_start_memory[micro] = float( + self._memory_details[self.bw_memory_indices[micro][0]][SpecialKeyName.ALLOCATED_MEMORY]) + for msg in self._memory_details[self.bw_memory_indices[micro][0]: self.bw_memory_indices[micro][-1]]: + bw_start_memory[micro], bw_peak_memory[micro] = \ + self.compare_memory(msg, bw_start_memory[micro], bw_peak_memory[micro]) + + return bw_start_memory, bw_peak_memory + + def analyse_optimizer(self): + op_start_memory, op_peak_memory = 0, 0 + for msg in self._memory_details[self.bw_memory_indices[-1][-1] + 1:]: + op_start_memory, op_peak_memory = self.compare_memory(msg, op_start_memory, op_peak_memory) + return [op_start_memory], [op_peak_memory] + + def _analyse_norm_op(self): + fw_memory_indices, bw_memory_indices = [], [] + for index, row in enumerate(self._memory_details[1:], start=1): + if self.norm_op in self._memory_details[index - 1][SpecialKeyName.NAME]: + continue + if self.norm_op in row[SpecialKeyName.NAME] \ + and SpecialOperatorName.BACKWARD not in row[SpecialKeyName.NAME]: + fw_memory_indices.append(index) + elif self.norm_op in row[SpecialKeyName.NAME] \ + and SpecialOperatorName.BACKWARD in row[SpecialKeyName.NAME]: + bw_memory_indices.append(index) + + return fw_memory_indices, bw_memory_indices + + def _update_norm_op(self): + structure_cls = StructureAnalyseTool(self._rank_file_path, self._memory_details) + if structure_cls.fw_norm_op == SpecialOperatorName.FW_LAYER_NORM_TYPE: + self.norm_op = SpecialOperatorName.LAYER_NORM + else: + self.norm_op = SpecialOperatorName.RMS_NORM diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_meta_parse.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_meta_parse.py new file mode 100644 index 000000000..0f6b94704 --- /dev/null +++ 
class FileAnalyseTool:
    """
    support csv and json parse
    """

    @classmethod
    def analyse_csv_info(cls, file_path: str, csv_name: str):
        """Read ``csv_name`` inside ``file_path`` into a list of row dicts.

        Args:
            file_path: directory containing the file.
            csv_name: file name of the CSV.

        Returns:
            One dict per data row, as produced by ``csv.DictReader``.

        Raises:
            FileNotFoundError: the file does not exist.
            csv.Error: the CSV reader rejected the file content.
        """
        csv_path = os.path.join(file_path, csv_name)
        try:
            with open(csv_path, newline='') as csvfile:
                check_file_size(csvfile)
                reader = csv.DictReader(csvfile)
                csv_details = list(reader)

        # Raise real exception objects: raising a bare string is itself a TypeError
        # and hides the original problem from the caller.
        except FileNotFoundError as e:
            raise FileNotFoundError(f"'Please check file name, {e}") from e
        except csv.Error as e:
            raise csv.Error(f"An error occurred while reading the CSV file: {e}") from e
        return csv_details

    @classmethod
    def analyse_json_info(cls, file_path: str, json_name: str):
        """Extract the per-operator communication time info from a JSON file.

        Only the first top-level value of the JSON document is read. Within it,
        each entry of the ``p2p`` and ``collective`` sections is keyed by the
        part of its name before ``"@"`` and mapped to its
        ``"Communication Time Info"`` value.

        Args:
            file_path: directory containing the file.
            json_name: file name of the JSON document.

        Returns:
            ``{"p2p": {...}, "collective": {...}}``.

        Raises:
            KeyError: an entry has no ``"Communication Time Info"`` field.
            RuntimeError: any other failure while opening or parsing the file.
        """
        json_path = os.path.join(file_path, json_name)
        json_details = {"p2p": {}, "collective": {}}
        try:
            with open(json_path, 'r') as f:
                check_file_size(f)
                details = json.load(f)
                details_value = list(details.values())[0]
                for section in ('p2p', 'collective'):
                    for name, info in details_value.get(section, {}).items():
                        comm_name = name.split("@")[0]
                        json_details[section][comm_name] = info["Communication Time Info"]
        except KeyError as e:
            raise KeyError(f"'Please check file name, {e}") from e
        except Exception as e:
            # Deliberately broad: every remaining failure is reported as one read error.
            raise RuntimeError(f"Read communication file error: {e}") from e

        return json_details
def analyse_norm_op(self):
    """Scan the rows in ``self._memory_details`` for norm and matmul operators.

    Rows without both a ``"Name"`` and a ``"Type"`` column are skipped.

    Returns:
        ``(fw_norm_op_idx_list, bw_norm_op_idx_list, matmul_total_time, mc2_total_time)``.
        The two lists hold the positions of rows whose type equals
        ``self.fw_norm_op`` / ``self.bw_norm_op``. ``matmul_total_time`` sums the
        converted duration of ``MatMulCommon`` rows. ``mc2_total_time`` sums the
        same rows plus ``AllGatherMatmul`` and ``MatmulReduceScatter`` rows.
    """
    forward_positions = []
    backward_positions = []
    matmul_total = 0
    mc2_total = 0
    for position, kernel in enumerate(self._memory_details):
        if not ("Name" in kernel and "Type" in kernel):
            continue
        op_type = kernel["Type"]
        if op_type == "MatMulCommon":
            elapsed = float(kernel["Duration(us)"]) / NumberConstant.CONVERSION_TIME
            # Plain matmul time is counted in both totals.
            matmul_total += elapsed
            mc2_total += elapsed
        if op_type in ("AllGatherMatmul", "MatmulReduceScatter"):
            mc2_total += float(kernel["Duration(us)"]) / NumberConstant.CONVERSION_TIME
        if op_type == self.fw_norm_op:
            forward_positions.append(position)
        elif op_type == self.bw_norm_op:
            backward_positions.append(position)
    return forward_positions, backward_positions, matmul_total, mc2_total
def parse_node_pkl(self, args):
    """Parse this node's profiling output and write the result as pickle file(s).

    The search configuration is loaded from ``ootb_<node_rank>.pkl`` next to
    the profiling directory and handed to ``ProfilingParser``. When pipeline
    parallelism spans several nodes, the results of all nodes are gathered and
    each one is written to ``pkl_path/node_<i>.pkl``. Otherwise only this
    node's result is written to ``pkl_path/node_<node_rank>.pkl``.

    Args:
        args: run arguments; ``node_rank`` and ``pipeline_model_parallel_size``
            are read here and the object is also passed on to ``ProfilingParser``.
    """
    parent_dir = os.path.dirname(self.profiling_file_path)
    ootb_node_path = os.path.join(parent_dir, f'ootb_{args.node_rank}.pkl')
    with open(ootb_node_path, 'rb') as f:
        cfg = restricted_loads(f)
    profiling_parser = ProfilingParser(self.profiling_file_path, search_cfg=cfg, args=args)
    profiling_res = profiling_parser.parser()

    pkl_path = os.path.join(self.profiling_file_path, 'pkl_path')
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    mode = stat.S_IWUSR | stat.S_IRUSR  # owner read/write only

    def dump_node_result(node_index, result):
        # exist_ok avoids the exists()/mkdir() race when several processes
        # reach this point at the same time.
        os.makedirs(pkl_path, exist_ok=True)
        pkl_node_path = os.path.join(pkl_path, f'node_{node_index}.pkl')
        with os.fdopen(os.open(pkl_node_path, flags, mode=mode), 'wb') as out:
            pickle.dump(result, out)

    if args.pipeline_model_parallel_size > 1 and profiling_parser.nodes > 1:
        # One representative rank per node: the first device of each node.
        ranks = [i * profiling_parser.devices_per_node for i in range(profiling_parser.nodes)]
        profiling_group = torch.distributed.new_group(ranks)
        gather_objects = [None for _ in range(profiling_parser.nodes)]
        torch.distributed.all_gather_object(gather_objects, profiling_res, group=profiling_group)
        for i in range(profiling_parser.nodes):
            dump_node_result(i, gather_objects[i])

        torch.distributed.barrier(group=profiling_group)
        torch.distributed.destroy_process_group(group=profiling_group)
    else:
        dump_node_result(args.node_rank, profiling_res)
""" + + def __init__(self, operator_details): + self._operator_details = operator_details + + def analyse_embedding(self, start_idx, end_idx): + return self._analyse_operators(start_idx, end_idx) + + def analyse_forward(self, start_idx, end_idx): + return self._analyse_operators(start_idx, end_idx) + + def analyse_loss(self, start_idx, end_idx): + return self._analyse_operators(start_idx, end_idx) + + def analyse_backward(self, start_idx, end_idx): + return self._analyse_operators(start_idx, end_idx) + + def analyse_optimizer(self, start_idx, end_idx): + return self._analyse_operators(start_idx, end_idx) + + def _analyse_operators(self, start_idx, end_idx): + details_list = [] + for i in range(start_idx, end_idx): + detail = self._operator_details[i] + op_detail = OperatorDetails( + name=detail['Name'], + type_=detail['Type'], + input_shapes=detail['Input Shapes'], + output_shapes=detail['Output Shapes'], + duration_us=detail['Duration(us)'], + wait_time_us=detail['Wait Time(us)'], + accelerator_core=detail['Accelerator Core'] + ) + details_list.append(op_detail) + return details_list diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_parse.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_parse.py new file mode 100644 index 000000000..c1d7f5785 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/module/parse/profiling_parse/profiling_parse.py @@ -0,0 +1,200 @@ +import math +import os +import re +from mindspeed.auto_tuning.utils.logger import get_logger +from mindspeed.auto_tuning.module.parse.profiling_parse.profiling_config import ProfilingConfig, \ + ProfilingModelInfo +from mindspeed.auto_tuning.module.parse.profiling_parse.profiling_meta_parse import StructureAnalyseTool +from mindspeed.auto_tuning.module.parse.profiling_parse.profiling_operator_parse import AnalyseOperatorMsg +from mindspeed.auto_tuning.module.parse.profiling_parse.profiling_communication_parse import 
def parse_fw_bw_structure(self, fw_norm_op_idx_list, bw_norm_op_idx_list):
    """Build the per-micro-batch forward/backward index lists, including their outer bound.

    The raw boundaries come from the pipeline-enabled search when
    ``search_cfg.pp > 1`` and from the pipeline-disabled search otherwise. Each
    forward list then gets an end index appended and each backward list gets a
    start index inserted at the front. Both use the per-micro operator count,
    or the distance between the list's last two entries when
    ``per_micro_layer != 1``.

    Returns:
        ``(fw_layer_start_index, bw_layer_start_index)``; these are the lists
        returned by the search, modified in place.
    """
    if self.search_cfg.pp > 1:
        locate = self.search_first_operator_idx_for_per_layer_enable_pp
    else:
        locate = self.search_first_operator_idx_for_per_layer_disable_pp
    fw_spans, bw_spans, _recompute_fw, fw_step, bw_step = locate(fw_norm_op_idx_list, bw_norm_op_idx_list)

    for micro in range(self.micro_num):
        if self.per_micro_layer != 1:
            # Several layers per micro batch: take the step from the last two boundaries.
            fw_step = fw_spans[micro][-1] - fw_spans[micro][-2]
            bw_step = bw_spans[micro][-1] - bw_spans[micro][-2]
        fw_span = fw_spans[micro]
        fw_span.append(fw_span[-1] + fw_step - 1)
        bw_span = bw_spans[micro]
        bw_span.insert(0, bw_span[-1] - bw_step)
    return fw_spans, bw_spans
memory_record_details = FileAnalyseTool.analyse_csv_info(self.rank_file_path, 'memory_record.csv') + structure_cls = StructureAnalyseTool(self.rank_file_path, kernel_details) + fw_norm_op_idx_list, bw_norm_op_idx_list, matmul_total_time, mc2_total_time = structure_cls.analyse_norm_op() + self.model.matmul_total_time = [matmul_total_time] + self.model.mc2_total_time = [mc2_total_time] + fw_layer_start_index, bw_layer_start_index = self.parse_fw_bw_structure(fw_norm_op_idx_list, + bw_norm_op_idx_list) + + self._parse_operator_info(kernel_details, fw_layer_start_index, bw_layer_start_index) + self._parse_communication_info(communication_details, kernel_details) + self._parse_memory_info(memory_details, memory_record_details) + + def parser(self): + """ + Parse profiling files. + Returns: + model: ProfilingModelInfo + """ + self.logger.info('>>>> Profiling parse starting!') + self._parse_each_node() + self.logger.info('>>>> Profiling parse success!') + return self.model + + def _validate_file_path(self, filename, attr_name): + file_path = os.path.join(self.rank_file_path, filename) + if not os.path.exists(file_path): + raise FileNotFoundError(f"The file {file_path} was not found.") + setattr(self, attr_name, file_path) + + def _update_profiling_file_path(self): + self._validate_file_path('kernel_details.csv', '_kernel_details_csv_path') + self._validate_file_path('memory_record.csv', '_memory_record_csv_path') + self._validate_file_path('operator_memory.csv', '_operator_memory_csv_path') + self._validate_file_path('npu_module_mem.csv', '_npu_module_mem_csv_path') + self._validate_file_path('communication.json', '_communication_json_path') + self._validate_file_path('op_statistic.csv', '_op_statistic_csv_path') + + def _extract_rank_file_path(self): + """ + Get all rank file path, the profiling process generates the profiler_info_{rank_id}.json file. 
+ Returns: + rank_file_path: Dict[rank_id] = path + """ + + def extract_rankid_from_filename(filename): + match = re.search(r'profiler_info_(\d+)\.json', filename) + if match: + return int(match.group(1)) + else: + return None + + rank_file_path = {} + for ascend_dir in os.listdir(self._root_path): + profiling_path = os.path.join(self._root_path, ascend_dir) + if os.path.isdir(profiling_path) and 'ascend' in ascend_dir: + json_files = [f + for f in os.listdir(profiling_path) + if f.endswith('.json') and f.startswith('profiler_info_')] + if not json_files: + raise ValueError(f"Args profile error, JSON is not exist in {ascend_dir}.") + + rank_id = extract_rankid_from_filename(json_files[0]) + if rank_id is not None: + rank_file_path[rank_id] = profiling_path + return rank_file_path + + def _join_rank_ascend_path(self, file_name): + rank_file_path = os.path.join(self._root_path, file_name, "ASCEND_PROFILER_OUTPUT") + if not os.path.exists(rank_file_path): + raise f" {rank_file_path} is not exist." + return rank_file_path + + def _get_first_rank_and_stage_id_of_each_stage(self, node_first_rank_id, devices_each_stage, rank_file_path): + """ + Get the rank file path based on the number of devices each stage. For example: + devices_each_node devices_each_stage node pp + 1. 8 16 2 1 + 2. 8 8 2 2 + 3. 
def _parse_first_rank_of_each_stage(self, rank_file_path: dict):
    """Parse the first rank's profiling output of every stage handled here.

    ``_get_first_rank_and_stage_id_of_each_stage`` returns either a single
    ``(path, stage_id)`` pair or a list of such pairs.  For each pair the path
    and stage id are stored on ``self`` (and the stage id on ``self.model``)
    before ``parse_model_structure`` is called.
    """
    first_rank_on_node = self.node_rank * self.devices_per_node
    stage_width = self.nodes * self.devices_per_node // self.search_cfg.pp
    located = self._get_first_rank_and_stage_id_of_each_stage(first_rank_on_node, stage_width,
                                                              rank_file_path)
    # Normalise the single-pair result so one loop serves both return shapes.
    pairs = located if isinstance(located, list) else [located]
    for path, stage_id in pairs:
        self.rank_file_path = path
        self.stage_id = stage_id
        self.model.stage_id = stage_id
        self.parse_model_structure()
def _parse_memory_info(self, memory_details, memory_record_details):
    """Record start/peak memory of every training phase on ``self.model``.

    For embedding, forward, loss, backward and optimizer (in that order) the
    matching ``analyse_<phase>`` method of ``AnalyseMemoryMsg`` is called and
    its ``(start, peak)`` result is appended to the phase's ``start_memory``
    and ``peak_memory`` lists.  Finally the CANN/driver memory derived from
    ``memory_record_details`` is stored on the model.
    """
    analyser = AnalyseMemoryMsg(self.rank_file_path, self.search_cfg, memory_details, stage_id=self.stage_id)
    analyser.update_norm_indices()
    for phase_name in ("embedding", "forward", "loss", "backward", "optimizer"):
        start, peak = getattr(analyser, f"analyse_{phase_name}")()
        phase = getattr(self.model, phase_name)
        phase.start_memory.append(start)
        phase.peak_memory.append(peak)
    self.model.cann_and_driver_memory = analyser.analyse_cann_and_driver(memory_record_details)
class ModuleRecomputeInfo:
    """Profiling record of one module, built from its context dictionary.

    Attributes are read from ``context`` with ``dict.get`` and are therefore
    ``None`` when the key is absent: ``name``, ``prefix_name``, ``memory``,
    ``input_size`` (key ``"input"``) and ``time``.  ``recompute`` starts as
    ``False``.
    """

    def __init__(self, context: Dict):
        self.name = context.get("name")
        self.prefix_name = context.get("prefix_name")
        # A context without "name"/"prefix_name" used to fail here with a
        # TypeError (None + str).  Fall back to empty strings, the same way
        # cal_module_forward_time builds the module's full name.
        self.full_name = (self.prefix_name or "") + '.' + (self.name or "")
        self.memory = context.get("memory")
        self.input_size = context.get("input")
        self.time = context.get("time")
        self.recompute = False
def pre_hook_func(self, state, *args, **kargs):
    """Forward pre-hook body: record input size, memory baseline and start event.

    ``state`` is the module's context dictionary.  ``state['input']`` is always
    refreshed from ``args``.  On the stop step ``state['memory']`` is set from
    ``torch.npu.memory_allocated()``; on earlier steps a timing event is
    recorded and queued under the module's full name in ``self.event_dict``.
    """
    state.setdefault('memory', 0)
    state['input'] = self.cal_input_output_size(args)
    if self.profiling_step == self.stop_profiling_step:
        state['memory'] = torch.npu.memory_allocated() - state['input'] * self.unit_mb
        print(f"success print pre hook memory = {state['memory']}")

    full_name = state['prefix_name'] + '.' + state['name']
    pending_events = self.event_dict.setdefault(full_name, [])
    if self.profiling_step >= self.stop_profiling_step:
        return
    start_event = torch.npu.Event(enable_timing=True)
    pending_events.append([start_event])
    start_event.record()
def forward_post_hook(self, ctx):
    """Build the forward hook for the module described by ``ctx``.

    The returned callable forwards ``(ctx, args, output)`` to
    ``post_hook_func`` and then pops the last entry of
    ``self.context['module']`` when that key is present.
    """
    def hook(module, args, output):
        self.post_hook_func(ctx, args, output)
        if 'module' not in self.context:
            return
        self.context['module'].pop()

    return hook
def register_recursive_hook(self, model, ctx, profiling_prefix, layer_index=0):
    """Walk ``model`` next to its context tree and attach profiling hooks.

    Children are matched to ``ctx['layers']`` by position, starting at
    ``layer_index``.  A child gets a forward pre-hook and a forward hook when
    it opens a profiled subtree (it is tagged ``is_recomputing_layer`` and is
    not a ``ModuleList``, or it is the first child and tagged
    ``allowed_recomputing``) or when its ``prefix_name`` starts with the
    current ``profiling_prefix``.  Hook handles are collected in
    ``self.modules_hooks``.
    """
    position = layer_index or 0
    for child in model.children():
        if 'layers' not in ctx:
            continue
        child_ctx = ctx['layers'][position]
        parent_path = child_ctx['prefix_name']
        child_name = child_ctx['name']

        tagged_layer = not isinstance(child, torch.nn.ModuleList) and 'is_recomputing_layer' in child_ctx
        first_allowed = 'allowed_recomputing' in child_ctx and position == 0
        if tagged_layer or first_allowed:
            # This child opens a profiled subtree; the new prefix also applies
            # to the siblings visited after it.
            profiling_prefix = parent_path + "." + child_name
            attach_hooks = True
        else:
            attach_hooks = bool(profiling_prefix and parent_path.startswith(profiling_prefix))

        if attach_hooks:
            pre_handle = child.register_forward_pre_hook(self.forward_pre_hook(child_ctx))
            post_handle = child.register_forward_hook(self.forward_post_hook(child_ctx))
            self.modules_hooks.extend((pre_handle, post_handle))
        self.register_recursive_hook(child, child_ctx, profiling_prefix)
        position += 1
def cal_input_output_size(self, args):
    """Sum the memory of the tensors in ``args`` and divide by ``self.unit_mb``.

    ``args`` may be a single tensor, or an iterable whose items are tensors,
    ``None``, or iterables of tensors/``None`` (one nesting level).  Other
    item types are reported with a warning and contribute nothing.

    A module may return ``None`` or a non-iterable object; those used to
    raise ``TypeError`` inside the forward hook and are now counted as 0.

    Returns:
        Total size from ``get_tensor_mem_size``, floor-divided by
        ``self.unit_mb``.
    """
    if args is None:
        return 0
    if isinstance(args, torch.Tensor):
        return get_tensor_mem_size(args) // self.unit_mb
    if not isinstance(args, Iterable):
        print(f"warning: unknown input/output type {str(type(args))}")
        return 0

    size = 0
    for arg in args:
        if isinstance(arg, torch.Tensor):
            size += get_tensor_mem_size(arg)
        elif isinstance(arg, Iterable):
            for t in arg:
                if isinstance(t, torch.Tensor):
                    size += get_tensor_mem_size(t)
                elif t is not None:
                    print(f"warning: unknown input/output type {str(type(t))}")
        elif arg is not None:
            print(f"warning: unknown input/output type {str(type(arg))}")
    return size // self.unit_mb
def cal_module_forward_time(context, event_dict: Dict[str, List]):
    """Fold recorded forward timings into ``context`` and all its sub-layers.

    For a context that has a ``"memory"`` entry and whose full name
    (``prefix_name + '.' + name``) is a key of ``event_dict``, every
    ``[start_event, end_event]`` pair adds one sample: ``forward_cnt`` and
    ``pre_total_time`` are increased and ``time`` becomes their running mean.

    Entries that hold only a start event (no end event was recorded for that
    forward call) are skipped instead of raising ``IndexError``.

    Args:
        context: module context dictionary; children live under ``"layers"``.
        event_dict: full module name -> list of ``[start, end]`` event pairs.
    """
    full_name = context.get('prefix_name', "") + '.' + context.get('name', "")
    if "memory" in context and full_name in event_dict:
        for event_pair in event_dict[full_name]:
            if len(event_pair) < 2:
                # Unfinished measurement: there is nothing to time.
                continue
            start_event, end_event = event_pair[0], event_pair[1]
            elapsed = start_event.elapsed_time(end_event)

            # forward_cnt is at least 1 after the increment, so the mean is
            # always defined.
            context['forward_cnt'] = context.get('forward_cnt', 0) + 1
            context['pre_total_time'] = context.get('pre_total_time', 0) + elapsed
            context['time'] = context['pre_total_time'] / context['forward_cnt']

    for sub_layer_context in context.get("layers", ()):
        cal_module_forward_time(sub_layer_context, event_dict)
def get_num_warmup_micro_batches(self):
    """Compute ``num_micro_batches`` and the warm-up micro-batches per model chunk.

    ``self.num_micro_batches`` is ``global_batch_size // micro_batch_size //
    data_parallel_size``.  The per-chunk warm-up counts are appended to
    ``self.num_warmup_micro_batches_per_chunk``, always for pipeline rank 0:

    * no pipeline parallelism: a single ``1``;
    * one model chunk: a single entry equal to the pipeline size;
    * several chunks: one entry per chunk, each starting from the pipeline
      size; the first chunk absorbs a positive remainder and the last chunk a
      negative one.
    """
    pp_size = self.search_config.pipeline_model_parallel_size
    dp_size = self.search_config.data_parallel_size
    model_cfg = self.model_config
    self.num_micro_batches = model_cfg.global_batch_size // model_cfg.micro_batch_size // dp_size

    per_chunk = self.num_warmup_micro_batches_per_chunk
    if pp_size <= 1:
        per_chunk.append(1)
        return

    pp_rank = 0
    chunks = self.num_model_chunks
    if chunks == 1:
        per_chunk.append(pp_size - pp_rank - 1 + 1)
        return

    warmup = (pp_size - pp_rank - 1) * 2 + (chunks - 1) * pp_size + 1
    warmup = min(warmup, self.num_micro_batches * chunks)
    remainder = warmup - pp_size * chunks
    last_chunk = chunks - 1
    for chunk in range(chunks):
        if chunk == 0:
            extra = max(0, remainder)
        elif chunk == last_chunk:
            extra = min(0, remainder)
        else:
            extra = 0
        per_chunk.append(pp_size + extra)
def remove_full_selective_node(self, recompute_nodes):
    """Empty ``recompute_nodes`` when it covers every child of some parent.

    Each entry of ``self.parent_layers`` that has children registered in
    ``self.parent_children_dict`` is checked; as soon as all children of one
    parent appear in ``recompute_nodes`` the list is cleared in place.

    Returns:
        The same ``recompute_nodes`` list object (possibly emptied).
    """
    if not len(recompute_nodes):
        return recompute_nodes
    try:
        for parent in self.parent_layers:
            parent_name = parent.full_name
            if parent_name not in self.parent_children_dict:
                continue
            children = self.parent_children_dict[parent_name]
            selected = sum(1 for child in children if child.full_name in recompute_nodes)
            if selected == len(children):
                recompute_nodes.clear()
                break
    except KeyError:
        print("[ERROR] Some of these keys don't exist.")
    return recompute_nodes
def get_max_goods_value(self, idx, ans):
    """Try to improve cell ``ans[i][j]`` by giving ``k`` layers combination ``i``.

    ``idx`` is ``[i, j, k]``.  The candidate extends ``ans[i - 1][j - k]`` with
    ``k`` more layers that use ``self.layers_combination[i]``.

    Returns:
        A deep copy of ``ans[i - 1][j]`` when ``k == 0``; otherwise
        ``ans[i][j]`` itself, updated in place when the candidate fits in
        ``self.memory_limit``, does not cost more than the current cell and
        accounts for exactly ``j`` layers.
    """
    i, j, k = idx[0], idx[1], idx[2]
    previous = ans[i - 1][j - k]
    if k == 0:
        return deepcopy(previous)

    current = ans[i][j]
    combination = self.layers_combination[i]
    layers_before = j - k

    # Accumulate one term at a time, in layer order.
    used_memory = previous.memory
    for offset in range(k):
        chunk_rank = (layers_before + offset) // self.layer_num_per_chunk
        used_memory += self.num_warmup_micro_batches_per_chunk[chunk_rank] * combination.memory

    added_cost = k * combination.cost * self.num_micro_batches
    # An infinite previous cost is replaced by the added cost alone.
    total_cost = added_cost if previous.cost == float('inf') else previous.cost + added_cost

    fits = self.memory_limit >= used_memory
    covers_all_layers = (len(previous.layer_names) + k) == j
    if fits and total_cost <= current.cost and covers_all_layers:
        current.memory = used_memory
        current.cost = total_cost
        current.layer_names.clear()
        if len(previous.layer_names) > 0:
            current.layer_names.extend(previous.layer_names)
        current.layer_names.extend(combination.name for _ in range(k))

    return current
def get_module_memory_time_rate(module_recompute_info: "ModuleRecomputeInfo"):
    """Return ``(memory - input_size) / time`` for a module, or ``-1``.

    ``-1`` is the "no usable rate" sentinel.  It is returned when the module
    has no memory figure and also when it has no positive forward time; the
    latter used to raise ``ZeroDivisionError`` (time 0) or ``TypeError``
    (time missing).  A missing ``input_size`` is treated as 0.
    """
    memory = module_recompute_info.memory
    forward_time = module_recompute_info.time
    if not memory or not forward_time:
        return -1
    return (memory - (module_recompute_info.input_size or 0)) / forward_time
+def get_parent_module_list(sub_layer_full_name): + parent_module_list = [] + separate_node_name_list = sub_layer_full_name.split(".") + for i in range(1, len(separate_node_name_list)): + parent_node_name = ".".join(separate_node_name_list[:-i]) + parent_module_list.append(parent_node_name) + + return parent_module_list + + +def is_cur_module_tag_recompute(recompute_module_name_list, cur_module_name): + if cur_module_name in recompute_module_name_list: + return True + else: + return False diff --git a/model/train/yoco_moe/mindspeed/auto_tuning/module/search/search_engine.py b/model/train/yoco_moe/mindspeed/auto_tuning/module/search/search_engine.py new file mode 100644 index 000000000..b96400631 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/auto_tuning/module/search/search_engine.py @@ -0,0 +1,272 @@ +from typing import Deque, List, Optional, Tuple +from collections import deque +from copy import deepcopy +import pickle +import os +import sys +import traceback as tb + + + +from mindspeed.auto_tuning.utils.logger import get_logger +from mindspeed.auto_tuning.module.memory.memory_modeling import MemoryModeling +from mindspeed.auto_tuning.config.search_config import SearchConfig +from mindspeed.auto_tuning.module.search.stage_1_prune import stage_1_discrete_search_space_prune +from mindspeed.auto_tuning.config.model_config import ModelConfig +from mindspeed.auto_tuning.module.hardware import Hardware +from mindspeed.auto_tuning.utils.utils import get_prof_dir +from mindspeed.auto_tuning.utils.restricted_unpickler import restricted_loads +from mindspeed.auto_tuning.config.generate_profiling_configs import generate_profiling_configs + + +_logger = get_logger("search") + + +def search_demo(model_config: ModelConfig, + perf_obj_function, + working_dir: str, + re_profiling_flag=True, + recomp_cfg_list=None) -> [List[Optional[SearchConfig]], tuple]: + device_mem_cap = Hardware().memory_limit + _logger.info(f"Search: total_device_num: {Hardware().num_devices}") + 
_logger.info(f"Search: device_mem_cap: {device_mem_cap}") + best_perf_cfg_map: Deque[Tuple[float, Optional[SearchConfig]]] = deque([(float("inf"), None)] * 3, 3) + + stage_1_valid_ptd_configs = stage_1_discrete_search_space_prune( + model_config, + pod_limit=8 + ) + + _logger.info(f"Stage [1] pruned result: number of valid PTD configurations [{len(stage_1_valid_ptd_configs)}]") + for cfg in stage_1_valid_ptd_configs: + _logger.info(f"Stage [1] pruned config: TP=[{cfg.tp}] PP=[{cfg.pp}] LAYERS_PER_VPP=[{cfg.layers_per_vpp}] DP=[{cfg.dp}] CP=[{cfg.cp}] EP=[{cfg.ep}] ZeRO=[{cfg.zero1}]") + + base_context = "" + base_search_cfg = None + for cfg in generate_profiling_configs(model_config): + json_path = os.path.join(working_dir, f'{get_prof_dir(cfg)}.json') + # find ep = 1 config + if (not os.path.exists(json_path) or cfg.expert_model_parallel_size and + cfg.expert_model_parallel_size != 1): + continue + try: + with open(json_path, "rb") as file: + base_context = restricted_loads(file) + base_search_cfg = cfg + except pickle.UnpicklingError as e: + _logger.warning(f"Incorrect pickle format. 
UnpicklingError: {e}") + raise e + if base_context: + break + + _logger.debug(f"success print base_context = {base_context}") + uncovered_prof = [] + profile_count = [0] + + for cfg in stage_1_valid_ptd_configs: + _logger.info("====================") + _logger.info(f"Looking at:\n\n{cfg}") + mem_estimated, _ = MemoryModeling.estimate(cfg) + if mem_estimated <= device_mem_cap: + try: + perf, uncovered_prof, use_mc2 = perf_obj_function(cfg, working_dir, profile_count, re_profiling_flag) + except Exception as err: + _logger.warning(f"Search: ERROR during perf_modeling_calculation: {type(err).__name__}") + tb.print_exc() + + context = "" + json_path = os.path.join(working_dir, f'{get_prof_dir(cfg)}.json') + if not os.path.exists(json_path): + _logger.debug("success modeling context…………") + context = get_context_by_ptd_config(base_context, base_search_cfg, cfg, model_config) + else: + try: + with open(json_path, "rb") as file: + context = restricted_loads(file) + except pickle.UnpicklingError as e: + _logger.warning(f"Incorrect pickle format. 
def get_context_by_ptd_config(base_context, base_search_cfg, search_cfg, model_config):
    """Derive a context for ``search_cfg`` by rescaling a copy of ``base_context``.

    The scale factor is the ratio of ``seq_length / tp / cp * micro_batch_size``
    between ``search_cfg`` and ``base_search_cfg``. ``base_context`` is deep-copied
    first, so the caller's object is not modified.

    Args:
        base_context: nested context loaded for ``base_search_cfg``.
        base_search_cfg: configuration that ``base_context`` belongs to.
        search_cfg: configuration to derive a context for.
        model_config: model description; only ``num_experts`` is read.

    Returns:
        The rescaled copy of ``base_context``.
    """
    cur_cfg_seq_multi_mbs_div_tp_cp = (search_cfg.seq_length / search_cfg.tensor_model_parallel_size /
                                       search_cfg.context_parallel_size) * search_cfg.micro_batch_size
    base_cfg_seq_multi_mbs_div_tp_cp = (base_search_cfg.seq_length / base_search_cfg.tensor_model_parallel_size /
                                        base_search_cfg.context_parallel_size) * base_search_cfg.micro_batch_size
    cur_cfg_resize_time = cur_cfg_seq_multi_mbs_div_tp_cp / base_cfg_seq_multi_mbs_div_tp_cp
    context = deepcopy(base_context)

    cur_experts_num = 0 if model_config.num_experts is None \
        else model_config.num_experts // search_cfg.expert_model_parallel_size
    recursive_change_context(context, cur_cfg_resize_time, cur_experts_num)

    return context


def recursive_change_context(context, cur_cfg_resize_time, cur_experts_num):
    """Scale ``memory``/``input``/``time`` of ``context`` and all nested layers in place.

    When a node's ``prefix_name`` contains ``'mlp'`` and its first child is named
    ``'0'``, the child list is truncated to ``cur_experts_num`` entries before the
    recursion continues.

    Args:
        context: node to update; children are read from its ``layers`` entry.
        cur_cfg_resize_time: multiplicative factor applied to the three metrics.
        cur_experts_num: number of children kept for nodes matching the rule above.
    """
    for metric in ("memory", "input", "time"):
        if metric in context:
            context[metric] *= cur_cfg_resize_time

    check_prefix_name = 'prefix_name' in context and 'mlp' in context.get('prefix_name')
    # An empty child list has no first element to inspect; treat it as "no match"
    # instead of failing with IndexError.
    check_layer = 'layers' in context and bool(context['layers']) \
        and context['layers'][0]['name'] == '0'
    if check_prefix_name and check_layer:
        context['layers'] = context['layers'][:cur_experts_num]
    if "layers" not in context:
        return
    for layer_context in context["layers"]:
        recursive_change_context(layer_context, cur_cfg_resize_time, cur_experts_num)


class ToyModel(object):
    """Empty placeholder class; its constructor does nothing."""

    def __init__(self):
        return


def perf_test_obj_function(search_config):
    """Placeholder that ignores ``search_config`` and returns ``None``."""
    return


def mem_test_toy_function(search_config):
    """Placeholder that ignores ``search_config`` and returns ``None``."""
    return


def get_first_layer_context(context):
    """Return the first node (depth-first, pre-order) that has a ``memory`` entry.

    Returns:
        The matching node, or ``None`` when no node in the tree has ``memory``.
    """
    if "memory" in context:
        return context

    if "layers" not in context:
        return None
    for layer_context in context["layers"]:
        first_layer_context = get_first_layer_context(layer_context)
        if first_layer_context is not None:
            return first_layer_context
    return None


def memory_time_rate(ele):
    """Return ``time / (memory - input)`` for ``ele``.

    ``sys.maxsize`` is returned when ``memory`` equals ``input`` so the division
    never sees a zero denominator.
    """
    if ele["memory"] - ele["input"] == 0:
        return sys.maxsize
    return ele["time"] / (ele["memory"] - ele["input"])


def full_recompute_solver(oom_cap, model_context, model_cfg, perf, search_config):
    """Choose how many layers use full recompute for a given memory headroom.

    The first entry collected by ``find_recompute_layer`` is used as the
    representative layer: ``memory - input`` is its per-layer memory and ``time``
    its per-layer time.

    Args:
        oom_cap: memory headroom compared against the per-layer memory terms.
        model_context: nested context tree searched for the representative layer.
        model_cfg: model description; ``num_layers`` and ``gbs`` are read.
        perf: performance value the per-layer time terms are subtracted from.
        search_config: parallel configuration; ``pp``, ``dp``, ``mbs``,
            ``layers_per_vpp`` and (with virtual pipeline) ``num_layers`` are read.

    Returns:
        Tuple ``(need_recompute, new_perf, memory_term, recompute_num_layers)``.

    Raises:
        IndexError: if ``model_context`` contains no candidate layer.
    """
    pp = search_config.pp
    if search_config.layers_per_vpp:
        num_model_chunks = search_config.num_layers // search_config.layers_per_vpp // pp
        layers_per_vpp = search_config.layers_per_vpp
    else:
        num_model_chunks = 1
        layers_per_vpp = model_cfg.num_layers // pp
    warmup_micro_batchs, total_num_micro_batches = get_num_warmup_micro_batches(num_model_chunks, search_config,
                                                                                model_cfg)
    ret_list = []
    find_recompute_layer(model_context, ret_list)
    if not ret_list:
        # Same exception type as the bare ``ret_list[0]`` lookup, with an actionable message.
        raise IndexError("no recomputable layer found in model_context")
    layer_module = ret_list[0]

    num_layers = model_cfg.num_layers // pp
    layer_time = layer_module["time"]
    memory_per_layer = layer_module["memory"] - layer_module["input"]

    # 1.No full recompute
    max_release_mem = warmup_micro_batchs * layers_per_vpp * memory_per_layer - memory_per_layer
    if max_release_mem <= oom_cap:
        return False, perf - total_num_micro_batches * num_layers * layer_time, max_release_mem, 0

    if search_config.layers_per_vpp:
        # 2.Situation under per pp stage and per mbs recompute layers <= layers_per_vpp
        max_release_mem = (num_model_chunks - 1) * pp * layers_per_vpp * memory_per_layer
        if max_release_mem <= oom_cap:
            layer_calculate = (oom_cap - max_release_mem) // ((2 * pp - 1) * memory_per_layer)
            release_mem = (2 * pp - 1) * layer_calculate * memory_per_layer + max_release_mem - memory_per_layer
            time_cost = (num_layers - layers_per_vpp + layer_calculate) * total_num_micro_batches * layer_time
            return True, perf - time_cost, release_mem, layers_per_vpp - layer_calculate

    # 3.Only consider layers temporarily. The virtual-pipeline fallback and the
    # plain pipeline case share this computation.
    layer_calculate = oom_cap // (memory_per_layer * pp)
    release_mem = layer_calculate * memory_per_layer * pp
    if layer_calculate < num_layers:
        release_mem -= memory_per_layer
    time_cost = total_num_micro_batches * layer_calculate * layer_time
    return True, perf - time_cost, release_mem, num_layers - layer_calculate


def get_num_warmup_micro_batches(num_model_chunks, search_config, model_cfg):
    """Return ``(warmup_count, num_microbatches)`` for pipeline rank 0.

    ``num_microbatches`` is ``gbs // (mbs * dp)``. Without pipeline parallelism
    (``pp <= 1``) the warm-up count is 1. Otherwise it is computed for rank 0,
    incremented by one and capped at ``num_microbatches * num_model_chunks``.

    Args:
        num_model_chunks: number of model chunks per pipeline stage.
        search_config: parallel configuration; ``pp``, ``dp`` and ``mbs`` are read.
        model_cfg: model description; ``gbs`` is read.
    """
    pipeline_parallel_size = search_config.pp
    data_parallel_size = search_config.dp
    num_microbatches = model_cfg.gbs // (search_config.mbs * data_parallel_size)

    if pipeline_parallel_size <= 1:
        return 1, num_microbatches

    # The estimate is always taken for the first pipeline rank.
    pipeline_parallel_rank = 0
    total_num_micro_batches = num_microbatches * num_model_chunks
    if num_model_chunks == 1:
        num_warmup_micro_batches = pipeline_parallel_size - pipeline_parallel_rank - 1
    else:
        num_warmup_micro_batches = (pipeline_parallel_size - pipeline_parallel_rank - 1) * 2
        num_warmup_micro_batches += (num_model_chunks - 1) * pipeline_parallel_size
    num_warmup_micro_batches += 1
    num_warmup_micro_batches = min(num_warmup_micro_batches, total_num_micro_batches)
    return num_warmup_micro_batches, num_microbatches


def find_recompute_layer(context, ret_list):
    """Append every node whose ``input`` does not exceed its ``memory`` to ``ret_list``.

    Nodes are visited depth-first in pre-order, so a parent is appended before
    its children. Each appended entry is a new dict with the node's ``memory``,
    ``time``, ``input``, ``prefix_name`` and ``name``.
    """
    if "memory" in context and context["input"] <= context["memory"]:
        layer_dict = {"memory": context["memory"], "time": context["time"],
                      "input": context["input"], "prefix_name": context["prefix_name"], "name": context["name"]}
        ret_list.append(layer_dict)

    # the layer module is the first one to be appended
    if "layers" not in context:
        return
    for layer_context in context["layers"]:
        find_recompute_layer(layer_context, ret_list)
def stage_1_discrete_search_space_prune(
        mcfg: ModelConfig,
        pod_limit=0,
        model_in_pod=False,
        device_fluctuation_down_ratio=0
) -> List[SearchConfig]:
    """
    Stage 1 prune is without any modeling.
    This function enumerates (tp, cp, pp, dp, ep, layers_per_vpp, mbs, zero) combinations
    and keeps those that satisfy the static constraints checked below.

    Parameters:
        mcfg (ModelConfig): Model description. The fields read here are ``global_world_size``,
            ``seq_length``, ``num_layers``, ``num_micro_batches``, ``num_experts`` and ``is_moe()``.
        pod_limit (int, optional): The maximum number of devices in a super pod.
            0 (the default) means no limit.
        model_in_pod (bool, optional): If True, the product of tp and pp should be less than
            or equal to pod_limit. Default is False.
        device_fluctuation_down_ratio (float, optional): When greater than 0, configurations
            using more than ``(1 - ratio) * num_devices`` and at most ``num_devices`` devices
            are accepted; otherwise exactly ``num_devices`` devices must be used. Default is 0.

    Returns:
        List[SearchConfig]: One entry per accepted combination. Every combination is added with
        ``use_distributed_optimizer=False`` and, when ``dp * cp > 1``, once more with it set to True.
    """

    num_devices = mcfg.global_world_size
    device_type = Hardware().device_type

    valid_configs: List[SearchConfig] = list()

    # Iterate over all possible combinations of tp, cp, pp, dp, ep and zero
    # Prune tp based on device_type, tp = 1 or 8 only if running on 910B
    # Only powers of two up to num_devices can pass the device-count check, so
    # larger candidates are not generated at all.
    tp_search_list = []
    tp_candidate = 1
    while tp_candidate <= num_devices:
        tp_search_list.append(tp_candidate)
        tp_candidate *= 2
    if "910B" in device_type:
        tp_search_list = [1, 8]
    for tp in tp_search_list:

        # Check if tp is less than or equal to pod_limit
        if 0 < pod_limit < tp:
            continue

        for cp in range(1, num_devices // tp + 1):

            # Check cp long sequence based on device_type
            if cp > 1:
                if ("910B" in device_type) and \
                        ((mcfg.seq_length // cp) < 8 * 1024):
                    continue
                if ("910_9" in device_type) and \
                        ((mcfg.seq_length // cp) < 4 * 1024):
                    continue

            for pp in range(1, num_devices // (tp * cp) + 1):

                # Check if tp * pp is less than or equal to pod_limit.
                # pod_limit == 0 means "no limit", exactly as in the tp check above.
                if model_in_pod and 0 < pod_limit < tp * pp:
                    continue
                # Check if layer_number is divisible by pp
                if mcfg.num_layers % pp != 0:
                    continue

                for dp in range(1, num_devices // (tp * cp * pp) + 1):

                    # Check device number compatibility
                    if device_fluctuation_down_ratio > 0:
                        if not ((1 - device_fluctuation_down_ratio) * num_devices < tp * cp * pp * dp <= num_devices):
                            continue
                    else:
                        if tp * cp * pp * dp != num_devices:
                            continue
                    # Check if micro_batch_number is divisible by dp
                    if mcfg.num_micro_batches % dp != 0:
                        continue
                    # Check if micro_batch_number / (pp * dp) is greater than 1
                    if mcfg.num_micro_batches // (pp * dp) <= 1:
                        continue

                    num_experts = mcfg.num_experts if mcfg.num_experts else 1
                    for ep in range(1, min(cp * dp, num_experts) + 1):

                        # Check if (ep | cp * dp) and (ep | expert_number)
                        if ((cp * dp) % ep != 0) or (num_experts % ep != 0):
                            continue

                        layers_per_vpp_search_domain = [None]
                        # Search vpp only if pp is enabled
                        if pp > 1:
                            # Search domain drops the last possible value (layer_number // pp)
                            # due to the constraint $layers_per_vpp * pp != layer_number$
                            layers_per_vpp_search_domain += \
                                [x for x in range(1, mcfg.num_layers // pp)]
                        for layers_per_vpp in layers_per_vpp_search_domain:

                            # Check if $layers_per_vpp$ not None and $layers_per_vpp * pp | layer_number$
                            if layers_per_vpp and \
                                    mcfg.num_layers % (layers_per_vpp * pp) != 0:
                                continue

                            for mbs in [1, 2]:
                                cfg_zero0 = SearchConfig()
                                cfg_zero0.copy_from_config(mcfg)
                                cfg_zero0.tensor_model_parallel_size = tp
                                cfg_zero0.context_parallel_size = cp
                                cfg_zero0.pipeline_model_parallel_size = pp
                                cfg_zero0.num_layers_per_virtual_pipeline_stage = \
                                    layers_per_vpp
                                cfg_zero0.use_distributed_optimizer = False
                                cfg_zero0.micro_batch_size = mbs
                                if mcfg.is_moe():
                                    cfg_zero0.expert_model_parallel_size = ep
                                cfg_zero0.normalize()

                                valid_configs.append(cfg_zero0)

                                # When (dp * cp > 1), zero can be 1; add this config to the list
                                if dp * cp > 1:
                                    cfg_zero1 = replace(cfg_zero0,
                                                        use_distributed_optimizer=True)
                                    valid_configs.append(cfg_zero1)

    return valid_configs
_LOGGERS: Set[str] = set()
_LOG_FMT = "[%(levelname)s] %(name)s: %(message)s"
_LOG_LEVEL = logging.INFO
_LOGGER_NAME_PREFIX = "auto-tuning"


def init_logger(level: str = "info"):
    """Set the level for all loggers created through ``get_logger``.

    ``"warning"`` and ``"debug"`` select the matching ``logging`` level; any
    other value selects ``logging.INFO``. The chosen level is stored for loggers
    created later and applied to every logger registered so far, whose handlers
    also get a fresh formatter.
    """
    global _LOG_LEVEL
    _LOG_LEVEL = {
        "warning": logging.WARNING,
        "debug": logging.DEBUG,
    }.get(level, logging.INFO)

    for registered_name in _LOGGERS:
        registered = logging.getLogger(name=f"{_LOGGER_NAME_PREFIX}.{registered_name}")
        registered.setLevel(_LOG_LEVEL)
        for handler in registered.handlers:
            handler.setFormatter(logging.Formatter(fmt=_LOG_FMT))


def get_logger(name: str):
    """Return the logger ``auto-tuning.<name>``, configuring it on first request.

    On first request the logger stops propagating, takes the current module
    level, gets a stdout stream handler and is recorded in the registry. Later
    requests return the same logger without touching its configuration.
    """
    logger = logging.getLogger(name=f"{_LOGGER_NAME_PREFIX}.{name}")
    if name in _LOGGERS:
        return logger

    logger.propagate = False
    logger.setLevel(_LOG_LEVEL)
    logger.addHandler(logging.StreamHandler(stream=stdout))
    for handler in logger.handlers:
        handler.setFormatter(logging.Formatter(fmt=_LOG_FMT))
    _LOGGERS.add(name)
    return logger
class _RestrictedUnpickler(pickle.Unpickler):
    """Unpickler that only resolves classes from the ``mindspeed.auto_tuning`` package."""

    _ALLOWED_PACKAGE = "mindspeed.auto_tuning"

    def find_class(self, module: str, name: str):
        """Resolve ``module.name`` if it is a class inside the allowed package.

        Raises:
            pickle.UnpicklingError: if the module is outside the allowed package
                or the resolved object is not a class.
        """
        # Match the package itself or a dotted sub-module. A bare prefix test would
        # also accept unrelated siblings such as ``mindspeed.auto_tuning_evil``.
        in_allowed_package = module == self._ALLOWED_PACKAGE or \
            module.startswith(self._ALLOWED_PACKAGE + ".")
        if in_allowed_package:
            module_ = importlib.import_module(module)
            clazz = getattr(module_, name)
            if isinstance(clazz, type):
                return clazz
        raise pickle.UnpicklingError("global '%s.%s' is forbidden" % (module, name))


def restricted_loads(s):
    """Unpickle one object from the open binary file ``s`` with the restricted unpickler.

    ``check_file_size`` is called on ``s`` before any data is read.
    """
    check_file_size(s)
    return _RestrictedUnpickler(s).load()
_Argv = Dict[str, Optional[str]]


class ExecutorFlag(Enum):
    """Mode of a ``ModelExecutor.execute`` call; selects the marker environment variable."""
    RUN = 0
    PARSE_ARGS = 1
    PARSE_MODEL = 2
    PROFILE = 3


class ModelExecutor:
    """
    Execute the model with different configs.

    Each ``*_config`` constructor argument is the command-line option name used
    for the corresponding setting. ``execute`` writes the options to switch on
    and off into a file inside the working directory, exports its path through
    an environment variable and hands control to the runner.
    """
    MODIFIED_ARGV_FILENAME = "auto_tuning_modified_argv.json"
    PARSE_ARGS_ENV = "OOTB_OPTIMIZER_PARSE_ARGS"
    PARSE_MODEL_ENV = "OOTB_OPTIMIZER_PARSE_MODEL"
    PROFILING_ENV = "OOTB_OPTIMIZER_PROFILING"
    MODIFIED_ARGV_PATH_ENV = "OOTB_OPTIMIZER_MODIFIED_ARGV_PATH"
    ENABLED_ENV_MARKER = "TRUE"

    def __init__(self,
                 runner: IRunner,
                 num_layers_config="--num-layers",
                 num_experts_config="--num-experts",
                 seq_length_config="--seq-length",
                 max_position_embeddings_config="--max-position-embeddings",
                 micro_batch_size_config="--micro-batch-size",
                 global_batch_size_config="--global-batch-size",
                 recompute_granularity_config="--recompute-granularity",
                 recompute_method_config="--recompute-method",
                 recompute_num_layers_config="--recompute-num-layers",
                 adaptive_recompute_device_swap_config="--adaptive-recompute-device-swap",
                 enable_token_rearrange_opt_config="--enable-token-rearrange-opt",
                 tensor_model_parallel_size_config="--tensor-model-parallel-size",
                 pipeline_model_parallel_size_config="--pipeline-model-parallel-size",
                 num_layers_per_virtual_pipeline_stage_config="--num-layers-per-virtual-pipeline-stage",
                 expert_model_parallel_size_config="--expert-model-parallel-size",
                 context_parallel_size_config="--context-parallel-size",
                 use_distributed_optimizer_config="--use-distributed-optimizer",
                 use_ascend_mc2_config="--use-ascend-mc2",
                 train_iters_config="--train-iters",
                 profile_config="--profile",
                 profile_step_start_config="--profile-step-start",
                 profile_step_end_config="--profile-step-end",
                 profile_ranks_config="--profile-ranks",
                 profile_level_config="--profile-level",
                 profile_with_cpu_config="--profile-with-cpu",
                 profile_with_stack_config="--profile-with-stack",
                 profile_with_memory_config="--profile-with-memory",
                 profile_record_shapes_config="--profile-record-shapes",
                 profile_save_path_config="--profile-save-path"
                 ) -> None:
        """Store the runner and the option name used for every tunable setting."""
        self.runner = runner
        self.num_layers_config = num_layers_config
        self.num_experts_config = num_experts_config
        self.seq_length_config = seq_length_config
        self.max_position_embeddings_config = max_position_embeddings_config
        self.micro_batch_size_config = micro_batch_size_config
        self.global_batch_size_config = global_batch_size_config
        self.recompute_granularity_config = recompute_granularity_config
        self.recompute_method_config = recompute_method_config
        self.recompute_num_layers_config = recompute_num_layers_config
        self.adaptive_recompute_device_swap_config = adaptive_recompute_device_swap_config
        self.enable_token_rearrange_opt_config = enable_token_rearrange_opt_config
        self.tensor_model_parallel_size_config = tensor_model_parallel_size_config
        self.pipeline_model_parallel_size_config = pipeline_model_parallel_size_config
        # NOTE: the attribute name keeps its historical spelling ("virutal") because
        # it is part of the object's public surface.
        self.num_layers_per_virutal_pipeline_stage_config = num_layers_per_virtual_pipeline_stage_config
        self.expert_model_parallel_size_config = expert_model_parallel_size_config
        self.context_parallel_size_config = context_parallel_size_config
        self.use_distributed_optimizer_config = use_distributed_optimizer_config
        self.use_ascend_mc2_config = use_ascend_mc2_config
        self.train_iters_config = train_iters_config
        self.profile_config = profile_config
        self.profile_step_start_config = profile_step_start_config
        self.profile_step_end_config = profile_step_end_config
        self.profile_ranks_config = profile_ranks_config
        self.profile_level_config = profile_level_config
        self.profile_with_cpu_config = profile_with_cpu_config
        self.profile_with_stack_config = profile_with_stack_config
        self.profile_with_memory_config = profile_with_memory_config
        self.profile_record_shapes_config = profile_record_shapes_config
        self.profile_save_path_config = profile_save_path_config

    def execute(self,
                working_dir: str,
                output_filename: str = str(),
                cfg: Optional[SearchConfig] = None,
                flag: ExecutorFlag = ExecutorFlag.RUN
                ) -> int:
        """Write the modified argv file for ``cfg`` and run the model once.

        Args:
            working_dir: directory that receives the modified argv file.
            output_filename: name joined to ``working_dir`` for the profile save
                path in ``PARSE_MODEL`` and ``PROFILE`` mode.
            cfg: configuration to translate into options; ``None`` adds no
                configuration-dependent options.
            flag: execution mode.

        Returns:
            The value returned by ``runner.run``.
        """
        env = self.runner.get_base_env()
        self._prepare_envvars(env, flag)

        modified_argv_path = os.path.join(working_dir, self.MODIFIED_ARGV_FILENAME)

        self._prepare_modified_argv_envvars(env, modified_argv_path)

        modified_argv = self._prepare_modified_argv(cfg, working_dir, output_filename, flag)
        # The file is created readable and writable by the owner only. Despite the
        # ".json" file name the payload is a pickle.
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
        mode = stat.S_IWUSR | stat.S_IRUSR
        with os.fdopen(os.open(modified_argv_path, flags, mode=mode), 'wb') as f:
            pickle.dump(modified_argv, f)

        returncode = self.runner.run(env)

        return returncode

    def _prepare_envvars(self,
                         env: _Env,
                         flag: ExecutorFlag
                         ) -> _Env:
        """Clear the three mode markers in ``env`` and set the one matching ``flag``.

        ``ExecutorFlag.RUN`` leaves all markers unset. ``env`` is modified in place
        and also returned.
        """
        env.pop(self.PARSE_ARGS_ENV, None)
        env.pop(self.PARSE_MODEL_ENV, None)
        env.pop(self.PROFILING_ENV, None)

        if flag == ExecutorFlag.PARSE_ARGS:
            env.update({self.PARSE_ARGS_ENV: self.ENABLED_ENV_MARKER})
        elif flag == ExecutorFlag.PARSE_MODEL:
            env.update({self.PARSE_MODEL_ENV: self.ENABLED_ENV_MARKER})
        elif flag == ExecutorFlag.PROFILE:
            env.update({self.PROFILING_ENV: self.ENABLED_ENV_MARKER})

        return env

    def _prepare_modified_argv_envvars(self,
                                       env: _Env,
                                       modified_argv_path: str
                                       ) -> _Env:
        """Store ``modified_argv_path`` in ``env`` (in place) and return ``env``."""
        env.update({self.MODIFIED_ARGV_PATH_ENV: modified_argv_path})

        return env

    def _prepare_modified_argv(
        self,
        cfg: Optional[SearchConfig],
        working_dir: str,
        output_filename: str,
        flag: ExecutorFlag
    ) -> Tuple[_Argv, _Argv]:
        """Translate ``cfg`` and ``flag`` into options to enable and to disable.

        Returns:
            ``(enabled_argv, disabled_argv)``. Enabled options map to their string
            value, or to ``None`` for switches. Disabled value options map to an
            empty string and disabled switches map to ``None``.
        """
        enabled_argv: _Argv = dict()
        disabled_argv: _Argv = dict()

        def _set_value(option, value):
            # Option that carries a value.
            enabled_argv.update({option: value})

        def _unset_value(option):
            # Value option that must be removed.
            disabled_argv.update({option: str()})

        def _set_switch(option, turned_on):
            # Value-less switch: recorded on exactly one side.
            target = enabled_argv if turned_on else disabled_argv
            target.update({option: None})

        if cfg:
            cfg.normalize()

            def _modify_model_argv():
                if self.recompute_granularity_config and self.recompute_method_config \
                        and self.recompute_num_layers_config:
                    if cfg.is_full_recompute():
                        _set_value(self.recompute_granularity_config, cfg.recompute_granularity)
                        _set_value(self.recompute_method_config, cfg.recompute_method)
                        _set_value(self.recompute_num_layers_config, str(cfg.recompute_num_layers))
                    else:
                        _unset_value(self.recompute_granularity_config)
                        _unset_value(self.recompute_method_config)
                        _unset_value(self.recompute_num_layers_config)

                if self.num_layers_config:
                    _set_value(self.num_layers_config, str(cfg.num_layers))

                if self.num_experts_config:
                    if cfg.num_experts:
                        _set_value(self.num_experts_config, str(cfg.num_experts))
                    else:
                        _unset_value(self.num_experts_config)

                if self.seq_length_config:
                    _set_value(self.seq_length_config, str(cfg.seq_length))
                    # The position-embedding limit follows the sequence length. It is
                    # skipped when its option name is unset, like every other option.
                    if self.max_position_embeddings_config:
                        _set_value(self.max_position_embeddings_config, str(cfg.seq_length))

                if self.micro_batch_size_config:
                    _set_value(self.micro_batch_size_config, str(cfg.micro_batch_size))

                if self.global_batch_size_config:
                    _set_value(self.global_batch_size_config, str(cfg.global_batch_size))

                if self.adaptive_recompute_device_swap_config:
                    _set_switch(self.adaptive_recompute_device_swap_config, cfg.adaptive_recompute_device_swap)

                if self.enable_token_rearrange_opt_config:
                    _set_switch(self.enable_token_rearrange_opt_config, cfg.enable_token_rearrange_opt)

                if self.use_ascend_mc2_config:
                    _set_switch(self.use_ascend_mc2_config, cfg.use_ascend_mc2)

            def _modify_parallel_argv():
                if self.tensor_model_parallel_size_config:
                    _set_value(self.tensor_model_parallel_size_config, str(cfg.tensor_model_parallel_size))

                if self.pipeline_model_parallel_size_config:
                    _set_value(self.pipeline_model_parallel_size_config, str(cfg.pipeline_model_parallel_size))

                if self.num_layers_per_virutal_pipeline_stage_config:
                    if cfg.num_layers_per_virtual_pipeline_stage:
                        _set_value(self.num_layers_per_virutal_pipeline_stage_config,
                                   str(cfg.num_layers_per_virtual_pipeline_stage))
                    else:
                        _unset_value(self.num_layers_per_virutal_pipeline_stage_config)

                if self.expert_model_parallel_size_config:
                    if cfg.expert_model_parallel_size:
                        _set_value(self.expert_model_parallel_size_config, str(cfg.expert_model_parallel_size))
                    else:
                        _unset_value(self.expert_model_parallel_size_config)

                if self.context_parallel_size_config:
                    _set_value(self.context_parallel_size_config, str(cfg.context_parallel_size))

                if self.use_distributed_optimizer_config:
                    _set_switch(self.use_distributed_optimizer_config, cfg.use_distributed_optimizer)

            def _modify_profile_argv():
                if cfg.profile:
                    _set_value(self.train_iters_config, str(cfg.train_iters))
                    _set_value(self.profile_config, None)
                    _set_value(self.profile_step_start_config, str(cfg.profile_step_start))
                    _set_value(self.profile_step_end_config, str(cfg.profile_step_end))
                    _set_value(self.profile_ranks_config, str(cfg.profile_ranks))
                    _set_value(self.profile_level_config, cfg.profile_level)
                    _set_switch(self.profile_with_cpu_config, cfg.profile_with_cpu)
                    _set_switch(self.profile_with_stack_config, cfg.profile_with_stack)
                    # Memory profiling is now disabled when not requested; both
                    # branches used to enable it.
                    _set_switch(self.profile_with_memory_config, cfg.profile_with_memory)
                    _set_switch(self.profile_record_shapes_config, cfg.profile_record_shapes)

            _modify_model_argv()
            _modify_parallel_argv()
            _modify_profile_argv()

        if flag == ExecutorFlag.PARSE_ARGS:
            enabled_argv.update({self.profile_save_path_config: working_dir})
        elif flag == ExecutorFlag.PARSE_MODEL or flag == ExecutorFlag.PROFILE:
            enabled_argv.update({self.profile_save_path_config: os.path.join(working_dir, output_filename)})

        return enabled_argv, disabled_argv
class Singleton(type):
    """Metaclass that creates at most one instance per class using it.

    The first call constructs the instance with the given arguments; every later
    call returns that instance and ignores its arguments.
    """
    _instances = dict()
    _lock = threading.Lock()

    def __call__(cls, *args, **kwargs):
        registry = cls._instances
        if cls in registry:
            return registry[cls]
        with cls._lock:
            # Checked again under the lock: another thread may have created the
            # instance between the unlocked test above and acquiring the lock.
            if cls not in registry:
                registry[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return registry[cls]
def get_prof_dir(cfg: "SearchConfig", re_profile=False) -> str:
    """Build the profiling directory name that encodes a search configuration.

    Args:
        cfg: Configuration providing ``tp``, ``dp``, ``pp``, ``cp``, ``mbs``,
            ``seq_length``, ``use_ascend_mc2`` and ``is_moe()``; ``ep`` and
            ``num_experts`` are read only when ``is_moe()`` is true.
        re_profile: When true, a ``re_profile`` suffix is appended.

    Returns:
        The underscore-joined directory name.
    """
    parts = [
        "auto_tuning_profiling",
        f"{cfg.tp}tp",
        f"{cfg.dp}dp",
        f"{cfg.pp}pp",
        f"{cfg.cp}cp",
        f"{cfg.mbs}mbs",
    ]
    if cfg.is_moe():
        parts.append(f"{cfg.ep}ep")
        parts.append(f"{cfg.num_experts}experts")
    if cfg.use_ascend_mc2:
        parts.append("mc2")
    parts.append(f"{cfg.seq_length}seq")
    if re_profile:
        parts.append("re_profile")
    return "_".join(parts)
def save_checkpoint(iteration, model, optimizer, opt_param_scheduler,
                    num_floating_point_operations_so_far, checkpointing_context=None):
    """Save a model checkpoint, with a separate ``.ema`` file when EMA is enabled.

    Args:
        iteration: Training iteration recorded in the checkpoint and tracker file.
        model: Model (list of chunks); it is passed through ``unwrap_model``.
        optimizer: Optimizer whose state is saved unless ``args.no_save_optim``.
        opt_param_scheduler: Scheduler forwarded to ``generate_state_dict``.
        num_floating_point_operations_so_far: Stored verbatim in the state dict.
        checkpointing_context: Optional dict used to cache the distributed save
            strategy between calls.

    Raises:
        NotImplementedError: If ``args.async_save`` is set for a legacy
            checkpoint or for a format other than ``torch_dist``.
    """
    args = get_args()

    # Only rank zero of the data parallel writes to the disk.
    model = unwrap_model(model)

    ckpt_format = args.dist_ckpt_format if args.use_dist_ckpt else 'torch'
    print_rank_0('saving checkpoint at iteration {:7d} to {} in {} format'.format(
        iteration, args.save, ckpt_format))

    # Collect rng state across data parallel ranks.
    rng_state = get_rng_state(args.use_dist_ckpt)

    # Checkpoint name.
    checkpoint_name = get_checkpoint_name(args.save, iteration, return_base_dir=args.use_dist_ckpt)

    # Save distributed optimizer's custom parameter state.
    if args.use_distributed_optimizer and not args.no_save_optim and optimizer is not None and not args.use_dist_ckpt:
        optim_checkpoint_name = \
            get_distributed_optimizer_checkpoint_name(checkpoint_name)
        ensure_directory_exists(optim_checkpoint_name)
        optimizer.save_parameter_state(optim_checkpoint_name)

    async_save_request = None
    if args.async_save:
        if not args.use_dist_ckpt:
            raise NotImplementedError('Async checkpoint save not implemented for legacy checkpoints')
        elif args.dist_ckpt_format != 'torch_dist':
            raise NotImplementedError(
                f'Async checkpoint save not implemented for {args.dist_ckpt_format} distributed checkpoint format')

    # Collect args, model, RNG.
    if not torch.distributed.is_initialized() \
            or mpu.get_data_modulo_expert_parallel_rank() == 0 \
            or args.use_dist_ckpt:

        optim_sd_kwargs = {}
        if args.use_dist_ckpt and args.use_distributed_optimizer:
            optim_sd_kwargs['sharding_type'] = ('fully_sharded_bucket_space'
                                                if args.ckpt_fully_parallel_save
                                                else 'dp_zero_gather_scatter')
            print_rank_0(f'Storing distributed optimizer sharded state of type {optim_sd_kwargs["sharding_type"]}')
        state_dict = generate_state_dict(args, model, optimizer, opt_param_scheduler, rng_state,
                                         args.use_dist_ckpt, iteration, optim_sd_kwargs=optim_sd_kwargs)

        state_dict['num_floating_point_operations_so_far'] = num_floating_point_operations_so_far
        if args.use_dist_ckpt:
            # These names were used without being imported in this module
            # (NameError); import them where they are needed.
            from megatron.core.dist_checkpointing.serialization import get_default_save_sharded_strategy

            if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:
                ensure_directory_exists(checkpoint_name, check_parent=False)
            # NOTE(review): computed but not forwarded to dist_checkpointing.save
            # below, exactly as in the original code - confirm whether it should be.
            validate_sharding_integrity = True
            save_strategy = (checkpointing_context or {}).get('save_strategy',
                                                              get_default_save_sharded_strategy(args.dist_ckpt_format))
            if args.ckpt_fully_parallel_save:
                if checkpointing_context is not None and 'save_strategy' in checkpointing_context:
                    # Already saved once before - don't need to rerun sharding validation
                    validate_sharding_integrity = not args.ckpt_assume_constant_structure
                else:
                    from megatron.core.dist_checkpointing.strategies.fully_parallel import \
                        FullyParallelSaveStrategyWrapper

                    save_strategy = FullyParallelSaveStrategyWrapper(
                        save_strategy,
                        mpu.get_data_parallel_group(with_context_parallel=True),
                        args.ckpt_assume_constant_structure)
            # Store save strategy for future checkpoint saves
            if checkpointing_context is not None:
                checkpointing_context['save_strategy'] = save_strategy
            async_save_request = dist_checkpointing.save(state_dict, checkpoint_name, save_strategy,
                                                         async_sharded_save=args.async_save)
        else:
            # Save. EMA entries (keys starting with 'ema') go to a side file.
            if args.use_ema:
                ema_state_dict = {k: v for k, v in state_dict.items() if k.startswith('ema')}
                state_dict = {k: v for k, v in state_dict.items() if not k.startswith('ema')}

            ensure_directory_exists(checkpoint_name)
            torch.save(state_dict, checkpoint_name)

            if args.use_ema:
                # Stored under 'model...' keys in '<checkpoint_name>.ema'.
                ema_state_dict = {k.replace('ema', 'model'): v for k, v in ema_state_dict.items()}
                torch.save(ema_state_dict, checkpoint_name + ".ema")

    if not args.async_save:
        assert async_save_request is None
        # Wait so everyone is done (necessary)
        if torch.distributed.is_initialized():
            torch.distributed.barrier()

    # And update the latest iteration
    if not torch.distributed.is_initialized() \
            or torch.distributed.get_rank() == 0:
        tracker_filename = get_checkpoint_tracker_filename(args.save)

        def iter_finalize_fn():
            with open(tracker_filename, 'w') as f:
                f.write(str(iteration))
            print_rank_0(' successfully saved checkpoint from iteration {:7d} to {}'
                         .format(iteration, args.save))
            if args.log_progress and args.async_save:
                # Previously an undefined name in this module.
                from megatron.training.utils import append_to_progress_log

                append_to_progress_log(f'Saved async checkpoint\tIteration: {iteration}',
                                       barrier=False)

        if args.async_save:
            assert async_save_request is not None
            async_save_request.add_finalize_fn(iter_finalize_fn)
        else:
            iter_finalize_fn()

    if args.async_save:
        # Previously an undefined name in this module.
        from megatron.training.async_utils import schedule_async_save

        schedule_async_save(async_save_request)
        print_rank_0(' scheduled an async checkpoint save at iteration {:7d} to {}' \
                     .format(iteration, args.save))

    # Wait so everyone is done (not necessary)
    if torch.distributed.is_initialized():
        torch.distributed.barrier()
def _merge_ema_weights(state_dict, checkpoint_name):
    """Merge the companion ``<checkpoint_name>.ema`` file into ``state_dict`` in place."""
    len_model = sum(1 for key in state_dict if key.startswith('model'))
    ema_state_dict = torch.load(checkpoint_name + ".ema", map_location='cpu')

    if len(ema_state_dict) == 0:
        return

    model_keys = ['model'] if len_model == 1 else ['model%d' % i for i in range(len_model)]
    # Build every prefixed dict first so a missing key cannot leave
    # state_dict half-merged.
    prefixed = {
        key: {f'ema.{k}': v for k, v in ema_state_dict[key].items()}
        for key in model_keys
    }
    for key, ema_weights in prefixed.items():
        state_dict[key].update(ema_weights)


def _load_base_checkpoint(load_dir, rank0=False, sharded_state_dict=None,
                          exit_on_missing_checkpoint=False, checkpoint_step=None):
    """ Load the base state_dict from the given directory

    If rank0 is true, just loads rank 0 checkpoint, ignoring arguments.

    Args:
        load_dir: Directory containing the checkpoint tracker file.
        rank0: Load only the rank-0 checkpoint.
        sharded_state_dict: Sharded state dict required when the checkpoint
            on disk is a distributed checkpoint and ``rank0`` is false.
        exit_on_missing_checkpoint: Exit the process when no tracker file exists.
        checkpoint_step: Iteration to load instead of the one in the tracker file.

    Returns:
        ``(state_dict, checkpoint_name, release)``; ``(None, "", False)`` when
        no tracker file is found.

    Raises:
        RuntimeError: A distributed checkpoint was detected but no
            ``sharded_state_dict`` was supplied.
    """
    # The original called get_args() here without using the result; it is now
    # fetched only where it is used.

    # Read the tracker file and set the iteration.
    tracker_filename = get_checkpoint_tracker_filename(load_dir)

    # If no tracker file, return nothing
    if not os.path.isfile(tracker_filename):
        if not rank0:
            print_rank_0('WARNING: could not find the metadata file {} '.format(
                tracker_filename))
            print_rank_0(' will not load any checkpoints and will start from '
                         'random')

        # Conditionally exit if checkpoint not found.
        if exit_on_missing_checkpoint:
            print_rank_0(">> '--exit-on-missing-checkpoint' set ... exiting. <<")
            if torch.distributed.is_initialized():
                torch.distributed.barrier()
            sys.exit()

        return None, "", False

    # Otherwise, read the tracker file and either set the iteration or
    # mark it as a release checkpoint.
    if checkpoint_step is not None:
        iteration = checkpoint_step
        release = False
    else:
        iteration, release = read_metadata(tracker_filename)

    # Checkpoint.
    if rank0:
        checkpoint_name = find_checkpoint_rank_0(load_dir, iteration, release)
        is_dist_ckpt = checkpoint_name is not None and dist_checkpointing.check_is_distributed_checkpoint(
            checkpoint_name)
    else:
        checkpoint_name = get_checkpoint_name(load_dir, iteration, release,
                                              return_base_dir=True)
        is_dist_ckpt = dist_checkpointing.check_is_distributed_checkpoint(checkpoint_name)
        if not is_dist_ckpt:
            checkpoint_name = get_checkpoint_name(load_dir, iteration, release,
                                                  return_base_dir=False)
        dist_infix = "distributed " if is_dist_ckpt else ""
        if release:
            print_rank_0(f' loading release {dist_infix}checkpoint from {load_dir}')
        else:
            print_rank_0(f' loading {dist_infix}checkpoint from {load_dir} at iteration {iteration}')

    # Load the checkpoint.
    if is_dist_ckpt:
        if rank0:
            state_dict = dist_checkpointing.load_common_state_dict(checkpoint_name)
            return state_dict, checkpoint_name, release

        # at this point args are available
        args = get_args()
        if sharded_state_dict is None:
            assert not args.auto_detect_ckpt_format and not args.use_dist_ckpt, (
                args.auto_detect_ckpt_format, args.use_dist_ckpt)
            raise RuntimeError(
                'Detected load from a distributed checkpoint, but neither --use-dist-ckpt nor --auto-detect-ckpt-format is set.')

        # These names were used without being imported in this module
        # (NameError); import them where they are needed.
        from megatron.core.dist_checkpointing.serialization import get_default_load_sharded_strategy

        load_strategy = get_default_load_sharded_strategy(checkpoint_name)
        if args.ckpt_fully_parallel_load:
            from megatron.core.dist_checkpointing.strategies.fully_parallel import \
                FullyParallelLoadStrategyWrapper

            load_strategy = FullyParallelLoadStrategyWrapper(load_strategy,
                                                             mpu.get_data_parallel_group(with_context_parallel=True))
        state_dict = dist_checkpointing.load(sharded_state_dict, checkpoint_name, load_strategy)
        return state_dict, checkpoint_name, release

    try:
        state_dict = torch.load(checkpoint_name, map_location='cpu')
    except ModuleNotFoundError:
        from megatron.legacy.fp16_deprecated import loss_scaler
        # For backward compatibility.
        if not rank0:
            print_rank_0(' > deserializing using the old code structure ...')
        sys.modules['fp16.loss_scaler'] = sys.modules[
            'megatron.legacy.fp16_deprecated.loss_scaler']
        sys.modules['megatron.fp16.loss_scaler'] = sys.modules[
            'megatron.legacy.fp16_deprecated.loss_scaler']
        sys.modules['megatron.model'] = sys.modules['megatron.legacy.model']
        state_dict = torch.load(checkpoint_name, map_location='cpu')
        sys.modules.pop('fp16.loss_scaler', None)
        sys.modules.pop('megatron.fp16.loss_scaler', None)
        sys.modules.pop('megatron.model', None)
    except BaseException as e:
        print_rank_0('could not load the checkpoint')
        print_rank_0(e)
        sys.exit()
    else:
        # EMA weights are optional: failing to load them must not abort the
        # load. The single-model branch used to read ema_state_dict['ema'],
        # which is never present, so EMA weights were silently dropped.
        try:
            if get_args().use_ema:
                _merge_ema_weights(state_dict, checkpoint_name)
        except Exception as e:
            print_rank_0('could not load the ema checkpoint, continue without ema checkpoint')
            print_rank_0(e)

    return state_dict, checkpoint_name, release
def get_ema_model(model, optimizer):
    """Collect CPU copies of the model state dict(s) while EMA weights are applied.

    The EMA weights are copied into the model through
    ``ema_optimizer_applier``, the state dict of every model chunk is captured
    and moved to CPU, and the original weights are put back with
    ``ema_optimizer_restore``.

    Args:
        model: Model (list of chunks); it is passed through ``unwrap_model``.
        optimizer: Optimizer, possibly a ``ChainedOptimizer``.

    Returns:
        A dict keyed ``'ema_model'`` for a single chunk, or ``'ema_model<i>'``
        for each chunk otherwise.
    """
    state_dict = dict()
    use_dist_ckpt = get_args().use_dist_ckpt
    unwrapped_model = unwrap_model(model)
    unchained_optimizer = unchain_optimizer(optimizer)
    ema_optimizer_applier(unchained_optimizer)

    single_chunk = len(unwrapped_model) == 1
    for chunk_idx, chunk in enumerate(unwrapped_model):
        if single_chunk:
            ema_key = 'ema_model'
        else:
            ema_key = 'ema_model%d' % chunk_idx
            mpu.set_virtual_pipeline_model_parallel_rank(chunk_idx)
        # The single-chunk branch used to call the non-existent
        # ``shared_state_dict``; both branches now use ``sharded_state_dict``.
        state_dict[ema_key] = (chunk.sharded_state_dict()
                               if use_dist_ckpt else
                               chunk.state_dict_for_save_checkpoint())
        state_dict = ema_state_dict_to_cpu(state_dict, ema_key)

    ema_optimizer_restore(unchained_optimizer)
    return state_dict
def ema_state_dict_to_cpu(state_dict, ema_key):
    """Replace every tensor under ``state_dict[ema_key]`` with a detached CPU clone.

    Non-tensor values are left untouched. The dict is modified in place and
    also returned.
    """
    ema_entries = state_dict[ema_key]
    tensor_names = [name for name, value in ema_entries.items() if torch.is_tensor(value)]
    for name in tensor_names:
        ema_entries[name] = ema_entries[name].detach().cpu().clone()
    return state_dict
class SingletonType(type):
    """Metaclass that creates at most one instance per class.

    The first call constructs the instance; later calls return it and ignore
    their arguments. Creation is serialized through a shared re-entrant lock.
    """

    single_lock = threading.RLock()

    def __call__(cls, *args, **kwargs):
        with SingletonType.single_lock:
            # Look only at the class's own namespace: ``hasattr`` would also
            # find a parent's ``_instance`` and make a subclass return the
            # parent's object.
            if "_instance" not in vars(cls):
                cls._instance = super(SingletonType, cls).__call__(*args, **kwargs)
        return cls._instance
class ModelManager:
    """In-memory registry of per-operator models with load/save to ``.pth`` files.

    Files are named ``<op>_<npu_type>.pth`` inside the given model directory.
    """

    def __init__(self, npu_type='910B'):
        self.models = {}
        self.npu_type = npu_type

    def _model_path(self, op, model_dir):
        """Return the file path used for operator ``op`` in ``model_dir``."""
        return os.path.join(model_dir, f"{op}_{self.npu_type}.pth")

    def cache_model(self, model, op):
        """Register ``model`` under the operator name ``op``."""
        self.models[op] = model

    def get_cached_model(self, model_name: str):
        """Return the model registered as ``model_name``, or ``None``."""
        return self.models.get(model_name, None)

    def load_model(self, model, op, model_dir):
        """Load weights and model info for ``op`` into ``model`` and cache it.

        Raises:
            FileNotFoundError: If ``model_dir`` does not exist.
        """
        if not os.path.exists(model_dir):
            raise FileNotFoundError(f"Can't find '{model_dir}'.")
        # NOTE: torch.load unpickles the file; only load trusted model files.
        weight = torch.load(self._model_path(op, model_dir))
        # ``save_model`` stores the info under 'model_info'. Read it by name
        # rather than relying on it being the last key, and fall back to the
        # old positional behavior for files without that key.
        if 'model_info' in weight:
            model_info = weight.pop('model_info')
        else:
            model_info = weight.popitem()[1]
        model.set_model_info(model_info)
        model.load_state_dict(weight)
        # Prediction requires training=False; otherwise the input dims must
        # match the training input dims. Fitting resets the flag to True when
        # the model cache is cleared.
        model.training = False
        self.models[op] = model

    def save_model(self, model, op, model_dir):
        """Write the weights and model info of ``model`` for operator ``op``."""
        # exist_ok=True avoids the race between an existence check and creation.
        os.makedirs(model_dir, exist_ok=True)
        weight = model.state_dict()
        weight['model_info'] = model.get_model_info()
        torch.save(weight, self._model_path(op, model_dir))

    def save_models(self, model_dir):
        """Save every cached model into ``model_dir``."""
        for op, op_model in self.models.items():
            self.save_model(op_model, op, model_dir)
def analyse_module_profile(profile_file, key):
    """Read one numeric metric from a JSON module-profile file.

    Args:
        profile_file: Path of the JSON profile.
        key: Either ``'step_time'`` or ``'transformer_act_mem'``.

    Returns:
        The metric as ``float``, or ``float('inf')`` when the file cannot be
        read or parsed, or the value is missing or not numeric.

    Raises:
        AssertionError: If ``key`` is not one of the supported metrics.
    """
    if key not in ('step_time', 'transformer_act_mem'):
        raise AssertionError('key[{}] error'.format(key))

    try:
        # Open directly instead of checking existence first: no race window.
        with open(profile_file, 'r') as file:
            content = json.load(file)
        return float(content.get(key))
    except (OSError, ValueError, TypeError, AttributeError, OverflowError):
        # OSError: missing or unreadable file; ValueError: invalid JSON,
        # undecodable bytes or non-numeric text; TypeError: key absent or
        # unsupported value; AttributeError: top-level JSON is not an object;
        # OverflowError: integer too large for a float.
        return float('inf')
def get_kv_store():
    """Return the module-level key-value store.

    Raises:
        AssertionError: If the store is still ``None``.
    """
    store = KV_STORE
    if store is not None:
        return store
    raise AssertionError('KV_STORE must be initialized')
def build_initial_spaces(args):
    """Enumerate candidate parallel configurations ``[pp, tp, dp, cp, up, mbs]``.

    Reads ``nproc_per_node``, ``nnodes``, ``num_layers``, ``num_query_groups``,
    ``num_attention_heads``, ``global_batch_size`` and ``seq_length`` from
    ``args`` and returns every combination that passes the divisibility checks
    below, in enumeration order.
    """
    devices_per_node = args.nproc_per_node
    total_devices = devices_per_node * args.nnodes
    heads = args.num_attention_heads

    candidates = []
    for pp in range(1, total_devices + 1):
        # pp must divide both the device count and the layer count.
        if total_devices % pp != 0 or args.num_layers % pp != 0:
            continue
        devices_per_stage = total_devices // pp

        for exponent in range(devices_per_node):
            tp = 1 << exponent  # powers of two only
            if tp > devices_per_node or tp > devices_per_stage:
                break
            groups_misaligned = args.num_query_groups > 1 and args.num_query_groups % tp != 0
            if groups_misaligned or heads % tp != 0:
                break

            for cp_size in range(1, total_devices // (pp * tp) + 1):
                model_parallel = pp * tp * cp_size
                if total_devices % model_parallel != 0:
                    continue
                dp = total_devices // model_parallel
                if args.global_batch_size % dp != 0:
                    continue

                # Every micro batch size that evenly splits the per-dp batch,
                # ordered by increasing number of micro batches.
                batch_per_dp = args.global_batch_size // dp
                micro_batch_sizes = [
                    batch_per_dp // num_mb
                    for num_mb in range(1, batch_per_dp + 1)
                    if batch_per_dp % num_mb == 0
                ]

                # Split cp_size into up * cp.
                for up in range(1, cp_size + 1):
                    if cp_size % up != 0:
                        continue
                    cp = cp_size // up
                    heads_per_rank, leftover = divmod(heads, up * tp)
                    if heads_per_rank < 1 or leftover != 0:
                        continue
                    if args.seq_length % (2 * cp) != 0:
                        continue
                    candidates.extend([pp, tp, dp, cp, up, mbs] for mbs in micro_batch_sizes)
    return candidates
def search_optimal_configuration(args):
    """Run the parallel-strategy search across nodes.

    Every node joins a GLOO process group on ``master_port + 1``. Node 0
    builds and filters the search space, runs the grey-box search, broadcasts
    a message filled with ``-1`` and prints the result. The other nodes run
    ``monitor_train_task``.
    """
    set_kv_store(args)

    rendezvous_port = int(args.master_port) + 1
    torch.distributed.init_process_group(
        backend=torch.distributed.Backend.GLOO,
        init_method='tcp://{}:{}'.format(args.master_addr, rendezvous_port),
        rank=args.node_rank,
        world_size=args.nnodes
    )

    if args.node_rank != 0:
        monitor_train_task()
        return

    start_time = time.time()
    search_space = filter_unvalid_configs(build_initial_spaces(args))
    print(f"filter search_space: {len(search_space)}")
    print("\n".join(str(item) for item in search_space), flush=True)

    config, _ = SearchByGreyBox().search(get_args(), search_space)
    stop_message = torch.tensor([-1] * 7, dtype=torch.int)
    torch.distributed.broadcast(stop_message, 0)

    results = export_results(config)
    print(f"find optimal configuration: {results}, cost_time: {time.time() - start_time}")
class MemoryCostModel(metaclass=SingletonType):
    """Least-squares model of per-layer activation memory for a parallel config.

    A configuration is a 6-item sequence unpacked here as
    ``(pp, tp, _, cp, up, b)``: pipeline size, tensor size, one item this
    class does not use, context size, ulysses size and micro batch size.
    Memory values handled by the model are expressed in GiB.
    """

    # Number of bytes in one GiB.
    _GIB = 1024 ** 3

    def __init__(self):
        args = get_args()
        self.num_layers = args.num_layers
        self.num_attn_heads = args.num_attention_heads
        self.hidden_size = args.hidden_size
        self.seq_length = args.seq_length
        self.ffn_hidden_size = args.ffn_hidden_size
        if not self.ffn_hidden_size:
            # Fall back to 4 * hidden size when no FFN width is configured.
            self.ffn_hidden_size = 4 * self.hidden_size

        # Fitted coefficient vector (set by ``fit_model``).
        self.model = None
        # Profiled configurations and their measured memory, index-aligned.
        self.profiled_configs = []
        self.profiled_configs_memory = []
        # Device memory in GiB, queried lazily by ``is_oom``.
        self.max_available_memory = None

    @staticmethod
    def cal_coeff(config):
        """Return the 4-term feature vector of one configuration.

        The terms are ``1``, ``b/(tp*cp*up)``, ``b/(tp*cp*cp*up)`` and
        ``b/(cp*up)``.
        """
        _, tp, _, cp, up, b = config
        return np.array([
            1,
            b * (1 / tp) * (1 / cp) * (1 / up),
            b * (1 / tp) * (1 / cp) * (1 / cp) * (1 / up),
            b * (1 / cp) * (1 / up),
        ])

    @staticmethod
    def cal_coeff_matrix(configs):
        """Stack ``cal_coeff`` of every configuration into one matrix (one row each)."""
        return np.array([MemoryCostModel.cal_coeff(config) for config in configs])

    def is_oom(self, cost_memory):
        """Return True when ``cost_memory`` (GiB) exceeds 1.2x the memory of device 0."""
        if self.max_available_memory is None:
            properties = torch.npu.get_device_properties(0)
            self.max_available_memory = properties.total_memory / self._GIB
        # Predictions up to 1.2x the device memory are still accepted; only
        # larger ones are reported as out-of-memory.
        return cost_memory > (self.max_available_memory * 1.2)

    def get_fitting_configurations(self, search_spaces):
        """Pick one configuration per parallelism pattern to profile for fitting.

        Candidates (micro batch size below 8) are sorted into five groups
        (tp only, cp with pp, up with pp, cp+up, tp+cp+up). Every combination
        of one member per group is examined and the one whose normal matrix
        ``A^T A`` is full rank with the smallest condition number is kept.

        Returns:
            A tuple of five configurations, or ``None`` when the search space
            is empty or no combination qualifies.
        """
        if len(search_spaces) == 0:
            return None

        search_spaces_matrix = np.array(search_spaces)
        candidates = [config for config in search_spaces if config[-1] < 8]

        max_tp = search_spaces_matrix[:, 1].max()
        min_cp = search_spaces_matrix[:, 3].min()
        min_up = search_spaces_matrix[:, 4].min()

        tp_group, cp_group, up_group, cp_up_group, tp_cp_up_group = [], [], [], [], []
        for config in candidates:
            pp, tp, _, cp, up, _ = config
            if cp == 1 and up == 1 and tp == max_tp:
                tp_group.append(config)
            if not (tp > 1 or up > 1) and pp > 1 and cp > min_cp:
                cp_group.append(config)
            if not (tp > 1 or cp > 1) and pp > 1 and up > min_up:
                up_group.append(config)
            if tp == 1 and cp > 1 and up > 1:
                cp_up_group.append(config)
            if tp > 1 and cp > 1 and up > 1:
                tp_cp_up_group.append(config)

        fitting_group, cur_condition_number = None, float('inf')
        for group in product(tp_group, cp_group, up_group, cp_up_group, tp_cp_up_group):
            # A condition number below 100 is treated as good enough; stop
            # scanning further combinations once one has been found.
            if cur_condition_number < 100:
                break

            # Require at least two distinct micro batch sizes in the group.
            micro_batch_sizes = {row[-1] for row in group}
            if len(micro_batch_sizes) < 2:
                continue

            coeff_matrix = MemoryCostModel.cal_coeff_matrix(group)
            normal_matrix = coeff_matrix.transpose() @ coeff_matrix
            if np.linalg.matrix_rank(normal_matrix) == normal_matrix.shape[0]:
                con_num = np.linalg.cond(normal_matrix)
                if con_num < cur_condition_number:
                    fitting_group = group
                    cur_condition_number = con_num

        print(f"fitting_group: {fitting_group} condition_number: {cur_condition_number}", flush=True)
        return fitting_group

    def fit_model(self):
        """Fit the coefficient vector from the profiled configurations.

        Solves the linear least-squares problem directly instead of forming
        ``inv(A^T A) @ A^T @ y``; the result is the same for a full-rank
        sample but does not fail or blow up on an ill-conditioned one.
        """
        coeff_matrix = MemoryCostModel.cal_coeff_matrix(self.profiled_configs)
        profiled_configs_memory = np.array(self.profiled_configs_memory)
        self.model = np.linalg.lstsq(coeff_matrix, profiled_configs_memory, rcond=None)[0]

    def predict(self, config):
        """Return the fitted model's prediction for ``config`` (requires ``fit_model``)."""
        return MemoryCostModel.cal_coeff(config) @ self.model

    def get_peak_memory(self, config):
        """Estimate the peak memory in GiB of ``config``.

        Combines parameter, gradient, optimizer and activation terms computed
        from the model sizes and the fitted activation prediction, then adds a
        fixed 4 GiB.
        NOTE(review): the per-parameter multipliers (2, 2, 12/16) look like
        bytes per parameter, and the 4 GiB looks like a safety margin --
        confirm with the authors.
        """
        args = get_args()
        pp, tp = config[0], config[1]
        hidden_size = self.hidden_size
        ffn_hidden_size = self.ffn_hidden_size
        if args.swiglu:
            ffn_hidden_size *= 2
        transformer_params_count = (4 * hidden_size * hidden_size + 2 * hidden_size * ffn_hidden_size) / tp
        layers_per_stage = self.num_layers // pp
        total_params_count = transformer_params_count * layers_per_stage

        mem_para = 2 * total_params_count
        mem_grad = 2 * total_params_count
        mem_optimizer = 12 * total_params_count if args.reuse_fp32_param else 16 * total_params_count
        # The prediction is in GiB per layer; abs() guards against a negative fit.
        mem_activation_layer = abs(self.predict(config)) * self._GIB
        mem_activation_batch = mem_activation_layer * layers_per_stage
        mem_activation = mem_activation_batch * pp
        # Two candidate peaks: without gradients, and with gradients but one
        # micro batch of activations released.
        m1 = mem_para + mem_optimizer + mem_activation
        m2 = mem_para + mem_optimizer + mem_activation + mem_grad - mem_activation_batch
        peak_memory = max(m1, m2)
        return peak_memory / self._GIB + 4
class Linear(torch.nn.Module):
    """Matmul of ``x`` with the transpose of ``y``; takes both as one sequence."""

    def __init__(self):
        super().__init__()

    def forward(self, inputs):
        x, y = inputs
        return torch.matmul(x, y.t())


class LayerNorm(torch.nn.Module):
    """``torch.nn.LayerNorm`` wrapper whose forward takes a one-item sequence."""

    def __init__(self, hidden_size, eps=1e-5):
        super().__init__()
        self.layer_norm = torch.nn.LayerNorm(normalized_shape=hidden_size, eps=eps)

    def forward(self, x):
        return self.layer_norm(*x)


class FusedRmsNorm(torch.nn.Module):
    """RMS norm through ``torch_npu.npu_rms_norm``; forward takes a one-item sequence."""

    def __init__(self, hidden_size, eps=1e-6) -> None:
        super().__init__()
        # Move the tensor to the NPU *before* wrapping it: calling ``.npu()``
        # on a Parameter returns a plain non-leaf tensor, which the module
        # would not register as a parameter.
        self.weight = torch.nn.Parameter(torch.ones(hidden_size, dtype=torch.float16).npu())
        self.eps = eps

    def forward(self, x):
        return torch_npu.npu_rms_norm(x[0], self.weight, epsilon=self.eps)[0]


class BatchMatMul(torch.nn.Module):
    """``torch.bmm`` of the two tensors passed as one sequence."""

    def __init__(self):
        super().__init__()

    def forward(self, inputs):
        x, y = inputs
        return torch.bmm(x, y)


class FlashAttention(torch.nn.Module):
    """``torch_npu.npu_fusion_attention`` in 'SBH' layout on a ``(q, k, v)`` sequence."""

    def __init__(self, head_dim):
        super().__init__()
        self.head_dim = head_dim
        self.scale = 1.0 / math.sqrt(self.head_dim)
        self.pre_tockens = 65536
        self.next_tockens = 0

        generate_attention_mask()
        self.attention_mask = get_attention_mask()

    def forward(self, x):
        q, k, v = x
        # The last dimension of q is heads * head_dim.
        head_num = q.shape[2] // self.head_dim
        return torch_npu.npu_fusion_attention(
            q, k, v, head_num, 'SBH',
            pse=None,
            padding_mask=None,
            atten_mask=self.attention_mask,
            scale=self.scale,
            pre_tockens=self.pre_tockens,
            next_tockens=self.next_tockens,
            keep_prob=1.0,
            inner_precise=0,
            sparse_mode=get_args().sparse_mode
        )[0]


class TransformerBlock:
    """Sampled forward/backward time of one transformer layer under the current args."""

    def __init__(self):
        # Number of noise samples carried through every time estimate.
        self.number_sample = 100
        self.noise_model = OperatorNoiseSampler(self.number_sample)

    def norm(self):
        """Return (forward, backward) time samples of the configured normalization."""
        args = get_args()
        tp = args.tensor_model_parallel_size
        cp = args.context_parallel_size // args.ulysses_degree_in_cp
        up = args.ulysses_degree_in_cp
        input_shape = [args.seq_length // cp // tp // up, args.micro_batch_size, args.hidden_size]
        if args.normalization == 'RMSNorm':
            return self.noise_model.fused_rms_norm(input_shape, input_shape, args.hidden_size)
        return self.noise_model.layernorm(input_shape, input_shape, args.hidden_size)

    def self_attention_with_fa(self):
        """Return (forward, backward) time samples of one flash-attention call."""
        args = get_args()
        tp = args.tensor_model_parallel_size
        cp = args.context_parallel_size // args.ulysses_degree_in_cp
        up = args.ulysses_degree_in_cp
        # q, k, v and the output all share this shape; each gets its own list.
        shapes = [
            [args.seq_length // cp, args.micro_batch_size, args.hidden_size // tp // up]
            for _ in range(4)
        ]
        return self.noise_model.flash_attention(
            *shapes,
            args.hidden_size // args.num_attention_heads,
        )

    def get_block_time(self):
        """Accumulate the (forward, backward) time samples of one layer.

        Sums, in order: norm, the tensor-parallel all-gather/reduce-scatter
        pair, the QKV matmul, the attention part (all-to-all, flash attention
        repeated over the context-parallel steps, all-to-all), the projection
        matmul, a second norm and the two FFN matmuls, each followed by its
        all-gather/reduce-scatter pair.

        Raises:
            AssertionError: when ``args.use_flash_attn`` is not set.
        """
        args = get_args()
        s = args.seq_length
        a = args.num_attention_heads
        h = args.hidden_size
        ffn = args.ffn_hidden_size if args.ffn_hidden_size is not None else 4 * args.hidden_size
        d = args.hidden_size // args.num_attention_heads
        b = args.micro_batch_size
        tp = args.tensor_model_parallel_size
        cp = args.context_parallel_size // args.ulysses_degree_in_cp
        up = args.ulysses_degree_in_cp
        seq_shard = s // cp // up
        tokens = seq_shard * b

        fwd_time = np.zeros(self.number_sample, dtype=np.float64)
        bwd_time = np.zeros(self.number_sample, dtype=np.float64)

        # The same collective pair is charged four times below.
        all_gather_time = CommProfiling.get_comm_time([seq_shard // tp, b, h], tp, 'all_gather')
        reduce_scatter_time = CommProfiling.get_comm_time([seq_shard, b, h], tp, 'reduce_scatter')

        ftime, btime = self.norm()
        fwd_time += ftime
        bwd_time += btime

        fwd_time += all_gather_time
        bwd_time += reduce_scatter_time

        # h -> 3h (QKV projection)
        ftime, btime = self.noise_model.matmul(
            [tokens, h],
            [3 * h // tp, h],
            [tokens, 3 * h // tp]
        )
        fwd_time += ftime
        bwd_time += btime

        if not args.use_flash_attn:
            raise AssertionError('the auto-parallel only support FA')

        alltoall_time = CommProfiling.get_comm_time([seq_shard, b, a // tp, d], up, 'alltoall')
        fwd_time += (3 * alltoall_time)
        bwd_time += (3 * alltoall_time)

        send_recv_time = CommProfiling.get_send_recv_time([2, 2, s // cp // 2, b, a // tp // up * d])
        ftime, btime = self.self_attention_with_fa()
        # Each of the cp - 1 extra steps costs the slower of compute and send/recv.
        for _ in range(cp - 1):
            fwd_time += max([ftime.max(), send_recv_time])
            bwd_time += max([btime.max(), send_recv_time])
        fwd_time += ftime
        bwd_time += btime

        alltoall_time = CommProfiling.get_comm_time([s // cp, b, a // tp // up, d], up, 'alltoall')
        fwd_time += alltoall_time
        bwd_time += alltoall_time

        # h -> h (attention output projection)
        ftime, btime = self.noise_model.matmul([tokens, h // tp], [h, h // tp], [tokens, h])
        fwd_time += ftime
        bwd_time += btime

        fwd_time += reduce_scatter_time
        bwd_time += all_gather_time

        ftime, btime = self.norm()
        fwd_time += ftime
        bwd_time += btime

        fwd_time += all_gather_time
        bwd_time += reduce_scatter_time

        # h -> ffn
        ftime, btime = self.noise_model.matmul([tokens, h], [ffn // tp, h], [tokens, ffn // tp])
        fwd_time += ftime
        bwd_time += btime

        # ffn -> h
        ftime, btime = self.noise_model.matmul([tokens, ffn // tp], [h, ffn // tp], [tokens, h])
        fwd_time += ftime
        bwd_time += btime

        fwd_time += reduce_scatter_time
        bwd_time += all_gather_time

        return fwd_time, bwd_time


class OperatorNoiseSampler:
    """Measures operator times (or reads them from ``operator_cache``) and samples noise."""

    def __init__(self, num_sample=100):
        self.sampling = Sampler(num_sample=num_sample)

    @staticmethod
    def _time_op(op, left_matrix, right_matrix):
        """Return the mean time in microseconds of ``op(left, right)`` after a warm-up."""
        for _ in range(ITERATION_LOOP_TIME):
            op(left_matrix, right_matrix)

        torch.npu.synchronize()
        start_time = time.time()
        for _ in range(ITERATION_LOOP_TIME):
            op(left_matrix, right_matrix)
        torch.npu.synchronize()
        return (time.time() - start_time) * 1e6 / ITERATION_LOOP_TIME

    @staticmethod
    def measure_matmul_time(left_shape, left_transpose, right_shape, right_transpose):
        """Time ``torch.matmul`` on buffers of the given shapes, optionally transposed."""
        left_matrix = GlobalMemoryBuffer.get_tensor(left_shape, 0)
        if left_transpose:
            left_matrix = left_matrix.t()
        right_matrix = GlobalMemoryBuffer.get_tensor(right_shape, 1)
        if right_transpose:
            right_matrix = right_matrix.t()
        return OperatorNoiseSampler._time_op(torch.matmul, left_matrix, right_matrix)

    @staticmethod
    def measure_batchmatmul_time(left_shape, left_transpose, right_shape, right_transpose):
        """Time ``torch.bmm`` on buffers of the given shapes, optionally transposed."""
        left_matrix = GlobalMemoryBuffer.get_tensor(left_shape, 0)
        if left_transpose:
            left_matrix = left_matrix.permute(0, 2, 1)
        # NOTE(review): both operands request buffer index 0 here, whereas
        # measure_matmul_time uses 0 and 1 -- confirm this is intended.
        right_matrix = GlobalMemoryBuffer.get_tensor(right_shape, 0)
        if right_transpose:
            right_matrix = right_matrix.permute(0, 2, 1)
        return OperatorNoiseSampler._time_op(torch.bmm, left_matrix, right_matrix)

    def matmul(self, input_shape1, input_shape2, output_shape):
        """Return (forward, backward) samples of ``x @ w.t()``; backward sums both gradients."""
        ftime, _, from_cache = operator_cache.find('MatMul', [input_shape1, input_shape2])
        if not from_cache:
            ftime = self.measure_matmul_time(input_shape1, False, input_shape2, True)
        ftime_uncertainty = self.sampling.run('MatMul', ftime, output_shape, input_shape1, input_shape2)
        operator_cache.record('MatMul', [input_shape1, input_shape2], output_shape, ftime, 0)

        btime1, _, from_cache = operator_cache.find('MatMul', [output_shape, input_shape2])
        if not from_cache:
            btime1 = self.measure_matmul_time(output_shape, False, input_shape2, False)
        btime1_uncertainty = self.sampling.run('MatMul', btime1, input_shape1, output_shape, input_shape2)
        operator_cache.record('MatMul', [output_shape, input_shape2], input_shape1, btime1, 0)

        btime2, _, from_cache = operator_cache.find('MatMul', [output_shape, input_shape1])
        if not from_cache:
            btime2 = self.measure_matmul_time(output_shape, True, input_shape1, False)
        btime2_uncertainty = self.sampling.run('MatMul', btime2, input_shape2, output_shape, input_shape1)
        operator_cache.record('MatMul', [output_shape, input_shape1], input_shape2, btime2, 0)
        return ftime_uncertainty, btime1_uncertainty + btime2_uncertainty

    def batch_matmul(self, input_shape1, input_shape2, output_shape):
        """Return (forward, backward) samples of ``torch.bmm``; backward sums both gradients."""
        ftime, _, from_cache = operator_cache.find('BatchMatMul', [input_shape1, input_shape2])
        if not from_cache:
            ftime = self.measure_batchmatmul_time(input_shape1, False, input_shape2, False)
        ftime_uncertainty = self.sampling.run('BatchMatMul', ftime, output_shape, input_shape1, input_shape2)
        operator_cache.record('BatchMatMul', [input_shape1, input_shape2], output_shape, ftime, 0)

        btime1, _, from_cache = operator_cache.find('BatchMatMul', [input_shape1, output_shape])
        if not from_cache:
            btime1 = self.measure_batchmatmul_time(input_shape1, True, output_shape, False)
        btime1_uncertainty = self.sampling.run('BatchMatMul', btime1, input_shape2, input_shape1, output_shape)
        operator_cache.record('BatchMatMul', [input_shape1, output_shape], input_shape2, btime1, 0)

        btime2, _, from_cache = operator_cache.find('BatchMatMul', [output_shape, input_shape2])
        if not from_cache:
            btime2 = self.measure_batchmatmul_time(output_shape, False, input_shape2, True)
        btime2_uncertainty = self.sampling.run('BatchMatMul', btime2, input_shape1, output_shape, input_shape2)
        operator_cache.record('BatchMatMul', [output_shape, input_shape2], input_shape1, btime2, 0)
        return ftime_uncertainty, btime1_uncertainty + btime2_uncertainty

    def layernorm(self, input_shape, output_shape, hidden_size, eps=1e-5):
        """Return (forward, backward) samples of LayerNorm on ``input_shape``."""
        ftime, btime, from_cache = operator_cache.find('LayerNorm', input_shape)
        if not from_cache:
            # Build the module only when it actually has to be profiled.
            ftime, btime = TimeCostModel.profile(LayerNorm(hidden_size, eps), [input_shape])
        ftime_uncertainty = self.sampling.run('LayerNorm', ftime, output_shape, input_shape)
        btime_uncertainty = self.sampling.run('LayerNormGrad', btime, input_shape, output_shape)
        operator_cache.record('LayerNorm', input_shape, output_shape, ftime, btime)
        return ftime_uncertainty, btime_uncertainty

    def fused_rms_norm(self, input_shape, output_shape, hidden_size, eps=1e-6):
        """Return (forward, backward) samples of the fused RMS norm on ``input_shape``."""
        ftime, btime, from_cache = operator_cache.find('RmsNorm', input_shape)
        if not from_cache:
            # Build the module only when it actually has to be profiled.
            ftime, btime = TimeCostModel.profile(FusedRmsNorm(hidden_size, eps), [input_shape])
        ftime_uncertainty = self.sampling.run('RmsNorm', ftime, output_shape, input_shape)
        btime_uncertainty = self.sampling.run('RmsNormGrad', btime, output_shape, input_shape)
        operator_cache.record('RmsNorm', input_shape, output_shape, ftime, btime)
        return ftime_uncertainty, btime_uncertainty

    def flash_attention(self, q, k, v, output_shape, head_dim):
        """Return (forward, backward) samples of flash attention for the given shapes."""
        # Built unconditionally: the constructor also generates the attention mask.
        flash_attn = FlashAttention(head_dim)
        ftime, btime, from_cache = operator_cache.find('FlashAttentionScore', [q, k, v])
        if not from_cache:
            ftime, btime = TimeCostModel.profile(flash_attn, [q, k, v])
        ftime_uncertainty = self.sampling.run('FlashAttentionScore', ftime, output_shape, q, k, v)
        btime_uncertainty = self.sampling.run('FlashAttentionScoreGrad', btime, output_shape, q, k, v)
        operator_cache.record('FlashAttentionScore', [q, k, v], q, ftime, btime)
        return ftime_uncertainty, btime_uncertainty


class TimeCostModel(object):
    """Iteration-time estimate built from per-layer times and a 1F1B pipeline schedule."""

    def __init__(self):
        args = get_args()
        self.seq_length = args.seq_length
        self.hidden_size = args.hidden_size
        self.pp_size = args.pipeline_model_parallel_size
        self.dp_size = args.data_parallel_size
        self.micro_batch_size = args.micro_batch_size
        self.num_layers_per_stage = args.num_layers // args.pipeline_model_parallel_size
        self.num_micro_batch = args.global_batch_size // args.micro_batch_size // args.data_parallel_size

    def get_iteration_time(self):
        """Return one estimated iteration time per noise sample of the layer times."""
        fwd_time, bwd_time = TransformerBlock().get_block_time()
        fwd_time *= self.num_layers_per_stage
        bwd_time *= self.num_layers_per_stage
        return np.array(
            [self.pipeline_costmodel(fwd, bwd) for fwd, bwd in zip(fwd_time, bwd_time)],
            dtype=np.float64
        )

    def pipeline_costmodel(self, fwd_time, bwd_time):
        """Return the end-to-end time of one iteration.

        Args:
            fwd_time: forward time of one micro batch on one stage.
            bwd_time: backward time of one micro batch on one stage.
        """
        if self.pp_size == 1:
            return (fwd_time + bwd_time) * self.num_micro_batch

        send_recv_time = CommProfiling.get_send_recv_time(
            [self.seq_length, self.micro_batch_size, self.hidden_size]
        )
        return TimeCostModel._simulate_1f1b(
            self.pp_size, self.num_micro_batch, fwd_time, bwd_time, send_recv_time
        )

    @staticmethod
    def _simulate_1f1b(pp_size, num_micro_batch, fwd_time, bwd_time, send_recv_time):
        """Simulate a warm-up / 1F1B / cool-down schedule and return its makespan.

        Stages ``p`` and micro batches ``m`` are 1-based; row/column 0 of the
        tables stay zero and act as "nothing happened yet".
        """
        shape = (pp_size + 1, num_micro_batch + 1)
        start_fwd = np.zeros(shape, np.float64)
        start_bwd = np.zeros(shape, np.float64)
        end_fwd = np.zeros(shape, np.float64)
        end_bwd = np.zeros(shape, np.float64)

        # Warm-up forwards per stage, clamped so that fewer micro batches than
        # pipeline stages no longer index past the tables.
        warmup = [0] + [min(pp_size - p, num_micro_batch) for p in range(1, pp_size + 1)]
        remaining = [0] + [num_micro_batch - warmup[p] for p in range(1, pp_size + 1)]

        # warm-up: forwards only
        for p in range(1, pp_size + 1):
            for m in range(1, warmup[p] + 1):
                if p == 1:
                    start_fwd[p][m] = (m - 1) * fwd_time
                    end_fwd[p][m] = m * fwd_time
                else:
                    start_fwd[p][m] = max(end_fwd[p][m - 1], end_fwd[p - 1][m] + send_recv_time)
                    end_fwd[p][m] = start_fwd[p][m] + fwd_time

        for num_1f1b in range(1, num_micro_batch + 1):
            # forward of 1f1b
            for p in range(1, pp_size + 1):
                if num_1f1b > remaining[p]:
                    # this stage is already cooling down
                    continue
                m = warmup[p] + num_1f1b
                # A stage is free once its previous backward AND its previous
                # forward are done (the latter matters right after warm-up).
                ready = max(end_bwd[p][num_1f1b - 1], end_fwd[p][m - 1])
                if p > 1:
                    ready = max(ready, end_fwd[p - 1][m] + send_recv_time)
                start_fwd[p][m] = ready
                end_fwd[p][m] = ready + fwd_time

            # backward of 1f1b
            for p in range(pp_size, 0, -1):
                m = num_1f1b
                if num_1f1b > remaining[p]:
                    continue
                if p == pp_size:
                    start_bwd[p][m] = end_fwd[p][m]
                else:
                    start_bwd[p][m] = max(end_fwd[p][m + pp_size - p], end_bwd[p + 1][m] + send_recv_time)
                end_bwd[p][m] = start_bwd[p][m] + bwd_time

            # cool-down: backwards only (never reached by the last stage)
            for p in range(pp_size, 0, -1):
                m = num_1f1b
                if num_1f1b <= remaining[p]:
                    continue
                start_bwd[p][m] = max(end_bwd[p][m - 1], end_bwd[p + 1][m] + send_recv_time)
                end_bwd[p][m] = start_bwd[p][m] + bwd_time

        # Latest backward over the real stages 1..pp_size.
        e2e_time = end_bwd[1:].max()
        # allreduce_gradients: not modelled, contributes nothing
        e2e_time += 0.0
        return e2e_time

    @staticmethod
    def profile(model, shapes):
        """Measure forward and backward time (microseconds) of ``model``.

        Inputs are taken from ``GlobalMemoryBuffer``, one per shape. The
        backward time is the forward+sum+backward loop time minus the
        separately measured forward and ``torch.sum`` times.
        """
        model.to(torch.cuda.current_device())

        input_tensors = [
            GlobalMemoryBuffer.get_tensor(shape, index).requires_grad_()
            for index, shape in enumerate(shapes)
        ]

        # warm-up; keep the last output to time the loss reduction below
        sum_z = None
        for _ in range(3):
            sum_z = model(input_tensors)

        # forward_time
        torch.npu.synchronize()
        start_time = time.time()
        for _ in range(ITERATION_LOOP_TIME):
            model(input_tensors)
        torch.npu.synchronize()
        fwd_time = (time.time() - start_time) * 1e6 / ITERATION_LOOP_TIME

        # backward warm-up
        for _ in range(3):
            z = model(input_tensors)
            loss = torch.sum(z)
            loss.backward()

        # loss_time
        torch.npu.synchronize()
        start_time = time.time()
        for _ in range(ITERATION_LOOP_TIME):
            torch.sum(sum_z)
        torch.npu.synchronize()
        loss_time = (time.time() - start_time) * 1e6 / ITERATION_LOOP_TIME

        # forward + loss + backward
        torch.npu.synchronize()
        start_time = time.time()
        for _ in range(ITERATION_LOOP_TIME):
            z = model(input_tensors)
            loss = torch.sum(z)
            loss.backward()
        torch.npu.synchronize()
        bwd_time = (time.time() - start_time) * 1e6 / ITERATION_LOOP_TIME - fwd_time - loss_time
        return fwd_time, bwd_time
class SearchByGreyBox:
    """Search loop alternating theoretical modelling with real profiling runs.

    Every round re-estimates all configurations with ``TimeCostModel``, picks
    one configuration that is best for at least one noise sample and has not
    been tried, profiles it, and retrains the operator noise models.
    """

    # Wall-clock budget of ``search`` in hours.
    MAX_SEARCH_HOURS = 8

    def __init__(self, stop_threshold=0.05):
        self.operators = [
            'MatMul',
            'RmsNorm',
            'RmsNormGrad',
            'LayerNorm',
            'LayerNormGrad',
            'FlashAttentionScore',
            'FlashAttentionScoreGrad'
        ]

        # Keep only the operators of the normalization actually in use.
        args = get_args()
        if args.normalization == 'RMSNorm':
            self.operators.remove('LayerNorm')
            self.operators.remove('LayerNormGrad')
        else:
            self.operators.remove('RmsNorm')
            self.operators.remove('RmsNormGrad')

        self.stop_threshold = stop_threshold
        # measured duration -> str(config) of the profiled configuration
        self.config_performances = {}
        # str(config) of every configuration already handed out for profiling
        self.exist_config = []
        # one column per str(config), one row per noise sample
        self.e2e_log = pd.DataFrame()

    @staticmethod
    def find_csv(operator_profile, key='kernel_details'):
        """Return the first (sorted) absolute path of a ``*.csv`` under
        ``operator_profile`` whose path contains ``key``, or ``None``."""
        csv_files = sorted(
            os.path.abspath(str(csv_path))
            for csv_path in Path(operator_profile).rglob('*.csv')
            if key in str(csv_path)
        )
        if not csv_files:
            print("not find kernel_details.csv")
            return None
        return csv_files[0]

    @staticmethod
    def theory_modeling(config):
        """Return the modelled iteration times of ``config``; global args are restored after."""
        base_launch = BaseLaunch()
        base_launch.update_args(config)
        cost_time = TimeCostModel().get_iteration_time()
        base_launch.recover_args()
        return cost_time

    def save(self, config, cost_time):
        """Store the modelled times of ``config`` as column ``str(config)``."""
        self.e2e_log[str(config)] = cost_time

    def generate_config(self):
        """Pick the next configuration to profile.

        For each row of the log the fastest column is taken; those not yet
        tried form the candidates. One candidate is drawn at random while the
        candidate share exceeds ``stop_threshold``.

        Returns:
            The parsed configuration, or ``None`` when the search should stop
            (share at or below the threshold, or nothing logged yet).
        """
        best_config = self.e2e_log.idxmin(axis=1).values
        if len(best_config) == 0:
            # Nothing has been modelled yet, so there is nothing to choose from.
            return None

        explored = set(self.exist_config)
        rest_config = [name for name in best_config if str(name) not in explored]
        prop = len(rest_config) / len(best_config)
        if prop > self.stop_threshold:
            sample = random.choice(rest_config)
            self.exist_config.append(sample)
            return ast.literal_eval(sample)
        print(f'Unexplored proportion: {prop} < stop_thd :{self.stop_threshold}, early stop triggered.')
        return None

    def train(self, train_profiling_file, train_operator_data):
        """Fit the noise model of every operator, creating and caching missing ones."""
        for operator in self.operators:
            model = model_manager.get_cached_model(operator)
            if model is None:
                likelihood = gpytorch.likelihoods.GaussianLikelihood(
                    gpytorch.priors.NormalPrior(1e-3, 0.02)
                )
                model = ExactGPModel(operator=operator, likelihood=likelihood)
                model_manager.cache_model(model, operator)
            model.fit(train_profiling_file, train_operator_data)

    def load_base_model(self, model_dir):
        """Try to load a stored noise model per operator from ``model_dir``."""
        for operator in self.operators:
            likelihood = gpytorch.likelihoods.GaussianLikelihood(gpytorch.priors.NormalPrior(1e-3, 0.02))
            model = ExactGPModel(operator=operator, likelihood=likelihood)
            try:
                model_manager.load_model(model, operator, model_dir)
            except Exception:
                # Best effort: a missing or unreadable checkpoint is only reported.
                print(f"{operator} load error")

    def _has_unprofiled(self, search_spaces):
        """Return True while some configuration of the search space was never sampled."""
        explored = set(self.exist_config)
        return any(str(config) not in explored for config in search_spaces)

    def search(self, args, search_spaces):
        """Run the search and return ``(best_config, best_duration)``.

        Args:
            args: unused; kept for interface compatibility.
            search_spaces: list of candidate configurations. Configurations
                whose profiling reports an infinite duration are removed from
                this list in place.

        Raises:
            ValueError: when the search ends before any configuration was profiled.
        """
        start_time = time.time()
        self.load_base_model(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), 'noise_predict_ckpt')
        )
        # Continue while time remains and unsampled configurations exist. The
        # former check compared len(config_performances) with the shrinking
        # search space, which miscounts as soon as one run fails.
        while ((time.time() - start_time) / 3600) < self.MAX_SEARCH_HOURS \
                and self._has_unprofiled(search_spaces):
            # Re-model every configuration with the current noise models.
            for config in search_spaces:
                cost_time = SearchByGreyBox.theory_modeling(config)
                self.save(config, cost_time)
                print(f"complete model config: {config}", flush=True)

            next_config = self.generate_config()
            if next_config is None:
                break
            print(f"next_config={next_config}", flush=True)

            operator_profile_path, analyse_thread = DistributedOperateProfiler().launch(next_config)
            duration_time = DistributedPerformanceProfiler().launch(next_config)
            self.config_performances[duration_time] = str(next_config)
            if math.isinf(duration_time):
                search_spaces.remove(next_config)
            if analyse_thread is not None:
                # Wait for the profile analysis before looking for its CSV.
                analyse_thread.join()

            operator_data = operator_cache.data_frame
            operator_profile = SearchByGreyBox.find_csv(operator_profile_path)
            if operator_profile is not None:
                print(f"operator_data: {operator_data}\noperator_profile: {operator_profile}")
                self.train(operator_profile, operator_data)
            sample_cache.clear_cache()

        model_manager.save_models('final_model')
        if not self.config_performances:
            raise ValueError('auto-parallel search finished without profiling any configuration')
        min_key = min(self.config_performances)
        return ast.literal_eval(self.config_performances[min_key]), min_key
+import os +import stat +import sys +import time +import json +import copy +import re +import operator +import functools +import subprocess +import signal +import threading + +import pandas as pd +import torch +import torch_npu +from torch_npu.profiler.profiler import analyse +from megatron.training.global_vars import set_args, get_args + +from mindspeed.core.auto_parallel import ( + SingletonType, + get_cache_path, + get_kv_store, + analyse_module_profile, + MODULE_PATTERN, + OPERATOR_PATTERN, + BAND_WIDTH_UNIDIRECTIONAL +) + + +class BaseLaunch: + def __init__(self): + self.old_args = None + + def launch(self, config): + def update_or_append_param(argv: list, key, value=None): + if not value: + argv.append(key) + return + + if key in argv: + argv[argv.index(key) + 1] = value + else: + argv.extend([key, value]) + + def remove_param(argv: list, key, has_value=False): + if key in argv: + pos = argv.index(key) + argv.pop(pos) + if has_value: + argv.pop(pos) + + def monitor_exit(process): + while True: + exit_flag = get_kv_store().get("exit_flag") + if int(exit_flag) == 1: + try: + process_group_id = os.getpgid(process.pid) + os.killpg(process_group_id, signal.SIGKILL) + break + except ProcessLookupError: + break + time.sleep(60) + + args = get_args() + argv: list = sys.argv[1:] + update_or_append_param(argv, '--eval-iters', '0') + update_or_append_param(argv, '--train-iters', '5') + update_or_append_param(argv, '--global-batch-size', str(args.global_batch_size)) + update_or_append_param(argv, '--num-layers', str(args.num_layers)) + update_or_append_param(argv, '--pipeline-model-parallel-size', str(args.pipeline_model_parallel_size)) + update_or_append_param(argv, '--tensor-model-parallel-size', str(args.tensor_model_parallel_size)) + update_or_append_param(argv, '--micro-batch-size', str(args.micro_batch_size)) + update_or_append_param(argv, '--sequence-parallel') + if args.profile_operator: + update_or_append_param(argv, '--profile-operator') + if 
args.profile_memory: + update_or_append_param(argv, '--profile-memory') + if args.module_profile_path: + update_or_append_param(argv, '--prof-file', str(args.module_profile_path)) + if args.context_parallel_algo == 'hybrid_cp_algo': + update_or_append_param(argv, '--context-parallel-algo', 'hybrid_cp_algo') + update_or_append_param(argv, '--context-parallel-size', str(args.context_parallel_size)) + update_or_append_param(argv, '--ulysses-degree-in-cp', str(args.ulysses_degree_in_cp)) + if args.context_parallel_algo == 'megatron_cp_algo': + update_or_append_param(argv, '--context-parallel-algo', 'megatron_cp_algo') + update_or_append_param(argv, '--context-parallel-size', str(args.context_parallel_size)) + if args.context_parallel_algo == 'ulysses_cp_algo': + update_or_append_param(argv, '--context-parallel-algo', 'ulysses_cp_algo') + update_or_append_param(argv, '--context-parallel-size', str(args.context_parallel_size)) + remove_param(argv, '--auto-parallel') + + command = [ + 'torchrun', + '--nproc_per_node', str(args.nproc_per_node), + '--nnodes', str(args.nnodes), + '--node-rank', str(args.node_rank), + '--master_addr', str(args.master_addr), + '--master_port', str(args.master_port), + str(sys.argv[0]) + ] + argv + + get_kv_store().set("exit_flag", "0") + process = subprocess.Popen(command, shell=False, preexec_fn=lambda: os.setpgrp()) + monitor_thread = threading.Thread(target=monitor_exit, args=(process,)) + monitor_thread.start() + process.wait() + get_kv_store().set("exit_flag", "1") + torch.distributed.barrier() + + def update_args(self, config): + args = get_args() + self.old_args = copy.deepcopy(args) + + args.pipeline_model_parallel_size = config[0] + args.tensor_model_parallel_size = config[1] + args.data_parallel_size = config[2] + args.context_parallel_size = config[3] * config[4] + args.ulysses_degree_in_cp = config[4] + args.micro_batch_size = config[5] + if config[3] > 1 and config[4] > 1: + args.context_parallel_algo = 'hybrid_cp_algo' + 
args.use_cp_send_recv_overlap = True + elif config[3] > 1 and config[4] == 1: + args.context_parallel_algo = 'megatron_cp_algo' + args.use_cp_send_recv_overlap = True + elif config[3] == 1 and config[4] > 1: + args.context_parallel_algo = 'ulysses_cp_algo' + + def recover_args(self): + set_args(self.old_args) + + +class DistributedMemoryProfiler(BaseLaunch): + def update_args(self, config): + super().update_args(config) + args = get_args() + args.module_profile_path = (get_cache_path() + MODULE_PATTERN).format(*config) + args.global_batch_size = args.pipeline_model_parallel_size * args.data_parallel_size * args.micro_batch_size + args.num_layers = args.pipeline_model_parallel_size + args.profile_memory = True + + def launch(self, config): + args = get_args() + if args.node_rank != 0: + self.update_args(config) + super().launch(config) + super().recover_args() + return None + + self.update_args(config) + module_profile_path = get_args().module_profile_path + if os.path.exists(module_profile_path): + super().recover_args() + return analyse_module_profile(module_profile_path, key='transformer_act_mem') + + buffer = config + [0] + torch.distributed.broadcast(torch.tensor(buffer, dtype=torch.int), 0) + + super().launch(config) + super().recover_args() + return analyse_module_profile(module_profile_path, key='transformer_act_mem') + + +class DistributedOperateProfiler(BaseLaunch): + def update_args(self, config): + super().update_args(config) + args = get_args() + args.module_profile_path = None + args.operator_profile_path = (get_cache_path() + OPERATOR_PATTERN).format(*config) + args.global_batch_size = 4 * args.pipeline_model_parallel_size * args.data_parallel_size * args.micro_batch_size + args.num_layers = 2 * args.pipeline_model_parallel_size + args.profile_operator = True + + def launch(self, config): + self.update_args(config) + args = get_args() + if args.node_rank != 0: + super().launch(config) + super().recover_args() + return None + + operator_profile_path = 
args.operator_profile_path + if os.path.exists(operator_profile_path): + super().recover_args() + return operator_profile_path, None + + buffer = config + [1] + torch.distributed.broadcast(torch.tensor(buffer, dtype=torch.int), 0) + + os.environ['ASCEND_WORK_PATH'] = operator_profile_path + os.makedirs(operator_profile_path) + super().launch(config) + super().recover_args() + + analyse_thread = threading.Thread( + target=analyse, args=(operator_profile_path + os.sep + 'profiling_data', 32) + ) + analyse_thread.daemon = True + analyse_thread.start() + return operator_profile_path, analyse_thread + + +class DistributedPerformanceProfiler(BaseLaunch): + def update_args(self, config): + super().update_args(config) + args = get_args() + args.module_profile_path = (get_cache_path() + MODULE_PATTERN).format(*config) + + def launch(self, config): + self.update_args(config) + args = get_args() + if args.node_rank != 0: + super().launch(config) + super().recover_args() + return None + + module_profile_path = get_args().module_profile_path + if os.path.exists(module_profile_path): + super().recover_args() + return analyse_module_profile(module_profile_path, key='step_time') + + buffer = config + [2] + torch.distributed.broadcast(torch.tensor(buffer, dtype=torch.int), 0) + super().launch(config) + super().recover_args() + return analyse_module_profile(module_profile_path, key='step_time') + + +class OperateProfile(metaclass=SingletonType): + def __init__(self, args): + experimental_config = torch_npu.profiler._ExperimentalConfig( + profiler_level=torch_npu.profiler.ProfilerLevel.Level2, + data_simplification=False + ) + activities = [torch_npu.profiler.ProfilerActivity.CPU, torch_npu.profiler.ProfilerActivity.NPU] + self.op_profiler = torch_npu.profiler.profile( + activities=activities, + record_shapes=True, + schedule=torch_npu.profiler.schedule(wait=0, warmup=0, active=1, repeat=1, skip_first=2), + experimental_config=experimental_config, + ) + self.op_profiler.start() + + 
def step(self): + if torch.distributed.get_rank() in (0,): + self.op_profiler.step() + + def stop(self): + if torch.distributed.get_rank() in (0,): + self.op_profiler.stop() + + +class Profiling(metaclass=SingletonType): + MEMORY_UNIT = 1024 ** 3 + + def __init__(self, args, warmup_step=3, stop_step=5): + self.args = args + self.warmup_step = warmup_step + self.stop_step = stop_step + self.curr_step = 0 + self.pattern = r'^module.module.language_model.encoder.layers.\d+$' + self.context = { + 'step_time': 0, + 'transformer_act_mem': 0 + } + + def should_profiling(self): + rank = torch.distributed.get_rank() + if rank in self.args.profile_ranks and \ + self.warmup_step <= self.curr_step < self.stop_step: + return True + return False + + def forward_pre_hook(self): + def hook(module, *args, **kwargs): + if torch.distributed.get_rank() in self.args.profile_ranks: + torch.npu.synchronize() + self.start_memory = torch.npu.memory_allocated() + torch.npu.reset_max_memory_allocated() + return hook + + def forward_post_hook(self): + def hook(module, *args, **kwargs): + if torch.distributed.get_rank() in self.args.profile_ranks: + torch.npu.synchronize() + self.end_memory = torch.npu.max_memory_allocated() + transformer_act_mem = (self.end_memory - self.start_memory) / Profiling.MEMORY_UNIT + self.context['transformer_act_mem'] = transformer_act_mem + return hook + + def register_recursive_hook(self, prefix_name, model): + model = model[0] if isinstance(model, list) else model + for name, module in model.named_children(): + next_name = prefix_name + "." 
+ name if prefix_name != "" else name + if re.fullmatch(self.pattern, next_name): + module.register_forward_pre_hook(self.forward_pre_hook()) + module.register_forward_hook(self.forward_post_hook()) + break + self.register_recursive_hook(next_name, module) + + def hook_train_step(self, train_step): + def custom_train_step(*args, **kwargs): + start_time = time.time() + result = train_step(*args, **kwargs) + torch.cuda.synchronize() + step_time = time.time() - start_time + if self.should_profiling(): + cur_step_time = self.context.get('step_time') + cur_step_time += (step_time - cur_step_time) / (self.curr_step - self.warmup_step + 1) + self.context['step_time'] = cur_step_time + self.export_to_file() + self.curr_step += 1 + return result + return custom_train_step + + def export_to_file(self): + if torch.distributed.get_rank() in self.args.profile_ranks: + flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC + modes = stat.S_IWUSR | stat.S_IRUSR + with os.fdopen(os.open(self.args.prof_file, flags, modes), 'w') as fout: + fout.write(json.dumps(self.context)) + + +class CommProfiling: + @staticmethod + def get_comm_time(shape, domains, op): + if domains == 1: + return 0 + + if op == 'all_reduce': + return CommProfiling.cal_all_reduce(shape, domains) + if op == 'all_gather': + return CommProfiling.cal_all_gather(shape, domains) + if op == 'alltoall': + return CommProfiling.cal_alltoall(shape, domains) + if op == 'reduce_scatter': + return CommProfiling.cal_reduce_scatter(shape, domains) + raise AssertionError('communicate operator type error') + + @staticmethod + def cal_all_reduce(shape, domains): + data_size = CommProfiling.get_data_size(shape) + data_size = data_size / domains * (domains - 1) * domains * 2 + band_width = domains * (domains - 1) / 2 * BAND_WIDTH_UNIDIRECTIONAL + return CommProfiling.div(data_size, band_width) + + @staticmethod + def cal_all_gather(shape, domains): + data_size = CommProfiling.get_data_size(shape) + data_size = data_size / domains * (domains 
- 1) * domains + band_width = domains * (domains - 1) / 2 * BAND_WIDTH_UNIDIRECTIONAL + return CommProfiling.div(data_size, band_width) + + @staticmethod + def cal_alltoall(shape, domains): + data_size = CommProfiling.get_data_size(shape) + data_size = data_size / domains * (domains - 1) * domains + band_width = domains * (domains - 1) / 2 * BAND_WIDTH_UNIDIRECTIONAL + return CommProfiling.div(data_size, band_width) + + @staticmethod + def cal_reduce_scatter(shape, domains): + data_size = CommProfiling.get_data_size(shape) + data_size = data_size / domains * (domains - 1) * domains + band_width = domains * (domains - 1) / 2 * BAND_WIDTH_UNIDIRECTIONAL + return CommProfiling.div(data_size, band_width) + + @staticmethod + def get_send_recv_time(shape): + data_size = CommProfiling.get_data_size(shape) + return (data_size / BAND_WIDTH_UNIDIRECTIONAL) * 1e6 + + @staticmethod + def get_data_size(shape): + return functools.reduce(operator.mul, shape) * 2 // 1024**3 + + @staticmethod + def div(data_size, band_width): + try: + return data_size / band_width * 1e6 + except ZeroDivisionError: + print(f"band_width is zero") + return 0 \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/auto_parallel/auto_parallel_rectify.py b/model/train/yoco_moe/mindspeed/core/auto_parallel/auto_parallel_rectify.py new file mode 100644 index 000000000..c3e09eba3 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/auto_parallel/auto_parallel_rectify.py @@ -0,0 +1,424 @@ +# coding=utf-8 +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import glob
import copy
import warnings
import ast
from typing import Optional

import pandas as pd
import numpy as np
import gpytorch
import torch

from mindspeed.core.auto_parallel import (
    ARD_NUM_DIMS,
    KeyField,
    sample_cache,
    model_manager
)


class ExactGPModel(gpytorch.models.ExactGP):
    """Exact GP that models the relative error of an operator's estimated duration.

    Inputs are log-transformed input shapes; targets are relative errors
    between profiled and directly estimated durations.
    """

    def __init__(self, operator, train_inputs=None,
                 train_targets=None, raw_lengthscale=None,
                 likelihood=None, dtype=torch.float64):
        super(ExactGPModel, self).__init__(train_inputs, train_targets, likelihood=likelihood)
        self.operator = operator
        self.dtype = dtype

        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.MaternKernel(nu=0.5, ard_num_dims=ARD_NUM_DIMS[operator],
                                          lengthscale_constraint=gpytorch.constraints.GreaterThan(3e-2)))
        if raw_lengthscale is not None:
            # Use the constructor argument: no ``raw_lengthscale`` attribute is ever
            # set on the model, so reading it from ``self`` raised AttributeError.
            self.covar_module.base_kernel.raw_lengthscale.data \
                = raw_lengthscale * torch.ones_like(self.covar_module.base_kernel.raw_lengthscale.data)

        self.train_round = 0
        self.train_data = pd.DataFrame()

        self.y_train_mean: Optional[torch.Tensor] = None
        self.y_train_std: Optional[torch.Tensor] = None
        self.x_train_std: Optional[torch.Tensor] = None

    def get_model_info(self):
        """Return ``(train_data, train_round)``."""
        return self.train_data, self.train_round

    def set_model_info(self, values):
        """Restore ``(train_data, train_round)`` and rebuild the standardized training tensors."""
        self.train_data, self.train_round = values
        # set model info by train_data
        self.data_standardize()

    def forward(self, x):
        mean = self.mean_module(x)
        covar = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean, covar)

    def fit(self, profiling_file, multi_operator_data, num_iter=3000, lr=0.03):
        """Merge new profiling data into the training set and optimize the hyper-parameters with Adam."""
        hd = DataHandler(profiling_file, multi_operator_data)
        data = hd.generate_data(self.operator)
        # merge self.train_data with new train_data
        self.update_data(data)
        # set model train_inputs and target_inputs
        self.data_standardize()
        # clear cache
        self.train()
        self.likelihood.train()
        optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self)
        for i in range(num_iter):
            optimizer.zero_grad()
            output = self(self.train_inputs[0])
            loss = -mll(output, self.train_targets)
            loss.backward()
            if i % 100 == 0:
                logs = 'Iter %d/%d - Loss: %.5f outputscale: %.5f noise: %.5f' % (
                    i + 1, num_iter, loss.item(),
                    self.covar_module.outputscale.item(),
                    self.likelihood.noise.item()
                ) + ' lengthscale: ' + str(
                    np.round(self.covar_module.base_kernel.lengthscale.detach().cpu().numpy()[0], 5))
                print(logs)
            optimizer.step()
        self.eval()
        self.likelihood.eval()
        self.train_round += 1

    def update_data(self, data: pd.DataFrame):
        """
        :param data columns = [shape error count]
        """
        if not self.train_data.empty:
            exits_shapes = self.train_data.loc[:, KeyField.InputShapes].values.tolist()
            for index, rows in data.iterrows():
                shape = getattr(rows, KeyField.InputShapes)
                # update existent input_shape
                if shape in exits_shapes:
                    error, number = data[data[KeyField.InputShapes] == shape].iloc[:, 1:3].values.flatten()
                    current_train_data = self.train_data[self.train_data[KeyField.InputShapes] == shape]
                    train_error, train_number = current_train_data.iloc[:, 1:3].values.flatten()
                    count = int(number + train_number)
                    # Sample-count weighted mean of the old and new errors.
                    new_error = (error * number + train_error * train_number) / count
                    self.train_data[self.train_data[KeyField.InputShapes] == shape] = [shape, new_error, count]
                else:
                    # save new input_shape
                    self.train_data = pd.concat([self.train_data, rows.to_frame().T], ignore_index=True)
        else:
            self.train_data = data

    def data_standardize(self):
        """Build standardized train inputs/targets from ``self.train_data`` and install them on the model."""
        y_train = torch.tensor(self.train_data['error'], dtype=self.dtype)
        x_train = self.train_data[KeyField.InputShapes].str.split(',', expand=True).values.astype(int)
        x_train = torch.tensor(x_train, dtype=self.dtype).log()
        if x_train.shape[0] == 1:
            # A single sample has no spread; use unit scale.
            self.x_train_std = torch.tensor(np.ones(x_train.shape), dtype=self.dtype)
            self.y_train_std = torch.tensor(1, dtype=self.dtype)
        else:
            self.x_train_std, self.y_train_std = torch.std(x_train, dim=0), torch.std(y_train, dim=0)
            # Avoid division by zero for constant columns.
            self.x_train_std[self.x_train_std == 0] = 1.
            self.y_train_std[self.y_train_std == 0] = 1.
        x_train /= self.x_train_std
        self.y_train_mean = torch.mean(y_train, dim=0)
        y_train = (y_train - self.y_train_mean) / self.y_train_std
        self.set_train_data(x_train, y_train, strict=False)


class Sampler:
    """Draw duration samples for an operator from its cached GP model."""

    def __init__(self, num_sample=10, pre_thd=0):
        self.pre_thd = pre_thd
        self.num_sample = torch.Size([num_sample])

    def run(self, operator, direct_time, output_shape: list, *input_shape):
        """Return ``num_sample`` duration samples for ``operator``; results are cached per reduced shape."""
        input_shape = copy.deepcopy(input_shape)
        output_shape = copy.deepcopy(output_shape)
        # modify input_shape
        input_shape = Sampler.reduce_dim(operator, output_shape, input_shape)
        # check cache
        cached_samples = getattr(sample_cache, operator)
        sample = cached_samples.get(input_shape, None)
        if sample is not None:
            return sample
        # load model
        model = model_manager.get_cached_model(operator)
        # predict
        input_shape_np = np.array(input_shape).reshape(1, -1)
        # The same row is stacked twice; only the first prediction column is used below.
        fixed_shape = np.concatenate([input_shape_np, input_shape_np], axis=0)
        x = torch.tensor(fixed_shape, dtype=torch.float64).log()
        if model is None:
            relative_error = np.zeros(self.num_sample)
        else:
            with torch.no_grad(), gpytorch.settings.fast_pred_var():
                pred = model(x / model.x_train_std)
                pred = pred * model.y_train_std.item() + model.y_train_mean.item()
                relative_error = pred.sample(self.num_sample).cpu().numpy()[:, 0]
        sample = direct_time * (relative_error + 1.).flatten()
        negative_indices = np.where(sample <= self.pre_thd)[0]
        if negative_indices.size > 0:
            sample[negative_indices] = 0
            warnings.warn(f'Uncertainty of {operator} is too large, input shape: {input_shape}', Warning)
        # save prediction data
        cached_samples[input_shape] = sample
        return sample

    @staticmethod
    def reduce_dim(operator, output_shape, input_shapes):
        """Collapse an operator's input/output shapes into the tuple used as model input.

        Raises:
            AssertionError: if MatMul / BatchMatMul does not get exactly two input shapes.
            ValueError: if ``operator`` is not supported.
        """
        input_shapes = copy.deepcopy(input_shapes)
        output_shape = copy.deepcopy(output_shape)
        if operator in ['LayerNorm', 'LayerNormGrad']:
            input_shape = input_shapes[0]
        elif operator in ['FastGelu', 'FastGeluGrad']:
            input_shape = output_shape
        elif operator in ['Softmax', 'SoftmaxGrad']:
            input_shape = output_shape
        elif operator == 'Add' or operator == 'Mul':
            if len(input_shapes[0]) >= len(input_shapes[1]):
                max_dims, min_dims = input_shapes
            else:
                min_dims, max_dims = input_shapes
            if len(max_dims) == 2:
                max_dims.insert(0, 1)
            if len(max_dims) == 1:
                max_dims = [1, 1, max_dims[0]]
            if len(min_dims) == 3:
                min_dims = [1, 1, 1]
            elif len(min_dims) == 2:
                min_dims = [2, 1, 1]
            else:
                min_dims = [2, 2, 1]
            max_dims.extend(min_dims)
            input_shape = max_dims
        elif operator == 'BatchMatMul':
            if len(input_shapes) != 2:
                raise AssertionError(f"Dim of BatchMatMul is {len(input_shapes)}")
            b, k, m = output_shape[0], output_shape[2], output_shape[1]
            n = input_shapes[0][1:] + input_shapes[1][1:]
            # Remove the output dims; what remains is the contracted dimension.
            for shape in output_shape[1:]:
                n.remove(shape)
            input_shape = [b, m, n[0], k]
        elif operator == 'MatMul':
            if len(input_shapes) != 2:
                raise AssertionError(f"Dim of MatMul is {len(input_shapes)}")
            input_shape = input_shapes[0]
            input_shape.extend(input_shapes[1])
            for shape in output_shape:
                input_shape.remove(shape)
            output_shape.insert(1, input_shape[0])
            input_shape = output_shape
        elif operator == 'RmsNorm' or operator == 'RmsNormGrad':
            input_shape = input_shapes[0]
        elif operator == 'FlashAttentionScore' or operator == 'FlashAttentionScoreGrad':
            input_shape = input_shapes[0]
        else:
            raise ValueError(f"{operator} not supported.")

        return tuple(input_shape)


class DataHandler:
    """Turn profiling CSV data and directly estimated operator data into GP training rows."""

    def __init__(self, profiling_file, multi_operator_data: pd.DataFrame):
        self.sample_data = multi_operator_data
        self.profiling = self.extract_target_data(profiling_file)
        self.current_profiling_operator = None
        self.current_sample_operator = None
        self.backward_flag = False

    @staticmethod
    def extract_target_data(file):
        """Load profiling CSV(s), normalize operator type names and filter out short/small entries.

        ``file`` may be a single CSV path or a directory containing ``*.csv`` files.
        """
        columns = [KeyField.OpType, KeyField.InputShapes, KeyField.OutputShapes, KeyField.Duration]
        if os.path.isdir(file):
            file = glob.glob(os.path.join(file, "*.csv"))
            data = pd.concat((pd.read_csv(f) for f in file), ignore_index=True).loc[:, columns]
        else:
            data = pd.read_csv(file).loc[:, columns]
        data.loc[data['Type'].str.startswith('MatMul'), 'Type'] = 'MatMul'
        data.loc[data['Type'].str.startswith('BatchMatMul'), 'Type'] = 'BatchMatMul'
        data.loc[
            (data['Type'].str.startswith('LayerNorm') &
             ~(data['Type'].str.contains('Back') | data['Type'].str.contains('Grad'))), 'Type'
        ] = 'LayerNorm'
        data.loc[
            (data['Type'].str.startswith('LayerNorm') &
             (data['Type'].str.contains('Back') | data['Type'].str.contains('Grad'))), 'Type'
        ] = 'LayerNormGrad'
        # filter
        data = data[(data[KeyField.Duration] > 5) & (data[KeyField.InputShapes].str.len() > 4)].reset_index(drop=True)
        return data

    @staticmethod
    def convert_dim(data):
        """Return one comma-separated reduced input-shape string per row of ``data``.

        Raises:
            TypeError: if a row's operator type is not supported.
        """
        new_input_shape = []
        for index, tmp_data in data[[KeyField.OpType, KeyField.InputShapes, KeyField.OutputShapes]].iterrows():
            op, input_shape, output_shape = tmp_data.tolist()
            # The CSV fields are quoted string literals; unwrap them first.
            input_shape, output_shape = ast.literal_eval(input_shape), ast.literal_eval(output_shape)
            if op == 'LayerNorm' or op == 'LayerNormGrad':
                input_shape = input_shape.split(';')[0]
            elif op == 'Add' or op == 'Mul':
                dims = input_shape.split(';')
                d0_l, d1_l = dims[0].split(','), dims[1].split(',')
                if len(d0_l) >= len(d1_l):
                    max_length_dim = d0_l
                    min_length_dim = d1_l
                else:
                    max_length_dim = d1_l
                    min_length_dim = d0_l
                if len(max_length_dim) == 2:
                    max_length_dim = ['1', '1', max_length_dim[0], max_length_dim[1]]
                elif len(max_length_dim) == 1:
                    max_length_dim = ['1', '1', '1', max_length_dim[0]]
                elif len(max_length_dim) == 3:
                    max_length_dim.insert(0, '1')
                if len(min_length_dim) == 3:
                    min_length_dim = ['2', '1', '1', '1']
                elif len(min_length_dim) == 2:
                    min_length_dim = ['2', '2', '1', '1']
                elif len(min_length_dim) == 1:
                    min_length_dim = ['2', '2', '2', '1']
                elif len(min_length_dim) == 4:
                    min_length_dim = ['1', '1', '1', '1']
                max_length_dim.extend(min_length_dim)
                input_shape = ','.join(max_length_dim)
            elif op == 'BatchMatMul':
                output_shape = output_shape.split(',')
                b, k, m = output_shape[0], output_shape[2], output_shape[1]
                input_shapes = input_shape.split(';')
                n = input_shapes[0].split(',')[1:] + input_shapes[1].split(',')[1:]
                for shape in output_shape[1:]:
                    n.remove(shape)
                input_shape = ','.join([b, m, n[0], k])
            elif op == 'MatMul':
                input_shape = input_shape.replace(';', ',').split(',')
                output_shape = output_shape.split(',')
                for shape in output_shape:
                    input_shape.remove(shape)
                output_shape.insert(1, input_shape[0])
                input_shape = ','.join(output_shape)
            elif op == 'Softmax' or op.startswith('SoftmaxGrad'):
                input_shape = input_shape.split(';')[0]
            elif op == 'RmsNorm' or op == 'RmsNormGrad':
                input_shape = input_shape.split(';')[0]
            elif op == 'FlashAttentionScore' or op == 'FlashAttentionScoreGrad':
                input_shape = input_shape.split(';')[0]
            else:
                raise TypeError(f"{op} don't support")
            new_input_shape.append(input_shape)
        return new_input_shape

    def handle_transpose(self):
        """Swap transposed MatMul / BatchMatMul input dims so they line up with the output shape."""
        input_shapes = []
        for index, sample in self.current_profiling_operator.iterrows():
            input_shape = sample[KeyField.InputShapes]
            input_shape = ast.literal_eval(input_shape).split(';')
            input_shape = [list(map(lambda x: int(x), s.split(','))) for s in input_shape]
            output_shape = ast.literal_eval(sample[KeyField.OutputShapes]).split(',')
            output_shape = [int(s) for s in output_shape]
            if sample[KeyField.OpType] == 'BatchMatMul':
                if output_shape[1] != input_shape[0][1]:
                    input_shape[0][1], input_shape[0][2] = input_shape[0][2], input_shape[0][1]
                if output_shape[-1] != input_shape[1][-1]:
                    input_shape[1][1], input_shape[1][2] = input_shape[1][2], input_shape[1][1]
            elif sample[KeyField.OpType] == 'MatMul':
                if output_shape[0] != input_shape[0][0]:
                    input_shape[0][0], input_shape[0][1] = input_shape[0][1], input_shape[0][0]
                if output_shape[-1] != input_shape[1][-1]:
                    input_shape[1][0], input_shape[1][1] = input_shape[1][1], input_shape[1][0]
            input_shape1 = ','.join([str(i) for i in input_shape[0]])
            input_shape2 = ','.join([str(i) for i in input_shape[1]])
            input_shape_sum = input_shape1 + ';' + input_shape2
            # Re-wrap in quotes so the value keeps the quoted-literal form of the CSV field.
            input_shapes.append(f'"{input_shape_sum}"')
        self.current_profiling_operator.loc[:, KeyField.InputShapes] = input_shapes

    def handle_layer_norm_backward(self, operator):
        """Merge consecutive pairs of profiling rows into single ``LayerNormGrad`` rows.

        The durations of rows ``index`` and ``index + 1`` are summed.
        """
        profiling = self.profiling[self.profiling[KeyField.OpType] == operator].reset_index(drop=True)
        back_grad_data = pd.DataFrame()
        for index in range(0, profiling.shape[0], 2):
            sum_duration = profiling.loc[index, KeyField.Duration] + profiling.loc[
                index + 1, KeyField.Duration]
            input_shape = profiling.loc[index, KeyField.InputShapes].split(';')[0] + '"'
            back_grad_data.loc[index, KeyField.OpType] = 'LayerNormGrad'
            back_grad_data.loc[index, KeyField.InputShapes] = input_shape
            back_grad_data.loc[index, KeyField.OutputShapes] = input_shape
            back_grad_data.loc[index, KeyField.Duration] = sum_duration
        return back_grad_data.reset_index(drop=True)

    def handle_fv(self):
        """Keep FlashAttention rows whose first three input shapes are equal and align odd shapes."""
        condition = self.current_profiling_operator[KeyField.InputShapes].str.replace('"', '').str.split(';').map(
            lambda x: x[:3]).map(lambda x: x[0] == x[1] == x[2])
        self.current_profiling_operator = self.current_profiling_operator[condition]
        # Tolerate abnormal input_shape values that may appear for FV_grad.
        target_shape = self.current_sample_operator[KeyField.InputShapes].values[0]
        current_shape = self.current_profiling_operator[KeyField.InputShapes].values[0]
        if target_shape.split(';')[1] != current_shape.split(';')[1]:
            self.current_profiling_operator[KeyField.InputShapes] = target_shape

    def generate_data(self, operator):
        """Return per-input-shape mean relative error and sample count for ``operator``.

        Raises:
            AssertionError: if no profiling shape matches any estimated operator shape.
        """
        # Operators are processed one at a time.
        if len(operator) == 2:
            # Special handling for the layer_norm backward pass.
            self.current_profiling_operator = self.handle_layer_norm_backward(operator)
            operator = self.current_profiling_operator.loc[0][KeyField.OpType]
        else:
            self.current_profiling_operator = self.profiling[self.profiling[KeyField.OpType] == operator]
        self.backward_flag = False
        if operator.endswith('Grad'):
            self.backward_flag = True
            operator = operator.split('Grad')[0]
        # matmul and batch_matmul must account for transposed inputs.
        if operator in ['MatMul', 'BatchMatMul']:
            self.handle_transpose()
        # convert sample input_shape
        self.current_sample_operator = self.sample_data[
            self.sample_data[KeyField.OpType].str.startswith(operator)].reset_index(
            drop=True)
        # Drop shapes produced by load balancing and tolerate abnormal FVGrad input_shape values.
        if operator.startswith('FlashAttention'):
            self.handle_fv()
        # convert profiling input_shape
        self.current_profiling_operator.loc[:, KeyField.InputShapes] = self.convert_dim(
            self.current_profiling_operator
        )
        self.current_sample_operator[KeyField.InputShapes] = self.convert_dim(self.current_sample_operator)
        # Collect every input_shape of the current operator.
        set_operator = self.current_sample_operator[KeyField.InputShapes].drop_duplicates().tolist()
        errors_df = pd.DataFrame()
        # Compute the relative error for each input_shape.
        for shape in set_operator:
            # All profiling samples that share the current input_shape.
            tmp_data = self.current_profiling_operator[
                self.current_profiling_operator[KeyField.InputShapes] == shape].copy()
            if self.backward_flag:
                direct_mean = self.current_sample_operator[
                    self.current_sample_operator[KeyField.InputShapes] == shape
                ]['bwd_time'].values[0]
            else:
                direct_mean = self.current_sample_operator[
                    self.current_sample_operator[KeyField.InputShapes] == shape
                ]['fwd_time'].values[0]
            # Relative error of the profiled duration against the direct estimate.
            tmp_data['error'] = (tmp_data[KeyField.Duration] - direct_mean) / direct_mean
            tmp_data['direct_mean'] = direct_mean
            errors_df = pd.concat([errors_df, tmp_data], axis=0)
        if errors_df.empty:
            raise AssertionError('profiling_shape mismatch operator_shape')

        # Group-wise mean and count.
        train_data = errors_df.groupby(KeyField.InputShapes).agg(
            {'error': 'mean', KeyField.InputShapes: 'count'})
        train_data.rename(columns={KeyField.InputShapes: 'sample_number'}, inplace=True)
        train_data.reset_index(inplace=True)
        return train_data


# ---- patch residue that shares these source lines (header of the next file in the patch) ----
# \ No newline at end of file
# diff --git a/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/help.py b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/help.py
# new file mode 100644
# index 000000000..9c0bfaa09
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/help.py
# @@ -0,0 +1,51 @@
# Copyright (c) 2025, Huawei Technologies Co., Ltd. All rights reserved.
import datetime
import json
import math

import torch


SEARCH_CACHE_PATH = None
KV_STORE = None
PROFILE_CONTENT = {"fwd_time": [], "bwd_time": [], "act_mem": [], "module_param": []}
INITIAL_CONFIG = {}
GPT_ARGS_PATH = "gpt_args.json"
STAGE_PROFILE_PATH = 'stage_1_profile.json'


def broadcast_communicate(commum_data, source_rank):
    """Broadcast a single number from ``source_rank`` and return the received value as a float."""
    temp_data = torch.cuda.FloatTensor([commum_data])
    torch.distributed.broadcast(temp_data, src=source_rank)
    return temp_data.item()


def broadcast_communicate_list(commum_data, source_rank):
    """Broadcast a list of numbers from ``source_rank`` and return the received values as a list."""
    temp_data = torch.cuda.FloatTensor(commum_data)
    torch.distributed.broadcast(temp_data, src=source_rank)
    return temp_data.tolist()


def cal_throughput(run_time, profile_data, parallel_cfg):
    """Return tokens per second per device.

    The token count is seq_length * grad_acc_step * micro_batch_size;
    ``run_time`` is divided by 1000 before use and the result is divided by
    PP * TP (``parallel_cfg[0] * parallel_cfg[1]``).
    """
    sum_token = profile_data["text_decoder.seq_length"] * profile_data['grad_acc_step'] * profile_data['micro_batch_size']
    PP = parallel_cfg[0]
    TP = parallel_cfg[1]
    per_npu_throughput = sum_token / (run_time / 1000) / (PP * TP)
    return per_npu_throughput


def get_json(json_path):
    """Load and return the JSON document stored at ``json_path`` (read as UTF-8)."""
    with open(json_path, 'r', encoding='utf-8') as f:
        json_data = json.load(f)
    return json_data


def save_json(json_path, json_data):
    """Serialize ``json_data`` to ``json_path``.

    The file is written as UTF-8 so it matches what ``get_json`` reads,
    instead of depending on the platform's default encoding.
    """
    json_data_json = json.dumps(json_data)
    with open(json_path, 'w', encoding='utf-8') as f:
        f.write(json_data_json)


def precise_round(num, ndigits=0):
    """Round ``num`` to ``ndigits`` decimals, rounding halves up (unlike built-in ``round``)."""
    multiplier = 10 ** ndigits
    return math.floor(num * multiplier + 0.5) / multiplier


# ---- patch residue that shares this source line (header of the next file in the patch) ----
# diff --git a/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/memory_modeling.py b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/memory_modeling.py
# new file mode 100644
# index 000000000..b0cd16ff2
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/memory_modeling.py
# @@ -0,0 +1,77 @@
# Copyright (c) 2025, Huawei Technologies Co., Ltd. All rights reserved.
import json

import torch

from mindspeed.core.auto_parallel.mm_search.help import get_json


def get_model_parameters(model_config):
    """Return the estimated parameter count: 12 * hidden_size**2 per layer times num_layers."""
    transformer_params_count = 12 * model_config["hidden_size"] ** 2
    total_params_count = transformer_params_count * model_config["num_layers"]
    return total_params_count


def _component_static_memory(component_cfg, dp):
    """Return (param, grad, optimizer) byte counts for one model component.

    A missing component, or one whose ``params_dtype`` is not ``"bf16"``,
    contributes nothing. A frozen component only pays for its parameters.
    """
    if not component_cfg:
        return 0, 0, 0
    params_count = get_model_parameters(component_cfg)
    if component_cfg["params_dtype"] != "bf16":
        return 0, 0, 0
    mem_para = 2 * params_count
    if component_cfg.get("freeze", False):
        return mem_para, 0, 0
    mem_grad = 4 * params_count
    # Only the 8-byte-per-parameter part of the optimizer memory is divided by DP.
    mem_optimizer = 4 * params_count + 8 * params_count / dp
    return mem_para, mem_grad, mem_optimizer


def get_model_total_static_memory(args, parallel_config):
    """Return the model's static memory (parameters + gradients + optimizer) divided by 1024**2.

    The model description is read from the JSON file at ``args.mm_model``.
    The ``image_encoder`` and ``text_decoder`` sections are optional: the
    original code tested for them with ``.get()`` but then subscripted them
    unconditionally, so a model without one of them raised ``KeyError``.
    """
    model_config = get_json(args.mm_model)
    dp = parallel_config[2]

    image_encoder = model_config.get("image_encoder")
    components = (
        image_encoder["vision_encoder"] if image_encoder else None,
        model_config.get("text_decoder"),
    )

    mem_para, mem_grad, mem_optimizer = 0, 0, 0
    for component_cfg in components:
        para, grad, optimizer = _component_static_memory(component_cfg, dp)
        mem_para += para
        mem_grad += grad
        mem_optimizer += optimizer

    model_total_static_memory = mem_para + mem_grad + mem_optimizer
    return model_total_static_memory / (1024 ** 2)


def parallel_cluster_is_oom(args, parallel_config, static_mem):
    """Return True when ``static_mem`` exceeds 95% of device 0's memory times PP * TP."""
    PP, TP = parallel_config[0], parallel_config[1]

    max_available_memory = torch.npu.get_device_properties(0).total_memory * 0.95 / 1024**2

    return PP * TP * max_available_memory < static_mem


def count_module_param(model):
    """Return ``[param, optimizer, grad]`` sizes (bytes / 1024**2) and dump raw per-parameter data.

    Per-parameter ``[numel, bytes_per_element, requires_grad]`` records are
    written to ``raw_profile_<rank>.json`` in the current directory.

    NOTE(review): source indentation was lost in the patch; this layout assumes
    the file is rewritten for every element of ``model`` and that the totals of
    the last element are returned — confirm against the original file.
    """
    for mod in model:
        precision_placeholder = {torch.float32: 4, torch.float16: 2, torch.bfloat16: 2}
        module_param_property = {name: [param.numel(), precision_placeholder.get(param.dtype, 0), param.requires_grad] for name, param in mod.named_parameters()}
        # model_para, optimizer, grad
        module_param_dict = [0, 0, 0]
        for module_param in module_param_property:
            module_param_dict[0] += module_param_property[module_param][0] * \
                module_param_property[module_param][1] / 1024 ** 2
            if module_param_property[module_param][2]:
                module_param_dict[1] += (module_param_property[module_param][0] * 4 + \
                    module_param_property[module_param][0] * 8) / 1024 ** 2
                module_param_dict[2] += module_param_property[module_param][0] * 4 / 1024 ** 2
        module_param_property_json = json.dumps(module_param_property)
        with open(f'raw_profile_{torch.distributed.get_rank()}.json', 'w') as f:
            f.write(module_param_property_json)
    return module_param_dict


# ---- patch residue that shares these source lines (header of the next file in the patch) ----
# diff --git a/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/optimizer.py b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/optimizer.py
# new file mode 100644
# index 000000000..3cc012a3f
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/optimizer.py
# @@ -0,0 +1,161 @@
# Copyright (c) 2025, Huawei Technologies Co., Ltd. All rights reserved.
import time
import copy
import sys
import json

import torch

from megatron.training import get_args
from mindspeed.core.auto_parallel import set_kv_store
from mindspeed.core.auto_parallel.mm_search.help import get_json, save_json, GPT_ARGS_PATH
from mindspeed.core.auto_parallel.mm_search.profiling import DistributedPerformanceProfiler
from mindspeed.core.auto_parallel.mm_search.solver import solve_auto_parallel_mm
from mindspeed.core.auto_parallel.mm_search.memory_modeling import get_model_total_static_memory, parallel_cluster_is_oom


class SearchByProfile:
    """Profile candidate parallel configurations and keep the one with the best throughput."""

    def __init__(self):
        # Maps a merged (profiled) config tuple to the original configs it stands for.
        self.merge_config_list = {}

    def get_gpt_args(self, args):
        """Derive ``grad_acc_step`` from the launch arguments and save it to ``GPT_ARGS_PATH``."""
        world_size = args.nproc_per_node * args.nnodes
        tp = getattr(args, "tensor_model_parallel_size", 1)
        pp = getattr(args, "pipeline_model_parallel_size", 1)
        cp = getattr(args, "context_parallel_size", 1)
        dp = world_size / tp / pp / cp
        gpt_args = {
            'grad_acc_step': int(args.global_batch_size / args.micro_batch_size / dp),
        }
        save_json(GPT_ARGS_PATH, gpt_args)

    def merge_config(self, args, search_spaces):
        """Collapse the search space onto configs profiled with pp fixed to 4.

        Returns the distinct merged configs; ``self.merge_config_list`` records
        which original configs each of them represents.
        """
        world_size = args.nproc_per_node * args.nnodes
        merged = []
        for original, cfg in zip(search_spaces, copy.deepcopy(search_spaces)):
            cfg[0] = 4  # pp
            cfg[2] = world_size // (cfg[0] * cfg[1])  # dp
            if cfg[2] < 1:
                continue
            key = tuple(cfg)
            if cfg in merged:
                self.merge_config_list[key].append(original)
            else:
                merged.append(cfg)
                self.merge_config_list[key] = [original]
        print("[INFO] merge config list", self.merge_config_list)

        return merged

    def search(self, args, search_spaces):
        """Profile each merged config, solve for the best split, and write the winner to JSON."""
        self.get_gpt_args(args)
        candidates = self.merge_config(args, search_spaces)

        best_config = []
        best_throughput = 0
        for config in candidates:
            print(f"[INFO] now profile config: {config}")

            # Both stages always run; their status codes are added up.
            status_code = 0
            for stage in ('profiling_stage_1', 'profiling_stage_2'):
                status_code += DistributedPerformanceProfiler().launch(config, stage)
            if status_code != 0:
                continue

            parallel_split_config = self.merge_config_list[tuple(config)]
            print(f"[INFO] now solve cfg: {parallel_split_config}")

            optimal_config = solve_auto_parallel_mm(args, parallel_split_config)
            if optimal_config and optimal_config['throughput'] > best_throughput:
                best_throughput = optimal_config['throughput']
                best_config = optimal_config

        serialized = json.dumps(best_config)
        with open('auto_parallel_search_optimal_config.json', 'w') as f:
            f.write(serialized)
        print(f"[INFO] finally opt config: {best_config}")

    @staticmethod
    def build_initial_spaces(args):
        """Enumerate ``[pp, tp, dp, mbs]`` candidates for the simulated cluster.

        pp must divide the world size, tp is a power of two that divides the
        attention heads (and the query groups when there is more than one),
        and the micro batch size is at most 2.
        """
        world_size = args.simulated_nproc_per_node * args.simulated_nnodes
        device_count = args.simulated_nproc_per_node

        solutions = []
        for pp in (p for p in range(1, world_size + 1) if world_size % p == 0):
            for exponent in range(device_count):
                tp = 2 ** exponent
                if tp > device_count or tp > (world_size // pp):
                    break
                groups_mismatch = args.num_query_groups > 1 and args.num_query_groups % tp != 0
                if groups_mismatch or (args.num_attention_heads % tp != 0):
                    break

                dp = world_size // (pp * tp)
                dp_group_batch_size = args.global_batch_size // dp
                for num_mb in range(1, dp_group_batch_size + 1):
                    if dp_group_batch_size % num_mb != 0:
                        continue
                    mbs = dp_group_batch_size // num_mb
                    if mbs <= 2:
                        solutions.append([pp, tp, dp, mbs])

        return solutions

    @staticmethod
    def filter_invalid_configs(args, search_spaces):
        """Drop configs that cannot hold the static memory or exceed the pp / tp limits."""
        rough_filter_configs = []
        for config in search_spaces:
            static_mem = get_model_total_static_memory(args, config)
            print(f"config: {config} static_mem: {static_mem}", flush=True)
            # (The original comment here was mis-encoded.) Keep configs that are not
            # statically OOM, with pp <= 16 and tp <= nproc_per_node / 4.
            fits_memory = not parallel_cluster_is_oom(args, config, static_mem)
            if fits_memory and config[0] <= 16 and config[1] <= args.nproc_per_node / 4:
                rough_filter_configs.append(config)
        print(f"[INFO] finish static memory filter config {rough_filter_configs}")

        return rough_filter_configs


def monitor_train_task():
    """Worker loop for non-zero node ranks: wait for a broadcast task and run it.

    The message carries four config values followed by a task type;
    -1 ends the loop and 0 launches a profiling run.
    """
    while True:
        print("monitor next task...", flush=True)
        message = torch.zeros(5, dtype=torch.int)
        torch.distributed.broadcast(message, src=0)
        task_type = message[-1].item()
        config = message[:-1].tolist()
        if task_type == -1:
            break
        if task_type == 0:
            # NOTE(review): SearchByProfile.search passes a stage name as a second
            # argument to launch(); confirm the single-argument call is intended.
            DistributedPerformanceProfiler().launch(config)


def auto_parallel_mm_search_optimal_config(args):
    """Entry point: node 0 runs the search, every other node serves profiling tasks."""
    set_kv_store(args)
    # set cluster communication
    torch.distributed.init_process_group(
        backend=torch.distributed.Backend.GLOO,
        init_method='tcp://{}:{}'.format(args.master_addr, int(args.master_port) + 1),
        rank=args.node_rank,
        world_size=args.nnodes
    )

    if args.node_rank != 0:
        monitor_train_task()
        return

    search_space = SearchByProfile().build_initial_spaces(args)
    print(f"[INFO] len(init_search_space): {len(search_space)}, {search_space}")

    search_space = SearchByProfile().filter_invalid_configs(args, search_space)
    print(f"[INFO] filter search_space: {len(search_space)}")

    SearchByProfile().search(get_args(), search_space)


# ---- patch residue that shares these source lines (header of the next file in the patch) ----
# diff --git a/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/pp_layer_search.py b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/pp_layer_search.py
# new file mode 100644
# index 000000000..ac359926e
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/pp_layer_search.py
# @@ -0,0 +1,218 @@
# Copyright (c) 2025, Huawei Technologies Co., Ltd. All rights reserved.
+import time +import functools +import operator + +import numpy as np +import pulp +from pulp import LpMinimize, LpProblem, LpVariable, lpDot, lpSum +import highspy + +from mindspeed.core.auto_parallel.mm_search.help import precise_round +from mindspeed.core.auto_parallel import BAND_WIDTH_UNIDIRECTIONAL + + +def get_send_recv_time(shape): + data_size = functools.reduce(operator.mul, shape) * 2 / (1024 ** 3) + return (data_size / BAND_WIDTH_UNIDIRECTIONAL) * 1e3 + + +def pp_layer_search(parallel_cfg, profile_data, npu_memory_limit, last_stage_max_layer): + print(f"[INFO] start pp layer search {time.ctime()}") + print(f"[INFO] profile: {profile_data}") + + PP = parallel_cfg[0] + DP = parallel_cfg[2] + + num_vit = profile_data["image_encoder.vision_encoder.num_layers"] + num_llm = profile_data["text_decoder.num_layers"] + + model_structure = [1, num_vit - 2, 1, 1, num_llm - 2, 1] + recomputing_fwd = [profile_data['vit']['fwd_time'], profile_data['llm']['fwd_time']] + recomputing_act = [profile_data['vit']['act_mem'], profile_data['llm']['act_mem']] + layer_name = ['vit_pre', 'vit', 'vit_post', 'llm_pre', 'llm', 'llm_post'] + model_num_layers = num_vit + num_llm + print(f"PP:{PP}, DP:{DP}, num_vit, {num_vit}, num_llm, {num_llm}, model_num_layers, {model_num_layers}, \ + model_structure, {model_structure}") + + fwd_time, bwd_time, act_memory, static_memory = [], [], [], [] + for key in layer_name: + fwd_time.append(int(profile_data[key]['fwd_time'])) + bwd_time.append(int(profile_data[key]['bwd_time'])) + act_memory.append(int(profile_data[key]['act_mem'])) + static_memory.append(int(sum(profile_data[key]['module_param']))) + print(f"fwd_time, {fwd_time}, bwd_time, {bwd_time}, act_memory, {act_memory}, static_memory, {static_memory}") + + fwd_duration_layers, bwd_duration_layers, act_memory_layers, static_memory_layers = [], [], [], [] + for ind, num in enumerate(model_structure): + fwd_duration_layers += num * [fwd_time[ind]] + bwd_duration_layers += num * 
[bwd_time[ind]] + act_memory_layers += num * [act_memory[ind]] + static_memory_layers += num * [static_memory[ind]] + + memory_reserved = [npu_memory_limit] * PP + num_micro_batches = profile_data["grad_acc_step"] + if num_micro_batches < PP: + return None, None, None + + send_recv_time = get_send_recv_time( + [profile_data["text_decoder.seq_length"], profile_data["micro_batch_size"], profile_data["text_decoder.hidden_size"]] + ) + comm_matrix = [[send_recv_time] * PP for _ in range(PP)] + for i in range(PP): + comm_matrix[i][i] = 0 + + prob = LpProblem("Min_duration_time", LpMinimize) + + layer_placement = [LpVariable.matrix(f"X_{i}", range(model_num_layers), cat="Binary") for i in range(PP - 1)] + + # variable: forward/backward stage start time + bwd_start, fwd_start = [], [] + for j in range(PP): + fwd_start.append(LpVariable.matrix(f"fs_{j}", range(num_micro_batches), lowBound=1e-4, cat="Continuous")) + bwd_start.append(LpVariable.matrix(f"bs_{j}", range(num_micro_batches), lowBound=1e-4, cat="Continuous")) + recomputing_layers = [LpVariable.matrix("vit_r", range(PP - 1), lowBound=0, cat='Integer'), + LpVariable.matrix("llm_r", range(PP - 1), lowBound=0, cat='Integer')] + + layers_per_stage = [lpSum(layer_placement[s][i] for i in range(model_num_layers)) for s in range(PP - 1)] + layers_per_stage.append(model_num_layers) + + Const1 = 0.0001 + Const2 = 10000 + Z = [LpVariable.matrix(f"Z_{i}", range(PP), cat="Binary") for i in range(2)] + + prob += recomputing_layers[0][0] + recomputing_layers[1][0] <= layers_per_stage[0] + for s in range(1, PP - 1): + prob += recomputing_layers[0][s] + recomputing_layers[1][s] <= layers_per_stage[s] - layers_per_stage[s - 1] + for s in range(PP - 1): + # constraint: llm recompute + prob += Z[1][s] <= 1 - (layers_per_stage[s] - num_vit) * Const1 + prob += Z[1][s] >= Const1 * (num_vit - layers_per_stage[s]) + prob += recomputing_layers[1][s] <= layers_per_stage[s] - num_vit + Const2 * Z[1][s] + prob += recomputing_layers[1][s] <= 
Const2 * (1 - Z[1][s]) + prob += recomputing_layers[0][0] <= num_vit + for s in range(1, PP - 1): + # constraint: vit recompute + prob += Z[0][s] <= 1 - (layers_per_stage[s - 1] - num_vit) * Const1 + prob += Z[0][s] >= Const1 * (num_vit - layers_per_stage[s - 1]) + prob += recomputing_layers[0][s] <= num_vit - layers_per_stage[s - 1] + Const2 * (1 - Z[0][s]) + prob += recomputing_layers[0][s] <= Const2 * Z[0][s] + + # variable: pp stage forward/backward time + fwd_duration_each_stage = [] + bwd_duration_each_stage = [] + fwd_duration_each_stage.append(lpSum(lpDot(fwd_duration_layers, layer_placement[0]))) + bwd_duration_each_stage.append(lpSum(lpDot(bwd_duration_layers, layer_placement[0])) + + recomputing_layers[0][0] * recomputing_fwd[0] + + recomputing_layers[1][0] * recomputing_fwd[1]) + for s in range(1, PP - 1): + fwd_duration_each_stage.append(lpSum(lpDot(fwd_duration_layers, layer_placement[s])) - + lpSum(lpDot(fwd_duration_layers, layer_placement[s - 1]))) + bwd_duration_each_stage.append(lpSum(lpDot(bwd_duration_layers, layer_placement[s])) + - lpSum(lpDot(bwd_duration_layers, layer_placement[s - 1])) + + recomputing_layers[0][s] * recomputing_fwd[0] + + recomputing_layers[1][s] * recomputing_fwd[1]) + fwd_duration_each_stage.append(sum(fwd_duration_layers) - lpSum(lpDot(fwd_duration_layers, layer_placement[-1]))) + bwd_duration_each_stage.append(sum(bwd_duration_layers) - lpSum(lpDot(bwd_duration_layers, layer_placement[-1]))) + + prob += bwd_duration_each_stage[0] >= 1e-4 + + # constraint: pp schedules constraints + # warm up + for s in range(PP): + for j in range(PP - s - 1): + prob += fwd_start[s][j] + fwd_duration_each_stage[s] <= fwd_start[s][j + 1] + # cool down + for s in range(PP): + for j in range(num_micro_batches + s - PP, num_micro_batches - 1): + prob += bwd_start[s][j] + bwd_duration_each_stage[s] <= bwd_start[s][j + 1] + + for s in range(PP): + for j in range(num_micro_batches - PP + s + 1): + prob += fwd_start[s][j + PP - s - 1] + 
fwd_duration_each_stage[s] <= bwd_start[s][j] + + for s in range(PP): + for j in range(num_micro_batches - PP + s): + prob += bwd_start[s][j] + bwd_duration_each_stage[s] <= fwd_start[s][j + PP - s] + + for s in range(PP - 1): + for j in range(num_micro_batches): + prob += fwd_start[s + 1][j] >= fwd_start[s][j] + fwd_duration_each_stage[s] + comm_matrix[s][s + 1] + prob += bwd_start[s + 1][j] + bwd_duration_each_stage[s + 1] + comm_matrix[s + 1][s] <= bwd_start[s][j] + + # constraint: model layer placement + for s in range(PP - 1): + for i in range(model_num_layers - 1): + prob += layer_placement[s][i] >= layer_placement[s][i + 1] + + for s in range(PP - 2): + prob += (lpSum(layer_placement[s + 1][j] for j in range(model_num_layers)) >= + lpSum(layer_placement[s][j] for j in range(model_num_layers)) + 1) + + # constraint: model memory + prob += ((lpSum(lpDot(layer_placement[0], act_memory_layers)) - + recomputing_layers[0][0] * recomputing_act[0] + - recomputing_layers[1][0] * recomputing_act[1]) * (PP - 1) + + lpSum(lpDot(layer_placement[0], act_memory_layers)) + + lpSum(lpDot(layer_placement[0], static_memory_layers)) <= memory_reserved[0]) + for s in range(1, PP - 1): + prob += ((lpSum(lpDot(layer_placement[s], act_memory_layers)) + - lpSum(lpDot(layer_placement[s - 1], act_memory_layers)) + - recomputing_layers[0][s] * recomputing_act[0] + - recomputing_layers[1][s] * recomputing_act[1]) * (PP - s - 1) + + lpSum(lpDot(layer_placement[s], act_memory_layers)) + - lpSum(lpDot(layer_placement[s - 1], act_memory_layers)) + + lpSum(lpDot(layer_placement[s], static_memory_layers)) + - lpSum(lpDot(layer_placement[s - 1], static_memory_layers)) <= memory_reserved[s]) + + prob += layer_placement[0][0] == 1 + + prob += lpSum(layer_placement[-1][i] for i in range(model_num_layers)) >= model_num_layers - last_stage_max_layer + + # object function + obj = bwd_start[0][num_micro_batches - 1] + bwd_duration_each_stage[0] + prob += obj + prob.writeLP("pp_layers_prob.lp") + + 
print(f"[INFO] start solve {time.ctime()}") + h = highspy.Highs() + filename = 'pp_layers_prob.lp' + h.readModel(filename) + h.run() + print(f"[INFO] finish solve {time.ctime()}, solve state {h.modelStatusToString(h.getModelStatus())}") + + if h.modelStatusToString(h.getModelStatus()) != "Optimal": + return None, None, None + + layer_placement_values = [[0 for t in range(model_num_layers)] for s in range(PP - 1)] + recompute_values = [[0 for z in range(PP - 1)] for j in range(2)] + e2e_time = 0 + for i, val in enumerate(h.getSolution().col_value): + for s in range(PP - 1): + for t in range(model_num_layers): + if h.getColByName(str(layer_placement[s][t]))[1] == i: + layer_placement_values[s][t] = precise_round(val) + break + for j in range(2): + for z in range(PP - 1): + if h.getColByName(str(recomputing_layers[j][z]))[1] == i: + recompute_values[j][z] = precise_round(val) + break + if h.getColByName(str(bwd_start[0][num_micro_batches - 1]))[1] == i: + e2e_time += int(val) + for m in range(model_num_layers): + if h.getColByName(str(layer_placement[0][m]))[1] == i: + e2e_time += val * bwd_duration_layers[m] + break + for time_id in range(2): + if h.getColByName(str(recomputing_layers[j][0]))[1] == i: + e2e_time += val * recomputing_fwd[time_id] + break + + layer_placement_result = np.array(layer_placement_values).sum(axis=1) + print(f"[INFO] result: layer recompute: {recompute_values}") + print(f"[INFO] the layer placement: {layer_placement_result}") + print(f"[INFO] e2e time: {e2e_time}") + + return layer_placement_result, recompute_values, e2e_time + diff --git a/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/profiling.py b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/profiling.py new file mode 100644 index 000000000..5f5182bd4 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/profiling.py @@ -0,0 +1,270 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. All rights reserved. 
+import os +import sys +import time +import copy +import operator +import subprocess +import signal +import threading +import json + +import torch +import torch_npu + +from megatron.training.global_vars import set_args, get_args +from megatron.core import parallel_state +from mindspeed.core.auto_parallel import get_kv_store +from mindspeed.core.auto_parallel.mm_search.help import ( + broadcast_communicate_list, + get_json, + save_json, + INITIAL_CONFIG, + PROFILE_CONTENT, + STAGE_PROFILE_PATH) +from mindspeed.core.auto_parallel.mm_search.solver import record_train_config +from mindspeed.core.auto_parallel.auto_parallel_profiling import BaseLaunch + + +class DistributedPerformanceProfiler(BaseLaunch): + def update_args(self, config): + args = get_args() + self.old_args = copy.deepcopy(args) + + args.pipeline_model_parallel_size = config[0] + args.tensor_model_parallel_size = config[1] + args.data_parallel_size = config[2] + args.micro_batch_size = config[3] + + + def launch_model(self, config, profile_module): + def update_or_append_param(argv: list, key, value=None): + if not value: + argv.append(key) + return + + if key in argv: + argv[argv.index(key) + 1] = value + else: + argv.extend([key, value]) + + def remove_param(argv: list, key, has_value=False): + if key in argv: + pos = argv.index(key) + argv.pop(pos) + if has_value: + argv.pop(pos) + + def monitor_exit(process): + while True: + exit_flag = get_kv_store().get("exit_flag") + if int(exit_flag) == 1: + try: + process_group_id = os.getpgid(process.pid) + os.killpg(process_group_id, signal.SIGKILL) + break + except ProcessLookupError: + break + time.sleep(60) + + args = get_args() + argv: list = sys.argv[1:] + update_or_append_param(argv, '--eval-iters', '0') + update_or_append_param(argv, '--train-iters', '5') + update_or_append_param(argv, '--pipeline-model-parallel-size', str(args.pipeline_model_parallel_size)) + update_or_append_param(argv, '--tensor-model-parallel-size', 
str(args.tensor_model_parallel_size)) + update_or_append_param(argv, '--micro-batch-size', str(args.micro_batch_size)) + update_or_append_param(argv, '--auto-parallel-profile') + update_or_append_param(argv, '--profile-subgraph-seg') + update_or_append_param(argv, '--enable-dummy-optimizer') + remove_param(argv, '--auto-parallel-mm') + if profile_module == 'profiling_stage_1': + update_or_append_param(argv, '--profile-stage', '1') + elif profile_module == 'profiling_stage_2': + update_or_append_param(argv, '--profile-stage', '2') + + command = [ + 'torchrun', + '--nproc_per_node', str(args.nproc_per_node), + '--nnodes', str(args.nnodes), + '--node-rank', str(args.node_rank), + '--master_addr', str(args.master_addr), + '--master_port', str(args.master_port), + str(sys.argv[0]) + ] + argv + print(' '.join(map(str, command)), flush=True) + + get_kv_store().set("exit_flag", "0") + process = subprocess.Popen(command, shell=False, preexec_fn=lambda: os.setpgrp()) + monitor_thread = threading.Thread(target=monitor_exit, args=(process,)) + monitor_thread.start() + status_code = process.wait() + get_kv_store().set("exit_flag", "1") + torch.distributed.barrier() + return status_code + + + def launch(self, config, profile_module): + self.update_args(config) + args = get_args() + if args.node_rank != 0: + self.launch_model(config, profile_module) + super().recover_args() + return None + + buffer = config + [0] + torch.distributed.broadcast(torch.tensor(buffer, dtype=torch.int), 0) + status_code = self.launch_model(config, profile_module) + super().recover_args() + + return status_code + + +def save_profile_data(args): + global PROFILE_CONTENT + profile_content_json = json.dumps(PROFILE_CONTENT) + with open(f'model_profile_{torch.distributed.get_rank()}.json', 'w') as f: + f.write(profile_content_json) + if args.profile_subgraph_seg: + PROFILE_CONTENT = get_profile_from_rank(args) + PROFILE_CONTENT = record_train_config(PROFILE_CONTENT) + + if torch.distributed.get_rank() == 0: 
+ profile_content_json = json.dumps(PROFILE_CONTENT) + with open(f'model_profile.json', 'w') as f: + f.write(profile_content_json) + print(PROFILE_CONTENT) + + +def set_profile_model_config(args): + vit_model_args = ["num_layers"] + llm_model_args = ["num_layers", "seq_length", "hidden_size"] + train_args = ["micro_batch_size", "use_distributed_optimizer", "simulated_nproc_per_node", "simulated_nnodes"] + for arg in vit_model_args: + if hasattr(args.mm.model.image_encoder.vision_encoder, arg): + INITIAL_CONFIG[f"image_encoder.vision_encoder.{arg}"] = getattr(args.mm.model.image_encoder.vision_encoder, arg) + for arg in llm_model_args: + if hasattr(args.mm.model.text_decoder, arg): + INITIAL_CONFIG[f"text_decoder.{arg}"] = getattr(args.mm.model.text_decoder, arg) + for arg in train_args: + if hasattr(args, arg): + INITIAL_CONFIG[arg] = getattr(args, arg) + + if args.profile_stage == 1: + args.mm.model.image_encoder.vision_encoder.num_layers = 2 + args.mm.model.image_encoder.vision_encoder.pipeline_num_layers = [1, ] * 2 + [0, ] * 2 + args.mm.model.text_decoder.num_layers = 2 + args.mm.model.text_decoder.pipeline_num_layers = [0, ] * 2 + [1, ] * 2 + elif args.profile_stage == 2: + args.mm.model.image_encoder.vision_encoder.num_layers = 4 + args.mm.model.image_encoder.vision_encoder.pipeline_num_layers = [2, ] * 2 + [0, ] * 2 + args.mm.model.text_decoder.num_layers = 4 + args.mm.model.text_decoder.pipeline_num_layers = [0, ] * 2 + [2, ] * 2 + + recompute_args = ["recompute_granularity", "recompute_method", "recompute_num_layers"] + for arg in recompute_args: + if hasattr(args.mm.model.image_encoder.vision_encoder, arg): + setattr(args.mm.model.image_encoder.vision_encoder, arg, None) + if hasattr(args.mm.model.image_encoder.vision_projector, arg): + setattr(args.mm.model.image_encoder.vision_projector, arg, None) + if hasattr(args.mm.model.text_decoder, arg): + setattr(args.mm.model.text_decoder, arg, None) + + print(f"[INFO] initial_config:", INITIAL_CONFIG) + 
print(f"[INFO] finish: vit pp layer: {args.mm.model.image_encoder.vision_encoder.pipeline_num_layers}, \ + vit num layer: {args.mm.model.image_encoder.vision_encoder.num_layers}, \ + llm pp layer: {args.mm.model.text_decoder.pipeline_num_layers}, \ + llm num layer: {args.mm.model.text_decoder.num_layers}, \ + PP: {args.pipeline_model_parallel_size}, \ + TP: {args.tensor_model_parallel_size}") + + +def get_profile_from_rank(args): + global PROFILE_CONTENT + + def get_average_time(data, m=2): + data = sorted(data) + median = data[len(data) // 2] + normal = [x for x in data if median - m * median < x < median + m * median] + try: + average = sum(normal) / len(normal) + return average + except ZeroDivisionError: + print("[Error] Divided by zero.") + return None + + def get_computer_time(): + if "fwd_time" in PROFILE_CONTENT: + PROFILE_CONTENT["fwd_time"] = get_average_time(PROFILE_CONTENT["fwd_time"]) + else: + PROFILE_CONTENT["fwd_time"] = 0 + if "bwd_time" in PROFILE_CONTENT: + PROFILE_CONTENT["bwd_time"] = get_average_time(PROFILE_CONTENT["bwd_time"]) + else: + PROFILE_CONTENT["bwd_time"] = 0 + if "act_mem" in PROFILE_CONTENT: + PROFILE_CONTENT["act_mem"] = get_average_time(PROFILE_CONTENT["act_mem"]) + else: + PROFILE_CONTENT["act_mem"] = 0 + + get_computer_time() + + tp_device_num = args.tensor_model_parallel_size + pp_device_num = args.pipeline_model_parallel_size + dp_device_num = int(args.nnodes * args.nproc_per_node / tp_device_num / pp_device_num) + + profile_data_list = [] + for rank_id in range(args.pipeline_model_parallel_size): + fwd_time, bwd_time, model_mem, act_mem = 0, 0, [0, 0, 0], 0 + if parallel_state.get_pipeline_model_parallel_rank() == rank_id: + fwd_time = PROFILE_CONTENT['fwd_time'] + bwd_time = PROFILE_CONTENT['bwd_time'] + model_mem = PROFILE_CONTENT['module_param'] + act_mem = PROFILE_CONTENT['act_mem'] + profile_rank_data = [fwd_time, bwd_time, act_mem] + model_mem + profile_rank_data = broadcast_communicate_list(profile_rank_data, rank_id 
* tp_device_num * dp_device_num) + profile_data_list.append(profile_rank_data) + + if args.profile_stage == 1: + PROFILE_CONTENT = {} + PROFILE_CONTENT['vit_pre'] = {"fwd_time": profile_data_list[0][0], + "bwd_time": profile_data_list[0][1], + "module_param": profile_data_list[0][-3:], + "act_mem": profile_data_list[0][2]} + PROFILE_CONTENT['vit_post'] = {"fwd_time": profile_data_list[1][0], + "bwd_time": profile_data_list[1][1], + "module_param": profile_data_list[1][-3:], + "act_mem": profile_data_list[1][2]} + PROFILE_CONTENT['llm_pre'] = {"fwd_time": profile_data_list[2][0], + "bwd_time": profile_data_list[2][1], + "module_param": profile_data_list[2][-3:], + "act_mem": profile_data_list[2][2]} + PROFILE_CONTENT['llm_post'] = {"fwd_time": profile_data_list[3][0], + "bwd_time": profile_data_list[3][1], + "module_param": profile_data_list[3][-3:], + "act_mem": profile_data_list[3][2]} + + save_json(STAGE_PROFILE_PATH, PROFILE_CONTENT) + + elif args.profile_stage == 2: + profile_data = get_json(STAGE_PROFILE_PATH) + + PROFILE_CONTENT = copy.deepcopy(profile_data) + model_mem_vit = copy.deepcopy(profile_data_list[0][-3:]) + model_mem_llm = copy.deepcopy(profile_data_list[2][-3:]) + for i, v in enumerate(profile_data['vit_pre']['module_param']): + model_mem_vit[i] = (profile_data_list[0][-3:][i] - profile_data['vit_pre']['module_param'][i]) + model_mem_llm[i] = (profile_data_list[2][-3:][i] - profile_data['llm_pre']['module_param'][i]) + + PROFILE_CONTENT['vit'] = {"fwd_time": (profile_data_list[0][0] - profile_data['vit_pre']['fwd_time']), + "bwd_time": (profile_data_list[0][1] - profile_data['vit_pre']['bwd_time']), + "module_param": model_mem_vit, + "act_mem": (profile_data_list[0][2] - profile_data['vit_pre']['act_mem'])} + PROFILE_CONTENT['llm'] = {"fwd_time": (profile_data_list[2][0] - profile_data['llm_pre']['fwd_time']), + "bwd_time": (profile_data_list[2][1] - profile_data['llm_pre']['bwd_time']), + "module_param": model_mem_llm, + "act_mem": 
(profile_data_list[2][2] - profile_data['llm_pre']['act_mem']), + "embed_time": profile_data['llm_pre']['fwd_time'] - (profile_data_list[2][0] - profile_data['llm_pre']['fwd_time']) if profile_data['llm_pre']['fwd_time'] - (profile_data_list[2][0] - profile_data['llm_pre']['fwd_time']) > 0 else 0} + + return PROFILE_CONTENT + diff --git a/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/schedules.py b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/schedules.py new file mode 100644 index 000000000..2e1c3c4a5 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/schedules.py @@ -0,0 +1,26 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. All rights reserved. +import time +from functools import wraps + +import torch + +from megatron.training import get_args +from mindspeed.core.auto_parallel.mm_search.help import PROFILE_CONTENT + + +def backward_step_decorator(fn): + @wraps(fn) + def wrapper(*args, **kwargs): + global_args = get_args() + if global_args.auto_parallel_profile: + # get model backward time + torch.npu.synchronize() + st_time = time.time() + grad = fn(*args, **kwargs) + torch.npu.synchronize() + PROFILE_CONTENT["bwd_time"].append((time.time() - st_time) * 1000) + else: + grad = fn(*args, **kwargs) + return grad + return wrapper + diff --git a/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/solver.py b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/solver.py new file mode 100644 index 000000000..7bfec8bea --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/auto_parallel/mm_search/solver.py @@ -0,0 +1,132 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. All rights reserved. 
+import json +import itertools +import copy +import sys +import time +import math + +import torch +import numpy as np + +from mindspeed.core.auto_parallel.mm_search.help import ( + broadcast_communicate_list, + cal_throughput, + get_json, + INITIAL_CONFIG, + GPT_ARGS_PATH) +from mindspeed.core.auto_parallel.mm_search.pp_layer_search import pp_layer_search + + +def record_train_config(profile): + for key in INITIAL_CONFIG: + profile[key] = INITIAL_CONFIG[key] + gpt_args = get_json(GPT_ARGS_PATH) + for key in gpt_args: + profile[key] = gpt_args[key] + return profile + + +class AutoParallelSolver(): + def __init__(self, profile_data): + if torch.cuda.is_available(): + self.max_available_memory = torch.cuda.get_device_properties(0).total_memory / 1024**2 + else: + self.max_available_memory = 62000 + self.layer_name = ['vit_pre', 'vit', 'vit_post', 'llm_pre', 'llm', 'llm_post'] + print(f"[INFO] NPU available memory: {self.max_available_memory}") + + + def update_profile(self, args, parallel_cfg, profile_data): + update_profile_data = copy.deepcopy(profile_data) + + if args.use_distributed_optimizer: + DP = parallel_cfg[2] + + for key in profile_data: + if key in self.layer_name: + update_profile_data[key]['module_param'][1] = profile_data[key]['module_param'][1] / 12 * (4 + 8 / DP) + + return update_profile_data + + + def cal_max_layer(self, profile_data): + llm_available_memory = self.max_available_memory - sum(profile_data['llm_post']['module_param']) - profile_data['llm_post']['act_mem'] + last_stage_max_layer = math.floor(llm_available_memory / (sum(profile_data['llm']['module_param']))) + 1 + return last_stage_max_layer + + + def trans_optimal_config(self, optimal_config, profile_data): + parallel_config = optimal_config['parallel_config'] + optimal_config['parallel_config'] = {'PP': parallel_config[0], + 'TP': parallel_config[1], + 'DP': parallel_config[2], + 'MBS': parallel_config[3]} + + layer_placement = optimal_config['layer_placement'] + sum_model_layer = 
profile_data['image_encoder.vision_encoder.num_layers'] + profile_data['text_decoder.num_layers'] + layer_placement.append(sum_model_layer) + merge_layer_place = [] + merge_layer_place.append(int(layer_placement[0])) + for i in range(1, len(layer_placement)): + layer_num = int(layer_placement[i] - layer_placement[i - 1]) + merge_layer_place.append(layer_num) + + vit_layer_placement = [0] * optimal_config['parallel_config']['PP'] + llm_layer_placement = [0] * optimal_config['parallel_config']['PP'] + vit_layer_num = profile_data['image_encoder.vision_encoder.num_layers'] + llm_layer_num = profile_data['text_decoder.num_layers'] + for i, capacity in enumerate(merge_layer_place): + a_count = min(vit_layer_num, capacity) + vit_layer_placement[i] = a_count + vit_layer_num -= a_count + b_count = min(llm_layer_num, capacity - a_count) + llm_layer_placement[i] = b_count + llm_layer_num -= b_count + optimal_config['layer_placement'] = {'vit_layer_placement': vit_layer_placement, + 'llm_layer_placement': llm_layer_placement} + + layer_recompute = optimal_config['layer_recompute'] + optimal_config['layer_recompute'] = {'vit_layer_recompute': layer_recompute[0], + 'llm_layer_recompute': layer_recompute[1]} + return optimal_config + + +def solve_auto_parallel_mm(args, parallel_cfgs): + if torch.distributed.get_rank() == 0: + with open(f'model_profile.json', 'r', encoding='utf-8') as f: + profile_data = json.load(f) + + solver = AutoParallelSolver(profile_data) + + optimal_config = {} + optimal_throughput = 0 + for parallel_cfg in parallel_cfgs: + print(f"[INFO] now to solve config {parallel_cfg}") + + cfg_profile_data = solver.update_profile(args, parallel_cfg, profile_data) + + last_stage_max_layer = solver.cal_max_layer(cfg_profile_data) + print(f"[INFO] last stage max layer {last_stage_max_layer}") + + layer_placement, layer_recompute, e2e_time = pp_layer_search(parallel_cfg, cfg_profile_data, solver.max_available_memory, last_stage_max_layer) + + if e2e_time is None: + 
continue + + per_npu_throughput = cal_throughput(e2e_time, cfg_profile_data, parallel_cfg) + print(f"[INFO] per_npu throughput {per_npu_throughput}") + + if per_npu_throughput > optimal_throughput: + optimal_config = {"parallel_config": parallel_cfg, + "layer_placement": layer_placement.tolist(), + "layer_recompute": layer_recompute, + "e2e_time": e2e_time, + "throughput": per_npu_throughput} + optimal_config = solver.trans_optimal_config(optimal_config, profile_data) + optimal_throughput = per_npu_throughput + + print(f"optimal_config: {optimal_config}") + return optimal_config + return None + diff --git a/model/train/yoco_moe/mindspeed/core/auto_parallel/noise_predict_ckpt/FlashAttentionScoreGrad_910B.pth b/model/train/yoco_moe/mindspeed/core/auto_parallel/noise_predict_ckpt/FlashAttentionScoreGrad_910B.pth new file mode 100644 index 0000000000000000000000000000000000000000..f742d0624c77f54018ed8e13d5d54f54c1b0a57a GIT binary patch literal 6466 zcmb_hYjE6F71r8$xauQmVwwihHmMV5-6R{Ych|dHlBQkKEvZ_^RS2k|M%8MsEn%&+ zN4o3OY1O6_+c9kkP*Vtm01X97p_FGzfj%54ly`UxGyH)WW|-k8Gw_E$aIUnrrA^k> zc6>eBm+t+(@0@$?IrqwgLi-Ai>*(MvYTLNIT+vV*?KVn@Np;ISq*^w)!&b_2KG|E? 
zT~?@)sE!)NLY_PFR@?D5RkaL*sEokj587p7lk$FDr95ywBE;g-W|U^=qr}iP%PJ>K zOLvH@T4Rcxu$7~-_Z`|)YT2rmkZV*o9A36bsiqrc8KpaxEsyo_&>oLVs}ak=p2`S; z75)&uXjlq8)~N5{ogu7#*yUp-0$95#^oo%5W-f$PQZQmiiER?Y6KD`~glax+ zUkEn~Da383kVae~CbUmNMrapelhE%+N0vfPguJw`QSTxg&1UoF5OogT>6^4CVA5U+ zw}`OsEz<>O_0|ww9&YpL3IV!zQP?lS?NX}IIu@CM*=S}_ah{?L5t6-dhtKiu0LK9e zcZwji#1S;(t}sUu?)Etb0vrb^42p1%#G?ufOk&8oIc$NbRu!|XI0=?W6T`MrA^d*K zqDAi$9O`)#-s8gIxP&R0lPe=Oyf-ugmm>y|VmOJ4Vqyxj9ln5BW%O?U8xQZB>n!q*61euGqFR&Jw4;_G>Hy-|P3{QM7j#jtJZY zykABCmx=1|#AF|MxX(+PB^Z)cp>ji^x|xIzOoA-J{ZpWbP>N$PDVA&l(Dl+g6t!HOT>mR5DH%`c`L)jbpy`~)F$Dq2w&1JtVx`21HN{EUk`Qu2nHY z6-s17DG|eg@1JmLCAL(~)m(Tkrt`e88o)5OCgwpZFvDTN{~> zaT8)X!GTn%OXUL)jwFr0;c}fh2QyL+Z(}}7ect;!tec2ebGk+J^Tk3{^$YJ zoKj*?SlvN)9o}*Zz0Iz{rU!*?ZvEm1h0-`B7Teet3u_ur3WiRpK^WP@-oI~T)q`El z$-20n#mU$b+}&F&Y-?n?^a-W>k29|^_J_ybyo*&`b}h!fBLaJ}SlAwo{m2n(-D5X1 z{>o77@um5qVpergz^7(hx?bM`MbC%{_ktS z|NNiN?qU4*pFCzP7k{b+{tf3Ye8-IF`RWWugmWR3`!pHAg7sZ6prmq}-nscbftOr^59d?u66=F-`GFH?EWeC6NlsSvvc zg%}e%|Ku)Ms<*Klp%&>mUL$0%AhSnY3#|vEb5Y3oirxsG%F__}mQEZ{DTfcVXd^E zG@Ha})ut`BW7-lfCO`?HG?Y7~TwBVeZ32Y?1;XWU4lg`#4hJ52;Dy6EFe9yPX%l(9 z;Mt?SY3BRB|DS*Ue?}uh`zns>=-@7D-P}H|U?`5Zj}X-)x@8_LS+*+YQ^~>JvO<(Z zb;2kN?&c2tKkay1sbm?3T4Dqaf50xQwpt$2ON0m4BO(@;x~McmpHK~5v#fH$v~)+6 zOV(kiHymEJ)nZLI$}*MiShjpP&7(aYm)24&hxSy)Rjl%d z@CCzC$U*;49;>Ao?UTeBshxh7sx_XZ~iMP~>%jW_tX8v?i+ z39b`yy)+PwyWmdhUeh7AqML-uKBC%k(W;qc+z_Ja!yA2?O#zx432qXxxn-JKwYosl z6{5+Y+o#zQpt+ggO(NbbT}Q2{sER39ta8mzy@>F{D<;9#5Kb1`e4Kayr-$Gc5!3lwb>A(h12eU5hoIQA3VE<$LD zBWT7QVU85;^f`tD90v$SMBF9uR0R!8)sS^_%z{{|DrQ-65-gD>#%!gc@EiCVL{l;+SH^9;D>Q;GM;b(m;Up@GNmH2Z@Pjm~Ox)@J;_>eJ?y{j5oy0%@?NU zaD)jvDq@lf(=L{D%#+0G=X?#t{Y=gSB3yz8i<(K9=?b2~Ln4kd?uSJ@vdH}?<9@%0 zsRi!oI`@GbW*GMg5wr77c>o^>J7o{SW5wA~JnqF>I+@AsNbSg`(s)84_#k6vX@s($ z6!9U({$UXxnZZX#F&Cc?7B3ogv#cIfodnAxylkpRj!W~tIo$Gr*MZ_?D-bx>2Ps)IAqYU)7Mwk;cvG z(Mp4~sM>f+CHQR7TNWl=G)aQHkOi z|J*3P;c+O0*bctw;#=ODkjD&V+`+e9d`H?y5fp>gRYxw@sx(3sqRNI+R1F8;J>k@f 
zY@Iw`bMb=qoVU^VURIK1fM``#AjFkSj|zl0wMF~^(u#r+X}=;BA3cVhz`vv?!8 zal%V3UiLd}iR@G#;B8*;ii>mpfbG!(F13Hac^5zSNA|ZiG9l9)i0O=3b_G9i@v0AU zIRyz6eEihK&-^`vW;NrLbsPytj1wH&b7r*xaYECgRBCPGu|GoFsj?+S2W*dZf7(fTYRu6k>VVexk3Yot|FzxyQ*-{!Tr>ZtTl7Ew$FsW_|9vNq87slx z*CPMM=P%?Kzt#ET>6PHmw8+nKE9?Jk%lxgxU-S9zZ;}7+Gx=AT{|C-K`}j)uf1pKv zw*Kau7YDEY;eJ-g*tJw1+SViw|F`_^DW!oO8hOE|LU8f z*78pu&%A!&%=q2x9Kx<>ulI7@(Oyq@#m7-?T`}}y^aNy?vR*C>-njHEq%z4=CY$Qd z^yRaC*;F!@%?u=S{e7v-KsuMnC)4R{GL_HenaUISCthbyh1fMB#F*HHCwIYW-OX-< zTGQuvjc~zY%yx*2Z7Yq;NVtW9--s71ubUUa=(kf#;rRu<5iD3M!(g5C=nyH+14}i6 z1q)wSNZDL-uw)}zu#AP#IAq+L3JjtFbk?5Km>)V^~8j?Sgm p=0E?&iWhA!Pzu)QIQFccdx{VL=oyGp)@(kI#weksuI+=c*8K?=Dp6ry8yvL4i@nZ z<#~(Y#NpKpq>GA%u-Q8#T}Wp0S{kl*$<)DTY?;LlnHvx+;o!z`%j9#pqRdj4OazwM zGRqw@Hz8QT!Oi?a>`g`zH6fEt=T(XB2x`2jA-KiG6NQyFkKe&li{MrcR`H8558YEv zqHG@J^2mmAvdj@2mVQA<&x0yBhy@L5~qa}JI52=3z`F)j_K z8=rS;gkXcM(dy9Hh@g#wO+1UOz#CIi1x4$~0+-5(T3R#$sjMyqI&?83vCTLww$N{| zxppsnp#b;$d7M%iA=9bD7hMQ^JK{|wszx9qYB+`I2HS+QO4kPajD;_aOczuoX#|S% zSs}0GlvI~02}uHAflq1qOm3HLNhYr%Mbve17fGD-zS08%2itl<p*U8ZN2Y*_@HkI27!|7leh90F(SZN=N9c;VIGT{JxH<6 zHd}|SC*#)9x!a)CJY+s@4&7s&nQ?=8(Yj!sGKZE}uihA5vD`Wq_nW89!L8=Y);V(^ zZXIqhFIZ=nnWwG8aqHqrbFVcx-8zVw+RXv$b#=G(w)KQLWW6-UJZ2p>Pi?ZEid#cg zuWG$%onB-eiJNb%x6ZFM4_V#TK)mB#>tXXu#Z>FLxOu{SrE|CWuKA92B5s{uW<9<1 z?znl<9D=XPI4)frs0cF2lpv5Yg~KHXtf49?91#u2sUqR)g)9fTL`5$E2fED556Cq5 zhKwVMILH?xx?Q$|sKj6g3F1x;cHtn(W5c(bhHobh-$=bC8;u+z=ibk9*UOE2@!oUz z8gc(Y4hjgqo|HB0Db8-a@DK-kh_8D&czD#;M~JVFa?oA$wP)DZjWOsYzV73oZ)C$f z2#>ipOdP@fWM4Zx?&oJ=A48#FFt$1x3a^d?8|&d4_F#Q57#tmZg1;8eB-iO!I2eh+ z0ryxm8jQd}3MO1OD`h-1I!IaR;FA5Yw(>K@F*3y-J4VOoc`-Nx5`yDoeMk|)>+?+xo+az^90$+$!VB$i z!atH~Xf9K>6#7vK%Qe72Pe8A#L-oKG;65qFb0cr!73 z_(aOVg@h0DU*zC>QV+bv!P|B^$N3K4;b&k=iJvt2!@Klc$Vf<}&JVW35Y-S7(hay& zfbY|cDs-q~rvYXGEPerI5LMij8A3Xr!#P7llAwx7Nj2aH`;2^&bPGSs7vM+oAZ=5B z?B_9rfu9uMr?dgyjtd9ww&7<9pFvC1Xm<@iFTgKkdf_^Z8NJ|ayWy7w_?10rdD%(B z0NVP(uM6;=4OmqkP^$Zc%LVw2jjSIZG9cjoRWmxWdIo-5fcI^ZYcPqUU4-8i-~;;# 
z*;US_4L&Tu@9j&Z$8=tzyL`BxgFh7DBYWDIrse8dGWcTwKDHU|yn>Loadfso4p;wF zfIr)0E4^gpdnov%0DrMj;j2eEBmCC_{EZUijbxxLF}>=ao1Zb?$GMce+R8p7&Sz>} z&xpxgNXn8&z?P<)9S@1cr^T^fR0SbbB8uI2{CyGWKFF)QVl}hYrAThOHNlpq`m2*0 zV?a&t;3cB|uX7M2MUlL&q`q#fdZ?u-HbM0*Te8)UtR?!HcHjO_*PqASUP2$c-!M@F zu)Emb=~atAXKG3H+UHGB zzhN@^&Ec6t(oHW6?tlIt)DHF}e?2K>%tAh$~CV&cGQn8 zB(JuFj~HA0<48Je)@7U6sx?K!dXPCT@*(=6n5G6 z;+;j%jjx&QHi6V)Og^zgg&NS@C*WgpOyfcxwH&DL^Kb*R}1kQV$Je-Xl)%d1#8c;g> zv?ZoZTRjX~Qh8euj&~nDMm`skWRH)F!?!xdk^ikQ?=2$l3`72j(w3M?nsH_h{v-44 Gx&H^V;482I literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/mindspeed/core/auto_parallel/noise_predict_ckpt/RmsNormGrad_910B.pth b/model/train/yoco_moe/mindspeed/core/auto_parallel/noise_predict_ckpt/RmsNormGrad_910B.pth new file mode 100644 index 0000000000000000000000000000000000000000..3881b4048d6e5b78b44e61f6998a47f0ca597f58 GIT binary patch literal 6210 zcmb_gZFC$}8Jj5SyW(l8A;)W%_U_a-xDc4nWM zyJ=GxC}5ICs`V4UuqgFgLHq(m5l9uq?_xZj;~zao59j#PAO7)=-aE5NW;U6&*?Mzk zKQi||&-1?bz3=<(3`)(b1fjK6STZ&VYX!ON4%kllR!1wTz3I-rj*>>TRAtQU?^`V# z;^TNzv1pqnDN@6>T=9TYA`U6-Gm2D%&@Lm9RJJi_(-kh%-9Zb{WOxGOkc=h!+@$tvH75q)fYr zD|MzqU!?~uX3qZt-uvX2l)E!b77gv}9}4Bi;fYzfi4iDIjaH?NqcTB$71Y>UuzjkLyo_FDZ@u30aK!EAjS=cR-X55=CuGu16gSJ*+b~^tS8s{X z_2L}?U0;aqofP|Iyi3W{TgM|av>S02^)FB~F@$s{ZVfoz9pczeahnWjMI2!>ZjW-L zaYw*05aKvMF(~6sMPw>iU=mX`tPvY>v7%We%}o_;honXvtxUvyEF<>&pK!4KAq;uA zE2*%QDyZdA2X{wSu;a*rNHg73S+iIQb6l~HWtFizf^Q<;Ge2E5je?t6m@leTt6~(# zOrjR}0uf5ls+KDgfs%67q=x1=+5}IW!P&3!417;8OJt(`yEr5XlX!2D`M*Spu1GBY zLBwG{X%;Z7tYOMcjT%-O@0)@u+lEQatBo8ukVj3ed zMw3#rkG2Q;z`($5|7F8}kpv2(FryzVMHV%$wqVj8xLiqtd>$Z&K;zeo2Ca&@MX%BU`=9M=P3Mjl7Fu%j|2 zm@s{*q+`A$UjH_*ix1q#<=ijBqj;d8Ta1~V;AuQ4<2dJjNXEm9+>dbX56GBY;GU{+ zAIM{xbDxkgGw+o9@xiE5_EJ1rm>I@NKi0akxn1d9-JQKTd`P4CFlXm!gt4EJ@e$7c zQ5heb#>a>8iR65+_|a%sC32LwDV|3})gnjK{ro>7J{do)Hsdi5p9*4@mv?-6-mRbE zrhQh%=eTLjlXzU8VM;%r^fz$7#Zv3GgSu8BE>0_}nd6A#*bbghu3(>Ctz2Qz%7U~& z9DIRLe6ipk3yU2ZJULLE!WkJ~(wA4~Xx1nZW&T>z_+<}YN$O7y_(U$%9Ez_>f{)1h 
zfeOARC2C(t1z(pE4F4N4zDcI=Eg4S*2a-iBo>tZ}b=*$>_~P6CcePBY<{SUaFuvn+ zXp}lGzU$$8{+UomOl{P~_dWbTX=4bQ$?B@BmZ}vNp&BKssTGLn;@J~!wZPBGb5##N z)SvMijUOcywnV`59)9e%Fyrj#vwjIbkrJ-I@r(TtUhwc!-M?`yTeEm0+&JMy4=)9i zw#QDYE%2KcyzF5%Sgx2;cIfyjGF%YY)E(rY$Ku7gtpATMxer5NqP-E4&Q|9j3neezQL-_syJKY#PX zkAqkKbRREd{5oGA>WIk$`ToA0D@g;VX8-NS|2Fwww_J+-y$##HlK5LT|J9nI#_~@_ zF#Vg*PZFj zX0qwde73tM-IedjWP8)Oe7Y;g?q{?49ViaU979?E{5-x1K4O|>U z?03fUO46P6DBx;Q1ddJsF5HKQPNu^{Y7MUPN81x7&*Umd4})$S-rykqBL zL3-;!!s#d)ZA0sqQBq5Q^(f(V!GB6Dj@On$Sw4{sN^92as;%7Gyn6wIxz!)@?>a9Y iY#hBNcxUVQvv%QcHBa(~>Vwj1-rNi8*dL!C%>5s4ORUxa literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/mindspeed/core/auto_parallel/noise_predict_ckpt/RmsNorm_910B.pth b/model/train/yoco_moe/mindspeed/core/auto_parallel/noise_predict_ckpt/RmsNorm_910B.pth new file mode 100644 index 0000000000000000000000000000000000000000..3f08eebb1f60d6d83d8648fc710d3c6614f82ab6 GIT binary patch literal 6146 zcmbtYYjhlC8J=x=9VnDuK`NFO%f`0b-6WgrmO>%cXc{;HE3WCdDa$G1DuYxQ zi;a*agR6}IeHEd%wuY$JFszobMoC^Cs#nx39cyE_ZCDrLu8-huV7OMsbxK!r+-Y}` zdu@-onrSnVeOPzZf>X6ict?z;9oL668zVIDWVk`byXH(&tyHFIHpOTcox^D;2@o$&i+j39eiWtDsTDj_9tfTY*4Jw}+V#;?Ck!E?RvSw2Xb3L(}vdYNa;V%&%oSLp$X2DBM&llCIT``LzmaZ1~ z0uf5lu9hoDLnY;^#Z1k0wWB<7hI_xtGw}W4ERjS9XE7iNqxevf{9n?Go~YaWi-?0k z(kx(5SxU++jhS`^A0C4$6UHA}tyRWVnW6+;0R1c?PLuGOE)m4;i5_Gl#=m*bx~=NtiKP z(y>4iuYYUl-~$hGIS*gxj&truWjr>+{W$0TsEpBR?y(y8{v5_R z_X!yjQ%-pZAB#I>55p6Mi9tLW#9CXnb9-icM>^k$k82E{;Osn&5c^3PpXBVHlJV(r zd}a`zO-=<%5RIl?(vRp~isungwe`d5KK_r0&o!S`n{mp==fjxgB_3axa_bklX& z*9yV5uxZoancnIc&dB(>(O8|U*~7Xh^Y@y@XMKDlX*|;#5P3{<8NMkA0V4GS6?{ud z)PCp{d|OHo{&!@2S0BUoWPCr|kQA|aPFX?fxShfF#SentYFTGmVEprg_+h}IG3I*s zk&hRGJ)sU++OUTo`*=~=KnR*eb=6Z#)e1$Z#&p%v3cBUtCnvmWf$x)-sy=>dJRdX~ zKT9gKM8H`eKMz`%L$vj&U&1e>gcn@+nf?gB^zkbrII*7A%$$fePI%eJE8(QgO()eB z1kDRx^>Hp-u&w!mM*RcM`*}j#m$UH0N6*OAbZ0MvY!>^#Kds}Vn$-#JeKWG>me zZ2882?7eZ`{N#Z7^B;eN^Z$37|M$1&Un4Z~f7=55FaGV!ZqEO}$z#@h`P&!3zu~2e zInM8_J^R#r`8yWCF9-|we|ACqbBVvk*S~WC{QnxyzsmjJb>_Jz=j;Ej1@QCzH#LGd 
zc>S*r@V26-UY)4g-9Y2f6!{{->BN&ffk^R+*}Ap7SMf3xymtr41k{%IGv z*DszPzL(#J@T0lcw+K7pUQY$Z$J1R?v&>_3*W=i#S?cL-z5J_?=}2cfvYF0~_I$QI zn@Q)g9bK7pSFS75nayVNdFtqM*>pP3RlfL|`~rU}#E(8H!NpELd5c!-b^JtZG<*(M z4-qZEymg!zZ&3>ngP1Pz^%&7Ix@HD|zTHO=9m5CW(ZF z(ogplvlV30^$5{Y6i1kBj4(?>w3x(==)bCwhPHYM(P|I}m~05pUJnqpAr8=gbt45G z^#D;f#sMZ90%Yp}q6sw)(7&pYg3fvXCz1){0Fw;?y6OR3g@R~*ns1C>XRC6az zHUywe8V87`m3SoeudY*2OJwy3(Jj9vwx8atM`)ak`lO{xx7QZl(6VD1f?ODksm~-& j{xpu%JVopHvvT2SezQu0KNRnimhk3WSVb>BKb-r2HE5J} literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/mindspeed/core/context_parallel/__init__.py b/model/train/yoco_moe/mindspeed/core/context_parallel/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/core/context_parallel/adaptive_context_parallel.py b/model/train/yoco_moe/mindspeed/core/context_parallel/adaptive_context_parallel.py new file mode 100644 index 000000000..da8bd4f41 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/context_parallel/adaptive_context_parallel.py @@ -0,0 +1,371 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+import torch +import torch_npu +from mindspeed.ops.npu_ring_attention_update import npu_ring_attention_update + + +def flash_attn_p2p_communicate(scheduling_info, send_q_dst, recv_q_src, send_kv_dst, recv_kv_src, cp_group, rank): + send_recv_ops = [] + is_recv_q = False + is_recv_kv = False + for send_dst in scheduling_info.send_q_dst: + send_op = torch.distributed.P2POp(torch.distributed.isend, send_q_dst, send_dst, group=cp_group, tag=send_dst) + send_recv_ops.append(send_op) + for send_dst in scheduling_info.send_kv_dst: + send_op = torch.distributed.P2POp(torch.distributed.isend, send_kv_dst, send_dst, group=cp_group, tag=send_dst) + send_recv_ops.append(send_op) + if scheduling_info.recv_q_src > -1: + recv_src = scheduling_info.recv_q_src + recv_op = torch.distributed.P2POp(torch.distributed.irecv, recv_q_src, recv_src, group=cp_group, tag=rank) + send_recv_ops.append(recv_op) + is_recv_q = True + if scheduling_info.recv_kv_src > -1: + recv_src = scheduling_info.recv_kv_src + recv_op = torch.distributed.P2POp(torch.distributed.irecv, recv_kv_src, recv_src, group=cp_group, tag=rank) + send_recv_ops.append(recv_op) + is_recv_kv = True + send_recv_ops_qkv = [] + if len(send_recv_ops) > 0: + send_recv_ops_qkv = torch.distributed.batch_isend_irecv(send_recv_ops) + return is_recv_q, is_recv_kv, send_recv_ops_qkv + + +def flash_attn_p2p_communicate_o(scheduling_info, send_o_dst, recv_o_src, cp_group, rank): + send_recv_ops = [] + is_recv_o = False + for recv_src in scheduling_info.recv_o_src: + recv_op = torch.distributed.P2POp(torch.distributed.irecv, recv_o_src, recv_src, group=cp_group, tag=100000 + rank) + send_recv_ops.append(recv_op) + is_recv_o = True + if scheduling_info.send_o_dst > -1: + send_dst = scheduling_info.send_o_dst + send_op = torch.distributed.P2POp(torch.distributed.isend, send_o_dst, send_dst, group=cp_group, tag=100000 + send_dst) + send_recv_ops.append(send_op) + send_recv_ops_o = [] + if len(send_recv_ops) > 0: + send_recv_ops_o = 
torch.distributed.batch_isend_irecv(send_recv_ops) + return is_recv_o, send_recv_ops_o + + +class AdaptiveAttention(torch.autograd.Function): + + @staticmethod + def forward(ctx, q, k, v, n, cp_para, softmax_scale=None, attn_mask=None, dropout_p=0.): + keep_prob = 1. - dropout_p + cp_size = cp_para.get("cp_size") + rank = cp_para.get("rank") + scheduling_info = cp_para.get('scheduling_info') + cp_group = cp_para.get('cp_group') + + seq_len = q.shape[0] + batch_size = q.shape[1] + head_dim = q.shape[-1] // n + + if softmax_scale is None: + softmax_scale = head_dim ** (-0.5) + send_kv_dst = torch.cat((k.unsqueeze(0), v.unsqueeze(0)), dim=0) # [2, s, b, h] + recv_q_src, recv_kv_src = None, None + send_recv_ops_qkv = [] + is_recv_q, is_recv_kv = False, False + send_o_dst, recv_o_src = None, None + send_recv_ops_o = [] + is_recv_o, is_send_o = False, False + attn_out, softmax_max, softmax_sum = None, None, None + + round_num = len(scheduling_info) + for i in range(round_num + 1): + is_activate = is_recv_q or is_recv_kv # receive q or kv last round means calculate this round + is_send_o = is_recv_q # receive q last round means send o this round + + # wait until QKV is received + if len(send_recv_ops_qkv) > 0: + for send_recv_op in send_recv_ops_qkv: + send_recv_op.wait() + + # determine QKV for this round + cur_q = recv_q_src if is_recv_q else q + cur_k = recv_kv_src[0] if is_recv_kv else k + cur_v = recv_kv_src[1] if is_recv_kv else v + + # send QKV for next round + if i < round_num - 1: + recv_q_src = torch.empty_like(q) + recv_kv_src = torch.empty_like(send_kv_dst) + is_recv_q, is_recv_kv, send_recv_ops_qkv = flash_attn_p2p_communicate(scheduling_info[i], + q, recv_q_src, + send_kv_dst, recv_kv_src, + cp_group, rank) + + # calculate QKV for this round + if i == 0 or (i < round_num and is_activate): + this_mask = attn_mask[i] if isinstance(attn_mask, list) else None + attn_outs = torch_npu.npu_fusion_attention( + cur_q, cur_k, cur_v, n, "SBH", + pse=None, + 
padding_mask=None, + atten_mask=this_mask, + scale=softmax_scale, + pre_tockens=cur_k.shape[0], + next_tockens=cur_k.shape[0], + keep_prob=keep_prob, + sparse_mode=0 + ) + cur_attn_out, cur_softmax_max, cur_softmax_sum = attn_outs[0], attn_outs[1], attn_outs[2] # [s, b, h], [b, n, s, 8], [b, n, s, 8] + if not is_send_o: + if i == 0: + softmax_sum = cur_softmax_sum + softmax_max = cur_softmax_max + attn_out = cur_attn_out + else: + attn_out_updated, softmax_max_updated, softmax_sum_updated = npu_ring_attention_update( + attn_out, softmax_max, softmax_sum, cur_attn_out, cur_softmax_max, cur_softmax_sum) + attn_out, softmax_max, softmax_sum = attn_out_updated, softmax_max_updated, softmax_sum_updated + + # wait until O is received + if len(send_recv_ops_o) > 0: + for send_recv_op in send_recv_ops_o: + send_recv_op.wait() + + # update O if receive O + if is_recv_o: + recv_attn_out = recv_o_src[:, :, :, :head_dim].permute(2, 0, 1, 3) # [b, n, s, d] -> [s, b, n, d] + recv_attn_out = recv_attn_out.view(seq_len, batch_size, -1).to(attn_out.dtype) # [s, b, n, d] -> [s, b, h] + recv_softmax_max = recv_o_src[:, :, :, head_dim:head_dim + 8] + recv_softmax_sum = recv_o_src[:, :, :, head_dim + 8:] + attn_out_updated, softmax_max_updated, softmax_sum_updated = npu_ring_attention_update( + attn_out, softmax_max, softmax_sum, recv_attn_out, recv_softmax_max, recv_softmax_sum) + attn_out, softmax_max, softmax_sum = attn_out_updated, softmax_max_updated, softmax_sum_updated + + # send O for next round + if i < round_num: + cur_attn_out = cur_attn_out.view(seq_len, batch_size, n, -1).permute(1, 2, 0, 3) # [s, b, h] -> [s, b, n, d] + send_o_dst = torch.cat((cur_attn_out, cur_softmax_max), dim=-1) # [s, b, n, d+8] + send_o_dst = torch.cat((send_o_dst, cur_softmax_sum), dim=-1) # [s, b, n, d+16] + recv_o_src = torch.empty_like(send_o_dst) + is_recv_o, send_recv_ops_o = flash_attn_p2p_communicate_o(scheduling_info[i], send_o_dst, recv_o_src, cp_group, rank) + + k, v = send_kv_dst[0], 
send_kv_dst[1] + attn_mask = attn_mask if isinstance(attn_mask, list) else [attn_mask] + ctx.save_for_backward(q, k, v, *attn_mask, attn_out, softmax_max, softmax_sum) + ctx.n = n + ctx.softmax_scale = softmax_scale + ctx.cp_group = cp_group + ctx.cp_size = cp_size + ctx.cp_rank = rank + ctx.scheduling_info = scheduling_info + return attn_out + + @staticmethod + def backward(ctx, dout): + q, k, v, *attn_mask, attn_out, softmax_max, softmax_sum = ctx.saved_tensors + softmax_max = softmax_max.contiguous() + softmax_sum = softmax_sum.contiguous() + + n = ctx.n + softmax_scale = ctx.softmax_scale + cp_group = ctx.cp_group + cp_size = ctx.cp_size + rank = ctx.cp_rank + dist_attn_scheduler = ctx.scheduling_info + + send_recv_reqs_input = [] + send_recv_reqs_dq = [] + send_recv_reqs_dkv = [] + num_received_dq, num_received_dkv = 0, 0 + + # 把m和l的1/8进行all-gather + softmax_max_all = torch.empty((cp_size, *(softmax_max.shape[:-1])), device=softmax_max.device, + dtype=softmax_max.dtype) + softmax_sum_all = torch.empty((cp_size, *(softmax_sum.shape[:-1])), device=softmax_sum.device, + dtype=softmax_sum.dtype) + softmax_max_local = softmax_max[:, :, :, 0].contiguous() # [b, n, s, 8] -> [b, n, s, 1] + softmax_sum_local = softmax_sum[:, :, :, 0].contiguous() # [b, n, s, 8] -> [b, n, s, 1] + # [b, n, s] -> [8, b, n, s] + handle_softmax_max = torch.distributed._all_gather_base(softmax_max_all, softmax_max_local, + group=cp_group, async_op=True) + handle_softmax_sum = torch.distributed._all_gather_base(softmax_sum_all, softmax_sum_local, + group=cp_group, async_op=True) + + # 组合需要发送的tensors + kv = torch.cat((k.unsqueeze(0), v.unsqueeze(0)), dim=0) # [2, s, b, h] + qodo = torch.cat((q.unsqueeze(0), attn_out.unsqueeze(0), dout.unsqueeze(0)), dim=0) # [3, s, b, h] + + # 创建接收tensors的buffer + kv_recv = torch.empty((2, *kv.shape), device=kv.device, dtype=kv.dtype) # [2, 2, s, b, h] + qodo_recv = torch.empty((2, 3, *q.shape), device=q.device, dtype=q.dtype) # [2, 3, s, b, h] + dq_recv = 
torch.empty((2, *q.shape), device=q.device, dtype=q.dtype) # [2, s, b, h] + dkv_recv = torch.empty((2, 2, *k.shape), device=k.device, dtype=k.dtype) # [2, 2, s, b, h] + + # 初始化第0轮的cur_q, cur_k, cur_v, cur_o, cur_do, cur_m, cur_l + cur_q, cur_k, cur_v = q, k, v + cur_o, cur_do = attn_out, dout + cur_m, cur_l = softmax_max, softmax_sum + + dq, dk, dv = None, None, None + + handle_softmax_max.wait() + handle_softmax_sum.wait() + + # 循环遍历每一个round + round_cnt = len(dist_attn_scheduler) + for rnd_idx in range(round_cnt): + is_active = True + if len(send_recv_reqs_input) > 0: + idx = 0 + for send_recv_op in send_recv_reqs_input: + send_recv_op.wait() + idx += 1 + + cur_recv_buf_idx = rnd_idx % 2 + prev_recv_buf_idx = 1 - cur_recv_buf_idx + + # 确定本轮的cur_q, cur_k, cur_v, cur_o, cur_do, cur_m, cur_l + if rnd_idx > 0: + prev_scheduling = dist_attn_scheduler[rnd_idx - 1] + if prev_scheduling.recv_q_src > -1: # 这一轮计算自己出KV + cur_q, cur_o, cur_do = (qodo_recv[prev_recv_buf_idx][0], qodo_recv[prev_recv_buf_idx][1], + qodo_recv[prev_recv_buf_idx][2]) + cur_k, cur_v = k, v + + idx = torch.distributed.get_group_rank(cp_group, prev_scheduling.recv_q_src) + cur_m = softmax_max_all[idx, :, :, :].view(softmax_max_all.shape[1:] + + (1,)).repeat(1, 1, 1, 8) + cur_l = softmax_sum_all[idx, :, :, :].view(softmax_max_all.shape[1:] + + (1,)).repeat(1, 1, 1, 8) + elif prev_scheduling.recv_kv_src > -1: # 这一轮计算自己出Q + cur_q, cur_o, cur_do = q, attn_out, dout + cur_k, cur_v = kv_recv[prev_recv_buf_idx][0], kv_recv[prev_recv_buf_idx][1] + cur_m, cur_l = softmax_max, softmax_sum + else: + is_active = False + + # 把本轮的input通信加入input通信队列(需要通信得到下一轮执行所需的q+o+do/k+v、发送下一轮别的device需要的q+o+do/k+v) + send_recv_ops_input, send_recv_reqs_input = [], [] + cur_scheduling = dist_attn_scheduler[rnd_idx] # 本轮计算过程中需要并行执行的通信调度 + + if cur_scheduling.recv_q_src > -1: + # recv q + attn_out + dout from cur_scheduling.recv_q_src + recv_op = torch.distributed.P2POp(torch.distributed.irecv, qodo_recv[cur_recv_buf_idx], + 
cur_scheduling.recv_q_src, cp_group, tag=rank) + send_recv_ops_input.append(recv_op) + elif cur_scheduling.recv_kv_src > -1: + # recv kv from cur_scheduling.recv_kv_src + recv_op = torch.distributed.P2POp(torch.distributed.irecv, kv_recv[cur_recv_buf_idx], + cur_scheduling.recv_kv_src, cp_group, tag=rank) + send_recv_ops_input.append(recv_op) + + if len(cur_scheduling.send_q_dst) > 0: + for send_q_dev in cur_scheduling.send_q_dst: + # send q + attn_out + dout to send_q_dev + send_op = torch.distributed.P2POp(torch.distributed.isend, qodo, send_q_dev, cp_group, + tag=send_q_dev) + send_recv_ops_input.append(send_op) + if len(cur_scheduling.send_kv_dst) > 0: + for send_kv_dev in cur_scheduling.send_kv_dst: + # send kv to send_kv_dev + send_op = torch.distributed.P2POp(torch.distributed.isend, kv, send_kv_dev, cp_group, + tag=send_kv_dev) + send_recv_ops_input.append(send_op) + + # 发起本轮的input通信 + if len(send_recv_ops_input) > 0: + send_recv_reqs_input = torch.distributed.batch_isend_irecv(send_recv_ops_input) + + # 仍然按照前向的调度顺序来进行反向的计算,需要q k v do_q m_q l_q + if is_active: + this_mask = attn_mask[rnd_idx] if attn_mask is not None else None + attn_grad_outs = torch_npu.npu_fusion_attention_grad( + cur_q, cur_k, cur_v, cur_do, n, + "SBH", + pse=None, + padding_mask=None, + atten_mask=this_mask, + softmax_max=cur_m, + softmax_sum=cur_l, + attention_in=cur_o, + scale_value=softmax_scale, + sparse_mode=0, + keep_prob=1., + ) + cur_dq, cur_dk, cur_dv = attn_grad_outs[0], attn_grad_outs[1], attn_grad_outs[2] + else: + cur_dq, cur_dk, cur_dv = None, None, None + + if rnd_idx == 0: + dq = cur_dq + dk = cur_dk + dv = cur_dv + else: + # 等待output send-recv结束,并用收到的dq/dkdv来更新结果 + if num_received_dq > 0: + for send_recv_op in send_recv_reqs_dq: + send_recv_op.wait() + for i in range(num_received_dq): + dq.add_(dq_recv[i]) + + if num_received_dkv > 0: + for send_recv_op in send_recv_reqs_dkv: + send_recv_op.wait() + for i in range(num_received_dkv): + dk.add_(dkv_recv[i][0]) + 
dv.add_(dkv_recv[i][1]) + # 用cur_dq, cur_dk, cur_dv更新结果:检查当前轮的计算是否是帮别人算的,如果是/不是,则加上cur_dk, cur_dv/cur_dq + send_recv_reqs_dq, send_recv_reqs_dkv = [], [] + send_recv_ops_dq, send_recv_ops_dkv = [], [] + num_received_dq, num_received_dkv = 0, 0 + prev_scheduling = dist_attn_scheduler[rnd_idx - 1] + if is_active: + if prev_scheduling.recv_q_src > -1: # 这一轮计算自己出KV,是帮别人算 + dk.add_(cur_dk) + dv.add_(cur_dv) + send_dq = cur_dq + send_op = torch.distributed.P2POp(torch.distributed.isend, send_dq, prev_scheduling.recv_q_src, + cp_group, tag=rank * 10) + send_recv_ops_dq.append(send_op) + elif prev_scheduling.recv_kv_src > -1: # 这一轮计算自己出Q + dq.add_(cur_dq) + send_dkv = torch.cat((cur_dk.unsqueeze(0), cur_dv.unsqueeze(0)), dim=0) # [2, s, b, h] + send_op = torch.distributed.P2POp(torch.distributed.isend, send_dkv, + prev_scheduling.recv_kv_src, cp_group, tag=rank * 10) + send_recv_ops_dkv.append(send_op) + + # 发起output recv:上一轮发Q的目的device,从那儿收dQ;上一轮发KV的目的device,从那儿收dKdV; + if len(prev_scheduling.send_q_dst) > 0: + for idx, send_q_dst in enumerate(prev_scheduling.send_q_dst): + recv_op = torch.distributed.P2POp(torch.distributed.irecv, dq_recv[idx], + send_q_dst, cp_group, tag=send_q_dst * 10) + send_recv_ops_dq.append(recv_op) + num_received_dq += 1 + if len(prev_scheduling.send_kv_dst) > 0: + for idx, send_kv_dst in enumerate(prev_scheduling.send_kv_dst): + recv_op = torch.distributed.P2POp(torch.distributed.irecv, dkv_recv[idx], + send_kv_dst, cp_group, tag=send_kv_dst * 10) + send_recv_ops_dkv.append(recv_op) + num_received_dkv += 1 + + if len(send_recv_ops_dq) > 0: + send_recv_reqs_dq = torch.distributed.batch_isend_irecv(send_recv_ops_dq) + if len(send_recv_ops_dkv) > 0: + send_recv_reqs_dkv = torch.distributed.batch_isend_irecv(send_recv_ops_dkv) + + if num_received_dq > 0: + for send_recv_op in send_recv_reqs_dq: + send_recv_op.wait() + for i in range(num_received_dq): + dq.add_(dq_recv[i]) + + if num_received_dkv > 0: + for send_recv_op in send_recv_reqs_dkv: + 
send_recv_op.wait() + for i in range(num_received_dkv): + dk.add_(dkv_recv[i][0]) + dv.add_(dkv_recv[i][1]) + + return dq, dk, dv, None, None, None, None, None + + +def adaptive_attn_context_parallel(q, k, v, n, cp_para, softmax_scale=None, attn_mask=None, dropout_p=0.): + out = AdaptiveAttention.apply( + q, k, v, n, cp_para, softmax_scale, attn_mask, dropout_p + ) + return out diff --git a/model/train/yoco_moe/mindspeed/core/context_parallel/context_parallel_kv_cache.py b/model/train/yoco_moe/mindspeed/core/context_parallel/context_parallel_kv_cache.py new file mode 100644 index 000000000..fd474fa6c --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/context_parallel/context_parallel_kv_cache.py @@ -0,0 +1,124 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +import torch + + +def get_cache_policy(layer_number, cache_policy_init, cache_interval): + cache_policy = cache_policy_init + if cache_interval != 0: + if layer_number % (cache_interval + 1) == 1: + cache_policy = cache_policy_init + else: + cache_policy = None + + return cache_policy + + +class ContextParallelKVCache: + """Context Parallelism KV Cache Implementation""" + + def __init__(self, cache_policy, outer_data, inner_data, k, v) -> None: + self.outer_size, self.outer_ring_p2p = outer_data + self.inner_size, self.inner_ring_p2p = inner_data + self.cache_policy = cache_policy + self.k = k + self.v = v + self.cp_size = self.outer_size * self.inner_size + self.outer_index = 0 + + send_data = torch.zeros((2, *self.k[-1].shape), dtype=self.k[-1].dtype, device=self.k[-1].device) + send_data.copy_(torch.cat((self.k[-1].unsqueeze(0), self.v[-1].unsqueeze(0)), dim=0)) + outer_recv_data = send_data.clone() + inner_recv_data = send_data.clone() + self.cur_kv, self.outer_next_kv, self.inner_next_kv = send_data, outer_recv_data, inner_recv_data + + self.k_out, self.v_out = None, None + + def communicate_outer_ring_kv(self, index) -> None: + """ + Implements of kv communications in 
outer ring + + Args: + index (int): the index of outer for loop + """ + self.outer_index = index + + # index > 0, using kv after communication + if index > 0: + if index == 1 and self.cache_policy == "half": + # special case: index=1, cache_policy=half, KV block should be transformed to K + self.outer_ring_p2p.wait() + if self.inner_size > 1: + # KV have been transformed in inner ring + self.cur_kv.copy_(self.outer_next_kv[1]) + self.outer_next_kv = self.outer_next_kv[1].clone() + else: + # KV is not transformed in inner ring + self.cur_kv, self.outer_next_kv = self.outer_next_kv, self.cur_kv + self.k_out, self.v_out = self.cur_kv[0].clone(), self.cur_kv[1].clone() + self.cur_kv = self.cur_kv[1].clone() + self.outer_next_kv = self.outer_next_kv[1].clone() + else: + self.outer_ring_p2p.wait() + self.cur_kv, self.outer_next_kv = self.outer_next_kv, self.cur_kv + + # last step, no need to communicate KV + is_last_step = index + 1 == self.outer_size + + # only need communicate KV in the first step when full cache + first_step_with_full_cache = self.cache_policy == "full" and index > 0 + + if not first_step_with_full_cache and not is_last_step: + self.outer_ring_p2p.async_send_recv(send_tensor=self.cur_kv, recv_tensor=self.outer_next_kv) + + def communicate_inner_ring_kv(self, index): + """ + Implements of kv communications in inner ring + + Args: + index (int): the index of inner for loop + + Returns: + cur_k (torch.tensor): k(keys), backward operator input in this iteration + cur_v (torch.tensor): v(values), backward operator input in this iteration + """ + total_index = self.outer_index * self.inner_size + index + + # index > 0, using kv after communication + if index > 0: + if total_index == 1 and self.cache_policy == "half": + # special case: index=1, cache_policy=half, KV block should be transformed to K + self.inner_ring_p2p.wait() + self.cur_kv, self.inner_next_kv = self.inner_next_kv, self.cur_kv + self.k_out, self.v_out = self.cur_kv[0].clone(), 
self.cur_kv[1].clone() + self.cur_kv = self.cur_kv[1].clone() + self.inner_next_kv = self.inner_next_kv[1].clone() + else: + self.inner_ring_p2p.wait() + self.cur_kv, self.inner_next_kv = self.inner_next_kv, self.cur_kv + + # last step, no need to communicate KV + is_last_step = index + 1 == self.inner_size + + # only need communicate KV in the first step when full cache + first_step_with_full_cache = self.cache_policy == "full" and total_index > 0 + + if not first_step_with_full_cache and not is_last_step: + self.inner_ring_p2p.async_send_recv(send_tensor=self.cur_kv, recv_tensor=self.inner_next_kv) + + cache_index = self.cp_size - total_index - 1 + if self.cache_policy is None: + self.k_out, self.v_out = self.cur_kv[0], self.cur_kv[1] + + elif self.cache_policy == "half": + if total_index == 0: + self.k_out, self.v_out = self.cur_kv[0], self.cur_kv[1] + elif total_index > 1: + self.k_out, self.v_out = self.k[cache_index], self.cur_kv + + elif self.cache_policy == "full": + if total_index <= 1: + self.k_out, self.v_out = self.cur_kv[0], self.cur_kv[1] + else: + self.k_out, self.v_out = self.k[cache_index], self.v[cache_index] + + return self.k_out, self.v_out \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/context_parallel/ring_context_parallel.py b/model/train/yoco_moe/mindspeed/core/context_parallel/ring_context_parallel.py new file mode 100644 index 000000000..910a72eaf --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/context_parallel/ring_context_parallel.py @@ -0,0 +1,939 @@ +# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+import torch +import torch_npu +from einops import rearrange +from mindspeed.ops.fusion_attention_v2 import npu_fusion_attention, npu_fusion_attention_grad +from mindspeed.core.context_parallel.context_parallel_kv_cache import ContextParallelKVCache +from .utils import RingP2P, tnd_out_update, causal_out_update, general_out_update, forward_update, sbh_to_tnd, tnd_to_sbh, unflatten_softmax, flatten_softmax, get_selection_indices_for_tnd_softmax_update + + +def causal_forward_fetch(q_block_id, kv_block_id, q, cur_k, cur_v, attn_mask=None): + cur_attn_mask = None + if q_block_id == kv_block_id: + # [2, s, b, h] -> [2s, b, h] + cur_attn_mask = attn_mask + cur_q, cur_k, cur_v = [x.view(-1, *x.shape[2:]) for x in [q, cur_k, cur_v]] + elif kv_block_id <= q_block_id: + # [2, s, b, h] -> [2s, b, h] + cur_q = q.view(-1, *q.shape[2:]) + # only k[0] v[0] need to be calculated + cur_k, cur_v = [x[0] for x in [cur_k, cur_v]] + else: + # only q[1] need to be calculated + cur_q = q[1] + # [2, s, b, h] -> [2s, b, h] + cur_k, cur_v = [x.view(-1, *x.shape[2:]) for x in [cur_k, cur_v]] + + return cur_q, cur_k, cur_v, cur_attn_mask + + +def tnd_forward_fetch(q_block_id, kv_block_id, q, cur_k, cur_v, fetch_ptrs, attn_mask=None): + seqlen, half_seqlen, q_index, kv_index = fetch_ptrs + actual_seq_qlen, actual_seq_kvlen, sub_out_seq_len = seqlen + half_actual_seq_qlen, half_actual_seq_kvlen, half_sub_out_seq_len = half_seqlen + + cur_attn_mask = None + if q_block_id == kv_block_id: + cur_attn_mask = attn_mask + cur_q = q + cur_seq_qlen, cur_seq_kvlen = actual_seq_qlen, actual_seq_kvlen + cur_sub_out_seq_len = sub_out_seq_len + elif kv_block_id <= q_block_id: + cur_q = q + cur_k, cur_v = [torch.index_select(x, 0, kv_index) for x in [cur_k, cur_v]] + cur_seq_qlen, cur_seq_kvlen = actual_seq_qlen, half_actual_seq_kvlen + cur_sub_out_seq_len = sub_out_seq_len + else: + cur_q = torch.index_select(q, 0, q_index) + cur_seq_qlen, cur_seq_kvlen = half_actual_seq_qlen, actual_seq_kvlen + 
cur_sub_out_seq_len = half_sub_out_seq_len + + return cur_q, cur_k, cur_v, cur_attn_mask, (cur_seq_qlen, cur_seq_kvlen, cur_sub_out_seq_len) + + +def tnd_backward_fetch(q_block_id, kv_block_id, q, cur_k, cur_v, attn_out, dout, + softmax_values, seq_lens, index_values, attn_mask=None): + # fetch backward output + actual_seq_qlen, actual_seq_kvlen, half_actual_seq_kvlen, half_actual_seq_qlen = seq_lens + softmax_max, softmax_sum, half_softmax_max, half_softmax_sum = softmax_values + q_index, kv_index = index_values + cur_attn_mask = None + if q_block_id >= kv_block_id: + if q_block_id == kv_block_id: + cur_attn_mask = attn_mask + cur_seq_qlen, cur_seq_kvlen = actual_seq_qlen, actual_seq_kvlen + else: + cur_k, cur_v = [torch.index_select(x, 0, kv_index) for x in [cur_k, cur_v]] + cur_seq_qlen, cur_seq_kvlen = actual_seq_qlen, half_actual_seq_kvlen + + cur_q, cur_attn_out, cur_dout = q, attn_out, dout + cur_softmax_max, cur_softmax_sum = softmax_max, softmax_sum + else: + cur_q, cur_attn_out, cur_dout = [torch.index_select(x, 0, q_index) for x in [q, attn_out, dout]] + cur_softmax_max, cur_softmax_sum = half_softmax_max, half_softmax_sum + cur_seq_qlen, cur_seq_kvlen = half_actual_seq_qlen, actual_seq_kvlen + + return (cur_q, cur_k, cur_v), cur_attn_out, cur_dout, (cur_softmax_max, cur_softmax_sum), cur_attn_mask, (cur_seq_qlen, cur_seq_kvlen) + + +def causal_backward_fetch(q_block_id, kv_block_id, q, cur_k, cur_v, attn_out, dout, + softmax_max, softmax_sum, attn_mask=None): + cur_attn_mask = None + if q_block_id >= kv_block_id: + # [b, n, 2, s, 8] -> [b, n, 2s, 8] + cur_softmax_max = softmax_max.view(softmax_max.shape[0], softmax_max.shape[1], -1, + softmax_max.shape[-1]) + cur_softmax_sum = softmax_sum.view(softmax_sum.shape[0], softmax_sum.shape[1], -1, + softmax_sum.shape[-1]) + # [2, s, b, h] -> [2s, b, h] + cur_q, cur_attn_out, cur_dout = [x.view(-1, *x.shape[2:]) for x in [q, attn_out, dout]] + if q_block_id == kv_block_id: + cur_attn_mask = attn_mask + # [2, s, 
b, h] -> [2s, b, h] + cur_k, cur_v, = [x.view(-1, *x.shape[2:]) for x in [cur_k, cur_v]] + else: + cur_k, cur_v = [x[0] for x in [cur_k, cur_v]] + else: + # [2, s, b, h] -> [2s, b, h] + cur_k, cur_v = [x.view(-1, *x.shape[2:]) for x in [cur_k, cur_v]] + # only q[1] attn_out[1] and dout[1] need to be calculated + cur_q, cur_attn_out, cur_dout = [x[1] for x in [q, attn_out, dout]] + cur_softmax_max, cur_softmax_sum = [x[:, :, 1, :, :] for x in [softmax_max, softmax_sum]] + + return cur_q, cur_k, cur_v, cur_attn_out, cur_dout, cur_softmax_max, cur_softmax_sum, cur_attn_mask + + +def tnd_grad_update(q_block_id, kv_block_id, cur_attn_grads, global_attn_grads, + q_index, kv_index): + cur_dq, cur_dk, cur_dv = cur_attn_grads + dq, dk, dv = global_attn_grads + if q_block_id == kv_block_id: + dq.add_(cur_dq) + dk.add_(cur_dk) + dv.add_(cur_dv) + elif q_block_id > kv_block_id: + dq.add_(cur_dq) + dk.index_add_(0, kv_index, cur_dk) + dv.index_add_(0, kv_index, cur_dv) + else: + dq.index_add_(0, q_index, cur_dq) + dk.add_(cur_dk) + dv.add_(cur_dv) + + return dq, dk, dv + + +def causal_grad_update(q_block_id, kv_block_id, cur_dq, cur_dk, cur_dv, dq, dk, dv): + if q_block_id == kv_block_id: + cur_dq = cur_dq.view(dq.shape) + cur_dk = cur_dk.view(dk.shape) + cur_dv = cur_dv.view(dv.shape) + dq.add_(cur_dq) + dk.add_(cur_dk) + dv.add_(cur_dv) + elif q_block_id > kv_block_id: + cur_dq = cur_dq.view(dq.shape) + dq.add_(cur_dq) + dk[0].add_(cur_dk) + dv[0].add_(cur_dv) + else: + dq[1].add_(cur_dq) + cur_dk = cur_dk.view(dk.shape) # [2s, b, h] -> [2, s, b, h] + cur_dv = cur_dv.view(dv.shape) + dk.add_(cur_dk) + dv.add_(cur_dv) + + return dq, dk, dv + + +def cal_row(cur_q, cur_k, cur_v, s, attn_info): + # q: [s, b, h], kv: [2s, b, h] + n, pse, pse_type, attn_mask, softmax_scale, keep_prob, \ + q_index_list, kv_index_list = attn_info + + # r1c0 + cur_attn_mask = None + attn_outs_r1c0 = npu_fusion_attention( + cur_q, cur_k[:s], cur_v[:s], n, 'SBH', + pse=pse, + pse_type=pse_type, + 
padding_mask=None, + atten_mask=cur_attn_mask, + scale=softmax_scale, + pre_tokens=s, + next_tokens=0 if cur_attn_mask is not None else s, + keep_prob=keep_prob, + sparse_mode=3 if cur_attn_mask is not None else 0, + q_start_idx=[q_index_list[1] * s, ] if q_index_list is not None else q_index_list, + kv_start_idx=[kv_index_list[0] * s, ] if kv_index_list is not None else kv_index_list + ) + # r1c1 + cur_attn_mask = attn_mask + attn_outs_r1c1 = npu_fusion_attention( + cur_q, cur_k[s:], cur_v[s:], n, 'SBH', + pse=pse, + pse_type=pse_type, + padding_mask=None, + atten_mask=cur_attn_mask, + scale=softmax_scale, + pre_tokens=s, + next_tokens=0 if cur_attn_mask is not None else s, + keep_prob=keep_prob, + sparse_mode=3 if cur_attn_mask is not None else 0, + q_start_idx=[q_index_list[1] * s, ] if q_index_list is not None else q_index_list, + kv_start_idx=[kv_index_list[1] * s, ] if kv_index_list is not None else kv_index_list + ) + + # update row1 + attn_out = attn_outs_r1c0[0] + softmax_max = attn_outs_r1c0[1] + softmax_sum = attn_outs_r1c0[2] + curr_attn_out = attn_outs_r1c1[0] + curr_softmax_max = attn_outs_r1c1[1] + curr_softmax_sum = attn_outs_r1c1[2] + attn_out_updated, softmax_max_updated, softmax_sum_updated = forward_update(attn_out, softmax_max, softmax_sum, + curr_attn_out, curr_softmax_max, + curr_softmax_sum) + return [attn_out_updated, softmax_max_updated, softmax_sum_updated] + + +def flash_attention_with_alibi_pse(q_block_id, kv_block_id, cur_qkv, attn_info, s): + n, pse, pse_type, cur_attn_mask, softmax_scale, keep_prob, \ + q_index_list, kv_index_list = attn_info + cur_q, cur_k, cur_v = cur_qkv + if q_block_id == kv_block_id: + attn_outs_r0c0 = npu_fusion_attention( + cur_q[:s], cur_k[:s], cur_v[:s], n, 'SBH', + pse=pse, + pse_type=pse_type, + padding_mask=None, + atten_mask=cur_attn_mask, + scale=softmax_scale, + pre_tokens=s, + next_tokens=0 if cur_attn_mask is not None else s, + keep_prob=keep_prob, + sparse_mode=3 if cur_attn_mask is not None else 0, 
+ q_start_idx=[q_index_list[0] * s, ] if q_index_list is not None else None, + kv_start_idx=[kv_index_list[0] * s, ] if kv_index_list is not None else None, + ) + attn_outs_r1 = cal_row(cur_q[s:], cur_k, cur_v, s, attn_info) + # get output + attn_outs = [] + attn_outs.append(torch.cat([attn_outs_r0c0[0], attn_outs_r1[0]])) + attn_outs.append(torch.cat([attn_outs_r0c0[1], attn_outs_r1[1]], dim=2)) + attn_outs.append(torch.cat([attn_outs_r0c0[2], attn_outs_r1[2]], dim=2)) + elif q_block_id > kv_block_id: + attn_outs_r0c0 = npu_fusion_attention( + cur_q[:s], cur_k, cur_v, n, 'SBH', + pse=pse, + pse_type=pse_type, + padding_mask=None, + atten_mask=cur_attn_mask, + scale=softmax_scale, + pre_tokens=s, + next_tokens=0 if cur_attn_mask is not None else s, + keep_prob=keep_prob, + sparse_mode=3 if cur_attn_mask is not None else 0, + q_start_idx=[q_index_list[0] * s, ] if q_index_list is not None else None, + kv_start_idx=[kv_index_list[0] * s, ] if kv_index_list is not None else None, + ) + attn_outs_r1c0 = npu_fusion_attention( + cur_q[s:], cur_k, cur_v, n, 'SBH', + pse=pse, + pse_type=pse_type, + padding_mask=None, + atten_mask=cur_attn_mask, + scale=softmax_scale, + pre_tokens=s, + next_tokens=0 if cur_attn_mask is not None else s, + keep_prob=keep_prob, + sparse_mode=3 if cur_attn_mask is not None else 0, + q_start_idx=[q_index_list[1] * s, ] if q_index_list is not None else None, + kv_start_idx=[kv_index_list[0] * s, ] if kv_index_list is not None else None, + ) + # get output + attn_outs = [] + attn_outs.append(torch.cat([attn_outs_r0c0[0], attn_outs_r1c0[0]])) + attn_outs.append(torch.cat([attn_outs_r0c0[1], attn_outs_r1c0[1]], dim=2)) + attn_outs.append(torch.cat([attn_outs_r0c0[2], attn_outs_r1c0[2]], dim=2)) + else: + attn_outs = cal_row(cur_q, cur_k, cur_v, s, attn_info) + + return attn_outs + + +def cal_row_grad(cur_q, cur_k, cur_v, cur_dout, cur_softmax_max, cur_softmax_sum, cur_attn_out, + attn_grad_info, s, kv_block_id): + n, pse, pse_type, attn_mask, 
def cal_row_grad(cur_q, cur_k, cur_v, cur_dout, cur_softmax_max, cur_softmax_sum, cur_attn_out,
                 attn_grad_info, s, kv_block_id):
    """Fused-attention backward for one q row against the two halves of k/v.

    Two ``npu_fusion_attention_grad`` calls are issued with the same q, dout,
    softmax statistics and forward output:

    * first against ``cur_k[:s]`` / ``cur_v[:s]`` with no attention mask
      (``sparse_mode=0``, ``next_tokens=s``) and ``kv_index_list[0]``;
    * then against ``cur_k[s:]`` / ``cur_v[s:]`` with the mask taken from
      ``attn_grad_info`` and ``kv_index_list[1]``.

    Both calls use ``q_index_list[1]`` for ``q_start_idx`` and the dropout
    triple stored at ``rng_states[kv_block_id]``.

    Args:
        cur_q, cur_k, cur_v: Query / key / value tensors for this step.
        cur_dout: Incoming gradient passed to the grad kernel.
        cur_softmax_max, cur_softmax_sum: Softmax statistics from the forward pass.
        cur_attn_out: Forward attention output (``attention_in`` of the kernel).
        attn_grad_info: Nine-item sequence ``(n, pse, pse_type, attn_mask,
            softmax_scale, keep_prob, rng_states, q_index_list, kv_index_list)``.
        s: Split point along dim 0 of k/v; also used for ``pre_tokens`` and the
            start-index offsets.
        kv_block_id: Index into ``rng_states``.

    Returns:
        Tuple ``(grads_first_half, grads_second_half)`` holding the raw results
        of the two kernel calls, in that order.
    """
    (n, pse, pse_type, attn_mask, softmax_scale, keep_prob, rng_states,
     q_index_list, kv_index_list) = attn_grad_info

    q_start = None if q_index_list is None else [q_index_list[1] * s]

    def _grad_tile(k_part, v_part, tile_mask, kv_slot):
        # Only the k/v slice, the mask and the kv index slot differ between the two calls.
        masked = tile_mask is not None
        rng = rng_states[kv_block_id]
        return npu_fusion_attention_grad(
            cur_q, k_part, v_part, cur_dout, n, 'SBH',
            pse=pse,
            pse_type=pse_type,
            padding_mask=None,
            softmax_max=cur_softmax_max,
            softmax_sum=cur_softmax_sum,
            attention_in=cur_attn_out,
            atten_mask=tile_mask,
            scale=softmax_scale,
            pre_tokens=s,
            next_tokens=0 if masked else s,
            keep_prob=keep_prob,
            seed=rng[0],
            offset=rng[1],
            numels=rng[2],
            sparse_mode=3 if masked else 0,
            q_start_idx=q_start,
            kv_start_idx=None if kv_index_list is None else [kv_index_list[kv_slot] * s],
        )

    grads_first_half = _grad_tile(cur_k[:s], cur_v[:s], None, 0)
    grads_second_half = _grad_tile(cur_k[s:], cur_v[s:], attn_mask, 1)
    return grads_first_half, grads_second_half
def flash_attention_with_alibi_pse_grad(q_block_id, kv_block_id, cur_qkv, cur_dout, cur_attn_out,
                                        cur_softmax_max, cur_softmax_sum, attn_grad_info, s):
    """Fused-attention backward for one (q block, kv block) pair when a PSE bias is supplied.

    Mirrors the tiling of the forward helper:

    * ``q_block_id == kv_block_id``: the first ``s`` q rows are differentiated
      against ``cur_k[:s]`` / ``cur_v[:s]`` directly, the remaining rows go
      through ``cal_row_grad`` against the full k/v, and the pieces are summed
      or concatenated into (dq, dk, dv).
    * ``q_block_id > kv_block_id``: both q halves are differentiated against
      the whole k/v; dq is the concatenation, dk/dv are the sums.
    * otherwise: everything goes through ``cal_row_grad``; dq is the sum of its
      two results, dk/dv are their concatenation.

    Args:
        q_block_id: Id of the query block.
        kv_block_id: Id of the key/value block; also indexes ``rng_states``.
        cur_qkv: Triple ``(cur_q, cur_k, cur_v)``.
        cur_dout: Incoming gradient, sliced along dim 0 like q.
        cur_attn_out: Forward attention output, sliced along dim 0 like q.
        cur_softmax_max, cur_softmax_sum: Forward softmax statistics, sliced
            along dim 2.
        attn_grad_info: Nine-item sequence ``(n, pse, pse_type, attn_mask,
            softmax_scale, keep_prob, rng_states, q_index_list, kv_index_list)``.
        s: Split point used for slicing, ``pre_tokens`` and start-index offsets.

    Returns:
        List ``[dq, dk, dv]``.
    """
    (n, pse, pse_type, cur_attn_mask, softmax_scale, keep_prob, rng_states,
     q_index_list, kv_index_list) = attn_grad_info
    cur_q, cur_k, cur_v = cur_qkv

    masked = cur_attn_mask is not None
    next_tokens = 0 if masked else s
    sparse_mode = 3 if masked else 0
    first_rows = slice(None, s)
    rest_rows = slice(s, None)

    def _grad_tile(rows, k_part, v_part, q_slot):
        # One direct grad-kernel call; all of them use kv_index_list[0].
        rng = rng_states[kv_block_id]
        return npu_fusion_attention_grad(
            cur_q[rows], k_part, v_part, cur_dout[rows], n, 'SBH',
            pse=pse,
            pse_type=pse_type,
            padding_mask=None,
            softmax_max=cur_softmax_max[:, :, rows],
            softmax_sum=cur_softmax_sum[:, :, rows],
            attention_in=cur_attn_out[rows],
            atten_mask=cur_attn_mask,
            scale=softmax_scale,
            pre_tokens=s,
            next_tokens=next_tokens,
            keep_prob=keep_prob,
            seed=rng[0],
            offset=rng[1],
            numels=rng[2],
            sparse_mode=sparse_mode,
            q_start_idx=None if q_index_list is None else [q_index_list[q_slot] * s],
            kv_start_idx=None if kv_index_list is None else [kv_index_list[0] * s],
        )

    if q_block_id == kv_block_id:
        upper = _grad_tile(first_rows, cur_k[:s], cur_v[:s], 0)
        lower_left, lower_right = cal_row_grad(
            cur_q[s:], cur_k, cur_v, cur_dout[s:], cur_softmax_max[:, :, s:], cur_softmax_sum[:, :, s:],
            cur_attn_out[s:], attn_grad_info, s, kv_block_id
        )
        dq = torch.cat([upper[0], lower_left[0] + lower_right[0]])
        dk = torch.cat([upper[1] + lower_left[1], lower_right[1]])
        dv = torch.cat([upper[2] + lower_left[2], lower_right[2]])
        return [dq, dk, dv]

    if q_block_id > kv_block_id:
        upper = _grad_tile(first_rows, cur_k, cur_v, 0)
        lower = _grad_tile(rest_rows, cur_k, cur_v, 1)
        dq = torch.cat([upper[0], lower[0]])
        dk = upper[1] + lower[1]
        dv = upper[2] + lower[2]
        return [dq, dk, dv]

    left, right = cal_row_grad(
        cur_q, cur_k, cur_v, cur_dout, cur_softmax_max, cur_softmax_sum, cur_attn_out,
        attn_grad_info, s, kv_block_id
    )
    dq = left[0] + right[0]
    dk = torch.cat([left[1], right[1]])
    dv = torch.cat([left[2], right[2]])
    return [dq, dk, dv]
class AttentionWithCp(torch.autograd.Function):
    """Attention implementation with context parallelism.

    KV blocks are passed around an inner ring (inside a window) and an outer
    ring (between windows) with ``RingP2P`` while each rank keeps its own q.
    ``block_size`` and ``batch_size`` are class attributes read by
    ``compute_mask``; they are assigned by ``ringattn_context_parallel`` before
    ``apply`` is called.
    """

    @staticmethod
    def forward(ctx, q, k, v, n, cp_para, softmax_scale=None, attn_mask=None, dropout_p=0.,
                packed_seq_params=None):
        """Ring-attention forward.

        Args:
            ctx: Autograd context.
            q, k, v: Local query / key / value tensors; ``q.shape`` is unpacked
                into three dimensions and dim 1 is stored as the batch size.
            n: Head count passed to the fused kernels.
            cp_para: Dict of context-parallel settings. ``'causal'`` is
                required; the other keys are read with ``.get``.
            softmax_scale: Defaults to ``(q.shape[-1] // n) ** -0.5``.
            attn_mask: Optional mask (or list of masks); in causal mode a
                2048x2048 upper-triangular bool mask is created when omitted.
            dropout_p: Dropout probability; the kernels receive ``1 - dropout_p``.
            packed_seq_params: Optional object providing ``cu_seqlens_q``,
                ``cu_seqlens_kv``, ``q_index`` and ``kv_index``; its presence
                switches on the EOD-reset (TND layout) path.

        Returns:
            The attention output (converted back with ``tnd_to_sbh`` on the
            causal EOD-reset path).
        """
        keep_prob = 1. - dropout_p
        causal = cp_para['causal']
        cp_group = cp_para.get("cp_group")
        cp_size = cp_para.get("cp_size")
        rank = cp_para.get("rank")
        cp_global_ranks = cp_para.get("cp_global_ranks")
        cp_group_for_send_recv_overlap = cp_para.get("cp_group_for_send_recv_overlap")
        # WARNING: Degrade to original ring attention, if ranks and comm groups for double ring are not provided
        cp_inner_ranks = cp_para.get("cp_inner_ranks", [torch.distributed.get_rank()])
        cp_outer_ranks = cp_para.get("cp_outer_ranks", cp_global_ranks)
        cp_group_for_intra_window = cp_para.get('cp_group_for_intra_window')
        cp_group_for_intra_window_send_recv_overlap = cp_para.get('cp_group_for_intra_window_send_recv_overlap')
        megatron_cp_in_bnsd = cp_para.get('megatron_cp_in_bnsd')

        pse = cp_para.get("pse")
        pse_type = cp_para.get("pse_type")

        cache_policy = cp_para.get("cache_policy")

        inner_ring = RingP2P(cp_inner_ranks, cp_group_for_intra_window, cp_group_for_intra_window_send_recv_overlap)
        outer_ring = RingP2P(cp_outer_ranks, cp_group, cp_group_for_send_recv_overlap)
        inner_size = len(cp_inner_ranks)
        outer_size = cp_size // inner_size

        # The q lengths come from cu_seqlens_q and the kv lengths from cu_seqlens_kv
        # (the two sources used to be crossed).
        actual_seq_qlen = packed_seq_params.cu_seqlens_q.tolist() if packed_seq_params else None
        actual_seq_kvlen = packed_seq_params.cu_seqlens_kv.tolist() if packed_seq_params else None
        is_eod_reset = (actual_seq_kvlen is not None) and (actual_seq_qlen is not None)
        _, bsz, _ = q.shape

        if softmax_scale is None:
            head_dim = q.shape[-1] // n
            softmax_scale = head_dim ** (-0.5)
        if causal and attn_mask is None:
            attn_mask = torch.ones((2048, 2048), dtype=torch.bool, device=q.device)
            attn_mask = torch.triu(attn_mask, diagonal=1)

        if causal:
            if is_eod_reset:
                # SBH -> TND
                # fa varlen mode require TND layout
                q, k, v = [sbh_to_tnd(x, n) for x in [q, k, v]]

                # only first half of each sub sequence KV block need to be calculated when i <= rank
                kv_index = packed_seq_params.kv_index
                # only last half of each sub sequence q block need to be calculated when i > rank
                q_index = packed_seq_params.q_index

                sub_out_seq_len = (torch.tensor([0] + actual_seq_qlen)[1:] - torch.tensor([0] + actual_seq_qlen)[:-1]).tolist()
                seq_lens = (actual_seq_qlen, actual_seq_kvlen, sub_out_seq_len)
                half_seq_lens = [[x // 2 for x in lst] for lst in seq_lens]
                fetch_ptrs = (seq_lens, half_seq_lens, q_index, kv_index)

                softmax_indices = get_selection_indices_for_tnd_softmax_update(q.shape[0], q.shape[1], half_seq_lens[2]).to(q.device)
            else:
                # split chunk[i]~chunk[cp_size-i-1] into chunk[i] and chunk[cp_size-i-1], [2s, b, h] -> [2, s, b, h]
                q, k, v = [x.view(2, x.shape[0] // 2, *x.shape[1:]) for x in [q, k, v]]
        cur_kv = torch.cat((k.unsqueeze(0), v.unsqueeze(0)), dim=0)  # [2, 2, s, b, h]
        next_kv = torch.empty_like(cur_kv)
        next_round_kv = torch.empty_like(cur_kv)
        attn_out, softmax_max, softmax_sum = None, None, None
        # (seed, offset, numels) for dropout mask
        rng_states = [[0, 0, 0] for _ in range(cp_size)]
        global_attn_outs = [attn_out, softmax_max, softmax_sum, rng_states]
        q_block_id, kv_block_id, kv_block_id_outer = rank, rank, rank

        # kv cache list
        k_cache_list = []
        v_cache_list = []

        for j in range(outer_size):
            kv_block_id = kv_block_id_outer
            kv_block_offset = (kv_block_id // inner_size) * inner_size
            if j < outer_size - 1:
                outer_ring.async_send_recv(send_tensor=cur_kv, recv_tensor=next_round_kv)
            for i in range(inner_size):
                # wait until KV is received from recv_src
                if i < inner_size - 1:
                    inner_ring.async_send_recv(send_tensor=cur_kv, recv_tensor=next_kv)

                cur_k, cur_v = cur_kv[0], cur_kv[1]  # [2, s, b, h]

                # cache kv or k
                if j * inner_size + i + 2 != cp_size:
                    if cache_policy == "full":
                        k_cache_list.append(cur_kv[0].clone())
                        v_cache_list.append(cur_kv[1].clone())
                    elif cache_policy == "half":
                        k_cache_list.append(cur_kv[0].clone())

                if causal:
                    # flash attention forward
                    cur_sub_out_seq_len = None
                    attn_outs = None
                    if pse is None:
                        if is_eod_reset:
                            cur_q, cur_k, cur_v, cur_attn_mask, cur_seq_lens = tnd_forward_fetch(q_block_id, kv_block_id, q, cur_k, cur_v,
                                                                                                 fetch_ptrs, attn_mask)
                            cur_seq_qlen, cur_seq_kvlen, cur_sub_out_seq_len = cur_seq_lens
                            # flash attention forward
                            attn_outs = torch_npu.npu_fusion_attention(
                                cur_q, cur_k, cur_v, n, "TND",
                                pse=None,
                                padding_mask=None,
                                atten_mask=cur_attn_mask,
                                scale=softmax_scale,
                                pre_tockens=cur_k.shape[0],
                                next_tockens=0 if cur_attn_mask is not None else cur_k.shape[0],
                                keep_prob=keep_prob,
                                sparse_mode=3 if cur_attn_mask is not None else 0,
                                actual_seq_qlen=cur_seq_qlen,
                                actual_seq_kvlen=cur_seq_kvlen
                            )
                        else:
                            cur_q, cur_k, cur_v, cur_attn_mask = causal_forward_fetch(q_block_id, kv_block_id,
                                                                                      q, cur_k, cur_v, attn_mask)

                            layout = "SBH"
                            pre_tockens_value = cur_k.shape[0]
                            if megatron_cp_in_bnsd:
                                cur_q = rearrange(cur_q, 's b (h d) -> b h s d', h=n).contiguous()
                                kv_n = cur_v.shape[2] // cur_q.shape[3]
                                cur_k, cur_v = [rearrange(x, 's b (h d) -> b h s d', h=kv_n).contiguous() for x in [cur_k, cur_v]]
                                layout = "BNSD"
                                pre_tockens_value = cur_k.shape[2]

                            attn_outs = torch_npu.npu_fusion_attention(
                                cur_q, cur_k, cur_v, n, layout,
                                pse=None,
                                padding_mask=None,
                                atten_mask=cur_attn_mask,
                                scale=softmax_scale,
                                pre_tockens=pre_tockens_value,
                                next_tockens=0 if cur_attn_mask is not None else pre_tockens_value,
                                keep_prob=keep_prob,
                                sparse_mode=3 if cur_attn_mask is not None else 0
                            )
                            if megatron_cp_in_bnsd:
                                attn_outs = rearrange(attn_outs[0], 'b h s d -> s b (h d)').contiguous(), attn_outs[1], attn_outs[2]
                    else:
                        cur_q, cur_k, cur_v, cur_attn_mask = causal_forward_fetch(q_block_id, kv_block_id,
                                                                                  q, cur_k, cur_v, attn_mask)
                        q_index_list = [q_block_id, cp_size * 2 - 1 - q_block_id]
                        kv_index_list = [kv_block_id, cp_size * 2 - 1 - kv_block_id]
                        attn_info = [n, pse, pse_type, cur_attn_mask, softmax_scale, keep_prob,
                                     q_index_list, kv_index_list]
                        s = q.shape[1]
                        attn_outs = flash_attention_with_alibi_pse(
                            q_block_id, kv_block_id,
                            (cur_q, cur_k, cur_v),
                            attn_info,
                            s
                        )
                    if is_eod_reset:
                        global_attn_outs = tnd_out_update(q_block_id, kv_block_id, attn_outs, global_attn_outs,
                                                          q_index, softmax_indices, cur_sub_out_seq_len)
                    else:
                        global_attn_outs = causal_out_update(q_block_id, kv_block_id, attn_outs, global_attn_outs)
                else:
                    # [2s, b, h], [b, n, 2s, 8], [b, n, 2s, 8]
                    this_mask = AttentionWithCp.compute_mask(
                        actual_seq_qlen, actual_seq_kvlen,
                        q_block_id, kv_block_id,
                        attn_mask
                    )

                    attn_outs = torch_npu.npu_fusion_attention(
                        q, cur_k, cur_v, n, "SBH",
                        pse=None,
                        padding_mask=None,
                        atten_mask=this_mask,
                        scale=softmax_scale,
                        pre_tockens=cur_k.shape[0],
                        next_tockens=cur_k.shape[0],
                        keep_prob=keep_prob,
                        sparse_mode=1
                    )

                    global_attn_outs = general_out_update(q_block_id, kv_block_id, attn_outs, global_attn_outs)

                if inner_ring.wait():
                    cur_kv, next_kv = next_kv, cur_kv  # double buffer
                    kv_block_id = (kv_block_id + inner_size - 1) % inner_size + kv_block_offset

            if outer_ring.wait():
                cur_kv, next_round_kv = next_round_kv, cur_kv  # double buffer
                kv_block_id_outer = (kv_block_id_outer + cp_size - inner_size) % cp_size

        # Nothing was cached (e.g. no cache policy): keep only the KV block held last.
        k_cache_list = k_cache_list if k_cache_list else [cur_kv[0].clone()]
        v_cache_list = v_cache_list if v_cache_list else [cur_kv[1].clone()]
        attn_mask = attn_mask if isinstance(attn_mask, list) else [attn_mask]

        attn_out, softmax_max, softmax_sum, rng_states = global_attn_outs

        if causal and not is_eod_reset:
            # [2, s, b, h] -> [2s, b, h]
            q = q.view(-1, *q.shape[2:])
            k_cache_list = [x.view(-1, *x.shape[2:]) for x in k_cache_list]
            v_cache_list = [x.view(-1, *x.shape[2:]) for x in v_cache_list]

        k_stack = torch.stack(k_cache_list)
        v_stack = torch.stack(v_cache_list)

        ctx.save_for_backward(q, k_stack, v_stack, *attn_mask, attn_out, softmax_max, softmax_sum)
        ctx.n = n
        ctx.causal = causal
        ctx.softmax_scale = softmax_scale
        ctx.cp_group = cp_group
        ctx.cp_size = cp_size
        ctx.cp_rank = rank
        ctx.cp_global_ranks = cp_global_ranks
        ctx.cp_inner_ranks = cp_inner_ranks
        ctx.cp_outer_ranks = cp_outer_ranks
        ctx.cp_dkv_outer_ranks = cp_para.get('cp_dkv_outer_ranks', cp_global_ranks)
        ctx.kv_block_id = kv_block_id
        ctx.keep_prob = keep_prob
        ctx.rng_states = rng_states
        ctx.pse = pse
        ctx.pse_type = pse_type
        ctx.cp_group_for_send_recv_overlap = cp_group_for_send_recv_overlap
        ctx.cp_group_for_intra_window = cp_group_for_intra_window
        ctx.cp_group_for_intra_window_send_recv_overlap = cp_group_for_intra_window_send_recv_overlap
        ctx.actual_seq_qlen = actual_seq_qlen
        ctx.actual_seq_kvlen = actual_seq_kvlen
        ctx.is_eod_reset = is_eod_reset
        ctx.megatron_cp_in_bnsd = megatron_cp_in_bnsd
        ctx.bsz = bsz
        ctx.cache_policy = cache_policy

        if causal and is_eod_reset:
            ctx.q_index = q_index
            ctx.kv_index = kv_index
            ctx.half_actual_seq_qlen = half_seq_lens[0]
            ctx.half_actual_seq_kvlen = half_seq_lens[1]
            ctx.half_sub_out_seq_len = half_seq_lens[2]
            ctx.sub_out_seq_len = sub_out_seq_len
            ctx.softmax_indices = softmax_indices
            return tnd_to_sbh(attn_out, bsz)

        return attn_out

    @staticmethod
    def backward(ctx, dout):
        """Ring-attention backward.

        KV blocks are obtained through ``ContextParallelKVCache`` and visited
        starting from the block id stored by ``forward``; the running dK/dV
        buffer is passed along the inner and outer rings between steps.

        Args:
            ctx: Autograd context populated by ``forward``.
            dout: Gradient of the forward output.

        Returns:
            ``(dq, dk, dv)`` followed by ``None`` placeholders for the
            remaining forward arguments.
        """
        q, k_stack, v_stack, *attn_mask, attn_out, softmax_max, softmax_sum = ctx.saved_tensors
        attn_mask = attn_mask[0] if len(attn_mask) == 1 else attn_mask

        n = ctx.n
        causal = ctx.causal
        softmax_scale = ctx.softmax_scale
        cp_group = ctx.cp_group
        cp_size = ctx.cp_size
        rank = ctx.cp_rank
        keep_prob = ctx.keep_prob
        rng_states = ctx.rng_states
        pse = ctx.pse
        pse_type = ctx.pse_type
        megatron_cp_in_bnsd = ctx.megatron_cp_in_bnsd
        cp_group_for_send_recv_overlap = ctx.cp_group_for_send_recv_overlap
        cp_group_for_intra_window = ctx.cp_group_for_intra_window
        cp_group_for_intra_window_send_recv_overlap = ctx.cp_group_for_intra_window_send_recv_overlap
        cache_policy = ctx.cache_policy
        is_eod_reset = ctx.is_eod_reset
        if causal and is_eod_reset:
            dout = sbh_to_tnd(dout, n)
        # Reversed order of forward
        inner_size = len(ctx.cp_inner_ranks)
        outer_size = len(ctx.cp_outer_ranks)

        intra_kv_comm = RingP2P(ctx.cp_inner_ranks, cp_group_for_intra_window, cp_group_for_intra_window_send_recv_overlap, is_backward=True)
        intra_dkv_comm = RingP2P(ctx.cp_inner_ranks, cp_group_for_intra_window, cp_group_for_intra_window_send_recv_overlap, is_backward=True)
        inter_kv_comm = RingP2P(ctx.cp_outer_ranks, cp_group, cp_group_for_send_recv_overlap, is_backward=True)
        inter_dkv_comm = RingP2P(ctx.cp_dkv_outer_ranks, cp_group, cp_group_for_send_recv_overlap, is_backward=True)

        if causal:
            if is_eod_reset:
                half_softmax_max = softmax_max.view(-1, 8)[ctx.softmax_indices].view(-1, n, 8)
                half_softmax_sum = softmax_sum.view(-1, 8)[ctx.softmax_indices].view(-1, n, 8)
            else:
                # split chunk[i]~chunk[cp_size-i-1] into chunk[i] and chunk[cp_size-i-1], [2s, b, h] -> [2, s, b, h]
                q, attn_out, dout = [x.view(2, x.shape[0] // 2, *x.shape[1:]) for x in [q, attn_out, dout]]
                k_stack = [x.view(2, x.shape[0] // 2, *x.shape[1:]) for x in k_stack]
                v_stack = [x.view(2, x.shape[0] // 2, *x.shape[1:]) for x in v_stack]
                # [b, n, 2s, 8] -> [b, n, 2, s, 8]
                softmax_max = softmax_max.view(softmax_max.shape[0], softmax_max.shape[1],
                                               2, softmax_max.shape[2] // 2, softmax_max.shape[-1])
                softmax_sum = softmax_sum.view(softmax_sum.shape[0], softmax_sum.shape[1],
                                               2, softmax_sum.shape[2] // 2, softmax_sum.shape[-1])

        def backward_step_helper(q_block_id, kv_block_id, q, cur_k, cur_v):
            """Compute (dq, dk, dv) for a single (q block, kv block) step."""
            if causal:
                if pse is None:
                    # flash attention backward
                    if is_eod_reset:
                        softmax_values = (softmax_max, softmax_sum, half_softmax_max, half_softmax_sum)
                        seq_lens = (ctx.actual_seq_qlen, ctx.actual_seq_kvlen, ctx.half_actual_seq_qlen, ctx.half_actual_seq_kvlen)
                        index_values = (ctx.q_index, ctx.kv_index)
                        step_inputs = tnd_backward_fetch(q_block_id, kv_block_id, q, cur_k, cur_v, attn_out, dout,
                                                          softmax_values, seq_lens, index_values, attn_mask=attn_mask)
                        qkv, cur_attn_out, cur_dout, cur_softmax_values, cur_attn_mask, cur_seq_lens = step_inputs
                        cur_q, cur_k, cur_v = qkv
                        cur_softmax_max, cur_softmax_sum = cur_softmax_values
                        cur_seq_qlen, cur_seq_kvlen = cur_seq_lens

                        # flash attention backward
                        attn_grad_outs = torch_npu.npu_fusion_attention_grad(
                            cur_q, cur_k, cur_v, cur_dout, n,
                            "TND",
                            pse=None,
                            padding_mask=None,
                            atten_mask=cur_attn_mask,
                            softmax_max=cur_softmax_max,
                            softmax_sum=cur_softmax_sum,
                            attention_in=cur_attn_out,
                            scale_value=softmax_scale,
                            pre_tockens=cur_k.shape[0],
                            next_tockens=0 if cur_attn_mask is not None else cur_k.shape[0],
                            sparse_mode=3 if cur_attn_mask is not None else 0,
                            actual_seq_qlen=cur_seq_qlen,
                            actual_seq_kvlen=cur_seq_kvlen,
                            keep_prob=keep_prob,
                            seed=rng_states[kv_block_id][0],
                            offset=rng_states[kv_block_id][1],
                            numels=rng_states[kv_block_id][2],
                        )
                    else:
                        step_inputs = causal_backward_fetch(q_block_id, kv_block_id, q, cur_k, cur_v, attn_out, dout,
                                                            softmax_max, softmax_sum, attn_mask=attn_mask)
                        cur_q, cur_k, cur_v, cur_attn_out, cur_dout, cur_softmax_max, cur_softmax_sum, cur_attn_mask = step_inputs
                        layout = "SBH"
                        pre_tockens_value = cur_k.shape[0]
                        if megatron_cp_in_bnsd:
                            cur_q, cur_dout, cur_attn_out = [rearrange(x, 's b (h d) -> b h s d', h=n).contiguous() for x in [cur_q, cur_dout, cur_attn_out]]
                            kv_n = cur_v.shape[2] // cur_q.shape[3]
                            cur_k, cur_v = [rearrange(x, 's b (h d) -> b h s d', h=kv_n).contiguous() for x in [cur_k, cur_v]]
                            layout = "BNSD"
                            pre_tockens_value = cur_k.shape[2]

                        attn_grad_outs = torch_npu.npu_fusion_attention_grad(
                            cur_q, cur_k, cur_v, cur_dout, n,
                            layout,
                            pse=None,
                            padding_mask=None,
                            atten_mask=cur_attn_mask,
                            softmax_max=cur_softmax_max,
                            softmax_sum=cur_softmax_sum,
                            attention_in=cur_attn_out,
                            scale_value=softmax_scale,
                            pre_tockens=pre_tockens_value,
                            next_tockens=0 if cur_attn_mask is not None else pre_tockens_value,
                            sparse_mode=3 if cur_attn_mask is not None else 0,
                            keep_prob=keep_prob,
                            seed=rng_states[kv_block_id][0],
                            offset=rng_states[kv_block_id][1],
                            numels=rng_states[kv_block_id][2],
                        )
                        if megatron_cp_in_bnsd:
                            attn_grad_outs = [rearrange(x, 'b h s d -> s b (h d)').contiguous() for x in [attn_grad_outs[0], attn_grad_outs[1], attn_grad_outs[2]]]
                else:
                    step_inputs = causal_backward_fetch(q_block_id, kv_block_id, q, cur_k, cur_v, attn_out, dout,
                                                        softmax_max, softmax_sum, attn_mask=attn_mask)
                    cur_q, cur_k, cur_v, cur_attn_out, cur_dout, cur_softmax_max, cur_softmax_sum, cur_attn_mask = step_inputs
                    q_index_list = [q_block_id, cp_size * 2 - 1 - q_block_id]
                    kv_index_list = [kv_block_id, cp_size * 2 - 1 - kv_block_id]
                    attn_grad_info = [n, pse, pse_type, cur_attn_mask, softmax_scale, keep_prob, rng_states,
                                      q_index_list, kv_index_list]
                    s = q.shape[1]
                    attn_grad_outs = flash_attention_with_alibi_pse_grad(
                        q_block_id, kv_block_id,
                        (cur_q, cur_k, cur_v), cur_dout, cur_attn_out,
                        cur_softmax_max, cur_softmax_sum,
                        attn_grad_info, s
                    )

                cur_dq, cur_dk, cur_dv = attn_grad_outs[0], attn_grad_outs[1], attn_grad_outs[2]

            else:
                this_mask = AttentionWithCp.compute_mask(
                    ctx.actual_seq_qlen, ctx.actual_seq_kvlen,
                    q_block_id, kv_block_id,
                    attn_mask
                )
                attn_grad_outs = torch_npu.npu_fusion_attention_grad(
                    q, cur_k, cur_v, dout, n,
                    "SBH",
                    pse=None,
                    padding_mask=None,
                    atten_mask=this_mask,
                    softmax_max=softmax_max,
                    softmax_sum=softmax_sum,
                    attention_in=attn_out,
                    scale_value=softmax_scale,
                    pre_tockens=cur_k.shape[0],
                    next_tockens=cur_k.shape[0],
                    sparse_mode=1,
                    keep_prob=keep_prob,
                    seed=rng_states[kv_block_id][0],
                    offset=rng_states[kv_block_id][1],
                    numels=rng_states[kv_block_id][2],
                )
                cur_dq, cur_dk, cur_dv = attn_grad_outs[0], attn_grad_outs[1], attn_grad_outs[2]

            return cur_dq, cur_dk, cur_dv

        cur_dkv = torch.zeros((2, *k_stack[-1].shape), dtype=k_stack[-1].dtype, device=k_stack[-1].device)
        next_dkv = cur_dkv.clone()
        next_round_dkv = cur_dkv.clone()

        q_block_id, kv_block_id, kv_block_id_outer = rank, ctx.kv_block_id, ctx.kv_block_id

        outer_data = (outer_size, inter_kv_comm)
        inner_data = (inner_size, intra_kv_comm)
        cp_kv_cache = ContextParallelKVCache(cache_policy, outer_data, inner_data, k_stack, v_stack)

        dq = torch.zeros_like(q)  # [2, s, b, h]
        for j in range(outer_size):
            kv_block_id = kv_block_id_outer
            kv_block_offset = (kv_block_id // inner_size) * inner_size

            cp_kv_cache.communicate_outer_ring_kv(j)

            for i in range(inner_size):
                cur_k, cur_v = cp_kv_cache.communicate_inner_ring_kv(i)

                dq_step, dk_step, dv_step = backward_step_helper(q_block_id, kv_block_id, q, cur_k, cur_v)

                if i == 0 and j > 0:  # receive dk dv from last window
                    inter_dkv_comm.wait()
                    cur_dkv, next_round_dkv = next_round_dkv, cur_dkv
                elif i > 0:  # receive dk dv from last step
                    intra_dkv_comm.wait()
                    cur_dkv, next_dkv = next_dkv, cur_dkv

                dk, dv = cur_dkv[0], cur_dkv[1]
                # update qkv grads
                if is_eod_reset and causal:
                    tnd_grad_update(q_block_id, kv_block_id, (dq_step, dk_step, dv_step), (dq, dk, dv),
                                    ctx.q_index, ctx.kv_index)
                elif causal:
                    causal_grad_update(q_block_id, kv_block_id, dq_step, dk_step, dv_step, dq, dk, dv)
                else:
                    dq.add_(dq_step)
                    dk.add_(dk_step)
                    dv.add_(dv_step)

                if i + 1 != inner_size:
                    intra_dkv_comm.async_send_recv(send_tensor=cur_dkv, recv_tensor=next_dkv)

                kv_block_id = (kv_block_id + 1) % inner_size + kv_block_offset

            if intra_dkv_comm.wait():
                cur_dkv, next_dkv = next_dkv, cur_dkv

            if j + 1 != outer_size:
                inter_dkv_comm.async_send_recv(send_tensor=cur_dkv, recv_tensor=next_round_dkv)

            kv_block_id_outer = (kv_block_id_outer + inner_size) % cp_size

        if inter_dkv_comm.wait():
            cur_dkv, next_round_dkv = next_round_dkv, cur_dkv

        dk, dv = cur_dkv[0], cur_dkv[1]

        # [2, s, b, h] -> [2s, b, h]
        if causal and is_eod_reset:
            dq, dk, dv = [tnd_to_sbh(x, ctx.bsz) for x in [dq, dk, dv]]
        elif causal:
            dq, dk, dv = [x.view(-1, *x.shape[2:]) for x in [dq, dk, dv]]
        return dq, dk, dv, None, None, None, None, None, None, None

    @classmethod
    def compute_mask(cls, actual_seq_qlen, actual_seq_kvlen, q_block_id, kv_block_id, attn_mask):
        """Return the attention mask for one (q block, kv block) pair in non-causal mode.

        When ``actual_seq_qlen`` is non-empty, a per-batch mask is built from
        the sub-sequence ids of the selected q and kv blocks: positions may
        attend to each other only if they carry the same sub-sequence id, the
        mask is fully blocked when ``kv_block_id > q_block_id`` and
        lower-triangular when the ids are equal. The result is inverted (True
        means masked) and given a singleton dim 1.

        Otherwise the entry ``attn_mask[kv_block_id]`` is returned when
        ``attn_mask`` is a list, and ``None`` in every other case.

        Reads ``cls.block_size`` and ``AttentionWithCp.batch_size``, which must
        have been assigned beforehand.
        """
        if not actual_seq_qlen:
            return attn_mask[kv_block_id] if isinstance(attn_mask, list) else None

        # Imported lazily: only the packed-sequence path needs the project helper.
        from mindspeed.utils import batch_index

        seq_len = actual_seq_qlen[-1] // AttentionWithCp.batch_size
        actual_seq_qlen = batch_index(actual_seq_qlen, seq_len)
        actual_seq_kvlen = batch_index(actual_seq_kvlen, seq_len)
        block_size = cls.block_size
        actual_seq_qlen = [[0] + lst for lst in actual_seq_qlen]
        sub_seq_qlen = [torch.tensor(x[1:]) - torch.tensor(x[:-1]) for x in actual_seq_qlen]
        sub_seq_qid = torch.stack([torch.arange(len(lst)).repeat_interleave(lst) for lst in sub_seq_qlen]).npu()  # B S

        this_ids = sub_seq_qid[:, q_block_id * block_size:(q_block_id + 1) * block_size].npu()
        this_tile = this_ids.unsqueeze(dim=2)  # B S 1

        actual_seq_kvlen = [[0] + lst for lst in actual_seq_kvlen]
        sub_seq_kvlen = [torch.tensor(x[1:]) - torch.tensor(x[:-1]) for x in actual_seq_kvlen]
        sub_seq_kvid = torch.stack([torch.arange(len(lst)).repeat_interleave(lst) for lst in sub_seq_kvlen]).npu()  # B S
        other_ids = sub_seq_kvid[:, kv_block_id * block_size:(kv_block_id + 1) * block_size].npu()
        other_tile = other_ids.unsqueeze(dim=1)  # B 1 S

        mask = this_tile == other_tile  # B S S
        if kv_block_id > q_block_id:
            mask = torch.zeros_like(mask)
        elif kv_block_id == q_block_id:
            mask = torch.tril(mask)

        return torch.logical_not(mask).unsqueeze(dim=1).npu()  # B 1 S S


def ringattn_context_parallel(q, k, v, n, cp_para, softmax_scale=None, attn_mask=None, dropout_p=0.,
                              packed_seq_params=None):
    """Functional entry point for ``AttentionWithCp``.

    Records ``q.shape[0]`` and ``q.shape[1]`` on the class as ``block_size`` and
    ``batch_size`` (read by ``AttentionWithCp.compute_mask``) and then invokes
    the autograd function with the arguments unchanged.

    Returns:
        The output of ``AttentionWithCp.apply``.
    """
    # NOTE: these are class-level attributes, so the values from the most recent call win.
    AttentionWithCp.block_size = q.shape[0]
    AttentionWithCp.batch_size = q.shape[1]
    out = AttentionWithCp.apply(
        q, k, v, n, cp_para, softmax_scale, attn_mask, dropout_p,
        packed_seq_params
    )
    return out
out diff --git a/model/train/yoco_moe/mindspeed/core/context_parallel/ring_context_parallel_for_ampipe.py b/model/train/yoco_moe/mindspeed/core/context_parallel/ring_context_parallel_for_ampipe.py new file mode 100644 index 000000000..9a69a246e --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/context_parallel/ring_context_parallel_for_ampipe.py @@ -0,0 +1,503 @@ +# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def flash_attention_backward(qkvn, dy, softmax_max, softmax_sum, atten_out, attn_mask=None, softmax_scale=1.,
                             keep_prob=1., seed=0, offset=0, numels=0):
    """Call the fused FlashAttention backward kernel in "SBH" layout.

    Args:
        qkvn: Four-item sequence ``(q, k, v, n)``; ``n`` is forwarded as the
            head-count argument of the kernel.
        dy: Gradient of the attention output.
        softmax_max, softmax_sum: Softmax statistics saved by the forward call.
        atten_out: Attention output saved by the forward call.
        attn_mask: Optional mask. With a mask the kernel is called with
            ``sparse_mode=3`` and ``next_tockens=0``; without one with
            ``sparse_mode=0`` and ``next_tockens=k.shape[0]``.
        softmax_scale: Forwarded as ``scale_value``.
        keep_prob: Forwarded unchanged.
        seed, offset, numels: Dropout RNG state forwarded unchanged.

    Returns:
        Whatever ``torch_npu.npu_fusion_attention_grad`` returns.
    """
    query, key, value, head_num = qkvn
    kv_len = key.shape[0]
    if attn_mask is None:
        next_tockens, sparse_mode = kv_len, 0
    else:
        next_tockens, sparse_mode = 0, 3
    return torch_npu.npu_fusion_attention_grad(
        query, key, value, dy, head_num,
        "SBH",
        pse=None,
        padding_mask=None,
        atten_mask=attn_mask,
        softmax_max=softmax_max,
        softmax_sum=softmax_sum,
        attention_in=atten_out,
        scale_value=softmax_scale,
        pre_tockens=kv_len,
        next_tockens=next_tockens,
        sparse_mode=sparse_mode,
        keep_prob=keep_prob,
        seed=seed,
        offset=offset,
        numels=numels
    )


def flash_attention_forward(qkvn, attn_mask=None, softmax_scale=1., keep_prob=1.):
    """Call the fused FlashAttention forward kernel in "SBH" layout.

    Args:
        qkvn: Four-item sequence ``(q, k, v, n)``; ``n`` is forwarded as the
            head-count argument of the kernel.
        attn_mask: Optional mask. With a mask the kernel is called with
            ``sparse_mode=3`` and ``next_tockens=0``; without one with
            ``sparse_mode=0`` and ``next_tockens=k.shape[0]``.
        softmax_scale: Forwarded as ``scale``.
        keep_prob: Forwarded unchanged.

    Returns:
        Whatever ``torch_npu.npu_fusion_attention`` returns.
    """
    query, key, value, head_num = qkvn
    kv_len = key.shape[0]
    if attn_mask is None:
        next_tockens, sparse_mode = kv_len, 0
    else:
        next_tockens, sparse_mode = 0, 3
    return torch_npu.npu_fusion_attention(
        query, key, value, head_num, "SBH",
        pse=None,
        padding_mask=None,
        atten_mask=attn_mask,
        scale=softmax_scale,
        pre_tockens=kv_len,
        next_tockens=next_tockens,
        keep_prob=keep_prob,
        sparse_mode=sparse_mode
    )
- dropout_p + if args.ampipe_degree > 2: + raise RuntimeError(f"Context parallel only support ampipe_degree is 2, but got {args.ampipe_degree}") + + head_dim = q.shape[-1] // n + softmax_scale = head_dim ** (-0.5) + + rank = mpu.get_context_parallel_rank() + cp_global_ranks = mpu.get_context_parallel_global_ranks() + prev_rank = get_context_parallel_prev_rank() + next_rank = get_context_parallel_next_rank() + cp_size = mpu.get_context_parallel_world_size() + cp_group = mpu.get_context_parallel_group() + cp_group_for_send_recv_overlap = mpu.get_context_parallel_group_for_send_recv_overlap() if args.use_cp_send_recv_overlap else cp_group + send_recv_comm = RingP2P(cp_global_ranks, cp_group, cp_group_for_send_recv_overlap) + attn_mask = torch.ones((2048, 2048), dtype=torch.bool, device=q.device) + attn_mask = torch.triu(attn_mask, diagonal=1) + if ampipe_idx == 0: + # split chunk[i]~chunk[2cp-1-i] into chunk[i] and chunk[2cp-1-i],, [2s, b, h] -> [2, s, b, h] + q, k, v = [x.view(2, x.shape[0] // 2, *x.shape[1:]) for x in [q, k, v]] + # (seed, offset, numels) for dropout mask + rng_states_qa_kva = [[0, 0, 0] for _ in range(cp_size)] + rng_states_qb_kva = [[0, 0, 0] for _ in range(cp_size)] + rng_states_qb_kvb = [[0, 0, 0] for _ in range(cp_size)] + send_kv = torch.cat((k.unsqueeze(0), v.unsqueeze(0)), dim=0) # [2, 2, s, b, h] + recv_kv = None + # chunk[i] + attn_out_a, softmax_max_a, softmax_sum_a = None, None, None + # chunk[2cp-1-i] + attn_out_b, softmax_max_b, softmax_sum_b = None, None, None + + for i in range(cp_size): + # wait until KV is received from recv_src + if send_recv_comm.wait(): + send_kv = recv_kv + kv_list.append(send_kv) # tmp buffer for next ampipe + if i < cp_size - 1: + recv_kv = torch.empty_like(send_kv) + send_recv_comm.async_send_recv(send_kv, recv_kv) + if i == 0: + qa, ka, va = [x[0] for x in [q, k, v]] + qb, kb, vb = [x[1] for x in [q, k, v]] + + attn_outs_a = flash_attention_forward((qa, ka, va, n), + attn_mask=attn_mask, 
softmax_scale=softmax_scale, + keep_prob=keep_prob) + attn_outs_b = flash_attention_forward((qb, kb, vb, n), + attn_mask=attn_mask, softmax_scale=softmax_scale, + keep_prob=keep_prob) + attn_out_a, softmax_max_a, softmax_sum_a = attn_outs_a[0], attn_outs_a[1], attn_outs_a[2] + attn_out_b, softmax_max_b, softmax_sum_b = attn_outs_b[0], attn_outs_b[1], attn_outs_b[2] + # seed, offset, numels (for dropout) + rng_states_qa_kva[i] = (attn_outs_a[4], attn_outs_a[5], attn_outs_a[6]) + rng_states_qb_kvb[i] = (attn_outs_b[4], attn_outs_b[5], attn_outs_b[6]) + else: + cur_k, cur_v = send_kv[0], send_kv[1] # [2, s, b, h] + + if i <= rank: + qa, ka, va = [x[0] for x in [q, cur_k, cur_v]] + attn_outs_a = flash_attention_forward((qa, ka, va, n), + attn_mask=None, softmax_scale=softmax_scale, + keep_prob=keep_prob) + cur_attn_out_a, cur_softmax_max_a, cur_softmax_sum_a = attn_outs_a[0], attn_outs_a[1], attn_outs_a[ + 2] + rng_states_qa_kva[i] = (attn_outs_a[4], attn_outs_a[5], attn_outs_a[6]) + attn_out_updated, softmax_max_updated, softmax_sum_updated = forward_update( + attn_out_a, softmax_max_a, softmax_sum_a, + cur_attn_out_a, cur_softmax_max_a, cur_softmax_sum_a + ) + attn_out_a, softmax_max_a, softmax_sum_a = attn_out_updated, softmax_max_updated, softmax_sum_updated + else: + kv_idx = i - rank - 1 + kv = kv_list[kv_idx] + cur_k, cur_v = kv[0], kv[1] + qb = q[1] + ka, va = [x[0] for x in [cur_k, cur_v]] + + attn_outs_b = flash_attention_forward((qb, ka, va, n), + attn_mask=None, softmax_scale=softmax_scale) + cur_attn_out_b, cur_softmax_max_b, cur_softmax_sum_b = attn_outs_b[0], attn_outs_b[1], attn_outs_b[ + 2] + rng_states_qb_kva[kv_idx] = (attn_outs_b[4], attn_outs_b[5], attn_outs_b[6]) + + attn_out_updated, softmax_max_updated, softmax_sum_updated = forward_update( + attn_out_b, softmax_max_b, softmax_sum_b, + cur_attn_out_b, cur_softmax_max_b, cur_softmax_sum_b + ) + attn_out_b, softmax_max_b, softmax_sum_b = attn_out_updated, softmax_max_updated, softmax_sum_updated + 
+ attn_out_all = torch.cat((attn_out_a.unsqueeze(0), attn_out_b.unsqueeze(0)), dim=0) + softmax_max_all = torch.cat((softmax_max_a.unsqueeze(0), softmax_max_b.unsqueeze(0)), dim=0) + softmax_sum_all = torch.cat((softmax_sum_a.unsqueeze(0), softmax_sum_b.unsqueeze(0)), dim=0) + o_max_sum_list.append(attn_out_all) + o_max_sum_list.append(softmax_max_all) + o_max_sum_list.append(softmax_sum_all) + + k, v = send_kv[0], send_kv[1] + q, k, v = [x.view(-1, *x.shape[2:]) for x in [q, k, v]] # [2s, b, h] + attn_out = attn_out_a + else: + q = q.view(2, q.shape[0] // 2, *q.shape[1:]) + qb = q[1] + attn_out_all, softmax_max_all, softmax_sum_all = o_max_sum_list + attn_out_b, softmax_max_b, softmax_sum_b = attn_out_all[1], softmax_max_all[1], softmax_sum_all[1] + rng_states_qa_kva = ctx.rng_states_qa_kva + rng_states_qb_kva = ctx.rng_states_qb_kva + rng_states_qb_kvb = ctx.rng_states_qb_kvb + + start_a_idx = cp_size - rank - 1 + start_b_idx = rank + 1 + + for i in range(cp_size): + cur_kv = kv_list[i] + cur_k, cur_v = cur_kv[0], cur_kv[1] + if i >= start_a_idx: + ka, va = cur_k[0], cur_v[0] + + attn_outs_b = flash_attention_forward((qb, ka, va, n), + attn_mask=None, softmax_scale=softmax_scale) + cur_attn_out_b, cur_softmax_max_b, cur_softmax_sum_b = attn_outs_b[0], attn_outs_b[1], attn_outs_b[2] + rng_states_qb_kva[i] = (attn_outs_b[4], attn_outs_b[5], attn_outs_b[6]) + attn_out_updated, softmax_max_updated, softmax_sum_updated = forward_update( + attn_out_b, softmax_max_b, softmax_sum_b, + cur_attn_out_b, cur_softmax_max_b, cur_softmax_sum_b + ) + attn_out_b, softmax_max_b, softmax_sum_b = attn_out_updated, softmax_max_updated, softmax_sum_updated + if i >= start_b_idx: + kb, vb = cur_k[1], cur_v[1] + attn_outs_b = flash_attention_forward((qb, kb, vb, n), + attn_mask=None, softmax_scale=softmax_scale) + cur_attn_out_b, cur_softmax_max_b, cur_softmax_sum_b = attn_outs_b[0], attn_outs_b[1], attn_outs_b[2] + rng_states_qb_kvb[i] = (attn_outs_b[4], attn_outs_b[5], attn_outs_b[6]) 
def attn_with_cp_for_ampipe_backward(ctx, attn_out, saved_tensor_list, dout, fa_bwd_args):
    """Backward of ring context-parallel flash attention for one ampipe chunk.

    The behaviour is selected by ``fa_bwd_args.cur_degree``:

    * ``0``: K/V blocks (and the running dK/dV) are passed around the ring and
      ``flash_attention_backward`` is called once per step for ``(qb, ka, va)``.
      Every K/V block that was used is appended to ``kv_list``; the resulting
      ``dq_b``, ``dk[0]``, ``dv[0]`` are appended to ``dkv_list`` and ``dout`` to
      ``dout_list`` so that the other chunk can reuse them.
    * otherwise: ``kv_list`` is reversed and replayed, gradients for
      ``(qa, ka, va)`` and ``(qb, kb, vb)`` are computed, only dK/dV travel around
      the ring, and the partial results stored in ``dkv_list`` are added in the
      last step.

    Args:
        ctx: object providing ``n``, ``rank``, ``softmax_scale``, ``cp_size``,
            ``cp_group``, ``cp_group_for_send_recv_overlap``, ``keep_prob`` and the
            three ``rng_states_*`` attributes read below.
        attn_out: indexable with ``[0]`` / ``[1]`` to get the "a" / "b" halves.
        saved_tensor_list: ``(q, k, v, attn_mask, softmax_max, softmax_sum)``.
        dout: output gradient handed to ``flash_attention_backward``.
        fa_bwd_args: object providing ``kv_list``, ``dkv_list``, ``dout_list`` and
            ``cur_degree``.

    Returns:
        Tuple ``(dq, dk, dv)``. In the non-zero branch the two halves are
        concatenated and flattened back to a single leading dimension.
        NOTE(review): in the ``cur_degree == 0`` branch ``dq`` is still ``None``
        when the function returns - confirm that callers expect this.

    Raises:
        RuntimeError: if ``args.ampipe_degree`` is greater than 2.
    """
    args = get_args()
    kv_list, dkv_list, dout_list, ampipe_idx = (fa_bwd_args.kv_list, fa_bwd_args.dkv_list,
                                                fa_bwd_args.dout_list, fa_bwd_args.cur_degree)

    # NOTE(review): a None list is replaced by a fresh *local* list, so anything
    # appended below is not visible through fa_bwd_args - confirm callers always
    # pass real lists when results must be shared between chunks.
    if kv_list is None:
        kv_list = []
    if dkv_list is None:
        dkv_list = []
    if dout_list is None:
        dout_list = []
    if args.ampipe_degree > 2:
        raise RuntimeError(f"Context parallel only support ampipe_degree is 2, but got {args.ampipe_degree}")

    q, k, v, attn_mask, softmax_max, softmax_sum = saved_tensor_list
    n = ctx.n
    rank = ctx.rank
    softmax_scale = ctx.softmax_scale
    cp_size = ctx.cp_size
    cp_group = ctx.cp_group
    cp_group_for_send_recv_overlap = ctx.cp_group_for_send_recv_overlap
    cp_global_ranks = mpu.get_context_parallel_global_ranks()
    keep_prob = ctx.keep_prob
    rng_states_qa_kva = ctx.rng_states_qa_kva
    rng_states_qb_kva = ctx.rng_states_qb_kva
    rng_states_qb_kvb = ctx.rng_states_qb_kvb
    # [2s, b, h] -> [2, s, b, h]
    q, k, v = [x.view(2, x.shape[0] // 2, *x.shape[1:]) for x in [q, k, v]]

    # Index 0 is the "a" half, index 1 the "b" half.
    attn_out_a, softmax_max_a, softmax_sum_a = attn_out[0], softmax_max[0], softmax_sum[0]
    attn_out_b, softmax_max_b, softmax_sum_b = attn_out[1], softmax_max[1], softmax_sum[1]

    if ampipe_idx == 0:
        send_recv_comm = RingP2P(cp_global_ranks, cp_group, cp_group_for_send_recv_overlap, is_backward=True)
        dq, dk, dv = None, None, None
        recv_kv_dkv = None
        recv_kv = None
        recv_dkv = None
        # [s, b, h]
        qa, ka, va = [x[0] for x in [q, k, v]]
        qb, kb, vb = [x[1] for x in [q, k, v]]
        dq_b = torch.zeros_like(qb)
        dk = torch.zeros_like(k)
        dv = torch.zeros_like(v)
        kv = torch.cat((k.unsqueeze(0), v.unsqueeze(0)), dim=0)
        # Combined buffer: slot 0 carries K/V, slot 1 carries dK/dV.
        send_kv_dkv = torch.empty((2, *kv.shape), dtype=kv.dtype, device=kv.device)

        for i in range(cp_size):
            # wait until KV is received from recv_src
            if send_recv_comm.wait():
                # only received kv in the second loop
                if i == 1:
                    send_kv = recv_kv
                    send_kv_dkv[0].copy_(send_kv)
                else:
                    send_kv_dkv = recv_kv_dkv
            if i > 0:
                dkv = torch.cat((dk.unsqueeze(0), dv.unsqueeze(0)), dim=0)
                send_kv_dkv[1].copy_(dkv)

            # just send-recv kv in the first loop
            if i == 0:
                send_kv = kv
                recv_kv = torch.empty_like(send_kv)
                send_recv_comm.async_send_recv(send_kv, recv_kv)
                kv_list.append(send_kv)
            # just send-recv dkv in the last loop
            elif i == cp_size - 1:
                send_dkv = send_kv_dkv[1]
                recv_dkv = torch.empty_like(send_dkv)
                send_recv_comm.async_send_recv(send_dkv, recv_dkv)
                cur_k, cur_v = send_kv_dkv[0][0], send_kv_dkv[0][1]
                ka, va = cur_k[0], cur_v[0]
                kv_list.append(send_kv_dkv[0])
            else:
                recv_kv_dkv = torch.empty_like(send_kv_dkv)
                send_recv_comm.async_send_recv(send_kv_dkv, recv_kv_dkv)
                cur_k, cur_v = send_kv_dkv[0][0], send_kv_dkv[0][1]
                ka, va = cur_k[0], cur_v[0]
                kv_list.append(send_kv_dkv[0])

            # RNG states are read in reverse step order (cp_size - i - 1).
            attn_grad_outs_b = flash_attention_backward(
                (qb, ka, va, n),
                dout, softmax_max_b, softmax_sum_b, attn_out_b,
                None, softmax_scale, keep_prob, rng_states_qb_kva[cp_size - i - 1][0],
                rng_states_qb_kva[cp_size - i - 1][1], rng_states_qb_kva[cp_size - i - 1][2]
            )

            cur_dq_b, cur_dk_a, cur_dv_a = attn_grad_outs_b[0], attn_grad_outs_b[1], attn_grad_outs_b[2]
            if i == 0:
                dq_b = cur_dq_b
                dk[0].copy_(cur_dk_a)
                dv[0].copy_(cur_dv_a)
            else:
                # wait until dKV is received from recv_src
                send_recv_comm.wait()
                # only received dkv in the last loop
                if i == cp_size - 1:
                    dkv = recv_dkv
                else:
                    send_kv_dkv = recv_kv_dkv
                    dkv = send_kv_dkv[1]
                # dk / dv now alias the received buffer; the add_ calls below
                # accumulate into it in place.
                dk, dv = dkv[0], dkv[1]
                dq_b.add_(cur_dq_b)
                dk[0].add_(cur_dk_a)
                dv[0].add_(cur_dv_a)
        # Exactly three entries are stored; the other branch unpacks them as
        # (prev_dq_b, prev_dk_a, prev_dv_a).
        dkv_list.append(dq_b)
        dkv_list.append(dk[0])
        dkv_list.append(dv[0])
        dout_list.append(dout)
    else:
        send_recv_comm = RingP2P(cp_global_ranks, cp_group, cp_group_for_send_recv_overlap)
        # Replay the K/V blocks recorded in kv_list in reverse order.
        kv_list.reverse()

        recv_dkv = None
        # [s, b, h]
        qa, ka, va = [x[0] for x in [q, k, v]]
        qb, kb, vb = [x[1] for x in [q, k, v]]
        dq_a, dk_a, dv_a, dq_b, dk_b, dv_b = [torch.zeros_like(x) for x in [qa, ka, va, qb, kb, vb]]
        # Slot 0 carries (dk_a, dv_a), slot 1 carries (dk_b, dv_b).
        send_dkv = torch.empty((2, 2, *ka.shape), dtype=ka.dtype, device=ka.device)

        for i in range(cp_size):
            # the first loop no send-recv
            if i > 0:
                if i <= rank + 1:
                    if i <= rank:
                        dkv_a = torch.cat((dk_a.unsqueeze(0), dv_a.unsqueeze(0)), dim=0)
                        # send_dkv = dkv_a
                        send_dkv[0].copy_(dkv_a)
                    else:
                        dkv_b = torch.cat((dk_b.unsqueeze(0), dv_b.unsqueeze(0)), dim=0)
                        # send_dkv = dkv_b
                        send_dkv[1].copy_(dkv_b)
                else:
                    dkv_a = torch.cat((dk_a.unsqueeze(0), dv_a.unsqueeze(0)), dim=0)
                    dkv_b = torch.cat((dk_b.unsqueeze(0), dv_b.unsqueeze(0)), dim=0)
                    dkv = torch.cat((dkv_a.unsqueeze(0), dkv_b.unsqueeze(0)), dim=0)
                    send_dkv = dkv

                recv_dkv = torch.empty_like(send_dkv)
                send_recv_comm.async_send_recv(send_dkv, recv_dkv)

            if i == cp_size - 1:
                # Last step: both halves are computed against kv_list[0], and
                # this is the only step that passes attn_mask.
                cur_kv = kv_list[0]
                ka, va = cur_kv[0][0], cur_kv[1][0]
                kb, vb = cur_kv[0][1], cur_kv[1][1]
                attn_grad_outs_a = flash_attention_backward(
                    (qa, ka, va, n),
                    dout, softmax_max_a, softmax_sum_a, attn_out_a,
                    attn_mask, softmax_scale, keep_prob,
                    rng_states_qa_kva[0][0], rng_states_qa_kva[0][1], rng_states_qa_kva[0][2]
                )
                # The "b" half uses the dout saved in dout_list by the other chunk.
                attn_grad_outs_b = flash_attention_backward(
                    (qb, kb, vb, n),
                    dout_list[0], softmax_max_b, softmax_sum_b, attn_out_b,
                    attn_mask, softmax_scale, keep_prob,
                    rng_states_qb_kvb[0][0], rng_states_qb_kvb[0][1], rng_states_qb_kvb[0][2]
                )
                cur_dq_a, cur_dk_a, cur_dv_a = attn_grad_outs_a[0], attn_grad_outs_a[1], attn_grad_outs_a[2]
                cur_dq_b, cur_dk_b, cur_dv_b = attn_grad_outs_b[0], attn_grad_outs_b[1], attn_grad_outs_b[2]
            elif i < rank:
                # Only the "a" half is computed in this step.
                cur_kv = kv_list[i + 1]
                ka, va = cur_kv[0][0], cur_kv[1][0]
                attn_grad_outs_a = flash_attention_backward(
                    (qa, ka, va, n),
                    dout, softmax_max_a, softmax_sum_a, attn_out_a,
                    None, softmax_scale, keep_prob,
                    rng_states_qa_kva[i + 1][0], rng_states_qa_kva[i + 1][1], rng_states_qa_kva[i + 1][2]
                )
                cur_dq_a, cur_dk_a, cur_dv_a = attn_grad_outs_a[0], attn_grad_outs_a[1], attn_grad_outs_a[2]
            else:
                # Only the "b" half is computed in this step.
                cur_kv = kv_list[i + 1]
                kb, vb = cur_kv[0][1], cur_kv[1][1]
                attn_grad_outs_b = flash_attention_backward(
                    (qb, kb, vb, n),
                    dout_list[0], softmax_max_b, softmax_sum_b, attn_out_b,
                    None, softmax_scale, keep_prob,
                    rng_states_qb_kvb[i + 1][0], rng_states_qb_kvb[i + 1][1], rng_states_qb_kvb[i + 1][2]
                )
                cur_dq_b, cur_dk_b, cur_dv_b = attn_grad_outs_b[0], attn_grad_outs_b[1], attn_grad_outs_b[2]

            if i == 0:
                # First step: nothing received yet, just take the fresh gradients.
                if rank == 0:
                    dq_b, dk_b, dv_b = cur_dq_b, cur_dk_b, cur_dv_b
                else:
                    dq_a, dk_a, dv_a = cur_dq_a, cur_dk_a, cur_dv_a
            else:
                # wait until dKV is received from recv_src
                send_recv_comm.wait()

                if i < cp_size - 1:
                    if rank == 0:
                        dkv_a = recv_dkv[0]
                        dk_a, dv_a = dkv_a[0], dkv_a[1]

                        dq_b.add_(cur_dq_b)
                        dk_b, dv_b = cur_dk_b, cur_dv_b
                    elif i <= rank:
                        if i == rank:
                            dkv_b = recv_dkv[1]
                            dk_b, dv_b = dkv_b[0], dkv_b[1]

                            dq_b.add_(cur_dq_b)
                            dk_b.add_(cur_dk_b)
                            dv_b.add_(cur_dv_b)
                        else:
                            dkv_a = recv_dkv[0]
                            dk_a, dv_a = dkv_a[0], dkv_a[1]

                            dq_a.add_(cur_dq_a)
                            dk_a.add_(cur_dk_a)
                            dv_a.add_(cur_dv_a)
                    else:
                        dkv = recv_dkv
                        dkv_a, dkv_b = dkv[0], dkv[1]
                        dk_a, dv_a = dkv_a[0], dkv_a[1]
                        dk_b, dv_b = dkv_b[0], dkv_b[1]

                        dq_b.add_(cur_dq_b)
                        dk_b.add_(cur_dk_b)
                        dv_b.add_(cur_dv_b)
                else:
                    # Last step: fold in the partial gradients stored in
                    # dkv_list (must hold exactly three entries).
                    prev_dq_b, prev_dk_a, prev_dv_a = dkv_list
                    if rank == 0:
                        dkv_a = recv_dkv[0]
                        dk_a, dv_a = dkv_a[0], dkv_a[1]

                        dq_a = cur_dq_a
                        dk_a.add_(cur_dk_a)
                        dv_a.add_(cur_dv_a)
                        dk_b, dv_b = cur_dk_b, cur_dv_b
                    elif rank == cp_size - 1:
                        dkv_b = recv_dkv[1]
                        dk_b, dv_b = dkv_b[0], dkv_b[1]

                        dq_a.add_(cur_dq_a)
                        dk_a, dv_a = cur_dk_a, cur_dv_a
                        dk_b.add_(cur_dk_b)
                        dv_b.add_(cur_dv_b)
                    else:
                        dkv = recv_dkv
                        dkv_a, dkv_b = dkv[0], dkv[1]
                        dk_a, dv_a = dkv_a[0], dkv_a[1]
                        dk_b, dv_b = dkv_b[0], dkv_b[1]

                        dq_a.add_(cur_dq_a)
                        dk_a.add_(cur_dk_a)
                        dv_a.add_(cur_dv_a)
                        dk_b.add_(cur_dk_b)
                        dv_b.add_(cur_dv_b)

                    dk_a.add_(prev_dk_a)
                    dv_a.add_(prev_dv_a)
                    dq_b.add_(cur_dq_b)
                    dq_b.add_(prev_dq_b)

        # Stack the two halves and flatten [2, s, ...] back to [2s, ...].
        dq = torch.cat((dq_a.unsqueeze(0), dq_b.unsqueeze(0)), dim=0)
        dk = torch.cat((dk_a.unsqueeze(0), dk_b.unsqueeze(0)), dim=0)
        dv = torch.cat((dv_a.unsqueeze(0), dv_b.unsqueeze(0)), dim=0)
        dq, dk, dv = [x.view(-1, *x.shape[2:]) for x in [dq, dk, dv]]

    return dq, dk, dv
class GatherSizeCalculator(ABC):
    """Abstract base class defining an interface for calculating the gather size in distributed operations.

    The gather size usually refers to the size of the output tensor in the `gather_idx` dimension after all-to-all
    communication (in the Ulysses mechanism).
    """

    @abstractmethod
    def calculate(self, *args: Any, **kwargs: Any) -> Optional[int]:
        """Calculates the gather size based on current context such as batch size or sequence length.

        The signature accepts arbitrary arguments because callers forward the
        attention layer's extra ``*args`` / ``**kwargs`` unchanged, and every
        concrete calculator already declares its override this way.

        Returns:
            Optional[int]: The calculated gather size if applicable, otherwise None.
        """


class DefaultGatherSizeCalculator(GatherSizeCalculator):
    """Default implementation where the gather size is always None. If gather_size is None, it
    will be calculated as the product of the original size of the `gather_idx` of the input tensor and the
    `world_size`."""

    def calculate(self, *args: Any, **kwargs: Any) -> Optional[int]:
        """Ignore all arguments and return None."""
        return None


class DynamicGatherSizeCalculator(GatherSizeCalculator):
    """Dynamic implementation that calculates gather size based on the current batch attention mask sequence length."""

    def calculate(self, *args: Any, **kwargs: Any) -> Optional[int]:
        """Calculates the gather size based on the attention mask sequence length.

        Args:
            *args: the first positional argument is treated as the attention mask.
            **kwargs: accepted for interface compatibility and ignored.

        Returns:
            Optional[int]: the size of the mask's last dimension, or None when no
            positional argument was given, when the first argument is not a
            tensor (general masks of type list do not support dynamic gather
            size), or when the tensor has no dimensions.
        """
        # Guard against a call without positional arguments: indexing args[0]
        # would otherwise raise IndexError instead of falling back to None.
        if not args:
            return None

        atten_mask = args[0]
        if not isinstance(atten_mask, torch.Tensor) or atten_mask.dim() == 0:
            return None

        return atten_mask.shape[-1]
class _SeqAllToAll(torch.autograd.Function):
    """Autograd wrapper around ``single_all_to_all``.

    The backward pass is the same all-to-all with the scatter and gather
    indices exchanged.
    """

    @staticmethod
    def forward(ctx: Any, group: torch.distributed.ProcessGroup, input_: Tensor, scatter_idx: int,
                gather_idx: int) -> Tensor:
        # Keep everything backward needs to issue the reverse exchange.
        ctx.group, ctx.scatter_idx, ctx.gather_idx = group, scatter_idx, gather_idx
        exchanged = single_all_to_all(input_, scatter_idx, gather_idx, group)
        return exchanged

    @staticmethod
    def backward(ctx: Any, *grad_output: Tensor) -> Tuple[None, Tensor, None, None]:
        # Only ``input_`` receives a gradient; group and the two indices get None.
        grad_input = _SeqAllToAll.apply(ctx.group, *grad_output, ctx.gather_idx, ctx.scatter_idx)
        return None, grad_input, None, None
+ gather_idx (int): Index specifying along which dimension the data should be gathered during all-to-all communication. + gather_size_calculator (GatherSizeCalculator): A callable object responsible for calculating the gather_size, + which is the total size of the all-to-all output tensor along the `gather_idx`. + Defaults to DefaultGatherSizeCalculator(). + """ + super(UlyssesContextAttention, self).__init__() + self.local_attn = local_attention + self.local_attn.ulysses_comm_para = { + 'spg': sequence_process_group, + 'scatter_idx': scatter_idx, + 'gather_idx': gather_idx, + 'gather_size_calculator': gather_size_calculator + } + + def forward(self, query: Tensor, key: Tensor, value: Tensor, *args: Any, **kwargs: Any) -> Tensor: + """ forward + + Arguments: + query (Tensor): query input to the layer + key (Tensor): key input to the layer + value (Tensor): value input to the layer + args: other args + + Returns: + * output (Tensor): context output + """ + global_args = get_args() + use_custom_ulysses_backward = ( + global_args.context_parallel_size > 1 and + global_args.context_parallel_algo == "ulysses_cp_algo" and + not global_args.use_legacy_models and + global_args.context_parallel_kv_cache_policy + ) + if use_custom_ulysses_backward: + output = self.local_attn(query, key, value, *args, **kwargs) + else: + spg = self.local_attn.ulysses_comm_para.get('spg') + scatter_idx = self.local_attn.ulysses_comm_para.get('scatter_idx') + gather_idx = self.local_attn.ulysses_comm_para.get('gather_idx') + seq_world_size = torch.distributed.get_world_size(spg) + + # Handle cases where the sequence length of keys/values needs to be adjusted to match queries. 
+ if seq_world_size > key.shape[scatter_idx] and query.shape[scatter_idx] % key.shape[scatter_idx] == 0: + key = key.repeat_interleave(query.shape[scatter_idx] // key.shape[scatter_idx], dim=scatter_idx) + value = value.repeat_interleave(query.shape[scatter_idx] // value.shape[scatter_idx], dim=scatter_idx) + + # Calculate the gather size using the injected gather size calculator + gather_size = self.local_attn.ulysses_comm_para.get('gather_size_calculator').calculate(*args, **kwargs) + + # The gather size usually refers to the size of the output tensor in the `gather_idx` dimension after + # the all-to-all communication + # in shape : e.g., [s/p:h:] + query_layer = all_to_all(query, spg, scatter_idx, gather_idx, gather_size) + key_layer = all_to_all(key, spg, scatter_idx, gather_idx, gather_size) + value_layer = all_to_all(value, spg, scatter_idx, gather_idx, gather_size) + + # out shape : e.g., [s:h/p:] + context_layer = self.local_attn(query_layer, key_layer, value_layer, *args, **kwargs) + + # Reshape the context layer if necessary to align dimensions properly + if gather_size: + context_shape = context_layer.shape + scatter_sizes_query = cal_split_sizes(query.shape[scatter_idx], seq_world_size) + + # To reshape the context_layer tensor to ensure context_layer.size(gather_idx) and context_layer.size(scatter_idx) + # has the correct value. 
class AttnQKVReshape:
    """Ulysses Attention Reshape QKV Implementation

    Converts between the 4-D ``[s, b, h, d]`` layout and the layout passed to the
    attention kernel: SBH (``[s, b, h*d]``) by default, or TND (``[b*s, h, d]``)
    when ``attn_para['packed_seq_params']`` is set.

    The reshapes use native tensor operations only, so the class keeps working
    when the optional ``einops`` import is unavailable.
    """

    def __init__(self, attn_para):
        # Shared dict: reshape_forward records the sizes reshape_backward needs.
        self.attn_para = attn_para

    @staticmethod
    def _sbhd_to_tnd(tensor):
        """[s, b, h, d] -> [b*s, h, d], tokens ordered batch-major."""
        s, b, h, d = tensor.shape
        return tensor.transpose(0, 1).reshape(b * s, h, d)

    @staticmethod
    def _sbhd_to_sbh(tensor):
        """[s, b, h, d] -> [s, b, h*d]."""
        s, b, h, d = tensor.shape
        return tensor.reshape(s, b, h * d)

    @staticmethod
    def _tnd_to_sbhd(grad, seq_len):
        """[b*s, h, d] -> [s, b, h, d]; the batch size is derived from ``seq_len``."""
        _, h, d = grad.shape
        batch = grad.shape[0] // seq_len
        return grad.reshape(batch, seq_len, h, d).transpose(0, 1)

    @staticmethod
    def _sbh_to_sbhd(grad, heads):
        """[s, b, h*d] -> [s, b, h, d]; the head dim is derived from ``heads``."""
        s, b, hidden = grad.shape
        return grad.reshape(s, b, heads, hidden // heads)

    def reshape_forward(self, query, key, value):
        """
        Implements of qkv reshape in forward of ulysses attention

        Args:
            query (Tensor): query input to the attention layer with shape [s, b, h, d]
            key (Tensor): key input to the attention layer with shape [s, b, h, d]
            value (Tensor): value input to the attention layer with shape [s, b, h, d]

        Returns:
            query (Tensor): query input to the attention layer with shape [s, b, h*d] or [s*b, h, d]
            key (Tensor): key input to the attention layer with shape [s, b, h*d] or [s*b, h, d]
            value (Tensor): value input to the attention layer with shape [s, b, h*d] or [s*b, h, d]
            attn_para (Dict): the parameters used in attention computation
        """
        packed_seq_params = self.attn_para.get('packed_seq_params')

        # Record head counts and sequence lengths for reshape_backward.
        self.attn_para['n_head'] = query.shape[2]
        self.attn_para['q_seq_len'] = query.shape[0]
        self.attn_para['k_head'] = key.shape[2]
        self.attn_para['v_head'] = value.shape[2]
        self.attn_para['k_seq_len'] = key.shape[0]
        self.attn_para['v_seq_len'] = value.shape[0]

        # reshape [s, b, h, d] to SBH([s, b, h*d]) or TND([s*b, h, d])
        if packed_seq_params is not None:  # TND
            actual_seq_qlen = packed_seq_params.cu_seqlens_q.tolist()
            actual_seq_kvlen = packed_seq_params.cu_seqlens_kv.tolist()
            query, key, value = [self._sbhd_to_tnd(x) for x in (query, key, value)]
            shape_order = 'TND'
        else:  # SBH
            actual_seq_qlen = None
            actual_seq_kvlen = None
            query, key, value = [self._sbhd_to_sbh(x) for x in (query, key, value)]
            shape_order = 'SBH'

        self.attn_para['shape_order'] = shape_order
        self.attn_para['actual_seq_qlen'] = actual_seq_qlen
        self.attn_para['actual_seq_kvlen'] = actual_seq_kvlen

        return query, key, value, self.attn_para

    def reshape_backward(self, dq, dk, dv):
        """
        Implements of qkv reshape in backward of ulysses attention

        Must be called after ``reshape_forward``, which stores the sequence
        lengths and head counts used here.

        Args:
            dq (Tensor): query grad output of the attention layer with shape [s, b, h*d] or [s*b, h, d]
            dk (Tensor): key grad output of the attention layer with shape [s, b, h*d] or [s*b, h, d]
            dv (Tensor): value grad output of the attention layer with shape [s, b, h*d] or [s*b, h, d]

        Returns:
            dq (Tensor): query grad output of the attention layer with shape [s, b, h, d]
            dk (Tensor): key grad output of the attention layer with shape [s, b, h, d]
            dv (Tensor): value grad output of the attention layer with shape [s, b, h, d]
        """
        packed_seq_params = self.attn_para.get('packed_seq_params')

        # reshape SBH([s, b, h*d]) or TND([s*b, h, d]) back to [s, b, h, d]
        if packed_seq_params is not None:  # TND
            dq = self._tnd_to_sbhd(dq, self.attn_para.get('q_seq_len'))
            dk = self._tnd_to_sbhd(dk, self.attn_para.get('k_seq_len'))
            dv = self._tnd_to_sbhd(dv, self.attn_para.get('v_seq_len'))
        else:  # SBH
            dq = self._sbh_to_sbhd(dq, self.attn_para.get('n_head'))
            dk = self._sbh_to_sbhd(dk, self.attn_para.get('k_head'))
            dv = self._sbh_to_sbhd(dv, self.attn_para.get('v_head'))

        return dq, dk, dv
= key.repeat_interleave(query.shape[scatter_idx] // key.shape[scatter_idx], dim=scatter_idx) + value = value.repeat_interleave(query.shape[scatter_idx] // value.shape[scatter_idx], dim=scatter_idx) + elif cache_policy is not None: + raise AssertionError( + 'KV Cache dose not suggest to use when key and value do not repeat' + ) + + # all2all communication forward, [s, b, h, d] -> [s*cp, b, h//cp, d] + query = single_all_to_all(query, scatter_idx, gather_idx, spg) + key = single_all_to_all(key, scatter_idx, gather_idx, spg) + value = single_all_to_all(value, scatter_idx, gather_idx, spg) + + # reshape [s, b, h, d] to SBH([s, b, h*d]) or TND([s*b, h, d]) + query, key, value, self.attn_para = self.qkv_reshape.reshape_forward(query, key, value) + + return query, key, value, self.attn_para + + def comm_backward(self, dq, dk, dv): + """ + Implements of Repeat-All2All communication in backward of ulysses attention + + Args: + dq (Tensor): query grad output of the attention layer with shape [s, b, h*d] or [s*b, h, d] + dk (Tensor): key grad output of the attention layer with shape [s, b, h*d] or [s*b, h, d] + dv (Tensor): value grad output of the attention layer with shape [s, b, h*d] or [s*b, h, d] + + Returns: + dq (Tensor): query grad output of the attention layer with shape [s, b, h, d] + dk (Tensor): key grad output of the attention layer with shape [s, b, h, d] + dv (Tensor): value grad output of the attention layer with shape [s, b, h, d] + """ + # dq, dk, dv: SBH([s, b, h*d]) or TND([s*b, h, d]) + + # reshape SBH([s, b, h*d]) or TND([s*b, h, d]) back to [s, b, h, d] + dq, dk, dv = self.qkv_reshape.reshape_backward(dq, dk, dv) + + # communication parameters + spg = self.ulysses_comm_para.get('spg') + scatter_idx = self.ulysses_comm_para.get('scatter_idx') + gather_idx = self.ulysses_comm_para.get('gather_idx') + do_repeat = self.ulysses_comm_para.get('do_repeat') + repeat_num = self.ulysses_comm_para.get('repeat_num') + + # all2all communication backward, [s, b, h, 
d] -> [s//cp, b, h*cp, d] + dq = single_all_to_all(dq, gather_idx, scatter_idx, spg) + dk = single_all_to_all(dk, gather_idx, scatter_idx, spg) + dv = single_all_to_all(dv, gather_idx, scatter_idx, spg) + + # if backward repeat, [s, b, h, d] -> [s, b, h//cp, d] + if do_repeat: + dk = dk.view( + *dk.shape[:scatter_idx], dk.shape[scatter_idx] // repeat_num, repeat_num, *dk.shape[scatter_idx + 1:] + ).sum(dim=scatter_idx + 1) + dv = dv.view( + *dv.shape[:scatter_idx], dv.shape[scatter_idx] // repeat_num, repeat_num, *dv.shape[scatter_idx + 1:] + ).sum(dim=scatter_idx + 1) + + return dq, dk, dv + + def recomm_backward(self, input_tensor): + """ + Implements of Repeat-All2All re-communication in backward of ulysses attention + + Args: + input_tensor (Tensor): key or value input of the attention layer with shape [s, b, h, d] + + Returns: + output (Tensor): key or value input of the attention layer with shape [s, b, h*d] or [s*b, h, d] + """ + # k, v: [s, b, h, d] + + # communication parameters + spg = self.ulysses_comm_para.get('spg') + scatter_idx = self.ulysses_comm_para.get('scatter_idx') + gather_idx = self.ulysses_comm_para.get('gather_idx') + do_repeat = self.ulysses_comm_para.get('do_repeat') + repeat_num = self.ulysses_comm_para.get('repeat_num') + + # attention parameters + packed_seq_params = self.attn_para.get('packed_seq_params') + + # if repeat, [s, b, h, d] -> [s, b, h*cp, d] + if do_repeat: + input_tensor = input_tensor.repeat_interleave(repeat_num, dim=scatter_idx) + + # all2all re-communication, [s, b, h, d] -> [s*cp, b, h//cp, d] + output = single_all_to_all(input_tensor, scatter_idx, gather_idx, spg) + + # reshape [s, b, h, d] to SBH([s, b, h*d]) or TND([s*b, h, d]) + if packed_seq_params is not None: + output = rearrange(output, 's b h d -> (b s) h d') + else: # SBH + output = rearrange(output, 's b h d -> s b (h d)') + + return output + + +class AllGatherComm: + """Ulysses Attention AllGather KV + All2All Q Communication Implementation""" + + def 
__init__(self, ulysses_comm_para, attn_para): + self.ulysses_comm_para = ulysses_comm_para + self.attn_para = attn_para + self.qkv_reshape = AttnQKVReshape(attn_para) + spg = self.ulysses_comm_para.get('spg') + self.ulysses_collective_comm = UlyssesCollectiveComm(spg) + + def comm_forward(self, query, key, value): + """ + Implements of AllGather KV + All2All Q communication in forward of ulysses attention + + Args: + query (Tensor): query input to the attention layer with shape [s, b, h, d] + key (Tensor): key input to the attention layer with shape [s, b, h, d] + value (Tensor): value input to the attention layer with shape [s, b, h, d] + + Returns: + query (Tensor): query input to the attention layer with shape [s, b, h*d] or [s*b, h, d] + key (Tensor): key input to the attention layer with shape [s, b, h*d] or [s*b, h, d] + value (Tensor): value input to the attention layer with shape [s, b, h*d] or [s*b, h, d] + attn_para (Dict): the parameters used in attention computation + """ + # q, k, v: [s, b, h, d] + + # communication parameters + spg = self.ulysses_comm_para.get('spg') + scatter_idx = self.ulysses_comm_para.get('scatter_idx') + gather_idx = self.ulysses_comm_para.get('gather_idx') + + # query all2all communication forward, [s, b, h, d] -> [s*cp, b, h//cp, d] + query = single_all_to_all(query, scatter_idx, gather_idx, spg) + + # key and value allgather communication forward, [s, b, h, d] -> [s*cp, b, h, d] + key = sync_gather_along_first_dim(key, self.ulysses_collective_comm) + value = sync_gather_along_first_dim(value, self.ulysses_collective_comm) + + # reshape [s, b, h, d] to SBH([s, b, h*d]) or TND([s*b, h, d]) + query, key, value, self.attn_para = self.qkv_reshape.reshape_forward(query, key, value) + + return query, key, value, self.attn_para + + def comm_backward(self, dq, dk, dv): + """ + Implements of AllGather KV + All2All Q communication in backward of ulysses attention + + Args: + dq (Tensor): query grad output of the attention layer with 
shape [s, b, h*d] or [s*b, h, d] + dk (Tensor): key grad output of the attention layer with shape [s, b, h*d] or [s*b, h, d] + dv (Tensor): value grad output of the attention layer with shape [s, b, h*d] or [s*b, h, d] + + Returns: + dq (Tensor): query grad output of the attention layer with shape [s, b, h, d] + dk (Tensor): key grad output of the attention layer with shape [s, b, h, d] + dv (Tensor): value grad output of the attention layer with shape [s, b, h, d] + """ + # dq, dk, dv: SBH([s, b, h*d]) or TND([s*b, h, d]) + + # reshape SBH([s, b, h*d]) or TND([s*b, h, d]) back to [s, b, h, d] + dq, dk, dv = self.qkv_reshape.reshape_backward(dq, dk, dv) + + # communication parameters + spg = self.ulysses_comm_para.get('spg') + scatter_idx = self.ulysses_comm_para.get('scatter_idx') + gather_idx = self.ulysses_comm_para.get('gather_idx') + + # query all2all communication backward, [s, b, h, d] -> [s//cp, b, h*cp, d] + dq = single_all_to_all(dq, gather_idx, scatter_idx, spg) + + # key and value allgather communication backward, [s, b, h, d] -> [s//cp, b, h, d] + dk = sync_reduce_scatter_along_first_dim(dk, self.ulysses_collective_comm) + dv = sync_reduce_scatter_along_first_dim(dv, self.ulysses_collective_comm) + + return dq, dk, dv + + def recomm_backward(self, input_tensor): + """ + Implements of AllGather KV + All2All Q re-communication in backward of ulysses attention + + Args: + input_tensor (Tensor): key or value input of the attention layer with shape [s, b, h, d] + + Returns: + output (Tensor): key or value input of the attention layer with shape [s, b, h*d] or [s*b, h, d] + """ + # k, v: [s, b, h, d] + + # attention parameters + packed_seq_params = self.attn_para.get('packed_seq_params') + + # allgather re-communication, [s, b, h, d] -> [s*cp, b, h, d] + output = sync_gather_along_first_dim(input_tensor, self.ulysses_collective_comm) + + # reshape [s, b, h, d] to SBH([s, b, h*d]) or TND([s*b, h, d]) + if packed_seq_params is not None: # TND + output = 
class UlyssesAttnWithKVCache(torch.autograd.Function):
    """
    Ulysses attention with a configurable KV cache for the backward pass.

    forward runs the ulysses communication, the fused NPU attention kernel and the output
    all-to-all; backward replays the communication that the chosen cache policy skipped,
    calls the fused attention gradient kernel and sends the gradients back through the
    communication object.
    """

    @staticmethod
    def forward(ctx, query, key, value, attn_para, ulysses_comm_para) -> Tensor:
        """
        Forward of ulysses attention with KV cache.

        Args:
            query (Tensor): query input to the attention layer with shape [s, b, h, d]
            key (Tensor): key input to the attention layer with shape [s, b, h, d]
            value (Tensor): value input to the attention layer with shape [s, b, h, d]
            attn_para (Dict): attention parameters, forwarded to the communication object;
                the dict returned by comm_forward is the one read below.
            ulysses_comm_para (Dict): communication parameters; the keys read here are
                'spg', 'scatter_idx', 'gather_idx', 'cache_policy' and
                'use_ulysses_allgather_kv'.

        Returns:
            output (Tensor): attention output after the output all-to-all.
                NOTE(review): the original docstring advertised [s, b, h*d] or [s*b, h, d],
                but a packed (TND) result is rearranged to 's b (h d)' before the
                all-to-all -- confirm the TND shape is never returned.

        Raises:
            AssertionError: if 'use_ulysses_allgather_kv' is set and key.shape[2] != 1.
        """
        # q, k, v: [s, b, h, d]

        # communication parameters
        spg = ulysses_comm_para.get('spg')
        scatter_idx = ulysses_comm_para.get('scatter_idx')
        gather_idx = ulysses_comm_para.get('gather_idx')
        cache_policy = ulysses_comm_para.get('cache_policy')
        use_ulysses_allgather_kv = ulysses_comm_para.get('use_ulysses_allgather_kv')

        # repeat-all2all or allgather kv + all2all q
        if use_ulysses_allgather_kv:
            # The all-gather path is only accepted when dim 2 of key (the head dim in
            # [s, b, h, d]) has size 1.
            # NOTE(review): the message wording ("When either ... is not equal to 1") reads
            # oddly, and only key is checked, not value.
            if key.shape[2] != 1:
                raise AssertionError(
                    'When either the head number of key or value is not equal to 1, '
                    'use all2all communication to get better performance.'
                )
            # allgather kv + all2all q communication forward
            ulysses_comm = AllGatherComm(ulysses_comm_para, attn_para)
        else:
            # repeat-all2all communication forward
            ulysses_comm = RepeatAll2AllComm(ulysses_comm_para, attn_para)

        # communication forward; clones are passed so the caller's query/key/value objects
        # are not the ones handed to the communication helpers
        q, k, v = query.clone(), key.clone(), value.clone()
        q, k, v, attn_para = ulysses_comm.comm_forward(q, k, v)

        # attention parameters (read from the dict returned by comm_forward)
        packed_seq_params = attn_para.get('packed_seq_params')
        attention_mask = attn_para.get('attention_mask')
        scale = attn_para.get('scale')
        pre_tokens = attn_para.get('pre_tokens')
        next_tokens = attn_para.get('next_tokens')
        keep_prob = attn_para.get('keep_prob')
        sparse_mode = attn_para.get('sparse_mode')
        n_head = attn_para.get('n_head')
        shape_order = attn_para.get('shape_order')
        actual_seq_len = attn_para.get('actual_seq_qlen')
        actual_seq_kvlen = attn_para.get('actual_seq_kvlen')
        seq_length = attn_para.get('q_seq_len')

        # kv cache: decides what is kept for backward
        #   "full": the pre-communication key and value (backward re-communicates both)
        #   "half": the pre-communication key and the post-communication value
        #   other : the post-communication k and v (backward re-communicates nothing)
        if cache_policy == "full":
            k_cache, v_cache = key.clone(), value.clone()
        elif cache_policy == "half":
            k_cache, v_cache = key.clone(), v.clone()
        else:
            k_cache, v_cache = k.clone(), v.clone()

        # attention forward
        res = torch_npu.npu_fusion_attention(
            q, k, v, n_head, shape_order,
            pse=None,
            padding_mask=None,
            atten_mask=attention_mask,
            scale=scale,
            pre_tockens=pre_tokens,
            next_tockens=next_tokens,
            keep_prob=keep_prob,
            inner_precise=0,
            sparse_mode=sparse_mode,
            actual_seq_qlen=actual_seq_len,
            actual_seq_kvlen=actual_seq_kvlen
        )

        # only the first three kernel results are used
        attn_out, softmax_max, softmax_sum = res[0], res[1], res[2]

        # if TND, reshape TND([b*s, h, d]) to SBH([s, b, h*d])
        if packed_seq_params is not None:
            s, b = seq_length, attn_out.shape[0] // seq_length
            attn_out = rearrange(attn_out, '(b s) h d -> s b (h d)', s=s, b=b)

        # output all2all communication forward (scatter/gather swapped relative to the Q all2all)
        output = single_all_to_all(attn_out, gather_idx, scatter_idx, spg)

        # attn_out is saved in SBH layout; backward converts it back to TND when needed
        ctx.save_for_backward(q, k_cache, v_cache, attn_out, softmax_max, softmax_sum, attention_mask)
        ctx.ulysses_comm = ulysses_comm
        ctx.ulysses_comm_para = ulysses_comm_para
        ctx.attn_para = attn_para

        return output

    @staticmethod
    def backward(ctx, dout):
        """
        Backward of ulysses attention with KV cache.

        Args:
            dout (Tensor): gradient of the forward output.
                NOTE(review): dout.shape[2] is read below, so a 3-D SBH tensor is expected
                here -- confirm against the original "[s, b, h*d] or [s*b, h, d]" wording.

        Returns:
            tuple: (dq, dk, dv, None, None). dq, dk and dv are the gradients for query, key
            and value as returned by the communication object's comm_backward; the two None
            entries are for attn_para and ulysses_comm_para.
        """
        # input, attention output grad: [s, b, h*d] or [s*b, h, d]

        # get forward parameters
        query, k_cache, v_cache, attn_out, softmax_max, softmax_sum, attention_mask = ctx.saved_tensors
        ulysses_comm = ctx.ulysses_comm
        ulysses_comm_para = ctx.ulysses_comm_para
        attn_para = ctx.attn_para

        # communication parameters
        spg = ulysses_comm_para.get('spg')
        scatter_idx = ulysses_comm_para.get('scatter_idx')
        gather_idx = ulysses_comm_para.get('gather_idx')
        cache_policy = ulysses_comm_para.get('cache_policy')

        # attention parameters
        packed_seq_params = attn_para.get('packed_seq_params')
        # NOTE(review): this rebinding shadows the attention_mask unpacked from
        # ctx.saved_tensors above; the saved copy is never used.
        attention_mask = attn_para.get('attention_mask')
        scale = attn_para.get('scale')
        pre_tokens = attn_para.get('pre_tokens')
        next_tokens = attn_para.get('next_tokens')
        keep_prob = attn_para.get('keep_prob')
        sparse_mode = attn_para.get('sparse_mode')
        n_head = attn_para.get('n_head')
        shape_order = attn_para.get('shape_order')
        actual_seq_len = attn_para.get('actual_seq_qlen')
        actual_seq_kvlen = attn_para.get('actual_seq_kvlen')

        # output all2all communication backward (inverse of the forward output all2all)
        dout = single_all_to_all(dout, scatter_idx, gather_idx, spg)

        # if TND, reshape SBH([s, b, h*d]) to TND([b*s, h, d]); attn_out was saved in SBH too
        if packed_seq_params is not None:
            h, d = n_head, dout.shape[2] // n_head
            dout = rearrange(dout, 's b (h d) -> (b s) h d', h=h, d=d)
            attn_out = rearrange(attn_out, 's b (h d) -> (b s) h d', h=h, d=d)

        # kv cache re-communication: replay the communication for whatever was cached
        # in its pre-communication form (see the cache policy in forward)
        if cache_policy == "full":
            key = ulysses_comm.recomm_backward(k_cache)
            value = ulysses_comm.recomm_backward(v_cache)
        elif cache_policy == "half":
            key = ulysses_comm.recomm_backward(k_cache)
            value = v_cache
        else:
            key = k_cache
            value = v_cache

        # attention backward
        attn_grad_outs = torch_npu.npu_fusion_attention_grad(
            query, key, value, dout, n_head,
            shape_order,
            pse=None,
            padding_mask=None,
            atten_mask=attention_mask,
            softmax_max=softmax_max,
            softmax_sum=softmax_sum,
            attention_in=attn_out,
            scale_value=scale,
            pre_tockens=pre_tokens,
            next_tockens=next_tokens,
            sparse_mode=sparse_mode,
            keep_prob=keep_prob,
            actual_seq_qlen=actual_seq_len,
            actual_seq_kvlen=actual_seq_kvlen
        )

        dq, dk, dv = attn_grad_outs[0], attn_grad_outs[1], attn_grad_outs[2]

        # undo the forward communication on the gradients
        dq, dk, dv = ulysses_comm.comm_backward(dq, dk, dv)

        # one entry per forward input: query, key, value, attn_para, ulysses_comm_para
        return dq, dk, dv, None, None


def ulyssesattn_context_parallel(query, key, value, attn_para, ulysses_comm_para):
    """Functional entry point: apply UlyssesAttnWithKVCache to the given inputs and return its output."""
    out = UlyssesAttnWithKVCache.apply(query, key, value, attn_para, ulysses_comm_para)
    return out
# Permutations used by _aligned_all_to_all, keyed by the rank of the tensor being permuted.
PERMUTE_DIMS1 = {
    4: (1, 2, 3, 0),
    5: (1, 2, 3, 0, 4),
}


PERMUTE_DIMS2 = {
    4: (1, 2, 0, 3),
    5: (1, 2, 0, 3, 4),
}


def adjust_tensor_dimensions(tensor, scatter_idx, gather_idx):
    """
    Permute a tensor so that scatter_idx becomes dim 0 and gather_idx becomes dim 1.

    Args:
        tensor (torch.Tensor): The input tensor.
        scatter_idx (int): The index of the dimension to scatter.
        gather_idx (int): The index of the dimension to gather. Must differ from scatter_idx.

    Returns:
        tuple: (permuted contiguous tensor, list holding the permutation that was applied).
        Pass the list to unadjust_tensor_dimensions to restore the original order.
    """
    dims = list(range(tensor.dim()))
    assert scatter_idx != gather_idx
    if gather_idx == 0:
        if scatter_idx != 1:
            # park gather (dim 0) in slot 1 first, then pull scatter into slot 0
            dims[1], dims[gather_idx] = dims[gather_idx], dims[1]
            dims[0], dims[scatter_idx] = dims[scatter_idx], dims[0]
        else:
            # scatter is dim 1 and gather is dim 0: a plain swap
            dims[scatter_idx], dims[gather_idx] = dims[gather_idx], dims[scatter_idx]

    elif gather_idx == 1:
        if scatter_idx != 0:
            # gather is already in slot 1; only scatter has to move to slot 0
            dims[0], dims[scatter_idx] = dims[scatter_idx], dims[0]
    else:
        if scatter_idx == 0:
            # scatter is already in slot 0; only gather has to move to slot 1
            dims[1], dims[gather_idx] = dims[gather_idx], dims[1]
        else:
            dims[0], dims[scatter_idx] = dims[scatter_idx], dims[0]
            dims[1], dims[gather_idx] = dims[gather_idx], dims[1]
    return tensor.permute(dims).contiguous(), dims


def unadjust_tensor_dimensions(tensor, adjusted_dims):
    """
    Undo a permutation produced by adjust_tensor_dimensions.

    Args:
        tensor (torch.Tensor): The tensor whose dimensions need to be restored.
        adjusted_dims (list): The permutation that was applied (entry i is the original
            dimension now sitting at position i).

    Returns:
        torch.Tensor: A contiguous tensor with its dimensions back in the original order.
    """
    inverse_dims = [0] * len(adjusted_dims)

    for new_pos, old_pos in enumerate(adjusted_dims):
        inverse_dims[old_pos] = new_pos

    # Restore the dimension order
    return tensor.permute(inverse_dims).contiguous()


def _all_to_all(
        input_: torch.Tensor,
        group: dist.ProcessGroup,
        scatter_dim: int,
        gather_dim: int,
        gather_size: Optional[int] = None
):
    """
    Scatter input_ along scatter_dim and gather it along gather_dim across the group.

    Dispatches to the aligned, partially unaligned or fully unaligned implementation
    depending on whether the scatter size and the gather size are divisible by the
    group's world size.

    Args:
        input_ (torch.Tensor): The input tensor (3-D or 4-D).
        group (dist.ProcessGroup): The process group to perform the operation within.
        scatter_dim (int): The index of the dimension that needs to be scattered.
        gather_dim (int): The index of the dimension that needs to be gathered.
        gather_size (Optional[int]): The total size of the output tensor along gather_dim.
            Defaults to input_.size(gather_dim) * world_size.

    Returns:
        torch.Tensor: The tensor after the all-to-all. With a world size of 1 the input is
        returned unchanged.

    Note:
        When a size is not divisible by world_size, the remainder is handed out one element
        at a time to the first ranks, so no rank holds more than one extra element.
    """
    assert 3 <= input_.dim() <= 4
    world_size = dist.get_world_size(group)
    if world_size == 1:
        return input_

    scatter_size = input_.size(scatter_dim)
    if gather_size is None:
        gather_size = input_.size(gather_dim) * world_size
    gather_mod = gather_size % world_size
    scatter_mod = scatter_size % world_size

    if gather_mod == 0 and scatter_mod == 0:
        # In the case of aligned data (both scatter_size and gather_size are divisible by world_size),
        # _aligned_all_to_all function performs better than _partial_unaligned_all_to_all function
        return _aligned_all_to_all(input_, group, scatter_dim, gather_dim)
    elif gather_mod != 0 and scatter_mod != 0:
        return _full_unaligned_all_to_all(input_, group, scatter_dim, gather_dim, gather_size)
    else:
        return _partial_unaligned_all_to_all(input_, group, scatter_dim, gather_dim, gather_size)


def _full_unaligned_all_to_all(
        input_: torch.Tensor,
        group: dist.ProcessGroup,
        scatter_dim: int,
        gather_dim: int,
        gather_size: Optional[int] = None
):
    """
    All-to-all for the case where neither the scatter size nor the gather size is aligned.

    The input is split along scatter_dim into one chunk per rank, the chunks are exchanged
    with dist.all_to_all, and the received chunks are concatenated along gather_dim.

    Args:
        input_ (torch.Tensor): The input tensor to be processed.
        group (dist.ProcessGroup): The process group to perform the operation within.
        scatter_dim (int): The index of the dimension that needs to be scattered.
        gather_dim (int): The index of the dimension that needs to be gathered.
        gather_size (Optional[int]): The total size of the output tensor along gather_dim.
            Defaults to input_.size(gather_dim) * world_size.

    Returns:
        torch.Tensor: The resulting contiguous tensor after the all-to-all operation.
    """
    world_size = dist.get_world_size(group)
    rank = dist.get_rank(group)

    # Honour the documented default; previously None was passed straight to cal_split_sizes
    # and failed with a TypeError.
    if gather_size is None:
        gather_size = input_.size(gather_dim) * world_size

    scatter_sizes = cal_split_sizes(dim_size=input_.size(scatter_dim), world_size=world_size)
    input_list = [t.contiguous() for t in torch.split(input_, scatter_sizes, scatter_dim)]

    gather_sizes = cal_split_sizes(dim_size=gather_size, world_size=world_size)
    # Every received chunk has this rank's scatter slice shape, except along gather_dim,
    # where it has the sending rank's share of gather_size.
    tensor_shape_base = input_list[rank].size()
    output_list = []
    for i in range(world_size):
        tensor_shape = list(tensor_shape_base)
        tensor_shape[gather_dim] = gather_sizes[i]
        output_list.append(torch.empty(tensor_shape, dtype=input_.dtype, device=input_.device))

    dist.all_to_all(output_list, input_list, group=group)

    return torch.cat(output_list, dim=gather_dim).contiguous()
def _partial_unaligned_all_to_all(
        input_: torch.Tensor,
        group: dist.ProcessGroup,
        scatter_dim: int,
        gather_dim: int,
        gather_size: Optional[int] = None
):
    """
    All-to-all for the case where at most one of scatter size / gather size is unaligned.

    The tensor is permuted so the scatter dim is dim 0 and the gather dim is dim 1, exchanged
    with dist.all_to_all_single using per-rank split sizes, and permuted back.

    Special note: for fully aligned data (both sizes divisible by world_size) this function
    is slower than _aligned_all_to_all, which should be preferred in that case.

    Args:
        input_ (torch.Tensor): The input tensor to be processed.
        group (dist.ProcessGroup): The process group to perform the operation within.
        scatter_dim (int): The index of the dimension that needs to be scattered.
        gather_dim (int): The index of the dimension that needs to be gathered.
        gather_size (Optional[int]): The total size of the output tensor along gather_dim.
            Defaults to input_.size(gather_dim) * world_size.

    Returns:
        torch.Tensor: The resulting tensor after the all-to-all operation.
    """
    world_size = dist.get_world_size(group)
    input_ = input_.contiguous()
    rank = dist.get_rank(group=group)

    scatter_size = input_.size(scatter_dim)
    if gather_size is None:
        gather_size = input_.size(gather_dim) * world_size
    # The fully unaligned case is handled by _full_unaligned_all_to_all.
    assert not (gather_size % world_size != 0 and scatter_size % world_size != 0)

    # Same remainder-to-the-first-ranks split as everywhere else in this module.
    input_split_sizes = cal_split_sizes(dim_size=scatter_size, world_size=world_size)
    output_split_sizes = cal_split_sizes(dim_size=gather_size, world_size=world_size)

    # Adjusts the dimensions of a tensor to move scatter_idx and gather_idx to dim 0 and dim 1 respectively.
    reshaped_input, reshaped_input_dims = adjust_tensor_dimensions(input_, scatter_dim, gather_dim)
    reshaped_input_shape = list(reshaped_input.shape)
    # the shape of reshaped_input is (input_.size(scatter_dim), input_.size(gather_dim), *reshaped_input_shape[2:])

    if scatter_size % world_size == 0:
        reshaped_input = reshaped_input.view(
            [world_size, input_.size(scatter_dim) // world_size, input_.size(gather_dim)] + reshaped_input_shape[2:]
        ).transpose(1, 2).contiguous()

    # Relative to reshaped_input (the return value of adjust_tensor_dimensions),
    # whose shape is (input_.size(scatter_dim), input_.size(gather_dim), *reshaped_input_shape[2:]),
    # output just swaps the 0th and 1st axes. Work on a copy so the list returned by
    # adjust_tensor_dimensions is not mutated behind its back.
    output_dims = list(reshaped_input_dims)
    output_dims[1], output_dims[0] = output_dims[0], output_dims[1]
    output = torch.empty((gather_size, input_split_sizes[rank], *reshaped_input_shape[2:]),
                         dtype=input_.dtype, device=input_.device)
    output_shape = list(output.shape)

    dist.all_to_all_single(
        output,
        reshaped_input,
        output_split_sizes=output_split_sizes,
        input_split_sizes=input_split_sizes if scatter_size % world_size != 0 else [1 for _ in range(world_size)],
        group=group,
    )

    if gather_size % world_size == 0 and scatter_size % world_size != 0:
        output = output.view(
            [world_size, input_split_sizes[rank], gather_size // world_size] + reshaped_input_shape[2:]
        ).transpose(1, 2).reshape(output_shape).contiguous()

    # Reverses the dimension adjustments using the list of adjusted dimensions.
    return unadjust_tensor_dimensions(output, output_dims)


class _AllToAll(torch.autograd.Function):
    """Custom autograd function that performs an all-to-all communication.
    This function supports both aligned and unaligned data.
    """
    @staticmethod
    def forward(ctx, input_, process_group, scatter_dim, gather_dim, gather_size=None):
        """
        Forward pass: scatter the input along scatter_dim and gather it along gather_dim.

        Args:
            input_ (torch.Tensor): The input tensor to be processed.
            process_group (dist.ProcessGroup): The process group to perform the operation within.
            scatter_dim (int): The index of the dimension that needs to be scattered.
            gather_dim (int): The index of the dimension that needs to be gathered.
            gather_size (int): The total size of the gather dimension of the output; None lets
                _all_to_all derive it.

        Returns:
            torch.Tensor: The resulting tensor after performing the all-to-all operation.
        """
        ctx.process_group = process_group
        ctx.scatter_dim = scatter_dim
        # The backward all-to-all gathers along scatter_dim, so the original (possibly
        # unaligned) size of that dimension is what it must reproduce.
        ctx.scatter_size = input_.size(scatter_dim)
        ctx.gather_dim = gather_dim
        ctx.gather_size = gather_size
        return _all_to_all(input_, process_group, scatter_dim, gather_dim, gather_size)

    @staticmethod
    def backward(ctx, grad_output):
        """
        Backward pass: the reverse all-to-all (scatter along gather_dim, gather along scatter_dim).

        Args:
            grad_output (torch.Tensor): The gradient of the output with respect to the loss.

        Returns:
            tuple: The gradient of the input, followed by None for each of the four
            non-tensor forward arguments.
        """
        grad_input = _all_to_all(
            grad_output,
            ctx.process_group,
            ctx.gather_dim,
            ctx.scatter_dim,
            ctx.scatter_size
        )
        # One entry per forward input (input_, process_group, scatter_dim, gather_dim,
        # gather_size); the original returned one surplus None.
        return grad_input, None, None, None, None
def _gather(input_: torch.Tensor,
            pg: dist.ProcessGroup,
            dim: int = -1,
            gather_sizes: Optional[List[int]] = None):
    """
    All-gather input_ across the process group and concatenate the pieces along dim.

    Args:
        input_ (torch.Tensor): The local tensor to contribute.
        pg (dist.ProcessGroup): The process group to perform the operation within.
        dim (int, optional): The dimension to concatenate along. Defaults to -1.
        gather_sizes (Optional[List[int]], optional): Size along dim of each rank's piece.
            When empty or None every rank is assumed to contribute a tensor shaped like input_.

    Returns:
        torch.Tensor: The concatenated, contiguous tensor. With a world size of 1 the input is
        returned unchanged.
    """
    assert gather_sizes is None or isinstance(gather_sizes, list)

    group_size = dist.get_world_size(pg)
    if group_size == 1:
        # nothing to gather from
        return input_

    local = input_.contiguous()

    if gather_sizes:
        def _buffer_for(rank):
            # same shape as the local tensor except along dim, where the sender's size applies
            shape = list(local.size())
            shape[dim] = gather_sizes[rank]
            return torch.empty(shape, dtype=local.dtype, device=local.device)

        buffers = [_buffer_for(rank) for rank in range(group_size)]
    else:
        buffers = [torch.empty_like(local) for _ in range(group_size)]

    assert local.device.type in ("cuda", "npu")
    dist.all_gather(buffers, local, group=pg)

    return torch.cat(buffers, dim=dim).contiguous()


class _GatherForwardSplitBackward(torch.autograd.Function):
    """
    Autograd function: gather across the process group in forward, split in backward.

    The backward gradient is optionally scaled by the group's world size before being split.
    """

    @staticmethod
    def symbolic(graph, input_, process_group, dim, gather_sizes):
        """Symbolic form of the op: performs the same gather as forward."""
        return _gather(input_, process_group, dim, gather_sizes)

    @staticmethod
    def forward(ctx, input_, process_group, dim, gather_sizes, grad_scale="up"):
        """
        Gather tensors from every rank of process_group and concatenate them along dim.

        Args:
            input_ (torch.Tensor): The input tensor to be processed.
            process_group (dist.ProcessGroup): The process group to perform the operation within.
            dim (int): The dimension along which to concatenate the gathered tensors.
            gather_sizes (Optional[List[int]]): Per-rank sizes along dim, or None.
            grad_scale (str, optional): "up" multiplies and "down" divides the backward gradient
                by the world size; any other value leaves it unscaled. Defaults to "up".

        Returns:
            torch.Tensor: The gathered tensor.
        """
        ctx.mode, ctx.dim = process_group, dim
        ctx.grad_scale = grad_scale
        ctx.gather_sizes = gather_sizes
        return _gather(input_, process_group, dim, gather_sizes)

    @staticmethod
    def backward(ctx, grad_output):
        """
        Scale the incoming gradient according to grad_scale and keep this rank's slice.

        Args:
            grad_output (torch.Tensor): The gradient of the output.

        Returns:
            tuple: The gradient of the input, followed by None for the four non-tensor inputs.
        """
        scale_mode = ctx.grad_scale
        if scale_mode == "up":
            grad_output = grad_output * dist.get_world_size(ctx.mode)
        elif scale_mode == "down":
            grad_output = grad_output / dist.get_world_size(ctx.mode)

        grad_input = _split(grad_output, ctx.mode, ctx.dim, ctx.gather_sizes)
        return grad_input, None, None, None, None


class _SplitForwardGatherBackward(torch.autograd.Function):
    """
    Autograd function: keep this rank's chunk in forward, gather the gradients in backward.

    The backward gradient is optionally scaled by the group's world size before being gathered.
    """
    @staticmethod
    def symbolic(graph, input_, process_group, dim, split_sizes):
        """Symbolic form of the op: performs the same split as forward."""
        return _split(input_, process_group, dim, split_sizes)

    @staticmethod
    def forward(ctx, input_, process_group, dim, split_sizes, grad_scale):
        """Split input_ along dim and return the chunk belonging to the current rank."""
        ctx.mode, ctx.dim = process_group, dim
        ctx.grad_scale = grad_scale
        ctx.split_sizes = split_sizes
        return _split(input_, process_group, dim, split_sizes)

    @staticmethod
    def backward(ctx, grad_output):
        """Scale the gradient according to grad_scale, then gather it from every rank."""
        scale_mode = ctx.grad_scale
        if scale_mode == "up":
            grad_output = grad_output * dist.get_world_size(ctx.mode)
        elif scale_mode == "down":
            grad_output = grad_output / dist.get_world_size(ctx.mode)

        grad_input = _gather(grad_output, ctx.mode, ctx.dim, ctx.split_sizes)
        return grad_input, None, None, None, None
def split_forward_gather_backward(
        input_: torch.Tensor,
        process_group: dist.ProcessGroup,
        dim: int,
        split_sizes: Optional[List[int]] = None,
        grad_scale: str = "down"

) -> torch.Tensor:
    """
    Keep only the current rank's chunk of input_; gather the gradients in the backward pass.

    Works for both aligned and unaligned data.

    Args:
        input_ (torch.Tensor): The input tensor to be processed.
        process_group (dist.ProcessGroup): The process group to perform the operation within.
        dim (int): The dimension along which to split the tensor.
        split_sizes (Optional[List[int]], optional): Size of each rank's chunk. When omitted the
            split is derived from the size of dim and the world size. Defaults to None.
        grad_scale (str, optional): Gradient scaling mode: "up", "down", or None. Defaults to "down".

    Returns:
        torch.Tensor: The chunk of input_ that belongs to the current rank.
    """
    op_args = (input_, process_group, dim, split_sizes, grad_scale)
    return _SplitForwardGatherBackward.apply(*op_args)


def gather_forward_split_backward(
        input_: torch.Tensor,
        process_group: dist.ProcessGroup,
        dim: int,
        gather_sizes: Optional[List[int]] = None,
        grad_scale: str = "up"
) -> torch.Tensor:
    """
    Gather input_ from every rank and concatenate along dim; split the gradients in backward.

    Works for both aligned and unaligned data.

    Args:
        input_ (torch.Tensor): The input tensor to be processed.
        process_group (dist.ProcessGroup): The process group to perform the operation within.
        dim (int): The dimension along which to concatenate the gathered tensors.
        gather_sizes (Optional[List[int]], optional): Size of each rank's piece along dim. When
            omitted every rank is assumed to hold a tensor shaped like input_. Defaults to None.
        grad_scale (str, optional): Gradient scaling mode: "up", "down", or None. Defaults to "up".

    Returns:
        torch.Tensor: The gathered and concatenated tensor.
    """
    op_args = (input_, process_group, dim, gather_sizes, grad_scale)
    return _GatherForwardSplitBackward.apply(*op_args)


def cal_split_sizes(dim_size, world_size):
    """
    Split dim_size into world_size near-equal parts.

    Every part gets dim_size // world_size elements and the first dim_size % world_size
    parts get one more, so the sizes add up to dim_size.

    Args:
        dim_size (int): The total size of the dimension to be split.
        world_size (int): The number of processes (or parts) to split the dimension into.

    Returns:
        List[int]: The size of each part, in rank order.
    """
    base, extra = divmod(dim_size, world_size)
    return [base + 1 if rank < extra else base for rank in range(world_size)]
def get_selection_indices_for_tnd_softmax_update(t, n, sub_seq_len):
    """Build the flat row indices used to update part of the TND softmax statistics.

    The flat index space has ``t * n`` positions. Every entry ``seq_len`` of
    ``sub_seq_len`` owns ``seq_len * n * 2`` consecutive positions, laid out as ``n``
    segments of ``2 * seq_len`` positions each. For every segment the last
    ``seq_len`` positions are selected. Positions at or beyond ``t * n`` are dropped.

    Args:
        t: First factor of the flat index space size
            (presumably the token count of the TND tensor - confirm with the caller).
        n: Number of segments per sub-sequence
            (presumably the attention head count - confirm with the caller).
        sub_seq_len: Iterable with one length per sub-sequence.

    Returns:
        torch.Tensor: 1-D tensor holding the selected indices in ascending order.
    """
    # Slicing a range clamps exactly like slicing the equivalent list, without
    # materialising all t * n integers first.
    valid_positions = range(t * n)
    indices = []
    seq_start = 0
    for seq_len in sub_seq_len:
        for segment in range(n):
            start = seq_start + seq_len * 2 * segment + seq_len
            end = seq_start + seq_len * 2 * (segment + 1)
            indices.extend(valid_positions[start:end])
        seq_start += seq_len * n * 2

    return torch.tensor(indices)
def forward_update_without_fused(prev_attn_out, prev_softmax_max, prev_softmax_sum,
                                 cur_attn_out, cur_softmax_max, cur_softmax_sum, actual_seq_qlen=None, layout='SBH'):
    """Merge two partial attention results using plain (non-fused) tensor ops.

    Both partial results are re-expressed against a common softmax maximum, their
    softmax sums are added, and the two attention outputs are blended with weights
    proportional to their rescaled softmax sums.

    Args:
        prev_attn_out: Attention output accumulated so far.
        prev_softmax_max: Softmax maximum belonging to ``prev_attn_out``.
        prev_softmax_sum: Softmax sum belonging to ``prev_attn_out``.
        cur_attn_out: Attention output of the newly computed block.
        cur_softmax_max: Softmax maximum belonging to ``cur_attn_out``.
        cur_softmax_sum: Softmax sum belonging to ``cur_attn_out``.
        actual_seq_qlen: Sub-sequence lengths passed to ``flatten_softmax`` and
            ``unflatten_softmax``; only read when ``layout == 'TND'``.
        layout: ``'SBH'`` or ``'TND'``. Any other value leaves the blend weights in
            the shape of the softmax statistics.

    Returns:
        Tuple ``(attn_out, softmax_max, softmax_sum)``. ``attn_out`` is cast back to
        the dtype of ``prev_attn_out``.
    """
    is_tnd = layout == 'TND'
    if is_tnd:
        prev_softmax_max, prev_softmax_sum, cur_softmax_max, cur_softmax_sum = (
            flatten_softmax(stat, actual_seq_qlen)
            for stat in (prev_softmax_max, prev_softmax_sum, cur_softmax_max, cur_softmax_sum)
        )

    result_dtype = prev_attn_out.dtype

    # Re-base both partial sums onto the shared maximum before adding them.
    softmax_max = torch.maximum(prev_softmax_max, cur_softmax_max)
    prev_rebased_sum = prev_softmax_sum * torch.exp(prev_softmax_max - softmax_max)
    cur_rebased_sum = cur_softmax_sum * torch.exp(cur_softmax_max - softmax_max)
    softmax_sum = prev_rebased_sum + cur_rebased_sum

    # Blend weights of the two partial outputs.
    prev_weight = prev_rebased_sum / softmax_sum
    cur_weight = cur_rebased_sum / softmax_sum

    if layout == 'SBH':
        # [b, n, s, 8] -> [s, b, h]
        head_num = prev_weight.shape[1]
        head_dim = prev_attn_out.shape[-1] // head_num

        def _as_sbh(weight):
            per_head = weight[..., 0].unsqueeze(3).repeat(1, 1, 1, head_dim)
            return rearrange(per_head, 'b n s d -> s b (n d)').contiguous()

        prev_weight = _as_sbh(prev_weight)
        cur_weight = _as_sbh(cur_weight)
    elif is_tnd:
        head_dim = prev_attn_out.shape[-1]
        prev_weight = prev_weight[..., 0].unsqueeze(2).repeat(1, 1, head_dim)
        cur_weight = cur_weight[..., 0].unsqueeze(2).repeat(1, 1, head_dim)

    attn_out = (prev_attn_out * prev_weight + cur_attn_out * cur_weight).to(result_dtype)

    if is_tnd:
        softmax_max = unflatten_softmax(softmax_max, actual_seq_qlen)
        softmax_sum = unflatten_softmax(softmax_sum, actual_seq_qlen)
    return attn_out, softmax_max, softmax_sum
+ + This function is designed to update the attention output and related softmax statistics + for a given sequence length in a ring attention mechanism. It handles the merging of + previous and current attention outputs and their corresponding softmax statistics. + The introduction of `actual_seq_qlen` and `layout` parameters allows for greater flexibility + in handling variable sequence lengths and different tensor layouts, respectively. + + Parameters: + - prev_attn_out (Tensor): The attention output from the previous process. + - prev_softmax_max (Tensor): The maximum value of the softmax distribution from the previous process. + - prev_softmax_sum (Tensor): The sum of the softmax distribution from the previous process. + - cur_attn_out (Tensor): The attention output from the current process. + - cur_softmax_max (Tensor): The maximum value of the softmax distribution from the current process. + - cur_softmax_sum (Tensor): The sum of the softmax distribution from the current process. + - actual_seq_qlen (Tensor, optional): The actual sequence length for the query. This parameter + is crucial for handling variable-length sequences and ensuring + that the attention mechanism operates correctly under such conditions. + If not provided, it defaults to the length of the current attention output. + - layout (str, optional): The layout format of the input tensors. This parameter allows for the specification + of different tensor layouts, enhancing the function's versatility across various + model architectures. Default is 'SBH', where: + - S: Sequence length + - B: Batch size + - H: Hidden size (number of attention heads) + + Returns: + - updated_attn_out (Tensor): The updated attention output after merging previous and current process. + - updated_softmax_max (Tensor): The updated maximum value of the softmax distribution. + - updated_softmax_sum (Tensor): The updated sum of the softmax distribution. 
def tnd_out_update(q_block_id, kv_block_id, cur_attn_outs, global_attn_outs, q_index, softmax_indices, cur_sub_out_seq_len):
    """Fold one block's attention result into the accumulated TND-layout result.

    Args:
        q_block_id: Index of the query block owned by this rank.
        kv_block_id: Index of the key/value block that produced ``cur_attn_outs``.
        cur_attn_outs: Sequence whose first three items are the block's attention
            output, softmax max and softmax sum. When it has more than three items,
            items 4, 5 and 6 are stored as the RNG state of ``kv_block_id``.
        global_attn_outs: ``(attn_out, softmax_max, softmax_sum, rng_states)``
            accumulated so far.
        q_index: Row indices of ``attn_out`` that take part in the update when
            ``kv_block_id > q_block_id``.
        softmax_indices: Row indices into the softmax statistics viewed as
            ``(-1, 8)`` for the same case.
        cur_sub_out_seq_len: Sub-sequence lengths forwarded to ``forward_update``.

    Returns:
        list: ``[attn_out, softmax_max, softmax_sum, rng_states]``.
    """
    block_out, block_max, block_sum = cur_attn_outs[0], cur_attn_outs[1], cur_attn_outs[2]
    attn_out, softmax_max, softmax_sum, rng_states = global_attn_outs

    if len(cur_attn_outs) > 3:
        rng_states[kv_block_id] = (cur_attn_outs[4], cur_attn_outs[5], cur_attn_outs[6])

    # Diagonal block: the current result simply becomes the accumulated one.
    if q_block_id == kv_block_id:
        return [block_out, block_max, block_sum, rng_states]

    # Earlier KV block: merge over all rows.
    if kv_block_id <= q_block_id:
        attn_out, softmax_max, softmax_sum = forward_update(
            attn_out, softmax_max, softmax_sum,
            block_out, block_max, block_sum, actual_seq_qlen=cur_sub_out_seq_len, layout='TND'
        )
        return [attn_out, softmax_max, softmax_sum, rng_states]

    # Later KV block: only the selected rows are merged and written back.
    head_num = attn_out.shape[1]
    flat_max = softmax_max.view(-1, 8)
    flat_sum = softmax_sum.view(-1, 8)
    merged_out, merged_max, merged_sum = forward_update(
        torch.index_select(attn_out, 0, q_index),
        flat_max[softmax_indices].view(-1, head_num, 8),
        flat_sum[softmax_indices].view(-1, head_num, 8),
        block_out, block_max, block_sum, actual_seq_qlen=cur_sub_out_seq_len, layout='TND'
    )
    attn_out.index_copy_(0, q_index, merged_out)
    softmax_max = flat_max.index_copy(0, softmax_indices, merged_max.view(-1, 8)).view(-1, head_num, 8)
    softmax_sum = flat_sum.index_copy(0, softmax_indices, merged_sum.view(-1, 8)).view(-1, head_num, 8)

    return [attn_out, softmax_max, softmax_sum, rng_states]
class SchedulingInfo:
    """Communication plan of one device for a single adaptive-CP scheduling round.

    The plan is validated on construction and may be re-validated with
    ``check_eligibility`` after its fields have been updated.

    Args:
        round_idx: Index of the round this plan belongs to.
        recv_q_src: Device that supplies the Q needed for the next round's
            computation; -1 means no Q is received.
        recv_kv_src: Device that supplies the KV needed for the next round's
            computation; -1 means no KV is received.
        recv_o_src: Devices that computed on behalf of this device in this round.
        send_q_dst: Devices that need this device's Q in the next round.
        send_kv_dst: Devices that need this device's KV in the next round.
        send_o_dst: Device this device computes for in this round; -1 means none.
        comm_unit_limit: Maximum number of communication units allowed per round.

    Raises:
        ValueError: If both Q and KV are received in the same round, or if the
            counted communication units exceed ``comm_unit_limit``.
    """

    def __init__(self, round_idx, recv_q_src: int = -1, recv_kv_src: int = -1, recv_o_src: list = None,
                 send_q_dst=None, send_kv_dst: list = None, send_o_dst: int = -1, comm_unit_limit=6):
        self.round_idx = round_idx
        self.recv_q_src = recv_q_src
        self.recv_kv_src = recv_kv_src
        # Fresh lists per instance; the None defaults avoid shared mutable defaults.
        self.recv_o_src = [] if recv_o_src is None else recv_o_src
        self.send_q_dst = [] if send_q_dst is None else send_q_dst
        self.send_kv_dst = [] if send_kv_dst is None else send_kv_dst
        self.send_o_dst = send_o_dst
        self.comm_unit_limit = comm_unit_limit
        self.cnt_comm_unit_forward = -1
        self.check_eligibility()

    def check_eligibility(self):
        """Validate the plan and refresh ``cnt_comm_unit_forward``."""
        # Q and KV must not both be received in the same round.
        if self.recv_q_src > -1 and self.recv_kv_src > -1:
            raise ValueError("only receive one of q and kv in a single round")
        # The total communication volume must stay within the limit.
        self.count_comm_units()
        if self.cnt_comm_unit_forward > self.comm_unit_limit:
            raise ValueError(f"comm unit exceed limit: round {self.round_idx}, device {torch.npu.current_device()}")

    def count_comm_units(self):
        """Count this round's communication units: Q and O cost 1 each, KV costs 2."""
        # Each comparison is parenthesised on purpose: `a > -1 + b` parses as
        # `a > (-1 + b)`, which would turn the whole sum into a single boolean.
        sum_recv_units = int(self.recv_q_src > -1) + int(self.recv_kv_src > -1) * 2 + len(self.recv_o_src)
        sum_send_units = len(self.send_q_dst) + len(self.send_kv_dst) * 2 + int(self.send_o_dst > -1)
        self.cnt_comm_unit_forward = sum_recv_units + sum_send_units
def process_scheduling_info(local_rank, orig_scheduling, comm_limit=6):
    """Turn a per-round task assignment into per-round ``SchedulingInfo`` plans.

    One plan is created for every round plus one trailing plan, because
    ``process_single_scheduling_info`` records the output transfers of a round
    under ``round_idx + 1``.

    Args:
        local_rank: Rank whose point of view the plans describe.
        orig_scheduling: One entry per round; each entry holds one task id per device.
        comm_limit: Communication unit limit given to every ``SchedulingInfo``.

    Returns:
        list: ``len(orig_scheduling) + 1`` ``SchedulingInfo`` objects.
    """
    num_rounds = len(orig_scheduling)
    num_devices = len(orig_scheduling[0])

    plans = []
    for plan_idx in range(num_rounds + 1):
        plans.append(SchedulingInfo(round_idx=plan_idx, comm_unit_limit=comm_limit))

    for rnd_idx, round_tasks in enumerate(orig_scheduling):
        process_single_scheduling_info(local_rank, num_devices, rnd_idx, round_tasks, plans)
    return plans
def adaptive_reschedule_task(grid_mask, cp_size):
    """Schedule every task marked in ``grid_mask`` into communication rounds.

    Rounds are generated until no task is left. Each round is a list with one entry
    per device: the id of the task the device executes (``row * cp_size + col``) or
    -1 when the device stays idle.

    Args:
        grid_mask (torch.Tensor): ``cp_size x cp_size`` mask whose non-zero cells
            are the tasks to schedule. The tensor is not modified.
        cp_size (int): Number of devices in the context-parallel group.

    Returns:
        list: One list of ``cp_size`` task ids per round.
    """
    # execute_scheduling clears every cell it schedules. Work on a private copy so a
    # mask the caller keeps (for example a cached grid mask) is not emptied.
    grid_mask = grid_mask.clone()

    scheduling_info = []
    total_task = torch.sum(grid_mask)
    round_idx = 0
    next_comm = np.zeros(cp_size)
    while total_task > 0:
        round_plan = [-1] * cp_size
        scheduling_info.append(round_plan)
        # Communication already reserved for this round by the previous round's tasks.
        cur_comm = next_comm
        next_comm = np.zeros(cp_size)
        total_task -= execute_scheduling(grid_mask, cp_size, round_idx, cur_comm, next_comm, round_plan)
        round_idx += 1
    return scheduling_info
def find_kv_task(grid_mask, cp_size, round_idx, cur_comm, device_id, is_free):
    """Pick a task from the device's own row, i.e. one that needs a remote KV.

    The column ``(device_id + round_idx) % cp_size`` is tried first and is taken
    without consulting the communication budget. Otherwise the remaining columns are
    scanned backwards from ``device_id`` and the first pending one is taken whose
    row and column both still have room for two more communication units.

    Args:
        grid_mask: Task grid; a cell equal to 1 is a pending task.
        cp_size: Number of devices.
        round_idx: Index of the round being scheduled.
        cur_comm: Per-device communication units of this round; the row and column
            entries are each increased by 2 when a task is taken.
        device_id: Device looking for work; also the row that is searched.
        is_free: Per-device flags; the entry of ``device_id`` is set to 0 when a
            task is found and to 1 otherwise.

    Returns:
        tuple: ``(row, col)`` of the chosen task, or ``(-1, -1)`` if none fits.
    """
    is_free[device_id] = 0
    row = device_id

    def _take(col):
        cur_comm[row] += 2  # recv KV
        cur_comm[col] += 2  # send KV
        return row, col

    preferred_col = (device_id + round_idx) % cp_size
    if grid_mask[row][preferred_col] == 1:
        return _take(preferred_col)

    for step in range(1, cp_size):
        col = (device_id - step) % cp_size
        if grid_mask[row][col] != 1:
            continue
        if cur_comm[row] > COMM_THRESHOLD - 2 or cur_comm[col] > COMM_THRESHOLD - 2:
            continue
        return _take(col)

    is_free[device_id] = 1
    return -1, -1
sampling_ratio): + if not attn_mask.is_contiguous(): + attn_mask = attn_mask.contiguous() + mask_size_after_sampling = attn_mask.shape[0] // sampling_ratio + coarse_mask = torch.ones((mask_size_after_sampling, mask_size_after_sampling), dtype=torch.bool) + self.ops.coarsen_mask(attn_mask, mask_size_after_sampling, coarse_mask) + return coarse_mask + + def get_grid_mask(self, attn_mask, cp_size): + if not attn_mask.is_contiguous(): + attn_mask = attn_mask.contiguous() + if get_args().attention_mask_on_cpu: + grid_mask = torch.ones((cp_size, cp_size), dtype=torch.bool) + self.ops.coarsen_mask(attn_mask, cp_size, grid_mask) + else: + grid_mask = coarsen_attn_mask_npu(attn_mask, attn_mask.shape[0] // cp_size) + grid_mask = ~grid_mask + return grid_mask + + def search_kmeans_cpu(self, attn_mask, reduced_mask, cp_size, num_iters=100): + tmp_attn_mask = torch.ones_like(attn_mask) + tmp_grid_mask = torch.ones((cp_size, cp_size), dtype=torch.bool) + optimal_attn_mask = torch.ones_like(attn_mask) + optimal_grid_mask = torch.ones((cp_size, cp_size), dtype=torch.bool) + optimal_num_cluster = [-1] + optimal_sorted_indices = self.ops.search_kmeans(attn_mask, reduced_mask, tmp_attn_mask, tmp_grid_mask, + optimal_grid_mask, optimal_attn_mask, + optimal_num_cluster, cp_size, num_iters) + return optimal_sorted_indices, optimal_grid_mask, optimal_attn_mask, optimal_num_cluster + + def adaptive_remap(self, attn_mask, cp_size, truncated_dim=10): + args = get_args() + if attn_mask.dim() != 2 or attn_mask.shape[0] != attn_mask.shape[1]: + raise RuntimeError("Only 2-dimensional self-attention mask supported in adaptive cp") + + if args.adaptive_cp_without_coarse: + sampling_ratio = 1 + if args.attention_mask_on_cpu: + coarse_mask = attn_mask + else: + coarse_mask = attn_mask.cpu() + else: + if attn_mask.shape[0] % ADAPTIVE_CP_DEFAULT_SHAPE != 0: + raise RuntimeError("Shape of attention mask needs to be a multiple of 1024 if not enable " + "args.adaptive_cp_without_coarse in adaptive cp") 
+ if args.attention_mask_on_cpu: + sampling_ratio = attn_mask.shape[0] // ADAPTIVE_CP_DEFAULT_SHAPE + coarse_mask = self.coarsen_attn_mask_cpu(attn_mask, sampling_ratio) + else: + sampling_ratio = attn_mask.shape[0] // ADAPTIVE_CP_DEFAULT_SHAPE + coarse_mask = coarsen_attn_mask_npu(attn_mask, sampling_ratio).cpu() + + coarse_mask_np = coarse_mask.to(torch.float16).numpy() + mean_matrix = np.mean(coarse_mask_np, axis=0) + centered_matrix = (coarse_mask_np - mean_matrix).astype(float) + cov_matrix = np.matmul(centered_matrix.T, centered_matrix) + eigenvalues, eigenvectors = eigsh(cov_matrix, k=truncated_dim, which='LM') + feature_matrix = np.matmul(coarse_mask_np, eigenvectors).tolist() + + optimal_seq, optimal_grid_mask, optimal_coarsen_attn_mask, optimal_num_cluster = ( + self.search_kmeans_cpu(coarse_mask, feature_matrix, cp_size)) + + if args.adaptive_cp_without_coarse: + final_opt_seq = optimal_seq + else: + final_opt_seq = sampling_ratio * torch.tensor(optimal_seq)[:, None] + torch.arange(sampling_ratio) + final_opt_seq = final_opt_seq.view(-1).tolist() + + optimal_grid_mask = ~optimal_grid_mask + + return optimal_grid_mask, final_opt_seq + + def get_adaptive_cp_info(self, attn_mask, cp_size): + args = get_args() + global CACHED_GRID_MASK, CACHED_SEQ + if args.attention_mask_on_cpu != (attn_mask.device.type == 'cpu'): + raise RuntimeError("args.attention_mask_on_cpu does not match the device of set attention mask") + + # 生成重映射后的序列和重排后的gird mask,输出tensor(npu/cpu) opt_grid_mask和list opt_seq + if not args.adaptive_cp_only_reschedule: + if args.adaptive_cp_dynamic_attn_mask or CACHED_GRID_MASK is None: + opt_grid_mask, opt_seq = self.adaptive_remap(attn_mask, cp_size) + if not args.adaptive_cp_dynamic_attn_mask: + CACHED_GRID_MASK, CACHED_SEQ = opt_grid_mask, opt_seq + else: + opt_grid_mask, opt_seq = CACHED_GRID_MASK, CACHED_SEQ + else: + opt_seq = list(range(attn_mask.shape[0])) + if args.adaptive_cp_dynamic_attn_mask or CACHED_GRID_MASK is None: + opt_grid_mask 
= self.get_grid_mask(attn_mask, cp_size) + CACHED_GRID_MASK = opt_grid_mask + else: + opt_grid_mask = CACHED_GRID_MASK + + # 生成调度方案 + opt_scheduling = adaptive_reschedule_task(opt_grid_mask, cp_size) + + return opt_seq, opt_scheduling + + def get_mask_list(self, attn_mask, opt_scheduling, opt_seq, cp_rank, cp_size): + args = get_args() + global CACHED_MASK_LIST + if not args.adaptive_cp_dynamic_attn_mask and len(CACHED_MASK_LIST) > 0: + return CACHED_MASK_LIST + round_num = len(opt_scheduling) + grid_size = attn_mask.shape[0] // cp_size + mask_list = [] + + for rnd_idx in range(round_num): + task_id = opt_scheduling[rnd_idx][cp_rank] + if task_id == -1: + mask_list.append(None) + continue + q_device_id = task_id // cp_size + kv_device_id = task_id % cp_size + if args.attention_mask_on_cpu: + mask_list.append(torch.empty((grid_size, grid_size), dtype=torch.bool, device='cpu')) + if args.adaptive_cp_only_reschedule: + grid_inds = [q_device_id, kv_device_id] + self.ops.get_mask_list_without_remap(attn_mask, mask_list[rnd_idx], grid_inds, cp_size) + else: + q_token_list = opt_seq[grid_size * q_device_id: grid_size * (q_device_id + 1)] + kv_token_list = opt_seq[grid_size * kv_device_id: grid_size * (kv_device_id + 1)] + self.ops.get_mask_list_with_remap(attn_mask, mask_list[rnd_idx], q_token_list, kv_token_list) + else: + q_token_list = opt_seq[grid_size * q_device_id: grid_size * (q_device_id + 1)] + kv_token_list = opt_seq[grid_size * kv_device_id: grid_size * (kv_device_id + 1)] + mask_list.append(attn_mask[q_token_list, :][:, kv_token_list]) + + if args.attention_mask_on_cpu: + for rnd_idx in range(round_num): + if mask_list[rnd_idx] is not None: + mask_list[rnd_idx] = mask_list[rnd_idx].npu(non_blocking=True) + + CACHED_MASK_LIST = mask_list + return mask_list + \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/data_parallel/distributed_data_parallel.py b/model/train/yoco_moe/mindspeed/core/data_parallel/distributed_data_parallel.py new 
@torch.no_grad()
def all_gather_param(param, wait_buffer):
    """Start an asynchronous all-gather of ``param`` across the data-parallel group.

    The tensor currently held in ``param.data`` is kept as ``param.ds_tensor`` and
    ``param.data`` is replaced by a new buffer whose first dimension is
    ``dp_size`` times larger. The handle of the pending collective is appended to
    ``wait_buffer``; the caller must wait on it before reading ``param.data``.

    Args:
        param: Parameter to gather.
        wait_buffer: Container with ``append`` that collects the async work handles.
    """
    world_size = get_data_parallel_world_size()
    dp_group = get_data_parallel_group()

    shard = param.data
    gathered_shape = list(shard.size())
    gathered_shape[0] *= world_size

    # Keep the local tensor reachable so it can be restored after use.
    param.ds_tensor = shard
    param.data = torch.empty(gathered_shape, dtype=shard.dtype, device=torch.cuda.current_device())

    handle = torch.distributed._all_gather_base(
        param.data, param.ds_tensor.contiguous(), async_op=True, group=dp_group)
    wait_buffer.append(handle)
def wait_grad(param, wait_grad_buffer):
    """Finish the oldest pending gradient collective and accumulate its result.

    Pops the left-most handle from ``wait_grad_buffer`` and waits on it, adds
    ``param.grad_data_buffer`` into ``param.main_grad`` in place, then clears the
    temporary gradient attributes of ``param``.

    Args:
        param: Parameter with ``main_grad`` and ``grad_data_buffer`` attributes.
        wait_grad_buffer: Deque of async work handles, oldest first.
    """
    pending = wait_grad_buffer.popleft()
    pending.wait()
    param.main_grad.add_(param.grad_data_buffer)
    # Drop the temporaries in the same order as before.
    for attr_name in ('grad_data_buffer', 'full_grad', 'grad'):
        setattr(param, attr_name, None)
hasattr(module, 'light_weight'): + return + pre_module_id = module.pre_module_id + if pre_module_id is not None: + pre_module = module_list[pre_module_id] + all_gather_param(pre_module.weight, wait_buffer) + if hasattr(pre_module, 'light_weight') and pre_module.pre_module_id is not None: + all_gather_param(module_list[pre_module.pre_module_id].weight, wait_buffer) + + + def backward_hook(module, *arg): + release_param_data(module.weight) + reduce_scatter_grad(module.weight, wait_grad_buffer) + if hasattr(module, 'light_weight'): + return + next_module_id = module.next_module_id + if next_module_id is not None: + next_module = module_list[next_module_id] + if hasattr(next_module, 'light_weight') and next_module.next_module_id is not None: + wait_grad(module_list[next_module.next_module_id].weight, wait_grad_buffer) + wait_grad(next_module.weight, wait_grad_buffer) + if hasattr(module, 'zero_start'): + wait_grad(module.weight, wait_grad_buffer) + + for module in module_list: + module.register_forward_pre_hook(hook=forward_pre_hook) + module.register_forward_hook(hook=forward_hook) + module.register_full_backward_pre_hook(hook=backward_pre_hook) + module.register_full_backward_hook(hook=backward_hook) + + +def distributed_data_parallel_init_zero3( + self, + config, + module, + data_parallel_group, + accumulate_allreduce_grads_in_fp32: bool, + overlap_grad_reduce: bool, + use_distributed_optimizer: bool, + expert_data_parallel_group, + disable_bucketing: bool = False, + check_for_nan_in_grad: bool = False, + bucket_size: int = 40000000, +): + super(DistributedDataParallel, self).__init__(config) + self.module = module + if get_args().enable_zero3: + set_model_fw_bw_hook(self.module.modules()) + + # Set bucket_size to infinity if overlap_grad_reduce is False. 
+ self.overlap_grad_reduce = overlap_grad_reduce + self.use_distributed_optimizer = use_distributed_optimizer + + # Turn off bucketing if overlap_grad_reduce is False, if we are on a pipeline stage + # that is not the first (since data-parallel communication on these stages is not on + # the critical path), or if disable_bucketing is True (e.g., we might not want to + # break up model parameters into buckets for model chunks after the first + # in the interleaved schedule). + if not self.overlap_grad_reduce: + bucket_size = None + if parallel_state.get_pipeline_model_parallel_rank() > 0: + bucket_size = None + if disable_bucketing: + bucket_size = None + + self.check_for_nan_in_grad = check_for_nan_in_grad + self.bucket_size = bucket_size + + self.module = module + self.param_to_buffer = {} + self.zero3_param = [] + + # Group parameters by their gradient type. + param_to_name = {} + dense_params = [] + expert_parallel_params = [] + for name, param in self.module.named_parameters(): + if not param.requires_grad: + continue + dtype = param.dtype + param.grad_added_to_main_grad = False + param_to_name[param] = name + + if hasattr(param, 'enable_zero3') and param.enable_zero3: + param.main_grad = torch.zeros_like(param, dtype=dtype) + self.zero3_param.append(param) + continue + + if getattr(param, 'allreduce', True): + dense_params.append(param) + else: + expert_parallel_params.append(param) + + + def allocate_buffers_for_parameters( + input_params, data_parallel_group, gradient_scaling_factor=1.0, + ): + param_and_grad_dtype_to_params = {} + + # Group parameters by their gradient type. 
+ for param in input_params: + if not param.requires_grad: + continue + + param_dtype = param.dtype + grad_dtype = torch.float if accumulate_allreduce_grads_in_fp32 else param.dtype + + params = param_and_grad_dtype_to_params.get((param_dtype, grad_dtype), []) + params.append(param) + param_and_grad_dtype_to_params[(param_dtype, grad_dtype)] = params + + # Allocate the grad buffers and map the grads. + buffers = [] + for (param_dtype, grad_dtype), params in param_and_grad_dtype_to_params.items(): + buffers.append( + ParamAndGradBuffer( + param_dtype, + grad_dtype, + params, + data_parallel_group, + bucket_size, + param_to_name, + self.overlap_grad_reduce, + self.use_distributed_optimizer, + gradient_scaling_factor, + self.check_for_nan_in_grad, + ) + ) + for param in params: + self.param_to_buffer[param] = buffers[-1] + + return buffers + + data_parallel_world_size = torch.distributed.get_world_size(data_parallel_group) + # Allocate the param+grad buffers for dense params' grads. + self.buffers = allocate_buffers_for_parameters( + dense_params, + data_parallel_group, + gradient_scaling_factor=1.0 / data_parallel_world_size, + ) + + # Allocate separate param+grad buffers for expert parallel params' grads. + self.expert_parallel_buffers = allocate_buffers_for_parameters( + expert_parallel_params, + expert_data_parallel_group, + gradient_scaling_factor=1.0 / data_parallel_world_size, + ) + + # Delete references to weight_tensor if they exist since we don't want two parameter copies + # if we re-mapped parameters (which happens when we use the distributed optimizer). + # This is a temporary workaround around a TE bug that is fixed with + # https://github.com/NVIDIA/TransformerEngine/pull/719. + if self.use_distributed_optimizer: + + @torch.no_grad() + def unmap_weight_tensor(m): + if hasattr(m, 'weight_tensor'): + m.weight_tensor = None + + self.module.apply(unmap_weight_tensor) + + # Register backward hook. 
def distributed_data_parallel_zero_grad_wrapper(function):
    """Decorate a ``zero_grad``-like method so ZeRO-3 ``main_grad`` buffers are cleared too.

    The wrapped callable first runs ``function`` with the given arguments and
    then zeroes, in place, ``main_grad`` of every parameter listed in
    ``self.zero3_param``. The return value of ``function`` is discarded.
    """

    def _zero_grad_including_zero3(self, *args, **kwargs):
        function(self, *args, **kwargs)
        for zero3_parameter in self.zero3_param:
            zero3_parameter.main_grad.data.zero_()

    return wraps(function)(_zero_grad_including_zero3)
+ if not ddp_config.overlap_grad_reduce: + ddp_config.bucket_size = None + + self.ddp_config = ddp_config + log_single_rank( + logger, + logging.INFO, + f'Setting up DistributedDataParallel with config {self.ddp_config}', + ) + + # Turn off bucketing if we are on a pipeline stage that is not the first (since + # data-parallel communication on these stages is not on the critical path), or if + # disable_bucketing is True (e.g., we might not want to break up model parameters + # into buckets for model chunks after the first in the interleaved schedule). + self.bucket_size = self.ddp_config.bucket_size + if parallel_state.get_pipeline_model_parallel_rank() > 0: + self.bucket_size = None + if disable_bucketing: + self.bucket_size = None + + self.module = module + self.param_to_buffer = {} + + # Group parameters by their gradient type. + param_to_name = {} + dense_params = [] + expert_parallel_params = [] + for name, param in self.module.named_parameters(): + if not param.requires_grad: + continue + + param.grad_added_to_main_grad = False + param_to_name[param] = name + + if getattr(param, 'allreduce', True): + dense_params.append(param) + else: + expert_parallel_params.append(param) + + def allocate_buffers_for_parameters( + input_params, + data_parallel_group, + gradient_scaling_factor, + ): + param_and_grad_dtype_to_params = {} + + # Group parameters by their gradient type. 
+ for param in input_params: + if not param.requires_grad: + continue + + param_dtype = param.dtype + grad_dtype = torch.float if self.ddp_config.grad_reduce_in_fp32 else param.dtype + + params = param_and_grad_dtype_to_params.get((param_dtype, grad_dtype), []) + params.append(param) + param_and_grad_dtype_to_params[(param_dtype, grad_dtype)] = params + + if not config.calculate_per_token_loss: + target_gradient_scaling_factor = 1.0 / parallel_state.get_data_parallel_world_size( + with_context_parallel=True + ) + if self.ddp_config.average_in_collective: + # Collective is averaging gradients in collective with data_parallel_group. + assert ( + gradient_scaling_factor + / torch.distributed.get_world_size(group=data_parallel_group) + == target_gradient_scaling_factor + ) + else: + assert gradient_scaling_factor == target_gradient_scaling_factor + + # Allocate the grad buffers and map the grads. + buffers = [] + for (param_dtype, grad_dtype), params in param_and_grad_dtype_to_params.items(): + buffers.append( + ParamAndGradBuffer( + self.ddp_config, + param_dtype, + grad_dtype, + params, + data_parallel_group, + self.bucket_size, + param_to_name, + gradient_scaling_factor, + ) + ) + for param in params: + self.param_to_buffer[param] = buffers[-1] + + return buffers + + if config.calculate_per_token_loss: + gradient_scaling_factor = 1.0 + expert_gradient_scaling_factor = 1.0 + else: + if self.ddp_config.average_in_collective: + gradient_scaling_factor = 1.0 + expert_gradient_scaling_factor = ( + 1.0 / parallel_state.get_expert_model_parallel_world_size() + ) + else: + data_parallel_world_size = parallel_state.get_data_parallel_world_size( + with_context_parallel=True + ) + gradient_scaling_factor = 1.0 / data_parallel_world_size + expert_gradient_scaling_factor = 1.0 / data_parallel_world_size + + # Allocate the param+grad buffers for dense params' grads. 
+ self.buffers = allocate_buffers_for_parameters( + dense_params, + parallel_state.get_data_parallel_group(with_context_parallel=True), + gradient_scaling_factor=gradient_scaling_factor, + ) + + # Allocate separate param+grad buffers for expert parallel params' grads. + self.expert_parallel_buffers = allocate_buffers_for_parameters( + expert_parallel_params, + parallel_state.get_data_modulo_expert_parallel_group(with_context_parallel=True), + gradient_scaling_factor=expert_gradient_scaling_factor, + ) + + # Delete references to weight_tensor if they exist since we don't want two parameter copies + # if we re-mapped parameters (which happens when we use the distributed optimizer). + # This is a temporary workaround around a TE bug that is fixed with + if self.ddp_config.use_distributed_optimizer: + + @torch.no_grad() + def unmap_weight_tensor(m): + if hasattr(m, 'weight_tensor'): + m.weight_tensor = None + + self.module.apply(unmap_weight_tensor) + + # Register backward hook. + # Accumulation function for the gradients need to be stored so they + # don't go out of scope. + self.grad_accs = [] + for param in self.module.parameters(): + if param.requires_grad: + # Expand so we get access to grad_fn. + param_tmp = param.expand_as(param) + # Get the gradient accumulator function. + grad_acc = param_tmp.grad_fn.next_functions[0][0] + grad_acc.register_hook(self._make_param_hook(param, self.param_to_buffer)) + self.grad_accs.append(grad_acc) diff --git a/model/train/yoco_moe/mindspeed/core/datasets/gpt_dataset.py b/model/train/yoco_moe/mindspeed/core/datasets/gpt_dataset.py new file mode 100644 index 000000000..9ca57c1d7 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/datasets/gpt_dataset.py @@ -0,0 +1,98 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+ +from functools import wraps +import torch + + +def _get_ltor_masks_and_position_ids( + data: torch.Tensor, + eod_token: int, + reset_position_ids: bool, + reset_attention_mask: bool, + eod_mask_loss: bool, + create_attention_mask: bool, +): + """Build masks and position id for left to right model. + + Args: + data (torch.Tensor): The data tenor that holds the tokens from the dataset + + eod_token (int): ID of the token to that is considered the EOD + + reset_position_ids (bool): Switch to reset the document position ID's + + reset_attention_mask (bool): Switch to reset the attention mask + + eod_mask_loss (bool): Switch to enable the EOD mask loss + + create_attention_mask (bool): Switch to enable the attention masks generation. Can be disabled if attention kernel generates masks by itself. + + Returns: + torch.Tensor: Attention mask needed to be used for Attention + + torch.Tensor: The mask used for loss value during training + + torch.Tensor: The position ID's of the token + """ + seq_length = data.numel() + + if create_attention_mask: + attention_mask = torch.tril( + torch.ones((seq_length, seq_length), device=data.device) + ).unsqueeze(0) + else: + attention_mask = None + + # Loss mask. + loss_mask = torch.ones(seq_length, dtype=torch.float, device=data.device) + if eod_mask_loss: + loss_mask[data == eod_token] = 0.0 + + # Position ids. + position_ids = torch.arange(seq_length, dtype=torch.long, device=data.device) + # We need to clone as the ids will be modifed based on batch index. + if reset_position_ids: + position_ids = position_ids.clone() + + if reset_attention_mask: + # Find indices where EOD token is. + eod_index = position_ids[data == eod_token] + # Detach indices from positions if going to modify positions. + if reset_position_ids: + eod_index = eod_index.clone() + + # Loop through EOD indices: + prev_index = 0 + for j in range(eod_index.numel()): + i = eod_index[j] + # Mask attention loss. 
+ if reset_attention_mask and attention_mask is not None: + attention_mask[0, (i + 1) :, : (i + 1)] = 0 + # Reset positions. + if reset_position_ids: + position_ids[(i + 1) :] -= i + 1 - prev_index + prev_index = i + 1 + + if attention_mask is not None: + # Convert attention mask to binary: + attention_mask = attention_mask < 0.5 + + seq_length_tensor = torch.tensor([seq_length]) + actual_seq_len = torch.cat([eod_index + 1, seq_length_tensor]) + + return attention_mask, loss_mask, (position_ids, actual_seq_len) + + +def collate_wrapper(fn): + @wraps(fn) + def wrapper(samples): + actual_seq_len = [elem['position_ids'][1] for elem in samples] + samples = [{key: val if key != 'position_ids' else val[0] for key, val in elem.items()} for elem in samples] + batch = fn(samples) + seq_len = actual_seq_len[0][-1] + actual_seq_len = [elem + i * seq_len for i, elem in enumerate(actual_seq_len)] + batch['actual_seq_len'] = torch.cat(actual_seq_len) + return batch + + return wrapper \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/distributed/__init__.py b/model/train/yoco_moe/mindspeed/core/distributed/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/__init__.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/__init__.py new file mode 100644 index 000000000..30a4ad6c8 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/__init__.py @@ -0,0 +1,4 @@ +from .zero3 import LayerZeRO3 +from .config import layerzero_setup_model_and_optimizer_wrapper, layerzero_initialize_model_parallel_wrapper +from .megatron_adaptor import mga_finalize_model_grads_wrapper +from .state.mga_checkpoint import save_checkpoint \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/comm/hookwrap.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/comm/hookwrap.py new file mode 100644 index 
class EventQueueBase:
    """Interface of a FIFO queue of events that can be temporarily blocked.

    Note that this class does not derive from ``abc.ABC``, so the
    ``@abstractmethod`` markers are informational and not enforced at
    instantiation time.
    """

    def __init__(self) -> None:
        pass

    @abstractmethod
    @contextmanager
    def block(self):
        """Context manager during which enqueued events are held back."""
        ...

    @abstractmethod
    def empty(self):
        """Return True when no event is available for ``pop_left``."""
        ...

    @abstractmethod
    def enqueue(self, free_event: torch.cuda.Event) -> None:
        """Append ``free_event`` to the queue."""
        ...

    @abstractmethod
    def pop_left(self) -> Optional[torch.cuda.Event]:
        """Remove and return the oldest event, or ``None`` if there is none."""
        ...


class CriticalPathEventQueue(EventQueueBase):
    """Event queue whose producers can be parked while a ``block()`` is active.

    While blocked, ``enqueue`` stores events in a side buffer that is invisible
    to ``empty`` / ``pop_left``; the buffer is appended to the main queue, in
    order, when the ``block()`` context exits.
    """

    def __init__(self):
        super().__init__()
        self._queue: Deque[torch.cuda.Event] = deque()
        self._buffer: Deque[torch.cuda.Event] = deque()
        self.__blocked = False

    @contextmanager
    def block(self):
        """Hold back events enqueued inside the ``with`` body until it exits."""
        self.__blocked = True
        try:
            yield
        finally:
            # Unblock *before* flushing. Flushing through ``enqueue`` while
            # still blocked would append to ``_buffer`` during iteration over
            # it and raise "deque mutated during iteration".
            self.__blocked = False
            self._queue.extend(self._buffer)
            self._buffer.clear()

    def empty(self):
        """Return True when the main queue holds no event (buffered ones excluded)."""
        return len(self._queue) == 0

    def enqueue(self, free_event: torch.cuda.Event) -> None:
        """Queue ``free_event``; it is parked in the side buffer while blocked."""
        if self.__blocked:
            self._buffer.append(free_event)
        else:
            self._queue.append(free_event)

    # Concrete implementation: must not carry ``@abstractmethod``.
    def pop_left(self) -> Optional[torch.cuda.Event]:
        """Remove and return the oldest queued event, or ``None`` when empty."""
        if self._queue:
            event = self._queue.popleft()
            return event
        return None
+ +import gc +import dataclasses +import importlib +from functools import wraps +from typing import Tuple, Literal, Union, Iterable, Optional + +import yaml +import torch +import torch.nn as nn +import torch.distributed as dist + +from megatron.core import mpu +from megatron.core.optimizer import OptimizerConfig +from megatron.training.training import get_optimizer_param_scheduler, get_model +from megatron.training.global_vars import get_args, get_timers +from megatron.training.utils import ( + print_rank_0, + unwrap_model, +) +from megatron.core.utils import get_model_config +from megatron.training.checkpointing import load_checkpoint + +from mindspeed.core.distributed.layerzero.zero3 import LayerZeRO3 +from mindspeed.core.distributed.layerzero.zero3.wrap import ModuleWrapPolicy +from mindspeed.core.distributed.layerzero.zero3.api import ( + BackwardPrefetch, + BackwardReduceScatter, + MixedPrecision, +) +from mindspeed.core.distributed.layerzero.megatron_adaptor import get_optimizer +from mindspeed.core.distributed.layerzero.state.mga_checkpoint import save_checkpoint, load_layerzero_checkpoint +from . 
@dataclasses.dataclass
class LayerzeroConfig:
    """User-facing LayerZeRO settings, normally loaded from a YAML file.

    ``to_dict`` converts the settings into the keyword arguments handed to
    ``LayerZeRO3``.
    """

    # Requested ZeRO-3 group size; must be a positive int.
    zero3_size: int = 8
    # Fully qualified ``module.Class`` names; each is resolved with
    # ``_get_class_type`` and handed to ``ModuleWrapPolicy``.
    transformer_layers: Optional[Iterable[str]] = None
    backward_prefetch: Literal["BACKWARD_PRE",
                               "BACKWARD_POST"] = 'BACKWARD_PRE'
    backward_reduce_scatter: Literal["BACKWARD_PRE",
                                     "BACKWARD_POST"] = 'BACKWARD_PRE'
    param_dtype: Optional[Literal["fp16", "bf16", "fp32"]] = "fp16"
    reduce_dtype: Optional[Literal["fp16", "bf16", "fp32"]] = "fp16"
    buffer_dtype: Optional[Literal["fp16", "bf16", "fp32"]] = None
    # Attribute names looked up on each model chunk by the caller.
    ignored_modules: Optional[Iterable[str]] = None
    # No trailing comma here: it would turn the default into the tuple ``(None,)``.
    param_init_fn: Optional[str] = None
    forward_prefetch: bool = True
    limit_all_gathers: bool = True
    offload_grads: bool = False
    ckpt_load_path: Optional[str] = None
    autocast_input: bool = True
    autocast_output: bool = True

    def __post_init__(self):
        # Check the type first so a non-int (e.g. a YAML string) yields the
        # intended ValueError instead of a TypeError from the comparison.
        if not isinstance(self.zero3_size, int) or self.zero3_size <= 0:
            raise ValueError("zero3_size must be a positive int value")

    @classmethod
    def load_from_yaml(cls, yml_file: str):
        """Build a config from ``yml_file``; keys that are not fields are ignored."""
        with open(yml_file, 'r') as f:
            # An empty YAML document loads as None; treat it as "all defaults".
            config = yaml.safe_load(f) or {}
        kwargs = {
            field.name: config[field.name]
            for field in dataclasses.fields(cls)
            if field.name in config
        }
        print_rank_0(kwargs)
        return cls(**kwargs)

    def to_dict(self):
        """Return the keyword arguments used to construct ``LayerZeRO3``.

        Raises:
            RuntimeError: if the LayerZeRO process groups are not initialized.
        """
        process_group = self._process_group()
        wrap_policy = self._wrap_policy()
        mixed_precision = self._mp_policy()
        backward_prefetch = self._backward_prefetch()
        backward_rs = self._backward_reduce_scatter()
        kwargs = {
            "process_group": process_group,
            "tp_zero_process_group": self._tp_process_group(),
            "auto_wrap_policy": wrap_policy,
            "mixed_precision": mixed_precision,
            "device_id": torch.cuda.current_device(),
            "backward_prefetch": backward_prefetch,
            "backward_reduce_scatter": backward_rs,
            "forward_prefetch": self.forward_prefetch,
            "offload_grads": self.offload_grads
        }
        return kwargs

    def _mp_policy(self):
        """Translate the dtype names into a ``MixedPrecision`` policy."""
        param_dtype = _get_dtype(
            self.param_dtype) if self.param_dtype else None
        reduce_dtype = _get_dtype(
            self.reduce_dtype) if self.reduce_dtype else None
        buffer_dtype = _get_dtype(
            self.buffer_dtype) if self.buffer_dtype else None
        return MixedPrecision(param_dtype=param_dtype,
                              reduce_dtype=reduce_dtype,
                              buffer_dtype=buffer_dtype)

    def _wrap_policy(self):
        """Resolve ``transformer_layers`` to classes and build the wrap policy."""
        if self.transformer_layers:
            try:
                transformer_layer_cls = set(_get_class_type(
                    m_class_name) for m_class_name in self.transformer_layers)
            except ModuleNotFoundError as e:
                # Report the configured names: the resolved set is not bound
                # when the import above failed.
                raise ModuleNotFoundError(
                    f"Module for one of {self.transformer_layers} Not Found, "
                    "check yaml config file and your model, or add it to PYTHONPATH"
                ) from e
        else:
            transformer_layer_cls = []
        print_rank_0(f"Each of these layers will be wrapped as a single layer:{transformer_layer_cls}")
        wrap_policy = ModuleWrapPolicy(transformer_layer_cls)
        return wrap_policy

    def _process_group(self):
        """Return ``(zero3_group, zero1_group)``; raise if they are not set up."""
        if not _is_layerzero_pg_initialized():
            raise RuntimeError("Layerzero process group is not initialized")
        return _ZERO3_PROCESS_GROUP, _ZERO1_PROCESS_GROUP

    def _tp_process_group(self):
        """Return ``(tp_zero3_group, tp_zero1_group)`` from the module globals."""
        return _TP_ZERO3_PROCESS_GROUP, _TP_ZERO1_PROCESS_GROUP

    def _backward_prefetch(self):
        """Map the configured name onto the ``BackwardPrefetch`` enum."""
        if self.backward_prefetch not in ['BACKWARD_PRE', 'BACKWARD_POST']:
            raise ValueError(f"{self.backward_prefetch} is not supported")
        return BackwardPrefetch[self.backward_prefetch]

    def _backward_reduce_scatter(self):
        """Map the configured name onto the ``BackwardReduceScatter`` enum."""
        if self.backward_reduce_scatter not in ['BACKWARD_PRE', 'BACKWARD_POST']:
            raise ValueError(f"{self.backward_reduce_scatter} is not supported")
        return BackwardReduceScatter[self.backward_reduce_scatter]

    def setup_cast_settings(self):
        """Push the autocast switches into the ``constants`` module globals."""
        constants.set_auto_cast_input(self.autocast_input)
        constants.set_auto_cast_output(self.autocast_output)
get_timers() + kwargs = {} + for f in dataclasses.fields(OptimizerConfig): + if hasattr(args, f.name): + kwargs[f.name] = getattr(args, f.name) + config = OptimizerConfig(**kwargs) + config.timers = timers + optimizer = get_optimizer(config, model[0], no_wd_decay_cond, + scale_lr_cond, lr_mult) + opt_param_scheduler = get_optimizer_param_scheduler(optimizer) + return optimizer, opt_param_scheduler + + +def layerzero_setup_model_and_optimizer_wrapper(setup_model_and_optimizer): + @wraps(setup_model_and_optimizer) + def wrapper(model_provider_func, + model_type, + no_wd_decay_cond=None, + scale_lr_cond=None, + lr_mult=1.0): + args = get_args() + if getattr(args, 'layerzero', False): + # ======================================================== + timers = get_timers() + models = get_model(model_provider_func, model_type, False) + if args.load is not None or args.pretrained_checkpoint is not None: + timers('load-checkpoint', log_level=0).start(barrier=True) + args.iteration, args.num_floating_point_operations_so_far = load_checkpoint( + models, None, None) + timers('load-checkpoint').stop(barrier=True) + timers.log(['load-checkpoint']) + else: + args.iteration = 0 + args.num_floating_point_operations_so_far = 0 + # ======================================================== + config_yaml = args.layerzero_config + config = LayerzeroConfig.load_from_yaml(config_yaml) + config.setup_cast_settings() + zero_models = wrap_model_with_layerzero( + unwrap_model(models), config) + del models + gc.collect() + + optimizer, opt_param_scheduler = create_optimizer_layerzero(zero_models, + no_wd_decay_cond=no_wd_decay_cond, + scale_lr_cond=scale_lr_cond, + lr_mult=lr_mult) + if config.ckpt_load_path is not None: + load_layerzero_checkpoint( + zero_models, config.ckpt_load_path, optimizer, opt_param_scheduler) + torch.cuda.empty_cache() + print_rank_0(f"{zero_models[0]=}") + + model_config = get_model_config(zero_models[0]) + if len(zero_models) == 1: + model_config.no_sync_func = 
def initialize_zero_process_group_with_pp(pp_size, zero3_size):
    """Create the ZeRO-1 / ZeRO-3 process groups and store this rank's groups.

    The world is cut into ``pp_size`` contiguous blocks of
    ``world_size // pp_size`` ranks (one ZeRO-1 group each), and every block is
    cut again into contiguous ZeRO-3 groups of ``zero3_size`` ranks. The groups
    containing the calling rank, and their rank lists, are stored in the
    module-level ``_ZERO1_*`` / ``_ZERO3_*`` globals.

    Args:
        pp_size: number of ZeRO-1 blocks the world is divided into.
        zero3_size: requested ZeRO-3 group size; clamped to the ZeRO-1 size.

    Raises:
        ValueError: from ``ensure_divisibility`` when the ZeRO-1 size is not a
            multiple of the (clamped) ZeRO-3 size.
    """
    global _ZERO1_PROCESS_GROUP
    global _ZERO1_PROCESS_GROUP_RANKS
    global _ZERO3_PROCESS_GROUP
    global _ZERO3_PROCESS_GROUP_RANKS

    world_size = dist.get_world_size()
    global_rank = dist.get_rank()
    zero1_size = world_size // pp_size
    # A ZeRO-3 group can never be larger than the ZeRO-1 group containing it.
    zero3_size = min(zero3_size, zero1_size)
    ensure_divisibility(zero1_size, zero3_size)
    num_zero3_groups = zero1_size // zero3_size

    # Every rank executes every ``new_group`` call, in the same order; only the
    # groups that contain this rank are remembered. The backend is fixed to
    # "hccl".
    for zero1_idx in range(pp_size):
        cur_zero1_ranks = list(
            range(zero1_idx * zero1_size, (zero1_idx + 1) * zero1_size))
        zero1_group = dist.new_group(ranks=cur_zero1_ranks, backend="hccl")
        if global_rank in cur_zero1_ranks:
            _ZERO1_PROCESS_GROUP = zero1_group
            _ZERO1_PROCESS_GROUP_RANKS = cur_zero1_ranks

        # Sub-divide the current ZeRO-1 block into consecutive ZeRO-3 groups.
        for zero3_idx in range(num_zero3_groups):
            cur_zero3_ranks = cur_zero1_ranks[zero3_idx *
                                              zero3_size: (zero3_idx + 1) * zero3_size]
            zero3_group = dist.new_group(ranks=cur_zero3_ranks, backend="hccl")
            if global_rank in cur_zero3_ranks:
                _ZERO3_PROCESS_GROUP = zero3_group
                _ZERO3_PROCESS_GROUP_RANKS = cur_zero3_ranks
    return
tp_zero3_size) + ensure_divisibility(tp_zero1_size, tp_zero3_size) + + world_size = dist.get_world_size() + global_rank = dist.get_rank() + num_zero1_groups = world_size // tp_zero1_size + num_zero3_groups = tp_zero1_size // tp_zero3_size + for zero1_idx in range(num_zero1_groups): + for zero3_idx in range(num_zero3_groups): + cur_zero1_ranks = list( + range(zero1_idx, world_size, num_zero1_groups)) + group_ranks = cur_zero1_ranks[zero3_idx * + tp_zero3_size: (zero3_idx + 1) * tp_zero3_size] + group = dist.new_group(ranks=group_ranks, backend="hccl") + if global_rank in group_ranks: + _TP_ZERO3_PROCESS_GROUP = group + _TP_ZERO3_PROCESS_GROUP_RANKS = group_ranks + return + + +def initialized_zero_process_group(zero3_size): + ''' + For TP > 1 or PP > 1 or TP + PP situation, the process group needs to be taken care of. + ''' + if not mpu.is_initialized(): + raise AssertionError(f"mpu is not initialized") + args = get_args() + global _ZERO1_PROCESS_GROUP + global _ZERO1_PROCESS_GROUP_RANKS + global _ZERO3_PROCESS_GROUP + global _ZERO3_PROCESS_GROUP_RANKS + global _TP_ZERO1_PROCESS_GROUP + global _TP_ZERO1_PROCESS_GROUP_RANKS + global _TP_ZERO3_PROCESS_GROUP + global _TP_ZERO3_PROCESS_GROUP_RANKS + + initialize_zero_process_group_with_pp( + args.pipeline_model_parallel_size, zero3_size) + #! 
def ensure_divisibility(a: int, b: int):
    """Ensure that ``a`` is divisible by ``b``.

    Args:
        a: the dividend.
        b: the divisor.

    Raises:
        ValueError: if ``b`` is zero, or if ``a`` is not an exact multiple of ``b``.
    """
    if b == 0:
        raise ValueError("The divisor (b) must not be zero.")
    if a % b != 0:
        raise ValueError(f"{a} is not divisible by {b}")
def all_gather_into_flat_tensor(tensor: torch.Tensor, process_group):
    '''Gather tensors of different sizes from every rank into one large flat tensor.

    Each rank contributes its tensor flattened and cast to float32 (or nothing
    when ``tensor`` is None / empty). The contributions are concatenated in the
    order of the list filled by ``dist.all_gather``.

    Args:
        tensor: local tensor to contribute; may be None or empty.
        process_group: group over which sizes and data are all-gathered.

    Returns:
        A 1-D float32 tensor holding every rank's elements with the padding
        removed.
    '''
    world_size = process_group.size()
    rank = dist.get_rank(process_group)

    # If tensor is None or has no elements, use an empty tensor
    if tensor is None or tensor.numel() == 0:
        local_tensor = torch.empty([0]).float().cuda()
    else:
        local_tensor = tensor.contiguous().flatten().float()

    # Collect the tensor size of every process
    tensor_sizes = [torch.zeros(1, dtype=torch.int64).cuda() for _ in range(world_size)]
    if local_tensor.numel() > 0:
        tensor_sizes[rank] = torch.tensor([local_tensor.numel()], dtype=torch.int64).cuda()
    else:
        tensor_sizes[rank] = torch.tensor([0], dtype=torch.int64).cuda()
    dist.all_gather(tensor_sizes, tensor_sizes[rank], group=process_group)
    tensor_sizes = [int(size.item()) for size in tensor_sizes]

    # Find the largest tensor size
    max_size = max(tensor_sizes)

    # Create the padded tensor: all_gather is given equally sized buffers, so
    # every rank pads its data up to ``max_size``
    if max_size > 0:
        padding_tensor = torch.zeros(max_size, dtype=torch.float32, device=local_tensor.device).cuda()
    else:
        padding_tensor = torch.tensor([], dtype=torch.float32, device=local_tensor.device).cuda()

    # Copy local_tensor into the front of padding_tensor
    if local_tensor.numel() > 0:
        padding_tensor[:local_tensor.numel()] = local_tensor

    # Create the list that receives every rank's padded tensor
    all_padding_tensors = [torch.zeros_like(padding_tensor).cuda() for _ in range(world_size)]

    # Gather all padded tensors
    dist.all_gather(all_padding_tensors, padding_tensor, group=process_group)

    # Concatenate all tensors, dropping the padding
    flatten_tensor = torch.cat([t[:size] for t, size in zip(all_padding_tensors, tensor_sizes)], dim=0)
    return flatten_tensor
@torch.no_grad()
def _get_grad_norm(
    params: Iterable[nn.Parameter],
    norm_type: float,
) -> torch.Tensor:
    """Return the norm of the gradients of ``params`` viewed as one vector.

    The result is computed and returned in FP32 even when the gradients are
    stored in a lower precision, because the caller reduces it across ranks.

    Args:
        params: Parameters to inspect; those without a gradient are ignored.
        norm_type: Order of the norm (``math.inf`` is accepted).

    Returns:
        A 0-dim FP32 tensor; ``torch.tensor(0.0)`` (on CPU) when no parameter
        has a gradient.

    Raises:
        ValueError: If the gradients do not all share one dtype.
    """
    grads = [param.grad for param in params if param.grad is not None]
    if not grads:
        return torch.tensor(0.0)
    grad_dtypes = {grad.dtype for grad in grads}
    if len(grad_dtypes) != 1:
        raise ValueError(
            f"Requires uniform dtype across all gradients but got {grad_dtypes}"
        )
    # Norm of the per-gradient norms == norm of the concatenated gradients.
    per_grad_norms = torch.stack(
        [
            torch.linalg.vector_norm(grad.detach(), norm_type, dtype=torch.float32)
            for grad in grads
        ],
    )
    return torch.linalg.vector_norm(per_grad_norms, norm_type, dtype=torch.float32)


def clip_grad_norm(params, max_norm, norm_type=2, process_group=dist.group.WORLD):
    """Clip gradients in place by their global norm and return that norm.

    Parameters for which ``_is_zero3_flattened`` is true are treated as
    sharded: their local norm is all-reduced over ``process_group``. The norm
    of the remaining parameters is added once, locally, after the reduction,
    because all-reducing it would count it once per rank.

    Args:
        params: A tensor or any iterable (generators included) of parameters.
        max_norm: Clipping threshold; must be greater than 0.
        norm_type: Order of the norm; ``math.inf`` selects the max norm.
        process_group: Group over which the sharded norm is reduced.

    Returns:
        The total gradient norm as a tensor on the device of the first
        parameter.

    Raises:
        ValueError: If ``max_norm`` is not greater than 0 or ``params`` is
            empty.
    """
    if not max_norm > 0.:
        raise ValueError("clip_grad should be a number greater than 0.0")

    if isinstance(params, torch.Tensor):
        params = [params]
    else:
        # Materialise once: the parameters are indexed and traversed several times.
        params = list(params)
    if not params:
        raise ValueError("clip_grad_norm requires at least one parameter")

    norm_type = float(norm_type)
    device = params[0].device
    sharded_params = {p for p in params if _is_zero3_flattened(p)}
    non_sharded_params = {p for p in params if p not in sharded_params}

    local_sharded_norm = _get_grad_norm(sharded_params, norm_type).to(device)
    local_nonsharded_norm = _get_grad_norm(non_sharded_params, norm_type).to(device)
    if norm_type == math.inf:
        total_norm = torch.maximum(local_sharded_norm, local_nonsharded_norm)
        dist.all_reduce(total_norm, op=dist.ReduceOp.MAX, group=process_group)
    else:
        total_norm = local_sharded_norm ** norm_type
        dist.all_reduce(total_norm, group=process_group)
        # Added after the reduction so the non-sharded part is counted once.
        total_norm += local_nonsharded_norm ** norm_type
        total_norm = total_norm ** (1.0 / norm_type)

    clip_coef = max_norm / (total_norm + 1e-6)
    # De-duplicate gradient tensors so none is scaled twice.
    grads = list({param.grad for param in params if param.grad is not None})
    if grads and clip_coef < 1.0:
        # The overflow flag must live on the same device as the gradients.
        dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device=grads[0].device)
        multi_tensor_applier(
            amp_C.multi_tensor_scale, dummy_overflow_buf, [grads, grads], clip_coef
        )
    return total_norm
def scale_gradients(model, scaling_factor: float):
    """Multiply every existing gradient of a root ``LayerZeRO3`` model in place.

    Args:
        model: The root ``LayerZeRO3`` instance.
        scaling_factor: Factor applied to each gradient.

    Raises:
        ValueError: If ``model`` is not a root ``LayerZeRO3`` instance.
    """
    if not (isinstance(model, LayerZeRO3) and model._is_root):
        raise ValueError(f"This func expects to be called on a LayerZeRO3 root instance, got {type(model)}")

    for param in model.parameters():
        if param.requires_grad and param.grad is not None:
            param.grad.data *= scaling_factor


def mga_finalize_model_grads_wrapper(finalize_model_grads):
    """Wrap ``finalize_model_grads`` with a LayerZeRO-specific code path.

    When the global args have a truthy ``layerzero`` attribute the wrapped
    function is NOT called. Instead, if a token count is supplied, it is
    broadcast from the last pipeline rank, summed over the data-parallel group
    and, when positive, every model chunk's gradients are scaled by its
    reciprocal. Without ``layerzero`` the call is forwarded unchanged.
    """
    @wraps(finalize_model_grads)
    def wrapper(*args, **kwargs):
        global_args = get_args()
        if not getattr(global_args, 'layerzero', False):
            return finalize_model_grads(*args, **kwargs)

        num_tokens = kwargs.get('num_tokens', None)
        if num_tokens is None and len(args) > 1:
            # ``num_tokens`` may also arrive as the second positional argument.
            num_tokens = args[1]
        if num_tokens is None:
            # No per-token normalisation requested: nothing to do.
            return None

        # the number of tokens is only present on the last stage, so broadcast it
        # to the other ranks in the pipeline parallel group.
        torch.distributed.broadcast(
            num_tokens,
            src=parallel_state.get_pipeline_model_parallel_last_rank(),
            group=parallel_state.get_pipeline_model_parallel_group(),
        )
        # all-reduce across DP ranks.
        torch.distributed.all_reduce(num_tokens, group=parallel_state.get_data_parallel_group())

        model = kwargs.get('model', None)
        if model is None and args:
            model = args[0]
        if num_tokens > 0:
            scaling = 1.0 / num_tokens
            for model_chunk in model:
                scale_gradients(model_chunk, scaling)
        return None

    return wrapper


log = logging.getLogger(__name__)


def _refresh_per_optimizer_state():
    """Return a fresh per-optimizer bookkeeping record."""
    return {"stage": OptState.READY, "found_inf_per_device": {}}


def _check_supported_device(tensor: torch.Tensor):
    """Raise ``AssertionError`` unless ``tensor`` lives on cuda, xla, cpu or npu."""
    if tensor.is_cuda or tensor.device.type in ("xla", "cpu", "npu"):
        return
    raise AssertionError(f"Unsupported device: {tensor.device}")


class _GeneralMultiDeviceReplicator(_MultiDeviceReplicator):
    """
    Lazily serves tensor to request device. This class extends
    _MultiDeviceReplicator to allow support for "cpu" as a device.
    """

    def __init__(self, master_tensor: torch.Tensor) -> None:
        _check_supported_device(master_tensor)
        self.master = master_tensor
        self._per_device_tensors: Dict[torch.device, torch.Tensor] = {}


class ShardedGradScaler(GradScaler):
    """``GradScaler`` variant that synchronises inf/nan detection across ranks.

    On top of the base scaler it adds a ``min_scale`` floor, a hysteresis
    counter for back-off, and ``_meg_step`` so that unscaling and stepping can
    be driven separately.
    """

    def __init__(
        self,
        init_scale: float = 2.0**16,
        min_scale: float = 1.,
        backoff_factor: float = 0.5,
        growth_factor: float = 2.0,
        growth_interval: int = 2000,
        hysteresis: int = 2,
        enabled: bool = True,
        process_group: Optional[ProcessGroup] = dist.group.WORLD,
    ):
        if init_scale is None:
            init_scale = 1.0
        super().__init__(
            init_scale=init_scale,
            backoff_factor=backoff_factor,
            growth_factor=growth_factor,
            growth_interval=growth_interval,
            enabled=enabled,
        )
        # Set unconditionally: ``loss_scale`` reads ``device`` even when disabled.
        self.device = torch.device("cuda")
        self.min_scale = min_scale
        self.hysteresis = hysteresis
        self._hysteresis_tracker = self.hysteresis
        if self._enabled:
            self.process_group = process_group
            self._per_optimizer_states = defaultdict(
                _refresh_per_optimizer_state)

    @property
    def loss_scale(self) -> torch.Tensor:
        '''
        Current loss scale as a tensor.

        The base scaler creates its scale lazily on the first ``scale()`` call;
        this property forces that initialisation so callers that scale the loss
        manually can read the value. Returns ``1.0`` when the scaler is disabled.
        '''
        if not self._enabled:
            return torch.tensor([1.0], dtype=torch.float32, device=self.device)
        elif self._scale is None:
            self._lazy_init_scale_growth_tracker(self.device)
            self._check_none_scale()
        return self._scale

    def scale(
        self, outputs: Union[torch.Tensor, List[torch.Tensor]]
    ) -> Union[torch.Tensor, List[torch.Tensor]]:
        """Multiply ``outputs`` (a tensor or nested iterable of tensors) by the scale.

        Each result keeps the dtype of its input. ``outputs`` is returned
        untouched when the scaler is disabled.
        """
        if not self._enabled:
            return outputs

        if isinstance(outputs, torch.Tensor):
            _check_supported_device(outputs)
            if self._scale is None:
                self._lazy_init_scale_growth_tracker(outputs.device)
            self._check_none_scale()
            scaled_output = outputs * self._scale.to(
                device=outputs.device, non_blocking=True
            )
            # Here we ensure the return dtype is the same as the outputs dtype.
            # For the FSDP + Mixed Precision use case, the loss output is in the Mixed Precision
            # format (fp16, bf16) and so the scaled loss should be of the same dtype.
            return scaled_output.type(outputs.dtype)

        stash: List[_GeneralMultiDeviceReplicator] = []

        def apply_scale(
            val: Union[torch.Tensor, abc.Iterable]
        ) -> Union[torch.Tensor, abc.Iterable]:
            if isinstance(val, torch.Tensor):
                _check_supported_device(val)
                if len(stash) == 0:
                    if self._scale is None:
                        self._lazy_init_scale_growth_tracker(val.device)
                    self._check_none_scale()
                    stash.append(_GeneralMultiDeviceReplicator(self._scale))
                scaled_val = val * stash[0].get(val.device)

                return scaled_val.type(val.dtype)
            elif isinstance(val, abc.Iterable):
                iterator = map(apply_scale, val)
                if isinstance(val, (list, tuple)):
                    return type(val)(iterator)
                else:
                    return iterator
            else:
                raise ValueError(
                    "outputs must be a Tensor or an iterable of Tensors")

        return apply_scale(outputs)  # type: ignore[return-value]

    def _foreach_non_finite_check_and_unscale_cpu_(
        self, grads: List, found_inf: torch.Tensor, inv_scale: torch.Tensor
    ) -> None:
        """CPU fallback: unscale ``grads`` in place and flag inf/nan.

        Processing stops at the first non-finite gradient; gradients before it
        are already unscaled, the offending one and those after it are not.
        """
        if len(grads) == 0:
            return
        if inv_scale.numel() != 1:
            raise ValueError("inv_scale must be a 1-element tensor.")
        if found_inf.numel() != 1:
            raise ValueError("found_inf must be a 1-element tensor.")

        for grad in grads:
            if grad.device.type != "cpu":
                log.error(
                    "tensor device is %s but was expected to be ``cpu``",
                    grad.device,
                )
                raise ValueError(
                    "Gradients were found on a non-CPU device when"
                    " expected to be on CPU."
                )
            if (
                torch.isinf(grad).any().item() is True
                or torch.isnan(grad).any().item() is True
            ):
                found_inf.data = torch.tensor([1.0])
                break
            else:
                grad.data *= inv_scale.item()

    def _unscale_grads_(
        self,
        optimizer: torch.optim.Optimizer,
        inv_scale: torch.Tensor,
        found_inf: torch.Tensor,
        allow_fp16: bool = True,
    ) -> Dict[torch.device, torch.Tensor]:
        """Unscale all gradients of ``optimizer`` and return per-device inf flags."""
        per_device_inv_scale = _GeneralMultiDeviceReplicator(inv_scale)
        per_device_found_inf = _GeneralMultiDeviceReplicator(found_inf)

        # device -> dtype -> list of gradient tensors
        per_device_and_dtype_grads = defaultdict(
            lambda: defaultdict(list))
        with torch.no_grad():
            for group in optimizer.param_groups:
                for param in group['params']:
                    if param.grad is None:
                        continue
                    if (not allow_fp16) and param.grad.dtype == torch.float16:
                        raise ValueError(
                            "Attempting to unscale FP16 gradients.")
                    if param.grad.is_sparse:
                        if param.grad.dtype is torch.float16:
                            # coalesce is not supported in torch.float16
                            param_grad_fp32 = param.grad.type(
                                torch.float32).coalesce()
                            param.grad = param_grad_fp32.type(torch.float16)
                        to_unscale = param.grad._values()
                    else:
                        to_unscale = param.grad

                    per_device_and_dtype_grads[to_unscale.device][
                        to_unscale.dtype
                    ].append(to_unscale)

            for device, per_dtype_grads in per_device_and_dtype_grads.items():
                for grads in per_dtype_grads.values():
                    if grads[0].device.type == "cpu":
                        self._foreach_non_finite_check_and_unscale_cpu_(
                            grads,
                            per_device_found_inf.get(device),
                            per_device_inv_scale.get(device),
                        )
                    else:
                        torch._amp_foreach_non_finite_check_and_unscale_(
                            grads,
                            per_device_found_inf.get(device),
                            per_device_inv_scale.get(device),
                        )
        # There exist contexts (e.g. w/ `use_orig_params=True`) wherein some
        # ranks may have no (non-zero sized) parameter shards, necessitating the
        # initialization of `per_device_found_inf._per_device_tensors` here
        if not per_device_found_inf._per_device_tensors:
            self._check_none_scale()
            per_device_found_inf.get(self._scale.device)
        return per_device_found_inf._per_device_tensors

    def unscale_(self, optimizer: torch.optim.Optimizer) -> bool:
        """Unscale the gradients of ``optimizer`` and report overflow.

        The per-device inf flags are all-reduced over ``self.process_group`` so
        every rank reaches the same decision.

        Returns:
            ``True`` when an inf/nan was found on any rank, ``False`` otherwise
            (always ``False`` when the scaler is disabled).

        Raises:
            RuntimeError: If called twice since the last ``update()`` or after
                ``step()``.
            AssertionError: If no inf check was recorded.
        """
        if not self._enabled:
            return False

        self._check_scale_growth_tracker("unscale_")

        optimizer_state = self._per_optimizer_states[id(optimizer)]

        if optimizer_state["stage"] is OptState.UNSCALED:
            raise RuntimeError(
                "unscale_() has already been called on this optimizer since the last update()."
            )
        elif optimizer_state["stage"] is OptState.STEPPED:
            raise RuntimeError("unscale_() is being called after step().")

        # FP32 division can be imprecise for certain compile options, so we carry out the reciprocal in FP64.
        self._check_none_scale()
        inv_scale = self._scale.double().reciprocal().float()
        found_inf = torch.full(
            (1,), 0.0, dtype=torch.float32, device=self._scale.device
        )

        optimizer_state["found_inf_per_device"] = self._unscale_grads_(
            optimizer, inv_scale, found_inf, True
        )
        optimizer_state["stage"] = OptState.UNSCALED

        # Synchronize the detected inf across the ranks
        future_handles = []
        # (cpu flag, device copy) pairs to write back once the reduction is done
        cpu_flags = []

        for v in optimizer_state["found_inf_per_device"].values():
            if v.device.type == "cpu":
                v_on_cuda = v.cuda()
                future_handles.append(
                    dist.all_reduce(
                        v_on_cuda, async_op=True, group=self.process_group
                    ).get_future()
                )
                cpu_flags.append((v, v_on_cuda))
            else:
                future_handles.append(
                    dist.all_reduce(
                        v, async_op=True, group=self.process_group
                    ).get_future()
                )

        # Make sure that the calls are done before moving out.
        if future_handles:
            torch.futures.wait_all(future_handles)
        # Copy back only now: before the wait the device copy may still hold
        # the un-reduced local value.
        for cpu_flag, reduced in cpu_flags:
            cpu_flag.copy_(reduced.cpu())

        if (
            len(optimizer_state["found_inf_per_device"]) == 0
        ):
            raise AssertionError("No inf checks were recorded for this optimizer.")

        found_inf = sum(v.item()
                        for v in optimizer_state["found_inf_per_device"].values())
        return found_inf > 0.

    def step(
        self, optimizer: torch.optim.Optimizer, *args, **kwargs
    ) -> Optional[float]:
        """Delegate to the base ``GradScaler.step``."""
        return super().step(optimizer, *args, **kwargs)

    def _update_scale(self, found_inf) -> None:
        """
        Update the scale from the combined inf flag of this iteration.

        On overflow (``found_inf >= 1``) the growth tracker is reset and the
        hysteresis counter is decremented; once the counter is exhausted the
        scale is multiplied by the backoff factor and clamped to ``min_scale``.
        Otherwise the scale is multiplied by the growth factor when the growth
        interval is reached, which also re-arms the hysteresis counter.
        """
        if found_inf.item() >= 1.0:
            self._growth_tracker = 0
            self._hysteresis_tracker -= 1
            if self._hysteresis_tracker <= 0:
                self._scale *= self._backoff_factor  # type: ignore[arg-type]
                if self.min_scale is not None:
                    self._scale.clamp_(min=self.min_scale)
        else:
            successful = self._growth_tracker + 1  # type: ignore[operator]
            if successful == self._growth_interval:  # type: ignore[arg-type]
                self._scale *= self._growth_factor  # type: ignore[arg-type]
                self._growth_tracker = 0
                self._hysteresis_tracker = self.hysteresis
            else:
                self._growth_tracker = successful

    def update(self, new_scale: Optional[Union[float, FloatTensor]] = None) -> None:
        """
        Updates the scale factor.
        If optimizer steps were skipped the scale is reduced according to
        ``_update_scale``. If ``growth_interval`` unskipped iterations occurred
        consecutively, the scale is multiplied by ``growth_factor`` to increase it.
        Passing ``new_scale`` sets the new scale value manually. (``new_scale`` is not
        used directly, it's used to fill GradScaler's internal scale tensor. So if
        ``new_scale`` was a tensor, later in-place changes to that tensor will not further
        affect the scale GradScaler uses internally.)
        Args:
            new_scale (float or :class:`torch.cuda.FloatTensor`, optional, default=None):  New scale factor.
        .. warning::
            :meth:`update` should only be called at the end of the iteration, after ``scaler.step(optimizer)`` has
            been invoked for all optimizers used this iteration.
        """

        if not self._enabled:
            return

        _scale, _growth_tracker = self._check_scale_growth_tracker(
            "update")  # type: ignore[var-annotated]

        if new_scale is not None:
            # Accept a new user-defined scale.
            if isinstance(new_scale, float):
                self._scale.fill_(new_scale)  # type: ignore[union-attr]
            else:
                if not (isinstance(new_scale, torch.cuda.FloatTensor) and (new_scale.numel() == 1) and not new_scale.requires_grad):
                    raise AssertionError("new_scale should be a float or a 1-element torch.cuda.FloatTensor with requires_grad=False.")
                self._scale.copy_(new_scale)  # type: ignore[union-attr]
        else:
            # Consume shared inf/nan data collected from optimizers to update the scale.
            # If all found_inf tensors are on the same device as self._scale, this operation is asynchronous.
            found_infs = [
                found_inf.to(device=_scale.device, non_blocking=True)
                for state in self._per_optimizer_states.values()
                for found_inf in state["found_inf_per_device"].values()
            ]

            if len(found_infs) == 0:
                raise AssertionError("No inf checks were recorded prior to update.")

            found_inf_combined = found_infs[0]
            if len(found_infs) > 1:
                for i in range(1, len(found_infs)):
                    found_inf_combined += found_infs[i]

            self._update_scale(found_inf_combined)

        # To prepare for next iteration, clear the data collected from optimizers this iteration.
        self._per_optimizer_states = defaultdict(_refresh_per_optimizer_state)

    def _meg_step(self, optimizer, *args, **kwargs):
        '''Run the optimizer step only, without unscaling.

        ``unscale_`` is expected to have been called separately, which leaves
        room for other operations (such as gradient clipping) in between.
        '''
        if not self._enabled:
            return optimizer.step(*args, **kwargs)

        self._check_scale_growth_tracker("step")

        optimizer_state = self._per_optimizer_states[id(optimizer)]

        if optimizer_state["stage"] is OptState.STEPPED:
            raise RuntimeError(
                "step() has already been called since the last update()."
            )

        retval = self._maybe_opt_step(
            optimizer, optimizer_state, *args, **kwargs)
        optimizer_state["stage"] = OptState.STEPPED
        return retval

    def state_dict(self):
        """Return the scale, growth tracker and hysteresis tracker."""
        state_dict = {}
        state_dict['scale'] = self._scale
        state_dict['growth_tracker'] = self._growth_tracker
        state_dict['hysteresis_tracker'] = self._hysteresis_tracker
        return state_dict

    def load_state_dict(self, state_dict: Dict):
        """Restore the values written by ``state_dict``; the scale is moved to the current device."""
        self._scale = state_dict['scale'].cuda(torch.cuda.current_device())
        self._growth_tracker = state_dict['growth_tracker']
        self._hysteresis_tracker = state_dict['hysteresis_tracker']

    def _check_none_scale(self):
        """Raise ``AssertionError`` if the scale tensor has not been created."""
        if self._scale is None:
            raise AssertionError("Got none scale")
def _get_param_groups(
    model_chunks: List,
    no_weight_decay_cond: Callable,
    scale_lr_cond: Callable,
    lr_mult: float,
) -> List[Dict]:
    """Create parameter groups for optimizer.

    Parameters that require gradients are grouped by weight-decay multiplier
    (regularized vs non regularized) and learning-rate multiplier
    (``1.0`` vs ``lr_mult``). ``scale_lr_cond`` is used during finetuning,
    where the head of the network requires a scaled base learning rate.

    Args:
        model_chunks (List[MegatronModule]): model chunks to create parameter
            groups for; a single module is accepted as well.
        no_weight_decay_cond (func): ``(name, param) -> bool`` deciding whether
            a parameter skips weight decay. When ``None``, parameters whose
            name ends with ``.bias`` or that carry a truthy ``_is_1D_param``
            attribute skip it.
        scale_lr_cond (func): ``(name, param) -> bool`` deciding whether a
            parameter uses the scaled learning rate; ``None`` scales nothing.
        lr_mult (float): learning rate multiplier for parameters that
            satisfy scale_lr_cond.

    Returns:
        List of parameter groups, each with keys ``params``, ``wd_mult``,
        ``lr_mult`` and ``is_decoupled_lr`` (always ``False``).
    """
    if not isinstance(model_chunks, list):
        model_chunks = [model_chunks]
    # Map (wd_mult, lr_mult) to params.
    params_map = {}
    for model_chunk in model_chunks:
        for name, param in model_chunk.named_parameters():
            if not param.requires_grad:
                continue
            if no_weight_decay_cond is not None:
                no_wd = no_weight_decay_cond(name, param)
            else:
                # Do not regularize biases and parameters flagged as 1-D.
                no_wd = name.endswith(".bias") or getattr(param, "_is_1D_param", False)

            scale_lr = scale_lr_cond(name, param) if scale_lr_cond is not None else False

            # Per-parameter multipliers live in their own locals: rebinding
            # the ``lr_mult`` argument here would reset it to 1.0 for every
            # parameter that follows.
            param_wd_mult = 0.0 if no_wd else 1.0
            param_lr_mult = lr_mult if scale_lr else 1.0

            params_map.setdefault((param_wd_mult, param_lr_mult), []).append(param)

    param_groups = []
    for (group_wd_mult, group_lr_mult), params in params_map.items():
        if len(params) == 0:
            raise ValueError("Empty params list")
        param_groups.append(
            {
                'params': params,
                'wd_mult': group_wd_mult,
                'lr_mult': group_lr_mult,
                'is_decoupled_lr': False,
            }
        )
    return param_groups


def get_optimizer(
    config: "OptimizerConfig",
    model: List,
    no_weight_decay_cond: Callable = None,
    scale_lr_cond: Callable = None,
    lr_mult: float = 1.0
) -> "MegatronOptimizer":
    """Build the LayerZeRO optimizer for ``model``.

    The parameters are first split into groups by ``_get_param_groups`` and
    then handed to ``_get_zero_optimizer``.
    """
    param_groups = _get_param_groups(model, no_weight_decay_cond, scale_lr_cond, lr_mult)
    optimizer = _get_zero_optimizer(config, param_groups)
    return optimizer


def _get_zero_optimizer(
    config,
    param_groups
):
    """Create the fused optimizer selected by ``config`` and wrap it.

    Args:
        config: Optimizer configuration; ``config.optimizer`` must be
            ``'adam'`` or ``'sgd'``.
        param_groups: Parameter groups passed to the fused optimizer.

    Returns:
        A ``LayerZeROptimizer``; a ``ShardedGradScaler`` is attached only when
        ``config.fp16`` is set.

    Raises:
        ValueError: If ``config.optimizer`` names an unsupported optimizer.
    """
    # NOTE(review): printed by every process that builds an optimizer.
    print(f"{config.weight_decay=}")
    if config.optimizer == 'adam':
        optimizer = Adam(
            param_groups,
            lr=config.lr,
            weight_decay=config.weight_decay,
            betas=(config.adam_beta1, config.adam_beta2),
            eps=config.adam_eps,
        )
        init_state_fn = None

    elif config.optimizer == 'sgd':
        optimizer = SGD(
            param_groups,
            lr=config.lr,
            weight_decay=config.weight_decay,
            momentum=config.sgd_momentum,
        )
        init_state_fn = None
    else:
        raise ValueError('{} optimizer is not supported.'.format(config.optimizer))

    grad_scaler = None
    if config.fp16:
        grad_scaler = ShardedGradScaler(
            init_scale=config.initial_loss_scale,
            min_scale=config.min_loss_scale,
            growth_factor=2.0,
            backoff_factor=0.5,
            growth_interval=config.loss_scale_window,
            hysteresis=config.hysteresis,
        )

    optimizer_args = [optimizer, config, grad_scaler, init_state_fn]
    optimizer = LayerZeROptimizer(*optimizer_args)
    return optimizer
def pp_stages():
    """Return ``world_size // pipeline-parallel world size``.

    Returns ``1`` when the model-parallel state is not initialised.
    NOTE(review): despite the name this is the number of ranks per pipeline
    stage, not the number of stages - confirm the intent with the callers.
    """
    if not mpu.is_initialized():
        return 1
    world_size = dist.get_world_size()
    # Query the size through mpu instead of calling ``len()`` on the group.
    return world_size // mpu.get_pipeline_model_parallel_world_size()


def pp_broadcast_grad_scale(grad_scale, device):
    """Broadcast the loss scale from rank ``world_size - pp_world_size``.

    Args:
        grad_scale: Scale as a tensor or a number; numbers are converted to
            an fp32 tensor on ``device``.
        device: Device used when ``grad_scale`` has to be converted.

    Returns:
        ``grad_scale`` unchanged when ``pp_stages()`` is 1, otherwise the
        broadcast tensor.
    """
    if pp_stages() == 1:
        return grad_scale
    pp_world_size = mpu.get_pipeline_model_parallel_world_size()
    world_size = dist.get_world_size()
    # NOTE(review): assumes this rank belongs to the last pipeline stage under
    # the launcher's rank layout - TODO confirm.
    last_stage_rank0 = world_size - pp_world_size
    if not isinstance(grad_scale, torch.Tensor):
        grad_scale = torch.tensor(grad_scale, dtype=torch.float32).to(device)
    dist.broadcast(grad_scale, src=last_stage_rank0)
    return grad_scale


class LayerZeROptimizer(MegatronOptimizer):
    """Megatron optimizer wrapper for LayerZeRO-sharded parameters.

    It combines the wrapped optimizer with an optional ``ShardedGradScaler``
    and gradient clipping through ``clip_grad_norm``.
    """

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        config: OptimizerConfig,
        grad_scaler: Optional[ShardedGradScaler],
        init_state_fn: Callable = lambda x: None,
        process_group: Optional[ProcessGroup] = dist.group.WORLD,
    ):
        # NOTE: ``init_state_fn`` is accepted for interface compatibility but a
        # no-op is always handed to the base class.
        super().__init__(optimizer, config, lambda x: None)
        self.grad_scaler = grad_scaler
        self.process_group = process_group or dist.group.WORLD
        self.device = torch.device('cuda')

    def scale_loss(self, loss: torch.Tensor) -> torch.Tensor:
        """Simple scaling."""
        return self.get_loss_scale() * loss

    def get_loss_scale(self) -> torch.Tensor:
        '''Return the current loss scale on ``self.device`` (``1.0`` without a grad scaler).'''
        if self.grad_scaler is None:
            return torch.tensor([1.], dtype=torch.float32, device=self.device)
        return self.grad_scaler.loss_scale.to(self.device)

    @torch.no_grad()
    def step(self) -> Tuple[bool, torch.Tensor, torch.Tensor]:
        """Unscale, clip and apply the gradients.

        Returns:
            ``(update_successful, grad_norm, num_zeros_in_grad)``.
            ``grad_norm`` is ``None`` unless ``config.clip_grad > 0`` and
            ``num_zeros_in_grad`` is ``None`` unless
            ``config.log_num_zeros_in_grad`` is set.
        """
        if self.grad_scaler:
            # Synchronise the scale across pipeline ranks before unscaling.
            self.grad_scaler._scale = pp_broadcast_grad_scale(self.get_loss_scale(), self.device)
            found_inf = self.grad_scaler.unscale_(self.optimizer)
        else:
            found_inf = False

        grad_norm = None
        if self.config.clip_grad > 0.0:
            if self.process_group is None:
                raise RuntimeError(f"{self.process_group=} is None")
            grad_norm = clip_grad_norm(self.get_parameters(), self.config.clip_grad, norm_type=2, process_group=self.process_group)

        num_zeros_in_grad = self.count_zeros() if self.config.log_num_zeros_in_grad else None

        if self.grad_scaler:
            self.grad_scaler._meg_step(self.optimizer)
            self.grad_scaler.update()
        else:
            self.optimizer.step()

        return not found_inf, grad_norm, num_zeros_in_grad

    def prepare_grads(self) -> bool:
        raise RuntimeError("This function should not be explicitly called by user")

    def step_with_ready_grads(self) -> bool:
        raise RuntimeError("This function should not be explicitly called by user")

    def get_main_grads_for_grad_norm(self) -> List[torch.Tensor]:
        raise RuntimeError("This function should not be explicitly called by user")

    def count_zeros(self):
        """Return the number of zero gradient entries, summed over ``process_group``."""
        # Start from a tensor so the collective also works on a rank where no
        # parameter currently holds a gradient.
        num_zeros = torch.zeros((), dtype=torch.int64, device=self.device)
        for param in self.get_parameters():
            if param.grad is not None:
                num_zeros = num_zeros + (param.grad.numel() - torch.count_nonzero(param.grad))
        dist.all_reduce(num_zeros, group=self.process_group)
        return num_zeros

    def reload_model_params(self):
        '''Megatron optimizer api'''
        pass

    def state_dict(self):
        """Return the optimizer state plus, if present, the grad-scaler state."""
        state_dict = {}
        state_dict['optimizer'] = self.optimizer.state_dict()
        if self.grad_scaler:
            state_dict['grad_scaler'] = self.grad_scaler.state_dict()
        return state_dict

    def load_state_dict(self, state_dict):
        """Load a dict produced by ``state_dict``.

        The optimizer state is read from ``'optimizer'`` or, failing that,
        from ``'optimizer_state_dict'``. A missing grad-scaler entry only
        triggers a warning.
        """
        # Optimizer.
        optimizer_key = 'optimizer'
        if optimizer_key not in state_dict:
            optimizer_key = 'optimizer_state_dict'
        self.optimizer.load_state_dict(state_dict[optimizer_key])
        # Grad scaler.
        if self.grad_scaler:
            if "grad_scaler" not in state_dict:
                warnings.warn(f"grad scaler state dict missing")
            else:
                self.grad_scaler.load_state_dict(state_dict['grad_scaler'])

    def sharded_state_dict(
        self, model_sharded_state_dict, is_loading: bool = False
    ):
        """ Builds sharded state dict for the optimizer, based on model's sharded state dict.

        Args:
            model_sharded_state_dict (ShardedStateDict): sharded state dict of the model
            is_loading (bool, optional): flag indicating whether the state dict will be used to save or load the optimizer state.
                Defaults to False.

        Raises:
            NotImplementedError: always; this API is not supported here.
        """
        raise NotImplementedError("This api should not be called")

    def zero_grad(self, set_to_none: bool = True):
        """Clear the gradients through the wrapped optimizer.

        NOTE(review): ``set_to_none`` is not forwarded; the wrapped optimizer's
        own default applies - confirm before relying on the flag.
        """
        self.optimizer.zero_grad()

    def disable_pre_hook(self):
        return

    def enable_pre_hook(self):
        return
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
BACKWARD_POST_QUEUE = deque()


@no_type_check
def _register_pre_backward_hooks(
    state: _ZeRO3State,
    module: nn.Module,
    outputs: Any,
    handle: FlatParamHandle,
) -> Any:
    """
    Registers pre-backward hooks on the tensors that require gradients in the
    forward pass outputs ``outputs``, which were computed using the
    ``FlatParameter`` of ``handle``.

    Args:
        module (nn.Module): Fully sharded module (see [Note: Fully Sharded
            Module]).

    Returns:
        Forward pass outputs with pre-backward hooks registered to tensors that
        require gradients.
    """
    # If there is no gradient computation, then there is no need for
    # pre-backward logic
    if not torch.is_grad_enabled():
        return outputs
    if state._is_root:
        state._post_backward_callback_queued = False  # only defined on the root

    if handle:
        handle._needs_pre_backward_unshard = False
        handle._ran_pre_backward_hook = False
        # Since this handle's `FlatParameter` participated in a forward, we
        # conservatively assume that it will be used in the backward

    def _register_hook(t: torch.Tensor) -> torch.Tensor:
        if t.requires_grad:
            t.register_hook(
                functools.partial(_pre_backward_hook, state, module, handle)
            )
            if handle:
                handle._needs_pre_backward_unshard = True
        return t

    return _apply_to_tensors(_register_hook, outputs)


def _register_post_backward_hook(
    state: _ZeRO3State,
    handle: Optional[FlatParamHandle],
) -> None:
    """
    Registers ``_post_backward_ready_hook`` as a multi post-grad hook on the
    tensors of the handle's flat parameter that require gradients, and stores
    the hook handle on the flat parameter.
    """
    # Without a handle there are no parameters to attach the hook to
    if not handle:
        return
    flat_param = handle.flat_param
    inp_tensors = [p for p in flat_param._tensors if p.requires_grad]
    hook_handle = register_multi_post_grad_hook(
        inp_tensors, functools.partial(_post_backward_ready_hook, state, handle)
    )
    flat_param._post_backward_hook_state = (
        None, hook_handle)  # type: ignore[attr-defined]


def _register_post_backward_reshard_only_hook(
    state: _ZeRO3State,
    handle: Optional[FlatParamHandle],
    args: Tuple[Any, ...],
    kwargs: Dict[str, Any],
) -> None:
    """
    Registers post-backward hooks to reshard flat parameters that do not
    require gradient. We register these using multi-post-grad hooks on the
    input activations to ensure that all gradients that may depend on the
    parameters have been computed before resharding.
    """
    # If there is no gradient computation, then there is no need for
    # post-backward logic
    if not torch.is_grad_enabled():
        return
    if not handle:
        return
    # Flat parameters that require gradient take the regular post-backward path
    if handle.flat_param.requires_grad:
        return
    # Only now flatten the inputs, so the typical case (every flat parameter
    # requires gradient) pays no CPU overhead
    args_list, _ = tree_flatten(args)
    kwargs_list, _ = tree_flatten(kwargs)
    inp_tensors = [
        obj
        for obj in chain(args_list, kwargs_list)
        if torch.is_tensor(obj) and obj.requires_grad
    ]
    hook_handle = register_multi_grad_hook(
        inp_tensors, functools.partial(_post_backward_reshard, state, handle)
    )
    handle.flat_param._post_backward_hook_state = (
        hook_handle,)


@no_type_check
def _register_post_backward_final_callback(
    state: _ZeRO3State, module: nn.Module
) -> None:
    """
    Registers the post-backward final callback that runs at the end of the
    backward pass. This should be called from the root instance at the
    beginning of the pre-backward. Calling it again while the callback is
    already queued is a no-op.
    """
    _p_assert(
        state._is_root,
        "Only the root ZeRo3 instance should register the post-backward callback",
    )
    if state._post_backward_callback_queued:
        return
    _assert_in_training_states(state, [TrainingState.IDLE])
    state._post_backward_callback_queued = True
    Variable._execution_engine.queue_callback(
        functools.partial(_post_backward_final_callback, state, module)
    )


@no_type_check
def _pre_forward(
    state: _ZeRO3State,
    handle: Optional[FlatParamHandle],
    unshard_fn: Callable,
    module: nn.Module,
    args: Tuple[Any, ...],
    kwargs: Dict[str, Any],
) -> Tuple[Tuple[Any, ...], Dict[str, Any]]:
    """
    Runs the pre-forward logic. This includes an opportunity to unshard
    currently sharded parameters such as those for the current forward and
    registering the reshard-only post-backward hook for parameters that do not
    require gradient. This function also converts forward ``args`` and
    ``kwargs`` to the mixed-precision parameter dtype when
    ``constants.AUTO_CAST_INPUT`` is set and mixed precision is configured.

    Args:
        handle (Optional[FlatParamHandle]): Handle giving the parameters used
            in the current forward.
        unshard_fn (Optional[Callable]): A callable to unshard any currently
            sharded parameters or ``None`` to not do any unsharding.
        module (nn.Module): Module whose forward this method runs right before;
            expected by the hook signature.
        args (Tuple[Any, ...]): Module forward ``args``.
        kwargs (Dict[str, Any]): Module forward ``kwargs``.

    Returns:
        The (possibly cast) ``args`` and ``kwargs``.
    """
    with torch.profiler.record_function("LayerZeRO3._pre_forward"):
        # For `fully_shard` + `checkpoint`, skip pre-forward logic in the
        # recomputed forward
        if handle and handle._training_state == HandleTrainingState.BACKWARD_PRE:
            return args, kwargs
        state.training_state = TrainingState.FORWARD_BACKWARD
        state._exec_order_data.record_pre_forward(handle, module.training)
        if handle:
            handle._training_state = HandleTrainingState.FORWARD

        with torch.autograd.profiler.record_function("Unshard Function"):
            if unshard_fn is not None:
                unshard_fn(state, handle)
        if handle:
            handle._use_unsharded_views(as_params=False)
        if constants.AUTO_CAST_INPUT and state.mixed_precision:
            # Recursively convert args and kwargs to specified precision.
            input_dtype: Optional[torch.dtype] = state.mixed_precision.param_dtype
            args, kwargs = _cast_forward_inputs(input_dtype, *args, **kwargs)
        _register_post_backward_reshard_only_hook(state, handle, args, kwargs)
        return args, kwargs
+ + Args: + handles (List[FlatParamHandle]): Handles giving the parameters used in + the current forward. + reshard_fn (Optional[Callable]): A callable to reshard any currently + unsharded parameters (e.g. from the current forward) or ``None`` to + not do any resharding. + module (nn.Module): Module whose forward just ran, which should be a + fully sharded module (see [Note: Fully Sharded Module]); expected + by the hook signature. + input (Any): Unused; expected by the hook signature. + output (Any): Forward pass output; pre-backward hooks are registered on + the tensors that require gradients in this output. + + Postcondition: Each ``FlatParameter`` 's data points to the sharded flat + parameter. + """ + with torch.profiler.record_function(f"LayerZeRO3._post_forward"): + # For `fully_shard` + `checkpoint`, skip post-forward logic in the + if handle and handle._training_state != HandleTrainingState.FORWARD: + return output + #! adapt megatron AC to avoid free after forward + if handle and not handle.enter_backward: + state._exec_order_data.record_post_forward(handle) + with torch.autograd.profiler.record_function("Reshard Function"): + if reshard_fn is not None: + reshard_fn(state, handle) + # Register pre-backward hooks to unshard the flat parameters for the + # gradient computation (if needed) + output = _register_pre_backward_hooks(state, module, output, handle) + state.training_state = TrainingState.IDLE + if handle: + handle._training_state = HandleTrainingState.IDLE + return output + + +@no_type_check +def _pre_backward_hook( + state: _ZeRO3State, + module: nn.Module, + handle: FlatParamHandle, + grad, + *unused: Any, +) -> Any: + """ + Prepares ``_handle`` 's ``FlatParameter`` s for gradient computation. + + Args: + module (nn.Module): Fully sharded module (see [Note: Fully Sharded + Module]). + Post Condition: + parameter in unshard and unpadded, used for grad compute + grad is unshard and unpadded. 
+ """ + # Only run the pre-backward hook once per group of handles involved in the + # same module forward computation + if handle and getattr(handle, "_ran_pre_backward_hook", False): + return grad + if handle: + handle.enter_backward = True + with torch.profiler.record_function(f"LayerZeRO3._pre_backward_hook"): + # Queue the post-backward callback once for the root FSDP instance to + # attach it to the outermost backward graph task so that it is called + # after all backward calls complete + if state._is_root and not state._post_backward_callback_queued: + _register_post_backward_final_callback(state, module) + _reset_flat_param_grad_info_if_needed(state._all_handles) + elif handle: + allowed_states = [TrainingState.IDLE] + if _is_composable(state): + allowed_states.append(TrainingState.FORWARD_BACKWARD) + _assert_in_training_states(state, allowed_states) + + state.training_state = TrainingState.FORWARD_BACKWARD + # Queueing the post-backward callback is the only logic that is not + # per-handle in the pre-backward hook, so we can return early here if + # there are no handles. + if not handle: + return grad + #! ensure that last handle has finished accumulate grad (backward) on cpu + if len(BACKWARD_POST_QUEUE) > 0: + (_last_state, _last_handle) = BACKWARD_POST_QUEUE.popleft() + _post_backward_hook(_last_state, _last_handle) + handle._training_state = HandleTrainingState.BACKWARD_PRE + _register_post_backward_hook(state, handle) + _pre_forward_backward_unshard(state, handle) + _pre_bwd_reload_full_prec_grad(state, handle) + #! 
alloc memory on default stream if not allocated + handle.prepare_gradient_for_backward() + handle._ran_pre_backward_hook = True + return grad + + +@no_type_check +@torch.no_grad() +def _post_backward_ready_hook( + state: _ZeRO3State, + handle: FlatParamHandle, + *unused: Any, +): + if not handle: + return + BACKWARD_POST_QUEUE.append((state, handle)) + + +@no_type_check +@torch.no_grad() +def _post_backward_hook( + state: _ZeRO3State, + handle: FlatParamHandle, + *unused: Any, +): + """ + Reduce-scatters the gradient of ``handle`` 's ``FlatParameter``. + + Precondition: The ``FlatParameter`` 's ``.grad`` attribute contains the + unsharded gradient for the local batch. + + Postcondition: + - If no sync, then the ``.grad`` attribute is the reduced + unsharded gradient. + - Otherwise, the ``_saved_grad`` attribute is the reduced sharded + gradient. + """ + flat_param = handle.flat_param + handle.enter_backward = False + + with torch.autograd.profiler.record_function( + f"LayerZeRO3._post_backward_hook" + ): + _assert_in_training_states(state, [TrainingState.FORWARD_BACKWARD]) + # For multiple applications of reentrant AC across submodules sharing + # the same `FlatParameter`, the post-backward hook may run multiple + # times in one backward, in which case we permit the state to already + # be in `BACKWARD_POST`. 
+ _p_assert( + handle._training_state + in (HandleTrainingState.BACKWARD_PRE, HandleTrainingState.BACKWARD_POST), + f"Expects `BACKWARD_PRE` or `BACKWARD_POST` state but got {handle._training_state}", + ) + handle._training_state = HandleTrainingState.BACKWARD_POST + + if flat_param.grad is None: + return + if flat_param.grad.requires_grad: + raise RuntimeError("ZeRO3 does not support gradients of gradients") + + _post_backward_reshard(state, handle) + _accumulate_grad(state, handle) + reduce_scatter_sync_gradients(state, handle) + handle._ran_post_backward_hook = True + + +def reduce_scatter_sync_gradients( + state: _ZeRO3State, + handle: FlatParamHandle): + ''' + Performs a sync in zero1 process group + ''' + with torch.autograd.profiler.record_function(f"Reduce Scatter Gradients"): + if not state._sync_gradients: + return + flat_param = handle.flat_param + if flat_param is not None and flat_param._post_backward_called: + return + flat_param._post_backward_called = True + if state.backward_reduce_scatter == BackwardReduceScatter.BACKWARD_PRE: + state.wait_critical_path_events() + _reduce_grad(state, handle) + + +@no_type_check +@torch.no_grad() +def _post_backward_final_callback_no_sync( + state: _ZeRO3State, + module: nn.Module, +): + if not state._is_root or state._sync_gradients: + raise RuntimeError("The post-backward no sync callback should only be called \ + on the root FSDP instance without sync gradients") + + while len(BACKWARD_POST_QUEUE) > 0: + (_last_state, _last_handle) = BACKWARD_POST_QUEUE.popleft() + _post_backward_hook(_last_state, _last_handle) + + root_state: _ZeRO3State = state + root_state._exec_order_data.next_iter_during_accumulation() + for zero3_state in state._all_zero3_states: + zero3_state.training_state = TrainingState.IDLE + handle: FlatParamHandle = zero3_state._handle + if handle: + handle._ran_pre_backward_hook = False + handle._ran_post_backward_hook = False + handle._training_state = HandleTrainingState.IDLE + 
handle.prev_iter_synced = False + if handle._offload_grads: + while True: + offload_event = root_state._offload_event_queue._dequeue() + if offload_event: + (event, last_handle) = offload_event + event.wait() + last_handle.free_full_prec_grad() + else: + break + root_state._post_backward_callback_queued = False + + +@no_type_check +@torch.no_grad() +def _post_backward_final_callback_sync_gradients( + state: _ZeRO3State, + module: nn.Module +): + if not (state._is_root and state._sync_gradients): + raise RuntimeError("The post-backward sync callback should \ + only be called on the root FSDP instance with sync gradients") + + while len(BACKWARD_POST_QUEUE) > 0: + (_last_state, _last_handle) = BACKWARD_POST_QUEUE.popleft() + _post_backward_hook(_last_state, _last_handle) + + root_state: _ZeRO3State = state + root_state._exec_order_data.next_iter() + for zero3_state in state._all_zero3_states: + _catch_all_reshard(zero3_state) + zero3_state.training_state = TrainingState.IDLE + handle: FlatParamHandle = zero3_state._handle + #! if post_backward is done, but flat_param has not reduce scatter + if state.backward_reduce_scatter == BackwardReduceScatter.BACKWARD_PRE: + if handle and handle._ran_post_backward_hook and not handle.flat_param._post_backward_called: + reduce_scatter_sync_gradients(zero3_state, handle) + if handle: + handle._ran_pre_backward_hook = False + handle._ran_post_backward_hook = False + handle._needs_pre_backward_unshard = False + handle._post_forward_index = None + handle._training_state = HandleTrainingState.IDLE + handle._prefetched = False + handle._needs_param_sync = root_state._sync_gradients + handle._param_synced = False + handle._grad_synced = False + #! 
free handle zero3 shard if _sync_gradients in reshard after backward cause next run we use zero1 shard + handle.flat_param._zero3_shard = None + handle.prev_iter_synced = True + + _finalize_params(zero3_state) + while True: + rs_event = root_state._rs_event_queue._dequeue() + if rs_event: + (rs, last_handle) = rs_event + rs.wait() + last_handle.free_full_prec_grad() + else: + break + + compute_stream = state._default_stream + compute_stream.wait_stream(root_state._post_backward_stream) + for handle in state._all_handles: + flat_param = handle.flat_param + if flat_param.requires_grad: + handle.prepare_gradient_for_zero1() + root_state._post_backward_callback_queued = False + + +@no_type_check +@torch.no_grad() +def _post_backward_final_callback( + state: _ZeRO3State, + module: nn.Module +): + """ + This waits for the post-backward to finish and performs some final cleanup. + This runs at the end of the entire backward pass and should only be called + on the root FSDP instance. + """ + if dist.get_rank() == 0: + logger.info( + f"_post_backward_final_callback Being Called and reset states") + if state._sync_gradients: + _post_backward_final_callback_sync_gradients(state, module) + else: + _post_backward_final_callback_no_sync(state, module) + + +@no_type_check +def _catch_all_reshard( + state: _ZeRO3State, +) -> None: + """ + Reshards the parameters that may not have been resharded in the + post-backward hook. This can happen when a module's output is used in the + forward pass, meaning that its pre-backward hook runs (unsharding the + parameter), but the post-backward hook does not run because the output was + not jused in the loss computation corresponding to this backward pass. 
+ """ + # Wrap with a try-except to provide a more informative traceback if an + # error is raised + try: + if state._handle: + already_resharded = ( + state._handle.flat_param.data_ptr() + == state._handle.flat_param._zero1_shard.data_ptr() + # If FSDP skipped using sharded views, then the flat parameter + # still points to the sharded data, so we need to reshard to + # use sharded views + and not state._handle._skipped_use_sharded_views + ) + if already_resharded: + return + _reshard(state, state._handle, True) + except Exception as e: + _p_assert( + False, + f"Got exception in the catch-all reshard for {state}: {str(e)}", + raise_assertion_error=False, + ) + raise e + + +@no_type_check +def _finalize_params( + state: _ZeRO3State, +) -> None: + """Finalizes the parameters before the next iteration. + """ + handle = state._handle + if not handle: + return + flat_param = handle.flat_param + if hasattr(flat_param, "_post_backward_hook_state"): + post_backward_hook_state_len = len(flat_param._post_backward_hook_state) + expected_post_backward_hook_state_len = int(flat_param.requires_grad) + 1 + _p_assert( + post_backward_hook_state_len == expected_post_backward_hook_state_len, + f"Invalid: ``_post_backward_hook_state``: {flat_param._post_backward_hook_state}", + ) + flat_param._post_backward_hook_state[-1].remove() + delattr(flat_param, "_post_backward_hook_state") + if flat_param.requires_grad: + _p_assert( + hasattr(flat_param, "_post_backward_called"), + "Expects `_post_backward_called` to be set on the `FlatParameter`", + ) + flat_param._post_backward_called = False diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_grad.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_grad.py new file mode 100644 index 000000000..f6f8a365a --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_grad.py @@ -0,0 +1,94 @@ +from typing import Any, Callable, Dict, List, no_type_check, Optional, Set, 
Tuple + +import torch.distributed as dist + +from mindspeed.core.distributed.layerzero.zero3._common_utils import _ZeRO3State +from mindspeed.core.distributed.layerzero.zero3.flat_param import FlatParamHandle, HandleTrainingState +from ._utils import _div_if_needed +from ._shard import _is_last_order_forward + + +@no_type_check +def _reduce_grad(state: _ZeRO3State, handle: FlatParamHandle) -> None: + """ + Runs gradient reduction, sharded gradient and the post-reduction callback. + if accumulate grad, this func will not be called cause whole param unshard + grad will be stored, rather than shard grad. + """ + flat_param = handle.flat_param + rs_event = state._rs_event_queue._dequeue() + if rs_event: + rs, last_hanlde = rs_event + rs.wait() + last_hanlde.free_full_prec_grad() + padded_unsharded_grad, new_sharded_grad = handle._get_reduce_scatter_tensors() + _div_if_needed(padded_unsharded_grad, state._gradient_predivide_factor) + state._post_backward_stream.wait_stream(state._default_stream) + with state._device_handle.stream(state._post_backward_stream): + dist.reduce_scatter_tensor( + new_sharded_grad, + padded_unsharded_grad, + group=handle._get_reduce_scatter_group(), + ) + reduce_scatter_event = state._device_handle.Event() + reduce_scatter_event.record() + state._rs_event_queue.enqueue((reduce_scatter_event, handle)) + #! 
remove all-reduce logic and shard grad accumulation, and grad view logic + handle.set_shard_grad(new_sharded_grad) + + +def offload_grad( + state: _ZeRO3State, handle: FlatParamHandle +): + if not handle: + return + # do not offload the last backward cause it is needed at first + if _is_last_order_forward(state, handle): + return + off_event_handle = state._offload_event_queue._dequeue() + if off_event_handle is not None: + offload_event, last_handle = off_event_handle + offload_event.wait() + last_handle.free_full_prec_grad() + state._offload_stream.wait_stream(state._default_stream) + state._offload_stream.wait_stream(state._unshard_stream) + with state._device_handle.stream(state._offload_stream): + handle.offload_grad() + event = state._device_handle.Event() + event.record() + state._offload_event_queue.enqueue((event, handle)) + + +@no_type_check +def _pre_bwd_reload_full_prec_grad( + state: "_ZeRO3State", + handle: Optional["FlatParamHandle"], +) -> None: + if not handle or handle._training_state != HandleTrainingState.BACKWARD_PRE: + return + + if state._offload_grads: + if not handle.already_load_full_prec_grad(): + handle.alloc_full_prec_grad() + with state._device_handle.stream(state._offload_stream): + handle.reload_full_prec_grad() + handle._check_padded_unsharded( + handle.flat_param._full_prec_grad_padded) + + +def _accumulate_grad( + state: "_ZeRO3State", + handle: Optional["FlatParamHandle"], +): + if not handle or handle._training_state != HandleTrainingState.BACKWARD_POST: + return + if not handle.already_load_full_prec_grad(): + handle.alloc_full_prec_grad() + if state._offload_grads: + state._default_stream.wait_stream(state._offload_stream) + #! 
accumulate grad on compute stream + handle.accumulate_grad() + handle.free_runtime_unshard_grad() + + if state._offload_grads and not state._sync_gradients: + offload_grad(state, handle) diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_initialize.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_initialize.py new file mode 100644 index 000000000..aa2b2970e --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_initialize.py @@ -0,0 +1,134 @@ +from typing import Any, Callable, Dict, List, no_type_check, Optional, Set, Tuple +import logging +import torch.nn as nn +from torch.distributed.utils import _p_assert +import torch.distributed as dist + +import mindspeed.core.distributed.layerzero.zero3._traversal_utils as traversal_utils +from mindspeed.core.distributed.layerzero.zero3._common_utils import ( + _assert_in_training_states, + _ZeRO3State, + TrainingState, +) +from ._utils import ( + _get_buffers_and_dtypes_for_computation, + _cast_buffers_to_dtype_and_device, +) + + +@no_type_check +def _lazy_init( + state: _ZeRO3State, + root_module: nn.Module, +) -> _ZeRO3State: + """ + Performs initialization lazily, typically right before the first forward + pass. The laziness is needed to ensure that the parameter device/dtype and + the FSDP hierarchy have finalized. This method's actual logic only runs on + the root FSDP instance, which performs initialization for all non-root FSDP + instances to avoid partial initialization. + + For the non-composable code path, ``state`` and ``root_module`` should be + the same, namely the zero3 instance itself. 
+ """ + if state._is_root is not None: + return None + if not state._device_handle.is_available(): + # Allow the FSDP constructor to run even without CUDA but check this + # once we start real execution + raise RuntimeError("ZeRO3 does not support CPU only execution") + # The following logic is only run on the root FSDP instance since it will + # set `_is_root=False` for the non-root instances + state._is_root = True + _assert_in_training_states(state, [TrainingState.IDLE]) + _check_flat_params_on_expected_device(state, root_module) + state._all_zero3_states = traversal_utils._get_zero3_states(root_module) + _init_streams(state) + buffers, buffer_dtypes = _get_buffers_and_dtypes_for_computation(state, root_module) + _cast_buffers_to_dtype_and_device(buffers, buffer_dtypes, state.compute_device) + state._exec_order_data.init(state, root_module, state.zero1_process_group) + _share_state_and_init_handle_attrs(state, root_module) + if dist.get_rank() == 0: + logging.info(f"Root Layezero Contains {len(state._all_handles)} non-None handles") + return state + + +def _check_flat_params_on_expected_device(state: _ZeRO3State, module: nn.Module): + """ + Checks that all ``FlatParameter``s in ``module`` 's tree managed by + ``state`` are on the expected device for *lazy initialization*. + """ + for handle in traversal_utils._get_zero3_handles(module): + if handle.flat_param.device != state.compute_device: + raise RuntimeError( + "An ZeRO3-managed module unexpectedly has parameters on " + f"{handle.flat_param.device}. Make sure to move the module to " + f"{state.compute_device} before training." + ) + + +@no_type_check +def _share_state_and_init_handle_attrs( + root_state: _ZeRO3State, + root_module: nn.Module, +) -> None: + """ + Shares data structure state from the ``root_state`` to all zero3 states in + ``root_module`` 's module tree, and initializes handle attributes. These + are done together to require a single loop over the states. 
+ """ + handle = root_state._handle + if handle: + handle.init_flat_param_attributes() + root_state._all_handles = root_state._exec_order_data.all_handles # share reference + for zero3_state in root_state._all_zero3_states: + if zero3_state is root_state: + continue + _p_assert( + zero3_state._is_root is None or not zero3_state._is_root, + "Non-root FSDP instance's `_is_root` should not have been " + "set yet or should have been set to `False`", + ) + zero3_state._is_root = False + zero3_state._unshard_stream = root_state._unshard_stream + zero3_state._post_backward_stream = root_state._post_backward_stream + zero3_state._pre_unshard_stream = root_state._pre_unshard_stream + zero3_state._default_stream = root_state._default_stream + zero3_state._offload_stream = root_state._offload_stream + + zero3_state._exec_order_data = root_state._exec_order_data + zero3_state._free_event_queue = root_state._free_event_queue + zero3_state._rs_event_queue = root_state._rs_event_queue + zero3_state._offload_event_queue = root_state._offload_event_queue + handle = zero3_state._handle + if handle: + handle.init_flat_param_attributes() + + +@no_type_check +def _init_streams( + state: _ZeRO3State, +) -> None: + """ + Initializes streams for overlapping communication, computation, and + data transfers. The streams should be shared across zero3 instances. 
+ """ + if not (state._is_root and state._device_handle.is_available()): + raise RuntimeError(f"state is not initialized or device not available") + # Prioritize all-gathers/reduce-scatters over async all-reduce for HSDP and + # preserve the default priority of 0 otherwise + high_priority = 1 + mid_priority = 2 + low_priority = 3 + # Default stream for computation + state._default_stream = state._device_handle.current_stream() + # Stream for unshard logic, including allocating the all-gather destination + # tensors and the all-gathers themselves + state._unshard_stream = state._device_handle.Stream(priority=mid_priority) + # Stream for overlapping gradient reduction with the backward pass gradient + # computation + state._post_backward_stream = state._device_handle.Stream(priority=low_priority) + # Stream for pre-unshard logic, namely allocations and writes for CPU + # offloading (H2D copy) and mixed precision (low precision cast) + state._offload_stream = state._device_handle.Stream(priority=low_priority) + state._pre_unshard_stream = state._device_handle.current_stream() \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_root_forward.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_root_forward.py new file mode 100644 index 000000000..4d39d1b62 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_root_forward.py @@ -0,0 +1,69 @@ +from typing import Any, Callable, Dict, List, no_type_check, Optional, Set, Tuple + +import torch +import torch.nn as nn +from torch.distributed.utils import ( + _cast_forward_inputs, + _p_assert, + _to_kwargs, +) +from mindspeed.core.distributed.layerzero import constants +from mindspeed.core.distributed.layerzero.zero3._common_utils import _ZeRO3State, _is_composable +from mindspeed.core.distributed.layerzero.zero3.flat_param import FlatParamHandle + +from ._utils import ( + _reset_flat_param_grad_info_if_needed, + 
_wait_for_computation_stream +) +from ._initialize import _lazy_init + + +@no_type_check +def _zero3_root_pre_forward( + state: _ZeRO3State, + module: nn.Module, + args, + kwargs, +) -> None: + with torch.profiler.record_function("LayerZeRO3._root_pre_forward_check"): + _lazy_init(state, module) + _p_assert(state._is_root is not None, + "Expects a root ZeRO3 to have been set") + if not state._is_root: + if constants.AUTO_CAST_INPUT and _is_composable(state): + return _root_cast_forward_input(state, module, args, kwargs) + return args, kwargs + + with torch.profiler.record_function("LayerZeRO3._root_pre_forward"): + if state.forward_prefetch: + handles: List[FlatParamHandle] = [] + for zero3_state in state._all_zero3_states: + if zero3_state._handle: + handles.append(zero3_state._handle) + for handle in handles: + handle._needs_pre_forward_unshard = True + + _wait_for_computation_stream( + state._default_stream, state._unshard_stream, state._pre_unshard_stream) + _reset_flat_param_grad_info_if_needed(state._all_handles) + + # Prepares the forward inputs by moving them to ``compute_device`` + # the perf with/without it. 
+ with torch.profiler.record_function("LayerZeRO3._to_kwargs"): + args_tuple, kwargs_tuple = _to_kwargs( + args, kwargs, state.compute_device, False + ) + args = args_tuple[0] + kwargs = kwargs_tuple[0] + return args, kwargs + + +@no_type_check +def _root_cast_forward_input( + state: _ZeRO3State, module: torch.nn.Module, args, kwargs +) -> Tuple[Any, Any]: + + if module.training and state.mixed_precision is not None: + input_dtype: Optional[torch.dtype] = state.mixed_precision.param_dtype + args, kwargs = _cast_forward_inputs(input_dtype, *args, **kwargs) + return args, kwargs \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_shard.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_shard.py new file mode 100644 index 000000000..627e280a3 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/_shard.py @@ -0,0 +1,277 @@ +import logging + +from enum import auto, Enum +from typing import Any, no_type_check, Optional, Set, Tuple, TYPE_CHECKING + +import torch +from torch.distributed.utils import _p_assert +import torch.distributed as dist +from mindspeed.core.distributed.layerzero.zero3.api import BackwardPrefetch +from mindspeed.core.distributed.layerzero.zero3.flat_param import HandleTrainingState +if TYPE_CHECKING: + from mindspeed.core.distributed.layerzero.zero3._common_utils import _ZeRO3State + from mindspeed.core.distributed.layerzero.zero3.flat_param import FlatParamHandle + +logger = logging.getLogger(__name__) +logger.setLevel(logging.WARNING) + + +class _PrefetchMode(Enum): + BACKWARD = auto() + FORWARD = auto() + + +@no_type_check +def _unshard( + state: "_ZeRO3State", + handle: "FlatParamHandle", + unshard_stream: torch.Stream, + pre_unshard_stream: torch.Stream, +) -> None: + """ + Unshards the handles in ``handles``. If the handles are in + :meth:`summon_full_params` and are using mixed precision, then they are + forced to full precision. 
+ + Postcondition: handle's ``FlatParameter`` 's data is the padded + unsharded flat parameter on the compute device. + """ + if not handle or not handle.needs_unshard(): + return + + with state._device_handle.stream(pre_unshard_stream): + handle.pre_unshard() + + unshard_stream.wait_stream(pre_unshard_stream) + if state.limit_all_gathers: + event = state._free_event_queue.dequeue_if_needed() + if event: + with torch.profiler.record_function( + "LayerZeRO3.rate_limiter" + ): + event.synchronize() + with state._device_handle.stream(unshard_stream): + handle.unshard() + handle.post_unshard() + + +@no_type_check +def _reshard( + state: "_ZeRO3State", + handle: "FlatParamHandle", + free_unsharded_flat_param: bool, +): + """ + Reshards the handle. ``free_unsharded_flat_param`` indicates whether to + free the handle's padded unsharded flat parameter. + """ + handle.reshard(free_unsharded_flat_param) + if state.limit_all_gathers and free_unsharded_flat_param: + free_event = state._device_handle.Event() + free_event.record() + state._free_event_queue.enqueue(free_event) + # Since we prefetch entire handles keys at a time, conservatively mark + # the entire key as no longer prefetched once we free at least one + if free_unsharded_flat_param: + handle._prefetched = False + else: + handle._prefetched = True + + +@no_type_check +def _pre_forward_backward_unshard( + state: "_ZeRO3State", + handle: Optional["FlatParamHandle"], +) -> None: + """Unshards parameters in the pre-forward. + 1. check handle exists + 2. check zero1 synced params to zero3 + 3. check zero3 prefetched + 4. 
prefetch next layer + modified _unshard func, which is called at each all-gather + + """ + if not handle: + return + # If the handles have been prefetched, then there is no need to call + # `_unshard()` again + if handle._training_state not in [HandleTrainingState.FORWARD, HandleTrainingState.BACKWARD_PRE]: + return + + in_forward = handle._training_state == HandleTrainingState.FORWARD + stage = "forward" if in_forward else "backward" + guard_state = f"_needs_pre_{stage}_unshard" + if in_forward or getattr(handle, guard_state): + _unshard( + state, + handle, + state._unshard_stream, + state._pre_unshard_stream + ) + setattr(handle, guard_state, False) + state._default_stream.wait_stream(state._unshard_stream) + handle._check_unsharded(handle.flat_param.data) + + _prefetch_mode = _PrefetchMode.FORWARD if handle._training_state == HandleTrainingState.FORWARD else _PrefetchMode.BACKWARD + with torch.profiler.record_function( + f"LayerZeRO3._pre_{stage}_prefetch" + ): + _prefetch_handle(state, handle, _prefetch_mode) + + +def _is_last_order_forward( + state: "_ZeRO3State", + handle: "FlatParamHandle" +) -> bool: + return handle._post_forward_index == len(state._exec_order_data.all_handles) - 1 + + +@no_type_check +def _post_forward_reshard( + state: "_ZeRO3State", + handle: "FlatParamHandle", +) -> None: + """Reshards parameters in the post-forward. 
+ """ + if not handle: + return + free_unsharded_flat_param = not _is_last_order_forward(state, handle) + with torch.profiler.record_function( + "LayerZeRO3._post_forward_reshard" + ): + _reshard(state, handle, free_unsharded_flat_param) + + +def _post_backward_reshard( + state: "_ZeRO3State", + handle: "FlatParamHandle", + *unused: Any, +) -> None: + free_unsharded_flat_param = not ( + handle._pre_forward_order_index == 0 and not state._sync_gradients) + with torch.profiler.record_function( + "LayerZeRO3._post_backward_reshard" + ): + _reshard(state, handle, free_unsharded_flat_param) + + with torch.profiler.record_function( + "LayerZeRO3._post_backward_prefetch" + ): + _prefetch_handle(state, handle, _PrefetchMode.BACKWARD) + + +@no_type_check +def _prefetch_handle( + state: "_ZeRO3State", + current_handle: Optional["FlatParamHandle"], + prefetch_mode: _PrefetchMode, +) -> None: + """ + Prefetches the next handles if needed (without synchronization). An empty + handles key cannot prefetch. 
+ """ + if not current_handle: + return + handle = _get_handle_to_prefetch(state, current_handle) + if not handle: + return + # Temporarily emulate the training state while calling `_unshard` to + # ensure the correct `as_params` for `_use_unsharded_views()` + prev_training_state = handle._training_state + if prefetch_mode == _PrefetchMode.BACKWARD: + handle._training_state = HandleTrainingState.BACKWARD_PRE + elif prefetch_mode == _PrefetchMode.FORWARD: + if handle.enter_backward: + return + handle._training_state = HandleTrainingState.FORWARD + else: + raise ValueError(f"Invalid prefetch mode on rank {state.zero3_rank}: {prefetch_mode}") + # Prefetch the next set of handles without synchronizing to allow + # the sync to happen as late as possible to maximize overlap + _unshard(state, handle, state._unshard_stream, state._pre_unshard_stream) + handle._training_state = prev_training_state + handle._prefetched = True + + +@no_type_check +def _get_handle_to_prefetch( + state: "_ZeRO3State", + current_handle: "FlatParamHandle", +) -> "FlatParamHandle": + """ + Returns a :class:`list` of the handles keys to prefetch for the next + module(s), where ``current_handle`` represents the current module. + + "Prefetching" refers to running the unshard logic early (without + synchronization), and the "next" modules depend on the recorded execution + order and the current training state. 
@no_type_check
def _get_handle_to_post_backward(
    state: "_ZeRO3State",
    current_handle: "FlatParamHandle",
) -> "Optional[List[FlatParamHandle]]":
    """
    Returns the handles that are ready for post-backward processing, where
    ``current_handle`` represents the current module.

    The candidates come from
    ``state._exec_order_data.get_handle_to_post_backward(current_handle)``.
    Only candidates whose training state is ``BACKWARD_POST`` and whose
    ``flat_param._post_backward_called`` flag is still unset are kept.

    Returns:
        A (possibly empty) list of handles, or ``None`` when the execution
        order data yields no candidates at all.
    """
    candidates = state._exec_order_data.get_handle_to_post_backward(
        current_handle)
    if not candidates:
        return None
    return [
        handle for handle in candidates
        if _get_training_state(handle) == HandleTrainingState.BACKWARD_POST
        and not handle.flat_param._post_backward_called
    ]
+ + This is similar to :func:`_get_zero3_states_with_modules` except that we + must call :func:`_is_fsdp_root` to force a lazy initialization to determine + the FSDP root in case lazy initialization has not yet happened. + """ + zero3_root_states: List[_ZeRO3State] = [] + zero3_root_modules: List[nn.Module] = [] + visited_zero3_states: Set[_ZeRO3State] = set() + # NOTE: This function assumes that `module.modules()` proceeds top-down. + for submodule in module.modules(): + optional_state = _get_module_zero3_state(submodule) + if ( + optional_state is not None + and optional_state not in visited_zero3_states + and _is_zero3_root(optional_state, submodule) + ): + visited_zero3_states.add(optional_state) + zero3_root_states.append(optional_state) + zero3_root_modules.append(submodule) + return zero3_root_states, zero3_root_modules + + +def _get_zero3_root_states(module: nn.Module) -> List[_ZeRO3State]: + """See :func:`_get_zero3_root_states_with_modules`.""" + zero3_root_states, _ = _get_zero3_root_states_with_modules(module) + return zero3_root_states + + +def _is_zero3_root(state: _ZeRO3State, module: nn.Module) -> bool: + """ + Returns if ``state`` corresponds to that of an zero3 root. + + For the wrapper code path, ``state`` and ``module`` should be the same. For + the non-wrapper code path, ``state`` should be ``module`` 's state. + """ + if state._is_root is None: + raise ValueError(f"state is not initialized") + return state._is_root + + +def _div_if_needed(tensor: torch.Tensor, div_factor: float) -> None: + if div_factor > 1: + tensor.div_(div_factor) + + +def _wait_for_computation_stream( + computation_stream: torch.Stream, + unshard_stream: torch.Stream, + pre_unshard_stream: torch.Stream, +): + """ + Has the unshard and pre-unshard streams wait for the computation stream. + For example, this should be called in the zero3 root's pre-forward to + respect optimizer step computation. 
+ """ + unshard_stream.wait_stream( + computation_stream) # type: ignore[attr-defined] + # Having the pre-all-gather stream wait for the current stream even if we + # do not leverage the pre-all-gather stream is tolerable since this only + # runs once per iteration + # type: ignore[attr-defined] + pre_unshard_stream.wait_stream(computation_stream) + + +@no_type_check +def _get_buffers_and_dtypes_for_computation( + state: _ZeRO3State, + root_module: nn.Module, +) -> Tuple[List[torch.Tensor], List[Optional[torch.dtype]]]: + """ + Returns all buffers in the module tree rooted at ``root_module`` and a + corresponding list of the buffer dtypes for computation. Each buffer dtype + is either ``None`` if buffer mixed precision is not enabled or the buffer + low precision dtype otherwise. + """ + _p_assert(state._is_root, "Expects the root to cast buffers") + buffers: List[torch.Tensor] = [] + buffer_dtypes: List[Optional[torch.dtype]] = [] + visited_buffers: Set[torch.Tensor] = set() + # Traverse the FSDP states bottom-up so that we prefer the owning FSDP + # instance's mixed precision setting for each buffer + zero3_states, zero3_modules = traversal_utils._get_zero3_states_with_modules( + root_module + ) + for zero3_state, zero3_module in zip(reversed(zero3_states), reversed(zero3_modules)): + for buffer_name, buffer in zero3_module.named_buffers(): + if buffer in visited_buffers: + continue + visited_buffers.add(buffer) + if clean_tensor_name(buffer_name) in zero3_state._ignored_buffer_names: + continue + buffers.append(buffer) + buffer_dtypes.append(zero3_state.mixed_precision.buffer_dtype) + _p_assert(len(buffers) == len(buffer_dtypes), f"{len(buffers)} {len(buffer_dtypes)}") + return buffers, buffer_dtypes + + +def _cast_buffers_to_dtype_and_device( + buffers: List[torch.Tensor], + buffer_dtypes: List[Optional[torch.dtype]], + device: torch.device, +) -> None: + """ + Casts ``buffers`` to the dtypes given by ``buffer_dtypes`` and moves them + to ``device``. 
If an element in ``buffer_dtypes`` is ``None``, then the + corresponding buffer is only moved to ``device``. + """ + _p_assert( + buffer_dtypes is None or len(buffers) == len(buffer_dtypes), + f"Expects `buffers` and `buffer_dtypes` to have the same length if " + f"`buffer_dtypes` is specified but got {len(buffers)} and " + f"{len(buffer_dtypes)}", + ) + for buffer, buffer_dtype in zip(buffers, buffer_dtypes): + if not torch.is_floating_point(buffer) or buffer_dtype is None: + buffer.data = buffer.to(device=device) + else: + buffer.data = buffer.to(device=device, dtype=buffer_dtype) + + +#!===================== grad================================================== +def _reset_flat_param_grad_info_if_needed( + handles: List[FlatParamHandle], +): + """ + Clears the original parameters' gradients if needed. This method's CPU + overhead is minimal, so we may call it throughout ZeRO3 methods, which serve + as callsites to free the gradient memory earlier. + """ + if not isinstance(handles, list): + handles = [handles] + for handle in handles: + handle._reset_flat_param_grad_info_if_needed() + + +def _cast_forward_outputs( + dtype: Optional[torch.dtype], + output +) -> Tuple[Any, Any]: + """ + Cast floating point tensors in ``args`` and ``kwargs`` to ``input_dtype``. + + This respects the existing ``requires_grad`` on the tensors. 
+ """ + if dtype is None: + return output + + def cast_fn(x: torch.Tensor) -> torch.Tensor: + if not torch.is_floating_point(x) or x.dtype == dtype: + return x + return x.to(dtype) + + return _apply_to_tensors(cast_fn, output) diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/hook.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/hook.py new file mode 100644 index 000000000..6d710ce4f --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/runtime/hook.py @@ -0,0 +1,78 @@ +import abc +import threading +from typing import ( + Callable, + Literal, + Optional, + Sequence, + Tuple, + Union, +) + +import torch +from torch.utils.hooks import RemovableHandle +from torch.autograd.graph import Node + + +class _MultiHandle(RemovableHandle): + handles: Tuple[RemovableHandle, ...] + + def __init__(self, handles: Tuple[RemovableHandle, ...]) -> None: + self.handles = handles + + def remove(self) -> None: + for handle in self.handles: + handle.remove() + + def __getstate__(self) -> Tuple[RemovableHandle, ...]: + return self.handles + + def __setstate__(self, state: Tuple[RemovableHandle, ...]) -> None: + self.handles = state + + +def _get_grad_fn_or_grad_acc(t: Union[torch.Tensor, None]) -> Node: + + if not (isinstance(t, torch.Tensor) and t.requires_grad): + raise ValueError( + f"Expects torch.Tensor with requires_grad=True, got {type(t)}") + if t.requires_grad and t.grad_fn is not None: + node = t.grad_fn + else: + with torch.enable_grad(): + node = t.grad_fn.next_functions[0][0] + if node is None: + raise AssertionError( + f"No graph.Node object returned from tensor.grad_fn") + return node + + +def register_multi_post_grad_hook( + tensors: Sequence[torch.Tensor], + fn: Union[ + Callable[[Sequence[Optional[torch.Tensor]]], None], + Callable[[torch.Tensor], None], + ], +) -> RemovableHandle: + """Note: + 1. This hook is only called once, so it needs to be re-registered. + 2. 
This hook is called only when all grad_fn or acc node is triggered + """ + lock = threading.Lock() + nb_calls = 0 + grad_fns = list(map(_get_grad_fn_or_grad_acc, tensors)) + len_tensors = len(tensors) + + def get_inner_hook() -> Callable[[torch.Tensor], None]: + def inner_hook(*grad: torch.Tensor) -> None: + nonlocal len_tensors, nb_calls, fn + with lock: + nb_calls += 1 + if len_tensors == nb_calls: + fn() + return inner_hook + + handles = tuple( + t.register_hook(get_inner_hook()) for t in grad_fns + ) + return _MultiHandle(handles) diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/__init__.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/fqn.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/fqn.py new file mode 100644 index 000000000..090d9b9c3 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/fqn.py @@ -0,0 +1,28 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+ +from dataclasses import dataclass +import torch + +ITERATION_KEY = "iteration" +ARGS_KEY = "args" +LOCAL_NAME_TO_FQN_KEY = "shard_state_dict" +D3PARALLEL_KEY = "" +MODEL_SD_KEY = "model" + + +@dataclass +class ShardFlattenInfo: + ''' + This class is unsed for saving flatten shard parameter global info + and helps to convert full param into shard param + + [offset, offset+numel] + ''' + in_shard: bool + numel: int + intra_param_start_idx: int + intra_param_end_idx: int # inclusive + shape: torch.Size + tensor_model_parallel: bool + partition_dim : int + partition_stride : int \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/mga_checkpoint.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/mga_checkpoint.py new file mode 100644 index 000000000..200937c74 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/mga_checkpoint.py @@ -0,0 +1,293 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+ +import os +import random +import warnings +import sys +import numpy as np + +import torch +import torch.distributed as dist +from megatron.training.checkpointing import get_rng_state +from megatron.training.global_vars import get_args +from megatron.training.utils import print_rank_0 +from megatron.core import mpu, tensor_parallel + +from .state_dict import shard_state_dict, clean_ignored_modules, use_zero1_params +from .optim_state import _shard_optim_state_dict + +PARALLE_STATE_KAY = "parallel_state" +MODEL_KEY = "model" +RNG_STATE_KEY = "rng_state" +SHRAD_KEY = "shard_state_dict" +EMA_MODEL_KEY = "ema_model" +OPTIM_STATE_KEY = "optimizer" +OPTIM_INFO_KEY = "optimizer_param_key_to_fqn" +OPTIM_SCHEDULER_KEY = "opt_param_scheduler" +LR_SCHEDULER_KEY = "lr_scheduler" + + +def save_checkpoint(iteration, model, optimizer, opt_param_scheduler, num_floating_point_operations_so_far=None, checkpointing_context=None, + pipeline_rank=None, expert_rank=None, tensor_rank=None, pipeline_parallel=None, expert_parallel=None): + """Save a model checkpoint. + + Checkpointing context is used to persist some checkpointing state + throughout a single job. Must be initialized externally (not used if None). + """ + args = get_args() + if not hasattr(args, "save"): + setattr(args, "save", "ckpt") + print_rank_0('saving checkpoint at iteration {:7d} to {} '.format( + iteration, args.save)) + rng_state = get_rng_state(False) + checkpoint_name = get_checkpoint_name(args.save, iteration, release=False) + + # Collect args, model, RNG. 
+ state_dict = generate_state_dict(args, model, optimizer, opt_param_scheduler, rng_state, + False, iteration) + state_dict[PARALLE_STATE_KAY] = generate_3D_parallel_state() + state_dict['num_floating_point_operations_so_far'] = num_floating_point_operations_so_far + + ensure_directory_exists(checkpoint_name) + print_rank_0(f"Start Saving to {checkpoint_name}!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") + torch.save(state_dict, checkpoint_name) + + if dist.is_initialized(): + dist.barrier() + + +def generate_3D_parallel_state(): + # Ensure the distributed environment is initialized + if not dist.is_initialized(): + raise RuntimeError("Distributed environment is not initialized.") + + # Ensure Megatron's parallel utilities are initialized + if not mpu.is_initialized(): + raise RuntimeError( + "Megatron's parallel utilities are not initialized.") + + # Get global rank + global_rank = dist.get_rank() + # Get tensor parallel rank + tp_rank = mpu.get_tensor_model_parallel_rank() + # Get pipeline parallel rank + pp_rank = mpu.get_pipeline_model_parallel_rank() + # Get data parallel rank + dp_rank = mpu.get_data_parallel_rank() + # Get tensor parallel degree + tp_degree = mpu.get_tensor_model_parallel_world_size() + # Get pipeline parallel degree + pp_degree = mpu.get_pipeline_model_parallel_world_size() + # Get data parallel degree + dp_degree = mpu.get_data_parallel_world_size() + + # Assemble the dictionary + parallel_state = { + 'tp_rank': tp_rank, + 'pp_rank': pp_rank, + 'dp_rank': dp_rank, + 'tp_degree': tp_degree, + 'pp_degree': pp_degree, + 'dp_degree': dp_degree, + 'global_rank': global_rank + } + + return parallel_state + + +def generate_state_dict(args, model, optimizer, opt_param_scheduler, + rng_state, use_dist_ckpt=False, iteration=None, + optim_sd_kwargs=None): + # Arguments, iteration, and model. 
+ state_dict = {} + state_dict['args'] = args + state_dict['checkpoint_version'] = 3.0 + if iteration is not None: + state_dict['iteration'] = iteration + + if not len(model) == 1: + raise ValueError(f"Only single model is supported, VPP not supported") + use_zero1_params(model[0]) + state_dict[MODEL_KEY] = clean_ignored_modules( + model[0], model[0].state_dict()) + state_dict[SHRAD_KEY] = shard_state_dict(model[0], state_dict[MODEL_KEY]) + + # Optimizer stuff. + if not args.no_save_optim: + if optimizer is not None: + state_dict[OPTIM_STATE_KEY] = optimizer.state_dict() + state_dict[OPTIM_INFO_KEY] = _shard_optim_state_dict( + model[0], optimizer.optimizer, state_dict[OPTIM_STATE_KEY]) + if getattr(args, "optimizer_selection", None) == 'fused_ema_adamw': + try: + ema_optimizer_applier(optimizer) + state_dict[EMA_MODEL_KEY] = clean_ignored_modules( + model[0], model[0].state_dict()) + state_dict = ema_state_dict_to_cpu( + state_dict, EMA_MODEL_KEY) + ema_optimizer_restore(optimizer) + print_rank_0("Ema model successful saved in state_dict") + except KeyError: + warnings.warn( + f"ema_optimizer_applier failed with KeyError, ema_model not saved") + if opt_param_scheduler is not None: + state_dict[OPTIM_SCHEDULER_KEY] = \ + opt_param_scheduler.state_dict() + # RNG states. 
def ensure_directory_exists(filename, check_parent=True):
    """Create the directory needed for ``filename`` if it is missing.

    Args:
        filename: Path of a file (``check_parent=True``) or of a directory
            (``check_parent=False``).
        check_parent: When true, the parent directory of ``filename`` is
            created; otherwise ``filename`` itself is created as a directory.

    Raises:
        AssertionError: If ``filename`` is ``None``.
    """
    if filename is None:
        raise AssertionError(f"Got {filename} filename")
    dirname = os.path.dirname(filename) if check_parent else filename
    # A bare file name has an empty dirname (the current directory), and
    # ``os.makedirs("")`` raises FileNotFoundError, so there is nothing to do.
    if dirname:
        os.makedirs(dirname, exist_ok=True)
+ if optimizer is not None: + optimizer.load_state_dict(state_dict[OPTIM_STATE_KEY]) + if opt_param_scheduler is not None: + if LR_SCHEDULER_KEY in state_dict: # backward compatbility + opt_param_scheduler.load_state_dict( + state_dict[LR_SCHEDULER_KEY]) + else: + opt_param_scheduler.load_state_dict( + state_dict[OPTIM_SCHEDULER_KEY]) + except KeyError as e: + raise RuntimeError('Unable to load optimizer from checkpoint {}. ' + 'Specify --no-load-optim or --finetune to prevent ' + 'attempting to load the optimizer state, ' + 'exiting ...'.format(ckpt_dir)) from e + args.num_floating_point_operations_so_far = state_dict.get( + 'num_floating_point_operations_so_far', 0) + if args.finetune: + iteration = 0 + else: + try: + iteration = state_dict['iteration'] + except KeyError: + iteration = 0 + args.iteration = iteration + + # Check arguments. + update_consumed_samples(args, state_dict) + # rng states. + resume_rng_states(args, state_dict) + + # Some utilities want to load a checkpoint without distributed being initialized + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + print_rank_0(f' successfully loaded checkpoint from {ckpt_dir} ' + f'[ t {mpu.get_tensor_model_parallel_rank()}, ' + f'p {mpu.get_pipeline_model_parallel_rank()} ] ' + f'at iteration {iteration}') + return args.iteration, args.num_floating_point_operations_so_far + + +def update_consumed_samples(args, state_dict): + if 'args' in state_dict and not args.finetune: + checkpoint_args = state_dict['args'] + args.consumed_train_samples = getattr(checkpoint_args, + 'consumed_train_samples', 0) + try: + from megatron.core.num_microbatches_calculator import update_num_microbatches + update_num_microbatches( + consumed_samples=args.consumed_train_samples) + except ImportError: + pass + args.consumed_valid_samples = getattr(checkpoint_args, + 'consumed_valid_samples', 0) + else: + print_rank_0('could not find arguments in the checkpoint ...') + + +def resume_rng_states(args, 
def ema_state_dict_to_cpu(state_dict, ema_key):
    """Replace every tensor under ``state_dict[ema_key]`` with a CPU copy.

    Each tensor value is detached, moved to the CPU and cloned; non-tensor
    values are left as they are. The mapping is modified in place and the
    same ``state_dict`` object is returned.
    """
    ema_entries = state_dict[ema_key]
    for name in list(ema_entries):
        value = ema_entries[name]
        if torch.is_tensor(value):
            ema_entries[name] = value.detach().cpu().clone()
    return state_dict
b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/optim_state.py new file mode 100644 index 000000000..859551d44 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/optim_state.py @@ -0,0 +1,154 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. + +import warnings +from typing import Dict, List, Optional, Iterable, Union, Any + +import torch +import torch.nn as nn + +from ..zero3._common_utils import ( + clean_tensor_name, + _named_parameters_with_duplicates +) + + +@torch.no_grad() +def _shard_optim_state_dict( + model: nn.Module, + optim: torch.optim.Optimizer, + optim_state_dict: Dict[str, Any], +) -> Dict[str, Any]: + """ + + Args: + model (nn.Module): Root module (which may or may not be a + :class:`FullyShardedDataParallel` instance) whose parameters + were passed into the optimizer ``optim``. + optim (torch.optim.Optimizer): Optimizer for ``model`` 's + parameters. + rank0_only (bool): If ``True``, saves the populated :class:`dict` + only on rank 0; if ``False``, saves it on all ranks. (Default: + ``True``) + shard_state (bool): If ``True``, shard and distribute all + non-zero-dimension states. + + Returns: + Dict[str, Any]: A :class:`dict` containing the optimizer state that is sharded: FQN - > state_dict. 
+ """ + param_to_fqns = _get_param_to_fqns(model) + is_named_optimizer = _is_named_optimizer(optim_state_dict) + + param_key_to_param = _get_param_key_to_param( + optim, model, is_named_optimizer, param_to_fqns + ) + param_key_to_fqns, missing_keys = _get_param_key_to_fqns( + param_to_fqns, param_key_to_param) + if missing_keys: + warnings.warn( + f"Missing keys that do not have FQN mappings {missing_keys}") + return param_key_to_fqns + + +def _get_param_key_to_fqns(param_to_fqns, param_key_to_param): + param_key_to_fqns = {} + missing_keys = set() + for param_key, param in param_key_to_param.items(): + if param in param_to_fqns: + param_key_to_fqns[param_key] = param_to_fqns[param] + else: + missing_keys.add(param_key) + return param_key_to_fqns, missing_keys + + +def _get_param_to_fqns( + model: torch.nn.Module, + dedup_shared_params: bool = True, +) -> Dict[nn.Parameter, List[str]]: + """ + Constructs a mapping from parameter to a list of its \"canonical\" FQNs. Here, + we use canonical to mean the fully-qualified name assigned to the parameter + based on its position in the original nn.Module hierarchy before any wrapper + or parallelism has been applied to it. This is in contrast to FQNs that may be + generated after parallelisms or wrappers have been applied to the model. + + Each normal parameter maps to a singleton list containing its FQN, while each + ``FlatParameter`` maps to a list of its original parameter FQNs, which may + have length greater than one. All FQNs are prefixed starting from ``model``. + """ + param_to_fqns = {} + for param_name, param in _named_parameters_with_duplicates( + model + ): + local_fqns = [param_name] + global_fqns = [ + clean_tensor_name(name) for name in local_fqns + ] # prefixed from the top level `model` (i.e. 
including `prefix`) + is_shared_param = param in param_to_fqns + if not is_shared_param: + param_to_fqns[param] = global_fqns + elif not dedup_shared_params: + param_to_fqns[param].extend(global_fqns) + + return param_to_fqns + + +def _is_named_optimizer(optim_state_dict: Dict[str, Any]) -> bool: + """ + Returns whether the state_dict is from a NamedOptimizer. + This function checks that the keys in the state_dict['state'] are strings + (which usually are FQNs) versus integers (which usually refer to param_ids + from a vanilla torch.optim.Optimizer). + """ + state = optim_state_dict.get("state", None) + if not state: + # If we cannot find a state, assume it is not NamedOptimizer as + # NamedOptimizer has eager initialization. + return False + try: + key = next(iter(state.keys())) + except Exception as e: + raise Exception(optim_state_dict) from e # noqa: TRY002 + return isinstance(key, str) + + +def _get_param_key_to_param( + optim: torch.optim.Optimizer, + model: Optional[nn.Module] = None, + is_named_optimizer: bool = False, + param_to_fqns: Optional[Dict[nn.Parameter, List[str]]] = None, +) -> Dict[Union[int, str], nn.Parameter]: + """ + Constructs a mapping from parameter keys to parameters. For the regular + optimizers, the keys are parameter IDs. For NamedOptimizer, the keys + are FQNs. This API may be used both for models with ``FlatParameter`` s and + without. 
+ """ + clean_fqn_to_fsdp_fqn: Dict[str, str] = {} + if is_named_optimizer: + if param_to_fqns is None or model is None: + raise AssertionError("The optimizer is a NamedOptimizer, `param_to_fqns` must not be None.") + for key, _ in _named_parameters_with_duplicates(model): + clean_fqn_to_fsdp_fqn[clean_tensor_name(key)] = key + + param_key_to_param: Dict[Union[str, int], nn.Parameter] = {} + pid = 0 + for param_group in optim.param_groups: + if is_named_optimizer: + for param in param_group["params"]: + # use_orig_params case + if len(param_to_fqns[param]) != 1: + raise AssertionError("More than one fqn matches this param") + key = param_to_fqns[param][0] + try: + key = clean_fqn_to_fsdp_fqn[key] + except KeyError as e: + raise KeyError( + f"Can't find {key} from {list(clean_fqn_to_fsdp_fqn.keys())}." + ) from e + param_key_to_param[key] = param + else: + for param in param_group["params"]: + param_key_to_param[pid] = param + pid += 1 + + return param_key_to_param diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/scripts/__init__.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/scripts/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/scripts/convert_to_megatron.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/scripts/convert_to_megatron.py new file mode 100644 index 000000000..1f976e25f --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/scripts/convert_to_megatron.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 

import argparse
import os
from collections import OrderedDict

import torch
import mindspeed.megatron_adaptor
from mindspeed.core.distributed.layerzero.state.scripts import layerzero_checkpointer
from mindspeed.core.distributed.layerzero.state.scripts.layerzero_checkpointer import LayerzeroCheckpoint
ARGS_KEY = 'args'

FINAL_LAYER_NORM_KEY = 'final_layernorm'
CHECKPOINT_VERSION_KEY = 'checkpoint_version'
CHECKPOINT_VERSION_VALUE = 3.0
ITERATION_KEY = 'iteration'


def parse_arguments():
    """Parse and validate the command line of the conversion script.

    Returns:
        argparse.Namespace with ``input_folder``, ``output_folder``,
        ``prefix``, ``target_tp``, ``target_pp``, ``for_release`` and
        ``ema_model``.

    Exits through ``parser.error`` (status 2) when a folder is missing or
    a target parallel degree is not positive, instead of failing later
    with an unrelated traceback.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_folder', default=None,
                        type=str, help='Input DeepSpeed Checkpoint folder')
    parser.add_argument('--output_folder', default=None,
                        type=str, help='Output Megatron checkpoint folder')
    parser.add_argument('--prefix', default="predictor",
                        help='Model prefix used in Layerzero')
    parser.add_argument('--target_tp', default=1,
                        type=int, help='Target TP degree')
    parser.add_argument('--target_pp', default=1,
                        type=int, help='Target PP degree')
    parser.add_argument('--for_release', action='store_true',
                        help='Convert for release purpose, reset some (progress) counters.')
    parser.add_argument('--ema_model', action='store_true',
                        help='Convert Ema models')
    args = parser.parse_args()
    if not args.input_folder or not args.output_folder:
        parser.error('--input_folder and --output_folder are both required')
    if args.target_tp < 1 or args.target_pp < 1:
        parser.error('--target_tp and --target_pp must be >= 1')
    print(f'args = {args}')
    return args


def _create_checkpoint_paths(base_folder, iteration, tp_degree, pp_degree):
    """Build the Megatron output path of every (tp, pp) rank.

    Returns:
        ``path_list[tp][pp]``: path of ``model_optim_rng.pt`` under
        ``<base_folder>/iter_XXXXXXX/mp_rank_TT[_PPP]/``. The ``_PPP``
        suffix is only used when ``pp_degree`` is greater than 1.
    """
    path_list = []
    iter_folder = f'iter_{iteration:07d}'
    for i in range(0, tp_degree):
        path_list.append([])
        for j in range(0, pp_degree):
            rank_folder = f'mp_rank_{i:02d}' if pp_degree == 1 else f'mp_rank_{i:02d}_{j:03d}'
            ckpt_path = os.path.join(rank_folder, 'model_optim_rng.pt')
            path_list[i].append(os.path.join(
                base_folder, iter_folder, ckpt_path))

    return path_list


def _save_checkpoint(file_path, chkpt_sd):
    """Save ``chkpt_sd`` to ``file_path``, creating parent folders first."""
    ckpt_dir, _ = os.path.split(file_path)
    # A bare file name has an empty directory part; os.makedirs('') raises.
    if ckpt_dir:
        os.makedirs(ckpt_dir, exist_ok=True)
    torch.save(chkpt_sd, file_path)


def _create_rank_checkpoint(zero_checkpoint, tp_index, pp_index, tp_degree, pp_degree, for_release=False):
    """Assemble the checkpoint dict written for one target (tp, pp) rank.

    The dict holds the rank's model weights, the iteration, the training
    args (with TP/PP sizes overwritten by the target degrees) and the
    checkpoint version. With ``for_release`` the consumed-sample counters
    in the args are reset to 0.

    NOTE: the args object returned by ``zero_checkpoint.get_args()`` is
    modified in place, so the same object is shared by every rank dict.
    """
    checkpoint_sd = OrderedDict()
    checkpoint_sd[layerzero_checkpointer.MODEL_SD_KEY] = zero_checkpoint.create_rank_checkpoint(
        tp_index, pp_index, tp_degree, pp_degree)
    iteration = zero_checkpoint.get_iteration()
    checkpoint_sd[ITERATION_KEY] = iteration
    checkpoint_sd[ARGS_KEY] = zero_checkpoint.get_args()
    # Adjust specific fields
    checkpoint_sd[ARGS_KEY].tensor_model_parallel_size = tp_degree
    checkpoint_sd[ARGS_KEY].pipeline_model_parallel_size = pp_degree
    if for_release:
        checkpoint_sd[ARGS_KEY].consumed_train_samples = 0
        checkpoint_sd[ARGS_KEY].consumed_valid_samples = 0
    checkpoint_sd[CHECKPOINT_VERSION_KEY] = CHECKPOINT_VERSION_VALUE
    return checkpoint_sd


def _create_latest_file(base_folder, iteration):
    """Write ``latest_checkpointed_iteration.txt`` into ``base_folder``."""
    file_path = os.path.join(base_folder, 'latest_checkpointed_iteration.txt')
    os.makedirs(base_folder, exist_ok=True)
    with open(file_path, 'w') as f:
        f.write(str(iteration))


def main():
    """Convert a Layerzero distributed checkpoint into Megatron rank files."""
    print('Convert Layerzero dist Checkpoint to a SINGLE Megatron Checkpoint')

    args = parse_arguments()
    print(f'Converting Layerzero checkpoint in {args.input_folder} to Megatron checkpoint in {args.output_folder}')
    if args.ema_model:
        from mindspeed.core.distributed.layerzero.state.scripts.layerzero_checkpointer import set_ema_model
        set_ema_model()
    if args.prefix is not None:
        from mindspeed.core.distributed.layerzero.state.scripts.layerzero_checkpointer import remove_model_prefix
        remove_model_prefix(args.prefix)

    lz_checkpoint = LayerzeroCheckpoint(args.input_folder)
    iteration = lz_checkpoint.get_iteration()
    _create_latest_file(args.output_folder, iteration)
    checkpoint_paths = _create_checkpoint_paths(
        args.output_folder, iteration, args.target_tp, args.target_pp)
    for i in range(0, args.target_tp):
        for j in range(0, args.target_pp):
            sd = _create_rank_checkpoint(
                lz_checkpoint, i, j, args.target_tp, args.target_pp, args.for_release)
            _save_checkpoint(checkpoint_paths[i][j], sd)


if __name__ == "__main__":
    main()
# diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/scripts/layerzero_checkpointer.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/scripts/layerzero_checkpointer.py
# new file mode 100644
# index 000000000..28ecc7910
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/scripts/layerzero_checkpointer.py
# @@ -0,0 +1,404 @@
# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.

import os
import re
from typing import Dict, List, Tuple, Any
from dataclasses import dataclass
from collections import OrderedDict, defaultdict

import torch
from mindspeed.core.distributed.layerzero.zero3._common_utils import (
    clean_tensor_name,
)

ITERATION_KEY = "iteration"
ARGS_KEY = "args"
LOCAL_NAME_TO_FQN_KEY = "shard_state_dict"
PARALLE_STATE_KAY = "parallel_state"
MODEL_SD_KEY = "model"
PP_LAYER_PATTERN = re.compile(r"(layers\.)(\d+)(\..*)")

MODEL_FILE_KEY = "model_"
NUM_LAYERS_KEY = "num_layers"
PP_LAYERS_KEY = "layers_per_pp"

EMA_MODEL_SD_KEY = "ema_model"
MODEL_PREFIX = None


def remove_model_prefix(prefix):
    """Register ``prefix + '.'`` as the text stripped from exported keys.

    A falsy ``prefix`` leaves the module-level ``MODEL_PREFIX`` unchanged.
    """
    global MODEL_PREFIX
    print("[debug] set model prefix =", prefix)
    if prefix:
        MODEL_PREFIX = prefix + '.'


def clean_prefix(fqn, prefix):
    """Return ``fqn`` with every occurrence of ``prefix`` removed.

    A falsy ``prefix`` (``None`` or ``""``) returns ``fqn`` unchanged.
    """
    if prefix:
        fqn = fqn.replace(prefix, "")
    return fqn


def set_ema_model():
    """Make later loads read the EMA weights instead of the model weights."""
    global MODEL_SD_KEY
    global EMA_MODEL_SD_KEY
    MODEL_SD_KEY = EMA_MODEL_SD_KEY


class ShardStateDict:
    """One rank's Layerzero shard file, loaded onto the CPU."""

    def __init__(self, filename) -> None:
        self.filename = filename
        self._init_metadata()

    def _init_metadata(self):
        """Load the file and cache its weights, shard infos and rank ids."""
        state_dict = torch.load(self.filename, map_location='cpu')

        self.parallel_info = state_dict[PARALLE_STATE_KAY]
        self._param_key_to_shard_info = state_dict[LOCAL_NAME_TO_FQN_KEY]
        self.model_state_dict = state_dict[MODEL_SD_KEY]

        self.tp_rank = self.parallel_info["tp_rank"]
        self.pp_rank = self.parallel_info["pp_rank"]
        self.global_rank = self.parallel_info["global_rank"]
        self.tp_degree = self.parallel_info["tp_degree"]
        self.pp_degree = self.parallel_info["pp_degree"]
        self.dp_degree = self.parallel_info["dp_degree"]

    def _get_param_by_param_key(self, param_key) -> torch.Tensor:
        """Return this rank's tensor stored under ``param_key`` or None."""
        param = self.model_state_dict.get(param_key, None)
        return param

    def _get_shape_by_param_key(self, key: str) -> torch.Size:
        """Return the shape recorded in the shard info of ``key``."""
        shard_info = self._get_shard_info_by_fqn(key)
        return shard_info.shape

    def _get_tp_pp_rank(self) -> Tuple[int, int]:
        return (self.tp_rank, self.pp_rank)

    def __lt__(self, rhs):
        # Ordering by global rank lets a list of shards be sorted directly.
        return self.global_rank < rhs.global_rank

    def __len__(self):
        return len(self.model_state_dict)

    def _get_shard_info_by_fqn(self, key: str):
        """Return the shard info stored under ``key`` or None."""
        shard_info = self._param_key_to_shard_info.get(key, None)
        return shard_info


class LayerzeroCheckpoint(object):
    """Full (unsharded) view of a Layerzero checkpoint folder.

    All ``model_*`` files under ``ckpt_dir`` are loaded and merged into
    per-layer dicts plus pre-process / post-process / other dicts, from
    which state dicts for arbitrary target TP/PP ranks can be produced.
    """

    def __init__(self, ckpt_dir):
        self.ckpt_dir = ckpt_dir
        self.file_list = self._get_files_by_key(ckpt_dir, MODEL_FILE_KEY)
        if not self.file_list:
            raise FileNotFoundError(
                f"No checkpoint file starting with '{MODEL_FILE_KEY}' found under {ckpt_dir}")
        self.global_state = {}
        self._build_global_state()
        self.state_dicts = [ShardStateDict(f) for f in self.file_list]
        self.pp_degree = self.state_dicts[0].pp_degree
        self.tp_degree = self.state_dicts[0].tp_degree
        self.layer_state_dicts = [{} for _ in range(self.num_layers)]
        self.pre_process_sd = {}
        self.post_process_sd = {}
        self.other_sd = {}
        self._sanity_check()
        self.convert_to_full_state_dict()

    def _sanity_check(self):
        pass

    def _build_global_state(self):
        """Read iteration, args and layer counts from the first file."""
        sd = torch.load(self.file_list[0], map_location=torch.device('cpu'))
        self.global_state[ITERATION_KEY] = sd.get(ITERATION_KEY, 0)
        self.global_state[ARGS_KEY] = sd.get(ARGS_KEY, None)
        args = self.get_args()
        if args is None:
            raise ValueError(
                f"Checkpoint file {self.file_list[0]} has no '{ARGS_KEY}' entry")
        self.global_state[NUM_LAYERS_KEY] = args.num_layers
        self.global_state[PP_LAYERS_KEY] = args.num_layers // args.pipeline_model_parallel_size

    @property
    def pp_layers_per_rank(self):
        """Layers held by one pipeline stage of the SAVED checkpoint."""
        return self.global_state[PP_LAYERS_KEY]

    @property
    def num_layers(self):
        return self.global_state[NUM_LAYERS_KEY]

    def get_iteration(self):
        """Return the saved iteration (0 when the file has none)."""
        if ITERATION_KEY not in self.global_state:
            # Was `self.mp_rank_files`, an attribute this class never sets.
            sd = torch.load(
                self.file_list[0], map_location=torch.device('cpu'))
            self.global_state[ITERATION_KEY] = sd.get(ITERATION_KEY, 0)

        return self.global_state[ITERATION_KEY]

    def get_args(self):
        """Return the saved training args (None when the file has none)."""
        if ARGS_KEY not in self.global_state:
            # Was `self.mp_rank_files`, an attribute this class never sets.
            sd = torch.load(
                self.file_list[0], map_location=torch.device('cpu'))
            self.global_state[ARGS_KEY] = sd.get(ARGS_KEY, None)

        return self.global_state[ARGS_KEY]

    def _get_files_by_key(self, ckpt_dir, key):
        """Collect every file under ``ckpt_dir`` whose name starts with ``key``."""
        file_list = []
        for root, dirs, files in os.walk(ckpt_dir):
            for file in files:
                if file.startswith(key):
                    file_list.append(os.path.join(root, file))
        return file_list

    def _find_shard_info(self, fqn: str):
        """Return the first shard info recorded for ``fqn`` on any rank.

        A single rank only records the parameters of its own pipeline
        stage, so all loaded shards are searched. Returns None when no
        rank has an entry.
        """
        for shard in self.state_dicts:
            info = shard._get_shard_info_by_fqn(fqn)
            if info is not None:
                return info
        return None

    def convert_to_full_state_dict(self) -> None:
        """Merge the shards of every saved pipeline stage into full tensors."""
        state_dicts: List[ShardStateDict] = self.state_dicts
        same_pp_groups = _get_same_pp_ranks(state_dicts)
        for pp_rank, pp_groups in same_pp_groups.items():
            self.build_layer_state_dict(pp_rank, pp_groups)
        return

    def build_layer_state_dict(self, pp_rank: int, state_dicts: List[ShardStateDict]) -> None:
        '''
        Merge the shard files of one saved pipeline stage into full tensors.

        Input: shards of the same pp stage, sorted by global rank.

        Result: layer parameters are stored in ``self.layer_state_dicts``
        at their global layer index; the remaining parameters go to
        ``pre_process_sd`` (first stage), ``post_process_sd`` (last stage)
        or ``other_sd`` (any stage in between).
        '''
        tp_zero_index = get_TP_unshard_idx_same_pp(state_dicts)
        non_zero_keys = set()
        for key, param in state_dicts[0].model_state_dict.items():
            fqn = clean_tensor_name(key)
            shard_info = state_dicts[0]._get_shard_info_by_fqn(fqn)
            if shard_info is None:
                # Not managed by Layerzero: the stored tensor is already full.
                full_tensor = param
                non_zero_keys.add(fqn)
            else:
                shape = shard_info.shape
                tensor_model_parallel = shard_info.tensor_model_parallel
                partition_dim = shard_info.partition_dim

                shard_lists = _get_shard_list_by_param_key(state_dicts, key)
                if self.tp_degree > 1 and tensor_model_parallel:
                    full_tensor = zero_tp_to_full_tensor(
                        shard_lists, tp_zero_index, shape, partition_dim, self.tp_degree)
                else:
                    full_tensor = zero_to_full_tensor(shard_lists, shape)
            layer_num = _get_layer_num(fqn)
            if layer_num is not None:
                global_layer_num = self.local_to_global_layer_num(
                    layer_num, pp_rank)
                # Keyed by the cleaned name, like the non-layer dicts below,
                # so later shard-info lookups and renames see the same key.
                self.layer_state_dicts[global_layer_num][fqn] = full_tensor
            else:
                is_first_stage = pp_rank == 0
                is_last_stage = pp_rank == self.pp_degree - 1
                if is_first_stage:
                    self.pre_process_sd[fqn] = full_tensor
                if is_last_stage:
                    self.post_process_sd[fqn] = full_tensor
                # Was `not (a) or (b)`, which also captured last-stage
                # (and, for PP=1, all) parameters into `other_sd`.
                if not (is_first_stage or is_last_stage):
                    self.other_sd[fqn] = full_tensor
        print(f"{non_zero_keys=}")
        return

    def local_to_global_layer_num(self, layer_num: int, pp_rank: int):
        """Map a stage-local layer index of the saved model to a global one."""
        return layer_num + pp_rank * self.pp_layers_per_rank

    def create_rank_checkpoint(self, tp_index: int, pp_index: int, tp_degree: int, pp_degree: int) -> Dict[str, torch.Tensor]:
        '''
        Build the state dict of one target (tp_index, pp_index) rank.

        Args:
            tp_index (int): index of the target TP rank.
            pp_index (int): index of the target PP stage.
            tp_degree (int): total number of target TP ranks.
            pp_degree (int): total number of target PP stages.

        Returns:
            Dict[str, torch.Tensor]: state dict of the target rank, with
            tensor-parallel parameters split along their partition dim.
        '''
        # Full tensors that belong to the target PP stage.
        state_dict = self.get_layer_state_dict(pp_index, pp_degree)
        rank_state_dict = {}
        for fqn, tensor in state_dict.items():
            # Look up before the model prefix is stripped from the key.
            shard_info = self._find_shard_info(fqn)

            if MODEL_PREFIX:
                fqn = clean_prefix(fqn, MODEL_PREFIX)

            if shard_info is not None and shard_info.tensor_model_parallel:
                # Tensor-parallel parameter: keep only this rank's chunk.
                partition_dim = shard_info.partition_dim
                stride = shard_info.partition_stride
                rank_state_dict[fqn] = shard_tensor(
                    tensor, tp_degree, tp_index, partition_dim, stride)
            else:
                # Replicated parameter: use the full tensor as is.
                rank_state_dict[fqn] = tensor
        return rank_state_dict

    def get_layer_state_dict(self, pp_index: int, pp_degree: int) -> Dict[str, torch.Tensor]:
        '''
        Collect the full tensors owned by one target PP stage.

        Args:
            pp_index (int): index of the target PP stage.
            pp_degree (int): total number of target PP stages.

        Returns:
            Dict[str, torch.Tensor]: pre-process parameters (first stage
            only), post-process parameters (last stage only), ``other_sd``
            and the stage's layers renumbered from 0.

        Raises:
            ValueError: if the layers cannot be split evenly over
                ``pp_degree`` stages.
        '''
        if pp_degree < 1 or self.num_layers % pp_degree != 0:
            raise ValueError(
                f"num_layers={self.num_layers} cannot be split evenly over pp_degree={pp_degree}")
        state_dict = {}

        # Pre-process part, first stage only.
        if pp_index == 0:
            state_dict.update(self.pre_process_sd)

        # Post-process part, last stage only.
        if pp_index == pp_degree - 1:
            state_dict.update(self.post_process_sd)
        state_dict.update(self.other_sd)
        # Slice by the TARGET stage size; the saved layers-per-rank only
        # matches when the target PP degree equals the saved one.
        layers_per_stage = self.num_layers // pp_degree
        start_layer = pp_index * layers_per_stage
        end_layer = start_layer + layers_per_stage

        for layer_idx, layer_state_dict in enumerate(self.layer_state_dicts[start_layer:end_layer]):
            layer_state_dict = _rename_layer_sd_key(
                layer_state_dict, layer_idx)
            state_dict.update(layer_state_dict)

        return state_dict


def _get_layer_num(key: str) -> int:
    """Return the layer index of a key starting with ``layers.<n>.``, else None."""
    match = PP_LAYER_PATTERN.match(key)
    if match:
        return int(match.group(2))
    return None


def _rename_layer_sd_key(layer_state_dict: Dict, layer_idx: int):
    """Return a copy of ``layer_state_dict`` with keys renumbered to ``layer_idx``."""
    state_dict = {}
    for key, value in layer_state_dict.items():
        state_dict[_rename_layer_key(key, layer_idx)] = value
    return state_dict


def _rename_layer_key(old_key: str, idx: int) -> str:
    """Generate new key based for pp stage, old_key -> new_key

    Args:
        old_key (str): layers.{i}.name
        idx (int): num_layers_idx new

    Returns:
        str: layers.{idx}.name; keys that do not match are returned as is.
    """
    match = PP_LAYER_PATTERN.match(old_key)
    if match:
        prefix, _, suffix = match.groups()
        return f"{prefix}{idx}{suffix}"
    return old_key


def _get_shard_list_by_param_key(state_dicts, key):
    '''
    Return every rank's shard of the parameter stored under ``key``.

    Be aware of TP: a parameter is sharded by TP first, then by ZeRO3.
    '''
    if not state_dicts:
        return []
    return [sd._get_param_by_param_key(key) for sd in state_dicts]


def set_tensor_model_parallel_attributes(tensor, is_parallel, dim, stride):
    """Tag ``tensor`` with the tensor-parallel attributes."""
    setattr(tensor, 'tensor_model_parallel', is_parallel)
    setattr(tensor, 'partition_dim', dim)
    setattr(tensor, 'partition_stride', stride)


def shard_tensor(full_tensor: torch.Tensor,
                 tp_degree: int,
                 tp_rank: int,
                 partition_dim: int, stride: int = 1
                 ) -> torch.Tensor:
    """Return chunk ``tp_rank`` of ``full_tensor`` split along ``partition_dim``.

    The chunk is tagged with the tensor-parallel attributes. ``stride``
    is only recorded on the chunk; it does not change how it is cut.
    """
    shards = torch.chunk(full_tensor, tp_degree, dim=partition_dim)
    set_tensor_model_parallel_attributes(
        shards[tp_rank], is_parallel=True, dim=partition_dim, stride=stride)
    return shards[tp_rank]


def zero_to_full_tensor(shards, global_shape):
    """Concatenate flat ZeRO shards and reshape them to ``global_shape``.

    Raises:
        TypeError: if ``global_shape`` is not a ``torch.Size``.
        AssertionError: if a shard has more than one dimension.
    """
    if not isinstance(global_shape, torch.Size):
        raise TypeError(f"Expect Type torch.Size, got {type(global_shape)}")
    if not all(len(param.shape) <= 1 for param in shards):
        raise AssertionError("Expect all zero param to be 1D, Got non Flat param")
    return torch.cat(shards).reshape(global_shape)


def tp_full_shape(shape: torch.Size, partition_dim: int, tp_degree: int):
    """Return ``shape`` with dimension ``partition_dim`` multiplied by ``tp_degree``."""
    if len(shape) <= partition_dim:
        raise AssertionError(f"{partition_dim} greater or equal to shape len {len(shape)}")
    shape_list = list(shape)
    # Only the partitioned dimension grows.
    shape_list[partition_dim] *= tp_degree
    return torch.Size(shape_list)


def zero_tp_to_full_tensor(shards: List[torch.Tensor],
                           tp_zero_index: List[int],
                           shape: torch.Size,
                           partition_dim: int,
                           tp_degree: int):
    """Rebuild a full tensor from shards split by TP and then by ZeRO.

    Args:
        shards: flat shards, one per rank of the pipeline stage.
        tp_zero_index: order that sorts ``shards`` by (tp rank, global rank).
        shape: shape of the parameter on ONE tp rank.
        partition_dim: dimension along which TP split the parameter.
        tp_degree: number of tp ranks.

    Each tp rank's slice is rebuilt separately and the slices are then
    concatenated along ``partition_dim``. Reshaping one flat concatenation
    is only correct when ``partition_dim`` is 0.
    """
    if tp_degree <= 1:
        return zero_to_full_tensor(shards, shape)
    if len(shards) != len(tp_zero_index):
        raise AssertionError(f"Not enough zero params for {tp_degree=}")
    # Validates partition_dim against the shape.
    tp_full_shape(shape, partition_dim, tp_degree)
    ordered = [shards[i] for i in tp_zero_index]
    if len(ordered) % tp_degree != 0:
        raise AssertionError(f"Not enough zero params for {tp_degree=}")
    per_rank = len(ordered) // tp_degree
    tp_slices = [
        zero_to_full_tensor(ordered[rank * per_rank:(rank + 1) * per_rank], shape)
        for rank in range(tp_degree)
    ]
    return torch.cat(tp_slices, dim=partition_dim)


def _get_same_pp_ranks(shard_dict_list: List[ShardStateDict]) -> Dict[int, List[ShardStateDict]]:
    """Group shards by pipeline rank; each group is sorted by global rank.

    Raises:
        ValueError: if shards of one pipeline rank have different keys.
    """
    results = defaultdict(list)
    for shard_dict in shard_dict_list:
        pp_rank = shard_dict.pp_rank
        results[pp_rank].append(shard_dict)

    # Sanity-check and sort every group.
    for pp_rank, group in results.items():
        # All shards of a stage must hold the same model keys.
        model_keys = [set(sd.model_state_dict.keys()) for sd in group]
        if not all(keys == model_keys[0] for keys in model_keys):
            raise ValueError(
                f"All state dicts in PP rank {pp_rank} must have the same model keys. "
                f"Found mismatched keys: {model_keys}"
            )
        # Sort by global rank.
        sort_shard_dict_by_global_rank(group)
    return results


def sort_shard_dict_by_global_rank(shard_list: List[ShardStateDict]) -> None:
    """Sort ``shard_list`` in place by global rank."""
    shard_list.sort()


def get_TP_unshard_idx_same_pp(state_dicts: List[ShardStateDict]) -> List[int]:
    """Return the indices that order ``state_dicts`` by (tp rank, global rank).

    Raises:
        AssertionError: if the shards do not all share one pipeline rank.
    """
    pp_ranks = set(sd.pp_rank for sd in state_dicts)
    if len(pp_ranks) != 1:
        raise AssertionError("Got more than 1 pp rank")

    tp_global_index = [(idx, sd.tp_rank, sd.global_rank)
                       for idx, sd in enumerate(state_dicts)]
    sorted_list = sorted(tp_global_index, key=lambda x: (x[1], x[2]))
    sorted_index = [x[0] for x in sorted_list]
    return sorted_index
# diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/state_dict.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/state_dict.py
# new file mode 100644
# index 000000000..3663a32b6
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/state/state_dict.py
# @@ -0,0 +1,135 @@
# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.

from collections import OrderedDict
from typing import Dict, List

import torch
import torch.nn as nn
import torch.distributed as dist
from megatron.training.utils import print_rank_0

from .fqn import ShardFlattenInfo
from ..zero3.fsdp import LayerZeRO3
from ..zero3._common_utils import (
    clean_tensor_name,
    _apply_to_modules,
)
from ..zero3._init_utils import _get_ignored_params
from ..runtime._initialize import _lazy_init

TP_SHARD_ARGS = "tensor_model_parallel"


def clean_state_dict(state_dict: Dict):
    """Return an ordered copy of ``state_dict`` keyed by cleaned tensor names."""
    sd = OrderedDict()
    for key, param in state_dict.items():
        fqn = clean_tensor_name(key)
        sd[fqn] = param
    return sd


def use_zero1_params(zero3_model: LayerZeRO3):
    """Switch a flat-param handle of ``zero3_model`` to its sharded views.

    Lazily initialises the root model first when that has not happened.

    NOTE(review): the source indentation was lost, so this control flow is
    a reconstruction. As written, the function returns after the first
    truthy handle; confirm whether every handle was meant to be processed
    (``continue`` instead of ``return``).
    """
    if zero3_model._is_root is None:
        _lazy_init(zero3_model, zero3_model)
    for handle in zero3_model._all_handles:
        if handle:
            already_resharded = handle.flat_param.data_ptr(
            ) == handle.flat_param._zero1_shard.data_ptr()
            if already_resharded:
                handle._use_sharded_views()
                return
            else:
                with zero3_model._device_handle.stream(zero3_model._default_stream):
                    event = zero3_model._device_handle.Event()
                    event.record()
                    event.wait()
                handle.reshard(True)
                handle._prefetched = False
                handle._use_sharded_views()
                return


def clean_ignored_modules(zero3_model: LayerZeRO3, state_dict):
    """Return ``state_dict`` without the parameters Layerzero ignores.

    The names of ignored parameters are taken from
    ``zero3_model.named_parameters()``; the keys actually dropped are
    printed on rank 0.
    """
    if zero3_model._is_root is None:
        _lazy_init(zero3_model, zero3_model)
    ignored_params = _get_ignored_params(
        zero3_model, zero3_model._ignored_modules, zero3_model._ignored_params)
    ignored_keys = set()
    for key, param in zero3_model.named_parameters():
        if param in ignored_params:
            ignored_keys.add(key)
    new_state_dict = OrderedDict()
    ignored_param_keys = set()
    for key, param in state_dict.items():
        if key in ignored_keys:
            ignored_param_keys.add(key)
        else:
            new_state_dict[key] = param
    print_rank_0(f"Ignored parameter keys: {ignored_param_keys}")
    return new_state_dict


def shard_state_dict(zero3_model: LayerZeRO3, state_dict):
    '''Return a dict of FQN to shard info for later conversion to a megatron ckpt.

    Keys of ``state_dict`` without a shard info ("missing keys") may be
    params that are not managed by Layerzero3; they are reported on rank 0
    and are later converted to megatron as they are.

    Raises:
        ValueError: if ``zero3_model`` is not the root zero3 model.
    '''
    if zero3_model._is_root is None:
        _lazy_init(zero3_model, zero3_model)
    if not zero3_model._is_root:
        raise ValueError("Expected a root zero3 model")
    shard_infos = _get_param_fqns_to_shards(zero3_model)
    missing_keys = set()
    for key in state_dict.keys():
        fqn = clean_tensor_name(key)
        if fqn not in shard_infos:
            missing_keys.add(fqn)
    print_rank_0(f"Layerzero3 Shard info {missing_keys=}")
    return shard_infos


def _get_param_fqns_to_shards(
    model: torch.nn.Module,
) -> Dict[str, ShardFlattenInfo]:
    """Map every flattened parameter's full name to its ``ShardFlattenInfo``.

    Parameters without the ``tensor_model_parallel`` attribute are recorded
    as not tensor-parallel with ``partition_dim=-1`` and
    ``partition_stride=1``.
    """

    def module_fn(module, prefix, tree_level, shard_infos):
        if isinstance(module, LayerZeRO3):
            handle = module._handle
            if handle:
                flat_param = handle.flat_param
                for param, shard_param_info, fqn, shape in zip(
                    flat_param._params,
                    flat_param._shard_param_infos,
                    flat_param._fqns,
                    flat_param._shapes
                ):
                    if hasattr(param, TP_SHARD_ARGS):
                        tensor_model_parallel = param.tensor_model_parallel
                        partition_dim = param.partition_dim
                        partition_stride = param.partition_stride
                    else:
                        tensor_model_parallel = False
                        # Plain ints: the original trailing commas stored
                        # the one-element tuples (-1,) and (1,).
                        partition_dim = -1
                        partition_stride = 1
                    global_fqn = prefix + fqn
                    shard_infos[global_fqn] = ShardFlattenInfo(
                        shard_param_info.in_shard,
                        shard_param_info.numel_in_shard,
                        shard_param_info.intra_param_start_idx,
                        shard_param_info.intra_param_end_idx,
                        shape,
                        tensor_model_parallel,
                        partition_dim,
                        partition_stride)

    def return_fn(shard_infos):
        return shard_infos

    param_to_unflat_param_names: Dict[torch.nn.Parameter, List[str]] = {}
    return _apply_to_modules(
        model,
        module_fn,
        return_fn,
        [],
        param_to_unflat_param_names,
    )
# diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/__init__.py
b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/__init__.py new file mode 100644 index 000000000..6ac9b1117 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/__init__.py @@ -0,0 +1,8 @@ +from .flat_param import FlatParameter +from .fsdp import ( + BackwardPrefetch, + LayerZeRO3, + MixedPrecision, +) +from .flat_param import FlatParamHandle, FlatParameter +from ._common_utils import _ZeRO3State \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_common_utils.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_common_utils.py new file mode 100644 index 000000000..cedf37f86 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_common_utils.py @@ -0,0 +1,367 @@ +""" +This file includes private common utilities for FSDP. +""" +import traceback +import warnings +import weakref +from enum import auto, Enum +from typing import ( + Any, + Callable, + cast, + Dict, + Generator, + List, + no_type_check, + Optional, + Set, + Tuple, + TYPE_CHECKING +) + +import torch +import torch.distributed as dist +import torch.nn as nn +from torch.utils.hooks import RemovableHandle +from torch.distributed._composable_state import _get_module_state, _State +from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import ( + _CHECKPOINT_PREFIX, +) +from torch.utils._mode_utils import no_dispatch + +if TYPE_CHECKING: + from mindspeed.core.distributed.layerzero.zero3._exec_order_utils import _ExecOrderData +from mindspeed.core.distributed.layerzero.comm.hookwrap import CriticalPathEventQueue + +ZERO3_WRAPPED_MODULE = "_zero3_wrapped_module" +ZERO3_PREFIX = ZERO3_WRAPPED_MODULE + "." 
ZERO3_FLATTENED = "_zero3_flattened"
CRITICAL_EVENT_QUEUE = CriticalPathEventQueue()


class _DeviceHandle:
    """
    This is a simple abstraction for FSDP computing devices,
    which enables custom backends that implement CUDA-like
    semantics to be integrated with FSDP.
    """

    def __init__(self, device: torch.device, backend: Any = None):
        # Always remember the device: the error message in `__getattr__`
        # reads it, and leaving it unset for a custom backend makes that
        # lookup recurse through `__getattr__` without end.
        self.__device = device
        if backend is None:
            try:
                self.__backend = getattr(torch, device.type)
            except AttributeError as e:
                raise AttributeError(
                    f"Device '{device}' does not have a corresponding backend registered as 'torch.{device.type}'."
                ) from e
        else:
            self.__backend = backend

    @classmethod
    def from_device(cls, device: torch.device) -> "_DeviceHandle":
        """
        Return an device handle corresponding to the device, and through this handle,
        operations with the same semantics as CUDA can be performed on the device.
        Just return torch.cuda if the device is cuda to make attribute-access faster.
        Custom backend must first register a module with the same name with {device.type} on torch.
        """
        if device.type == "cuda":
            return cast(_DeviceHandle, torch.cuda)
        return cls(device)

    def __getattr__(self, __name: str) -> Any:
        # Only called for names not found on the handle itself: forward
        # them to the backend module.
        try:
            return getattr(self.__backend, __name)
        except AttributeError as e:
            raise AttributeError(
                f"Custom backend '{self.__device.type}' not implement 'torch.{self.__device.type}.{__name}'"
            ) from e


class _UninitializedDeviceHandle:
    """Placeholder handle; every attribute access raises ``RuntimeError``."""

    def __init__(self):
        pass

    def __getattribute__(self, __name: str) -> Any:
        raise RuntimeError("Trying to use an uninitialized device handle.")


class _ZeRO3State(_State):
    """Per-module LayerZeRO3 state; ``__init__`` only sets defaults."""

    def __init__(self) -> None:
        self._debug_level = None
        # ---- zero3 related attributes ----
        self._ignored_modules: Set[nn.Module] = set()
        self._ignored_params: Set[nn.Parameter] = set()
        # Buffer names are cleaned (without wrapper prefixes)
        self._ignored_buffer_names: Set[str] = set()
        self.zero3_process_group: Optional[dist.ProcessGroup] = None
        # ---- zero1 process-group state ----
        self.zero1_process_group: Optional[dist.ProcessGroup] = None
        self.global_rank: int = -1
        self.world_size: int = -1
        # ----
        self.zero3_rank: int = -1
        self.zero3_world_size: int = -1
        self.limit_all_gathers: bool = False
        self.training_state = TrainingState.IDLE
        self._unshard_params_ctx: Dict[nn.Module, Generator] = {}
        self._is_root: Optional[bool] = None
        self._handle = None
        # : Dict[nn.Module, Optional[flat_param_file.FlatParamHandle]]
        self._zero3_module_to_handle = {}
        self.compute_device: Optional[torch.device] = None
        self._gradient_predivide_factor: int = 0
        # Abstract device handle for fsdp compute device. For now,
        # the compute device must implement cuda semantics used by fsdp
        self._device_handle: _DeviceHandle = _UninitializedDeviceHandle()
        # All following attributes should only be used for root states:
        # Save these static lists to avoid the repeated tree traversals
        self._all_zero3_states: List[_ZeRO3State] = []
        self._all_handles = []  # : List[flat_param_file.FlatParamHandle] = []
        self.mixed_precision = None
        self._offload_grads = False
        # ---- streams ----
        self._unshard_stream = None
        self._post_backward_stream = None
        self._pre_unshard_stream = None
        self._default_stream = None
        self._offload_stream = None
        self._exec_order_data: "_ExecOrderData" = None
        self._free_event_queue = None
        self._rs_event_queue = None
        self._offload_event_queue = None
        # ---- runtime state ----
        self.backward_prefetch = None
        self.backward_reduce_scatter = None
        self.forward_prefetch: bool = None
        self._root_pre_forward_handles: List[RemovableHandle] = []
        self._pre_forward_handles: List[RemovableHandle] = []
        self._post_forward_handles: List[RemovableHandle] = []
        self._sync_gradients: bool = False
        self._root_needs_param_sync: bool = True
        # ---- hook state ----
        self._post_backward_callback_queued: bool = False

    def wait_critical_path_events(self):
        """Pop and wait on every event queued in ``CRITICAL_EVENT_QUEUE``."""
        if CRITICAL_EVENT_QUEUE is None or CRITICAL_EVENT_QUEUE.empty():
            return
        with torch.profiler.record_function("LayerZeRO3: wait critical path events"):
            with CRITICAL_EVENT_QUEUE.block():
                while not CRITICAL_EVENT_QUEUE.empty():
                    event = CRITICAL_EVENT_QUEUE.pop_left()
                    if event is not None:
                        with torch.profiler.record_function(
                            "LayerZeRO3.critical_path_events"
                        ):
                            event.wait()

    @classmethod
    def record_critical_event(cls):
        """Record a new device event and enqueue it in ``CRITICAL_EVENT_QUEUE``."""
        if dist.get_rank() == 0:
            print("Record a critical event")
        event = torch.cuda.Event()
        event.record()
        CRITICAL_EVENT_QUEUE.enqueue(event)


def _get_module_zero3_state(module: nn.Module) -> Optional[_ZeRO3State]:
    """Return the module's state when it is a ``_ZeRO3State``, else None."""
    state = _get_module_state(module)
    if state is None or not isinstance(state, _ZeRO3State):
        return None
    return state


class TrainingState(Enum):
    """
    An enum that indicates the state of a ``FullyShardedDataParallel`` instance.
    """

    IDLE = auto()
    FORWARD_BACKWARD = auto()
    SUMMON_FULL_PARAMS = auto()


class HandleTrainingState(Enum):
    """
    An enum that indicates the state of a ``FlatParamHandle``.
    """

    IDLE = auto()
    FORWARD = auto()
    BACKWARD_PRE = auto()
    BACKWARD_POST = auto()
    SUMMON_FULL_PARAMS = auto()
    SYNC_PARAMS = auto()


def _is_composable(state: _ZeRO3State):
    """Return True when ``state`` is not itself an ``nn.Module``."""
    return not isinstance(state, nn.Module)


@no_type_check
def _module_handle(state: _ZeRO3State, module: nn.Module):
    """
    Returns the ``FlatParamHandle`` s corresponding to ``module``. This is
    the handle that contains some parameter in ``module``.
    """
    if _is_composable(state):
        # A valid FSDP state may have no managed parameters and hence no
        # handles, meaning no entry in `_fully_sharded_module_to_handles`
        if state._handle is None:
            return None
        if module not in state._zero3_module_to_handle:
            raise AssertionError(f"Expects a fully sharded module but got {module} on rank {state.zero3_rank}")
        return state._zero3_module_to_handle[module]
    else:
        # NOTE: This assumes `module` is a `FullyShardedDataParallel` instance.
        return module._handle


@no_type_check
def _has_zero3_params(state: _ZeRO3State, module: nn.Module) -> bool:
    """Returns if ``module`` has parameters managed by LayerZeRO3."""
    return _module_handle(state, module) is not None


def clean_tensor_name(tensor_name: str) -> str:
    """
    Cleans the parameter or buffer name by removing any module wrapper
    prefixes.
    """
    tensor_name = tensor_name.replace(ZERO3_PREFIX, "")
    # it couples `CheckpointWrapper` and FSDP and also does not scale for more
    # module wrappers.
    tensor_name = tensor_name.replace(_CHECKPOINT_PREFIX, "")
    return tensor_name


def _set_zero3_flattened(tensor: torch.Tensor) -> None:
    """
    Sets an attribute on ``tensor`` to mark it as flattened by FSDP. This is to
    avoid re-flattening it during nested construction.
    """
    setattr(tensor, ZERO3_FLATTENED, True)


def _is_zero3_flattened(tensor: torch.Tensor) -> bool:
    """Returns if ``tensor`` has been marked as flattened by FSDP."""
    return getattr(tensor, ZERO3_FLATTENED, False)


def _named_parameters_with_duplicates(
    module: nn.Module, **kwargs: Any
) -> List[Tuple[str, nn.Parameter]]:
    """
    This API is required as some modules overwrite `named_parameters()` but do not support
    `remove_duplicate`.
    """
    kwargs["remove_duplicate"] = False
    try:
        ret = list(module.named_parameters(**kwargs))
    except AssertionError:
        # The override rejected `remove_duplicate`; retry without it.
        kwargs.pop("remove_duplicate")
        ret = list(module.named_parameters(**kwargs))
    return ret


def _apply_to_modules(
    root_module: torch.nn.Module,
    module_fn: Callable,
    return_fn: Callable,
    filter_fqns: Optional[List[str]] = None,
    *args,
    **kwargs,
):
    """
    Performs a pre-order traversal of the modules in the hierarchy rooted at
    ``root_module``, applying ``module_fn`` at each module and finally
    returning a value using ``return_fn``. The traversal constructs the full
    module prefix name (e.g. "module.submodule." just like in model state dict)
    and makes that available to ``module_fn``.

    ``filter_fqns`` is used because some module may have its own prefix similar
    to ``FullyShardedDataParallel`` and the ``named_parameters()`` is overwritten
    to remove the prefix.
    """

    def f(module: torch.nn.Module, prefix: str, tree_level: int, *args, **kwargs):
        # Call the module function before recursing over children (pre-order)
        module_fn(module, prefix, tree_level, *args, **kwargs)
        for submodule_name, submodule in module.named_children():
            if submodule is None:
                continue
            new_prefix = prefix + submodule_name + "."
            new_tree_level = tree_level + 1
            if filter_fqns is not None:
                for fqn in filter_fqns:
                    if fqn.startswith(new_prefix):
                        break
                else:
                    # DMP's named_parameter() will mess up the traversal with
                    # ``named_children`` + `named_parameter(recurse=False)``.
                    # This hack is a must to make the traversal work.
                    if (
                        submodule_name == "_zero3_wrapped_module"
                        or submodule_name == "_dmp_wrapped_module"
                    ):
                        if (
                            not torch.distributed._functional_collectives.is_torchdynamo_compiling()
                        ):
                            warnings.warn(
                                "An unexpected prefix is detected. This case "
                                " should only happen when using DMP with FSDP. "
                                f"prefix = {prefix}, "
                                f"submodule_name = {submodule_name}"
                            )
                        new_prefix = prefix
                    elif submodule_name == "module":
                        warnings.warn(
                            "An unexpected prefix is detected. This case "
                            " should only happen when DDP wraps the outer "
                            " modules while FSDP wraps the inner ones."
                            f"prefix = {prefix}, "
                            f"submodule_name = {submodule_name}"
                        )
                        new_prefix = prefix
            f(submodule, new_prefix, new_tree_level, *args, **kwargs)

    f(root_module, "", 0, *args, **kwargs)
    return return_fn(*args, **kwargs)


@no_type_check
def _assert_in_training_states(
    state: _ZeRO3State,
    training_states: List[TrainingState],
) -> None:
    """Asserts that zero3 is in the states ``_training_states``."""
    # Raise a `ValueError` instead of using `assert` to ensure that these
    # logical assertions run even if `assert`s are disabled
    if state.training_state not in training_states:
        msg = (
            f"expected to be in states {training_states} but current state is "
            f"{state.training_state}"
        )
        # Print the error on rank 0 in case this is called in the backward pass
        if state.zero3_rank == 0:
            if isinstance(state, nn.Module):
                print(f"Asserting FSDP instance is: {state}")
            print(f"ERROR: {msg}")
            traceback.print_stack()
        raise ValueError(msg)


def _no_dispatch_record_stream(tensor: torch.Tensor, stream: torch.Stream) -> None:
    """Call ``tensor.record_stream(stream)`` under ``no_dispatch()``.

    Does nothing unless the tensor's device type is "cuda", "npu" or the
    registered privateuse1 backend name.
    """
    if tensor.device.type not in ["cuda", torch._C._get_privateuse1_backend_name(), "npu"]:
        return

    # Don't no dispatch under torch compile like this
    with no_dispatch():
        tensor.record_stream(stream)


def _same_storage_as_data_ptr(x: torch.Tensor, data_ptr: int) -> bool:
    """Return whether the storage of ``x`` starts at ``data_ptr``."""
    return x._typed_storage()._data_ptr() == data_ptr
# diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_exec_order_utils.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_exec_order_utils.py
# new file mode 100644
# index 000000000..1230e2482
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_exec_order_utils.py
# @@ -0,0 +1,153 @@
import logging
from enum import auto, Enum
from typing import Dict, List, Optional, Tuple, Union

import torch.distributed as dist
import torch.nn as nn
from mindspeed.core.distributed.layerzero.zero3.flat_param import FlatParamHandle
+import mindspeed.core.distributed.layerzero.zero3._traversal_utils as traversal_utils + +logger = logging.getLogger(__name__) +logger.setLevel(logging.WARNING) + + +class _ExecOrderWarnStatus(Enum): + """Used internally for execution order validation.""" + + NONE = auto() # no deviation yet + WARNING = auto() # deviated this iteration; currently issuing warnings + WARNED = auto() # deviated in a previous iteration + + +class _ExecOrderData: + + def __init__( + self, + backward_prefetch_limit: int, + forward_prefetch_limit: int, + ) -> None: + # Tracks the (static) pre-forward order for execution order validation + # and forward prefetching + self.handles_pre_forward_order: List[FlatParamHandle] = [] + # Tracks the post-forward order for pre-backward prefetching + self.handles_post_forward_order: List[Optional[FlatParamHandle]] = [] + self._iter = 0 + + # Gives the max number of backward/forward prefetched all-gathers by a + # single module + self._backward_prefetch_limit = backward_prefetch_limit + self._forward_prefetch_limit = forward_prefetch_limit + + self.process_group: Optional[dist.ProcessGroup] = None + self.world_size: Optional[int] = None + self.all_handles: List[FlatParamHandle] = [] + + def init( + self, + state, + root_module: nn.Module, + process_group: dist.ProcessGroup, + ) -> None: + """ + Initializes the data structures needed for checking the forward order. + This should be called after a root FSDP instance has been set during + lazy initialization. 
+ """ + self.process_group = process_group + self.rank = process_group.rank() + self.world_size = process_group.size() + # Fix an order over the handles, which should be the same across ranks + for handle in traversal_utils._get_zero3_handles(root_module): + index = len(self.all_handles) + self.all_handles.append(handle) + handle._handle_index = index + + @property + def is_first_iter(self) -> bool: + return self._iter == 0 + + def get_handle_to_backward_prefetch( + self, + current_handle: FlatParamHandle, + ) -> Optional[FlatParamHandle]: + """ + Returns a :class:`list` of the handles keys of the handles to backward + prefetch given the current handles key. If there are no valid handles + keys to prefetch, then this returns an empty :class:`list`. + """ + current_index = current_handle._post_forward_index + if current_index is None: + return None + target_index = current_index - 1 + target_handle: Optional[FlatParamHandle] = None + for _ in range(self._backward_prefetch_limit): + if target_index < 0: + break + target_handle = self.handles_post_forward_order[target_index] + target_index -= 1 + return target_handle + + def get_handle_to_forward_prefetch( + self, + current_handle: FlatParamHandle, + ) -> Optional[FlatParamHandle]: + """ + Returns a :class:`list` of the handles keys of the handles to forward + prefetch given the current handles key. If there are no valid handles + keys to prefetch, then this returns an empty :class:`list`. 
+ """ + current_index = current_handle._pre_forward_order_index + if current_index is None: + return None + target_index = current_index + 1 + target_handle: Optional[FlatParamHandle] = None + for _ in range(self._forward_prefetch_limit): + if target_index >= len(self.handles_pre_forward_order): + break + target_handle = self.handles_pre_forward_order[target_index] + target_index += 1 + return target_handle + + def get_handle_to_post_backward( + self, + current_handle: FlatParamHandle, + ) -> List[FlatParamHandle]: + current_index = current_handle._pre_forward_order_index + if current_index is None: + return [] + target_index = current_index + 1 + target_handle: List[FlatParamHandle] = [] + for _ in range(len(self.handles_pre_forward_order)): + if target_index >= len(self.handles_pre_forward_order): + break + target_handle.append(self.handles_pre_forward_order[target_index]) + target_index += 1 + return target_handle + + def record_post_forward(self, handle: Optional[FlatParamHandle]) -> None: + if not handle or handle._post_forward_index is not None: + return + index = len(self.handles_post_forward_order) + handle._post_forward_index = index + + self.handles_post_forward_order.append(handle) + + def record_pre_forward( + self, handle: Optional[FlatParamHandle], is_training: bool + ) -> None: + if not handle: + return + # Fix the order after the first iteration and only record the first + # usage of a handles key + if not self.is_first_iter or handle._pre_forward_order_index is not None: + return + index = len(self.handles_pre_forward_order) + handle._pre_forward_order_index = index + self.handles_pre_forward_order.append(handle) + + def next_iter(self): + self._iter += 1 + self.handles_post_forward_order.clear() + + def next_iter_during_accumulation(self): + self._iter += 1 diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_init_utils.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_init_utils.py new file mode 100644 
index 000000000..033a3a591 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_init_utils.py @@ -0,0 +1,665 @@ +import collections +import warnings +from typing import ( + Any, + Callable, + Deque, + Dict, + Iterable, + Iterator, + List, + no_type_check, + Optional, + Set, + Tuple, + Union, +) + +import torch +import torch.distributed as dist + +import torch.nn as nn +from torch.utils.hooks import RemovableHandle +from megatron.core import mpu + +from mindspeed.core.distributed.layerzero.zero3._common_utils import ( + _DeviceHandle, + _ZeRO3State, + _get_module_zero3_state, + _is_zero3_flattened, + _named_parameters_with_duplicates, + clean_tensor_name, + TrainingState, +) +from mindspeed.core.distributed.layerzero.zero3.api import ( + BackwardPrefetch, + BackwardReduceScatter, + MixedPrecision, +) +from mindspeed.core.distributed.layerzero.zero3.flat_param import ( + FlatParameter, + FlatParamHandle, +) +from mindspeed.core.distributed.layerzero.zero3._limiter import _FreeEventQueue +import mindspeed.core.distributed.layerzero.zero3._exec_order_utils as exec_order_utils +import mindspeed.core.distributed.layerzero.zero3._traversal_utils as traversal_utils +import mindspeed.core.distributed.layerzero.zero3.fsdp as zero3_file + + +PARAM_BROADCAST_BUCKET_SIZE = int(250 * 1024 * 1024) +ZERO3_SYNCED = "_zero3_synced" +# Overall specification of process group. 
+ProcessGroupType = Tuple[dist.ProcessGroup, dist.ProcessGroup] + + +def _get_gradient_predivide_factor(world_size: int) -> float: + factor: int = 1 + while world_size % factor == 0 and world_size / factor > factor: + factor *= 2 + return float(factor) + + +@no_type_check +def _init_process_group_state( + state: _ZeRO3State, + process_group: ProcessGroupType, +) -> _ZeRO3State: + + state.zero3_process_group, state.zero1_process_group = process_group + state.zero3_rank = state.zero3_process_group.rank() + data_parallel_world_size = state.zero1_process_group.size() + state.world_size = data_parallel_world_size + state.global_rank = dist.get_rank() + if mpu.is_initialized(): + state._gradient_predivide_factor = float(dist.get_world_size( + mpu.get_data_parallel_group(with_context_parallel=True))) + else: + state._gradient_predivide_factor = data_parallel_world_size + return state + + +@no_type_check +def _init_ignored_module_states( + state: _ZeRO3State, + module: nn.Module, + ignored_modules: Optional[Iterable[torch.nn.Module]], + ignored_states: Union[ + Optional[Iterable[torch.nn.Parameter] + ], Optional[Iterable[torch.nn.Module]] + ] = None, +) -> _ZeRO3State: + if ignored_modules is not None and ignored_states is not None: + raise ValueError( + "Cannot pass both ignored_modules and ignored_states at the " + "same time. Please just pass ignored_states." 
+ ) + ignored_parameters = None + passed_as_ignored_states = ignored_states is not None + if passed_as_ignored_states: + ignored_states_list = list(ignored_states) + _check_ignored_states(ignored_states_list, True) + else: + ignored_states_list = [] + _check_ignored_states( + list(ignored_modules) if ignored_modules is not None else [], False + ) + if len(ignored_states_list) > 0: + if isinstance(ignored_states_list[0], nn.Parameter): + ignored_parameters = ignored_states_list + else: + ignored_modules = ignored_states_list + state._ignored_modules = _get_ignored_modules(module, ignored_modules) + state._ignored_params = _get_ignored_params( + module, + state._ignored_modules, + ignored_parameters, + ) + state._ignored_buffer_names = _get_ignored_buffer_names( + module, + state._ignored_modules, + ) + return state + + +def _check_ignored_states( + ignored_states: List[Any], passed_as_ignored_states: bool +) -> None: + """ + Checks that the ignored states are uniformly parameters or uniformly + modules. We may remove this check in the future if we permit mixing. 
+ """ + if len(ignored_states) == 0: + return + if passed_as_ignored_states: + all_params = all(isinstance(state, nn.Parameter) + for state in ignored_states) + all_modules = all(isinstance(state, nn.Module) + for state in ignored_states) + if not all_params and not all_modules: + # Sort for consistent ordering for unit test regex matching + sorted_types = sorted( + {type(state) for state in ignored_states}, key=lambda x: repr(x) + ) + raise ValueError( + "ignored_states expects all nn.Parameter or all nn.Module list " + f"elements but got types {sorted_types}" + ) + else: + if not all(isinstance(state, nn.Module) for state in ignored_states): + sorted_types = sorted( + {type(state) for state in ignored_states}, key=lambda x: repr(x) + ) + raise ValueError( + "ignored_modules expects nn.Module list elements but got " + f"types {sorted_types}" + ) + + +@no_type_check +def _init_device_handle( + state: _ZeRO3State, + module: nn.Module, + ignored_params: Set[nn.Parameter], + device_id: Optional[Union[int, torch.device]], +) -> _ZeRO3State: + determined_device = None + if device_id is not None: + determined_device = ( + device_id + if isinstance(device_id, torch.device) + else torch.device(device_id) + ) + if determined_device is None: + for param in _get_orig_params(module, ignored_params): + if param.device.type in {"cpu", "meta"}: + continue + if determined_device is None: + determined_device = param.device + else: + if param.device.type != determined_device.type: + raise RuntimeError( + f"FSDP does not support modules with different device types " + f"but got params on {determined_device.type} and {param.device.type}" + ) + determined_device = determined_device or torch.device( + "cuda", torch.cuda.current_device() + ) + + state._device_handle = _DeviceHandle.from_device(determined_device) + return state + + +@no_type_check +def _init_buffer_state( + state: _ZeRO3State, + module: nn.Module, +) -> _ZeRO3State: + state._buffer_names = _get_buffer_names(module) + # 
@no_type_check
def _init_core_state(
    state: _ZeRO3State,
    mixed_precision: Optional[MixedPrecision],
    limit_all_gathers: bool,
    backward_prefetch_limit: int,
    forward_prefetch_limit: int,
    offload_grads: bool = False
) -> _ZeRO3State:
    """
    Populates ``state`` with its core bookkeeping attributes and returns it.

    Sets the mixed-precision config (a default ``MixedPrecision()`` when none
    is given), the all-gather limiting flag, the initial training state, three
    event queues, the gradient-offload flag, the distributed debug level, the
    execution-order tracker, and empty handle/parameter containers.
    """
    state.mixed_precision = mixed_precision or MixedPrecision()
    if mixed_precision is not None:
        # Only log usage when the caller configured mixed precision explicitly
        usage_key = f"mixed_precision.{str(state.mixed_precision)}"
        torch._C._log_api_usage_once(usage_key)

    state.limit_all_gathers = limit_all_gathers
    state.training_state = TrainingState.IDLE
    # Root-ness is not known yet at this point
    state._is_root = None
    # One independent queue per event kind
    for queue_attr in ("_free_event_queue", "_rs_event_queue", "_offload_event_queue"):
        setattr(state, queue_attr, _FreeEventQueue())
    state._offload_grads = offload_grads

    state._debug_level = dist.get_debug_level()
    state._exec_order_data = exec_order_utils._ExecOrderData(
        backward_prefetch_limit,
        forward_prefetch_limit,
    )

    # Mapping from a sharded module to the handle registered for it
    module_to_handle: Dict[nn.Module, FlatParamHandle] = {}
    state._zero3_module_to_handle = module_to_handle
    # Invariant: `state.params` holds the `FlatParameter` of `state._handle`;
    # both stay empty until a handle is created for this state
    state._handle = None
    flat_params: List[FlatParameter] = []
    state.params = flat_params
    return state
def _get_ignored_modules(
    root_module: nn.Module,
    _ignored_modules: Optional[Iterable[torch.nn.Module]],
) -> Set[nn.Module]:
    """
    Checks that ``_ignored_modules`` is an iterable of ``nn.Module`` s without
    any modules that already carry a ZeRO3 state, and returns the modules
    contained in their module subtrees as a :class:`set`. Nested
    ``LayerZeRO3`` instances are excluded, but the ignored modules already
    recorded on nested states are included.

    ``_ignored_modules`` represents the argument passed by the user.

    Raises:
        TypeError: if ``_ignored_modules`` is not iterable or contains a
            non-``nn.Module`` element.
        ValueError: if an ignored module already has a ZeRO3 state.
        AttributeError: if a nested state lacks ``_ignored_modules``.
    """
    msg_prefix = "`ignored_modules` should be an iterable of `torch.nn.Module`s "
    try:
        ignored_root_modules = (
            set(_ignored_modules) if _ignored_modules is not None else set()
        )
    except TypeError as e:
        raise TypeError(
            msg_prefix + f"but got {type(_ignored_modules)}") from e
    for module in ignored_root_modules:
        if not isinstance(module, torch.nn.Module):
            raise TypeError(
                msg_prefix + f"but got an iterable with {type(module)}")
        if _get_module_zero3_state(module):
            raise ValueError(
                "`ignored_modules` should not include FSDP modules")
    # Treat modules that cannot compose with `fully_shard` as ignored modules,
    # meaning that their subtrees are ignored
    for module in root_module.modules():
        if not traversal_utils._composable(module):
            ignored_root_modules.add(module)
    # NOTE: Even if `ignored_root_modules` is empty, do not return early so
    # that this instance can get any ignored modules from its children.

    # Include child modules and exclude nested wrapper modules themselves
    ignored_modules = {
        child
        for ignored_root in ignored_root_modules
        for child in ignored_root.modules()
        if not isinstance(child, zero3_file.LayerZeRO3)
    }
    if root_module in ignored_modules:
        # Report the root module itself; the loop variable above would name
        # whichever module happened to be visited last
        warnings.warn(
            "Trying to ignore the top-level module passed into the FSDP "
            "constructor itself will result in all parameters being "
            f"ignored and is not well-supported: {root_module}"
        )
    # Include nested modules' already-computed ignored modules
    for submodule in root_module.modules():
        optional_fsdp_state = _get_module_zero3_state(submodule)
        if optional_fsdp_state is not None:
            if not hasattr(optional_fsdp_state, "_ignored_modules"):
                raise AttributeError(
                    "State has not attribute _ignored_modules")
            ignored_modules.update(optional_fsdp_state._ignored_modules)
    return ignored_modules
def _get_ignored_buffer_names(
    root_module: torch.nn.Module,
    ignored_modules: Set[torch.nn.Module],
) -> Set[str]:
    """
    Collects the cleaned names, relative to ``root_module``, of the buffers
    owned by ``ignored_modules``, plus the ignored buffer names already
    recorded on any ZeRO3 state found under ``root_module``.

    Raises:
        AttributeError: if a nested state lacks ``_ignored_buffer_names``.
    """
    ignored_buffers = set()
    for ignored_module in ignored_modules:
        ignored_buffers.update(ignored_module.buffers())

    ignored_names: Set[str] = set()
    for fqn, tensor in root_module.named_buffers():
        if tensor in ignored_buffers:
            ignored_names.add(clean_tensor_name(fqn))

    # Always include nested states' ignored buffer names
    for submodule in root_module.modules():
        nested_state = _get_module_zero3_state(submodule)
        if nested_state is None:
            continue
        if not hasattr(nested_state, "_ignored_buffer_names"):
            raise AttributeError(
                "State has not attribute _ignored_buffer_names")
        ignored_names.update(nested_state._ignored_buffer_names)

    return ignored_names
+ """ + if device_id is None: + return None + device = ( + device_id if isinstance( + device_id, torch.device) else torch.device(device_id) + ) + return device + + +def _move_module_to_device( + module: nn.Module, + ignored_params: Set[nn.Parameter], + device_from_device_id: Optional[torch.device], +) -> None: + cpu_device = torch.device("cpu") + if device_from_device_id is not None: + # BFS from `module` without traversing any nested FSDP instances to + # collect the parameters/buffers that have not yet been managed + queue: Deque[nn.Module] = collections.deque() + queue.append(module) + params: List[nn.Parameter] = [] + buffers: List[torch.Tensor] = [] + while queue: + curr_module = queue.popleft() + params.extend( + param + for param in curr_module.parameters(recurse=False) + if param.device == cpu_device + ) + buffers.extend( + buffer + for buffer in curr_module.buffers(recurse=False) + if buffer.device == cpu_device + ) + for submodule in curr_module.children(): + if not isinstance(submodule, zero3_file.LayerZeRO3): + queue.append(submodule) + + _move_states_to_device(params, buffers, device_from_device_id) + return + param = next(_get_orig_params(module, ignored_params), None) + if param is not None and param.device == cpu_device: + _warn_cpu_init() + + +def _move_states_to_device( + params: List[nn.Parameter], + buffers: List[torch.Tensor], + device_from_device_id: Optional[torch.device], +) -> None: + """ + Precondition: ``_check_single_device_module()`` and module's parameters and + buffers have been materialized if needed. 
+ """ + if len(params) == 0 and len(buffers) == 0: + return + if len(params) > 0: + current_device = params[0].device + elif len(buffers) > 0: + current_device = buffers[0].device + cpu_device = torch.device("cpu") + if device_from_device_id is not None: + # Move the parameters and buffers like the `.data` code path in + # `nn.Module._apply()`, which underlies `nn.Module.to()` + for param in params: + with torch.no_grad(): + param.data = param.to(device_from_device_id) + if param.grad is not None: + param.grad.data = param.grad.to(device_from_device_id) + for buffer in buffers: + buffer.data = buffer.to(device_from_device_id) + elif current_device == cpu_device: + _warn_cpu_init() + + +def _warn_cpu_init(): + warnings.warn( + "The passed-in `module` is on CPU and will thus have FSDP's sharding " + "initialization run on CPU, which may be slower than on GPU. We " + "recommend passing in the `device_id` argument for FSDP to move " + "`module` to GPU for the sharding initialization. `module` must also " + "be on GPU device to work with the `sync_module_states=True` flag " + "since that requires GPU communication." + ) + + +def _get_compute_device( + module: nn.Module, + ignored_params: Set[nn.Parameter], + device_from_device_id: Optional[torch.device], + rank: int, +) -> torch.device: + """ + Determines and returns this FSDP instance's compute device. If a device is + specified by ``device_id``, then returns that device. Otherwise, If the + module is already on a non-CPU device, then the compute device is that non-CPU + device. If the module is on CPU, then the compute device is the current + device. + + Since this method should be called after materializing the module, any + non-CPU device should not be meta device. For now, the compute device is + always a CUDA GPU device with its explicit index. + + Precondition: ``_check_single_device_module()`` and + ``_move_module_to_device()``. 
def _get_orig_params(
    module: nn.Module,
    ignored_params: Set[nn.Parameter],
) -> Iterator[nn.Parameter]:
    """
    Lazily yields the parameters of ``module`` that are neither ignored nor
    flattened.

    Iterates ``module.parameters()`` and skips every parameter that is in
    ``ignored_params`` or for which ``_is_zero3_flattened`` returns true.
    """
    # A plain `for` loop ends on exhaustion by itself, so no manual
    # `next()` / `StopIteration` handling is needed
    for param in module.parameters():
        if param not in ignored_params and not _is_zero3_flattened(param):
            yield param
+ """ + for param_name, param in _named_parameters_with_duplicates(zero3_module): + if param not in ignored_params and not _is_zero3_flattened(param): + raise RuntimeError( + f"Found an unflattened parameter: {param_name}; " + f"{param.size()} {param.__class__}" + ) diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_limiter.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_limiter.py new file mode 100644 index 000000000..05a12a4f5 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_limiter.py @@ -0,0 +1,25 @@ +import collections + + +class _FreeEventQueue: + + def __init__(self, num_inflights: int = 3) -> None: + self._queue = collections.deque() + self._max_num_inflight_all_gathers = num_inflights + + def enqueue(self, free_event) -> None: + """Enqueues a free event.""" + self._queue.append(free_event) + + def dequeue_if_needed(self): + """Dequeues a single event if the limit is reached.""" + if len(self._queue) >= self._max_num_inflight_all_gathers: + return self._dequeue() + return None + + def _dequeue(self): + """Dequeues a free event if possible.""" + if self._queue: + event = self._queue.popleft() + return event + return None diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_traversal_utils.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_traversal_utils.py new file mode 100644 index 000000000..4c2b333ee --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/_traversal_utils.py @@ -0,0 +1,107 @@ +""" +NOTE: This file must be imported like +``import torch.distributed.fsdp._traversal_utils`` and not like +``from torch.distirbuted.fsdp._traversal_utils import ...`` to avoid circular +imports. For brevity, we may import the file as ``traversal_utils``. 
def _get_zero3_states_with_modules(
    module: nn.Module,
) -> Tuple[List["_ZeRO3State"], List[nn.Module]]:
    """
    Walks the module tree rooted at ``module`` depth-first and returns two
    parallel lists:

    1. The distinct ``_ZeRO3State`` objects found, in visiting order.
    2. For each state, the first module on which it was found.

    A module for which ``_composable`` is false is skipped together with its
    subtree: neither its state nor its children are considered.
    """
    found_states: List["_ZeRO3State"] = []
    owning_modules: List[nn.Module] = []
    # Several modules may share one state; report each state only once
    seen_states: Set["_ZeRO3State"] = set()
    # Shared submodules make the graph a DAG; do not schedule them again
    # once they have been visited
    seen_modules: Set[nn.Module] = set()

    # Explicit stack; children are pushed in reverse so that the first child
    # is processed next, matching the `.modules()` order
    stack = [module]
    while stack:
        current = stack.pop()
        seen_modules.add(current)
        if not _composable(current):
            continue
        for child in reversed(list(current.children())):
            if child not in seen_modules:
                stack.append(child)
        state = _get_module_zero3_state(current)
        if state is None or state in seen_states:
            continue
        seen_states.add(state)
        found_states.append(state)
        owning_modules.append(current)
    return found_states, owning_modules
def _auto_wrap(
    root_module: nn.Module,
    policy: Union[Callable, _Policy],
    ignored_modules: Set[nn.Module],
    ignored_params: Set[nn.Parameter],
    root_kwargs: Dict[str, Any],
    fsdp_fn: Callable,  # e.g. `FullyShardedDataParallel` or `fully_shard`
):
    """
    Auto wraps modules in ``root_module`` 's tree according to ``policy``
    following a post-order traversal.

    Precondition: ``root_kwargs`` should contain all arguments except
    ``module``. This function accepts the kwargs dict directly since it gets
    forwarded into the post-order traversal function.

    Args:
        root_module: Root of the module tree to wrap. No module in the tree
            may already carry ZeRO3 state.
        policy: Either a ``_Policy`` instance (handled here through
            ``_run_policy`` + ``_post_order_apply``) or a callable policy
            that is forwarded to ``_recursive_wrap``.
        ignored_modules: Modules forwarded to the policy / recursive wrap.
        ignored_params: Parameters forwarded to the frozen-parameter
            validation / recursive wrap.
        root_kwargs: Must contain the key ``"mixed_precision"``. NOTE: on the
            ``_Policy`` path this dict is mutated in place (its
            ``"auto_wrap_policy"`` or ``"policy"`` entry is set to ``None``).
        fsdp_fn: The wrapper class or wrapping function to apply.

    Raises:
        ValueError: If a module under ``root_module`` already has ZeRO3 state
            (raised by ``_check_nested_wrapping``).
    """
    mixed_precision = root_kwargs["mixed_precision"]
    is_wrapper = inspect.isclass(fsdp_fn)
    _check_nested_wrapping(root_module)

    if isinstance(policy, _Policy):
        root_kwargs["auto_wrap_policy" if is_wrapper else "policy"] = None
        target_module_to_kwargs = policy._run_policy(
            root_module, ignored_modules, root_kwargs
        )
        if mixed_precision is not None:
            target_module_to_kwargs = _run_mixed_precision_override_policy(
                root_module,
                mixed_precision._module_classes_to_ignore,
                ignored_modules,
                root_kwargs,
                target_module_to_kwargs,
            )
            overridden_module_classes = _override_module_mixed_precision(
                root_module, mixed_precision._module_classes_to_ignore
            )
            _warn_on_overridden_mixed_precision(overridden_module_classes)

        # Only the lookup of the global args is best-effort. Keeping the TP
        # override outside the `try` ensures that an AssertionError raised
        # while applying it is not swallowed and misreported as
        # "args not initialized".
        # NOTE(review): assumes `get_args()` signals uninitialized global
        # args with AssertionError, as the warning text implies -- confirm.
        try:
            args = get_args()
        except AssertionError:
            warnings.warn(
                "Global args is not correctly initialized, skip TP wrapping...")
            args = None
        if args is not None and args.tensor_model_parallel_size > 1:
            _run_tensor_parallel_pg_override_policy(
                root_module,
                {ColumnParallelLinear, RowParallelLinear, VocabParallelEmbedding},
                ignored_modules,
                root_kwargs,
                target_module_to_kwargs,
            )

        _validate_frozen_params(
            root_module,
            set(target_module_to_kwargs.keys()),
            ignored_params,
            True,
        )
        wrap_fn = _construct_wrap_fn(
            root_module, target_module_to_kwargs, fsdp_fn)
        _post_order_apply(root_module, wrap_fn)
        return

    recursive_wrap_kwargs = {
        "module": root_module,
        "auto_wrap_policy": policy,
        "wrapper_cls": fsdp_fn,
        "ignored_modules": ignored_modules,
        "ignored_params": ignored_params,
        "only_wrap_children": True,
    }
    if mixed_precision is not None:
        # Wrap modules of the ignored types separately and register forward
        # hooks to cast to fp32 and back to the original dtype, respectively
        overridden_module_classes = _override_module_mixed_precision(
            root_module, mixed_precision._module_classes_to_ignore
        )
        policy = functools.partial(
            _or_policy,
            policies=[
                policy,
                functools.partial(
                    _wrap_module_cls_individually,
                    module_classes=mixed_precision._module_classes_to_ignore,
                ),
            ],
        )
        recursive_wrap_kwargs["auto_wrap_policy"] = policy
        _warn_on_overridden_mixed_precision(overridden_module_classes)
    _recursive_wrap(**recursive_wrap_kwargs, **root_kwargs)  # type: ignore[arg-type]
@@ +import contextlib +import functools +import logging +import os +import warnings +from itertools import accumulate, chain +from typing import ( + Any, + Callable, + cast, + Dict, + Generator, + Iterator, + List, + NamedTuple, + no_type_check, + Optional, + Sequence, + Set, + Tuple, + Union, +) + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor + +from torch.distributed.utils import _alloc_storage, _free_storage, _p_assert +from torch.nn.parameter import _ParameterMeta # type: ignore[attr-defined] +from mindspeed.core.distributed.layerzero.zero3._common_utils import ( + _DeviceHandle, + _named_parameters_with_duplicates, + _no_dispatch_record_stream, + _set_zero3_flattened, + HandleTrainingState, +) + +__all__ = [ + "FlatParameter", + "FlatParamHandle", + "FlatParamShardMetadata", + "ParamInfo", + "SharedParamInfo", + "HandleShardingStrategy", +] + +logger = logging.getLogger(__name__) +logger.setLevel(logging.WARNING) + + +""" +[Note: Fully Sharded Module] +We define the "fully sharded module" to be the original ``nn.Module`` that owns +a ``FlatParamHandle``. It is the *single* module logically responsible for the +*single* unshard/reshard pair for the handle's ``FlatParameter`` for a given +forward or backward pass. The fully sharded module should be passed to the +``FlatParamHandle`` constructor. + +For the wrapper code path: +- The ``FullyShardedDataParallel`` module wrapping the fully sharded module +runs the unshard/reshard on behalf of the ful+ly sharded module by overriding +``nn.Module.forward``. +- The fully sharded module is exactly the module passed to the +``FullyShardedDataParallel`` constructor's ``module`` argument. + +For the non-wrapper code path: +- Hooks registered on the fully sharded module run the unshard/reshard. +- The fully sharded module may either be the direct argument to ``fully_shard`` +or a submodule chosen by the provided wrapping policy. 
class _ShardParamInfo(NamedTuple):
    """Shard-related information for an original parameter."""

    # Whether any element of the parameter falls inside this rank's shard.
    # When False, the four shard-relative fields below are all `None`.
    in_shard: bool
    # Use to index into the sharded flat parameter, e.g.
    # `flat_param[offset_in_shard : offset_in_shard + numel_in_shard]`
    offset_in_shard: Optional[int]
    numel_in_shard: Optional[int]
    # Use to get part of the parameter in the local shard from a flattened
    # version of the unsharded parameter, e.g.
    # `param.flatten()[intra_param_start_idx : intra_param_end_idx + 1]`
    intra_param_start_idx: Optional[int]
    intra_param_end_idx: Optional[int]  # inclusive
    # Position of the whole parameter inside the unsharded flat parameter.
    # The end index is inclusive, so the matching slice is
    # `unshard_data[flat_param_start_idx : flat_param_end_idx + 1]`
    flat_param_start_idx: Optional[int] = None
    flat_param_end_idx: Optional[int] = None  # inclusive
+ + Attributes: + param_names (Tuple[str, ...]): Prefixed parameter names of this rank's + shard of the parameters; see :class:`FlatParameter`. + param_shapes (Tuple[torch.Size, ...]): Parameter shapes of this rank's + shard of the parameters; see :class:`FlatParameter`. + param_numels (Tuple[int, ...]): Parameter numels of this rank's shard + of the parameters; see :class:`FlatParameter`. + param_offsets (Tuple[Tuple[int, int], ...]): [start, end] offsets (in + units of numels) giving this rank's part of each flattened + original parameter. + """ + + param_names: Tuple[str, ...] + param_shapes: Tuple[torch.Size, ...] + param_numels: Tuple[int, ...] + param_offsets: Tuple[Tuple[int, int], ...] + + +class _FlatParameterMeta(_ParameterMeta): + # Make `isinstance(t, FlatParameter)` return True for custom tensor + # instances that have the _is_flat_param flag for BC + def __instancecheck__(self, instance): + # NB: do NOT test the super implementation + return isinstance(instance, torch.Tensor) and getattr( + instance, "_is_flat_param", False + ) + + +class FlatParameter(nn.Parameter, metaclass=_FlatParameterMeta): + _unpadded_unsharded_size: torch.Size + _padded_unsharded_size: torch.Size + _sharded_size: torch.Size + _num_params: int + _param_infos: Tuple[ParamInfo, ...] + _shapes: Tuple[torch.Size, ...] + _fqns: Tuple[str, ...] + _numels_with_padding: Tuple[int, ...] + _numels: Tuple[int, ...] + _shard_param_infos: Tuple[_ShardParamInfo, ...] + _shared_param_infos: Tuple[SharedParamInfo, ...] 
@classmethod
def _init_metadata(
    cls,
    self,
    param_infos: List[ParamInfo],
    numels: List[int],
    shapes: List[torch.Size],
    fqns: List[str],
    shared_param_infos: List[SharedParamInfo],
    params: Optional[List[nn.Parameter]],
    shared_params: Optional[List[nn.Parameter]],
    is_padding_mask: List[bool],
) -> None:
    """
    Attaches to the flat parameter ``self`` the metadata describing the
    original parameters it was built from.

    This is kept separate from the constructor so that the constructor is
    only responsible for the tensor data. It is meant to run once per model,
    whereas the constructor may run again (e.g. when reloading from a
    checkpoint) with only the tensor data.

    Args:
        See the attribute annotations on the class. ``numels`` and
        ``is_padding_mask`` have one entry per flattened tensor *including*
        padding tensors; ``param_infos``, ``shapes`` and ``fqns`` have one
        entry per real parameter.

    Raises:
        ValueError: If ``param_infos``, ``shapes`` and ``fqns`` differ in
            length.
        AssertionError: If the number of non-padding numels does not match
            the number of parameters, if exactly one of ``params`` /
            ``shared_params`` is ``None``, or if ``shared_params`` and
            ``shared_param_infos`` differ in length.
    """
    num_infos = len(param_infos)
    if num_infos != len(shapes) or num_infos != len(fqns):
        raise ValueError("Incorrect number of param_infos")

    self._num_params = num_infos
    self._param_infos = param_infos
    self._shapes = shapes
    self._fqns = fqns
    self._is_padding_mask = is_padding_mask

    # Keep both views of the sizes: real parameters only, and everything
    # that was flattened (real parameters plus padding tensors)
    self._numels = tuple(
        numel for numel, is_pad in zip(numels, is_padding_mask) if not is_pad
    )
    self._numels_with_padding = tuple(numels)
    if len(self._numels) != self._num_params:
        raise AssertionError("self._numels do not match num_param")

    self._shared_param_infos = tuple(shared_param_infos)
    owning_modules = {info.module for info in self._param_infos}
    owning_modules |= {info.module for info in self._shared_param_infos}
    self._modules = owning_modules

    params_missing = params is None
    if params_missing != (shared_params is None):
        raise AssertionError("Param and Shared_param should be both None or non-None")
    if params_missing:
        self._params = None
        self._shared_params = None
        self._is_grad_none_mask = None
        self._tensors = None
    else:
        if len(shared_params) != len(shared_param_infos):
            raise AssertionError("shared_params do not match shared_param_infos")
        self._params = [
            param for param, is_pad in zip(params, is_padding_mask) if not is_pad
        ]
        self._shared_params = shared_params
        # Mark the original parameters to avoid flattening them into
        # another `FlatParameter` during recursive construction
        for original in chain(self._params, self._shared_params):
            _set_zero3_flattened(original)
        self._is_grad_none_mask = [False] * self._num_params
        self._tensors = [None] * self._num_params
    self._unpadded_unsharded_size = self.size()
    _set_zero3_flattened(self)
    # Tracks whether the `FlatParameter`'s post-backward hook has been
    # called to modify the behavior of the post-backward callback
    self._post_backward_called = False
################## + # INITIALIZATION # + ################## + def __init__( + self, + params: Sequence[Union[nn.Parameter, Tensor]], + zero3_module: nn.Module, + device: torch.device, + mp_param_dtype: Optional[torch.dtype], + mp_reduce_dtype: Optional[torch.dtype], + zero3_process_group: dist.ProcessGroup, + zero1_process_group: dist.ProcessGroup, + offload_grads: bool = False + ): + self.initialize(params, + zero3_module, + device=device, + mp_param_dtype=mp_param_dtype, + mp_reduce_dtype=mp_reduce_dtype, + zero3_process_group=zero3_process_group, + zero1_process_group=zero1_process_group, + offload_grads=offload_grads + ) + self._init_flat_param_and_metadata( + params, zero3_module, self._aligned_numel, self.zero1_world_size # type: ignore[arg-type] + ) + self._use_unsharded_views(as_params=False) + + + def initialize( + self, + params: Sequence[Union[nn.Parameter, Tensor]], + zero3_module: nn.Module, + device: torch.device, + mp_param_dtype: Optional[torch.dtype], + mp_reduce_dtype: Optional[torch.dtype], + zero3_process_group: dist.ProcessGroup, + zero1_process_group: dist.ProcessGroup, + offload_grads: bool = False + ): + params = list(params) + if len(params) == 0: + raise ValueError( + f"Cannot construct a {self.__class__.__name__} with an empty parameter list" + ) + self._init_setattr_fns() + align_addresses = True + self._init_get_unflat_views_fn(align_addresses) + self.device = device + self._device_handle = _DeviceHandle.from_device(self.device) + self.zero3_process_group = zero3_process_group + self.zero1_process_group = zero1_process_group + self.zero1_world_size = zero1_process_group.size() + self.zero1_group_rank = zero1_process_group.rank() + self.zero3_group_rank = zero3_process_group.rank() + self.zero3_group_size = zero3_process_group.size() + self._training_state = HandleTrainingState.IDLE + self._debug_level = dist.get_debug_level() + self._zero3_module = zero3_module + # For strategies that do not free after forward, we skip using sharded + 
# views after forward since the unsharded data exists. We still switch + # `self.flat_param` to point to the sharded flat parameter since what + # it points to parameterizes behavior. We use the following attribute + # to track which tensor data the parameters are unsharded views into. + self._unsharded_flat_param_for_skipped_views: Optional[Tensor] = None + # The index in the state's `all_handles`, which must be the + # same across ranks for the execution order validation to work + self._handle_index: Optional[int] = None + # Index in handles_to_pre_forward_order + self._pre_forward_order_index: Optional[int] = None + # Index in `handles_post_forward_order` + self._post_forward_index: Optional[int] = None + # Used for guarding against mistargeted forward prefetches + self._needs_pre_forward_unshard = False + # Used for guarding against mistargeted backward prefetches + self._needs_pre_backward_unshard = False + # Was the handle prefetched? Set on successful _prefetch_handle and unshard + self._prefetched = False + self._ran_pre_backward_hook = False + self._ran_post_backward_hook = False + #!==================== add support for zero1 param & grad sync state========================= + self._needs_param_sync = True + self._param_synced = False + self._grad_synced = False + self.enter_backward = False + #!=================================================================================== + self._offload_grads = offload_grads + self.prev_iter_synced = True + # Optimistically assume a valid input `params` and set dtype attributes + # before `_init_flat_param()`, which performs the actual validation + self._orig_param_dtype = params[0].dtype + self._init_param_reduce_dtypes(mp_param_dtype, mp_reduce_dtype) + self._aligned_numel = ( + _get_aligned_numel(unsharded_dtype=self._fwd_bwd_param_dtype) + if align_addresses + else 0 + ) + if self.zero1_world_size % self.zero3_group_size != 0: + raise ValueError(f"The dp {self.zero1_world_size=} is not multiply of 
{self.zero3_group_size=}") + + @property + def full_prec_dtype(self): + return torch.float32 + + @property + def param_dtype(self): + return self._fwd_bwd_param_dtype + + @property + def grad_dtype(self): + return self._reduce_dtype + + def _init_setattr_fns(self): + use_unsafe_setattr = os.environ.get(_FSDP_USE_UNSAFE_SETATTR, "") == "1" + self._setattr_tensor: Callable[[nn.Module, str, Tensor], None] + self._setattr_param: Callable[[nn.Module, str, nn.Parameter], None] + if use_unsafe_setattr: + self._setattr_tensor = _unsafe_setattr_tensor + self._setattr_param = _unsafe_setattr_param + else: + self._setattr_tensor = _safe_setattr_tensor_or_param + self._setattr_param = _safe_setattr_tensor_or_param + + def _init_get_unflat_views_fn(self, align_addresses: bool): + self._get_unflat_views = ( + self._get_unflat_views_aligned + if align_addresses + else self._get_unflat_views_unaligned + ) + + def _init_flat_param_and_metadata( + self, + params: List[Union[Tensor, nn.Parameter]], + module: nn.Module, + aligned_numel: int, + div: int + ) -> None: + """ + NOTE: This should only be called once at construction time, after which + the ``FlatParameter`` metadata is assumed to be static. + + NOTE: The elements of ``params`` should only be ``Tensor`` s when + composing with ``DTensor`` -based tensor parallelism, in which case the + elements may be ``DTensor`` local shards. + """ + if len(params) == 0: + raise ValueError("Expects non-empty `params`") + if aligned_numel < 0: + raise ValueError( + f"Expects non-negative `aligned_numel` but got {aligned_numel}" + ) + ( + dtype, + flat_param_requires_grad, + device, + ) = self._validate_tensors_to_flatten(params) + params_set = set(params) + # For alignment padding, only `numels` gets strictly non-`None` + # elements, and all other lists get `None` elements for padding. 
+ param_infos: List[ParamInfo] = [] + numels: List[int] = [] + shapes: List[torch.Size] = [] + fqns: List[str] = [] + shared_param_infos: List[SharedParamInfo] = [] + shared_param_memo: Dict[ + Union[Tensor, nn.Parameter], Tuple[nn.Module, str, str] + ] = {} + params_to_flatten: List[Union[Tensor, nn.Parameter]] = [] + shared_params: List[Union[Tensor, nn.Parameter]] = [] + is_padding_mask: List[bool] = [] + total_numel = total_numel_without_padding = 0 + for submodule_name, submodule in module.named_modules(remove_duplicate=False): + for param_name, param in _named_parameters_with_duplicates( + submodule, recurse=False + ): + if param not in params_set: + continue + if param in shared_param_memo: # shared reference + prim_module, prim_module_name, prim_param_name = shared_param_memo[ + param + ] + shared_params.append(param) + shared_param_infos.append( + SharedParamInfo( + param_name, + submodule, + submodule_name, + prim_param_name, + prim_module, + prim_module_name, + ) + ) + else: + if aligned_numel > 0: + numel_to_pad = aligned_numel - (total_numel % aligned_numel) + if numel_to_pad > 0 and numel_to_pad < aligned_numel: + padding_tensor = _construct_padding_tensor( + numel_to_pad, dtype, False, device + ) + params_to_flatten.append(padding_tensor) + is_padding_mask.append(True) + numels.append(numel_to_pad) + total_numel += numel_to_pad + param = cast(nn.Parameter, param) + shared_param_memo[param] = (submodule, submodule_name, param_name) + params_to_flatten.append(param) + is_padding_mask.append(False) + param_infos.append(ParamInfo(param_name, submodule, submodule_name)) + numels.append(param.numel()) + shapes.append(param.shape) + fqn = ( + submodule_name + "." 
def _validate_tensors_to_flatten(
    self, tensors: List[Union[Tensor, nn.Parameter]]
) -> Tuple:
    """
    Checks that ``tensors`` can be flattened together and returns the
    metadata shared by them.

    Returns:
        ``(dtype, requires_grad, device)`` where ``requires_grad`` is the
        logical OR over all tensors. All three are ``None`` when ``tensors``
        is empty.

    Raises:
        ValueError: If a tensor is already a ``FlatParameter``, if the first
            tensor is not floating point, or if dtypes or devices are not
            uniform across ``tensors``.
    """
    common_dtype: Optional[torch.dtype] = None
    # Accumulated as the logical OR over each tensor's value
    any_requires_grad: Optional[bool] = None
    common_device: Optional[torch.device] = None
    for tensor in tensors:
        if isinstance(tensor, FlatParameter):
            raise ValueError("Cannot flatten a `FlatParameter`")
        if common_dtype is None:
            # Only reachable for the first tensor; later ones are covered by
            # the uniform-dtype check below
            if not tensor.is_floating_point():
                raise ValueError("Cannot flatten integer dtype tensors")
        elif tensor.dtype != common_dtype:
            raise ValueError(
                f"Must flatten tensors with uniform dtype but got {common_dtype} "
                f"and {tensor.dtype}"
            )
        if common_device is not None and tensor.device != common_device:
            raise ValueError(
                "Must flatten tensors on the same device but got both "
                f"{common_device} and {tensor.device}"
            )
        common_dtype = tensor.dtype
        any_requires_grad = any_requires_grad or tensor.requires_grad
        common_device = tensor.device
    return common_dtype, any_requires_grad, common_device
+ """ + if len(tensors) == 0: + raise ValueError("Expects non-empty `tensors`") + if aligned_numel < 0: + raise ValueError( + f"Expects non-negative `aligned_numel` but got {aligned_numel}" + ) + dtype, _, device = self._validate_tensors_to_flatten(tensors) + flat_tensors: List[Tensor] = [] + if aligned_numel > 0: + total_numel = 0 + for tensor in tensors: + numel_to_pad = aligned_numel - (total_numel % aligned_numel) + if numel_to_pad > 0 and numel_to_pad < aligned_numel: + padding_tensor = _construct_padding_tensor( + numel_to_pad, dtype, False, device + ) + flat_tensors.append(padding_tensor) + total_numel += numel_to_pad + flat_tensors.append(torch.flatten(_detach_if_needed(tensor))) + total_numel += tensor.numel() + numel_to_pad = div - (total_numel % div) + if numel_to_pad > 0 and numel_to_pad < div: + padding_tensor = _construct_padding_tensor( + numel_to_pad, dtype, False, device + ) + flat_tensors.append(padding_tensor) + total_numel += numel_to_pad + else: + flat_tensors = [ + torch.flatten(_detach_if_needed(tensor)) for tensor in tensors + ] + return torch.cat(flat_tensors, dim=0) + + def flatten_tensors_into_flat_param( + self, + tensors: List[Tensor], + aligned_numel: int, + requires_grad: bool, + div: int + ) -> FlatParameter: + flat_param_data = self.flatten_tensors(tensors, aligned_numel, div) + return FlatParameter(flat_param_data, requires_grad=requires_grad) + + def _init_param_reduce_dtypes( + self, + mp_param_dtype: Optional[torch.dtype], + mp_reduce_dtype: Optional[torch.dtype], + ) -> None: + """ + Precondition: ``self.flat_param`` is set. This ensures that this + handle's parameters have a single dtype. + + Postcondition: This sets ``self._fwd_bwd_param_dtype`` and + ``self._reduce_dtype``. If ``mp_param_dtype`` or ``mp_reduce_dtype`` + is ``None``, then we assume the original parameter dtype. 
One special + case is if ``mp_param_dtype`` is not ``None`` and ``mp_reduce_dtype`` + is ``None``, in which case we assume the gradient reduction dtype + matches the forward/backward parameter dtype. + """ + # Save whether these dtypes were specified so that we permit the + # parameter dtype to change up until the lazy initialization + self._fwd_bwd_param_dtype = mp_param_dtype or self._orig_param_dtype + self._reduce_dtype = mp_reduce_dtype or self._orig_param_dtype + if self._fwd_bwd_param_dtype is None or self._reduce_dtype is None: + raise ValueError(f"Runtime dtype not set") + + ################################### + # SHARD INITIALIZATION & METADATA # + ################################### + @torch.no_grad() + def shard(self): + """ + Shards the handle's ``FlatParameter``. This allocates new memory for + the sharded flat parameter and frees the unsharded flat parameter's + storage. + + Postcondition: ``self.flat_param`` is the sharded flat parameter. Shard + metadata attributes are set for all sharding strategies. + """ + flat_param = self.flat_param + _p_assert( + flat_param.storage_offset() == 0, + "The `FlatParameter` is not the sole occupant of its storage", + ) + orig_storage = flat_param._typed_storage() + #! 
_get_shard returns a clone of original parameter + zero1_flat_param, zero1_padded = FlatParamHandle._get_shard( + flat_param, self.zero1_group_rank, self.zero1_world_size + ) + zero1_flat_param = zero1_flat_param.to(self.full_prec_dtype) + flat_param._zero1_shard = zero1_flat_param + flat_param.data = zero1_flat_param # type: ignore[call-overload] + + start_idx = zero1_flat_param.numel() * self.zero1_group_rank + end_idx = zero1_flat_param.numel() * (self.zero1_group_rank + 1) - 1 # inclusive + + self._init_shard_metadata(zero1_padded, start_idx, end_idx) + if orig_storage._size() > 0: + orig_storage._resize_(0) + self._use_sharded_views() + + def _init_shard_metadata( + self, + numel_padded: int, + unsharded_start_idx: int, + unsharded_end_idx: int, + ) -> None: + """ + Initializes shard-related metadata for this rank's shard of the flat + parameter: ``_sharded_size``, ``_shard_param_infos``, and + ``_shard_numel_padded``. + + Args: + numel_padded (int): Numel padded for this rank's sharded flat + parameter. + unsharded_start_idx (int): Start index in the unsharded flat + parameter assigned to this rank. + unsharded_end_idx (int): End index (inclusive) in the unsharded + flat parameter assigned to this rank. + + Precondition: ``self.flat_param`` 's data is the sharded flat + parameter. 
+ """ + flat_param = self.flat_param + flat_param._sharded_size = flat_param.size() # type: ignore[attr-defined] + sharded_flat_param_numel = flat_param.numel() # includes `numel_padded` + _p_assert( + unsharded_start_idx >= 0 and unsharded_start_idx <= unsharded_end_idx, + f"unsharded_start_idx: {unsharded_start_idx} unsharded_end_idx: {unsharded_end_idx}", + ) + _p_assert( + numel_padded <= sharded_flat_param_numel, + f"numel_padded: {numel_padded} " + f"sharded_flat_param_numel: {sharded_flat_param_numel}", + ) + shard_param_infos = self._get_shard_metadata( + unsharded_start_idx, unsharded_end_idx + ) + _p_assert( + len(shard_param_infos) == flat_param._num_params, + f"Expects length {flat_param._num_params} but got {len(shard_param_infos)}" + ) + flat_param._shard_param_infos = shard_param_infos # type: ignore[attr-defined] + flat_param._shard_numel_padded = numel_padded # type: ignore[attr-defined] + + def _get_shard_metadata( + self, + unsharded_start_idx: int, + unsharded_end_idx: int, + ) -> Tuple[_ShardParamInfo, ...]: + """ + Computes the shard metadata based on ``unsharded_start_idx`` and + ``unsharded_end_idx`` (inclusive), which give the interval of the + unsharded flat parameter specifying the shard. 
+ """ + flat_param_offsets = self._get_flat_param_offsets() + _p_assert(len(flat_param_offsets) == len( + self.flat_param._numels_with_padding + ), f"Expected {len(self.flat_param._numels_with_padding)} but got {len(flat_param_offsets)}" + ) + shard_param_infos: List[_ShardParamInfo] = [] + sharded_flat_param_numel = unsharded_end_idx - unsharded_start_idx + 1 + # `unsharded_param_start_idx` and `unsharded_param_end_idx` are indices + # into the unsharded flat parameter (inclusive) of the given parameter + for i, ( + (unsharded_param_start_idx, unsharded_param_end_idx), + is_padding, + ) in enumerate(zip(flat_param_offsets, self.flat_param._is_padding_mask)): + if is_padding: + continue + in_sharded_flat_param = ( + unsharded_start_idx <= unsharded_param_end_idx + and unsharded_end_idx >= unsharded_param_start_idx + ) + if not in_sharded_flat_param: + shard_param_info = _ShardParamInfo(False, None, None, None, None, unsharded_param_start_idx, unsharded_param_end_idx) + else: + if unsharded_start_idx <= unsharded_param_start_idx: + # This branch can only happen once since the rank's + # unsharded start index can only intersect one parameter + intra_param_start_idx = 0 + offset_in_shard = unsharded_param_start_idx - unsharded_start_idx + else: + intra_param_start_idx = ( + unsharded_start_idx - unsharded_param_start_idx + ) + offset_in_shard = 0 + if not ( + offset_in_shard >= 0 and offset_in_shard < sharded_flat_param_numel + ): + raise ValueError( + f"Invalid `offset_in_shard` of {offset_in_shard} for " + f"sharded flat parameter with {sharded_flat_param_numel} numel" + ) + intra_param_end_idx = ( + min(unsharded_param_end_idx, unsharded_end_idx) + - unsharded_param_start_idx + ) + numel_in_shard = intra_param_end_idx - intra_param_start_idx + 1 + shard_param_info = _ShardParamInfo( + True, + offset_in_shard, + numel_in_shard, + intra_param_start_idx, + intra_param_end_idx, + unsharded_param_start_idx, + unsharded_param_end_idx, + ) + 
@staticmethod
def _get_shard_from_padded_unshard_tensor(
    tensor: Tensor,
    rank: int,
    world_size: int,
) -> Tensor:
    """
    Returns ``rank`` 's shard of an already padded unsharded ``tensor``.

    Unlike :meth:`_get_shard`, this never adds padding and returns only the
    shard tensor (not a ``(shard, numel_padded)`` pair): ``tensor`` is
    expected to split evenly across ``world_size`` ranks.

    This method allocates new memory (via :meth:`clone`) since the
    unsharded ``tensor`` may be deallocated after this method returns.

    Args:
        tensor: The padded unsharded tensor to take the shard from.
        rank: Index of the shard to return; must be less than ``world_size``.
        world_size: Number of shards ``tensor`` is split into.

    Raises:
        ValueError: If ``rank >= world_size`` (raised by
            :meth:`_get_unpadded_shard`).
        AssertionError: If this rank's chunk is shorter than the first
            chunk, i.e. ``tensor`` does not split evenly.
    """
    chunk, numel_to_pad = FlatParamHandle._get_unpadded_shard(
        tensor, rank, world_size
    )
    # Validate before cloning so that no memory is allocated for a shard
    # that is going to be rejected anyway
    _p_assert(numel_to_pad == 0, f"The padded unshard flat param should be dividable with {world_size=}")
    return chunk.clone()
def pre_unshard(self) -> bool:
    """
    Prepares the handle for an unshard.

    Two things may happen here:

    1. If sharded views were skipped earlier and the handle is in
       ``SUMMON_FULL_PARAMS`` or ``SYNC_PARAMS``, the sharded views are
       restored via :meth:`_use_sharded_views`.
    2. If :meth:`needs_unshard` reports that the padded unsharded parameter
       is not allocated, its storage is allocated.

    The flat parameter is checked to be on the compute device in between.

    Returns:
        bool: ``False`` if this was a no-op and ``True`` otherwise.
    """
    did_work = False
    # Test the plain flag first; both operands are side-effect free.
    if self._skipped_use_sharded_views and self._training_state in [
        HandleTrainingState.SUMMON_FULL_PARAMS,
        HandleTrainingState.SYNC_PARAMS,
    ]:
        self._use_sharded_views()
        did_work = True
    self._check_on_compute_device(self.flat_param)
    if self.needs_unshard():
        self._alloc_padded_unsharded_flat_tensor()
        did_work = True
    return did_work
self._get_padded_unsharded_flat_tensor(param) + self._check_storage_freed(unsharded_flat_tensor) + _alloc_storage(unsharded_flat_tensor, + flat_param._padded_unsharded_size) + return unsharded_flat_tensor + + def _get_padded_unsharded_flat_tensor(self, param: bool = True, free: bool = True) -> torch.Tensor: + """ + Returns a reference to the padded unsharded flat parameter depending on + the calling context. This should only be called if using a sharded + strategy. + """ + flat_param = self.flat_param + if param: + padded_unsharded_flat_tensor = flat_param._full_param_padded + dtype = self._fwd_bwd_param_dtype + else: + padded_unsharded_flat_tensor = flat_param._full_grad_padded + dtype = self._fwd_bwd_param_dtype + _p_assert( + padded_unsharded_flat_tensor.dtype == dtype, + f"Expects same precision but got {padded_unsharded_flat_tensor.dtype} vs {dtype}", + ) + + if free and padded_unsharded_flat_tensor.untyped_storage().size() > 0: + _free_storage(padded_unsharded_flat_tensor) + return padded_unsharded_flat_tensor + + def _all_gather_flat_param( + self, + padded_unsharded_flat_param: Tensor, + ) -> Tensor: + """ + All-gathers the handle's flat parameter to the destination + ``padded_unsharded_flat_param``, and switches to using the all-gathered + tensor. + """ + _p_assert( + hasattr(self, "zero3_process_group") and hasattr(self, "zero3_group_size"), + "Expects a process group and world size to have been set via `shard()`", + ) + #! cast zero1 param to zero3 param + #! 
def _use_unpadded_unsharded_flat_param(
    self,
    padded_unsharded_flat_param: torch.Tensor,
) -> None:
    """
    Points ``flat_param.data`` at the unpadded prefix of the padded buffer.

    The first ``_unpadded_unsharded_size.numel()`` elements of
    ``padded_unsharded_flat_param`` are viewed with the unpadded unsharded
    size. The original parameters are then re-pointed through
    :meth:`_use_unsharded_views`: with ``as_params=False`` while the handle
    is in ``FORWARD`` or ``BACKWARD_PRE``, and with ``as_params=True`` in
    every other training state.
    """
    target_size = self.flat_param._unpadded_unsharded_size
    unpadded = padded_unsharded_flat_param[: target_size.numel()]
    # this `.view()` is not autograd visible
    self.flat_param.data = unpadded.view(target_size)
    computing = self._training_state in (
        HandleTrainingState.FORWARD,
        HandleTrainingState.BACKWARD_PRE,
    )
    self._use_unsharded_views(as_params=not computing)
@torch.no_grad()
def unshard_grad(self):
    """
    Unshard the handle's ``FlatParameter``'s gradient.

    If all ranks have ``None`` gradient, then all original parameters will
    as well. This method performs an all-reduce (to count ranks whose
    gradient is ``None``) and an all-gather over ``zero1_process_group``.
    The additional all-reduce is tolerable since this method is not meant
    to be used on the computation critical path.

    Postcondition: when this rank has no gradient, ``_saved_grad`` is set to
    ``None`` so that :meth:`reshard_grad`, which reads and then deletes
    ``flat_param._saved_grad``, finds the attribute.
    """
    flat_param = self.flat_param
    self._check_unsharded(flat_param)

    # Check if all ranks have a `None` gradient
    num_grad_none = torch.zeros(1, dtype=torch.int32, device=self.device)
    num_grad_none[0] = flat_param.grad is None
    dist.all_reduce(num_grad_none, group=self.zero1_process_group)
    if num_grad_none[0] == self.zero1_world_size:
        # Use the attribute name that `reshard_grad()` reads; the previous
        # `_saved_grad_shard` was never consumed anywhere in this handle.
        flat_param._saved_grad = None  # type: ignore[assignment]
        self._use_unsharded_grad_views()
        return
    if flat_param.grad is None:
        # In the case that only some ranks have `None` gradient, we use
        # zeros to approximate as a best effort attempt
        if self._debug_level == dist.DebugLevel.INFO:
            warnings.warn(
                f"[Rank {self.rank}] Only some but not all ranks have a "
                "`None` `FlatParameter` gradient, so FSDP is using zeros to "
                "approximate those ranks' sharded gradients being `None`"
            )
        flat_param._saved_grad = None  # type: ignore[assignment]
        sharded_grad = torch.zeros(flat_param._sharded_size, device=self.device, dtype=self._fwd_bwd_param_dtype)  # type: ignore[attr-defined]
    else:
        # This rank has a gradient: it must still have the sharded size.
        self._check_sharded(flat_param.grad)
        # NOTE(review): saving `flat_param.grad` into `_saved_grad` was
        # disabled here, so on this path `reshard_grad()` relies on
        # `_saved_grad` having been set elsewhere -- confirm with callers.
        sharded_grad = flat_param.grad.to(self._fwd_bwd_param_dtype)  # type: ignore[attr-defined]
    # Allocate the destination buffer and all-gather the sharded gradients
    padded_unsharded_grad = torch.zeros(
        flat_param._padded_unsharded_size,  # type: ignore[attr-defined]
        device=self.device,
        dtype=self._fwd_bwd_param_dtype,
    )
    dist.all_gather_into_tensor(
        padded_unsharded_grad, sharded_grad, self.zero1_process_group
    )
    # Switch to the unpadded, unsharded gradient view
    unsharded_size = self.flat_param._unpadded_unsharded_size
    flat_param.grad = padded_unsharded_grad[: unsharded_size.numel()].view(
        unsharded_size
    )
    self._use_unsharded_grad_views()
def accumulate_grad(self):
    '''
    Adds the runtime gradient buffer into the full-precision buffer in place.

    Precondition:
        ``flat_param._full_grad_padded`` holds the finished gradient.

    Postcondition:
        ``flat_param._full_prec_grad_padded`` has been incremented by it.

    Both buffers are first checked to be padded-unsharded and then to be on
    the compute device.
    '''
    flat_param = self.flat_param
    accumulator = flat_param._full_prec_grad_padded
    increment = flat_param._full_grad_padded
    # Same check order as before: size checks on both, then device checks.
    for check in (self._check_padded_unsharded, self._check_on_compute_device):
        for buffer in (accumulator, increment):
            check(buffer)
    accumulator.add_(increment)
def prepare_gradient_for_zero1(self):
    """
    Switches parameters and gradients to their sharded views and drops the
    saved gradient reference.

    Steps, in order:
        1. :meth:`_use_sharded_views` re-points the original parameters.
        2. :meth:`_use_sharded_grad_views` re-points their gradients.
        3. ``flat_param._saved_grad`` is deleted.

    Precondition: ``flat_param._saved_grad`` exists (the deletion raises
    ``AttributeError`` otherwise).
    """
    for switch_to_sharded in (self._use_sharded_views, self._use_sharded_grad_views):
        switch_to_sharded()
    delattr(self.flat_param, "_saved_grad")
def _free_unsharded_flat_tensor(self, param: bool = True):
    """
    Frees the storage of the padded unsharded flat parameter or gradient.

    Args:
        param (bool): ``True`` frees the padded unsharded parameter buffer;
            ``False`` frees the padded unsharded gradient buffer. The buffer
            is selected by :meth:`_get_padded_unsharded_flat_tensor`.
    """
    msg = "Parameter" if param else "Gradient"
    log0(f"Freeing {msg} memory on handle {self}, {self._pre_forward_order_index=} {self._post_forward_index=}")

    # Fetch with `free=False`: the getter's default (`free=True`) would
    # release the storage immediately, i.e. before the current stream is
    # recorded below, which defeats the purpose of recording it.
    unsharded_flat_tensor = self._get_padded_unsharded_flat_tensor(param, free=False)
    self._check_on_compute_device(unsharded_flat_tensor)
    # Do not free the memory until all ops in the current stream finish
    _no_dispatch_record_stream(
        unsharded_flat_tensor, self._device_handle.current_stream()
    )
    _free_storage(unsharded_flat_tensor)
@no_type_check
def _get_unflat_views_aligned(
    self,
    tensor: Optional[Tensor] = None,
) -> List[Tensor]:
    """
    Returns unflattened views into ``tensor`` (or into ``flat_param`` when
    ``tensor`` is ``None``), skipping the segments marked as padding.

    ``tensor`` is split along dim 0 by ``flat_param._numels_with_padding``.
    Segments whose ``flat_param._is_padding_mask`` entry is true are dropped,
    and the k-th remaining segment is viewed as ``flat_param._shapes[k]``.
    """
    flat_param = self.flat_param
    source = flat_param if tensor is None else tensor
    segments: List[Tensor] = torch.split(
        source, flat_param._numels_with_padding, dim=0
    )
    # Keep only real-parameter segments, then pair each with its shape.
    kept = [
        segment
        for segment, is_padding in zip(segments, flat_param._is_padding_mask)
        if not is_padding
    ]
    return [
        segment.view(flat_param._shapes[position])
        for position, segment in enumerate(kept)
    ]
@no_type_check
def _use_unsharded_grad_views(self) -> None:
    """
    Unflattens the unsharded flat parameter's gradient by setting the
    original parameter variables' gradients to be views into it.

    This is the gradient counterpart of :meth:`_use_unsharded_views`: each
    view taken from ``flat_param.grad`` is assigned as the ``.grad`` of the
    attribute currently registered on the owning module, and each shared
    parameter receives the ``.grad`` of its primary parameter.

    If ``flat_param.grad`` is ``None``, the ``.grad`` of every entry in
    ``flat_param._params`` and ``flat_param._shared_params`` is set to
    ``None`` and nothing else is done.
    """
    log0(f"Change to unsharded Gradient View on {self._pre_forward_order_index=} {self._post_forward_index=}")

    if self.flat_param.grad is None:
        for param in chain(self.flat_param._params, self.flat_param._shared_params):
            param.grad = None
        return
    # Expects the gradient to be in `flat_param.grad`
    self._check_unsharded(self.flat_param.grad)

    views = self._get_unflat_views(self.flat_param.grad)
    for i, (view, (param_name, module, _)) in enumerate(
        zip(views, self.flat_param._param_infos)
    ):
        _p_assert(
            hasattr(module, param_name),
            f"{self.flat_param._fqns[i]} is missing",
        )
        param = getattr(module, param_name)
        if (
            param.shape != view.shape
            or param.dtype != view.dtype
            or param.device != view.device
        ):
            # NOTE: This is a hack using `.data` to side step the check
            # that parameter/gradient sizes/dtypes/devices match. From
            # calling `reshard()`, `param` has the sharded size, has the
            # full precision dtype, and if CPU offloading is enabled, is on
            # CPU. Thus, one or more of the following cases can hold when
            # in `no_sync()`, where `view` is the original parameter's
            # gradient:
            # 1. `view` can have the unsharded size.
            # 2. `view` can have the parameter low precision dtype.
            # 3. `view` can be on GPU.
            if param.grad is None:
                # A placeholder `.grad` must exist before its `.data` can
                # be swapped for the mismatching view.
                param.grad = torch.empty_like(param)
            param.grad.data = view
        else:
            param.grad = view
    for i, (
        param_name,
        module,
        module_name,
        prim_param_name,
        prim_module,
        _,
    ) in enumerate(self.flat_param._shared_param_infos):
        _p_assert(
            hasattr(module, param_name),
            f"{module_name + '.' + param_name if module_name else param_name} is missing",
        )  # did not save FQN info in `_shared_param_infos`
        param = getattr(module, param_name)
        prim_param = getattr(prim_module, prim_param_name)
        # NOTE(review): `prim_param.grad` is dereferenced unconditionally;
        # this presumably relies on the loop above having assigned it --
        # confirm for primaries that are not in `_param_infos`.
        if (
            param.shape != prim_param.grad.shape
            or param.dtype != prim_param.grad.dtype
            or param.device != prim_param.grad.device
        ):
            # NOTE: This is the same hack to use `.data` to side step the
            # size check.
            if param.grad is None:
                param.grad = torch.empty_like(param)
            param.grad.data = prim_param.grad
        else:
            param.grad = prim_param.grad
@no_type_check
@torch.no_grad()
def _use_sharded_grad_views(self) -> None:
    """
    Points each original parameter's ``.grad`` at its slice of the sharded
    gradient returned by the ``sharded_grad`` property.

    If there is no sharded gradient, every original and shared parameter
    gets ``.grad = None``.

    Otherwise a parameter receives a slice only when it is present in the
    local shard, has ``requires_grad=True`` and is not flagged in
    ``flat_param._is_grad_none_mask``; all other parameters get
    ``.grad = None``. When the parameter dtype differs from the gradient
    dtype, the slice is written through ``.grad.data`` (creating an empty
    ``.grad`` first if needed); otherwise ``.grad`` is rebound to the slice.
    Shared parameters take the ``.grad`` of their primary parameter.
    """
    log0(f"Change to sharded Gradient View on {self._pre_forward_order_index=} {self._post_forward_index=}")

    flat_param = self.flat_param
    self._check_sharded(flat_param)
    grad = self.sharded_grad
    if grad is None:
        for param in chain(flat_param._params, flat_param._shared_params):
            param.grad = None
        return
    self._check_sharded(grad)

    per_param = zip(
        flat_param._params,
        flat_param._shard_param_infos,
        flat_param._is_grad_none_mask,
    )
    for param, info, grad_is_none in per_param:
        receives_grad = info.in_shard and param.requires_grad and not grad_is_none
        if not receives_grad:
            param.grad = None
            continue
        start = info.offset_in_shard
        grad_slice = grad[start : start + info.numel_in_shard]
        if param.dtype == grad.dtype:
            param.grad = grad_slice
            continue
        if param.grad is None:
            # `.grad` must have the same shape as `param`
            param.grad = torch.empty_like(param)
        param.grad.data = grad_slice

    shared_pairs = zip(flat_param._shared_params, flat_param._shared_param_infos)
    for shared_param, (_, _, _, prim_param_name, prim_module, _) in shared_pairs:
        follows_primary = (
            hasattr(prim_module, prim_param_name) and shared_param.requires_grad
        )
        shared_param.grad = (
            getattr(prim_module, prim_param_name).grad if follows_primary else None
        )
def _deregister_orig_params(self):
    """
    Removes the original and shared parameter attributes from their modules.

    For every entry of ``flat_param._param_infos`` followed by every entry
    of ``flat_param._shared_param_infos``, the attribute named by the entry
    is deleted from its module if the module currently has it.
    """
    flat_param = self.flat_param
    owners = chain(
        ((module, name) for name, module, _ in flat_param._param_infos),
        ((module, name) for name, module, _, _, _, _ in flat_param._shared_param_infos),
    )
    for module, name in owners:
        if not hasattr(module, name):
            continue
        delattr(module, name)
def shared_param_module_names(self) -> Iterator[Tuple[str, str]]:
    """
    Yields ``(param_name, module_name)`` for every shared parameter.

    The pairs are taken from the first and third fields of each entry in
    ``flat_param._shared_param_infos``, in order. The entries are unpacked
    directly; no intermediate ``ParamInfo`` objects are built.
    """
    for param_name, _, module_name, _, _, _ in self.flat_param._shared_param_infos:
        yield (param_name, module_name)
+ _p_assert( + flat_param.grad is None + or self._training_state + in (HandleTrainingState.FORWARD, HandleTrainingState.IDLE), + "Sharded strategies should use `_cpu_grad` or `_saved_grad_shard` " + "unless in IDLE or FORWARD", + ) + grad = None + return grad + + ####################### + # CHECKS & INVARIANTS # + ####################### + def _check_on_compute_device(self, tensor: Tensor): + _p_assert( + tensor.device == self.device, + f"Expects tensor to be on the compute device {self.device}", + ) + + def _check_on_cpu(self, tensor: Tensor): + _p_assert( + tensor.device == torch.device("cpu"), + f"Expects tensor to be on CPU but got {tensor.device}", + ) + + @staticmethod + def _check_storage_freed(tensor: Tensor): + storage_size: int = tensor._typed_storage()._size() + _p_assert( + storage_size == 0, + f"Expects storage to be freed but got storage with size {storage_size}", + ) + + @staticmethod + def _is_storage_freed(tensor: Tensor) -> bool: + return tensor is not None and tensor._typed_storage()._size() == 0 + + @staticmethod + def _check_storage_allocated(tensor: Tensor): + storage_size: int = tensor._typed_storage()._size() + _p_assert(storage_size > 0, "Expects storage to be allocated") + + def _check_unsharded(self, tensor: Tensor): + msg_prefix = "Expects tensor to be unsharded " + _p_assert(tensor is not None, msg_prefix + "but got `None`") + unsharded_size = self.flat_param._unpadded_unsharded_size + _p_assert( + tensor.size() == unsharded_size, + msg_prefix + f"with size {unsharded_size} but got {tensor.size()} with storage {tensor.untyped_storage().size()}", + ) + + def _check_padded_unsharded(self, tensor: Tensor): + msg_prefix = "Expects tensor to be unsharded and padded" + _p_assert(tensor is not None, msg_prefix + "but got `None`") + unsharded_size = self.flat_param._padded_unsharded_size + _p_assert( + tensor.size() == unsharded_size, + msg_prefix + f"with size {unsharded_size} but got {tensor.size()} with storage 
def _get_module_param_by_name(self, total_name):
    """Return the attribute currently set on the owning module for a name.

    ``total_name`` must have the ``"<module_name>.<param_name>"`` form, the
    same form that ``_named_module_parameters`` yields and that
    ``_get_orig_param_by_name`` matches against.

    Args:
        total_name: dotted name built from an entry of
            ``self.flat_param._param_infos``.

    Returns:
        ``getattr(module, param_name)`` for the first matching entry, or
        ``None`` when no entry matches.
    """
    for param_name, module, module_name in self.flat_param._param_infos:
        # The separator was previously missing here, so names produced by
        # the sibling helpers could never match.
        if total_name == f"{module_name}.{param_name}":
            return getattr(module, param_name)
    return None
def _detach_if_needed(param_or_tensor: Union[nn.Parameter, Tensor]) -> Tensor:
    """Return a detached tensor for an ``nn.Parameter``; pass tensors through.

    Args:
        param_or_tensor: either an ``nn.Parameter`` or a plain tensor.

    Returns:
        ``param_or_tensor.detach()`` when given an ``nn.Parameter``,
        otherwise the very same object that was passed in.
    """
    if not isinstance(param_or_tensor, nn.Parameter):
        return param_or_tensor
    return param_or_tensor.detach()
+ return ( + # torch.ones( + torch.zeros( + (padding_numel,), dtype=dtype, requires_grad=requires_grad, device=device + ) + ) + + +def log0(msg): + if dist.get_rank() == 0: + logger.info(msg) + \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/fsdp.py b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/fsdp.py new file mode 100644 index 000000000..94e8dd9b7 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/distributed/layerzero/zero3/fsdp.py @@ -0,0 +1,364 @@ +import traceback +from contextlib import contextmanager +from typing import ( + Any, + Callable, + Generator, + Iterable, + Iterator, + List, + Optional, + Tuple, + Union, +) + +import torch +import torch.nn as nn +from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import ( + _CHECKPOINT_WRAPPED_MODULE, + ActivationWrapper, +) +from torch.distributed.utils import _p_assert +from megatron.core import mpu +import mindspeed.core.distributed.layerzero.zero3._traversal_utils as traversal_utils +from mindspeed.core.distributed.layerzero.zero3._common_utils import ( + _ZeRO3State, + ZERO3_PREFIX, + ZERO3_WRAPPED_MODULE, + TrainingState, +) +from mindspeed.core.distributed.layerzero.zero3._init_utils import ( + _init_buffer_state, + _init_core_state, + _init_device_handle, + _init_ignored_module_states, + _init_param_handle_from_module, + _init_prefetching_state, + _init_process_group_state, + _init_runtime_state, + ProcessGroupType, +) + +from mindspeed.core.distributed.layerzero.zero3._wrap_utils import _auto_wrap +from mindspeed.core.distributed.layerzero.zero3.api import ( + BackwardPrefetch, + BackwardReduceScatter, + MixedPrecision, +) +from mindspeed.core.distributed.layerzero.zero3.flat_param import FlatParameter, FlatParamHandle +from mindspeed.core.distributed.layerzero.zero3.wrap import ModuleWrapPolicy +from mindspeed.core.distributed.layerzero.runtime._forward import ( + _post_forward, + _post_forward_reshard, + 
_pre_forward, + _pre_forward_backward_unshard, +) +from mindspeed.core.distributed.layerzero.runtime._root_forward import _zero3_root_pre_forward +from mindspeed.core.distributed.layerzero.runtime._utils import ( + _get_zero3_root_states, + _is_zero3_root, + _cast_forward_outputs, +) +from mindspeed.core.distributed.layerzero.runtime._initialize import _lazy_init +from mindspeed.core.distributed.layerzero import constants + + +__all__ = [ + "LayerZeRO3", +] +FLAT_PARAM = "_flat_param" + + +class LayerZeRO3(nn.Module, _ZeRO3State): + + def __init__( + self, + module: nn.Module, + process_group: ProcessGroupType = None, + tp_zero_process_group: ProcessGroupType = None, + auto_wrap_policy: Optional[Union[Callable, ModuleWrapPolicy]] = None, + backward_prefetch: Optional[BackwardPrefetch] = BackwardPrefetch.BACKWARD_PRE, + backward_reduce_scatter: Optional[BackwardReduceScatter] = BackwardReduceScatter.BACKWARD_PRE, + mixed_precision: Optional[MixedPrecision] = None, + offload_grads: bool = False, + ignored_modules: Optional[Iterable[torch.nn.Module]] = None, + param_init_fn: Optional[Callable[[nn.Module], None]] = None, + device_id: Optional[Union[int, torch.device]] = None, + forward_prefetch: bool = True, + limit_all_gathers: bool = True, + ignored_states: Union[ + Optional[Iterable[torch.nn.Parameter] + ], Optional[Iterable[torch.nn.Module]] + ] = None, + ): + torch._C._log_api_usage_once("layerzero") + super().__init__() + + _init_ignored_module_states( + self, module, ignored_modules, ignored_states) + _init_device_handle(self, module, self._ignored_params, device_id) + _init_process_group_state(self, process_group) + + if auto_wrap_policy is not None: + root_kwargs = { + "process_group": (self.zero3_process_group, self.zero1_process_group), + "tp_zero_process_group": tp_zero_process_group, + "backward_prefetch": backward_prefetch, + "backward_reduce_scatter": backward_reduce_scatter, + "mixed_precision": mixed_precision, + "offload_grads": offload_grads, + 
"param_init_fn": param_init_fn, + "device_id": device_id, + "forward_prefetch": forward_prefetch, + "limit_all_gathers": limit_all_gathers, + "ignored_states": self._ignored_params, + } + _auto_wrap( + module, + auto_wrap_policy, + self._ignored_modules, + self._ignored_params, + root_kwargs, + LayerZeRO3, + ) + + backward_prefetch_limit = 1 + forward_prefetch_limit = 1 + _init_core_state( + self, + mixed_precision, + limit_all_gathers, + backward_prefetch_limit, + forward_prefetch_limit, + offload_grads, + ) + _init_runtime_state(self) + + _init_prefetching_state(self, backward_prefetch, + forward_prefetch, backward_reduce_scatter) + _init_buffer_state(self, module) + _init_param_handle_from_module( + self, + module, + device_id, + param_init_fn, + ) + self._zero3_wrapped_module = module + + @property + def module(self) -> nn.Module: + """ + Returns the wrapped module (like :class:`DistributedDataParallel`). + """ + # FSDP's `.module` must refer to the innermost wrapped module when + # composing with other module wrappers in order for state dict to work + if isinstance(self._zero3_wrapped_module, ActivationWrapper): + return getattr(self._zero3_wrapped_module, _CHECKPOINT_WRAPPED_MODULE) + return self._zero3_wrapped_module + + @property + def _has_params(self) -> bool: + """Returns whether this FSDP instance manages any parameters.""" + return hasattr(self, "_handle") and self._handle is not None + + @property + def _flat_param(self) -> Optional[FlatParameter]: + return self._handle.flat_param if self._handle else None + + def __getattr__(self, name: str) -> Any: + """Forward missing attributes to the wrapped module.""" + try: + return super().__getattr__(name) # defer to nn.Module's logic + except AttributeError: + return getattr(self._zero3_wrapped_module, name) + + def __getitem__(self, key: int) -> Any: + """Forward indexing calls in case the module is an ``nn.Sequential``.""" + if hasattr(self, ZERO3_WRAPPED_MODULE): + # type: ignore[operator] + return 
self._zero3_wrapped_module.__getitem__(key) + return super().__getitem__(key) + + def check_is_root(self) -> bool: + return _is_zero3_root(self, self) + + @staticmethod + def zero3_modules( + module: nn.Module, + root_only: bool = False, + ) -> List["LayerZeRO3"]: + """ + Returns all nested ZeRO3 instances, possibly including ``module`` itself + and only including ZeRO3 root modules if ``root_only=True``. + + Args: + module (torch.nn.Module): Root module, which may or may not be an + ``FSDP`` module. + root_only (bool): Whether to return only FSDP root modules. + (Default: ``False``) + + Returns: + List[FullyShardedDataParallel]: FSDP modules that are nested in + the input ``module``. + """ + if root_only: + return _get_zero3_root_states(module) + return traversal_utils._get_zero3_states(module) + + def _mixed_precision_enabled_for_buffers(self) -> bool: + """ + Returns if the user explicitly enabled buffer mixed precision. + + NOTE: Unlike parameters and gradient reduction, buffer mixed precision + is applied at the FSDP instance level, not the ``FlatParameter`` level, + which may be different for the composable code path. + """ + return self.mixed_precision.buffer_dtype is not None + + def _reset_lazy_init(self) -> None: + """ + Reset instance so :func:`_lazy_init` will run on the next forward. + """ + self._is_root: Optional[bool] = None + + def forward(self, *args: Any, **kwargs: Any) -> Any: + """ + Runs the forward pass for the wrapped module, inserting FSDP-specific + pre- and post-forward sharding logic. 
def named_modules(
    self,
    *args,
    **kwargs,
) -> Iterator[Tuple[str, nn.Module]]:
    """
    Overrides :meth:`named_modules()` to intercept module names and
    remove all occurrences of ``ZERO3_PREFIX`` from them while
    ``self.training_state`` is ``TrainingState.SUMMON_FULL_PARAMS``.

    All positional and keyword arguments are forwarded unchanged to the
    parent implementation; each ``(name, module)`` pair it produces is
    yielded, with the name cleaned only in the state described above.
    """
    # Evaluated once, before iteration starts.
    should_clean_name = self.training_state == TrainingState.SUMMON_FULL_PARAMS
    for module_name, module in super().named_modules(*args, **kwargs):
        if should_clean_name:
            # Remove any instances of the wrapper-specific prefix; there can
            # be multiple in the case of nested wrapped modules
            module_name = module_name.replace(ZERO3_PREFIX, "")
        yield (module_name, module)
def zero_grad_buffer(self):
    """Do nothing; present so that this wrapper exposes ``zero_grad_buffer``.

    NOTE(review): the original note says this method exists to accommodate
    Megatron — presumably its training loop calls ``zero_grad_buffer()`` on
    the model wrapper; confirm against the caller.
    """
    pass
def _run_tensor_parallel_pg_override_policy(
    root_module: nn.Module,
    module_classes: Iterable[Type[nn.Module]],
    ignored_modules: Set[nn.Module],
    root_kwargs: Dict[str, Any],
    target_module_to_kwargs: Dict[nn.Module, Dict[str, Any]],
):
    """Force ``process_group`` to the TP-zero group for selected module types.

    Every submodule of ``root_module`` (including itself) that is an
    instance of one of ``module_classes`` and is not in ``ignored_modules``
    gets ``kwargs["process_group"] = root_kwargs["tp_zero_process_group"]``.

    Args:
        root_module: module whose ``modules()`` are scanned.
        module_classes: module types the override applies to.
        ignored_modules: modules that are skipped.
        root_kwargs: default wrapper kwargs; must contain
            ``"tp_zero_process_group"`` if any module matches. It is only
            read, never modified.
        target_module_to_kwargs: per-module kwargs, updated in place.
            Modules already present keep their dict (only ``process_group``
            is overwritten); new modules receive a shallow copy of
            ``root_kwargs``.

    Returns:
        The same ``target_module_to_kwargs`` mapping.
    """
    module_classes_tuple = tuple(set(module_classes))
    for module in root_module.modules():
        if module in ignored_modules:
            continue
        if not isinstance(module, module_classes_tuple):
            continue
        # This policy overrides any existing policy
        if module not in target_module_to_kwargs:
            # Inherit from the root kwargs through a copy. Storing
            # `root_kwargs` itself would make the assignment below rewrite
            # the caller's defaults and make all modules share one dict.
            target_module_to_kwargs[module] = dict(root_kwargs)
        target_module_to_kwargs[module]["process_group"] = root_kwargs[
            "tp_zero_process_group"
        ]
    return target_module_to_kwargs
def reuse_fp32_param_param_and_grad_buffer_init_wrapper(init_func):
    """Wrap ``init_func`` so it runs with an even-rounding ``math.ceil``.

    When both ``reuse_fp32_param`` and ``use_distributed_optimizer`` are set
    on the object returned by ``get_args()``, ``math.ceil`` is temporarily
    replaced by a version that rounds its result up to the next even
    integer, for the duration of the ``init_func`` call only. Otherwise
    ``init_func`` is called unchanged.

    Args:
        init_func: the callable to wrap.

    Returns:
        The wrapping function, which forwards all arguments to ``init_func``
        and returns whatever it returns.
    """
    @wraps(init_func)
    def reuse_fp32_param_param_and_grad_buffer_init(*args, **kwargs):
        global_args = get_args()
        if not (global_args.reuse_fp32_param and global_args.use_distributed_optimizer):
            return init_func(*args, **kwargs)

        math_ceil = math.ceil

        def ceil_even(x):
            return math_ceil(math_ceil(x) / 2) * 2

        # `math.ceil` is patched module-wide, so it must be restored even if
        # `init_func` raises; otherwise every later caller in the process
        # would silently get even-rounded results.
        math.ceil = ceil_even
        try:
            return init_func(*args, **kwargs)
        finally:
            math.ceil = math_ceil
    return reuse_fp32_param_param_and_grad_buffer_init
+ unique_params = set() + for param in params: + assert param not in unique_params + unique_params.add(param) + del unique_params + + # Store attributes that will be needed later. + self.param_dtype = param_dtype + self.grad_dtype = grad_dtype + self.data_parallel_group = data_parallel_group + self.data_parallel_world_size = torch.distributed.get_world_size( + group=self.data_parallel_group + ) + self.gradient_scaling_factor = gradient_scaling_factor + self.is_last_microbatch = True + + # Data structures to store underlying buckets and relevant indexing data. + self.buckets = [] + self.param_to_bucket = {} # Param -> bucket mapping. + self.param_index_map = {} # Param -> location in buffer mapping (used in dist. optimizer). + + def _pad(number_to_be_padded: int, divisor: int) -> int: + return int(math.ceil(number_to_be_padded / divisor) * divisor) + + def _pad_end_of_bucket_if_needed(bucket_end_index: int) -> int: + """ + Pads end index of bucket if using distributed optimizer (to ensure uniform sharding). + """ + if self.ddp_config.use_distributed_optimizer: + # We now ensure that all buckets start at a memory address that is 512-byte + # If using a distributed optimizer, pad the memory buffer to be + # multiple of data_parallel_world_size. (This padding is done + # due to a constraint with the reduce_scatter op, which requires + # all tensors have equal size.) + # 512-byte for Ascend, 256-byte for nv. + + element_size = 4 if param_dtype == torch.float else 2 + global_args = get_args() + align_size = global_args.param_and_grad_buffer_pad // element_size + return _pad(bucket_end_index, self.data_parallel_world_size * align_size) + return bucket_end_index + + def _pad_start_of_param_if_needed(param_start_index: int) -> int: + """ + Pads start index of param if using distributed optimizer (to ensure "good" alignment). 
+ """ + if self.ddp_config.use_distributed_optimizer: + # Ensure that params start at 128-byte aligned addresses (64 values + # since params are >= 16-bit precision). + return _pad(param_start_index, 64) + return param_start_index + + # First, figure out how many elements should be in the underlying buffer storage. + # Note that if we need to split the buffer into smaller buckets, each of these + # might need to be padded as well (if using the distributed optimizer). + data_start_index = 0 + bucket_data_start_index = data_start_index + bucket_params = set() + self.bucket_indices = [] + per_bucket_numel_unpadded = [] + bucket_id = 0 + + def _create_new_bucket(data_end_index: int) -> int: + """ + Create the bucket_id'th bucket with collected bucket_params, starting at + bucket_data_start_index. + """ + nonlocal bucket_data_start_index, bucket_params, bucket_id + per_bucket_numel_unpadded.append(data_end_index - bucket_data_start_index) + data_end_index = _pad_end_of_bucket_if_needed(data_end_index) + # Update bucket metadata. + self.bucket_indices.append((bucket_data_start_index, data_end_index)) + bucket_data_start_index = data_end_index + # Re-set bucket_params and increment bucket_id for next bucket. + bucket_params = set() + bucket_id += 1 + # Return the potentially padded data_end_index. + return data_end_index + + for param in params[::-1]: + # Iterate through parameters in reverse order to roughly follow backprop order, + # and skip parameters that don't require gradients. + if not param.requires_grad: + continue + this_numel = param.data.nelement() + data_start_index = _pad_start_of_param_if_needed(data_start_index) + data_end_index = data_start_index + this_numel + + def _does_param_require_new_bucket(param): + """ + Split shared embedding parameters into separate bucket if using distributed + optimizer that makes use of reduce-scatters instead of all-reduces. 
+ This ensures that the first and last pipeline stage partition optimizer state + for the shared embedding parameters the same way across DP replicas, allowing + the DP reduce-scatter to be before the embedding all-reduce. + """ + return ( + getattr(param, "shared_embedding", False) + and self.ddp_config.use_distributed_optimizer + ) + + # Create bucket with already collected parameters if current param needs its own bucket. + if _does_param_require_new_bucket(param) and len(bucket_params) > 0: + # We are creating a bucket for the already accumulated parameters, whose params + # end at the current data_start_index. + if self.ddp_config.use_distributed_optimizer: + # data_start_index should already be padded. + assert data_start_index % self.data_parallel_world_size == 0 + _create_new_bucket(data_start_index) + + self.param_index_map[param] = ( + data_start_index, + data_end_index, + bucket_id, + ) + bucket_params.add(param) + + # If we have enough elements already or the current param is part of the shared embedding + # layer and needs a separate bucket, form a new bucket. + if ( + bucket_size is not None + and (data_end_index - bucket_data_start_index) >= bucket_size + ) or _does_param_require_new_bucket(param): + data_end_index = _create_new_bucket(data_end_index) + data_start_index = data_end_index + + # Add remaining params to a new bucket. + if len(bucket_params) > 0: + data_end_index = _create_new_bucket(data_end_index) + + # Next, create underlying storage for buffer (with numel elements that includes + # padding as necessary). + self.numel = data_end_index + self.numel_unpadded = sum(per_bucket_numel_unpadded) + assert self.numel_unpadded <= self.numel + if self.ddp_config.use_distributed_optimizer: + assert self.numel % self.data_parallel_world_size == 0 + else: + assert self.numel == self.numel_unpadded + + self.param_data = None + # Only re-map param tensors if using distributed optimizer. 
+ if self.ddp_config.use_distributed_optimizer: + self.param_data = torch.zeros( + self.numel, + dtype=self.param_dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + self.grad_data = torch.zeros( + self.numel, + dtype=self.grad_dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + + # Finally, map param.data and param.main_grad fields to buffers. + bucket_params = set() + bucket_data_start_index = 0 + cur_bucket_id = 0 + for param in params[::-1]: + if not param.requires_grad: + continue + data_start_index, data_end_index, bucket_id = self.param_index_map[param] + + # Assign param.data to appropriate segment of self.param_data. + if self.param_data is not None: + old_param_data = param.data + param.data = self._get( + param.data.shape, data_start_index, buffer_type=BufferType.PARAM + ) + assert old_param_data._base is None + # Copy tensor values (from initialization or checkpoint). + param.data.detach().copy_(old_param_data) + del old_param_data + + param.main_grad = self._get( + param.data.shape, data_start_index, buffer_type=BufferType.GRAD + ) + if bucket_id != cur_bucket_id: + bucket_data_end_index = _pad_end_of_bucket_if_needed(data_start_index) + self._set_bucket( + bucket_params=bucket_params, + start_index=bucket_data_start_index, + end_index=bucket_data_end_index, + numel_unpadded=per_bucket_numel_unpadded[cur_bucket_id], + bucket_id=cur_bucket_id, + ) + bucket_data_start_index = bucket_data_end_index + bucket_params = set() + assert cur_bucket_id + 1 == len(self.buckets) + assert bucket_id == cur_bucket_id + 1 + cur_bucket_id = bucket_id + bucket_params.add(param) + + # Add remaining params to a new bucket. 
def fused_swiglu(x):
    """Run the NPU SwiGLU kernel over the last dimension of ``x``."""
    return torch_npu.npu_swiglu(x, dim=-1)


class SwiGLUFunction:
    """Shim exposing an ``apply`` entry point backed by :func:`fused_swiglu`."""

    @staticmethod
    def apply(x, *args):
        """Return ``fused_swiglu(x)``; extra positional arguments are ignored."""
        return fused_swiglu(x)


class BiasSwiGLUFunction:
    """Shim exposing an ``apply`` entry point that adds a bias before SwiGLU."""

    @staticmethod
    def apply(x, bias, *args):
        """Return ``fused_swiglu(x + bias)``; extra positional arguments are ignored."""
        return fused_swiglu(x + bias)


class FusedLayerNormAffineFunction:
    """Shim that routes the fused layer-norm entry points to ``torch`` layer norm."""

    @staticmethod
    def apply(input_, weight, bias, normalized_shape, eps):
        """Layer-normalise ``input_`` over ``normalized_shape`` with affine params."""
        return torch.nn.functional.layer_norm(input_, normalized_shape, weight, bias, eps)

    @staticmethod
    def forward(*args, **kwargs):
        """Alias of :meth:`apply`."""
        return FusedLayerNormAffineFunction.apply(*args, **kwargs)


class FastLayerNormFN:
    """Shim for the "fast" layer norm whose normalised size is taken from ``weight``."""

    @staticmethod
    def apply(input_, weight, bias, eps):
        """Layer-normalise ``input_`` over a trailing dimension of ``weight.numel()``.

        ``torch.Size`` must be built from an iterable; passing the bare int
        returned by ``numel()`` raises ``TypeError``, so it is wrapped in a tuple.
        """
        normalized_shape = torch.Size((weight.numel(),))
        return torch.nn.functional.layer_norm(input_, normalized_shape, weight, bias, eps)


def fused_layer_norm_affine(input_, weight, bias, normalized_shape, eps):
    """Functional form of :meth:`FusedLayerNormAffineFunction.apply`."""
    return torch.nn.functional.layer_norm(input_, normalized_shape, weight, bias, eps)


def _all_false_mask_like(input_):
    """Build an all-``False`` boolean mask shaped like ``input_`` on its device."""
    # Allocating directly on the input's device avoids a host allocation
    # followed by a host-to-device copy on every call.
    return torch.zeros(input_.size(), dtype=torch.bool, device=input_.device)


class ScaledUpperTriangMaskedSoftmax:
    """Scaled softmax with the kernel's upper-triangular masking enabled."""

    @staticmethod
    def apply(input_, scale):
        """Return the masked, scaled softmax of ``input_`` in its original shape."""
        # npu_scaled_masked_softmax must be 4D
        size = input_.size()
        input_ = input_.view((1,) + tuple(size)).contiguous()
        dummy_mask = _all_false_mask_like(input_)
        output = torch_npu.npu_scaled_masked_softmax(input_, dummy_mask, scale, True)
        return output.view(size).contiguous()


class ScaledMaskedSoftmax:
    """Scaled softmax with an explicit mask."""

    @staticmethod
    def apply(input_, mask, scale):
        """Return ``npu_scaled_masked_softmax(input_, mask, scale, False)``."""
        return torch_npu.npu_scaled_masked_softmax(input_, mask, scale, False)


class ScaledSoftmax:
    """Scaled softmax without masking (an all-``False`` mask is supplied)."""

    @staticmethod
    def apply(input_, scale):
        """Return the scaled softmax of ``input_``."""
        dummy_mask = _all_false_mask_like(input_)
        return torch_npu.npu_scaled_masked_softmax(input_, dummy_mask, scale, False)


def is_kernel_available(self, mask, b, np, sq, sk):
    """Tell whether the fused softmax kernel may be used for these sizes.

    ``mask``, ``b`` and ``np`` are accepted for signature compatibility and are
    not consulted.
    """
    return (
        self.scaled_masked_softmax_fusion  # user wants to fuse
        and self.input_in_float16  # read from the module's input_in_float16 flag
        and 32 < sk <= 4096  # sk must lie in (32, 4096]
        and sq % 16 == 0  # sq must be a multiple of 16
        and sk % 16 == 0  # sk must be a multiple of 16
    )


def forward_fused_softmax(self, input_, mask):
    """Compute the scaled (optionally masked) softmax of a 4-D ``input_``.

    Raises:
        AssertionError: for a causal mask type when the last two sizes differ.
    """
    # Unpacking enforces the 4-D requirement; only the last two sizes are used.
    _, _, sq, sk = input_.size()
    scale = self.scale if self.scale is not None else 1.0

    if self.attn_mask_type == AttnMaskType.causal:
        assert sq == sk, "causal mask is only for self attention"
        # if use upper triangular mask
        return torch_npu.npu_scaled_masked_softmax(input_, mask, scale, True)
    if mask is not None:
        return torch_npu.npu_scaled_masked_softmax(input_, mask, scale, False)
    return ScaledSoftmax.apply(input_, scale)


def permute_wrapper(fn):
    """Replace ``fn`` with a wrapper that always calls ``npu_moe_token_permute``.

    ``fn`` itself is never invoked; it only donates its metadata via ``wraps``.
    """
    @wraps(fn)
    def wrapper(
        tokens: torch.Tensor,
        indices: torch.Tensor,
        num_out_tokens: int = None,
        padded_mode: bool = False
    ) -> torch.Tensor:
        return npu_moe_token_permute(tokens, indices, num_out_tokens, padded_mode)

    return wrapper
def unpermute_wrapper(fn):
    """Swap ``fn`` for a wrapper that delegates to ``npu_moe_token_unpermute``.

    ``fn`` is not called; ``wraps`` only copies its metadata onto the wrapper.
    """
    @wraps(fn)
    def wrapper(
        permuted_tokens: torch.Tensor,
        sorted_indices: torch.Tensor,
        probs: torch.Tensor = None,
        padded_mode: bool = False,
        restore_shape: torch.Size = None,
    ) -> torch.Tensor:
        original_dtype = permuted_tokens.dtype
        must_cast = probs is not None and probs.dtype != permuted_tokens.dtype
        if must_cast:
            # The kernel is fed tokens in the same dtype as the probabilities.
            permuted_tokens = permuted_tokens.to(probs.dtype)
        restored = npu_moe_token_unpermute(
            permuted_tokens, sorted_indices, probs, padded_mode=padded_mode, restore_shape=restore_shape)
        # Hand the result back in the caller's original dtype.
        return restored.to(original_dtype)

    return wrapper


def rms_norm_init_wrapper(fn):
    """Extend an RMSNorm ``__init__`` with the fused / nd-matmul switches."""
    @wraps(fn)
    def wrapper(self, *args, **kwargs):
        fn(self, *args, **kwargs)
        global_args = get_args()
        self.use_fused_rmsnorm = global_args.use_fused_rmsnorm
        self.use_nd_matmul = global_args.use_nd_matmul
        # ``dim`` may arrive positionally or as a keyword.
        dim = args[0] if len(args) > 0 else kwargs.get('dim')
        if not self.use_nd_matmul:
            return
        if self.use_fused_rmsnorm:
            raise RuntimeError('nd_matmul does not support fused_rmsnorm temporarily')
        self.tensor_model_parallel_size = global_args.tensor_model_parallel_size
        # Replace the weight with one sized dim // tensor_model_parallel_size.
        shard_width = dim // self.tensor_model_parallel_size
        self.weight = torch.nn.Parameter(torch.ones(shard_width))
    return wrapper


def rms_norm_forward_wrapper(fn):
    """Wrap an RMSNorm ``forward`` so it can take the fused NPU path."""
    @wraps(fn)
    def wrapper(self, x):
        asd_flag = int(os.getenv('NPU_ASD_ENABLE', '0'))
        if asd_flag:
            from torch_npu.utils import register_asd_hook
            register_asd_hook(x, self.weight)
        if not self.use_fused_rmsnorm:
            return fn(self, x)
        fused_outputs = torch_npu.npu_rms_norm(x, self.weight, epsilon=self.eps)
        return fused_outputs[0]
    return wrapper


def rms_norm_norm_wrapper(fn):
    """Wrap the RMSNorm ``_norm`` helper with an nd-matmul aware variant."""
    @wraps(fn)
    def wrapper(self, x):
        if not self.use_nd_matmul:
            return fn(self, x)
        # Local mean of squares, then reduced across the group and divided by
        # the tensor-parallel size.
        local_sq_mean = x.pow(2).mean(-1, keepdim=True)
        reduced_sq_mean = reduce_from_tensor_model_parallel_region_nd(local_sq_mean)
        global_sq_mean = torch.div(reduced_sq_mean, self.tensor_model_parallel_size)
        return x * torch.rsqrt(global_sq_mean + self.eps)
    return wrapper
class AdaptMemApplyManager(metaclass=SingletonBase):
    """Applies a solved adaptive-memory policy to the profiling context and model.

    The manager first marks entries of a copied context tree with
    RECOMPUTE / SWAP flags and then walks the model alongside that tree to
    install the corresponding hooks.
    """

    def __init__(self):
        self.no_adapt_modules = []  # modules which don't join policy selections
        self.cur_module_index = 0  # DFS index

    # NOTE(review): the original comment here mentioned an "optype" switch
    # (0 save_to_cache, 1 apply_to_context); no such parameter exists.
    def apply_op_to_context(self, adapt_policy_list: list, origin_context: dict):
        """Return a deep copy of ``origin_context`` annotated per ``adapt_policy_list``.

        Each policy is indexed as ``policy[0]`` (number of consecutive layers),
        ``policy[1]`` (a ``LayerAction``) and ``policy[2:]`` (per-module
        actions). An empty policy list returns ``origin_context`` itself.
        """
        if len(adapt_policy_list) == 0:
            print_rank_0("adapt_policy_list Empty!")
            return origin_context
        context = deepcopy(origin_context)
        # 1.get all layers by order
        ordered_layers = []
        self.get_ordered_layers(context, ordered_layers, True)
        # 2.handle policy list
        idx = 0
        # Called with a throw-away list: the side effect on no_adapt_modules
        # (layer_index == 0 branch) is what matters here.
        # NOTE(review): cur_module_index is not reset before this call and
        # ordered_layers[0] assumes at least one layer was found - confirm.
        self.get_ordered_modules(ordered_layers[0][Key.SUBMODULES], [], 0)
        for policy in adapt_policy_list:
            n = policy[0]
            adapt_nodes = []
            if policy[1] == LayerAction.FULL_RECOMPUTE:
                status = ModuleAction.RECOMPUTE
                adapt_nodes = [status for _ in range(len(policy[2:]))]
                # Full recompute also flags the layer entry itself.
                for i in range(idx, idx + n):
                    ordered_layers[i][ModuleAction.RECOMPUTE.name] = True
            elif policy[1] == LayerAction.FULL_SWAP:
                status = ModuleAction.SWAP
                adapt_nodes = [status for _ in range(len(policy[2:]))]
            elif policy[1] == LayerAction.ADAPTIVE:
                adapt_nodes = policy[2:]
            for i in range(idx, idx + n):
                self.apply_op_to_layer(ordered_layers[i], adapt_nodes, i)
            idx += n

        return context

    def apply_op_to_layer(self, ordered_layer, adapt_nodes: list, layer_index: int):
        """Flag the modules of one layer according to ``adapt_nodes``.

        ``adapt_nodes[i]`` is applied to the i-th module returned by
        :meth:`get_ordered_modules`; surplus actions are ignored.
        """
        if len(adapt_nodes) == 0:
            # don't need any operations if adapt_nodes is empty
            return
        # get all modules of the current layer through DFS
        ordered_module: List[dict] = []
        if Key.SUBMODULES not in ordered_layer:
            return
        self.cur_module_index = 0
        if layer_index == 0:
            # Layer 0 rebuilds the skip list that later layers consult.
            self.no_adapt_modules.clear()
        self.get_ordered_modules(ordered_layer[Key.SUBMODULES], ordered_module, layer_index)

        for i, nodes in enumerate(adapt_nodes):
            if i >= len(ordered_module):
                break
            if Key.IS_FUNCTION in ordered_module[i]:
                func_action = nodes
                # add location infos for autograd.function
                AdaptMemGraphSolver().add_func_locations(layer_index, ordered_module[i][Key.NAME], func_action)
                continue
            if nodes == ModuleAction.RECOMPUTE:
                ordered_module[i][ModuleAction.RECOMPUTE.name] = True
            elif nodes == ModuleAction.SWAP:
                ordered_module[i][ModuleAction.SWAP.name] = True

    def get_ordered_layers(self, model: dict, ordered_layers: list, is_root_layer: bool = False):
        """Append, in traversal order, the children of every adapt-layer entry.

        Results are accumulated into ``ordered_layers``; nothing is returned.
        """
        # root module may have multiple layers due to vpp parallel
        if is_root_layer:
            if Key.SUBMODULES not in model:
                return
            for sub_model in model[Key.SUBMODULES]:
                self.get_ordered_layers(sub_model, ordered_layers)
            return

        if Key.IS_ADAPT_LAYER in model:
            # NOTE(review): SUBMODULES is read before the presence check below.
            for sub_layer in model[Key.SUBMODULES]:
                ordered_layers.append(sub_layer)
        if Key.SUBMODULES not in model:
            return
        for sub_model in model[Key.SUBMODULES]:
            self.get_ordered_layers(sub_model, ordered_layers)

    def get_ordered_modules(self, layer: dict, ordered_modules: list, layer_index: int):
        """Collect, depth first, the entries of a layer that take part in the policy.

        For layer 0 an entry takes part when it carries ``Key.MEMORY``; the DFS
        index of every other entry is recorded in ``no_adapt_modules``. For
        later layers an entry takes part when its DFS index is not in that list.
        """
        for sub_layer in layer:
            # The first layer judges through ['memory']
            if layer_index == 0:
                if Key.MEMORY in sub_layer:
                    ordered_modules.append(sub_layer)
                else:
                    # use the DFS index as the unique identifier
                    self.no_adapt_modules.append(self.cur_module_index)
            else:
                if self.cur_module_index not in self.no_adapt_modules:
                    ordered_modules.append(sub_layer)

            self.cur_module_index += 1
            if Key.SUBMODULES in sub_layer:
                self.get_ordered_modules(sub_layer[Key.SUBMODULES], ordered_modules, layer_index)

    def apply_hook_to_model(self, models, context, pre_context, is_root_layer: bool = False):
        """Walk ``models`` together with ``context`` and install the flagged hooks.

        ``pre_context`` is accepted but not read in this method.
        """
        if is_root_layer and isinstance(models, list):
            # One context slice per model chunk in the list.
            layer_idx = 0
            for model in models:
                self.apply_hook_to_model(model, get_cur_layer_context(context, layer_idx), context)
                layer_idx += 1
            return
        # pass autograd.function
        if Key.IS_FUNCTION in context:
            if Key.SUBMODULES in context:
                # Function entries have no module of their own: keep the same
                # module and descend into the entry's children.
                for i in range(0, len(context[Key.SUBMODULES])):
                    self.apply_hook_to_model(models, context[Key.SUBMODULES][i], context)
            return
        # apply hooks for recompute models
        if context.get(ModuleAction.RECOMPUTE.name, False):
            # Keep the un-hooked forward reachable on the module.
            models.no_checkpoint_adaptive_recompute_forward = models.forward
            models.forward = RecomputeHook().hook_checkpoint_forward(models.forward)
            RecomputeHook().recompute_modules.append(models)
            print_rank_0('recompute hooked on %s' % models._get_name())
            return
        # apply hooks for swap modules
        if context.get(ModuleAction.SWAP.name, False):
            SwapManager().hook_prefetch_forward(models, '')
            print_rank_0('swap hooked on %s' % models._get_name())
            return
        # apply hooks for oom swap
        if Key.ALLOWED_ADAPT in context:
            transformer_layer_register_post_forward_hook(models)
            transformer_layer_register_pre_backward_hook(models)
            SwapManager().hook_oom_rescue_forward(models)
            print_rank_0('oom rescue hooked on %s' % models._get_name())

        # Children are paired with context entries purely by position.
        module_idx = 0
        for name, module in models.named_children():
            self.apply_hook_to_model(module, context[Key.SUBMODULES][module_idx], context)
            module_idx += 1

    def apply_new_adapt_policy(self, adapt_policy_list, context, models):
        """Clear recorded function locations, annotate a context copy, install hooks."""
        AdaptMemGraphSolver().func_locations.clear()
        new_context = self.apply_op_to_context(adapt_policy_list, context)
        self.apply_hook_to_model(models, new_context, "", True)


# get layer by idx in root module
def get_cur_layer_context(context, idx):
    """Return a shallow copy of ``context`` whose SUBMODULES holds only entry ``idx``."""
    current_context = {}
    for k, v in context.items():
        if k == Key.SUBMODULES:
            current_context[k] = [v[idx]]
            continue
        current_context[k] = v
    return current_context
class AdaptiveLayerMemPolicy:
    """Recompute / swap module lists for one layer plus its memory and time cost."""

    def __init__(self, recompute=None, swap=None, memory=0.0, time=sys.maxsize, adapt_type=LayerAction.ADAPTIVE):
        self.recompute: List[str] = recompute or []
        self.swap: List[str] = swap or []
        self.memory: float = memory
        self.time = time
        self.adapt_type = adapt_type

    def get_modules_by_tag(self, tag):
        """Return the module list for ``tag``.

        Raises:
            ValueError: if ``tag`` is neither RECOMPUTE nor SWAP.
        """
        if ModuleAction.RECOMPUTE == tag:
            return self.recompute
        if ModuleAction.SWAP == tag:
            return self.swap
        msg = f"unknown layer policy tag name:{tag}"
        raise ValueError(msg)

    @staticmethod
    def parse_from_json(src_json):
        """Build a policy from a decoded JSON object (``adapt_type`` is not restored)."""
        alp = AdaptiveLayerMemPolicy(memory=src_json["memory"], time=src_json["time"], recompute=[], swap=[])
        alp.recompute = [str(r) for r in src_json["recompute"]]
        alp.swap = [str(r) for r in src_json["swap"]]
        return alp

    def identity(self) -> str:
        """Return an MD5 digest of the sorted recompute and swap lists.

        The lists are sorted in place as a side effect.
        """
        self.sort_modules()
        modules = ",".join(self.recompute) + ":" + ",".join(self.swap)
        return hashlib.md5(modules.encode('utf-8')).hexdigest()

    def sort_modules(self):
        """Sort both module lists in place."""
        self.recompute.sort()
        self.swap.sort()

    def __eq__(self, other):
        """Compare by module lists only; both operands get sorted in place."""
        if not isinstance(other, AdaptiveLayerMemPolicy):
            return False
        if len(self.recompute) != len(other.recompute) or len(self.swap) != len(other.swap):
            return False

        # sort values before compare
        self.sort_modules()
        other.sort_modules()

        return self.recompute == other.recompute and self.swap == other.swap

    def __repr__(self):
        result = {'recompute': self.recompute, 'swap': self.swap, 'memory': self.memory, 'time': self.time, 'adapt_type': self.adapt_type}
        return str(result)


class AdaptiveModelMemPolicy:
    """A model-wide policy: a typed list of per-layer policies."""

    def __init__(self, policy_type, polices, memory=0.0, time=sys.maxsize):
        self.policy_type: str = policy_type
        self.polices: List[AdaptiveLayerMemPolicy] = polices
        self.memory: float = memory
        self.time = time

    # NOTE(review): this class is not a dataclass, so nothing in this block
    # invokes __post_init__ and the check below never runs on construction.
    # It is left uncalled because callers outside this view may rely on that.
    def __post_init__(self):
        if self.policy_type not in ["normal", "oom"]:
            raise ValueError(f"unknown policy type:{self.policy_type}, {self.__repr__()}")

    def __repr__(self):
        return str(self.polices)

    def to_json(self):
        """Serialise the policy (and nested objects via ``__dict__``) to JSON text."""
        return json.dumps(self, default=lambda x: x.__dict__, sort_keys=True)

    @staticmethod
    def parse_from_json(src_json):
        """Build a model policy from a decoded JSON object."""
        amp = AdaptiveModelMemPolicy(policy_type=src_json["policy_type"], polices=[])
        amp.polices = [AdaptiveLayerMemPolicy.parse_from_json(p) for p in src_json["polices"]]
        return amp

    def __eq__(self, other):
        """Equal when the type matches and layer identities match as a multiset."""
        if not isinstance(other, AdaptiveModelMemPolicy):
            return False
        if self.policy_type != other.policy_type or len(self.polices) != len(other.polices):
            return False

        cur_hash = sorted(x.identity() for x in self.polices)
        other_hash = sorted(x.identity() for x in other.polices)
        return cur_hash == other_hash


class PolicyCacheManager(metaclass=SingletonBase):
    """Keeps normal / OOM policy caches and persists the OOM cache per PP stage."""

    def __init__(self):
        self.local_file_name_list = []
        self.normal_policy_cache: List[AdaptiveModelMemPolicy] = []
        self.oom_policy_cache: List[AdaptiveModelMemPolicy] = []

    def load_cache_file(self):
        """Compute the per-stage cache file names and load this stage's file."""
        self.local_file_name_list = self._buildup_filename()
        self.load_stage_cache_file()

    def load_stage_cache_file(self):
        """Append every policy stored in this stage's cache file to the OOM cache."""
        cur_pp_rank = parallel_state.get_pipeline_model_parallel_rank()
        stage_file = self.local_file_name_list[cur_pp_rank]
        if not os.path.isfile(stage_file):
            print_rank_0(f"load history oom policy False!!!!!!!!: {stage_file}")
            return

        with open(stage_file, "r") as f:
            for line in f:
                if not line.strip():
                    # A blank line is not a policy; json.loads would raise on it.
                    continue
                json_format = json.loads(line)
                policy: AdaptiveModelMemPolicy = AdaptiveModelMemPolicy.parse_from_json(json_format)
                self.oom_policy_cache.append(policy)
        print_rank_0(f"load history oom policy Success!!!!!!!!: {stage_file}")

    @staticmethod
    def _get_version_file(src_path, key, version_file_name):
        """Join ``version_file_name`` onto ``src_path`` cut right after ``key``."""
        version_path = src_path[:src_path.index(key) + len(key)]
        return os.path.join(version_path, version_file_name)

    def _get_software_version(self):
        """Return torch / torch_npu / toolkit / driver versions, or ``{}`` if unknown."""
        torch_version: str = torch.__version__
        torch_npu_version: str = torch_npu.__version__

        # An unset LD_LIBRARY_PATH must not crash: treat it as empty.
        library_path = os.environ.get("LD_LIBRARY_PATH", "").split(":")
        ascend_toolkit_path = next((x for x in library_path if "ascend-toolkit" in x), None)
        driver_path = next((x for x in library_path if "driver" in x), None)
        if ascend_toolkit_path is None or driver_path is None:
            return {}

        ascend_toolkit_version_file = self._get_version_file(ascend_toolkit_path, "ascend-toolkit", "version.cfg")
        driver_version_file = self._get_version_file(driver_path, "driver", "version.info")
        if not os.path.isfile(ascend_toolkit_version_file) or not os.path.isfile(driver_version_file):
            return {}

        with open(ascend_toolkit_version_file, "r") as f:
            f.readline()  # the first line is skipped; the second one is used
            ascend_version = f.readline()

        with open(driver_version_file, "r") as f:
            driver_version = f.readline()

        return {
            "torch": torch_version,
            "torch_npu": torch_npu_version,
            "ascend_toolkit": ascend_version,
            "driver": driver_version
        }

    def _scan_dir_recursively(self, dir_name, md5s):
        """Append the MD5 of every ``.py`` file under ``dir_name`` to ``md5s``."""
        with os.scandir(dir_name) as it:
            for entry in it:
                if entry.is_dir(follow_symlinks=False):
                    self._scan_dir_recursively(entry.path, md5s)
                elif entry.is_file(follow_symlinks=False):
                    if not entry.path.endswith(".py"):
                        # Skip this entry only; returning here would abandon
                        # the rest of the directory.
                        continue
                    md5_instance = hashlib.md5()
                    with open(entry.path, "rb") as f:
                        md5_instance.update(f.read())
                    md5s.append(md5_instance.hexdigest())

    def _get_source_code_hash(self):
        """Return one MD5 digest summarising all ``.py`` files of ``mindspeed``."""
        mindspeed_path, = mindspeed.__path__
        md5s = []
        self._scan_dir_recursively(mindspeed_path, md5s)
        # Sort in place so the digest does not depend on directory scan order.
        md5s.sort()
        md5_instance = hashlib.md5()
        for x in md5s:
            md5_instance.update(x.encode('utf-8'))
        return md5_instance.hexdigest()

    def _buildup_filename(self):
        """Return one absolute cache file name per pipeline stage.

        The name embeds the parallel layout and an MD5 of the arguments,
        source hash and software versions. The ``adaptive_mem`` directory is
        created if missing.
        """
        args = get_args()
        gbs = args.global_batch_size
        mbs = args.micro_batch_size
        seq_len = args.seq_length
        hidden = args.hidden_size
        tp = 1 if not args.tensor_model_parallel_size else args.tensor_model_parallel_size
        cp = 1 if not args.context_parallel_size else args.context_parallel_size
        sp = 1 if not args.sequence_parallel else tp
        ep = 1 if not args.expert_model_parallel_size else args.expert_model_parallel_size
        pp = 1 if not args.pipeline_model_parallel_size else args.pipeline_model_parallel_size
        world_size = args.world_size
        dp = world_size // tp // cp // pp

        arguments = {
            "global_batch_size": gbs,
            "micro_batch_size": mbs,
            "sequence_len": seq_len,
            "hidden": hidden,
            "tp": tp, "cp": cp, "sp": sp, "ep": ep, "dp": dp,
            "world_size": world_size,
            "source_hash": self._get_source_code_hash()
        }
        software_versions = self._get_software_version()
        arguments.update(software_versions)
        args_content = json.dumps(arguments, sort_keys=True)
        args_md5 = hashlib.md5(args_content.encode('utf-8')).hexdigest()

        mindspeed_home = os.path.dirname(os.path.dirname(mindspeed.__file__))
        adaptive_home = os.path.join(mindspeed_home, "adaptive_mem")
        Path(adaptive_home).mkdir(parents=True, exist_ok=True)
        file_abs_name_list = []

        for i in range(pp):
            file_name = f"b{mbs}_s{seq_len}_h{hidden}_tp{tp}_cp{cp}_w{world_size}_sp{sp}_ep{ep}_dp{dp}_stage{i}_{args_md5}.policy"
            file_abs_name = os.path.join(adaptive_home, file_name)
            file_abs_name_list.append(file_abs_name)

        return file_abs_name_list

    def _persistence(self):
        """Rewrite this stage's cache file with the current OOM policy cache."""
        cur_pp_rank = parallel_state.get_pipeline_model_parallel_rank()
        cur_device_ranks = torch.cuda.device_count()
        total_ranks = torch.distributed.get_world_size()
        pp = 1 if not get_args().pipeline_model_parallel_size else get_args().pipeline_model_parallel_size
        rank_per_pp = total_ranks // pp
        rank = torch.distributed.get_rank()
        # Rank 0 of every node saves the policy, and so does the first rank of
        # each PP stage when several stages share a node.
        if rank % cur_device_ranks == 0 or rank % rank_per_pp == 0:
            # O_TRUNC: the whole cache is rewritten, so stale bytes of a
            # previous, longer file must not survive.
            flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
            mode = stat.S_IWUSR | stat.S_IRUSR
            with os.fdopen(os.open(self.local_file_name_list[cur_pp_rank], flags, mode), 'w') as fout:
                for p in self.oom_policy_cache:
                    fout.write(p.to_json() + "\n")

    def add_normal_policy_cache(self, policy):
        """Add ``policy`` to the normal cache unless an equal one is present."""
        if policy in self.normal_policy_cache:
            return

        self.normal_policy_cache.append(policy)

    def add_oom_policy_cache(self, policy):
        """Add ``policy`` to the OOM cache (if new) and persist the cache."""
        if policy in self.oom_policy_cache:
            return

        self.oom_policy_cache.append(policy)
        self._persistence()

    def delete_normal_policy_cache(self, policy):
        """Remove ``policy`` from the normal cache when present."""
        if policy not in self.normal_policy_cache:
            return

        self.normal_policy_cache.remove(policy)

    def check_in_cache(self, policy: AdaptiveModelMemPolicy):
        """Return whether an equal policy is in either cache.

        Raises:
            ValueError: if ``policy`` is ``None``.
        """
        if policy is None:
            raise ValueError("unexpect policy")

        return policy in self.normal_policy_cache or policy in self.oom_policy_cache

    def check_in_normal_cache(self, policy: AdaptiveModelMemPolicy):
        """Return whether an equal policy is in the normal cache."""
        if policy is None:
            raise ValueError("unexpect policy")

        return policy in self.normal_policy_cache

    def check_in_oom_cache(self, policy: AdaptiveModelMemPolicy):
        """Return whether an equal policy is in the OOM cache."""
        if policy is None:
            raise ValueError("unexpect policy")

        return policy in self.oom_policy_cache


class FunctionCtxMgr(metaclass=SingletonBase):
    """Accumulates profiling contexts of autograd functions, keyed by name."""

    def __init__(self):
        self._ctx_dict = {}
        self._child_dict = {}

    def update_ctx(self, func_name, new_ctx, child_name):
        """Store the first context for ``func_name`` or fold timing into it."""
        if func_name not in self._ctx_dict:
            self._ctx_dict[func_name] = new_ctx
            self._ctx_dict[func_name][Key.FORWARD_CNT] = 1
            self._ctx_dict[func_name][Key.AVG_TIME] = new_ctx[Key.PRE_TOTAL_TIME]
            self._ctx_dict[func_name][Key.IS_FUNCTION] = True
        else:
            target_ctx = self._ctx_dict[func_name]
            target_ctx[Key.FORWARD_CNT] += 1
            target_ctx[Key.PRE_TOTAL_TIME] += new_ctx[Key.PRE_TOTAL_TIME]
            target_ctx[Key.AVG_TIME] = target_ctx[Key.PRE_TOTAL_TIME] / target_ctx[Key.FORWARD_CNT]

        # Only the first recorded child name is kept.
        if func_name not in self._child_dict:
            self._child_dict[func_name] = child_name

    def ctx_iter(self):
        """Yield ``(context, child_name)`` for every recorded function."""
        for key in self._ctx_dict:
            yield self._ctx_dict.get(key), self._child_dict.get(key)


class FunctionProfilingWrapper:
    """Runs one autograd function while recording time and memory figures."""

    def __init__(self, function):
        self._function = function
        self._ctx = {Key.NAME: function.__name__}

        self.start_event = torch.npu.Event(enable_timing=True)
        self.end_evnet = torch.npu.Event(enable_timing=True)

    def _pre_process(self, *args):
        """Record location, input size and the memory baseline; start the timer."""
        self._ctx[Key.PREFIX_NAME] = FuncLocationMgr().get_latest_name()
        self._ctx[Key.DEEP] = len(self._ctx[Key.PREFIX_NAME].split("."))
        self._ctx[Key.IS_MODLUE_OF_LAYER0] = True
        FuncLocationMgr().set_function_in_stack()

        self._ctx[Key.INPUT] = AdaptiveMemoryProfiling().cal_input_output_size(args) / BYTES_PER_MB
        # NOTE(review): INPUT has already been divided by BYTES_PER_MB while
        # memory_allocated() has not - confirm the subtraction is intended.
        self._ctx[Key.MEMORY] = torch.npu.memory_allocated() - self._ctx[Key.INPUT]
        self.start_event.record()

    def _post_process(self, outputs):
        """Stop the timer, finalise the figures and publish the context."""
        self.end_evnet.record()
        torch.npu.synchronize()
        self._ctx[Key.PRE_TOTAL_TIME] = self.start_event.elapsed_time(self.end_evnet)
        self._ctx[Key.OUTPUT] = AdaptiveMemoryProfiling().cal_input_output_size(outputs) / BYTES_PER_MB
        self._ctx[Key.MEMORY] = (torch.npu.memory_allocated() - self._ctx[Key.MEMORY]) / BYTES_PER_MB

        child_name = FuncLocationMgr().get_function_location(self._ctx[Key.PREFIX_NAME])
        FunctionCtxMgr().update_ctx(self._function.__name__, self._ctx, child_name)

    def run_profiling(self, *args, **kwargs):
        """Call ``function.apply`` between the pre/post steps and return its outputs."""
        # The positional arguments are handed over as one tuple.
        self._pre_process(args)
        outputs = self._function.apply(*args, **kwargs)
        self._post_process(outputs)
        return outputs


def pack_hook(tensor):
    """Saved-tensor pack hook delegating to ``SwapManager().prefetch_pack``."""
    return SwapManager().prefetch_pack(tensor)


def unpack_hook(swap_tensor):
    """Saved-tensor unpack hook delegating to ``SwapManager().prefetch_unpack``."""
    return SwapManager().prefetch_unpack(swap_tensor)


def pre_profiling_process(module_name):
    """Run the prefetch pre-forward step for ``module_name``."""
    pre_forward_func(module_name, False)


def post_profiling_process(module_name):
    """Run the post-forward D2H sync that records time for ``module_name``."""
    AdaptiveMemoryPrefetch().sync_d2h_for_recording_time(module_name, True)


def wrap_swap_profiling(function, module_name, *args):
    """Apply ``function`` under the swap hooks, bracketed by profiling steps."""
    pre_profiling_process(module_name)
    with torch.autograd.graph.saved_tensors_hooks(pack_hook, unpack_hook):
        outputs = function.apply(*args)
    post_profiling_process(module_name)
    return outputs


def wrap_function(function, *args):
    """Apply ``function`` with its saved tensors routed through the swap hooks."""
    with torch.autograd.graph.saved_tensors_hooks(pack_hook, unpack_hook):
        return function.apply(*args)


def adapt_mem_func_wrapper(fc_class, *args):
    """Apply ``fc_class`` with the treatment the current adaptive phase asks for.

    Depending on the phase the call is profiled, swap-profiled, swapped,
    recomputed through ``checkpoint`` or simply applied.

    Raises:
        TypeError: if ``fc_class`` is not a ``torch.autograd.Function`` subclass.
    """
    if not issubclass(fc_class, torch.autograd.Function):
        raise TypeError("adapt_mem_func_wrapper only support subclass of torch.autograd.Function")
    func_name = fc_class.__name__
    cnt = ForwardCounter().get_count()
    is_first_layer = FuncLocationMgr().is_first_layer
    if AdaptiveStepMgr().is_recompute_profiling_step() and is_first_layer:
        if func_name not in AdaptiveMemoryPrefetch().function_list:
            AdaptiveMemoryPrefetch().function_list.append(func_name)
        return FunctionProfilingWrapper(fc_class).run_profiling(*args)
    elif AdaptiveStepMgr().is_swap_profiling_step() and is_first_layer:  # recording swap profiling
        # A function with no recorded context has no context entry; fall
        # back to the plain apply instead of subscripting None.
        func_ctx = FunctionCtxMgr()._ctx_dict.get(func_name)
        if func_ctx is not None and func_ctx[Key.DEEP] == AdaptiveMemoryPrefetch().function_swap_profiling_deep:
            module_full_name = func_ctx[Key.PREFIX_NAME] + "." + func_name
            return wrap_swap_profiling(fc_class, module_full_name, *args)
    elif AdaptiveStepMgr().is_swap_profiling_done() and not AdaptiveMemoryPrefetch().is_stable_apply and is_first_layer:
        if func_name in AdaptiveMemoryPrefetch().prefetch_function_list:
            return wrap_function(fc_class, *args)
    elif AdaptiveStepMgr().is_all_profiling_done() and AdaptiveMemoryPrefetch().is_stable_apply:  # do one of prefetch/recompute/swap
        action = AdaptMemGraphSolver().get_func_action(func_name, cnt - 1)
        if action == ModuleAction.RECOMPUTE:
            def fc_class_apply():
                return fc_class.apply(*args)
            return checkpoint(fc_class_apply, False)
        elif action == ModuleAction.SWAP:
            return wrap_function(fc_class, *args)
    return fc_class.apply(*args)  # do default function.apply
+from functools import wraps + +import acl +import torch +import torch.nn + +from megatron.training import print_rank_0 +from megatron.core.num_microbatches_calculator import get_num_microbatches +from .adaptive_memory_profiling import AdaptiveMemoryProfiling, RecomputeHook +from .adaptive_memory_solver import AdaptMemGraphSolver +from .adaptive_memory_policy import AdaptMemPolicyManager +from .adaptive_memory_prefetch import AdaptiveMemoryPrefetch +from .adaptive_memory_tool import AdaptiveStepMgr, SingletonBase, ContextKey as Key, ForwardCounter +from .adaptive_memory_function import FunctionCtxMgr +from .adaptive_memory_swap_manager import SwapManager, LayerProfilingHook +from .adaptive_memory_apply import AdaptMemApplyManager +from .cpu_binder import bind_cpus +from .adaptive_memory_cache import PolicyCacheManager + + +class AdaptiveMemoryOpt(metaclass=SingletonBase): + + @staticmethod + def reset_all_adapt_mem_hooks(): + if not AdaptiveStepMgr().is_recompute_profiling_step(): + AdaptiveMemoryProfiling().reset_profiling_all_hooks() + + if AdaptiveMemoryOpt.is_policy_stable(): + AdaptiveMemoryOpt.reset_final_rescue_hooks() + + @staticmethod + def is_policy_stable(): + # current policy run 10 more steps unchanged is a stable policy + return AdaptiveStepMgr().get_cur_step() >= AdaptMemGraphSolver().remove_swap_manager_hook_step + + @staticmethod + def reset_final_rescue_hooks(): + SwapManager().reset_oom_rescue_hooked_modules() + + @staticmethod + def reset_adapt_mem_modules(): + RecomputeHook().reset_recompute_modules() # clear recompute modules + AdaptiveMemoryProfiling().reset_profiling_all_hooks() # clear profiling all hook + AdaptiveMemoryPrefetch().reset_adaptive_prefetch_all_hooks() # clear adaptive prefetch all hook + SwapManager().reset_all_for_oom_rescue() # clear all hook and tensor in oom rescue + + def set_adapt_mem_hook(self, models): + torch.npu.synchronize() + AdaptiveMemoryProfiling().record_time() + context = AdaptiveMemoryProfiling().context + # 
reset auto_function list + if not AdaptiveMemoryPrefetch().is_stable_apply: + AdaptiveMemoryPrefetch().function_swap_profiling_deep = 0 + AdaptiveMemoryPrefetch().prefetch_function_list = [] + AdaptiveMemoryPrefetch().prefetch_module_dict.clear() + + if AdaptiveStepMgr().is_recompute_profiling_step(): + if AdaptiveStepMgr().is_last_recompute_profiling_step(): + # insert function profiling to context + for ctx, child in FunctionCtxMgr().ctx_iter(): + AdaptiveMemoryProfiling().insert_func_profiling(ctx, child) + # update params when has function + if len(FunctionCtxMgr()._ctx_dict): + update_swap_profiling_step_and_deep_list() + + # clear recompute profiling hook + AdaptiveMemoryProfiling().reset_profiling_hooks() + AdaptiveMemoryPrefetch().reset_adaptive_prefetch_all_hooks() + # apply layer profiling hook for following steps + LayerProfilingHook().apply_layer_profiling_hook(AdaptiveMemoryProfiling().layer0_module) + return + + if AdaptiveStepMgr().is_layer_profiling_step(): + if AdaptiveStepMgr().is_last_layer_profiling_step(): + SwapManager().forward_time = LayerProfilingHook().get_single_layer_time() + LayerProfilingHook().reset_layer_profiling_hook() + LayerProfilingHook().forward_time_list.clear() + print_rank_0(f'forward time is {SwapManager().forward_time}') + config = AdaptiveMemoryPrefetch().config + AdaptiveMemoryPrefetch().register_recursive_apply_prefetch(config, models, context) + return + + # update swap profiling stats + if AdaptiveStepMgr().is_swap_profiling_step(): + AdaptiveMemoryPrefetch().update_ctx(models, context) + + + if AdaptiveStepMgr().is_swap_profiling_done() and not AdaptiveMemoryPrefetch().is_stable_apply: + AdaptiveMemoryPrefetch().adaptive_select_module(models, context) + if not AdaptiveMemoryPrefetch().is_stable_apply: + return + + if AdaptMemGraphSolver().need_prepare_solver: + # reduce max_device_memory and generate all policy combinations at first solver step + AdaptMemGraphSolver().reduce_device_memory(context[Key.DEVICE_MEMORY]) 
def hook_adapt_mem_step(self, step_func, models):
    """Wrap a ``step`` callable so adaptive-memory bookkeeping runs after it.

    Args:
        step_func: The original step callable. It receives exactly the
            positional and keyword arguments given to the returned wrapper.
        models: Passed through unchanged to ``self.set_adapt_mem_hook``.

    Returns:
        A callable that returns whatever ``step_func`` returns and carries
        its metadata (``__name__``, ``__doc__``, function attributes,
        ``__wrapped__``).
    """

    # Keep the identity of the wrapped step visible to callers that inspect
    # it, consistent with the @wraps usage on the other wrapper in this file.
    @wraps(step_func)
    def custom_adapt_mem_step(*args, **kwargs):
        try:
            result = step_func(*args, **kwargs)  # cur step is done after calling step_func
            # Nothing left to adapt once the policy is stable, and nothing to
            # do during skipped steps.
            if AdaptMemPolicyManager().is_stable_mem_policy() or AdaptiveStepMgr().is_skipping_step():
                return result

            AdaptiveMemoryProfiling().update_whole_model_memory()
            AdaptMemPolicyManager().update_hccl_memory()
            self.set_adapt_mem_hook(models)

            return result
        finally:
            # Executed on every exit path (early return, normal return or an
            # exception): incr step num after step_func and adapting.
            AdaptiveStepMgr().incr_step()

    return custom_adapt_mem_step
def cal_profiling_step(num_micro_batches):
    """Return the number of steps used for recompute profiling.

    The value is 4 divided by ``num_micro_batches`` rounded up, but never
    less than 5.
    """
    target_runs = 4
    floor_steps = 5
    whole_steps, leftover = divmod(target_runs, num_micro_batches)
    # A non-zero remainder means one extra, partially used step is required.
    needed_steps = whole_steps + 1 if leftover != 0 else whole_steps
    return max(floor_steps, needed_steps)
opt_param_scheduler = setup_model_and_optimizer(*args, **kwargs) + + optimizer.step = AdaptiveMemoryOpt().hook_adapt_mem_step(optimizer.step, models) + AdaptiveMemoryProfiling().construct_and_register_profiling_hooks(models) + + init_profiling_steps() + register_custom_hooks(models) + + AdaptiveMemoryPrefetch().solve_prefetch_config() + # 绑核 + if "910B" in acl.get_soc_name() or "910A" in acl.get_soc_name(): + bind_cpus(torch.cuda.device_count(), torch.cuda.current_device(), 0) + # 加载历史策略 + PolicyCacheManager().load_cache_file() + + return models, optimizer, opt_param_scheduler + + return wrapper diff --git a/model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_policy.py b/model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_policy.py new file mode 100644 index 000000000..8fc3a4ad1 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/adaptive_memory_policy.py @@ -0,0 +1,185 @@ +import sys +from copy import deepcopy + +import acl +import torch +from megatron.training import print_rank_0 + +from .adaptive_memory_cache import AdaptiveLayerMemPolicy +from .adaptive_memory_prefetch import AdaptiveMemoryPrefetch +from .adaptive_memory_tool import AdaptiveStepMgr, SingletonBase, ModuleAction, LayerAction, ContextKey as Key + + +class AdaptMemPolicyManager(metaclass=SingletonBase): + + def __init__(self): + self.hccl_memory = 0 + + # policy combinations + self.policy_combinations = [] + self.without_adapt_mem = 0.0 + self.full_recompute_comb = None + self.full_swap_comb = None + self.without_adaptive_comb = None + # solve policy + self.adapt_modules_num = 0 + self.total_adapt_memory = 0.0 + self.module_layers_name = [] + # adaptive prefetch + self.prefetch_parents_comb = [] + self.memory_interval = 1 + + def prepare_policy(self, model_context): + self.traversal_model_context(model_context) + for comb in self.policy_combinations: + comb.memory = comb.memory + self.without_adapt_mem + # select policy that 
def traversal_model_context(self, context):
    """Walk the context tree and build policy combinations at an adaptable layer.

    Each entry of ``context[Key.SUBMODULES]`` is inspected in order. When an
    entry carries both ``Key.ALLOWED_ADAPT`` and ``Key.MEMORY``,
    ``generate_full_combinations`` is started from it and this call returns
    without looking at the remaining entries; otherwise the walk recurses
    into that entry.

    Args:
        context: A context dict; ``Key.SUBMODULES`` is optional.
    """
    for layer_context in context.get(Key.SUBMODULES, []):
        # Accumulate the total dynamic memory that takes part in adaptation.
        # NOTE(review): the flag is tested on the child (layer_context) while
        # the amount is read from the parent (context) - confirm intended.
        if Key.IS_ADAPT_LAYER in layer_context and Key.MEMORY in context:
            self.total_adapt_memory += context[Key.MEMORY]
        if Key.ALLOWED_ADAPT in layer_context and Key.MEMORY in layer_context:
            self.generate_full_combinations(layer_context, self.policy_combinations, "", 0, False)
            # Only this call stops here; callers further up the recursion
            # keep iterating over their own submodules.
            return
        else:
            self.traversal_model_context(layer_context)
def build_initial_combinations(self, context):
    """Create the three whole-layer baseline policies from a layer context.

    Memory values stored here are offsets relative to the layer's measured
    memory (``self.without_adapt_mem``); ``prepare_policy`` adds that amount
    back onto every entry of ``self.policy_combinations`` afterwards.

    Args:
        context: Layer context dict. It is indexed directly with
            ``Key.MEMORY``, ``Key.NAME``, ``Key.INPUT``, ``Key.OUTPUT``,
            ``Key.AVG_TIME``, ``Key.MODULE_SWAP_AVG_MEMORY`` and
            ``Key.MODULE_SWAP_AVG_TIME``, so a missing key raises KeyError.
    """
    self.without_adapt_mem = context[Key.MEMORY]
    # Recompute the whole layer: memory becomes input + output instead of
    # the measured layer memory.
    self.full_recompute_comb = AdaptiveLayerMemPolicy(recompute=[context[Key.NAME]], swap=[],
                                                      memory=context[Key.INPUT] + context[Key.OUTPUT] - self.without_adapt_mem,
                                                      time=context[Key.AVG_TIME],
                                                      adapt_type=LayerAction.FULL_RECOMPUTE)
    # Swap the whole layer: memory offset is minus the averaged swapped amount.
    self.full_swap_comb = AdaptiveLayerMemPolicy(recompute=[], swap=[context[Key.NAME]],
                                                 memory=-context[Key.MODULE_SWAP_AVG_MEMORY],
                                                 time=context[Key.MODULE_SWAP_AVG_TIME], adapt_type=LayerAction.FULL_SWAP)
    # Leave the layer untouched: zero offset and zero extra time.
    self.without_adaptive_comb = AdaptiveLayerMemPolicy(recompute=[], swap=[],
                                                        memory=0, time=0,
                                                        adapt_type=LayerAction.NONE)
    # NOTE(review): full_swap_comb is stored on the instance but is not added
    # to policy_combinations here - confirm that this is deliberate.
    self.policy_combinations.append(self.full_recompute_comb)
    self.policy_combinations.append(self.without_adaptive_comb)
def build_combinations(self, context, pre_policy_combs, adapter_tag, without_cur_layer):
    """Derive new policies by applying ``adapter_tag`` to ``context``'s module.

    One policy is derived from every entry of ``pre_policy_combs``. Unless
    ``without_cur_layer`` is true, one more policy that contains only this
    module is appended at the end.

    Returns:
        The list of newly built policies, in the order they were created.
    """
    # Work on a snapshot taken before the first combination is built.
    snapshot = pre_policy_combs.copy()
    derived = [self.build_one_combination(context, base_comb, adapter_tag) for base_comb in snapshot]
    if not without_cur_layer:
        standalone = self.build_one_combination(context, AdaptiveLayerMemPolicy(time=0), adapter_tag)
        derived.append(standalone)
    return derived
def remove_content_before_target(path: str, prefix: str):
    """Return ``path`` with a leading ``prefix`` cut off.

    ``path`` is returned unchanged when it does not start with ``prefix``.
    """
    return path[len(prefix):] if path.startswith(prefix) else path
def __init__(self):
    """Initialise the prefetch/swap bookkeeping with empty defaults."""
    # Hook handles collected by the prefetch_register_* helpers; removed and
    # cleared again in reset_module_hooks.
    self.modules_hooks = []
    # Flags driven by adaptive_select_module.
    self.is_stable_apply = False
    self.is_first_select_module = False
    # Starting arguments for register_recursive_apply_prefetch.
    self.config = {
        "pre_layer_full_name": "",
        "cur_layer_name": "module",
    }
    # Set by set_chunk_num.
    self.chunk_num = 0
    # Copied from SwapManager().forward_time by set_forward_time.
    self.forward_time = 0
    self.swap_time = 0
    self.not_need_swap_module = []
    self.need_swap_module_full_name = []
    # Full names and matching contexts of the modules selected for swapping.
    self.need_swap_module_name = []
    self.need_swap_module_ctx = []
    self.prefetch_module_dict = {}
    self.abnormal_scenario_module_list = ["input_norm", "self_attention", "post_attention_norm"]
    # statistics
    # prefetch_deep_list and prefetch_hook_interval are filled in by
    # solve_prefetch_config from the depth range and each_depth_run_times.
    self.prefetch_hook_interval = None
    self.prefetch_deep_list = []
    self.prefetch_deep_start = 0
    self.prefetch_deep_end = 0
    self.each_depth_run_times = 2
    # Layer indices as strings, filled in by set_chunk_num.
    self.layer_list = []
    # Per-module records gathered while profiling; emptied by clear_dict.
    self.swap_event_dict = {}
    self.swap_memory_in_module_dict = {}
    self.prefetch_module_event_dict = {}
    # auto_function
    self.function_swap_profiling_deep = 0
    self.function_list = []
    self.prefetch_function_list = []
def set_chunk_num(self):
    """Fill ``layer_list`` and ``chunk_num`` from the parallelism arguments.

    ``layer_list`` holds the string indices ``"0"`` .. ``"n-1"`` where ``n``
    is ``num_layers // pipeline_model_parallel_size``. ``chunk_num`` is 1
    unless the virtual pipeline size exceeds 1, in which case it is the
    smaller of that size and ``n``. Unset (falsy) sizes count as 1.
    """
    cfg = get_args()
    pipeline_size = cfg.pipeline_model_parallel_size or 1
    virtual_size = cfg.virtual_pipeline_model_parallel_size or 1
    layers_per_stage = cfg.num_layers // pipeline_size
    self.layer_list = [str(layer_idx) for layer_idx in range(layers_per_stage)]
    self.chunk_num = min(virtual_size, layers_per_stage) if virtual_size > 1 else 1
is_recording=True, prefetch forward and backward hook success: {cur_layer_full_name}") + prefetch_register_pre_forward_hook(models, cur_layer_full_name, True) + transformer_layer_register_post_forward_hook(models, True) + transformer_layer_register_pre_backward_hook(models) + + def prefetch_profiling_register_for_function(self, ctx, cur_layer_full_name): + if self.prefetch_deep_list[self.get_deep_index()] == ctx[Key.DEEP]: + self.function_swap_profiling_deep = ctx[Key.DEEP] + print_rank_0(f"cur_step()={AdaptiveStepMgr().get_cur_step()}, {self.function_swap_profiling_deep=}, is_recording=True, prefetch swap hook success: {cur_layer_full_name}") + + def prefetch_register(self, ctx, models, cur_layer_full_name): + if ctx.get(Key.IS_LAYER0_OF_MODULE0, False): + print_rank_0(f"is_recording=False, prefetch forward and backward hook success: cur_step()={AdaptiveStepMgr().get_cur_step()}, {cur_layer_full_name}") + transformer_layer_register_post_forward_hook(models) + transformer_layer_register_pre_backward_hook(models) + from .adaptive_memory_profiling import AdaptiveMemoryProfiling + LayerProfilingHook().apply_layer_profiling_hook(models) + if cur_layer_full_name in self.need_swap_module_name: + print_rank_0(f"is_recording=False, prefetch swap hook success: cur_step()={AdaptiveStepMgr().get_cur_step()}, {cur_layer_full_name}") + SwapManager().hook_prefetch_forward(models, cur_layer_full_name) + ctx[Key.IS_SWAP] = True + elif Key.AVG_TIME in ctx and Key.IS_MODLUE_OF_LAYER0 in ctx: + ctx[Key.IS_SWAP] = False + + def prefetch_register_for_function(self, ctx, cur_layer_full_name): + if cur_layer_full_name in self.need_swap_module_name: + if ctx[Key.NAME] not in self.prefetch_function_list: + print_rank_0(f"is_recording=False, prefetch swap hook success: cur_step()={AdaptiveStepMgr().get_cur_step()}, {cur_layer_full_name}") + self.prefetch_function_list.append(ctx[Key.NAME]) + ctx[Key.IS_SWAP] = True + else: + ctx[Key.IS_SWAP] = False + + + def 
register_recursive_apply_prefetch(self, config, models, ctx, is_prefetch_prof=True): + pre_layer_full_name = config["pre_layer_full_name"] + cur_layer_name = config["cur_layer_name"] + if cur_layer_name == Key.MODULE and isinstance(models, list): + idx = 0 + for model in models: + if idx < self.chunk_num: + self.register_recursive_apply_prefetch(config, model, self._get_list_layers_context(ctx, idx), is_prefetch_prof) + idx += 1 + return + + # deal auto_function + if ctx.get(Key.IS_FUNCTION, False): + cur_layer_full_name = pre_layer_full_name + "." + ctx[Key.NAME] + if is_prefetch_prof: + # function profiling + self.prefetch_profiling_register_for_function(ctx, cur_layer_full_name) + else: + # function prefetch + self.prefetch_register_for_function(ctx, cur_layer_full_name) + + config = { + "pre_layer_full_name": cur_layer_full_name, + "cur_layer_name": cur_layer_name, + } + self.register_recursive_apply_prefetch(config, models, ctx[Key.SUBMODULES][0], is_prefetch_prof) + return + cur_layer_full_name = pre_layer_full_name + '.' + cur_layer_name + + if is_prefetch_prof: + self.prefetch_profiling_register(ctx, models, cur_layer_full_name) + else: + self.prefetch_register(ctx, models, cur_layer_full_name) + + pre_layer_full_name = ctx[Key.PREFIX_NAME] + "." + ctx[Key.NAME] + idx = 0 + for name, module in models.named_children(): + config = { + "pre_layer_full_name": pre_layer_full_name, + "cur_layer_name": name, + } + self.register_recursive_apply_prefetch(config, module, ctx[Key.SUBMODULES][idx], is_prefetch_prof) + idx += 1 + + def _get_swappable_child_ctx(self, module_ctx): + res_ctxs, res_names = [], [] + for child_ctx in module_ctx.get(Key.SUBMODULES, []): + if Key.AVG_TIME in child_ctx: + res_ctxs.append(child_ctx) + res_names.append(child_ctx[Key.PREFIX_NAME] + '.' 
+ child_ctx[Key.NAME]) + else: + sub_res_ctxs, sub_res_names = self._get_swappable_child_ctx(child_ctx) + res_ctxs.extend(sub_res_ctxs) + res_names.extend(sub_res_names) + return res_ctxs, res_names + + def adjust_need_swap_module(self): + if len(self.need_swap_module_name) > 0: + last_module_ctx = self.need_swap_module_ctx.pop() + self.need_swap_module_name.pop() + child_module_ctxs, child_module_names = self._get_swappable_child_ctx(last_module_ctx) + self.need_swap_module_ctx.extend(child_module_ctxs) + self.need_swap_module_name.extend(child_module_names) + + def is_no_module_to_swap(self): + return len(self.need_swap_module_name) == 0 + + def record_prefetch_time(self, context): + if len(list(self.prefetch_module_event_dict.keys())) == 0: + return + first_key = list(self.prefetch_module_event_dict.keys())[0] + if Key.PREFIX_NAME in context and Key.NAME in context and first_key == context[Key.PREFIX_NAME] + "." + context[Key.NAME]: + cur_event_list = self.prefetch_module_event_dict.pop(first_key) + for event_list in cur_event_list: + start, end = event_list[0], event_list[1] + cur_time = start.elapsed_time(end) + if Key.MODULE_FORWARD_TOTAL_TIME in context: + context[Key.MODULE_FORWARD_CNT] += 1 + context[Key.MODULE_FORWARD_TOTAL_TIME] += cur_time + context[Key.MODULE_FORWARD_AVG_TIME] = context[Key.MODULE_FORWARD_TOTAL_TIME] / context[Key.MODULE_FORWARD_CNT] + else: + context[Key.MODULE_FORWARD_CNT] = 1 + context[Key.MODULE_FORWARD_TOTAL_TIME] = cur_time + context[Key.MODULE_FORWARD_AVG_TIME] = cur_time + if Key.SUBMODULES not in context: + return + for submodule in context[Key.SUBMODULES]: + self.record_prefetch_time(submodule) + + def record_swap_time(self, context): + if len(list(self.swap_event_dict.keys())) == 0: + return + first_key = list(self.swap_event_dict.keys())[0] + if Key.PREFIX_NAME in context and Key.NAME in context and first_key == context[Key.PREFIX_NAME] + "." 
def record_swap_memory(self, context):
    """Move recorded swap memory from the pending dict into the context tree.

    Only the first key of ``self.swap_memory_in_module_dict`` is considered
    per visited context. When it equals the context's full name
    (``PREFIX_NAME + "." + NAME``) the entry is popped, added to the
    context's running total, and the average is recomputed with
    ``Key.MODULE_SWAP_CNT`` (which this method reads but never writes; it is
    written by ``record_swap_time``). The walk then continues into every
    submodule.
    """
    pending = self.swap_memory_in_module_dict
    if not pending:
        return

    oldest_key = next(iter(pending))
    has_full_name = Key.PREFIX_NAME in context and Key.NAME in context
    if has_full_name and oldest_key == context[Key.PREFIX_NAME] + "." + context[Key.NAME]:
        swapped = pending.pop(oldest_key)
        if Key.MODULE_SWAP_TOTAL_MEMORY in context:
            context[Key.MODULE_SWAP_TOTAL_MEMORY] += swapped
        else:
            context[Key.MODULE_SWAP_TOTAL_MEMORY] = swapped
        context[Key.MODULE_SWAP_AVG_MEMORY] = context[Key.MODULE_SWAP_TOTAL_MEMORY] / context[Key.MODULE_SWAP_CNT]

    if Key.SUBMODULES in context:
        for child in context[Key.SUBMODULES]:
            self.record_swap_memory(child)
+ context[Key.NAME]) + + if Key.SUBMODULES not in context: + return + + for submodule in context[Key.SUBMODULES]: + self.deal_not_need_swap_module(submodule) + + def clear_dict(self): + self.prefetch_module_event_dict.clear() + self.swap_event_dict.clear() + self.swap_memory_in_module_dict.clear() + + def update_ctx(self, models, context): + if self.get_deep_index() % self.each_depth_run_times == 0: + self.record_prefetch_time(context) + self.record_swap_time(context) + self.record_swap_memory(context) + # 清除所有钩子 + self.reset_adaptive_prefetch_all_hooks() + # 重新挂hook + if not AdaptiveStepMgr().is_swap_profiling_done(): + self.register_recursive_apply_prefetch(self.config, models, context) + # 清空dict + self.clear_dict() + + def init_swap_modules(self, context): + if Key.IS_LAYER0_OF_MODULE0 in context: + for child_ctx in context[Key.SUBMODULES]: + if Key.AVG_TIME in child_ctx: + self.need_swap_module_name.append(child_ctx[Key.PREFIX_NAME] + '.' + child_ctx[Key.NAME]) + self.need_swap_module_ctx.append(child_ctx) + return + for child_ctx in context.get(Key.SUBMODULES, []): + self.init_swap_modules(child_ctx) + + def adaptive_select_module(self, models, context): + if len(self.need_swap_module_name) == 0: + # 估计需要swap的module + self.set_forward_time() + self.init_swap_modules(context) + self.need_swap_module_name = broadcast_obj(self.need_swap_module_name) + + if self.is_first_select_module and SwapManager().is_need_adjust_module(): + # 微调swap module + print_rank_0(f"start adjust swap module, forward time is {LayerProfilingHook().get_single_layer_time()}") + self.adjust_need_swap_module() + if self.is_no_module_to_swap(): + # 处理异常场景 + self.is_stable_apply = True + elif self.is_first_select_module and not SwapManager().is_need_adjust_module(): + print_rank_0(f"swap is stable, step={AdaptiveStepMgr().get_cur_step()}, " + f"forward time is {LayerProfilingHook().get_single_layer_time()}") + self.is_stable_apply = True + + self.is_first_select_module = True + # 
移除preftech的所有hook + self.reset_adaptive_prefetch_all_hooks() + # 重新挂preftech的钩子 + self.register_recursive_apply_prefetch(self.config, models, context, False) + # 清空 + self.clear_dict() + LayerProfilingHook().forward_time_list.clear() + + def sync_d2h_for_recording_time(self, module_name, is_function=False): + # 每个module前向结束后插入end_event + module_forward_end_event = torch.npu.Event(enable_timing=True) + module_forward_end_event.record() + self.prefetch_module_event_dict[module_name][-1].append(module_forward_end_event) + + torch.cuda.current_stream().wait_stream(SwapManager().prefetch_stream) + end_pack_event = None + if AdaptiveStepMgr().is_swap_profiling_step(): + end_pack_event = torch.npu.Event(enable_timing=True) + end_pack_event.record() + + for swap_tensor in SwapManager().swap_tensor_in_module: + # 更新每个tensor的pack_module_name + swap_tensor.pack_module_name = SwapManager().swap_tensors[-1].layer_name + if swap_tensor is SwapManager().swap_tensor_in_module[0]: + swap_tensor.first_tensor = True + swap_tensor.end_pack_event = end_pack_event + + # record swap info + for swap_tensor in SwapManager().swap_tensor_in_module: + # cal tensor memory (MB) + tensor_memory = (swap_tensor.tensor.numel() * swap_tensor.tensor.element_size()) / (1024 * 1024) + + if swap_tensor.pack_module_name == module_name: + self.recording_swap_momery_in_module(swap_tensor, swap_tensor.pack_module_name, tensor_memory) + self.recording_swap_time_in_module(swap_tensor, swap_tensor.pack_module_name, is_function) + else: + self.recording_swap_momery_in_module(swap_tensor, module_name, tensor_memory) + self.recording_swap_time_in_module(swap_tensor, module_name, is_function) + + # reset swap_tensor_in_module + SwapManager().swap_tensor_in_module = [] + + def is_module_in_need_swap_module_name(self, module_name): + if module_name in self.need_swap_module_name: + return module_name + return None + + # Records the memory swapped to the cpu in module + def recording_swap_momery_in_module(self, 
def pre_forward_func(module_name, is_mark_first_layer):
    """Body of the forward pre-hook installed by the prefetch profiler.

    With ``is_mark_first_layer`` set, only ``FuncLocationMgr().is_first_layer``
    is switched on. Otherwise a timing event is created, stored as a new
    one-element list under ``module_name`` in ``prefetch_module_event_dict``
    and then recorded.
    """
    if is_mark_first_layer:
        FuncLocationMgr().is_first_layer = True
        return

    start_event = torch.npu.Event(enable_timing=True)
    # One inner list per forward call; the first call for a name creates the
    # outer list.
    AdaptiveMemoryPrefetch().prefetch_module_event_dict.setdefault(module_name, []).append([start_event])
    start_event.record()
class RecomputeHook(metaclass=SingletonBase):
    """Keeps the modules whose ``forward`` was swapped for a checkpointed one."""

    def __init__(self):
        self.recompute_modules = []

    @staticmethod
    def hook_checkpoint_forward(forward_func):
        """Return a wrapper that runs ``forward_func`` via ``tensor_parallel.checkpoint``.

        Keyword argument values are appended to the positional arguments for
        the ``checkpoint`` call and turned back into keywords, in the same
        order, before ``forward_func`` is invoked. ``False`` is passed as
        the second positional argument of ``checkpoint``.
        """
        def custom_forward(*args, **kwargs):
            positional_count = len(args)
            keyword_names = list(kwargs)

            def inside_forward(*flat_args):
                restored_kwargs = dict(zip(keyword_names, flat_args[positional_count:]))
                return forward_func(*flat_args[:positional_count], **restored_kwargs)

            return tensor_parallel.checkpoint(inside_forward, False, *args, *kwargs.values())

        return custom_forward

    def reset_recompute_modules(self):
        """Restore the saved non-checkpointed ``forward`` on every tracked module."""
        for tracked in self.recompute_modules:
            tracked.forward = tracked.no_checkpoint_adaptive_recompute_forward
        self.recompute_modules.clear()
def addup_allowed_mem_adapt_profiling_module(self, module):
    """Register ``module`` as a class that memory adaptation may act on.

    Args:
        module: a class object deriving from ``torch.nn.Module``.

    Raises:
        TypeError: if ``module`` is not a class, or is a class that does not
            derive from ``torch.nn.Module``.
    """
    # issubclass() itself raises an unrelated TypeError when handed a
    # non-class (e.g. a module *instance*), so check for a class first and
    # report every bad argument with the same message.
    if not (isinstance(module, type) and issubclass(module, torch.nn.Module)):
        raise TypeError("Allowed adapt module must be subclass of torch.nn.Module")
    self.allowed_adapt_module.append(module)
@staticmethod
def _is_parent_child_relation(parent_ctx, child_ctx):
    """Return True when ``child_ctx`` is located directly under ``parent_ctx``.

    The child must be exactly one level deeper, and the child's prefix must
    match the parent's full dotted path segment by segment. A trailing run of
    digits in a segment is ignored during the comparison, so paths that differ
    only by such an index (e.g. the position inside a ``torch.nn.ModuleList``)
    are treated as equal.
    """
    if parent_ctx[Key.DEEP] + 1 != child_ctx[Key.DEEP]:
        return False

    parent_path = f"{parent_ctx[Key.PREFIX_NAME]}.{parent_ctx[Key.NAME]}".split(".")
    child_prefix = child_ctx[Key.PREFIX_NAME].split(".")
    if len(parent_path) != len(child_prefix):
        return False

    # Mask a trailing numeric index so that chunks / layers compare equal.
    trailing_index = re.compile(r'\d+$')
    for parent_part, child_part in zip(parent_path, child_prefix):
        if trailing_index.sub('#', parent_part) != trailing_index.sub('#', child_part):
            return False
    return True
def is_layer0(self, ctx):
    """Return True when ``ctx`` is named "0" and its prefix does not contain "expert"."""
    named_zero = ctx[Key.NAME] == "0"
    # The prefix is only inspected when the name already matched.
    return named_zero and "expert" not in ctx[Key.PREFIX_NAME]
def construct_ctx_recursively(self, deep, prefix_name, model, ctx, allowed_adapting):
    """Recursively build the context tree that later receives profiling data.

    One context dict is appended to ``ctx[Key.SUBMODULES]`` for every direct
    child of ``model``, then the function descends into that child.

    Args:
        deep: depth stored under ``Key.DEEP`` for the children of ``model``.
        prefix_name: dotted path stored under ``Key.PREFIX_NAME`` for them.
        model: module whose ``named_children()`` are walked.
        ctx: context dict of ``model``; receives the child contexts.
        allowed_adapting: whether children may still be tagged as adaptable.
    """
    next_allowed_adapting = allowed_adapting
    # `model` is the shared parent of every child visited below.
    parent_is_module_list = isinstance(model, torch.nn.ModuleList)
    for name, module in model.named_children():
        if Key.SUBMODULES not in ctx:
            ctx[Key.SUBMODULES] = []
        current_ctx = {Key.NAME: name, Key.DEEP: deep, Key.PREFIX_NAME: prefix_name}
        ctx[Key.SUBMODULES].append(current_ctx)
        if self.is_layer0(current_ctx):
            AdaptiveMemoryPrefetch().prefetch_deep_start = current_ctx[Key.DEEP]
        if current_ctx[Key.DEEP] > AdaptiveMemoryPrefetch().prefetch_deep_end:
            AdaptiveMemoryPrefetch().prefetch_deep_end = current_ctx[Key.DEEP]
        if allowed_adapting:
            # Test against all registered classes at once. Looping over them
            # and keeping only the last _tag_module() result made the outcome
            # depend on registration order: a match on an earlier class was
            # overwritten by the "no match" answer of a later one.
            is_adapt_module = isinstance(module, tuple(self.allowed_adapt_module))
            next_allowed_adapting = self._tag_module(
                ctx, current_ctx, is_adapt_module, parent_is_module_list)
        next_name = (prefix_name + '.' + name) if prefix_name != '' else name
        next_deep = deep + 1
        self.construct_ctx_recursively(next_deep, next_name, module, current_ctx, next_allowed_adapting)
def cal_input_output_size(self, args):
    """Return the number of bytes held by all tensors reachable from ``args``.

    Args:
        args: a tensor, or an arbitrarily nested iterable / mapping that may
            contain tensors. Anything else contributes 0.

    Returns:
        Sum of ``numel() * element_size()`` over every tensor found.
    """
    from collections.abc import Mapping

    if isinstance(args, torch.Tensor):
        return args.numel() * args.element_size()
    if isinstance(args, (str, bytes, bytearray)):
        # Iterating a str yields str objects again, so descending into one
        # would recurse until RecursionError. Text never holds tensors.
        return 0
    if isinstance(args, Mapping):
        # Tensors live in the values; iterating a mapping only yields keys.
        args = args.values()
    if isinstance(args, Iterable):
        return sum(self.cal_input_output_size(arg) for arg in args)
    return 0
def __init__(self):
    """Initialise the solver state: policy bookkeeping, memory figures and
    the bounds/flags used by the dichotomy (binary) search."""
    # --- policy bookkeeping -------------------------------------------
    self.num_warmup_bs_in_chunks = self.get_chunk_num_warmup_micro_batches()
    self.adapt_mem_policy = {}
    self.best_layer_policy_comb = []
    self.func_locations: List[FuncLocation] = []
    self.cur_adapt_policy = None
    self.need_prepare_solver = True

    # --- memory figures -----------------------------------------------
    self.static_memory = 0
    self.device_memory = sys.maxsize
    self.cur_device_memory = -1
    self.first_non_oom_device_memory = 0

    # --- swap / allocator statistics -----------------------------------
    self.swap_size = 0
    self.record_swap_out_size = 0
    self.last_num_alloc_retries = torch.npu.memory_stats()["num_alloc_retries"]
    self.remove_swap_manager_hook_step = 0
    # number of alloc failures recorded for the current policy
    self.alloc_retries_times = 0
    # whether a non-OOM policy is considered stable: 1 = stable, 0 = unstable
    self.is_stable_for_non_oom_policy = 1

    # --- dichotomy search state ------------------------------------------
    self.flag_find_target_memory = False
    self.min_dichotomy_value = 1
    self.dichotomy_memory_left = 0
    self.dichotomy_memory_right = 0
@staticmethod
def get_chunk_num_warmup_micro_batches():
    """Return, per model chunk, the number of warm-up micro batches on this rank.

    Returns ``[1]`` when the pipeline size is at most 1 or when one of the
    queried values is ``None``. Otherwise the list has one entry without
    virtual pipeline and ``vpp`` entries with it.
    """
    pp = ps.get_pipeline_model_parallel_world_size()
    vpp = ps.get_virtual_pipeline_model_parallel_world_size() or 1
    pp_rank = ps.get_pipeline_model_parallel_rank()
    num_micro_batches = get_num_microbatches()
    if pp <= 1 or None in (num_micro_batches, pp_rank, vpp):
        return [1]

    if vpp == 1:
        num_warmup_bs_in_chunks = [pp - pp_rank]
    else:
        warmup_total = min((pp - pp_rank - 1) * 2 + (vpp - 1) * pp + 1, num_micro_batches * vpp)
        remain_batch_num = warmup_total - pp * vpp
        # Every chunk starts from `pp`; a surplus goes to the first chunk and
        # a deficit is taken from the last one.
        num_warmup_bs_in_chunks = [pp] * vpp
        num_warmup_bs_in_chunks[0] += max(0, remain_batch_num)
        num_warmup_bs_in_chunks[-1] += min(0, remain_batch_num)

    print_rank_0(f"layer_num:{get_args().num_layers}")
    print_rank_0(f"pp:{pp}")
    print_rank_0(f"vpp:{vpp}")
    print_rank_0(f"pp_rank:{pp_rank}")
    print_rank_0(f"num_micro_batches:{num_micro_batches}")
    print_rank_0(f"num_warmup_bs_in_chunks:{num_warmup_bs_in_chunks}")
    print_rank_0(f"layer_num_per_ppstage:{get_args().num_layers // ps.get_pipeline_model_parallel_world_size()}")

    return num_warmup_bs_in_chunks
def is_stable_policy(self):
    """Return True when the current policy is regarded as stable.

    Short-circuits to True once the current step is past a non-zero
    ``remove_swap_manager_hook_step``. Otherwise the local swap / retry
    statistics are refreshed, MAX-reduced through ``tensor_all_reduce`` and
    written back; the policy is stable when nothing was swapped out and the
    target memory has been found.
    """
    cur_step = AdaptiveStepMgr().get_cur_step()
    hook_step = self.remove_swap_manager_hook_step
    if cur_step > hook_step and hook_step != 0:
        return True

    total_swap_out_size = SwapManager().oom_rescue_total_swap_out_size
    self.swap_size = (total_swap_out_size - self.record_swap_out_size) // BYTES_PER_MB
    self.check_num_alloc_retries()

    local_stats = [
        int(total_swap_out_size),
        int(AdaptMemPolicyManager().hccl_memory),
        int(self.swap_size),
        int(self.flag_find_target_memory),
        int(self.alloc_retries_times),
    ]
    reduced = self.tensor_all_reduce(local_stats, torch.distributed.ReduceOp.MAX)

    # Adopt the group-wide maxima so every rank takes the same decision.
    total_swap_out_size = reduced[0]
    AdaptMemPolicyManager().hccl_memory = reduced[1]
    self.swap_size = reduced[2]
    self.flag_find_target_memory = bool(reduced[3])
    self.alloc_retries_times = reduced[4]
    SwapManager().oom_rescue_total_swap_out_size = total_swap_out_size

    if self.swap_size <= 0 and self.flag_find_target_memory:
        return True

    self.record_swap_out_size = total_swap_out_size
    return False
self.device_memory, = self.tensor_all_reduce([int(cur_min_memory)], torch.distributed.ReduceOp.MIN) + print_rank_0(f"reduce device memory from {device_memory} to {self.device_memory}") + + def check_cur_adapt_policy(self): + if not self.cur_adapt_policy: + return + + policy_cache_manager = PolicyCacheManager() + flag_in_oom_list = policy_cache_manager.check_in_oom_cache(self.cur_adapt_policy) + flag_in_normal_list = policy_cache_manager.check_in_normal_cache(self.cur_adapt_policy) + if self.swap_size > 0: + if not flag_in_oom_list: + policy_cache_manager.add_oom_policy_cache(deepcopy(self.cur_adapt_policy)) + if flag_in_normal_list: + policy_cache_manager.delete_normal_policy_cache(self.cur_adapt_policy) + return + if flag_in_oom_list or self.alloc_retries_times != 0: + return + if not flag_in_normal_list: + policy_cache_manager.add_normal_policy_cache(deepcopy(self.cur_adapt_policy)) + + def solve_adapt_mem_policy(self): + flag_is_known_policy = True + cur_step = AdaptiveStepMgr().get_cur_step() + self.remove_swap_manager_hook_step = cur_step + 1 + adapt_policy_list = None + while flag_is_known_policy: + torch.npu.synchronize() + self.cur_device_memory = self.dichotomy_find_memory() + print_rank_0(f"cur_device_memory:{self.cur_device_memory}") + if self.is_stable_for_non_oom_policy != 0: # 对于不稳定的策略不生成新策略,使用旧策略再测试一遍 + adapt_policy_list = self.get_mem_policy(self.cur_device_memory) + self.cur_adapt_policy = AdaptiveModelMemPolicy("normal", self.best_layer_policy_comb) + if self.flag_find_target_memory: + self.remove_swap_manager_hook_step = cur_step + 10 + print_rank_0( + f"success to find the target value of the current round of search: {self.cur_device_memory}") + break + # OOM policy + policy_cache_manager = PolicyCacheManager() + if policy_cache_manager.check_in_oom_cache(self.cur_adapt_policy): + self.swap_size = max(self.swap_size, 1) + continue + # no OOM policy + if policy_cache_manager.check_in_normal_cache(self.cur_adapt_policy): + self.swap_size = 0 + 
def get_dichotomy_value(self):
    """Return the floor midpoint of the current dichotomy search interval."""
    interval_sum = self.dichotomy_memory_left + self.dichotomy_memory_right
    return interval_sum // 2
def get_static_mem(self, model_context):
    """Compute, log and return the static memory figure for ``model_context``.

    The value is ``model_context[Key.USED_MEM]`` plus, for each chunk, the
    part of the first chunk's recorded memory that is not covered by
    ``AdaptMemPolicyManager().total_adapt_memory``.
    """
    chunks = model_context[Key.SUBMODULES]
    num_of_chunk = len(chunks)
    first_chunk_profiled = num_of_chunk > 0 and Key.MEMORY in chunks[0]
    single_chunk_memory = chunks[0][Key.MEMORY] if first_chunk_profiled else 0

    # dynamic memory that cannot be saved
    per_chunk_unsavable = single_chunk_memory - AdaptMemPolicyManager().total_adapt_memory
    mem_space_cannot_be_saved = per_chunk_unsavable * num_of_chunk
    # static memory = total model memory + dynamic memory that cannot be saved
    static_mem_size = model_context[Key.USED_MEM] + mem_space_cannot_be_saved
    print_rank_0(f"static_memory:{static_mem_size}")
    return static_mem_size
def get_cur_layer_idx(self, count):
    """Map a running call counter ``count`` to a layer index on this pipeline stage.

    With virtual pipeline the counter is first folded into one full sweep
    over all chunks, then split into a chunk id and an offset inside it.
    """
    pp = ps.get_pipeline_model_parallel_world_size()
    vpp = ps.get_virtual_pipeline_model_parallel_world_size() or 1
    total_layers = get_args().num_layers

    if vpp > 1:
        layers_per_chunk = total_layers // pp // vpp
        sweep_per_chunk = pp * layers_per_chunk
        position = count % (sweep_per_chunk * vpp)
        # current chunk id, and position inside that chunk's share of the sweep
        chunk_idx, offset = divmod(position, sweep_per_chunk)
        # index of the current layer inside its chunk
        layer_in_chunk = offset % layers_per_chunk
        return chunk_idx * layers_per_chunk + layer_in_chunk

    if pp > 1:
        return count % (total_layers // pp)

    return count % total_layers
global_layer_idx != all_same_func_loc[global_layer_idx].layer_idx: + raise AssertionError("get_func_action error.") + return all_same_func_loc[global_layer_idx].action + + def get_mem_layer_policy(self, combination_num, layer_num, ans): + apm = AdaptMemPolicyManager() + layer_full_recompute_memory = 0 + for index in range(layer_num): + cur_layer_index = index + cur_layer_chunk_rank = cur_layer_index // self.get_layer_num_per_chunk() + cur_layer_memory_cost = self.num_warmup_bs_in_chunks[cur_layer_chunk_rank] * apm.full_recompute_comb.memory + layer_full_recompute_memory += cur_layer_memory_cost + + layer_full_recompute_time = layer_num * apm.full_recompute_comb.time + + self.best_layer_policy_comb = [apm.full_recompute_comb for _ in range(layer_num)] + + size = layer_num - len(ans[combination_num][layer_num].polices) + pre_layer_num = len(ans[combination_num][layer_num].polices) + memory = ans[combination_num][layer_num].memory + for index in range(size): + cur_layer_index = pre_layer_num + index + cur_layer_chunk_rank = cur_layer_index // self.get_layer_num_per_chunk() + memory += self.num_warmup_bs_in_chunks[cur_layer_chunk_rank] * apm.full_recompute_comb.memory + comb_time = ans[combination_num][layer_num].time + size * apm.full_recompute_comb.time + best_policy_comb = deepcopy(ans[combination_num][layer_num].polices) + best_policy_comb.extend(size * [apm.full_recompute_comb]) + + if comb_time < layer_full_recompute_time: + self.best_layer_policy_comb.clear() + self.best_layer_policy_comb = best_policy_comb + + print_rank_0(f"full_recompute_comb.time:{apm.full_recompute_comb.time}") + print_rank_0(f"full_recompute_comb.memory:{apm.full_recompute_comb.memory}") + print_rank_0(f"without_adaptive_comb.time:{apm.without_adaptive_comb.time}") + print_rank_0(f"without_adaptive_comb.memory:{apm.without_adaptive_comb.memory}") + print_rank_0(f"full_swap_comb.time:{apm.full_swap_comb.time}") + print_rank_0(f"full_swap_comb.memory:{apm.full_swap_comb.memory}") + + for 
def get_adapt_mem_policy_list(self):
    """Flatten ``self.adapt_mem_policy`` into a list of per-policy action rows.

    Each key of ``self.adapt_mem_policy`` is a pair of strings
    ``(str(policy.recompute), str(policy.swap))`` and its value is the number
    of layers using that policy.

    Returns:
        A list of rows ``[layer_count, LayerAction, ModuleAction, ...]``.
        Whole-layer policies repeat one module action ``apm.adapt_modules_num``
        times; an adaptive policy gets one module action per entry of
        ``apm.module_layers_name``.
    """
    # Function-scope import so the block does not rely on a file-level import.
    import ast

    adapt_mem_policy_list = []
    apm = AdaptMemPolicyManager()
    for key, times in self.adapt_mem_policy.items():
        temp_adapt_mem_policy_list = [times]
        # The keys are str() renderings of the policy fields. Parse them back
        # as plain literals instead of executing them with eval().
        # NOTE(review): assumes recompute/swap are containers of plain
        # literals (e.g. lists of module names) -- confirm against the policy class.
        key_recompute = ast.literal_eval(key[0])
        key_swap = ast.literal_eval(key[1])
        if key_recompute == apm.without_adaptive_comb.recompute and key_swap == apm.without_adaptive_comb.swap:
            temp_adapt_mem_policy_list.append(LayerAction.NONE)
            temp_adapt_mem_policy_list.extend([ModuleAction.NONE] * apm.adapt_modules_num)
        elif key_recompute == apm.full_recompute_comb.recompute and key_swap == apm.full_recompute_comb.swap:
            temp_adapt_mem_policy_list.append(LayerAction.FULL_RECOMPUTE)
            temp_adapt_mem_policy_list.extend([ModuleAction.RECOMPUTE] * apm.adapt_modules_num)
        elif key_recompute == apm.full_swap_comb.recompute and key_swap == apm.full_swap_comb.swap:
            temp_adapt_mem_policy_list.append(LayerAction.FULL_SWAP)
            temp_adapt_mem_policy_list.extend([ModuleAction.SWAP] * apm.adapt_modules_num)
        else:
            # Mixed policy: decide module by module, recompute taking
            # precedence over swap.
            temp_adapt_mem_policy_list.append(LayerAction.ADAPTIVE)
            for module_name in apm.module_layers_name:
                if module_name in key_recompute:
                    temp_adapt_mem_policy_list.append(ModuleAction.RECOMPUTE)
                elif module_name in key_swap:
                    temp_adapt_mem_policy_list.append(ModuleAction.SWAP)
                else:
                    temp_adapt_mem_policy_list.append(ModuleAction.NONE)
        adapt_mem_policy_list.append(temp_adapt_mem_policy_list)
    return adapt_mem_policy_list
def launch_d2h(self):
    """Start copying the tensor into ``self.tensor_cpu`` on ``self.stream``.

    Does nothing unless the tensor is in the ``DEVICE`` state. An event
    recorded before switching streams is waited on by ``self.stream`` ahead
    of the copy. Afterwards the state becomes ``D2H``.
    """
    if self.stat != SwappableTensorStat.DEVICE:
        return

    forward_event = torch.npu.Event()
    forward_event.record()
    with torch.no_grad(), torch_npu.npu.stream(self.stream):
        self.stream.wait_event(forward_event)
        # Slice tensors are copied tensor-to-tensor, all others storage-to-storage.
        if self.is_slice_tensor:
            source, target = self.tensor, self.tensor_cpu
        else:
            source, target = self.tensor.storage(), self.tensor_cpu.storage()
        target.copy_(source, non_blocking=self.is_prefetch)
        self.stat = SwappableTensorStat.D2H
def no_swap_tensor(self, ori_tensor):
    """Return True when ``ori_tensor`` must be left alone instead of being swapped.

    A tensor is excluded when any of the following holds, checked in order:
    twice its byte size is below 1 MiB; it has no ``grad_fn``; its storage is
    empty; it is a view whose base has 5 or more dimensions; it is a view of
    a base that requires grad but has no ``grad_fn``.
    """
    doubled_nbytes = 2 * ori_tensor.numel() * ori_tensor.element_size()
    if doubled_nbytes < 1024 * 1024 or ori_tensor.grad_fn is None:
        return True
    if not ori_tensor.storage().size():
        return True

    base = ori_tensor._base
    if base is None:
        # not a view: none of the base-related exclusions apply
        return False
    return base.dim() >= 5 or (base.grad_fn is None and base.requires_grad)
def hook_prefetch_forward(self, module, layer_name):
    """Replace ``module.forward`` with a wrapper that runs it under the prefetch hooks.

    The un-hooked forward is stored on the module as
    ``no_prefetch_hook_forward`` and the module is remembered in
    ``self.prefetch_hooked_modules`` so ``reset_prefetch_hooked_modules`` can
    restore it.

    Hooking a module that is already hooked re-wraps the saved original
    forward with the new ``layer_name``. It no longer wraps the wrapper and
    overwrites the saved original, which previously left the module hooked
    after a reset.

    Args:
        module: object exposing a ``forward`` callable.
        layer_name: name passed on to ``_generate_prefetch_forward_hook``.
    """
    already_hooked = any(hooked is module for hooked in self.prefetch_hooked_modules)
    if already_hooked:
        # keep the original forward saved by the first hook call
        origin_forward = module.no_prefetch_hook_forward
    else:
        origin_forward = module.forward
        module.no_prefetch_hook_forward = origin_forward
        self.prefetch_hooked_modules.append(module)
    module.forward = self._generate_prefetch_forward_hook(origin_forward, layer_name)
def change_oom_rescue_tensors_status_to_allowed_swap(self):
    """Set ``is_allowed_oom_rescue_swap`` on every key of ``oom_rescue_device_tensors``."""
    for tracked in self.oom_rescue_device_tensors.keys():
        setattr(tracked, "is_allowed_oom_rescue_swap", True)
def oom_rescue_unpack(self, wrapped_tensor: SwappableTensor):
    """Unpack hook for ``saved_tensors_hooks``: return the real tensor for backward.

    Plain ``torch.Tensor`` values (tensors that ``oom_rescue_pack`` refused to
    wrap) are returned unchanged. For a wrapper, the storage is moved back in
    when the wrapper is registered in ``oom_rescue_host_tensors``, then the
    wrapper is dropped from the bookkeeping and its links to the sibling set
    and the storage-covering tensor are cleared.

    Args:
        wrapped_tensor: a ``SwappableTensor`` produced by ``oom_rescue_pack``,
            or a plain tensor.

    Returns:
        The underlying ``torch.Tensor``.
    """
    if isinstance(wrapped_tensor, torch.Tensor):
        return wrapped_tensor
    if wrapped_tensor in self.oom_rescue_host_tensors:
        self.move_storage_in(wrapped_tensor)
    # A default is required: the wrapper may already be gone if the hook runs
    # again for the same saved tensor or the bookkeeping was reset in between.
    self.oom_rescue_device_tensors.pop(wrapped_tensor, None)
    wrapped_tensor.cap_tensor = None
    if wrapped_tensor.bro_tensors is not None:
        wrapped_tensor.bro_tensors.remove(wrapped_tensor)
        wrapped_tensor.bro_tensors = None
    return wrapped_tensor.tensor
def is_exist_tensor_contiguous(self):
    """Return True if a tracked device tensor is both swap-allowed and contiguous."""
    return any(
        candidate.is_allowed_oom_rescue_swap and candidate.tensor.is_contiguous()
        for candidate in self.oom_rescue_device_tensors
    )
def transformer_layer_register_post_forward_hook(module, is_mark_first_layer=False):
    """Register a forward hook on ``module`` that finishes the layer's swap-out.

    When grad mode is enabled, the hook makes sure the per-microbatch queues
    exist on the module, calls ``SwapManager().sync_d2h`` and then marks the
    OOM-rescue tensors as allowed to swap. The hook handle is stored in
    ``SwapManager().post_layer_forward_and_pre_layer_backward_hooks``.
    """
    def _after_forward(layer, *args, **kwargs):
        if torch.is_grad_enabled():
            if not hasattr(layer, 'microbatch_swap_tensors_queue'):
                layer.microbatch_swap_tensors_queue = []
                layer.microbatch_cpu_tensors_queue = []
            SwapManager().sync_d2h(layer, is_mark_first_layer)
            SwapManager().change_oom_rescue_tensors_status_to_allowed_swap()
        return None

    handle = module.register_forward_hook(_after_forward)
    SwapManager().post_layer_forward_and_pre_layer_backward_hooks.append(handle)
class LayerProfilingHook(metaclass=SingletonBase):
    """Singleton that times module forward calls with timing-enabled NPU events."""

    def __init__(self):
        # handles returned by register_forward_pre_hook / register_forward_hook
        self.hook_handles = []
        # one entry per timed forward call: [start_event, end_event]
        self.forward_time_list = []
        # result of the most recent get_single_layer_time() that had samples
        self.last_compute_forward_time = None

    def _layer_register_pre_forward_hook(self, module):
        """Register the pre-forward hook that records the start event."""
        def pre_forward_hook(module, args):
            if AdaptiveStepMgr().is_layer_profiling_step() or AdaptiveStepMgr().is_all_profiling_done():
                start_event = torch.npu.Event(enable_timing=True)
                self.forward_time_list.append([start_event])
                start_event.record()
            else:
                return
        hook_handler = module.register_forward_pre_hook(pre_forward_hook)
        self.hook_handles.append(hook_handler)

    def _layer_register_post_forward_hook(self, module):
        """Register the post-forward hook that records the matching end event."""
        def post_forward_hook(module, args, output):
            if AdaptiveStepMgr().is_layer_profiling_step() or AdaptiveStepMgr().is_all_profiling_done():
                end_event = torch.npu.Event(enable_timing=True)
                # pairs with the start event appended by the pre-forward hook
                self.forward_time_list[-1].append(end_event)
                end_event.record()
            else:
                return
        hook_handler = module.register_forward_hook(post_forward_hook)
        self.hook_handles.append(hook_handler)

    def apply_layer_profiling_hook(self, module):
        """Install both timing hooks on ``module``."""
        self._layer_register_pre_forward_hook(module)
        self._layer_register_post_forward_hook(module)

    def reset_layer_profiling_hook(self):
        """Remove every installed hook and forget the handles."""
        for hook_handler in self.hook_handles:
            hook_handler.remove()
        self.hook_handles.clear()

    def get_single_layer_time(self):
        """Return the mean elapsed time over all recorded forward calls.

        Returns 0.0 when nothing has been recorded yet, which previously raised
        ``ZeroDivisionError``; ``last_compute_forward_time`` is left untouched
        in that case.
        """
        forward_cnt = len(self.forward_time_list)
        if forward_cnt == 0:
            return 0.0
        total_time = 0
        for start_event, end_event in self.forward_time_list:
            total_time += start_event.elapsed_time(end_event)
        self.last_compute_forward_time = total_time / forward_cnt
        return self.last_compute_forward_time
class FuncLocationMgr(metaclass=SingletonBase):
    """Singleton keeping a stack of ``prefix.name`` module names.

    While a function is flagged as "in stack", the first name pushed is
    remembered so ``get_function_location`` can report which direct child of a
    parent it belongs to.
    """

    def __init__(self):
        self._module_names = []
        self._function_in_stack = None
        self._function_child = None
        self.is_first_layer = False

    def push_name(self, prefix, name):
        """Push ``prefix.name``; remember it if it is the first push after the flag was set."""
        self._module_names.append(f"{prefix}.{name}")
        waiting_for_child = self._function_in_stack and not self._function_child
        if waiting_for_child:
            self._function_child = f"{prefix}.{name}"

    def pop_name(self, prefix, name):
        """Pop the top name; raise ValueError if it is not ``prefix.name``."""
        last_name = self._module_names.pop()
        if last_name == f"{prefix}.{name}":
            return
        raise ValueError(f"unexpected module name in stack, expect:{prefix}.{name}, find:{last_name}")

    def get_latest_name(self):
        """Return the name on top of the stack."""
        return self._module_names[-1]

    def set_function_in_stack(self):
        """Flag that a function is on the stack, arming ``push_name`` to record a child."""
        self._function_in_stack = True

    def get_function_location(self, parent):
        """Return the path component following ``parent`` in the recorded child name.

        Returns "" when no child was recorded. The recorded child and the
        in-stack flag are cleared afterwards.
        """
        direct_child = self._function_child[len(parent):].split(".")[1] if self._function_child else ""
        self._function_child = None
        self._function_in_stack = False
        return direct_child
class CpuTensorCache(metaclass=SingletonBase):
    """Singleton pool of pinned CPU tensors, bucketed by ``(shape, dtype)``."""

    def __init__(self):
        self.shape_to_tensor_list_map: Dict[(torch.Size, torch.dtype), Set[torch.Tensor]] = {}

    def get_cpu_tensor(self, shape: torch.Size, dtype: torch.dtype):
        """Take a tensor from the matching bucket, or allocate a new pinned one."""
        bucket = self.shape_to_tensor_list_map.setdefault((shape, dtype), set())
        if bucket:
            return bucket.pop()
        return torch.empty(shape, dtype=dtype, pin_memory=True, device='cpu')

    def release_cpu_tensor(self, cpu_tensor):
        """Put ``cpu_tensor`` back into the bucket for its shape and dtype."""
        bucket_key = (cpu_tensor.shape, cpu_tensor.dtype)
        self.shape_to_tensor_list_map.setdefault(bucket_key, set()).add(cpu_tensor)
dp)) * dp * mp + obj_tensor = torch.frombuffer(pickle.dumps(obj), dtype=torch.uint8) + obj_shape_tensor = torch.tensor(obj_tensor.shape, device=torch.npu.current_device()) + broadcast_in_mp_dp(obj_shape_tensor, src, mp, dp) + obj_len = obj_shape_tensor.cpu().tolist() + if global_rank == src: + obj_tensor_npu = obj_tensor.npu() + else: + obj_tensor_npu = torch.empty(obj_len, dtype=torch.uint8, device=torch.npu.current_device()) + broadcast_in_mp_dp(obj_tensor_npu, src, mp, dp) + result = pickle.loads(obj_tensor_npu.cpu().numpy().tobytes()) + del obj_tensor_npu + return result \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/cpu_binder.py b/model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/cpu_binder.py new file mode 100644 index 000000000..7b6708a45 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/memory/adaptive_memory/cpu_binder.py @@ -0,0 +1,109 @@ +import os +import psutil +from megatron.training import print_rank_0 + + +def _get_pcie_info(devices, keyword="PCIeBusInfo"): + device_pcie_tbl = dict() + for device in devices: + pcie_info = os.popen(f"npu-smi info -t board -i {device}").read().strip().split("\n") + for _ in pcie_info: + line = ''.join(_.split()) + if line.startswith(keyword): + device_pcie_tbl[device] = line[len(keyword) + 1:] + break + + return device_pcie_tbl + + +def _get_numa_info(pcie_tbl, keyword="NUMAnode"): + device_numa_tbl = dict() # key is device id, value is numa id + numa_devices_tbl = dict() # key is numa id, value is device id list + + for device, pcie_no in pcie_tbl.items(): + numa_info = os.popen(f"lspci -s {pcie_no} -vvv").read().strip().split("\n") + for _ in numa_info: + line = ''.join(_.split()) + if line.startswith(keyword): + numa_id = int(line[len(keyword) + 1:]) + device_numa_tbl[device] = numa_id + + devices = numa_devices_tbl.get(numa_id, None) + if devices is None: + numa_devices_tbl[numa_id] = list() + + numa_devices_tbl[numa_id].append(device) + break + 
+ return device_numa_tbl, numa_devices_tbl + + +def _get_cpu_info(numa_ids, keyword1="NUMAnode", keyword2="CPU(s)"): + cpu_idx_tbl = dict() + numa_keywords = [keyword1 + str(idx) + keyword2 for idx in numa_ids] + cpu_info = os.popen(f"lscpu").read().strip().split("\n") + for _ in cpu_info: + line = ''.join(_.split()) + if any(line.startswith(word) for word in numa_keywords): + split_info = line.split(":") + cpu_id_ranges = split_info[-1].split(",") + + ranges = list() + for range_str in cpu_id_ranges: + endpoints = range_str.split("-") + if len(endpoints) != 2: + raise Exception("lscpu command output error, please check !") + + ranges += [cid for cid in range(int(endpoints[0]), int(endpoints[1]) + 1)] + + numa_id = int(split_info[0].replace(keyword1, '').replace(keyword2, '')) + cpu_idx_tbl[numa_id] = ranges + return cpu_idx_tbl + + +# 可以用export CPU_BINDING_NUM设置每个进程绑的核数;如果不设置CPU_BINDING_NUM, +# 会根据ratio(numa利用率)进行计算,如果有64个核,0.5表示用一半,用32个核, 平分给亲和在这个numa上的npu +def bind_cpus(world_size, rank_id, device_id, ratio=0.5): + devices = [_ for _ in range(device_id, device_id + world_size)] + # 获取npu和pcie的对应关系 + device_pcie_tbl = _get_pcie_info(devices) + # 根据pcie信息获取npu和numa的对应关系 + device_numa_tbl, numa_devices_tbl = _get_numa_info(device_pcie_tbl) + # 获取使用的numa对应的cpu核分配信息 + cpu_idx_tbl = _get_cpu_info(list(numa_devices_tbl.keys())) + + # 当前rank的npu id + cur_device = rank_id + device_id + # 获取npu对应的numa id + numa_id = device_numa_tbl[cur_device] + + # 获取共享该numa的npu信息 + shard_devices = numa_devices_tbl[numa_id] + # 按照npu id进行排序 + shard_devices.sort() + + # 获取该numa上所有的cpu id信息 + all_cpus = cpu_idx_tbl[numa_id] + + cpu_nums = len(all_cpus) + # 计算给该共享numa的npu分配的核的个数 + CPU_BINDING_NUM = os.environ.get("CPU_BINDING_NUM", None) + if CPU_BINDING_NUM is None: + cpu_num_per_device = int(cpu_nums * ratio // len(shard_devices)) + else: + cpu_num_per_device = int(CPU_BINDING_NUM) + if len(shard_devices) * cpu_num_per_device > cpu_nums: + raise Exception( + f"Cpu num in numa {numa_id} to 
assign {cpu_num_per_device} for every device is not enough, " + f"please decrease the value of CPU_BINDING_NUM!") + + # 获取该npu的下标信息 + idx = shard_devices.index(cur_device) + # 给该npu分配要绑定的cpu id + binding_cpus = [all_cpus[_] for _ in range(idx * cpu_num_per_device, (idx + 1) * cpu_num_per_device)] + + # cpu bind + p = psutil.Process() + p.cpu_affinity(binding_cpus) + new_affinity = p.cpu_affinity() + print_rank_0("Bind cpu successful!!!") diff --git a/model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/__init__.py b/model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/adaptive_recompute.py b/model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/adaptive_recompute.py new file mode 100644 index 000000000..3450c824f --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/memory/adaptive_recomputing/adaptive_recompute.py @@ -0,0 +1,782 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. 
+import sys +from copy import deepcopy +from functools import wraps +from collections.abc import Iterable + +import acl +import numpy as np +import torch +import torch.nn +import torch_npu + +from megatron.training import print_rank_0 +from megatron.core.num_microbatches_calculator import get_num_microbatches +from megatron.training import get_args +from megatron.core import parallel_state + +from mindspeed.core.memory.adaptive_recomputing.adaptive_recompute_apply import get_recompute_hook +from mindspeed.core.memory.adaptive_recomputing.adaptive_recompute_apply import get_swap_hook +from mindspeed.core.memory.adaptive_recomputing.adaptive_recompute_apply import register_recursive_apply as apply_adaptive_recompute +from mindspeed.core.memory.adaptive_recomputing.adaptive_recompute_apply import register_recursive_apply_prefetch as apply_prefetch_strategy +from mindspeed.core.memory.adaptive_recomputing.adaptive_recompute_solver import get_graph_solver, GraphSolver +from mindspeed.core.memory.adaptive_recomputing.swap_manager import SwapManager, get_tensor_mem_size + +DTYPE_NBYTES_MAP = {"bf16": 2, "fp16": 2, "fp32": 4} + + +class AdaptiveRecomputePolicy: + adaptive_recomputing_policy = None + + def __init__(self): + # total swap out size after OOM + self.record_swap_out_size = 0 + # module context copy + self.context_copy = None + # unit for device memory size(MB) + self.unit_mb = 1024 * 1024 + # find target device memory for policy + self.is_find_target_device_memory = False + # swap size for this step OOM + self.swap_size = 0 + + # policy + self.cur_recompute_policy = [] + self.oom_recompute_policy_list = [] + self.normal_recompute_policy_list = [] + + # device memory dichotomy for solve graph + self.device_memory_dichotomy_left = 0 + self.device_memory_dichotomy_right = 0 + self.cur_device_memory = -1 + self.stop_dichotomy_value = 1 + + # device memory free default is maxsize + self.default_device_memory = sys.maxsize + all_args = get_args() + if 
all_args.adaptive_recompute_device_size >= 0: + self.default_device_memory = all_args.adaptive_recompute_device_size + self.hccl_memory = 0 + + self.remove_swap_manager_hook_step = 0 + torch_npu.npu.init() + self.last_num_alloc_retries = torch.npu.memory_stats()["num_alloc_retries"] + self.change_num_alloc_retries_times = 0 + self.first_non_oom_device_memory = 0 + self.check_non_oom_times = 0 + + # swap_attention + self.interval = 0 + self.threshold_prefetch = 0 + self.num_prefetch = 0 + self.num_layers = 0 + + @staticmethod + def tensor_all_reduce(num_list, op): + shard_tensor = torch.tensor(num_list, device=torch.npu.current_device()) + if parallel_state.get_tensor_model_parallel_world_size() > 1: + torch.distributed.all_reduce( + shard_tensor, + op=op, + group=parallel_state.get_tensor_model_parallel_group(), ) + if parallel_state.get_data_parallel_world_size() > 1: + torch.distributed.all_reduce( + shard_tensor, + op=op, + group=parallel_state.get_data_parallel_group(), ) + result = shard_tensor.cpu().numpy().tolist() + del shard_tensor + return result + + @staticmethod + def is_policy_in_list(policy, policy_list): + for p in policy_list: + if np.all(p == policy): + return True + return False + + + def is_stable_policy(self, profiling_step): + all_args = get_args() + # not activate swap function or remove swap manager hook + if not all_args.adaptive_recompute_device_swap or (profiling_step > self.remove_swap_manager_hook_step != 0): + return True + + total_swap_out_size = SwapManager().total_swap_out_size + self.swap_size = (total_swap_out_size - self.record_swap_out_size) // self.unit_mb + self.check_num_alloc_retries() + num_list = [ + int(total_swap_out_size), int(self.hccl_memory), int(self.swap_size), + int(self.is_find_target_device_memory), int(self.change_num_alloc_retries_times) + ] + size_tensor = self.tensor_all_reduce(num_list, torch.distributed.ReduceOp.MAX) + total_swap_out_size = size_tensor[0] + self.hccl_memory = size_tensor[1] + self.swap_size 
= size_tensor[2] + self.is_find_target_device_memory = bool(size_tensor[3]) + self.change_num_alloc_retries_times = size_tensor[4] + SwapManager().total_swap_out_size = total_swap_out_size + + if self.swap_size <= 0 and self.is_find_target_device_memory: + return True + self.record_swap_out_size = total_swap_out_size + return False + + def get_default_device_memory(self, max_device_memory): + self.default_device_memory = min(self.default_device_memory, max_device_memory) + size_tensor = self.tensor_all_reduce([int(self.default_device_memory)], torch.distributed.ReduceOp.MIN) + self.default_device_memory = size_tensor[0] + + def check_cur_recompute_policy(self): + if len(self.cur_recompute_policy) == 0: + return + is_exist_oom = self.is_policy_in_list(self.cur_recompute_policy, self.oom_recompute_policy_list) + is_exist_normal = self.is_policy_in_list(self.cur_recompute_policy, self.normal_recompute_policy_list) + if self.swap_size > 0: + if not is_exist_oom: + self.oom_recompute_policy_list.append(deepcopy(self.cur_recompute_policy)) + if is_exist_normal: + self.normal_recompute_policy_list.remove(self.cur_recompute_policy) + return + if is_exist_oom or self.change_num_alloc_retries_times != 0: + return + if not is_exist_normal: + self.normal_recompute_policy_list.append(deepcopy(self.cur_recompute_policy)) + + def dichotomy_best(self): + # last policy is instability + if self.is_find_target_device_memory: + self.device_memory_dichotomy_left = self.first_non_oom_device_memory + self.device_memory_dichotomy_right = self.cur_device_memory + self.is_find_target_device_memory = False + if self.cur_device_memory == -1: + return self.default_device_memory + + # OOM + if self.swap_size > 0: + self.check_non_oom_times = 0 + self.change_num_alloc_retries_times = 0 + self.device_memory_dichotomy_right = self.cur_device_memory + if self.first_non_oom_device_memory >= self.cur_device_memory: + self.first_non_oom_device_memory = 0 + if self.device_memory_dichotomy_right <= 
self.device_memory_dichotomy_left: + self.device_memory_dichotomy_left = 0 + return (self.device_memory_dichotomy_left + self.device_memory_dichotomy_right) // 2 + + # check non oom policy + if self.change_num_alloc_retries_times != 0 and self.check_non_oom_times == 0: + print_rank_0(f"current policy may be an unstable one, try to check it once again, " + f"policy device memory: {self.cur_device_memory}") + self.check_non_oom_times += 1 + self.change_num_alloc_retries_times = 0 + return self.cur_device_memory + + self.check_non_oom_times = 0 + self.change_num_alloc_retries_times = 0 + self.device_memory_dichotomy_left = self.cur_device_memory + if self.first_non_oom_device_memory == 0: + self.first_non_oom_device_memory = self.cur_device_memory + if self.device_memory_dichotomy_right - self.device_memory_dichotomy_left <= self.stop_dichotomy_value: + self.is_find_target_device_memory = True + return self.device_memory_dichotomy_left + + return (self.device_memory_dichotomy_left + self.device_memory_dichotomy_right) // 2 + + def solve_recompute_policy(self, profiling_step): + is_known_policy = True + self.remove_swap_manager_hook_step = profiling_step + 1 + swap_size = self.swap_size + recompute_policy_list = None + while is_known_policy: + torch.npu.synchronize() + self.cur_device_memory = self.dichotomy_best() + if self.check_non_oom_times == 0: + recompute_policy_list = get_graph_solver().get_policy(self.cur_device_memory) + np_result = np.array(recompute_policy_list) + self.cur_recompute_policy = np.array([r * r[0] for r in np_result]).sum(axis=0).tolist() + if self.is_find_target_device_memory: + self.remove_swap_manager_hook_step = profiling_step + 10 + print_rank_0( + f"success to find the target value of the current round of search: {self.cur_device_memory}") + break + # OOM policy + if self.is_policy_in_list(self.cur_recompute_policy, self.oom_recompute_policy_list): + self.swap_size = max(self.swap_size, 1) + continue + # no OOM policy + if 
def check_num_alloc_retries(self):
    """Fold new allocator retries into the OOM / unstable-policy bookkeeping.

    Reads ``num_alloc_retries`` from ``torch.npu.memory_stats()`` and compares
    it with the value remembered in ``self.last_num_alloc_retries``. Nothing
    happens while the counter has not moved. Otherwise the remembered value is
    refreshed and the increase is handled as follows:

    * ``self.swap_size`` is forced to 1 when it is currently 0 and either the
      counter rose by more than one or ``self.check_non_oom_times`` is
      non-zero.
    * If ``self.swap_size`` is positive at that point, the method stops.
    * Otherwise ``self.change_num_alloc_retries_times`` is incremented; once
      it exceeds 1 a message is printed and ``self.swap_size`` is set to 1.
    """
    num_alloc_retries = torch.npu.memory_stats()["num_alloc_retries"]
    if num_alloc_retries == self.last_num_alloc_retries:
        return
    # Only the increase since the previous check matters below.
    retries_times = num_alloc_retries - self.last_num_alloc_retries
    self.last_num_alloc_retries = num_alloc_retries
    # NOTE(review): elsewhere in this class a positive swap_size is handled
    # under an "# OOM" comment, so setting it to 1 presumably marks the
    # current policy as out-of-memory - confirm.
    if self.swap_size == 0 and (retries_times > 1 or self.check_non_oom_times != 0):
        self.swap_size = 1
    if self.swap_size > 0:
        return

    # A single retry with no swapping: count it, and flag the policy only
    # when this happens more than once.
    self.change_num_alloc_retries_times += 1
    if self.change_num_alloc_retries_times > 1:
        print_rank_0(f"[^?^?^] this is a unstable policy, try select another one.")
        self.swap_size = 1
def granular_module_allocation(self, vpp_size, recompute_num_layers, cur_pp_noop_layers): + swap_list = [] + recompute_list = [] + args = get_args() + cur_pp_rank = parallel_state.get_pipeline_model_parallel_rank() + pp_size = args.pipeline_model_parallel_size or 1 + vpp_layer = args.num_layers_per_virtual_pipeline_stage + if self.num_prefetch <= vpp_size: + swap_list = [['0'] if i < self.num_prefetch else [''] for i in range(vpp_size)] + else: + for chunk in range(vpp_size): + chunk_swap_layer = ['0'] + for layer_id in range(vpp_size, self.num_prefetch): + if layer_id % vpp_size == chunk: + chunk_swap_layer.append(f'{layer_id // vpp_size}') + swap_list.append(chunk_swap_layer) + + if recompute_num_layers <= vpp_size: + recompute_list = [['0'] if i < recompute_num_layers else [''] for i in range(vpp_size)] + if parallel_state.is_pipeline_last_stage(ignore_virtual=True) and args.reduce_recompute_for_last_chunk: + recompute_list[-1] = [''] + else: + for chunk in range(vpp_size): + chunk_recompute_layer = ['0'] + for layer_id in range(vpp_size, recompute_num_layers): + if layer_id % vpp_size == chunk: + chunk_recompute_layer.append(f'{layer_id // vpp_size}') + recompute_list.append(chunk_recompute_layer) + if parallel_state.is_pipeline_last_stage(ignore_virtual=True) and args.reduce_recompute_for_last_chunk: + if recompute_list[-1][-1] == str(args.num_layers_per_virtual_pipeline_stage - 1): + recompute_list[-1].pop() + if len(recompute_list[-1]) == 0: + recompute_list[-1].append('') + for vpp in range(vpp_size): + vpp_layers = swap_list[vpp] + for i in range(len(vpp_layers)): + layer_id = vpp * vpp_layer * pp_size + i + vpp_layer * cur_pp_rank + if layer_id in cur_pp_noop_layers: + swap_list[vpp][i] = '' + if len(recompute_list[vpp]) >= i + 1: + recompute_list[vpp][i] = '' + + prefetch_list = swap_list + interval = 0 + prefetch_recompute_group = [swap_list, prefetch_list, recompute_list] + return [prefetch_recompute_group, interval, self.num_prefetch, 
def get_cur_stage_noop_layers(self, noop_layers, cur_pp_rank):
    """Select the no-op layer indices that fall on pipeline rank ``cur_pp_rank``.

    Layers are grouped into chunks of
    ``num_layers_per_virtual_pipeline_stage`` (falling back to
    ``num_layers // pipeline_model_parallel_size`` when that is unset), and
    chunks are assigned to pipeline ranks round-robin. An index ``i`` belongs
    to rank ``(i % (chunk_len * pp_size)) // chunk_len``.

    Args:
        noop_layers: iterable of integer layer indices.
        cur_pp_rank: pipeline rank to filter for.

    Returns:
        list: the entries of ``noop_layers`` owned by ``cur_pp_rank``, in
        their original order.
    """
    cfg = get_args()
    stage_count = cfg.pipeline_model_parallel_size or 1
    layers_per_stage = cfg.num_layers // stage_count
    chunk_len = cfg.num_layers_per_virtual_pipeline_stage or layers_per_stage
    round_len = chunk_len * stage_count
    return [
        layer_idx
        for layer_idx in noop_layers
        if (layer_idx % round_len) // chunk_len == cur_pp_rank
    ]
@staticmethod
def get_memory_status():
    """Collect a snapshot of device memory figures.

    Returns:
        dict: ``free`` and ``all_memory`` as reported by
        ``acl.rt.get_mem_info(1)``, plus ``used_memory``,
        ``reserved_memory`` and ``max_memory_allocated`` taken from the
        matching ``torch.npu`` counters.
    """
    # NOTE(review): the argument 1 presumably selects the memory attribute
    # to query - confirm against the ACL runtime documentation.
    free_bytes, total_bytes, _ = acl.rt.get_mem_info(1)
    return {
        "free": free_bytes,
        "all_memory": total_bytes,
        "used_memory": torch.npu.memory_allocated(),
        "reserved_memory": torch.npu.memory_reserved(),
        "max_memory_allocated": torch.npu.max_memory_allocated()
    }
pipeline_parallel_size = parallel_state.get_pipeline_model_parallel_world_size() + pipeline_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + total_num_micro_batches = num_microbatches * num_model_chunks + if num_model_chunks == 1: + num_warmup_micro_batches = pipeline_parallel_size - pipeline_parallel_rank - 1 + else: + num_warmup_micro_batches = (pipeline_parallel_size - pipeline_parallel_rank - 1) * 2 + num_warmup_micro_batches += (num_model_chunks - 1) * pipeline_parallel_size + num_warmup_micro_batches += 1 + if num_model_chunks >= 1: + self.num_warmup_micro_batches = min(num_warmup_micro_batches, total_num_micro_batches) / num_model_chunks + + def pre_hook_func(self, state, prefix, name, *args, **kargs): + if self.profiling_step < self.skip_profiling_step: + return + state['memory'] = 0 + state['input'] = self._cal_input_output_size(args) + if self.profiling_step == self.stop_profiling_step: + state['memory'] = torch.npu.memory_allocated() - state['input'] * self.unit_mb + # The memory and time information is obtained separately. The average time is calculated when the step in + # [skip_profiling_step, stop_profiling_step). The memory information is obtained only for the last time. 
def construct_context_recursive(self, prefix_name, model, ctx, have_allowed_recomputing):
    """Mirror the module tree under ``model`` into nested context dicts.

    For every direct child a dict ``{'name', 'prefix_name'}`` is appended to
    ``ctx['layers']`` (created on first use), then the child is visited
    recursively with its dotted path as the new prefix.

    While ``have_allowed_recomputing`` is true, each child is checked against
    the classes in ``self.allowed_recomputing_module``. A match tags the
    child context with ``allowed_recomputing``; if ``model`` is a
    ``torch.nn.ModuleList`` the parent ``ctx`` gets ``is_module_list`` and
    ``is_recomputing_layer``, otherwise the child context gets
    ``is_recomputing_layer``. After the first match, that child and every
    later sibling are descended into with the flag switched off.

    Args:
        prefix_name: dotted path of ``model``; "" for an unnamed root.
        model: module whose ``named_children()`` are walked.
        ctx: context dict of ``model``; mutated in place.
        have_allowed_recomputing: whether children may still be tagged.
    """
    parent_is_module_list = isinstance(model, torch.nn.ModuleList)
    descend_flag = have_allowed_recomputing
    for child_name, child in model.named_children():
        # 1. record the child in the parent's context
        child_ctx = {'name': child_name, 'prefix_name': prefix_name}
        ctx.setdefault('layers', []).append(child_ctx)
        child_path = child_name if prefix_name == "" else prefix_name + "." + child_name

        # 2. tag children that are instances of an allowed module class
        if have_allowed_recomputing:
            for allowed_type in self.allowed_recomputing_module:
                if not isinstance(child, allowed_type):
                    continue
                child_ctx['allowed_recomputing'] = True
                if parent_is_module_list:
                    ctx['is_module_list'] = True
                    ctx['is_recomputing_layer'] = True
                else:
                    child_ctx['is_recomputing_layer'] = True
                descend_flag = False
        self.construct_context_recursive(child_path, child, child_ctx, descend_flag)
transformer Layers + if isinstance(module, torch.nn.ModuleList) and 'is_recomputing_layer' in current_ctx and first_chunk: + pre_hook = model.register_forward_pre_hook(self.forward_pre_hook(ctx['prefix_name'], ctx['name'], ctx)) + post_hook = model.register_forward_hook(self.forward_post_hook(ctx['prefix_name'], ctx['name'], ctx)) + self.modules_hooks.append(pre_hook) + self.modules_hooks.append(post_hook) + elif 'is_recomputing_layer' in current_ctx and first_chunk: + profiling_prefix = prefix_name + "." + name + pre_hook = module.register_forward_pre_hook(self.forward_pre_hook(prefix_name, name, current_ctx)) + post_hook = module.register_forward_hook(self.forward_post_hook(prefix_name, name, current_ctx)) + self.modules_hooks.append(pre_hook) + self.modules_hooks.append(post_hook) + + # only has allowed_recomputing Tag and its submodule can set profiling hook + if 'allowed_recomputing' in current_ctx and index == 0 and first_chunk: + profiling_prefix = prefix_name + "." + name + pre_hook = module.register_forward_pre_hook(self.forward_pre_hook(prefix_name, name, current_ctx)) + post_hook = module.register_forward_hook(self.forward_post_hook(prefix_name, name, current_ctx)) + self.modules_hooks.append(pre_hook) + self.modules_hooks.append(post_hook) + elif profiling_prefix and prefix_name.startswith(profiling_prefix) and first_chunk: + pre_hook = module.register_forward_pre_hook(self.forward_pre_hook(prefix_name, name, current_ctx)) + post_hook = module.register_forward_hook(self.forward_post_hook(prefix_name, name, current_ctx)) + self.modules_hooks.append(pre_hook) + self.modules_hooks.append(post_hook) + self.register_recursive_hook(module, current_ctx, profiling_prefix, first_chunk, prefetch=prefetch) + index += 1 + + def reset_modules(self): + for m in self.checkpointed_modules: + m.forward = m.no_checkpoint_forward + self.checkpointed_modules.clear() + get_recompute_hook().reset_recompute_modules() + get_swap_hook().reset_swap_manager_modules() + 
SwapManager().reset_swap_manager_tensors() + if (get_adaptive_recomputing_policy().check_non_oom_times == 0 + and not get_adaptive_recomputing_policy().is_find_target_device_memory): + torch_npu.npu.empty_cache() + + def reset_all_hook_args(self): + all_args = get_args() + step = get_adaptive_recomputing_policy().remove_swap_manager_hook_step + if not all_args.adaptive_recompute_device_swap: + for hook_handle in self.modules_hooks: + hook_handle.remove() + self.modules_hooks.clear() + SwapManager().reset_swap_manager_tensors() + get_swap_hook().reset_swap_manager_modules() + return + if self.profiling_step >= self.solve_graph_at_step: + for hook_handle in self.modules_hooks: + hook_handle.remove() + self.modules_hooks.clear() + if not get_adaptive_recomputing_policy().is_find_target_device_memory or self.profiling_step > step + 1: + return + if self.profiling_step == step + 1: + title = (f"===== finish to check policy, search policy memory size is: " + f"{get_adaptive_recomputing_policy().cur_device_memory} =====") + print_rank_0(f"{title}\n{get_graph_solver().final_policy_info}\n{'=' * len(title)}") + if self.profiling_step == step: + get_swap_hook().reset_swap_manager_modules() + if get_adaptive_recomputing_policy().is_find_target_device_memory: + SwapManager().reset_swap_manager_tensors() + + def prefetch_hook(self, models): + self.reset_modules() + all_args = get_args() + pp = all_args.pipeline_model_parallel_size + vpp = all_args.virtual_pipeline_model_parallel_size if all_args.virtual_pipeline_model_parallel_size else 1 + print_rank_0("ADAPTIVE-PREFETCH: Start applying policy to the model") + config = { + "pre_layer_full_name": "", + "pre_layer_ctx": {}, + "cur_layer_name": "module", + } + prefetch_recompute_group, interval, num_prefetch, swap_noop_layers = get_adaptive_recomputing_policy().solve_prefetch_policy() + print(f"[DEBUG] swap_list: {prefetch_recompute_group[0]}," + f" prefetch_list: {prefetch_recompute_group[1]}," + f" recompute_list: 
{prefetch_recompute_group[2]}") + for i in prefetch_recompute_group[0]: + if not any(filter(None, i)): + vpp -= 1 + prefetch_args = [prefetch_recompute_group[0], vpp, interval, num_prefetch] + apply_prefetch_strategy(config, models, self.context, prefetch_recompute_group, prefetch_args) + + + def step_hook(self, models): + torch.npu.synchronize() + while self.event_list: + record_time(self.context, self.event_list) + self.reset_all_hook_args() + if self.profiling_step < self.solve_graph_at_step: + return + + if get_adaptive_recomputing_policy().context_copy is None: + get_adaptive_recomputing_policy().context_copy = deepcopy(self.context) + try: + get_adaptive_recomputing_policy().get_default_device_memory(self.context["max_device_memory"]) + except KeyError: + print_rank_0("[ERROR] Some of these keys don't exist.") + get_graph_solver().build_solver_info(self.context, self.num_warmup_micro_batches, len(models)) + + get_adaptive_recomputing_policy().check_cur_recompute_policy() + print_rank_0("==================== ADAPTIVE-RECOMPUTE Report ====================") + context = get_adaptive_recomputing_policy().solve_recompute_policy(self.profiling_step) + print_rank_0("==================== ADAPTIVE-RECOMPUTE Report End ====================") + if context is not None: + self.context = context + self.reset_modules() + print_rank_0("ADAPTIVE-RECOMPUTE: Start applying policy to the model") + config = { + "pre_layer_ctx": {}, + "cur_layer_name": "module", + } + apply_adaptive_recompute(config, models, self.context) + print_rank_0("ADAPTIVE-RECOMPUTE: Finish applying policy to the model") + get_swap_hook().reset_tensor_layer_info() + + def hook_step_func(self, step_func, models): + def custom_step_func(*args, **kargs): + result = step_func(*args, **kargs) + if (self.profiling_step > self.solve_graph_at_step and \ + get_adaptive_recomputing_policy().is_stable_policy(self.profiling_step)): + return result + memory_info = self.get_memory_status() + try: + hccl_memory = 
(memory_info["all_memory"] - memory_info["free"] - memory_info[ + "reserved_memory"]) // self.unit_mb + get_adaptive_recomputing_policy().hccl_memory = max(hccl_memory, get_adaptive_recomputing_policy().hccl_memory) + self.context['used_mem'] = memory_info["used_memory"] // self.unit_mb + self.context['max_device_memory'] = memory_info["all_memory"] // self.unit_mb + except KeyError: + print_rank_0("[ERROR] Some of these keys don't exist.") + self.profiling_step += 1 + self.step_hook(models) + return result + + return custom_step_func + + def set_profiling_step(self, step): + self.stop_profiling_step = step + self.solve_graph_at_step = step + 1 + + def add_allowed_recomputing_module(self, module): + if module not in self.allowed_recomputing_module: + self.allowed_recomputing_module.append(module) + + def _cal_input_output_size(self, args): + size = 0 + if isinstance(args, torch.Tensor): + size += get_tensor_mem_size(args) + return size // self.unit_mb + for arg in args: + if isinstance(arg, torch.Tensor): + size += get_tensor_mem_size(arg) + elif isinstance(arg, Iterable): + for t in arg: + if isinstance(t, torch.Tensor): + size += get_tensor_mem_size(t) + elif t is None: + pass + else: + print_rank_0(f"[WARNING]: unknown input/output type {str(type(t))}") + elif arg is None: + pass + else: + print_rank_0(f"[WARNING]: unknown input/output type {str(type(t))}") + return size // self.unit_mb + + +def get_adaptive_recomputing(): + if AdaptiveRecompute.adaptive_recomputing is None: + AdaptiveRecompute.adaptive_recomputing = AdaptiveRecompute() + return AdaptiveRecompute.adaptive_recomputing + + +def record_time(context, remaining_event_list): + if "memory" in context: + cur_level_event_list = remaining_event_list.pop(0) + start_event = cur_level_event_list[0] + end_event = cur_level_event_list[1] + total_time = start_event.elapsed_time(end_event) + if 'pre_total_time' in context: + context['forward_cnt'] += 1 + context['time'] = total_time + context['pre_total_time'] 
def is_activate_adaptive_recompute():
    """Decide whether adaptive recompute is enabled and which profiling step to use.

    The feature is considered requested when at least one of these holds:
    ``adaptive_recompute_device_size >= 0``, ``adaptive_recompute_device_swap``
    is set, or ``swap_attention`` is set.

    Returns:
        tuple: ``(False, 0)`` when the feature is not requested; otherwise
        ``(True, profiling_step)``, where ``profiling_step`` is
        ``adaptive_recompute_profiling_step`` or 10 if that value is <= 0.

    Raises:
        AssertionError: if the profiling step exceeds ``train_iters`` while
            ``swap_attention`` is off.
    """
    all_args = get_args()
    profiling_step = 0
    if (all_args.adaptive_recompute_device_size < 0
            and not all_args.adaptive_recompute_device_swap
            and not all_args.swap_attention):
        print_rank_0("[ERROR] failed to activate adaptive selective recompute train, please add param: "
                     "\"adaptive-recompute-device-swap\", or set param: \"adaptive-recompute-device-size\".")
        return False, profiling_step

    max_profiling_step = all_args.train_iters // 10
    profiling_step = all_args.adaptive_recompute_profiling_step
    if profiling_step > all_args.train_iters and not all_args.swap_attention:
        raise AssertionError('\"adaptive-recompute-profiling-step\" cannot be greater than train_iters')
    if profiling_step < 5 or profiling_step > max_profiling_step:
        # The two literals are concatenated, so the separating space has to
        # be part of one of them; without it the text reads ">=5and <=N".
        print_rank_0(f"[WARNING] consider set \"adaptive-recompute-profiling-step\" value >=5 "
                     f"and <={max_profiling_step}, or remove it.")
    if profiling_step <= 0:
        print_rank_0("[WARNING] \"adaptive-recompute-profiling-step\" value can not <=0, will use default value 10.")
        profiling_step = 10
    print_rank_0(
        "success to activate adaptive recompute train: adaptive-recompute-device-swap={}, adaptive-recompute-device-size={}, "
        "adaptive-recompute-profiling-step={}".format(all_args.adaptive_recompute_device_swap,
                                                      all_args.adaptive_recompute_device_size, profiling_step))
    return True, profiling_step
def allowed_recomputing_module_wrapper(allowed_recomputing_module):
    """Register ``allowed_recomputing_module`` with the shared AdaptiveRecompute instance.

    The registration is delegated to ``add_allowed_recomputing_module`` on
    the object returned by ``get_adaptive_recomputing()``.
    """
    get_adaptive_recomputing().add_allowed_recomputing_module(allowed_recomputing_module)
class RecomputeHook:
    """Keeps track of modules whose ``forward`` has been replaced by a checkpointed one."""

    # Shared instance slot; starts out empty.
    recompute_hook = None

    def __init__(self):
        # Modules that currently run with a replaced forward.
        self.recompute_modules = []

    @staticmethod
    def hook_checkpoint_forward(forward_func):
        """Wrap ``forward_func`` so every call goes through ``tensor_parallel.checkpoint``.

        Positional arguments are handed to ``tensor_parallel.checkpoint``;
        keyword arguments are captured by the closure and re-applied when
        the wrapped function is invoked.

        Returns:
            callable: drop-in replacement for ``forward_func``.
        """
        def custom_forward(*args, **kargs):
            def inside_forward(*positional):
                return forward_func(*positional, **kargs)

            # NOTE(review): the literal None is the second argument of
            # tensor_parallel.checkpoint; its meaning is not visible here.
            return tensor_parallel.checkpoint(inside_forward, None, *args)

        return custom_forward

    def reset_recompute_modules(self):
        """Give every tracked module its saved forward back, then forget them all."""
        for tracked in self.recompute_modules:
            tracked.forward = tracked.no_checkpoint_adaptive_recompute_forward
        self.recompute_modules.clear()
def is_hook_layer(ctx, hook_list):
    """Tell whether the layer described by ``ctx`` should receive a hook.

    Args:
        ctx: layer context dict; ``'name'`` is optional, ``'prefix_name'`` is
            read only when the name is present and listed.
        hook_list: container of layer names that should be hooked.

    Returns:
        bool: True only when ``ctx`` has a ``'name'`` contained in
        ``hook_list`` and its ``'prefix_name'`` does not contain the
        substring ``"expert"``.
    """
    if "name" not in ctx:
        return False
    if ctx["name"] not in hook_list:
        return False
    return "expert" not in ctx['prefix_name']
prefetch_args): + args = get_args() + prefetch_list, hook_list, recompute_list = prefetch_recompute_group + if not isinstance(prefetch_list[0], list): + prefetch_layer = prefetch_list + hook_layer = hook_list + recompute_layer = recompute_list + + pre_layer_full_name = config["pre_layer_full_name"] + pre_layer_ctx = config["pre_layer_ctx"] + cur_layer_name = config["cur_layer_name"] + if cur_layer_name == "module" and isinstance(models, list): + idx = 0 + for model in models: + prefetch_layer = prefetch_list[idx] if isinstance(prefetch_list[0], list) else prefetch_list + hook_layer = hook_list[idx] if isinstance(hook_list[0], list) else hook_list + recompute_layer = recompute_list[idx] if isinstance(recompute_list[0], list) else recompute_list + print_rank_0(f'prefetch_layer: {prefetch_layer}---{hook_layer}') + if any(filter(None, prefetch_layer)): + prefetch_recompute_group = [prefetch_layer, hook_layer, recompute_layer] + register_recursive_apply_prefetch(config, model, get_list_layers_context(ctx, idx), + prefetch_recompute_group, prefetch_args) + idx += 1 + return + + if is_hook_layer(ctx, hook_list): + print_rank_0(f"prefetch forward and backward hook success: {pre_layer_full_name + '.' + cur_layer_name}") + prefetch_register_post_backward_hook(models, pre_layer_full_name + '.' + cur_layer_name, prefetch_args) + prefetch_register_pre_forward_hook(models, pre_layer_full_name + '.' + cur_layer_name, prefetch_args) + if hook_list == prefetch_list and prefetch_list != ['']: + if "name" in ctx and ctx["name"] in args.swap_modules and \ + get_layer_id(ctx["prefix_name"]) in prefetch_list: + print_rank_0(f"prefetch swap hook success: {pre_layer_full_name + '.' + cur_layer_name}") + models.no_checkpoint_adaptive_recompute_forward = models.forward + models.forward = get_swap_prefetch(prefetch_args).hook_swap_manager_forward(models.forward, + pre_layer_full_name + + '.' 
def get_list_layers_context(ctx, idx):
    """Return a shallow copy of ``ctx`` narrowed to a single ``"layers"`` entry.

    Args:
        ctx: context mapping. If it has a ``"layers"`` key, its value must be
            indexable by ``idx``.
        idx: position of the layer to keep.

    Returns:
        dict: a new dict with the same keys in the same order as ``ctx``.
        The ``"layers"`` value becomes the one-element list
        ``[ctx["layers"][idx]]``; every other value is the same object as in
        ``ctx`` (nothing is deep-copied).

    Raises:
        IndexError: propagated from ``ctx["layers"][idx]`` when ``idx`` is
            out of range for a list.
    """
    # One comprehension replaces the copy loop with its ``continue`` branch.
    return {
        key: [value[idx]] if key == "layers" else value
        for key, value in ctx.items()
    }
@staticmethod
def get_dg(module_layers):
    """Build a linear directed graph with one node per entry of ``module_layers``.

    Args:
        module_layers: sequence of mappings, each providing the keys
            ``'name'``, ``'memory'``, ``'input'`` and ``'time'``.

    Returns:
        nx.DiGraph: nodes are the integer positions ``0..len-1``. Each node
        carries ``name``, ``mem`` (from ``'memory'``), ``input``, ``compute``
        (from ``'time'``), ``recompute=False`` and ``status="no_status"``.
        Edges chain every node ``i`` to ``i + 1``.
    """
    dg = nx.DiGraph()
    # enumerate() yields index and layer together, avoiding repeated
    # ``module_layers[i]`` lookups.
    dg.add_nodes_from([
        (i, {"name": layer['name'],
             "mem": layer['memory'],
             "input": layer['input'],
             "compute": layer['time'],
             "recompute": False,
             "status": "no_status"})
        for i, layer in enumerate(module_layers)
    ])
    dg.add_edges_from([
        (i, i + 1) for i in range(len(module_layers) - 1)
    ])
    return dg
module_layers + parent_layers = [] + for layer in layer_module: + if "memory" not in layer: + continue + module_layers.append(layer) + parent_layers.append(layer) + if "layers" not in layer: + continue + parent_name = layer['name'] + sub_layer_name = [] + for sub_layer in layer['layers']: + if "memory" not in sub_layer: + continue + # rename sub_layer name, like 'self_attention.core_attention' + sub_layer['name'] = "{}.{}".format(parent_name, sub_layer['name']) + module_layers.append(sub_layer) + sub_layer_name.append(sub_layer) + self.module_layers.update({parent_name: sub_layer_name}) + self.module_layers.update({"parent_layers": parent_layers}) + self.module_layers.update({"module_layers": module_layers}) + return + + # remove full select node, like 'input_layernorm', 'self_attention', 'post_attention_layernorm' and 'mlp' in list + def remove_full_selective_node(self, recompute_nodes): + if len(recompute_nodes) == 0: + return recompute_nodes + + layers_recompute_count = 0 + try: + for layer in self.module_layers["parent_layers"]: + name = layer['name'] + if name in recompute_nodes: + layers_recompute_count += 1 + if layers_recompute_count == len(self.module_layers["parent_layers"]): + recompute_nodes.clear() + break + if name not in self.module_layers.keys(): + continue + sub_layers_recompute_count = 0 + for sub_layer in self.module_layers[name]: + if sub_layer['name'] in recompute_nodes: + sub_layers_recompute_count += 1 + if sub_layers_recompute_count == len(self.module_layers[name]): + recompute_nodes.clear() + break + except KeyError: + print_rank_0("[ERROR] Some of these keys don't exist.") + return recompute_nodes + + def get_recompute_op(self, graph): + recompute_nodes = [] + p_node = [] + for node in graph.nodes: + if not graph.nodes[node]['recompute']: + continue + name = graph.nodes[node]['name'] + recompute_nodes.append(name) + spd = name.split(".") + if len(spd) == 2 and spd[0] not in p_node: + p_node.append(spd[0]) + # remove parent and sub in list 
def broadcast_recompute_policy(self, recompute_policy_list):
    """Share the recompute policy across ranks and return it as nested lists.

    The tensor/data-parallel world sizes are refreshed from ``parallel_state``
    first; if that lookup fails, the values already stored on ``self`` are
    kept and a warning is printed. The policy's shape is broadcast before the
    policy itself so that non-source ranks can allocate an ``int8`` buffer of
    the right shape.

    Args:
        recompute_policy_list: policy rows; only the copy held by the source
            rank is turned into the tensor that is broadcast.

    Returns:
        list: the contents of the policy tensor after the broadcast calls.
    """
    try:
        self.mp = parallel_state.get_tensor_model_parallel_world_size()
        self.dp = parallel_state.get_data_parallel_world_size()
    except Exception:
        # A bare ``except:`` would also swallow KeyboardInterrupt and
        # SystemExit; only ordinary errors should fall back to the warning.
        print_rank_0("WARNING: mp, dp is not defined")
    global_rank = torch.distributed.get_rank()
    # First global rank of the (mp * dp)-sized block this rank falls into.
    src = (global_rank // (self.mp * self.dp)) * self.dp * self.mp

    policy_shape = np.array(recompute_policy_list).shape
    policy_len_tensor = torch.tensor(policy_shape, device=torch.npu.current_device())
    self.broadcast_in_mp_dp(policy_len_tensor, src)
    policy_len = tuple(policy_len_tensor.cpu().numpy().tolist())
    if global_rank == src:
        recompute_policy_tensor = torch.tensor(recompute_policy_list, dtype=torch.int8,
                                               device=torch.npu.current_device())
    else:
        # Placeholder that the broadcast below is expected to fill in.
        recompute_policy_tensor = torch.empty(policy_len, dtype=torch.int8,
                                              device=torch.npu.current_device())

    self.broadcast_in_mp_dp(recompute_policy_tensor, src)
    result = recompute_policy_tensor.cpu().numpy().tolist()
    del recompute_policy_tensor
    return result
in enumerate(recompute_nodes): + if node != self.layer_recompute_one_combination.broadcast_value: + continue + name = module_layers[i]["name"] + parent_name = name + sub_name = "" + if "." in name: + parent_name, sub_name = name.split(".") + if parent_name not in result.keys(): + result[parent_name] = {} + if sub_name == "": + result[parent_name].update({name: True}) + continue + if "child_module" not in result[parent_name].keys(): + result[parent_name]["child_module"] = {} + result[parent_name]["child_module"].update({name: True, sub_name: True}) + return result + + def set_to_module(self, module, recompute_nodes, idx): + if len(recompute_nodes) == 0: + module["recompute"] = True + return + recompute_nodes_info = self.covert_recompute_node_idx_to_name(recompute_nodes) + if len(recompute_nodes_info) == 0: + return + self.set_recompute_info_to_module(module["layers"], recompute_nodes_info) + + def apply_policy_to_model(self, recompute_policy_list): + full_layers = [] + for layer in self.num_layers_module: + if 'is_module_list' in layer: + full_layers.extend(layer["layers"]) + else: + full_layers.append(layer) + if len(recompute_policy_list) == 0: + return + idx = 0 + if (recompute_policy_list[-1][2] == self.layer_full_recompute_combination.broadcast_value + or recompute_policy_list[0][2] == self.layer_without_recompute_combination.broadcast_value): + recompute_policy_list = list(reversed(recompute_policy_list)) + for policy in recompute_policy_list: + n = policy[0] + combination_idx = policy[1] + recompute_nodes = [] + if policy[2] == self.layer_without_recompute_combination.broadcast_value: + status = self.layer_without_recompute_combination.broadcast_value + try: + recompute_nodes = [status for _ in range(len(self.module_layers["module_layers"]))] + except KeyError: + print_rank_0("[ERROR] The key \"module_layers\" doesn't exist.") + if policy[2] == self.layer_recompute_one_combination.broadcast_value: + recompute_nodes = policy[3:] + for i in range(idx, idx + n): 
def cal_non_transformer_memory(self, model_context, num_model_chunks):
    """Return the memory not attributed to transformer modules, scaled per chunk.

    Only the FIRST entry of ``model_context['layers']`` is read; later
    entries are ignored, and an empty ``layers`` counts as 0.

    Args:
        model_context: mapping with a ``'layers'`` iterable whose entries
            provide a ``'memory'`` value.
        num_model_chunks: multiplier applied to the difference.

    Returns:
        ``(first_layer_memory - self.transformer_module_memory) * num_model_chunks``.
    """
    # The previous ``for ...: break`` loop only ever consumed one element;
    # next(iter(...)) states that intent directly and still tolerates an
    # empty ``layers``.
    first_layer = next(iter(model_context['layers']), None)
    model_memory = 0 if first_layer is None else first_layer['memory']
    return (model_memory - self.transformer_module_memory) * num_model_chunks
except KeyError: + print_rank_0("[ERROR] The key \"parent_layers\" doesn't exist.") + self.reset_cost(dg_copy, 0, reset_node_name) + return dg_copy + + def layers_combination_init(self, g, idx): + if idx == 0: + self.layer_full_recompute_combination = LayerCombination({ + "name": "full_recompute", + "num": self.layers_num, + "memory": self.chp_input, + "cost": self.chp_time, + "broadcast_value": 0, + "policy_name": "n_full" + }) + self.layers_combination.append(self.layer_full_recompute_combination) + self.layer_without_recompute_combination = LayerCombination({ + "name": "without_recompute", + "num": self.layers_num, + "memory": self.full_activation, + "cost": 0, + "broadcast_value": 2, + "policy_name": "n_without" + }) + self.layers_combination.append(self.layer_without_recompute_combination) + try: + if idx >= len(self.module_layers["module_layers"]): + recompute_nodes = self.get_recompute_op(g) + if len(recompute_nodes) == 0: + return + dg = self.reset_redundant_nodes(g, recompute_nodes) + stash_mem_per_layer, recompute_cost = self.calculate_cost_mem(dg, 0) + self.layer_recompute_one_combination = LayerCombination({ + "name": self.node_split_flag.join(recompute_nodes), + "num": self.layers_num, + "memory": stash_mem_per_layer, + "cost": recompute_cost, + "broadcast_value": 1, + "policy_name": "n_selective" + }) + self.layers_combination.append(self.layer_recompute_one_combination) + return + except KeyError: + print_rank_0("[ERROR] The key \"module_layers\" doesn't exist.") + if g.nodes[idx]['mem'] >= g.nodes[idx]['input']: + g.nodes[idx]['recompute'] = True + self.layers_combination_init(g, idx + 1) + g.nodes[idx]['recompute'] = False + self.layers_combination_init(g, idx + 1) + + def get_max_goods_value(self, idx, ans, device_memory): + i, j, k = idx[0], idx[1], idx[2] + pre_step_ans = ans[i - 1][j - k] + if k == 0: + return pre_step_ans + + goods_value = ans[i][j] + memory = pre_step_ans.memory + k * self.layers_combination[i].memory + cost = 
pre_step_ans.cost + k * self.layers_combination[i].cost + if pre_step_ans.cost == float('inf'): + cost = k * self.layers_combination[i].cost + try: + device_memory = max(device_memory - self.static_memory, 0) / self.pp + except ZeroDivisionError: + device_memory = max(device_memory - self.static_memory, 0) + print_rank_0("[ERROR] pipeline model parallel world size is 0. ") + + if device_memory >= memory and cost <= goods_value.cost: + goods_value.memory = memory + goods_value.cost = cost + goods_value.layer_names.clear() + if len(pre_step_ans.layer_names) > 0: + goods_value.layer_names.extend(pre_step_ans.layer_names) + goods_value.layer_names.extend(self.layers_combination[i].name for _ in range(k)) + + return goods_value + + def print_recompute_policy(self, memory, cost): + fmt_str = "With selective recompute:\n" + for k, v in self.recompute_policy.items(): + if k == self.layer_full_recompute_combination.name: + policy_name = self.layer_full_recompute_combination.policy_name + elif k == self.layer_without_recompute_combination.name: + policy_name = self.layer_without_recompute_combination.policy_name + else: + policy_name = self.layer_recompute_one_combination.policy_name + fmt_str += "recomputeNodes=[{}], ".format(k) + fmt_str += "{} {}; ".format(v, policy_name) + all_recompute_cost = self.layers_num * self.layer_full_recompute_combination.cost + try: + performance = (all_recompute_cost - cost) / (all_recompute_cost * 4) + except ZeroDivisionError: + performance = 0 + print_rank_0("[ERROR] all recompute cost is 0. 
") + fmt_str = fmt_str.strip().rstrip(";") + fmt_str += "\ntotal mem cost: {:.1f} GiB + {:.1f} GiB, speed up compared with all recompute {:.2%}".format( + self.static_memory / 1024, memory * self.pp / 1024, performance) + self.selective_recompute_info = fmt_str + + def get_all_layer_policy(self, combination_num, layer_num, ans): + layer_nodes = [self.layer_full_recompute_combination.name for _ in range(layer_num)] + memory = layer_num * self.layer_full_recompute_combination.memory + cost = layer_num * self.layer_full_recompute_combination.cost + for i in range(layer_num, 0, -1): + size = layer_num - len(ans[combination_num][i].layer_names) + if size != layer_num: + l_nodes = [] + l_nodes.extend(ans[combination_num][i].layer_names) + # if the policies of all layers are not found, the remaining layers ues all recompute policy. + l_nodes.extend(self.layer_full_recompute_combination.name for _ in range(size)) + l_memory = ans[combination_num][i].memory + size * self.layer_full_recompute_combination.memory + l_cost = ans[combination_num][i].cost + size * self.layer_full_recompute_combination.cost + if l_cost < cost: + cost = l_cost + memory = l_memory + layer_nodes.clear() + layer_nodes.extend(l_nodes) + + for nodes in layer_nodes: + if nodes not in self.recompute_policy.keys(): + self.recompute_policy.update({nodes: 1}) + continue + self.recompute_policy.update({nodes: self.recompute_policy[nodes] + 1}) + + self.print_recompute_policy(memory, cost) + + def knapsack_best(self, device_memory): + combination_num = len(self.layers_combination) - 1 + if self.layers_combination[0] is not None: + combination_num = len(self.layers_combination) + # make combination index id begin for 1. 
def get_combination_idx(self, nodes_name):
    """Return the position of the combination named ``nodes_name``.

    Args:
        nodes_name: value compared against each combination's ``name``.

    Returns:
        int: index of the first non-``None`` entry of
        ``self.layers_combination`` whose ``name`` equals ``nodes_name``,
        or ``-1`` when there is none. ``None`` placeholders are skipped.
    """
    # enumerate() replaces range(len(...)) and the repeated indexing.
    for idx, combination in enumerate(self.layers_combination):
        if combination is None:
            continue
        if nodes_name == combination.name:
            return idx
    return -1
print_list_to_policy(self, recompute_policy_list): + try: + module_layers = self.module_layers["module_layers"] + except KeyError as e: + print_rank_0("[ERROR] The key \"module_layers\" doesn't exist.") + raise e + module_layers_num = len(module_layers) + if len(recompute_policy_list) == 0: + return + fmt_str = ">> final selective strategy <<\n" + for policy in recompute_policy_list: + n = policy[0] + if policy[2] == self.layer_without_recompute_combination.broadcast_value: + policy_name = self.layer_without_recompute_combination.policy_name + elif policy[2] == self.layer_full_recompute_combination.broadcast_value: + policy_name = self.layer_full_recompute_combination.policy_name + else: + policy_name = self.layer_recompute_one_combination.policy_name + policy = policy[3:] + nodes = [] + for i in range(module_layers_num): + if policy[i] == self.layer_recompute_one_combination.broadcast_value: + nodes.append(module_layers[i]["name"]) + fmt_str += "recomputeNodes=[{}], ".format(self.node_split_flag.join(nodes)) + fmt_str += "{} {}\n".format(n, policy_name) + self.final_policy_info = fmt_str.rstrip("\n") + + def get_layers_module(self, model, parent_ctx): + if 'is_recomputing_layer' in model: + if 'is_module_list' in model and 'memory' in parent_ctx: + self.transformer_module_memory += parent_ctx['memory'] + elif 'is_module_list' not in model and 'memory' in model: + self.transformer_module_memory += model['memory'] + self.num_layers_module.append(model) + if "layers" in model: + self.layers_num += len(model["layers"]) + return + if "layers" not in model: + return + for sub_model in model["layers"]: + self.get_layers_module(sub_model, model) + + def build_solver_info(self, model_context, pp, num_model_chunks): + self.pp = max(self.pp, pp) + self.get_layers_module(model_context, "") + self.total_recompute_cost = sys.maxsize + # first layer is not recompute + self.get_no_recompute_layer() + self.chp_input = self.first_layer_module['input'] + self.chp_time = 
class LayerCombination:
    """Plain record describing one per-layer recompute option.

    The instance copies six entries from ``config``: ``name``, ``num``,
    ``memory``, ``cost``, ``broadcast_value`` and ``policy_name``. Any other
    keys in ``config`` are ignored.
    """

    def __init__(self, config):
        """Copy the required entries from ``config`` onto the instance.

        Raises:
            KeyError: if any of the six required keys is missing.
        """
        # Same lookup order as the field list, so the first missing key is
        # the one reported.
        for field in ("name", "num", "memory", "cost", "broadcast_value", "policy_name"):
            setattr(self, field, config[field])
def change_allocator():
    """Make the adaptive-recomputing pluggable allocator the current NPU allocator.

    The compiled module is loaded through
    ``AdaptiveRecomputingPluggableAllocatorBuilder`` and its file path is
    used twice: to create an ``NPUPluggableAllocator`` with the
    ``my_malloc`` / ``my_free`` entry points, and to open the same library
    with ``ctypes`` so that four further symbols can be handed to the
    allocator as raw addresses.
    """
    library_path = AdaptiveRecomputingPluggableAllocatorBuilder().load().__file__

    pluggable = torch_npu.npu.memory.NPUPluggableAllocator(library_path, 'my_malloc', 'my_free')
    torch_npu.npu.memory.change_current_allocator(pluggable)

    library = ctypes.CDLL(library_path)

    def address_of(symbol):
        # Integer address of an exported symbol, as the setters below take it.
        return ctypes.cast(getattr(library, symbol), ctypes.c_void_p).value

    # Resolve every symbol before wiring any of them, so a missing export
    # fails before any setter has been called.
    init_address = address_of("my_init")
    empty_cache_address = address_of("my_empty_cache")
    device_stats_address = address_of("my_get_device_stats")
    reset_peak_address = address_of("my_reset_peak_stats")

    pluggable.allocator().set_init_fn(init_address)
    # The allocator's "reset" hook is backed by the library's my_empty_cache.
    pluggable.allocator().set_reset_fn(empty_cache_address)
    pluggable.allocator().set_get_device_stats_fn(device_stats_address)
    pluggable.allocator().set_reset_peak_status_fn(reset_peak_address)
def get_layer_id(name):
    """Extract the first run of digits that directly follows a dot in ``name``.

    Args:
        name: layer name such as ``"module.layers.3.mlp"``. Non-string
            values are converted with ``str``; falsy values yield ``-1``.

    Returns:
        The matched digits as a ``str`` (for example ``"3"``), or the integer
        ``-1`` when ``name`` is falsy or contains no ``.<digits>`` segment.
        The mixed return type is kept on purpose: callers in this file wrap
        the result in ``int(...)`` or test it for membership in a list.
    """
    if not name:
        return -1
    # search() stops at the first hit; findall() scanned the whole string
    # only for element 0 to be used.
    match = re.search(r'\.(\d+)\.?', str(name))
    return match.group(1) if match else -1
self.tensor.copy_(self.tensor_cpu, non_blocking=True) + else: + self.tensor.storage().copy_(self.tensor_cpu.storage(), non_blocking=True) + self.h2d_event.record() + self.stat = "h2d" + + # synchronize h2d + def wait_h2d_finished(self, stream, need_wait=False): + if self.stat != "h2d": + return + if need_wait: + torch.npu.current_stream().wait_stream(stream) + torch.npu.default_stream().wait_stream(stream) + self.stat = "device" + + +class SwapPrefetch: + swap_prefetch = None + + def __init__(self, prefetch_args): + swap_list, vpp, interval, num_prefetch = prefetch_args + all_args = get_args() + self.prefetch_stream = torch_npu.npu.Stream(device=torch.npu.current_device()) + self.pp = all_args.pipeline_model_parallel_size + self.vpp = min(vpp, num_prefetch) + self.first_layer_id = 0 + if isinstance(all_args.noop_layers, set): + for layer_id in swap_list[0]: + if layer_id != '': + self.first_layer_id = int(layer_id) + break + + self.swap_tensors = [] + self.layer_name = "" + + self.data_ptr = {} + self.prefetch_list = [] + self.prefetch_data_ptr_list = [] + self.cur_micro_num = 0 + self.remove_num = 0 + self.forward_flag = False + self.interval = interval + self.slice_tensor_storage_ptr = {} + self.slice_tensor_storage_ptr_list = [] + self.eval_end_flag = False + + @staticmethod + def no_swap_tensor(ori_tensor): + if ori_tensor.numel() * ori_tensor.element_size() * 2 < 1024 * 1024: + return True + if ori_tensor.grad_fn is None: + return True + if ori_tensor.storage().size() == 0: + return True + if ori_tensor.storage().size() != ori_tensor.numel(): + return True + if ori_tensor._base is not None and ori_tensor._base.dim() >= 5: + return True + + return False + + def pack_hook(self, ori_tensor): + args = get_args() + if args.eval_interval: + if args.curr_iteration % args.eval_interval != 0: + self.eval_end_flag = False + if args.curr_iteration and args.curr_iteration % args.eval_interval == 0 and not self.eval_end_flag: + self.prefetch_data_ptr_list = [] + 
self.prefetch_list = [] + self.slice_tensor_storage_ptr_list = [] + self.eval_end_flag = True + + if self.no_swap_tensor(ori_tensor): + return ori_tensor + swap_tensor = SwapTensor(ori_tensor, self.layer_name) + if not self.swap_tensors: + swap_tensor.first_tensor = True + # Records the slice tensor status. + if ori_tensor.storage().size() != ori_tensor.numel(): + swap_tensor.is_slice_tensor = True + if ori_tensor.storage().data_ptr() not in self.slice_tensor_storage_ptr: + if self.swap_tensors and self.swap_tensors[0].layer_id != 0: + self.slice_tensor_storage_ptr[ori_tensor.storage().data_ptr()] = \ + [f'{len(self.prefetch_list) - 1}_{len(self.swap_tensors)}'] + else: + self.slice_tensor_storage_ptr[ori_tensor.storage().data_ptr()] = \ + [f'{len(self.prefetch_list)}_{len(self.swap_tensors)}'] + else: + if self.swap_tensors and self.swap_tensors[0].layer_id != 0: + self.slice_tensor_storage_ptr[ori_tensor.storage().data_ptr()].append( + f'{len(self.prefetch_list) - 1}_{len(self.swap_tensors)}') + else: + self.slice_tensor_storage_ptr[ori_tensor.storage().data_ptr()].append( + f'{len(self.prefetch_list)}_{len(self.swap_tensors)}') + + # Records the same data_ptr tensor status. 
+ if ori_tensor.storage().data_ptr() in self.data_ptr: + self.swap_tensors[self.data_ptr[ori_tensor.storage().data_ptr()]].stat = 'h2d' + swap_tensor.stat = 'd2h' + swap_tensor.tensor_cpu = self.swap_tensors[self.data_ptr[ori_tensor.storage().data_ptr()]].tensor_cpu + self.data_ptr[ori_tensor.storage().data_ptr()] = len(self.swap_tensors) + else: + self.data_ptr[ori_tensor.storage().data_ptr()] = len(self.swap_tensors) + + swap_tensor.launch_d2h(self.prefetch_stream) + swap_tensor.stream = self.prefetch_stream + swap_tensor.layer_id = int(get_layer_id(swap_tensor.layer_name)) + self.swap_tensors.append(swap_tensor) + self.forward_flag = True + return swap_tensor + + def unpack_hook(self, swap_tensor): + if isinstance(swap_tensor, torch.Tensor): + return swap_tensor + swap_tensor.wait_h2d_finished(self.prefetch_stream, swap_tensor.last_tensor) + self.prefetch_list[self.cur_micro_num][swap_tensor.layer_index].remove(swap_tensor) + # Remove prefetch completed list + if len(self.prefetch_list[self.cur_micro_num][swap_tensor.layer_index]) == 0: + self.prefetch_list[self.cur_micro_num].remove( + self.prefetch_list[self.cur_micro_num][swap_tensor.layer_index]) + self.prefetch_data_ptr_list[self.cur_micro_num].remove( + self.prefetch_data_ptr_list[self.cur_micro_num][swap_tensor.layer_index]) + self.slice_tensor_storage_ptr_list[self.cur_micro_num].remove( + self.slice_tensor_storage_ptr_list[self.cur_micro_num][swap_tensor.layer_index]) + if len(self.prefetch_list[self.cur_micro_num]) == 0: + self.prefetch_list.remove(self.prefetch_list[self.cur_micro_num]) + self.prefetch_data_ptr_list.remove(self.prefetch_data_ptr_list[self.cur_micro_num]) + self.slice_tensor_storage_ptr_list.remove(self.slice_tensor_storage_ptr_list[self.cur_micro_num]) + self.remove_num += 1 + if self.remove_num // self.pp == self.vpp: + self.remove_num = 0 + self.forward_flag = False + return swap_tensor.tensor + + def hook_swap_manager_forward(self, forward_func, layer_name): + def 
def update_slice_tensor_stat(self, swap_tensor):
    """Decide whether ``swap_tensor`` must itself wait for its d2h copy.

    Tensors that are views into a larger storage are grouped by storage
    pointer in ``self.slice_tensor_storage_ptr``; each record is a string of
    the form ``"<prefix>_<index>"`` whose second field indexes
    ``self.swap_tensors``. Only the tensor named by the first record of a
    group is treated as the owner of the copy.

    :param swap_tensor: tensor currently being synchronised
    :return: ``False`` (after marking the tensor ``'host'``) when another
        tensor owns the storage, ``True`` otherwise
    """
    if not swap_tensor.is_slice_tensor:
        return True

    storage_ptr = swap_tensor.storage_data_ptr
    if storage_ptr not in self.slice_tensor_storage_ptr:
        return True

    first_record = self.slice_tensor_storage_ptr[storage_ptr][0]
    _, owner_position = first_record.split('_')
    owner = self.swap_tensors[int(owner_position)]
    if swap_tensor != owner:
        # A sibling view already carries the data; nothing to wait for here.
        swap_tensor.stat = 'host'
        return False
    return True
self.slice_tensor_storage_ptr_list.append([self.slice_tensor_storage_ptr]) + self.swap_tensors = [] + self.data_ptr = {} + self.slice_tensor_storage_ptr = {} + if self.vpp == 1: + self.cur_micro_num = 0 + else: + if not self.remove_num and len(self.prefetch_list) > self.pp: + self.cur_micro_num = self.pp * (self.vpp - 1) + elif self.remove_num and self.remove_num % self.pp == 0: + self.cur_micro_num = self.pp * (self.vpp - 1 - self.remove_num // self.pp) + + def h2d_special_tensor(self, swap_tensor): + if swap_tensor.is_slice_tensor: + if swap_tensor.storage_data_ptr in self.slice_tensor_storage_ptr_list[self.cur_micro_num][swap_tensor.layer_index]: + _, index = self.slice_tensor_storage_ptr_list[self.cur_micro_num][swap_tensor.layer_index][swap_tensor.storage_data_ptr][ + 0].split('_') + if swap_tensor == self.prefetch_list[self.cur_micro_num][swap_tensor.layer_index][int(index)]: + swap_tensor.launch_h2d(self.prefetch_stream, True) + del self.slice_tensor_storage_ptr_list[self.cur_micro_num][swap_tensor.layer_index][swap_tensor.storage_data_ptr] + else: + swap_tensor.launch_h2d(self.prefetch_stream, False) + else: + swap_tensor.launch_h2d(self.prefetch_stream, True) + + def h2d(self, module_name): + if not self.prefetch_list: + return + if self.vpp != 1 and not self.forward_flag: + self.cur_micro_num = self.pp * (self.vpp - 1 - self.remove_num // self.pp) + for swap_tensor_list in self.prefetch_list[self.cur_micro_num]: + for swap_tensor in reversed(swap_tensor_list): + if swap_tensor.layer_id + self.interval == int(get_layer_id(module_name)) \ + and swap_tensor.stat == "host" \ + and swap_tensor.storage_data_ptr in self.prefetch_data_ptr_list[self.cur_micro_num][swap_tensor.layer_index]: + del self.prefetch_data_ptr_list[self.cur_micro_num][swap_tensor.layer_index][swap_tensor.storage_data_ptr] + # For slice tensors, only the first tensor is resized. 
def get_swap_prefetch(prefetch_args):
    """Return the shared ``SwapPrefetch`` instance, creating it on first use.

    ``prefetch_args`` is only consumed by the call that creates the instance;
    later calls return the cached object unchanged.
    """
    if SwapPrefetch.swap_prefetch is None:
        SwapPrefetch.swap_prefetch = SwapPrefetch(prefetch_args)

    return SwapPrefetch.swap_prefetch


def pre_forward_hook_func(module_name, prefetch_args):
    """Build a module hook that calls ``sync_d2h(module_name)`` on the shared manager."""

    def custom_func(module, *args, **kargs):
        get_swap_prefetch(prefetch_args).sync_d2h(module_name)

    return custom_func


def post_backward_hook_func(module_name, prefetch_args):
    """Build a module hook that calls ``h2d(module_name)`` on the shared manager."""

    def custom_func(module, *args, **kargs):
        get_swap_prefetch(prefetch_args).h2d(module_name)

    return custom_func


# manage activation tensor
def prefetch_tensor(module, name, prefetch_args):
    """Route ``module.forward`` through the swap manager's saved-tensor hooks.

    :param module: module whose forward pass should pack/unpack saved tensors
    :param name: layer name recorded by the manager while the forward runs
    :param prefetch_args: arguments used to create the shared manager if needed
    """
    # hook_swap_manager_forward only *returns* the wrapping callable. It must
    # be installed on the module, otherwise pack_hook/unpack_hook never run
    # and no activation is ever swapped.
    module.forward = get_swap_prefetch(prefetch_args).hook_swap_manager_forward(module.forward, name)


# register prefetch before backward, prefetch h2d
def prefetch_register_post_backward_hook(module, name, prefetch_args):
    """Attach the h2d prefetch callback to ``module``'s backward hook."""
    # NOTE(review): Module.register_backward_hook is deprecated upstream in
    # favour of register_full_backward_hook; kept as-is to preserve behavior.
    module.register_backward_hook(post_backward_hook_func(name, prefetch_args))


# register prefetch after forward, sync d2h
def prefetch_register_pre_forward_hook(module, name, prefetch_args):
    """Attach the d2h synchronisation callback to ``module``.

    Despite the function name, the callback is registered with
    ``register_forward_hook``, i.e. it runs after the module's forward.
    """
    module.register_forward_hook(pre_forward_hook_func(name, prefetch_args))
def wrap_tensor(self, tensor, pre_tensor_is_allowed_swap=False):
    """
    Wrap the original tensor.
    The tensor is stored inside the wrapped tensor and registered in
    ``self.device_tensors``. It may later be swapped out to host memory to
    release device memory when the swapping function is called.
    :param tensor: torch tensor which is needed to wrap
    :param pre_tensor_is_allowed_swap: when True, every tensor currently
        tracked on device is first marked as allowed to swap
    :return: the wrapped tensor, or ``tensor`` itself when it is not eligible
        for wrapping
    """
    if pre_tensor_is_allowed_swap:
        self.change_manager_tensor_status_to_allowed_swap()
    if not self.is_allowed_wrap_tensor(tensor):
        return tensor
    wrapped_tensor = SwappableTensor(tensor)
    if tensor.storage().size() != tensor.numel():
        wrapped_tensor.is_slice_tensor = True
    # Use a per-manager sequence number as the registry key. A wall-clock
    # timestamp is not guaranteed to differ between two consecutive calls, and
    # a duplicate key would silently overwrite an entry in device_tensors.
    # getattr keeps this working for managers created before the counter existed.
    key = getattr(self, "_next_tensor_key", 0)
    self._next_tensor_key = key + 1
    wrapped_tensor.set_tensor(key, tensor)
    self.device_tensors[key] = wrapped_tensor
    return wrapped_tensor
def traverse_swap_device_tensors(self, size, swap_size, is_swap_not_contiguous):
    """Swap tracked device tensors to host until enough bytes are released.

    Tensors are visited in the order they appear in ``self.device_tensors``.
    The loop stops once ``swap_size`` plus the bytes released here reaches
    ``size`` times the ``SWAP_SIZE_MULTIPLE`` environment value (an integer,
    floored at 1; invalid values fall back to 1).

    :param size: requested number of bytes to release
    :param swap_size: bytes already released before this call
    :param is_swap_not_contiguous: when False, non-contiguous tensors are skipped
    :return: ``(bytes released by this call, number of tensors moved to host)``
    """
    candidate_keys = list(self.device_tensors.keys())

    # swap device memory size multiple
    multiple = 1
    raw_multiple = os.getenv('SWAP_SIZE_MULTIPLE')
    if raw_multiple is not None:
        try:
            multiple = max(1, int(raw_multiple))
        except ValueError:
            print_rank_0('WARNING: SWAP_SIZE_MULTIPLE value error, fallback to default value 1')
    budget = size * multiple

    released = 0
    moved = 0
    for key in candidate_keys:
        if swap_size + released >= budget:
            break
        # An earlier iteration may already have moved this key to host as a
        # sibling of another tensor.
        if key not in self.device_tensors:
            continue
        candidate = self.device_tensors[key]
        if not is_swap_not_contiguous and not candidate.get_tensor().is_contiguous():
            continue
        if not candidate.is_allowed_swap:
            continue

        if not candidate.get_slice_tensor():
            inner = candidate.get_tensor()
            freed = inner.numel() * inner.element_size()
            candidate.trans_to_cpu()
        elif self.is_last_slice_shard_tensor_to_host(key, candidate):
            # Last view left on device: the whole backing storage is released.
            freed = candidate.get_tensor_origin_storage()
            candidate.trans_to_cpu()
        else:
            # Other views still use the storage, so no bytes are counted.
            freed = 0
            candidate.slice_tensor_trans_to_cpu()

        released += freed
        self.device_tensors.pop(key)
        self.host_tensors[key] = candidate
        moved += 1 + self.move_shard_tensor_to_host(key, candidate)
    return released, moved
class SwappableTensor(torch.Tensor):
    """Placeholder tensor that carries a real tensor and can move its data
    between device memory and a host copy.

    The instance itself only holds a one-element payload; the wrapped tensor
    is attached later through :meth:`set_tensor`.
    """

    # __new__ is implicitly a static method. Decorating it with @classmethod
    # shifts the arguments by one, so `tensor` would receive the class and
    # id(tensor) would be identical for every instance.
    def __new__(cls, tensor, *args, **kwargs):
        # construct a fake tensor to unique tensors
        data = torch.Tensor([id(tensor)])
        return torch.Tensor._make_subclass(cls, data, False)

    def __init__(self, tensor):
        self.id_key = None  # registry key assigned by set_tensor
        self.inner_tensor = None  # the wrapped tensor
        self.inner_tensor_bro_keys = []  # keys of tensors sharing the same data pointer
        self.inner_tensor_cpu_data = None  # host copy, present only after a swap-out
        self.storage_data_ptr = None
        self.inner_tensor_data_ptr = None
        self.inner_tensor_origin_storage_size = 0  # in elements
        self.inner_tensor_origin_storage_ele_size = 0  # bytes per element
        self.is_allowed_swap = False
        self._device = None
        self._location = None
        # A tensor is a "slice" when it does not cover its whole storage.
        self.is_slice_tensor = tensor.storage().size() != tensor.numel()

    @classmethod
    def __torch_function__(cls, func, types, args=(), kwargs=None):
        if kwargs is None:
            kwargs = {}
        return super().__torch_function__(func, types, args, kwargs)

    def set_tensor(self, id_key, tensor):
        """Attach ``tensor`` under ``id_key`` and record its storage metadata."""
        self.id_key = id_key
        self.inner_tensor = tensor
        self.inner_tensor_data_ptr = tensor.data_ptr()
        self.storage_data_ptr = tensor.storage().data_ptr()
        self.inner_tensor_origin_storage_size = tensor.storage().size()
        self.inner_tensor_origin_storage_ele_size = tensor.storage().element_size()
        self._location = "device"
        self._device = tensor.device

    def get_tensor(self):
        """Return the wrapped tensor."""
        return self.inner_tensor

    def set_tensor_location(self, location):
        """Record where the data currently lives, without moving any data."""
        self._location = location

    def trans_to_cpu(self):
        """
        Transfer the tensor to host memory and release its storage
        (the storage is resized to zero elements).
        :return:
        """
        with torch.no_grad():
            self.inner_tensor_cpu_data = self.inner_tensor.cpu()
            self.inner_tensor.storage().resize_(0)
            self._location = "cpu"

    def slice_tensor_trans_to_cpu(self):
        """
        Transfer a slice tensor to host memory, leaving the shared storage intact.
        """
        with torch.no_grad():
            self.inner_tensor_cpu_data = self.inner_tensor.cpu()
            self._location = "cpu"

    def trans_to_device(self, resize_flag):
        """
        Transfer the tensor data back from the host copy.
        :param resize_flag: when True, first restore the storage to its
            original element count
        :return:
        """
        with torch.no_grad():
            if resize_flag:
                self.inner_tensor.storage().resize_(self.inner_tensor_origin_storage_size)
            if self.is_slice_tensor:
                # Only the viewed region is restored for a slice.
                self.inner_tensor.copy_(self.inner_tensor_cpu_data)
            else:
                self.inner_tensor.storage().copy_(self.inner_tensor_cpu_data.storage())
            self._location = "device"

    def get_location(self):
        """Return the recorded location (``"device"``, ``"cpu"``, or ``None`` before set_tensor)."""
        return self._location

    def get_slice_tensor(self):
        """Return True when the wrapped tensor does not cover its whole storage."""
        return self.is_slice_tensor

    def get_tensor_origin_storage(self):
        """Return the original storage size in bytes."""
        return self.inner_tensor_origin_storage_size * self.inner_tensor_origin_storage_ele_size
megatron.core import mpu, tensor_parallel +from megatron.training.initialize import initialize_megatron, set_jit_fusion_options + + +class AutoPipeline: + auto_pipeline = None + + def __init__(self, args): + self.args = copy.deepcopy(args) + self.context = { + 'module': [] + } + self.modules_hooks = [] + self.profiling_step = 0 + self.stop_profiling_step = 5 + self.unit_mb = 1024 * 1024 + + @staticmethod + def get_memory_status(): + used_memory = torch.npu.memory_allocated() + reserved_memory = torch.npu.memory_reserved() + return used_memory, reserved_memory + + def _cal_tensor_size(self, tensor): + try: + return tensor.numel() * tensor.element_size() / self.unit_mb + except ZeroDivisionError: + return 0 + + def pre_hook_func(self, state, sync: bool, *args, **kargs): + if sync: + torch.npu.synchronize() + used_memory, _ = self.get_memory_status() + torch.npu.reset_max_memory_allocated() + state['memory'] = used_memory + torch.npu.synchronize() + state['time'] = time.time() + size = 0 + for arg in args: + if isinstance(arg, torch.Tensor): + size += self._cal_tensor_size(arg) + elif isinstance(arg, tuple) or isinstance(arg, list): + for t in arg: + if isinstance(t, torch.Tensor): + size += self._cal_tensor_size(t) + state['input'] = size + + def post_hook_func(self, state, sync: bool, *args, **kargs): + if sync: + torch.npu.synchronize() + used_memory, _ = self.get_memory_status() + max_mem = torch.npu.max_memory_allocated() + state['peak_memory'] = max_mem - state['memory'] + state['memory'] = (used_memory - state['memory']) // self.unit_mb + if 'pre_total_time' in state: + state['forward_cnt'] += 1 + state['time'] = (time.time() - state['time']) * 1000 + state['pre_total_time'] += state['time'] + try: + state['time'] = state['pre_total_time'] / state['forward_cnt'] + except ZeroDivisionError: + state['time'] = 0 + else: + state['forward_cnt'] = 0 + state['time'] = (time.time() - state['time']) * 1000 + state['pre_total_time'] = 0 + + def forward_pre_hook(self, 
name, parent_ctx, ctx): + if self.profiling_step < self.stop_profiling_step: + ctx['name'] = name + if 'layers' in parent_ctx: + parent_ctx['layers'].append(ctx) + + def hook(module, *args, **kargs): + if self.profiling_step < self.stop_profiling_step: + if 'module' in self.context: + self.context['module'].append(ctx) + self.pre_hook_func(ctx, True, *args, **kargs) + + return hook + + def forward_post_hook(self, ctx): + def hook(module, *args, **kargs): + if self.profiling_step < self.stop_profiling_step: + self.post_hook_func(ctx, True, *args) + if 'module' in self.context: + self.context['module'].pop() + + return hook + + def register_recursive_hook(self, prefix_name, model, ctx): + for name, module in model.named_children(): + if 'layers' not in ctx: + ctx['layers'] = [] + current_ctx = {} + + next_name = prefix_name + "." + name if prefix_name != "" else name + pre_hook = module.register_forward_pre_hook(self.forward_pre_hook(name, ctx, current_ctx)) + post_hook = module.register_forward_hook(self.forward_post_hook(current_ctx)) + self.modules_hooks.append(pre_hook) + self.modules_hooks.append(post_hook) + self.register_recursive_hook(next_name, module, current_ctx) + + def step_hook(self, model): + self.profiling_step += 1 + + def hook_step_func(self, step_func, models): + def custom_step_func(*args, **kargs): + result = step_func(*args, **kargs) + if self.profiling_step < self.stop_profiling_step: + used_memory, reserved_memory = self.get_memory_status() + self.context['used_mem'] = used_memory // self.unit_mb + if isinstance(models, list): + for model in models: + self.step_hook(model) + else: + self.step_hook(models) + return result + + return custom_step_func + + def get_comm_time(self, config, sync: bool): + if torch.distributed.get_rank() == 0: + if sync: + torch.npu.synchronize() + input_tensor = torch.ones(self.args.seq_length, self.args.micro_batch_size, self.args.hidden_size) + start_time = time.time() + 
p2p_communication.send_backward(input_tensor, config) + comm_time = (time.time() - start_time) * 1000 + self.context['comm_time'] = comm_time + else: + self.context['comm_time'] = 0.028 + + def get_modules_params_by_stages(self, init_memory, sync: bool): + + if self.args.pipeline_model_parallel_size == 2: + self.context['first_stage_embed'] = self.args.padded_vocab_size * self.args.hidden_size + self.context['last_stage_embed'] = self.args.padded_vocab_size * self.args.hidden_size + attention_block = 3 * self.args.hidden_size * self.args.num_attention_heads * ( + self.args.hidden_size / self.args.num_attention_heads) + self.args.hidden_size * self.args.hidden_size + self.args.hidden_size + self.args.hidden_size + ffn_block = 3 * self.args.ffn_hidden_size * self.args.hidden_size + self.args.hidden_size + self.args.hidden_size + per_trans_layer_param = attention_block + ffn_block + per_trans_layer_param /= self.args.tensor_model_parallel_size + self.context['per_trans_layer_param'] = per_trans_layer_param + + else: + first_stage_param = 0 + per_trans_layer_param = 0 + last_stage_param = 0 + if sync: + torch.npu.synchronize() + first_stage_rank = 0 + last_stage_rank = torch.distributed.get_world_size() - 1 + layer_stage_rank = self.args.tensor_model_parallel_size + + first_stage_param = self.broadcast_param_in_ranks(first_stage_rank, first_stage_param, init_memory) + last_stage_param = self.broadcast_param_in_ranks(last_stage_rank, last_stage_param, init_memory) + per_trans_layer_param = self.broadcast_param_in_ranks(layer_stage_rank, per_trans_layer_param, init_memory) + + self.context['first_stage_embed'] = first_stage_param - per_trans_layer_param + self.context['last_stage_embed'] = last_stage_param - per_trans_layer_param + self.context['per_trans_layer_param'] = per_trans_layer_param + + def broadcast_param_in_ranks(self, src_rank, param, init_memory): + if torch.distributed.get_rank() == src_rank: + param = torch.npu.max_memory_allocated() / self.unit_mb - 
def check_equal_model_configs(args, parsed_contents):
    """Find the stored model entry whose configuration matches ``args``.

    An entry matches when its ``"model_configs"`` agree with ``args`` on
    hidden size, FFN hidden size, sequence length and attention-head count.

    :param args: object exposing the four attributes above
    :param parsed_contents: sequence of stored entries
    :return: position of the first matching entry, or -1 when none matches
    """
    for position, entry in enumerate(parsed_contents):
        recorded = entry["model_configs"]
        same_model = (
            args.hidden_size == recorded["hidden_size"]
            and args.ffn_hidden_size == recorded["ffn_hidden_size"]
            and args.seq_length == recorded["seq_length"]
            and args.num_attention_heads == recorded["num_attention_heads"]
        )
        if same_model:
            return position
    return -1


def check_equal_parallel_configs(args, parsed_content):
    """Look up a stored pipeline policy matching the parallel layout in ``args``.

    A policy matches when layer count, pipeline/tensor parallel sizes and the
    memory-saving ratio all equal the values in ``args``.

    :param args: object exposing ``num_layers``, ``pipeline_model_parallel_size``,
        ``tensor_model_parallel_size`` and ``save_memory_ratio``
    :param parsed_content: one stored model entry with an ``"autopipeline_policy"`` list
    :return: ``(num_layer_list, recompute_module_list, recompute_type)`` of the
        first match, or ``(None, None, None)``
    """
    for policy in parsed_content["autopipeline_policy"]:
        if args.num_layers != policy["num_layers"]:
            continue
        if args.pipeline_model_parallel_size != policy["pipeline_model_parallel_size"]:
            continue
        if args.tensor_model_parallel_size != policy["tensor_model_parallel_size"]:
            continue
        if args.save_memory_ratio != policy["ratio"]:
            continue
        return policy["num_layer_list"], policy["recompute_module_list"], policy["recompute_type"]
    return None, None, None
os.path.exists(config_file): + with open(config_file) as config_json: + config_contents = config_json.read() + parsed_contents = json.loads(config_contents) + index = check_equal_model_configs(args, parsed_contents) + if index != -1: + num_layer_list, recompute_module_list, recompute_type = check_equal_parallel_configs(args, + parsed_contents[index]) + if num_layer_list: + return True, [(num_layer_list, recompute_module_list, (0, [0]), recompute_type)] + return False, None + + +def set_recompute_mode(models): + for model in models: + for name, module in model.named_modules(): + if str.isdigit(name) and name != "0": + module.forward = hook_checkpoint_forward(module.forward) + + +def hook_checkpoint_forward(forward_func): + def custom_forward(*args, **kargs): + def inside_forward(*args): + return forward_func(*args, **kargs) + + return tensor_parallel.checkpoint(inside_forward, None, *args) + + return custom_forward + + +def get_auto_pipeline(args): + if AutoPipeline.auto_pipeline is None: + AutoPipeline.auto_pipeline = AutoPipeline(args) + return AutoPipeline.auto_pipeline + + +def initialize_cfg_from_args_wrapper(initialize_cfg_from_args): + @wraps(initialize_cfg_from_args) + def wrapper(*args, **kwargs): + from mindspeed.core import training as mc_training + argument = get_args() + disable_mc2 = argument.automated_pipeline and not mc_training.policy + if not disable_mc2: + initialize_cfg_from_args(*args, **kwargs) + return wrapper + + +def autopipeline_profiling(model_provider, model_type, forward_step_func, train_valid_test_dataset_provider, + process_non_loss_data_func, args): + is_skip, policy = check_skip_profiling(args, config_file="autopipeline_config.json") + if not is_skip: + initialize_megatron(extra_args_provider=None, + args_defaults={'tokenizer_type': 'GPT2BPETokenizer'}) + set_jit_fusion_options() + global _TRAIN_START_TIME + start_time_tensor = torch.cuda.DoubleTensor([_TRAIN_START_TIME]) + torch.distributed.all_reduce(start_time_tensor, + 
op=torch.distributed.ReduceOp.MIN) + _TRAIN_START_TIME = start_time_tensor.item() + print_rank_0('time to initialize megatron (seconds): {:.3f}'.format( + time.time() - _TRAIN_START_TIME)) + print_datetime('after megatron is initialized') + args = get_args() + pipelining = get_auto_pipeline(args) + pipelining.update_args_for_profiling() + init_memory = torch.npu.max_memory_allocated() / pipelining.unit_mb + models, optimizer, lr_scheduler = training.setup_model_and_optimizer(model_provider, model_type) + optimizer.step = pipelining.hook_step_func(optimizer.step, models) + config = training.get_model_config(models[0]) + + if args.virtual_pipeline_model_parallel_size is not None: + train_data_iterator = [] + valid_data_iterator = [] + for i in range(len(models)): + mpu.set_virtual_pipeline_model_parallel_rank(i) + iterators = training.build_train_valid_test_data_iterators( + train_valid_test_dataset_provider) + train_data_iterator.append(iterators[0]) + valid_data_iterator.append(iterators[1]) + else: + train_data_iterator, valid_data_iterator, _ = training.build_train_valid_test_data_iterators( + train_valid_test_dataset_provider) + if isinstance(models, list): + for model in models: + pipelining.register_recursive_hook("module", model, pipelining.context) + else: + pipelining.register_recursive_hook("module", models, pipelining.context) + pipelining.get_modules_params_by_stages(init_memory, sync=True) + set_recompute_mode(models) + checkpointing_context = {} + training.train(forward_step_func, models, optimizer, lr_scheduler, train_data_iterator, valid_data_iterator, + process_non_loss_data_func, config, checkpointing_context) + pipelining.get_comm_time(config, sync=True) + + timers = get_timers() + if timers('interval-time'): + timers('interval-time').stop(barrier=True) + + for hook_handle in pipelining.modules_hooks: + hook_handle.remove() + pipelining.modules_hooks.clear() + pipelining.restore_args_for_training() + + for key, value in 
def set_recompute_modules(model, recomp_value, module_type):
    """Wrap the forward of the first ``recomp_value`` encoder layers (or one
    of their sub-modules) with activation checkpointing.

    :param model: module tree searched by fully qualified sub-module name
    :param recomp_value: number of leading layers (indices 0..recomp_value-1) to wrap
    :param module_type: 0 wraps each layer's ``mlp``, 1 wraps its
        ``self_attention``, 2 wraps the whole layer; any other value wraps nothing
    """
    layer_prefix = "module.module.language_model.encoder.layers."
    layer_ids = range(0, recomp_value)

    # mlp / attention / layer recompute type -> name suffix below the layer
    suffix_table = ((0, ".mlp"), (1, ".self_attention"), (2, ""))
    suffix = next((tail for kind, tail in suffix_table if module_type == kind), None)

    targets = set()
    if suffix is not None:
        targets = {layer_prefix + str(layer_id) + suffix for layer_id in layer_ids}

    for module_name, submodule in model.named_modules():
        if module_name in targets:
            submodule.forward = hook_checkpoint_forward(submodule.forward)
def get_min_max_layer(self):
    """Return the ``(min, max)`` number of layers a single stage may receive.

    The window is centred on the rounded average layers-per-stage and widens
    with that average: +/-0 for averages 1..4, +/-1 for 5..7, +/-2 otherwise.
    """
    mean_layers = round(self.num_layers / self.pipeline_model_parallel_size)
    spread = 0 if 1 <= mean_layers <= 4 else (1 if 5 <= mean_layers < 8 else 2)
    lower_bound = mean_layers - spread
    upper_bound = mean_layers + spread
    return lower_bound, upper_bound
def naive_search(self, module_type, answer_queue):
    """Enumerate layer splits for one recompute type and collect the feasible ones.

    A depth-first search assigns between ``self.min_layer`` and
    ``self.max_layer`` layers to each pipeline stage. Every complete split
    (one entry per stage, summing to ``self.num_layers``) is passed to
    ``self.get_recompute_modules``; splits reported as feasible are appended
    to *answer_queue* as ``(n_layers, recompute_modules, mem_set, module_type)``.

    While no feasible split has been found at all (neither in *answer_queue*
    nor in ``self.ans``), the infeasible split with the smallest
    ``min(mem_set[1])`` is kept in ``self.backup[0]`` as a fallback.

    Args:
        module_type: recompute granularity code forwarded to
            ``get_recompute_modules`` and stored in each result tuple.
        answer_queue: list that receives the feasible candidates (mutated).

    Returns:
        The same *answer_queue* object.
    """

    def dfs_build_layers(prefix_n_layers, cur_layers_sum):
        # Prune: too many stages or too many layers already assigned.
        if len(prefix_n_layers) > self.pipeline_model_parallel_size:
            return
        if cur_layers_sum > self.num_layers:
            return
        # Prune: all stages except the last must be non-decreasing in layer count.
        if 2 <= len(prefix_n_layers) < self.pipeline_model_parallel_size:
            if prefix_n_layers[-1] < prefix_n_layers[-2]:
                return

        if len(prefix_n_layers) == self.pipeline_model_parallel_size and cur_layers_sum == self.num_layers:
            status, prefix_recomp_modules, mem_set = self.get_recompute_modules(
                prefix_n_layers, self.pipeline_model_parallel_size, module_type)
            candidate = (prefix_n_layers, prefix_recomp_modules, mem_set, module_type)
            if status:
                answer_queue.append(candidate)
            if len(answer_queue) == 0 and len(self.ans) == 0:
                temp_min_mem = min(mem_set[1])
                if len(self.backup) == 0:
                    self.backup.append(candidate)
                    # Record the memory of the first fallback. Without this the
                    # threshold stays at its initial 0 and no later candidate
                    # could ever replace the first one.
                    self.backup_min_mem = temp_min_mem
                elif temp_min_mem < self.backup_min_mem:
                    self.backup_min_mem = temp_min_mem
                    self.backup[0] = candidate
            return

        for cur_n_layer in range(self.max_layer, self.min_layer - 1, -1):
            dfs_build_layers(prefix_n_layers + [cur_n_layer], cur_layers_sum + cur_n_layer)

    for prefix_n_layer in range(self.max_layer, self.min_layer - 1, -1):
        dfs_build_layers([prefix_n_layer], prefix_n_layer)

    return answer_queue
def set_target_memory(self):
    """Compute the per-stage memory target from an even layer split.

    Each stage is costed with ``cal_model_mem`` using the MLP recompute
    parameters (type 0), ``num_layers / pipeline_model_parallel_size`` layers
    and no recomputed modules. The target is the mean over stages, or the
    maximum over stages when ``self.ratio`` is below 1.0.
    """
    activation_param, recompute_param = self.cal_module_param(0)
    stage_count = self.pipeline_model_parallel_size
    stage_mems = [
        self.cal_model_mem(activation_param, recompute_param,
                           self.num_layers / self.pipeline_model_parallel_size, 0,
                           stage_count - stage, stage)
        for stage in range(stage_count)
    ]

    mean_mem = sum(stage_mems) / len(stage_mems)
    return max(stage_mems) if self.ratio < 1.0 else mean_mem
def dp(self, examples):
    """Estimate the iteration time of one candidate policy and record it.

    The method fills four tables indexed ``[stage][micro-batch]`` (both
    1-based; row/column 0 is padding) with start/end timestamps of forward
    and backward work, in three phases labelled warmup, 1f1b and cooldown.
    The resulting time and the candidate are appended to ``self.policy`` as
    ``(itertime, examples)``.

    Args:
        examples: candidate tuple; ``examples[0]`` is the per-stage layer
            count, ``examples[1]`` the per-stage number of recomputed
            modules and ``examples[3]`` the recompute type
            (0 mlp, 1 attention, 2 layer).
    """
    # Per-layer forward time comes from profiling; backward is modelled as
    # 1.3x forward and both communication directions share one profiled time.
    (Fwd, Bwd, ComFwd, ComBwd) = self.forward_time, self.forward_time * 1.3, self.comm_time, self.comm_time

    # Extra forward time paid during backward for each recomputed module.
    RecompFwd = 0
    module_type = examples[3]
    if module_type == 0:
        RecompFwd = self.mlp_forward_time
    elif module_type == 1:
        RecompFwd = self.attention_forward_time
    elif module_type == 2:
        RecompFwd = self.layer_forward_time

    # Prepend a dummy entry so stages can be addressed with 1-based indices.
    n_layers = [0] + examples[0]
    n_recompute_layers = [0] + examples[1]
    num_pp_stage = self.pipeline_model_parallel_size

    # One schedule slot per unit of global_batch_size.
    # NOTE(review): the original comment read "number of micro-batch-size is
    # 256"; confirm whether the slot count should instead be
    # global_batch_size / micro_batch_size.
    mbs = [self.micro_batch_size for _ in range(self.global_batch_size)]
    num_microbatch = len(mbs)
    mbs = [0] + mbs  # not read again below

    SF = [[0 for i in range(num_microbatch + 1)] for _ in range(num_pp_stage + 1)] # start of forward: left edge of the blue block in the diagram
    EF = [[0 for i in range(num_microbatch + 1)] for _ in range(num_pp_stage + 1)] # end of forward: right edge of the blue block in the diagram

    SB = [[0 for i in range(num_microbatch + 1)] for _ in range(num_pp_stage + 1)] # start of backward: left edge of the green block in the diagram
    EB = [[0 for i in range(num_microbatch + 1)] for _ in range(num_pp_stage + 1)] # end of backward: right edge of the green block in the diagram

    # warmup[p]: forward-only micro-batches of (0-based) stage p;
    # remaining[p]: micro-batches left for that stage after warmup.
    warmup = [num_pp_stage - p - 1 for p in range(num_pp_stage)]
    remaining = [num_microbatch - warmup[p] for p in range(num_pp_stage)]

    # for dp, p and m start with 1
    # warmup: only forward processing, add activations
    for p in range(1, num_pp_stage + 1):
        for m in range(1, num_pp_stage - p + 1):
            # A forward starts once the previous micro-batch on this stage is
            # done and the activation from the previous stage has arrived.
            SF[p][m] = max(EF[p][m - 1], EF[p - 1][m] + ComFwd)
            EF[p][m] = SF[p][m] + Fwd * n_layers[p]

    # 1f1b
    for num_1f1b in range(1, num_microbatch + 1):

        # forward half of 1f1b
        for p in range(1, num_pp_stage + 1):
            if remaining[p - 1] < num_1f1b:
                # this stage has no forward left; it is in its cooldown phase
                continue

            m = warmup[p - 1] + num_1f1b
            if p == 1:
                # Seed the padding row so that the "previous stage" term
                # below evaluates to max(EF[1]) for the first stage.
                EF[0][m] = max(EF[1]) - ComFwd

            SF[p][m] = max(EB[p][m + p - num_pp_stage - 1], EF[p - 1][m] + ComFwd)
            EF[p][m] = SF[p][m] + Fwd * n_layers[p]

        # backward half of 1f1b
        for p in range(num_pp_stage, 0, -1):
            m = num_1f1b
            if remaining[p - 1] < num_1f1b:
                # handled by the cooldown loop below
                continue
            if p == num_pp_stage:
                # Last stage: backward can start right after its own forward.
                SB[p][m] = EF[p][m + num_pp_stage - p]
            else:
                SB[p][m] = max(EF[p][m + num_pp_stage - p], EB[p + 1][m] + ComBwd)

            # Backward cost plus the recomputation of the selected modules.
            EB[p][m] = SB[p][m] + Bwd * n_layers[p] + RecompFwd * n_recompute_layers[p]

        # cooldown: backward-only steps for stages that ran out of forwards
        for p in range(num_pp_stage, 0, -1):
            m = num_1f1b
            if remaining[p - 1] >= num_1f1b:
                continue
            SB[p][m] = max(EB[p][m - 1], EB[p + 1][m] + ComBwd)
            EB[p][m] = SB[p][m] + Bwd * n_layers[p] + RecompFwd * n_recompute_layers[p]

    # NOTE(review): this scans rows 0..num_pp_stage-1, i.e. the padding row is
    # included and the last stage's row is not; confirm that is intended.
    itertime = max([max(EB[p]) for p in range(num_pp_stage)])
    self.policy.append((itertime, examples))
    return
def broadcast_policy_in_ranks(src_rank, policy=None):
    """Broadcast the chosen policy and store it on the global args.

    After the call ``args.num_layer_list``, ``args.recompute_module_list``
    and ``args.recompute_type`` hold the broadcast values on every rank.

    NOTE(review): the real policy is loaded on global rank 0 while the
    broadcast source is *src_rank*; the two agree only when ``src_rank == 0``.
    """
    args = get_args()
    layer_counts = args.pipeline_model_parallel_size * [0]
    recompute_counts = args.pipeline_model_parallel_size * [0]
    recompute_kind = 0
    if torch.distributed.get_rank() == 0:
        best = policy[0]
        layer_counts = best[0]
        recompute_counts = best[1]
        recompute_kind = best[3]

    def _share(values):
        # Stage the values in a device int tensor and broadcast it in place.
        buffer = torch.cuda.IntTensor(values)
        torch.distributed.broadcast(buffer, src=src_rank)
        return buffer

    args.num_layer_list = _share(layer_counts).tolist()
    args.recompute_module_list = _share(recompute_counts).tolist()
    args.recompute_type = _share([recompute_kind]).item()
mindspeed_parallel_state._PIPELINE_MODEL_PARALLEL_GROUP_FOR_NEW_STREAM, + megatron_parallel_state._DATA_PARALLEL_GROUP, + megatron_parallel_state._DATA_PARALLEL_GROUP_WITH_CP, + megatron_parallel_state._CONTEXT_PARALLEL_GROUP, + megatron_parallel_state._EMBEDDING_GROUP, + megatron_parallel_state._POSITION_EMBEDDING_GROUP, + megatron_parallel_state._TENSOR_AND_DATA_PARALLEL_GROUP, + megatron_parallel_state._TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP, + megatron_parallel_state._EXPERT_MODEL_PARALLEL_GROUP, + megatron_parallel_state._TENSOR_AND_EXPERT_PARALLEL_GROUP, + megatron_parallel_state._DATA_MODULO_EXPERT_PARALLEL_GROUP + ] + for gid in range(len(global_parallel_group)): + if global_parallel_group[gid]: + torch.distributed.destroy_process_group(global_parallel_group[gid]) + torch.distributed.barrier() + + megatron_parallel_state._MODEL_PARALLEL_GROUP = None + megatron_parallel_state._TENSOR_MODEL_PARALLEL_GROUP = None + megatron_parallel_state._PIPELINE_MODEL_PARALLEL_GROUP = None + mindspeed_parallel_state._PIPELINE_MODEL_PARALLEL_GROUP_FOR_NEW_STREAM = None + megatron_parallel_state._DATA_PARALLEL_GROUP = None + megatron_parallel_state._DATA_PARALLEL_GROUP_WITH_CP = None + megatron_parallel_state._CONTEXT_PARALLEL_GROUP = None + megatron_parallel_state._CONTEXT_PARALLEL_GLOBAL_RANKS = None + megatron_parallel_state._EMBEDDING_GROUP = None + megatron_parallel_state._POSITION_EMBEDDING_GROUP = None + megatron_parallel_state._TENSOR_AND_DATA_PARALLEL_GROUP = None + megatron_parallel_state._TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP = None + megatron_parallel_state._EXPERT_MODEL_PARALLEL_GROUP = None + megatron_parallel_state._TENSOR_AND_EXPERT_PARALLEL_GROUP = None + megatron_parallel_state._DATA_MODULO_EXPERT_PARALLEL_GROUP = None + megatron_parallel_state._VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK = None + megatron_parallel_state._VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = None + megatron_parallel_state._MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE = None + 
def get_profiling_data(policy, args):
    """Build the JSON-serialisable record describing *policy* for the current model.

    The record pairs the model configuration taken from *args* with a
    one-element ``autopipeline_policy`` list derived from ``policy[0]``.
    """
    best = policy[0]

    model_configs = {
        "vocab_size": args.padded_vocab_size,
        "hidden_size": args.hidden_size,
        "ffn_hidden_size": args.ffn_hidden_size,
        "seq_length": args.seq_length,
        "num_attention_heads": args.num_attention_heads
    }
    policy_entry = {
        "num_layers": args.num_layers,
        "pipeline_model_parallel_size": args.pipeline_model_parallel_size,
        "tensor_model_parallel_size": args.tensor_model_parallel_size,
        "ratio": args.save_memory_ratio,
        "num_layer_list": best[0],
        "recompute_module_list": best[1],
        "recompute_type": best[3]
    }
    return {"model_configs": model_configs, "autopipeline_policy": [policy_entry]}
def solve_autopipeline(context):
    """Search for the best pipeline policy on rank 0 and return it.

    Rank 0 runs the solver, logs and saves the chosen policy to
    ``autopipeline_config.json`` and returns ``solver.optimal_sch``. Every
    other rank waits at the barrier and returns a placeholder policy of the
    same tuple layout.
    """
    solver = None
    if torch.distributed.get_rank() == 0:
        solver = AutoPipelineSolver(context)
        solver.find_top_optimal_schedule()
    torch.distributed.barrier()

    if torch.distributed.get_rank() != 0:
        return [([0], [0], (0, [0]), 0)]

    separator = "============================================="
    print_rank_0(separator)
    print_rank_0("[INFO] Applied Automated Pipeline Policy: ")
    chosen = solver.optimal_sch[0]
    print_rank_0("network layer setting: " + str(chosen[0]))
    print_rank_0("recompute policy setting: " + str(chosen[1]))
    print_rank_0("recompute type: " + str(chosen[3]))
    print_rank_0(separator)
    save_profiling_data(solver.optimal_sch, config_file="autopipeline_config.json")
    return solver.optimal_sch
class MallocRecorder(object):
    """Scope object that brackets a region with malloc-recorder start/end calls.

    Construction calls ``malloc_recorder_start()`` on the memory-fragmentation
    module and finalisation (``__del__``) calls ``malloc_recorder_end()``.
    """

    def __init__(self):
        load_memory_fragmentation_module().malloc_recorder_start()

    def __del__(self):
        # Runs when the last reference to the recorder goes away.
        load_memory_fragmentation_module().malloc_recorder_end()


def malloc_recorder_wrapper(train_step):
    """Decorate *train_step* so each call runs while a ``MallocRecorder`` is alive."""
    @wraps(train_step)
    def rec_wrap(*args, **kwargs):
        # The local is never read, but it must stay bound: it keeps the
        # recorder alive for the duration of train_step. Dropping the
        # assignment would let the object be finalised immediately.
        # NOTE(review): the end call relies on the interpreter finalising the
        # object when rec_wrap returns (reference counting) -- confirm this
        # is acceptable if train_step raises.
        recorder = MallocRecorder()
        return train_step(*args, **kwargs)
    return rec_wrap
def wrapper(f):
    """Wrap callable *f* so every call runs while a ``MemoryRecorder`` is alive.

    The returned function forwards all arguments to *f* and returns its
    result. ``functools.wraps`` keeps the metadata of *f* (``__name__``,
    ``__doc__``, ``__wrapped__``) on the replacement, matching the other
    decorators in this package.
    """
    @wraps(f)
    def rec_wrap(*args, **kwargs):
        # Intentionally unused: the binding keeps the recorder (and therefore
        # the recording window it opened) alive until f returns.
        recorder = MemoryRecorder()
        return f(*args, **kwargs)
    return rec_wrap
def load_memory_fragmentation_module():
    """Return the memory-fragmentation extension, building/loading it on first use.

    The loaded module is cached on
    ``PluggableAllocatorAdaptor.MEMORY_FRAGMENTATION_MODULE`` so later calls
    reuse it.
    """
    cached = PluggableAllocatorAdaptor.MEMORY_FRAGMENTATION_MODULE
    if cached is not None:
        return cached
    PluggableAllocatorAdaptor.MEMORY_FRAGMENTATION_MODULE = MemoryFragmentationBuilder().load()
    return PluggableAllocatorAdaptor.MEMORY_FRAGMENTATION_MODULE
def get_module_name(module: torch.nn.Module):
    """Return ``<defining python module>.<class name>`` of *module* for log messages."""
    return module.__module__ + "." + module.__class__.__name__


class SwapHookRegister:
    """Installs forward hooks on a module tree and reports forward/backward borders.

    The first module whose forward pre-hook fires becomes the forward
    begin/end module and the backward begin module. Each time that module
    starts or finishes a forward pass, or its recorded autograd nodes start
    or finish backward, the matching user callback is invoked as
    ``callback(register_id, fwd_or_bwd_idx)``.

    Call :meth:`remove_hooks` to uninstall everything. The installed hooks
    reference this object, so ``del register`` alone does not trigger
    ``__del__`` while the hooked modules are alive.
    """

    id = 0

    def __init__(self):
        self.id = SwapHookRegister.id
        SwapHookRegister.id += 1

        # Handles of the most recently registered forward hooks (kept for
        # backward compatibility) and placeholders for backward handles.
        self.fwd_pre_hook_handle = None
        self.fwd_post_hook_handle = None
        self.bwd_pre_hook_handle = None
        self.bwd_post_hook_handle = None
        # Every forward hook handle registered on the module tree, so that
        # all of them can be removed, not just the last pair.
        self.fwd_hook_handles = []
        self.fwd_begin_module: torch.nn.Module = None
        self.fwd_end_module: torch.nn.Module = None
        self.bwd_begin_module: torch.nn.Module = None
        self.bwd_end_module: torch.nn.Module = None
        self.fwd_idx = 0
        self.bwd_idx = 0
        # Per-step autograd-node hook handles; dropped by reset().
        self.prehook_handles = []
        self.posthook_handls = []

        self.fwd_pre_hook_custom_func = None
        self.fwd_post_hook_custom_func = None
        self.bwd_pre_hook_custom_func = None
        self.bwd_post_hook_custom_func = None

    def __del__(self):
        r"""if not need swap hook to module, del it."""

        self.remove_hooks()

    def remove_hooks(self):
        """Remove every hook this register installed (safe to call repeatedly)."""
        self.reset()

        for hdl in self.fwd_hook_handles:
            hdl.remove()
        self.fwd_hook_handles.clear()

        for attr in (
            "fwd_pre_hook_handle",
            "fwd_post_hook_handle",
            "bwd_pre_hook_handle",
            "bwd_post_hook_handle",
        ):
            hdl = getattr(self, attr)
            if hdl is not None:
                hdl.remove()
            setattr(self, attr, None)

    def reset(self):
        """Forget the detected border modules and drop the autograd-node hooks."""
        self.fwd_begin_module = None
        self.fwd_end_module = None
        self.bwd_begin_module = None
        self.bwd_end_module = None

        self.fwd_idx = 0
        self.bwd_idx = 0
        for hdl in self.prehook_handles:
            hdl.remove()
        for hdl in self.posthook_handls:
            hdl.remove()
        self.prehook_handles.clear()
        self.posthook_handls.clear()

    def register_custom_func(
        self, fwd_pre_hook_custom_func, fwd_post_hook_custom_func, bwd_pre_hook_custom_func, bwd_post_hook_custom_func
    ):
        r"""
        custom_func(instance_id, fwd_or_bwd_idx)
        """
        self.fwd_pre_hook_custom_func = fwd_pre_hook_custom_func
        self.fwd_post_hook_custom_func = fwd_post_hook_custom_func
        self.bwd_pre_hook_custom_func = bwd_pre_hook_custom_func
        self.bwd_post_hook_custom_func = bwd_post_hook_custom_func

    def print_with_rank(self, message, print_level=PrintLevel.DEBUG):
        """Log *message* through ``print_with_rank`` with the ``SwapHook`` prefix."""
        print_with_rank(message, prefix="SwapHook", print_level=print_level)

    def register_hook_to_grad_fn(self, input_tensor, position, is_bwd_pre):
        """Attach a backward border hook to ``input_tensor.grad_fn``.

        Args:
            input_tensor: tensor whose ``grad_fn`` receives the hook.
            position: label of the calling forward hook, used only in logs.
            is_bwd_pre: True registers a pre-hook (backward begins), False a
                post-hook (backward ends).
        """

        def grad_fn_bwd_pre_hook(grad_outputs):
            self.bwd_idx += 1
            self.print_with_rank(f"grad_fn_bwd_pre_hook: bwd begin, id[{self.id}], bwd_idx[{self.bwd_idx}]")
            # border
            if self.bwd_pre_hook_custom_func:
                self.bwd_pre_hook_custom_func(self.id, self.bwd_idx)
            return grad_outputs

        def grad_fn_bwd_post_hook(grad_inputs, _):
            self.print_with_rank(f"grad_fn_bwd_post_hook: bwd end, id[{self.id}], bwd_idx[{self.bwd_idx}]")
            # border
            if self.bwd_post_hook_custom_func:
                self.bwd_post_hook_custom_func(self.id, self.bwd_idx)
            return grad_inputs

        if is_bwd_pre:
            self.print_with_rank(f"{position}, register grad_fn_bwd_pre_hook to grad_fn: {input_tensor.grad_fn}")
            self.prehook_handles.append(input_tensor.grad_fn.register_prehook(grad_fn_bwd_pre_hook))
        else:
            self.print_with_rank(f"{position}, register grad_fn_bwd_post_hook to grad_fn: {input_tensor.grad_fn}")
            self.posthook_handls.append(input_tensor.grad_fn.register_hook(grad_fn_bwd_post_hook))

    def register_hook_to_bwd_end_module(self, module, inputs, position):
        """Hook the first non-leaf grad-requiring tensor in *inputs* as the backward end.

        Only acts while no backward-end module is recorded yet, or when
        *module* is the recorded one.
        """
        # Identity checks instead of truthiness: container modules define
        # __len__, so an empty one would evaluate as False.
        if self.bwd_end_module is not None and module is not self.bwd_end_module:
            return
        if isinstance(inputs, torch.Tensor):
            inputs = (inputs,)
        if not isinstance(inputs, tuple):
            return
        for input_item in inputs:
            if not isinstance(input_item, torch.Tensor):
                continue
            if (input_item.requires_grad and not input_item.is_leaf) and input_item.grad_fn:
                if self.bwd_end_module is None:
                    self.bwd_end_module = module
                    self.print_with_rank(f"{position}, set bwd_end_module: {get_module_name(module)}")

                self.register_hook_to_grad_fn(input_item, position, is_bwd_pre=False)
                break

    def register_hook_to_bwd_begin_module(self, module, inputs, position):
        """Hook the first non-leaf grad-requiring tensor in *inputs* as the backward begin.

        Only acts when *module* is the recorded backward-begin module.
        """
        if self.bwd_begin_module is None or module is not self.bwd_begin_module:
            return
        if isinstance(inputs, torch.Tensor):
            inputs = (inputs,)
        if not isinstance(inputs, tuple):
            return
        for input_item in inputs:
            if not isinstance(input_item, torch.Tensor):
                continue
            if (input_item.requires_grad and not input_item.is_leaf) and input_item.grad_fn:
                self.register_hook_to_grad_fn(input_item, position, is_bwd_pre=True)
                break

    def fwd_pre_hook(self, module, args):
        """Forward pre-hook: detect the border module and signal forward begin."""
        self.print_with_rank(f"fwd_pre_hook, {get_module_name(module)}")

        if self.fwd_begin_module is None:
            # The first module to run after a reset defines all borders.
            self.fwd_begin_module = module
            self.fwd_end_module = module
            self.bwd_begin_module = module
            self.print_with_rank(
                f"fwd_pre_hook: set fwd_begin_module, fwd_end_module and bwd_begin_module: {get_module_name(module)}"
            )

        if module is self.fwd_begin_module:
            self.fwd_idx += 1
            self.print_with_rank(
                f"fwd_pre_hook: fwd begin, id[{self.id}], fwd_idx[{self.fwd_idx}], {get_module_name(module)}"
            )
            # border
            if self.fwd_pre_hook_custom_func:
                self.fwd_pre_hook_custom_func(self.id, self.fwd_idx)

        self.register_hook_to_bwd_end_module(module, args, "fwd_pre_hook")

        return None

    def fwd_post_hook(self, module, _, outputs):
        """Forward post-hook: signal forward end and arm the backward border hooks."""
        self.print_with_rank(f"fwd_post_hook, {get_module_name(module)}")

        if self.fwd_end_module is not None and module is self.fwd_end_module:
            self.print_with_rank(
                f"fwd_post_hook: fwd end, id[{self.id}], fwd_idx[{self.fwd_idx}], {get_module_name(module)}"
            )
            # border
            if self.fwd_post_hook_custom_func:
                self.fwd_post_hook_custom_func(self.id, self.fwd_idx)

        self.register_hook_to_bwd_begin_module(module, outputs, "fwd_post_hook")
        self.register_hook_to_bwd_end_module(module, outputs, "fwd_post_hook")

        return None

    def register_hooks_to_modules_recursively(self, module, name=""):
        """Register the forward pre/post hooks on *module* and all its descendants.

        Children are visited first, so the root module is hooked last and its
        handles end up in ``fwd_pre_hook_handle`` / ``fwd_post_hook_handle``.
        All handles are also collected in ``fwd_hook_handles``.
        """
        self.print_with_rank(f"register_hooks_to_modules_recursively, {get_module_name(module)}")

        for child_name, child in module.named_children():
            child_path = f"{name}.{child_name}" if name else child_name
            self.register_hooks_to_modules_recursively(child, child_path)

        def module_fwd_pre_hook(module, args):
            return self.fwd_pre_hook(module, args)

        def module_fwd_post_hook(module, args, outputs):
            return self.fwd_post_hook(module, args, outputs)

        self.fwd_pre_hook_handle = module.register_forward_pre_hook(module_fwd_pre_hook)
        self.fwd_post_hook_handle = module.register_forward_hook(module_fwd_post_hook)
        # Keep every handle: the two attributes above are overwritten at each
        # recursion level and would otherwise leak the children's hooks.
        self.fwd_hook_handles.append(self.fwd_pre_hook_handle)
        self.fwd_hook_handles.append(self.fwd_post_hook_handle)


def register_swap_hooks_to_modules(
    module,
    fwd_pre_hook_custom_func=None,
    fwd_post_hook_custom_func=None,
    bwd_pre_hook_custom_func=None,
    bwd_post_hook_custom_func=None,
):
    r"""
    Create a SwapHookRegister, hook *module* and all its sub-modules, and
    attach the four optional border callbacks. Returns the register.

    usage:

        # before training
        models = [model_1, model_2, ...]
        swap_hook_registers = []

        def fwd_pre_hook_custom_func(swap_hook_register_id, fwd_idx):
            ...

        def fwd_post_hook_custom_func(swap_hook_register_id, fwd_idx):
            ...

        def bwd_pre_hook_custom_func(swap_hook_register_id, bwd_idx):
            ...

        def bwd_post_hook_custom_func(swap_hook_register_id, bwd_idx):
            ...

        for model in models:
            import smart_swap
            swap_hook_register = smart_swap.xxx.register_swap_hooks_to_modules(
                model,
                fwd_pre_hook_custom_func, fwd_post_hook_custom_func,
                bwd_pre_hook_custom_func, bwd_post_hook_custom_func)

            swap_hook_registers.append(swap_hook_register)

        # when training
        for step in range(train_steps):
            for swap_hook_register in swap_hook_registers:
                swap_hook_register.reset()

            train_step(xxx)

        # after training
        for swap_hook_register in swap_hook_registers:
            swap_hook_register.remove_hooks()

    """

    swap_hook_register = SwapHookRegister()
    swap_hook_register.register_hooks_to_modules_recursively(module)
    swap_hook_register.register_custom_func(
        fwd_pre_hook_custom_func, fwd_post_hook_custom_func, bwd_pre_hook_custom_func, bwd_post_hook_custom_func
    )

    return swap_hook_register
class PolicyGenerator:
    """Build a swap policy from one profiled training step.

    The generator collects swap candidates from the profiled tensors, scores
    them, and repeatedly picks candidates that overlap the currently worst
    MemoryReductionInfo ("mri") entry until every entry is cleared. Timing
    feasibility is delegated to ``TensorArranger``.
    """

    def __init__(self, profiler_op_step: ProfilerDataOneStep):
        self.size_coverage_weight = swap_policy_config.size_coverage_weight

        self.profiler_op_step = profiler_op_step
        self.tensor_info_dict: Dict[UniqueSwapPtr, TensorInfoDetail] = {}
        self.policy_candidate_list: List[SwapPolicyCandidate] = []
        self.intersect_candidates: List[SwapPolicyCandidate] = []
        self.swap_list: List[SwapPolicyCandidate] = []
        self.peak_list: List[MemoryReductionInfo] = []

        self.candidate_selected: Dict[SwapPolicyCandidate, bool] = {}
        self.memory_reduction_list = profiler_op_step.memory_reduction_list
        # new data structure
        self.mri_opid2idx = self.profiler_op_step.mri_opid2idx
        self.memory_peaks = self.profiler_op_step.memory_peaks
        self.swap_arranger = TensorArranger(
            self.profiler_op_step,
            os.path.join(swap_policy_config.output_root_path, f"Simulation_{swap_policy_config.rank}.html"),
            swap_policy_config.duration_time,
        )

    def print_with_rank(self, message, print_level=PrintLevel.DEBUG):
        """Log ``message`` with the "Policy" prefix."""
        print_with_rank(message, prefix="Policy", print_level=print_level)

    def reduction_target_satisfied(self):
        """Return True once every entry of ``memory_reduction_list`` reports ``cleared()``."""
        for memory_reduction in self.memory_reduction_list:
            if not memory_reduction.cleared():
                return False
        self.print_with_rank("Successfully reach reduction target ...", print_level=PrintLevel.INFO)
        return True

    def get_covered_reductions(self, candidate_list=None):
        """Compute, for each candidate, the range of mri entries its swap window covers.

        Sets ``start_mri_opid``, ``end_mri_opid`` and ``num_covered_reductions``
        on every candidate (``-1``/``-1``/``0`` when nothing is covered).

        Args:
            candidate_list: candidates to update. When omitted, all of
                ``policy_candidate_list`` is processed and each mri's
                ``intersect_candidate_list`` is rebuilt from scratch.

        Raises:
            ValueError: if the located mri range falls outside the candidate's
                op-id window.
        """
        if not self.memory_reduction_list:
            return
        rebuild_intersections = candidate_list is None
        if rebuild_intersections:
            candidate_list = self.policy_candidate_list
            for memory_info in self.memory_reduction_list:
                memory_info.intersect_candidate_list.clear()

        layer_info = self.profiler_op_step.layer_info
        first_mri_opid = self.memory_reduction_list[0].op_id
        last_mri_opid = self.memory_reduction_list[-1].op_id

        for candidate in candidate_list:
            candidate.num_covered_reductions = 0
            swap_out_stage = layer_info.get_next_layer(candidate.swap_out_stage_actual)
            swap_in_stage = candidate.swap_in_stage_actual
            start_op_id = layer_info.layer_start_opid[swap_out_stage]
            end_op_id = layer_info.layer_start_opid[swap_in_stage]
            if start_op_id >= last_mri_opid or end_op_id <= first_mri_opid:
                candidate.start_mri_opid = -1
                candidate.end_mri_opid = -1
                candidate.num_covered_reductions = 0
            else:
                # Binary search.
                # find the mri with smallest opid that has opid >= start_op_id
                start_mri_opid = self.get_closest_mri(start_op_id, cmp="ge")
                # find the mri with largest opid that has opid < end_op_id
                end_mri_opid = self.get_closest_mri(end_op_id, cmp="lt")
                # get_closest_mri returns an exact match even for "lt"; step
                # back one entry to keep the bound strict.
                if end_mri_opid == end_op_id:
                    end_mri_opid = self.memory_reduction_list[self.mri_opid2idx[end_mri_opid] - 1].op_id
                if start_mri_opid < start_op_id:
                    self.print_with_rank(
                        f"start_op_id={start_op_id}, end_op_id={end_op_id}, "
                        f"start_mri_opid={start_mri_opid}, end_mri_opid={end_mri_opid}",
                        print_level=PrintLevel.INFO,
                    )
                    raise ValueError("candidate.start_mri_opid should be >= than start_op_id")
                if end_mri_opid > end_op_id:
                    self.print_with_rank(
                        f"start_op_id={start_op_id}, end_op_id={end_op_id}, "
                        f"start_mri_opid={start_mri_opid}, end_mri_opid={end_mri_opid}",
                        print_level=PrintLevel.INFO,
                    )
                    raise ValueError("candidate.end_mri_opid should be <= end_op_id")
                # Attach to the candidate: start_mri_opid, end_mri_opid, num_covered_reductions.
                if end_mri_opid < start_mri_opid:
                    candidate.start_mri_opid = -1
                    candidate.end_mri_opid = -1
                    candidate.num_covered_reductions = 0
                else:
                    candidate.start_mri_opid = start_mri_opid
                    candidate.end_mri_opid = end_mri_opid
                    # Number of mri entries the candidate covers, obtained from
                    # the index distance between the two bounds in mri_opid2idx.
                    candidate.num_covered_reductions = (
                        self.mri_opid2idx[end_mri_opid] - self.mri_opid2idx[start_mri_opid] + 1
                    )
            if rebuild_intersections and candidate.start_mri_opid != -1 and candidate.end_mri_opid != -1:
                first_idx = self.mri_opid2idx[candidate.start_mri_opid]
                last_idx = self.mri_opid2idx[candidate.end_mri_opid]
                for mri_idx in range(first_idx, last_idx + 1):
                    self.memory_reduction_list[mri_idx].intersect_candidate_list.append(candidate)

    def get_closest_mri(self, target_opid, cmp="ge"):
        """
        Binary search for the opid closest to target_opid.
        cmp:
            'ge': result opid greater than or equal to target_opid;
            'lt': result opid less than target_opid;

        An exact match found during the search is returned as-is for both
        modes, so 'lt' callers must handle equality themselves. The list must
        be non-empty, and the result is only meaningful when target_opid lies
        within the op-id range of the list.
        """
        p1 = 0
        p2 = len(self.memory_reduction_list) - 1
        if cmp not in ["ge", "lt"]:
            raise ValueError("For now only support cmp='ge' or cmp='lt' ")
        while p1 < p2 - 1:
            mid = (p1 + p2) // 2
            mid_opid = self.memory_reduction_list[mid].op_id
            if mid_opid == target_opid:
                return mid_opid
            elif mid_opid < target_opid:
                p1 = mid
            elif mid_opid > target_opid:
                p2 = mid
        if cmp == "ge":
            if self.memory_reduction_list[p1].op_id >= target_opid:
                return self.memory_reduction_list[p1].op_id
            else:
                return self.memory_reduction_list[p2].op_id
        elif cmp == "lt":
            if self.memory_reduction_list[p2].op_id < target_opid:
                return self.memory_reduction_list[p2].op_id
            else:
                return self.memory_reduction_list[p1].op_id

    def update_memory_reduction(self, candidate_list: List[SwapPolicyCandidate]):
        """Subtract each candidate's tensor size from the need of every mri it covers."""
        self.get_covered_reductions(candidate_list)
        for candidate in candidate_list:
            if candidate.start_mri_opid != -1 and candidate.end_mri_opid != -1:
                for mri_idx in range(
                    self.mri_opid2idx[candidate.start_mri_opid], self.mri_opid2idx[candidate.end_mri_opid] + 1
                ):
                    mri = self.memory_reduction_list[mri_idx]
                    mri.update_memory_reduction_need(-candidate.tensor.info.size)

    @timer
    def select_candidate(self):
        """Rebuild the tensor table from the profiled ops and derive all swap candidates."""
        self.tensor_info_dict.clear()
        for op in self.profiler_op_step.op_list:
            for tensor in op.tensor_list:
                tensor_info = self.tensor_info_dict.setdefault(tensor.ptr, TensorInfoDetail(tensor))
                tensor_info.update_op(op)

        for detail_tensor in self.tensor_info_dict.values():
            detail_tensor.policy_candidate_list.clear()
            # Skip tensors used only once, shared-memory tensors and tensors
            # below the configured size filter.
            if (
                not detail_tensor.is_used_multiple_times()
                or detail_tensor.info.tensor_type == SwapTensorType.SHARED_MEMORY
                or detail_tensor.info.size < swap_policy_config.tensor_size_filter
            ):
                continue
            if detail_tensor.info.tensor_type == SwapTensorType.OPTIM:
                self.select_optim_tensor(detail_tensor)
            elif detail_tensor.info.tensor_type in (SwapTensorType.MODEL, SwapTensorType.OTHERS):
                self.select_model_tensor(detail_tensor)

        self.policy_candidate_list = list(
            set().union(*[i.policy_candidate_list for i in self.tensor_info_dict.values()])
        )
        self.candidate_selected = dict.fromkeys(self.policy_candidate_list, False)
        self.get_covered_reductions()

    def select_optim_tensor(self, detail_tensor: TensorInfoDetail):
        """Add a candidate for a tensor whose first recorded use is in the OPTIM stage."""
        first_op = detail_tensor.used_op_list[0]
        if first_op.stage.stage_type != SwapStageType.OPTIM:
            return
        swap_out_stage = SwapStage(stage_type=SwapStageType.FWD, micro_batch_index=1, layer_index=1)
        swap_in_stage = SwapStage(stage_type=SwapStageType.OPTIM, micro_batch_index=0, layer_index=0)
        swap_policy_candidate = SwapPolicyCandidate(
            detail_tensor, is_optimizer_or_weight=True, swap_out_stage=swap_out_stage, swap_in_stage=swap_in_stage
        )
        detail_tensor.policy_candidate_list.append(swap_policy_candidate)
        return

    def select_model_tensor(self, detail_tensor: TensorInfoDetail):
        """Add a candidate spanning the last FWD use and the first BWD use of a tensor.

        Tensors that are also used in the OPTIM stage are skipped.
        """
        if any(op.stage.stage_type == SwapStageType.OPTIM for op in detail_tensor.used_op_list):
            return
        fwd_last_op = None
        bwd_first_op = None
        for op in detail_tensor.used_op_list:
            if op.stage.stage_type == SwapStageType.FWD and (fwd_last_op is None or fwd_last_op.op_id < op.op_id):
                fwd_last_op = op
            if op.stage.stage_type == SwapStageType.BWD and (bwd_first_op is None or bwd_first_op.op_id > op.op_id):
                bwd_first_op = op
        if fwd_last_op and bwd_first_op:
            swap_policy_candidate = SwapPolicyCandidate(
                detail_tensor, is_optimizer_or_weight=False, swap_out_op=fwd_last_op, swap_in_op=bwd_first_op
            )
            detail_tensor.policy_candidate_list.append(swap_policy_candidate)
        return

    def compute_score(self):
        """Score every candidate as normalized coverage plus weighted normalized size.

        Sizes are compared on a cube-root scale; both ranges are floored at
        0.001 so identical values do not divide by zero.
        """
        if not self.policy_candidate_list:
            return
        tensor_info_sizes = [i.tensor.info.size for i in self.policy_candidate_list]
        max_size = max(tensor_info_sizes)
        min_size = min(tensor_info_sizes)
        max_size = max_size ** (1 / 3)
        min_size = min_size ** (1 / 3)
        size_range = max(0.001, max_size - min_size)

        coverages = [i.num_covered_reductions for i in self.policy_candidate_list]
        max_coverage = max(coverages)
        min_coverage = min(coverages)
        coverage_range = max(0.001, max_coverage - min_coverage)

        for candidate in self.policy_candidate_list:
            normalized_coverage = (candidate.num_covered_reductions - min_coverage) / coverage_range
            normalized_size = (candidate.tensor.info.size ** (1 / 3) - min_size) / size_range
            candidate.score = normalized_coverage + self.size_coverage_weight * normalized_size

    def get_peak_list(self):
        """Set ``peak_list`` to the single mri with the largest remaining need.

        The top mri of each MemoryPeakInfo in ``memory_peaks`` is taken, then
        the largest of those is kept, so each iteration selects only one peak.
        The stored entry is None when the winning peak has no mri range.
        """
        self.peak_list.clear()

        def get_max_for_each_mp(mp: MemoryPeakInfo):
            """
            Return the MemoryReductionInfo with the largest current
            memory_reduction_need inside the range of this MemoryPeakInfo,
            or None when the peak has no mri range.
            """
            if mp.mp_mri_start_opid == -1 or mp.mp_mri_end_opid == -1:
                return None
            start_idx = self.mri_opid2idx[mp.mp_mri_start_opid]
            end_idx = self.mri_opid2idx[mp.mp_mri_end_opid] + 1
            mri_list = self.memory_reduction_list[start_idx:end_idx]
            mrn = [mri.memory_reduction_need for mri in mri_list]
            max_idx = np.argmax(mrn)
            self.print_with_rank(
                f"current top mri in MemoryPeakInfo is {mri_list[max_idx]}", print_level=PrintLevel.INFO
            )
            return mri_list[max_idx]

        mp_max = [(i, get_max_for_each_mp(mp)) for i, mp in enumerate(self.memory_peaks)]
        for mp in mp_max:
            self.print_with_rank(f"top mri from each MemoryPeakInfo {mp[1]}", print_level=PrintLevel.INFO)
        mp_max_list = np.array([0 if not item[1] else item[1].memory_reduction_need for item in mp_max])
        self.print_with_rank(f"top mri from each MemoryPeakInfo {[mp_max_list]}", print_level=PrintLevel.INFO)
        selected_peak_idx = np.argmax(mp_max_list)
        self.peak_list = [mp_max[selected_peak_idx][1]]

    def get_intersect_candidates(self):
        """Collect the not-yet-selected candidates that overlap the current peak.

        The result is sorted by descending score, then ascending
        ``start_mri_opid``. It is left empty when the peak is None.
        """
        self.get_peak_list()
        self.intersect_candidates.clear()
        self.print_with_rank(f"len of peak list is {len(self.peak_list)}", print_level=PrintLevel.INFO)
        peak = self.peak_list[0]
        if not peak:
            return
        self.intersect_candidates = [
            cand for cand in peak.intersect_candidate_list if not self.candidate_selected[cand]
        ]
        self.intersect_candidates.sort(key=lambda x: (-x.score, x.start_mri_opid))
        self.print_with_rank(
            f"len of self.intersect_candidates after {len(self.intersect_candidates)}", print_level=PrintLevel.INFO
        )

    def simulation_select(self):
        """Pick candidates for the current peak.

        Returns:
            ``(candidates, cause_delay)``. Candidates that do not delay the
            schedule are taken in order until the peak's need is met. If none
            qualifies, the best-ranked candidate is returned alone with
            ``cause_delay`` set to True.
        """
        reduction_need = self.peak_list[0].memory_reduction_need
        selected_candidates = []
        for cand in self.intersect_candidates:
            if not self.swap_arranger.cause_delay(cand):
                selected_candidates.append(cand)
                reduction_need -= cand.tensor.info.size
                if reduction_need <= 0:
                    return selected_candidates, False
        if not selected_candidates:
            return [self.intersect_candidates[0]], True
        return selected_candidates, False

    def simulation(self, use_custom_policy=False):
        """Run one selection round and commit it to the arranger and the mri bookkeeping.

        Args:
            use_custom_policy: when True, every candidate in
                ``policy_candidate_list`` is used instead of running
                ``simulation_select``.
        """
        if use_custom_policy:
            selected_candidates = self.policy_candidate_list
            cause_delay = False
        else:
            selected_candidates, cause_delay = self.simulation_select()
        self.print_with_rank(f"selected_candidates have {len(selected_candidates)} cands", print_level=PrintLevel.DEBUG)
        self.swap_list.extend(selected_candidates)
        self.swap_arranger.run(selected_candidates, self.swap_list, delay=cause_delay)
        self.update_memory_reduction(selected_candidates)
        for cand in selected_candidates:
            self.candidate_selected[cand] = True

    def get_sorted_swap_list(self):
        """
        Sort swap_list by: primary key: swap_out time; secondary key: tensor size reverse
        """
        swap_list_out_opid = [
            (
                candidate,
                (
                    self.profiler_op_step.layer_info.layer_start_opid[candidate.swap_out_stage]
                    if candidate.is_optimizer_or_weight
                    else candidate.swap_out_op.op_id
                ),
            )
            for candidate in self.swap_list
        ]
        swap_list_out_opid = sorted(swap_list_out_opid, key=lambda item: (item[1], -item[0].tensor.info.size))
        swap_list = [candidate for (candidate, out_opid) in swap_list_out_opid]
        return swap_list
class SmartSwapAdaptor(object):
    """Holder for the lazily loaded smart-swap extension module."""

    # Result of SmartSwapBuilder().load(); stays None until first requested.
    SMART_SWAP_MODULE = None

    def __init__(self):
        pass


def load_smart_swap_module():
    """Return the smart-swap extension module, loading it on first use."""
    cached = SmartSwapAdaptor.SMART_SWAP_MODULE
    if cached is not None:
        return cached
    SmartSwapAdaptor.SMART_SWAP_MODULE = SmartSwapBuilder().load()
    return SmartSwapAdaptor.SMART_SWAP_MODULE


def change_allocator():
    """Install the allocator exported by the smart-swap library as the current NPU allocator.

    The library file of the loaded extension provides ``gmlake_malloc`` and
    ``gmlake_free`` for the pluggable allocator. The remaining ``gmlake_*``
    entry points are resolved through ctypes and registered as the
    allocator's callbacks.
    """
    library_path = load_smart_swap_module().__file__

    pluggable = torch_npu.npu.memory.NPUPluggableAllocator(library_path, "gmlake_malloc", "gmlake_free")
    torch_npu.npu.memory.change_current_allocator(pluggable)

    library = ctypes.CDLL(library_path)

    def entry_address(symbol):
        # Raw address of an exported function, as an integer.
        return ctypes.cast(getattr(library, symbol), ctypes.c_void_p).value

    # Resolve every symbol before registering any callback, so a missing
    # export fails before the allocator is partially configured.
    init_addr = entry_address("gmlake_init")
    empty_cache_addr = entry_address("gmlake_empty_cache")
    memory_fraction_addr = entry_address("gmlake_memory_fraction")
    device_stats_addr = entry_address("gmlake_get_device_stats")
    reset_peak_addr = entry_address("gmlake_reset_peak_stats")
    record_stream_addr = entry_address("gmlake_record_stream")
    erase_stream_addr = entry_address("gmlake_erase_stream")

    pluggable.allocator().set_init_fn(init_addr)
    pluggable.allocator().set_reset_fn(empty_cache_addr)
    pluggable.allocator().set_memory_fraction_fn(memory_fraction_addr)
    pluggable.allocator().set_get_device_stats_fn(device_stats_addr)
    pluggable.allocator().set_reset_peak_status_fn(reset_peak_addr)
    pluggable.allocator().set_record_stream_fn(record_stream_addr)
    pluggable.allocator().set_erase_stream_fn(erase_stream_addr)
class TensorArrangerBase:
    """Timeline model of one profiled step, split into consecutive stages.

    Each stage gets an equal share of ``duration_time`` per op it contains.
    Swap-out and swap-in transfers are booked against a stage's ``time_left``
    budget by the subclass.
    """

    def __init__(self, profiler_op_step: ProfilerDataOneStep, output_file_path, duration_time):
        self.op_list = profiler_op_step.op_list
        self.profiler_op_step = profiler_op_step
        self.duration_time = duration_time
        self.stage_data = []
        self.stage_map = {}
        self.stage_index_map = {}
        self.stage_start_time = dict()
        self.stage_end_time = dict()
        # Read by set_free_stage and replaced by reset_simulation, so it must
        # exist on the base class as well.
        self.stage_time_left = dict()
        self.set_data()
        self.candidate_index = 0
        self.output_file_path = output_file_path

        self.D2H_bandwidth = swap_policy_config.D2H_bandwidth
        self.H2D_bandwidth = swap_policy_config.H2D_bandwidth
        self.color_map = {
            "SwapStageType.INIT": "yellow",
            "SwapStageType.FWD": "red",
            "SwapStageType.BWD": "blue",
            "SwapStageType.OPTIM": "purple",
            "Delay": "green",
        }

    def print_with_rank(self, message, print_level=PrintLevel.DEBUG):
        """Log ``message`` with the "SwapEngine" prefix."""
        print_with_rank(message, prefix="SwapEngine", print_level=print_level)

    def set_data(self):
        """Group consecutive ops by stage and build the stage lookup tables.

        Raises:
            ValueError: if the same stage appears in two separate runs of ops.
        """
        # One evenly spaced tick per op; the leading 0 is dropped.
        time_line = list(np.linspace(0, self.duration_time, len(self.op_list) + 1))[1:]
        for index, op in enumerate(self.op_list):
            if not self.stage_data or op.stage != self.stage_data[-1]["stage"]:
                self.stage_data.append(
                    {
                        "op_id": op.op_id,
                        "stage": op.stage,
                        "stage_type": str(op.stage.stage_type),
                        "start_time": time_line[index],
                        "type": "op_stream",
                        "candidate_index": -1,
                    }
                )
            # Every op extends the end of the stage it belongs to.
            self.stage_data[-1]["end_time"] = time_line[index]
        for index, row in enumerate(self.stage_data):
            if row["stage"] in self.stage_map:
                raise ValueError("Find duplicate stage ...")
            self.stage_index_map[index] = row["stage"]
            self.stage_map[row["stage"]] = {
                "index": index,
                "start_time": row["start_time"],
                "end_time": row["end_time"],
                "layer_time": row["end_time"] - row["start_time"],
                "time_left": row["end_time"] - row["start_time"],
                "candidate_list": [],
            }
            self.stage_end_time[index] = row["end_time"]
            self.stage_start_time[index] = row["start_time"]

    def get_swap_time(self, size):
        """Return ``(swap_out_time, swap_in_time)`` for ``size``, scaled by 1024*1024 and the configured bandwidths."""
        swap_out_time = size / 1024 / 1024 / self.D2H_bandwidth
        swap_in_time = size / 1024 / 1024 / self.H2D_bandwidth
        return swap_out_time, swap_in_time

    def reset_simulation(self):
        """Drop all booked transfers and restore every stage's full time budget."""
        self.candidate_index = 0
        for stage in self.stage_map:
            self.stage_map[stage]["candidate_list"] = []
            self.stage_map[stage]["time_left"] = self.stage_map[stage]["layer_time"]
        self.stage_time_left = dict()

    def set_swapin_free_stage_to_candidate(self, cur_time, candidate):
        """Set ``candidate.swap_in_free_stage`` from the time its swap-in finishes."""
        swap_in_free_stage_index = self.stage_map[candidate.swap_in_stage_actual]["index"]
        # Default: release at the stage right after swap_in_stage_actual.
        # During arrangement, swap_in_stage_actual is placed at least one stage
        # earlier than the stage where the compute stream needs the tensor (to
        # reduce compute-stream waits on the swap stream), so the following
        # stage is expected to exist.
        candidate.swap_in_free_stage = self.stage_index_map[swap_in_free_stage_index + 1]
        for index in self.stage_index_map:
            # If the stage in which the swap-in actually finishes, shifted by
            # swap_in_free_stage_delay stages, is still inside the stage range,
            # release at that shifted stage instead.
            if (
                index < len(self.stage_index_map) - swap_policy_config.swap_in_free_stage_delay
                and cur_time < self.stage_end_time[index]
            ):
                candidate.swap_in_free_stage = self.stage_index_map[index + swap_policy_config.swap_in_free_stage_delay]
                return

    def set_free_stage_to_candidate(self, cur_time, candidate):
        """Set ``candidate.free_stage`` from the time its swap-out finishes.

        Falls back to ``swap_in_stage_actual`` when no stage satisfies the
        delay condition.
        """
        candidate.free_stage = candidate.swap_in_stage_actual
        for index in self.stage_index_map:
            if (
                index < len(self.stage_index_map) - swap_policy_config.free_stage_delay
                and cur_time < self.stage_end_time[index]
            ):
                candidate.free_stage = self.stage_index_map[index + swap_policy_config.free_stage_delay]
                return

    def set_free_stage(self):
        """Order each stage's booked transfers and assign free stages to their candidates.

        Requires ``stage_time_left`` to hold an entry for every stage index.
        """
        for index, stage in self.stage_index_map.items():
            value = self.stage_map[stage]
            time_left = self.stage_time_left[value["index"]]
            start_time = self.stage_start_time[index] - time_left + value["time_left"]
            cur_time = start_time

            # Initialize an empty list to store swap information for each candidate
            swap_list_out_opid = []

            # Iterate through each item in the candidate list
            for swap_stage, swap_time, stream_type, candidate_index, candidate in value["candidate_list"]:
                # Determine operation ID based on candidate type
                if candidate.is_optimizer_or_weight:
                    # NOTE(review): other code in this package reads this table
                    # through profiler_op_step.layer_info.layer_start_opid;
                    # confirm ProfilerDataOneStep also exposes it directly.
                    op_id = self.profiler_op_step.layer_start_opid[candidate.swap_out_stage]
                else:
                    op_id = candidate.swap_out_op.op_id

                # Append a tuple with the relevant information to the list
                swap_list_out_opid.append((swap_stage, swap_time, stream_type, candidate_index, candidate, op_id))

            # Earliest swap-out op first; larger tensors first on ties.
            swap_list_out_opid = sorted(swap_list_out_opid, key=lambda item: (item[-1], -item[-2].tensor.info.size))
            value["candidate_list"] = [
                (swap_stage, swap_time, stream_type, candidate_index, candidate)
                for swap_stage, swap_time, stream_type, candidate_index, candidate, _ in swap_list_out_opid
            ]

            for swap_stage, swap_time, stream_type, candidate_index, candidate in value["candidate_list"]:
                cur_time += swap_time
                if stream_type == "swap_out_stream":
                    self.set_free_stage_to_candidate(cur_time, candidate)
                elif stream_type == "swap_in_stream":
                    self.set_swapin_free_stage_to_candidate(cur_time, candidate)


class TensorArranger(TensorArrangerBase):
    """Books swap transfers onto the stage timeline and tracks the remaining slack."""

    def __init__(self, profiler_op_step: ProfilerDataOneStep, output_file_path, duration_time):
        super().__init__(profiler_op_step, output_file_path, duration_time)

    def calculate_time_left(self, find_index):
        """Return the accumulated slack up to and including stage ``find_index``.

        Positive slack is not carried into the next stage; a deficit is.
        """
        time_left = 0
        for index in range(find_index + 1):
            time_left = min(0, time_left)
            time_left += self.stage_map[self.stage_index_map[index]]["time_left"]
        return time_left

    def save_stage_time_left(self):
        """Store the accumulated slack of every stage in ``stage_time_left``."""
        time_left = 0
        for index, stage in self.stage_index_map.items():
            time_left = min(0, time_left)
            time_left += self.stage_map[stage]["time_left"]
            self.stage_time_left[index] = time_left

    def get_layer_time_excess(self, layer: SwapStage, swap_time):
        """Return the budget stage ``layer`` would have left after a transfer of ``swap_time``."""
        return self.stage_map[layer]["time_left"] - swap_time

    def cause_delay(self, candidate: SwapPolicyCandidate):
        """Return True if swapping ``candidate`` would overrun a stage budget.

        Swap-out is charged to the candidate's swap-out stage, swap-in to the
        stage preceding its swap-in stage.
        """
        swap_out_time, swap_in_time = self.get_swap_time(candidate.tensor.info.size)
        swap_out_affected = self.get_layer_time_excess(candidate.swap_out_stage, swap_out_time)
        swap_in_stage_index = self.stage_map[candidate.swap_in_stage]["index"]
        swap_in_stage_index = swap_in_stage_index - 1
        swap_in_stage = self.stage_index_map[swap_in_stage_index]
        swap_in_affected = self.get_layer_time_excess(swap_in_stage, swap_in_time)
        return swap_out_affected < 0 or swap_in_affected < 0

    def run(self, candidates: List[SwapPolicyCandidate], _: List[SwapPolicyCandidate], delay=False):
        """
        Book the swap-out and swap-in of every candidate onto the timeline and
        set ``swap_in_stage_actual`` on each of them.

        delay: if False, then items in candidates would not cause delay in current simulation
        """
        for cand in candidates:
            swap_out_stage = cand.swap_out_stage
            swap_in_stage = cand.swap_in_stage
            swap_out_stage_index = self.stage_map[swap_out_stage]["index"]
            swap_in_stage_index = self.stage_map[swap_in_stage]["index"]
            swap_out_time, swap_in_time = self.get_swap_time(cand.tensor.info.size)
            # Swap-in is scheduled one stage ahead of the stage that uses the tensor.
            swap_in_stage_index = swap_in_stage_index - 1
            swap_in_stage = self.stage_index_map[swap_in_stage_index]
            self.stage_map[swap_out_stage]["candidate_list"].append(
                (swap_out_stage, swap_out_time, "swap_out_stream", self.candidate_index, cand)
            )
            self.stage_map[swap_out_stage]["time_left"] -= swap_out_time
            if delay:
                # Walk backwards towards the swap-out stage looking for the
                # latest stage whose accumulated slack fits the swap-in.
                find_flag = False
                for find_swap_in_index in range(swap_in_stage_index, swap_out_stage_index, -1):
                    time_left = self.calculate_time_left(find_swap_in_index)
                    if time_left > swap_in_time:
                        swap_in_stage = self.stage_index_map[find_swap_in_index]
                        find_flag = True
                        break
                if not find_flag:
                    swap_in_stage = self.stage_index_map[swap_in_stage_index]
            cand.swap_in_stage_actual = swap_in_stage
            self.stage_map[swap_in_stage]["candidate_list"].append(
                (swap_in_stage, swap_in_time, "swap_in_stream", self.candidate_index, cand)
            )
            self.stage_map[swap_in_stage]["time_left"] -= swap_in_time
class SwapStage:
    """Python-side value object for a training stage: (stage type, micro batch, layer).

    Instances compare and hash by these three fields, so they can be used as
    dictionary keys.
    """

    def __init__(self, cpp_instance=None, stage_type=None, micro_batch_index=None, layer_index=None):
        """Build a stage, optionally copying from a C++ instance first.

        Explicit keyword values override what was copied from ``cpp_instance``.
        """
        self.stage_type: SwapStageType = None
        self.micro_batch_index = None
        self.layer_index = None

        if cpp_instance:
            self.from_cpp(cpp_instance)
        if stage_type is not None:
            self.stage_type = stage_type
        if micro_batch_index is not None:
            self.micro_batch_index = micro_batch_index
        if layer_index is not None:
            self.layer_index = layer_index

    def __eq__(self, other):
        if not isinstance(other, SwapStage):
            return NotImplemented
        return (
            self.stage_type == other.stage_type
            and self.micro_batch_index == other.micro_batch_index
            and self.layer_index == other.layer_index
        )

    def __ne__(self, other):
        if not isinstance(other, SwapStage):
            return NotImplemented
        return not self.__eq__(other)

    def __hash__(self):
        return hash((self.stage_type, self.micro_batch_index, self.layer_index))

    def copy(self):
        """Return a new Python SwapStage with the same three fields."""
        instance = SwapStage()
        instance.stage_type = self.stage_type
        instance.micro_batch_index = self.micro_batch_index
        instance.layer_index = self.layer_index
        return instance

    def from_cpp(self, instance):
        """Copy the fields of a C++ stage object into this instance."""
        self.stage_type = SwapStageType(instance.stageType)
        self.micro_batch_index = instance.microBatchIndex
        self.layer_index = instance.layerIndex

    def to_cpp(self, instance):
        """Write this stage's fields into a C++ stage object."""
        instance.stageType = self.stage_type.value
        instance.microBatchIndex = self.micro_batch_index
        instance.layerIndex = self.layer_index

    def __str__(self):
        # A default-constructed stage has no stage_type yet; printing it must
        # not raise.
        stage_name = self.stage_type.name if self.stage_type is not None else None
        ret = dict(stage_type=stage_name, mbi=self.micro_batch_index, li=self.layer_index)
        return str(ret)

    def calculate_layer_index(self, stage_op_idx, fwd_layer_info, bwd_layer_info):
        """Derive, store and return the 1-based layer index for an op of this stage.

        Args:
            stage_op_idx: op index counted from the start of the current stage.
            fwd_layer_info: per-layer op-index boundaries used for FWD stages.
            bwd_layer_info: per-layer op-index boundaries used for BWD stages.

        Returns:
            0 for OPTIM and INIT stages. Otherwise the 1-based position of the
            first boundary that is >= ``stage_op_idx``, or
            ``len(boundaries) + 1`` when the index lies beyond all of them.

        Raises:
            RuntimeError: for any other stage type.
        """
        self.layer_index = 0
        if self.stage_type == SwapStageType.FWD:
            op_layer_info = fwd_layer_info
        elif self.stage_type == SwapStageType.BWD:
            op_layer_info = bwd_layer_info
        elif self.stage_type == SwapStageType.OPTIM or self.stage_type == SwapStageType.INIT:
            self.layer_index = 0
            return self.layer_index
        else:
            raise RuntimeError(f"calculate_layer_index error, stage_type={self.stage_type}")

        for i, op_layer_info_value in enumerate(op_layer_info):
            if stage_op_idx <= op_layer_info_value:
                self.layer_index = i + 1  # layerIndex starts from 1
                break
        if self.layer_index == 0:
            self.layer_index = len(op_layer_info) + 1
        return self.layer_index
+ micro_batch_num=self.micro_batch_num, + layer_num=self.layer_num, + is_oom=self.is_oom, + stage=str(self.stage), + step=self.step, + one_step_duration=self.one_step_duration, + policy_step=self.policy_step, + current_stage_op_id=self.current_stage_op_id, + enable_profiler=self.enable_profiler, + enable_executor=self.enable_executor, + fwd_op_layer_info=self.fwd_op_layer_info, + bwd_op_layer_info=self.bwd_op_layer_info, + enable_custom_record_stream=self.enable_custom_record_stream, + ) + + @property + def micro_batch_num(self): + return self.cpp_config.microBatchNum + + @micro_batch_num.setter + def micro_batch_num(self, value): + self.cpp_config.microBatchNum = value + + @property + def layer_num(self): + return self.cpp_config.layerNum + + @layer_num.setter + def layer_num(self, value): + self.cpp_config.layerNum = value + + @property + def is_oom(self): + return self.cpp_config.isOOM + + @is_oom.setter + def is_oom(self, value): + self.cpp_config.isOOM = value + + @property + def stage(self) -> SwapStage: + stage = SwapStage() + stage.from_cpp(self.cpp_config.stage) + return stage + + @stage.setter + def stage(self, value: SwapStage): + value.to_cpp(self.cpp_config.stage) + + @property + def step(self): + return self.cpp_config.step + + @property + def next_step(self): + return self.step + 1 + + @step.setter + def step(self, value): + self.cpp_config.step = value + + @property + def one_step_duration(self): + return self.cpp_config.oneStepDuration + + @one_step_duration.setter + def one_step_duration(self, value): + self.cpp_config.oneStepDuration = value + + @property + def policy_step(self): + return self.cpp_config.policyStep + + @policy_step.setter + def policy_step(self, value): + self.cpp_config.policyStep = value + + @property + def current_stage_op_id(self): + return self.cpp_config.currentStageOpId + + @current_stage_op_id.setter + def current_stage_op_id(self, value): + self.cpp_config.currentStageOpId = value + + @property + def 
class UniqueSwapPtr:
    """Hashable Python mirror of a C++ swap pointer, identified by (ptr_base, index)."""

    def __init__(self, cpp_instance=None, ptr_base=None, index=None):
        """Build a pointer, optionally copying from a C++ instance first.

        Explicit ``ptr_base`` / ``index`` values override what was copied from
        ``cpp_instance``. They are compared against None rather than tested
        for truthiness, so an explicit ``0`` is kept.
        """
        self.ptr_base = None
        self.index = None

        if cpp_instance:
            self.from_cpp(cpp_instance)
        if ptr_base is not None:
            self.ptr_base = ptr_base
        if index is not None:
            self.index = index

    def from_cpp(self, instance):
        """Copy ``ptrBase`` and ``index`` from a C++ pointer object."""
        self.ptr_base = instance.ptrBase
        self.index = instance.index

    def to_cpp(self, instance):
        """Write this pointer's fields into a C++ pointer object."""
        instance.ptrBase = self.ptr_base
        instance.index = self.index

    def __str__(self):
        return f"{self.ptr_base}_{self.index}"

    def __eq__(self, other):
        if not isinstance(other, UniqueSwapPtr):
            return NotImplemented
        return self.ptr_base == other.ptr_base and self.index == other.index

    def __ne__(self, other):
        if not isinstance(other, UniqueSwapPtr):
            return NotImplemented
        return not self.__eq__(other)

    def __hash__(self):
        return hash((self.ptr_base, self.index))
class MemoryReductionInfo:
    """Amount of memory that still has to be freed at one op.

    Used in two situations: removing an OOM, and lowering the peak memory by
    a requested amount through a policy.
    """

    def __init__(self, op, memory_reduction_total):
        self.op = op
        self.op_id = op.op_id
        # "need" is the remaining amount and starts at the total.
        self.memory_reduction_need = memory_reduction_total
        self.memory_reduction_total = memory_reduction_total
        self.intersect_candidate_list: List[SwapPolicyCandidate] = []

    def __str__(self):
        need = f"Reduction_need:{self.memory_reduction_need}, "
        total = f"Reduction_total:{self.memory_reduction_total}, "
        op_part = f"OP: {self.op.get_brief_dict()}"
        return need + total + op_part

    def update_memory_reduction_need(self, amount):
        """Add ``amount`` to the remaining need (negative values reduce it)."""
        self.memory_reduction_need += amount

    def cleared(self):
        """Return True once no reduction is needed any more."""
        return self.memory_reduction_need <= 0

    def check_in_list(self, memory_reduction_list):
        """Return True if this op id lies within the op-id span of the list.

        Precondition: ``memory_reduction_list`` is sorted by ``op_id``.
        """
        if not memory_reduction_list:
            return False
        lowest = memory_reduction_list[0].op_id
        highest = memory_reduction_list[-1].op_id
        return lowest <= self.op_id <= highest

    def print_dict(self):
        """Return the main fields as a dict of strings."""
        return {
            "op_id": str(self.op_id),
            "op_name": str(self.op.op_name),
            "memory_reduction_need": str(self.memory_reduction_need),
            "memory_reduction_total": str(self.memory_reduction_total),
        }

    def print_dict_op(self):
        """Return the dict representation of the wrapped op."""
        return self.op.print_dict()
def calculate_layer_info(self):
    """Derive forward/backward layer boundaries from micro-batch 1.

    With a negative ``logical_layer_num`` every op is its own layer and the
    boundary lists are replaced by ``0..count-1``. Otherwise
    ``logical_layer_num - 1`` evenly spaced boundaries are appended to the
    existing lists.
    """
    fwd_total = 0
    bwd_total = 0
    for op in self.op_list:
        stage = op.stage
        if stage.micro_batch_index != 1:
            continue
        if stage.stage_type == SwapStageType.FWD:
            fwd_total += 1
        elif stage.stage_type == SwapStageType.BWD:
            bwd_total += 1

    layer_count = self.logical_layer_num
    if layer_count < 0:
        # per-op granularity
        self.fwd_op_layer_info = list(range(fwd_total))
        self.bwd_op_layer_info = list(range(bwd_total))
        return

    # layers divided by the logical layer number; the division is evaluated
    # per boundary, so a layer count of 0 performs no division at all
    for boundary in range(1, layer_count):
        self.fwd_op_layer_info.append(fwd_total // layer_count * boundary)
        self.bwd_op_layer_info.append(bwd_total // layer_count * boundary)
def get_memory_peaks(self):
    """Partition the stage sequence into MemoryPeakInfo intervals.

    A peak opens at the first op of a FWD stage that is not preceded by
    another FWD stage, and closes at the last op of a BWD stage that is not
    followed by another BWD stage. Stages that are neither FWD nor BWD never
    open or close a peak themselves.
    """
    self.memory_peaks = []
    cur_peak_start = -1
    cur_peak_end = -1
    last_index = len(self.layer_to_index_map) - 1
    for index, layer in self.index_to_layer_map.items():
        if cur_peak_start == -1 and layer.stage_type == SwapStageType.FWD:
            starts_run = index == 0 or self.index_to_layer_map[index - 1].stage_type != SwapStageType.FWD
            if starts_run:
                cur_peak_start = self.layer_start_opid[layer]
        # Only close a peak that has been opened; otherwise a trace starting
        # in BWD would record an end lying before the eventual start.
        if cur_peak_start != -1 and cur_peak_end == -1 and layer.stage_type == SwapStageType.BWD:
            if index == last_index:
                # The trace ends inside a backward stage: the peak ends at the
                # last recorded op. (The previous condition compared against a
                # negative index and could never be true.)
                if self.op_list:
                    cur_peak_end = self.op_list[-1].op_id
            elif self.index_to_layer_map[index + 1].stage_type != SwapStageType.BWD:
                next_layer = self.index_to_layer_map[index + 1]
                cur_peak_end = self.layer_start_opid[next_layer] - 1
        if cur_peak_start != -1 and cur_peak_end != -1:
            self.memory_peaks.append(MemoryPeakInfo(cur_peak_start, cur_peak_end))
            cur_peak_start = -1
            cur_peak_end = -1
    self.print_with_rank(
        f"current profiler step has {len(self.memory_peaks)} memory peaks", print_level=PrintLevel.INFO
    )
= self.layer_to_index_map[layer] + if idx + 1 not in self.index_to_layer_map: + return None + else: + return self.index_to_layer_map[idx + 1] + + +class ProfilerDataOneStep: + def __init__(self, duration_time, step, is_oom, enable_profiler=True): + self.op_list: List[ProfilerOpInfo] = [] + self.swap_list: List[ProfilerSwapInfo] = [] + self.memory_reduction_list: List[MemoryReductionInfo] = [] + self.layer_start_opid: Dict[SwapStage, int] = dict() + self.layer_info: ProfilerLayerInfo = None + self.duration_time = duration_time + self.step = step + self.max_memory = None + self.target_memory = None + self.is_oom = is_oom + + if enable_profiler: + self.acquire_data() + self.layer_info = ProfilerLayerInfo(self.op_list) + self.layer_start_opid = self.layer_info.layer_start_opid + self.memory_peaks = self.layer_info.memory_peaks + self.init_memory_reduction_list() + self.get_memory_peak_mri() + + self.__stage_list: List[SwapStage] = [] + self.__stage_map: Dict[SwapStage, int] = {} + self.__parse_stage_info() + self.__op_info_cache: Dict[str, List[ProfilerOpInfo]] = {} # {op_name, List[ProfilerOpInfo]} + + def __parse_stage_info(self): + for op in self.op_list: + if not self.__stage_list or op.stage != self.__stage_list[-1]: + self.__stage_list.append(op.stage) + self.__stage_map[op.stage] = self.__stage_list.index(op.stage) + + def __get_op_info_from_list(self, from_op: ProfilerOpInfo, op_name: str, direction: str) -> ProfilerOpInfo: + # Determine the bisect function based on the direction + if direction == "next": + bisect_fn = bisect_right + op_name_check = op_name + idx_adjustment = 0 + elif direction == "prev": + bisect_fn = bisect_left + op_name_check = op_name + idx_adjustment = -1 + else: + raise ValueError("direction must be 'next' or 'prev'") + + if op_name_check == "": # when search op is not specified + begin_idx = bisect_fn(self.op_list, from_op) + idx_adjustment + if begin_idx < 0 or begin_idx >= len(self.op_list): + return None + return 
def group_op_info_by(self, op_info_list: List[ProfilerOpInfo], method="") -> List[List[ProfilerOpInfo]]:
    """Group ops by a criterion.

    Args:
        op_info_list: ops to group; every element must be a ProfilerOpInfo.
        method: ``"microbatch"`` groups consecutive ops sharing the same
            ``stage.micro_batch_index``; ``""`` returns the input list
            unchanged (not nested).

    Returns:
        For ``"microbatch"``, a list of groups in input order.

    Raises:
        TypeError: if an element is not a ProfilerOpInfo.
        NotImplementedError: for any other ``method``.
    """
    if not all(isinstance(item, ProfilerOpInfo) for item in op_info_list):
        raise TypeError("op_info_list can only contain elements with ProfilerOpInfo type.")
    if method == "microbatch":
        result_op_info_list = []
        curr_mb = None
        for op in op_info_list:
            mb_index = op.stage.micro_batch_index
            # Open a new group on the first op and on every index change.
            # Appending into the open group means the trailing group is part
            # of the result too (it used to be dropped).
            if not result_op_info_list or mb_index != curr_mb:
                curr_mb = mb_index
                result_op_info_list.append([])
            result_op_info_list[-1].append(op)
        return result_op_info_list
    elif method == "":
        return op_info_list
    else:
        raise NotImplementedError('group_by method other than "microbatch" is not implemented yet.')
def get_first_op_info(self, op_name: str = "") -> ProfilerOpInfo:
    """Return the first op, or the first op named ``op_name``.

    Returns None when ``op_list`` is empty or no op has that name.
    """
    if len(self.op_list) == 0:
        return None
    first_op = self.op_list[0]
    # The forward search below starts strictly after ``first_op``, so a match
    # on the very first op has to be handled here.
    if op_name == "" or first_op.op_name == op_name:
        return first_op
    return self.get_next_op_info(first_op, op_name)

def get_last_op_info(self, op_name: str = "") -> ProfilerOpInfo:
    """Return the last op, or the last op named ``op_name``.

    Returns None when ``op_list`` is empty or no op has that name.
    """
    if len(self.op_list) == 0:
        return None
    last_op = self.op_list[-1]
    # The backward search below starts strictly before ``last_op``, so a match
    # on the very last op has to be handled here.
    if op_name == "" or last_op.op_name == op_name:
        return last_op
    return self.get_prev_op_info(last_op, op_name)
+ curr_stage_idx = self.__stage_map.get(curr_op.stage, None) + if curr_stage_idx is None: + break # Avoid infinite loop if stage is not found + + is_valid_in_next_direction = direction == "next" and curr_stage_idx > stage_index + is_valid_in_prev_direction = direction == "prev" and curr_stage_idx < stage_index + + if is_valid_in_next_direction or is_valid_in_prev_direction: + result_stage = curr_op.stage + break + curr_op = get_adj_op_fn(curr_op, op_name) + return result_stage + + def get_next_stage(self, stage: SwapStage, op_name: str = "") -> SwapStage: + return self.__get_adjacent_stage(stage, op_name, "next") + + def get_prev_stage(self, stage: SwapStage, op_name: str = "") -> SwapStage: + return self.__get_adjacent_stage(stage, op_name, "prev") + + def print_with_rank(self, message, print_level=PrintLevel.DEBUG): + print_with_rank(message, prefix="ProfilerDataOneStep", print_level=print_level) + + def __str__(self): + ret = "=" * 20 + "ProfilerDataOneStep SHOW BEGIN" + "=" * 20 + "\n" + + ret += f"The length of op_list is {len(self.op_list)}\n" + for index, op_info in enumerate(self.op_list): + ret += f"op_info-{index}, {str(op_info.print_dict())}\n" + + ret += f"The length of swap_list is {len(self.swap_list)}\n" + for index, swap_info in enumerate(self.swap_list): + ret += f"swap_info-{index}, {str(swap_info.print_dict())}\n" + + for index, memory_reduction in enumerate(self.memory_reduction_list): + ret += f"memory_reduction-{index}, {str(memory_reduction.print_dict())}\n" + + ret += "=" * 20 + "ProfilerDataOneStep SHOW END" + "=" * 20 + "\n" + return ret + + @property + def length(self): + return len(self.op_list) + + def acquire_data(self): + op_list = get_smart_swap_cpp().getProfilerOpInfoData() + swap_list = get_smart_swap_cpp().getProfilerSwapInfoData() + self.op_list = [ProfilerOpInfo(i) for i in op_list] + self.swap_list = [ProfilerSwapInfo(i) for i in swap_list] + get_smart_swap_cpp().updateProfiler() + + def filter_swap_list(self): + """ + 
def get_max_memory(self, extra_swap_out_dict=None):
    """Compute the theoretical (swap-free) active memory per op and return its peak.

    For every op, ``op.theoretical_active_bytes`` is set to the recorded
    ``active_bytes`` plus the running balance of swapped-out minus swapped-in
    bytes up to and including that op.

    Args:
        extra_swap_out_dict: optional mapping from memory-peak index to the
            swap-out records of that peak that have no matching swap-in; when
            given, their total size is removed from the balance once the peak
            has been left.

    Returns:
        The maximum ``theoretical_active_bytes`` over all ops whose stage is
        neither INIT nor OPTIM, or 0 if there is no such op.
    """
    if extra_swap_out_dict:
        for i, item in extra_swap_out_dict.items():
            self.print_with_rank(f"extra_swap_out_dict has {len(item)} at {i}-th mp", print_level=PrintLevel.INFO)
    # Index the swap records by the op id at which they happened.
    swap_list_dict = {}
    for swap_info in self.swap_list:
        swap_list_dict.setdefault(swap_info.op_id, []).append(swap_info)

    # Running difference between theoretical and measured active bytes.
    theoretical_minus_actual = 0
    cur_mp_idx = 0
    for op in self.op_list:
        swap_info_list = swap_list_dict.get(op.op_id, [])
        # One op id may correspond to several swaps.
        swap_out_size = sum(info.size for info in swap_info_list if info.swap_name == "swapOut")
        swap_in_size = sum(info.size for info in swap_info_list if info.swap_name == "swapIn")

        # Correct the memory curve per MemoryPeakInfo: when a new peak is entered,
        # remove the total size of tensors that were swapped out but never swapped in.
        if extra_swap_out_dict:
            if cur_mp_idx < len(self.memory_peaks) and op.op_id > self.memory_peaks[cur_mp_idx].end_opid:
                # NOTE(review): only the extras of the first peak left behind are
                # subtracted, even if the loop below advances over several peaks
                # at once - confirm that this is intended.
                extra_swap_out_size = sum([item.size for item in extra_swap_out_dict[cur_mp_idx]])
                while cur_mp_idx < len(self.memory_peaks) and op.op_id > self.memory_peaks[cur_mp_idx].end_opid:
                    cur_mp_idx += 1
                theoretical_minus_actual -= extra_swap_out_size

        theoretical_minus_actual = theoretical_minus_actual + swap_out_size - swap_in_size
        op.theoretical_active_bytes = op.active_bytes + theoretical_minus_actual
    return max(
        (
            op.theoretical_active_bytes
            for op in self.op_list
            if op.stage.stage_type not in [SwapStageType.INIT, SwapStageType.OPTIM]
        ),
        default=0,
    )
def get_memory_peak_mri(self):
    """Record, for every memory peak, the first and last op id needing reduction.

    Sets ``mp_mri_start_opid`` / ``mp_mri_end_opid`` on each entry of
    ``self.memory_peaks`` to the op ids of the first and last
    MemoryReductionInfo inside ``[start_opid, end_opid]``, or leaves both at
    -1 when the peak holds none. Relies on ``memory_reduction_list`` being
    ordered by ascending op id.
    """
    self.print_with_rank(
        f"current memory_reduction_list has len {len(self.memory_reduction_list)}", print_level=PrintLevel.INFO
    )
    if len(self.memory_reduction_list) == 0:
        return
    mri_list = self.memory_reduction_list
    mri_count = len(mri_list)
    cur_mri = 0
    for idx, mp in enumerate(self.memory_peaks):
        mp.mp_mri_start_opid = -1
        mp.mp_mri_end_opid = -1
        # Skip entries lying before this peak. Without this, one entry outside
        # every peak would stall the cursor and leave all later peaks empty.
        while cur_mri < mri_count and mri_list[cur_mri].op_id < mp.start_opid:
            cur_mri += 1
        first_in_peak = cur_mri
        while cur_mri < mri_count and mri_list[cur_mri].op_id <= mp.end_opid:
            cur_mri += 1
        if cur_mri > first_in_peak:
            mp.mp_mri_start_opid = mri_list[first_in_peak].op_id
            mp.mp_mri_end_opid = mri_list[cur_mri - 1].op_id
        self.print_with_rank(
            f"current mp {idx} starts at opid {mp.mp_mri_start_opid} and ends at opid {mp.mp_mri_end_opid}",
            print_level=PrintLevel.INFO,
        )
class TensorInfoDetail:
    """A profiled tensor together with the ops that use it."""

    def __init__(self, profiler_tensor_info):
        self.info: ProfilerTensorInfo = profiler_tensor_info
        self.used_op_list: List[ProfilerOpInfo] = []
        # A tensor may be swapped more than once.
        self.policy_candidate_list: List[SwapPolicyCandidate] = []

    def update_op(self, op: ProfilerOpInfo):
        """Append ``op`` unless it has the same op id as the last recorded op."""
        history = self.used_op_list
        if history and history[-1].op_id == op.op_id:
            return
        history.append(op)

    def is_used_multiple_times(self):
        """Return True if at least two uses were recorded.

        A tensor that is used only once does not need to be swapped.
        """
        return len(self.used_op_list) > 1
def to_cpp(self):
    """Create a ``SwapPolicyInfo`` object and fill it from this candidate.

    Optimizer/weight candidates export the tensor's current pointer and no op
    ids; all other candidates export the original pointer plus the swap-out
    and swap-in op ids and set ``executorNeedMatch``.
    """
    policy_info = get_smart_swap_cpp().SwapPolicyInfo()
    needs_match = not self.is_optimizer_or_weight
    policy_info.executorNeedMatch = needs_match

    tensor_info = self.tensor.info
    if self.is_optimizer_or_weight:
        tensor_info.ptr.to_cpp(policy_info.ptr)
    else:
        tensor_info.origi_ptr.to_cpp(policy_info.ptr)
        policy_info.swapOutOpId = self.swap_out_op.op_id
        policy_info.swapInOpId = self.swap_in_op.op_id

    # Stage fields are written in a fixed order after the pointer.
    self.swap_out_stage.to_cpp(policy_info.swapOutStage)
    self.swap_in_stage_actual.to_cpp(policy_info.swapInStage)
    self.free_stage.to_cpp(policy_info.freeStage)
    self.swap_in_free_stage.to_cpp(policy_info.swapInFreeStage)
    return policy_info
swap_in_op=self.swap_in_op.print_dict_brief() if hasattr(self, "swap_in_op") else "None", + swap_out_stage=str(self.swap_out_stage), + swap_in_stage=str(self.swap_in_stage), + swap_out_stage_actual=str( + self.swap_out_stage_actual if hasattr(self, "swap_out_stage_actual") else "None" + ), + swap_in_stage_actual=str( + self.swap_in_stage_actual if hasattr(self, "swap_in_stage_actual") else "None" + ), + free_stage=str(self.free_stage), + swap_in_free_stage=str(self.swap_in_free_stage), + ) + ) + + +class SwapPolicy: + def __init__(self, swap_policy_candidates: List[SwapPolicyCandidate], profiler_data: ProfilerDataOneStep): + self.__swap_policy_candidates: List[SwapPolicyCandidate] = swap_policy_candidates + self.__profiler_data: ProfilerDataOneStep = profiler_data + self.__stage_list: List[SwapStage] = [] + self.__stage_map: Dict[SwapStage, int] = {} + self.__parse_stage_info() + + def __parse_stage_info(self): + for op in self.__profiler_data.op_list: + if not self.__stage_list or op.stage != self.__stage_list[-1]: + self.__stage_list.append(op.stage) + self.__stage_map[op.stage] = self.__stage_list.index(op.stage) + + def __auto_lint(self, policy: List[SwapPolicyCandidate]): + # remove candidates with identical swap out and swap in stages. + cand_remove_list = [] + for cand in policy: + swap_out_stage = cand.swap_out_stage_actual + swap_in_stage = cand.swap_in_stage_actual + if swap_out_stage == swap_in_stage: + cand_remove_list.append(cand) + continue + for cand in policy.copy(): + if cand in cand_remove_list: + policy.remove(cand) + + # offset free stage by one if overlap. 
class PolicyResult:
    """Result of a policy generation pass, ready to be pushed to the C++ side."""

    # Upper bound on the number of op names sent by set_py_swap_policy_to_cpp.
    MAX_OP_NAMES_LENGTH = 64

    def __init__(self):
        self.policy_list: List[SwapPolicyCandidate] = None  # tensors selected for swap-out / swap-in
        self.policy_step = None  # profiling step the policy is matched against
        self.tensor_size_thresh = None  # smallest tensor size that may be swapped
        self.fwd_op_layer_info = None  # forward layer info of the profiling at policy_step
        self.bwd_op_layer_info = None  # backward layer info of the profiling at policy_step
        self.op_names_frequency_list = None  # op names of that profiling, most frequent first

    def clear(self):
        """Reset every field to None."""
        self.policy_list = None
        self.policy_step = None
        self.tensor_size_thresh = None
        self.fwd_op_layer_info = None
        self.bwd_op_layer_info = None
        self.op_names_frequency_list = None

    def __str__(self):
        info = dict(
            policy_step=self.policy_step,
            tensor_size_thresh=self.tensor_size_thresh,
            fwd_op_layer_info=self.fwd_op_layer_info,
            bwd_op_layer_info=self.bwd_op_layer_info,
        )
        # policy_list is None right after construction or clear(); treat that
        # as "no swaps" instead of failing inside len().
        policy_list = self.policy_list if self.policy_list is not None else []
        ret = f"Basic policy is {info}\n"
        ret += f"A total number of {len(policy_list)} swaps are selected.\n"
        for index, item in enumerate(policy_list):
            ret += f"policy-{index}: \t\t{item}\n"
        return ret

    def set_py_swap_policy_to_cpp(self, config: SwapConfig):
        """Push the policy, layer info and op-name frequencies to the C++ side.

        Args:
            config: SwapConfig whose properties are updated with the layer
                info, tensor size threshold and policy step.

        Raises:
            RuntimeError: if converting a candidate with ``to_cpp()`` fails;
                the original exception is chained.
        """
        # Convert the candidate tensors for the C++ side.
        swap_policy_info_list = []
        if self.policy_list is not None:
            for candidate in self.policy_list:
                try:
                    swap_policy_info_list.append(candidate.to_cpp())
                except Exception as e:
                    raise RuntimeError(f"candidate.to_cpp() error ! \n{candidate}") from e

        if self.fwd_op_layer_info is not None:
            config.fwd_op_layer_info = self.fwd_op_layer_info
        if self.bwd_op_layer_info is not None:
            config.bwd_op_layer_info = self.bwd_op_layer_info

        if self.policy_step:
            # Use the snake_case property, like policy_step below, so the
            # value reaches the wrapped C++ config rather than landing in a
            # stray attribute on the Python wrapper.
            config.tensor_size_thresh = self.tensor_size_thresh
            config.policy_step = self.policy_step
            # Send the most frequent op names; tolerate a missing list.
            op_names = self.op_names_frequency_list or []
            get_smart_swap_cpp().setFrequentOpNameData(op_names[: self.MAX_OP_NAMES_LENGTH])
        else:
            config.tensor_size_thresh = swap_policy_config.tensor_size_thresh
            config.policy_step = 0
            get_smart_swap_cpp().setFrequentOpNameData([])

        get_smart_swap_cpp().setPolicyInfoData(swap_policy_info_list)
class SwapEngine:
    """Build swap policies from per-step profiler data.

    The engine stores the profiler data of every step it is given, tracks
    which step the current policy is based on, adjusts the global
    ``swap_policy_config`` between iterations and drives ``PolicyGenerator``
    to select the tensors to swap.
    """

    def __init__(self, models, optimizer, get_optimizer_tensors_fcn, config: SwapConfig, custom_policy_fcn):
        """Store collaborators and initialise all engine state.

        Args:
            models: Model object(s); stored as-is.
            optimizer: Optimizer whose tensors are recorded as OPTIM tensors.
            get_optimizer_tensors_fcn: Callable returning the optimizer's tensors.
            config: Shared swap configuration; ``config.step`` is exposed as ``step``.
            custom_policy_fcn: Optional user callback receiving a ``SwapPolicy``.
        """
        # Collaborating modules.
        self.models = models
        self.optimizer = optimizer
        self.get_optimizer_tensors_fcn = get_optimizer_tensors_fcn
        self.custom_policy_fcn = custom_policy_fcn

        # Control parameters.
        self.config = config
        self.rank = swap_policy_config.rank
        self.output_root_path = swap_policy_config.output_root_path
        if swap_policy_config.save_policy or swap_policy_config.save_profiler_data:
            # Every rank writes its own "*_{rank}" file into this directory, so
            # every rank must make sure it exists (previously only rank 0 did).
            os.makedirs(self.output_root_path, exist_ok=True)
        self.duration_time = None
        self.step_parameters = {}
        self.all_step_duration = {}

        # Profiling data.
        self.profiler_op_step: ProfilerDataOneStep = None
        self.current_profiler_step: ProfilerDataOneStep = None
        self.forced_swap_list = []  # swaps flagged is_oom in the latest appended step
        self.profiler_all_step: Dict[int, ProfilerDataOneStep] = dict()  # profiler data of all steps so far

        # Processed data used for policy generation.
        self.tensor_info_dict: Dict[UniqueSwapPtr, TensorInfoDetail] = dict()

        # Most recently generated policy.
        self.newest_policy_result: PolicyResult = PolicyResult()
        self.map_unique_ptr2tensor_type = dict()

        # Optional user policy callback.
        if self.custom_policy_fcn is None:
            print_with_rank("User policy is missing, skip user policy.", print_level=PrintLevel.INFO)
            self.use_custom_policy = False
        else:
            print_with_rank("Found user policy.", print_level=PrintLevel.INFO)
            self.use_custom_policy = True

    @property
    def step(self):
        """Current step counter, read from the shared config."""
        return self.config.step

    def print_with_rank(self, message, print_level=PrintLevel.DEBUG):
        """Log ``message`` with the ``SwapEngine`` prefix."""
        print_with_rank(message, prefix="SwapEngine", print_level=print_level)

    def clear_policy(self):
        """Reset the latest policy result."""
        self.newest_policy_result.clear()

    def _open_output_file(self, file_name):
        """Open ``file_name`` in the output directory for binary writing (owner read/write only)."""
        os.makedirs(self.output_root_path, exist_ok=True)
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
        mode = stat.S_IWUSR | stat.S_IRUSR
        path = os.path.join(self.output_root_path, file_name)
        return os.fdopen(os.open(path, flags, mode=mode), "wb")

    def append_profiler_data(self, profiler_op_step: ProfilerDataOneStep):
        """Store one step of profiler data and refresh the OOM-forced swap list.

        When ``save_profiler_data`` is enabled the whole history is pickled
        to ``profiler_all_step_{rank}.pkl``.
        """
        self.profiler_all_step[profiler_op_step.step] = profiler_op_step
        self.forced_swap_list = [i for i in profiler_op_step.swap_list if i.is_oom]
        if swap_policy_config.save_profiler_data:
            with self._open_output_file(f"profiler_all_step_{self.rank}.pkl") as file:
                pickle.dump(self.profiler_all_step, file)

    def save_policy_list(self, swap_list):
        """Write the tensor info of every candidate in ``swap_list`` to ``Policy_{rank}.csv``."""
        swap_list_pd = pandas.DataFrame([i.tensor.info.get_dict() for i in swap_list])
        with self._open_output_file(f"Policy_{self.rank}.csv") as file:
            swap_list_pd.to_csv(file)

    def record_tensor_types(self):
        """Rebuild the unique-pointer -> tensor-type map for optimizer tensors."""
        self.map_unique_ptr2tensor_type.clear()
        # Special tensors such as optimizer state are recorded in a map on the
        # C++ side so they can be matched there.
        if self.optimizer and self.get_optimizer_tensors_fcn:
            tensors = self.get_optimizer_tensors_fcn(self.optimizer)
            unique_ptrs = record_tensor_ptr_with_types(tensors, SwapTensorType.OPTIM, 1, False)
            for unique_ptr in unique_ptrs:
                self.map_unique_ptr2tensor_type[UniqueSwapPtr(unique_ptr)] = SwapTensorType.OPTIM

    def is_similar_with_policy_profiler(self, profiler_op_step: ProfilerDataOneStep):
        """Return whether ``profiler_op_step`` resembles the step the policy is based on.

        With no policy step recorded yet, the sequence is treated as similar.
        """
        policy_step = self.newest_policy_result.policy_step
        if policy_step is None:
            ret = True
            self.print_with_rank("The policy step is None, maybe initial stage ...")
        else:
            ret = self.is_equal_op_sequence(swap_policy_config, profiler_op_step, self.profiler_all_step[policy_step])
            self.print_with_rank(
                (
                    f"now: {len(profiler_op_step.op_list)}, "
                    f"last: {self.profiler_all_step[policy_step].length}, "
                    f"ret: {ret}"
                )
            )
        if ret:
            self.print_with_rank("The sequence is similar with the policy one ...")
        return ret

    @timer
    def process_profiler_data(self):
        """Prepare ``self.profiler_op_step`` for policy generation and mark it as the policy step."""
        self.print_with_rank("Processing data ... ", print_level=PrintLevel.INFO)
        # Collect the unique_ptr information of special tensors.
        self.record_tensor_types()
        # Map every UniquePtr in profiler_op_step to its latest pointer.
        self.profiler_op_step.map_unique_ptr_as_latest()
        # Refresh the tensor types.
        self.profiler_op_step.update_tensor_types(self.map_unique_ptr2tensor_type)
        self.print_with_rank(str(self.profiler_op_step))

        self.newest_policy_result.policy_step = self.step
        self.newest_policy_result.op_names_frequency_list = self.profiler_op_step.get_sorted_op_names()

    def run(self, profiler_op_step: ProfilerDataOneStep, is_new_op_sequence) -> PolicyResult:
        """Generate a policy for the current step.

        Args:
            profiler_op_step: Profiler data of the step that just finished.
            is_new_op_sequence: If true, base the policy on ``profiler_op_step``;
                otherwise reuse the data stored for the current policy step.

        Returns:
            The engine's ``PolicyResult``, updated in place.
        """
        self.current_profiler_step = profiler_op_step
        self.profiler_op_step = (
            profiler_op_step if is_new_op_sequence else self.profiler_all_step[self.newest_policy_result.policy_step]
        )

        # Original design notes: aggregate the previous step's parameters,
        # run time and policy result; iterate adaptively by priority; then
        # update the parameters.
        if is_new_op_sequence:
            self.process_profiler_data()

        policy_candidates, tensor_size_thresh = self.make_policy()
        self.newest_policy_result.tensor_size_thresh = tensor_size_thresh
        self.newest_policy_result.policy_list = policy_candidates
        self.newest_policy_result.fwd_op_layer_info = self.profiler_op_step.layer_info.fwd_op_layer_info
        self.newest_policy_result.bwd_op_layer_info = self.profiler_op_step.layer_info.bwd_op_layer_info

        return self.newest_policy_result

    @staticmethod
    def is_equal_op_sequence(
        policy_config, cur_sequence: ProfilerDataOneStep, target_sequence: ProfilerDataOneStep = None
    ) -> bool:
        """Tell whether ``cur_sequence`` is close enough to ``target_sequence``.

        Only the lengths are compared for now: the sequences count as equal
        when the length difference relative to the target length is below
        ``policy_config.op_diff_thresh``.

        Returns:
            ``False`` when there is no target. When the target is empty, the
            sequences are equal only if the current one is empty too (this
            used to raise ZeroDivisionError).
        """
        if target_sequence is None:
            return False
        cur_len = cur_sequence.length
        target_len = target_sequence.length
        if target_len == 0:
            return cur_len == 0
        return abs(cur_len - target_len) / target_len < policy_config.op_diff_thresh

    def record_parameters(self):
        """Snapshot the tunable config values under the current step."""
        self.step_parameters[self.step] = {
            "duration_time": swap_policy_config.duration_time,
            "size_coverage_weight": swap_policy_config.size_coverage_weight,
            "redundant_memory": swap_policy_config.redundant_memory,
        }

    def set_parameters(self):
        """Restore the recorded parameters of the fastest recorded step.

        Only steps with recorded parameters are considered, so a tie on
        duration with an unrecorded step can no longer cause a KeyError.
        Does nothing when no parameters have been recorded.
        """
        if not self.step_parameters:
            return
        best_step = min(self.step_parameters, key=lambda step: self.all_step_duration[step])
        best_parameters = self.step_parameters[best_step]
        swap_policy_config.duration_time = best_parameters["duration_time"]
        swap_policy_config.size_coverage_weight = best_parameters["size_coverage_weight"]
        swap_policy_config.redundant_memory = best_parameters["redundant_memory"]

    def adjust_parameters(self):
        """Tune ``swap_policy_config`` before a policy is generated, then record it."""
        # duration_time may not exist yet on the config, hence the getattr default.
        swap_policy_config.duration_time = min(
            getattr(swap_policy_config, "duration_time", float("inf")),
            self.current_profiler_step.duration_time * swap_policy_config.adjust_step_duration,
        )

        if self.forced_swap_list:
            # The last appended step had OOM-forced swaps: reserve more memory.
            swap_policy_config.redundant_memory += swap_policy_config.adjust_memory
            self.profiler_op_step.init_memory_reduction_list()
            self.record_parameters()
            return

        swap_policy_config.size_coverage_weight += swap_policy_config.adjust_size_coverage_weight
        self.record_parameters()

    def check_policy_valid(self, candidate: SwapPolicyCandidate):
        """Check the op-id ordering of a candidate's swap stages.

        Expected ordering:
          * swap-out free stage lies in (swap-out op, actual swap-in stage)
          * actual swap-in stage lies in (swap-out free stage, swap-in op)
          * swap-in free stage lies after the actual swap-in stage

        Returns:
            ``True`` if the ordering holds, ``False`` (after printing the
            violated rule) otherwise. Optimizer/weight candidates are not
            checked and always return ``True``.
        """
        # NOTE(review): the stage op ids below were only bound for
        # non-optimizer/weight candidates in the original; skipping those
        # candidates avoids an unbound-local error. Confirm this is intended.
        if candidate.is_optimizer_or_weight:
            return True

        layer_start_opid = self.profiler_op_step.layer_start_opid
        free_stage_opid = layer_start_opid[candidate.free_stage]
        swap_in_stage_actual_opid = layer_start_opid[candidate.swap_in_stage_actual]
        swap_in_free_stage_opid = layer_start_opid[candidate.swap_in_free_stage]
        swap_out_opid = candidate.swap_out_op.op_id
        swap_in_opid = candidate.swap_in_op.op_id

        if not (swap_out_opid < free_stage_opid < swap_in_stage_actual_opid):
            print(
                f"Error! swap_out_free_stage_opid [{free_stage_opid}] should be > swap_out_opid [{swap_out_opid}] and < swap_in_stage_actual_opid [{swap_in_stage_actual_opid}]"
            )
            return False
        if not (swap_in_stage_actual_opid < swap_in_opid):
            print(
                f"Error! swap_in_stage_actual_opid [{swap_in_stage_actual_opid}] should be < swap_in_opid [{swap_in_opid}]"
            )
            return False
        if not (swap_in_free_stage_opid > swap_in_stage_actual_opid):
            print(
                f"Error! swap_in_free_stage_opid [{swap_in_free_stage_opid}] should be > swap_in_stage_actual_opid [{swap_in_stage_actual_opid}]"
            )
            return False
        return True

    @timer
    def make_policy(self):
        """Select the tensors to swap for ``self.profiler_op_step``.

        Returns:
            A tuple ``(swap_list, tensor_size_thresh)``: the sorted swap list
            and the smallest tensor size in it, or the configured default
            threshold when the list is empty.
        """
        self.print_with_rank("Making policy ...", print_level=PrintLevel.INFO)
        self.adjust_parameters()
        self.profiler_op_step.reset_memory_reduction_list()
        policy_generator = PolicyGenerator(self.profiler_op_step)
        policy_generator.select_candidate()

        start_time = time.time()
        if self.use_custom_policy:
            policy_generator.simulation(use_custom_policy=True)
        else:
            policy_generator.compute_score()
            while not policy_generator.reduction_target_satisfied():
                # Look for policies that can still reduce memory.
                policy_generator.get_intersect_candidates()
                # Give up when nothing can be selected.
                if not policy_generator.intersect_candidates:
                    self.print_with_rank("Fail to reach reduction target ...", print_level=PrintLevel.INFO)
                    break
                policy_generator.simulation()
        end_time = time.time()
        self.print_with_rank(f"policy generate takes {end_time - start_time} seconds.", print_level=PrintLevel.INFO)

        policy_generator.swap_arranger.save_stage_time_left()
        policy_generator.swap_arranger.set_free_stage()

        if self.use_custom_policy:
            # Wrap the existing swap list and profiler info in a SwapPolicy,
            # let the user callback edit it, then read the candidates back.
            curr_swap_policy = SwapPolicy(policy_generator.swap_list, self.profiler_op_step)
            self.custom_policy_fcn(curr_swap_policy)
            policy_generator.swap_list = curr_swap_policy.get_candidates()
        swap_list = policy_generator.get_sorted_swap_list()
        tensor_size_thresh = (
            min(candidate.tensor.info.size for candidate in swap_list)
            if swap_list
            else swap_policy_config.tensor_size_thresh
        )

        optim_num = sum(1 for i in swap_list if i.tensor.info.tensor_type == SwapTensorType.OPTIM)
        model_num = len(swap_list) - optim_num
        self.print_with_rank(
            (
                f"\n\tCurrent Step: {self.current_profiler_step.step}, "
                f"Policy Step: {self.profiler_op_step.step}, "
                f"Max Memory: {self.profiler_op_step.max_memory}, "
                f"Target Memory: {self.profiler_op_step.target_memory}, "
                f"Duration Time: {swap_policy_config.duration_time}, "
                f"Size Cov Weight: {swap_policy_config.size_coverage_weight}, "
                f"\n\tCandidate Num: {len(policy_generator.policy_candidate_list)}, "
                f"Policy Num: {len(swap_list)}, "
                f"Optim Num: {optim_num}, "
                f"Model Num: {model_num}, "
                f"Min Tensor Size: {tensor_size_thresh}"
            ),
            print_level=PrintLevel.INFO,
        )
        if swap_policy_config.save_policy:
            self.save_policy_list(swap_list)
        return swap_list, tensor_size_thresh
@singleton
class SwapManager:
    """Per-process driver of smart swap: owns the config, hooks and stage machine.

    ``step()`` is expected to be called once after every training step; it
    feeds the step's profiler data to ``SwapEngine`` and moves between the
    ``SwapRunningStage`` stages.
    """

    def __init__(
        self,
        num_micro_batch_fcn,
        models,
        num_layers,
        optimizer=None,
        get_optimizer_tensors_fcn=None,
        get_shared_tensors_fcn=None,
        custom_policy_fcn=None,
    ):
        """Enable the op hook, initialise the C++ manager and all Python state.

        Args:
            num_micro_batch_fcn: Callable returning the number of micro batches.
            models: Model or iterable of models to register swap hooks on.
            num_layers: Number of model layers; a positive integer or -1.
            optimizer: Optional optimizer forwarded to ``SwapEngine``.
            get_optimizer_tensors_fcn: Optional callable returning optimizer tensors.
            get_shared_tensors_fcn: Optional callable returning shared-memory tensors.
            custom_policy_fcn: Optional user policy callback.
        """
        if torch.distributed.is_initialized():
            swap_policy_config.rank = torch.distributed.get_rank()

        option = {"OP_HOOK_ENABLE": "enable"}
        torch.npu.set_option(option)

        self.smart_swap_cpp = get_smart_swap_cpp()
        self.smart_swap_cpp.init_cpp_manager()
        self.smart_swap_cpp.NPUSwapManager.GetInstance().swap_enable = True
        self.smart_swap_cpp.NPUSwapManager.GetInstance().swap_oom_enable = True
        self.config = SwapConfig()
        self.num_micro_batch_fcn = num_micro_batch_fcn
        self.models = models
        self.get_shared_tensors_fcn = get_shared_tensors_fcn
        self.swap_hook_registers: list = []
        self.swap_engine = SwapEngine(models, optimizer, get_optimizer_tensors_fcn, self.config, custom_policy_fcn)
        self.start_time = time.time()
        self.cur_warmup_step = 0
        self.running_stage = SwapRunningStage.RESERVED
        self.is_new_op_sequence = True
        self.model_num_layers = num_layers
        self.global_initialize()

    def __del__(self):
        """Disable the op hook and tear down the C++ manager if it was created."""
        option = {"OP_HOOK_ENABLE": "disable"}
        torch.npu.set_option(option)
        # __init__ may have failed before smart_swap_cpp was assigned; do not
        # raise a second (AttributeError) exception from the finalizer.
        smart_swap_cpp = getattr(self, "smart_swap_cpp", None)
        if smart_swap_cpp is not None:
            smart_swap_cpp.deinit_cpp_manager()

    def __check_layer_param(self, model_num_layers):
        """Validate the layer count.

        Raises:
            ValueError: If it is not an int, or is neither positive nor -1.
        """
        if not isinstance(model_num_layers, int):
            raise ValueError("model_num_layers must be an integer.")
        if model_num_layers != -1 and model_num_layers <= 0:
            raise ValueError("model_num_layers must be a positive integer or -1.")

    def print_with_rank(self, message, print_level=PrintLevel.DEBUG):
        """Log ``message`` with the ``SwapManager`` prefix."""
        print_with_rank(message, prefix="SwapManager", print_level=print_level)

    def global_initialize(self):
        """Reset the config to the INIT stage, register hooks and enter warm-up.

        Raises:
            ValueError: If ``self.model_num_layers`` is invalid.
        """
        # Validate first so an invalid layer count is rejected before hooks
        # are registered or the shared config is modified.
        self.__check_layer_param(self.model_num_layers)

        stage = self.config.stage
        stage.stage_type = SwapStageType.INIT
        self.config.stage = stage
        self.config.step = 0
        self.config.micro_batch_num = self.num_micro_batch_fcn()
        self.config.fwd_op_layer_info = []
        self.config.bwd_op_layer_info = []
        self.register_model_hooks(self.models)
        self.record_shared_memory(self.models)
        self.start_time = time.time()
        self.init_for_new_op_seq()
        self.config.enable_profiler = True
        self.config.enable_executor = False
        self.config.enable_custom_record_stream = swap_policy_config.enable_custom_record_stream
        # -1 is passed through; otherwise use the smallest multiple of the
        # layer count that is strictly greater than 10.
        swap_policy_config.logical_layer_num = (
            -1 if self.model_num_layers < 0 else (10 // self.model_num_layers + 1) * self.model_num_layers
        )

    def record_model_tensor_type(self, models):
        """Record the data of every model parameter as a MODEL tensor."""
        tensors = []
        for model in models:
            for param in model.parameters():
                tensors.append(param.data)

        record_tensor_ptr_with_types(tensors, SwapTensorType.MODEL, 0, False)

    def record_shared_memory(self, models):
        """Record the tensors returned by ``get_shared_tensors_fcn`` as SHARED_MEMORY."""
        if models and self.get_shared_tensors_fcn:
            tensors = self.get_shared_tensors_fcn(models)
            record_tensor_ptr_with_types(tensors, SwapTensorType.SHARED_MEMORY, 0, True)

    def init_for_new_op_seq(self):
        """Return to the warm-up stage and drop the current policy."""
        self.print_with_rank("Call init_for_new_op_seq")
        self.running_stage = SwapRunningStage.WARMUP_STAGE
        self.swap_engine.clear_policy()
        self.is_new_op_sequence = True
        self.cur_warmup_step = 0

    def step(self):
        """Finish one training step: collect profiler data, advance the stage machine, apply the policy."""
        end_time = time.time()
        self.config.one_step_duration = end_time - self.start_time
        for swap_hook_register in self.swap_hook_registers:
            swap_hook_register.reset()
        self.config.micro_batch_num = self.num_micro_batch_fcn()
        profiler_data_one_step = ProfilerDataOneStep(
            self.config.one_step_duration, self.config.step, self.config.is_oom, self.config.enable_profiler
        )
        self.swap_engine.append_profiler_data(profiler_data_one_step)
        self.swap_engine.all_step_duration[self.swap_engine.step] = self.config.one_step_duration

        self.print_with_rank(
            (
                f"Step: {self.config.step}, Time elapsed: {self.config.one_step_duration}, "
                f"Logical layer num: {swap_policy_config.logical_layer_num}, "
                f"Op num: {len(profiler_data_one_step.op_list)}, "
                f"Current running stage: {self.running_stage.name}, OOM state: {self.config.is_oom}"
            ),
            print_level=PrintLevel.INFO,
        )
        self.print_with_rank(
            ("OOM swap: \n" + "\n".join(str(i) for i in profiler_data_one_step.swap_list if i.is_oom)),
            print_level=PrintLevel.INFO,
        )
        self.print_with_rank(f"{str(profiler_data_one_step)}")

        # Stage transitions.
        # NOTE(review): the nesting of the two WARMUP checks is reconstructed
        # from whitespace-collapsed text; confirm against the original file.
        if self.running_stage == SwapRunningStage.WARMUP_STAGE:
            if self.swap_engine.is_similar_with_policy_profiler(profiler_data_one_step):
                self.cur_warmup_step += 1
            if self.cur_warmup_step == swap_policy_config.warmup_step:
                self.running_stage = SwapRunningStage.SEARCHING_POLICY_STAGE
        elif self.running_stage == SwapRunningStage.SEARCHING_POLICY_STAGE:
            self.cur_warmup_step += 1
            if not self.swap_engine.is_similar_with_policy_profiler(profiler_data_one_step):
                self.init_for_new_op_seq()
            elif self.cur_warmup_step == swap_policy_config.stable_step:
                self.running_stage = SwapRunningStage.STABLE_STAGE
        elif self.running_stage == SwapRunningStage.STABLE_STAGE:
            # An OOM-forced swap while stable means the policy no longer fits.
            if self.swap_engine.forced_swap_list:
                self.init_for_new_op_seq()
        else:
            raise RuntimeError(f"Get incorrect running_stage: {self.running_stage.name}")

        # Act according to the (possibly new) stage.
        self.print_with_rank(f"Change running stage to: {self.running_stage.name}", print_level=PrintLevel.INFO)
        if self.running_stage == SwapRunningStage.WARMUP_STAGE:
            self.config.enable_profiler = True
            self.config.enable_executor = False
        elif self.running_stage == SwapRunningStage.SEARCHING_POLICY_STAGE:
            self.config.enable_profiler = True
            self.config.enable_executor = True
            policy_result = self.swap_engine.run(profiler_data_one_step, self.is_new_op_sequence)
            policy_result.set_py_swap_policy_to_cpp(self.config)
            self.smart_swap_cpp.updateStep()
            self.is_new_op_sequence = False
            self.print_with_rank(f"Policy result:\n{policy_result}", print_level=PrintLevel.DEBUG)
        elif self.running_stage == SwapRunningStage.STABLE_STAGE:
            self.config.enable_profiler = False
            self.config.enable_executor = True
            self.smart_swap_cpp.updateStep()
        else:
            raise RuntimeError(f"Get incorrect running_stage: {self.running_stage.name}")

        self.print_with_rank(
            (
                f"All step duration: "
                f"{list(self.swap_engine.all_step_duration.items())}\n\n"
            ),
            print_level=PrintLevel.INFO,
        )

        self.config.step += 1
        self._update_config_for_step_hook(SwapStageType.INIT, 0, 0, 0)
        self.start_time = time.time()

    def _update_config_for_step_hook(
        self, stage_type: SwapStageType, layer_index, micro_batch_index, current_stage_op_id
    ):
        """Write the given stage description and stage op id into the shared config."""
        stage = self.config.stage
        stage.stage_type = stage_type
        stage.layer_index = layer_index
        stage.micro_batch_index = micro_batch_index

        # The stage is modified on a local object and then assigned back.
        self.config.stage = stage
        self.config.current_stage_op_id = current_stage_op_id

    def fwd_pre_hook_custom_func(self, _, fwd_idx):
        """Forward pre-hook: mark the FWD stage for micro batch ``fwd_idx``."""
        self._update_config_for_step_hook(SwapStageType.FWD, 1, fwd_idx, 0)

    def bwd_pre_hook_custom_func(self, _, bwd_idx):
        """Backward pre-hook: mark the BWD stage for micro batch ``bwd_idx``."""
        self._update_config_for_step_hook(SwapStageType.BWD, 1, bwd_idx, 0)

    def bwd_post_hook_custom_func(self, _, bwd_idx):
        """Backward post-hook: switch to the OPTIM stage once ``bwd_idx`` equals the micro-batch count."""
        if bwd_idx == self.num_micro_batch_fcn():
            self._update_config_for_step_hook(SwapStageType.OPTIM, 0, 0, 0)

    def register_model_hooks(self, models):
        """Register swap hooks and this manager's stage callbacks on every model."""
        if not isinstance(models, Iterable):
            models = [models]
        for model in models:
            swap_hook_register = register_swap_hooks_to_modules(model)
            swap_hook_register.register_custom_func(
                self.fwd_pre_hook_custom_func, None, self.bwd_pre_hook_custom_func, self.bwd_post_hook_custom_func
            )
            self.swap_hook_registers.append(swap_hook_register)
        self.print_with_rank("Register model swap hooks completed.")
def train_step_wrapper(train_step):
    """Decorate megatron's ``train_step`` so the swap manager steps after it.

    The manager is obtained through ``MegatronSwapManager`` on every call
    using the positional arguments of ``train_step`` and megatron's
    ``get_args()``; its ``step()`` runs after ``train_step`` returns, and the
    return value of ``train_step`` is passed through unchanged.
    """

    @wraps(train_step)
    def wrapper(*args, **kwargs):
        swap_manager = MegatronSwapManager(args, get_args())
        result = train_step(*args, **kwargs)
        swap_manager.step()
        return result

    return wrapper
_MIB = 1024 * 1024
_GIB = 1024 * _MIB


class SwapPolicyConfig:
    """Plain container of the tunable settings used by smart swap."""

    def __init__(self):
        # --- utilities ---
        self.rank = 0  # rank of the current process

        self.save_policy = False
        self.save_profiler_data = False

        self.print_level = 1  # print level: DEBUG=0, INFO=1, NONE=2
        self.print_rank = 0  # rank that prints messages; -1 prints on all ranks
        self.output_root_path = "./swap_output"

        # --- execution ---
        self.warmup_step = 2  # steps before entering SEARCHING_POLICY_STAGE
        self.stable_step = 10  # steps before entering STABLE_STAGE

        self.op_diff_thresh = 0.05
        self.tensor_size_thresh = 2**31 - 1

        self.enable_custom_record_stream = True
        self.free_stage_delay = 4  # force-free swap-out memory N stages later
        self.swap_in_free_stage_delay = 2  # force-free swap-in memory N stages later

        # --- bandwidth ---
        self.D2H_bandwidth = 64 / 2.5 * 1000
        self.H2D_bandwidth = 64 / 2.5 * 1000

        # --- memory target ---
        # OOM case: reduce to (device max memory - redundant_memory); if OOM
        #   swaps still occur in later iterations, the target shrinks by
        #   adjust_memory per step.
        # Non-OOM case: target_mode = True reduces to target_memory,
        #   target_mode = False only reduces by reduction_memory.
        self.target_mode = False
        self.reduction_memory = 3 * _GIB  # manually set amount to reduce
        self.target_memory = 40 * _GIB  # manually set target memory
        self.tensor_size_filter = 20 * _MIB  # tensors below 20MB are never candidates

        self.redundant_memory = 2 * _GIB
        self.size_coverage_weight = 2  # weight of size relative to a coverage weight of 1
        self.adjust_memory = 300 * _MIB  # automatic adjustment step for redundant_memory
        # Automatic duration adjustment: the measured step duration is
        # multiplied by this value and the minimum with the history is kept.
        self.adjust_step_duration = 1
        self.adjust_size_coverage_weight = 0  # increment applied to size_coverage_weight each time

    def __str__(self):
        return str(vars(self))


swap_policy_config = SwapPolicyConfig()
class PrintLevel(Enum):
    """Verbosity levels understood by ``print_with_rank``."""

    DEBUG = 0
    INFO = 1
    NONE = 2


def print_with_rank(message, prefix="", print_level=PrintLevel.DEBUG):
    """Print ``message`` tagged with level, rank and ``prefix``.

    Nothing is printed when ``print_level`` is below the configured
    ``swap_policy_config.print_level``, or when this rank is not the
    configured ``print_rank`` (``-1`` means every rank prints).
    """
    if swap_policy_config.print_level > print_level.value:
        return

    rank = swap_policy_config.rank
    print_rank = swap_policy_config.print_rank
    if print_rank == -1 or rank == print_rank:
        print(f"[{print_level.name}] rank[{rank}] [{prefix}]: {message}", flush=True)


def timer(func):
    """Decorator that logs the wall-clock time of each call at INFO level.

    The wrapper keeps the wrapped function's ``__name__``, ``__doc__`` and
    other metadata; previously every decorated function appeared as
    ``wrapper``.
    """
    from functools import wraps  # local import: not part of the file's top-level imports

    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        print_with_rank(
            f"Function {func.__name__} takes {end_time - start_time} seconds to execute.",
            prefix="timer",
            print_level=PrintLevel.INFO,
        )
        return result

    return wrapper
class MindspeedParallelGroup:
    """Base interface for a parallel process group.

    Concrete parallelism classes supply ``init_group``; this class stores what
    it returns and offers rank / world-size helpers on top of it.
    """

    def __init__(
        self,
        parallel_cfg: SimpleParallelCfg = None,
        pg_name: str = None,
        overlap_gp_name: str = None,
        nccl_comm_cfgs=None,
    ):
        """Create the group(s) through ``init_group`` and keep the results.

        :param parallel_cfg: Parallel configuration.
        :param pg_name: Parallel process group name.
        :param overlap_gp_name: Overlap process group name, for send/recv parallelism.
        :param nccl_comm_cfgs: Forwarded unchanged to ``init_group``.
        """
        self._pg_name = pg_name
        self._overlap_pg_name = overlap_gp_name
        created = self.init_group(parallel_cfg, pg_name, overlap_gp_name, nccl_comm_cfgs)
        self._group, self._global_ranks, self._overlap_group = created

    @staticmethod
    @abc.abstractmethod
    def init_group(
        parallel_cfg: SimpleParallelCfg,
        pg_name: str,
        overlap_gp_name: str = None,
        nccl_comm_cfgs=None,
    ):
        """Return ``(group, global_ranks, overlap_group)``; must be overridden."""
        raise NotImplementedError

    @property
    def group(self):
        """Process group returned by ``init_group``."""
        return self._group

    @property
    def overlap_group(self):
        """Overlap process group returned by ``init_group``."""
        return self._overlap_group

    @property
    def global_ranks(self):
        """Global ranks returned by ``init_group``."""
        return self._global_ranks

    def get_parallel_rank(self):
        """Return this process's rank inside ``group``.

        Raises:
            AssertionError: If torch.distributed is unavailable or uninitialised.
        """
        if not (torch.distributed.is_available() and torch.distributed.is_initialized()):
            raise AssertionError("The distribution is not available or not initialized.")
        return torch.distributed.get_rank(group=self.group)

    def get_parallel_group_world_size(self):
        """Return the world size of ``group``, or 0 without an initialised torch.distributed."""
        if not (torch.distributed.is_available() and torch.distributed.is_initialized()):
            return 0
        return torch.distributed.get_world_size(group=self.group)
def language_model_embedding_forward_wrapper(forward):
    """Wrap an embedding ``forward`` so its output is scattered when ``tp_2d`` is set.

    The wrapped ``forward`` runs unchanged. Only when ``get_args().tp_2d`` is
    truthy is its result passed through
    ``auto_grad_scatter_along_first_dim_then_last_dim`` with the TP-X and TP-Y
    collective comms.
    """

    @wraps(forward)
    def wrapper(self, *args, **kwargs):
        embeddings = forward(self, *args, **kwargs)
        if not get_args().tp_2d:
            return embeddings
        return auto_grad_scatter_along_first_dim_then_last_dim(
            embeddings, TPXCollectiveComm, TPYCollectiveComm
        )

    return wrapper
def yarn_find_correction_dim(
    num_rotations, dim, base=10000, max_position_embeddings=2048
):
    """Return ``dim * ln(max_position_embeddings / (2*pi*num_rotations)) / (2 * ln(base))``."""
    numerator = dim * math.log(max_position_embeddings / (num_rotations * 2 * math.pi))
    denominator = 2 * math.log(base)
    return numerator / denominator


def yarn_find_correction_range(
    low_rot, high_rot, dim, base=10000, max_position_embeddings=2048
):
    """Return the ``(low, high)`` correction dims for the two rotation counts.

    ``low`` is the floored correction dim of ``low_rot``, ``high`` the ceiled
    one of ``high_rot``; both are clamped into ``[0, dim - 1]``.
    """
    lower = math.floor(
        yarn_find_correction_dim(low_rot, dim, base, max_position_embeddings)
    )
    upper = math.ceil(
        yarn_find_correction_dim(high_rot, dim, base, max_position_embeddings)
    )
    # Clamp values just in case.
    return max(lower, 0), min(upper, dim - 1)


def yarn_get_mscale(scale=1, mscale=1):
    """Return ``1.0`` for ``scale <= 1``, otherwise ``0.1 * mscale * ln(scale) + 1.0``."""
    return 1.0 if scale <= 1 else 0.1 * mscale * math.log(scale) + 1.0


def yarn_linear_ramp_mask(min_, max_, dim):
    """Return a float32 tensor of length ``dim`` ramping linearly from 0 at ``min_`` to 1 at ``max_``."""
    if min_ == max_:
        max_ += 0.001  # Prevent singularity

    positions = torch.arange(dim, dtype=torch.float32)
    ramp = (positions - min_) / (max_ - min_)
    return torch.clamp(ramp, 0, 1)
def apply_yarn_scaling(freqs: torch.Tensor):
    """Compute YaRN-blended inverse frequencies from the global arguments.

    ``freqs`` is only consulted for its device.  The rotary dimension is
    ``qk_rope_head_dim`` when multi-head latent attention is enabled,
    otherwise ``hidden_size // num_attention_heads``.
    """
    cfg = get_args()

    if cfg.multi_head_latent_attention:
        rope_dim = cfg.qk_rope_head_dim
    else:
        rope_dim = cfg.hidden_size // cfg.num_attention_heads

    exponents = torch.arange(0, rope_dim, 2, dtype=torch.float32, device=freqs.device) / rope_dim
    rotary_ratio = cfg.rotary_base ** exponents
    freq_extra = 1.0 / rotary_ratio
    freq_inter = 1.0 / (cfg.rope_scaling_factor * rotary_ratio)

    low, high = yarn_find_correction_range(
        cfg.rope_scaling_beta_fast,
        cfg.rope_scaling_beta_slow,
        rope_dim,
        cfg.rotary_base,
        cfg.rope_scaling_original_max_position_embeddings,
    )
    ramp = yarn_linear_ramp_mask(low, high, rope_dim // 2).to(
        device=freqs.device, dtype=torch.float32
    )
    inv_freq_mask = 1.0 - ramp

    # Blend the interpolated and the extrapolated frequencies per dimension.
    return freq_inter * (1 - inv_freq_mask) + freq_extra * inv_freq_mask


def rotary_embedding_init_wrapper(fn):
    """Decorate a rotary-embedding ``__init__`` with global-argument overrides.

    Before calling ``fn`` the ``rotary_base`` keyword is replaced by the
    global ``rotary_base`` when that is truthy and the caller either omitted
    the keyword or passed the default 10000.  Afterwards, when the global
    ``rope_scaling_type`` is "yarn", ``self.inv_freq`` is replaced by the
    result of ``apply_yarn_scaling``.
    """

    @wraps(fn)
    def wrapper(self, *args, **kwargs):
        global_args = get_args()

        if global_args.rotary_base:
            caller_kept_default = (
                "rotary_base" not in kwargs or kwargs["rotary_base"] == 10000  # default value
            )
            if caller_kept_default:
                kwargs["rotary_base"] = global_args.rotary_base

        fn(self, *args, **kwargs)

        if getattr(global_args, "rope_scaling_type", None) == "yarn":
            self.inv_freq = apply_yarn_scaling(self.inv_freq)

    return wrapper


def rotary_forward(self, max_seq_len: int, offset: int = 0) -> Tensor:
    """Build the table of rotary angles for positions ``offset .. offset + max_seq_len - 1``.

    Args:
        max_seq_len (int): Number of positions to generate.
        offset (int, optional): Value added to every position index. Defaults to 0.

    Returns:
        Tensor: Angles with two singleton axes inserted after the position
        axis, i.e. indexed as ``[position, 1, 1, channel]``.
    """
    if self.inv_freq.device.type == 'cpu':
        # move `inv_freq` to the accelerator once, at the first forward pass
        self.inv_freq = self.inv_freq.to(device=torch.cuda.current_device())

    positions = torch.arange(
        max_seq_len, device=self.inv_freq.device, dtype=self.inv_freq.dtype
    )
    positions = positions + offset

    if self.seq_len_interpolation_factor is not None:
        positions *= 1 / self.seq_len_interpolation_factor

    angles = torch.outer(positions, self.inv_freq)

    if self.rotary_interleaved:
        # each angle is repeated next to itself: a0, a0, a1, a1, ...
        column = angles.view(-1, 1)
        table = torch.stack((column, column), dim=-1).view(angles.shape[0], -1)
    else:
        # the whole angle vector is repeated: a0, a1, ..., a0, a1, ...
        table = torch.cat((angles, angles), dim=-1)

    return table[:, None, None, :]
def apply_rotary_pos_emb_thd(
    t: Tensor, cu_seqlens: Tensor, freqs: Tensor, rotary_interleaved: bool = False
) -> Tensor:
    """Apply RoPE to a packed input by gathering angles per position id.

    Args:
        t (Tensor): Input tensor handed unchanged to ``apply_rotary_pos_emb_bshd``.
        cu_seqlens: Despite the name and annotation, this function only reads
            ``cu_seqlens.position_ids``, a 2-D index tensor of shape
            ``[block_size, bsz]``.
            NOTE(review): presumably the packed-sequence parameter object that
            carries ``position_ids`` is passed here - confirm against callers.
        freqs (Tensor): Rotary angle table indexed by position along dim 0.
        rotary_interleaved (bool): Forwarded to ``apply_rotary_pos_emb_bshd``.

    Returns:
        Tensor: ``t`` after applying RoPE with the gathered angles.
    """
    position_ids = cu_seqlens.position_ids
    block_size, bsz = position_ids.shape
    # Pick one row of the angle table per token, then restore the
    # [block_size, bsz, 1, dim] layout expected by the bshd implementation.
    freqs = freqs[position_ids.view(-1)].reshape(block_size, bsz, 1, -1)

    return apply_rotary_pos_emb_bshd(t, freqs, rotary_interleaved)
def get_pos_emb_on_this_cp_rank(pos_emb, seq_dim):
    """Select the slice of ``pos_emb`` owned by this context-parallel rank.

    Dispatches on the global ``context_parallel_algo`` (and, for some
    algorithms, on ``attention_mask_type``, ``tp_y`` and
    ``reset_position_ids``).  An unrecognised algorithm returns ``pos_emb``
    unchanged.
    """
    args = get_args()
    cp_expanded_by_2d_tp = args.tp_y > 1
    algo = args.context_parallel_algo

    if algo == 'megatron_cp_algo':
        if args.attention_mask_type == 'general':
            return _get_pos_emb_on_this_cp_rank_in_ulysses_cp(pos_emb, seq_dim)
        if cp_expanded_by_2d_tp:
            return _get_pos_emb_on_this_tp_y_cp_rank_in_megatron_cp(pos_emb, seq_dim)
        if args.reset_position_ids and args.attention_mask_type == 'causal':
            # position ids are reset per sequence: keep the full table
            return pos_emb
        return _get_pos_emb_on_this_cp_rank_in_megatron_cp(pos_emb, seq_dim)

    if algo == 'ulysses_cp_algo':
        if cp_expanded_by_2d_tp:
            return _get_pos_emb_on_this_tp_y_cp_rank_in_ulysses_cp(pos_emb, seq_dim)
        return _get_pos_emb_on_this_cp_rank_in_ulysses_cp(pos_emb, seq_dim)

    if algo == 'hybrid_cp_algo':
        if args.attention_mask_type == 'general':
            return _get_pos_emb_on_this_cp_rank_in_hybrid_cp_general(pos_emb, seq_dim)
        return _get_pos_emb_on_this_cp_rank_in_hybrid_cp(pos_emb, seq_dim)

    if algo == 'adaptive_cp_algo':
        return _get_pos_emb_on_this_cp_rank_in_adaptive_cp(pos_emb, seq_dim)

    if algo == 'hybrid_adaptive_cp_algo':
        return _get_pos_emb_on_this_cp_rank_in_hybrid_adaptive_cp(pos_emb, seq_dim)

    return pos_emb


def _get_pos_emb_on_this_cp_rank_in_megatron_cp(pos_emb, seq_dim):
    """Keep chunks ``cp_rank`` and ``2 * cp_size - cp_rank - 1`` of the sequence axis.

    The sequence axis is viewed as ``2 * cp_size`` chunks, the two chunks
    above are selected, and the result is flattened back to one axis.
    """
    world = parallel_state.get_context_parallel_world_size()
    rank = parallel_state.get_context_parallel_rank()

    # Built in pinned host memory so the device copy can be non-blocking.
    chunk_ids = torch.tensor(
        [rank, (2 * world - rank - 1)], device="cpu", pin_memory=True
    ).cuda(non_blocking=True)

    leading = pos_emb.shape[:seq_dim]
    chunked = pos_emb.view(*leading, 2 * world, -1, *pos_emb.shape[(seq_dim + 1):])
    picked = chunked.index_select(seq_dim, chunk_ids)
    return picked.view(*picked.shape[:seq_dim], -1, *picked.shape[(seq_dim + 2):])
def _get_pos_emb_on_this_tp_y_cp_rank_in_megatron_cp(pos_emb, seq_dim):
    """Reorder the sequence axis for the TP-Y x CP group and return this rank's part.

    The sequence axis is split into ``2 * group_size`` chunks, the chunks are
    permuted with ``generate_rearrange_idx_tensor(group_size)``, the original
    shape is restored, and the axis is split again into ``group_size`` parts
    of which the one at this rank's index is returned.
    """
    original_shape = pos_emb.shape
    group = TensorParallelYUnionCP()
    group_size = group.get_parallel_group_world_size()

    # [s, ...] -> [2 * group_size, s / (2 * group_size), ...] along seq_dim
    chunked = pos_emb.view(
        *pos_emb.shape[:seq_dim], 2 * group_size, -1, *pos_emb.shape[(seq_dim + 1):]
    )

    # Reorder the chunks the same way the dataset handling does.
    order = generate_rearrange_idx_tensor(group_size)
    reordered = chunked.index_select(seq_dim, index=order).view(*original_shape)

    # [s, ...] -> [group_size, s / group_size, ...] along seq_dim
    per_rank = reordered.view(
        *reordered.shape[0:seq_dim],
        group_size,
        reordered.shape[seq_dim] // group_size,
        *reordered.shape[(seq_dim + 1):],
    )

    # NOTE(review): indexing on dim 0 matches the rank axis only when
    # seq_dim == 0, which is how the visible caller uses it - confirm.
    rank = group.get_parallel_rank()
    return per_rank[rank].squeeze(axis=0)


def _get_pos_emb_on_this_cp_rank_in_ulysses_cp(pos_emb, seq_dim):
    """Return the ``cp_rank``-th of ``cp_size`` contiguous chunks along ``seq_dim``."""
    world = parallel_state.get_context_parallel_world_size()
    rank = parallel_state.get_context_parallel_rank()
    return pos_emb.chunk(world, dim=seq_dim)[rank]


def _get_pos_emb_on_this_cp_rank_in_hybrid_cp(pos_emb, seq_dim):
    """Slice for hybrid CP: ring selection first, then a Ulysses chunk.

    The ring step keeps chunks ``r_rank`` and ``2 * r_size - r_rank - 1`` out
    of ``2 * r_size``; the result is then split into ``u_size`` contiguous
    chunks and the ``u_rank``-th one is returned.
    """
    ulysses_size = get_context_parallel_for_hybrid_ulysses_world_size()
    ring_size = get_context_parallel_for_hybrid_ring_world_size()
    ulysses_rank = get_context_parallel_for_hybrid_ulysses_rank()
    ring_rank = get_context_parallel_for_hybrid_ring_rank()

    # Built in pinned host memory so the device copy can be non-blocking.
    chunk_ids = torch.tensor(
        [ring_rank, (2 * ring_size - ring_rank - 1)], device="cpu", pin_memory=True
    ).cuda(non_blocking=True)

    chunked = pos_emb.view(
        *pos_emb.shape[:seq_dim], 2 * ring_size, -1, *pos_emb.shape[(seq_dim + 1):]
    )
    picked = chunked.index_select(seq_dim, chunk_ids)
    ring_part = picked.view(*picked.shape[:seq_dim], -1, *picked.shape[(seq_dim + 2):])

    return ring_part.chunk(ulysses_size, dim=seq_dim)[ulysses_rank]
def _get_pos_emb_on_this_cp_rank_in_hybrid_cp_general(pos_emb, seq_dim):
    """Slice for hybrid CP with a general mask: ring chunk, then Ulysses chunk.

    Both steps take contiguous chunks along ``seq_dim``.
    """
    ulysses_size = get_context_parallel_for_hybrid_ulysses_world_size()
    ring_size = get_context_parallel_for_hybrid_ring_world_size()
    ulysses_rank = get_context_parallel_for_hybrid_ulysses_rank()
    ring_rank = get_context_parallel_for_hybrid_ring_rank()

    ring_part = pos_emb.chunk(ring_size, dim=seq_dim)[ring_rank]
    return ring_part.chunk(ulysses_size, dim=seq_dim)[ulysses_rank]


def _get_pos_emb_on_this_cp_rank_in_adaptive_cp(pos_emd, seq_dim):
    """Gather this rank's positions according to the remapped sequence order.

    When ``get_remapped_seq_order()`` returns ``None`` the input is returned
    unchanged; otherwise the ``cp_rank``-th window of the order, of length
    ``seq_len // cp_size``, is used as index along ``seq_dim``.
    """
    world = parallel_state.get_context_parallel_world_size()
    rank = parallel_state.get_context_parallel_rank()

    order = get_remapped_seq_order()
    if order is None:
        return pos_emd

    window = pos_emd.shape[seq_dim] // world
    start = rank * window
    index = torch.tensor(order[start:(rank + 1) * window], dtype=torch.int,
                         device=pos_emd.device)
    return pos_emd.index_select(seq_dim, index)


def _get_pos_emb_on_this_cp_rank_in_hybrid_adaptive_cp(pos_emd, seq_dim):
    """Adaptive-CP gather for the hybrid layout.

    The window index is ``adap_rank * ulys_size + ulys_rank`` and the window
    length is ``seq_len // adap_size // ulys_size``.  When no remapped order
    is available the input is returned unchanged.
    """
    ulysses_size = get_context_parallel_for_hybrid_ulysses_world_size()
    adaptive_size = get_context_parallel_for_hybrid_ring_world_size()
    ulysses_rank = get_context_parallel_for_hybrid_ulysses_rank()
    adaptive_rank = get_context_parallel_for_hybrid_ring_rank()

    order = get_remapped_seq_order()
    if order is None:
        return pos_emd

    window = pos_emd.shape[seq_dim] // adaptive_size // ulysses_size
    slot = adaptive_rank * ulysses_size + ulysses_rank
    index = torch.tensor(order[slot * window:(slot + 1) * window], dtype=torch.int,
                         device=pos_emd.device)
    return pos_emd.index_select(seq_dim, index)
def rotary_embedding_forward(self, max_seq_len: int, offset: int = 0) -> Tensor:
    """Build the rotary angle table and slice it for this context-parallel rank.

    Args:
        max_seq_len (int): Number of positions to generate.
        offset (int, optional): Value added to every position index. Defaults to 0.

    Returns:
        Tensor: Angles indexed as ``[position, 1, 1, channel]``; sliced along
        dim 0 via ``get_pos_emb_on_this_cp_rank`` when the effective parallel
        size (``context_parallel_size``, times ``tp_y`` under 2D TP) exceeds 1.
    """
    positions = torch.arange(
        max_seq_len, device=self.inv_freq.device, dtype=self.inv_freq.dtype
    )
    positions = positions + offset

    if self.seq_len_interpolation_factor is not None:
        positions *= 1 / self.seq_len_interpolation_factor

    angles = torch.outer(positions, self.inv_freq)

    if self.rotary_interleaved:
        # each angle is repeated next to itself: a0, a0, a1, a1, ...
        column = angles.view(-1, 1)
        table = torch.stack((column, column), dim=-1).view(angles.shape[0], -1)
    else:
        # the whole angle vector is repeated: a0, a1, ..., a0, a1, ...
        table = torch.cat((angles, angles), dim=-1)

    table = table[:, None, None, :]

    global_args = get_args()
    cp = global_args.context_parallel_size
    tp_y_cp_sz = cp * global_args.tp_y if global_args.tp_2d else cp
    if tp_y_cp_sz > 1:
        # slice the table along the sequence dimension and keep the
        # partition of the current CP rank
        table = get_pos_emb_on_this_cp_rank(table, 0)
    return table


def rotary_embedding_forward_wrapper(fn):
    """Replace ``fn`` with ``rotary_embedding_forward``; ``fn`` itself is never called."""

    @wraps(fn)
    def wrapper(self, max_seq_len: int, offset: int = 0):
        return rotary_embedding_forward(self, max_seq_len, offset)

    return wrapper


def _get_pos_emb_on_this_tp_y_cp_rank_in_ulysses_cp(pos_emb, seq_dim):
    """Return this rank's contiguous chunk along ``seq_dim`` within the TP-Y x CP group."""
    group = TensorParallelYUnionCP()
    group_size = group.get_parallel_group_world_size()
    rank = group.get_parallel_rank()
    return pos_emb.chunk(group_size, dim=seq_dim)[rank]


def rotary_embedding_get_rotary_seq_len_wrapper(fn):
    """Decorate ``get_rotary_seq_len`` so 2D TP multiplies the result by ``tp_x``."""

    @wraps(fn)
    def wrapper(self, inference_params, transformer, transformer_input, transformer_config,):
        seq_len = fn(self, inference_params, transformer, transformer_input, transformer_config,)
        cfg = get_args()
        if cfg.tp_2d:
            seq_len *= cfg.tp_x
        return seq_len

    return wrapper
def get_gpt_layer_local_spec_wrapper(fn):
    """Decorate the local GPT layer-spec factory with MindSpeed overrides.

    The wrapped factory is called positionally with the three arguments.
    Then:
      * with ``multi_head_latent_attention`` enabled, the self-attention
        submodules are replaced wholesale (including ``linear_qb`` and
        ``linear_kvb``), with q/k layer norms chosen from the global
        ``qk_layernorm`` argument;
      * otherwise, when the ``qk_layernorm`` parameter is truthy, only the
        q/k layer norms are set to ``TENorm``;
      * in every case ``input_layernorm`` and ``pre_mlp_layernorm`` are set
        to ``TENorm``.
    """

    @wraps(fn)
    def wrapper(num_experts: int = None, moe_grouped_gemm: bool = False, qk_layernorm: bool = False):
        spec = fn(num_experts, moe_grouped_gemm, qk_layernorm)
        cfg = get_args()

        if cfg.multi_head_latent_attention:
            spec.submodules.self_attention.submodules = SelfAttentionSubmodules(
                linear_qkv=ColumnParallelLinear,
                core_attention=DotProductAttention,
                linear_proj=RowParallelLinear,
                q_layernorm=TENorm if cfg.qk_layernorm else IdentityOp,
                k_layernorm=TENorm if cfg.qk_layernorm else IdentityOp,
                linear_qb=ColumnParallelLinear,
                linear_kvb=ColumnParallelLinear
            )
        elif qk_layernorm:
            attention_parts = spec.submodules.self_attention.submodules
            attention_parts.q_layernorm = TENorm
            attention_parts.k_layernorm = TENorm

        spec.submodules.input_layernorm = TENorm
        spec.submodules.pre_mlp_layernorm = TENorm
        return spec

    return wrapper
def build_layers_wrapper(fn, column_forward, row_forward):
    """Decorate a layer-building method to rebind expert linear forwards.

    After ``fn`` has run, every layer whose ``mlp`` attribute is a
    ``MoELayer`` gets, for each local expert, ``linear_fc1.forward`` bound to
    ``column_forward`` and ``linear_fc2.forward`` bound to ``row_forward``.
    """

    @wraps(fn)
    def wrapper(self, *args, **kwargs):
        fn(self, *args, **kwargs)
        for layer in self.layers:
            if not isinstance(getattr(layer, 'mlp', None), MoELayer):
                continue
            for local_expert in layer.mlp.experts.local_experts:
                local_expert.linear_fc1.forward = types.MethodType(column_forward, local_expert.linear_fc1)
                local_expert.linear_fc2.forward = types.MethodType(row_forward, local_expert.linear_fc2)

    return wrapper


def build_norm_recompute_layer_wrapper(fn):
    """Decorate a layer-building method to enable norm recomputation.

    After ``fn`` has run, every layer that is not a ``NoopTransformerLayer``
    and for which ``should_recompute_norm`` is truthy gets its ``forward``
    rebound to ``norm_recompute_forward``.
    """

    @wraps(fn)
    def wrapper(self, *args, **kwargs):
        fn(self, *args, **kwargs)
        for layer in self.layers:
            if isinstance(layer, NoopTransformerLayer):
                continue
            if should_recompute_norm(layer):
                layer.forward = types.MethodType(norm_recompute_forward, layer)

    return wrapper


def get_mlp_module_spec_wrapper(fn):
    """Replace the MLP spec factory with one that uses ``MoELayer2D`` for MoE.

    The wrapped ``fn`` is never called.  ``use_te``, ``num_experts`` and
    ``moe_grouped_gemm`` must be passed as keyword arguments (a missing one
    raises ``KeyError``, as before).

    The Transformer-Engine linear classes were referenced without being
    imported in this file, so any ``use_te=True`` call raised ``NameError``.
    They are now imported lazily, and only when ``use_te`` is truthy, from
    the Megatron module this file already imports ``TENorm`` from.
    """

    @wraps(fn)
    def wrapper(*args, **kwargs):
        use_te, num_experts, moe_grouped_gemm = kwargs['use_te'], kwargs['num_experts'], kwargs['moe_grouped_gemm']

        if use_te:
            from megatron.core.transformer.custom_layers.transformer_engine import (
                TEColumnParallelGroupedLinear,
                TELayerNormColumnParallelLinear,
                TERowParallelGroupedLinear,
                TERowParallelLinear,
            )
            dense_fc1, dense_fc2 = TELayerNormColumnParallelLinear, TERowParallelLinear
            grouped_fc1, grouped_fc2 = TEColumnParallelGroupedLinear, TERowParallelGroupedLinear
        else:
            dense_fc1, dense_fc2 = ColumnParallelLinear, RowParallelLinear
            grouped_fc1 = grouped_fc2 = None

        if num_experts is None:
            # Dense MLP w/ or w/o TE modules.
            return ModuleSpec(
                module=MLP,
                submodules=MLPSubmodules(linear_fc1=dense_fc1, linear_fc2=dense_fc2),
            )

        # Mixture of experts with modules in megatron core.
        if use_te and moe_grouped_gemm:
            linear_fc1, linear_fc2 = grouped_fc1, grouped_fc2
        else:
            linear_fc1, linear_fc2 = ColumnParallelLinear, RowParallelLinear

        # The grouped TE class is compared against None, as in the original
        # expression, to detect whether TE grouped GEMM is available.
        use_te_grouped_gemm = use_te and grouped_fc1 is not None

        return ModuleSpec(
            module=MoELayer2D,
            submodules=(
                MLPSubmodules(linear_fc1=linear_fc1, linear_fc2=linear_fc2)
                if not moe_grouped_gemm or use_te_grouped_gemm
                else None
            ),
        )

    return wrapper
def gpt_forward_wrapper(fn):
    """Decorate a GPT ``forward`` so it always receives packed-sequence params.

    A ``PackedSeqParams`` is built from ``get_actual_seq_len()`` (used for
    both the q and kv cumulative lengths), extended with ``q_index`` /
    ``kv_index`` from ``compute_qkv_index`` and with ``position_ids`` from
    ``get_position_ids()``, and stored under the ``packed_seq_params``
    keyword, overwriting any value the caller supplied.
    """

    @wraps(fn)
    def wrapper(*args, **kwargs):
        seq_lens = get_actual_seq_len()
        packed = PackedSeqParams(cu_seqlens_q=seq_lens, cu_seqlens_kv=seq_lens)

        # compute_qkv_index receives a plain Python list taken from a clone
        packed.q_index, packed.kv_index = compute_qkv_index(seq_lens.clone().tolist())
        packed.position_ids = get_position_ids()

        kwargs['packed_seq_params'] = packed
        return fn(*args, **kwargs)

    return wrapper
Ltd. All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Expert parallel groups.""" +import os +from functools import wraps +from typing import Optional +from datetime import timedelta + +import torch +import torch_npu +import megatron +from megatron.core.parallel_state import get_context_parallel_world_size, get_nccl_options + +from mindspeed.core.simple_parallel_cfg import SimpleParallelCfg +from mindspeed.core.tensor_parallel_y_union_cp import TensorParallelYUnionCP + +_CONTEXT_PARALLEL_GROUP_FOR_SEND_RECV_OVERLAP = None +_CONTEXT_PARALLEL_GROUP_FOR_HYBRID_ULYSSES = None +_CONTEXT_PARALLEL_GROUP_FOR_HYBRID_RING = None +_PIPELINE_MODEL_PARALLEL_GROUP_FOR_NEW_STREAM = None + +_CONTEXT_PARALLEL_RANKS_FOR_HYBRID_ULYSSES = None +_CONTEXT_PARALLEL_RANKS_FOR_HYBRID_RING = None + +_CONTEXT_PARALLEL_RANKS_FOR_RING_INTRA_WINDOW = None +_CONTEXT_PARALLEL_RANKS_FOR_RING_INTER_WINDOW_KV = None +_CONTEXT_PARALLEL_RANKS_FOR_RING_INTER_WINDOW_DKV = None +_CONTEXT_PARALLEL_GROUP_FOR_RING_INTRA_WINDOW = None +_CONTEXT_PARALLEL_GROUP_FOR_RING_INTRA_WINDOW_SEND_RECV_OVERLAP = None + +_TP_X_EP_GROUP = None +_TP_X_EP_GROUP_WORLD_SIZE = None +_TP_X_EP_GROUP_RANK = None +_TP_X_PARALLEL_RING_RANKS = None +_TP_Y_PARALLEL_RING_RANKS = None + +_TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1 = None +_TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2 = None +_TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM1 = None 
_HCCL_GROUP_BUFFER = None


def parse_hccl_buffer_string(hccl_group_buffer):
    """Parse ``"key:size;key:size"`` into the module-level ``_HCCL_GROUP_BUFFER``.

    Args:
        hccl_group_buffer: ``None`` (nothing is done) or a string of
            ``key:value`` pairs separated by ``;``.  Spaces around and inside
            keys and values are ignored.  Blank segments, such as the one
            produced by a trailing ``;``, are skipped.

    Raises:
        RuntimeError: a segment is not exactly ``key:value``, the key is not
            one of the known group names, or the value is not an integer
            greater than 0.  Entries parsed before the failing segment stay
            stored.
    """
    global _HCCL_GROUP_BUFFER

    if hccl_group_buffer is None:
        return

    allowed_keys = frozenset((
        "dp", "dp_cp", "cp", "mp", "mp_exp", "tp", "pp", "embd", "tp_dp_cp",
        "tp_dp", "tp_cp", "tp_exp", "exp", "dp_modulo_exp", "pp_new_stream",
        "cp2", "cp_ulysses", "cp_ring", "cp_ring_intra", "cp_ring_intra_overlap",
        "nd1_dim1", "ag_x_sd_rcv_overlap", "nd1_dim2", "ag_y_sd_rcv_overlap",
        "nd2_dim1", "nd2_dim2",
    ))

    # The table is normally created by initialize_model_parallel_wrapper;
    # create it here as well so a direct call does not fail on None.
    if _HCCL_GROUP_BUFFER is None:
        _HCCL_GROUP_BUFFER = {}

    for part in hccl_group_buffer.split(';'):
        if not part.strip():
            continue

        key_value = part.split(':')
        if len(key_value) != 2:
            raise RuntimeError("The str of hccl-group-buffer is not valid")

        key = key_value[0].strip().replace(' ', '')
        value_str = key_value[1].strip().replace(' ', '')

        if key not in allowed_keys:
            raise RuntimeError(f"Key {key} is not allowed")

        try:
            value = int(value_str)
        except ValueError as err:
            raise RuntimeError(f"{value_str} is not a valid positive integer") from err
        if value <= 0:
            raise RuntimeError(f"Value {value} must be greater than 0")

        _HCCL_GROUP_BUFFER[key] = value


def hccl_buffer_auto_adaptive():
    """Fill ``_HCCL_GROUP_BUFFER`` with per-group buffer sizes derived from the args.

    Every computed size has the form ``2 * ceil(<expression> / 1024 / 1024)``;
    fixed fallbacks of 200 and 10 are used where the original code does so.
    The DP, DP-CP and DP-EP groups are not set here and keep their default.
    ``_HCCL_GROUP_BUFFER`` must already be a dict.

    Fix: the ``tp_exp`` entry now stores the TP-EP size computed just above
    it.  Previously that value was computed and discarded, and the plain EP
    size was stored instead.
    """
    import math
    from megatron.training import get_args
    args = get_args()

    seq_length = args.seq_length
    micro_batch_size = args.micro_batch_size
    hidden_size = args.hidden_size

    context_parallel_size = args.context_parallel_size
    tensor_model_parallel_size = args.tensor_model_parallel_size
    expert_model_parallel_size = args.expert_model_parallel_size

    moe_router_topk = args.moe_router_topk
    is_alltoall = args.moe_token_dispatcher_type == 'alltoall'
    is_allgather = args.moe_token_dispatcher_type == 'allgather'
    ep_factor = args.hccl_ep_group_buffer_adaptive_factor

    context_parallel_algo = args.context_parallel_algo
    num_attention_heads = args.num_attention_heads
    # NOTE(review): used below as a multiplier (number of KV groups?).  If the
    # argument is a boolean flag the ring sizes collapse to 0 when it is off -
    # confirm against the argument definition.
    group_query_attention = args.group_query_attention

    buf = _HCCL_GROUP_BUFFER

    # ---- TP group -------------------------------------------------------
    # Dense part: S/CP * B * H, doubled when sequence parallel is off.
    hccl_tp_buffer_size_mlp = 2 * math.ceil(seq_length / context_parallel_size * micro_batch_size * hidden_size / 1024 / 1024)
    buf['tp'] = hccl_tp_buffer_size_mlp if args.sequence_parallel else hccl_tp_buffer_size_mlp * 2
    if is_alltoall:
        # MoE with all-to-all: S/CP/TP * B * H * topK, scaled by the factor.
        if ep_factor > 0:
            hccl_tp_buffer_size_moe = 2 * math.ceil(ep_factor * seq_length / context_parallel_size / tensor_model_parallel_size * micro_batch_size * hidden_size / 1024 / 1024 * moe_router_topk)
        else:
            hccl_tp_buffer_size_moe = 200
        buf['tp'] = max(hccl_tp_buffer_size_moe, buf['tp'])

    # ---- PP groups (P2P): S/CP[/TP] * B * H -----------------------------
    if args.sequence_parallel:
        hccl_pp_buffer_size = 2 * math.ceil(seq_length / context_parallel_size / tensor_model_parallel_size * micro_batch_size * hidden_size / 1024 / 1024)
    else:
        hccl_pp_buffer_size = 2 * math.ceil(seq_length / context_parallel_size * micro_batch_size * hidden_size / 1024 / 1024)
    buf['pp'] = hccl_pp_buffer_size
    buf['pp_new_stream'] = hccl_pp_buffer_size

    # ---- MP / MP-EXP groups: small optimizer traffic, constant 10 --------
    buf['mp'] = 10
    buf['mp_exp'] = 10

    # ---- EP group: S/CP/TP * B * H * topK --------------------------------
    if ep_factor > 0:
        hccl_ep_buffer_size = 2 * math.ceil(seq_length / context_parallel_size / tensor_model_parallel_size * micro_batch_size * hidden_size / 1024 / 1024 * moe_router_topk)
    else:
        hccl_ep_buffer_size = 200
    buf['exp'] = hccl_ep_buffer_size

    # ---- TP-EP group ------------------------------------------------------
    # allgather dispatcher:               S/CP * B * H * EP, scaled by the factor
    # alltoall + moe_tp_extend_ep:        S/CP/TP * B * H * topK, scaled by the factor
    if is_allgather:
        if ep_factor > 0:
            hccl_tp_ep_buffer_size = 2 * math.ceil(ep_factor * seq_length / context_parallel_size * micro_batch_size * hidden_size * expert_model_parallel_size / 1024 / 1024)
        else:
            hccl_tp_ep_buffer_size = 200
        buf['tp_exp'] = hccl_tp_ep_buffer_size
    elif is_alltoall and args.moe_tp_extend_ep:
        if ep_factor > 0:
            hccl_tp_ep_buffer_size = 2 * math.ceil(ep_factor * seq_length / context_parallel_size / tensor_model_parallel_size * micro_batch_size * hidden_size * moe_router_topk / 1024 / 1024)
        else:
            hccl_tp_ep_buffer_size = 200
        buf['tp_exp'] = hccl_tp_ep_buffer_size

    # ---- TP-CP group: small traffic, constant 10 -------------------------
    buf['tp_cp'] = 10

    # ---- CP / CP2 / CP-ring / CP-ulysses groups --------------------------
    if context_parallel_algo == 'ulysses_cp_algo' or context_parallel_algo is None:
        # Ulysses all-to-all: S/CP * B * (H / TP)
        buf['cp'] = 2 * math.ceil(seq_length / context_parallel_size * micro_batch_size * hidden_size / tensor_model_parallel_size / 1024 / 1024)
    elif context_parallel_algo == 'megatron_cp_algo':
        # Ring attention send/recv: S/CP * B * (H / heads * GQA / TP) * 2
        hccl_cp2_buffer_size = 2 * math.ceil(seq_length / context_parallel_size * micro_batch_size * hidden_size / num_attention_heads * group_query_attention / tensor_model_parallel_size / 1024 / 1024)
        hccl_cp_buffer_size = 2 * 2 * math.ceil(seq_length / context_parallel_size * micro_batch_size * hidden_size / num_attention_heads * group_query_attention / tensor_model_parallel_size / 1024 / 1024)
        windowed = args.cp_window_size > 1
        if args.use_cp_send_recv_overlap:
            # With overlap the traffic is split over 'cp' and 'cp2'.
            buf['cp2'] = hccl_cp2_buffer_size
            buf['cp'] = hccl_cp2_buffer_size
            if windowed:
                buf['cp_ring_intra'] = hccl_cp2_buffer_size
                buf['cp_ring_intra_overlap'] = hccl_cp2_buffer_size
        else:
            buf['cp'] = hccl_cp_buffer_size
            if windowed:
                buf['cp_ring_intra'] = hccl_cp_buffer_size
    elif context_parallel_algo == 'hybrid_cp_algo':
        ulysses_context_parallel_size = args.ulysses_degree_in_cp
        ring_context_parallel_size = context_parallel_size / ulysses_context_parallel_size
        hccl_cp_ulysess_buffer_size = 2 * math.ceil(seq_length / ulysses_context_parallel_size * micro_batch_size * hidden_size / tensor_model_parallel_size / 1024 / 1024)
        hccl_cp_ring_buffer_size = 2 * math.ceil(seq_length / ring_context_parallel_size * micro_batch_size * hidden_size / num_attention_heads * group_query_attention / tensor_model_parallel_size / 1024 / 1024)
        overlap = args.use_cp_send_recv_overlap
        windowed = args.cp_window_size > 1
        # Without overlap a single ring group carries both directions.
        ring_size = hccl_cp_ring_buffer_size if overlap else hccl_cp_ring_buffer_size * 2

        buf['cp_ulysses'] = hccl_cp_ulysess_buffer_size
        buf['cp_ring'] = ring_size
        if overlap:
            buf['cp2'] = hccl_cp_ring_buffer_size
        if windowed:
            buf['cp_ring_intra'] = ring_size
            if overlap:
                buf['cp_ring_intra_overlap'] = hccl_cp_ring_buffer_size
        # The CP group is only used to calculate losses here; the traffic is
        # very small, so it gets a fixed value of 10.
        buf['cp'] = 10
def get_nccl_options_wrapper(get_nccl_options):
    """Decorate ``get_nccl_options`` to return HCCL options with a custom buffer size.

    When either ``hccl_group_buffer`` is set or ``hccl_group_buffer_adaptive``
    is truthy, and ``_HCCL_GROUP_BUFFER`` holds a non-None entry for
    ``pg_name``, a ``ProcessGroupHCCL.Options`` whose ``hccl_config`` carries
    that size is returned.  In every other case the call is delegated to the
    wrapped function.
    """

    @wraps(get_nccl_options)
    def wrapper(pg_name, nccl_comm_cfgs):
        from megatron.training import get_args
        cfg = get_args()

        buffer_configured = cfg.hccl_group_buffer is not None or cfg.hccl_group_buffer_adaptive
        if buffer_configured and _HCCL_GROUP_BUFFER.get(pg_name) is not None:
            hccl_options = torch_npu._C._distributed_c10d.ProcessGroupHCCL.Options()
            hccl_options.hccl_config = {"hccl_buffer_size":_HCCL_GROUP_BUFFER[pg_name]}
            return hccl_options

        return get_nccl_options(pg_name, nccl_comm_cfgs)

    return wrapper
+ 1, + nccl_communicator_config_path, + distributed_timeout_minutes, + order + ) + + world_size: int = torch.distributed.get_world_size() + num_tensor_model_parallel_groups: int = world_size // tensor_model_parallel_size + num_pipeline_model_parallel_groups: int = world_size // pipeline_model_parallel_size + data_parallel_size: int = world_size // ( + tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size + ) + + if data_parallel_size * context_parallel_size % expert_model_parallel_size != 0: + raise RuntimeError( + f"data_parallel_size * context_parallel_size ({data_parallel_size * context_parallel_size}) is not " + f"divisible by expert_model_parallel_size " + ) + + nccl_comm_cfgs = {} + if nccl_communicator_config_path is not None: + import yaml + + with open(nccl_communicator_config_path, "r") as stream: + nccl_comm_cfgs = yaml.safe_load(stream) + + all_data_parallel_group_ranks = [] + all_data_parallel_group_ranks_with_cp = [] + for i in range(pipeline_model_parallel_size): + start_rank = i * num_pipeline_model_parallel_groups + end_rank = (i + 1) * num_pipeline_model_parallel_groups + for j in range(context_parallel_size * tensor_model_parallel_size): + ranks = range( + start_rank + j, end_rank, context_parallel_size * tensor_model_parallel_size + ) + all_data_parallel_group_ranks.append(list(ranks)) + for j in range(tensor_model_parallel_size): + ranks_with_cp = range( + start_rank + j, end_rank, tensor_model_parallel_size + ) + all_data_parallel_group_ranks_with_cp.append(list(ranks_with_cp)) + + timeout = timedelta(minutes=distributed_timeout_minutes) + + # # Regenerate ep related groups because ep is set to 1 in initialize_model_parallel func + rank_generator = megatron.core.parallel_state.RankGenerator( + tp=tensor_model_parallel_size, + ep=expert_model_parallel_size, + dp=data_parallel_size * context_parallel_size, + pp=pipeline_model_parallel_size, + cp=1, + order=order, + ) + for ranks in rank_generator.get_ranks('tp-ep-pp', 
independent_ep=True): + group = torch.distributed.new_group( + ranks, timeout=timeout, + pg_options=get_nccl_options('mp_exp', nccl_comm_cfgs) + ) + if rank in ranks: + megatron.core.parallel_state._MODEL_AND_EXPERT_PARALLEL_GROUP = group + + all_tensor_and_expert_group_ranks = [] + for ranks in rank_generator.get_ranks('tp-ep', independent_ep=True): + all_tensor_and_expert_group_ranks.append(list(ranks)) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('tp_exp', nccl_comm_cfgs) + ) + if rank in ranks: + megatron.core.parallel_state._TENSOR_AND_EXPERT_PARALLEL_GROUP = group + + for ranks in rank_generator.get_ranks('ep', independent_ep=True): + all_ep_groups.append(list(ranks)) + group = torch.distributed.new_group( + ranks, pg_options=get_nccl_options('exp', nccl_comm_cfgs) + ) + if rank in ranks: + megatron.core.parallel_state._EXPERT_MODEL_PARALLEL_GROUP = group + + all_dp_modulo_exp_group_ranks = [] + for ranks in rank_generator.get_ranks('dp', independent_ep=True): + all_dp_modulo_exp_group_ranks.append(list(ranks)) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('dp_modulo_exp', nccl_comm_cfgs) + ) + group_gloo = torch.distributed.new_group(ranks, backend="gloo") + if rank in ranks: + megatron.core.parallel_state._DATA_MODULO_EXPERT_PARALLEL_GROUP = group + megatron.core.parallel_state._DATA_MODULO_EXPERT_PARALLEL_GROUP_GLOO = group_gloo + + for ranks in rank_generator.get_ranks('dp-cp', independent_ep=True): + # Lazy initialization of the group + if get_context_parallel_world_size() > 1: + group = torch.distributed.new_group( + ranks, + timeout=timeout, + pg_options=get_nccl_options('dp_modulo_exp_cp', nccl_comm_cfgs), + ) + group_gloo = torch.distributed.new_group(ranks, backend="gloo") + else: + group = megatron.core.parallel_state._DATA_MODULO_EXPERT_PARALLEL_GROUP + group_gloo = megatron.core.parallel_state._DATA_MODULO_EXPERT_PARALLEL_GROUP_GLOO + if rank in ranks: 
+ megatron.core.parallel_state._DATA_MODULO_EXPERT_PARALLEL_GROUP_WITH_CP = group + megatron.core.parallel_state._DATA_MODULO_EXPERT_PARALLEL_GROUP_WITH_CP_GLOO = group_gloo + + all_tp_groups = [] + for i in range(num_tensor_model_parallel_groups): + ranks = range(i * tensor_model_parallel_size, (i + 1) * tensor_model_parallel_size) + all_tp_groups.append(list(ranks)) + + print_rank_0(f"all tp gourps {all_tp_groups}") + print_rank_0(f"all ep groups {all_ep_groups}") + print_rank_0(f"all dp groups {all_data_parallel_group_ranks}") + print_rank_0(f"all_dp_modulo_exp_group_ranks {all_dp_modulo_exp_group_ranks}") + print_rank_0(f"all_tensor_and_expert_group_ranks {all_tensor_and_expert_group_ranks}") + print_rank_0(f"all_data_parallel_group_ranks_with_cp {all_data_parallel_group_ranks_with_cp}") + + else: + initialize_model_parallel( + tensor_model_parallel_size, + pipeline_model_parallel_size, + virtual_pipeline_model_parallel_size, + pipeline_model_parallel_split_rank, + use_sharp, + context_parallel_size, + expert_model_parallel_size, + nccl_communicator_config_path, + distributed_timeout_minutes, + order + ) + + initialize_context_parallel_group_for_send_recv_overlap( + tensor_model_parallel_size, + pipeline_model_parallel_size, + context_parallel_size, + nccl_comm_cfgs + ) + + initialize_context_parallel_group_for_hybrid_cp( + tensor_model_parallel_size, + pipeline_model_parallel_size, + context_parallel_size, + nccl_comm_cfgs + ) + + initialize_context_parallel_group_for_double_ring( + tensor_model_parallel_size, + pipeline_model_parallel_size, + context_parallel_size, + nccl_comm_cfgs + ) + + global _PIPELINE_MODEL_PARALLEL_GROUP_FOR_NEW_STREAM + if _PIPELINE_MODEL_PARALLEL_GROUP_FOR_NEW_STREAM is not None: + raise AttributeError('Pipeline parallel group for new stream is already initialized') + num_pipeline_model_parallel_groups: int = world_size // pipeline_model_parallel_size + for i in range(num_pipeline_model_parallel_groups): + ranks = range(i, world_size, 
num_pipeline_model_parallel_groups) + group = torch.distributed.new_group( + ranks, pg_options=megatron.core.parallel_state.get_nccl_options('pp_new_stream', nccl_comm_cfgs) + ) + if rank in ranks: + _PIPELINE_MODEL_PARALLEL_GROUP_FOR_NEW_STREAM = group + + from megatron.training import get_args + args = get_args() + nd1_dim1_sz = args.nd1_dim1_size if args.use_nd_matmul else args.tp_x + nd2_dim1_sz = args.nd2_dim1_size if args.use_nd_matmul else args.tp_y + tp_x_groups = initialize_ndmm_parallel_group( + nccl_comm_cfgs, + tensor_model_parallel_size=tensor_model_parallel_size, + nd1_dim1_size=nd1_dim1_sz, + nd2_dim1_size=nd2_dim1_sz, + ) + + if args.tp_2d: + from mindspeed.core.tensor_parallel_x_union_cp import TensorParallelXUnionCP + + tp_y_cp_group = TensorParallelYUnionCP( + parallel_cfg=SimpleParallelCfg( + dp=data_parallel_size, + pp=pipeline_model_parallel_size, + tp=tensor_model_parallel_size, + cp=context_parallel_size, + ep=expert_model_parallel_size, + tp_x=get_args().tp_x, + tp_y=get_args().tp_y, + ), + pg_name="tp-y-cp", + overlap_gp_name="tp-y-cp-overlap", + nccl_comm_cfgs=nccl_comm_cfgs + ) + print(f'tp_y_cp_group.global_ranks={tp_y_cp_group.global_ranks} for rank {rank}') + + tp_x_cp_group = TensorParallelXUnionCP( + parallel_cfg=SimpleParallelCfg( + dp=data_parallel_size, + pp=pipeline_model_parallel_size, + tp=tensor_model_parallel_size, + cp=context_parallel_size, + ep=expert_model_parallel_size, + tp_x=get_args().tp_x, + tp_y=get_args().tp_y, + ), + pg_name="tp-x-cp", + overlap_gp_name=None, + nccl_comm_cfgs=nccl_comm_cfgs + ) + print(f'tp_x_cp_group.global_ranks={tp_x_cp_group.global_ranks} for rank {rank}') + + if expert_model_parallel_size > 1: + all_tp_x_ep_groups = set() + print(f'all_ep_groups={all_ep_groups}') + for tp_x_ranks in tp_x_groups: + tp_x_ep_ranks_set = set() + for ep_ranks in all_ep_groups: + tp_x_ranks_set = set(tp_x_ranks) + ep_ranks_set = set(ep_ranks) + if not tp_x_ranks_set.intersection(ep_ranks_set): + continue + + 
cur_tp_x_ep_ranks_set = tp_x_ranks_set.union(ep_ranks_set) + tp_x_ep_ranks_set = tp_x_ep_ranks_set.union(cur_tp_x_ep_ranks_set) + + all_tp_x_ep_groups.add(tuple(sorted(list(tp_x_ep_ranks_set)))) + + print(f'{all_tp_x_ep_groups=}') + all_tp_x_ep_groups = [tp_x_ep_ranks for tp_x_ep_ranks in all_tp_x_ep_groups] + timeout = timedelta(minutes=distributed_timeout_minutes) + + global _TP_X_EP_GROUP + for tp_x_ep_ranks in all_tp_x_ep_groups: + group = torch.distributed.new_group( + tp_x_ep_ranks, timeout=timeout, + pg_options=get_nccl_options('tp_x_ep', nccl_comm_cfgs) + ) + if rank in tp_x_ep_ranks: + _TP_X_EP_GROUP = group + + print(f'{all_tp_x_ep_groups=}') + + return wrapper + + +def get_ring_group_for_intra_window(): + global _CONTEXT_PARALLEL_GROUP_FOR_RING_INTRA_WINDOW + return _CONTEXT_PARALLEL_GROUP_FOR_RING_INTRA_WINDOW + + +def get_ring_group_for_intra_window_send_recv_overlap(): + global _CONTEXT_PARALLEL_GROUP_FOR_RING_INTRA_WINDOW_SEND_RECV_OVERLAP + return _CONTEXT_PARALLEL_GROUP_FOR_RING_INTRA_WINDOW_SEND_RECV_OVERLAP + + +def get_ring_ranks_for_intra_window(): + global _CONTEXT_PARALLEL_RANKS_FOR_RING_INTRA_WINDOW + assert _CONTEXT_PARALLEL_RANKS_FOR_RING_INTRA_WINDOW is not None + return _CONTEXT_PARALLEL_RANKS_FOR_RING_INTRA_WINDOW + + +def get_ring_ranks_for_inter_window_kv(): + global _CONTEXT_PARALLEL_RANKS_FOR_RING_INTER_WINDOW_KV + assert _CONTEXT_PARALLEL_RANKS_FOR_RING_INTER_WINDOW_KV is not None + return _CONTEXT_PARALLEL_RANKS_FOR_RING_INTER_WINDOW_KV + + +def get_ring_ranks_for_inter_window_dkv(): + global _CONTEXT_PARALLEL_RANKS_FOR_RING_INTER_WINDOW_DKV + assert _CONTEXT_PARALLEL_RANKS_FOR_RING_INTER_WINDOW_DKV is not None + return _CONTEXT_PARALLEL_RANKS_FOR_RING_INTER_WINDOW_DKV + + +def initialize_context_parallel_group_for_send_recv_overlap( + tensor_model_parallel_size, + pipeline_model_parallel_size, + context_parallel_size, + nccl_comm_cfgs +): + from megatron.training import get_args + if not get_args().use_cp_send_recv_overlap: + 
return + # when tp_y > 1, use TensorParallelYUnionCP + if get_args().tp_2d and get_args().tp_y > 1: + return + rank = torch.distributed.get_rank() + world_size: int = torch.distributed.get_world_size() + num_pipeline_model_parallel_groups: int = world_size // pipeline_model_parallel_size + data_parallel_size: int = world_size // ( + tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size + ) + global _CONTEXT_PARALLEL_GROUP_FOR_SEND_RECV_OVERLAP + for i in range(pipeline_model_parallel_size): + for j in range(data_parallel_size): + start_rank = ( + i * num_pipeline_model_parallel_groups + + j * tensor_model_parallel_size * context_parallel_size + ) + end_rank = ( + i * num_pipeline_model_parallel_groups + + (j + 1) * tensor_model_parallel_size * context_parallel_size + ) + for k in range(tensor_model_parallel_size): + ranks = range(start_rank + k, end_rank, tensor_model_parallel_size) + group_send_recv_overlap = torch.distributed.new_group( + ranks, pg_options=megatron.core.parallel_state.get_nccl_options('cp2', nccl_comm_cfgs) + ) + if rank in ranks: + _CONTEXT_PARALLEL_GROUP_FOR_SEND_RECV_OVERLAP = group_send_recv_overlap + + +def initialize_context_parallel_group_for_hybrid_cp( + tensor_model_parallel_size, + pipeline_model_parallel_size, + context_parallel_size, + nccl_comm_cfgs +): + from megatron.training import get_args + if (not hasattr(get_args(), 'context_parallel_algo') or + ( + get_args().context_parallel_algo != 'hybrid_cp_algo' and get_args().context_parallel_algo != 'hybrid_adaptive_cp_algo')): + return + + rank = torch.distributed.get_rank() + world_size: int = torch.distributed.get_world_size() + num_pipeline_model_parallel_groups: int = world_size // pipeline_model_parallel_size + data_parallel_size: int = world_size // ( + tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size + ) + + ulysses_degree = get_args().ulysses_degree_in_cp + assert (context_parallel_size > ulysses_degree and 
context_parallel_size % ulysses_degree == 0) + ring_degree = context_parallel_size // ulysses_degree + + global _CONTEXT_PARALLEL_GROUP_FOR_HYBRID_ULYSSES + global _CONTEXT_PARALLEL_RANKS_FOR_HYBRID_ULYSSES + global _CONTEXT_PARALLEL_GROUP_FOR_HYBRID_RING + global _CONTEXT_PARALLEL_RANKS_FOR_HYBRID_RING + for i in range(pipeline_model_parallel_size): + for j in range(data_parallel_size): + start_rank = ( + i * num_pipeline_model_parallel_groups + + j * tensor_model_parallel_size * context_parallel_size + ) + end_rank = ( + i * num_pipeline_model_parallel_groups + + (j + 1) * tensor_model_parallel_size * context_parallel_size + ) + for k in range(tensor_model_parallel_size): + # cp ranks + ranks = list(range(start_rank + k, end_rank, tensor_model_parallel_size)) + # ulysses cp ranks. + # Ulysses need higher communication bandwidth than Ring. + # Try to put Ulysses ranks in the same node. + for m in range(ring_degree): + ulysses_ranks = [ranks[idx] for idx in range(m * ulysses_degree, (m + 1) * ulysses_degree)] + ulysses_group = torch.distributed.new_group( + ulysses_ranks, + pg_options=megatron.core.parallel_state.get_nccl_options('cp_ulysses', nccl_comm_cfgs) + ) + if rank in ulysses_ranks: + _CONTEXT_PARALLEL_GROUP_FOR_HYBRID_ULYSSES = ulysses_group + _CONTEXT_PARALLEL_RANKS_FOR_HYBRID_ULYSSES = ulysses_ranks + + # ring cp ranks + for m in range(ulysses_degree): + ring_ranks = [ranks[idx] for idx in range(m, len(ranks), ulysses_degree)] + ring_group = torch.distributed.new_group( + ring_ranks, pg_options=megatron.core.parallel_state.get_nccl_options('cp_ring', nccl_comm_cfgs) + ) + if rank in ring_ranks: + _CONTEXT_PARALLEL_GROUP_FOR_HYBRID_RING = ring_group + _CONTEXT_PARALLEL_RANKS_FOR_HYBRID_RING = ring_ranks + + +def initialize_context_parallel_group_for_double_ring( + tensor_model_parallel_size, + pipeline_model_parallel_size, + context_parallel_size, + nccl_comm_cfgs, +): + from megatron.training import get_args + import megatron.core.parallel_state as ps + 
args = get_args() + if args.tp_2d: + return + if context_parallel_size == 1 or args.context_parallel_algo not in ['megatron_cp_algo', 'hybrid_cp_algo']: + return + + use_hybrid_cp = args.context_parallel_algo == 'hybrid_cp_algo' and args.ulysses_degree_in_cp > 1 + + rank = torch.distributed.get_rank() + world_size: int = torch.distributed.get_world_size() + num_pipeline_model_parallel_groups: int = world_size // pipeline_model_parallel_size + data_parallel_size: int = world_size // ( + tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size + ) + + def _initialize_helper( + rank, + ring_global_ranks, + window_size + ): + from megatron.training import get_args + global _CONTEXT_PARALLEL_RANKS_FOR_RING_INTRA_WINDOW + global _CONTEXT_PARALLEL_RANKS_FOR_RING_INTER_WINDOW_KV + global _CONTEXT_PARALLEL_RANKS_FOR_RING_INTER_WINDOW_DKV + global _CONTEXT_PARALLEL_GROUP_FOR_RING_INTRA_WINDOW + global _CONTEXT_PARALLEL_GROUP_FOR_RING_INTRA_WINDOW_SEND_RECV_OVERLAP + + ring_size = len(ring_global_ranks) + inter_size = ring_size // window_size + for wid in range(inter_size): + intra_ranks = [ring_global_ranks[idx] for idx in range(wid * window_size, (wid + 1) * window_size)] + intra_group = torch.distributed.new_group(intra_ranks, pg_options=ps.get_nccl_options('cp_ring_intra', nccl_comm_cfgs)) + intra_group_for_send_recv_overlap = None + if args.use_cp_send_recv_overlap: + intra_group_for_send_recv_overlap = torch.distributed.new_group(intra_ranks, pg_options=ps.get_nccl_options('cp_ring_intra_overlap', nccl_comm_cfgs)) + + if rank in intra_ranks: + _CONTEXT_PARALLEL_RANKS_FOR_RING_INTRA_WINDOW = intra_ranks + _CONTEXT_PARALLEL_GROUP_FOR_RING_INTRA_WINDOW = intra_group + _CONTEXT_PARALLEL_GROUP_FOR_RING_INTRA_WINDOW_SEND_RECV_OVERLAP = intra_group_for_send_recv_overlap + + for inner_id in range(window_size): + inter_ranks = [ring_global_ranks[idx] for idx in range(inner_id, ring_size, window_size)] + if rank in inter_ranks: + 
_CONTEXT_PARALLEL_RANKS_FOR_RING_INTER_WINDOW_KV = inter_ranks + break + + for inner_id in range(window_size): + inter_dkv_ranks = [] + cur_rank = ring_global_ranks[inner_id] + cur_idx = inner_id + cur_window = 0 + while cur_rank not in inter_dkv_ranks: + inter_dkv_ranks.append(cur_rank) + cur_window = (cur_window + 1) % inter_size + window_start = cur_window * window_size + cur_idx = window_start + (cur_idx + 1) % window_size + cur_rank = ring_global_ranks[cur_idx] + + if rank in inter_dkv_ranks: + _CONTEXT_PARALLEL_RANKS_FOR_RING_INTER_WINDOW_DKV = inter_dkv_ranks + break + + + for i in range(pipeline_model_parallel_size): + for j in range(data_parallel_size): + start_rank = ( + i * num_pipeline_model_parallel_groups + + j * tensor_model_parallel_size * context_parallel_size + ) + end_rank = ( + i * num_pipeline_model_parallel_groups + + (j + 1) * tensor_model_parallel_size * context_parallel_size + ) + for k in range(tensor_model_parallel_size): + cp_ranks = range(start_rank + k, end_rank, tensor_model_parallel_size) + + if use_hybrid_cp: + ulysses_degree = get_args().ulysses_degree_in_cp + assert (context_parallel_size > ulysses_degree and context_parallel_size % ulysses_degree == 0) + # ring cp ranks + for m in range(ulysses_degree): + ring_ranks = [cp_ranks[idx] for idx in range(m, len(cp_ranks), ulysses_degree)] + + _initialize_helper(rank, ring_ranks, args.cp_window_size) + else: + _initialize_helper(rank, cp_ranks, args.cp_window_size) + + +def get_context_parallel_group_for_send_recv_overlap(check_initialized=True): + """Get the context parallel group for send-recv overlap the caller rank belongs to.""" + if check_initialized: + assert ( + _CONTEXT_PARALLEL_GROUP_FOR_SEND_RECV_OVERLAP is not None + ), 'context parallel group for send-recv overlap is not initialized' + return _CONTEXT_PARALLEL_GROUP_FOR_SEND_RECV_OVERLAP + + +def get_context_parallel_next_rank(): + """Return the global rank that follows the caller in the context parallel""" + import 
megatron.core.parallel_state as ps + assert ps._CONTEXT_PARALLEL_GLOBAL_RANKS is not None, "Context parallel group is not initialized" + rank_in_context = ps.get_context_parallel_rank() + world_size = ps.get_context_parallel_world_size() + return ps._CONTEXT_PARALLEL_GLOBAL_RANKS[(rank_in_context + 1) % world_size] + + +def get_context_parallel_prev_rank(): + """Return the global rank that preceeds the caller in the context parallel""" + import megatron.core.parallel_state as ps + assert ps._CONTEXT_PARALLEL_GLOBAL_RANKS is not None, "Context parallel group is not initialized" + rank_in_context = ps.get_context_parallel_rank() + world_size = ps.get_context_parallel_world_size() + return ps._CONTEXT_PARALLEL_GLOBAL_RANKS[(rank_in_context - 1) % world_size] + + +def get_pipeline_parallel_group_for_new_stream(): + if _PIPELINE_MODEL_PARALLEL_GROUP_FOR_NEW_STREAM is None: + raise AttributeError('Pipeline parallel group of backward is not initialized') + return _PIPELINE_MODEL_PARALLEL_GROUP_FOR_NEW_STREAM + + +def get_context_parallel_group_for_hybrid_ulysses(check_initialized=True): + """Get the context parallel group for hybrid ulysses the caller rank belongs to.""" + if check_initialized: + assert ( + _CONTEXT_PARALLEL_GROUP_FOR_HYBRID_ULYSSES is not None + ), 'context parallel group for hybrid ulysses is not initialized' + return _CONTEXT_PARALLEL_GROUP_FOR_HYBRID_ULYSSES + + +def get_context_parallel_for_hybrid_ulysses_world_size(): + return torch.distributed.get_world_size(group=get_context_parallel_group_for_hybrid_ulysses()) + + +def get_context_parallel_for_hybrid_ulysses_rank(): + return torch.distributed.get_rank(group=get_context_parallel_group_for_hybrid_ulysses()) + + +def get_context_parallel_group_for_hybrid_ring(check_initialized=True): + """Get the context parallel group for hybrid ring the caller rank belongs to.""" + if check_initialized: + assert ( + _CONTEXT_PARALLEL_GROUP_FOR_HYBRID_RING is not None + ), 'context parallel group for hybrid ring is 
not initialized' + return _CONTEXT_PARALLEL_GROUP_FOR_HYBRID_RING + + +def get_context_parallel_for_hybrid_ring_world_size(): + return torch.distributed.get_world_size(group=get_context_parallel_group_for_hybrid_ring()) + + +def get_context_parallel_for_hybrid_ring_rank(): + return torch.distributed.get_rank(group=get_context_parallel_group_for_hybrid_ring()) + + +def get_context_parallel_for_hybrid_ring_global_ranks(): + assert (_CONTEXT_PARALLEL_GROUP_FOR_HYBRID_RING is not None + ), 'context parallel group for hybrid ring is not initialized' + global _CONTEXT_PARALLEL_RANKS_FOR_HYBRID_RING + return _CONTEXT_PARALLEL_RANKS_FOR_HYBRID_RING + + +def get_tp_x_ring_global_ranks(): + global _TP_X_PARALLEL_RING_RANKS + assert (_TP_X_PARALLEL_RING_RANKS is not None), 'TP-X parallel group for ring is not initialized' + return _TP_X_PARALLEL_RING_RANKS + + +def get_tp_y_ring_global_ranks(): + global _TP_Y_PARALLEL_RING_RANKS + assert (_TP_Y_PARALLEL_RING_RANKS is not None), 'TP-Y parallel group for ring is not initialized' + return _TP_Y_PARALLEL_RING_RANKS + + +def destroy_model_parallel_wrapper(destroy_model_parallel): + @wraps(destroy_model_parallel) + def wrapper(): + destroy_model_parallel() + + global _CONTEXT_PARALLEL_GROUP_FOR_SEND_RECV_OVERLAP + global _PIPELINE_MODEL_PARALLEL_GROUP_FOR_NEW_STREAM + global _CONTEXT_PARALLEL_GROUP_FOR_HYBRID_RING + global _CONTEXT_PARALLEL_GROUP_FOR_HYBRID_ULYSSES + global _CONTEXT_PARALLEL_RANKS_FOR_HYBRID_RING + global _CONTEXT_PARALLEL_RANKS_FOR_HYBRID_ULYSSES + global _TP_X_PARALLEL_RING_RANKS + global _TP_Y_PARALLEL_RING_RANKS + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1 + global _TP_X_SD_RCV_OVERLAP_GROUP + global _TP_Y_SD_RCV_OVERLAP_GROUP + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2 + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1_RANK + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2_RANK + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1_WORLD_SIZE + global 
_TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2_WORLD_SIZE + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM1 + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM2 + global _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND1_DIM1 + global _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND1_DIM2 + global _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND2_DIM1 + global _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND2_DIM2 + global _TENSOR_AND_CONTEXT_PARALLEL_GROUP + global _TENSOR_AND_CONTEXT_PARALLEL_GLOBAL_RANKS + _CONTEXT_PARALLEL_GROUP_FOR_SEND_RECV_OVERLAP = None + _PIPELINE_MODEL_PARALLEL_GROUP_FOR_NEW_STREAM = None + _CONTEXT_PARALLEL_GROUP_FOR_HYBRID_RING = None + _CONTEXT_PARALLEL_GROUP_FOR_HYBRID_ULYSSES = None + _CONTEXT_PARALLEL_RANKS_FOR_HYBRID_RING = None + _CONTEXT_PARALLEL_RANKS_FOR_HYBRID_ULYSSES = None + _TENSOR_AND_CONTEXT_PARALLEL_GROUP = None + _TENSOR_AND_CONTEXT_PARALLEL_GLOBAL_RANKS = None + _TP_X_PARALLEL_RING_RANKS = None + _TP_Y_PARALLEL_RING_RANKS = None + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1 = None + _TP_X_SD_RCV_OVERLAP_GROUP = None + _TP_Y_SD_RCV_OVERLAP_GROUP = None + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2 = None + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1_RANK = None + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2_RANK = None + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1_WORLD_SIZE = None + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2_WORLD_SIZE = None + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM1 = None + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM2 = None + _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND1_DIM1 = None + _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND1_DIM2 = None + _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND2_DIM1 = None + _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND2_DIM2 = None + + return wrapper + + +def get_tensor_model_parallel_group_for_nd1_dim1(check_initialized=True): + if check_initialized and _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1 is None: + raise AssertionError('tensor model parallel group for nd1 dim1 is not initialized') + return _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1 
+ + +def get_tp_x_sd_rcv_overlap_group(check_initialized=True): + if check_initialized and _TP_X_SD_RCV_OVERLAP_GROUP is None: + raise AssertionError('tp-x send recv overlap group is not initialized') + return _TP_X_SD_RCV_OVERLAP_GROUP + + +def get_tp_y_sd_rcv_overlap_group(check_initialized=True): + if check_initialized and _TP_Y_SD_RCV_OVERLAP_GROUP is None: + raise AssertionError('tp-y send recv overlap group is not initialized') + return _TP_Y_SD_RCV_OVERLAP_GROUP + + +def get_tensor_model_parallel_group_for_nd1_dim2(check_initialized=True): + if check_initialized and _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2 is None: + raise AssertionError('tensor model parallel group for nd1 dim2 is not initialized') + return _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2 + + +def get_tp_x_ep_group(check_initialized=True): + if check_initialized and _TP_X_EP_GROUP is None: + return get_tensor_model_parallel_group_for_nd1_dim1() + return _TP_X_EP_GROUP + + +def get_tp_x_ep_group_world_size(): + global _TP_X_EP_GROUP_WORLD_SIZE + if _TP_X_EP_GROUP_WORLD_SIZE is None: + _TP_X_EP_GROUP_WORLD_SIZE = torch.distributed.get_world_size(group=get_tp_x_ep_group()) + + return _TP_X_EP_GROUP_WORLD_SIZE + + +def get_tp_x_ep_group_rank(): + global _TP_X_EP_GROUP_RANK + if _TP_X_EP_GROUP_RANK is None: + _TP_X_EP_GROUP_RANK = torch.distributed.get_rank( + group=get_tp_x_ep_group()) + + return _TP_X_EP_GROUP_RANK + + +def get_tensor_model_parallel_group_for_nd2_dim1(check_initialized=True): + if check_initialized and _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM1 is None: + raise AssertionError('tensor model parallel group for nd2 dim1 is not initialized') + return _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM1 + + +def get_tensor_model_parallel_group_for_nd1_dim1_rank(): + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1_RANK + if _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1_RANK is None: + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1_RANK = torch.distributed.get_rank( + 
group=get_tensor_model_parallel_group_for_nd1_dim1()) + + return _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1_RANK + + +def get_tensor_model_parallel_group_for_nd1_dim2_rank(): + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2_RANK + if _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2_RANK is None: + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2_RANK = torch.distributed.get_rank( + group=get_tensor_model_parallel_group_for_nd1_dim2()) + + return _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2_RANK + + +def get_tensor_model_parallel_group_for_nd1_dim1_world_size(): + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1_WORLD_SIZE + if _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1_WORLD_SIZE is None: + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1_WORLD_SIZE = torch.distributed.get_world_size( + group=get_tensor_model_parallel_group_for_nd1_dim1()) + + return _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1_WORLD_SIZE + + +def get_tensor_model_parallel_group_for_nd1_dim2_world_size(): + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2_WORLD_SIZE + if _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2_WORLD_SIZE is None: + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2_WORLD_SIZE = torch.distributed.get_world_size( + group=get_tensor_model_parallel_group_for_nd1_dim2()) + + return _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2_WORLD_SIZE + + +def get_tensor_model_parallel_group_for_nd2_dim2(check_initialized=True): + if check_initialized and _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM2 is None: + raise AssertionError('tensor model parallel group for nd2 dim2 is not initialized') + return _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM2 + + +def get_tensor_model_parallel_world_size_for_nd1_dim1(): + global _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND1_DIM1 + if _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND1_DIM1 is None: + _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND1_DIM1 = torch.distributed.get_world_size( + group=get_tensor_model_parallel_group_for_nd1_dim1() + ) + return _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND1_DIM1 + + +def 
get_tensor_model_parallel_world_size_for_nd1_dim2(): + global _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND1_DIM2 + if _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND1_DIM2 is None: + _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND1_DIM2 = torch.distributed.get_world_size( + group=get_tensor_model_parallel_group_for_nd1_dim2() + ) + return _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND1_DIM2 + + +def get_tensor_model_parallel_world_size_for_nd2_dim1(): + global _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND2_DIM1 + if _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND2_DIM1 is None: + _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND2_DIM1 = torch.distributed.get_world_size( + group=get_tensor_model_parallel_group_for_nd2_dim1() + ) + return _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND2_DIM1 + + +def get_tensor_model_parallel_world_size_for_nd2_dim2(): + global _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND2_DIM2 + if _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND2_DIM2 is None: + _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND2_DIM2 = torch.distributed.get_world_size( + group=get_tensor_model_parallel_group_for_nd2_dim2() + ) + return _TENSOR_MODEL_PARALLEL_WORLD_SIZE_FOR_ND2_DIM2 + + +def initialize_ndmm_parallel_group( + nccl_comm_cfgs: dict, + tensor_model_parallel_size: int = 1, + nd1_dim1_size: int = 1, + nd2_dim1_size: int = 1, +): + import megatron.core.parallel_state as ps + from megatron.training import get_args + from megatron.training.global_vars import _ensure_var_is_not_initialized + + args = get_args() + if not (args.use_nd_matmul or args.tp_2d): + return + + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1 + _ensure_var_is_not_initialized( + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1, 'nd1_dim1' + ) + + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2 + _ensure_var_is_not_initialized( + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2, 'nd1_dim2' + ) + + global _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM1 + _ensure_var_is_not_initialized( + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM1, 'nd2_dim1' + ) + + global 
_TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM2 + _ensure_var_is_not_initialized( + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM2, 'nd2_dim2' + ) + + global _TP_X_PARALLEL_RING_RANKS + _ensure_var_is_not_initialized(_TP_X_PARALLEL_RING_RANKS, 'tp_x_ring_ranks') + + global _TP_Y_PARALLEL_RING_RANKS + _ensure_var_is_not_initialized(_TP_Y_PARALLEL_RING_RANKS, 'tp_y_ring_ranks') + + global _TP_X_SD_RCV_OVERLAP_GROUP + _ensure_var_is_not_initialized(_TP_X_SD_RCV_OVERLAP_GROUP, 'tp_x_overlap_ranks') + + global _TP_Y_SD_RCV_OVERLAP_GROUP + _ensure_var_is_not_initialized(_TP_Y_SD_RCV_OVERLAP_GROUP, 'tp_y_overlap_ranks') + + if tensor_model_parallel_size % nd1_dim1_size != 0: + raise RuntimeError( + f"tensor_model_parallel_size can't divisible by nd1_dim1_size" + ) + + if tensor_model_parallel_size % nd2_dim1_size != 0: + raise RuntimeError( + f"tensor_model_parallel_size can't divisible by nd2_dim1_size" + ) + + rank = torch.distributed.get_rank() + world_size: int = torch.distributed.get_world_size() + num_tensor_model_parallel_group: int = world_size // tensor_model_parallel_size + + tp_nd1_dim1_groups = [] # TPX-RANKS + tp_nd1_dim2_groups = [] + tp_nd2_dim1_groups = [] + tp_nd2_dim2_groups = [] + for i in range(num_tensor_model_parallel_group): + for j in range(tensor_model_parallel_size // nd1_dim1_size): + ranks = range( + i * tensor_model_parallel_size + j * nd1_dim1_size, + i * tensor_model_parallel_size + (j + 1) * nd1_dim1_size + ) + tp_nd1_dim1_groups.append(list(ranks)) + group = torch.distributed.new_group( + ranks, pg_options=ps.get_nccl_options('nd1_dim1', nccl_comm_cfgs) + ) + if args.enable_overlap_ag_with_matmul or args.enable_backward_overlap_ag_with_matmul: + tp_x_ag_overlap_group = torch.distributed.new_group( + ranks, pg_options=ps.get_nccl_options('ag_x_sd_rcv_overlap', nccl_comm_cfgs) + ) + else: + tp_x_ag_overlap_group = None + if rank in ranks: + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM1 = group + _TP_X_SD_RCV_OVERLAP_GROUP = tp_x_ag_overlap_group + 
_TP_X_PARALLEL_RING_RANKS = ranks + + nd1_dim2_size = tensor_model_parallel_size // nd1_dim1_size + for j in range(tensor_model_parallel_size // nd1_dim2_size): + ranks = range( + i * tensor_model_parallel_size + j, + (i + 1) * tensor_model_parallel_size, + nd1_dim1_size + ) + tp_nd1_dim2_groups.append(list(ranks)) + group = torch.distributed.new_group( + ranks, pg_options=ps.get_nccl_options('nd1_dim2', nccl_comm_cfgs) + ) + if args.enable_overlap_ag_with_matmul or args.enable_backward_overlap_ag_with_matmul: + tp_y_ag_overlap_group = torch.distributed.new_group( + ranks, pg_options=ps.get_nccl_options('ag_y_sd_rcv_overlap', nccl_comm_cfgs) + ) + else: + tp_y_ag_overlap_group = None + if rank in ranks: + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND1_DIM2 = group + _TP_Y_SD_RCV_OVERLAP_GROUP = tp_y_ag_overlap_group + _TP_Y_PARALLEL_RING_RANKS = ranks + + for j in range(tensor_model_parallel_size // nd2_dim1_size): + ranks = range( + i * tensor_model_parallel_size + j * nd2_dim1_size, + i * tensor_model_parallel_size + (j + 1) * nd2_dim1_size + ) + tp_nd2_dim1_groups.append(list(ranks)) + group = torch.distributed.new_group( + ranks, pg_options=ps.get_nccl_options('nd2_dim1', nccl_comm_cfgs) + ) + if rank in ranks: + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM1 = group + + nd2_dim2_size = tensor_model_parallel_size // nd2_dim1_size + for j in range(tensor_model_parallel_size // nd2_dim2_size): + ranks = range( + i * tensor_model_parallel_size + j, + (i + 1) * tensor_model_parallel_size, + nd2_dim1_size + ) + tp_nd2_dim2_groups.append(list(ranks)) + group = torch.distributed.new_group( + ranks, pg_options=ps.get_nccl_options('nd2_dim2', nccl_comm_cfgs) + ) + if rank in ranks: + _TENSOR_MODEL_PARALLEL_GROUP_FOR_ND2_DIM2 = group + + print(f'tp-x groups: {tp_nd1_dim1_groups}') + return tp_nd1_dim1_groups + + +def get_data_parallel_group_gloo_replace(with_context_parallel=False): + """Get the data parallel group-gloo the caller rank belongs to.""" + import 
megatron.core.parallel_state as ps + + if with_context_parallel: + assert ( + ps._DATA_PARALLEL_GROUP_WITH_CP_GLOO is None + ), 'data parallel group-gloo with context parallel combined should be None when args.disable_gloo_group is True' + return ps._DATA_PARALLEL_GROUP_WITH_CP + else: + assert ps._DATA_PARALLEL_GROUP_GLOO is None, 'data parallel group-gloo should be None when args.disable_gloo_group is True' + return ps._DATA_PARALLEL_GROUP + + +def get_data_modulo_expert_parallel_group_gloo_replace(with_context_parallel=False): + import megatron.core.parallel_state as ps + + if with_context_parallel: + assert ( + ps._DATA_MODULO_EXPERT_PARALLEL_GROUP_WITH_CP_GLOO is None + ), 'data modulo expert parallel group-gloo with context parallel is not initialized' + return ps._DATA_MODULO_EXPERT_PARALLEL_GROUP_WITH_CP + else: + assert ( + ps._DATA_MODULO_EXPERT_PARALLEL_GROUP_GLOO is None + ), 'data modulo expert parallel group-gloo should be None when args.disable_gloo_group is True' + return ps._DATA_MODULO_EXPERT_PARALLEL_GROUP + + +def new_group_wrapper(fn): + @wraps(fn) + def wrapper(*args, **kwargs): + from megatron.training import get_args + if get_args().disable_gloo_group: + if "backend" in kwargs and kwargs["backend"] == "gloo": + return None + return fn(*args, **kwargs) + return wrapper \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/performance/auto_pipeline_perf/autopipeline_perf.py b/model/train/yoco_moe/mindspeed/core/performance/auto_pipeline_perf/autopipeline_perf.py new file mode 100644 index 000000000..b1ad8a596 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/performance/auto_pipeline_perf/autopipeline_perf.py @@ -0,0 +1,401 @@ +import time +from functools import partial +_TRAIN_START_TIME = time.time() +import json +import os.path +import gc +import copy +import torch +import torch.nn +import torch_npu +from megatron.training import print_rank_0 +from megatron.training.arguments import parse_args +from 
profile_context = {"fwd_time":[], "bwd_time":[]}


class AutoPipeline_Perf:
    """Collect memory and timing statistics during a short profiling run.

    Measurements are accumulated in ``self.context``. Forward/backward step
    times are read from the module-level ``profile_context`` dict.
    """
    autopipeline_perf = None

    def __init__(self, args):
        # Private snapshot of the arguments; restore_args_for_training copies
        # values back from it after profiling has overridden the live args.
        self.args = copy.deepcopy(args)
        self.context = {
            'module': []
        }
        self.modules_hooks = []
        self.profiling_step = 0
        self.stop_profiling_step = 3
        self.unit_mb = 1024 * 1024

    @staticmethod
    def get_memory_status():
        """Return ``(memory_allocated, memory_reserved)`` as reported by ``torch.npu``."""
        used_memory = torch.npu.memory_allocated()
        reserved_memory = torch.npu.memory_reserved()
        return used_memory, reserved_memory

    def _cal_tensor_size(self, tensor):
        """Return ``numel * element_size`` of ``tensor`` divided by ``self.unit_mb``."""
        try:
            return tensor.numel() * tensor.element_size() / self.unit_mb
        except ZeroDivisionError:
            return 0

    def pre_hook_func(self, state, sync: bool, *args, **kargs):
        """Record allocated memory and total input tensor size before a forward.

        ``sync`` and ``kargs`` are accepted but not used here. Tensors are
        counted when passed directly or one level deep inside a tuple/list.
        """
        used_memory, _ = self.get_memory_status()
        torch.npu.reset_max_memory_allocated()
        state['memory'] = used_memory
        size = 0
        for arg in args:
            if isinstance(arg, torch.Tensor):
                size += self._cal_tensor_size(arg)
            elif isinstance(arg, (tuple, list)):
                for t in arg:
                    if isinstance(t, torch.Tensor):
                        size += self._cal_tensor_size(t)
        state['input'] = size

    def post_hook_func(self, state, sync: bool, *args, **kargs):
        """Turn the value stored by ``pre_hook_func`` into memory deltas.

        ``state['peak_memory']`` is the max allocated memory minus the
        pre-forward value; ``state['memory']`` becomes the allocated-memory
        growth floor-divided by ``self.unit_mb``.
        """
        used_memory, _ = self.get_memory_status()
        max_mem = torch.npu.max_memory_allocated()
        state['peak_memory'] = max_mem - state['memory']
        state['memory'] = (used_memory - state['memory']) // self.unit_mb

    def forward_pre_hook(self, name, parent_ctx, ctx):
        """Build a forward-pre hook that profiles into ``ctx``.

        While profiling is active, ``ctx`` is named and attached to
        ``parent_ctx['layers']`` (if that list exists) at construction time.
        """
        if self.profiling_step < self.stop_profiling_step:
            ctx['name'] = name
            if 'layers' in parent_ctx:
                parent_ctx['layers'].append(ctx)

        def hook(module, *args, **kargs):
            if self.profiling_step < self.stop_profiling_step:
                if 'module' in self.context:
                    self.context['module'].append(ctx)
                self.pre_hook_func(ctx, True, *args, **kargs)

        return hook

    def forward_post_hook(self, ctx):
        """Build a forward hook that finalises the measurements in ``ctx``."""
        def hook(module, *args, **kargs):
            if self.profiling_step < self.stop_profiling_step:
                self.post_hook_func(ctx, True, *args)
                if 'module' in self.context:
                    self.context['module'].pop()

        return hook

    def register_recursive_hook(self, prefix_name, model, ctx):
        """Walk ``model.named_children()`` and hook the child named ``module.module``.

        Hook handles are stored in ``self.modules_hooks`` so they can be
        removed later.
        """
        for name, module in model.named_children():
            if 'layers' not in ctx:
                ctx['layers'] = []
            current_ctx = {}

            next_name = prefix_name + "." + name if prefix_name != "" else name
            if next_name == "module.module":
                pre_hook = module.register_forward_pre_hook(self.forward_pre_hook(name, ctx, current_ctx))
                post_hook = module.register_forward_hook(self.forward_post_hook(current_ctx))
                self.modules_hooks.append(pre_hook)
                self.modules_hooks.append(post_hook)
            self.register_recursive_hook(next_name, module, current_ctx)

    def step_hook(self, model):
        """Advance the profiling step counter (``model`` is not used)."""
        self.profiling_step += 1

    def hook_step_func(self, step_func, models):
        """Wrap an optimizer ``step`` so each call records memory and advances profiling."""
        def custom_step_func(*args, **kargs):
            result = step_func(*args, **kargs)
            if self.profiling_step < self.stop_profiling_step:
                used_memory, _ = self.get_memory_status()
                self.context['used_mem'] = used_memory // self.unit_mb
            # step_hook runs once per model, so a list of models advances the
            # counter by len(models) per optimizer step.
            if isinstance(models, list):
                for model in models:
                    self.step_hook(model)
            else:
                self.step_hook(models)
            return result

        return custom_step_func

    def remove_outliers(self, data, m=2):
        """Return the sorted values strictly within ``median ± m * median``.

        An empty input yields an empty list instead of raising ``IndexError``.
        """
        data = sorted(data)
        if not data:
            return []
        median = data[len(data) // 2]
        deviation = [x for x in data if median - m * median < x < median + m * median]
        return deviation

    def _store_mean_time(self, key):
        """Store the outlier-filtered mean of ``profile_context[key]`` in ``self.context[key]``.

        Falls back to 0 when the key is missing or no sample survives the
        filter, so the context entry always exists afterwards.
        """
        samples = profile_context.get(key)
        kept = self.remove_outliers(samples) if samples is not None else []
        if kept:
            self.context[key] = sum(kept) / len(kept)
            return
        if samples is not None:
            print(f"[Warning] no usable {key} samples were profiled; using 0.")
        self.context[key] = 0

    def get_forward_context(self):
        """Publish the mean forward time as ``self.context['fwd_time']``."""
        self._store_mean_time("fwd_time")

    def get_backward_context(self):
        """Publish the mean backward time as ``self.context['bwd_time']``."""
        self._store_mean_time("bwd_time")

    def clear_global_context(self):
        """Reset both timing lists in the module-level ``profile_context``."""
        global profile_context
        profile_context["fwd_time"] = []
        profile_context["bwd_time"] = []

    def get_comm_time(self, config, sync: bool):
        """Time one ``p2p_communication.send_backward`` on rank 0, in milliseconds.

        Every other rank stores the constant 0.028 instead of measuring.
        """
        if torch.distributed.get_rank() == 0:
            if sync:
                torch.cuda.synchronize()
            input_tensor = torch.ones(self.args.seq_length, self.args.micro_batch_size, self.args.hidden_size)
            start_time = time.time()
            p2p_communication.send_backward(input_tensor, config)
            comm_time = (time.time() - start_time) * 1000
            self.context['comm_time'] = comm_time
        else:
            self.context['comm_time'] = 0.028

    def get_peak_memory(self, sync: bool):
        """Store ``torch.npu.max_memory_allocated()`` divided by 2**20 as ``peak_memory``."""
        if sync:
            torch.cuda.synchronize()
        max_mem = torch.npu.max_memory_allocated() / (1 << 20)
        self.context['peak_memory'] = max_mem

    def get_smi_peak_memory(self, sync: bool):
        """Store ``mem_get_info()[1] - mem_get_info()[0]`` divided by 2**20.

        NOTE(review): presumably (free, total) as in torch.cuda.mem_get_info,
        i.e. device memory in use -- confirm for torch.npu.
        """
        if sync:
            torch.cuda.synchronize()
        mem_infos = torch.npu.mem_get_info()
        smi_peak_memory = (mem_infos[1] - mem_infos[0]) / (1 << 20)
        self.context['smi_peak_memory'] = smi_peak_memory

    def get_smi_left_memory(self, sync: bool):
        """Store ``mem_get_info()[0]`` divided by 2**20 as ``smi_left_memory``."""
        if sync:
            torch.cuda.synchronize()
        mem_infos = torch.npu.mem_get_info()
        smi_left_memory = mem_infos[0] / (1 << 20)
        self.context['smi_left_memory'] = smi_left_memory

    def get_data_parallel_size(self, data_parallel_size):
        """Store ``data_parallel_size`` in the context, using 1 when it is falsy."""
        if data_parallel_size:
            self.context['data_parallel_size'] = data_parallel_size
        else:
            self.context['data_parallel_size'] = 1

    def broadcast_param_in_ranks(self, src_rank, param, init_memory):
        """Broadcast a value from ``src_rank`` to all ranks and return it.

        On ``src_rank`` the value is replaced by the max allocated memory (in
        ``unit_mb`` units) minus ``init_memory``. The value travels in an
        ``IntTensor``.
        """
        if torch.distributed.get_rank() == src_rank:
            try:
                param = torch.npu.max_memory_allocated() / self.unit_mb - init_memory
            except ZeroDivisionError:
                print("[Error] Divided by zero.")
        tmp_param = torch.cuda.IntTensor([param])
        torch.distributed.broadcast(tmp_param, src=src_rank)
        param = tmp_param.item()
        return param

    def update_args_for_profiling(self, micro_batch_size=None):
        """Override the live training args for the short profiling run."""
        args = get_args()
        args.train_iters = self.stop_profiling_step
        if micro_batch_size:
            args.micro_batch_size = micro_batch_size
            args.global_batch_size = args.micro_batch_size * 16
        args.save = False
        args.log_interval = 10

    def restore_args_for_training(self):
        """Copy the values saved in ``self.args`` back onto the live training args."""
        args = get_args()
        if args.num_layers_per_virtual_pipeline_stage is None:
            args.num_layers = self.args.num_layers
            args.encoder_num_layers = self.args.num_layers
        args.train_iters = self.args.train_iters
        args.micro_batch_size = self.args.micro_batch_size
        args.global_batch_size = self.args.global_batch_size
        args.save = self.args.save
        args.log_interval = self.args.log_interval
def check_skip_profiling(args, config_file):
    """Look up a previously saved policy for ``args`` in ``config_file``.

    Returns:
        ``(True, (enable_scheduler, optimized_mbs_list, pp_schedule_list,
        optimal_layers))`` when a stored entry matches and carries an
        mbs list or a schedule list; otherwise ``(False, (None, None, None,
        None))``.

    A missing, unreadable or malformed file counts as "nothing cached", so the
    caller falls back to profiling instead of crashing.
    """
    cache_miss = (False, (None, None, None, None))

    # Open directly instead of exists()-then-open: avoids the race between the
    # two calls and handles truncated / corrupt JSON written by another rank.
    try:
        with open(config_file) as config_json:
            parsed_contents = json.load(config_json)
    except (OSError, ValueError):
        return cache_miss

    index = check_equal_model_configs(args, parsed_contents)
    if index == -1:
        return cache_miss

    optimized_type, optimized_mbs_list, pp_schedule_list, optimal_layers = check_equal_parallel_configs(
        args, parsed_contents[index])
    if optimized_mbs_list or pp_schedule_list:
        return True, (optimized_type, optimized_mbs_list, pp_schedule_list, optimal_layers)
    return cache_miss


def check_out_of_memory(args, context, mbs_tries):
    """Predict whether the next, larger micro-batch would exceed device memory.

    The estimate is ``context["smi_peak_memory"]`` plus the per-micro-batch
    memory of the first profiled layer times the pipeline size, plus a fixed
    margin of 1000; it is compared with the total memory of device 0 divided
    by 2**20.
    """
    total_memory = torch_npu.npu.get_device_properties(0).total_memory / (1 << 20)
    per_activation_memory_allocated = context["layers"][0]["memory"] // mbs_tries
    predict_next_max_memory_allocated = (
        context["smi_peak_memory"]
        + per_activation_memory_allocated * args.pipeline_model_parallel_size
        + 1000
    )
    return predict_next_max_memory_allocated > total_memory


def broadcast_skip_in_ranks(src_rank, policy):
    """Broadcast the boolean ``policy`` from ``src_rank`` and return it on every rank."""
    is_skip = [False]
    if torch.distributed.get_rank() == src_rank:
        is_skip = [policy]
    tmp_is_skip = torch.cuda.BoolTensor(is_skip)
    torch.distributed.broadcast(tmp_is_skip, src=src_rank)
    return tmp_is_skip.item()
int(num_of_activations_left) + + +def get_autopipeline_perf(args): + AutoPipeline_Perf.autopipeline_perf = AutoPipeline_Perf(args) + return AutoPipeline_Perf.autopipeline_perf + + +def autopipelineperf_profiling(mbs_tries, model_provider, model_type, forward_step_func, train_valid_test_dataset_provider, + process_non_loss_data_func): + initialize_megatron(extra_args_provider=None, + args_defaults={'tokenizer_type': 'GPT2BPETokenizer'}) + set_jit_fusion_options() + global _TRAIN_START_TIME + start_time_tensor = torch.cuda.DoubleTensor([_TRAIN_START_TIME]) + torch.distributed.all_reduce(start_time_tensor, + op=torch.distributed.ReduceOp.MIN) + _TRAIN_START_TIME = start_time_tensor.item() + print_rank_0('time to initialize megatron (seconds): {:.3f}'.format( + time.time() - _TRAIN_START_TIME)) + print_datetime('after megatron is initialized') + args = get_args() + pipelining = get_autopipeline_perf(args) + pipelining.update_args_for_profiling(mbs_tries) + models, optimizer, lr_scheduler = training.setup_model_and_optimizer(model_provider, model_type) + optimizer.step = pipelining.hook_step_func(optimizer.step, models) + config = training.get_model_config(models[0]) + + if args.virtual_pipeline_model_parallel_size is not None: + train_data_iterator = [] + valid_data_iterator = [] + for i in range(len(models)): + mpu.set_virtual_pipeline_model_parallel_rank(i) + iterators = training.build_train_valid_test_data_iterators( + train_valid_test_dataset_provider) + train_data_iterator.append(iterators[0]) + valid_data_iterator.append(iterators[1]) + else: + train_data_iterator, valid_data_iterator, _ = training.build_train_valid_test_data_iterators( + train_valid_test_dataset_provider) + if isinstance(models, list): + for model in models: + pipelining.register_recursive_hook("module", model, pipelining.context) + else: + pipelining.register_recursive_hook("module", models, pipelining.context) + checkpointing_context = {} + training.train(forward_step_func, models, optimizer, 
lr_scheduler, train_data_iterator, valid_data_iterator, + process_non_loss_data_func, config, checkpointing_context) + pipelining.get_smi_peak_memory(sync=True) + pipelining.get_smi_left_memory(sync=True) + pipelining.get_comm_time(config, sync=True) + pipelining.get_peak_memory(sync=True) + pipelining.get_data_parallel_size(args.data_parallel_size) + pipelining.get_forward_context() + pipelining.get_backward_context() + pipelining.clear_global_context() + + timers = get_timers() + if timers('interval-time'): + timers('interval-time').stop(barrier=True) + + for hook_handle in pipelining.modules_hooks: + hook_handle.remove() + pipelining.modules_hooks.clear() + pipelining.restore_args_for_training() + + if hasattr(optimizer, 'chained_optimizers'): + for op in optimizer.chained_optimizers: + for key, value in op.optimizer.state.items(): + key.detach() + key.grad = None + key.storage().resize_(0) + if "momentum_buffer" in value: + value["momentum_buffer"].detach() + value["momentum_buffer"].grad = None + value["momentum_buffer"].storage().resize_(0) + for ofg in op.param_groups: + if "params" in ofg: + for og in ofg["params"]: + og.detach() + og.grad = None + og.storage().resize_(0) + else: + for key, value in optimizer.optimizer.state.items(): + key.detach() + key.grad = None + key.storage().resize_(0) + if "momentum_buffer" in value: + value["momentum_buffer"].detach() + value["momentum_buffer"].grad = None + value["momentum_buffer"].storage().resize_(0) + for ofg in optimizer.param_groups: + if "params" in ofg: + for og in ofg["params"]: + og.detach() + og.grad = None + og.storage().resize_(0) + for md in models: + for param in md.parameters(): + param.detach() + param.grad = None + param.storage().resize_(0) + for param_tensor in md.state_dict(): + if md.state_dict()[param_tensor] is not None: + md.state_dict()[param_tensor].detach() + md.state_dict()[param_tensor].grad = None + md.state_dict()[param_tensor].storage().resize_(0) + + gc.collect() + 
def build_pretraining_data_loader_decorator(build_pretraining_data_loader):
    """Swap in the dynamic micro-batch sampler when an optimized mbs list is set.

    The wrapped callable is expected to receive the dataset as its first
    positional argument and the consumed-sample count as its second. All other
    configurations are forwarded to the original builder unchanged.
    """
    @wraps(build_pretraining_data_loader)
    def wrapper(*args, **kwargs):
        if args[0] is None:
            return None
        argument = get_args()
        use_dynamic_sampler = (
            argument.dataloader_type == 'single'
            and argument.automated_pipeline_perf
            and argument.optimized_mbs_list
        )
        if not use_dynamic_sampler:
            return build_pretraining_data_loader(*args, **kwargs)

        batch_sampler = DynamicMicroBatchPretrainingSampler(
            total_samples=len(args[0]),
            consumed_samples=args[1],
            micro_batch_size=argument.micro_batch_size,
            data_parallel_rank=mpu.get_data_parallel_rank(),
            data_parallel_size=mpu.get_data_parallel_world_size())
        return torch.utils.data.DataLoader(args[0],
                                           batch_sampler=batch_sampler,
                                           num_workers=argument.num_workers,
                                           pin_memory=True)
    return wrapper


class DynamicMicroBatchPretrainingSampler:
    """Batch sampler whose micro-batch size cycles through ``args.optimized_mbs_list``.

    For each entry of the list, ``size * data_parallel_size`` consecutive
    sample indices are gathered and this rank's contiguous slice is yielded.
    """

    def __init__(self, total_samples, consumed_samples, micro_batch_size,
                 data_parallel_rank, data_parallel_size, drop_last=True):

        args = get_args()
        self.total_samples = total_samples
        self.consumed_samples = consumed_samples
        self.micro_batch_size = micro_batch_size
        self.data_parallel_rank = data_parallel_rank
        self.drop_last = drop_last
        self.dynamic_micro_batch_size = args.optimized_mbs_list
        # Number of indices to collect (across all data-parallel ranks) for
        # each position in the micro-batch-size cycle.
        self.micro_batch_times_data_parallel_size = [
            mbs * data_parallel_size for mbs in self.dynamic_micro_batch_size
        ]

    def __len__(self):
        return self.total_samples

    def get_start_end_idx(self, n_mbs):
        """Return this rank's ``(start, end)`` slice for cycle position ``n_mbs``."""
        start_idx = self.data_parallel_rank * self.dynamic_micro_batch_size[n_mbs]
        end_idx = start_idx + self.dynamic_micro_batch_size[n_mbs]
        return start_idx, end_idx

    def __iter__(self):
        batch = []
        n_mbs = 0
        for idx in range(self.consumed_samples, self.total_samples):
            batch.append(idx)
            if len(batch) == self.micro_batch_times_data_parallel_size[n_mbs]:
                start_idx, end_idx = self.get_start_end_idx(n_mbs)
                yield batch[start_idx:end_idx]
                batch = []
                n_mbs = (n_mbs + 1) % len(self.micro_batch_times_data_parallel_size)

        # Partial tail: slice with the micro-batch size that was being filled.
        # (The call previously omitted n_mbs and raised TypeError.)
        if len(batch) > 0 and not self.drop_last:
            start_idx, end_idx = self.get_start_end_idx(n_mbs)
            yield batch[start_idx:end_idx]
b/model/train/yoco_moe/mindspeed/core/performance/auto_pipeline_perf/optimpipeline_solver.py new file mode 100644 index 000000000..b643d85ea --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/performance/auto_pipeline_perf/optimpipeline_solver.py @@ -0,0 +1,304 @@ +import os +import json +import math +import time +from datetime import datetime +from itertools import product +import numpy as np +import torch +from megatron.training import get_args +from megatron.training.arguments import parse_args +from mindspeed.arguments import parse_args_wrapper +from .autopipeline_perf import check_equal_model_configs + + +class Parallel_Paras: + def __init__(self, + num_stages, + fwd_durations, + bwd_durations, + num_microbatch, + comm_matrix): + self.num_stages = num_stages + self.num_microbatch = num_microbatch + self.fwd_durations = fwd_durations + self.bwd_durations = bwd_durations + self.comm_matrix = comm_matrix + + +def dynamic_mbs_1f1b(paras): + num_stages = paras.num_stages + num_microbatch = paras.num_microbatch + computation_placement = list(range(num_stages)) + list(range(num_stages - 1, -1, -1)) + fwd_durations = paras.fwd_durations + bwd_durations = paras.bwd_durations + comm_matrix = paras.comm_matrix + + fwd_bwd_order = ([f'F_{i}' for i in range(num_stages)] + + [f'B_{i}' for i in range(num_stages - 1, -1, -1)]) + fwd_bwd_chunk_stage = dict(zip(fwd_bwd_order, computation_placement)) + + def get_stage_list(fwd_seq, bwd_seq, num_advanced): + stage_order = [] + n = len(fwd_seq) + for idx in range(n): + if idx < num_advanced: + stage_order.append(fwd_seq[idx]) + else: + stage_order.append(fwd_seq[idx]) + stage_order.append(bwd_seq[idx - num_advanced]) + if idx == n - 1: + for i in range(num_advanced): + stage_order.append(bwd_seq[i - num_advanced]) + + return stage_order + + def get_stage_schedule(all_jobs_array, comp_placement): + stage_list = [] + for s in range(num_stages): + stage_chunk_id = [index for index, element in enumerate(comp_placement) if element 
== s] + warmup = num_stages - s + stage_s_list = get_stage_list(all_jobs_array[stage_chunk_id[0]], + all_jobs_array[stage_chunk_id[1]], + warmup - 1) + stage_list.append(stage_s_list) + + return stage_list + + all_jobs = np.array([[s + f'-{i}' for i in range(num_microbatch)] for s in fwd_bwd_order]) + stage_list = get_stage_schedule(all_jobs, computation_placement) + + fwd_bwd_list = ([f"F_{j}-{i}" for i in range(num_microbatch) for j in range(num_stages)] + + [f"B_{j}-{i}" for i in range(num_microbatch) for j in range(num_stages)]) + values = [0 for _ in range(num_stages * num_microbatch * 2)] + start_time = dict(zip(fwd_bwd_list, values)) + fwd_bwd_durations = dict() + for j in range(num_stages): + for i in range(num_microbatch): + fwd_bwd_durations[f"F_{j}-{i}"] = fwd_durations[j, i] + fwd_bwd_durations[f"B_{j}-{i}"] = bwd_durations[j, i] + + for n in range(num_stages - 1): + for s in range(n + 1): + start_time[f"F_{s}-{n - s + 1}"] = max(start_time[f"F_{s}-{n - s + 1}"], + start_time[f"F_{s}-{n - s}"] + fwd_durations[s, n - s] + comm_matrix[s][s + 1]) + start_time[f"F_{s + 1}-{n - s}"] = max(start_time[f"F_{s + 1}-{n - s}"], + start_time[f"F_{s}-{n - s}"] + fwd_durations[s, n - s] + comm_matrix[s][s + 1]) + + def get_prev_job_time(comp_start_time, pp_list, pp_id, mb_idx, + comp_chunk_stage, comp_order, model_chunk_times, + comm_time_matrix): + current_job = pp_list[pp_id][mb_idx] + prev_job_stage = pp_list[pp_id][mb_idx - 1] + chunk_prev_job_stage, _ = prev_job_stage.split('-') + stage_id_prev_job = comp_chunk_stage[chunk_prev_job_stage] + chunk_position = comp_order.index(chunk_prev_job_stage) + if chunk_position < len(comp_order) - 1: + stage_id_next = comp_chunk_stage[comp_order[chunk_position + 1]] + comm_time = comm_time_matrix[stage_id_prev_job][stage_id_next] + else: + comm_time = 0 + end_time_prev_job_stage = (comp_start_time[prev_job_stage] + model_chunk_times[prev_job_stage] + + comm_time) + + cur_model_chunk, cur_mb = current_job.split('-') + 
def find_integer_solutions(coefficients, global_batch_size):
    """Enumerate count vectors ``x`` with ``sum(coefficients[i] * x[i]) == global_batch_size``.

    Every position may take a count in ``0..3`` except the middle position
    (index ``(len(coefficients) + 1) // 2 - 1``), whose range is derived from
    the batch size. Returns a list of tuples in ``itertools.product`` order.
    """
    n = len(coefficients)
    mbs_max_value = (n + 1) // 2
    all_comb = []
    for i in range(n):
        if i == mbs_max_value - 1:
            batch_using = 4 * sum(coefficients[0:mbs_max_value - 1])
            # Clamp at zero: a negative count is never valid, yet it could
            # still satisfy the equality below and be returned as a "solution".
            lowest = max(0, (global_batch_size - batch_using) // mbs_max_value)
            all_comb.append(range(lowest, global_batch_size // mbs_max_value + 1))
        else:
            all_comb.append(range(4))

    return [
        x for x in product(*all_comb)
        if sum(c * k for c, k in zip(coefficients, x)) == global_batch_size
    ]


def dynamic_mbs_search(num_stages, global_batch_size, fwd_mbs, bwd_mbs, comm_matrix):
    """Pick the micro-batch-size sequence with the smallest simulated 1F1B time.

    Args:
        num_stages: number of pipeline stages.
        global_batch_size: number of samples to schedule.
        fwd_mbs / bwd_mbs: forward / backward durations indexed by
            ``micro_batch_size - 1``.
        comm_matrix: stage-to-stage communication times.

    Returns:
        ``(optimal_solution, e2e_time)`` where ``optimal_solution`` is a count
        tuple as produced by ``find_integer_solutions``.

    Raises:
        ValueError: if no count vector reaches ``global_batch_size``.
    """
    # Only sizes up to the one with the lowest forward time per sample are kept.
    comp_mbs_ratio = [value / (index + 1) for index, value in enumerate(fwd_mbs)]
    mbs_max_value = comp_mbs_ratio.index(min(comp_mbs_ratio)) + 1
    # Forward timings feed the forward table and backward the backward table
    # (they were previously assigned crosswise).
    fwd_mbs_selected = fwd_mbs[0:mbs_max_value]
    bwd_mbs_selected = bwd_mbs[0:mbs_max_value]

    coefficients = list(range(1, mbs_max_value + 1)) + list(range(mbs_max_value - 1, 0, -1))
    solutions = find_integer_solutions(coefficients, global_batch_size)
    if not solutions:
        raise ValueError(
            f'no micro-batch combination sums to global batch size {global_batch_size}')

    paras = Parallel_Paras(num_stages, None, None, 0, comm_matrix)
    e2e_time = []
    for sol in solutions:
        # Expand the counts into the actual sequence of micro-batch sizes.
        mbs_list = [mbs for count, mbs in zip(sol, coefficients) for _ in range(count)]
        fwd_row = [fwd_mbs_selected[mbs - 1] for mbs in mbs_list]
        bwd_row = [bwd_mbs_selected[mbs - 1] for mbs in mbs_list]

        # Every stage uses the same per-micro-batch durations.
        paras.fwd_durations = np.array([fwd_row] * num_stages, dtype=float)
        paras.bwd_durations = np.array([bwd_row] * num_stages, dtype=float)
        paras.num_microbatch = len(mbs_list)

        e2e_time.append(dynamic_mbs_1f1b(paras)[0])

    e2e_time_array = np.array(e2e_time)
    optimal_solution = solutions[e2e_time_array.argmin()]
    return optimal_solution, e2e_time_array.min()


def broadcast_oom_in_ranks(src_rank, policy):
    """Broadcast the boolean ``policy`` from ``src_rank`` and return it on every rank."""
    is_oom = [True]
    if torch.distributed.get_rank() == src_rank:
        is_oom = [policy]
    tmp_is_oom = torch.cuda.BoolTensor(is_oom)
    torch.distributed.broadcast(tmp_is_oom, src=src_rank)
    return tmp_is_oom.item()


def broadcast_mbs_in_ranks(src_rank, optimal_solution):
    """Broadcast the count vector from ``src_rank`` and expand it on every rank.

    The expanded micro-batch-size list is stored in ``args.optimized_mbs_list``
    and returned.
    """
    args = get_args()
    is_src = torch.distributed.get_rank() == src_rank

    # The length goes first so receiving ranks can allocate a matching buffer.
    solution_length = [len(optimal_solution)] if is_src else [0]
    tmp_solution_length = torch.cuda.IntTensor(solution_length)
    torch.distributed.broadcast(tmp_solution_length, src=src_rank)
    solution_length = tmp_solution_length.item()

    tmp_optimal_solution = optimal_solution if is_src else [0] * solution_length
    tmp_optimal_solution = torch.cuda.IntTensor(tmp_optimal_solution)
    torch.distributed.broadcast(tmp_optimal_solution, src=src_rank)
    tmp_optimal_solution = tmp_optimal_solution.tolist()

    mbs_max_value = math.ceil(len(tmp_optimal_solution) / 2)
    coefficients = list(range(1, mbs_max_value + 1)) + list(range(mbs_max_value - 1, 0, -1))
    optimal_mbs_list = sum([tmp_optimal_solution[i] * [coefficients[i]] for i in range(len(tmp_optimal_solution))], [])
    args.optimized_mbs_list = optimal_mbs_list
    return optimal_mbs_list
def save_profiling_data(policy, config_file):
    """Persist ``policy`` for the current model/parallel configuration.

    Only ranks whose index is a multiple of ``GPUS_PER_NODE`` (default 8)
    write. The file holds a JSON list with one entry per model configuration;
    the policy is appended to the matching entry, or a new entry is added.
    """
    if torch.distributed.get_rank() % int(os.getenv('GPUS_PER_NODE', '8')) != 0:
        return

    new_parse_args = parse_args_wrapper(parse_args)
    args = new_parse_args(None, False)
    instance = get_profiling_data(policy, args)

    if os.path.exists(config_file):
        with open(config_file, "r") as config_json:
            parsed_contents = json.load(config_json)
        index = check_equal_model_configs(args, parsed_contents)
        if index != -1:
            # setdefault: an entry without a policy list used to make the new
            # policy vanish silently.
            parsed_contents[index].setdefault("optimpipeline_policy", []).append(
                instance["optimpipeline_policy"][0])
        else:
            parsed_contents.append(instance)
    else:
        parsed_contents = [instance]

    with open(config_file, "w") as f:
        json.dump(parsed_contents, f, ensure_ascii=False)
    os.chmod(config_file, 0o644)
class PipelineParallelParas:
    """Plain container for the inputs of the pipeline timing models."""

    def __init__(self,
                 num_stages,
                 fwd_durations,
                 bwd_durations,
                 num_microbatches,
                 comm_matrix,
                 num_layers):
        self.num_stages = num_stages
        self.num_microbatches = num_microbatches
        self.fwd_durations = fwd_durations
        self.bwd_durations = bwd_durations
        self.comm_matrix = comm_matrix
        self.num_layers = num_layers


def time_model_1f1b(paras):
    """Compute start times and the end-to-end time of a 1F1B schedule.

    ``paras.fwd_durations`` / ``paras.bwd_durations`` are indexed by stage and
    ``paras.comm_matrix[a][b]`` is the transfer time from stage ``a`` to ``b``.

    Returns:
        ``(e2e_time, fwd_start, bwd_start)``; both arrays have shape
        ``[num_stages, num_microbatches]``.
    """
    stage_count = paras.num_stages
    mb_count = paras.num_microbatches
    fwd_cost = paras.fwd_durations
    bwd_cost = paras.bwd_durations
    link = paras.comm_matrix
    tail = stage_count - 1

    fwd_start = np.zeros([stage_count, mb_count])
    bwd_start = np.zeros([stage_count, mb_count])

    # Micro-batches left for the steady phase once a stage's warm-up depth
    # (stage_count - stage) has been subtracted.
    steady_budget = [mb_count - (stage_count - stage) for stage in range(stage_count)]

    # Warm-up on the first stage: forwards run back to back.
    for mb in range(stage_count):
        fwd_start[0, mb] = mb * fwd_cost[0]

    # Warm-up on later stages: wait for the upstream activation and for the
    # stage's own previous forward.
    for stage in range(1, stage_count):
        prev = stage - 1
        fwd_start[stage, 0] = fwd_start[prev, 0] + fwd_cost[prev] + link[prev][stage]
        for mb in range(1, stage_count - stage):
            from_upstream = fwd_start[prev, mb] + fwd_cost[prev] + link[prev][stage]
            after_own_fwd = fwd_start[stage, mb - 1] + fwd_cost[stage]
            fwd_start[stage, mb] = max(from_upstream, after_own_fwd)

    # Backward of micro-batch 0 starts on the last stage and flows upstream.
    bwd_start[tail, 0] = fwd_start[tail, 0] + fwd_cost[tail]
    for stage in reversed(range(tail)):
        nxt = stage + 1
        bwd_start[stage, 0] = bwd_start[nxt, 0] + bwd_cost[nxt] + link[nxt][stage]

    # Steady state: one forward followed by one backward per micro-batch.
    for mb in range(1, mb_count):
        for stage in range(stage_count):
            if mb > steady_budget[stage]:
                continue
            slot = mb + stage_count - stage - 1
            if stage == 0:
                fwd_start[stage, slot] = bwd_start[stage, mb - 1] + bwd_cost[stage]
            else:
                fwd_start[stage, slot] = max(
                    fwd_start[stage - 1, slot] + fwd_cost[stage - 1] + link[stage - 1][stage],
                    bwd_start[stage, mb - 1] + bwd_cost[stage])

        for stage in range(tail, -1, -1):
            if mb + stage_count - stage > mb_count:
                # Cool-down: no forward left, only the downstream gradient gates.
                bwd_start[stage, mb] = bwd_start[stage + 1, mb] + bwd_cost[stage + 1] + link[stage + 1][stage]
            elif stage == tail:
                bwd_start[stage, mb] = fwd_start[stage, mb] + fwd_cost[stage]
            else:
                bwd_start[stage, mb] = max(
                    bwd_start[stage + 1, mb] + bwd_cost[stage + 1] + link[stage + 1][stage],
                    fwd_start[stage, mb + stage_count - stage - 1] + fwd_cost[stage])

    e2e_time = bwd_start[0, -1] + bwd_cost[0]
    return e2e_time, fwd_start, bwd_start
= dict(zip(fwd_bwd_list, values)) + fwd_bwd_durations = dict() + fwd_durations = np.array(paras.fwd_durations * num_mb).reshape(num_mb, num_stages).transpose() + bwd_durations = np.array(paras.bwd_durations * num_mb).reshape(num_mb, num_stages).transpose() + for j in range(num_stages): + for i in range(num_mb): + fwd_bwd_durations[f"F_{j}-{i}"] = fwd_durations[j, i] + fwd_bwd_durations[f"B_{j}-{i}"] = bwd_durations[j, i] + + start_time[f"F_{0}-{0}"] = 0.1 + for s in range(num_stages - 1): + start_time[f"F_{s + 1}-{0}"] = start_time[f"F_{s}-{0}"] + fwd_durations[s, 0] + comm_matrix[s][s + 1] + + # 获取当前任务的上一个任务以及依赖任务的结束时间 + def get_prev_task_time(task_start_time, task_list, pp_stage_id, mb_idx, + chunk_stage_map, comp_order, model_chunk_times, + comm_time_matrix): + current_task = task_list[pp_stage_id][mb_idx] + prev_task_same_stage = task_list[pp_stage_id][mb_idx - 1] + chunk_id_prev_task_same_stage, _ = prev_task_same_stage.split('-') + stage_id_prev_task = chunk_stage_map[chunk_id_prev_task_same_stage] + chunk_position = comp_order.index(chunk_id_prev_task_same_stage) + # 前一个任务计算完成后的通信时间 + if chunk_position < len(comp_order) - 1: + stage_id_next = chunk_stage_map[comp_order[chunk_position + 1]] + comm_time = comm_time_matrix[stage_id_prev_task][stage_id_next] + else: + comm_time = 0.01 + # 同一个stage上,前一个任务完成时间 + end_time_prev_task_stage = (task_start_time[prev_task_same_stage] + + model_chunk_times[prev_task_same_stage] + + comm_time) + + # 相同micro batch id,上一个model chunk上的计算时间 + cur_model_chunk, cur_mb = current_task.split('-') + chunk_position = comp_order.index(cur_model_chunk) + if chunk_position > 0: + prev_model_chunk = comp_order[chunk_position - 1] + prev_task_batch = prev_model_chunk + '-' + cur_mb + comm_time = comm_time_matrix[chunk_stage_map[prev_model_chunk]][chunk_stage_map[cur_model_chunk]] + end_time_dependent_task_batch = (task_start_time[prev_task_batch] + + model_chunk_times[prev_task_batch] + + comm_time) + completed_flag = 
def _build_1f1b_stage_order(fwd_seq, bwd_seq, num_advanced):
    """Interleave one stage's forward and backward jobs in 1F1B order.

    The stage first runs ``num_advanced`` warm-up forwards, then alternates
    one forward with one backward, and finally drains the backwards that are
    still outstanding.

    Args:
        fwd_seq: forward job names of this stage, ordered by micro-batch.
        bwd_seq: backward job names of this stage, ordered by micro-batch.
        num_advanced: requested number of warm-up forwards.

    Returns:
        A list holding every entry of ``fwd_seq`` and ``bwd_seq`` exactly once.
    """
    total = len(fwd_seq)
    # A stage can neither warm up with more forwards than there are
    # micro-batches nor with a negative amount.  Without the clamp a large
    # value produced a forward-only schedule and a negative one indexed
    # bwd_seq out of range.
    warmup = max(0, min(int(num_advanced), total))

    stage_order = list(fwd_seq[:warmup])
    for offset in range(total - warmup):
        stage_order.append(fwd_seq[warmup + offset])
        stage_order.append(bwd_seq[offset])
    # Cool-down: the last `warmup` backwards (slice from total - warmup so
    # that warmup == 0 yields an empty tail instead of the whole list).
    stage_order.extend(bwd_seq[total - warmup:])
    return stage_order


def get_schedule_1f1b(paras):
    """Generate the classic 1F1B schedule for every pipeline stage.

    Args:
        paras: object exposing ``num_stages`` and ``num_microbatches``.

    Returns:
        Dict mapping ``"stage{s}"`` to a list such as ``["F0", "F1", "B0", ...]``
        where the number is the micro-batch index. Stage ``s`` is given
        ``num_stages - s - 1`` warm-up forwards (clamped to the number of
        micro-batches).
    """
    num_stages = paras.num_stages
    num_microbatches = paras.num_microbatches
    fwd_jobs = [f"F{i}" for i in range(num_microbatches)]
    bwd_jobs = [f"B{i}" for i in range(num_microbatches)]

    stage_schedule_dict = dict()
    for s in range(num_stages):
        stage_schedule_dict[f"stage{s}"] = _build_1f1b_stage_order(
            fwd_jobs, bwd_jobs, num_stages - s - 1)
    return stage_schedule_dict


def get_schedule_eager1f1b(paras, num_forwards, layers_placement):
    """Generate an eager-1F1B schedule whose warm-up depth is memory bounded.

    Args:
        paras: object exposing ``num_stages``, ``num_microbatches`` and
            ``num_layers``.
        num_forwards: extra forwards added to ``num_stages`` when computing the
            activation budget (``activations_num`` below).
        layers_placement: number of layers placed on each stage.

    Returns:
        Dict mapping ``"stage{s}"`` to the ordered list of ``"F{i}"`` / ``"B{i}"``
        jobs of that stage.
    """
    num_stages = paras.num_stages
    num_microbatches = paras.num_microbatches

    # Activation budget expressed in "layers", then converted into a number of
    # in-flight micro-batches per stage according to its layer count.
    activations_num = int(paras.num_layers // num_stages) * (num_forwards + num_stages)
    nums_under_memory = [int(activations_num // layers_placement[s]) for s in range(num_stages)]
    warmups = [min(nums_under_memory[s] - s - 1, 2 * num_stages - 2 * s - 2)
               for s in range(num_stages)]
    # A later stage must warm up with strictly fewer forwards than the stage
    # before it, but never with a negative amount.
    for s in range(num_stages - 1):
        warmups[s + 1] = max(min(warmups[s] - 1, warmups[s + 1]), 0)

    fwd_jobs = [f"F{i}" for i in range(num_microbatches)]
    bwd_jobs = [f"B{i}" for i in range(num_microbatches)]

    # Convert to a dictionary keyed by stage name.
    stage_schedule_dict = dict()
    for s in range(num_stages):
        stage_schedule_dict[f"stage{s}"] = _build_1f1b_stage_order(
            fwd_jobs, bwd_jobs, warmups[s])
    return stage_schedule_dict
def broadcast_enable_schedule_in_ranks(src_rank, policy):
    """Broadcast the boolean ``policy`` held by ``src_rank`` to every rank.

    Returns:
        The flag as seen by ``src_rank``.
    """
    enable_schedule = [False]
    if torch.distributed.get_rank() == src_rank:
        enable_schedule = [policy]
    tmp_enable_schedule = torch.cuda.BoolTensor(enable_schedule)
    torch.distributed.broadcast(tmp_enable_schedule, src=src_rank)
    return tmp_enable_schedule.item()


def broadcast_scheduler_in_ranks(src_rank, policy):
    """Broadcast the JSON-serialisable schedule ``policy`` from ``src_rank``.

    The payload length is broadcast first so that receivers can size their
    buffer without having to hold an identically sized local policy. The
    decoded policy is stored in ``args.pp_schedule_list`` and returned.
    """
    args = get_args()
    is_source = torch.distributed.get_rank() == src_rank
    payload = json.dumps(policy).encode("utf-8") if is_source else b""

    length_tensor = torch.cuda.IntTensor([len(payload)])
    torch.distributed.broadcast(length_tensor, src_rank)

    if is_source:
        byte_tensor = torch.cuda.ByteTensor(list(payload))
    else:
        byte_tensor = torch.cuda.ByteTensor([0] * int(length_tensor.item()))
    torch.distributed.broadcast(byte_tensor, src_rank)

    received_policy_str = bytes(byte_tensor.tolist()).decode("utf-8")
    received_policy_data = json.loads(received_policy_str)
    args.pp_schedule_list = received_policy_data
    return received_policy_data


def broadcast_layer_in_ranks(src_rank, policy):
    """Broadcast the per-stage layer counts ``policy`` from ``src_rank``.

    The received list is stored in ``args.num_layer_list`` and returned.
    """
    args = get_args()
    num_layer_list = args.pipeline_model_parallel_size * [0]
    # Only the broadcasting rank contributes its policy; this used to test
    # against rank 0 regardless of src_rank.
    if torch.distributed.get_rank() == src_rank:
        num_layer_list = policy
    tmp_layer_list = torch.cuda.IntTensor(num_layer_list)
    torch.distributed.broadcast(tmp_layer_list, src=src_rank)
    args.num_layer_list = tmp_layer_list.tolist()
    return tmp_layer_list.tolist()


def all_gather_time(args, gather_time):
    """All-gather one timing value per rank.

    Returns:
        List of ``(pipeline_stage_rank, time)`` tuples ordered by global rank.
    """
    recv_gather_time_list = []
    world_size = torch.distributed.get_world_size()
    gather_time = torch.cuda.FloatTensor([gather_time])
    gathered_tensors = [torch.zeros_like(gather_time) for _ in range(world_size)]
    torch.distributed.all_gather(gathered_tensors, gather_time)
    for rank, tensor in enumerate(gathered_tensors):
        pipeline_stage_rank = get_pipeline_stage_rank(world_size, args.pipeline_model_parallel_size, rank)
        recv_gather_time_list.append((pipeline_stage_rank, tensor.item()))
    return recv_gather_time_list


def average_time_by_rank(time_list):
    """Average the measured times of each pipeline stage.

    Args:
        time_list: iterable of ``(stage_rank, time)`` pairs; a stage may appear
            any number of times.

    Returns:
        Dict mapping each stage rank (in first-seen order) to the arithmetic
        mean of its times.
    """
    totals = {}
    counts = {}
    for item in time_list:
        stage_rank, elapsed = item[0], item[1]
        totals[stage_rank] = totals.get(stage_rank, 0.0) + elapsed
        counts[stage_rank] = counts.get(stage_rank, 0) + 1
    # Divide once at the end: halving a running value after every sample
    # over-weights the later samples as soon as a stage has more than two.
    return {stage_rank: totals[stage_rank] / counts[stage_rank] for stage_rank in totals}


def get_pipeline_stage_rank(world_size, num_stages, global_rank):
    """Map a global rank to its pipeline stage index.

    The world is cut into ``num_stages`` contiguous, equally sized rank ranges.
    NOTE(review): this presumes pipeline stages own contiguous global ranks;
    confirm against the process-group layout in use.
    """
    assert world_size % num_stages == 0, "World size must be divisible by the number of stages"
    assert global_rank < world_size, "Global rank must be less than world size"

    stage_size = world_size // num_stages
    return global_rank // stage_size


def solve_pipelineschedule(args, data_parallel_size, num_forwards_first_stage, forward_time_dict, backward_time_dict):
    """Search the layer placement and eager-1F1B schedule with the lowest modelled time.

    Args:
        args: training arguments (pipeline size, batch sizes, layer count).
        data_parallel_size: data-parallel world size.
        num_forwards_first_stage: value forwarded to ``schedule_layers`` as its
            second argument.
        forward_time_dict: per-stage forward times, iterated in dict order.
        backward_time_dict: per-stage backward times, iterated in dict order.

    Returns:
        ``(schedule_list, schedule_time, optimal_layers)``.
    """
    pipeline_stages = args.pipeline_model_parallel_size
    forward_time_each_stage = list(forward_time_dict.values())
    backward_time_each_stage = list(backward_time_dict.values())
    # Uniform point-to-point cost between any two stages.
    comm_matrix = [[0.05] * pipeline_stages for _ in range(pipeline_stages)]
    num_micro_batches = args.global_batch_size // data_parallel_size // args.micro_batch_size
    num_layers = args.num_layers

    pp_paras = PipelineParallelParas(pipeline_stages,
                                     forward_time_each_stage,
                                     backward_time_each_stage,
                                     num_micro_batches,
                                     comm_matrix,
                                     num_layers)
    # FB schedule
    schedule_list, optimal_layers, schedule_time = schedule_layers(pp_paras, num_forwards_first_stage)
    return schedule_list, schedule_time, optimal_layers
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time +from functools import wraps +import contextlib +from typing import Callable, Iterator, List, Optional, Union +import torch +from torch.autograd.variable import Variable +from megatron.core import parallel_state +from megatron.core.enums import ModelType +from megatron.core.pipeline_parallel import p2p_communication +from megatron.core.utils import get_model_config, get_model_type +from megatron.training import get_args +from megatron.core.pipeline_parallel.schedules import forward_step, backward_step, deallocate_output_tensor, check_first_val_step +from mindspeed.core.performance.auto_pipeline_perf.autopipeline_perf import profile_context +import mindspeed.core.training as training + + +def get_forward_backward_func_decorator(get_forward_backward_func): + @wraps(get_forward_backward_func) + def wrapper(*args, **kwargs): + argument = get_args() + pipeline_model_parallel_size = parallel_state.get_pipeline_model_parallel_world_size() + if pipeline_model_parallel_size > 1 and argument.automated_pipeline_perf and argument.optimized_mbs_list: + forward_backward_func = optimized_forward_backward_pipelining + else: + forward_backward_func = get_forward_backward_func(*args, **kwargs) + return forward_backward_func + return wrapper + + +def forward_step_decorator(fn): + @wraps(fn) + def wrapper(*args, **kwargs): + argument = get_args() + if argument.automated_pipeline_perf and not (argument.optimized_mbs_list or argument.pp_schedule_list): + torch.cuda.synchronize() + start_time = time.time() + output_tensor = fn(*args, **kwargs) + 
def optimized_forward_backward_pipelining(
        *,
        forward_step_func,
        data_iterator: Union[Iterator, List[Iterator]],
        model: Union[torch.nn.Module, List[torch.nn.Module]],
        num_microbatches: int,
        seq_length: int,
        micro_batch_size: int,
        decoder_seq_length: int = None,
        forward_only: bool = False,
        collect_non_loss_data: bool = False,
        first_val_step: bool = None,
):
    """Run the non-interleaved 1F1B schedule with a per-micro-batch batch size.

    The micro-batch sizes come from ``args.optimized_mbs_list``; its length
    replaces ``num_microbatches`` and ``args.micro_batch_size`` is rewritten
    before every forward step. On exit ``args.micro_batch_size`` is restored to
    ``training.ORIGIN_MBS`` and ``args.optimized_mbs_mode`` is cleared.

    Returns:
        ``forward_data_store``, the list filled in by ``forward_step``.

    Raises:
        ValueError: if ``config.overlap_p2p_comm`` is enabled.
    """
    if isinstance(model, list):
        model = model[0]
    if isinstance(data_iterator, list):
        data_iterator = data_iterator[0]
    argument = get_args()
    config = get_model_config(model)
    model_type = get_model_type(model)
    tensor_shapes = get_tensor_shapes()
    # cnt_fwd / cnt_bwd index the next micro-batch to run forward / backward;
    # they select the matching entry of tensor_shapes and optimized_mbs_list.
    cnt_fwd, cnt_bwd = 0, 0
    argument.mbs_idx = cnt_fwd
    argument.optimized_mbs_mode = True
    num_microbatches = len(argument.optimized_mbs_list)
    if config.overlap_p2p_comm:
        raise ValueError(
            "Optimized pipeline parallelism does not support overlapping p2p communication"
        )

    # Disable async grad reductions
    no_sync_func = config.no_sync_func
    if no_sync_func is None:
        no_sync_func = contextlib.nullcontext
    no_sync_context = None

    def disable_grad_sync():
        """Disable asynchronous grad reductions"""
        nonlocal no_sync_context
        if no_sync_context is None:
            no_sync_context = no_sync_func()
            no_sync_context.__enter__()

    def enable_grad_sync():
        """Enable asynchronous grad reductions"""
        nonlocal no_sync_context
        if no_sync_context is not None:
            no_sync_context.__exit__(None, None, None)
            no_sync_context = None

    disable_grad_sync()

    # Compute number of warmup microbatches.
    num_warmup_microbatches = \
        (parallel_state.get_pipeline_model_parallel_world_size() -
         parallel_state.get_pipeline_model_parallel_rank() - 1)
    num_warmup_microbatches = min(
        num_warmup_microbatches,
        num_microbatches)
    num_microbatches_remaining = \
        num_microbatches - num_warmup_microbatches

    input_tensors = []
    output_tensors = []
    forward_data_store = []
    rank = parallel_state.get_pipeline_model_parallel_rank()

    # Run warmup forward passes.
    for i in range(num_warmup_microbatches):
        input_tensor = p2p_communication.recv_forward(config=config,
                                                      tensor_shape=tensor_shapes[cnt_fwd])
        argument.micro_batch_size = argument.optimized_mbs_list[cnt_fwd]
        output_tensor = forward_step(
            forward_step_func,
            data_iterator,
            model,
            num_microbatches,
            input_tensor,
            forward_data_store,
            config,
            collect_non_loss_data,
            None,
            check_first_val_step(first_val_step, forward_only, i == 0),
        )
        p2p_communication.send_forward(output_tensor, config=config)
        cnt_fwd += 1
        # Activations are only needed for the backward pass; keeping them in
        # forward-only mode would hold every micro-batch alive.
        if not forward_only:
            input_tensors.append(input_tensor)
            output_tensors.append(output_tensor)
            deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)

    # Before running 1F1B, need to receive first forward tensor.
    # If all microbatches are run in warmup / cooldown phase, then no need to
    # receive this tensor here.
    if num_microbatches_remaining > 0:
        input_tensor = p2p_communication.recv_forward(config=config,
                                                      tensor_shape=tensor_shapes[cnt_fwd])

    # Run 1F1B in steady state.
    for i in range(num_microbatches_remaining):
        last_iteration = (i == (num_microbatches_remaining - 1))
        argument.micro_batch_size = argument.optimized_mbs_list[cnt_fwd]
        output_tensor = forward_step(
            forward_step_func,
            data_iterator,
            model,
            num_microbatches,
            input_tensor,
            forward_data_store,
            config,
            collect_non_loss_data,
            None,
            check_first_val_step(
                first_val_step, forward_only, (i == 0) and (num_warmup_microbatches == 0)
            ),
        )
        if forward_only:
            p2p_communication.send_forward(output_tensor, config=config)
            cnt_fwd += 1
            # Receive the next micro-batch exactly once, with the shape of
            # that next micro-batch (index already advanced).
            if not last_iteration:
                input_tensor = p2p_communication.recv_forward(config=config,
                                                              tensor_shape=tensor_shapes[cnt_fwd])
        else:
            output_tensor_grad = \
                p2p_communication.send_forward_recv_backward(output_tensor,
                                                             tensor_shape=tensor_shapes[cnt_bwd],
                                                             config=config)
            cnt_fwd += 1

            # Add input_tensor and output_tensor to end of list, then pop from the
            # start of the list for backward pass.
            input_tensors.append(input_tensor)
            output_tensors.append(output_tensor)
            deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)

            input_tensor, output_tensor = input_tensors.pop(0), output_tensors.pop(0)
            # Enable grad sync for the last backward of a stage that has no
            # cooldown phase.
            if num_warmup_microbatches == 0 and last_iteration:
                if config.grad_sync_func is None or rank == 0:
                    enable_grad_sync()

            input_tensor_grad = \
                backward_step(input_tensor, output_tensor,
                              output_tensor_grad, model_type, config)

            if last_iteration:
                input_tensor = None
                p2p_communication.send_backward(input_tensor_grad, config=config)
            else:
                input_tensor = \
                    p2p_communication.send_backward_recv_forward(
                        input_tensor_grad, tensor_shape=tensor_shapes[cnt_fwd], config=config)
        cnt_bwd += 1

    # Run cooldown backward passes.
    if not forward_only:
        for i in range(num_warmup_microbatches):
            # Enable async grad reduction only in the last backward pass.
            # The loop variable must be the cooldown index: testing a stale
            # `i` from an earlier loop re-enabled sync at the wrong time.
            if i == num_warmup_microbatches - 1:
                if config.grad_sync_func is None or rank == 0:
                    enable_grad_sync()

            input_tensor = input_tensors.pop(0)
            output_tensor = output_tensors.pop(0)
            output_tensor_grad = p2p_communication.recv_backward(
                tensor_shape=tensor_shapes[cnt_bwd], config=config)

            input_tensor_grad = \
                backward_step(input_tensor, output_tensor,
                              output_tensor_grad, model_type, config)
            p2p_communication.send_backward(input_tensor_grad, config)
            cnt_bwd += 1

        # Launch any remaining grad reductions.
        # NOTE(review): nesting follows the upstream Megatron schedule
        # (inside `if not forward_only`); confirm against the unflattened file.
        if no_sync_context is not None:
            enable_grad_sync()
            if config.grad_sync_func is not None:
                config.grad_sync_func(model.parameters())

    if config.finalize_model_grads_func is not None and not forward_only:
        # Finalize model grads (perform full grad all-reduce / reduce-scatter for
        # data parallelism, layernorm all-reduce for sequence parallelism, and
        # embedding all-reduce for pipeline parallelism).
        config.finalize_model_grads_func([model])
    argument.optimized_mbs_mode = False
    argument.micro_batch_size = training.ORIGIN_MBS
    return forward_data_store
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. +import contextlib +from functools import wraps +from typing import Callable, Iterator, List, Optional, Union, Tuple +import torch +import torch.distributed as dist +from megatron import core +from megatron.core import ModelParallelConfig, parallel_state +from megatron.core.enums import ModelType +from megatron.core.utils import get_model_config, get_model_type +from megatron.core.pipeline_parallel.schedules import ( + get_tensor_shapes, + forward_step, + backward_step, + deallocate_output_tensor, + check_first_val_step, + clear_embedding_activation_buffer, + finish_embedding_wgrad_compute +) +from megatron.core.pipeline_parallel import p2p_communication +from megatron.core.pipeline_parallel.p2p_communication import ( + Shape, + _communicate_shapes, + _communicate, + _batched_p2p_ops, + _p2p_ops +) +from megatron.core.parallel_state import get_pipeline_model_parallel_group +from megatron.training import get_args +from mindspeed.core.parallel_state import get_pipeline_parallel_group_for_new_stream +from mindspeed.core.weight_grad_store import WeightGradStore + + +forward_comm_stream = None +backward_comm_stream = None +default_stream = None +scheduler_plan = None + + +def recv_forward(tensor_shapes, config, group): + input_tensors = [] + wait_handles = [] + for tensor_shape in tensor_shapes: + if tensor_shape is None or core.parallel_state.is_pipeline_first_stage(): + input_tensor = None + wait_handle = None + else: + 
input_tensor, _, wait_handle = _communicate( + tensor_send_next=None, + tensor_send_prev=None, + recv_prev=True, + recv_next=False, + tensor_shape=tensor_shape, + config=config, + group=group, + wait_on_reqs=False + ) + input_tensors.append(input_tensor) + wait_handles.append(wait_handle) + return input_tensors, wait_handles + + +def recv_backward(tensor_shapes, config, group): + output_tensor_grads = [] + wait_handlers = [] + for tensor_shape in tensor_shapes: + if tensor_shape is None or core.parallel_state.is_pipeline_last_stage(): + output_tensor_grad = None + wait_handle = None + else: + _, output_tensor_grad, wait_handle = _communicate( + tensor_send_next=None, + tensor_send_prev=None, + recv_prev=False, + recv_next=True, + tensor_shape=tensor_shape, + config=config, + group=group, + wait_on_reqs=False + ) + output_tensor_grads.append(output_tensor_grad) + wait_handlers.append(wait_handle) + return output_tensor_grads, wait_handlers + + +def send_forward(output_tensors, tensor_shapes, config, group): + if not isinstance(output_tensors, list): + output_tensors = [output_tensors] + for (output_tensor, tensor_shape) in zip(output_tensors, tensor_shapes): + if tensor_shape is None or core.parallel_state.is_pipeline_last_stage(): + continue + + _communicate( + tensor_send_next=output_tensor, + tensor_send_prev=None, + recv_prev=False, + recv_next=False, + tensor_shape=None, + config=config, + group=group, + wait_on_reqs=False + ) + + +def send_backward(input_tensor_grads, tensor_shapes, config, group): + if not isinstance(input_tensor_grads, list): + input_tensor_grads = [input_tensor_grads] + + for (input_tensor_grad, tensor_shape) in zip(input_tensor_grads, tensor_shapes): + if tensor_shape is None or core.parallel_state.is_pipeline_first_stage(): + continue + + _communicate( + tensor_send_next=None, + tensor_send_prev=input_tensor_grad, + recv_prev=False, + recv_next=False, + tensor_shape=None, + config=config, + group=group, + wait_on_reqs=False + ) + + +def 
_communicate( + *, + tensor_send_next: Optional[torch.Tensor], + tensor_send_prev: Optional[torch.Tensor], + recv_prev: bool, + recv_next: bool, + tensor_shape: Shape, + config: ModelParallelConfig, + wait_on_reqs: bool = True, + group: dist.ProcessGroup = None +) -> Tuple[torch.Tensor, torch.Tensor]: + """Communicate tensors between stages. Used as helper method in other + communication methods that are used in megatron/schedules.py. + + Args: + tensor_send_next (torch.Tensor, optional): + Tensor to send to next rank (no tensor sent if None) + + tensor_send_prev (torch.Tensor, optional): + Tensor to send to prev rank (no tensor sent if None) + + recv_prev (boolean, required): + whether tensor should be received from previous rank. + + recv_next (boolean, required): + whether tensor should be received from next rank. + + tensor_shape (List[int] or torch.Size, required): + shape of tensor to receive (this method assumes that all + tensors sent and received in a single function call are + the same shape). + + wait_on_reqs (boolean, optional, default=False): + For non-batched p2p communication, wait on each request + before returning. + + Returns: + tuple containing + + - tensor_recv_prev: torch.Tensor if recv_prev is True, None otherwise. + - tensor_recv_next: torch.Tensor if recv_next is True, None otherwise. + + """ + # Create placeholder tensors for receive in forward and backward directions + # if needed. + tensor_recv_prev = None + tensor_recv_next = None + + if not config.variable_seq_lengths: + recv_prev_shape = tensor_shape + recv_next_shape = tensor_shape + else: + recv_prev_shape, recv_next_shape = _communicate_shapes( + tensor_send_next, tensor_send_prev, recv_prev, recv_next, config + ) + + if recv_prev: + if config.pipeline_dtype is None: + raise RuntimeError("pipeline_dtype must be provided if recv_prev is True") + if tensor_shape is None: + raise RuntimeError( + "tensor_shape must be specified if recv_prev is True. 
" + "Common tensor_shape is (seq_length, micro_batch_size, hidden_size)" + ) + tensor_recv_prev = torch.empty( + recv_prev_shape, + requires_grad=True, + device=torch.cuda.current_device(), + dtype=config.pipeline_dtype, + ) + if recv_next: + if config.pipeline_dtype is None: + raise RuntimeError("dtype must be provided if recv_next is True") + if tensor_shape is None: + raise RuntimeError( + "tensor_shape must be specified if recv_next is True. " + "Common tensor_shape is (seq_length, micro_batch_size, hidden_size)" + ) + tensor_recv_next = torch.empty( + recv_next_shape, + requires_grad=True, + device=torch.cuda.current_device(), + dtype=config.pipeline_dtype, + ) + + # Send tensors in both the forward and backward directions as appropriate. + if config.use_ring_exchange_p2p: + + def _ring_exchange_wrapper(**kwargs): + torch.distributed.ring_exchange(**kwargs) + return [] + + p2p_func = _ring_exchange_wrapper + elif config.batch_p2p_comm: + if not wait_on_reqs: + raise AssertionError('wait_on_reqs must be True when use batch_p2p_comm') + p2p_func = _batched_p2p_ops + else: + p2p_func = _p2p_ops + + reqs = p2p_func( + tensor_send_prev=tensor_send_prev, + tensor_recv_prev=tensor_recv_prev, + tensor_send_next=tensor_send_next, + tensor_recv_next=tensor_recv_next, + group=group + ) + + if wait_on_reqs and len(reqs) > 0: + for req in reqs: + req.wait() + reqs = None + + if config.batch_p2p_comm and config.batch_p2p_sync: + # To protect against race condition when using batch_isend_irecv(). 
def generate_1f1b_scheduler_plan(pp_size, num_micro_batch):
    """Build the default 1F1B step order for every pipeline stage.

    Each stage runs three phases:

    * warmup   - forward-only steps (``pp_size - rank - 1`` of them, but never
      more than ``num_micro_batch``),
    * steady   - alternating one-forward / one-backward pairs,
    * cooldown - the backward steps still outstanding after the steady phase.

    Args:
        pp_size: Number of pipeline stages.
        num_micro_batch: Number of microbatches each stage has to process.

    Returns:
        dict mapping ``'stage<rank>'`` to a list of step tags.  ``'F<i>'`` is
        the forward pass of microbatch ``i`` and ``'B<i>'`` its backward pass;
        microbatch indices are 1-based.  Every stage list contains exactly
        ``num_micro_batch`` forward tags and ``num_micro_batch`` backward tags.
    """
    num_micro_batch = max(num_micro_batch, 0)
    scheduler_plan_all_stages = {}

    for pp_rank in range(pp_size):
        # Clamp the warmup depth: without the clamp a stage with more warmup
        # slots than microbatches would schedule forwards/backwards for
        # microbatches that do not exist.
        num_warmup = min(pp_size - pp_rank - 1, num_micro_batch)
        num_stable = num_micro_batch - num_warmup

        # warmup: forwards only
        steps = [f'F{i}' for i in range(1, num_warmup + 1)]

        # stable: one forward followed by one backward
        for i in range(1, num_stable + 1):
            steps.append(f'F{num_warmup + i}')
            steps.append(f'B{i}')

        # cooldown: drain the remaining backwards
        steps.extend(f'B{i}' for i in range(num_stable + 1, num_micro_batch + 1))

        scheduler_plan_all_stages[f'stage{pp_rank}'] = steps

    return scheduler_plan_all_stages
def forward_backward_pipelining_without_interleaving(
        *,
        forward_step_func,
        data_iterator: Union[Iterator, List[Iterator]],
        model: Union[torch.nn.Module, List[torch.nn.Module]],
        num_microbatches: int,
        seq_length: int,
        micro_batch_size: int,
        decoder_seq_length: int = None,
        forward_only: bool = False,
        collect_non_loss_data: bool = False,
        first_val_step: bool = None
):
    """Run a non-interleaved pipeline schedule driven by a per-stage step plan.

    The order of forward and backward steps is read from the module-level
    ``scheduler_plan`` (a list of tags; tags starting with ``'F'`` are forward
    steps, every other tag is treated as a backward step).  The plan is
    resolved once, from ``get_args().pp_schedule_list`` when that is set, or
    from ``generate_1f1b_scheduler_plan`` when it is ``None``, and is then
    cached in the module global for later calls.

    Point-to-point communication is issued on two dedicated CUDA streams
    (``forward_comm_stream`` / ``backward_comm_stream``), which are created
    lazily and also cached in module globals.

    Args:
        forward_step_func: Passed through to ``forward_step``.
        data_iterator: Iterator, or a single-element list of iterators.
        model: Module, or a single-element list of modules.
        num_microbatches: Passed to ``forward_step`` and used to size the
            default plan.
        seq_length, micro_batch_size, decoder_seq_length: Forwarded to
            ``get_tensor_shapes`` to derive the send/recv shapes.
        forward_only: When True, backward tags in the plan are skipped and no
            activations are kept.
        collect_non_loss_data: Passed through to ``forward_step``.
        first_val_step: Passed through to ``check_first_val_step``.

    Returns:
        ``forward_data_store``, the list handed to every ``forward_step`` call
        (presumably per-microbatch losses on the last stage; confirm against
        ``forward_step``).

    Raises:
        AssertionError: If ``model`` or ``data_iterator`` is a list whose
            length is not 1.
    """

    # Unwrap single-element lists; model chunking is not supported here.
    if isinstance(model, list):
        if not len(model) == 1:
            raise AssertionError("non-interleaved pipeline parallelism does not support model chunking")
        model = model[0]
    if isinstance(data_iterator, list):
        if not len(data_iterator) == 1:
            raise AssertionError("non-pipeline-parallel schedule does not support model chunking")
        data_iterator = data_iterator[0]

    config = get_model_config(model)
    if config.timers is not None:
        config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time)

    # Disable async grad reductions
    no_sync_func = config.no_sync_func
    if no_sync_func is None:
        no_sync_func = contextlib.nullcontext
    # Holds the currently entered no-sync context manager, or None when grad
    # sync is enabled.
    no_sync_context = None

    def disable_grad_sync():
        """Disable asynchronous grad reductions"""
        nonlocal no_sync_context
        if no_sync_context is None:
            no_sync_context = no_sync_func()
            no_sync_context.__enter__()

    def enable_grad_sync():
        """Enable asynchronous grad reductions"""
        nonlocal no_sync_context
        if no_sync_context is not None:
            no_sync_context.__exit__(None, None, None)
            no_sync_context = None

    disable_grad_sync()

    # Compute number of warmup microbatches.
    num_warmup_microbatches = (
        parallel_state.get_pipeline_model_parallel_world_size()
        - parallel_state.get_pipeline_model_parallel_rank()
        - 1
    )
    num_warmup_microbatches = min(num_warmup_microbatches, num_microbatches)

    # Checkpoint the activations of partial Transformer layers in a number of micro-batches
    # within the maximum outstanding micro-batch backpropagations.
    # Micro-batches with the ids less than 'num_microbatches_with_partial_activation_checkpoints'
    # checkpoint partial Transformer layers (or skip checkpointing) and
    # the rest of micro-batches within a window of micro-batches checkpoint
    # all Transformer layers. The window of micro-batches is set by the maximum
    # outstanding backpropagations and becomes smaller at later pipeline stages.
    # Please refer the appendix C in https://arxiv.org/pdf/2205.05198.pdf
    max_outstanding_backprops = None
    if config.num_microbatches_with_partial_activation_checkpoints is not None:
        max_outstanding_backprops = num_warmup_microbatches + 1

    model_type = get_model_type(model)

    # Shapes received from the previous stage (rank - 1) and sent by this stage.
    rank = parallel_state.get_pipeline_model_parallel_rank()
    recv_tensor_shapes = get_tensor_shapes(
        rank=rank - 1,
        model_type=model_type,
        seq_length=seq_length,
        micro_batch_size=micro_batch_size,
        decoder_seq_length=decoder_seq_length,
        config=config,
    )
    send_tensor_shapes = get_tensor_shapes(
        rank=rank,
        model_type=model_type,
        seq_length=seq_length,
        micro_batch_size=micro_batch_size,
        decoder_seq_length=decoder_seq_length,
        config=config,
    )

    # Input, output tensors only need to be saved when doing backward passes
    input_tensors = None
    output_tensors = None
    if not forward_only:
        input_tensors = []
        output_tensors = []
    forward_data_store = []

    def wait_helper(wait_handlers):
        """Wait on every request in every non-None request group."""
        for reqs in wait_handlers:
            if reqs is not None:
                for req in reqs:
                    req.wait()

    # Communication streams and the default stream are created on first use
    # and kept in module globals so later calls reuse them.
    global forward_comm_stream
    if forward_comm_stream is None:
        forward_comm_stream = torch.cuda.Stream()

    global backward_comm_stream
    if backward_comm_stream is None:
        backward_comm_stream = torch.cuda.Stream()

    global default_stream
    if default_stream is None:
        default_stream = torch.cuda.default_stream()

    # Resolve this stage's step plan once and cache it in the module global.
    # NOTE(review): the cached plan is reused on later calls even if
    # num_microbatches differs. Confirm that is intended.
    # NOTE(review): if pp_schedule_list is falsy but not None (e.g. an empty
    # mapping), or has no entry for this stage, scheduler_plan stays None and
    # the loop below fails with a TypeError.
    global scheduler_plan
    arguments = get_args()
    key = 'stage{}'.format(parallel_state.get_pipeline_model_parallel_rank())
    if scheduler_plan is None and arguments.pp_schedule_list:
        scheduler_plan = arguments.pp_schedule_list.get(key)
    elif scheduler_plan is None and arguments.pp_schedule_list is None:
        scheduler_plan = generate_1f1b_scheduler_plan(parallel_state.get_pipeline_model_parallel_world_size(),
                                                      num_microbatches)
        scheduler_plan = scheduler_plan.get(key)

    # This overwrites the caller's config: batched p2p is turned off for the
    # rest of the run, not only for this call.
    config.batch_p2p_comm = False
    fwd_wait_handles, bwd_wait_handles = None, None
    # Index of the current tag within the plan (counts forward and backward tags).
    current_tag_id = -1
    for tag in scheduler_plan:
        current_tag_id += 1
        if tag.startswith('F'):
            # Decide to checkpoint all layers' activations of the current micro-batch
            # NOTE(review): the modulus uses the plan index, which also counts
            # backward tags, rather than a forward-microbatch index. Confirm.
            if max_outstanding_backprops is not None:
                checkpoint_activations_microbatch = (
                        current_tag_id % max_outstanding_backprops >= config.num_microbatches_with_partial_activation_checkpoints
                )
            else:
                checkpoint_activations_microbatch = None

            # Post the receive for this step's input on the forward comm stream.
            with torch.cuda.stream(forward_comm_stream):
                input_tensor, fwd_wait_handles = recv_forward(
                    recv_tensor_shapes, config, get_pipeline_model_parallel_group()
                )

            # The input must have arrived before the forward computation starts.
            wait_helper(fwd_wait_handles)
            output_tensor, _ = forward_step(
                forward_step_func,
                data_iterator,
                model,
                num_microbatches,
                input_tensor,
                forward_data_store,
                config,
                collect_non_loss_data,
                checkpoint_activations_microbatch,
                check_first_val_step(first_val_step, forward_only, current_tag_id == 0)
            )

            with torch.cuda.stream(forward_comm_stream):
                # Make the comm stream wait for the compute that produced
                # output_tensor before sending it.
                forward_comm_stream.wait_stream(default_stream)
                send_forward(
                    output_tensor,
                    send_tensor_shapes,
                    config,
                    get_pipeline_model_parallel_group()
                )
                # Tell the caching allocator the tensors are in use on the
                # comm stream so their memory is not reused too early.
                for tensor in output_tensor:
                    if tensor is not None:
                        tensor.record_stream(forward_comm_stream)


            if not forward_only:
                # Keep activations for the matching backward step (FIFO order).
                input_tensors.append(input_tensor)
                output_tensors.append(output_tensor)
                deallocate_output_tensor(output_tensor[0], config.deallocate_pipeline_outputs)

        else:
            # Backward tag: nothing to do in forward-only mode.
            if forward_only:
                continue

            # On the last step of the plan, re-enable grad sync so the final
            # backward can trigger the reduction.
            if current_tag_id == len(scheduler_plan) - 1:
                if config.grad_sync_func is None or rank == 0:
                    enable_grad_sync()

            # Receive the output gradient on the backward comm stream; the
            # backward direction uses a separate process group.
            with torch.cuda.stream(backward_comm_stream):
                output_tensor_grads, bwd_wait_handles = recv_backward(
                    send_tensor_shapes, config, get_pipeline_parallel_group_for_new_stream()
                )

            # Oldest outstanding microbatch first.
            input_tensor = input_tensors.pop(0)
            output_tensor = output_tensors.pop(0)

            wait_helper(bwd_wait_handles)
            input_tensor_grad = backward_step(
                input_tensor,
                output_tensor,
                output_tensor_grads,
                model_type,
                config
            )

            with torch.cuda.stream(backward_comm_stream):
                # Wait for the backward compute before sending its result.
                backward_comm_stream.wait_stream(default_stream)
                send_backward(
                    input_tensor_grad,
                    recv_tensor_shapes,
                    config,
                    get_pipeline_parallel_group_for_new_stream()
                )
                for tensor in input_tensor_grad:
                    if tensor is not None:
                        tensor.record_stream(backward_comm_stream)

    if not forward_only:
        # Launch any grad reduction that was not triggered inside the loop.
        if no_sync_context is not None:
            enable_grad_sync()
            if config.grad_sync_func is not None:
                config.grad_sync_func(model.parameters())

    if config.timers is not None:
        config.timers('forward-backward').stop()

    if config.finalize_model_grads_func is not None and not forward_only:
        # Finalize model grads (perform full grad all-reduce / reduce-scatter for
        # data parallelism, layernorm all-reduce for sequence parallelism, and
        # embedding all-reduce for pipeline parallelism).
        config.finalize_model_grads_func([model])

    return forward_data_store
def forward_backward_pipelining_with_interleaving_nano_pipe(
    *,
    forward_step_func,
    data_iterator: Union[Iterator, List[Iterator]],
    model: Union[torch.nn.Module, List[torch.nn.Module]],
    num_microbatches: int,
    seq_length: int,
    micro_batch_size: int,
    decoder_seq_length: int = None,
    forward_only: bool = False,
    collect_non_loss_data: bool = False,
    first_val_step: bool = None,
):
    """Run interleaved 1F1B-nanopipe schedule (model split into model chunks), with
    communication between pipeline stages as needed.

    The schedule has three phases: warmup forward passes, a steady 1F1B phase,
    and cooldown backward passes.  During the first ``num_dx`` steady-state
    iterations the backward pass runs between ``WeightGradStore.start_decouple()``
    and ``WeightGradStore.end_decouple()``; the stored work is drained with
    ``WeightGradStore.pop`` after the cooldown loop.
    NOTE(review): ``WeightGradStore`` is defined outside this block;
    presumably it defers weight-gradient computation. Confirm.

    Args:
        forward_step_func: Passed through to ``forward_step``.
        data_iterator: List with one data iterator per model chunk.
        model: List of ``torch.nn.Module`` model chunks.
        num_microbatches: Number of microbatches; must be divisible by the
            pipeline-parallel world size.
        seq_length: Sequence length used to build the p2p tensor shape.
        micro_batch_size: Micro-batch size used to build the p2p tensor shape.
        decoder_seq_length: Must be ``None`` or equal to ``seq_length``.
        forward_only: When True, only forward passes are run.
        collect_non_loss_data: Passed through to ``forward_step``.
        first_val_step: Passed through to ``check_first_val_step``.

    Returns:
        ``forward_data_store``, the list handed to every ``forward_step`` call
        (presumably per-microbatch losses on the last stage; confirm against
        ``forward_step``).

    Raises:
        AssertionError: If ``model`` / ``data_iterator`` are not lists, or a
            model chunk is not a ``torch.nn.Module``.
        ValueError: If both ``overlap_p2p_comm`` and ``batch_p2p_comm`` are set.
        RuntimeError: If ``num_microbatches`` is not divisible by the pipeline
            size, the model type is encoder-and-decoder, or
            ``decoder_seq_length`` differs from ``seq_length``.
    """
    if not isinstance(model, list):
        raise AssertionError("interleaved pipeline parallelism expected model chunking")
    if not all(isinstance(chunk, torch.nn.Module) for chunk in model):
        raise AssertionError("invalid model chunking")
    if not isinstance(data_iterator, list):
        raise AssertionError("interleaved pipeline parallelism expected each model chunk to have a data iterator")
    args = get_args()
    config = get_model_config(model[0])
    if config.overlap_p2p_comm and config.batch_p2p_comm:
        raise ValueError("Can not use both overlap_p2p_comm and batch_p2p_comm")

    if config.timers is not None:
        config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time)

    # Disable async grad reductions
    no_sync_func = config.no_sync_func
    if isinstance(no_sync_func, list):

        def multi_no_sync():
            """Enter every per-chunk no-sync context and return the stack."""
            stack = contextlib.ExitStack()
            for model_chunk_no_sync_func in config.no_sync_func:
                stack.enter_context(model_chunk_no_sync_func())
            return stack

        no_sync_func = multi_no_sync
    if no_sync_func is None:
        no_sync_func = contextlib.nullcontext
    # Holds the currently entered no-sync context manager, or None when grad
    # sync is enabled.
    no_sync_context = None

    # Normalise single callables to one entry per model chunk. This writes the
    # list back onto config.
    if config.grad_sync_func is not None and not isinstance(config.grad_sync_func, list):
        config.grad_sync_func = [config.grad_sync_func for _ in model]

    if config.param_sync_func is not None and not isinstance(config.param_sync_func, list):
        config.param_sync_func = [config.param_sync_func for _ in model]

    def disable_grad_sync():
        """Disable asynchronous grad reductions"""
        nonlocal no_sync_context
        if no_sync_context is None:
            no_sync_context = no_sync_func()
            no_sync_context.__enter__()

    def enable_grad_sync():
        """Enable asynchronous grad reductions"""
        nonlocal no_sync_context
        if no_sync_context is not None:
            no_sync_context.__exit__(None, None, None)
            no_sync_context = None

    disable_grad_sync()

    # Model chunk IDs with synchronized grads
    synchronized_model_chunks = set()

    # Per-chunk FIFO queues of activations (and, for training, output grads).
    input_tensors = [[] for _ in range(len(model))]
    output_tensors = [[] for _ in range(len(model))]
    forward_data_store = []
    if not forward_only:
        output_tensor_grads = [[] for _ in range(len(model))]

    pipeline_parallel_size = parallel_state.get_pipeline_model_parallel_world_size()
    pipeline_parallel_rank = parallel_state.get_pipeline_model_parallel_rank()

    if num_microbatches % pipeline_parallel_size != 0:
        msg = f'number of microbatches ({num_microbatches}) is not divisible by '
        msg += f'pipeline-model-parallel-size ({pipeline_parallel_size}) '
        msg += 'when using interleaved schedule'
        raise RuntimeError(msg)

    model_type = get_model_type(model[0])
    if model_type == ModelType.encoder_and_decoder:
        raise RuntimeError("Interleaving is not supported with an encoder and decoder model.")

    if decoder_seq_length is not None and decoder_seq_length != seq_length:
        raise RuntimeError(
            "Interleaving is not supported with a different decoder sequence length."
        )

    # Shape of the tensors exchanged between stages: the first dimension is
    # divided by the context-parallel size (and the tensor-parallel size when
    # sequence parallelism is on), then by args.tp_x; the last by args.tp_y.
    tensor_shape = [seq_length, micro_batch_size, config.hidden_size]
    tensor_shape[0] = tensor_shape[0] // parallel_state.get_context_parallel_world_size()
    if config.sequence_parallel:
        tensor_shape[0] = tensor_shape[0] // parallel_state.get_tensor_model_parallel_world_size()
    tensor_shape[0] = tensor_shape[0] // args.tp_x
    tensor_shape[-1] = tensor_shape[-1] // args.tp_y
    # Compute number of warmup and remaining microbatches.
    num_model_chunks = len(model)
    total_num_microbatches = num_microbatches * num_model_chunks
    all_warmup_microbatches = False
    if forward_only:
        num_warmup_microbatches = total_num_microbatches
    else:
        # Run all forward passes and then all backward passes if number of
        # microbatches is just the number of pipeline stages.
        # Otherwise, perform (num_model_chunks-1)*pipeline_parallel_size on
        # all workers, followed by more microbatches after depending on
        # stage ID (more forward passes for earlier stages, later stages can
        # immediately start with 1F1B).
        if num_microbatches == pipeline_parallel_size:
            num_warmup_microbatches = total_num_microbatches
            all_warmup_microbatches = True
        else:
            num_warmup_microbatches = (pipeline_parallel_size - pipeline_parallel_rank - 1) * 2
            num_warmup_microbatches += (num_model_chunks - 1) * pipeline_parallel_size
            num_warmup_microbatches = min(num_warmup_microbatches, total_num_microbatches)

    # num_fwd is the warmup count rank 0 would use (the maximum over ranks),
    # so num_dx is how many fewer warmup steps this rank runs than rank 0.
    num_fwd = min((pipeline_parallel_size - 1) * 2 + (num_model_chunks - 1) * pipeline_parallel_size, total_num_microbatches)
    num_microbatches_remaining = total_num_microbatches - num_warmup_microbatches
    num_dx = num_fwd - num_warmup_microbatches
    # Number of trailing model chunks covered by those num_dx iterations
    # (ceil division); their nano_flag is cleared so the regular grad-sync
    # paths below skip them.
    overlap_chunks_num = (num_dx + pipeline_parallel_size - 1) // pipeline_parallel_size
    nano_flag = [True] * len(model)
    for i in range(overlap_chunks_num):
        nano_flag[-i - 1] = False

    # Checkpoint the activations of partial Transformer layers in a number of micro-batches
    # within the maximum outstanding micro-batch backpropagations.
    # Micro-batches with the ids less than 'num_microbatches_with_partial_activation_checkpoints'
    # checkpoint partial Transformer layers (or skip checkpointing) and
    # the rest of micro-batches within a window of micro-batches checkpoint
    # all Transformer layers. The window of micro-batches is set by the maximum
    # outstanding backpropagations and becomes smaller at later pipeline stages.
    # Please refer the appendix C in https://arxiv.org/pdf/2205.05198.pdf
    max_outstanding_backprops = None
    if config.num_microbatches_with_partial_activation_checkpoints is not None:
        max_outstanding_backprops = num_warmup_microbatches + 1

    # Synchronize params for first two model chunks
    # NOTE(review): indexes chunk 1 unconditionally, so this needs at least
    # two model chunks whenever param_sync_func is set.
    if config.param_sync_func is not None:
        config.param_sync_func[0](model[0].parameters())
        config.param_sync_func[1](model[1].parameters())

    def get_model_chunk_id(microbatch_id, forward):
        """Helper method to get the model chunk ID given the iteration number."""
        microbatch_id_in_group = microbatch_id % (pipeline_parallel_size * num_model_chunks)
        model_chunk_id = microbatch_id_in_group // pipeline_parallel_size
        if not forward:
            # Backward passes visit the chunks in reverse order.
            model_chunk_id = num_model_chunks - model_chunk_id - 1
        return model_chunk_id

    def is_first_microbatch_for_model_chunk(microbatch_id: int) -> bool:
        """Check if an iteration is the first for a model chunk."""
        microbatch_group_size = pipeline_parallel_size * num_model_chunks
        # num_microbatch_groups is computed but not used in this helper.
        num_microbatch_groups = total_num_microbatches // microbatch_group_size
        microbatch_group_id = microbatch_id // microbatch_group_size
        microbatch_id_in_group = microbatch_id % microbatch_group_size
        if microbatch_group_id == 0:
            return microbatch_id_in_group % pipeline_parallel_size == 0
        else:
            return False

    def is_last_microbatch_for_model_chunk(microbatch_id: int) -> bool:
        """Check if an iteration is the last for a model chunk."""
        microbatch_group_size = pipeline_parallel_size * num_model_chunks
        num_microbatch_groups = total_num_microbatches // microbatch_group_size
        microbatch_group_id = microbatch_id // microbatch_group_size
        microbatch_id_in_group = microbatch_id % microbatch_group_size
        if microbatch_group_id == num_microbatch_groups - 1:
            return microbatch_id_in_group % pipeline_parallel_size == pipeline_parallel_size - 1
        else:
            return False

    def forward_step_helper(microbatch_id, checkpoint_activations_microbatch):
        """Helper method to run forward step with model split into chunks
        (run set_virtual_pipeline_model_parallel_rank() before calling
        forward_step())."""
        model_chunk_id = get_model_chunk_id(microbatch_id, forward=True)
        parallel_state.set_virtual_pipeline_model_parallel_rank(model_chunk_id)

        # launch param synchronization for next model chunk
        # Note: Asynchronous communication tends to slow down compute.
        # To reduce idling from mismatched microbatch times, we launch
        # asynchronous communication at the same time across the
        # pipeline-parallel group.
        if config.param_sync_func is not None:
            param_sync_microbatch_id = microbatch_id + pipeline_parallel_rank
            if (
                param_sync_microbatch_id < total_num_microbatches
                and is_first_microbatch_for_model_chunk(param_sync_microbatch_id)
            ):
                param_sync_chunk_id = get_model_chunk_id(param_sync_microbatch_id, forward=True) + 1
                # Chunks 0 and 1 were already synchronized before the schedule started.
                if 1 < param_sync_chunk_id < num_model_chunks:
                    config.param_sync_func[param_sync_chunk_id](
                        model[param_sync_chunk_id].parameters()
                    )

        # forward step
        if parallel_state.is_pipeline_first_stage():
            # The first stage has no upstream input: queue a None placeholder
            # when no received input is pending for this chunk.
            if len(input_tensors[model_chunk_id]) == len(output_tensors[model_chunk_id]):
                input_tensors[model_chunk_id].append(None)
        input_tensor = input_tensors[model_chunk_id][-1]
        output_tensor, _ = forward_step(
            forward_step_func,
            data_iterator[model_chunk_id],
            model[model_chunk_id],
            num_microbatches,
            input_tensor,
            forward_data_store,
            config,
            collect_non_loss_data,
            checkpoint_activations_microbatch,
            check_first_val_step(
                first_val_step, forward_only, is_first_microbatch_for_model_chunk(microbatch_id),
            ),
        )
        output_tensors[model_chunk_id].append(output_tensor)

        # if forward-only, no need to save tensors for a backward pass
        if forward_only:
            input_tensors[model_chunk_id].pop()
            output_tensors[model_chunk_id].pop()

        return output_tensor

    def backward_step_helper(microbatch_id):
        """Helper method to run backward step with model split into chunks
        (run set_virtual_pipeline_model_parallel_rank() before calling
        backward_step())."""
        model_chunk_id = get_model_chunk_id(microbatch_id, forward=False)
        parallel_state.set_virtual_pipeline_model_parallel_rank(model_chunk_id)

        # launch grad synchronization (default)
        # Chunks whose nano_flag is False are skipped here.
        if config.grad_sync_func is None and is_last_microbatch_for_model_chunk(microbatch_id) and nano_flag[model_chunk_id]:
            enable_grad_sync()
            synchronized_model_chunks.add(model_chunk_id)

        if parallel_state.is_pipeline_last_stage():
            # The last stage has no downstream gradient: use a None placeholder.
            if len(output_tensor_grads[model_chunk_id]) == 0:
                output_tensor_grads[model_chunk_id].append(None)
        input_tensor = input_tensors[model_chunk_id].pop(0)
        output_tensor = output_tensors[model_chunk_id].pop(0)
        output_tensor_grad = output_tensor_grads[model_chunk_id].pop(0)
        input_tensor_grad = backward_step(
            input_tensor, output_tensor, output_tensor_grad, model_type, config
        )

        # launch grad synchronization (custom grad sync)
        # Note: Asynchronous communication tends to slow down compute.
        # To reduce idling from mismatched microbatch times, we launch
        # asynchronous communication at the same time across the
        # pipeline-parallel group.
        if config.grad_sync_func is not None:
            grad_sync_microbatch_id = microbatch_id - pipeline_parallel_rank
            if grad_sync_microbatch_id >= 0 and is_last_microbatch_for_model_chunk(
                grad_sync_microbatch_id
            ):
                grad_sync_chunk_id = get_model_chunk_id(grad_sync_microbatch_id, forward=False)
                if nano_flag[grad_sync_chunk_id]:
                    enable_grad_sync()
                    config.grad_sync_func[grad_sync_chunk_id](model[grad_sync_chunk_id].parameters())
                    synchronized_model_chunks.add(grad_sync_chunk_id)
        disable_grad_sync()

        return input_tensor_grad

    # Run warmup forward passes.
    parallel_state.set_virtual_pipeline_model_parallel_rank(0)
    input_tensors[0].append(p2p_communication.recv_forward(tensor_shape, config))

    fwd_wait_handles = None
    bwd_wait_handles = None

    for k in range(num_warmup_microbatches):

        # With overlapped p2p, wait for the receive posted in the previous iteration.
        if fwd_wait_handles is not None:
            for req in fwd_wait_handles:
                req.wait()

        # Decide to checkpoint all layers' activations of the current micro-batch
        if max_outstanding_backprops is not None:
            checkpoint_activations_microbatch = (
                k % max_outstanding_backprops
                >= config.num_microbatches_with_partial_activation_checkpoints
            )
        else:
            checkpoint_activations_microbatch = None

        output_tensor = forward_step_helper(k, checkpoint_activations_microbatch)

        # Determine if tensor should be received from previous stage.
        next_forward_model_chunk_id = get_model_chunk_id(k + 1, forward=True)
        recv_prev = True
        if parallel_state.is_pipeline_first_stage(ignore_virtual=True):
            if next_forward_model_chunk_id == 0:
                recv_prev = False
        if k == (total_num_microbatches - 1):
            recv_prev = False

        # Don't send tensor downstream if on last stage.
        if parallel_state.is_pipeline_last_stage():
            output_tensor = None

        # Send and receive tensors as appropriate (send tensors computed
        # in this iteration; receive tensors for next iteration).
        if not config.overlap_p2p_comm:
            if (
                k == (num_warmup_microbatches - 1)
                and not forward_only
                and not all_warmup_microbatches
            ):
                # Last warmup step: also pre-fetch the first output gradient
                # needed by the steady-state phase.
                input_tensor_grad = None
                recv_next = True
                if parallel_state.is_pipeline_last_stage(ignore_virtual=True):
                    recv_next = False
                (
                    input_tensor,
                    output_tensor_grad,
                ) = p2p_communication.send_forward_backward_recv_forward_backward(
                    output_tensor,
                    input_tensor_grad,
                    recv_prev=recv_prev,
                    recv_next=recv_next,
                    tensor_shape=tensor_shape,
                    config=config,
                )
                output_tensor_grads[num_model_chunks - 1].append(output_tensor_grad)
            else:
                input_tensor = p2p_communication.send_forward_recv_forward(
                    output_tensor, recv_prev=recv_prev, tensor_shape=tensor_shape, config=config
                )
            input_tensors[next_forward_model_chunk_id].append(input_tensor)
        else:
            input_tensor, fwd_wait_handles = p2p_communication.send_forward_recv_forward(
                output_tensor,
                recv_prev=recv_prev,
                tensor_shape=tensor_shape,
                config=config,
                overlap_p2p_comm=True,
            )

            if (
                k == (num_warmup_microbatches - 1)
                and not forward_only
                and not all_warmup_microbatches
            ):
                # Last warmup step: post the receive for the first output gradient.
                input_tensor_grad = None
                recv_next = True
                if parallel_state.is_pipeline_last_stage(ignore_virtual=True):
                    recv_next = False

                (
                    output_tensor_grad,
                    bwd_wait_handles,
                ) = p2p_communication.send_backward_recv_backward(
                    input_tensor_grad,
                    recv_next=recv_next,
                    tensor_shape=tensor_shape,
                    config=config,
                    overlap_p2p_comm=True,
                )

                output_tensor_grads[num_model_chunks - 1].append(output_tensor_grad)
            input_tensors[next_forward_model_chunk_id].append(input_tensor)

        deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)

    # Make sure output_tensor is bound even if the warmup loop ran zero times;
    # the steady-state overlap path deallocates it before its first forward.
    output_tensor = None
    # Run 1F1B in steady state.
    for k in range(num_microbatches_remaining):
        # Forward pass.
        forward_k = k + num_warmup_microbatches
        # Decide to checkpoint all layers' activations of the current micro-batch
        if max_outstanding_backprops is not None:
            checkpoint_activations_microbatch = (
                forward_k % max_outstanding_backprops
                >= config.num_microbatches_with_partial_activation_checkpoints
            )
        else:
            checkpoint_activations_microbatch = None

        if config.overlap_p2p_comm:
            if fwd_wait_handles is not None:
                for req in fwd_wait_handles:
                    req.wait()

            deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)

            output_tensor = forward_step_helper(forward_k, checkpoint_activations_microbatch)

            # Determine if current stage has anything to send in either direction,
            # otherwise set tensor to None.
            forward_model_chunk_id = get_model_chunk_id(forward_k, forward=True)
            parallel_state.set_virtual_pipeline_model_parallel_rank(forward_model_chunk_id)

            # Last virtual stage no activation tensor to send
            if parallel_state.is_pipeline_last_stage():
                output_tensor = None

            # Determine if peers are sending, and where in data structure to put
            # received tensors.
            recv_prev = True
            if parallel_state.is_pipeline_first_stage(ignore_virtual=True):
                # First stage is ahead of last stage by (pipeline_parallel_size - 1).
                next_forward_model_chunk_id = get_model_chunk_id(
                    forward_k - (pipeline_parallel_size - 1), forward=True
                )
                if next_forward_model_chunk_id == (num_model_chunks - 1):
                    recv_prev = False
                next_forward_model_chunk_id += 1
            else:
                next_forward_model_chunk_id = get_model_chunk_id(forward_k + 1, forward=True)

            # If last iteration, don't receive; we already received one extra
            # before the start of the for loop.
            if k == (num_microbatches_remaining - 1):
                recv_prev = False

            # Send activation tensor to the next stage and receive activation tensor from the
            # previous stage
            input_tensor, fwd_wait_handles = p2p_communication.send_forward_recv_forward(
                output_tensor,
                recv_prev=recv_prev,
                tensor_shape=tensor_shape,
                config=config,
                overlap_p2p_comm=True,
            )
            # assert fwd_wait_handles is not None

            if bwd_wait_handles is not None:
                for req in bwd_wait_handles:
                    req.wait()

            # Backward pass.
            backward_k = k
            # The first num_dx steady-state backward passes run in decoupled mode.
            # NOTE(review): presumably this defers weight-gradient work to
            # WeightGradStore. Confirm against its implementation.
            if k < num_dx:
                WeightGradStore.start_decouple()

            if args.use_nanopipe:
                WeightGradStore.resize_ori_storage(args.use_nanopipe_swap)

            input_tensor_grad = backward_step_helper(backward_k)
            if WeightGradStore.is_decoupleBlock:
                WeightGradStore.flush()
            if k == num_dx - 1:
                WeightGradStore.end_decouple()
            backward_model_chunk_id = get_model_chunk_id(backward_k, forward=False)

            parallel_state.set_virtual_pipeline_model_parallel_rank(backward_model_chunk_id)


            # First virtual stage no activation gradient tensor to send
            if parallel_state.is_pipeline_first_stage():
                input_tensor_grad = None

            # Determine if the current virtual stage has an activation gradient tensor to receive
            recv_next = True
            if parallel_state.is_pipeline_last_stage(ignore_virtual=True):
                # Last stage is ahead of first stage by (pipeline_parallel_size - 1).
                next_backward_model_chunk_id = get_model_chunk_id(
                    backward_k - (pipeline_parallel_size - 1), forward=False
                )
                if next_backward_model_chunk_id == 0:
                    recv_next = False
                next_backward_model_chunk_id -= 1
            else:
                next_backward_model_chunk_id = get_model_chunk_id(backward_k + 1, forward=False)

            output_tensor_grad, bwd_wait_handles = p2p_communication.send_backward_recv_backward(
                input_tensor_grad,
                recv_next=recv_next,
                tensor_shape=tensor_shape,
                config=config,
                overlap_p2p_comm=True,
            )
        else:  # no p2p overlap
            output_tensor = forward_step_helper(forward_k, checkpoint_activations_microbatch)

            # Backward pass.
            backward_k = k
            # Same decoupled-backward handling as in the overlap branch.
            if k < num_dx:
                WeightGradStore.start_decouple()

            if args.use_nanopipe:
                WeightGradStore.resize_ori_storage(args.use_nanopipe_swap)

            input_tensor_grad = backward_step_helper(backward_k)
            if WeightGradStore.is_decoupleBlock:
                WeightGradStore.flush()
            if k == num_dx - 1:
                WeightGradStore.end_decouple()

            # Send output_tensor and input_tensor_grad, receive input_tensor
            # and output_tensor_grad.

            # Determine if current stage has anything to send in either direction,
            # otherwise set tensor to None.
            forward_model_chunk_id = get_model_chunk_id(forward_k, forward=True)
            parallel_state.set_virtual_pipeline_model_parallel_rank(forward_model_chunk_id)
            if parallel_state.is_pipeline_last_stage():
                output_tensor = None

            backward_model_chunk_id = get_model_chunk_id(backward_k, forward=False)
            parallel_state.set_virtual_pipeline_model_parallel_rank(backward_model_chunk_id)
            if parallel_state.is_pipeline_first_stage():
                input_tensor_grad = None

            # Determine if peers are sending, and where in data structure to put
            # received tensors.
            recv_prev = True
            if parallel_state.is_pipeline_first_stage(ignore_virtual=True):
                # First stage is ahead of last stage by (pipeline_parallel_size - 1).
                next_forward_model_chunk_id = get_model_chunk_id(
                    forward_k - (pipeline_parallel_size - 1), forward=True
                )
                if next_forward_model_chunk_id == (num_model_chunks - 1):
                    recv_prev = False
                next_forward_model_chunk_id += 1
            else:
                next_forward_model_chunk_id = get_model_chunk_id(forward_k + 1, forward=True)

            recv_next = True
            if parallel_state.is_pipeline_last_stage(ignore_virtual=True):
                # Last stage is ahead of first stage by (pipeline_parallel_size - 1).
                next_backward_model_chunk_id = get_model_chunk_id(
                    backward_k - (pipeline_parallel_size - 1), forward=False
                )
                if next_backward_model_chunk_id == 0:
                    recv_next = False
                next_backward_model_chunk_id -= 1
            else:
                next_backward_model_chunk_id = get_model_chunk_id(backward_k + 1, forward=False)

            # If last iteration, don't receive; we already received one extra
            # before the start of the for loop.
            if k == (num_microbatches_remaining - 1):
                recv_prev = False

            # Communicate tensors.
            (
                input_tensor,
                output_tensor_grad,
            ) = p2p_communication.send_forward_backward_recv_forward_backward(
                output_tensor,
                input_tensor_grad,
                recv_prev=recv_prev,
                recv_next=recv_next,
                tensor_shape=tensor_shape,
                config=config,
            )
            deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)

        # Put input_tensor and output_tensor_grad in data structures in the
        # right location.
        if recv_prev:
            input_tensors[next_forward_model_chunk_id].append(input_tensor)
        if recv_next:
            output_tensor_grads[next_backward_model_chunk_id].append(output_tensor_grad)

    deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)
    # Run cooldown backward passes (flush out pipeline).
    if not forward_only:
        if config.overlap_p2p_comm and bwd_wait_handles is not None:
            for wait_handle in bwd_wait_handles:
                wait_handle.wait()

        if all_warmup_microbatches:
            # No gradient was pre-fetched during warmup in this mode.
            output_tensor_grads[num_model_chunks - 1].append(
                p2p_communication.recv_backward(tensor_shape, config=config)
            )
        for k in range(num_microbatches_remaining, total_num_microbatches):
            input_tensor_grad = backward_step_helper(k)
            next_backward_model_chunk_id = get_model_chunk_id(k + 1, forward=False)
            recv_next = True
            if parallel_state.is_pipeline_last_stage(ignore_virtual=True):
                if next_backward_model_chunk_id == (num_model_chunks - 1):
                    recv_next = False
            if k == (total_num_microbatches - 1):
                recv_next = False
            output_tensor_grads[next_backward_model_chunk_id].append(
                p2p_communication.send_backward_recv_backward(
                    input_tensor_grad, recv_next=recv_next, tensor_shape=tensor_shape, config=config
                )
            )
            # With nanopipe swap enabled, trigger the tensor swap once, at a
            # single cooldown iteration (roughly the midpoint of the range).
            if args.use_nanopipe_swap and k == max(num_microbatches_remaining + 1, (total_num_microbatches + num_microbatches_remaining) // 2):
                WeightGradStore.swap_tensors()
        # NOTE(review): indexes config.grad_sync_func without a None check;
        # this relies on chunk 0 already being in synchronized_model_chunks
        # whenever grad_sync_func is None. Confirm.
        if nano_flag[0] and 0 not in synchronized_model_chunks:
            config.grad_sync_func[0](model[0].parameters())
            synchronized_model_chunks.add(0)
        # Hand the stored work to WeightGradStore.pop together with what it
        # needs to sync the chunks whose nano_flag is False.
        overlap_arg = [pipeline_parallel_size, nano_flag, synchronized_model_chunks, config.grad_sync_func, model]
        WeightGradStore.pop(overlap_arg)
        # Launch any remaining grad reductions.
        enable_grad_sync()
        if config.grad_sync_func is not None:
            for model_chunk_id in range(num_model_chunks):
                if model_chunk_id not in synchronized_model_chunks:
                    config.grad_sync_func[model_chunk_id](model[model_chunk_id].parameters())
                    synchronized_model_chunks.add(model_chunk_id)

    if config.timers is not None:
        config.timers('forward-backward').stop()

    if config.finalize_model_grads_func is not None and not forward_only:
        # Finalize model grads (perform full grad all-reduce / reduce-scatter for
        # data parallelism, layernorm all-reduce for sequence parallelism, and
        # embedding all-reduce for pipeline parallelism).
        config.finalize_model_grads_func(model)

    return forward_data_store
+ + Returns dictionary with losses if the last stage, empty dict otherwise.""" + if not isinstance(model, list): + raise AssertionError("interleaved pipeline parallelism expected model chunking") + if not all(isinstance(chunk, torch.nn.Module) for chunk in model): + raise AssertionError("invalid model chunking") + if not isinstance(data_iterator, list): + raise AssertionError("interleaved pipeline parallelism expected each model chunk to have a data iterator") + config = get_model_config(model[0]) + if config.overlap_p2p_comm and config.batch_p2p_comm: + raise ValueError("Can not use both overlap_p2p_comm and batch_p2p_comm") + + # Needed only when gradients are finalized in M-Core + if config.finalize_model_grads_func is not None and not forward_only: + embedding_module = clear_embedding_activation_buffer(config, model) + + if config.timers is not None: + config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time) + + # Disable async grad reductions + no_sync_func = config.no_sync_func + if isinstance(no_sync_func, list): + + def multi_no_sync(): + stack = contextlib.ExitStack() + for model_chunk_no_sync_func in config.no_sync_func: + stack.enter_context(model_chunk_no_sync_func()) + return stack + + no_sync_func = multi_no_sync + if no_sync_func is None: + no_sync_func = contextlib.nullcontext + no_sync_context = None + + if config.grad_sync_func is not None and not isinstance(config.grad_sync_func, list): + config.grad_sync_func = [config.grad_sync_func for _ in model] + + if config.param_sync_func is not None and not isinstance(config.param_sync_func, list): + config.param_sync_func = [config.param_sync_func for _ in model] + + def disable_grad_sync(): + """Disable asynchronous grad reductions""" + nonlocal no_sync_context + if no_sync_context is None: + no_sync_context = no_sync_func() + no_sync_context.__enter__() + + def enable_grad_sync(): + """Enable asynchronous grad reductions""" + nonlocal no_sync_context + if 
no_sync_context is not None: + no_sync_context.__exit__(None, None, None) + no_sync_context = None + + disable_grad_sync() + + # Model chunk IDs with synchronized grads + synchronized_model_chunks = set() + + input_tensors = [[] for _ in range(len(model))] + output_tensors = [[] for _ in range(len(model))] + total_num_tokens = torch.tensor(0, dtype=torch.int).cuda() + + forward_data_store = [] + if not forward_only: + output_tensor_grads = [[] for _ in range(len(model))] + + pipeline_parallel_size = parallel_state.get_pipeline_model_parallel_world_size() + pipeline_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + + if num_microbatches % pipeline_parallel_size != 0: + msg = f'number of microbatches ({num_microbatches}) is not divisible by ' + msg += f'pipeline-model-parallel-size ({pipeline_parallel_size}) ' + msg += 'when using interleaved schedule' + raise RuntimeError(msg) + + model_type = get_model_type(model[0]) + if model_type == ModelType.encoder_and_decoder: + raise RuntimeError("Interleaving is not supported with an encoder and decoder model.") + + if decoder_seq_length is not None and decoder_seq_length != seq_length: + raise RuntimeError( + "Interleaving is not supported with a different decoder sequence length." + ) + + tensor_shape = [seq_length, micro_batch_size, config.hidden_size] + tensor_shape[0] = tensor_shape[0] // parallel_state.get_context_parallel_world_size() + if config.sequence_parallel: + tensor_shape[0] = tensor_shape[0] // parallel_state.get_tensor_model_parallel_world_size() + tensor_shape[0] = tensor_shape[0] // get_args().tp_x + tensor_shape[-1] = tensor_shape[-1] // get_args().tp_y + # Compute number of warmup and remaining microbatches. 
+ num_model_chunks = len(model) + total_num_microbatches = num_microbatches * num_model_chunks + all_warmup_microbatches = False + if forward_only: + num_warmup_microbatches = total_num_microbatches + else: + # Run all forward passes and then all backward passes if number of + # microbatches is just the number of pipeline stages. + # Otherwise, perform (num_model_chunks-1)*pipeline_parallel_size on + # all workers, followed by more microbatches after depending on + # stage ID (more forward passes for earlier stages, later stages can + # immediately start with 1F1B). + if num_microbatches == pipeline_parallel_size: + num_warmup_microbatches = total_num_microbatches + all_warmup_microbatches = True + else: + num_warmup_microbatches = (pipeline_parallel_size - pipeline_parallel_rank - 1) * 2 + num_warmup_microbatches += (num_model_chunks - 1) * pipeline_parallel_size + num_warmup_microbatches = min(num_warmup_microbatches, total_num_microbatches) + num_microbatches_remaining = total_num_microbatches - num_warmup_microbatches + + # Checkpoint the activations of partial Transformer layers in a number of micro-batches + # within the maximum outstanding micro-batch backpropagations. + # Micro-batches with the ids less than 'num_microbatches_with_partial_activation_checkpoints' + # checkpoint partial Transformer layers (or skip checkpointing) and + # the rest of micro-batches within a window of micro-batches checkpoint + # all Transformer layers. The window of micro-batches is set by the maximum + # outstanding backpropagations and becomes smaller at later pipeline stages. 
+ # Please refer the appendix C in https://arxiv.org/pdf/2205.05198.pdf + max_outstanding_backprops = None + if config.num_microbatches_with_partial_activation_checkpoints is not None: + max_outstanding_backprops = num_warmup_microbatches + 1 + + # Synchronize params for first two model chunks + if config.param_sync_func is not None: + config.param_sync_func[0](model[0].parameters()) + config.param_sync_func[1](model[1].parameters()) + + def get_model_chunk_id(microbatch_id, forward): + """Helper method to get the model chunk ID given the iteration number.""" + microbatch_id_in_group = microbatch_id % (pipeline_parallel_size * num_model_chunks) + model_chunk_id = microbatch_id_in_group // pipeline_parallel_size + if not forward: + model_chunk_id = num_model_chunks - model_chunk_id - 1 + return model_chunk_id + + def get_microbatch_id_in_model_chunk(iteration_id, forward): + """Helper method to get the microbatch_id within model chunk given the iteration number.""" + assert forward + iteration_group_id = iteration_id // (pipeline_parallel_size * num_model_chunks) + microbatch_id_in_model_chunk = (iteration_group_id * pipeline_parallel_size) + ( + iteration_id % pipeline_parallel_size + ) + return microbatch_id_in_model_chunk + + def is_first_microbatch_for_model_chunk(microbatch_id: int) -> bool: + """Check if an iteration is the first for a model chunk.""" + microbatch_group_size = pipeline_parallel_size * num_model_chunks + num_microbatch_groups = total_num_microbatches // microbatch_group_size + microbatch_group_id = microbatch_id // microbatch_group_size + microbatch_id_in_group = microbatch_id % microbatch_group_size + if microbatch_group_id == 0: + return microbatch_id_in_group % pipeline_parallel_size == 0 + else: + return False + + def is_last_microbatch_for_model_chunk(microbatch_id: int) -> bool: + """Check if an iteration is the last for a model chunk.""" + microbatch_group_size = pipeline_parallel_size * num_model_chunks + num_microbatch_groups = 
total_num_microbatches // microbatch_group_size + microbatch_group_id = microbatch_id // microbatch_group_size + microbatch_id_in_group = microbatch_id % microbatch_group_size + if microbatch_group_id == num_microbatch_groups - 1: + return microbatch_id_in_group % pipeline_parallel_size == pipeline_parallel_size - 1 + else: + return False + + def forward_step_helper(microbatch_id, current_microbatch, checkpoint_activations_microbatch): + """Helper method to run forward step with model split into chunks + (run set_virtual_pipeline_model_parallel_rank() before calling + forward_step()).""" + model_chunk_id = get_model_chunk_id(microbatch_id, forward=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(model_chunk_id) + + # launch param synchronization for next model chunk + # Note: Asynchronous communication tends to slow down compute. + # To reduce idling from mismatched microbatch times, we launch + # asynchronous communication at the same time across the + # pipeline-parallel group. 
+ if config.param_sync_func is not None: + param_sync_microbatch_id = microbatch_id + pipeline_parallel_rank + if ( + param_sync_microbatch_id < total_num_microbatches + and is_first_microbatch_for_model_chunk(param_sync_microbatch_id) + ): + param_sync_chunk_id = get_model_chunk_id(param_sync_microbatch_id, forward=True) + 1 + if 1 < param_sync_chunk_id < num_model_chunks: + config.param_sync_func[param_sync_chunk_id]( + model[param_sync_chunk_id].parameters() + ) + + # forward step + if parallel_state.is_pipeline_first_stage(): + if len(input_tensors[model_chunk_id]) == len(output_tensors[model_chunk_id]): + input_tensors[model_chunk_id].append(None) + input_tensor = input_tensors[model_chunk_id][-1] + + output_tensor, num_tokens = forward_step( + forward_step_func, + data_iterator[model_chunk_id], + model[model_chunk_id], + num_microbatches, + input_tensor, + forward_data_store, + config, + collect_non_loss_data, + checkpoint_activations_microbatch, + check_first_val_step( + first_val_step, forward_only, is_first_microbatch_for_model_chunk(microbatch_id), + ), + current_microbatch=current_microbatch, + ) + output_tensors[model_chunk_id].append(output_tensor) + + nonlocal total_num_tokens + total_num_tokens += num_tokens.item() + + # if forward-only, no need to save tensors for a backward pass + if forward_only: + input_tensors[model_chunk_id].pop() + output_tensors[model_chunk_id].pop() + + return output_tensor + + def backward_step_helper(microbatch_id): + """Helper method to run backward step with model split into chunks + (run set_virtual_pipeline_model_parallel_rank() before calling + backward_step()).""" + model_chunk_id = get_model_chunk_id(microbatch_id, forward=False) + parallel_state.set_virtual_pipeline_model_parallel_rank(model_chunk_id) + + # launch grad synchronization (default) + if config.grad_sync_func is None and is_last_microbatch_for_model_chunk(microbatch_id): + enable_grad_sync() + synchronized_model_chunks.add(model_chunk_id) + + if 
parallel_state.is_pipeline_last_stage(): + if len(output_tensor_grads[model_chunk_id]) == 0: + output_tensor_grads[model_chunk_id].append(None) + input_tensor = input_tensors[model_chunk_id].pop(0) + output_tensor = output_tensors[model_chunk_id].pop(0) + output_tensor_grad = output_tensor_grads[model_chunk_id].pop(0) + input_tensor_grad = backward_step( + input_tensor, output_tensor, output_tensor_grad, model_type, config + ) + + # launch grad synchronization (custom grad sync) + # Note: Asynchronous communication tends to slow down compute. + # To reduce idling from mismatched microbatch times, we launch + # asynchronous communication at the same time across the + # pipeline-parallel group. + if config.grad_sync_func is not None: + grad_sync_microbatch_id = microbatch_id - pipeline_parallel_rank + if grad_sync_microbatch_id >= 0 and is_last_microbatch_for_model_chunk( + grad_sync_microbatch_id + ): + grad_sync_chunk_id = get_model_chunk_id(grad_sync_microbatch_id, forward=False) + enable_grad_sync() + config.grad_sync_func[grad_sync_chunk_id](model[grad_sync_chunk_id].parameters()) + synchronized_model_chunks.add(grad_sync_chunk_id) + disable_grad_sync() + + return input_tensor_grad + + # Run warmup forward passes. 
+ parallel_state.set_virtual_pipeline_model_parallel_rank(0) + input_tensors[0].append(p2p_communication.recv_forward(tensor_shape, config)) + + fwd_wait_handles = None + bwd_wait_handles = None + + for k in range(num_warmup_microbatches): + + if fwd_wait_handles is not None: + for req in fwd_wait_handles: + req.wait() + + cur_model_chunk_id = get_model_chunk_id(k, forward=True) + # Decide to checkpoint all layers' activations of the current micro-batch + if max_outstanding_backprops is not None: + checkpoint_activations_microbatch = ( + k % max_outstanding_backprops + >= config.num_microbatches_with_partial_activation_checkpoints + ) + else: + checkpoint_activations_microbatch = None + + current_microbatch = get_microbatch_id_in_model_chunk(k, forward=True) + output_tensor = forward_step_helper( + k, current_microbatch, checkpoint_activations_microbatch + ) + + # Determine if tensor should be received from previous stage. + next_forward_model_chunk_id = get_model_chunk_id(k + 1, forward=True) + recv_prev = True + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + if next_forward_model_chunk_id == 0: + recv_prev = False + if k == (total_num_microbatches - 1): + recv_prev = False + + # Don't send tensor downstream if on last stage. + if parallel_state.is_pipeline_last_stage(): + output_tensor = None + + # Send and receive tensors as appropriate (send tensors computed + # in this iteration; receive tensors for next iteration). 
+ if not config.overlap_p2p_comm: + if ( + k == (num_warmup_microbatches - 1) + and not forward_only + and not all_warmup_microbatches + ): + input_tensor_grad = None + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + recv_next = False + ( + input_tensor, + output_tensor_grad, + ) = p2p_communication.send_forward_backward_recv_forward_backward( + output_tensor, + input_tensor_grad, + recv_prev=recv_prev, + recv_next=recv_next, + tensor_shape=tensor_shape, + config=config, + ) + output_tensor_grads[num_model_chunks - 1].append(output_tensor_grad) + else: + input_tensor = p2p_communication.send_forward_recv_forward( + output_tensor, recv_prev=recv_prev, tensor_shape=tensor_shape, config=config + ) + input_tensors[next_forward_model_chunk_id].append(input_tensor) + else: + input_tensor, fwd_wait_handles = p2p_communication.send_forward_recv_forward( + output_tensor, + recv_prev=recv_prev, + tensor_shape=tensor_shape, + config=config, + overlap_p2p_comm=True, + ) + + if ( + k == (num_warmup_microbatches - 1) + and not forward_only + and not all_warmup_microbatches + ): + input_tensor_grad = None + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + recv_next = False + + ( + output_tensor_grad, + bwd_wait_handles, + ) = p2p_communication.send_backward_recv_backward( + input_tensor_grad, + recv_next=recv_next, + tensor_shape=tensor_shape, + config=config, + overlap_p2p_comm=True, + ) + + output_tensor_grads[num_model_chunks - 1].append(output_tensor_grad) + input_tensors[next_forward_model_chunk_id].append(input_tensor) + + deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs) + + # Run 1F1B in steady state. + for k in range(num_microbatches_remaining): + # Forward pass. 
+ forward_k = k + num_warmup_microbatches + + # Decide to checkpoint all layers' activations of the current micro-batch + if max_outstanding_backprops is not None: + checkpoint_activations_microbatch = ( + forward_k % max_outstanding_backprops + >= config.num_microbatches_with_partial_activation_checkpoints + ) + else: + checkpoint_activations_microbatch = None + + cur_model_chunk_id = get_model_chunk_id(forward_k, forward=True) + current_microbatch = get_microbatch_id_in_model_chunk(forward_k, forward=True) + if config.overlap_p2p_comm: + if fwd_wait_handles is not None: + for req in fwd_wait_handles: + req.wait() + + deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs) + + output_tensor = forward_step_helper( + forward_k, current_microbatch, checkpoint_activations_microbatch + ) + + # Determine if current stage has anything to send in either direction, + # otherwise set tensor to None. + forward_model_chunk_id = get_model_chunk_id(forward_k, forward=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(forward_model_chunk_id) + + # Last virtual stage no activation tensor to send + if parallel_state.is_pipeline_last_stage(): + output_tensor = None + + # Determine if peers are sending, and where in data structure to put + # received tensors. + recv_prev = True + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + # First stage is ahead of last stage by (pipeline_parallel_size - 1). + next_forward_model_chunk_id = get_model_chunk_id( + forward_k - (pipeline_parallel_size - 1), forward=True + ) + if next_forward_model_chunk_id == (num_model_chunks - 1): + recv_prev = False + next_forward_model_chunk_id += 1 + else: + next_forward_model_chunk_id = get_model_chunk_id(forward_k + 1, forward=True) + + # If last iteration, don't receive; we already received one extra + # before the start of the for loop. 
+ if k == (num_microbatches_remaining - 1): + recv_prev = False + + # Send activation tensor to the next stage and receive activation tensor from the + # previous stage + input_tensor, fwd_wait_handles = p2p_communication.send_forward_recv_forward( + output_tensor, + recv_prev=recv_prev, + tensor_shape=tensor_shape, + config=config, + overlap_p2p_comm=True, + ) + # assert fwd_wait_handles is not None + + if bwd_wait_handles is not None: + for req in bwd_wait_handles: + req.wait() + + # Backward pass. + backward_k = k + input_tensor_grad = backward_step_helper(backward_k) + + backward_model_chunk_id = get_model_chunk_id(backward_k, forward=False) + parallel_state.set_virtual_pipeline_model_parallel_rank(backward_model_chunk_id) + + # First virtual stage no activation gradient tensor to send + if parallel_state.is_pipeline_first_stage(): + input_tensor_grad = None + + # Determine if the current virtual stage has an activation gradient tensor to receive + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + # Last stage is ahead of first stage by (pipeline_parallel_size - 1). + next_backward_model_chunk_id = get_model_chunk_id( + backward_k - (pipeline_parallel_size - 1), forward=False + ) + if next_backward_model_chunk_id == 0: + recv_next = False + next_backward_model_chunk_id -= 1 + else: + next_backward_model_chunk_id = get_model_chunk_id(backward_k + 1, forward=False) + + output_tensor_grad, bwd_wait_handles = p2p_communication.send_backward_recv_backward( + input_tensor_grad, + recv_next=recv_next, + tensor_shape=tensor_shape, + config=config, + overlap_p2p_comm=True, + ) + + else: # no p2p overlap + output_tensor = forward_step_helper( + forward_k, current_microbatch, checkpoint_activations_microbatch + ) + + # Backward pass. + backward_k = k + input_tensor_grad = backward_step_helper(backward_k) + + # Send output_tensor and input_tensor_grad, receive input_tensor + # and output_tensor_grad. 
+ + # Determine if current stage has anything to send in either direction, + # otherwise set tensor to None. + forward_model_chunk_id = get_model_chunk_id(forward_k, forward=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(forward_model_chunk_id) + if parallel_state.is_pipeline_last_stage(): + output_tensor = None + + backward_model_chunk_id = get_model_chunk_id(backward_k, forward=False) + parallel_state.set_virtual_pipeline_model_parallel_rank(backward_model_chunk_id) + if parallel_state.is_pipeline_first_stage(): + input_tensor_grad = None + + # Determine if peers are sending, and where in data structure to put + # received tensors. + recv_prev = True + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + # First stage is ahead of last stage by (pipeline_parallel_size - 1). + next_forward_model_chunk_id = get_model_chunk_id( + forward_k - (pipeline_parallel_size - 1), forward=True + ) + if next_forward_model_chunk_id == (num_model_chunks - 1): + recv_prev = False + next_forward_model_chunk_id += 1 + else: + next_forward_model_chunk_id = get_model_chunk_id(forward_k + 1, forward=True) + + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + # Last stage is ahead of first stage by (pipeline_parallel_size - 1). + next_backward_model_chunk_id = get_model_chunk_id( + backward_k - (pipeline_parallel_size - 1), forward=False + ) + if next_backward_model_chunk_id == 0: + recv_next = False + next_backward_model_chunk_id -= 1 + else: + next_backward_model_chunk_id = get_model_chunk_id(backward_k + 1, forward=False) + + # If last iteration, don't receive; we already received one extra + # before the start of the for loop. + if k == (num_microbatches_remaining - 1): + recv_prev = False + + # Communicate tensors. 
+ ( + input_tensor, + output_tensor_grad, + ) = p2p_communication.send_forward_backward_recv_forward_backward( + output_tensor, + input_tensor_grad, + recv_prev=recv_prev, + recv_next=recv_next, + tensor_shape=tensor_shape, + config=config, + ) + deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs) + + # Put input_tensor and output_tensor_grad in data structures in the + # right location. + if recv_prev: + input_tensors[next_forward_model_chunk_id].append(input_tensor) + if recv_next: + output_tensor_grads[next_backward_model_chunk_id].append(output_tensor_grad) + + deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs) + + # Run cooldown backward passes (flush out pipeline). + if not forward_only: + if config.overlap_p2p_comm and bwd_wait_handles is not None: + for wait_handle in bwd_wait_handles: + wait_handle.wait() + + if all_warmup_microbatches: + output_tensor_grads[num_model_chunks - 1].append( + p2p_communication.recv_backward(tensor_shape, config=config) + ) + for k in range(num_microbatches_remaining, total_num_microbatches): + input_tensor_grad = backward_step_helper(k) + next_backward_model_chunk_id = get_model_chunk_id(k + 1, forward=False) + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + if next_backward_model_chunk_id == (num_model_chunks - 1): + recv_next = False + if k == (total_num_microbatches - 1): + recv_next = False + output_tensor_grads[next_backward_model_chunk_id].append( + p2p_communication.send_backward_recv_backward( + input_tensor_grad, recv_next=recv_next, tensor_shape=tensor_shape, config=config + ) + ) + + # Launch any remaining grad reductions. 
+ enable_grad_sync() + if config.grad_sync_func is not None: + for model_chunk_id in range(num_model_chunks): + if model_chunk_id not in synchronized_model_chunks: + config.grad_sync_func[model_chunk_id](model[model_chunk_id].parameters()) + synchronized_model_chunks.add(model_chunk_id) + + if config.finalize_model_grads_func is not None and not forward_only: + # If defer_embedding_wgrad_compute is enabled we need to do the + # weight gradient GEMM's here. + finish_embedding_wgrad_compute(config, embedding_module) + + # Finalize model grads (perform full grad all-reduce / reduce-scatter for + # data parallelism, layernorm all-reduce for sequence parallelism, and + # embedding all-reduce for pipeline parallelism). + config.finalize_model_grads_func( + model, total_num_tokens if config.calculate_per_token_loss else None + ) + + if config.timers is not None: + config.timers('forward-backward').stop() + + return forward_data_store diff --git a/model/train/yoco_moe/mindspeed/core/pipeline_parallel/multiparameter_schedules.py b/model/train/yoco_moe/mindspeed/core/pipeline_parallel/multiparameter_schedules.py new file mode 100644 index 000000000..c1d1f5edd --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/pipeline_parallel/multiparameter_schedules.py @@ -0,0 +1,965 @@ +# coding=utf-8 +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
def forward_step_wrapper(fn):
    """Wrap a forward-step function so a nested output list is unwrapped once.

    The wrapped function must return ``(output_tensor, num_tokens)``. When
    ``output_tensor`` is a non-empty list/tuple whose first element is itself
    a list, that inner list is returned in its place; every other value is
    passed through untouched.
    """
    @wraps(fn)
    def wrapper(*arg, **kwargs):
        output_tensor, num_tokens = fn(*arg, **kwargs)
        # Only inspect real sequences: ``len()`` of a 0-d tensor (for example
        # a scalar loss) raises TypeError, so tensors are passed through as is.
        if (
            isinstance(output_tensor, (list, tuple))
            and len(output_tensor) > 0
            and isinstance(output_tensor[0], list)
        ):
            return output_tensor[0], num_tokens
        return output_tensor, num_tokens
    return wrapper


def backward_step(input_tensor, output_tensor, output_tensor_grad, model_type, config):
    """Run the backward pass for one microbatch with one or many tensors.

    Args:
        input_tensor: Stage input, either a single tensor/``None`` or a list of
            them. Gradients are retained on every entry that requires grad.
        output_tensor: Stage output, a single tensor or a list of tensors.
        output_tensor_grad: Gradient(s) for ``output_tensor``. When the first
            entry is ``None`` only ``output_tensor[0]`` is back-propagated
            (after ``config.grad_scale_func``, if one is configured).
        model_type: Compared against ``ModelType.encoder_and_decoder`` to
            decide whether the skip-connection gradient is accumulated.
        config: Object providing ``timers`` and ``grad_scale_func``.

    Returns:
        The gradient(s) of ``input_tensor``: a list with one entry per input
        (``None`` for ``None`` inputs, zeros for inputs that received no
        gradient), or the single entry itself when ``input_tensor`` was not
        passed as a list.
    """
    if config.timers is not None:
        config.timers('backward-compute', log_level=2).start()

    # Retain the grad on the input_tensor.
    unwrap_input_tensor_grad = False
    if not isinstance(input_tensor, list):
        input_tensor = [input_tensor]
        unwrap_input_tensor_grad = True
    for x in input_tensor:
        if x is not None and x.requires_grad:
            x.retain_grad()

    if not isinstance(output_tensor, list):
        output_tensor = [output_tensor]
    if not isinstance(output_tensor_grad, list):
        output_tensor_grad = [output_tensor_grad]

    # Backward pass.
    if output_tensor_grad[0] is None and config.grad_scale_func is not None:
        output_tensor[0] = config.grad_scale_func(output_tensor[0])

    if output_tensor_grad[0] is None:
        # The last stage has no incoming gradients and only one loss is used for backward.
        torch.autograd.backward(output_tensor[0], grad_tensors=output_tensor_grad[0])
    else:
        # Back-propagate only through the outputs that take part in autograd.
        output_tensors = []
        output_grad_tensors = []
        for output, grad in zip(output_tensor, output_tensor_grad):
            if output.requires_grad:
                output_tensors.append(output)
                output_grad_tensors.append(grad)
        torch.autograd.backward(output_tensors, grad_tensors=output_grad_tensors)

    # Collect the grad of the input_tensor. ``input_tensor`` is always a list
    # here, so no separate "no input" case is needed.
    input_tensor_grad = []
    for x in input_tensor:
        if x is None:
            input_tensor_grad.append(None)
        elif x.grad is None:
            # Inputs that received no gradient still contribute a zero tensor.
            input_tensor_grad.append(torch.zeros_like(x, device=torch.cuda.current_device()))
        else:
            input_tensor_grad.append(x.grad)

    # Handle single skip connection if it exists (encoder_hidden_state in
    # model with encoder and decoder).
    if (
        parallel_state.get_pipeline_model_parallel_world_size() > 1
        and parallel_state.is_pipeline_stage_after_split()
        and model_type == ModelType.encoder_and_decoder
    ):
        if output_tensor_grad[1] is not None:
            input_tensor_grad[-1].add_(output_tensor_grad[1])
    if unwrap_input_tensor_grad:
        input_tensor_grad = input_tensor_grad[0]

    if config.timers is not None:
        config.timers('backward-compute').stop()

    return input_tensor_grad


def backward_step_wrapper(fn):
    """Return a replacement for ``fn`` that always calls this module's ``backward_step``.

    ``fn`` is used only for its metadata (via ``functools.wraps``); it is never called.
    """
    @wraps(fn)
    def wrapper(*arg, **kwargs):
        return backward_step(*arg, **kwargs)
    return wrapper


def get_tensor_shapes_wrapper(fn):
    """Return a replacement for ``fn`` that yields ``get_args().pipeline_tensor_shapes``.

    ``fn`` and every call argument are ignored; the shapes come from the global arguments.
    """
    @wraps(fn)
    def wrapper(*arg, **kwargs):
        args = get_args()
        return args.pipeline_tensor_shapes
    return wrapper
stack.enter_context(model_chunk_no_sync_func()) + return stack + + no_sync_func = multi_no_sync + if no_sync_func is None: + no_sync_func = contextlib.nullcontext + no_sync_context = None + + if config.grad_sync_func is not None and not isinstance(config.grad_sync_func, list): + config.grad_sync_func = [config.grad_sync_func for _ in model] + + if config.param_sync_func is not None and not isinstance(config.param_sync_func, list): + config.param_sync_func = [config.param_sync_func for _ in model] + + def disable_grad_sync(): + """Disable asynchronous grad reductions""" + nonlocal no_sync_context + if no_sync_context is None: + no_sync_context = no_sync_func() + no_sync_context.__enter__() + + def enable_grad_sync(): + """Enable asynchronous grad reductions""" + nonlocal no_sync_context + if no_sync_context is not None: + no_sync_context.__exit__(None, None, None) + no_sync_context = None + + disable_grad_sync() + + # Model chunk IDs with synchronized grads + synchronized_model_chunks = set() + + input_tensors = [[] for _ in range(len(model))] + output_tensors = [[] for _ in range(len(model))] + total_num_tokens = torch.tensor(0, dtype=torch.int).cuda() + + forward_data_store = [] + if not forward_only: + output_tensor_grads = [[] for _ in range(len(model))] + + pipeline_parallel_size = parallel_state.get_pipeline_model_parallel_world_size() + pipeline_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + + if num_microbatches % pipeline_parallel_size != 0: + msg = f'number of microbatches ({num_microbatches}) is not divisible by ' + msg += f'pipeline-model-parallel-size ({pipeline_parallel_size}) ' + msg += 'when using interleaved schedule' + raise RuntimeError(msg) + + model_type = get_model_type(model[0]) + if model_type == ModelType.encoder_and_decoder: + raise RuntimeError("Interleaving is not supported with an encoder and decoder model.") + + if decoder_seq_length is not None and decoder_seq_length != seq_length: + raise RuntimeError( + 
"Interleaving is not supported with a different decoder sequence length." + ) + + tensor_shape = get_args().pipeline_tensor_shapes + + # Compute number of warmup and remaining microbatches. + num_model_chunks = len(model) + total_num_microbatches = num_microbatches * num_model_chunks + all_warmup_microbatches = False + if forward_only: + num_warmup_microbatches = total_num_microbatches + else: + # Run all forward passes and then all backward passes if number of + # microbatches is just the number of pipeline stages. + # Otherwise, perform (num_model_chunks-1)*pipeline_parallel_size on + # all workers, followed by more microbatches after depending on + # stage ID (more forward passes for earlier stages, later stages can + # immediately start with 1F1B). + if num_microbatches == pipeline_parallel_size: + num_warmup_microbatches = total_num_microbatches + all_warmup_microbatches = True + else: + num_warmup_microbatches = (pipeline_parallel_size - pipeline_parallel_rank - 1) * 2 + num_warmup_microbatches += (num_model_chunks - 1) * pipeline_parallel_size + num_warmup_microbatches = min(num_warmup_microbatches, total_num_microbatches) + num_microbatches_remaining = total_num_microbatches - num_warmup_microbatches + + # Checkpoint the activations of partial Transformer layers in a number of micro-batches + # within the maximum outstanding micro-batch backpropagations. + # Micro-batches with the ids less than 'num_microbatches_with_partial_activation_checkpoints' + # checkpoint partial Transformer layers (or skip checkpointing) and + # the rest of micro-batches within a window of micro-batches checkpoint + # all Transformer layers. The window of micro-batches is set by the maximum + # outstanding backpropagations and becomes smaller at later pipeline stages. 
+ # Please refer the appendix C in https://arxiv.org/pdf/2205.05198.pdf + max_outstanding_backprops = None + if config.num_microbatches_with_partial_activation_checkpoints is not None: + max_outstanding_backprops = num_warmup_microbatches + 1 + + # Synchronize params for first two model chunks + if config.param_sync_func is not None: + config.param_sync_func[0](model[0].parameters()) + config.param_sync_func[1](model[1].parameters()) + + def get_model_chunk_id(microbatch_id, forward): + """Helper method to get the model chunk ID given the iteration number.""" + microbatch_id_in_group = microbatch_id % (pipeline_parallel_size * num_model_chunks) + model_chunk_id = microbatch_id_in_group // pipeline_parallel_size + if not forward: + model_chunk_id = num_model_chunks - model_chunk_id - 1 + return model_chunk_id + + def get_microbatch_id_in_model_chunk(iteration_id, forward): + """Helper method to get the microbatch_id within model chunk given the iteration number.""" + assert forward + iteration_group_id = iteration_id // (pipeline_parallel_size * num_model_chunks) + microbatch_id_in_model_chunk = (iteration_group_id * pipeline_parallel_size) + ( + iteration_id % pipeline_parallel_size + ) + return microbatch_id_in_model_chunk + + def is_first_microbatch_for_model_chunk(microbatch_id: int) -> bool: + """Check if an iteration is the first for a model chunk.""" + microbatch_group_size = pipeline_parallel_size * num_model_chunks + num_microbatch_groups = total_num_microbatches // microbatch_group_size + microbatch_group_id = microbatch_id // microbatch_group_size + microbatch_id_in_group = microbatch_id % microbatch_group_size + if microbatch_group_id == 0: + return microbatch_id_in_group % pipeline_parallel_size == 0 + else: + return False + + def is_last_microbatch_for_model_chunk(microbatch_id: int) -> bool: + """Check if an iteration is the last for a model chunk.""" + microbatch_group_size = pipeline_parallel_size * num_model_chunks + num_microbatch_groups = 
total_num_microbatches // microbatch_group_size + microbatch_group_id = microbatch_id // microbatch_group_size + microbatch_id_in_group = microbatch_id % microbatch_group_size + if microbatch_group_id == num_microbatch_groups - 1: + return microbatch_id_in_group % pipeline_parallel_size == pipeline_parallel_size - 1 + else: + return False + + def forward_step_helper(microbatch_id, current_microbatch, checkpoint_activations_microbatch): + """Helper method to run forward step with model split into chunks + (run set_virtual_pipeline_model_parallel_rank() before calling + forward_step()).""" + model_chunk_id = get_model_chunk_id(microbatch_id, forward=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(model_chunk_id) + + # launch param synchronization for next model chunk + # Note: Asynchronous communication tends to slow down compute. + # To reduce idling from mismatched microbatch times, we launch + # asynchronous communication at the same time across the + # pipeline-parallel group. 
+ if config.param_sync_func is not None: + param_sync_microbatch_id = microbatch_id + pipeline_parallel_rank + if ( + param_sync_microbatch_id < total_num_microbatches + and is_first_microbatch_for_model_chunk(param_sync_microbatch_id) + ): + param_sync_chunk_id = get_model_chunk_id(param_sync_microbatch_id, forward=True) + 1 + if 1 < param_sync_chunk_id < num_model_chunks: + config.param_sync_func[param_sync_chunk_id]( + model[param_sync_chunk_id].parameters() + ) + + # forward step + if parallel_state.is_pipeline_first_stage(): + if len(input_tensors[model_chunk_id]) == len(output_tensors[model_chunk_id]): + input_tensors[model_chunk_id].append(None) + input_tensor = input_tensors[model_chunk_id][-1] + + output_tensor, num_tokens = forward_step( + forward_step_func, + data_iterator[model_chunk_id], + model[model_chunk_id], + num_microbatches, + input_tensor, + forward_data_store, + config, + collect_non_loss_data, + checkpoint_activations_microbatch, + check_first_val_step( + first_val_step, forward_only, is_first_microbatch_for_model_chunk(microbatch_id), + ), + current_microbatch=current_microbatch, + ) + output_tensors[model_chunk_id].append(output_tensor) + + nonlocal total_num_tokens + total_num_tokens += num_tokens.item() + + # if forward-only, no need to save tensors for a backward pass + if forward_only: + input_tensors[model_chunk_id].pop() + output_tensors[model_chunk_id].pop() + + return output_tensor + + def backward_step_helper(microbatch_id): + """Helper method to run backward step with model split into chunks + (run set_virtual_pipeline_model_parallel_rank() before calling + backward_step()).""" + model_chunk_id = get_model_chunk_id(microbatch_id, forward=False) + parallel_state.set_virtual_pipeline_model_parallel_rank(model_chunk_id) + + # launch grad synchronization (default) + if config.grad_sync_func is None and is_last_microbatch_for_model_chunk(microbatch_id): + enable_grad_sync() + synchronized_model_chunks.add(model_chunk_id) + + if 
parallel_state.is_pipeline_last_stage(): + if len(output_tensor_grads[model_chunk_id]) == 0: + output_tensor_grads[model_chunk_id].append(None) + input_tensor = input_tensors[model_chunk_id].pop(0) + output_tensor = output_tensors[model_chunk_id].pop(0) + output_tensor_grad = output_tensor_grads[model_chunk_id].pop(0) + input_tensor_grad = backward_step( + input_tensor, output_tensor, output_tensor_grad, model_type, config + ) + + # launch grad synchronization (custom grad sync) + # Note: Asynchronous communication tends to slow down compute. + # To reduce idling from mismatched microbatch times, we launch + # asynchronous communication at the same time across the + # pipeline-parallel group. + if config.grad_sync_func is not None: + grad_sync_microbatch_id = microbatch_id - pipeline_parallel_rank + if grad_sync_microbatch_id >= 0 and is_last_microbatch_for_model_chunk( + grad_sync_microbatch_id + ): + grad_sync_chunk_id = get_model_chunk_id(grad_sync_microbatch_id, forward=False) + enable_grad_sync() + config.grad_sync_func[grad_sync_chunk_id](model[grad_sync_chunk_id].parameters()) + synchronized_model_chunks.add(grad_sync_chunk_id) + disable_grad_sync() + + return input_tensor_grad + + # Run warmup forward passes. 
+ parallel_state.set_virtual_pipeline_model_parallel_rank(0) + input_tensors[0].append(recv_forward(tensor_shape, config)) + + fwd_wait_handles = None + bwd_wait_handles = None + + for k in range(num_warmup_microbatches): + + if fwd_wait_handles is not None: + for req in fwd_wait_handles: + req.wait() + + cur_model_chunk_id = get_model_chunk_id(k, forward=True) + # Decide to checkpoint all layers' activations of the current micro-batch + if max_outstanding_backprops is not None: + checkpoint_activations_microbatch = ( + k % max_outstanding_backprops + >= config.num_microbatches_with_partial_activation_checkpoints + ) + else: + checkpoint_activations_microbatch = None + + current_microbatch = get_microbatch_id_in_model_chunk(k, forward=True) + output_tensor = forward_step_helper( + k, current_microbatch, checkpoint_activations_microbatch + ) + + # Determine if tensor should be received from previous stage. + next_forward_model_chunk_id = get_model_chunk_id(k + 1, forward=True) + recv_prev = True + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + if next_forward_model_chunk_id == 0: + recv_prev = False + if k == (total_num_microbatches - 1): + recv_prev = False + + # Don't send tensor downstream if on last stage. + if parallel_state.is_pipeline_last_stage(): + output_tensor = None + + # Send and receive tensors as appropriate (send tensors computed + # in this iteration; receive tensors for next iteration). 
+ if not config.overlap_p2p_comm: + if ( + k == (num_warmup_microbatches - 1) + and not forward_only + and not all_warmup_microbatches + ): + input_tensor_grad = None + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + recv_next = False + ( + input_tensor, + output_tensor_grad, + ) = send_forward_backward_recv_forward_backward( + output_tensor, + input_tensor_grad, + tensor_shape, + recv_prev=recv_prev, + recv_next=recv_next, + config=config, + ) + output_tensor_grads[num_model_chunks - 1].append(output_tensor_grad) + else: + input_tensor = send_forward_recv_forward( + output_tensor, tensor_shape, recv_prev=recv_prev, config=config + ) + input_tensors[next_forward_model_chunk_id].append(input_tensor) + else: + input_tensor, fwd_wait_handles = send_forward_recv_forward( + output_tensor, + tensor_shape, + recv_prev=recv_prev, + config=config, + overlap_p2p_comm=True, + ) + + if ( + k == (num_warmup_microbatches - 1) + and not forward_only + and not all_warmup_microbatches + ): + input_tensor_grad = None + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + recv_next = False + + ( + output_tensor_grad, + bwd_wait_handles, + ) = send_backward_recv_backward( + input_tensor_grad, + tensor_shape, + recv_next=recv_next, + config=config, + overlap_p2p_comm=True, + ) + + output_tensor_grads[num_model_chunks - 1].append(output_tensor_grad) + input_tensors[next_forward_model_chunk_id].append(input_tensor) + + deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs) + + # Run 1F1B in steady state. + for k in range(num_microbatches_remaining): + # Forward pass. 
+ forward_k = k + num_warmup_microbatches + + # Decide to checkpoint all layers' activations of the current micro-batch + if max_outstanding_backprops is not None: + checkpoint_activations_microbatch = ( + forward_k % max_outstanding_backprops + >= config.num_microbatches_with_partial_activation_checkpoints + ) + else: + checkpoint_activations_microbatch = None + + cur_model_chunk_id = get_model_chunk_id(forward_k, forward=True) + current_microbatch = get_microbatch_id_in_model_chunk(forward_k, forward=True) + if config.overlap_p2p_comm: + if fwd_wait_handles is not None: + for req in fwd_wait_handles: + req.wait() + + deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs) + + output_tensor = forward_step_helper( + forward_k, current_microbatch, checkpoint_activations_microbatch + ) + + # Determine if current stage has anything to send in either direction, + # otherwise set tensor to None. + forward_model_chunk_id = get_model_chunk_id(forward_k, forward=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(forward_model_chunk_id) + + # Last virtual stage no activation tensor to send + if parallel_state.is_pipeline_last_stage(): + output_tensor = None + + # Determine if peers are sending, and where in data structure to put + # received tensors. + recv_prev = True + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + # First stage is ahead of last stage by (pipeline_parallel_size - 1). + next_forward_model_chunk_id = get_model_chunk_id( + forward_k - (pipeline_parallel_size - 1), forward=True + ) + if next_forward_model_chunk_id == (num_model_chunks - 1): + recv_prev = False + next_forward_model_chunk_id += 1 + else: + next_forward_model_chunk_id = get_model_chunk_id(forward_k + 1, forward=True) + + # If last iteration, don't receive; we already received one extra + # before the start of the for loop. 
+ if k == (num_microbatches_remaining - 1): + recv_prev = False + + # Send activation tensor to the next stage and receive activation tensor from the + # previous stage + input_tensor, fwd_wait_handles = send_forward_recv_forward( + output_tensor, + tensor_shape, + recv_prev=recv_prev, + config=config, + overlap_p2p_comm=True, + ) + # assert fwd_wait_handles is not None + + if bwd_wait_handles is not None: + for req in bwd_wait_handles: + req.wait() + + # Backward pass. + backward_k = k + input_tensor_grad = backward_step_helper(backward_k) + + backward_model_chunk_id = get_model_chunk_id(backward_k, forward=False) + parallel_state.set_virtual_pipeline_model_parallel_rank(backward_model_chunk_id) + + # First virtual stage no activation gradient tensor to send + if parallel_state.is_pipeline_first_stage(): + input_tensor_grad = None + + # Determine if the current virtual stage has an activation gradient tensor to receive + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + # Last stage is ahead of first stage by (pipeline_parallel_size - 1). + next_backward_model_chunk_id = get_model_chunk_id( + backward_k - (pipeline_parallel_size - 1), forward=False + ) + if next_backward_model_chunk_id == 0: + recv_next = False + next_backward_model_chunk_id -= 1 + else: + next_backward_model_chunk_id = get_model_chunk_id(backward_k + 1, forward=False) + + output_tensor_grad, bwd_wait_handles = send_backward_recv_backward( + input_tensor_grad, + tensor_shape, + recv_next=recv_next, + config=config, + overlap_p2p_comm=True, + ) + + else: # no p2p overlap + output_tensor = forward_step_helper( + forward_k, current_microbatch, checkpoint_activations_microbatch + ) + + # Backward pass. + backward_k = k + input_tensor_grad = backward_step_helper(backward_k) + + # Send output_tensor and input_tensor_grad, receive input_tensor + # and output_tensor_grad. 
+ + # Determine if current stage has anything to send in either direction, + # otherwise set tensor to None. + forward_model_chunk_id = get_model_chunk_id(forward_k, forward=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(forward_model_chunk_id) + if parallel_state.is_pipeline_last_stage(): + output_tensor = None + + backward_model_chunk_id = get_model_chunk_id(backward_k, forward=False) + parallel_state.set_virtual_pipeline_model_parallel_rank(backward_model_chunk_id) + if parallel_state.is_pipeline_first_stage(): + input_tensor_grad = None + + # Determine if peers are sending, and where in data structure to put + # received tensors. + recv_prev = True + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + # First stage is ahead of last stage by (pipeline_parallel_size - 1). + next_forward_model_chunk_id = get_model_chunk_id( + forward_k - (pipeline_parallel_size - 1), forward=True + ) + if next_forward_model_chunk_id == (num_model_chunks - 1): + recv_prev = False + next_forward_model_chunk_id += 1 + else: + next_forward_model_chunk_id = get_model_chunk_id(forward_k + 1, forward=True) + + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + # Last stage is ahead of first stage by (pipeline_parallel_size - 1). + next_backward_model_chunk_id = get_model_chunk_id( + backward_k - (pipeline_parallel_size - 1), forward=False + ) + if next_backward_model_chunk_id == 0: + recv_next = False + next_backward_model_chunk_id -= 1 + else: + next_backward_model_chunk_id = get_model_chunk_id(backward_k + 1, forward=False) + + # If last iteration, don't receive; we already received one extra + # before the start of the for loop. + if k == (num_microbatches_remaining - 1): + recv_prev = False + + # Communicate tensors. 
+ ( + input_tensor, + output_tensor_grad, + ) = send_forward_backward_recv_forward_backward( + output_tensor, + input_tensor_grad, + tensor_shape, + recv_prev=recv_prev, + recv_next=recv_next, + config=config, + ) + deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs) + + # Put input_tensor and output_tensor_grad in data structures in the + # right location. + if recv_prev: + input_tensors[next_forward_model_chunk_id].append(input_tensor) + if recv_next: + output_tensor_grads[next_backward_model_chunk_id].append(output_tensor_grad) + + deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs) + + # Run cooldown backward passes (flush out pipeline). + if not forward_only: + if config.overlap_p2p_comm and bwd_wait_handles is not None: + for wait_handle in bwd_wait_handles: + wait_handle.wait() + + if all_warmup_microbatches: + output_tensor_grads[num_model_chunks - 1].append( + recv_backward(tensor_shape, config=config) + ) + for k in range(num_microbatches_remaining, total_num_microbatches): + input_tensor_grad = backward_step_helper(k) + next_backward_model_chunk_id = get_model_chunk_id(k + 1, forward=False) + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + if next_backward_model_chunk_id == (num_model_chunks - 1): + recv_next = False + if k == (total_num_microbatches - 1): + recv_next = False + output_tensor_grads[next_backward_model_chunk_id].append( + send_backward_recv_backward( + input_tensor_grad, tensor_shape, recv_next=recv_next, config=config + ) + ) + + # Launch any remaining grad reductions. 
def recv_forward(tensor_shapes, config):
    """Receive one activation tensor per entry of ``tensor_shapes``.

    Args:
        tensor_shapes: iterable whose entries are either ``None`` (nothing is
            communicated for that slot) or a mapping with ``'shape'`` and
            ``'dtype'`` keys.
        config: object whose ``pipeline_dtype`` attribute is overwritten with
            each entry's dtype right before the matching p2p call.

    Returns:
        A list aligned with ``tensor_shapes``; ``None`` slots stay ``None``.
    """
    input_tensors = []
    for tensor_shape in tensor_shapes:
        if tensor_shape is None:
            input_tensors.append(None)
            continue
        config.pipeline_dtype = tensor_shape['dtype']
        input_tensors.append(p2p_communication.recv_forward(tensor_shape['shape'], config))
    return input_tensors


def recv_forward_wrapper(fn):
    """Return a stand-in for ``fn`` that dispatches to the multi-tensor ``recv_forward``.

    ``fn`` itself is never called; ``wraps`` only copies its metadata.
    """
    @wraps(fn)
    def wrapper(*arg, **kwargs):
        return recv_forward(*arg, **kwargs)
    return wrapper


def recv_backward(tensor_shapes, config):
    """Receive one output-gradient tensor per entry of ``tensor_shapes``.

    Args:
        tensor_shapes: iterable of ``None`` or mappings with ``'shape'`` and
            ``'dtype'`` keys.
        config: object whose ``pipeline_dtype`` is set per entry before the
            p2p call.

    Returns:
        A list aligned with ``tensor_shapes``; ``None`` slots stay ``None``.
    """
    output_tensor_grads = []
    for tensor_shape in tensor_shapes:
        if tensor_shape is None:
            output_tensor_grads.append(None)
            continue
        config.pipeline_dtype = tensor_shape['dtype']
        output_tensor_grads.append(p2p_communication.recv_backward(tensor_shape['shape'], config))
    return output_tensor_grads


def recv_backward_wrapper(fn):
    """Return a stand-in for ``fn`` that dispatches to the multi-tensor ``recv_backward``.

    ``fn`` itself is never called; ``wraps`` only copies its metadata.
    """
    @wraps(fn)
    def wrapper(*arg, **kwargs):
        return recv_backward(*arg, **kwargs)
    return wrapper


def send_forward(output_tensors, tensor_shapes, config):
    """Send each output tensor whose matching shape entry is not ``None``.

    Args:
        output_tensors: list of tensors, a single tensor (wrapped into a
            one-element list), or ``None`` (expanded to one ``None`` per shape).
        tensor_shapes: sequence of ``None`` or mappings with a ``'dtype'`` key.
        config: object whose ``pipeline_dtype`` is set per entry before the
            p2p call.
    """
    if output_tensors is None:
        output_tensors = [None] * len(tensor_shapes)
    if not isinstance(output_tensors, list):
        output_tensors = [output_tensors]
    for output_tensor, tensor_shape in zip(output_tensors, tensor_shapes):
        if tensor_shape is None:
            continue
        config.pipeline_dtype = tensor_shape['dtype']
        p2p_communication.send_forward(output_tensor, config)


def send_forward_wrapper(fn):
    """Return a stand-in for ``fn`` that dispatches to the multi-tensor ``send_forward``.

    ``fn`` itself is never called; ``wraps`` only copies its metadata.
    """
    @wraps(fn)
    def wrapper(*arg, **kwargs):
        return send_forward(*arg, **kwargs)
    return wrapper


def send_backward(input_tensor_grads, tensor_shapes, config):
    """Send each input gradient whose matching shape entry is not ``None``.

    Args:
        input_tensor_grads: list of tensors, a single tensor (wrapped into a
            one-element list), or ``None`` (expanded to one ``None`` per shape).
        tensor_shapes: sequence of ``None`` or mappings with a ``'dtype'`` key.
        config: object whose ``pipeline_dtype`` is set per entry before the
            p2p call.
    """
    if input_tensor_grads is None:
        input_tensor_grads = [None] * len(tensor_shapes)
    if not isinstance(input_tensor_grads, list):
        input_tensor_grads = [input_tensor_grads]
    for input_tensor_grad, tensor_shape in zip(input_tensor_grads, tensor_shapes):
        if tensor_shape is None:
            continue
        config.pipeline_dtype = tensor_shape['dtype']
        p2p_communication.send_backward(input_tensor_grad, config)


def send_backward_wrapper(fn):
    """Return a stand-in for ``fn`` that dispatches to the multi-tensor ``send_backward``.

    ``fn`` itself is never called; ``wraps`` only copies its metadata.
    """
    @wraps(fn)
    def wrapper(*arg, **kwargs):
        return send_backward(*arg, **kwargs)
    return wrapper


def send_forward_recv_backward(output_tensors, tensor_shapes, config):
    """Send outputs forward and receive the matching output gradients.

    Args:
        output_tensors: list of tensors to send; any non-list value is
            replaced by one ``None`` per shape entry.
        tensor_shapes: sequence of ``None`` or mappings with ``'shape'`` and
            ``'dtype'`` keys.
        config: object whose ``pipeline_dtype`` is set per entry before the
            p2p call.

    Returns:
        A list of received gradients aligned with ``tensor_shapes``.
    """
    # NOTE(review): a bare (non-list) tensor is dropped here rather than
    # wrapped, unlike the sibling helpers. Presumably this keeps the result
    # one-per-shape when the caller passes a non-list value such as a loss;
    # confirm against the callers before changing it.
    if not isinstance(output_tensors, list):
        output_tensors = [None] * len(tensor_shapes)
    output_tensor_grads = []
    for output_tensor, tensor_shape in zip(output_tensors, tensor_shapes):
        if tensor_shape is None:
            output_tensor_grads.append(None)
            continue
        config.pipeline_dtype = tensor_shape['dtype']
        output_tensor_grad = p2p_communication.send_forward_recv_backward(
            output_tensor, tensor_shape['shape'], config
        )
        output_tensor_grads.append(output_tensor_grad)
    return output_tensor_grads


def send_forward_recv_backward_wrapper(fn):
    """Return a stand-in for ``fn`` that dispatches to the multi-tensor ``send_forward_recv_backward``.

    ``fn`` itself is never called; ``wraps`` only copies its metadata.
    """
    @wraps(fn)
    def wrapper(*arg, **kwargs):
        return send_forward_recv_backward(*arg, **kwargs)
    return wrapper


def send_backward_recv_forward(input_tensor_grads, tensor_shapes, config):
    """Send input gradients backward and receive the matching activations.

    Args:
        input_tensor_grads: list of gradients to send; any non-list value
            (including ``None``) is wrapped into a one-element list, so only
            the first shape entry is then processed.
        tensor_shapes: sequence of ``None`` or mappings with ``'shape'`` and
            ``'dtype'`` keys.
        config: object whose ``pipeline_dtype`` is set per entry before the
            p2p call.

    Returns:
        A list of received activations, one per processed shape entry.
    """
    if not isinstance(input_tensor_grads, list):
        input_tensor_grads = [input_tensor_grads]
    input_tensors = []
    for input_tensor_grad, tensor_shape in zip(input_tensor_grads, tensor_shapes):
        if tensor_shape is None:
            input_tensors.append(None)
            continue
        config.pipeline_dtype = tensor_shape['dtype']
        input_tensor = p2p_communication.send_backward_recv_forward(
            input_tensor_grad, tensor_shape['shape'], config
        )
        input_tensors.append(input_tensor)
    return input_tensors


def send_backward_recv_forward_wrapper(fn):
    """Return a stand-in for ``fn`` that dispatches to the multi-tensor ``send_backward_recv_forward``.

    ``fn`` itself is never called; ``wraps`` only copies its metadata.
    """
    @wraps(fn)
    def wrapper(*arg, **kwargs):
        return send_backward_recv_forward(*arg, **kwargs)
    return wrapper


def send_forward_recv_forward(output_tensors, tensor_shapes, recv_prev, config, overlap_p2p_comm=False):
    """Send outputs forward while receiving the next inputs, one call per shape.

    Args:
        output_tensors: list of tensors, a single tensor (wrapped into a
            one-element list), or ``None`` (expanded to one ``None`` per shape).
        tensor_shapes: sequence of ``None`` or mappings with ``'shape'`` and
            ``'dtype'`` keys.
        recv_prev: forwarded unchanged to the underlying p2p call.
        config: object whose ``pipeline_dtype`` is set per entry before the
            p2p call.
        overlap_p2p_comm: when truthy, each p2p call is expected to return
            ``(tensor, wait_handles)`` and the handles are collected.

    Returns:
        ``input_tensors`` when ``overlap_p2p_comm`` is falsy, otherwise the
        tuple ``(input_tensors, wait_handles)``.
    """
    if output_tensors is None:
        output_tensors = [None] * len(tensor_shapes)
    if not isinstance(output_tensors, list):
        output_tensors = [output_tensors]

    input_tensors = []
    all_fwd_wait_handles = []
    for output_tensor, tensor_shape in zip(output_tensors, tensor_shapes):
        if tensor_shape is None:
            input_tensors.append(None)
            continue
        config.pipeline_dtype = tensor_shape['dtype']
        received = p2p_communication.send_forward_recv_forward(
            output_tensor,
            recv_prev=recv_prev,
            tensor_shape=tensor_shape['shape'],
            config=config,
            overlap_p2p_comm=overlap_p2p_comm,
        )
        if overlap_p2p_comm:
            input_tensor, wait_handles = received
            all_fwd_wait_handles.extend(wait_handles)
        else:
            input_tensor = received
        input_tensors.append(input_tensor)

    if overlap_p2p_comm:
        return input_tensors, all_fwd_wait_handles
    return input_tensors


def send_backward_recv_backward(input_tensor_grads, tensor_shapes, recv_next, config, overlap_p2p_comm=False):
    """Send input gradients backward while receiving the next output gradients.

    Args:
        input_tensor_grads: list of tensors, a single tensor (wrapped into a
            one-element list), or ``None`` (expanded to one ``None`` per shape).
        tensor_shapes: sequence of ``None`` or mappings with ``'shape'`` and
            ``'dtype'`` keys.
        recv_next: forwarded unchanged to the underlying p2p call.
        config: object whose ``pipeline_dtype`` is set per entry before the
            p2p call.
        overlap_p2p_comm: when truthy, each p2p call is expected to return
            ``(tensor, wait_handles)`` and the handles are collected.

    Returns:
        ``output_tensor_grads`` when ``overlap_p2p_comm`` is falsy, otherwise
        the tuple ``(output_tensor_grads, wait_handles)``.
    """
    if input_tensor_grads is None:
        input_tensor_grads = [None] * len(tensor_shapes)
    if not isinstance(input_tensor_grads, list):
        input_tensor_grads = [input_tensor_grads]

    output_tensor_grads = []
    all_bwd_wait_handles = []
    for input_tensor_grad, tensor_shape in zip(input_tensor_grads, tensor_shapes):
        if tensor_shape is None:
            output_tensor_grads.append(None)
            continue
        config.pipeline_dtype = tensor_shape['dtype']
        received = p2p_communication.send_backward_recv_backward(
            input_tensor_grad,
            recv_next=recv_next,
            tensor_shape=tensor_shape['shape'],
            config=config,
            overlap_p2p_comm=overlap_p2p_comm,
        )
        if overlap_p2p_comm:
            output_tensor_grad, bwd_wait_handles = received
            all_bwd_wait_handles.extend(bwd_wait_handles)
        else:
            output_tensor_grad = received
        output_tensor_grads.append(output_tensor_grad)

    if overlap_p2p_comm:
        return output_tensor_grads, all_bwd_wait_handles
    return output_tensor_grads


def send_forward_backward_recv_forward_backward(output_tensors, input_tensor_grads, tensor_shapes, recv_prev, recv_next,
                                                config):
    """Exchange tensors in both directions at once, one p2p call per shape.

    Args:
        output_tensors: iterable of tensors to send forward, or ``None``
            (expanded to one ``None`` per shape).
        input_tensor_grads: list of gradients to send backward, a single
            tensor (wrapped into a one-element list), or ``None`` (expanded to
            one ``None`` per shape).
        tensor_shapes: sequence of ``None`` or mappings with ``'shape'`` and
            ``'dtype'`` keys.
        recv_prev: forwarded unchanged to the underlying p2p call.
        recv_next: forwarded unchanged to the underlying p2p call.
        config: object whose ``pipeline_dtype`` is set per entry before the
            p2p call.

    Returns:
        ``(input_tensors, output_tensor_grads)``, two lists with one slot per
        processed shape entry; slots for ``None`` shapes hold ``None``.
    """
    if output_tensors is None:
        output_tensors = [None] * len(tensor_shapes)
    if input_tensor_grads is None:
        input_tensor_grads = [None] * len(tensor_shapes)
    if not isinstance(input_tensor_grads, list):
        input_tensor_grads = [input_tensor_grads]

    input_tensors = []
    output_tensor_grads = []
    for output_tensor, input_tensor_grad, tensor_shape in zip(output_tensors, input_tensor_grads, tensor_shapes):
        # Guard before touching tensor_shape['dtype']: subscripting a None
        # entry would raise TypeError instead of skipping the slot.
        if tensor_shape is None:
            input_tensors.append(None)
            output_tensor_grads.append(None)
            continue
        config.pipeline_dtype = tensor_shape['dtype']
        input_tensor, output_tensor_grad = p2p_communication.send_forward_backward_recv_forward_backward(
            output_tensor,
            input_tensor_grad,
            recv_prev=recv_prev,
            recv_next=recv_next,
            tensor_shape=tensor_shape['shape'],
            config=config,
        )
        input_tensors.append(input_tensor)
        output_tensor_grads.append(output_tensor_grad)
    return input_tensors, output_tensor_grads
+ recv_next: boolean for whether tensor should be received from + next rank. + Returns: + (recv_prev_shape, recv_next_shape) + """ + + recv_prev_shape_tensor = None + recv_next_shape_tensor = None + send_prev_shape_tensor = None + send_next_shape_tensor = None + if recv_prev: + recv_prev_shape_tensor = torch.empty( + (tensor_dim), device=torch.cuda.current_device(), dtype=torch.int64 + ) + if recv_next: + recv_next_shape_tensor = torch.empty( + (tensor_dim), device=torch.cuda.current_device(), dtype=torch.int64 + ) + if tensor_send_prev is not None: + send_prev_shape_tensor = torch.tensor( + tensor_send_prev.size(), device=torch.cuda.current_device(), dtype=torch.int64 + ) + if tensor_send_next is not None: + send_next_shape_tensor = torch.tensor( + tensor_send_next.size(), device=torch.cuda.current_device(), dtype=torch.int64 + ) + + if config.use_ring_exchange_p2p: + torch.distributed.ring_exchange( + tensor_send_prev=send_prev_shape_tensor, + tensor_recv_prev=recv_prev_shape_tensor, + tensor_send_next=send_next_shape_tensor, + tensor_recv_next=recv_next_shape_tensor, + group=get_pipeline_model_parallel_group(), + ) + + # Send tensors in both the forward and backward directions as appropriate. + if config.use_ring_exchange_p2p: + + def _ring_exchange_wrapper(**kwargs): + torch.distributed.ring_exchange(**kwargs) + return [] + + p2p_func = _ring_exchange_wrapper + elif config.batch_p2p_comm: + p2p_func = _batched_p2p_ops + else: + p2p_func = _p2p_ops + + reqs = p2p_func( + tensor_send_prev=send_prev_shape_tensor, + tensor_recv_prev=recv_prev_shape_tensor, + tensor_send_next=send_next_shape_tensor, + tensor_recv_next=recv_next_shape_tensor, + group=get_pipeline_model_parallel_group(), + ) + + if len(reqs) > 0: + for req in reqs: + req.wait() + reqs = None + + if config.batch_p2p_comm and config.batch_p2p_sync: + # To protect against race condition when using batch_isend_irecv(). 
def _communicate(
    *,
    tensor_send_next: Optional[torch.Tensor],
    tensor_send_prev: Optional[torch.Tensor],
    recv_prev: bool,
    recv_next: bool,
    tensor_shape: Shape,
    config: ModelParallelConfig,
    wait_on_reqs: bool = True
) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[List]]:
    """Communicate tensors between pipeline stages.

    Helper used by the other communication methods in this module.

    Args:
        tensor_send_next (torch.Tensor, optional):
            Tensor to send to next rank (no tensor sent if None)

        tensor_send_prev (torch.Tensor, optional):
            Tensor to send to prev rank (no tensor sent if None)

        recv_prev (boolean, required):
            whether tensor should be received from previous rank.

        recv_next (boolean, required):
            whether tensor should be received from next rank.

        tensor_shape (List[int] or torch.Size, required):
            shape of tensor to receive (this method assumes that all
            tensors sent and received in a single function call are
            the same shape). When ``config.variable_seq_lengths`` is set,
            the receive shapes are obtained from ``_communicate_shapes``
            instead, but ``tensor_shape`` must still be non-None whenever
            something is received.

        config (ModelParallelConfig, required):
            read for ``variable_seq_lengths``, ``pipeline_dtype``,
            ``use_ring_exchange_p2p``, ``batch_p2p_comm`` and
            ``batch_p2p_sync``.

        wait_on_reqs (boolean, optional, default=True):
            wait on every returned request before returning. Must be
            truthy when ``config.batch_p2p_comm`` selects the batched path.

    Returns:
        tuple containing

        - tensor_recv_prev: torch.Tensor if recv_prev is True, None otherwise.
        - tensor_recv_next: torch.Tensor if recv_next is True, None otherwise.
        - reqs: None once a non-empty request list has been waited on;
          otherwise whatever the selected p2p function returned (an empty
          list for the ring-exchange path).

    Raises:
        RuntimeError: if a receive is requested while ``config.pipeline_dtype``
            or ``tensor_shape`` is None.
    """

    # Create placeholder tensors for receive in forward and backward directions
    # if needed.
    tensor_recv_prev = None
    tensor_recv_next = None

    if not config.variable_seq_lengths:
        recv_prev_shape = tensor_shape
        recv_next_shape = tensor_shape
    else:
        # Shapes differ per micro-batch, so exchange them with the peers first.
        tensor_dim = len(tensor_shape) if tensor_shape is not None else 3
        recv_prev_shape, recv_next_shape = _communicate_shapes(
            tensor_send_next, tensor_send_prev, recv_prev, recv_next, config, tensor_dim,
        )

    if recv_prev:
        if config.pipeline_dtype is None:
            raise RuntimeError("pipeline_dtype must be provided if recv_prev is True")
        if tensor_shape is None:
            raise RuntimeError(
                "tensor_shape must be specified if recv_prev is True. "
                "Common tensor_shape is (seq_length, micro_batch_size, hidden_size)"
            )
        tensor_recv_prev = torch.empty(
            recv_prev_shape,
            requires_grad=True,
            device=torch.cuda.current_device(),
            dtype=config.pipeline_dtype,
        )
    if recv_next:
        if config.pipeline_dtype is None:
            raise RuntimeError("dtype must be provided if recv_next is True")
        if tensor_shape is None:
            raise RuntimeError(
                "tensor_shape must be specified if recv_next is True. "
                "Common tensor_shape is (seq_length, micro_batch_size, hidden_size)"
            )
        tensor_recv_next = torch.empty(
            recv_next_shape,
            requires_grad=True,
            device=torch.cuda.current_device(),
            dtype=config.pipeline_dtype,
        )

    # Send tensors in both the forward and backward directions as appropriate.
    if config.use_ring_exchange_p2p:

        def _ring_exchange_wrapper(**kwargs):
            # Adapt ring_exchange to the "returns a list of requests" contract.
            torch.distributed.ring_exchange(**kwargs)
            return []

        p2p_func = _ring_exchange_wrapper
    elif config.batch_p2p_comm:
        # The batched path is only used with blocking semantics here.
        assert wait_on_reqs
        p2p_func = _batched_p2p_ops
    else:
        p2p_func = _p2p_ops

    reqs = p2p_func(
        tensor_send_prev=tensor_send_prev,
        tensor_recv_prev=tensor_recv_prev,
        tensor_send_next=tensor_send_next,
        tensor_recv_next=tensor_recv_next,
        group=get_pipeline_model_parallel_group(),
    )

    if wait_on_reqs and len(reqs) > 0:
        for req in reqs:
            req.wait()
        # Signal to the caller that nothing is left to wait on.
        reqs = None

    if config.batch_p2p_comm and config.batch_p2p_sync:
        # To protect against race condition when using batch_isend_irecv().
        # User should assert that we have a modern enough PyTorch to not need this
        torch.cuda.synchronize()

    return tensor_recv_prev, tensor_recv_next, reqs
src=get_pipeline_model_parallel_prev_rank(), group=group, + ) + reqs.append(recv_prev_req) + else: + if length_buffer is not None: + recv_prev_req = torch.distributed.irecv( + tensor=length_buffer, src=get_pipeline_model_parallel_prev_rank(), group=group, + ) + reqs.append(recv_prev_req) + + if tensor_length is not None: + send_next_req = torch.distributed.isend( + tensor=tensor_length, dst=get_pipeline_model_parallel_next_rank(), group=group, + ) + reqs.append(send_next_req) + + for req in reqs: + req.wait() + + reqs = [] + + if get_pipeline_model_parallel_rank() % 2 == 0: + if tensor_send_next is not None: + req = torch.distributed.isend( + tensor=prev_actual_seq_len, dst=get_pipeline_model_parallel_next_rank(), group=get_pipeline_model_parallel_group(), + ) + reqs.append(req) + + req = torch.distributed.isend( + tensor=prev_position_ids, dst=get_pipeline_model_parallel_next_rank(), group=get_pipeline_model_parallel_group(), + ) + reqs.append(req) + + send_next_req = torch.distributed.isend( + tensor=tensor_send_next, dst=get_pipeline_model_parallel_next_rank(), group=group, + ) + reqs.append(send_next_req) + + if tensor_recv_prev is not None: + actual_seq_len_buffer = torch.empty([length_buffer.item()], dtype=torch.int64, device=torch.cuda.current_device()) + + req = torch.distributed.irecv( + tensor=actual_seq_len_buffer, src=get_pipeline_model_parallel_prev_rank(), group=group, + ) + reqs.append(req) + set_actual_seq_len(actual_seq_len_buffer) + + position_ids_buffer = torch.empty((block_size, bsz), dtype=torch.int64, device=torch.cuda.current_device()) + req = torch.distributed.irecv( + tensor=position_ids_buffer, src=get_pipeline_model_parallel_prev_rank(), group=group, + ) + set_position_ids(position_ids_buffer) + reqs.append(req) + + recv_prev_req = torch.distributed.irecv( + tensor=tensor_recv_prev, src=get_pipeline_model_parallel_prev_rank(), group=group, + ) + reqs.append(recv_prev_req) + + if tensor_send_prev is not None: + send_prev_req = 
torch.distributed.isend( + tensor=tensor_send_prev, dst=get_pipeline_model_parallel_prev_rank(), group=group, + ) + reqs.append(send_prev_req) + + if tensor_recv_next is not None: + recv_next_req = torch.distributed.irecv( + tensor=tensor_recv_next, src=get_pipeline_model_parallel_next_rank(), group=group, + ) + reqs.append(recv_next_req) + + else: + if tensor_recv_prev is not None: + actual_seq_len_buffer = torch.empty([length_buffer.item()], dtype=torch.int64, device=torch.cuda.current_device()) + + req = torch.distributed.irecv( + tensor=actual_seq_len_buffer, src=get_pipeline_model_parallel_prev_rank(), group=group, + ) + reqs.append(req) + set_actual_seq_len(actual_seq_len_buffer) + + position_ids_buffer = torch.empty((block_size, bsz), dtype=torch.int64, device=torch.cuda.current_device()) + req = torch.distributed.irecv( + tensor=position_ids_buffer, src=get_pipeline_model_parallel_prev_rank(), group=group, + ) + set_position_ids(position_ids_buffer) + reqs.append(req) + + recv_prev_req = torch.distributed.irecv( + tensor=tensor_recv_prev, src=get_pipeline_model_parallel_prev_rank(), group=group, + ) + reqs.append(recv_prev_req) + + if tensor_send_next is not None: + req = torch.distributed.isend( + tensor=prev_actual_seq_len, dst=get_pipeline_model_parallel_next_rank(), group=get_pipeline_model_parallel_group(), + ) + reqs.append(req) + + req = torch.distributed.isend( + tensor=prev_position_ids, dst=get_pipeline_model_parallel_next_rank(), group=get_pipeline_model_parallel_group(), + ) + reqs.append(req) + + send_next_req = torch.distributed.isend( + tensor=tensor_send_next, dst=get_pipeline_model_parallel_next_rank(), group=group, + ) + reqs.append(send_next_req) + + if tensor_recv_next is not None: + recv_next_req = torch.distributed.irecv( + tensor=tensor_recv_next, src=get_pipeline_model_parallel_next_rank(), group=group, + ) + reqs.append(recv_next_req) + + if tensor_send_prev is not None: + send_prev_req = torch.distributed.isend( + 
tensor=tensor_send_prev, dst=get_pipeline_model_parallel_prev_rank(), group=group, + ) + reqs.append(send_prev_req) + return reqs + + +def _p2p_ops_send_recv_overlap( + *, + tensor_send_prev: Optional[torch.Tensor], + tensor_recv_prev: Optional[torch.Tensor], + tensor_send_next: Optional[torch.Tensor], + tensor_recv_next: Optional[torch.Tensor], + group: torch.distributed.ProcessGroup +): + ops = [] + if get_pipeline_model_parallel_rank() % 2 == 0: + if tensor_send_prev is not None: + send_prev_op = torch.distributed.P2POp( + torch.distributed.isend, + tensor_send_prev, + get_pipeline_model_parallel_prev_rank(), + group, + ) + ops.append(send_prev_op) + if tensor_recv_prev is not None: + recv_prev_op = torch.distributed.P2POp( + torch.distributed.irecv, + tensor_recv_prev, + get_pipeline_model_parallel_prev_rank(), + group, + ) + ops.append(recv_prev_op) + if tensor_send_next is not None: + send_next_op = torch.distributed.P2POp( + torch.distributed.isend, + tensor_send_next, + get_pipeline_model_parallel_next_rank(), + group, + ) + ops.append(send_next_op) + if tensor_recv_next is not None: + recv_next_op = torch.distributed.P2POp( + torch.distributed.irecv, + tensor_recv_next, + get_pipeline_model_parallel_next_rank(), + group, + ) + ops.append(recv_next_op) + else: + if tensor_recv_next is not None: + recv_next_op = torch.distributed.P2POp( + torch.distributed.irecv, + tensor_recv_next, + get_pipeline_model_parallel_next_rank(), + group, + ) + ops.append(recv_next_op) + if tensor_send_next is not None: + send_next_op = torch.distributed.P2POp( + torch.distributed.isend, + tensor_send_next, + get_pipeline_model_parallel_next_rank(), + group, + ) + ops.append(send_next_op) + if tensor_recv_prev is not None: + recv_prev_op = torch.distributed.P2POp( + torch.distributed.irecv, + tensor_recv_prev, + get_pipeline_model_parallel_prev_rank(), + group, + ) + ops.append(recv_prev_op) + if tensor_send_prev is not None: + send_prev_op = torch.distributed.P2POp( + 
torch.distributed.isend, + tensor_send_prev, + get_pipeline_model_parallel_prev_rank(), + group, + ) + ops.append(send_prev_op) + + if len(ops) > 0: + reqs = torch.distributed.batch_isend_irecv(ops) + else: + reqs = [] + return reqs diff --git a/model/train/yoco_moe/mindspeed/core/pipeline_parallel/ripipe_schedules.py b/model/train/yoco_moe/mindspeed/core/pipeline_parallel/ripipe_schedules.py new file mode 100644 index 000000000..c0c8a27f7 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/pipeline_parallel/ripipe_schedules.py @@ -0,0 +1,784 @@ +# coding=utf-8 +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import collections +from typing import Iterator, List, Union +import contextlib + +import torch +from megatron.training import get_args +from megatron.core import parallel_state +from megatron.core.pipeline_parallel.schedules import deallocate_output_tensor, forward_step, backward_step, \ + check_first_val_step +from megatron.core.pipeline_parallel import p2p_communication +from megatron.core.utils import get_model_config, get_model_type +from megatron.core.enums import ModelType + +from mindspeed.core.tensor_parallel.checkpoint_manager import get_pipeline_checkpoint_manager +from mindspeed.core.weight_grad_store import WeightGradStore + + +def forward_backward_ripipe_pipelining( + *, + forward_step_func, + data_iterator: Union[Iterator, List[Iterator]], + model: Union[torch.nn.Module, List[torch.nn.Module]], + num_microbatches: int, + seq_length: int, + micro_batch_size: int, + decoder_seq_length: int = None, + forward_only: bool = False, + collect_non_loss_data: bool = False, + first_val_step: bool = None, +): + """Almost directly copied from megatron's forward_backward_pipelining_with_interleaving + function, all modifications are annotated with 'ripipe related' or 'nanopipe related' """ + # ripipe related, setup checkpoint manager. + pipeline_checkpoint_manager = get_pipeline_checkpoint_manager( + num_of_chunks=parallel_state.get_virtual_pipeline_model_parallel_world_size()) + args = get_args() + if args.recompute_in_bubble or args.recompute_in_advance: + pipeline_checkpoint_manager.open_ri_pipe = True + pipeline_checkpoint_manager.do_pre_recompute = True + + + """Run interleaved 1F1B schedule (model split into model chunks), with + communication between pipeline stages as needed. 
+ + Returns dictionary with losses if the last stage, empty dict otherwise.""" + assert isinstance(model, list), "interleaved pipeline parallelism expected model chunking" + assert all(isinstance(chunk, torch.nn.Module) for chunk in model), "invalid model chunking" + assert isinstance( + data_iterator, list + ), "interleaved pipeline parallelism expected each model chunk to have a data iterator" + + config = get_model_config(model[0]) + if config.overlap_p2p_comm and config.batch_p2p_comm: + raise ValueError("Can not use both overlap_p2p_comm and batch_p2p_comm") + + if config.timers is not None: + config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time) + + # Disable async grad reductions + no_sync_func = config.no_sync_func + if isinstance(no_sync_func, list): + + def multi_no_sync(): + stack = contextlib.ExitStack() + for model_chunk_no_sync_func in config.no_sync_func: + stack.enter_context(model_chunk_no_sync_func()) + return stack + + no_sync_func = multi_no_sync + if no_sync_func is None: + no_sync_func = contextlib.nullcontext + no_sync_context = None + + if config.grad_sync_func is not None and not isinstance(config.grad_sync_func, list): + config.grad_sync_func = [config.grad_sync_func for _ in model] + + if config.param_sync_func is not None and not isinstance(config.param_sync_func, list): + config.param_sync_func = [config.param_sync_func for _ in model] + + def disable_grad_sync(): + """Disable asynchronous grad reductions""" + nonlocal no_sync_context + if no_sync_context is None: + no_sync_context = no_sync_func() + no_sync_context.__enter__() + + def enable_grad_sync(): + """Enable asynchronous grad reductions""" + nonlocal no_sync_context + if no_sync_context is not None: + no_sync_context.__exit__(None, None, None) + no_sync_context = None + + disable_grad_sync() + + # Model chunk IDs with synchronized grads + synchronized_model_chunks = set() + + input_tensors = [[] for _ in range(len(model))] + output_tensors = 
[[] for _ in range(len(model))] + forward_data_store = [] + if not forward_only: + output_tensor_grads = [[] for _ in range(len(model))] + + pipeline_parallel_size = parallel_state.get_pipeline_model_parallel_world_size() + pipeline_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + + if num_microbatches % pipeline_parallel_size != 0: + msg = f'number of microbatches ({num_microbatches}) is not divisible by ' + msg += f'pipeline-model-parallel-size ({pipeline_parallel_size}) ' + msg += 'when using interleaved schedule' + raise RuntimeError(msg) + + model_type = get_model_type(model[0]) + if model_type == ModelType.encoder_and_decoder: + raise RuntimeError("Interleaving is not supported with an encoder and decoder model.") + + if decoder_seq_length is not None and decoder_seq_length != seq_length: + raise RuntimeError( + "Interleaving is not supported with a different decoder sequence length." + ) + + tensor_shape = [seq_length, micro_batch_size, config.hidden_size] + tensor_shape[0] = tensor_shape[0] // parallel_state.get_context_parallel_world_size() + if config.sequence_parallel: + tensor_shape[0] = tensor_shape[0] // parallel_state.get_tensor_model_parallel_world_size() + tensor_shape[0] = tensor_shape[0] // args.tp_x + tensor_shape[-1] = tensor_shape[-1] // args.tp_y + # Compute number of warmup and remaining microbatches. 
+ num_model_chunks = len(model) + total_num_microbatches = num_microbatches * num_model_chunks + all_warmup_microbatches = False + if forward_only: + num_warmup_microbatches = total_num_microbatches + else: + # ripipe related, no special handling of 'num_warmup_microbatches' when 'num_microbatches == pipeline_parallel_size' + num_warmup_microbatches = (pipeline_parallel_size - pipeline_parallel_rank - 1) * 2 + num_warmup_microbatches += (num_model_chunks - 1) * pipeline_parallel_size + num_warmup_microbatches = min(num_warmup_microbatches, total_num_microbatches) + num_microbatches_remaining = total_num_microbatches - num_warmup_microbatches + + num_fwd = min((pipeline_parallel_size - 1) * 2 + (num_model_chunks - 1) * pipeline_parallel_size, total_num_microbatches) + num_dx = num_fwd - num_warmup_microbatches + overlap_chunks_num = (num_dx + pipeline_parallel_size - 1) // pipeline_parallel_size + nano_flag = [True] * len(model) + for i in range(overlap_chunks_num): + nano_flag[-i - 1] = False + # ripipe related, calculate the variables needed by the recompute_in_bubble function + num_microbatches_recompute, num_microbatches_recompute_forward, num_microbatches_recompute_steady_groups, \ + num_microbatches_recompute_tail = get_ripipe_recompute_count_params(num_microbatches, + num_model_chunks, + num_warmup_microbatches) + + # Checkpoint the activations of partial Transformer layers in a number of micro-batches + # within the maximum outstanding micro-batch backpropagations. + # Micro-batches with the ids less than 'num_microbatches_with_partial_activation_checkpoints' + # checkpoint partial Transformer layers (or skip checkpointing) and + # the rest of micro-batches within a window of micro-batches checkpoint + # all Transformer layers. The window of micro-batches is set by the maximum + # outstanding backpropagations and becomes smaller at later pipeline stages. 
+ # Please refer the appendix C in https://arxiv.org/pdf/2205.05198.pdf + max_outstanding_backprops = None + if config.num_microbatches_with_partial_activation_checkpoints is not None: + max_outstanding_backprops = num_warmup_microbatches + 1 + + # Synchronize params for first two model chunks + if config.param_sync_func is not None: + config.param_sync_func[0](model[0].parameters()) + config.param_sync_func[1](model[1].parameters()) + + def get_chunk_batch_id(microbatch_id, forward): + """ripipe related, needed by recompute_in_bubble function.""" + microbatch_id_in_group = microbatch_id % (pipeline_parallel_size * num_model_chunks) + model_chunk_id = microbatch_id_in_group // pipeline_parallel_size + if not forward: + model_chunk_id = num_model_chunks - model_chunk_id - 1 + group_id = microbatch_id // (pipeline_parallel_size * num_model_chunks) + intra_chunk_batch_id = (microbatch_id_in_group % pipeline_parallel_size) + return group_id, intra_chunk_batch_id, model_chunk_id + + def should_recompute(fk): + """ripipe related, needed by recompute_in_bubble function, used to determine + whether a mircobatch needs to be recomputed in the 1f1b stage.""" + gid, intro_group_bid, chunk_id = get_chunk_batch_id(fk, forward=True) + if chunk_id == 0: + if gid < 2: + return False + elif gid < 2 + num_microbatches_recompute_steady_groups: + if intro_group_bid >= (1 + 2 * pipeline_parallel_rank): + return True + else: + if intro_group_bid >= pipeline_parallel_size - num_microbatches_recompute_tail: + return True + return False + + def get_model_chunk_id(microbatch_id, forward): + """Helper method to get the model chunk ID given the iteration number.""" + microbatch_id_in_group = microbatch_id % (pipeline_parallel_size * num_model_chunks) + model_chunk_id = microbatch_id_in_group // pipeline_parallel_size + if not forward: + model_chunk_id = num_model_chunks - model_chunk_id - 1 + return model_chunk_id + + def is_first_microbatch_for_model_chunk(microbatch_id: int) -> bool: + 
"""Check if an iteration is the first for a model chunk.""" + microbatch_group_size = pipeline_parallel_size * num_model_chunks + num_microbatch_groups = total_num_microbatches // microbatch_group_size + microbatch_group_id = microbatch_id // microbatch_group_size + microbatch_id_in_group = microbatch_id % microbatch_group_size + if microbatch_group_id == 0: + return microbatch_id_in_group % pipeline_parallel_size == 0 + else: + return False + + def is_last_microbatch_for_model_chunk(microbatch_id: int) -> bool: + """Check if an iteration is the last for a model chunk.""" + microbatch_group_size = pipeline_parallel_size * num_model_chunks + num_microbatch_groups = total_num_microbatches // microbatch_group_size + microbatch_group_id = microbatch_id // microbatch_group_size + microbatch_id_in_group = microbatch_id % microbatch_group_size + if microbatch_group_id == num_microbatch_groups - 1: + return microbatch_id_in_group % pipeline_parallel_size == pipeline_parallel_size - 1 + else: + return False + + def forward_step_helper(microbatch_id, checkpoint_activations_microbatch): + """Helper method to run forward step with model split into chunks + (run set_virtual_pipeline_model_parallel_rank() before calling + forward_step()).""" + model_chunk_id = get_model_chunk_id(microbatch_id, forward=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(model_chunk_id) + + # launch param synchronization for next model chunk + # Note: Asynchronous communication tends to slow down compute. + # To reduce idling from mismatched microbatch times, we launch + # asynchronous communication at the same time across the + # pipeline-parallel group. 
+ if config.param_sync_func is not None: + param_sync_microbatch_id = microbatch_id + pipeline_parallel_rank + if ( + param_sync_microbatch_id < total_num_microbatches + and is_first_microbatch_for_model_chunk(param_sync_microbatch_id) + ): + param_sync_chunk_id = get_model_chunk_id(param_sync_microbatch_id, forward=True) + 1 + if 1 < param_sync_chunk_id < num_model_chunks: + config.param_sync_func[param_sync_chunk_id]( + model[param_sync_chunk_id].parameters() + ) + + # forward step + if parallel_state.is_pipeline_first_stage(): + if len(input_tensors[model_chunk_id]) == len(output_tensors[model_chunk_id]): + input_tensors[model_chunk_id].append(None) + input_tensor = input_tensors[model_chunk_id][-1] + output_tensor, _ = forward_step( + forward_step_func, + data_iterator[model_chunk_id], + model[model_chunk_id], + num_microbatches, + input_tensor, + forward_data_store, + config, + collect_non_loss_data, + checkpoint_activations_microbatch, + check_first_val_step( + first_val_step, forward_only, is_first_microbatch_for_model_chunk(microbatch_id), + ), + ) + output_tensors[model_chunk_id].append(output_tensor) + + # if forward-only, no need to save tensors for a backward pass + if forward_only: + input_tensors[model_chunk_id].pop() + output_tensors[model_chunk_id].pop() + + # ripipe related, when a microbatch finish its forward pass, save needed recomputation + # functions for this microbatch. 
+ if args.recompute_in_bubble or args.recompute_in_advance: + pipeline_checkpoint_manager.batch_fin(model_chunk_id) + + return output_tensor + + def backward_step_helper(microbatch_id): + """Helper method to run backward step with model split into chunks + (run set_virtual_pipeline_model_parallel_rank() before calling + backward_step()).""" + model_chunk_id = get_model_chunk_id(microbatch_id, forward=False) + parallel_state.set_virtual_pipeline_model_parallel_rank(model_chunk_id) + + # launch grad synchronization (default) + if config.grad_sync_func is None and is_last_microbatch_for_model_chunk(microbatch_id) and nano_flag[model_chunk_id]: + enable_grad_sync() + synchronized_model_chunks.add(model_chunk_id) + + if parallel_state.is_pipeline_last_stage(): + if len(output_tensor_grads[model_chunk_id]) == 0: + output_tensor_grads[model_chunk_id].append(None) + input_tensor = input_tensors[model_chunk_id].pop(0) + output_tensor = output_tensors[model_chunk_id].pop(0) + output_tensor_grad = output_tensor_grads[model_chunk_id].pop(0) + input_tensor_grad = backward_step( + input_tensor, output_tensor, output_tensor_grad, model_type, config + ) + + # launch grad synchronization (custom grad sync) + # Note: Asynchronous communication tends to slow down compute. + # To reduce idling from mismatched microbatch times, we launch + # asynchronous communication at the same time across the + # pipeline-parallel group. + if config.grad_sync_func is not None: + grad_sync_microbatch_id = microbatch_id - pipeline_parallel_rank + if grad_sync_microbatch_id >= 0 and is_last_microbatch_for_model_chunk( + grad_sync_microbatch_id + ): + grad_sync_chunk_id = get_model_chunk_id(grad_sync_microbatch_id, forward=False) + if nano_flag[grad_sync_chunk_id]: + enable_grad_sync() + config.grad_sync_func[grad_sync_chunk_id](model[grad_sync_chunk_id].parameters()) + synchronized_model_chunks.add(grad_sync_chunk_id) + disable_grad_sync() + + return input_tensor_grad + + # Run warmup forward passes. 
+ parallel_state.set_virtual_pipeline_model_parallel_rank(0) + input_tensors[0].append(p2p_communication.recv_forward(tensor_shape, config)) + + fwd_wait_handles = None + bwd_wait_handles = None + + for k in range(num_warmup_microbatches): + + if fwd_wait_handles is not None: + for req in fwd_wait_handles: + req.wait() + + # Decide to checkpoint all layers' activations of the current micro-batch + if max_outstanding_backprops is not None: + checkpoint_activations_microbatch = ( + k % max_outstanding_backprops + >= config.num_microbatches_with_partial_activation_checkpoints + ) + else: + checkpoint_activations_microbatch = None + + # ripipe related, when use recompute_in_bubble function, do not do recompute + # for the first pp * vp microbatches. + if args.recompute_in_bubble: + if k < pipeline_parallel_size * num_model_chunks: + pipeline_checkpoint_manager.disable_recompute() + else: + num_microbatches_recompute_forward -= 1 + output_tensor = forward_step_helper(k, checkpoint_activations_microbatch) + if args.recompute_in_bubble or args.recompute_in_advance: + pipeline_checkpoint_manager.enable_recompute() + + # Determine if tensor should be received from previous stage. + next_forward_model_chunk_id = get_model_chunk_id(k + 1, forward=True) + recv_prev = True + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + if next_forward_model_chunk_id == 0: + recv_prev = False + if k == (total_num_microbatches - 1): + recv_prev = False + + # Don't send tensor downstream if on last stage. + if parallel_state.is_pipeline_last_stage(): + output_tensor = None + + # Send and receive tensors as appropriate (send tensors computed + # in this iteration; receive tensors for next iteration). 
+ if not config.overlap_p2p_comm: + if ( + k == (num_warmup_microbatches - 1) + and not forward_only + and not all_warmup_microbatches + ): + input_tensor_grad = None + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + recv_next = False + ( + input_tensor, + output_tensor_grad, + ) = p2p_communication.send_forward_backward_recv_forward_backward( + output_tensor, + input_tensor_grad, + recv_prev=recv_prev, + recv_next=recv_next, + tensor_shape=tensor_shape, + config=config, + ) + output_tensor_grads[num_model_chunks - 1].append(output_tensor_grad) + else: + input_tensor = p2p_communication.send_forward_recv_forward( + output_tensor, recv_prev=recv_prev, tensor_shape=tensor_shape, config=config + ) + input_tensors[next_forward_model_chunk_id].append(input_tensor) + else: + input_tensor, fwd_wait_handles = p2p_communication.send_forward_recv_forward( + output_tensor, + recv_prev=recv_prev, + tensor_shape=tensor_shape, + config=config, + overlap_p2p_comm=True, + ) + + if ( + k == (num_warmup_microbatches - 1) + and not forward_only + and not all_warmup_microbatches + ): + input_tensor_grad = None + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + recv_next = False + + ( + output_tensor_grad, + bwd_wait_handles, + ) = p2p_communication.send_backward_recv_backward( + input_tensor_grad, + recv_next=recv_next, + tensor_shape=tensor_shape, + config=config, + overlap_p2p_comm=True, + ) + + output_tensor_grads[num_model_chunks - 1].append(output_tensor_grad) + input_tensors[next_forward_model_chunk_id].append(input_tensor) + + deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs) + + # Run 1F1B in steady state. + for k in range(num_microbatches_remaining): + # Forward pass. 
+ forward_k = k + num_warmup_microbatches + + # Decide to checkpoint all layers' activations of the current micro-batch + if max_outstanding_backprops is not None: + checkpoint_activations_microbatch = ( + forward_k % max_outstanding_backprops + >= config.num_microbatches_with_partial_activation_checkpoints + ) + else: + checkpoint_activations_microbatch = None + + if config.overlap_p2p_comm: + if fwd_wait_handles is not None: + for req in fwd_wait_handles: + req.wait() + + deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs) + + # ripipe related, determine whether this microbatch should be recomputed + # when using recompute_in_bubble function. + if args.recompute_in_bubble: + if num_microbatches_recompute_forward > 0: + num_microbatches_recompute_forward -= 1 + elif num_microbatches_recompute > 0 and should_recompute(forward_k): + pass + else: + pipeline_checkpoint_manager.disable_recompute() + output_tensor = forward_step_helper(forward_k, checkpoint_activations_microbatch) + if args.recompute_in_bubble or args.recompute_in_advance: + pipeline_checkpoint_manager.enable_recompute() + # Determine if current stage has anything to send in either direction, + # otherwise set tensor to None. + forward_model_chunk_id = get_model_chunk_id(forward_k, forward=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(forward_model_chunk_id) + + # Last virtual stage no activation tensor to send + if parallel_state.is_pipeline_last_stage(): + output_tensor = None + + # Determine if peers are sending, and where in data structure to put + # received tensors. + recv_prev = True + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + # First stage is ahead of last stage by (pipeline_parallel_size - 1). 
+ next_forward_model_chunk_id = get_model_chunk_id( + forward_k - (pipeline_parallel_size - 1), forward=True + ) + if next_forward_model_chunk_id == (num_model_chunks - 1): + recv_prev = False + next_forward_model_chunk_id += 1 + else: + next_forward_model_chunk_id = get_model_chunk_id(forward_k + 1, forward=True) + + # If last iteration, don't receive; we already received one extra + # before the start of the for loop. + if k == (num_microbatches_remaining - 1): + recv_prev = False + + # Send activation tensor to the next stage and receive activation tensor from the + # previous stage + input_tensor, fwd_wait_handles = p2p_communication.send_forward_recv_forward( + output_tensor, + recv_prev=recv_prev, + tensor_shape=tensor_shape, + config=config, + overlap_p2p_comm=True, + ) + # assert fwd_wait_handles is not None + + # ripipe related, actually do the recomputation. + if args.recompute_in_advance or args.recompute_in_bubble: + vpp_rank = get_model_chunk_id(k, forward=False) + parallel_state.set_virtual_pipeline_model_parallel_rank(vpp_rank) + if not parallel_state.is_pipeline_last_stage() or args.recompute_in_bubble: + pipeline_checkpoint_manager.recompute_next(vpp_rank) + + if bwd_wait_handles is not None: + for req in bwd_wait_handles: + req.wait() + + # Backward pass. 
+ backward_k = k + if k < num_dx and args.use_nanopipe: + WeightGradStore.start_decouple() + + if args.use_nanopipe: + WeightGradStore.resize_ori_storage(args.use_nanopipe_swap) + + input_tensor_grad = backward_step_helper(backward_k) + if args.use_nanopipe: + if WeightGradStore.is_decoupleBlock: + WeightGradStore.flush() + if k == num_dx - 1: + WeightGradStore.end_decouple() + + backward_model_chunk_id = get_model_chunk_id(backward_k, forward=False) + parallel_state.set_virtual_pipeline_model_parallel_rank(backward_model_chunk_id) + + # First virtual stage no activation gradient tensor to send + if parallel_state.is_pipeline_first_stage(): + input_tensor_grad = None + + # Determine if the current virtual stage has an activation gradient tensor to receive + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + # Last stage is ahead of first stage by (pipeline_parallel_size - 1). + next_backward_model_chunk_id = get_model_chunk_id( + backward_k - (pipeline_parallel_size - 1), forward=False + ) + if next_backward_model_chunk_id == 0: + recv_next = False + next_backward_model_chunk_id -= 1 + else: + next_backward_model_chunk_id = get_model_chunk_id(backward_k + 1, forward=False) + + output_tensor_grad, bwd_wait_handles = p2p_communication.send_backward_recv_backward( + input_tensor_grad, + recv_next=recv_next, + tensor_shape=tensor_shape, + config=config, + overlap_p2p_comm=True, + ) + + else: # no p2p overlap + output_tensor = forward_step_helper(forward_k, checkpoint_activations_microbatch) + + # Backward pass. + backward_k = k + if k < num_dx and args.use_nanopipe: + WeightGradStore.start_decouple() + + if args.use_nanopipe: + WeightGradStore.resize_ori_storage(args.use_nanopipe_swap) + + input_tensor_grad = backward_step_helper(backward_k) + if k == num_dx - 1 and args.use_nanopipe: + WeightGradStore.end_decouple() + + # Send output_tensor and input_tensor_grad, receive input_tensor + # and output_tensor_grad. 
+ + # Determine if current stage has anything to send in either direction, + # otherwise set tensor to None. + forward_model_chunk_id = get_model_chunk_id(forward_k, forward=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(forward_model_chunk_id) + if parallel_state.is_pipeline_last_stage(): + output_tensor = None + + backward_model_chunk_id = get_model_chunk_id(backward_k, forward=False) + parallel_state.set_virtual_pipeline_model_parallel_rank(backward_model_chunk_id) + if parallel_state.is_pipeline_first_stage(): + input_tensor_grad = None + + # Determine if peers are sending, and where in data structure to put + # received tensors. + recv_prev = True + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + # First stage is ahead of last stage by (pipeline_parallel_size - 1). + next_forward_model_chunk_id = get_model_chunk_id( + forward_k - (pipeline_parallel_size - 1), forward=True + ) + if next_forward_model_chunk_id == (num_model_chunks - 1): + recv_prev = False + next_forward_model_chunk_id += 1 + else: + next_forward_model_chunk_id = get_model_chunk_id(forward_k + 1, forward=True) + + recv_next = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + # Last stage is ahead of first stage by (pipeline_parallel_size - 1). + next_backward_model_chunk_id = get_model_chunk_id( + backward_k - (pipeline_parallel_size - 1), forward=False + ) + if next_backward_model_chunk_id == 0: + recv_next = False + next_backward_model_chunk_id -= 1 + else: + next_backward_model_chunk_id = get_model_chunk_id(backward_k + 1, forward=False) + + # If last iteration, don't receive; we already received one extra + # before the start of the for loop. + if k == (num_microbatches_remaining - 1): + recv_prev = False + + # Communicate tensors. 
+ ( + input_tensor, + output_tensor_grad, + ) = p2p_communication.send_forward_backward_recv_forward_backward( + output_tensor, + input_tensor_grad, + recv_prev=recv_prev, + recv_next=recv_next, + tensor_shape=tensor_shape, + config=config, + ) + deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs) + + # Put input_tensor and output_tensor_grad in data structures in the + # right location. + if recv_prev: + input_tensors[next_forward_model_chunk_id].append(input_tensor) + if recv_next: + output_tensor_grads[next_backward_model_chunk_id].append(output_tensor_grad) + + deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs) + + # Run cooldown backward passes (flush out pipeline). + if not forward_only: + # ripipe related, actually do the recomputation. + if args.recompute_in_advance: + vpp_rank = get_model_chunk_id(num_microbatches_remaining, forward=False) + parallel_state.set_virtual_pipeline_model_parallel_rank(vpp_rank) + if not parallel_state.is_pipeline_last_stage(): + pipeline_checkpoint_manager.recompute_next(vpp_rank) + if args.recompute_in_bubble and num_microbatches_recompute > 0: + old_vpp_rank = parallel_state.get_virtual_pipeline_model_parallel_rank() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + pipeline_checkpoint_manager.recompute_next_force(0) + parallel_state.set_virtual_pipeline_model_parallel_rank(old_vpp_rank) + if config.overlap_p2p_comm and bwd_wait_handles is not None: + for wait_handle in bwd_wait_handles: + wait_handle.wait() + + if all_warmup_microbatches: + output_tensor_grads[num_model_chunks - 1].append( + p2p_communication.recv_backward(tensor_shape, config=config) + ) + + # ripipe related + if args.recompute_in_bubble: + num_microbatches_recompute_forward = 1 + for k in range(num_microbatches_remaining, total_num_microbatches): + input_tensor_grad = backward_step_helper(k) + next_backward_model_chunk_id = get_model_chunk_id(k + 1, forward=False) + recv_next = True + if 
parallel_state.is_pipeline_last_stage(ignore_virtual=True): + if next_backward_model_chunk_id == (num_model_chunks - 1): + recv_next = False + if k == (total_num_microbatches - 1): + recv_next = False + + # ripipe related, use async communication + out_tensor, bwd_wait_handles = p2p_communication.send_backward_recv_backward( + input_tensor_grad, recv_next=recv_next, tensor_shape=tensor_shape, config=config, overlap_p2p_comm=True + ) + output_tensor_grads[next_backward_model_chunk_id].append( + out_tensor + ) + + if args.use_nanopipe and args.use_nanopipe_swap and k == max(num_microbatches_remaining + 1, (total_num_microbatches + num_microbatches_remaining) // 2): + WeightGradStore.swap_tensors() + + # ripipe related, actually do the recomputation + if args.recompute_in_bubble and num_microbatches_recompute > 0 and \ + num_microbatches_recompute_forward < num_microbatches_recompute: + old_vpp_rank = parallel_state.get_virtual_pipeline_model_parallel_rank() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + pipeline_checkpoint_manager.recompute_next_force(0) + parallel_state.set_virtual_pipeline_model_parallel_rank(old_vpp_rank) + num_microbatches_recompute_forward += 1 + if args.recompute_in_advance and k != (total_num_microbatches - 1): + vpp_rank = get_model_chunk_id(k + 1, forward=False) + parallel_state.set_virtual_pipeline_model_parallel_rank(vpp_rank) + if not parallel_state.is_pipeline_last_stage(): + pipeline_checkpoint_manager.recompute_next(vpp_rank) + # ripipe related, use async communication + if config.overlap_p2p_comm and bwd_wait_handles is not None: + for wait_handle in bwd_wait_handles: + wait_handle.wait() + + # nanopipe related + if args.use_nanopipe: + if nano_flag[0] and 0 not in synchronized_model_chunks: + config.grad_sync_func[0](model[0].parameters()) + synchronized_model_chunks.add(0) + overlap_arg = [pipeline_parallel_size, nano_flag, synchronized_model_chunks, config.grad_sync_func, model] + WeightGradStore.pop(overlap_arg) + 
+ # Launch any remaining grad reductions. + enable_grad_sync() + if config.grad_sync_func is not None: + for model_chunk_id in range(num_model_chunks): + if model_chunk_id not in synchronized_model_chunks: + config.grad_sync_func[model_chunk_id](model[model_chunk_id].parameters()) + synchronized_model_chunks.add(model_chunk_id) + + if config.timers is not None: + config.timers('forward-backward').stop() + + if config.finalize_model_grads_func is not None and not forward_only: + # Finalize model grads (perform full grad all-reduce / reduce-scatter for + # data parallelism, layernorm all-reduce for sequence parallelism, and + # embedding all-reduce for pipeline parallelism). + config.finalize_model_grads_func(model) + + # ripipe related, check all the needed recomputation is done. + if args.recompute_in_bubble or args.recompute_in_advance: + pipeline_checkpoint_manager.iter_fin() + + return forward_data_store + + +def get_ripipe_recompute_count_params(num_microbatches, num_model_chunks, num_warmup_microbatches): + """ripipe related, calculate the variables needed by the recompute_in_bubble function""" + args = get_args() + pipeline_parallel_size = parallel_state.get_pipeline_model_parallel_world_size() + pipeline_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + num_microbatches_recompute_steady_groups = 0 + num_microbatches_recompute_tail = 0 + num_microbatches_recompute = 0 + num_microbatches_recompute_forward = 0 + if args.recompute_in_bubble and num_microbatches // pipeline_parallel_size > 1: + num_microbatches_recompute = num_warmup_microbatches + 1 - num_model_chunks * pipeline_parallel_size + if num_microbatches_recompute < 0: + num_microbatches_recompute = 0 + + num_microbatches_recompute_forward = num_microbatches_recompute + if num_microbatches_recompute > 0 and num_microbatches // pipeline_parallel_size >= 3: + num_microbatches_recompute_steady_groups = (num_microbatches // pipeline_parallel_size) - 3 + num_microbatches_recompute_tail = 2 
+ 2 * pipeline_parallel_rank + if num_microbatches_recompute_steady_groups == 0: + if num_microbatches_recompute_tail >= pipeline_parallel_size - 1 - 2 * pipeline_parallel_rank: + num_microbatches_recompute_tail = 0 + num_microbatches_recompute_steady_groups = 1 + else: + num_microbatches_recompute_tail = 1 + + params = collections.namedtuple('RecomputeCountParams', + ['num_microbatches_recompute', 'num_microbatches_recompute_forward', + 'num_microbatches_recompute_steady_groups', 'num_microbatches_recompute_tail']) + return params(num_microbatches_recompute, num_microbatches_recompute_forward, + num_microbatches_recompute_steady_groups, num_microbatches_recompute_tail) diff --git a/model/train/yoco_moe/mindspeed/core/pipeline_parallel/schedules.py b/model/train/yoco_moe/mindspeed/core/pipeline_parallel/schedules.py new file mode 100644 index 000000000..e4b9f166c --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/pipeline_parallel/schedules.py @@ -0,0 +1,184 @@ +# coding=utf-8 +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reversed. +# Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. 
def get_forward_backward_func_wrapper(get_forward_backward_func):
    """Decorate a schedule selector so MindSpeed-specific schedules win first.

    The returned callable reads the global training arguments and, depending
    on which features are switched on, returns one of the MindSpeed pipeline
    schedules. When none of the feature checks match, it defers to the wrapped
    ``get_forward_backward_func`` with the caller's arguments.
    """

    def _interleaved_pipeline_active():
        # Pipeline parallelism is in use and a virtual pipeline size is configured.
        return (
            parallel_state.get_pipeline_model_parallel_world_size() > 1
            and parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None
        )

    @wraps(get_forward_backward_func)
    def wrapper(*args, **kwargs):
        opts = get_args()

        # Both of these features map onto the same non-interleaved flexible schedule.
        send_recv_optimized = (
            opts.optimize_send_recv_comm
            and opts.num_layers_per_virtual_pipeline_stage is None
        )
        if send_recv_optimized or (opts.automated_pipeline_perf and opts.pp_schedule_list):
            return flexible_schedules.forward_backward_pipelining_without_interleaving

        # ripipe schedules are only selected while autograd is recording.
        ripipe_requested = opts.recompute_in_bubble or opts.recompute_in_advance
        if ripipe_requested and torch.is_grad_enabled():
            return forward_backward_ripipe_pipelining

        if _interleaved_pipeline_active() and opts.use_nanopipe:
            return flexible_schedules.forward_backward_pipelining_with_interleaving_nano_pipe

        if opts.use_multiparameter_pipeline_model_parallel and _interleaved_pipeline_active():
            return multiparameter_schedules.forward_backward_pipelining_with_interleaving

        # No MindSpeed feature matched: fall back to the wrapped selector.
        return get_forward_backward_func(*args, **kwargs)

    return wrapper
config.calculate_per_token_loss: + output_tensor /= num_tokens + output_tensor /= num_microbatches + else: + # preserve legacy loss averaging behavior (ie, over the number of microbatches) + assert len(outputs) == 2 + output_tensor, loss_reduced = outputs + output_tensor /= num_microbatches + forward_data_store.append(loss_reduced) + else: + data = loss_func(output_tensor, non_loss_data=True) + forward_data_store.append(data) + + if config.timers is not None: + config.timers('forward-compute').stop() + + # Set the loss scale for the auxiliary loss of the MoE layer. + # Since we use a trick to do backward on the auxiliary loss, we need to set the scale explicitly. + if hasattr(config, 'num_moe_experts') and config.num_moe_experts is not None: + # Calculate the loss scale based on the grad_scale_func if available, else default to 1. + loss_scale = ( + config.grad_scale_func(LOSS_BACKWARD_SCALE) + if config.grad_scale_func is not None + else torch.tensor(1.0) + ) + # Set the loss scale + MoEAuxLossAutoScaler.set_loss_scale(loss_scale / num_microbatches) + + if arguments.auto_parallel_profile: + torch.cuda.synchronize() + PROFILE_CONTENT["fwd_time"].append((time.time() - start_time) * 1000) # time: ms + PROFILE_CONTENT["act_mem"].append((torch.npu.memory_allocated() - start_memory) / 1024 ** 2) # memory: MB + max_mem = torch.npu.max_memory_allocated() + + # If T5 model (or other model with encoder and decoder) + # and in decoder stack, then send encoder_hidden_state + # downstream as well. 
def get_tensor_shapes_wrapper(get_tensor_shapes):
    """Decorate ``get_tensor_shapes`` so 2D tensor parallelism shrinks the shapes.

    The wrapped function is always called first. When the global ``tp_2d``
    argument is set, element 0 of every returned shape is floor-divided by
    ``tp_x`` and element 2 by ``tp_y``; element 1 is kept as is. Otherwise the
    shapes are returned unchanged.
    """

    @wraps(get_tensor_shapes)
    def wrapper(*args, **kwargs):
        shapes = get_tensor_shapes(*args, **kwargs)
        opts = get_args()
        if not opts.tp_2d:
            return shapes

        # Original comment labels each shape as [s, b, h].
        return [
            [shape[0] // opts.tp_x, shape[1], shape[2] // opts.tp_y]
            for shape in shapes
        ]

    return wrapper
+# ============================================================================ +from collections import namedtuple + +SimpleParallelCfg = namedtuple( + "SimpleParallelCfg", field_names=["dp", "pp", "tp", "cp", "ep", "tp_x", "tp_y"] +) \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/singleton_meta.py b/model/train/yoco_moe/mindspeed/core/singleton_meta.py new file mode 100644 index 000000000..581dc2c95 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/singleton_meta.py @@ -0,0 +1,27 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class AscendConfig:
    """Mutable registry for the ascend_turbo parallel-linear plugin.

    Holds the two linear layer classes whose ``forward`` gets replaced, the
    callables that provide the communication group and its world size, and
    two boolean switches.
    """

    def __init__(self):
        # Layer classes to patch; registered later through the setters.
        self.ColumnParallelLinear = None
        self.RowParallelLinear = None
        # Zero-argument callables returning the group / its world size.
        self.group_func = None
        self.world_size_func = None

        # ``sequence_parallel`` is the name the setter historically wrote;
        # ``sequence_parallel_enabled`` is the name declared here. Both are
        # kept and always hold the same value.
        self.sequence_parallel_enabled = True
        self.sequence_parallel = True
        self.all_gather_recomputation = True

    def set_sequence_parallel(self, sequence_parallel):
        """Record the sequence-parallel switch under both attribute names.

        Previously only ``sequence_parallel`` was written, so the
        ``sequence_parallel_enabled`` flag set in ``__init__`` never changed.
        """
        self.sequence_parallel = sequence_parallel
        self.sequence_parallel_enabled = sequence_parallel

    def set_all_gather_recomputation(self, all_gather_recomputation):
        """Record whether the all-gather is recomputed instead of stored."""
        self.all_gather_recomputation = all_gather_recomputation

    def set_group(self, group_func):
        """Register the callable that returns the communication group."""
        self.group_func = group_func

    def get_group(self):
        """Return the group by calling the registered ``group_func``."""
        return self.group_func()

    def set_world_size(self, world_size_func):
        """Register the callable that returns the group's world size."""
        self.world_size_func = world_size_func

    def get_world_size(self):
        """Return the world size by calling the registered ``world_size_func``."""
        return self.world_size_func()

    def set_column_parallel_linear(self, column_parallel_linear):
        """Register the column-parallel linear class to be patched."""
        self.ColumnParallelLinear = column_parallel_linear

    def set_row_parallel_linear(self, row_parallel_linear):
        """Register the row-parallel linear class to be patched."""
        self.RowParallelLinear = row_parallel_linear

    def parallel_linear_plugin(self, column_parallel_forward, row_parallel_forward):
        """Replace ``forward`` on both registered classes.

        Both classes must have been registered beforehand; the assignment is
        made on the class objects themselves, so it affects every instance.
        """
        self.ColumnParallelLinear.forward = column_parallel_forward
        self.RowParallelLinear.forward = row_parallel_forward


ascend_turbo_cfg = AscendConfig()
def initialize_cfg_from_args(args):
    """Enable the ascend_turbo linear plugin when the arguments allow it.

    Nothing happens unless sequence parallelism is on, the tensor-parallel
    size differs from 1, and neither ``use_pipe_experts`` nor ``use_nanopipe``
    is set. Otherwise the shared config receives the sequence-parallel flag,
    all-gather recomputation is switched on, and the framework hook-up runs.
    """
    plugin_applicable = (
        args.sequence_parallel
        and args.tensor_model_parallel_size != 1
        and not args.use_pipe_experts
    )
    if not plugin_applicable or args.use_nanopipe:
        return

    ascend_turbo_cfg.set_sequence_parallel(args.sequence_parallel)
    ascend_turbo_cfg.set_all_gather_recomputation(True)
    initialize_cfg_from_framework()
ctx.saved_tensors[0] + weight = ctx.weight + + grad_output_ = grad_output.reshape( + grad_output.shape[0] * grad_output.shape[1], grad_output.shape[2] + ) + + if ascend_turbo_cfg.all_gather_recomputation: + dim_size = list(input_.size()) + dim_size[0] = dim_size[0] * ctx.world_size + all_gather_output = torch.empty( + dim_size, + dtype=input_.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + all_gather_work = torch.distributed._all_gather_base( + all_gather_output, input_.contiguous(), group=ctx.group, async_op=True + ) + else: + all_gather_output = ctx.all_gather_output + + grad_input = grad_output_.matmul(weight) + grad_input = grad_input.reshape( + grad_output.shape[0], grad_output.shape[1], weight.shape[1] + ) + + sub_grad_input = torch.empty( + list(input_.size()), dtype=input_.dtype, device=torch.cuda.current_device() + ) + reduce_scatter_work = torch.distributed._reduce_scatter_base( + sub_grad_input, grad_input, group=ctx.group, async_op=True + ) + + if ascend_turbo_cfg.all_gather_recomputation: + all_gather_work.wait() + all_gather_output = all_gather_output.reshape( + all_gather_output.shape[0] * all_gather_output.shape[1], + all_gather_output.shape[2], + ) + + if ctx.gradient_accumulation_fusion and weight.main_grad.dtype == torch.float32: + from mindspeed.ops.npu_matmul_add import npu_matmul_add_fp32 + npu_matmul_add_fp32(all_gather_output, grad_output_, weight.main_grad) + + if hasattr(weight, 'grad_added_to_main_grad'): + # When overlap_grad_reduce is True, need to ensure that backward hooks + # are all run on the main backprop thread to prevent deadlocks. Setup + # dummy grad_weight tensor to prevent backward hooks from being run + # in a background thread. 
class RowSeqParallelLinear(torch.autograd.Function):
    """Row-parallel linear using fused NPU matmul + collective kernels.

    Forward runs ``npu_mm_reduce_scatter_base`` on the input flattened to 2-D
    and the transposed weight; backward runs ``npu_all_gather_base_mm`` on the
    flattened output gradient to obtain the input gradient and the gathered
    output gradient used for the weight gradient.
    """

    @staticmethod
    def forward(ctx, input_, weight, bias, group):
        """Compute the row-parallel linear output.

        ``input_`` is indexed with three dimensions and flattened over its
        first two before the fused kernel; the result is reshaped back so its
        second dimension equals ``input_.shape[1]``. ``bias`` (if given) is
        added after the fused kernel because the kernel is called with
        ``bias=None``.
        """
        ctx.save_for_backward(input_)
        ctx.use_bias = bias is not None
        ctx.weight = weight
        ctx.gradient_accumulation_fusion = get_args().gradient_accumulation_fusion

        rank = torch.distributed.get_rank(group)
        world_size = ascend_turbo_cfg.get_world_size()
        hcomm_info = None
        # NOTE(review): this is a lexicographic string comparison of versions,
        # kept as in the original — confirm it is acceptable for future releases.
        if torch.__version__ > "2.0":
            global_rank = torch.distributed.get_global_rank(group, rank)
            hcomm_info = group._get_backend(torch.device("npu")).get_hccl_comm_name(
                global_rank
            )
        else:
            hcomm_info = group.get_hccl_comm_name(rank)

        x = input_.reshape(input_.shape[0] * input_.shape[1], input_.shape[2])

        # npu_mm_reduce_scatter_base currently do not support bias
        output = torch_npu.npu_mm_reduce_scatter_base(
            x, weight.t(), hcomm_info, world_size, reduce_op="sum", bias=None
        )

        if bias is not None:
            output = output + bias

        # Needed again by backward for the all-gather matmul.
        ctx.hcomm_info = hcomm_info
        ctx.world_size = world_size

        output = output.view(
            int(output.shape[0] / input_.shape[1]), input_.shape[1], output.shape[1]
        )

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for ``(input_, weight, bias, group)``.

        The last entry is always ``None`` because ``group`` is not
        differentiable. With gradient-accumulation fusion and an fp32
        ``weight.main_grad``, the weight gradient is accumulated directly
        into ``main_grad`` and only a placeholder (or ``None``) is returned.
        """
        input_ = ctx.saved_tensors[0]
        weight = ctx.weight
        hcomm_info = ctx.hcomm_info
        world_size = ctx.world_size

        grad_output_ = grad_output.reshape(
            grad_output.shape[0] * grad_output.shape[1], grad_output.shape[2]
        )

        grad_input, all_gather_grad_output = torch_npu.npu_all_gather_base_mm(
            grad_output_, weight, hcomm_info, world_size, bias=None, gather_index=0
        )

        grad_input = grad_input.view_as(input_)

        x = input_.reshape(input_.shape[0] * input_.shape[1], input_.shape[2])
        if ctx.gradient_accumulation_fusion and weight.main_grad.dtype == torch.float32:
            from mindspeed.ops.npu_matmul_add import npu_matmul_add_fp32
            npu_matmul_add_fp32(x, all_gather_grad_output, weight.main_grad)

            if hasattr(weight, 'grad_added_to_main_grad'):
                # When overlap_grad_reduce is True, need to ensure that backward hooks
                # are all run on the main backprop thread to prevent deadlocks. Setup
                # dummy grad_weight tensor to prevent backward hooks from being run
                # in a background thread.
                if getattr(weight, 'zero_out_wgrad', False):
                    grad_weight = torch.zeros(
                        weight.main_grad.shape,
                        dtype=input_.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                else:
                    grad_weight = torch.empty(
                        weight.main_grad.shape,
                        dtype=input_.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                weight.grad_added_to_main_grad = True
            else:
                grad_weight = None
        else:
            grad_weight = all_gather_grad_output.t().matmul(x)

        is_grad_bias_needed = ctx.needs_input_grad[2]
        if is_grad_bias_needed and ctx.use_bias:
            # Reduce over the flattened 2-D gradient so the result has one
            # entry per output feature. The 3-D ``grad_output`` used before
            # gave a 2-D result from ``sum(dim=0)`` and made ``.t()`` raise.
            grad_bias = (
                grad_output_.sum(dim=0)
                if grad_output_.is_contiguous()
                else grad_output_.t().sum(dim=1)
            )
        else:
            grad_bias = None

        return grad_input, grad_weight, grad_bias, None
torch.distributed.get_rank(group) + hcomm_info = None + if torch.__version__ > "2.0": + global_rank = torch.distributed.get_global_rank(group, rank) + hcomm_info = group._get_backend(torch.device("npu")).get_hccl_comm_name( + global_rank + ) + + else: + hcomm_info = group.get_hccl_comm_name(rank) + + x = input_.reshape(input_.shape[0] * input_.shape[1], input_.shape[2]) + + world_size = ascend_turbo_cfg.get_world_size() + # npu_all_gather_base_mm currently do not support bias + output, all_gather_grad_output = torch_npu.npu_all_gather_base_mm( + x, + weight.t(), + hcomm_info, + world_size, + bias=None, + gather_index=0, + gather_output=(not ascend_turbo_cfg.all_gather_recomputation), + ) + + if bias is not None: + output = output + bias + + output = output.view( + int(output.shape[0] / input_.shape[1]), input_.shape[1], output.shape[1] + ) + ctx.hcomm_info = hcomm_info + ctx.world_size = world_size + ctx.group = group + return output + + @staticmethod + def backward(ctx, grad_output): + input_shape = ctx.input_shape + weight = ctx.weight + + hcomm_info = ctx.hcomm_info + world_size = ctx.world_size + grad_output_ = grad_output.reshape( + grad_output.shape[0] * grad_output.shape[1], grad_output.shape[2] + ) + + sub_grad_input = torch_npu.npu_mm_reduce_scatter_base( + grad_output_, weight, hcomm_info, world_size, bias=None + ) + + sub_grad_input = sub_grad_input.view(input_shape) + + return sub_grad_input, None, None, None + + +class RowSeqParallelLinearWithFrozenWeight(RowSeqParallelLinear): + @staticmethod + def forward(ctx, input_, weight, bias, group): + ctx.input_shape = input_.shape + ctx.use_bias = bias is not None + ctx.weight = weight + + rank = torch.distributed.get_rank(group) + world_size = ascend_turbo_cfg.get_world_size() + hcomm_info = None + if torch.__version__ > "2.0": + global_rank = torch.distributed.get_global_rank(group, rank) + hcomm_info = group._get_backend(torch.device("npu")).get_hccl_comm_name( + global_rank + ) + else: + hcomm_info = 
class PipelineCheckpointManager:
    """Queue of deferred recompute callables, grouped per model chunk.

    Callables are collected for the current micro-batch with
    ``add_recompute``, moved onto a chunk's queue with ``batch_fin``, and
    later popped (and optionally executed) in FIFO order.
    """

    # Shared instance slot filled in by ``get_pipeline_checkpoint_manager``.
    instance = None

    def __init__(self, num_of_chunks=2):
        self.open_ri_pipe = False
        self.do_pre_recompute = False
        # Callables registered for the micro-batch currently being built.
        self.checkpoint_list = []
        # One FIFO of per-batch callable lists for every chunk.
        self.chunk_list = [[] for _ in range(num_of_chunks)]
        self.chunk_do_recompute = True

    def batch_fin(self, chunk_idx):
        """Move the collected callables onto ``chunk_idx``'s queue and start a new batch."""
        finished_batch = self.checkpoint_list
        self.chunk_list[chunk_idx].append(finished_batch)
        self.checkpoint_list = []

    def iter_fin(self):
        """Verify nothing is left to recompute, then reset the per-iteration flags.

        Raises:
            RuntimeError: if the current batch list or any queued batch list
                still holds callables.
        """
        if self.checkpoint_list:
            raise RuntimeError("recompute list is not empty")

        for queued_batches in self.chunk_list:
            leftover = next((batch for batch in queued_batches if batch), None)
            if leftover is not None:
                raise RuntimeError(
                    f"{torch.distributed.get_rank()} batch recompute list is not empty, got {leftover[0]} in index 0")
            queued_batches.clear()

        self._reset_flags()

    def _reset_flags(self):
        # Restore the three switches to their constructor defaults.
        self.open_ri_pipe = False
        self.do_pre_recompute = False
        self.chunk_do_recompute = True

    def add_recompute(self, recompute_func):
        """Register one callable for the batch currently being built."""
        self.checkpoint_list.append(recompute_func)

    def recompute_next_force(self, chunk_idx):
        """Run the first non-empty queued batch of ``chunk_idx``.

        Empty batch lists at the head of the queue are popped and discarded.
        If the queue runs out, a warning is printed and nothing is executed.
        """
        queued_batches = self.chunk_list[chunk_idx]
        head_batch = []
        while not head_batch:
            if not queued_batches:
                print(f"rank-{torch.distributed.get_rank()} warning, poping empty list")
                break
            head_batch = queued_batches.pop(0)

        for recompute in head_batch:
            recompute()

    def recompute_next(self, chunk_idx):
        """Pop the head batch of ``chunk_idx``; execute it only if ``do_pre_recompute`` is set."""
        head_batch = self.chunk_list[chunk_idx].pop(0)
        if not self.do_pre_recompute:
            return
        for recompute in head_batch:
            recompute()

    def disable_recompute(self):
        """Clear the ``chunk_do_recompute`` switch."""
        self.chunk_do_recompute = False

    def enable_recompute(self):
        """Set the ``chunk_do_recompute`` switch."""
        self.chunk_do_recompute = True
class _SyncGatherAlongFirstDim(torch.autograd.Function):
    """All-gather along the first dim in forward; split along it in backward."""

    @staticmethod
    def symbolic(graph, input_):
        return sync_gather_along_first_dim(input_, TPXCollectiveComm)

    @staticmethod
    def forward(ctx, input_, comm_intf: CollectiveCommIntf):
        ctx.comm_intf = comm_intf
        return sync_gather_along_first_dim(input_, comm_intf)

    @staticmethod
    def backward(ctx, grad_output):
        # One gradient slot per forward input: (input_, comm_intf).
        return _split_along_first_dim(grad_output, ctx.comm_intf), None


class _SyncGatherAlongLastDim(torch.autograd.Function):
    """All-gather along the last dim in forward; split along it in backward."""

    @staticmethod
    def symbolic(graph, input_):
        return sync_gather_along_last_dim(input_, TPYCollectiveComm)

    @staticmethod
    def forward(ctx, input_, comm_intf: CollectiveCommIntf):
        ctx.comm_intf = comm_intf
        return sync_gather_along_last_dim(input_, comm_intf)

    @staticmethod
    def backward(ctx, grad_output):
        return _split_along_last_dim(grad_output, ctx.comm_intf), None


def _reduce(input_, tp_intf: CollectiveCommIntf = TPXCollectiveComm):
    """All-reduce ``input_`` in place across the group described by ``tp_intf``."""

    # Bypass the collective when the group has a single member.
    if tp_intf.get_comm_group_world_size() == 1:
        return input_

    torch.distributed.all_reduce(input_, group=tp_intf.get_comm_group())
    return input_


class _ReduceFromModelParallelRegion(torch.autograd.Function):
    """All-reduce in forward; pass the gradient through unchanged in backward."""

    @staticmethod
    def symbolic(graph, input_, tp_intf: CollectiveCommIntf = TPXCollectiveComm):
        # NOTE(review): forward returns a single tensor but this returns a
        # 2-tuple; it looks copied from backward. Confirm before relying on
        # ONNX export of this op.
        return _reduce(input_, tp_intf), None

    @staticmethod
    def forward(ctx, input_, tp_intf: CollectiveCommIntf = TPXCollectiveComm):
        return _reduce(input_, tp_intf)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output, None


class _GatherFromParallelRegion(torch.autograd.Function):
    """Gather along the last dim in forward; split along it in backward."""

    @staticmethod
    def symbolic(graph, input_):
        return _gather_along_last_dim(input_)

    @staticmethod
    def forward(ctx, input_, comm_intf: CollectiveCommIntf):
        ctx.comm_intf = comm_intf
        return _gather_along_last_dim(input_, comm_intf)

    @staticmethod
    def backward(ctx, grad_output):
        return _split_along_last_dim(grad_output, ctx.comm_intf), None


class _ScatterAlongLastDim(torch.autograd.Function):
    """Keep this rank's last-dim chunk in forward; gather along it in backward."""

    @staticmethod
    def symbolic(graph, input_, comm_intf: CollectiveCommIntf):
        return _split_along_last_dim(input_, comm_intf)

    @staticmethod
    def forward(ctx, input_, comm_intf: CollectiveCommIntf):
        ctx.comm_intf = comm_intf
        return _split_along_last_dim(input_, comm_intf)

    @staticmethod
    def backward(ctx, grad_output):
        return _gather_along_last_dim(grad_output, ctx.comm_intf), None


class _ScatterAlongFirstDim(torch.autograd.Function):
    """Keep this rank's first-dim chunk in forward; gather along it in backward."""

    @staticmethod
    def symbolic(graph, input_, comm_intf: CollectiveCommIntf):
        return _split_along_first_dim(input_, comm_intf)

    @staticmethod
    def forward(ctx, input_, comm_intf: CollectiveCommIntf):
        ctx.comm_intf = comm_intf
        return _split_along_first_dim(input_, comm_intf)

    @staticmethod
    def backward(ctx, grad_output):
        return sync_gather_along_first_dim(grad_output, ctx.comm_intf), None


class _ScatterAlongFirstDimThenLastDim(torch.autograd.Function):
    """Split along the first dim, then along the last dim; backward undoes both."""

    @staticmethod
    def symbolic(graph, local_rank_input, first_dim_comm_intf, last_dim_comm_intf):
        graph.first_dim_comm_intf = first_dim_comm_intf
        graph.last_dim_comm_intf = last_dim_comm_intf

        first_dim_split_output = _split_along_first_dim(local_rank_input, first_dim_comm_intf)
        return _split_along_last_dim(first_dim_split_output, last_dim_comm_intf)

    @staticmethod
    def forward(ctx, local_rank_input, first_dim_comm_intf, last_dim_comm_intf):
        ctx.first_dim_comm_intf = first_dim_comm_intf
        ctx.last_dim_comm_intf = last_dim_comm_intf

        first_dim_split_output = _split_along_first_dim(local_rank_input, first_dim_comm_intf)
        return _split_along_last_dim(first_dim_split_output, last_dim_comm_intf)

    @staticmethod
    def backward(ctx, grad_output):
        # Undo the splits in reverse order: last dim first, then first dim.
        last_dim_gather_output = _gather_along_last_dim(grad_output, ctx.last_dim_comm_intf)
        first_dim_gather_output = sync_gather_along_first_dim(
            last_dim_gather_output, ctx.first_dim_comm_intf)
        return first_dim_gather_output, None, None


class _SyncGatherAlongFirstDimRS(torch.autograd.Function):
    """All-gather along the first dim in forward; reduce-scatter in backward."""

    @staticmethod
    def symbolic(graph, input_, comm_intf: CollectiveCommIntf):
        return sync_gather_along_first_dim(input_, comm_intf)

    @staticmethod
    def forward(ctx, input_, comm_intf: CollectiveCommIntf):
        ctx.comm_intf = comm_intf
        return sync_gather_along_first_dim(input_, comm_intf)

    @staticmethod
    def backward(ctx, grad_output):
        return sync_reduce_scatter_along_first_dim(grad_output, ctx.comm_intf), None


class _SyncReduceScatterAlongFirstDim(torch.autograd.Function):
    """Reduce-scatter along the first dim in forward; all-gather in backward."""

    @staticmethod
    def symbolic(graph, input_, comm_intf: CollectiveCommIntf):
        return sync_reduce_scatter_along_first_dim(input_, comm_intf)

    @staticmethod
    def forward(ctx, input_, comm_intf: CollectiveCommIntf):
        ctx.comm_intf = comm_intf
        return sync_reduce_scatter_along_first_dim(input_, comm_intf)

    @staticmethod
    def backward(ctx, grad_output):
        return sync_gather_along_first_dim(grad_output, ctx.comm_intf), None


def auto_grad_sync_gather_along_first_dim(input_, comm_intf: CollectiveCommIntf):
    """Differentiable all-gather along the first dim over ``comm_intf``."""
    return _SyncGatherAlongFirstDim.apply(input_, comm_intf)


def auto_grad_sync_gather_along_last_dim(input_, comm_intf: CollectiveCommIntf):
    """Differentiable all-gather along the last dim over ``comm_intf``."""
    return _SyncGatherAlongLastDim.apply(input_, comm_intf)


def scatter_to_tensor_parallel_y_region(input_):
    """Differentiable last-dim scatter over the tensor-parallel Y group.

    ``_ScatterAlongLastDim.forward`` requires a communication interface; the
    previous call omitted it, so every invocation raised ``TypeError``.
    """
    return _ScatterAlongLastDim.apply(input_, TPYCollectiveComm)


def auto_grad_scatter_along_last_dim(input_, comm_intf: CollectiveCommIntf):
    """Differentiable last-dim scatter over ``comm_intf``."""
    return _ScatterAlongLastDim.apply(input_, comm_intf)


def auto_grad_scatter_along_first_dim(input_, comm_intf: CollectiveCommIntf):
    """Differentiable first-dim scatter over ``comm_intf``."""
    return _ScatterAlongFirstDim.apply(input_, comm_intf)


def auto_grad_scatter_along_first_dim_then_last_dim(
    local_rank_input: torch.Tensor,
    first_dim_comm_intf: CollectiveCommIntf,
    last_dim_comm_intf: CollectiveCommIntf,
):
    """Differentiable first-dim scatter followed by a last-dim scatter."""
    return _ScatterAlongFirstDimThenLastDim.apply(
        local_rank_input, first_dim_comm_intf, last_dim_comm_intf
    )


def reduce_from_parallel_region(input_, tp_intf: CollectiveCommIntf = TPXCollectiveComm):
    """Differentiable all-reduce over ``tp_intf`` (identity gradient)."""
    return _ReduceFromModelParallelRegion.apply(input_, tp_intf)


def gather_from_parallel_region(input_, comm_intf: CollectiveCommIntf):
    """Differentiable last-dim gather over ``comm_intf``."""
    return _GatherFromParallelRegion.apply(input_, comm_intf)


def auto_grad_sync_gather_along_first_dim_rs(input_, comm_intf: CollectiveCommIntf):
    """Differentiable first-dim all-gather whose gradient is a reduce-scatter."""
    return _SyncGatherAlongFirstDimRS.apply(input_, comm_intf)


def auto_grad_reduce_scatter_along_first_dim(input_, comm_intf: CollectiveCommIntf):
    """Differentiable first-dim reduce-scatter whose gradient is an all-gather."""
    return _SyncReduceScatterAlongFirstDim.apply(input_, comm_intf)
class CollectiveCommIntf:
    """Base interface describing one communication group.

    Group queries are class-level so callers can pass the class itself
    wherever a communication interface is expected; only the display name is
    per-instance state.
    """

    def __init__(self, comm_group_name):
        self.comm_group_name = comm_group_name

    def get_comm_group_name(self):
        """Return the name given at construction time."""
        return self.comm_group_name

    @classmethod
    @abc.abstractmethod
    def get_comm_group_world_size(cls):
        raise NotImplementedError()

    @classmethod
    @abc.abstractmethod
    def get_comm_group(cls):
        raise NotImplementedError()

    @classmethod
    @abc.abstractmethod
    def get_comm_rank(cls):
        raise NotImplementedError()


class OverlapCollectiveIntf(CollectiveCommIntf):
    """Interface extension that additionally exposes ring global ranks."""

    @classmethod
    @abc.abstractmethod
    def get_ring_global_ranks(cls):
        raise NotImplementedError()


class CPCollectiveComm(CollectiveCommIntf):
    """Context-parallel group, backed by megatron's parallel state."""

    @classmethod
    def get_comm_rank(cls):
        return get_context_parallel_rank()

    @classmethod
    def get_comm_group(cls):
        return get_context_parallel_group()

    @classmethod
    def get_comm_group_world_size(cls):
        return get_context_parallel_world_size()


class TPXCollectiveComm(CollectiveCommIntf):
    """Tensor-parallel nd1/dim1 group (default name ``tp-x``)."""

    def __init__(self, name="tp-x"):
        super().__init__(name)

    @classmethod
    def get_comm_group(cls):
        return get_tensor_model_parallel_group_for_nd1_dim1()

    @classmethod
    def get_comm_group_world_size(cls):
        return get_tensor_model_parallel_group_for_nd1_dim1_world_size()

    @classmethod
    def get_comm_rank(cls):
        return get_tensor_model_parallel_group_for_nd1_dim1_rank()


class TPXEPCollectiveComm(CollectiveCommIntf):
    """The tp-x-ep group (default name ``tp-x-ep``)."""

    def __init__(self, name="tp-x-ep"):
        super().__init__(name)

    @classmethod
    def get_comm_group(cls):
        return get_tp_x_ep_group()

    @classmethod
    def get_comm_group_world_size(cls):
        return get_tp_x_ep_group_world_size()

    @classmethod
    def get_comm_rank(cls):
        return get_tp_x_ep_group_rank()


class TPXOverlapCollectiveComm(TPXCollectiveComm, OverlapCollectiveIntf):
    """TP-X variant whose group is the send/receive overlap group."""

    def __init__(self):
        super().__init__("tp-x-overlap")

    @classmethod
    def get_ring_global_ranks(cls):
        return get_tp_x_ring_global_ranks()

    @classmethod
    def get_comm_group(cls):
        return get_tp_x_sd_rcv_overlap_group()


class TPYCollectiveComm(CollectiveCommIntf):
    """Tensor-parallel nd1/dim2 group (default name ``tp-y``)."""

    def __init__(self, name="tp-y"):
        super().__init__(name)

    @classmethod
    def get_comm_group(cls):
        return get_tensor_model_parallel_group_for_nd1_dim2()

    @classmethod
    def get_comm_group_world_size(cls):
        return get_tensor_model_parallel_group_for_nd1_dim2_world_size()

    @classmethod
    def get_comm_rank(cls):
        return get_tensor_model_parallel_group_for_nd1_dim2_rank()


class TPYOverlapCollectiveComm(TPYCollectiveComm, OverlapCollectiveIntf):
    """TP-Y variant whose group is the send/receive overlap group."""

    def __init__(self):
        # NOTE(review): the X counterpart is named "tp-x-overlap"; this name
        # lacks the "tp-" prefix. Left as-is because consumers of the name are
        # not visible here.
        super().__init__("y-overlap")

    @classmethod
    def get_ring_global_ranks(cls):
        return get_tp_y_ring_global_ranks()

    @classmethod
    def get_comm_group(cls):
        return get_tp_y_sd_rcv_overlap_group()


class TPXYCollectiveComm(CollectiveCommIntf):
    """The full tensor-model-parallel group (name ``tp-xy``)."""

    def __init__(self):
        super().__init__("tp-xy")

    @classmethod
    def get_comm_group(cls):
        return get_tensor_model_parallel_group()

    @classmethod
    def get_comm_group_world_size(cls):
        return get_tensor_model_parallel_world_size()

    @classmethod
    def get_comm_rank(cls):
        return get_tensor_model_parallel_rank()
def _split_along_last_dim(
    local_rank_input: Tensor, comm_intf: CollectiveCommIntf = TPXCollectiveComm
):
    """Split the tensor along its last dimension and keep this rank's slice.

    Raises:
        AssertionError: if the last dimension is not divisible by the group
            size (same contract as ``_split_along_first_dim``).
    """

    world_size = comm_intf.get_comm_group_world_size()
    # Bypass the function if the group has a single member.
    if world_size == 1:
        return local_rank_input

    last_dim = local_rank_input.dim() - 1
    full_size = local_rank_input.size()[last_dim]
    # Without this check torch.split yields an extra, shorter chunk and ranks
    # silently receive misaligned slices.
    if full_size % world_size:
        raise AssertionError("Last dimension of the tensor should be divisible by parallel size")
    last_dim_size = full_size // world_size
    tensor_list = torch.split(local_rank_input, last_dim_size, dim=last_dim)

    # Note: torch.split does not create contiguous tensors by default.
    rank = comm_intf.get_comm_rank()
    output = tensor_list[rank].contiguous()

    return output


def _split_along_first_dim(local_rank_input, comm_intf: CollectiveCommIntf = TPXCollectiveComm):
    """Split the tensor along its first dimension and keep this rank's slice.

    Raises:
        AssertionError: if the first dimension is not divisible by the group size.
    """

    world_size = comm_intf.get_comm_group_world_size()
    # Bypass the function if the group has a single member.
    if world_size == 1:
        return local_rank_input

    dim_size = local_rank_input.size()[0]
    if dim_size % world_size:
        raise AssertionError("First dimension of the tensor should be divisible by parallel size")
    local_dim_size = dim_size // world_size
    rank = comm_intf.get_comm_rank()
    dim_offset = rank * local_dim_size

    output = local_rank_input[dim_offset : dim_offset + local_dim_size].contiguous()

    return output


def _gather_along_last_dim(
    local_rank_input: Tensor, ag_comm_intf: CollectiveCommIntf = TPXCollectiveComm
):
    """All-gather tensors and concatenate them along the last dimension."""

    world_size = ag_comm_intf.get_comm_group_world_size()
    # Bypass the function if the group has a single member.
    if world_size == 1:
        return local_rank_input

    # Collectives expect contiguous buffers; gradients arriving here (e.g. in
    # the backward of a scatter) are not guaranteed to be contiguous.
    local_chunk = local_rank_input.contiguous()
    tensor_list = [torch.empty_like(local_chunk) for _ in range(world_size)]
    torch.distributed.all_gather(
        tensor_list, local_chunk, group=ag_comm_intf.get_comm_group(), async_op=False
    )

    last_dim = local_rank_input.dim() - 1
    output = torch.cat(tensor_list, dim=last_dim).contiguous()
    return output


def sync_gather_along_last_dim(
    local_rank_tensor: Tensor, ag_comm_intf: CollectiveCommIntf = TPXCollectiveComm
):
    """Gather tensors and concatenate along the last dimension synchronously.

    :param local_rank_tensor: input of current rank.
    :param ag_comm_intf: the communication process group interface.
    :return: the AllGather-ed result.
    """

    world_size = ag_comm_intf.get_comm_group_world_size()
    # Bypass the function if the group has a single member.
    if world_size == 1:
        return local_rank_tensor

    # Allocate receive buffers from the contiguous copy so they are
    # contiguous as well.
    local_chunk = local_rank_tensor.contiguous()
    gathered_tensors = [torch.empty_like(local_chunk) for _ in range(world_size)]
    torch.distributed.all_gather(
        gathered_tensors,
        local_chunk,
        group=ag_comm_intf.get_comm_group(),
        async_op=False,
    )

    return torch.cat(gathered_tensors, dim=local_rank_tensor.dim() - 1).contiguous()


def async_gather_tensors(
    local_rank_input: Tensor,
    ag_comm_intf: CollectiveCommIntf = TPXCollectiveComm,
    buffer_name="mpu-async-tp-2d",
):
    """Start an asynchronous all-gather into one tensor stacked along dim 0.

    :param local_rank_input: input of current rank.
    :param ag_comm_intf: the AllGather communication process group interface.
    :param buffer_name: accepted for interface compatibility; currently unused.
    :return: ``(handle, output)``. ``output`` is a single tensor whose first
        dimension is ``world_size`` times that of the input and is only valid
        after ``handle.wait()``. For a single-member group the result is
        ``(None, local_rank_input)``.
    """

    world_size = ag_comm_intf.get_comm_group_world_size()
    # Bypass the function if the group has a single member.
    if world_size == 1:
        return None, local_rank_input

    dim_size = list(local_rank_input.size())
    dim_size[0] *= world_size

    ag_out = torch.empty(dim_size, dtype=local_rank_input.dtype, device=torch.cuda.current_device())
    # Same contiguity guarantee as sync_gather_along_first_dim.
    handle = torch.distributed._all_gather_base(
        ag_out, local_rank_input.contiguous(), group=ag_comm_intf.get_comm_group(), async_op=True
    )

    return handle, ag_out


def sync_gather_along_first_dim(
    local_rank_input: Tensor,
    comm_intf: CollectiveCommIntf = TPXCollectiveComm,
    buffer_name=None,
):
    """All-gather tensors and concatenate them along the first dimension.

    When ``buffer_name`` is given the output is taken from megatron's global
    memory buffer under that name instead of being freshly allocated.
    """

    world_size = comm_intf.get_comm_group_world_size()
    # Bypass the function if the group has a single member.
    if world_size == 1:
        return local_rank_input

    dim_size = list(local_rank_input.size())
    dim_size[0] *= world_size

    if buffer_name is None:
        output = torch.empty(dim_size, dtype=local_rank_input.dtype, device=torch.cuda.current_device())
    else:
        output = get_global_memory_buffer().get_tensor(dim_size, local_rank_input.dtype, buffer_name)
    torch.distributed._all_gather_base(
        output, local_rank_input.contiguous(), group=comm_intf.get_comm_group()
    )

    return output


def sync_reduce_scatter_along_first_dim(
    local_rank_input, comm_intf: CollectiveCommIntf = TPXCollectiveComm
):
    """Reduce-scatter the input along its first dimension across ``comm_intf``.

    Raises:
        AssertionError: if the first dimension is not divisible by the group size.
    """
    world_size = comm_intf.get_comm_group_world_size()
    # Bypass the function if the group has a single member.
    if world_size == 1:
        return local_rank_input

    dim_size = list(local_rank_input.size())
    if dim_size[0] % world_size:
        raise AssertionError("First dimension of the tensor should be divisible by tensor parallel size")

    dim_size[0] = dim_size[0] // world_size

    output = torch.empty(dim_size, dtype=local_rank_input.dtype, device=torch.cuda.current_device())
    dist.reduce_scatter_tensor(
        output, local_rank_input.contiguous(), group=comm_intf.get_comm_group(), async_op=False
    )

    return output


def async_reduce_scatter_along_first_dim(
    local_rank_input, comm_intf: CollectiveCommIntf = TPXCollectiveComm
):
    """Start an asynchronous first-dim reduce-scatter across ``comm_intf``.

    :return: ``(handle, output)``; ``output`` is valid after ``handle.wait()``.
        For a single-member group the result is ``(None, local_rank_input)``.

    Raises:
        AssertionError: if the first dimension is not divisible by the group size.
    """
    world_size = comm_intf.get_comm_group_world_size()
    # Bypass the function if the group has a single member.
    if world_size == 1:
        return None, local_rank_input

    dim_size = list(local_rank_input.size())
    if dim_size[0] % world_size:
        raise AssertionError("First dimension of the tensor should be divisible by parallel size")

    dim_size[0] = dim_size[0] // world_size

    rs_output = torch.empty(
        dim_size, dtype=local_rank_input.dtype, device=torch.cuda.current_device()
    )
    handle = dist.reduce_scatter_tensor(
        rs_output, local_rank_input.contiguous(), group=comm_intf.get_comm_group(), async_op=True
    )
    return handle, rs_output


def async_gather_along_last_dim(input_, comm_intf: CollectiveCommIntf = TPXCollectiveComm):
    """Start an asynchronous all-gather and return the per-rank chunks.

    :return: ``(handle, gathered_tensors)``; the list holds one tensor per
        rank and is valid after ``handle.wait()``. The caller concatenates
        along the desired dimension. For a single-member group the result is
        ``(None, input_)``.
    """
    world_size = comm_intf.get_comm_group_world_size()
    # Bypass the function if the group has a single member.
    if world_size == 1:
        return None, input_

    local_chunk = input_.contiguous()
    gathered_tensors = [torch.empty_like(local_chunk) for _ in range(world_size)]
    handle = torch.distributed.all_gather(
        gathered_tensors, local_chunk, group=comm_intf.get_comm_group(), async_op=True,
    )

    return handle, gathered_tensors


def sync_reduce_scatter_along_last_dim(
    local_rank_input, rs_comm_intf: CollectiveCommIntf = TPXCollectiveComm
):
    """Reduce-scatter the input along its last dimension across ``rs_comm_intf``."""
    world_size = rs_comm_intf.get_comm_group_world_size()
    # Bypass the function if the group has a single member.
    if world_size == 1:
        return local_rank_input

    # Move the last dim to the front, reuse the first-dim kernel, move it back.
    local_rank_input = local_rank_input.transpose(0, -1)
    output = sync_reduce_scatter_along_first_dim(local_rank_input, rs_comm_intf)
    return output.transpose(0, -1).contiguous()


def async_reduce_scatter_along_last_dim(
    local_rank_input, rs_comm_intf: CollectiveCommIntf = TPXCollectiveComm
):
    """Start an asynchronous reduce-scatter over dim 2 of the input.

    :param local_rank_input: input of local rank
    :param rs_comm_intf: Reduce scatter comm intf.
    :return: ``(handle, rs_output)``; for a single-member group
        ``(None, local_rank_input)``.

    Note: dims 0 and 2 are swapped before the collective (dim 2 is hard-coded,
    unlike the sync variant which uses -1), so the result must be transposed
    back by the caller once the handle has completed:
        rs_output = rs_output.transpose(0, 2).contiguous()

    """
    world_size = rs_comm_intf.get_comm_group_world_size()
    # Bypass the function if the group has a single member.
    if world_size == 1:
        return None, local_rank_input

    local_rank_input = local_rank_input.transpose(0, 2)
    return async_reduce_scatter_along_first_dim(local_rank_input, rs_comm_intf)
def calculate_predicted_logits(
    vocab_parallel_logits: torch.Tensor,
    target: torch.Tensor,
    logits_max: torch.Tensor,
    vocab_start_index: int,
    vocab_end_index: int,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """Compute the per-partition pieces of a vocab-parallel cross entropy.

    Args:
        vocab_parallel_logits: logits of this vocab partition, shape
            ``[*, partition_vocab_size]``. Not modified.
        target: target ids with shape ``[*]``. Not modified.
        logits_max: per-position maximum to subtract, shape ``[*]``.
        vocab_start_index: first vocab id owned by this partition (inclusive).
        vocab_end_index: end of the owned vocab id range (exclusive).

    Returns:
        ``(target_mask, masked_target_1d, predicted_logits, sum_exp_logits,
        exp_logits)`` where ``target_mask`` is True for targets outside this
        partition, ``masked_target_1d`` holds flattened partition-local ids
        (0 where masked), ``predicted_logits`` are the shifted logits at the
        target (0 where masked), and ``exp_logits`` / ``sum_exp_logits`` are
        the exponentials of the shifted logits and their last-dim sum.
    """
    # Subtract the maximum value. This is deliberately out-of-place: it gives
    # a private tensor, so the caller's logits stay untouched while the exp
    # below can safely reuse the storage.
    vocab_parallel_logits = vocab_parallel_logits - logits_max.unsqueeze(dim=-1)

    # Create a mask of valid vocab ids (True means it needs to be masked).
    target_mask = (target < vocab_start_index) | (target >= vocab_end_index)
    # The subtraction already allocates a new tensor, so no clone is needed
    # before the in-place masking.
    masked_target = target - vocab_start_index
    masked_target *= ~target_mask

    # Get predicted-logits = logits[target].
    # For simplicity, convert logits to a 2-D tensor of size
    # [*, partition-vocab-size] and target to a 1-D tensor of size [*].
    partition_vocab_size = vocab_parallel_logits.size()[-1]
    logits_2d = vocab_parallel_logits.view(-1, partition_vocab_size)
    masked_target_1d = masked_target.view(-1)
    arange_1d = torch.arange(start=0, end=logits_2d.size()[0], device=logits_2d.device)
    predicted_logits_1d = logits_2d[arange_1d, masked_target_1d]
    predicted_logits_1d = predicted_logits_1d.clone().contiguous()
    predicted_logits = predicted_logits_1d.view_as(target)
    predicted_logits *= ~target_mask

    # Exponentiate in place on the private shifted tensor to avoid another
    # full-size allocation.
    exp_logits = vocab_parallel_logits
    torch.exp(vocab_parallel_logits, out=exp_logits)
    sum_exp_logits = exp_logits.sum(dim=-1)

    return target_mask, masked_target_1d, predicted_logits, sum_exp_logits, exp_logits
+from functools import wraps +from webbrowser import get +import os +import warnings +from typing import List, Optional, Callable, Any + +import torch +import torch_npu +import torch.distributed +import torch.nn.functional as F + +from megatron.core.transformer import TransformerConfig +from .comm_group_api import TPXCollectiveComm +from .comm_group_api import TPYCollectiveComm + +try: + import fused_weight_gradient_mlp_cuda +except Exception: + warnings.warn("failed to generate the npu_matmul_add_fp32") +from torch.nn.parameter import Parameter +from torch.cuda.amp import custom_bwd, custom_fwd +from torch.nn.parameter import Parameter +import megatron.core.parallel_state as ps +from megatron.core.tensor_parallel.layers import ( + _initialize_affine_weight_cpu, + _initialize_affine_weight_gpu +) + +from megatron.core import parallel_state, tensor_parallel, mpu +from megatron.training import get_args + +from megatron.core.tensor_parallel.mappings import ( + copy_to_tensor_model_parallel_region, + gather_from_tensor_model_parallel_region, + reduce_from_tensor_model_parallel_region, + reduce_scatter_to_sequence_parallel_region, + scatter_to_tensor_model_parallel_region, + _reduce_scatter_along_first_dim, + _gather_along_first_dim, + _ReduceFromModelParallelRegion, +) +from megatron.core.tensor_parallel.layers import ( + LinearWithGradAccumulationAndAsyncCommunication, + linear_with_grad_accumulation_and_async_allreduce, + linear_with_frozen_weight, +) +from megatron.core.parallel_state import ( + get_global_memory_buffer, + get_tensor_model_parallel_group, + get_tensor_model_parallel_rank, + get_tensor_model_parallel_world_size, + is_pipeline_first_stage, + get_data_parallel_world_size, + get_data_parallel_rank, +) +from megatron.core.tensor_parallel.layers import set_tensor_model_parallel_attributes +from megatron.core.parallel_state import get_tensor_model_parallel_world_size +from megatron.core.model_parallel_config import ModelParallelConfig +from 
def linear_with_grad_accumulation_and_async_allreduce_zero3(
        input,
        weight,
        bias,
        gradient_accumulation_fusion: bool,
        async_grad_allreduce: bool,
        sequence_parallel: bool,
        grad_output_buffer=None,
        need_gather_param_in_bw=False):
    """Run the megatron linear autograd function with the extra zero3 flag.

    Forwards all eight arguments positionally to
    ``LinearWithGradAccumulationAndAsyncCommunication.apply``. The
    CUDA_DEVICE_MAX_CONNECTIONS advice is not repeated once the ``warned``
    attribute of this function has been set.
    """
    this_fn = linear_with_grad_accumulation_and_async_allreduce_zero3

    if not this_fn.warned and os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS') != "1":
        if sequence_parallel:
            warnings.warn(
                "When using sequence parallelism it is recommended to set the "
                "environment variable CUDA_DEVICE_MAX_CONNECTIONS to 1 for "
                "maximum speedup"
            )
            this_fn.warned = True

        if async_grad_allreduce:
            warnings.warn(
                "When using async grad allreduce it is recommended to set the "
                "environment variable CUDA_DEVICE_MAX_CONNECTIONS to 1 for "
                "maximum speedup"
            )
            this_fn.warned = True

    return LinearWithGradAccumulationAndAsyncCommunication.apply(
        input,
        weight,
        bias,
        gradient_accumulation_fusion,
        async_grad_allreduce,
        sequence_parallel,
        grad_output_buffer,
        need_gather_param_in_bw,
    )
linear_with_grad_accumulation_and_async_allreduce_zero3.warned = False


def linear_forward_zero3_wrapper(forward_func):
    """Wrap a linear forward so it also accepts ``need_gather_param_in_bw``.

    The flag is stored on ``ctx`` and not passed to ``forward_func``.
    """
    @wraps(forward_func)
    def linear_forward_zero3(
            ctx,
            input,
            weight,
            bias,
            gradient_accumulation_fusion,
            async_grad_allreduce,
            sequence_parallel,
            grad_output_buffer,
            need_gather_param_in_bw=False):
        ctx.need_gather_param_in_bw = need_gather_param_in_bw
        forwarded = (
            input,
            weight,
            bias,
            gradient_accumulation_fusion,
            async_grad_allreduce,
            sequence_parallel,
            grad_output_buffer,
        )
        return forward_func(ctx, *forwarded)

    return linear_forward_zero3


def linear_backward_zero3_wrapper(func):
    """Wrap a linear backward to park the weight grad on ``weight.full_grad``.

    When ``ctx.need_gather_param_in_bw`` is set, gradient-accumulation fusion
    is switched off for the wrapped call, the weight gradient is attached to
    the saved weight as ``full_grad``, and ``None`` is returned in its slot.
    One extra ``None`` is returned for the additional forward argument.
    """
    @wraps(func)
    def linear_backward_zero3(ctx, grad_output):
        gather_in_bw = ctx.need_gather_param_in_bw
        ctx.gradient_accumulation_fusion = (ctx.gradient_accumulation_fusion and not gather_in_bw)
        grads = func(ctx, grad_output)
        grad_input, grad_weight, grad_bias, _, _, _, _ = grads
        if ctx.need_gather_param_in_bw:
            saved_weight = ctx.saved_tensors[1]
            saved_weight.full_grad = grad_weight
            grad_weight = None
        return grad_input, grad_weight, grad_bias, None, None, None, None, None

    return linear_backward_zero3


def linear_forward_main_grad_wrapper(forward_func):
    """Wrap a linear forward so the weight is also kept as ``ctx.weight``."""
    @wraps(forward_func)
    def linear_forward_main_grad(ctx,
                                 inputs,
                                 weight,
                                 bias,
                                 gradient_accumulation_fusion,
                                 allreduce_dgrad,
                                 wgrad_deferral_limit,
                                 sequence_parallel,
                                 grad_output_buffer,):
        result = forward_func(ctx,
                              inputs,
                              weight,
                              bias,
                              gradient_accumulation_fusion,
                              allreduce_dgrad,
                              wgrad_deferral_limit,
                              sequence_parallel,
                              grad_output_buffer,)
        # Stored after the wrapped call, matching the original ordering.
        ctx.weight = weight
        return result

    return linear_forward_main_grad


def linear_backward_main_grad_wrapper(backward_func):
    """Wrap a linear backward so it reads the weight from ``ctx.weight``.

    A plain stand-in context mirrors every readable non-dunder attribute of
    ``ctx`` except ``saved_variables``; its ``saved_tensors`` is the saved
    input paired with ``ctx.weight``.
    """
    @wraps(backward_func)
    def linear_backward_main_grad(ctx, grad_output):
        class NewCtx:
            pass

        proxy_ctx = NewCtx()
        saved_input, _ = ctx.saved_tensors
        for attr_name in dir(ctx):
            if attr_name == 'saved_tensors':
                attr_value = (saved_input, ctx.weight)
            elif attr_name.startswith('__') or attr_name == 'saved_variables':
                continue
            else:
                try:
                    attr_value = getattr(ctx, attr_name)
                except AttributeError:
                    # Listed by dir() but not readable on this ctx.
                    continue
            setattr(proxy_ctx, attr_name, attr_value)
        return backward_func(proxy_ctx, grad_output)

    return linear_backward_main_grad


def parallel_linear_init_zero3_wrapper(func):
    """Wrap a parallel-linear ``__init__`` to shard the weight for zero3.

    After the wrapped initialiser runs and ``enable_zero3`` is set in the
    global args, ``self.weight`` is replaced by a new Parameter holding this
    data-parallel rank's chunk of the weight along dim 0.
    """
    @wraps(func)
    def parallel_linear_init(self, *args, **kwargs):
        self.enable_zero3 = get_args().enable_zero3
        func(self, *args, **kwargs)
        if not self.enable_zero3:
            return

        dp_size = get_data_parallel_world_size()
        dp_rank = get_data_parallel_rank()
        local_shard = self.weight.chunk(dp_size, dim=0)[dp_rank]
        self.weight = Parameter(
            torch.empty(local_shard.shape, dtype=self.config.params_dtype)
        )
        self.weight.data.copy_(local_shard)
        setattr(self.weight, 'enable_zero3', self.enable_zero3)

    return parallel_linear_init
+ ) + weight = self.weight + else: + # Check the weight passed in is the correct shape + expected_shape = (self.output_size_per_partition, self.input_size) + if weight.shape != expected_shape: + raise RuntimeError( + f"supplied weight's shape is {tuple(weight.shape)}, " + f"not {expected_shape} as expected" + ) + + if self.config._cpu_offloading_context is not None: + if self.config._cpu_offloading_context.inside_context == True: + assert ( + self.config.cpu_offloading == False + ), "CPU Offloading cannot be enabled while using non-TE modules" + + bias = self.bias if not self.skip_bias_add else None + + if ( + self.async_tensor_model_parallel_allreduce + or self.sequence_parallel + or self.explicit_expert_comm + ): + input_parallel = input_ + else: + input_parallel = copy_to_tensor_model_parallel_region(input_) + + if self.config.defer_embedding_wgrad_compute: + self.embedding_activation_buffer.append(input_parallel) + + # Matrix multiply. + if not weight.requires_grad: + self._forward_impl = linear_with_frozen_weight + else: + self._forward_impl = linear_with_grad_accumulation_and_async_allreduce + + output_parallel = self._forward_impl( + input=input_parallel, + weight=weight, + bias=bias, + gradient_accumulation_fusion=self.gradient_accumulation_fusion, + async_grad_allreduce=False + if self.explicit_expert_comm + else self.async_tensor_model_parallel_allreduce, + sequence_parallel=False if self.explicit_expert_comm else self.sequence_parallel, + grad_output_buffer=self.grad_output_buffer + if self.config.defer_embedding_wgrad_compute + else None, + need_gather_param_in_bw=self.enable_zero3 + ) + if self.gather_output: + # All-gather across the partitions. 
def row_parallel_linear_forward_zero3(self, input_):
    """Forward pass of a row-parallel linear layer, forwarding the ZeRO-3 flag.

    Args:
        input_: Input tensor. Used as-is when ``self.input_is_parallel`` is
            set; otherwise it is first passed through
            ``scatter_to_tensor_model_parallel_region``.

    Returns:
        A ``(output, output_bias)`` pair. When ``self.skip_bias_add`` is set,
        the bias is returned un-applied as ``output_bias``. Otherwise the bias
        (if any) is added to the output and ``output_bias`` is ``None``.
    """
    offload_ctx = self.config._cpu_offloading_context
    if offload_ctx is not None and offload_ctx.inside_context == True:
        assert (
            self.config.cpu_offloading == False
        ), "CPU Offloading cannot be enabled while using non-TE modules"

    # Obtain this rank's slice of the input.
    if self.input_is_parallel:
        local_input = input_
    else:
        assert not self.sequence_parallel
        local_input = scatter_to_tensor_model_parallel_region(input_)

    # Pick the matmul implementation according to whether the weight trains.
    self._forward_impl = (
        linear_with_grad_accumulation_and_async_allreduce
        if self.weight.requires_grad
        else linear_with_frozen_weight
    )
    partial_output = self._forward_impl(
        input=local_input,
        weight=self.weight,
        bias=None,
        gradient_accumulation_fusion=self.gradient_accumulation_fusion,
        async_grad_allreduce=False,
        sequence_parallel=False,
        need_gather_param_in_bw=self.enable_zero3
    )

    # Combine the per-rank partial results.
    if self.explicit_expert_comm:
        assert self.skip_bias_add
        reduced = partial_output
    elif self.sequence_parallel:
        reduced = reduce_scatter_to_sequence_parallel_region(partial_output)
    else:
        reduced = reduce_from_tensor_model_parallel_region(partial_output)

    if self.skip_bias_add:
        return reduced, self.bias
    if self.bias is not None:
        return reduced + self.bias, None
    return reduced, None
class LinearWithGradAccumulationAndAsyncCommunicationPipeExperts(torch.autograd.Function):
    """Linear autograd function with a ``pipe_experts`` switch.

    See linear_with_grad_accumulation_and_async_allreduce. ``forward`` takes
    ten arguments after ``ctx`` and ``backward`` returns ten values: the
    gradients for ``input``, ``weight`` and ``bias`` followed by seven ``None``.
    """

    @staticmethod
    @custom_fwd
    def forward(
        ctx,
        input,
        weight,
        bias,
        gradient_accumulation_fusion,
        async_grad_allreduce,
        sequence_parallel,
        grad_output_buffer,
        wgrad_deferral_limit,
        pipe_experts,
        ampipe_degree
    ):
        """Compute ``input @ weight.t()`` (plus ``bias``) and record state on ``ctx``.

        Args:
            ctx: Autograd context; ``input`` and ``weight`` are saved on it.
            input: Input activation.
            weight: Weight matrix; it is used transposed.
            bias: Optional bias added to the output.
            gradient_accumulation_fusion: Stored for ``backward``.
            async_grad_allreduce: Stored for ``backward``.
            sequence_parallel: When true, the matmul runs on a gathered input.
            grad_output_buffer: Stored for ``backward``.
            wgrad_deferral_limit: Stored for ``backward``.
            pipe_experts: When true (with ``sequence_parallel``), the input
                is taken from ``get_fw_ag_output()[0]`` and no all-gather is
                issued here.
            ampipe_degree: Accepted but not read by this method.

        Returns:
            The output tensor.
        """
        ctx.save_for_backward(input, weight)
        ctx.use_bias = bias is not None
        ctx.gradient_accumulation_fusion = gradient_accumulation_fusion
        ctx.async_grad_allreduce = async_grad_allreduce
        ctx.sequence_parallel = sequence_parallel
        ctx.grad_output_buffer = grad_output_buffer
        ctx.wgrad_deferral_limit = wgrad_deferral_limit
        ctx.pipe_experts = pipe_experts

        if sequence_parallel:
            global_args = get_args()
            if global_args.use_ascend_mc2 and not pipe_experts:
                # Fused all-gather + matmul through torch_npu.
                from .ascend_turbo.ascend_turbo_cfg import ascend_turbo_cfg
                group = get_tensor_model_parallel_group()
                rank = get_tensor_model_parallel_rank()
                # The function object itself is passed, not its result.
                # NOTE(review): presumably called later by get_world_size() - confirm.
                ascend_turbo_cfg.set_world_size(get_tensor_model_parallel_world_size)
                hcomm_info = None

                if torch.__version__ > "2.0":
                    global_rank = torch.distributed.get_global_rank(group, rank)
                    hcomm_info = group._get_backend(torch.device("npu")).get_hccl_comm_name(global_rank)
                else:
                    hcomm_info = group.get_hccl_comm_name(rank)

                # Merge the first two dimensions so the fused op receives a 2-D input.
                x = input.reshape(input.shape[0] * input.shape[1], input.shape[2])
                world_size = ascend_turbo_cfg.get_world_size()
                # NOTE(review): bias is passed to the fused op here and is also
                # added after the branch below - confirm it is not applied twice.
                output, _ = torch_npu.npu_all_gather_base_mm(
                    x,
                    weight.t(),
                    hcomm_info,
                    world_size,
                    bias=bias,
                    gather_index=0,
                    gather_output=(not ascend_turbo_cfg.all_gather_recomputation)
                )
                # Restore the 3-D layout, reusing input's second dimension.
                output = output.view(
                    output.shape[0] // input.shape[1], input.shape[1], output.shape[1]
                )
            elif pipe_experts:
                # Use the first entry of the list returned by get_fw_ag_output()
                # as the matmul input.
                total_input = get_fw_ag_output()[0]
                output = torch.matmul(total_input, weight.t())
            else:
                # Synchronous all-gather along dim 0 into the shared "mpu" buffer.
                world_size = get_tensor_model_parallel_world_size()
                dim_size = list(input.size())
                dim_size[0] = dim_size[0] * world_size

                all_gather_buffer = get_global_memory_buffer().get_tensor(dim_size, input.dtype, "mpu")
                torch.distributed._all_gather_base(
                    all_gather_buffer, input, group=get_tensor_model_parallel_group()
                )
                total_input = all_gather_buffer
                output = torch.matmul(total_input, weight.t())
        else:
            total_input = input
            output = torch.matmul(total_input, weight.t())

        if bias is not None:
            output = output + bias
        return output

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output):
        """Compute gradients for ``input``, ``weight`` and ``bias``.

        Args:
            ctx: Context populated by ``forward``.
            grad_output: Gradient of the loss with respect to the output.

        Returns:
            A 10-tuple ``(grad_input, grad_weight, grad_bias, None * 7)``.
            With sequence parallelism the first element is the
            reduce-scattered input gradient.
        """
        input, weight = ctx.saved_tensors
        use_bias = ctx.use_bias
        grad_output_buffer = ctx.grad_output_buffer
        wgrad_deferral_limit = ctx.wgrad_deferral_limit

        # If a buffer was supplied, stash grad_output and skip the weight
        # gradient while the limit is 0 or the buffer holds fewer than
        # wgrad_deferral_limit entries.
        wgrad_compute = True
        if grad_output_buffer is not None:
            if wgrad_deferral_limit == 0 or len(grad_output_buffer) < wgrad_deferral_limit:
                grad_output_buffer.append(grad_output)
                wgrad_compute = False

        if wgrad_compute:
            if ctx.sequence_parallel:
                world_size = get_tensor_model_parallel_world_size()
                dim_size = list(input.size())
                dim_size[0] = dim_size[0] * world_size

                # With pipe_experts a fresh buffer is allocated instead of
                # taking the shared "mpu" buffer.
                if ctx.pipe_experts:
                    all_gather_buffer = torch.empty(dim_size, dtype=input.dtype, device=torch.cuda.current_device())
                else:
                    all_gather_buffer = get_global_memory_buffer().get_tensor(dim_size, input.dtype, "mpu")

                handle = torch.distributed._all_gather_base(
                    all_gather_buffer, input, group=get_tensor_model_parallel_group(), async_op=True
                )

                # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the
                # gather is scheduled before the input gradient computation
                total_input = all_gather_buffer
            else:
                total_input = input
        grad_input = grad_output.matmul(weight)

        # The gathered input must be complete before it is used below.
        if ctx.sequence_parallel and wgrad_compute:
            handle.wait()

        if wgrad_compute:
            grad_output, total_input = prepare_input_tensors_for_wgrad_compute(
                grad_output, total_input
            )

        if ctx.async_grad_allreduce:
            # Asynchronous all-reduce
            handle = torch.distributed.all_reduce(
                grad_input, group=get_tensor_model_parallel_group(), async_op=True
            )
            # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the
            # all-reduce is scheduled before the weight gradient computation

        if ctx.sequence_parallel:
            assert not ctx.async_grad_allreduce
            dim_size = list(input.size())
            sub_grad_input = torch.empty(
                dim_size, dtype=input.dtype, device=torch.cuda.current_device(), requires_grad=False
            )
            # reduce_scatter
            handle = torch.distributed._reduce_scatter_base(
                sub_grad_input, grad_input, group=get_tensor_model_parallel_group(), async_op=True
            )
            # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the
            # reduce scatter is scheduled before the weight gradient computation

        if ctx.gradient_accumulation_fusion:
            if wgrad_compute:
                # Pass weight.main_grad to the fused kernel matching its dtype.
                if weight.main_grad.dtype == torch.float32:
                    fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32(
                        total_input, grad_output, weight.main_grad
                    )
                elif weight.main_grad.dtype in (torch.float16, torch.bfloat16):
                    fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16(
                        total_input, grad_output, weight.main_grad
                    )
                else:
                    raise RuntimeError("Unsupported gradient type for gradient accumulation fusion")

            if hasattr(weight, 'grad_added_to_main_grad'):
                # When overlap_grad_reduce is True, need to ensure that backward hooks
                # are all run on the main backprop thread to prevent deadlocks. Setup
                # dummy grad_weight tensor to prevent backward hooks from being run
                # in a background thread.
                if getattr(weight, 'zero_out_wgrad', False):
                    grad_weight = torch.zeros(
                        weight.main_grad.shape,
                        dtype=input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                else:
                    grad_weight = torch.empty(
                        weight.main_grad.shape,
                        dtype=input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                weight.grad_added_to_main_grad = True
            else:
                grad_weight = None
        else:
            # NOTE(review): total_input is only bound when wgrad_compute is
            # True; this path appears to assume wgrad is not deferred - confirm.
            grad_weight = grad_output.t().matmul(total_input)
        grad_bias = grad_output.sum(dim=0) if use_bias else None
        from mindspeed.moe.pipe_experts import get_async_bw_all_gather_count
        # Shrink grad_output's storage to zero unless the counter equals 2.
        # NOTE(review): the meaning of the value 2 is not visible here - confirm.
        if ctx.pipe_experts and get_async_bw_all_gather_count() != 2:
            grad_output.storage().resize_(0)

        if ctx.sequence_parallel:
            handle.wait()
            # Need to return None's as gradient has to flow for all the input arguments
            # provided during forward
            return sub_grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None

        if ctx.async_grad_allreduce:
            handle.wait()
        return grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None
torch.matmul(total_input, weight.t()) + elif global_args.use_ascend_mc2: + from .ascend_turbo.ascend_turbo_cfg import ascend_turbo_cfg + rank = get_tensor_model_parallel_rank() + ascend_turbo_cfg.set_world_size(get_tensor_model_parallel_world_size) + world_size = ascend_turbo_cfg.get_world_size() + group = get_tensor_model_parallel_group() + hcomm_info = None + if torch.__version__ > "2.0": + global_rank = torch.distributed.get_global_rank(group, rank) + hcomm_info = group._get_backend(torch.device("npu")).get_hccl_comm_name(global_rank) + else: + hcomm_info = group.get_hccl_comm_name(rank) + + x = input.reshape(input.shape[0] * input.shape[1], input.shape[2]) + output = torch_npu.npu_mm_reduce_scatter_base( + x, weight.t(), hcomm_info, world_size, reduce_op="sum", bias=bias + ) + ctx.hcomm_info = hcomm_info + ctx.world_size = world_size + output = output.view( + output.shape[0] // input.shape[1], input.shape[1], output.shape[1] + ) + return output + else: + output = torch.matmul(total_input, weight.t()) + output = _reduce_scatter_along_first_dim(output) + else: + output = torch.matmul(total_input, weight.t()) + if bias is not None: + output = output + bias + return output + + if sequence_parallel: + if pipe_experts: + total_input = get_fw_ag_output()[0] + output = torch.matmul(total_input, weight.t()) + elif global_args.use_ascend_mc2: + from .ascend_turbo.ascend_turbo_cfg import ascend_turbo_cfg + group = get_tensor_model_parallel_group() + rank = get_tensor_model_parallel_rank() + ascend_turbo_cfg.set_world_size(get_tensor_model_parallel_world_size) + hcomm_info = None + if torch.__version__ > "2.0": + global_rank = torch.distributed.get_global_rank(group, rank) + hcomm_info = group._get_backend(torch.device('npu')).get_hccl_comm_name(global_rank) + else: + hcomm_info = group.get_hccl_comm_name(rank) + x = input.reshape(input.shape[0] * input.shape[1], input.shape[2]) + world_size = ascend_turbo_cfg.get_world_size() + output, _ = 
torch_npu.npu_all_gather_base_mm( + x, + weight.t(), + hcomm_info, + world_size, + bias=bias, + gather_index=0, + gather_output=(not ascend_turbo_cfg.all_gather_recomputation), + ) + output = output.view( + output.shape[0] // input.shape[1], input.shape[1], output.shape[1] + ) + else: + world_size = get_tensor_model_parallel_world_size() + dim_size = list(input.size()) + dim_size[0] = dim_size[0] * world_size + all_gather_buffer = get_global_memory_buffer().get_tensor(dim_size, input.dtype, "mpu") + torch.distributed._all_gather_base( + all_gather_buffer, input, group=get_tensor_model_parallel_group(), + ) + total_input = all_gather_buffer + output = torch.matmul(total_input, weight.t()) + else: + total_input = input + output = torch.matmul(total_input, weight.t()) + if bias is not None: + output = output + bias + return output + + @staticmethod + @custom_bwd + def backward(ctx, grad_output): + input = ctx.saved_tensors[0] + weight = ctx.weight + use_bias = ctx.use_bias + sequence_parallel = ctx.sequence_parallel + pipe_experts = ctx.pipe_experts + global_args = get_args() + grad_output_gathered = grad_output + grad_input = None + if ctx.is_nano_row: + if ctx.sequence_parallel: + if pipe_experts: + grad_input = grad_output.matmul(weight) + elif global_args.use_ascend_mc2: + hcomm_info = ctx.hcomm_info + world_size = ctx.world_size + grad_output_ = grad_output.reshape( + grad_output.shape[0] * grad_output.shape[1], grad_output.shape[2] + ) + grad_input, grad_output_gathered = torch_npu.npu_all_gather_base_mm( + grad_output_, weight, hcomm_info, world_size, bias=None, gather_index=0 + ) + + grad_input = grad_input.view_as(input) + else: + grad_output_gathered = _gather_along_first_dim(grad_output) + grad_input = grad_output_gathered.matmul(weight) + else: + grad_input = grad_output.matmul(weight) + + if WeightGradStore.is_decoupleBlock: + if pipe_experts and ctx.sequence_parallel: + WeightGradStore.put( + input.clone().detach(), + None, + weight, + sequence_parallel, 
+ in_row=True, + pipe_experts=True + ) + else: + WeightGradStore.put( + input.clone().detach(), + grad_output.clone().detach(), + weight, + sequence_parallel, + in_row=True, + pipe_experts=False + ) + if hasattr(weight, 'grad_added_to_main_grad'): + grad_weight = torch.zeros( + weight.main_grad.shape, + dtype=input.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + else: + grad_weight = None + else: + total_input = input + grad_output = grad_output_gathered.contiguous() + # Convert the tensonr shapes to 2D for execution compatibility + if len(grad_output.shape) != 2: + grad_output = grad_output.view( + grad_output.shape[0] * grad_output.shape[1], grad_output.shape[2] + ) + total_input = total_input.view( + total_input.shape[0] * total_input.shape[1], total_input.shape[2] + ) + if ctx.gradient_accumulation_fusion: + if weight.main_grad.dtype == torch.float32: + fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32( + total_input, grad_output, weight.main_grad + ) + elif weight.main_grad.dtype in (torch.float16, torch.bfloat16): + fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16( + total_input, grad_output, weight.main_grad + ) + else: + raise RuntimeError("Unsupported gradient type for gradient accumulation fusion") + if hasattr(weight, 'grad_added_to_main_grad'): + if getattr(weight, 'zero_out_wgrad', False): + grad_weight = torch.zeros( + weight.main_grad.shape, + dtype=input.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + else: + grad_weight = torch.empty( + weight.main_grad.shape, + dtype=input.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + weight.grad_added_to_main_grad = True + else: + grad_weight = grad_output.t().matmul(total_input) + grad_bias = grad_output.sum(dim=0) if use_bias else None + + return grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None + + if WeightGradStore.is_decoupleBlock: + WeightGradStore.put( + input.clone().detach(), + 
grad_output.clone().detach(), + weight, + ctx.sequence_parallel + ) + if hasattr(weight, 'grad_added_to_main_grad'): + grad_weight = torch.zeros( + weight.main_grad.shape, + dtype=input.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + else: + grad_weight = None + if not WeightGradStore.is_decoupleBlock: + if ctx.sequence_parallel: + world_size = get_tensor_model_parallel_world_size() + dim_size = list(input.size()) + dim_size[0] = dim_size[0] * world_size + + all_gather_buffer = get_global_memory_buffer().get_tensor(dim_size, input.dtype, "mpu") + handle = torch.distributed._all_gather_base( + all_gather_buffer, input, group=get_tensor_model_parallel_group(), async_op=True + ) + + # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the + # gather is scheduled before the input gradient computation + total_input = all_gather_buffer + else: + total_input = input + grad_input = grad_output.matmul(weight) + + if not WeightGradStore.is_decoupleBlock: + if ctx.sequence_parallel: + handle.wait() + + # Doing gather + slicing during the NeMo forward pass can make this tensor + # not be contiguous. 
PyTorch only checks if the tensor is contiguous, and only + # clones it if it's not contiguous + + grad_output = grad_output.contiguous() + # Convert the tensor shape to 2D for execution compatibility + grad_output = grad_output.view( + grad_output.shape[0] * grad_output.shape[1], grad_output.shape[2] + ) + total_input = total_input.view( + total_input.shape[0] * total_input.shape[1], total_input.shape[2] + ) + + if ctx.async_grad_allreduce: + # Asynchronous all_reduce + handle = torch.distributed.all_reduce( + grad_input, group=get_tensor_model_parallel_group(), async_op=True + ) + # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the + # all-reduce is scheduled before the weight gradient computation + + if ctx.sequence_parallel: + assert not ctx.async_grad_allreduce + dim_size = list(input.size()) + sub_grad_input = torch.empty( + dim_size, dtype=input.dtype, device=torch.cuda.current_device(), requires_grad=False + ) + # reduce_scatter + handle = torch.distributed._reduce_scatter_base( + sub_grad_input, grad_input, group=get_tensor_model_parallel_group(), async_op=True + ) + # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the + # reduce_scatter is scheduled before the weight gradient computation + if not WeightGradStore.is_decoupleBlock: + if ctx.gradient_accumulation_fusion: + if weight.main_grad.dtype == torch.float32: + fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32( + total_input, grad_output, weight.main_grad + ) + elif weight.main_grad.dtype in (torch.float16, torch.bfloat16): + fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16( + total_input, grad_output, weight.main_grad + ) + else: + raise RuntimeError("Unsupported gradient type for gradient accumulation fusion") + + if hasattr(weight, 'grad_added_to_main_grad'): + # When overlap_grad_reduce is True, need to ensure that backward hooks + # are all run on the main backprop thread to prevent deadlocks. 
class LinearWithGradAccumulationAndAsyncCommunicationAmpipe(torch.autograd.Function):
    """Linear autograd function with ``ampipe_degree`` chunking options.

    See linear_with_grad_accumulation_and_async_allreduce. ``forward`` takes
    ten arguments after ``ctx`` and ``backward`` returns ten values: the
    gradients for ``input``, ``weight`` and ``bias`` followed by seven ``None``.
    """

    @staticmethod
    @custom_fwd
    def forward(
        ctx,
        input,
        weight,
        bias,
        gradient_accumulation_fusion,
        allreduce_dgrad,
        sequence_parallel,
        grad_output_buffer,
        wgrad_deferral_limit,
        ampipe_degree,
        is_dense_h_to_3h
    ):
        """Compute ``input @ weight.t()`` (plus ``bias``) and record state on ``ctx``.

        Args:
            ctx: Autograd context; ``input`` and ``weight`` are saved on it.
            input: Input activation.
            weight: Weight matrix; it is used transposed.
            bias: Optional bias added to the output.
            gradient_accumulation_fusion: Stored for ``backward``.
            allreduce_dgrad: Stored for ``backward``.
            sequence_parallel: When true, the matmul runs on a gathered input.
            grad_output_buffer: Stored for ``backward``.
            wgrad_deferral_limit: Stored for ``backward``.
            ampipe_degree: Number of chunks ``input`` is split into along
                dim 0 when it is greater than 1 and ``is_dense_h_to_3h`` is set.
            is_dense_h_to_3h: Selects the chunked all-gather path together
                with ``ampipe_degree > 1``.

        Returns:
            The output tensor.
        """
        ctx.save_for_backward(input, weight)
        ctx.use_bias = bias is not None
        ctx.gradient_accumulation_fusion = gradient_accumulation_fusion
        ctx.allreduce_dgrad = allreduce_dgrad
        ctx.sequence_parallel = sequence_parallel
        ctx.wgrad_deferral_limit = wgrad_deferral_limit
        ctx.grad_output_buffer = grad_output_buffer
        ctx.ampipe_degree = ampipe_degree
        ctx.is_dense_h_to_3h = is_dense_h_to_3h
        global_args = get_args()
        ampipe_tp_sp_comm_overlap = global_args.ampipe_tp_sp_comm_overlap
        ctx.ampipe_tp_sp_comm_overlap = ampipe_tp_sp_comm_overlap

        if sequence_parallel:
            if global_args.use_ascend_mc2 and ampipe_degree <= 1:
                # Fused all-gather + matmul through torch_npu.
                group = get_tensor_model_parallel_group()
                world_size = get_tensor_model_parallel_world_size()
                rank = torch.distributed.get_rank(group)
                hcomm_info = None
                if torch.__version__ > "2.0":
                    global_rank = torch.distributed.get_global_rank(group, rank)
                    hcomm_info = group._get_backend(torch.device("npu")).get_hccl_comm_name(global_rank)
                else:
                    hcomm_info = group.get_hccl_comm_name(rank)
                # Merge the first two dimensions so the fused op receives a 2-D input.
                x = input.reshape(input.shape[0] * input.shape[1], input.shape[2])
                # The second return value is not used afterwards.
                # NOTE(review): bias is passed to the fused op here and is also
                # added at the end of this method - confirm it is not applied twice.
                output, all_gather_grad_output = torch_npu.npu_all_gather_base_mm(
                    x,
                    weight.t(),
                    hcomm_info,
                    world_size,
                    bias=bias,
                    gather_index=0,
                    gather_output=False,
                )
                # Restore the 3-D layout, reusing input's second dimension.
                output = output.view(
                    int(output.shape[0] / input.shape[1]), input.shape[1], output.shape[1]
                )
            elif ampipe_degree > 1 and is_dense_h_to_3h:
                # Split the input into ampipe_degree chunks along dim 0;
                # each chunk is gathered into its own freshly allocated buffer
                # and multiplied separately, then the results are concatenated.
                input_list = input.chunk(ampipe_degree, dim=0)
                output_list = []
                for i in range(ampipe_degree):
                    input_chunk = input_list[i]
                    world_size = get_tensor_model_parallel_world_size()
                    dim_size = list(input_chunk.size())
                    dim_size[0] = dim_size[0] * world_size

                    all_gather_buffer = torch.empty(dim_size, dtype=input_chunk.dtype,
                                                    device=torch.cuda.current_device())
                    torch.distributed._all_gather_base(
                        all_gather_buffer, input_chunk, group=get_tensor_model_parallel_group()
                    )
                    output_chunk = torch.matmul(all_gather_buffer, weight.t())
                    output_list.append(output_chunk)

                output = torch.cat(output_list, dim=0)
            elif ampipe_degree > 1 and not is_dense_h_to_3h and ampipe_tp_sp_comm_overlap:
                # Consume the first entry of the list returned by
                # get_fw_ag_output() and release its storage after the matmul.
                total_input = get_fw_ag_output().pop(0)
                output = torch.matmul(total_input, weight.t())
                # NOTE(review): bias is added here and again at the end of this
                # method - confirm this is intended.
                if bias is not None:
                    output = output + bias
                total_input.untyped_storage().resize_(0)

            else:
                # Synchronous all-gather along dim 0 into the shared "mpu" buffer.
                world_size = get_tensor_model_parallel_world_size()
                dim_size = list(input.size())
                dim_size[0] = dim_size[0] * world_size

                all_gather_buffer = get_global_memory_buffer().get_tensor(dim_size, input.dtype, "mpu")
                torch.distributed._all_gather_base(
                    all_gather_buffer, input, group=get_tensor_model_parallel_group()
                )
                total_input = all_gather_buffer
                output = torch.matmul(total_input, weight.t())
        else:
            total_input = input

            output = torch.matmul(total_input, weight.t())
        if bias is not None:
            output = output + bias
        return output

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output):
        """Compute gradients for ``input``, ``weight`` and ``bias``.

        Args:
            ctx: Context populated by ``forward``.
            grad_output: Gradient of the loss with respect to the output.

        Returns:
            A 10-tuple ``(grad_input, grad_weight, grad_bias, None * 7)``.
            With sequence parallelism the first element is the
            reduce-scattered input gradient.
        """
        input, weight = ctx.saved_tensors
        use_bias = ctx.use_bias
        grad_output_buffer = ctx.grad_output_buffer
        wgrad_deferral_limit = ctx.wgrad_deferral_limit

        # If a buffer was supplied, stash grad_output and skip the weight
        # gradient while the limit is 0 or the buffer holds fewer than
        # wgrad_deferral_limit entries.
        wgrad_compute = True
        if grad_output_buffer is not None:
            if wgrad_deferral_limit == 0 or len(grad_output_buffer) < wgrad_deferral_limit:
                grad_output_buffer.append(grad_output)
                wgrad_compute = False

        if wgrad_compute:
            if ctx.sequence_parallel:
                world_size = get_tensor_model_parallel_world_size()
                dim_size = list(input.size())
                dim_size[0] = dim_size[0] * world_size
                if ctx.ampipe_degree > 1 and ctx.is_dense_h_to_3h:
                    # Reorder grad_output along dim 0 using the indices returned by
                    # get_slice_indices_from_disorder_to_order.
                    # NOTE(review): presumably this undoes the row order produced by
                    # the chunked forward - confirm.
                    new_indices = get_slice_indices_from_disorder_to_order(dim_size[0],
                                                                           ctx.ampipe_degree,
                                                                           device=torch.cuda.current_device())
                    grad_output = torch.index_select(grad_output, dim=0, index=new_indices)

                all_gather_buffer = get_global_memory_buffer().get_tensor(
                    dim_size, input.dtype, "mpu"
                )
                handle = torch.distributed._all_gather_base(
                    all_gather_buffer, input, group=get_tensor_model_parallel_group(), async_op=True
                )

                # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the
                # gather is scheduled before the input gradient computation
                total_input = all_gather_buffer
            else:
                total_input = input
        grad_input = grad_output.matmul(weight)

        # The gathered input must be complete before it is used below.
        if ctx.sequence_parallel and wgrad_compute:
            handle.wait()

        if wgrad_compute:
            grad_output, total_input = prepare_input_tensors_for_wgrad_compute(
                grad_output, total_input
            )

        if ctx.allreduce_dgrad:
            # Asynchronous all-reduce
            handle = torch.distributed.all_reduce(
                grad_input, group=get_tensor_model_parallel_group(), async_op=True
            )
            # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the
            # all-reduce is scheduled before the weight gradient computation

        if ctx.sequence_parallel:
            assert not ctx.allreduce_dgrad
            dim_size = list(input.size())
            sub_grad_input = torch.empty(
                dim_size, dtype=input.dtype, device=torch.cuda.current_device(), requires_grad=False
            )
            # reduce_scatter
            handle = torch.distributed._reduce_scatter_base(
                sub_grad_input, grad_input, group=get_tensor_model_parallel_group(), async_op=True
            )
            # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the
            # reduce scatter is scheduled before the weight gradient computation

        if ctx.gradient_accumulation_fusion:
            if wgrad_compute:
                # Pass weight.main_grad to the fused kernel matching its dtype.
                if weight.main_grad.dtype == torch.float32:
                    fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32(
                        total_input, grad_output, weight.main_grad
                    )
                elif weight.main_grad.dtype in (torch.float16, torch.bfloat16):
                    fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16(
                        total_input, grad_output, weight.main_grad
                    )
                else:
                    raise RuntimeError("Unsupported gradient type for gradient accumulation fusion")

            if hasattr(weight, 'grad_added_to_main_grad'):
                # When overlap_grad_reduce is True, need to ensure that backward hooks
                # are all run on the main backprop thread to prevent deadlocks. Setup
                # dummy grad_weight tensor to prevent backward hooks from being run
                # in a background thread.
                if getattr(weight, 'zero_out_wgrad', False):
                    grad_weight = torch.zeros(
                        weight.main_grad.shape,
                        dtype=input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                else:
                    grad_weight = torch.empty(
                        weight.main_grad.shape,
                        dtype=input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                weight.grad_added_to_main_grad = True
            else:
                grad_weight = None
        else:
            # NOTE(review): total_input is only bound when wgrad_compute is
            # True; this path appears to assume wgrad is not deferred - confirm.
            grad_weight = grad_output.t().matmul(total_input)
        grad_bias = grad_output.sum(dim=0) if use_bias else None

        if ctx.sequence_parallel:
            handle.wait()
            # Need to return None's as gradient has to flow for all the input arguments
            # provided during forward
            return sub_grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None

        if ctx.allreduce_dgrad:
            handle.wait()

        return grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None
+ + In the case of sequence parallelism, the reduce scatter of the + input gradients is done asynchronously with the calculation of the + weight gradients. + + Use of this module requires that the environment variable + CUDA_DEVICE_MAX_CONNECTIONS=1. There are a few collective + operations, noted in the code, that should be scheduled before + compute kernels to overlap the communication with the computation, + which is necessary for a speedup but not for correctness so that + ordering isn't imposed by the scheduler. Setting + CUDA_DEVICE_MAX_CONNECTIONS=1 forces the kernels to be scheduled + in the order they are called. + + Args: + + input (torch.Tensor required): input like torch.nn.functional.linear + + weight (torch.Tensor required): weight like torch.nn.functional.linear + + bias (torch.Tensor optional): bias like torch.nn.functional.linear + + gradient_accumulation_fusion (bool required): Perform the gradient + accumulation fusion, requires the custom CUDA extension + fused_weight_gradient_mlp_cuda module. To use + gradient_accumulation_fusion you must install APEX with + --cpp_ext and --cuda_ext. For example: "pip install + --global-option=\"--cpp_ext\" --global-option=\"--cuda_ext .\" + " Note that the extension requires CUDA>=11. Otherwise, you + must turn off gradient accumulation fusion." + + async_grad_allreduce (bool required): Do the allreduce of input + gradients asynchronously with the computation of weight + gradients. If sequence_parallel is True, this must be + False, as no all reduce is performed. + + sequence_parallel (bool required): Indicates that sequence + parallelism is used and thus in the forward pass the input is + all gathered, and the backward pass the input gradients are + reduce scattered. + + grad_output_buffer (List[torch.Tensor] optional): Buffer used to save + output gradients when embedding table wgrad compute is deferred. + Defaults to None.
+ """ + if allreduce_dgrad is None: + warnings.warn( + "async_grad_allreduce is deprecated and will be removed in a future release. use allreduce_dgrad instead." + ) + allreduce_dgrad = async_grad_allreduce + + args = [ + input, + weight, + bias, + gradient_accumulation_fusion, + allreduce_dgrad, + sequence_parallel, + grad_output_buffer, + wgrad_deferral_limit + ] + + if not linear_with_grad_accumulation_and_async_allreduce_moe.warned: + if os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS') != "1": + if sequence_parallel: + warnings.warn( + "When using sequence parallelism it is recommended to set the " + "environment variable CUDA_DEVICE_MAX_CONNECTIONS to 1 for " + "maximum speedup" + ) + linear_with_grad_accumulation_and_async_allreduce_moe.warned = True + + if allreduce_dgrad: + warnings.warn( + "When using async grad allreduce it is recommended to set the " + "environment variable CUDA_DEVICE_MAX_CONNECTIONS to 1 for " + "maximum speedup" + ) + linear_with_grad_accumulation_and_async_allreduce_moe.warned = True + + if get_args().use_nanopipe and parallel_state.get_pipeline_model_parallel_world_size() > 1 \ + and parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None: + if get_args().use_nanopipe and (is_nano_row or is_nano_column): + args = [ + input, + weight, + bias, + gradient_accumulation_fusion, + wgrad_deferral_limit, + async_grad_allreduce, + sequence_parallel, + pipe_experts, + is_nano_row, + is_nano_column + ] + return LinearWithGradAccumulationAndAsyncCommunication_nano.apply(*args) + if pipe_experts: + return LinearWithGradAccumulationAndAsyncCommunicationPipeExperts.apply(*args, pipe_experts, ampipe_degree) + if ampipe_degree > 1: + return LinearWithGradAccumulationAndAsyncCommunicationAmpipe.apply(*args, ampipe_degree, is_dense_h_to_3h) + + if get_args().use_nd_matmul: + args.append(pipe_experts) + args.append(matmul_id) + return LinearWithGradAccumulationAndAsyncCommunication_Nd.apply(*args) + + return 
LinearWithGradAccumulationAndAsyncCommunication.apply(*args) + + +linear_with_grad_accumulation_and_async_allreduce_moe.warned = False + + +def parallel_linear_init_wrapper(init_func): + @wraps(init_func) + def parallel_linear_init_func(self, *args, pipe_experts: bool = False, in_nano: bool = False, + ampipe_degree: int = 1, + is_dense_h_to_3h: bool = False, + **kwargs): + output = init_func(self, *args, **kwargs) + self.pipe_experts = pipe_experts + self.in_nano = in_nano + self.ampipe_degree = ampipe_degree + self.is_dense_h_to_3h = is_dense_h_to_3h + return output + return parallel_linear_init_func + + +def row_parallel_moe(self, input_): + """Forward of RowParallelLinear + + Args: + input_: 3D tensor whose order of dimension is [sequence, batch, hidden] + + Returns: + - output + - bias + """ + + if self.config._cpu_offloading_context is not None: + if self.config._cpu_offloading_context.inside_context == True: + assert ( + self.config.cpu_offloading == False + ), "CPU Offloading cannot be enabled while using non-TE modules" + + # Set up backprop all-reduce. + global_args = get_args() + if global_args.use_ascend_mc2 and not self.pipe_experts and not self.in_nano: + output = Mc2RowSeqParallelLinear.apply( + input_, self.weight, None, get_tensor_model_parallel_group() + ) + + if not self.skip_bias_add: + output = output + self.bias if self.bias is not None else output + output_bias = None + else: + output_bias = self.bias + + return output, output_bias + + if self.input_is_parallel: + input_parallel = input_ + else: + assert not self.sequence_parallel + input_parallel = scatter_to_tensor_model_parallel_region(input_) + # Matrix multiply. 
+ if not self.weight.requires_grad: + self._forward_impl = linear_with_frozen_weight + else: + self._forward_impl = linear_with_grad_accumulation_and_async_allreduce + + if self.in_nano and self.sequence_parallel: + output_parallel = self._forward_impl( + input=input_parallel, + weight=self.weight, + bias=None, + gradient_accumulation_fusion=self.gradient_accumulation_fusion, + async_grad_allreduce=False, + sequence_parallel=True, + pipe_experts=self.pipe_experts, + is_nano_row=self.in_nano, + ) + output_ = output_parallel + elif self.ampipe_degree > 1: + output_parallel = self._forward_impl( + input=input_parallel, + weight=self.weight, + bias=None, + gradient_accumulation_fusion=self.gradient_accumulation_fusion, + async_grad_allreduce=False, + sequence_parallel=False, + ampipe_degree=self.ampipe_degree, + pipe_experts=self.pipe_experts + ) + ampipe_tp_sp_comm_overlap = get_args().ampipe_tp_sp_comm_overlap + if ampipe_tp_sp_comm_overlap or self.pipe_experts: + output_ = output_parallel + elif self.sequence_parallel: + output_ = reduce_scatter_to_sequence_parallel_region(output_parallel) + else: + output_ = reduce_from_tensor_model_parallel_region(output_parallel) + else: + output_parallel = self._forward_impl( + input=input_parallel, + weight=self.weight, + bias=None, + gradient_accumulation_fusion=self.gradient_accumulation_fusion, + async_grad_allreduce=False, + sequence_parallel=False, + pipe_experts=self.pipe_experts, + is_nano_row=self.in_nano, + ) + # All-reduce across all the partitions or self.pipe_experts + if self.explicit_expert_comm or self.pipe_experts: + assert self.skip_bias_add + output_ = output_parallel + elif self.sequence_parallel: + output_ = reduce_scatter_to_sequence_parallel_region(output_parallel) + else: + output_ = reduce_from_tensor_model_parallel_region(output_parallel) + if not self.skip_bias_add: + output = (output_ + self.bias) if self.bias is not None else output_ + output_bias = None + else: + output = output_ + output_bias = 
self.bias + return output, output_bias + + +def column_parallel_moe(self, input_: torch.Tensor, weight: Optional[torch.Tensor] = None): + """Forward of ColumnParallelLinear + + Args: + input_: 3D tensor whose order of dimension is [sequence, batch, hidden] + + weight (optional): weight tensor to use, compulsory when + skip_weight_param_allocation is True. + + Returns: + - output + - bias + + """ + if weight is None: + if self.weight is None: + raise RuntimeError( + "weight was not supplied to ColumnParallelLinear forward pass " + "and skip_weight_param_allocation is True." + ) + weight = self.weight + else: + # Check the weight passed in is the correct shape + expected_shape = (self.output_size_per_partition, self.input_size) + if weight.shape != expected_shape: + raise RuntimeError( + f"supplied weight's shape is {tuple(weight.shape)}, " + f"not {expected_shape} as expected" + ) + + if self.config._cpu_offloading_context is not None: + if self.config._cpu_offloading_context.inside_context == True: + assert ( + self.config.cpu_offloading == False + ), "CPU Offloading cannot be enabled while using non-TE modules" + + bias = self.bias if not self.skip_bias_add else None + + if ( + self.allreduce_dgrad + or self.sequence_parallel + or self.explicit_expert_comm + ): + input_parallel = input_ + else: + input_parallel = copy_to_tensor_model_parallel_region(input_) + + if self.config.defer_embedding_wgrad_compute: + if ( + self.config.wgrad_deferral_limit == 0 + or len(self.embedding_activation_buffer) < self.config.wgrad_deferral_limit + ): + self.embedding_activation_buffer.append(input_parallel) + + # Matrix multiply. 
+ if not weight.requires_grad: + self._forward_impl = linear_with_frozen_weight + else: + self._forward_impl = linear_with_grad_accumulation_and_async_allreduce + + output_parallel = self._forward_impl( + input=input_parallel, + weight=weight, + bias=bias, + gradient_accumulation_fusion=self.gradient_accumulation_fusion, + async_grad_allreduce=False + if self.explicit_expert_comm + else self.allreduce_dgrad, + sequence_parallel=False if self.explicit_expert_comm else self.sequence_parallel, + grad_output_buffer=( + self.grad_output_buffer if self.config.defer_embedding_wgrad_compute else None + ), + wgrad_deferral_limit=( + self.config.wgrad_deferral_limit + if self.config.defer_embedding_wgrad_compute + else None + ), + pipe_experts=self.pipe_experts, + is_nano_column=self.in_nano, + ampipe_degree=self.ampipe_degree, + is_dense_h_to_3h=self.is_dense_h_to_3h + ) + if self.gather_output: + # All-gather across the partitions. + assert not self.sequence_parallel + output = gather_from_tensor_model_parallel_region(output_parallel) + else: + output = output_parallel + output_bias = self.bias if self.skip_bias_add else None + return output, output_bias + + +class Mc2RowSeqParallelLinear(torch.autograd.Function): + @staticmethod + def forward(ctx, input_, weight, bias, group): + ctx.save_for_backward(input_, weight) + ctx.use_bias = bias is not None + + from .ascend_turbo.ascend_turbo_cfg import ascend_turbo_cfg + rank = get_tensor_model_parallel_rank() + ascend_turbo_cfg.set_world_size(get_tensor_model_parallel_world_size) + world_size = ascend_turbo_cfg.get_world_size() + hcomm_info = None + + if torch.__version__ > "2.0": + global_rank = torch.distributed.get_global_rank(group, rank) + hcomm_info = group._get_backend(torch.device("npu")).get_hccl_comm_name( + global_rank + ) + else: + hcomm_info = group.get_hccl_comm_name(rank) + + x = input_.reshape(input_.shape[0] * input_.shape[1], input_.shape[2]) + + output = torch_npu.npu_mm_reduce_scatter_base( + x, weight.t(), 
hcomm_info, world_size, reduce_op="sum", bias=bias + ) + + ctx.hcomm_info = hcomm_info + ctx.world_size = world_size + + output = output.view( + output.shape[0] // input_.shape[1], input_.shape[1], output.shape[1] + ) + + return output + + @staticmethod + def backward(ctx, grad_output): + input_, weight = ctx.saved_tensors + hcomm_info = ctx.hcomm_info + world_size = ctx.world_size + + grad_output_ = grad_output.reshape( + grad_output.shape[0] * grad_output.shape[1], grad_output.shape[2] + ) + + grad_input, all_gather_grad_output = torch_npu.npu_all_gather_base_mm( + grad_output_, weight, hcomm_info, world_size, bias=None, gather_index=0 + ) + grad_input = grad_input.view_as(input_) + + total_input = input_ + total_input = total_input.view( + total_input.shape[0] * total_input.shape[1], total_input.shape[2] + ) + grad_weight = all_gather_grad_output.t().matmul(total_input) + + is_grad_bias_needed = ctx.needs_input_grad[2] + if is_grad_bias_needed and ctx.use_bias: + grad_bias = ( + grad_output.sum(dim=0) + if grad_output.is_contiguous() + else grad_output.t().sum(dim=1) + + ) + else: + grad_bias = None + + return grad_input, grad_weight, grad_bias, None + + +def _initialize_affine_weight_cpu_2d(weight, partition_dim, stride=1, return_master_weight=False, *, + config: TransformerConfig): + """Initialize affine weight for model parallel when use tp-2d""" + set_tensor_model_parallel_attributes( + tensor=weight, is_parallel=True, dim=partition_dim, stride=stride + ) + + if partition_dim == 1: + row_num = TPYCollectiveComm.get_comm_group_world_size() + col_num = TPXCollectiveComm.get_comm_group_world_size() + else: + row_num = TPXCollectiveComm.get_comm_group_world_size() + col_num = TPYCollectiveComm.get_comm_group_world_size() + + # Initialize master weight + split_input_size, split_output_size = weight.size() + input_size = split_input_size * row_num + output_size = split_output_size * col_num + + master_weight = torch.empty(input_size, output_size, 
dtype=torch.float, requires_grad=False) + config.init_method(master_weight) + + master_weight = master_weight.to(dtype=config.params_dtype) + + x = TPXCollectiveComm.get_comm_rank() + y = TPYCollectiveComm.get_comm_rank() + + rows = torch.chunk(master_weight, row_num, dim=0) + if partition_dim == 1: + row_idx = y + col_idx = x + else: + row_idx = x + col_idx = y + + row = rows[row_idx] + cols = torch.chunk(row, col_num, dim=1) + final_weight = cols[col_idx].contiguous() + weight.data.copy_(final_weight) + + if return_master_weight: + return master_weight + + +def _initialize_affine_weight_cpu_nd( + weight, + output_size, + input_size, + input_size_per_partition, + output_size_per_partition, + init_method, + stride=1, + return_master_weight=False, + *, + params_dtype=torch.float32 +): + """Initialize affine weight for model parallel when use nd-matmul""" + set_tensor_model_parallel_attributes( + tensor=weight, is_parallel=True, dim=0, stride=stride + ) + + # Initialize master weight + master_weight = torch.empty(output_size, input_size, dtype=torch.float, requires_grad=False) + init_method(master_weight) + + master_weight = master_weight.to(dtype=params_dtype) + # Split and copy + rank = ps.get_tensor_model_parallel_rank() + world_size = ps.get_tensor_model_parallel_world_size() + + def compute_target_rank(rank, row_num, col_num): + return rank % row_num * col_num + rank // row_num + + # The weight positions of nd and megatron are different. So weight needs to be rearranged. + # This rearrangement is only to make the calculations of nd and megatron consistent. + # Even if this rearrangement is removed, it will not affect the correctness of nd calculation. 
+ row_num = input_size // input_size_per_partition + col_num = output_size // output_size_per_partition + weight_list = torch.split(master_weight, master_weight.size()[0] // world_size, dim=0) + tensor_list = [weight_list[compute_target_rank(i, row_num, col_num)] for i in range(world_size)] + master_weight = torch.cat(tensor_list, dim=0) + + weight_list_1 = torch.split(master_weight, input_size_per_partition, dim=1) + weight_1 = weight_list_1[rank // col_num] + weight_list_2 = torch.split(weight_1, output_size_per_partition, dim=0) + my_weight_list = weight_list_2[rank % col_num:: world_size] + + with torch.no_grad(): + torch.cat(my_weight_list, dim=0, out=weight) + if return_master_weight: + return master_weight + return None + + +class LinearWithGradAccumulationAndAsyncCommunication_Nd(torch.autograd.Function): + + @staticmethod + def forward( + ctx, + input, + weight, + bias, + gradient_accumulation_fusion, + async_grad_allreduce, + wgrad_deferral_limit, + sequence_parallel, + grad_output_buffer, + pipe_experts, + matmul_id, + ): + if sequence_parallel: + raise AssertionError( + 'Nd_matmul cannot be used with sequence_parallel.' + 'If you want to train long sequences, ' + 'you can use ulysess or context_parallel that is compatible with nd_matmul.' 
+ ) + ctx.use_bias = bias is not None + ctx.gradient_accumulation_fusion = gradient_accumulation_fusion + ctx.async_grad_allreduce = async_grad_allreduce + ctx.wgrad_deferral_limit = wgrad_deferral_limit + ctx.sequence_parallel = sequence_parallel + ctx.save_for_backward(input, weight) + + if matmul_id == 1: + world_size1 = get_tensor_model_parallel_world_size_for_nd1_dim1() + comm_group1 = get_tensor_model_parallel_group_for_nd1_dim1() + world_size2 = get_tensor_model_parallel_world_size_for_nd1_dim2() + comm_group2 = get_tensor_model_parallel_group_for_nd1_dim2() + else: + world_size1 = get_tensor_model_parallel_world_size_for_nd2_dim1() + comm_group1 = get_tensor_model_parallel_group_for_nd2_dim1() + world_size2 = get_tensor_model_parallel_world_size_for_nd2_dim2() + comm_group2 = get_tensor_model_parallel_group_for_nd2_dim2() + + ctx.world_size1 = world_size1 + ctx.comm_group1 = comm_group1 + ctx.world_size2 = world_size2 + ctx.comm_group2 = comm_group2 + + last_dim = input.dim() - 1 + total_input_list = [torch.empty_like(input) for _ in range(world_size1)] + torch.distributed.all_gather(total_input_list, input, group=comm_group1) + total_input = torch.cat(total_input_list, dim=last_dim) + + output_parallel = torch.matmul(total_input, weight.t()) + output_parallel = output_parallel.transpose(0, 2) + + dim_size = list(output_parallel.size()) + dim_size[0] //= world_size2 + output = torch.empty(dim_size, dtype=output_parallel.dtype, device=torch.cuda.current_device()) + torch.distributed._reduce_scatter_base( + output, output_parallel.contiguous(), group=comm_group2 + ) + output = output.transpose(0, 2).contiguous() + if bias is not None: + output = output + bias + return output + + @staticmethod + def backward(ctx, grad_output): + world_size1 = ctx.world_size1 + comm_group1 = ctx.comm_group1 + world_size2 = ctx.world_size2 + comm_group2 = ctx.comm_group2 + input, weight = ctx.saved_tensors + use_bias = ctx.use_bias + last_dim = grad_output.dim() - 1 + + 
grad_output_ag_list = [torch.empty_like(grad_output) for _ in range(world_size2)] + torch.distributed.all_gather(grad_output_ag_list, grad_output.contiguous(), group=comm_group2) + grad_output_ag = torch.cat(grad_output_ag_list, dim=last_dim) + + total_input_list = [torch.empty_like(input) for _ in range(world_size1)] + handle1 = torch.distributed.all_gather(total_input_list, input, group=comm_group1, async_op=True) + + grad_bias = grad_output_ag.view( + grad_output_ag.shape[0] * grad_output_ag.shape[1], grad_output_ag.shape[2] + ).sum(dim=0) if use_bias else None + + grad_input = grad_output_ag.matmul(weight) + + grad_input = grad_input.transpose(0, 2) + dim_size = list(grad_input.size()) + dim_size[0] = dim_size[0] // world_size1 + + handle1.wait() + total_input = torch.cat(total_input_list, dim=last_dim) + + grad_input_rs = torch.empty(dim_size, dtype=grad_input.dtype, device=torch.cuda.current_device()) + + handle2 = torch.distributed._reduce_scatter_base( + grad_input_rs, grad_input.contiguous(), group=comm_group1, async_op=True + ) + + grad_output_ag = grad_output_ag.view( + grad_output_ag.shape[0] * grad_output_ag.shape[1], grad_output_ag.shape[2] + ) + total_input = total_input.view( + total_input.shape[0] * total_input.shape[1], total_input.shape[2] + ) + + if ctx.gradient_accumulation_fusion: + if weight.main_grad.dtype == torch.float32: + fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32( + total_input, grad_output_ag, weight.main_grad + ) + elif weight.main_grad.dtype in (torch.float16, torch.bfloat16): + fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16( + total_input, grad_output_ag, weight.main_grad + ) + else: + raise RuntimeError("Unsupported gradient type for gradient accumulation fusion") + + if hasattr(weight, 'grad_added_to_main_grad'): + # When overlap_grad_reduce is True, need to ensure that backward hooks + # are all run on the main backprop thread to prevent deadlocks. 
Setup + # dummy grad_weight tensor to prevent backward hooks from being run + # in a background thread. + grad_weight = torch.empty( + weight.main_grad.shape, + dtype=input.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + weight.grad_added_to_main_grad = True + else: + grad_weight = None + else: + grad_weight = grad_output_ag.t().matmul(total_input) + + handle2.wait() + grad_input_rs = grad_input_rs.transpose(0, 2).contiguous() + return grad_input_rs, grad_weight, grad_bias, None, None, None, None, None, None, None, None + + +class Nd_ParallelLinear(torch.nn.Module): + def __init__( + self, + input_size: int, + output_size: int, + *, + config: ModelParallelConfig, + init_method: Callable, + bias: bool, + input_is_parallel: bool, + skip_bias_add: bool, + stride: int = 1, + keep_master_weight_for_test: bool = False, + is_expert: bool = False, + tp_comm_buffer_name: str = None, # Not used + matmul_id: int = 1, + ): + """Nd_ParallelLinear is used to replace the columnParallelLinear and RowParallelLinear in Megatron TP. + + Args: + matmul_id: which GEMM operation within the attention or FFN block. + if matmul_id is 1 in attention, which represents GEMM for compute QKV. 
+ """ + super(Nd_ParallelLinear, self).__init__() + + self.input_size = input_size + self.output_size = output_size + self.input_is_parallel = input_is_parallel + if matmul_id == 1: + self.world_size_dim1 = get_tensor_model_parallel_world_size_for_nd1_dim1() + self.world_size_dim2 = get_tensor_model_parallel_world_size_for_nd1_dim2() + else: + self.world_size_dim1 = get_tensor_model_parallel_world_size_for_nd2_dim1() + self.world_size_dim2 = get_tensor_model_parallel_world_size_for_nd2_dim2() + + self.matmul_id = matmul_id + self.input_size_per_partition = divide(input_size, self.world_size_dim2) + self.output_size_per_partition = divide(output_size, self.world_size_dim1) + + self.skip_bias_add = skip_bias_add + self.config = config + self.is_expert = is_expert + self.expert_parallel = config.expert_model_parallel_size > 1 + self.gradient_accumulation_fusion = config.gradient_accumulation_fusion + self.sequence_parallel = config.sequence_parallel + if self.sequence_parallel: + raise RuntimeError( + 'Nd_matmul cannot be used with sequence_parallel.' + 'If you want to train long sequences, ' + 'you can use ulysess or context_parallel that is compatible with nd_matmul.' 
+ ) + + if config.use_cpu_initialization: + self.weight = torch.nn.Parameter( + torch.empty(self.output_size, self.input_size_per_partition, dtype=config.params_dtype) + ) + + if config.perform_initialization: + self.master_weight = _initialize_affine_weight_cpu_nd( + self.weight, + self.output_size, + self.input_size, + self.input_size_per_partition, + self.output_size_per_partition, + init_method, + stride=stride, + return_master_weight=keep_master_weight_for_test, + params_dtype=config.params_dtype + ) + else: + self.weight = torch.nn.Parameter( + torch.empty( + self.output_size_per_partition, + self.input_size_per_partition, + device=torch.cuda.current_device(), + dtype=config.params_dtype + ) + ) + if config.perform_initialization: + _initialize_affine_weight_gpu( + self.weight, + init_method, + partition_dim=1, + stride=stride, + expert_parallel=(self.is_expert and self.expert_parallel) + ) + + setattr(self.weight, 'allreduce', not (self.is_expert and self.expert_parallel)) + + if bias: + if config.use_cpu_initialization: + self.bias = torch.nn.Parameter( + torch.empty(self.output_size, dtype=config.params_dtype) + ) + else: + self.bias = torch.nn.Parameter( + torch.empty( + self.output_size, + device=torch.cuda.current_device(), + dtype=config.params_dtype + ) + ) + + if config.perform_initialization: + with torch.no_grad(): + self.bias.zero_() + + setattr(self.bias, 'allreduce', not (self.is_expert and self.expert_parallel)) + setattr(self.bias, 'sequence_parallel', self.sequence_parallel) + else: + self.register_parameter('bias', None) + + self.explicit_expert_comm = self.is_expert and ( + self.sequence_parallel or self.expert_parallel + ) + + def forward(self, input_): + self._forward_impl = linear_with_grad_accumulation_and_async_allreduce_moe + output_ = self._forward_impl( + input=input_, + weight=self.weight, + bias=None, + gradient_accumulation_fusion=self.gradient_accumulation_fusion, + async_grad_allreduce=False, + sequence_parallel=False, + 
matmul_id=self.matmul_id + ) + + if not self.skip_bias_add: + output = (output_ + self.bias) if self.bias is not None else output_ + output_bias = None + else: + output = output_ + output_bias = self.bias + + return output, output_bias diff --git a/model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/__init__.py b/model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/__init__.py new file mode 100644 index 000000000..66da6a3f4 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/__init__.py @@ -0,0 +1 @@ +# lcal coc (communication over computation) diff --git a/model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/coc_parallel_linears_all_reduce.py b/model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/coc_parallel_linears_all_reduce.py new file mode 100644 index 000000000..d15a2d575 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/coc_parallel_linears_all_reduce.py @@ -0,0 +1,73 @@ +from functools import reduce +import torch +import torch_npu + +from .min_comm_cfg import min_comm_config +from .matmul_soc_friendly import get_aligned_mm_inputs +from .rewrite_parallel_linears_all_reduce import RewriteColumnAllReduceFunction, RewriteRowAllReduceFunction +from .coc_utils import set_context, CommunicationType, COCParallel +from .coc_utils import reshape_to_2D, is_grad_needed, get_parallel_num, get_output_shape + + +class COCColumnAllReduceFunction(RewriteColumnAllReduceFunction): + @staticmethod + def backward(ctx, grad_output): + input_, weight = ctx.saved_tensors + is_grad_weight_needed, is_grad_bias_needed = is_grad_needed(ctx.needs_input_grad) + grad_input = grad_output.matmul(weight) + handle = torch.distributed.all_reduce(grad_input, group=min_comm_config.tp_group, async_op=True) + grad_weight, grad_bias = None, None + if is_grad_weight_needed: + grad_output = reshape_to_2D(grad_output) + grad_weight = grad_output.t().matmul(reshape_to_2D(input_)) + handle.wait() + grad_bias = 
grad_output.sum(dim=0) if ctx.use_bias and is_grad_bias_needed else None + else: + handle.wait() + + return grad_input, grad_weight, grad_bias + + +class COCRowAllReduceFunction(RewriteRowAllReduceFunction): + @staticmethod + def forward(ctx, input_, weight, bias): + set_context(ctx, input_, weight, bias) + trans_weight = weight.t() + + parallel_num = get_parallel_num(m=reduce(lambda x, y: x * y, input_.shape[:-1]), + k=trans_weight.shape[0], + n=trans_weight.shape[1]) + if parallel_num == 1: + return RewriteRowAllReduceFunction.forward(ctx, input_, weight, bias) + + output_orig_shape = get_output_shape(input_, trans_weight, 1, is_gather=True) + input_ = reshape_to_2D(input_) + + if min_comm_config.matmul_soc_friendly_enabled: + input_, trans_weight = get_aligned_mm_inputs(input_, trans_weight, sp_coef=min_comm_config.tp_world_size, + parallel_num=parallel_num) + + def compute_fcn(input_tensor, output_tensor): + torch.matmul(input_tensor, trans_weight, out=output_tensor) + return output_tensor + + coc_all_gather = COCParallel(input_, CommunicationType.ALL_REDUCE, compute_fcn, compute_first=True, + weight_shape_list=list(trans_weight.shape)) + output_ = coc_all_gather.run() + output_ = output_.reshape(output_orig_shape) + if bias is not None: + output_ = output_ + bias + return output_ + + @staticmethod + def backward(ctx, grad_output): + total_input, weight = ctx.saved_tensors + is_grad_weight_needed, is_grad_bias_needed = is_grad_needed(ctx.needs_input_grad) + grad_input = grad_output.matmul(weight) + grad_weight, grad_bias = None, None + if is_grad_weight_needed: + grad_output = reshape_to_2D(grad_output) + grad_weight = grad_output.t().matmul(reshape_to_2D(total_input)) + grad_bias = grad_output.sum(dim=0) if ctx.use_bias and is_grad_bias_needed else None + + return grad_input, grad_weight, grad_bias diff --git a/model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/coc_parallel_linears_all_reduce_fused.py 
b/model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/coc_parallel_linears_all_reduce_fused.py new file mode 100644 index 000000000..3520f363d --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/tensor_parallel/lcal_coc/coc_parallel_linears_all_reduce_fused.py @@ -0,0 +1,43 @@ +from functools import reduce + +from .coc_parallel_linears_all_reduce import COCRowAllReduceFunction +from .rewrite_parallel_linears_all_reduce import RewriteRowAllReduceFunction +from .coc_utils import get_parallel_num, get_output_shape, allocate_for_output +from .coc_utils import set_context, allocate_for_output, reshape_to_2D, is_grad_needed + + +class FusedCOCRowAllReduceFunction(RewriteRowAllReduceFunction): + @staticmethod + def forward(ctx, input_, weight, bias): + from mindspeed.ops.lcal_functional import coc_ops + + set_context(ctx, input_, weight, bias) + parallel_num = get_parallel_num(m=reduce(lambda x, y: x * y, input_.shape[:-1]), + k=weight.shape[1], + n=weight.shape[0]) + if parallel_num == 1: + return RewriteRowAllReduceFunction.forward(ctx, input_, weight, bias) + elif parallel_num in [2, 4, 8]: + return COCRowAllReduceFunction.forward(ctx, input_, weight, bias) + + output_orig_shape = get_output_shape(input_, weight.t(), 1, is_gather=True) + input_ = reshape_to_2D(input_) + output_ = allocate_for_output(input_, weight.t(), 1, is_gather=True) + + coc_ops.matmul_all_reduce(input_, weight, output_, bias) + output_ = output_.reshape(output_orig_shape) + + return output_ + + @staticmethod + def backward(ctx, grad_output): + total_input, weight = ctx.saved_tensors + is_grad_weight_needed, is_grad_bias_needed = is_grad_needed(ctx.needs_input_grad) + grad_input = grad_output.matmul(weight) + grad_weight, grad_bias = None, None + if is_grad_weight_needed: + grad_output = reshape_to_2D(grad_output) + grad_weight = grad_output.t().matmul(reshape_to_2D(total_input)) + grad_bias = grad_output.sum(dim=0) if ctx.use_bias and is_grad_bias_needed else None + + return 
class COCColumnSeqParallelFunction(torch.autograd.Function):
    """Column linear for sequence parallelism with all-gather overlapped against matmul.

    The forward gathers the input along dim 0 in ``parallel_num`` chunks through
    ``COCParallel`` (communication first) and multiplies each gathered chunk by
    the transposed weight. The backward launches an asynchronous reduce-scatter
    of the input gradient and computes the weight gradient before waiting on it.
    """

    @staticmethod
    def forward(ctx, input_, weight, bias):
        """Return ``all_gather(input_) @ weight.t() (+ bias)``.

        Args:
            ctx: autograd context.
            input_: local activation shard; reshaped to 2D internally.
            weight: weight tensor; its transpose is the right matmul operand.
            bias: optional bias added to the final output.
        """
        # weight is kept as a plain attribute, only input_ goes through save_for_backward.
        ctx.save_for_backward(input_)
        ctx.use_bias = bias is not None
        ctx.weight = weight
        trans_weight = weight.t()

        # m is the row count after the all-gather, hence the tp_world_size factor.
        parallel_num = get_parallel_num(m=reduce(lambda x, y: x * y, input_.shape[:-1]) * min_comm_config.tp_world_size,
                                        k=trans_weight.shape[0],
                                        n=trans_weight.shape[1])
        if parallel_num == 1:
            # No chunking requested: use the non-overlapped rewrite implementation.
            return RewriteColumnSeqParallelFunction.forward(ctx, input_, weight, bias)

        output_orig_shape = get_output_shape(input_, trans_weight, min_comm_config.tp_world_size, is_gather=True)
        gathered_input_shape = get_output_shape(input_, None, min_comm_config.tp_world_size, is_gather=True)
        input_ = reshape_to_2D(input_)

        if min_comm_config.matmul_soc_friendly_enabled:
            input_, trans_weight = get_aligned_mm_inputs(input_, trans_weight, sp_coef=min_comm_config.tp_world_size,
                                                         parallel_num=parallel_num)

        def compute_fcn(input_tensor, output_tensor):
            # Write the product directly into the output slice supplied by COCParallel.
            torch.matmul(input_tensor, trans_weight, out=output_tensor)
            return output_tensor

        coc_parallel = COCParallel(input_, CommunicationType.ALL_GATHER, compute_fcn, compute_first=False,
                                   weight_shape_list=list(trans_weight.shape), parallel_num=parallel_num)
        output = coc_parallel.run()
        # Chunked gathering yields rows grouped as [chunk, rank, ...]; regroup them as [rank, chunk, ...].
        output = shuffle_as_coc_reduce_scatter(output, min_comm_config.tp_world_size, parallel_num)
        if not min_comm_config.all_gather_recomputation_enabled:
            # Keep the gathered input for backward instead of gathering it again there.
            total_input = shuffle_as_coc_reduce_scatter(coc_parallel.comm_output, min_comm_config.tp_world_size,
                                                        parallel_num)
            ctx.total_input = total_input.reshape(gathered_input_shape)
        output = output.reshape(output_orig_shape)
        if bias is not None:
            output = output + bias
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``(sub_grad_input, grad_weight, grad_bias)``.

        ``sub_grad_input`` is the reduce-scattered input gradient and has the
        shape of the local forward input.
        """
        input_ = ctx.saved_tensors[0]
        weight = ctx.weight
        grad_input_orig_shape = get_output_shape(grad_output, weight, 1, is_gather=True)
        grad_output = reshape_to_2D(grad_output)

        is_grad_weight_needed, is_grad_bias_needed = is_grad_needed(ctx.needs_input_grad)
        total_input_work, total_input = None, None

        if is_grad_weight_needed:
            if min_comm_config.all_gather_recomputation_enabled:
                # Start re-gathering the input now; it is awaited just before the weight gradient.
                total_input_work, total_input = async_gather_along_first_dim(input_, min_comm_config.tp_group,
                                                                             min_comm_config.tp_world_size)
            else:
                total_input = ctx.total_input

            # if grad_output.shape[-1] is not 512B aligned, transpose its memory alignment but keep its shape
            if grad_output.is_contiguous() and (grad_output.shape[-1] * grad_output.element_size()) % ALIGN_SIZE > 0:
                grad_output = grad_output.t().contiguous().t()
        grad_input = grad_output.matmul(weight)
        grad_input = grad_input.reshape(grad_input_orig_shape)
        # NOTE(review): allocates with torch.cuda.current_device() while coc_utils uses
        # torch.npu.current_device(); presumably equivalent in this environment - confirm.
        sub_grad_input = torch.empty(list(input_.size()), dtype=input_.dtype, device=torch.cuda.current_device())
        # Asynchronous so that the weight-gradient work below can be issued before it completes.
        sub_grad_input_work = torch.distributed._reduce_scatter_base(sub_grad_input, grad_input,
                                                                     group=min_comm_config.tp_group, async_op=True)
        grad_weight, grad_bias = None, None
        if is_grad_weight_needed:
            if min_comm_config.all_gather_recomputation_enabled:
                total_input_work.wait()
            total_input = reshape_to_2D(total_input)
            if get_args().gradient_accumulation_fusion:
                # Accumulate directly into weight.main_grad with the op matching its dtype.
                if weight.main_grad.dtype == torch.float32:
                    npu_matmul_add_fp32(
                        total_input, grad_output, weight.main_grad
                    )
                elif weight.main_grad.dtype in (torch.float16, torch.bfloat16):
                    npu_matmul_add_fp16(
                        total_input, grad_output, weight.main_grad
                    )
                else:
                    raise RuntimeError("Unsupported gradient type for gradient accumulation fusion")

                if hasattr(weight, 'grad_added_to_main_grad'):
                    # When overlap_grad_reduce is True, need to ensure that backward hooks
                    # are all run on the main backprop thread to prevent deadlocks. Setup
                    # dummy grad_weight tensor to prevent backward hooks from being run
                    # in a background thread.
                    if getattr(weight, 'zero_out_wgrad', False):
                        grad_weight = torch.zeros(
                            weight.main_grad.shape,
                            dtype=total_input.dtype,
                            device=torch.cuda.current_device(),
                            requires_grad=False,
                        )
                    else:
                        grad_weight = torch.empty(
                            weight.main_grad.shape,
                            dtype=total_input.dtype,
                            device=torch.cuda.current_device(),
                            requires_grad=False,
                        )
                    weight.grad_added_to_main_grad = True
                else:
                    grad_weight = None
            else:
                grad_weight = grad_output.t().matmul(total_input)
            sub_grad_input_work.wait()
            if is_grad_bias_needed and ctx.use_bias:
                # Sum over rows; the second form does the same on the re-laid-out tensor.
                grad_bias = grad_output.sum(dim=0) if grad_output.is_contiguous() else grad_output.t().sum(dim=1)
        else:
            sub_grad_input_work.wait()
        return sub_grad_input, grad_weight, grad_bias
class COCRowSeqParallelFunction(torch.autograd.Function):
    """Row linear for sequence parallelism with matmul overlapped against reduce-scatter.

    The forward computes the matmul chunk by chunk and reduce-scatters each
    chunk through ``COCParallel`` (compute first). The backward all-gathers
    ``grad_output`` chunk by chunk (communication first) while multiplying the
    gathered chunks by the weight.
    """

    @staticmethod
    def forward(ctx, input_, weight, bias):
        """Return ``reduce_scatter(input_ @ weight.t()) (+ bias)``."""
        ctx.save_for_backward(input_)
        ctx.use_bias = bias is not None
        ctx.weight = weight
        ctx.world_size = min_comm_config.tp_world_size
        trans_weight = weight.t()

        parallel_num = get_parallel_num(m=reduce(lambda x, y: x * y, input_.shape[:-1]),
                                        k=trans_weight.shape[0],
                                        n=trans_weight.shape[1])
        if parallel_num == 1:
            # No chunking requested: use the non-overlapped rewrite implementation.
            return RewriteRowSeqParallelFunction.forward(ctx, input_, weight, bias)

        output_orig_shape = get_output_shape(input_, trans_weight, min_comm_config.tp_world_size, is_gather=False)
        input_ = reshape_to_2D(input_)

        if min_comm_config.matmul_soc_friendly_enabled:
            input_, trans_weight = get_aligned_mm_inputs(input_, trans_weight, parallel_num=parallel_num)

        def compute_fcn(input_tensor):
            sub_output = torch.matmul(input_tensor, trans_weight)
            return sub_output

        # Regroup rows from [rank, chunk, ...] to [chunk, rank, ...] so every chunk
        # handed to COCParallel contains a piece for each rank.
        input_ = shuffle_as_coc_all_gather(input_, ctx.world_size, parallel_num)
        coc_reduce_scatter = COCParallel(input_, CommunicationType.REDUCE_SCATTER, compute_fcn, compute_first=True,
                                         weight_shape_list=list(trans_weight.shape), parallel_num=parallel_num)
        output_ = coc_reduce_scatter.run()
        output_ = output_.reshape(output_orig_shape)
        if bias is not None:
            output_ = output_ + bias
        return output_

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``(grad_input, grad_weight, grad_bias)``."""
        total_input = ctx.saved_tensors[0]
        weight = ctx.weight

        # m is the row count after the all-gather of grad_output.
        parallel_num = get_parallel_num(
            m=reduce(lambda x, y: x * y, grad_output.shape[:-1]) * min_comm_config.tp_world_size,
            k=weight.shape[0],
            n=weight.shape[1]
        )
        if parallel_num == 1:
            return RewriteRowSeqParallelFunction.backward(ctx, grad_output)

        grad_input_orig_shape = get_output_shape(grad_output, weight, min_comm_config.tp_world_size, is_gather=True)
        grad_output = reshape_to_2D(grad_output)

        if min_comm_config.matmul_soc_friendly_enabled:
            # NOTE(review): `weight` is rebound here; confirm the tensor returned by
            # get_aligned_mm_inputs still carries the attributes read further down
            # (main_grad, grad_added_to_main_grad, zero_out_wgrad).
            grad_output, weight = get_aligned_mm_inputs(grad_output, weight, sp_coef=min_comm_config.tp_world_size,
                                                        parallel_num=parallel_num)

        def compute_fcn(input_tensor, output_tensor):
            # Write the product directly into the output slice supplied by COCParallel.
            torch.matmul(input_tensor, weight, out=output_tensor)
            return output_tensor

        is_grad_weight_needed, is_grad_bias_needed = is_grad_needed(ctx.needs_input_grad)

        coc_all_gather = COCParallel(grad_output, CommunicationType.ALL_GATHER, compute_fcn, compute_first=False,
                                     weight_shape_list=list(weight.shape), parallel_num=parallel_num)
        grad_input = coc_all_gather.run()
        # Regroup rows from [chunk, rank, ...] to [rank, chunk, ...].
        grad_input = shuffle_as_coc_reduce_scatter(grad_input, ctx.world_size, parallel_num)

        grad_input = grad_input.reshape(grad_input_orig_shape)

        grad_weight, grad_bias = None, None

        if is_grad_weight_needed:
            # Reuse the grad_output that was already gathered while computing grad_input.
            grad_output = coc_all_gather.comm_output
            grad_output = shuffle_as_coc_reduce_scatter(grad_output, ctx.world_size, parallel_num)
            total_input = reshape_to_2D(total_input)
            if get_args().gradient_accumulation_fusion:
                # Accumulate directly into weight.main_grad with the op matching its dtype.
                if weight.main_grad.dtype == torch.float32:
                    npu_matmul_add_fp32(
                        total_input, grad_output, weight.main_grad
                    )
                elif weight.main_grad.dtype in (torch.float16, torch.bfloat16):
                    npu_matmul_add_fp16(
                        total_input, grad_output, weight.main_grad
                    )
                else:
                    raise RuntimeError("Unsupported gradient type for gradient accumulation fusion")

                if hasattr(weight, 'grad_added_to_main_grad'):
                    # When overlap_grad_reduce is True, need to ensure that backward hooks
                    # are all run on the main backprop thread to prevent deadlocks. Setup
                    # dummy grad_weight tensor to prevent backward hooks from being run
                    # in a background thread.
                    if getattr(weight, 'zero_out_wgrad', False):
                        grad_weight = torch.zeros(
                            weight.main_grad.shape,
                            dtype=total_input.dtype,
                            device=torch.cuda.current_device(),
                            requires_grad=False,
                        )
                    else:
                        grad_weight = torch.empty(
                            weight.main_grad.shape,
                            dtype=total_input.dtype,
                            device=torch.cuda.current_device(),
                            requires_grad=False,
                        )
                    weight.grad_added_to_main_grad = True
                else:
                    grad_weight = None
            else:
                grad_weight = grad_output.t().matmul(total_input)
            if is_grad_bias_needed and ctx.use_bias:
                grad_bias = grad_output.sum(dim=0) if grad_output.is_contiguous() else grad_output.t().sum(dim=1)

        return grad_input, grad_weight, grad_bias
class FusedCOCColumnSeqParallelFunction(torch.autograd.Function):
    """Column linear for sequence parallelism backed by fused ``coc_ops`` kernels.

    ``forward`` dispatches on ``get_parallel_num(..., default_parallel_num=-1)``:
    1 selects the rewrite implementation, 2/4/8 the script-based COC
    implementation, any other value the fused ``all_gather_matmul`` kernel.
    """

    @staticmethod
    def forward(ctx, input_, weight, bias):
        """Return the all-gathered linear output with dim 0 scaled by ``tp_world_size``."""
        # Imported lazily so the module can be loaded without the lcal ops package.
        from mindspeed.ops.lcal_functional import coc_ops

        set_context(ctx, input_, weight, bias)

        # Positional arguments are (m, k, n); m is the row count after the all-gather.
        parallel_num = get_parallel_num(reduce(lambda x, y: x * y, input_.shape[:-1]) * min_comm_config.tp_world_size,
                                        weight.shape[1], weight.shape[0], default_parallel_num=-1)
        if parallel_num == 1:
            return RewriteColumnSeqParallelFunction.forward(ctx, input_, weight, bias)
        elif parallel_num in [2, 4, 8]:
            return COCColumnSeqParallelFunction.forward(ctx, input_, weight, bias)

        output_shape = list(input_.shape)[:-1] + list([weight.shape[0]])
        output_shape[0] = output_shape[0] * min_comm_config.tp_world_size
        input_ = reshape_to_2D(input_)

        output = allocate_for_output(input1=input_, input2=weight.t(),
                                     tp_world_size=min_comm_config.tp_world_size, is_gather=True)

        # The kernel writes its result into the preallocated output buffer.
        coc_ops.all_gather_matmul(input_, weight, output, bias)
        output = output.reshape(output_shape)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``(sub_grad_input, grad_weight, grad_bias)``.

        ``sub_grad_input`` is the reduce-scattered input gradient; its dim 0 is
        ``grad_output.shape[0] // tp_world_size``.
        """
        from mindspeed.ops.lcal_functional import coc_ops

        # NOTE(review): unpacks exactly two saved tensors as stored by set_context;
        # COCColumnSeqParallelFunction.forward calls ctx.save_for_backward(input_) only,
        # so confirm this still holds after a delegated forward.
        input_, weight = ctx.saved_tensors
        # check_equal only prints on mismatch; execution continues.
        check_equal(grad_output.shape[0] % min_comm_config.tp_world_size, 0,
                    error_info="m size must be multiple of world size")
        sub_grad_input_shape = [grad_output.shape[0] // min_comm_config.tp_world_size] + \
                               list(grad_output.shape[1:-1]) + [weight.shape[-1]]
        # manually make sure grad_output is 2D and its memory inner axis is 512B aligned
        grad_output = reshape_to_2D(grad_output)
        if grad_output.is_contiguous() and (grad_output.shape[-1] * grad_output.element_size()) % ALIGN_SIZE > 0:
            grad_output = grad_output.t().contiguous().t()
        # Same shape and dtype as the 2D local input.
        sub_grad_input = allocate_for_output(input1=reshape_to_2D(input_))
        is_grad_weight_needed, is_grad_bias_needed = is_grad_needed(ctx.needs_input_grad)
        grad_weight, grad_bias = None, None

        if is_grad_weight_needed:
            if min_comm_config.all_gather_recomputation_enabled:
                total_input_work, total_input = async_gather_along_first_dim(input_, min_comm_config.tp_group,
                                                                             min_comm_config.tp_world_size)
            else:
                # NOTE(review): the fused forward path of this class never assigns
                # ctx.total_input; confirm it is populated whenever recomputation is disabled.
                total_input = ctx.total_input
            total_input = reshape_to_2D(total_input)

            if min_comm_config.enable_coc_in_column_backward:
                coc_ops.matmul_reduce_scatter(grad_output, weight, sub_grad_input, bias=None)
            else:
                grad_input = grad_output.matmul(weight)
                # Asynchronous so the weight gradient below can be issued before it completes.
                sub_grad_input_work = torch.distributed._reduce_scatter_base(sub_grad_input, grad_input,
                                                                             group=min_comm_config.tp_group,
                                                                             async_op=True)

            if min_comm_config.all_gather_recomputation_enabled:
                total_input_work.wait()

            grad_weight = grad_output.t().matmul(total_input)
            if is_grad_bias_needed and ctx.use_bias:
                grad_bias = grad_output.sum(dim=0) if grad_output.is_contiguous() else grad_output.t().sum(dim=1)

            if not min_comm_config.enable_coc_in_column_backward:
                sub_grad_input_work.wait()

        else:
            # Only the input gradient is needed: blocking reduce-scatter, nothing to overlap.
            grad_input = grad_output.matmul(weight)
            torch.distributed._reduce_scatter_base(sub_grad_input, grad_input, group=min_comm_config.tp_group)

        sub_grad_input = sub_grad_input.reshape(sub_grad_input_shape)
        return sub_grad_input, grad_weight, grad_bias
class FusedCOCRowSeqParallelFunction(torch.autograd.Function):
    """Row linear for sequence parallelism backed by fused ``coc_ops`` kernels.

    Both ``forward`` and ``backward`` dispatch on
    ``get_parallel_num(..., default_parallel_num=-1)``: 1 selects the rewrite
    implementation, 2/4/8 the script-based COC implementation, any other value
    the fused kernels.
    """

    @staticmethod
    def forward(ctx, input_, weight, bias):
        """Return the reduce-scattered linear output with dim 0 divided by ``tp_world_size``."""
        # Imported lazily so the module can be loaded without the lcal ops package.
        from mindspeed.ops.lcal_functional import coc_ops

        set_context(ctx, input_, weight, bias)
        ctx.world_size = min_comm_config.tp_world_size

        # Positional arguments are (m, k, n).
        parallel_num = get_parallel_num(reduce(lambda x, y: x * y, input_.shape[:-1]), weight.shape[1],
                                        weight.shape[0], default_parallel_num=-1)
        if parallel_num == 1:
            return RewriteRowSeqParallelFunction.forward(ctx, input_, weight, bias)
        elif parallel_num in [2, 4, 8]:
            return COCRowSeqParallelFunction.forward(ctx, input_, weight, bias)

        output_shape = list(input_.shape)[:-1] + list([weight.shape[0]])
        output_shape[0] = output_shape[0] // min_comm_config.tp_world_size
        input_ = reshape_to_2D(input_)

        output = allocate_for_output(input_, weight.t(), min_comm_config.tp_world_size, is_gather=False)
        # The kernel writes its result into the preallocated output buffer.
        coc_ops.matmul_reduce_scatter(input_, weight, output, bias)
        output = output.reshape(output_shape)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``(grad_input, grad_weight, grad_bias)``."""
        from mindspeed.ops.lcal_functional import coc_ops

        # NOTE(review): unpacks exactly two saved tensors as stored by set_context;
        # COCRowSeqParallelFunction.forward calls ctx.save_for_backward(input_) only,
        # so confirm this still holds after a delegated forward.
        total_input, weight = ctx.saved_tensors

        # NOTE(review): the (m, k, n) key here differs from the one used in forward
        # (m is scaled by tp_world_size, k and n are swapped); with customized_coc_dict
        # entries the two lookups could select different paths - confirm.
        parallel_num = get_parallel_num(
            reduce(lambda x, y: x * y, grad_output.shape[:-1]) * min_comm_config.tp_world_size,
            weight.shape[0], weight.shape[1], default_parallel_num=-1
        )
        if parallel_num == 1:
            return RewriteRowSeqParallelFunction.backward(ctx, grad_output)
        elif parallel_num in [2, 4, 8]:
            return COCRowSeqParallelFunction.backward(ctx, grad_output)

        grad_input_shape = list(grad_output.shape)[:-1] + list([weight.shape[-1]])
        grad_input_shape[0] = grad_input_shape[0] * min_comm_config.tp_world_size
        grad_output = reshape_to_2D(grad_output)
        total_input = reshape_to_2D(total_input)
        grad_input = allocate_for_output(grad_output, weight, min_comm_config.tp_world_size, is_gather=True)

        is_grad_weight_needed, is_grad_bias_needed = is_grad_needed(ctx.needs_input_grad)
        grad_weight, grad_bias = None, None

        if is_grad_weight_needed:
            # The v2 kernel is additionally given a buffer sized for the gathered
            # grad_output, which is then used for the weight and bias gradients.
            gathered_grad_output = allocate_for_output(grad_output, tp_world_size=min_comm_config.tp_world_size,
                                                       is_gather=True)
            coc_ops.all_gather_matmul_v2(grad_output, weight, grad_input, gathered_grad_output, bias=None)

            grad_weight = gathered_grad_output.t().matmul(total_input)
            if is_grad_bias_needed and ctx.use_bias:
                grad_bias = gathered_grad_output.sum(dim=0) if gathered_grad_output.is_contiguous() \
                    else gathered_grad_output.t().sum(dim=1)
        else:
            coc_ops.all_gather_matmul(grad_output, weight, grad_input, bias=None)

        grad_input = grad_input.reshape(grad_input_shape)
        return grad_input, grad_weight, grad_bias
def check_equal(a, b, error_info):
    """Print ``error_info`` on device 0 when ``a != b``.

    This is a soft check: nothing is raised and execution continues.
    """
    if a != b:
        if torch.npu.current_device() == 0:
            print(error_info)


def print_tensor_value(name, value, device_id=0):
    """Debug helper that prints a short sample of ``value`` on one device.

    Active only when ``min_comm_config.print_tensor_value_enabled`` is set and
    the current device equals ``device_id``. ``value`` is cut along dim 0 into
    ``parallel_num * tp_world_size`` pieces and the first five flattened
    elements of each piece are printed.
    """
    if min_comm_config.print_tensor_value_enabled and torch.npu.current_device() == device_id:
        n = min_comm_config.parallel_num * min_comm_config.tp_world_size
        per = value.shape[0] // n
        slices = []
        for k in range(n):
            v = torch.flatten(value[k * per: (k + 1) * per])
            slices.append(v[:5])
        print(f"{name}, shape={value.shape}, value=\n{torch.cat(tuple(slices)).view(n, -1)}", flush=True)


def set_context(ctx, input_, weight, bias):
    """Save ``input_`` and ``weight`` for backward and record whether a bias is used."""
    ctx.save_for_backward(input_, weight)
    ctx.use_bias = bias is not None


def infer_matmul_out_shape(shape_a, shape_b):
    """Return the shape of ``a @ b`` as a new list.

    The result keeps every leading dim of ``shape_a`` and takes the last dim
    from ``shape_b``. ``shape_a`` is not modified, so tuples and ``torch.Size``
    are accepted as well as lists.
    """
    return list(shape_a[:-1]) + [shape_b[-1]]


def reshape_to_2D(input_tensor):
    """Fold all leading dims of ``input_tensor`` into one, keeping the last dim.

    A 3D ``[a, b, c]`` tensor becomes ``[a * b, c]``. Tensors that are already
    2D are returned with their shape unchanged, and higher ranks are folded the
    same way.

    Raises:
        ValueError: if ``input_tensor`` has fewer than two dimensions.
    """
    if input_tensor.dim() < 2:
        raise ValueError("reshape_to_2D expects a tensor with at least 2 dimensions")
    # Explicit product instead of -1 so zero-sized tensors reshape unambiguously.
    rows = 1
    for dim in input_tensor.shape[:-1]:
        rows *= dim
    return input_tensor.reshape(rows, input_tensor.shape[-1])


def async_gather_along_first_dim(input_, group, world_size):
    """Start an asynchronous all-gather of ``input_`` along dim 0.

    Returns:
        ``(work, output_)``: the async work handle and the output buffer whose
        dim 0 is ``world_size`` times that of ``input_``. The buffer must not
        be read before ``work.wait()`` has returned.
    """
    dim_size = list(input_.size())
    dim_size[0] = dim_size[0] * world_size
    output_ = torch.empty(dim_size, dtype=input_.dtype, device=torch.npu.current_device(), requires_grad=False)
    work = torch.distributed._all_gather_base(output_, input_.contiguous(), group=group, async_op=True)
    return work, output_


def shuffle_as_coc_reduce_scatter(input_, world_size, parallel_num):
    """Regroup rows from ``[parallel_num, world_size, per]`` to ``[world_size, parallel_num, per]``.

    Dim 0 is viewed as ``parallel_num * world_size`` equal groups of ``per``
    rows, the two outer group axes are swapped, and the result is returned with
    the original shape.
    """
    per = input_.shape[0] // parallel_num // world_size
    input_shape = list(input_.shape)
    reshape_tensor = torch.reshape(input_, [parallel_num, world_size, per] + input_shape[1:])
    return torch.reshape(reshape_tensor.transpose(0, 1), tuple(input_shape))


def shuffle_as_coc_all_gather(input_, world_size, parallel_num):
    """Regroup rows from ``[world_size, parallel_num, per]`` to ``[parallel_num, world_size, per]``.

    This is the inverse grouping of ``shuffle_as_coc_reduce_scatter``.
    """
    per = input_.shape[0] // parallel_num // world_size
    input_shape = list(input_.shape)
    reshape_tensor = torch.reshape(input_, [world_size, parallel_num, per] + input_shape[1:])
    return torch.reshape(reshape_tensor.transpose(0, 1), tuple(input_shape))
def is_grad_needed(needs_input_grad):
    """Validate ``ctx.needs_input_grad`` and return the weight/bias flags.

    Args:
        needs_input_grad: 3-tuple of booleans for (input, weight, bias).

    Returns:
        ``(is_grad_weight_needed, is_grad_bias_needed)``.

    Raises:
        RuntimeError: if the input gradient is not required, or if the bias
            gradient is required while the weight gradient is not.
    """
    is_grad_input_needed, is_grad_weight_needed, is_grad_bias_needed = needs_input_grad
    if not is_grad_input_needed:
        # Adjacent literals instead of a backslash inside the string, which would
        # embed the source indentation in the message.
        raise RuntimeError("To use COC, grad_input is necessary to compute. "
                           "Check if optimizer update is turned off by mistake.")
    if not is_grad_weight_needed and is_grad_bias_needed:
        raise RuntimeError("To use COC, grad_weight must be needed if grad_bias is required.")
    return is_grad_weight_needed, is_grad_bias_needed


def get_parallel_num(m, k, n, default_parallel_num=min_comm_config.parallel_num):
    """Return the number of chunks to split a matmul of shape ``(m, k, n)`` into.

    A value registered in ``min_comm_config.customized_coc_dict`` under the key
    ``str([m, k, n])`` overrides ``default_parallel_num``. When the fused kernel
    is disabled and ``m`` is smaller than the chosen value, 1 is returned.

    Raises:
        RuntimeError: if the resulting value is not one of -1, 1, 2, 4 or 8.
    """
    parallel_num = default_parallel_num
    shape_key = str([m, k, n])
    customized = min_comm_config.customized_coc_dict
    if shape_key in customized:
        parallel_num = customized.get(shape_key)
    if not min_comm_config.coc_fused_kernel and m < parallel_num:
        return 1
    if parallel_num not in [-1, 1, 2, 4, 8]:
        raise RuntimeError("invalid parallel num, only support integer from 1, 2, 4 or 8.")
    return parallel_num


def get_output_shape(input1, input2=None, tp_world_size=1, is_gather=True):
    """Return the output shape of ``input1 (@ input2)`` after a dim-0 collective.

    The last dim is replaced by ``input2.shape[-1]`` when ``input2`` is given.
    Dim 0 is multiplied by ``tp_world_size`` when ``is_gather`` is true and
    floor-divided by it otherwise. Invalid shapes are only reported through
    ``check_equal``; no exception is raised here.
    """
    check_equal(input1.dim() >= 2 and (input2 is None or input2.dim() == 2), True,
                error_info="invalid matmul input shape for CoC")
    output_shape = list(input1.shape)[:-1] + list([input2.shape[-1]]) if input2 is not None else list(input1.shape)
    if not is_gather:
        check_equal(output_shape[0] % tp_world_size == 0 and output_shape[0] >= tp_world_size, True,
                    error_info="invalid matmul m shape for CoC")
    output_shape[0] = output_shape[0] * tp_world_size if is_gather else output_shape[0] // tp_world_size
    return output_shape
def allocate_for_output(input1, input2=None, tp_world_size=1, is_gather=True):
    """Allocate an uninitialised buffer for a matmul and/or dim-0 collective result.

    Args:
        input1: left operand; expected to be 2-dimensional.
        input2: optional right operand; when given, the last dim of the buffer
            is ``input2.shape[1]``.
        tp_world_size: factor applied to dim 0.
        is_gather: multiply dim 0 by ``tp_world_size`` when true, floor-divide
            it otherwise.

    Returns:
        An empty tensor with ``input1``'s dtype on the current NPU device.
    """
    if input2 is not None:
        dim_size = list(input1.shape)[:-1] + list([input2.shape[1]])
    else:
        dim_size = list(input1.shape)
    dim_size[0] = dim_size[0] * tp_world_size if is_gather else dim_size[0] // tp_world_size
    output = torch.empty(dim_size, dtype=input1.dtype, device=torch.npu.current_device())
    return output


class CommunicationType(Enum):
    """Collective operations that ``COCParallel`` can issue."""

    ALL_GATHER = 0
    ALL_REDUCE = 1
    REDUCE_SCATTER = 2


class COCParallel:
    """Pipeline a compute function with an asynchronous tensor-parallel collective.

    ``input_data`` is split along dim 0 into ``parallel_num`` slices. Either
    each slice is computed and its result communicated asynchronously
    (``compute_first=True``), or each slice is communicated asynchronously and
    the compute function runs on the previously communicated slice
    (``compute_first=False``).
    """

    def __init__(self, input_data, comm_type, compute_fcn, compute_first=True, synchronize=True, weight_shape_list=None,
                 parallel_num=min_comm_config.parallel_num):
        """Store the configuration and allocate the output buffers.

        Args:
            input_data: tensor that is sliced along dim 0.
            comm_type: a ``CommunicationType`` member.
            compute_fcn: callable run per slice; called with ``input_tensor`` and
                ``output_tensor`` keywords, or with the slice alone in the
                compute-first all-gather/reduce-scatter case.
            compute_first: whether compute precedes communication.
            synchronize: whether ``run`` waits for all collectives before returning.
            weight_shape_list: ``[k, n]`` of the matmul weight, or None when the
                compute function keeps the input shape.
            parallel_num: number of slices.
        """
        self.input_data = input_data
        self.split_num = parallel_num
        self.synchronize = synchronize
        self.comm_type = comm_type
        self.compute_fcn = compute_fcn
        self.compute_first = compute_first
        self.works = []
        self.group = min_comm_config.tp_group
        self.world_size = min_comm_config.tp_world_size
        self.input_slice = input_data.shape[0] // self.split_num
        self.init_output_space(input_data, weight_shape_list, compute_first)

    def init_output_space(self, input_data, weight_shape_list, compute_first):
        """Allocate ``self.output`` and ``self.comm_output`` and derive the per-slice row counts."""
        if weight_shape_list is None:
            self.compute_output_shape_slice = list(input_data.shape)
        else:
            # Adjacent literals instead of a backslash inside the string, which would
            # embed the source indentation in the message.
            check_equal(input_data.shape[-1], weight_shape_list[0],
                        error_info="In COCParallel, input_data should be of "
                                   "shape [m,k] and weight_shape_list should be [k,n]")
            self.compute_output_shape_slice = infer_matmul_out_shape(list(input_data.shape), weight_shape_list)
        self.output = self.allocate_output_memory()
        self.output_slice = self.output.shape[0] // self.split_num
        if compute_first:
            # The collective writes straight into the final output buffer.
            self.comm_output = self.output
        else:
            self.comm_output = self.allocate_communicate_memory_for_communicate_first()
        self.comm_slice = self.comm_output.shape[0] // self.split_num

    def get_dim_size_after_comm(self, dim_size):
        """Adjust ``dim_size[0]`` for the configured collective.

        ``dim_size`` is modified in place and the same list is returned.

        Raises:
            ValueError: if ``self.comm_type`` is not a known ``CommunicationType``.
        """
        if self.comm_type == CommunicationType.ALL_GATHER:
            dim_size[0] = dim_size[0] * self.world_size
        elif self.comm_type == CommunicationType.REDUCE_SCATTER:
            dim_size[0] = dim_size[0] // self.world_size
        elif self.comm_type == CommunicationType.ALL_REDUCE:
            pass
        else:
            raise ValueError("Invalid comm_type.")
        return dim_size

    def allocate_output_memory(self):
        """Allocate the final output buffer."""
        # No matter compute first or communicate first, the output shape remains the same
        # get_dim_size_after_comm edits the list in place, so compute_output_shape_slice
        # holds the post-communication shape from here on.
        output_dim_size = self.get_dim_size_after_comm(self.compute_output_shape_slice)
        output_ = torch.empty(output_dim_size, dtype=self.input_data.dtype,
                              device=torch.npu.current_device(), requires_grad=False)
        return output_

    def allocate_communicate_memory_for_communicate_first(self):
        """Allocate the buffer that receives the communicated input when communicating first."""
        dim_size = list(self.input_data.shape)
        dim_size = self.get_dim_size_after_comm(dim_size)
        comm_output = torch.empty(dim_size, dtype=self.input_data.dtype,
                                  device=torch.npu.current_device(), requires_grad=False)
        return comm_output

    def run_synchronize(self):
        """Wait for every recorded collective and return ``self.comm_output``."""
        for work in self.works:
            work.wait()
        return self.comm_output

    def run(self):
        """Execute the pipeline in the order selected by ``compute_first``."""
        if self.compute_first:
            return self.run_compute_first()
        else:
            return self.run_communicate_first()

    def comm_fcn(self, i, input_):
        """Launch the asynchronous collective for slice ``i``.

        Returns:
            ``(work, output_)``: the async work handle and the tensor that
            receives the result.

        Raises:
            ValueError: if ``self.comm_type`` is not a known ``CommunicationType``.
        """
        if self.comm_type == CommunicationType.ALL_GATHER:
            output_ = self.comm_output[i * self.comm_slice: (i + 1) * self.comm_slice]
            work = torch.distributed._all_gather_base(output_, input_.contiguous(), group=self.group, async_op=True)
        elif self.comm_type == CommunicationType.REDUCE_SCATTER:
            output_ = self.comm_output[i * self.comm_slice: (i + 1) * self.comm_slice]
            work = torch.distributed._reduce_scatter_base(output_, input_.contiguous(), group=self.group, async_op=True)
        elif self.comm_type == CommunicationType.ALL_REDUCE:
            # all_reduce interface currently only supports overwriting the same address of input
            output_ = input_
            work = torch.distributed.all_reduce(output_, group=self.group, async_op=True)
        else:
            raise ValueError("Invalid comm_type.")
        return work, output_

    def get_input_slice(self, i):
        """Return rows of slice ``i`` from ``self.input_data``."""
        return self.input_data[i * self.input_slice: (i + 1) * self.input_slice]

    def run_compute_first(self):
        """Compute each slice, then launch its collective asynchronously.

        Returns:
            ``self.comm_output`` after waiting when ``synchronize`` is true,
            otherwise ``(self.output, self.works)`` with the collectives still
            pending.
        """
        # Keeps a reference to every slice result until this method returns;
        # presumably so the buffers stay alive while collectives are in flight - TODO confirm.
        compute_outputs = []
        for i in range(self.split_num):
            input_slice = self.get_input_slice(i)
            if self.comm_type == CommunicationType.ALL_REDUCE:
                # all-reduce works in place, so compute directly into the output slice.
                compute_output = self.output[i * self.comm_slice: (i + 1) * self.comm_slice]
                self.compute_fcn(input_tensor=input_slice, output_tensor=compute_output)
            else:
                compute_output = self.compute_fcn(input_slice)
            compute_outputs.append(compute_output)
            work, _ = self.comm_fcn(i, compute_output)
            self.works.append(work)

        if self.synchronize:
            return self.run_synchronize()
        else:
            return self.output, self.works

    def get_output_slice(self, i):
        """Return rows of slice ``i`` from ``self.output``."""
        return self.output[i * self.output_slice: (i + 1) * self.output_slice]

    def run_communicate_first(self):
        """Launch each slice's collective, computing on the previous slice meanwhile.

        Returns:
            ``self.output`` once every slice has been communicated and computed.
        """
        check_equal(self.synchronize, True,
                    error_info="In COCParallel, must synchronize before return if communicate first")
        pre_work = None
        pre_output = None
        outputs = []

        for i in range(self.split_num):
            input_slice = self.get_input_slice(i)
            if self.comm_type == CommunicationType.ALL_REDUCE:
                # all-reduce overwrites its input, so operate on a copy of the slice.
                input_ = torch.empty_like(input_slice).copy_(input_slice)
            else:
                input_ = input_slice
            work, output_i = self.comm_fcn(i, input_)
            outputs.append(output_i)

            self.works.append(work)

            if pre_output is not None:
                # Slice i is now in flight; finish slice i - 1 and compute on it.
                pre_work.wait()
                self.compute_fcn(input_tensor=pre_output, output_tensor=self.get_output_slice(i - 1))

            pre_work = work
            pre_output = output_i

        # Drain the pipeline: the last slice has no successor to overlap with.
        pre_work.wait()
        self.compute_fcn(input_tensor=pre_output, output_tensor=self.get_output_slice(self.split_num - 1))
        return self.output
def extract_info_from_mm_tensors(left, right):
    """Return ``(m, k, n)`` for the matmul ``left @ right``.

    ``m`` is the product of all leading dims of ``left``, ``k`` its last dim and
    ``n`` the product of all dims of ``right`` after the first. A mismatch
    between ``right.shape[0]`` and ``k`` is only reported through ``check_equal``.
    """
    m = reduce(lambda x, y: x * y, left.shape[:-1])
    k = left.shape[-1]
    # Adjacent literals instead of a backslash inside the string, which would
    # embed the source indentation in the message.
    check_equal(right.shape[0], k,
                error_info="For matmul_soc_friendly in CoC, the two input tensors left and right "
                           "should be of shape [.., k] and [k, n] respectively")
    n = reduce(lambda x, y: x * y, right.shape[1:])
    return m, k, n


def is_transposed(input_):
    """Return True when the last dim of ``input_`` is the outermost one in memory.

    That is the case when the second-to-last dim has stride 1 and the last dim
    has a stride equal to the product of all other dims.

    Raises:
        RuntimeError: if ``input_`` is not 2- or 3-dimensional.
    """
    if input_.dim() < 2 or input_.dim() > 3:
        raise RuntimeError("input tensor of is_tensor_transposed should be either 2- or 3-dimensional")
    dim1 = input_.dim() - 1
    dim2 = input_.dim() - 2
    return input_.stride()[dim2] == 1 and input_.stride()[dim1] == reduce(lambda x, y: x * y, input_.shape[:-1])


def ceil_div(a, b):
    """Return ``a / b`` rounded up to an integer.

    Raises:
        ZeroDivisionError: if ``b`` is 0.
    """
    if b == 0:
        raise ZeroDivisionError
    return (a + b - 1) // b


def ceil_coc(a, b):
    """Return ``a`` rounded up to the next multiple of ``b``.

    Raises:
        ZeroDivisionError: if ``b`` is 0.
    """
    if b == 0:
        raise ZeroDivisionError
    return ((a + b - 1) // b) * b


# 512B aligned shape is soc friendly
kPackage512 = 512
kPackage32 = 32


def compute_pad_num(single_dim_size, element_size, kPackage=kPackage512):
    """Return how many elements must be appended so the dim spans a multiple of ``kPackage`` bytes."""
    least_size = ceil_coc(single_dim_size, ceil_div(kPackage, element_size))
    pad_num = least_size - single_dim_size
    return pad_num


# pad_dim could be in the form of 3 / 2 / 1 or -1 / -2 / -3
def pad_tensor(input_, pad_num, pad_dim):
    """Append ``pad_num`` zeros at the end of dimension ``pad_dim``.

    ``input_`` is returned unchanged when ``pad_num`` is not positive.
    """
    dim_size = input_.dim()
    pad_list = [0] * (dim_size * 2)
    # F.pad lists (left, right) pairs starting from the last dim; this index is
    # the "right" entry of pad_dim for both positive and negative pad_dim.
    pad_list[pad_dim * (-2) - 1] += pad_num
    input_ = F.pad(input_, tuple(pad_list), mode='constant', value=0) if pad_num > 0 else input_
    return input_


def process_with_k_aligned(left, right, mn_aligned, is_left_transposed, is_right_transposed):
    """Adjust memory layouts for a matmul whose k dim is already aligned.

    A transposed ``left`` is made contiguous. When m/n are not aligned and
    ``right`` is not transposed, ``right`` is re-laid-out in transposed memory
    order (same shape); a ``main_grad`` attribute, if present, is carried over
    to the new tensor.
    """
    if is_left_transposed:
        left = left.contiguous()
    if not mn_aligned and not is_right_transposed:
        # The re-laid-out tensor is a new object; only copy main_grad when the
        # original actually has one instead of failing with AttributeError.
        has_main_grad = hasattr(right, "main_grad")
        main_grad = right.main_grad if has_main_grad else None
        right = right.t().contiguous().t()
        if has_main_grad:
            right.main_grad = main_grad
    return left, right


def process_left_with_padding_k(left, is_left_transposed, k_pad_num):
    """Zero-pad the last (k) dim of ``left`` by ``k_pad_num`` elements.

    Works for any rank >= 2. For a transposed ``left`` the k dim is moved to
    the front before padding and moved back afterwards, and the result is made
    contiguous.
    """
    if is_left_transposed:
        ndim = left.dim()
        k_to_front = (ndim - 1, *range(ndim - 1))
        k_to_back = (*range(1, ndim), 0)
        left = pad_tensor(left.permute(*k_to_front), k_pad_num, 0)
        left = left.permute(*k_to_back).contiguous()
    else:
        left = pad_tensor(left, k_pad_num, -1)
    return left


def process_right_with_padding_k(right, is_right_transposed, k_pad_num):
    """Zero-pad the first (k) dim of the 2D ``right`` by ``k_pad_num`` elements.

    A transposed ``right`` is padded through its transpose so the transposed
    memory layout is kept.
    """
    if is_right_transposed:
        right = pad_tensor(right.t(), k_pad_num, 1)
        right = right.t()
    else:
        right = pad_tensor(right, k_pad_num, 0)
    return right


def process_with_padding_k(left, right, is_left_transposed, is_right_transposed, k_pad_num):
    """Zero-pad the shared k dim of both matmul operands by ``k_pad_num`` elements."""
    left = process_left_with_padding_k(left, is_left_transposed, k_pad_num)
    right = process_right_with_padding_k(right, is_right_transposed, k_pad_num)
    return left, right
def get_aligned_mm_inputs(left, right, sp_coef=1, parallel_num=min_comm_config.parallel_num):
    """Get properly aligned tensors for matmul, according to soc friendly properties.

    Inputs
        left: the left tensor of matmul, in the shape of [m,k].
        right: the right tensor of matmul, in the shape of [k,n].
        sp_coef: the coefficient for compensating m due to any expected collective communications before the matmul.
        parallel_num: the number of parts to divide the left tensor in, by row.

    Outputs:
        left: the processed left tensor for matmul, in the shape of [m,k'].
        right: the processed right tensor for matmul, in the shape of [k',n].

    k' equals k unless the padding branch is taken, in which case both tensors
    are zero-padded along k up to the next 512B boundary. When neither branch
    applies, the inputs are returned unchanged.
    """

    # The dtype of left and right tensors for matmul should be the same
    # Adjacent literals instead of a backslash inside the string, which would
    # embed the source indentation in the message.
    check_equal(left.element_size(), right.element_size(),
                error_info="In matmul_soc_friendly of CoC, the dtype of "
                           "left and right tensors for matmul should be the same")
    element_size = left.element_size()

    m, k, n = extract_info_from_mm_tensors(left, right)

    # check if the shape of left or right matches its memory alignment
    is_left_transposed = is_transposed(left)
    is_right_transposed = is_transposed(right)

    # After communication (if applicable) and dividing left tensor, check if m-dim and n-dim are both 512B aligned
    is_mn_aligned_512b = ((m * sp_coef // parallel_num) * element_size) % kPackage512 == 0 and (
        n * element_size) % kPackage512 == 0
    # Check if k-dim is 512B aligned
    is_k_aligned_512b = (k * element_size) % kPackage512 == 0
    # Check if k-dim is 32B aligned
    is_k_aligned_32b = (k * element_size) % kPackage32 == 0
    # Compute the required amount of padding for k-dim, if already aligned then gives 0
    k_pad_num = compute_pad_num(k, element_size, kPackage=kPackage512)

    if is_k_aligned_512b:
        return process_with_k_aligned(left, right, is_mn_aligned_512b, is_left_transposed, is_right_transposed)
    elif is_mn_aligned_512b and not is_k_aligned_32b and min_comm_config.k_min <= k <= min_comm_config.k_max:
        return process_with_padding_k(left, right, is_left_transposed, is_right_transposed, k_pad_num)

    return left, right
def column_forward(self, input_, weight, column_parallel_function=None, check_fcn=None):
    """Forward of a column-parallel linear routed through an autograd function class.

    Args:
        self: the linear module; ``weight``, ``bias`` and ``skip_bias_add`` are
            read, plus ``norm`` when the attribute exists.
        input_: activation passed to the autograd function unchanged.
        weight: optional weight override; ``self.weight`` is used when None.
        column_parallel_function: class exposing ``apply(input, weight, bias)``.
        check_fcn: optional zero-argument callable run before anything else.

    Returns:
        ``(output, output_bias)``; ``output_bias`` is ``self.bias`` when
        ``skip_bias_add`` is set, otherwise None.
    """
    if check_fcn is not None:
        check_fcn()

    fuse_bias = not self.skip_bias_add
    if getattr(self, "norm", False):
        # A truthy ``norm`` always normalises the module's own weight.
        effective_weight = F.normalize(self.weight)
    elif weight is None:
        effective_weight = self.weight
    else:
        effective_weight = weight

    result = column_parallel_function.apply(input_, effective_weight, self.bias if fuse_bias else None)
    return result, (None if fuse_bias else self.bias)


def row_forward(self, input_, row_parallel_function=None, check_fcn=None):
    """Forward of a row-parallel linear routed through an autograd function class.

    The autograd function is always called without a bias; the bias is added
    afterwards unless ``skip_bias_add`` is set, in which case it is returned
    as the second element instead.

    Returns:
        ``(output, output_bias)``.
    """
    if check_fcn is not None:
        check_fcn()

    result = row_parallel_function.apply(input_, self.weight, None)
    if self.skip_bias_add:
        return result, self.bias
    if self.bias is not None:
        result = result + self.bias
    return result, None


class ModuleType(Enum):
    """Identifiers for the available linear implementations."""

    ORIGINAL_ALL_REDUCE = 0
    ORIGINAL_SEQ_PARALLEL = 1
    REWRITE_ALL_REDUCE = 2
    REWRITE_SEQ_PARALLEL = 3
    COC_FOR_ALL_REDUCE = 4
    COC_FOR_SEQ_PARALLEL = 5
class MinCommConfig:
    """Process-wide configuration holder for the CoC parallel linears.

    The object stores three kinds of state:

    * settings read from the training arguments (``coc_mode``,
      ``coc_parallel_num``, ``coc_fused_kernel``),
    * hooks registered by the framework (linear classes, tensor-parallel
      getters, communication mappings),
    * user switches registered from ``user_config.py``.
    """

    # Accepted values, kept next to the validation that uses them.
    _SUPPORTED_PARALLEL_NUMS = (1, 2, 4, 8)
    _SUPPORTED_COC_MODES = (-1, 0, 1, 2)

    def __init__(self):
        # Basic settings come from the parsed training arguments.
        global_args = get_args()

        # Default until acquire_module_type() picks the real one.
        self.module_type: ModuleType = ModuleType.ORIGINAL_SEQ_PARALLEL
        self.coc_mode = global_args.coc_mode
        self.parallel_num = global_args.coc_parallel_num
        self.coc_fused_kernel = global_args.coc_fused_kernel

        # Configuration registered by the framework.
        self.ColumnParallelLinear = None
        self.RowParallelLinear = None
        self.column_parallel_forward = None
        self.row_parallel_forward = None
        self.tp_group_fcn = None
        self.tp_world_size_fcn = None
        self.tp_rank_fcn = None
        self.all_reduce = None
        self.reduce_scatter_along_first_dim = None
        self.gather_along_first_dim = None
        self.prefix = None
        self.check_fcn = None
        self.tp_enabled = True
        self.sequence_parallel_enabled = True

        # Configuration set manually by users in user_config.py.
        self.k_min = 1024
        self.k_max = 4096
        self.all_gather_recomputation_enabled = False
        self.print_tensor_value_enabled = False
        self.matmul_soc_friendly_enabled = True
        self.customized_coc_dict = {}
        self.enable_coc_in_column_backward = True

    def print_settings(self):
        """Print a summary of the active CoC settings on device 0 only."""
        if self.coc_fused_kernel:
            settings_dict = {
                "is coc turned on": True,
                "use script or use fused kernel": "fused kernel",
                "is sequence parallel enabled": self.sequence_parallel_enabled,
                "is coc enabled in column backward": bool(self.enable_coc_in_column_backward),
            }
        elif "ORIGINAL" in self.module_type.name:
            settings_dict = {"is coc turned on": False}
        else:
            settings_dict = {
                "is coc turned on": True,
                "use script or use fused kernel": "script",
                "coc mode": self.coc_mode,
                "parallel num": self.parallel_num,
                "module type": self.module_type.name,
                "is sequence parallel enabled": self.sequence_parallel_enabled,
                "if get aligned mm inputs": self.matmul_soc_friendly_enabled,
            }
        if torch.npu.current_device() == 0:
            print("\n-----------------------------COC Settings: ------------------------------------")
            for key, value in settings_dict.items():
                print(f"{key}: {value}")
            print("-------------------------------------------------------------------------------\n")

    @property
    def tp_rank(self):
        """Tensor-parallel rank, queried from the registered getter on every access."""
        return self.tp_rank_fcn()

    @property
    def tp_group(self):
        """Tensor-parallel process group, queried from the registered getter."""
        return self.tp_group_fcn()

    @property
    def tp_world_size(self):
        """Tensor-parallel world size, queried from the registered getter."""
        return self.tp_world_size_fcn()

    def register_tp_get_functions(self, tp_group_fcn, tp_world_size_fcn, tp_rank_fcn):
        """Store the zero-argument getters behind ``tp_group``/``tp_world_size``/``tp_rank``."""
        self.tp_group_fcn = tp_group_fcn
        self.tp_world_size_fcn = tp_world_size_fcn
        self.tp_rank_fcn = tp_rank_fcn

    def register_class(self, column_parallel_linear, row_parallel_linear):
        """Store the linear classes whose ``forward`` will be replaced."""
        self.ColumnParallelLinear = column_parallel_linear
        self.RowParallelLinear = row_parallel_linear

    def register_mappings(self, _all_reduce, _reduce_scatter_along_first_dim, _gather_along_first_dim):
        """Store the framework's communication helpers."""
        self.all_reduce = _all_reduce
        self.reduce_scatter_along_first_dim = _reduce_scatter_along_first_dim
        self.gather_along_first_dim = _gather_along_first_dim

    def replace_forward_functions_by_autograd_class(self, column_autograd_class, row_autograd_class):
        """Monkey-patch ``forward`` of both registered linear classes.

        The new forwards delegate to ``column_forward`` / ``row_forward`` with the
        given autograd function classes and the registered ``check_fcn``.
        """
        def column_parallel_forward(x, input_, weight=None):
            return column_forward(x, input_, weight, column_parallel_function=column_autograd_class,
                                  check_fcn=self.check_fcn)

        def row_parallel_forward(x, y):
            return row_forward(x, y, row_parallel_function=row_autograd_class, check_fcn=self.check_fcn)

        self.column_parallel_forward = column_parallel_forward
        self.row_parallel_forward = row_parallel_forward
        self.ColumnParallelLinear.forward = self.column_parallel_forward
        self.RowParallelLinear.forward = self.row_parallel_forward

    def register_sequence_parallel_switch(self, sequence_parallel_enabled):
        self.sequence_parallel_enabled = sequence_parallel_enabled

    def register_check_fcn(self, check_fcn):
        self.check_fcn = check_fcn

    def register_customized_coc(self, customized_coc):
        """Merge user-defined per-shape settings into ``customized_coc_dict``.

        Each key is a string holding a Python literal (it is parsed with
        ``ast.literal_eval``) and is re-serialised with ``str`` so that
        equivalent spellings such as ``"[1,2]"`` and ``"[1, 2]"`` share one key.
        """
        if len(customized_coc) == 0:
            return
        for coc_shape_yaml_str, coc_setting in customized_coc.items():
            coc_shape_key_str = str(ast.literal_eval(coc_shape_yaml_str))
            self.customized_coc_dict[coc_shape_key_str] = coc_setting
        print("self.customized_coc_dict: ", self.customized_coc_dict)

    def register_matmul_soc_friendly_setting(self, matmul_soc_friendly, k_min, k_max):
        self.matmul_soc_friendly_enabled = matmul_soc_friendly
        self.k_min = k_min
        self.k_max = k_max

    def register_all_gather_recomputation_switch(self, all_gather_recomputation_enabled):
        self.all_gather_recomputation_enabled = all_gather_recomputation_enabled

    def register_print_tensor_value_switch(self, print_tensor_value_enabled):
        self.print_tensor_value_enabled = print_tensor_value_enabled

    def register_column_backward_coc_switch(self, enable_coc_in_column_backward):
        self.enable_coc_in_column_backward = enable_coc_in_column_backward

    def acquire_module_type(self, tp_size):
        """Validate the settings, resolve ``module_type``/``prefix`` and print them.

        Args:
            tp_size: tensor-parallel size; a value of 1 forces ``coc_mode`` 0 and
                ``parallel_num`` 1.

        Raises:
            RuntimeError: if ``parallel_num`` or ``coc_mode`` is not supported.
        """
        # Validate first so bad settings fail before any other work is done.
        if self.parallel_num not in self._SUPPORTED_PARALLEL_NUMS:
            raise RuntimeError(
                f"coc_parallel_num must be either 1, 2, 4 or 8, got {self.parallel_num!r}. "
                "Current value not supported"
            )
        if self.coc_mode not in self._SUPPORTED_COC_MODES:
            raise RuntimeError(
                f"coc_mode must be either -1, 0, 1, or 2, got {self.coc_mode!r}. "
                "Current value not supported"
            )

        # -1 means "auto": no CoC when nothing is split, full CoC otherwise.
        if self.coc_mode == -1:
            self.coc_mode = 0 if self.parallel_num == 1 else 2

        if tp_size == 1:
            self.coc_mode = 0
            self.parallel_num = 1

        # Indexed by coc_mode: 0 -> ORIGINAL, 1 -> REWRITE, 2 -> COC.
        sequence_parallel_types = [ModuleType.ORIGINAL_SEQ_PARALLEL,
                                   ModuleType.REWRITE_SEQ_PARALLEL,
                                   ModuleType.COC_FOR_SEQ_PARALLEL]
        all_reduce_types = [ModuleType.ORIGINAL_ALL_REDUCE,
                            ModuleType.REWRITE_ALL_REDUCE,
                            ModuleType.COC_FOR_ALL_REDUCE]

        if self.sequence_parallel_enabled:
            self.module_type = sequence_parallel_types[self.coc_mode]
        else:
            self.module_type = all_reduce_types[self.coc_mode]

        if "COC" in self.module_type.name:
            self.prefix = f"module_{self.module_type.name}_parallel_num_{self.parallel_num}"
        else:
            self.prefix = f"module_{self.module_type.name}"

        self.print_settings()
class RewriteColumnAllReduceFunction(torch.autograd.Function):
    """Column-parallel linear (all-reduce variant) written as an explicit autograd function.

    Forward is a plain ``input_ @ weight.t()`` plus optional bias. Backward
    all-reduces the input gradient over the tensor-parallel group while the
    weight/bias gradients are being computed.
    """

    @staticmethod
    def forward(ctx, input_, weight, bias):
        set_context(ctx, input_, weight, bias)
        output_parallel = torch.matmul(input_, weight.t())
        if bias is not None:
            output_parallel = output_parallel + bias
        return output_parallel

    @staticmethod
    def backward(ctx, grad_output):
        input_, weight = ctx.saved_tensors
        is_grad_weight_needed, is_grad_bias_needed = is_grad_needed(ctx.needs_input_grad)
        need_grad_bias = is_grad_bias_needed and ctx.use_bias

        grad_input = grad_output.matmul(weight)
        # Launch the all-reduce asynchronously so it overlaps with the matmul below.
        handle = torch.distributed.all_reduce(grad_input, group=min_comm_config.tp_group, async_op=True)

        grad_weight, grad_bias = None, None
        # The bias gradient no longer depends on the weight needing a gradient:
        # a frozen weight with a trainable bias previously got ``None`` here.
        if is_grad_weight_needed or need_grad_bias:
            grad_output_2d = reshape_to_2D(grad_output)
            if is_grad_weight_needed:
                grad_weight = grad_output_2d.t().matmul(reshape_to_2D(input_))
            if need_grad_bias:
                grad_bias = grad_output_2d.sum(dim=0)
        handle.wait()

        return grad_input, grad_weight, grad_bias


class RewriteRowAllReduceFunction(torch.autograd.Function):
    """Row-parallel linear (all-reduce variant) written as an explicit autograd function.

    Forward computes the local matmul, reduces it with the registered
    ``min_comm_config.all_reduce`` helper and then adds the optional bias.
    Backward needs no communication.
    """

    @staticmethod
    def forward(ctx, input_, weight, bias):
        set_context(ctx, input_, weight, bias)
        output_ = torch.matmul(input_, weight.t())
        output_parallel = min_comm_config.all_reduce(output_)
        if bias is not None:
            output_parallel = output_parallel + bias
        return output_parallel

    @staticmethod
    def backward(ctx, grad_output):
        total_input, weight = ctx.saved_tensors
        is_grad_weight_needed, is_grad_bias_needed = is_grad_needed(ctx.needs_input_grad)
        need_grad_bias = is_grad_bias_needed and ctx.use_bias

        grad_input = grad_output.matmul(weight)
        grad_weight, grad_bias = None, None

        # Bias gradient is computed independently of the weight gradient.
        if is_grad_weight_needed or need_grad_bias:
            grad_output_2d = reshape_to_2D(grad_output)
            if is_grad_weight_needed:
                grad_weight = grad_output_2d.t().matmul(reshape_to_2D(total_input))
            if need_grad_bias:
                grad_bias = grad_output_2d.sum(dim=0)

        return grad_input, grad_weight, grad_bias
class RewriteColumnSeqParallelFunction(torch.autograd.Function):
    """Column-parallel linear (sequence-parallel variant) as an explicit autograd function.

    Forward all-gathers the input along dim 0 over the tensor-parallel group and
    multiplies by ``weight.t()``. Backward reduce-scatters the input gradient and,
    when the weight needs a gradient, re-gathers the input to compute it.
    """

    @staticmethod
    def forward(ctx, input_, weight, bias):
        set_context(ctx, input_, weight, bias)
        dim_size = list(input_.size())
        dim_size[0] = dim_size[0] * min_comm_config.tp_world_size

        all_gather_buffer = torch.empty(dim_size, dtype=input_.dtype, device=torch.cuda.current_device())
        torch.distributed._all_gather_base(all_gather_buffer, input_, group=min_comm_config.tp_group)
        total_input = all_gather_buffer

        output_parallel = torch.matmul(total_input, weight.t())
        if bias is not None:
            output_parallel = output_parallel + bias
        return output_parallel

    @staticmethod
    def backward(ctx, grad_output):
        input_, weight = ctx.saved_tensors
        is_grad_weight_needed, is_grad_bias_needed = is_grad_needed(ctx.needs_input_grad)
        need_grad_bias = is_grad_bias_needed and ctx.use_bias
        tp_group = min_comm_config.tp_group

        # The gathered input is only required for the weight gradient; start the
        # gather early so it overlaps with the grad_input matmul.
        handle_all_gather, total_input = None, None
        if is_grad_weight_needed:
            handle_all_gather, total_input = async_gather_along_first_dim(input_, tp_group,
                                                                          min_comm_config.tp_world_size)

        grad_input = grad_output.matmul(weight)
        if handle_all_gather is not None:
            handle_all_gather.wait()

        sub_grad_input = torch.empty(list(input_.size()), dtype=input_.dtype,
                                     device=torch.cuda.current_device(), requires_grad=False)
        # reduce_scatter, overlapped with the weight/bias gradient computation
        handle_reduce_scatter = torch.distributed._reduce_scatter_base(sub_grad_input, grad_input,
                                                                       group=tp_group, async_op=True)

        grad_weight, grad_bias = None, None
        # The bias gradient no longer depends on the weight needing a gradient.
        if is_grad_weight_needed or need_grad_bias:
            grad_output_2d = reshape_to_2D(grad_output)
            if is_grad_weight_needed:
                grad_weight = grad_output_2d.t().matmul(reshape_to_2D(total_input))
            if need_grad_bias:
                grad_bias = grad_output_2d.sum(dim=0)
        handle_reduce_scatter.wait()

        return sub_grad_input, grad_weight, grad_bias


class RewriteRowSeqParallelFunction(torch.autograd.Function):
    """Row-parallel linear (sequence-parallel variant) as an explicit autograd function.

    Forward computes the local matmul and reduce-scatters it along dim 0 with the
    registered helper. Backward gathers ``grad_output`` along dim 0 first.
    """

    @staticmethod
    def forward(ctx, input_, weight, bias):
        set_context(ctx, input_, weight, bias)
        # ctx.world_size is needed for the case: rewrite forward (manually skipped) with coc backward
        ctx.world_size = min_comm_config.tp_world_size
        output_ = torch.matmul(input_, weight.t())
        output_parallel = min_comm_config.reduce_scatter_along_first_dim(output_)
        if bias is not None:
            output_parallel = output_parallel + bias
        return output_parallel

    @staticmethod
    def backward(ctx, grad_output):
        total_input, weight = ctx.saved_tensors
        grad_output = min_comm_config.gather_along_first_dim(grad_output)
        is_grad_weight_needed, is_grad_bias_needed = is_grad_needed(ctx.needs_input_grad)
        need_grad_bias = is_grad_bias_needed and ctx.use_bias

        grad_input = grad_output.matmul(weight)
        grad_weight, grad_bias = None, None

        # Bias gradient is computed independently of the weight gradient.
        if is_grad_weight_needed or need_grad_bias:
            grad_output_2d = reshape_to_2D(grad_output)
            if is_grad_weight_needed:
                grad_weight = grad_output_2d.t().matmul(reshape_to_2D(total_input))
            if need_grad_bias:
                grad_bias = grad_output_2d.sum(dim=0)

        return grad_input, grad_weight, grad_bias
# User-editable CoC switches; read through get_value_from_cfg().
coc_cfgs = {
    'recompute_all_gather': True,
    'matmul_soc_friendly': True,
    'print_tensor_value_open': False,
    'customized_coc': {},
    'enable_coc_in_column_backward': False,
    'k_min': 1024,
    'k_max': 4096,
}


def check_config_valid():
    """Check that the resolved module type matches the sequence-parallel switch.

    Raises:
        ValueError: if ``min_comm_config.module_type`` belongs to the other
            communication style than ``sequence_parallel_enabled`` selects.
    """
    if min_comm_config.sequence_parallel_enabled:
        if min_comm_config.module_type not in [ModuleType.ORIGINAL_SEQ_PARALLEL,
                                               ModuleType.REWRITE_SEQ_PARALLEL,
                                               ModuleType.COC_FOR_SEQ_PARALLEL]:
            raise ValueError("In CoC, the config of sequence parallel is not valid")
    else:
        if min_comm_config.module_type not in [ModuleType.ORIGINAL_ALL_REDUCE,
                                               ModuleType.REWRITE_ALL_REDUCE,
                                               ModuleType.COC_FOR_ALL_REDUCE]:
            # This branch concerns the all-reduce module types, so say so.
            raise ValueError("In CoC, the config of all reduce is not valid")


def get_value_from_cfg(attr_name):
    """Return ``coc_cfgs[attr_name]``.

    Raises:
        RuntimeError: if ``attr_name`` is not a key of ``coc_cfgs``.
    """
    if attr_name not in coc_cfgs:
        # Format the name into the message; passing it as a second exception
        # argument rendered the error as a tuple.
        raise RuntimeError(f"Lack attr_name: {attr_name}")
    return coc_cfgs[attr_name]


def print_on_device0(msg):
    """Print ``msg`` only when the current NPU device index is 0."""
    if torch.npu.current_device() == 0:
        print(msg)


def initialize_coc_from_cfg(cfg):
    """Wire Megatron's tensor-parallel pieces into ``min_comm_config`` and patch the linears.

    Args:
        cfg: object providing ``sequence_parallel`` and ``tensor_model_parallel_size``.

    Raises:
        RuntimeError: if the resolved module type has no matching autograd classes.
    """
    # Imported lazily, inside the function, as in the original code.
    from megatron.core.tensor_parallel import ColumnParallelLinear, RowParallelLinear
    from megatron.core.parallel_state import (
        get_tensor_model_parallel_group,
        get_tensor_model_parallel_world_size,
        get_tensor_model_parallel_rank
    )
    from megatron.core.tensor_parallel.mappings import (
        _reduce,
        _reduce_scatter_along_first_dim,
        _gather_along_first_dim
    )
    min_comm_config.register_tp_get_functions(get_tensor_model_parallel_group,
                                              get_tensor_model_parallel_world_size,
                                              get_tensor_model_parallel_rank)
    min_comm_config.register_class(ColumnParallelLinear,
                                   RowParallelLinear)
    min_comm_config.register_mappings(_reduce,
                                      _reduce_scatter_along_first_dim,
                                      _gather_along_first_dim)
    min_comm_config.register_sequence_parallel_switch(cfg.sequence_parallel)

    min_comm_config.register_customized_coc(get_value_from_cfg('customized_coc'))
    min_comm_config.register_matmul_soc_friendly_setting(get_value_from_cfg('matmul_soc_friendly'),
                                                         int(get_value_from_cfg('k_min')),
                                                         int(get_value_from_cfg('k_max')))
    min_comm_config.register_all_gather_recomputation_switch(get_value_from_cfg('recompute_all_gather'))
    min_comm_config.register_print_tensor_value_switch(get_value_from_cfg('print_tensor_value_open'))
    min_comm_config.register_column_backward_coc_switch(get_value_from_cfg('enable_coc_in_column_backward'))
    min_comm_config.register_check_fcn(check_config_valid)
    min_comm_config.acquire_module_type(cfg.tensor_model_parallel_size)

    # [column autograd class, row autograd class] for every non-ORIGINAL module type.
    map_type2autograd_class = {
        ModuleType.REWRITE_SEQ_PARALLEL: [RewriteColumnSeqParallelFunction,
                                          RewriteRowSeqParallelFunction],
        ModuleType.REWRITE_ALL_REDUCE: [RewriteColumnAllReduceFunction,
                                        RewriteRowAllReduceFunction],
        ModuleType.COC_FOR_SEQ_PARALLEL: [COCColumnSeqParallelFunction,
                                          COCRowSeqParallelFunction],
        ModuleType.COC_FOR_ALL_REDUCE: [COCColumnAllReduceFunction,
                                        COCRowAllReduceFunction]
    }

    if min_comm_config.coc_fused_kernel:
        # The fused kernel takes precedence over the module type.
        print_on_device0("COC REPLACE WITH COC FUSED KERNEL SCRIPT!")
        if min_comm_config.sequence_parallel_enabled:
            min_comm_config.replace_forward_functions_by_autograd_class(FusedCOCColumnSeqParallelFunction,
                                                                        FusedCOCRowSeqParallelFunction)
        else:
            # Only the row side has a fused all-reduce implementation imported here.
            min_comm_config.replace_forward_functions_by_autograd_class(COCColumnAllReduceFunction,
                                                                        FusedCOCRowAllReduceFunction)
    elif "ORIGINAL" not in min_comm_config.module_type.name:
        if "REWRITE" in min_comm_config.module_type.name:
            print_on_device0("COC REPLACE WITH REWRITE SCRIPT!")
        else:
            print_on_device0("COC REPLACE WITH COC SCRIPT!")
        parallel_linear_autograd_class = map_type2autograd_class.get(min_comm_config.module_type)
        if parallel_linear_autograd_class is None:
            raise RuntimeError("Module type is not matched.")
        min_comm_config.replace_forward_functions_by_autograd_class(parallel_linear_autograd_class[0],
                                                                    parallel_linear_autograd_class[1])
    else:
        print_on_device0("COC REPLACE NONE!")
class _ReduceFromModelParallelRegion_Nd(torch.autograd.Function):
    """Autograd function that applies Megatron's ``_reduce`` in both directions.

    The forward value and the incoming gradient are each passed through
    ``_reduce``.
    """

    @staticmethod
    def symbolic(graph, input_):
        reduced = _reduce(input_)
        return reduced

    @staticmethod
    def forward(ctx, input_):
        reduced = _reduce(input_)
        return reduced

    @staticmethod
    def backward(ctx, grad_output):
        # The gradient is reduced as well, not passed through unchanged.
        reduced_grad = _reduce(grad_output)
        return reduced_grad


def reduce_from_tensor_model_parallel_region_nd(input_):
    """Functional entry point for ``_ReduceFromModelParallelRegion_Nd``."""
    reduce_fn = _ReduceFromModelParallelRegion_Nd
    return reduce_fn.apply(input_)
def _set_cuda_rng_state(new_state, device=-1):
    """Schedule setting the device RNG state to ``new_state`` via ``_lazy_call``.

    Args:
        new_state: RNG state to install.
        device: ``-1`` (default device), a device string, an int index, or a
            ``torch.device``.
    """
    if hasattr(_C, '_cuda_setRNGState') and callable(_C._cuda_setRNGState):
        # older PyTorch
        def cb():
            with device_ctx_manager(device):
                _C._cuda_setRNGState(new_state)

    else:
        # newer PyTorch
        # Normalise ``device`` to a torch.device so that ``.index`` can be read below.
        if device == -1:
            device = torch.device('cuda')
        elif isinstance(device, str):
            device = torch.device(device)
        elif isinstance(device, int):
            device = torch.device('cuda', device)

        def cb():
            idx = device.index
            if idx is None:
                idx = torch.cuda.current_device()
            # The state is applied to the NPU default generator of that index.
            default_generator = torch.npu.default_generators[idx]
            default_generator.set_state(new_state)

    _lazy_call(cb)


def checkpoint_function_backward(ctx, *args):
    """Backward of an activation-checkpointed function: recompute, then backprop.

    The forward pass is re-run under the RNG states saved on ``ctx`` during the
    original forward, the current RNG states are restored afterwards, and the
    incoming gradients ``args`` are back-propagated through the recomputed outputs.

    Returns:
        ``(None, None)`` followed by one gradient per saved input (non-tensor
        inputs are passed through unchanged).

    Raises:
        RuntimeError: if called through ``torch.autograd.grad`` instead of
            ``.backward()``.
    """
    global_args = get_args()
    if not torch.autograd._is_checkpoint_valid():
        raise RuntimeError(
            "Checkpointing is not compatible with .grad(), "
            "please use .backward() if possible"
        )
    inputs = ctx.saved_tensors
    if ctx.distribute_saved_activations:
        # Re-assemble the first input from its split 1-D form and restore its shape.
        safely_set_viewless_tensor_data(
            inputs[0], gather_split_1d_tensor(inputs[0].data).view(ctx.input_0_shape)
        )

    # Store the current states.
    bwd_cpu_rng_state = torch.get_rng_state()
    bwd_cuda_rng_state = torch.cuda.get_rng_state()
    bwd_cuda_rng_state_tracker = get_cuda_rng_tracker().get_states()

    # Set the states to what it used to be before the forward pass.
    torch.set_rng_state(ctx.fwd_cpu_rng_state)
    _set_cuda_rng_state(ctx.fwd_cuda_rng_state)
    get_cuda_rng_tracker().set_states(ctx.fwd_cuda_rng_state_tracker)

    # Compute the forward pass.
    # The FLOPs counter is paused around the recomputation and resumed afterwards.
    flops_counter = None
    if global_args.op_cal_tflops:
        from mindspeed.core.training import get_flops_counter
        flops_counter = get_flops_counter()
        flops_counter.pause()

    detached_inputs = detach_variable(inputs)
    from mindspeed.auto_tuning.module.parse.recompute_parser import get_recompute_parser, call_hook_func
    recompute_parser = get_recompute_parser()

    # Profiling hooks are installed only inside the profiling-step window and only
    # when the OOTB_OPTIMIZER_PROFILING environment variable is 'TRUE'.
    if (
            recompute_parser.skip_profiling_step <= recompute_parser.profiling_step <= recompute_parser.stop_profiling_step
            and os.getenv('OOTB_OPTIMIZER_PROFILING', 'FALSE') == 'TRUE'):
        call_hook_func()
    with torch.enable_grad():
        outputs = ctx.run_function(*detached_inputs)
    # remove hook
    # (runs unconditionally; the loop is a no-op when the list is empty)
    for hook_handle in recompute_parser.modules_hooks:
        hook_handle.remove()
    recompute_parser.modules_hooks.clear()

    if global_args.op_cal_tflops:
        flops_counter.resume()

    # Set the states back to what it was at the start of this function.
    torch.set_rng_state(bwd_cpu_rng_state)
    _set_cuda_rng_state(bwd_cuda_rng_state)
    get_cuda_rng_tracker().set_states(bwd_cuda_rng_state_tracker)

    if isinstance(outputs, torch.Tensor):
        outputs = (outputs,)

    # filter out non tensor outputs for backward pass
    # NOTE(review): if no output is a tensor with a grad_fn, the unpacking of an
    # empty zip raises ValueError -- confirm this cannot happen for callers.
    outputs, args = zip(*filter(lambda x: torch.is_tensor(x[0]) and x[0].grad_fn is not None, zip(outputs, args)))
    torch.autograd.backward(outputs, args)
    grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else inp for inp in detached_inputs)
    return (None, None) + grads
class CheckpointFunctionWithoutOutput(torch.autograd.Function):
    """Autograd function backing ``CheckpointWithoutOutput``.

    Forward runs ``run_function`` without building a graph and hands its own
    ``ctx`` to the ``checkpoint`` object. Backward expects ``ctx.outputs`` to have
    been filled in by ``CheckpointWithoutOutput.recompute`` beforehand.
    """

    @staticmethod
    def forward(ctx, run_function, checkpoint, *args):
        with torch.no_grad():
            outputs = run_function(*args)

        # Store everything
        ctx.save_for_backward(*detach_variable(args))
        # Give the owner access to this ctx so recompute() can attach the outputs.
        checkpoint.ctx = ctx

        return outputs

    @staticmethod
    def backward(ctx, *args):
        inputs = ctx.saved_tensors
        outputs = ctx.outputs
        torch.autograd.backward(outputs, args)
        # Drop the reference to the recomputed graph once it has been consumed.
        ctx.outputs = None
        grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else inp for inp in inputs)
        return (None, None) + grads


class CheckpointWithoutOutput:
    """Checkpoint helper whose forward outputs can be freed and recomputed later.

    Usage order visible in this class: ``checkpoint()`` runs the function,
    ``discard_output()`` releases the output storages, and ``recompute()``
    re-runs the function under the saved RNG states and refills those storages.
    """

    def __init__(self):
        self.run_function = None
        self.fwd_cpu_rng_state = None
        self.fwd_cuda_rng_state = None
        self.fwd_cuda_rng_state_tracker = None
        self.outputs = None
        # Set by CheckpointFunctionWithoutOutput.forward; declared here so the
        # attribute always exists.
        self.ctx = None

    def checkpoint(self, run_function, distribute_saved_activations, *args):
        """Run ``run_function(*args)`` without a graph and remember how to redo it.

        Raises:
            RuntimeError: if ``distribute_saved_activations`` is truthy (unsupported).
        """
        self.run_function = run_function

        if distribute_saved_activations:
            raise RuntimeError(
                "CheckpointFunctionWithoutOutput does not support "
                "distribute_saved_activations"
            )

        # Copy the rng states.
        self.fwd_cpu_rng_state = torch.get_rng_state()
        self.fwd_cuda_rng_state = torch.cuda.get_rng_state()
        self.fwd_cuda_rng_state_tracker = get_cuda_rng_tracker().get_states()

        outputs = CheckpointFunctionWithoutOutput.apply(run_function, self, *args)
        self.outputs = outputs
        if isinstance(self.outputs, torch.Tensor):
            self.outputs = (self.outputs,)

        return outputs

    def discard_output(self):
        """Free the storage of every output tensor while keeping the tensor objects."""
        for output in self.outputs:
            output.untyped_storage().resize_(0)

    def recompute(self, _):
        """Re-run the function and copy the results back into the discarded outputs.

        The single argument is ignored.

        Raises:
            RuntimeError: if called through ``torch.autograd.grad`` instead of
                ``.backward()``.
        """
        if not torch.autograd._is_checkpoint_valid():
            raise RuntimeError(
                "Checkpointing is not compatible with .grad(), "
                "please use .backward() if possible"
            )

        # Store the current states.
        cur_cpu_rng_state = torch.get_rng_state()
        cur_cuda_rng_state = torch.cuda.get_rng_state()
        cur_cuda_rng_state_tracker = get_cuda_rng_tracker().get_states()

        # Set the states to what it used to be before the forward pass.
        torch.set_rng_state(self.fwd_cpu_rng_state)
        _set_cuda_rng_state(self.fwd_cuda_rng_state)
        get_cuda_rng_tracker().set_states(self.fwd_cuda_rng_state_tracker)

        with torch.enable_grad():
            outputs = self.run_function(*self.ctx.saved_tensors)
        self.run_function = None
        self.fwd_cpu_rng_state = None
        self.fwd_cuda_rng_state = None
        self.fwd_cuda_rng_state_tracker = None

        # Set the states back to what it was at the start of this function.
        torch.set_rng_state(cur_cpu_rng_state)
        _set_cuda_rng_state(cur_cuda_rng_state)
        get_cuda_rng_tracker().set_states(cur_cuda_rng_state_tracker)

        if isinstance(outputs, torch.Tensor):
            outputs = (outputs,)

        # Refill the storages released by discard_output() with the recomputed values.
        for output, recomputation_output in zip(self.outputs, outputs):
            output_size = recomputation_output.untyped_storage().size()
            output.untyped_storage().resize_(output_size)
            with torch.no_grad():
                output.untyped_storage().copy_(recomputation_output.untyped_storage())

        # The autograd ctx keeps the graph-carrying outputs for its backward.
        self.ctx.outputs = outputs
        self.outputs = None
        self.ctx = None


class RngStateContext:
    """Plain holder for the three RNG states captured before a forward pass."""

    def __init__(self, cpu_rng_state, cuda_rng_state, cuda_rng_state_tracker):
        self.fwd_cpu_rng_state = cpu_rng_state
        self.fwd_cuda_rng_state = cuda_rng_state
        self.fwd_cuda_rng_state_tracker = cuda_rng_state_tracker
class CheckpointFunctionRipipe(torch.autograd.Function):
    """Checkpoint function whose recomputation can be triggered ahead of backward.

    Forward builds a ``recompute`` closure and, when the pipeline checkpoint
    manager has ``do_pre_recompute`` set, registers it with the manager. The
    closure stores its result on ``ctx.outputs``; backward then either finds the
    outputs already there or runs the recomputation itself.
    """

    @staticmethod
    def forward(ctx, run_function, distribute_saved_activations, *args):
        # Capture the RNG states in effect before the forward pass.
        fwd_rng_state = RngStateContext(torch.get_rng_state(), torch.cuda.get_rng_state(), get_cuda_rng_tracker().get_states())
        with torch.no_grad():
            outputs = run_function(*args)

        # Store everything.
        ctx.detached_inputs = detach_variable(args)

        def recompute():
            # Store the current states.
            bwd_cpu_rng_state = torch.get_rng_state()
            bwd_cuda_rng_state = torch.cuda.get_rng_state()
            bwd_cuda_rng_state_tracker = get_cuda_rng_tracker().get_states()

            # Set the states to what it used to be before the forward pass.
            torch.set_rng_state(fwd_rng_state.fwd_cpu_rng_state)
            _set_cuda_rng_state(fwd_rng_state.fwd_cuda_rng_state)
            get_cuda_rng_tracker().set_states(fwd_rng_state.fwd_cuda_rng_state_tracker)

            with torch.enable_grad():
                outputs = run_function(*ctx.detached_inputs)
                # Presence of ``ctx.outputs`` is what backward uses as "recompute done".
                ctx.outputs = outputs

            # Set the states back to what it was at the start of this function.
            torch.set_rng_state(bwd_cpu_rng_state)
            _set_cuda_rng_state(bwd_cuda_rng_state)
            get_cuda_rng_tracker().set_states(bwd_cuda_rng_state_tracker)
        if get_pipeline_checkpoint_manager().do_pre_recompute:
            get_pipeline_checkpoint_manager().add_recompute(recompute)
        ctx.recompute_func = recompute

        return outputs

    @staticmethod
    def backward(ctx, *args):
        if not torch.autograd._is_checkpoint_valid():
            raise RuntimeError(
                "Checkpointing is not compatible with .grad(), "
                "please use .backward() if possible"
            )
        if not hasattr(ctx, 'outputs'):
            if get_pipeline_checkpoint_manager().do_pre_recompute:
                global_args = get_args()
                vpp_rank = get_virtual_pipeline_model_parallel_rank()
                # For last vpp chunk of last pp stage, we don't advance its recomputation.
                # NOTE(review): the condition below fires on the last pipeline stage when
                # recompute_in_advance is set -- confirm it matches the comment above.
                if global_args.recompute_in_advance and is_pipeline_last_stage():
                    get_pipeline_checkpoint_manager().recompute_next(vpp_rank)
                if not hasattr(ctx, 'outputs'):
                    raise RuntimeError(f"rank-{torch.distributed.get_rank()}: recompute is not done")
            else:
                ctx.recompute_func()

        outputs = ctx.outputs
        detached_inputs = ctx.detached_inputs
        # Release everything held on ctx before running the backward pass.
        ctx.outputs = None
        ctx.detached_inputs = None
        ctx.recompute_func = None

        if isinstance(outputs, torch.Tensor):
            outputs = (outputs,)

        # filter out non tensor outputs for backward pass
        outputs, args = zip(*filter(lambda x: torch.is_tensor(x[0]), zip(outputs, args)))
        torch.autograd.backward(outputs, args)
        grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else inp for inp in detached_inputs)
        return (None, None) + grads


def checkpoint_wrapper(checkpoint):
    """Decorate a ``checkpoint(function, distribute_saved_activations, *args)`` callable.

    The wrapper consults the pipeline checkpoint manager:

    * ``open_ri_pipe`` false -> call the wrapped ``checkpoint`` unchanged;
    * ``chunk_do_recompute`` false -> call ``function(*args)`` with no checkpointing;
    * otherwise -> use ``CheckpointFunctionRipipe``.

    Raises:
        RuntimeError: on the ``CheckpointFunctionRipipe`` path when
            ``distribute_saved_activations`` is truthy.
    """
    @wraps(checkpoint)
    def wrapper(function, distribute_saved_activations, *args):
        if not get_pipeline_checkpoint_manager().open_ri_pipe:
            return checkpoint(function, distribute_saved_activations, *args)
        if not get_pipeline_checkpoint_manager().chunk_do_recompute:
            return function(*args)

        if distribute_saved_activations:
            raise RuntimeError("no distributed")

        return CheckpointFunctionRipipe.apply(function, distribute_saved_activations, *args)

    return wrapper
+ last_dim_split_comm_intf: Reduce scatter comm intf. + """ + + def __init__( + self, + hidden_size: int, + eps: float = 1e-5, + bias: bool = True, + dtype=None, + last_dim_split_comm_intf: CollectiveCommIntf = TPYCollectiveComm(), + ) -> None: + super(LayerNorm2D, self).__init__() + # layer norm config + self.hidden_size = hidden_size + self.epsilon = eps + + # parallel setting + self.last_dim_split_comm_intf = last_dim_split_comm_intf + self.rs_comm_world_sz = self.last_dim_split_comm_intf.get_comm_group_world_size() + # partitioning dimension + self.partitioned_dim = divide(hidden_size, self.rs_comm_world_sz) + # create parameters + factory_kwargs = {"device": torch.cuda.current_device(), "dtype": dtype} + + # [H/(xy)] + self.weight = Parameter(torch.ones(self.partitioned_dim, **factory_kwargs)) + if bias: + # [H/(xy)] + self.bias = Parameter(torch.zeros(self.partitioned_dim, **factory_kwargs)) + else: + self.bias = None + + # set sequence parallelism flag on weight and bias parameters + setattr(self.weight, "2d_tp", True) + setattr(self.bias, "2d_tp", True) + + def forward(self, x: Tensor) -> Tensor: + return _ParallelLayerNorm2D.apply( + x, + self.weight, + self.bias, + self.epsilon, + self.hidden_size, + self.last_dim_split_comm_intf, + ) + + +class _ParallelLayerNorm2D(torch.autograd.Function): + @staticmethod + @custom_fwd + def forward( + ctx: Any, + input_: Tensor, + weight, + bias, + epsilon, + hidden_size: int, + last_dim_split_comm_intf: CollectiveCommIntf + ) -> Tensor: + """ + + :param ctx: + :param input_: [s/(cp*x), b, H/y] + :param weight: [H/(xy)] + :param bias: [H/(xy)] + :param epsilon: + :param hidden_size: H + :param last_dim_split_comm_intf: + :return: + """ + # [s/(cp*x), b, H/y]---> [s/(cp*x), b, 1] + e_x = torch.sum(input_, dim=-1, keepdim=True) + # [s/(cp*x), b, 1] + handle_ex = torch.distributed.all_reduce( + e_x, group=last_dim_split_comm_intf.get_comm_group(), async_op=True + ) + + # [s/(cp*x), b, H/y]---> [s/(cp*x), b, 1] + var_x = 
torch.sum(input_.float().pow(2), dim=-1, keepdim=True) + if handle_ex: + handle_ex.wait() + + handle_var = torch.distributed.all_reduce( + var_x, group=last_dim_split_comm_intf.get_comm_group(), async_op=True + ) + + input_.sub_(e_x.div_(hidden_size)) + e_x.mul_(e_x) + if handle_var: + handle_var.wait() + + var_x = torch.rsqrt(var_x.div_(hidden_size).sub_(e_x).add_(epsilon)) + + ctx.hidden_size = hidden_size + ctx.last_dim_split_comm_intf = last_dim_split_comm_intf + # [s/(cp*x), b, H/y] * [s/(cp*x), b, 1] --> [s/(cp*x), b, H/y] + norm_x = torch.mul(input_, var_x) + + if bias is not None: + # bias + weight * norm, [H/y] + [H/y] * [s/(cp*x), b, H/y] + output = torch.addcmul(bias, weight, norm_x) + else: + output = torch.mul(weight, norm_x) + + ctx.save_for_backward(norm_x, var_x, bias, weight) + return output + + @staticmethod + @custom_bwd + def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: + x, var_x, bias, weight = ctx.saved_tensors + # calculate grad_bias + if bias is None: + grad_bias = None + else: + grad_bias = output_grad.sum(dim=(0, 1)) + + # calculate grad_input + grad_norm_x = torch.mul(output_grad, weight) + output_grad_sum = torch.sum(grad_norm_x, dim=-1, keepdim=True) + handle_grad_sum = torch.distributed.all_reduce( + output_grad_sum, group=ctx.last_dim_split_comm_intf.get_comm_group(), async_op=True + ) + output_grad_mul_x_sum = torch.sum(grad_norm_x * x, dim=-1, keepdim=True) + + # calculate grad_weight + grad_weight = torch.mul(output_grad, x) + grad_weight = grad_weight.sum(dim=(0, 1)) + + if handle_grad_sum: + handle_grad_sum.wait() + + handle_grad_mul_x = torch.distributed.all_reduce( + output_grad_mul_x_sum, group=ctx.last_dim_split_comm_intf.get_comm_group(), async_op=True + ) + output_grad_sum.div_(ctx.hidden_size) + grad_input = grad_norm_x.sub(output_grad_sum) + if handle_grad_mul_x: + handle_grad_mul_x.wait() + + grad_input = (grad_input - x * (output_grad_mul_x_sum / ctx.hidden_size)) * var_x + return grad_input, 
# Number of zero rows appended in the most recent MoELinear2DFC1.forward so the
# token dimension divides the reduce-scatter group size. Read back by
# MoELinear2DFC2.forward to strip those rows after its all-gather.
G_FORWARD_PADDING_SIZE = 0
# Same idea for the backward pass: written by MoELinear2DFC2.backward and read
# by MoELinear2DFC1.backward.
G_BACKWARD_PADDING_SIZE = 0


class MoELinear2DFC1(torch.autograd.Function):
    """2D Linear out axe communication implementation.

    Forward: split the input along its last dim, multiply by ``weight.t()``,
    zero-pad the token dim to a multiple of the reduce-scatter group size and
    reduce-scatter along the first dim.

    NOTE(review): ``bias`` is only used to decide whether a bias gradient is
    returned; it is not added to the forward result in this class. Confirm
    callers never pass a bias here.
    """

    @staticmethod
    @custom_fwd
    def forward(
        ctx,
        activation_input,
        weight,
        bias,
        ag_comm_intf: CollectiveCommIntf,
        ag_overlap_comm_intf: OverlapCollectiveIntf,
        rs_comm_intf: CollectiveCommIntf,
        rs_overlap_comm_intf: OverlapCollectiveIntf,
        enable_overlap_ag_with_matmul=False,
        enable_overlap_matmul_with_rs=False,
        gradient_accumulation_fusion=False,
        enable_backward_overlap_ag_with_matmul=False,
        partition_dim=0,
    ):
        """
        :param ctx: context to save some tensors or vars for backward use.
        :param activation_input: 2D token matrix, [n, h] per the inline shape comments
            (the result is unpacked as a 2-tuple shape below).
        :param weight: with shape: [h/y, E/x], E means the output size.
        :param bias: bias parameter tensor; only its presence is recorded.
        :param ag_comm_intf: AllGather communication process group interface.
        :param ag_overlap_comm_intf: AllGather communication overlap send and recv comm group
        :param rs_comm_intf: ReduceScatter communication process group interface.
        :param rs_overlap_comm_intf: ReduceScatter communication overlap send and recv comm group
        :param enable_overlap_ag_with_matmul: accepted for signature parity; not read in this method.
        :param enable_overlap_matmul_with_rs: accepted for signature parity; not read in this method.
        :param gradient_accumulation_fusion: enable gradient accumulation fusion
        :param enable_backward_overlap_ag_with_matmul: stored on ctx; not read by this class's backward.
        :param partition_dim: accepted for signature parity; not read in this method.
        :return: forward result tensor, [N1/y, E/x] where N1 is the padded token count.
        """
        ctx.weight = weight
        ctx.use_bias = bias is not None
        ctx.rs_comm_intf = rs_comm_intf
        ctx.ag_comm_intf = ag_comm_intf
        ctx.ag_overlap_comm_intf = ag_overlap_comm_intf
        ctx.rs_overlap_comm_intf = rs_overlap_comm_intf
        ctx.gradient_accumulation_fusion = gradient_accumulation_fusion
        ctx.enable_backward_overlap_ag_with_matmul = enable_backward_overlap_ag_with_matmul

        activation_input = activation_input.contiguous()
        # [n, h] -> [n, h/y]
        activation_input = _split_along_last_dim(activation_input, TPYCollectiveComm)
        # The already-split input is what backward needs for grad_weight.
        ctx.save_for_backward(activation_input)
        # [N, h/y] @ [h/y, E/x] -> [N, E/x]
        matmul_res = torch.matmul(activation_input, weight.npu().t())
        matmul_res = matmul_res.contiguous()
        n_tokens, h = matmul_res.shape
        rs_size = rs_comm_intf.get_comm_group_world_size()
        global G_FORWARD_PADDING_SIZE
        # remaining == n_tokens % rs_size; pad up to the next multiple of rs_size.
        remaining = n_tokens - n_tokens // rs_size * rs_size
        G_FORWARD_PADDING_SIZE = rs_size - remaining if remaining else 0
        if G_FORWARD_PADDING_SIZE != 0:
            padding_tensor = torch.zeros(G_FORWARD_PADDING_SIZE, h, dtype=matmul_res.dtype,
                                         device=matmul_res.device)
            matmul_res = torch.cat((matmul_res, padding_tensor), dim=0)
        matmul_res = matmul_res.contiguous()
        # [N1, E/x] -> [N1/y, E/x]
        matmul_res = sync_reduce_scatter_along_first_dim(matmul_res, rs_comm_intf)
        return matmul_res

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output):
        """Return one gradient per forward input: (grad_input, grad_weight, grad_bias, 9 x None).

        Relies on ``G_BACKWARD_PADDING_SIZE`` having been set by
        ``MoELinear2DFC2.backward`` earlier in the same backward pass.
        """
        # activation_input shape: [n, h]
        # weight shape: [h/y, E/x]
        activation_input, = ctx.saved_tensors
        weight = ctx.weight
        use_bias = ctx.use_bias
        # [N1/y, E/x]---AG(y)---> [N1, E/x]
        grad_output = grad_output.contiguous()
        global G_BACKWARD_PADDING_SIZE
        total_grad_output = sync_gather_along_first_dim(grad_output, ctx.rs_comm_intf)
        if G_BACKWARD_PADDING_SIZE != 0:
            # Drop the rows that correspond to padding.
            real_input_num = total_grad_output.shape[0] - G_BACKWARD_PADDING_SIZE
            # [N1, E/x] --> [N, E/x]
            total_grad_output = total_grad_output[:real_input_num, :]

        # prepare total activation_input for computing grad weight.
        # [N, h/y]
        total_activation_input = activation_input.contiguous()

        # [N, E/x] @ [E/x, H/y]--> [N, H/y] (partial x)
        partial_grad_input = total_grad_output.matmul(weight).contiguous()
        grad_input = partial_grad_input
        if ctx.gradient_accumulation_fusion:
            # Fused path: the weight gradient is accumulated straight into weight.main_grad.
            import fused_weight_gradient_mlp_cuda
            total_grad_output = total_grad_output.contiguous()
            if weight.main_grad.dtype == torch.float32:
                fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32(
                    total_activation_input, total_grad_output, weight.main_grad
                )
            elif weight.main_grad.dtype in (torch.float16, torch.bfloat16):
                fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16(
                    total_activation_input, total_grad_output, weight.main_grad
                )
            else:
                raise RuntimeError("Unsupported gradient type for gradient accumulation fusion")

            if hasattr(weight, 'grad_added_to_main_grad'):
                # When overlap_grad_reduce is True, need to ensure that backward hooks
                # are all run on the main backprop thread to prevent deadlocks. Setup
                # dummy grad_weight tensor to prevent backward hooks from being run
                # in a background thread.
                if getattr(weight, 'zero_out_wgrad', False):
                    grad_weight = torch.zeros(
                        weight.main_grad.shape,
                        dtype=activation_input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                else:
                    grad_weight = torch.empty(
                        weight.main_grad.shape,
                        dtype=activation_input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                weight.grad_added_to_main_grad = True
            else:
                grad_weight = None
        else:
            # [E/x, N] @ [N, h/y] ---> [E/x, h/y]
            grad_weight = total_grad_output.t().matmul(total_activation_input)
        grad_bias = total_grad_output.sum(dim=0) if use_bias else None
        # Mirror of the forward last-dim split: gather the hidden dim back.
        grad_input = sync_gather_along_last_dim(grad_input, ctx.rs_comm_intf)
        return grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None, None, None
class MoELinear2DFC2(torch.autograd.Function):
    """2D Linear out axe communication implementation.

    Forward: all-gather the input along the first dim, strip the rows that
    ``MoELinear2DFC1.forward`` padded (tracked in ``G_FORWARD_PADDING_SIZE``),
    multiply by ``weight.t()`` and gather the result along the last dim.

    NOTE(review): ``bias`` is only used to decide whether a bias gradient is
    returned; it is not added to the forward result in this class.
    """

    @staticmethod
    @custom_fwd
    def forward(
        ctx,
        activation_input,
        weight,
        bias,
        ag_comm_intf: CollectiveCommIntf,
        ag_overlap_comm_intf: OverlapCollectiveIntf,
        rs_comm_intf: CollectiveCommIntf,
        rs_overlap_comm_intf: OverlapCollectiveIntf,
        enable_overlap_ag_with_matmul=False,
        enable_overlap_matmul_with_rs=False,
        gradient_accumulation_fusion=False,
        enable_backward_overlap_ag_with_matmul=False,
        partition_dim=0,
    ):
        """
        :param ctx: context to save some tensors or vars for backward use.
        :param activation_input: 2D token matrix, [N1/y, E/x] per the inline shape comments.
        :param weight: with shape: [h/y, E/x], E means the output size.
        :param bias: bias parameter tensor; only its presence is recorded.
        :param ag_comm_intf: AllGather communication process group interface.
        :param ag_overlap_comm_intf: AllGather communication overlap send and recv comm group
        :param rs_comm_intf: ReduceScatter communication process group interface.
        :param rs_overlap_comm_intf: ReduceScatter communication overlap send and recv comm group
        :param enable_overlap_ag_with_matmul: accepted for signature parity; not read in this method.
        :param enable_overlap_matmul_with_rs: accepted for signature parity; not read in this method.
        :param gradient_accumulation_fusion: enable gradient accumulation fusion
        :param enable_backward_overlap_ag_with_matmul: stored on ctx; not read by this class's backward.
        :param partition_dim: accepted for signature parity; not read in this method.
        :return: forward result tensor.
        """
        ctx.save_for_backward(activation_input)
        ctx.weight = weight
        ctx.use_bias = bias is not None
        ctx.rs_comm_intf = rs_comm_intf
        ctx.ag_comm_intf = ag_comm_intf
        ctx.ag_overlap_comm_intf = ag_overlap_comm_intf
        ctx.rs_overlap_comm_intf = rs_overlap_comm_intf
        ctx.gradient_accumulation_fusion = gradient_accumulation_fusion
        ctx.enable_backward_overlap_ag_with_matmul = enable_backward_overlap_ag_with_matmul
        activation_input = activation_input.contiguous()
        # [N1/y, E/x] -> ag(y) -> [N1, E/x]
        total_input = sync_gather_along_first_dim(activation_input, ag_comm_intf)
        # Module-level value written by MoELinear2DFC1.forward (read-only here).
        if G_FORWARD_PADDING_SIZE != 0:
            real_input_num = total_input.shape[0] - G_FORWARD_PADDING_SIZE
            # [N1, E/x] -> [N, E/x]
            total_input = total_input[:real_input_num, :]
        # [N, E/x] @ [E/x, h/y] -> [N, h/y] (partial x)
        matmul_res = torch.matmul(total_input, weight.npu().t())
        matmul_res = sync_gather_along_last_dim(matmul_res, TPYCollectiveComm)
        return matmul_res

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output):
        """Return one gradient per forward input: (grad_input, grad_weight, grad_bias, 9 x None).

        Also records in ``G_BACKWARD_PADDING_SIZE`` how many zero rows were
        appended to the input gradient, for ``MoELinear2DFC1.backward`` to strip.
        The all-gather of the saved input and the reduce-scatter of the input
        gradient are launched asynchronously and waited on only where needed.
        """
        # activation_input shape: [N1/y, E/x]
        # weight shape: [h/y, E/x]
        activation_input, = ctx.saved_tensors
        weight = ctx.weight
        use_bias = ctx.use_bias
        # [N, h] -> [N, h/y]
        grad_output = grad_output.contiguous()
        grad_output = _split_along_last_dim(grad_output, ctx.ag_comm_intf)

        global G_BACKWARD_PADDING_SIZE
        # [N1/y, E/x]---AG(y)--->[N1, E/x]
        activation_input = activation_input.contiguous()
        gather_input_handle, gathered_tensors = async_gather_tensors(
            local_rank_input=activation_input, ag_comm_intf=ctx.ag_comm_intf
        )
        # [N, h/y] @ [E/x, H/y]--> [N, E/x] (partial y)
        partial_grad_input = grad_output.matmul(weight).contiguous()
        sb, h = partial_grad_input.shape
        rs_size = ctx.ag_comm_intf.get_comm_group_world_size()

        # remaining == sb % rs_size; pad up to the next multiple of rs_size.
        remaining = sb - sb // rs_size * rs_size
        G_BACKWARD_PADDING_SIZE = rs_size - remaining if remaining else 0

        if G_BACKWARD_PADDING_SIZE != 0:
            padding_tensor = torch.zeros(G_BACKWARD_PADDING_SIZE, h, dtype=partial_grad_input.dtype,
                                         device=partial_grad_input.device)
            # [N, E/x] --> [N1, E/x]
            partial_grad_input = torch.cat((partial_grad_input, padding_tensor), dim=0)
        partial_grad_input = partial_grad_input.contiguous()
        # [N1, E/x] --> [N1/y, E/x]
        rs_grad_input_handle, grad_input = async_reduce_scatter_along_first_dim(
            partial_grad_input, comm_intf=ctx.ag_comm_intf
        )

        if gather_input_handle:
            gather_input_handle.wait()
        # [N1, E/x]
        total_activation_input = gathered_tensors.contiguous()
        if G_BACKWARD_PADDING_SIZE != 0:
            real_input_num = total_activation_input.shape[0] - G_BACKWARD_PADDING_SIZE
            # [N1, E/x] -> [N, E/x]
            total_activation_input = total_activation_input[:real_input_num, :]
        total_activation_input = total_activation_input.contiguous()
        if ctx.gradient_accumulation_fusion:
            # Fused path: the weight gradient is accumulated straight into weight.main_grad.
            import fused_weight_gradient_mlp_cuda
            grad_output = grad_output.contiguous()
            if weight.main_grad.dtype == torch.float32:
                fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32(
                    total_activation_input, grad_output, weight.main_grad
                )
            elif weight.main_grad.dtype in (torch.float16, torch.bfloat16):
                fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16(
                    total_activation_input, grad_output, weight.main_grad
                )
            else:
                raise RuntimeError("Unsupported gradient type for gradient accumulation fusion")

            if hasattr(weight, 'grad_added_to_main_grad'):
                # When overlap_grad_reduce is True, need to ensure that backward hooks
                # are all run on the main backprop thread to prevent deadlocks. Setup
                # dummy grad_weight tensor to prevent backward hooks from being run
                # in a background thread.
                if getattr(weight, 'zero_out_wgrad', False):
                    grad_weight = torch.zeros(
                        weight.main_grad.shape,
                        dtype=activation_input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                else:
                    grad_weight = torch.empty(
                        weight.main_grad.shape,
                        dtype=activation_input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                weight.grad_added_to_main_grad = True
            else:
                grad_weight = None
        else:
            # [N, E/x] @ [E/x, h/y] --> [N, h/y] (partial x)
            grad_weight = grad_output.t().matmul(total_activation_input)
        grad_bias = grad_output.sum(dim=0) if use_bias else None

        # grad_input is only valid once the async reduce-scatter has completed.
        if rs_grad_input_handle:
            rs_grad_input_handle.wait()
        return grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None, None, None
def get_comm_domain_rank(devid, ag_size, rs_size, para_type=0):
    """Map a device index to a ``(comm_domain, rank)`` pair for all-gather v2 in the RS domain.

    The domain is returned as a string: ``10 + group index`` when ``ag_size``
    is 2 and ``20 + group index`` otherwise. Grouping layouts, as given by the
    examples in the original comments (16 devices):

    * ``para_type == 0`` (TFTF)
        - ``ag_size == 2`` (RS=8): [0 1 2 ... 7], [8 9 10 ... 15]
        - otherwise (RS=2): [0, 8], [1, 9] ... [7, 15]
    * any other ``para_type`` (FTFT)
        - ``ag_size == 2`` (RS=8): [0 2 4 ... 14], [1 3 5 ... 15]
        - otherwise (RS=2): [0 1], [2 3], [4 5] ...

    :param devid: device index.
    :param ag_size: all-gather group size.
    :param rs_size: reduce-scatter group size.
    :param para_type: 0 selects the TFTF layout, anything else FTFT.
    :return: tuple ``(domain_name: str, rank_in_domain: int)``.
    """
    domain_base = 10 if ag_size == 2 else 20
    # Consecutive device ids share a domain exactly when the layout flag and
    # the "ag_size is 2" flag agree; otherwise members are strided by ag_size.
    if (para_type == 0) == (ag_size == 2):
        domain_idx, rank = divmod(devid, rs_size)
    else:
        rank, domain_idx = divmod(devid, ag_size)
    return str(domain_base + domain_idx), rank
class Linear2DSplitAlongFirstDim(torch.autograd.Function):
    """2D Linear out axe communication implementation.

    Forward is all-gather (first dim) -> matmul with ``weight.t()`` ->
    reduce-scatter (first dim), with several optional strategies for
    overlapping the communication with the matmul.
    """

    @staticmethod
    @custom_fwd
    def forward(
        ctx,
        activation_input,
        weight,
        bias,
        ag_comm_intf: CollectiveCommIntf,
        ag_overlap_comm_intf: OverlapCollectiveIntf,
        rs_comm_intf: CollectiveCommIntf,
        rs_overlap_comm_intf: OverlapCollectiveIntf,
        enable_overlap_ag_with_matmul=False,
        enable_overlap_matmul_with_rs=False,
        gradient_accumulation_fusion=False,
        enable_backward_overlap_ag_with_matmul=False,
        partition_dim=0,
    ):
        """
        :param ctx: context to save some tensors or vars for backward use.
        :param activation_input: with shape: [s/(x*cp), b, h/y]
        :param weight: with shape: [h/y, E/x], E means the output size.
        :param bias: bias parameter tensor.
        :param ag_comm_intf: AllGather communication process group interface.
        :param ag_overlap_comm_intf: AllGather communication overlap send and recv comm group
        :param rs_comm_intf: ReduceScatter communication process group interface.
        :param rs_overlap_comm_intf: ReduceScatter communication overlap send and recv comm group
        :param enable_overlap_ag_with_matmul:  enable overlap all-gather with matmul in forward
        :param enable_overlap_matmul_with_rs: enable overlap matmul with reduce-scatter in forward
        :param gradient_accumulation_fusion: enable gradient accumulation fusion
        :param enable_backward_overlap_ag_with_matmul: enable overlap all-gather with matmul
        :param partition_dim: only read on the fused-kernel path, where ``0`` sets
            the ``inner_dim_is_ag`` flag of ``TP2DConfig`` to True.
        :return: forward result tensor.
        """
        ctx.save_for_backward(activation_input)
        ctx.weight = weight
        ctx.use_bias = bias is not None
        ctx.rs_comm_intf = rs_comm_intf
        ctx.ag_comm_intf = ag_comm_intf
        ctx.ag_overlap_comm_intf = ag_overlap_comm_intf
        ctx.rs_overlap_comm_intf = rs_overlap_comm_intf
        ctx.gradient_accumulation_fusion = gradient_accumulation_fusion
        ctx.enable_backward_overlap_ag_with_matmul = enable_backward_overlap_ag_with_matmul

        if enable_overlap_matmul_with_rs:
            activation_input = activation_input.contiguous()
            return Linear2DSplitAlongFirstDim._do_mm_overlap_reducescatter(
                activation_input, weight.t(), bias, ag_comm_intf, rs_comm_intf
            )

        # first_linear forward: [s/cp, b, H/y] @ [H/y, e/x] -> [s/cp, b, e/x]
        if enable_overlap_ag_with_matmul:
            matmul_res, _ = Linear2DSplitAlongFirstDim._do_allgather_left_tensor_and_matmul_overlap(
                ag_comm_intf,
                ag_overlap_comm_intf,
                part_left_tensor=activation_input,
                full_right_tensor=weight.t(),
            )

            if bias is not None:
                matmul_res += bias
        elif get_args().coc_fused_kernel:
            from mindspeed.ops.lcal_functional import coc_ops, TP2DConfig
            inner_dim_is_ag = partition_dim == 0
            # [s/(x*cp), b, H/y] -> [s/cp, b, H/y] -> [s/(cp*y), b, H/x]
            s, b, h = activation_input.shape
            # Convert the tensor shapes to 2D for execution compatibility
            activation_input = activation_input.view(
                s * b, h
            )
            # NOTE(review): the input has s * b rows but this row count omits b,
            # while the result is viewed as (-1, b, E) below. Looks correct only
            # for b == 1 -- confirm against the coc_ops kernel contract.
            res_shape_0 = s * ag_comm_intf.get_comm_group_world_size() // rs_comm_intf.get_comm_group_world_size()
            res_shape_1 = weight.shape[0]
            matmul_res = torch.empty(res_shape_0, res_shape_1, dtype=activation_input.dtype, device=torch.cuda.current_device())
            coc_ops.all_gather_matmul_reduce_scatter(activation_input, weight, matmul_res,
                                                     TP2DConfig(
                                                         ag_comm_intf.get_comm_group_world_size(),
                                                         rs_comm_intf.get_comm_group_world_size(),
                                                         inner_dim_is_ag),
                                                     bias=bias)
            return matmul_res.view(-1, b, res_shape_1)
        else:
            # [s/(x*cp), b, H/y] -> [s/cp, b, H/y]
            activation_input = activation_input.contiguous()
            total_input = sync_gather_along_first_dim(activation_input, ag_comm_intf, buffer_name="mpu-sync-tp-2d")
            # [s/cp, b, H/y] @ [H/y, e/x] -> [s/cp, b, e/x]
            matmul_res = torch.matmul(total_input, weight.t())
        # [s/cp, b, E/x] -> [s/(y*cp), b, E/x]
        matmul_res = matmul_res.contiguous()
        matmul_res = sync_reduce_scatter_along_first_dim(matmul_res, rs_comm_intf)
        return matmul_res

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output):
        """Backward implementation of Linear2DSplitAlongFirstDim, the computation and communication
        overlap:

        ----------------------------------------------------------------------------->time
        | AG(grad_o, Y|X)
        |                    AG(activation_input, X|Y)
        |                    part_grad_act = MM(tot_grad_o, weight)
        |                                                   RS(part_grad_act, X|Y)
        |                                                   MM(tot_grad_o^T, tot_act_input)


        :param ctx: context
        :param grad_output: with shape: [s/cp, b, E/(xy)]
        :return: grads of all the input para of forward function as a tuple
            (one entry per forward input, ``None`` for the non-tensor ones).
        """
        # activation_input shape: [s/(x*cp), b, h/y]
        # weight shape: [h/y, E/x]
        activation_input, = ctx.saved_tensors
        weight = ctx.weight
        use_bias = ctx.use_bias
        s, b, h = grad_output.shape
        # first we prepare the total inputs needed to compute grad_input, grad_weight.
        # [s/(y*cp), b, E/x]---AG(y)---> [s/cp, b, E/x]
        # Use sync AG to avoid communication competition, for the bandwidth is shared for A3.
        grad_output = grad_output.contiguous()
        if ctx.enable_backward_overlap_ag_with_matmul and get_args().coc_fused_kernel:
            from mindspeed.ops.lcal_functional import coc_ops, CoCConfig
            # prepare total activation_input for computing grad weight.
            # [s/(x*cp), b, h/y]---AG(X)--->[s/cp, b, h/y]
            activation_input = activation_input.contiguous()
            gather_input_handle, gathered_tensors = async_gather_tensors(
                local_rank_input=activation_input, ag_comm_intf=ctx.ag_comm_intf
            )

            # Convert the tensor shapes to 2D for execution compatibility
            grad_output = grad_output.view(s * b, h)
            ag_size = ctx.ag_comm_intf.get_comm_group_world_size()
            rs_size = ctx.rs_comm_intf.get_comm_group_world_size()
            res_shape_0 = s * b * rs_size

            res_shape_1 = weight.shape[1]
            partial_grad_input = torch.empty(res_shape_0, res_shape_1, dtype=grad_output.dtype, device=torch.cuda.current_device())

            # The fused kernel writes both the gathered grad_output (comm_output)
            # and grad_output @ weight (output).
            total_grad_output = torch.empty(res_shape_0, h, dtype=grad_output.dtype, device=torch.npu.current_device())
            comm_domain, coc_rank = get_comm_domain_rank(total_grad_output.device.index, ag_size, rs_size)
            coc_ops.set_comm_config(CoCConfig(coc_rank, rs_size, comm_domain))
            coc_ops.all_gather_matmul_v2(input1=grad_output, input2=weight, output=partial_grad_input, comm_output=total_grad_output)
            partial_grad_input = partial_grad_input.view(-1, b, partial_grad_input.shape[1])
        else:
            total_grad_output = sync_gather_along_first_dim(grad_output, ctx.rs_comm_intf, buffer_name="mpu-sync-tp-2d")
            # prepare total activation_input for computing grad weight.
            # [s/(x*cp), b, h/y]---AG(X)--->[s/cp, b, h/y]
            activation_input = activation_input.contiguous()
            gather_input_handle, gathered_tensors = async_gather_tensors(
                local_rank_input=activation_input, ag_comm_intf=ctx.ag_comm_intf
            )

            # [s/cp, b, E/x] @ [E/x, H/y]--> [s/cp, b, H/y] (partial sum)
            partial_grad_input = total_grad_output.matmul(weight).contiguous()

            # Convert the tensor shapes to 2D for execution compatibility
            sb = total_grad_output.shape[0] * total_grad_output.shape[1]
            # [s/cp, b, E/x]--view--> [sb/cp, E/x]
            total_grad_output = total_grad_output.view(sb, total_grad_output.shape[2])

        # [s/cp, b, H/y] (partial sum)---RS(X)--->[s/cp, b, H/(xy)] (full sum)
        rs_grad_input_handle, grad_input = async_reduce_scatter_along_first_dim(
            partial_grad_input, comm_intf=ctx.ag_comm_intf
        )

        if gather_input_handle:
            gather_input_handle.wait()

        # [s/(x*cp), b, h/y]---AG(X)--->[s/cp, b, h/y]
        total_activation_input = gathered_tensors
        # [s/cp, b, h/y]--view--> [sb/cp, h/y]
        total_activation_input = total_activation_input.view(-1, total_activation_input.shape[2])
        if ctx.gradient_accumulation_fusion:
            # Fused path: the weight gradient is accumulated straight into weight.main_grad.
            import fused_weight_gradient_mlp_cuda
            total_grad_output = total_grad_output.contiguous()
            if weight.main_grad.dtype == torch.float32:
                fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32(
                    total_activation_input, total_grad_output, weight.main_grad
                )
            elif weight.main_grad.dtype in (torch.float16, torch.bfloat16):
                fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16(
                    total_activation_input, total_grad_output, weight.main_grad
                )
            else:
                raise RuntimeError("Unsupported gradient type for gradient accumulation fusion")

            if hasattr(weight, 'grad_added_to_main_grad'):
                # When overlap_grad_reduce is True, need to ensure that backward hooks
                # are all run on the main backprop thread to prevent deadlocks. Setup
                # dummy grad_weight tensor to prevent backward hooks from being run
                # in a background thread.
                if getattr(weight, 'zero_out_wgrad', False):
                    grad_weight = torch.zeros(
                        weight.main_grad.shape,
                        dtype=activation_input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                else:
                    grad_weight = torch.empty(
                        weight.main_grad.shape,
                        dtype=activation_input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                weight.grad_added_to_main_grad = True
            else:
                grad_weight = None
        else:
            # [E/x, sb/cp] @ [sb/cp, h/y] ---> [E/x, h/y]
            grad_weight = total_grad_output.t().matmul(total_activation_input)
        grad_bias = total_grad_output.sum(dim=0) if use_bias else None

        # grad_input is only valid once the async reduce-scatter has completed.
        if rs_grad_input_handle:
            rs_grad_input_handle.wait()
        return grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None, None, None

    @staticmethod
    def _do_allgather_left_tensor_and_matmul_overlap(
        ag_comm_intf, ag_overlap_comm_intf, part_left_tensor, full_right_tensor, return_ag_res=False
    ):
        """Ring all-gather of the left operand, overlapped with per-shard matmuls.

        At every step the shard currently held is multiplied by
        ``full_right_tensor`` while it is also being forwarded around the ring
        with point-to-point isend/irecv.

        :return: ``(final_matmul_res, all_ag_res)``: the per-shard products
            concatenated along dim 0 in shard-index order, and the list of
            gathered shards (``None`` unless ``return_ag_res`` is True).
        """
        cur_ag_rank = ag_comm_intf.get_comm_rank()
        ag_world_sz = ag_comm_intf.get_comm_group_world_size()

        # do tp-x times matmul and reduce the partial res.
        matmul_res = [None] * ag_world_sz
        cur_step_rcv_handle = None
        ring_ag_ranks = ag_overlap_comm_intf.get_ring_global_ranks()
        next_rank = ring_ag_ranks[(cur_ag_rank + ag_world_sz - 1) % ag_world_sz]
        prev_rank = ring_ag_ranks[(cur_ag_rank + 1) % ag_world_sz]
        ag_comm_group = ag_comm_intf.get_comm_group()
        ag_overlap_comm_group = ag_overlap_comm_intf.get_comm_group()
        cur_step_tensor_to_send = part_left_tensor

        # Data to compute on in the next step (the tensor received from the
        # previous rank during this step).
        cur_step_rcv_input = torch.empty_like(part_left_tensor)
        all_ag_res = None
        if return_ag_res:
            all_ag_res = [None] * ag_world_sz
            all_ag_res[cur_ag_rank] = part_left_tensor

        # first_linear forward: [H/y, e/x] -> [H/(xy), e/x]
        for step in range(ag_world_sz):
            # Even and odd ranks use the two groups in opposite roles and issue
            # send/recv in opposite order; nothing is exchanged on the last step.
            if step < ag_world_sz - 1 and cur_ag_rank % 2 == 0:  # even ranks send first, then receive
                torch_dist.isend(cur_step_tensor_to_send, next_rank, ag_comm_group)
                cur_step_rcv_handle = torch_dist.irecv(
                    cur_step_rcv_input, prev_rank, ag_overlap_comm_group
                )
            elif step < ag_world_sz - 1 and cur_ag_rank % 2 == 1:  # odd ranks receive first, then send
                cur_step_rcv_handle = torch_dist.irecv(cur_step_rcv_input, prev_rank, ag_comm_group)
                torch_dist.isend(cur_step_tensor_to_send, next_rank, ag_overlap_comm_group)

            # compute: part_left_tensor @ split_right(split by inner dim)
            # [e/x, h/(xy)]
            cur_tensor_idx = (step + cur_ag_rank) % ag_world_sz
            if return_ag_res and step > 0:
                all_ag_res[cur_tensor_idx] = cur_step_tensor_to_send.clone()

            # first linear forward: [s/(x*cp), b, H/y] @ [H/y, e/x] -> [s/(x*cp), b, e/x]
            cur_step_matmul_res = torch.matmul(cur_step_tensor_to_send, full_right_tensor)
            matmul_res[cur_tensor_idx] = cur_step_matmul_res

            if step < ag_world_sz - 1:
                cur_step_rcv_handle.wait()
                # Clone so the receive buffer can be reused by the next irecv.
                cur_step_tensor_to_send = cur_step_rcv_input.clone()

        final_matmul_res = torch.cat(matmul_res)

        return final_matmul_res, all_ag_res

    @staticmethod
    def _do_mm_overlap_reducescatter(activation_input, weight, bias, ag_comm_intf, rs_comm_intf):
        """All-gather the input, then matmul chunk by chunk with async reduce-scatter per chunk.

        ``weight`` is expected already transposed by the caller (``forward``
        passes ``weight.t()``). The reduce-scatter of one chunk runs while the
        next chunk's matmul is computed.
        """
        # [s/(x*cp), b, H/y] -> [s/cp, b, H/y]
        activation_input = activation_input.contiguous()
        total_input = sync_gather_along_first_dim(activation_input, ag_comm_intf, buffer_name="mpu-sync-tp-2d")
        # [s/cp, b, H/y] @ [H/y, e/x] -> [s/cp, b, e/x]
        chunk_num = rs_comm_intf.get_comm_group_world_size()
        rs_chunks = []
        rs_handle_and_tmp_tensors = []
        # convert tuple to list to free used tensors ahead.
        seq_len, b, h = total_input.size()
        chunk_size = seq_len // chunk_num
        input_chunks = torch.reshape(total_input.view(chunk_size, -1, h).transpose(0, 1), (chunk_num, -1, h))
        rs_res = torch.empty((chunk_size, b, weight.size(1)), dtype=weight.dtype, device=weight.device)
        for idx in range(chunk_num):
            input_chunk = input_chunks[idx].reshape(chunk_size, -1, h)
            # [s/(cp*y), b, H/y] @ [H/y, e/x] -> [s/(cp*y), b, e/x]
            chunk_matmul_res = torch.matmul(input_chunk, weight).contiguous()
            if bias is not None:
                chunk_matmul_res += bias

            # [s/(cp*y), b, e/x]--rs--> [s/(cp*y*y), b, e/x]
            rs_handle, rs_chunk = async_reduce_scatter_along_first_dim(
                chunk_matmul_res, rs_comm_intf
            )
            rs_chunks.append(rs_chunk)
            # Keep the matmul output referenced until its reduce-scatter has finished.
            rs_handle_and_tmp_tensors.append((idx, rs_handle, chunk_matmul_res))

        offset = 0
        sub_chunk_size = chunk_size // chunk_num
        for idx, rs_handle, chunk_matmul_res_tensor in rs_handle_and_tmp_tensors:
            if rs_handle:
                rs_handle.wait()
            # Release the temporary's storage now that the collective has consumed it.
            chunk_matmul_res_tensor.untyped_storage().resize_(0)
            rs_res[offset:offset + sub_chunk_size] = rs_chunks[idx]
            offset += sub_chunk_size

        # [s / (cp * y * y), b, e / x] -> [s/(cp*y), b, e/x]
        final_res = torch.reshape(rs_res.view(chunk_num, -1, weight.size(1)).transpose(0, 1), (chunk_size, -1, weight.size(1)))
        return final_res

    @staticmethod
    def _backward_ag_overlap_with_mm(ctx, grad_output):
        """Backward implementation of Linear2DSplitAlongFirstDim, the computation and communication
        overlap:

        ----------------------------------------------------------------------------->time
        | send(grad_o-0, Y|X)
        | receive(grad_o-1, Y|X)
        | part_grad_act = MM(tot_grad_o-0, weight)
        |                                          part_grad_act = MM2(tot_grad_o-1, weight)
        |                                                   RS(part_grad_act, X|Y)
        |                                                   MM(tot_grad_o^T, tot_act_input)


        :param ctx: context
        :param grad_output: with shape: [s/cp, b, E/(xy)]
        :return: grads of all the input para of forward function as a tuple.
            The tuple has 12 entries, one per ``forward`` input, matching
            ``backward``; it previously had 11, which autograd rejects for a
            12-input forward.
        """
        # activation_input shape: [s/(x*cp), b, h/y]
        # weight shape: [h/y, E/x]
        activation_input, = ctx.saved_tensors
        weight = ctx.weight
        use_bias = ctx.use_bias
        # first we prepare the total inputs needed to compute grad_input, grad_weight.
        # [s/(y*cp), b, E/x]---AG(y)---> [s/cp, b, E/x]
        # Use sync AG to avoid communication competition, for the bandwidth is shared for A3.
        rs_comm_intf = ctx.rs_comm_intf
        rs_overlap_comm_intf = ctx.rs_overlap_comm_intf
        grad_output = grad_output.contiguous()
        cur_rs_rank = ctx.rs_comm_intf.get_comm_rank()
        rs_world_sz = ctx.rs_comm_intf.get_comm_group_world_size()
        # do tp-x times matmul and reduce the partial res.
        matmul_res = [None] * rs_world_sz
        cur_step_rcv_handle = None
        ring_rs_ranks = rs_overlap_comm_intf.get_ring_global_ranks()
        next_rank = ring_rs_ranks[(cur_rs_rank + rs_world_sz - 1) % rs_world_sz]
        prev_rank = ring_rs_ranks[(cur_rs_rank + 1) % rs_world_sz]
        rs_comm_group = rs_comm_intf.get_comm_group()
        rs_overlap_comm_group = rs_overlap_comm_intf.get_comm_group()
        cur_step_tensor_to_send = grad_output
        # Data to compute on in the next step (the tensor received from the
        # previous rank during this step).
        cur_step_rcv_input = torch.empty_like(grad_output)
        # first_linear forward: [H/y, e/x] -> [H/(xy), e/x]
        # collect total_grad_output
        grad_output_list = [None] * rs_world_sz
        grad_output_list[cur_rs_rank] = grad_output
        gather_input_handle, gathered_tensors = None, None
        for step in range(rs_world_sz):
            if step < rs_world_sz - 1 and cur_rs_rank % 2 == 0:  # even ranks send first, then receive
                torch_dist.isend(cur_step_tensor_to_send, next_rank, rs_comm_group)
                cur_step_rcv_handle = torch_dist.irecv(
                    cur_step_rcv_input, prev_rank, rs_overlap_comm_group
                )
            elif step < rs_world_sz - 1 and cur_rs_rank % 2 == 1:  # odd ranks receive first, then send
                cur_step_rcv_handle = torch_dist.irecv(cur_step_rcv_input, prev_rank, rs_comm_group)
                torch_dist.isend(cur_step_tensor_to_send, next_rank, rs_overlap_comm_group)

            # compute: grad_output @ split_right(split by inner dim)
            # [e/x, h/(xy)]
            cur_tensor_idx = (step + cur_rs_rank) % rs_world_sz

            # first linear forward: [s/(x*cp), b, H/y] @ [H/y, e/x] -> [s/(x*cp), b, e/x]
            cur_step_matmul_res = torch.matmul(cur_step_tensor_to_send, weight)
            matmul_res[cur_tensor_idx] = cur_step_matmul_res
            if step > 0:
                grad_output_list[cur_tensor_idx] = cur_step_tensor_to_send.clone()
            if step < rs_world_sz - 1:
                cur_step_rcv_handle.wait()
                # Clone so the receive buffer can be reused by the next irecv.
                cur_step_tensor_to_send = cur_step_rcv_input.clone()
            if step == 0:
                # prepare total activation_input for computing grad weight.
                # [s/(x*cp), b, h/y]---AG(X)--->[s/cp, b, h/y]
                activation_input = activation_input.contiguous()
                gather_input_handle, gathered_tensors = async_gather_tensors(
                    local_rank_input=activation_input, ag_comm_intf=ctx.ag_comm_intf
                )

        partial_grad_input = torch.cat(matmul_res)
        # [s/cp, b, H/y] (partial sum)---RS(X)--->[s/cp, b, H/(xy)] (full sum)
        rs_grad_input_handle, grad_input = async_reduce_scatter_along_first_dim(
            partial_grad_input, comm_intf=ctx.ag_comm_intf
        )

        total_grad_output = torch.cat(grad_output_list, dim=0)

        # Convert the tensor shapes to 2D for execution compatibility
        sb = total_grad_output.shape[0] * total_grad_output.shape[1]
        # [s/cp, b, E/x]--view--> [sb/cp, E/x]
        total_grad_output = total_grad_output.view(sb, total_grad_output.shape[2])

        if gather_input_handle:
            gather_input_handle.wait()

        # [s/(x*cp), b, h/y]---AG(X)--->[s/cp, b, h/y]
        total_activation_input = gathered_tensors
        # [s/cp, b, h/y]--view--> [sb/cp, h/y]
        total_activation_input = total_activation_input.view(sb, total_activation_input.shape[2])
        if ctx.gradient_accumulation_fusion:
            # Fused path: the weight gradient is accumulated straight into weight.main_grad.
            import fused_weight_gradient_mlp_cuda
            total_grad_output = total_grad_output.contiguous()
            if weight.main_grad.dtype == torch.float32:
                fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32(
                    total_activation_input, total_grad_output, weight.main_grad
                )
            elif weight.main_grad.dtype in (torch.float16, torch.bfloat16):
                fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16(
                    total_activation_input, total_grad_output, weight.main_grad
                )
            else:
                raise RuntimeError("Unsupported gradient type for gradient accumulation fusion")

            if hasattr(weight, 'grad_added_to_main_grad'):
                # When overlap_grad_reduce is True, need to ensure that backward hooks
                # are all run on the main backprop thread to prevent deadlocks. Setup
                # dummy grad_weight tensor to prevent backward hooks from being run
                # in a background thread.
                if getattr(weight, 'zero_out_wgrad', False):
                    grad_weight = torch.zeros(
                        weight.main_grad.shape,
                        dtype=activation_input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                else:
                    grad_weight = torch.empty(
                        weight.main_grad.shape,
                        dtype=activation_input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                weight.grad_added_to_main_grad = True
            else:
                grad_weight = None
        else:
            # [E/x, sb/cp] @ [sb/cp, h/y] ---> [E/x, h/y]
            grad_weight = total_grad_output.t().matmul(total_activation_input)
        grad_bias = total_grad_output.sum(dim=0) if use_bias else None

        # grad_input is only valid once the async reduce-scatter has completed.
        if rs_grad_input_handle:
            rs_grad_input_handle.wait()
        return grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None, None, None
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from functools import wraps
from typing import List

import torch
import torch.distributed as dist
from torch._utils import _flatten_dense_tensors
from torch._utils import _unflatten_dense_tensors

from megatron.core.transformer import TransformerConfig
from megatron.core.utils import get_attr_wrapped_model
from megatron.training import get_args
from mindspeed.core.tensor_parallel.comm_group_api import TPXCollectiveComm
from mindspeed.core.tensor_parallel.comm_group_api import TPYCollectiveComm
from mindspeed.core.tensor_parallel.tp_2d.layernorm_2d import LayerNorm2D
from mindspeed.core.tensor_parallel.tp_2d.rms_norm_2d import RMSNorm2D


def _allreduce_layernorm_grads_wrapper(function):
    """Wrap a ``(model, config)`` grad-sync hook so it also syncs 2D-TP norm grads.

    The returned callable first runs ``function(model, config)`` unchanged.
    It then all-reduces (default reduce op), over the ``TPXCollectiveComm``
    group, the ``main_grad`` of every trainable parameter that carries a
    truthy ``"2d_tp"`` attribute, writing the reduced values back in place.
    """

    @wraps(function)
    def wrapper(model: List[torch.nn.Module], config: TransformerConfig):
        function(model, config)

        tagged_grads = [
            param.main_grad.data
            for chunk in model
            for _, param in get_attr_wrapped_model(chunk, "named_parameters")()
            if param.requires_grad and getattr(param, "2d_tp", False)
        ]
        if not tagged_grads:
            return

        # Flatten once so a single collective covers every tagged gradient,
        # then copy each reduced slice back into its original buffer.
        flat = _flatten_dense_tensors(tagged_grads)
        torch.distributed.all_reduce(flat, group=TPXCollectiveComm.get_comm_group())
        for grad, reduced in zip(tagged_grads, _unflatten_dense_tensors(flat, tagged_grads)):
            grad.copy_(reduced)

    return wrapper


def get_norm_tp_2d(config):
    """Build the 2D tensor-parallel norm layer selected by ``args.normalization``.

    Args:
        config: object providing ``hidden_size`` and ``layernorm_epsilon``.

    Returns:
        A ``LayerNorm2D`` for ``"LayerNorm"`` or an ``RMSNorm2D`` for
        ``"RMSNorm"``, each created with a fresh ``TPYCollectiveComm``.

    Raises:
        NotImplementedError: ``"RMSNorm"`` together with ``apply_layernorm_1p``.
        Exception: any other normalization name.
    """
    args = get_args()
    norm_type = args.normalization

    if norm_type not in ("LayerNorm", "RMSNorm"):
        raise Exception(f"unsupported norm type '{args.normalization}'.")

    if norm_type == "RMSNorm" and args.apply_layernorm_1p:
        raise NotImplementedError(
            "RMSNorm does not currently support the layernorm_1p formulation."
        )

    norm_cls = LayerNorm2D if norm_type == "LayerNorm" else RMSNorm2D
    return norm_cls(
        config.hidden_size,
        eps=config.layernorm_epsilon,
        last_dim_split_comm_intf=TPYCollectiveComm(),
    )


# ---- patch envelope of the next file in this patch (kept verbatim, as comments) ----
# diff --git a/model/train/yoco_moe/mindspeed/core/tensor_parallel/tp_2d/parallel_linear_2d.py b/model/train/yoco_moe/mindspeed/core/tensor_parallel/tp_2d/parallel_linear_2d.py
# new file mode 100644
# index 000000000..b6ff0293f
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/tensor_parallel/tp_2d/parallel_linear_2d.py
# @@ -0,0 +1,204 @@
# Copyright 2024 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from typing import Any, Callable

import torch

from megatron.core import ModelParallelConfig
from megatron.core.tensor_parallel.layers import _initialize_affine_weight_gpu
from megatron.core.utils import divide
from mindspeed.core.tensor_parallel.comm_group_api import CollectiveCommIntf
from mindspeed.core.tensor_parallel.comm_group_api import OverlapCollectiveIntf
from mindspeed.core.tensor_parallel.layers import _initialize_affine_weight_cpu_2d
from mindspeed.core.tensor_parallel.tp_2d.linear_2d_moe_split_along_first_dim import MoELinear2DFC1, MoELinear2DFC2
from mindspeed.core.tensor_parallel.tp_2d.linear_2d_split_along_first_dim import Linear2DSplitAlongFirstDim


class ParallelLinear2D(torch.nn.Module):
    """Linear layer whose weight is sharded over two communication groups.

    The layer is defined as Y = XA + b. The locally stored weight has shape
    ``[output_size / ag_world_size, input_size / rs_world_size]``, where the
    two world sizes are read from ``ag_comm_intf`` and ``rs_comm_intf``.
    The actual matmul and communication are delegated to an autograd
    function chosen in :meth:`forward`.

    Arguments:
        input_size: first dimension of matrix A; must be divisible by the
            ``rs_comm_intf`` world size.
        output_size: second dimension of matrix A; must be divisible by the
            ``ag_comm_intf`` world size.

    Keyword Arguments:
        config: ModelParallelConfig object. ``sequence_parallel`` must be off.
        init_method: method to initialize weights on the GPU path. Note that
            bias is always set to zero.
        add_bias: If true, allocate a bias parameter.
        gather_output: accepted for interface compatibility; not used by
            this class.
        stride: For the strided linear layers.
        keep_master_weight_for_test: This was added for testing and should be
            set to False. It is forwarded as ``return_master_weight`` to the
            CPU initializer.
        skip_bias_add: If True, do not add the bias term in ``forward``;
            instead return it to be added by the caller. This enables
            performance optimizations where bias can be fused with other
            elementwise operations.
        skip_weight_param_allocation: accepted for interface compatibility;
            not used by this class (the weight is always allocated).
        is_expert: If True, the layer is treated as an MoE expert layer and
            ``forward`` dispatches to ``MoELinear2DFC1`` / ``MoELinear2DFC2``.
        ag_comm_intf: all-gather communication interface (required).
        ag_sd_rcv_overlap_comm_intf: overlap interface paired with
            ``ag_comm_intf``; forwarded to the autograd function.
        rs_comm_intf: reduce-scatter communication interface (required).
        rs_sd_rcv_overlap_comm_intf: overlap interface paired with
            ``rs_comm_intf``; forwarded to the autograd function.
        enable_overlap_ag_with_matmul: forwarded to the autograd function.
        enable_overlap_matmul_with_rs: forwarded to the autograd function.
        partition_dim: divide with dim, column parallel set 0, row parallel
            set 1.
        enable_backward_overlap_ag_with_matmul: enable overlap all-gather
            with matmul in the backward pass.

    Raises:
        AssertionError: a size is not divisible by its group's world size.
        RuntimeError: ``config.sequence_parallel`` is enabled.
    """

    def __init__(
        self,
        input_size,
        output_size,
        *,
        config: ModelParallelConfig,
        init_method: Callable,
        add_bias=True,
        gather_output=False,
        stride=1,
        keep_master_weight_for_test=False,
        skip_bias_add=True,
        skip_weight_param_allocation: bool = False,
        is_expert: bool = False,
        ag_comm_intf: CollectiveCommIntf = None,
        ag_sd_rcv_overlap_comm_intf: OverlapCollectiveIntf = None,
        rs_comm_intf: CollectiveCommIntf = None,
        rs_sd_rcv_overlap_comm_intf: OverlapCollectiveIntf = None,
        enable_overlap_ag_with_matmul=False,
        enable_overlap_matmul_with_rs=False,
        partition_dim: int = 0,
        enable_backward_overlap_ag_with_matmul=False,
    ):
        super().__init__()
        self.mp_config: ModelParallelConfig = config
        self.para_init_method = init_method
        self.stride = stride
        self.keep_master_weight_for_test = keep_master_weight_for_test
        self.add_bias = add_bias
        self.input_size = input_size
        self.output_size = output_size
        self.ag_comm_intf = ag_comm_intf
        self.rs_comm_intf = rs_comm_intf
        self.ag_comm_world_sz = ag_comm_intf.get_comm_group_world_size()
        self.rs_comm_world_sz = rs_comm_intf.get_comm_group_world_size()
        # when AG comm group is small, do overlap AG with matmul.
        self.enable_overlap_ag_with_matmul = enable_overlap_ag_with_matmul
        self.enable_overlap_matmul_with_rs = enable_overlap_matmul_with_rs
        self.ag_overlap_comm_intf = ag_sd_rcv_overlap_comm_intf
        self.rs_sd_rcv_overlap_comm_intf = rs_sd_rcv_overlap_comm_intf

        if input_size % self.rs_comm_world_sz:
            raise AssertionError("input size should be divisible by tp-y")
        if output_size % self.ag_comm_world_sz:
            raise AssertionError("output size should be divisible by tp-x")

        self.input_size_per_partition = divide(input_size, self.rs_comm_world_sz)
        self.output_size_per_partition = divide(output_size, self.ag_comm_world_sz)
        self.skip_bias_add = skip_bias_add
        self.is_expert = is_expert
        self.expert_parallel = config.expert_model_parallel_size > 1
        self.gradient_accumulation_fusion = config.gradient_accumulation_fusion
        self.enable_backward_overlap_ag_with_matmul = enable_backward_overlap_ag_with_matmul
        if config.sequence_parallel:
            # The original literals were concatenated without a separating
            # space and misspelled "ulysses".
            raise RuntimeError(
                "Nd_matmul cannot be used with sequence_parallel. "
                "If you want to train long sequences, "
                "you can use ulysses or context_parallel that is compatible with nd_matmul."
            )
        self.partition_dim = partition_dim
        self.init_linear_weights()

    def init_linear_weights(self):
        """Allocate, initialize and tag the local weight and optional bias."""
        init_with_cpu = self.mp_config.use_cpu_initialization
        device = None if init_with_cpu else torch.cuda.current_device()
        # Expert-parallel expert parameters are excluded from the "allreduce" tag.
        allreduce = not (self.is_expert and self.expert_parallel)

        self.weight = torch.nn.Parameter(
            torch.empty(
                self.output_size_per_partition,
                self.input_size_per_partition,
                device=device,
                dtype=self.mp_config.params_dtype,
            )
        )
        if self.add_bias:
            self.bias = torch.nn.Parameter(
                torch.empty(self.output_size_per_partition, dtype=self.mp_config.params_dtype, device=device)
            )
        else:
            self.register_parameter("bias", None)

        if init_with_cpu and self.mp_config.perform_initialization:
            _initialize_affine_weight_cpu_2d(self.weight, self.partition_dim, stride=self.stride,
                                             return_master_weight=self.keep_master_weight_for_test,
                                             config=self.mp_config)
        elif self.mp_config.perform_initialization:
            _initialize_affine_weight_gpu(
                self.weight,
                self.para_init_method,
                partition_dim=self.partition_dim,
                stride=self.stride,
                expert_parallel=(self.is_expert and self.expert_parallel),
            )

        setattr(self.weight, "allreduce", allreduce)

        if self.add_bias:
            if self.mp_config.perform_initialization:
                with torch.no_grad():
                    self.bias.zero_()

            # Tag the bias whenever it exists: never on a ``None`` bias, and
            # not only when ``perform_initialization`` is set, so it is
            # treated like the weight above.
            setattr(self.bias, "allreduce", allreduce)
            setattr(self.bias, "sequence_parallel", False)

    def set_extra_state(self, state: Any):
        """ Extra state is ignored """

    def get_extra_state(self) -> None:
        """ Keep compatibility with TE state dict. """
        return None

    def forward(self, activation_input):
        """Apply the 2D-parallel linear function to ``activation_input``.

        Expert layers use ``MoELinear2DFC1`` when ``partition_dim == 0`` and
        ``MoELinear2DFC2`` otherwise; all other layers use
        ``Linear2DSplitAlongFirstDim``.

        Returns:
            ``(output, output_bias)``. With ``skip_bias_add`` the bias is
            returned for the caller to add and ``output`` is the raw function
            result; otherwise the bias (if any) is added here and
            ``output_bias`` is ``None``.
        """
        if self.is_expert:
            if self.partition_dim == 0:
                linear_func = MoELinear2DFC1
            else:
                linear_func = MoELinear2DFC2
        else:
            linear_func = Linear2DSplitAlongFirstDim
        # NOTE(review): ``self.bias`` is handed to the autograd function and is
        # also added/returned below; whether the function already applies it is
        # not visible from this class -- confirm it is not applied twice.
        matmul_output = linear_func.apply(
            activation_input,
            self.weight,
            self.bias,
            self.ag_comm_intf,
            self.ag_overlap_comm_intf,
            self.rs_comm_intf,
            self.rs_sd_rcv_overlap_comm_intf,
            self.enable_overlap_ag_with_matmul,
            self.enable_overlap_matmul_with_rs,
            self.gradient_accumulation_fusion,
            self.enable_backward_overlap_ag_with_matmul,
            self.partition_dim,
        )

        if not self.skip_bias_add:
            output = (matmul_output + self.bias) if self.bias is not None else matmul_output
            output_bias = None
        else:
            output = matmul_output
            output_bias = self.bias

        return output, output_bias


# ---- patch envelope of the next file in this patch (kept verbatim, as comments) ----
# diff --git a/model/train/yoco_moe/mindspeed/core/tensor_parallel/tp_2d/rms_norm_2d.py b/model/train/yoco_moe/mindspeed/core/tensor_parallel/tp_2d/rms_norm_2d.py
# new file mode 100644
# index 000000000..77e0868e7
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/tensor_parallel/tp_2d/rms_norm_2d.py
# @@ -0,0 +1,98 @@
# Copyright 2024 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from typing import Any
from typing import Optional
from typing import Tuple

import torch
import torch.distributed as dist
from torch import Tensor
from torch import nn
from torch.cuda.amp import custom_bwd
from torch.cuda.amp import custom_fwd
from megatron.core.utils import divide
from mindspeed.core.tensor_parallel.comm_group_api import CollectiveCommIntf
from mindspeed.core.tensor_parallel.comm_group_api import TPYCollectiveComm


class RMSNorm2D(torch.nn.Module):
    """RMS normalization for inputs whose last dimension is split across a comm group."""

    def __init__(self,
                 hidden_size: int,
                 eps: float = 1e-6,
                 last_dim_split_comm_intf: Optional[CollectiveCommIntf] = None):
        """RMS Normalization 2d module

        Args:
            hidden_size (int): The width of input, i.e. hidden size
            eps (float): epsilon to use for the norm, default to 1e-6
            last_dim_split_comm_intf: All-reduce at last dim comm intf.
                ``None`` (the default) creates a ``TPYCollectiveComm``.
        """
        super().__init__()
        # Resolve the default per instance rather than once at class-definition
        # time, so importing this module does not construct a comm interface.
        if last_dim_split_comm_intf is None:
            last_dim_split_comm_intf = TPYCollectiveComm()
        self.eps = eps
        self.hidden_size = hidden_size
        self.last_dim_split_comm_intf = last_dim_split_comm_intf
        self.last_dim_split_comm_world_sz = self.last_dim_split_comm_intf.get_comm_group_world_size()
        # partitioning dimension
        self.partitioned_dim = divide(hidden_size, self.last_dim_split_comm_world_sz)
        self.weight = nn.Parameter(torch.ones(self.partitioned_dim))

        # Tag read by the grad-sync hook that all-reduces 2D-TP norm grads.
        setattr(self.weight, "2d_tp", True)

    def forward(self, x):
        """Normalize ``x`` with :class:`_ParallelRMSNorm2D` and the local weight shard."""
        return _ParallelRMSNorm2D.apply(
            x,
            self.weight,
            self.eps,
            self.hidden_size,
            self.last_dim_split_comm_intf,
        )


class _ParallelRMSNorm2D(torch.autograd.Function):
    """Autograd function implementing RMSNorm over a last dimension split across ranks."""

    @staticmethod
    @custom_fwd
    def forward(
        ctx: Any,
        input_: Tensor,
        weight,
        epsilon,
        hidden_size: int,
        last_dim_split_comm_intf: CollectiveCommIntf,
    ) -> Tensor:
        """Return ``input_ * rsqrt(mean(input_^2) + epsilon) * weight``.

        The mean of squares is taken over the local last dimension in fp32,
        all-reduced over the comm group and divided by the group world size.
        """
        # input_ inner: [s/cp, b, h/xy]
        # input_ outer: [s/(cp*x), b, h/y]
        ctx.last_dim_split_comm_intf = last_dim_split_comm_intf
        ctx.hidden_size = hidden_size
        pow_mean = input_.float().pow(2).mean(-1, keepdim=True)
        torch.distributed.all_reduce(pow_mean, group=last_dim_split_comm_intf.get_comm_group())
        # Local means are averaged across the group to get the global mean.
        var_x = torch.rsqrt(pow_mean.div_(last_dim_split_comm_intf.get_comm_group_world_size()) + epsilon)
        norm_x = torch.mul(input_, var_x).type_as(input_)
        ctx.save_for_backward(norm_x, var_x, weight)
        return norm_x * weight

    @staticmethod
    @custom_bwd
    def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
        """Return grads for ``(input_, weight, epsilon, hidden_size, comm_intf)``.

        Only the first two are tensors; the remaining three are ``None``.
        """
        # ``x`` is the normalized input saved in forward, not the raw input.
        x, var_x, weight = ctx.saved_tensors
        grad_norm_x = torch.mul(output_grad, weight)
        output_grad_mul_x_sum = torch.sum(grad_norm_x * x, dim=-1, keepdim=True)
        # Launch the cross-rank reduction asynchronously so the weight grad
        # below is computed while the collective is in flight.
        handle_grad = torch.distributed.all_reduce(
            output_grad_mul_x_sum, group=ctx.last_dim_split_comm_intf.get_comm_group(), async_op=True)
        # calculate grad_weight
        grad_weight = torch.mul(output_grad, x)
        if handle_grad:
            handle_grad.wait()
        grad_input = (grad_norm_x - x * (output_grad_mul_x_sum / ctx.hidden_size)) * var_x
        return grad_input, grad_weight, None, None, None


# ---- patch envelope of the next files in this patch (kept verbatim, as comments) ----
# diff --git a/model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/__init__.py b/model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/__init__.py
# new file mode 100644
# index 000000000..e69de29bb
# diff --git a/model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/adaptor.py b/model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/adaptor.py
# new file mode 100644
# index 000000000..ced826370
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/adaptor.py
# @@ -0,0 +1,143 @@
# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import torch
from megatron.training import get_args
from megatron.core.parallel_state import get_tensor_model_parallel_group, get_tensor_and_expert_parallel_group
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear, _initialize_affine_weight_cpu, \
    _initialize_affine_weight_gpu, set_tensor_model_parallel_attributes, _grad_accum_fusion_available, \
    linear_with_grad_accumulation_and_async_allreduce, linear_with_frozen_weight
from megatron.core.tensor_parallel.mappings import scatter_to_tensor_model_parallel_region, \
    reduce_from_tensor_model_parallel_region, gather_from_tensor_model_parallel_region, copy_to_tensor_model_parallel_region
from megatron.core.tensor_parallel.mappings import scatter_to_sequence_parallel_region as megatron_scatter_to_sequence_parallel_region
from megatron.core.tensor_parallel.mappings import gather_from_sequence_parallel_region as megatron_gather_from_sequence_parallel_region

from .unaligned_column_parallel_linear import UnalignedColumnParallelLinear
from .unaligned_row_parallel_linear import UnalignedRowParallelLinear
from .unaligned_utils import unaligned_divide, unaligned_scatter_to_sequence_parallel_region, \
    unaligned_reduce_scatter_to_sequence_parallel_region, unaligned_gather_from_sequence_parallel_region


class UnalignedColumnParallelLinearAdaptor(UnalignedColumnParallelLinear, ColumnParallelLinear):
    """Binds Megatron helpers and parallel settings into ``UnalignedColumnParallelLinear``."""

    def __init__(self, *args, **kwargs):
        """Fill in the unaligned-specific keyword arguments and delegate.

        ``config`` must be passed by keyword. ``is_expert`` and
        ``tp_comm_buffer_name`` default to ``False`` / ``None`` when absent.
        """
        config = kwargs['config']
        explicit_expert_comm = config.tensor_model_parallel_size > 1 or config.expert_model_parallel_size > 1
        kwargs.setdefault('is_expert', False)
        kwargs.setdefault('tp_comm_buffer_name', None)

        # Experts under extended TP shard over the combined TP+EP group.
        if kwargs['is_expert'] and explicit_expert_comm and config.moe_extended_tp:
            kwargs['parallel_group'] = get_tensor_and_expert_parallel_group()
        else:
            kwargs['parallel_group'] = get_tensor_model_parallel_group()

        # For the fused QKV projection the output is partitioned in units of
        # one query group: this is the number of output rows per group.
        if kwargs['tp_comm_buffer_name'] == 'qkv':
            kwargs['fusion_number'] = (config.hidden_size + 2 * config.kv_channels * config.num_query_groups) // config.num_query_groups
        else:
            kwargs['fusion_number'] = 1

        if not config.variable_seq_lengths:
            kwargs['seq_length'] = get_args().seq_length

        kwargs['_initialize_affine_weight_cpu'] = _initialize_affine_weight_cpu
        kwargs['_initialize_affine_weight_gpu'] = _initialize_affine_weight_gpu
        kwargs['set_tensor_model_parallel_attributes'] = set_tensor_model_parallel_attributes
        kwargs['linear_with_grad_accumulation_and_async_allreduce'] = linear_with_grad_accumulation_and_async_allreduce
        kwargs['gather_from_tensor_model_parallel_region'] = gather_from_tensor_model_parallel_region
        kwargs['copy_to_tensor_model_parallel_region'] = copy_to_tensor_model_parallel_region
        kwargs['linear_with_frozen_weight'] = linear_with_frozen_weight
        super(UnalignedColumnParallelLinearAdaptor, self).__init__(*args, **kwargs)


class UnalignedRowParallelLinearAdaptor(UnalignedRowParallelLinear, RowParallelLinear):
    """Binds Megatron helpers and parallel settings into ``UnalignedRowParallelLinear``."""

    def __init__(self, *args, **kwargs):
        """Fill in the unaligned-specific keyword arguments and delegate.

        ``config`` must be passed by keyword. ``is_expert`` and
        ``tp_comm_buffer_name`` default to ``False`` / ``None`` when absent.
        """
        config = kwargs['config']
        explicit_expert_comm = config.tensor_model_parallel_size > 1 or config.expert_model_parallel_size > 1
        kwargs.setdefault('is_expert', False)
        kwargs.setdefault('tp_comm_buffer_name', None)

        if kwargs['is_expert'] and explicit_expert_comm and config.moe_extended_tp:
            kwargs['parallel_group'] = get_tensor_and_expert_parallel_group()
        else:
            kwargs['parallel_group'] = get_tensor_model_parallel_group()

        if kwargs['tp_comm_buffer_name'] is not None and not kwargs['tp_comm_buffer_name'].startswith('fc'):  # attention.linear_proj
            kwargs['fusion_number'] = config.hidden_size // config.num_query_groups
        else:
            kwargs['fusion_number'] = 1

        if not config.variable_seq_lengths:
            kwargs['seq_length'] = get_args().seq_length

        kwargs['_initialize_affine_weight_cpu'] = _initialize_affine_weight_cpu
        kwargs['_initialize_affine_weight_gpu'] = _initialize_affine_weight_gpu
        kwargs['linear_with_grad_accumulation_and_async_allreduce'] = linear_with_grad_accumulation_and_async_allreduce
        kwargs['scatter_to_tensor_model_parallel_region'] = scatter_to_tensor_model_parallel_region
        kwargs['linear_with_frozen_weight'] = linear_with_frozen_weight
        kwargs['reduce_from_tensor_model_parallel_region'] = reduce_from_tensor_model_parallel_region
        super(UnalignedRowParallelLinearAdaptor, self).__init__(*args, **kwargs)


def divide_adaptor(numerator, denominator):
    """Integer-divide, deferring to ``unaligned_divide`` when there is a remainder.

    In the remainder case the result comes from
    ``unaligned_divide(numerator, denominator, rank)``, with ``rank`` being
    this process's rank in the tensor-model-parallel group.
    """
    if numerator % denominator != 0:
        rank = torch.distributed.get_rank(group=get_tensor_model_parallel_group())
        return unaligned_divide(numerator, denominator, rank)
    return numerator // denominator


def scatter_to_sequence_parallel_region_adaptor(embeddings):
    """Scatter along dim 0, using the unaligned path when dim 0 is not divisible by TP size."""
    world_size = torch.distributed.get_world_size(group=get_tensor_model_parallel_group())
    if embeddings.size()[0] % world_size != 0:
        return unaligned_scatter_to_sequence_parallel_region(embeddings, get_tensor_model_parallel_group())
    else:
        return megatron_scatter_to_sequence_parallel_region(embeddings)


def reduce_scatter_to_sequence_parallel_region_adaptor(inputs):
    """Always reduce-scatter through the unaligned implementation on the TP group."""
    group = get_tensor_model_parallel_group()
    return unaligned_reduce_scatter_to_sequence_parallel_region(inputs, group)


def gather_from_sequence_parallel_region_adaptor(inputs, tensor_parallel_output_grad=True):
    """Gather sequence shards, choosing the aligned or unaligned implementation.

    The local dim-0 lengths are summed over the tensor-model-parallel group;
    if that total is not divisible by the group size the unaligned gather is
    used, otherwise Megatron's gather is used.
    """
    group = get_tensor_model_parallel_group()
    world_size = torch.distributed.get_world_size(group=group)

    dim_size = torch.tensor(inputs.shape[0], dtype=torch.long, device=inputs.device)
    # Reduce over the TP group only. The previous call used the default
    # (world) group, so the sum also counted data/pipeline-parallel ranks and
    # the divisibility test against the TP world size could pick the wrong
    # branch; it also required every rank in the job to join this collective.
    torch.distributed.all_reduce(dim_size, group=group)
    total_dim_size = dim_size.item()

    if total_dim_size % world_size != 0:
        return unaligned_gather_from_sequence_parallel_region(inputs, group, tensor_parallel_output_grad)
    else:
        return megatron_gather_from_sequence_parallel_region(inputs, tensor_parallel_output_grad)


def get_rotary_seq_len(
    self,
    inference_params,
    transformer,
    transformer_input,
    transformer_config,
) -> float:
    """Return the sequence length to build rotary embeddings for.

    With ``inference_params`` the length is ``max_sequence_length``.
    Otherwise it is dim 0 of ``transformer.input_tensor`` (or of
    ``transformer_input`` when that is ``None``); under sequence parallelism
    it is replaced by ``get_args().seq_length`` for fixed-length runs, or by
    the sum of the local lengths over the tensor-model-parallel group for
    variable-length runs. The result is finally multiplied by
    ``context_parallel_size``.
    """
    if inference_params is not None:
        rotary_seq_len = inference_params.max_sequence_length
    else:
        if transformer.input_tensor is not None:
            rotary_seq_len = transformer.input_tensor.size(0)
        else:
            rotary_seq_len = transformer_input.size(0)

        if transformer_config.sequence_parallel:
            if not transformer_config.variable_seq_lengths:
                rotary_seq_len = get_args().seq_length
            else:
                # Shards may have different lengths, so the full length is
                # the sum across the TP group rather than local * tp_size.
                rotary_seq_len = torch.Tensor([rotary_seq_len]).cuda().int()
                torch.distributed.all_reduce(rotary_seq_len, op=torch.distributed.ReduceOp.SUM, group=get_tensor_model_parallel_group())
                rotary_seq_len = rotary_seq_len.item()

    rotary_seq_len *= transformer_config.context_parallel_size

    return rotary_seq_len


# ---- patch envelope of the next file in this patch (kept verbatim, as comments) ----
# diff --git a/model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/unaligned_column_parallel_linear.py b/model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/unaligned_column_parallel_linear.py
# new file mode 100644
# index 000000000..d7438c2cf
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/unaligned_column_parallel_linear.py
# @@ -0,0 +1,250 @@
# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import warnings
from typing import Callable, Optional, List

import torch
from torch.nn import Parameter

from mindspeed.core.tensor_parallel.unaligned_layers.unaligned_utils import unaligned_divide, \
    unaligned_linear_with_grad_accumulation_and_async_allreduce


class UnalignedColumnParallelLinear(torch.nn.Module):
    """Column-parallel linear layer whose output dimension need not divide evenly.

    The local weight has shape ``(output_size_per_partition, input_size)``.
    The per-rank output size comes from ``unaligned_divide`` (presumably this
    rank's possibly uneven share -- confirm in ``unaligned_utils``). When
    ``fusion_number != 1`` it is ``config.num_query_groups`` that is divided
    and the share is then multiplied by ``fusion_number``.

    The Megatron helpers this layer relies on are injected through the
    trailing keyword arguments instead of being imported here.
    """

    def __init__(
        self,
        input_size,
        output_size,
        *,
        config,
        init_method: Callable,
        bias=True,
        gather_output=False,
        stride=1,
        keep_master_weight_for_test=False,
        skip_bias_add=False,
        skip_weight_param_allocation: bool = False,
        embedding_activation_buffer: Optional[List[torch.Tensor]] = None,
        grad_output_buffer: Optional[List[torch.Tensor]] = None,
        is_expert: bool = False,
        tp_comm_buffer_name: str = None,  # Not used
        disable_grad_reduce: bool = False,

        # unaligned parallel arguments
        parallel_group: Optional[torch.distributed.ProcessGroup] = None,
        fusion_number: int = 1,  # the number of linear fused
        seq_length: int = None,
        _initialize_affine_weight_cpu: Callable = None,
        _initialize_affine_weight_gpu: Callable = None,
        set_tensor_model_parallel_attributes: Callable = None,
        linear_with_grad_accumulation_and_async_allreduce=None,
        copy_to_tensor_model_parallel_region=None,
        linear_with_frozen_weight=None,
        gather_from_tensor_model_parallel_region=None
    ):
        torch.nn.Module.__init__(self)

        # Plain bookkeeping of the constructor arguments.
        self.input_size = input_size
        self.output_size = output_size
        self.gather_output = gather_output
        self.skip_bias_add = skip_bias_add
        self.is_expert = is_expert
        self.expert_parallel = config.expert_model_parallel_size > 1
        self.embedding_activation_buffer = embedding_activation_buffer
        self.grad_output_buffer = grad_output_buffer
        self.config = config
        self.disable_grad_reduce = disable_grad_reduce

        self.explicit_expert_comm = self.is_expert and (
            config.tensor_model_parallel_size > 1 or self.expert_parallel
        )

        world_size = torch.distributed.get_world_size(group=parallel_group)
        rank = torch.distributed.get_rank(group=parallel_group)

        # Size of this rank's slice of the output dimension.
        if self.output_size % fusion_number:
            raise AssertionError('output_size({}) must be divisible by fusion number({})'.format(self.output_size, fusion_number))
        if fusion_number == 1:
            self.output_size_per_partition = unaligned_divide(self.output_size, world_size, rank)
        else:
            self.output_size_per_partition = unaligned_divide(config.num_query_groups, world_size, rank) * fusion_number

        on_cpu = config.use_cpu_initialization
        expert_sharded = self.is_expert and self.expert_parallel

        def _new_param(*shape):
            # The current CUDA device is only queried when a GPU tensor is
            # actually allocated.
            if on_cpu:
                return Parameter(torch.empty(*shape, dtype=config.params_dtype))
            return Parameter(
                torch.empty(*shape, device=torch.cuda.current_device(), dtype=config.params_dtype)
            )

        # Weight. torch.nn.functional.linear performs XA^T + b, hence the
        # transposed (out, in) allocation.
        if skip_weight_param_allocation:
            self.weight = None
        else:
            self.weight = _new_param(self.output_size_per_partition, self.input_size)
            if config.perform_initialization:
                if on_cpu:
                    self.master_weight = _initialize_affine_weight_cpu(
                        self.weight,
                        self.output_size,
                        self.input_size,
                        self.output_size_per_partition,
                        0,
                        init_method,
                        stride=stride,
                        return_master_weight=keep_master_weight_for_test,
                        rank=rank,
                        world_size=world_size,
                    )
                else:
                    _initialize_affine_weight_gpu(
                        self.weight,
                        init_method,
                        partition_dim=0,
                        stride=stride,
                        expert_parallel=expert_sharded,
                    )
            setattr(self.weight, 'allreduce', not expert_sharded)

        # Bias (always zero-initialized when initialization is performed).
        if not bias:
            self.register_parameter('bias', None)
        else:
            self.bias = _new_param(self.output_size_per_partition)
            set_tensor_model_parallel_attributes(self.bias, True, 0, stride)
            if config.perform_initialization:
                with torch.no_grad():
                    self.bias.zero_()
            setattr(self.bias, 'allreduce', not expert_sharded)

        self.sequence_parallel = config.sequence_parallel
        if self.sequence_parallel and world_size <= 1:
            warnings.warn(
                f"`sequence_parallel` is set to `True`, but tensor model parallel size is {world_size}. "
                f"Disabling sequence parallel."
            )
            self.sequence_parallel = False

        self.allreduce_dgrad = world_size > 1 and not self.sequence_parallel
        self.gradient_accumulation_fusion = config.gradient_accumulation_fusion

        if self.allreduce_dgrad and self.sequence_parallel:
            raise RuntimeError(
                "`allreduce_dgrad` and `sequence_parallel` cannot be enabled at the same time."
            )

        self._forward_impl = linear_with_grad_accumulation_and_async_allreduce

        def _default_extra_state(state_dict, prefix, *args, **kwargs):
            # Adds a default empty `_extra_state` entry before loading.
            return state_dict.setdefault(f'{prefix}_extra_state')

        self._register_load_state_dict_pre_hook(_default_extra_state)

        self.seq_length = seq_length
        self.copy_to_tensor_model_parallel_region = copy_to_tensor_model_parallel_region
        self.linear_with_frozen_weight = linear_with_frozen_weight
        self.parallel_group = parallel_group
        self.gather_from_tensor_model_parallel_region = gather_from_tensor_model_parallel_region

    def forward(self, input_: torch.Tensor, weight: Optional[torch.Tensor] = None):
        """Run the column-parallel linear transform.

        Args:
            input_: input activations.
            weight: optional external weight; it must have shape
                ``(output_size_per_partition, input_size)``. Required when the
                layer was built with ``skip_weight_param_allocation``.

        Returns:
            ``(output, output_bias)`` where ``output_bias`` is the bias
            parameter if ``skip_bias_add`` is set and ``None`` otherwise.

        Raises:
            RuntimeError: no weight is available, or the supplied weight has
                the wrong shape.
        """
        if weight is not None:
            # Check the weight passed in is the correct shape
            expected_shape = (self.output_size_per_partition, self.input_size)
            if weight.shape != expected_shape:
                raise RuntimeError(
                    f"supplied weight's shape is {tuple(weight.shape)}, "
                    f"not {expected_shape} as expected"
                )
        elif self.weight is not None:
            weight = self.weight
        else:
            raise RuntimeError(
                "weight was not supplied to ColumnParallelLinear forward pass "
                "and skip_weight_param_allocation is True."
            )

        if self.config._cpu_offloading_context is not None:
            if self.config._cpu_offloading_context.inside_context == True:
                assert (
                    self.config.cpu_offloading == False
                ), "CPU Offloading cannot be enabled while using non-TE modules"

        bias = None if self.skip_bias_add else self.bias

        # In these modes the input is used as-is instead of going through the
        # copy-to-region mapping.
        use_input_directly = (
            self.allreduce_dgrad
            or self.sequence_parallel
            or self.explicit_expert_comm
            or self.disable_grad_reduce
        )
        if use_input_directly:
            input_parallel = input_
        else:
            input_parallel = self.copy_to_tensor_model_parallel_region(input_)

        if self.config.defer_embedding_wgrad_compute:
            self.embedding_activation_buffer.append(input_parallel)

        expert_comm = self.explicit_expert_comm
        allreduce_dgrad = False if expert_comm else self.allreduce_dgrad

        # Arguments shared by both matmul implementations.
        call_kwargs = dict(
            input=input_parallel,
            weight=weight,
            bias=bias,
            gradient_accumulation_fusion=self.gradient_accumulation_fusion,
            sequence_parallel=False if expert_comm else self.sequence_parallel,
            grad_output_buffer=self.grad_output_buffer
            if self.config.defer_embedding_wgrad_compute
            else None,
            allreduce_dgrad=allreduce_dgrad,
        )
        # Matrix multiply.
        if weight.requires_grad:
            self._forward_impl = unaligned_linear_with_grad_accumulation_and_async_allreduce
            call_kwargs.update(parallel_group=self.parallel_group, seq_length=self.seq_length)
        else:
            self._forward_impl = self.linear_with_frozen_weight
            call_kwargs['async_grad_allreduce'] = allreduce_dgrad
        output_parallel = self._forward_impl(**call_kwargs)

        if self.gather_output:
            # All-gather across the partitions.
            assert not self.sequence_parallel
            output = self.gather_from_tensor_model_parallel_region(output_parallel)
        else:
            output = output_parallel
        output_bias = self.bias if self.skip_bias_add else None
        return output, output_bias


# ---- patch envelope of the next file in this patch (kept verbatim, as comments) ----
# \ No newline at end of file
# diff --git a/model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/unaligned_row_parallel_linear.py b/model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/unaligned_row_parallel_linear.py
# new file mode 100644
# index 000000000..37668ccce
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/tensor_parallel/unaligned_layers/unaligned_row_parallel_linear.py
# @@ -0,0 +1,206 @@
# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from typing import Callable, Optional

import torch
from torch.nn import Parameter

from mindspeed.core.tensor_parallel.unaligned_layers.unaligned_utils import (
    unaligned_divide,
    unaligned_linear_with_grad_accumulation_and_async_allreduce,
    unaligned_reduce_scatter_to_sequence_parallel_region,
)


class UnalignedRowParallelLinear(torch.nn.Module):
    """Row-parallel linear layer whose input dimension may split unevenly across ranks.

    The local weight has shape ``(output_size, input_size_per_partition)``.
    ``input_size_per_partition`` comes from ``unaligned_divide``, so ranks may
    own partitions of different sizes.

    Args:
        input_size: Full (unpartitioned) input feature size.
        output_size: Output feature size (not partitioned).
        config: Configuration object. The attributes read here are
            ``expert_model_parallel_size``, ``gradient_accumulation_fusion``,
            ``sequence_parallel``, ``tensor_model_parallel_size``,
            ``use_cpu_initialization``, ``perform_initialization``,
            ``params_dtype``, ``num_query_groups`` (only when
            ``fusion_number != 1``) and, in ``forward``,
            ``_cpu_offloading_context`` / ``cpu_offloading``.
        init_method: Weight initializer forwarded to the affine-weight helpers.
        bias: Whether to allocate a bias parameter of size ``output_size``.
        input_is_parallel: If True the input is used as is; otherwise it is
            passed through ``scatter_to_tensor_model_parallel_region`` first.
        skip_bias_add: If True the bias is returned instead of being added.
        stride: Forwarded to the affine-weight helpers.
        keep_master_weight_for_test: Forwarded as ``return_master_weight`` to
            the CPU initializer.
        is_expert: Marks the layer as an expert layer.
        tp_comm_buffer_name: Not used.
        parallel_group: Process group the weight is partitioned over.
        fusion_number: Number of fused linears. When it is not 1 the local
            input size is ``unaligned_divide(config.num_query_groups, ...)``
            multiplied by ``fusion_number``.
        seq_length: Forwarded to the unaligned linear function.
        _initialize_affine_weight_cpu: CPU weight initializer (injected).
        _initialize_affine_weight_gpu: Device weight initializer (injected).
        linear_with_grad_accumulation_and_async_allreduce: Accepted but not
            referenced by this class.
        scatter_to_tensor_model_parallel_region: Callable applied to the input
            when ``input_is_parallel`` is False.
        linear_with_frozen_weight: Linear implementation used when the weight
            does not require grad.
        reduce_from_tensor_model_parallel_region: Callable applied to the
            partial outputs when sequence parallelism and explicit expert
            communication are both off.

    Raises:
        RuntimeError: If ``config.sequence_parallel`` is set while
            ``input_is_parallel`` is False.
        AssertionError: If ``input_size`` is not divisible by ``fusion_number``.
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        *,
        config,
        init_method: Callable,
        bias: bool,
        input_is_parallel: bool,
        skip_bias_add: bool,
        stride: int = 1,
        keep_master_weight_for_test: bool = False,
        is_expert: bool = False,
        tp_comm_buffer_name: str = None,  # Not used

        # unaligned parallel arguments
        parallel_group: Optional[torch.distributed.ProcessGroup] = None,
        fusion_number: int = 1,  # the number of linear fused
        seq_length: int = None,
        _initialize_affine_weight_cpu: Callable = None,
        _initialize_affine_weight_gpu: Callable = None,
        linear_with_grad_accumulation_and_async_allreduce=None,
        scatter_to_tensor_model_parallel_region=None,
        linear_with_frozen_weight=None,
        reduce_from_tensor_model_parallel_region=None,
    ):
        torch.nn.Module.__init__(self)

        # Remember the constructor arguments that forward() needs later.
        self.input_size = input_size
        self.output_size = output_size
        self.input_is_parallel = input_is_parallel
        self.skip_bias_add = skip_bias_add
        self.config = config
        self.is_expert = is_expert
        self.expert_parallel = config.expert_model_parallel_size > 1
        self.gradient_accumulation_fusion = config.gradient_accumulation_fusion
        self.sequence_parallel = config.sequence_parallel
        if self.sequence_parallel and not self.input_is_parallel:
            raise RuntimeError("To enable `sequence_parallel`, `input_is_parallel` must be `True`")

        self.explicit_expert_comm = self.is_expert and (
            config.tensor_model_parallel_size > 1 or self.expert_parallel
        )

        # The weight matrix is divided along its last (input) dimension.
        world_size = torch.distributed.get_world_size(group=parallel_group)
        rank = torch.distributed.get_rank(group=parallel_group)

        if self.input_size % fusion_number != 0:
            raise AssertionError('input_size({}) must be divisible by fusion number({})'.format(self.input_size, fusion_number))

        if fusion_number == 1:
            self.input_size_per_partition = unaligned_divide(self.input_size, world_size, rank)
        else:
            groups_on_rank = unaligned_divide(config.num_query_groups, world_size, rank)
            self.input_size_per_partition = groups_on_rank * fusion_number

        # Parameters.
        # torch.nn.functional.linear computes X A^T + b, so the transpose is
        # what gets allocated here.
        weight_shape = (self.output_size, self.input_size_per_partition)
        if config.use_cpu_initialization:
            self.weight = Parameter(torch.empty(*weight_shape, dtype=config.params_dtype))
            if config.perform_initialization:
                self.master_weight = _initialize_affine_weight_cpu(
                    self.weight,
                    self.output_size,
                    self.input_size,
                    self.input_size_per_partition,
                    1,
                    init_method,
                    stride=stride,
                    return_master_weight=keep_master_weight_for_test,
                    params_dtype=config.params_dtype,
                    rank=rank,
                    world_size=world_size,
                )
        else:
            self.weight = Parameter(
                torch.empty(
                    *weight_shape,
                    device=torch.cuda.current_device(),
                    dtype=config.params_dtype,
                )
            )
            if config.perform_initialization:
                _initialize_affine_weight_gpu(
                    self.weight,
                    init_method,
                    partition_dim=1,
                    stride=stride,
                    expert_parallel=(self.is_expert and self.expert_parallel),
                )
        self.weight.allreduce = not (self.is_expert and self.expert_parallel)

        if not bias:
            self.register_parameter('bias', None)
        else:
            if config.use_cpu_initialization:
                bias_storage = torch.empty(self.output_size, dtype=config.params_dtype)
            else:
                bias_storage = torch.empty(
                    self.output_size,
                    device=torch.cuda.current_device(),
                    dtype=config.params_dtype,
                )
            self.bias = Parameter(bias_storage)

            if config.perform_initialization:
                # The bias always starts at zero.
                with torch.no_grad():
                    self.bias.zero_()
            self.bias.allreduce = not (self.is_expert and self.expert_parallel)
            self.bias.sequence_parallel = self.sequence_parallel

        self._forward_impl = unaligned_linear_with_grad_accumulation_and_async_allreduce

        def _default_extra_state(state_dict, prefix, *args, **kwargs):
            # Ensure an (empty) `_extra_state` entry exists when loading a state dict.
            return state_dict.setdefault(f'{prefix}_extra_state')

        self._register_load_state_dict_pre_hook(_default_extra_state)

        self.seq_length = seq_length
        self.scatter_to_tensor_model_parallel_region = scatter_to_tensor_model_parallel_region
        self.linear_with_frozen_weight = linear_with_frozen_weight
        self.reduce_from_tensor_model_parallel_region = reduce_from_tensor_model_parallel_region
        self.parallel_group = parallel_group

    def forward(self, input_):
        """Apply the row-parallel linear transform.

        Args:
            input_: Input tensor. It is used directly when
                ``input_is_parallel`` is True, otherwise it is scattered first.

        Returns:
            A tuple ``(output, output_bias)``. When ``skip_bias_add`` is True,
            ``output`` excludes the bias and ``output_bias`` is ``self.bias``;
            otherwise the bias (if any) is added and ``output_bias`` is None.
        """
        offload_ctx = self.config._cpu_offloading_context
        if offload_ctx is not None:
            if offload_ctx.inside_context == True:
                assert (
                    self.config.cpu_offloading == False
                ), "CPU Offloading cannot be enabled while using non-TE modules"

        # Obtain the rank-local slice of the input.
        if self.input_is_parallel:
            input_parallel = input_
        else:
            assert not self.sequence_parallel
            input_parallel = self.scatter_to_tensor_model_parallel_region(input_)

        # Matrix multiply. Arguments shared by both linear implementations:
        allreduce_dgrad = False
        shared_kwargs = dict(
            input=input_parallel,
            weight=self.weight,
            bias=None,
            gradient_accumulation_fusion=self.gradient_accumulation_fusion,
            sequence_parallel=False,
            grad_output_buffer=None,
            allreduce_dgrad=allreduce_dgrad,
        )
        if self.weight.requires_grad:
            self._forward_impl = unaligned_linear_with_grad_accumulation_and_async_allreduce
            output_parallel = self._forward_impl(
                parallel_group=self.parallel_group,
                seq_length=self.seq_length,
                **shared_kwargs,
            )
        else:
            self._forward_impl = self.linear_with_frozen_weight
            output_parallel = self._forward_impl(
                async_grad_allreduce=allreduce_dgrad,
                **shared_kwargs,
            )

        # Combine the partial results from all partitions.
        if self.explicit_expert_comm:
            assert self.skip_bias_add
            output_ = output_parallel
        elif self.sequence_parallel:
            output_ = unaligned_reduce_scatter_to_sequence_parallel_region(output_parallel, self.parallel_group)
        else:
            output_ = self.reduce_from_tensor_model_parallel_region(output_parallel)

        if self.skip_bias_add:
            return output_, self.bias
        if self.bias is not None:
            return output_ + self.bias, None
        return output_, None
import torch


def unaligned_divide(numerator, world_size, rank):
    """Return the share of ``numerator`` owned by ``rank`` in an uneven split.

    Every rank gets ``numerator // world_size`` items and the first
    ``numerator % world_size`` ranks get one extra, so the shares sum to
    ``numerator``.
    """
    res, remainder = divmod(numerator, world_size)
    if rank < remainder:
        res += 1
    return res


def _unaligned_offset(dim_size, world_size, rank):
    """Return the start index of ``rank``'s chunk under ``unaligned_divide``.

    Ranks below the remainder each hold one extra element, so the offset is
    ``rank * base`` plus one for every preceding rank that got an extra item.
    """
    base, remainder = divmod(dim_size, world_size)
    return rank * base + min(rank, remainder)


def unaligned_split_along_first_dim(input_, group):
    """Return this rank's (possibly uneven) slice of ``input_`` along dim 0."""
    world_size = torch.distributed.get_world_size(group=group)
    # Bypass the function if we are using only 1 GPU.
    if world_size == 1:
        return input_
    rank = torch.distributed.get_rank(group=group)

    dim_size = input_.size()[0]
    local_dim_size = unaligned_divide(dim_size, world_size, rank)
    dim_offset = _unaligned_offset(dim_size, world_size, rank)

    return input_[dim_offset: dim_offset + local_dim_size].contiguous()


def unaligned_gather_along_first_dim(input_, dim_size, group, async_op=False):
    """Gather tensors from all ranks and concatenate along the first dimension.

    Args:
        input_: Local tensor to contribute.
        dim_size: Total size of dim 0 after gathering; it determines the
            per-rank buffer sizes via ``unaligned_divide``.
        group: Process group to gather over.
        async_op: If True, return a zero-argument callable that waits for the
            collective and returns the concatenated tensor.

    Returns:
        The gathered tensor, or a callable producing it when ``async_op`` is
        True.
    """
    world_size = torch.distributed.get_world_size(group=group)
    # Bypass the function if we are using only 1 GPU.
    if world_size == 1:
        # Keep the return type consistent: async callers invoke the result.
        return (lambda: input_) if async_op else input_

    # One receive buffer per rank, sized to that rank's share of dim 0.
    output = [
        torch.empty(
            (int(unaligned_divide(dim_size, world_size, rank)), *(input_.size()[1:])),
            dtype=input_.dtype,
            device=torch.cuda.current_device(),
        )
        for rank in range(world_size)
    ]

    handle = torch.distributed.all_gather(output, input_.contiguous(), group=group, async_op=async_op)

    def post_process():
        if handle is not None:
            handle.wait()
        return torch.cat(output)

    if async_op:
        return post_process
    return post_process()


class UnalignedScatterToSequenceParallelRegion(torch.autograd.Function):
    """Split the input and keep only the chunk that belongs to this rank."""

    @staticmethod
    def forward(ctx, input_, group):
        ctx.dim_size = list(input_.size())[0]
        ctx.parallel_group = group
        return unaligned_split_along_first_dim(input_, group)

    @staticmethod
    def backward(ctx, grad_output):
        return unaligned_gather_along_first_dim(grad_output, ctx.dim_size, ctx.parallel_group), None


def unaligned_scatter_to_sequence_parallel_region(input_, group):
    """Autograd-aware uneven split of ``input_`` along dim 0."""
    return UnalignedScatterToSequenceParallelRegion.apply(input_, group)


def unaligned_reduce_scatter_along_first_dim(input_, group, async_op=False):
    """Reduce ``input_`` across ``group`` and return this rank's uneven slice.

    This is implemented as an all-reduce followed by slicing. The all-reduce
    runs on ``input_.contiguous()``, so an already-contiguous input is expected
    to be reduced in place.

    Returns:
        The local slice, or a zero-argument callable producing it when
        ``async_op`` is True.
    """
    world_size = torch.distributed.get_world_size(group=group)
    # Bypass the function if we are using only 1 GPU.
    if world_size == 1:
        # Keep the return type consistent: async callers invoke the result.
        return (lambda: input_) if async_op else input_
    rank = torch.distributed.get_rank(group=group)

    dim_size = input_.size()[0]
    local_dim_size = unaligned_divide(dim_size, world_size, rank)
    dim_offset = _unaligned_offset(dim_size, world_size, rank)

    input_ = input_.contiguous()
    handle = torch.distributed.all_reduce(input_, group=group, async_op=async_op)

    def post_process():
        if handle is not None:
            handle.wait()
        return input_[dim_offset: dim_offset + local_dim_size].contiguous()

    if async_op:
        return post_process
    return post_process()


class UnalignedReduceScatterToSequenceParallelRegion(torch.autograd.Function):
    """Reduce scatter the input from the model parallel region."""

    @staticmethod
    def forward(ctx, input_, group):
        ctx.dim_size = list(input_.size())[0]
        ctx.parallel_group = group
        return unaligned_reduce_scatter_along_first_dim(input_, group)

    @staticmethod
    def backward(ctx, grad_output):
        return unaligned_gather_along_first_dim(grad_output, ctx.dim_size, ctx.parallel_group), None


def unaligned_reduce_scatter_to_sequence_parallel_region(input_, group):
    """Autograd-aware uneven reduce-scatter of ``input_`` along dim 0."""
    return UnalignedReduceScatterToSequenceParallelRegion.apply(input_, group)


class UnalignedGatherFromSequenceParallelRegion(torch.autograd.Function):
    """Gather the input from the sequence parallel region."""

    @staticmethod
    def forward(ctx, input_, dim_size, group, tensor_parallel_output_grad):
        ctx.dim_size = dim_size
        ctx.parallel_group = group
        ctx.tensor_parallel_output_grad = tensor_parallel_output_grad
        return unaligned_gather_along_first_dim(input_, dim_size, group)

    @staticmethod
    def backward(ctx, grad_output):
        # Both helpers require the process group; it was saved in forward().
        if ctx.tensor_parallel_output_grad:
            grad_input = unaligned_reduce_scatter_to_sequence_parallel_region(grad_output, ctx.parallel_group)
        else:
            grad_input = unaligned_split_along_first_dim(grad_output, ctx.parallel_group)
        # One gradient slot per forward() argument after ctx.
        return grad_input, None, None, None


class UnalignedLinearWithGradAccumulationAndAsyncCommunication(torch.autograd.Function):
    """See linear_with_grad_accumulation_and_async_allreduce"""

    @staticmethod
    def forward(
        ctx,
        input,
        weight,
        bias,
        gradient_accumulation_fusion,
        allreduce_dgrad,
        sequence_parallel,
        grad_output_buffer,

        # unaligned parallel arguments
        parallel_group,
        seq_length=None
    ):
        ctx.save_for_backward(input, weight)
        ctx.use_bias = bias is not None
        ctx.gradient_accumulation_fusion = gradient_accumulation_fusion
        ctx.allreduce_dgrad = allreduce_dgrad
        ctx.sequence_parallel = sequence_parallel
        ctx.grad_output_buffer = grad_output_buffer
        ctx.parallel_group = parallel_group

        if sequence_parallel:
            if seq_length is None:
                # Sum the local dim-0 sizes across the group to get the total length.
                seq_len = torch.Tensor([list(input.size())[0]]).cuda()
                torch.distributed.all_reduce(seq_len, group=parallel_group)
                # .item() yields a float here; sizes are integral.
                seq_length = int(seq_len.item())
            total_input = unaligned_gather_along_first_dim(input, seq_length, parallel_group)
        else:
            total_input = input

        output = torch.matmul(total_input, weight.t())
        if bias is not None:
            output = output + bias

        # Saved so backward() can size the gather without another all-reduce.
        ctx.seq_length = seq_length
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, weight = ctx.saved_tensors
        use_bias = ctx.use_bias
        grad_output_buffer = ctx.grad_output_buffer
        parallel_group = ctx.parallel_group

        wgrad_compute = True
        post_process = None
        total_input = None
        if grad_output_buffer is not None:
            # Weight-gradient computation is deferred: only record grad_output.
            grad_output_buffer.append(grad_output)
            wgrad_compute = False

        if wgrad_compute:
            if ctx.sequence_parallel:
                # Start gathering the full input; it is awaited after the dgrad matmul.
                post_process = unaligned_gather_along_first_dim(input, ctx.seq_length, parallel_group, async_op=True)
            else:
                total_input = input
        grad_input = grad_output.matmul(weight)

        if ctx.sequence_parallel and wgrad_compute:
            total_input = post_process()

        if wgrad_compute and grad_output.dim() == 3:
            # Flatten the leading dims so the weight gradient is a single 2-D matmul.
            grad_output = grad_output.contiguous()
            grad_output = grad_output.view(-1, grad_output.shape[2])
            total_input = total_input.view(-1, total_input.shape[2])

        if ctx.allreduce_dgrad:
            # Asynchronous all-reduce
            handle = torch.distributed.all_reduce(grad_input, group=parallel_group, async_op=True)
            # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the
            # all-reduce is scheduled before the weight gradient computation

        if ctx.sequence_parallel:
            assert not ctx.allreduce_dgrad
            post_process = unaligned_reduce_scatter_along_first_dim(grad_input, parallel_group, async_op=True)
            # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the
            # reduce scatter is scheduled before the weight gradient computation

        if ctx.gradient_accumulation_fusion:
            if wgrad_compute:
                # Imported at the only call site so this module can be imported
                # where the NPU fused kernels are not installed.
                from mindspeed.ops.npu_matmul_add import npu_matmul_add_fp32, npu_matmul_add_fp16

                if weight.main_grad.dtype == torch.float32:
                    npu_matmul_add_fp32(total_input, grad_output, weight.main_grad)
                elif weight.main_grad.dtype in (torch.float16, torch.bfloat16):
                    npu_matmul_add_fp16(total_input, grad_output, weight.main_grad)
                else:
                    raise RuntimeError("Unsupported gradient type for gradient accumulation fusion")

            if hasattr(weight, 'grad_added_to_main_grad'):
                # When overlap_grad_reduce is True, need to ensure that backward hooks
                # are all run on the main backprop thread to prevent deadlocks. Setup
                # dummy grad_weight tensor to prevent backward hooks from being run
                # in a background thread.
                if getattr(weight, 'zero_out_wgrad', False):
                    grad_weight = torch.zeros(
                        weight.main_grad.shape,
                        dtype=input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                else:
                    grad_weight = torch.empty(
                        weight.main_grad.shape,
                        dtype=input.dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                weight.grad_added_to_main_grad = True
            else:
                grad_weight = None
        else:
            grad_weight = grad_output.t().matmul(total_input)
        grad_bias = grad_output.sum(dim=0) if use_bias else None

        if ctx.sequence_parallel:
            sub_grad_input = post_process()
            # Need to return None's as gradient has to flow for all the input arguments
            # provided during forward
            return sub_grad_input, grad_weight, grad_bias, None, None, None, None, None, None

        if ctx.allreduce_dgrad:
            handle.wait()

        return grad_input, grad_weight, grad_bias, None, None, None, None, None, None


def unaligned_gather_from_sequence_parallel_region(input_, dim_size, group, tensor_parallel_output_grad):
    """Autograd-aware uneven all-gather of ``input_`` along dim 0."""
    return UnalignedGatherFromSequenceParallelRegion.apply(input_, dim_size, group, tensor_parallel_output_grad)


def unaligned_linear_with_grad_accumulation_and_async_allreduce(
    input, weight, bias, gradient_accumulation_fusion, allreduce_dgrad, sequence_parallel, grad_output_buffer,
    parallel_group, seq_length=None
):
    """Run the unaligned linear autograd function with the given arguments."""
    args = [input, weight, bias, gradient_accumulation_fusion, allreduce_dgrad, sequence_parallel, grad_output_buffer,
            parallel_group, seq_length]
    return UnalignedLinearWithGradAccumulationAndAsyncCommunication.apply(*args)
import torch
import torch.distributed

from megatron.core.parallel_state import get_nccl_options

from mindspeed.core.mindspeed_parallel_group import MindspeedParallelGroup
from mindspeed.core.simple_parallel_cfg import SimpleParallelCfg
from mindspeed.core.singleton_meta import SingletonMeta


class TensorParallelXUnionCP(MindspeedParallelGroup, metaclass=SingletonMeta):
    """Process group formed by the union of tp-x groups across a context-parallel group."""

    def __init__(
        self,
        parallel_cfg: SimpleParallelCfg = None,
        pg_name: str = None,
        overlap_gp_name: str = None,
        nccl_comm_cfgs=None,
    ):
        super().__init__(parallel_cfg, pg_name, overlap_gp_name, nccl_comm_cfgs)

    @staticmethod
    def init_group(
        parallel_cfg: SimpleParallelCfg,
        pg_name: str,
        overlap_gp_name: str = None,
        nccl_comm_cfgs=None,
    ):
        """Create the tp-x ∪ cp process groups and return the one for this rank.

        For every context-parallel group, the tp-x groups (contiguous blocks
        of ``tp_x`` ranks inside each tensor-parallel block) of its members are
        merged into one sorted rank list. A process group is created for each
        distinct list, in first-seen order.

        Args:
            parallel_cfg: Provides ``pp``, ``tp``, ``cp`` and ``tp_x``.
            pg_name: Name passed to ``get_nccl_options`` for the main group.
            overlap_gp_name: If truthy, a second group over the same ranks is
                created with this name.
            nccl_comm_cfgs: Passed to ``get_nccl_options``.

        Returns:
            ``(group, global_ranks, overlap_group)`` for the calling rank.
            Entries stay ``None`` if the rank is in no group, and
            ``overlap_group`` is ``None`` when ``overlap_gp_name`` is falsy.
        """
        pp = parallel_cfg.pp
        tp = parallel_cfg.tp
        cp = parallel_cfg.cp
        tp_x = parallel_cfg.tp_x

        rank = torch.distributed.get_rank()
        world_size: int = torch.distributed.get_world_size()
        num_pp_groups: int = world_size // pp
        dp = world_size // (tp * pp * cp)

        # Context-parallel groups: ranks strided by tp inside each tp*cp block.
        all_cp_grps = []
        for i in range(pp):
            for j in range(dp):
                start_rank = i * num_pp_groups + j * tp * cp
                end_rank = i * num_pp_groups + (j + 1) * tp * cp
                for k in range(tp):
                    all_cp_grps.append(list(range(start_rank + k, end_rank, tp)))

        # tp-x groups: contiguous blocks of tp_x ranks inside each tp block.
        all_tp_x_grps = []
        num_tp_grps: int = world_size // tp
        for i in range(num_tp_grps):
            for j in range(tp // tp_x):
                all_tp_x_grps.append(list(range(i * tp + j * tp_x, i * tp + (j + 1) * tp_x)))

        # Index the tp-x groups by member rank so each cp rank is resolved by
        # lookup instead of scanning every group.
        tp_x_grps_by_rank = {}
        for tp_x_grp in all_tp_x_grps:
            for member in tp_x_grp:
                tp_x_grps_by_rank.setdefault(member, []).append(tp_x_grp)

        # Build the tensor model-parallel-x-cp groups.
        # NOTE: every tp-x group containing a cp rank is merged in, with no
        # de-duplication; sorting makes the merge order irrelevant.
        res_group, res_overlap_group, res_global_ranks = None, None, None
        tp_x_cp_grp_ranks = []
        seen_rank_lists = set()
        for cp_grp in all_cp_grps:
            merged_ranks = sorted(
                member
                for cp_rank in cp_grp
                for tp_x_grp in tp_x_grps_by_rank.get(cp_rank, ())
                for member in tp_x_grp
            )
            key = tuple(merged_ranks)
            if key not in seen_rank_lists:
                seen_rank_lists.add(key)
                tp_x_cp_grp_ranks.append(merged_ranks)

        # Every rank must create every group, in the same order.
        cur_overlap_group = None
        for tp_x_cp_ranks in tp_x_cp_grp_ranks:
            cur_group = torch.distributed.new_group(
                tp_x_cp_ranks, pg_options=get_nccl_options(pg_name, nccl_comm_cfgs)
            )
            # for send-recv in parallel
            if overlap_gp_name:
                cur_overlap_group = torch.distributed.new_group(
                    tp_x_cp_ranks, pg_options=get_nccl_options(overlap_gp_name, nccl_comm_cfgs),
                )

            if rank in tp_x_cp_ranks:
                res_global_ranks = tp_x_cp_ranks
                res_group = cur_group
                res_overlap_group = cur_overlap_group

        return res_group, res_global_ranks, res_overlap_group
import torch
import torch.distributed

from megatron.core.parallel_state import get_nccl_options

from mindspeed.core.mindspeed_parallel_group import MindspeedParallelGroup
from mindspeed.core.simple_parallel_cfg import SimpleParallelCfg
from mindspeed.core.singleton_meta import SingletonMeta


class TensorParallelYUnionCP(MindspeedParallelGroup, metaclass=SingletonMeta):
    """Process group formed by the union of tp-y groups across a context-parallel group."""

    def __init__(
        self,
        parallel_cfg: SimpleParallelCfg = None,
        pg_name: str = None,
        overlap_gp_name: str = None,
        nccl_comm_cfgs=None,
    ):
        super().__init__(parallel_cfg, pg_name, overlap_gp_name, nccl_comm_cfgs)

    @staticmethod
    def init_group(
        parallel_cfg: SimpleParallelCfg,
        pg_name: str,
        overlap_gp_name: str = None,
        nccl_comm_cfgs=None,
    ):
        """Create the tp-y ∪ cp process groups and return the one for this rank.

        For every context-parallel group, the tp-y groups (ranks strided by
        ``tp_x`` inside each tensor-parallel block) of its members are merged
        into one sorted rank list. A process group is created for each distinct
        list, in first-seen order.

        Args:
            parallel_cfg: Provides ``pp``, ``tp``, ``cp`` and ``tp_x``.
            pg_name: Name passed to ``get_nccl_options`` for the main group.
            overlap_gp_name: If truthy, a second group over the same ranks is
                created with this name.
            nccl_comm_cfgs: Passed to ``get_nccl_options``.

        Returns:
            ``(group, global_ranks, overlap_group)`` for the calling rank.
            Entries stay ``None`` if the rank is in no group, and
            ``overlap_group`` is ``None`` when ``overlap_gp_name`` is falsy.
        """
        pp = parallel_cfg.pp
        tp = parallel_cfg.tp
        cp = parallel_cfg.cp
        tp_x = parallel_cfg.tp_x

        rank = torch.distributed.get_rank()
        world_size: int = torch.distributed.get_world_size()
        num_pp_groups: int = world_size // pp
        dp = world_size // (tp * pp * cp)

        # Context-parallel groups: ranks strided by tp inside each tp*cp block.
        all_cp_grps = []
        for i in range(pp):
            for j in range(dp):
                start_rank = i * num_pp_groups + j * tp * cp
                end_rank = i * num_pp_groups + (j + 1) * tp * cp
                for k in range(tp):
                    all_cp_grps.append(list(range(start_rank + k, end_rank, tp)))

        # tp-y groups: ranks strided by tp_x inside each tp block.
        all_tp_y_grps = []
        num_tp_grps: int = world_size // tp
        for i in range(num_tp_grps):
            num_tp_x_group = tp // tp_x
            for j in range(tp // num_tp_x_group):
                all_tp_y_grps.append(list(range(i * tp + j, (i + 1) * tp, tp_x)))

        # Index the tp-y groups by member rank so each cp rank is resolved by
        # lookup instead of scanning every group.
        tp_y_grps_by_rank = {}
        for tp_y_grp in all_tp_y_grps:
            for member in tp_y_grp:
                tp_y_grps_by_rank.setdefault(member, []).append(tp_y_grp)

        # Build the tensor model-parallel-y-cp groups.
        # NOTE: every tp-y group containing a cp rank is merged in, with no
        # de-duplication; sorting makes the merge order irrelevant.
        res_group, res_overlap_group, res_global_ranks = None, None, None
        tp_y_cp_grp_ranks = []
        seen_rank_lists = set()
        for cp_grp in all_cp_grps:
            merged_ranks = sorted(
                member
                for cp_rank in cp_grp
                for tp_y_grp in tp_y_grps_by_rank.get(cp_rank, ())
                for member in tp_y_grp
            )
            key = tuple(merged_ranks)
            if key not in seen_rank_lists:
                seen_rank_lists.add(key)
                tp_y_cp_grp_ranks.append(merged_ranks)

        # Every rank must create every group, in the same order.
        cur_overlap_group = None
        for tp_y_cp_ranks in tp_y_cp_grp_ranks:
            cur_group = torch.distributed.new_group(
                tp_y_cp_ranks, pg_options=get_nccl_options(pg_name, nccl_comm_cfgs)
            )
            # for send-recv in parallel
            if overlap_gp_name:
                cur_overlap_group = torch.distributed.new_group(
                    tp_y_cp_ranks, pg_options=get_nccl_options(overlap_gp_name, nccl_comm_cfgs),
                )

            if rank in tp_y_cp_ranks:
                res_global_ranks = tp_y_cp_ranks
                res_group = cur_group
                res_overlap_group = cur_overlap_group

        return res_group, res_global_ranks, res_overlap_group
+"""Pretrain utilities.""" + +import os +import sys +import gc +import os +from functools import wraps +import torch +import torch_npu +from datetime import datetime +from megatron.training import get_args +from megatron.training import get_timers +from megatron.training import is_last_rank +from megatron.core import parallel_state +from megatron.core.num_microbatches_calculator import get_num_microbatches +from megatron.core.transformer.moe.moe_utils import track_moe_metrics +from megatron.training import print_rank_0 +from megatron.training.arguments import parse_args +from megatron.training.global_vars import (set_args, get_tensorboard_writer, get_wandb_writer, + get_one_logger) +from megatron.training.training import num_floating_point_operations +from megatron.training.utils import print_rank_last, report_memory +from megatron.training.theoretical_memory_usage import report_theoretical_memory +from mindspeed.core.auto_parallel.auto_parallel_apply import search_optimal_configuration +from mindspeed.core.auto_parallel.auto_parallel_profiling import Profiling, OperateProfile +from mindspeed.core.memory.auto_pipeline.autopipeline import autopipeline_profiling +from mindspeed.core.performance.auto_pipeline_perf.autopipeline_perf import (autopipelineperf_profiling, check_out_of_memory, + calculate_num_of_activations, check_skip_profiling, + broadcast_skip_in_ranks) +from mindspeed.core.performance.auto_pipeline_perf.optimpipeline_solver import solve_optimpipeline, broadcast_oom_in_ranks, broadcast_mbs_in_ranks, save_profiling_data +from mindspeed.core.performance.auto_pipeline_perf.schedulepipeline_solver import (solve_pipelineschedule, broadcast_enable_schedule_in_ranks, + broadcast_scheduler_in_ranks, broadcast_layer_in_ranks, + all_gather_time, average_time_by_rank) +from mindspeed.core.memory.auto_pipeline.autopipeline_apply import apply_autopipeline +from mindspeed.core.memory.auto_pipeline.autopipeline_solver import solve_autopipeline, 
broadcast_policy_in_ranks, destroy_global_vars
from mindspeed.arguments import parse_args_wrapper


# NOTE(review): this region was recovered from a whitespace-collapsed patch;
# block nesting was reconstructed from syntax. Spots where the nesting could
# not be derived unambiguously are marked NOTE(review) below.

# Module-level state shared by the decorators in this file.
POLICY = None
OPTIMIZED_MBS_LIST = None
PP_SCHEDULE_LIST = None
OPTIMAL_LAYERS = None
ORIGIN_MBS = None
DATA_PARALLEL_SIZE = 1
ENABLE_SCHEDULER = False
FLOPS_COUNTER = None
RECORDED_COUNT = 0
TRAVERSED_COUNT = 0


def generated_flops_counter():
    """Create a torch_npu ``FlopsCounter`` and store it in ``FLOPS_COUNTER``."""
    # Imported lazily so the module can be loaded before torch_npu is needed.
    from torch_npu.utils.flops_count import FlopsCounter
    global FLOPS_COUNTER
    FLOPS_COUNTER = FlopsCounter()


def get_flops_counter():
    """Return the shared ``FlopsCounter``, creating it on first use."""
    global FLOPS_COUNTER
    if FLOPS_COUNTER is None:
        generated_flops_counter()
    return FLOPS_COUNTER


def set_count(count):
    """Store ``count[0]`` / ``count[1]`` as the recorded / traversed counts."""
    global RECORDED_COUNT
    global TRAVERSED_COUNT
    RECORDED_COUNT = count[0]
    TRAVERSED_COUNT = count[1]


def get_count():
    """Return ``(RECORDED_COUNT, TRAVERSED_COUNT)``.

    When both cached values are still zero, they are refreshed from the shared
    flops counter's ``get_flops()`` before being returned.
    """
    global RECORDED_COUNT
    global TRAVERSED_COUNT
    if RECORDED_COUNT == 0 and TRAVERSED_COUNT == 0:
        flops_counter = get_flops_counter()
        count = flops_counter.get_flops()
        set_count(count)
    return RECORDED_COUNT, TRAVERSED_COUNT


def train_decorator(train):
    """Wrap ``train`` so that it optionally runs inside a torch_npu profiler.

    ``args.profile`` is translated into ``args.profile_npu`` (and then cleared).
    When NPU profiling is enabled for this rank, ``train`` runs inside a
    ``torch_npu.profiler.profile`` context and the profiler object is exposed
    as ``args.prof``.

    Raises:
        ValueError: if ``args.profile_level`` is not level0/level1/level2.
    """
    @wraps(train)
    def wrapper(*args, **kwargs):
        args_ = get_args()
        if args_.profile:
            args_.profile_npu = True
            args_.profile = False
        else:
            args_.profile_npu = False

        is_profile = hasattr(args_, 'profile_npu') and args_.profile_npu \
            and ((torch.distributed.get_rank() in args_.profile_ranks) or (-1 in args_.profile_ranks))
        if is_profile:
            active = args_.profile_step_end - args_.profile_step_start
            skip_first = args_.profile_step_start

            if args_.profile_with_cpu:
                activities = [torch_npu.profiler.ProfilerActivity.NPU, torch_npu.profiler.ProfilerActivity.CPU]
            else:
                activities = [torch_npu.profiler.ProfilerActivity.NPU]

            if args_.profile_level == 'level0':
                profiler_level = torch_npu.profiler.ProfilerLevel.Level0
            elif args_.profile_level == 'level1':
                profiler_level = torch_npu.profiler.ProfilerLevel.Level1
            elif args_.profile_level == 'level2':
                profiler_level = torch_npu.profiler.ProfilerLevel.Level2
            else:
                raise ValueError(f"profiler_level only support level0, level1, level2, but gets {args_.profile_level}")

            experimental_config = torch_npu.profiler._ExperimentalConfig(
                aic_metrics=torch_npu.profiler.AiCMetrics.PipeUtilization,
                profiler_level=profiler_level,
                l2_cache=False
            )

            with torch_npu.profiler.profile(
                activities=activities,
                record_shapes=args_.profile_record_shapes,
                profile_memory=args_.profile_with_memory,
                with_stack=args_.profile_with_stack,
                experimental_config=experimental_config,
                schedule=torch_npu.profiler.schedule(wait=0, warmup=0, active=active, repeat=1, skip_first=skip_first),
                on_trace_ready=torch_npu.profiler.tensorboard_trace_handler(args_.profile_save_path)
            ) as prof:
                # Exposed so the train-step wrapper can call prof.step().
                args_.prof = prof
                return train(*args, **kwargs)
        else:
            return train(*args, **kwargs)

    return wrapper


def train_step_decorator(train_step):
    """Wrap ``train_step`` with FLOPs counting and profiling hooks.

    Depending on the global args the step is run with operator profiling
    (``profile_operator``), through a ``Profiling`` hook (``prof_file``), or
    unchanged. Afterwards the NPU profiler is advanced when enabled for this
    rank, and the FLOPs counts are stored via ``set_count``.
    """
    # The Profiling hook is installed once and reused. Previously the wrapped
    # function was rebound on every call, so each step added one more wrapper
    # (and one more Profiling instance) around the previous one.
    # NOTE(review): assumes Profiling is meant to live across steps - confirm.
    hooked_train_step = None

    @wraps(train_step)
    def wrapper(*args, **kwargs):
        nonlocal hooked_train_step
        args_ = get_args()
        flop_count = None
        if args_.op_cal_tflops:
            flop_count = get_flops_counter()
            flop_count.start()
        if args_.profile_operator:
            op_profile = OperateProfile(args_)
            ret = train_step(*args, **kwargs)
            op_profile.step()
        elif args_.prof_file:
            if hooked_train_step is None:
                profiling = Profiling(args_)
                hooked_train_step = profiling.hook_train_step(train_step)
            ret = hooked_train_step(*args, **kwargs)
        else:
            ret = train_step(*args, **kwargs)
        # profile_npu is only set by train_decorator; tolerate its absence.
        is_profile = getattr(args_, 'profile_npu', False) and (
            (torch.distributed.get_rank() in args_.profile_ranks) or (-1 in args_.profile_ranks))
        if is_profile:
            args_.prof.step()
        if args_.op_cal_tflops:
            counts = flop_count.get_flops()
            set_count(counts)
            flop_count.stop()
        return ret
    return wrapper


def training_log(loss_dict, total_loss_dict, learning_rate, decoupled_learning_rate, iteration,
                 loss_scale, report_memory_flag, skipped_iter,
                 grad_norm, params_norm, num_zeros_in_grad):
    """Log training information such as losses, timing, and throughput.

    Accumulates ``loss_dict`` into ``total_loss_dict``, writes scalars to the
    tensorboard / wandb writers at ``args.tensorboard_log_interval`` and prints
    a summary line every ``args.log_interval`` iterations. The summary includes
    the theoretical throughput and the two values derived from the FLOPs
    counter counts, all-reduced (SUM) over ranks.

    Returns:
        The (possibly cleared) ``report_memory_flag``.
    """
    args = get_args()
    timers = get_timers()
    writer = get_tensorboard_writer()
    wandb_writer = get_wandb_writer()
    one_logger = get_one_logger()

    # Advanced, skipped, and Nan iterations.
    advanced_iters_key = 'advanced iterations'
    skipped_iters_key = 'skipped iterations'
    nan_iters_key = 'nan iterations'
    # Advanced iterations.
    if not skipped_iter:
        total_loss_dict[advanced_iters_key] = total_loss_dict.get(
            advanced_iters_key, 0) + 1
    else:
        if advanced_iters_key not in total_loss_dict:
            total_loss_dict[advanced_iters_key] = 0
    # Skipped iterations.
    total_loss_dict[skipped_iters_key] = total_loss_dict.get(
        skipped_iters_key, 0) + skipped_iter
    # Update losses and set nan iterations
    got_nan = False
    for key in loss_dict:
        if not skipped_iter:
            total_loss_dict[key] = total_loss_dict.get(
                key, torch.tensor([0.0], dtype=torch.float, device='cuda')) + loss_dict[key]
        else:
            value = loss_dict[key].float().sum().item()
            # value != value is true only for NaN.
            is_nan = value == float('inf') or \
                value == -float('inf') or \
                value != value
            got_nan = got_nan or is_nan
    total_loss_dict[nan_iters_key] = total_loss_dict.get(
        nan_iters_key, 0) + int(got_nan)

    # Logging.
    timers_to_log = [
        'forward-backward',
        'forward-compute',
        'backward-compute',
        'batch-generator',
        'forward-recv',
        'forward-send',
        'backward-recv',
        'backward-send',
        'forward-send-forward-recv',
        'forward-send-backward-recv',
        'backward-send-forward-recv',
        'backward-send-backward-recv',
        'forward-backward-send-forward-backward-recv',
        'layernorm-grads-all-reduce',
        'embedding-grads-all-reduce',
        'all-grads-sync',
        'params-all-gather',
        'optimizer-copy-to-main-grad',
        'optimizer-unscale-and-check-inf',
        'optimizer-clip-main-grad',
        'optimizer-count-zeros',
        'optimizer-inner-step',
        'optimizer-copy-main-to-model-params',
        'optimizer']

    # Calculate batch size.
    batch_size = args.micro_batch_size * args.data_parallel_size * \
        get_num_microbatches()

    # Track app tag & app tag ID
    if one_logger:
        job_name = os.environ.get('SLURM_JOB_NAME', None)
        current_app_tag = f'{job_name}_{batch_size}_{args.world_size}'
        one_logger.log_app_tag(current_app_tag)

    total_iterations = total_loss_dict[advanced_iters_key] + \
        total_loss_dict[skipped_iters_key]

    # Tensorboard values.
    # Timer requires all the ranks to call.
    if args.log_timers_to_tensorboard and \
            (iteration % args.tensorboard_log_interval == 0):
        timers.write(timers_to_log, writer, iteration,
                     normalizer=total_iterations)
    if writer and (iteration % args.tensorboard_log_interval == 0):
        if wandb_writer:
            wandb_writer.log({'samples vs steps': args.consumed_train_samples},
                             iteration)
        if args.log_learning_rate_to_tensorboard:
            writer.add_scalar('learning-rate', learning_rate, iteration)
            if args.decoupled_lr is not None:
                writer.add_scalar('decoupled-learning-rate', decoupled_learning_rate, iteration)
            writer.add_scalar('learning-rate vs samples', learning_rate,
                              args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({'learning-rate': learning_rate}, iteration)
        if args.log_batch_size_to_tensorboard:
            writer.add_scalar('batch-size', batch_size, iteration)
            writer.add_scalar('batch-size vs samples', batch_size,
                              args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({'batch-size': batch_size}, iteration)
        for key in loss_dict:
            writer.add_scalar(key , loss_dict[key], iteration)
            writer.add_scalar(key + ' vs samples', loss_dict[key],
                              args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({key: loss_dict[key]}, iteration)
        if args.log_loss_scale_to_tensorboard:
            writer.add_scalar('loss-scale', loss_scale, iteration)
            writer.add_scalar('loss-scale vs samples', loss_scale,
                              args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({'loss-scale': loss_scale}, iteration)
        if args.log_world_size_to_tensorboard:
            writer.add_scalar('world-size', args.world_size, iteration)
            writer.add_scalar('world-size vs samples', args.world_size,
                              args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({'world-size': args.world_size}, iteration)
        if grad_norm is not None:
            writer.add_scalar('grad-norm', grad_norm, iteration)
            writer.add_scalar('grad-norm vs samples', grad_norm,
                              args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({'grad-norm': grad_norm}, iteration)
        if num_zeros_in_grad is not None:
            writer.add_scalar('num-zeros', num_zeros_in_grad, iteration)
            writer.add_scalar('num-zeros vs samples', num_zeros_in_grad,
                              args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({'num-zeros': num_zeros_in_grad}, iteration)
        if params_norm is not None:
            writer.add_scalar('params-norm', params_norm, iteration)
            writer.add_scalar('params-norm vs samples', params_norm,
                              args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({'params-norm': params_norm}, iteration)
        if args.log_memory_to_tensorboard:
            mem_stats = torch.cuda.memory_stats()
            writer.add_scalar(
                "mem-reserved-bytes",
                mem_stats["reserved_bytes.all.current"],
                iteration,
            )
            writer.add_scalar(
                "mem-allocated-bytes",
                mem_stats["allocated_bytes.all.current"],
                iteration,
            )
            writer.add_scalar(
                "mem-allocated-count",
                mem_stats["allocation.all.current"],
                iteration,
            )
    # NOTE(review): placed at function level (not under the writer check),
    # since track_moe_metrics receives the writer itself - confirm.
    if args.num_experts is not None:
        moe_loss_scale = 1 / get_num_microbatches()
        track_moe_metrics(moe_loss_scale, iteration, writer, wandb_writer, total_loss_dict, args.moe_per_layer_logging)

    if iteration % args.log_interval == 0:
        elapsed_time = timers('interval-time').elapsed(barrier=True)
        elapsed_time_per_iteration = elapsed_time / total_iterations

        throughput = num_floating_point_operations(args, batch_size) / (
            elapsed_time_per_iteration * 10**12 * args.world_size)

        # select all nodes info
        counts_0, counts_1 = get_count()
        counts_0_tensor = torch.tensor([counts_0], device="npu")
        counts_1_tensor = torch.tensor([counts_1], device="npu")

        torch.distributed.all_reduce(
            counts_0_tensor, op=torch.distributed.ReduceOp.SUM
        )
        torch.distributed.all_reduce(
            counts_1_tensor, op=torch.distributed.ReduceOp.SUM
        )

        # Summed counts divided by world size give a per-device figure.
        mfu = counts_0_tensor.cpu().item() / (10 ** 12 * elapsed_time_per_iteration * args.world_size)
        hfu = counts_1_tensor.cpu().item() / (10 ** 12 * elapsed_time_per_iteration * args.world_size)

        if args.log_timers_to_tensorboard:
            if writer:
                writer.add_scalar('iteration-time',
                                  elapsed_time_per_iteration, iteration)
            if wandb_writer:
                wandb_writer.log({'iteration-time': elapsed_time_per_iteration},
                                 iteration)
        log_string = f" [{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}]"
        log_string += ' iteration {:8d}/{:8d} |'.format(
            iteration, args.train_iters)
        log_string += ' consumed samples: {:12d} |'.format(
            args.consumed_train_samples)
        log_string += ' elapsed time per iteration (ms): {:.1f} |'.format(
            elapsed_time_per_iteration * 1000.0)
        if args.log_throughput:
            log_string += f' theoretical throughput per NPU (TFLOP/s/NPU): {throughput:.1f} |'
            log_string += f' actual throughput per NPU (TFLOP/s/NPU): {mfu:.1f} |'
            log_string += f' actual throughput per NPU with recompute (TFLOP/s/NPU): {hfu:.1f} |'
            if args.log_timers_to_tensorboard:
                if writer:
                    writer.add_scalar('throughput', throughput, iteration)
                if wandb_writer:
                    wandb_writer.log({'throughput': throughput}, iteration)
        assert learning_rate is not None
        # Decoupled_learning_rate should be not None only on first and last pipeline stage.
        log_string += ' learning rate: {:.6E} |'.format(learning_rate)
        if args.decoupled_lr is not None and (parallel_state.is_pipeline_first_stage(ignore_virtual=True) or
                                              parallel_state.is_pipeline_last_stage(ignore_virtual=True)):
            assert decoupled_learning_rate is not None
            log_string += ' decoupled learning rate: {:.6E} |'.format(decoupled_learning_rate)
        else:
            assert decoupled_learning_rate is None
        log_string += ' global batch size: {:5d} |'.format(batch_size)
        for key in total_loss_dict:
            if key not in [advanced_iters_key, skipped_iters_key,
                           nan_iters_key]:
                avg = total_loss_dict[key].item() / \
                    float(max(1, total_loss_dict[advanced_iters_key]))
                if avg > 0.0:
                    log_string += ' {}: {:.6E} |'.format(key, avg)
                # Reset the accumulator for the next logging window.
                total_loss_dict[key] = torch.tensor([0.0], dtype=torch.float, device='cuda')
        log_string += ' loss scale: {:.1f} |'.format(loss_scale)
        if grad_norm is not None:
            log_string += ' grad norm: {:.3f} |'.format(grad_norm)
        if num_zeros_in_grad is not None:
            log_string += ' num zeros: {:.1f} |'.format(num_zeros_in_grad)
        if params_norm is not None:
            log_string += ' params norm: {:.3f} |'.format(params_norm)
        log_string += ' number of skipped iterations: {:3d} |'.format(
            total_loss_dict[skipped_iters_key])
        log_string += ' number of nan iterations: {:3d} |'.format(
            total_loss_dict[nan_iters_key])
        total_loss_dict[advanced_iters_key] = 0
        total_loss_dict[skipped_iters_key] = 0
        total_loss_dict[nan_iters_key] = 0
        print_rank_last(log_string)
        if report_memory_flag and learning_rate > 0.:
            # Report memory after optimizer state has been initialized.
            if torch.distributed.get_rank() == 0:
                num_microbatches = get_num_microbatches()
                report_theoretical_memory(args, num_microbatches=num_microbatches, verbose=True)
            report_memory('(after {} iterations)'.format(iteration))
            report_memory_flag = False
        timers.log(timers_to_log, normalizer=args.log_interval)

    return report_memory_flag


def pretrain_decorator(pretrain):
    """Wrap ``pretrain`` with the auto-tuning / auto-parallel / autopipeline flows.

    * ``auto_tuning``: runs ``auto_tuning`` on ranks where
      ``rank % device_count == 0`` and returns without training.
    * ``auto_parallel``: runs ``search_optimal_configuration`` and returns.
    * ``automated_pipeline`` / ``automated_pipeline_perf``: profile first, store
      the resulting policy in module globals, then fall through to ``pretrain``.

    Returns:
        Whatever ``pretrain`` returns, or ``None`` for the search-only modes.
    """
    @wraps(pretrain)
    def wrapper(*args, **kwargs):
        global POLICY
        global OPTIMIZED_MBS_LIST
        global PP_SCHEDULE_LIST
        global OPTIMAL_LAYERS
        global ORIGIN_MBS
        global DATA_PARALLEL_SIZE
        global ENABLE_SCHEDULER
        new_parse_args = parse_args_wrapper(parse_args)
        argument = new_parse_args(kwargs.get('extra_args_provider'), False)

        if argument.auto_tuning:
            set_args(argument)
            print("pretrain_decorator set_args ========================================")

            from mindspeed.auto_tuning.auto_tuning import auto_tuning
            global_args = get_args()
            assert global_args.auto_tuning_ranks >= 16, "Auto-tuning searching space should be >= 16."
            working_dir_root = os.path.realpath(global_args.auto_tuning_work_dir)
            if global_args.rank % torch.cuda.device_count() == 0:
                # exist_ok avoids a FileExistsError when several ranks share
                # the directory and race between the check and the creation.
                os.makedirs(working_dir_root, exist_ok=True)

            if global_args.rank % torch.cuda.device_count() == 0:
                print("only rank 0 run auto tuning ========================================")
                auto_tuning(global_args, working_dir=working_dir_root)
            return

        if argument.auto_parallel:
            set_args(argument)
            search_optimal_configuration(argument)
            return

        if argument.automated_pipeline and not argument.num_layer_list:
            context, POLICY = autopipeline_profiling(args[1], args[2], args[3],
                                                     args[0], None, argument)
            # NOTE(review): the teardown calls are assumed to sit inside
            # `if context:`; the collapsed source does not show nesting.
            if context:
                POLICY = solve_autopipeline(context)
                parallel_state.destroy_global_memory_buffer()
                parallel_state.destroy_model_parallel()
                destroy_global_vars()
                gc.collect()
                torch.cuda.empty_cache()

        if argument.automated_pipeline_perf:
            ORIGIN_MBS = argument.micro_batch_size
            is_skip, exist_policy = check_skip_profiling(argument, config_file="autopipeline_perf_config.json")
            if not is_skip:
                global_context = []
                mbs_time, pp_schedule_time = 0, 0
                mbs_tries = 1
                num_forwards_first_stage = 0
                is_oom = False
                forward_time_dict = {}
                backward_time_dict = {}

                # Profile micro batch sizes 1 .. ORIGIN_MBS + 1.
                while mbs_tries < ORIGIN_MBS + 2:
                    context = autopipelineperf_profiling(mbs_tries, args[1], args[2], args[3],
                                                         args[0], None)
                    if mbs_tries == ORIGIN_MBS:
                        schedule_context = context
                        forward_time_list = all_gather_time(argument, schedule_context['fwd_time'])
                        forward_time_dict = average_time_by_rank(forward_time_list)
                        backward_time_list = all_gather_time(argument, schedule_context['bwd_time'])
                        backward_time_dict = average_time_by_rank(backward_time_list)
                        num_forwards_first_stage = calculate_num_of_activations(schedule_context)

                    parallel_state.destroy_global_memory_buffer()
                    parallel_state.destroy_model_parallel()
                    destroy_global_vars()
                    gc.collect()
                    torch.cuda.empty_cache()
                    global_context.append((context['fwd_time'], context['bwd_time'], context['comm_time']))
                    DATA_PARALLEL_SIZE = context['data_parallel_size']
                    # NOTE(review): both assignments assumed to be inside
                    # `if not is_oom:` - confirm against the upstream file.
                    if not is_oom:
                        is_oom = check_out_of_memory(argument, context, mbs_tries)
                        is_oom = broadcast_oom_in_ranks(0, is_oom)
                    mbs_tries += 1
                    if mbs_tries <= ORIGIN_MBS and is_oom:
                        raise AssertionError(
                            'A risk of Out of Memory could occur, please '
                            'reset to a smaller micro batch size.')
                    if mbs_tries > ORIGIN_MBS and is_oom:
                        break
                if len(global_context) > 0:
                    OPTIMIZED_MBS_LIST, mbs_time = solve_optimpipeline(argument, DATA_PARALLEL_SIZE, global_context)
                PP_SCHEDULE_LIST, pp_schedule_time, OPTIMAL_LAYERS = solve_pipelineschedule(argument, DATA_PARALLEL_SIZE, num_forwards_first_stage, forward_time_dict, backward_time_dict)
                if torch.distributed.get_rank() == 0 and mbs_time > pp_schedule_time and num_forwards_first_stage > 2:
                    ENABLE_SCHEDULER = True
                ENABLE_SCHEDULER = broadcast_enable_schedule_in_ranks(0, ENABLE_SCHEDULER)
                optimized_policy = (ENABLE_SCHEDULER, OPTIMIZED_MBS_LIST, PP_SCHEDULE_LIST, OPTIMAL_LAYERS)
                save_profiling_data(optimized_policy, config_file="autopipeline_perf_config.json")
            else:
                ENABLE_SCHEDULER = exist_policy[0]
                OPTIMIZED_MBS_LIST = exist_policy[1]
                PP_SCHEDULE_LIST = exist_policy[2]
                OPTIMAL_LAYERS = exist_policy[3]
        # Propagate the wrapped function's result instead of dropping it.
        return pretrain(*args, **kwargs)
    return wrapper


def setup_model_and_optimizer_decorator(setup_model_and_optimizer):
    """Wrap ``setup_model_and_optimizer`` with policy broadcast and hooks.

    Before building the model, the policies computed in ``pretrain_decorator``
    are broadcast from rank 0. After building it, the autopipeline recompute
    policy is applied when ``recompute_module_list`` is set, and memory
    profiling hooks are registered on the ranks listed in ``profile_ranks``.
    """
    @wraps(setup_model_and_optimizer)
    def wrapper(*args, **kwargs):
        global POLICY
        global OPTIMIZED_MBS_LIST
        global PP_SCHEDULE_LIST
        global OPTIMAL_LAYERS
        global ENABLE_SCHEDULER
        argument = get_args()
        if argument.automated_pipeline and POLICY:
            if torch.distributed.get_rank() == 0:
                broadcast_policy_in_ranks(0, POLICY)
            else:
                broadcast_policy_in_ranks(0)
        if argument.automated_pipeline_perf and ENABLE_SCHEDULER:
            broadcast_scheduler_in_ranks(0, PP_SCHEDULE_LIST)
            broadcast_layer_in_ranks(0, OPTIMAL_LAYERS)
        elif argument.automated_pipeline_perf and OPTIMIZED_MBS_LIST:
            broadcast_mbs_in_ranks(0, OPTIMIZED_MBS_LIST)
        model, optimizer, opt_param_scheduler = setup_model_and_optimizer(*args, **kwargs)
        if argument.recompute_module_list:
            apply_autopipeline(model)
        if argument.profile_memory and torch.distributed.get_rank() in argument.profile_ranks:
            profiling = Profiling(argument)
            profiling.register_recursive_hook("", model)
        return model, optimizer, opt_param_scheduler
    return wrapper


# ---- patch metadata (kept verbatim from the enclosing diff) ----
# diff --git a/model/train/yoco_moe/mindspeed/core/transformer/__init__.py b/model/train/yoco_moe/mindspeed/core/transformer/__init__.py
# new file mode 100644
# index 000000000..e69de29bb
# diff --git a/model/train/yoco_moe/mindspeed/core/transformer/attention.py b/model/train/yoco_moe/mindspeed/core/transformer/attention.py
# new file mode 100644
# index 000000000..338ae8c02
# --- /dev/null
# +++ b/model/train/yoco_moe/mindspeed/core/transformer/attention.py
# @@ -0,0 +1,496 @@
# coding=utf-8
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
+from dataclasses import dataclass +from functools import wraps +from typing import Union + +import torch +import torch.nn.functional as F + +from megatron.core.models.common.embeddings.rotary_pos_embedding import apply_rotary_pos_emb +from megatron.core.transformer import TransformerConfig, ModuleSpec, build_module +from megatron.core.transformer.attention import SelfAttentionSubmodules, CrossAttentionSubmodules, \ + Attention +from megatron.core.transformer.enums import AttnMaskType +from megatron.core import mpu, parallel_state +from megatron.core.utils import divide +from megatron.training import get_args + +from mindspeed.core.context_parallel.ulysses_context_parallel import UlyssesContextAttention +from mindspeed.core.parallel_state import get_context_parallel_group_for_hybrid_ulysses, \ + get_tensor_model_parallel_world_size_for_nd1_dim1 +from mindspeed.core.tensor_parallel.comm_group_api import TPXCollectiveComm, TPXOverlapCollectiveComm, \ + TPYCollectiveComm, TPYOverlapCollectiveComm +from mindspeed.core.tensor_parallel_y_union_cp import TensorParallelYUnionCP +from mindspeed.core.tensor_parallel.tp_2d.parallel_linear_2d import ParallelLinear2D + + +@dataclass +class SelfAttentionSubmodules: + linear_qkv: Union[ModuleSpec, type] = None + core_attention: Union[ModuleSpec, type] = None + linear_proj: Union[ModuleSpec, type] = None + q_layernorm: Union[ModuleSpec, type] = None + k_layernorm: Union[ModuleSpec, type] = None + linear_qb: Union[ModuleSpec, type] = None + linear_kvb: Union[ModuleSpec, type] = None + + +def attention_init( + self, + config: TransformerConfig, + submodules: Union[SelfAttentionSubmodules, CrossAttentionSubmodules], + layer_number: int, + attn_mask_type: AttnMaskType, + attention_type: str, +): + super(Attention, self).__init__(config=config) + self.config = config + self.layer_number = layer_number + self.attn_mask_type = attn_mask_type + self.attention_type = attention_type + + # For normal attention without groups, 
num_query_groups == num_attention_heads, + # so these two will be the same + self.query_projection_size = self.config.kv_channels * self.config.num_attention_heads + self.kv_projection_size = self.config.kv_channels * self.config.num_query_groups + + args = get_args() + # patch for tp-2d + world_size = args.tp_x if args.tp_2d else parallel_state.get_tensor_model_parallel_world_size() + # Per attention head and per partition values. + self.hidden_size_per_attention_head = divide( + self.query_projection_size, self.config.num_attention_heads + ) + self.num_attention_heads_per_partition = divide(self.config.num_attention_heads, world_size) + self.num_query_groups_per_partition = divide(self.config.num_query_groups, world_size) + + self.core_attention = build_module( + submodules.core_attention, + config=self.config, + layer_number=self.layer_number, + attn_mask_type=self.attn_mask_type, + attention_type=self.attention_type, + ) + + self.checkpoint_core_attention = self.config.recompute_granularity == 'selective' + + # Output. 
+ self.linear_proj = build_module( + submodules.linear_proj, + self.query_projection_size, + self.config.hidden_size, + config=self.config, + init_method=self.config.output_layer_init_method, + bias=self.config.add_bias_linear, + input_is_parallel=True, + skip_bias_add=True, + is_expert=False, + tp_comm_buffer_name='proj', + ) + cp = config.context_parallel_size + if args.tp_2d: + tp_y_cp_sz = cp * args.tp_y + else: + tp_y_cp_sz = cp + if tp_y_cp_sz > 1 and args.context_parallel_algo in ['ulysses_cp_algo', 'hybrid_cp_algo', + 'hybrid_adaptive_cp_algo']: + if args.tp_2d: + tp_y_cp = TensorParallelYUnionCP() + ulysses_group = tp_y_cp.group + else: + ulysses_group = mpu.get_context_parallel_group() + if args.context_parallel_algo in ['hybrid_cp_algo', 'hybrid_adaptive_cp_algo']: + ulysses_group = get_context_parallel_group_for_hybrid_ulysses() + self.core_attention = UlyssesContextAttention(self.core_attention, ulysses_group) + + +def attention_init_wrapper(fn): + @wraps(fn) + def wrapper(self, *args, **kwargs): + fn(self, *args, **kwargs) + if self.config.num_query_groups is None: + self.config.num_query_groups = self.config.num_attention_heads + self.num_attention_heads_per_partition = self.config.num_attention_heads * self.num_query_groups_per_partition // self.config.num_query_groups + + return wrapper + + +def self_attention_init_wrapper(fn): + @wraps(fn) + def wrapper(self, + config: TransformerConfig, + submodules: SelfAttentionSubmodules, + layer_number: int, + attn_mask_type=AttnMaskType.padding, ): + args = get_args() + if args.overlap_param_gather: + config.reset_attention_order = True + fn(self, config, submodules, layer_number, attn_mask_type) + return wrapper + + +def self_attention_init_mla_wrapper(fn): + @wraps(fn) + def wrapper(self, + config: TransformerConfig, + submodules: SelfAttentionSubmodules, + layer_number: int, + attn_mask_type=AttnMaskType.padding, ): + + args = get_args() + fn(self, config, submodules, layer_number, attn_mask_type) + if 
args.multi_head_latent_attention: + self.use_flash_attn = args.use_flash_attn + self.shape_order = args.shape_order + self.qk_rope_head_dim = args.qk_rope_head_dim + self.qk_nope_head_dim = args.qk_nope_head_dim + self.q_lora_rank = args.q_lora_rank + self.kv_lora_rank = args.kv_lora_rank + self.v_head_dim = args.v_head_dim + + query_projection_size = self.config.num_attention_heads * self.v_head_dim + self.q_head_dim = self.qk_nope_head_dim + self.qk_rope_head_dim + + if self.q_lora_rank is None: + self.q_rank = self.config.num_attention_heads * self.q_head_dim + self.q_layernorm = None + else: + self.q_rank = self.q_lora_rank + if submodules.q_layernorm is not None: + self.q_layernorm = build_module( + submodules.q_layernorm, + hidden_size=self.q_lora_rank, + config=self.config, + eps=self.config.layernorm_epsilon, + ) + else: + self.q_layernorm = None + self.linear_qb = build_module( + submodules.linear_qb, + self.q_lora_rank, + self.config.num_attention_heads * self.q_head_dim, + config=self.config, + init_method=self.config.init_method, + gather_output=False, + bias=self.config.add_bias_linear or self.config.add_qkv_bias, + skip_bias_add=False, + is_expert=False, + tp_comm_buffer_name='qb', + ) + + self.linear_qkv = build_module( + submodules.linear_qkv, + self.config.hidden_size, + self.q_rank + self.kv_lora_rank + self.qk_rope_head_dim, + config=self.config, + init_method=self.config.init_method, + gather_output=False, + bias=self.config.add_bias_linear or self.config.add_qkv_bias, + skip_bias_add=False, + is_expert=False, + tp_comm_buffer_name='qkv', + ) + + if submodules.k_layernorm is not None: + self.k_layernorm = build_module( + submodules.k_layernorm, + hidden_size=self.kv_lora_rank, + config=self.config, + eps=self.config.layernorm_epsilon, + ) + else: + self.k_layernorm = None + + self.linear_kvb = build_module( + submodules.linear_kvb, + self.kv_lora_rank, + self.config.num_attention_heads * (self.q_head_dim - self.qk_rope_head_dim + 
self.v_head_dim), + config=self.config, + init_method=self.config.init_method, + gather_output=False, + bias=self.config.add_bias_linear or self.config.add_qkv_bias, + skip_bias_add=False, + is_expert=False, + tp_comm_buffer_name='kvb', + ) + + self.linear_proj = build_module( + submodules.linear_proj, + query_projection_size, + self.config.hidden_size, + config=self.config, + init_method=self.config.output_layer_init_method, + bias=self.config.add_bias_linear, + input_is_parallel=True, + skip_bias_add=True, + is_expert=False, + tp_comm_buffer_name='proj', + ) + + return wrapper + + +def self_attention_init_tp2d_wrapper(fn): + @wraps(fn) + def wrapper(self, + config: TransformerConfig, + submodules: SelfAttentionSubmodules, + layer_number: int, + attn_mask_type=AttnMaskType.padding, ): + + args = get_args() + fn(self, config, submodules, layer_number, attn_mask_type) + if args.tp_2d: + attn_heads_split_num = get_tensor_model_parallel_world_size_for_nd1_dim1() + self.num_attention_heads_per_partition = divide(self.config.num_attention_heads, attn_heads_split_num) + self.num_query_groups_per_partition = divide(self.config.num_query_groups, attn_heads_split_num) + self.linear_qkv = ParallelLinear2D( + self.config.hidden_size, + self.query_projection_size + 2 * self.kv_projection_size, + config=self.config, + init_method=self.config.init_method, + add_bias=self.config.add_bias_linear, + skip_bias_add=True, + ag_comm_intf=TPXCollectiveComm, + ag_sd_rcv_overlap_comm_intf=TPXOverlapCollectiveComm, + rs_comm_intf=TPYCollectiveComm, + rs_sd_rcv_overlap_comm_intf=TPYOverlapCollectiveComm, + enable_overlap_ag_with_matmul=False, + enable_overlap_matmul_with_rs=False, + partition_dim=0, + enable_backward_overlap_ag_with_matmul=False, + ) + self.linear_proj = ParallelLinear2D( + self.query_projection_size, + self.config.hidden_size, + config=self.config, + init_method=self.config.output_layer_init_method, + add_bias=self.config.add_bias_linear, + skip_bias_add=True, + 
ag_comm_intf=TPYCollectiveComm, + ag_sd_rcv_overlap_comm_intf=TPYOverlapCollectiveComm, + rs_comm_intf=TPXCollectiveComm, + rs_sd_rcv_overlap_comm_intf=TPXOverlapCollectiveComm, + enable_overlap_ag_with_matmul=False, + enable_overlap_matmul_with_rs=False, + partition_dim=1, + enable_backward_overlap_ag_with_matmul=args.enable_backward_overlap_ag_with_matmul + ) + + return wrapper + + +def attention_forward_wrapper(fn): + @wraps(fn) + def wrapper( + self, + hidden_states, + attention_mask, + key_value_states=None, + inference_params=None, + rotary_pos_emb=None, + packed_seq_params=None, + ): + args = get_args() + if args.multi_head_latent_attention: + # hidden_states: [sq, b, h] + + # For self attention we just duplicate the rotary_pos_emb if it isn't already + if rotary_pos_emb is not None and not isinstance(rotary_pos_emb, tuple): + rotary_pos_emb = (rotary_pos_emb,) * 2 + + q_len, bsz, _ = hidden_states.shape + mixed_x_layer, _ = self.linear_qkv(hidden_states) + + # [sq, b, hp] --> [sq, b, ng, hn] + q_a, compressed_kv, k_pe = torch.split( + mixed_x_layer, + [ + self.q_rank, self.kv_lora_rank, self.qk_rope_head_dim, + ], + dim=-1) + + if self.q_layernorm is None: + q = q_a + else: + q, _ = self.linear_qb(self.q_layernorm(q_a)) + + q = q.view(q_len, bsz, self.config.num_attention_heads, -1) + + q_nope, q_pe = torch.split( + q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1 + ) + + k_pe = k_pe.view(q_len, bsz, 1, self.qk_rope_head_dim) + kv, _ = self.linear_kvb(self.k_layernorm(compressed_kv)) + kv = kv.view(q_len, bsz, self.config.num_attention_heads, self.qk_nope_head_dim + + self.v_head_dim) + k_nope, value = torch.split( + kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1 + ) + + if rotary_pos_emb is not None: + q_pos_emb, k_pos_emb = rotary_pos_emb + + b, h, s, d = q_pe.shape + q_pe = q_pe.view(b, h, s, d // 2, 2).transpose(4, 3).reshape(b, h, s, d) + b, h, s, d = k_pe.shape + k_pe = k_pe.view(b, h, s, d // 2, 2).transpose(4, 3).reshape(b, h, s, d) + 
+ if packed_seq_params is not None: + cu_seqlens_q = packed_seq_params.cu_seqlens_q + cu_seqlens_kv = packed_seq_params.cu_seqlens_kv + else: + cu_seqlens_q = cu_seqlens_kv = None + + q_pe = apply_rotary_pos_emb(q_pe, q_pos_emb, config=self.config, cu_seqlens=cu_seqlens_q) + k_pe = apply_rotary_pos_emb(k_pe, k_pos_emb, config=self.config, cu_seqlens=cu_seqlens_kv) + + query = torch.cat([q_nope, q_pe], dim=-1) + + k_pe = k_pe.repeat(1, 1, query.shape[2], 1) + key = torch.cat([k_nope, k_pe], dim=-1) + + if self.use_flash_attn and self.q_head_dim != self.v_head_dim: + if self.shape_order == "BNSD": + value = F.pad(value, [0, self.q_head_dim - self.v_head_dim]) + else: + query = F.pad(query, [0, 256 - self.q_head_dim]) + key = F.pad(key, [0, 256 - self.q_head_dim]) + value = F.pad(value, [0, 256 - self.v_head_dim]) + + # ================================== + # core attention computation + # ================================== + attn_mask_type = AttnMaskType.causal + if self.checkpoint_core_attention and self.training: + core_attn_out = self._checkpointed_attention_forward( + query, + key, + value, + attention_mask, + attn_mask_type=attn_mask_type, + packed_seq_params=packed_seq_params, + ) + else: + core_attn_out = self.core_attention( + query, + key, + value, + attention_mask, + attn_mask_type=attn_mask_type, + packed_seq_params=packed_seq_params, + ) + + if packed_seq_params is not None: + # reshape to same output shape as unpacked case + # (t, np, hn) -> (t, b=1, h=np*hn) + # t is the pack size = sum (sq_i) + # note that batch is a dummy dimension in the packed case + core_attn_out = core_attn_out.reshape(core_attn_out.size(0), 1, -1) + + if self.use_flash_attn: + core_attn_out = core_attn_out.view(q_len, bsz, self.config.num_attention_heads, -1) + core_attn_out = core_attn_out[:, :, :, : self.v_head_dim] + core_attn_out = core_attn_out.reshape(q_len, bsz, self.config.num_attention_heads * self.v_head_dim) + + # ================= + # Output. 
[sq, b, h] + # ================= + + output, bias = self.linear_proj(core_attn_out) + else: + output, bias = fn( + self, + hidden_states, + attention_mask, + key_value_states, + inference_params, + rotary_pos_emb, + packed_seq_params + ) + + return output, bias + + return wrapper + + +def attention_forward( + self, + hidden_states, + attention_mask, + key_value_states=None, + inference_params=None, + rotary_pos_emb=None, + packed_seq_params=None, +): + + # For self attention we just duplicate the rotary_pos_emb if it isn't already + if rotary_pos_emb is not None and not isinstance(rotary_pos_emb, tuple): + rotary_pos_emb = (rotary_pos_emb,) * 2 + + # ===================== + # Query, Key, and Value + # ===================== + # Get the query, key and value tensors based on the type of attention - + # self or cross attn. + query, key, value = self.get_query_key_value_tensors(hidden_states, key_value_states) + + # =================================================== + # Adjust key, value, and rotary_pos_emb for inference + # =================================================== + key, value, rotary_pos_emb, attn_mask_type = self._adjust_key_value_for_inference( + inference_params, key, value, rotary_pos_emb + ) + + # ================================================ + # relative positional embedding (rotary embedding) + # ================================================ + if rotary_pos_emb is not None: + q_pos_emb, k_pos_emb = rotary_pos_emb + + if packed_seq_params is not None: + cu_seqlens_q = packed_seq_params + cu_seqlens_kv = packed_seq_params + else: + cu_seqlens_q = cu_seqlens_kv = None + query = apply_rotary_pos_emb( + query, q_pos_emb, config=self.config, cu_seqlens=cu_seqlens_q, + ) + key = apply_rotary_pos_emb( + key, k_pos_emb, config=self.config, cu_seqlens=cu_seqlens_kv, + ) + + + # ================================== + # core attention computation + # ================================== + + if self.checkpoint_core_attention and self.training: + core_attn_out 
= self._checkpointed_attention_forward( + query, + key, + value, + attention_mask, + attn_mask_type=attn_mask_type, + packed_seq_params=packed_seq_params, + ) + else: + core_attn_out = self.core_attention( + query, + key, + value, + attention_mask, + attn_mask_type=attn_mask_type, + packed_seq_params=packed_seq_params, + ) + + # ================= + # Output. [sq, b, h] + # ================= + + output, bias = self.linear_proj(core_attn_out) + + return output, bias \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/transformer/custom_layers/transformer_engine.py b/model/train/yoco_moe/mindspeed/core/transformer/custom_layers/transformer_engine.py new file mode 100644 index 000000000..16e385f48 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/custom_layers/transformer_engine.py @@ -0,0 +1,53 @@ +import torch +import torch_npu +import torch.nn as nn + +from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.legacy.model.rms_norm import RMSNorm +from megatron.training import get_args +from mindspeed.core.tensor_parallel.comm_group_api import TPXCollectiveComm +from mindspeed.core.tensor_parallel.comm_group_api import TPYCollectiveComm +from mindspeed.core.tensor_parallel.tp_2d.layernorm_2d import LayerNorm2D +from mindspeed.core.tensor_parallel.tp_2d.rms_norm_2d import RMSNorm2D + + +class PTNorm: + """ + Conditional Initialization of Transformer-Engine’s LayerNorm or RMSNorm Instance + """ + + def __new__( + cls, config: TransformerConfig, hidden_size: int, eps: float = 1e-5, + ): + args = get_args() + if config.normalization == "LayerNorm": + if args.tp_2d: + instance = LayerNorm2D( + hidden_size, + eps=eps, + last_dim_split_comm_intf=TPYCollectiveComm(), + ) + else: + instance = nn.LayerNorm( + normalized_shape=hidden_size, + eps=eps, + ) + elif config.normalization == "RMSNorm": + if args.tp_2d: + instance = RMSNorm2D( + hidden_size, + eps=eps, + 
last_dim_split_comm_intf=TPYCollectiveComm(), + ) + instance.use_fused_rmsnorm = False + else: + instance = RMSNorm( + dim=hidden_size, + eps=eps, + sequence_parallel=config.sequence_parallel, + ) + instance.use_fused_rmsnorm = True + else: + raise Exception('Only LayerNorm and RMSNorm are curently supported') + + return instance diff --git a/model/train/yoco_moe/mindspeed/core/transformer/dot_product_attention.py b/model/train/yoco_moe/mindspeed/core/transformer/dot_product_attention.py new file mode 100644 index 000000000..dc1b9eb38 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/dot_product_attention.py @@ -0,0 +1,309 @@ +# coding=utf-8 +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. + +import math +from functools import wraps + +import torch +import torch_npu +from torch import Tensor +from megatron.core.fusions.fused_softmax import FusedScaleMaskSoftmax +from megatron.core.transformer import TransformerConfig +from megatron.core.transformer.dot_product_attention import DotProductAttention +from megatron.core.transformer.enums import AttnMaskType +from megatron.core.transformer.utils import attention_mask_func +from megatron.core.utils import divide +from megatron.training import get_args +from megatron.core import mpu, parallel_state +from mindspeed.core.models.common.embeddings.rotary_pos_embedding import yarn_get_mscale +from mindspeed.core.context_parallel.ring_context_parallel import ringattn_context_parallel +from mindspeed.core.context_parallel.ulysses_context_parallel import ulyssesattn_context_parallel +from mindspeed.core.context_parallel.context_parallel_kv_cache import get_cache_policy +from mindspeed.ops.fusion_attention_v2 import npu_fusion_attention +from mindspeed.model.alibi_mask import AlibiForFusionAttnSingleton +from mindspeed.core.parallel_state import (get_context_parallel_group_for_hybrid_ring, + 
get_context_parallel_for_hybrid_ring_world_size, + get_context_parallel_for_hybrid_ring_rank, + get_context_parallel_for_hybrid_ring_global_ranks, + get_ring_ranks_for_intra_window, + get_ring_ranks_for_inter_window_kv, + get_ring_ranks_for_inter_window_dkv, + get_ring_group_for_intra_window, + get_ring_group_for_intra_window_send_recv_overlap) +from mindspeed.core.tensor_parallel_y_union_cp import TensorParallelYUnionCP +from mindspeed.model.transformer import get_attention_mask +from mindspeed.utils import get_actual_seq_len +from mindspeed.core.context_parallel.adaptive_context_parallel import adaptive_attn_context_parallel +from mindspeed.core.context_parallel.utils import get_scheduling_info + +try: + from einops import rearrange +except ImportError: + rearrange = None + + +def dot_product_attention_init( + self, + config: TransformerConfig, + layer_number: int, + attn_mask_type: AttnMaskType, + attention_type: str, + attention_dropout: float = None, +): + cp_size = config.context_parallel_size + config.context_parallel_size = 1 + + super(DotProductAttention, self).__init__(config=config) + assert ( + self.config.context_parallel_size == 1 + ), "Context parallelism is only supported by TEDotProductAttention!" + + assert ( + self.config.window_size is None + ), "Sliding Window Attention is only supported by TEDotProductAttention!" + + self.layer_number = max(1, layer_number) + self.attn_mask_type = attn_mask_type + self.attention_type = attention_type # unused for now + + projection_size = self.config.kv_channels * self.config.num_attention_heads + args = get_args() + # Per attention head and per partition values. 
+ world_size = args.tp_x if args.tp_2d else parallel_state.get_tensor_model_parallel_world_size() + self.hidden_size_per_partition = divide(projection_size, world_size) + self.hidden_size_per_attention_head = divide(projection_size, config.num_attention_heads) + self.num_attention_heads_per_partition = divide(self.config.num_attention_heads, world_size) + self.num_query_groups_per_partition = divide(self.config.num_query_groups, world_size) + + coeff = None + self.norm_factor = math.sqrt(self.hidden_size_per_attention_head) + if self.config.apply_query_key_layer_scaling: + coeff = self.layer_number + self.norm_factor *= coeff + + self.scale_mask_softmax = FusedScaleMaskSoftmax( + input_in_fp16=self.config.fp16, + input_in_bf16=self.config.bf16, + attn_mask_type=self.attn_mask_type, + scaled_masked_softmax_fusion=self.config.masked_softmax_fusion, + mask_func=attention_mask_func, + softmax_in_fp32=self.config.attention_softmax_in_fp32, + scale=coeff, + ) + + # Dropout. Note that for a single iteration, this layer will generate + # different outputs on different number of parallel partitions but + # on average it should not be partition dependent. 
+ self.attention_dropout = torch.nn.Dropout( + self.config.attention_dropout if attention_dropout is None else attention_dropout + ) + + config.context_parallel_size = cp_size + + # add pse + self.pse = None + self.pse_type = args.alibi_fusion_attn_type + + if args.multi_head_latent_attention: + self.scale_mask_softmax.scale = True + self.hidden_size_per_partition = config.num_attention_heads * args.v_head_dim + self.q_head_dim = args.qk_nope_head_dim + args.qk_rope_head_dim + self.softmax_scale = self.q_head_dim ** (-0.5) + + if args.rope_scaling_type is not None: + mscale_all_dim = args.rope_scaling_mscale_all_dim if args.rope_scaling_mscale_all_dim else 0 + scaling_factor = args.rope_scaling_factor + + if mscale_all_dim: + mscale = yarn_get_mscale(scaling_factor, mscale_all_dim) + self.softmax_scale = self.softmax_scale * mscale * mscale + + self.norm_factor = 1.0 / self.softmax_scale + + if self.pse_type is None: + self.pse_type = 1 # not use pse + elif self.pse_type == 0: + alibi = AlibiForFusionAttnSingleton.get_alibi_tensor_for_fusion_attn(args.seq_length, + config.num_attention_heads, + config.params_dtype, + args.alibi_diagonal_opposite, + 1024) + self.pse = alibi + elif self.pse_type == 2 or self.pse_type == 3: + self.pse = AlibiForFusionAttnSingleton.get_alibi_slopes_for_fusion_attn(config.num_attention_heads) + + +def dot_product_attention_init_wrapper(fn): + @wraps(fn) + def wrapper(self, *args, **kwargs): + fn(self, *args, **kwargs) + if self.config.num_query_groups is None: + self.config.num_query_groups = self.config.num_attention_heads + self.num_attention_heads_per_partition = self.config.num_attention_heads * self.num_query_groups_per_partition // self.config.num_query_groups + return wrapper + + +def dot_product_attention_forward_wrapper(fn): + @wraps(fn) + def wrapper(self, query, key, value, attention_mask, attn_mask_type, packed_seq_params): + if attention_mask is None and self.attn_mask_type == AttnMaskType.causal: + attention_mask = 
get_attention_mask() + if get_args().use_flash_attn: + return dot_product_attention_forward(self, query, key, value, attention_mask, attn_mask_type, packed_seq_params) + return fn(self, query, key, value, attention_mask, attn_mask_type, packed_seq_params) + + return wrapper + + +def dot_product_attention_forward( + self, + query: Tensor, + key: Tensor, + value: Tensor, + attention_mask, + attn_mask_type, + packed_seq_params, +): + args = get_args() + seq_len, bsz, n_head, head_dim = query.shape[0], query.shape[1], query.shape[2], query.shape[3] + + sparse_mode = args.sparse_mode + if attn_mask_type == AttnMaskType.no_mask: + sparse_mode = 0 # default mask + + scale = 1.0 / math.sqrt( + self.hidden_size_per_attention_head) if self.scale_mask_softmax.scale is None else self.softmax_scale + + cp_expanded_by_2d_tp = args.tp_2d and args.tp_y > 1 + if cp_expanded_by_2d_tp: + tp_y_cp_sz = TensorParallelYUnionCP().get_parallel_group_world_size() + else: + tp_y_cp_sz = self.config.context_parallel_size + + if (self.config.context_parallel_size > 1 and args.context_parallel_algo == "ulysses_cp_algo" + and args.context_parallel_kv_cache_policy): + self.ulysses_comm_para['cache_policy'] = get_cache_policy( + self.layer_number, args.context_parallel_kv_cache_policy, args.context_parallel_cache_interval + ) + self.ulysses_comm_para['use_ulysses_allgather_kv'] = args.use_ulysses_allgather_kv + + attn_para = dict() + attn_para['packed_seq_params'] = packed_seq_params + attn_para['attention_mask'] = attention_mask + attn_para['scale'] = scale + attn_para['pre_tokens'] = args.pre_tockens + attn_para['next_tokens'] = args.next_tockens + attn_para['keep_prob'] = 1 - self.attention_dropout.p + attn_para['sparse_mode'] = sparse_mode + output = ulyssesattn_context_parallel(query, key, value, attn_para, self.ulysses_comm_para) + + return output + + if tp_y_cp_sz > 1 and args.context_parallel_algo in ['megatron_cp_algo', 'hybrid_cp_algo', + 'adaptive_cp_algo', 'hybrid_adaptive_cp_algo']: + 
in_hybrid_mode = False + if get_context_parallel_group_for_hybrid_ring(check_initialized=False) is not None: + in_hybrid_mode = True + + if not in_hybrid_mode: + if cp_expanded_by_2d_tp: + tp_y_cp = TensorParallelYUnionCP() + cp_group = tp_y_cp.group + cp_size = tp_y_cp.get_parallel_group_world_size() + rank = tp_y_cp.get_parallel_rank() + cp_global_ranks = tp_y_cp.global_ranks + else: + cp_group = mpu.get_context_parallel_group() + cp_size = mpu.get_context_parallel_world_size() + rank = mpu.get_context_parallel_rank() + cp_global_ranks = mpu.get_context_parallel_global_ranks() + else: + cp_group = get_context_parallel_group_for_hybrid_ring() + cp_size = get_context_parallel_for_hybrid_ring_world_size() + rank = get_context_parallel_for_hybrid_ring_rank() + cp_global_ranks = get_context_parallel_for_hybrid_ring_global_ranks() + + cp_para = dict() + cp_para['megatron_cp_in_bnsd'] = self.config.megatron_cp_in_bnsd + cp_para['causal'] = args.attention_mask_type == 'causal' + cp_para['cp_group'] = cp_group + cp_para['cp_size'] = cp_size + cp_para['rank'] = rank + + query, key, value = [rearrange(x, 's b h d -> s b (h d)') for x in [query, key, value]] + if args.context_parallel_algo in ['megatron_cp_algo', 'hybrid_cp_algo']: + cp_para['cp_global_ranks'] = cp_global_ranks + if args.use_cp_send_recv_overlap: + if cp_expanded_by_2d_tp: + cp_para['cp_group_for_send_recv_overlap'] = tp_y_cp.overlap_group + else: + cp_para['cp_group_for_send_recv_overlap'] = mpu.get_context_parallel_group_for_send_recv_overlap() + else: + cp_para['cp_group_for_send_recv_overlap'] = None + cp_para['pse'] = self.pse + cp_para['pse_type'] = self.pse_type + + if self.config.context_parallel_size > 1 and not args.tp_2d: + cp_para['cp_inner_ranks'] = get_ring_ranks_for_intra_window() + cp_para['cp_outer_ranks'] = get_ring_ranks_for_inter_window_kv() + cp_para['cp_dkv_outer_ranks'] = get_ring_ranks_for_inter_window_dkv() + cp_para['cp_group_for_intra_window'] = get_ring_group_for_intra_window() + 
cp_para['cp_group_for_intra_window_send_recv_overlap'] = get_ring_group_for_intra_window_send_recv_overlap() + + cp_para['cache_policy'] = get_cache_policy( + self.layer_number, args.context_parallel_kv_cache_policy, args.context_parallel_cache_interval + ) + + output = ringattn_context_parallel(query, key, value, n_head, cp_para, scale, attention_mask, self.attention_dropout.p, + packed_seq_params) + else: + cp_para['scheduling_info'] = get_scheduling_info() + output = adaptive_attn_context_parallel(query, key, value, n_head, cp_para, scale, attention_mask, self.attention_dropout.p) + + else: + if packed_seq_params is not None: # TND + cp_size = mpu.get_context_parallel_world_size() + actual_seq_qlen = packed_seq_params.cu_seqlens_q.tolist() + actual_seq_kvlen = packed_seq_params.cu_seqlens_kv.tolist() + query, key, value = [rearrange(x, 's b h d -> (b s) h d') for x in [query, key, value]] + shape_order = 'TND' + else: # SBH + actual_seq_qlen = None + actual_seq_kvlen = None + query, key, value = [rearrange(x, 's b h d -> s b (h d)') for x in [query, key, value]] + shape_order = 'SBH' + if args.use_fusion_attn_v2: + output = npu_fusion_attention( + query, key, value, n_head, shape_order, + pse=self.pse, + padding_mask=None, + atten_mask=attention_mask, + scale=scale, + pse_type=self.pse_type, + pre_tokens=args.pre_tockens, + next_tokens=args.next_tockens, + keep_prob=1 - self.attention_dropout.p, + inner_precise=0, + sparse_mode=sparse_mode, + actual_seq_qlen=actual_seq_qlen, + actual_seq_kvlen=actual_seq_kvlen + )[0] + else: + output = torch_npu.npu_fusion_attention( + query, key, value, n_head, shape_order, + pse=None, + padding_mask=None, + atten_mask=attention_mask, + scale=scale, + pre_tockens=args.pre_tockens, + next_tockens=args.next_tockens, + keep_prob=1 - self.attention_dropout.p, + inner_precise=0, + sparse_mode=sparse_mode, + actual_seq_qlen=actual_seq_qlen, + actual_seq_kvlen=actual_seq_kvlen + )[0] + if packed_seq_params is not None: + output = 
rearrange(output, '(b s) h d -> s b (h d)', s=seq_len, b=bsz) + return output diff --git a/model/train/yoco_moe/mindspeed/core/transformer/mlp.py b/model/train/yoco_moe/mindspeed/core/transformer/mlp.py new file mode 100644 index 000000000..0dfc883cc --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/mlp.py @@ -0,0 +1,150 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +from functools import wraps +from megatron.core.transformer.spec_utils import build_module +from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.core.transformer.mlp import MLPSubmodules, MLP +from megatron.training import get_args +from mindspeed.core.tensor_parallel.comm_group_api import TPXCollectiveComm, TPXOverlapCollectiveComm, \ + TPYCollectiveComm, TPYOverlapCollectiveComm +from mindspeed.core.tensor_parallel.tp_2d.parallel_linear_2d import ParallelLinear2D + + +def mlp_init( + self, + config: TransformerConfig, + submodules: MLPSubmodules, + is_expert: bool = False, + input_size: int = None, + shared_expert=False, +): + super(MLP, self).__init__(config=config) + + self.config: TransformerConfig = config + + self.input_size = input_size if input_size is not None else self.config.hidden_size + + ffn_hidden_size = self.config.ffn_hidden_size + if self.config.gated_linear_unit: + ffn_hidden_size *= 2 + if shared_expert: + self.linear_fc1 = build_module( + submodules.linear_fc1, + self.input_size, + ffn_hidden_size, + config=self.config, + init_method=self.config.init_method, + gather_output=False, + bias=self.config.add_bias_linear, + skip_bias_add=True, + is_expert=is_expert, + tp_comm_buffer_name='fc1', + shared_expert=shared_expert + ) + else: + self.linear_fc1 = build_module( + submodules.linear_fc1, + self.input_size, + ffn_hidden_size, + config=self.config, + init_method=self.config.init_method, + gather_output=False, + bias=self.config.add_bias_linear, + skip_bias_add=True, + is_expert=is_expert, + tp_comm_buffer_name='fc1' + ) + + self.activation_func = self.config.activation_func + + if shared_expert: + self.linear_fc2 = build_module( + submodules.linear_fc2, + self.config.ffn_hidden_size, + self.config.hidden_size, + config=self.config, + init_method=self.config.output_layer_init_method, + bias=self.config.add_bias_linear, + 
input_is_parallel=True, + skip_bias_add=True, + is_expert=is_expert, + tp_comm_buffer_name='fc2', + shared_expert=shared_expert + ) + else: + self.linear_fc2 = build_module( + submodules.linear_fc2, + self.config.ffn_hidden_size, + self.config.hidden_size, + config=self.config, + init_method=self.config.output_layer_init_method, + bias=self.config.add_bias_linear, + input_is_parallel=True, + skip_bias_add=True, + is_expert=is_expert, + tp_comm_buffer_name='fc2' + ) + + self.shared_expert = shared_expert + + +def mlp_init_2d_wrapper(fn): + @wraps(fn) + def wrapper(self, *arg, **kwargs): + fn(self, *arg, **kwargs) + args = get_args() + is_expert = False + if get_args().num_experts is not None: + is_expert = True + if args.tp_2d: + ffn_hidden_size = self.config.ffn_hidden_size + if self.config.gated_linear_unit: + ffn_hidden_size *= 2 + self.linear_fc1 = ParallelLinear2D( + self.config.hidden_size, + ffn_hidden_size, + config=self.config, + init_method=self.config.init_method, + add_bias=self.config.add_bias_linear, + skip_bias_add=True, + is_expert=is_expert, + ag_comm_intf=TPXCollectiveComm, + ag_sd_rcv_overlap_comm_intf=TPXOverlapCollectiveComm, + rs_comm_intf=TPYCollectiveComm, + rs_sd_rcv_overlap_comm_intf=TPYOverlapCollectiveComm, + enable_overlap_ag_with_matmul=False, + enable_overlap_matmul_with_rs=args.enable_overlap_matmul_with_rs, + partition_dim=0, + enable_backward_overlap_ag_with_matmul=args.enable_backward_overlap_ag_with_matmul) + self.linear_fc2 = ParallelLinear2D( + self.config.ffn_hidden_size, + self.config.hidden_size, + config=self.config, + init_method=self.config.output_layer_init_method, + add_bias=self.config.add_bias_linear, + skip_bias_add=True, + is_expert=is_expert, + ag_comm_intf=TPYCollectiveComm, + ag_sd_rcv_overlap_comm_intf=TPYOverlapCollectiveComm, + rs_comm_intf=TPXCollectiveComm, + rs_sd_rcv_overlap_comm_intf=TPXOverlapCollectiveComm, + enable_overlap_ag_with_matmul=args.enable_overlap_ag_with_matmul, + 
enable_overlap_matmul_with_rs=False, + partition_dim=1, + enable_backward_overlap_ag_with_matmul=args.enable_backward_overlap_ag_with_matmul) + return wrapper diff --git a/model/train/yoco_moe/mindspeed/core/transformer/module.py b/model/train/yoco_moe/mindspeed/core/transformer/module.py new file mode 100644 index 000000000..5986c8c8d --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/module.py @@ -0,0 +1,17 @@ +from functools import wraps +import torch +from megatron.core.transformer.spec_utils import build_module + + +def megatron_module_init_wrapper(fn): + @wraps(fn) + def wrapper(self, config): + fn(self, config) + if hasattr(config, 'reset_attention_order') and config.reset_attention_order: + # Create linear_qkv module before self_attention. + self.linear_qkv = build_module(torch.nn.GELU) + # Free memory to avoid memory fragmentation. It will be assigned a real linear function later. + self.linear_qkv = None + config.reset_attention_order = False + + return wrapper diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/__init__.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/comm_utils.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/comm_utils.py new file mode 100644 index 000000000..bd2a4dae5 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/comm_utils.py @@ -0,0 +1,257 @@ +# Copyright (c) 2024; NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+import einops +import torch +import torch.distributed +import torch.distributed as dist +import torch_npu +from megatron.core import parallel_state +from megatron.core.parallel_state import get_global_memory_buffer, get_tensor_model_parallel_rank + +from typing import Optional, List + +COMM_STREAM = None + + +def async_all_gather(input_, group, event=None, is_use_get_global_memory_buffer=False, last_dim=False): + world_size = dist.get_world_size(group) + if world_size == 1: + return input_, input_, None + if last_dim: + rank = get_tensor_model_parallel_rank() + ag_out = [torch.empty_like(input_) for _ in range(world_size)] + ag_out[rank] = input_ + else: + dim_size = list(input_.size()) + new_dim_size = dim_size[0] * world_size + dim_size[0] = new_dim_size + + if is_use_get_global_memory_buffer: + ag_out = get_global_memory_buffer().get_tensor(dim_size, input_.dtype, "mpu") + else: + ag_out = torch.empty(dim_size, dtype=input_.dtype, device=torch.cuda.current_device()) + if event: + # multi stream wait event + global COMM_STREAM + if COMM_STREAM is None: + COMM_STREAM = torch_npu.npu.Stream(device=torch.npu.current_device()) + with torch_npu.npu.stream(COMM_STREAM): + event.wait() + if last_dim: + handle = torch.distributed.all_gather(ag_out, input_.contiguous(), group=group, async_op=True) + else: + handle = torch.distributed._all_gather_base( + ag_out, input_.contiguous(), group=group, async_op=True + ) + else: + if last_dim: + handle = torch.distributed.all_gather(ag_out, input_.contiguous(), group=group, async_op=True) + else: + handle = torch.distributed._all_gather_base( + ag_out, input_.contiguous(), group=group, async_op=True + ) + return input_, ag_out, handle + + +def async_reduce_scatter(input_, group, event=None, stream=None, is_use_get_global_memory_buffer=False): + world_size = dist.get_world_size(group) + if world_size == 1: + return input_, input_, None + dim_size = list(input_.size()) + dim_size[0] = dim_size[0] // world_size + if 
is_use_get_global_memory_buffer: + rs_out = get_global_memory_buffer().get_tensor(dim_size, input_.dtype, "mpu") + else: + rs_out = torch.empty(dim_size, dtype=input_.dtype, device=torch.cuda.current_device()) + if event or stream: + # multi stream wait event + global COMM_STREAM + if COMM_STREAM is None: + COMM_STREAM = torch_npu.npu.Stream(device=torch.npu.current_device()) + with torch_npu.npu.stream(COMM_STREAM): + if event: + event.wait() + if stream: + torch.cuda.current_stream().wait_stream(stream) + handle = torch.distributed._reduce_scatter_base( + rs_out, input_.contiguous(), group=group, async_op=True + ) + else: + handle = torch.distributed._reduce_scatter_base( + rs_out, input_.contiguous(), group=group, async_op=True + ) + return input_, rs_out, handle + + +def async_all_to_all(input_, output_split_sizes, input_split_sizes, group, event=None): + world_size = dist.get_world_size(group) + if world_size == 1: + return input_, input_, None + if output_split_sizes is None: + # Equal split (all2all) + a2a_out = torch.empty_like(input_) + else: + # Unequal split (all2all-v) + a2a_out = input_.new_empty( + size=[sum(output_split_sizes)] + list(input_.size()[1:]), + dtype=input_.dtype, + device=torch.cuda.current_device(), + ) + + if event: + # multi stream wait event + global COMM_STREAM + if COMM_STREAM is None: + COMM_STREAM = torch_npu.npu.Stream(device=torch.npu.current_device()) + with torch_npu.npu.stream(COMM_STREAM): + event.wait() + handle = dist.all_to_all_single( + a2a_out, + input_.contiguous(), + output_split_sizes=output_split_sizes, + input_split_sizes=input_split_sizes, + group=group, + async_op=True + ) + else: + handle = dist.all_to_all_single( + a2a_out, + input_.contiguous(), + output_split_sizes=output_split_sizes, + input_split_sizes=input_split_sizes, + group=group, + async_op=True + ) + return input_, a2a_out, handle + + +def transfer_tensor_last_dim_to_first(input_x): + num_dims = input_x.dim() + return einops.rearrange(input_x, "... 
lst -> lst ...").contiguous(), num_dims + + +def transfer_tensor_first_dim_to_last(input_x, num_dims): + return einops.rearrange(input_x, "first ... -> ... first").contiguous() + + +def _gather_no_grad(input_: torch.Tensor, output_split_sizes=None, group=None): + if group is None: + group = parallel_state.get_tensor_model_parallel_group() + world_size = torch.distributed.get_world_size(group) + # Bypass the function if we are using only 1 GPU. + if world_size == 1: + return input_ + + dim_size = list(input_.size()) + if output_split_sizes is None: + dim_size[0] = dim_size[0] * world_size + output = torch.empty(dim_size, dtype=input_.dtype, device=input_.device) + torch.distributed._all_gather_base(output, input_.contiguous(), group=group) + else: + dim_size[0] = sum(output_split_sizes) + output = torch.empty(dim_size, dtype=input_.dtype, device=input_.device) + output_tensor_list = list(torch.split(output, output_split_sizes, dim=0)) + torch.distributed.all_gather(output_tensor_list, input_, group=group) + + return output + + +def _reduce_scatter_no_grad(input_: torch.Tensor, input_split_sizes=None, group=None): + if group is None: + group = parallel_state.get_tensor_model_parallel_group() + world_size = torch.distributed.get_world_size(group) + # Bypass the function if we are using only 1 GPU. 
+ if world_size == 1: + return input_ + + if input_split_sizes is None: + dim_size = list(input_.size()) + if dim_size[0] % world_size != 0: + raise ValueError("First dimension of the tensor should be divisible by tensor parallel size") + dim_size[0] = dim_size[0] // world_size + + output = torch.empty(dim_size, dtype=input_.dtype, device=input_.device) + torch.distributed._reduce_scatter_base(output, input_.contiguous(), group=group) + else: + rank = torch.distributed.get_rank(group) + input_tensor_list = list(torch.split(input_, input_split_sizes, dim=0)) + output = torch.empty_like(input_tensor_list[rank]) + torch.distributed.reduce_scatter(output, input_tensor_list, group=group) + return output + + +class _Gather(torch.autograd.Function): + + @staticmethod + def forward(ctx, input_, output_split_sizes=None, group=None): + """Forward function.""" + ctx.output_split_sizes = output_split_sizes + ctx.group = group + return _gather_no_grad(input_, output_split_sizes, group) + + @staticmethod + def backward(ctx, grad_output): + """Backward function.""" + output_split_sizes = ctx.output_split_sizes + group = ctx.group + return _reduce_scatter_no_grad(grad_output, output_split_sizes, group), None, None + + +class _ReduceScatter(torch.autograd.Function): + + @staticmethod + def forward(ctx, input_, input_split_sizes=None, group=None): + """Forward function.""" + ctx.input_split_sizes = input_split_sizes + ctx.group = group + return _reduce_scatter_no_grad(input_, input_split_sizes, group) + + @staticmethod + def backward(ctx, grad_output): + """Backward function.""" + input_split_sizes = ctx.input_split_sizes + group = ctx.group + return _gather_no_grad(grad_output, input_split_sizes, group), None, None + + +# Backport from latest Megatron-LM +def all_gather( + input_: torch.Tensor, + output_split_sizes: Optional[List[int]] = None, + group: torch.distributed.ProcessGroup = None, +) -> torch.Tensor: + """Gather tensors and concatenate along the first dimension. 
+ + Args: + input_tensor (torch.Tensor): + A tensor to be gathered. + output_split_sizes (List[int], optional): + A list specifying the sizes of the output splits along the first dimension. + If None, equal splitting is assumed. Default: None. + group (torch.distributed.ProcessGroup): + The process group to perform communication. + Default: None, will use tensor model parallel group. + + Returns: + torch.Tensor: Gathered tensor. + """ + return _Gather.apply(input_, output_split_sizes, group) + + +def reduce_scatter( + input_: torch.Tensor, + input_split_sizes: Optional[List[int]] = None, + group: torch.distributed.ProcessGroup = None, +) -> torch.Tensor: + """Reduce-scatter the input tensor across model parallel group. + + Args: + input_ (torch.Tensor): The input tensor to be reduce-scattered. + input_split_sizes (List[int], optional): A list specifying the sizes of + the input splits along the first dimension for each rank. If None, + equal splitting is assumed. Default: None. + group (torch.distributed.ProcessGroup): + The process group to perform communication. + Default: None, will use tensor model parallel group. + """ + return _ReduceScatter.apply(input_, input_split_sizes, group) diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/experts.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/experts.py new file mode 100644 index 000000000..fdccc93ae --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/experts.py @@ -0,0 +1,197 @@ +# Copyright (c) 2024; NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+from functools import wraps +import torch +import torch.nn.functional as F +from megatron.core import parallel_state, tensor_parallel +from megatron.training import get_args +from mindspeed.model.transformer import should_recompute_activation +from mindspeed.core.fusions.fused_bias_swiglu import fused_swiglu +from mindspeed.core.tensor_parallel.random import CheckpointWithoutOutput +from mindspeed.core.transformer.moe.grouped_gemm_util import fused_alltoall_gather_bmm, fused_bmm_reducescatter_alltoall +from mindspeed.core.transformer.moe.grouped_mlp_with_comp_and_comm_overlap_all2all import grouped_mlp_with_comp_and_comm_overlap_all2all +from mindspeed.core.transformer.moe.grouped_mlp_with_comp_and_comm_overlap_allgather import grouped_mlp_with_comp_and_comm_overlap_allgather +from mindspeed.core.transformer.moe import grouped_gemm_util as gg + + +def get_zeros_with_tp(input_): + world_size = parallel_state.get_tensor_model_parallel_world_size() + zeros_shape = input_.shape[:-1] + (input_.shape[-1] * world_size,) + return torch.zeros(zeros_shape, dtype=input_.dtype, layout=input_.layout, device=input_.device) + + +def sequential_mlp_forward(self, permuted_local_hidden_states, tokens_per_expert): + output_local = get_zeros_with_tp(permuted_local_hidden_states) + output_bias_local = None + if self.add_bias: + output_bias_local = get_zeros_with_tp(permuted_local_hidden_states) + + cumsum_num_tokens = torch.cumsum(tokens_per_expert, dim=0) + # Insert zero at the begining for offset index's convenience + zero_tensor = torch.zeros(1, dtype=torch.long, device=cumsum_num_tokens.device) + cumsum_num_tokens = torch.cat((zero_tensor, cumsum_num_tokens)) + + if parallel_state.get_tensor_model_parallel_world_size() > 1: + if not hasattr(self, 'comm_stream'): + self.comm_stream = torch.cuda.Stream() + self.comm_stream.wait_stream(torch.cuda.current_stream()) + + for expert_num, expert in enumerate(self.local_experts): + start = cumsum_num_tokens[expert_num] + end = 
cumsum_num_tokens[expert_num + 1] + hidden = permuted_local_hidden_states[start:end] + + if parallel_state.get_tensor_model_parallel_world_size() > 1: + with torch.cuda.stream(self.comm_stream): + hidden = tensor_parallel.all_gather_last_dim_from_tensor_parallel_region(hidden) + torch.cuda.current_stream().wait_stream(self.comm_stream) + + output, output_bias = expert(hidden) + + output_local[start:end] = output + if self.add_bias: + output_bias = output_bias.expand_as(output) + output_bias_local[start:end, :] = output_bias + + return output_local, output_bias_local + + +def group_mlp_forward(self, permuted_local_hidden_states, tokens_per_expert, ctx=None): + if permuted_local_hidden_states.nelement() != 0: + w1 = self.weight1.view(self.num_local_experts, self.config.hidden_size, -1) + w2 = self.weight2.view(self.num_local_experts, -1, self.config.hidden_size) + else: + w1 = self.weight1.view(self.config.hidden_size, -1) + w2 = self.weight2.view(-1, self.config.hidden_size) + group_list = torch.cumsum(tokens_per_expert, dim=0) + if get_args().moe_alltoall_overlap_comm: + return grouped_mlp_with_comp_and_comm_overlap_all2all(permuted_local_hidden_states, w1, w2, + (self.weight1, self.weight2, self.activation_func, + group_list, ctx.layer_number), + ctx=ctx) + else: + return grouped_mlp_with_comp_and_comm_overlap_allgather(permuted_local_hidden_states, w1, w2, + (self.weight1, self.weight2, self.activation_func, + group_list, self.layer_number)) + + +def groupedmlp_init_wrapper(fn): + @wraps(fn) + def wrapper(self, *args, **kwargs): + args_ = get_args() + tp_size = parallel_state.get_tensor_model_parallel_world_size() + # set tp size to 1 before GMM init to aviod weight sharding + if args_.moe_tp_extend_ep: + parallel_state._MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE = 1 + fn(self, *args, **kwargs) + if args_.moe_tp_extend_ep: + parallel_state._MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE = tp_size + if self.config.gated_linear_unit: + assert (self.config.activation_func == F.silu 
+ ), 'Activation function must be silu when using fused_swiglu.' + self.activation_func = fused_swiglu + self.layer_number = None + self.set_recompute_activation_func = False + + return wrapper + + +def groupedmlp_forward(self, permuted_local_hidden_states, tokens_per_expert): + args = get_args() + is_recompute_activation = should_recompute_activation( + self.layer_number) and not args.moe_alltoall_overlap_comm and not args.moe_allgather_overlap_comm + + gemm_fusion = args.gemm_gradient_accumulation_fusion + tp_group = parallel_state.get_tensor_model_parallel_group() + ep_group = parallel_state.get_expert_model_parallel_group() + + if not is_recompute_activation: + if permuted_local_hidden_states.nelement() != 0: + # Reshape the weights for the grouped GEMMs. + w1 = self.weight1.view(self.num_local_experts, self.config.hidden_size, -1) + w2 = self.weight2.view(self.num_local_experts, -1, self.config.hidden_size) + + if args.moe_bmm_mc2: + # input to alltoall_gather_bmm op input: [E*C, H/TP] -> [E, C, H/TP] + permuted_local_hidden_states = permuted_local_hidden_states.view(self.config.num_moe_experts, + permuted_local_hidden_states.shape[ + 0] // self.config.num_moe_experts, + -1) + bmm_param = {'group_ep': ep_group, 'group_tp': tp_group, 'shard_type': 0, + 'need_recompute': False} + fc1_output = fused_alltoall_gather_bmm(permuted_local_hidden_states, w1, None, bmm_param) + intermediate_parallel = self.activation_func(fc1_output) + fc2_output = fused_bmm_reducescatter_alltoall(intermediate_parallel, w2, None, bmm_param) + # revert the output shape: [E, C, H/TP] -> [E*C, H/TP] + fc2_output = fc2_output.view(-1, fc2_output.shape[2]) + else: + fc1_output = gg.ops.gmm(permuted_local_hidden_states, w1, tokens_per_expert, trans_b=False, + gemm_fusion=gemm_fusion, original_weight=self.weight1) + intermediate_parallel = self.activation_func(fc1_output) + fc2_output = gg.ops.gmm(intermediate_parallel, w2, tokens_per_expert, trans_b=False, + gemm_fusion=gemm_fusion, 
original_weight=self.weight2) + else: + # No token is allocated for local experts. + assert torch.count_nonzero(tokens_per_expert) == 0 + + # Make sure parameters still have gradients when no tokens are routed to this set of experts. + w1 = self.weight1.view(self.config.hidden_size, -1) + w2 = self.weight2.view(-1, self.config.hidden_size) + h = torch.matmul(permuted_local_hidden_states, w1) + h = self.activation_func(h) + h = torch.matmul(h, w2) + fc2_output = h + else: + if permuted_local_hidden_states.nelement() != 0: + w1 = self.weight1.view(self.num_local_experts, self.config.hidden_size, -1) + w2 = self.weight2.view(self.num_local_experts, -1, self.config.hidden_size) + + bmm_param = {'group_ep': ep_group, 'group_tp': tp_group, 'shard_type': 0, + 'need_recompute': False} + + if args.moe_bmm_mc2: + # input to alltoall_gather_bmm op input: [E*C, H/TP] -> [E, C, H/TP] + permuted_local_hidden_states = permuted_local_hidden_states.view(self.config.num_moe_experts, + permuted_local_hidden_states.shape[ + 0] // self.config.num_moe_experts, + -1) + + fc1_output = fused_alltoall_gather_bmm(permuted_local_hidden_states, w1, None, bmm_param) + else: + fc1_output = gg.ops.gmm( + permuted_local_hidden_states, w1, tokens_per_expert, trans_b=False, gemm_fusion=gemm_fusion, + original_weight=self.weight1 + ) + + self.activation_checkpoint_manager = CheckpointWithoutOutput() + intermediate_parallel = self.activation_checkpoint_manager.checkpoint(self.activation_func, + False, + fc1_output) + if args.moe_bmm_mc2: + fc2_output = fused_bmm_reducescatter_alltoall(intermediate_parallel, w2, None, bmm_param) + # revert the output shape: [E, C, H/TP] -> [E*C, H/TP] + fc2_output = fc2_output.view(-1, fc2_output.shape[2]) + else: + fc2_output = gg.ops.gmm(intermediate_parallel, w2, tokens_per_expert, trans_b=False, + gemm_fusion=gemm_fusion, original_weight=self.weight2) + else: + assert torch.count_nonzero(tokens_per_expert) == 0 + w1 = self.weight1.view(self.config.hidden_size, -1) 
+ w2 = self.weight2.view(-1, self.config.hidden_size) + h = torch.matmul(permuted_local_hidden_states, w1) + self.activation_checkpoint_manager = CheckpointWithoutOutput() + intermediate_parallel = self.activation_checkpoint_manager.checkpoint(self.activation_func, + False, + h) + h = torch.matmul(intermediate_parallel, w2) + fc2_output = h + + # discard the output of the activation function, + # which will be restored by recomputation during backward. + self.activation_checkpoint_manager.discard_output() + + # when backward to output of dense_4h_to_h, + # recompute and restore the output of activation function. + if fc2_output.requires_grad: + fc2_output.register_hook(self.activation_checkpoint_manager.recompute) + return fc2_output, None diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/grouped_gemm_util.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/grouped_gemm_util.py new file mode 100644 index 000000000..0c0ed476c --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/grouped_gemm_util.py @@ -0,0 +1,195 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+ +import torch +from mindspeed.ops.npu_all_to_all_all_gather_bmm import npu_alltoall_allgather_bmm +from mindspeed.ops.npu_bmm_reduce_scatter_all_to_all import npu_bmm_reducescatter_alltoall + + +def grouped_gemm_is_available(): + try: + from mindspeed.ops.gmm import npu_gmm + return True + except ImportError: + return False + + +def assert_grouped_gemm_is_available(): + if not grouped_gemm_is_available(): + raise ImportError("from mindspeed.ops.gmm import npu_gmm failed.") + + +class Ops: + @staticmethod + def gmm(a, b, batch_sizes, trans_b=False, gemm_fusion=False, original_weight=None): + from mindspeed.ops.gmm import npu_gmm + + if trans_b: + b = b.t() + group_list = torch.cumsum(batch_sizes, dim=0) + return npu_gmm(a, b, bias=None, group_list=group_list, group_type=0, gemm_fusion=gemm_fusion, original_weight=original_weight) + + +ops = Ops + + +def get_device_capability(): + return 9, 0 + + +def get_hcomm_info_world(comm_group): + rank = torch.distributed.get_rank() + hcomm_info = None + + if torch.__version__ > "2.0.1": + hcomm_info = comm_group._get_backend(torch.device("npu")).get_hccl_comm_name(rank) + else: + hcomm_info = comm_group.get_hccl_comm_name(rank) + return hcomm_info + + +class FusedAllgatherBmmFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, input_, weight, bias, bmm_param): + + group_ep = bmm_param['group_ep'] + group_tp = bmm_param['group_tp'] + need_recompute = bmm_param['need_recompute'] + shard_type = bmm_param['shard_type'] + + ep_size = torch.distributed.get_world_size(group=group_ep) + tp_size = torch.distributed.get_world_size(group=group_tp) + + tp_group_hcomm = get_hcomm_info_world(group_tp) + ep_group_hcomm = get_hcomm_info_world(group_ep) + + out = npu_alltoall_allgather_bmm( + input_, weight, ep_group_hcomm, ep_size, tp_group_hcomm, tp_size, bias=bias, shard_type=shard_type, + act_type="None", need_allgather_out=True, need_activation_feature=False + ) + bmm_out = out[0] + allgather_out = out[1] + + if 
need_recompute: + ctx.save_for_backward(input_, weight) + else: + ctx.save_for_backward(allgather_out, weight) + + ctx.bias = bias + ctx.need_recompute = need_recompute + ctx.group_ep = ep_group_hcomm + ctx.group_tp = tp_group_hcomm + ctx.ep_size = ep_size + ctx.tp_size = tp_size + ctx.shard_type = shard_type + return bmm_out + + @staticmethod + def backward(ctx, grad_output): + + need_recompute = ctx.need_recompute + bias = ctx.bias + group_ep = ctx.group_ep + group_tp = ctx.group_tp + ep_size = ctx.ep_size + tp_size = ctx.tp_size + shard_type = ctx.shard_type + + allgather_out = None + input_ = None + + if need_recompute: + input_, weight = ctx.saved_tensors + else: + allgather_out, weight = ctx.saved_tensors + + if need_recompute: + out = npu_alltoall_allgather_bmm( + input_, weight, group_ep, ep_size, group_tp, tp_size, bias=bias, shard_type=shard_type, + act_type="None", need_allgather_out=True, need_activation_feature=False + ) + allgather_out = out[1] + + # b,m,k @ b,k,n -> b,m,n + # dx: b,m,n @ (b,k,n).t() -> b,m,k + out = npu_bmm_reducescatter_alltoall( + grad_output, weight.transpose(-1, -2), group_ep, ep_size, group_tp, tp_size, + bias=None, shard_type=shard_type + ) + + # b,m,k @ b,k,n -> b,m,n + # dw: (b,m,k).t() @ (b,m,n).t() -> b,k,n + grad_bmm_w = torch.bmm(allgather_out.transpose(-1, -2), grad_output) + grad_bias = None + if bias is not None: + grad_bias = torch.sum(grad_output, dim=-1) + + return out, grad_bmm_w, grad_bias, None + + +class FusedBmmReduceScatterFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, input_, weight, bias, bmm_param): + + group_ep = bmm_param['group_ep'] + group_tp = bmm_param['group_tp'] + shard_type = bmm_param['shard_type'] + + ep_size = torch.distributed.get_world_size(group=group_ep) + tp_size = torch.distributed.get_world_size(group=group_tp) + + tp_group_hcomm = get_hcomm_info_world(group_tp) + ep_group_hcomm = get_hcomm_info_world(group_ep) + + out = npu_bmm_reducescatter_alltoall( + input_, 
weight, ep_group_hcomm, ep_size, tp_group_hcomm, tp_size, + bias=bias, shard_type=shard_type + ) + + ctx.save_for_backward(input_, weight) + + ctx.bias = bias + ctx.group_ep = ep_group_hcomm + ctx.group_tp = tp_group_hcomm + ctx.ep_size = ep_size + ctx.tp_size = tp_size + ctx.shard_type = shard_type + return out + + @staticmethod + def backward(ctx, grad_output): + + bias = ctx.bias + group_ep = ctx.group_ep + group_tp = ctx.group_tp + ep_size = ctx.ep_size + tp_size = ctx.tp_size + shard_type = ctx.shard_type + + input_, weight = ctx.saved_tensors + + # b,m,k @ b,k,n -> b,m,n + # dx: b,m,n @ (b,k,n).t() -> b,m,k + out = npu_alltoall_allgather_bmm( + grad_output, weight.transpose(-1, -2), group_ep, ep_size, group_tp, tp_size, + bias=bias, shard_type=shard_type, act_type="None", need_allgather_out=True, need_activation_feature=False + ) + bmm_out_grad = out[0] + allgather_out_grad = out[1] + + # b,m,k @ b,k,n -> b,m,n + # dw: (b,m,k).t() @ (b,m,n).t() -> b,k,n + grad_bmm_w = torch.bmm(input_.transpose(-1, -2), allgather_out_grad) + grad_bias = None + if bias is not None: + grad_bias = torch.sum(grad_output, dim=-1) + + return bmm_out_grad, grad_bmm_w, grad_bias, None + + +def fused_alltoall_gather_bmm(input_, weight, bias, bmm_param): + return FusedAllgatherBmmFunction.apply(input_, weight, bias, bmm_param) + + +def fused_bmm_reducescatter_alltoall(input_, weight, bias, bmm_param): + return FusedBmmReduceScatterFunction.apply(input_, weight, bias, bmm_param) diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/grouped_mlp_with_comp_and_comm_overlap_all2all.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/grouped_mlp_with_comp_and_comm_overlap_all2all.py new file mode 100644 index 000000000..18a487f6d --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/grouped_mlp_with_comp_and_comm_overlap_all2all.py @@ -0,0 +1,298 @@ +# Copyright (c) 2024, Huawei Technologies. +# All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from einops import rearrange +from megatron.training import get_args +from megatron.core import parallel_state +from megatron.core.parallel_state import get_expert_model_parallel_group, get_tensor_and_expert_parallel_group, get_tensor_model_parallel_group +from megatron.core.transformer.moe.moe_utils import permute +from mindspeed.model.transformer import should_recompute_activation +from mindspeed.core.transformer.moe.moe_layer_overlap_all2all import gmm_op +from mindspeed.core.transformer.moe.comm_utils import (async_all_to_all, async_reduce_scatter, async_all_gather, + transfer_tensor_last_dim_to_first) +from mindspeed.core.transformer.moe.moe_utils import (only_recompute_activation, forward_func, backward_func, + get_gemm_backward_need_tensors, + set_all2all_experts_output, + permute_with_ep, get_all2all_experts_output, + get_permute_with_ep_local_input_tokens) +from mindspeed.ops.npu_groupmatmul_add import npu_groupmatmul_add_fp32 + + +class GroupedMlpWithCompAndCommOverlapAll2All(torch.autograd.Function): + @staticmethod + def forward(ctx, inputs, weights1, weights2, args, moe_layer_ctx): + original_weight1, original_weight2, activation_func, group_list, layer_number = args + global_args = get_args() + moe_zero_memory = global_args.moe_zero_memory + moe_experts_pipeline_degree = global_args.moe_experts_pipeline_degree + ctx.layer_number = layer_number + ctx.moe_zero_memory = moe_zero_memory + 
ctx.moe_experts_pipeline_degree = moe_experts_pipeline_degree + use_gmm = (inputs.nelement() != 0) + ctx.use_gmm = use_gmm + if use_gmm: + mm1_out = gmm_op(inputs, weights1, [], group_list, 0)[0] + else: + mm1_out = torch.matmul(inputs, weights1) + if moe_zero_memory != "disable" or moe_experts_pipeline_degree: + inputs.untyped_storage().resize_(0) + act_out, detached_act_inputs = forward_func(activation_func, mm1_out) + + is_only_recompute_activation = only_recompute_activation(layer_number) + if moe_zero_memory == "level1" and not is_only_recompute_activation: + mm1_out.untyped_storage().resize_(0) + if use_gmm: + mm2_out = gmm_op(act_out, weights2, [], group_list, 0)[0] + else: + mm2_out = torch.matmul(act_out, weights2) + + if moe_zero_memory == "level1" and not is_only_recompute_activation: + act_out.untyped_storage().resize_(0) + moe_layer_ctx.recompute_tensors = (inputs, mm1_out, act_out) + is_recompute_activation = moe_zero_memory == "level0" or should_recompute_activation(layer_number) or ( + moe_zero_memory == "level1" and is_only_recompute_activation) + if is_recompute_activation: + act_out.untyped_storage().resize_(0) + ctx.activation_func = activation_func + if moe_zero_memory != "level0" and not (moe_zero_memory == "level1" and is_only_recompute_activation): + ctx.save_for_backward(inputs, detached_act_inputs, act_out, weights1, weights2, original_weight1, + original_weight2, group_list) + else: + ctx.save_for_backward(detached_act_inputs, act_out, weights1, weights2, original_weight1, original_weight2, + group_list) + + return mm2_out, None + + @staticmethod + def backward(ctx, *grad_outs): + grad_outs = grad_outs[0] + global_args = get_args() + moe_hierarchical_alltoallv = global_args.moe_hierarchical_alltoallv + layer_number = ctx.layer_number + moe_zero_memory = ctx.moe_zero_memory + moe_experts_pipeline_degree = ctx.moe_experts_pipeline_degree + is_only_recompute_activation = only_recompute_activation(layer_number) + if moe_zero_memory != 
"level0" and not (moe_zero_memory == "level1" and is_only_recompute_activation): + mm1_inputs, act_inputs, mm2_inputs, weights1, weights2, original_weight1, original_weight2, group_list = ctx.saved_tensors + else: + act_inputs, mm2_inputs, weights1, weights2, original_weight1, original_weight2, group_list = ctx.saved_tensors + if moe_experts_pipeline_degree: + inputs_save = get_gemm_backward_need_tensors() + _, inputs, ag_handle_i = async_all_gather(inputs_save, get_tensor_model_parallel_group(()), last_dim=True) + else: + ((detach_input, indices, scores_ep, router_topk, global_input_tokens_local_experts_indices), + permute2_input_detach, permute2_graph, output_splits, input_splits, + input_splits_tp_ep) = get_gemm_backward_need_tensors() + + # grad of mm2 dx + if ctx.use_gmm: + weights2 = rearrange(weights2, 'n h f -> n f h') + grad_mm2_inputs = gmm_op(grad_outs, weights2, [], group_list, 0)[0] + else: + grad_mm2_inputs = torch.matmul(grad_outs, weights2.t()) + act_graph = mm2_inputs + is_recompute_activation = moe_zero_memory == "level0" or should_recompute_activation(layer_number) or ( + moe_zero_memory == "level1" and is_only_recompute_activation) + if is_recompute_activation: + activation_func = ctx.activation_func + mm2_inputs = activation_func(act_inputs) + + if moe_hierarchical_alltoallv: + ep_group = parallel_state.get_expert_model_parallel_group() + tp_group = parallel_state.get_tensor_model_parallel_group() + permute1_graph, scores_ep, hidden_states_ep = get_all2all_experts_output() + if moe_zero_memory == "disable": + _, detach_scores_grad, detach_scores_handle = async_reduce_scatter(scores_ep.grad, group=ep_group) + else: + detach_scores_grad = None + detach_scores_handle = None + + # grad of activation_func + act_graph.backward(grad_mm2_inputs) + if moe_zero_memory == "level0" or (moe_zero_memory == "level1" and is_only_recompute_activation): + permutated_local_input_tokens = get_permute_with_ep_local_input_tokens() + _, global_input_tokens, 
permute1_ep_all_to_all_handle = async_all_to_all( + permutated_local_input_tokens, + output_splits, + input_splits, + tp_group, + ) + + # gmm1 dx + if ctx.use_gmm: + weights1 = rearrange(weights1, 'n h f -> n f h') + mm1_inputs_grad = \ + gmm_op(act_inputs.grad, weights1, [], group_list, 0)[0] + else: + mm1_inputs_grad = torch.matmul(act_inputs.grad, weights1.t()) + + backward_func(permute2_graph, mm1_inputs_grad) + mm1_inputs_grad.untyped_storage().resize_(0) + + if moe_zero_memory == "level0" or (moe_zero_memory == "level1" and is_only_recompute_activation): + permute1_ep_all_to_all_handle.wait() + permutated_local_input_tokens.untyped_storage().resize_(0) + _, permute1_backward_input, bw_permute1_ep_all2all_handle = async_all_to_all( + permute2_input_detach.grad, + input_splits, + output_splits, + tp_group, + ) + + # gmm2 dw + if ctx.use_gmm: + if get_args().gemm_gradient_accumulation_fusion: + + npu_groupmatmul_add_fp32(mm2_inputs, grad_outs, group_list, original_weight2.main_grad) + + if hasattr(original_weight2, 'grad_added_to_main_grad'): + if getattr(weights2, 'zero_out_wgrad', False): + grad_weights2 = torch.zeros( + weights2.transpose(-1, -2).shape, + dtype=mm2_inputs.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + else: + grad_weights2 = torch.empty( + weights2.transpose(-1, -2).shape, + dtype=mm2_inputs.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + original_weight2.grad_added_to_main_grad = True + else: + grad_weights2 = None + else: + grad_weights2 = gmm_op(mm2_inputs.t(), grad_outs, [], group_list, 2)[0] + else: + grad_weights2 = torch.matmul(mm2_inputs.t(), grad_outs) + + # grad of activation_func + grad_outs.untyped_storage().resize_(0) + mm2_inputs.untyped_storage().resize_(0) + if moe_hierarchical_alltoallv: + grad_mm2_inputs.untyped_storage().resize_(0) + act_inputs.untyped_storage().resize_(0) + bw_permute1_ep_all2all_handle.wait() + + backward_func(permute1_graph, permute1_backward_input) + 
permute1_backward_input.untyped_storage().resize_(0) + if moe_zero_memory == "disable": + detach_scores_handle.wait() + + ep_group = parallel_state.get_expert_model_parallel_group() + _, detach_input_grad, detach_input_handle = async_reduce_scatter(hidden_states_ep.grad, group=ep_group) + set_all2all_experts_output((detach_scores_grad, detach_input_grad, detach_input_handle)) + else: + act_graph.backward(grad_mm2_inputs) + grad_mm2_inputs.untyped_storage().resize_(0) + act_inputs.untyped_storage().resize_(0) + if moe_zero_memory == "level0" or (moe_zero_memory == "level1" and is_only_recompute_activation): + def alltoall_token_permutation1(hidden_states, indices): + hidden_states = hidden_states.view(-1, hidden_states.shape[-1]) + permutated_local_input_tokens, _ = permute( + hidden_states, indices + ) + return permutated_local_input_tokens + + permutated_local_input_tokens = alltoall_token_permutation1(detach_input, indices) + + ep_group = get_expert_model_parallel_group() + if global_args.moe_tp_extend_ep: + ep_group = get_tensor_and_expert_parallel_group() + _, global_input_tokens, permute1_ep_all_to_all_handle = async_all_to_all( + permutated_local_input_tokens, + output_splits, + input_splits, + ep_group, + ) + if ctx.use_gmm: + weights1 = rearrange(weights1, 'n h f -> n f h') + mm1_inputs_grad = gmm_op(act_inputs.grad, weights1, [], group_list, 0)[0] + else: + mm1_inputs_grad = torch.matmul(act_inputs.grad, weights1.t()) + + # 峰值 + if moe_experts_pipeline_degree: + ag_handle_i.wait() + mm1_inputs = torch.cat(inputs, dim=inputs_save.dim() - 1).contiguous() + else: + backward_func(permute2_graph, mm1_inputs_grad) + mm1_inputs_grad.untyped_storage().resize_(0) + ep_group = get_expert_model_parallel_group() + if global_args.moe_tp_extend_ep: + ep_group = get_tensor_and_expert_parallel_group() + + if moe_zero_memory == "level0" or (moe_zero_memory == "level1" and is_only_recompute_activation): + permute1_ep_all_to_all_handle.wait() + 
permutated_local_input_tokens.untyped_storage().resize_(0) + + if moe_experts_pipeline_degree: + mm1_inputs_grad, num_dim = transfer_tensor_last_dim_to_first(mm1_inputs_grad) + rs_input_i, expert_output, rs_handle_i = async_reduce_scatter(mm1_inputs_grad, + get_tensor_model_parallel_group()) + set_all2all_experts_output((rs_input_i, expert_output, rs_handle_i, mm1_inputs_grad, num_dim)) + else: + _, permute1_backward_input, bw_permute1_ep_all2all_handle = async_all_to_all( + permute2_input_detach.grad, + input_splits, + output_splits, + ep_group, + ) + set_all2all_experts_output((permute1_backward_input, bw_permute1_ep_all2all_handle)) + + if moe_zero_memory == "level0" or (moe_zero_memory == "level1" and is_only_recompute_activation): + mm1_inputs, _ = permute( + global_input_tokens, global_input_tokens_local_experts_indices + ) + + global_input_tokens.untyped_storage().resize_(0) + + if ctx.use_gmm: + if get_args().gemm_gradient_accumulation_fusion: + npu_groupmatmul_add_fp32(mm1_inputs, act_inputs.grad, group_list, original_weight1.main_grad) + if hasattr(original_weight1, 'grad_added_to_main_grad'): + if getattr(weights1, 'zero_out_wgrad', False): + mm1_weights_grad = torch.zeros( + weights1.transpose(-1, -2).shape, + dtype=mm1_inputs.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + else: + mm1_weights_grad = torch.empty( + weights1.transpose(-1, -2).shape, + dtype=mm1_inputs.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + original_weight1.grad_added_to_main_grad = True + else: + mm1_weights_grad = None + else: + mm1_weights_grad = gmm_op(mm1_inputs.t(), act_inputs.grad, [], group_list, 2)[0] + else: + mm1_weights_grad = torch.matmul(mm1_inputs.t(), act_inputs.grad) + act_inputs.grad.untyped_storage().resize_(0) + if moe_experts_pipeline_degree: + return None, mm1_weights_grad, grad_weights2, None, None + else: + return mm1_inputs_grad, mm1_weights_grad, grad_weights2, None, None + + +def 
grouped_mlp_with_comp_and_comm_overlap_all2all(inputs, weights1, weights2, args, ctx): + return GroupedMlpWithCompAndCommOverlapAll2All.apply(inputs, weights1, weights2, args, ctx) diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/grouped_mlp_with_comp_and_comm_overlap_allgather.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/grouped_mlp_with_comp_and_comm_overlap_allgather.py new file mode 100644 index 000000000..42b5a2e79 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/grouped_mlp_with_comp_and_comm_overlap_allgather.py @@ -0,0 +1,187 @@ +# Copyright (c) 2024, Huawei Technologies. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import acl +from einops import rearrange +from megatron.core.parallel_state import get_expert_model_parallel_group, get_tensor_and_expert_parallel_group, get_tensor_and_expert_parallel_world_size, get_expert_model_parallel_world_size +from megatron.training import get_args +from mindspeed.ops.gmm import GMMFunction +from mindspeed.model.transformer import should_recompute_activation +from mindspeed.core.transformer.moe.moe_utils import (get_gemm_backward_need_tensors, get_ag_tp_hidden_status, + set_rs_global_hidden_states_grad_with_handle) +from mindspeed.core.transformer.moe.moe_utils import forward_func, backward_func +from mindspeed.core.transformer.moe.comm_utils import async_all_gather, async_reduce_scatter +from mindspeed.core.transformer.moe.token_dispatcher import cann_version_check +from mindspeed.ops.npu_groupmatmul_add import npu_groupmatmul_add_fp32 +from .moe_layer_overlap_all2all import gmm_op + + +class GroupedMlpWithCompAndCommOverlapAllGather(torch.autograd.Function): + @staticmethod + def forward(ctx, inputs, weights1, weights2, args): + original_weight1, original_weight2, activation_func, group_list, layer_number = args + use_gmm = (inputs.nelement() != 0) + ctx.use_gmm = use_gmm + if use_gmm: + mm1_out = gmm_op(inputs, weights1, [], group_list, 0)[0] + else: + mm1_out = torch.matmul(inputs, weights1) + inputs.untyped_storage().resize_(0) + act_out, detached_act_inputs = forward_func(activation_func, mm1_out) + if use_gmm: + mm2_out = gmm_op(act_out, weights2, [], group_list, 0)[0] + else: + mm2_out = torch.matmul(act_out, weights2) + if should_recompute_activation(layer_number): + act_out.untyped_storage().resize_(0) + ctx.activation_func = activation_func + ctx.layer_number = layer_number + ctx.save_for_backward(detached_act_inputs, act_out, weights1, weights2, original_weight1, original_weight2, group_list) + return mm2_out, None + + @staticmethod + def backward(ctx, *grad_outs): + grad_outs = grad_outs[0] + layer_number = 
ctx.layer_number + act_inputs, act_graph, weights1, weights2, original_weight1, original_weight2, group_list = ctx.saved_tensors + token_unpermutation_graph, global_hidden_states_detach, indices, global_local_map = get_gemm_backward_need_tensors() + + # grad of mm2 + if ctx.use_gmm: + weights2 = rearrange(weights2, 'n h f -> n f h') + grad_mm2_inputs = gmm_op(grad_outs, weights2, [], group_list, 0)[0] + else: + grad_mm2_inputs = torch.matmul(grad_outs, weights2.t()) + if should_recompute_activation(layer_number): + activation_func = ctx.activation_func + act_out = activation_func(act_inputs) + mm2_inputs = act_out + else: + mm2_inputs = act_graph + + if ctx.use_gmm: + if get_args().gemm_gradient_accumulation_fusion: + + npu_groupmatmul_add_fp32(mm2_inputs, grad_outs, group_list, original_weight2.main_grad) + + if hasattr(original_weight2, 'grad_added_to_main_grad'): + if getattr(weights2, 'zero_out_wgrad', False): + grad_weights2 = torch.zeros( + weights2.transpose(-1, -2).shape, + dtype=mm2_inputs.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + else: + grad_weights2 = torch.empty( + weights2.transpose(-1, -2).shape, + dtype=mm2_inputs.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + original_weight2.grad_added_to_main_grad = True + else: + grad_weights2 = None + else: + grad_weights2 = gmm_op(mm2_inputs.t(), grad_outs, [], group_list, 2)[0] + else: + grad_weights2 = torch.matmul(mm2_inputs.t(), grad_outs) + + grad_outs.untyped_storage().resize_(0) + mm2_inputs.untyped_storage().resize_(0) + + # grad of activation_func + act_graph.backward(grad_mm2_inputs) + grad_mm2_inputs.untyped_storage().resize_(0) + act_inputs.untyped_storage().resize_(0) + mm1_outs_grad = act_inputs.grad + + # re-gather mm1 forward inputs + ag_inputs_tp = get_ag_tp_hidden_status() + ag_inputs_tp = ag_inputs_tp.view(-1, ag_inputs_tp.shape[-1]) + ag_group = get_expert_model_parallel_group() + if '910B' in acl.get_soc_name() or not 
get_args().n_shared_experts: + ag_group = get_tensor_and_expert_parallel_group() + _, ag_inputs_tp_ep, ag_handle = async_all_gather(ag_inputs_tp, ag_group) + if ctx.use_gmm: + # grad of mm1-inputs + weights1 = rearrange(weights1, 'n h f -> n f h') + mm1_inputs_grad = gmm_op(act_inputs.grad, weights1, [], group_list, 0)[0] + else: + mm1_inputs_grad = torch.matmul(act_inputs.grad, weights1.t()) + + # token 反重排的反向 + backward_func(token_unpermutation_graph, mm1_inputs_grad) + mm1_inputs_grad.untyped_storage().resize_(0) + _, rs_global_hidden_states_grad, rs_handle = async_reduce_scatter(global_hidden_states_detach.grad, + get_tensor_and_expert_parallel_group()) + rs_global_hidden_states_grad_with_handle = (rs_global_hidden_states_grad, rs_handle) + ag_handle.wait() + + # token 重排计算 + global_args = get_args() + num_local_experts = global_args.num_experts // get_expert_model_parallel_world_size() + if global_args.moe_tp_extend_ep: + num_local_experts = global_args.num_experts // get_tensor_and_expert_parallel_world_size() + if cann_version_check: + mm1_inputs = ag_inputs_tp_ep[global_local_map, :] + if num_local_experts > 1: + mm1_inputs = mm1_inputs[indices, :] + else: + mm1_inputs = torch.gather(ag_inputs_tp_ep, 0, global_local_map) + if num_local_experts > 1: + mm1_inputs = torch.gather(mm1_inputs, 0, indices) + + global_local_map.untyped_storage().resize_(0) + indices.untyped_storage().resize_(0) + ag_inputs_tp_ep.untyped_storage().resize_(0) + + if ctx.use_gmm: + if get_args().gemm_gradient_accumulation_fusion: + + npu_groupmatmul_add_fp32(mm1_inputs, act_inputs.grad, group_list, original_weight1.main_grad) + + if hasattr(original_weight1, 'grad_added_to_main_grad'): + if getattr(weights1, 'zero_out_wgrad', False): + mm1_weights_grad = torch.zeros( + weights1.transpose(-1, -2).shape, + dtype=mm1_inputs.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + else: + mm1_weights_grad = torch.empty( + weights1.transpose(-1, -2).shape, + 
dtype=mm1_inputs.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + original_weight1.grad_added_to_main_grad = True + else: + mm1_weights_grad = None + else: + mm1_weights_grad = gmm_op(mm1_inputs.t(), act_inputs.grad, [], group_list, 2)[0] + else: + mm1_weights_grad = torch.matmul(mm1_inputs.t(), act_inputs.grad) + + mm1_outs_grad.untyped_storage().resize_(0) + + set_rs_global_hidden_states_grad_with_handle(rs_global_hidden_states_grad_with_handle) + return mm1_inputs_grad, mm1_weights_grad, grad_weights2, None + + +def grouped_mlp_with_comp_and_comm_overlap_allgather(inputs, weights1, weights2, args): + return GroupedMlpWithCompAndCommOverlapAllGather.apply(inputs, weights1, weights2, args) diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/layers.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/layers.py new file mode 100644 index 000000000..7ca1f8b34 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/layers.py @@ -0,0 +1,638 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+ +import os +import warnings +from typing import Any, Callable, List, Optional + +import acl +import torch +import torch_npu +import torch.distributed +from torch.cuda.amp import custom_bwd, custom_fwd +from torch.nn.parameter import Parameter +from megatron.core.tensor_parallel.layers import ( + _initialize_affine_weight_cpu, + _initialize_affine_weight_gpu, + linear_with_grad_accumulation_and_async_allreduce, + linear_with_frozen_weight +) +from megatron.core.tensor_parallel.mappings import ( + copy_to_tensor_model_parallel_region, + gather_from_tensor_model_parallel_region, + reduce_from_tensor_model_parallel_region, + reduce_scatter_to_sequence_parallel_region, + scatter_to_tensor_model_parallel_region, + _reduce_scatter_along_first_dim, + _gather_along_first_dim +) +from megatron.core.tensor_parallel.utils import VocabUtility, divide, split_tensor_along_last_dim +from megatron.core.utils import ( + make_tp_sharded_tensor_for_checkpoint, + prepare_input_tensors_for_wgrad_compute +) +from megatron.core.transformer.utils import make_sharded_tensors_for_checkpoint +from megatron.core.model_parallel_config import ModelParallelConfig +from megatron.core.parallel_state import ( + get_global_memory_buffer, + get_tensor_model_parallel_group, + get_tensor_model_parallel_rank, + get_tensor_model_parallel_world_size, +) + + +class LinearWithGradAccumulationAndAsyncCommunication(torch.autograd.Function): + """See linear_with_grad_accumulation_and_async_allreduce""" + + @staticmethod + @custom_fwd + def forward( + ctx, + input, + weight, + bias, + gradient_accumulation_fusion, + async_grad_allreduce, + sequence_parallel, + grad_output_buffer, + shared_expert, + ): + ctx.save_for_backward(input, weight) + ctx.use_bias = bias is not None + ctx.gradient_accumulation_fusion = gradient_accumulation_fusion + ctx.async_grad_allreduce = async_grad_allreduce + ctx.sequence_parallel = sequence_parallel + ctx.grad_output_buffer = grad_output_buffer + ctx.shared_expert = shared_expert 
+ ctx.need_save = True + if sequence_parallel: + if shared_expert: + from mindspeed.core.transformer.moe.moe_utils import AG_SHARED_EXPERTS_INPUTS + ag_shared_experts_inputs = AG_SHARED_EXPERTS_INPUTS.pop(0) + if isinstance(ag_shared_experts_inputs, tuple): + ag_shared_experts_inputs, handle = ag_shared_experts_inputs + handle.wait() + ctx.need_save = False + total_input = ag_shared_experts_inputs + else: + world_size = get_tensor_model_parallel_world_size() + dim_size = list(input.size()) + dim_size[0] = dim_size[0] * world_size + + all_gather_buffer = get_global_memory_buffer().get_tensor(dim_size, input.dtype, "mpu") + torch.distributed._all_gather_base( + all_gather_buffer, input, group=get_tensor_model_parallel_group() + ) + total_input = all_gather_buffer + else: + total_input = input + + output = torch.matmul(total_input, weight.t()) + + if bias is not None: + output = output + bias + return output + + @staticmethod + @custom_bwd + def backward(ctx, grad_output): + input, weight = ctx.saved_tensors + use_bias = ctx.use_bias + grad_output_buffer = ctx.grad_output_buffer + + wgrad_compute = True + if grad_output_buffer is not None: + grad_output_buffer.append(grad_output) + wgrad_compute = False + + if wgrad_compute: + from mindspeed.core.transformer.moe.moe_utils import set_ag_tp_hidden_status + if ctx.sequence_parallel: + world_size = get_tensor_model_parallel_world_size() + dim_size = list(input.size()) + dim_size[0] = dim_size[0] * world_size + + all_gather_buffer = get_global_memory_buffer().get_tensor( + dim_size, input.dtype, "mpu" + ) + handle = torch.distributed._all_gather_base( + all_gather_buffer, input, group=get_tensor_model_parallel_group(), async_op=True + ) + + # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the + # gather is scheduled before the input gradient computation + total_input = all_gather_buffer + else: + total_input = input + if ctx.need_save and '910B' not in acl.get_soc_name(): + 
set_ag_tp_hidden_status(total_input) + grad_input = grad_output.matmul(weight) + + if ctx.sequence_parallel and wgrad_compute: + handle.wait() + + if wgrad_compute: + grad_output, total_input = prepare_input_tensors_for_wgrad_compute( + grad_output, total_input + ) + + if ctx.async_grad_allreduce: + # Asynchronous all-reduce + handle = torch.distributed.all_reduce( + grad_input, group=get_tensor_model_parallel_group(), async_op=True + ) + # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the + # all-reduce is scheduled before the weight gradient computation + + if ctx.sequence_parallel: + assert not ctx.async_grad_allreduce + dim_size = list(input.size()) + sub_grad_input = torch.empty( + dim_size, dtype=input.dtype, device=torch.cuda.current_device(), requires_grad=False + ) + # reduce_scatter + handle = torch.distributed._reduce_scatter_base( + sub_grad_input, grad_input, group=get_tensor_model_parallel_group(), async_op=True + ) + # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the + # reduce scatter is scheduled before the weight gradient computation + + if ctx.gradient_accumulation_fusion: + if wgrad_compute: + import fused_weight_gradient_mlp_cuda + if weight.main_grad.dtype == torch.float32: + fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32( + total_input, grad_output, weight.main_grad + ) + elif weight.main_grad.dtype in (torch.float16, torch.bfloat16): + fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16( + total_input, grad_output, weight.main_grad + ) + else: + raise RuntimeError("Unsupported gradient type for gradient accumulation fusion") + + if hasattr(weight, 'grad_added_to_main_grad'): + # When overlap_grad_reduce is True, need to ensure that backward hooks + # are all run on the main backprop thread to prevent deadlocks. Setup + # dummy grad_weight tensor to prevent backward hooks from being run + # in a background thread. 
+ if getattr(weight, 'zero_out_wgrad', False): + grad_weight = torch.zeros( + weight.main_grad.shape, + dtype=input.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + else: + grad_weight = torch.empty( + weight.main_grad.shape, + dtype=input.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + weight.grad_added_to_main_grad = True + else: + grad_weight = None + else: + grad_weight = grad_output.t().matmul(total_input) + grad_bias = grad_output.sum(dim=0) if use_bias else None + + if ctx.sequence_parallel: + handle.wait() + # Need to return None's as gradient has to flow for all the input arguments + # provided during forward + return sub_grad_input, grad_weight, grad_bias, None, None, None, None, None + + if ctx.async_grad_allreduce: + handle.wait() + + return grad_input, grad_weight, grad_bias, None, None, None, None, None + + +def linear_with_grad_accumulation_and_async_allreduce( + input: torch.Tensor, + weight: torch.Tensor, + bias: Optional[torch.Tensor], + gradient_accumulation_fusion: bool, + async_grad_allreduce: bool, + sequence_parallel: bool, + grad_output_buffer: Optional[List[torch.Tensor]] = None, + shared_expert: bool = False +) -> torch.Tensor: + args = [ + input, + weight, + bias, + gradient_accumulation_fusion, + async_grad_allreduce, + sequence_parallel, + grad_output_buffer, + shared_expert, + ] + + if not linear_with_grad_accumulation_and_async_allreduce.warned: + if os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS') != "1": + if sequence_parallel: + warnings.warn( + "When using sequence parallelism it is recommended to set the " + "environment variable CUDA_DEVICE_MAX_CONNECTIONS to 1 for " + "maximum speedup" + ) + linear_with_grad_accumulation_and_async_allreduce.warned = True + + if async_grad_allreduce: + warnings.warn( + "When using async grad allreduce it is recommended to set the " + "environment variable CUDA_DEVICE_MAX_CONNECTIONS to 1 for " + "maximum speedup" + ) + 
linear_with_grad_accumulation_and_async_allreduce.warned = True + + return LinearWithGradAccumulationAndAsyncCommunication.apply(*args) + + +linear_with_grad_accumulation_and_async_allreduce.warned = False + + +class ColumnParallelLinear(torch.nn.Module): + + def __init__( + self, + input_size, + output_size, + *, + config: ModelParallelConfig, + init_method: Callable, + bias=True, + gather_output=False, + stride=1, + keep_master_weight_for_test=False, + skip_bias_add=False, + skip_weight_param_allocation: bool = False, + embedding_activation_buffer: Optional[List[torch.Tensor]] = None, + grad_output_buffer: Optional[List[torch.Tensor]] = None, + is_expert: bool = False, + tp_comm_buffer_name: str = None, # Not used + shared_expert: bool = False + ): + super(ColumnParallelLinear, self).__init__() + + # Keep input parameters + self.input_size = input_size + self.output_size = output_size + self.gather_output = gather_output + # Divide the weight matrix along the last dimension. + world_size = get_tensor_model_parallel_world_size() + self.output_size_per_partition = divide(output_size, world_size) + self.skip_bias_add = skip_bias_add + self.is_expert = is_expert + self.expert_parallel = config.expert_model_parallel_size > 1 + self.embedding_activation_buffer = embedding_activation_buffer + self.grad_output_buffer = grad_output_buffer + self.config = config + self.shared_expert = shared_expert + + # Parameters. + # Note: torch.nn.functional.linear performs XA^T + b and as a result + # we allocate the transpose. + # Initialize weight. 
+ if not skip_weight_param_allocation: + if config.use_cpu_initialization: + self.weight = Parameter( + torch.empty( + self.output_size_per_partition, self.input_size, dtype=config.params_dtype + ) + ) + if config.perform_initialization: + self.master_weight = _initialize_affine_weight_cpu( + self.weight, + self.output_size, + self.input_size, + self.output_size_per_partition, + 0, + init_method, + stride=stride, + return_master_weight=keep_master_weight_for_test, + ) + else: + self.weight = Parameter( + torch.empty( + self.output_size_per_partition, + self.input_size, + device=torch.cuda.current_device(), + dtype=config.params_dtype, + ) + ) + if config.perform_initialization: + _initialize_affine_weight_gpu( + self.weight, + init_method, + partition_dim=0, + stride=stride, + expert_parallel=(self.is_expert and self.expert_parallel), + ) + + setattr(self.weight, 'allreduce', not (self.is_expert and self.expert_parallel)) + else: + self.weight = None + + self.register_parameter('bias', None) + + self.async_tensor_model_parallel_allreduce = ( + config.async_tensor_model_parallel_allreduce and world_size > 1 + ) + + self.sequence_parallel = config.sequence_parallel + if self.sequence_parallel and world_size <= 1: + self.sequence_parallel = False + + self.gradient_accumulation_fusion = config.gradient_accumulation_fusion + + if self.async_tensor_model_parallel_allreduce and self.sequence_parallel: + raise RuntimeError( + "`async_tensor_model_parallel_allreduce` and `sequence_parallel` " + "cannot be enabled at the same time." 
+ ) + + self._forward_impl = linear_with_grad_accumulation_and_async_allreduce + self.explicit_expert_comm = self.is_expert and ( + self.sequence_parallel or self.expert_parallel + ) + + # Hook adding a default empty _extra_state for state dict + self._register_load_state_dict_pre_hook( + lambda state_dict, prefix, *args, **kwargs: state_dict.setdefault( + f'{prefix}_extra_state' + ) + ) + + def forward(self, input_: torch.Tensor, weight: Optional[torch.Tensor] = None): + """Forward of ColumnParallelLinear + + Args: + input_: 3D tensor whose order of dimension is [sequence, batch, hidden] + + weight (optional): weight tensor to use, compulsory when + skip_weight_param_allocation is True. + + Returns: + - output + - bias + + """ + if weight is None: + if self.weight is None: + raise RuntimeError( + "weight was not supplied to ColumnParallelLinear forward pass " + "and skip_weight_param_allocation is True." + ) + weight = self.weight + else: + # Check the weight passed in is the correct shape + expected_shape = (self.output_size_per_partition, self.input_size) + if weight.shape != expected_shape: + raise RuntimeError( + f"supplied weight's shape is {tuple(weight.shape)}, " + f"not {expected_shape} as expected" + ) + + if self.config._cpu_offloading_context is not None: + if self.config._cpu_offloading_context.inside_context == True: + assert ( + self.config.cpu_offloading == False + ), "CPU Offloading cannot be enabled while using non-TE modules" + + bias = self.bias if not self.skip_bias_add else None + + if ( + self.async_tensor_model_parallel_allreduce + or self.sequence_parallel + or self.explicit_expert_comm + ): + input_parallel = input_ + else: + input_parallel = copy_to_tensor_model_parallel_region(input_) + + if self.config.defer_embedding_wgrad_compute: + self.embedding_activation_buffer.append(input_parallel) + + # Matrix multiply. 
+ if not weight.requires_grad: + self._forward_impl = linear_with_frozen_weight + else: + self._forward_impl = linear_with_grad_accumulation_and_async_allreduce + + output_parallel = self._forward_impl( + input=input_parallel, + weight=weight, + bias=bias, + gradient_accumulation_fusion=self.gradient_accumulation_fusion, + async_grad_allreduce=False + if self.explicit_expert_comm + else self.async_tensor_model_parallel_allreduce, + sequence_parallel=False if self.explicit_expert_comm else self.sequence_parallel, + grad_output_buffer=self.grad_output_buffer + if self.config.defer_embedding_wgrad_compute + else None, + shared_expert=self.shared_expert + ) + if self.gather_output: + # All-gather across the partitions. + assert not self.sequence_parallel + output = gather_from_tensor_model_parallel_region(output_parallel) + else: + output = output_parallel + output_bias = self.bias if self.skip_bias_add else None + return output, output_bias + + def sharded_state_dict(self, prefix='', sharded_offsets=(), metadata=None): + """ Sharding along axis 0, bias sharded """ + state_dict = self.state_dict(prefix='', keep_vars=True) + return make_sharded_tensors_for_checkpoint( + state_dict, prefix, {'weight': 0, 'bias': 0}, sharded_offsets + ) + + def set_extra_state(self, state: Any): + """ Extra state is ignored """ + + def get_extra_state(self) -> None: + """ Keep compatibility with TE state dict. 
""" + return None + + +class RowParallelLinear(torch.nn.Module): + def __init__( + self, + input_size: int, + output_size: int, + *, + config: ModelParallelConfig, + init_method: Callable, + bias: bool, + input_is_parallel: bool, + skip_bias_add: bool, + stride: int = 1, + keep_master_weight_for_test: bool = False, + is_expert: bool = False, + tp_comm_buffer_name: str = None, # Not used + shared_expert: bool = False + ): + super(RowParallelLinear, self).__init__() + + # Keep input parameters + self.input_size = input_size + self.output_size = output_size + self.input_is_parallel = input_is_parallel + # Divide the weight matrix along the last dimension. + world_size = get_tensor_model_parallel_world_size() + self.input_size_per_partition = divide(input_size, world_size) + self.skip_bias_add = skip_bias_add + self.config = config + self.is_expert = is_expert + self.expert_parallel = config.expert_model_parallel_size > 1 + self.gradient_accumulation_fusion = config.gradient_accumulation_fusion + self.sequence_parallel = config.sequence_parallel + self.shared_expert = shared_expert + if self.sequence_parallel and not self.input_is_parallel: + raise RuntimeError("To enable `sequence_parallel`, `input_is_parallel` must be `True`") + + # Parameters. + # Note: torch.nn.functional.linear performs XA^T + b and as a result + # we allocate the transpose. + # Initialize weight. 
+ if config.use_cpu_initialization: + self.weight = Parameter( + torch.empty( + self.output_size, self.input_size_per_partition, dtype=config.params_dtype + ) + ) + if config.perform_initialization: + self.master_weight = _initialize_affine_weight_cpu( + self.weight, + self.output_size, + self.input_size, + self.input_size_per_partition, + 1, + init_method, + stride=stride, + return_master_weight=keep_master_weight_for_test, + params_dtype=config.params_dtype, + ) + else: + self.weight = Parameter( + torch.empty( + self.output_size, + self.input_size_per_partition, + device=torch.cuda.current_device(), + dtype=config.params_dtype, + ) + ) + if config.perform_initialization: + _initialize_affine_weight_gpu( + self.weight, + init_method, + partition_dim=1, + stride=stride, + expert_parallel=(self.is_expert and self.expert_parallel), + ) + setattr(self.weight, 'allreduce', not (self.is_expert and self.expert_parallel)) + + if bias: + if config.use_cpu_initialization: + self.bias = Parameter(torch.empty(self.output_size, dtype=config.params_dtype)) + else: + self.bias = Parameter( + torch.empty( + self.output_size, + device=torch.cuda.current_device(), + dtype=config.params_dtype, + ) + ) + + if config.perform_initialization: + # Always initialize bias to zero. 
+ with torch.no_grad(): + self.bias.zero_() + setattr(self.bias, 'allreduce', not (self.is_expert and self.expert_parallel)) + setattr(self.bias, 'sequence_parallel', self.sequence_parallel) + else: + self.register_parameter('bias', None) + + self._forward_impl = linear_with_grad_accumulation_and_async_allreduce + self.explicit_expert_comm = self.is_expert and ( + self.sequence_parallel or self.expert_parallel + ) + + # Hook adding a default empty _extra_state for state dict + self._register_load_state_dict_pre_hook( + lambda state_dict, prefix, *args, **kwargs: state_dict.setdefault( + f'{prefix}_extra_state' + ) + ) + + def forward(self, input_): + """Forward of RowParallelLinear + + Args: + input_: 3D tensor whose order of dimension is [sequence, batch, hidden] + + Returns: + - output + - bias + """ + + if self.config._cpu_offloading_context is not None: + if self.config._cpu_offloading_context.inside_context == True: + assert ( + self.config.cpu_offloading == False + ), "CPU Offloading cannot be enabled while using non-TE modules" + + # Set up backprop all-reduce. + if self.input_is_parallel: + input_parallel = input_ + else: + assert not self.sequence_parallel + input_parallel = scatter_to_tensor_model_parallel_region(input_) + # Matrix multiply. + if not self.weight.requires_grad: + self._forward_impl = linear_with_frozen_weight + else: + self._forward_impl = linear_with_grad_accumulation_and_async_allreduce + output_parallel = self._forward_impl( + input=input_parallel, + weight=self.weight, + bias=None, + gradient_accumulation_fusion=self.gradient_accumulation_fusion, + async_grad_allreduce=False, + sequence_parallel=False, + ) + + # All-reduce across all the partitions. 
+ if self.explicit_expert_comm or self.shared_expert: + assert self.skip_bias_add + output_ = output_parallel + elif self.sequence_parallel: + output_ = reduce_scatter_to_sequence_parallel_region(output_parallel) + else: + output_ = reduce_from_tensor_model_parallel_region(output_parallel) + if not self.skip_bias_add: + output = (output_ + self.bias) if self.bias is not None else output_ + output_bias = None + else: + output = output_ + output_bias = self.bias + return output, output_bias + + def sharded_state_dict(self, prefix='', sharded_offsets=(), metadata=None): + """ Sharding along axis 1, bias not sharded """ + state_dict = self.state_dict(prefix='', keep_vars=True) + return make_sharded_tensors_for_checkpoint( + state_dict, prefix, {'weight': 1}, sharded_offsets + ) + + def set_extra_state(self, state: Any): + """ Extra state is ignored """ + + def get_extra_state(self) -> None: + """ Keep compatibility with TE state dict. """ + return None diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/moe_layer.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/moe_layer.py new file mode 100644 index 000000000..6ac9b45a0 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/moe_layer.py @@ -0,0 +1,143 @@ +# Copyright (c) 2024; NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+import types +import copy +from copy import deepcopy +from functools import wraps +import torch +from megatron.training import get_args +from megatron.core import parallel_state, tensor_parallel +from megatron.core.transformer.mlp import MLPSubmodules, MLP +from megatron.core.transformer.moe.moe_layer import MoELayer +from megatron.core.transformer.moe.router import TopKRouter +from megatron.core.transformer.moe.experts import GroupedMLP, SequentialMLP +from megatron.core.transformer.moe.token_dispatcher import ( + MoEAllGatherTokenDispatcher, + MoEAlltoAllTokenDispatcher, +) +from mindspeed.core.transformer.moe.moe_layer_overlap_all2all import MoELayerOverlapAll2All +from mindspeed.core.transformer.moe.moe_layer_overlap_allgather import MoELayerOverlapAllGather + + +def base_moe_init_wrapper(init_func): + @wraps(init_func) + def base_moe_init(*args, **kwargs): + init_func(*args, **kwargs) + self = args[0] + global_args = get_args() + if global_args.moe_tp_extend_ep: + tp_size = parallel_state.get_tensor_model_parallel_world_size() + assert self.config.num_moe_experts % (self.expert_parallel_size * tp_size) == 0 + self.num_local_experts = self.config.num_moe_experts // self.expert_parallel_size // tp_size + local_expert_indices_offset = ( + parallel_state.get_expert_model_parallel_rank() * self.num_local_experts * tp_size + \ + parallel_state.get_tensor_model_parallel_rank() * self.num_local_experts + ) + self.local_expert_indices = [ + local_expert_indices_offset + i for i in range(self.num_local_experts) + ] + assert all(map(lambda x: x < self.config.num_moe_experts, self.local_expert_indices)) + + return base_moe_init + + +def moe_layer_init(self, config, submodules=None, layer_number=None): + self.submodules = submodules + super(MoELayer, self).__init__(config=config, layer_number=layer_number) + self.router = TopKRouter(config=self.config) + moe_experts_pipeline_degree = get_args().moe_experts_pipeline_degree + if self.config.moe_grouped_gemm: + if 
moe_experts_pipeline_degree == 0: + self.experts = GroupedMLP(self.num_local_experts, self.config) + else: + expert = GroupedMLP(self.num_local_experts // moe_experts_pipeline_degree, self.config) + self.experts = torch.nn.ModuleList([copy.deepcopy(expert) for i in range(moe_experts_pipeline_degree)]) + else: + if not isinstance(self.submodules, MLPSubmodules): + raise TypeError("submodules should be instance of MLPSubmodules") + self.experts = SequentialMLP(self.num_local_experts, self.config, self.submodules) + if config.moe_token_dispatcher_type == "allgather": + self.token_dispatcher = MoEAllGatherTokenDispatcher( + self.num_local_experts, self.local_expert_indices, config=self.config + ) + elif config.moe_token_dispatcher_type == "alltoall": + self.token_dispatcher = MoEAlltoAllTokenDispatcher( + self.num_local_experts, self.local_expert_indices, config=self.config + ) + else: + raise ValueError( + f"Unsupported token dispatcher type: {config.moe_token_dispatcher_type}" + ) + + return moe_layer_init + + +def moe_layer_init_wrapper(init_func): + @wraps(init_func) + def wrapper(*args, **kwargs): + init_func(*args, **kwargs) + self = args[0] + global_args = get_args() + self.moe_alltoall_overlap_comm = global_args.moe_alltoall_overlap_comm + self.moe_allgather_overlap_comm = global_args.moe_allgather_overlap_comm + + if global_args.n_shared_experts: + config = deepcopy(self.config) + config.ffn_hidden_size = global_args.n_shared_experts * self.config.ffn_hidden_size + if self.moe_allgather_overlap_comm or self.moe_alltoall_overlap_comm: + from mindspeed.core.transformer.moe.layers import ColumnParallelLinear, RowParallelLinear + self.shared_experts = MLP(config, MLPSubmodules(linear_fc1=ColumnParallelLinear, + linear_fc2=RowParallelLinear,), + shared_expert=True) + else: + from megatron.core.tensor_parallel import ColumnParallelLinear, RowParallelLinear + self.shared_experts = MLP(config, MLPSubmodules(linear_fc1=ColumnParallelLinear, + 
linear_fc2=RowParallelLinear,)) + + self.moe_adaptive_recompute_activation = global_args.moe_adaptive_recompute_activation + self.recompute_threshold = 0 + if hasattr(self.config, 'moe_token_dispatcher_type') and self.config.moe_token_dispatcher_type == 'allgather': + self.moe_adaptive_recompute_activation_scale = global_args.moe_adaptive_recompute_activation_scale + self.recompute_threshold = parallel_state.get_tensor_model_parallel_world_size() * parallel_state.get_data_parallel_world_size() * \ + self.config.moe_router_topk * self.moe_adaptive_recompute_activation_scale / self.config.num_moe_experts + self.token_dispatcher.all_tokens_per_expert = None + self.forward = types.MethodType(moe_adaptive_forward, self) + + return wrapper + + +def moe_adaptive_forward(self, hidden_states: torch.Tensor): + if self.moe_alltoall_overlap_comm: + return MoELayerOverlapAll2All.apply(hidden_states, self) + if self.moe_allgather_overlap_comm: + return MoELayerOverlapAllGather.apply(hidden_states, self) + + def custom_forward(hidden_states): + args = get_args() + scores, indices = self.router(hidden_states) + if args.n_shared_experts: + if not hasattr(self, 'comm_stream'): + self.comm_stream = torch.cuda.Stream() + self.comm_stream.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(self.comm_stream): + share_experts_output, share_experts_bias = self.shared_experts(hidden_states) + (dispatched_input, tokens_per_expert) = self.token_dispatcher.token_permutation( + hidden_states, scores, indices + ) + expert_output, mlp_bias = self.experts(dispatched_input, tokens_per_expert) + output, mlp_bias = self.token_dispatcher.token_unpermutation(expert_output, mlp_bias) + if args.n_shared_experts: + torch.cuda.current_stream().wait_stream(self.comm_stream) + output = output + share_experts_output + if self.token_dispatcher.add_bias: + mlp_bias = mlp_bias + share_experts_bias + return output, mlp_bias + + threshold = hidden_states.shape[0] * hidden_states.shape[1] * 
self.recompute_threshold + moe_adaptive_recompute_activation_bool = self.moe_adaptive_recompute_activation and \ + (self.token_dispatcher.all_tokens_per_expert is None or torch.max(self.token_dispatcher.all_tokens_per_expert) > threshold) + if self.moe_layer_recompute or moe_adaptive_recompute_activation_bool: + output, mlp_bias = tensor_parallel.checkpoint(custom_forward, False, hidden_states) + else: + output, mlp_bias = custom_forward(hidden_states) + return output, mlp_bias diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/moe_layer_overlap_all2all.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/moe_layer_overlap_all2all.py new file mode 100644 index 000000000..cf446323e --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/moe_layer_overlap_all2all.py @@ -0,0 +1,605 @@ +# Copyright (c) 2024, Huawei Technologies. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch + +from megatron.core.parallel_state import get_tensor_model_parallel_group, get_tensor_model_parallel_world_size +from megatron.core import tensor_parallel, parallel_state +from megatron.core.transformer.moe.moe_layer import MoELayer +from megatron.training import get_args +from megatron.core.transformer.moe.moe_utils import permute, save_to_aux_losses_tracker +from mindspeed.moe.utils import MoEAuxLossAutoScaler +from mindspeed.core.transformer.moe.comm_utils import (async_all_to_all, async_all_gather, async_reduce_scatter, + transfer_tensor_last_dim_to_first, transfer_tensor_first_dim_to_last) +from mindspeed.core.transformer.moe.moe_utils import (forward_func, backward_func, permute_with_ep) +from mindspeed.ops.gmm import GMMFunction +from mindspeed.core.transformer.moe.moe_utils import (AG_SHARED_EXPERTS_INPUTS, only_recompute_activation, + set_gemm_backward_need_tensors, + set_all2all_experts_output, get_all2all_experts_output, + get_prob_backward_need_tensors, + set_permute_with_ep_local_input_tokens) + + +def gmm_op(x, weight, bias, group_list, group_type): + if isinstance(group_list, torch.Tensor) and group_list.device.type == 'cpu': + group_list = group_list.tolist() + return GMMFunction.builder.load().npu_gmm([x], [weight], bias, group_list, group_type, 0) + + +def moe_experts_pipeline_forward_func(tokens_per_expert, moe_layer, dispatched_input, ctx, save_tensors): + input_list = [] + expert_graphs = [] + expert_outputs = [] + tokens_per_expert_list = [] + moe_experts_pipeline_degree = ctx.moe_experts_pipeline_degree + + # 1. 
划分子集 + # 赋值self.input_list和self.tokens_per_expert_list + tokens_per_expert = tokens_per_expert.cpu() + group_list = torch.cumsum(tokens_per_expert, dim=0) + num_experts_overlap = moe_layer.num_local_experts // moe_experts_pipeline_degree + + for i in range(moe_experts_pipeline_degree): + start_id = i * num_experts_overlap + start = 0 + if i != 0: + start = group_list[start_id - 1] + end_id = (i + 1) * num_experts_overlap + end = group_list[end_id - 1] + input_i = dispatched_input[start : end] + tokens_per_expert_i = tokens_per_expert[start_id : end_id] + input_list.append(input_i) + tokens_per_expert_list.append(tokens_per_expert_i) + ctx.input_list = input_list + + # 2. 对每个专家子集的输入数据进行模型计算,并将计算结果保存在expert_outputs中 + ag_handle_i_next = None + rs_handle_i = None + input_i_next = None + num_dim = None + rs_input_i = None + + for i in range(moe_experts_pipeline_degree): + if i == 0: + _, input_i, ag_handle_i = async_all_gather(input_list[i], get_tensor_model_parallel_group(), last_dim=True) + _, input_i_next, ag_handle_i_next = async_all_gather(input_list[i + 1], get_tensor_model_parallel_group(), last_dim=True) + elif i != (moe_experts_pipeline_degree - 1): + input_i = input_i_next + ag_handle_i = ag_handle_i_next + _, input_i_next, ag_handle_i_next = async_all_gather(input_list[i + 1], get_tensor_model_parallel_group(), + last_dim=True) + else: + input_i = input_i_next + ag_handle_i = ag_handle_i_next + + ag_handle_i.wait() + input_i = torch.cat(input_i, dim=input_list[i].dim() - 1).contiguous() + input_i = input_i.detach() + input_i.requires_grad = True + (expert_output, mlp_bias), *_ = forward_func(moe_layer.experts[i], (input_i, tokens_per_expert_list[i], ctx)) + if rs_handle_i is not None: + rs_handle_i.wait() + rs_input_i.untyped_storage().resize_(0) + expert_graphs[i - 1].untyped_storage().resize_(0) + expert_outputs[i - 1] = transfer_tensor_first_dim_to_last(expert_outputs[i - 1], num_dim) + expert_outputs[i - 1].requires_grad = True + # sub expert graph + 
expert_graphs.append(expert_output) + + expert_output, num_dim = transfer_tensor_last_dim_to_first(expert_output) + rs_input_i, rs_expert_output, rs_handle_i = async_reduce_scatter(expert_output, get_tensor_model_parallel_group()) + + expert_outputs.append(rs_expert_output) + + if i == (moe_experts_pipeline_degree - 1): + rs_handle_i.wait() + rs_input_i.untyped_storage().resize_(0) + expert_graphs[i].untyped_storage().resize_(0) + expert_outputs[i] = transfer_tensor_first_dim_to_last(expert_outputs[i], num_dim) + expert_outputs[i].requires_grad = True + + ctx.expert_graphs = expert_graphs + ctx.expert_outputs = expert_outputs + + # 3. 将所有子集的计算结果拼接在一起,保存在`expert_output`中 + with torch.enable_grad(): + expert_output = torch.cat(expert_outputs, dim=0) + + for temp in expert_outputs: + temp.untyped_storage().resize_(0) + + return expert_output, mlp_bias + + +def moe_experts_pipeline_backward_func(ctx, input_list): + expert_grad_outputs = [] + + ag_handle_i_next = None + rs_handle_i = None + input_i_next = None + num_dim = None + mm1_inputs_grad = None + ag_input_i = None + ag_input_i_next = None + rs_input_i = None + ag_input_list = [] + + moe_experts_pipeline_degree = ctx.moe_experts_pipeline_degree + expert_graphs = ctx. 
expert_graphs + expert_outputs = ctx.expert_outputs + + for i in range(moe_experts_pipeline_degree): + if i == 0: + ag_input_i, input_i, ag_handle_i = async_all_gather(expert_outputs[i].grad, get_tensor_model_parallel_group(), + last_dim=True) + ag_input_i_next, input_i_next, ag_handle_i_next = async_all_gather(expert_outputs[i + 1].grad, + get_tensor_model_parallel_group(), + last_dim=True) + elif i != (moe_experts_pipeline_degree - 1): + input_i = input_i_next + ag_handle_i = ag_handle_i_next + ag_input_i = ag_input_i_next + ag_input_i_next, input_i_next, ag_handle_i_next = async_all_gather(expert_outputs[i + 1].grad, + get_tensor_model_parallel_group(), + last_dim=True) + else: + input_i = input_i_next + ag_handle_i = ag_handle_i_next + ag_input_i = ag_input_i_next + + ag_handle_i.wait() + ag_input_list.append(ag_input_i) + input_i = torch.cat(input_i, dim=expert_outputs[i].grad.dim() - 1).contiguous() + + set_gemm_backward_need_tensors(input_list[i]) + + backward_func(expert_graphs[i], input_i) + + if rs_handle_i is not None: + rs_handle_i.wait() + rs_input_i.untyped_storage().resize_(0) + mm1_inputs_grad.untyped_storage().resize_(0) + expert_grad_outputs[i - 1] = transfer_tensor_first_dim_to_last(expert_grad_outputs[i - 1], num_dim) + + rs_input_i, expert_output, rs_handle_i, mm1_inputs_grad, num_dim = get_all2all_experts_output() + expert_grad_outputs.append(expert_output) + + if i == (moe_experts_pipeline_degree - 1): + rs_handle_i.wait() + rs_input_i.untyped_storage().resize_(0) + mm1_inputs_grad.untyped_storage().resize_(0) + expert_grad_outputs[i] = transfer_tensor_first_dim_to_last(expert_grad_outputs[i], num_dim) + + for ag_input in ag_input_list: + ag_input.untyped_storage().resize_(0) + + expert_grad_output = torch.cat(expert_grad_outputs, dim=0) + return expert_grad_output + + +class MoELayerOverlapAll2All(torch.autograd.Function): + @staticmethod + def forward(ctx, hidden_states, moe_layer: MoELayer): + args = get_args() + moe_hierarchical_alltoallv 
= args.moe_hierarchical_alltoallv + moe_experts_pipeline_degree = args.moe_experts_pipeline_degree + ctx.moe_experts_pipeline_degree = moe_experts_pipeline_degree + save_tensors = [] + ctx.input_shape = hidden_states.shape + hidden_states = hidden_states.detach() + hidden_states.requires_grad = True + ctx.is_only_recompute_activation = only_recompute_activation(moe_layer.layer_number) + ctx.layer_number = moe_layer.layer_number + if not moe_hierarchical_alltoallv and args.n_shared_experts: + if get_tensor_model_parallel_world_size() > 1: + _, shared_experts_input, shared_experts_allgather_handle = async_all_gather( + hidden_states, get_tensor_model_parallel_group(), is_use_get_global_memory_buffer=True + ) + AG_SHARED_EXPERTS_INPUTS.append((shared_experts_input, shared_experts_allgather_handle)) + + # router + with torch.enable_grad(): + scores, indices = moe_layer.router(hidden_states) + + save_tensors.append(scores) + scores = scores.detach() + scores.requires_grad = True + save_tensors.append(scores) + moe_zero_memory = args.moe_zero_memory + n_shared_experts = args.n_shared_experts + ctx.n_shared_experts = n_shared_experts + ctx.moe_zero_memory = moe_zero_memory + shared_expert_gate = hasattr(args, 'shared_expert_gate') and args.shared_expert_gate + group_limited_greedy = hasattr(args, 'moe_router_load_balancing_type') and args.moe_router_load_balancing_type == "group_limited_greedy" + ctx.shared_expert_gate = shared_expert_gate + + if moe_zero_memory == "level1" and not ctx.is_only_recompute_activation: + ctx.activation_func = moe_layer.experts.activation_func + ctx.hidden_size = moe_layer.experts.config.hidden_size + ctx.num_local_experts = moe_layer.experts.num_local_experts + ctx.weight1 = moe_layer.experts.weight1 + ctx.moe_grouped_gemm = moe_layer.token_dispatcher.config.moe_grouped_gemm + ctx.num_local_experts = moe_layer.token_dispatcher.num_local_experts + + save_tensors.append(indices) + + if n_shared_experts: + ctx.shared_experts = 
moe_layer.shared_experts + else: + ctx.shared_experts = None + + if shared_expert_gate: + shared_expert_gate = moe_layer.shared_expert_gate + else: + shared_expert_gate = None + + (share_experts_output, dispatched_input, tokens_per_expert) = moe_layer.token_dispatcher.token_permutation( + hidden_states, scores, indices, ctx.shared_experts, save_tensors, shared_expert_gate, ctx + ) + if moe_experts_pipeline_degree: + save_tensors.append(None) + save_tensors.append(None) + expert_output, mlp_bias = moe_experts_pipeline_forward_func(tokens_per_expert, moe_layer, dispatched_input, ctx, save_tensors) + output, mlp_bias = moe_layer.token_dispatcher.token_unpermutation(expert_output, mlp_bias, save_tensors) + + + if isinstance(share_experts_output, tuple): + share_experts_output, rs_share_experts_output, rs_shared_experts_handle = share_experts_output + else: + rs_share_experts_output = share_experts_output + rs_shared_experts_handle = None + + expert_output.untyped_storage().resize_(0) + else: + + if isinstance(share_experts_output, tuple): + share_experts_output, rs_share_experts_output, rs_shared_experts_handle = share_experts_output + else: + rs_share_experts_output = share_experts_output + rs_shared_experts_handle = None + + (expert_output, mlp_bias), *_ = forward_func(moe_layer.experts, (dispatched_input, tokens_per_expert, ctx)) + save_tensors.append(expert_output) + + output, mlp_bias = moe_layer.token_dispatcher.token_unpermutation(expert_output, mlp_bias, save_tensors) + + if group_limited_greedy: + save_tensors.append(moe_layer.router.l_aux) + moe_layer.router.l_aux = moe_layer.router.l_aux.detach() + moe_layer.router.l_aux.requires_grad = True + save_tensors.append(moe_layer.router.l_aux) + with torch.enable_grad(): + save_to_aux_losses_tracker( + "load_balancing_loss", + moe_layer.router.l_aux, + moe_layer.layer_number, + moe_layer.config.num_layers, + ) + save_to_aux_losses_tracker( + "load_balancing_expert_level_loss", + moe_layer.router.l_expert_aux / 
args.moe_aux_loss_coeff, + moe_layer.layer_number, + moe_layer.config.num_layers, + ) + if hasattr(moe_layer.router, 'l_device_aux'): + save_to_aux_losses_tracker( + "load_balancing_device_level_loss", + moe_layer.router.l_device_aux / args.moe_device_level_aux_loss_coeff, + moe_layer.layer_number, + moe_layer.config.num_layers, + ) + if hasattr(moe_layer.router, 'l_comm_aux'): + save_to_aux_losses_tracker( + "load_balancing_comm_level_loss", + moe_layer.router.l_comm_aux / args.moe_comm_aux_loss_coeff, + moe_layer.layer_number, + moe_layer.config.num_layers, + ) + output = MoEAuxLossAutoScaler.apply(output, moe_layer.router.l_aux) + else: + save_tensors.append(None) + save_tensors.append(None) + + save_tensors.append(hidden_states) + + if moe_zero_memory == "level1" and not ctx.is_only_recompute_activation: + ctx.tokens_per_expert = tokens_per_expert + + ctx.output_splits = moe_layer.token_dispatcher.output_splits + ctx.input_splits = moe_layer.token_dispatcher.input_splits + ctx.router_topk = moe_layer.token_dispatcher.router_topk + ctx.input_splits_tp_ep = getattr(moe_layer.token_dispatcher, 'input_splits_tp_ep', None) + if n_shared_experts: + if rs_shared_experts_handle is not None: + rs_shared_experts_handle.wait() + output_sum = output + rs_share_experts_output + output.untyped_storage().resize_(0) + share_experts_output.untyped_storage().resize_(0) + else: + output_sum = output.detach() + + save_tensors.append(share_experts_output) + if hasattr(moe_layer.token_dispatcher, 'global_input_tokens_local_experts_indices'): + save_tensors.append(moe_layer.token_dispatcher.global_input_tokens_local_experts_indices) + else: + save_tensors.append(None) + ctx.save_for_backward(*save_tensors) + return output_sum, mlp_bias + + @staticmethod + def backward(ctx, *args): + global_args = get_args() + + output_splits = ctx.output_splits + input_splits = ctx.input_splits + router_topk = ctx.router_topk + n_shared_experts = ctx.n_shared_experts + moe_zero_memory = 
ctx.moe_zero_memory + moe_experts_pipeline_degree = ctx.moe_experts_pipeline_degree + moe_tp_extend_ep = global_args.moe_tp_extend_ep + moe_hierarchical_alltoallv = global_args.moe_hierarchical_alltoallv + shared_expert_gate = ctx.shared_expert_gate + input_splits_tp_ep = ctx.input_splits_tp_ep + + (route_graph, detach_scores, + indices, indices_ep, + hidden_states_ep, scores_ep, + permute1_graph, + permute2_input_detach, permute2_graph, + experts_graph, + unpermute1_input_detach, unpermute1_graph, + unpermute2_input_detach, unpermute2_graph, l_aux_graph, l_aux_detach, + detach_input, share_experts_graph, + global_input_tokens_local_experts_indices, + ) = ctx.saved_tensors + if moe_hierarchical_alltoallv: + set_gemm_backward_need_tensors( + ((hidden_states_ep, indices_ep, scores_ep, router_topk, global_input_tokens_local_experts_indices), + permute2_input_detach, permute2_graph, + output_splits, input_splits, input_splits_tp_ep)) + elif moe_experts_pipeline_degree: + input_list = ctx.input_list + else: + set_gemm_backward_need_tensors( + ((detach_input, indices, scores_ep, router_topk, global_input_tokens_local_experts_indices), + permute2_input_detach, permute2_graph, + output_splits, input_splits, input_splits_tp_ep)) + + if n_shared_experts: + if get_tensor_model_parallel_world_size() > 1 and not shared_expert_gate: + _, backward_ag_shared, backward_ag_shared_handle = async_all_gather( + args[0], get_tensor_model_parallel_group() + ) + else: + backward_ag_shared = args[0] + backward_ag_shared_handle = None + + if moe_hierarchical_alltoallv: + ep_group = parallel_state.get_expert_model_parallel_group() + unpermute2_graph_backward_input = args[0].view(-1, args[0].shape[-1]) + _, unpermute2_graph_backward_input, output_backward_handle = \ + async_all_gather(unpermute2_graph_backward_input, group=ep_group) + if moe_zero_memory == "level0": + def alltoall_token_permutation1(hidden_states, indices, router_topk): + hidden_states = hidden_states.view(-1, 
hidden_states.shape[-1]) + permutated_local_input_tokens, _, _ = permute_with_ep( + hidden_states, indices, probs=scores_ep, topk=router_topk, gb_inputs_splits=input_splits_tp_ep + ) + return permutated_local_input_tokens + + permutated_local_input_tokens = alltoall_token_permutation1(hidden_states_ep, indices_ep, router_topk) + set_permute_with_ep_local_input_tokens(permutated_local_input_tokens) + + if moe_zero_memory == "level1" and not ctx.is_only_recompute_activation: + with torch.no_grad(): + if get_tensor_model_parallel_world_size() > 1 and n_shared_experts: + _, shared_experts_input, shared_experts_allgather_handle = async_all_gather( + detach_input, get_tensor_model_parallel_group(), is_use_get_global_memory_buffer=True + ) + AG_SHARED_EXPERTS_INPUTS.append((shared_experts_input, shared_experts_allgather_handle)) + + # Recompute token rearrange in permutation1 + if moe_hierarchical_alltoallv: + permutated_local_input_tokens, _, _ = permute_with_ep( + hidden_states_ep.view(-1, hidden_states_ep.shape[-1]), indices_ep, probs=scores_ep, topk=ctx.router_topk, + gb_inputs_splits=ctx.input_splits_tp_ep + ) + else: + permutated_local_input_tokens, _ = permute( + detach_input.view(-1, detach_input.shape[-1]), indices + ) + + # Recompute expert parallel AlltoAll communication + ep_group = parallel_state.get_expert_model_parallel_group() + if moe_tp_extend_ep: + ep_group = parallel_state.get_tensor_and_expert_parallel_group() + if moe_hierarchical_alltoallv: + tp_group = parallel_state.get_tensor_model_parallel_group() + _, global_input_tokens, permute1_ep_all_to_all_handle = async_all_to_all( + permutated_local_input_tokens, + ctx.output_splits, + ctx.input_splits, + tp_group, + ) + else: + _, global_input_tokens, permute1_ep_all_to_all_handle = async_all_to_all( + permutated_local_input_tokens, + ctx.output_splits, + ctx.input_splits, + ep_group, + ) + if moe_hierarchical_alltoallv: + output_backward_handle.wait() + 
unpermute2_graph.backward(unpermute2_graph_backward_input) + else: + unpermute2_graph.backward(args[0]) + unpermute2_graph = None + if moe_zero_memory == "level1" and not ctx.is_only_recompute_activation: + if n_shared_experts: + with torch.no_grad(): + # Recompute mm1 and act of shared experts + shared_fc1_out, bias_parallel = ctx.shared_experts.linear_fc1(detach_input) + shared_act_out = ctx.shared_experts.activation_function(shared_fc1_out, bias_parallel) + shared_act_out_size = shared_act_out.untyped_storage().size() + ctx.shared_act_out.untyped_storage().resize_(shared_act_out_size) + ctx.shared_act_out.untyped_storage().copy_(shared_act_out.untyped_storage()) + shared_act_out.untyped_storage().resize_(0) + shared_fc1_out_size = shared_fc1_out.untyped_storage().size() + ctx.shared_fc1_out.untyped_storage().resize_(shared_fc1_out_size) + ctx.shared_fc1_out.untyped_storage().copy_(shared_fc1_out.untyped_storage()) + shared_fc1_out.untyped_storage().resize_(0) + if backward_ag_shared_handle is not None: + backward_ag_shared_handle.wait() + share_experts_graph.backward(backward_ag_shared) + share_experts_graph = None + if backward_ag_shared_handle is not None: + backward_ag_shared.untyped_storage().resize_(0) + ctx.shared_act_out.untyped_storage().resize_(0) + ctx.shared_fc1_out.untyped_storage().resize_(0) + + permute1_ep_all_to_all_handle.wait() + permutated_local_input_tokens.untyped_storage().resize_(0) + + ep_group = parallel_state.get_expert_model_parallel_group() + if moe_tp_extend_ep: + ep_group = parallel_state.get_tensor_and_expert_parallel_group() + if moe_hierarchical_alltoallv: + tp_group = parallel_state.get_tensor_model_parallel_group() + _, unpermute1_backward_input, handle = async_all_to_all( + unpermute2_input_detach.grad, + output_splits, + input_splits, + tp_group, + ) + else: + _, unpermute1_backward_input, handle = async_all_to_all( + unpermute2_input_detach.grad, + output_splits, + input_splits, + ep_group, + ) + + if moe_zero_memory == 
"level1" and not ctx.is_only_recompute_activation: + with torch.no_grad(): + if ctx.num_local_experts > 1: + # Recompute permutation2 + global_input_tokens, _ = permute( + global_input_tokens, global_input_tokens_local_experts_indices + ) + if not moe_tp_extend_ep and get_tensor_model_parallel_world_size() > 1 and ctx.moe_grouped_gemm: + global_input_tokens = tensor_parallel.all_gather_last_dim_from_tensor_parallel_region( + global_input_tokens + ) + # Recompute mm1 and act + input_, mm1_out, act_out = ctx.recompute_tensors + ctx.recompute_tensors = None + if global_input_tokens.nelement() != 0: + group_list = torch.cumsum(ctx.tokens_per_expert, dim=0) + w1 = ctx.weight1.view(ctx.num_local_experts, ctx.hidden_size, -1) + mm1_out_ = gmm_op(global_input_tokens, w1, [], group_list, 0)[0] + group_list.untyped_storage().resize_(0) + else: + w1 = ctx.weight1.view(ctx.hidden_size, -1) + mm1_out_ = torch.matmul(global_input_tokens, w1) + + act_out_ = ctx.activation_func(mm1_out_) + act_out_size = act_out_.untyped_storage().size() + act_out.untyped_storage().resize_(act_out_size) + act_out.untyped_storage().copy_(act_out_.untyped_storage()) + act_out = None + act_out_.untyped_storage().resize_(0) + mm1_out_size = mm1_out_.untyped_storage().size() + mm1_out.untyped_storage().resize_(mm1_out_size) + mm1_out.untyped_storage().copy_(mm1_out_.untyped_storage()) + mm1_out = None + mm1_out_.untyped_storage().resize_(0) + input_size = global_input_tokens.untyped_storage().size() + input_.untyped_storage().resize_(input_size) + input_.untyped_storage().copy_(global_input_tokens.untyped_storage()) + input_ = None + global_input_tokens.untyped_storage().resize_(0) + ctx.activation_func = None + ctx.hidden_size = None + ctx.num_local_experts = None + ctx.weight1 = None + ctx.moe_grouped_gemm = None + ctx.num_local_experts = None + ctx.input_splits = None + ctx.output_splits = None + if moe_hierarchical_alltoallv: + ctx.input_splits_tp_ep = None + elif share_experts_graph is not None: + 
if backward_ag_shared_handle is not None: + backward_ag_shared_handle.wait() + share_experts_graph.backward(backward_ag_shared) + share_experts_graph = None + if backward_ag_shared_handle is not None: + backward_ag_shared.untyped_storage().resize_(0) + if handle is not None: + handle.wait() + unpermute2_input_detach.grad.untyped_storage().resize_(0) + + backward_func(unpermute1_graph, unpermute1_backward_input) + + unpermute1_backward_input.untyped_storage().resize_(0) + if moe_hierarchical_alltoallv: + set_all2all_experts_output((permute1_graph, scores_ep, hidden_states_ep)) + backward_func(experts_graph, unpermute1_input_detach.grad) + unpermute1_input_detach.grad.untyped_storage().resize_(0) + permute2_input_detach.grad.untyped_storage().resize_(0) + detach_scores_grad, detach_input_grad, detach_input_handle = get_all2all_experts_output() + elif moe_experts_pipeline_degree: + expert_grad_output = moe_experts_pipeline_backward_func(ctx, ctx.input_list) + for input_tensor in input_list: + input_tensor.untyped_storage().resize_(0) + permute2_graph.backward(expert_grad_output) + backward_func(permute1_graph, permute2_input_detach.grad) + permute2_input_detach.grad.untyped_storage().resize_(0) + else: + backward_func(experts_graph, unpermute1_input_detach.grad) + unpermute1_input_detach.grad.untyped_storage().resize_(0) + permute1_backward_input, bw_permute1_ep_all2all_handle = get_all2all_experts_output() + bw_permute1_ep_all2all_handle.wait() + permute2_input_detach.grad.untyped_storage().resize_(0) + backward_func(permute1_graph, permute1_backward_input) + permute1_backward_input.untyped_storage().resize_(0) + if l_aux_graph is not None: + l_aux_graph.backward(l_aux_detach.grad, retain_graph=True) + if moe_zero_memory != "disable": + if ctx.router_topk > 1: + stream, matmul_output_grad, unpermuted_tokens = get_prob_backward_need_tensors() + torch.npu.current_stream().wait_stream(stream) + probs_grad = (matmul_output_grad * unpermuted_tokens).sum(-1).squeeze(-1) + 
if moe_hierarchical_alltoallv: + ep_group = parallel_state.get_expert_model_parallel_group() + _, probs_grad, probs_grad_handle = async_reduce_scatter(probs_grad, group=ep_group) + probs_grad_handle.wait() + route_graph.backward(probs_grad) + detach_input_handle.wait() + else: + route_graph.backward(probs_grad) + ctx.router_topk = None + else: + if moe_hierarchical_alltoallv: + route_graph.backward(detach_scores_grad) + detach_input_handle.wait() + else: + route_graph.backward(detach_scores.grad) + route_graph = None + if moe_hierarchical_alltoallv: + grad_output = detach_input.grad + detach_input_grad + else: + grad_output = detach_input.grad + return grad_output, None diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/moe_layer_overlap_allgather.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/moe_layer_overlap_allgather.py new file mode 100644 index 000000000..5317b0694 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/moe_layer_overlap_allgather.py @@ -0,0 +1,229 @@ +import acl +import torch +import torch_npu + +from megatron.core.parallel_state import (get_expert_model_parallel_group, get_tensor_and_expert_parallel_group, + get_tensor_model_parallel_group, get_tensor_model_parallel_world_size) +from megatron.core.transformer.moe.moe_layer import MoELayer +from megatron.training import get_args +from mindspeed.core.transformer.moe.token_dispatcher import cann_version_check +from mindspeed.core.transformer.moe.moe_utils import AG_SHARED_EXPERTS_INPUTS +from mindspeed.core.transformer.moe.comm_utils import async_all_gather, async_reduce_scatter +from mindspeed.core.transformer.moe.moe_utils import (forward_func, backward_func, set_gemm_backward_need_tensors, + get_rs_global_hidden_states_grad_with_handle) + + +class MoELayerOverlapAllGather(torch.autograd.Function): + @staticmethod + def forward(ctx, hidden_states, moe_layer: MoELayer): + args = get_args() + save_tensors = [] + ctx.input_shape = hidden_states.shape + 
moe_layer.experts.layer_number = moe_layer.layer_number + # input detach graph, leaf node + hidden_states = hidden_states.detach() + hidden_states.requires_grad = True + + # 共享专家 tp group allgather hidden_states + # 默认 tp 开启 sp + if args.n_shared_experts and get_tensor_model_parallel_world_size() > 1: + hidden_states, shared_experts_input, shared_experts_allgather_handle = async_all_gather( + hidden_states, get_tensor_model_parallel_group(), is_use_get_global_memory_buffer=True + ) + AG_SHARED_EXPERTS_INPUTS.append(shared_experts_input) + else: + shared_experts_input = hidden_states + shared_experts_allgather_handle = None + + # router + (scores, indices), _ = forward_func(moe_layer.router, hidden_states) + + # after router, do 2 allgather + global_indices_tuple = None + global_probs_tuple = None + if moe_layer.config.sequence_parallel or (moe_layer.config.expert_model_parallel_size > 1): + if isinstance(indices, tuple): + global_indices, gi_handle = indices + else: + _, global_indices, gi_handle = async_all_gather(indices, get_tensor_and_expert_parallel_group()) + + global_indices_tuple = (global_indices, gi_handle) + + _, global_probs, gp_handle = async_all_gather( + scores, get_tensor_and_expert_parallel_group() + ) + + global_probs_tuple = (global_probs, gp_handle) + + # 专家 ep group allgather hidden_states + global_hidden_states_tuple = None + if moe_layer.config.sequence_parallel or moe_layer.config.expert_model_parallel_size > 1: + if '910B' in acl.get_soc_name(): + _, global_hidden_states, ghs_handle = async_all_gather( + hidden_states, + get_tensor_and_expert_parallel_group() + ) + else: + _, global_hidden_states, ghs_handle = async_all_gather( + shared_experts_input, + get_expert_model_parallel_group() + if shared_experts_allgather_handle + else get_tensor_and_expert_parallel_group(), + shared_experts_allgather_handle + ) + global_hidden_states = global_hidden_states.view(-1, global_hidden_states.shape[-1]) + global_hidden_states_tuple = 
(global_hidden_states, ghs_handle) + + # shared experts + shared_experts_rs_handle = None + share_experts_output = None + rs_share_experts_output = None + share_experts_bias = None + if args.n_shared_experts: + if shared_experts_allgather_handle is not None: + shared_experts_allgather_handle.wait() + (share_experts_output, share_experts_bias), _ = forward_func( + moe_layer.shared_experts, hidden_states + ) + + if get_tensor_model_parallel_world_size() > 1: + # reduce scatter + _, rs_share_experts_output, shared_experts_rs_handle = async_reduce_scatter( + share_experts_output, get_tensor_model_parallel_group() + ) + else: + rs_share_experts_output = share_experts_output + shared_experts_rs_handle = None + + token_permutation_input = ( + global_indices_tuple, + global_probs_tuple, + global_hidden_states_tuple + ) + + # dispatch input + save_tensors.append(scores) + + moe_layer.token_dispatcher.hidden_shape = hidden_states.shape + (dispatched_input, tokens_per_expert, global_local_map, indices), *token_permutation_input = forward_func( + moe_layer.token_dispatcher.token_permutation, token_permutation_input + ) + + save_tensors.append(global_local_map) + save_tensors.append(indices) + + # token_permutation_input : (global_indices, handle), (global_probs, handle), (global_hidden_states, handle) + global_probs_detach, global_hidden_states_detach = token_permutation_input[1][0], token_permutation_input[2][0] + + global_hidden_states_detach.untyped_storage().resize_(0) + if cann_version_check: + global_probs_detach.untyped_storage().resize_(0) + save_tensors.append(global_probs_detach) + save_tensors.append(global_hidden_states_detach) + + expert_input = (dispatched_input, tokens_per_expert) + + def func(dispatched_input, tokens_per_expert): + expert_output, mlp_bias = moe_layer.experts(dispatched_input, tokens_per_expert) + output, mlp_bias = moe_layer.token_dispatcher.token_unpermutation( + expert_output, mlp_bias + ) + return output, mlp_bias + + (output, mlp_bias), *_ 
= forward_func(func, expert_input) + + save_tensors.append(dispatched_input) + + _, output_rs, token_unpermutation_rs_handle = async_reduce_scatter( + output, get_tensor_and_expert_parallel_group() + ) + + ctx.token_unpermutation_output_shape = output.shape + + token_unpermutation_rs_handle.wait() + output.untyped_storage().resize_(0) + output_rs = output_rs.view(moe_layer.token_dispatcher.hidden_shape) + + save_tensors.append(hidden_states) + save_tensors.append(output) + save_tensors.append(share_experts_output) + ctx.save_for_backward(*save_tensors) + + if args.n_shared_experts: + if shared_experts_rs_handle is not None: + shared_experts_rs_handle.wait() + + output_rs = output_rs + rs_share_experts_output + if moe_layer.token_dispatcher.add_bias: + mlp_bias = mlp_bias + share_experts_bias + share_experts_output.untyped_storage().resize_(0) + return output_rs, mlp_bias + + return output_rs.detach(), mlp_bias + + @staticmethod + def backward(ctx, *args): + (scores, global_local_map, indices, + global_probs_detach, global_hidden_states_detach, dispatched_input, + input_, output, share_experts_graph) = ctx.saved_tensors + + token_unpermutation_output_shape = ctx.token_unpermutation_output_shape + # tp group ag grad_out + if share_experts_graph is not None and get_tensor_model_parallel_world_size() > 1: + _, ag_share_experts_grad_input, ag_share_experts_handle = async_all_gather( + args[0], get_tensor_model_parallel_group() + ) + else: + ag_share_experts_grad_input = args[0] + ag_share_experts_handle = None + + if '910B' not in acl.get_soc_name() and share_experts_graph: + _, ag_experts_grad_input, ag_experts_handle = async_all_gather( + ag_share_experts_grad_input, + get_expert_model_parallel_group(), + ag_share_experts_handle + ) + else: + _, ag_experts_grad_input, ag_experts_handle = async_all_gather( + args[0], + get_tensor_and_expert_parallel_group(), + ) + + args = None + if ag_share_experts_handle is not None: + ag_share_experts_handle.wait() + + if 
share_experts_graph is not None: + # 反向 —— 共享专家 + share_experts_graph.backward(ag_share_experts_grad_input) + if '910B' in acl.get_soc_name() or share_experts_graph is None: + from mindspeed.core.transformer.moe.moe_utils import set_ag_tp_hidden_status + set_ag_tp_hidden_status(input_) + + ag_experts_handle.wait() + ag_share_experts_grad_input = None + ag_experts_grad_input = ag_experts_grad_input.view(token_unpermutation_output_shape) + + # token 重排反向 function set + set_gemm_backward_need_tensors((dispatched_input, global_hidden_states_detach, indices, global_local_map)) + + # 反向 —— token 反重排 expert + output.backward(ag_experts_grad_input) + + global_probs_grad = global_probs_detach.grad + + _, rs_global_probs_grad, rs_global_probs_grad_handle = async_reduce_scatter( + global_probs_grad, get_tensor_and_expert_parallel_group() + ) + rs_global_probs_grad_handle.wait() + global_probs_grad.untyped_storage().resize_(0) + + # 反向 —— router + backward_func(scores, rs_global_probs_grad) + + rs_global_hidden_states_grad, rs_handle = get_rs_global_hidden_states_grad_with_handle() + rs_handle.wait() + rs_global_hidden_states_grad = rs_global_hidden_states_grad.view(ctx.input_shape) + # expert grad + shared expert grad + rs_global_hidden_states_grad += input_.grad + return rs_global_hidden_states_grad, None diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/moe_utils.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/moe_utils.py new file mode 100644 index 000000000..461b8739c --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/moe_utils.py @@ -0,0 +1,376 @@ +# Copyright (c) 2024; NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+import torch +import torch_npu +from megatron.core.transformer.moe.moe_utils import permute_with_padded_tokens, unpermute_with_padded_tokens +from megatron.training import get_args +from megatron.core import mpu, parallel_state +from megatron.core.transformer.moe.moe_utils import (reduce_aux_losses_tracker_across_ranks, + clear_aux_losses_tracker) + + +AG_TP_HIDDEN_STATUS = None +AG_SHARED_EXPERTS_INPUTS = [] +GEMM_BACKWARD_NEED_TENSORS = None +RS_GLOBAL_HIDDEN_STATES_GRAD_WITH_HANDLE = None +SWAP_STREAM = None +SWAP_STREAM2 = None +SWAP_TENSOR = None +MATMUL_OUTPUT_GRAD = None +UNPERMUTED_TOKENS = None +PERMUTE_WITH_EP_LOCAL_INPUT_TOKENS = None + + +def get_swap_stream(): + global SWAP_STREAM2 + if SWAP_STREAM2 is None: + _ = torch_npu.npu.Stream(device=torch.npu.current_device()) + SWAP_STREAM2 = torch_npu.npu.Stream(device=torch.npu.current_device()) + stream = SWAP_STREAM2 + return stream + + +def set_swap_status(tensor): + global SWAP_TENSOR + SWAP_TENSOR = tensor + + +def get_swap_status(): + global SWAP_STREAM + if SWAP_STREAM is None: + SWAP_STREAM = torch_npu.npu.Stream(device=torch.npu.current_device()) + global SWAP_TENSOR + stream = SWAP_STREAM + tensor = SWAP_TENSOR + SWAP_TENSOR = None + return stream, tensor + + +def set_prob_backward_need_tensors(matmul_output_grad, unpermuted_tokens): + global MATMUL_OUTPUT_GRAD + MATMUL_OUTPUT_GRAD = matmul_output_grad + global UNPERMUTED_TOKENS + UNPERMUTED_TOKENS = unpermuted_tokens + + +def get_prob_backward_need_tensors(): + global SWAP_STREAM2 + if SWAP_STREAM2 is None: + _ = torch_npu.npu.Stream(device=torch.npu.current_device()) + SWAP_STREAM2 = torch_npu.npu.Stream(device=torch.npu.current_device()) + global MATMUL_OUTPUT_GRAD + global UNPERMUTED_TOKENS + stream = SWAP_STREAM2 + matmul_output_grad = MATMUL_OUTPUT_GRAD + unpermuted_tokens = UNPERMUTED_TOKENS + MATMUL_OUTPUT_GRAD = None + UNPERMUTED_TOKENS = None + return stream, matmul_output_grad, unpermuted_tokens + + +def 
set_ag_tp_hidden_status(_inputs): + global AG_TP_HIDDEN_STATUS + AG_TP_HIDDEN_STATUS = _inputs + + +def get_ag_tp_hidden_status(): + global AG_TP_HIDDEN_STATUS + result = AG_TP_HIDDEN_STATUS + AG_TP_HIDDEN_STATUS = None + return result + + +def set_gemm_backward_need_tensors(_inputs): + global GEMM_BACKWARD_NEED_TENSORS + GEMM_BACKWARD_NEED_TENSORS = _inputs + + +def get_gemm_backward_need_tensors(): + global GEMM_BACKWARD_NEED_TENSORS + result = GEMM_BACKWARD_NEED_TENSORS + GEMM_BACKWARD_NEED_TENSORS = None + return result + + +def set_permute_with_ep_local_input_tokens(_inputs): + global PERMUTE_WITH_EP_LOCAL_INPUT_TOKENS + PERMUTE_WITH_EP_LOCAL_INPUT_TOKENS = _inputs + + +def get_permute_with_ep_local_input_tokens(): + global PERMUTE_WITH_EP_LOCAL_INPUT_TOKENS + result = PERMUTE_WITH_EP_LOCAL_INPUT_TOKENS + PERMUTE_WITH_EP_LOCAL_INPUT_TOKENS = None + return result + + +def set_rs_global_hidden_states_grad_with_handle(_inputs): + global RS_GLOBAL_HIDDEN_STATES_GRAD_WITH_HANDLE + RS_GLOBAL_HIDDEN_STATES_GRAD_WITH_HANDLE = _inputs + + +def get_rs_global_hidden_states_grad_with_handle(): + global RS_GLOBAL_HIDDEN_STATES_GRAD_WITH_HANDLE + result = RS_GLOBAL_HIDDEN_STATES_GRAD_WITH_HANDLE + RS_GLOBAL_HIDDEN_STATES_GRAD_WITH_HANDLE = None + return result + + +ALL2ALL_EXPERTS_OUTPUT = None + + +def set_all2all_experts_output(_input): + global ALL2ALL_EXPERTS_OUTPUT + ALL2ALL_EXPERTS_OUTPUT = _input + + +def get_all2all_experts_output(): + global ALL2ALL_EXPERTS_OUTPUT + result = ALL2ALL_EXPERTS_OUTPUT + ALL2ALL_EXPERTS_OUTPUT = None + return result + + +def only_recompute_activation(layer_number): + args = get_args() + vpp_rank = parallel_state.get_virtual_pipeline_model_parallel_rank() + vpp_size = args.virtual_pipeline_model_parallel_size + pp_size = args.transformer_pipeline_model_parallel_size + + if vpp_size is not None: + layer_per_chunk = args.num_layers_per_virtual_pipeline_stage + elif pp_size is not None: + layer_per_chunk = args.num_layers // pp_size + else: 
+ layer_per_chunk = args.num_layers + + if vpp_rank is None: + vpp_rank = 0 + if vpp_size is None: + vpp_size = 1 + recompute_priority = ((layer_number - 1) % layer_per_chunk) * vpp_size + vpp_rank + moe_zero_memory_num_layers = args.moe_zero_memory_num_layers + + if moe_zero_memory_num_layers: + if recompute_priority < moe_zero_memory_num_layers: + return False + else: + return True + else: + return False + + +def forward_func(func, inputs): + def detach_tensor(input_): + if input_.requires_grad and input_.grad_fn is None: + return input_ + else: + new_input = input_.detach() + new_input.requires_grad = True + return new_input + + detach_inputs = [] + if isinstance(inputs, tuple): + for input_ in inputs: + if isinstance(input_, tuple): + detach_input = [] + for i in input_: + if isinstance(i, torch.Tensor) and torch.is_floating_point(i): + detach_input.append(detach_tensor(i)) + else: + detach_input.append(i) + detach_inputs.append(tuple(detach_input)) + else: + if isinstance(input_, torch.Tensor) and torch.is_floating_point(input_): + detach_input = detach_tensor(input_) + else: + detach_input = input_ + detach_inputs.append(detach_input) + elif isinstance(inputs, torch.Tensor): + detach_inputs.append(detach_tensor(inputs)) + + with torch.enable_grad(): + output = func(*detach_inputs) + + return output, *detach_inputs + + +def backward_func(func_tensor, gradinputs): + if gradinputs is None or func_tensor.grad_fn is None: + return + if isinstance(gradinputs, torch.Tensor): + func_tensor.backward(gradinputs) + elif isinstance(gradinputs, tuple): + func_tensor.backward(*gradinputs) + + +def permute(tokens, indices, num_out_tokens: int = None, padded_mode: bool = False): + if padded_mode: + return permute_with_padded_tokens(tokens, indices) + + if indices.dim() == 1: + topk = 1 + else: + topk = indices.size(1) + flatten_indices = indices.view(-1) + # previous use argsort, argsort int64 will be run on host cpu + sorted_indices = torch.sort(flatten_indices.float(), 
stable=True)[1] + if num_out_tokens is not None: + sorted_indices = sorted_indices[:num_out_tokens] + permuted_tokens = tokens.index_select(0, sorted_indices // topk) + return permuted_tokens, sorted_indices + + +def permute_with_ep(tokens: torch.Tensor, + indices: torch.Tensor, + probs: torch.Tensor, + topk: int = 1, + gb_inputs_splits=None): + if topk > 1: + if indices.size(1) != topk: + raise RuntimeError("indices.size(1) should be equal to topk") + flatten_indices = indices.view(-1) + sorted_indices = torch.sort(flatten_indices.float(), stable=True)[1] + ep_rank = mpu.get_expert_model_parallel_rank() + import numpy as np + gb_inputs_splits_sum = np.cumsum(gb_inputs_splits) + start = 0 + if ep_rank > 0: + start = gb_inputs_splits_sum[ep_rank - 1] + end = gb_inputs_splits_sum[ep_rank] + result_indices = sorted_indices[start : end] + permuted_tokens = tokens.index_select(0, result_indices // topk) + flatten_probs = probs.view(-1) + permuted_probs = flatten_probs.index_select(0, result_indices) + return permuted_tokens, permuted_probs, result_indices + + +def unpermute_with_ep( + unpermute_with_ep_input_tensors_list, + probs: torch.Tensor = None, + padded_mode: bool = False, + restore_shape: torch.Size = None, + topk: int = 1, +): + permuted_tokens, sorted_indices, permuted_probs = unpermute_with_ep_input_tensors_list + if padded_mode: + return unpermute_with_padded_tokens( + permuted_tokens, sorted_indices, probs, restore_shape=restore_shape + ) + + assert sorted_indices.numel() == permuted_tokens.size(0) + if permuted_probs is not None: + permuted_tokens = permuted_tokens * permuted_probs.unsqueeze(-1) + unpermuted_tokens = torch.zeros(restore_shape[0], permuted_tokens.size(-1), + dtype=permuted_tokens.dtype, device=permuted_tokens.device) + sorted_indices = sorted_indices // topk + unpermuted_tokens = unpermuted_tokens.scatter_add_(0, + sorted_indices.unsqueeze(1).expand(-1, permuted_tokens.shape[1]), + permuted_tokens) + return unpermuted_tokens + + +def 
unpermute( + permuted_tokens: torch.Tensor, + sorted_indices: torch.Tensor, + probs: torch.Tensor = None, + padded_mode: bool = False, + restore_shape: torch.Size = None, +): + if padded_mode: + return unpermute_with_padded_tokens( + permuted_tokens, sorted_indices, probs, restore_shape=restore_shape + ) + + assert sorted_indices.numel() == permuted_tokens.size(0) + if probs is not None: + # Unpermute and merge the tokens with their probabilities + num_unpermuted_tokens = probs.numel() + topk = probs.size(1) + else: + # Unpermute the tokens without merge + num_unpermuted_tokens = permuted_tokens.size(0) + topk = 1 + + unpermuted_tokens = torch.zeros( + [num_unpermuted_tokens, permuted_tokens.shape[-1]], + dtype=permuted_tokens.dtype, + device=permuted_tokens.device, + ) + unpermuted_tokens.index_copy_(0, sorted_indices, permuted_tokens) + unpermuted_tokens = unpermuted_tokens.reshape(-1, topk, permuted_tokens.size(-1)) + if probs is not None: + unpermuted_tokens = unpermuted_tokens * probs.unsqueeze(-1) + unpermuted_tokens = unpermuted_tokens.sum(dim=1) + + return unpermuted_tokens + + +def get_mean(tensor): + """ + Calculate the mean of a tensor, excluding specified 'noop_layers'. + + Parameters: + tensor (torch.Tensor): A one-dimensional tensor. + + Returns: + float: The mean of the tensor, excluding the 'noop_layers' if specified. + + Notes: + - If `args.noop_layers` is a set and is not empty, the mean is calculated by excluding these layers. + - If `args.noop_layers` is empty or None, the mean is calculated directly from the tensor. + - `args.num_layers` represents the total number of layers, used to adjust the mean calculation when + excluding 'noop_layers'. 
+ """ + args = get_args() + if hasattr(args, 'noop_layers') and isinstance(args.noop_layers, set) and len(args.noop_layers) > 0: + return tensor.sum() / (args.num_layers - len(args.noop_layers)) + return tensor.mean() + + +def track_moe_metrics( + loss_scale, iteration, writer, wandb_writer=None, total_loss_dict=None, per_layer_logging=False +): + # Aux loss logging + + reduce_aux_losses_tracker_across_ranks() + tracker = parallel_state.get_moe_layer_wise_logging_tracker() + if writer is not None: + aux_losses = {k: v['values'].float() * loss_scale for k, v in tracker.items()} + for name, loss_list in aux_losses.items(): + # adaptation for loss_list.mean(): get_mean() leaves args.noop_layers out of the divisor + loss_list_mean = get_mean(loss_list) + if total_loss_dict is not None: + if name not in total_loss_dict: + # adaptation for loss_list.mean() + total_loss_dict[name] = loss_list_mean + else: + # adaptation for loss_list.mean() + total_loss_dict[name] += loss_list_mean + + # currently when using add_scalars, + # torch.utils.add_scalars makes each timer its own run, which + # pollutes the runs list, so we just add each as a scalar + # adaptation for loss_list.mean() + writer.add_scalar(name, loss_list_mean, iteration) + if per_layer_logging: + for i, loss in enumerate(loss_list.tolist()): + writer.add_scalar(f"moe/{name}_layer_{i}", loss, iteration) + + # W&B logging lacks support for logging multiple scalars simultaneously. + # As a workaround, we log each scalar individually first, then we can create + # a custom panel to manually group them to a single plot.
+ if wandb_writer: + # adaptation for loss_list.mean() + wandb_writer.log({f"{name}": loss_list_mean}, iteration) + if per_layer_logging: + wandb_writer.log( + { + f"moe/{name}_layer_{i}": loss + for i, loss in enumerate(loss_list.tolist()) + }, + iteration, + ) + + clear_aux_losses_tracker() + diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/router.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/router.py new file mode 100644 index 000000000..21e640133 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/router.py @@ -0,0 +1,97 @@ +# Copyright (c) 2022; NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +import torch +from megatron.training import get_args +from megatron.core.parallel_state import get_tensor_and_expert_parallel_group +from megatron.core.tensor_parallel.mappings import _reduce_scatter_along_first_dim_moe +from megatron.core.transformer.moe.moe_utils import topk_softmax_with_capacity + + +def _gather_along_first_dim_moe_async(input_, async_op): + """Gather tensors and concatenate along the first dimension.""" + group = get_tensor_and_expert_parallel_group() + world_size = torch.distributed.get_world_size(group=group) + # Bypass the function if we are using only 1 GPU. 
+ if world_size == 1: + return input_ + + dim_size = list(input_.size()) + dim_size[0] = dim_size[0] * world_size + + output = torch.empty(dim_size, dtype=input_.dtype, device=torch.cuda.current_device()) + handle = torch.distributed._all_gather_base(output, input_.contiguous(), group=group, async_op=async_op) + + return output, handle + + +class _GatherFromSequenceParallelRegionToMOEAsync(torch.autograd.Function): + @staticmethod + def symbolic(graph, input_): + return _gather_along_first_dim_moe_async(input_, async_op=True) + + @staticmethod + def forward(ctx, input_): + return _gather_along_first_dim_moe_async(input_, async_op=True) + + @staticmethod + def backward(ctx, grad_output, grad_handle): + return _reduce_scatter_along_first_dim_moe(grad_output) + + +def gather_from_sequence_parallel_region_to_moe_async(input_): + return _GatherFromSequenceParallelRegionToMOEAsync.apply(input_) + + +def aux_loss_load_balancing(self, logits: torch.Tensor): + probs, indices, tokens_per_expert = topk_softmax_with_capacity( + logits, + self.topk, + capacity_factor=self.config.moe_expert_capacity_factor, + pad_to_capacity=self.config.moe_pad_expert_input_to_capacity, + drop_policy=self.config.moe_token_drop_policy, + use_pre_softmax=self.config.moe_router_pre_softmax, + ) + global_indices = indices + if self.config.sequence_parallel or (self.config.expert_model_parallel_size > 1 and not get_args().tp_2d): + with torch.no_grad(): + global_indices = gather_from_sequence_parallel_region_to_moe_async(indices) + + # Apply load balancing loss + if self.training: + scores = torch.softmax(logits, dim=-1, dtype=torch.float32) + probs = self.apply_load_balancing_loss(scores, tokens_per_expert, activation=probs) + return probs, global_indices + + +def routing_tp_extend_ep(self, logits: torch.Tensor): + """Top-k routing function + + Args: + logits (torch.Tensor): Logits tensor after gating. + + Returns: + probs (torch.Tensor): the probabilities tensor after load balancing. 
+ indices (torch.Tensor): the indices tensor after top-k selection. + """ + logits = logits.view(-1, self.config.num_moe_experts) + + # Apply Z-Loss + logits = self.apply_z_loss(logits) + + if self.routing_type == "sinkhorn": + scores, indices = self.sinkhorn_load_balancing(logits) + elif self.routing_type == "aux_loss": + scores, indices = self.aux_loss_load_balancing(logits) + elif self.routing_type == "none": + # A naive top-k routing without load balancing + scores, indices, _ = topk_softmax_with_capacity( + logits, + self.topk, + capacity_factor=self.config.moe_expert_capacity_factor, + pad_to_capacity=self.config.moe_pad_expert_input_to_capacity, + drop_policy=self.config.moe_token_drop_policy, + ) + else: + raise ValueError(f"Unsupported MoE routing type: {self.routing_type}") + + return scores, indices diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/token_dispatcher.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/token_dispatcher.py new file mode 100644 index 000000000..3645d5cd6 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/token_dispatcher.py @@ -0,0 +1,1262 @@ +# Copyright (c) 2024; NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+import torch +from torch_npu.utils.collect_env import get_cann_version +from megatron.training import get_args +from megatron.core import parallel_state, tensor_parallel, mpu +from megatron.core.transformer.moe.moe_utils import moe_gather, moe_scatter, permute, unpermute +from megatron.core.tensor_parallel.mappings import _gather_along_first_dim_expert_parallel, reduce_scatter_to_sequence_parallel_region +from mindspeed.core.transformer.moe.router import gather_from_sequence_parallel_region_to_moe_async +from mindspeed.core.transformer.moe.comm_utils import (async_all_to_all, async_reduce_scatter, + async_all_gather, reduce_scatter, all_gather) +from mindspeed.core.transformer.moe.moe_layer_overlap_all2all import forward_func +from mindspeed.core.transformer.moe.unpermute_without_activation import UnpermuteWithoutActivation +from mindspeed.core.transformer.moe.moe_utils import (AG_SHARED_EXPERTS_INPUTS, + permute_with_ep, unpermute_with_ep) + + +def is_less_or_equal_rc2_cann_version(): + cann_starts_with = ('8.0.RC1', '8.0.RC2') + cann_all = ('not known', '8.0.T1', '8.0.T2', '8.0.T3', '8.0.T37', '8.0.T5', '8.0.T6', '8.0.T7', + '8.0.T8', '8.0.T10', '8.0.T13', '8.0.T16', '8.0.T50', '8.0.T51', '8.0.T52') + cann_version = get_cann_version() + return cann_version in cann_all or cann_version.startswith(cann_starts_with) + + +cann_version_check = is_less_or_equal_rc2_cann_version() + + +def allgather_token_permutation(self, hidden_states: torch.Tensor, max_prob: torch.Tensor, max_ind): + args = get_args() + self.hidden_shape = hidden_states.shape + # [S/TP, B, H] -> [S*B/TP, H] + hidden_states = hidden_states.view(-1, self.hidden_shape[-1]) + + # Permute the tokens across the expert parallel devices. 
+ if (self.config.tensor_model_parallel_size > 1) or ( + self.config.expert_model_parallel_size > 1 + ): + # [S*B/TP, H] -> [S*B, H] + with torch.no_grad(): + global_indices, gi_handle = max_ind if isinstance(max_ind, + tuple) else gather_from_sequence_parallel_region_to_moe_async( + max_ind) + global_probs, gp_handle = gather_from_sequence_parallel_region_to_moe_async(max_prob) + global_hidden_states, ghs_handle = gather_from_sequence_parallel_region_to_moe_async(hidden_states) + + with torch.no_grad(): + gi_handle.wait() + global_local_mask = (global_indices >= self.local_expert_indices[0]) & \ + (global_indices <= self.local_expert_indices[-1]) + local_indices = global_indices.masked_select(global_local_mask) + self.indices = torch.argsort(local_indices.float(), dim=0) + num_global_experts = self.num_local_experts * parallel_state.get_expert_model_parallel_world_size() + if args.moe_tp_extend_ep: + num_global_experts *= parallel_state.get_tensor_model_parallel_world_size() + all_tokens_per_expert = torch.histc( + global_indices, + bins=num_global_experts, + min=0, + max=num_global_experts - 1, + ) + self.all_tokens_per_expert = all_tokens_per_expert.to(torch.long) + tokens_per_expert = self.all_tokens_per_expert[self.local_expert_indices[0]: self.local_expert_indices[-1] + 1] + self.global_local_map = global_local_mask.nonzero()[:, 0] + + if self.router_topk > 1: # k > 1 + gp_handle.wait() + self.local_probs = global_probs.masked_select(global_local_mask) + else: + self.local_probs = max_prob + + ghs_handle.wait() + if cann_version_check: + local_hidden_states = global_hidden_states[self.global_local_map, :] + else: + self.global_local_map = self.global_local_map.view(-1, 1).expand(-1, hidden_states.shape[-1]) + local_hidden_states = moe_gather.apply(global_hidden_states, self.global_local_map) + else: + if self.router_topk > 1: + global_local_mask = torch.ones_like(max_ind).bool() + local_indices = max_ind.masked_select(global_local_mask) + self.local_probs = 
max_prob.masked_select(global_local_mask) + self.global_local_map = global_local_mask.nonzero()[:, 0] + if cann_version_check: + local_hidden_states = hidden_states[self.global_local_map, :] + else: + self.global_local_map = self.global_local_map.view(-1, 1).expand( + -1, hidden_states.shape[-1] + ) + local_hidden_states = torch.gather(hidden_states, 0, self.global_local_map) + else: + local_indices = max_ind + self.local_probs = max_prob + local_hidden_states = hidden_states + self.global_local_map = None + + with torch.no_grad(): + # The indices of local_indices that give its sorted order along dim 0. + self.indices = torch.argsort(local_indices, dim=0) + # use 0.7.0 implement for better performance + tokens_per_expert = torch.histc( + local_indices, + bins=self.num_local_experts, + min=self.local_expert_indices[0], + max=self.local_expert_indices[-1], + ) + tokens_per_expert = tokens_per_expert.to(torch.long) + self.all_tokens_per_expert = tokens_per_expert + + if self.num_local_experts > 1: + if cann_version_check: + permuted_local_hidden_states = local_hidden_states[self.indices, :] + else: + self.indices = self.indices.view(-1, 1).expand(-1, hidden_states.shape[-1]) + permuted_local_hidden_states = moe_gather.apply(local_hidden_states, self.indices) + else: + permuted_local_hidden_states = local_hidden_states + return ( + permuted_local_hidden_states, + tokens_per_expert, + ) + + +class NewIndePut(torch.autograd.Function): + @staticmethod + def forward(self, tensor, map_, value_): + self.map_ = map_ + ori_dtype = None + if value_.dtype != torch.float32: + ori_dtype = value_.dtype + value_ = value_.float() + output = tensor.index_put_(map_, value_, accumulate=True) + if ori_dtype: + return output.to(ori_dtype) + return output + + def backward(self, grad_input): + map_ = self.map_ + grad_output = grad_input.index_select(0, map_[0]) + return None, None, grad_output + + +def allgather_token_unpermutation(self, hidden_states: torch.Tensor, bias: torch.Tensor = 
None, ): + # Stage1: unpermute the tokens and bias locally, respectively. + scores = self.local_probs.to(dtype=hidden_states.dtype) + if self.num_local_experts > 1: + if cann_version_check: + unpermuted_local_hidden = torch.zeros_like(hidden_states) + unpermuted_local_hidden.index_put_((self.indices,), hidden_states[:self.indices.shape[0], :], + accumulate=False) + else: + assert self.indices.shape == hidden_states.shape + unpermuted_local_hidden = moe_scatter.apply(hidden_states, self.indices) + else: + unpermuted_local_hidden = hidden_states + + # Scale the expert output prior to reduction and subsequent to local unpermutation if k > 1. + if self.router_topk > 1: + unpermuted_local_hidden = unpermuted_local_hidden * scores.view(-1, 1) + + unpermuted_local_bias = None + if self.add_bias: + assert bias is not None + unpermuted_local_bias = torch.zeros_like(hidden_states) + if cann_version_check: + unpermuted_local_bias.index_put_((self.indices,), bias[:self.indices.shape[0], :], accumulate=False) + else: + assert self.indices.shape == bias.shape + unpermuted_local_bias = unpermuted_local_bias.scatter(0, self.indices, bias) + if self.router_topk > 1: + unpermuted_local_bias = unpermuted_local_bias * scores.view(-1, 1) + + output_total = unpermuted_local_hidden + output_bias_total = unpermuted_local_bias + + # Unpermute the tokens across expert parallel devices. + if (self.config.tensor_model_parallel_size > 1) or ( + self.config.expert_model_parallel_size > 1 + ): + assert ( + self.global_local_map is not None + ), "global_local_map is necessary for `AllGather`."
+ ep_group_size = parallel_state.get_tensor_and_expert_parallel_world_size() + # hidden_shape: [SeqLen/TP, MBS, HiddenSize], global_num_tokens = SeqLen/TP*MBS*(TP*EP) + global_num_tokens = self.hidden_shape[0] * self.hidden_shape[1] * ep_group_size + global_hidden_shape = [global_num_tokens, hidden_states.shape[-1]] + if cann_version_check: + unpermuted_global_hidden = torch.zeros(global_hidden_shape, dtype=torch.float, + device=torch.cuda.current_device()) + unpermuted_global_hidden = NewIndePut.apply(unpermuted_global_hidden, (self.global_local_map,), + unpermuted_local_hidden[:self.global_local_map.shape[0], :]) + else: + assert self.global_local_map.shape == unpermuted_local_hidden.shape + unpermuted_global_hidden = moe_scatter.apply( + unpermuted_local_hidden, self.global_local_map, global_hidden_shape + ) + + output_total = tensor_parallel.reduce_scatter_to_sequence_parallel_region_from_moe(unpermuted_global_hidden) + if self.add_bias: + # Unpermute the bias across expert parallel devices.
+ unpermuted_global_bias = torch.zeros_like(unpermuted_global_hidden) + if cann_version_check: + unpermuted_global_bias.index_put_((self.global_local_map,), + unpermuted_local_bias[:self.global_local_map.shape[0], :], + accumulate=True) + else: + unpermuted_global_bias = unpermuted_global_bias.scatter_add( + 0, self.global_local_map, unpermuted_local_bias + ) + + output_bias_total = ( + tensor_parallel.reduce_scatter_to_sequence_parallel_region_from_moe( + unpermuted_global_bias + ) + ) + # bias is duplicated across tensor parallelism ranks; + # reduce scatter reduces bias across tensor parallel_ranks + output_bias_total = (output_bias_total / parallel_state.get_tensor_model_parallel_world_size()) + else: + if self.router_topk > 1: + global_num_tokens = self.hidden_shape[0] * self.hidden_shape[1] + global_hidden_shape = [global_num_tokens, hidden_states.shape[-1]] + unpermuted_global_hidden = torch.zeros( + global_hidden_shape, + dtype=hidden_states.dtype, + device=torch.cuda.current_device(), + ) + if cann_version_check: + output_total = unpermuted_global_hidden.index_put((self.global_local_map,), + unpermuted_local_hidden[ + :self.global_local_map.shape[0], :], + accumulate=True) + else: + output_total = unpermuted_global_hidden.scatter_add( + 0, self.global_local_map, unpermuted_local_hidden + ) + if self.add_bias: + unpermuted_global_bias = torch.zeros_like(unpermuted_global_hidden) + if cann_version_check: + output_bias_total = unpermuted_global_bias.index_put((self.global_local_map,), + unpermuted_local_bias[ + :self.global_local_map.shape[0], :], + accumulate=True) + else: + output_bias_total = unpermuted_global_bias.scatter_add( + 0, self.global_local_map, unpermuted_local_bias + ) + + if self.router_topk == 1: + output_total = output_total * scores + output_total = output_total.view(self.hidden_shape) + if self.add_bias: + assert output_bias_total is not None + if self.router_topk == 1: + output_bias_total = output_bias_total * scores + output_bias_total = 
output_bias_total.view(self.hidden_shape) + else: + output_bias_total = None + + return output_total, output_bias_total + + +def preprocess(self, indices: torch.Tensor) -> torch.Tensor: + # use 0.7.0 implement for better performance + num_local_tokens_per_expert = torch.histc( + indices, bins=self.num_experts, min=0, max=self.num_experts + ) + # num_local_tokens_per_expert: [num_experts] + + ep_size = self.config.expert_model_parallel_size + if self.drop_and_pad: + # probs: [num_experts, capacity] + self.capacity = self.probs.size(1) + num_tokens_per_local_expert = torch.full( + (self.num_local_experts,), self.capacity * self.ep_size, dtype=torch.long, + device=torch.cuda.current_device() + ) + return num_tokens_per_local_expert + elif self.config.moe_expert_capacity_factor is not None: + # Token drop but no pad. A synchronization is needed before the first + # permutation to get the `num_out_tokens` CPU value. + self.num_out_tokens = num_local_tokens_per_expert.sum().to( + torch.device("cpu"), non_blocking=True + ) + self.cuda_sync_point = "before_permutation_1" + elif ep_size > 1: + # Token dropless and enable ep. A synchronization is needed before expert parallel + # AlltoAll communication to get the `input_splits` and `output_splits` CPU values. + self.cuda_sync_point = "before_ep_alltoall" + else: + # Token dropless and no ep. A synchronization is needed before the token_permutation() + # function returns to get the `tokens_per_expert` CPU value. + self.cuda_sync_point = "before_finish" + + if ep_size > 1: + # =================================================== + # Calculate input_splits, output_splits for alltoall-v. 
+ # =================================================== + self.input_splits = ( + num_local_tokens_per_expert.reshape(ep_size, self.num_local_experts) + .sum(axis=1) + .to(torch.device("cpu"), non_blocking=True) + .numpy() + ) + num_global_tokens_per_expert = _gather_along_first_dim_expert_parallel( + num_local_tokens_per_expert + ).reshape(ep_size, self.num_experts) + self.num_global_tokens_per_local_expert = num_global_tokens_per_expert[ + :, self.local_expert_indices[0]: self.local_expert_indices[-1] + 1 + ] + self.output_splits = ( + self.num_global_tokens_per_local_expert.sum(axis=-1).to(torch.device("cpu")).numpy() + ) + num_tokens_per_local_expert = self.num_global_tokens_per_local_expert.sum(axis=0) + # =================================================== + # num_global_tokens_per_expert: [ep_size, num_experts] + # num_global_tokens_per_local_expert: [ep_size, num_local_experts] + # num_tokens_per_local_expert: [num_local_experts] + # =================================================== + else: + self.num_global_tokens_per_local_expert = num_local_tokens_per_expert.reshape( + -1, self.num_experts + ) + num_tokens_per_local_expert = num_local_tokens_per_expert + + if self.num_local_experts > 1: + if not hasattr(self, 'comm_stream'): + self.comm_stream = torch.cuda.Stream() + self.comm_stream.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(self.comm_stream): + # No further synchronization is needed because torch.repeat_interleave() calls stream + # synchronization internally when the `output_size` parameter is not provided. 
+ self.cuda_sync_point = "no_sync" + self.global_input_tokens_local_experts_indices = torch.repeat_interleave( + self.expert_ids_per_ep_rank, self.num_global_tokens_per_local_expert.ravel() + ) + + return num_tokens_per_local_expert + + +def alltoall_token_permutation( + self, hidden_states: torch.Tensor, probs: torch.Tensor, indices: torch.Tensor, +): + self.hidden_shape = hidden_states.shape + self.probs = probs + assert probs.dim() == 2, "Expected 2D tensor for probs" + assert indices.dim() == 2, "Expected 2D tensor for indices" + tokens_per_expert = self.preprocess(indices) + + # Flatten the input tensor + # hidden_states: [S/TP, B, H] -> [S*B/TP, H] + hidden_states = hidden_states.view(-1, self.hidden_shape[-1]) + + # Perform tensor parallel AlltoAll communication + # hidden_states: [S*B/TP, H] -> [S*B, H/TP] + if parallel_state.get_tensor_model_parallel_world_size() > 1: + hidden_states = tensor_parallel.all_to_all_sp2hp(hidden_states) + + # Permutation 1: input to AlltoAll input + self.hiddden_shape_before_permute = hidden_states.shape + if self.cuda_sync_point == "before_permutation_1": + torch.cuda.current_stream().synchronize() + permutated_local_input_tokens, self.reversed_local_input_permutation_mapping = permute( + hidden_states, + indices, + num_out_tokens=self.num_out_tokens, + padded_mode=self.drop_and_pad, + ) + + if get_args().moe_bmm_mc2: + return permutated_local_input_tokens, tokens_per_expert + + # Perform expert parallel AlltoAll communication + if self.cuda_sync_point == "before_ep_alltoall": + torch.cuda.current_stream().synchronize() + global_input_tokens = tensor_parallel.all_to_all( + parallel_state.get_expert_model_parallel_group(), + permutated_local_input_tokens, + self.output_splits, + self.input_splits, + ) + + # Permutation 2: AlltoAll output to expert input if num_local_experts > 1 + if self.num_local_experts > 1: + if not self.drop_and_pad: + torch.cuda.current_stream().wait_stream(self.comm_stream) + global_input_tokens, 
self.reversed_global_input_permutation_mapping = permute( + global_input_tokens, self.global_input_tokens_local_experts_indices + ) + else: + global_input_tokens = global_input_tokens.reshape( + self.ep_size, self.num_local_experts, self.capacity, -1 + ) + global_input_tokens = ( + global_input_tokens.transpose(0, 1) + .reshape(self.num_local_experts * self.ep_size * self.capacity, -1) + .contiguous() + ) + + # Perform tensor parallel All-Gather on the hidden dimension to obtain the input tokens. + # global_input_tokens: [SEQL, H/TP] -> [SEQL, H] + if parallel_state.get_tensor_model_parallel_world_size() > 1 and self.config.moe_grouped_gemm: + global_input_tokens = tensor_parallel.all_gather_last_dim_from_tensor_parallel_region( + global_input_tokens + ) + if self.cuda_sync_point == "before_finish": + torch.cuda.current_stream().synchronize() + + return global_input_tokens, tokens_per_expert + + +def alltoall_token_unpermutation_with_bmm( + self, hidden_states: torch.Tensor, bias: torch.Tensor = None, +): + # if use op bmm_reducescatter_alltoall to skip reducescatter and alltoall + output = unpermute( + hidden_states, + self.reversed_local_input_permutation_mapping, + probs=self.probs, + padded_mode=self.drop_and_pad, + restore_shape=self.hiddden_shape_before_permute, + ) + + if parallel_state.get_tensor_model_parallel_world_size() > 1: + output = tensor_parallel.all_to_all_hp2sp(output) + + output = output.view(self.hidden_shape) + return output, None + + +def alltoall_token_permutation_with_bmm( + self, hidden_states: torch.Tensor, probs: torch.Tensor, indices: torch.Tensor, +): + # if use op alltoall_allgather_bmm to skip alltoall and allgather; shapes are saved under the attribute names that alltoall_token_unpermutation_with_bmm reads back + self.hidden_shape = hidden_states.shape + self.probs = probs + assert probs.dim() == 2, "Experted 2D tensor for probs" + assert indices.dim() == 2, "Experted 2D tensor for indices" + hidden_states = hidden_states.view(-1, self.hidden_shape[-1]) + tokens_per_expert = self.preprocess(indices) + + if
parallel_state.get_tensor_model_parallel_world_size() > 1: + hidden_states = tensor_parallel.all_to_all_sp2hp(hidden_states) + + self.hiddden_shape_before_permute = hidden_states.shape # spelling (three d's) matches the attribute passed as restore_shape in alltoall_token_unpermutation_with_bmm + permutated_local_input_tokens, self.reversed_local_input_permutation_mapping = permute( + hidden_states, + indices, + num_out_tokens=self.num_out_tokens, + padded_mode=self.drop_and_pad, + ) + return permutated_local_input_tokens, tokens_per_expert + + +def preprocess_tp_extend_ep(self, indices: torch.Tensor, *args) -> torch.Tensor: + moe_hierarchical_alltoallv = get_args().moe_hierarchical_alltoallv + num_local_tokens_per_expert = torch.histc( + indices, bins=self.num_experts, min=0, max=self.num_experts + ) + # num_local_tokens_per_expert: [num_experts] + + ep_size = self.config.expert_model_parallel_size + if self.drop_and_pad: + # probs: [num_experts, capacity] + self.capacity = self.probs.size(1) + num_tokens_per_local_expert = torch.full( + (self.num_local_experts,), self.capacity * self.ep_size, dtype=torch.long, + device=torch.cuda.current_device() + ) + return num_tokens_per_local_expert + elif self.config.moe_expert_capacity_factor is not None: + self.num_out_tokens = num_local_tokens_per_expert.sum().cpu() + tp_size = parallel_state.get_tensor_model_parallel_world_size() + tp_extended_ep_size = ep_size * tp_size + if tp_extended_ep_size > 1: + # =================================================== + # Calculate input_splits, output_splits for alltoall-v.
+ # =================================================== + if moe_hierarchical_alltoallv: + tp_group = parallel_state.get_tensor_model_parallel_group() + self.input_splits_tp_ep = ( + num_local_tokens_per_expert.reshape(tp_extended_ep_size, self.num_local_experts) + .sum(axis=1) + .to(torch.device("cpu")) + .numpy() + ) + expert_parallel_rank = mpu.get_expert_model_parallel_rank() + tp_size = parallel_state.get_tensor_model_parallel_world_size() + offset = expert_parallel_rank * tp_size + self.input_splits = [self.input_splits_tp_ep[i + offset] for i in range(tp_size)] + self.input_splits_tp_ep = self.input_splits_tp_ep.reshape(ep_size, tp_size).sum(axis=1) + num_global_tokens_per_expert = \ + all_gather(num_local_tokens_per_expert, group=tp_group).reshape(tp_size, self.num_experts) + # shared_experts allgather with tp + if get_args().n_shared_experts and parallel_state.get_tensor_model_parallel_world_size() > 1: + _, shared_experts_input, shared_experts_allgather_handle = async_all_gather( + args[0], parallel_state.get_tensor_model_parallel_group(), is_use_get_global_memory_buffer=True + ) + AG_SHARED_EXPERTS_INPUTS.append((shared_experts_input, shared_experts_allgather_handle)) + else: + self.input_splits_tp_ep = None + self.input_splits = ( + num_local_tokens_per_expert.reshape(tp_extended_ep_size, self.num_local_experts) + .sum(axis=1) + .to(torch.device("cpu")) + .numpy() + ) + num_global_tokens_per_expert = tensor_parallel.gather_from_sequence_parallel_region_to_moe( + num_local_tokens_per_expert + ).reshape(tp_extended_ep_size, self.num_experts) + self.num_global_tokens_per_local_expert = num_global_tokens_per_expert[ + :, self.local_expert_indices + ] + self.output_splits = ( + self.num_global_tokens_per_local_expert.sum(axis=-1).to(torch.device("cpu")).numpy() + ) + num_tokens_per_local_expert = self.num_global_tokens_per_local_expert.sum(axis=0) + # =================================================== + # num_global_tokens_per_expert: [ep_size, num_experts] 
+ # num_global_tokens_per_local_expert: [ep_size, num_local_experts] + # num_tokens_per_local_expert: [num_local_experts] + # =================================================== + else: + self.num_global_tokens_per_local_expert = num_local_tokens_per_expert.reshape( + -1, self.num_experts + ) + num_tokens_per_local_expert = num_local_tokens_per_expert + + if self.num_local_experts > 1: + if not hasattr(self, 'comm_stream'): + self.comm_stream = torch.cuda.Stream() + self.comm_stream.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(self.comm_stream): + if moe_hierarchical_alltoallv: + expert_ids_per_ep_rank = torch.tensor( + [i % self.num_local_experts for i in range(self.config.num_moe_experts // ep_size)], + dtype=torch.int32, + device=torch.cuda.current_device(), + ) + else: + expert_ids_per_ep_rank = torch.tensor( + [i % self.num_local_experts for i in range(self.config.num_moe_experts)], + dtype=torch.int32, + device=torch.cuda.current_device(), + ) + self.global_input_tokens_local_experts_indices = torch.repeat_interleave( + expert_ids_per_ep_rank, self.num_global_tokens_per_local_expert.ravel() + ) + + return num_tokens_per_local_expert + + +def alltoall_token_permutation_tp_extend_ep( + self, hidden_states: torch.Tensor, probs: torch.Tensor, indices: torch.Tensor, +): + self.hidden_shape = hidden_states.shape + self.probs = probs + assert probs.dim() == 2, "Expected 2D tensor for probs" + assert indices.dim() == 2, "Expected 2D tensor for indices" + tokens_per_expert = self.preprocess(indices) + + # Flatten the input tensor + # hidden_states: [S/TP, B, H] -> [S*B/TP, H] + hidden_states = hidden_states.view(-1, self.hidden_shape[-1]) + + # Permutation 1: input to AlltoAll input + self.hiddden_shape_before_permute = hidden_states.shape + permutated_local_input_tokens, self.reversed_local_input_permutation_mapping = permute( + hidden_states, + indices, + num_out_tokens=self.num_out_tokens, + padded_mode=self.drop_and_pad, + ) + + # Perform 
expert parallel AlltoAll communication + global_input_tokens = tensor_parallel.all_to_all( + parallel_state.get_tensor_and_expert_parallel_group(), + permutated_local_input_tokens, + self.output_splits, + self.input_splits, + ) + + # Permutation 2: AlltoAll output to expert input if num_local_experts > 1 + if self.num_local_experts > 1: + if not self.drop_and_pad: + torch.cuda.current_stream().wait_stream(self.comm_stream) + global_input_tokens, self.reversed_global_input_permutation_mapping = permute( + global_input_tokens, self.global_input_tokens_local_experts_indices + ) + else: + global_input_tokens = global_input_tokens.reshape( + self.ep_size, self.num_local_experts, self.capacity, -1 + ) + global_input_tokens = ( + global_input_tokens.transpose(0, 1) + .reshape(self.num_local_experts * self.ep_size * self.capacity, -1) + .contiguous() + ) + + return global_input_tokens, tokens_per_expert + + +def alltoall_token_unpermutation_tp_extend_ep( + self, hidden_states: torch.Tensor, bias: torch.Tensor = None, +): + """ + Reverse the token permutation to restore the original order. + + Args: + hidden_states (torch.Tensor): Output from local experts. + bias (torch.Tensor, optional): Bias tensor (not supported). + + Returns: + Tuple[torch.Tensor, Optional[torch.Tensor]]: + - Unpermuted token embeddings in the original order. + - None (bias is not supported). 
+ """ + assert bias is None, "Bias is not supported in MoEAlltoAllTokenDispatcher" + + # Unpermutation 2: expert output to AlltoAll input + # hidden_states: [SEQL, H] -> [SEQL, H/TP] + if self.num_local_experts > 1: + if not self.drop_and_pad: + hidden_states = unpermute( + hidden_states, self.reversed_global_input_permutation_mapping, + ) + else: + hidden_states = hidden_states.reshape( + self.num_local_experts, self.ep_size, self.capacity, -1 + ) + hidden_states = ( + hidden_states.transpose(0, 1) + .reshape(self.ep_size * self.num_local_experts * self.capacity, -1) + .contiguous() + ) + + # Perform expert parallel AlltoAll communication + permutated_local_input_tokens = tensor_parallel.all_to_all( + parallel_state.get_tensor_and_expert_parallel_group(), + hidden_states, + self.input_splits, + self.output_splits, + ) + + # Unpermutation 1: AlltoAll output to output + output = unpermute( + permutated_local_input_tokens, + self.reversed_local_input_permutation_mapping, + probs=self.probs, + padded_mode=self.drop_and_pad, + restore_shape=self.hiddden_shape_before_permute, + ) + + # Reshape the output tensor + output = output.view(self.hidden_shape) + return output, None + + +def allgather_token_permutation_new(self, global_indices_2_tuple, global_probs_2_tuple, global_hidden_states_2_tuple): + global_indices, gi_handle = global_indices_2_tuple + global_probs, gp_handle = global_probs_2_tuple + global_hidden_states, ghs_handle = global_hidden_states_2_tuple + + local_hidden_states = None + tokens_per_expert = None + + if (self.config.tensor_model_parallel_size > 1) or ( + self.config.expert_model_parallel_size > 1 + ): + with (torch.no_grad()): + gi_handle.wait() + global_local_mask = (global_indices >= self.local_expert_indices[0]) & \ + (global_indices <= self.local_expert_indices[-1]) + + # masked_select -> reshape + local_indices = global_indices.masked_select(global_local_mask) + self.indices = torch.argsort(local_indices.float(), dim=0) + num_global_experts = 
self.num_local_experts * parallel_state.get_expert_model_parallel_world_size() + if get_args().moe_tp_extend_ep: + num_global_experts *= parallel_state.get_tensor_model_parallel_world_size() + all_tokens_per_expert = torch.histc( + global_indices, + bins=num_global_experts, + min=0, + max=num_global_experts + ) + self.all_tokens_per_expert = all_tokens_per_expert.to(torch.long) + tokens_per_expert = self.all_tokens_per_expert[self.local_expert_indices[0]: self.local_expert_indices[-1] + 1] + self.global_local_map = global_local_mask.nonzero()[:, 0] + + if self.router_topk > 1: # k > 1 + gp_handle.wait() + # masked_select -> reshape + self.local_probs = global_probs.masked_select(global_local_mask) + + ghs_handle.wait() + if cann_version_check: + local_hidden_states = global_hidden_states[self.global_local_map, :] + else: + self.global_local_map = self.global_local_map.view(-1, 1).expand(-1, self.hidden_shape[-1]) + local_hidden_states = moe_gather.apply(global_hidden_states, self.global_local_map) + if self.num_local_experts > 1: + if cann_version_check: + permuted_local_hidden_states = local_hidden_states[self.indices, :] + else: + self.indices = self.indices.view(-1, 1).expand(-1, self.hidden_shape[-1]) + permuted_local_hidden_states = moe_gather.apply(local_hidden_states, self.indices) + else: + permuted_local_hidden_states = local_hidden_states + return ( + permuted_local_hidden_states, + tokens_per_expert, + self.global_local_map, + self.indices + ) + + +def allgather_token_unpermutation_new(self, hidden_states: torch.Tensor, bias: torch.Tensor = None): + # Stage1: unpermute the tokens and bias locally respectively.w + scores = self.local_probs.to(dtype=hidden_states.dtype) + if self.num_local_experts > 1: + if cann_version_check: + unpermuted_local_hidden = torch.zeros_like(hidden_states) + unpermuted_local_hidden.index_put_((self.indices,), hidden_states[:self.indices.shape[0], :], + accumulate=False) + else: + assert self.indices.shape == 
hidden_states.shape + unpermuted_local_hidden = moe_scatter.apply(hidden_states, self.indices) + else: + unpermuted_local_hidden = hidden_states + + # Scale the expert output prior to reduction and subsequent to local unpermutation if k > 1. + if self.router_topk > 1: + unpermuted_local_hidden = unpermuted_local_hidden * scores.view(-1, 1) + + unpermuted_local_bias = None + if self.add_bias: + assert bias is not None + unpermuted_local_bias = torch.zeros_like(hidden_states) + if cann_version_check: + unpermuted_local_bias.index_put_((self.indices,), bias[:self.indices.shape[0], :], accumulate=False) + else: + assert self.indices.shape == bias.shape + unpermuted_local_bias = unpermuted_local_bias.scatter(0, self.indices, bias) + + if self.router_topk > 1: + unpermuted_local_bias = unpermuted_local_bias * scores.view(-1, 1) + + output_total = unpermuted_local_hidden + output_bias_total = unpermuted_local_bias + + # Unpermute the tokens across expert parallel devices. + if (self.config.tensor_model_parallel_size > 1) or ( + self.config.expert_model_parallel_size > 1 + ): + assert ( + self.global_local_map is not None + ), "global_local_map is necessary for 'AllGather'." 
+ ep_group_size = parallel_state.get_tensor_and_expert_parallel_world_size() + # hidden_shape: [SeqLen/TP, MBS, HiddenSize], global_num_tokens = SeqLen/TP*MBS*(TP*EP) + global_num_tokens = self.hidden_shape[0] * self.hidden_shape[1] * ep_group_size + global_hidden_shape = [global_num_tokens, hidden_states.shape[-1]] + + if cann_version_check: + unpermuted_global_hidden = torch.zeros(global_hidden_shape, dtype=torch.float, + device=torch.cuda.current_device()) + unpermuted_global_hidden = NewIndePut.apply(unpermuted_global_hidden, (self.global_local_map,), + unpermuted_local_hidden[:self.global_local_map.shape[0], :]) + else: + unpermuted_global_hidden = torch.zeros( + global_hidden_shape, dtype=hidden_states.dtype, device=torch.cuda.current_device() + ) + # Reshape global_local_map to be compatible with Tensor.scatter + assert self.global_local_map.shape == unpermuted_local_hidden.shape + unpermuted_global_hidden = unpermuted_global_hidden.scatter_add( + 0, self.global_local_map, unpermuted_local_hidden) + + output_total = unpermuted_global_hidden + if self.add_bias: + # Unpermute the bias across expert parallel devices. 
+ unpermuted_global_bias = torch.zeros_like(unpermuted_global_hidden) + if cann_version_check: + unpermuted_global_bias.index_put_((self.global_local_map,), + unpermuted_local_bias[:self.global_local_map.shape[0], :], + accumulate=True) + else: + unpermuted_global_bias = unpermuted_global_bias.scatter_add( + 0, self.global_local_map, unpermuted_local_bias + ) + + output_bias_total = tensor_parallel.reduce_scatter_to_sequence_parallel_region_from_moe( + unpermuted_global_bias + ) + # bias is duplicated across tensor parallelism ranks; + # reduce scatter reduces bias across tensor parallel_ranks + output_bias_total = (output_bias_total / parallel_state.get_tensor_model_parallel_world_size()) + else: + if self.router_topk > 1: + global_num_tokens = self.hidden_shape[0] * self.hidden_shape[1] + global_hidden_shape = [global_num_tokens, hidden_states.shape[-1]] + unpermuted_global_hidden = torch.zeros( + global_hidden_shape, + dtype=hidden_states.dtype, + device=torch.cuda.current_device() + ) + if cann_version_check: + output_total = unpermuted_global_hidden.index_put((self.global_local_map,), + unpermuted_local_hidden[ + :self.global_local_map.shape[0], :], + accumulate=True) + else: + output_total = unpermuted_global_hidden.scatter_add( + 0, self.global_local_map, unpermuted_local_hidden + ) + + if self.add_bias: + unpermuted_global_bias = torch.zeros_like(unpermuted_global_hidden) + if cann_version_check: + output_bias_total = unpermuted_global_bias.index_put((self.global_local_map,), + unpermuted_local_bias[ + :self.global_local_map.shape[0], :], + accumulate=True) + else: + output_bias_total = unpermuted_global_bias.scatter_add( + 0, self.global_local_map, unpermuted_local_bias + ) + + if self.router_topk == 1: + output_total = output_total * scores + if self.add_bias: + assert output_bias_total is not None + if self.router_topk == 1: + output_bias_total = output_bias_total * scores + output_bias_total = output_bias_total.view(self.hidden_shape) + else: + 
output_bias_total = None + + return output_total, output_bias_total + + +def alltoall_token_permutation_new( + self, hidden_states: torch.Tensor, probs: torch.Tensor, indices: torch.Tensor, shared_experts, save_tensors, shared_expert_gate, moe_ctx=None +): + moe_hierarchical_alltoallv = get_args().moe_hierarchical_alltoallv + self.hidden_shape = hidden_states.shape + self.probs = probs + assert probs.dim() == 2, "Expected 2D tensor for probs" + assert indices.dim() == 2, "Expected 2D tensor for indices" + if moe_hierarchical_alltoallv: + ep_group = parallel_state.get_expert_model_parallel_group() + _, indices, indices_handle = async_all_gather(indices, group=ep_group) + indices_handle.wait() + save_tensors.append(indices) + _, hidden_states_ep, hidden_states_ep_handle = async_all_gather(hidden_states, group=ep_group) + else: + indices_ep, hidden_states_ep, hidden_states_ep_handle = None, None, None + save_tensors.append(indices_ep) + + def alltoall_token_permutation1(hidden_states, indices, *args): + if moe_hierarchical_alltoallv: + _, self.probs, probs_handle = async_all_gather(self.probs, group=ep_group) + tokens_per_expert = self.preprocess(indices, hidden_states) + args[1].wait() # hidden_states_ep_handle + save_tensors.append(args[0]) # hidden_states_ep + # hidden_states: [S/TP, B, H] -> [S*B/TP, H] + hidden_states = args[0].view(-1, self.hidden_shape[-1]) + self.hidden_shape_before_permute = hidden_states.shape + # Permutation 1: input to AlltoAll input + if self.cuda_sync_point == "before_permutation_1": + torch.cuda.current_stream().synchronize() + probs_handle.wait() + self.probs = self.probs.detach() + self.probs.requires_grad = True + save_tensors.append(self.probs) + permutated_local_input_tokens, permuted_probs, self.reversed_local_input_permutation_mapping = permute_with_ep( + hidden_states, indices, probs=self.probs, topk=self.router_topk, + gb_inputs_splits=self.input_splits_tp_ep, + ) + self.permuted_probs = permuted_probs + else: + 
tokens_per_expert = self.preprocess(indices) + save_tensors.append(args[0]) + if get_args().moe_experts_pipeline_degree: + tokens_per_expert = tokens_per_expert.cpu() + + # Flatten the input tensor + # hidden_states: [S/TP, B, H] -> [S*B/TP, H] + hidden_states = hidden_states.view(-1, self.hidden_shape[-1]) + + # Perform tensor parallel AlltoAll communication + # hidden_states: [S*B/TP, H] -> [S*B, H/TP] + if not get_args().moe_tp_extend_ep and parallel_state.get_tensor_model_parallel_world_size() > 1: + hidden_states = tensor_parallel.all_to_all_sp2hp(hidden_states) + + # Permutation 1: input to AlltoAll input + self.hiddden_shape_before_permute = hidden_states.shape + if self.cuda_sync_point == "before_permutation_1": + torch.cuda.current_stream().synchronize() + scores_ep = None + save_tensors.append(scores_ep) + permutated_local_input_tokens, self.reversed_local_input_permutation_mapping = permute( + hidden_states, + indices, + num_out_tokens=self.num_out_tokens, + padded_mode=self.drop_and_pad, + ) + return tokens_per_expert, permutated_local_input_tokens + + (tokens_per_expert, permutated_local_input_tokens), *_ = forward_func(alltoall_token_permutation1, + (hidden_states, indices, + hidden_states_ep, hidden_states_ep_handle)) + + # permute 1 + save_tensors.append(permutated_local_input_tokens) + + # Perform expert parallel AlltoAll communication + ep_group = parallel_state.get_expert_model_parallel_group() + if get_args().moe_tp_extend_ep: + ep_group = parallel_state.get_tensor_and_expert_parallel_group() + + # Perform expert parallel AlltoAll communication + if self.cuda_sync_point == "before_ep_alltoall": + torch.cuda.current_stream().synchronize() + if moe_hierarchical_alltoallv: + tp_group = parallel_state.get_tensor_model_parallel_group() + _, global_input_tokens, permute1_ep_all_to_all_handle = async_all_to_all( + permutated_local_input_tokens, + self.output_splits, + self.input_splits, + tp_group, + ) + else: + _, global_input_tokens, 
permute1_ep_all_to_all_handle = async_all_to_all( + permutated_local_input_tokens, + self.output_splits, + self.input_splits, + ep_group, + ) + + # shared experts + if shared_experts is not None: + (share_experts_output, _), *_ = forward_func(shared_experts, (hidden_states, moe_ctx)) + if parallel_state.get_tensor_model_parallel_world_size() > 1 and shared_expert_gate is None: + share_experts_graph, share_experts_output, rs_shared_experts_handle = async_reduce_scatter(share_experts_output, parallel_state.get_tensor_model_parallel_group(), + event=permute1_ep_all_to_all_handle, stream=torch.npu.default_stream()) + share_experts_output = (share_experts_graph, share_experts_output, rs_shared_experts_handle) + if shared_expert_gate is not None: + with torch.enable_grad(): + # tp not support shared expert gate for now + if parallel_state.get_tensor_model_parallel_world_size() > 1: + share_experts_output = reduce_scatter_to_sequence_parallel_region(share_experts_output) + share_experts_output = torch.nn.functional.sigmoid(shared_expert_gate(hidden_states)) * share_experts_output + else: + share_experts_output = None + + if permute1_ep_all_to_all_handle is not None: + permute1_ep_all_to_all_handle.wait() + permutated_local_input_tokens.untyped_storage().resize_(0) + + def alltoall_token_permutation2(global_input_tokens): + # Permutation 2: AlltoAll output to expert input if num_local_experts > 1 + if self.num_local_experts > 1: + if not self.drop_and_pad: + if self.comm_stream is not None: + torch.cuda.current_stream().wait_stream(self.comm_stream) + global_input_tokens, self.reversed_global_input_permutation_mapping = permute( + global_input_tokens, self.global_input_tokens_local_experts_indices + ) + else: + global_input_tokens = global_input_tokens.reshape( + self.ep_size, self.num_local_experts, self.capacity, -1 + ) + global_input_tokens = ( + global_input_tokens.transpose(0, 1) + .reshape(self.num_local_experts * self.ep_size * self.capacity, -1) + .contiguous() + ) 
+ # Perform tensor parallel AllGather on the hidden dimension to obtain the input tokens. + # global_input_tokens: [SEQL, H/TP] -> [SEQL, H] + need_tp_comm = (not get_args().moe_tp_extend_ep and + parallel_state.get_tensor_model_parallel_world_size() > 1 and + self.config.moe_grouped_gemm) and get_args().moe_experts_pipeline_degree == 0 + if need_tp_comm: + global_input_tokens = tensor_parallel.all_gather_last_dim_from_tensor_parallel_region( + global_input_tokens + ) + if self.cuda_sync_point == "before_finish": + torch.cuda.current_stream().synchronize() + + return global_input_tokens + + # token 重排2 input + (global_input_tokens), global_input_tokens_detach = forward_func(alltoall_token_permutation2, + global_input_tokens) + save_tensors.append(global_input_tokens_detach) + save_tensors.append(global_input_tokens) + global_input_tokens_detach.untyped_storage().resize_(0) + + return share_experts_output, global_input_tokens, tokens_per_expert + + +def alltoall_token_unpermutation_new( + self, hidden_states, bias, save_tensors +): + moe_hierarchical_alltoallv = get_args().moe_hierarchical_alltoallv + + def alltoall_token_unpermutation1(hidden_states): + assert bias is None, "Bias is not supported in MoEAlltoAllTokenDispatcher" + + # Perform tensor parallel Reduce-Scatter + # hidden_states: [SEQL, H] -> [SEQL, H/TP] + if not get_args().moe_tp_extend_ep and parallel_state.get_tensor_model_parallel_world_size() > 1 and get_args().moe_experts_pipeline_degree == 0: + hidden_states = tensor_parallel.reduce_scatter_last_dim_to_tensor_parallel_region(hidden_states) + + # Unpermutation 2: expert output to AlltoAll input + if self.num_local_experts > 1: + if not self.drop_and_pad: + hidden_states = unpermute( + hidden_states, self.reversed_global_input_permutation_mapping, + ) + else: + hidden_states = hidden_states.reshape( + self.num_local_experts, self.ep_size, self.capacity, -1 + ) + hidden_states = ( + hidden_states.transpose(0, 1) + .reshape(self.ep_size * 
self.num_local_experts * self.capacity, -1) + .contiguous() + ) + return hidden_states + if get_args().moe_experts_pipeline_degree: + with torch.enable_grad(): + hidden_states = alltoall_token_unpermutation1(hidden_states) + save_tensors.append(hidden_states) + else: + hidden_states, unpermute1_input_detach = forward_func(alltoall_token_unpermutation1, hidden_states) + save_tensors.append(unpermute1_input_detach) + save_tensors.append(hidden_states) + unpermute1_input_detach.untyped_storage().resize_(0) + + ep_group = parallel_state.get_expert_model_parallel_group() + if get_args().moe_tp_extend_ep: + ep_group = parallel_state.get_tensor_and_expert_parallel_group() + # Perform expert parallel AlltoAll communication + # hidden_states: [SEQL, H] -> [SEQL, H/TP] + if moe_hierarchical_alltoallv: + tp_group = parallel_state.get_tensor_model_parallel_group() + _, permutated_local_input_tokens, handle = async_all_to_all( + hidden_states, + self.input_splits, + self.output_splits, + tp_group + ) + else: + _, permutated_local_input_tokens, handle = async_all_to_all( + hidden_states, + self.input_splits, + self.output_splits, + ep_group + ) + if handle is not None: + handle.wait() + hidden_states.untyped_storage().resize_(0) + + def alltoall_token_unpermutation2(permutated_local_input_tokens): + # Unpermutation 1: AlltoAll output to output + if get_args().moe_zero_memory != "disable": + output = UnpermuteWithoutActivation.apply( + permutated_local_input_tokens, + self.reversed_local_input_permutation_mapping, + self.probs + ) + else: + if moe_hierarchical_alltoallv: + unpermute_with_ep_input_tensors_list = [permutated_local_input_tokens, + self.reversed_local_input_permutation_mapping, + self.permuted_probs] + output = unpermute_with_ep( + unpermute_with_ep_input_tensors_list, + restore_shape=self.hidden_shape_before_permute, + probs=self.probs, + topk=self.router_topk + ) + else: + output = unpermute( + permutated_local_input_tokens, + 
self.reversed_local_input_permutation_mapping, + probs=self.probs, + padded_mode=self.drop_and_pad, + restore_shape=self.hiddden_shape_before_permute, + ) + if moe_hierarchical_alltoallv: + return output + # Perform tensor parallel AlltoAll communication + # output: [S*B, H/TP] -> [S*B/TP, H] + if not get_args().moe_tp_extend_ep and parallel_state.get_tensor_model_parallel_world_size() > 1: + output = tensor_parallel.all_to_all_hp2sp(output) + + # Reshape the output tensor + output = output.view(self.hidden_shape) + return output + + output, unpermute2_input_detach = forward_func(alltoall_token_unpermutation2, permutated_local_input_tokens) + save_tensors.append(unpermute2_input_detach) + should_resize = not self.drop_and_pad and not moe_hierarchical_alltoallv and \ + not get_args().use_fused_moe_token_permute_and_unpermute or get_args().moe_zero_memory != "disable" + if should_resize: + unpermute2_input_detach.untyped_storage().resize_(0) + save_tensors.append(output) + + if moe_hierarchical_alltoallv: + ep_group = parallel_state.get_expert_model_parallel_group() + _, output, output_handle = async_reduce_scatter(output, group=ep_group) + output_handle.wait() + output = output.view(self.hidden_shape) + return output, None + + +def allgather_token_permutation_npu(self, hidden_states: torch.Tensor, max_prob: torch.Tensor, max_ind: torch.Tensor): + self.hidden_shape = hidden_states.shape + # [S/TP, B, H] -> [S*B/TP, H] + hidden_states = hidden_states.view(-1, self.hidden_shape[-1]) + + # Permute the tokens across the expert parallel devices. 
+ if (self.config.tensor_model_parallel_size > 1) or ( + self.config.expert_model_parallel_size > 1 + ): + with torch.no_grad(): + global_indices = tensor_parallel.gather_from_sequence_parallel_region_to_moe( + max_ind + ) + # Create a mask of mapping between global and local tokens where each + # element is True if it's between the local_expert_indices + global_local_mask = (global_indices >= self.local_expert_indices[0]) & ( + global_indices <= self.local_expert_indices[-1] + ) + local_indices = global_indices.masked_select(global_local_mask) + + if self.router_topk > 1: # k > 1 + global_probs = tensor_parallel.gather_from_sequence_parallel_region_to_moe(max_prob) + self.local_probs = global_probs.masked_select(global_local_mask) + else: + self.local_probs = max_prob + + # [S*B/TP, H] -> [S*B, H] + global_hidden_states = tensor_parallel.gather_from_sequence_parallel_region_to_moe( + hidden_states, use_global_buffer=True + ) + # Reshape global_local_mask to be compatible with Tensor.gather + global_local_map = global_local_mask.nonzero()[:, 0] + self.global_local_map = global_local_map.view(-1, 1).expand(-1, hidden_states.shape[-1]) + local_hidden_states = moe_gather.apply(global_hidden_states, self.global_local_map) + else: + if self.router_topk > 1: + global_local_mask = torch.ones_like(max_ind).bool() + local_indices = max_ind.masked_select(global_local_mask) + self.local_probs = max_prob.masked_select(global_local_mask) + global_local_map = global_local_mask.nonzero()[:, 0] + self.global_local_map = global_local_map.view(-1, 1).expand( + -1, hidden_states.shape[-1] + ) + local_hidden_states = torch.gather(hidden_states, 0, self.global_local_map) + else: + local_indices = max_ind + self.local_probs = max_prob + local_hidden_states = hidden_states + self.global_local_map = None + + with torch.no_grad(): + # The indices of local_indices that give its sorted order along dim 0. 
+ self.indices = torch.argsort(local_indices, dim=0) + # use 0.7.0 implement for better performance + tokens_per_expert = torch.histc( + local_indices, + bins=self.num_local_experts, + min=self.local_expert_indices[0], + max=self.local_expert_indices[-1], + ) + tokens_per_expert = tokens_per_expert.to(torch.long) + + # Stage2: permute the tokens locally so that they are grouped by their expert assignment + # Reshape indices to be compatible with Tensor.gather + self.indices = self.indices.view(-1, 1).expand(-1, hidden_states.shape[-1]) + if self.num_local_experts > 1: + permuted_local_hidden_states = moe_gather.apply(local_hidden_states, self.indices) + else: + permuted_local_hidden_states = local_hidden_states + return ( + permuted_local_hidden_states, + tokens_per_expert, + ) + + +def alltoall_preprocess_npu(self, indices: torch.Tensor): + # use 0.7.0 implement for better performance + num_local_tokens_per_expert = torch.histc( + indices, bins=self.num_experts, min=0, max=self.num_experts + ) + # num_local_tokens_per_expert: [num_experts] + + ep_size = self.config.expert_model_parallel_size + if self.drop_and_pad: + # probs: [num_experts, capacity] + self.capacity = self.probs.size(1) + num_tokens_per_local_expert = torch.full( + (self.num_local_experts,), self.capacity * self.ep_size, dtype=torch.long, + device=torch.cuda.current_device() + ) + return num_tokens_per_local_expert + elif self.config.moe_expert_capacity_factor is not None: + # Token drop but no pad. + self.num_out_tokens = num_local_tokens_per_expert.sum().to( + torch.device("cpu"), non_blocking=True + ) + self.cuda_sync_point = "before_permutation_1" + elif ep_size > 1: + # Token dropless and enable ep. + self.cuda_sync_point = "before_ep_alltoall" + else: + # Token dropless and no ep. + self.cuda_sync_point = "before_finish" + + if ep_size > 1: + # =================================================== + # Calculate input_splits, output_splits for alltoall-v. 
+ # =================================================== + self.input_splits = ( + num_local_tokens_per_expert.reshape(ep_size, self.num_local_experts) + .sum(axis=1) + .to(torch.device("cpu"), non_blocking=True) + .numpy() + ) + num_global_tokens_per_expert = _gather_along_first_dim_expert_parallel( + num_local_tokens_per_expert + ).reshape(ep_size, self.num_experts) + self.num_global_tokens_per_local_expert = num_global_tokens_per_expert[ + :, self.local_expert_indices[0]: self.local_expert_indices[-1] + 1 + ] + self.output_splits = ( + self.num_global_tokens_per_local_expert.sum(axis=-1).to(torch.device("cpu")).numpy() + ) + num_tokens_per_local_expert = self.num_global_tokens_per_local_expert.sum(axis=0) + # =================================================== + # num_global_tokens_per_expert: [ep_size, num_experts] + # num_global_tokens_per_local_expert: [ep_size, num_local_experts] + # num_tokens_per_local_expert: [num_local_experts] + # =================================================== + else: + self.num_global_tokens_per_local_expert = num_local_tokens_per_expert.reshape( + -1, self.num_experts + ) + num_tokens_per_local_expert = num_local_tokens_per_expert + + if self.num_local_experts > 1: + # No further synchronization is needed because torch.repeat_interleave() calls stream + # synchronization internally when the `output_size` parameter is not provided. 
+ self.cuda_sync_point = "no_sync" + self.global_input_tokens_local_experts_indices = torch.repeat_interleave( + self.expert_ids_per_ep_rank, self.num_global_tokens_per_local_expert.ravel() + ) + + return num_tokens_per_local_expert \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/tp_2d/__init__.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/tp_2d/__init__.py new file mode 100644 index 000000000..20e5a4690 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/tp_2d/__init__.py @@ -0,0 +1 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. All rights reserved. diff --git a/model/train/yoco_moe/mindspeed/core/transformer/moe/tp_2d/grouped_mlp_2d.py b/model/train/yoco_moe/mindspeed/core/transformer/moe/tp_2d/grouped_mlp_2d.py new file mode 100644 index 000000000..c704524a8 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/moe/tp_2d/grouped_mlp_2d.py @@ -0,0 +1,595 @@ +# Copyright (c) 2024; NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025, Huawei Technologies Co., Ltd. All rights reserved. 
+from copy import deepcopy +from functools import partial +from typing import Optional + +import torch +import torch.nn.functional as F +from torch.nn.parameter import Parameter +from torch.cuda.amp import custom_bwd +from torch.cuda.amp import custom_fwd + +from megatron.core import parallel_state +from megatron.core.dist_checkpointing import ShardedTensor +from megatron.core.dist_checkpointing.mapping import ReplicaId +from megatron.core.dist_checkpointing.mapping import ShardedTensorFactory +from megatron.core.jit import jit_fuser +from megatron.core.tensor_parallel.layers import _initialize_affine_weight_gpu +from megatron.core.transformer.module import MegatronModule +from megatron.core.transformer.moe import grouped_gemm_util as gg +from megatron.core.transformer.transformer_config import TransformerConfig +from megatron.core.transformer.utils import make_sharded_object_for_checkpoint +from megatron.core.utils import divide +from megatron.training import get_args +from mindspeed.core.tensor_parallel.comm_group_api import TPXCollectiveComm +from mindspeed.core.tensor_parallel.comm_group_api import TPYCollectiveComm +from mindspeed.core.tensor_parallel.layers import _initialize_affine_weight_cpu_2d +from mindspeed.core.tensor_parallel.comm_utils import _split_along_last_dim +from mindspeed.core.tensor_parallel.comm_utils import sync_reduce_scatter_along_first_dim +from mindspeed.core.tensor_parallel.comm_utils import sync_gather_along_first_dim +from mindspeed.core.tensor_parallel.comm_utils import sync_gather_along_last_dim +from mindspeed.core.fusions.fused_bias_swiglu import fused_swiglu +from mindspeed.ops.gmm import GMMFunction + + +G_FORWARD_PADDING_SIZE = 0 +G_BACKWARD_PADDING_SIZE = 0 + + +class GroupedMLP2D(MegatronModule): + """An efficient implementation of the Experts layer using CUTLASS GroupedGEMM. + + This class is designed to execute multiple experts in parallel, thereby maximizing computational efficiency. 
+ """ + + def __init__(self, num_local_experts: int, config: TransformerConfig): + super().__init__(config=config) + self.config: TransformerConfig = config + self.num_local_experts = num_local_experts + gg.assert_grouped_gemm_is_available() + assert ( + config.add_bias_linear == False + ), "bias in the expert layer is not supported in Grouped GEMM yet, please set '--disable-bias-linear' instead." + + self.expert_parallel = config.expert_model_parallel_size > 1 + if self.config.gated_linear_unit: + if self.config.activation_func not in (F.silu, F.gelu): + raise ValueError("Activation function must be silu or gelu when using GroupedMLP.") + + self.activation_func = fused_swiglu + else: + self.activation_func = self.config.activation_func + + # How many feature each rank holds for fc1 and fc2, respectively. + self.moe_extended_tp = config.moe_extended_tp + + self.config = config + self.num_local_experts = num_local_experts + gg.assert_grouped_gemm_is_available() + assert config.add_bias_linear is False, ( + "bias in the expert layer is not supported in Grouped GEMM yet, " + "please set '--disable-bias-linear' instead." + ) + + self.init_paras() + + def remove_extra_states_check(self, incompatible_keys): + """ + Remove _extra_state from unexpected keys. + These keys are for dist ckpt compatibility with SequentialMLP. + """ + keys = deepcopy(incompatible_keys.unexpected_keys) + for key in keys: + if "_extra_state" in key: + incompatible_keys.unexpected_keys.remove(key) + + self.register_load_state_dict_post_hook(remove_extra_states_check) + + def init_paras(self): + config = self.config + # How many feature each rank holds for fc1. + all_local_expert_fc1_output_size = self.config.ffn_hidden_size * self.num_local_experts + expert_fc1_output_size = self.config.ffn_hidden_size + if config.gated_linear_unit: + # Project to 4h. 
If using swiglu double the output width, + # see https://arxiv.org/pdf/2002.05202.pdf + all_local_expert_fc1_output_size *= 2 + expert_fc1_output_size *= 2 + + tpx_comm_world_sz = TPXCollectiveComm.get_comm_group_world_size() + tpy_comm_world_sz = TPYCollectiveComm.get_comm_group_world_size() + assert self.config.hidden_size % tpy_comm_world_sz == 0, ( + "fc1 input size should be " "divisible by tp-y" + ) + assert ( + all_local_expert_fc1_output_size % tpx_comm_world_sz == 0 + ), "fc1 output size should be divisible by tp-x" + # h/y + # 2e*dff_h/x + all_local_experts_fc1_output_size_per_partition = divide( + all_local_expert_fc1_output_size, tpx_comm_world_sz + ) + # How many feature each rank holds for fc2. + all_local_experts_fc2_input_size = self.config.ffn_hidden_size * self.num_local_experts + assert ( + all_local_experts_fc2_input_size % tpx_comm_world_sz == 0 + ), "all local expert fc2 output size should be divisible by tp-y" + assert self.config.hidden_size % tpy_comm_world_sz == 0, ( + "fc2 input size should be " "divisible by tp-x" + ) + # e*dff_h/x + all_local_experts_fc2_input_size_per_partition = divide( + all_local_experts_fc2_input_size, tpx_comm_world_sz + ) + # h/y + # Note: The current kernel implementations of grouped_gemm + # does not support transposition with CUTLASS grouped GEMM + # (https://github.com/fanshiqing/grouped_gemm/blob/main/csrc/grouped_gemm.cu#L355-L358) + # and as a result we avoid allocate the transpose of weights. + # Initialize weight. + if config.use_cpu_initialization: + w1s = [] # e1: splited_w1, e2: splited_w1 .. + w2s = [] # e1: splited_w2, e2: splited_w2 .. 
+ master_w1s = [] + master_w2s = [] + for idx in range(self.num_local_experts): + # [h/y, 2*dff_h/x] + w1 = Parameter( + torch.empty( + self.config.hidden_size // tpy_comm_world_sz, + expert_fc1_output_size // tpx_comm_world_sz, + dtype=config.params_dtype, + ) + ) + + master_w1 = _initialize_affine_weight_cpu_2d(w1, 1, return_master_weight=True, config=self.config) + w1s.append(w1) + master_w1s.append(master_w1) + # [dff_h/x, h/y] + w2 = Parameter( + torch.empty( + self.config.ffn_hidden_size // tpx_comm_world_sz, + self.config.hidden_size // tpy_comm_world_sz, + dtype=config.params_dtype, + ) + ) + master_w2 = _initialize_affine_weight_cpu_2d(w2, 0, return_master_weight=True, config=self.config) + w2s.append(w2) + master_w2s.append(master_w2) + + self.master_weight1 = Parameter(torch.cat(master_w1s, dim=-1).contiguous().npu()) + self.master_weight2 = Parameter(torch.cat(master_w2s, dim=0).contiguous().npu()) + # [h/y, e*2*dff_h/x] + self.weight1 = Parameter(torch.cat(w1s, dim=-1).contiguous().npu()) + # [e*dff_h/x, h/y] + self.weight2 = Parameter(torch.cat(w2s, dim=0).contiguous().npu()) + else: + # [h/y, 2e*dff_h/x] + self.weight1 = Parameter( + torch.empty( + divide(self.config.hidden_size, tpy_comm_world_sz), + all_local_experts_fc1_output_size_per_partition, + device=torch.cuda.current_device(), + dtype=config.params_dtype, + ) + ) + # [e*dff_h/x, h/y] + self.weight2 = Parameter( + torch.empty( + all_local_experts_fc2_input_size_per_partition, + divide(self.config.hidden_size, tpy_comm_world_sz), + device=torch.cuda.current_device(), + dtype=config.params_dtype, + ) + ) + if config.perform_initialization: + _initialize_affine_weight_gpu( + self.weight1, + config.init_method, + partition_dim=1, + expert_parallel=self.expert_parallel, + ) + _initialize_affine_weight_gpu( + self.weight2, + config.output_layer_init_method, + partition_dim=0, + expert_parallel=self.expert_parallel, + ) + + setattr(self.weight1, "allreduce", not self.expert_parallel) + 
def forward(self, permuted_local_hidden_states, tokens_per_expert):
    """Run both grouped GEMMs of the 2D tensor-parallel expert MLP.

    Args:
        permuted_local_hidden_states: tokens already grouped by local expert
            (``[n, h]`` in the shape notation used below).
        tokens_per_expert: number of tokens assigned to each local expert.

    Returns:
        ``(fc2_output, None)``; the second element is the bias slot, which
        this implementation always leaves empty.
    """
    runtime_args = get_args()
    # Settings shared by both custom autograd functions.
    gmm_settings = {
        'tokens_per_expert': tokens_per_expert,
        'hidden_size': self.config.hidden_size,
        'num_local_experts': self.num_local_experts,
        'gemm_fusion': runtime_args.gemm_gradient_accumulation_fusion,
        'tp_y': runtime_args.tp_y,
    }

    # [n, h] -> [n1/y, 2e*dff_h/x]
    hidden = CustomGMM2DFC1.apply(permuted_local_hidden_states, self.weight1, gmm_settings)

    # [n1/y, 2e*dff_h/x] -> [n1/y, e*dff_h/x]
    hidden = self.activation_func(hidden)

    # [n1/y, e*dff_h/x] -> [n, h] partial-x
    return CustomGMM2DFC2.apply(hidden, self.weight2, gmm_settings), None
def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
    """Build the sharded state dict, mapping local experts onto global ones.

    Each weight is wrapped in a ``ShardedTensorFactory`` whose build/merge
    callbacks reshape between the fused storage layout and a per-expert,
    transposed layout.  A placeholder ``_extra_state`` object is added for
    every local expert and linear layer.

    Raises:
        NotImplementedError: if ``moe_extended_tp`` is enabled, or (from the
            build callback) if a flattened range is requested.
    """
    if self.moe_extended_tp:
        raise NotImplementedError(
            "Currently distributed checkpointing is not supported for moe_extended_tp"
        )

    sharded_state_dict = {}
    num_global_experts = (
        parallel_state.get_expert_model_parallel_world_size() * self.num_local_experts
    )
    local_expert_indices_offset = (
        parallel_state.get_expert_model_parallel_rank() * self.num_local_experts
    )
    tp_size = TPXCollectiveComm.get_comm_group_world_size()
    tp_rank = TPXCollectiveComm.get_comm_rank()

    prepend_axis_num = len(sharded_offsets)
    replica_id = (
        0,
        0,
        parallel_state.get_data_modulo_expert_parallel_rank(with_context_parallel=True),
    )

    @torch.no_grad()
    def sh_ten_build_fn(
        key: str,
        t: torch.Tensor,
        replica_id: ReplicaId,
        flattened_range: Optional[slice],
        tp_axis: int,
        with_glu: bool,
    ):
        # Per-expert view of the fused weight; tp_axis refers to the
        # transposed tensor that is actually checkpointed.
        if tp_axis == 0:
            real_shape = (self.num_local_experts, self.config.hidden_size // get_args().tp_y, -1)
        elif tp_axis == 1:
            real_shape = (self.num_local_experts, -1, self.config.hidden_size // get_args().tp_y)
            assert with_glu == False
        else:
            raise ValueError("tp_axis should be 0 or 1.")
        if flattened_range is not None:
            raise NotImplementedError(
                "Currently GroupedMLP does not support distributed checkpointing "
                "with the distributed optimizer."
            )

        t = t.view(real_shape).transpose(-1, -2)

        def shard(local_tensor, tp_offset):
            # One sharded tensor: expert axis first, then the TP-split axis.
            return ShardedTensor.from_rank_offsets(
                key,
                local_tensor.contiguous(),
                *sharded_offsets,
                (
                    prepend_axis_num,
                    parallel_state.get_expert_model_parallel_rank(),
                    parallel_state.get_expert_model_parallel_world_size(),
                ),
                tp_offset,
                replica_id=replica_id,
                prepend_axis_num=prepend_axis_num,
            )

        if with_glu:
            # The two halves are stored as separate shards: the first half
            # uses slots [0, tp_size), the second [tp_size, 2 * tp_size).
            local_tensors = torch.chunk(t, 2, -2)
            return [
                shard(local_tensors[0], (prepend_axis_num + 1, tp_rank, tp_size * 2)),
                shard(local_tensors[1], (prepend_axis_num + 1, tp_size + tp_rank, tp_size * 2)),
            ]
        return shard(t, (prepend_axis_num + 1 + tp_axis, tp_rank, tp_size))

    @torch.no_grad()
    def sh_ten_merge_fn(sub_state_dict, tp_axis: int, with_glu: bool):
        # Inverse of the build step: restore the fused 2-D weight layout.
        if tp_axis == 0:
            weight_shape = (self.config.hidden_size, -1)
        elif tp_axis == 1:
            weight_shape = (-1, self.config.hidden_size)
            assert with_glu == False
        else:
            raise ValueError("tp_axis should be 0 or 1.")
        merged = torch.cat(sub_state_dict, -2) if with_glu else sub_state_dict
        return merged.transpose(-1, -2).reshape(weight_shape)

    # To align with SequentialMLP, the weight tensors are transposed,
    # and the tp_axis is also for the transposed tensors
    for name, tensor in self.state_dict(prefix="", keep_vars=True).items():
        is_fc1 = name == "weight1"
        tp_axis = 0 if is_fc1 else 1
        with_glu = self.config.gated_linear_unit if is_fc1 else False
        wkey = (
            f"{prefix}experts.linear_fc1.weight" if is_fc1
            else f"{prefix}experts.linear_fc2.weight"
        )
        sharded_state_dict[f"{prefix}{name}"] = ShardedTensorFactory(
            wkey,
            tensor,
            partial(sh_ten_build_fn, tp_axis=tp_axis, with_glu=with_glu),
            partial(sh_ten_merge_fn, tp_axis=tp_axis, with_glu=with_glu),
            replica_id,
        )

    replica_id = (
        0,
        parallel_state.get_tensor_model_parallel_rank(),
        parallel_state.get_data_modulo_expert_parallel_rank(with_context_parallel=True),
    )
    # Add fake _extra_state to be compatible with SequentialMLP
    for expert_local_idx in range(self.num_local_experts):
        expert_global_idx = local_expert_indices_offset + expert_local_idx
        expert_sharded_offsets = (
            *sharded_offsets,
            (len(sharded_offsets), expert_global_idx, num_global_experts),
        )
        for mod in ("linear_fc1", "linear_fc2"):
            extra_state_key = f"{prefix}expert{expert_global_idx}.{mod}._extra_state"
            sharded_state_dict[extra_state_key] = make_sharded_object_for_checkpoint(
                None, f"{prefix}experts.{mod}._extra_state", expert_sharded_offsets, replica_id,
            )

    return sharded_state_dict
class CustomGMM2DFC1(torch.autograd.Function):
    """Autograd function for the first grouped GEMM (fc1) of the 2D-TP expert MLP.

    Forward splits the input over the TP-Y group along the last dimension,
    runs the grouped matmul, zero-pads the token dimension so it divides the
    TP-Y world size, and reduce-scatters over TP-Y.  Backward gathers the
    incoming gradient, removes padding and computes input/weight gradients.
    """

    @staticmethod
    @custom_fwd
    def forward(ctx, activation_input, weight, grouped_mlp_paras):
        # activation_input: [n, h], weight: [h/y, 2e*dff_h/x]
        global G_FORWARD_PADDING_SIZE

        ctx.grouped_mlp_paras = grouped_mlp_paras
        ctx.weight = weight

        expert_count = grouped_mlp_paras.get('num_local_experts')
        hidden_size = grouped_mlp_paras.get('hidden_size')
        tokens_per_expert = grouped_mlp_paras.get('tokens_per_expert')
        gemm_fusion = grouped_mlp_paras.get('gemm_fusion')
        tp_y = grouped_mlp_paras.get('tp_y')

        # [n, h] -> [n, h/y]
        activation_input = _split_along_last_dim(activation_input, TPYCollectiveComm)
        ctx.save_for_backward(activation_input)

        # [h/y, 2e*dff_h/x] -> [2e*dff_h/x, h/y] -> [e, 2*dff_h/x, h/y] -> [e, h/y, 2*dff_h/x]
        per_expert_w1 = (
            weight.transpose(0, -1).contiguous()
            .view(expert_count, -1, hidden_size // tp_y)
            .transpose(1, -1).contiguous()
        )

        # [n, h/y] @ [e, h/y, 2*dff_h/x] -> [n, 2e*dff_h/x] partial-y
        fc1_output = gg.ops.gmm(
            activation_input,
            per_expert_w1,
            tokens_per_expert,
            trans_b=False,
            gemm_fusion=gemm_fusion,
            original_weight=weight
        )

        # padding for reduce scatter, [n, 2e*dff_h/x] partial-y -> [n1, 2e*dff_h/x] partial-y
        token_count, width = fc1_output.shape
        rs_size = TPYCollectiveComm.get_comm_group_world_size()
        leftover = token_count % rs_size
        # Recorded at module level; read again when the padding is removed.
        G_FORWARD_PADDING_SIZE = rs_size - leftover if leftover else 0
        if G_FORWARD_PADDING_SIZE != 0:
            zero_rows = torch.zeros(
                G_FORWARD_PADDING_SIZE, width, dtype=fc1_output.dtype, device=fc1_output.device
            )
            fc1_output = torch.cat((fc1_output, zero_rows), dim=0)

        # [n1, 2e*dff_h/x] partial-y -> [n1/y, 2e*dff_h/x]
        return sync_reduce_scatter_along_first_dim(fc1_output, TPYCollectiveComm)

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output):
        # grad_output shape: [n1/y, 2e*dff_h/x]
        global G_BACKWARD_PADDING_SIZE

        # activation_input shape: [n, h/y]
        (activation_input,) = ctx.saved_tensors
        settings = ctx.grouped_mlp_paras

        # weight shape: [h/y, 2e*dff_h/x]
        weight = ctx.weight

        expert_count = settings.get('num_local_experts')
        tokens_per_expert = settings.get('tokens_per_expert')
        hidden_size = settings.get('hidden_size')
        gemm_fusion = settings.get('gemm_fusion')
        tp_y = settings.get('tp_y')

        # [h/y, 2e*dff_h/x] -> [2e*dff_h/x, h/y] -> [e, 2*dff_h/x, h/y]
        per_expert_w1 = weight.t().contiguous().view(expert_count, -1, hidden_size // tp_y)

        # [n1/y, 2e*dff_h/x] -> [n1, 2e*dff_h/x]
        total_grad_output = sync_gather_along_first_dim(grad_output, TPYCollectiveComm)

        # unpadding, [n1, 2e*dff_h/x] -> [n, 2e*dff_h/x]
        if G_BACKWARD_PADDING_SIZE != 0:
            kept_rows = total_grad_output.shape[0] - G_BACKWARD_PADDING_SIZE
            total_grad_output = total_grad_output[:kept_rows, :]

        # [n, 2e*dff_h/x] @ [e, 2*dff_h/x, h/y] = [n, h/y] partial-x
        grad_gmm_output = gg.ops.gmm(
            total_grad_output,
            per_expert_w1,
            tokens_per_expert,
            trans_b=False,
            gemm_fusion=gemm_fusion,
        )

        group_list = torch.cumsum(tokens_per_expert, dim=0)
        # [h/y, n] @ [n, 2e*dff_h/x] = [e, h/y, 2*dff_h/x]
        # The trailing positional arguments are forwarded exactly as before.
        grad_weight_output = GMMFunction.builder.load().npu_gmm(
            [activation_input.t()],
            [total_grad_output],
            [],
            group_list,
            2,
            0)[0]

        # [e, h/y, 2*dff_h/x] -> [e, 2*dff_h/x, h/y]
        grad_weight_output = grad_weight_output.transpose(1, -1).contiguous()
        # [e, 2*dff_h/x, h/y] -> [2e*dff_h/x, h/y]
        grad_weight_output = grad_weight_output.view(-1, grad_weight_output.shape[-1])
        # [2e*dff_h/x, h/y] -> [h/y, 2e*dff_h/x]
        grad_weight_output = grad_weight_output.transpose(0, 1).contiguous()

        # [n, h/y] partial-x -> [n, h] partial-x
        grad_gmm_output = sync_gather_along_last_dim(grad_gmm_output, TPYCollectiveComm)

        # No gradient for the settings dict.
        return grad_gmm_output, grad_weight_output, None
class CustomGMM2DFC2(torch.autograd.Function):
    """Autograd function for the second grouped GEMM (fc2) of the 2D-TP expert MLP.

    Forward gathers the activation over the TP-Y group along the token
    dimension, strips the padding recorded in ``G_FORWARD_PADDING_SIZE``, runs
    the grouped matmul and gathers the result along the last dimension.
    Backward splits the gradient over TP-Y, computes input/weight gradients
    and pads + reduce-scatters the input gradient.
    """

    @staticmethod
    @custom_fwd
    def forward(ctx, activation_input, weight, grouped_mlp_paras):
        # activation_input shape: [n1/y, e*dff_h/x], weight shape: [e*dff_h/x, h/y]
        global G_FORWARD_PADDING_SIZE

        ctx.grouped_mlp_paras = grouped_mlp_paras
        ctx.weight = weight

        expert_count = grouped_mlp_paras.get('num_local_experts')
        hidden_size = grouped_mlp_paras.get('hidden_size')
        tokens_per_expert = grouped_mlp_paras.get('tokens_per_expert')
        gemm_fusion = grouped_mlp_paras.get('gemm_fusion')
        tp_y = grouped_mlp_paras.get('tp_y')

        # [e*dff_h/x, h/y] -> [e, dff_h/x, h/y]
        per_expert_w2 = weight.view(expert_count, -1, hidden_size // tp_y)

        # [n1/y, e*dff_h/x] -> [n1, e*dff_h/x]
        total_input = sync_gather_along_first_dim(activation_input, TPYCollectiveComm)

        # unpadding, [n1, e*dff_h/x] -> [n, e*dff_h/x]
        if G_FORWARD_PADDING_SIZE != 0:
            kept_rows = total_input.shape[0] - G_FORWARD_PADDING_SIZE
            total_input = total_input[:kept_rows, :]

        ctx.save_for_backward(total_input)

        # [n, e*dff_h/x] @ [e, dff_h/x, h/y] -> [n, h/y] partial-x
        fc2_output = gg.ops.gmm(
            total_input,
            per_expert_w2,
            tokens_per_expert,
            trans_b=False,
            gemm_fusion=gemm_fusion,
            original_weight=weight
        )

        # [n, h/y] partial-x -> [n, h] partial-x
        return sync_gather_along_last_dim(fc2_output, TPYCollectiveComm)

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output):
        # grad_output shape: [n, h]
        global G_BACKWARD_PADDING_SIZE

        # activation_input shape: [n, e*dff_h/x]
        (activation_input,) = ctx.saved_tensors
        settings = ctx.grouped_mlp_paras

        # weight 2 shape: [e*dff_h/x, h/y]
        weight = ctx.weight

        expert_count = settings.get('num_local_experts')
        tokens_per_expert = settings.get('tokens_per_expert')
        hidden_size = settings.get('hidden_size')
        gemm_fusion = settings.get('gemm_fusion')
        tp_y = settings.get('tp_y')

        # [e*dff_h/x, h/y] -> [e, dff_h/x, h/y] -> [e, h/y, dff_h/x]
        per_expert_w2 = (
            weight.view(expert_count, -1, hidden_size // tp_y)
            .transpose(1, -1).contiguous()
        )

        # [n, h] -> [n, h/y]
        grad_output = _split_along_last_dim(grad_output, TPYCollectiveComm)

        # [n, h/y] @ [e, h/y, dff_h/x] = [n, e*dff_h/x] partial-y
        partial_grad_gmm_output = gg.ops.gmm(
            grad_output,
            per_expert_w2,
            tokens_per_expert,
            trans_b=False,
            gemm_fusion=gemm_fusion,
        )

        # padding for reduce scatter, [n, e*dff_h/x] -> [n1, e*dff_h/x]
        token_count, width = partial_grad_gmm_output.shape
        rs_size = TPYCollectiveComm.get_comm_group_world_size()
        leftover = token_count % rs_size
        # Recorded at module level; read again when the padding is removed.
        G_BACKWARD_PADDING_SIZE = rs_size - leftover if leftover else 0
        if G_BACKWARD_PADDING_SIZE != 0:
            zero_rows = torch.zeros(
                G_BACKWARD_PADDING_SIZE,
                width,
                dtype=partial_grad_gmm_output.dtype,
                device=partial_grad_gmm_output.device,
            )
            partial_grad_gmm_output = torch.cat((partial_grad_gmm_output, zero_rows), dim=0)

        # [n1, e*dff_h/x] -> [n1/y, e*dff_h/x]
        grad_gmm_output = sync_reduce_scatter_along_first_dim(partial_grad_gmm_output, TPYCollectiveComm)

        group_list = torch.cumsum(tokens_per_expert, dim=0)
        # [e*dff_h/x, n] @ [n, h/y] = [e, dff_h/x, h/y]
        # The trailing positional arguments are forwarded exactly as before.
        grad_weight_output = GMMFunction.builder.load().npu_gmm(
            [activation_input.t()],
            [grad_output],
            [],
            group_list,
            2,
            0)[0]
        # [e, dff_h/x, h/y] -> [e*dff_h/x, h/y]
        grad_weight_output = grad_weight_output.view(-1, grad_weight_output.shape[-1])

        # No gradient for the settings dict.
        return grad_gmm_output, grad_weight_output, None
class MoEAllGatherTokenDispatcher2D(MoETokenDispatcher):
    """
    AllGather based token dispatcher for 2D tensor parallelism.

    Tokens are gathered over the ``TPXEPCollectiveComm`` group, the ones routed
    to this rank's experts are selected and sorted by expert, and after the
    experts ran the process is reversed with a reduce-scatter over the same
    group.
    """

    def __init__(
        self, num_local_experts: int, local_expert_indices: List[int], config: TransformerConfig,
    ) -> None:
        """
        Initialize the dispatcher state.

        Args:
            num_local_experts: number of experts hosted on this rank (> 0).
            local_expert_indices: global indices of those experts (non-empty);
                the first and last entry are used as an inclusive id range.
            config: transformer configuration; ``num_moe_experts``,
                ``moe_router_topk`` and ``add_bias_linear`` are read here.
        """
        super().__init__(config=config)
        self.num_local_experts = num_local_experts
        self.num_experts = config.num_moe_experts
        assert self.num_local_experts > 0, "Expected at least one expert"
        self.local_expert_indices = local_expert_indices
        assert len(self.local_expert_indices) > 0, "Expected at least one local expert index"
        self.router_topk = config.moe_router_topk
        self.add_bias = config.add_bias_linear

        # self.local_probs: probs of global token assignment to local experts.
        self.local_probs = None

        # self.indices: The indices of `local_indices`
        # (the argsort that groups the locally selected tokens by expert).
        self.indices = None

        # self.global_local_map: row indices of the gathered tokens that belong
        # to local experts; expanded to 2D on the non-CANN code path.
        self.global_local_map = None

    def token_permutation(
        self, hidden_states: torch.Tensor, topk_probs: torch.Tensor, topk_indices: torch.Tensor
    ):
        """Dispatch tokens to local experts. It's composed of two stages:
        (1) Permute the tokens across the expert parallel devices. After this stage,
        each device receives all the tokens assigned to its local set of experts
        in its local HBM.
        (2) Permute the tokens locally so that they are grouped by their expert
        assignment.
        After the stage (1), the tokens are grouped by which device
        they came from. We re-order them locally for subsequent efficient computation.

        Besides the return values this method stores ``hidden_shape``,
        ``indices``, ``global_local_map``, ``local_probs`` and
        ``all_tokens_per_expert`` on ``self`` for ``token_unpermutation``.

        Args:
            hidden_states: input tokens of shape [s/(cp*x), b, h]
            topk_probs: probs of local token assignment to global experts
                with shape: [sb/(cp*x), topK]
            topk_indices: expert ids chosen for each local token, with
                shape: [sb/(cp*x), topK]

        Returns:
            permuted_local_hidden_states: Permutation of tokens to local experts group.
            tokens_per_expert: the number of tokens each local expert to process.
        """

        self.hidden_shape = hidden_states.shape
        # [S/TP, B, H] -> [S*B/(cp*x), H]
        hidden_states = hidden_states.view(-1, self.hidden_shape[-1])

        # Permute the tokens across the expert parallel devices.
        if TPXCollectiveComm.get_comm_group_world_size() > 1 or self.config.expert_model_parallel_size > 1:
            # [S*B/(cp*x), H] -> [S*B, H]
            with torch.no_grad():
                # [sb/x, topk] -> [sb*ep, topK]
                global_indices = auto_grad_sync_gather_along_first_dim_rs(topk_indices, TPXEPCollectiveComm)

            # [sb/x, topk] -> [sb*ep, topK]
            global_probs = auto_grad_sync_gather_along_first_dim_rs(topk_probs, TPXEPCollectiveComm)
            # [S/x, b, h] -> [sb*ep, h]
            global_hidden_states = auto_grad_sync_gather_along_first_dim_rs(hidden_states, TPXEPCollectiveComm)

            with torch.no_grad():
                # True where a gathered (token, k) slot is routed to one of
                # this rank's experts (inclusive id range).
                global_local_mask = (global_indices >= self.local_expert_indices[0]) & (
                    global_indices <= self.local_expert_indices[-1])
                local_indices = global_indices.masked_select(global_local_mask)
                self.indices = torch.argsort(local_indices.float(), dim=0)
                num_global_experts = self.num_local_experts * parallel_state.get_expert_model_parallel_world_size()

                # Token count for every global expert, then the local slice.
                all_tokens_per_expert = torch.histc(global_indices, bins=num_global_experts, min=0,
                                                    max=num_global_experts - 1, )
                self.all_tokens_per_expert = all_tokens_per_expert.to(torch.long)
                tokens_per_expert = self.all_tokens_per_expert[
                    self.local_expert_indices[0]: self.local_expert_indices[-1] + 1]
                self.global_local_map = global_local_mask.nonzero()[:, 0]

            if self.router_topk > 1:
                self.local_probs = global_probs.masked_select(global_local_mask)
            else:
                self.local_probs = topk_probs

            if cann_version_check:
                local_hidden_states = global_hidden_states[self.global_local_map, :]
            else:
                # Expand the row indices to [num_selected, h] for the gather op.
                self.global_local_map = (self.global_local_map.view(-1, 1).expand(-1, hidden_states.shape[-1]))
                local_hidden_states = moe_gather.apply(global_hidden_states, self.global_local_map)
        else:
            if self.router_topk > 1:
                # Every (token, k) slot is local here, so the mask is all ones.
                global_local_mask = torch.ones_like(topk_indices).bool()
                local_indices = topk_indices.masked_select(global_local_mask)
                self.local_probs = topk_probs.masked_select(global_local_mask)
                self.global_local_map = global_local_mask.nonzero()[:, 0]
                if cann_version_check:
                    local_hidden_states = hidden_states[self.global_local_map, :]
                else:
                    self.global_local_map = self.global_local_map.view(-1, 1).expand(-1, hidden_states.shape[-1])
                    local_hidden_states = torch.gather(hidden_states, 0, self.global_local_map)
            else:
                local_indices = topk_indices
                self.local_probs = topk_probs
                local_hidden_states = hidden_states
                self.global_local_map = None

            with torch.no_grad():
                # The indices of local_indices that give its sorted order along dim 0.
                self.indices = torch.argsort(local_indices, dim=0)
                # use 0.7.0 implement for better performance
                tokens_per_expert = torch.histc(local_indices, bins=self.num_local_experts,
                                                min=self.local_expert_indices[0], max=self.local_expert_indices[-1], )
                tokens_per_expert = tokens_per_expert.to(torch.long)
                self.all_tokens_per_expert = tokens_per_expert

        # Stage (2): group the selected tokens by expert.  With a single local
        # expert no reordering is applied.
        if self.num_local_experts > 1:
            if cann_version_check:
                permuted_local_hidden_states = local_hidden_states[self.indices, :]
            else:
                self.indices = self.indices.view(-1, 1).expand(-1, hidden_states.shape[-1])
                permuted_local_hidden_states = moe_gather.apply(local_hidden_states, self.indices)
        else:
            permuted_local_hidden_states = local_hidden_states

        return permuted_local_hidden_states, tokens_per_expert

    def token_unpermutation(
        self,
        hidden_states: torch.Tensor,
        bias: torch.Tensor = None,
    ):
        """
        Reverse process of `token_permutation()` which permutes the output of local
        experts locally and across expert parallel rank into the original order to
        produce the final output.

        Relies on the state stored on ``self`` by ``token_permutation``.

        Args:
            hidden_states: 2D tensor of shape [sum_tokens_of_all_local_experts, HiddenSize],
            output of local experts.
            bias (optional): The bias tensor; required when ``add_bias`` is set.

        Returns:
            output_total: un-permuted updated hidden states output from all local experts
            with shape of [SeqLen/TP, MBS, HiddenSize]
            output_bias_total: the un-permuted bias with the same shape, or
            ``None`` when ``add_bias`` is not set.
        """
        # Stage1: unpermute the tokens and bias locally respectively.
        scores = self.local_probs.to(dtype=hidden_states.dtype)
        if self.num_local_experts > 1:
            if cann_version_check:
                unpermuted_local_hidden = torch.zeros_like(hidden_states)
                unpermuted_local_hidden.index_put_((self.indices,),
                                                   hidden_states[:self.indices.shape[0], :],
                                                   accumulate=False)
            else:
                assert self.indices.shape == hidden_states.shape
                unpermuted_local_hidden = moe_scatter.apply(hidden_states, self.indices)
        else:
            unpermuted_local_hidden = hidden_states

        # Scale the expert output prior to reduction and subsequent to local unpermutation if k > 1.
        if self.router_topk > 1:
            unpermuted_local_hidden = unpermuted_local_hidden * scores.view(-1, 1)

        unpermuted_local_bias = None
        if self.add_bias:
            assert bias is not None
            unpermuted_local_bias = torch.zeros_like(hidden_states)
            if cann_version_check:
                unpermuted_local_bias.index_put_((self.indices,), bias[:self.indices.shape[0], :],
                                                 accumulate=False)
            else:
                assert self.indices.shape == bias.shape
                unpermuted_local_bias = unpermuted_local_bias.scatter(0, self.indices, bias)
            if self.router_topk > 1:
                unpermuted_local_bias = unpermuted_local_bias * scores.view(-1, 1)

        output_total = unpermuted_local_hidden
        output_bias_total = unpermuted_local_bias

        # Unpermute the tokens across expert parallel devices.
        if TPXCollectiveComm.get_comm_group_world_size() > 1 or self.config.expert_model_parallel_size > 1:
            assert (self.global_local_map is not None), \
                "global_local_map is necessary for `AllGather`."
            ep_group_size = TPXEPCollectiveComm.get_comm_group_world_size()
            # hidden_shape: [SeqLen/TP, MBS, HiddenSize], global_num_tokens = SeqLen/TP*MBS*(TP*EP)
            global_num_tokens = self.hidden_shape[0] * self.hidden_shape[1] * ep_group_size
            global_hidden_shape = [global_num_tokens, hidden_states.shape[-1]]
            if cann_version_check:
                # NOTE(review): this buffer is always float32, whereas the
                # non-parallel branch below uses hidden_states.dtype - confirm
                # that the difference is intended.
                unpermuted_global_hidden = torch.zeros(global_hidden_shape, dtype=torch.float,
                                                       device=torch.cuda.current_device())
                unpermuted_global_hidden = NewIndePut.apply(unpermuted_global_hidden,
                                                            (self.global_local_map,),
                                                            unpermuted_local_hidden[
                                                                :self.global_local_map.shape[0], :])
            else:
                assert self.global_local_map.shape == unpermuted_local_hidden.shape
                unpermuted_global_hidden = moe_scatter.apply(unpermuted_local_hidden,
                                                             self.global_local_map, global_hidden_shape)

            output_total = auto_grad_reduce_scatter_along_first_dim(unpermuted_global_hidden, TPXEPCollectiveComm)
            if self.add_bias:
                # Unpermute the bias across expert parallel devices.
                unpermuted_global_bias = torch.zeros_like(unpermuted_global_hidden)
                if cann_version_check:
                    unpermuted_global_bias.index_put_((self.global_local_map,),
                                                      unpermuted_local_bias[
                                                          :self.global_local_map.shape[0], :],
                                                      accumulate=True)
                else:
                    unpermuted_global_bias = unpermuted_global_bias.scatter_add(0,
                                                                                self.global_local_map, unpermuted_local_bias)

                output_bias_total = auto_grad_reduce_scatter_along_first_dim(unpermuted_global_bias,
                                                                             TPXEPCollectiveComm)
                # bias is duplicated across tensor parallelism ranks;
                # reduce scatter reduces bias across tensor parallel_ranks
                output_bias_total = (output_bias_total /
                                     TPXCollectiveComm.get_comm_group_world_size())
        else:
            if self.router_topk > 1:
                # Sum the k expert outputs of every token back into one row.
                global_num_tokens = self.hidden_shape[0] * self.hidden_shape[1]
                global_hidden_shape = [global_num_tokens, hidden_states.shape[-1]]
                unpermuted_global_hidden = torch.zeros(global_hidden_shape,
                                                       dtype=hidden_states.dtype, device=torch.cuda.current_device(), )
                if cann_version_check:
                    output_total = unpermuted_global_hidden.index_put((self.global_local_map,),
                                                                      unpermuted_local_hidden[
                                                                          :self.global_local_map.shape[
                                                                              0], :], accumulate=True)
                else:
                    output_total = unpermuted_global_hidden.scatter_add(0, self.global_local_map,
                                                                        unpermuted_local_hidden)
                if self.add_bias:
                    unpermuted_global_bias = torch.zeros_like(unpermuted_global_hidden)
                    if cann_version_check:
                        output_bias_total = unpermuted_global_bias.index_put(
                            (self.global_local_map,),
                            unpermuted_local_bias[:self.global_local_map.shape[0], :],
                            accumulate=True)
                    else:
                        output_bias_total = unpermuted_global_bias.scatter_add(0,
                                                                               self.global_local_map, unpermuted_local_bias)

        # For top-1 routing the scaling by the router score happens here,
        # after the cross-rank reduction.
        if self.router_topk == 1:
            output_total = output_total * scores
        output_total = output_total.view(self.hidden_shape)
        if self.add_bias:
            assert output_bias_total is not None
            if self.router_topk == 1:
                output_bias_total = output_bias_total * scores
            output_bias_total = output_bias_total.view(self.hidden_shape)
        else:
            output_bias_total = None

        return output_total, output_bias_total
class MoELayer2D(BaseMoELayer):
    """2D Mixture of experts Layer **currently only supports allgather gmm**.

    Wires together a ``TopKRouter2D``, an experts module (``GroupedMLP2D`` when
    ``moe_grouped_gemm`` is set, otherwise ``SequentialMLP2D``) and a
    ``MoEAllGatherTokenDispatcher2D``.
    """

    def __init__(
        self, config: TransformerConfig, submodules: MLPSubmodules = None, layer_number: int = None
    ):
        """Create the router, experts and token dispatcher.

        Args:
            config: transformer configuration.
            submodules: MLP submodules; required (and type-checked) only when
                grouped GEMM is disabled.
            layer_number: index of this layer, forwarded to the base class.

        Raises:
            ValueError: if ``config.moe_token_dispatcher_type`` is not
                ``"allgather"``.
        """
        super(MoELayer2D, self).__init__(config=config, layer_number=layer_number)
        self.submodules = submodules
        self.router = TopKRouter2D(config=self.config)
        if self.config.moe_grouped_gemm:
            self.experts = GroupedMLP2D(self.num_local_experts, self.config)
        else:
            assert isinstance(self.submodules, MLPSubmodules)
            self.experts = SequentialMLP2D(self.num_local_experts, self.config, self.submodules)
        if config.moe_token_dispatcher_type == "allgather":
            self.token_dispatcher = MoEAllGatherTokenDispatcher2D(
                self.num_local_experts, self.local_expert_indices, config=self.config
            )
        else:
            raise ValueError(
                f"Unsupported token dispatcher type: {config.moe_token_dispatcher_type}"
            )
        self.moe_layer_recompute = config.moe_layer_recompute

    def forward(self, hidden_states: torch.Tensor):
        """Route, dispatch, run the experts and combine the result.

        Args:
            hidden_states: input of shape [s/x, b, h/y].

        Returns:
            ``(output, mlp_bias)``; both are split along the last dimension
            over the TP-Y group, ``mlp_bias`` is ``None`` when the experts
            produce no bias.
        """
        # [s/x, b, h/y] -> [s/x, b, h]
        hidden_states = auto_grad_sync_gather_along_last_dim(hidden_states, TPYCollectiveComm)

        # [sb/x, h] => [sb/x, topK], [sb/x, topK]
        topk_probs, topk_indices = self.router(hidden_states)

        dispatched_input, tokens_per_expert = self.token_dispatcher.token_permutation(
            hidden_states, topk_probs, topk_indices
        )
        expert_output, bias = self.experts(dispatched_input, tokens_per_expert)
        output, mlp_bias = self.token_dispatcher.token_unpermutation(expert_output, bias)

        # [s/x, b, h] -> [s/x, b, h/y]
        output = auto_grad_scatter_along_last_dim(output, TPYCollectiveComm)
        # Compare against None explicitly: the bias is a multi-element tensor
        # and its truth value is ambiguous (``bool(tensor)`` raises).
        if mlp_bias is not None:
            mlp_bias = auto_grad_scatter_along_last_dim(mlp_bias, TPYCollectiveComm)

        return output, mlp_bias
class SequentialMLP2D(MegatronModule):
    """Experts layer built from a list of independent MLP modules.

    The local experts are evaluated one after another, each on its own
    contiguous slice of the permuted token tensor.
    """

    def __init__(self, num_local_experts, config: TransformerConfig, submodules: MLPSubmodules):
        """Instantiate ``num_local_experts`` expert MLPs from ``submodules``."""
        super().__init__(config=config)
        self.add_bias = config.add_bias_linear
        self.moe_extended_tp = config.moe_extended_tp
        self.num_local_experts = num_local_experts
        self.local_experts = torch.nn.ModuleList()
        for _ in range(self.num_local_experts):
            self.local_experts.append(MLP(self.config, submodules, is_expert=True))

    def forward(self, permuted_local_hidden_states, tokens_per_expert):
        """Apply every local expert to its slice of the grouped tokens.

        Args:
            permuted_local_hidden_states: tokens sorted by local expert.
            tokens_per_expert: number of tokens belonging to each expert.

        Returns:
            ``(output, output_bias)`` shaped like the input; ``output_bias``
            is ``None`` unless ``add_bias`` is enabled.
        """
        output_local = torch.zeros_like(permuted_local_hidden_states)
        output_bias_local = (
            torch.zeros_like(permuted_local_hidden_states) if self.add_bias else None
        )

        # Slice boundaries: cumulative token counts with a leading zero, so
        # expert i owns rows boundaries[i]:boundaries[i + 1].
        token_ends = torch.cumsum(tokens_per_expert, dim=0)
        leading_zero = torch.zeros(1, dtype=torch.long, device=token_ends.device)
        boundaries = torch.cat((leading_zero, token_ends))

        for expert_idx, expert in enumerate(self.local_experts):
            start = boundaries[expert_idx]
            end = boundaries[expert_idx + 1]
            expert_out, expert_bias = expert(permuted_local_hidden_states[start:end])

            output_local[start:end] = expert_out
            if self.add_bias:
                output_bias_local[start:end, :] = expert_bias.expand_as(expert_out)
        return output_local, output_bias_local

    def sharded_state_dict(self, prefix='', sharded_offsets=(), metadata=None):
        """Collect the experts' sharded state, keyed by global expert index.

        Raises:
            NotImplementedError: if ``moe_extended_tp`` is enabled.
        """
        if self.moe_extended_tp:
            raise NotImplementedError(
                'Currently distributed checkpointing is not supported for moe_extended_tp'
            )

        merged_state = {}
        num_global_experts = (
            parallel_state.get_expert_model_parallel_world_size() * self.num_local_experts
        )
        first_global_idx = (
            parallel_state.get_expert_model_parallel_rank() * self.num_local_experts
        )
        shared_prefix = f'{prefix}experts.'

        for local_idx, expert in enumerate(self.local_experts):
            global_idx = first_global_idx + local_idx
            local_prefix = f'{prefix}local_experts.{local_idx}.'
            offsets = (
                *sharded_offsets,
                (len(sharded_offsets), global_idx, num_global_experts),
            )

            expert_state = expert.sharded_state_dict(local_prefix, offsets, metadata)
            # Remove expert layers indexing from sharded keys
            replace_prefix_for_sharding(expert_state, local_prefix, shared_prefix)

            # Adjust replica ids - replication along DP modulo EP
            for k, sh_ten in expert_state.items():
                replica_id = sh_ten.replica_id
                assert (
                    len(replica_id) == 3
                ), f'Expected replica_id for {k} to be in (PP, TP, DP) format, got: {replica_id}'
                sh_ten.replica_id = (
                    *replica_id[:2],
                    parallel_state.get_data_modulo_expert_parallel_rank(with_context_parallel=True),
                )

            merged_state.update(expert_state)
        return merged_state
class TopKRouter2D(TopKRouter):
    """Top-k expert router variant used with 2-D tensor parallelism."""

    def __init__(self, config: TransformerConfig) -> None:
        super().__init__(config)
        # Tag the router weight. "2d_tp" is not a valid identifier, so that
        # attribute can only be attached through setattr.
        self.weight.sequence_parallel = False
        setattr(self.weight, "2d_tp", True)

    def apply_load_balancing_loss(self, probs: torch.Tensor,
                                  num_local_tokens_per_expert: torch.Tensor,
                                  activation: torch.Tensor):
        """Attach the load-balancing auxiliary loss to ``activation``.

        Args:
            probs (torch.Tensor): Router probabilities per token, with shape
                [sb / (x * cp), E].
            num_local_tokens_per_expert (torch.Tensor): Token count per expert,
                with shape [E].
            activation (torch.Tensor): The top-k probabilities the gradient
                function is attached to, with shape [sb/(x*cp), topK].

        Returns:
            torch.Tensor: ``activation`` with the auxiliary-loss gradient
            function attached; shape [sb/(x*cp), topK].
        """
        dispatcher_type = self.config.moe_token_dispatcher_type
        coeff = self.config.moe_aux_loss_coeff
        partition_group = None
        if dispatcher_type == "allgather":
            partition_group = TensorParallelXUnionCP().group
        elif dispatcher_type == "alltoall":
            tp_y_cp = TensorParallelYUnionCP()
            partition_group = tp_y_cp.group
            # Only the alltoall dispatcher rescales the coefficient.
            coeff /= tp_y_cp.get_parallel_group_world_size()

        aux_loss = switch_load_balancing_loss_func(
            probs,
            num_local_tokens_per_expert,
            self.topk,
            coeff,
            sequence_partition_group=partition_group,
        )
        # The tracker receives the loss with the coefficient divided back out.
        save_to_aux_losses_tracker(
            "load_balancing_loss",
            aux_loss / coeff,
            self.layer_number,
            self.config.num_layers,
            reduce_group=partition_group,
        )
        return MoEAuxLossAutoScaler.apply(activation, aux_loss)

    def apply_z_loss(self, logits):
        """Attach the router z-loss to ``logits`` while training.

        The z-loss keeps router logits small for stability; see the ST-MoE
        paper (https://arxiv.org/pdf/2202.08906.pdf).

        Args:
            logits (torch.Tensor): The logits of the router.

        Returns:
            torch.Tensor: ``logits`` unchanged when no z-loss coefficient is
            configured or the module is not training; otherwise ``logits``
            with the z-loss gradient function attached.
        """
        if self.config.moe_z_loss_coeff is None or not self.training:
            return logits

        coeff = (
            self.config.moe_z_loss_coeff
            / TensorParallelXUnionCP().get_parallel_group_world_size()
        )
        z_loss = z_loss_func(logits, coeff)
        logits = MoEAuxLossAutoScaler.apply(logits, z_loss)
        save_to_aux_losses_tracker(
            "z_loss",
            z_loss / coeff,
            self.layer_number,
            self.config.num_layers,
        )
        return logits
+ padded_mode (bool, optional): If True, indicating the indices are padded to [num_expert, capacity] to denote selected tokens per expert. Defaults to False. + restore_shape (torch.Size, optional): The input shape before permutation, only used in padding mode. Defaults to None. + + Returns: + torch.Tensor: The unpermuted tokens, optionally merged with probabilities. + """ + moe_hierarchical_alltoallv = get_args().moe_hierarchical_alltoallv + if padded_mode: + raise ValueError("moe-zero-memory temporally does not support padded mode") + + if sorted_indices.numel() != permuted_tokens.size(0): + raise AssertionError("") + saved_tensors = [sorted_indices] + + with torch.no_grad(): + if probs is not None: + # Unpermute and merge the tokens with their probabilities + num_unpermuted_tokens = probs.numel() + saved_tensors.append(probs) + ctx.topk = probs.size(1) + ctx.probs_shape = probs.shape + ctx.probs_dtype = probs.dtype + else: + # Unpermute the tokens without merge + num_unpermuted_tokens = permuted_tokens.size(0) + ctx.topk = 1 + ctx.save_for_backward(*saved_tensors) + if moe_hierarchical_alltoallv: + unpermuted_tokens = torch.zeros( + [ctx.topk * probs.shape[0], permuted_tokens.shape[-1]], + dtype=permuted_tokens.dtype, + device=permuted_tokens.device, + ) + unpermuted_tokens = \ + unpermuted_tokens.scatter(0, sorted_indices.unsqueeze(1).expand(-1, permuted_tokens.shape[1]), + permuted_tokens) + elif not get_args().use_fused_moe_token_permute_and_unpermute: + unpermuted_tokens = torch.zeros( + [num_unpermuted_tokens, permuted_tokens.shape[-1]], + dtype=permuted_tokens.dtype, + device=permuted_tokens.device, + ) + unpermuted_tokens.index_copy_(0, sorted_indices, permuted_tokens) + else: + unpermuted_tokens = permuted_tokens.index_select(0, sorted_indices) + + ctx.permuted_tokens_shape = permuted_tokens.shape + ctx.unpermuted_tokens_shape = unpermuted_tokens.shape + unpermuted_tokens = unpermuted_tokens.reshape(-1, ctx.topk, permuted_tokens.size(-1)) + 
permuted_tokens.untyped_storage().resize_(0) + + if probs is not None: + tensor_to_swap = unpermuted_tokens + unpermuted_tokens = unpermuted_tokens * probs.unsqueeze(-1) + swap_stream, last_tensor = get_swap_status() + if last_tensor is not None: + torch.npu.current_stream().wait_stream(swap_stream) + last_tensor.untyped_storage().resize_(0) + forward_event = torch.npu.Event() + forward_event.record() + set_swap_status(tensor_to_swap) + ctx.tensor_cpu = torch.empty(tensor_to_swap.shape, dtype=tensor_to_swap.dtype, pin_memory=True, device='cpu') + with torch_npu.npu.stream(swap_stream): + swap_stream.wait_event(forward_event) + ctx.tensor_cpu.untyped_storage().copy_(tensor_to_swap.untyped_storage(), non_blocking=True) + ctx.swap_event = torch.npu.Event() + ctx.swap_event.record() + + ctx.matmul_output_shape = unpermuted_tokens.shape + unpermuted_tokens = unpermuted_tokens.sum(dim=1) + + return unpermuted_tokens + + @staticmethod + def backward(ctx, *args): + moe_hierarchical_alltoallv = get_args().moe_hierarchical_alltoallv + if ctx.topk > 1: + (indices, probs) = ctx.saved_tensors + else: + (indices,) = ctx.saved_tensors + ctx.save_for_backward() + + if ctx.topk > 1: + matmul_output_grad = args[0].unsqueeze(dim=1).expand(ctx.matmul_output_shape) + backward_event1 = torch.npu.Event() + backward_event1.record() + swap_stream = get_swap_stream() + unpermuted_tokens = torch.empty(ctx.tensor_cpu.shape, dtype=ctx.tensor_cpu.dtype, device=torch.npu.current_device()) + with torch_npu.npu.stream(swap_stream): + swap_stream.wait_event(backward_event1) + swap_stream.wait_event(ctx.swap_event) + unpermuted_tokens.untyped_storage().copy_(ctx.tensor_cpu.untyped_storage(), non_blocking=True) + ctx.tensor_cpu = None + unpermuted_tokens_grad = matmul_output_grad * probs.unsqueeze(-1) + set_prob_backward_need_tensors(matmul_output_grad, unpermuted_tokens) + else: + unpermuted_tokens_grad = args[0] + + unpermuted_tokens_grad = 
unpermuted_tokens_grad.reshape(ctx.unpermuted_tokens_shape) + + if not get_args().use_fused_moe_token_permute_and_unpermute or moe_hierarchical_alltoallv: + permuted_tokens_grad = unpermuted_tokens_grad.index_select(0, indices) + else: + permuted_tokens_grad = torch.zeros(size=ctx.permuted_tokens_shape, dtype=unpermuted_tokens_grad.dtype, + device=torch.npu.current_device()) + permuted_tokens_grad.index_add_(0, indices, unpermuted_tokens_grad) + + return permuted_tokens_grad, None, None, None, None diff --git a/model/train/yoco_moe/mindspeed/core/transformer/transformer.py b/model/train/yoco_moe/mindspeed/core/transformer/transformer.py new file mode 100644 index 000000000..2c3557393 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/core/transformer/transformer.py @@ -0,0 +1,289 @@ +import enum +import os +from functools import wraps + +from contextlib import nullcontext +import torch +import torch_npu +import torch.nn.functional as F + +from megatron import core +from megatron.training import get_args +from megatron.core.num_microbatches_calculator import get_num_microbatches +from megatron.core import tensor_parallel, parallel_state, mpu +from megatron.core.utils import make_viewless_tensor +from megatron.legacy.model.transformer import bias_dropout_add_fused_train, get_bias_dropout_add, bias_dropout_add_fused_inference +from megatron.legacy.model.enums import AttnMaskType, LayerType, AttnType +from mindspeed.model.transformer import should_recompute_activation +from mindspeed.core.tensor_parallel.random import CheckpointWithoutOutput +from mindspeed.core.fusions.fused_bias_swiglu import fused_swiglu +from mindspeed.core.transformer.moe.moe_utils import only_recompute_activation + + +def parallel_transformer_layer_init_wrapper(fn): + @wraps(fn) + def wrapper(self, *args, **kwargs): + from megatron.core.transformer.moe.moe_layer import MoELayer + from megatron.core.transformer.moe.experts import GroupedMLP, SequentialMLP + fn(self, *args, **kwargs) + if 
def parallel_transformer_checkpointed_forward_wrapper(forward_func):
    """Route the checkpointed forward to the custom or the wrapped implementation.

    The custom ``parallel_transformer_checkpointed_forward`` is used when the
    global args select ``recompute_method == 'block'`` or enable
    ``swap_attention``; in every other case ``forward_func`` is called.
    """

    @wraps(forward_func)
    def dispatch(*args, **kwargs):
        settings = get_args()
        use_custom = settings.recompute_method == 'block' or settings.swap_attention
        target = parallel_transformer_checkpointed_forward if use_custom else forward_func
        return target(*args, **kwargs)

    return dispatch
+ if not global_args.swap_attention: + l = 0 + while l < num_layers_per_pipeline_rank: + hidden_states = tensor_parallel.checkpoint( + custom(l, l + self.recompute_num_layers), + self.distribute_saved_activations, + hidden_states, attention_mask, + encoder_output, enc_dec_attn_mask, + None, None, None, None, rotary_pos_emb) + + l += self.recompute_num_layers + else: + for l in range(num_layers_per_pipeline_rank): + hidden_states = custom(l, l + 1)( + hidden_states, attention_mask, + encoder_output, enc_dec_attn_mask, + None, None, None, None, rotary_pos_emb) + elif self.recompute_method == 'block': + # Checkpoint the input activation of only a set number of individual + # Transformer layers and skip the rest. + # A method fully use the device memory removing redundant re-computation. + vpp_rank = mpu.get_virtual_pipeline_model_parallel_rank() + vpp_size = global_args.virtual_pipeline_model_parallel_size + if vpp_rank is None or not global_args.enable_recompute_layers_per_pp_rank: + vpp_rank = 0 + if vpp_size is None or not global_args.enable_recompute_layers_per_pp_rank: + vpp_size = 1 + for l in range(self.num_layers): + # The number of layers each pipeline rank recomputes is self.recompute_num_layers. + # If self.recompute_num_layers cannot divide exactly the number of layers in each pp rank, + # we try to balance the number of recomputed layers in each model chunk. + # e.g. with 8 layers, 2 stages, and 2 virtual stages, the assignment of + # layers to stages like (each list is a model chunk): + # Stage 0: [0, 1] [4, 5] + # Stage 1: [2, 3] [6, 7] + # With self.recompute_num_layers = 2, we will recompute layers 0,4 for stage 0, and 2,6 for stage 1. + # With self.recompute_num_layers = 3, we will recompute layers 0,1,4 for stage 0, and 2,3,6 for stage 1. 
+ def should_recompute(): + if global_args.reduce_recompute_for_last_chunk: + def is_last_layer(): + return (l == self.num_layers - 1) and mpu.is_pipeline_last_stage() + + return ((l * vpp_size + vpp_rank) < self.recompute_num_layers) and not is_last_layer() + else: + return (l * vpp_size + vpp_rank) < self.recompute_num_layers + + if should_recompute() and not global_args.swap_attention: + hidden_states = tensor_parallel.checkpoint( + custom(l, l + 1), + self.distribute_saved_activations, + hidden_states, attention_mask, + encoder_output, enc_dec_attn_mask, + None, None, None, None, rotary_pos_emb) + else: + hidden_states = custom(l, l + 1)( + hidden_states, attention_mask, + encoder_output, enc_dec_attn_mask, + None, None, None, None, rotary_pos_emb) + else: + raise ValueError("Invalid activation recompute method.") + + return hidden_states + + +def core_mlp_forward_wrapper(fn): + @wraps(fn) + def wrapper(self, *args, **kwargs): + self.layer_number = getattr(self, "layer_number", None) + is_recompute_activation = should_recompute_activation(self.layer_number) + if get_args().moe_alltoall_overlap_comm and not isinstance(args[-1], torch.Tensor): + moe_ctx = args[-1] + args = args[:-1] + + def activation_function(*function_args): + intermediate, bias = function_args + if bias is not None: + intermediate = intermediate + bias + if self.config.gated_linear_unit: + assert (self.config.activation_func == F.silu), 'Activation function must be silu when using fused_swiglu' + if not hasattr(self, 'origin_activation_func'): + self.origin_activation_func = self.activation_func + self.activation_func = fused_swiglu + intermediate = self.activation_func(intermediate) + else: + intermediate = self.activation_func(intermediate) + + return intermediate + + moe_zero_memory = get_args().moe_zero_memory + if not (is_recompute_activation or moe_zero_memory != "disable"): + if hasattr(self, 'origin_activation_func'): + self.activation_func = self.origin_activation_func + output, 
output_bias = fn(self, *args, **kwargs) + elif moe_zero_memory == "level1" and not only_recompute_activation(self.layer_number): + if self.shared_expert: + self.activation_function = activation_function + hidden_states = args[0] + fc1_out_parallel, bias_parallel = self.linear_fc1(hidden_states) + act_out_parallel = activation_function(fc1_out_parallel, bias_parallel) + output, output_bias = self.linear_fc2(act_out_parallel) + fc1_out_parallel.untyped_storage().resize_(0) + act_out_parallel.untyped_storage().resize_(0) + moe_ctx.shared_fc1_out = fc1_out_parallel + moe_ctx.shared_act_out = act_out_parallel + else: + output, output_bias = fn(self, *args, **kwargs) + else: + hidden_states = args[0] + intermediate_parallel, bias_parallel = self.linear_fc1(hidden_states) + self.activation_checkpoint_manager = CheckpointWithoutOutput() + intermediate_parallel = self.activation_checkpoint_manager.checkpoint(activation_function, + False, + intermediate_parallel, + bias_parallel) + # [s, b, h] + output, output_bias = self.linear_fc2(intermediate_parallel) + + # discard the output of the activation function, + # which will be restored by recomputation during backward. + self.activation_checkpoint_manager.discard_output() + + # when backward to output of dense_4h_to_h, + # recompute and restore the output of activation function. + if output.requires_grad: + output.register_hook(self.activation_checkpoint_manager.recompute) + return output, output_bias + return wrapper + + +def norm_recompute_forward( + self, + hidden_states, + attention_mask, + context=None, + context_mask=None, + rotary_pos_emb=None, + inference_params=None, + packed_seq_params=None, +): + # hidden_states: [s, b, h] + + # Residual connection. + residual = hidden_states + + # Optional Input Layer norm + self.norm_ckpt1 = CheckpointWithoutOutput() + input_layernorm_output = self.norm_ckpt1.checkpoint(self.input_layernorm, False, hidden_states) + + # Self attention. 
+ attention_output_with_bias = self.self_attention( + input_layernorm_output, + attention_mask=attention_mask, + inference_params=inference_params, + rotary_pos_emb=rotary_pos_emb, + packed_seq_params=packed_seq_params, + ) + + self.norm_ckpt1.discard_output() + if self.training: + attention_output_with_bias[0].register_hook(self.norm_ckpt1.recompute) + + with self.bias_dropout_add_exec_handler(): + hidden_states = self.self_attn_bda(self.training, self.config.bias_dropout_fusion)( + attention_output_with_bias, residual, self.hidden_dropout + ) + + # Residual connection. + residual = hidden_states + + # Optional Layer norm after self-attention + pre_cross_attn_layernorm_output = self.pre_cross_attn_layernorm(hidden_states) + + # Cross attention. + attention_output_with_bias = self.cross_attention( + pre_cross_attn_layernorm_output, + attention_mask=context_mask, + key_value_states=context, + inference_params=inference_params, + ) + + if isinstance(attention_output_with_bias, dict) and "context" in attention_output_with_bias: + context = attention_output_with_bias["context"] + + with self.bias_dropout_add_exec_handler(): + hidden_states = self.cross_attn_bda(self.training, self.config.bias_dropout_fusion)( + attention_output_with_bias, residual, self.hidden_dropout + ) + + # Residual connection. + residual = hidden_states + + # Optional Layer norm post the cross-attention. + self.norm_ckpt2 = CheckpointWithoutOutput() + pre_mlp_layernorm_output = self.norm_ckpt2.checkpoint(self.pre_mlp_layernorm, False, hidden_states) + + # MLP. 
def transformer_block_checkpointed_forward(
    self,
    hidden_states: Tensor,
    attention_mask: Tensor,
    context: Tensor,
    context_mask: Tensor,
    rotary_pos_emb: Tensor,
    packed_seq_params: PackedSeqParams,
):
    """Run the block's layers with activation checkpointing.

    Args:
        self: The transformer block; ``self.config``, ``self._get_layer`` and
            ``self.num_layers_per_pipeline_rank`` are read.
        hidden_states: Input to the first layer of this block.
        attention_mask: Forwarded unchanged to every layer.
        context: Forwarded to every layer and replaced by each layer's
            returned context.
        context_mask: Forwarded unchanged to every layer.
        rotary_pos_emb: Forwarded unchanged to every layer.
        packed_seq_params: Forwarded unchanged to every layer.

    Returns:
        The hidden states produced by the last layer. The final context is
        not returned.

    Raises:
        ValueError: If ``self.config.recompute_method`` is neither
            ``'uniform'`` nor ``'block'``.
    """

    def custom(start: int, end: int):
        # Build a callable that runs layers [start, end) back to back.
        def custom_forward(
            hidden_states,
            attention_mask,
            context,
            context_mask,
            rotary_pos_emb,
        ):
            for index in range(start, end):
                layer = self._get_layer(index)
                hidden_states, context = layer(
                    hidden_states=hidden_states,
                    attention_mask=attention_mask,
                    context=context,
                    context_mask=context_mask,
                    rotary_pos_emb=rotary_pos_emb,
                    inference_params=None,
                    packed_seq_params=packed_seq_params,
                )
            return hidden_states, context

        return custom_forward

    def checkpoint_handler(forward_func):
        # Reads the current hidden_states/context of the enclosing scope.
        if self.config.fp8:
            from transformer_engine.pytorch.distributed import checkpoint as te_checkpoint

            return te_checkpoint(
                forward_func,
                self.config.distribute_saved_activations,
                tensor_parallel.random.get_cuda_rng_tracker,
                parallel_state.get_tensor_model_parallel_group(),
                hidden_states,
                attention_mask,
                context,
                context_mask,
                rotary_pos_emb,
            )
        else:
            return tensor_parallel.checkpoint(
                forward_func,
                self.config.distribute_saved_activations,
                hidden_states,
                attention_mask,
                context,
                context_mask,
                rotary_pos_emb,
            )

    global_args = get_args()
    if self.config.recompute_method == 'uniform':
        # Uniformly divide the total number of Transformer layers and
        # checkpoint the input activation of each divided chunk.
        # A method to further reduce memory usage reducing checkpoints.
        if not global_args.swap_attention:
            l = 0
            while l < self.num_layers_per_pipeline_rank:
                # Each chunk spans recompute_num_layers layers, matching the
                # step below; the handler returns (hidden_states, context).
                hidden_states, context = checkpoint_handler(
                    custom(l, l + self.config.recompute_num_layers)
                )

                l += self.config.recompute_num_layers
        else:
            for l in range(self.num_layers_per_pipeline_rank):
                hidden_states, context = custom(l, l + 1)(
                    hidden_states,
                    attention_mask,
                    context,
                    context_mask,
                    rotary_pos_emb,
                )
    elif self.config.recompute_method == 'block':
        # Checkpoint the input activation of only a set number of individual
        # Transformer layers and skip the rest.
        # A method fully use the device memory removing redundant re-computation.
        vpp_rank = mpu.get_virtual_pipeline_model_parallel_rank()
        vpp_size = self.config.virtual_pipeline_model_parallel_size
        if vpp_rank is None or not global_args.enable_recompute_layers_per_pp_rank:
            vpp_rank = 0
        if vpp_size is None or not global_args.enable_recompute_layers_per_pp_rank:
            vpp_size = 1
        last_layer = self.num_layers_per_pipeline_rank - 1

        # The number of layers each pipeline rank recomputes is self.recompute_num_layers.
        # If self.recompute_num_layers cannot divide exactly the number of layers in each pp rank,
        # we try to balance the number of recomputed layers in each model chunk.
        # e.g. with 8 layers, 2 stages, and 2 virtual stages, the assignment of
        # layers to stages like (each list is a model chunk):
        # Stage 0: [0, 1] [4, 5]
        # Stage 1: [2, 3] [6, 7]
        # With self.recompute_num_layers = 2, we will recompute layers 0,4 for stage 0, and 2,6 for stage 1.
        # With self.recompute_num_layers = 3, we will recompute layers 0,1,4 for stage 0, and 2,3,6 for stage 1.
        def should_recompute(layer_idx):
            within_budget = (layer_idx * vpp_size + vpp_rank) < self.config.recompute_num_layers
            if within_budget and global_args.reduce_recompute_for_last_chunk:
                # Optionally exempt the final layer of the last pipeline stage.
                is_last_layer = layer_idx == last_layer and mpu.is_pipeline_last_stage()
                return not is_last_layer
            return within_budget

        for l in range(self.num_layers_per_pipeline_rank):
            if should_recompute(l) and not global_args.swap_attention:
                hidden_states, context = checkpoint_handler(custom(l, l + 1))
            else:
                hidden_states, context = custom(l, l + 1)(
                    hidden_states,
                    attention_mask,
                    context,
                    context_mask,
                    rotary_pos_emb,
                )
    else:
        # Returning here without running any layer would hand the input back
        # unchanged; fail loudly instead.
        raise ValueError("Invalid activation recompute method.")

    return hidden_states
def _build_layers(self):
    """Create ``self.layers`` and ``self.final_layernorm`` for this block.

    One module is built per entry of ``self.submodules.layer_specs``, numbered
    from 1. A layer whose global index (layer offset plus local number, minus
    one) is listed in the ``noop_layers`` set of the global args becomes a
    ``NoopTransformerLayer``; all others are built from their spec.
    """
    args = get_args()

    def build_layer(layer_spec, layer_number):
        global_layer_number = _get_layer_offset(args) + layer_number
        noop_layers = getattr(args, 'noop_layers', None)
        # noop_layers holds zero-based global indices, hence the "- 1".
        if isinstance(noop_layers, set) and global_layer_number - 1 in noop_layers:
            return NoopTransformerLayer(global_layer_number)
        return build_module(layer_spec, config=self.config, layer_number=layer_number)

    built_layers = [
        build_layer(spec, number)
        for number, spec in enumerate(self.submodules.layer_specs, start=1)
    ]
    self.layers = torch.nn.ModuleList(built_layers)

    wants_final_norm = (
        self.submodules.layer_norm and self.post_process and self.post_layer_norm
    )
    # Either None or nn.Identity would do when no final norm is built.
    self.final_layernorm = (
        build_module(
            self.submodules.layer_norm,
            config=self.config,
            hidden_size=self.config.hidden_size,
            eps=self.config.layernorm_epsilon,
        )
        if wants_final_norm
        else None
    )
+from dataclasses import make_dataclass, field +from functools import wraps + +import torch.nn.functional as F + +from megatron.core.transformer import TransformerConfig +from megatron.core.utils import init_method_normal, scaled_init_method_normal +from megatron.training import get_args + + +def transformer_config_post_init(self): + super(TransformerConfig, self).__post_init__() + if self.fp16 and self.bf16: + raise ValueError( + f'Only one of self.fp16: {self.fp16} and self.bf16 {self.bf16} should be True.' + ) + args = get_args() + world_size = args.tp_x if args.tp_2d else self.tensor_model_parallel_size + if self.num_attention_heads % world_size != 0: + if not args.unaligned_linear: + raise ValueError( + f"num_attention_heads ({self.num_attention_heads}) must be a multiple of " + f"tensor_model_parallel_size ({world_size})." + ) + + if self.ffn_hidden_size is None: + self.ffn_hidden_size = 4 * self.hidden_size + + if self.kv_channels is None: + self.kv_channels = self.hidden_size // self.num_attention_heads + + if self.num_query_groups is None: + self.num_query_groups = self.num_attention_heads + + if self.num_query_groups % world_size != 0: + if not args.unaligned_linear: + raise ValueError( + f"num_query_groups ({self.num_query_groups}) must be a multiple of " + f"tensor_model_parallel_size ({world_size})." 
+ ) + + if self.apply_query_key_layer_scaling: + self.attention_softmax_in_fp32 = True + + if self.expert_model_parallel_size > 1 and self.num_moe_experts is None: + raise ValueError(f'num_moe_experts must be non None to use expert-parallel.') + + if self.num_moe_experts is not None and self.num_moe_experts <= 0: + raise ValueError(f'num_moe_experts must be non-negative.') + + if self.moe_expert_capacity_factor is not None: + if self.moe_token_dispatcher_type != "alltoall": + raise ValueError( + f'moe_expert_capacity_factor only works with alltoall token dispatcher' + ) + if self.moe_expert_capacity_factor < 0: + self.moe_expert_capacity_factor = None + if self.moe_router_load_balancing_type not in ["aux_loss", "none"]: + raise ValueError( + f'moe_expert_capacity_factor only works with aux_loss or none load balancing' + ) + + if self.moe_pad_expert_input_to_capacity: + if self.moe_expert_capacity_factor is None: + raise ValueError( + f'moe_expert_capacity_factor must be set to use moe_pad_expert_input_to_capacity' + ) + + if self.cpu_offloading and ( + self.cpu_offloading_num_layers < 0 or self.cpu_offloading_num_layers >= self.num_layers + ): + raise ValueError( + f'CPU offloading can be done only for layers less than {self.num_layers}' + ) + + if self.cpu_offloading and self.pipeline_model_parallel_size > 1: + raise ValueError( + f'Currently there is no support for Pipeline parallelism with CPU offloading' + ) + + if self.cpu_offloading and self.recompute_granularity is not None: + raise ValueError( + f'CPU offloading does not work when activation recomputation is enabled' + ) + + if self.recompute_granularity is not None: + if self.recompute_granularity not in ['full', 'selective']: + raise ValueError( + f'When using recompute_granuarlity: {self.recompute_granularity} must be "full" or "selective".' 
+ ) + + if self.recompute_method is not None: + if self.recompute_method not in ['block', 'uniform']: + raise ValueError( + f'recompute_method: {self.recompute_method} must be "block" or "uniform".' + ) + elif self.recompute_granularity != 'selective': + raise ValueError( + f'Using recompute_granularity: {self.recompute_granularity} so recompute_method must be "block" or "uniform"' + ) + + if self.recompute_granularity != 'selective' and self.recompute_num_layers is None: + raise ValueError( + f'When using recompute_granularity: {self.recompute_granularity} recompute_num_layers must be between ' + f'1 and num_layers_per_pipeline_rank: {self.num_layers // self.pipeline_model_parallel_size}' + ) + elif ( + self.recompute_granularity == 'selective' and self.recompute_num_layers is not None + ): + raise ValueError( + f'When using recompute_granularity: {self.recompute_granularity} recompute_num_layers must be None.' + ) + + if self.distribute_saved_activations and self.sequence_parallel: + raise ValueError( + f'distribute_saved_activations: {self.distribute_saved_activations} must be false when sequence parallel is enabled: {self.sequence_parallel}' + ) + + if self.virtual_pipeline_model_parallel_size is not None: + if not self.num_layers % self.virtual_pipeline_model_parallel_size == 0: + raise ValueError( + f'num_layers: {self.num_layers} must be divisible by virtual_model_parallel_size {self.virtual_pipeline_model_parallel_size}' + ) + + if self.apply_query_key_layer_scaling: + self.attention_softmax_in_fp32 = True + + if self.bias_activation_fusion: + if self.activation_func not in [F.gelu, F.silu]: + raise ValueError( + "When bias_activation_fusion is True, activation function should be either gelu or swiglu" + ) + if ( + self.activation_func == F.gelu + and not self.gated_linear_unit + and not self.add_bias_linear + ): + raise ValueError( + "When bias_activation_fusion is True, gated_linear_unit is False, " + "and activation function is gelu, add_bias_linear must 
def transformer_config_post_init_wrapper(fn):
    """Decorate a config ``__post_init__`` so the instance also carries the global args.

    After the wrapped ``fn`` runs, every entry of ``vars(get_args())`` that the
    instance does not already expose is declared as an ``init=False`` field on a
    freshly made dataclass subclass (which becomes the instance's class) and is
    then copied onto the instance. Attributes the instance already has are left
    untouched.
    """
    @wraps(fn)
    def wrapper(self):
        fn(self)
        global_args = vars(get_args())
        current_cls = self.__class__
        # Declare a field for each argument the config does not know about yet.
        extra_fields = [
            (str(name), type(val), field(init=False))
            for name, val in global_args.items()
            if not hasattr(self, name)
        ]
        self.__class__ = make_dataclass(current_cls.__name__, fields=extra_fields, bases=(current_cls,))

        # Copy the actual values; existing attributes win over global args.
        for name, val in global_args.items():
            if not hasattr(self, name):
                setattr(self, name, val)
    return wrapper
class WeightGradStore:
    """Class-level store that defers weight-gradient matmuls so they can overlap with all-gathers.

    All state lives in class attributes and every method is a classmethod.
    ``put`` queues the operands of one weight-gradient computation; ``pop``
    drains the queue, running the matmul of the current entry while the
    all-gather needed by the next entry is in flight. When
    ``get_args().use_nanopipe_swap`` is set, queued tensors are additionally
    copied to pinned host memory (``swap_d2h``) and restored later by
    ``swap_tensors``.
    """

    # Pending entries: (total_input, grad_output, weight, sequence_parallel, in_row, pipe_experts).
    cache = []
    # Not referenced by any method of this class.
    weight_grad_queue = queue.Queue()
    # Entry whose matmul is executed next inside ``pop``.
    store_grad_cache = []
    # Reset to None at the end of ``pop``; declared here so the attribute always exists.
    stored_grads = None
    # Gradients recorded by ``save_grad_output``; consumed for ``pipe_experts`` entries.
    grad_store = []
    # Events recorded by ``swap_tensors``; ``gather`` takes one per call when swapping is enabled.
    swap_event = []
    # Stream used for host<->device swap copies (created lazily).
    prefetch_stream = None
    # Stream used for all-gathers (created lazily in ``pop``).
    gather_stream = None
    # (storage_size, cpu_tensor) pairs returned by ``swap_d2h``.
    host_tensors_gradoutput = []
    host_pipe_experts_grad = []
    host_tensors_input = []
    # Device tensors that were copied to host; their storages are released in ``resize_ori_storage``.
    ori_storage = []
    is_decoupleBlock = False
    # Number of matmuls executed since the counter was last reset.
    grad_overlap_count = 0
    # Number of ``put`` calls since the last ``flush`` and the history of those counts.
    interval_per_layers_count = 0
    interval_per_layers = []

    @classmethod
    def _ensure_prefetch_stream(cls):
        """Create the swap stream on first use and return it."""
        if cls.prefetch_stream is None:
            cls.prefetch_stream = torch_npu.npu.Stream(device=torch.npu.current_device())
        return cls.prefetch_stream

    @classmethod
    def put(cls, total_input, grad_output, weight, sequence_parallel, in_row=False, pipe_experts=False):
        """Queue one deferred weight-gradient computation.

        With nanopipe swap enabled, ``total_input`` (and ``grad_output`` when it is
        not None) are also copied to host memory on the prefetch stream.
        """
        if get_args().use_nanopipe_swap:
            stream = cls._ensure_prefetch_stream()
            if grad_output is not None:
                cls.host_tensors_gradoutput.append(swap_d2h(grad_output, stream))
            cls.host_tensors_input.append(swap_d2h(total_input, stream))
        cls.interval_per_layers_count += 1
        cls.cache.append((total_input, grad_output, weight, sequence_parallel, in_row, pipe_experts))

    @classmethod
    def flush(cls):
        """Record how many entries were queued since the previous flush and restart the count."""
        cls.interval_per_layers.append(cls.interval_per_layers_count)
        cls.interval_per_layers_count = 0

    @classmethod
    def save_grad_output(cls, grad):
        """Remember ``grad`` for a later ``pipe_experts`` entry (and swap it to host if enabled)."""
        if get_args().use_nanopipe_swap:
            stream = cls._ensure_prefetch_stream()
            cls.host_pipe_experts_grad.append(swap_d2h(grad, stream))
        cls.grad_store.append(grad)

    @classmethod
    def start_decouple(cls):
        """Set the ``is_decoupleBlock`` flag."""
        cls.is_decoupleBlock = True

    @classmethod
    def end_decouple(cls):
        """Clear the ``is_decoupleBlock`` flag."""
        cls.is_decoupleBlock = False

    @classmethod
    def overlap_all_gather(cls):
        """Pop the oldest queued entry and start the all-gather it needs.

        Used for the grad_output all-gather in RowParallel and the input
        all-gather in ColumnParallel.

        Returns:
            ``(entry, handle)``. Without sequence parallelism the entry is returned
            unchanged and ``handle`` is None; otherwise the gathered tensor replaces
            the input (``in_row`` False) or the grad output (``in_row`` True) and
            ``handle`` is whatever ``gather`` returned.

        Raises:
            RuntimeError: if the queue is empty.
        """
        if len(cls.cache) == 0:
            # RuntimeError is still caught by callers that handle Exception.
            raise RuntimeError("All Gather empty queue.")
        input_slice, grad_output_slice, weight, sequence_parallel, in_row, pipe_experts = cls.cache.pop(0)
        if not sequence_parallel:
            return (input_slice, grad_output_slice, weight, sequence_parallel, in_row, pipe_experts), None
        if not in_row:
            total_input, handle = gather(input_slice, cls.gather_stream)
            grad_output = grad_output_slice
        else:
            if pipe_experts and not get_args().use_nanopipe_swap:
                grad_output_slice = cls.grad_store.pop(0)
            grad_output, handle = gather(grad_output_slice, cls.gather_stream)
            total_input = input_slice
        return [total_input, grad_output, weight, sequence_parallel, in_row, pipe_experts], handle

    @classmethod
    def swap_tensors(cls):
        """Copy every queued entry's input and grad output back from host memory.

        Does nothing unless nanopipe swap is enabled. One event is recorded on the
        prefetch stream per entry and appended to ``swap_event``.
        """
        if get_args().use_nanopipe_swap:
            stream = cls._ensure_prefetch_stream()
            stream.wait_stream(torch.npu.current_stream())
            for cache_id in range(len(cls.cache)):
                # Entries are stored as tuples; make them mutable so grad_output can be filled in.
                cls.cache[cache_id] = list(cls.cache[cache_id])
                if cls.cache[cache_id][-1] and cls.cache[cache_id][1] is None:
                    cls.cache[cache_id][1] = cls.grad_store.pop(0)
                input_slice, grad_output_slice, weight, sequence_parallel, in_row, pipe_experts = cls.cache[cache_id]
                if pipe_experts:
                    storage_size_g, tensor_cpu_g = cls.host_pipe_experts_grad.pop(0)
                else:
                    storage_size_g, tensor_cpu_g = cls.host_tensors_gradoutput.pop(0)
                storage_size_i, tensor_cpu_i = cls.host_tensors_input.pop(0)
                swap_h2d(grad_output_slice, tensor_cpu_g, storage_size_g, stream)
                swap_h2d(input_slice, tensor_cpu_i, storage_size_i, stream)
                cls.swap_event.append(stream.record_event())

    @classmethod
    def overlap_matmul(cls, grad_store_cache):
        """Accumulate the weight gradient of one entry into ``weight.main_grad``.

        Uses the fused ``wgrad_gemm_accum_*`` kernels when
        ``gradient_accumulation_fusion`` is enabled, otherwise a plain matmul.

        Raises:
            RuntimeError: if fusion is enabled and ``main_grad`` has an unsupported dtype.
        """
        total_input, grad_output, weight, sequence_parallel, in_row, pipe_experts = grad_store_cache
        grad_output = grad_output.contiguous()
        sb = grad_output.shape[0] * grad_output.shape[1]
        # Convert the tensor shapes to 2D for execution compatibility
        grad_output = grad_output.view(
            sb, grad_output.shape[2]
        )
        total_input = total_input.view(
            sb, total_input.shape[2]
        )
        if get_args().gradient_accumulation_fusion:
            import fused_weight_gradient_mlp_cuda
            if weight.main_grad.dtype == torch.float32:
                fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32(
                    total_input, grad_output, weight.main_grad
                )
            elif weight.main_grad.dtype in (torch.float16, torch.bfloat16):
                fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16(
                    total_input, grad_output, weight.main_grad
                )
            else:
                raise RuntimeError("Unsupported gradient type for gradient accumulation fusion")
        else:
            grad_weight = grad_output.t().matmul(total_input)
            weight.main_grad.data.add_(grad_weight)
        cls.grad_overlap_count += 1

    @classmethod
    def pop(cls, overlap_arg=None):
        """Drain the queue, overlapping each matmul with the next entry's all-gather.

        Args:
            overlap_arg: required when ``get_args().overlap_grad_reduce`` is set; a
                5-tuple ``(pipeline_parallel_size, nano_flag, synchronized_model_chunks,
                grad_sync_func, model)`` used to trigger per-chunk gradient sync.

        Raises:
            RuntimeError: if ``overlap_grad_reduce`` is set and ``overlap_arg`` is None.
        """
        if len(cls.cache) == 0:
            return
        if cls.gather_stream is None:
            cls.gather_stream = torch_npu.npu.Stream(device=torch.npu.current_device())
        if get_args().overlap_grad_reduce:
            if overlap_arg is None:
                raise RuntimeError("overlap_arg is invalid")
            pipeline_parallel_size, nano_flag, synchronized_model_chunks, grad_sync_func, model = overlap_arg
            model_chunk_id = len(nano_flag) - 1
        input_slice, grad_output_slice, weight, sequence_parallel, in_row, pipe_experts = cls.cache.pop(0)
        if not sequence_parallel:
            grad_output = grad_output_slice
            handle = None
        else:
            if pipe_experts and not get_args().use_nanopipe_swap:
                grad_output_slice = cls.grad_store.pop(0)
            grad_output, handle = gather(grad_output_slice, cls.gather_stream)
        layers_count = 0
        cls.store_grad_cache = (input_slice, grad_output, weight, sequence_parallel, in_row, pipe_experts)
        while len(cls.cache) > 0:
            if handle is not None:
                handle.wait()
            # Launch the gather for the next entry before computing the current one.
            next_grad_cache, handle = cls.overlap_all_gather()
            cls.overlap_matmul(cls.store_grad_cache)
            if get_args().overlap_grad_reduce:
                if cls.grad_overlap_count == cls.interval_per_layers[0]:
                    cls.interval_per_layers.pop(0)
                    layers_count += 1
                    if layers_count == pipeline_parallel_size:
                        if model_chunk_id not in synchronized_model_chunks and not nano_flag[model_chunk_id]:
                            grad_sync_func[model_chunk_id](model[model_chunk_id].parameters())
                            synchronized_model_chunks.add(model_chunk_id)
                        model_chunk_id -= 1
                        layers_count = 0
                    cls.grad_overlap_count = 0
            cls.store_grad_cache = next_grad_cache
        if handle is not None:
            handle.wait()
        cls.overlap_matmul(cls.store_grad_cache)
        if get_args().overlap_grad_reduce:
            if model_chunk_id not in synchronized_model_chunks and not nano_flag[model_chunk_id]:
                grad_sync_func[model_chunk_id](model[model_chunk_id].parameters())
                synchronized_model_chunks.add(model_chunk_id)
            model_chunk_id -= 1
            cls.grad_overlap_count = 0
        cls.stored_grads = None
        cls.store_grad_cache = None
        cls.swap_event = []
        cls.grad_store = []
        cls.host_pipe_experts_grad = []
        cls.interval_per_layers = []
        cls.interval_per_layers_count = 0

    @classmethod
    def resize_ori_storage(cls, use_nano_swap):
        """Release the device storage of every tensor that was swapped to host.

        Waits for the prefetch stream first so pending copies have finished.
        """
        if use_nano_swap and len(cls.ori_storage) > 0:
            torch.npu.current_stream().wait_stream(cls.prefetch_stream)
            for ori_storage_ in cls.ori_storage:
                ori_storage_.storage().resize_(0)
            cls.ori_storage = []
class MindSpeedFeature:
    """Base class for an optional MindSpeed feature.

    Subclasses override the hook methods below to add CLI arguments, validate
    them and register monkey patches. The hooks defined here do nothing.

    Attributes:
        feature_name: the name passed in, stripped and with ``-`` replaced by ``_``
            so it can be used as an attribute name on the parsed args.
        optimization_level: the level passed to the constructor.
        default_patches: True exactly when ``optimization_level`` is 0.
    """

    def __init__(self, feature_name: str, optimization_level: int = 2):
        self.feature_name = feature_name.strip().replace('-', '_')
        self.optimization_level = optimization_level
        self.default_patches = self.optimization_level == 0

    def register_args(self, parser):
        """Hook: add this feature's command-line arguments to ``parser``."""
        pass

    def pre_validate_args(self, args):
        """Hook: run before argument validation."""
        pass

    def validate_args(self, args):
        """Hook: validate this feature's arguments."""
        pass

    def post_validate_args(self, args):
        """Hook: run after argument validation."""
        pass

    def register_patches(self, patch_manager, args):
        """Hook: register this feature's patches on ``patch_manager``."""
        ...

    def incompatible_check(self, global_args, check_args):
        """Raise AssertionError if this feature and ``check_args`` are both truthy on ``global_args``."""
        if getattr(global_args, self.feature_name, None) and getattr(global_args, check_args, None):
            raise AssertionError('{} and {} are incompatible.'.format(self.feature_name, check_args))

    def dependency_check(self, global_args, check_args):
        """Raise AssertionError if this feature is enabled but ``check_args`` is not truthy."""
        if getattr(global_args, self.feature_name, None) and not getattr(global_args, check_args, None):
            raise AssertionError('{} requires {}.'.format(self.feature_name, check_args))

    @staticmethod
    def add_parser_argument_choices_value(parser, argument_name, new_choice):
        """Allow ``new_choice`` for every existing option named ``argument_name``.

        Options without a ``choices`` restriction, or that already accept
        ``new_choice``, are left alone. A list of choices is extended in place;
        any other container (argparse also accepts tuples, sets, ranges, ...) is
        replaced by a list with the new value appended, because such containers
        may not support ``append``.
        """
        for action in parser._actions:
            if argument_name not in action.option_strings:
                continue
            if action.choices is None or new_choice in action.choices:
                continue
            if isinstance(action.choices, list):
                action.choices.append(new_choice)
            else:
                action.choices = [*action.choices, new_choice]
class UnalignedLinearFeature(MindSpeedFeature):
    """Feature switch that swaps Megatron's parallel linear layers for the unaligned adaptors."""

    def __init__(self):
        super().__init__('unaligned-linear')

    def register_args(self, parser: ArgumentParser):
        """Add the ``--unaligned-linear`` flag in its own argument group."""
        feature_group = parser.add_argument_group(title=self.feature_name)
        feature_group.add_argument(
            '--unaligned-linear',
            action='store_true',
            help='Replace ColumnParallelLinear/RowParallelLinear with '
                 'UnalignedColumnParallelLinearAdaptor/UnalignedRowParallelLinearAdaptor.',
        )

    def validate_args(self, args):
        """Reject combinations with mc2, 2D tensor parallelism and multi-expert MoE models."""
        for conflicting_flag in ('use_ascend_mc2', 'tp_2d'):
            self.incompatible_check(args, conflicting_flag)
        uses_moe = args.num_experts and args.num_experts > 1
        if args.unaligned_linear and uses_moe:
            raise AssertionError("The unaligned linear feature does not support the moe model.")

    def register_patches(self, patch_manager, args):
        """Register every patch the feature needs; a no-op when the flag is off."""
        from mindspeed.core.tensor_parallel.unaligned_layers.adaptor import divide_adaptor, \
            scatter_to_sequence_parallel_region_adaptor, get_rotary_seq_len, UnalignedColumnParallelLinearAdaptor, \
            UnalignedRowParallelLinearAdaptor, reduce_scatter_to_sequence_parallel_region_adaptor, \
            gather_from_sequence_parallel_region_adaptor
        from mindspeed.core.transformer.transformer_config import transformer_config_post_init
        from mindspeed.core.transformer.dot_product_attention import dot_product_attention_init_wrapper
        from mindspeed.core.transformer.attention import attention_init_wrapper

        if not getattr(args, self.feature_name, None):
            return

        # Registration order is kept exactly as listed.
        patch_table = (
            # The linear layers themselves
            ('megatron.core.tensor_parallel.layers.ColumnParallelLinear',
             UnalignedColumnParallelLinearAdaptor),
            ('megatron.core.tensor_parallel.layers.RowParallelLinear',
             UnalignedRowParallelLinearAdaptor),
            # To adapt to the distribution of MHA attention heads
            ('megatron.core.utils.divide', divide_adaptor),
            ('megatron.core.models.common.embeddings.rotary_pos_embedding.RotaryEmbedding.get_rotary_seq_len',
             get_rotary_seq_len),
            ('megatron.core.transformer.transformer_config.TransformerConfig.__post_init__',
             transformer_config_post_init),
            # To adapt to the distribution of GQA attention heads
            ('megatron.core.transformer.dot_product_attention.DotProductAttention.__init__',
             dot_product_attention_init_wrapper),
            ('megatron.core.transformer.attention.Attention.__init__',
             attention_init_wrapper),
            ('megatron.core.tensor_parallel.mappings.gather_from_sequence_parallel_region',
             gather_from_sequence_parallel_region_adaptor),
            # To adapt to the sequence parallel feature
            ('megatron.core.tensor_parallel.mappings.scatter_to_sequence_parallel_region',
             scatter_to_sequence_parallel_region_adaptor),
            ('megatron.core.tensor_parallel.mappings.reduce_scatter_to_sequence_parallel_region',
             reduce_scatter_to_sequence_parallel_region_adaptor),
        )
        for target, replacement in patch_table:
            patch_manager.register_patch(target, replacement)
def deter_comp_wrapper(fn):
    """Wrap a random-seed setter so NPU deterministic computing is enabled after seeding.

    Args:
        fn: the seed function to wrap; it is called as
            ``fn(seed_, data_parallel_random_init=...)``.

    Returns:
        A wrapper with the signature ``(seed_, data_parallel_random_init=False)``
        that calls ``fn``, then ``extend_seed_all(seed_)``, and logs on rank 0.
    """
    @wraps(fn)
    def wrapper(seed_, data_parallel_random_init=False):
        # Forward the caller's value. Previously False was hard-coded here, so a
        # request for data-parallel random init was silently dropped.
        fn(seed_, data_parallel_random_init=data_parallel_random_init)
        extend_seed_all(seed_)
        print_rank_0("deterministic computing is applied for npu.")
    return wrapper
def lcm(a, b):
    """Return the least common multiple of the integers ``a`` and ``b``.

    Fallback for ``math.lcm`` on Python < 3.9, following its conventions: the
    result is never negative, and it is 0 when either argument is 0 (the
    previous formula divided by ``gcd(0, 0) == 0`` in that case).
    """
    import math
    if a == 0 or b == 0:
        return 0
    return abs(a * b) // math.gcd(a, b)
def multi_tensor_l2norm(overflow_buf, tensor_lists, per_parameter):
    """Compute the global L2 norm over every tensor in ``tensor_lists``.

    Stand-in for ``amp_C.multi_tensor_l2norm``.

    Args:
        overflow_buf: unused; kept for signature compatibility.
        tensor_lists: iterable of iterables of tensors.
        per_parameter: when truthy, also collect each tensor's own L2 norm.

    Returns:
        ``(total_norm, per_tensor)`` where ``per_tensor`` is a list holding one
        norm per tensor (in iteration order) or None when ``per_parameter`` is
        falsy.
    """
    total_norm = 0.0
    norm_type = 2.0
    ret_per_tensor = [] if per_parameter else None
    for grads_for_norm in tensor_lists:
        for grad in grads_for_norm:
            grad_norm = torch.norm(grad, norm_type)
            total_norm += grad_norm ** norm_type
            if per_parameter:
                # Report this tensor's own norm. Previously the running sum of
                # squares was stored here, which is not a per-tensor quantity.
                ret_per_tensor.append(grad_norm.clone())
    if not tensor_lists:
        grad_norm = torch.cuda.FloatTensor([0])
        total_norm = grad_norm ** norm_type
    return total_norm ** (1 / norm_type), ret_per_tensor
def torch_adaptation(aspm):
    """Register the patches applied to core ``torch`` (and ``math``) APIs."""
    torch_patches = (
        ('torch.nn.parameter.Parameter.type', type_wrapper),
        ('torch.Tensor.type', type_wrapper),
        ('torch.Tensor.view', ensure_contiguous_wrapper),
        ('torch.distributed._all_gather_base', all_gather_into_tensor),
        ('torch.distributed._reduce_scatter_base', reduce_scatter_tensor),
    )
    for target, replacement in torch_patches:
        aspm.register_patch(target, replacement)

    # math.lcm only exists on Python >= 3.9, so older interpreters get the local fallback.
    needs_lcm_fallback = sys.version_info < (3, 9)
    if needs_lcm_fallback:
        aspm.register_patch('math.lcm', lcm, create_dummy=True)
def mcore_models_adaptation_l0(aspm):
    """Register the level-0 model patches: the GPT local layer spec and the NCCL options getter."""
    from .core.models.gpt.gpt_layer_specs import get_gpt_layer_local_spec_wrapper
    from .core.parallel_state import get_nccl_options_wrapper

    model_patches = (
        # Replace FusedLayerNorm with MindSpeed's PTNorm operator in the GPT layer spec
        ('megatron.core.models.gpt.gpt_layer_specs.get_gpt_layer_local_spec',
         get_gpt_layer_local_spec_wrapper),
        ('megatron.core.parallel_state.get_nccl_options', get_nccl_options_wrapper),
    )
    for target, replacement in model_patches:
        aspm.register_patch(target, replacement)
aspm.register_patch('megatron.core.models.common.embeddings.language_model_embedding.LanguageModelEmbedding.forward', + language_model_embedding_forward_wrapper) + from .core.models.common.embeddings.rotary_pos_embedding import rotary_embedding_get_rotary_seq_len_wrapper + aspm.register_patch('megatron.core.models.common.embeddings.rotary_pos_embedding.RotaryEmbedding.get_rotary_seq_len', + rotary_embedding_get_rotary_seq_len_wrapper) + # Fix DDP scaling factor with Context Parallel + from .core.data_parallel.distributed_data_parallel import distributed_data_parallel_init_with_cp + aspm.register_patch('megatron.core.distributed.distributed_data_parallel.DistributedDataParallel.__init__', + distributed_data_parallel_init_with_cp) + + if not mindspeed_args.automated_pipeline and mindspeed_args.noop_layers: + from .core.transformer.transformer_block import _build_layers + from .core.transformer.moe.moe_utils import track_moe_metrics + from megatron.core.transformer.transformer_block import TransformerBlock + from mindspeed.training import num_floating_point_wrapper + TransformerBlock._build_layers = _build_layers + aspm.register_patch('megatron.training.training.num_floating_point_operations', num_floating_point_wrapper) + aspm.register_patch('megatron.core.transformer.moe.moe_utils.track_moe_metrics', track_moe_metrics) + + + if mindspeed_args.recompute_norm: + from .core.models.gpt.gpt_layer_specs import build_norm_recompute_layer_wrapper + aspm.register_patch('megatron.core.transformer.transformer_block.TransformerBlock._build_layers', build_norm_recompute_layer_wrapper) + + if getattr(mindspeed_args, 'reset_attention_mask', False): + from .core.datasets.gpt_dataset import _get_ltor_masks_and_position_ids, collate_wrapper + from .utils import get_batch_on_this_cp_rank_wrapper + aspm.register_patch('megatron.core.datasets.gpt_dataset._get_ltor_masks_and_position_ids', _get_ltor_masks_and_position_ids) + 
def mcore_transformer_adaptation_l0(aspm):
    """Register the level-0 transformer patches: PTNorm as the norm layer and the attention overrides."""
    import megatron.core
    from .core.transformer.custom_layers.transformer_engine import PTNorm
    from .core.transformer.dot_product_attention import dot_product_attention_forward_wrapper, \
        dot_product_attention_init

    # Assigned directly on the module rather than through the patch manager.
    megatron.core.transformer.transformer_block.LayerNormImpl = PTNorm

    attention_cls = 'megatron.core.transformer.dot_product_attention.DotProductAttention'
    transformer_patches = (
        ('megatron.core.transformer.custom_layers.transformer_engine.TENorm', PTNorm),
        # Add cp parameters to the DotProductAttention init, and add fusion
        # attention support for alibi in non-cp situations
        (attention_cls + '.__init__', dot_product_attention_init),
        (attention_cls + '.forward', dot_product_attention_forward_wrapper),
    )
    for target, replacement in transformer_patches:
        aspm.register_patch(target, replacement)
mcore_transformer_adaptation(aspm, args): + from .core.transformer.module import megatron_module_init_wrapper + from .core.transformer.attention import (attention_init, SelfAttentionSubmodules, + self_attention_init_wrapper, attention_forward_wrapper) + from .core.transformer.transformer_block import transformer_block_checkpointed_forward_wrapper + from .core.transformer.transformer import parallel_transformer_layer_init_wrapper + from .core.transformer.transformer import core_mlp_forward_wrapper + from .core.transformer.mlp import mlp_init_2d_wrapper + from .core.transformer.transformer_block import transformer_block_forward_wrapper + aspm.register_patch('megatron.core.transformer.attention.SelfAttentionSubmodules', SelfAttentionSubmodules) + aspm.register_patch('megatron.core.transformer.attention.SelfAttention.__init__', self_attention_init_wrapper) + aspm.register_patch("megatron.core.transformer.attention.Attention.forward", attention_forward_wrapper) + aspm.register_patch('megatron.core.transformer.attention.Attention.__init__', attention_init) + aspm.register_patch('megatron.core.transformer.module.MegatronModule.__init__', megatron_module_init_wrapper) + aspm.register_patch('megatron.core.transformer.transformer_block.TransformerBlock._checkpointed_forward', + transformer_block_checkpointed_forward_wrapper) + aspm.register_patch('megatron.core.transformer.transformer_layer.TransformerLayer.__init__', + parallel_transformer_layer_init_wrapper) + aspm.register_patch('megatron.core.transformer.mlp.MLP.forward', + core_mlp_forward_wrapper) + aspm.register_patch('megatron.core.transformer.mlp.MLP.__init__', mlp_init_2d_wrapper) + aspm.register_patch('megatron.core.transformer.transformer_block.TransformerBlock.forward', + transformer_block_forward_wrapper) + if hasattr(args, "multi_head_latent_attention") and args.multi_head_latent_attention: + from mindspeed.core.transformer.attention import self_attention_init_mla_wrapper + 
aspm.register_patch('megatron.core.transformer.attention.SelfAttention.__init__', self_attention_init_mla_wrapper) + + +def mcore_parallel_state_adaptation(aspm): + from .core.parallel_state import initialize_model_parallel_wrapper + from .core.parallel_state import destroy_model_parallel_wrapper + from .core.memory.auto_pipeline.autopipeline_solver import destroy_model_parallel_profiling_wrapper + from .core.parallel_state import get_context_parallel_group_for_send_recv_overlap + aspm.register_patch('megatron.core.parallel_state.initialize_model_parallel', + initialize_model_parallel_wrapper) + aspm.register_patch('megatron.core.parallel_state.destroy_model_parallel', + destroy_model_parallel_wrapper) + aspm.register_patch('megatron.core.parallel_state.destroy_model_parallel', + destroy_model_parallel_profiling_wrapper) + aspm.register_patch('megatron.core.parallel_state.get_context_parallel_group_for_send_recv_overlap', + get_context_parallel_group_for_send_recv_overlap) + + +def mcore_fusions_adaptation(aspm, args): + from .core.fusions.fused_bias_swiglu import SwiGLUFunction, BiasSwiGLUFunction + from .core.fusions.fused_layer_norm import FusedLayerNormAffineFunction, FastLayerNormFN + from .core.fusions.fused_softmax import is_kernel_available, ScaledUpperTriangMaskedSoftmax, ScaledMaskedSoftmax, \ + ScaledSoftmax, forward_fused_softmax + from .core.models.common.embeddings.rotary_pos_embedding import apply_rotary_pos_emb_bshd + aspm.register_patch('megatron.core.fusions.fused_layer_norm.FusedLayerNormAffineFunction', + FusedLayerNormAffineFunction) + aspm.register_patch('megatron.core.fusions.fused_layer_norm.FastLayerNormFN', FastLayerNormFN) + aspm.register_patch('megatron.core.fusions.fused_softmax.ScaledUpperTriangMaskedSoftmax', + ScaledUpperTriangMaskedSoftmax) + aspm.register_patch('megatron.core.fusions.fused_softmax.ScaledMaskedSoftmax', ScaledMaskedSoftmax) + aspm.register_patch('megatron.core.fusions.fused_softmax.ScaledSoftmax', ScaledSoftmax) + 
aspm.register_patch('megatron.core.fusions.fused_softmax.FusedScaleMaskSoftmax.is_kernel_available', + is_kernel_available) + aspm.register_patch('megatron.core.fusions.fused_softmax.FusedScaleMaskSoftmax.forward_fused_softmax', + forward_fused_softmax) + aspm.register_patch('megatron.core.fusions.fused_bias_swiglu.SwiGLUFunction', SwiGLUFunction) + aspm.register_patch('megatron.core.fusions.fused_bias_swiglu.BiasSwiGLUFunction', BiasSwiGLUFunction) + + aspm.register_patch('megatron.core.models.common.embeddings.rotary_pos_embedding.apply_rotary_pos_emb_bshd', + apply_rotary_pos_emb_bshd) + if hasattr(args, 'use_fused_moe_token_permute_and_unpermute') and args.use_fused_moe_token_permute_and_unpermute: + from .core.fusions.npu_moe_token_permute import permute_wrapper + from .core.fusions.npu_moe_token_unpermute import unpermute_wrapper + aspm.register_patch('megatron.core.transformer.moe.moe_utils.permute', permute_wrapper) + aspm.register_patch('megatron.core.transformer.moe.moe_utils.unpermute', unpermute_wrapper) + if args.npu_deterministic: + from mindspeed.initialize import deter_comp_wrapper + aspm.register_patch('megatron.training.initialize._set_random_seed', deter_comp_wrapper) + + +def mcore_optimizer_adapation(aspm, mindspeed_args): + from .optimizer.distrib_optimizer import reuse_fp32_param_distrib_optimizer_init_wrapper + from .optimizer.optimizer import (step_with_ready_grads, prepare_grads, + reuse_fp32_param_init_wrapper, optimizer_config_init_wrapper) + from .core.distributed.param_and_grad_buffer import reuse_fp32_param_param_and_grad_buffer_init_wrapper + # optim relative. 
+ aspm.register_patch('megatron.core.optimizer.optimizer.MixedPrecisionOptimizer.prepare_grads', + prepare_grads) + aspm.register_patch('megatron.core.optimizer.optimizer.MixedPrecisionOptimizer.step_with_ready_grads', + step_with_ready_grads) + aspm.register_patch('megatron.core.optimizer.optimizer.Float16OptimizerWithFloat16Params.__init__', + reuse_fp32_param_init_wrapper) + aspm.register_patch('megatron.core.optimizer.optimizer_config.OptimizerConfig.__init__', + optimizer_config_init_wrapper) + aspm.register_patch('megatron.core.optimizer.distrib_optimizer.DistributedOptimizer.__init__', + reuse_fp32_param_distrib_optimizer_init_wrapper) + aspm.register_patch('megatron.core.distributed.ParamAndGradBuffer.__init__', + reuse_fp32_param_param_and_grad_buffer_init_wrapper) + + if mindspeed_args.param_and_grad_buffer_pad: + from .core.distributed.param_and_grad_buffer import param_and_grad_buffer_init_pad + aspm.register_patch('megatron.core.distributed.ParamAndGradBuffer.__init__', + param_and_grad_buffer_init_pad) + + +def mcore_pipeline_parallel_adaptation(aspm, mindspeed_args): + from .core.pipeline_parallel.schedules import get_tensor_shapes_wrapper, get_forward_backward_func_wrapper + from .core.performance.auto_pipeline_perf.schedules import get_forward_backward_func_decorator, \ + backward_step_decorator, forward_step_decorator + + aspm.register_patch('megatron.core.pipeline_parallel.schedules.get_forward_backward_func', + get_forward_backward_func_wrapper) + aspm.register_patch('megatron.core.pipeline_parallel.schedules.get_forward_backward_func', + get_forward_backward_func_decorator) + aspm.register_patch('megatron.core.pipeline_parallel.schedules.backward_step', + backward_step_decorator) + aspm.register_patch('megatron.core.pipeline_parallel.schedules.forward_step', + forward_step_decorator) + aspm.register_patch('megatron.core.pipeline_parallel.schedules.get_tensor_shapes', + get_tensor_shapes_wrapper) + if mindspeed_args.optimize_vpp_send_recv_comm: 
+ from .core.pipeline_parallel.p2p_communication import _p2p_ops_send_recv_overlap + aspm.register_patch('megatron.core.pipeline_parallel.p2p_communication._p2p_ops', + _p2p_ops_send_recv_overlap) + if mindspeed_args.variable_seq_lengths: + from .core.pipeline_parallel.p2p_communication import _communicate_shapes, _communicate + aspm.register_patch('megatron.core.pipeline_parallel.p2p_communication._communicate', + _communicate) + aspm.register_patch('megatron.core.pipeline_parallel.p2p_communication._communicate_shapes', + _communicate_shapes) + + +def mcore_multiparam_pipeline_parallel_adaptation(aspm, mindspeed_args): + if mindspeed_args.use_multiparameter_pipeline_model_parallel: + from .core.pipeline_parallel.multiparameter_schedules import get_tensor_shapes_wrapper, forward_step_wrapper, \ + recv_forward_wrapper, recv_backward_wrapper, send_forward_wrapper, send_backward_wrapper, \ + send_forward_recv_backward_wrapper, send_backward_recv_forward_wrapper, backward_step_wrapper + + aspm.register_patch('megatron.core.pipeline_parallel.schedules.get_tensor_shapes', + get_tensor_shapes_wrapper) + aspm.register_patch('megatron.core.pipeline_parallel.schedules.forward_step', + forward_step_wrapper) + aspm.register_patch('megatron.core.pipeline_parallel.schedules.backward_step', + backward_step_wrapper) + aspm.register_patch('megatron.core.pipeline_parallel.schedules.recv_forward', + recv_forward_wrapper) + aspm.register_patch('megatron.core.pipeline_parallel.schedules.recv_backward', + recv_backward_wrapper) + aspm.register_patch('megatron.core.pipeline_parallel.schedules.send_forward', + send_forward_wrapper) + aspm.register_patch('megatron.core.pipeline_parallel.schedules.send_backward', + send_backward_wrapper) + aspm.register_patch('megatron.core.pipeline_parallel.schedules.send_forward_recv_backward', + send_forward_recv_backward_wrapper) + aspm.register_patch('megatron.core.pipeline_parallel.schedules.send_backward_recv_forward', + 
send_backward_recv_forward_wrapper) + + +def mcore_tensor_parallel_adaptation_l0(aspm): + from .core.tensor_parallel.random import _set_cuda_rng_state + aspm.register_patch('megatron.core.tensor_parallel.random._set_cuda_rng_state', _set_cuda_rng_state) + + +def mcore_tensor_parallel_adaptation_l1(aspm): + from .core.tensor_parallel.cross_entropy import calculate_predicted_logits + # use logical negation followed by multiplication to achieve the same effect as setting selected elements to zero + aspm.register_patch('megatron.core.tensor_parallel.cross_entropy.VocabParallelCrossEntropy.calculate_predicted_logits', + calculate_predicted_logits) + + +def mcore_tensor_parallel_adaptation(aspm, args): + from .core.tensor_parallel.random import checkpoint_wrapper + from .core.tensor_parallel.random import checkpoint_function_backward + from .core.tensor_parallel.layers import vocab_parallel_embedding_forward + from .core.tensor_parallel.layers import row_parallel_nocomm_optimizer_wrapper + from .core.tensor_parallel.layers import parallel_linear_init_wrapper + + def has_recomputation_or_swap(args): + return (args.swap_attention or + args.recompute_in_bubble or + args.adaptive_recompute_device_swap or + args.recompute_in_advance or + args.adaptive_memory_optimization) + + aspm.register_patch('megatron.core.tensor_parallel.random.CheckpointFunction.backward', + checkpoint_function_backward) + aspm.register_patch('megatron.core.tensor_parallel.layers.VocabParallelEmbedding.forward', + vocab_parallel_embedding_forward) + aspm.register_patch('megatron.core.tensor_parallel.layers.RowParallelLinear.forward', + row_parallel_nocomm_optimizer_wrapper) + aspm.register_patch('megatron.core.tensor_parallel.layers.RowParallelLinear.__init__', + parallel_linear_init_wrapper) + aspm.register_patch('megatron.core.tensor_parallel.layers.ColumnParallelLinear.__init__', + parallel_linear_init_wrapper) + aspm.register_patch('megatron.core.tensor_parallel.random.checkpoint', 
checkpoint_wrapper) + if has_recomputation_or_swap(args): + from .core.tensor_parallel.layers import linear_forward_main_grad_wrapper, linear_backward_main_grad_wrapper + aspm.register_patch('megatron.core.tensor_parallel.layers.LinearWithGradAccumulationAndAsyncCommunication.forward', + linear_forward_main_grad_wrapper) + aspm.register_patch('megatron.core.tensor_parallel.layers.LinearWithGradAccumulationAndAsyncCommunication.backward', + linear_backward_main_grad_wrapper) + + +def megatron_legacy_adaptation(aspm): + from .model.language_model import parallel_lm_logits, embedding_forward_wrapper + from .core.performance.auto_pipeline_perf.data_samplers import build_pretraining_data_loader_decorator + from .core.performance.auto_pipeline_perf.transformer import get_attention_mask_wrapper + aspm.register_patch('mindspeed.model.transformer.get_attention_mask', get_attention_mask_wrapper) + aspm.register_patch('megatron.legacy.data.data_samplers.build_pretraining_data_loader', + build_pretraining_data_loader_decorator) + aspm.register_patch('megatron.legacy.model.language_model.parallel_lm_logits', parallel_lm_logits) + aspm.register_patch('megatron.legacy.model.language_model.Embedding.forward', embedding_forward_wrapper) + + +def legacy_model_fusions_adaptation(aspm): + from .core.fusions.fused_layer_norm import FusedLayerNormAffineFunction, FastLayerNormFN, fused_layer_norm_affine + from .core.fusions.fused_softmax import is_kernel_available, ScaledUpperTriangMaskedSoftmax, ScaledMaskedSoftmax, \ + ScaledSoftmax, forward_fused_softmax + aspm.register_patch('megatron.legacy.model.fused_layer_norm.FusedLayerNormAffineFunction', + FusedLayerNormAffineFunction) + aspm.register_patch('megatron.legacy.model.fused_layer_norm.FastLayerNormFN', FastLayerNormFN) + aspm.register_patch('megatron.legacy.model.fused_layer_norm.fused_layer_norm_affine', fused_layer_norm_affine) + aspm.register_patch('megatron.legacy.model.fused_softmax.ScaledUpperTriangMaskedSoftmax', + 
ScaledUpperTriangMaskedSoftmax) + aspm.register_patch('megatron.legacy.model.fused_softmax.ScaledMaskedSoftmax', ScaledMaskedSoftmax) + aspm.register_patch('megatron.legacy.model.fused_softmax.ScaledSoftmax', ScaledSoftmax) + aspm.register_patch('megatron.legacy.model.fused_softmax.FusedScaleMaskSoftmax.is_kernel_available', + is_kernel_available) + aspm.register_patch('megatron.legacy.model.fused_softmax.FusedScaleMaskSoftmax.forward_fused_softmax', + forward_fused_softmax) + + +def legacy_model_rms_norm_adaptation(aspm): + from .core.fusions.rms_norm import rms_norm_init_wrapper, rms_norm_forward_wrapper, rms_norm_norm_wrapper + aspm.register_patch('megatron.legacy.model.rms_norm.RMSNorm.__init__', rms_norm_init_wrapper) + aspm.register_patch('megatron.legacy.model.rms_norm.RMSNorm.forward', rms_norm_forward_wrapper) + aspm.register_patch('megatron.legacy.model.rms_norm.RMSNorm._norm', rms_norm_norm_wrapper) + + +def legacy_model_transformer_l0(aspm): + from .model.transformer import parallel_mlp_init_wrapper, flash_self_attention_forward, \ + flash_self_attention_init_wrapper, parallel_transformer_forward_wrapper, flash_self_attention_init_add_config_wrapper + from .model.transformer import parallel_attention_init, parallel_attention_forward + aspm.register_patch('megatron.legacy.model.transformer.ParallelTransformer.forward', + parallel_transformer_forward_wrapper) + aspm.register_patch('megatron.legacy.model.transformer.ParallelMLP.__init__', parallel_mlp_init_wrapper) + aspm.register_patch('megatron.legacy.model.transformer.FlashSelfAttention.forward', flash_self_attention_forward) + aspm.register_patch('megatron.legacy.model.transformer.FlashSelfAttention.__init__', + flash_self_attention_init_add_config_wrapper) + aspm.register_patch('megatron.legacy.model.transformer.FlashSelfAttention.__init__', + flash_self_attention_init_wrapper) + aspm.register_patch('megatron.legacy.model.transformer.ParallelAttention.__init__', parallel_attention_init) + 
aspm.register_patch('megatron.legacy.model.transformer.ParallelAttention.forward', + parallel_attention_forward) + + +def legacy_model_transformer(aspm, args): + from .model.transformer import parallel_mlp_forward, parallel_transformer_init_wrapper, \ + parallel_transformer_init + from .model.transformer import core_attention_init_wrapper, core_attention_forward + from .core.transformer.transformer import parallel_transformer_checkpointed_forward_wrapper + from .model.transformer import switch_mlp_init_wrapper, switch_mlp_forward_wrapper, \ + parallel_transformer_layer_init_wrapper + if not args.automated_pipeline and args.noop_layers: + aspm.register_patch('megatron.legacy.model.transformer.ParallelTransformer.__init__', parallel_transformer_init) + aspm.register_patch('megatron.legacy.model.transformer.ParallelTransformer.__init__', + parallel_transformer_init_wrapper) + aspm.register_patch('megatron.legacy.model.transformer.ParallelMLP.forward', parallel_mlp_forward) + aspm.register_patch('megatron.legacy.model.transformer.CoreAttention.__init__', core_attention_init_wrapper) + aspm.register_patch('megatron.legacy.model.transformer.CoreAttention.forward', core_attention_forward) + aspm.register_patch('megatron.legacy.model.transformer.ParallelTransformer._checkpointed_forward', + parallel_transformer_checkpointed_forward_wrapper) + aspm.register_patch('megatron.legacy.model.transformer.SwitchMLP.__init__', switch_mlp_init_wrapper) + aspm.register_patch('megatron.legacy.model.transformer.SwitchMLP.forward', switch_mlp_forward_wrapper) + aspm.register_patch('megatron.legacy.model.transformer.ParallelTransformerLayer.__init__', + parallel_transformer_layer_init_wrapper) + + +def megatron_training_adaptation_l0(aspm): + from .initialize import _compile_dependencies, set_jit_fusion_options_wrapper + from .utils import get_batch_on_this_cp_rank + from .training import pretrain, get_device_wrapper + from .arguments import parse_args_wrapper, validate_args_wrapper, 
core_transformer_config_from_args_wrapper + from .yaml_arguments import core_transformer_config_from_yaml_wrapper, print_args_wrapper + + from .core.training import train_decorator, train_step_decorator + from .core.transformer.transformer_config import transformer_config_post_init_wrapper + aspm.register_patch('megatron.training.training.train', train_decorator) + aspm.register_patch('megatron.training.training.train_step', train_step_decorator) + aspm.register_patch('megatron.training.yaml_arguments.core_transformer_config_from_yaml', + core_transformer_config_from_yaml_wrapper) + aspm.register_patch('megatron.training.initialize._compile_dependencies', _compile_dependencies) + aspm.register_patch('megatron.training.utils.get_batch_on_this_cp_rank', get_batch_on_this_cp_rank) + aspm.register_patch('megatron.training.arguments.parse_args', parse_args_wrapper) + aspm.register_patch('megatron.training.arguments.validate_args', validate_args_wrapper) + aspm.register_patch('megatron.training.arguments._print_args', print_args_wrapper) + aspm.register_patch('megatron.training.yaml_arguments.validate_yaml', validate_args_wrapper) + aspm.register_patch('megatron.training.yaml_arguments._print_args', print_args_wrapper) + aspm.register_patch('megatron.training.arguments.core_transformer_config_from_args', + core_transformer_config_from_args_wrapper) + aspm.register_patch('megatron.training.initialize.set_jit_fusion_options', set_jit_fusion_options_wrapper) + aspm.register_patch('megatron.training.training.pretrain', pretrain) + aspm.register_patch('megatron.core.transformer.transformer_config.TransformerConfig.__post_init__', + transformer_config_post_init_wrapper) + aspm.register_patch('megatron.training.dist_signal_handler.get_device', get_device_wrapper) + + +def megatron_training_adaptation(aspm, mindspeed_args): + from .core.performance.auto_pipeline_perf.global_vars import get_num_microbatches_wrapper + from .core.training import training_log + from .utils import 
get_batch_on_this_tp_rank + from .tokenizer import build_tokenizer_wrapper + from .core.training import pretrain_decorator, setup_model_and_optimizer_decorator + aspm.register_patch('megatron.core.num_microbatches_calculator.get_num_microbatches', get_num_microbatches_wrapper) + aspm.register_patch('megatron.training.training.pretrain', pretrain_decorator) + aspm.register_patch('megatron.training.training.setup_model_and_optimizer', setup_model_and_optimizer_decorator) + aspm.register_patch('megatron.training.utils.get_batch_on_this_tp_rank', get_batch_on_this_tp_rank) + if mindspeed_args.op_cal_tflops: + aspm.register_patch('megatron.training.training.training_log', training_log) + aspm.register_patch('megatron.training.tokenizer.tokenizer.build_tokenizer', build_tokenizer_wrapper) + + +def megatron_training_ema_adaptation(aspm, mindspeed_args): + if mindspeed_args.optimizer_selection == 'fused_ema_adamw': + from .checkpointing import generate_state_dict_ema_wrapper, save_checkpoint_ema_wrapper + from .optimizer.distrib_optimizer import ema_distrib_optimizer_init_wrapper + aspm.register_patch('megatron.training.checkpointing.save_checkpoint', save_checkpoint_ema_wrapper) + aspm.register_patch('megatron.training.checkpointing.generate_state_dict', generate_state_dict_ema_wrapper) + aspm.register_patch('megatron.core.optimizer.distrib_optimizer.DistributedOptimizer.__init__', + ema_distrib_optimizer_init_wrapper) + if hasattr(mindspeed_args, "ema_decay"): + from .optimizer.optimizer import get_megatron_optimizer_func_wrapper + aspm.register_patch('megatron.core.optimizer.get_megatron_optimizer', + get_megatron_optimizer_func_wrapper) + elif mindspeed_args.use_ema: + from .training import pretrain, train_step + from .checkpointing import save_checkpoint, _load_base_checkpoint + aspm.register_patch('megatron.training.training.train_step', train_step) + aspm.register_patch('megatron.training.checkpointing.save_checkpoint', save_checkpoint) + 
aspm.register_patch('megatron.training.checkpointing._load_base_checkpoint', _load_base_checkpoint) + + +def memory_fragmentation_adaptation(aspm, args): + from megatron.legacy.model.transformer import ParallelTransformerLayer + if args.memory_fragmentation: + from .core.memory.memory_fragmentation.pluggable_allocator_adpator import change_allocator + time.sleep(10) + change_allocator() + + from .core.memory.memory_fragmentation.memory_recorder import memory_recorder_wrapper + aspm.register_patch('megatron.training.training.setup_model_and_optimizer', memory_recorder_wrapper) + + from .core.memory.memory_fragmentation.malloc_recorder import malloc_recorder_wrapper + aspm.register_patch('megatron.training.training.train_step', malloc_recorder_wrapper) + + from .core.memory.memory_fragmentation.optimizer_init_precise import optimizer_init_wrapper + aspm.register_patch('megatron.core.optimizer.optimizer.MixedPrecisionOptimizer.step', optimizer_init_wrapper) + + from .core.memory.adaptive_recomputing.adaptive_recompute import allowed_recomputing_module_wrapper + allowed_recomputing_module_wrapper(ParallelTransformerLayer) + from .core.memory.adaptive_recomputing.adaptive_recompute import setup_model_and_optimizer_wrapper + aspm.register_patch('megatron.training.training.setup_model_and_optimizer', setup_model_and_optimizer_wrapper) + if (args.adaptive_recompute_enable and not args.memory_fragmentation) or args.swap_attention: + from .core.memory.adaptive_recomputing.adaptive_recompute import allowed_recomputing_module_wrapper + if hasattr(args, "use_legacy_models") and not args.use_legacy_models: + from megatron.core.transformer.transformer_layer import TransformerLayer + allowed_recomputing_module_wrapper(TransformerLayer) + else: + allowed_recomputing_module_wrapper(ParallelTransformerLayer) + from .core.memory.adaptive_recomputing.adaptive_recompute import setup_model_and_optimizer_wrapper + aspm.register_patch('megatron.training.training.setup_model_and_optimizer', 
setup_model_and_optimizer_wrapper) + if args.smart_swap and (not args.memory_fragmentation and not args.adaptive_recompute_enable): + from .core.memory.smart_swap.swap_adaptor import change_allocator + time.sleep(10) + change_allocator() + from .core.memory.smart_swap.swap_megatron_adaptor import train_step_wrapper + aspm.register_patch('megatron.training.training.train_step', train_step_wrapper) + if args.adaptive_memory_optimization and not (args.adaptive_recompute_enable or args.memory_fragmentation or args.swap_attention or args.smart_swap): + from .core.memory.adaptive_memory.adaptive_memory_opt import addup_allowed_mem_adapt_module + if hasattr(args, "use_legacy_models") and args.use_legacy_models: + addup_allowed_mem_adapt_module(ParallelTransformerLayer) + else: + from megatron.core.transformer.transformer_layer import TransformerLayer + addup_allowed_mem_adapt_module(TransformerLayer) + from .core.memory.adaptive_memory.adaptive_memory_opt import setup_adapt_memory_optimizer_wrapper + aspm.register_patch('megatron.training.training.setup_model_and_optimizer', setup_adapt_memory_optimizer_wrapper) + from .core.memory.adaptive_recomputing.pluggable_allocator_adpator import change_allocator + time.sleep(10) + change_allocator() + + if os.getenv('OOTB_OPTIMIZER_PROFILING', 'FALSE') == 'TRUE': + print(f"OOTB_OPTIMIZER_PROFILING success open") + from .core.memory.adaptive_recomputing.pluggable_allocator_adpator import change_allocator + import megatron.training + from mindspeed.auto_tuning.module.parse.recompute_parser import allowed_recompute_parser_module_wrapper + allowed_recompute_parser_module_wrapper(megatron.legacy.model.transformer.ParallelTransformerLayer) + from mindspeed.auto_tuning.module.parse.recompute_parser import setup_model_and_optimizer_decorator + aspm.register_patch('megatron.training.training.setup_model_and_optimizer', setup_model_and_optimizer_decorator) + print(f"setup_model_and_optimizer_decorator success") + + if 
args.adaptive_recompute_enable or args.memory_fragmentation: + import megatron.training.initialize + aspm.register_patch('megatron.training.initialize_megatron', megatron.training.initialize.initialize_megatron) + + +def mcore_moe_adaptation_l0(pm): + from .core.transformer.moe.grouped_gemm_util import Ops, grouped_gemm_is_available, get_device_capability, \ + assert_grouped_gemm_is_available + pm.register_patch('megatron.core.transformer.moe.grouped_gemm_util.ops', Ops) + pm.register_patch('megatron.core.transformer.moe.grouped_gemm_util.grouped_gemm_is_available', + grouped_gemm_is_available) + pm.register_patch('megatron.core.transformer.moe.grouped_gemm_util.assert_grouped_gemm_is_available', + assert_grouped_gemm_is_available) + pm.register_patch('torch.cuda.get_device_capability', get_device_capability) + + +def mcore_moe_adaptation(pm, args): + from .core.pipeline_parallel.schedules import forward_step + pm.register_patch('megatron.core.pipeline_parallel.schedules.forward_step', + forward_step) + if args.moe_permutation_async_comm: + if hasattr(args, 'moe_token_dispatcher_type') and args.moe_token_dispatcher_type == 'alltoall': + from .core.transformer.moe.experts import sequential_mlp_forward + from .core.transformer.moe.moe_utils import permute, unpermute + if args.moe_tp_extend_ep: + from .core.transformer.moe.token_dispatcher import ( + preprocess_tp_extend_ep, alltoall_token_unpermutation_tp_extend_ep, + alltoall_token_permutation_tp_extend_ep + ) + from .core.transformer.moe.router import routing_tp_extend_ep + from .core.transformer.moe.moe_layer import base_moe_init_wrapper + pm.register_patch('megatron.core.transformer.moe.moe_layer.BaseMoELayer.__init__', + base_moe_init_wrapper) + pm.register_patch( + 'megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.preprocess', + preprocess_tp_extend_ep) + pm.register_patch('megatron.core.transformer.moe.router.TopKRouter.routing', routing_tp_extend_ep) + + if 
args.moe_alltoall_overlap_comm: + from .core.transformer.moe.token_dispatcher import alltoall_token_permutation_new, \ + alltoall_token_unpermutation_new + from .core.transformer.moe.experts import group_mlp_forward + from .core.transformer.mlp import mlp_init + from .core.transformer.moe.moe_layer import moe_layer_init + pm.register_patch('megatron.core.transformer.mlp.MLP.__init__', mlp_init) + pm.register_patch('megatron.core.transformer.moe.experts.GroupedMLP.forward', group_mlp_forward) + pm.register_patch( + 'megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.token_permutation', + alltoall_token_permutation_new) + pm.register_patch( + 'megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.token_unpermutation', + alltoall_token_unpermutation_new) + pm.register_patch('megatron.core.transformer.moe.moe_layer.MoELayer.__init__', moe_layer_init) + else: + pm.register_patch('megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.token_permutation', + alltoall_token_permutation_tp_extend_ep) + pm.register_patch('megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.token_unpermutation', + alltoall_token_unpermutation_tp_extend_ep) + else: + from .core.transformer.moe.token_dispatcher import preprocess, alltoall_token_permutation, \ + alltoall_token_unpermutation_with_bmm + pm.register_patch('megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.preprocess', + preprocess) + if args.moe_alltoall_overlap_comm: + from .core.transformer.moe.token_dispatcher import alltoall_token_permutation_new, \ + alltoall_token_unpermutation_new + from .core.transformer.moe.experts import group_mlp_forward + from .core.transformer.mlp import mlp_init + from .core.transformer.moe.moe_layer import moe_layer_init + pm.register_patch('megatron.core.transformer.mlp.MLP.__init__', mlp_init) + pm.register_patch('megatron.core.transformer.moe.experts.GroupedMLP.forward', group_mlp_forward) + 
pm.register_patch( + 'megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.token_permutation', + alltoall_token_permutation_new) + pm.register_patch( + 'megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.token_unpermutation', + alltoall_token_unpermutation_new) + pm.register_patch('megatron.core.transformer.moe.moe_layer.MoELayer.__init__', moe_layer_init) + else: + pm.register_patch('megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.token_permutation', + alltoall_token_permutation) + if args.moe_bmm_mc2: + pm.register_patch( + 'megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.token_unpermutation', + alltoall_token_unpermutation_with_bmm) + pm.register_patch('megatron.core.transformer.moe.experts.SequentialMLP.forward', sequential_mlp_forward) + pm.register_patch('megatron.core.transformer.moe.moe_utils.permute', permute) + pm.register_patch('megatron.core.transformer.moe.moe_utils.unpermute', unpermute) + else: + from .core.transformer.moe.router import aux_loss_load_balancing + pm.register_patch('megatron.core.transformer.moe.router.TopKRouter.aux_loss_load_balancing', aux_loss_load_balancing) + + if args.moe_tp_extend_ep: + from .core.transformer.moe.moe_layer import base_moe_init_wrapper + pm.register_patch('megatron.core.transformer.moe.moe_layer.BaseMoELayer.__init__', base_moe_init_wrapper) + + if args.moe_allgather_overlap_comm: + from .core.transformer.moe.token_dispatcher import (allgather_token_permutation_new, + allgather_token_unpermutation_new) + from .core.transformer.moe.experts import group_mlp_forward + from .core.transformer.mlp import mlp_init + pm.register_patch('megatron.core.transformer.mlp.MLP.__init__', mlp_init) + pm.register_patch('megatron.core.transformer.moe.experts.GroupedMLP.forward', group_mlp_forward) + pm.register_patch( + 'megatron.core.transformer.moe.token_dispatcher.MoEAllGatherTokenDispatcher.token_permutation', + 
allgather_token_permutation_new) + pm.register_patch( + 'megatron.core.transformer.moe.token_dispatcher.MoEAllGatherTokenDispatcher.token_unpermutation', + allgather_token_unpermutation_new) + else: + from .core.transformer.moe.token_dispatcher import (allgather_token_permutation, + allgather_token_unpermutation) + pm.register_patch( + 'megatron.core.transformer.moe.token_dispatcher.MoEAllGatherTokenDispatcher.token_permutation', + allgather_token_permutation) + pm.register_patch( + 'megatron.core.transformer.moe.token_dispatcher.MoEAllGatherTokenDispatcher.token_unpermutation', + allgather_token_unpermutation) + + from .core.transformer.moe.moe_layer import moe_layer_init_wrapper + pm.register_patch('megatron.core.transformer.moe.moe_layer.MoELayer.__init__', moe_layer_init_wrapper) + else: + if hasattr(args, 'moe_token_dispatcher_type') and args.moe_token_dispatcher_type == 'alltoall': + from .core.transformer.moe.token_dispatcher import alltoall_preprocess_npu, \ + alltoall_token_unpermutation_with_bmm, alltoall_token_permutation_with_bmm + pm.register_patch('megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.preprocess', + alltoall_preprocess_npu) + if args.moe_bmm_mc2: + pm.register_patch( + 'megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.token_permutation', + alltoall_token_permutation_with_bmm) + pm.register_patch( + 'megatron.core.transformer.moe.token_dispatcher.MoEAlltoAllTokenDispatcher.token_unpermutation', + alltoall_token_unpermutation_with_bmm) + else: + from .core.transformer.moe.token_dispatcher import allgather_token_permutation_npu + pm.register_patch('megatron.core.transformer.moe.token_dispatcher.MoEAllGatherTokenDispatcher.token_permutation', allgather_token_permutation_npu) + + from .core.transformer.moe.experts import groupedmlp_init_wrapper, groupedmlp_forward + pm.register_patch('megatron.core.transformer.moe.experts.GroupedMLP.__init__', groupedmlp_init_wrapper) + if not 
args.moe_alltoall_overlap_comm and not args.moe_allgather_overlap_comm: + pm.register_patch('megatron.core.transformer.moe.experts.GroupedMLP.forward', groupedmlp_forward) + + if args.use_ascend_mc2 and not hasattr(args, 'moe_grouped_gemm'): + # MoE MLP not use mc2 linear + from .core.models.gpt.gpt_layer_specs import build_layers_wrapper + from megatron.core.tensor_parallel import ColumnParallelLinear, RowParallelLinear + from megatron.core.transformer.transformer_block import TransformerBlock + TransformerBlock._build_layers = build_layers_wrapper(TransformerBlock._build_layers, + ColumnParallelLinear.forward, + RowParallelLinear.forward) + + +def deepspeed_moe_adaptation(pm, args): + if args.use_pipe_experts or args.use_nanopipe or args.ampipe_degree > 1: + from .core.tensor_parallel.layers import (row_parallel_moe, column_parallel_moe, + linear_with_grad_accumulation_and_async_allreduce_moe) + pm.register_patch('megatron.core.tensor_parallel.layers.RowParallelLinear.forward', row_parallel_moe) + pm.register_patch('megatron.core.tensor_parallel.layers.ColumnParallelLinear.forward', column_parallel_moe) + pm.register_patch('megatron.core.tensor_parallel.layers.linear_with_grad_accumulation_and_async_allreduce', + linear_with_grad_accumulation_and_async_allreduce_moe) + if args.use_pipe_experts: + from .core.distributed.param_and_grad_buffer import pipe_register_grad_ready + pm.register_patch('megatron.core.distributed.ParamAndGradBuffer.register_grad_ready', pipe_register_grad_ready) + if args.ampipe_degree > 1: + from mindspeed.model.language_model import embedding_forward_ampipe + from mindspeed.model.transformer import parallel_transformer_forward_ampipe + from mindspeed.model.transformer import parallel_transformer_layer_forward_ampipe + pm.register_patch('megatron.legacy.model.language_model.Embedding.forward', embedding_forward_ampipe) + pm.register_patch('megatron.legacy.model.transformer.ParallelTransformer.forward', + parallel_transformer_forward_ampipe) 
def coc_adaptation(aspm, args):
    """Register the initialization patches for the Ascend MC2 and CoC options.

    Args:
        aspm: Patch manager; only ``register_patch(target, replacement)`` is used here.
        args: Parsed arguments; ``use_ascend_mc2`` and ``use_ascend_coc`` are read.
    """
    # Imported unconditionally, i.e. even when neither flag below is set.
    from .initialize import coc_registration_wrapper, mc2_wrapper
    if args.use_ascend_mc2:
        from .core.memory.auto_pipeline.autopipeline import initialize_cfg_from_args_wrapper
        aspm.register_patch('megatron.training.initialize.initialize_megatron', mc2_wrapper)
        aspm.register_patch('mindspeed.core.tensor_parallel.ascend_turbo.initialize.initialize_cfg_from_args',
                            initialize_cfg_from_args_wrapper)
    # NOTE(review): with both flags set, 'initialize_megatron' receives two
    # replacements; how they combine depends on register_patch -- confirm.
    if args.use_ascend_coc:
        aspm.register_patch('megatron.training.initialize.initialize_megatron', coc_registration_wrapper)
def tensor_2d_adaptation(aspm, args):
    """Register the patches used when 2D tensor parallelism (``args.tp_2d``) is enabled.

    Nothing is imported or registered when ``args.tp_2d`` is falsy.

    Args:
        aspm: Patch manager; only ``register_patch(target, replacement)`` is used here.
        args: Parsed arguments; only ``tp_2d`` is read.
    """
    if args.tp_2d:
        from mindspeed.core.tensor_parallel.tp_2d.norm_factory import get_norm_tp_2d
        from mindspeed.core.tensor_parallel.tp_2d.norm_factory import _allreduce_layernorm_grads_wrapper
        from mindspeed.core.models.common.embeddings.rotary_pos_embedding import rotary_embedding_forward_wrapper
        from mindspeed.core.pipeline_parallel.flexible_schedules import forward_backward_pipelining_with_interleaving_patch
        # Norm construction and layernorm gradient all-reduce.
        aspm.register_patch('megatron.legacy.model.utils.get_norm', get_norm_tp_2d)
        aspm.register_patch('megatron.core.distributed.finalize_model_grads._allreduce_layernorm_grads',
                            _allreduce_layernorm_grads_wrapper)
        # Rotary embedding forward and the interleaved pipeline schedule.
        aspm.register_patch('megatron.core.models.common.embeddings.rotary_pos_embedding.RotaryEmbedding.forward',
                            rotary_embedding_forward_wrapper)
        aspm.register_patch('megatron.core.pipeline_parallel.schedules.forward_backward_pipelining_with_interleaving',
                            forward_backward_pipelining_with_interleaving_patch)
        # Config post-init hooks.
        from .core.transformer.transformer_config import transformer_config_post_init
        aspm.register_patch('megatron.core.transformer.transformer_config.TransformerConfig.__post_init__',
                            transformer_config_post_init)
        from mindspeed.model.language_model import model_parallel_config_post_init_wrapper
        aspm.register_patch('megatron.core.model_parallel_config.ModelParallelConfig.__post_init__',
                            model_parallel_config_post_init_wrapper)
        # MLP spec builder and self-attention constructor.
        from mindspeed.core.models.gpt.gpt_layer_specs import get_mlp_module_spec_wrapper
        aspm.register_patch('megatron.core.models.gpt.gpt_layer_specs._get_mlp_module_spec',
                            get_mlp_module_spec_wrapper)
        from mindspeed.core.transformer.attention import self_attention_init_tp2d_wrapper
        aspm.register_patch('megatron.core.transformer.attention.SelfAttention.__init__', self_attention_init_tp2d_wrapper)
def auto_parallel_mm_adaptation(aspm, mindspeed_args):
    """Patch ``backward_step`` when multi-modal auto-parallel search or profiling is on.

    Args:
        aspm: Patch manager; only ``register_patch`` is used here.
        mindspeed_args: Parsed arguments; ``auto_parallel_mm`` and
            ``auto_parallel_profile`` are read.
    """
    # The decorator module is imported regardless of the flags below.
    from mindspeed.core.auto_parallel.mm_search.schedules import backward_step_decorator

    search_or_profile = mindspeed_args.auto_parallel_mm or mindspeed_args.auto_parallel_profile
    if not search_or_profile:
        return

    aspm.register_patch(
        'megatron.core.pipeline_parallel.schedules.backward_step',
        backward_step_decorator,
    )
dist_train.parallel_state.get_is_pipeline_first_stage_wrapper) + aspm.register_patch('megatron.core.mpu.get_tensor_model_parallel_src_rank', dist_train.parallel_state.get_tensor_model_parallel_src_rank_wrapper) + aspm.register_patch('megatron.core.mpu.is_initialized', dist_train.parallel_state.is_initialized) + aspm.register_patch('megatron.core.mpu.model_parallel_is_initialized', dist_train.parallel_state.model_parallel_is_initialized) + aspm.register_patch('megatron.core.mpu.get_model_parallel_group', dist_train.parallel_state.get_model_parallel_group) + aspm.register_patch('megatron.core.mpu.get_tensor_model_parallel_group', dist_train.parallel_state.get_tensor_model_parallel_group) + aspm.register_patch('megatron.core.mpu.get_pipeline_model_parallel_group', dist_train.parallel_state.get_pipeline_model_parallel_group) + aspm.register_patch('megatron.core.mpu.get_data_parallel_group', dist_train.parallel_state.get_data_parallel_group) + aspm.register_patch('megatron.core.mpu.get_data_parallel_group_gloo', dist_train.parallel_state.get_data_parallel_group_gloo) + aspm.register_patch('megatron.core.mpu.get_context_parallel_group', dist_train.parallel_state.get_context_parallel_group) + aspm.register_patch('megatron.core.mpu.get_context_parallel_global_ranks', dist_train.parallel_state.get_context_parallel_global_ranks) + aspm.register_patch('megatron.core.mpu.get_embedding_group', dist_train.parallel_state.get_embedding_group) + aspm.register_patch('megatron.core.mpu.get_position_embedding_group', dist_train.parallel_state.get_position_embedding_group) + aspm.register_patch('megatron.core.mpu.get_data_modulo_expert_parallel_group_gloo', dist_train.parallel_state.get_data_modulo_expert_parallel_group_gloo) + aspm.register_patch('megatron.core.mpu.get_amax_reduction_group', dist_train.parallel_state.get_amax_reduction_group) + aspm.register_patch('megatron.core.mpu.get_tensor_and_data_parallel_group', dist_train.parallel_state.get_tensor_and_data_parallel_group) + 
aspm.register_patch('megatron.core.mpu.get_tensor_and_context_parallel_group', dist_train.parallel_state.get_tensor_and_context_parallel_group) + aspm.register_patch('megatron.core.mpu.get_expert_model_parallel_group', dist_train.parallel_state.get_expert_model_parallel_group) + aspm.register_patch('megatron.core.mpu.get_tensor_and_expert_parallel_group', dist_train.parallel_state.get_tensor_and_expert_parallel_group) + aspm.register_patch('megatron.core.mpu.get_data_modulo_expert_parallel_group', dist_train.parallel_state.get_data_modulo_expert_parallel_group) + aspm.register_patch('megatron.core.mpu.get_tensor_model_parallel_world_size', dist_train.parallel_state.get_tensor_model_parallel_world_size) + aspm.register_patch('megatron.core.mpu.get_pipeline_model_parallel_world_size', dist_train.parallel_state.get_pipeline_model_parallel_world_size) + aspm.register_patch('megatron.core.mpu.get_tensor_model_parallel_rank', dist_train.parallel_state.get_tensor_model_parallel_rank) + aspm.register_patch('megatron.core.mpu.get_pipeline_model_parallel_rank', dist_train.parallel_state.get_pipeline_model_parallel_rank) + aspm.register_patch('megatron.core.mpu.get_pipeline_model_parallel_split_rank', dist_train.parallel_state.get_pipeline_model_parallel_split_rank) + aspm.register_patch('megatron.core.mpu.is_rank_in_embedding_group', dist_train.parallel_state.is_rank_in_embedding_group) + aspm.register_patch('megatron.core.mpu.is_rank_in_position_embedding_group', dist_train.parallel_state.is_rank_in_position_embedding_group) + aspm.register_patch('megatron.core.mpu.get_virtual_pipeline_model_parallel_rank', dist_train.parallel_state.get_virtual_pipeline_model_parallel_rank) + aspm.register_patch('megatron.core.mpu.get_virtual_pipeline_model_parallel_world_size', dist_train.parallel_state.get_virtual_pipeline_model_parallel_world_size) + aspm.register_patch('megatron.core.mpu.get_data_parallel_src_rank', dist_train.parallel_state.get_data_parallel_src_rank) + 
def optimizer_selection(aspm, mindspeed_args):
    """Register the AdamW implementation that replaces ``apex.optimizers.FusedAdam``.

    Args:
        aspm: Patch manager; only ``register_patch`` is used here.
        mindspeed_args: Parsed arguments; ``optimizer_selection`` is read and must be
            one of ``'fused_torch_adamw'``, ``'fused_adamw'`` or ``'fused_ema_adamw'``.

    Raises:
        ValueError: If ``optimizer_selection`` is not one of the supported names.
    """
    selection = mindspeed_args.optimizer_selection
    # Each implementation is imported lazily so only the chosen module is loaded.
    if selection == 'fused_torch_adamw':
        from .optimizer.adamw import FusedTorchAdamW as AdamW
    elif selection == 'fused_adamw':
        from .optimizer.adamw import AdamW
    elif selection == 'fused_ema_adamw':
        from .optimizer.ema_adamw import FusedEmaAdamW as AdamW
    else:
        # Without this branch an unknown value left ``AdamW`` unbound and the
        # registration below failed with an opaque UnboundLocalError.
        raise ValueError(
            f"Unsupported optimizer_selection {selection!r}; expected one of "
            "'fused_torch_adamw', 'fused_adamw', 'fused_ema_adamw'"
        )
    aspm.register_patch('apex.optimizers.FusedAdam', AdamW, create_dummy=True)
def delete_lock_file(directory, lock):
    """Remove ``directory`` entirely if any file below it has a name ending in ``lock``.

    The directory tree is walked lazily; as soon as one matching file is found
    the whole directory is deleted. A missing directory, or one without a
    matching file, is left untouched and nothing is printed.

    Args:
        directory: Path of the directory to inspect and possibly delete.
        lock: Context manager held for the duration of the check and deletion.
    """
    with lock:
        if not os.path.exists(directory):
            return

        # ``endswith('lock')`` also matches every name ending in '.lock'.
        has_lock_file = any(
            name.endswith('lock')
            for _root, _dirs, files in os.walk(directory)
            for name in files
        )
        if not has_lock_file:
            return

        print(f"Process (PID: {os.getpid()}) is deleting Lock directory")
        try:
            shutil.rmtree(directory)
        except FileNotFoundError:
            # Another process may be deleting the same directory concurrently;
            # an exists() pre-check cannot close that window, so treat
            # "already gone" as success instead of crashing.
            print(f"Process (PID: {os.getpid()}) Directory {directory} does not exist.")
            return
        print(f"Process (PID: {os.getpid()}) deleted Lock directory")
class AlibiForFusionAttnSingleton:
    """Class-level cache of the ALiBi bias tensor and slopes used by fusion attention.

    Both getters recompute only when their cache key changes; otherwise the
    previously built tensor is returned.
    """
    # Key and value of the cached bias tensor.
    _alibi_tensor_args = None
    _alibi_tensor = None

    # Head count and value of the cached slopes.
    _alibi_slopes_headnum = None
    _alibi_slopes = None

    @classmethod
    def get_alibi_tensor_for_fusion_attn(cls, max_seq_len, num_attention_heads, dtype, neg_diagonal_opposite=False,
                                         last_k=1024):
        """Return the (cached) ALiBi bias tensor for this tensor-parallel rank.

        Args:
            max_seq_len: Sequence length the bias is built for.
            num_attention_heads: Total head count before tensor-parallel splitting.
            dtype: ``torch.float16`` / ``torch.bfloat16`` cast the result; any
                other value leaves it as computed.
            neg_diagonal_opposite: When falsy the bias is replaced by ``-abs(bias)``.
            last_k: Number of trailing rows kept; clamped to ``max_seq_len``.

        Returns:
            The bias tensor, with a leading dimension of size 1 added.
        """
        # Clamp BEFORE building the cache key: the key used to be compared with
        # the raw value but stored with the clamped one, so the cache could
        # never hit when last_k > max_seq_len.
        if last_k > max_seq_len:
            last_k = max_seq_len
        # dtype is part of the key so a call with a different dtype does not
        # receive a tensor cast for an earlier one.
        cache_key = (max_seq_len, num_attention_heads, dtype, neg_diagonal_opposite, last_k)

        if cls._alibi_tensor is None or cls._alibi_tensor_args != cache_key:
            tp_world_size = parallel_state.get_tensor_model_parallel_world_size()
            current_head_num = num_attention_heads // tp_world_size
            slopes = AlibiForFusionAttnSingleton.get_alibi_slopes_for_fusion_attn(num_attention_heads)

            # Positions -(max_seq_len - 1) .. 0.
            position_point = torch.arange(max_seq_len) - max_seq_len + 1
            diag = torch.diag(torch.diag(position_point)).unsqueeze(0).unsqueeze(0)

            position_point = position_point.unsqueeze(0).unsqueeze(0).expand(current_head_num, last_k, -1)
            position_point = position_point - diag.transpose(-1, -2)[:, -last_k:, :].expand(current_head_num, last_k,
                                                                                           max_seq_len)

            alibi = slopes.unsqueeze(1).unsqueeze(1) * position_point.npu()

            if not neg_diagonal_opposite:
                alibi = -torch.abs(alibi)

            alibi = alibi.unsqueeze(0)
            alibi = torch.Tensor(alibi).npu()
            if dtype == torch.float16:
                alibi = alibi.to(torch.float16)
            elif dtype == torch.bfloat16:
                alibi = alibi.to(torch.bfloat16)

            cls._alibi_tensor = alibi
            cls._alibi_tensor_args = cache_key

        return cls._alibi_tensor

    @classmethod
    def get_alibi_slopes_for_fusion_attn(cls, n):
        """Return the (cached) slopes for this tensor-parallel rank's slice of ``n`` heads."""
        if cls._alibi_slopes is None or cls._alibi_slopes_headnum != n:
            slopes = get_slopes(n)

            tp_world_size = parallel_state.get_tensor_model_parallel_world_size()
            tp_index = parallel_state.get_tensor_model_parallel_rank()

            # Keep only the contiguous slice of heads owned by this rank.
            current_head_num = n // tp_world_size
            slopes = torch.Tensor(slopes[tp_index * current_head_num: tp_index * current_head_num + current_head_num]).npu()

            cls._alibi_slopes = slopes
            cls._alibi_slopes_headnum = n

        return cls._alibi_slopes
Technologies Co., Ltd. All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import wraps +import torch +from megatron.training import get_args +from megatron.core import mpu, tensor_parallel +from mindspeed.core.tensor_parallel.comm_group_api import TPXCollectiveComm, TPYCollectiveComm +from mindspeed.core.tensor_parallel.comm_autograd_function import (auto_grad_sync_gather_along_first_dim, + auto_grad_sync_gather_along_last_dim, + auto_grad_scatter_along_first_dim_then_last_dim) +from mindspeed.moe.utils import get_slice_indices_from_disorder_to_order, get_slice_indices_from_order_to_disorder + + +def parallel_lm_logits( + input_, + word_embeddings_weight, + parallel_output, + bias=None +): + args = get_args() + # Parallel logits. 
+ if args.async_tensor_model_parallel_allreduce or\ + args.sequence_parallel: + input_parallel = input_ + model_parallel = mpu.get_tensor_model_parallel_world_size() > 1 + async_grad_allreduce = args.async_tensor_model_parallel_allreduce and \ + model_parallel and not args.sequence_parallel + else: + input_parallel = tensor_parallel.copy_to_tensor_model_parallel_region(input_) + async_grad_allreduce = False + + if args.use_nd_matmul: + input_parallel = tensor_parallel.gather_from_tensor_model_parallel_region(input_parallel) + + if args.tp_2d: + input_parallel = auto_grad_sync_gather_along_first_dim(input_parallel, TPXCollectiveComm) + input_parallel = auto_grad_sync_gather_along_last_dim(input_parallel, TPYCollectiveComm) + + # Matrix multiply. + logits_parallel = tensor_parallel.linear_with_grad_accumulation_and_async_allreduce( + input=input_parallel, + weight=word_embeddings_weight, + bias=bias, + gradient_accumulation_fusion=args.gradient_accumulation_fusion, + async_grad_allreduce=async_grad_allreduce, + sequence_parallel=args.sequence_parallel) + # Gather if needed. 
def embedding_forward_wrapper(forward):
    """Wrap an embedding ``forward`` so its output is scattered for ND-matmul / 2D TP.

    The wrapped forward runs first; its result is then scattered across the
    tensor-parallel region when ``use_nd_matmul`` is set, and along the first
    then last dimension when ``tp_2d`` is set.
    """
    @wraps(forward)
    def scattering_forward(self, *fwd_args, **fwd_kwargs):
        hidden = forward(self, *fwd_args, **fwd_kwargs)

        if get_args().use_nd_matmul:
            hidden = tensor_parallel.scatter_to_tensor_model_parallel_region(hidden)

        if get_args().tp_2d:
            hidden = auto_grad_scatter_along_first_dim_then_last_dim(
                hidden,
                TPXCollectiveComm,
                TPYCollectiveComm,
            )

        return hidden

    return scattering_forward
def model_parallel_config_post_init_wrapper(init_func):
    """Build a replacement ``__post_init__`` that validates a model-parallel config.

    The returned function checks the pipeline and deferred-wgrad settings on
    ``self`` and defaults ``autocast_dtype`` to ``params_dtype``.

    NOTE(review): ``init_func`` is only used for ``functools.wraps`` metadata and
    is never called, so this replaces rather than extends the wrapped
    ``__post_init__`` -- confirm this is intended.

    Raises (from the returned function):
        ValueError: On an inconsistent pipeline / deferred-wgrad configuration.
    """
    @wraps(init_func)
    def validated_post_init(self, *args, **kwargs):
        pipelined = self.pipeline_model_parallel_size > 1
        if pipelined and self.pipeline_dtype is None:
            raise ValueError(
                "When using pipeline parallelism, pipeline_dtype must be specified"
            )

        if self.autocast_dtype is None:
            self.autocast_dtype = self.params_dtype

        if self.defer_embedding_wgrad_compute:
            if self.pipeline_model_parallel_size == 1:
                raise ValueError(
                    "Cannot defer embedding wgrad compute when pipeline model parallel is not used"
                )
            if not self.gradient_accumulation_fusion:
                raise ValueError(
                    "Cannot defer embedding wgrad compute when gradient accumulation fusion is not used"
                )

    return validated_post_init
Technologies Co., Ltd. All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2023 Alibaba PAI and Nvidia Megatron-LM Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import math +import threading +from functools import wraps +from contextlib import nullcontext +from typing import Any +import warnings + +import torch +import torch_npu +import torch.nn.functional as F +import numpy as np +from megatron.training import get_args, get_tokenizer +from megatron.core import parallel_state, mpu, tensor_parallel +from megatron.core.num_microbatches_calculator import get_num_microbatches +from megatron.core.utils import make_viewless_tensor +from megatron.core.models.common.embeddings.rotary_pos_embedding import apply_rotary_pos_emb +from megatron.legacy.model.utils import openai_gelu, erf_gelu, get_norm +from megatron.legacy.model.transformer import ParallelMLP, ParallelTransformer, ParallelTransformerLayer, CoreAttention, \ + FlashSelfAttention, ParallelAttention +from megatron.core.enums import ModelType +from megatron.legacy.model.enums import AttnType, AttnMaskType, LayerType +from megatron.legacy.model.transformer import (_get_num_layers, + _get_layer_type, + bias_dropout_add_fused_train, + bias_dropout_add_fused_inference, + get_bias_dropout_add) +from megatron.legacy.model.fused_bias_gelu import bias_gelu_impl +from megatron.core.tensor_parallel.mappings import 
_split_along_first_dim +from megatron.core.transformer.module import MegatronModule + +from mindspeed.core.context_parallel.ulysses_context_parallel import UlyssesContextAttention +from mindspeed.core.context_parallel.ring_context_parallel import ringattn_context_parallel +from mindspeed.core.parallel_state import (get_context_parallel_group_for_hybrid_ulysses, + get_context_parallel_group_for_hybrid_ring, + get_context_parallel_for_hybrid_ring_world_size, + get_context_parallel_for_hybrid_ring_rank, + get_context_parallel_for_hybrid_ring_global_ranks, + get_ring_ranks_for_intra_window, + get_ring_ranks_for_inter_window_kv, + get_ring_ranks_for_inter_window_dkv, + get_ring_group_for_intra_window, + get_ring_group_for_intra_window_send_recv_overlap) +from mindspeed.core.fusions.fused_bias_swiglu import fused_swiglu +from mindspeed.core.parallel_state import get_tensor_model_parallel_world_size_for_nd1_dim1 +from mindspeed.core.tensor_parallel.comm_group_api import TPXCollectiveComm +from mindspeed.core.tensor_parallel.comm_group_api import TPXOverlapCollectiveComm +from mindspeed.core.tensor_parallel.comm_group_api import TPYCollectiveComm +from mindspeed.core.tensor_parallel.comm_group_api import TPYOverlapCollectiveComm +from mindspeed.core.tensor_parallel.tp_2d.parallel_linear_2d import ParallelLinear2D +from mindspeed.core.tensor_parallel.random import CheckpointWithoutOutput +from mindspeed.core.tensor_parallel_y_union_cp import TensorParallelYUnionCP +from mindspeed.moe.ampipe.ampipe import AttMoEPipe +from mindspeed.ops.fusion_attention_v2 import npu_fusion_attention +from mindspeed.core.tensor_parallel.layers import Nd_ParallelLinear +from mindspeed.core.tensor_parallel.checkpoint_manager import get_pipeline_checkpoint_manager +from mindspeed.model.alibi_mask import AlibiForFusionAttnSingleton, get_slopes +from mindspeed.moe.ampipe.ampipe_args import ForwardArgs +from mindspeed.moe.utils import (get_slice_indices_from_order_to_disorder, + 
class Alibi:
    """Process-wide singleton holding the ALiBi bias tensor and derived buffers.

    Every ``Alibi()`` call returns the same object, so the attributes below
    are shared by all holders of that object.
    """

    _instance = None
    # ALiBi bias tensor; assigned by the attention code after it is built.
    alibi = None
    # Cached bias term that is added to the raw attention scores.
    matmul_result = None
    # Attention-score shape for which ``matmul_result`` was computed.
    output_size = None
    # Serialises first-time construction of the singleton.
    lock = threading.Lock()

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use.

        The instance is checked once without the lock (fast path) and again
        after acquiring it. Without the second check, a thread that waited on
        the lock would overwrite the instance another thread just published,
        handing out two different "singletons".
        """
        if cls._instance is None:
            with cls.lock:
                # Another thread may have created the instance while this one
                # was waiting for the lock; only create it if still missing.
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
        return cls._instance
def core_attention_init_wrapper(fn):
    """Decorate a core-attention ``__init__`` to add context-parallel and ALiBi state.

    After the wrapped initializer runs, the wrapper:
      * divides ``hidden_size_per_partition`` by the config's
        ``context_parallel_size``;
      * copies ``square_alibi_mask`` / ``fill_neg_inf`` from the global args;
      * sets ``beta`` (``1 / layer_number`` when query-key layer scaling is
        on, otherwise ``1.0``);
      * stores the config, which is taken from the second positional
        argument after ``self``;
      * builds the ALiBi tensor into the shared ``Alibi`` holder when the
        position embedding type is ``'alibi'``, else sets ``self.alibi`` to
        ``None``.
    """
    @wraps(fn)
    def wrapper(self, *arg, **kwargs):
        fn(self, *arg, **kwargs)

        global_args = get_args()
        layer_config = arg[1]

        self.hidden_size_per_partition = (
            self.hidden_size_per_partition // layer_config.context_parallel_size
        )
        self.square_alibi_mask = global_args.square_alibi_mask
        self.fill_neg_inf = global_args.fill_neg_inf
        self.beta = 1.0
        self.config = layer_config
        if self.apply_query_key_layer_scaling:
            self.beta = 1.0 / self.layer_number

        if global_args.position_embedding_type != 'alibi':
            self.alibi = None
            return

        self.alibi = Alibi()
        bias = _build_alibi_tensor(
            global_args.seq_length,
            self.config.num_attention_heads,
            global_args.square_alibi_mask,
            global_args.fill_neg_inf,
        ).to(torch.cuda.current_device())

        # Cast only for half-precision parameter dtypes; any other dtype
        # keeps the tensor as built.
        target_dtype = self.config.params_dtype
        if target_dtype == torch.float16 or target_dtype == torch.bfloat16:
            bias = bias.to(target_dtype)
        self.alibi.alibi = bias

    return wrapper
[b, np, s, s] + # =================================== + + # [b, np, sq, sk] + output_size = (query_layer.size(1), + query_layer.size(2), + query_layer.size(0), + key_layer.size(0)) + + # [sq, b, np, hn] -> [sq, b * np, hn] + query_layer = query_layer.reshape(output_size[2], + output_size[0] * output_size[1], -1) + # [sk, b, np, hn] -> [sk, b * np, hn] + key_layer = key_layer.view(output_size[3], + output_size[0] * output_size[1], -1) + + if self.alibi is None: + matmul_input_buffer = mpu.get_global_memory_buffer().get_tensor( + (output_size[0] * output_size[1], output_size[2], output_size[3]), + query_layer.dtype, "mpu") + + matmul_result = torch.baddbmm( + matmul_input_buffer, + query_layer.transpose(0, 1), + key_layer.transpose(0, 1).transpose(1, 2), + beta=0.0, alpha=(1.0 / self.norm_factor)) + else: + if self.alibi.matmul_result is None or self.alibi.output_size != output_size: + args = get_args() + + self.alibi.output_size = output_size + alibi = _build_alibi_tensor(args.seq_length, + self.config.num_attention_heads, + args.square_alibi_mask, + args.fill_neg_inf + ).to(torch.cuda.current_device()) + if self.config.params_dtype == torch.float16: + alibi = alibi.to(torch.float16) + elif self.config.params_dtype == torch.bfloat16: + alibi = alibi.to(torch.bfloat16) + self.alibi.alibi = alibi + + if self.fill_neg_inf: + _alibi = self.alibi.alibi[:, :output_size[3], :output_size[3]] + attention_mask = attention_mask.repeat(output_size[0], 1, 1, 1)[:output_size[0], :, :, :] + self.alibi.matmul_result = _get_inverted_mask(attention_mask, _alibi).view(-1, output_size[2], + output_size[2]).contiguous() + else: + self.alibi.matmul_result = self.alibi.alibi[:, :, :output_size[3]].repeat(output_size[0], 1, 1) + + q_trans = query_layer.transpose(0, 1).contiguous() + k_trans = key_layer.transpose(0, 1).transpose(1, 2).contiguous() + matmul_result = self.beta * self.alibi.matmul_result + torch.bmm(q_trans, k_trans) * (1.0 / self.norm_factor) + + # change view to [b, np, sq, 
sk] + attention_scores = matmul_result.view(*output_size) + + # =========================== + # Attention probs and dropout + # =========================== + + # attention scores and attention mask [b, np, sq, sk] + if self.square_alibi_mask: + attention_scores = torch.max( + attention_scores, torch.tensor(torch.finfo(attention_scores.dtype).min) + ) + attention_probs = torch.nn.functional.softmax(attention_scores, -1) + else: + attention_probs = self.scale_mask_softmax(attention_scores, + attention_mask) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. + if not self.sequence_parallel: + with tensor_parallel.get_cuda_rng_tracker().fork(): + attention_probs = self.attention_dropout(attention_probs) + else: + attention_probs = self.attention_dropout(attention_probs) + + # ========================= + # Context layer. [sq, b, hp] + # ========================= + + # value_layer -> context layer. 
class NoopTransformerLayer(MegatronModule):
    """Placeholder transformer layer whose forward pass returns a copy of its input."""

    def __init__(self, layer_number):
        # The base module is initialised with ``None`` in place of a config.
        super(NoopTransformerLayer, self).__init__(None)
        self.layer_number = layer_number

    def forward(self, hidden_states, *args, **kwargs):
        """Return a clone of ``hidden_states``; all other arguments are ignored."""
        passthrough = hidden_states.clone()
        return passthrough
+ self.recompute_granularity = config.recompute_granularity + self.recompute_method = config.recompute_method + self.recompute_num_layers = config.recompute_num_layers + self.distribute_saved_activations = \ + config.distribute_saved_activations and not config.sequence_parallel + + self.sequence_parallel = config.sequence_parallel + + # Transformer Engine Init. + self.transformer_engine_v_0_10 = False + self.transformer_engine_v_0_11 = False + self.transformer_engine_v_0_8 = False + if self.transformer_impl == 'transformer_engine': + global transformer_engine + import transformer_engine + from importlib.metadata import version + from pkg_resources import packaging + + te_version = packaging.version.Version(version("transformer-engine")) + if te_version >= packaging.version.Version("0.8.0"): + self.transformer_engine_v_0_8 = True + if te_version >= packaging.version.Version("0.10.0"): + self.transformer_engine_v_0_10 = True + if te_version >= packaging.version.Version("0.11.0"): + self.transformer_engine_v_0_11 = True + + del version, packaging + + assert not args.squared_relu, "TransformerEngine does not support squared relu activation." 
+ + self.use_fp8 = config.fp8 is not None + self.fp8_recipe = None + self.fp8_group = None + if self.use_fp8: + assert args.transformer_impl == 'transformer_engine', \ + 'transformer-engine required for fp8 training and inference' + self.fp8_group = mpu.get_amax_reduction_group() + if config.fp8 == "e4m3": + fp8_format = transformer_engine.common.recipe.Format.E4M3 + elif config.fp8 == "hybrid": + fp8_format = transformer_engine.common.recipe.Format.HYBRID + else: + raise ValueError("The DelayedScaling recipe only supports E4M3 and HYBRID formats.") + self.fp8_recipe = transformer_engine.common.recipe.DelayedScaling( + margin=config.fp8_margin, + interval=config.fp8_interval, + fp8_format=fp8_format, + amax_history_len=config.fp8_amax_history_len, + amax_compute_algo=config.fp8_amax_compute_algo, + override_linear_precision=(False, False, not config.fp8_wgrad), + ) + + self.num_microbatches_in_previous_step = -1 + self.microbatch_count = 0 + self.checkpoint_core_attention = config.recompute_granularity == 'selective' + + # Number of layers. + self.num_layers = _get_num_layers(args, model_type, + layer_type==LayerType.decoder) + + self.drop_path_rates = [ + rate.item() for rate in + torch.linspace(0, self.drop_path_rate, config.num_layers)] + + self.retro_layer_numbers = None + if model_type == ModelType.retro_decoder: + retro_layer_start = 6 if config.num_layers <= 15 else 9 + self.retro_layer_numbers = \ + np.arange(retro_layer_start, config.num_layers + 1, 3).tolist() + if model_type == ModelType.retro_encoder: + self.retro_layer_numbers = [1] + + # Transformer layers. + if args.retro_add_retriever: + assert self.recompute_granularity != 'full', \ + "Full recompute not supported for Retro." + assert args.transformer_impl == 'local', \ + "Transformer engine does not support Retro layers." 
+ def build_layer(layer_number): + if args.transformer_impl == 'local': + if (hasattr(args, 'noop_layers') and isinstance(args.noop_layers, set) + and layer_number - 1 in args.noop_layers): + return NoopTransformerLayer(layer_number) + + current_layer_type = _get_layer_type( + model_type, layer_type, self.retro_layer_numbers, + layer_number) + return ParallelTransformerLayer( + config, + layer_number, + layer_type=current_layer_type, + self_attn_mask_type=self_attn_mask_type, + drop_path_rate=self.drop_path_rates[layer_number - 1]) + else: + # This argument is only available from TE v0.10 onwards. + extra_transformer_engine_kwargs = {} + if self.transformer_engine_v_0_8: + extra_transformer_engine_kwargs["bias"] = config.add_bias_linear + if self.transformer_engine_v_0_10: + extra_transformer_engine_kwargs["activation"] = "swiglu" if args.swiglu else "gelu" + if self.transformer_engine_v_0_11: + extra_transformer_engine_kwargs["normalization"] = config.normalization + assert config.attention_softmax_in_fp32, "TransformerEngine only supports softmax compute in FP32." + assert ( + (bool(int(os.getenv("NVTE_APPLY_QK_LAYER_SCALING", "0"))) and config.fp16) == config.apply_query_key_layer_scaling + ), "Unsupported config for apply_query_key_layer_scaling in TransformerEngine." 
+ return transformer_engine.pytorch.TransformerLayer( + config.hidden_size, + config.ffn_hidden_size, + config.num_attention_heads, + layernorm_epsilon=config.layernorm_epsilon, + hidden_dropout=config.hidden_dropout, + attention_dropout=config.attention_dropout, + init_method=config.init_method, + output_layer_init_method=config.output_layer_init_method, + layer_number=layer_number, + kv_channels=config.kv_channels, + self_attn_mask_type=self_attn_mask_type.name, + tp_group=mpu.get_tensor_model_parallel_group(), + get_rng_state_tracker=tensor_parallel.get_cuda_rng_tracker, + fuse_wgrad_accumulation=config.gradient_accumulation_fusion, + seq_length=args.seq_length, + micro_batch_size=args.micro_batch_size, + sequence_parallel=config.sequence_parallel, + params_dtype=config.params_dtype, + apply_residual_connection_post_layernorm=config.apply_residual_connection_post_layernorm, + output_layernorm=False, + layer_type="encoder", + drop_path_rate=self.drop_path_rates[layer_number - 1], + set_parallel_mode=True, + fuse_qkv_params=True, + **extra_transformer_engine_kwargs) + + if config.virtual_pipeline_model_parallel_size is not None: + assert config.num_layers % config.virtual_pipeline_model_parallel_size == 0, \ + 'num_layers_per_stage must be divisible by ' \ + 'virtual_pipeline_model_parallel_size' + assert args.model_type != ModelType.encoder_and_decoder + # Number of layers in each model chunk is the number of layers in the stage, + # divided by the number of model chunks in a stage. 
+ self.num_layers = self.num_layers // config.virtual_pipeline_model_parallel_size + # With 8 layers, 2 stages, and 4 model chunks, we want an assignment of + # layers to stages like (each list is a model chunk): + # Stage 0: [0] [2] [4] [6] + # Stage 1: [1] [3] [5] [7] + # With 8 layers, 2 stages, and 2 virtual stages, we want an assignment of + # layers to stages like (each list is a model chunk): + # Stage 0: [0, 1] [4, 5] + # Stage 1: [2, 3] [6, 7] + offset = mpu.get_virtual_pipeline_model_parallel_rank() * ( + config.num_layers // config.virtual_pipeline_model_parallel_size) + \ + (mpu.get_pipeline_model_parallel_rank() * self.num_layers) + else: + # Each stage gets a contiguous set of layers. + if args.model_type == ModelType.encoder_and_decoder and \ + mpu.get_pipeline_model_parallel_world_size() > 1: + pipeline_rank = mpu.get_pipeline_model_parallel_rank() + if layer_type == LayerType.encoder: + offset = pipeline_rank * self.num_layers + else: + num_ranks_in_enc = config.pipeline_model_parallel_split_rank + offset = (pipeline_rank - num_ranks_in_enc) * self.num_layers + else: + offset = mpu.get_pipeline_model_parallel_rank() * self.num_layers + + if self.num_layers == 0: + # When a standalone embedding stage is used (e.g., + # args.standalone_embedding_stage == True), virtual pipeline ranks + # on pipeline rank 0 will have zero transformer layers assigned to + # them. This results in the model's input and output tensors to be + # the same, which will cause failure for certain output tensor + # optimizations (e.g., pipeline output deallocation). To remedy + # this, we assign a 'no-op' layer on these ranks, which will + # disconnect the input tensor from the output tensor. + self.num_layers = 1 + self.layers = torch.nn.ModuleList([ NoopTransformerLayer(1) ]) + else: + self.layers = torch.nn.ModuleList( + [build_layer(i + 1 + offset) for i in range(self.num_layers)]) + + # Update dropout rate for Retro encoder. 
+ if model_type == ModelType.retro_encoder: + for layer in self.layers: + if layer.self_attention.use_flash_attn: + layer.self_attention.core_attention_flash.dropout_p = \ + torch.nn.Dropout(args.retro_encoder_attention_dropout) + else: + layer.self_attention.core_attention.attention_dropout.p =\ + args.retro_encoder_attention_dropout + layer.hidden_dropout = args.retro_encoder_hidden_dropout + + if self.post_process and self.post_norm: + # Final layer norm before output. + self.final_norm = get_norm(config) + + +def parallel_transformer_init_wrapper(fn): + @wraps(fn) + def wrapper(self, *args, **kwargs): + def build_layer(model_type, config, layer_number, layer_type=LayerType.encoder, self_attn_mask_type=AttnMaskType.padding): + current_layer_type = _get_layer_type( + model_type, layer_type, self.retro_layer_numbers, + layer_number) + return ParallelTransformerLayer( + config, + layer_number, + layer_type=current_layer_type, + self_attn_mask_type=self_attn_mask_type, + drop_path_rate=self.drop_path_rates[layer_number - 1]) + fn(self, *args, **kwargs) + + argument = get_args() + if argument.automated_pipeline and argument.num_layer_list and argument.virtual_pipeline_model_parallel_size is None: + start_layer_num = 1 + self.layers = torch.nn.ModuleList() + for idx, value in enumerate(argument.num_layer_list): + if parallel_state.get_pipeline_model_parallel_rank() == idx: + self.num_layers = value + for layer_num in range(start_layer_num, start_layer_num + value): + self.layers.append(build_layer(kwargs['model_type'], args[0], layer_num, self_attn_mask_type=kwargs['self_attn_mask_type'])) + start_layer_num += value + self.layers = torch.nn.ModuleList(self.layers) + + # Update dropout rate for Retro encoder. 
def generate_attention_mask(compress, device):
    """Build the attention mask and store it in the module-level ``_GLOBAL_ATTN_MASK``.

    With flash attention enabled, the mask is a square boolean matrix that
    is ``True`` strictly above the main diagonal. Its side is 2048 when
    ``compress`` is truthy and ``args.seq_length`` otherwise.

    Without flash attention a warning is issued and a boolean mask of shape
    ``[micro_batch_size, 1, seq_length, seq_length]`` is built. It is ``True``
    more than ``pre_tockens`` positions below the diagonal or more than
    ``next_tockens`` positions above it; ``compress`` is not consulted.

    Args:
        compress: selects the fixed 2048 side length on the flash path.
        device: device passed to ``torch.ones`` for the mask.
    """
    global _GLOBAL_ATTN_MASK
    cfg = get_args()

    if cfg.use_flash_attn:
        side = 2048 if compress else cfg.seq_length
        all_true = torch.ones((side, side), device=device, dtype=torch.bool)
        _GLOBAL_ATTN_MASK = torch.triu(all_true, diagonal=1)
        return

    warnings.warn("Flash Attention is highly recommended")
    full_shape = [cfg.micro_batch_size, 1, cfg.seq_length, cfg.seq_length]
    below_band = torch.tril(
        torch.ones(full_shape, dtype=bool, device=device),
        diagonal=-(cfg.pre_tockens + 1),
    )
    above_band = torch.triu(
        torch.ones(full_shape, dtype=bool, device=device),
        diagonal=cfg.next_tockens + 1,
    )
    # Adding two boolean tensors yields their element-wise union.
    _GLOBAL_ATTN_MASK = below_band + above_band
def parallel_transformer_forward_wrapper(fn):
    """Decorate a transformer ``forward`` so a missing attention mask is filled in.

    When the caller passes ``attention_mask=None`` the wrapper substitutes the
    mask returned by ``get_attention_mask()``; otherwise the given mask is
    forwarded untouched. The wrapped function's return value is passed through.
    """
    @wraps(fn)
    def wrapper(self, hidden_states, attention_mask, **kwargs):
        # The result is not used here; the call is kept so the wrapper
        # behaves exactly as before.
        get_args()
        mask = get_attention_mask() if attention_mask is None else attention_mask
        return fn(self, hidden_states, mask, **kwargs)

    return wrapper
+ hidden_states = make_viewless_tensor( + hidden_states, + requires_grad=True, + keep_graph=True, + ) + + # RNG context. + if self.sequence_parallel: + rng_context = tensor_parallel.get_cuda_rng_tracker().fork() + else: + rng_context = nullcontext() + + # Forward layers. + with rng_context: + # Determine if the current iteration is first microbatch + if self.num_microbatches_in_previous_step != get_num_microbatches(): + self.microbatch_count = 0 # Reset count on new batch size rampup interval + self.num_microbatches_in_previous_step = get_num_microbatches() + is_first_microbatch = self.microbatch_count % get_num_microbatches() == 0 + + # Forward pass. + if self.recompute_granularity == 'full': + hidden_states = self._checkpointed_forward(hidden_states, + attention_mask, + encoder_output, + enc_dec_attn_mask, + rotary_pos_emb, + is_first_microbatch) + else: + forward_kwargs = { + 'encoder_output': encoder_output, + 'enc_dec_attn_mask': enc_dec_attn_mask, + 'inference_params': inference_params, + } + + forward_kwargs['rotary_pos_emb'] = rotary_pos_emb + forward_kwargs['retriever_input'] = retriever_input + forward_kwargs['retriever_output'] = retriever_output + forward_kwargs['retriever_attn_mask'] = retriever_attn_mask + + for index in range(self.num_layers): + layer = self._get_layer(index) + + hidden_states = layer( + hidden_states, + attention_mask, + **forward_kwargs) + + # First Retro decoder layer returns both hidden_states + # and retriever_output. Make retriever_output available + # to subsequence Retro layers. 
class AmpipeLastTransformerLayerRearrange(torch.autograd.Function):
    """Autograd function that re-orders the sequence dimension across ranks.

    Forward gathers the input along dim 0, permutes rows with the
    "order to disorder" index helper, and keeps this rank's split of the
    result. Backward does the same on the gradient using the
    "disorder to order" indices.

    NOTE(review): presumably this restores the natural sequence order after
    the ampipe-chunked layers; confirm against the index helpers in
    ``mindspeed.moe.utils``.
    """

    @staticmethod
    def forward(ctx, hidden_states, ampipe_degree) -> Any:
        # Gather along the first dimension.
        # NOTE(review): the meaning of the second argument (True) is defined
        # by all_gather_along_first_dim; confirm there.
        ag_hidden_states = all_gather_along_first_dim(hidden_states, True)
        # Shrink the input's storage to zero bytes; the input tensor must not
        # be read after this point.
        hidden_states.untyped_storage().resize_(0)
        seqlen = ag_hidden_states.size(0)

        # Row permutation for the gathered tensor, built on the current NPU.
        new_indices = get_slice_indices_from_order_to_disorder(seqlen, ampipe_degree, device=torch.npu.current_device())
        select_hidden_states = torch.index_select(ag_hidden_states, dim=0, index=new_indices)
        hidden_states_chunk = _split_along_first_dim(select_hidden_states)
        # Clone before releasing select_hidden_states' storage below.
        # Presumably the split result is a view sharing that storage; TODO confirm.
        hidden_states_chunk = hidden_states_chunk.clone()
        select_hidden_states.untyped_storage().resize_(0)
        # Saved for backward, which needs the same degree for the inverse indices.
        ctx.ampipe_degree = ampipe_degree
        return hidden_states_chunk

    @staticmethod
    def backward(ctx, grad_input) -> Any:
        # Same gather / permute / split pipeline as forward, applied to the
        # incoming gradient with the inverse index helper.
        ag_grad_input = all_gather_along_first_dim(grad_input, True)
        # Shrink the incoming gradient's storage to zero bytes after gathering.
        grad_input.untyped_storage().resize_(0)
        seqlen = ag_grad_input.size(0)

        new_indices = get_slice_indices_from_disorder_to_order(seqlen, ctx.ampipe_degree, device=torch.npu.current_device())
        select_grad_input = torch.index_select(ag_grad_input, dim=0, index=new_indices)
        grad_output_chunk = _split_along_first_dim(select_grad_input)
        # Clone before the permuted buffer's storage is released.
        grad_output_chunk = grad_output_chunk.clone()
        select_grad_input.untyped_storage().resize_(0)
        # One gradient per forward input: hidden_states gets the chunk,
        # ampipe_degree (a non-tensor) gets None.
        return grad_output_chunk, None
parallel_mlp_init_wrapper(fn): + @wraps(fn) + def wrapper(self, *args, **kwargs): + fn(self, *args, **kwargs) + self.layer_number = None + _args = get_args() + if _args.swiglu and _args.use_fused_swiglu: + self.activation_func = fused_swiglu + + config = args[0] + is_expert = kwargs.get('is_expert') if 'is_expert' in kwargs.keys() else False + + ffn_hidden_size = config.ffn_hidden_size + if config.gated_linear_unit: + ffn_hidden_size *= 2 + if _args.use_nd_matmul: + self.dense_h_to_4h = Nd_ParallelLinear( + config.hidden_size, + ffn_hidden_size, + config=config, + init_method=config.init_method, + bias=self.add_bias, + skip_bias_add=True, + input_is_parallel=True, + is_expert=is_expert, + matmul_id=1 + ) + self.dense_4h_to_h = Nd_ParallelLinear( + config.ffn_hidden_size, + config.hidden_size, + config=config, + init_method=config.output_layer_init_method, + bias=self.add_bias, + skip_bias_add=True, + input_is_parallel=True, + is_expert=is_expert, + matmul_id=2 + ) + elif _args.tp_2d: + self.dense_h_to_4h = ParallelLinear2D( + config.hidden_size, + ffn_hidden_size, + config=config, + init_method=config.init_method, + add_bias=self.add_bias, + skip_bias_add=True, + is_expert=is_expert, + ag_comm_intf=TPXCollectiveComm, + ag_sd_rcv_overlap_comm_intf=TPXOverlapCollectiveComm, + rs_comm_intf=TPYCollectiveComm, + rs_sd_rcv_overlap_comm_intf=TPYOverlapCollectiveComm, + enable_overlap_ag_with_matmul=False, + enable_overlap_matmul_with_rs=_args.enable_overlap_matmul_with_rs, + partition_dim=0, + enable_backward_overlap_ag_with_matmul=_args.enable_backward_overlap_ag_with_matmul) + self.dense_4h_to_h = ParallelLinear2D( + config.ffn_hidden_size, + config.hidden_size, + config=config, + init_method=config.output_layer_init_method, + add_bias=self.add_bias, + skip_bias_add=True, + ag_comm_intf=TPYCollectiveComm, + ag_sd_rcv_overlap_comm_intf=TPYOverlapCollectiveComm, + rs_comm_intf=TPXCollectiveComm, + rs_sd_rcv_overlap_comm_intf=TPXOverlapCollectiveComm, + 
def should_recompute(args, layer_number, num_recompute):
    """Decide whether a layer falls inside the recompute budget.

    A priority is derived from the layer's position inside its pipeline chunk.
    When ``args.enable_recompute_layers_per_pp_rank`` is set and virtual
    pipelining is active, positions are interleaved across the virtual chunks.
    Layers whose priority is below ``args.recompute_num_layers`` return
    ``False``. After those, the next ``num_recompute`` priorities return
    ``True``, and every remaining layer does when ``num_recompute`` is ``None``.

    Args:
        args: global arguments namespace.
        layer_number: 1-based layer number.
        num_recompute: number of layers to recompute, or ``None`` for no limit.

    Returns:
        bool: ``True`` when this layer should recompute.
    """
    chunk_rank = mpu.get_virtual_pipeline_model_parallel_rank()
    chunk_count = args.virtual_pipeline_model_parallel_size
    stage_count = args.transformer_pipeline_model_parallel_size

    # Number of layers held by one model chunk on this rank.
    if chunk_count is not None:
        layers_in_chunk = args.num_layers_per_virtual_pipeline_stage
    elif stage_count is not None:
        layers_in_chunk = args.num_layers // stage_count
    else:
        layers_in_chunk = args.num_layers

    # Without per-pp-rank accounting (or without virtual pipelining) every
    # chunk is ranked on its own: rank 0 of 1.
    if chunk_rank is None or not args.enable_recompute_layers_per_pp_rank:
        chunk_rank = 0
    if chunk_count is None or not args.enable_recompute_layers_per_pp_rank:
        chunk_count = 1

    priority = ((layer_number - 1) % layers_in_chunk) * chunk_count + chunk_rank
    full_layers = args.recompute_num_layers

    # Layers already covered by full recomputation are excluded.
    if full_layers and priority < full_layers:
        return False
    if num_recompute is None:
        return True

    # The budget starts after the fully recomputed layers, if there are any.
    limit = full_layers + num_recompute if full_layers else num_recompute
    return priority < limit
self.add_bias is True + assert self.activation_func == F.gelu + intermediate = bias_gelu_impl(intermediate, bias) + else: + if bias is not None: + intermediate = intermediate + bias + intermediate = self.activation_func(intermediate) + return intermediate + + if not is_recompute_activation: + # [s, b, 4hp] + intermediate_parallel, bias_parallel = self.dense_h_to_4h(hidden_states) + if not args.use_pipe_experts and args.ampipe_degree > 1 and args.ampipe_tp_sp_comm_overlap: + from mindspeed.moe.async_comm_utils import get_fw_ar_rs_output_ampipe, async_all_to_all + last_chunk_output = get_fw_ar_rs_output_ampipe(args.sequence_parallel) + if last_chunk_output is not None: + a2a_output, handle = async_all_to_all(last_chunk_output) + + if self.bias_gelu_fusion: + assert self.add_bias is True + assert self.activation_func == F.gelu + intermediate_parallel = bias_gelu_impl(intermediate_parallel, bias_parallel) + else: + if bias_parallel is not None: + intermediate_parallel = intermediate_parallel + bias_parallel + intermediate_parallel = self.activation_func(intermediate_parallel) + + # [s, b, h] + output, output_bias = self.dense_4h_to_h(intermediate_parallel) + if not args.use_pipe_experts and args.ampipe_degree > 1 and args.ampipe_tp_sp_comm_overlap: + if last_chunk_output is not None: + handle.wait() + return output, output_bias, a2a_output + else: + if not args.use_pipe_experts and args.ampipe_degree > 1 and args.ampipe_tp_sp_comm_overlap: + from mindspeed.moe.async_comm_utils import (get_fw_ar_rs_output_ampipe, + async_all_to_all) + last_chunk_output = get_fw_ar_rs_output_ampipe(args.sequence_parallel) + if last_chunk_output is not None: + a2a_output, handle = async_all_to_all(last_chunk_output) + + intermediate_parallel, bias_parallel = self.dense_h_to_4h(hidden_states) + self.activation_checkpoint_manager = CheckpointWithoutOutput() + intermediate_parallel = self.activation_checkpoint_manager.checkpoint(activation_function, + False, + intermediate_parallel, + 
bias_parallel) + # [s, b, h] + output, output_bias = self.dense_4h_to_h(intermediate_parallel) + + # discard the output of the activation function, + # which will be restored by recomputation during backward. + self.activation_checkpoint_manager.discard_output() + + # when backward to output of dense_4h_to_h, + # recompute and restore the output of activation function. + if output.requires_grad: + output.register_hook(self.activation_checkpoint_manager.recompute) + if not args.use_pipe_experts and args.ampipe_degree > 1 and args.ampipe_tp_sp_comm_overlap: + if last_chunk_output is not None: + handle.wait() + return output, output_bias, a2a_output + return output, output_bias + + +def flash_self_attention_init_wrapper(fn): + @wraps(fn) + def wrapper(self, *arg, **kwargs): + fn(self, *arg, **kwargs) + args = get_args() + + self.pse = None + self.pse_type = args.alibi_fusion_attn_type + + if self.pse_type is None: + self.pse_type = 1 # not use pse + elif self.pse_type == 0: + alibi = AlibiForFusionAttnSingleton.get_alibi_tensor_for_fusion_attn(args.seq_length, + args.num_attention_heads, + args.params_dtype, + args.alibi_diagonal_opposite, + 1024) + self.pse = alibi + + elif self.pse_type == 2 or self.pse_type == 3: + self.pse = AlibiForFusionAttnSingleton.get_alibi_slopes_for_fusion_attn(args.num_attention_heads) + + return wrapper + + +def flash_self_attention_init_add_config_wrapper(fn): + @wraps(fn) + def wrapper(self, *arg, **kwargs): + if 'config' in kwargs: + self.config = kwargs.pop('config') + fn(self, *arg, **kwargs) + + return wrapper + + +def flash_self_attention_forward(self, q, k, v, attention_mask): + """Implements the multihead softmax attention. + Arguments + --------- + q, k, v: The tensor containing the query, key, and value. 
(S, B, H, D) + """ + args = get_args() + seq_length, _, head_num, head_dim = q.shape[0], q.shape[1], q.shape[2], q.shape[3] + + q, k, v = [rearrange(x, 's b h d -> s b (h d)') for x in [q, k, v]] + + try: + scale = 1.0 / math.sqrt(head_dim) if self.softmax_scale is None else self.softmax_scale + except Exception as e: + raise ValueError('Invalid head_dim: {}'.format(head_dim)) from e + cp_expanded_by_2d_tp = args.tp_2d and args.tp_y > 1 + if cp_expanded_by_2d_tp: + tp_y_cp_sz = TensorParallelYUnionCP().get_parallel_group_world_size() + else: + tp_y_cp_sz = args.context_parallel_size + if tp_y_cp_sz > 1 and args.context_parallel_algo in ['megatron_cp_algo', 'hybrid_cp_algo', + 'adaptive_cp_algo', 'hybrid_adaptive_cp_algo']: + in_hybrid_mode = False + if get_context_parallel_group_for_hybrid_ring(check_initialized=False) is not None: + in_hybrid_mode = True + + if not in_hybrid_mode: + if cp_expanded_by_2d_tp: + tp_y_cp = TensorParallelYUnionCP() + cp_group = tp_y_cp.group + cp_size = tp_y_cp.get_parallel_group_world_size() + rank = tp_y_cp.get_parallel_rank() + cp_global_ranks = tp_y_cp.global_ranks + else: + cp_group = mpu.get_context_parallel_group() + cp_size = mpu.get_context_parallel_world_size() + rank = mpu.get_context_parallel_rank() + cp_global_ranks = mpu.get_context_parallel_global_ranks() + else: + cp_group = get_context_parallel_group_for_hybrid_ring() + cp_size = get_context_parallel_for_hybrid_ring_world_size() + rank = get_context_parallel_for_hybrid_ring_rank() + cp_global_ranks = get_context_parallel_for_hybrid_ring_global_ranks() + + cp_para = dict() + if hasattr(self, 'config'): + cp_para['megatron_cp_in_bnsd'] = self.config.megatron_cp_in_bnsd + cp_para['causal'] = args.attention_mask_type == 'causal' + cp_para['cp_group'] = cp_group + cp_para['cp_size'] = cp_size + cp_para['rank'] = rank + + if args.context_parallel_algo in ['megatron_cp_algo', 'hybrid_cp_algo']: + cp_para['cp_global_ranks'] = cp_global_ranks + if args.use_cp_send_recv_overlap: 
+ if cp_expanded_by_2d_tp: + cp_para['cp_group_for_send_recv_overlap'] = tp_y_cp.overlap_group + else: + cp_para['cp_group_for_send_recv_overlap'] = mpu.get_context_parallel_group_for_send_recv_overlap() + else: + cp_para['cp_group_for_send_recv_overlap'] = None + cp_para['pse'] = self.pse + cp_para['pse_type'] = self.pse_type + if args.context_parallel_size > 1 and not args.tp_2d: + cp_para['cp_inner_ranks'] = get_ring_ranks_for_intra_window() + cp_para['cp_outer_ranks'] = get_ring_ranks_for_inter_window_kv() + cp_para['cp_dkv_outer_ranks'] = get_ring_ranks_for_inter_window_dkv() + cp_para['cp_group_for_intra_window'] = get_ring_group_for_intra_window() + cp_para['cp_group_for_intra_window_send_recv_overlap'] = get_ring_group_for_intra_window_send_recv_overlap() + output = ringattn_context_parallel(q, k, v, head_num, cp_para, scale, attention_mask, self.dropout_p) + else: + cp_para['scheduling_info'] = get_scheduling_info() + output = adaptive_attn_context_parallel(q, k, v, head_num, cp_para, scale, attention_mask, self.dropout_p) + else: + if args.use_fusion_attn_v2: + output = npu_fusion_attention( + q, k, v, head_num, args.shape_order, + pse=self.pse, + padding_mask=None, + atten_mask=attention_mask, + scale=scale, + pse_type=self.pse_type, + pre_tokens=args.pre_tockens, + next_tokens=args.next_tockens, + keep_prob=1 - self.dropout_p, + inner_precise=0, + sparse_mode=args.sparse_mode + )[0] + else: + output = torch_npu.npu_fusion_attention( + q, k, v, head_num, args.shape_order, + pse=None, + padding_mask=None, + atten_mask=attention_mask, + scale=scale, + pre_tockens=args.pre_tockens, + next_tockens=args.next_tockens, + keep_prob=1 - self.dropout_p, + inner_precise=0, + sparse_mode=args.sparse_mode + )[0] + return output + + +def parallel_attention_init(self, config, layer_number, + attention_type=AttnType.self_attn, + attn_mask_type=AttnMaskType.padding): + super(ParallelAttention, self).__init__() + args = get_args() + self.layer_number = max(1, 
layer_number) + self.attention_type = attention_type + self.attn_mask_type = attn_mask_type + self.params_dtype = config.params_dtype + self.sequence_parallel = config.sequence_parallel + self.config = config + self.group_query_attention = args.group_query_attention + self.num_query_groups = config.num_query_groups + + query_projection_size = config.kv_channels * config.num_attention_heads + if self.group_query_attention: + kv_projection_size = config.kv_channels * config.num_query_groups + else: + kv_projection_size = config.kv_channels * config.num_attention_heads + + self.use_flash_attn = args.use_flash_attn \ + and attention_type == AttnType.self_attn \ + and self.attn_mask_type == AttnMaskType.causal + if self.use_flash_attn: + try: + from flash_attn.flash_attn_interface import flash_attn_unpadded_func + except ImportError: + try: + from flash_attn.flash_attn_interface import flash_attn_varlen_func as flash_attn_unpadded_func + except ImportError: + flash_attn_unpadded_func = None + if flash_attn_unpadded_func is None: + raise ImportError('FlashAttention is not installed, please install with ' + 'pip install flash-attn') + assert attention_type == AttnType.self_attn, ('FlashAttention code path only supports ' + 'self-attention for now') + assert self.attn_mask_type == AttnMaskType.causal, ('FlashAttention code path only ' + 'supports causal mask for now') + if rearrange is None: + raise ImportError('einops is not installed, please install with pip install einops') + + # Per attention head and per partition values. + from megatron import core + self.hidden_size_per_attention_head = core.utils.divide( + query_projection_size, config.num_attention_heads) + + # Strided linear layer. 
+ if attention_type == AttnType.self_attn: + self.query_key_value = tensor_parallel.ColumnParallelLinear( + config.hidden_size, + query_projection_size + 2 * kv_projection_size, + config=config, + init_method=config.init_method, + bias=config.add_bias_linear or config.add_qkv_bias, + gather_output=False) + else: + assert attention_type == AttnType.cross_attn + + if self.group_query_attention: + raise NotImplementedError("Grouped query attention not implemented for cross-attention.") + assert query_projection_size == kv_projection_size + + self.query = tensor_parallel.ColumnParallelLinear( + config.hidden_size, + query_projection_size, + config=config, + init_method=config.init_method, + bias=config.add_bias_linear, + gather_output=False) + + self.key_value = tensor_parallel.ColumnParallelLinear( + config.hidden_size, + 2 * kv_projection_size, + config=config, + init_method=config.init_method, + bias=config.add_bias_linear, + gather_output=False) + + self.core_attention = CoreAttention(self.layer_number, config, + self.attn_mask_type) + self.checkpoint_core_attention = config.recompute_granularity == 'selective' + + if self.use_flash_attn: + self.core_attention_flash = FlashSelfAttention( + causal=True, attention_dropout=config.attention_dropout, config=config + ) + + # Output. + self.dense = tensor_parallel.RowParallelLinear( + query_projection_size, + config.hidden_size, + config=config, + init_method=config.output_layer_init_method, + bias=config.add_bias_linear, + input_is_parallel=True, + skip_bias_add=True) + # patch for attention + patch_for_attention(config, self) + + +def patch_for_attention(config, self): + _args = get_args() + attn_heads_split_num = ( + get_tensor_model_parallel_world_size_for_nd1_dim1() + if _args.tp_2d + else mpu.get_tensor_model_parallel_world_size() + ) + # Per attention head and per partition values. 
+ self.num_attention_heads_per_partition = config.num_attention_heads // attn_heads_split_num + if self.group_query_attention: + if config.num_query_groups % attn_heads_split_num != 0: + raise NotImplementedError( + "Currently the num_query_groups should be a multiple of the tensor parallel size" + ) + self.num_query_groups_per_partition = config.num_query_groups // attn_heads_split_num + else: + self.num_query_groups_per_partition = self.num_attention_heads_per_partition + query_projection_size = config.kv_channels * config.num_attention_heads + if _args.group_query_attention: + kv_projection_size = config.kv_channels * config.num_query_groups + else: + kv_projection_size = config.kv_channels * config.num_attention_heads + # qkv bias + bias = config.add_qkv_bias or config.add_bias_linear + cp = config.context_parallel_size + if _args.tp_2d: + tp_y_cp_sz = cp * _args.tp_y + else: + tp_y_cp_sz = cp + if tp_y_cp_sz > 1 and _args.context_parallel_algo in ['ulysses_cp_algo', 'hybrid_cp_algo', + 'hybrid_adaptive_cp_algo']: + if _args.tp_2d: + tp_y_cp = TensorParallelYUnionCP() + ulysses_group = tp_y_cp.group + else: + ulysses_group = mpu.get_context_parallel_group() + if _args.context_parallel_algo == 'hybrid_cp_algo' or _args.context_parallel_algo == 'hybrid_adaptive_cp_algo': + ulysses_group = get_context_parallel_group_for_hybrid_ulysses() + if self.use_flash_attn: + self.core_attention_flash = UlyssesContextAttention(self.core_attention_flash, ulysses_group) + else: + self.core_attention = UlyssesContextAttention(self.core_attention, ulysses_group) + if _args.use_nd_matmul: + self.query_key_value = Nd_ParallelLinear( + config.hidden_size, + query_projection_size + 2 * kv_projection_size, + config=config, + init_method=config.init_method, + bias=bias, + skip_bias_add=True, + input_is_parallel=True, + matmul_id=1 + ) + elif _args.tp_2d: + self.query_key_value = ParallelLinear2D( + config.hidden_size, + query_projection_size + 2 * kv_projection_size, + config=config, + 
init_method=config.init_method, + add_bias=bias, + skip_bias_add=True, + ag_comm_intf=TPXCollectiveComm, + ag_sd_rcv_overlap_comm_intf=TPXOverlapCollectiveComm, + rs_comm_intf=TPYCollectiveComm, + rs_sd_rcv_overlap_comm_intf=TPYOverlapCollectiveComm, + enable_overlap_ag_with_matmul=False, + enable_overlap_matmul_with_rs=False, + partition_dim=0, + enable_backward_overlap_ag_with_matmul=False) + else: + self.query_key_value = tensor_parallel.ColumnParallelLinear( + config.hidden_size, + query_projection_size + 2 * kv_projection_size, + config=config, + init_method=config.init_method, + bias=bias, + gather_output=False) + # dense bias + bias = _args.add_dense_bias or config.add_bias_linear + skip_bias_add = _args.skip_bias_add + # Output. + if _args.use_nd_matmul: + self.dense = Nd_ParallelLinear( + query_projection_size, + config.hidden_size, + config=config, + init_method=config.output_layer_init_method, + bias=bias, + skip_bias_add=True, + input_is_parallel=True, + matmul_id=2 + ) + elif _args.tp_2d: + self.dense = ParallelLinear2D( + query_projection_size, + config.hidden_size, + config=config, + init_method=config.output_layer_init_method, + add_bias=bias, + skip_bias_add=True, + ag_comm_intf=TPYCollectiveComm, + ag_sd_rcv_overlap_comm_intf=TPYOverlapCollectiveComm, + rs_comm_intf=TPXCollectiveComm, + rs_sd_rcv_overlap_comm_intf=TPXOverlapCollectiveComm, + enable_overlap_ag_with_matmul=False, + enable_overlap_matmul_with_rs=False, + partition_dim=1, + enable_backward_overlap_ag_with_matmul=_args.enable_backward_overlap_ag_with_matmul) + else: + self.dense = tensor_parallel.RowParallelLinear( + query_projection_size, + config.hidden_size, + config=config, + init_method=config.output_layer_init_method, + bias=bias, + input_is_parallel=True, + skip_bias_add=skip_bias_add) + if _args.use_nanopipe and parallel_state.get_pipeline_model_parallel_world_size() > 1 \ + and parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None: + 
setattr(self.query_key_value, "in_nano", True) + setattr(self.dense, "in_nano", True) + if _args.ampipe_degree > 1: + setattr(self.query_key_value, 'ampipe_degree', _args.ampipe_degree) + setattr(self.query_key_value, 'is_dense_h_to_3h', True) + + +def parallel_attention_forward(self, hidden_states, attention_mask, + encoder_output=None, inference_params=None, + rotary_pos_emb=None): + # hidden_states: [sq, b, h] + + # ================================================= + # Pre-allocate memory for key-values for inference. + # ================================================= + is_first_step = False + if inference_params: + if self.layer_number not in inference_params.key_value_memory_dict: + inf_max_seq_len = inference_params.max_sequence_length + inf_max_batch_size = inference_params.max_batch_size + inference_key_memory = self._allocate_memory( + inf_max_seq_len, inf_max_batch_size, + self.num_query_groups_per_partition) + inference_value_memory = self._allocate_memory( + inf_max_seq_len, inf_max_batch_size, + self.num_query_groups_per_partition) + + inference_params.key_value_memory_dict[self.layer_number] = ( + inference_key_memory, inference_value_memory) + is_first_step = True + else: + inference_key_memory, inference_value_memory = \ + inference_params.key_value_memory_dict[self.layer_number] + + # ===================== + # Query, Key, and Value + # ===================== + if self.attention_type == AttnType.self_attn: + + # Attention heads [sq, b, h] --> [sq, b, ng * (np/ng + 2) * hn)] + mixed_x_layer, _ = self.query_key_value(hidden_states) + + # [sq, b, hp] --> [sq, b, ng, (np/ng + 2) * hn] + new_tensor_shape = mixed_x_layer.size()[:-1] + ( + self.num_query_groups_per_partition, + ( + (self.num_attention_heads_per_partition // self.num_query_groups_per_partition + 2) + * self.hidden_size_per_attention_head + ), + ) + mixed_x_layer = mixed_x_layer.view(*new_tensor_shape) + + # [sq, b, ng, (np/ng + 2) * hn] --> [sq, b, ng, np/ng * hn], [sq, b, ng, hn], [sq, 
b, ng, hn] + (query_layer, + key_layer, + value_layer) = torch.split( + mixed_x_layer, + [ + ( + self.num_attention_heads_per_partition // self.num_query_groups_per_partition + * self.hidden_size_per_attention_head + ), + self.hidden_size_per_attention_head, + self.hidden_size_per_attention_head + ], + dim=3) + + # [sq, b, ng, np/ng * hn] -> [sq, b, np, hn] - + query_layer = query_layer.view(query_layer.size(0), query_layer.size(1), -1, self.hidden_size_per_attention_head) + else: + # Attention heads [sk, b, h] --> [sk, b, (np * 2 * hn)] + mixed_kv_layer, _ = self.key_value(encoder_output) + + # [sk, b, (np * 2 * hn)] --> [sk, b, np, 2 * hn] + new_tensor_shape = mixed_kv_layer.size()[:-1] + \ + (self.num_attention_heads_per_partition, + 2 * self.hidden_size_per_attention_head) + mixed_kv_layer = mixed_kv_layer.view(*new_tensor_shape) + + # [sk, b, np, 2 * hn] --> 2 [sk, b, np, hn] + (key_layer, + value_layer) = tensor_parallel.split_tensor_along_last_dim(mixed_kv_layer, 2) + + # Attention head [sq, b, h] --> [sq, b, hp] + query_layer, _ = self.query(hidden_states) + # [sq, b, hp] --> [sq, b, np, hn] + new_tensor_shape = query_layer.size()[:-1] + \ + (self.num_attention_heads_per_partition, + self.hidden_size_per_attention_head) + query_layer = query_layer.view(*new_tensor_shape) + + # ================================== + # Adjust key and value for inference + # ================================== + + # duplicate the pos_emb for self attention + if rotary_pos_emb is not None: + if isinstance(rotary_pos_emb, tuple): + rotary_pos_emb = rotary_pos_emb + else: + rotary_pos_emb = ((rotary_pos_emb,) * 2) + + if inference_params: + batch_start = inference_params.batch_size_offset + batch_end = batch_start + key_layer.size(1) + assert batch_end <= inference_key_memory.size(1) + sequence_start = inference_params.sequence_len_offset + sequence_end = sequence_start + key_layer.size(0) + assert sequence_end <= inference_key_memory.size(0) + # Copy key and values. 
+ inference_key_memory[sequence_start:sequence_end, + batch_start:batch_end, ...] = key_layer + inference_value_memory[sequence_start:sequence_end, + batch_start:batch_end, ...] = value_layer + key_layer = inference_key_memory[ + :sequence_end, batch_start:batch_end, ...] + value_layer = inference_value_memory[ + :sequence_end, batch_start:batch_end, ...] + + + # adjust the key rotary positional embedding + if rotary_pos_emb is not None: + q_pos_emb, k_pos_emb = rotary_pos_emb + # need to cross check this condition during inference + # if not set_inference_key_value_memory: + if not is_first_step: + # In inference, we compute one token at a time. + # Select the correct positional embedding + # (only the last token in the sequence) + q_pos_emb = q_pos_emb[sequence_end - 1 : sequence_end] + else: + # In the first forward pass of inference, + # we use the entire provided prefix. + # q_pos_emb here has the rope embeddings of the entire + # prefix + to-be-generated output so + # we slice to just the prefix. 
+ q_pos_emb = q_pos_emb[:sequence_end, :, :, :] + k_pos_emb = k_pos_emb[:sequence_end, :, :, :] + rotary_pos_emb = (q_pos_emb, k_pos_emb) + + # ================================== + # core attention computation + # ================================== + + # apply relative positional encoding (rotary embedding) + if rotary_pos_emb is not None: + q_pos_emb, k_pos_emb = rotary_pos_emb + query_layer = apply_rotary_pos_emb(query_layer, q_pos_emb, self.config) + key_layer = apply_rotary_pos_emb(key_layer, k_pos_emb, self.config) + + if not self.use_flash_attn: + if self.num_attention_heads_per_partition // self.num_query_groups_per_partition > 1: + key_layer = key_layer.repeat_interleave( + self.num_attention_heads_per_partition // self.num_query_groups_per_partition, dim=2) + value_layer = value_layer.repeat_interleave( + self.num_attention_heads_per_partition // self.num_query_groups_per_partition, dim=2) + if self.checkpoint_core_attention: + context_layer = self._checkpointed_attention_forward( + query_layer, key_layer, value_layer, attention_mask) + else: + context_layer = self.core_attention( + query_layer, key_layer, value_layer, attention_mask) + else: + if get_args().ampipe_degree > 1: + return query_layer, key_layer, value_layer + if not self.sequence_parallel: + with tensor_parallel.get_cuda_rng_tracker().fork(): + context_layer = self.core_attention_flash(query_layer, key_layer, value_layer, attention_mask) + else: + context_layer = self.core_attention_flash(query_layer, key_layer, value_layer, attention_mask) + + # ================= + # Output. 
[sq, b, h] + # ================= + + output, bias = self.dense(context_layer) + + return output, bias + + +def switch_mlp_init_wrapper(fn): + @wraps(fn) + def wrapper(self, *args, **kwargs): + global_args = get_args() + if global_args.moe_model_type == 'megatron_moe': + fn(self, *args, **kwargs) + return + from megatron.legacy.model.transformer import SwitchMLP + super(SwitchMLP, self).__init__() + config = args[0] + layer_number = args[1] if len(args) > 1 else None + from megatron.core.parallel_state import get_expert_model_parallel_group + from mindspeed.moe.moe import MoE + from mindspeed.moe.mixtral_parallel_mlpbm import MixtralParallelMLPBM + try: + expert_parallel_group = get_expert_model_parallel_group() + except AttributeError: + expert_parallel_group = None + + if layer_number is None: + self.block = MoE( + config.hidden_size, + MixtralParallelMLPBM(config, ) if global_args.swiglu else ParallelMLP(config, is_expert=False), + num_experts=global_args.num_experts, + ep_size=config.expert_model_parallel_size, + k=config.moe_router_topk, + capacity_factor=global_args.moe_train_capacity_factor, + eval_capacity_factor=global_args.moe_train_capacity_factor, + aux_loss_coef=config.moe_aux_loss_coeff, + ep_group=expert_parallel_group, + noisy_gate_policy=global_args.noisy_gate_policy, + no_drop=global_args.moe_no_drop, + dynamic_padding=global_args.moe_dynamic_padding, + use_sinkhorn=global_args.moe_use_sinkhorn, + sequence_parallel=config.sequence_parallel + ) + else: + if layer_number % global_args.expert_interval == 0: + self.block = MoE( + config.hidden_size, + MixtralParallelMLPBM(config, ) if global_args.swiglu else ParallelMLP(config, is_expert=False), + num_experts=global_args.num_experts, + ep_size=config.expert_model_parallel_size, + k=config.moe_router_topk, + capacity_factor=global_args.moe_train_capacity_factor, + eval_capacity_factor=global_args.moe_train_capacity_factor, + aux_loss_coef=config.moe_aux_loss_coeff, + ep_group=expert_parallel_group, + 
noisy_gate_policy=global_args.noisy_gate_policy, + no_drop=global_args.moe_no_drop, + dynamic_padding=global_args.moe_dynamic_padding, + use_sinkhorn=global_args.moe_use_sinkhorn, + sequence_parallel=config.sequence_parallel + ) + else: + self.block = ParallelMLP(config) + return + return wrapper + + +def switch_mlp_forward_wrapper(fn): + @wraps(fn) + def wrapper(self, *args, **kwargs): + global_args = get_args() + if global_args.moe_model_type == 'megatron_moe': + return fn(self, *args, **kwargs) + hidden_states = args[0] + used_token = args[1] if len(args) > 1 else None + output = self.block(hidden_states, used_token) + return output[0], None + return wrapper + + +def parallel_transformer_layer_init_wrapper(fn): + @wraps(fn) + def wrapper(self, *args, **kwargs): + from megatron.legacy.model.transformer import SwitchMLP + super(ParallelTransformerLayer, self).__init__() + global_args = get_args() + fn(self, *args, **kwargs) + self.pipe_degree = global_args.ampipe_degree + self.ampipe_enabled = global_args.ampipe_degree > 1 + if self.mlp.__class__ is SwitchMLP: + experts_modules = self.mlp.block.moe_layer.experts.experts if global_args.moe_model_type == 'deepspeed_moe' \ + else self.mlp.local_experts + for expert in experts_modules: + expert.layer_number = self.layer_number + else: + self.mlp.layer_number = self.layer_number + + return wrapper + + +def parallel_transformer_layer_forward_ampipe(self, hidden_states, attention_mask, + encoder_output=None, enc_dec_attn_mask=None, + retriever_input=None, + retriever_output=None, + retriever_attn_mask=None, + inference_params=None, + rotary_pos_emb=None): + + # Update the params in case the retro param changes during inference + # TODO: better redesign with inference param + args = get_args() + if args.retro_add_retriever: + self.retro_num_neighbors = args.retro_num_neighbors + self.retro_chunk_length = args.retro_chunk_length + self.retro_retrieved_length = \ + args.retro_num_retrieved_chunks * args.retro_chunk_length + 
+ # hidden_states: [s, b, h] + + # Layer norm at the beginning of the transformer layer. + norm_output = self.input_norm(hidden_states) + + if self.ampipe_enabled: + q, k, v = self.self_attention( + norm_output, + attention_mask, + inference_params=inference_params, + rotary_pos_emb=rotary_pos_emb) + # release memory to reduce peak memory usage + del norm_output + dense_layer = self.self_attention.dense + ln = self.post_attention_norm + k, v = [rearrange(x, 's b n d -> s b (n d)') for x in [k, v]] + + ampipe_forward_args = ForwardArgs( + dense_layer, bias_dropout_add_fused_train, ln, self.mlp.block, self.hidden_dropout + ) + out_mlp, residual = AttMoEPipe.apply(q, k, v, hidden_states, attention_mask, ampipe_forward_args) + + with self.bias_dropout_add_exec_handler(): + output = bias_dropout_add_fused_train( + out_mlp, + None, + residual, + self.hidden_dropout) + + output = make_viewless_tensor(inp=output, requires_grad=output.requires_grad, keep_graph=True) + + return output + + # Self attention. + attention_output, attention_bias = \ + self.self_attention( + norm_output, + attention_mask, + inference_params=inference_params, + rotary_pos_emb=rotary_pos_emb) + + # Residual connection. + if self.apply_residual_connection_post_norm: + residual = norm_output + else: + residual = hidden_states + + if self.drop_path is None: + # jit scripting for a nn.module (with dropout) is not + # trigerring the fusion kernel. For now, we use two + # different nn.functional routines to account for varying + # dropout semantics during training and inference phases. 
+ if self.bias_dropout_fusion: + if self.training: + bias_dropout_add_func = bias_dropout_add_fused_train + else: + bias_dropout_add_func = bias_dropout_add_fused_inference + else: + bias_dropout_add_func = get_bias_dropout_add(self.training) + + if attention_bias is not None: + attention_bias = attention_bias.expand_as(residual) + with self.bias_dropout_add_exec_handler(): + norm_input = bias_dropout_add_func( + attention_output, + attention_bias, + residual, + self.hidden_dropout) + else: + out = torch.nn.functional.dropout(attention_output + attention_bias, + p=self.hidden_dropout, + training=self.training) + norm_input = residual + self.drop_path(out) + + # Layer norm post the self attention. + norm_output = self.post_attention_norm(norm_input) + + # Cross attention. + if self.layer_type == LayerType.encoder: + pass + elif self.layer_type == LayerType.decoder: + norm_input, norm_output = \ + self.default_decoder_cross_attention( + encoder_output, + enc_dec_attn_mask, + norm_input, + norm_output, + bias_dropout_add_func) + elif self.layer_type == LayerType.retro_encoder: + norm_input, norm_output = \ + self.retro_encoder_cross_attention( + retriever_output, + norm_input, + norm_output, + bias_dropout_add_func) + elif self.layer_type in (LayerType.retro_decoder, + LayerType.retro_decoder_with_retriever): + retriever_output, norm_input, norm_output = \ + self.retro_decoder_cross_attention( + retriever_input, + retriever_output, + retriever_attn_mask, + norm_input, + norm_output, + inference_params, + bias_dropout_add_func) + else: + raise Exception("Unsupported layer type, '%s'." % + self.layer_type.name) + + # MLP. + mlp_output, mlp_bias = self.mlp(norm_output) + + # Second residual connection. 
+ if self.apply_residual_connection_post_norm: + residual = norm_output + else: + residual = norm_input + + if self.drop_path is None: + if mlp_bias is not None: + mlp_bias = mlp_bias.expand_as(residual) + with self.bias_dropout_add_exec_handler(): + output = bias_dropout_add_func( + mlp_output, + mlp_bias, + residual, + self.hidden_dropout) + + # Jit compiled function creates 'view' tensor. This tensor + # potentially gets saved in the MPU checkpoint function context, + # which rejects view tensors. While making a viewless tensor here + # won't result in memory savings (like the data loader, or + # p2p_communication), it serves to document the origin of this + # 'view' tensor. + output = make_viewless_tensor(inp=output, requires_grad=output.requires_grad, keep_graph=True) + + else: + if mlp_bias is not None: + mlp_output = mlp_output + mlp_bias + out = torch.nn.functional.dropout(mlp_output, + p=self.hidden_dropout, + training=self.training) + output = residual + self.drop_path(out) + + if self.layer_type == LayerType.retro_decoder_with_retriever: + return output, retriever_output + else: + return output diff --git a/model/train/yoco_moe/mindspeed/moe/__init__.py b/model/train/yoco_moe/mindspeed/moe/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/moe/ampipe/__init__.py b/model/train/yoco_moe/mindspeed/moe/ampipe/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model/train/yoco_moe/mindspeed/moe/ampipe/ampipe.py b/model/train/yoco_moe/mindspeed/moe/ampipe/ampipe.py new file mode 100644 index 000000000..7addb594c --- /dev/null +++ b/model/train/yoco_moe/mindspeed/moe/ampipe/ampipe.py @@ -0,0 +1,327 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class AttMoEPipe(torch.autograd.Function):
    """
    Ampipe autograd.Function Class

    Fuses FlashAttention, the post-attention dense projection,
    bias-dropout-add + norm, MoE gating, the MoE MLP and the post-MLP token
    restore into a single autograd node. The sequence is processed in
    ``ampipe_degree`` chunks so that communication issued for one chunk can
    overlap with the computation of another.

    Args (of ``forward``):
        q: query; rearranged inside forward from "s b n d" to "s b (n d)".
        k: key
        v: value
        hidden_states: hidden_states before transformer layer used as residual.
        attention_mask: global attention mask.
        ampipe_forward_args: a ``ForwardArgs`` bundle providing
            attention_dense: post attention dense layer object.
            bias_dropout_add_func: bias dropout add function
            post_attention_norm: post attention norm object.
            moe: moe layer object.
            hidden_dropout: dropout prob.

    Returns (of ``forward``):
        A 2-tuple ``(moe_output, ln_input)``: the per-chunk MoE outputs and the
        per-chunk norm inputs, each concatenated with ``torch.cat``.
    """
    @staticmethod
    def forward(ctx, q, k, v, hidden_states, attention_mask, ampipe_forward_args: ForwardArgs):
        attention_dense = ampipe_forward_args.attention_dense
        bias_dropout_add_func = ampipe_forward_args.bias_dropout_add_func
        post_attention_norm = ampipe_forward_args.post_attention_norm
        moe = ampipe_forward_args.moe
        hidden_dropout = ampipe_forward_args.hidden_dropout

        global_args = get_args()
        pipe_degree = global_args.ampipe_degree
        AttMoEPipe.save_args_to_ctx(ctx, ampipe_forward_args, global_args)

        # Lists of tensors to be saved for backward, one list per stage.
        flash_tensor_list = []
        dense_tensor_list = []
        bdal_tensor_list = []
        gate_tensor_list = []
        mlp_tensor_list = []
        post_mlp_tensor_list = []

        # Temporary per-chunk lists.
        ln_input_list = []
        moe_output_list = []
        weights_list = [None] * pipe_degree
        token_ec_idx_list = [None] * pipe_degree
        mlp_inputs, a2a_inputs, a2a_events, ag_events = AttMoEPipe._init_fwd_comm_list()

        # Attention-related setup; the head count is kept for the backward rearrange.
        q_shape = q.shape
        ctx.head = q_shape[2]
        q = rearrange(q, "s b n d -> s b (n d)")
        fa_fwd_args = AttMoEPipe._init_attention_args(pipe_degree, q_shape, attention_dense, flash_tensor_list)
        # Split the residual and the dense bias into one chunk per pipeline step.
        hidden_states_chunks = hidden_states.chunk(pipe_degree, dim=0)
        bias_chunks = attention_dense.bias.chunk(pipe_degree, dim=0) if attention_dense.bias is not None else None
        ln_seq_len = hidden_states.shape[0]
        ctx.fa_computer = fa_computer = FlashAttentionComputer(fa_fwd_args)
        for c in range(pipe_degree):
            # Attention(FA)
            fa_fwd_args.cur_degree = c
            fwd_comm_args = FwdCommArgs(c, mlp_inputs, a2a_inputs, a2a_events, ag_events)
            ctx.async_comm = async_comm = AsyncCommunication(fwd_comm_args)
            detach_attn_out, attn_out, attn_bias = fa_computer.forward(ctx, q, k, v, attention_mask)
            # Advance the query window to the next chunk.
            fa_fwd_args.q_token_start_idx += fa_fwd_args.chunk_len

            # Bias + Dropout + Add + LN
            bias_chunk = bias_chunks[c] if attention_dense.bias is not None else None
            bdal_fwd_args = BiasDropoutAddNormArgs(bias_dropout_add_func, post_attention_norm,
                                                   hidden_states_chunks[c], bias_chunk, hidden_dropout)
            ctx.bdal_computer = bdal_computer = BiasDropoutAddNormComputer(bdal_tensor_list, bdal_fwd_args)
            ln_output, ln_input = bdal_computer.forward(ctx, attn_out)
            # Free the dense output's storage; the tensor object is still saved for backward.
            attn_out.untyped_storage().resize_(0)
            dense_tensor_list.append(detach_attn_out)
            dense_tensor_list.append(attn_out)
            ln_input_list.append(ln_input)

            # MoE gating and token rearrangement.
            ctx.gate_computer = gate_computer = MoEGatingComputer(moe, gate_tensor_list)
            gate_output = gate_computer.forward(ln_output)
            if global_args.enable_token_rearrange_opt:
                dispatched_input, l_aux, token_ec_idx_list[c], weights_list[c] = gate_output
            else:
                dispatched_input, l_aux, weights_list[c] = gate_output
            ln_output.untyped_storage().resize_(0)
            bdal_tensor_list.append(ln_output)

            # First all2all (and allgather) communication before the MLP.
            mlp_inputs = async_comm.comm_before_moe_mlp_fwd(ctx, dispatched_input)
            dispatched_input.untyped_storage().resize_(0)
            gate_tensor_list.append(dispatched_input)

        # MoE MLP
        mlp_fwd_args = MLPFwdArgs(a2a_events, ag_events)
        ctx.mlp_computer = mlp_computer = MoEMLPComputer(moe, mlp_tensor_list, mlp_fwd_args)
        mlp_outputs = mlp_computer.forward(ctx, mlp_inputs, a2a_inputs)

        # Undo the token rearrangement.
        post_mlp_fwd_args = PostMLPArgs(ln_seq_len // pipe_degree, a2a_events,
                                        moe_output_list, weights_list, token_ec_idx_list)
        ctx.post_mlp_computer = post_mlp_computer = MoEPostMLPComputer(post_mlp_tensor_list, post_mlp_fwd_args)
        moe_output_list = post_mlp_computer.forward(ctx, mlp_outputs)
        AttMoEPipe.save_tensors_for_bwd(ctx, [flash_tensor_list, dense_tensor_list, bdal_tensor_list,
                                              gate_tensor_list, mlp_tensor_list, post_mlp_tensor_list])
        ret = torch.cat(moe_output_list), torch.cat(ln_input_list)
        return ret

    @staticmethod
    def backward(ctx, grad_moe_outs, grad_ln_ins):
        """Backward pass; walks the chunks in reverse order.

        Returns one gradient per forward input: grads for q, k, v and
        hidden_states, then ``None`` for attention_mask and ampipe_forward_args.
        """
        global_args = get_args()
        pipe_degree = ctx.pipe_degree
        context_parallel = global_args.context_parallel_size > 1
        sequence_parallel = global_args.sequence_parallel

        # Fetch the tensors saved in forward and split them back into per-stage lists.
        saved_tensors_list = list(ctx.saved_tensors)
        (flash_tensor_list_len, dense_tensor_list_len,
         bdal_tensor_list_len, gate_tensor_list_len,
         mlp_tensor_list_len, post_mlp_tensor_list_len) = ctx.tensor_list_length
        start_index = 0
        segments = []

        for length in ctx.tensor_list_length:
            end_index = start_index + length
            segments.append(saved_tensors_list[start_index:end_index])
            start_index = end_index
        (flash_tensor_list, dense_tensor_list,
         bdal_tensor_list, gate_tensor_list,
         mlp_tensor_list, post_mlp_tensor_list) = segments

        # Split the incoming gradients into one chunk per pipeline step.
        grad_moe_out_list = grad_moe_outs.chunk(pipe_degree)
        grad_ln_ins_list = grad_ln_ins.chunk(pipe_degree)
        # Temporary accumulators.
        grad_hidden, grad_q, grad_k, grad_v = [], [], None, None
        grad_mlp_input_list, grad_a2a_input_list, a2a_events, ag_events = AttMoEPipe._init_bwd_comm_list(ctx)

        for c in range(pipe_degree - 1, -1, -1):
            # Backward of the post-MLP token restore.
            grad_moe_out_chunk = grad_moe_out_list[c].view(-1, ctx.hidden_size)
            post_mlp_list_slice_len = post_mlp_tensor_list_len // pipe_degree
            grad_post_mlp = ctx.post_mlp_computer.backward(
                post_mlp_tensor_list[c * post_mlp_list_slice_len:(c + 1) * post_mlp_list_slice_len],
                grad_moe_out_chunk
            )
            # First backward all2all (and allgather) communication.
            bwd_comm_args = BwdCommArgs(c, grad_mlp_input_list, grad_a2a_input_list, a2a_events, ag_events)
            ctx.async_comm.bwd_args = bwd_comm_args
            grad_mlp_input_list = ctx.async_comm.comm_before_moe_mlp_bwd(ctx, grad_post_mlp)
            del post_mlp_tensor_list[c * post_mlp_list_slice_len:(c + 1) * post_mlp_list_slice_len]
        # Manually drop the computers stored on ctx to reduce peak memory.
        ctx.post_mlp_computer = None
        ctx.async_comm = None
        # Backward of the expert MLP.
        bwd_mlp_args = MLPBwdArgs(sequence_parallel, mlp_tensor_list_len, a2a_events, ag_events, mlp_tensor_list)
        if ctx.pipe_experts:
            bwd_mlp_args.second_a2a_events = []
        ctx.mlp_computer.mlp_bwd_args = bwd_mlp_args
        mlp_bwd_grads = ctx.mlp_computer.backward(ctx, grad_mlp_input_list, grad_a2a_input_list)
        # Manually drop the computer stored on ctx to reduce peak memory.
        ctx.mlp_computer = None

        fa_bwd_args = FlashAttentionBwdArgs(grad_q, grad_k, grad_v, flash_tensor_list, dense_tensor_list,
                                            flash_tensor_list_len=flash_tensor_list_len,
                                            dense_tensor_list_len=dense_tensor_list_len)
        if context_parallel:
            fa_bwd_args.kv_list = []
            fa_bwd_args.dkv_list = []
            fa_bwd_args.dout_list = []
        else:
            # Without context parallelism forward appended k then v at the end of the
            # flash list (see FlashAttentionComputer._optimize_attn_memory).
            fa_bwd_args.v = flash_tensor_list.pop()
            fa_bwd_args.k = flash_tensor_list.pop()
        ctx.fa_computer.fa_bwd_args = fa_bwd_args
        for c in range(pipe_degree - 1, -1, -1):
            # Wait for the last backward all2all.
            grad_mlp = AttMoEPipe.bwd_second_all2all_wait_last(ctx, c, mlp_bwd_grads, a2a_events, bwd_mlp_args)
            # Backward of gating and token rearrangement.
            gate_list_slice_len = gate_tensor_list_len // pipe_degree
            grad_ln_out = ctx.gate_computer.backward(
                gate_tensor_list[c * gate_list_slice_len:(c + 1) * gate_list_slice_len],
                grad_mlp
            )
            del gate_tensor_list[c * gate_list_slice_len:(c + 1) * gate_list_slice_len]

            # Backward of bias-dropout-add + norm.
            bdal_list_slice_len = bdal_tensor_list_len // pipe_degree
            bdal_list_slice = bdal_tensor_list[c * bdal_list_slice_len:(c + 1) * bdal_list_slice_len]
            grad_dense, d_hidden_grad, d_bias_grad = ctx.bdal_computer.backward(ctx, bdal_list_slice,
                                                                                grad_ln_out, grad_ln_ins_list[c])
            # Chunks are visited last-to-first, so prepend to restore sequence order.
            grad_hidden.insert(0, d_hidden_grad)
            del bdal_list_slice
            del bdal_tensor_list[c * bdal_list_slice_len:(c + 1) * bdal_list_slice_len]

            # Backward of flash attention.
            fa_bwd_args.cur_degree = c
            grad_q, grad_k, grad_v = ctx.fa_computer.backward(ctx, grad_dense)
        # Manually drop the computers stored on ctx to reduce peak memory.
        ctx.gate_computer = None
        ctx.bdal_computer = None
        ctx.fa_computer = None
        if not context_parallel:
            # In this path grad_q is a list of per-chunk gradients.
            grad_q = torch.cat(grad_q, dim=0)
        grad_q = rearrange(grad_q, "s b (n d) -> s b n d", n=ctx.head)
        return grad_q, grad_k, grad_v, torch.cat(grad_hidden), None, None

    @staticmethod
    def save_args_to_ctx(ctx, ampipe_forward_args, global_args):
        """Copy the forward arguments and the global settings read during backward onto ``ctx``."""
        ctx.ampipe_forward_args = ampipe_forward_args
        ctx.sequence_parallel = global_args.sequence_parallel
        ctx.num_experts = global_args.num_experts
        ctx.num_local_experts = global_args.num_experts // global_args.expert_model_parallel_size
        ctx.ep_size = global_args.expert_model_parallel_size
        ctx.hidden_size = global_args.hidden_size
        ctx.pipe_degree = global_args.ampipe_degree
        ctx.ampipe_tp_sp_comm_overlap = global_args.ampipe_tp_sp_comm_overlap
        ctx.pipe_experts = global_args.use_pipe_experts
        ctx.pipe_experts_multi_data = global_args.pipe_experts_multi_data
        ctx.pipe_experts_multi_stream = global_args.pipe_experts_multi_stream
        ctx.flash_args = []
        ctx.mlp_args = []

    @staticmethod
    def save_tensors_for_bwd(ctx, tensor_list):
        """Save every tensor of the nested ``tensor_list`` for backward.

        The sub-lists are flattened into one ``save_for_backward`` call; their
        lengths are recorded in ``ctx.tensor_list_length`` so backward can split
        them again. Each sub-list is cleared afterwards.
        """
        flat_list = itertools.chain.from_iterable(tensor_list)
        ctx.save_for_backward(*flat_list)
        ctx.tensor_list_length = [len(x) for x in tensor_list]
        for lst in tensor_list:
            lst.clear()

    @staticmethod
    def _init_attention_args(pipe_degree, q_shape, attention_dense, flash_tensor_list):
        """Build the ``FlashAttentionFwdArgs`` from the 4-d query shape."""
        seqlen, batch_size, head_num, head_dim = q_shape
        chunk_len = seqlen // pipe_degree
        softmax_scale = head_dim ** (-0.5)
        return FlashAttentionFwdArgs(flash_tensor_list, attention_dense, head_num, softmax_scale, chunk_len)

    @staticmethod
    def bwd_second_all2all_wait_last(ctx, cur_degree, mlp_bwd_grads, a2a_events, mlp_bwd_args):
        """Return the MLP gradient of chunk ``cur_degree`` after waiting on its communication.

        For chunk 0 in the pipe-experts or tp-sp overlap modes, the last pending
        event is waited on and the gradient pieces are concatenated and reshaped
        to ``(num_experts, -1, hidden_size)``.
        """
        grad_mlp_last = mlp_bwd_grads[cur_degree]
        # NOTE(review): ctx.use_ampipe_with_pipe_expert is not set in this class;
        # presumably MoEMLPComputer.forward sets it — confirm.
        if ctx.use_ampipe_with_pipe_expert and cur_degree == 0:
            mlp_bwd_args.second_a2a_events[-1].wait()
            grad_combine = torch.cat([torch.cat(i, dim=1) for i in grad_mlp_last], dim=1)
            grad_mlp_last = grad_combine.reshape(ctx.num_experts, -1, ctx.hidden_size)
        elif ctx.ampipe_tp_sp_comm_overlap and cur_degree == 0:
            a2a_events[-1].wait()
            grad_combine = torch.cat(grad_mlp_last, dim=1)
            grad_mlp_last = grad_combine.reshape(ctx.num_experts, -1, ctx.hidden_size)

        if not ctx.ampipe_tp_sp_comm_overlap:
            a2a_events[cur_degree].wait()
        return grad_mlp_last

    @staticmethod
    def _init_fwd_comm_list():
        """Allocate the forward communication lists.

        Returns a namedtuple ``(mlp_inputs, a2a_inputs, a2a_events, ag_events)``.
        List sizes depend on whether tp-sp overlap and pipe-experts are enabled.
        """
        global_args = get_args()
        pipe_degree = global_args.ampipe_degree
        num_local_experts = global_args.num_experts // global_args.expert_model_parallel_size
        pipe_experts_multi_data = global_args.pipe_experts_multi_data
        pipe_experts_multi_stream = global_args.pipe_experts_multi_stream
        a2a_inputs = []
        ag_events = []

        if not global_args.ampipe_tp_sp_comm_overlap:
            # One all2all per chunk; events are appended as they are issued.
            mlp_inputs = [None] * pipe_degree
            a2a_events = []
        elif not global_args.use_pipe_experts or pipe_experts_multi_data <= pipe_degree:
            # One slot per (chunk, local expert).
            mlp_inputs = [None] * (pipe_degree * num_local_experts)
            a2a_events = [None] * (pipe_degree * num_local_experts)
        else:
            # One slot per (multi-data slice, local expert).
            mlp_inputs = [None] * (pipe_experts_multi_data * num_local_experts)
            a2a_events = [None] * (pipe_experts_multi_data * num_local_experts)

        if pipe_experts_multi_stream:
            ag_events = [None] * (pipe_experts_multi_data * num_local_experts)
            get_async_comm_utils_data_instance().fw_ag_output = [None] * (pipe_experts_multi_data * num_local_experts)
        CommList = namedtuple("CommList", ["mlp_inputs", "a2a_inputs", "a2a_events", "ag_events"])
        comm_list = CommList(mlp_inputs, a2a_inputs, a2a_events, ag_events)
        return comm_list

    @staticmethod
    def _init_bwd_comm_list(ctx):
        """Allocate the backward communication lists, mirroring ``_init_fwd_comm_list``.

        Returns a namedtuple whose fields hold, in order: the gradient MLP input
        list, the gradient all2all input list, the all2all events and the
        allgather events. The field names are reused from the forward variant.
        """
        if not ctx.ampipe_tp_sp_comm_overlap:
            grad_mlp_input_list = [None] * ctx.pipe_degree
            grad_a2a_input_list = [None] * ctx.pipe_degree
            a2a_events = []
        elif not ctx.pipe_experts or ctx.pipe_experts_multi_data <= ctx.pipe_degree:
            grad_mlp_input_list = [None] * (ctx.pipe_degree * ctx.num_local_experts)
            grad_a2a_input_list = [None] * (ctx.pipe_degree * ctx.num_local_experts)
            a2a_events = [None] * (ctx.pipe_degree * ctx.num_local_experts)
        else:
            grad_mlp_input_list = [None] * (ctx.pipe_experts_multi_data * ctx.num_local_experts)
            grad_a2a_input_list = [None] * (ctx.pipe_experts_multi_data * ctx.num_local_experts)
            a2a_events = [None] * (ctx.pipe_experts_multi_data * ctx.num_local_experts)

        ag_events = []
        if ctx.pipe_experts_multi_stream:
            ag_events = [None] * (ctx.pipe_experts_multi_data * ctx.num_local_experts)
        CommList = namedtuple("CommList", ["mlp_inputs", "a2a_inputs", "a2a_events", "ag_events"])
        comm_list = CommList(grad_mlp_input_list, grad_a2a_input_list, a2a_events, ag_events)
        return comm_list
@dataclass
class ForwardArgs:
    """Layer objects and settings handed to ``AttMoEPipe.forward`` as one bundle."""
    # Post-attention dense layer; called on the attention output and returns (output, bias).
    attention_dense: tensor_parallel.RowParallelLinear
    # Called as bias_dropout_add_func(input, bias, residual, prob).
    bias_dropout_add_func: Callable
    # Norm applied to the bias-dropout-add result.
    post_attention_norm: Union[LayerNorm, RMSNorm]
    # MoE layer object; its `moe_layer.gate` and `num_experts` are used by the gating computer.
    moe: torch.nn.Module
    # Dropout probability passed to bias_dropout_add_func.
    hidden_dropout: float


@dataclass
class FlashAttentionFwdArgs:
    """Arguments of the chunked flash-attention forward."""
    # Receives the tensors that flash attention needs in backward.
    flash_tensor_list: List[Tensor]
    attention_dense: tensor_parallel.RowParallelLinear
    head_num: int
    # Computed as head_dim ** -0.5 by AttMoEPipe._init_attention_args.
    softmax_scale: float
    # Query rows per chunk: seqlen // ampipe_degree.
    chunk_len: int
    # First query row of the current chunk; advanced by chunk_len after every chunk.
    q_token_start_idx: int = 0
    sparse_mode: int = 0
    # Index of the chunk currently being processed.
    cur_degree: int = 0
    # Used by the context-parallel path; filled outside this file — TODO confirm contents.
    kv_list: List[Tensor] = field(default_factory=list)
    o_max_sum_list: List[Tensor] = field(default_factory=list)


@dataclass
class FACpFwdArgs:
    """q/k/v bundle passed to the context-parallel attention forward."""
    q: Tensor
    k: Tensor
    v: Tensor


@dataclass
class FlashAttentionSaveForBwdArgs:
    """Non-tensor attention state stored in ``ctx.flash_args`` for backward.

    Only ``next_tokens`` is written by code in this package's non-context-parallel
    path; the remaining fields are presumably filled by the context-parallel
    attention implementation — verify against that module.
    """
    n: int = 0
    rank: int = 0
    keep_prob: float = 0.0
    cp_size: int = 0
    prev_rank: int = 0
    next_rank: int = 0
    softmax_scale: float = 0.0
    next_tokens: int = 0
    cp_group: Optional[torch.distributed.ProcessGroup] = None
    cp_group_for_send_recv_overlap: Optional[torch.distributed.ProcessGroup] = None
    rng_states_qa_kva: List = field(default_factory=list)
    rng_states_qb_kva: List = field(default_factory=list)
    rng_states_qb_kvb: List = field(default_factory=list)


@dataclass
class FlashAttentionBwdArgs:
    """Arguments and running state of the chunked flash-attention backward."""
    # Per-chunk query gradients; each chunk's gradient is inserted at the front.
    grad_q: List
    # Running sums of the key/value gradients over all chunks (None before the first chunk).
    grad_k: Optional[Tensor]
    grad_v: Optional[Tensor]
    flash_tensor_list: List[Tensor]
    dense_tensor_list: List[Tensor]
    # Context-parallel only: stack of the first chunk's and the last chunk's attention output.
    attn_out_all: Optional[Tensor] = None
    # Non-context-parallel only: key/value popped from the end of flash_tensor_list.
    k: Optional[Tensor] = None
    v: Optional[Tensor] = None
    cur_degree: int = 0
    # Lengths of the saved lists as recorded in forward.
    flash_tensor_list_len: int = 0
    dense_tensor_list_len: int = 0
    kv_list: List[Tensor] = field(default_factory=list)
    dkv_list: List[Tensor] = field(default_factory=list)
    dout_list: List[Tensor] = field(default_factory=list)


@dataclass
class BiasDropoutAddNormArgs:
    """Inputs of one bias-dropout-add + norm step."""
    bias_dropout_add_func: Callable
    post_attention_norm: Union[LayerNorm, RMSNorm]
    # Chunk of the hidden states used as residual.
    residual: Tensor
    # Chunk of the dense bias, or None when the dense layer has no bias.
    bias: Optional[Tensor]
    prob: float


@dataclass
class FwdCommArgs:
    """Shared communication lists for the forward pre-MLP communication of one chunk."""
    cur_degree: int
    mlp_inputs: List[Tensor]
    # Keeps all2all inputs alive until the communication is done.
    a2a_inputs: List[Tensor]
    a2a_events: List
    ag_events: List


@dataclass
class BwdCommArgs:
    """Shared communication lists for the backward pre-MLP communication of one chunk."""
    cur_degree: int
    grad_mlp_input_list: List[Tensor]
    grad_a2a_input_list: List[Tensor]
    a2a_events: List
    ag_events: List


@dataclass
class MLPFwdArgs:
    """Communication events the MoE MLP forward has to consume."""
    a2a_events: List = field(default_factory=list)
    ag_events: List = field(default_factory=list)


@dataclass
class MLPSaveForBwdArgs:
    """Settings kept for the MoE MLP backward (consumer is outside this chunk of the file)."""
    ampipe_degree: int = 0
    num_local_experts: int = 0
    ep_size: int = 0
    hidden_size: int = 0
    sequence_parallel: bool = False
    multi_data: int = 0
    multi_stream: bool = False
    input_list_before_expert: List[Tensor] = field(default_factory=list)


@dataclass
class MLPBwdArgs:
    """Arguments of the MoE MLP backward."""
    sequence_parallel: bool
    mlp_tensor_list_len: int
    a2a_events: List
    ag_events: List
    mlp_tensor_list: List[Tensor]
    # Reset to an empty list by AttMoEPipe.backward when pipe-experts is enabled.
    second_a2a_events: List = field(default_factory=list)


@dataclass
class PostMLPArgs:
    """Arguments of the post-MLP token restore."""
    # Sequence length of one chunk (full length // ampipe_degree).
    seqlen: int = 0
    a2a_events: List = field(default_factory=list)
    moe_output_list: List[Tensor] = field(default_factory=list)
    # Per-chunk gating outputs collected in AttMoEPipe.forward.
    weights_list: List[Tensor] = field(default_factory=list)
    token_ec_idx_list: List[Tensor] = field(default_factory=list)
class AsyncCommunication:
    """Issues the asynchronous all2all/allgather communication around the MoE MLP.

    One instance is created per forward chunk with that chunk's ``FwdCommArgs``;
    before backward the caller assigns a ``BwdCommArgs`` to ``bwd_args``. The
    argument objects carry lists shared with the caller, which the methods
    fill in place.
    """

    def __init__(self, fwd_args, bwd_args=None):
        self.bwd_args = bwd_args
        self.fwd_args = fwd_args

    def comm_before_moe_mlp_fwd(self, ctx, dispatched_input):
        """Start the forward all2all for the current chunk.

        Args:
            ctx: autograd context; only forwarded to the pipe-experts helper.
            dispatched_input: gated tokens of the current chunk.

        Returns:
            The shared ``mlp_inputs`` list with this chunk's slots filled.
        """
        cur_degree = self.fwd_args.cur_degree
        a2a_events = self.fwd_args.a2a_events
        mlp_inputs = self.fwd_args.mlp_inputs
        a2a_inputs = self.fwd_args.a2a_inputs
        args = get_args()
        pipe_experts = args.use_pipe_experts
        num_experts = args.num_experts
        num_local_experts = num_experts // args.expert_model_parallel_size

        # Without ampipe_tp_sp_comm_overlap the expert dimension is not split:
        # one all2all over all experts.
        if not args.ampipe_tp_sp_comm_overlap:
            a2a_tokens, a2a_handle = async_all_to_all(dispatched_input)
            a2a_events.append(a2a_handle)
            mlp_inputs[cur_degree] = a2a_tokens
            return mlp_inputs

        # With ampipe_tp_sp_comm_overlap the tokens are split per local expert
        # and each group gets its own all2all.
        chunk_list = dispatched_input.chunk(num_experts)
        for exp_index in range(num_local_experts):
            chunks = chunk_list[exp_index:num_experts:num_local_experts]
            a2a_tokens = torch.cat(chunks)
            # pipe-experts adaptation
            if pipe_experts:
                comm_result = self._pipe_expert_comm_before_moe_mlp_fwd(ctx, exp_index, a2a_tokens)
                if comm_result is not None:
                    continue
            # pipe_experts off, or no pipe-experts path applies: do not split
            # the tokens further, all2all directly.
            output, a2a_handle = async_all_to_all(a2a_tokens)
            index = cur_degree * num_local_experts + exp_index
            mlp_inputs[index] = output
            a2a_events[index] = a2a_handle
            # Keep the communication input alive so its memory is released
            # only after the communication has finished.
            a2a_inputs.append(a2a_tokens)
        return mlp_inputs

    def comm_before_moe_mlp_bwd(self, ctx, grad_moe_out_chunk):
        """Start the first backward all2all for the current chunk.

        Args:
            ctx: autograd context providing the settings saved in forward.
            grad_moe_out_chunk: gradient of this chunk's post-MLP output.

        Returns:
            The shared ``grad_mlp_input_list`` with this chunk's slots filled.
        """
        cur_degree = self.bwd_args.cur_degree
        a2a_events = self.bwd_args.a2a_events
        grad_mlp_input_list = self.bwd_args.grad_mlp_input_list
        grad_a2a_input_list = self.bwd_args.grad_a2a_input_list
        # First backward all2all; pure expert-parallel communication hiding.
        if not ctx.ampipe_tp_sp_comm_overlap:
            grad_mlp_input_list[cur_degree], a2a_handle = async_all_to_all(grad_moe_out_chunk)
            # Chunks arrive last-to-first; prepending keeps a2a_events[i]
            # aligned with chunk i.
            a2a_events.insert(0, a2a_handle)
            return grad_mlp_input_list

        # tp-sp domain and ep domain communication hiding.
        chunk_list = grad_moe_out_chunk.chunk(ctx.num_experts)
        for exp_index in range(ctx.num_local_experts):
            chunks = chunk_list[exp_index:ctx.num_experts:ctx.num_local_experts]
            grad_mlp_tokens = torch.cat(chunks)
            # pipe-experts adaptation
            if ctx.pipe_experts:
                comm_result = self._pipe_expert_comm_before_moe_mlp_bwd(ctx, exp_index, grad_mlp_tokens)
                if comm_result is not None:
                    continue
            # pipe_experts off, or no pipe-experts path applies: all2all directly.
            grad_a2a_tokens, a2a_handle = async_all_to_all(grad_mlp_tokens)
            # Slots are filled in processing order (the last chunk comes first).
            index = (ctx.pipe_degree - 1 - cur_degree) * ctx.num_local_experts + exp_index
            grad_mlp_input_list[index] = grad_a2a_tokens
            a2a_events[index] = a2a_handle
            # Keep the communication input alive until the communication is done.
            grad_a2a_input_list[index] = grad_mlp_tokens
        return grad_mlp_input_list

    def _pipe_expert_comm_before_moe_mlp_fwd(self, ctx, exp_index, input_tokens):
        """Forward all2all for one local expert when pipe-experts is enabled.

        Returns the shared ``mlp_inputs`` list when a pipe-experts path handled
        the communication, or ``None`` when the caller must fall back to a
        plain all2all.
        """
        cur_degree = self.fwd_args.cur_degree
        a2a_events = self.fwd_args.a2a_events
        mlp_inputs = self.fwd_args.mlp_inputs
        a2a_inputs = self.fwd_args.a2a_inputs
        ag_events = self.fwd_args.ag_events
        args = get_args()
        pipe_degree = args.ampipe_degree
        pipe_experts_multi_data = args.pipe_experts_multi_data
        pipe_experts_multi_stream = args.pipe_experts_multi_stream
        # When pipe_experts_multi_data > ampipe_degree the tokens are split
        # again along dim 1; the slice count is reused in backward.
        ctx.slice_size = slice_size = pipe_experts_multi_data // pipe_degree
        # Multi-stream requires pipe_experts_multi_data >= ampipe_degree,
        # single-stream requires pipe_experts_multi_data > ampipe_degree.
        multi_stream_path = pipe_experts_multi_data >= pipe_degree and pipe_experts_multi_stream
        multi_data_path = pipe_experts_multi_data > pipe_degree and not pipe_experts_multi_stream
        if not (multi_stream_path or multi_data_path):
            # Decide before chunking: slice_size is 0 when
            # pipe_experts_multi_data < ampipe_degree and chunk(0) would raise
            # instead of letting the caller fall back.
            return None

        a2a_token_chunk = input_tokens.chunk(slice_size, dim=1)
        if multi_stream_path:
            for i in range(slice_size):
                # Slot index adapted to the pipe-experts layout.
                index = cur_degree * slice_size + exp_index * pipe_experts_multi_data + i
                if (cur_degree + exp_index + i) == 0 and args.sequence_parallel:
                    a2a_token, a2a_handle = async_all_to_all(a2a_token_chunk[i])
                else:
                    a2a_token, a2a_handle = async_all_to_all(a2a_token_chunk[i], ag_events[index])
                a2a_events[index] = a2a_handle
                mlp_inputs[index] = a2a_token
                if args.sequence_parallel:
                    ag_token, ag_handle = async_fw_all_gather(a2a_token, a2a_handle, ampipe_with_mlp_multistream=True,
                                                              index=index)
                    ag_events[index] = ag_handle
                    mlp_inputs[index] = ag_token
            return mlp_inputs

        for i in range(slice_size):
            a2a_token, a2a_handle = async_all_to_all(a2a_token_chunk[i])
            index = cur_degree * slice_size + exp_index * pipe_experts_multi_data + i
            a2a_events[index] = a2a_handle
            mlp_inputs[index] = a2a_token
            a2a_inputs.append(a2a_token_chunk[i])
        return mlp_inputs

    def _pipe_expert_comm_before_moe_mlp_bwd(self, ctx, exp_index, grad_tokens):
        """Backward counterpart of ``_pipe_expert_comm_before_moe_mlp_fwd``.

        Returns the shared ``grad_mlp_input_list`` when a pipe-experts path
        handled the communication, or ``None`` for the plain all2all fallback.
        """
        cur_degree = self.bwd_args.cur_degree
        a2a_events = self.bwd_args.a2a_events
        grad_mlp_input_list = self.bwd_args.grad_mlp_input_list
        ag_events = self.bwd_args.ag_events
        args = get_args()
        pipe_degree = args.ampipe_degree
        multi_stream_path = ctx.pipe_experts_multi_data >= pipe_degree and ctx.pipe_experts_multi_stream
        multi_data_path = ctx.pipe_experts_multi_data > pipe_degree and not ctx.pipe_experts_multi_stream
        if not (multi_stream_path or multi_data_path):
            # Same guard as in forward: ctx.slice_size is 0 in the fallback
            # case and must not be passed to chunk().
            return None

        grad_token_list = grad_tokens.chunk(ctx.slice_size, dim=1)
        if multi_stream_path:
            for i in range(ctx.slice_size):
                # Slot index adapted to the pipe-experts layout.
                index = (pipe_degree - 1 - cur_degree) * ctx.slice_size + exp_index * ctx.pipe_experts_multi_data + i
                if cur_degree == pipe_degree - 1 and (exp_index + i) == 0 and args.sequence_parallel:
                    a2a_token, a2a_handle = async_all_to_all(grad_token_list[i])
                else:
                    a2a_token, a2a_handle = async_all_to_all(grad_token_list[i], ag_events[index])
                a2a_events[index] = a2a_handle
                grad_mlp_input_list[index] = a2a_token
                if args.sequence_parallel:
                    ag_token, ag_handle = async_all_gather(a2a_token, a2a_handle, is_bwd=True)
                    ag_events[index] = ag_handle
                    grad_mlp_input_list[index] = ag_token
            return grad_mlp_input_list

        for i in range(ctx.slice_size):
            a2a_token, a2a_handle = async_all_to_all(grad_token_list[i])
            index = (pipe_degree - 1 - cur_degree) * ctx.slice_size + exp_index * ctx.pipe_experts_multi_data + i
            a2a_events[index] = a2a_handle
            grad_mlp_input_list[index] = a2a_token
        return grad_mlp_input_list

    def fw_all_gather_not_multistream(self):
        """Wait for the first forward all2all and start the allgather on its output."""
        self.fwd_args.a2a_events[0].wait()
        # Release the communication input that was kept alive.
        self.fwd_args.a2a_inputs.pop()
        _, ag_handle = async_fw_all_gather(self.fwd_args.mlp_inputs[0])
        self.fwd_args.ag_events.append(ag_handle)
class BiasDropoutAddNormComputer:
    """Runs bias-dropout-add followed by the post-attention norm with a manual backward.

    The step is cut into two autograd sub-graphs (bias-dropout-add and norm)
    joined through detached leaves, so that ``backward`` can replay them by
    hand from the tensors recorded in ``bdal_tensor_list``.
    """

    def __init__(self, bdal_tensor_list, fwd_args):
        super().__init__()
        self.bdal_tensor_list = bdal_tensor_list
        self.fwd_args = fwd_args

    def forward(self, ctx, input_tensor):
        """Compute ``norm(bias_dropout_add(input, bias, residual, prob))``.

        Appends to ``bdal_tensor_list``: the bias leaf (only when the bias is a
        tensor), then the bias-dropout-add output, its detached leaf, the input
        leaf and the residual leaf. ``ctx.bias`` records whether a bias leaf
        was stored.

        Returns:
            ``(norm_output, bias_dropout_add_output)``.
        """
        cfg = self.fwd_args
        raw_residual, bias_leaf, drop_prob = cfg.residual, cfg.bias, cfg.prob

        # Fresh leaves so that gradients stop at this step's inputs.
        attn_leaf = input_tensor.detach()
        residual_leaf = raw_residual.detach()
        attn_leaf.requires_grad = True
        residual_leaf.requires_grad = True

        has_bias = isinstance(bias_leaf, torch.Tensor)
        if has_bias:
            bias_leaf = bias_leaf.detach()
            bias_leaf.requires_grad = True
            self.bdal_tensor_list.append(bias_leaf)
        ctx.bias = has_bias

        with torch.enable_grad():
            pre_norm = cfg.bias_dropout_add_func(attn_leaf, bias_leaf, residual_leaf, drop_prob)
            # Second cut: the norm gets its own leaf.
            pre_norm_leaf = pre_norm.detach()
            pre_norm_leaf.requires_grad = True
            normed = cfg.post_attention_norm(pre_norm_leaf)
        for saved in (pre_norm, pre_norm_leaf, attn_leaf, residual_leaf):
            self.bdal_tensor_list.append(saved)
        return normed, pre_norm

    def backward(self, ctx, saved_tensor_list, grad_ln_outs, grad_ln_ins):
        """Replay both sub-graphs backwards.

        Args:
            ctx: context whose ``bias`` flag was set by ``forward``.
            saved_tensor_list: this chunk's saved tensors in forward order,
                followed by the norm output; the bias leaf is popped from it.
            grad_ln_outs: gradient w.r.t. the norm output.
            grad_ln_ins: gradient w.r.t. the bias-dropout-add output.

        Returns:
            ``(input_grad, residual_grad, bias_grad)``; ``bias_grad`` is ``None``
            when no bias was used.
        """
        bias_leaf = saved_tensor_list.pop(0) if ctx.bias else None
        pre_norm, pre_norm_leaf, attn_leaf, residual_leaf, normed = saved_tensor_list

        normed.backward(grad_ln_outs)
        # The bias-dropout-add output receives the norm gradient plus the
        # external gradient.
        pre_norm.backward(pre_norm_leaf.grad + grad_ln_ins)

        bias_grad = bias_leaf.grad if ctx.bias else None
        return attn_leaf.grad, residual_leaf.grad, bias_grad
def flash_attn_forward(qkvn, attn_mask=None, softmax_scale=1.0, sparse_mode=0, next_tokens=0):
    """Call ``torch_npu.npu_fusion_attention`` in "SBH" layout.

    ``qkvn`` is the 4-tuple ``(q, k, v, head_num)``. ``pre_tockens`` and the
    dropout keep probability come from the global arguments. The operator's
    result is returned unchanged.
    """
    global_args = get_args()
    query, key, value, head_num = qkvn
    fusion_kwargs = dict(
        pse=None,
        padding_mask=None,
        atten_mask=attn_mask,
        scale=softmax_scale,
        pre_tockens=global_args.pre_tockens,
        next_tockens=next_tokens,
        keep_prob=1.0 - global_args.attention_dropout,
        inner_precise=0,
        sparse_mode=sparse_mode,
    )
    return torch_npu.npu_fusion_attention(query, key, value, head_num, "SBH", **fusion_kwargs)


def flash_attn_backward(qkvn, dy, softmax_max, softmax_sum, attn_out,
                        attn_mask=None, softmax_scale=1.0, sparse_mode=0, next_tokens=0):
    """Call ``torch_npu.npu_fusion_attention_grad`` in "SBH" layout.

    ``qkvn`` is the 4-tuple ``(q, k, v, head_num)`` and ``dy`` the gradient of
    the attention output. ``pre_tockens`` is set to the first dimension of the
    key. The operator's result is returned unchanged.
    """
    query, key, value, head_num = qkvn
    grad_kwargs = dict(
        pse=None,
        padding_mask=None,
        atten_mask=attn_mask,
        softmax_max=softmax_max,
        softmax_sum=softmax_sum,
        attention_in=attn_out,
        scale_value=softmax_scale,
        pre_tockens=key.shape[0],
        next_tockens=next_tokens,
        sparse_mode=sparse_mode,
    )
    return torch_npu.npu_fusion_attention_grad(query, key, value, dy, head_num, "SBH", **grad_kwargs)
class FlashAttentionComputer:
    """Chunked flash attention plus the post-attention dense layer, with a manual backward.

    ``forward`` is called once per chunk (``fa_fwd_args.cur_degree``);
    ``backward`` is called once per chunk in reverse order after the caller
    has assigned ``fa_bwd_args``.
    """

    def __init__(self, fa_fwd_args, fa_bwd_args=None):
        super().__init__()
        self.fa_bwd_args = fa_bwd_args
        self.fa_fwd_args = fa_fwd_args
        self.context_parallel = get_args().context_parallel_size > 1

    def forward(self, ctx, q, k, v, attention_mask):
        """Attention and dense projection for the current chunk.

        Returns:
            ``(detach_attn_out, attn_dense_out, attn_bias)``: the detached
            attention output used as autograd leaf, and the two results of the
            dense layer.
        """
        global_args = get_args()
        flash_tensor_list = self.fa_fwd_args.flash_tensor_list
        cur_degree = self.fa_fwd_args.cur_degree

        if self.context_parallel:
            # A single shared state object is created for the first chunk and reused.
            if cur_degree == 0:
                flash_args_save_for_bwd = FlashAttentionSaveForBwdArgs()
                ctx.flash_args.append(flash_args_save_for_bwd)
            fa_cp_fwd_args = FACpFwdArgs(q, k, v)
            cur_attn_out = attn_with_cp_for_ampipe_forward(ctx.flash_args[0],
                                                           fa_cp_fwd_args=fa_cp_fwd_args,
                                                           fa_fwd_args=self.fa_fwd_args)
        else:
            flash_args_save_for_bwd = FlashAttentionSaveForBwdArgs()
            # Slice the query rows (and matching mask rows) of this chunk.
            q_token_start_idx = self.fa_fwd_args.q_token_start_idx
            q_token_end_idx = q_token_start_idx + self.fa_fwd_args.chunk_len
            next_tokens = q_token_start_idx
            q_use = q[q_token_start_idx:q_token_end_idx]
            cur_attn_mask = attention_mask[q_token_start_idx:q_token_end_idx]
            output_chunk = flash_attn_forward((q_use, k, v, self.fa_fwd_args.head_num),
                                              attn_mask=cur_attn_mask,
                                              softmax_scale=self.fa_fwd_args.softmax_scale,
                                              sparse_mode=self.fa_fwd_args.sparse_mode,
                                              next_tokens=next_tokens)
            cur_attn_out, cur_softmax_max, cur_softmax_sum = output_chunk[0], output_chunk[1], output_chunk[2]
            # Four tensors per chunk; backward slices the list with this stride.
            flash_tensor_list.extend([q_use, cur_attn_mask, cur_softmax_max, cur_softmax_sum])
            flash_args_save_for_bwd.next_tokens = next_tokens
            ctx.flash_args.append(flash_args_save_for_bwd)
        # Memory optimisation.
        self._optimize_attn_memory(k, v)
        # Issue one MLP allgather early, during the last attention chunk.
        should_do_allgather_in_attention = (
            cur_degree == global_args.ampipe_degree - 1
            and global_args.sequence_parallel
            and global_args.ampipe_tp_sp_comm_overlap
            and not global_args.pipe_experts_multi_stream
        )
        if should_do_allgather_in_attention:
            ctx.async_comm.fw_all_gather_not_multistream()
        # Matmul after attention (RowParallelLinear), run on a detached leaf so
        # its backward can be replayed by hand.
        detach_attn_out = cur_attn_out.detach()
        detach_attn_out.requires_grad = True
        with torch.enable_grad():
            attn_dense_out, attn_bias = self.fa_fwd_args.attention_dense(detach_attn_out)
        return detach_attn_out, attn_dense_out, attn_bias

    def backward(self, ctx, grad_output):
        """Backward of the dense layer and of flash attention for chunk ``fa_bwd_args.cur_degree``.

        Returns:
            ``(grad_q, grad_k, grad_v)``. Without context parallelism ``grad_q``
            is the list of per-chunk query gradients collected so far and
            ``grad_k``/``grad_v`` are running sums.
        """
        # Backward of the attention dense layer.
        c = self.fa_bwd_args.cur_degree
        dense_list_slice_len = self.fa_bwd_args.dense_tensor_list_len // ctx.pipe_degree
        cur_attn_out, attn_dense_out = self.fa_bwd_args.dense_tensor_list[
            c * dense_list_slice_len:(c + 1) * dense_list_slice_len
        ]
        if self.context_parallel and c == ctx.pipe_degree - 1:
            # On the first backward step stack the first chunk's attention output
            # with the current (last chunk's) one for the context-parallel backward.
            next_attn_out = self.fa_bwd_args.dense_tensor_list[0]
            attn_out_all = torch.cat((next_attn_out.unsqueeze(0), cur_attn_out.unsqueeze(0)), dim=0)
            self.fa_bwd_args.attn_out_all = attn_out_all
        attn_dense_out.backward(grad_output)
        grad_flash = cur_attn_out.grad
        del self.fa_bwd_args.dense_tensor_list[c * dense_list_slice_len:(c + 1) * dense_list_slice_len]

        # Backward of flash attention.
        flash_tensor_list = self.fa_bwd_args.flash_tensor_list
        if self.context_parallel:
            self.fa_bwd_args.cur_degree = ctx.pipe_degree - 1 - c
            grad_attention = attn_with_cp_for_ampipe_backward(
                ctx.flash_args[0], self.fa_bwd_args.attn_out_all, flash_tensor_list, grad_flash,
                self.fa_bwd_args
            )
            grad_q, grad_k, grad_v = grad_attention[0], grad_attention[1], grad_attention[2]
        else:
            grad_q, grad_k, grad_v = self.fa_bwd_args.grad_q, self.fa_bwd_args.grad_k, self.fa_bwd_args.grad_v
            # The recorded length still includes k and v, which the caller has
            # already popped from the list.
            fa_list_slice_len = (self.fa_bwd_args.flash_tensor_list_len - 2) // ctx.pipe_degree
            q, cur_attn_mask, cur_softmax_max, cur_softmax_sum = flash_tensor_list[
                c * fa_list_slice_len:(c + 1) * fa_list_slice_len
            ]
            softmax_scale = self.fa_fwd_args.softmax_scale
            # NOTE(review): forward passes fa_fwd_args.sparse_mode, while this call
            # relies on flash_attn_backward's default of 0 — confirm they always agree.
            grad_attention = flash_attn_backward(
                (q, self.fa_bwd_args.k, self.fa_bwd_args.v, ctx.head), grad_flash,
                cur_softmax_max, cur_softmax_sum, cur_attn_out, cur_attn_mask, softmax_scale,
                next_tokens=ctx.flash_args[c].next_tokens
            )
            d_q, d_k, d_v = grad_attention[0], grad_attention[1], grad_attention[2]
            # Every chunk attends to the full k/v, so their gradients accumulate.
            grad_k = grad_k + d_k if grad_k is not None else d_k
            grad_v = grad_v + d_v if grad_v is not None else d_v
            # Chunks are visited last-to-first; prepend to keep sequence order.
            grad_q.insert(0, d_q)
            self.fa_bwd_args.grad_q, self.fa_bwd_args.grad_k, self.fa_bwd_args.grad_v = grad_q, grad_k, grad_v
        return grad_q, grad_k, grad_v

    def _optimize_attn_memory(self, k, v):
        """After the last chunk either free k/v (context parallel) or save them for backward."""
        if self.fa_fwd_args.cur_degree == get_args().ampipe_degree - 1:
            if self.context_parallel:
                # Free the storage of every kv entry except the last one, and of k and v.
                for i, kv in enumerate(self.fa_fwd_args.kv_list):
                    if i < len(self.fa_fwd_args.kv_list) - 1:
                        kv.untyped_storage().resize_(0)
                k.untyped_storage().resize_(0)
                v.untyped_storage().resize_(0)
                self.fa_fwd_args.kv_list.clear()
                self.fa_fwd_args.o_max_sum_list.clear()
            else:
                # k then v go to the end of the saved list; the caller's backward
                # pops them in reverse order.
                self.fa_fwd_args.flash_tensor_list.append(k)
                self.fa_fwd_args.flash_tensor_list.append(v)
reshaped_input.to(torch.float32), dim=0, index=expert_select_token_idx + ).to(org_dtype) + else: + rearranged_input = torch.index_select( + reshaped_input, dim=0, index=expert_select_token_idx + ) + capacity = expert_select_token_idx.size(0) // self.moe.num_experts + dispatched_input = rearranged_input.reshape(self.moe.num_experts, capacity, d_model).contiguous() + self.gate_tensor_list.append(detach_logits) + GatingComputerRet = namedtuple('GatingComputerRet', + ['dispatched_input', 'l_aux', + 'token_ec_idx', 'token_weights']) + gating_computer_ret = GatingComputerRet(dispatched_input=dispatched_input, l_aux=l_aux, + token_ec_idx=token_ec_idx, token_weights=token_weights) + return gating_computer_ret + + def backward(self, saved_tensor_list, grad_output): + logits, dispatched_input = saved_tensor_list + dispatched_input.backward(grad_output) + grad_logits = logits.grad + logits.untyped_storage().resize_(0) + return grad_logits diff --git a/model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_moe_mlp_computer.py b/model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_moe_mlp_computer.py new file mode 100644 index 000000000..e4ed2edfa --- /dev/null +++ b/model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_moe_mlp_computer.py @@ -0,0 +1,229 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import torch
+
+from megatron.training import get_args
+from mindspeed.moe.ampipe.ampipe_args import MLPSaveForBwdArgs
+from mindspeed.moe.async_comm_utils import async_all_to_all, async_all_gather, async_fw_all_gather, \
+    async_fw_all_reduce_scatter_ampipe, get_fw_ar_rs_output_ampipe
+from mindspeed.moe.pipe_experts import PipeExpert
+
+
+class MoEMLPComputer:
+    """Expert-MLP stage of the ampipe MoE pipeline (forward and backward).
+
+    Three execution paths are selected at run time: delegation to ``PipeExpert``,
+    the per-expert ``ampipe_experts_forward`` / ``ampipe_experts_backward`` pair
+    (when ``ampipe_tp_sp_comm_overlap`` is set), and a plain per-chunk loop that
+    only overlaps the expert all-to-all with compute.
+    """
+
+    def __init__(self, moe, save_tensor_list, mlp_fwd_args, mlp_bwd_args=None):
+        """Store the MoE module, the list that collects tensors for backward, and the fwd/bwd argument bundles."""
+        super().__init__()
+        self.mlp_bwd_args = mlp_bwd_args
+        self.mlp_fwd_args = mlp_fwd_args
+        self.save_tensor_list = save_tensor_list
+        self.moe = moe
+
+    def forward(self, ctx, mlp_inputs, a2a_inputs):
+        """Run the experts on every pipeline chunk and return the per-chunk outputs.
+
+        Args:
+            ctx: context object. ``use_ampipe_with_pipe_expert`` is always stored on it;
+                ``mlp_args`` is stored on the PipeExpert and tp-sp-overlap paths.
+            mlp_inputs: per-chunk expert inputs. On the plain path each entry is
+                overwritten in place with the corresponding expert output.
+            a2a_inputs: only forwarded to ``ampipe_experts_forward``.
+
+        Returns:
+            The result of the selected path: the ``PipeExpert.forward`` result, the list
+            built by ``ampipe_experts_forward``, or one all-to-all output tensor per chunk.
+        """
+        global_args = get_args()
+        mlp_save_for_bwd_args = MLPSaveForBwdArgs()
+        a2a_events = self.mlp_fwd_args.a2a_events
+        ag_events = self.mlp_fwd_args.ag_events
+        pipe_degree = global_args.ampipe_degree
+        sequence_parallel = global_args.sequence_parallel
+        num_local_experts = global_args.num_experts // global_args.expert_model_parallel_size
+        ep_size = global_args.expert_model_parallel_size
+        hidden_size = global_args.hidden_size
+        pipe_experts = global_args.use_pipe_experts
+        multi_data = global_args.pipe_experts_multi_data
+        multi_stream = global_args.pipe_experts_multi_stream
+
+        # `and` binds tighter than `or`, so the two multi-data alternatives are grouped
+        # explicitly: the flag must never be truthy while pipe experts are disabled.
+        ctx.use_ampipe_with_pipe_expert = (pipe_experts and
+                                           ((multi_data >= pipe_degree and multi_stream)
+                                            or (multi_data > pipe_degree and not multi_stream)))
+        if ctx.use_ampipe_with_pipe_expert:
+            second_a2a_event = []
+            pipe_expert_args = [mlp_inputs, ep_size, num_local_experts, sequence_parallel, multi_data, multi_stream,
+                                a2a_events, second_a2a_event, ag_events, hidden_size, self.save_tensor_list]
+            mlp_outputs = PipeExpert.forward(mlp_save_for_bwd_args, self.moe.moe_layer.experts, *pipe_expert_args)
+            ctx.mlp_args = mlp_save_for_bwd_args
+        elif global_args.ampipe_tp_sp_comm_overlap:
+            mlp_outputs = self.ampipe_experts_forward(mlp_save_for_bwd_args, mlp_inputs, a2a_inputs)
+            ctx.mlp_args = mlp_save_for_bwd_args
+        else:
+            mlp_outputs = []
+            for c in range(pipe_degree):
+                # Wait for the oldest pending all-to-all before touching this chunk's input.
+                a2a_events.pop(0).wait()
+                expert_input = mlp_inputs[c].reshape(ep_size, num_local_experts, -1, hidden_size)
+                # Detach so the expert sub-graph is recorded separately and can be replayed in backward().
+                detach_expert_input = expert_input.detach()
+                detach_expert_input.requires_grad = True
+                with torch.enable_grad():
+                    expert_output = self.moe.moe_layer.experts(detach_expert_input)
+                self.save_tensor_list.extend([detach_expert_input, expert_output])
+                mlp_inputs[c] = expert_output
+                # The all-to-all is only launched here; its handle is queued for a later wait.
+                a2a_tokens, a2a_handle = async_all_to_all(expert_output)
+                a2a_events.append(a2a_handle)
+                mlp_outputs.append(a2a_tokens)
+        return mlp_outputs
+
+    def backward(self, ctx, grad_mlp_input_list, grad_a2a_input_list):
+        """Back-propagate through the experts using the path chosen in ``forward``.
+
+        Args:
+            ctx: context object; reads ``pipe_experts_multi_stream``, ``use_ampipe_with_pipe_expert``,
+                ``ampipe_tp_sp_comm_overlap``, ``pipe_degree`` and ``mlp_args``. Only
+                ``use_ampipe_with_pipe_expert`` and ``mlp_args`` are set by this class; the
+                others are presumably set by the caller - confirm.
+            grad_mlp_input_list: per-chunk gradients passed to ``Tensor.backward`` of the saved
+                expert outputs. Entry 0 may be replaced by its all-gathered version.
+            grad_a2a_input_list: list whose head is dropped once the matching all-to-all has
+                been waited on.
+
+        Returns:
+            The gradients produced by the selected path; on the plain path, one all-to-all
+            output per chunk in chunk order.
+        """
+        a2a_events = self.mlp_bwd_args.a2a_events
+        ag_events = self.mlp_bwd_args.ag_events
+        mlp_tensor_list = self.mlp_bwd_args.mlp_tensor_list
+        mlp_bwd_grads = []
+        multi_stream = ctx.pipe_experts_multi_stream
+        # Path 1: adapt to pipe-experts.
+        if ctx.use_ampipe_with_pipe_expert:
+            if self.mlp_bwd_args.sequence_parallel and not multi_stream:
+                a2a_events[0].wait()
+                grad_a2a_input_list.pop(0)
+                grad_mlp_input_list[0], ag_handle = async_all_gather(grad_mlp_input_list[0], is_bwd=True)
+                ag_events.append(ag_handle)
+            mlp_bwd_grads = PipeExpert.backward(ctx.mlp_args, grad_mlp_input_list, a2a_events, ag_events,
+                                                self.mlp_bwd_args.second_a2a_events, mlp_tensor_list)
+        # Path 2: MLP backward pipelined to hide both TP-SP and EP communication.
+        elif ctx.ampipe_tp_sp_comm_overlap:
+            if self.mlp_bwd_args.sequence_parallel:
+                a2a_events[0].wait()
+                grad_a2a_input_list.pop(0)
+                grad_mlp_input_list[0], ag_handle = async_all_gather(grad_mlp_input_list[0], is_bwd=True)
+                ag_events.append(ag_handle)
+            mlp_bwd_grads = self.ampipe_experts_backward(ctx.mlp_args, mlp_tensor_list, grad_mlp_input_list,
+                                                         grad_a2a_input_list, a2a_events, ag_events)
+        # Path 3: MLP backward pipelined to hide EP communication only.
+        else:
+            mlp_list_slice_len = self.mlp_bwd_args.mlp_tensor_list_len // ctx.pipe_degree
+            # Chunks are replayed in reverse order; events are popped from the tail to match.
+            for c in range(ctx.pipe_degree - 1, -1, -1):
+                a2a_events.pop().wait()
+                expert_input, expert_output = mlp_tensor_list[c * mlp_list_slice_len:(c + 1) * mlp_list_slice_len]
+                expert_output.backward(grad_mlp_input_list[c])
+                grad_mlp_input = expert_input.grad.reshape(self.moe.num_experts, -1, self.moe.hidden_size)
+                a2a_grad_mlp_input, a2a_handle = async_all_to_all(grad_mlp_input)
+                # insert(0, ...) restores chunk order for both the gradients and their handles.
+                mlp_bwd_grads.insert(0, a2a_grad_mlp_input)
+                a2a_events.insert(0, a2a_handle)
+            mlp_tensor_list.clear()
+        return mlp_bwd_grads
+
+    def ampipe_experts_forward(self, ctx, inputs, a2a_inputs):
+        """Per-expert forward that interleaves expert compute with TP and EP communication.
+
+        Args:
+            ctx: object that receives ``ampipe_degree``, ``ep_size``, ``num_local_experts``,
+                ``hidden_size`` and ``sequence_parallel`` for ``ampipe_experts_backward``
+                (``forward`` passes its ``MLPSaveForBwdArgs`` instance here).
+            inputs: expert inputs indexed by ``c * num_local_experts + i``.
+            a2a_inputs: list from which one element is popped per waited all-to-all.
+
+        Returns:
+            A list with one tensor per pipeline chunk, each reshaped to
+            ``(num_local_experts * ep_size, -1, hidden_size)``.
+        """
+        ctx.ampipe_degree = pipe_degree = get_args().ampipe_degree
+        ctx.ep_size = ep_size = get_args().expert_model_parallel_size
+        ctx.num_local_experts = num_local_experts = get_args().num_experts // ep_size
+        ctx.hidden_size = hidden_size = get_args().hidden_size
+        ctx.sequence_parallel = sequence_parallel = get_args().sequence_parallel
+        ag_events = self.mlp_fwd_args.ag_events
+        a2a_events = self.mlp_fwd_args.a2a_events
+
+        output_list = []
+        before_exp_input_list = []
+        after_exp_out_list = []
+
+        for c in range(pipe_degree):
+            for i in range(num_local_experts):
+                cur_index = c * num_local_experts + i
+                # pre expert process
+                if sequence_parallel:
+                    ag_events[cur_index].wait()
+                    if cur_index < num_local_experts * pipe_degree - 1:
+                        # Prefetch: start the all-gather of the next input while this expert runs.
+                        a2a_events[cur_index + 1].wait()
+                        a2a_inputs.pop()
+                        _, ag_handle = async_fw_all_gather(inputs[cur_index + 1],
+                                                           is_use_global_memory_buffer=False)
+                        ag_events.append(ag_handle)
+                else:
+                    a2a_events[cur_index].wait()
+                    a2a_inputs.pop()
+                # expert compute
+                detach_input_chunk = inputs[cur_index].detach()
+                detach_input_chunk.requires_grad = True
+                before_exp_input_list.append(detach_input_chunk)
+                with torch.enable_grad():
+                    out = self.moe.moe_layer.experts.experts[i](detach_input_chunk)
+                # NOTE(review): `last_chunk_out` is only bound when the expert returns a tuple; a
+                # non-tuple return would make the append below fail for cur_index > 0. Presumably
+                # experts on this path always return a tuple whose last element is the finished
+                # output of the previous chunk - confirm.
+                if isinstance(out, tuple):
+                    if cur_index > 0:
+                        out, last_chunk_out = out[0], out[-1]
+                    else:
+                        out = out[0]  # Ignore the bias term for now
+
+                # post expert comm
+                async_fw_all_reduce_scatter_ampipe(out, sequence_parallel)
+                after_exp_out_list.append(out)
+                if cur_index > 0:
+                    # Free the previous raw expert output; only its autograd graph is needed later.
+                    after_exp_out_list[cur_index - 1].untyped_storage().resize_(0)
+                    output_list.append(last_chunk_out)
+                if cur_index == pipe_degree * num_local_experts - 1:
+                    # Last expert: nothing follows to drain its communication, so do it here.
+                    ar_rs_out = get_fw_ar_rs_output_ampipe(sequence_parallel)
+                    a2a_out, a2a2_handle = async_all_to_all(ar_rs_out)
+                    a2a2_handle.wait()
+                    output_list.append(a2a_out)
+
+        for t in after_exp_out_list:
+            t.untyped_storage().resize_(0)
+        # Layout relied on by ampipe_experts_backward: all inputs first, then all outputs.
+        self.save_tensor_list.extend(before_exp_input_list)
+        self.save_tensor_list.extend(after_exp_out_list)
+        outputs = []
+        for c in range(pipe_degree):
+            cur_pipe_out_list = output_list[c * num_local_experts:(c + 1) * num_local_experts]
+            cur_pipe_out = torch.cat(cur_pipe_out_list, dim=1)
+            cur_pipe_out = cur_pipe_out.reshape((num_local_experts * ep_size), -1, hidden_size)
+            outputs.append(cur_pipe_out)
+        return outputs
+
+    def ampipe_experts_backward(self, ctx, saved_tensor_list, *args):
+        """Backward counterpart of ``ampipe_experts_forward``.
+
+        Args:
+            ctx: object carrying the sizes stored by ``ampipe_experts_forward``.
+            saved_tensor_list: the detached expert inputs followed by the expert outputs.
+            *args: exactly ``(grad_output_list, grad_a2a_input_list, a2a_event, ag_events)``.
+
+        Returns:
+            A list with one entry per pipeline chunk. Every entry is a tensor reshaped to
+            ``(num_local_experts * ep_size, -1, hidden_size)``, except that the last group of
+            all-to-all outputs launched is stored at index 0 as a plain list, without
+            concatenation.
+        """
+        pipe_degree = ctx.ampipe_degree
+        num_local_experts = ctx.num_local_experts
+        ep_size = ctx.ep_size
+        hidden_size = ctx.hidden_size
+        sequence_parallel = ctx.sequence_parallel
+
+        before_exp_input_list = saved_tensor_list[:num_local_experts * pipe_degree]
+        after_exp_out_list = saved_tensor_list[num_local_experts * pipe_degree:]
+        grad_output_list, grad_a2a_input_list, a2a_event, ag_events = args
+        grad_a2a2_input_list = []
+        output_list = []
+
+        for c in range(pipe_degree - 1, -1, -1):
+            for i in range(num_local_experts):
+                # reversed_index addresses the saved forward tensors, normal_index the incoming
+                # gradients and events.
+                reversed_index = c * num_local_experts + i
+                normal_index = (pipe_degree - c - 1) * num_local_experts + i
+                # pre expert process
+                if sequence_parallel:
+                    ag_events[normal_index].wait()
+                    if normal_index < num_local_experts * pipe_degree - 1:
+                        a2a_event[normal_index + 1].wait()
+                        grad_a2a_input_list.pop(0)
+                        grad_output = grad_output_list[normal_index + 1]
+                        ag_grad_output, ag_handle = async_all_gather(grad_output, is_bwd=True)
+                        grad_output_list[normal_index + 1] = ag_grad_output
+                        ag_events.append(ag_handle)
+                else:
+                    a2a_event[normal_index].wait()
+                    grad_a2a_input_list.pop(0)
+                # expert backward compute
+                mlp_grad_output = grad_output_list[normal_index]
+                after_exp_out_list[reversed_index].backward(mlp_grad_output)
+                grad_input = before_exp_input_list[reversed_index].grad
+                # Both tensors are no longer needed once the gradient has been taken.
+                mlp_grad_output.untyped_storage().resize_(0)
+                before_exp_input_list[reversed_index].untyped_storage().resize_(0)
+                # post expert process
+                a2a_grad_input, a2a1_handle = async_all_to_all(grad_input)
+                output_list.append(a2a_grad_input)
+                # Hold a reference to grad_input until the previous all-to-all has been waited on.
+                grad_a2a2_input_list.append(grad_input)
+                if normal_index > 0:
+                    a2a_event[-1].wait()
+                    grad_a2a2_input_list.pop(0)
+                a2a_event.append(a2a1_handle)
+
+        outputs = []
+        for c in range(pipe_degree):
+            cur_pipe_out_list = output_list[c * num_local_experts:(c + 1) * num_local_experts]
+            if c == pipe_degree - 1:
+                # NOTE(review): passed through un-concatenated, presumably because the final
+                # all-to-all may still be in flight - confirm with the caller.
+                outputs.insert(0, cur_pipe_out_list)
+                continue
+            cur_pipe_out = torch.cat(cur_pipe_out_list, dim=1)
+            cur_pipe_out = cur_pipe_out.reshape((num_local_experts * ep_size), -1, hidden_size)
+            outputs.insert(0, cur_pipe_out)
+        return outputs
diff --git a/model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_post_mlp_computer.py b/model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_post_mlp_computer.py
new file mode 100644
index 000000000..742be3cfd
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/moe/ampipe/ampipe_post_mlp_computer.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
+# Copyright (c) Microsoft Corporation.
+#
+# This source code is licensed under the Apache license found in the
+# LICENSE file in the root directory of this source tree.
+
+# copied from https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/sharded_moe.py
+# reworked/refactored some parts to make it run.
+import torch
+
+from megatron.training import get_args
+from mindspeed.moe.utils import einsum
+
+
+class MoEPostMLPComputer:
+    """Post-expert stage of the ampipe MoE pipeline: turns expert outputs back into token outputs."""
+
+    def __init__(self, save_tensor_list, fwd_args):
+        """Keep the forward argument bundle and the list that collects tensors needed by ``backward``."""
+        super().__init__()
+        self.fwd_args = fwd_args
+        self.save_tensor_list = save_tensor_list
+
+    def forward(self, ctx, mlp_outputs):
+        """Combine the expert outputs of every pipeline chunk.
+
+        Args:
+            ctx: object providing ``ep_size``, ``num_local_experts`` and ``hidden_size``.
+            mlp_outputs: per-chunk expert outputs, indexed by chunk.
+
+        Returns:
+            ``self.fwd_args.moe_output_list`` with one tensor of shape
+            ``(seqlen, -1, hidden_size)`` appended per chunk.
+        """
+        global_args = get_args()
+        weights_list = self.fwd_args.weights_list
+        token_ec_idx_list = self.fwd_args.token_ec_idx_list
+        moe_output_list = self.fwd_args.moe_output_list
+        for c in range(global_args.ampipe_degree):
+            # Without tp-sp comm overlap the chunk's all-to-all is still pending and is waited on here.
+            if not global_args.ampipe_tp_sp_comm_overlap:
+                self.fwd_args.a2a_events[c].wait()
+            # Detach so the combine step gets its own autograd sub-graph, replayed in backward().
+            detach_exp_out = mlp_outputs[c].detach()
+            detach_exp_out.requires_grad = True
+            with torch.enable_grad():
+                reshape_out = detach_exp_out.reshape(ctx.ep_size * ctx.num_local_experts, -1, ctx.hidden_size)
+                if not global_args.enable_token_rearrange_opt:
+                    # Dense combine: contract the (token, expert, capacity) weights with the expert outputs.
+                    combine_weights = weights_list[c].type_as(reshape_out)
+                    combined_output = einsum("sec,ecm->sm", combine_weights.type_as(reshape_out), reshape_out)
+                else:
+                    # Index-based combine: gather each token's expert row, then scale by its gate weight.
+                    token_rearranged_ec_idx, token_exp_weights = token_ec_idx_list[c], weights_list[c]
+                    E, C, M = reshape_out.shape
+                    org_dtype = reshape_out.dtype
+                    # NOTE(review): the bf16 -> fp32 -> bf16 round trip is presumably an operator
+                    # performance workaround for index_select - confirm.
+                    if org_dtype == torch.bfloat16:
+                        valid_expert_out = torch.index_select(
+                            reshape_out.view(E * C, M).to(torch.float32), dim=0, index=token_rearranged_ec_idx
+                        ).to(org_dtype)
+                    else:
+                        valid_expert_out = torch.index_select(
+                            reshape_out.view(E * C, M), dim=0, index=token_rearranged_ec_idx
+                        )
+                    combined_output = valid_expert_out * token_exp_weights.unsqueeze(1).type_as(reshape_out)
+                    if global_args.moe_router_topk == 2:
+                        # Split along dim 0 into two halves and add them element-wise.
+                        combined_output = torch.add(*torch.chunk(combined_output, global_args.moe_router_topk, dim=0))
+                # The clone keeps the autograd graph reachable for backward(); its storage is
+                # released immediately because only the graph is needed.
+                clone_out = combined_output.clone()
+                clone_out.untyped_storage().resize_(0)
+                self.save_tensor_list.extend([detach_exp_out, clone_out])
+                moe_out = combined_output.reshape((self.fwd_args.seqlen, -1, ctx.hidden_size))
+            moe_output_list.append(moe_out)
+        return moe_output_list
+
+    def backward(self, saved_tensor_list, grad_moe_out_chunk):
+        """Back-propagate one chunk through the combine step.
+
+        Args:
+            saved_tensor_list: exactly two tensors, ``(exp_out, combined_output)``, in the
+                order ``forward`` saved them.
+            grad_moe_out_chunk: gradient passed to ``combined_output.backward``.
+
+        Returns:
+            The gradient accumulated on ``exp_out``. The storage of ``exp_out`` is released
+            before returning.
+        """
+        exp_out, combined_output = saved_tensor_list
+        combined_output.backward(grad_moe_out_chunk)
+        exp_grad = exp_out.grad
+        exp_out.untyped_storage().resize_(0)
+        return exp_grad
diff --git a/model/train/yoco_moe/mindspeed/moe/async_comm_utils.py b/model/train/yoco_moe/mindspeed/moe/async_comm_utils.py
new file mode 100644
index 000000000..326a6de20
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/moe/async_comm_utils.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (c) Huawei Technologies Co., Ltd. 2022-2024. All rights reserved.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import threading
+
+import torch
+import torch_npu
+import torch.distributed as dist
+
+from megatron.core.parallel_state import (
+    get_global_memory_buffer,
+    get_tensor_model_parallel_group,
+    get_tensor_model_parallel_world_size,
+    get_expert_model_parallel_group
+)
+from megatron.training import get_args
+from mindspeed.core.weight_grad_store import WeightGradStore
+
+
+class SingletonMeta(type):
+    """Metaclass that creates at most one instance per class, guarded by a lock."""
+
+    _instances = {}
+    _lock = threading.Lock()
+
+    def __call__(cls, *args, **kwargs):
+        """Return the cached instance of ``cls``, constructing it on the first call only."""
+        with cls._lock:
+            if cls not in cls._instances:
+                cls._instances[cls] = super().__call__(*args, **kwargs)
+        return cls._instances[cls]
+
+
+class AsyncCommUtilsDataSingleton(metaclass=SingletonMeta):
+    """Process-wide state shared by the async communication helpers in this module."""
+
+    def __init__(self):
+        """Create the two NPU streams and the empty output/handle queues."""
+        # Streams used when a collective has to wait on an event (see async_all_to_all / async_all_gather).
+        self.all2all_stream = torch_npu.npu.Stream(device=torch.npu.current_device())
+        self.tp_stream = torch_npu.npu.Stream(device=torch.npu.current_device())
+        # Outputs and handles of pending ampipe reduce-scatter / all-reduce calls, oldest first.
+        self.fw_rs_output_ampipe = []
+        self.fw_rs_event_ampipe = []
+        self.fw_ar_output_ampipe = []
+        self.fw_ar_event_ampipe = []
+        # Outputs of forward all-gathers launched through async_fw_all_gather.
+        self.fw_ag_output = []
+
+
+def get_async_comm_utils_data_instance():
+    """Return the shared ``AsyncCommUtilsDataSingleton`` instance."""
+    return AsyncCommUtilsDataSingleton()
+
+
+def get_fw_ag_output():
+    """Return the shared list of forward all-gather outputs (the list itself, not a copy)."""
+    return get_async_comm_utils_data_instance().fw_ag_output
+
+
+def get_fw_ar_rs_output_ampipe(sequence_parallel):
+    """Wait for and return the oldest pending ampipe reduce-scatter / all-reduce output.
+
+    Args:
+        sequence_parallel: selects the reduce-scatter queues when true, the all-reduce queues otherwise.
+
+    Returns:
+        The oldest queued output tensor after its handle has been waited on, or ``None``
+        when either queue is empty.
+    """
+    if sequence_parallel:
+        output_list = get_async_comm_utils_data_instance().fw_rs_output_ampipe
+        event_list = get_async_comm_utils_data_instance().fw_rs_event_ampipe
+    else:
+        output_list = get_async_comm_utils_data_instance().fw_ar_output_ampipe
+        event_list = get_async_comm_utils_data_instance().fw_ar_event_ampipe
+
+    if not output_list or not event_list:
+        return None
+
+    handle = event_list.pop(0)
+    handle.wait()
+    return output_list.pop(0)
+
+
+def async_fw_all_reduce_scatter_ampipe(input_, sequence_parallel):
+    """Launch an async tensor-parallel reduce-scatter (or all-reduce) and queue its output and handle.
+
+    Nothing is returned; the result is retrieved later with ``get_fw_ar_rs_output_ampipe``.
+    With ``sequence_parallel`` the output has dim 0 divided by the tensor-parallel world size;
+    otherwise ``input_`` itself is all-reduced in place and queued.
+    """
+    world_size = get_tensor_model_parallel_world_size()
+    if sequence_parallel:
+        # reduce scatter
+        dim_size = list(input_.size())
+        dim_size[0] = dim_size[0] // world_size
+        output = torch.empty(dim_size, dtype=input_.dtype, device=torch.cuda.current_device())
+        handle = torch.distributed._reduce_scatter_base(
+            output, input_.contiguous(), group=get_tensor_model_parallel_group(), async_op=True
+        )
+        get_async_comm_utils_data_instance().fw_rs_output_ampipe.append(output)
+        get_async_comm_utils_data_instance().fw_rs_event_ampipe.append(handle)
+    else:
+        # all reduce
+        handle = torch.distributed.all_reduce(input_, group=get_tensor_model_parallel_group(), async_op=True)
+        get_async_comm_utils_data_instance().fw_ar_output_ampipe.append(input_)
+        get_async_comm_utils_data_instance().fw_ar_event_ampipe.append(handle)
+
+
+def async_all_gather(input_, a2a_event=None, is_use_global_memory_buffer=False, is_bwd=False, is_save_input=False):
+    """Launch an async tensor-parallel all-gather of ``input_`` along dim 0.
+
+    Args:
+        input_: tensor to gather; it is made contiguous before the collective.
+        a2a_event: optional event. When given, the wait on it and the collective are issued
+            inside the ``tp_stream`` stream context.
+        is_use_global_memory_buffer: take the output tensor from the global memory buffer
+            (key ``"mpu"``) instead of allocating a new one.
+        is_bwd: backward-pass call; forces ``is_save_input`` and selects the two-element return.
+        is_save_input: permit saving a detached clone of ``input_`` into ``WeightGradStore``.
+            This is only evaluated on the ``a2a_event`` path, and only when ``use_nanopipe``
+            and ``WeightGradStore.is_decoupleBlock`` are set.
+
+    Returns:
+        ``(ag_out, handle)`` when ``is_bwd`` is true, otherwise ``(input_, ag_out, handle)``
+        where ``input_`` is the contiguous input.
+    """
+    world_size = get_tensor_model_parallel_world_size()
+    dim_size = list(input_.size())
+    new_dim_size = dim_size[0] * world_size
+    dim_size[0] = new_dim_size
+    if is_bwd:
+        is_save_input = True
+
+    if is_use_global_memory_buffer:
+        ag_out = get_global_memory_buffer().get_tensor(dim_size, input_.dtype, "mpu")
+    else:
+        ag_out = torch.empty(dim_size, dtype=input_.dtype, device=torch.cuda.current_device())
+    input_ = input_.contiguous()
+    if a2a_event:
+        # multi stream wait event
+        if get_async_comm_utils_data_instance().tp_stream is None:
+            get_async_comm_utils_data_instance().tp_stream = torch_npu.npu.Stream(device=torch.npu.current_device())
+        with torch_npu.npu.stream(get_async_comm_utils_data_instance().tp_stream):
+            a2a_event.wait()
+            if is_save_input and get_args().use_nanopipe and WeightGradStore.is_decoupleBlock:
+                WeightGradStore.save_grad_output(input_.clone().detach())
+            handle = torch.distributed._all_gather_base(
+                ag_out, input_, group=get_tensor_model_parallel_group(), async_op=True
+            )
+    else:
+        handle = torch.distributed._all_gather_base(
+            ag_out, input_, group=get_tensor_model_parallel_group(), async_op=True
+        )
+    if is_bwd:
+        return ag_out, handle
+    return input_, ag_out, handle
+
+
+def async_fw_all_gather(input_, a2a_event=None, is_use_global_memory_buffer=False,
+                        ampipe_with_mlp_multistream=False, index=0):
+    """Forward-pass all-gather that records its output in the shared ``fw_ag_output`` list.
+
+    The output is written at position ``index`` when ``ampipe_with_mlp_multistream`` is set
+    (the slot must already exist) and appended otherwise.
+
+    Returns:
+        ``(input_, handle)`` with the contiguous input and the handle of the collective.
+    """
+    input_, ag_out, handle = async_all_gather(input_, a2a_event, is_use_global_memory_buffer)
+    if ampipe_with_mlp_multistream:
+        get_async_comm_utils_data_instance().fw_ag_output[index] = ag_out
+    else:
+        get_async_comm_utils_data_instance().fw_ag_output.append(ag_out)
+    return input_, handle
+
+
+def async_all_to_all(input_, event=None):
+    """Launch an async all-to-all over the expert-model-parallel group.
+
+    Args:
+        input_: tensor to exchange; it is made contiguous first.
+        event: optional event. When given, the wait on it and the collective are issued
+            inside the ``all2all_stream`` stream context.
+
+    Returns:
+        ``(output, handle)`` where ``output`` has the same shape and dtype as ``input_`` and
+        is only valid after ``handle.wait()``.
+    """
+    input_ = input_.contiguous()
+    output = torch.empty_like(input_)
+    if event:
+        # multi stream wait event
+        if get_async_comm_utils_data_instance().all2all_stream is None:
+            get_async_comm_utils_data_instance().all2all_stream = torch_npu.npu.Stream(device=torch.npu.current_device())
+        with torch_npu.npu.stream(get_async_comm_utils_data_instance().all2all_stream):
+            event.wait()
+            handle = dist.all_to_all_single(output, input_, group=get_expert_model_parallel_group(), async_op=True)
+    else:
+        handle = dist.all_to_all_single(output, input_, group=get_expert_model_parallel_group(), async_op=True)
+    return output, handle
+
+
+def async_fw_ar_rs(input_, sequence_parallel):
+    """Launch an async tensor-parallel reduce-scatter or all-reduce and return it to the caller.
+
+    Unlike ``async_fw_all_reduce_scatter_ampipe`` nothing is queued.
+
+    Returns:
+        ``(output, handle)``. With ``sequence_parallel`` the output is a new tensor whose dim 0
+        is divided by the tensor-parallel world size; otherwise it is ``input_`` itself, reduced
+        in place.
+    """
+    world_size = get_tensor_model_parallel_world_size()
+    if sequence_parallel:
+        # reduce scatter
+        dim_size = list(input_.size())
+        dim_size[0] = dim_size[0] // world_size
+        output = torch.empty(dim_size, dtype=input_.dtype, device=torch.cuda.current_device())
+        handle = torch.distributed._reduce_scatter_base(
+            output, input_.contiguous(), group=get_tensor_model_parallel_group(), async_op=True
+        )
+        return output, handle
+    else:
+        # all reduce
+        handle = torch.distributed.all_reduce(input_, group=get_tensor_model_parallel_group(), async_op=True)
+        return input_, handle
diff --git a/model/train/yoco_moe/mindspeed/moe/config.py b/model/train/yoco_moe/mindspeed/moe/config.py
new file mode 100644
index 000000000..0a5429748
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/moe/config.py
@@ -0,0 +1,43 @@
+# coding=utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
All rights reserved.
+
+import torch
+
+
+class Config:
+    """Hyper-parameter container for the MoE gate and expert layout.
+
+    Every constructor argument is stored unchanged under an attribute of the same name,
+    except ``reshape_index_select`` (see ``__init__``). ``dynamic_capacity`` is not a
+    constructor argument and always starts out as ``None``.
+    """
+
+    def __init__(self, hidden_size, num_experts=1, ep_size=1, topk=1,
+                 capacity_factor=1., eval_capacity_factor=1., min_capacity=4,
+                 aux_loss_coef=0.0, z_loss_coef=0.0, noisy_gate_policy=None,
+                 no_drop=False, dynamic_padding=False, use_sinkhorn=False,
+                 sequence_parallel=False, reshape_index_select=None):
+        """Record the settings.
+
+        A truthy ``reshape_index_select`` is converted to an ``int32`` tensor on the current
+        NPU device; a falsy one (``None`` or an empty sequence) is stored as ``None``.
+        """
+        # Model / expert layout.
+        self.hidden_size, self.num_experts, self.ep_size, self.topk = hidden_size, num_experts, ep_size, topk
+        # Expert capacity.
+        self.capacity_factor, self.eval_capacity_factor = capacity_factor, eval_capacity_factor
+        self.min_capacity = min_capacity
+        # Auxiliary loss coefficients.
+        self.aux_loss_coef, self.z_loss_coef = aux_loss_coef, z_loss_coef
+        # Gating behaviour switches.
+        self.noisy_gate_policy, self.no_drop = noisy_gate_policy, no_drop
+        self.dynamic_padding, self.use_sinkhorn = dynamic_padding, use_sinkhorn
+        self.dynamic_capacity = None
+        self.sequence_parallel = sequence_parallel
+        # The device is only queried when an index list was actually supplied.
+        self.reshape_index_select = (
+            torch.tensor(reshape_index_select, dtype=torch.int32, device=torch.npu.current_device())
+            if reshape_index_select else None
+        )
diff --git a/model/train/yoco_moe/mindspeed/moe/experts.py b/model/train/yoco_moe/mindspeed/moe/experts.py
new file mode 100644
index 000000000..ad61789b0
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/moe/experts.py
@@ -0,0 +1,39 @@
+# Copyright (c) Microsoft Corporation.
+#
+# This source code is licensed under the Apache license found in the
+# LICENSE file in the root directory of this source tree.
+
+# copied from https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/experts.py
+# reworked/refactored some parts to make it run.
+import copy
+import torch
+
+
+class Experts(torch.nn.Module):
+    """A bank of ``num_local_experts`` independent deep copies of one expert module.
+
+    ``forward`` splits its input along dim 1 into ``num_local_experts`` chunks and applies
+    expert *i* to chunk *i*.
+    """
+
+    def __init__(self, expert, num_local_experts=1):
+        """Deep-copy ``expert`` ``num_local_experts`` times and tag the copies' parameters.
+
+        Args:
+            expert: template module; it is copied, never used directly.
+            num_local_experts: number of copies held by this module.
+        """
+        super().__init__()
+
+        self.experts = torch.nn.ModuleList([copy.deepcopy(expert) for _ in range(num_local_experts)])
+        self.num_local_experts = num_local_experts
+
+        # Tag every expert parameter with ``allreduce = False``. Presumably the gradient
+        # reduction code reads this flag so that expert weights are reduced across the
+        # data-modulo-expert-parallel group rather than by the regular data-parallel
+        # all-reduce - TODO confirm against the consumer of the flag.
+        for local_expert in self.experts:
+            for param in local_expert.parameters():
+                param.allreduce = False
+
+    def forward(self, inputs):
+        """Apply each local expert to its slice of ``inputs`` and concatenate the results.
+
+        Args:
+            inputs: tensor whose dim 1 is split into ``num_local_experts`` chunks. Each chunk
+                has dim 1 squeezed before the expert call, so that dim is only removed when
+                the chunk size is 1.
+
+        Returns:
+            The expert outputs, each with dim 1 re-inserted, concatenated along dim 1. When an
+            expert returns a tuple it is unpacked as ``(out, bias)`` and a non-``None`` bias is
+            added to the output.
+        """
+        chunks = inputs.chunk(self.num_local_experts, dim=1)
+        expert_outputs = []
+        for chunk, expert in zip(chunks, self.experts):
+            chunk = torch.squeeze(chunk, dim=1).contiguous()
+            out = expert(chunk)
+            # isinstance rather than an exact type check, so tuple subclasses such as
+            # namedtuples are unpacked as well.
+            if isinstance(out, tuple):
+                out, bias = out
+                if bias is not None:
+                    out = out + bias
+            expert_outputs.append(torch.unsqueeze(out, dim=1))
+
+        return torch.cat(expert_outputs, dim=1)
diff --git a/model/train/yoco_moe/mindspeed/moe/gate.py b/model/train/yoco_moe/mindspeed/moe/gate.py
new file mode 100644
index 000000000..f676917ee
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/moe/gate.py
@@ -0,0 +1,306 @@
+# coding=utf-8
+# Copyright (c) Microsoft Corporation.
+#
+# This source code is licensed under the Apache license found in the
+# LICENSE file in the root directory of this source tree.
+
+# copied from https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/moe/sharded_moe.py
+# reworked/refactored some parts to make it run.
+from typing import Callable, Dict, Tuple +from collections import namedtuple + +import torch +import torch.nn.functional as F +from torch import Tensor +from torch.nn import Module +import torch.distributed as dist +from megatron.training import get_args +from megatron.core.transformer.moe.moe_utils import sinkhorn + +from .config import Config +from .utils import gumbel_rsample, _capacity, einsum, _one_hot_to_float, MoEAuxLossAutoScaler + +exp_selection_uniform_map: Dict[torch.device, Callable] = {} + + +GatingTokenRearrangeInfo = namedtuple('GatingTokenRearrangeInfo', ['token_rearranged_ec_idx', 'token_exp_weights', 'expert_select_token_idx']) + + +class TopKGate(Module): + """Gate module which implements Top2Gating as described in Gshard_. + :: + + gate = TopKGate(model_dim, num_experts) + l_aux, combine_weights, dispatch_mask = gate(input) + + .. Gshard_: https://arxiv.org/pdf/2006.16668.pdf + + Args: + model_dim (int): + size of model embedding dimension + num_experts (ints): + number of experts in model + """ + + weight: torch.nn.Linear + + def __init__(self, config: Config) -> None: + super().__init__() + + # Only top-1 and top-2 are supported at the moment. 
+ if config.topk != 1 and config.topk != 2: + raise ValueError('Only top-1 and top-2 gatings are supported.') + self.weight = torch.nn.Linear(config.hidden_size, config.num_experts, bias=False).float() + setattr(self.weight, 'sequence_parallel', config.sequence_parallel) + self.config = config + + def forward(self, gate_input: torch.Tensor) -> Tuple[Tensor, ...]: # type: ignore + input_fp32 = gate_input.float() + logits = torch.nn.functional.linear(input_fp32, weight=self.weight.weight.float(), bias=None) + + if self.config.use_sinkhorn: + logits = sinkhorn(logits) + if self.config.topk == 1: + gate_output = top1gating(logits, self.config) + else: + gate_output = top2gating(logits, self.config) + + return gate_output + + +def top1gating(logits: Tensor, config: Config) -> Tuple[Tensor, ...]: + """Implements Top1Gating on logits.""" + args = get_args() + if config.noisy_gate_policy == 'RSample': + logits_w_noise = logits + gumbel_rsample(logits.shape, device=logits.device) + # everything is in fp32 in this function + # token_sel_expert_weights: [S, E], 每个token选择每个专家的概率 + token_sel_expert_weights = F.softmax(logits, dim=1) + + if config.reshape_index_select is not None and args.ampipe_degree <= 1: + token_sel_expert_weights = token_sel_expert_weights[:, config.reshape_index_select] + + capacity = _capacity(token_sel_expert_weights, + torch.tensor(config.capacity_factor), + torch.tensor(config.min_capacity)) + + # Create a mask for 1st's expert per token + # noisy gating + final_logits = logits_w_noise if config.noisy_gate_policy == "RSample" else \ + token_sel_expert_weights + # [S] 每个token对应的专家(取概率最大的) + token_sel_expert_idx = torch.argmax(final_logits, dim=1) + num_experts = int(token_sel_expert_weights.shape[1]) + token_sel_expert_mask = F.one_hot(token_sel_expert_idx, num_classes=num_experts) + + # if we don't want to drop any tokens + if config.no_drop: + # gating decisions + exp_counts = torch.sum(token_sel_expert_mask, dim=0).detach() + if 
config.dynamic_padding: + new_capacity = torch.max(exp_counts) + cur_capacity = new_capacity.item() + capacity = config.dynamic_capacity.to(logits.device) + + flag = cur_capacity > capacity + dist.reduce(flag, dst=0, op=torch.distributed.ReduceOp.SUM, group=dist.group.WORLD) + dist.broadcast(flag, src=0, group=dist.group.WORLD) + if flag: + dist.all_reduce(new_capacity, op=dist.ReduceOp.MAX, group=dist.group.WORLD) + capacity = new_capacity + + if cur_capacity > logits.shape[0]: + capacity = torch.ceil(torch.tensor(logits.shape[0])).to(torch.int64) + else: + new_capacity = torch.max(exp_counts).to(logits.device) + dist.all_reduce(new_capacity, op=dist.ReduceOp.MAX, group=dist.group.WORLD) + capacity = new_capacity + + # Compute l_aux负载均衡aux_loss + me = torch.mean(token_sel_expert_weights, dim=0) + ce = torch.mean(token_sel_expert_mask.float(), dim=0) + l_aux = torch.sum(me * ce) * num_experts + all_args = get_args() + # Random Token Selection(将token选择专家的掩码0/1矩阵中的1转成0~1之间的权重值) + if all_args.use_rts: # default True. 
+ uniform = exp_selection_uniform_map.get(logits.device) + if uniform is None: + uniform = torch.distributions.uniform.Uniform( + low=torch.tensor(0.0, device=logits.device), + high=torch.tensor(1.0, device=logits.device)).rsample + exp_selection_uniform_map[logits.device] = uniform + # [S, E] + token_sel_expert_score = token_sel_expert_mask * uniform(token_sel_expert_mask.shape) + else: + token_sel_expert_score = token_sel_expert_mask + + # 通过topC每个专家选择至多C个token,然后和原始的mask1(每个专家可能选择超过C个token)矩阵相乘, + # 丢掉超过专家容量的权重低的token,更新得到 token_sel_expert_mask + expert_sel_top_c_token_idx = torch.topk(token_sel_expert_score, k=capacity, dim=0)[1] + token_sel_expert_mask *= torch.zeros_like(token_sel_expert_mask).scatter_(0, expert_sel_top_c_token_idx, 1) + + # Normalize gate probabilities + token_sel_expert_mask_float = token_sel_expert_mask.float() + token_sel_expert_weights = token_sel_expert_weights * token_sel_expert_mask_float + + token_idx_in_expert_with_noise = torch.cumsum(token_sel_expert_mask, dim=0) - 1 + masked_token_idx_in_expert = token_idx_in_expert_with_noise * token_sel_expert_mask + token_offset_for_expert = torch.sum(masked_token_idx_in_expert, dim=1) + if all_args.enable_token_rearrange_opt: + # 重排过程:计算出每个专家选择的token的索引:expert_select_token_idx,shape为: [E*C] + # MoE前向过程中根据此索引通过index_select API实现token的重排 + # shape变化过程:[S, E]->[C, E]->[E, C]->[E*C] + expert_sel_top_c_token_idx = torch.topk(token_sel_expert_mask, + k=capacity, + dim=0, + sorted=True)[1] + expert_select_token_idx = expert_sel_top_c_token_idx.t().reshape(config.num_experts * capacity) + token_exp_weights, token_exp_idx = torch.max(token_sel_expert_weights, dim=1) + token_rearranged_ec_idx = (capacity.to(torch.int32) * token_exp_idx.to(torch.int32) + + token_offset_for_expert.to(torch.int32)) + top1_gating_token_infos = GatingTokenRearrangeInfo(token_rearranged_ec_idx=token_rearranged_ec_idx, + token_exp_weights=token_exp_weights, + expert_select_token_idx=expert_select_token_idx) + return l_aux, 
def apply_aux_loss(config, gates, mask1):
    """Compute the load-balancing auxiliary loss and optionally attach it to ``gates``.

    The loss is ``mean(mean(gates, 0) * mean(mask1, 0)) * E * E`` where ``E`` is
    ``gates.shape[1]``.

    Args:
        config: Object exposing ``aux_loss_coef``.
        gates (torch.Tensor): Router probabilities; dimension 1 indexes the experts.
        mask1 (torch.Tensor): Assignment mask whose mean over dimension 0 is
            multiplied element-wise with the mean of ``gates``.

    Returns:
        tuple: ``(gates, l_aux)``. When ``config.aux_loss_coef`` is positive the
        loss is scaled by it and ``gates`` is passed through
        ``MoEAuxLossAutoScaler.apply``; otherwise ``gates`` is returned untouched.
    """
    expert_total = int(gates.shape[1])
    gate_mean_per_expert = torch.mean(gates, dim=0)
    assignment_mean_per_expert = torch.mean(mask1.float(), dim=0)
    balance_term = torch.mean(gate_mean_per_expert * assignment_mean_per_expert)
    l_aux = balance_term * expert_total * expert_total

    # Nothing to attach when the coefficient is not strictly positive.
    if not config.aux_loss_coef > 0:
        return gates, l_aux

    l_aux = l_aux * config.aux_loss_coef
    return MoEAuxLossAutoScaler.apply(gates, l_aux), l_aux


def apply_z_loss(config, logits):
    """Encourage the router logits to stay small, which helps stability.

    See the ST-MoE paper (https://arxiv.org/pdf/2202.08906.pdf) for details.

    Args:
        config: Object exposing ``z_loss_coef``.
        logits (torch.Tensor): The logits of the router.

    Returns:
        torch.Tensor: ``logits`` unchanged when ``config.z_loss_coef`` is not
        positive, otherwise the result of ``MoEAuxLossAutoScaler.apply`` with
        the z-loss attached.
    """
    if not config.z_loss_coef > 0:
        return logits

    log_partition = torch.logsumexp(logits, dim=-1)
    z_loss = torch.mean(torch.square(log_partition)) * config.z_loss_coef
    return MoEAuxLossAutoScaler.apply(logits, z_loss)
def top2gating(logits: Tensor, config: Config) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    """Implements Top2Gating on logits.

    Args:
        logits: Router logits; softmax is taken over dimension 1, so dimension 1
            indexes experts and dimension 0 indexes tokens.
        config: Gate configuration. Fields read here: ``reshape_index_select``,
            ``capacity_factor``, ``min_capacity``, ``topk``, ``no_drop``,
            ``dynamic_padding``, ``dynamic_capacity`` (plus the coefficients
            read by ``apply_z_loss`` / ``apply_aux_loss``).

    Returns:
        * ``(l_aux, GatingTokenRearrangeInfo)`` when
          ``get_args().enable_token_rearrange_opt`` is set;
        * ``(l_aux, combine_weights, dispatch_mask, cur_capacity)`` when
          ``config.dynamic_padding`` is set;
        * ``(l_aux, combine_weights, dispatch_mask)`` otherwise.

    NOTE(review): the return annotation advertises a fixed 4-tuple, which does
    not match the three shapes above.
    NOTE(review): ``mask[:, 1, :]`` needs at least two selected experts per
    token, i.e. ``config.topk >= 2``.
    """
    # apply z loss
    args = get_args()
    logits = apply_z_loss(config, logits)

    # everything is in fp32 in this function
    token_sel_expert_weights = F.softmax(logits, dim=1)

    # Reorder the expert columns only when an index is configured and ampipe is off.
    if config.reshape_index_select is not None and args.ampipe_degree <= 1:
        token_sel_expert_weights = token_sel_expert_weights[:, config.reshape_index_select]

    num_experts = int(token_sel_expert_weights.shape[1])

    # The capacity factor is doubled because every token is routed to two experts.
    capacity = _capacity(token_sel_expert_weights,
                         torch.tensor(config.capacity_factor * 2),
                         torch.tensor(config.min_capacity))

    _, selected_experts = torch.topk(token_sel_expert_weights, config.topk, dim=-1)
    mask = F.one_hot(selected_experts, num_classes=num_experts)
    first_expert_mask = mask[:, 0, :]
    second_expert_mask = mask[:, 1, :]

    # Compute locations in capacity buffer
    locations_in_first_expert = torch.cumsum(first_expert_mask, dim=0) - 1
    locations_in_second_expert = torch.cumsum(second_expert_mask, dim=0) - 1
    # Update 2nd's location by accounting for locations of 1st
    locations_in_second_expert += torch.sum(first_expert_mask, dim=0, keepdim=True)

    # gating decisions
    token_sel_expert_weights, l_aux = apply_aux_loss(config, token_sel_expert_weights, first_expert_mask)
    if config.no_drop:
        if config.dynamic_padding:
            new_capacity = torch.max(locations_in_second_expert) + 2
            cur_capacity = new_capacity.item()
            capacity = config.dynamic_capacity.to(logits.device)

            # Agree across WORLD on whether any rank exceeded the planned capacity:
            # reduce the local flag to rank 0, then broadcast the result back.
            flag = cur_capacity > capacity
            dist.reduce(flag, dst=0, op=torch.distributed.ReduceOp.SUM, group=dist.group.WORLD)
            dist.broadcast(flag, src=0, group=dist.group.WORLD)
            if flag:
                dist.all_reduce(new_capacity, op=dist.ReduceOp.MAX, group=dist.group.WORLD)
                capacity = new_capacity
            # Never use more slots than there are tokens.
            if cur_capacity > logits.shape[0]:
                capacity = torch.ceil(torch.tensor(logits.shape[0])).to(torch.int64)
        else:
            # No dropping without dynamic padding: use the global maximum demand.
            new_capacity = torch.max(locations_in_second_expert) + 2
            dist.all_reduce(new_capacity, op=dist.ReduceOp.MAX, group=dist.group.WORLD)
            capacity = new_capacity

    # Remove locations outside capacity from mask
    first_expert_mask *= torch.lt(locations_in_first_expert, capacity)
    second_expert_mask *= torch.lt(locations_in_second_expert, capacity)

    # Store the capacity location for each token
    token_idx_in_first_expert = torch.sum(locations_in_first_expert * first_expert_mask, dim=1)
    token_idx_in_second_expert = torch.sum(locations_in_second_expert * second_expert_mask, dim=1)

    # Normalize gate probabilities
    first_expert_mask_float = first_expert_mask.float()
    second_expert_mask_float = second_expert_mask.float()
    token_first_exp_weights, token_first_exp_idx = torch.max(token_sel_expert_weights * first_expert_mask_float, dim=1)
    token_second_exp_weights, token_second_exp_idx = torch.max(token_sel_expert_weights * second_expert_mask_float,
                                                               dim=1)
    denom_s = token_first_exp_weights + token_second_exp_weights
    # Avoid divide-by-zero
    denom_s = torch.clamp(denom_s, min=torch.finfo(denom_s.dtype).eps)
    token_first_exp_weights /= denom_s
    token_second_exp_weights /= denom_s
    all_args = get_args()
    if all_args.enable_token_rearrange_opt:
        # Flat position of every token in the [E * C] expert buffer.
        token_rearranged_first_ec_idx = token_first_exp_idx.int() * capacity + token_idx_in_first_expert.int()
        token_rearranged_second_ec_idx = token_second_exp_idx.int() * capacity + token_idx_in_second_expert.int()
        # Rearrangement: compute, for every expert, the indices of the tokens it
        # selected (expert_select_token_idx, shape [E*C]).
        # The MoE forward pass uses this index with index_select to rearrange tokens.
        # Shape progression: [S, E] -> [C, E] -> [E, C] -> [E*C]
        # First-choice entries are weighted 2 and second-choice entries 1, so topk
        # ranks first choices ahead of second choices.
        token_sel_first_exp_int_mask = first_expert_mask * 2
        token_sel_second_exp_int_mask = second_expert_mask
        expert_sel_top_c_token_idx = torch.topk(token_sel_first_exp_int_mask + token_sel_second_exp_int_mask,
                                                k=capacity,
                                                dim=0,
                                                sorted=True)[1]
        expert_select_token_idx = expert_sel_top_c_token_idx.t().reshape(num_experts * capacity)
        # First-choice entries come first, second-choice entries are appended.
        token_rearranged_ec_idx = torch.cat([token_rearranged_first_ec_idx, token_rearranged_second_ec_idx], dim=0)
        token_exp_weights = torch.cat([token_first_exp_weights, token_second_exp_weights], dim=0)

        top2_gating_token_infos = GatingTokenRearrangeInfo(token_rearranged_ec_idx=token_rearranged_ec_idx,
                                                           token_exp_weights=token_exp_weights,
                                                           expert_select_token_idx=expert_select_token_idx)
        return l_aux, top2_gating_token_infos
    else:
        # Calculate combine_weights and dispatch_mask
        gates1 = einsum("s,se->se", token_first_exp_weights, first_expert_mask_float)
        gates2 = einsum("s,se->se", token_second_exp_weights, second_expert_mask_float)
        locations1_sc = _one_hot_to_float(token_idx_in_first_expert, capacity)
        locations2_sc = _one_hot_to_float(token_idx_in_second_expert, capacity)
        combine1_sec = einsum("se,sc->sec", gates1, locations1_sc)
        combine2_sec = einsum("se,sc->sec", gates2, locations2_sc)
        combine_weights = combine1_sec + combine2_sec
        dispatch_mask = combine_weights.bool()

        if config.dynamic_padding:
            # NOTE(review): cur_capacity is only assigned in the
            # no_drop + dynamic_padding branch above; with dynamic_padding set
            # but no_drop unset this line would raise UnboundLocalError.
            # Confirm the configuration forbids that combination.
            return l_aux, combine_weights, dispatch_mask, cur_capacity
        else:
            return l_aux, combine_weights, dispatch_mask
class MixtralParallelMLPBM(torch.nn.Module):
    """Gated feed-forward block: ``w2(silu(w1(x)) * w3(x))`` on tensor-parallel linears.

    ``w1`` and ``w3`` are ``ColumnParallelLinear`` projections from
    ``config.hidden_size`` to ``config.ffn_hidden_size``; ``w2`` is the
    ``RowParallelLinear`` projection back. All three are bias-free.
    """

    def __init__(self, config):
        super().__init__()
        runtime_args = get_args()
        use_pipe_experts = runtime_args.use_pipe_experts

        self.activation_checkpoint_manager = None
        self.ffn_dim = config.ffn_hidden_size
        self.hidden_dim = config.hidden_size
        # Set externally; consulted by should_recompute_activation in forward.
        self.layer_number = None

        def build_column_projection():
            # w1 and w3 share exactly the same construction arguments.
            return ColumnParallelLinear(
                config.hidden_size,
                config.ffn_hidden_size,
                config=config,
                init_method=config.init_method,
                bias=False,
                gather_output=False,
                skip_bias_add=True,
                is_expert=False,
                pipe_experts=use_pipe_experts,
            )

        # Creation order (w1, w2, w3) is kept so parameter registration is unchanged.
        self.w1 = build_column_projection()
        self.w2 = RowParallelLinear(
            config.ffn_hidden_size,
            config.hidden_size,
            config=config,
            init_method=config.output_layer_init_method,
            bias=False,
            skip_bias_add=True,
            input_is_parallel=True,
            is_expert=False,
            pipe_experts=use_pipe_experts,
        )
        self.w3 = build_column_projection()

        self.act_fn = F.silu

        # Mark the projections for nanopipe only with real pipeline parallelism
        # and a configured virtual pipeline size.
        mark_for_nanopipe = (
            runtime_args.use_nanopipe
            and parallel_state.get_pipeline_model_parallel_world_size() > 1
            and parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None
        )
        if mark_for_nanopipe:
            for projection in (self.w1, self.w2, self.w3):
                projection.in_nano = True

    def forward(self, hidden_states):
        """Apply the gated MLP to ``hidden_states`` and return the ``w2`` output tensor."""
        if not should_recompute_activation(self.layer_number):
            gated = self.act_fn(self.w1(hidden_states)[0]) * self.w3(hidden_states)[0]
            return self.w2(gated)[0]

        # Recompute path: the activation output is discarded after use and
        # rebuilt by a backward hook registered on the block output.
        manager = CheckpointWithoutOutput()
        self.activation_checkpoint_manager = manager
        activated = manager.checkpoint(self.act_fn, False, self.w1(hidden_states)[0])
        gated = activated * self.w3(hidden_states)[0]
        manager.discard_output()
        projected = self.w2(gated)[0]
        if projected.requires_grad:
            projected.register_hook(manager.recompute)
        return projected
class MoE(torch.nn.Module):
    """Mixture-of-experts module that wires a ``TopKGate`` and ``Experts`` into a ``MOELayer``.

    Arguments:
        hidden_size (int): the hidden dimension of the model, importantly this is also the input and output dimension.
        expert (torch.nn.Module): the torch module that defines the expert (e.g., MLP, torch.linear).
        num_experts (int, optional): default=1, the total number of experts per layer.
        ep_size (int, optional): default=1, number of ranks in the expert parallel world or group.
        k (int, optional): default=1, top-k gating value, only supports k=1 or k=2.
        capacity_factor (float, optional): default=1.0, the capacity of the expert at training time.
        eval_capacity_factor (float, optional): default=1.0, the capacity of the expert at eval time.
        min_capacity (int, optional): default=4, the minimum capacity per expert regardless of the capacity_factor.
        aux_loss_coef (float, optional): default=0.0, scaling coefficient for the aux loss.
        z_loss_coef (float, optional): default=0.0, scaling coefficient for the z loss.
        ep_group (optional): default=None, stored on the module and handed to ``MOELayer.set_ep_group``.
        noisy_gate_policy (str, optional): default=None, noisy gate policy, valid options are 'Jitter', 'RSample' or 'None'.
        no_drop (bool, optional): default=False, forwarded to the gate ``Config``.
        dynamic_padding (bool, optional): default=False, forwarded to the gate ``Config``.
        use_sinkhorn (bool, optional): default=False, forwarded to the gate ``Config``.
        sequence_parallel (bool, optional): default=False, forwarded to the gate ``Config`` and to ``MOELayer``.

    Raises:
        AssertionError: if ``num_experts`` is not divisible by ``ep_size``.
    """

    def __init__(self,
                 hidden_size,
                 expert,
                 num_experts=1,
                 ep_size=1,
                 k=1,
                 capacity_factor=1.,
                 eval_capacity_factor=1.,
                 min_capacity=4,
                 aux_loss_coef=0.0,
                 z_loss_coef=0.0,
                 ep_group=None,
                 noisy_gate_policy: typing.Optional[str] = None,
                 no_drop=False,
                 dynamic_padding=False,
                 use_sinkhorn=False,
                 sequence_parallel=False):
        super().__init__()

        # Pipelining switches come from the global training arguments.
        global_args = get_args()
        use_pipe_experts = global_args.use_pipe_experts
        multi_data = global_args.pipe_experts_multi_data
        multi_stream = global_args.pipe_experts_multi_stream

        if num_experts % ep_size != 0:
            raise AssertionError("Number of experts should be divisible by expert parallel size")
        experts_per_rank = num_experts // ep_size

        # The expert-column permutation is only needed for the pipelined expert path.
        reshape_index_select = (
            get_reshape_index_select(experts_per_rank, ep_size) if use_pipe_experts else None
        )

        self.num_experts = num_experts
        self.ep_size = ep_size
        self.num_local_experts = experts_per_rank
        self.ep_group = ep_group
        self.hidden_size = hidden_size

        gate_config = Config(
            hidden_size=hidden_size,
            num_experts=num_experts,
            ep_size=ep_size,
            topk=k,
            capacity_factor=capacity_factor,
            eval_capacity_factor=eval_capacity_factor,
            min_capacity=min_capacity,
            aux_loss_coef=aux_loss_coef,
            z_loss_coef=z_loss_coef,
            noisy_gate_policy=noisy_gate_policy,
            no_drop=no_drop,
            dynamic_padding=dynamic_padding,
            use_sinkhorn=use_sinkhorn,
            sequence_parallel=sequence_parallel,
            reshape_index_select=reshape_index_select,
        )
        gate = TopKGate(gate_config)
        local_experts = Experts(expert, experts_per_rank)
        self.moe_layer = MOELayer(
            gate,
            local_experts,
            ep_size,
            experts_per_rank,
            pipe_experts=use_pipe_experts,
            sequence_parallel=sequence_parallel,
            pipe_experts_multi_data=multi_data,
            pipe_experts_multi_stream=multi_stream,
        )
        self.moe_layer.set_ep_group(ep_group)

    def forward(self, hidden_states, used_token=None):
        """Run the MoE layer and return ``(output, l_aux, exp_counts)``.

        ``l_aux`` and ``exp_counts`` are read from the wrapped ``MOELayer``
        after it has processed ``hidden_states``.
        """
        layer = self.moe_layer
        output = layer(hidden_states, used_token)
        return output, layer.l_aux, layer.exp_counts
# ``Module`` is only subscripted for static type checkers; at runtime the
# plain class is used as the base.
if TYPE_CHECKING:
    Base = Module[Tensor]
else:
    Base = Module


class MOELayer(Base):
    """MOELayer module which implements MixtureOfExperts as described in Gshard_.
    ::

        gate = TopKGate(model_dim, num_experts)
        moe = MOELayer(gate, expert)
        output = moe(input)
        l_aux = moe.l_aux

    .. Gshard_: https://arxiv.org/pdf/2006.16668.pdf

    Args:
        gate (torch.nn.Module):
            gate network
        experts (torch.nn.Module):
            expert network
        ep_size:
            expert-parallel size; ``num_experts`` is ``ep_size * num_local_experts``
        num_local_experts (int):
            number of experts held by this rank
        pipe_experts (bool):
            route the expert computation through ``PipeExpert`` instead of the
            plain all-to-all / experts / all-to-all path
        sequence_parallel (bool):
            forwarded to ``PipeExpert``
        pipe_experts_multi_data (int):
            forwarded to ``PipeExpert``
        pipe_experts_multi_stream (bool):
            forwarded to ``PipeExpert``
    """

    def __init__(self,
                 gate: Module,
                 experts: Module,
                 ep_size,
                 num_local_experts: int,
                 pipe_experts: bool = False,
                 sequence_parallel: bool = False,
                 pipe_experts_multi_data: int = 1,
                 pipe_experts_multi_stream: bool = False) -> None:
        super().__init__()
        self.gate = gate
        self.experts = experts
        # Populated later through set_ep_group().
        self.ep_group = None
        self.ep_size = ep_size
        self.num_local_experts = num_local_experts
        self.num_experts = ep_size * num_local_experts
        # NOTE(review): exp_counts is never assigned in this class after this
        # initialisation; confirm whether another component fills it in.
        self.exp_counts = None
        self.l_aux = None

        # State for the dynamic-padding capacity estimate maintained in forward().
        self.cur_index_window = 0
        self.capacity_window_size = 20
        self.capacity_history_window = []
        # Initial planned capacity read by the gate (256 slots).
        self.gate.config.dynamic_capacity = torch.ceil(torch.tensor(256)).to(torch.int64)

        self.pipe_experts = pipe_experts
        self.sequence_parallel = sequence_parallel
        self.pipe_experts_multi_data = pipe_experts_multi_data
        self.pipe_experts_multi_stream = pipe_experts_multi_stream

    def set_ep_group(self, ep_group):
        """Store the expert-parallel group used by the all-to-all exchanges."""
        self.ep_group = ep_group

    def forward(self, *input: Tensor, **kwargs: Any) -> Tensor:
        """Route tokens through the gate and the experts and combine the results.

        Only ``input[0]`` is used. It is flattened to ``(-1, d_model)`` for
        routing and the result is reshaped back to ``input[0].shape``.
        ``self.l_aux`` is updated with the auxiliary loss returned by the gate.
        """
        d_model = input[0].shape[-1]
        reshaped_input = input[0].reshape(-1, d_model)
        from megatron.training import get_args
        all_args = get_args()
        # gate
        if not all_args.enable_token_rearrange_opt:
            if self.gate.config.dynamic_padding:
                # With dynamic padding the gate also reports this rank's capacity.
                self.l_aux, combine_weights, dispatch_mask, cur_capacity_cur_rank = self.gate(reshaped_input)
                self.capacity_history_window.append(cur_capacity_cur_rank)
                self.cur_index_window += 1
                # Keep at most capacity_window_size samples.
                if len(self.capacity_history_window) > self.capacity_window_size:
                    self.capacity_history_window.pop(0)
                # Periodically replace the history with the element-wise maximum
                # over WORLD of the five most recent samples, then re-plan.
                if self.cur_index_window == self.capacity_window_size - 1:
                    self.cur_index_window = 0
                    capacity_history_window_tensor = torch.Tensor(self.capacity_history_window[-5:]).to(combine_weights.device)
                    dist.all_reduce(capacity_history_window_tensor, op=torch.distributed.ReduceOp.MAX,
                                    group=dist.group.WORLD)
                    self.capacity_history_window = capacity_history_window_tensor.cpu().numpy().tolist()

                    # Next planned capacity: history mean plus a margin of 20,
                    # falling back to 256 when there is no history.
                    if len(self.capacity_history_window) > 0:
                        capacity_next_window = sum(self.capacity_history_window) / len(self.capacity_history_window) + 20
                    else:
                        capacity_next_window = 256
                    self.gate.config.dynamic_capacity = torch.ceil(torch.tensor(capacity_next_window)).to(torch.int64)
            else:
                self.l_aux, combine_weights, dispatch_mask = self.gate(reshaped_input)
            # Scatter tokens into the [expert, capacity, model] buffer.
            dispatched_input = einsum("sec,sm->ecm", dispatch_mask.type_as(input[0]), reshaped_input)
        else:
            self.l_aux, token_rearrange_infos = self.gate(reshaped_input)
            org_dtype = reshaped_input.dtype
            if org_dtype == torch.bfloat16:  # Works around an operator performance regression; remove once fixed
                rearranged_input = torch.index_select(
                    reshaped_input.to(torch.float32), dim=0, index=token_rearrange_infos.expert_select_token_idx
                ).to(org_dtype)
            else:
                rearranged_input = torch.index_select(
                    reshaped_input, dim=0, index=token_rearrange_infos.expert_select_token_idx
                )
            # expert_select_token_idx holds num_experts * capacity entries.
            capacity = token_rearrange_infos.expert_select_token_idx.size(0) // self.num_experts
            dispatched_input = rearranged_input.reshape(self.num_experts, capacity, d_model).contiguous()

        if self.pipe_experts:
            expert_output = PipeExpert.apply(self.experts, dispatched_input, self.ep_size, self.num_local_experts,
                                             self.sequence_parallel, self.pipe_experts_multi_data,
                                             self.pipe_experts_multi_stream)
        else:
            # dispatch all2all
            dispatched_input = _AllToAll.apply(self.ep_group, dispatched_input)

            # Re-shape after all-to-all: ecm -> gecm
            dispatched_input = dispatched_input.reshape(self.ep_size, self.num_local_experts, -1, d_model)
            expert_output = self.experts(dispatched_input)

            # combine all2all
            expert_output = _AllToAll.apply(self.ep_group, expert_output)

            # Re-shape back: gecm -> ecm
            expert_output = expert_output.reshape(self.ep_size * self.num_local_experts, -1, d_model)

        if not all_args.enable_token_rearrange_opt:
            combined_output = einsum("sec,ecm->sm", combine_weights.type_as(input[0]), expert_output)
        else:
            E, C, M = expert_output.shape
            org_dtype = expert_output.dtype
            # Same bfloat16 detour through float32 as on the dispatch side.
            if org_dtype == torch.bfloat16:
                valid_expert_out = torch.index_select(
                    expert_output.view(E * C, M).to(torch.float32), dim=0, index=token_rearrange_infos.token_rearranged_ec_idx
                ).to(org_dtype)
            else:
                valid_expert_out = torch.index_select(expert_output.view(E * C, M), dim=0, index=token_rearrange_infos.token_rearranged_ec_idx)
            combined_output = valid_expert_out * token_rearrange_infos.token_exp_weights.unsqueeze(1).type_as(input[0])
            # For top-2 routing the gathered rows are split into two halves
            # along dim 0 and the halves are summed.
            if all_args.moe_router_topk == 2:
                combined_output = torch.add(*torch.chunk(combined_output, all_args.moe_router_topk, dim=0))
        return combined_output.reshape(input[0].shape)
# Module-level counters; PipeExpert.backward rewrites both through `global`.
ASYNC_BW_ALL_GATHER_COUNT = 0
FLAG_GRAD_REDUCE = True


def get_async_bw_all_gather_count():
    """Return the current value of the module-level ``ASYNC_BW_ALL_GATHER_COUNT``."""
    return ASYNC_BW_ALL_GATHER_COUNT


class PipeExpertUtil:
    """Class-level state and scheduling helpers shared by ``PipeExpert`` forward/backward.

    All attributes live on the class itself, so they are shared by every caller.
    The ``*_event`` lists collect the handles returned by the asynchronous
    communication helpers; entries are indexed by
    ``expert_index * multi_data + slice_index``.
    """

    # Filled by set_parameters().
    multi_data = None
    num_local_experts = None
    slice_seq_size = None
    ep_size = None

    # Communication handles, appended to in issue order.
    first_a2a_event = []
    second_a2a_event = []
    fw_ag_event = []
    bw_ag_event = []
    ar_rs_event = []

    @classmethod
    def set_parameters(cls, args, slice_seq_size):
        """Cache the slicing parameters from the positional ``args`` of ``PipeExpert.forward``.

        ``args[1]`` is ep_size, ``args[2]`` is num_local_experts and ``args[4]``
        is multi_data.
        """
        cls.multi_data = args[4]
        cls.num_local_experts = args[2]
        cls.slice_seq_size = slice_seq_size
        cls.ep_size = args[1]

    @classmethod
    def get_first_a2a_event(cls):
        """Return the shared list of first all-to-all handles."""
        return cls.first_a2a_event

    @classmethod
    def get_second_a2a_event(cls):
        """Return the shared list of second all-to-all handles."""
        return cls.second_a2a_event

    @classmethod
    def get_fw_ag_event(cls):
        """Return the shared list of forward all-gather handles."""
        return cls.fw_ag_event

    @classmethod
    def get_bw_ag_event(cls):
        """Return the shared list of backward all-gather handles."""
        return cls.bw_ag_event

    @classmethod
    def get_ar_rs_event(cls):
        """Return the shared list of handles produced by ``async_fw_ar_rs``."""
        return cls.ar_rs_event

    @classmethod
    def deal_data(cls, origin_data, output_data):
        """Split ``origin_data`` into per-expert, per-slice chunks appended to ``output_data``.

        Dimension 0 is cut into blocks of ``ep_size`` rows (one per local
        expert) and dimension 1 into ``multi_data`` slices of
        ``slice_seq_size``. Every chunk is cloned and made contiguous.
        """
        for i in range(cls.num_local_experts):
            for j in range(cls.multi_data):
                output_data.append(origin_data[i * cls.ep_size: (i + 1) * cls.ep_size,
                                   j * cls.slice_seq_size: (j + 1) * cls.slice_seq_size].clone().contiguous())

    @classmethod
    def first_a2a_when_not_multi_stream(cls, input_data_list):
        """Issue the first all-to-all for every chunk up front and record the handles."""
        for i in range(cls.num_local_experts):
            for j in range(cls.multi_data):
                input_data_list[j + i * cls.multi_data], handle = async_all_to_all(
                    input_data_list[j + i * cls.multi_data])
                cls.first_a2a_event.append(handle)

    @classmethod
    def fw_bw_ag_after_first_a2a_when_not_multi_stream(cls, input_data_list, num_local_experts_index, multi_data_index,
                                                       is_fw_ag):
        """Issue all-gathers one chunk ahead of the compute loop (single-stream mode).

        ``is_fw_ag`` selects the forward helper (``async_fw_all_gather``) or
        the backward one (``async_all_gather(..., is_bwd=True)``). Each call
        waits for the first all-to-all of the chunk it is about to gather.
        """
        index = num_local_experts_index * cls.multi_data + multi_data_index
        # The very first chunk has no predecessor to prefetch it.
        if index == 0 and get_args().ampipe_degree <= 1:
            cls.first_a2a_event[index].wait()
            if is_fw_ag:
                input_data_list[index], handle = async_fw_all_gather(input_data_list[index])
                cls.fw_ag_event.append(handle)
            else:
                # With nanopipe decoupling, a copy of the incoming gradient is stored first.
                if get_args().use_nanopipe and WeightGradStore.is_decoupleBlock:
                    WeightGradStore.save_grad_output(input_data_list[num_local_experts_index * cls.multi_data + multi_data_index].clone().detach())
                input_data_list[index], handle = async_all_gather(input_data_list[index], is_bwd=True)
                cls.bw_ag_event.append(handle)
        # Prefetch the next chunk unless this is the last one.
        if index < (cls.num_local_experts * cls.multi_data - 1):
            cls.first_a2a_event[index + 1].wait()
            if is_fw_ag:
                # NOTE(review): the extra positional arguments (None, True) are
                # only passed for the first prefetch without nanopipe; their
                # meaning is defined by async_fw_all_gather, not visible here.
                if index == 0 and not get_args().use_nanopipe:
                    input_data_list[index + 1], handle = async_fw_all_gather(input_data_list[index + 1], None, True)
                else:
                    input_data_list[index + 1], handle = async_fw_all_gather(input_data_list[index + 1])
                cls.fw_ag_event.append(handle)
            else:
                if get_args().use_nanopipe and WeightGradStore.is_decoupleBlock:
                    WeightGradStore.save_grad_output(input_data_list[num_local_experts_index * cls.multi_data + multi_data_index + 1].clone().detach())
                if index == 0 and not get_args().use_nanopipe:
                    input_data_list[index + 1], handle = async_all_gather(input_data_list[index + 1], None, True, True)
                else:
                    input_data_list[index + 1], handle = async_all_gather(input_data_list[index + 1], is_bwd=True)
                cls.bw_ag_event.append(handle)

    @classmethod
    def fw_bw_ag_after_first_a2a_when_multi_stream(cls, input_data_list, num_local_experts_index, multi_data_index,
                                                   is_fw_ag):
        """Issue all-to-all and all-gather one chunk ahead (multi-stream mode).

        Unlike the single-stream variant, the all-to-all is issued here too.
        Each operation receives the handle of the operation it follows instead
        of waiting on it in Python.
        """
        index = num_local_experts_index * cls.multi_data + multi_data_index
        if index == 0:
            input_data_list[index], handle = async_all_to_all(input_data_list[index])
            cls.first_a2a_event.append(handle)
            if is_fw_ag:
                input_data_list[index], handle = async_fw_all_gather(
                    input_data_list[index], cls.first_a2a_event[index])
                cls.fw_ag_event.append(handle)
            else:
                input_data_list[index], handle = async_all_gather(
                    input_data_list[index], cls.first_a2a_event[index], is_bwd=True)
                cls.bw_ag_event.append(handle)
        if index < (cls.num_local_experts * cls.multi_data - 1):
            if is_fw_ag:
                # The next chunk's all-to-all is chained after this chunk's all-gather.
                input_data_list[index + 1], handle = async_all_to_all(
                    input_data_list[index + 1], cls.fw_ag_event[index])
                cls.first_a2a_event.append(handle)
                if index == 0 and not get_args().use_nanopipe:
                    input_data_list[index + 1], handle = async_fw_all_gather(
                        input_data_list[index + 1], cls.first_a2a_event[index + 1], True)
                else:
                    input_data_list[index + 1], handle = async_fw_all_gather(
                        input_data_list[index + 1], cls.first_a2a_event[index + 1])
                cls.fw_ag_event.append(handle)
            else:
                input_data_list[index + 1], handle = async_all_to_all(
                    input_data_list[index + 1], cls.bw_ag_event[index])
                cls.first_a2a_event.append(handle)
                if index == 0 and not get_args().use_nanopipe:
                    input_data_list[index + 1], handle = async_all_gather(
                        input_data_list[index + 1], cls.first_a2a_event[index + 1], True, True)
                else:
                    input_data_list[index + 1], handle = async_all_gather(
                        input_data_list[index + 1], cls.first_a2a_event[index + 1], is_bwd=True)
                cls.bw_ag_event.append(handle)

    @classmethod
    def fw_a2a_after_ar_rs_when_not_multi_stream(cls, num_local_experts_index, multi_data_index,
                                                 output_list_for_each_multi_data, outputs_list_for_each_local_expert):
        """Start the second all-to-all for the previous chunk once its ar/rs handle completes.

        The previous chunk is either the preceding slice of the current expert
        (read from ``output_list_for_each_multi_data``) or the last slice of the
        preceding expert (read from ``outputs_list_for_each_local_expert``).
        Nothing is issued for the very first chunk.
        """
        if cls.multi_data == 1:
            # One slice per expert: the previous chunk always belongs to the previous expert.
            if num_local_experts_index > 0:
                cls.ar_rs_event[num_local_experts_index - 1].wait()
                outputs_list_for_each_local_expert[num_local_experts_index - 1][0], handle = async_all_to_all(
                    outputs_list_for_each_local_expert[num_local_experts_index - 1][0])
                cls.second_a2a_event.append(handle)
        else:
            if multi_data_index > 0:
                cls.ar_rs_event[num_local_experts_index * cls.multi_data + multi_data_index - 1].wait()
                output_list_for_each_multi_data[multi_data_index - 1], handle = async_all_to_all(
                    output_list_for_each_multi_data[multi_data_index - 1])
                cls.second_a2a_event.append(handle)
            else:
                if num_local_experts_index > 0:
                    cls.ar_rs_event[num_local_experts_index * cls.multi_data + multi_data_index - 1].wait()
                    outputs_list_for_each_local_expert[num_local_experts_index - 1][
                        cls.multi_data - 1], handle = async_all_to_all(
                        outputs_list_for_each_local_expert[num_local_experts_index - 1][cls.multi_data - 1])
                    cls.second_a2a_event.append(handle)

    @classmethod
    def fw_a2a_for_final_data_when_not_multi_stream(cls, outputs_list_for_each_local_expert):
        """Start the second all-to-all for the last chunk, which has no successor to trigger it."""
        cls.ar_rs_event[cls.num_local_experts * cls.multi_data - 1].wait()
        outputs_list_for_each_local_expert[cls.num_local_experts - 1][
            cls.multi_data - 1], handle = async_all_to_all(
            outputs_list_for_each_local_expert[cls.num_local_experts - 1][cls.multi_data - 1])
        cls.second_a2a_event.append(handle)
cls.ar_rs_event[num_local_experts_index * cls.multi_data + multi_data_index - 1].wait() + outputs_list_for_each_local_expert[num_local_experts_index - 1][ + cls.multi_data - 1], handle = async_all_to_all( + outputs_list_for_each_local_expert[num_local_experts_index - 1][cls.multi_data - 1]) + cls.second_a2a_event.append(handle) + + @classmethod + def fw_a2a_for_final_data_when_not_multi_stream(cls, outputs_list_for_each_local_expert): + cls.ar_rs_event[cls.num_local_experts * cls.multi_data - 1].wait() + outputs_list_for_each_local_expert[cls.num_local_experts - 1][ + cls.multi_data - 1], handle = async_all_to_all( + outputs_list_for_each_local_expert[cls.num_local_experts - 1][cls.multi_data - 1]) + cls.second_a2a_event.append(handle) + + +class PipeExpert(torch.autograd.Function): + + @staticmethod + def forward(ctx, Experts, *args): + inputs = args[0] + ep_size = args[1] + num_local_experts = args[2] + sequence_parallel = args[3] + multi_data = args[4] + multi_stream = args[5] + + ctx.num_local_experts = num_local_experts + ctx.sequence_parallel = sequence_parallel + ctx.multi_data = multi_data + ctx.multi_stream = multi_stream + + inputs_list = [] + ampipe_degree = get_args().ampipe_degree + ctx.ampipe_degree = ampipe_degree + if ampipe_degree > 1: + PipeExpertUtil.first_a2a_event = args[6] + PipeExpertUtil.second_a2a_event = args[7] + PipeExpertUtil.fw_ag_event = args[8] + ctx.hidden_size = hidden_size = args[9] + save_tensors_list = args[10] + inputs_list = inputs + slice_seq_size = 0 + else: + input_shape = list(inputs.size()) + if multi_data > input_shape[1]: + raise ValueError('--pipe-experts-multi-data cannot be greater than experts capacity') + slice_seq_size = input_shape[1] // multi_data + if input_shape[1] % multi_data != 0: + slice_seq_size += 1 + + outputs_list_for_each_local_expert = [] + input_list_before_expert = [] + output_list_after_expert = [] + PipeExpertUtil.set_parameters(args, slice_seq_size) + + if ampipe_degree <= 1: + 
PipeExpertUtil.deal_data(inputs, inputs_list) + inputs.untyped_storage().resize_(0) + + if not multi_stream and ampipe_degree <= 1: + PipeExpertUtil.first_a2a_when_not_multi_stream(inputs_list) + + for i in range(num_local_experts): + output_list_for_each_multi_data = [] + for j in range(multi_data): + if sequence_parallel: + if not multi_stream: + PipeExpertUtil.fw_bw_ag_after_first_a2a_when_not_multi_stream(inputs_list, i, j, True) + elif ampipe_degree <= 1: + PipeExpertUtil.fw_bw_ag_after_first_a2a_when_multi_stream(inputs_list, i, j, True) + + PipeExpertUtil.get_fw_ag_event()[i * multi_data + j].wait() + else: + PipeExpertUtil.get_first_a2a_event()[i * multi_data + j].wait() + + input_detach_before_expert = inputs_list[i * multi_data + j].detach() + input_detach_before_expert.requires_grad = True + input_list_before_expert.append(input_detach_before_expert) + + with torch.enable_grad(): + output_expert = Experts.experts[i](input_list_before_expert[i * multi_data + j]) + if sequence_parallel: + get_fw_ag_output().pop(0) + + if isinstance(output_expert, tuple): + output_expert, bias = output_expert + if bias is not None: + with torch.enable_grad(): + output_expert = output_expert + bias + + output_list_after_expert.append(output_expert) + output_detach_after_expert = output_expert.detach() + + if not multi_stream: + PipeExpertUtil.fw_a2a_after_ar_rs_when_not_multi_stream(i, j, output_list_for_each_multi_data, + outputs_list_for_each_local_expert) + + output_detach_after_expert, handle = async_fw_ar_rs(output_detach_after_expert, sequence_parallel) + output_list_for_each_multi_data.append(output_detach_after_expert) + PipeExpertUtil.get_ar_rs_event().append(handle) + else: + # all2all allgather wait release memory + PipeExpertUtil.get_first_a2a_event()[i * multi_data + j].wait() + PipeExpertUtil.get_fw_ag_event()[i * multi_data + j].wait() + + output_detach_after_expert, handle = async_fw_ar_rs(output_detach_after_expert, sequence_parallel) + 
@staticmethod
def backward(ctx, *args):
    """Backward pass of the pipelined experts.

    For every (local expert, data slice) pair this waits for the gradient slice
    to arrive, replays ``backward`` on the expert output saved by ``forward``,
    and sends the resulting input gradient through an asynchronous all-to-all.

    Args:
        ctx: autograd context populated by ``forward``.
        *args: ``args[0]`` is the incoming gradient. When ``ampipe_degree > 1``
            the caller additionally passes the first all-to-all events, the
            backward all-gather events, the second all-to-all events and the
            expert outputs as ``args[1]`` .. ``args[4]``.

    Returns:
        With ``1 < ampipe_degree <= multi_data``: a two-element list
        ``[first_ampipe_grad_inputs, second_ampipe_grad_input]``.
        Otherwise: a 7-tuple whose second entry is the concatenated input
        gradient and whose other entries are ``None``.
    """
    num_local_experts = ctx.num_local_experts
    sequence_parallel = ctx.sequence_parallel
    multi_stream = ctx.multi_stream
    multi_data = ctx.multi_data
    ampipe_degree = ctx.ampipe_degree

    grad_outputs = args[0]
    global ASYNC_BW_ALL_GATHER_COUNT
    ASYNC_BW_ALL_GATHER_COUNT = 0

    grad_outputs_list = []
    grad_outputs_list_for_each_local_expert = []
    if ampipe_degree > 1:
        # In ampipe mode the caller already launched the communication and hands
        # over the pending events together with the expert outputs.
        PipeExpertUtil.first_a2a_event = args[1]
        PipeExpertUtil.bw_ag_event = args[2]
        PipeExpertUtil.second_a2a_event = args[3]
        output_list_after_expert = args[4]
        grad_outputs_list = grad_outputs
    else:
        output_list_after_expert = list(ctx.saved_tensors)

    if ampipe_degree <= 1:
        PipeExpertUtil.deal_data(grad_outputs, grad_outputs_list)
        # Release the incoming gradient buffer once it has been split up.
        # untyped_storage() matches forward(); Tensor.storage() is deprecated.
        grad_outputs.untyped_storage().resize_(0)

    if not multi_stream and ampipe_degree <= 1:
        PipeExpertUtil.first_a2a_when_not_multi_stream(grad_outputs_list)

    for i in range(num_local_experts):
        grad_output_list_for_each_multi_data = []
        global FLAG_GRAD_REDUCE
        FLAG_GRAD_REDUCE = False
        for j in range(multi_data):
            if sequence_parallel:
                if not multi_stream:
                    PipeExpertUtil.fw_bw_ag_after_first_a2a_when_not_multi_stream(grad_outputs_list, i, j, False)

                elif ampipe_degree <= 1:
                    PipeExpertUtil.fw_bw_ag_after_first_a2a_when_multi_stream(grad_outputs_list, i, j, False)

                PipeExpertUtil.get_bw_ag_event()[i * multi_data + j].wait()
            else:
                PipeExpertUtil.get_first_a2a_event()[i * multi_data + j].wait()
            ASYNC_BW_ALL_GATHER_COUNT += 1
            # The flag is raised only for the last data slice of each expert.
            if j == multi_data - 1:
                FLAG_GRAD_REDUCE = True
            # With ampipe_degree == 1 the offset (multi_data // ampipe_degree) is a
            # full rotation, so the index reduces to i * multi_data + j.
            output_list_after_expert[i * multi_data + (multi_data // ampipe_degree + j) % multi_data].backward(
                grad_outputs_list[i * multi_data + j])
            grads_expert_output = ctx.input_list_before_expert[
                i * multi_data + (multi_data // ampipe_degree + j) % multi_data].grad

            grads_expert_output, handle = async_all_to_all(grads_expert_output)
            grad_output_list_for_each_multi_data.append(grads_expert_output)
            PipeExpertUtil.get_second_a2a_event().append(handle)
        grad_outputs_list_for_each_local_expert.append(grad_output_list_for_each_multi_data)

    if 1 < ampipe_degree <= multi_data:
        for i in range(num_local_experts):
            for j in range(multi_data):
                index = i * multi_data + j
                # NOTE(review): the final event is deliberately not waited on here and
                # the second-a2a list is not cleared; presumably the ampipe caller
                # consumes it - confirm.
                if index < len(PipeExpertUtil.get_second_a2a_event()) - 1:
                    PipeExpertUtil.get_second_a2a_event()[index].wait()

        for event in PipeExpertUtil.get_first_a2a_event():
            event.wait()

        for event in PipeExpertUtil.get_bw_ag_event():
            event.wait()

        PipeExpertUtil.get_first_a2a_event().clear()
        PipeExpertUtil.get_bw_ag_event().clear()

        # Regroup every expert's per-slice gradients into ampipe_degree chunks.
        output_list = []
        for i in range(num_local_experts):
            exp_out_list = []
            for j in range(ampipe_degree):
                ampipe_tokens = grad_outputs_list_for_each_local_expert[i][
                    j * multi_data // ampipe_degree:(j + 1) * multi_data // ampipe_degree]
                exp_out_list.append(ampipe_tokens)
            output_list.append(exp_out_list)
        second_ampipe_grad_input = torch.cat([torch.cat(i[0], dim=1) for i in output_list], dim=1)
        second_ampipe_grad_input = second_ampipe_grad_input.reshape((num_local_experts * PipeExpertUtil.ep_size), -1,
                                                                    ctx.hidden_size)
        first_ampipe_grad_inputs = [i[1] for i in output_list]

        return [first_ampipe_grad_inputs, second_ampipe_grad_input]
    else:
        for i in range(num_local_experts):
            for j in range(multi_data):
                PipeExpertUtil.get_second_a2a_event()[i * multi_data + j].wait()

        for event in PipeExpertUtil.get_first_a2a_event():
            event.wait()

        for event in PipeExpertUtil.get_bw_ag_event():
            event.wait()

        PipeExpertUtil.get_second_a2a_event().clear()
        PipeExpertUtil.get_first_a2a_event().clear()
        PipeExpertUtil.get_bw_ag_event().clear()
        grad_output = torch.cat(
            [torch.cat((grad_outputs_list_for_each_local_expert[i]), dim=1) for i in range(num_local_experts)], dim=0)

    # Drop the references to the detached expert inputs kept by forward().
    ctx.input_list_before_expert = None
    return None, grad_output, None, None, None, None, None
gumbel_map: Dict[torch.device, Callable] = {}
USE_EINSUM = False
ampipe_slices_map = {}
# Arguments each tensor in ampipe_slices_map was built for, keyed identically.
_ampipe_slices_signature = {}


def print_rank_0(message):
    """If distributed is initialized, print only on rank 0."""
    if torch.distributed.is_initialized():
        if torch.distributed.get_rank() == 0:
            print(message, flush=True)
    else:
        print(message, flush=True)


# Based on https://github.com/pytorch/pytorch/pull/40762
class _AllToAll(torch.autograd.Function):
    """All-to-all over ``group`` whose backward is another all-to-all over the same group."""

    @staticmethod
    def forward(ctx: Any, group: dist.ProcessGroup, input: Tensor) -> Tensor:  # type: ignore
        ctx.group = group
        input = input.contiguous()
        output = torch.empty_like(input)
        dist.all_to_all_single(output, input, group=group)
        return output

    @staticmethod
    def backward(ctx: Any, *grad_output: Tensor) -> Tuple[None, Tensor]:
        return (None, _AllToAll.apply(ctx.group, *grad_output))


def all_gather_along_first_dim(input_, is_use_global_memory_buffer=False):
    """All-gather ``input_`` across the tensor-model-parallel group along dim 0.

    Args:
        input_: local tensor.
        is_use_global_memory_buffer: take the output buffer from megatron's
            global memory buffer (name ``"mpu"``) instead of allocating it.

    Returns:
        ``input_`` itself when the group has a single rank, otherwise a tensor
        whose first dimension is ``world_size`` times larger.
    """
    world_size = mpu.get_tensor_model_parallel_world_size()
    if world_size == 1:
        return input_
    dim_size = list(input_.size())
    dim_size[0] = dim_size[0] * world_size
    if is_use_global_memory_buffer:
        ag_out = mpu.get_global_memory_buffer().get_tensor(dim_size, input_.dtype, "mpu")
    else:
        ag_out = torch.empty(dim_size, dtype=input_.dtype, device=torch.cuda.current_device())
    torch.distributed._all_gather_base(
        ag_out, input_.contiguous(), group=mpu.get_tensor_model_parallel_group()
    )
    return ag_out


def get_reshape_index_select(num_local_experts, ep_size):
    """Return ``[i + j * num_local_experts]`` for each local expert ``i`` and each ``j < ep_size``.

    The result is grouped by local expert, e.g. ``(2, 3) -> [0, 2, 4, 1, 3, 5]``.
    """
    return [
        local_expert + ep_rank * num_local_experts
        for local_expert in range(num_local_experts)
        for ep_rank in range(ep_size)
    ]


def _get_or_build_slice_indices(cache_key, seq_length, pipe_degree, device, outer_split_by_tp):
    """Build the ampipe index permutation, or reuse it when built for identical arguments.

    The tensor is stored in ``ampipe_slices_map[cache_key]`` as before; the
    arguments it was built for are remembered separately so that a call with a
    different sequence length, pipe degree, TP size or device rebuilds it
    instead of silently returning a stale tensor.
    """
    tp_size = mpu.get_tensor_model_parallel_world_size()
    signature = (seq_length, pipe_degree, tp_size, str(device))
    cached = ampipe_slices_map.get(cache_key)
    if cached is not None and _ampipe_slices_signature.get(cache_key) == signature:
        return cached

    slice_size = seq_length // tp_size // pipe_degree
    if slice_size <= 0:
        # range() would fail with an opaque "arg 3 must not be zero" otherwise.
        raise ValueError(
            f"seq_length ({seq_length}) is too small for tp_size ({tp_size}) * pipe_degree ({pipe_degree})")
    if outer_split_by_tp:
        outer_stop, inner_step = seq_length // tp_size, pipe_degree * slice_size
    else:
        outer_stop, inner_step = seq_length // pipe_degree, tp_size * slice_size

    indices = [
        start + offset
        for outer in range(0, outer_stop, slice_size)
        for start in range(outer, seq_length, inner_step)
        for offset in range(slice_size)
    ]
    output = torch.tensor(indices, dtype=torch.int32, device=device)
    ampipe_slices_map[cache_key] = output
    _ampipe_slices_signature[cache_key] = signature
    return output


def get_slice_indices_from_order_to_disorder(seq_length, pipe_degree, device):
    """Return the cached int32 index tensor stored under ``'order_to_disorder'``.

    Slices of ``seq_length // tp_size // pipe_degree`` positions are emitted by
    walking the first ``seq_length // tp_size`` positions and, for each slice
    start, striding through the sequence by ``pipe_degree`` slices.
    """
    return _get_or_build_slice_indices('order_to_disorder', seq_length, pipe_degree, device, True)


def get_slice_indices_from_disorder_to_order(seq_length, pipe_degree, device):
    """Return the cached int32 index tensor stored under ``'disorder_to_order'``.

    Same slice size as the order-to-disorder variant, but the outer walk covers
    the first ``seq_length // pipe_degree`` positions and the inner stride is
    ``tp_size`` slices.
    """
    return _get_or_build_slice_indices('disorder_to_order', seq_length, pipe_degree, device, False)


def _one_hot_to_float(x, num_classes):
    """One-hot encode ``x`` with ``num_classes`` classes and cast to float."""
    return F.one_hot(x, num_classes=num_classes).float()


def _capacity(gates: Tensor, capacity_factor: Tensor, min_capacity: Tensor) -> Tensor:
    """Return ``ceil(tokens / experts * capacity_factor)`` clamped to ``[min_capacity, tokens]``."""
    # gates has shape of S,E
    num_tokens = gates.shape[0]
    num_experts = gates.shape[1]
    max_capacity = num_tokens
    # to(torch.int64) works around a bug in torch.onnx.export:
    # it should cast k to int64 when converting torch.topk but it doesn't.
    capacity = torch.ceil((num_tokens / num_experts) * capacity_factor).to(torch.int64)
    if capacity < min_capacity:
        capacity = min_capacity.to(torch.int64)
    elif capacity > max_capacity:
        # Keep the clamp on the same device as the computed capacity.
        capacity = torch.tensor(max_capacity, dtype=torch.int64, device=capacity.device)
    return capacity


def gumbel_rsample(shape: Tuple, device: torch.device) -> Tensor:
    """Draw a Gumbel(0, 1) sample of ``shape``; the sampler is cached per device in ``gumbel_map``."""
    gumbel = gumbel_map.get(device)
    if gumbel is None:
        one = torch.tensor(1.0, device=device)
        zero = torch.tensor(0.0, device=device)
        gumbel = torch.distributions.gumbel.Gumbel(zero, one).rsample  # type: ignore
        gumbel_map[device] = gumbel
    return gumbel(shape)


# einsum dimensions: (g)roup, (s)equence, (e)xpert, (m)odel, (c)apacity
# See https://arxiv.org/pdf/2006.16668.pdf for details.
def einsum(rule, a, b):
    """Evaluate a two-operand einsum, using hand-written kernels for the known MoE rules.

    When ``USE_EINSUM`` is true, or ``rule`` is not one of the special cases,
    the call is forwarded to ``torch.einsum``. Every special case returns the
    same shape as ``torch.einsum`` would.
    """
    if USE_EINSUM:
        return torch.einsum(rule, a, b)
    elif rule == 's,se->se':
        return a.reshape(a.shape[0], -1) * b
    elif rule == 'se,sc->sec':
        return a.unsqueeze(2) * b.unsqueeze(1)
    elif rule == 'se,se->s':
        return torch.bmm(a.unsqueeze(1), b.unsqueeze(2)).reshape(-1)
    elif rule == 'sec,sm->ecm':
        s = a.shape[0]
        e = a.shape[1]
        c = a.shape[2]
        m = b.shape[1]
        return torch.matmul(a.reshape(s, -1).t(), b).reshape(e, c, m)
    elif rule == 'sec,ecm->sm':
        return torch.matmul(a.reshape(a.shape[0], -1), b.reshape(-1, b.shape[-1]))
    elif rule == 'ks,ksm->sm':
        k = b.shape[0]
        s = b.shape[1]
        m = b.shape[2]
        # [k, s] -> [s, k] -> [s, 1, k]
        a = a.t().unsqueeze(1)
        # [k,s,m] -> [k, sm] -> [sm, k] -> [s, m, k]
        b = b.reshape(k, -1).t().reshape(s, m, k)
        # bmm([s, 1, k], [s, k, m]) -> [s, 1, m]; drop the singleton axis 1.
        # (Squeezing axis 2 left the result as [s, 1, m] whenever m > 1.)
        return torch.bmm(a, b.transpose(1, 2)).squeeze(1)
    else:
        return torch.einsum(rule, a, b)


class MoEAuxLossAutoScaler(torch.autograd.Function):
    """An AutoScaler that computes and scales the grad for the auxiliary loss."""

    # Class-wide scale applied to the auxiliary-loss gradient in backward().
    main_loss_backward_scale: torch.Tensor = torch.tensor(1.0)

    @staticmethod
    def forward(ctx, output: torch.Tensor, aux_loss: torch.Tensor):
        """Preserve the aux_loss by storing it in the context to avoid garbage collection.

        Args:
            output (torch.Tensor): The output tensor.
            aux_loss (torch.Tensor): The auxiliary loss tensor.

        Returns:
            torch.Tensor: The output tensor.
        """
        ctx.save_for_backward(aux_loss)
        return output

    @staticmethod
    def backward(ctx, grad_output: torch.Tensor):
        """Compute and scale the gradient for the auxiliary loss.

        Args:
            grad_output (torch.Tensor): The gradient of the output.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: The gradient of the output, scaled auxiliary loss gradient.
        """
        (aux_loss,) = ctx.saved_tensors
        aux_loss_backward_scale = MoEAuxLossAutoScaler.main_loss_backward_scale
        scaled_aux_loss_grad = torch.ones_like(aux_loss) * aux_loss_backward_scale
        return grad_output, scaled_aux_loss_grad

    @staticmethod
    def set_loss_scale(scale: torch.Tensor):
        """Set the scale of the aux loss.

        Args:
            scale (torch.Tensor): The scale value to set. Please ensure that the scale passed in matches the scale of the main_loss.
        """
        MoEAuxLossAutoScaler.main_loss_backward_scale = scale
def get_range_list_of_3dshape(dim_size, world_size, kernel_size, stride):
    """Compute the ``[start, end)`` input range each rank convolves over.

    Window start positions are ``0, stride, 2 * stride, ...``. Rank ``r`` owns
    the starts in ``[r * chunk, (r + 1) * chunk)`` with
    ``chunk = dim_size // world_size``. Its range is extended by
    ``kernel_size - 1`` past its last start so the last window is complete; the
    final rank always runs to ``dim_size``.

    Returns:
        A list with one ``[start_idx, end_idx]`` pair per rank.
    """
    def find_last_le_k(arr, k):
        # Largest element strictly below k (falls back to the last element).
        return max((element for element in arr if element < k), default=arr[-1])

    def find_first_ge_k(arr, k):
        return next((element for element in arr if element >= k), arr[-1])

    stride_index = list(range(0, dim_size, stride))
    depth_per_sp = dim_size // world_size
    range_list = []
    for rank in range(world_size):
        start_idx = find_first_ge_k(stride_index, rank * depth_per_sp)
        last_idx = find_last_le_k(stride_index, (rank + 1) * depth_per_sp) + 1
        end_idx = last_idx + kernel_size - 1 if rank < world_size - 1 else dim_size

        range_list.append([start_idx, end_idx])
    return range_list


def _slice_along(tensor, dim, start_idx, end_idx):
    """Return ``tensor[..., start_idx:end_idx, ...]`` taken along ``dim``, made contiguous."""
    index = [slice(None)] * tensor.dim()
    index[dim] = slice(start_idx, end_idx)
    return tensor[tuple(index)].contiguous()


def _split(input_, pg: dist.ProcessGroup, dim=-1, kernel_size=1, stride=1, depth_range=None):
    """Keep only this rank's slice of ``input_`` along ``dim``.

    Args:
        input_: full tensor.
        pg: process group the split is performed over.
        dim: axis to split. The in-file callers always pass 2.
        kernel_size, stride: convolution parameters along ``dim``.
        depth_range: optional precomputed per-rank ``[start, end]`` list; when
            given it is used instead of recomputing the ranges.

    Returns:
        ``(local_slice, ranges)`` where ``ranges`` is the per-rank range list
        that was computed, or ``None`` when nothing was computed. Always a
        2-tuple, so callers can unpack it for any world size.
    """
    # skip if only one rank involved
    world_size = dist.get_world_size(pg)
    rank = dist.get_rank(pg)
    if world_size == 1:
        return input_, None

    if depth_range:
        start_idx, end_idx = depth_range[rank]
        return _slice_along(input_, dim, start_idx, end_idx), None

    # Split along `dim`, the same axis whose size is measured.
    dim_size = input_.size(dim)

    start_end_idx_list = get_range_list_of_3dshape(dim_size, world_size, kernel_size, stride)
    start_idx, end_idx = start_end_idx_list[rank]
    output = _slice_along(input_, dim, start_idx, end_idx)

    return output, start_end_idx_list


def _gather(input_, pg: dist.ProcessGroup, total_depth, dim=2, kernel_size=1, stride=1, is_forward=True):
    """All-gather the per-rank slices produced around a depth-split convolution.

    In the forward direction the receive buffers are sized like each rank's
    convolution output and the result is their concatenation along ``dim``.
    In the backward direction they are sized like each rank's input range, and
    since neighbouring ranges overlap by the kernel halo, the gathered pieces
    are summed into a zero tensor of depth ``total_depth``.

    The receive buffers are built by indexing axis 2 of a 5-D tensor, so only
    ``dim=2`` is meaningful here.

    Returns:
        ``(output, ranges)`` where ``ranges`` lists the ``[start, end]`` pair of
        every rank in the output. ``(input_, None)`` for a single-rank group.
    """
    input_ = input_.contiguous()
    world_size = dist.get_world_size(pg)
    padding = 0  # not support padding currently

    # skip if only one rank involved
    if world_size == 1:
        return input_, None

    tensor_list = []
    start_end_idx_list = get_range_list_of_3dshape(total_depth, world_size, kernel_size, stride)

    if is_forward:
        # forward: build the shapes after conv
        output_start_end_idx_list = []
        last_end_idx = 0
        for start_idx, end_idx in start_end_idx_list:
            length = end_idx - start_idx
            # O = (W-K+2P)/S + 1
            length = (length - kernel_size + 2 * padding) // stride + 1
            output_start_end_idx_list.append([last_end_idx, last_end_idx + length])
            last_end_idx = last_end_idx + length
            tensor_list.append(torch.empty_like(input_[:, :, 0:1, :, :].expand(-1, -1, length, -1, -1)))
    else:
        # backward: build the original shapes before conv
        output_start_end_idx_list = []
        for start_idx, end_idx in start_end_idx_list:
            output_start_end_idx_list.append([start_idx, end_idx])
            tensor_list.append(torch.empty_like(input_[:, :, 0:1, :, :].expand(-1, -1, end_idx - start_idx, -1, -1)))

    dist.all_gather(tensor_list, input_, group=pg)
    if is_forward:
        output = torch.cat(tensor_list, dim=dim).contiguous()
    else:
        # Overlapping halo regions receive contributions from both neighbours.
        output = torch.zeros_like(input_[:, :, 0:1, :, :].expand(-1, -1, total_depth, -1, -1))
        for tensor, (start_idx, end_idx) in zip(tensor_list, output_start_end_idx_list):
            output[:, :, start_idx:end_idx, :, :] += tensor
    return output, output_start_end_idx_list


class _ConvGatherForwardSplitBackward(torch.autograd.Function):
    """Gather the conv outputs of all ranks in forward; hand each rank its own grad slice in backward."""

    @staticmethod
    def forward(ctx, input_, process_group, total_depth, dim, kernel_size, stride):
        ctx.mode = process_group
        ctx.dim = dim
        ctx.kernel_size = kernel_size
        ctx.stride = stride
        output, depth_range = _gather(input_, process_group, total_depth, dim, kernel_size, stride, True)
        ctx.depth_range = depth_range
        return output

    @staticmethod
    def backward(ctx, grad_output):
        output, _ = _split(grad_output, ctx.mode, ctx.dim, ctx.kernel_size, ctx.stride, ctx.depth_range)
        # One gradient per forward input: only input_ is differentiable.
        return output, None, None, None, None, None


class _ConvSplitForwardGatherBackward(torch.autograd.Function):
    """Give each rank its (halo-extended) input slice in forward; sum the grads back in backward."""

    @staticmethod
    def forward(ctx, input_, process_group, dim, kernel_size, stride):
        ctx.mode = process_group
        ctx.dim = dim
        ctx.kernel_size = kernel_size
        ctx.stride = stride
        ctx.total_depth = input_.shape[dim]
        output, _ = _split(input_, process_group, dim, kernel_size, stride)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        output, _ = _gather(grad_output, ctx.mode, ctx.total_depth, ctx.dim, ctx.kernel_size, ctx.stride, False)
        # One gradient per forward input: only input_ is differentiable.
        return output, None, None, None, None


class AllReduceFunction(torch.autograd.Function):
    """Identity in forward; in backward, sum-all-reduce the gradients of ``conv3d_module``'s parameters."""

    @staticmethod
    def forward(ctx, input, conv3d_module, param_async, grad_reduce_handles):
        ctx.grad_reduce_handles = grad_reduce_handles
        ctx.param_async = param_async
        ctx.conv3d = conv3d_module
        return input

    @staticmethod
    def backward(ctx, grad_output):
        # NOTE(review): no group is passed, so this reduces over the default process
        # group rather than the sequence-parallel group - confirm that is intended.
        for param in ctx.conv3d.parameters():
            if param.grad is not None:
                if ctx.param_async:
                    handle = torch.distributed.all_reduce(param.grad, op=torch.distributed.ReduceOp.SUM, async_op=True)
                    ctx.grad_reduce_handles.append(handle)
                else:
                    torch.distributed.all_reduce(param.grad, op=torch.distributed.ReduceOp.SUM)
        return grad_output, None, None, None


class Conv3DSequenceParallel(nn.Module):
    """3-D convolution whose depth axis (dim 2) is split across a process group.

    Args:
        pg: process group the depth axis is split over.
        in_channels, out_channels, kernel_size, stride, dilation, bias: passed
            to ``nn.Conv3d``; ``kernel_size[0]`` and ``stride[0]`` are the depth
            kernel and stride.
        param_async: launch the parameter-gradient all-reduce asynchronously and
            collect the handles (see ``wait_param_grad_reduce_handles``).
        dtype: dtype the convolution is cast to.
        sp_size: the split is only applied when this is greater than 1.
    """

    def __init__(self,
                 pg: dist.ProcessGroup,
                 in_channels,
                 out_channels,
                 kernel_size=(1, 1, 1),
                 stride=(1, 1, 1),
                 dilation=1,
                 bias=True,
                 param_async=False,
                 dtype=torch.bfloat16,
                 sp_size=1):
        super().__init__()
        self.sp_size = sp_size
        self.depth_kernel_size = kernel_size[0]
        self.depth_stride = stride[0]
        self.param_async = param_async
        self.padding = 0  # not support padding currently
        self.pg = pg
        self.world_size = dist.get_world_size(pg)
        self.grad_reduce_handles = []

        self.conv3d = nn.Conv3d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=self.padding,
            dilation=dilation,
            bias=bias
        ).npu().to(dtype)

    def forward(self, x):
        depth = x.shape[2]  # [batch_size, in_channels, depth, height, width]

        # O = (W-K+2P)/S + 1
        depth_after_conv = (depth - self.depth_kernel_size + 2 * self.padding) // self.depth_stride + 1
        # Only split when every rank would receive at least one output frame.
        use_split = self.sp_size > 1 and (depth_after_conv // self.world_size) > 0
        if use_split:
            x = AllReduceFunction.apply(x, self.conv3d, self.param_async, self.grad_reduce_handles)
            x = _ConvSplitForwardGatherBackward.apply(x, self.pg, 2, self.depth_kernel_size, self.depth_stride)

        x = self.conv3d(x)

        if use_split:
            x = _ConvGatherForwardSplitBackward.apply(x, self.pg, depth, 2, self.depth_kernel_size, self.depth_stride)

        return x

    def get_param_grad_reduce_handles(self):
        """Return the pending asynchronous all-reduce handles."""
        return self.grad_reduce_handles

    def wait_param_grad_reduce_handles(self):
        """Wait for every pending all-reduce handle and reset the list."""
        for handle in self.grad_reduce_handles:
            handle.wait()
        self.grad_reduce_handles = []
import communication, config, inner_data_parallel, pipeline_parallel, parallel_state, training, checkpointing diff --git a/model/train/yoco_moe/mindspeed/multi_modal/dist_train/checkpointing.py b/model/train/yoco_moe/mindspeed/multi_modal/dist_train/checkpointing.py new file mode 100644 index 000000000..3faf3f1d0 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/multi_modal/dist_train/checkpointing.py @@ -0,0 +1,48 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Copyright (c) 2025, Huawei Technologies Co., Ltd. All rights reserved. 
def get_checkpoint_name_wrapper(get_checkpoint_name):
    """Return a replacement for ``get_checkpoint_name`` that delegates to ``_get_checkpoint_name``.

    The wrapped callable is never invoked; it only lends its metadata through
    ``functools.wraps``.
    """
    @wraps(get_checkpoint_name)
    def wrapper(*args, **kwargs):
        return _get_checkpoint_name(*args, **kwargs)
    return wrapper


def _get_checkpoint_name(checkpoints_path, iteration, release=False, **kwargs):
    """Build the checkpoint file path for the current rank.

    Args:
        checkpoints_path: root directory of all checkpoints.
        iteration: iteration number, rendered as ``iter_XXXXXXX`` (7 digits).
        release: use the ``release`` directory instead of the iteration one.
        **kwargs: ``return_base_dir=True`` returns the iteration/release
            directory itself.

    Returns:
        The base directory, or ``<base>/mp_<model>_rank_<tp>[_<pp>]/model_optim_rng.pt``
        where the pipeline suffix is only added when pipeline parallelism is on.
    """
    directory = 'release' if release else 'iter_{:07d}'.format(iteration)
    base_dir = os.path.join(checkpoints_path, directory)
    if kwargs.get('return_base_dir', False):
        return base_dir

    pipeline_parallel = (mpu.get_pipeline_model_parallel_world_size() > 1)
    tensor_rank = mpu.get_tensor_model_parallel_rank()
    pipeline_rank = mpu.get_pipeline_model_parallel_rank()
    model_name = get_dist_model_name()

    rank_dir = f'mp_{model_name}_rank_{tensor_rank:02d}'
    if pipeline_parallel:
        rank_dir = f'mp_{model_name}_rank_{tensor_rank:02d}_{pipeline_rank:03d}'

    return os.path.join(base_dir, rank_dir, "model_optim_rng.pt")
TENSOR_SYNC_TOOL: "TensorSyncTool" = None


def init_tensor_sync_tool():
    """Create the module-wide ``TensorSyncTool`` on first use and return it."""
    global TENSOR_SYNC_TOOL
    if TENSOR_SYNC_TOOL is None:
        TENSOR_SYNC_TOOL = TensorSyncTool()
    return TENSOR_SYNC_TOOL


class TensorSyncTool:
    """Encodes/decodes the fixed-size int32 header that precedes every tensor sent between models."""

    def __init__(self):
        dtypes = []
        for name in dir(torch):
            attr = getattr(torch, name)
            if isinstance(attr, torch.dtype) and attr not in torch_npu.unsupported_dtype:
                dtypes.append(attr)
        # Sorting enables different machines to obtain dtypes in the same sequence.
        dtypes = sorted(set(dtypes), key=str)

        self.type_to_int = {None: -1}
        self.type_to_int.update({dtype: i for i, dtype in enumerate(dtypes)})
        self.int_to_type = {v: k for k, v in self.type_to_int.items()}
        # fixed_header_len (10) = dtype (1) + req_grads (1) + len_shape (1) + shape (x) + pad(10 - 3 - x)
        # Thus, the maximum dimension of tensors that can be supported here is 7.
        self.fixed_header_len = 10

    def encode_tensor_header(self, tensor: torch.Tensor):
        """
        | int32 | int32     | int32      | int32 | int32 |
        | type  | req_grads | len(shape) | shape | pad   |

        Returns an int32 tensor of length ``fixed_header_len`` on the current
        NPU device. A ``None`` tensor is encoded as type ``-1`` with all other
        fields zero.

        Raises:
            RuntimeError: the tensor dtype is not in the dtype table.
            ValueError: the tensor has more dimensions than the header can hold.
        """
        header = [0] * self.fixed_header_len
        if tensor is None:
            header[0] = self.type_to_int[None]  # `-1` matches `None`
        else:
            dtype_id = self.type_to_int.get(tensor.dtype)
            if dtype_id is None:
                # Previously unreachable: an unknown dtype was silently sent as `-1`,
                # which the receiver decodes as "no tensor" while the payload still follows.
                raise RuntimeError(f"The tensor dtype is not supported or recorded on this device: {tensor.dtype}")
            shape = list(tensor.shape)
            if self.fixed_header_len - 3 < len(shape):  # `3` equals the len of [dtype, req_grads, len_shape]
                raise ValueError('`len(tensor.shape)` is too long to be stored in the remaining space of the header.')
            header[0] = dtype_id
            header[1] = int(tensor.requires_grad)
            header[2] = len(shape)
            header[3:3 + len(shape)] = shape

        return torch.tensor(header, dtype=torch.int32, device=torch.npu.current_device())

    def decode_tensor_header(self, header_tensor: torch.Tensor):
        """Return ``(dtype, shape, requires_grad)``; ``(None, None, None)`` when the type field maps to ``None``."""
        # One host transfer instead of one per field.
        fields = header_tensor.tolist()
        dtype = self.int_to_type.get(fields[0], None)
        if dtype is None:
            return dtype, None, None
        requires_grad = bool(fields[1])
        shape_len = fields[2]
        shape = fields[3:3 + shape_len]
        return dtype, shape, requires_grad


def send_recv(tensor: Optional[torch.Tensor], is_recv: bool, ranks: Sequence) -> Optional[Sequence[torch.Tensor]]:
    """Send ``tensor`` to ``ranks`` and/or receive one tensor from each of them.

    If ``tensor`` is a sequence only its first element is sent; ``None`` skips
    the send. Returns the received tensors wrapped in a list when ``is_recv``
    is true, otherwise ``None``.
    """
    if isinstance(tensor, Sequence):
        tensor = tensor[0]

    recv_tensor = None
    # To prevent deadlocks caused by different pipeline stages receiving tensor simultaneously.
    if not get_global_pipeline_parallel_rank() % 2:
        if tensor is not None:
            _send_tensor(tensor, ranks)
        if is_recv:
            recv_tensor = _recv_tensor(ranks)
    else:
        if is_recv:
            recv_tensor = _recv_tensor(ranks)
        if tensor is not None:
            _send_tensor(tensor, ranks)

    if is_recv and not isinstance(recv_tensor, list):
        recv_tensor = [recv_tensor]

    return recv_tensor


def send_recv_tensor_list(
    tensor_list: Optional[Sequence[torch.Tensor]],
    is_recv: bool,
    dst_ranks: Sequence[int],
) -> Optional[Sequence[Sequence[torch.Tensor]]]:
    """Send a list of tensors to ``dst_ranks`` and/or receive a list from them.

    ``tensor_list`` may be ``None`` only when receiving. A nested sequence is
    unwrapped to its first element.

    Raises:
        ValueError: ``tensor_list`` is ``None`` on a pure send.
        TypeError: ``tensor_list`` or its first item has an unsupported type.
    """
    if tensor_list is None:
        if not is_recv:
            raise ValueError('`tensor_list` can be set to `None` only on the receive side.')
    elif isinstance(tensor_list, Sequence) and len(tensor_list) > 0 and isinstance(tensor_list[0], Sequence):
        tensor_list = tensor_list[0]
    else:
        if not isinstance(tensor_list, Sequence):
            raise TypeError(f'`tensor_list` is an unsupported type: {type(tensor_list)}')
        if not isinstance(tensor_list[0], torch.Tensor):
            raise TypeError(f'item of `tensor_list` is an unsupported type: {type(tensor_list[0])}')

    tensor_list_ret = None
    # To prevent deadlocks caused by different pipeline stages receiving tensor simultaneously.
    if not get_global_pipeline_parallel_rank() % 2:
        if tensor_list is not None:
            send_tensor_list(tensor_list, dst_ranks)
        if is_recv:
            tensor_list_ret = recv_tensor_list(dst_ranks)
    else:
        if is_recv:
            tensor_list_ret = recv_tensor_list(dst_ranks)
        if tensor_list is not None:
            send_tensor_list(tensor_list, dst_ranks)

    return tensor_list_ret


def recv_tensor_list(src_ranks: Sequence[int]) -> Optional[Sequence[Sequence[torch.Tensor]]]:
    """Receive the list length from every source rank, then that many tensors from each.

    Raises:
        ValueError: the source ranks announce different list lengths.
    """
    tensor_list_len = []
    recv_tensor = torch.tensor([0], device=torch.npu.current_device())
    for rank in src_ranks:
        torch.distributed.recv(recv_tensor, rank)
        tensor_list_len.append(recv_tensor.item())

    if not all(tensor_list_len[0] == len_ for len_ in tensor_list_len[1:]):
        raise ValueError('Tensor sequences of different lengths cannot be received from different cards.')
    tensor_list_ret = [_recv_tensor(src_ranks) for _ in range(tensor_list_len[0])]

    return tensor_list_ret


def send_tensor_list(tensor_list: Optional[Sequence[torch.Tensor]], dst_ranks: Sequence[int]) -> None:
    """Send the list length, then every tensor, to each destination rank. An empty list sends nothing."""
    tensor_list_len = len(tensor_list)
    if tensor_list_len == 0:
        return
    send_tensor = torch.tensor([tensor_list_len], device=torch.npu.current_device())
    for rank in dst_ranks:
        torch.distributed.send(send_tensor, rank)
    for i in range(tensor_list_len):
        _send_tensor(tensor_list[i], dst_ranks)


def _send_header(tensor: torch.Tensor, dst: int) -> None:
    """Send the encoded header of ``tensor`` to ``dst``."""
    header_tensor = TENSOR_SYNC_TOOL.encode_tensor_header(tensor)
    torch.distributed.send(header_tensor, dst)


def _send_tensor(tensor: torch.Tensor, dst_ranks: Sequence) -> None:
    """Send header and payload of ``tensor`` to every rank in ``dst_ranks``; ``None`` is a no-op."""
    if tensor is None:
        return
    for dst in dst_ranks:
        _send_header(tensor, dst)
        torch.distributed.send(tensor=tensor, dst=dst)


def _recv_header(src: int) -> Tuple[Optional[torch.dtype], Optional[List[int]], Optional[bool]]:
    """Receive and decode one header from ``src``."""
    device = torch.npu.current_device()
    header_tensor = torch.zeros(TENSOR_SYNC_TOOL.fixed_header_len, dtype=torch.int32, device=device)
    torch.distributed.recv(header_tensor, src)
    header = TENSOR_SYNC_TOOL.decode_tensor_header(header_tensor)
    return header


def _recv_tensor(dst_ranks: Sequence) -> Optional[Sequence[torch.Tensor]]:
    """Receive one tensor from each rank, in order, using blocking receives.

    For every rank: first receive the header (dtype, shape, requires_grad), use
    it to allocate an empty tensor, then receive the tensor data into it.

    Returns:
        The list of received tensors, or ``None`` as soon as a header carries
        no dtype (tensors already received in this call are discarded).
    """
    recv_tensors = []
    for rank in dst_ranks:
        # recv header
        dtype, shape, requires_grad = _recv_header(rank)
        device = torch.npu.current_device()
        if dtype is None:
            print('[WARNING] Get dtype=None from received header.')
            return None
        # recv tensor
        tensor_recv_prev = torch.empty(tuple(shape), dtype=dtype, device=device, requires_grad=requires_grad)
        torch.distributed.recv(tensor=tensor_recv_prev, src=rank)

        recv_tensors.append(tensor_recv_prev)
    return recv_tensors


def generate_send_recv_mask(rank: int = None) -> Dict[str, bool]:
    """Decide which of the four cross-model transfers this rank takes part in.

    A local first pipeline stage receives forward activations when some entry
    of the rank-to-model-index table has a lower model index; a local last
    stage sends forward activations when some entry has a higher one. The
    matching backward transfer is enabled only when neither side is
    ``forward_only``.

    Raises:
        RuntimeError: this rank's model index is missing from the table.
    """
    model_index = get_dist_model_index(rank)
    rank_number_to_model_index = get_rank_number_to_model_index()
    if model_index not in rank_number_to_model_index:
        raise RuntimeError(f"model_index ({model_index}) not in _RANK_NUMBER_TO_MODEL_INDEX")

    result = {
        'send_forward': False,
        'send_backward': False,
        'recv_forward': False,
        'recv_backward': False
    }
    if _is_pipeline_first_stage(is_global=False):
        for i, index in enumerate(rank_number_to_model_index):
            if index < model_index:
                result['recv_forward'] = True
                if (not get_dist_model_config(rank=i).forward_only) \
                        and (not get_dist_model_config(rank=rank).forward_only):
                    result['send_backward'] = True
                break

    if _is_pipeline_last_stage(is_global=False):
        for i, index in enumerate(rank_number_to_model_index):
            if index > model_index:
                result['send_forward'] = True
                if (not get_dist_model_config(rank=i).forward_only) \
                        and (not get_dist_model_config(rank=rank).forward_only):
                    result['recv_backward'] = True
                break

    return result


init_tensor_sync_tool()
# Maps a global rank (int) to the list of global ranks it communicates with.
_MODEL_COMM_RANKS = {}


def generate_model_comm_ranks(pp_ranks_prev: [[]], tp_ranks_prev: [[]], pp_ranks_last: [[]], tp_ranks_last: [[]]):
    """Build the rank-to-rank communication table between two adjacent models.

    The TP groups that contain the last pipeline stage of the "prev" model and
    the TP groups that contain the first pipeline stage of the "last" model are
    collected, split into matching portions with ``get_size_list`` and the
    resulting pairs are merged into ``_MODEL_COMM_RANKS`` in both directions.

    Raises:
        RuntimeError: the table is already populated and the number of
            configured models is not 2.
        ValueError: no TP group could be selected for one of the two sides.
    """
    global _MODEL_COMM_RANKS
    if _MODEL_COMM_RANKS and config.get_all_config_size() != 2:
        # If the size is 2, this method is expected to be invoked only once.
        raise RuntimeError(f'Get config size ({config.get_all_config_size()}) is not equal to 2, '
                           f'and _MODEL_COMM_RANKS is initialized.')
    tp_ranks_prev_ = []
    tp_ranks_last_ = []

    # Take the ranks of the last stage of 'prev' and the first stage of 'last'.
    for pp_ranks in pp_ranks_prev:
        for tp_ranks in tp_ranks_prev:
            if pp_ranks[-1] in tp_ranks and tp_ranks not in tp_ranks_prev_:
                tp_ranks_prev_.append(tp_ranks)

    for pp_ranks in pp_ranks_last:
        for tp_ranks in tp_ranks_last:
            if pp_ranks[0] in tp_ranks and tp_ranks not in tp_ranks_last_:
                tp_ranks_last_.append(tp_ranks)

    if not (len(tp_ranks_prev_) and len(tp_ranks_last_)):
        raise ValueError("tp ranks must not empty")

    # Place the TP units with fewer counts at the front and those with more at the back,
    # so that when generating the forward correspondence, it traverses through fewer iterations.
    if len(tp_ranks_prev_) > len(tp_ranks_last_):
        tp_ranks_prev_, tp_ranks_last_ = tp_ranks_last_, tp_ranks_prev_

    # Generate correspondence.
    lens_last = get_size_list(len(tp_ranks_last_), len(tp_ranks_prev_), 1)
    index_for_last = [0] + list(accumulate(lens_last))
    ranks_dict_prev = {}
    for i, prev_ranks in enumerate(tp_ranks_prev_):
        last_ranks = [rank for lst in tp_ranks_last_[index_for_last[i]: index_for_last[i + 1]] for rank in lst]
        num_take_last = lens_last[i]  # The actual number of data sets taken from tp_ranks_last_ in this round.
        num_unit_last = len(tp_ranks_last_[0])

        # Place the elements with fewer counts at the front and those with more at the back,
        # to facilitate the execution of the general logic.
        if len(last_ranks) < len(prev_ranks):
            prev_ranks, last_ranks = last_ranks, prev_ranks
            num_take_last = 1  # Only one sublist will be extracted from tp_ranks_prev_ in each round.
            num_unit_last = len(tp_ranks_prev_[0])

        # Establish the corresponding relationships.
        per_ranks = get_size_list(len(last_ranks), len(prev_ranks), num_unit_last)
        index_for_prev = [0] + list(accumulate(per_ranks))
        for j, rank_ in enumerate(prev_ranks):
            ranks_dict_prev[rank_] = last_ranks[index_for_prev[j]: index_for_prev[j + 1]]

        # NOTE(review): per-round debug output; its nesting level was reconstructed
        # from a whitespace-collapsed source -- confirm it belongs inside this loop.
        print(f"rank={dist.get_rank()}, num_take_last: {num_take_last}, num_unit_last: {num_unit_last}, "
              f"prev: {prev_ranks}, last: {last_ranks}")

    # Conversely, establish the corresponding relationships again;
    # currently, this is only compatible with scenarios where the model is divided into two parts.
    ranks_dict_last = {last: [prev] for prev in ranks_dict_prev for last in ranks_dict_prev.get(prev, None)}
    if None in ranks_dict_last.keys():
        raise KeyError('Found unexpected keys in `ranks_dict_last`')

    # Update data
    keys = ranks_dict_prev.keys() | ranks_dict_last.keys()
    for k in keys:
        _MODEL_COMM_RANKS[k] = _MODEL_COMM_RANKS.get(k, []) + ranks_dict_prev.get(k, []) + ranks_dict_last.get(k, [])


def get_dst_ranks(rank=None):
    """Return the peer ranks recorded for ``rank`` (default: the calling rank), or ``None``."""
    global _MODEL_COMM_RANKS
    if rank is None:
        rank = dist.get_rank()

    return _MODEL_COMM_RANKS.get(rank, None)


def clear_model_comm_ranks():
    """Drop every recorded rank correspondence."""
    global _MODEL_COMM_RANKS
    _MODEL_COMM_RANKS = {}


def get_size_list(sum_, len_, base_):
    """Split ``sum_`` into ``len_`` positive integers, preferring multiples of ``base_``.

    Examples (sum, len, base):
        12, 2, 7 => 12, 2, 6 => [6, 6]   base is too large, it is decremented first
        15, 2, 5 => [5, 5] => [10, 5]    base fits, the remainder is handed out in multiples of base
        12, 2, 5 => [5, 5] => [6, 6]     base is too small, the remainder is handed out one by one

    Args:
        sum_: total amount to distribute; must be at least ``len_``.
        len_: number of portions; must be positive.
        base_: preferred granularity of a portion; must be positive.

    Returns:
        A list of ``len_`` integers whose sum is ``sum_``.

    Raises:
        ValueError: an argument is not an int, ``base_``/``len_`` is not
            positive, or ``sum_`` is smaller than ``len_``.
    """
    if not all(isinstance(num, int) for num in (sum_, len_, base_)):
        raise ValueError("sum_, base_ and len_ must be integers.")
    if base_ <= 0 or len_ <= 0:
        raise ValueError("base_ and len_ must be greater than zero.")
    if sum_ < len_:
        # Without this guard the loop below would decrement base_ down to 0 and
        # fail with an unrelated ZeroDivisionError.
        raise ValueError(f"sum_ ({sum_}) must not be smaller than len_ ({len_}).")

    # Shrink the granularity until every portion can receive at least one unit of it.
    while sum_ // base_ < len_:
        base_ -= 1

    list_base_ = sum_ // len_ // base_ * base_
    list_ = [list_base_] * len_
    rem_ = sum_ - len_ * list_base_

    # Hand out the remainder round-robin, in units of base_ when it divides evenly, otherwise one by one.
    base_ = base_ if rem_ % base_ == 0 else 1
    index_ = 0
    while rem_ > 0:
        list_[index_ % len_] += base_
        rem_ -= base_
        index_ += 1

    return list_
_ALL_CONFIG = {}  # {name: ModelConfig}
# model_idx:    0          1
#            vae rank0 \
#                        vit rank2,3
#            t5  rank1 /
_RANK_NUMBER_TO_MODEL_INDEX = []  # rank index (list index) -- model index -- [0, 0, 1, 1]
_RANK_NUMBER_TO_MODEL_NAME = []  # rank index (list index) -- model name -- ['vae', 't5', 'vit', 'vit']
_NUMBER_OF_MODELS = 0
_USE_MULTIPARAM_SEND_RECV = False
_ALL_DIST_MODEL_INDEX = []
_ALL_DIST_MODEL_NAME = []
_ALL_DIST_MODEL_CONFIG = []
_SUPPORT_MODEL_NAME = {"internvl2": ["vit", "gpt"], "opensoraplan1.3": ["vae", "dit"]}


class ContextKey:
    """Names of the keys accepted in the DistTrain JSON configuration."""
    DIST_CONFIG = 'dist_config'
    # global config keys
    MODEL_CONFIG = 'model_config'
    USE_MULTIPARAM_SEND_RECV = 'use_multiparam_send_recv'
    MODEL_NAME = 'model_name'
    # model config keys
    NAME = 'name'
    MODEL_INDEX = 'model_index'
    WORLD_SIZE = 'world_size'
    TENSOR_MODEL_PARALLEL_SIZE = 'tensor_model_parallel_size'
    PIPELINE_MODEL_PARALLEL_SIZE = 'pipeline_model_parallel_size'
    CONTEXT_PARALLEL_SIZE = 'context_parallel_size'
    MAIN_DP = 'main_dp'
    FORWARD_ONLY = 'forward_only'


CK = ContextKey()


class ModelConfig:
    """Validated configuration of one sub-model.

    The entries of ``config_dict`` become attributes (also reachable with
    ``cfg[key]``). ``start_rank`` and ``ranks`` are derived from the given
    start rank and the configured ``world_size``.
    """

    def __init__(self, config_dict: dict, start_rank):
        self._keys = {CK.NAME, CK.MODEL_INDEX, CK.WORLD_SIZE, CK.TENSOR_MODEL_PARALLEL_SIZE,
                      CK.PIPELINE_MODEL_PARALLEL_SIZE, CK.CONTEXT_PARALLEL_SIZE, CK.FORWARD_ONLY, CK.MAIN_DP}
        self._base_validate(config_dict)

        # Defaults first, then the user supplied values override them.
        setattr(self, CK.NAME, None)
        setattr(self, CK.MODEL_INDEX, None)
        setattr(self, CK.WORLD_SIZE, None)
        setattr(self, CK.TENSOR_MODEL_PARALLEL_SIZE, 1)
        setattr(self, CK.PIPELINE_MODEL_PARALLEL_SIZE, 1)
        setattr(self, CK.CONTEXT_PARALLEL_SIZE, 1)
        setattr(self, CK.FORWARD_ONLY, False)
        setattr(self, CK.MAIN_DP, False)
        self._set_single_model_config(config_dict)

        # Additional generated attributes.
        self.start_rank = start_rank
        self.ranks = list(range(self.start_rank, self.start_rank + getattr(self, CK.WORLD_SIZE)))

    def __getitem__(self, key):
        return getattr(self, key)

    def __setitem__(self, key, value):
        setattr(self, key, value)

    def __delitem__(self, key):
        delattr(self, key)

    def __repr__(self):
        # `_keys` is a set; sort it so that every rank and every run prints the same text.
        body = ', '.join(f'{k}: {getattr(self, k)}' for k in sorted(self._keys))
        return f'({body})'

    def _set_single_model_config(self, config_dict):
        """Copy every item of ``config_dict`` onto the instance."""
        for k, v in config_dict.items():
            setattr(self, k, v)
            self._keys.add(k)

    def _base_validate(self, ori_cfg):
        """Check key names and parallel sizes of a raw model config.

        Raises:
            ValueError: a key starts with an underscore, a size is not a
                positive int, or ``world_size`` is not divisible by
                ``tp * pp * cp``.
            KeyError: an unknown key is present.
            TypeError: ``forward_only`` is present but not a bool.
        """
        # Leading underscores are reserved for the instance's own bookkeeping.
        if any(key.startswith('_') for key in ori_cfg.keys()):
            raise ValueError('The configuration item field cannot start with an underscore (_) '
                             'to prevent unexpected overwriting.')
        # check valid key
        invalid_keys = [key for key in ori_cfg if key not in self._keys]
        if invalid_keys:
            raise KeyError(f"The following keys in DistTrain config are not valid: {invalid_keys}")
        # world_size
        world_size = ori_cfg.get(CK.WORLD_SIZE)
        if not (isinstance(world_size, int) and world_size > 0):
            raise ValueError(f'`{CK.WORLD_SIZE}` ({world_size}) should be an integer greater than 0')
        # parallel
        tp_size = ori_cfg.get(CK.TENSOR_MODEL_PARALLEL_SIZE, 1)
        pp_size = ori_cfg.get(CK.PIPELINE_MODEL_PARALLEL_SIZE, 1)
        cp_size = ori_cfg.get(CK.CONTEXT_PARALLEL_SIZE, 1)
        if not (isinstance(tp_size, int) and tp_size > 0):
            raise ValueError(f'`{CK.TENSOR_MODEL_PARALLEL_SIZE}` ({tp_size}) should be greater than 0')
        if not (isinstance(pp_size, int) and pp_size > 0):
            raise ValueError(f'`{CK.PIPELINE_MODEL_PARALLEL_SIZE}` ({pp_size}) should be greater than 0')
        if not (isinstance(cp_size, int) and cp_size > 0):
            raise ValueError(f'`{CK.CONTEXT_PARALLEL_SIZE}` ({cp_size}) should be greater than 0')
        if world_size % (tp_size * pp_size * cp_size):
            raise ValueError((f'`{CK.WORLD_SIZE}` ({world_size}) should be divisible by the product of '
                              f'`{CK.TENSOR_MODEL_PARALLEL_SIZE}` ({tp_size}), `{CK.PIPELINE_MODEL_PARALLEL_SIZE}` '
                              f'({pp_size}), and `{CK.CONTEXT_PARALLEL_SIZE}` ({cp_size})'))
        if CK.FORWARD_ONLY in ori_cfg and not isinstance(ori_cfg.get(CK.FORWARD_ONLY), bool):
            raise TypeError(f"The `{CK.FORWARD_ONLY}` value type must be bool.")


def validate_configs_world_size(args):
    """Raise ``ValueError`` unless the configured world sizes add up to ``args.world_size``."""
    world_size = 0
    for cfg in _ALL_CONFIG.values():
        world_size += cfg[CK.WORLD_SIZE]
    if world_size != args.world_size:
        raise ValueError('The sum of `world_size` in config must be equal to the actual `world_size`.')


def get_all_config():
    """Return the ``{name: ModelConfig}`` registry."""
    return _ALL_CONFIG


def get_all_config_size():
    """Return the number of registered model configs."""
    return len(_ALL_CONFIG)


def get_rank_number_to_model_index():
    """Return the list mapping a global rank (list index) to its model index."""
    return _RANK_NUMBER_TO_MODEL_INDEX


def get_rank_number_to_model_name():
    """Return the list mapping a global rank (list index) to its model name."""
    return _RANK_NUMBER_TO_MODEL_NAME


def get_dist_model_name(rank: int = None, global_index: int = None) -> str:
    """Return the model name for ``global_index`` and/or ``rank``.

    With ``global_index`` the name is looked up by registration position; if
    ``rank`` is given as well, both lookups must agree. Without
    ``global_index`` the name is looked up by ``rank`` (default: the calling
    rank).

    Raises:
        ValueError: ``global_index`` is out of range.
        IndexError: ``rank`` is out of range.
        RuntimeError: ``rank`` and ``global_index`` name different models.
    """
    if global_index is not None:
        if not (0 - _NUMBER_OF_MODELS <= global_index < _NUMBER_OF_MODELS):
            raise ValueError(f'`global_index` must between `0 - _NUMBER_OF_MODELS` ({0 - _NUMBER_OF_MODELS}) '
                             f'and `_NUMBER_OF_MODELS` ({_NUMBER_OF_MODELS})')
        key = list(_ALL_CONFIG.keys())[global_index]
        index_name = _ALL_CONFIG[key][CK.NAME]
        if rank is None:
            return index_name
        if not (0 <= rank < len(_RANK_NUMBER_TO_MODEL_NAME)):
            raise IndexError(f'{rank=} should between 0 and {len(_RANK_NUMBER_TO_MODEL_NAME)=}, '
                             f'check the config file and launch params')
        name = _RANK_NUMBER_TO_MODEL_NAME[rank]
        if index_name != name:
            raise RuntimeError(f'{rank=}, `{index_name}` should equals `{name}`')
        return name

    if rank is None:
        rank = torch.distributed.get_rank()
    if not (0 <= rank < len(_RANK_NUMBER_TO_MODEL_NAME)):
        raise IndexError(f'{rank=} should between 0 and {len(_RANK_NUMBER_TO_MODEL_NAME)=}, '
                         f'check the config file and launch params')

    return _RANK_NUMBER_TO_MODEL_NAME[rank]


def get_dist_model_config(name: str = None, rank: int = None, global_index: int = None):
    """Return the ``ModelConfig`` selected by ``name``, ``rank`` and/or ``global_index``.

    When ``name`` is given together with ``rank`` or ``global_index`` the
    selectors must agree; when ``name`` is omitted it is resolved through
    ``get_dist_model_name``.

    Raises:
        ValueError: ``global_index`` is out of range.
        RuntimeError: the selectors name different models.
        KeyError: the resolved name is not registered.
    """
    if global_index is not None:
        if not (0 - _NUMBER_OF_MODELS <= global_index < _NUMBER_OF_MODELS):
            raise ValueError(f'`global_index` must between `0 - _NUMBER_OF_MODELS` ({0 - _NUMBER_OF_MODELS}) '
                             f'and `_NUMBER_OF_MODELS` ({_NUMBER_OF_MODELS})')
    if name is not None:
        if rank is not None or global_index is not None:
            if name != get_dist_model_name(rank, global_index):
                raise RuntimeError(f'{rank=}, `{name}` should equals `{get_dist_model_name(rank, global_index)}`')
    else:
        name = get_dist_model_name(rank, global_index)
    if name not in _ALL_CONFIG.keys():
        raise KeyError(f'{name=} not in {_ALL_CONFIG.keys()=}')
    return _ALL_CONFIG[name]


def get_dist_model_index(rank: int = None) -> int:
    """Return the model index of ``rank`` (default: the calling rank)."""
    if rank is None:
        rank = torch.distributed.get_rank()
    if not (0 - len(_RANK_NUMBER_TO_MODEL_INDEX) <= rank < len(_RANK_NUMBER_TO_MODEL_INDEX)):
        raise IndexError(f'{0 - len(_RANK_NUMBER_TO_MODEL_INDEX)=} <= {rank=} < {len(_RANK_NUMBER_TO_MODEL_INDEX)=}, '
                         f'check the config file and launch params')
    return _RANK_NUMBER_TO_MODEL_INDEX[rank]


def get_dist_global_model_index(rank: int = None) -> int:
    """Return the registration position of the model that owns ``rank``."""
    name = get_dist_model_name(rank)
    keys = _ALL_CONFIG.keys()
    return list(keys).index(name)


def is_use_multiparam_send_recv():
    """Return the configured ``use_multiparam_send_recv`` flag."""
    return _USE_MULTIPARAM_SEND_RECV


def _read_json(json_path):
    """Load and return the JSON document stored at ``json_path``.

    Raises:
        FileNotFoundError: the file does not exist.
        ValueError: the content is not valid JSON.
        RuntimeError: any other failure while reading.
    """
    try:
        # JSON text is UTF-8 by specification; do not depend on the locale default.
        with open(json_path, mode="r", encoding="utf-8") as f:
            json_file = f.read()
        configs_list = json.loads(json_file)
        return configs_list
    except FileNotFoundError as e:
        raise FileNotFoundError(f"The file {json_path} does not exist.") from e
    except json.JSONDecodeError as e:
        raise ValueError(f"The file {json_path} is not a valid JSON file.") from e
    except Exception as e:
        raise RuntimeError(f"An unexpected error occurred: {e}") from e


def _check_config(config_dict):
    """Validate the ``dist_config`` dictionary and cache its model configs.

    On success ``_ALL_DIST_MODEL_CONFIG``, ``_ALL_DIST_MODEL_INDEX`` and
    ``_ALL_DIST_MODEL_NAME`` hold the model configs, indexes and names in
    ascending ``model_index`` order.

    Raises:
        KeyError: a required key is missing or an unknown key is present.
        TypeError: a value has the wrong type.
        ValueError: a value is not acceptable (unsupported model, duplicate
            or misordered names, non-contiguous indexes, several ``main_dp``).
    """
    global _ALL_DIST_MODEL_INDEX, _ALL_DIST_MODEL_NAME, _ALL_DIST_MODEL_CONFIG

    if CK.MODEL_CONFIG not in config_dict.keys():
        raise KeyError(f"The `{CK.MODEL_CONFIG}` key does not exist in DistTrain config.")
    if CK.USE_MULTIPARAM_SEND_RECV in config_dict.keys() and not isinstance(config_dict[CK.USE_MULTIPARAM_SEND_RECV], bool):
        raise TypeError(f"The `{CK.USE_MULTIPARAM_SEND_RECV}` value type must be bool.")
    if CK.MODEL_NAME not in config_dict.keys():
        raise KeyError(f"The `{CK.MODEL_NAME}` key does not exist in DistTrain config.")
    if not isinstance(config_dict[CK.MODEL_NAME], str):
        raise TypeError(f"The `{CK.MODEL_NAME}` value type must be string.")
    if config_dict[CK.MODEL_NAME] not in _SUPPORT_MODEL_NAME:
        raise ValueError(f"The `{CK.MODEL_NAME}` current not support.")
    valid_keys = [CK.MODEL_CONFIG, CK.USE_MULTIPARAM_SEND_RECV, CK.MODEL_NAME]
    invalid_keys = [key for key in config_dict.keys() if key not in valid_keys]
    if invalid_keys:
        raise KeyError(f"Get unexpected keywords: {invalid_keys}")
    if not isinstance(config_dict[CK.MODEL_CONFIG], list):
        raise TypeError(f"The `{CK.MODEL_CONFIG}` type must be list.")
    if not config_dict[CK.MODEL_CONFIG]:
        raise ValueError(f"The `{CK.MODEL_CONFIG}` must not be empty.")

    model_configs = config_dict[CK.MODEL_CONFIG]
    if not all(isinstance(config, dict) for config in model_configs):
        raise TypeError(f"Every item of `{CK.MODEL_CONFIG}` must be dict.")
    _ALL_DIST_MODEL_INDEX = [config.get(CK.MODEL_INDEX) for config in model_configs]
    _ALL_DIST_MODEL_NAME = [config.get(CK.NAME) for config in model_configs]
    _ALL_DIST_MODEL_CONFIG = model_configs
    if not all(key in config.keys() for config in model_configs for key in [CK.NAME, CK.WORLD_SIZE, CK.MODEL_INDEX]):
        raise ValueError(f"At least three items must be configured: `{CK.NAME}`, `{CK.WORLD_SIZE}`, and `{CK.MODEL_INDEX}`.")
    if not all(isinstance(name, str) for name in _ALL_DIST_MODEL_NAME):
        raise TypeError(f"The `{CK.NAME}` value type must be str.")
    if len(_ALL_DIST_MODEL_NAME) != len(set(_ALL_DIST_MODEL_NAME)):
        raise ValueError(f"`{CK.NAME}` is duplicate in DistTrain config.")
    if not all(name.isidentifier() for name in _ALL_DIST_MODEL_NAME):
        raise ValueError(f"`{CK.NAME}` is not a valid string.")
    valid_names = _SUPPORT_MODEL_NAME.get(config_dict[CK.MODEL_NAME])
    if len(_ALL_DIST_MODEL_NAME) != len(valid_names):
        raise ValueError(f"`{config_dict[CK.MODEL_NAME]}` model current only support {valid_names}.")
    if not all(isinstance(index, int) for index in _ALL_DIST_MODEL_INDEX):
        raise TypeError(f"The `{CK.MODEL_INDEX}` value type must be int.")

    # Order the configs by their OWN model_index. Sorting the index list on its
    # own and zipping it with the unsorted configs would pair each config with
    # somebody else's index and leave the configs in file order.
    ordered_configs = sorted(model_configs, key=lambda config: config[CK.MODEL_INDEX])
    _ALL_DIST_MODEL_CONFIG = tuple(ordered_configs)
    _ALL_DIST_MODEL_INDEX = [config[CK.MODEL_INDEX] for config in ordered_configs]
    _ALL_DIST_MODEL_NAME = [config[CK.NAME] for config in ordered_configs]

    if not all(_ALL_DIST_MODEL_INDEX[i] - _ALL_DIST_MODEL_INDEX[i - 1] == 1 for i in range(1, len(_ALL_DIST_MODEL_INDEX))):
        raise ValueError(f"`{CK.MODEL_INDEX}` must be continuous.")
    if _ALL_DIST_MODEL_CONFIG[0][CK.MODEL_INDEX] < 0:
        raise ValueError(f"`{CK.MODEL_INDEX}` must start from 0.")
    # Names, taken in ascending model_index order, must follow the supported sequence.
    if not all(name == valid for name, valid in zip(_ALL_DIST_MODEL_NAME, valid_names)):
        raise ValueError(f"`{CK.NAME}` sequence is incorrect, {config_dict[CK.MODEL_NAME]} "
                         f"model name list strictly follow the sequence [{valid_names}].")
    if not all(
        isinstance(config.get(CK.MAIN_DP), bool)
        for config in _ALL_DIST_MODEL_CONFIG
        if CK.MAIN_DP in config
    ):
        raise TypeError(f"The `{CK.MAIN_DP}` value type must be bool.")
    if sum(1 for config in _ALL_DIST_MODEL_CONFIG if config.get(CK.MAIN_DP, False)) > 1:
        raise ValueError(f"Only one `{CK.MAIN_DP}` can be true.")


def _set_config(config_dict):
    """Validate ``config_dict`` and populate the module-level registries from it."""
    global _NUMBER_OF_MODELS, _USE_MULTIPARAM_SEND_RECV
    global _ALL_CONFIG, _RANK_NUMBER_TO_MODEL_NAME, _RANK_NUMBER_TO_MODEL_INDEX

    _check_config(config_dict)
    _NUMBER_OF_MODELS = len(_ALL_DIST_MODEL_CONFIG)
    # Register the model configs in ascending model_index order.
    config_dict[CK.MODEL_CONFIG] = _ALL_DIST_MODEL_CONFIG
    for k, v in config_dict.items():
        if k == CK.USE_MULTIPARAM_SEND_RECV:
            _USE_MULTIPARAM_SEND_RECV = v
        elif k == CK.MODEL_CONFIG:
            for model_config in v:  # v == [{}, {}, {}, ...]
                # The next free global rank is the start rank of this model.
                _ALL_CONFIG[model_config.get(CK.NAME)] = ModelConfig(model_config, len(_RANK_NUMBER_TO_MODEL_INDEX))
                _RANK_NUMBER_TO_MODEL_INDEX.extend([model_config.get(CK.MODEL_INDEX)] * model_config.get(CK.WORLD_SIZE))
                _RANK_NUMBER_TO_MODEL_NAME.extend([model_config.get(CK.NAME)] * model_config.get(CK.WORLD_SIZE))
    print(f"{_ALL_CONFIG=}\n{_RANK_NUMBER_TO_MODEL_NAME=}\n{_RANK_NUMBER_TO_MODEL_INDEX=}")


def _clear_dist_config():
    """Reset every module-level registry to its initial, empty state."""
    global _ALL_CONFIG, _RANK_NUMBER_TO_MODEL_NAME, _RANK_NUMBER_TO_MODEL_INDEX, _NUMBER_OF_MODELS, \
        _USE_MULTIPARAM_SEND_RECV, _ALL_DIST_MODEL_INDEX, _ALL_DIST_MODEL_NAME, _ALL_DIST_MODEL_CONFIG
    _ALL_CONFIG = {}
    _RANK_NUMBER_TO_MODEL_NAME = []
    _RANK_NUMBER_TO_MODEL_INDEX = []
    _NUMBER_OF_MODELS = 0
    _USE_MULTIPARAM_SEND_RECV = False
    _ALL_DIST_MODEL_INDEX = []
    _ALL_DIST_MODEL_NAME = []
    _ALL_DIST_MODEL_CONFIG = []


def merge_dist_train_args(path):
    """Load the ``dist_config`` section of the JSON file at ``path`` and install it.

    Any previously installed configuration is cleared first.

    Raises:
        TypeError: ``path`` does not end with ``.json``.
        ValueError: the JSON document is not a dict.
    """
    real_path = os.path.realpath(path)
    if real_path.endswith(".json"):  # MindSpeed-MM use json config
        config = _read_json(real_path)
        if isinstance(config, dict):
            config = config.get(CK.DIST_CONFIG, {})
        else:
            raise ValueError('Unexpected json file, not contain dist_config dict data.')
    else:
        raise TypeError("Unexpected file type.")
    _clear_dist_config()
    _set_config(config)


def is_forward_only_model(name: str = None, rank: int = None, global_index: int = None):
    """Return the ``forward_only`` flag of the selected model."""
    return get_dist_model_config(name, rank, global_index)[CK.FORWARD_ONLY]
@dist_ps.subwrold_decorator
def get_inner_data_parallel_group():
    """Get the inner data parallel group the caller rank belongs to.

    Raises:
        RuntimeError: the group has not been initialized.
    """
    if dist_ps._INNER_DATA_PARALLEL_GROUP is None:
        raise RuntimeError('inner data parallel group is not initialized')
    return dist_ps._INNER_DATA_PARALLEL_GROUP


@dist_ps.subwrold_decorator
def get_inner_data_parallel_world_size():
    """Return world size for the inner data parallel group (0 if torch.distributed is not initialized)."""
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return torch.distributed.get_world_size(
            group=get_inner_data_parallel_group()
        )
    else:
        return 0


@dist_ps.subwrold_decorator
def get_inner_data_parallel_rank():
    """Return my rank for the inner data parallel group (0 if torch.distributed is not initialized)."""
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return torch.distributed.get_rank(
            group=get_inner_data_parallel_group()
        )
    else:
        return 0


def get_inner_data_parallel_src_rank():
    """Calculate the global rank corresponding to the first local rank in the inner data parallel group."""
    if dist_ps._CUR_SUB_WORLD is None:
        return 0
    # Rank relative to the first rank of the current sub-world.
    local_rank = (torch.distributed.get_rank() - dist_ps._CUR_SUB_WORLD.start_rank)
    local_world_size = get_inner_data_parallel_world_size()
    # Round down to a multiple of the group size, then translate back to a global rank.
    return (local_rank // local_world_size) * local_world_size + dist_ps._CUR_SUB_WORLD.start_rank


def gather_from_inner_dp_region(input_, inner_dp_parallel_output_grad=True):
    """Gather ``input_`` along its first dimension across the inner data parallel group."""
    return _GatherFromInnerDataParallelRegion.apply(input_, inner_dp_parallel_output_grad)


class _GatherFromInnerDataParallelRegion(torch.autograd.Function):
    """Gather the input from the inner data parallel region and concatenate along the first dim."""

    @staticmethod
    def symbolic(graph, input_, inner_dp_parallel_output_grad=True):
        return sync_gather_along_first_dim(input_, InnerDPCollectiveComm)

    @staticmethod
    def forward(ctx, input_, inner_dp_parallel_output_grad=True):
        ctx.inner_dp_parallel_output_grad = inner_dp_parallel_output_grad
        return sync_gather_along_first_dim(input_, InnerDPCollectiveComm)

    @staticmethod
    def backward(ctx, grad_output):
        inner_dp_parallel_output_grad = ctx.inner_dp_parallel_output_grad

        # When the flag is set the output gradient is reduce-scattered across
        # the inner data parallel group; otherwise it is only split, each rank
        # keeping its own slice. The second return value is the (non-existent)
        # gradient of the flag argument.
        if inner_dp_parallel_output_grad:
            return sync_reduce_scatter_along_first_dim(grad_output, InnerDPCollectiveComm), None
        else:
            return _split_along_first_dim(grad_output, InnerDPCollectiveComm), None


class InnerDPCollectiveComm(CollectiveCommIntf):
    """Collective-communication descriptor bound to the inner data parallel group."""

    def __init__(self, name='inner-dp'):
        super().__init__(name)

    @classmethod
    def get_comm_rank(cls):
        return get_inner_data_parallel_rank()

    @classmethod
    def get_comm_group_world_size(cls):
        return get_inner_data_parallel_world_size()

    @classmethod
    def get_comm_group(cls):
        return get_inner_data_parallel_group()


def split_data(data: torch.Tensor, padding_val: int = 0):
    """Return this rank's slice of ``data`` along the first dimension.

    ``data`` is moved to the current CUDA device, padded along dim 0 with
    ``padding_val`` up to a multiple of the inner data parallel world size and
    cut into that many equal chunks; the chunk indexed by the inner data
    parallel rank is returned.

    The padded buffer is created with the dtype of ``data`` so that the values
    are copied unchanged (a buffer with an inferred dtype would silently cast
    them, e.g. truncate floats when ``padding_val`` is a non-zero int).
    """
    data_num = data.shape[0]
    dp_size = get_inner_data_parallel_world_size()
    # Round the leading dimension up to the next multiple of dp_size.
    data_pad_num = (data_num + dp_size - 1) // dp_size * dp_size
    data_shape = tuple(data.shape[1:])

    flatten_data = data.contiguous().view(-1, *data_shape).cuda()

    flatten_data_pad = torch.full(
        (data_pad_num, *data_shape),
        padding_val,
        dtype=flatten_data.dtype,
        device=torch.cuda.current_device(),
    )
    flatten_data_pad[:data_num, ...] = flatten_data

    split_data_list = list(torch.chunk(flatten_data_pad, dp_size, dim=0))
    return split_data_list[get_inner_data_parallel_rank()]
def need_inner_data_parallel():
    """Tell whether the current rank should use inner data parallelism.

    True only inside the "vit" sub-world, when some model config is flagged
    ``main_dp`` and the data parallel world size is at least twice the main
    data parallel size.
    """
    # only support for vit model
    if is_in_subworld("vit") and any(cfg.main_dp for cfg in get_all_config().values()):
        return get_data_parallel_world_size() // get_global_data_parallel_size() > 1
    return False


def get_global_data_parallel_size():
    """Return main data parallel size.

    That is ``world_size // (tp * pp * cp)`` of the config flagged ``main_dp``.

    Raises:
        AssertionError: no config is flagged ``main_dp``.
    """
    main_dp_sizes = [
        cfg.world_size //
        (cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size * cfg.context_parallel_size)
        for cfg in get_all_config().values()
        if cfg.main_dp
    ]
    # Should more than one config be flagged, the last one decides.
    global_dp_size = main_dp_sizes[-1] if main_dp_sizes else 0
    if global_dp_size == 0:
        raise AssertionError("No Main DP")
    return global_dp_size
+_TENSOR_MODEL_PARALLEL_GROUP = None +# Inter-layer model parallel group that the current rank belongs to. +_PIPELINE_MODEL_PARALLEL_GROUP = None +# Model parallel group (both intra- and pipeline) that the current rank belongs to. +_MODEL_PARALLEL_GROUP = None +# Model parallel group (both intra-, pipeline, and expert) that the current rank belongs to. +_MODEL_AND_EXPERT_PARALLEL_GROUP = None +# Embedding group. +_EMBEDDING_GROUP = None +# Position embedding group. +_POSITION_EMBEDDING_GROUP = None +# Data parallel group that the current rank belongs to. +_DATA_PARALLEL_GROUP = None +_DATA_PARALLEL_GROUP_GLOO = None +# tensor model parallel group and data parallel group combined +# used for fp8 and moe training +_TENSOR_AND_DATA_PARALLEL_GROUP = None +# Expert parallel group that the current rank belongs to. +_EXPERT_MODEL_PARALLEL_GROUP = None +_TENSOR_AND_EXPERT_PARALLEL_GROUP = None +_DATA_MODULO_EXPERT_PARALLEL_GROUP = None +_DATA_MODULO_EXPERT_PARALLEL_GROUP_GLOO = None +_DATA_MODULO_EXPERT_PARALLEL_GROUP_WITH_CP = None +_DATA_MODULO_EXPERT_PARALLEL_GROUP_WITH_CP_GLOO = None + +_VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK = None +_VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = None +_PIPELINE_MODEL_PARALLEL_SPLIT_RANK = None + +# These values enable us to change the mpu sizes on the fly. +_MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE = None +_MPU_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = None +_MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE = None +_MPU_TENSOR_MODEL_PARALLEL_RANK = None +_MPU_PIPELINE_MODEL_PARALLEL_RANK = None +_MPU_EXPERT_MODEL_PARALLEL_RANK = None + +# A list of ranks that have a copy of the embedding. +_EMBEDDING_GLOBAL_RANKS = None + +# A list of ranks that have a copy of the position embedding. +_POSITION_EMBEDDING_GLOBAL_RANKS = None + +# A list of global ranks for each pipeline group to ease calculation of the source +# rank when broadcasting from the first or last pipeline stage. 
+_PIPELINE_GLOBAL_RANKS = None + +# A list of global ranks for each data parallel group to ease calculation of the source +# rank when broadcasting weights from src to all other data parallel ranks +_DATA_PARALLEL_GLOBAL_RANKS = None + +# A list of global ranks for each tensor model parallel group to ease calculation of +# the first local rank in the tensor model parallel group +_TENSOR_MODEL_PARALLEL_GLOBAL_RANKS = None + +# Context parallel group that the current rank belongs to +_CONTEXT_PARALLEL_GROUP = None +# A list of global ranks for each context parallel group to ease calculation of the +# destination rank when exchanging KV/dKV between context parallel_ranks +_CONTEXT_PARALLEL_GLOBAL_RANKS = None + +# Data parallel group information with context parallel combined. +_DATA_PARALLEL_GROUP_WITH_CP = None +_DATA_PARALLEL_GROUP_WITH_CP_GLOO = None +_DATA_PARALLEL_GLOBAL_RANKS_WITH_CP = None + +# combined parallel group of TP and CP +_TENSOR_AND_CONTEXT_PARALLEL_GROUP = None + +# combined parallel group of TP, DP, and CP used for fp8 +_TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP = None + +# inner data parallel group +_INNER_DATA_PARALLEL_GROUP = None +# Memory buffers to avoid dynamic memory allocation +_GLOBAL_MEMORY_BUFFER = None + +# MOE logging +_MOE_LAYER_WISE_LOGGING_TRACKER = {} + + +class DetachedSubWorld: + def __init__(self, name: str, start_rank, ranks: list): + self.name = name + self.ranks = ranks + self.start_rank = start_rank + + # Intra-layer model parallel group that the current rank belongs to. + self.tensor_model_parallel_group = None + # Inter-layer model parallel group that the current rank belongs to. + self.pipeline_model_parallel_group = None + # Model parallel group (both intra- and pipeline) that the current rank belongs to. + self.model_parallel_group = None + # Model parallel group (both intra-, pipeline, and expert) that the current rank belongs to. + self.model_and_expert_parallel_group = None + # Embedding group. 
+ self.embedding_group = None + # Position embedding group. + self.position_embedding_group = None + # Data parallel group that the current rank belongs to. + self.data_parallel_group = None + self.data_parallel_group_gloo = None + # tensor model parallel group and data parallel group combined + # used for fp8 and moe training + self.tensor_and_data_parallel_group = None + # Expert parallel group that the current rank belongs to. + self.expert_model_parallel_group = None + self.tensor_and_expert_parallel_group = None + self.data_modulo_expert_parallel_group = None + self.data_modulo_expert_parallel_group_gloo = None + self.data_modulo_expert_parallel_group_with_cp = None + self.data_modulo_expert_parallel_group_with_cp_gloo = None + + self.virtual_pipeline_model_parallel_rank = None + self.virtual_pipeline_model_parallel_world_size = None + self.pipeline_model_parallel_split_rank = None + + # These values enable us to change the mpu sizes on the fly. + self.mpu_tensor_model_parallel_world_size = None + self.mpu_pipeline_model_parallel_world_size = None + self.mpu_expert_model_parallel_world_size = None + self.mpu_tensor_model_parallel_rank = None + self.mpu_pipeline_model_parallel_rank = None + self.mpu_expert_model_parallel_rank = None + + # A list of ranks that have a copy of the embedding. + self.embedding_global_ranks = None + + # A list of ranks that have a copy of the position embedding. + self.position_embedding_global_ranks = None + + # A list of global ranks for each pipeline group to ease calculation of the source + # rank when broadcasting from the first or last pipeline stage. 
def __repr__(self):
    """Return a compact summary of this sub-world's main attributes.

    Only the attributes named in ``print_keys`` are reported, in the order
    they appear in ``vars(self)``.  Each one is rendered as ``LABEL=value,``
    (note the trailing comma) and the pieces are concatenated without any
    separator.  ``range`` values are expanded to lists and process groups
    are shown as the list of ranks they contain.
    """
    # attribute name -> label used in the printed summary
    print_keys = {"name": "model",
                  "pipeline_model_parallel_group": "PP_RANKS",
                  "tensor_model_parallel_group": "TP_RANKS",
                  "data_parallel_group": "DP_RANKS",
                  "context_parallel_group": "CP_RANKS",
                  "tensor_and_data_parallel_group": "TP_DP_RANKS",
                  "tensor_and_expert_parallel_group": "TP_EP_RANKS"}

    pieces = []
    for attr, value in vars(self).items():
        label = print_keys.get(attr)
        if label is None:
            # Not one of the attributes we report.
            continue

        if isinstance(value, range):
            shown = list(value)
        elif isinstance(value, ProcessGroup):
            # An isinstance match already rules out None, so the ranks can
            # be queried directly.
            shown = torch.distributed.get_process_group_ranks(value)
        else:
            # Plain values (including groups that are still None).
            shown = value
        pieces.append(f"{label}={shown},")

    return "".join(pieces)
global _INNER_DATA_PARALLEL_GROUP + global _GLOBAL_MEMORY_BUFFER + global _MOE_LAYER_WISE_LOGGING_TRACKER + + # Intra-layer model parallel group that the current rank belongs to. + _TENSOR_MODEL_PARALLEL_GROUP = subworld.tensor_model_parallel_group + # Inter-layer model parallel group that the current rank belongs to. + _PIPELINE_MODEL_PARALLEL_GROUP = subworld.pipeline_model_parallel_group + # Model parallel group (both intra- and pipeline) that the current rank belongs to. + _MODEL_PARALLEL_GROUP = subworld.model_parallel_group + # Model parallel group (both intra-, pipeline, and expert) that the current rank belongs to. + _MODEL_AND_EXPERT_PARALLEL_GROUP = subworld.model_and_expert_parallel_group + # Embedding group. + _EMBEDDING_GROUP = subworld.embedding_group + # Position embedding group. + _POSITION_EMBEDDING_GROUP = subworld.position_embedding_group + # Data parallel group that the current rank belongs to. + _DATA_PARALLEL_GROUP = subworld.data_parallel_group + _DATA_PARALLEL_GROUP_GLOO = subworld.data_parallel_group_gloo + _DATA_MODULO_EXPERT_PARALLEL_GROUP_WITH_CP = subworld.data_modulo_expert_parallel_group_with_cp + _DATA_MODULO_EXPERT_PARALLEL_GROUP_WITH_CP_GLOO = subworld.data_modulo_expert_parallel_group_with_cp_gloo + # tensor model parallel group and data parallel group combined + # used for fp8 and moe training + _TENSOR_AND_DATA_PARALLEL_GROUP = subworld.tensor_and_data_parallel_group + # Expert parallel group that the current rank belongs to. 
+ _EXPERT_MODEL_PARALLEL_GROUP = subworld.expert_model_parallel_group + _TENSOR_AND_EXPERT_PARALLEL_GROUP = subworld.tensor_and_expert_parallel_group + _DATA_MODULO_EXPERT_PARALLEL_GROUP = subworld.data_modulo_expert_parallel_group + _DATA_MODULO_EXPERT_PARALLEL_GROUP_GLOO = subworld.data_modulo_expert_parallel_group_gloo + + _VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK = subworld.virtual_pipeline_model_parallel_rank + _VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = subworld.virtual_pipeline_model_parallel_world_size + _PIPELINE_MODEL_PARALLEL_SPLIT_RANK = subworld.pipeline_model_parallel_split_rank + + # These values enable us to change the mpu sizes on the fly. + _MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE = subworld.mpu_tensor_model_parallel_world_size + _MPU_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = subworld.mpu_pipeline_model_parallel_world_size + _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE = subworld.mpu_expert_model_parallel_world_size + _MPU_TENSOR_MODEL_PARALLEL_RANK = subworld.mpu_tensor_model_parallel_rank + _MPU_PIPELINE_MODEL_PARALLEL_RANK = subworld.mpu_pipeline_model_parallel_rank + _MPU_EXPERT_MODEL_PARALLEL_RANK = subworld.mpu_expert_model_parallel_rank + + # A list of ranks that have a copy of the embedding. + _EMBEDDING_GLOBAL_RANKS = subworld.embedding_global_ranks + + # A list of ranks that have a copy of the position embedding. + _POSITION_EMBEDDING_GLOBAL_RANKS = subworld.position_embedding_global_ranks + + # A list of global ranks for each pipeline group to ease calculation of the source + # rank when broadcasting from the first or last pipeline stage. 
def get_nccl_options(pg_name, nccl_comm_cfgs):
    """Build the NCCL process group options for one named group.

    Args:
        pg_name (str): process group name
        nccl_comm_cfgs (dict): nccl communicator configurations, keyed by
            process group name

    Returns:
        A ``ProcessGroupNCCL.Options`` object when ``pg_name`` has an entry in
        ``nccl_comm_cfgs``, otherwise ``None``.  Options missing from the
        entry fall back to ``cga_cluster_size=4``, ``max_ctas=32`` and
        ``min_ctas=1``.
    """
    if pg_name not in nccl_comm_cfgs:
        # No custom configuration for this group.
        return None

    options = torch.distributed.ProcessGroupNCCL.Options()
    group_cfg = nccl_comm_cfgs[pg_name]
    options.config.cga_cluster_size = group_cfg.get('cga_cluster_size', 4)
    options.config.max_ctas = group_cfg.get('max_ctas', 32)
    options.config.min_ctas = group_cfg.get('min_ctas', 1)
    return options
+ tp_ranks = [] + pp_ranks = [] + if subworld is None: + return pp_ranks, tp_ranks + + if not torch.distributed.is_initialized(): + raise RuntimeError('Distributed is not initialized.') + world_size: int = torch.distributed.get_world_size() + sub_world_size = len(subworld.ranks) + if sub_world_size > world_size: + raise RuntimeError(f"world_size ({world_size}) is less than sub_world_size ({sub_world_size})") + world_size = sub_world_size + reset_global_group_and_ranks() + + def adjust_rank(ranks_: Sequence): + for i_, _ in enumerate(ranks_): + ranks_[i_] += subworld.start_rank + return ranks_ + + if ( + world_size + % (tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size) + != 0 + ): + raise RuntimeError( + f"world_size ({world_size}) is not divisible by tensor_model_parallel_size " + f"({tensor_model_parallel_size}) x pipeline_model_parallel_size ({pipeline_model_parallel_size}) " + f"x context_parallel_size ({context_parallel_size})" + ) + + data_parallel_size: int = world_size // ( + tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size + ) + + if data_parallel_size % expert_model_parallel_size != 0: + raise RuntimeError( + f"data_parallel_size ({data_parallel_size}) is not divisible by expert_model_parallel_size " + ) + + if virtual_pipeline_model_parallel_size is not None: + if not pipeline_model_parallel_size > 1: + raise RuntimeError( + "pipeline-model-parallel size should be greater than 1 with interleaved schedule" + ) + subworld.virtual_pipeline_model_parallel_rank = 0 + subworld.virtual_pipeline_model_parallel_world_size = virtual_pipeline_model_parallel_size + + if pipeline_model_parallel_split_rank is not None: + subworld.pipeline_model_parallel_split_rank = pipeline_model_parallel_split_rank + + rank = torch.distributed.get_rank() + + nccl_comm_cfgs = {} + if nccl_communicator_config_path is not None: + try: + import yaml + except ImportError: + raise RuntimeError( + "Cannot import `yaml`. 
Setting custom nccl communicator configs " + "requires the yaml package." + ) + + with open(nccl_communicator_config_path, "r") as stream: + nccl_comm_cfgs = yaml.safe_load(stream) + + rank_generator = RankGenerator( + tp=tensor_model_parallel_size, + ep=expert_model_parallel_size, + dp=data_parallel_size, + pp=pipeline_model_parallel_size, + cp=context_parallel_size, + order=order, + ) + timeout = timedelta(minutes=distributed_timeout_minutes) + + # Build the data-parallel groups. + assert subworld.data_parallel_group is None, 'data parallel group is already initialized' + + for ranks in rank_generator.get_ranks('dp'): + ranks = adjust_rank(ranks) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('dp', nccl_comm_cfgs) + ) + group_gloo = torch.distributed.new_group(ranks, timeout=timeout, backend="gloo") + if rank in ranks: + subworld.data_parallel_group = group + subworld.data_parallel_group_gloo = group_gloo + subworld.data_parallel_global_ranks = ranks + for ranks_with_cp in rank_generator.get_ranks('dp-cp'): + ranks_with_cp = adjust_rank(ranks_with_cp) + group_with_cp = torch.distributed.new_group( + ranks_with_cp, timeout=timeout, pg_options=get_nccl_options('dp_cp', nccl_comm_cfgs) + ) + group_with_cp_gloo = torch.distributed.new_group( + ranks_with_cp, timeout=timeout, backend="gloo" + ) + if rank in ranks_with_cp: + subworld.data_parallel_group_with_cp = group_with_cp + subworld.data_parallel_group_with_cp_gloo = group_with_cp_gloo + subworld.data_parallel_global_ranks_with_cp = ranks_with_cp + + # Apply SHARP to DP process groups + if use_sharp: + if rank == 0: + print( + "The number of process groups to use SHARP with depends on the type " + "of the network switch. Nvidia QM1 switch supports SAHRP up to 8 " + "process groups and QM2 supports up to 256 process groups. We apply " + "SHARP to the communications of the data-parallel domain. 
If the " + "number of data-parallel process groups is larger than the max " + "process groups that the network switch supports, the communication " + "will fall back to non-SHARP operators. To enable SHARP, " + "`#SBATCH_NETWORK=sharp` should be set in the sbatch script." + ) + torch.distributed.barrier( + group=get_data_parallel_group(with_context_parallel=True), + device_ids=[torch.cuda.current_device()], + ) + # Set `NCCL_COLLNET_ENABLE=0` to restrict SHARP application to DP process groups + os.environ["NCCL_COLLNET_ENABLE"] = "0" + + # Build the context-parallel groups. + assert subworld.context_parallel_group is None, 'context parallel group is already initialized' + for ranks in rank_generator.get_ranks('cp'): + ranks = adjust_rank(ranks) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('cp', nccl_comm_cfgs) + ) + if rank in ranks: + subworld.context_parallel_group = group + subworld.context_parallel_global_ranks = ranks + + # Build the model-parallel groups. + assert subworld.model_parallel_group is None, 'model parallel group is already initialized' + for ranks in rank_generator.get_ranks('tp-pp'): + ranks = adjust_rank(ranks) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('mp', nccl_comm_cfgs) + ) + if rank in ranks: + subworld.model_parallel_group = group + + # Build the model-parallel groups with expert parallel + assert subworld.model_and_expert_parallel_group is None, 'model and expert parallel group is already initialized' + for ranks in rank_generator.get_ranks('tp-ep-pp', independent_ep=True): + ranks = adjust_rank(ranks) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('mp_exp', nccl_comm_cfgs) + ) + if rank in ranks: + subworld.model_and_expert_parallel_group = group + + # Build the tensor model-parallel groups. 
+ assert subworld.tensor_model_parallel_group is None, 'tensor model parallel group is already initialized' + for ranks in rank_generator.get_ranks('tp'): + ranks = adjust_rank(ranks) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('tp', nccl_comm_cfgs) + ) + if rank in ranks: + subworld.tensor_model_parallel_group = group + subworld.tensor_model_parallel_global_ranks = ranks + + # Build the pipeline model-parallel groups and embedding groups + # (first and last rank in each pipeline model-parallel group). + assert subworld.pipeline_model_parallel_group is None, 'pipeline model parallel group is already initialized' + assert subworld.embedding_group is None, 'embedding group is already initialized' + assert subworld.position_embedding_group is None, 'position embedding group is already initialized' + for ranks in rank_generator.get_ranks('pp'): + ranks = adjust_rank(ranks) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('pp', nccl_comm_cfgs) + ) + pp_ranks.append(list(ranks)) + if rank in ranks: + subworld.pipeline_model_parallel_group = group + subworld.pipeline_global_ranks = ranks + # Setup embedding group (to exchange gradients between + # first and last stages). 
+ if len(ranks) > 1: + embedding_ranks = [ranks[0], ranks[-1]] + position_embedding_ranks = [ranks[0]] + if pipeline_model_parallel_split_rank is not None: + if ranks[pipeline_model_parallel_split_rank] not in embedding_ranks: + embedding_ranks = [ + ranks[0], + ranks[pipeline_model_parallel_split_rank], + ranks[-1], + ] + if ranks[pipeline_model_parallel_split_rank] not in position_embedding_ranks: + position_embedding_ranks = [ranks[0], ranks[pipeline_model_parallel_split_rank]] + else: + embedding_ranks = ranks + position_embedding_ranks = ranks + + group = torch.distributed.new_group( + embedding_ranks, timeout=timeout, pg_options=get_nccl_options('embd', nccl_comm_cfgs) + ) + if rank in embedding_ranks: + subworld.embedding_group = group + if rank in ranks: + subworld.embedding_global_ranks = embedding_ranks + + group = torch.distributed.new_group( + position_embedding_ranks, + timeout=timeout, + pg_options=get_nccl_options('embd', nccl_comm_cfgs), + ) + if rank in position_embedding_ranks: + subworld.position_embedding_group = group + if rank in ranks: + subworld.position_embedding_global_ranks = position_embedding_ranks + + # Build the tensor + data parallel groups. 
+ assert subworld.tensor_and_data_parallel_group is None, 'Tensor + data parallel group is already initialized' + for ranks in rank_generator.get_ranks('tp-dp-cp'): + ranks = adjust_rank(ranks) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('tp_dp_cp', nccl_comm_cfgs) + ) + if rank in ranks: + subworld.tensor_and_data_parallel_group_with_cp = group + for ranks in rank_generator.get_ranks('tp-dp'): + ranks = adjust_rank(ranks) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('tp_dp', nccl_comm_cfgs) + ) + tp_ranks.append(list(ranks)) + if rank in ranks: + subworld.tensor_and_data_parallel_group = group + + assert subworld.tensor_and_context_parallel_group is None, 'Tensor + context parallel group is already initialized' + for ranks in rank_generator.get_ranks('tp-cp'): + ranks = adjust_rank(ranks) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('tp_cp', nccl_comm_cfgs) + ) + if rank in ranks: + subworld.tensor_and_context_parallel_group = group + + # Build the tensor + expert parallel groups + assert subworld.expert_model_parallel_group is None, 'Expert parallel group is already initialized' + assert subworld.tensor_and_expert_parallel_group is None, 'Tensor + expert parallel group is already initialized' + assert subworld.data_modulo_expert_parallel_group is None, 'Data modulo expert group is already initialized' + assert ( + subworld.data_modulo_expert_parallel_group_with_cp is None + ), 'Data modulo expert group with context parallel is already initialized' + + for ranks in rank_generator.get_ranks('tp-ep', independent_ep=True): + ranks = adjust_rank(ranks) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('tp_exp', nccl_comm_cfgs) + ) + if rank in ranks: + subworld.tensor_and_expert_parallel_group = group + + for ranks in rank_generator.get_ranks('ep', independent_ep=True): + ranks = 
adjust_rank(ranks) + group = torch.distributed.new_group( + ranks, pg_options=get_nccl_options('exp', nccl_comm_cfgs) + ) + if rank in ranks: + subworld.expert_model_parallel_group = group + + for ranks in rank_generator.get_ranks('dp', independent_ep=True): + ranks = adjust_rank(ranks) + group = torch.distributed.new_group( + ranks, timeout=timeout, pg_options=get_nccl_options('dp_modulo_exp', nccl_comm_cfgs) + ) + group_gloo = torch.distributed.new_group(ranks, backend="gloo") + if rank in ranks: + subworld.data_modulo_expert_parallel_group = group + subworld.data_modulo_expert_parallel_group_gloo = group_gloo + + for ranks in rank_generator.get_ranks('dp-cp', independent_ep=True): + # Lazy initialization of the group + ranks = adjust_rank(ranks) + cp_world_size = torch.distributed.get_world_size(subworld.context_parallel_group) + if cp_world_size > 1: + group = torch.distributed.new_group( + ranks, + timeout=timeout, + pg_options=get_nccl_options('dp_modulo_exp_cp', nccl_comm_cfgs), + ) + group_gloo = torch.distributed.new_group(ranks, backend="gloo") + else: + group = subworld.data_modulo_expert_parallel_group + group_gloo = subworld.data_modulo_expert_parallel_group_gloo + if rank in ranks: + subworld.data_modulo_expert_parallel_group_with_cp = group + subworld.data_modulo_expert_parallel_group_with_cp_gloo = group_gloo + + if any(cfg.main_dp for cfg in get_all_config().values()): + from .inner_data_parallel.utils import get_global_data_parallel_size + if subworld.inner_data_parallel_group is not None: + raise RuntimeError('inner dp model parallel group is already initialized') + if get_global_data_parallel_size() > data_parallel_size: + raise RuntimeError(f'global dp size ({get_global_data_parallel_size()}) should smaller than or equals to ' + f'subworld dp size ({data_parallel_size})') + inner_dp_size = data_parallel_size // get_global_data_parallel_size() + for i in range(world_size // inner_dp_size): + start_rank = i * inner_dp_size + end_rank = (i + 1) * 
def initialize_model_parallel(*args, **kwargs) -> None:
    """Create the parallel groups of every configured sub-world and link them.

    The positional and keyword arguments are accepted but never read; all
    sizes come from the per-model configs returned by
    ``get_dist_model_config``.

    Steps:
      1. Clear the current sub-world and the registry of all sub-worlds.
      2. For each config index, build a ``DetachedSubWorld`` over the
         contiguous ranks ``[start_rank, start_rank + world_size)`` and
         initialize its groups with the config's TP, PP and CP sizes.
      3. Check that the configs' world sizes add up to the distributed world
         size (this check runs after the groups were created).
      4. For each pair of consecutive model indices, generate the
         communication rank mapping between their sub-worlds.

    Raises:
        RuntimeError: if the summed config world sizes differ from
            ``torch.distributed.get_world_size()``.
    """
    global _CUR_SUB_WORLD, ALL_SUB_WORLD
    _CUR_SUB_WORLD = None
    ALL_SUB_WORLD = {}
    world_size: int = torch.distributed.get_world_size()
    # model_index -> list of [pp_ranks, tp_ranks], one entry per sub-world
    ranks_by_model_index = {}

    # Initialize the parallel groups
    dist_all_world_size = 0
    for cfg_idx in range(get_all_config_size()):
        cfg = get_dist_model_config(global_index=cfg_idx)
        dist_all_world_size += cfg.world_size
        subworld_ranks = list(range(cfg.start_rank, cfg.start_rank + cfg.world_size))
        subworld = DetachedSubWorld(cfg.name, cfg.start_rank, subworld_ranks)
        pp_ranks, tp_ranks = _initialize_model_parallel(
            cfg.tensor_model_parallel_size,
            cfg.pipeline_model_parallel_size,
            context_parallel_size=cfg.context_parallel_size,
            subworld=subworld,
        )
        ranks_by_model_index.setdefault(cfg.model_index, []).append([pp_ranks, tp_ranks])

    if world_size != dist_all_world_size:
        raise RuntimeError(f"{world_size=} should equals to {dist_all_world_size=}")

    # Generate the rank mapping between consecutive models
    from .communication.dist_ranks_match import generate_model_comm_ranks, get_dst_ranks
    for model_idx in range(len(ranks_by_model_index) - 1):
        for prev_entry in ranks_by_model_index.get(model_idx, []):
            for post_entry in ranks_by_model_index.get(model_idx + 1, []):
                # [pp_prev, tp_prev, pp_post, tp_post]
                comm_args = prev_entry + post_entry
                generate_model_comm_ranks(*comm_args)
                dst_ranks = get_dst_ranks()
                if dst_ranks is None:
                    continue
                print(f"rank={torch.distributed.get_rank()} "
                      f"--> {dst_ranks}, prev: {list(comm_args[1])}, last: {list(comm_args[3])}")
def subwrold_decorator(wrap_func):
    """Make ``wrap_func`` see the module globals of the current sub-world.

    Every call to the returned function first:
      1. resets the module-level group/rank globals,
      2. if no current sub-world is selected, tries to select the one named
         by ``get_dist_model_name()``,
      3. if a current sub-world is then available, copies its groups and
         ranks into the module-level globals,
    and only then calls ``wrap_func`` with the original arguments and returns
    its result.
    """
    @wraps(wrap_func)
    def _with_subworld_globals(*args, **kwargs):
        reset_global_group_and_ranks()
        if _CUR_SUB_WORLD is None:
            from .config.dist_train_config import get_dist_model_name
            set_subworld_by_name(get_dist_model_name())
        # Re-read the global: set_subworld_by_name may have just assigned it.
        if _CUR_SUB_WORLD is not None:
            set_global_group_and_ranks_by_subworld(subworld=_CUR_SUB_WORLD)
        return wrap_func(*args, **kwargs)
    return _with_subworld_globals
@subwrold_decorator
def model_parallel_is_initialized():
    """Return True only if the tensor, pipeline and data parallel groups are all set."""
    required_groups = (
        _TENSOR_MODEL_PARALLEL_GROUP,
        _PIPELINE_MODEL_PARALLEL_GROUP,
        _DATA_PARALLEL_GROUP,
    )
    return all(group is not None for group in required_groups)
@subwrold_decorator
def get_data_parallel_group_gloo(with_context_parallel=False):
    """Get the data parallel group-gloo the caller rank belongs to.

    With ``with_context_parallel=True`` the gloo group that also spans the
    context parallel ranks is returned instead.  An ``AssertionError`` is
    raised when the requested group has not been set.
    """
    if with_context_parallel:
        group = _DATA_PARALLEL_GROUP_WITH_CP_GLOO
        missing_msg = 'data parallel group-gloo with context parallel combined is not initialized'
    else:
        group = _DATA_PARALLEL_GROUP_GLOO
        missing_msg = 'data parallel group-gloo is not initialized'
    assert group is not None, missing_msg
    return group
@subwrold_decorator
def get_amax_reduction_group(with_context_parallel=False):
    """Get the FP8 amax reduction group the caller rank belongs to.

    This is the tensor + context parallel group when
    ``with_context_parallel`` is true, otherwise the tensor model parallel
    group.  An ``AssertionError`` is raised when the selected group is unset.
    """
    group = (
        _TENSOR_AND_CONTEXT_PARALLEL_GROUP
        if with_context_parallel
        else _TENSOR_MODEL_PARALLEL_GROUP
    )
    assert group is not None, 'FP8 amax reduction group is not initialized'
    return group
@subwrold_decorator
def get_data_modulo_expert_parallel_group_gloo(with_context_parallel=False):
    """Return the gloo flavour of the data-modulo-expert parallel group.

    With ``with_context_parallel`` set, the context-parallel variant is
    returned. Asserts that the selected group is not ``None``.
    """
    if with_context_parallel:
        group = _DATA_MODULO_EXPERT_PARALLEL_GROUP_WITH_CP_GLOO
        error = 'data modulo expert parallel group-gloo with context parallel is not initialized'
    else:
        group = _DATA_MODULO_EXPERT_PARALLEL_GROUP_GLOO
        error = 'data modulo expert parallel group-gloo is not initialized'
    assert group is not None, error
    return group


@subwrold_decorator
def get_tensor_model_parallel_world_size():
    """Return the size of the tensor model parallel group.

    An explicitly stored override wins; otherwise the size is queried from
    ``torch.distributed`` for the tensor model parallel group.
    """
    if _MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE is None:
        return torch.distributed.get_world_size(group=get_tensor_model_parallel_group())
    return _MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE


@subwrold_decorator
def get_pipeline_model_parallel_world_size():
    """Return the size of the pipeline model parallel group.

    An explicitly stored override wins; otherwise the size is queried from
    ``torch.distributed`` for the pipeline model parallel group.
    """
    if _MPU_PIPELINE_MODEL_PARALLEL_WORLD_SIZE is None:
        return torch.distributed.get_world_size(group=get_pipeline_model_parallel_group())
    return _MPU_PIPELINE_MODEL_PARALLEL_WORLD_SIZE


@subwrold_decorator
def get_tensor_model_parallel_rank():
    """Return the caller's rank inside the tensor model parallel group.

    An explicitly stored override wins; otherwise the rank is queried from
    ``torch.distributed`` for the tensor model parallel group.
    """
    if _MPU_TENSOR_MODEL_PARALLEL_RANK is None:
        return torch.distributed.get_rank(group=get_tensor_model_parallel_group())
    return _MPU_TENSOR_MODEL_PARALLEL_RANK
@subwrold_decorator
def get_pipeline_model_parallel_rank(is_global=False):
    """Return the caller's rank in the pipeline model parallel group.

    Args:
        is_global: When true, delegate to ``get_global_pipeline_parallel_rank``
            instead of reporting the rank inside the local pipeline group.
    """
    if is_global:
        return get_global_pipeline_parallel_rank()
    if _MPU_PIPELINE_MODEL_PARALLEL_RANK is not None:
        return _MPU_PIPELINE_MODEL_PARALLEL_RANK
    return torch.distributed.get_rank(group=get_pipeline_model_parallel_group())


@subwrold_decorator
def get_pipeline_model_parallel_split_rank():
    """Return the stored pipeline model parallel split rank."""
    return _PIPELINE_MODEL_PARALLEL_SPLIT_RANK


def is_rank_in_embedding_group(ignore_virtual=False):
    """Tell whether the current global rank takes part in the embedding group.

    Args:
        ignore_virtual: When true, only membership in the embedding rank list
            is checked. Otherwise the boundary ranks of that list must also be
            on the first/last pipeline stage (only the last one is checked
            when ``get_args().multimodal`` is set).
    """
    rank = torch.distributed.get_rank()
    if ignore_virtual:
        return rank in _EMBEDDING_GLOBAL_RANKS
    if rank not in _EMBEDDING_GLOBAL_RANKS:
        return False
    # In multimodal mode the first embedding rank is never stage-checked.
    if not get_args().multimodal and rank == _EMBEDDING_GLOBAL_RANKS[0]:
        return _is_pipeline_first_stage()
    if rank == _EMBEDDING_GLOBAL_RANKS[-1]:
        return _is_pipeline_last_stage()
    return True


@subwrold_decorator
def is_rank_in_position_embedding_group():
    """Tell whether the current global rank is in the position embedding group."""
    return torch.distributed.get_rank() in _POSITION_EMBEDDING_GLOBAL_RANKS


@subwrold_decorator
def get_virtual_pipeline_model_parallel_rank():
    """Return the stored virtual pipeline-parallel rank."""
    return _VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK


@subwrold_decorator
def get_virtual_pipeline_model_parallel_world_size():
    """Return the stored virtual pipeline-parallel world size."""
    return _VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE
@subwrold_decorator
def get_data_parallel_src_rank(with_context_parallel=False):
    """Return the first global rank listed for the caller's data parallel group.

    With ``with_context_parallel`` set, the rank list that combines data and
    context parallelism is used instead.
    """
    if with_context_parallel:
        ranks = _DATA_PARALLEL_GLOBAL_RANKS_WITH_CP
        error = "Data parallel group with context parallel combined is not initialized"
    else:
        ranks = _DATA_PARALLEL_GLOBAL_RANKS
        error = "Data parallel group is not initialized"
    assert ranks is not None, error
    return ranks[0]


@subwrold_decorator
def get_pipeline_model_parallel_first_rank():
    """Return the global rank of the first process in the caller's pipeline."""
    ranks = _PIPELINE_GLOBAL_RANKS
    assert ranks is not None, "Pipeline parallel group is not initialized"
    return ranks[0]


@subwrold_decorator
def get_pipeline_model_parallel_last_rank():
    """Return the global rank of the last process in the caller's pipeline."""
    assert _PIPELINE_GLOBAL_RANKS is not None, "Pipeline parallel group is not initialized"
    return _PIPELINE_GLOBAL_RANKS[get_pipeline_model_parallel_world_size() - 1]


@subwrold_decorator
def get_pipeline_model_parallel_next_rank():
    """Return the global rank that follows the caller in the pipeline (wrapping around)."""
    assert _PIPELINE_GLOBAL_RANKS is not None, "Pipeline parallel group is not initialized"
    local_rank = get_pipeline_model_parallel_rank()
    pipeline_size = get_pipeline_model_parallel_world_size()
    next_index = (local_rank + 1) % pipeline_size
    return _PIPELINE_GLOBAL_RANKS[next_index]


@subwrold_decorator
def get_pipeline_model_parallel_prev_rank():
    """Return the global rank that precedes the caller in the pipeline (wrapping around)."""
    assert _PIPELINE_GLOBAL_RANKS is not None, "Pipeline parallel group is not initialized"
    local_rank = get_pipeline_model_parallel_rank()
    pipeline_size = get_pipeline_model_parallel_world_size()
    prev_index = (local_rank - 1) % pipeline_size
    return _PIPELINE_GLOBAL_RANKS[prev_index]
@subwrold_decorator
def get_expert_model_parallel_world_size():
    """Return world size for the expert model parallel group.

    A stored override is returned when it is not ``None`` (matching the
    tensor/pipeline getters). Otherwise the size is derived as the
    tensor-and-expert group size divided by the tensor parallel size, or 0
    when ``torch.distributed`` is unavailable or not initialized.
    """
    if _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE is not None:
        return _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        tensor_and_expert_parallel_world_size = torch.distributed.get_world_size(
            group=get_tensor_and_expert_parallel_group()
        )
        return tensor_and_expert_parallel_world_size // get_tensor_model_parallel_world_size()
    else:
        return 0


@subwrold_decorator
def get_expert_model_parallel_rank():
    """Return my rank for the expert parallel group.

    A stored override is returned when it is not ``None``. The previous
    truthiness test silently ignored an explicit override of rank 0.
    Otherwise the rank is derived as the tensor-and-expert rank divided by the
    tensor parallel size, or 0 when ``torch.distributed`` is unavailable or
    not initialized.
    """
    if _MPU_EXPERT_MODEL_PARALLEL_RANK is not None:
        return _MPU_EXPERT_MODEL_PARALLEL_RANK
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        tensor_and_expert_parallel_rank = torch.distributed.get_rank(
            group=get_tensor_and_expert_parallel_group()
        )
        return tensor_and_expert_parallel_rank // get_tensor_model_parallel_world_size()
    else:
        return 0


@subwrold_decorator
def get_global_memory_buffer():
    """Return the global GlobalMemoryBuffer object.

    Raises:
        RuntimeError: If the buffer is still ``None``.
    """
    if _GLOBAL_MEMORY_BUFFER is None:
        raise RuntimeError('global memory buffer is not initialized')
    return _GLOBAL_MEMORY_BUFFER


@subwrold_decorator
def get_moe_layer_wise_logging_tracker():
    """Return the moe layer wise tracker."""
    global _MOE_LAYER_WISE_LOGGING_TRACKER
    return _MOE_LAYER_WISE_LOGGING_TRACKER


def get_global_pipeline_parallel_rank():
    """Return the caller's pipeline rank counted across all dist-train models.

    Adds up ``pipeline_model_parallel_size`` of every config whose
    ``model_index`` is smaller than the current model's, then adds the local
    pipeline rank and stops.
    """
    # NOTE(review): presumably get_all_config() yields configs in ascending
    # model_index order; the early break relies on that -- confirm.
    all_config = get_all_config()
    global_pp_rank = 0
    for config in all_config.values():
        if config.model_index < get_dist_model_config().model_index:
            global_pp_rank += config.pipeline_model_parallel_size
        else:
            global_pp_rank += get_pipeline_model_parallel_rank()
            break
    return global_pp_rank
def get_forward_backward_func_wrapper(get_forward_backward_func):
    """Wrap the schedule selector so dist-train runs use the schedule defined here.

    When ``get_args().dist_train`` is set, the wrapper returns
    ``forward_backward_pipelining_without_interleaving`` from this module;
    otherwise it defers to the wrapped selector.
    """
    @wraps(get_forward_backward_func)
    def wrapper(*args, **kwargs):
        if not get_args().dist_train:
            return get_forward_backward_func(*args, **kwargs)
        return forward_backward_pipelining_without_interleaving

    return wrapper
def p2p_ops_wrapper(p2p_ops):
    """Wrap the p2p primitive so dist-train runs are routed to ``_p2p_ops``."""
    @wraps(p2p_ops)
    def wrapper(*args, **kwargs):
        target = _p2p_ops if get_args().dist_train else p2p_ops
        return target(*args, **kwargs)
    return wrapper


def _p2p_ops(
    *,
    tensor_send_prev: Optional[torch.Tensor],
    tensor_recv_prev: Optional[torch.Tensor],
    tensor_send_next: Optional[torch.Tensor],
    tensor_recv_next: Optional[torch.Tensor],
    group: torch.distributed.ProcessGroup
):
    """Issue the requested isend/irecv calls and return their request handles.

    Only tensors that are not ``None`` produce a request. The order in which
    the four operations are issued depends on the parity of the global
    pipeline rank, to prevent deadlocks caused by different pipeline stages
    receiving tensors simultaneously.
    """
    def _send(tensor, peer):
        return torch.distributed.isend(tensor=tensor, dst=peer(), group=group)

    def _recv(tensor, peer):
        return torch.distributed.irecv(tensor=tensor, src=peer(), group=group)

    # Peer ranks are resolved lazily, only for operations that actually run.
    send_next = (_send, tensor_send_next, get_pipeline_model_parallel_next_rank)
    recv_prev = (_recv, tensor_recv_prev, get_pipeline_model_parallel_prev_rank)
    send_prev = (_send, tensor_send_prev, get_pipeline_model_parallel_prev_rank)
    recv_next = (_recv, tensor_recv_next, get_pipeline_model_parallel_next_rank)

    if get_pipeline_model_parallel_rank(is_global=True) % 2 == 0:
        plan = (send_next, recv_prev, send_prev, recv_next)
    else:
        plan = (recv_prev, send_next, recv_next, send_prev)

    return [op(tensor, peer) for op, tensor, peer in plan if tensor is not None]
def get_tensor_shapes(
    *,
    rank: int,
    model_type: ModelType,
    seq_length: int,
    micro_batch_size: int,
    decoder_seq_length: int,
    config,
):
    """Describe the tensors exchanged with a neighbouring pipeline stage.

    Sequence lengths are divided by the context parallel size and, when
    ``config.sequence_parallel`` is set, by the tensor parallel size as well.
    For an encoder-and-decoder model, a rank that is not before the split gets
    two entries (decoder first, then encoder); every other case gets one.

    Returns:
        A list of ``(seq, micro_batch, hidden)`` tuples, or, when multi-parameter
        send/recv is enabled, a list of ``{'shape': ..., 'dtype': ...}`` dicts
        using ``config.params_dtype``.
    """
    is_enc_dec = model_type == ModelType.encoder_and_decoder

    seq_length = seq_length // get_context_parallel_world_size()
    if is_enc_dec:
        decoder_seq_length = decoder_seq_length // get_context_parallel_world_size()

    if config.sequence_parallel:
        seq_length = seq_length // get_tensor_model_parallel_world_size()
        if is_enc_dec:
            decoder_seq_length = decoder_seq_length // get_tensor_model_parallel_world_size()

    encoder_shape = (seq_length, micro_batch_size, config.hidden_size)
    if is_enc_dec and not is_pipeline_stage_before_split(rank):
        shapes = [
            (decoder_seq_length, micro_batch_size, config.hidden_size),
            encoder_shape,
        ]
    else:
        shapes = [encoder_shape]

    if is_use_multiparam_send_recv():
        return [{'shape': shape, 'dtype': config.params_dtype} for shape in shapes]
    return shapes
def forward_backward_pipelining_without_interleaving(
    *,
    forward_step_func,
    data_iterator: Union[Iterator, List[Iterator]],
    model: Union[torch.nn.Module, List[torch.nn.Module]],
    num_microbatches: int,
    seq_length: int,
    micro_batch_size: int,
    decoder_seq_length: int = None,
    forward_only: bool = False,
    collect_non_loss_data: bool = False,
    first_val_step: bool = None,
):
    """
    Run non-interleaved 1F1B schedule, with communication between pipeline stages.
    Returns dictionary with losses if the last stage, empty dict otherwise.

    Dist-train specifics visible in this function:
      * ``forward_only`` is overridden by the dist model config when that
        config has a ``forward_only`` attribute.
      * The warmup depth is computed from the pipeline sizes of this model and
        every model after it, not only from the local pipeline group.
      * All stage-to-stage communication goes through the local
        ``send_*``/``recv_*`` helpers, steered by the mask returned from
        ``generate_send_recv_mask``.

    Raises:
        ValueError: If ``model`` or ``data_iterator`` is a list whose length
            is not 1, or if ``config.overlap_p2p_comm`` is set.
    """
    model_config = get_dist_model_config()
    if hasattr(model_config, 'forward_only'):
        forward_only = model_config.forward_only
    if isinstance(model, list):
        if len(model) != 1:
            raise ValueError(
                "non-interleaved pipeline parallelism does not support model chunking"
            )
        model = model[0]
    if isinstance(data_iterator, list):
        if len(data_iterator) != 1:
            # NOTE(review): the message below says "non-pipeline-parallel",
            # which looks copied from another schedule -- confirm wording.
            raise ValueError(
                "non-pipeline-parallel schedule does not support model chunking"
            )
        data_iterator = data_iterator[0]

    config = get_model_config(model)
    # Output deallocation is forced off for this schedule.
    config.deallocate_pipeline_outputs = False
    if config.overlap_p2p_comm:
        raise ValueError(
            "Non-interleaved pipeline parallelism does not support overlapping p2p communication"
        )

    # Needed only when gradients are finalized in M-Core
    if config.finalize_model_grads_func is not None and not forward_only:
        embedding_module = schedules.clear_embedding_activation_buffer(config, model)

    if config.timers is not None:
        config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time)

    # Disable async grad reductions
    no_sync_func = config.no_sync_func
    if no_sync_func is None:
        no_sync_func = contextlib.nullcontext
    no_sync_context = None

    def disable_grad_sync():
        """Disable asynchronous grad reductions"""
        nonlocal no_sync_context
        if no_sync_context is None:
            no_sync_context = no_sync_func()
            no_sync_context.__enter__()

    def enable_grad_sync():
        """Enable asynchronous grad reductions"""
        nonlocal no_sync_context
        if no_sync_context is not None:
            no_sync_context.__exit__(None, None, None)
            no_sync_context = None

    disable_grad_sync()

    # Compute number of warmup microbatches.
    # The count is the sum of pipeline sizes from this model's index through
    # the last configured model, minus this rank's local pipeline rank, minus
    # one, capped at num_microbatches.
    rank = get_pipeline_model_parallel_rank()
    model_config = get_dist_model_config(rank=torch.distributed.get_rank())
    num_warmup_microbatches = 0
    for index in range(model_config.model_index, get_all_config_size()):
        num_warmup_microbatches += get_dist_model_config(global_index=index).pipeline_model_parallel_size
    num_warmup_microbatches = num_warmup_microbatches - rank - 1
    num_warmup_microbatches = min(num_warmup_microbatches, num_microbatches)
    num_microbatches_remaining = num_microbatches - num_warmup_microbatches

    max_outstanding_backprops = None
    if config.num_microbatches_with_partial_activation_checkpoints is not None:
        max_outstanding_backprops = num_warmup_microbatches + 1

    model_type = get_model_type(model)

    # Forward-only models use the shape helper defined in this module;
    # all others use the stock Megatron one.
    get_shape_func = schedules.get_tensor_shapes if not is_forward_only_model() else get_tensor_shapes

    recv_tensor_shapes = get_shape_func(
        rank=rank - 1,
        model_type=model_type,
        seq_length=seq_length,
        micro_batch_size=micro_batch_size,
        decoder_seq_length=decoder_seq_length,
        config=config,
    )
    send_tensor_shapes = get_shape_func(
        rank=rank,
        model_type=model_type,
        seq_length=seq_length,
        micro_batch_size=micro_batch_size,
        decoder_seq_length=decoder_seq_length,
        config=config,
    )

    # Mask with keys such as 'send_forward'/'recv_forward' that the helper
    # functions consult to pick the dist-train path or the stock p2p path.
    send_recv_ops = generate_send_recv_mask(torch.distributed.get_rank())

    # Input, output tensors only need to be saved when doing backward passes
    input_tensors = None
    output_tensors = None
    total_num_tokens = torch.tensor(0, dtype=torch.int).cuda()

    if not forward_only:
        input_tensors = []
        output_tensors = []
    forward_data_store = []

    # Run warmup forward passes.
    for i in range(num_warmup_microbatches):
        # Decide to checkpoint all layers' activations of the current micro-batch
        if max_outstanding_backprops is not None:
            checkpoint_activations_microbatch = (
                i % max_outstanding_backprops
                >= config.num_microbatches_with_partial_activation_checkpoints
            )
        else:
            checkpoint_activations_microbatch = None

        input_tensor = recv_forward(recv_tensor_shapes, config, send_recv_ops)
        output_tensor, num_tokens = schedules.forward_step(
            forward_step_func,
            data_iterator,
            model,
            num_microbatches,
            input_tensor,
            forward_data_store,
            config,
            collect_non_loss_data,
            checkpoint_activations_microbatch,
            schedules.check_first_val_step(first_val_step, forward_only, i == 0),
            current_microbatch=i,
        )
        send_forward(output_tensor, send_tensor_shapes, config, send_recv_ops)
        total_num_tokens += num_tokens.item()

        if not forward_only:
            input_tensors.append(input_tensor)
            output_tensors.append(output_tensor)
            schedules.deallocate_output_tensor(output_tensor[0], config.deallocate_pipeline_outputs)

    # Before running 1F1B, need to receive first forward tensor.
    # If all microbatches are run in warmup / cooldown phase, then no need to
    # receive this tensor here.
    if num_microbatches_remaining > 0:
        input_tensor = recv_forward(recv_tensor_shapes, config, send_recv_ops)

    # Run 1F1B in steady state.
    for i in range(num_microbatches_remaining):
        last_iteration = i == (num_microbatches_remaining - 1)

        # Decide to checkpoint all layers' activations of the current micro-batch
        if max_outstanding_backprops is not None:
            checkpoint_activations_microbatch = (
                (i + num_warmup_microbatches) % max_outstanding_backprops
            ) >= config.num_microbatches_with_partial_activation_checkpoints
        else:
            checkpoint_activations_microbatch = None

        output_tensor, num_tokens = schedules.forward_step(
            forward_step_func,
            data_iterator,
            model,
            num_microbatches,
            input_tensor,
            forward_data_store,
            config,
            collect_non_loss_data,
            checkpoint_activations_microbatch,
            schedules.check_first_val_step(
                first_val_step, forward_only, (i == 0) and (num_warmup_microbatches == 0)
            ),
            current_microbatch=i + num_warmup_microbatches,
        )
        total_num_tokens += num_tokens.item()

        if forward_only:
            send_forward(output_tensor, send_tensor_shapes, config, send_recv_ops)

            if not last_iteration:
                input_tensor = recv_forward(recv_tensor_shapes, config, send_recv_ops)

        else:
            output_tensor_grad = send_forward_recv_backward(
                output_tensor, send_tensor_shapes, config, send_recv_ops
            )

            # Add input_tensor and output_tensor to end of list.
            input_tensors.append(input_tensor)
            output_tensors.append(output_tensor)
            schedules.deallocate_output_tensor(output_tensor[0], config.deallocate_pipeline_outputs)

            # Pop input_tensor and output_tensor from the start of the list for
            # the backward pass.
            input_tensor = input_tensors.pop(0)
            output_tensor = output_tensors.pop(0)

            # Enable grad sync for the last microbatch in the batch if the full
            # backward pass completes in the 1F1B stage.
            if num_warmup_microbatches == 0 and last_iteration:
                if config.grad_sync_func is None or rank == 0:
                    enable_grad_sync()

            input_tensor_grad = _backward_step(
                input_tensor, output_tensor, output_tensor_grad, model_type, config
            )

            if last_iteration:
                input_tensor = None
                send_backward(input_tensor_grad, recv_tensor_shapes, config, send_recv_ops)
            else:
                input_tensor = send_backward_recv_forward(
                    input_tensor_grad, recv_tensor_shapes, config, send_recv_ops
                )

    # Run cooldown backward passes.
    if not forward_only:
        for i in range(num_warmup_microbatches):

            # Enable async grad reduction in the last backward pass
            # Note: If grad sync function is provided, only enable
            # async grad reduction in first pipeline stage. Other
            # pipeline stages do grad reduction during pipeline
            # bubble.
            if i == num_warmup_microbatches - 1:
                if config.grad_sync_func is None or rank == 0:
                    enable_grad_sync()

            input_tensor = input_tensors.pop(0)
            output_tensor = output_tensors.pop(0)

            output_tensor_grad = recv_backward(send_tensor_shapes, config, send_recv_ops)

            input_tensor_grad = _backward_step(
                input_tensor, output_tensor, output_tensor_grad, model_type, config
            )

            send_backward(input_tensor_grad, recv_tensor_shapes, config, send_recv_ops)

        # Launch any remaining grad reductions.
        if no_sync_context is not None:
            enable_grad_sync()
            if config.grad_sync_func is not None:
                config.grad_sync_func(model.parameters())

    if config.finalize_model_grads_func is not None and not forward_only:

        # If defer_embedding_wgrad_compute is enabled we need to do the
        # weight gradient GEMM's here.
        schedules.finish_embedding_wgrad_compute(config, embedding_module)

        # Finalize model grads (perform full grad all-reduce / reduce-scatter for
        # data parallelism, layernorm all-reduce for sequence parallelism, and
        # embedding all-reduce for pipeline parallelism).
        config.finalize_model_grads_func(
            [model], total_num_tokens if config.calculate_per_token_loss else None
        )

    if config.timers is not None:
        config.timers('forward-backward').stop()

    return forward_data_store
def _backward_step(*args, **kwargs):
    """Run the backward step, using the multi-parameter variant when enabled."""
    if not is_use_multiparam_send_recv():
        return schedules.backward_step(*args, **kwargs)

    from mindspeed.core.pipeline_parallel.multiparameter_schedules import backward_step
    return backward_step(*args, **kwargs)


def get_send_recv_fun():
    """Pick the transfer routine matching the multi-parameter setting."""
    return send_recv_tensor_list if is_use_multiparam_send_recv() else send_recv


def post_process_for_recving(recv_tensors: List):
    """Reduce the raw receive result to the list handed to the schedule.

    In multi-parameter mode the first element of every received entry is
    kept; otherwise only the first received entry is kept.
    """
    if not is_use_multiparam_send_recv():
        return [recv_tensors[0]]
    return [entry[0] for entry in recv_tensors]


def send_forward(output_tensors, tensor_shapes, config, send_recv_ops):
    """Send forward outputs, via dist-train transfer when the mask asks for it."""
    if not send_recv_ops['send_forward']:
        schedules.send_forward(output_tensors, tensor_shapes, config)
        return
    transfer = get_send_recv_fun()
    transfer(output_tensors, False, get_dst_ranks())


def recv_forward(tensor_shapes, config, send_recv_ops):
    """Receive forward inputs, via dist-train transfer when the mask asks for it."""
    if not send_recv_ops['recv_forward']:
        return schedules.recv_forward(tensor_shapes, config)
    transfer = get_send_recv_fun()
    received = transfer(None, True, get_dst_ranks())
    return post_process_for_recving(received)


def send_backward(input_tensor_grad, tensor_shapes, config, send_recv_ops):
    """Send input gradients, via dist-train transfer when the mask asks for it."""
    if not send_recv_ops['send_backward']:
        schedules.send_backward(input_tensor_grad, tensor_shapes, config)
        return
    transfer = get_send_recv_fun()
    transfer(input_tensor_grad, False, get_dst_ranks())


def send_forward_recv_backward(output_tensor, tensor_shape, config, send_recv_ops):
    """Send forward outputs and receive the matching output gradients.

    The dist-train transfer is used only when the mask enables both
    'send_forward' and 'recv_backward'; otherwise the stock schedule helper
    handles the exchange.
    """
    if not send_recv_ops['send_forward'] or not send_recv_ops['recv_backward']:
        return schedules.send_forward_recv_backward(output_tensor, tensor_shape, config)
    transfer = get_send_recv_fun()
    received = transfer(output_tensor, True, get_dst_ranks())
    return post_process_for_recving(received)
def send_backward_recv_forward(input_tensor_grad, tensor_shapes, config, send_recv_ops):
    """Send input gradients and receive the next forward input.

    The dist-train transfer is used when the mask enables 'recv_forward'. If
    'send_backward' is disabled in that case, a placeholder ``[[]]`` is sent
    instead of the gradients (forward_only model adaption).
    """
    if not send_recv_ops['recv_forward']:
        return schedules.send_backward_recv_forward(input_tensor_grad, tensor_shapes, config)

    if not send_recv_ops['send_backward']:  # forward_only model adaption
        input_tensor_grad = [[]]
    transfer = get_send_recv_fun()
    received = transfer(input_tensor_grad, True, get_dst_ranks())
    return post_process_for_recving(received)


def recv_backward(tensor_shapes, config, send_recv_ops):
    """Receive output gradients, via dist-train transfer when the mask asks for it."""
    if not send_recv_ops['recv_backward']:
        return schedules.recv_backward(tensor_shapes, config)
    transfer = get_send_recv_fun()
    received = transfer(None, True, get_dst_ranks())
    return post_process_for_recving(received)
def initialize_distributed_wrapper(_initialize_distributed):
    """Run distributed initialization with the summed dist-train world size.

    While the wrapped function runs, ``args.world_size`` is replaced by the
    sum of ``world_size`` over every config returned by ``get_all_config()``.
    The original value is restored afterwards, including when the wrapped
    function raises. Arguments and the return value are passed through
    unchanged, so the wrapper also works for initializers that take
    parameters.
    """
    @wraps(_initialize_distributed)
    def wrapper(*fn_args, **fn_kwargs):
        args = get_args()
        original_world_size = args.world_size
        args.world_size = sum(cfg.world_size for cfg in get_all_config().values())
        try:
            return _initialize_distributed(*fn_args, **fn_kwargs)
        finally:
            # Always undo the temporary override so a failed init does not
            # leave the global args in a patched state.
            args.world_size = original_world_size

    return wrapper
class AdaptiveCpOpBuilder(MindSpeedOpBuilder):
    """Builder for the ``adaptive_cp`` op."""

    OP_NAME = "adaptive_cp"

    def __init__(self):
        super().__init__(self.OP_NAME)

    def sources(self):
        """Return the C++ source files of the op."""
        return ['ops/csrc/algorithm/adaptive_cp/adaptive_cp.cpp']


class AdaptiveRecomputingPluggableAllocatorBuilder(MindSpeedOpBuilder):
    """Builder for the adaptive-recomputing pluggable allocator extension."""

    NAME = "AdaptiveRecomputing"
    _torch_path = None

    def __init__(self):
        # torch is imported here, not at module import time.
        import torch
        torch_file = os.path.abspath(torch.__file__)
        self._torch_path = os.path.dirname(torch_file)
        super().__init__(self.NAME)

    def include_paths(self):
        """Extend the base include paths with torch and ACL header folders."""
        extra = [
            os.path.join(self._torch_path, 'include'),
            os.path.join(self._torch_path, 'include/torch/csrc/api/include'),
            os.path.join(self._torch_npu_path, 'include/third_party/acl/inc/acl/'),
        ]
        paths = super().include_paths()
        paths += extra
        return paths

    def sources(self):
        """Return the C++ source files of the allocator."""
        return ['ops/csrc/pluggable_allocator/adaptive_recomputing/NpuCachingCustomAllocator.cpp']

    def cxx_args(self):
        """Return the compiler flags for this extension (base flags are not reused)."""
        return [
            '-fstack-protector-all',
            '-Wl,-z,relro,-z,now,-z,noexecstack',
            '-fPIC',
            '-pie',
            '-s',
            '-D_FORTIFY_SOURCE=2',
            '-O2',
            "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'",
        ]

    def extra_ldflags(self):
        """Return linker flags that link against ``torch_npu``."""
        npu_lib_dir = os.path.join(self._torch_npu_path, 'lib')
        return ['-L' + npu_lib_dir, '-ltorch_npu']
class AlgorithmOpBuilder(MindSpeedOpBuilder):
    """Builder for the ``algorithm`` op."""

    OP_NAME = "algorithm"

    def __init__(self):
        super().__init__(self.OP_NAME)

    def sources(self):
        """Return the C++ source files of the op."""
        return ['ops/csrc/algorithm/algorithm.cpp']


class AtbOpBuilder(MindSpeedOpBuilder):
    """Base builder for ops that depend on the Ascend Transformer Boost (ATB) library."""

    def __init__(self, name):
        super().__init__(name)

    def get_atb_path(self):
        """Return the ATB home directory taken from ``ATB_HOME_PATH``.

        Raises:
            RuntimeError: If the environment variable is not set.
        """
        # 'ATB_HOME_PATH' refers to home path of Ascend Transformer Boost lib,
        # which is used for ops loading.
        atb_home = os.getenv('ATB_HOME_PATH')
        if atb_home is None:
            raise RuntimeError("please install cann-nnal package first")
        return atb_home

    def include_paths(self):
        """Extend the base include paths with ACL, extension and ATB headers."""
        paths = super().include_paths()
        acl_inc = os.path.join(self._torch_npu_path, 'include', 'third_party', 'acl', 'inc')
        paths.append(acl_inc)
        paths.append('ops/csrc/atb/extensions/inc')
        atb_inc = os.path.join(self.get_atb_path(), 'include')
        paths.append(atb_inc)
        return paths

    def cxx_args(self):
        """Extend the base compiler flags with ATB-specific options."""
        atb_flags = [
            '-Wno-sign-compare',
            '-Wno-deprecated-declarations',
            '-Wno-return-type',
            "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'",
            '-D ENABLE_ATB',
            '-fstack-protector-all',
            '-Wl,-z,relro,-z,now,-z,noexecstack',
            '-fPIC', '-pie', '-s'
        ]
        args = super().cxx_args()
        args += atb_flags
        return args

    def extra_ldflags(self):
        """Return linker flags for ``torch_npu`` and the ATB libraries."""
        npu_lib = '-L' + os.path.join(self._torch_npu_path, 'lib')
        atb_lib = '-L' + os.path.join(self.get_atb_path(), 'lib')
        return [
            npu_lib, '-ltorch_npu',
            atb_lib, '-lasdops',
            atb_lib, '-llcal',
            atb_lib, '-latb',
        ]

    def load(self, verbose=True):
        """Set the ASDOPS environment variables if needed, then load the op.

        If either 'ASDOPS_HOME_PATH' or 'ASDOPS_OPS_PATH' is unset, both are
        set from the ATB home path before delegating to the base ``load``.
        """
        required = ('ASDOPS_HOME_PATH', 'ASDOPS_OPS_PATH')
        if any(os.getenv(key) is None for key in required):
            # 'ASDOPS_HOME_PATH' refers to home path of Ascend ops, which is used for ops loading.
            os.environ['ASDOPS_HOME_PATH'] = self.get_atb_path()
            # 'ASDOPS_OPS_PATH' refers to ops path of Ascend ops, which is used for ops loading.
            os.environ['ASDOPS_OPS_PATH'] = os.path.join(self.get_atb_path(), 'ops')
        return super().load()
+ return super().load() \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/op_builder/builder.py b/model/train/yoco_moe/mindspeed/op_builder/builder.py new file mode 100644 index 000000000..8cacac2a3 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/op_builder/builder.py @@ -0,0 +1,77 @@ +import re +import os +from abc import ABC, abstractmethod +from typing import List, Union +from torch.utils.cpp_extension import load +from torch.library import Library +import torch_npu +import mindspeed + +ASCEND_HOME_PATH = "ASCEND_HOME_PATH" +AS_LIBRARY = Library("mindspeed", "DEF") + + +class MindSpeedOpBuilder(ABC): + _cann_path = None + _torch_npu_path = None + _cann_version = None + _loaded_ops = {} + + def __init__(self, name): + self.name = name + self._cann_path = self.get_cann_path() + self._torch_npu_path = os.path.dirname(os.path.abspath(torch_npu.__file__)) + + def get_cann_path(self): + if ASCEND_HOME_PATH in os.environ and os.path.exists(os.environ[ASCEND_HOME_PATH]): + return os.environ[ASCEND_HOME_PATH] + return None + + def get_absolute_paths(self, paths): + mindspeed_path = os.path.abspath(os.path.dirname(mindspeed.__file__)) + return [os.path.join(mindspeed_path, path) for path in paths] + + def register_op_proto(self, op_proto: Union[str, List[str]]): + if isinstance(op_proto, str): + op_proto = [op_proto] + for proto in op_proto: + AS_LIBRARY.define(proto) + + @abstractmethod + def sources(self): + ... 
+ + def include_paths(self): + paths = [ + os.path.join(self._torch_npu_path, 'include'), + os.path.join(self._torch_npu_path, 'include/third_party/hccl/inc'), + os.path.join(self._torch_npu_path, 'include/third_party/acl/inc'), + os.path.join(self._cann_path, 'include'), + ] + return paths + + def cxx_args(self): + args = ['-fstack-protector-all', '-Wl,-z,relro,-z,now,-z,noexecstack', '-fPIC', '-pie', + '-s', '-fvisibility=hidden', '-D_FORTIFY_SOURCE=2', '-O2'] + return args + + def extra_ldflags(self): + flags = [ + '-L' + os.path.join(self._cann_path, 'lib64'), '-lascendcl', + '-L' + os.path.join(self._torch_npu_path, 'lib'), '-ltorch_npu' + ] + return flags + + def load(self, verbose=True): + if self.name in __class__._loaded_ops: + return __class__._loaded_ops[self.name] + + op_module = load(name=self.name, + sources=self.get_absolute_paths(self.sources()), + extra_include_paths=self.get_absolute_paths(self.include_paths()), + extra_cflags=self.cxx_args(), + extra_ldflags=self.extra_ldflags(), + verbose=verbose) + __class__._loaded_ops[self.name] = op_module + + return op_module diff --git a/model/train/yoco_moe/mindspeed/op_builder/ffn_builder.py b/model/train/yoco_moe/mindspeed/op_builder/ffn_builder.py new file mode 100644 index 000000000..276308183 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/op_builder/ffn_builder.py @@ -0,0 +1,373 @@ +# Copyright (c) 2024, Huawei Technologies. 
+
+
+from typing import (
+    Any,
+    Callable,
+    ContextManager,
+    Iterator,
+    List,
+    Literal,
+    NamedTuple,
+    Optional,
+    Sequence,
+    Tuple,
+    TypeVar,
+    Union,
+    overload,
+)
+
+from mindspeed.op_builder.builder import MindSpeedOpBuilder, AS_LIBRARY
+
+import torch
+import torchair
+from torch.library import Library, impl
+
+torch_npu_api_version = None
+try:
+    from torchair import ge
+    from torchair.ge import Tensor, TensorSpec, DataType
+    from torchair import register_fx_node_ge_converter
+except ImportError:
+    torch_npu_api_version = 1  # fallback imports: FFNV1 below builds the graph node by hand
+    from torchair.ge_concrete_graph import ge_apis as ge
+    from torchair.ge_concrete_graph.fx2ge_converter import register_fx_node_ge_converter
+    from torchair.ge_concrete_graph.ge_graph import Tensor, TensorSpec, DataType
+    from torchair.ge_concrete_graph.ge_graph import get_default_ge_graph, next_unique_name
+    from torchair.ge_concrete_graph.ge_graph import compat_as_bytes
+    from torchair.ge_concrete_graph.ge_graph import get_invalid_desc
+else:
+    torch_npu_api_version = 2  # torchair.ge imported cleanly: FFNV2 below uses torchair.ge.custom_op
+
+
+class FFNOpBuilder(MindSpeedOpBuilder):
+    OP_NAME = "npu_ffn"
+    OP_PROTO = "npu_ffn(Tensor x, Tensor weight1, Tensor weight2, str activation, *, Tensor? expert_tokens=None, \
+        Tensor? expert_tokens_index=None, Tensor? bias1=None, Tensor? bias2=None, Tensor? scale=None, \
+        Tensor? offset=None, Tensor? deq_scale1=None, Tensor? deq_scale2=None, Tensor? antiquant_scale1=None, \
+        Tensor? antiquant_scale2=None, Tensor? antiquant_offset1=None, Tensor? antiquant_offset2=None, \
+        int? inner_precise=None, ScalarType? output_dtype=None) -> Tensor"
+
+    def __init__(self):
+        super(FFNOpBuilder, self).__init__(self.OP_NAME)
+        self.register_op_proto(self.OP_PROTO)
+        self.register_op_ir()
+
+    def sources(self):
+        return ['ops/csrc/cann/ffn.cpp']
+
+    def include_paths(self):
+        paths = super().include_paths()
+        paths += ['ops/csrc/cann/inc']
+        return paths
+
+    def cxx_args(self):
+        args = super().cxx_args()
+        args += [
+            '-Wno-sign-compare',
+            '-Wno-deprecated-declarations',
+            '-Wno-return-type',
+            "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'"
+        ]
+        return args
+
+    def register_op_ir(self):
+        @impl(AS_LIBRARY, "npu_ffn", "Meta")
+        def npu_ffn_forward(x, weight1, weight2, activation, *, expert_tokens=None, expert_tokens_index=None,
+                            bias1=None, bias2=None, scale=None, offset=None, deq_scale1=None, deq_scale2=None,
+                            antiquant_scale1=None, antiquant_scale2=None, antiquant_offset1=None,
+                            antiquant_offset2=None, inner_precise=0, output_dtype=None):
+            dim_list = []  # output shape: x's leading dims followed by weight2's last dim
+            for i in range(0, x.dim() - 1):
+                dim_list.append(x.size(i))
+            dim_list.append(weight2.size(weight2.dim() - 1))
+            if x.dtype == torch.int8:
+                if output_dtype is not None and output_dtype == torch.bfloat16:
+                    return x.new_empty(tuple(dim_list), dtype=torch.bfloat16)
+                else:
+                    return x.new_empty(tuple(dim_list), dtype=torch.float16)
+            else:
+                return x.new_empty(tuple(dim_list))
+
+        @register_fx_node_ge_converter(torch.ops.mindspeed.npu_ffn.default)
+        def convert_npu_ffn(
+            x: Tensor,
+            weight1: Tensor,
+            weight2: Tensor,
+            activation: str,
+            *,
+            expert_tokens: Optional[Tensor] = None,
+            expert_tokens_index: Optional[Tensor] = None,
+            bias1: Optional[Tensor] = None,
+            bias2: Optional[Tensor] = None,
+            scale: Optional[Tensor] = None,
+            offset: Optional[Tensor] = None,
+            deq_scale1: Optional[Tensor] = None,
+            deq_scale2: Optional[Tensor] = None,
+            antiquant_scale1: Optional[Tensor] = None,
+            antiquant_scale2: Optional[Tensor] = None,
+            antiquant_offset1: Optional[Tensor] = None,
+            antiquant_offset2: Optional[Tensor] = None,
+            inner_precise: Optional[int] = 0,
+            output_dtype: Optional[int] = None,
+            meta_outputs: TensorSpec = None
+        ):
+            '''"npu::npu_ffn(Tensor x, Tensor weight1, Tensor weight2, str activation, *, Tensor? expert_tokens=None,
+            Tensor? expert_tokens_index=None, Tensor? bias1=None, Tensor? bias2=None, Tensor? scale=None,
+            Tensor? offset=None, Tensor? deq_scale1=None, Tensor? deq_scale2=None,
+            Tensor? antiquant_scale1=None, Tensor? antiquant_scale2=None, Tensor? antiquant_offset1=None,
+            Tensor? antiquant_offset2=None, int? inner_precise=None, ScalarType? output_dtype=None)
+            -> Tensor
+            "'''
+            tokens_index_flag = False
+            if expert_tokens is not None and expert_tokens_index is not None:
+                raise ValueError("Cannot assign the value to expert_tokens and expert_tokens_index simultaneously!")
+            elif expert_tokens_index is not None:
+                tokens_index_flag = True
+                expert_tokens = expert_tokens_index
+
+            y_dtype = -1
+            if x.dtype == DataType.DT_INT8 and output_dtype is not None:
+                if output_dtype == torch.float16:
+                    y_dtype = 0
+                elif output_dtype == torch.bfloat16:
+                    y_dtype = 1
+                else:
+                    raise NotImplementedError("In the quant scenario, output_dtype should be float16 or bfloat16,"
+                                              "otherwise it should be None!")
+
+            return FFN(x, weight1, weight2, expert_tokens=expert_tokens, bias1=bias1, bias2=bias2, scale=scale,
+                       offset=offset, deq_scale1=deq_scale1, deq_scale2=deq_scale2, antiquant_scale1=antiquant_scale1,
+                       antiquant_scale2=antiquant_scale2, antiquant_offset1=antiquant_offset1,
+                       antiquant_offset2=antiquant_offset2, activation=activation, inner_precise=inner_precise,
+                       output_dtype=y_dtype, tokens_index_flag=tokens_index_flag)
+
+
+FFN = None
+if torch_npu_api_version == 2:
+    def FFNV2(x: Tensor,
+              weight1: Tensor,
+              weight2: Tensor,
+              expert_tokens: Optional[Tensor],
+              bias1: Optional[Tensor],
+              bias2: Optional[Tensor],
+              scale: Optional[Tensor],
+              offset: Optional[Tensor],
+              deq_scale1: Optional[Tensor],
+              deq_scale2: Optional[Tensor],
+              antiquant_scale1: Optional[Tensor],
+              antiquant_scale2: Optional[Tensor],
+              antiquant_offset1: Optional[Tensor],
+              antiquant_offset2: Optional[Tensor],
+              *,
+              activation: str,
+              inner_precise: int = 0,
+              output_dtype: int = -1,
+              tokens_index_flag: bool = False):
+        """REG_OP(FFN)\n
+        .INPUT(x, TensorType({DT_INT8, DT_FLOAT16, DT_BF16}))\n
+        .INPUT(weight1, TensorType({DT_INT8, DT_FLOAT16, DT_BF16, DT_INT4}))\n
+        .INPUT(weight2, TensorType({DT_INT8, DT_FLOAT16, DT_BF16, DT_INT4}))\n
+        .OPTIONAL_INPUT(expert_tokens, TensorType({DT_INT64}))\n
+        .OPTIONAL_INPUT(bias1, TensorType({DT_INT32, DT_FLOAT16, DT_FLOAT}))\n
+        .OPTIONAL_INPUT(bias2, TensorType({DT_INT32, DT_FLOAT16, DT_FLOAT}))\n
+        .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT}))\n
+        .OPTIONAL_INPUT(offset, TensorType({DT_FLOAT}))\n
+        .OPTIONAL_INPUT(deq_scale1, TensorType({DT_UINT64, DT_BF16}))\n
+        .OPTIONAL_INPUT(deq_scale2, TensorType({DT_UINT64, DT_BF16}))\n
+        .OPTIONAL_INPUT(antiquant_scale1, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .OPTIONAL_INPUT(antiquant_scale2, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .OPTIONAL_INPUT(antiquant_offset1, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .OPTIONAL_INPUT(antiquant_offset2, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .OUTPUT(y, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .REQUIRED_ATTR(activation, String)\n
+        .ATTR(inner_precise, Int, 0)\n
+        .ATTR(output_dtype, Int, -1)\n
+        .ATTR(tokens_index_flag, Bool, false)\n
+        """
+
+        y = torchair.ge.custom_op("FFN",
+            inputs={
+                "x": x,
+                "weight1": weight1,
+                "weight2": weight2,
+                "expert_tokens": expert_tokens,
+                "bias1": bias1,
+                "bias2": bias2,
+                "scale": scale,
+                "offset": offset,
+                "deq_scale1": deq_scale1,
+                "deq_scale2": deq_scale2,
+                "antiquant_scale1": antiquant_scale1,
+                "antiquant_scale2": antiquant_scale2,
+                "antiquant_offset1": antiquant_offset1,
+                "antiquant_offset2": antiquant_offset2
+            },
+            attrs={
+                "activation": ge.attr.Str(activation),
+                "inner_precise": ge.attr.Int(inner_precise),
+                "output_dtype": ge.attr.Int(output_dtype),
+                "tokens_index_flag": ge.attr.Bool(tokens_index_flag)
+            },
+            outputs=[
+                "y"
+            ])
+
+        return y
+    FFN = FFNV2
+elif torch_npu_api_version == 1:
+    def FFNV1(x: Tensor,
+              weight1: Tensor,
+              weight2: Tensor,
+              expert_tokens: Optional[Tensor],
+              bias1: Optional[Tensor],
+              bias2: Optional[Tensor],
+              scale: Optional[Tensor],
+              offset: Optional[Tensor],
+              deq_scale1: Optional[Tensor],
+              deq_scale2: Optional[Tensor],
+              antiquant_scale1: Optional[Tensor],
+              antiquant_scale2: Optional[Tensor],
+              antiquant_offset1: Optional[Tensor],
+              antiquant_offset2: Optional[Tensor],
+              *,
+              activation: str,
+              inner_precise: int = 0,
+              output_dtype: int = -1,
+              tokens_index_flag: bool = False,
+              dependencies=None,  # None instead of a shared mutable [] default; treated as "no dependencies"
+              node_name=None):
+        """REG_OP(FFN)\n
+        .INPUT(x, TensorType({DT_INT8, DT_FLOAT16, DT_BF16}))\n
+        .INPUT(weight1, TensorType({DT_INT8, DT_FLOAT16, DT_BF16, DT_INT4}))\n
+        .INPUT(weight2, TensorType({DT_INT8, DT_FLOAT16, DT_BF16, DT_INT4}))\n
+        .OPTIONAL_INPUT(expert_tokens, TensorType({DT_INT64}))\n
+        .OPTIONAL_INPUT(bias1, TensorType({DT_INT32, DT_FLOAT16, DT_FLOAT}))\n
+        .OPTIONAL_INPUT(bias2, TensorType({DT_INT32, DT_FLOAT16, DT_FLOAT}))\n
+        .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT}))\n
+        .OPTIONAL_INPUT(offset, TensorType({DT_FLOAT}))\n
+        .OPTIONAL_INPUT(deq_scale1, TensorType({DT_UINT64, DT_BF16}))\n
+        .OPTIONAL_INPUT(deq_scale2, TensorType({DT_UINT64, DT_BF16}))\n
+        .OPTIONAL_INPUT(antiquant_scale1, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .OPTIONAL_INPUT(antiquant_scale2, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .OPTIONAL_INPUT(antiquant_offset1, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .OPTIONAL_INPUT(antiquant_offset2, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .OUTPUT(y, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .REQUIRED_ATTR(activation, String)\n
+        .ATTR(inner_precise, Int, 0)\n
+        .ATTR(output_dtype, Int, -1)\n
+        .ATTR(tokens_index_flag, Bool, false)\n
+        """
+
+        op = get_default_ge_graph().op.add()
+        op.type = "FFN"
+        op.name = next_unique_name(node_name, "FFN")
+
+        # process dependencies
+        for dependency in dependencies or ():
+            op.input.append(dependency.controller)
+
+        # process inputs
+        op.input.append(x.tensor)
+        op.input_desc.add().CopyFrom(x.desc)
+        op.input_desc[-1].name = "x"
+        op.input.append(weight1.tensor)
+        op.input_desc.add().CopyFrom(weight1.desc)
+        op.input_desc[-1].name = "weight1"
+        op.input.append(weight2.tensor)
+        op.input_desc.add().CopyFrom(weight2.desc)
+        op.input_desc[-1].name = "weight2"
+        if expert_tokens is not None:
+            op.input.append(expert_tokens.tensor)
+            op.input_desc.add().CopyFrom(expert_tokens.desc)
+        else:
+            op.input.append('')
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+        op.input_desc[-1].name = "expert_tokens"
+        if bias1 is not None:
+            op.input.append(bias1.tensor)
+            op.input_desc.add().CopyFrom(bias1.desc)
+        else:
+            op.input.append('')
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+        op.input_desc[-1].name = "bias1"
+        if bias2 is not None:
+            op.input.append(bias2.tensor)
+            op.input_desc.add().CopyFrom(bias2.desc)
+        else:
+            op.input.append('')
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+        op.input_desc[-1].name = "bias2"
+        if scale is not None:
+            op.input.append(scale.tensor)
+            op.input_desc.add().CopyFrom(scale.desc)
+        else:
+            op.input.append('')
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+        op.input_desc[-1].name = "scale"
+        if offset is not None:
+            op.input.append(offset.tensor)
+            op.input_desc.add().CopyFrom(offset.desc)
+        else:
+            op.input.append('')
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+        op.input_desc[-1].name = "offset"
+        if deq_scale1 is not None:
+            op.input.append(deq_scale1.tensor)
+            op.input_desc.add().CopyFrom(deq_scale1.desc)
+        else:
+            op.input.append('')
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+        op.input_desc[-1].name = "deq_scale1"
+        if deq_scale2 is not None:
+            op.input.append(deq_scale2.tensor)
+            op.input_desc.add().CopyFrom(deq_scale2.desc)
+        else:
+            op.input.append('')
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+        op.input_desc[-1].name = "deq_scale2"
+        if antiquant_scale1 is not None:
+            op.input.append(antiquant_scale1.tensor)
+            op.input_desc.add().CopyFrom(antiquant_scale1.desc)
+        else:
+            op.input.append('')
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+        op.input_desc[-1].name = "antiquant_scale1"
+        if antiquant_scale2 is not None:
+            op.input.append(antiquant_scale2.tensor)
+            op.input_desc.add().CopyFrom(antiquant_scale2.desc)
+        else:
+            op.input.append('')
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+        op.input_desc[-1].name = "antiquant_scale2"
+        if antiquant_offset1 is not None:
+            op.input.append(antiquant_offset1.tensor)
+            op.input_desc.add().CopyFrom(antiquant_offset1.desc)
+        else:
+            op.input.append('')
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+        op.input_desc[-1].name = "antiquant_offset1"
+        if antiquant_offset2 is not None:
+            op.input.append(antiquant_offset2.tensor)
+            op.input_desc.add().CopyFrom(antiquant_offset2.desc)
+        else:
+            op.input.append('')
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+        op.input_desc[-1].name = "antiquant_offset2"
+
+        # process attrs
+        op.attr["activation"].s = compat_as_bytes(activation)
+        op.attr["inner_precise"].i = inner_precise
+        op.attr["output_dtype"].i = output_dtype
+        op.attr["tokens_index_flag"].b = tokens_index_flag
+
+        # process outputs
+        output_index = 0
+        op.output_desc.add().name = "y"
+        y = Tensor(op, output_index)
+        output_index += 1
+
+        return y
+    FFN = FFNV1
+else:
+    raise ValueError("torch_npu_api_version unsupported!")
diff --git a/model/train/yoco_moe/mindspeed/op_builder/fused_ema_adamw_builder.py b/model/train/yoco_moe/mindspeed/op_builder/fused_ema_adamw_builder.py
new file mode 100644
index 000000000..25ac3b892
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/op_builder/fused_ema_adamw_builder.py
@@ -0,0 +1,31 @@
+import os
+import torch
+from mindspeed.op_builder.builder import MindSpeedOpBuilder
+
+
+class FusedEmaAdamWOpBuilder(MindSpeedOpBuilder):
+    OP_NAME = "npu_apply_fused_ema_adamw"
+    _torch_path = None
+
+    def __init__(self):
+        # torch's install directory; include_paths() adds its header folders to the build.
+        self._torch_path = os.path.dirname(os.path.abspath(torch.__file__))
+        super(FusedEmaAdamWOpBuilder, self).__init__(self.OP_NAME)
+
+    def sources(self):
+        return ['ops/csrc/cann/npu_apply_fused_ema_adamw.cpp']
+
+    def include_paths(self):
+        paths = super().include_paths()
+        paths += ['ops/csrc/cann/inc',
+                  os.path.join(self._torch_path, 'include'),
+                  os.path.join(self._torch_path, 'include/torch/csrc/api/include'),
+                  os.path.join(self._torch_npu_path, 'include/torch_npu/csrc/framework/utils'),
+                  os.path.join(self._torch_npu_path, 'include/torch_npu/csrc/aten'),
+                  ]
+        return paths
+
+    def cxx_args(self):
+        args = super().cxx_args()
+        args += ['-Wno-narrowing']
+        return args
\ No newline at end of file
diff --git a/model/train/yoco_moe/mindspeed/op_builder/fusion_attention_v2_builder.py b/model/train/yoco_moe/mindspeed/op_builder/fusion_attention_v2_builder.py
new file mode 100644
index 000000000..d6cf99c72
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/op_builder/fusion_attention_v2_builder.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2024, Huawei Technologies.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from mindspeed.op_builder.builder import MindSpeedOpBuilder
+
+
+class FusionAttentionV2OpBuilder(MindSpeedOpBuilder):
+    OP_NAME = "fusion_attention_v2"
+
+    def __init__(self):
+        super(FusionAttentionV2OpBuilder, self).__init__(self.OP_NAME)
+
+    def sources(self):
+        return ['ops/csrc/cann/fusion_attention_v2.cpp', 'ops/csrc/flop_counter/flop_counter.cpp']
+
+    def include_paths(self):
+        paths = super().include_paths()
+        paths += ['ops/csrc/cann/inc']
+        return paths
+
+    def cxx_args(self):
+        args = super().cxx_args()
+        args += [
+            '-Wno-sign-compare',
+            '-Wno-deprecated-declarations',
+            '-Wno-return-type',
+            "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'"
+        ]
+        return args
diff --git a/model/train/yoco_moe/mindspeed/op_builder/gmm_builder.py b/model/train/yoco_moe/mindspeed/op_builder/gmm_builder.py
new file mode 100644
index 000000000..b53b9e7d7
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/op_builder/gmm_builder.py
@@ -0,0 +1,352 @@
+from typing import List, Optional
+from collections import namedtuple
+import torch
+import torchair
+from torch.library import impl
+
+from mindspeed.op_builder.builder import MindSpeedOpBuilder, AS_LIBRARY
+
+torch_npu_api_version = None
+try:
+    from torchair import ge
+    from torchair.ge import Tensor, TensorSpec, DataType
+    from torchair import register_fx_node_ge_converter
+except ImportError:
+    torch_npu_api_version = 1  # fallback imports: GroupedMatmulV1 below builds the graph node by hand
+    from torchair.ge_concrete_graph import ge_apis as ge
+    from torchair.ge_concrete_graph.fx2ge_converter import register_fx_node_ge_converter
+    from torchair.ge_concrete_graph.ge_graph import Tensor, TensorSpec, DataType
+    from torchair.ge_concrete_graph.ge_graph import get_default_ge_graph, next_unique_name
+    from torchair.ge_concrete_graph.ge_graph import compat_as_bytes
+    from torchair.ge_concrete_graph.ge_graph import get_invalid_desc
+else:
+    torch_npu_api_version = 2  # torchair.ge imported cleanly: the V2 helpers use torchair.ge.custom_op
+
+if torch_npu_api_version == 2:
+    def fill_empty_tensor(dtype):
+        return Fill(ge.Const(0), ge.Cast(0., dst_type=dtype))  # Fill is defined further down in this module
+else:
+    def fill_empty_tensor(dtype):
+        return ge.Fill([0], ge.Cast(0., dst_type=dtype))
+
+
+gmm_param = namedtuple('gmm_param', ['bias', 'scale', 'offset', 'antiquant_scale', 'antiquant_offset'])
+
+
+def conveter_npu_gmm_param(
+    x: Tensor,
+    bias: Tensor,
+    group_type: int
+):
+    if group_type == 2:
+        raise ValueError("GMM: graph mode does not support group_type 2!")
+    x_dtype = x.dtype
+    if bias is None:  # substitute a placeholder bias whose dtype is chosen from x's dtype
+        if x_dtype == DataType.DT_BF16:
+            bias = fill_empty_tensor(DataType.DT_FLOAT)
+        elif x_dtype == DataType.DT_UINT8:
+            bias = fill_empty_tensor(DataType.DT_INT32)
+        else:
+            bias = fill_empty_tensor(x_dtype)
+    scale = [fill_empty_tensor(DataType.DT_UINT64)]
+    offset = [fill_empty_tensor(DataType.DT_FLOAT)]
+    antiquant_scale = [fill_empty_tensor(DataType.DT_FLOAT16)]
+    antiquant_offset = [fill_empty_tensor(DataType.DT_FLOAT16)]
+    if x_dtype == DataType.DT_BF16:
+        antiquant_scale = [fill_empty_tensor(DataType.DT_BF16)]
+        antiquant_offset = [fill_empty_tensor(DataType.DT_BF16)]
+    return gmm_param(bias, scale, offset, antiquant_scale, antiquant_offset)
+
+
+class GMMOpBuilderPublic(MindSpeedOpBuilder):
+    TORCH_MAJOR, TORCH_MINOR = map(int, torch.__version__.split('.')[:2])
+
+    def sources(self):
+        return ['ops/csrc/cann/gmm.cpp', 'ops/csrc/flop_counter/flop_counter.cpp']
+
+    def include_paths(self):
+        paths = super().include_paths()
+        paths += ['ops/csrc/cann/inc']
+        return paths
+
+    def cxx_args(self):
+        args = super().cxx_args()
+        args += [
+            '-Wno-sign-compare',
+            '-Wno-deprecated-declarations',
+            '-Wno-return-type',
+            "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'"
+        ]
+        if (self.TORCH_MAJOR, self.TORCH_MINOR) >= (2, 1):  # tuple compare: an "X.0" release with X > 2 must also get c++17
+            cpp_std = " -std=c++17"
+        else:
+            cpp_std = " -std=c++14"
+        args.append(cpp_std)
+        return args
+
+
+class GMMOpBuilder(GMMOpBuilderPublic):
+    OP_NAME = "grouped_matmul"
+    OP_PROTO = (
+        "npu_gmm.Tensor(Tensor original_weight, Tensor x, Tensor weight, *, Tensor? bias=None, Tensor? group_list=None, int? group_type=0, bool? gemm_fusion=False) -> Tensor",
+        "npu_gmm.List(Tensor original_weight, Tensor x, Tensor weight, *, Tensor? bias=None, int[]? group_list=None, int? group_type=0, bool? gemm_fusion=False) -> Tensor"
+    )
+
+    def __init__(self):
+        super(GMMOpBuilder, self).__init__(self.OP_NAME)
+        self.register_op_proto(self.OP_PROTO)
+        self.register_op_ir()
+
+    def register_op_ir(self):
+        @impl(AS_LIBRARY, "npu_gmm.Tensor", "Meta")
+        def npu_gmm_forward(original_weight, x, weight, *, bias=None, group_list=None, group_type=0, gemm_fusion=False):
+            BM = x.shape[0]
+            N = weight.shape[-1]
+            y = x.new_empty((BM, N), dtype=x.dtype)
+            return y
+
+        @register_fx_node_ge_converter(torch.ops.mindspeed.npu_gmm.Tensor)
+        def conveter_npu_gmm(
+            original_weight: Tensor,
+            x: Tensor,
+            weight: Tensor,
+            *,
+            bias: Optional[Tensor] = None,
+            group_list: Optional[Tensor] = None,
+            group_type: Optional[int] = 0,
+            gemm_fusion: Optional[bool] = False,
+            meta_outputs: TensorSpec = None,
+        ):
+            """npu_gmm(Tensor x, Tensor weight, *, Tensor? bias=None, Tensor? group_list=None, int? group_type=0) -> Tensor
+            """
+            result = conveter_npu_gmm_param(x, bias, group_type)
+
+            return GroupedMatmul([x], [weight], [result.bias], result.scale, result.offset, result.antiquant_scale,
+                                 result.antiquant_offset, group_list, split_item=3, group_type=group_type,
+                                 dtype=-1, transpose_weight=False, group_list_type=0)[0]
+
+
+class GMMV2OpBuilder(GMMOpBuilderPublic):
+    OP_NAME = "grouped_matmul_v2"
+    OP_PROTO = (
+        "npu_gmm_v2.Tensor(Tensor original_weight, Tensor x, Tensor weight, *, Tensor? bias=None, Tensor? group_list=None, int? group_type=0, bool? gemm_fusion=False) -> Tensor"
+    )
+
+    def __init__(self):
+        super(GMMV2OpBuilder, self).__init__(self.OP_NAME)
+        self.register_op_proto(self.OP_PROTO)
+        self.register_op_ir()
+
+    def register_op_ir(self):
+        @impl(AS_LIBRARY, "npu_gmm_v2.Tensor", "Meta")
+        def npu_gmm_v2_forward(original_weight, x, weight, *, bias=None, group_list=None, group_type=0, gemm_fusion=False):
+            BM = x.shape[0]
+            N = weight.shape[-1]
+            y = x.new_empty((BM, N), dtype=x.dtype)
+            return y
+
+        @register_fx_node_ge_converter(torch.ops.mindspeed.npu_gmm_v2.Tensor)
+        def conveter_npu_gmm_v2(
+            original_weight: Tensor,
+            x: Tensor,
+            weight: Tensor,
+            *,
+            bias: Optional[Tensor] = None,
+            group_list: Optional[Tensor] = None,
+            group_type: Optional[int] = 0,
+            gemm_fusion: Optional[bool] = False,
+            meta_outputs: TensorSpec = None,
+        ):
+            """npu_gmm_v2(Tensor x, Tensor weight, *, Tensor? bias=None, Tensor? group_list=None, int? group_type=0) -> Tensor
+            """
+            result = conveter_npu_gmm_param(x, bias, group_type)
+
+            return GroupedMatmul([x], [weight], [result.bias], result.scale, result.offset, result.antiquant_scale,
+                                 result.antiquant_offset, group_list, split_item=3, group_type=group_type,
+                                 dtype=-1, transpose_weight=False, group_list_type=1)[0]
+
+if torch_npu_api_version == 2:
+    def Fill(dims: Tensor, value: Tensor):
+        """REG_OP(Fill)\n
+        .INPUT(dims, TensorType::IndexNumberType())\n
+        .INPUT(value, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_UINT16, DT_COMPLEX128, DT_FLOAT16, DT_BF16, DT_UINT32, DT_UINT64, DT_STRING}))\n
+        .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_UINT16, DT_COMPLEX128, DT_FLOAT16, DT_BF16, DT_UINT32, DT_UINT64, DT_STRING}))\n
+        """
+
+        y = torchair.ge.custom_op("Fill",
+            inputs={
+                "dims":dims,
+                "value":value
+            },
+            outputs=["y"]
+        )
+
+        return y
+
+GroupedMatmul = None
+if torch_npu_api_version == 2:
+    def GroupedMatmulV2(x: List[Tensor], weight: List[Tensor], bias: List[Tensor], scale: List[Tensor],
+                        offset: List[Tensor], antiquant_scale: List[Tensor], antiquant_offset: List[Tensor],
+                        group_list: Optional[Tensor] = None, per_token_scale: Optional[Tensor] = None, *,
+                        split_item: int = 0, dtype: int = 0, transpose_weight: bool = False, transpose_x: bool = False,
+                        group_type: int = -1, group_list_type: int = 0, act_type: int = 0):
+        """REG_OP(GroupedMatmul)\n
+        .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_BF16, DT_INT8}))\n
+        .DYNAMIC_INPUT(weight, TensorType({DT_FLOAT16, DT_BF16, DT_INT8}))\n
+        .DYNAMIC_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))\n
+        .DYNAMIC_INPUT(scale, TensorType({DT_UINT64}))\n
+        .DYNAMIC_INPUT(offset, TensorType({DT_FLOAT32}))\n
+        .DYNAMIC_INPUT(antiquant_scale, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .DYNAMIC_INPUT(antiquant_offset, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .OPTIONAL_INPUT(group_list, TensorType({DT_INT64}))\n
+        .OPTIONAL_INPUT(per_token_scale, TensorType({DT_FLOAT}))\n
+        .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT16, DT_BF16, DT_INT8, DT_FLOAT}))\n
+        .ATTR(split_item, Int, 0)\n
+        .ATTR(dtype, Int, 0)\n
+        .ATTR(transpose_weight, Bool, false)\n
+        .ATTR(transpose_x, Bool, false)\n
+        .ATTR(group_type, Int, -1)\n
+        .ATTR(group_list_type, Int, 0)\n
+        .ATTR(act_type, Int, 0)\n
+        """
+
+        y = torchair.ge.custom_op("GroupedMatmul",
+            inputs={
+                "x":x,
+                "weight":weight,
+                "bias":bias,
+                "scale":scale,
+                "offset":offset,
+                "antiquant_scale":antiquant_scale,
+                "antiquant_offset":antiquant_offset,
+                "group_list":group_list,
+                "per_token_scale": per_token_scale
+            },
+            attrs={
+                "split_item":ge.attr.Int(split_item),
+                "dtype":ge.attr.Int(dtype),
+                "transpose_weight":ge.attr.Bool(transpose_weight),
+                "transpose_x":ge.attr.Bool(transpose_x),
+                "group_type":ge.attr.Int(group_type),
+                "group_list_type":ge.attr.Int(group_list_type),
+                "act_type":ge.attr.Int(act_type)
+            },
+            outputs=[("y", 1)]
+        )
+
+        return y
+    GroupedMatmul = GroupedMatmulV2
+elif torch_npu_api_version == 1:
+    def GroupedMatmulV1(x: List[Tensor], weight: List[Tensor], bias: List[Tensor], scale: List[Tensor],
+                        offset: List[Tensor], antiquant_scale: List[Tensor], antiquant_offset: List[Tensor],
+                        group_list: Optional[Tensor] = None, per_token_scale: Optional[Tensor] = None, *,
+                        split_item: int = 0, dtype: int = 0, transpose_weight: bool = False, transpose_x: bool = False,
+                        group_type: int = -1, group_list_type: int = 0, act_type: int = 0,
+                        dependencies=None, node_name=None):
+        """REG_OP(GroupedMatmul)\n
+        .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_BF16, DT_INT8}))\n
+        .DYNAMIC_INPUT(weight, TensorType({DT_FLOAT16, DT_BF16, DT_INT8}))\n
+        .DYNAMIC_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))\n
+        .DYNAMIC_INPUT(scale, TensorType({DT_UINT64}))\n
+        .DYNAMIC_INPUT(offset, TensorType({DT_FLOAT32}))\n
+        .DYNAMIC_INPUT(antiquant_scale, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .DYNAMIC_INPUT(antiquant_offset, TensorType({DT_FLOAT16, DT_BF16}))\n
+        .OPTIONAL_INPUT(group_list, TensorType({DT_INT64}))\n
+        .OPTIONAL_INPUT(per_token_scale, TensorType({DT_FLOAT}))\n
+        .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT16, DT_BF16, DT_INT8, DT_FLOAT}))\n
+        .ATTR(split_item, Int, 0)\n
+        .ATTR(dtype, Int, 0)\n
+        .ATTR(transpose_weight, Bool, false)\n
+        .ATTR(transpose_x, Bool, false)\n
+        .ATTR(group_type, Int, -1)\n
+        .ATTR(group_list_type, Int, 0)\n
+        .ATTR(act_type, Int, 0)\n
+        """
+
+        op = get_default_ge_graph().op.add()
+        op.type = "GroupedMatmul"
+        op.name = next_unique_name(node_name, "GroupedMatmul")
+
+        # process dependencies
+        if dependencies is not None:
+            for dependency in dependencies:
+                op.input.append(dependency.controller)
+
+        # process inputs
+        if not isinstance(x, (tuple, list)):
+            raise AssertionError("x must be a tuple or a list.")
+        for i, v in enumerate(x):
+            op.input.append(v.tensor)
+            op.input_desc.add().CopyFrom(v.desc)
+            op.input_desc[-1].name = "x" + str(i)
+        if not isinstance(weight, (tuple, list)):
+            raise AssertionError("weight must be a tuple or a list.")
+        for i, v in enumerate(weight):
+            op.input.append(v.tensor)
+            op.input_desc.add().CopyFrom(v.desc)
+            op.input_desc[-1].name = "weight" + str(i)
+        if not isinstance(bias, (tuple, list)):
+            raise AssertionError("bias must be a tuple or a list.")
+        for i, v in enumerate(bias):
+            op.input.append(v.tensor)
+            op.input_desc.add().CopyFrom(v.desc)
+            op.input_desc[-1].name = "bias" + str(i)
+        if not isinstance(scale, (tuple, list)):
+            raise AssertionError("scale must be a tuple or a list.")
+        for i, v in enumerate(scale):
+            op.input.append(v.tensor)
+            op.input_desc.add().CopyFrom(v.desc)
+            op.input_desc[-1].name = "scale" + str(i)
+        if not isinstance(offset, (tuple, list)):
+            raise AssertionError("offset must be a tuple or a list.")
+        for i, v in enumerate(offset):
+            op.input.append(v.tensor)
+            op.input_desc.add().CopyFrom(v.desc)
+            op.input_desc[-1].name = "offset" + str(i)
+        if not isinstance(antiquant_scale, (tuple, list)):
+            raise AssertionError("antiquant_scale must be a tuple or a list.")
+        for i, v in enumerate(antiquant_scale):
+            op.input.append(v.tensor)
+            op.input_desc.add().CopyFrom(v.desc)
+            op.input_desc[-1].name = "antiquant_scale" + str(i)
+        if not isinstance(antiquant_offset, (tuple, list)):
+            raise AssertionError("antiquant_offset must be a tuple or a list.")
+        for i, v in enumerate(antiquant_offset):
+            op.input.append(v.tensor)
+            op.input_desc.add().CopyFrom(v.desc)
+            op.input_desc[-1].name = "antiquant_offset" + str(i)
+        if group_list is not None:
+            op.input.append(group_list.tensor)
+            op.input_desc.add().CopyFrom(group_list.desc)
+            op.input_desc[-1].name = "group_list"
+        else:
+            op.input.append("")
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+            op.input_desc[-1].name = "group_list"
+        if per_token_scale is not None:
+            op.input.append(per_token_scale.tensor)
+            op.input_desc.add().CopyFrom(per_token_scale.desc)
+            op.input_desc[-1].name = "per_token_scale"
+        else:
+            op.input.append("")
+            op.input_desc.add().CopyFrom(get_invalid_desc())
+            op.input_desc[-1].name = "per_token_scale"
+
+        # process attrs
+        op.attr["split_item"].i = split_item
+        op.attr["dtype"].i = dtype
+        op.attr["transpose_weight"].b = transpose_weight
+        op.attr["transpose_x"].b = transpose_x
+        op.attr["group_type"].i = group_type
+        op.attr["group_list_type"].i = group_list_type
+        op.attr["act_type"].i = act_type
+
+        # process outputs
+        output_index = 0
+        op.output_desc.add().name = "y" + str(output_index)
+        y = [Tensor(op, output_index)]
+
+        return y
+    GroupedMatmul = GroupedMatmulV1
+else:
+    raise ValueError("torch_npu_api_version unsupported!")
diff --git a/model/train/yoco_moe/mindspeed/op_builder/groupmatmul_add_builder.py b/model/train/yoco_moe/mindspeed/op_builder/groupmatmul_add_builder.py
new file mode 100644
index 000000000..24dba7565
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/op_builder/groupmatmul_add_builder.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
+from mindspeed.op_builder.atb_builder import AtbOpBuilder + + +class GroupMatmulAddOpBuilder(AtbOpBuilder): + OP_NAME = "npu_groupmatmul_add_fp32" + + def __init__(self): + super(GroupMatmulAddOpBuilder, self).__init__(self.OP_NAME) + + def sources(self): + return ['ops/csrc/atb/groupmatmul_add.cpp', + 'ops/csrc/atb/utils/atb_adapter.cpp', + 'ops/csrc/flop_counter/flop_counter.cpp'] + diff --git a/model/train/yoco_moe/mindspeed/op_builder/lcal_builder.py b/model/train/yoco_moe/mindspeed/op_builder/lcal_builder.py new file mode 100644 index 000000000..10acf0e53 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/op_builder/lcal_builder.py @@ -0,0 +1,18 @@ +from mindspeed.op_builder.atb_builder import AtbOpBuilder + + +class LcalOpBuilder(AtbOpBuilder): + OP_NAME = "lcal" + + def __init__(self): + super(LcalOpBuilder, self).__init__(self.OP_NAME) + + def sources(self): + return ['ops/csrc/atb/lcal_coc.cpp', + 'ops/csrc/atb/utils/atb_adapter.cpp', + 'ops/csrc/flop_counter/flop_counter.cpp'] + + def cxx_args(self): + args = super().cxx_args() + args.append(" -std=c++17") + return args \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/op_builder/matmul_add_builder.py b/model/train/yoco_moe/mindspeed/op_builder/matmul_add_builder.py new file mode 100644 index 000000000..1df752298 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/op_builder/matmul_add_builder.py @@ -0,0 +1,15 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+from mindspeed.op_builder.atb_builder import AtbOpBuilder + + +class MatmulAddOpBuilder(AtbOpBuilder): + OP_NAME = "npu_matmul_add_fp32" + + def __init__(self): + super(MatmulAddOpBuilder, self).__init__(self.OP_NAME) + + def sources(self): + return ['ops/csrc/atb/matmul_add.cpp', + 'ops/csrc/atb/utils/atb_adapter.cpp', + 'ops/csrc/flop_counter/flop_counter.cpp'] + \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/op_builder/memory_fragmentation_builder.py b/model/train/yoco_moe/mindspeed/op_builder/memory_fragmentation_builder.py new file mode 100644 index 000000000..17955efb1 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/op_builder/memory_fragmentation_builder.py @@ -0,0 +1,47 @@ +import torch +import os + +from mindspeed.op_builder.builder import MindSpeedOpBuilder + + +class MemoryFragmentationBuilder(MindSpeedOpBuilder): + NAME = "MemoryFragmentation" + _torch_path = None + _python_path = None + + def __init__(self): + from sysconfig import get_paths + self._torch_path = os.path.dirname(os.path.abspath(torch.__file__)) + self._python_path = os.path.dirname(os.path.abspath(get_paths().get('include'))) + super(MemoryFragmentationBuilder, self).__init__(self.NAME) + + def include_paths(self): + paths = super().include_paths() + paths += [ + os.path.join(self._torch_path, 'include'), + os.path.join(self._torch_path, 'include/torch/csrc/api/include'), + os.path.join(self._torch_npu_path, 'include/third_party/acl/inc/acl/'), + os.path.join(self._python_path), + ] + return paths + + def sources(self): + return ['ops/csrc/pluggable_allocator/memory_fragmentation/EventPool.cpp', + 'ops/csrc/pluggable_allocator/memory_fragmentation/CachingAllocatorConfig.cpp', + 'ops/csrc/pluggable_allocator/memory_fragmentation/DeviceCachingAllocator.cpp', + 'ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocator.cpp', + 'ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocatorFunctions.cpp', + 
'ops/csrc/pluggable_allocator/memory_fragmentation/Decorator.cpp', + 'ops/csrc/pluggable_allocator/memory_fragmentation/Recorder.cpp', + 'ops/csrc/pluggable_allocator/memory_fragmentation/common.cpp'] + + def cxx_args(self): + args = ['-fstack-protector-all', '-Wl,-z,relro,-z,now,-z,noexecstack', '-fPIC', '-pie', + '-s', '-D_FORTIFY_SOURCE=2', '-O2', "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'"] + return args + + def extra_ldflags(self): + flags = [ + '-L' + os.path.join(self._torch_npu_path, 'lib'), '-ltorch_npu' + ] + return flags \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/op_builder/npu_all_to_all_all_gather_bmm_builder.py b/model/train/yoco_moe/mindspeed/op_builder/npu_all_to_all_all_gather_bmm_builder.py new file mode 100644 index 000000000..f4ac84f26 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/op_builder/npu_all_to_all_all_gather_bmm_builder.py @@ -0,0 +1,244 @@ +# Copyright (c) 2024, Huawei Technologies. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import List, Optional +import torch +import torchair +from torch.library import Library, impl +from mindspeed.op_builder.builder import MindSpeedOpBuilder, AS_LIBRARY +torch_npu_api_version = None +try: + from torchair import ge + from torchair import register_fx_node_ge_converter + from torchair.ge import Tensor, TensorSpec, DataType +except ImportError: + ge, Tensor, TensorSpec, DataType = None, None, None, None + from torchair.ge_concrete_graph.fx2ge_converter import register_fx_node_ge_converter + torch_npu_api_version = 1 +else: + torch_npu_api_version = 2 + + +class AllToAllAllGatherBatchMatMulOpBuilder(MindSpeedOpBuilder): + OP_NAME = "npu_alltoall_allgather_bmm" + OP_PROTO = "npu_alltoall_allgather_bmm(Tensor x, Tensor weight, \ + str group_ep, int group_ep_worldsize, \ + str group_tp, int group_tp_worldsize, \ + *, Tensor? bias=None, int shard_type=0, int act_type=0, \ + bool need_allgather_out=False, \ + bool need_activation_feature=False) -> (Tensor, Tensor, Tensor)" + + def __init__(self): + super(AllToAllAllGatherBatchMatMulOpBuilder, self).__init__(self.OP_NAME) + self.register_op_proto(self.OP_PROTO) + self.register_op_ir() + + def sources(self): + return ['ops/csrc/cann/npu_all_to_all_all_gather_bmm.cpp'] + + def include_paths(self): + paths = super().include_paths() + paths += ['ops/csrc/cann/inc'] + return paths + + def cxx_args(self): + args = super().cxx_args() + args += [ + '-Wno-sign-compare', + '-Wno-deprecated-declarations', + '-Wno-return-type', + "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'" + ] + return args + + def register_op_ir(self): + @impl(AS_LIBRARY, "npu_alltoall_allgather_bmm", "Meta") + def npu_alltoall_allgather_bmm_forward(x, weight, + group_ep, group_ep_worldsize, group_tp, group_tp_worldsize, + *, bias=None, shard_type=0, act_type=0, + need_allgather_out=False, need_activation_feature=False): + batch = weight.size(0) + m = x.size(1) * group_ep_worldsize + if shard_type == 1: + m *= group_tp_worldsize + n = 
weight.size(2) + k = weight.size(1) + + if x.size(0) == 0: + raise AssertionError('The first dim of x can not be 0.') + if x.size(1) == 0: + raise AssertionError('The second dim of x can not be 0.') + if x.size(2) == 0: + raise AssertionError('The last dim of x can not be 0.') + if weight.size(0) == 0: + raise AssertionError('The first dim of weight can not be 0.') + if weight.size(1) == 0: + raise AssertionError('The second dim of weight can not be 0.') + if weight.size(2) == 0: + raise AssertionError('The last dim of weight can not be 0.') + + empty_tensor = x.new_empty((0)) + return (x.new_empty((batch, m, n)), + x.new_empty((batch, m, k)) if need_allgather_out else empty_tensor, + x.new_empty((batch, m, n)) if need_activation_feature else empty_tensor) + + @register_fx_node_ge_converter(torch.ops.mindspeed.npu_alltoall_allgather_bmm.default) + def convert_npu_alltoall_allgather_bmm( + x: Tensor, + weight: Tensor, + group_ep: str, + group_ep_worldsize: int, + group_tp: str, + group_tp_worldsize: int, + *, + bias: Optional[Tensor] = None, + shard_type: Optional[int] = 0, + act_type: Optional[int] = 0, + need_allgather_out: Optional[bool] = False, + need_activation_feature: Optional[bool] = False, + meta_outputs: List[TensorSpec] = None): + '''"npu_alltoall_allgather_bmm(Tensor x, Tensor weight, str group_ep, str group_tp, + int ep_world_size, int tp_world_size, *, Tensor? 
bias=None, int x_shard_type=0, int act_type=0, + bool need_allgather_out=False, bool need_activation_feature=False) -> (Tensor, Tensor, Tensor)"''' + if torch_npu_api_version != 2: + raise ValueError(f"torch_npu_api_version {torch_npu_api_version} unsupport") + CheckDtype(x, weight, bias) + return AllToAllAllGatherBatchMatmul(x, + weight, + group_ep, + group_ep_worldsize, + group_tp, + group_tp_worldsize, + bias=bias, + shard_type=shard_type, + act_type=act_type, + need_allgather_out=need_allgather_out, + need_activation_feature=need_activation_feature) + + +def CheckDtype(x: Tensor, weight: Tensor, bias: Optional[Tensor]): + if x.dtype != DataType.DT_BF16 and x.dtype != DataType.DT_FLOAT16: + raise AssertionError(f'type of x must be DT_FLOAT16/DT_BF16, but got {GeDtypeToStr(x.dtype)}.') + if weight.dtype != DataType.DT_BF16 and weight.dtype != DataType.DT_FLOAT16: + raise AssertionError(f'type of weight must be DT_FLOAT16/DT_BF16, but got {GeDtypeToStr(weight.dtype)}.') + if x.dtype != weight.dtype: + raise AssertionError(f'type of x and weight must be same, but got x {GeDtypeToStr(x.dtype)} '\ + f'weight {GeDtypeToStr(weight.dtype)}.') + if bias is not None: + if bias.dtype != DataType.DT_FLOAT16 and bias.dtype != DataType.DT_FLOAT: + raise AssertionError(f'type of bias must DT_FLOAT16/DT_FLOAT32, but got {GeDtypeToStr(bias.dtype)}.') + if x.dtype == DataType.DT_FLOAT16 and bias.dtype != DataType.DT_FLOAT16: + raise AssertionError(f'type of bias must DT_FLOAT16 when x is DT_FLOAT16, '\ + f'but got {GeDtypeToStr(bias.dtype)}.') + if x.dtype == DataType.DT_BF16 and bias.dtype != DataType.DT_FLOAT: + raise AssertionError(f'type of bias must DT_FLOAT32 when x is DT_BF16, '\ + f'but got {GeDtypeToStr(bias.dtype)}.') + + +def GeDtypeToStr(ge_dtype: DataType): + ge_datatype = { + DataType.DT_FLOAT: 'DT_FLOAT32', + DataType.DT_FLOAT16: 'DT_FLOAT16', + DataType.DT_INT8: 'DT_INT8', + DataType.DT_INT16: 'DT_INT16', + DataType.DT_UINT16: 'DT_UINT16', + DataType.DT_UINT8: 
'DT_UINT8', + DataType.DT_INT32: 'DT_INT32', + DataType.DT_INT64: 'DT_INT64', + DataType.DT_UINT32: 'DT_UINT32', + DataType.DT_UINT64: 'DT_UINT64', + DataType.DT_BOOL: 'DT_BOOL', + DataType.DT_DOUBLE: 'DT_DOUBLE', + DataType.DT_STRING: 'DT_STRING', + DataType.DT_DUAL_SUB_INT8: 'DT_DUAL_SUB_INT8', + DataType.DT_DUAL_SUB_UINT8: 'DT_DUAL_SUB_UINT8', + DataType.DT_COMPLEX64: 'DT_COMPLEX64', + DataType.DT_COMPLEX128: 'DT_COMPLEX128', + DataType.DT_QINT8: 'DT_QINT8', + DataType.DT_QINT16: 'DT_QINT16', + DataType.DT_QINT32: 'DT_QINT32', + DataType.DT_QUINT8: 'DT_QUINT8', + DataType.DT_QUINT16: 'DT_QUINT16', + DataType.DT_RESOURCE: 'DT_RESOURCE', + DataType.DT_STRING_REF: 'DT_STRING_REF', + DataType.DT_DUAL: 'DT_DUAL', + DataType.DT_VARIANT: 'DT_VARIANT', + DataType.DT_BF16: 'DT_BF16', + DataType.DT_UNDEFINED: 'DT_UNDEFINED', + DataType.DT_INT4: 'DT_INT4', + DataType.DT_UINT1: 'DT_UINT1', + DataType.DT_INT2: 'DT_INT2', + DataType.DT_UINT2: 'DT_UINT2', + DataType.DT_COMPLEX32: 'DT_COMPLEX32', + DataType.DT_MAX: 'DT_MAX', + } + if ge_dtype in ge_datatype: + return ge_datatype[ge_dtype] + else: + return 'unknown' + + +def AllToAllAllGatherBatchMatmul( + x: Tensor, + weight: Tensor, + group_ep: str, + group_ep_worldsize: int, + group_tp: str, + group_tp_worldsize: int, + *, + bias: Optional[Tensor] = None, + shard_type: Optional[int] = 0, + act_type: Optional[int] = 0, + need_allgather_out: Optional[bool] = False, + need_activation_feature: Optional[bool] = False): + """REG_OP(AlltoAllAllGatherBatchMatMul)\n + .INPUT(x, TensorType({DT_FLOAT16, DT_BF16}))\n + .INPUT(weight, TensorType({DT_FLOAT16, DT_BF16}))\n + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT32}))\n + .OUTPUT(y1, TensorType({DT_FLOAT16, DT_BF16}))\n + .OUTPUT(y2, TensorType({DT_FLOAT16, DT_BF16}))\n + .OUTPUT(y3, TensorType({DT_FLOAT16, DT_BF16}))\n + .REQUIRED_ATTR(group_ep, String)\n + .REQUIRED_ATTR(group_tp, String)\n + .REQUIRED_ATTR(ep_world_size, int)\n + .REQUIRED_ATTR(tp_world_size, 
int)\n + .ATTR(x_shard_type, Int, 1)\n + .ATTR(act_type, Int, 0)\n + .ATTR(need_allgather_out, Bool, False)\n + .ATTR(need_activation_feature, Bool, False)\n + .OP_END_FACTORY_REG(AlltoAllAllGatherBatchMatMul) + use to construct Opdesc + """ + transpose_weight = False + return torchair.ge.custom_op( + "AlltoAllAllGatherBatchMatMul", + inputs={ + "x": x, + "weight": weight, + "bias": bias + }, + attrs={ + "group_ep": ge.attr.Str(group_ep), + "group_tp": ge.attr.Str(group_tp), + "ep_world_size": ge.attr.Int(group_ep_worldsize), + "tp_world_size": ge.attr.Int(group_tp_worldsize), + "x_shard_type": ge.attr.Int(shard_type), + "act_type": ge.attr.Int(act_type), + "transpose_weight": ge.attr.Bool(transpose_weight), + "output_y2_flag": ge.attr.Bool(need_allgather_out), + "output_y3_flag": ge.attr.Bool(need_activation_feature) + }, + outputs=["y1", "y2", "y3"] + ) \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/op_builder/npu_bmm_reduce_scatter_all_to_all_builder.py b/model/train/yoco_moe/mindspeed/op_builder/npu_bmm_reduce_scatter_all_to_all_builder.py new file mode 100644 index 000000000..e1a983301 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/op_builder/npu_bmm_reduce_scatter_all_to_all_builder.py @@ -0,0 +1,147 @@ +# Copyright (c) 2024, Huawei Technologies. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import List, Optional +import torch +import torchair +from torch.library import Library, impl +from mindspeed.op_builder.builder import MindSpeedOpBuilder, AS_LIBRARY +from mindspeed.op_builder.npu_all_to_all_all_gather_bmm_builder import CheckDtype +torch_npu_api_version = None +try: + from torchair import ge + from torchair import register_fx_node_ge_converter + from torchair.ge import Tensor, TensorSpec, DataType +except ImportError: + ge, Tensor, TensorSpec, DataType = None, None, None, None + from torchair.ge_concrete_graph.fx2ge_converter import register_fx_node_ge_converter + torch_npu_api_version = 1 +else: + torch_npu_api_version = 2 + + +class BatchMatMulReduceScatterAlltoAllOpBuilder(MindSpeedOpBuilder): + OP_NAME = "npu_bmm_reducescatter_alltoall" + OP_PROTO = "npu_bmm_reducescatter_alltoall(Tensor x, Tensor weight, str group_ep, int group_ep_worldsize, \ + str group_tp, int group_tp_worldsize, *, Tensor? bias=None, int shard_type=0) -> Tensor" + + def __init__(self): + super(BatchMatMulReduceScatterAlltoAllOpBuilder, self).__init__(self.OP_NAME) + self.register_op_proto(self.OP_PROTO) + self.register_op_ir() + + def sources(self): + return ['ops/csrc/cann/npu_bmm_reduce_scatter_all_to_all.cpp'] + + def include_paths(self): + paths = super().include_paths() + paths += ['ops/csrc/cann/inc'] + return paths + + def cxx_args(self): + args = super().cxx_args() + args += [ + '-Wno-sign-compare', + '-Wno-deprecated-declarations', + '-Wno-return-type', + "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'" + ] + return args + + def register_op_ir(self): + @impl(AS_LIBRARY, "npu_bmm_reducescatter_alltoall", "Meta") + def npu_bmm_reducescatter_alltoall_forward(x, weight, group_ep, group_ep_worldsize, + group_tp, group_tp_worldsize, *, bias=None, shard_type=0): + if group_ep_worldsize == 0: + raise AssertionError('group_ep_worldsize can not be 0.') + if group_tp_worldsize == 0: + raise AssertionError('group_tp_worldsize can not be 0.') + e = x.size(0) 
* group_ep_worldsize + c = x.size(1) // group_ep_worldsize + h = weight.size(2) + + if x.size(0) == 0: + raise AssertionError('The first dim of x can not be 0.') + if x.size(1) == 0: + raise AssertionError('The second dim of x can not be 0.') + if x.size(2) == 0: + raise AssertionError('The last dim of x can not be 0.') + if weight.size(0) == 0: + raise AssertionError('The first dim of weight can not be 0.') + if weight.size(1) == 0: + raise AssertionError('The second dim of weight can not be 0.') + if weight.size(2) == 0: + raise AssertionError('The last dim of weight can not be 0.') + + if shard_type == 0: + # shard in h dimensions + h = h // group_tp_worldsize + else: + # shard in c dimensions + c = c // group_tp_worldsize + + return x.new_empty((e, c, h)) + + @register_fx_node_ge_converter(torch.ops.mindspeed.npu_bmm_reducescatter_alltoall.default) + def convert_npu_bmm_reducescatter_alltoall(x: Tensor, + weight: Tensor, + group_ep: str, + group_ep_worldsize: int, + group_tp: str, + group_tp_worldsize: int, + *, + bias: Optional[Tensor] = None, + shard_type: Optional[int] = 0, + meta_outputs: TensorSpec = None): + if torch_npu_api_version != 2: + raise ValueError(f"torch_npu_api_version {torch_npu_api_version} unsupport") + CheckDtype(x, weight, bias) + return BatchMatmulReduceScatterAlltoAll(x, + weight, + group_ep, + group_ep_worldsize, + group_tp, + group_tp_worldsize, + bias=bias, + shard_type=shard_type) + + +def BatchMatmulReduceScatterAlltoAll(x: Tensor, + weight: Tensor, + group_ep: str, + group_ep_worldsize: int, + group_tp: str, + group_tp_worldsize: int, + *, + bias: Tensor = None, + shard_type: int = 0): + transpose_weight = False + return torchair.ge.custom_op( + "BatchMatMulReduceScatterAlltoAll", + inputs={ + "x": x, + "weight": weight, + "bias": bias + }, + attrs={ + "group_ep": ge.attr.Str(group_ep), + "group_tp": ge.attr.Str(group_tp), + "ep_world_size": ge.attr.Int(group_ep_worldsize), + "tp_world_size": ge.attr.Int(group_tp_worldsize), + 
"y_shard_type": ge.attr.Int(shard_type), + "transpose_weight": ge.attr.Bool(transpose_weight) + }, + outputs=["y"] + ) \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/op_builder/npu_dropout_add_layer_norm_builder.py b/model/train/yoco_moe/mindspeed/op_builder/npu_dropout_add_layer_norm_builder.py new file mode 100644 index 000000000..9dd19a124 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/op_builder/npu_dropout_add_layer_norm_builder.py @@ -0,0 +1,26 @@ +from mindspeed.op_builder.builder import MindSpeedOpBuilder + + +class NpuDropoutAddLayerNormOpBuilder(MindSpeedOpBuilder): + OP_NAME = "npu_dropout_add_layer_norm" + + def __init__(self): + super(NpuDropoutAddLayerNormOpBuilder, self).__init__(self.OP_NAME) + + def sources(self): + return ['ops/csrc/cann/npu_dropout_add_layer_norm.cpp'] + + def include_paths(self): + paths = super().include_paths() + paths += ['ops/csrc/cann/inc'] + return paths + + def cxx_args(self): + args = super().cxx_args() + args += [ + '-Wno-sign-compare', + '-Wno-deprecated-declarations', + '-Wno-return-type', + "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'" + ] + return args diff --git a/model/train/yoco_moe/mindspeed/op_builder/npu_grouped_mat_mul_all_reduce_builder.py b/model/train/yoco_moe/mindspeed/op_builder/npu_grouped_mat_mul_all_reduce_builder.py new file mode 100644 index 000000000..4bad183d5 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/op_builder/npu_grouped_mat_mul_all_reduce_builder.py @@ -0,0 +1,37 @@ +import torch +from mindspeed.op_builder.builder import MindSpeedOpBuilder + + +class GroupedMatMulAllReduceOpBuilder(MindSpeedOpBuilder): + OP_NAME = "npu_grouped_mat_mul_all_reduce" + TORCH_MAJOR, TORCH_MINOR = map(int, torch.__version__.split('.')[:2]) + + def __init__(self): + super(GroupedMatMulAllReduceOpBuilder, self).__init__(self.OP_NAME) + + def sources(self): + return ['ops/csrc/cann/npu_grouped_mat_mul_all_reduce.cpp'] + + def include_paths(self): + paths = 
super().include_paths() + paths += ['ops/csrc/cann/inc'] + return paths + + def cxx_args(self): + args = super().cxx_args() + args += [ + '-Wno-sign-compare', + '-Wno-deprecated-declarations', + '-Wno-return-type', + "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'" + ] + if self.TORCH_MAJOR >= 2 and self.TORCH_MINOR >= 1: + cpp_std = " -std=c++17" + compile_maroc = " -D__TORCH_2__" + else: + cpp_std = " -std=c++14" + compile_maroc = " -D__TORCH_1__" + args.append(cpp_std) + args.append(compile_maroc) + + return args diff --git a/model/train/yoco_moe/mindspeed/op_builder/npu_inplace_mm_all_reduce_add_rms_norm_builder.py b/model/train/yoco_moe/mindspeed/op_builder/npu_inplace_mm_all_reduce_add_rms_norm_builder.py new file mode 100644 index 000000000..117e4d897 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/op_builder/npu_inplace_mm_all_reduce_add_rms_norm_builder.py @@ -0,0 +1,110 @@ +# Copyright (c) 2024, Huawei Technologies. + + +from typing import List, Optional + +from mindspeed.op_builder.builder import MindSpeedOpBuilder, AS_LIBRARY +from mindspeed.op_builder.npu_mm_all_reduce_add_rms_norm_builder import MatmulAllReduceAddRmsNorm, CheckDtype + +import torch +import torchair +from torch.library import Library, impl + +torch_npu_api_version = None +try: + from torchair import ge + from torchair.ge import Tensor, TensorSpec, DataType + from torchair import register_fx_node_ge_converter +except ImportError: + torch_npu_api_version = 1 + from torchair.ge_concrete_graph import ge_apis as ge + from torchair.ge_concrete_graph.fx2ge_converter import declare_supported, register_fx_node_ge_converter + from torchair.ge_concrete_graph.ge_graph import Tensor, TensorSpec + from torchair.ge_concrete_graph.ge_graph import get_default_ge_graph, next_unique_name + from torchair.ge_concrete_graph.ge_graph import compat_as_bytes + from torchair.ge_concrete_graph.ge_graph import get_invalid_desc +else: + torch_npu_api_version = 2 + + +class 
InplaceMatmulAllReduceAddRmsNormOpBuilder(MindSpeedOpBuilder): + OP_NAME = "npu_mm_all_reduce_add_rms_norm_" + OP_PROTO = "npu_mm_all_reduce_add_rms_norm_(Tensor x1, Tensor x2, Tensor residual, Tensor gamma, \ + str hcom, *, str reduce_op='sum', float epsilon=1e-06, Tensor? bias=None, Tensor? antiquant_scale=None, \ + Tensor? antiquant_offset=None, Tensor? dequant_scale=None, int antiquant_group_size=0, int comm_turn=0) \ + -> (Tensor, Tensor)" + + def __init__(self): + super(InplaceMatmulAllReduceAddRmsNormOpBuilder, self).__init__(self.OP_NAME) + self.register_op_proto(self.OP_PROTO) + self.register_op_ir() + + def sources(self): + return ['ops/csrc/cann/npu_mm_all_reduce_add_rms_norm_.cpp'] + + def include_paths(self): + paths = super().include_paths() + paths += ['ops/csrc/cann/inc'] + return paths + + def cxx_args(self): + args = super().cxx_args() + args += [ + '-Wno-sign-compare', + '-Wno-deprecated-declarations', + '-Wno-return-type', + "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'" + ] + return args + + def register_op_ir(self): + @impl(AS_LIBRARY, "npu_mm_all_reduce_add_rms_norm_", "Meta") + def npu_inplace_mm_all_reduce_add_rms_norm_forward( + x1, x2, residual, gamma, hcom, reduce_op='sum', epsilon=1e-6, + bias=None, antiquant_scale=None, antiquant_offset=None, + dequant_scale=None, antiquant_group_size=0, comm_turn=0): + return (torch.empty_like(residual, dtype=residual.dtype), + torch.empty_like(residual, dtype=residual.dtype)) + + @register_fx_node_ge_converter(torch.ops.mindspeed.npu_mm_all_reduce_add_rms_norm_.default) + def convert_npu_mm_all_reduce_add_rms_norm_( + x1: Tensor, + x2: Tensor, + residual: Tensor, + gamma: Tensor, + hcom: str, + *, + reduce_op: str = 'sum', + epsilon: float = 1e-6, + bias: Optional[Tensor] = None, + antiquant_scale: Optional[Tensor] = None, + antiquant_offset: Optional[Tensor] = None, + dequant_scale: Optional[Tensor] = None, + antiquant_group_size: int = 0, + comm_turn: int = 0, + meta_outputs: List[TensorSpec] = 
None + ): + # transpose_x1 is set to False by default + transpose_x1 = False + transpose_x2 = False + '''npu_mm_all_reduce_add_rms_norm_(Tensor x1, Tensor x2, Tensor(a!) residual, Tensor gamma, + str hcom, *, str reduce_op='sum', float epsilon=1e-06, Tensor? bias=None, + Tensor? antiquant_scale=None, Tensor? antiquant_offset=None, Tensor? dequant_scale=None, + int antiquant_group_size=0, int comm_turn=0) -> (Tensor(a!), Tensor)''' + CheckDtype(x1, x2, bias=bias, residual=residual, gamma=gamma, antiquant_scale=antiquant_scale, + antiquant_offset=antiquant_offset, dequant_scale=dequant_scale) + return MatmulAllReduceAddRmsNorm(x1, + x2, + bias=bias, + residual=residual, + gamma=gamma, + antiquant_scale=antiquant_scale, + antiquant_offset=antiquant_offset, + dequant_scale=dequant_scale, + group=hcom, + reduce_op=reduce_op, + is_trans_a=transpose_x1, + is_trans_b=transpose_x2, + comm_turn=comm_turn, + antiquant_group_size=antiquant_group_size, + epsilon=epsilon) diff --git a/model/train/yoco_moe/mindspeed/op_builder/npu_mm_all_reduce_add_rms_norm_builder.py b/model/train/yoco_moe/mindspeed/op_builder/npu_mm_all_reduce_add_rms_norm_builder.py new file mode 100644 index 000000000..bc8e1d4b0 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/op_builder/npu_mm_all_reduce_add_rms_norm_builder.py @@ -0,0 +1,324 @@ +# Copyright (c) 2024, Huawei Technologies. 
+ + +from typing import List, Optional + +from mindspeed.op_builder.builder import MindSpeedOpBuilder, AS_LIBRARY + +import torch +import torchair +from torch.library import Library, impl + +torch_npu_api_version = None +try: + from torchair import ge + from torchair.ge import Tensor, TensorSpec, DataType + from torchair import register_fx_node_ge_converter +except ImportError: + torch_npu_api_version = 1 + from torchair.ge_concrete_graph import ge_apis as ge + from torchair.ge_concrete_graph.fx2ge_converter import declare_supported, register_fx_node_ge_converter + from torchair.ge_concrete_graph.ge_graph import Tensor, TensorSpec + from torchair.ge_concrete_graph.ge_graph import get_default_ge_graph, next_unique_name + from torchair.ge_concrete_graph.ge_graph import compat_as_bytes + from torchair.ge_concrete_graph.ge_graph import get_invalid_desc +else: + torch_npu_api_version = 2 + +DataType = dict( + DT_FLOAT16=1, + DT_INT8=2, + DT_INT32=3, + DT_INT64=9, + DT_UINT64=10, + DT_BF16=27, +) + + +class MatmulAllReduceAddRmsNormOpBuilder(MindSpeedOpBuilder): + OP_NAME = "npu_mm_all_reduce_add_rms_norm" + OP_PROTO = "npu_mm_all_reduce_add_rms_norm(Tensor x1, Tensor x2, Tensor residual, Tensor gamma, str hcom, *, \ + str reduce_op='sum', float epsilon=1e-06, Tensor? bias=None, Tensor? antiquant_scale=None, Tensor? \ + antiquant_offset=None, Tensor? 
dequant_scale=None, int antiquant_group_size=0, int comm_turn=0) \ + -> (Tensor, Tensor)" + + def __init__(self): + super(MatmulAllReduceAddRmsNormOpBuilder, self).__init__(self.OP_NAME) + self.register_op_proto(self.OP_PROTO) + self.register_op_ir() + + def sources(self): + return ['ops/csrc/cann/npu_mm_all_reduce_add_rms_norm.cpp'] + + def include_paths(self): + paths = super().include_paths() + paths += ['ops/csrc/cann/inc'] + return paths + + def cxx_args(self): + args = super().cxx_args() + args += [ + '-Wno-sign-compare', + '-Wno-deprecated-declarations', + '-Wno-return-type', + "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'" + ] + return args + + def register_op_ir(self): + @impl(AS_LIBRARY, "npu_mm_all_reduce_add_rms_norm", "Meta") + def npu_mm_all_reduce_add_rms_norm_forward(x1, x2, residual, gamma, hcom, reduce_op='sum', epsilon=1e-6, + bias=None, antiquant_scale=None, antiquant_offset=None, + dequant_scale=None, antiquant_group_size=0, comm_turn=0): + return (torch.empty_like(residual, dtype=residual.dtype), + torch.empty_like(residual, dtype=residual.dtype)) + + @register_fx_node_ge_converter(torch.ops.mindspeed.npu_mm_all_reduce_add_rms_norm.default) + def convert_npu_mm_all_reduce_add_rms_norm( + x1: Tensor, + x2: Tensor, + residual: Tensor, + gamma: Tensor, + hcom: str, + *, + reduce_op: str = 'sum', + epsilon: float = 1e-6, + bias: Optional[Tensor] = None, + antiquant_scale: Optional[Tensor] = None, + antiquant_offset: Optional[Tensor] = None, + dequant_scale: Optional[Tensor] = None, + antiquant_group_size: int = 0, + comm_turn: int = 0, + meta_outputs: List[TensorSpec] = None + ): + # transpose_x1 is set to False by default + transpose_x1 = False + transpose_x2 = False + '''"npu_mm_all_reduce_add_rms_norm(Tensor x1, Tensor x2, Tensor residual, Tensor gamma, str hcom, + *, str reduce_op='sum', float epsilon=1e-06, Tensor? bias=None, Tensor? antiquant_scale=None, + Tensor? antiquant_offset=None, Tensor? 
dequant_scale=None, int antiquant_group_size=0, + int comm_turn=0) -> (Tensor, Tensor)"''' + CheckDtype(x1, x2, bias=bias, residual=residual, gamma=gamma, antiquant_scale=antiquant_scale, + antiquant_offset=antiquant_offset, dequant_scale=dequant_scale) + return MatmulAllReduceAddRmsNorm(x1, + x2, + bias=bias, + residual=residual, + gamma=gamma, + antiquant_scale=antiquant_scale, + antiquant_offset=antiquant_offset, + dequant_scale=dequant_scale, + group=hcom, + reduce_op=reduce_op, + is_trans_a=transpose_x1, + is_trans_b=transpose_x2, + comm_turn=comm_turn, + antiquant_group_size=antiquant_group_size, + epsilon=epsilon) + + +def CheckDtype(x1: Tensor, x2: Tensor, bias: Optional[Tensor], residual: Tensor, gamma: Tensor, + antiquant_scale: Optional[Tensor], antiquant_offset: Optional[Tensor], + dequant_scale: Optional[Tensor]): + if residual.dtype != gamma.dtype: + raise AssertionError('type of residual and gamma must be same.') + if x1.dtype in (DataType["DT_FLOAT16"], DataType["DT_BF16"]) and \ + x2.dtype in (DataType["DT_FLOAT16"], DataType["DT_BF16"]): + if x2.dtype != x1.dtype: + raise AssertionError('type of x1 and x2 must be same.') + if bias is not None and bias.dtype != x1.dtype: + raise AssertionError('type of x1 and bias must be same.') + if residual.dtype != x1.dtype: + raise AssertionError('type of x1 and residual must be same.') + elif x1.dtype is DataType["DT_INT8"] and x2.dtype is DataType["DT_INT8"]: + if bias is not None and bias.dtype != DataType["DT_INT32"]: + raise AssertionError('type of bias must be int32.') + if dequant_scale is None: + raise AssertionError('dequant_scale must not be None.') + if dequant_scale.dtype in (DataType["DT_INT64"], DataType["DT_UINT64"]): + if residual.dtype != DataType["DT_FLOAT16"]: + raise AssertionError('when dequant_scale is int64(uint64), residual type must be fp16.') + elif dequant_scale.dtype is DataType["DT_BF16"]: + if residual.dtype != DataType["DT_BF16"]: + raise AssertionError('type of dequant_scale and 
residual should be bf16.') + else: + raise AssertionError('dequant_scale type must be int64, uint64 or bf16') + elif x1.dtype in (DataType["DT_FLOAT16"], DataType["DT_BF16"]) and \ + x2.dtype is DataType["DT_INT8"]: + if bias is not None and bias.dtype != x1.dtype: + raise AssertionError('type of x1 and bias must be same.') + if antiquant_scale is None: + raise AssertionError('antiquant_scale must not be None.') + if antiquant_scale.dtype != x1.dtype: + raise AssertionError('type of x1 and antiquant_scale must be same.') + if antiquant_offset is not None and antiquant_offset.dtype != antiquant_scale.dtype: + raise AssertionError('type of antiquant_scale and antiquant_offset must be same.') + if residual.dtype != x1.dtype: + raise AssertionError('type of x1 and residual must be same.') + else: + raise AssertionError("the type of x1 and x2 should be suit the not quant scenario, "\ + "dequant scenario, antiquant scenario.") + +MatmulAllReduceAddRmsNorm = None +if torch_npu_api_version == 2: + def MatmulAllReduceAddRmsNormV2(x1: Tensor, + x2: Tensor, + bias: Optional[Tensor], + residual: Tensor, + gamma: Tensor, + antiquant_scale: Optional[Tensor], + antiquant_offset: Optional[Tensor], + dequant_scale: Optional[Tensor], + *, + group: str, + reduce_op: str = "sum", + is_trans_a: bool = False, + is_trans_b: bool = False, + comm_turn: int = 0, + antiquant_group_size: int = 0, + epsilon: float = 0.000001): + """REG_OP(MatmulAllReduceAddRmsNorm)\n + .INPUT(x1, TensorType({DT_FLOAT16, DT_BF16, DT_INT8, DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_BF16}))\n + .INPUT(x2, TensorType({DT_FLOAT16, DT_BF16, DT_INT8, DT_INT8, DT_INT8, DT_INT4, DT_INT4}))\n + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_BF16, DT_INT32, DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_BF16}))\n + .INPUT(residual, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_BF16}))\n + .INPUT(gamma, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_BF16}))\n + 
.OPTIONAL_INPUT(antiquant_scale, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_BF16}))\n + .OPTIONAL_INPUT(antiquant_offset, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_BF16}))\n + .OPTIONAL_INPUT(dequant_scale, TensorType({DT_FLOAT16, DT_BF16, DT_UINT64, DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_BF16}))\n + .OUTPUT(y, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_BF16}))\n + .OUTPUT(norm_out, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_FLOAT16, DT_BF16, DT_FLOAT16, DT_BF16}))\n + .REQUIRED_ATTR(group, String)\n + .ATTR(reduce_op, String, "sum")\n + .ATTR(is_trans_a, Bool, false)\n + .ATTR(is_trans_b, Bool, false)\n + .ATTR(comm_turn, Int, 0)\n + .ATTR(antiquant_group_size, Int, 0)\n + .ATTR(epsilon, Float, 1e-6)\n + .OP_END_FACTORY_REG(MatmulAllReduceAddRmsNorm) + """ + + y, norm_out = torchair.ge.custom_op( + "MatmulAllReduceAddRmsNorm", + inputs={ + "x1" : x1, + "x2" : x2, + "bias" : bias, + "residual" : residual, + "gamma" : gamma, + "antiquant_scale" : antiquant_scale, + "antiquant_offset" : antiquant_offset, + "dequant_scale" : dequant_scale, + }, + attrs={ + "group" : ge.attr.Str(group), + "reduce_op" : ge.attr.Str(reduce_op), + "is_trans_a" : ge.attr.Bool(is_trans_a), + "is_trans_b" : ge.attr.Bool(is_trans_b), + "comm_turn" : ge.attr.Int(comm_turn), + "antiquant_group_size" : ge.attr.Int(antiquant_group_size), + "epsilon" : ge.attr.Float(epsilon), + }, + outputs=[ + "y", + "norm_out" + ] + ) + return y, norm_out + MatmulAllReduceAddRmsNorm = MatmulAllReduceAddRmsNormV2 +elif torch_npu_api_version == 1: + def MatmulAllReduceAddRmsNormV1(x1: Tensor, + x2: Tensor, + bias: Optional[Tensor], + residual: Tensor, + gamma: Tensor, + antiquant_scale: Optional[Tensor], + antiquant_offset: Optional[Tensor], + dequant_scale: Optional[Tensor], + *, + group: str, + reduce_op: str = "sum", + is_trans_a: bool = False, + is_trans_b: bool = False, + comm_turn: int = 0, + 
antiquant_group_size: int = 0, + epsilon: float = 0.000001, + dependencies=None, + node_name=None): + op = get_default_ge_graph().op.add() + op.type = "MatmulAllReduceAddRmsNorm" + op.name = next_unique_name(node_name, "MatmulAllReduceAddRmsNorm") + + # process dependices + if dependencies is not None: + for dependency in dependencies: + op.input.append(dependency.controller) + + # process inputs + op.input.append(x1.tensor) + op.input_desc.add().CopyFrom(x1.desc) + op.input_desc[-1].name = "x1" + op.input.append(x2.tensor) + op.input_desc.add().CopyFrom(x2.desc) + op.input_desc[-1].name = "x2" + if bias is not None: + op.input.append(bias.tensor) + op.input_desc.add().CopyFrom(bias.desc) + op.input_desc[-1].name = "bias" + else: + op.input.append('') + op.input_desc.add().CopyFrom(get_invalid_desc()) + op.input_desc[-1].name = "bias" + op.input.append(residual.tensor) + op.input_desc.add().CopyFrom(residual.desc) + op.input_desc[-1].name = "residual" + op.input.append(gamma.tensor) + op.input_desc.add().CopyFrom(gamma.desc) + op.input_desc[-1].name = "gamma" + if antiquant_scale is not None: + op.input.append(antiquant_scale.tensor) + op.input_desc.add().CopyFrom(antiquant_scale.desc) + op.input_desc[-1].name = "antiquant_scale" + else: + op.input.append('') + op.input_desc.add().CopyFrom(get_invalid_desc()) + op.input_desc[-1].name = "antiquant_scale" + if antiquant_offset is not None: + op.input.append(antiquant_offset.tensor) + op.input_desc.add().CopyFrom(antiquant_offset.desc) + op.input_desc[-1].name = "antiquant_offset" + else: + op.input.append('') + op.input_desc.add().CopyFrom(get_invalid_desc()) + op.input_desc[-1].name = "antiquant_offset" + if dequant_scale is not None: + op.input.append(dequant_scale.tensor) + op.input_desc.add().CopyFrom(dequant_scale.desc) + op.input_desc[-1].name = "dequant_scale" + else: + op.input.append('') + op.input_desc.add().CopyFrom(get_invalid_desc()) + op.input_desc[-1].name = "dequant_scale" + + # process attrs + 
op.attr["group"].s = compat_as_bytes(group) + op.attr["reduce_op"].s = compat_as_bytes(reduce_op) + op.attr["is_trans_a"].b = is_trans_a + op.attr["is_trans_b"].b = is_trans_b + op.attr["comm_turn"].i = comm_turn + op.attr["antiquant_group_size"].i = antiquant_group_size + op.attr["epsilon"].f = epsilon + + # process outputs + output_index = 0 + op.output_desc.add().name = "y" + y = Tensor(op, output_index) + output_index += 1 + op.output_desc.add().name = "norm_out" + norm_out = Tensor(op, output_index) + output_index += 1 + + return y, norm_out + MatmulAllReduceAddRmsNorm = MatmulAllReduceAddRmsNormV1 +else: + raise ValueError("torch_npu_api_version unsupport") \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/op_builder/npu_moe_token_permute_builder.py b/model/train/yoco_moe/mindspeed/op_builder/npu_moe_token_permute_builder.py new file mode 100644 index 000000000..09a2913f3 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/op_builder/npu_moe_token_permute_builder.py @@ -0,0 +1,27 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
from mindspeed.op_builder.builder import MindSpeedOpBuilder


class MoeTokenPermuteOpBuilder(MindSpeedOpBuilder):
    """Op builder that registers itself under ``npu_moe_token_permute``."""

    OP_NAME = "npu_moe_token_permute"

    def __init__(self):
        super().__init__(self.OP_NAME)

    def sources(self):
        """Return the source file list of this op."""
        return ['ops/csrc/cann/npu_moe_token_permute.cpp']

    def include_paths(self):
        """Return the inherited include paths followed by ``ops/csrc/cann/inc``."""
        search_dirs = super().include_paths()
        search_dirs += ['ops/csrc/cann/inc']
        return search_dirs

    def cxx_args(self):
        """Return the inherited compiler flags followed by this op's extras."""
        op_specific_flags = [
            '-Wno-sign-compare',
            '-Wno-deprecated-declarations',
            '-Wno-return-type',
            "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'"
        ]
        compiler_flags = super().cxx_args()
        compiler_flags += op_specific_flags
        return compiler_flags


# ---- next file in patch: model/train/yoco_moe/mindspeed/op_builder/npu_moe_token_unpermute_builder.py
# ---- (new file mode 100644, index 000000000..e6b15a544)
# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
from mindspeed.op_builder.builder import MindSpeedOpBuilder


class MoeTokenUnpermuteOpBuilder(MindSpeedOpBuilder):
    """Op builder that registers itself under ``npu_moe_token_unpermute``."""

    OP_NAME = "npu_moe_token_unpermute"

    def __init__(self):
        super().__init__(self.OP_NAME)

    def sources(self):
        """Return the source file list of this op."""
        return ['ops/csrc/cann/npu_moe_token_unpermute.cpp']

    def include_paths(self):
        """Return the inherited include paths followed by ``ops/csrc/cann/inc``."""
        search_dirs = super().include_paths()
        search_dirs += ['ops/csrc/cann/inc']
        return search_dirs

    def cxx_args(self):
        """Return the inherited compiler flags followed by this op's extras."""
        op_specific_flags = [
            '-Wno-sign-compare',
            '-Wno-deprecated-declarations',
            '-Wno-return-type',
            "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'"
        ]
        compiler_flags = super().cxx_args()
        compiler_flags += op_specific_flags
        return compiler_flags


# ---- next file in patch: model/train/yoco_moe/mindspeed/op_builder/npu_ring_attention_update_builder.py
# ---- (new file mode 100644, index 000000000..1960d86c2)
# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
from mindspeed.op_builder.builder import MindSpeedOpBuilder


class RingAttentionUpdateOpBuilder(MindSpeedOpBuilder):
    """Op builder that registers itself under ``npu_ring_attention_update``."""

    OP_NAME = "npu_ring_attention_update"

    def __init__(self):
        super().__init__(self.OP_NAME)

    def sources(self):
        """Return the source file list of this op."""
        return ['ops/csrc/cann/npu_ring_attention_update.cpp']

    def include_paths(self):
        """Return the inherited include paths followed by ``ops/csrc/cann/inc``."""
        search_dirs = super().include_paths()
        search_dirs += ['ops/csrc/cann/inc']
        return search_dirs

    def cxx_args(self):
        """Return the inherited compiler flags followed by this op's extras."""
        op_specific_flags = [
            '-Wno-sign-compare',
            '-Wno-deprecated-declarations',
            '-Wno-return-type',
            "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'"
        ]
        compiler_flags = super().cxx_args()
        compiler_flags += op_specific_flags
        return compiler_flags


# ---- next file in patch: model/train/yoco_moe/mindspeed/op_builder/npu_rotary_position_embedding_builder.py
# ---- (new file mode 100644, index 000000000..eac82e537)
from mindspeed.op_builder.builder import MindSpeedOpBuilder


class RotaryPositionEmbeddingOpBuilder(MindSpeedOpBuilder):
    """Op builder that registers itself under ``npu_rotary_position_embedding``."""

    OP_NAME = "npu_rotary_position_embedding"

    def __init__(self):
        super().__init__(self.OP_NAME)

    def sources(self):
        """Return the source file list of this op."""
        return ['ops/csrc/cann/npu_rotary_position_embedding.cpp']

    def include_paths(self):
        """Return the inherited include paths followed by ``ops/csrc/cann/inc``."""
        search_dirs = super().include_paths()
        search_dirs += ['ops/csrc/cann/inc']
        return search_dirs

    def cxx_args(self):
        """Return the inherited compiler flags followed by this op's extras."""
        op_specific_flags = [
            '-Wno-sign-compare',
            '-Wno-deprecated-declarations',
            '-Wno-return-type',
            "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'"
        ]
        compiler_flags = super().cxx_args()
        compiler_flags += op_specific_flags
        return compiler_flags


# ---- next file in patch: model/train/yoco_moe/mindspeed/op_builder/quant_gmm_builder.py
# ---- (new file mode 100644, index 000000000..34632a61c)
from typing import Optional
from functools import partial
import torch
from torch.library import impl

from mindspeed.op_builder.builder import AS_LIBRARY
from mindspeed.op_builder.gmm_builder import GMMOpBuilderPublic, GroupedMatmul, fill_empty_tensor
from mindspeed.op_builder.gmm_builder import ge, Tensor, TensorSpec, DataType, register_fx_node_ge_converter


class QuantGMMOpBuilder(GMMOpBuilderPublic):
    """Builder for ``npu_quant_gmm``.

    Construction registers the op schema (``OP_PROTO``), a ``Meta``
    implementation and a GE graph converter for
    ``torch.ops.mindspeed.npu_quant_gmm``.
    """

    OP_NAME = "quant_grouped_matmul"
    OP_PROTO = (
        "npu_quant_gmm(Tensor x, Tensor weight, Tensor scale, *, Tensor? offset=None, Tensor? per_token_scale=None, \
        Tensor? bias=None, Tensor? group_list=None, int? group_list_type=0, ScalarType? output_dtype=None, \
        int? act_type=0) -> Tensor"
    )

    def __init__(self):
        super(QuantGMMOpBuilder, self).__init__(self.OP_NAME)
        self.register_op_proto(self.OP_PROTO)
        self.register_op_ir()

    def sources(self):
        """Return the source file list of this op."""
        return ['ops/csrc/cann/quant_gmm.cpp']

    def register_op_ir(self):
        """Register the Meta kernel and the GE converter of ``npu_quant_gmm``."""

        @impl(AS_LIBRARY, "npu_quant_gmm", "Meta")
        def npu_quant_gmm_forward(x, weight, scale, *, offset=None, per_token_scale=None, bias=None, group_list=None,
                                  group_list_type=0, output_dtype=None, act_type=0):
            # Shape-only implementation: (x.shape[0], weight.shape[-1]),
            # float16 unless an output dtype was requested.
            BM = x.shape[0]
            N = weight.shape[-1]
            output_dtype = output_dtype or torch.float16
            return x.new_empty((BM, N), dtype=output_dtype)

        @register_fx_node_ge_converter(torch.ops.mindspeed.npu_quant_gmm.default)
        def converter_npu_quant_gmm(
            x: Tensor,
            weight: Tensor,
            scale: Tensor,
            *,
            offset: Optional[Tensor] = None,
            per_token_scale: Optional[Tensor] = None,
            bias: Optional[Tensor] = None,
            group_list: Optional[Tensor] = None,
            group_list_type: Optional[int] = 0,
            output_dtype: Optional[DataType] = None,
            act_type: Optional[int] = 0,
            meta_outputs: TensorSpec = None,
        ):
            """Lower ``npu_quant_gmm`` to a single ``GroupedMatmul`` node.

            Raises:
                ValueError: if ``output_dtype`` is int8 (not supported in
                    graph mode) or any dtype other than float16/bfloat16.
            """
            # Test for absence explicitly: ``bias or ...`` would evaluate the
            # truth value of a graph Tensor instead of checking for None.
            if bias is None:
                bias = fill_empty_tensor(DataType.DT_INT32)
            if offset is None:
                offset = fill_empty_tensor(DataType.DT_FLOAT)
            # The anti-quant inputs are always passed as empty float16 tensors.
            antiquant_scale = fill_empty_tensor(DataType.DT_FLOAT16)
            antiquant_offset = fill_empty_tensor(DataType.DT_FLOAT16)

            # ``dtype`` attribute of GroupedMatmul: 0 for float16 (also the
            # default when no dtype is given), 1 for bfloat16.
            if output_dtype is None or output_dtype == torch.float16:
                y_dtype = 0
            elif output_dtype == torch.bfloat16:
                y_dtype = 1
            elif output_dtype == torch.int8:
                raise ValueError("output_dtype not support int8 yet for graph mode")
            else:
                raise ValueError(f"output_dtype should be int8, float16 or bfloat16, "
                                 f"otherwise it should be None, but got {output_dtype}")

            return GroupedMatmul([x], [weight], [bias], [scale], [offset], [antiquant_scale], [antiquant_offset],
                                 group_list, per_token_scale, split_item=3, group_type=0,
                                 group_list_type=group_list_type, dtype=y_dtype, act_type=act_type)[0]


# ---- next file in patch: model/train/yoco_moe/mindspeed/op_builder/rms_norm_builder.py
# ---- (new file mode 100644, index 000000000..9b9ae8ce1)
from mindspeed.op_builder.atb_builder import AtbOpBuilder


class RmsNormOpBuilder(AtbOpBuilder):
    """ATB op builder that registers itself under ``rms_norm``."""

    OP_NAME = "rms_norm"

    def __init__(self):
        super(RmsNormOpBuilder, self).__init__(self.OP_NAME)

    def sources(self):
        """Return the source file list of this op."""
        return ['ops/csrc/atb/rms_norm.cpp',
                'ops/csrc/atb/utils/atb_adapter.cpp']


# ---- next file in patch: model/train/yoco_moe/mindspeed/op_builder/smart_swap_builder.py
# ---- (new file mode 100644, index 000000000..1d44b2503)
# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
import os
import torch
from mindspeed.op_builder.builder import MindSpeedOpBuilder


class SmartSwapBuilder(MindSpeedOpBuilder):
    """Op builder that registers itself under ``smart_swap``."""

    NAME = "smart_swap"
    _torch_path = None
    _python_path = None

    def __init__(self):
        from sysconfig import get_paths

        # Directory that contains the imported torch package.
        torch_module_file = os.path.abspath(torch.__file__)
        self._torch_path = os.path.dirname(torch_module_file)
        # Parent directory of the interpreter's "include" path.
        python_include_dir = os.path.abspath(get_paths().get("include"))
        self._python_path = os.path.dirname(python_include_dir)
        super().__init__(self.NAME)

    def sources(self):
        """Return the allocator source files, each joined onto the smart_swap root."""
        root_path = "ops/csrc/pluggable_allocator/smart_swap"
        relative_names = (
            "./SwapBindings.cpp",
            "./NPUSwapManager.cpp",
            "./DeviceCachingAllocator.cpp",
            "./CachingAllocatorConfig.cpp",
            "./EventPool.cpp",
            "./PluggableAllocator.cpp",
            "./common.cpp",
        )
        joined = []
        for name in relative_names:
            joined.append(os.path.join(root_path, name))
        return joined

    def cxx_args(self):
        """Return the inherited flags plus two defines, without ``-fvisibility=hidden``."""
        compiler_flags = super().cxx_args()
        compiler_flags += ["-D_GLIBCXX_USE_CXX11_ABI=0", "-D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'"]

        # Drop every occurrence of the hidden-visibility switch, in place.
        unwanted = "-fvisibility=hidden"
        for _ in range(compiler_flags.count(unwanted)):
            compiler_flags.remove(unwanted)

        return compiler_flags

    def extra_ldflags(self):
        """Return linker flags that point at ``<self._torch_npu_path>/lib`` and link ``torch_npu``."""
        npu_lib_dir = os.path.join(self._torch_npu_path, "lib")
        return ["-L" + npu_lib_dir, "-ltorch_npu"]


# ---- next file in patch: model/train/yoco_moe/mindspeed/op_builder/swiglu_builder.py
# ---- (new file mode 100644, index 000000000..b6b06e99e)
from mindspeed.op_builder.atb_builder import AtbOpBuilder


class SwigluOpBuilder(AtbOpBuilder):
    """ATB op builder that registers itself under ``swiglu``."""

    OP_NAME = "swiglu"

    def __init__(self):
        super().__init__(self.OP_NAME)

    def sources(self):
        """Return the source file list of this op."""
        return ['ops/csrc/atb/swiglu.cpp',
                'ops/csrc/atb/utils/atb_adapter.cpp']


# ---- next file in patch: model/train/yoco_moe/mindspeed/op_builder/weight_quant_gmm_builder.py
# ---- (new file mode 100644, index 000000000..4619edb70)
# ---- patch header continued: +++ b/model/train/yoco_moe/mindspeed/op_builder/weight_quant_gmm_builder.py
from typing import Optional
from functools import partial
import torch
from torch.library import impl

from mindspeed.op_builder.builder import AS_LIBRARY
from mindspeed.op_builder.gmm_builder import GMMOpBuilderPublic, GroupedMatmul, fill_empty_tensor
from mindspeed.op_builder.gmm_builder import ge, Tensor, TensorSpec, DataType, register_fx_node_ge_converter


class WeightQuantGMMOpBuilder(GMMOpBuilderPublic):
    """Builder for ``npu_weight_quant_gmm``.

    Construction registers the op schema (``OP_PROTO``), a ``Meta``
    implementation and a GE graph converter for
    ``torch.ops.mindspeed.npu_weight_quant_gmm``.
    """

    OP_NAME = "weight_quant_grouped_matmul"
    OP_PROTO = (
        "npu_weight_quant_gmm(Tensor x, Tensor weight, Tensor antiquant_scale, *, Tensor? antiquant_offset=None, \
        Tensor? bias=None, Tensor? group_list=None, int? group_list_type=0, int? act_type=0) -> Tensor"
    )

    def __init__(self):
        super(WeightQuantGMMOpBuilder, self).__init__(self.OP_NAME)
        self.register_op_proto(self.OP_PROTO)
        self.register_op_ir()

    def sources(self):
        """Return the source file list of this op."""
        return ['ops/csrc/cann/weight_quant_gmm.cpp']

    def register_op_ir(self):
        """Register the Meta kernel and the GE converter of ``npu_weight_quant_gmm``."""

        @impl(AS_LIBRARY, "npu_weight_quant_gmm", "Meta")
        def npu_weight_quant_gmm_forward(x, weight, antiquant_scale, *, antiquant_offset=None, bias=None,
                                         group_list=None, group_list_type=0, act_type=0):
            # Shape-only implementation: (x.shape[0], weight.shape[-1]) in x's dtype.
            BM = x.shape[0]
            N = weight.shape[-1]
            output_dtype = x.dtype
            return x.new_empty((BM, N), dtype=output_dtype)

        @register_fx_node_ge_converter(torch.ops.mindspeed.npu_weight_quant_gmm.default)
        def converter_npu_weight_quant_gmm(
            x: Tensor,
            weight: Tensor,
            antiquant_scale: Tensor,
            *,
            antiquant_offset: Optional[Tensor] = None,
            bias: Optional[Tensor] = None,
            group_list: Optional[Tensor] = None,
            group_list_type: Optional[int] = 0,
            act_type: Optional[int] = 0,
            meta_outputs: TensorSpec = None,
        ):
            """Lower ``npu_weight_quant_gmm`` to a single ``GroupedMatmul`` node."""
            x_dtype = x.dtype
            if bias is None:
                # A missing bias becomes an empty tensor: float for bf16
                # activations, float16 for float16 activations.
                if x_dtype == DataType.DT_BF16:
                    bias = fill_empty_tensor(DataType.DT_FLOAT)
                elif x_dtype == DataType.DT_FLOAT16:
                    bias = fill_empty_tensor(DataType.DT_FLOAT16)
                # NOTE(review): for any other x dtype ``bias`` stays None and is
                # forwarded as ``[None]`` - confirm GroupedMatmul accepts that.
            # Test for absence explicitly: ``antiquant_offset or ...`` would
            # evaluate the truth value of a graph Tensor instead.
            if antiquant_offset is None:
                antiquant_offset = fill_empty_tensor(antiquant_scale.dtype)
            # The quant inputs are always passed as empty tensors.
            scale = fill_empty_tensor(DataType.DT_UINT64)
            offset = fill_empty_tensor(DataType.DT_FLOAT)

            return GroupedMatmul([x], [weight], [bias], [scale], [offset], [antiquant_scale], [antiquant_offset],
                                 group_list, split_item=3, group_type=0,
                                 group_list_type=group_list_type, act_type=act_type)[0]


# ---- next file in patch: model/train/yoco_moe/mindspeed/ops/__init__.py
# ---- (new file mode 100644, index 000000000..e69de29bb, no content lines)
# ---- next file in patch: model/train/yoco_moe/mindspeed/ops/csrc/algorithm/adaptive_cp/adaptive_cp.cpp
# ---- (new file mode 100644, index 000000000..1bbdc0479); its licence header follows verbatim:
# // Copyright (c) 2024 Huawei Technologies Co., Ltd
# // All rights reserved.
# //
# // Licensed under the BSD 3-Clause License (the "License");
# // you may not use this file except in compliance with the License.
# // You may obtain a copy of the License at
# //
# // https://opensource.org/licenses/BSD-3-Clause
# //
# // Unless required by applicable law or agreed to in writing, software
# // distributed under the License is distributed on an "AS IS" BASIS,
# // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# // See the License for the specific language governing permissions and
# // limitations under the License.
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace py = pybind11; + + +bool is_block_all_one(const uint64_t* dataPtr, int rowBlockSize, int colBlockSize, int splitNum) +{ + for (size_t i = 0; i < rowBlockSize; i++) { + for (size_t j = 0; j < colBlockSize; j++) { + if (*(dataPtr++) != 0x0101010101010101) { + return false; + } + } + dataPtr += colBlockSize * (splitNum - 1); + } + return true; +} + +void sub_coarsen_mask(const uint64_t *dataPtr, int rowBlockSize, int colBlockSize, int splitNum, + at::Tensor &output, int blockIdxStart, int blockIdxEnd) +{ + if (splitNum == 0) { + throw std::runtime_error("Split Number must be a positive integer."); + } + auto outputPtr = (uint8_t *) output.data_ptr(); + outputPtr += blockIdxStart; + for (size_t i = blockIdxStart; i < blockIdxEnd; i++) { + int blockRowIdx = std::floor(i / splitNum); + int blockColIdx = i % splitNum; + int grid_val = is_block_all_one( + dataPtr + (blockRowIdx * rowBlockSize) * (splitNum * colBlockSize) + (blockColIdx * colBlockSize), + rowBlockSize, colBlockSize, splitNum); + *(outputPtr++) = grid_val; + } +} + +void coarsen_mask(const at::Tensor& input, const int splitNum, at::Tensor& output) +{ + int rowDim = input.size(0); + int colDim = input.size(1); + if (splitNum == 0) { + throw std::runtime_error("Split number must be a positive integer."); + } + if (rowDim % splitNum != 0 || colDim % splitNum != 0) { + throw std::runtime_error("Both dims of the input 2-dim matrix must be divisible by split num."); + } + int rowBlockSize = rowDim / splitNum; + int colBlockSize = colDim / splitNum; + int sizeRatioInt64ToBool = sizeof(uint64_t) / sizeof(bool); + if (rowBlockSize % sizeRatioInt64ToBool != 0 || colBlockSize % sizeRatioInt64ToBool != 0) { + throw std::runtime_error("Both dims of the input 2-dim matrix must be divisible by 8 * split_num, to iterate " + "data pointer in uint64 instead of bool."); + } + auto 
dataPtr = (uint64_t*) input.data_ptr(); + colBlockSize /= sizeRatioInt64ToBool; + std::vector threads; + int totalNumBlocks = splitNum * splitNum; + int numThreads = std::thread::hardware_concurrency(); + if (numThreads == 0) { + throw std::runtime_error("Number of threads must be a positive integer."); + } + if (totalNumBlocks < numThreads) { + numThreads = totalNumBlocks; + } + int blockNumPerThread = totalNumBlocks / numThreads; + for (size_t i = 0; i < numThreads; ++i) { + int blockIdxStart = i * blockNumPerThread; + threads.emplace_back(sub_coarsen_mask, dataPtr, rowBlockSize, colBlockSize, splitNum, std::ref(output), + blockIdxStart, blockIdxStart + blockNumPerThread); + } + // 等待所有线程完成 + for (auto& t : threads) { + t.join(); + } +} + +void sub_select_perm_mask(const at::Tensor &input, const std::vector indList, at::Tensor &output, int subIndCnt, + int subStartIdx) +{ + uint64_t seqLen = input.size(0); + uint64_t indCnt = indList.size(); + auto maskTensorPtr = (uint8_t *) input.data_ptr(); + auto outputTensorPtr = (uint8_t *) output.data_ptr(); + uint8_t *subOutputPtr = outputTensorPtr + subStartIdx * indCnt; + std::vector rowStartIdxList(subIndCnt); + for (size_t i = 0; i < subIndCnt; i++) { + rowStartIdxList[i] = ((uint64_t) indList[subStartIdx + i] * seqLen); + } + + for (size_t i = 0; i < subIndCnt; i++) { + uint64_t rowStartIdx = rowStartIdxList[i]; + for (size_t j = 0; j < indCnt; j++) { + uint64_t colIdx = indList[j]; + uint8_t extractedValue = *(maskTensorPtr + (rowStartIdx + colIdx)); + *(subOutputPtr++) = extractedValue; + } + } +} + +void select_perm_mask(const at::Tensor &input, const std::vector indList, at::Tensor &output) +{ + if (input.dim() != 2 || input.size(0) != input.size(1)) { + throw std::runtime_error("Input mask must be 2-dimensional squared tensor."); + } + if (input.scalar_type() != torch::kBool) { + throw std::runtime_error("The datatype of input mask must be bool."); + } + uint64_t indCnt = indList.size(); + std::vector threads; + 
int numThreads = std::thread::hardware_concurrency(); + if (numThreads == 0) { + throw std::runtime_error("Number of threads must be a positive integer."); + } + if (indCnt % numThreads != 0 || numThreads > indCnt) { + numThreads = indCnt; + } + int subIndCnt = indCnt / numThreads; + for (size_t i = 0; i < numThreads; ++i) { + int subStartIdx = i * subIndCnt; + threads.emplace_back(sub_select_perm_mask, input, indList, std::ref(output), subIndCnt, subStartIdx); + } + // 等待所有线程完成 + for (auto& t : threads) { + t.join(); + } +} + +// Function to calculate the Euclidean distance between two points +float euclidean_distance(const std::vector& point1, const std::vector& point2) +{ + float sum = 0.0f; + for (size_t i = 0; i < point1.size(); ++i) { + sum += (point1[i] - point2[i]) * (point1[i] - point2[i]); + } + return std::sqrt(sum); +} + +// Function to calculate distances between each point and all centroids +std::vector> calculate_distances( + const std::vector>& data, + const std::vector>& centroids) +{ + std::vector> distances(data.size(), std::vector(centroids.size())); + for (size_t i = 0; i < data.size(); ++i) { + for (size_t j = 0; j < centroids.size(); ++j) { + distances[i][j] = euclidean_distance(data[i], centroids[j]); + } + } + return distances; +} + +// Function to find the index of the minimum element in a vector +size_t argmin(const std::vector& dataVec) +{ + return std::distance(dataVec.begin(), std::min_element(dataVec.begin(), dataVec.end())); +} + +// FUnction to update centroids +std::vector> update_centroids( + const std::vector>& data, + const std::vector& labels, + size_t numClusters, + size_t dimensionSize) +{ + std::vector> newCentroids(numClusters, std::vector(dimensionSize, 0.0f)); + std::vector counts(numClusters, 0); + + for (size_t i = 0; i < data.size(); ++i) { + for (size_t j = 0; j < dimensionSize; ++j) { + newCentroids[labels[i]][j] += data[i][j]; + } + counts[labels[i]]++; + } + + for (size_t i = 0; i < numClusters; ++i) { + if 
(counts[i] > 0) { + for (size_t j = 0; j < dimensionSize; ++j) { + newCentroids[i][j] /= counts[i]; + } + } else { + // Reinitialize centroid randomly if no points are assigned to this cluster + newCentroids[i] = data[std::rand() % data.size()]; + } + } + + return newCentroids; +} + +bool allClose(const std::vector>& centroids, + const std::vector>& newCentroids, + float rtol = 1e-5, float atol = 1e-8) +{ + // Check if the dimensions match + if (centroids.size() != newCentroids.size()) { + return false; + } + + for (size_t i = 0; i < centroids.size(); ++i) { + if (centroids[i].size() != newCentroids[i].size()) { + return false; + } + + for (size_t j = 0; j < centroids[i].size(); ++j) { + float diff = std::fabs(centroids[i][j] - newCentroids[i][j]); + float tol = atol + rtol * std::fabs(newCentroids[i][j]); + if (diff > tol) { + return false; + } + } + } + return true; +} + +// Function to check if centroids have converged +bool centroids_converged( + const std::vector>& centroids, + const std::vector>& newCentroids) +{ + return allClose(centroids, newCentroids); +} + +std::vector get_num_tasks_on_device(const torch::Tensor& gridMask) +{ + int P = gridMask.size(0); + std::vector numTaskList(P, 0); + + // 计算每行和每列中0的数量 + for (int i = 0; i < P; ++i) { + int rowZeroCnt = 0; + int colZeroCnt = 0; + + // 计算第i行中0的数量 + for (int j = 0; j < P; ++j) { + if (gridMask[i][j].item() == 0) { + rowZeroCnt++; + } + } + + // 计算第i列中0的数量 + for (int j = 0; j < P; ++j) { + if (gridMask[j][i].item() == 0) { + colZeroCnt++; + } + } + + // 第i行和第i列的0的数量之和 + numTaskList[i] = rowZeroCnt + colZeroCnt - (gridMask[i][i].item() == 0 ? 
1 : 0); + } + + return numTaskList; +} + +std::pair get_score(const at::Tensor& mask, size_t cpSize, at::Tensor &gridMask) +{ + if (cpSize == 0) { + throw std::runtime_error("CP size must be a positive integer."); + } + size_t maskSize = mask.size(0); + coarsen_mask(mask, cpSize, gridMask); + float totalTaskDensity = 1 - (gridMask.sum().item() / (cpSize * cpSize)); + std::vector numTaskList = get_num_tasks_on_device(gridMask); + float taskNumDev = 0.0f; + if (!numTaskList.empty()) { + float mean = std::accumulate(numTaskList.begin(), numTaskList.end(), 0.0f) / numTaskList.size(); + float sum = 0.0f; + for (const auto& num : numTaskList) { + sum += (num - mean) * (num - mean); + } + taskNumDev = std::sqrt(sum / numTaskList.size()); + } + return {totalTaskDensity, taskNumDev}; +} + +// Kmeans function +std::pair>, std::vector> kmeans( + const std::vector>& data, + size_t numClusters, + size_t numIters) +{ + size_t seqLen = data.size(); + size_t dimensionSize = data[0].size(); + // Initialize centroids randomly + std::vector> centroids(numClusters); + std::srand(0); + std::vector indices(seqLen); + std::iota(indices.begin(), indices.end(), 0); + std::random_shuffle(indices.begin(), indices.end()); + for (size_t i = 0; i < numClusters; ++i) { + centroids[i] = data[indices[i]]; + } + std::vector labels(seqLen); + for (size_t iterIdx = 0; iterIdx < numIters; ++iterIdx) { + // Calculate distances between each point and centroids + std::vector> distances = calculate_distances(data, centroids); + // Assign labels based on nearest centroid + for (size_t i = 0; i < seqLen; ++ i) { + labels[i] = argmin(distances[i]); + } + // Update centroids + std::vector> newCentroids = update_centroids(data, labels, numClusters, dimensionSize); + // Check for convergence + if (centroids_converged(centroids, newCentroids)) { + break; + } + centroids = newCentroids; + } + return {centroids, labels}; +} + +std::vector search_kmeans( + const at::Tensor& attnMask, + const std::vector>& 
reducedMask, + at::Tensor &tmpAttnMask, + at::Tensor &tmpGridMask, + at::Tensor &optGridMask, + at::Tensor &optAttnMask, + py::list optNumCluster, + size_t cpSize, + size_t numIters) +{ + std::vector optSeq(attnMask.size(0)); + std::iota(optSeq.begin(), optSeq.end(), 0); + auto [minTaskDensity, optTaskDev] = get_score(attnMask, cpSize, optGridMask); + for (int numClusters = 2; numClusters < 9 ; ++numClusters) { + auto [centroids, labels] = kmeans(reducedMask, numClusters, numIters); + // Sort indices based on labels + std::vector sortedSeq(labels.size()); + std::iota(sortedSeq.begin(), sortedSeq.end(), 0); + std::sort(sortedSeq.begin(), sortedSeq.end(), [&labels](size_t i, size_t j) { + return labels[i] < labels[j]; + }); + select_perm_mask(attnMask, sortedSeq, tmpAttnMask); + auto [taskDensity, taskNumDev] = get_score(tmpAttnMask, cpSize, tmpGridMask); + if (taskDensity < minTaskDensity) { + minTaskDensity = taskDensity; + optAttnMask.copy_(tmpAttnMask); + optNumCluster[0] = numClusters; + optTaskDev = taskNumDev; + optSeq = sortedSeq; + optGridMask.copy_(tmpAttnMask); + } else if (taskDensity == minTaskDensity && taskNumDev < optTaskDev) { + optAttnMask.copy_(tmpAttnMask); + optNumCluster[0] = numClusters; + optTaskDev = taskNumDev; + optSeq = sortedSeq; + optGridMask.copy_(tmpGridMask); + } + } + return optSeq; +} + +void get_mask_list_with_remap(const at::Tensor& attnMask, at::Tensor& output, std::vector rowIdxSeq, std::vector colIdxSeq) +{ + size_t maskColLen = attnMask.size(1); + size_t rowIdxLen = rowIdxSeq.size(); + size_t colIdxLen = colIdxSeq.size(); + if (rowIdxLen > output.size(0) || colIdxLen > output.size(1)) { + throw std::runtime_error("Row or colum index length large than size of attention mask"); + } + uint8_t *inputPtr = (uint8_t *) attnMask.data_ptr(); + uint8_t *outputPtr = (uint8_t *) output.data_ptr(); + + for (size_t i = 0; i < rowIdxLen; i++) { + uint8_t *inputRowStartPtr = inputPtr + rowIdxSeq[i] * maskColLen; + for (size_t j = 0; j < 
colIdxLen; j++) { + *(outputPtr++) = *(inputRowStartPtr + colIdxSeq[j]); + } + } +} + +void get_mask_list_without_remap(const at::Tensor& attnMask, at::Tensor& output, std::vector blockIdx, int cpSize) +{ + if (cpSize == 0) { + throw std::runtime_error("CP size must be a positive integer."); + } + int sizeRatioInt64ToBool = sizeof(uint64_t) / sizeof(bool); + int rowGridSize = attnMask.size(0) / cpSize; + int colGridSize = rowGridSize / sizeRatioInt64ToBool; + if (rowGridSize % sizeRatioInt64ToBool != 0) { + throw std::runtime_error("Sequence length on each cp rank must be a multiple of 8"); + } + int rowStartIdx = blockIdx[0] * rowGridSize; + int colStartIdx = blockIdx[1] * colGridSize; + + uint64_t *inputPtr = (uint64_t*) attnMask.data_ptr(); + uint64_t *outputPtr = (uint64_t*) output.data_ptr(); + + uint64_t *currPtr = inputPtr + rowStartIdx * (colGridSize * cpSize) + colStartIdx; + int numUnitToNextRow = cpSize * colGridSize; + + uint64_t memmoveCnt = 0; + if (colGridSize > std::numeric_limits::max() / rowGridSize) { + throw std::runtime_error("sequence length too long or context parallel size too small"); + } + uint64_t outputSize = static_cast(rowGridSize) * colGridSize; + + for (size_t i = 0; i < rowGridSize; i++) { + if (memmoveCnt + colGridSize > outputSize) { + throw std::runtime_error("Memory move out of range."); + } + memmove(outputPtr, currPtr, colGridSize * sizeof(uint64_t)); + memmoveCnt += colGridSize; + outputPtr += colGridSize; + currPtr += numUnitToNextRow; + } +} + +PYBIND11_MODULE(adaptive_cp, m) +{ +m.def("coarsen_mask", + &coarsen_mask, + "A function that coarse a bool tensor with given split number", + py::arg("input"), py::arg("splitNum"), py::arg("output")); +m.def("search_kmeans", + &search_kmeans, + "Search optimal k-means clustering result among various number of clusters", + py::arg("attnMask"), py::arg("reduceMask"), py::arg("tmpAttnMask"), py::arg("tmpGridMask"), + py::arg("optGridMask"), py::arg("optAttnMask"), 
py::arg("optNumCluster"), py::arg("cpSize"), + py::arg("numIters")); +m.def("get_mask_list_with_remap", + &get_mask_list_with_remap, + py::arg("attnMask"), py::arg("output"), py::arg("rowIdxSeq"), py::arg("colIdxSeq")); +m.def("get_mask_list_without_remap", + &get_mask_list_without_remap, + py::arg("attnMask"), py::arg("output"), py::arg("blockIdx"), py::arg("cpSize")); +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/algorithm/algorithm.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/algorithm/algorithm.cpp new file mode 100644 index 000000000..580746e49 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/algorithm/algorithm.cpp @@ -0,0 +1,24 @@ +#include + +void reuse_data_ptr(at::Tensor& des, at::Tensor& src, int64_t offset) +{ + TORCH_CHECK( + offset >= 0, + "Expect offset equal or greater than zero, got: ", offset); + + TORCH_CHECK( + (offset + des.numel()) * des.element_size() <= + src.numel() * src.element_size(), + "Offsets overflow, got: ", + "offset ", offset * des.element_size(), + ", des storage size ", des.numel() * des.element_size(), + ", src storage size ", src.numel()* src.element_size()); + + char* data_ptr = static_cast(src.storage().data_ptr().get()) + offset * des.element_size(); + at::DataPtr aim_data_ptr = at::DataPtr(data_ptr, des.storage().device()); + des.storage().set_data_ptr(std::move(aim_data_ptr)); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("reuse_data_ptr", &reuse_data_ptr, "reuse tensor data ptr"); +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/atb/groupmatmul_add.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/atb/groupmatmul_add.cpp new file mode 100644 index 000000000..9f7335e11 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/atb/groupmatmul_add.cpp @@ -0,0 +1,71 @@ +// Copyright (c) 2023 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. 
+// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include + +#ifdef ENABLE_ATB +#include "inc/atb_adapter.h" +#include "atb/operation.h" +#include "atb/train_op_params.h" +#include "atb/infer_op_params.h" +#include "../flop_counter/flop_counter.h" +#endif + +using namespace std; +using torch::autograd::AutogradContext; +using torch::autograd::Function; +namespace { +// Runs ATB's GroupedMatmulInplaceAdd op with grad as both the fourth input and the only output (grad is updated in place). +// x is flagged as transposed (transposeA), weight is not; without ENABLE_ATB the call fails via TORCH_CHECK. +void groupmatmul_add_fp32(const at::Tensor &x, const at::Tensor &weight, const at::Tensor &group_list, at::Tensor & grad) +{ +#ifndef ENABLE_ATB + TORCH_CHECK(false, "ATB MatmulAdd not implemented"); +#else + atb::infer::GroupedMatmulInplaceAddParam param; + param.transposeA = true; // whether to transpose matrix A + param.transposeB = false; // whether to transpose matrix B + + ParamSetter paramsetter; + paramsetter.Input(x) + .Input(weight) + .Input(group_list) + .Input(grad) + .Output(grad); + // build the operator and run it + atb::Operation* op = nullptr; + atb::CreateOperation(param, &op); + TORCH_CHECK(op != nullptr, "GroupMatmulAdd get op failed!"); + RunAtbCmd(op, paramsetter, "GroupedMatmulInplaceAddOperation"); + #ifdef FLOP_COUNT + FLOP_COUNT(FlopCounter::gmm_add_flop, x, weight, group_list); + #endif + return ; +#endif +} +} // namespace + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("npu_groupmatmul_add_fp32", &groupmatmul_add_fp32, "matmul_add on ascend device", + pybind11::arg("x"), pybind11::arg("weight"), pybind11::arg("group_list"), pybind11::arg("grad")); +} diff --git
a/model/train/yoco_moe/mindspeed/ops/csrc/atb/inc/atb_adapter.h b/model/train/yoco_moe/mindspeed/ops/csrc/atb/inc/atb_adapter.h new file mode 100644 index 000000000..061f86189 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/atb/inc/atb_adapter.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPEED_OPS_CSRC_ATB_INC_ATB_ADAPTER_H +#define MINDSPEED_OPS_CSRC_ATB_INC_ATB_ADAPTER_H +#include +#include +#include +#include +#include +#include "atb/types.h" +#include "atb/operation.h" +#include "atb/utils.h" +#if __has_include("torch_npu/csrc/flopcount/FlopCount.h") + #include "torch_npu/csrc/flopcount/FlopCount.h" +#endif + +atb::Tensor AtTensor2Tensor(const at::Tensor atTensor); +atb::Context* GetContext(); +at::Tensor GetWorkspaceTensor(uint64_t workspaceSize, atb::Operation *operation); +uint64_t OperationSetup(atb::VariantPack variantPack, atb::Operation *operation, atb::Context* contextPtr); +class ParamSetter { +public: + ParamSetter& Input(const at::Tensor &tensor); + ParamSetter& Input(const c10::optional &tensor); + ParamSetter& Output(at::Tensor &tensor); + atb::VariantPack variantPack; +}; + +void RunAtbCmd(atb::Operation *op, const ParamSetter ¶msetter, const std::string &name); + +#endif diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/atb/lcal_coc.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/atb/lcal_coc.cpp new file 
mode 100644 index 000000000..885fd6cb5 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/atb/lcal_coc.cpp @@ -0,0 +1,283 @@ +// Copyright (c) 2023 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef ENABLE_ATB +#include +#include +#include "inc/atb_adapter.h" +#include "atb/operation.h" +#include "atb/infer_op_params.h" +#include "../flop_counter/flop_counter.h" +#endif + + +void matmul_all_reduce(const at::Tensor &input1, const at::Tensor &input2, const c10::optional &biasOpt, + at::Tensor &output, int rank, int rankSize, const std::string &commDomain) +{ + const at::Tensor &bias = biasOpt.value_or(at::Tensor()); + + atb::infer::LinearParallelParam param; + bool transB = input1.size(1) != input2.size(0); + param.transWeight = transB; + param.rank = rank; + param.rankSize = rankSize; + param.rankRoot = 0; + param.hasResidual = biasOpt.has_value(); + param.backend = "lcoc"; + param.commMode = atb::infer::CommMode::COMM_MULTI_PROCESS; + param.type = atb::infer::LinearParallelParam::ParallelType::LINEAR_ALL_REDUCE; + param.keepIntermediate = false; + param.commDomain = commDomain; + + ParamSetter paramsetter; + paramsetter.Input(input1) + .Input(input2); + if (biasOpt.has_value()) { + paramsetter.Input(bias); + } + 
paramsetter.Output(output); + + atb::Operation* op = nullptr; + atb::CreateOperation(param, &op); + TORCH_CHECK(op != nullptr, "lcal coc get op failed!"); + RunAtbCmd(op, paramsetter, "matmul_all_reduce"); +#ifdef FLOP_COUNT + FLOP_COUNT(FlopCounter::coc_flop, input1, input2, transB, rankSize, false); +#endif +} + + +void all_gather_matmul(const at::Tensor &input1, const at::Tensor &input2, const c10::optional &biasOpt, + at::Tensor &output, int rank, int rankSize, const std::string &commDomain) +{ + const at::Tensor &bias = biasOpt.value_or(at::Tensor()); + + atb::infer::LinearParallelParam param; + bool transB = input1.size(1) != input2.size(0); + param.transWeight = transB; + param.rank = rank; + param.rankSize = rankSize; + param.rankRoot = 0; + param.hasResidual = biasOpt.has_value(); + param.backend = "lcoc"; + param.commMode = atb::infer::CommMode::COMM_MULTI_PROCESS; + param.type = atb::infer::LinearParallelParam::ParallelType::ALL_GATHER_LINEAR; + param.keepIntermediate = false; + param.commDomain = commDomain; + + ParamSetter paramsetter; + paramsetter.Input(input1) + .Input(input2); + if (biasOpt.has_value()) { + paramsetter.Input(bias); + } + paramsetter.Output(output); + + atb::Operation* op = nullptr; + atb::CreateOperation(param, &op); + TORCH_CHECK(op != nullptr, "lcal coc get op failed!"); + RunAtbCmd(op, paramsetter, "all_gather_matmul"); +#ifdef FLOP_COUNT + FLOP_COUNT(FlopCounter::coc_flop, input1, input2, transB, rankSize, true); +#endif +} + + +void all_gather_matmul_v2(const at::Tensor &input1, const at::Tensor &input2, const c10::optional &biasOpt, + at::Tensor &output, at::Tensor &commOutput, int rank, int rankSize, const std::string &commDomain) +{ + const at::Tensor &bias = biasOpt.value_or(at::Tensor()); + + atb::infer::LinearParallelParam param; + bool transB = input1.size(1) != input2.size(0); + param.transWeight = transB; + param.rank = rank; + param.rankSize = rankSize; + param.rankRoot = 0; + param.hasResidual = biasOpt.has_value(); 
+ param.backend = "lcoc"; + param.commMode = atb::infer::CommMode::COMM_MULTI_PROCESS; + param.type = atb::infer::LinearParallelParam::ParallelType::ALL_GATHER_LINEAR; + param.keepIntermediate = true; + param.commDomain = commDomain; + + ParamSetter paramsetter; + paramsetter.Input(input1) + .Input(input2); + if (biasOpt.has_value()) { + paramsetter.Input(bias); + } + paramsetter.Output(output) + .Output(commOutput); + + atb::Operation* op = nullptr; + atb::CreateOperation(param, &op); + TORCH_CHECK(op != nullptr, "lcal coc get op failed!"); + RunAtbCmd(op, paramsetter, "all_gather_matmul_v2"); +#ifdef FLOP_COUNT + FLOP_COUNT(FlopCounter::coc_flop, input1, input2, transB, rankSize, true); +#endif +} + + +void matmul_reduce_scatter(const at::Tensor &input1, const at::Tensor &input2, const c10::optional &biasOpt, + at::Tensor &output, int rank, int rankSize, const std::string &commDomain) +{ + const at::Tensor &bias = biasOpt.value_or(at::Tensor()); + + atb::infer::LinearParallelParam param; + bool transB = input1.size(1) != input2.size(0); + param.transWeight = transB; + param.rank = rank; + param.rankSize = rankSize; + param.rankRoot = 0; + param.hasResidual = biasOpt.has_value(); + param.backend = "lcoc"; + param.commMode = atb::infer::CommMode::COMM_MULTI_PROCESS; + param.type = atb::infer::LinearParallelParam::ParallelType::LINEAR_REDUCE_SCATTER; + param.keepIntermediate = false; + param.commDomain = commDomain; + + ParamSetter paramsetter; + paramsetter.Input(input1) + .Input(input2); + if (biasOpt.has_value()) { + paramsetter.Input(bias); + } + paramsetter.Output(output); + + atb::Operation* op = nullptr; + atb::CreateOperation(param, &op); + TORCH_CHECK(op != nullptr, "lcal coc get op failed!"); + RunAtbCmd(op, paramsetter, "matmul_reduce_scatter"); +#ifdef FLOP_COUNT + FLOP_COUNT(FlopCounter::coc_flop, input1, input2, transB, rankSize, false); +#endif +} + + +void pure_matmul(const at::Tensor &input1, const at::Tensor &input2, const c10::optional &biasOpt, + 
at::Tensor &output, int rank, int rankSize, const std::string &commDomain) +{ + const at::Tensor &bias = biasOpt.value_or(at::Tensor()); + + atb::infer::LinearParallelParam param; + bool transB = input1.size(1) != input2.size(0); + param.transWeight = transB; + param.rank = rank; + param.rankSize = rankSize; + param.rankRoot = 0; + param.hasResidual = biasOpt.has_value(); + param.backend = "lcoc"; + param.commMode = atb::infer::CommMode::COMM_MULTI_PROCESS; + param.type = atb::infer::LinearParallelParam::ParallelType::PURE_LINEAR; + param.keepIntermediate = false; + param.commDomain = commDomain; + + ParamSetter paramsetter; + paramsetter.Input(input1) + .Input(input2); + if (biasOpt.has_value()) { + paramsetter.Input(bias); + } + paramsetter.Output(output); + + atb::Operation* op = nullptr; + atb::CreateOperation(param, &op); + TORCH_CHECK(op != nullptr, "lcal coc get op failed!"); + RunAtbCmd(op, paramsetter, "pure_matmul"); +#ifdef FLOP_COUNT + FLOP_COUNT(FlopCounter::coc_flop, input1, input2, transB, rankSize, false); +#endif +} + +template +struct atb_support_all_gather_matmul_reduce_scatter_op : std::false_type {}; + +template +struct atb_support_all_gather_matmul_reduce_scatter_op> : std::true_type {}; + +template +void all_gather_matmul_reduce_scatter(const at::Tensor &input1, const at::Tensor &input2, + const c10::optional &biasOpt, at::Tensor &output, int rank, + int tpSize, const std::string &commDomain, int agDim, int rsDim, bool innerDimIsAg) +{ + if constexpr (atb_support_all_gather_matmul_reduce_scatter_op::value) { + const at::Tensor &bias = biasOpt.value_or(at::Tensor()); + T param; + bool transB = input1.size(1) != input2.size(0); + param.transWeight = transB; + param.rank = rank; + param.rankSize = tpSize; + param.rankRoot = 0; + param.twoDimTPInfo.agDim = agDim; + param.twoDimTPInfo.rsDim = rsDim; + param.twoDimTPInfo.innerDimIsAg = innerDimIsAg; + param.hasResidual = biasOpt.has_value(); + param.backend = "lcoc"; + param.commMode = 
atb::infer::CommMode::COMM_MULTI_PROCESS; + param.type = T::ParallelType::ALL_GATHER_LINEAR_REDUCE_SCATTER; + param.keepIntermediate = false; + param.commDomain = commDomain; + + ParamSetter paramsetter; + paramsetter.Input(input1) + .Input(input2); + if (biasOpt.has_value()) { + paramsetter.Input(bias); + } + paramsetter.Output(output); + + atb::Operation* op = nullptr; + atb::CreateOperation(param, &op); + TORCH_CHECK(op != nullptr, "lcal coc get op failed!"); + RunAtbCmd(op, paramsetter, "all_gather_matmul_reduce_scatter"); +#ifdef FLOP_COUNT + FLOP_COUNT(FlopCounter::coc_flop, input1, input2, transB, agDim, true); +#endif + } else { + TORCH_CHECK(false, "Current version of ATB doesn't support the all_gather_matmul_reduce_scatter operator!"); + } +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) +{ + m.def("matmul_all_reduce", &matmul_all_reduce, "matmul_all_reduce", pybind11::arg("input1"), + pybind11::arg("input2"), pybind11::arg("biasOpt"), pybind11::arg("output"), pybind11::arg("rank"), pybind11::arg("rankSize"), pybind11::arg("commDomain")); + m.def("all_gather_matmul", &all_gather_matmul, "all_gather_matmul", pybind11::arg("input1"), + pybind11::arg("input2"), pybind11::arg("biasOpt"), pybind11::arg("output"), pybind11::arg("rank"), pybind11::arg("rankSize"), pybind11::arg("commDomain")); + m.def("all_gather_matmul_v2", &all_gather_matmul_v2, "all_gather_matmul_v2", pybind11::arg("input1"), + pybind11::arg("input2"), pybind11::arg("biasOpt"), pybind11::arg("output"), pybind11::arg("commOutput"), + pybind11::arg("rank"), pybind11::arg("rankSize"), pybind11::arg("commDomain")); + m.def("matmul_reduce_scatter", &matmul_reduce_scatter, "matmul_reduce_scatter", pybind11::arg("input1"), + pybind11::arg("input2"), pybind11::arg("biasOpt"), pybind11::arg("output"), pybind11::arg("rank"), pybind11::arg("rankSize"), pybind11::arg("commDomain")); + m.def("pure_matmul", &pure_matmul, "pure_matmul", pybind11::arg("input1"), pybind11::arg("input2"), + 
pybind11::arg("biasOpt"), pybind11::arg("output"), pybind11::arg("rank"), pybind11::arg("rankSize"), pybind11::arg("commDomain")); + m.def("all_gather_matmul_reduce_scatter", &all_gather_matmul_reduce_scatter, "all_gather_matmul_reduce_scatter", pybind11::arg("input1"), + pybind11::arg("input2"), pybind11::arg("biasOpt"), pybind11::arg("output"), pybind11::arg("rank"), pybind11::arg("tpSize"), pybind11::arg("commDomain"), pybind11::arg("agDim"), pybind11::arg("rsDim"), pybind11::arg("innerDimIsAg")); +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/atb/matmul_add.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/atb/matmul_add.cpp new file mode 100644 index 000000000..2399e4dfc --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/atb/matmul_add.cpp @@ -0,0 +1,72 @@ +// Copyright (c) 2023 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include +#include + +#ifdef ENABLE_ATB +#include "inc/atb_adapter.h" +#include "atb/operation.h" +#include "atb/train_op_params.h" +#include "atb/infer_op_params.h" +#include "../flop_counter/flop_counter.h" +#endif + +using namespace std; +using torch::autograd::AutogradContext; +using torch::autograd::Function; +namespace { +// Runs ATB's Linear op with enAccum set and C as both the third input and the only output (C is updated in place). +// x is flagged as transposed (transposeA), weight is not, and no bias is used; without ENABLE_ATB the call fails via TORCH_CHECK. +void matmul_add_fp32(const at::Tensor &x, const at::Tensor &weight, at::Tensor & C) +{ +#ifndef ENABLE_ATB + TORCH_CHECK(false, "ATB MatmulAdd not implemented"); +#else + atb::infer::LinearParam param; + param.transposeA = true; // whether to transpose matrix A + param.transposeB = false; // whether to transpose matrix B + param.hasBias = false; + param.enAccum = true; + + ParamSetter paramsetter; + paramsetter.Input(x) + .Input(weight) + .Input(C) + .Output(C); + // build the operator and run it + atb::Operation* op = nullptr; + atb::CreateOperation(param, &op); + TORCH_CHECK(op != nullptr, "MatmulAdd_forward get op failed!"); + RunAtbCmd(op, paramsetter, "LinearOperation"); + #ifdef FLOP_COUNT + FLOP_COUNT(FlopCounter::mm_flop, x, weight); + #endif + return ; +#endif +} +} // namespace + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("npu_matmul_add_fp32", &matmul_add_fp32, "matmul_add on ascend device", + pybind11::arg("x"), pybind11::arg("weight"), pybind11::arg("C")); +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/atb/rms_norm.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/atb/rms_norm.cpp new file mode 100644 index 000000000..81a7769e7 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/atb/rms_norm.cpp @@ -0,0 +1,145 @@ +// Copyright (c) 2023 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#ifdef ENABLE_ATB +#include "inc/atb_adapter.h" +#include "atb/operation.h" +#include "atb/train_op_params.h" +#include "atb/infer_op_params.h" +#endif + +using namespace std; +using torch::autograd::AutogradContext; +using torch::autograd::Function; +namespace { +const static int RMSNORM_LAYERTYPE = 1; +const static int SAVE_X = 0; +const static int SAVE_RSTD = 1; +const static int SAVE_GAMMA = 2; +const static int N = 32; + +void InferShapeRmsNorm(c10::SmallVector &size, const at::Tensor &self, const at::Tensor &gamma) +{ + int64_t rstd_dim = self.dim(); + rstd_dim -= gamma.dim(); + TORCH_CHECK(rstd_dim >= 0, + "RmsNorm intensor gamma dim error,gamma's dim should not greater than x's dim"); + for (uint64_t i = 0; i < self.dim(); i++) { + if (i < rstd_dim) { + size.emplace_back(self.size(i)); + } else { + size.emplace_back(1); + } + } +} + +void CheckRmsNorm(const at::Tensor &x, const at::Tensor &gamma) +{ + TORCH_CHECK(x.scalar_type() == at::ScalarType::Half || x.scalar_type() == at::ScalarType::BFloat16 || + x.scalar_type() == at::ScalarType::Float, + "Input x dtype ", x.scalar_type(), " invalid, should be float, float16 or bfloat16"); + TORCH_CHECK(x.scalar_type() == gamma.scalar_type(), + "Input x dtype should be same with gamma, but got x ", x.scalar_type(), " gamma ", gamma.scalar_type()); +} + +class NPURmsNormFunction : public torch::autograd::Function { +public: + static at::Tensor forward( + AutogradContext *ctx, const at::Tensor &x, const at::Tensor &gamma, float 
epsilon) + { +#ifndef ENABLE_ATB + TORCH_CHECK(false, "ATB RmsNorm not implemented"); +#else + at::AutoNonVariableTypeMode g; + c10::SmallVector tensor_rstd_shape; + CheckRmsNorm(x, gamma); + InferShapeRmsNorm(tensor_rstd_shape, x, gamma); + // apply tensor + at::Tensor tensor_rstd = at::empty(at::IntArrayRef(tensor_rstd_shape), x.options().dtype(at::ScalarType::Float)); + at::Tensor tensor_y = at::empty(x.sizes(), x.options()); + + atb::infer::RmsNormParam param; + param.layerType = (atb::infer::RmsNormParam::RmsNormType)RMSNORM_LAYERTYPE; + param.normParam.epsilon = epsilon; + param.normParam.rstd = true; + + // set input and output + ParamSetter paramsetter; + paramsetter.Input(x) + .Input(gamma) + .Output(tensor_y) + .Output(tensor_rstd); + + atb::Operation* op = nullptr; + atb::CreateOperation(param, &op); + TORCH_CHECK(op != nullptr, "RmsNorm get op failed!"); + RunAtbCmd(op, paramsetter, "RmsNorm_forward"); + + ctx->save_for_backward({x, tensor_rstd, gamma}); + + return tensor_y; +#endif + } + + static std::vector backward(AutogradContext *ctx, std::vector grad_output) + { +#ifndef ENABLE_ATB + TORCH_CHECK(false, "RmsNormBackward not implemented"); +#else + auto saved = ctx->get_saved_variables(); + auto x = saved[SAVE_X]; + auto rstd = saved[SAVE_RSTD]; + auto gamma = saved[SAVE_GAMMA]; + atb::train::RmsNormBackwardParam param; + + at::Tensor tensor_x_grad = at::empty(x.sizes(), x.options()); + at::Tensor tensor_gamma_grad = at::empty(gamma.sizes(), gamma.options().dtype(at::ScalarType::Float)); + + ParamSetter paramsetter; + paramsetter.Input(grad_output[0]) + .Input(x) + .Input(rstd) + .Input(gamma) + .Output(tensor_x_grad) + .Output(tensor_gamma_grad); + + atb::Operation* op = nullptr; + atb::CreateOperation(param, &op); + TORCH_CHECK(op != nullptr, "RmsNormBackward get op failed!"); + RunAtbCmd(op, paramsetter, "RmsNorm_backward"); + + return {tensor_x_grad, tensor_gamma_grad, at::Tensor()}; +#endif + } +}; +} // namespace + +at::Tensor 
npu_rms_norm(const at::Tensor &x, const at::Tensor &gamma, float epsilon) +{ + return NPURmsNormFunction::apply(x, gamma, epsilon); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("rms_norm", &npu_rms_norm, "rms_norm on ascend device", + pybind11::arg("x"), pybind11::arg("gamma"), pybind11::arg("epsilon")=1e-6); +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/atb/swiglu.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/atb/swiglu.cpp new file mode 100644 index 000000000..1a364bb50 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/atb/swiglu.cpp @@ -0,0 +1,136 @@ +// Copyright (c) 2023 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include + +#ifdef ENABLE_ATB +#include "inc/atb_adapter.h" +#include "atb/operation.h" +#include "atb/infer_op_params.h" +#endif + +using namespace std; +using torch::autograd::AutogradContext; +using torch::autograd::Function; +namespace { +const static int N = 32; +void InferSwigluForward(c10::SmallVector &out_tensor_shape, const at::Tensor &x, int32_t dim) +{ + int64_t split_dim = dim; + if (split_dim < 0) { + split_dim += x.dim(); + } + TORCH_CHECK(split_dim >= 0 && split_dim < x.dim(), "Input dim range is invalid"); + const int32_t split_num = 2; + out_tensor_shape[split_dim] = x.size(split_dim) / split_num; +} + +void CheckSwigluForward(const at::Tensor &x) +{ + TORCH_CHECK(x.scalar_type() == at::ScalarType::Half || x.scalar_type() == at::ScalarType::BFloat16 || + x.scalar_type() == at::ScalarType::Float, "Input tensor dtype ", x.scalar_type(), + " invalid, should be float32, float16 or bfloat16"); +} + +void CheckSwigluBackward(const at::Tensor &y_grad, const at::Tensor &x) +{ + TORCH_CHECK(y_grad.scalar_type() == at::ScalarType::Half || y_grad.scalar_type() == at::ScalarType::BFloat16 || + y_grad.scalar_type() == at::ScalarType::Float, "Input y_grad tensor dtype ", y_grad.scalar_type(), + " invalid, should be float32, float16 or bfloat16"); + TORCH_CHECK(x.scalar_type() == at::ScalarType::Half || x.scalar_type() == at::ScalarType::BFloat16 || + x.scalar_type() == at::ScalarType::Float, "Input x tensor dtype ", x.scalar_type(), + " invalid, should be float32, float16 or bfloat16"); + TORCH_CHECK(x.scalar_type() == y_grad.scalar_type(), "Input x tensor dtype is not equal to y_grad"); +} + +class NPUSwigluFunction : public torch::autograd::Function { +public: + static at::Tensor forward(AutogradContext *ctx, const at::Tensor &x, int32_t dim = -1) + { +#ifndef ENABLE_ATB + TORCH_CHECK(false, "swiglu_forward not implemented"); +#else + at::AutoNonVariableTypeMode g; + CheckSwigluForward(x); + c10::SmallVector 
out_tensor_shape{x.sizes()}; + InferSwigluForward(out_tensor_shape, x, dim); + // apply tensor + at::Tensor y = at::empty(out_tensor_shape, x.options()); + + atb::infer::ActivationParam param; + param.activationType = atb::infer::ActivationType::ACTIVATION_SWIGLU_FORWARD; + param.dim = dim; + + // set input and output + ParamSetter paramsetter; + paramsetter.Input(x) + .Output(y); + + atb::Operation* op = nullptr; + atb::CreateOperation(param, &op); + TORCH_CHECK(op != nullptr, "swiglu_forward get op failed!"); + RunAtbCmd(op, paramsetter, "swiglu_forward"); + + ctx->save_for_backward({x}); + ctx->saved_data["dim"] = dim; + return y; +#endif + } + + static std::vector backward(AutogradContext *ctx, std::vector grad_output) + { +#ifndef ENABLE_ATB + TORCH_CHECK(false, "swiglu_backward not implemented"); +#else + auto dim = ctx->saved_data["dim"].toInt(); + auto saved = ctx->get_saved_variables(); + auto x = saved[0]; + CheckSwigluBackward(grad_output[0], x); + at::Tensor x_grad = at::empty(x.sizes(), x.options()); + + atb::infer::ActivationParam param; + param.activationType = atb::infer::ActivationType::ACTIVATION_SWIGLU_BACKWARD; + param.dim = dim; + + ParamSetter paramsetter; + paramsetter.Input(grad_output[0]) + .Input(x) + .Output(x_grad); + + atb::Operation* op = nullptr; + atb::CreateOperation(param, &op); + TORCH_CHECK(op != nullptr, "swiglu_backward get op failed!"); + RunAtbCmd(op, paramsetter, "swiglu_backward"); + + return {x_grad, at::Tensor()}; +#endif + } +}; +} // namespace + +at::Tensor npu_swiglu(const at::Tensor &x, int32_t dim) +{ + return NPUSwigluFunction::apply(x, dim); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("swiglu", &npu_swiglu, "swiglu realization", pybind11::arg("x"), pybind11::arg("dim")=-1); +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/atb/utils/atb_adapter.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/atb/utils/atb_adapter.cpp new file mode 100644 index 000000000..835758e14 --- 
/dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/atb/utils/atb_adapter.cpp @@ -0,0 +1,140 @@ + +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "../inc/atb_adapter.h" +#include +#include +#include + +using namespace std; + +static atb::Context* msContext = nullptr; + +at::Tensor FormatTrans(const at::Tensor &at_tensor) +{ + if (at_tensor.defined()) { + TORCH_CHECK(torch_npu::utils::is_npu(at_tensor), "only npu tensor is supported"); + return at_npu::native::npu_format_cast(at_tensor, ACL_FORMAT_ND); + } + return at_tensor; +} + +atb::Tensor AtTensor2Tensor(const at::Tensor atTensor) +{ + static std::map dtypeMap = { + {at::ScalarType::Bool, ACL_BOOL}, {at::ScalarType::Byte, ACL_UINT8}, + {at::ScalarType::Char, ACL_INT8}, {at::ScalarType::Half, ACL_FLOAT16}, + {at::ScalarType::Float, ACL_FLOAT}, {at::ScalarType::Int, ACL_INT32}, + {at::ScalarType::Long, ACL_INT64}, {at::ScalarType::BFloat16, ACL_BF16}, + }; + + TORCH_CHECK(atTensor.is_contiguous(), "atTensor is not contiguous"); + atb::Tensor tensor; + tensor.desc.format = ACL_FORMAT_ND; + tensor.deviceData = atTensor.data_ptr(); + + tensor.desc.shape.dimNum = atTensor.sizes().size(); + for (uint64_t i = 0; i < atTensor.sizes().size(); i++) { + tensor.desc.shape.dims[i] = atTensor.sizes()[i]; + } + + auto it = dtypeMap.find(atTensor.scalar_type()); + TORCH_CHECK(it != dtypeMap.end(), "not support dtype:"); + 
tensor.desc.dtype = it->second; + + tensor.dataSize = atb::Utils::GetTensorSize(tensor); + + return tensor; +} + +void RunAtbCmd(atb::Operation *op, const ParamSetter ¶msetter, const std::string &name) +{ + auto contextPtr = GetContext(); + uint64_t workspaceSize = OperationSetup(paramsetter.variantPack, op, contextPtr); + auto workspaceTensor = GetWorkspaceTensor(workspaceSize, op); + const void *workspacePtr = nullptr; + workspacePtr = workspaceTensor.storage().data(); + auto acl_call = [op, contextPtr, paramsetter, workspacePtr, workspaceSize]() -> int { + auto st = op->Execute(paramsetter.variantPack, (uint8_t *)workspacePtr, workspaceSize, contextPtr); + DestroyOperation(op); + return 0; + }; + at_npu::native::OpCommand cmd; + cmd.Name(name); + cmd.SetCustomHandler(acl_call); + cmd.Run(); +} + +ParamSetter& ParamSetter::Input(const at::Tensor &tensor) +{ + if (!tensor.defined()) { + variantPack.inTensors.push_back(atb::Tensor()); + return *this; + } + at::Tensor newTensor = FormatTrans(tensor); + if(!newTensor.is_contiguous()) { + newTensor = newTensor.contiguous(); + } + auto AtTensor = AtTensor2Tensor(newTensor); + + variantPack.inTensors.push_back(AtTensor); + return *this; +} + +ParamSetter& ParamSetter::Input(const c10::optional &tensor) +{ + if (!tensor.has_value()) { + variantPack.inTensors.push_back(atb::Tensor()); + return *this; + } + return Input(tensor.value()); +} + +ParamSetter& ParamSetter::Output(at::Tensor &output) +{ + auto AtTensor = AtTensor2Tensor(output); + variantPack.outTensors.push_back(AtTensor); + return *this; +} + +uint64_t OperationSetup(atb::VariantPack variantPack, atb::Operation *operation, atb::Context* contextPtr) +{ + uint64_t workspaceSize = 0; + atb::Status status = operation->Setup(variantPack, workspaceSize, contextPtr); + TORCH_CHECK(status == 0, "setup failed!"); + return workspaceSize; +} + +at::Tensor GetWorkspaceTensor(uint64_t workspaceSize, atb::Operation *operation) +{ + at::TensorOptions options = 
at::TensorOptions(torch_npu::utils::get_npu_device_type()); + at::Tensor workspaceTensor = at::empty(at::IntArrayRef(workspaceSize), options.dtype(at::kByte)); + return workspaceTensor; +} + +atb::Context* GetContext() +{ + if (msContext == nullptr) { + auto status = atb::CreateContext(&msContext); + TORCH_CHECK(status == 0, "create context failed!"); + int32_t devId = 0; + aclrtGetDevice(&devId); + aclrtStream stream = c10_npu::getCurrentNPUStream(devId).stream(false); + TORCH_CHECK(stream != nullptr, "get current stream failed"); + msContext->SetExecuteStream(stream); + } + return msContext; +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/ffn.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/ffn.cpp new file mode 100644 index 000000000..1886e6da6 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/ffn.cpp @@ -0,0 +1,96 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include +#include +#include +#include +#include + +#include "inc/aclnn_common.h" + +const static int MIN_DIM = 2; +const static int X_MAX_DIM = 8; + +using npu_preparation = at_npu::native::OpPreparation; + +namespace op_infer { +constexpr int SIZE = 8; + +c10::SmallVector array_to_small_vector(c10::IntArrayRef shape) +{ + c10::SmallVector small_shape; + for (size_t i = 0; i < shape.size(); ++i) { + small_shape.emplace_back(shape[i]); + } + return small_shape; +} +} + +at::Tensor npu_ffn(const at::Tensor &x, const at::Tensor &weight1, const at::Tensor &weight2, + std::string activation, c10::optional expert_tokens, c10::optional expert_tokens_index, + const c10::optional &bias1, const c10::optional &bias2, + const c10::optional &scale, const c10::optional &offset, + const c10::optional &deq_scale1, const c10::optional &deq_scale2, + const c10::optional &antiquant_scale1, const c10::optional &antiquant_scale2, + const c10::optional &antiquant_offset1, const c10::optional &antiquant_offset2, + c10::optional inner_precise, c10::optional output_dtype) +{ + auto weight1_dim_num = weight1.dim(); + auto weight2_dim_num = weight2.dim(); + auto x_dim_num = x.dim(); + TORCH_CHECK(x_dim_num >= MIN_DIM && x_dim_num <= X_MAX_DIM, "x shape dims should be 2~8, but it is ", x_dim_num); + auto x_k_dim = x.size(x.dim() - 1); + auto wight1_k_dim = weight1.size(weight1.dim() - 2); + TORCH_CHECK(x_k_dim == wight1_k_dim, "The k of x and weight should be equal. 
but x_k_dim is ", + x_k_dim, ", wight1_k_dim is ", wight1_k_dim); + + TORCH_CHECK(!(expert_tokens.has_value() && expert_tokens_index.has_value()), + "expert_tokens and expert_tokens_index should not have the value simultaneously."); + + char *activation_ptr = const_cast(activation.data()); + const at::Tensor &bias1_real = bias1.value_or(at::Tensor()); + const at::Tensor &bias2_real = bias2.value_or(at::Tensor()); + const at::Tensor &scale_real = scale.value_or(at::Tensor()); + const at::Tensor &offset_real = offset.value_or(at::Tensor()); + const at::Tensor &deq_scale1_real = deq_scale1.value_or(at::Tensor()); + const at::Tensor &deq_scale2_real = deq_scale2.value_or(at::Tensor()); + const at::Tensor &antiquant_scale1_real = antiquant_scale1.value_or(at::Tensor()); + const at::Tensor &antiquant_scale2_real = antiquant_scale2.value_or(at::Tensor()); + const at::Tensor &antiquant_offset1_real = antiquant_offset1.value_or(at::Tensor()); + const at::Tensor &antiquant_offset2_real = antiquant_offset2.value_or(at::Tensor()); + auto output_size = op_infer::array_to_small_vector(x.sizes()); + output_size[x.dim() - 1] = weight2.size(weight2.dim() - 1); + c10::TensorOptions options = x.options().dtype(x.scalar_type()); + if (x.scalar_type() == at::kChar && weight1.scalar_type() == at::kChar && weight2.scalar_type() == at::kChar) { + options = x.options().dtype(output_dtype.value_or(at::kHalf)); + } + at::Tensor result = at::empty(output_size, options); + int64_t inner_precise_val = inner_precise.has_value() ? inner_precise.value() : 0; + + bool tokens_index_flag = expert_tokens_index.has_value(); + + const at::Tensor &expert_tokens_real = expert_tokens.has_value() ? 
expert_tokens.value() : + expert_tokens_index.value_or(at::Tensor()); + + ACLNN_CMD(aclnnFFNV3, x, weight1, weight2, expert_tokens_real, bias1_real, bias2_real, + scale_real, offset_real, deq_scale1_real, deq_scale2_real, antiquant_scale1_real, antiquant_scale2_real, + antiquant_offset1_real, antiquant_offset2_real, activation_ptr, inner_precise_val, tokens_index_flag, result); + + return result; +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("npu_ffn", &npu_ffn, "npu_ffn"); +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/fusion_attention_v2.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/fusion_attention_v2.cpp new file mode 100644 index 000000000..3013b2fb5 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/fusion_attention_v2.cpp @@ -0,0 +1,458 @@ +// Copyright (c) 2023 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include +#include "torch_npu/csrc/framework/utils/RandomOpAdapter.h" +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/core/npu/NPUFormat.h" +#include + +#include "inc/aclnn_common.h" +#include "../flop_counter/flop_counter.h" + +const static int FLASH_THRESHOLD = 512; +const static int N = 32; +const static int64_t SOFTMAXMAX_LAST_DIMSHAPE = 8; +using namespace at_npu::native; + +constexpr static int SIZE_8 = 8; + +enum class DropOutStatus { + DROPOUT_NORMAL = 0, + DROPOUT_NONE, + DROPOUT_ALL +}; + +enum class SparseMode { + NO_MASK = 0, + ALL_MASK, + LEFT_UP_CAUSAL, + RIGHT_DOWN_CAUSAL, + BAND, + PREFIX, + PREFIX_COMPRESS, + RIGHT_DOWN_CAUSAL_BAND, + BAND_LEFT_UP_CAUSAL +}; +DropOutStatus get_dropout_status(double keep_prob) +{ + if (keep_prob == 0) { + return DropOutStatus::DROPOUT_ALL; + } + if (keep_prob == 1.) { + return DropOutStatus::DROPOUT_NONE; + } + return DropOutStatus::DROPOUT_NORMAL; +} + +at::Tensor format_trans(const at::Tensor &at_tensor) +{ + if (at_tensor.defined()) { + TORCH_CHECK(torch_npu::utils::is_npu(at_tensor), "only npu tensor is supported"); + return at_npu::native::npu_format_cast(at_tensor, ACL_FORMAT_ND); + } + return at_tensor; +} + +at::Tensor dropout_gen_mask(const at::Tensor &query, const at::Tensor &key, double keep_prob, int64_t head_num, const std::string &input_layout, + bool gen_mask_parallel, bool sync, int64_t &seed, int64_t &offset, int64_t &numels) +{ + at::Tensor drop_mask; + if (input_layout == "BSH") { + numels = query.size(0) * head_num * query.size(1) * key.size(1); // [B,N,S,S] + } else if (input_layout == "SBH") { + numels = query.size(1) * head_num * query.size(0) * key.size(0); // [B,N,S,S] + } else if (input_layout == "BNSD") { + numels = query.size(0) * query.size(1) * query.size(2) * key.size(2); // [B,N,S,S] + } else if (input_layout == "BSND") { + numels = query.size(0) * query.size(2) * query.size(1) * key.size(1); // [B,N,S,S] + } + int64_t length = (numels + 128 - 1) / 
128 * 128 / 8; + length += 32; + if (get_dropout_status(keep_prob) == DropOutStatus::DROPOUT_NORMAL) { + const auto gen = at_npu::detail::getDefaultNPUGenerator(); + auto pair = at::check_generator(gen)->philox_engine_inputs(10); + seed = pair.first; + offset = pair.second; + drop_mask = at_npu::native::npu_dropout_gen_mask(query, at::IntArrayRef{ numels }, 1 - keep_prob, + seed, offset, gen_mask_parallel, sync); + } else if (get_dropout_status(keep_prob) == DropOutStatus::DROPOUT_ALL) { + drop_mask = at::zeros(at::IntArrayRef{length}, query.options().dtype(at::kByte)); + } + return drop_mask; +} + +std::tuple npu_fusion_attention_backward_v2( + const at::Tensor &query, + const at::Tensor &key, + const at::Tensor &value, + const at::Tensor &dy, + int64_t head_num, + const std::string &input_layout, + const c10::optional &pse, + const c10::optional &drop_mask, + const c10::optional &padding_mask, + const c10::optional &atten_mask, + const c10::optional &softmax_max, + const c10::optional &softmax_sum, + const c10::optional &softmax_in, + const c10::optional &attention_in, + double scale_value, + double keep_prob, + int64_t pre_tokens, + int64_t next_tokens, + int64_t inner_precise, + const c10::optional> &prefix, + const c10::optional> &actual_seq_qlen, + const c10::optional> &actual_seq_kvlen, + const c10::optional> &q_start_idx, + const c10::optional> &kv_start_idx, + int64_t sparse_mode, + int64_t pse_type) +{ + double scale = scale_value; + + const at::Tensor &pse_const = pse.value_or(at::Tensor()); + const at::Tensor &drop_mask_const = drop_mask.value_or(at::Tensor()); + const at::Tensor &padding_mask_const = padding_mask.value_or(at::Tensor()); + const at::Tensor &atten_mask_const = atten_mask.value_or(at::Tensor()); + const at::Tensor &softmax_max_const = softmax_max.value_or(at::Tensor()); + const at::Tensor &softmax_sum_const = softmax_sum.value_or(at::Tensor()); + const at::Tensor &softmax_const = softmax_in.value_or(at::Tensor()); + const at::Tensor 
&attention_const = attention_in.value_or(at::Tensor()); + auto prefixN_tmp = prefix.value_or(std::vector{}); + auto ac_seq_qlen_tmp = actual_seq_qlen.value_or(std::vector{}); + auto ac_seq_kvlen_tmp = actual_seq_kvlen.value_or(std::vector{}); + auto q_start_idx_val_tmp = q_start_idx.value_or(std::vector{}); + auto kv_start_idx_val_tmp = kv_start_idx.value_or(std::vector{}); + + c10::optional prefixN(prefixN_tmp); + c10::optional ac_seq_qlen(ac_seq_qlen_tmp); + c10::optional ac_seq_kvlen(ac_seq_kvlen_tmp); + c10::optional q_start_idx_val(q_start_idx_val_tmp); + c10::optional kv_start_idx_val(kv_start_idx_val_tmp); + + at::Tensor format_query = format_trans(query); + at::Tensor format_key = format_trans(key); + at::Tensor format_value = format_trans(value); + at::Tensor format_dy = format_trans(dy); + + at::Tensor format_pse = format_trans(pse_const); + at::Tensor format_drop_mask = format_trans(drop_mask_const); + at::Tensor format_padding_mask = format_trans(padding_mask_const); + at::Tensor format_atten_mask = format_trans(atten_mask_const); + at::Tensor format_softmax_max = format_trans(softmax_max_const); + at::Tensor format_softmax_sum = format_trans(softmax_sum_const); + at::Tensor format_softmax = format_trans(softmax_const); + at::Tensor format_attention = format_trans(attention_const); + at::Tensor dq = at::empty(format_query.sizes(), format_query.options()); + at::Tensor dk = at::empty(format_key.sizes(), format_key.options()); + at::Tensor dv = at::empty(format_value.sizes(), format_value.options()); + char* input_layout_ptr = const_cast(input_layout.c_str()); + at::Tensor dpse; + if (format_pse.defined()) { + dpse = at::empty(format_pse.sizes(), format_pse.options()); + } else { + dpse = at::empty({0}, query.options()); + } + + if (!ac_seq_qlen_tmp.empty() && !ac_seq_kvlen_tmp.empty()) { + ACLNN_CMD( + aclnnFlashAttentionUnpaddingScoreGradV2, format_query, format_key, format_value, format_dy, + format_pse, format_drop_mask, format_padding_mask, 
format_atten_mask, format_softmax_max, + format_softmax_sum, format_softmax, format_attention, prefixN, ac_seq_qlen, ac_seq_kvlen, q_start_idx_val, kv_start_idx_val, + scale_value, keep_prob, pre_tokens, next_tokens, head_num, input_layout_ptr, inner_precise, sparse_mode, pse_type, + dq, dk, dv, dpse); + } else { + ACLNN_CMD( + aclnnFlashAttentionScoreGradV2, format_query, format_key, format_value, format_dy, + format_pse, format_drop_mask, format_padding_mask, format_atten_mask, format_softmax_max, + format_softmax_sum, format_softmax, format_attention, prefixN, q_start_idx_val, kv_start_idx_val, scale_value, keep_prob, + pre_tokens, next_tokens, head_num, input_layout_ptr, inner_precise, sparse_mode, pse_type, dq, dk, dv, dpse); + } + + if (!format_pse.defined()) { + at::Tensor dpse_required; + dpse = dpse_required; + } + #ifdef FLOP_COUNT + FLOP_COUNT(FlopCounter::flash_attention_backward_flop, query, key, value, dy, head_num, input_layout, actual_seq_qlen, actual_seq_kvlen); + #endif + return std::make_tuple(dq, dk, dv, dpse); +} + +std::tuple npu_fusion_attention_grad_v2( + const at::Tensor &query, + const at::Tensor &key, + const at::Tensor &value, + const at::Tensor &dy, + int64_t head_num, + const std::string &input_layout, + const c10::optional &pse, + const c10::optional &padding_mask, + const c10::optional &atten_mask, + const c10::optional &softmax_max, + const c10::optional &softmax_sum, + const c10::optional &softmax_in, + const c10::optional &attention_in, + double scale_value, + double keep_prob, + int64_t pre_tokens, + int64_t next_tokens, + int64_t inner_precise, + int64_t seed, + int64_t offset, + int64_t numels, + const c10::optional> &prefix, + const c10::optional> &actual_seq_qlen, + const c10::optional> &actual_seq_kvlen, + int64_t sparse_mode, + bool gen_mask_parallel, + bool sync, + int64_t pse_type, + const c10::optional> &q_start_idx, + const c10::optional> &kv_start_idx) +{ + TORCH_CHECK(query.dim() == 3 || query.dim() == 4, "The shapes 
of the input query should be 3 or 4 dimensional, but got ", + query.dim(), "-dimensional"); + TORCH_CHECK(key.dim() == 3 || key.dim() == 4, "The shapes of the input key should be 3 or 4 dimensional, but got ", + key.dim(), "-dimensional"); + TORCH_CHECK(value.dim() == 3 || value.dim() == 4, "The shapes of the input value should be 3 or 4 dimensional, but got ", + value.dim(), "-dimensional"); + TORCH_CHECK(dy.dim() == 3 || dy.dim() == 4, "The shapes of the input dy should be 3 or 4 dimensional, but got ", dy.dim(), "-dimensional"); + TORCH_CHECK(keep_prob >= 0 && keep_prob <= 1, "The keep_prob value must be in range of [0, 1], but got ", keep_prob); + TORCH_CHECK(pse_type >= 0 && pse_type <= 3, "The pse_type value must be in range of [0, 3], but got ", pse_type); + std::string input_layout_str = std::string(input_layout); + if (input_layout_str == "TND") { + TORCH_CHECK((sparse_mode >= static_cast(SparseMode::NO_MASK) && + sparse_mode < static_cast(SparseMode::PREFIX)) || + (sparse_mode > static_cast(SparseMode::PREFIX) && + sparse_mode <= static_cast(SparseMode::BAND_LEFT_UP_CAUSAL)), + "The sparse_mode value must be in range of [0,5) or (5,8], but got ", + sparse_mode); + } else { + TORCH_CHECK(sparse_mode >= static_cast(SparseMode::NO_MASK) && + sparse_mode <= static_cast(SparseMode::PREFIX_COMPRESS), + "The sparse_mode value must be in range of [0,6], but got ", + sparse_mode); + } + for (auto &c : input_layout_str) { + c = toupper(c); + } + TORCH_CHECK(input_layout_str == "BSH" || input_layout_str == "SBH" || input_layout_str == "BNSD" || + input_layout_str == "BSND" || input_layout_str == "TND", + "The input_layout should be BSH/SBH/BNSD/BSND/TND(case-insensitive), but got ", input_layout); + + int64_t length = (numels + 128 - 1) / 128 * 128 / 8; + length += 32; + at::Tensor drop_mask; + if (get_dropout_status(keep_prob) == DropOutStatus::DROPOUT_NORMAL) { + drop_mask = at_npu::native::npu_dropout_gen_mask(query, at::IntArrayRef{ numels }, 1 - keep_prob, + 
seed, offset, gen_mask_parallel, sync); + } else if (get_dropout_status(keep_prob) == DropOutStatus::DROPOUT_ALL) { + drop_mask = at::zeros(at::IntArrayRef{length}, query.options().dtype(at::kByte)); + } + auto result = npu_fusion_attention_backward_v2(query, + key, value, dy, head_num, input_layout_str, pse, drop_mask, padding_mask, atten_mask, + softmax_max, softmax_sum, softmax_in, attention_in, scale_value, keep_prob, pre_tokens, + next_tokens, inner_precise, prefix, actual_seq_qlen, actual_seq_kvlen, q_start_idx, kv_start_idx, sparse_mode, pse_type); + if (!sync && get_dropout_status(keep_prob) != DropOutStatus::DROPOUT_NONE) { + c10::Device device = drop_mask.device(); + c10::impl::VirtualGuardImpl impl(device.type()); + impl.recordDataPtrOnStream(drop_mask.storage().data_ptr(), c10_npu::getCurrentNPUStream()); + } + return result; +} + +std::tuple npu_fusion_attention_v2( + const at::Tensor &query, const at::Tensor &key, + const at::Tensor &value, int64_t head_num, const std::string &input_layout, + const c10::optional &pse_opt, const c10::optional &padding_mask_opt, + const c10::optional &atten_mask_opt, + double scale, double keep_prob, int64_t pre_tokens, int64_t next_tokens, int64_t inner_precise, + const c10::optional> &prefix_opt, const c10::optional> &actual_seq_qlen, + const c10::optional> &actual_seq_kvlen, int64_t sparse_mode, bool gen_mask_parallel, bool sync, + int64_t pse_type, const c10::optional> &q_start_idx, const c10::optional> &kv_start_idx) +{ + const at::Tensor &pse = pse_opt.value_or(at::Tensor()); + const at::Tensor &padding_mask = padding_mask_opt.value_or(at::Tensor()); + const at::Tensor &atten_mask = atten_mask_opt.value_or(at::Tensor()); + auto prefixN_tmp = prefix_opt.value_or(std::vector{}); + auto ac_seq_qlen_tmp = actual_seq_qlen.value_or(std::vector{}); + auto ac_seq_kvlen_tmp = actual_seq_kvlen.value_or(std::vector{}); + auto q_start_idx_val_tmp = q_start_idx.value_or(std::vector{}); + auto kv_start_idx_val_tmp = 
kv_start_idx.value_or(std::vector{}); + + c10::optional prefixN(prefixN_tmp); + c10::optional ac_seq_qlen(ac_seq_qlen_tmp); + c10::optional ac_seq_kvlen(ac_seq_kvlen_tmp); + c10::optional q_start_idx_val(q_start_idx_val_tmp); + c10::optional kv_start_idx_val(kv_start_idx_val_tmp); + + TORCH_CHECK(head_num > 0, "head_num must > 0, but got ", head_num); + TORCH_CHECK(query.dim() == 3 || query.dim() == 4, "The shapes of the input query should be 3 or 4 dimensional, but got ", + query.dim(), "-dimensional"); + TORCH_CHECK(key.dim() == 3 || key.dim() == 4, "The shapes of the input key should be 3 or 4 dimensional, but got ", key.dim(), + "-dimensional"); + TORCH_CHECK(value.dim() == 3 || value.dim() == 4, "The shapes of the input value should be 3 or 4 dimensional, but got ", + value.dim(), "-dimensional"); + TORCH_CHECK(keep_prob >= 0 && keep_prob <= 1, "The keep_prob value must be in range of [0, 1], but got ", keep_prob); + TORCH_CHECK(pse_type >= 0 && pse_type <= 3, "The pse_type value must be in range of [0, 3], but got ", pse_type); + std::string input_layout_str = std::string(input_layout); + if (input_layout_str == "TND") { + TORCH_CHECK((sparse_mode >= static_cast(SparseMode::NO_MASK) && + sparse_mode < static_cast(SparseMode::PREFIX)) || + (sparse_mode > static_cast(SparseMode::PREFIX) && + sparse_mode <= static_cast(SparseMode::BAND_LEFT_UP_CAUSAL)), + "The sparse_mode value must be in range of [0,5) or (5,8], but got ", + sparse_mode); + } else { + TORCH_CHECK(sparse_mode >= static_cast(SparseMode::NO_MASK) && + sparse_mode <= static_cast(SparseMode::PREFIX_COMPRESS), + "The sparse_mode value must be in range of [0,6], but got ", + sparse_mode); + } + for (auto &c : input_layout_str) { + c = toupper(c); + } + TORCH_CHECK(input_layout_str == "BSH" || input_layout_str == "SBH" || input_layout_str == "BNSD" || + input_layout_str == "BSND" || input_layout_str == "TND", + "The input_layout should be BSH/SBH/BNSD/BSND/TND(case-insensitive), but got ", 
input_layout); + + int64_t B = 0; + int64_t S0 = 0; // S for query + int64_t S1 = 0; // S for key & value + int64_t N = 0; + int64_t D = 0; + int64_t H = 0; + int64_t T = 0; + int64_t D2 = 0; // D2 for value head-dim + c10::SmallVector atten_score_shape; + + if (input_layout_str == "BSH") { + B = query.size(0); + S0 = query.size(1); + S1 = key.size(1); + H = query.size(2); + D = H / head_num; + D2 = (!D || !key.size(2)) ? 0 : value.size(2) / (key.size(2) / D); + atten_score_shape = {B, S0, head_num * D2}; + } else if (input_layout_str == "SBH") { + B = query.size(1); + S0 = query.size(0); + S1 = key.size(0); + H = query.size(2); + D = H / head_num; + D2 = (!D || !key.size(2)) ? 0 : value.size(2) / (key.size(2) / D); + atten_score_shape = {S0, B, head_num * D2}; + } else if (input_layout_str == "BNSD") { + B = query.size(0); + N = query.size(1); + S0 = query.size(2); + S1 = key.size(2); + D = query.size(3); + D2 = value.size(3); + atten_score_shape = {B, N, S0, D2}; + } else if (input_layout_str == "BSND") { + B = query.size(0); + N = query.size(2); + S0 = query.size(1); + S1 = key.size(1); + D = query.size(3); + D2 = value.size(3); + atten_score_shape = {B, S0, N, D2}; + } else if (input_layout_str == "TND") { + T = query.size(0); + N = query.size(1); + D = query.size(2); + D2 = value.size(2); + atten_score_shape = {T, N, D2}; + } + + double scale_value = scale; + + at::Tensor format_query = format_trans(query); + at::Tensor attention_score = at::empty(atten_score_shape, query.options()); + at::Tensor format_key = format_trans(key); + at::Tensor format_value = format_trans(value); + + at::Tensor format_pse = format_trans(pse); + at::Tensor format_padding_mask = format_trans(padding_mask); + at::Tensor format_atten_mask = format_trans(atten_mask); + + int64_t seed; + int64_t offset; + int64_t numels; + //check + for(size_t i = 0; i < ac_seq_qlen_tmp.size(); i++){ + TORCH_CHECK(ac_seq_qlen_tmp[i] <= 1000000 && ac_seq_kvlen_tmp[i] <= 1000000, "The sequence length 
should not greater than 1M, but got q", ac_seq_qlen_tmp[i],"kv", ac_seq_kvlen_tmp[i]); + } + + if (input_layout_str == "TND" && ac_seq_qlen_tmp.size() == ac_seq_kvlen_tmp.size()) { + numels = N; + int64_t accum = ac_seq_qlen_tmp[0] * ac_seq_kvlen_tmp[0]; + for (size_t i = 1; i < ac_seq_qlen_tmp.size(); i++) { + accum += ((ac_seq_qlen_tmp[i] - ac_seq_qlen_tmp[i - 1]) * (ac_seq_kvlen_tmp[i] - ac_seq_kvlen_tmp[i - 1])); + } + numels *= accum; + } + + at::Tensor format_drop_mask = dropout_gen_mask(format_query, format_key, keep_prob, head_num, input_layout_str, + gen_mask_parallel, sync, seed, offset, numels); + + at::Tensor softmax_max; + at::Tensor softmax_sum; + at::Tensor softmax_out; + + if (input_layout_str != "TND") { + softmax_max = at::empty({B, head_num, S0, SOFTMAXMAX_LAST_DIMSHAPE}, query.options().dtype(at::kFloat)); // [B, N, S0, 8] + softmax_sum = at::empty({B, head_num, S0, SOFTMAXMAX_LAST_DIMSHAPE}, query.options().dtype(at::kFloat)); // [B, N, S0, 8] + } else { + softmax_max = at::empty({T, N, SOFTMAXMAX_LAST_DIMSHAPE}, query.options().dtype(at::kFloat)); // [T, N, 8] + softmax_sum = at::empty({T, N, SOFTMAXMAX_LAST_DIMSHAPE}, query.options().dtype(at::kFloat)); // [T, N, 8] + } + softmax_out = at::empty({0}, query.options()); + + char* input_layout_ptr = const_cast(input_layout_str.c_str()); + if (!ac_seq_qlen_tmp.empty() && !ac_seq_kvlen_tmp.empty()) { + ACLNN_CMD( + aclnnFlashAttentionVarLenScoreV2, format_query, format_key, format_value, + format_pse, format_drop_mask, format_padding_mask, format_atten_mask, prefixN, + ac_seq_qlen, ac_seq_kvlen, q_start_idx_val, kv_start_idx_val, scale, keep_prob, pre_tokens, next_tokens, head_num, + input_layout_ptr, inner_precise, sparse_mode, pse_type, softmax_max, softmax_sum, + softmax_out, attention_score); + } else { + ACLNN_CMD( + aclnnFlashAttentionScoreV2, format_query, format_key, format_value, + format_pse, format_drop_mask, format_padding_mask, format_atten_mask, prefixN, q_start_idx_val, 
kv_start_idx_val, + scale, keep_prob, pre_tokens, next_tokens, head_num, input_layout_ptr, inner_precise, + sparse_mode, pse_type, softmax_max, softmax_sum, softmax_out, attention_score); + } + if (!sync && get_dropout_status(keep_prob) != DropOutStatus::DROPOUT_NONE) { + c10::Device device = format_drop_mask.device(); + c10::impl::VirtualGuardImpl impl(device.type()); + impl.recordDataPtrOnStream(format_drop_mask.storage().data_ptr(), c10_npu::getCurrentNPUStream()); + } + #ifdef FLOP_COUNT + FLOP_COUNT(FlopCounter::flash_attention_forward_flop, query, key, value, head_num, input_layout, actual_seq_qlen, actual_seq_kvlen); + #endif + return std::make_tuple(attention_score, softmax_max, softmax_sum, softmax_out, + seed, offset, numels); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("npu_fusion_attention_v2", &npu_fusion_attention_v2, "fusion attention forward v2"); + m.def("npu_fusion_attention_grad_v2", &npu_fusion_attention_grad_v2, "fusion attention backward v2"); +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/gmm.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/gmm.cpp new file mode 100644 index 000000000..fc3a63f3f --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/gmm.cpp @@ -0,0 +1,309 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include +#include +#include +#include +#include + +#include "../flop_counter/flop_counter.h" +#include "inc/aclnn_common.h" + +using npu_preparation = at_npu::native::OpPreparation; + +namespace op_infer { +constexpr int SIZE = 8; + +c10::SmallVector array_to_small_vector(c10::IntArrayRef shape) +{ + c10::SmallVector small_shape; + for (size_t i = 0; i < shape.size(); ++i) { + small_shape.emplace_back(shape[i]); + } + return small_shape; +} +} + +void _check_dims(size_t num_x, const at::TensorList &weight, size_t num_group_list) +{ + size_t num_w = weight.size(); + TORCH_CHECK(num_x > 0 && num_w > 0, + "Neither x nor weight could be empty."); + size_t dim_num_w = weight[0].sizes().size(); + size_t dim_0_w = weight[0].sizes()[0]; +} + +void _create_new_tensor(std::vector &y, int64_t dim_m, int64_t dim_n, c10::TensorOptions options, + int64_t group_type_value, int64_t num_group_list) +{ + auto output_size = (2 == group_type_value) ? op_infer::array_to_small_vector({num_group_list, dim_m, dim_n}) + : op_infer::array_to_small_vector({dim_m, dim_n}); + y.emplace_back(at::empty(output_size, options)); +} + +void _foreach_transpose(const at::TensorList &tensorList, std::vector &tensors) +{ + for (int i = 0; i< tensorList.size(); i++) { + at::Tensor tensor = tensorList[i].transpose(-1, -2); + tensors.emplace_back(tensor); + } +} + +bool _is_transposed(at::Tensor &tensors) +{ + int dim_sum = tensors.dim(); + TORCH_CHECK(dim_sum >= 2 && dim_sum <= 3, // 2/3: gmm weight only support 2- or 3-dimensional + "input tensor of is_tensor_transposed should be either 2- or 3-dimensional."); + int shape_dim = tensors.sizes().size() - 2; + if (tensors.stride(dim_sum - 2) == 1 && tensors.stride(dim_sum - 1) == tensors.sizes().at(shape_dim)) { + return true; + } else { + return false; + } +} + +std::vector npu_gmm(const std::vector& x, + const std::vector& weight, + const std::vector& bias, + c10::optional> group_list, + c10::optional group_type, + c10::optional group_list_type) +{ + 
auto num_x = x.size(); + auto num_w = weight.size(); + auto group_list_real_ = group_list.value_or(std::vector{}); + at::IntArrayRef group_list_real(group_list_real_); + auto num_group_list = group_list_real.size(); + int64_t split_item_value = 3; + int64_t group_type_value = group_type.value_or(-1); + + const at::TensorList x_(x); + const at::TensorList weight_(weight); + const at::TensorList bias_(bias); + + _check_dims(num_x, weight_, num_group_list); + + std::vector y; + c10::TensorOptions options = x_[0].options().dtype(x_[0].scalar_type()); + + size_t dim_num_w = weight[0].sizes().size(); + _create_new_tensor(y, x[0].sizes()[0], weight[0].sizes()[dim_num_w - 1], options, group_type_value, + num_group_list); + + at::TensorList result = at::TensorList(y); + auto scale_real = at::TensorList(); + auto offset_real = at::TensorList(); + auto antiquant_scale_real = at::TensorList(); + auto antiquant_offset_real = at::TensorList(); + ACLNN_CMD(aclnnGroupedMatmulV2, x_, weight_, bias_, scale_real, offset_real, antiquant_scale_real, + antiquant_offset_real, group_list_real, split_item_value, group_type_value, result); + #ifdef FLOP_COUNT + FLOP_COUNT(FlopCounter::gmm_flop_int, x_, weight_, group_list, group_type_value); + #endif + return y; +} + +std::vector npu_gmm(const std::vector& x, + const std::vector& weight, + const std::vector& bias, + const c10::optional& group_list, + c10::optional group_type, + c10::optional group_list_type) +{ + auto num_x = x.size(); + auto num_w = weight.size(); + auto group_list_real = group_list.value_or(at::Tensor()); + auto num_group_list = group_list_real.sizes()[0]; + int64_t split_item_value = 3; + int64_t group_type_value = group_type.value_or(-1); + int64_t group_list_type_value = group_list_type.value_or(0); + int64_t act_type_value = 0; + + const at::TensorList x_(x); + const at::TensorList weight_(weight); + const at::TensorList bias_(bias); + + _check_dims(num_x, weight_, num_group_list); + + std::vector y; + 
c10::TensorOptions options = x_[0].options().dtype(x_[0].scalar_type()); + + size_t dim_num_w = weight[0].sizes().size(); + _create_new_tensor(y, x[0].sizes()[0], weight[0].sizes()[dim_num_w - 1], options, group_type_value, + num_group_list); + + at::TensorList result = at::TensorList(y); + auto scale_real = at::TensorList(); + auto offset_real = at::TensorList(); + auto antiquant_scale_real = at::TensorList(); + auto antiquant_offset_real = at::TensorList(); + auto perToken_scale_real = at::TensorList(); + auto activation_input_real = at::TensorList(); + auto activation_quant_scale_real = at::TensorList(); + auto activation_quant_offset_real = at::TensorList(); + auto activation_feature_out_real = at::TensorList(); + auto dynQuant_scale_out_real = at::TensorList(); + + ACLNN_CMD(aclnnGroupedMatmulV4, x_, weight_, bias_, scale_real, offset_real, antiquant_scale_real, + antiquant_offset_real, perToken_scale_real, group_list_real, activation_input_real, + activation_quant_scale_real, activation_quant_offset_real, split_item_value, group_type_value, + group_list_type_value, act_type_value, result, activation_feature_out_real, dynQuant_scale_out_real); + #ifdef FLOP_COUNT + FLOP_COUNT(FlopCounter::gmm_flop_tensor, x_, weight_, group_list, group_type_value); + #endif + return y; +} + +std::tuple, std::vector, std::vector> npu_gmm_backward( + const std::vector& grad, + const std::vector& x, + const std::vector& weight, + const c10::optional> group_list, + c10::optional group_list_type) +{ + auto num_w = weight.size(); + auto group_list_real = group_list.value_or(std::vector{}); + + const at::TensorList x_(x); + const at::TensorList weight_(weight); + + std::vector xt; + std::vector wt; + + _foreach_transpose(x_, xt); + _foreach_transpose(weight_, wt); + + std::vector bias_real; + + std::vector dx = npu_gmm(grad, wt, bias_real, group_list_real, 0, group_list_type); + std::vector dw = npu_gmm(xt, grad, bias_real, group_list_real, 2, group_list_type); + std::vector dbias; + 
+ std::vector dw_output; + for (int i = 0; i < num_w; i++) { + at::Tensor dw_tensor = dw[i].reshape(weight[i].sizes()); + dw_output.emplace_back(dw_tensor); + } + + return std::make_tuple(dx, dw_output, dbias); +} + +std::tuple, std::vector, std::vector> npu_gmm_backward( + const std::vector& grad, + const std::vector& x, + const std::vector& weight, + const c10::optional& group_list, + c10::optional group_list_type) +{ + auto num_w = weight.size(); + auto group_list_real = group_list.value_or(at::Tensor()); + + std::vector bias_real; + at::TensorList weight_(weight); + std::vector wt; + _foreach_transpose(weight_, wt); + std::vector dx = npu_gmm(grad, wt, bias_real, group_list_real, 0, group_list_type); + + at::Tensor weight_tensor = weight.at(0); + bool is_weight_transposed = _is_transposed(weight_tensor); + + std::vector dw; + if (is_weight_transposed == true) { + at::Tensor grad_tensor = grad.at(0).contiguous(); + at::TensorList grad_(grad_tensor); + std::vector gradt; + _foreach_transpose(grad_, gradt); + std::vector dwt = npu_gmm(gradt, x, bias_real, group_list_real, 2, group_list_type); + at::TensorList dwt_(dwt); + _foreach_transpose(dwt_, dw); + } else { + at::TensorList x_(x); + std::vector xt; + _foreach_transpose(x_, xt); + dw = npu_gmm(xt, grad, bias_real, group_list_real, 2, group_list_type); + } + + std::vector dbias; + std::vector dw_output; + for (int i = 0; i < num_w; i++) { + at::Tensor dw_tensor = dw[i].reshape(weight[i].sizes()); + dw_output.emplace_back(dw_tensor); + } + + return std::make_tuple(dx, dw_output, dbias); +} + +std::tuple, std::vector, std::vector> npu_gmm_backward_fusion( + const std::vector& grad, + const std::vector& weight, + const c10::optional> group_list, + c10::optional group_list_type) +{ + auto num_w = weight.size(); + auto group_list_real = group_list.value_or(std::vector{}); + + const at::TensorList weight_(weight); + + std::vector wt; + + _foreach_transpose(weight_, wt); + + std::vector bias_real; + + std::vector dx = 
npu_gmm(grad, wt, bias_real, group_list_real, 0, group_list_type); + + std::vector dbias; + + std::vector dw_output; + + return std::make_tuple(dx, dw_output, dbias); +} + +std::tuple, std::vector, std::vector> npu_gmm_backward_fusion( + const std::vector& grad, + const std::vector& weight, + const c10::optional& group_list, + c10::optional group_list_type) +{ + auto num_w = weight.size(); + auto group_list_real = group_list.value_or(at::Tensor()); + + std::vector bias_real; + at::TensorList weight_(weight); + std::vector wt; + _foreach_transpose(weight_, wt); + std::vector dx = npu_gmm(grad, wt, bias_real, group_list_real, 0, group_list_type); + + std::vector dbias; + std::vector dw_output; + + return std::make_tuple(dx, dw_output, dbias); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + using gmmv1 = std::vector(*)(const std::vector&, const std::vector&, const std::vector&, c10::optional>, c10::optional, c10::optional); + using gmmv2 = std::vector(*)(const std::vector&, const std::vector&, const std::vector&, const c10::optional&, c10::optional, c10::optional); + using gmmv1_backward = std::tuple, std::vector, std::vector>(*)(const std::vector&, const std::vector&, const std::vector&, const c10::optional>, c10::optional); + using gmmv2_backward = std::tuple, std::vector, std::vector>(*)(const std::vector&, const std::vector&, const std::vector&, const c10::optional&, c10::optional); + using gmmv1_backward_fusion = std::tuple, std::vector, std::vector>(*)(const std::vector&, const std::vector&, const c10::optional>, c10::optional); + using gmmv2_backward_fusion = std::tuple, std::vector, std::vector>(*)(const std::vector&, const std::vector&, const c10::optional&, c10::optional); + + m.def("npu_gmm", (gmmv1)&npu_gmm, "grouped matmul forward with group_list type List[int]"); + m.def("npu_gmm_backward", (gmmv1_backward)&npu_gmm_backward, "grouped matmul backward with group_list type List[int]"); + m.def("npu_gmm_backward_fusion", 
(gmmv1_backward_fusion)&npu_gmm_backward_fusion, "grouped matmul backward with group_list type List[int]"); + m.def("npu_gmm", (gmmv2)&npu_gmm, "grouped matmul forward with group_list type Tensor"); + m.def("npu_gmm_backward", (gmmv2_backward)&npu_gmm_backward, "grouped matmul backward with group_list type Tensor"); + m.def("npu_gmm_backward_fusion", (gmmv2_backward_fusion)&npu_gmm_backward_fusion, "grouped matmul backward with group_list type Tensor"); +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/inc/aclnn_common.h b/model/train/yoco_moe/mindspeed/ops/csrc/cann/inc/aclnn_common.h new file mode 100644 index 000000000..bf70ff763 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/inc/aclnn_common.h @@ -0,0 +1,646 @@ +/****************************************************************************** + * Copyright (c) 2022 Huawei Technologies Co., Ltd + * All rights reserved. + * + * Licensed under the BSD 3-Clause License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSD-3-Clause + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ + +#ifndef MINDSPEED_OPS_CSRC_CANN_INC_ACL_COMMON_H +#define MINDSPEED_OPS_CSRC_CANN_INC_ACL_COMMON_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "torch_npu/csrc/core/npu/NPUStream.h" +#include "torch_npu/csrc/framework/OpCommand.h" +#include "torch_npu/csrc/framework/interface/EnvVariables.h" +#include "torch_npu/csrc/aten/NPUNativeFunctions.h" +#include "torch_npu/csrc/core/npu/DeviceUtils.h" +#if __has_include("torch_npu/csrc/flopcount/FlopCount.h") + #include "torch_npu/csrc/flopcount/FlopCount.h" +#endif +#define NPU_NAME_SPACE at_npu::native + +using aclOpExecutor = struct aclOpExecutor; +using aclTensor = struct aclTensor; +using aclScalar = struct aclScalar; +using aclIntArray = struct aclIntArray; +using aclFloatArray = struct aclFloatArray; +using aclBoolArray = struct aclBoolArray; +using aclTensorList = struct aclTensorList; + +using _aclCreateTensor = aclTensor *(*)(const int64_t *view_dims, uint64_t view_dims_num, aclDataType data_type, + const int64_t *stride, int64_t offset, aclFormat format, const int64_t *storage_dims, uint64_t storage_dims_num, + void *tensor_data); +using _aclCreateScalar = aclScalar *(*)(void *value, aclDataType data_type); +using _aclCreateIntArray = aclIntArray *(*)(const int64_t *value, uint64_t size); +using _aclCreateFloatArray = aclFloatArray *(*)(const float *value, uint64_t size); +using _aclCreateBoolArray = aclBoolArray *(*)(const bool *value, uint64_t size); +using _aclCreateTensorList = aclTensorList *(*)(const aclTensor *const *value, uint64_t size); + +using _aclDestroyTensor = int (*)(const aclTensor *tensor); +using _aclDestroyScalar = int (*)(const aclScalar *scalar); +using _aclDestroyIntArray = int (*)(const aclIntArray *array); +using _aclDestroyFloatArray = int (*)(const aclFloatArray *array); +using _aclDestroyBoolArray = int (*)(const aclBoolArray *array); +using 
_aclDestroyTensorList = int (*)(const aclTensorList *array); + +constexpr int kHashBufSize = 8192; +constexpr int kHashBufMaxSize = kHashBufSize + 1024; +extern thread_local char g_hashBuf[kHashBufSize]; +extern thread_local int g_hashOffset; + +#define AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(_) \ + _(at::ScalarType::Byte, ACL_UINT8) \ + _(at::ScalarType::Char, ACL_INT8) \ + _(at::ScalarType::Short, ACL_INT16) \ + _(at::ScalarType::Int, ACL_INT32) \ + _(at::ScalarType::Long, ACL_INT64) \ + _(at::ScalarType::Half, ACL_FLOAT16) \ + _(at::ScalarType::Float, ACL_FLOAT) \ + _(at::ScalarType::Double, ACL_DOUBLE) \ + _(at::ScalarType::ComplexHalf, ACL_DT_UNDEFINED) \ + _(at::ScalarType::ComplexFloat, ACL_COMPLEX64) \ + _(at::ScalarType::ComplexDouble, ACL_COMPLEX128) \ + _(at::ScalarType::Bool, ACL_BOOL) \ + _(at::ScalarType::QInt8, ACL_DT_UNDEFINED) \ + _(at::ScalarType::QUInt8, ACL_DT_UNDEFINED) \ + _(at::ScalarType::QInt32, ACL_DT_UNDEFINED) \ + _(at::ScalarType::BFloat16, ACL_BF16) \ + _(at::ScalarType::QUInt4x2, ACL_DT_UNDEFINED) \ + _(at::ScalarType::QUInt2x4, ACL_DT_UNDEFINED) \ + _(at::ScalarType::Undefined, ACL_DT_UNDEFINED) \ + _(at::ScalarType::NumOptions, ACL_DT_UNDEFINED) + +constexpr aclDataType kATenScalarTypeToAclDataTypeTable[static_cast(at::ScalarType::NumOptions) + 1] = { +#define DEFINE_ENUM(_1, n) n, + AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(DEFINE_ENUM) +#undef DEFINE_ENUM +}; + +#define GET_OP_API_FUNC(apiName) reinterpret_cast<_##apiName>(GetOpApiFuncAddr(#apiName)) + +#define MEMCPY_TO_BUF(data_expression, size_expression) \ + if (g_hashOffset + (size_expression) > kHashBufSize) { \ + g_hashOffset = kHashBufMaxSize; \ + return; \ + } \ + memcpy_s(g_hashBuf + g_hashOffset, size_expression, data_expression, size_expression); \ + g_hashOffset += size_expression; + +inline const char *GetOpApiLibName(void) +{ + return "libopapi.so"; +} + +inline const char *GetCustOpApiLibName(void) +{ + return "libcust_opapi.so"; +} + +inline void 
*GetOpApiFuncAddrInLib(void *handler, const char *libName, const char *apiName) +{ + auto funcAddr = dlsym(handler, apiName); + if (funcAddr == nullptr) { + ASCEND_LOGW("dlsym %s from %s failed, error:%s.", apiName, libName, dlerror()); + } + return funcAddr; +} + +inline void *GetOpApiLibHandler(const char *libName) +{ + auto handler = dlopen(libName, RTLD_LAZY); + if (handler == nullptr) { + ASCEND_LOGW("dlopen %s failed, error:%s.", libName, dlerror()); + } + return handler; +} + +inline void *GetOpApiFuncAddr(const char *apiName) +{ + static auto custOpApiHandler = GetOpApiLibHandler(GetCustOpApiLibName()); + if (custOpApiHandler != nullptr) { + auto funcAddr = GetOpApiFuncAddrInLib(custOpApiHandler, GetCustOpApiLibName(), apiName); + if (funcAddr != nullptr) { + return funcAddr; + } + } + + static auto opApiHandler = GetOpApiLibHandler(GetOpApiLibName()); + if (opApiHandler == nullptr) { + return nullptr; + } + return GetOpApiFuncAddrInLib(opApiHandler, GetOpApiLibName(), apiName); +} + +inline c10::Scalar ConvertTensorToScalar(const at::Tensor &tensor) +{ + c10::Scalar expScalar; + const at::Tensor *aclInput = &tensor; + if (aclInput->scalar_type() == at::ScalarType::Double) { + double value = *(double *)aclInput->data_ptr(); + c10::Scalar scalar(value); + expScalar = scalar; + } else if (aclInput->scalar_type() == at::ScalarType::Long) { + int64_t value = *(int64_t *)aclInput->data_ptr(); + c10::Scalar scalar(value); + expScalar = scalar; + } else if (aclInput->scalar_type() == at::ScalarType::Float) { + float value = *(float *)aclInput->data_ptr(); + c10::Scalar scalar(value); + expScalar = scalar; + } else if (aclInput->scalar_type() == at::ScalarType::Int) { + int value = *(int *)aclInput->data_ptr(); + c10::Scalar scalar(value); + expScalar = scalar; + } else if (aclInput->scalar_type() == at::ScalarType::Half) { + c10::Half value = *(c10::Half *)aclInput->data_ptr(); + c10::Scalar scalar(value); + expScalar = scalar; + } else if (aclInput->scalar_type() 
== at::ScalarType::Bool) { + int8_t value = *(int8_t *)aclInput->data_ptr(); + c10::Scalar scalar(value); + expScalar = scalar; + } else if (aclInput->scalar_type() == at::ScalarType::ComplexDouble) { + c10::complex value = *(c10::complex *)aclInput->data_ptr(); + c10::Scalar scalar(value); + expScalar = scalar; + } else if (aclInput->scalar_type() == at::ScalarType::ComplexFloat) { + c10::complex value = *(c10::complex *)aclInput->data_ptr(); + c10::Scalar scalar(value); + expScalar = scalar; + } else if (aclInput->scalar_type() == at::ScalarType::BFloat16) { + c10::BFloat16 value = *(c10::BFloat16 *)aclInput->data_ptr(); + c10::Scalar scalar(value); + expScalar = scalar; + } else { + ASCEND_LOGE("unsupported scalar type! "); + } + return expScalar; +} + +inline at::Tensor CopyTensorHostToDevice(const at::Tensor &cpu_tensor) +{ + at::Tensor cpuPinMemTensor = cpu_tensor.pin_memory(); + int deviceIndex = 0; + return cpuPinMemTensor.to( + c10::Device(torch_npu::utils::get_npu_device_type(), deviceIndex), cpuPinMemTensor.scalar_type(), true, true); +} + +inline at::Tensor CopyScalarToDevice(const c10::Scalar &cpu_scalar, at::ScalarType scalar_data_type) +{ + return CopyTensorHostToDevice(scalar_to_tensor(cpu_scalar).to(scalar_data_type)); +} + +inline aclTensor *ConvertType(const at::Tensor &at_tensor) +{ + static const auto aclCreateTensor = GET_OP_API_FUNC(aclCreateTensor); + if (aclCreateTensor == nullptr) { + return nullptr; + } + + if (!at_tensor.defined()) { + return nullptr; + } + at::ScalarType scalar_data_type = at_tensor.scalar_type(); + aclDataType acl_data_type = kATenScalarTypeToAclDataTypeTable[static_cast(scalar_data_type)]; + TORCH_CHECK( + acl_data_type != ACL_DT_UNDEFINED, std::string(c10::toString(scalar_data_type)) + " has not been supported") + c10::SmallVector storageDims; + // if acl_data_type is ACL_STRING, storageDims is empty. 
+ auto itemsize = at_tensor.itemsize(); + if (itemsize == 0) { + AT_ERROR("When ConvertType, tensor item size of cannot be zero."); + return nullptr; + } + if (acl_data_type != ACL_STRING) { + storageDims.push_back(at_tensor.storage().nbytes() / itemsize); + } + + const auto dimNum = at_tensor.sizes().size(); + aclFormat format = ACL_FORMAT_ND; + switch (dimNum) { + case 3: + format = ACL_FORMAT_NCL; + break; + case 4: + format = ACL_FORMAT_NCHW; + break; + case 5: + format = ACL_FORMAT_NCDHW; + break; + default: + format = ACL_FORMAT_ND; + } + + if (at_tensor.unsafeGetTensorImpl()->is_wrapped_number()) { + c10::Scalar expScalar = ConvertTensorToScalar(at_tensor); + at::Tensor aclInput = CopyScalarToDevice(expScalar, scalar_data_type); + return aclCreateTensor(aclInput.sizes().data(), + aclInput.sizes().size(), + acl_data_type, + aclInput.strides().data(), + aclInput.storage_offset(), + format, + storageDims.data(), + storageDims.size(), + const_cast(aclInput.storage().data())); + } + + auto acl_tensor = aclCreateTensor(at_tensor.sizes().data(), + at_tensor.sizes().size(), + acl_data_type, + at_tensor.strides().data(), + at_tensor.storage_offset(), + format, + storageDims.data(), + storageDims.size(), + const_cast(at_tensor.storage().data())); + return acl_tensor; +} + +inline aclScalar *ConvertType(const at::Scalar &at_scalar) +{ + static const auto aclCreateScalar = GET_OP_API_FUNC(aclCreateScalar); + if (aclCreateScalar == nullptr) { + return nullptr; + } + + at::ScalarType scalar_data_type = at_scalar.type(); + aclDataType acl_data_type = kATenScalarTypeToAclDataTypeTable[static_cast(scalar_data_type)]; + TORCH_CHECK( + acl_data_type != ACL_DT_UNDEFINED, std::string(c10::toString(scalar_data_type)) + " has not been supported") + aclScalar *acl_scalar = nullptr; + switch (scalar_data_type) { + case at::ScalarType::Double: { + double value = at_scalar.toDouble(); + acl_scalar = aclCreateScalar(&value, acl_data_type); + break; + } + case at::ScalarType::Long: { + 
int64_t value = at_scalar.toLong(); + acl_scalar = aclCreateScalar(&value, acl_data_type); + break; + } + case at::ScalarType::Bool: { + bool value = at_scalar.toBool(); + acl_scalar = aclCreateScalar(&value, acl_data_type); + break; + } + case at::ScalarType::ComplexDouble: { + auto value = at_scalar.toComplexDouble(); + acl_scalar = aclCreateScalar(&value, acl_data_type); + break; + } + default: + acl_scalar = nullptr; + break; + } + return acl_scalar; +} + +inline aclIntArray *ConvertType(const at::IntArrayRef &at_array) +{ + static const auto aclCreateIntArray = GET_OP_API_FUNC(aclCreateIntArray); + if (aclCreateIntArray == nullptr) { + return nullptr; + } + auto array = aclCreateIntArray(at_array.data(), at_array.size()); + return array; +} + +template +inline aclBoolArray *ConvertType(const std::array &value) +{ + static const auto aclCreateBoolArray = GET_OP_API_FUNC(aclCreateBoolArray); + if (aclCreateBoolArray == nullptr) { + return nullptr; + } + + auto array = aclCreateBoolArray(value.data(), value.size()); + return array; +} + +inline aclBoolArray *ConvertType(const at::ArrayRef &value) +{ + static const auto aclCreateBoolArray = GET_OP_API_FUNC(aclCreateBoolArray); + if (aclCreateBoolArray == nullptr) { + return nullptr; + } + + auto array = aclCreateBoolArray(value.data(), value.size()); + return array; +} + +inline aclTensorList *ConvertType(const at::TensorList &at_tensor_list) +{ + static const auto aclCreateTensorList = GET_OP_API_FUNC(aclCreateTensorList); + if (aclCreateTensorList == nullptr) { + return nullptr; + } + + std::vector tensor_list(at_tensor_list.size()); + for (size_t i = 0; i < at_tensor_list.size(); i++) { + tensor_list[i] = ConvertType(at_tensor_list[i]); + } + auto acl_tensor_list = aclCreateTensorList(tensor_list.data(), tensor_list.size()); + return acl_tensor_list; +} + +inline aclTensor *ConvertType(const c10::optional &opt_tensor) +{ + if (opt_tensor.has_value() && opt_tensor.value().defined()) { + return 
ConvertType(opt_tensor.value()); + } + return nullptr; +} + +inline aclIntArray *ConvertType(const c10::optional &opt_array) +{ + if (opt_array.has_value()) { + return ConvertType(opt_array.value()); + } + return nullptr; +} + +inline aclScalar *ConvertType(const c10::optional &opt_scalar) +{ + if (opt_scalar.has_value()) { + return ConvertType(opt_scalar.value()); + } + return nullptr; +} + +inline aclDataType ConvertType(const at::ScalarType scalarType) +{ + return kATenScalarTypeToAclDataTypeTable[static_cast(scalarType)]; +} + +template +T ConvertType(T value) +{ + return value; +} + +template +auto ConvertToOpApiFunc(const Tuple ¶ms, void *opApiAddr, std::index_sequence) +{ + using OpApiFunc = int (*)(typename std::decay(params))>::type...); + auto func = reinterpret_cast(opApiAddr); + return func; +} + +template +auto ConvertToOpApiFunc(const Tuple ¶ms, void *opApiAddr) +{ + static constexpr auto size = std::tuple_size::value; + return ConvertToOpApiFunc(params, opApiAddr, std::make_index_sequence{}); +} + +inline void Release(aclTensor *p) +{ + static const auto aclDestroyTensor = GET_OP_API_FUNC(aclDestroyTensor); + if (aclDestroyTensor == nullptr) { + return; + } + aclDestroyTensor(p); +} + +inline void Release(aclScalar *p) +{ + static const auto aclDestroyScalar = GET_OP_API_FUNC(aclDestroyScalar); + if (aclDestroyScalar == nullptr) { + return; + } + aclDestroyScalar(p); +} + +inline void Release(aclIntArray *p) +{ + static const auto aclDestroyIntArray = GET_OP_API_FUNC(aclDestroyIntArray); + if (aclDestroyIntArray == nullptr) { + return; + } + + aclDestroyIntArray(p); +} + +inline void Release(aclBoolArray *p) +{ + static const auto aclDestroyBoolArray = GET_OP_API_FUNC(aclDestroyBoolArray); + if (aclDestroyBoolArray == nullptr) { + return; + } + + aclDestroyBoolArray(p); +} + +inline void Release(aclTensorList *p) +{ + static const auto aclDestroyTensorList = GET_OP_API_FUNC(aclDestroyTensorList); + if (aclDestroyTensorList == nullptr) { + return; + } 
+ + aclDestroyTensorList(p); +} + +template +void Release(T value) +{ + (void)value; +} + +template +void CallRelease(Tuple t, std::index_sequence) +{ + (void)std::initializer_list{(Release(std::get(t)), 0)...}; +} + +template +void ReleaseConvertTypes(Tuple &t) +{ + static constexpr auto size = std::tuple_size::value; + CallRelease(t, std::make_index_sequence{}); +} + +template +constexpr auto ConvertTypes(Ts &...args) +{ + return std::make_tuple(ConvertType(args)...); +} + +template +auto call(Function f, Tuple t, std::index_sequence) +{ + return f(std::get(t)...); +} + +template +auto call(Function f, Tuple t) +{ + static constexpr auto size = std::tuple_size::value; + return call(f, t, std::make_index_sequence{}); +} + +template +void AddParamToBuf(const std::array &value) +{ + MEMCPY_TO_BUF(value.data(), value.size() * sizeof(bool)); +} + +template +void AddParamToBuf(const T &value) +{ + MEMCPY_TO_BUF(&value, sizeof(T)); +} + +void AddParamToBuf(const at::Tensor &); +void AddParamToBuf(const at::Scalar &); +void AddParamToBuf(const at::IntArrayRef &); +void AddParamToBuf(const at::ArrayRef &); +void AddParamToBuf(const at::TensorList &); +void AddParamToBuf(const c10::optional &); +void AddParamToBuf(const c10::optional &); +void AddParamToBuf(const c10::optional &); +void AddParamToBuf(const at::ScalarType); +void AddParamToBuf(const string &); +void AddParamToBuf(); + +template +void AddParamToBuf(const T &arg, Args &...args) +{ + AddParamToBuf(arg); + AddParamToBuf(args...); +} + +uint64_t CalcHashId(); +using InitHugeMemThreadLocal = int (*)(void *, bool); +using UnInitHugeMemThreadLocal = void (*)(void *, bool); +using ReleaseHugeMem = void (*)(void *, bool); + +/** + * check arg is at::Tensor ? + */ +template +struct is_at_tensor : std::false_type {}; + +template<> +struct is_at_tensor : std::true_type {}; + +/** + * check arg is at::TensorList ? 
+ */ +template +struct is_at_tensor_list : std::false_type {}; + +template<> +struct is_at_tensor_list : std::true_type {}; + +/** + * find first at::Tensor + */ +template +typename std::enable_if::type GetFirstTensor(const std::tuple& t, at::Tensor& res) {} + +template +typename std::enable_if < I::type GetFirstTensor(const std::tuple &t, at::Tensor &res) +{ + if constexpr (is_at_tensor>::type>::value) { + res = std::get(t); + return; + } else if constexpr (is_at_tensor_list>::type>::value) { + res = std::get(t)[0]; + return; + } + return GetFirstTensor(t, res); +} + +/** + * get the device + */ +template +auto DecodeDevice(Ts&... args) -> at::Device +{ + auto tp = std::make_tuple(args...); + at::Tensor ft; + GetFirstTensor(tp, ft); + return ft.device(); +} + +#define ACLNN_CMD(aclnn_api, ...) \ + do { \ + auto device = DecodeDevice(__VA_ARGS__); \ + const c10::OptionalDeviceGuard device_guard(device); \ + static const auto getWorkspaceSizeFuncAddr = GetOpApiFuncAddr(#aclnn_api "GetWorkspaceSize"); \ + static const auto opApiFuncAddr = GetOpApiFuncAddr(#aclnn_api); \ + static const auto initMemAddr = GetOpApiFuncAddr("InitHugeMemThreadLocal"); \ + static const auto unInitMemAddr = GetOpApiFuncAddr("UnInitHugeMemThreadLocal"); \ + static const auto releaseMemAddr = GetOpApiFuncAddr("ReleaseHugeMem"); \ + TORCH_CHECK(getWorkspaceSizeFuncAddr != nullptr && opApiFuncAddr != nullptr, \ + #aclnn_api, \ + " or ", \ + #aclnn_api "GetWorkspaceSize", \ + " not in ", \ + GetOpApiLibName(), \ + ", or ", \ + GetOpApiLibName(), \ + "not found."); \ + auto acl_stream = c10_npu::getCurrentNPUStream().stream(false); \ + uint64_t workspace_size = 0; \ + uint64_t *workspace_size_addr = &workspace_size; \ + aclOpExecutor *executor = nullptr; \ + aclOpExecutor **executor_addr = &executor; \ + InitHugeMemThreadLocal initMemFunc = reinterpret_cast(initMemAddr); \ + UnInitHugeMemThreadLocal unInitMemFunc = reinterpret_cast(unInitMemAddr); \ + if (initMemFunc) { \ + initMemFunc(nullptr, 
false); \ + } \ + auto converted_params = ConvertTypes(__VA_ARGS__, workspace_size_addr, executor_addr); \ + static auto getWorkspaceSizeFunc = ConvertToOpApiFunc(converted_params, getWorkspaceSizeFuncAddr); \ + auto workspace_status = call(getWorkspaceSizeFunc, converted_params); \ + TORCH_CHECK(workspace_status == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ + void *workspace_addr = nullptr; \ + if (workspace_size != 0) { \ + at::TensorOptions options = at::TensorOptions(torch_npu::utils::get_npu_device_type()); \ + auto workspace_tensor = at::empty({workspace_size}, options.dtype(at::kByte)); \ + workspace_addr = const_cast(workspace_tensor.storage().data()); \ + } \ + auto acl_call = [converted_params, workspace_addr, workspace_size, acl_stream, executor]() -> int { \ + typedef int (*OpApiFunc)(void *, uint64_t, aclOpExecutor *, const aclrtStream); \ + OpApiFunc opApiFunc = reinterpret_cast(opApiFuncAddr); \ + auto api_ret = opApiFunc(workspace_addr, workspace_size, executor, acl_stream); \ + TORCH_CHECK(api_ret == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ + ReleaseConvertTypes(converted_params); \ + ReleaseHugeMem releaseMemFunc = reinterpret_cast(releaseMemAddr); \ + if (releaseMemFunc) { \ + releaseMemFunc(nullptr, false); \ + } \ + return api_ret; \ + }; \ + at_npu::native::OpCommand cmd; \ + cmd.Name(#aclnn_api); \ + cmd.SetCustomHandler(acl_call); \ + cmd.Run(); \ + if (unInitMemFunc) { \ + unInitMemFunc(nullptr, false); \ + } \ + } while (false) + +#endif // MINDSPEED_OPS_CSRC_CANN_INC_ACL_COMMON_H diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/inc/mc2_utils.h b/model/train/yoco_moe/mindspeed/ops/csrc/cann/inc/mc2_utils.h new file mode 100644 index 000000000..95f262215 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/inc/mc2_utils.h @@ -0,0 +1,73 @@ +/****************************************************************************** + * Copyright (c) 2024 Huawei Technologies Co., Ltd + 
* All rights reserved. + * + * Licensed under the BSD 3-Clause License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSD-3-Clause + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#include +#include "torch_npu/csrc/framework/utils/RandomOpAdapter.h" +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/core/npu/NPUFormat.h" +#include + +void check_npu_mm_all_reduce_add_rms_norm_params(const at::Tensor &x1, const at::Tensor &x2, + const at::Tensor &residual, + const at::Tensor &gamma, + const c10::optional &antiquant_scale, + const c10::optional &antiquant_offset, + const c10::optional &dequant_scale) +{ + // check shape: shape of x1:[b,s,k], shape of x2:[k,n], shape of residual:[b,s,n], shape of gamma:[n], + TORCH_CHECK(x1.dim() == 2 || x1.dim() == 3, "x1 needs to be 2D or 3D, but got: ", x1.dim(), "D"); + TORCH_CHECK(x2.dim() == 2, "x2 needs to be 2D, but got: ", x2.dim(), "D"); + TORCH_CHECK(residual.dim() == 3, "residual needs to be 3D, but got: ", residual.dim(), "D"); + TORCH_CHECK(gamma.dim() == 1, "gamma needs to be 1D, but got: ", gamma.dim(), "D"); + TORCH_CHECK(x1.size(x1.dim() - 1) == x2.size(0), "K of x1 and x2 should be same, but they are x1_k: ", + x1.size(x1.dim() - 1), ", x2_k: ", x2.size(0)); + size_t x1_bs = x1.size(0); + if (x1.dim() == 3) { + x1_bs *= x1.size(1); + } + TORCH_CHECK(x1_bs == (residual.size(0) * residual.size(1)), "(b*s) of x1 and residual should be same,", + "but they are x1_(b*s): ", x1_bs, ", residual_(b*s): ", 
(residual.size(0) * residual.size(1))); + TORCH_CHECK(x2.size(x2.dim() - 1) == residual.size(residual.dim() - 1), "n of x2 and residual should be same,", + "but they are x2_n: ", x2.size(x2.dim() - 1), ", residual_n: ", residual.size(residual.dim() - 1)); + TORCH_CHECK(residual.size(residual.dim() - 1) == gamma.size(0), "n of residual and gamma should be same,", + "but they are residual_n: ", residual.size(residual.dim() - 1), ", gamma_n: ", gamma.size(0)); + + // check parameters. + // aclnn apis for MC2 share one torch_npu api, therefore, each aclnn api only accepts parameters + // that will be used. Any unused parameter will be seen as illegal. The job must be done here in + // torch_npu api. + // A8W8: antiquantScale and antiquantOffset should be None. + if (isIntegralType(x1.scalar_type()) && isIntegralType(x2.scalar_type())) { + TORCH_CHECK(x1.scalar_type() == at::kChar, "x1 must be an int8 tensor for quant."); + TORCH_CHECK(x2.scalar_type() == at::kChar, "x2 must be an int8 tensor for quant."); + TORCH_CHECK((!antiquant_scale.has_value() && !antiquant_offset.has_value()), + "when both dtype of x1 and dtype of x2 are equal to int8, " + "antiquantScale, antiquantOffset should both be null"); + } + // A16W8: dequantScale should be None. + if (!isIntegralType(x1.scalar_type()) && isIntegralType(x2.scalar_type())) { + TORCH_CHECK(x2.scalar_type() == at::kChar, "x2 must be an int8 tensor for weight quant."); + TORCH_CHECK((!dequant_scale.has_value()), + "when only dtype of x2 is equal to int8, dequantScale should be null"); + } + // MC2 without quantization. antiquantScale and antiquantOffset and dequantScale should be None. 
+ if (!isIntegralType(x1.scalar_type()) && !isIntegralType(x2.scalar_type())) { + TORCH_CHECK((!antiquant_scale.has_value() && !antiquant_offset.has_value() && !dequant_scale.has_value()), + "when neither dtype of x1 or dtype of x2 is equal to int8, " + "antiquantScale, antiquantOffset and dequantScale should all be null"); + } +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_all_to_all_all_gather_bmm.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_all_to_all_all_gather_bmm.cpp new file mode 100644 index 000000000..67483ab76 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_all_to_all_all_gather_bmm.cpp @@ -0,0 +1,97 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include "inc/aclnn_common.h" + +enum class X_SHARD_TYPE : int64_t { + ALLGATHER_IN_H = 0, + ALLGATHER_IN_C, +}; + +namespace op_infer { +constexpr int SIZE = 8; + +c10::SmallVector array_to_small_vector(c10::IntArrayRef shape) +{ + c10::SmallVector small_shape; + for (size_t i = 0; i < shape.size(); ++i) { + small_shape.emplace_back(shape[i]); + } + return small_shape; +} +} // namespace op_infer + +const static int DIMS = 3; +std::tuple npu_alltoall_allgather_bmm( + const at::Tensor &x, + const at::Tensor &weight, + const c10::optional &bias, + const std::string &group_ep, + int64_t group_ep_worldsize, + const std::string &group_tp, + int64_t group_tp_worldsize, + c10::optional shard_type, + c10::optional act_type, + c10::optional need_allgather_out, + c10::optional need_activation_feature) +{ + TORCH_CHECK(x.dim() == DIMS, + "The dims of input x should be 3 dimensional, but got ", x.dim(), "-dimensional."); + TORCH_CHECK(weight.dim() == DIMS, + "The dims of input weight should be 3 dimensional, but got ", weight.dim(), "-dimensional."); + const at::Tensor &bias_const = bias.value_or(at::Tensor()); + const int64_t shard_type_value = shard_type.value_or(0); + const X_SHARD_TYPE x_shard_type = static_cast(shard_type_value); + const int64_t act_type_value = act_type.value_or(0); + const bool need_allgather_out_value = need_allgather_out.value_or(false); + const bool need_activation_feature_value = need_activation_feature.value_or(false); + char *group_ep_ptr = const_cast(group_ep.c_str()); + char *group_tp_ptr = const_cast(group_tp.c_str()); + + c10::TensorOptions yoptions = x.options().dtype(x.scalar_type()); + auto batch = weight.size(0); + auto m = x.size(1) * group_ep_worldsize; + if (x_shard_type == X_SHARD_TYPE::ALLGATHER_IN_C) { + m *= group_tp_worldsize; + } + auto k = weight.size(1); + auto n = weight.size(2); + + auto y1_output_size = op_infer::array_to_small_vector({batch, m, n}); + at::Tensor y1out = 
at::empty(y1_output_size, yoptions); + at::Tensor y2out{nullptr}; + if (need_allgather_out_value) { + auto y2_output_size = op_infer::array_to_small_vector({batch, m, k}); + y2out = at::empty(y2_output_size, yoptions); + } + at::Tensor y3out{nullptr}; + if (need_activation_feature_value) { + y3out = at::empty(y1_output_size, yoptions); + } + + ACLNN_CMD(aclnnAlltoAllAllGatherBatchMatMul, x, weight, bias_const, + group_ep_ptr, group_tp_ptr, group_ep_worldsize, group_tp_worldsize, + shard_type_value, act_type_value, y1out, y2out, y3out); + return std::tie(y1out, y2out, y3out); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("npu_alltoall_allgather_bmm", &npu_alltoall_allgather_bmm, "npu_alltoall_allgather_bmm realize"); +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_apply_fused_ema_adamw.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_apply_fused_ema_adamw.cpp new file mode 100644 index 000000000..beb95abec --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_apply_fused_ema_adamw.cpp @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include "torch_npu/csrc/core/npu/NPUFormat.h" +#include "torch_npu/csrc/framework/utils/RandomOpAdapter.h" +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "inc/aclnn_common.h" + +at::Tensor format_trans(const at::Tensor &at_tensor) +{ + if (at_tensor.defined()) { + TORCH_CHECK(torch_npu::utils::is_npu(at_tensor), "only npu tensor is supported"); + return at_npu::native::npu_format_cast(at_tensor, ACL_FORMAT_ND); + } + return at_tensor; +} + +std::tuplenpu_apply_fused_ema_adamw( + at::Tensor grad, + at::Tensor var, + at::Tensor m, + at::Tensor v, + at::Tensor s, + at::Tensor step, + c10::optional lr, + c10::optional ema_decay, + c10::optional beta1, + c10::optional beta2, + c10::optional eps, + c10::optional mode, + c10::optional bias_correction, + c10::optional weight_decay) +{ + at::Tensor grad_ = format_trans(grad); + at::Tensor var_ 
= format_trans(var); + at::Tensor m_ = format_trans(m); + at::Tensor v_ = format_trans(v); + at::Tensor s_ = format_trans(s); + at::Tensor step_ = format_trans(step); + double lr_ = double(lr.value()); + double ema_decay_ = double(ema_decay.value()); + double beta1_ = double(beta1.value()); + double beta2_ = double(beta2.value()); + double eps_ = double(eps.value()); + int64_t mode_ = int64_t(mode.value()); + bool bias_correction_ = bool(bias_correction.value()); + double weight_decay_ = double(weight_decay.value()); + ACLNN_CMD(aclnnApplyFusedEmaAdam, + grad_, + var_, + m_, + v_, + s_, + step_, + lr_, + ema_decay_, + beta1_, + beta2_, + eps_, + mode_, + bias_correction_, + weight_decay_); + return std::tie(var_, m_, v_, s_); +} +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) +{ + m.def("npu_apply_fused_ema_adamw", + &npu_apply_fused_ema_adamw, + "npu_apply_fused_ema_adamw", + pybind11::arg("grad"), + pybind11::arg("var"), + pybind11::arg("m"), + pybind11::arg("v"), + pybind11::arg("s"), + pybind11::arg("step"), + pybind11::arg("lr") = 1e-3f, + pybind11::arg("ema_decay") = 0.9999, + pybind11::arg("beta1") = 0.9, + pybind11::arg("beta2") = 0.999, + pybind11::arg("eps") = 1e-8f, + pybind11::arg("mode") = 1, + pybind11::arg("bias_correction") = true, + pybind11::arg("weight_decay") = 0.0); +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_bmm_reduce_scatter_all_to_all.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_bmm_reduce_scatter_all_to_all.cpp new file mode 100644 index 000000000..92195522a --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_bmm_reduce_scatter_all_to_all.cpp @@ -0,0 +1,87 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include "torch_npu/csrc/framework/utils/RandomOpAdapter.h" +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/core/npu/NPUFormat.h" +#include + +#include "inc/aclnn_common.h" +#include "inc/mc2_utils.h" + +static constexpr uint64_t DIMS = 3; +static constexpr uint64_t E_DIM_IDX = 0; +static constexpr uint64_t C_DIM_IDX = 1; +static constexpr uint64_t H_DIM_IDX = 2; +enum class Y_SHARD_TYPE : int64_t { + SHARD_IN_H = 0, + SHARD_IN_C, +}; + +static void check_params_dim(const at::Tensor &x, const at::Tensor &weight, + const c10::optional &bias) +{ + TORCH_CHECK(x.dim() == DIMS, "x needs to be 3D, but got: ", x.dim(), "D"); + TORCH_CHECK(weight.dim() == DIMS, "weight needs to be 3D, but got: ", weight.dim(), "D"); + TORCH_CHECK(x.size(2) == weight.size(1), + "The K-axis in the two inputs of Matmul must be equal, but in reality, the K-axis of x is ", + x.size(2), " and the K-axis of weight is ", weight.size(1)); + if (bias.has_value()) { + const at::Tensor &bias_const = bias.value_or(at::Tensor()); + TORCH_CHECK(bias_const.dim() == DIMS or bias_const.dim() == 2, + "bias needs to be 2D or 3D, but got: ", bias_const.dim(), "D"); + } +} + +at::Tensor npu_bmm_reducescatter_alltoall( + const at::Tensor &x, + const at::Tensor &weight, + const c10::optional &bias, + std::string group_ep, + int64_t group_ep_worldsize, + std::string group_tp, + int64_t group_tp_worldsize, + c10::optional shard_type) +{ + check_params_dim(x, weight, bias); + const at::Tensor &bias_const = bias.value_or(at::Tensor()); + const int64_t 
shard_type_value = shard_type.value_or(0); + const Y_SHARD_TYPE y_shard_type = static_cast(shard_type_value); + char *group_ep_ptr = const_cast(group_ep.c_str()); + char *group_tp_ptr = const_cast(group_tp.c_str()); + + TORCH_CHECK(group_ep_worldsize > 0, "group_ep_worldsize should over 0, now is: ", group_ep_worldsize); + TORCH_CHECK(group_tp_worldsize > 0, "group_tp_worldsize should over 0, now is: ", group_tp_worldsize); + TORCH_CHECK((shard_type_value == 0) || (shard_type_value == 1), "shard_type should be 0 or 1, now is: ", shard_type_value); + + auto e = x.size(0) * group_ep_worldsize; + + int64_t c = x.size(C_DIM_IDX) / group_ep_worldsize; + auto out_c_size = (y_shard_type == Y_SHARD_TYPE::SHARD_IN_C) ? (c / group_tp_worldsize) : c; + + int64_t h = weight.size(H_DIM_IDX); + auto out_h_size = (y_shard_type == Y_SHARD_TYPE::SHARD_IN_H) ? (h / group_tp_worldsize) : h; + auto y_shape = {e, out_c_size, out_h_size}; + + at::Tensor y = at::empty(y_shape, x.options()); + ACLNN_CMD(aclnnBatchMatMulReduceScatterAlltoAll, x, weight, bias_const, group_ep_ptr, group_tp_ptr, + group_ep_worldsize, group_tp_worldsize, shard_type_value, y); + return y; +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("npu_bmm_reducescatter_alltoall", &npu_bmm_reducescatter_alltoall, "npu_bmm_reducescatter_alltoall realize"); +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_dropout_add_layer_norm.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_dropout_add_layer_norm.cpp new file mode 100644 index 000000000..72b190d3f --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_dropout_add_layer_norm.cpp @@ -0,0 +1,181 @@ +// Copyright (c) 2023 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include "torch_npu/csrc/aten/NPUNativeFunctions.h" + +#include "inc/aclnn_common.h" + +inline void npu_dropout_add_layer_norm_check( + const at::Tensor &x0, // BxSxhidden_size + const at::Tensor &weight, // hidden_size + const c10::optional &residual_opt, // BxSxhidden_size + const c10::optional &bias_opt, // hidden_size + const c10::optional &rowscale_opt, // BxS + const c10::optional &layerscale_opt, // hidden_size + double p, + double eps) +{ + TORCH_CHECK( + torch_npu::utils::is_npu(x0), + "npu_dropout_add_layer_norm only supports device for NPU!"); + + auto itype = x0.scalar_type(); + auto wtype = weight.scalar_type(); + + TORCH_CHECK( + !(itype == at::kBFloat16 && wtype == at::kHalf), + "weight_dtype == torch.float16 and input_dtype == torch.bfloat16 was not supported"); + + if (bias_opt.has_value()) { + auto bias = bias_opt.value(); + TORCH_CHECK(bias.dtype() == wtype); + TORCH_CHECK(bias.sizes() == weight.sizes()); + } + + if (residual_opt.has_value()) { + auto residual = residual_opt.value(); + TORCH_CHECK(residual.sizes() == x0.sizes()); + } + + if (rowscale_opt.has_value()) { + auto rowscale = rowscale_opt.value(); + TORCH_CHECK(rowscale.dim() == x0.dim() - 1); + TORCH_CHECK(rowscale.dtype() == itype); + } + + if (layerscale_opt.has_value()) { + auto layerscale = layerscale_opt.value(); + TORCH_CHECK(layerscale.sizes()[0] == x0.sizes().back()); + TORCH_CHECK(layerscale.dtype() == wtype); + } + + TORCH_CHECK( + p >= 0 && p <= 1, + "dropout probability has to be between 0 and 1, but got ", p); + + TORCH_CHECK(eps >= 0.f); + + 
auto hidden_size = weight.numel(); + TORCH_CHECK((hidden_size % 8 == 0) && (hidden_size <= 8192)); +} + +std::tuple npu_dropout_add_layer_norm( + const at::Tensor &x0, + const at::Tensor &weight, + const c10::optional &residual_opt, + const c10::optional &bias_opt, + const c10::optional &rowscale_opt, + const c10::optional &layerscale_opt, + double p, + double eps, + bool prenorm, + bool residual_in_fp32, + bool is_rms_norm, + bool return_dropout_mask) +{ + npu_dropout_add_layer_norm_check( + x0, weight, residual_opt, bias_opt, rowscale_opt, layerscale_opt, p, eps); + + const at::Tensor &residual_ = c10::value_or_else(residual_opt, [] { return at::Tensor(); }); + const at::Tensor &bias_ = c10::value_or_else(bias_opt, [] { return at::Tensor(); }); + const at::Tensor &rowscale_ = c10::value_or_else(rowscale_opt, [] { return at::Tensor(); }); + const at::Tensor &layerscale_ = c10::value_or_else(layerscale_opt, [] { return at::Tensor(); }); + + at::Tensor residual = residual_; + at::Tensor bias = bias_; + at::Tensor rowscale = rowscale_; + at::Tensor layerscale = layerscale_; + + at::IntArrayRef x0_sizes = x0.sizes(); + at::ScalarType x0_dtype = x0.scalar_type(); + + // residual_in_fp32 only has an effect if residual is None. + // Otherwise residual dtype is residual.dtype. + at::ScalarType residual_dtype = residual.defined() ? + residual.scalar_type() : + (residual_in_fp32 ? at::kFloat : x0_dtype); + + const at::Tensor x0_fp32 = (x0_dtype == at::kFloat) ? x0 : x0.to(at::kFloat); + const at::Tensor weight_fp32 = (weight.scalar_type() == at::kFloat) ? weight : weight.to(at::kFloat); + if (residual.defined()) { + residual = (residual.scalar_type() == at::kFloat) ? residual : residual.to(at::kFloat); + } + if (bias.defined()) { + bias = (bias.scalar_type() == at::kFloat) ? 
bias : bias.to(at::kFloat); + } + // Calculate scaled_x0 + at::Tensor scaled_x0 = x0_fp32; + int64_t batch = scaled_x0.size(0); + int64_t seq = scaled_x0.size(1); + int64_t head = scaled_x0.size(2); + + if (rowscale.defined()) { + rowscale = (rowscale.scalar_type() == at::kFloat) ? rowscale : rowscale.to(at::kFloat); + rowscale = rowscale.view({batch, seq, 1}); + scaled_x0 = scaled_x0.mul(rowscale); + } + if (layerscale.defined()) { + layerscale = (layerscale.scalar_type() == at::kFloat) ? layerscale : layerscale.to(at::kFloat); + layerscale = layerscale.view({1, 1, head}); + scaled_x0 = scaled_x0.mul(layerscale); + } + + // Apply dropout to scaled_x0 + at::Tensor dropout_result; + at::Tensor mask; + bool train = p == 0.0 ? false : true; + if (train) { + double p1m = 1. - p; + double scale = p1m == 0 ? 0. : 1. / p1m; + mask = at::empty_like(scaled_x0, scaled_x0.options().dtype(c10::CppTypeToScalarType::value)); + mask.bernoulli_(p1m); + dropout_result = scaled_x0.mul(mask).mul_(scale); + } else { + mask = at::ones_like(scaled_x0, scaled_x0.options().dtype(c10::CppTypeToScalarType::value)); + dropout_result = scaled_x0; + } + + // Apply layer_norm or rms_norm to (dropout_result + residual) + at::Tensor norm_result; + at::Tensor pre_norm = residual.defined() ? dropout_result.add(residual) : dropout_result; + int hidden_size = weight.numel(); + float inverse_cols = 1.f / float(hidden_size); + if (!is_rms_norm) { + auto native_layer_norm_output = at::native_layer_norm(pre_norm, hidden_size, weight_fp32, bias, eps); + norm_result = std::get<0>(native_layer_norm_output); + } else { + at::Tensor norm_x = (pre_norm.mul(pre_norm)).sum(2, true).mul(inverse_cols).add(eps); + + norm_result = pre_norm.mul(norm_x.pow(-0.5)).mul(weight_fp32.view({1,1,head})); + } + + // Update outputs + norm_result = (norm_result.scalar_type() == x0_dtype) ? 
norm_result : norm_result.to(x0_dtype); + at::Tensor pre_norm_result; + if (prenorm) { + pre_norm_result = (pre_norm.scalar_type() == residual_dtype) ? pre_norm : pre_norm.to(residual_dtype); + } + at::Tensor mask_result; + if (return_dropout_mask) { + mask_result = mask; + } + + return std::tie(norm_result, pre_norm_result, mask_result); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("npu_dropout_add_layer_norm", &npu_dropout_add_layer_norm, "npu_dropout_add_layer_norm forward"); +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_grouped_mat_mul_all_reduce.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_grouped_mat_mul_all_reduce.cpp new file mode 100644 index 000000000..c141434b6 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_grouped_mat_mul_all_reduce.cpp @@ -0,0 +1,179 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include + +#include "inc/aclnn_common.h" + +static const int64_t IN_NOT_SPLIT_OUT_NOT_SPLIT = 0; +static const int64_t IN_SPLIT_OUT_NOT_SPLIT = 1; +static const int64_t IN_NOT_SPLIT_OUT_SPLIT = 2; +static const int64_t IN_SPLIT_OUT_SPLIT = 3; + +using npu_preparation = at_npu::native::OpPreparation; +#ifdef __TORCH_2__ + using BiasType = c10::optional>; +#else + using BiasType = std::vector; +#endif + +namespace op_infer { +constexpr int SIZE = 8; + +c10::SmallVector array_to_small_vector(c10::IntArrayRef shape) +{ + c10::SmallVector small_shape; + for (size_t i = 0; i < shape.size(); ++i) { + small_shape.emplace_back(shape[i]); + } + return small_shape; +} +} // namespace op_infer + +bool _check_w_dim(size_t num_w, size_t dim_num_w, size_t dim_0_w, size_t num_group_list, + size_t sum_group_list) +{ + bool result = false; + if (2 == dim_num_w && num_w == num_group_list) { + result = true; + } else if (3 == dim_num_w && 1 == num_w && dim_0_w == num_group_list) { + result = true; + } else if (2 == dim_num_w && 1 == num_w && dim_0_w == sum_group_list) { + result = true; + } + return result; +} + +void _check_dims(int64_t split_item, size_t num_x, size_t num_w, const at::TensorList &x, + const at::TensorList &weight, size_t num_group_list, size_t sum_group_list) +{ + TORCH_CHECK(num_x > 0 && num_w > 0, + "Neither x nor weight could be empty."); + TORCH_CHECK(IN_NOT_SPLIT_OUT_NOT_SPLIT == split_item || IN_NOT_SPLIT_OUT_SPLIT == split_item + || IN_SPLIT_OUT_NOT_SPLIT == split_item || IN_SPLIT_OUT_SPLIT == split_item, + "The given split_item [", split_item, "] is invalid, which must be one of 0/1/2/3"); + if (IN_NOT_SPLIT_OUT_NOT_SPLIT == split_item || IN_NOT_SPLIT_OUT_SPLIT == split_item) { + TORCH_CHECK(num_x == num_w && 0 == num_group_list, + "When split_item = 0 or 2, the num of x tensors must equal the num of weight tensors, " + "and there is supposed not to be group_list input"); + } else if (IN_SPLIT_OUT_NOT_SPLIT == 
split_item) { + TORCH_CHECK(num_x == 1 && num_w == num_group_list && sum_group_list == x[0].sizes()[0], + "When split_item = 1, the num of x tensors must equal 1, " + "and the num of weight tensors is supposed to equal the length of group_list"); + } else if (IN_SPLIT_OUT_SPLIT == split_item) { + size_t dim_num_w = weight[0].sizes().size(); + size_t dim_0_w = weight[0].sizes()[0]; + TORCH_CHECK(_check_w_dim(num_w, dim_num_w, dim_0_w, num_group_list, sum_group_list), + "Invalid dim of weight. When split_item = 3, only the following three situations are allowed:" + "(1) The tensor nums of weight equals the length of group_list; the dim num of each tensor equals 2. " + "(2) There is one tensor in weight with a dim num of 3; its first dim equals the length of group_list. " + "(3) There is one tensor in weight with a dim num of 2; its first dim equals the sum of group_list. "); + } +} + +void _create_new_tensor_multi_dim(std::vector &y, const at::Tensor &x_i, + const at::Tensor &w_i, c10::TensorOptions options) +{ + auto x_sizes = x_i.sizes(); + std::vector y_sizes(x_sizes.begin(), x_sizes.end()); + y_sizes.at(x_sizes.size() - 1) = w_i.sizes()[1]; + y.emplace_back(at::empty(y_sizes, options)); +} + +void _create_new_tensor(std::vector &y, size_t dim_m, size_t dim_n, c10::TensorOptions options, + size_t num_group_list) +{ + auto output_size = op_infer::array_to_small_vector({dim_m, dim_n}); + y.emplace_back(at::empty(output_size, options)); +} + +std::vector npu_grouped_mat_mul_all_reduce(const std::vector& x, + const std::vector& weight, + const BiasType& bias, + c10::optional> group_list, + c10::optional split_item, + std::string hccl_group, + std::string reduce_op, + int64_t comm_turn) +{ + size_t num_x = x.size(); + size_t num_w = weight.size(); +#ifdef __TORCH_2__ + const std::vector& new_bias = bias.value_or(std::vector{}); +#else + const std::vector& new_bias = bias; +#endif + size_t num_bias = new_bias.size(); + + const at::TensorList x_(x); + const 
at::TensorList weight_(weight); + const at::TensorList new_bias_(new_bias); + + auto group_list_real_ = group_list.value_or(std::vector{}); + at::IntArrayRef group_list_real(group_list_real_); + size_t num_group_list = group_list_real.size(); + int64_t sum_group_list = num_group_list > 0 ? group_list_real[num_group_list - 1] : 0; + int64_t split_item_value = split_item.value_or(0); + + const char* hccl_group_value = hccl_group.c_str(); + const char* reduce_op_value = reduce_op.c_str(); + + int64_t comm_turn_value = comm_turn; + int64_t stream_mode_value = 1; + + _check_dims(split_item_value, num_x, num_w, x_, weight_, num_group_list, sum_group_list); + + std::vector y; + c10::TensorOptions options = x_[0].options().dtype(x_[0].scalar_type()); + + if (IN_NOT_SPLIT_OUT_NOT_SPLIT == split_item_value) { + y.reserve(num_x); + for (size_t i = 0; i < num_x; i++) { + _create_new_tensor_multi_dim(y, x[i], weight[i], options); + } + } else if (IN_SPLIT_OUT_NOT_SPLIT == split_item_value) { + y.reserve(num_group_list); + _create_new_tensor(y, group_list_real[0], weight[0].sizes()[1], options, num_group_list); + for (size_t i = 1; i < num_group_list; i++) { + _create_new_tensor(y, group_list_real[i] - group_list_real[i - 1], weight[i].sizes()[1], options, + num_group_list); + } + } else if (IN_NOT_SPLIT_OUT_SPLIT == split_item_value) { + size_t dim_m = 0; + for (size_t i = 0; i < num_x; i++) { + dim_m += x[i].sizes()[0]; + } + _create_new_tensor(y, dim_m, weight[0].sizes()[1], options, num_group_list); + } else if (IN_SPLIT_OUT_SPLIT == split_item_value) { + size_t dim_num_w = weight[0].sizes().size(); + _create_new_tensor(y, x[0].sizes()[0], weight[0].sizes()[dim_num_w - 1], options, num_group_list); + } + + at::TensorList result(y); + + ACLNN_CMD(aclnnGroupedMatMulAllReduce, x_, weight_, new_bias_, + group_list_real, split_item_value, hccl_group_value, + reduce_op_value, comm_turn_value, stream_mode_value, result); + + return y; +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 
{ + m.def("npu_grouped_mat_mul_all_reduce", &npu_grouped_mat_mul_all_reduce, "grouped mat_mul all_reduce forward"); +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_mm_all_reduce_add_rms_norm.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_mm_all_reduce_add_rms_norm.cpp new file mode 100644 index 000000000..e88484b6c --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_mm_all_reduce_add_rms_norm.cpp @@ -0,0 +1,89 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include +#include "torch_npu/csrc/framework/utils/RandomOpAdapter.h" +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/core/npu/NPUFormat.h" +#include + +#include "inc/aclnn_common.h" +#include "inc/mc2_utils.h" + +at::Tensor format_trans(const at::Tensor &at_tensor) +{ + if (at_tensor.defined()) { + TORCH_CHECK(torch_npu::utils::is_npu(at_tensor), "only npu tensor is supported"); + return at_npu::native::npu_format_cast(at_tensor, ACL_FORMAT_ND); + } + return at_tensor; +} + +std::tuple npu_mm_all_reduce_add_rms_norm( + const at::Tensor &x1, + const at::Tensor &x2, + const at::Tensor &residual, + const at::Tensor &gamma, + std::string hcom, + std::string reduce_op, + double epsilon, + const c10::optional &bias, + const c10::optional &antiquant_scale, + const c10::optional &antiquant_offset, + const c10::optional &dequant_scale, + int64_t antiquant_group_size, + int64_t comm_turn) +{ + check_npu_mm_all_reduce_add_rms_norm_params(x1, x2, residual, gamma, antiquant_scale, antiquant_offset, + dequant_scale); + at::Tensor format_x1 = format_trans(x1); + at::Tensor format_x2 = format_trans(x2); + at::Tensor format_residual = format_trans(residual); + at::Tensor format_gamma = format_trans(gamma); + char *hcom_ptr = const_cast(hcom.c_str()); + char *reduce_op_ptr = const_cast(reduce_op.c_str()); + const at::Tensor &bias_const = bias.value_or(at::Tensor()); + at::Tensor y = at::empty(format_residual.sizes(), format_residual.options()); + at::Tensor norm_out = at::empty(format_residual.sizes(), format_residual.options()); + int64_t stream_mode = ACL_STOP_ON_FAILURE; + // a8w8: x1\x2 kChar; a16w8: x2 kChar; + if ((x1.scalar_type() == at::ScalarType::Half || x1.scalar_type() == at::ScalarType::BFloat16) && + (x2.scalar_type() == at::ScalarType::Half || x2.scalar_type() == at::ScalarType::BFloat16)) { + TORCH_CHECK(!(antiquant_scale.has_value() || dequant_scale.has_value()), + "In not quant scenario, antiquant_scale and dequant_scale must be 
null."); + ACLNN_CMD(aclnnMatmulAllReduceAddRmsNorm, x1, x2, bias_const, residual, gamma, epsilon, hcom_ptr, + reduce_op_ptr, comm_turn, stream_mode, y, norm_out); + } else if (x1.scalar_type() == at::ScalarType::Char && x2.scalar_type() == at::ScalarType::Char) { + const at::Tensor &dequant_scale_real = dequant_scale.value_or(at::Tensor()); + ACLNN_CMD(aclnnQuantMatmulAllReduceAddRmsNorm, x1, x2, bias_const, dequant_scale_real, residual, gamma, + epsilon, hcom_ptr, reduce_op_ptr, comm_turn, stream_mode, y, norm_out); + } else if ((x1.scalar_type() == at::ScalarType::Half || x1.scalar_type() == at::ScalarType::BFloat16) && + x2.scalar_type() == at::ScalarType::Char) { + const at::Tensor &antiquant_scale_real = antiquant_scale.value_or(at::Tensor()); + const at::Tensor &antiquant_offset_real = antiquant_offset.value_or(at::Tensor()); + ACLNN_CMD(aclnnWeightQuantMatmulAllReduceAddRmsNorm, x1, x2, bias_const, antiquant_scale_real, + antiquant_offset_real, residual, gamma, epsilon, hcom_ptr, reduce_op_ptr, comm_turn, + stream_mode, antiquant_group_size, y, norm_out); + } else { + TORCH_CHECK(false, "the type of x1 and x2 should be suit the not quant scenario, " + "dequant scenario, antiquant scenario."); + } + + return std::make_tuple(y, norm_out); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("npu_mm_all_reduce_add_rms_norm", &npu_mm_all_reduce_add_rms_norm, "npu_mm_all_reduce_add_rms_norm"); +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_mm_all_reduce_add_rms_norm_.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_mm_all_reduce_add_rms_norm_.cpp new file mode 100644 index 000000000..b64f0e0f3 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_mm_all_reduce_add_rms_norm_.cpp @@ -0,0 +1,89 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. 
+// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include "torch_npu/csrc/framework/utils/RandomOpAdapter.h" +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/core/npu/NPUFormat.h" +#include + +#include "inc/aclnn_common.h" +#include "inc/mc2_utils.h" + +at::Tensor format_trans(const at::Tensor &at_tensor) +{ + if (at_tensor.defined()) { + TORCH_CHECK(torch_npu::utils::is_npu(at_tensor), "only npu tensor is supported"); + return at_npu::native::npu_format_cast(at_tensor, ACL_FORMAT_ND); + } + return at_tensor; +} + +std::tuple npu_mm_all_reduce_add_rms_norm_( + const at::Tensor &x1, + const at::Tensor &x2, + const at::Tensor &residual, + const at::Tensor &gamma, + std::string hcom, + std::string reduce_op, + double epsilon, + const c10::optional &bias, + const c10::optional &antiquant_scale, + const c10::optional &antiquant_offset, + const c10::optional &dequant_scale, + int64_t antiquant_group_size, + int64_t comm_turn) +{ + check_npu_mm_all_reduce_add_rms_norm_params(x1, x2, residual, gamma, antiquant_scale, antiquant_offset, + dequant_scale); + at::Tensor format_x1 = format_trans(x1); + at::Tensor format_x2 = format_trans(x2); + at::Tensor format_residual = format_trans(residual); + at::Tensor format_gamma = format_trans(gamma); + char *hcom_ptr = const_cast(hcom.c_str()); + char *reduce_op_ptr = const_cast(reduce_op.c_str()); + const at::Tensor &bias_const = bias.value_or(at::Tensor()); + at::Tensor norm_out = 
at::empty(format_residual.sizes(), format_residual.options()); + int64_t stream_mode = ACL_STOP_ON_FAILURE; + + // a8w8: x1\x2 kChar; a16w8: x2 kChar; + if ((x1.scalar_type() == at::ScalarType::Half || x1.scalar_type() == at::ScalarType::BFloat16) && + (x2.scalar_type() == at::ScalarType::Half || x2.scalar_type() == at::ScalarType::BFloat16)) { + TORCH_CHECK(!(antiquant_scale.has_value() || dequant_scale.has_value()), + "In not quant scenario, antiquant_scale and dequant_scale must be null."); + ACLNN_CMD(aclnnInplaceMatmulAllReduceAddRmsNorm, x1, x2, bias_const, residual, gamma, epsilon, hcom_ptr, + reduce_op_ptr, comm_turn, stream_mode, norm_out); + } else if (x1.scalar_type() == at::ScalarType::Char && x2.scalar_type() == at::ScalarType::Char) { + const at::Tensor &dequant_scale_real = dequant_scale.value_or(at::Tensor()); + ACLNN_CMD(aclnnInplaceQuantMatmulAllReduceAddRmsNorm, x1, x2, bias_const, dequant_scale_real, residual, gamma, + epsilon, hcom_ptr, reduce_op_ptr, comm_turn, stream_mode, norm_out); + } else if ((x1.scalar_type() == at::ScalarType::Half || x1.scalar_type() == at::ScalarType::BFloat16) && + x2.scalar_type() == at::ScalarType::Char) { + const at::Tensor &antiquant_scale_real = antiquant_scale.value_or(at::Tensor()); + const at::Tensor &antiquant_offset_real = antiquant_offset.value_or(at::Tensor()); + ACLNN_CMD(aclnnInplaceWeightQuantMatmulAllReduceAddRmsNorm, x1, x2, bias_const, antiquant_scale_real, + antiquant_offset_real, residual, gamma, epsilon, hcom_ptr, reduce_op_ptr, comm_turn, + stream_mode, antiquant_group_size, norm_out); + } else { + TORCH_CHECK(false, "the type of x1 and x2 should be suit the not quant scenario, " + "dequant scenario, antiquant scenario."); + } + + return std::make_tuple(residual, norm_out); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("npu_mm_all_reduce_add_rms_norm_", &npu_mm_all_reduce_add_rms_norm_, "npu_mm_all_reduce_add_rms_norm_"); +} \ No newline at end of file diff --git 
a/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_moe_token_permute.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_moe_token_permute.cpp new file mode 100644 index 000000000..3be1a5c8f --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_moe_token_permute.cpp @@ -0,0 +1,117 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +#include +#include +#include +#include +#include +#include "inc/aclnn_common.h" +#include + +using namespace at_npu::native; +using torch::autograd::AutogradContext; +using torch::autograd::Function; + +namespace { + const static int DIMS = 2; + const static int MIN_DIMS = 1; + + void CheckMoeTokenPermuteForward( + const at::Tensor &tokens, + const at::Tensor &indices, + bool padded_mode = false + ) + { + if (padded_mode) { + throw std::runtime_error("current version only support padded_mode is false"); + } + // current version tokens only support bfloat16 + TORCH_CHECK(tokens.dim() == DIMS, + "The dims of input tokens should be 2 dimensional, but got ", tokens.dim(), "-dimensional."); + TORCH_CHECK(indices.dim() == DIMS || indices.dim() == MIN_DIMS, + "The dims of input indices should be 2 or 1 dimensional, but got ", indices.dim(), "-dimensional."); + } + + void CheckMoeTokenPermuteBackward(const at::Tensor &grad_permuted_tokens) + { + TORCH_CHECK(grad_permuted_tokens.dim() == DIMS, + "The dims of input grad_permuted_tokens should be 2 dimensional, but got ", grad_permuted_tokens.dim(), "-dimensional."); + } + + class NPUMoeTokenPermute : public torch::autograd::Function { + public: + static std::vector forward( + AutogradContext *ctx, + const at::Tensor &tokens, + const at::Tensor &indices, + c10::optional num_out_tokens, + c10::optional padded_mode + ) + { + at::AutoDispatchBelowADInplaceOrView guard; + int64_t num_out_tokens_value = num_out_tokens.value_or(0); + bool padded_mode_vale = padded_mode.value_or(false); + CheckMoeTokenPermuteForward(tokens, indices, padded_mode_vale); + + int64_t topk = 
(indices.dim() == 1) ? 1 : indices.size(1); + int64_t flatten_size = indices.numel(); + int64_t actual_num_out_tokens = (num_out_tokens_value > 0) ? std::min(num_out_tokens_value, flatten_size) : num_out_tokens_value + flatten_size; + // The sorted_indices actually implemented by the aclnn operator are different from the sorted_indices + // output by the permute function of the megatron source code. + // The actual sorted_indices implemented by the aclnn operator are not sliced. + // current version sorted_indices only support dtype(at::kInt) + at::Tensor sorted_indices = at::empty({flatten_size}, indices.options().dtype(at::kInt)); + at::Tensor permuted_tokens = at::empty({actual_num_out_tokens, tokens.size(1)}, tokens.options()); + + ACLNN_CMD(aclnnMoeTokenPermute, tokens, indices, actual_num_out_tokens, padded_mode_vale, permuted_tokens, sorted_indices); + + ctx->save_for_backward({sorted_indices}); + + ctx->saved_data["num_tokens"] = tokens.size(0); + ctx->saved_data["num_topK"] = topk; + ctx->saved_data["padded_mode"] = padded_mode_vale; + + return {permuted_tokens, sorted_indices}; + } + + static std::vector backward( + AutogradContext *ctx, + std::vector& grad_output + ) + { + auto grad_permuted_tokens = grad_output[0]; + auto saved_tensors = ctx->get_saved_variables(); + auto sorted_indices = saved_tensors[0]; + + int64_t num_tokens = ctx->saved_data["num_tokens"].toInt(); + int64_t num_topK = ctx->saved_data["num_topK"].toInt(); + bool padded_mode = ctx->saved_data["padded_mode"].toBool(); + CheckMoeTokenPermuteBackward(grad_permuted_tokens); + + at::Tensor grad_tokens = at::empty({num_tokens, grad_permuted_tokens.size(1)}, grad_permuted_tokens.options()); + + ACLNN_CMD(aclnnMoeTokenPermuteGrad, grad_permuted_tokens, sorted_indices, num_topK, padded_mode, grad_tokens); + + return {grad_tokens, at::Tensor(), at::Tensor(), at::Tensor()}; + } + }; +} // namespace + +std::vector npu_moe_token_permute( + const at::Tensor &tokens, + const at::Tensor &indices, + 
c10::optional num_out_tokens, + c10::optional padded_mode +) +{ + return NPUMoeTokenPermute::apply(tokens, indices, num_out_tokens, padded_mode); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) +{ + m.def("npu_moe_token_permute", &npu_moe_token_permute, + "npu moe token permute", + pybind11::arg("tokens"), + pybind11::arg("indices"), + pybind11::arg("num_out_tokens") = 0, + pybind11::arg("padded_mode") = false); +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_moe_token_unpermute.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_moe_token_unpermute.cpp new file mode 100644 index 000000000..cba84594b --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_moe_token_unpermute.cpp @@ -0,0 +1,133 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +#include +#include +#include +#include +#include +#include "inc/aclnn_common.h" + +using namespace at_npu::native; +using torch::autograd::AutogradContext; +using torch::autograd::Function; + +namespace { + const static int DIMS = 2; + const static int MIN_DIMS = 1; + const static int64_t DEFAULT_TOPK = 1; + + void CheckMoeTokenUnpermuteForward( + const at::Tensor& permuted_tokens, + const at::Tensor& sorted_indices, + c10::optional& probs, + bool padded_mode = false + ) + { + if (padded_mode) { + throw std::runtime_error("current version only support padded_mode is false"); + } + TORCH_CHECK(permuted_tokens.dim() == DIMS, + "The dims of input permuted_tokens should be 2 dimensional, but got ", permuted_tokens.dim(), "-dimensional."); + TORCH_CHECK(sorted_indices.dim() == MIN_DIMS, + "The dims of input sorted_indices should be 1 dimensional, but got ", sorted_indices.dim(), "-dimensional."); + if (probs.has_value()) { + TORCH_CHECK(probs.value().dim() == DIMS, + "The dims of input probs should be 2 dimensional, but got ", probs.value().dim(), "-dimensional."); + } + } + + void CheckMoeTokenUnpermuteBackward( + const at::Tensor &unpermuted_tokens_grad, + const 
at::Tensor &sorted_indices, + const at::Tensor &probs + ) + { + TORCH_CHECK(unpermuted_tokens_grad.dim() == DIMS, + "The dims of input unpermuted_tokens_grad should be 2 dimensional, but got ", unpermuted_tokens_grad.dim(), "-dimensional."); + TORCH_CHECK(sorted_indices.dim() == MIN_DIMS, + "The dims of input sorted_indices should be 1 dimensional, but got ", sorted_indices.dim(), "-dimensional."); + if (probs.defined()) { + TORCH_CHECK(probs.dim() == DIMS, + "The dims of input probs should be 2 dimensional, but got ", probs.dim(), "-dimensional."); + } + } + + class NPUMoeTokenUnpermute : public torch::autograd::Function { + public: + static at::Tensor forward( + AutogradContext *ctx, + const at::Tensor& permuted_tokens, + const at::Tensor& sorted_indices, + c10::optional& probs, + c10::optional padded_mode, + c10::optional& restore_shape + ) + { + at::AutoDispatchBelowADInplaceOrView guard; + bool padded_mode_vale = padded_mode.value_or(false); + auto restore_shape_vale = restore_shape.value_or(at::IntArrayRef{1}); + CheckMoeTokenUnpermuteForward(permuted_tokens, sorted_indices, probs, padded_mode_vale); + int64_t topk = probs.has_value() ? probs.value().size(1) : DEFAULT_TOPK; + // The sorted_indices actually implemented by the aclnn operator are different from the sorted_indices + // output by the permute function of the megatron source code. + // The actual sorted_indices implemented by the aclnn operator are not sliced. + // so, num_unpermuted_tokens is obtained by dividing sorted_indices.size(0) by topk + int64_t num_unpermuted_tokens = sorted_indices.size(0) / topk; + at::Tensor unpermuted_tokens = at::empty({num_unpermuted_tokens, permuted_tokens.size(-1)}, permuted_tokens.options()); + at::Tensor probs_value = probs.has_value() ? 
probs.value() : at::Tensor(); + ACLNN_CMD(aclnnMoeTokenUnpermute, permuted_tokens, sorted_indices, probs_value, padded_mode_vale, restore_shape_vale, unpermuted_tokens); + ctx->save_for_backward({permuted_tokens, sorted_indices, probs_value}); + ctx->saved_data["padded_mode"] = padded_mode_vale; + ctx->saved_data["restore_shape"] = restore_shape; + + return unpermuted_tokens; + } + + static std::vector backward( + AutogradContext *ctx, + const std::vector& grad_outputs + ) + { + auto saved_tensors = ctx->get_saved_variables(); + auto permuted_tokens = saved_tensors[0]; + auto sorted_indices = saved_tensors[1]; + auto probs = saved_tensors[2]; + bool padded_mode = ctx->saved_data["padded_mode"].toBool(); + auto restore_shape = ctx->saved_data["restore_shape"]; + at::IntArrayRef restore_shape_vale{1, 1}; + + at::Tensor grad_unpermuted_tokens = grad_outputs[0]; + CheckMoeTokenUnpermuteBackward(grad_unpermuted_tokens, sorted_indices, probs); + + at::Tensor grad_permuted_tokens = at::empty(permuted_tokens.sizes(), permuted_tokens.options()); + at::Tensor grad_probs = probs.defined() ? 
at::empty(probs.sizes(), probs.options()) : at::empty({0}, permuted_tokens.options()); + ACLNN_CMD(aclnnMoeTokenUnpermuteGrad, permuted_tokens, grad_unpermuted_tokens, sorted_indices, probs, padded_mode, restore_shape_vale, grad_permuted_tokens, grad_probs); + if (probs.defined()) { + return {grad_permuted_tokens, at::Tensor(), grad_probs, at::Tensor(), at::Tensor()}; + } else { + return {grad_permuted_tokens, at::Tensor(), at::Tensor(), at::Tensor(), at::Tensor()}; + } + } + }; +} // namespace + +at::Tensor npu_moe_token_unpermute( + const at::Tensor& permuted_tokens, + const at::Tensor& sorted_indices, + c10::optional& probs, + c10::optional padded_mode, + c10::optional& restore_shape +) +{ + return NPUMoeTokenUnpermute::apply(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) +{ + m.def("npu_moe_token_unpermute", &npu_moe_token_unpermute, + "npu moe token unpermute", + pybind11::arg("permuted_tokens"), + pybind11::arg("sorted_indices"), + pybind11::arg("probs") = pybind11::none(), + pybind11::arg("padded_mode") = false, + pybind11::arg("restore_shape") = pybind11::none()); +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_ring_attention_update.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_ring_attention_update.cpp new file mode 100644 index 000000000..bc4f0f204 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_ring_attention_update.cpp @@ -0,0 +1,60 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include  // NOTE(review): header names on these five #include lines are missing, and the template arguments in this file were reconstructed from usage - confirm against upstream.
+#include
+#include
+#include
+#include
+
+#include "inc/aclnn_common.h"
+
+// Runs aclnnRingAttentionUpdate on the prev_* and cur_* inputs and returns {attn_out, softmax_max, softmax_sum}.
+std::vector<at::Tensor> npu_ring_attention_update(const at::Tensor& prev_attn_out,
+                                                  const at::Tensor& prev_softmax_max,
+                                                  const at::Tensor& prev_softmax_sum,
+                                                  const at::Tensor& cur_attn_out,
+                                                  const at::Tensor& cur_softmax_max,
+                                                  const at::Tensor& cur_softmax_sum,
+                                                  c10::optional<at::Tensor>& actual_seq_qlen,
+                                                  c10::optional<std::string> layout)
+{
+    auto softmax_max_shape = prev_softmax_max.sizes();
+    auto softmax_sum_shape = prev_softmax_sum.sizes();
+    auto attn_out_shape = prev_attn_out.sizes();
+
+    auto softmax_max = at::empty(softmax_max_shape, prev_softmax_max.options());  // each output copies shape and options of its prev_* input
+    auto softmax_sum = at::empty(softmax_sum_shape, prev_softmax_sum.options());
+    auto attn_out = at::empty(attn_out_shape, prev_attn_out.options());
+    std::string layout_value = layout.value_or("SBH");  // layout defaults to "SBH"
+    char* input_layout_ptr = const_cast<char *>(layout_value.c_str());  // points into layout_value, which must outlive the call below
+    at::Tensor actual_seq_qlen_value = actual_seq_qlen.has_value() ? actual_seq_qlen.value() : at::empty(
+        {}, prev_softmax_sum.options().dtype(at::ScalarType::Long));  // placeholder int64 tensor with shape {} when no actual_seq_qlen is given
+    ACLNN_CMD(aclnnRingAttentionUpdate, prev_attn_out, prev_softmax_max, prev_softmax_sum, cur_attn_out,
+        cur_softmax_max, cur_softmax_sum, actual_seq_qlen_value, input_layout_ptr, attn_out, softmax_max, softmax_sum);
+    return {attn_out, softmax_max, softmax_sum};
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+    m.def("npu_ring_attention_update", &npu_ring_attention_update, "npu ring attention update",
+        pybind11::arg("prev_attn_out"),
+        pybind11::arg("prev_softmax_max"),
+        pybind11::arg("prev_softmax_sum"),
+        pybind11::arg("cur_attn_out"),
+        pybind11::arg("cur_softmax_max"),
+        pybind11::arg("cur_softmax_sum"),
+        pybind11::arg("actual_seq_qlen") = pybind11::none(),
+        pybind11::arg("layout") = "SBH");
+}
\ No newline at end of file
diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_rotary_position_embedding.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_rotary_position_embedding.cpp
new file mode 100644
index 000000000..ea9985843
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/npu_rotary_position_embedding.cpp
@@ -0,0 +1,159 @@
+// Copyright (c) 2024 Huawei Technologies Co., Ltd
+// All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include  // NOTE(review): header names on these five #include lines are missing, and the template arguments in this file were reconstructed from usage - confirm against upstream.
+#include
+#include
+#include
+#include
+#include "inc/aclnn_common.h"
+
+using namespace at_npu::native;
+using torch::autograd::AutogradContext;
+using torch::autograd::Function;
+
+namespace {
+    const static int DIMS = 4;  // required rank of x / y_grad, cos and sin
+    const static int D_INDEX = 3;  // axis that is checked as head_dim
+    const static int EVEN_DIM_CHECK = 2;
+    const static int BROADCAST_LIMIT = 1024;  // cap on the broadcast size when dcos / dsin are requested
+    const static int64_t ROTATE_HALF = 0;
+    const static int64_t ROTATE_INTERLEAVED = 1;
+    // Validates dtype, rank, even head_dim, equal cos/sin shapes and the rotate mode of the forward inputs.
+    void CheckRopeForward(const at::Tensor &x, const at::Tensor &cos, const at::Tensor &sin, int64_t mode)
+    {
+        TORCH_CHECK(x.scalar_type() == at::ScalarType::Half || x.scalar_type() == at::ScalarType::BFloat16 ||
+            x.scalar_type() == at::ScalarType::Float,
+            "Input tensor x dtype [", x.scalar_type(),
+            "] is invalid, should be float32, float16 or bfloat16");
+        TORCH_CHECK(cos.scalar_type() == at::ScalarType::Half || cos.scalar_type() == at::ScalarType::BFloat16 ||
+            cos.scalar_type() == at::ScalarType::Float,
+            "Input tensor cos dtype [", cos.scalar_type(),
+            "] is invalid, should be float32, float16 or bfloat16");
+        TORCH_CHECK(sin.scalar_type() == at::ScalarType::Half || sin.scalar_type() == at::ScalarType::BFloat16 ||
+            sin.scalar_type() == at::ScalarType::Float,
+            "Input tensor sin dtype [", sin.scalar_type(),
+            "] is invalid, should be float32, float16 or bfloat16");
+        TORCH_CHECK(x.dim() == DIMS,
+            "The dims of input x should be 4 dimensional, but got ", x.dim(), "-dimensional.");
+        TORCH_CHECK(cos.dim() == DIMS,
+            "The dims of input cos should be 4 dimensional, but got ", cos.dim(), "-dimensional.");
+        TORCH_CHECK(sin.dim() == DIMS,
+            "The dims of input sin should be 4 dimensional, but got ", sin.dim(), "-dimensional.");
+        TORCH_CHECK(x.sizes()[D_INDEX] % EVEN_DIM_CHECK == 0,
+            "The head_dim length of input must be an even number, but got ", x.sizes()[D_INDEX], ".");
+        TORCH_CHECK(cos.sizes() == sin.sizes(), "The shape of input Tensor cos and sin should be same.");
+        TORCH_CHECK(mode == ROTATE_HALF || mode == ROTATE_INTERLEAVED,
+            "The mode of rotate should be 0(rotate_half) or 1(rotate_interleaved), but got ", mode, ".");
+    }
+    // Same validation for the backward inputs, plus the broadcast-size limit when cos and sin both require grad.
+    void CheckRopeBackward(const at::Tensor &y_grad, const at::Tensor &cos, const at::Tensor &sin, int64_t mode)
+    {
+        TORCH_CHECK(y_grad.scalar_type() == at::ScalarType::Half || y_grad.scalar_type() == at::ScalarType::BFloat16 ||
+            y_grad.scalar_type() == at::ScalarType::Float,
+            "Input tensor y_grad dtype [", y_grad.scalar_type(),
+            "] is invalid, should be float32, float16 or bfloat16");
+        TORCH_CHECK(cos.scalar_type() == at::ScalarType::Half || cos.scalar_type() == at::ScalarType::BFloat16 ||
+            cos.scalar_type() == at::ScalarType::Float,
+            "Input tensor cos dtype [", cos.scalar_type(),
+            "] is invalid, should be float32, float16 or bfloat16");
+        TORCH_CHECK(sin.scalar_type() == at::ScalarType::Half || sin.scalar_type() == at::ScalarType::BFloat16 ||
+            sin.scalar_type() == at::ScalarType::Float,
+            "Input tensor sin dtype [", sin.scalar_type(),
+            "] is invalid, should be float32, float16 or bfloat16");
+        TORCH_CHECK(y_grad.dim() == DIMS,
+            "The dims of input y_grad should be 4 dimensional, but got ", y_grad.dim(), "-dimensional.");
+        TORCH_CHECK(cos.dim() == DIMS,
+            "The dims of input cos should be 4 dimensional, but got ", cos.dim(), "-dimensional.");
+        TORCH_CHECK(sin.dim() == DIMS,
+            "The dims of input sin should be 4 dimensional, but got ", sin.dim(), "-dimensional.");
+        TORCH_CHECK(y_grad.sizes()[D_INDEX] % EVEN_DIM_CHECK == 0,
+            "The head_dim length of input must be an even number, but got ", y_grad.sizes()[D_INDEX], ".");
+        TORCH_CHECK(cos.sizes() == sin.sizes(), "The shape of input Tensor cos and sin should be same.");
+        TORCH_CHECK(mode == ROTATE_HALF || mode == ROTATE_INTERLEAVED,
+            "The mode of rotate should be 0(rotate_half) or 1(rotate_interleaved), but got ", mode, ".");
+        // When dcos and dsin are computed, the product of the axes on which y_grad and cos differ must stay <= 1024.
+        if (cos.requires_grad() == true && sin.requires_grad() == true) {
+            bool check_support = true;
+            int64_t broadcast_dim_num = 1;
+            for (int64_t i = 0; i < y_grad.dim(); i++) {
+                if (y_grad.sizes()[i] != cos.sizes()[i]) {
+                    broadcast_dim_num = broadcast_dim_num * y_grad.sizes()[i];
+                }
+                if (broadcast_dim_num > BROADCAST_LIMIT) {
+                    check_support = false;
+                    break;
+                }
+            }
+            TORCH_CHECK(check_support == true,
+                "The broadcast shape: [", broadcast_dim_num, "] > 1024 is too large, do not support in backward function.");
+        }
+    }
+    // Autograd wrapper around aclnnRotaryPositionEmbedding (forward) and aclnnRotaryPositionEmbeddingGrad (backward).
+    class NPURotaryPositionEmbedding : public torch::autograd::Function<NPURotaryPositionEmbedding> {
+    public:
+        static at::Tensor forward(AutogradContext *ctx, const at::Tensor &x, const at::Tensor &cos, const at::Tensor &sin, c10::optional<int64_t> mode)
+        {
+            at::AutoDispatchBelowADInplaceOrView guard;
+            int64_t mode_value = mode.value_or(ROTATE_HALF);  // mode defaults to rotate_half
+            CheckRopeForward(x, cos, sin, mode_value);
+
+            at::Tensor y = at::empty(x.sizes(), x.options());
+            ACLNN_CMD(aclnnRotaryPositionEmbedding, x, cos, sin, mode_value, y);
+
+            if (cos.requires_grad() == true && sin.requires_grad() == true) {  // x is saved only when dcos / dsin will be computed
+                ctx->save_for_backward({x, cos, sin});
+            } else {
+                ctx->save_for_backward({at::Tensor(), cos, sin});
+            }
+            ctx->saved_data["mode"] = mode_value;
+            return y;
+        }
+
+        static std::vector<at::Tensor> backward(AutogradContext *ctx, std::vector<at::Tensor> grad_output)
+        {
+            auto mode_value = ctx->saved_data["mode"].toInt();
+            auto saved_vars = ctx->get_saved_variables();
+            auto dy = grad_output[0];
+            auto x = saved_vars[0];  // undefined tensor unless forward saved x
+            auto cos = saved_vars[1];
+            auto sin = saved_vars[2];
+            CheckRopeBackward(dy, cos, sin, mode_value);
+
+            at::Tensor dx = at::empty(dy.sizes(), dy.options());
+            at::Tensor dcos, dsin;
+            if (cos.requires_grad() == true && sin.requires_grad() == true) {
+                dcos = at::empty(cos.sizes(), cos.options());
+                dsin = at::empty(sin.sizes(), sin.options());
+            } else {
+                dcos = at::empty({0}, cos.options());  // size-0 placeholders handed to the kernel when no dcos / dsin is needed
+                dsin = at::empty({0}, sin.options());
+            }
+            ACLNN_CMD(aclnnRotaryPositionEmbeddingGrad, dy, cos, sin, x, mode_value, dx, dcos, dsin);
+            return {dx, dcos, dsin, at::Tensor()};  // one entry per forward input: x, cos, sin, mode
+        }
+    };
+} // namespace
+
+at::Tensor npu_rotary_position_embedding(const at::Tensor &x, const at::Tensor &cos, const at::Tensor &sin, int64_t mode)
+{
+    return NPURotaryPositionEmbedding::apply(x, cos, sin, mode);
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
+{
+    m.def("npu_rotary_position_embedding", &npu_rotary_position_embedding,
+        "rotary position embedding, mode 0: GPT-NeoX style, mode 1: GPT-J style",
+        pybind11::arg("x"), pybind11::arg("cos"), pybind11::arg("sin"), pybind11::arg("mode") = 0);
+}
diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/quant_gmm.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/quant_gmm.cpp
new file mode 100644
index 000000000..0a79fdfde
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/quant_gmm.cpp
@@ -0,0 +1,97 @@
+// Copyright (c) 2024 Huawei Technologies Co., Ltd
+// All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include  // NOTE(review): header names on these five #include lines are missing, and the template arguments in this file were reconstructed from usage - confirm against upstream.
+#include
+#include
+#include
+#include
+
+#include "inc/aclnn_common.h"
+
+using npu_preparation = at_npu::native::OpPreparation;
+
+namespace op_infer {
+constexpr int SIZE = 8;
+
+c10::SmallVector<int64_t, SIZE> array_to_small_vector(c10::IntArrayRef shape)  // copies every entry of shape into a SmallVector
+{
+    c10::SmallVector<int64_t, SIZE> small_shape;
+    for (size_t i = 0; i < shape.size(); ++i) {
+        small_shape.emplace_back(shape[i]);
+    }
+    return small_shape;
+}
+}
+// Calls aclnnGroupedMatmulV4 on int8 x / weight lists and returns one output of shape {x[0].size(0), weight[0].size(-1)}.
+std::vector<at::Tensor> npu_quant_gmm(const std::vector<at::Tensor>& x,
+                                      const std::vector<at::Tensor>& weight,
+                                      const std::vector<at::Tensor>& scale,
+                                      const std::vector<at::Tensor>& offset,
+                                      const std::vector<at::Tensor>& per_token_scale,
+                                      const std::vector<at::Tensor>& bias,
+                                      const c10::optional<at::Tensor>& group_list,
+                                      c10::optional<int64_t> group_list_type,
+                                      c10::optional<int64_t> output_dtype,
+                                      c10::optional<int64_t> act_type)
+{
+    TORCH_CHECK(!x.empty() && !weight.empty() && x[0].dim() > 0 && weight[0].dim() > 0, "Input x and weight must each hold at least one tensor with at least one dimension.");
+    TORCH_CHECK(x[0].scalar_type() == at::kChar && weight[0].scalar_type() == at::kChar, "Input x and weight dtype must be int8.");
+    auto group_list_real = group_list.value_or(at::Tensor());  // undefined tensor when no group_list is given
+    int64_t split_item_value = 3;  // fixed values forwarded to the kernel unchanged
+    int64_t group_type_value = 0;
+    int64_t group_list_type_value = group_list_type.value_or(0);
+    int64_t act_type_value = act_type.value_or(0);
+
+    const at::TensorList x_(x);
+    const at::TensorList weight_(weight);
+    const at::TensorList bias_(bias);
+    const at::TensorList scale_(scale);
+    const at::TensorList offset_(offset);
+    const at::TensorList per_token_scale_(per_token_scale);
+
+    c10::TensorOptions options;
+    int64_t output_dtype_value = output_dtype.value_or(0);  // 1 -> bfloat16, 0 -> float16 (default), -1 -> int8
+    if (output_dtype_value == 1) {
+        options = x[0].options().dtype(at::kBFloat16);
+    } else if (output_dtype_value == 0) {
+        options = x[0].options().dtype(at::kHalf);
+    } else if (output_dtype_value == -1) {
+        options = x[0].options().dtype(at::kChar);
+    } else {
+        AT_ERROR("Value of output_dtype must be one of -1/0/1, but got ", output_dtype_value);
+    }
+
+    size_t dim_num_w = weight[0].sizes().size();
+    auto output_size = op_infer::array_to_small_vector({x[0].sizes()[0], weight[0].sizes()[dim_num_w - 1]});
+    std::vector<at::Tensor> y{at::empty(output_size, options)};
+    at::TensorList result = at::TensorList(y);  // non-owning view of y passed to the kernel as the output list
+    auto antiquant_scale = nullptr;  // kernel arguments this entry point does not use
+    auto antiquant_offset = nullptr;
+    auto act_in = nullptr;
+    auto act_quant_scale = nullptr;
+    auto act_quant_offset = nullptr;
+    auto act_out = nullptr;
+    auto dynamic_quant_scale_out = nullptr;
+    ACLNN_CMD(aclnnGroupedMatmulV4, x_, weight_, bias_, scale_, offset_, antiquant_scale,
+        antiquant_offset, per_token_scale_, group_list_real, act_in, act_quant_scale, act_quant_offset,
+        split_item_value, group_type_value, group_list_type_value, act_type_value,
+        result, act_out, dynamic_quant_scale_out);
+
+    return y;
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+    m.def("npu_quant_gmm", &npu_quant_gmm, "quantize grouped matmul forward");
+}
diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/cann/weight_quant_gmm.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/cann/weight_quant_gmm.cpp
new file mode 100644
index 000000000..9157e6223
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/ops/csrc/cann/weight_quant_gmm.cpp
@@ -0,0 +1,83 @@
+// Copyright (c) 2024 Huawei Technologies Co., Ltd
+// All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include  // NOTE(review): header names on these five #include lines are missing, and the template arguments in this file were reconstructed from usage - confirm against upstream.
+#include
+#include
+#include
+#include
+
+#include "inc/aclnn_common.h"
+
+using npu_preparation = at_npu::native::OpPreparation;
+
+namespace op_infer {
+constexpr int SIZE = 8;
+
+c10::SmallVector<int64_t, SIZE> array_to_small_vector(c10::IntArrayRef shape)  // copies every entry of shape into a SmallVector
+{
+    c10::SmallVector<int64_t, SIZE> small_shape;
+    for (size_t i = 0; i < shape.size(); ++i) {
+        small_shape.emplace_back(shape[i]);
+    }
+    return small_shape;
+}
+}
+// Calls aclnnGroupedMatmulV4 with antiquant scale/offset lists; the output keeps the dtype of x[0].
+std::vector<at::Tensor> npu_weight_quant_gmm(const std::vector<at::Tensor>& x,
+                                             const std::vector<at::Tensor>& weight,
+                                             const std::vector<at::Tensor>& antiquant_scale,
+                                             const std::vector<at::Tensor>& antiquant_offset,
+                                             const std::vector<at::Tensor>& bias,
+                                             const c10::optional<at::Tensor>& group_list,
+                                             c10::optional<int64_t> group_list_type,
+                                             c10::optional<int64_t> act_type)
+{
+    auto group_list_real = group_list.value_or(at::Tensor());  // undefined tensor when no group_list is given
+    int64_t split_item_value = 3;  // fixed values forwarded to the kernel unchanged
+    int64_t group_type_value = 0;
+    int64_t group_list_type_value = group_list_type.value_or(0);
+    int64_t act_type_value = act_type.value_or(0);
+    TORCH_CHECK(!x.empty() && !weight.empty() && x[0].dim() > 0 && weight[0].dim() > 0, "Input x and weight must each hold at least one tensor with at least one dimension.");
+    const at::TensorList x_(x);
+    const at::TensorList weight_(weight);
+    const at::TensorList bias_(bias);
+    const at::TensorList antiquant_scale_(antiquant_scale);
+    const at::TensorList antiquant_offset_(antiquant_offset);
+
+    c10::TensorOptions options = x_[0].options().dtype(x_[0].scalar_type());
+
+    size_t dim_num_w = weight[0].sizes().size();
+    auto output_size = op_infer::array_to_small_vector({x[0].sizes()[0], weight[0].sizes()[dim_num_w - 1]});
+    std::vector<at::Tensor> y{at::empty(output_size, options)};
+    at::TensorList result = at::TensorList(y);  // non-owning view of y passed to the kernel as the output list
+    auto scale = nullptr;  // kernel arguments this entry point does not use
+    auto offset = nullptr;
+    auto per_token_scale = nullptr;
+    auto act_in = nullptr;
+    auto act_quant_scale = nullptr;
+    auto act_quant_offset = nullptr;
+    auto act_out = nullptr;
+    auto dynamic_quant_scale_out = nullptr;
+    ACLNN_CMD(aclnnGroupedMatmulV4, x_, weight_, bias_, scale, offset, antiquant_scale_,
+        antiquant_offset_, per_token_scale, group_list_real, act_in, act_quant_scale, act_quant_offset,
+        split_item_value, group_type_value, group_list_type_value, act_type_value,
+        result, act_out, dynamic_quant_scale_out);
+
+    return y;
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+    m.def("npu_weight_quant_gmm", &npu_weight_quant_gmm, "weight quantize grouped matmul forward");
+}
diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/flop_counter/flop_counter.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/flop_counter/flop_counter.cpp
new file mode 100644
index 000000000..40318188d
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/ops/csrc/flop_counter/flop_counter.cpp
@@ -0,0 +1,402 @@
+// Copyright (c) 2024 Huawei Technologies Co., Ltd
+// Copyright (c) 2019, Facebook CORPORATION.
+// All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "torch_npu/csrc/core/npu/NPUException.h"
+
+#include "flop_counter.h"
+
+
+int64_t FlopCounter::mm_flop(const at::Tensor &tensor1, const at::Tensor &tensor2)
+{
+    // Count flops for matmul as 2 * k * m * n * (broadcast batch dims).
+    // Only the shapes of the two operands are used.
+    auto dim_tensor1 = tensor1.dim();
+    auto dim_tensor2 = tensor2.dim();
+    TORCH_CHECK(dim_tensor1 > 0 && dim_tensor2 > 0, "matmul got error dimensions: ", "(", dim_tensor1, ", ",
+        dim_tensor2, ")");
+    // A(x1, m, k1) and B(x2, k2, n)
+    // Get x1 and x2's infer sizes
+    auto x1_size = dim_tensor1 > 2 ? dim_tensor1 - 2 : 0;
+    auto x2_size = dim_tensor2 > 2 ? dim_tensor2 - 2 : 0;
+    at::IntArrayRef x1_sizes(tensor1.sizes().data(), x1_size);
+    at::IntArrayRef x2_sizes(tensor2.sizes().data(), x2_size);
+    std::vector<int64_t> output_size = at::infer_size(x1_sizes, x2_sizes);
+
+    // Get m
+    if (dim_tensor1 >= 2) {
+        output_size.push_back(tensor1.size(-2));
+    }
+    // Get n
+    if (dim_tensor2 >= 2) {
+        output_size.push_back(tensor2.size(-1));
+    }
+    // Get k from the first operand; the second operand's k2 is not compared against it here.
+    int64_t k = tensor1.size(-1);
+    // Compute
+    int64_t flop = 2 * k;
+    for (const auto& elem : output_size) {
+        flop *= elem;
+    }
+
+    return flop;
+}
+
+int64_t FlopCounter::coc_flop(const at::Tensor &tensor1, const at::Tensor &tensor2, bool trans, int rankSize, bool is_ag_mm)
+{
+    // Count flops for coc: optionally transpose tensor2, reuse the matmul count, scale by rankSize when is_ag_mm.
+    at::Tensor tensor2_transposed;
+    if (trans) {
+        tensor2_transposed = at::transpose(tensor2, 0, 1);
+    } else {
+        tensor2_transposed = tensor2;
+    }
+    int64_t total_flops = FlopCounter::mm_flop(tensor1, tensor2_transposed);
+    return is_ag_mm ? total_flops * rankSize : total_flops;
+}
+
+int64_t FlopCounter::bmm_flop(const at::Tensor &self, const at::Tensor &mat2)
+{
+    // Count flops for the bmm operation.
+    // self is read as [b, m, k] and mat2 as [b, k, n];
+    // the result is 2 * b * m * n * k.
+    int64_t b = self.size(0);
+    int64_t m = self.size(1);
+    int64_t k = self.size(2);
+    int64_t b2 = mat2.size(0);
+    int64_t k2 = mat2.size(1);
+    int64_t n = mat2.size(2);
+    TORCH_CHECK(b == b2 && k == k2, "The tensor dimension is incorrect");
+    return b * m * n * 2 * k;
+}
+
+std::vector<std::tuple<std::vector<int64_t>, std::vector<int64_t>, std::vector<int64_t>, std::vector<int64_t>>> _unpack_flash_attention_nested_shapes(std::vector<int64_t> query,
+    std::vector<int64_t> key, std::vector<int64_t> value, int64_t head_num, std::vector<int64_t> grad_out,
+    std::vector<int64_t> cum_seq_q, std::vector<int64_t> cum_seq_k, std::string input_layer_str)
+{
+    // Given the shapes passed to a flash_attention_(forward|backward) kernel, rewrite q/k/v as
+    // [B, N, S, D] so the SDPA flop formulas apply. Handles GQA, MQA and the TND layout.
+    //
+    // For GQA and MQA the head dimension of k/v (dim 2 for SBH/BSH/BSND, dim 1 otherwise) is
+    // replaced by the one of q, i.e. flops are counted as if k/v had as many heads as q.
+    //
+    // Returns a single (query, key, value, grad_out) tuple; grad_out copies the query shape when
+    // `grad_out` is non-empty and stays empty otherwise.
+
+    // BSND and BNSD carry four dimensions, the other supported layouts three. Check the layout and
+    // the ranks first so that no index below reads past the end of a shape vector.
+    const bool is_4d_layout = input_layer_str == "BSND" || input_layer_str == "BNSD";
+    const bool is_3d_layout = input_layer_str == "BSH" || input_layer_str == "SBH" || input_layer_str == "TND";
+    TORCH_CHECK(is_4d_layout || is_3d_layout, "Unsupported input layout for flop counting: ", input_layer_str);
+    const size_t min_rank = is_4d_layout ? 4 : 3;
+    TORCH_CHECK(query.size() >= min_rank && key.size() >= min_rank && value.size() >= min_rank,
+        "The layout ", input_layer_str, " needs at least ", min_rank, "-dimensional query, key and value");
+
+    std::vector<std::tuple<std::vector<int64_t>, std::vector<int64_t>, std::vector<int64_t>, std::vector<int64_t>>> result;
+    int64_t q_0 = query[0];
+    int64_t q_1 = query[1];
+    int64_t q_2 = query[2];
+    int64_t q_3 = is_4d_layout ? query[3] : 0;
+    int64_t k_0 = key[0];
+    int64_t k_1 = key[1];
+    int64_t k_2 = key[2];
+    int64_t k_3 = is_4d_layout ? key[3] : 0;
+    int64_t v_0 = value[0];
+    int64_t v_1 = value[1];
+    int64_t v_2 = value[2];
+    int64_t v_3 = is_4d_layout ? value[3] : 0;
+
+    // for GQA and MQA
+    if (input_layer_str == "SBH" || input_layer_str == "BSH" || input_layer_str == "BSND") {
+        if (q_2 != k_2 && q_2 != v_2) {
+            k_2 = q_2;
+            v_2 = q_2;
+        }
+    } else {
+        if (q_1 != k_1 && q_1 != v_1) {
+            k_1 = q_1;
+            v_1 = q_1;
+        }
+    }
+
+    std::vector<int64_t> new_query_shape;
+    std::vector<int64_t> new_key_shape;
+    std::vector<int64_t> new_value_shape;
+    std::vector<int64_t> new_grad_out_shape;
+    if (input_layer_str == "BSH" || input_layer_str == "SBH") {
+        // H is split into head_num heads below; guard the division.
+        TORCH_CHECK(head_num > 0, "head_num must be positive for layout ", input_layer_str, ", but got ", head_num);
+    }
+    if (input_layer_str == "BSH") {
+        new_query_shape = {q_0, head_num, q_1, q_2/head_num};
+        new_key_shape = {k_0, head_num, k_1, k_2/head_num};
+        new_value_shape = {v_0, head_num, v_1, v_2/head_num};
+    } else if (input_layer_str == "SBH") {
+        new_query_shape = {q_1, head_num, q_0, q_2/head_num};
+        new_key_shape = {k_1, head_num, k_0, k_2/head_num};
+        new_value_shape = {v_1, head_num, v_0, v_2/head_num};
+    } else if (input_layer_str == "BSND") {
+        new_query_shape = {q_0, q_2, q_1, q_3};
+        new_key_shape = {k_0, k_2, k_1, k_3};
+        new_value_shape = {v_0, v_2, v_1, v_3};
+    } else if (input_layer_str == "BNSD") {
+        // Already [B, N, S, D]; only the GQA/MQA head fix-up above applies.
+        new_query_shape = {q_0, q_1, q_2, q_3};
+        new_key_shape = {k_0, k_1, k_2, k_3};
+        new_value_shape = {v_0, v_1, v_2, v_3};
+    } else if (input_layer_str == "TND") {
+        TORCH_CHECK(!cum_seq_q.empty(), "The actual_seq_qlen must not be empty when TND");
+        TORCH_CHECK(!cum_seq_k.empty(), "The actual_seq_kvlen must not be empty when TND");
+        TORCH_CHECK(cum_seq_q.size() == cum_seq_k.size(), "The size of actual_seq_qlen must equal actual_seq_kvlen when TND");
+
+        // dim 0 is divided evenly (integer division) by the number of sequences b.
+        int64_t b = cum_seq_q.size();
+        new_query_shape = {b, q_1, q_0/b, q_2};
+        new_key_shape = {b, k_1, k_0/b, k_2};
+        new_value_shape = {b, v_1, v_0/b, v_2};
+    }
+
+    if (!grad_out.empty()) {
+        new_grad_out_shape = new_query_shape;
+    }
+    result.emplace_back(new_query_shape, new_key_shape, new_value_shape, new_grad_out_shape);
+    return result;
+}
+
+int64_t sdpa_flop_count(const std::vector<int64_t> query_shape, const std::vector<int64_t> key_shape, const std::vector<int64_t> value_shape)
+{
+    // Forward SDPA flops for shapes given as [b, h, s, d]: one q @ k^T matmul plus one scores @ v matmul.
+    TORCH_CHECK(query_shape.size() >= 4 && key_shape.size() >= 4 && value_shape.size() >= 4,
+        "sdpa_flop_count expects 4-dimensional [b, h, s, d] shapes");
+    int64_t b, h, s_q, d_q;
+    int64_t _b2, _h2, s_k, _d2;
+    int64_t _b3, _h3, _s3, d_v;
+
+    b = query_shape[0];
+    h = query_shape[1];
+    s_q = query_shape[2];
+    d_q = query_shape[3];
+
+    _b2 = key_shape[0];
+    _h2 = key_shape[1];
+    s_k = key_shape[2];
+    _d2 = key_shape[3];
+
+    _b3 = value_shape[0];
+    _h3 = value_shape[1];
+    _s3 = value_shape[2];
+    d_v = value_shape[3];
+
+    TORCH_CHECK(b == _b2 && b == _b3, "the dim of 0 is not equal between q and kv");
+    TORCH_CHECK(h == _h2 && h == _h3, "the dim of 1 is not equal between q and kv");
+    TORCH_CHECK(s_k == _s3, "the dim of 2 is not equal between k and v");
+    TORCH_CHECK(d_q == _d2, "the dim of 3 is not equal between q and k");
+
+    int64_t total_flops = 0;
+
+    // q: [b, h, s_q, d_q] @ k: [b, h, d_q, s_k] -> scores: [b, h, s_q, s_k]
+    total_flops += b * h * s_q * d_q * s_k * 2;
+
+    // scores: [b, h, s_q, s_k] @ v: [b, h, s_k, d_v] -> out: [b, h, s_q, d_v]
+    total_flops += b * h * s_q * s_k * d_v * 2;
+
+    return total_flops;
+}
+
+int64_t sdpa_backward_flop_count(const std::vector<int64_t> query_shape, const std::vector<int64_t> key_shape, const std::vector<int64_t> value_shape, const std::vector<int64_t> grad_out_shape)
+{
+    // Backward SDPA flops for shapes given as [b, h, s, d]: the four matmuls listed below.
+    TORCH_CHECK(query_shape.size() >= 4 && key_shape.size() >= 4 && value_shape.size() >= 4 && grad_out_shape.size() >= 4,
+        "sdpa_backward_flop_count expects 4-dimensional [b, h, s, d] shapes");
+    int64_t b, h, s_q, d_q;
+    int64_t _b2, _h2, s_k, _d2;
+    int64_t _b3, _h3, _s3, d_v;
+    int64_t _b4, _h4, _s4, d_4;
+
+    b = query_shape[0];
+    h = query_shape[1];
+    s_q = query_shape[2];
+    d_q = query_shape[3];
+
+    _b2 = key_shape[0];
+    _h2 = key_shape[1];
+    s_k = key_shape[2];
+    _d2 = key_shape[3];
+
+    _b3 = value_shape[0];
+    _h3 = value_shape[1];
+    _s3 = value_shape[2];
+    d_v = value_shape[3];
+
+    _b4 = grad_out_shape[0];
+    _h4 = grad_out_shape[1];
+    _s4 = grad_out_shape[2];
+    d_4 = grad_out_shape[3];
+
+    TORCH_CHECK(b == _b2 && b == _b3 && b == _b4, "the dim of 0 is not equal between qkv and grad");
+    TORCH_CHECK(h == _h2 && h == _h3 && h == _h4, "the dim of 1 is not equal between qkv and grad");
+    TORCH_CHECK(s_k == _s3, "the dim of 2 is not equal between k and v");
+    TORCH_CHECK(s_q == _s4, "the dim of 2 is not equal between q and grad");
+    TORCH_CHECK(d_q == _d2, "the dim of 3 is not equal between q and k");
+    TORCH_CHECK(d_v == d_4, "the dim of 3 is not equal between v and grad");
+
+    int64_t total_flops = 0;
+
+    // gradOut: [b, h, s_q, d_v] @ v: [b, h, d_v, s_k] -> gradScores: [b, h, s_q, s_k]
+    total_flops += b * h * s_q * d_v * s_k * 2;
+
+    // scores: [b, h, s_k, s_q] @ gradOut: [b, h, s_q, d_v] -> gradV: [b, h, s_k, d_v]
+    total_flops += b * h * s_k * s_q * d_v * 2;
+
+    // gradScores: [b, h, s_q, s_k] @ k: [b, h, s_k, d_q] -> gradQ: [b, h, s_q, d_q]
+    total_flops += b * h * s_q * s_k * d_q * 2;
+
+    // q: [b, h, d_q, s_q] @ gradScores: [b, h, s_q, s_k] -> gradK: [b, h, d_q, s_k]
+    total_flops += b * h * d_q * s_q * s_k * 2;
+
+    return total_flops;
+}
+
+int64_t FlopCounter::flash_attention_forward_flop(
+    const at::Tensor &query, const at::Tensor &key, const at::Tensor &value, int64_t head_num,
+    const std::string &input_layout, const c10::optional<std::vector<int64_t>> &actual_seq_qlen,
+    const c10::optional<std::vector<int64_t>> &actual_seq_kvlen)
+{
+    // Forward flash-attention flops, computed from the operand shapes and the layout string only.
+    std::vector<int64_t> grad_out_shape;  // stays empty: the forward pass has no output gradient
+    std::vector<int64_t> query_shape(query.sizes().begin(), query.sizes().end());
+    std::vector<int64_t> key_shape(key.sizes().begin(), key.sizes().end());
+    std::vector<int64_t> value_shape(value.sizes().begin(), value.sizes().end());
+    auto ac_seq_qlen_tmp = actual_seq_qlen.value_or(std::vector<int64_t>{});
+    auto ac_seq_kvlen_tmp = actual_seq_kvlen.value_or(std::vector<int64_t>{});
+
+    auto sizes = _unpack_flash_attention_nested_shapes(query_shape, key_shape, value_shape, head_num, grad_out_shape, ac_seq_qlen_tmp, ac_seq_kvlen_tmp, input_layout);
+
+    int64_t total_flops = 0;
+    for (const auto& [query_shape_new, key_shape_new, value_shape_new, _] : sizes) {
+        total_flops += sdpa_flop_count(query_shape_new, key_shape_new, value_shape_new);
+    }
+    return total_flops;
+}
+
+int64_t FlopCounter::flash_attention_backward_flop(
+    const at::Tensor &query, const at::Tensor &key, const at::Tensor &value, const at::Tensor &dy, int64_t head_num,
+    const std::string &input_layout, const c10::optional<std::vector<int64_t>> &actual_seq_qlen,
+    const c10::optional<std::vector<int64_t>> &actual_seq_kvlen)
+{
+    // Backward flash-attention flops. The helper only tests dy_shape for emptiness and then reuses the
+    // normalised query shape for the gradient, so dy_shape is seeded from query and dy itself is not read.
+    std::vector<int64_t> dy_shape(query.sizes().begin(), query.sizes().end());
+    std::vector<int64_t> query_shape(query.sizes().begin(), query.sizes().end());
+    std::vector<int64_t> key_shape(key.sizes().begin(), key.sizes().end());
+    std::vector<int64_t> value_shape(value.sizes().begin(), value.sizes().end());
+    auto ac_seq_qlen_tmp = actual_seq_qlen.value_or(std::vector<int64_t>{});
+    auto ac_seq_kvlen_tmp = actual_seq_kvlen.value_or(std::vector<int64_t>{});
+
+    auto sizes = _unpack_flash_attention_nested_shapes(query_shape, key_shape, value_shape, head_num, dy_shape, ac_seq_qlen_tmp, ac_seq_kvlen_tmp, input_layout);
+
+    int64_t total_flops = 0;
+    for (const auto& [query_shape_new, key_shape_new, value_shape_new, grad_out_shape] : sizes) {
+        total_flops += sdpa_backward_flop_count(query_shape_new, key_shape_new, value_shape_new, grad_out_shape);
+    }
+    return total_flops;
+}
+
+int64_t FlopCounter::gmm_flop_int(const at::TensorList &x, const at::TensorList &weight, c10::optional<std::vector<int64_t>> group_list, int64_t group_type_value)
+{
+    // Grouped-matmul flops with the group list given as host integers that are read as cumulative offsets.
+    // group_type 0 adds 2 * group_size * x.last_dim * weight.last_dim per group, group_type 2 adds
+    // 2 * x.first_dim * group_size * weight.last_dim; any other group_type yields 0.
+    TORCH_CHECK(!x.empty() && !weight.empty() && x[0].dim() > 0 && weight[0].dim() > 0, "gmm flop counting needs a non-scalar first x and weight tensor");
+    int64_t total_flops = 0;
+
+    std::vector<int64_t> x_shape(x[0].sizes().begin(), x[0].sizes().end());
+    std::vector<int64_t> weight_shape(weight[0].sizes().begin(), weight[0].sizes().end());
+    auto group_list_real_ = group_list.value_or(std::vector<int64_t>{});
+    at::IntArrayRef group_list_real(group_list_real_);
+
+    int64_t before_i = 0;
+
+    if (group_type_value == 0) {
+        for (const int64_t after_i : group_list_real) {
+            total_flops += (after_i - before_i) * x_shape.back() * weight_shape.back() * 2;
+            before_i = after_i;
+        }
+    }
+
+    if (group_type_value == 2) {
+        for (const int64_t after_i : group_list_real) {
+            total_flops += x_shape.front() * (after_i - before_i) * weight_shape.back() * 2;
+            before_i = after_i;
+        }
+    }
+
+    return total_flops;
+}
+
+int64_t FlopCounter::gmm_flop_tensor(const at::TensorList &x, const at::TensorList &weight, const c10::optional<at::Tensor> &group_list, int64_t group_type_value)
+{
+    // Same formulas as gmm_flop_int, with the cumulative group list read element by element from a tensor.
+    // A missing group_list becomes an undefined tensor whose numel() drives the loops below.
+    TORCH_CHECK(!x.empty() && !weight.empty() && x[0].dim() > 0 && weight[0].dim() > 0, "gmm flop counting needs a non-scalar first x and weight tensor");
+    int64_t total_flops = 0;
+
+    std::vector<int64_t> x_shape(x[0].sizes().begin(), x[0].sizes().end());
+    std::vector<int64_t> weight_shape(weight[0].sizes().begin(), weight[0].sizes().end());
+    auto group_list_real = group_list.value_or(at::Tensor());
+    auto num_elements = group_list_real.numel();
+
+    int64_t before_i = 0;
+
+    if (group_type_value == 0) {
+        for (int64_t i = 0; i < num_elements; i++) {
+            int64_t after_i = group_list_real[i].item<int64_t>();
+            total_flops += (after_i - before_i) * x_shape.back() * weight_shape.back() * 2;
+            before_i = after_i;
+        }
+    }
+
+    if (group_type_value == 2) {
+        for (int64_t i = 0; i < num_elements; i++) {
+            int64_t after_i = group_list_real[i].item<int64_t>();
+            total_flops += x_shape.front() * (after_i - before_i) * weight_shape.back() * 2;
+            before_i = after_i;
+        }
+    }
+
+    return total_flops;
+}
+
+int64_t FlopCounter::gmm_add_flop(const at::Tensor &x, const at::Tensor &weight, const at::Tensor &group_list)
+{
+    // Flops of the grouped matmul-add: every group adds 2 * x.last_dim * group_size * weight.last_dim,
+    // with group_list read as cumulative offsets.
+    TORCH_CHECK(x.dim() > 0 && weight.dim() > 0, "gmm_add flop counting needs non-scalar x and weight tensors");
+    int64_t total_flops = 0;
+
+    std::vector<int64_t> x_shape(x.sizes().begin(), x.sizes().end());
+    std::vector<int64_t> weight_shape(weight.sizes().begin(), weight.sizes().end());
+    auto num_elements = group_list.numel();
+
+    int64_t before_i = 0;
+
+    for (int64_t i = 0; i < num_elements; i++) {
+        int64_t after_i = group_list[i].item<int64_t>();
+        total_flops += x_shape.back() * (after_i - before_i) * weight_shape.back() * 2;
+        before_i = after_i;
+    }
+
+    return total_flops;
+}
\ No newline at end of file
diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/flop_counter/flop_counter.h b/model/train/yoco_moe/mindspeed/ops/csrc/flop_counter/flop_counter.h
new file mode 100644
index 000000000..dde135d45
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/ops/csrc/flop_counter/flop_counter.h
@@ -0,0 +1,44 @@
+/******************************************************************************
+ * Copyright (c) 2024 Huawei Technologies Co., Ltd
+ * All rights reserved.
+ *
+ * Licensed under the BSD 3-Clause License (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/BSD-3-Clause
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/ + +#ifndef FLOP_COUNTER_MS_H +#define FLOP_COUNTER_MS_H + +#include +#include + +class FlopCounter { +public: + FlopCounter() = default; + ~FlopCounter() = default; + + static int64_t mm_flop(const at::Tensor &tensor1, const at::Tensor &tensor2); + static int64_t coc_flop(const at::Tensor &tensor1, const at::Tensor &tensor2, bool trans, int rankSize, bool is_ag_mm); + static int64_t bmm_flop(const at::Tensor &self, const at::Tensor &mat2); + static int64_t flash_attention_forward_flop(const at::Tensor &query, const at::Tensor &key, const at::Tensor &value, + int64_t head_num, const std::string &input_layout, const c10::optional> &actual_seq_qlen, + const c10::optional> &actual_seq_kvlen); + static int64_t flash_attention_backward_flop(const at::Tensor &query, const at::Tensor &key, const at::Tensor &value, + const at::Tensor &dy, int64_t head_num, const std::string &input_layout, + const c10::optional> &actual_seq_qlen, + const c10::optional> &actual_seq_kvlen); + static int64_t gmm_flop_int(const at::TensorList &x, const at::TensorList &weight, c10::optional> group_list, int64_t group_type_value); + static int64_t gmm_flop_tensor(const at::TensorList &x, const at::TensorList &weight, const c10::optional &group_list, int64_t group_type_value); + static int64_t gmm_add_flop(const at::Tensor &x, const at::Tensor &weight, const at::Tensor &group_list); +}; + +#endif // FLOP_COUNTER_MS_H \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/adaptive_recomputing/NpuCachingCustomAllocator.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/adaptive_recomputing/NpuCachingCustomAllocator.cpp new file mode 100644 index 000000000..fb0724c7c --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/adaptive_recomputing/NpuCachingCustomAllocator.cpp @@ -0,0 +1,219 @@ +#include +#include +#include +#include 
+#include +#include +#include +#include +#include + +#include "NpuCachingCustomAllocator.h" + +std::mutex *NpuCachingCustomAllocator::getFreeMutex() const { + static std::mutex npu_free_mutex; + return &npu_free_mutex; +} + +Block *NpuCachingCustomAllocator::get_allocated_block(void *ptr, bool remove) { + std::lock_guard lock(mutex); + auto it = allocated_blocks.find(ptr); + if (it == allocated_blocks.end()) { + return nullptr; + } + Block *block = it->second; + if (remove) { + allocated_blocks.erase(it); + } + return block; +} + +void NpuCachingCustomAllocator::init(int device_count) { + int max_device_count = 1000000; + TORCH_INTERNAL_ASSERT(device_count < max_device_count, "Error, out of maximum device"); + int size = static_cast(device_allocator.size()); + if (size < device_count) { + device_allocator.resize(device_count); + for (const auto i : c10::irange(size, device_count)) { + device_allocator[i] = std::make_unique(); + } + } +} + +bool NpuCachingCustomAllocator::initialized() { return !device_allocator.empty(); } + +/** allocates a block which is safe to use from the provided stream */ +void *NpuCachingCustomAllocator::malloc(int device, size_t size, aclrtStream stream) { + TORCH_INTERNAL_ASSERT( + 0 <= device && static_cast(device) < device_allocator.size(), + "device index out of range."); + Block *block = device_allocator[device]->malloc(device, size, stream); + add_allocated_block(block); + void *devPtr = static_cast(block->ptr); + return devPtr; +} + +void NpuCachingCustomAllocator::free(void *ptr) { + if (!ptr) { + return; + } + Block *block = get_allocated_block(ptr, true); + if (!block) { + AT_ERROR("invalid device pointer: ", ptr); + } + TORCH_INTERNAL_ASSERT( + 0 <= block->device && static_cast(block->device) < device_allocator.size(), + "device index out of range."); + device_allocator[block->device]->free(block); +} + +void NpuCachingCustomAllocator::emptyCache(bool check_error) { + int count = static_cast(device_allocator.size()); + for (int i 
= 0; i < count; i++) device_allocator[i]->emptyCache(check_error); +} + +void NpuCachingCustomAllocator::assertValidDevice(int device) { + int device_num = c10_npu::device_count(); + AT_ASSERTM(0 <= device && device < device_num, "Invalid device argument."); +} + +DeviceStats NpuCachingCustomAllocator::getDeviceStats(int device) { + assertValidDevice(device); + return device_allocator[device]->getStats(); +} + +void NpuCachingCustomAllocator::resetPeakStats(int device) { + assertValidDevice(device); + device_allocator[device]->resetPeakStats(); +} + +std::string NpuCachingCustomAllocator::name() { return "native"; } + +void CachingAllocatorConfig::lexArgs(const char *env, std::vector &config) { + std::vector buf; + + size_t env_length = strlen(env); + for (size_t i = 0; i < env_length; i++) { + if (env[i] == ',' || env[i] == ':' || env[i] == '[' || env[i] == ']') { + if (!buf.empty()) { + config.emplace_back(buf.begin(), buf.end()); + buf.clear(); + } + config.emplace_back(1, env[i]); + } else if (env[i] != ' ') { + buf.emplace_back(static_cast(env[i])); + } + } + if (!buf.empty()) { + config.emplace_back(buf.begin(), buf.end()); + } +} + +void CachingAllocatorConfig::consumeToken(const std::vector &config, size_t i, const char c) { + TORCH_CHECK(i < config.size() && config[i].compare(std::string(1, c)) == 0, + "Error parsing CachingAllocator settings, expected ", c); +} + +size_t CachingAllocatorConfig::parseMaxSplitSize(const std::vector &config, size_t i) { + consumeToken(config, ++i, ':'); + if (++i < config.size()) { + size_t val1 = 0; + try{ + val1 = static_cast(stoi(config[i])); + } catch (const std::invalid_argument& e){ + TORCH_CHECK(false, "Error, expecting digit string in config"); + } catch (const std::out_of_range& e){ + TORCH_CHECK(false, "Error, out of int range"); + } + TORCH_CHECK(val1 > kLargeBuffer / kUnitMB, "CachingAllocator option max_split_size_mb too small, must be > ", + kLargeBuffer / kUnitMB); + val1 = std::max(val1, kLargeBuffer / 
kUnitMB); + val1 = std::min(val1, (std::numeric_limits::max() / kUnitMB)); + m_max_split_size = val1 * kUnitMB; + } else { + TORCH_CHECK(false, "Error, expecting max_split_size_mb value"); + } + return i; +} + +size_t CachingAllocatorConfig::parseGarbageCollectionThreshold(const std::vector &config, size_t i) { + consumeToken(config, ++i, ':'); + if (++i < config.size()) { + double val1 = 0.0; + try { + val1 = stod(config[i]); + } catch (const std::invalid_argument& e){ + TORCH_CHECK(false, "Error, expecting digital string in config"); + } catch (const std::out_of_range& e) { + TORCH_CHECK(false, "Error, out of double range"); + } + TORCH_CHECK(val1 > 0, "garbage_collect_threshold too small, set it 0.0~1.0"); + TORCH_CHECK(val1 < 1.0, "garbage_collect_threshold too big, set it 0.0~1.0"); + m_garbage_collection_threshold = val1; + } else { + TORCH_CHECK(false, "Error, expecting garbage_collection_threshold value"); + } + return i; +} + +size_t CachingAllocatorConfig::parseExpandableSegments(const std::vector &config, size_t i) { + consumeToken(config, ++i, ':'); + if (++i < config.size()) { + TORCH_CHECK(i < config.size() && (config[i] == "True" || config[i] == "False"), + "Expected a single True/False argument for expandable_segments"); + m_expandable_segments = (config[i] == "True"); + if (m_expandable_segments) { + void *ptr = nullptr; + constexpr size_t virtual_mem_size = 512; + auto status = aclrtReserveMemAddress(&ptr, virtual_mem_size, 0, NULL, 1); + if (status == ACL_ERROR_NONE) { + TORCH_CHECK(aclrtReleaseMemAddress(ptr) == ACL_ERROR_NONE, "aclrtReleaseMemAddress failed."); + } else { + NPU_CHECK_SUPPORT_OR_ERROR(status); + m_expandable_segments = false; + } + } + } else { + TORCH_CHECK(false, "Error, expecting expandable_segments value"); + } + return i; +} + +void CachingAllocatorConfig::parseArgs(const char *env) { + // If empty, set the default values + m_max_split_size = std::numeric_limits::max(); + m_garbage_collection_threshold = 0; + + if (env == 
nullptr) { + return; + } + + std::vector config; + lexArgs(env, config); + + for (size_t i = 0; i < config.size(); i++) { + if (config[i].compare("max_split_size_mb") == 0) { + i = parseMaxSplitSize(config, i); + } else if (config[i].compare("garbage_collection_threshold") == 0) { + i = parseGarbageCollectionThreshold(config, i); + } else if (config[i] == "expandable_segments") { + set_expandable_segments_flag = true; + i = parseExpandableSegments(config, i); + } else { + TORCH_CHECK(false, "Unrecognized CachingAllocator option: ", config[i]); + } + + if (i + 1 < config.size()) { + consumeToken(config, ++i, ','); + } + } + if (m_expandable_segments) { + if (set_expandable_segments_flag) { + } else if (m_max_split_size != std::numeric_limits::max() || m_garbage_collection_threshold != 0) { + m_expandable_segments = false; + } + } +} + +NpuCachingCustomAllocator my_allocator; +void local_raw_delete(void *ptr) { my_allocator.free(ptr); } \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/adaptive_recomputing/NpuCachingCustomAllocator.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/adaptive_recomputing/NpuCachingCustomAllocator.h new file mode 100644 index 000000000..21d186895 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/adaptive_recomputing/NpuCachingCustomAllocator.h @@ -0,0 +1,1593 @@ +#pragma once + +#include +#include +#include + +#include "torch_npu/csrc/core/npu/NPUCachingAllocator.h" + +#include +#include +#include + +#include "acl_base.h" +#include "acl_rt.h" +#include "torch_npu/csrc/core/npu/NPUBlockHandle.h" +#include "torch_npu/csrc/core/npu/NPUEvent.h" +#include "torch_npu/csrc/core/npu/NPUGuard.h" +#include "torch_npu/csrc/core/npu/interface/AsyncTaskQueueInterface.h" +#include "torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.h" + +#include + +#include +#include + +using c10_npu::NPUCachingAllocator::BlockInfo; +using 
c10_npu::NPUCachingAllocator::DeviceStats; +using c10_npu::NPUCachingAllocator::RecordContext; +using c10_npu::NPUCachingAllocator::SegmentInfo; +using c10_npu::NPUCachingAllocator::Stat; +using c10_npu::NPUCachingAllocator::StatArray; +using c10_npu::NPUCachingAllocator::StatType; +using c10_npu::NPUCachingAllocator::TraceEntry; + +using stream_set = ska::flat_hash_set; + +#define NPU_CHECK_SUPPORT_OR_ERROR(err_code, ...) \ + do { \ + auto Error = err_code; \ + static c10_npu::acl::AclErrorCode err_map; \ + if ((Error) != ACL_ERROR_NONE) { \ + if ((Error) == ACL_ERROR_RT_FEATURE_NOT_SUPPORT) { \ + static auto feature_not_support_warn_once = []() { \ + printf("[WARN]%s,%s:%u:%s\n", \ + __FUNCTION__, __FILENAME__, __LINE__, \ + "Feature is not supportted and the possible cause is" \ + " that driver and firmware packages do not match."); \ + return true; \ + }(); \ + } else { \ + TORCH_CHECK( \ + false, \ + __func__, \ + ":", \ + __FILE__, \ + ":", \ + __LINE__, \ + "\n", c10_npu::c10_npu_get_error_message()); \ + } \ + } \ + } while (0) + +typedef std::shared_ptr (*CreateContextFn)(void); + +constexpr size_t kMinBlockSize = 512; // all sizes are rounded to at least 512 bytes +constexpr size_t kSmallSize = 1048576; // largest "small" allocation is 1 MiB +constexpr size_t kSmallBuffer = 2097152; // "small" allocations are packed in 2 MiB blocks +constexpr size_t kLargeBuffer = 20971520; // "large" allocations may be packed in 20 MiB blocks +constexpr size_t kMinLargeAlloc = 10485760; // allocations between 1 and 10 MiB may use kLargeBuffer +constexpr size_t kRoundLarge = 2097152; // round up large allocs to 2 MiB +constexpr size_t kUnitMB = 1024 * 1024; // 1MiB = 1024 * 1024 bytes + +using StatTypes = std::array(StatType::NUM_TYPES)>; + +void update_stat(Stat &stat, int64_t amount) { + stat.current += amount; + stat.peak = std::max(stat.current, stat.peak); + if (amount > 0) { + stat.allocated += amount; + } + if (amount < 0) { + stat.freed += -amount; + } +} + +void 
reset_accumulated_stat(Stat &stat) { + stat.allocated = 0; + stat.freed = 0; +} + +void reset_peak_stat(Stat &stat) { stat.peak = stat.current; } + +template +void for_each_selected_stat_type(const StatTypes &stat_types, Func f) { + for (const auto stat_type : c10::irange(stat_types.size())) { + if (stat_types[stat_type]) { + f(stat_type); + } + } +} + +void update_stat_array(StatArray &stat_array, int64_t amount, const StatTypes &stat_types) { + for_each_selected_stat_type(stat_types, + [&stat_array, amount](size_t stat_type) { update_stat(stat_array[stat_type], amount); }); +} + +struct Block; +using Comparison = bool (*)(const Block *, const Block *); +static bool BlockComparatorSize(const Block *a, const Block *b); +static bool BlockComparatorAddress(const Block *a, const Block *b); + +struct BlockPool { + std::set blocks; + std::set unmapped; + const bool is_small; + + BlockPool(bool small) : blocks(BlockComparatorSize), unmapped(BlockComparatorAddress), is_small(small) {} +}; + +struct ExpandableSegment; + +struct Block { + int device; // npu + aclrtStream stream; // allocation stream + stream_set stream_uses; // streams on which the block was used + size_t size; // block size in bytes + size_t requested_size; // memory originally requested + BlockPool *pool; // owning memory pool + void *ptr; // memory address + bool allocated; // in-use flag + bool mapped{true}; // is the virtual address range this Block references + // backed by physical pages. Always true when + // expandable_segment_ is null. When false + // This Block will be aligned to the segment size + // of its expandable_segment_. 
+ Block *prev; // prev block if split from a larger allocation + Block *next; // next block if split from a larger allocation + int event_count; // number of outstanding NPU events + int gc_count{0}; // counter for prioritizing older / less useful blocks for + // garbage collection + ExpandableSegment *expandable_segment_{nullptr}; + + std::shared_ptr context_when_allocated; + // only set for the first block in the segment (when prev == null) + // this records the frame information when cudaMalloc was called + // whereas context_when_allocated records the last time we handed this + // memory out from our cache. + std::shared_ptr context_when_segment_allocated; + + Block(int device, aclrtStream stream, size_t size, BlockPool *pool, void *ptr) + : device(device), + stream(stream), + stream_uses(), + size(size), + requested_size(0), + pool(pool), + ptr(ptr), + allocated(0), + prev(nullptr), + next(nullptr), + event_count(0), + gc_count(0) {} + + // constructor for search key + Block(int device, aclrtStream stream, size_t size) + : device(device), + stream(stream), + stream_uses(), + size(size), + requested_size(0), + pool(nullptr), + ptr(nullptr), + allocated(0), + prev(nullptr), + next(nullptr), + event_count(0), + gc_count(0) {} + + bool is_split() const { return (prev != nullptr) || (next != nullptr); } + + void splice(Block *before, Block *after) { + if (before) { + TORCH_INTERNAL_ASSERT(before->next == after); + before->next = this; + } + prev = before; + if (after) { + TORCH_INTERNAL_ASSERT(after->prev == before); + after->prev = this; + } + next = after; + } +}; + +struct SegmentRange { + char *ptr; + size_t size; + SegmentRange(void *p, size_t s) : ptr(static_cast(p)), size(s) {} +}; + +struct ExpandableSegment { + ExpandableSegment(int device, aclrtStream stream, size_t size) + : device_(device), + stream_(stream), + max_handles_(0), + // 2MB for small pool, 20MB for large pool + segment_size_(size) { + size_t device_free; + size_t device_total; + 
TORCH_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, &device_free, &device_total) == ACL_ERROR_NONE, "aclrtGetMemInfo failed."); + TORCH_INTERNAL_ASSERT(device_free <= device_total); + // we allocate enough address space for 1 1/8 the total memory on the NPU. + // This allows for some cases where we have to unmap pages earlier in the + // segment to put them at the end. + constexpr size_t extra_space_factor = 8; + max_handles_ = numSegments(device_total + device_total / extra_space_factor); + TORCH_CHECK(aclrtReserveMemAddress(&ptr_, segment_size_ * max_handles_, 0, NULL, 1) == ACL_ERROR_NONE, \ + "Error, failed to reserve memory address"); + } + // begin must be aligned to segment_size_. + // returns the actual range mapped, which may be + // greater than requested if size is not aligned to segment_size_. + // return size of 0 indicates OOM + SegmentRange map(SegmentRange range) { + auto begin = segmentLeft(range.ptr); + auto end = segmentRight(range.ptr + range.size); + TORCH_INTERNAL_ASSERT(ptr() + begin * segment_size_ == range.ptr); + if (begin == end) { + return rangeFromHandles(begin, end); + } + while (end > handles_.size()) { + handles_.emplace_back(c10::nullopt); + } + for (auto i : c10::irange(begin, end)) { + TORCH_INTERNAL_ASSERT(!handles_.at(i)); + aclrtDrvMemHandle handle = nullptr; + aclrtPhysicalMemProp prop = {}; + prop.handleType = ACL_MEM_HANDLE_TYPE_NONE; + prop.allocationType = ACL_MEM_ALLOCATION_TYPE_PINNED; + prop.memAttr = ACL_HBM_MEM_HUGE; + prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE; + prop.location.id = device_; + prop.reserve = 0; + auto status = aclrtMallocPhysical(&handle, segment_size_, &prop, 0); + if (status == ACL_ERROR_RT_MEMORY_ALLOCATION) { + for (auto j : c10::irange(begin, i)) { + auto h = handles_.at(j).value(); + handles_.at(j) = c10::nullopt; + TORCH_CHECK(aclrtFreePhysical(h) == ACL_ERROR_NONE, "aclrtFreePhysical failed."); + } + trimHandles(); + return rangeFromHandles(begin, begin); + } + handles_.at(i) = handle; + } + for 
(auto i : c10::irange(begin, end)) { + TORCH_CHECK(aclrtMapMem(ptr_ + i * segment_size_, segment_size_, 0, handles_.at(i).value(), 0) == ACL_ERROR_NONE, \ + "Error, failed to map memory"); + } + return rangeFromHandles(begin, end); + } + + // unmaps all the completely empty segment_size_ segments between + // [begin, begin + size), returns the offset where the range begin, + // and the actual size unmapped (multiple of segment_size_) + SegmentRange unmap(SegmentRange range) { + auto begin = segmentRight(range.ptr); + auto end = segmentLeft(range.ptr + range.size); + if (begin >= end) { + return SegmentRange{range.ptr, 0}; + } + unmapHandles(begin, end); + return rangeFromHandles(begin, end); + } + + char *ptr() const { return (char *)ptr_; } + + size_t size() const { return max_handles_ * segment_size_; } + + ~ExpandableSegment() { + forEachAllocatedRange([&](size_t begin, size_t end) { unmapHandles(begin, end); }); + TORCH_CHECK(aclrtReleaseMemAddress(ptr_) == ACL_ERROR_NONE, "aclrtReleaseMemAddress failed."); + } + + private: + void unmapHandles(size_t begin, size_t end) { + // note: unlike aclrtFree, MemUnmap and MemRelease do + // not appear to synchronize in all cases, so we have to wait for the + // stream to finish before this memory is truly free. 
+ + // cannot call c10::npu::stream_synchronize because + // it might grab the GIL which can lead to a deadlock + // Locking order must be GIL -> Allocator Lock + TORCH_CHECK(aclrtSynchronizeStream(stream_) == ACL_ERROR_NONE, "aclrtSynchronizeStream failed."); + for (auto i : c10::irange(begin, end)) { + aclrtDrvMemHandle h = handles_.at(i).value(); + handles_.at(i) = c10::nullopt; + TORCH_CHECK(aclrtUnmapMem(ptr_ + segment_size_ * i) == ACL_ERROR_NONE, "aclrtUnmapMem failed."); + TORCH_CHECK(aclrtFreePhysical(h) == ACL_ERROR_NONE, "aclrtFreePhysical failed."); + } + trimHandles(); + } + + void trimHandles() { + while (!handles_.empty() && !handles_.back()) { + handles_.pop_back(); + } + } + + void forEachAllocatedRange(std::function fn) { + auto start = 0; + for (auto i : c10::irange(handles_.size())) { + if (handles_.at(i) && (i == 0 || !handles_.at(i - 1))) { + start = i; + } + if (handles_.at(i) && (i + 1 == handles_.size() || !handles_.at(i + 1))) { + fn(start, i + 1); + } + } + } + + size_t numSegments(size_t size) { return (size + segment_size_ - 1) / segment_size_; } + + size_t segmentLeft(char *p) { + auto size = p - ptr(); + return size / segment_size_; + } + + size_t segmentRight(char *p) { + auto size = p - ptr(); + return numSegments(size); + } + + SegmentRange rangeFromHandles(size_t begin, size_t end) { + TORCH_INTERNAL_ASSERT(end >= begin); + return SegmentRange(ptr() + segment_size_ * begin, segment_size_ * (end - begin)); + } + + int device_; + aclrtStream stream_; + void *ptr_{}; + size_t max_handles_; + size_t segment_size_; + std::vector> handles_; +}; + +static bool BlockComparatorSize(const Block *a, const Block *b) { + if (a->stream != b->stream) { + return reinterpret_cast(a->stream) < reinterpret_cast(b->stream); + } + if (a->size != b->size) { + return a->size < b->size; + } + return reinterpret_cast(a->ptr) < reinterpret_cast(b->ptr); +} + +static bool BlockComparatorAddress(const Block *a, const Block *b) { + if (a->stream != b->stream) 
/**
 * Render a byte count as a human-readable string.
 *
 * Values up to 1024 are printed as an integer followed by " bytes"; larger
 * values are scaled to KiB, MiB or GiB and printed with two decimals. Each
 * unit's upper bound is inclusive (exactly 1048576 bytes prints as
 * "1024.00 KiB").
 */
inline std::string format_size(uint64_t size) {
    struct Unit {
        uint64_t upper_bound;  // largest byte count still shown in this unit
        double divisor;
        const char *suffix;
    };
    static const Unit kScaledUnits[] = {
        {1048576, 1024.0, " KiB"},
        {1073741824ULL, 1048576.0, " MiB"},
    };

    std::ostringstream out;
    out.setf(std::ios::fixed, std::ios::floatfield);
    out.precision(2);

    if (size <= 1024) {
        out << size << " bytes";
        return out.str();
    }
    for (const Unit &unit : kScaledUnits) {
        if (size <= unit.upper_bound) {
            out << (size / unit.divisor) << unit.suffix;
            return out.str();
        }
    }
    out << (size / 1073741824.0) << " GiB";
    return out.str();
}
+ return Event(std::make_unique(ACL_EVENT_CAPTURE_STREAM_PROGRESS).release(), destructor); + } + + void empty_cache() { + for (auto &pool : pools_) { + std::lock_guard g(pool.mutex_); + pool.event_pool_.clear(); + } + } + + private: + struct PerDevicePool { + alignas(64) std::mutex mutex_; + std::vector> event_pool_; + }; + std::vector pools_; +}; + +class CachingAllocatorConfig { + public: + static size_t max_split_size() { return instance().m_max_split_size; } + + static double garbage_collection_threshold() { return instance().m_garbage_collection_threshold; } + + static bool expandable_segments() { return instance().m_expandable_segments; } + + static CachingAllocatorConfig &instance() { + static CachingAllocatorConfig *s_instance = ([]() { + auto inst = new CachingAllocatorConfig(); + const char *env = getenv("PYTORCH_NPU_ALLOC_CONF"); + inst->parseArgs(env); + return inst; + })(); + return *s_instance; + } + + void parseArgs(const char *env); + + private: + size_t m_max_split_size; + double m_garbage_collection_threshold; + bool m_expandable_segments; + bool set_expandable_segments_flag = false; + + CachingAllocatorConfig() + : m_max_split_size(std::numeric_limits::max()), + m_garbage_collection_threshold(0), + m_expandable_segments(true) { + void *ptr = nullptr; + constexpr size_t virtual_mem_size = 512; + auto status = aclrtReserveMemAddress(&ptr, virtual_mem_size, 0, NULL, 1); + if (status == ACL_ERROR_NONE) { + TORCH_CHECK(aclrtReleaseMemAddress(ptr) == ACL_ERROR_NONE, "aclrtReleaseMemAddress failed."); + } else { + m_expandable_segments = false; + } + } + + void lexArgs(const char *env, std::vector &config); + void consumeToken(const std::vector &config, size_t i, const char c); + size_t parseMaxSplitSize(const std::vector &config, size_t i); + size_t parseGarbageCollectionThreshold(const std::vector &config, size_t i); + size_t parseExpandableSegments(const std::vector &config, size_t i); +}; + +class DeviceCachingAllocator { + private: + // lock around 
all operations + mutable std::recursive_mutex mutex; + + // device statistics + DeviceStats stats; + + // unallocated cached blocks larger than 1 MB + BlockPool large_blocks; + + // unallocated cached blocks 1 MB or smaller + BlockPool small_blocks; + + // allocated or in use by a stream + ska::flat_hash_set active_blocks; + + // outstanding acl events + ska::flat_hash_map>> npu_events; + + // record used memory. + size_t total_allocated_memory = 0; + + // record maximum allowed memory. + size_t allowed_memory_maximum = 0; + + // all live expandable segments + std::vector expandable_segments_; + + bool set_fraction = false; + + bool record_history = false; + + std::atomic context_recorder_; + size_t alloc_trace_next = 0; + RecordContext record_context_ = RecordContext::NEVER; + size_t alloc_trace_max_entries_ = 1; + std::vector *alloc_trace; // pointer because we need to intentionally leak this on + // deallocation it can hold references to Python state which + // will already be destroyed when we are in exit handlers + + public: + DeviceCachingAllocator() + : large_blocks(false), small_blocks(true), alloc_trace(new std::vector()) + { + stats.max_split_size = static_cast(CachingAllocatorConfig::max_split_size()); + context_recorder_.store(nullptr); + } + + // Must be called outside of `mutex` or deadlocks are possible with Python + std::shared_ptr maybeGatherContext(RecordContext level) + { + if (record_context_ < level) { + return nullptr; + } + return context_recorder_.load()(); + } + + // All public methods (except the above) acquire the allocator mutex. + // Thus, do not call a public method from another public method. + + Block *malloc(int device, size_t orig_size, aclrtStream stream) { + // done outside the lock because we don't know what locks the recorder needs + // to have... 
+ auto context = maybeGatherContext(RecordContext::STATE); + + std::unique_lock lock(mutex); + + if (device == -1) { + TORCH_CHECK(c10_npu::GetDevice(&device) == ACL_ERROR_NONE, "GetDevice failed."); + } + + // process outstanding npuEvents + process_events(context); + auto size = round_size(orig_size); + auto &pool = get_pool(size); + + const size_t alloc_size = get_allocation_size(size); + AllocParams params(device, size, stream, &pool, alloc_size, stats); + params.stat_types = get_stat_types_for_pool(pool); + + bool block_found = false; + while (!block_found) { + // First, try to get a block from the existing pool. + block_found = + // Search pool + get_free_block(params) || + // Trigger callbacks and retry search + (trigger_free_memory_callbacks(params) && get_free_block(params)); + // Can't reuse an existing block; try to get a new one. + if (!block_found) { + // Do garbage collection if the flag is set. + if (C10_UNLIKELY(set_fraction && CachingAllocatorConfig::garbage_collection_threshold() > 0.0)) { + garbage_collect_cached_blocks(context); + } + // Attempt allocate + block_found = alloc_block(params, false, context, lock) || + // Free enough available cached blocks to satisfy alloc and retry + // alloc. + (release_available_cached_blocks(params, context) && alloc_block(params, false, context, lock)); + } + if (!block_found) { + ASCEND_LOGE( + "Get a block from the existing pool failed. Try to free cached blocks and reallocate. This error log " + "can be ignored."); + // Free all non-split cached blocks and retry alloc. 
+ block_found = (release_cached_blocks(true, context) && alloc_block(params, true, context, lock)); + } + if (!block_found) { + if (params.err == ACL_ERROR_NONE) { + break; + } + PyGILState_STATE state = PyGILState_Ensure(); + PyObject *pModule = PyImport_ImportModule("mindspeed.core.memory.common"); + if (!pModule) { + PyGILState_Release(state); + std::cout << "No MindSpeed Module" << std::endl; + break; + } + PyObject *pFunc = PyObject_GetAttrString(pModule, "swap_out_by_size"); + + PyObject *pArgs = PyTuple_New(1); + TORCH_CHECK(PyTuple_SetItem(pArgs, 0, PyLong_FromLong(size)) == 0, "PyTuple_SetItem failed."); + + PyObject *pResult = PyObject_CallObject(pFunc, pArgs); + bool ret = false; + TORCH_CHECK(PyArg_Parse(pResult, "p", &ret), "PyArg_Parse failed."); + PyGILState_Release(state); + if (!ret) { + std::cout << "SWAP Failed" << std::endl; + break; + } + params.err = ACL_ERROR_NONE; + } + } + if (!block_found) { + if (params.err == ACL_ERROR_RT_MEMORY_ALLOCATION) { + size_t device_free; + size_t device_total; + TORCH_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, &device_free, &device_total) == ACL_ERROR_NONE, "aclrtGetMemInfo failed."); + TORCH_INTERNAL_ASSERT(device_free <= device_total); + + std::string allowed_info; + if (set_fraction) { + allowed_info = format_size(allowed_memory_maximum) + " allowed; "; + } + stats.num_ooms += 1; + // "total capacity": total global memory on NPU + // "allowed": memory is allowed to use, which set by fraction. + // "already allocated": memory allocated by the program using the + // caching allocator + // "free": free memory as reported by the NPU API + // "cached": memory held by the allocator but not used by the program + // + // The "allocated" amount does not include memory allocated outside + // of the caching allocator, such as memory allocated by other programs + // or memory held by the driver. 
+ // + // The sum of "allocated" + "free" + "cached" may be less than the + // total capacity due to memory held by the driver and usage by other + // programs. + // + // Note that at this point free_cached_blocks has already returned all + // possible "cached" memory to the driver. The only remaining "cached" + // memory is split from a larger block that is partially in-use. + AT_ERROR("NPU out of memory. Tried to allocate ", format_size(alloc_size), " (NPU ", device, "; ", + format_size(device_total), " total capacity; ", + format_size(stats.allocated_bytes[static_cast(StatType::AGGREGATE)].current), + " already allocated; ", + format_size(stats.active_bytes[static_cast(StatType::AGGREGATE)].current), " current active; ", + format_size(device_free), " free; ", allowed_info, + format_size(stats.reserved_bytes[static_cast(StatType::AGGREGATE)].current), + " reserved in total by PyTorch)", + " If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation."); + } else { + params.err; + } + } + + bool split_remainder = should_split(params.block, params.size()); + return alloc_found_block(std::move(params), orig_size, std::move(context), split_remainder); + } + + Block *alloc_found_block(AllocParams params, size_t orig_size, std::shared_ptr context, + bool split_remainder) + { + auto size = params.size(); + auto device = params.device(); + auto pool = params.pool; + auto stream = params.stream(); + + TORCH_INTERNAL_ASSERT(params.err == ACL_ERROR_NONE && params.block != nullptr && params.block->ptr != nullptr); + Block *block = params.block; + Block *remaining = nullptr; + + const bool already_split = block->is_split(); + if (split_remainder) { + remaining = block; + + block = new Block(device, stream, size, pool, block->ptr); + block->expandable_segment_ = remaining->expandable_segment_; + block->prev = remaining->prev; + if (block->prev) { + block->prev->next = block; + } + block->next = remaining; + + remaining->prev = block; + 
remaining->ptr = static_cast(remaining->ptr) + size; + remaining->size -= size; + pool->blocks.insert(remaining); + + if (already_split && !block->expandable_segment_) { + // An already-split inactive block is being shrunk by size bytes. + update_stat_array(stats.inactive_split_bytes, -static_cast(block->size), params.stat_types); + } else if (!block->expandable_segment_) { + // A new split inactive block is being created from a previously unsplit + // block, size remaining->size bytes. + for_each_selected_stat_type(params.stat_types, [&](size_t stat_type) { + update_stat(stats.inactive_split_bytes[stat_type], static_cast(remaining->size)); + update_stat(stats.inactive_split[stat_type], 1); + }); + } + } else if (already_split && !block->expandable_segment_) { + // An already-split block is becoming active + for_each_selected_stat_type(params.stat_types, [&](size_t stat_type) { + update_stat(stats.inactive_split_bytes[stat_type], -static_cast(block->size)); + update_stat(stats.inactive_split[stat_type], -1); + }); + } + + block->allocated = true; + block->requested_size = orig_size; + + block->context_when_allocated = std::move(context); + record_trace(TraceEntry::ALLOC, int64_t(block->ptr), orig_size, block->stream, block->device, + block->context_when_allocated); + + active_blocks.insert(block); + + for_each_selected_stat_type(params.stat_types, [&](size_t stat_type) { + update_stat(stats.allocation[stat_type], 1); + update_stat(stats.allocated_bytes[stat_type], static_cast(block->size)); + update_stat(stats.active[stat_type], 1); + update_stat(stats.active_bytes[stat_type], static_cast(block->size)); + update_stat(stats.requested_bytes[stat_type], static_cast(block->requested_size)); + }); + + if (block->size >= CachingAllocatorConfig::max_split_size()) update_stat(stats.oversize_allocations, 1); + + ASCEND_LOGD("PTA CachingAllocator malloc: malloc = %zu, cached = %lu, allocated = %lu", block->size, + 
stats.reserved_bytes[static_cast(StatType::AGGREGATE)].current, + stats.allocated_bytes[static_cast(StatType::AGGREGATE)].current); + + return block; + } + + void free(Block *block) { + std::shared_ptr context = maybeGatherContext(RecordContext::ALL); + std::lock_guard lock(mutex); + + block->allocated = false; + + // following logic might modifying underlaying Block, causing the size + // changed. We store ahead for reporting + auto orig_block_ptr = block->ptr; + auto orig_block_size = block->size; + + StatTypes stat_types = get_stat_types_for_pool(*(block->pool)); + for_each_selected_stat_type(stat_types, [&](size_t stat_type) { + update_stat(stats.allocation[stat_type], -1); + update_stat(stats.allocated_bytes[stat_type], -block->size); + }); + + record_trace(TraceEntry::FREE_REQUESTED, int64_t(block->ptr), block->requested_size, block->stream, block->device, + context ? context : block->context_when_allocated); + + if (block->size >= CachingAllocatorConfig::max_split_size()) update_stat(stats.oversize_allocations, -1); + + if (!block->stream_uses.empty() && c10_npu::NpuSysCtrl::GetInstance().GetInitFlag()) { + insert_events(block); + } else { + free_block(block, context); + } + + ASCEND_LOGD("PTA CachingAllocator free: free = %zu, cached = %lu, allocated = %lu", orig_block_size, + stats.reserved_bytes[static_cast(StatType::AGGREGATE)].current, + stats.allocated_bytes[static_cast(StatType::AGGREGATE)].current); + } + + /** returns cached blocks to the system allocator **/ + void emptyCache(bool check_error) { + std::shared_ptr context = maybeGatherContext(RecordContext::ALL); + std::lock_guard lock(mutex); + TORCH_CHECK(release_cached_blocks(check_error, context), "release_cached_blocks failed."); + } + + /** Returns a copy of the memory allocator stats **/ + DeviceStats getStats() { + std::lock_guard lock(mutex); + return stats; + } + + /** Resets the historical accumulation stats for the device **/ + void resetAccumulatedStats() { + std::lock_guard 
lock(mutex); + + for (size_t statType = 0; statType < static_cast(StatType::NUM_TYPES); ++statType) { + reset_accumulated_stat(stats.allocation[statType]); + reset_accumulated_stat(stats.segment[statType]); + reset_accumulated_stat(stats.active[statType]); + reset_accumulated_stat(stats.inactive_split[statType]); + reset_accumulated_stat(stats.allocated_bytes[statType]); + reset_accumulated_stat(stats.reserved_bytes[statType]); + reset_accumulated_stat(stats.active_bytes[statType]); + reset_accumulated_stat(stats.inactive_split_bytes[statType]); + reset_accumulated_stat(stats.requested_bytes[statType]); + } + + stats.num_alloc_retries = 0; + stats.num_ooms = 0; + reset_accumulated_stat(stats.oversize_allocations); + reset_accumulated_stat(stats.oversize_segments); + } + + /** Resets the historical peak stats for the device **/ + void resetPeakStats() { + std::lock_guard lock(mutex); + + for (size_t statType = 0; statType < static_cast(StatType::NUM_TYPES); ++statType) { + reset_peak_stat(stats.allocation[statType]); + reset_peak_stat(stats.segment[statType]); + reset_peak_stat(stats.active[statType]); + reset_peak_stat(stats.inactive_split[statType]); + reset_peak_stat(stats.allocated_bytes[statType]); + reset_peak_stat(stats.reserved_bytes[statType]); + reset_peak_stat(stats.active_bytes[statType]); + reset_peak_stat(stats.inactive_split_bytes[statType]); + reset_peak_stat(stats.requested_bytes[statType]); + } + + reset_peak_stat(stats.oversize_allocations); + reset_peak_stat(stats.oversize_segments); + } + + std::vector trace() + { + std::lock_guard lock(mutex); + std::vector result; + result.reserve(alloc_trace->size()); + result.insert(result.end(), alloc_trace->begin() + alloc_trace_next, alloc_trace->end()); + result.insert(result.end(), alloc_trace->begin(), alloc_trace->begin() + alloc_trace_next); + + return result; + } + + static size_t round_size(size_t size) { + const size_t align_size = 32; + size = size + align_size; + if (size < kMinBlockSize) { + 
return kMinBlockSize; + } else { + return kMinBlockSize * ((size + kMinBlockSize - 1) / kMinBlockSize); + } + } + + private: + // All private methods do not acquire the allocator mutex. + + std::vector get_all_blocks() const { + std::vector blocks; + blocks.insert(blocks.end(), small_blocks.blocks.begin(), small_blocks.blocks.end()); + blocks.insert(blocks.end(), large_blocks.blocks.begin(), large_blocks.blocks.end()); + blocks.insert(blocks.end(), active_blocks.begin(), active_blocks.end()); + return blocks; + } + + // returns the smallest possible address in any segment + // where there is enough free address space to fit size + // may be composed of free and unmapped segments + Block *find_expandable_block(int device, aclrtStream stream, BlockPool *pool, size_t size) { + Block key(device, stream, 0); + + auto allocatable = [](Block *b) { return b && !b->allocated && b->event_count == 0 && b->stream_uses.empty(); }; + auto has_available_address_space = [&](Block *b) { + size_t bytes = 0; + while (bytes < size && allocatable(b)) { + bytes += b->size; + b = b->next; + } + return bytes >= size; + }; + for (auto it = pool->unmapped.lower_bound(&key); it != pool->unmapped.end() && (*it)->stream == stream; ++it) { + Block *c = *it; + // we found the lowest address of an unmapped segment + // but there might be a free segment we can also use + // right before it + if (allocatable(c->prev)) { + c = c->prev; + } + if (has_available_address_space(c)) { + return c; + } + } + auto segment_size = pool->is_small ? 
kSmallBuffer : kLargeBuffer; + expandable_segments_.emplace_back(new ExpandableSegment(device, stream, segment_size)); + + ExpandableSegment *es = expandable_segments_.back(); + Block *candidate = new Block(device, stream, es->size(), pool, es->ptr()); + candidate->mapped = false; + candidate->expandable_segment_ = es; + pool->unmapped.insert(candidate); + return candidate; + } + + bool map_block(Block *to_map, size_t size, const std::shared_ptr &ctx) + { + TORCH_INTERNAL_ASSERT(!to_map->mapped && size <= to_map->size); + auto mapped_range = to_map->expandable_segment_->map(SegmentRange{to_map->ptr, size}); + // failed to map the memory + if (mapped_range.size == 0) { + return false; + } + TORCH_INTERNAL_ASSERT(mapped_range.ptr == to_map->ptr && mapped_range.size >= size); + + BlockPool &pool = *to_map->pool; + pool.unmapped.erase(to_map); + to_map->mapped = true; + + if (mapped_range.size < to_map->size) { + // to_map -> remaining -> to_map->next(?) + Block *remaining = new Block(to_map->device, to_map->stream, to_map->size - mapped_range.size, &pool, + static_cast(to_map->ptr) + mapped_range.size); + remaining->mapped = false; + remaining->expandable_segment_ = to_map->expandable_segment_; + remaining->splice(to_map, to_map->next); + pool.unmapped.insert(remaining); + to_map->size = mapped_range.size; + } + + TORCH_CHECK(try_merge_blocks(to_map, to_map->prev, pool) >= 0, "try_merge_blocks failed."); + TORCH_CHECK(try_merge_blocks(to_map, to_map->next, pool) >= 0, "try_merge_blocks failed."); + + pool.blocks.insert(to_map); + + // update statistics + total_allocated_memory += mapped_range.size; + StatTypes stat_types = get_stat_types_for_pool(*to_map->pool); + for_each_selected_stat_type( + stat_types, [&](size_t stat_type) { update_stat(stats.reserved_bytes[stat_type], mapped_range.size); }); + + record_trace(TraceEntry::SEGMENT_MAP, int64_t(mapped_range.ptr), mapped_range.size, to_map->stream, to_map->device, + ctx); + if (!to_map->prev && 
!to_map->context_when_segment_allocated) { + to_map->context_when_segment_allocated = ctx; + } + + return true; + } + + Block *try_allocate_expandable_block(int device, aclrtStream stream, BlockPool *pool, size_t size, + const std::shared_ptr &ctx) + { + Block *candidate = find_expandable_block(device, stream, pool, size); + // Candidate is now a list free/unmapped blocks with at least size room: + // unmapped -> null + // unmapped -> free -> * + // free -> unmapped -> * + + if (!candidate->mapped && !map_block(candidate, std::min(candidate->size, size), ctx)) { + return nullptr; + } + TORCH_INTERNAL_ASSERT(candidate->mapped); + + while (candidate->size < size) { + // invariant: free -> unmapped -> * + // map_block will map some of unmapped and merge with free + auto remaining = size - candidate->size; + auto new_candidate = candidate->next; + if (!map_block(new_candidate, std::min(remaining, candidate->next->size), ctx)) { + return nullptr; + } + candidate = new_candidate; + } + pool->blocks.erase(candidate); + return candidate; + } + + /** moves a block into a pool of cached free blocks **/ + void free_block(Block *block, const std::shared_ptr &context) + { + AT_ASSERT(!block->allocated && block->event_count == 0); + + record_trace(TraceEntry::FREE_COMPLETED, int64_t(block->ptr), block->requested_size, block->stream, block->device, + context ? 
context : block->context_when_allocated); + + block->context_when_allocated = nullptr; + size_t original_block_size = block->size; + size_t requested_size = block->requested_size; + + auto &pool = *block->pool; + int64_t net_change_inactive_split_blocks = 0; + int64_t net_change_inactive_split_size = 0; + + const std::array merge_candidates = {block->prev, block->next}; + for (Block *merge_candidate : merge_candidates) { + const int64_t subsumed_size = static_cast(try_merge_blocks(block, merge_candidate, pool)); + if (subsumed_size > 0) { + net_change_inactive_split_blocks -= 1; + net_change_inactive_split_size -= subsumed_size; + } + } + + active_blocks.erase(block); + pool.blocks.insert(block); + + if (block->is_split()) { + net_change_inactive_split_blocks += 1; + net_change_inactive_split_size += static_cast(block->size); + } + + StatTypes stat_types = get_stat_types_for_pool(pool); + for_each_selected_stat_type(stat_types, [&](size_t stat_type) { + // inactive_split tries to capture the idea that blocks + // cannot be freed when requested, but fully free pages + // of expandable blocks can always be freed. + // The logic to track this as statistic is pretty involved, + // so we simply just exclude expandable segements from + // inactive_split + if (!block->expandable_segment_) { + update_stat(stats.inactive_split[stat_type], net_change_inactive_split_blocks); + update_stat(stats.inactive_split_bytes[stat_type], net_change_inactive_split_size); + } + update_stat(stats.active[stat_type], -1); + update_stat(stats.active_bytes[stat_type], -original_block_size); + update_stat(stats.requested_bytes[stat_type], -static_cast(requested_size)); + }); + } + + /** combine previously split blocks. returns the size of the subsumed block, or 0 on failure. 
**/ + size_t try_merge_blocks(Block *dst, Block *src, BlockPool &pool) { + if (!src || src->allocated || src->event_count > 0 || !src->stream_uses.empty() || dst->mapped != src->mapped) { + return 0; + } + + AT_ASSERT(dst->is_split() && src->is_split()); + + if (dst->prev == src) { + dst->ptr = src->ptr; + dst->prev = src->prev; + if (dst->prev) { + dst->prev->next = dst; + } + } else { + dst->next = src->next; + if (dst->next) { + dst->next->prev = dst; + } + } + + const size_t subsumed_size = src->size; + dst->size += subsumed_size; + auto erased = src->mapped ? pool.blocks.erase(src) : pool.unmapped.erase(src); + delete src; + src = nullptr; + + return subsumed_size; + } + + BlockPool &get_pool(size_t size) { + if (size <= kSmallSize) { + return small_blocks; + } else { + return large_blocks; + } + } + + StatTypes get_stat_types_for_pool(const BlockPool &pool) { + StatTypes stat_types = {false}; + stat_types[static_cast(StatType::AGGREGATE)] = true; + stat_types[static_cast(pool.is_small ? StatType::SMALL_POOL : StatType::LARGE_POOL)] = true; + return stat_types; + } + + bool should_split(const Block *block, size_t size) { + TORCH_INTERNAL_ASSERT(block->size >= size); + size_t remaining = block->size - size; + if (block->pool->is_small || CachingAllocatorConfig::expandable_segments()) { + return remaining >= kMinBlockSize; + } else { + return (size < CachingAllocatorConfig::max_split_size()) && (remaining > kSmallSize); + } + } + + static size_t get_allocation_size(size_t size) { + if (size <= kSmallSize) { + return kSmallBuffer; + } else if (size < kMinLargeAlloc) { + return kLargeBuffer; + } else { + return kRoundLarge * ((size + kRoundLarge - 1) / kRoundLarge); + } + } + + bool get_free_block(AllocParams &p) { + BlockPool &pool = *p.pool; + + if (C10_UNLIKELY(set_fraction && CachingAllocatorConfig::garbage_collection_threshold() > 0.0)) { + // Track block reuse interval only when garbage collection is enabled. 
+ for (auto &b : pool.blocks) { + ++b->gc_count; + } + } + auto it = pool.blocks.lower_bound(&p.search_key); + if (it == pool.blocks.end() || (*it)->stream != p.stream()) { + return false; + } + + if ((*it)->expandable_segment_) { + if (CachingAllocatorConfig::expandable_segments()) { + // if we are allocated to the part of the block that is expandable + // for the purposes of "best fit" we consider its size to be the size it + // can expand to, not the size it currently is. This means that we + // sometimes have to search for blocks with bigger 'size' before + // choosing this segment. + auto expandable_size = [](Block *b) { return b->size + (b->next && !b->next->mapped ? b->next->size : 0); }; + auto next = it; + next++; + while ((*it)->expandable_segment_ && next != pool.blocks.end() && (*next)->stream == p.stream() && + expandable_size(*next) < expandable_size(*it)) { + it = next++; + } + } else { + // Rarely expandable segments has been turned off after we have + // already allocated some blocks as expandable. For instance, + // since we cannot share expandable memory via IPC, someone might + // temporarily disable it. 
In this case we need to honor this request + // by only finding non-expandable blocks + do { + it++; + } while (it != pool.blocks.end() && (*it)->expandable_segment_ && (*it)->stream == p.stream()); + if (it == pool.blocks.end() || (*it)->stream != p.stream()) { + return false; + } + } + } + + // Do not return an oversized block for a large request + if ((p.size() < CachingAllocatorConfig::max_split_size()) && + ((*it)->size >= CachingAllocatorConfig::max_split_size())) { + return false; + } + // Allow oversized block size to be rounded up but within a limit + if ((p.size() >= CachingAllocatorConfig::max_split_size()) && ((*it)->size >= p.size() + kLargeBuffer)) { + return false; + } + p.block = *it; + (*it)->gc_count = 0; // Denote this block has been used + pool.blocks.erase(it); + return true; + } + + bool trigger_free_memory_callbacks(AllocParams &p) { + bool freed_memory = false; + return freed_memory; + } + + void garbage_collect_cached_blocks(const std::shared_ptr &ctx) + { + // Free unused cached blocks to reclaim NPU memory. + // Unlike release_cached_blocks(), this does not enforce synchronization and + // therefore should be of less overheads. + + size_t gc_threshold = + static_cast(CachingAllocatorConfig::garbage_collection_threshold() * allowed_memory_maximum); + // No need to trigger GC yet + if (total_allocated_memory <= gc_threshold) { + return; + } + const auto target_size = total_allocated_memory - gc_threshold; + size_t gc_reclaimed = 0; + + // Calculate the total age of the free-able blocks. We'll use it later to + // get "avg age" threshold. + double total_age = 0.0; + int freeable_block_count = 0; + for (auto &b : large_blocks.blocks) { + if (!b->is_split()) { + total_age += b->gc_count; + ++freeable_block_count; + } + } + // No free-able blocks? + if (freeable_block_count == 0) { + return; + } + + TORCH_CHECK(c10_npu::npuSynchronizeDevice(true), "npuSynchronizeDevice failed."); + + // Repeat GC until we reach reclaim > target size. 
+ bool block_freed = true; + while (gc_reclaimed < target_size && block_freed == true && freeable_block_count > 0) { + // Free blocks exceeding this age threshold first. + double age_threshold = total_age / freeable_block_count; + // Stop iteration if we can no longer free a block. + block_freed = false; + + // Free blocks of > avg age. Don't stop upon reaching the target_size, + // we don't want this GC to be triggered frequently. + auto it = large_blocks.blocks.begin(); + while (it != large_blocks.blocks.end()) { + Block *block = *it; + ++it; + if (!block->is_split() && block->gc_count >= age_threshold) { + block_freed = true; + gc_reclaimed += block->size; + total_age -= block->gc_count; // Decrement the age + freeable_block_count--; // One less block that can be freed + release_block(block, ctx); + + ASCEND_LOGD("PTA CachingAllocator gc: free = %zu, cached = %lu, allocated = %lu", block->size, + stats.reserved_bytes[static_cast(StatType::AGGREGATE)].current, + stats.allocated_bytes[static_cast(StatType::AGGREGATE)].current); + } + } + } + } + + bool alloc_block(AllocParams &p, bool isRetry, const std::shared_ptr &ctx, + std::unique_lock &lock) + { + size_t size = p.alloc_size; + void *ptr = nullptr; + + if (isRetry) { + stats.num_alloc_retries += 1; + } + + if (set_fraction && total_allocated_memory + size > allowed_memory_maximum) { + p.err = ACL_ERROR_RT_MEMORY_ALLOCATION; + } else if (CachingAllocatorConfig::expandable_segments()) { + p.block = try_allocate_expandable_block(p.device(), p.stream(), p.pool, p.size(), ctx); + if (p.block) { + p.err = ACL_ERROR_NONE; + } else { + p.err = ACL_ERROR_RT_MEMORY_ALLOCATION; + } + return bool(p.block); + } else { + p.err = aclrtMallocAlign32(&ptr, size, aclrtMemMallocPolicy::ACL_MEM_MALLOC_HUGE_FIRST); + } + + if (p.err != ACL_ERROR_NONE) { + p.err = ACL_ERROR_RT_MEMORY_ALLOCATION; + return false; + } + + total_allocated_memory += size; + p.block = new Block(p.device(), p.stream(), size, p.pool, (char *)ptr); + 
for_each_selected_stat_type(p.stat_types, [&](size_t stat_type) { + update_stat(stats.segment[stat_type], 1); + update_stat(stats.reserved_bytes[stat_type], size); + }); + if (size >= CachingAllocatorConfig::max_split_size()) update_stat(stats.oversize_segments, 1); + ASCEND_LOGD("pta_memory acl_malloc: malloc = %zu, ret = %d", size, p.err); + + // p.block came from new, not cudaMalloc. It should not be nullptr here. + TORCH_INTERNAL_ASSERT(p.block != nullptr && p.block->ptr != nullptr); + record_trace(TraceEntry::SEGMENT_ALLOC, int64_t(p.block->ptr), p.block->size, p.stream(), p.device(), ctx); + p.block->context_when_segment_allocated = ctx; + return true; + } + + /** Free one or more oversize blocks to the system allocator. But only enough to satisfy the target size **/ + bool release_available_cached_blocks(const AllocParams &p, const std::shared_ptr &ctx) + { + if (CachingAllocatorConfig::max_split_size() == std::numeric_limits::max()) { + return false; + } + BlockPool &pool = *p.pool; + Block key = p.search_key; + key.size = + (key.size < CachingAllocatorConfig::max_split_size()) ? CachingAllocatorConfig::max_split_size() : key.size; + auto it = pool.blocks.lower_bound(&key); + + TORCH_CHECK(c10_npu::npuSynchronizeDevice(true), "npuSynchronizeDevice failed."); + + if (it == pool.blocks.end() || (*it)->stream != p.stream()) { + // No single block is large enough; free multiple oversize blocks, starting with the largest + if (it == pool.blocks.begin()) { + return false; + } + size_t totalReleased = 0; + // Back up one item. 
Now on the largest block for the correct stream + --it; + while ((totalReleased < key.size) && ((*it)->size >= CachingAllocatorConfig::max_split_size()) && + ((*it)->stream == p.stream())) { + auto cur = it; + totalReleased += (*it)->size; + if (it != pool.blocks.begin()) { + --it; + release_block(*cur, ctx); + } else { + release_block(*cur, ctx); + break; + } + } + if (totalReleased < key.size) { + return false; + } + } else { + release_block(*it, ctx); + } + return true; + } + + bool release_cached_blocks(bool check_error, const std::shared_ptr &context) + { + // Make sure event deque from taskqueue, then synchronize Event + TORCH_CHECK(c10_npu::npuSynchronizeDevice(check_error), "npuSynchronizeDevice failed."); + + // First ensure that all blocks that can't currently be allocated due to + // outstanding events are returned to the pool. + synchronize_and_free_events(check_error, context); + + // Free all non-split cached blocks + release_blocks(large_blocks, context); + release_blocks(small_blocks, context); + + return true; + } + + void release_expandable_segment(Block *block) { + TORCH_INTERNAL_ASSERT(block->size == block->expandable_segment_->size(), "block disagrees with segment"); + TORCH_INTERNAL_ASSERT(!block->mapped); + auto it = std::find(expandable_segments_.begin(), expandable_segments_.end(), block->expandable_segment_); + TORCH_INTERNAL_ASSERT(it != expandable_segments_.end()); + expandable_segments_.erase(it); + block->pool->unmapped.erase(block); + delete block->expandable_segment_; + block->expandable_segment_ = nullptr; + delete block; + block = nullptr; + } + + void release_block(Block *block, const std::shared_ptr &context) + { + TORCH_INTERNAL_ASSERT(!block->expandable_segment_); + record_trace(TraceEntry::SEGMENT_FREE, int64_t(block->ptr), block->size, block->stream, block->device, + context ? 
context : block->context_when_segment_allocated); + TORCH_CHECK(aclrtFree((void *)block->ptr) == ACL_ERROR_NONE, "aclrtFree failed."); + total_allocated_memory -= block->size; + + auto *pool = block->pool; + + StatTypes stat_types = get_stat_types_for_pool(*pool); + for_each_selected_stat_type(stat_types, [&](size_t stat_type) { + update_stat(stats.segment[stat_type], -1); + update_stat(stats.reserved_bytes[stat_type], -block->size); + }); + + if (block->size >= CachingAllocatorConfig::max_split_size()) update_stat(stats.oversize_segments, -1); + + ASCEND_LOGD("pta_memory acl_free: free_size = %zu", block->size); + + pool->blocks.erase(block); + delete block; + block = nullptr; + } + + void unmap_block(Block *block, const std::shared_ptr &context) + { + auto unmapped = block->expandable_segment_->unmap(SegmentRange{block->ptr, block->size}); + if (unmapped.size == 0) { + return; + } + block->pool->blocks.erase(block); + + ptrdiff_t before_size = static_cast(unmapped.ptr) - static_cast(block->ptr); + if (before_size > 0) { + // prev? -> before_free -> block + Block *before_free = new Block(block->device, block->stream, before_size, block->pool, block->ptr); + before_free->expandable_segment_ = block->expandable_segment_; + before_free->splice(block->prev, block); + block->pool->blocks.insert(before_free); + } + + TORCH_CHECK(block->size >= before_size + unmapped.size, "after size should be greater than or equal to 0"); + auto after_size = block->size - (before_size + unmapped.size); + if (after_size > 0) { + // block -> after_free -> next? 
+ Block *after_free = new Block(block->device, block->stream, after_size, block->pool, + static_cast(unmapped.ptr) + unmapped.size); + after_free->expandable_segment_ = block->expandable_segment_; + after_free->splice(block, block->next); + block->pool->blocks.insert(after_free); + } + + block->ptr = unmapped.ptr; + block->size = unmapped.size; + block->mapped = false; + + TORCH_CHECK(try_merge_blocks(block, block->prev, *block->pool) >= 0, "try_merge_blocks failed."); + TORCH_CHECK(try_merge_blocks(block, block->next, *block->pool) >= 0, "try_merge_blocks failed."); + block->pool->unmapped.insert(block); + + // update statistics + total_allocated_memory -= unmapped.size; + StatTypes stat_types = get_stat_types_for_pool(*block->pool); + for_each_selected_stat_type( + stat_types, [&](size_t stat_type) { update_stat(stats.reserved_bytes[stat_type], -unmapped.size); }); + record_trace(TraceEntry::SEGMENT_UNMAP, int64_t(unmapped.ptr), unmapped.size, block->stream, block->device, + context ? context : block->context_when_segment_allocated); + } + + void release_blocks(BlockPool &pool, const std::shared_ptr &context) + { + std::vector to_unmap; + // Frees all non-split blocks + auto it = pool.blocks.begin(); + while (it != pool.blocks.end()) { + Block *block = *it; + ++it; + if (block->expandable_segment_) { + // unmapping will mutate the free pool + // so just gather what needs to be freed + // to avoid invalidating the iterator + to_unmap.push_back(block); + } else if (!block->prev && !block->next) { + release_block(block, context); + } + } + for (Block *block : to_unmap) { + unmap_block(block, context); + if (!block->prev && !block->next) { + release_expandable_segment(block); + } + } + } + + EventPool::Event create_event_internal(int idx) { + // Leak the event pool to avoid shutdown issues. 
+ static auto *event_pool = new EventPool(); + return event_pool->get(idx); + } + + void synchronize_and_free_events(bool check_error, const std::shared_ptr &context) + { + // Synchronize on outstanding events and then free associated blocks. + for (auto &st : npu_events) { + for (auto &e : st.second) { + EventPool::Event event = std::move(e.first); + Block *block = e.second; + + if (check_error) { + TORCH_CHECK(aclrtSynchronizeEvent(*event) == ACL_ERROR_NONE, "aclrtSynchronizeEvent failed."); + } else { + TORCH_CHECK(aclrtSynchronizeEvent(*event) == ACL_ERROR_NONE, "aclrtSynchronizeEvent failed"); + } + ASCEND_LOGI("Event: aclrtSynchronizeEvent is successfully executed"); + + block->event_count--; + if (block->event_count == 0) { + free_block(block, context); + } + } + } + + npu_events.clear(); + } + + void insert_events(Block *block) { + aclrtContext compiler_ctx = aclrtContext(); + aclError ret_ctx = aclrtGetCurrentContext(&compiler_ctx); + + stream_set streams(std::move(block->stream_uses)); + AT_ASSERT(block->stream_uses.empty()); + for (auto &stream : streams) { + TORCH_CHECK(c10_npu::SetDevice(stream.device_index()) == ACL_ERROR_NONE, "SetDevice failed."); + + EventPool::Event event = create_event_internal(stream.device_index()); + event->record(stream); + ASCEND_LOGI("Event: record DeviceAllocator is successfully executed"); + + block->event_count++; + npu_events[stream].emplace_back(std::move(event), block); + } + if (ret_ctx == ACL_ERROR_NONE) { + TORCH_CHECK(aclrtSetCurrentContext(compiler_ctx) == ACL_ERROR_NONE, "aclrtSetCurrentContext failed."); + } + } + + void process_events(const std::shared_ptr &context) + { + // Process outstanding npuEvents. Events that are completed are removed + // from the queue, and the 'event_count' for the corresponding allocation + // is decremented. Stops at the first event which has not been completed. 
+ // Since events on different devices or streams may occur out of order, + // the processing of some events may be delayed. + for (auto it = npu_events.begin(); it != npu_events.end();) { + while (!it->second.empty()) { + auto &e = it->second.front(); + EventPool::Event event = std::move(e.first); + Block *block = e.second; + + if (!event->query()) { + e.first = std::move(event); + break; + } + + block->event_count--; + if (block->event_count == 0) { + free_block(block, context); + } + it->second.pop_front(); + } + + if (it->second.empty()) { + it = npu_events.erase(it); + } else { + it++; + } + } + } + + void record_trace(TraceEntry::Action action, int64_t addr, size_t size, aclrtStream stream, int device, + std::shared_ptr context) + { + if (!record_history) { + return; + } + + auto te = TraceEntry(action, device, addr, size, stream, + record_context_ >= RecordContext::ALLOC ? std::move(context) : nullptr); + + if (record_history) { + if (alloc_trace->size() < alloc_trace_max_entries_) { + alloc_trace->emplace_back(te); + } else { + (*alloc_trace)[alloc_trace_next++] = te; + if (alloc_trace_next == alloc_trace_max_entries_) { + alloc_trace_next = 0; + } + } + } + } +}; + +void local_raw_delete(void *ptr); + +class NpuCachingCustomAllocator { + private: + std::mutex mutex; + + // allocated blocks by device pointer + ska::flat_hash_map allocated_blocks; + + void add_allocated_block(Block *block) { + std::lock_guard lock(mutex); + allocated_blocks[block->ptr] = block; + } + + public: + std::vector> device_allocator; + + std::mutex *getFreeMutex() const; + Block *get_allocated_block(void *ptr, bool remove = false); + + void setMemoryFraction(double fraction, int device); + void init(int device_count); + bool initialized(); + void emptyCache(bool check_error); + DeviceStats getDeviceStats(int device); + void resetPeakStats(int device); + std::string name(); + void *malloc(int device, size_t size, aclrtStream stream); + void free(void *ptr); + void 
assertValidDevice(int device); +}; + +extern NpuCachingCustomAllocator my_allocator; + +extern "C" { +void *my_malloc(size_t size, int device, aclrtStream stream) { + void *ptr = nullptr; + if (size == 0) { + return ptr; + } + ptr = my_allocator.malloc(device, size, stream); + return ptr; +} + +void my_free(void *ptr, size_t size, int device, aclrtStream stream) { my_allocator.free(ptr); } + +void my_init(int device_count) { my_allocator.init(device_count); } + +void my_empty_cache(bool check_error) { my_allocator.emptyCache(true); } + +DeviceStats my_get_device_stats(int device) { return my_allocator.getDeviceStats(device); } + +void my_reset_peak_stats(int device) { return my_allocator.resetPeakStats(device); } +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/CachingAllocatorConfig.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/CachingAllocatorConfig.cpp new file mode 100644 index 000000000..1e1e4a775 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/CachingAllocatorConfig.cpp @@ -0,0 +1,183 @@ +#include "CachingAllocatorConfig.h" + +size_t CachingAllocatorConfig::max_split_size() { return instance().m_max_split_size; } + +double CachingAllocatorConfig::garbage_collection_threshold() { return instance().m_garbage_collection_threshold; } + +bool CachingAllocatorConfig::expandable_segments() { return instance().m_expandable_segments; } + +double CachingAllocatorConfig::default_lc_threshold() { return instance().m_default_lc_threshold; } + +bool CachingAllocatorConfig::open_memory_optimize() { return instance().m_open_memory_optimize; } + +CachingAllocatorConfig::CachingAllocatorConfig() + : m_max_split_size(std::numeric_limits::max()), + m_garbage_collection_threshold(0), + m_expandable_segments(false), + m_default_lc_threshold(0), + 
m_open_memory_optimize(false) {}
+
+// Return the process-wide configuration. It is built on first use from the
+// PYTORCH_NPU_ALLOC_CONF environment variable; the instance is allocated with `new`
+// and never deleted.
+CachingAllocatorConfig& CachingAllocatorConfig::instance() {
+    static CachingAllocatorConfig *s_instance = ([]() {
+        auto inst = new CachingAllocatorConfig();
+        const char* env = getenv("PYTORCH_NPU_ALLOC_CONF");
+        inst->parseArgs(env);
+        return inst;
+    })();
+    return *s_instance;
+}
+
+// Split `env` into tokens appended to `config`: each of ',', ':', '[' and ']' becomes a
+// one-character token, spaces are dropped, and every other run of characters becomes one
+// token. `env` must not be null, because strlen is called on it.
+void CachingAllocatorConfig::lexArgs(const char* env, std::vector<std::string>& config) {
+    std::vector<char> buf;
+
+    size_t env_length = strlen(env);
+    for (size_t i = 0; i < env_length; i++) {
+        if (env[i] == ',' || env[i] == ':' || env[i] == '[' || env[i] == ']') {
+            if (!buf.empty()) {
+                config.emplace_back(buf.begin(), buf.end());
+                buf.clear();
+            }
+            config.emplace_back(1, env[i]);
+        } else if (env[i] != ' ') {
+            buf.emplace_back(static_cast<char>(env[i]));
+        }
+    }
+    if (!buf.empty()) {
+        config.emplace_back(buf.begin(), buf.end());
+    }
+}
+
+// Fail (TORCH_CHECK) unless token `i` exists and is exactly the single character `c`.
+void CachingAllocatorConfig::consumeToken(const std::vector<std::string>& config, size_t i, const char c) {
+    TORCH_CHECK(i < config.size() && config[i].compare(std::string(1, c)) == 0,
+                "Error parsing CachingAllocator settings, expected ", c);
+}
+
+// Parse "max_split_size_mb : <int>" starting at token `i` (the option name) and store the
+// value, converted to bytes, in m_max_split_size. Returns the index of the value token.
+// Fails (TORCH_CHECK) when the value is missing, is not an integer, does not fit in an int,
+// or is not greater than kLargeBuffer / kUnitMB.
+size_t CachingAllocatorConfig::parseMaxSplitSize(const std::vector<std::string>& config, size_t i) {
+    consumeToken(config, ++i, ':');
+    if (++i < config.size()) {
+        // Keep the parsed number signed until it has been range-checked: casting a negative
+        // value to size_t first would wrap it to a huge number that passes the lower-bound test.
+        int parsed = 0;
+        try {
+            parsed = stoi(config[i]);
+        } catch (const std::invalid_argument& e) {
+            TORCH_CHECK(false, "Error, expecting digital string in config");
+        } catch (const std::out_of_range& e) {
+            TORCH_CHECK(false, "Error, out of int range");
+        }
+        TORCH_CHECK(parsed >= 0 && static_cast<size_t>(parsed) > kLargeBuffer / kUnitMB,
+                    "CachingAllocator option max_split_size_mb too small, must be > ",
+                    kLargeBuffer / kUnitMB);
+        size_t val1 = static_cast<size_t>(parsed);
+        // Clamp so that the conversion to bytes below cannot overflow size_t.
+        val1 = std::min(val1, (std::numeric_limits<size_t>::max() / kUnitMB));
+        m_max_split_size = val1 * kUnitMB;
+    } else {
+        TORCH_CHECK(false, "Error, expecting max_split_size_mb value");
+    }
+    return i;
+}
+
+size_t CachingAllocatorConfig::parseGarbageCollectionThreshold(const std::vector<std::string>& config, size_t
i) { + consumeToken(config, ++i, ':'); + if (++i < config.size()) { + double val1 = 0.0; + try { + val1 = stod(config[i]); + } catch (const std::invalid_argument& e){ + TORCH_CHECK(false, "Error, expecting digital string in config"); + } catch (const std::out_of_range& e) { + TORCH_CHECK(false, "Error, out of double range"); + } + TORCH_CHECK(val1 > 0, "garbage_collect_threshold too small, set it 0.0~1.0"); + TORCH_CHECK(val1 < 1.0, "garbage_collect_threshold too big, set it 0.0~1.0"); + m_garbage_collection_threshold = val1; + } else { + TORCH_CHECK(false, "Error, expecting garbage_collection_threshold value"); + } + return i; +} + +size_t CachingAllocatorConfig::parseExpandableSegments(const std::vector& config, size_t i) { + consumeToken(config, ++i, ':'); + if (++i < config.size()) { + TORCH_CHECK(i < config.size() && (config[i] == "True" || config[i] == "False"), + "Expected a single True/False argument for expandable_segments"); + m_expandable_segments = (config[i] == "True"); + void* ptr = nullptr; + constexpr size_t virtual_mem_size = 512; + TORCH_CHECK(aclrtReserveMemAddress(&ptr, virtual_mem_size, 0, NULL, 1) == ACL_ERROR_NONE, \ + "Error, failed to reserve memory address"); + TORCH_CHECK(aclrtReleaseMemAddress(ptr) == ACL_ERROR_NONE, \ + "Error, failed to release memory address"); + } else { + TORCH_CHECK(false, "Error, expecting expandable_segments value"); + } + return i; +} + +size_t CachingAllocatorConfig::parseDefaultLcThreshold(const std::vector &config, size_t i) { + consumeToken(config, ++i, ':'); + if (++i < config.size()) { + double val1 = 0.0; + try { + val1 = stod(config[i]); + } catch (const std::invalid_argument& e){ + TORCH_CHECK(false, "Error, expecting digital string in config"); + } catch (const std::out_of_range& e) { + TORCH_CHECK(false, "Error, out of double range"); + } + TORCH_CHECK(val1 >= 0, "default_lc_threshold too small, set it 0.0~INF"); + m_default_lc_threshold = val1; + } else { + TORCH_CHECK(false, "Error, expecting 
default_lc_threshold value"); + } + return i; +} + +size_t CachingAllocatorConfig::parseOpenMemoryOptimize(const std::vector &config, size_t i) { + consumeToken(config, ++i, ':'); + if (++i < config.size()) { + if (config[i] == "true" || config[i] == "1") { + m_open_memory_optimize = true; + } else if (config[i] == "false" || config[i] == "0") { + m_open_memory_optimize = false; + } else { + TORCH_CHECK(false, "Error, open_memory_optimize should be true or false or 1 or 0"); + } + } else { + TORCH_CHECK(false, "Error, expecting open_memory_optimize value"); + } + return i; +} + +void CachingAllocatorConfig::parseArgs(const char* env) { + // If empty, set the default values + m_max_split_size = std::numeric_limits::max(); + m_garbage_collection_threshold = 0; + m_default_lc_threshold = 0; + m_open_memory_optimize = false; + + if (env == nullptr) { + return; + } + + std::vector config; + lexArgs(env, config); + + for (size_t i = 0; i < config.size(); i++) { + if (config[i].compare("max_split_size_mb") == 0) { + i = parseMaxSplitSize(config, i); + } else if (config[i].compare("garbage_collection_threshold") == 0) { + i = parseGarbageCollectionThreshold(config, i); + } else if (config[i] == "expandable_segments") { + i = parseExpandableSegments(config, i); + } else if (config[i].compare("default_lc_threshold") == 0) { + i = parseDefaultLcThreshold(config, i); + } else if (config[i].compare("open_memory_optimize") == 0) { + i = parseOpenMemoryOptimize(config, i); + } else { + TORCH_CHECK(false, "Unrecognized CachingAllocator option: ", config[i]); + } + + if (i + 1 < config.size()) { + consumeToken(config, ++i, ','); + } + } +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/CachingAllocatorConfig.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/CachingAllocatorConfig.h new file mode 100644 index 000000000..08b1f7bbb --- /dev/null +++ 
b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/CachingAllocatorConfig.h @@ -0,0 +1,41 @@ +#ifndef PLUGGABLEALLOCATOR_CACHINGALLOCATORCONFIG_H +#define PLUGGABLEALLOCATOR_CACHINGALLOCATORCONFIG_H + +#include "common.h" +#include "Recorder.h" + +class CachingAllocatorConfig { +public: + static size_t max_split_size(); + + static double garbage_collection_threshold(); + + static bool expandable_segments(); + + static double default_lc_threshold(); + + static bool open_memory_optimize(); + + static CachingAllocatorConfig &instance(); + + void parseArgs(const char* env); + +private: + size_t m_max_split_size; + double m_garbage_collection_threshold; + bool m_expandable_segments; + double m_default_lc_threshold; + bool m_open_memory_optimize; + + CachingAllocatorConfig(); + + void lexArgs(const char* env, std::vector& config); + void consumeToken(const std::vector& config, size_t i, const char c); + size_t parseMaxSplitSize(const std::vector& config, size_t i); + size_t parseGarbageCollectionThreshold(const std::vector& config, size_t i); + size_t parseExpandableSegments(const std::vector& config, size_t i); + size_t parseDefaultLcThreshold(const std::vector& config, size_t i); + size_t parseOpenMemoryOptimize(const std::vector& config, size_t i); +}; + +#endif diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Decorator.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Decorator.cpp new file mode 100644 index 000000000..697623749 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Decorator.cpp @@ -0,0 +1,25 @@ +#include "Decorator.h" + +void Decorator::memory_recorder_start() { + MemoryRecorder::start_record(); +} + +void Decorator::memory_recorder_end() { + MemoryRecorder::end_record(); +} + +void Decorator::malloc_recorder_start() { + MallocRecorder::start_record(); +} + +void 
Decorator::malloc_recorder_end() { + MallocRecorder::end_record(); +} + +void Decorator::precise_match_start() { + is_precise_match = true; +} + +void Decorator::precise_match_end() { + is_precise_match = false; +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Decorator.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Decorator.h new file mode 100644 index 000000000..f936adb58 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Decorator.h @@ -0,0 +1,16 @@ +#ifndef PLUGGABLEALLOCATOR_DECORATOR_H +#define PLUGGABLEALLOCATOR_DECORATOR_H + +#include "Recorder.h" + +class Decorator { +public: + static void memory_recorder_start(); + static void memory_recorder_end(); + static void malloc_recorder_start(); + static void malloc_recorder_end(); + static void precise_match_start(); + static void precise_match_end(); +}; + +#endif diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/DeviceCachingAllocator.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/DeviceCachingAllocator.cpp new file mode 100644 index 000000000..25614da15 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/DeviceCachingAllocator.cpp @@ -0,0 +1,1444 @@ +#include "DeviceCachingAllocator.h" +#include + +DeviceCachingAllocator::DeviceCachingAllocator() + : long_lc_pools(BLOCK_POOL_LONG), default_lc_pools(BLOCK_POOL_DEFAULT) { + stats.max_split_size = static_cast(CachingAllocatorConfig::max_split_size()); +} + +void DeviceCachingAllocator::print_memory_analysis() { + std::vector seg_array = snapshot(); + std::map> memory_cnt; + for (SegmentInfo &seg : seg_array) { + std::string seg_type = seg.is_large ? 
"large" : "small"; +#ifdef MEMORY_RECORDER_DEBUG + seg_type += seg.type_str; +#endif + printf("SEG info: dev %ld, size %lu, allocated %lu, type %s\n", seg.device, seg.total_size, seg.allocated_size, + seg_type.c_str()); + std::vector active, inactive, allocated, notallocated; + for (BlockInfo& blk : seg.blocks) { + if (blk.active) { + active.push_back(blk.size); + } else { + inactive.push_back(blk.size); + } + if (blk.allocated) { + allocated.push_back(blk.size); + } else { + notallocated.push_back(blk.size); + } + } + size_t active_cnt = std::accumulate(active.begin(), active.end(), (size_t)0); + size_t inactive_cnt = std::accumulate(inactive.begin(), inactive.end(), (size_t)0); + size_t allocated_cnt = std::accumulate(allocated.begin(), allocated.end(), (size_t)0); + size_t notallocated_cnt = std::accumulate(notallocated.begin(), notallocated.end(), (size_t)0); + + auto& active_info = memory_cnt[seg_type + "-active"]; + auto& inactive_info = memory_cnt[seg_type + "-inactive"]; + auto& allocated_info = memory_cnt[seg_type + "-allocated"]; + auto& notallocated_info = memory_cnt[seg_type + "-notallocated"]; + + active_info.first += active.size(); + active_info.second += active_cnt; + inactive_info.first += inactive.size(); + inactive_info.second += inactive_cnt; + + allocated_info.first += allocated.size(); + allocated_info.second += allocated_cnt; + notallocated_info.first += notallocated.size(); + notallocated_info.second += notallocated_cnt; + } + for (auto &x : memory_cnt) { + printf("%s cnt %lu size %lu\n", x.first.c_str(), x.second.first, x.second.second); + } + TORCH_CHECK(fflush(stdout) == 0, "fflush failed."); +} + +Block* DeviceCachingAllocator::malloc(int device, size_t orig_size, aclrtStream stream) { + std::unique_lock lock(mutex); + if (device == -1) { + TORCH_CHECK(c10_npu::GetDevice(&device) == ACL_ERROR_NONE, "GetDevice failed."); + } + + size_t tensor_forward_end = std::numeric_limits::max(); + size_t tensor_forward_start = recorder.forward_tik; + 
// Obtain the lifecycle of the current tensor + LifeCycleType lc = recorder.get_lc(orig_size, &tensor_forward_end, &tensor_forward_start); + // Call the current malloc_internal to apply for a block + Block *ret = malloc_internal(device, orig_size, stream, lc, tensor_forward_end, tensor_forward_start); + return ret; +} + +Block* DeviceCachingAllocator::malloc_internal(int device, size_t orig_size, aclrtStream stream, LifeCycleType lc, + size_t tensor_forward_end, size_t tensor_forward_start) { + std::unique_lock lock(mutex); + + if (device == -1) { + TORCH_CHECK(c10_npu::GetDevice(&device) == ACL_ERROR_NONE, "GetDevice failed."); + } + + // process outstanding npuEvents + process_events(); + auto size = round_size(orig_size); + size_t tensor_step_end = std::numeric_limits::max(); + size_t tensor_step_start = malloc_recorder.tik; + // Determine if a tensor has a long lifecycle + if (!is_precise_match && malloc_recorder.predict_long(size, &tensor_step_end, &tensor_step_start)) { + lc = LifeCycleType::LONG_LC; + } + + size_t default_lc_threshold = + static_cast(static_cast(CachingAllocatorConfig::default_lc_threshold())); + if (size <= default_lc_threshold) lc = LifeCycleType::DEFAULT_LC; + + auto pool_list = get_pool_list(size, lc); + size_t alloc_size = 0; + AllocParams params(device, size, stream, pool_list[0], alloc_size, stats); + bool block_found = false; + + while (!block_found) { + BlockPool* pool; + pool_idx = 0; + + for (auto iter_pool : pool_list) { + pool = iter_pool; + alloc_size = get_allocation_size( + size, pool == &long_lc_pools.large_blocks ? 
LifeCycleType::LONG_LC : LifeCycleType::DEFAULT_LC); + AllocParams _params(device, size, stream, pool, alloc_size, stats); + _params.stat_types = get_stat_types_for_pool(*pool); + + if (CachingAllocatorConfig::open_memory_optimize()) { // When the tensor lifecycle conflicts + block_found = + // Search pool + get_free_block_memory_optimize(_params, tensor_forward_end, tensor_step_end, tensor_forward_start, + tensor_step_start) + // Trigger callbacks and retry search + || (trigger_free_memory_callbacks(_params) && + get_free_block_memory_optimize(_params, tensor_forward_end, tensor_step_end, tensor_forward_start, + tensor_step_start)); + } else { + block_found = + // Search pool + get_free_block(_params) + // Trigger callbacks and retry search + || (trigger_free_memory_callbacks(_params) && get_free_block(_params)); + } + + params = _params; + + if (block_found) { + break; + } + pool_idx++; + } + + if (!block_found) { + pool = pool_list[0]; + alloc_size = get_allocation_size( + size, pool == &long_lc_pools.large_blocks ? LifeCycleType::LONG_LC : LifeCycleType::DEFAULT_LC); + AllocParams _params(device, size, stream, pool, alloc_size, stats); + _params.stat_types = get_stat_types_for_pool(*pool); + + block_found = + // Attempt allocate + alloc_block(_params, false) || + // Free enough available cached blocks to satisfy alloc and retry alloc. + (release_available_cached_blocks(_params) && alloc_block(_params, false)); + params = _params; + } + + pool_idx = 0; + if (!block_found) { + // Prioritize searching in another pool + for (auto pool_it = pool_list.rbegin(); pool_it != pool_list.rend(); ++pool_it) { + pool = *pool_it; + alloc_size = get_allocation_size( + size, pool == &long_lc_pools.large_blocks ? 
LifeCycleType::LONG_LC : LifeCycleType::DEFAULT_LC); + AllocParams _params(device, size, stream, pool, alloc_size, stats); + _params.stat_types = get_stat_types_for_pool(*pool); + + block_found = + // Search pool + get_free_block_after_alloc(_params) + // Trigger callbacks and retry search + || (trigger_free_memory_callbacks(_params) && get_free_block_after_alloc(_params)); + + params = _params; + + if (block_found) { + break; + } + pool_idx++; + } + } + + // When it is a small tensor, search in a large memory pool to prevent OOM + if (!block_found && size <= kSmallSize) { + pool_list = get_pool_list(kLargeBuffer, lc); + for (auto pool_it = pool_list.begin(); pool_it != pool_list.end(); ++pool_it) { + pool = *pool_it; + alloc_size = get_allocation_size( + size, pool == &long_lc_pools.large_blocks ? LifeCycleType::LONG_LC : LifeCycleType::DEFAULT_LC); + AllocParams _params(device, size, stream, pool, alloc_size, stats); + _params.stat_types = get_stat_types_for_pool(*pool); + + block_found = + // Search pool + get_free_block_after_alloc(_params) + // Trigger callbacks and retry search + || (trigger_free_memory_callbacks(_params) && get_free_block_after_alloc(_params)); + + params = _params; + + if (block_found) { + break; + } + pool_idx++; + } + } + + if (!block_found) { + pool = pool_list[0]; + alloc_size = get_allocation_size( + size, pool == &long_lc_pools.large_blocks ? 
LifeCycleType::LONG_LC : LifeCycleType::DEFAULT_LC); + + if (pool == &long_lc_pools.large_blocks || pool == &long_lc_pools.small_blocks) { + printf("try long_lc pool fail, size:%lu\n", alloc_size); + } else { + printf("try default_lc pool fail, size:%lu\n", alloc_size); + } + AllocParams _params(device, size, stream, pool, alloc_size, stats); + _params.stat_types = get_stat_types_for_pool(*pool); + block_found = release_cached_blocks_default(true) && release_cached_blocks_long(true) && alloc_block(_params, true); + params = _params; + } + + if (!block_found) { + if (params.err == ACL_ERROR_NONE) { + break; + } + PyGILState_STATE state = PyGILState_Ensure(); + PyObject *pModule = PyImport_ImportModule("mindspeed.core.memory.adaptive_recomputing.swap_manager"); + if (!pModule) { + std::cout << "No MindSpeed Module" << std::endl; + PyGILState_Release(state); + break; + } + PyObject *pFunc1 = PyObject_GetAttrString(pModule, "SwapManager"); + PyObject *pClass = PyObject_CallObject(pFunc1, nullptr); + PyObject *pFunc2 = PyObject_GetAttrString(pClass, "swap_out_by_size"); + + PyObject *pArgs = PyTuple_New(1); + TORCH_CHECK(PyTuple_SetItem(pArgs, 0, PyLong_FromLong(size)) == 0, "PyTuple_SetItem failed."); + + PyObject *pResult = PyObject_CallObject(pFunc2, pArgs); + bool ret = false; + TORCH_CHECK(PyArg_Parse(pResult, "p", &ret), "PyArg_Parse failed."); + PyGILState_Release(state); + if (!ret) { + std::cout << "SWAP Failed" << std::endl; + break; + } + params.err = ACL_ERROR_NONE; + } + } + + if (!block_found) { + if (params.err == ACL_ERROR_RT_MEMORY_ALLOCATION) { + size_t device_free; + size_t device_total; + TORCH_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, &device_free, &device_total) == ACL_ERROR_NONE, "aclrtGetMemInfo failed."); + TORCH_INTERNAL_ASSERT(device_free <= device_total); + + std::string allowed_info; + if (set_fraction) { + allowed_info = format_size(allowed_memory_maximum) + " allowed; "; + } + stats.num_ooms += 1; + print_memory_analysis(); + // "total capacity": 
total global memory on NPU + // "allowed": memory is allowed to use, which set by fraction. + // "already allocated": memory allocated by the program using the + // caching allocator + // "free": free memory as reported by the NPU API + // "cached": memory held by the allocator but not used by the program + // + // The "allocated" amount does not include memory allocated outside + // of the caching allocator, such as memory allocated by other programs + // or memory held by the driver. + // + // The sum of "allocated" + "free" + "cached" may be less than the + // total capacity due to memory held by the driver and usage by other + // programs. + // + // Note that at this point free_cached_blocks has already returned all + // possible "cached" memory to the driver. The only remaining "cached" + // memory is split from a larger block that is partially in-use. + AT_ERROR("NPU out of memory. Tried to allocate ", format_size(alloc_size), " (NPU ", device, "; ", + format_size(device_total), " total capacity; ", + format_size(stats.allocated_bytes[static_cast(StatType::AGGREGATE)].current), + " already allocated; ", + format_size(stats.active_bytes[static_cast(StatType::AGGREGATE)].current), " current active; ", + format_size(device_free), " free; ", allowed_info, + format_size(stats.reserved_bytes[static_cast(StatType::AGGREGATE)].current), + " reserved in total by PyTorch)", + " If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation."); + } + } + + bool split_remainder = should_split(params.block, params.size()); + return alloc_found_block(std::move(params), orig_size, split_remainder); +} + +Block* DeviceCachingAllocator::alloc_found_block(AllocParams params, size_t orig_size, bool split_remainder) { + auto size = params.size(); + auto device = params.device(); + auto pool = params.pool; + auto stream = params.stream(); + + TORCH_INTERNAL_ASSERT(params.err == ACL_ERROR_NONE && params.block != nullptr && params.block->ptr != 
nullptr); + Block* block = params.block; + Block* remaining = nullptr; + + const bool already_split = block->is_split(); + if (split_remainder) { + remaining = block; + + block = new Block(device, stream, size, pool, block->ptr); + block->expandable_segment_ = remaining->expandable_segment_; + block->prev = remaining->prev; + if (block->prev) { + block->prev->next = block; + } + block->next = remaining; + + remaining->prev = block; + remaining->ptr = static_cast(remaining->ptr) + size; + remaining->size -= size; + pool->blocks.insert(remaining); + + if (already_split && !block->expandable_segment_) { + // An already-split inactive block is being shrunk by size bytes. + update_stat_array(stats.inactive_split_bytes,-static_cast(block->size), params.stat_types); + } else if (!block->expandable_segment_) { + // A new split inactive block is being created from a previously unsplit + // block, size remaining->size bytes. + for_each_selected_stat_type(params.stat_types, [&](size_t stat_type) { + update_stat(stats.inactive_split_bytes[stat_type], static_cast(remaining->size)); + update_stat(stats.inactive_split[stat_type], 1); + }); + } + } else if (already_split && !block->expandable_segment_) { + // An already-split block is becoming active + for_each_selected_stat_type(params.stat_types, [&](size_t stat_type) { + update_stat(stats.inactive_split_bytes[stat_type], -static_cast(block->size)); + update_stat(stats.inactive_split[stat_type], -1); + }); + } + + block->allocated = true; + block->requested_size = orig_size; + + active_blocks.insert(block); + + for_each_selected_stat_type(params.stat_types, [&](size_t stat_type) { + update_stat(stats.allocation[stat_type], 1); + update_stat(stats.allocated_bytes[stat_type], static_cast(block->size)); + update_stat(stats.active[stat_type], 1); + update_stat(stats.active_bytes[stat_type], static_cast(block->size)); + update_stat(stats.requested_bytes[stat_type], static_cast(block->requested_size)); + }); + + if (block->size >= 
CachingAllocatorConfig::max_split_size()) update_stat(stats.oversize_allocations, 1); + + ASCEND_LOGD("PTA CachingAllocator malloc: malloc = %zu, cached = %lu, allocated = %lu", block->size, + stats.reserved_bytes[static_cast(StatType::AGGREGATE)].current, + stats.allocated_bytes[static_cast(StatType::AGGREGATE)].current); + + c10::reportMemoryUsageToProfiler(block->ptr, block->size, + stats.allocated_bytes[static_cast(StatType::AGGREGATE)].current, + stats.reserved_bytes[static_cast(StatType::AGGREGATE)].current, + c10::Device(c10::DeviceType::PrivateUse1, block->device)); + + // Record the forward information of the tensor + block->forward_start_tik = recorder.forward_tik++; + recorder.add(block->forward_start_tik, std::numeric_limits::max(), orig_size); + block->orig_size = orig_size; + block->forward_count = recorder.forward_count; + block->in_forward = _check(); + + // Record the step information of the tensor + block->start_tik = malloc_recorder.tik++; + malloc_recorder.add(block->start_tik, std::numeric_limits::max(), size); + block->tensor_size = size; + block->step_count = malloc_recorder.step_count; + block->in_step = malloc_recorder._check(); + + return block; +} + +void DeviceCachingAllocator::free(Block* block) { + std::lock_guard lock(mutex); + + block->allocated = false; + + // Tensor information processing in the step stage + TORCH_INTERNAL_ASSERT(malloc_recorder.step_count >= block->step_count); + unsigned int step_distance = malloc_recorder.step_count - block->step_count; + malloc_recorder.change_end_tik(block->start_tik, malloc_recorder.tik, block->tensor_size, step_distance, + block->in_step); + + // following logic might modifying underlaying Block, causing the size + // changed. 
We store ahead for reporting + auto orig_block_ptr = block->ptr; + auto orig_block_size = block->size; + + // Tensor information processing in the forward stage + TORCH_INTERNAL_ASSERT(recorder.forward_count >= block->forward_count); + unsigned int forward_distance = recorder.forward_count - block->forward_count; + recorder.change_forward_end_tik(block->forward_start_tik, recorder.forward_tik, block->orig_size, forward_distance, + block->in_forward); + + StatTypes stat_types = get_stat_types_for_pool(*(block->pool)); + for_each_selected_stat_type(stat_types, [&](size_t stat_type) { + update_stat(stats.allocation[stat_type], -1); + update_stat(stats.allocated_bytes[stat_type], -block->size); + }); + if (block->size >= CachingAllocatorConfig::max_split_size()) update_stat(stats.oversize_allocations, -1); + + if (!block->stream_uses.empty() && !shutdown_stats) { + insert_events(block); + } else { + free_block(block); + } + + ASCEND_LOGD("PTA CachingAllocator free: free = %zu, cached = %lu, allocated = %lu", orig_block_size, + stats.reserved_bytes[static_cast(StatType::AGGREGATE)].current, + stats.allocated_bytes[static_cast(StatType::AGGREGATE)].current); + + c10::reportMemoryUsageToProfiler(orig_block_ptr, -orig_block_size, + stats.allocated_bytes[static_cast(StatType::AGGREGATE)].current, + stats.reserved_bytes[static_cast(StatType::AGGREGATE)].current, + c10::Device(c10::DeviceType::PrivateUse1, block->device)); +} + +void* DeviceCachingAllocator::get_base_allocation(Block* block, size_t* outSize) { + std::lock_guard lock(mutex); + while (block->prev) { + block = block->prev; + } + void* basePtr = block->ptr; + if (outSize) { + size_t size = 0; + while (block) { + size += block->size; + block = block->next; + } + *outSize = size; + } + return basePtr; +} + +void DeviceCachingAllocator::record_stream(Block* block, c10_npu::NPUStream stream) { + std::lock_guard lock(mutex); + block->stream_uses.insert(stream); +} + +void DeviceCachingAllocator::erase_stream(Block* 
block, c10_npu::NPUStream stream) { + std::lock_guard lock(mutex); + block->stream_uses.erase(stream); + + // free block, lazy destory block related events + for (auto it = npu_events[stream].begin(); it != npu_events[stream].end();) { + if (block != it->second) { + it++; + continue; + } + it = npu_events[stream].erase(it); + block->event_count--; + if (block->event_count == 0) { + free_block(block); + break; + } + } +} + +void DeviceCachingAllocator::set_memory_fraction(double fraction) { + size_t device_free; + size_t device_total; + TORCH_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, &device_free, &device_total) == ACL_ERROR_NONE, "acl interface call failed"); + TORCH_INTERNAL_ASSERT(device_free <= device_total); + allowed_memory_maximum = static_cast(fraction * device_total); + set_fraction = true; +} + +void DeviceCachingAllocator::empty_cache(bool check_error) { + std::lock_guard lock(mutex); + TORCH_CHECK(release_cached_blocks(check_error), "release_cached_blocks failed."); +} + +void DeviceCachingAllocator::dev_set_shutdown_stats() { shutdown_stats = true; } + +void DeviceCachingAllocator::cache_info(size_t* total, size_t* largest) { + std::lock_guard lock(mutex); + cache_info_aux(default_lc_pools.large_blocks, total, largest); + cache_info_aux(default_lc_pools.small_blocks, total, largest); + cache_info_aux(long_lc_pools.large_blocks, total, largest); + cache_info_aux(long_lc_pools.small_blocks, total, largest); +} + +DeviceStats DeviceCachingAllocator::get_stats() { + std::lock_guard lock(mutex); + return stats; +} + +void DeviceCachingAllocator::reset_accumulated_stats() { + std::lock_guard lock(mutex); + + for (size_t statType = 0; statType < static_cast(StatType::NUM_TYPES); ++statType) { + reset_accumulated_stat(stats.allocation[statType]); + reset_accumulated_stat(stats.segment[statType]); + reset_accumulated_stat(stats.active[statType]); + reset_accumulated_stat(stats.inactive_split[statType]); + reset_accumulated_stat(stats.allocated_bytes[statType]); + 
reset_accumulated_stat(stats.reserved_bytes[statType]); + reset_accumulated_stat(stats.active_bytes[statType]); + reset_accumulated_stat(stats.inactive_split_bytes[statType]); + reset_accumulated_stat(stats.requested_bytes[statType]); + } + + stats.num_alloc_retries = 0; + stats.num_ooms = 0; + reset_accumulated_stat(stats.oversize_allocations); + reset_accumulated_stat(stats.oversize_segments); +} + +void DeviceCachingAllocator::reset_peak_stats() { + std::lock_guard lock(mutex); + + for (size_t statType = 0; statType < static_cast(StatType::NUM_TYPES); ++statType) { + reset_peak_stat(stats.allocation[statType]); + reset_peak_stat(stats.segment[statType]); + reset_peak_stat(stats.active[statType]); + reset_peak_stat(stats.inactive_split[statType]); + reset_peak_stat(stats.allocated_bytes[statType]); + reset_peak_stat(stats.reserved_bytes[statType]); + reset_peak_stat(stats.active_bytes[statType]); + reset_peak_stat(stats.inactive_split_bytes[statType]); + reset_peak_stat(stats.requested_bytes[statType]); + } + + reset_peak_stat(stats.oversize_allocations); + reset_peak_stat(stats.oversize_segments); +} + +std::vector DeviceCachingAllocator::snapshot() const { + std::lock_guard lock(mutex); + + std::vector result; + const auto all_blocks = get_all_blocks(); + + for (const Block* const head_block : all_blocks) { + // For expandable segments, we report one segment for each continguous + // mapped range of memory + if (head_block->prev && head_block->prev->mapped) { + continue; + } + result.emplace_back(); + SegmentInfo& segment_info = result.back(); + segment_info.device = head_block->device; + segment_info.address = reinterpret_cast(head_block->ptr); + segment_info.is_large = (!head_block->pool->is_small); + segment_info.is_expandable = head_block->expandable_segment_; +#ifdef MEMORY_RECORDER_DEBUG + segment_info.type = head_block->pool->type; + segment_info.type_str = get_block_pool_str(segment_info.type); +#endif + + const Block* block = head_block; + while (block 
!= nullptr && block->mapped) { + segment_info.blocks.emplace_back(); + BlockInfo& block_info = segment_info.blocks.back(); + + block_info.size = block->size; + block_info.allocated = block->allocated; + block_info.active = block->allocated || (block->event_count > 0); + + segment_info.total_size += block_info.size; + if (block_info.allocated) { + segment_info.allocated_size += block_info.size; + } + if (block_info.active) { + segment_info.active_size += block_info.size; + } + + block = block->next; + } + } + + std::sort(result.begin(), result.end(), + [](const SegmentInfo& a, const SegmentInfo& b) { return a.address < b.address; }); + + return result; +} + +size_t DeviceCachingAllocator::round_size(size_t size) { + if (size < kMinBlockSize) { + return kMinBlockSize; + } else { + return kMinBlockSize * ((size + kMinBlockSize - 1) / kMinBlockSize); + } +} + +std::vector DeviceCachingAllocator::get_all_blocks() const { + std::vector blocks; + const BlockPool* pools[] = { + &long_lc_pools.small_blocks, + &long_lc_pools.large_blocks, + &default_lc_pools.small_blocks, + &default_lc_pools.large_blocks + }; + for (auto pool : pools) { + blocks.insert(blocks.end(), pool->blocks.begin(), pool->blocks.end()); + } + blocks.insert(blocks.end(), active_blocks.begin(), active_blocks.end()); + return blocks; +} + +Block* DeviceCachingAllocator::find_expandable_block(int device, aclrtStream stream, BlockPool* pool, size_t size) { + Block key(device, stream, 0); + + auto allocatable = [](Block* b) { return b && !b->allocated && b->event_count == 0 && b->stream_uses.empty(); }; + auto has_available_address_space = [&](Block* b) { + size_t bytes = 0; + while (bytes < size && allocatable(b)) { + bytes += b->size; + b = b->next; + } + return bytes >= size; + }; + for (auto it = pool->unmapped.lower_bound(&key); it != pool->unmapped.end() && (*it)->stream == stream; ++it) { + Block* c = *it; + // we found the lowest address of an unmapped segment + // but there might be a free segment we 
can also use + // right before it + if (allocatable(c->prev)) { + c = c->prev; + } + if (has_available_address_space(c)) { + return c; + } + } + auto segment_size = pool->is_small ? kSmallBuffer : kLargeBuffer; + expandable_segments_.emplace_back(new ExpandableSegment(device, stream, segment_size)); + + ExpandableSegment* es = expandable_segments_.back(); + Block* candidate = new Block(device, stream, es->size(), pool, es->ptr()); + candidate->mapped = false; + candidate->expandable_segment_ = es; + pool->unmapped.insert(candidate); + return candidate; +} + +bool DeviceCachingAllocator::map_block(Block* to_map, size_t size) { + TORCH_INTERNAL_ASSERT(!to_map->mapped && size <= to_map->size); + auto mapped_range = to_map->expandable_segment_->map(SegmentRange{to_map->ptr, size}); + // failed to map the memory + if (mapped_range.size == 0) { + return false; + } + TORCH_INTERNAL_ASSERT(mapped_range.ptr == to_map->ptr && mapped_range.size >= size); + + BlockPool& pool = *to_map->pool; + pool.unmapped.erase(to_map); + to_map->mapped = true; + + if (mapped_range.size < to_map->size) { + // to_map -> remaining -> to_map->next(?) 
+ Block* remaining = new Block(to_map->device, to_map->stream, to_map->size - mapped_range.size, &pool, + static_cast(to_map->ptr) + mapped_range.size); + remaining->mapped = false; + remaining->expandable_segment_ = to_map->expandable_segment_; + remaining->splice(to_map, to_map->next); + pool.unmapped.insert(remaining); + to_map->size = mapped_range.size; + } + + TORCH_CHECK(try_merge_blocks(to_map, to_map->prev, pool) >= 0, "try_merge_blocks failed."); + TORCH_CHECK(try_merge_blocks(to_map, to_map->next, pool) >= 0, "try_merge_blocks failed."); + + pool.blocks.insert(to_map); + + // update statistics + total_allocated_memory += mapped_range.size; + StatTypes stat_types = get_stat_types_for_pool(*to_map->pool); + for_each_selected_stat_type( + stat_types, [&](size_t stat_type) { update_stat(stats.reserved_bytes[stat_type], mapped_range.size); }); + + return true; +} + +Block* DeviceCachingAllocator::try_allocate_expandable_block(int device, aclrtStream stream, BlockPool* pool, size_t size) { + Block* candidate = find_expandable_block(device, stream, pool, size); + // Candidate is now a list free/unmapped blocks with at least size room: + // unmapped -> null + // unmapped -> free -> * + // free -> unmapped -> * + + if (!candidate->mapped && !map_block(candidate, std::min(candidate->size, size))) { + return nullptr; + } + TORCH_INTERNAL_ASSERT(candidate->mapped); + + while (candidate->size < size) { + // invariant: free -> unmapped -> * + // map_block will map some of unmapped and merge with free + auto remaining = size - candidate->size; + auto new_candidate = candidate->next; + if (!map_block(new_candidate, std::min(remaining, candidate->next->size))) { + return nullptr; + } + candidate = new_candidate; + } + pool->blocks.erase(candidate); + return candidate; +} + +void DeviceCachingAllocator::free_block(Block* block) { + AT_ASSERT(!block->allocated && block->event_count == 0); + + size_t original_block_size = block->size; + size_t requested_size = 
block->requested_size; + + auto& pool = *block->pool; + int64_t net_change_inactive_split_blocks = 0; + int64_t net_change_inactive_split_size = 0; + + const std::array merge_candidates = {block->prev, block->next}; + for (Block* merge_candidate : merge_candidates) { + const int64_t subsumed_size = static_cast(try_merge_blocks(block, merge_candidate, pool)); + if (subsumed_size > 0) { + net_change_inactive_split_blocks -= 1; + net_change_inactive_split_size -= subsumed_size; + } + } + + active_blocks.erase(block); + pool.blocks.insert(block); + + if (block->is_split()) { + net_change_inactive_split_blocks += 1; + net_change_inactive_split_size += static_cast(block->size); + } + + StatTypes stat_types = get_stat_types_for_pool(pool); + for_each_selected_stat_type(stat_types, [&](size_t stat_type) { + // inactive_split tries to capture the idea that blocks + // cannot be freed when requested, but fully free pages + // of expandable blocks can always be freed. + // The logic to track this as statistic is pretty involved, + // so we simply just exclude expandable segements from + // inactive_split + if (!block->expandable_segment_) { + update_stat(stats.inactive_split[stat_type], net_change_inactive_split_blocks); + update_stat(stats.inactive_split_bytes[stat_type], net_change_inactive_split_size); + } + update_stat(stats.active[stat_type], -1); + update_stat(stats.active_bytes[stat_type], -original_block_size); + update_stat(stats.requested_bytes[stat_type], -static_cast(requested_size)); + }); +} + +size_t DeviceCachingAllocator::try_merge_blocks(Block* dst, Block* src, BlockPool& pool) { + if (!src || src->allocated || src->event_count > 0 || !src->stream_uses.empty() || dst->mapped != src->mapped) { + return 0; + } + + AT_ASSERT(dst->is_split() && src->is_split()); + + if (dst->prev == src) { + dst->ptr = src->ptr; + dst->prev = src->prev; + if (dst->prev) { + dst->prev->next = dst; + } + } else { + dst->next = src->next; + if (dst->next) { + dst->next->prev = dst; 
+ } + } + + const size_t subsumed_size = src->size; + dst->size += subsumed_size; + auto erased = src->mapped ? pool.blocks.erase(src) : pool.unmapped.erase(src); + delete src; + src = nullptr; + + return subsumed_size; +} + +DeviceCachingAllocator::LcPool& DeviceCachingAllocator::get_lc_pool(LifeCycleType lc) { + switch (lc) { + case LifeCycleType::DEFAULT_LC: + return default_lc_pools; + case LifeCycleType::LONG_LC: + case LifeCycleType::FIRST_STEP_LC: + return long_lc_pools; + } + AT_ASSERT(0); +} + +BlockPool& DeviceCachingAllocator::get_pool(size_t size, LifeCycleType lc) { + LcPool& pool = get_lc_pool(lc); + if (size <= kSmallSize) { + return pool.small_blocks; + } else { + return pool.large_blocks; + } +} + +std::vector DeviceCachingAllocator::get_pool_list(size_t size, LifeCycleType lc) { + std::vector pool_list = {&default_lc_pools, &long_lc_pools}; + LcPool& lc_pool = get_lc_pool(lc); + int idx = 0; + for (auto& x : pool_list) { + if (x == &lc_pool) break; + ++idx; + } + AT_ASSERT(idx < pool_list.size()); + std::vector target_pool_list; + for (int i = 0; i < (int)pool_list.size(); i++) { + target_pool_list.push_back(pool_list[(idx + i) % pool_list.size()]); + } + std::vector target_list; + for (auto& x : target_pool_list) { + target_list.push_back(size <= kSmallSize ? &x->small_blocks : &x->large_blocks); + } + return target_list; +} + +bool DeviceCachingAllocator::should_split(const Block* block, size_t size) { + TORCH_INTERNAL_ASSERT(block->size >= size); + size_t remaining = block->size - size; + if (block->pool->is_small || CachingAllocatorConfig::expandable_segments()) { + return remaining >= kMinBlockSize; + } else { + return (size < CachingAllocatorConfig::max_split_size()) && (remaining > kSmallSize); + } +} + +StatTypes DeviceCachingAllocator::get_stat_types_for_pool(const BlockPool& pool) { + StatTypes stat_types = {false}; + stat_types[static_cast(StatType::AGGREGATE)] = true; + stat_types[static_cast(pool.is_small ? 
StatType::SMALL_POOL : StatType::LARGE_POOL)] = true; + return stat_types; +} + +size_t DeviceCachingAllocator::get_allocation_size(size_t size, LifeCycleType lc) { + if (lc == LifeCycleType::LONG_LC) { + if (size <= kSmallSize) { + return kSmallBuffer; + } else { + return kRoundLarge * ((size + kRoundLarge - 1) / kRoundLarge); + } + } + if (size <= kSmallSize) { + return kSmallBuffer; + } else if (size < kMinLargeAlloc) { + return kLargeBuffer; + } else { + return kRoundLarge * ((size + kRoundLarge - 1) / kRoundLarge); + } +} + +bool DeviceCachingAllocator::get_free_block(AllocParams& p) { + BlockPool& pool = *p.pool; + + if (C10_UNLIKELY(set_fraction && CachingAllocatorConfig::garbage_collection_threshold() > 0.0)) { + // Track block reuse interval only when garbage collection is enabled. + for (auto& b : pool.blocks) { + ++b->gc_count; + } + } + auto it = pool.blocks.lower_bound(&p.search_key); + if (it == pool.blocks.end() || (*it)->stream != p.stream()) { + return false; + } + + if ((*it)->expandable_segment_) { + if (CachingAllocatorConfig::expandable_segments()) { + // if we are allocated to the part of the block that is expandable + // for the purposes of "best fit" we consider its size to be the size it + // can expand to, not the size it currently is. This means that we + // sometimes have to search for blocks with bigger 'size' before + // choosing this segment. + auto expandable_size = [](Block* b) { return b->size + (b->next && !b->next->mapped ? b->next->size : 0); }; + auto next = it; + next++; + while ((*it)->expandable_segment_ && next != pool.blocks.end() && (*next)->stream == p.stream() && + expandable_size(*next) < expandable_size(*it)) { + it = next++; + } + } else { + // Rarely expandable segments has been turned off after we have + // already allocated some blocks as expandable. For instance, + // since we cannot share expandable memory via IPC, someone might + // temporarily disable it. 
In this case we need to honor this request + // by only finding non-expandable blocks + do { + it++; + } while (it != pool.blocks.end() && (*it)->expandable_segment_ && (*it)->stream == p.stream()); + if (it == pool.blocks.end() || (*it)->stream != p.stream()) { + return false; + } + } + } + + // Do not return an oversized block for a large request + if ((p.size() < CachingAllocatorConfig::max_split_size()) && + ((*it)->size >= CachingAllocatorConfig::max_split_size())) { + return false; + } + // Allow oversized block size to be rounded up but within a limit + if ((p.size() >= CachingAllocatorConfig::max_split_size()) && ((*it)->size >= p.size() + kLargeBuffer)) { + return false; + } + // Short lifecycle tensor. In the short lifecycle memory pool, when in a non forward phase, + // to prevent large blocks from being occupied by small tensors and causing excessive fragmentation, myMaxSplitSize and + // kLargeBuffer are used as restrictions.Overall, the purpose of this section is to prevent the + // generation of large fragments. + if (!pool_idx && &pool == &default_lc_pools.large_blocks && (*it)->size >= myMaxSplitSize && + (*it)->size - p.size() >= kLargeBuffer && !_check()) { + return false; + } + // Short lifecycle tensor, not allowed to be placed in the long lifecycle memory pool in the forward phase + // In the non forward stage, when a long lifecycle block is idle, + // it can store a short lifecycle tensor to improve memory reuse rate. + if (&pool == &long_lc_pools.large_blocks && pool_idx && _check()) return false; + // Long lifecycle tensor can only be placed in the long lifecycle memory pool and must achieve zero fragmentation. 
+ if (&pool == &long_lc_pools.large_blocks && !pool_idx) { + if (p.alloc_size != (*it)->size || (*it)->prev || (*it)->next) { + return false; + } + } + // Long lifecycle tensor, not allowed to be placed in short lifecycle memory pool + if (&pool == &default_lc_pools.large_blocks && pool_idx) { + return false; + } + p.block = *it; + (*it)->gc_count = 0; // Denote this block has been used + pool.blocks.erase(it); + return true; +} + +bool DeviceCachingAllocator::get_free_block_memory_optimize(AllocParams &p, size_t tensor_forward_end, + size_t tensor_step_end, size_t tensor_forward_start, + size_t tensor_step_start) { + BlockPool& pool = *p.pool; + if (C10_UNLIKELY(set_fraction && CachingAllocatorConfig::garbage_collection_threshold() > 0.0)) { + // Track block reuse interval only when garbage collection is enabled. + for (auto& b : pool.blocks) { + ++b->gc_count; + } + } + auto it = pool.blocks.lower_bound(&p.search_key); + bool flag = false; + + for (int i = 1; i <= DeviceCachingAllocator::prevent_memory_conflict_num; i++) { + if (it == pool.blocks.end() || (*it)->stream != p.stream()) return false; + // Do not return an oversized block for a large request + if ((p.size() < CachingAllocatorConfig::max_split_size()) && + ((*it)->size >= CachingAllocatorConfig::max_split_size())) + return false; + // Allow oversized block size to be rounded up but within a limit + if ((p.size() >= CachingAllocatorConfig::max_split_size()) && ((*it)->size >= p.size() + kLargeBuffer)) + return false; + if (!pool_idx && &pool == &default_lc_pools.large_blocks && (*it)->size >= myMaxSplitSize && + (*it)->size - p.size() >= kLargeBuffer && !_check()) { + return false; + } + if (&pool == &long_lc_pools.large_blocks && pool_idx && _check()) return false; + if (&pool == &long_lc_pools.large_blocks && !pool_idx) { + if (p.alloc_size != (*it)->size || (*it)->prev || (*it)->next) { + return false; + } + } + if (&pool == &default_lc_pools.large_blocks && pool_idx) { + return false; + } + + // 
Add up the allocs before and after the block + size_t seg_size = (*it)->size; + Block *befor_block = (*it)->prev; + Block *next_block = (*it)->next; + // look up from the front + while (befor_block) { + seg_size += befor_block->size; + befor_block = befor_block->prev; + } + // look up from the end + while (next_block) { + seg_size += next_block->size; + next_block = next_block->next; + } + + // p.size() --> round_size、(*it)->orig_size时origin大小、(*it)->size时alloc大小 + if (seg_size >= p.size() + kSizeLimit) { + // Determine if there are size blocks in the step within the lifecycle of the tensor. + // If there are, it indicates that there will be block sized tensors generated within the lifecycle of the tensor + bool is_tensor_in_step = malloc_recorder.has_tensor_in_step(tensor_step_start, tensor_step_end, seg_size); + if (is_tensor_in_step) { + // Find the last block of the iterator + while (it != pool.blocks.end()) { + it++; + if (it == pool.blocks.end()) { + return false; + } + if ((*it)->device == p.search_key.device && (*it)->stream == p.search_key.stream && + p.search_key.size <= (*it)->size) { + break; + } + } + continue; + } else { + flag = true; + break; + } + } + } + if (flag) { + p.block = *it; + pool.blocks.erase(it); + return true; + } else { + return false; + } +} + +bool DeviceCachingAllocator::get_free_block_after_alloc(AllocParams &p) { + BlockPool& pool = *p.pool; + if (C10_UNLIKELY(set_fraction && CachingAllocatorConfig::garbage_collection_threshold() > 0.0)) { + // Track block reuse interval only when garbage collection is enabled. 
+ for (auto& b : pool.blocks) { + ++b->gc_count; + } + } + auto it = pool.blocks.lower_bound(&p.search_key); + if (it == pool.blocks.end() || (*it)->stream != p.stream()) return false; + // Do not return an oversized block for a large request + if ((p.size() < CachingAllocatorConfig::max_split_size()) && + ((*it)->size >= CachingAllocatorConfig::max_split_size())) + return false; + // Allow oversized block size to be rounded up but within a limit + if ((p.size() >= CachingAllocatorConfig::max_split_size()) && ((*it)->size >= p.size() + kLargeBuffer)) return false; + + // Forward stage, short lifecycle tensor, cannot be placed in long lifecycle memory pool to prevent tensor conflicts + if (&pool == &long_lc_pools.large_blocks) { + if (pool_idx == 0 && _check()) return false; + } + p.block = *it; + (*it)->gc_count = 0; // Denote this block has been used + pool.blocks.erase(it); + return true; +} + +bool DeviceCachingAllocator::trigger_free_memory_callbacks(AllocParams& p) { + bool freed_memory = false; + return freed_memory; +} + +void DeviceCachingAllocator::garbage_collect_cached_blocks() { + // Free unused cached blocks to reclaim NPU memory. + // Unlike release_cached_blocks(), this does not enforce synchronization and + // therefore should be of less overheads. + + size_t gc_threshold = + static_cast(CachingAllocatorConfig::garbage_collection_threshold() * allowed_memory_maximum); + // No need to trigger GC yet + if (total_allocated_memory <= gc_threshold) { + return; + } + const auto target_size = total_allocated_memory - gc_threshold; + size_t gc_reclaimed = 0; + + // Calculate the total age of the free-able blocks. We'll use it later to + // get "avg age" threshold. 
+ double total_age = 0.0; + int freeable_block_count = 0; + const BlockPool* pools[] = {&long_lc_pools.large_blocks, &default_lc_pools.large_blocks}; + for (auto pool : pools) { + for (auto& b : pool->blocks) { + if (!b->is_split()) { + total_age += b->gc_count; + ++freeable_block_count; + } + } + } + // No free-able blocks? + if (freeable_block_count == 0) { + return; + } + + TORCH_CHECK(c10_npu::npuSynchronizeDevice(true), "npuSynchronizeDevice failed."); + + // Repeat GC until we reach reclaim > target size. + bool block_freed = true; + while (gc_reclaimed < target_size && block_freed && freeable_block_count > 0) { + // Free blocks exceeding this age threshold first. + double age_threshold = total_age / freeable_block_count; + // Stop iteration if we can no longer free a block. + block_freed = false; + + // Free blocks of > avg age. Don't stop upon reaching the target_size, + // we don't want this GC to be triggered frequently. + for (auto pool : pools) { + auto it = pool->blocks.begin(); + while (it != pool->blocks.end()) { + Block* block = *it; + ++it; + if (!block->is_split() && block->gc_count >= age_threshold) { + block_freed = true; + gc_reclaimed += block->size; + total_age -= block->gc_count; // Decrement the age + freeable_block_count--; // One less block that can be freed + release_block(block); + + ASCEND_LOGD("PTA CachingAllocator gc: free = %zu, cached = %lu, allocated = %lu", block->size, + stats.reserved_bytes[static_cast(StatType::AGGREGATE)].current, + stats.allocated_bytes[static_cast(StatType::AGGREGATE)].current); + } + } + } + } +} + +bool DeviceCachingAllocator::alloc_block(AllocParams& p, bool isRetry) { + size_t size = p.alloc_size; + void* ptr = nullptr; + + // In order to prevent the failure of aclrtMalloc_wrapper from consuming a lot of time, prediction is made in advance + static size_t usable_total = 0; + if (usable_total && + alr_total_size + size + DeviceCachingAllocator::memory_fail_prejudgment > usable_total && !isRetry) { + 
return false; + } + + if (isRetry) { + stats.num_alloc_retries += 1; + } + + if (set_fraction && total_allocated_memory + size > allowed_memory_maximum) { + p.err = ACL_ERROR_RT_MEMORY_ALLOCATION; + } else if (CachingAllocatorConfig::expandable_segments()) { + p.block = try_allocate_expandable_block(p.device(), p.stream(), p.pool, p.size()); + if (p.block) { + p.err = ACL_ERROR_NONE; + } else { + p.err = ACL_ERROR_RT_MEMORY_ALLOCATION; + } + return bool(p.block); + } else { + p.err = aclrtMalloc_wrapper(&ptr, size, aclrtMemMallocPolicy::ACL_MEM_MALLOC_HUGE_FIRST); + size_t device_free; + size_t device_total; + TORCH_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, &device_free, &device_total) == ACL_ERROR_NONE, \ + "Error, failed to get memory info"); + TORCH_INTERNAL_ASSERT(device_free <= device_total); + usable_total = alr_total_size + device_free; + ASCEND_LOGD("pytorch-change code, reserved:%lu, free:%lu, reserved+free:%lu (after aclrtmalloc)\n", alr_total_size, + device_free, alr_total_size + device_free); + } + + if (p.err != ACL_ERROR_NONE) { + return false; + } + + total_allocated_memory += size; + p.block = new Block(p.device(), p.stream(), size, p.pool, (char*)ptr); + for_each_selected_stat_type(p.stat_types, [&](size_t stat_type) { + update_stat(stats.segment[stat_type], 1); + update_stat(stats.reserved_bytes[stat_type], size); + }); + if (size >= CachingAllocatorConfig::max_split_size()) update_stat(stats.oversize_segments, 1); + ASCEND_LOGD("pta_memory acl_malloc: malloc = %zu, ret = %d", size, p.err); + + return (p.block != nullptr); +} + +bool DeviceCachingAllocator::release_available_cached_blocks(const AllocParams& p) { + if (CachingAllocatorConfig::max_split_size() == std::numeric_limits::max()) { + return false; + } + BlockPool &pool = *p.pool; + Block key = p.search_key; + key.size = + (key.size < CachingAllocatorConfig::max_split_size()) ? 
CachingAllocatorConfig::max_split_size() : key.size; + auto it = pool.blocks.lower_bound(&key); + + TORCH_CHECK(c10_npu::npuSynchronizeDevice(true), "npuSynchronizeDevice failed."); + + if (it == pool.blocks.end() || (*it)->stream != p.stream()) { + // No single block is large enough; free multiple oversize blocks, starting with the largest + if (it == pool.blocks.begin()) { + return false; + } + size_t totalReleased = 0; + // Back up one item. Now on the largest block for the correct stream + --it; + while ((totalReleased < key.size) && ((*it)->size >= CachingAllocatorConfig::max_split_size()) && + ((*it)->stream == p.stream())) { + auto cur = it; + totalReleased += (*it)->size; + if (it != pool.blocks.begin()) { + --it; + release_block(*cur); + } else { + release_block(*cur); + break; + } + } + if (totalReleased < key.size) { + return false; + } + } else { + release_block(*it); + } + return true; +} + +bool DeviceCachingAllocator::release_cached_blocks(bool check_error) { + // First ensure that all blocks that can't currently be allocated due to + // outstanding events are returned to the pool. 
+ synchronize_and_free_events(check_error); + + // Free all non-split cached blocks + TORCH_CHECK(c10_npu::npuSynchronizeDevice(check_error), "npuSynchronizeDevice failed."); + release_blocks(long_lc_pools.large_blocks); + release_blocks(long_lc_pools.small_blocks); + release_blocks(default_lc_pools.large_blocks); + release_blocks(default_lc_pools.small_blocks); + + return true; +} + +void DeviceCachingAllocator::release_expandable_segment(Block* block) { + TORCH_INTERNAL_ASSERT(block->size == block->expandable_segment_->size(), "block disagrees with segment"); + TORCH_INTERNAL_ASSERT(!block->mapped); + auto it = std::find(expandable_segments_.begin(), expandable_segments_.end(), block->expandable_segment_); + TORCH_INTERNAL_ASSERT(it != expandable_segments_.end()); + expandable_segments_.erase(it); + block->pool->unmapped.erase(block); + delete block->expandable_segment_; + block->expandable_segment_ = nullptr; + delete block; + block = nullptr; +} + +bool DeviceCachingAllocator::release_cached_blocks_long(bool check_error) { + // First ensure that all blocks that can't currently be allocated due to + // outstanding events are returned to the pool. + synchronize_and_free_events(check_error); + + // Free all non-split cached blocks + TORCH_CHECK(c10_npu::npuSynchronizeDevice(check_error), "npuSynchronizeDevice failed."); + release_blocks(long_lc_pools.large_blocks); + release_blocks(long_lc_pools.small_blocks); + + return true; +} + +bool DeviceCachingAllocator::release_cached_blocks_default(bool check_error) { + // First ensure that all blocks that can't currently be allocated due to + // outstanding events are returned to the pool. 
+ synchronize_and_free_events(check_error); + + // Free all non-split cached blocks + TORCH_CHECK(c10_npu::npuSynchronizeDevice(check_error), "npuSynchronizeDevice failed."); + release_blocks(default_lc_pools.large_blocks); + release_blocks(default_lc_pools.small_blocks); + + return true; +} + +void DeviceCachingAllocator::release_block(Block* block) { + TORCH_INTERNAL_ASSERT(!block->expandable_segment_); + TORCH_CHECK(aclrtFree_wrapper((void*)block->ptr) == ACL_ERROR_NONE, "aclrtFree_wrapper failed."); + total_allocated_memory -= block->size; + + auto* pool = block->pool; + + StatTypes stat_types = get_stat_types_for_pool(*pool); + for_each_selected_stat_type(stat_types, [&](size_t stat_type) { + update_stat(stats.segment[stat_type], -1); + update_stat(stats.reserved_bytes[stat_type], -block->size); + }); + + if (block->size >= CachingAllocatorConfig::max_split_size()) update_stat(stats.oversize_segments, -1); + + ASCEND_LOGD("pta_memory acl_free: free_size = %zu", block->size); + + pool->blocks.erase(block); + delete block; + block = nullptr; +} + +void DeviceCachingAllocator::unmap_block(Block* block) { + auto unmapped = block->expandable_segment_->unmap(SegmentRange{block->ptr, block->size}); + if (unmapped.size == 0) { + return; + } + block->pool->blocks.erase(block); + + ptrdiff_t before_size = static_cast(unmapped.ptr) - static_cast(block->ptr); + if (before_size > 0) { + // prev? -> before_free -> block + Block* before_free = new Block(block->device, block->stream, before_size, block->pool, block->ptr); + before_free->expandable_segment_ = block->expandable_segment_; + before_free->splice(block->prev, block); + block->pool->blocks.insert(before_free); + } + + TORCH_CHECK(block->size >= before_size + unmapped.size, "after size should be greater than or equal to 0"); + auto after_size = block->size - (before_size + unmapped.size); + if (after_size > 0) { + // block -> after_free -> next? 
+ Block* after_free = new Block(block->device, block->stream, after_size, block->pool, + static_cast(unmapped.ptr) + unmapped.size); + after_free->expandable_segment_ = block->expandable_segment_; + after_free->splice(block, block->next); + block->pool->blocks.insert(after_free); + } + + block->ptr = unmapped.ptr; + block->size = unmapped.size; + block->mapped = false; + + TORCH_CHECK(try_merge_blocks(block, block->prev, *block->pool) >= 0, "try_merge_blocks failed."); + TORCH_CHECK(try_merge_blocks(block, block->next, *block->pool) >= 0, "try_merge_blocks failed."); + block->pool->unmapped.insert(block); + + // update statistics + total_allocated_memory -= unmapped.size; + StatTypes stat_types = get_stat_types_for_pool(*block->pool); + for_each_selected_stat_type(stat_types, + [&](size_t stat_type) { update_stat(stats.reserved_bytes[stat_type], -unmapped.size); }); +} + +void DeviceCachingAllocator::release_blocks(BlockPool& pool) { + std::vector to_unmap; + // Frees all non-split blocks + auto it = pool.blocks.begin(); + while (it != pool.blocks.end()) { + Block *block = *it; + ++it; + if (block->expandable_segment_) { + // unmapping will mutate the free pool + // so just gather what needs to be freed + // to avoid invalidating the iterator + to_unmap.push_back(block); + } else if (!block->prev && !block->next) { + release_block(block); + } + } + for (Block* block : to_unmap) { + unmap_block(block); + if (!block->prev && !block->next) { + release_expandable_segment(block); + } + } +} + +EventPool::Event DeviceCachingAllocator::create_event_internal(int idx) { + // Leak the event pool to avoid shutdown issues. + static auto* event_pool = new EventPool(); + return event_pool->get(idx); +} + +void DeviceCachingAllocator::synchronize_and_free_events(bool check_error) { + // Synchronize on outstanding events and then free associated blocks. 
+ for (auto& st : npu_events) { + for (auto& e : st.second) { + EventPool::Event event = std::move(e.first); + Block* block = e.second; + + if (check_error) { + TORCH_CHECK(aclrtSynchronizeEvent(*event) == ACL_ERROR_NONE, "acl interface call failed"); + } else { + TORCH_CHECK(aclrtSynchronizeEvent(*event) == ACL_ERROR_NONE, "acl interface call failed"); + } + ASCEND_LOGI("Event: aclrtSynchronizeEvent is successfully executed."); + + block->event_count--; + if (block->event_count == 0) { + free_block(block); + } + } + } + + npu_events.clear(); +} + +void DeviceCachingAllocator::insert_events(Block* block) { + aclrtContext compiler_ctx = aclrtContext(); + aclError ret_ctx = aclrtGetCurrentContext(&compiler_ctx); + + stream_set streams(std::move(block->stream_uses)); + AT_ASSERT(block->stream_uses.empty()); + for (auto& stream : streams) { + TORCH_CHECK(c10_npu::SetDevice(stream.device_index()) == ACL_ERROR_NONE, "SetDevice failed."); + + EventPool::Event event = create_event_internal(stream.device_index()); + event->record(stream); + ASCEND_LOGI("Event: record DeviceAllocator is successfully executed."); + + block->event_count++; + npu_events[stream].emplace_back(std::move(event), block); + } + if (ret_ctx == ACL_ERROR_NONE) { + TORCH_CHECK(aclrtSetCurrentContext(compiler_ctx) == ACL_ERROR_NONE, "aclrtSetCurrentContext failed."); + } +} + +void DeviceCachingAllocator::process_events() { + // Process outstanding npuEvents. Events that are completed are removed + // from the queue, and the 'event_count' for the corresponding allocation + // is decremented. Stops at the first event which has not been completed. + // Since events on different devices or streams may occur out of order, + // the processing of some events may be delayed. 
+ for (auto it = npu_events.begin(); it != npu_events.end();) { + while (!it->second.empty()) { + auto& e = it->second.front(); + EventPool::Event event = std::move(e.first); + Block* block = e.second; + + if (!event->query()) { + e.first = std::move(event); + break; + } + + block->event_count--; + if (block->event_count == 0) { + free_block(block); + } + it->second.pop_front(); + } + + if (it->second.empty()) { + it = npu_events.erase(it); + } else { + it++; + } + } +} + +void DeviceCachingAllocator::cache_info_aux(BlockPool& blocks, size_t* total, size_t* largest) { + for (auto it = blocks.blocks.begin(); it != blocks.blocks.end(); ++it) { + size_t blocksize = (*it)->size; + *total += blocksize; + if (blocksize > *largest) { + *largest = blocksize; + } + } +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/DeviceCachingAllocator.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/DeviceCachingAllocator.h new file mode 100644 index 000000000..57d1b666b --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/DeviceCachingAllocator.h @@ -0,0 +1,174 @@ +#ifndef PLUGGABLEALLOCATOR_DEVICECACHINGALLOCATOR_H +#define PLUGGABLEALLOCATOR_DEVICECACHINGALLOCATOR_H + +#include "common.h" +#include "EventPool.h" +#include "CachingAllocatorConfig.h" +#include "Recorder.h" + +class DeviceCachingAllocator { +private: + // lock around all operations + mutable std::recursive_mutex mutex; + + // device statistics + DeviceStats stats; + + struct LcPool { + BlockPool large_blocks; + BlockPool small_blocks; + LcPool(BlockPoolType type) : large_blocks(false, type), small_blocks(true, type) {} + } long_lc_pools, default_lc_pools; + + MemoryRecorder recorder; + + MallocRecorder malloc_recorder; + + // allocated or in use by a stream + ska::flat_hash_set active_blocks; + + // outstanding acl events + ska::flat_hash_map>> npu_events; + + // 
record used memory. + size_t total_allocated_memory = 0; + + // record maximum allowed memory. + size_t allowed_memory_maximum = 0; + + // all live expandable segments + std::vector expandable_segments_; + + bool set_fraction = false; + + // whether shutdown. + bool shutdown_stats = false; + +public: + // The last three blocks receive the tensor to prevent memory conflicts. + static constexpr int prevent_memory_conflict_num = 3; + // A pre-judgment is made for the memory decrease caused by too many malloc failures. + static constexpr int memory_fail_prejudgment = 209715200; + + DeviceCachingAllocator(); + + void print_memory_analysis(); + + // All public methods (except the above) acquire the allocator mutex. + // Thus, do not call a public method from another public method. + + Block* malloc(int device, size_t orig_size, aclrtStream stream); + + Block* malloc_internal(int device, size_t orig_size, aclrtStream stream, LifeCycleType lc, size_t tensor_forward_end, + size_t tensor_forward_start); + + Block* alloc_found_block(AllocParams params, size_t orig_size, bool split_remainder); + + void free(Block* block); + + void* get_base_allocation(Block* block, size_t* outSize); + + void record_stream(Block* block, c10_npu::NPUStream stream); + + void erase_stream(Block* block, c10_npu::NPUStream stream); + + /** set memory fraction to limit maximum allocated memory **/ + void set_memory_fraction(double fraction); + + /** returns cached blocks to the system allocator **/ + void empty_cache(bool check_error); + + void dev_set_shutdown_stats(); + + /** Retrieves info (total size + largest block) of the memory cache **/ + void cache_info(size_t* total, size_t* largest); + + /** Returns a copy of the memory allocator stats **/ + DeviceStats get_stats(); + + /** Resets the historical accumulation stats for the device **/ + void reset_accumulated_stats(); + + /** Resets the historical peak stats for the device **/ + void reset_peak_stats(); + + /** Dump a complete snapshot of 
the memory held by the allocator. Potentially VERY expensive. **/ + std::vector snapshot() const; + + static size_t round_size(size_t size); + +private: + // All private methods do not acquire the allocator mutex. + + std::vector get_all_blocks() const; + + // returns the smallest possible address in any segment + // where there is enough free address space to fit size + // may be composed of free and unmapped segments + Block* find_expandable_block(int device, aclrtStream stream, BlockPool* pool, size_t size); + + bool map_block(Block* to_map, size_t size); + + Block* try_allocate_expandable_block(int device, aclrtStream stream, BlockPool* pool, size_t size); + + /** moves a block into a pool of cached free blocks **/ + void free_block(Block* block); + + /** combine previously split blocks. returns the size of the subsumed block, or 0 on failure. **/ + size_t try_merge_blocks(Block* dst, Block* src, BlockPool& pool); + + LcPool& get_lc_pool(LifeCycleType lc); + + BlockPool& get_pool(size_t size, LifeCycleType lc); + + std::vector get_pool_list(size_t size, LifeCycleType lc); + + StatTypes get_stat_types_for_pool(const BlockPool& pool); + + bool should_split(const Block* block, size_t size); + + static size_t get_allocation_size(size_t size, LifeCycleType lc = LifeCycleType::DEFAULT_LC); + + bool get_free_block(AllocParams& p); + + bool get_free_block_memory_optimize(AllocParams &p, size_t tensor_forward_end, size_t tensor_step_end, + size_t tensor_forward_start, size_t tensor_step_start); + + bool get_free_block_after_alloc(AllocParams& p); + + bool trigger_free_memory_callbacks(AllocParams& p); + + void garbage_collect_cached_blocks(); + + bool alloc_block(AllocParams& p, bool isRetry); + + /** Free one or more oversize blocks to the system allocator. 
But only enough to satisfy the target size **/ + bool release_available_cached_blocks(const AllocParams& p); + + bool release_cached_blocks(bool check_error); + + void release_expandable_segment(Block* block); + + bool release_cached_blocks_long(bool check_error); + + bool release_cached_blocks_default(bool check_error); + + void release_block(Block* block); + + void unmap_block(Block* block); + + void release_blocks(BlockPool& pool); + + EventPool::Event create_event_internal(int idx); + + void synchronize_and_free_events(bool check_error); + + void insert_events(Block* block); + + void process_events(); + + // Accumulates sizes of all memory blocks for given device in given pool + void cache_info_aux(BlockPool& blocks, size_t* total, size_t* largest); +}; + +#endif diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/EventPool.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/EventPool.cpp new file mode 100644 index 000000000..258238993 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/EventPool.cpp @@ -0,0 +1,31 @@ +#include "EventPool.h" + +EventPool::Event EventPool::get(int device) { + TORCH_INTERNAL_ASSERT(0 <= device && static_cast(device) < pools_.size(), + "device index out of range"); + auto& pool = pools_[device]; + auto destructor = [&pool](c10_npu::NPUEvent* event) { + std::lock_guard g(pool.mutex_); + pool.event_pool_.push_back(std::unique_ptr(event)); + }; + + // Try to acquire an event from the per-device pool. + { + std::lock_guard g(pool.mutex_); + if (!pool.event_pool_.empty()) { + auto* event = pool.event_pool_.back().release(); + pool.event_pool_.pop_back(); + return Event(event, destructor); + } + } + // otherwise, allocate a new event that will be returned to the pool on + // destruction. 
+ return Event(std::make_unique(ACL_EVENT_CAPTURE_STREAM_PROGRESS).release(), destructor); +} + +void EventPool::empty_cache() { + for (auto &pool : pools_) { + std::lock_guard g(pool.mutex_); + pool.event_pool_.clear(); + } +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/EventPool.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/EventPool.h new file mode 100644 index 000000000..bbece3d9c --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/EventPool.h @@ -0,0 +1,27 @@ +#ifndef PLUGGABLEALLOCATOR_EVENTPOOL_H +#define PLUGGABLEALLOCATOR_EVENTPOOL_H + +#include +#include + +#include "common.h" + +class EventPool { +public: + using Event = std::unique_ptr>; + // Explicit device count + EventPool() : pools_(c10_npu::device_count()) {} + + Event get(int device); + + void empty_cache(); + +private: + struct PerDevicePool { + alignas(64) std::mutex mutex_; + std::vector> event_pool_; + }; + std::vector pools_; +}; + +#endif diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocator.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocator.cpp new file mode 100644 index 000000000..241864857 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocator.cpp @@ -0,0 +1,233 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "PluggableAllocator.h" + +void local_raw_delete(void *ptr) { + PluggableAllocator::getInstance().free(ptr); +} + +void PluggableAllocator::add_allocated_block(Block *block) { + std::lock_guard lock(mutex); + allocated_blocks[block->ptr] = block; +} + +std::mutex *PluggableAllocator::getFreeMutex() const { + return &npu_free_mutex; +} + +Block 
*PluggableAllocator::get_allocated_block(void *ptr, bool remove) { + std::lock_guard lock(mutex); + auto it = allocated_blocks.find(ptr); + if (it == allocated_blocks.end()) { + return nullptr; + } + Block *block = it->second; + if (remove) { + allocated_blocks.erase(it); + } + return block; +} + +void PluggableAllocator::init(int device_count) { + int max_device_count = 1000000; + TORCH_INTERNAL_ASSERT(device_count < max_device_count, "Error, out of maximum device"); + int size = static_cast(device_allocator.size()); + if (size < device_count) { + device_allocator.resize(device_count); + for (const auto i: c10::irange(size, device_count)) { + device_allocator[i] = std::make_unique(); + } + } +} + +bool PluggableAllocator::initialized() { + return !device_allocator.empty(); +} + +/** allocates a block which is safe to use from the provided stream */ +void *PluggableAllocator::malloc(int device, size_t size, aclrtStream stream) { + TORCH_INTERNAL_ASSERT( + 0 <= device && static_cast(device) < device_allocator.size(), + "device index out of range."); + Block *block = device_allocator[device]->malloc(device, size, stream); + add_allocated_block(block); + void *devPtr = static_cast(block->ptr); + return devPtr; +} + +void PluggableAllocator::free(void *ptr) { + if (!ptr) { + return; + } + Block *block = get_allocated_block(ptr, true); + if (!block) { + AT_ERROR("invalid device pointer: ", ptr); + } + TORCH_INTERNAL_ASSERT( + 0 <= block->device && static_cast(block->device) < device_allocator.size(), + "device index out of range."); + device_allocator[block->device]->free(block); +} + +void PluggableAllocator::setMemoryFraction(double fraction, int device) { + TORCH_INTERNAL_ASSERT( + 0 <= device && static_cast(device) < device_allocator.size(), + "device index out of range."); + TORCH_INTERNAL_ASSERT( + 0 <= fraction && fraction <= 1, + "invalid fraction:", + fraction, + ". 
Please set within (0, 1)."); + + TORCH_CHECK(c10_npu::SetDevice(device) == ACL_ERROR_NONE, "SetDevice failed."); + + device_allocator[device]->set_memory_fraction(fraction); +} + +void PluggableAllocator::emptyCache(bool check_error) { + int count = static_cast(device_allocator.size()); + for (int i = 0; i < count; i++) + device_allocator[i]->empty_cache(check_error); +} + +void PluggableAllocator::setShutdownStats() { + int count = static_cast(device_allocator.size()); + for (int i = 0; i < count; i++) { + device_allocator[i]->dev_set_shutdown_stats(); + } +} + +void *PluggableAllocator::getBaseAllocation(void *ptr, size_t *outSize) { + Block *block = get_allocated_block(ptr); + if (!block) { + AT_ERROR("invalid device pointer: ", ptr); + } + TORCH_INTERNAL_ASSERT( + 0 <= block->device && static_cast(block->device) < device_allocator.size(), + "device index out of range."); + return device_allocator[block->device]->get_base_allocation(block, outSize); +} + +void PluggableAllocator::recordStream(const c10::DataPtr &ptr, c10_npu::NPUStream stream) { + // Empty tensor's storage().data() might be a null ptr. As there is no + // blocks associated with those tensors, it is fine to do nothing here. 
+ if (!ptr.get()) { + return; + } + + // If a tensor is not allocated by this instance, simply skip + // This usually happens when NPU tensors are shared across processes, + // we have implemented reference counting based sharing mechanism to + // guarantee tensors won't be accidentally freed by one process while + // they are still being used in another + if (ptr.get_deleter() != &local_raw_delete) { + return; + } + + Block *block = get_allocated_block(ptr.get()); + // block must not be null reaching here + TORCH_INTERNAL_ASSERT(block != nullptr, "No allocated block can be found"); + TORCH_INTERNAL_ASSERT( + 0 <= block->device && static_cast(block->device) < device_allocator.size(), + "device index out of range."); + device_allocator[block->device]->record_stream(block, stream); +} + +void PluggableAllocator::eraseStream(const c10::DataPtr &ptr, c10_npu::NPUStream stream) { + if (!ptr.get()) { + return; + } + + // If a tensor is not allocated by this instance, simply skip + // This usually happens when NPU tensors are shared across processes, + // we have implemented reference counting based sharing mechanism to + // guarantee tensors won't be accidentally freed by one process while + // they are still being used in another + if (ptr.get_deleter() != &local_raw_delete) { + return; + } + + Block *block = get_allocated_block(ptr.get()); + if (!block) { + AT_ERROR("invalid device pointer: ", ptr.get()); + } + + if (block->stream != c10_npu::getCurrentNPUStream(block->device)) { + // If the Stream applying for tensor block different from + // the stream of submiting event wait task in HCCL synchronize() + // method, the recordSteam can not be erased. + // New tensor creation may use the block before HCCL op is complete. 
+ return; + } + + TORCH_INTERNAL_ASSERT( + 0 <= block->device && static_cast(block->device) < device_allocator.size(), + "device index out of range."); + device_allocator[block->device]->erase_stream(block, stream); +} + +std::vector PluggableAllocator::snapshot() { + std::vector result; + int count = static_cast(device_allocator.size()); + for (int i = 0; i < count; i++) { + auto snap = device_allocator[i]->snapshot(); + result.insert(result.end(), snap.begin(), snap.end()); + } + return result; +} + +c10::DeleterFnPtr PluggableAllocator::raw_deleter() const { + return &local_raw_delete; +} + +void PluggableAllocator::cacheInfo(int dev_id, size_t *cachedAndFree, size_t *largestBlock) { + TORCH_INTERNAL_ASSERT( + 0 <= dev_id && static_cast(dev_id) < device_allocator.size(), + "device index out of range."); + device_allocator[dev_id]->cache_info(cachedAndFree, largestBlock); +} + +void PluggableAllocator::assertValidDevice(int device) { + int device_num = c10_npu::device_count(); /* callers index device_allocator, so the index must also be below its size (it is empty until init() runs) */ + AT_ASSERTM(0 <= device && device < device_num && static_cast<size_t>(device) < device_allocator.size(), "Invalid device argument."); +} + +DeviceStats PluggableAllocator::getDeviceStats(int device) { + assertValidDevice(device); + return device_allocator[device]->get_stats(); +} + +void PluggableAllocator::resetAccumulatedStats(int device) { + assertValidDevice(device); + device_allocator[device]->reset_accumulated_stats(); +} + +void PluggableAllocator::resetPeakStats(int device) { + assertValidDevice(device); + device_allocator[device]->reset_peak_stats(); +} + +void PluggableAllocator::raw_delete(void *ptr) { + this->free(ptr); +} + +void PluggableAllocator::FreeDeviceCachedMemory(int device) { + TORCH_INTERNAL_ASSERT( + 0 <= device && static_cast(device) < device_allocator.size(), + "device index out of range."); + device_allocator[device]->empty_cache(true); +} + +std::string PluggableAllocator::name() { + return "native"; +} \ No newline at end of file diff --git 
a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocator.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocator.h new file mode 100644 index 000000000..b1f81a032 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocator.h @@ -0,0 +1,58 @@ +#ifndef NPU_CACHE_ALLOCATOR_PLUGGABLEALLOCATOR_H +#define NPU_CACHE_ALLOCATOR_PLUGGABLEALLOCATOR_H + +#include "CachingAllocatorConfig.h" +#include "EventPool.h" +#include "DeviceCachingAllocator.h" +#include "Recorder.h" + +class PluggableAllocator { + private: + std::mutex mutex; + + // allocated blocks by device pointer + ska::flat_hash_map allocated_blocks; + + mutable std::mutex npu_free_mutex; + void add_allocated_block(Block *block); + + PluggableAllocator() {} + public: + PluggableAllocator(const PluggableAllocator &) = delete; + PluggableAllocator& operator=(const PluggableAllocator&) = delete; + + static PluggableAllocator& getInstance() { + static PluggableAllocator instance; + return instance; + } + + std::vector> device_allocator; + + std::mutex *getFreeMutex() const; + Block *get_allocated_block(void *ptr, bool remove = false); + void init(int device_count); + bool initialized(); + void *malloc(int device, size_t size, aclrtStream stream); + void free(void *ptr); + void setMemoryFraction(double fraction, int device); + void emptyCache(bool check_error); + void setShutdownStats(); + void *getBaseAllocation(void *ptr, size_t *outSize); + void recordStream(const c10::DataPtr &ptr, c10_npu::NPUStream stream); + void eraseStream(const c10::DataPtr &ptr, c10_npu::NPUStream stream); + std::vector snapshot(); + c10::DataPtr allocate(size_t size) const; + c10::DeleterFnPtr raw_deleter() const; + void cacheInfo(int dev_id, size_t *cachedAndFree, size_t *largestBlock); + void assertValidDevice(int device); + DeviceStats getDeviceStats(int device); + void 
resetAccumulatedStats(int device); + void resetPeakStats(int device); + void *raw_alloc(size_t nbytes); + void *raw_alloc_with_stream(size_t nbytes, aclrtStream stream); + void raw_delete(void *ptr); + void FreeDeviceCachedMemory(int device); + std::string name(); +}; + +#endif \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocatorFunctions.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocatorFunctions.cpp new file mode 100644 index 000000000..b2dcf4e13 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/PluggableAllocatorFunctions.cpp @@ -0,0 +1,314 @@ +#include +#include +#include + +#include "acl_base.h" +#include "acl_rt.h" +#include "PluggableAllocator.h" +#include "Decorator.h" +#include "Recorder.h" + +extern "C" { +void *memory_fragmentation_malloc(size_t size, int device, aclrtStream stream) +{ + void *ptr; + ptr = PluggableAllocator::getInstance().malloc(device, size, stream); + return ptr; +} + +void memory_fragmentation_free(void *ptr, size_t size, int device, aclrtStream stream) +{ + PluggableAllocator::getInstance().free(ptr); +} + +void memory_fragmentation_init(int device_count) +{ + PluggableAllocator::getInstance().init(device_count); +} + +void memory_fragmentation_empty_cache(bool check_error) +{ + PluggableAllocator::getInstance().emptyCache(check_error); /* forward the caller's flag instead of forcing true */ +} + +void memory_fragmentation_memory_fraction(double fraction, int device) +{ + PluggableAllocator::getInstance().setMemoryFraction(fraction, device); +} + +DeviceStats memory_fragmentation_get_device_stats(int device) +{ + return PluggableAllocator::getInstance().getDeviceStats(device); +} + +void my_reset_peak_stats(int device) +{ + return PluggableAllocator::getInstance().resetPeakStats(device); +} +} + +namespace memory_recorder_test { + +using RecSet = + std::set; +void add(RecSet &rec_set, size_t 
tensor_forward_start_tik, + size_t tensor_forward_end_tik, size_t origin_size) { + rec_set.emplace(MemoryRecorder::RecEle(tensor_forward_start_tik, + tensor_forward_end_tik, origin_size)); +} + +bool test_setup_tensor_lc() { + MemoryRecorder recorder; + set_is_precise_match(false); + set_g_record_flag(true); + + size_t forward_start, forward_end; + // 从setup_model到第一个forward阶段结束的tensor全部标记为长生命周期 + return LifeCycleType::LONG_LC == + recorder.get_lc(0, &forward_end, &forward_start); +} + +bool test_forward_tensor_long_lc() { + MemoryRecorder recorder; + set_is_precise_match(false); + set_g_record_flag(true); + + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 1); + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 2); + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 3); + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 4); + + size_t forward_start, forward_end; + // 当前分支中查找 + return LifeCycleType::LONG_LC == + recorder.get_lc(2, &forward_end, &forward_start); +} + +bool test_forward_tensor_short_lc() { + MemoryRecorder recorder; + set_is_precise_match(false); + set_g_record_flag(true); + + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 1); + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 2); + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 3); + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 4); + add(recorder.last_forward_rec_set, 1, 9, 5); + + size_t forward_start, forward_end; + // 当前分支中查找 + return LifeCycleType::DEFAULT_LC == + recorder.get_lc(5, &forward_end, &forward_start); +} + +bool test_forward_other_branch_long_lc() { + MemoryRecorder recorder; + set_is_precise_match(false); + set_g_record_flag(true); + + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 1); + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 2); + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 
3); + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 4); + + RecSet s(MemoryRecorder::_RecorderComparator); + add(s, 1, std::numeric_limits::max(), 5); + add(s, 1, std::numeric_limits::max(), 6); + add(s, 1, std::numeric_limits::max(), 7); + add(s, 1, std::numeric_limits::max(), 8); + + recorder.forward_history.push_back(s); + size_t forward_start, forward_end; + // 在其他分支中查找 + return LifeCycleType::LONG_LC == + recorder.get_lc(6, &forward_end, &forward_start); +} + +bool test_forward_other_branch_short_lc() { + MemoryRecorder recorder; + set_is_precise_match(false); + set_g_record_flag(true); + + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 1); + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 2); + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 3); + add(recorder.last_forward_rec_set, 1, std::numeric_limits::max(), 4); + + RecSet s(MemoryRecorder::_RecorderComparator); + add(s, 1, std::numeric_limits::max(), 5); + add(s, 1, std::numeric_limits::max(), 6); + add(s, 1, 9, 7); + add(s, 1, std::numeric_limits::max(), 8); + + recorder.forward_history.push_back(s); + size_t forward_start, forward_end; + // 在其他分支中查找 + return LifeCycleType::DEFAULT_LC == + recorder.get_lc(7, &forward_end, &forward_start); +} + +bool test_change_forward_end_tik() { + MemoryRecorder recorder; + set_g_record_flag(true); + + add(recorder.forward_rec_set, 1, std::numeric_limits::max(), 1); + add(recorder.forward_rec_set, 1, std::numeric_limits::max(), 2); + add(recorder.forward_rec_set, 1, std::numeric_limits::max(), 3); + add(recorder.forward_rec_set, 1, std::numeric_limits::max(), 4); + + recorder.change_forward_end_tik(1, 100, 3, 0, true); + return recorder.forward_rec_set.find(MemoryRecorder::RecEle(1, 100, 3)) != recorder.forward_rec_set.end(); +} +} + + +namespace malloc_recorder_test { + +using RecSet = + std::set; +void add(RecSet &rec_set, size_t start_tik, + size_t end_tik, size_t round_size) { + 
rec_set.emplace(MallocRecorder::MallocRecorderEle(start_tik, + end_tik, round_size)); +} + +bool test_step_tensor_long_lc() { + MallocRecorder recorder; + MallocRecorder::in_step_flag = true; + + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 1); + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 2); + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 3); + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 4); + + size_t step_start, step_end; + // find in current branch + return recorder.predict_long(2, &step_end, &step_start); +} + + +bool test_step_tensor_short_lc() { + MallocRecorder recorder; + MallocRecorder::in_step_flag = true; + + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 1); + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 2); + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 3); + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 4); + add(recorder.last_rec_set, 1, 9, 5); + + size_t step_start, step_end; + // find in current branch + return !recorder.predict_long(5, &step_end, &step_start); +} + + +bool test_step_other_branch_long_lc() { + MallocRecorder recorder; + MallocRecorder::in_step_flag = true; + + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 1); + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 2); + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 3); + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 4); + + RecSet s(MallocRecorder::_RecorderComparator); + add(s, 1, std::numeric_limits::max(), 5); + add(s, 1, std::numeric_limits::max(), 6); + add(s, 1, std::numeric_limits::max(), 7); + add(s, 1, std::numeric_limits::max(), 8); + + recorder.step_history.push_back(s); + size_t step_start, step_end; + // find in other branch + return recorder.predict_long(6, &step_end, &step_start); +} + + +bool test_step_other_branch_short_lc() { + MallocRecorder recorder; + MallocRecorder::in_step_flag = true; + + add(recorder.last_rec_set, 
1, std::numeric_limits::max(), 1); + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 2); + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 3); + add(recorder.last_rec_set, 1, std::numeric_limits::max(), 4); + + RecSet s(MallocRecorder::_RecorderComparator); + add(s, 1, std::numeric_limits::max(), 5); + add(s, 1, std::numeric_limits::max(), 6); + add(s, 1, 9, 7); + add(s, 1, std::numeric_limits::max(), 8); + + recorder.step_history.push_back(s); + size_t step_start, step_end; + // find in other branch + return !recorder.predict_long(7, &step_end, &step_start); +} + +bool test_change_end_tik() { + MallocRecorder recorder; + MallocRecorder::in_step_flag = true; + + add(recorder.rec_set, 1, std::numeric_limits::max(), 1); + add(recorder.rec_set, 1, std::numeric_limits::max(), 2); + add(recorder.rec_set, 1, std::numeric_limits::max(), 3); + add(recorder.rec_set, 1, std::numeric_limits::max(), 4); + + recorder.change_end_tik(1, 100, 3, 0, true); + return recorder.rec_set.find(MallocRecorder::MallocRecorderEle(1, 100, 3)) != recorder.rec_set.end(); +} + +} + +namespace device_caching_allocator_test { +#include "DeviceCachingAllocator.h" + +bool test_round_size() { + return DeviceCachingAllocator::round_size(200) == kMinBlockSize && DeviceCachingAllocator::round_size(1025) == 3 * kMinBlockSize; +} +} + + +namespace caching_allocator_config_test { +#include "CachingAllocatorConfig.h" + +bool test_parse_args() { + auto &config = CachingAllocatorConfig::instance(); + const char *args = "max_split_size_mb:40, garbage_collection_threshold:0.5, expandable_segments:False, " + "default_lc_threshold:128.5, open_memory_optimize:1"; + config.parseArgs(args); + return config.max_split_size() == 40 * 1024 * 1024 && config.garbage_collection_threshold() == 0.5 && + !config.expandable_segments() && config.default_lc_threshold() == 128.5 && config.open_memory_optimize(); +} + +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("memory_recorder_start", 
&Decorator::memory_recorder_start, "start mark the life cycle of a tensor in forward"); + m.def("memory_recorder_end", &Decorator::memory_recorder_end, "end mark the life cycle of a tensor in forward"); + m.def("malloc_recorder_start", &Decorator::malloc_recorder_start, "start mark the life cycle of a tensor in step"); + m.def("malloc_recorder_end", &Decorator::malloc_recorder_end, "end mark the life cycle of a tensor in step"); + m.def("precise_match_start", &Decorator::precise_match_start, "start mark the life cycle of a tensor in optimizer init stage"); + m.def("precise_match_end", &Decorator::precise_match_end, "end mark the life cycle of a tensor in optimizer init stage"); + + // 以下为ut用接口,非业务接口 + m.def("test_setup_tensor_lc", &memory_recorder_test::test_setup_tensor_lc, ""); + m.def("test_forward_tensor_long_lc", &memory_recorder_test::test_forward_tensor_long_lc, ""); + m.def("test_forward_tensor_short_lc", &memory_recorder_test::test_forward_tensor_short_lc, ""); + m.def("test_forward_other_branch_long_lc", &memory_recorder_test::test_forward_other_branch_long_lc, ""); + m.def("test_forward_other_branch_short_lc", &memory_recorder_test::test_forward_other_branch_short_lc, ""); + m.def("test_change_forward_end_tik", &memory_recorder_test::test_change_forward_end_tik, ""); + + m.def("test_step_tensor_long_lc", &malloc_recorder_test::test_step_tensor_long_lc, ""); + m.def("test_step_tensor_short_lc", &malloc_recorder_test::test_step_tensor_short_lc, ""); + m.def("test_step_other_branch_long_lc", &malloc_recorder_test::test_step_other_branch_long_lc, ""); + m.def("test_step_other_branch_short_lc", &malloc_recorder_test::test_step_other_branch_short_lc, ""); + m.def("test_change_end_tik", &malloc_recorder_test::test_change_end_tik, ""); + + m.def("test_round_size", &device_caching_allocator_test::test_round_size, ""); + + m.def("test_parse_args", &caching_allocator_config_test::test_parse_args, ""); +} \ No newline at end of file diff --git 
a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Recorder.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Recorder.cpp new file mode 100644 index 000000000..e4c544e7b --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Recorder.cpp @@ -0,0 +1,317 @@ +#include +#include "Recorder.h" + +unsigned int pool_idx = 0; +bool g_record_flag = false; +size_t g_record_cnt = 0; +bool is_precise_match = false; +std::mutex MemoryRecorder::lock; // protect the static value +std::unordered_set MemoryRecorder::recorder_set; + +bool MallocRecorder::in_step_flag; +std::mutex MallocRecorder::lock; +std::unordered_set MallocRecorder::recorder_set; // Recorder_set records MallocRecorder objects + +void set_g_record_flag(bool flag) { + g_record_flag = flag; +} + +void set_is_precise_match(bool flag) { + is_precise_match = flag; +} + +bool MemoryRecorder::_RecorderComparator(const RecEle a, const RecEle b) { + if (a.tensor_forward_start_tik != b.tensor_forward_start_tik) { + return a.tensor_forward_start_tik < b.tensor_forward_start_tik; + } else if (a.tensor_forward_end_tik != b.tensor_forward_end_tik) { + return a.tensor_forward_end_tik < b.tensor_forward_end_tik; + } else + return a.size < b.size; +} + +MemoryRecorder::MemoryRecorder() + : lc_id_cnt(0), forward_tik(1), forward_count(0), forward_rec_set(_RecorderComparator), last_forward_rec_set(_RecorderComparator) { + lock.lock(); + recorder_set.insert(this); + lock.unlock(); +} + +MemoryRecorder::~MemoryRecorder() { + lock.lock(); + recorder_set.erase(this); + lock.unlock(); +} + +void MemoryRecorder::add(size_t tensor_forward_start_tik, size_t tensor_forward_end_tik, size_t origin_size) { + if (_check()) { + forward_rec_set.emplace(tensor_forward_start_tik, tensor_forward_end_tik, origin_size); + } +} + +LifeCycleType MemoryRecorder::get_lc(size_t origin_size, size_t *tensor_forward_end, size_t 
*tensor_forward_start) { + // Mark all tensors in the initialization phase of the optimizer as long lifecycle + if (is_precise_match) { + return LifeCycleType::LONG_LC; + } + // All tensors that are not in the forward stage are marked as short lifecycles + if (!_check()) { + return LifeCycleType::DEFAULT_LC; + } + // All tensors from setup_model to the end of the first forward stage are marked as long lifecycle + if(last_forward_rec_set.size() == 0) { + return LifeCycleType::LONG_LC; + } + + long lc_id = lc_id_cnt++; + // Find tensor in all branches + // Find in current branch + auto it1 = last_forward_rec_set.upper_bound( + RecEle(forward_tik, std::numeric_limits::max(), std::numeric_limits::max())); + while (it1 != last_forward_rec_set.begin()) { + it1--; + if (it1->size == origin_size && it1->tensor_forward_end_tik == std::numeric_limits::max()) { + *tensor_forward_start = it1->tensor_forward_start_tik; + *tensor_forward_end = it1->tensor_forward_end_tik; + return LifeCycleType::LONG_LC; + } else if (it1->size == origin_size) { + // Record the forward start and end periods of the current tensor + *tensor_forward_start = it1->tensor_forward_start_tik; + *tensor_forward_end = it1->tensor_forward_end_tik; + return LifeCycleType::DEFAULT_LC; + } + } + + // Search in other branches + for (auto &other_branch : forward_history) { + it1 = other_branch.upper_bound( + RecEle(forward_tik, std::numeric_limits::max(), std::numeric_limits::max())); + while (it1 != other_branch.begin()) { + it1--; + if (it1->size == origin_size && it1->tensor_forward_end_tik == std::numeric_limits::max()) { + *tensor_forward_start = it1->tensor_forward_start_tik; + *tensor_forward_end = it1->tensor_forward_end_tik; + return LifeCycleType::LONG_LC; + } else if (it1->size == origin_size) { + *tensor_forward_start = it1->tensor_forward_start_tik; + *tensor_forward_end = it1->tensor_forward_end_tik; + return LifeCycleType::DEFAULT_LC; + } + } + } + // If it cannot be found in all branches and the 
number of tensors in the current branch is greater than lc_id, + // it indicates that it may be a new branch. + if (last_forward_rec_set.size() >= lc_id) { + has_Another_Situation = true; + } + + // If it cannot be found in all branches, it indicates that it may be a new branch + return LifeCycleType::DEFAULT_LC; +} + + +void MemoryRecorder::change_forward_end_tik(size_t start_tik, size_t end_tik, size_t origin_size, unsigned int forward_distance, bool in_forward) { + if (in_forward && _check() && forward_distance == 0) { + auto it = forward_rec_set.find(RecEle(start_tik, std::numeric_limits::max(), origin_size)); + if (it != forward_rec_set.end()) { + forward_rec_set.erase(it); + forward_rec_set.emplace(RecEle(start_tik, end_tik, origin_size)); + } + } +} + +void MemoryRecorder::start_record() { + if (!is_precise_match && g_record_cnt++) { + g_record_flag = true; + } + std::lock_guard<std::mutex> guard(lock); /* hold the static mutex while walking the shared recorder_set, as end_record() does; the old code unlocked a mutex this thread never locked */ + for (auto i:recorder_set) { + i->forward_tik = 1; + } +} + +void MemoryRecorder::_end_record() { + lc_id_cnt = 0; + forward_tik = 1; + forward_count++; + // If there is a fork, store the tensor record of the previous forward in forward_history + if (forward_count == 0 || has_Another_Situation) { + forward_history.emplace_back(forward_rec_set); + has_Another_Situation = false; + } + last_forward_rec_set = forward_rec_set; + forward_rec_set.clear(); +} + +void MemoryRecorder::end_record() { + g_record_flag = false; + lock.lock(); + for (auto i:recorder_set) { + i->_end_record(); + } + lock.unlock(); +} + + +bool MallocRecorder::_RecorderComparator(const MallocRecorderEle a, const MallocRecorderEle b) { + if (a.start_tik != b.start_tik) { + return a.start_tik < b.start_tik; + } else if (a.end_tik != b.end_tik) { + return a.end_tik < b.end_tik; + } else + return a.size < b.size; +} + +bool MallocRecorder::_check() { + return in_step_flag; +} + +void MallocRecorder::add(size_t start_tik, size_t end_tik, size_t round_size) { + if (MallocRecorder::_check()) { + 
rec_set.emplace(MallocRecorderEle(start_tik, end_tik, round_size)); + } +} + +void MallocRecorder::change_end_tik(size_t start_tik, size_t end_tik, size_t round_size,size_t step_distance, bool in_step) { + if (in_step && MallocRecorder::_check() && step_distance == 0) { + auto it = rec_set.find(MallocRecorderEle(start_tik, std::numeric_limits::max(), round_size)); + if (it != rec_set.end()) { + rec_set.erase(it); + rec_set.emplace(start_tik, end_tik, round_size); + } + } +} + +bool MallocRecorder::predict_long(size_t round_size, size_t *tensor_step_end, size_t *tensor_step_start) { + if (!MallocRecorder::_check()) return false; + long step_lc_id = step_lc_id_cnt++; + // Find in the current branch + auto it1 = last_rec_set.upper_bound( + MallocRecorderEle(tik, std::numeric_limits::max(), std::numeric_limits::max())); + while (it1 != last_rec_set.begin()) { + it1--; + if (it1->size == round_size && it1->end_tik == std::numeric_limits::max()) { + *tensor_step_start = it1->start_tik; + *tensor_step_end = it1->end_tik; + return true; + } else if (it1->size == round_size) { + // Record the start and end times of the tensor in the step phase + *tensor_step_start = it1->start_tik; + *tensor_step_end = it1->end_tik; + return false; + } + } + // Search in the history branch + for (auto &other_branch : step_history) { + it1 = other_branch.upper_bound( + MallocRecorderEle(tik, std::numeric_limits::max(), std::numeric_limits::max())); + while (it1 != other_branch.begin()) { + it1--; + if (it1->size == round_size && it1->end_tik == std::numeric_limits::max()) { + *tensor_step_start = it1->start_tik; + *tensor_step_end = it1->end_tik; + return true; + } else if (it1->size == round_size) { + *tensor_step_start = it1->start_tik; + *tensor_step_end = it1->end_tik; + return false; + } + } + } + // If it cannot be found in all branches and the number of tensors in the current branch is greater than step_lc_id, + // it indicates that it may be a new branch + if (last_rec_set.size() >= 
step_lc_id) { + has_Another_Situation = true; + } + return false; +} + +bool MallocRecorder::has_tensor_in_step(size_t tensor_step_start, size_t tensor_step_end, size_t seg_size) { + // Find in the current branch + auto it1 = last_rec_set.upper_bound( + MallocRecorderEle(tensor_step_start, std::numeric_limits::max(), std::numeric_limits::max())); + while (it1 != last_rec_set.end()) { + size_t alloc_size = MallocRecorder::get_allocation_size(it1->size); + if (it1->end_tik == std::numeric_limits::max()) { + alloc_size = MallocRecorder::get_allocation_size(it1->size, LifeCycleType::LONG_LC); + } + + if (it1->start_tik < tensor_step_end && alloc_size == seg_size) { + return true; + } + it1++; + } + + // Search in the history branch + for (auto &other_branch : step_history) { + it1 = other_branch.upper_bound( + MallocRecorderEle(tensor_step_start, std::numeric_limits::max(), std::numeric_limits::max())); + while (it1 != other_branch.end()) { + size_t alloc_size = MallocRecorder::get_allocation_size(it1->size); + if (it1->end_tik == std::numeric_limits::max()) { + alloc_size = MallocRecorder::get_allocation_size(it1->size, LifeCycleType::LONG_LC); + } + + if (it1->start_tik < tensor_step_end && alloc_size == seg_size) { + return true; + } + it1++; + } + } + return false; +} + +size_t MallocRecorder::get_allocation_size(size_t size, LifeCycleType lc) { + if (lc == LifeCycleType::LONG_LC) { + if (size <= kSmallSize) { + return kSmallBuffer; + } else { + return kRoundLarge * ((size + kRoundLarge - 1) / kRoundLarge); + } + } + if (size <= kSmallSize) { + return kSmallBuffer; + } else if (size < kMinLargeAlloc) { + return kLargeBuffer; + } else { + return kRoundLarge * ((size + kRoundLarge - 1) / kRoundLarge); + } +} + +MallocRecorder::MallocRecorder() : step_lc_id_cnt(0), tik(1), step_count(0), rec_set(_RecorderComparator), last_rec_set(_RecorderComparator) { + lock.lock(); + recorder_set.insert(this); + lock.unlock(); +} + +MallocRecorder::~MallocRecorder() { + lock.lock(); 
+ recorder_set.erase(this); + lock.unlock(); +} + +void MallocRecorder::start_record() { + in_step_flag = true; + lock.lock(); + for (auto i : recorder_set) { + i->lock.unlock(); + i->tik = 1; + } +} + +void MallocRecorder::end_record() { + in_step_flag = false; + lock.lock(); + for (auto i : recorder_set) { + i->lock.unlock(); + i->step_lc_id_cnt = 0; + i->tik = 1; + i->step_count++; + // If there is a fork, store the tensor record of the previous forward in forward_history + if (i->step_count == 0 || i->has_Another_Situation) { + i->step_history.emplace_back(i->rec_set); + i->has_Another_Situation = false; + } + i->last_rec_set = i->rec_set; + i->rec_set.clear(); + } +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Recorder.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Recorder.h new file mode 100644 index 000000000..707804b95 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/Recorder.h @@ -0,0 +1,146 @@ +#ifndef NPU_CACHE_ALLOCATOR_RECORDER_H +#define NPU_CACHE_ALLOCATOR_RECORDER_H +#include +#include +#include +#include +#include +#include + +enum struct LifeCycleType : uint64_t { + DEFAULT_LC = 0, + FIRST_STEP_LC, + LONG_LC, +}; + +extern bool g_record_flag; // is the record in the forward or not +extern size_t g_record_cnt; +extern bool is_precise_match; // record whether it matches precisely +extern unsigned int pool_idx; + +constexpr size_t kMinBlockSize = 512; // all sizes are rounded to at least 512 bytes +constexpr size_t kSmallSize = 1048576; // largest "small" allocation is 1 MiB +constexpr size_t kSmallBuffer = 2097152; // "small" allocations are packed in 2 MiB blocks +constexpr size_t kLargeBuffer = 20971520; // "large" allocations may be packed in 20 MiB blocks +constexpr size_t kMinLargeAlloc = 10485760; // allocations between 1 and 10 MiB may use kLargeBuffer +constexpr size_t 
kRoundLarge = 2097152; // round up large allocs to 2 MB +constexpr size_t kSizeLimit = 1395864371; // 1.3G = 1395864371 1.5G=1610612736 +constexpr size_t myMaxSplitSize = 1800000000; // 1.67G +constexpr size_t kUnitMB = 1024 * 1024; // 1MiB = 1024 * 1024 bytes + +// Check if it is in the forward stage +static bool _check() { return g_record_flag; } + +// Record tense information during the forward phase +void set_g_record_flag(bool); +void set_is_precise_match(bool); + +struct MemoryRecorder { + static std::mutex lock; // protect the static value + static std::unordered_set recorder_set; + + long lc_id_cnt; + + // Record the tensor in the forward phase of RecEle: + // start creation time, release time, tense size, and whether to release it + struct RecEle { + size_t tensor_forward_start_tik; // tensor forward start time + size_t tensor_forward_end_tik; // tensor forward release time + size_t size; + RecEle(size_t tensor_forward_start_tik, size_t tensor_forward_end_tik, size_t size) + : tensor_forward_start_tik(tensor_forward_start_tik), + tensor_forward_end_tik(tensor_forward_end_tik), + size(size) {} + RecEle() = default; + }; + + // specify sorting in the set + static bool _RecorderComparator(const RecEle a, const RecEle b); + + // forward_rec_set: record the information of tensors in the current forward + std::set forward_rec_set; + // last_forward_rec_set: record the information of the tensor in the previous forward + std::set last_forward_rec_set; + size_t forward_tik; // increase after malloc + unsigned int forward_count; // record forward count + + // Record whether there is a fork in the current forward stage. + // If there is a fork, insert the current forward_rec_set into the forward_history array. + bool has_Another_Situation = false; + // Forward_history stores possible branches that may arise during the training process. 
+ std::vector> forward_history; + + MemoryRecorder(); + + ~MemoryRecorder(); + + void add(size_t tensor_forward_start_tik, size_t tensor_forward_end_tik, size_t origin_size); + + void change_forward_end_tik(size_t start_tik, size_t end_tik, size_t origin_size, unsigned int forward_distance, + bool in_forward); + + LifeCycleType get_lc(size_t origin_size, size_t *tensor_forward_end, size_t *tensor_forward_start); + + static void start_record(); + + void _end_record(); + + static void end_record(); +}; + +// Record information for the step stage +struct MallocRecorder { + static bool in_step_flag; + static std::mutex lock; + static std::unordered_set recorder_set; // Recorder_set records MallocRecorder objects + + long step_lc_id_cnt; + + // Record the start creation time, release time, and size of the tensor in MallocRecorderEle + struct MallocRecorderEle { + size_t start_tik; // tensor alloc start time + size_t end_tik; // tensor alloc release time + size_t size; // round_size + + MallocRecorderEle(size_t start_tik, size_t end_tik, size_t size) + : start_tik(start_tik), end_tik(end_tik), size(size) {} + }; + + static bool _RecorderComparator(const MallocRecorderEle a, const MallocRecorderEle b); + + // rec_set records the information of tensors in the current step + std::set rec_set; + std::set last_rec_set; + size_t tik; // increase after malloc + unsigned int step_count; // record step times + + // Record whether there is a fork in the current step stage. 
+ // If there is a fork, insert the current rec_set into the step_history array + bool has_Another_Situation = false; + // step_history stores possible branches that may occur during the training process + std::vector> step_history; + + static bool _check(); + + void add(size_t start_tik, size_t end_tik, size_t round_size); + + void change_end_tik(size_t start_tik, size_t end_tik, size_t round_size, size_t step_distance, bool in_step); + + bool predict_long(size_t round_size, size_t *tensor_step_end, size_t *tensor_step_start); + + // Determine if there will be a size tensor during the tensor_step_start to tensor_step_end stages. + bool has_tensor_in_step(size_t tensor_step_start, size_t tensor_step_end, size_t seg_size); + + static size_t get_allocation_size(size_t size, LifeCycleType lc = LifeCycleType::DEFAULT_LC); + + MallocRecorder(); + + ~MallocRecorder(); + + static void start_record(); + + static void end_record(); +}; + + +#endif diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/common.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/common.cpp new file mode 100644 index 000000000..701392cc0 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/common.cpp @@ -0,0 +1,142 @@ +#include "common.h" + + +std::mutex alr_lock; +std::unordered_map alr_recorder; +size_t alr_total_size = 0; +size_t alr_get_max() { + static size_t rc = 0; + if (rc == 0) { + char *e = getenv("ALR_MAX"); + if (e) { + std::string s(e); + long long alr_max = 0; + try { + alr_max = std::stoll(s); + } catch (const std::invalid_argument& e) { + TORCH_CHECK(false, "Error, expecting digital string in ALR_MAX"); + } catch (const std::out_of_range& e) { + TORCH_CHECK(false, "Error, out of long long range"); + } + if (alr_max < 0) { + TORCH_CHECK(false, "Error, alr_max cannot be nagative number"); + } else { + rc = static_cast(alr_max); + } + } else { + rc = LLONG_MAX; 
+ } + printf("ALR MAX SET %lu\n", rc); + } + return rc; +} + + +aclError aclrtMalloc_wrapper(void **devPtr, size_t size, aclrtMemMallocPolicy policy) { + alr_lock.lock(); + if (alr_total_size + size > alr_get_max()) { + alr_lock.unlock(); + return ACL_ERROR_RT_MEMORY_ALLOCATION; + } + alr_lock.unlock(); + + auto rc = aclrtMallocAlign32(devPtr, size, policy); + if (rc != ACL_ERROR_NONE) { + return rc; + } + alr_lock.lock(); + alr_recorder[*devPtr] = size; + alr_total_size += size; + alr_lock.unlock(); + return rc; +} + + +aclError aclrtFree_wrapper(void *devPtr) { + alr_lock.lock(); + TORCH_INTERNAL_ASSERT(alr_total_size >= alr_recorder[devPtr]); + alr_total_size -= alr_recorder[devPtr]; + alr_recorder.erase(devPtr); + alr_lock.unlock(); + return aclrtFree(devPtr); +} + + +void update_stat(Stat &stat, int64_t amount) { + stat.current += amount; + stat.peak = std::max(stat.current, stat.peak); + if (amount > 0) { + stat.allocated += amount; + } + if (amount < 0) { + stat.freed += -amount; + } +} + + +void reset_accumulated_stat(Stat& stat) { + stat.allocated = 0; + stat.freed = 0; +} + + +void reset_peak_stat(Stat& stat) { stat.peak = stat.current; } + + +void update_stat_array(StatArray& stat_array, int64_t amount, const StatTypes& stat_types) { + for_each_selected_stat_type(stat_types, + [&stat_array, amount](size_t stat_type) { update_stat(stat_array[stat_type], amount); }); +} + + +std::string get_block_pool_str(BlockPoolType type) { + switch (type) { + case BLOCK_POOL_DEFAULT: + return "default"; + case BLOCK_POOL_LONG: + return "long"; + case BLOCK_POOL_SHORT: + return "short"; + default: + return "unknown"; + } + AT_ASSERT(0); + return ""; +} + + +bool BlockComparatorSize(const Block* a, const Block* b) { + if (a->stream != b->stream) { + return reinterpret_cast(a->stream) < reinterpret_cast(b->stream); + } + if (a->size != b->size) { + return a->size < b->size; + } + return reinterpret_cast(a->ptr) < reinterpret_cast(b->ptr); +} + +bool 
BlockComparatorAddress(const Block* a, const Block* b) { + if (a->stream != b->stream) { + return reinterpret_cast(a->stream) < reinterpret_cast(b->stream); + } + return reinterpret_cast(a->ptr) < reinterpret_cast(b->ptr); +} + +std::string format_size(uint64_t size) { + std::ostringstream os; + os.precision(2); + os << std::fixed; + if (size <= 1024) { + os << size << " bytes"; + } else if (size <= 1048576) { + os << (size / 1024.0); + os << " KiB"; + } else if (size <= 1073741824ULL) { + os << (size / 1048576.0); + os << " MiB"; + } else { + os << (size / 1073741824.0); + os << " GiB"; + } + return os.str(); +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/common.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/common.h new file mode 100644 index 000000000..541d6b682 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/common.h @@ -0,0 +1,399 @@ +#ifndef NPU_CACHE_ALLOCATOR_COMMON_H +#define NPU_CACHE_ALLOCATOR_COMMON_H + +#include +#include +#include +#include + +#include +#include "acl_base.h" +#include "acl_rt.h" + +#include "torch_npu/csrc/core/npu/NPUStream.h" + +#define MEMORY_RECORDER_DEBUG + +extern std::mutex alr_lock; +extern std::unordered_map alr_recorder; +extern size_t alr_total_size; + +size_t alr_get_max(); + +aclError aclrtMalloc_wrapper(void **devPtr, size_t size, aclrtMemMallocPolicy policy); + +aclError aclrtFree_wrapper(void *devPtr); + +struct Stat { + int64_t current = 0; + int64_t peak = 0; + int64_t allocated = 0; + int64_t freed = 0; +}; + +enum struct StatType : uint64_t { + AGGREGATE = 0, + SMALL_POOL = 1, + LARGE_POOL = 2, + NUM_TYPES = 3 // remember to update this whenever a new stat type is added +}; + +// Struct containing info of an allocation block (i.e. a fractional part of a cudaMalloc).. 
+struct BlockInfo { + int64_t size = 0; + int64_t requested_size = 0; + int32_t gc_counter = 0; + bool allocated = false; + bool active = false; +}; + +enum BlockPoolType : int { + BLOCK_POOL_DEFAULT, + BLOCK_POOL_SHORT, + BLOCK_POOL_LONG, +}; + +struct SegmentInfo { + int64_t device = 0; + uintptr_t address = 0; + int64_t total_size = 0; + int64_t requested_size = 0; + int64_t allocated_size = 0; + int64_t active_size = 0; + bool is_large = false; + bool is_expandable = false; +#ifdef MEMORY_RECORDER_DEBUG + BlockPoolType type; + std::string type_str; +#endif + std::vector blocks; +}; + +typedef std::array(StatType::NUM_TYPES)> StatArray; + +struct DeviceStats { + // COUNT: allocations requested by client code + StatArray allocation; + // COUNT: number of allocated segments from npuMalloc(). + StatArray segment; + // COUNT: number of active memory blocks (allocated or used by stream) + StatArray active; + // COUNT: number of inactive, split memory blocks (unallocated but can't be released via npuFree) + StatArray inactive_split; + + // SUM: bytes requested by client code + StatArray allocated_bytes; + // SUM: bytes reserved by this memory allocator (both free and used) + StatArray reserved_bytes; + // SUM: bytes within active memory blocks + StatArray active_bytes; + // SUM: bytes within inactive, split memory blocks + StatArray inactive_split_bytes; + // SUM: bytes requested by client code + StatArray requested_bytes; + + // COUNT: total number of failed calls to NPU malloc necessitating cache flushes. + int64_t num_alloc_retries = 0; + + // COUNT: total number of OOMs (i.e. failed calls to NPU after cache flush) + int64_t num_ooms = 0; + + // COUNT: total number of oversize blocks allocated from pool + Stat oversize_allocations; + + // COUNT: total number of oversize blocks requiring malloc + Stat oversize_segments; + + // SIZE: maximum block size that is allowed to be split. 
+ int64_t max_split_size = 0; +}; + +using stream_set = ska::flat_hash_set; + +using StatTypes = std::array(StatType::NUM_TYPES)>; + +void update_stat(Stat &stat, int64_t amount); + +void reset_accumulated_stat(Stat& stat); + +void reset_peak_stat(Stat& stat); + +template +void for_each_selected_stat_type(const StatTypes& stat_types, Func f) { + for (const auto stat_type : c10::irange(stat_types.size())) { + if (stat_types[stat_type]) { + f(stat_type); + } + } +} + +void update_stat_array(StatArray& stat_array, int64_t amount, const StatTypes& stat_types); + +struct Block; +using Comparison = bool (*)(const Block*, const Block*); +bool BlockComparatorSize(const Block* a, const Block* b); +bool BlockComparatorAddress(const Block* a, const Block* b); + +struct BlockPool{ + std::set blocks; + std::set unmapped; + const bool is_small; + const BlockPoolType type; + + BlockPool(bool small, BlockPoolType type) + : blocks(BlockComparatorSize), unmapped(BlockComparatorAddress), is_small(small), type(type) {} +}; + +std::string get_block_pool_str(BlockPoolType type); + +struct ExpandableSegment; + +struct Block { + int device; // npu + aclrtStream stream; // allocation stream + stream_set stream_uses; // streams on which the block was used + size_t size; // block size in bytes + size_t requested_size; // memory originally requested + BlockPool* pool; // owning memory pool + void* ptr; // memory address + bool allocated; // in-use flag + bool mapped{true}; // is the virtual address range this Block references + // backed by physical pages. Always true when + // expandable_segment_ is null. When false + // This Block will be aligned to the segment size + // of its expandable_segment_. 
+ Block* prev; // prev block if split from a larger allocation + Block* next; // next block if split from a larger allocation + int event_count; // number of outstanding NPU events + size_t start_tik{0}; // Record the time when the block was created during the step phase + size_t forward_start_tik{0}; // Record the time when the block was created in the forward phase + size_t tensor_size{0}; // Tensor_size is the size processed by orig_size + size_t orig_size{0}; // origin tensor size + int step_count{0}; // how many steps have passed (Record how many steps the current block has passed) + int forward_count{0}; // Record how many forwards have been passed + bool in_step{0}; // Determine whether the current block is in the step stage + bool in_forward{0}; // Determine if the current block is in the forward stage + int gc_count{0}; // counter for prioritizing older / less useful blocks for + // garbage collection + ExpandableSegment* expandable_segment_{nullptr}; + + Block(int device, aclrtStream stream, size_t size, BlockPool* pool, void* ptr) + : device(device), + stream(stream), + stream_uses(), + size(size), + requested_size(0), + pool(pool), + ptr(ptr), + allocated(0), + prev(nullptr), + next(nullptr), + event_count(0), + gc_count(0) {} + + // constructor for search key + Block(int device, aclrtStream stream, size_t size) + : device(device), + stream(stream), + stream_uses(), + size(size), + requested_size(0), + pool(nullptr), + ptr(nullptr), + allocated(0), + prev(nullptr), + next(nullptr), + event_count(0), + gc_count(0) {} + + bool is_split() const { return (prev != nullptr) || (next != nullptr); } + + void splice(Block* before, Block* after) { + if (before) { + before->next = this; + } + prev = before; + if (after) { + after->prev = this; + } + next = after; + } +}; + +struct SegmentRange { + char* ptr; + size_t size; + SegmentRange(void* p, size_t s) : ptr(static_cast(p)), size(s) {} +}; + +struct ExpandableSegment { + ExpandableSegment(int device, 
aclrtStream stream, size_t size) + : device_(device), + stream_(stream), + max_handles_(0), + // 2MB for small pool, 20MB for large pool + segment_size_(size) { + size_t device_free; + size_t device_total; + TORCH_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, &device_free, &device_total) == ACL_ERROR_NONE, \ + "Error, failed to get memory info"); + TORCH_INTERNAL_ASSERT(device_free <= device_total); + // we allocate enough address space for 1 1/8 the total memory on the NPU. + // This allows for some cases where we have to unmap pages earlier in the + // segment to put them at the end. + constexpr size_t extra_space_factor = 8; + max_handles_ = numSegments(device_total + device_total / extra_space_factor); + TORCH_CHECK(aclrtReserveMemAddress(&ptr_, segment_size_ * max_handles_, 0, NULL, 1) == ACL_ERROR_NONE, \ + "Error, failed to reserve memory address"); + } + // begin must be aligned to segment_size_. + // returns the actual range mapped, which may be + // greater than requested if size is not aligned to segment_size_. 
+ // return size of 0 indicates OOM + SegmentRange map(SegmentRange range) { + auto begin = segmentLeft(range.ptr); + auto end = segmentRight(range.ptr + range.size); + if (begin == end) { + return rangeFromHandles(begin, end); + } + while (end > handles_.size()) { + handles_.emplace_back(c10::nullopt); + } + for (auto i : c10::irange(begin, end)) { + aclrtDrvMemHandle handle = nullptr; + aclrtPhysicalMemProp prop = {}; + prop.handleType = ACL_MEM_HANDLE_TYPE_NONE; + prop.allocationType = ACL_MEM_ALLOCATION_TYPE_PINNED; + prop.memAttr = ACL_HBM_MEM_HUGE; + prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE; + prop.location.id = device_; + prop.reserve = 0; + auto status = aclrtMallocPhysical(&handle, segment_size_, &prop, 0); + if (status == ACL_ERROR_RT_MEMORY_ALLOCATION) { + for (auto j : c10::irange(begin, i)) { + auto h = handles_.at(j).value(); + handles_.at(j) = c10::nullopt; + TORCH_CHECK(aclrtFreePhysical(h) == ACL_ERROR_NONE, \ + "Error, failed to free physical memory"); + } + trimHandles(); + return rangeFromHandles(begin, begin); + } + handles_.at(i) = handle; + } + for (auto i : c10::irange(begin, end)) { + TORCH_CHECK(aclrtMapMem(ptr_ + i * segment_size_, segment_size_, 0, handles_.at(i).value(), 0) == ACL_ERROR_NONE, \ + "Error, failed to map memory"); + } + return rangeFromHandles(begin, end); + } + + // unmaps all the completely empty segment_size_ segments between + // [begin, begin + size), returns the offset where the range begin, + // and the actual size unmapped (multiple of segment_size_) + SegmentRange unmap(SegmentRange range) { + auto begin = segmentRight(range.ptr); + auto end = segmentLeft(range.ptr + range.size); + if (begin >= end) { + return SegmentRange{range.ptr, 0}; + } + unmapHandles(begin, end); + return rangeFromHandles(begin, end); + } + + char* ptr() const { return (char*)ptr_; } + + size_t size() const { return max_handles_ * segment_size_; } + + ~ExpandableSegment() { + forEachAllocatedRange([&](size_t begin, size_t end) { 
unmapHandles(begin, end); }); + TORCH_CHECK(aclrtReleaseMemAddress(ptr_) == ACL_ERROR_NONE, \ + "Error, failed to release memory address"); + } + +private: + void unmapHandles(size_t begin, size_t end) { + // note: unlike aclrtFree, MemUnmap and MemRelease do + // not appear to synchronize in all cases, so we have to wait for the + // stream to finish before this memory is truly free. + + // cannot call c10::npu::stream_synchronize because + // it might grab the GIL which can lead to a deadlock + // Locking order must be GIL -> Allocator Lock + TORCH_CHECK(aclrtSynchronizeStream(stream_) == ACL_ERROR_NONE, "aclrtSynchronizeStream failed."); + for (auto i : c10::irange(begin, end)) { + aclrtDrvMemHandle h = handles_.at(i).value(); + handles_.at(i) = c10::nullopt; + TORCH_CHECK(aclrtUnmapMem(ptr_ + segment_size_ * i) == ACL_ERROR_NONE, \ + "Error, failed to unmap memory"); + TORCH_CHECK(aclrtFreePhysical(h) == ACL_ERROR_NONE, \ + "Error, failed to unmap memory"); + } + trimHandles(); + } + + void trimHandles() { + while (!handles_.empty() && !handles_.back()) { + handles_.pop_back(); + } + } + + void forEachAllocatedRange(std::function fn) { + auto start = 0; + for (auto i : c10::irange(handles_.size())) { + if (handles_.at(i) && (i == 0 || !handles_.at(i - 1))) { + start = i; + } + if (handles_.at(i) && (i + 1 == handles_.size() || !handles_.at(i + 1))) { + fn(start, i + 1); + } + } + } + + size_t numSegments(size_t size) { return (size + segment_size_ - 1) / segment_size_; } + + size_t segmentLeft(char* p) { + auto size = p - ptr(); + return size / segment_size_; + } + + size_t segmentRight(char* p) { + auto size = p - ptr(); + return numSegments(size); + } + + SegmentRange rangeFromHandles(size_t begin, size_t end) { + TORCH_INTERNAL_ASSERT(end >= begin); + return SegmentRange(ptr() + segment_size_ * begin, segment_size_ * (end - begin)); + } + + int device_; + aclrtStream stream_; + void* ptr_{}; + size_t max_handles_; + size_t segment_size_; + std::vector> 
handles_; +}; + + +std::string format_size(uint64_t size); + +struct AllocParams { + AllocParams(int device, size_t size, aclrtStream stream, BlockPool* pool, size_t alloc_size, DeviceStats& stats) + : search_key(device, stream, size), pool(pool), alloc_size(alloc_size), block(nullptr), err(ACL_ERROR_NONE) {} + + AllocParams() = default; + + int device() const { return search_key.device; } + aclrtStream stream() const { return search_key.stream; } + size_t size() const { return search_key.size; } + + Block search_key; + BlockPool* pool; + size_t alloc_size; + Block* block; + StatTypes stat_types = {false}; + aclError err; +}; + +#endif diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/test.py b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/test.py new file mode 100644 index 000000000..57637df6f --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/memory_fragmentation/test.py @@ -0,0 +1,36 @@ +import torch +import torch_npu +import ctypes +from mindspeed.core.memory.memory_fragmentation.pluggable_allocator_adpator import load_memory_fragmentation_module + +os_path = load_memory_fragmentation_module().__file__ +new_alloc = torch_npu.npu.memory.NPUPluggableAllocator(os_path, 'memory_fragmentation_malloc', 'memory_fragmentation_free') +torch_npu.npu.memory.change_current_allocator(new_alloc) + +myallocator = ctypes.CDLL(os_path) +init_fn = ctypes.cast(getattr(myallocator, "memory_fragmentation_init"), ctypes.c_void_p).value +empty_fn = ctypes.cast(getattr(myallocator, "memory_fragmentation_empty_cache"), ctypes.c_void_p).value +memory_fraction_fn = ctypes.cast(getattr(myallocator, "memory_fragmentation_memory_fraction"), ctypes.c_void_p).value +get_device_stats_fn = ctypes.cast(getattr(myallocator, "memory_fragmentation_get_device_stats"), ctypes.c_void_p).value + +new_alloc.allocator().set_init_fn(init_fn) +new_alloc.allocator().set_reset_fn(empty_fn) 
+new_alloc.allocator().set_memory_fraction_fn(memory_fraction_fn) +new_alloc.allocator().set_get_device_stats_fn(get_device_stats_fn) + +load_memory_fragmentation_module().precise_match_start() +load_memory_fragmentation_module().precise_match_end() + + +def report_memory(name): + mega_bytes = 1024.0 * 1024.0 + string = name + ' memory (MB)' + string += ' | allocated:{}'.format(torch.cuda.memory_allocated() / mega_bytes) + print(string) + + +new_tensor = torch.zeros(10, device='npu') +report_memory("report_memory") +torch_npu.npu.set_per_process_memory_fraction(0.5) + +del new_tensor diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/CachingAllocatorConfig.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/CachingAllocatorConfig.cpp new file mode 100644 index 000000000..5882d9c5a --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/CachingAllocatorConfig.cpp @@ -0,0 +1,88 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "CachingAllocatorConfig.h" + +#include "common.h" + +void CachingAllocatorConfig::lexArgs(const char *env, std::vector &config) +{ + std::vector buf; + + size_t env_length = strlen(env); + for (size_t i = 0; i < env_length; i++) { + if (env[i] == ',' || env[i] == ':' || env[i] == '[' || env[i] == ']') { + if (!buf.empty()) { + config.emplace_back(buf.begin(), buf.end()); + buf.clear(); + } + config.emplace_back(1, env[i]); + } else if (env[i] != ' ') { + buf.emplace_back(static_cast(env[i])); + } + } + if (!buf.empty()) { + config.emplace_back(buf.begin(), buf.end()); + } +} + +void CachingAllocatorConfig::consumeToken(const std::vector &config, size_t i, const char c) {} + +size_t CachingAllocatorConfig::parseMaxSplitSize(const std::vector &config, size_t i) +{ + consumeToken(config, ++i, ':'); + if (++i < config.size()) { + size_t val1 = static_cast(stoi(config[i])); + val1 = std::max(val1, kLargeBuffer / (1024 * 1024)); + val1 = std::min(val1, (std::numeric_limits::max() / (1024 * 1024))); + m_max_split_size = val1 * 1024 * 1024; + } + return i; +} + +size_t CachingAllocatorConfig::parseGarbageCollectionThreshold(const std::vector &config, size_t i) +{ + consumeToken(config, ++i, ':'); + if (++i < config.size()) { + double val1 = stod(config[i]); + m_garbage_collection_threshold = val1; + } + return i; +} + +void CachingAllocatorConfig::parseArgs(const char *env) +{ + // If empty, set the default values + m_max_split_size = std::numeric_limits::max(); + m_garbage_collection_threshold = 0; + + if (env == nullptr) { + return; + } + + std::vector config; + lexArgs(env, config); + + for (size_t i = 0; i < config.size(); i++) { + if (config[i].compare("max_split_size_mb") == 0) { + i = parseMaxSplitSize(config, i); + } else if (config[i].compare("garbage_collection_threshold") == 0) { + i = parseGarbageCollectionThreshold(config, i); + } + + if (i + 1 < config.size()) { + consumeToken(config, ++i, ','); + } + } +} diff --git 
a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/CachingAllocatorConfig.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/CachingAllocatorConfig.h new file mode 100644 index 000000000..2b3a8a20f --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/CachingAllocatorConfig.h @@ -0,0 +1,58 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
#pragma once

#include <cstdlib>
#include <limits>
#include <string>
#include <vector>

// Allocator tuning options read from the PYTORCH_NPU_ALLOC_CONF environment
// variable. Access goes through the static getters, which lazily create a
// single shared instance on first use.
class CachingAllocatorConfig {
public:
    // Returns the shared configuration object. It is built on the first call
    // by parsing PYTORCH_NPU_ALLOC_CONF and is never deleted.
    static CachingAllocatorConfig &instance()
    {
        static CachingAllocatorConfig *singleton = ([]() {
            auto config = new CachingAllocatorConfig();
            config->parseArgs(getenv("PYTORCH_NPU_ALLOC_CONF"));
            return config;
        })();
        return *singleton;
    }

    // Value stored for `max_split_size_mb`, in bytes.
    static size_t max_split_size()
    {
        return instance().m_max_split_size;
    }

    // Value stored for `garbage_collection_threshold`.
    static double garbage_collection_threshold()
    {
        return instance().m_garbage_collection_threshold;
    }

    // Resets the options to their defaults, then applies the settings in
    // `env`; a null pointer keeps the defaults.
    void parseArgs(const char *env);

private:
    // Construction is private: instances are only created by instance().
    CachingAllocatorConfig() = default;

    // Defaults: no split-size limit, garbage collection threshold of 0.
    size_t m_max_split_size = std::numeric_limits<size_t>::max();
    double m_garbage_collection_threshold = 0;

    // Parsing helpers, implemented in CachingAllocatorConfig.cpp.
    void lexArgs(const char *env, std::vector<std::string> &config);
    void consumeToken(const std::vector<std::string> &config, size_t i, const char c);
    size_t parseMaxSplitSize(const std::vector<std::string> &config, size_t i);
    size_t parseGarbageCollectionThreshold(const std::vector<std::string> &config, size_t i);
};
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "DeviceCachingAllocator.h" + +#include + +#include "swap_log.h" +#include "NPUSwapManager.h" + +Block *DeviceCachingAllocator::malloc(int device, size_t orig_size, aclrtStream stream) +{ + std::unique_lock lock(mutex); + + if (device == -1) { + SWAP_CHECK_ERROR(c10_npu::GetDevice(&device)); + } + // process outstanding npuEvents + process_events(); + auto size = round_size(orig_size); + auto &pool = get_pool(size); + + const size_t alloc_size = get_allocation_size(size); + AllocParams params(device, size, stream, &pool, alloc_size, stats); + params.stat_types = get_stat_types_for_pool(pool); + + AllocParams swap_params(device, size, stream, &pool, alloc_size, stats); + swap_params.stat_types = get_stat_types_for_pool(pool); + + // First, try to get a block from the existing pool. + bool block_found = + // Search pool + get_free_block(params) || + // Trigger callbacks and retry search + (trigger_free_memory_callbacks(params) && get_free_block(params)) || + get_fused_fragmented_blocks(params, 0); + if (!block_found) { + // Do garbage collection if the flag is set. 
+ if (C10_UNLIKELY(set_fraction && CachingAllocatorConfig::garbage_collection_threshold() > 0.0)) { + garbage_collect_cached_blocks(); + } + if (c10_npu::swap::NPUSwapManager::GetInstance().swap_enable) { + block_found = realloc_block(params, false); + if (!block_found) { + block_found = (release_swapout_blocks() && + (get_free_block(swap_params) || get_fused_fragmented_blocks(swap_params, 1))); + if (block_found) { + params.err = swap_params.err; + params.block = swap_params.block; + } else { + block_found = realloc_block(params, true) || + (release_available_cached_blocks(params) && realloc_block(params, true)) || + // Free all non-split cached blocks and retry alloc. + (release_cached_blocks() && realloc_block(params, true)) || + get_fused_fragmented_blocks(params, 2); + } + } + } else { + // Attempt allocate + block_found = realloc_block(params, false) || + // Free enough available cached blocks to satisfy alloc and retry alloc. + ((release_swapout_blocks() || release_available_cached_blocks(params)) && + realloc_block(params, true)) || + get_fused_fragmented_blocks(params, 1) || + // Free all non-split cached blocks and retry alloc. 
+ (C10_LIKELY(captures_underway == 0) && release_cached_blocks() && realloc_block(params, true)) || + get_fused_fragmented_blocks(params, 2); + } + } + + if (!block_found) { + if (params.err == ACL_ERROR_RT_MEMORY_ALLOCATION) { + if (c10_npu::swap::NPUSwapManager::GetInstance().swap_oom_enable) { + SWAP_LOG_WARN("[SwapOomEnable] Trigger OOM error when malloc()"); + c10_npu::swap::NPUSwapManager::GetInstance().config.isOOM = true; + if (!active_blocks.empty() && + !c10_npu::swap::NPUSwapManager::GetInstance().GetStorageImplMap().empty()) { + Block *findBlock = nullptr; + for (std::deque::iterator itQ = + c10_npu::swap::NPUSwapManager::GetInstance().GetTensorQueue().begin(); + itQ != c10_npu::swap::NPUSwapManager::GetInstance().GetTensorQueue().end();) { + auto it = std::find_if(active_fused_blocks.begin(), active_fused_blocks.end(), + [&itQ](const Block *block) { return block->ptr == *itQ; }); + if (it != active_fused_blocks.end()) { + if (!c10_npu::swap::NPUSwapManager::GetInstance().config.enableCustomRecordStream) { + c10_npu::npuSynchronizeDevice(true); + eraseStream((*it), c10_npu::swap::NPUSwapManager::GetInstance().GetSwapStream()); + } + if ((*it)->stream_uses.empty()) { + // 该判断实现的效果是找到比alloc_size大的最小的block,如果没有block比alloc_size大,则会找到比alloc_size小的最大的block + if (findBlock == nullptr || // 1.如果当前为遍历到的第一个block,直接赋给findBlock + (findBlock->size < alloc_size && (*it)->size > + findBlock->size) || // 2.如果已经找到的block大小比alloc_size小,只要遍历到的block比其大小大,就更新findBlock + (findBlock->size >= alloc_size && (*it)->size >= alloc_size && + (*it)->size < + findBlock->size)) { // 3.如果已经找到的block大小比alloc_size大,那么只有在当前遍历到的block大小不小于alloc_size,又比已经找到的block小,才更新findBlock + findBlock = *it; + } + } + ++itQ; + } else { + auto it = std::find_if(active_blocks.begin(), active_blocks.end(), + [&itQ](const Block *block) { return block->ptr == *itQ; }); + if (it != active_blocks.end()) { + if (!c10_npu::swap::NPUSwapManager::GetInstance().config.enableCustomRecordStream) { + 
c10_npu::npuSynchronizeDevice(true); + eraseStream((*it), c10_npu::swap::NPUSwapManager::GetInstance().GetSwapStream()); + } + if ((*it)->stream_uses.empty()) { + // 该判断实现的效果是找到比alloc_size大的最小的block,如果没有block比alloc_size大,则会找到比alloc_size小的最大的block + if (findBlock == nullptr || // 1.如果当前为遍历到的第一个block,直接赋给findBlock + (findBlock->size < alloc_size && (*it)->size > + findBlock->size) || // 2.如果已经找到的block大小比alloc_size小,只要遍历到的block比其大小大,就更新findBlock + (findBlock->size >= alloc_size && (*it)->size >= alloc_size && + (*it)->size < + findBlock->size)) { // 3.如果已经找到的block大小比alloc_size大,那么只有在当前遍历到的block大小不小于alloc_size,又比已经找到的block小,才更新findBlock + findBlock = *it; + } + } + ++itQ; + } else { + c10_npu::swap::NPUSwapManager::GetInstance().GetStorageImplMap().erase(*itQ); + itQ = c10_npu::swap::NPUSwapManager::GetInstance().GetTensorQueue().erase(itQ); + } + } + } + + if (findBlock != nullptr) { + SWAP_LOG_WARN("[SwapOomEnable] malloc OOM, need swap out ptrInBlock, size[%zu]", + findBlock->size); + throw c10_npu::swap::SwapOutOfMemError("malloc OOM, need swap out ptrInBlock.", findBlock->ptr); + } + } + } + // For any error code other than ACL_ERROR_RT_MEMORY_ALLOCATION, + // alloc_block should have thrown an exception already. + TORCH_INTERNAL_ASSERT(params.err == ACL_ERROR_RT_MEMORY_ALLOCATION); + + size_t device_free; + size_t device_total; + SWAP_CHECK_ERROR(aclrtGetMemInfo(ACL_HBM_MEM, &device_free, &device_total)); + std::string allowed_info; + + if (set_fraction) { + allowed_info = format_size(allowed_memory_maximum) + " allowed; "; + } + + stats.num_ooms += 1; + auto observers_local = oom_observers_; + lock.unlock(); + + for (const auto &obs : observers_local) { + obs(device, alloc_size, set_fraction ? allowed_memory_maximum : device_total, device_free); + } + // "total capacity": total global memory on GPU + // "allowed": memory is allowed to use, which set by fraction. 
+ // "already allocated": memory allocated by the program using the + // caching allocator + // "free": free memory as reported by the CUDA API + // "cached": memory held by the allocator but not used by the program + // + // The "allocated" amount does not include memory allocated outside + // of the caching allocator, such as memory allocated by other programs + // or memory held by the driver. + // + // The sum of "allocated" + "free" + "cached" may be less than the + // total capacity due to memory held by the driver and usage by other + // programs. + // + // Note that at this point free_cached_blocks has already returned all + // possible "cached" memory to the driver. The only remaining "cached" + // memory is split from a larger block that is partially in-use. + AT_ERROR("NPU out of memory. Tried to allocate ", format_size(alloc_size), " (NPU ", device, "; ", + format_size(device_total), " total capacity; ", + format_size(stats.allocated_bytes[static_cast(StatType::AGGREGATE)].current), + " already allocated; ", + format_size(stats.active_bytes[static_cast(StatType::AGGREGATE)].current), " current active; ", + format_size(device_free), " free; ", allowed_info, + format_size(stats.reserved_bytes[static_cast(StatType::AGGREGATE)].current), + " reserved in total by PyTorch)", + " If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation."); + } else { + SWAP_CHECK_ERROR(params.err); + } + } + + Block *block = params.block; + Block *remaining = nullptr; + + static const int vmmDefragment = ([]() -> int { + const char *env = getenv("vmmDefragment"); + if (env) { + return atoi(env); + } else { + return 1; + } + })(); + + const bool already_split = block->is_split(); + + if (pool.is_small && should_split(block, size)) { + remaining = block; + + block = new Block(device, stream, size, &pool, block->ptr); + block->prev = remaining->prev; + if (block->prev) { + block->prev->next = block; + } + block->next = remaining; + + 
remaining->prev = block; + remaining->ptr = static_cast(remaining->ptr) + size; + remaining->size -= size; + + bool inserted = pool.blocks.insert(remaining).second; + + if (already_split) { + // An already-split inactive block is being shrunk by size bytes. + update_stat_array(stats.inactive_split_bytes, -block->size, params.stat_types); + } else { + // A new split inactive block is being created from a previously unsplit + // block, size remaining->size bytes. + for_each_selected_stat_type(params.stat_types, [&](size_t stat_type) { + update_stat(stats.inactive_split_bytes[stat_type], remaining->size); + update_stat(stats.inactive_split[stat_type], 1); + }); + } + } else if (already_split) { + // An already-split block is becoming active + for_each_selected_stat_type(params.stat_types, [&](size_t stat_type) { + update_stat(stats.inactive_split_bytes[stat_type], -static_cast(block->size)); + update_stat(stats.inactive_split[stat_type], -1); + }); + } + + block->allocated = true; + block->requested_size = orig_size; + block->actual_size = size; + + bool inserted = false; + if (block->vmm_segment && block->vmm_segment->fused) { + active_fused_blocks.insert(block); + } else { + inserted = active_blocks.insert(block).second; + } + + for_each_selected_stat_type(params.stat_types, [&](size_t stat_type) { + update_stat(stats.allocation[stat_type], 1); + update_stat(stats.allocated_bytes[stat_type], static_cast(block->actual_size)); + update_stat(stats.requested_bytes[stat_type], static_cast(block->requested_size)); + update_stat(stats.active[stat_type], 1); + update_stat(stats.active_bytes[stat_type], block->size); + }); + + if (block->size >= CachingAllocatorConfig::max_split_size()) + update_stat(stats.oversize_allocations, 1); + + c10_npu::swap::NPUSwapManager::GetInstance().tensorPtrCountMap[reinterpret_cast(block->ptr)]++; + + return block; +} + + +void DeviceCachingAllocator::free(Block *block) +{ + std::lock_guard lock(mutex); + auto orig_block_ptr = block->ptr; + 
auto orig_block_size = block->size; + + StatTypes stat_types = { false }; + stat_types[static_cast(StatType::AGGREGATE)] = true; + stat_types[static_cast(get_stat_type_for_pool(*(block->pool)))] = true; + for_each_selected_stat_type(stat_types, [&](size_t stat_type) { + update_stat(stats.allocation[stat_type], -1); + update_stat(stats.allocated_bytes[stat_type], -static_cast(block->actual_size)); + }); + + if (block->size >= CachingAllocatorConfig::max_split_size()) + update_stat(stats.oversize_allocations, -1); + + if (!block->stream_uses.empty()) { + if (C10_UNLIKELY(captures_underway)) { + needs_events_deferred_until_no_capture.push_back(block); + } else { + insert_events(block); + } + } else { + insert_free_event_into_alloc_stream(block); + update_block(block); + } +} + +void DeviceCachingAllocator::update_block(Block *block) +{ + block->allocated = false; + std::lock_guard lock(mutex); + bool flag = false; + + size_t original_block_size = block->size; + size_t requested_size = block->requested_size; + + auto &pool = *block->pool; + int64_t net_change_inactive_split_blocks = 0; + int64_t net_change_inactive_split_size = 0; + StatTypes stat_types = { false }; + stat_types[static_cast(StatType::AGGREGATE)] = true; + stat_types[static_cast(get_stat_type_for_pool(pool))] = true; + for_each_selected_stat_type(stat_types, [&](size_t stat_type) { + update_stat(stats.inactive_split[stat_type], net_change_inactive_split_blocks); + update_stat(stats.inactive_split_bytes[stat_type], net_change_inactive_split_size); + update_stat(stats.active[stat_type], -1); + update_stat(stats.active_bytes[stat_type], -static_cast(original_block_size)); + if (!flag) { + update_stat(stats.requested_bytes[stat_type], -static_cast(requested_size)); + } + }); + + if (block->pool->is_small) { + free_block(block, flag); + } else { + deactivate_large_block(block); + } +} + +void *DeviceCachingAllocator::getBaseAllocation(Block *block, size_t *outSize) +{ + std::lock_guard lock(mutex); + while 
(block->prev) { + block = block->prev; + } + void *basePtr = block->ptr; + if (outSize) { + size_t size = 0; + while (block) { + size += block->size; + block = block->next; + } + *outSize = size; + } + return basePtr; +} + +void DeviceCachingAllocator::recordStream(Block *block, c10_npu::NPUStream stream) +{ + std::lock_guard lock(mutex); + block->stream_uses.insert(stream); +} + +void DeviceCachingAllocator::eraseStream(Block *block, c10_npu::NPUStream stream) +{ + std::lock_guard lock(mutex); + block->stream_uses.erase(stream); + + // free block, lazy destory block related events + for (auto it = npu_events[stream].begin(); it != npu_events[stream].end();) { + if (block != it->second) { + it++; + continue; + } + it = npu_events[stream].erase(it); + block->event_count--; + if (block->event_count == 0) { + update_block(block); + break; + } + } +} + +void DeviceCachingAllocator::setMemoryFraction(double fraction) +{ + size_t device_free; + size_t device_total; + aclrtGetMemInfo(ACL_HBM_MEM, &device_free, &device_total); + allowed_memory_maximum = static_cast(fraction * device_total); + set_fraction = true; +} + +void DeviceCachingAllocator::emptyCache(bool check_error) +{ + std::lock_guard lock(mutex); + release_cached_blocks(); + size_t garbage_size = garbage_collect_fused_blocks(2, 0); +} + +void DeviceCachingAllocator::cacheInfo(size_t *total, size_t *largest) +{ + std::lock_guard lock(mutex); + cache_info_aux(large_blocks, total, largest); + cache_info_aux(small_blocks, total, largest); +} + +DeviceStats DeviceCachingAllocator::getStats() +{ + std::lock_guard lock(mutex); + return stats; +} + +void DeviceCachingAllocator::resetAccumulatedStats() +{ + std::lock_guard lock(mutex); + + for (size_t statType = 0; statType < static_cast(StatType::NUM_TYPES); ++statType) { + reset_accumulated_stat(stats.allocation[statType]); + reset_accumulated_stat(stats.segment[statType]); + reset_accumulated_stat(stats.active[statType]); + 
reset_accumulated_stat(stats.inactive_split[statType]); + reset_accumulated_stat(stats.allocated_bytes[statType]); + reset_accumulated_stat(stats.reserved_bytes[statType]); + reset_accumulated_stat(stats.active_bytes[statType]); + reset_accumulated_stat(stats.inactive_split_bytes[statType]); + reset_accumulated_stat(stats.requested_bytes[statType]); + } + + stats.num_alloc_retries = 0; + stats.num_ooms = 0; + reset_accumulated_stat(stats.oversize_allocations); + reset_accumulated_stat(stats.oversize_segments); +} + +/* * Resets the historical peak stats for the device * */ +void DeviceCachingAllocator::resetPeakStats() +{ + std::lock_guard lock(mutex); + + for (size_t statType = 0; statType < static_cast(StatType::NUM_TYPES); ++statType) { + reset_peak_stat(stats.allocation[statType]); + reset_peak_stat(stats.segment[statType]); + reset_peak_stat(stats.active[statType]); + reset_peak_stat(stats.inactive_split[statType]); + reset_peak_stat(stats.allocated_bytes[statType]); + reset_peak_stat(stats.reserved_bytes[statType]); + reset_peak_stat(stats.active_bytes[statType]); + reset_peak_stat(stats.inactive_split_bytes[statType]); + reset_peak_stat(stats.requested_bytes[statType]); + } + + reset_peak_stat(stats.oversize_allocations); + reset_peak_stat(stats.oversize_segments); +} + +std::vector DeviceCachingAllocator::snapshot() +{ + std::lock_guard lock(mutex); + + size_t total_active = 0; + std::vector result; + const auto all_blocks = get_all_blocks(); + for (const Block * const head_block : all_blocks) { + if (head_block->prev != nullptr) { + continue; + } + result.emplace_back(); + SegmentInfo &segment_info = result.back(); + segment_info.device = head_block->device; + segment_info.address = reinterpret_cast(head_block->ptr); + segment_info.stream = head_block->stream; + segment_info.is_large = (!head_block->pool->is_small); + + const Block *block = head_block; + while (block != nullptr) { + segment_info.blocks.emplace_back(); + auto &block_info = 
segment_info.blocks.back(); + + block_info.size = block->size; + block_info.requested_size = block->requested_size; + block_info.allocated = block->allocated; + block_info.active = block->allocated || (block->event_count > 0) || !block->stream_uses.empty(); + + segment_info.total_size += block_info.size; + if (block_info.allocated) { + segment_info.allocated_size += block_info.size; + } + if (block_info.active) { + segment_info.active_size += block_info.size; + segment_info.requested_size += block_info.requested_size; + } + block = block->next; + } + total_active += segment_info.active_size; + } + + std::sort(result.begin(), result.end(), + [](const SegmentInfo &a, const SegmentInfo &b) { return a.address < b.address; }); + + return result; +} + +size_t DeviceCachingAllocator::round_size(size_t size) +{ + size = size + 32; + if (size < kMinBlockSize) { + return kMinBlockSize; + } else { + size_t block_round_size = kMinBlockSize * ((size + kMinBlockSize - 1) / kMinBlockSize); + if (block_round_size > kSmallSize) { + // if block will alloc from large_blocks, round to 2M + block_round_size = kGranularity * ((size + kGranularity - 1) / kGranularity); + } + return block_round_size; + } +} + +std::vector DeviceCachingAllocator::get_all_blocks() const +{ + std::vector blocks; + blocks.insert(blocks.end(), small_blocks.blocks.begin(), small_blocks.blocks.end()); + blocks.insert(blocks.end(), large_blocks.blocks.begin(), large_blocks.blocks.end()); + blocks.insert(blocks.end(), active_blocks.begin(), active_blocks.end()); + return blocks; +} + +void DeviceCachingAllocator::free_block(Block *block, bool flag) +{ + TORCH_INTERNAL_ASSERT(!block->allocated && block->event_count == 0 && block->stream_uses.empty()); + static const int vmmDefragment = ([]() -> int { + const char *env = getenv("vmmDefragment"); + if (env) { + return atoi(env); + } else { + return 1; + } + })(); + + size_t original_block_size = block->size; + size_t requested_size = block->requested_size; + + auto 
&pool = *block->pool; + int64_t net_change_inactive_split_blocks = 0; + int64_t net_change_inactive_split_size = 0; + + const std::array merge_candidates = { block->prev, block->next }; + for (Block *merge_candidate : merge_candidates) { + const int64_t subsumed_size = try_merge_blocks(block, merge_candidate, pool); + if (subsumed_size > 0) { + net_change_inactive_split_blocks -= 1; + net_change_inactive_split_size -= subsumed_size; + } + } + + active_blocks.erase(block); + // Makes sure the Block* isn't already present in the pool we're freeing it back into. + bool inserted = pool.blocks.insert(block).second; + + TORCH_INTERNAL_ASSERT(inserted); + if (vmmDefragment > 0 && block->vmm_segment /* !pool.is_small */) { + for (size_t i = 0; i < block->vmm_segment->phy_chunks.size(); i++) { + auto &p = block->vmm_segment->phy_chunks[i]; + p->free = true; + } + block->vmm_segment->num_free_chunks = block->vmm_segment->phy_chunks.size(); + block->vmm_segment->num_used_chunks = 0; + } + + if (block->is_split()) { + net_change_inactive_split_blocks += 1; + net_change_inactive_split_size += block->size; + } +} + +bool DeviceCachingAllocator::need_merge(Block *dst, Block *src) +{ + if (!src || src->allocated || src->event_count > 0 || !src->stream_uses.empty()) { + return false; + } + return true; +} + +size_t DeviceCachingAllocator::try_merge_blocks(Block *dst, Block *src, BlockPool &pool) +{ + if (!src || src->allocated || src->event_count > 0 || !src->stream_uses.empty()) { + return 0; + } + if (src->vmm_segment && src->vmm_segment->phy_chunks[0]->mapped_blocks.size() > 1) { + return 0; + } + if (dst->vmm_segment && dst->vmm_segment->phy_chunks[0]->mapped_blocks.size() > 1) { + return 0; + } + + AT_ASSERT(dst->is_split() && src->is_split()); + + if (dst->prev == src) { // [src dst] + dst->ptr = src->ptr; + dst->prev = src->prev; + if (dst->prev) { + dst->prev->next = dst; + } + if (!dst->history) { + dst->history = std::move(src->history); + dst->history_last = 
src->history_last; + } else if (src->history) { + src->history_last->next = std::move(dst->history); + dst->history = std::move(src->history); + } + src->history_last = nullptr; + } else { // [dest src] + dst->next = src->next; + if (dst->next) { + dst->next->prev = dst; + } + + if (!dst->history) { + dst->history = std::move(src->history); + dst->history_last = src->history_last; + } else if (src->history) { + dst->history_last->next = std::move(src->history); + dst->history_last = src->history_last; + } + src->history_last = nullptr; + } + + const size_t subsumed_size = src->size; + dst->size += subsumed_size; + auto erased = pool.blocks.erase(src); + static const int vmmDefragment = ([]() -> int { + const char *env = getenv("vmmDefragment"); + if (env) { + return atoi(env); + } else { + return 1; + } + })(); + if (vmmDefragment > 0 && dst->vmm_segment) { + bool ret = dst->vmm_segment->remerge(*(src->vmm_segment)); + size_t offset = 0; + for (auto &phy_block : dst->vmm_segment->phy_chunks) { + phy_block->mapped_blocks[0].block = dst; + phy_block->mapped_blocks[0].offset = offset; + offset++; + } + } + + delete src; + return subsumed_size; +} + +BlockPool &DeviceCachingAllocator::get_pool(size_t size) +{ + if (size <= kSmallSize) { + return small_blocks; + } else { + return large_blocks; + } +} + +bool DeviceCachingAllocator::should_split(const Block *block, size_t size) +{ + size_t remaining = block->size - size; + if (block->pool->is_small) { + return remaining >= kMinBlockSize; + } else { + return (size < CachingAllocatorConfig::max_split_size()) && (remaining >= kGranularity); + } +} + +StatType DeviceCachingAllocator::get_stat_type_for_pool(const BlockPool &pool) +{ + return pool.is_small ? StatType::SMALL_POOL : StatType::LARGE_POOL; +} + +StatTypes DeviceCachingAllocator::get_stat_types_for_pool(const BlockPool &pool) +{ + StatTypes stat_types = { false }; + stat_types[static_cast(StatType::AGGREGATE)] = true; + stat_types[static_cast(pool.is_small ? 
StatType::SMALL_POOL : StatType::LARGE_POOL)] = true; + return stat_types; +} + +size_t DeviceCachingAllocator::get_allocation_size(size_t size) +{ + if (size <= kSmallSize) { + return kSmallBuffer; + } else if (size < kMinLargeAlloc) { + return kLargeBuffer; + } else { + return kRoundLarge * ((size + kRoundLarge - 1) / kRoundLarge); + } +} + +bool DeviceCachingAllocator::get_free_block(AllocParams &p) +{ + static const int vmmDefragment = ([]() -> int { + const char *env = getenv("vmmDefragment"); + if (env) { + return atoi(env); + } else { + return 1; + } + })(); + + static const double reuseLimit = ([]() -> double { + const char *env = getenv("reuseLimit"); + if (env) { + return atof(env); + } else { + return 1.0f; + } + })(); + + static const size_t fragment_limit = ([]() -> size_t { + const char *env = getenv("fragLimit"); + if (env) { + return static_cast(std::stoll(env)); + } else { + return static_cast(16777216); + } + })(); + + int64_t net_change_inactive_split_blocks = 0; + int64_t net_change_inactive_split_size = 0; + + BlockPool &pool = *p.pool; + if (C10_UNLIKELY(set_fraction && CachingAllocatorConfig::garbage_collection_threshold() > 0.0)) { + // Track block reuse interval only when garbage collection is enabled. 
+ for (auto &b : pool.blocks) { + ++b->gc_count; + } + } + + if (vmmDefragment > 0 && !pool.is_small && p.search_key.size >= fragment_limit) { + auto block_it = free_fused_blocks.blocks.lower_bound(&p.search_key); + if (block_it == free_fused_blocks.blocks.end() || (*block_it)->stream != p.stream() || + (*block_it)->size > (p.search_key.size * reuseLimit)) { + } else { + p.block = *block_it; + activate_large_block(p.block); + p.err = ACL_ERROR_NONE; + + update_stat_array(stats.inactive_split, net_change_inactive_split_blocks, p.stat_types); + update_stat_array(stats.inactive_split_bytes, net_change_inactive_split_size, p.stat_types); + return true; + } + } + + auto it = pool.blocks.lower_bound(&p.search_key); + if (it == pool.blocks.end() || (*it)->stream != p.stream()) { + return false; + } + // Do not return an oversized block for a large request + if ((p.size() < CachingAllocatorConfig::max_split_size()) && + ((*it)->size >= CachingAllocatorConfig::max_split_size())) { + return false; + } + // Allow oversized block size to be rounded up but within a limit + if ((p.size() >= CachingAllocatorConfig::max_split_size()) && ((*it)->size >= p.size() + kLargeBuffer)) { + return false; + } + p.block = *it; + (*it)->gc_count = 0; // Denote this block has been used + + if (pool.is_small) { + pool.blocks.erase(p.block); + } + if (vmmDefragment > 0 && p.block->vmm_segment) { + if (should_split(p.block, p.size())) { + p.block = split_large_block(p.block, p.size()); + } + activate_large_block(p.block); + } + + p.err = ACL_ERROR_NONE; + update_stat_array(stats.inactive_split, net_change_inactive_split_blocks, p.stat_types); + update_stat_array(stats.inactive_split_bytes, net_change_inactive_split_size, p.stat_types); + return true; +} + +bool DeviceCachingAllocator::trigger_free_memory_callbacks(AllocParams &p) +{ + bool freed_memory = false; + return freed_memory; +} + +void DeviceCachingAllocator::garbage_collect_cached_blocks() +{ + // Free unused cached blocks to reclaim 
NPU memory. + // Unlike release_cached_blocks(), this does not enforce synchronization and + // therefore should be of less overheads. + + size_t gc_threshold = + static_cast(CachingAllocatorConfig::garbage_collection_threshold() * allowed_memory_maximum); + // No need to trigger GC yet + if (total_allocated_memory <= gc_threshold) { + return; + } + const auto target_size = total_allocated_memory - gc_threshold; + size_t gc_reclaimed = 0; + + // Calculate the total age of the free-able blocks. We'll use it later to + // get "avg age" threshold. + double total_age = 0.0; + int freeable_block_count = 0; + for (auto &b : large_blocks.blocks) { + if (!b->is_split()) { + total_age += b->gc_count; + ++freeable_block_count; + } + } + // No free-able blocks? + if (freeable_block_count == 0) { + return; + } + + c10_npu::npuSynchronizeDevice(true); + + // Repeat GC until we reach reclaim > target size. + bool block_freed = true; + while (gc_reclaimed < target_size && block_freed == true && freeable_block_count > 0) { + // Free blocks exceeding this age threshold first. + double age_threshold = total_age / freeable_block_count; + // Stop iteration if we can no longer free a block. + block_freed = false; + + // Free blocks of > avg age. Don't stop upon reaching the target_size, + // we don't want this GC to be triggered frequently. 
+ auto it = large_blocks.blocks.begin(); + while (it != large_blocks.blocks.end()) { + Block *block = *it; + ++it; + if (!block->is_split() && block->gc_count >= age_threshold) { + block_freed = true; + gc_reclaimed += block->size; + total_age -= block->gc_count; // Decrement the age + freeable_block_count--; // One less block that can be freed + release_block(block); + + ASCEND_LOGD("PTA CachingAllocator gc: free = %zu, cached = %lu, allocated = %lu", block->size, + stats.reserved_bytes[static_cast(StatType::AGGREGATE)].current, + stats.allocated_bytes[static_cast(StatType::AGGREGATE)].current); + } + } + } +} + +bool DeviceCachingAllocator::realloc_block(AllocParams &p, bool isRetry) +{ + // Defensively checks for preexisting CUDA error state. + static const int vmmDefragment = ([]() -> int { + const char *env = getenv("vmmDefragment"); + if (env) { + return atoi(env); + } else { + return 1; + } + })(); + + static const int reAlloc = ([]() -> int { + const char *env = getenv("reAlloc"); + if (env) { + return atoi(env); + } else { + return 1; + } + })(); + + static const size_t fragment_limit = ([]() -> size_t { + const char *env = getenv("fragLimit"); + if (env) { + return static_cast(std::stoll(env)); + } else { + return static_cast(16777216); + } + })(); + + size_t size = p.alloc_size; + size_t free_block_size = 0; + void *ptr; + + if (isRetry) { + stats.num_alloc_retries += 1; + } + + std::shared_ptr vmm_segment; + if (set_fraction && total_allocated_memory + size > allowed_memory_maximum) { + p.err = ACL_ERROR_RT_MEMORY_ALLOCATION; + return false; + } + + if (vmmDefragment <= 0 || p.pool->is_small) { + p.err = aclrtMallocAlign32(&ptr, size, aclrtMemMallocPolicy::ACL_MEM_MALLOC_HUGE_FIRST); + if (p.err != ACL_ERROR_NONE) { + p.err = ACL_ERROR_RT_MEMORY_ALLOCATION; + return false; + } + for_each_selected_stat_type(p.stat_types, [&](size_t stat_type) { + update_stat(stats.segment[stat_type], 1); + update_stat(stats.reserved_bytes[stat_type], size); + }); + } 
else { + if (reAlloc > 0 && p.search_key.size > fragment_limit) { + Block left_search_key(p.search_key.device, p.search_key.stream, p.search_key.size, p.search_key.pool, + p.search_key.ptr); + Block right_search_key(p.search_key.device, p.search_key.stream, p.search_key.size, p.search_key.pool, + p.search_key.ptr); + + left_search_key.size = 0; + right_search_key.size = std::numeric_limits::max(); + + auto it_begin = large_blocks.blocks.lower_bound(&left_search_key); + auto it_end = large_blocks.blocks.lower_bound(&right_search_key); + if (it_begin != large_blocks.blocks.end() && (*it_begin)->stream == p.stream() && + it_end != large_blocks.blocks.begin() && (*std::prev(it_end))->stream == p.stream()) { + auto it = it_begin; + while (it != it_end) { + free_block_size += (*it)->size; + it++; + } + } + + size_t request_size = p.search_key.size; + if (free_block_size >= request_size) { + return false; + } + + if (free_block_size > 0) { + request_size -= free_block_size; + size = get_allocation_size(request_size); + } + } + + using Ms = std::chrono::duration; + Ms fuse_time = Ms{ 0 }; + + int gc_time = 0; + do { + auto t0 = std::chrono::steady_clock::now(); + + vmm_segment = std::make_shared(size / kGranularity, kGranularity, p.device()); + + auto t1 = std::chrono::steady_clock::now(); + fuse_time = (t1 - t0); + + if (vmm_segment->status == ACL_SUCCESS && vmm_segment->segment_ptr) { + for_each_selected_stat_type(p.stat_types, [&](size_t stat_type) { + update_stat(stats.segment[stat_type], 1); + update_stat(stats.reserved_bytes[stat_type], size); + }); + break; + } else { + size_t device_free; + size_t device_total; + SWAP_CHECK_ERROR(aclrtGetMemInfo(ACL_HBM_MEM, &device_free, &device_total)); + size_t total_garbage_size = fragmented_free_fused_blocks[p.stream()].pool_size + + free_fused_blocks_in_release_order[p.stream()].pool_size; + if (device_free > size && total_garbage_size >= size) { + vmm_segment.reset(); + size_t garbage_size = 
garbage_collect_fused_blocks(gc_time, p.alloc_size); + gc_time++; + } else { + break; + } + } + } while (gc_time < 3); + + if (!vmm_segment || vmm_segment->status != ACL_SUCCESS || !vmm_segment->segment_ptr) { + p.err = ACL_ERROR_RT_MEMORY_ALLOCATION; + vmm_segment.reset(); + return false; + } + ptr = vmm_segment->segment_ptr; + } + + total_allocated_memory += size; + Block *new_block = new Block(p.device(), p.stream(), size, p.pool, (char *)ptr); + if (vmm_segment != nullptr) { + new_block->vmm_segment = std::move(vmm_segment); + } + + if (size >= CachingAllocatorConfig::max_split_size()) + update_stat(stats.oversize_segments, 1); + + // p.block came from new, not cudaMalloc. It should not be nullptr here. + TORCH_INTERNAL_ASSERT(new_block != nullptr && new_block->ptr != nullptr); + + if (new_block->vmm_segment) { + if (new_block->size < p.search_key.size) { + for (size_t i = 0; i < new_block->vmm_segment->phy_chunks.size(); i++) { + new_block->vmm_segment->phy_chunks[i]->mapped_blocks.emplace_back(new_block, i); + new_block->vmm_segment->phy_chunks[i]->free = true; + } + + new_block->vmm_segment->num_free_chunks = new_block->vmm_segment->phy_chunks.size(); + new_block->vmm_segment->num_used_chunks = 0; + + large_blocks.blocks.insert(new_block); + + if (!get_fused_fragmented_blocks(p, 4)) { + throw GMLakeError("Call get_fused_fragmented_blocks Failed"); + } + } else { + for (size_t i = 0; i < new_block->vmm_segment->phy_chunks.size(); i++) { + new_block->vmm_segment->phy_chunks[i]->mapped_blocks.emplace_back(new_block, i); + new_block->vmm_segment->phy_chunks[i]->free = false; + } + + new_block->vmm_segment->num_free_chunks = 0; + new_block->vmm_segment->num_used_chunks = new_block->vmm_segment->phy_chunks.size(); + + p.block = new_block; + p.err = ACL_ERROR_NONE; + } + } else { + p.block = new_block; + p.err = ACL_ERROR_NONE; + } + return true; +} + +bool DeviceCachingAllocator::release_available_cached_blocks(const AllocParams &p) +{ + if 
(CachingAllocatorConfig::max_split_size() == std::numeric_limits::max()) { + return false; + } + BlockPool &pool = *p.pool; + Block key(p.search_key.device, p.search_key.stream, p.search_key.size, p.search_key.pool, p.search_key.ptr); + key.size = + (key.size < CachingAllocatorConfig::max_split_size()) ? CachingAllocatorConfig::max_split_size() : key.size; + auto it = pool.blocks.lower_bound(&key); + + c10_npu::npuSynchronizeDevice(true); + + if (it == pool.blocks.end() || (*it)->stream != p.stream()) { + // No single block is large enough; free multiple oversize blocks, starting with the largest + if (it == pool.blocks.begin()) { + return false; + } + size_t totalReleased = 0; + // Back up one item. Now on the largest block for the correct stream + --it; + while ((totalReleased < key.size) && ((*it)->size >= CachingAllocatorConfig::max_split_size()) && + ((*it)->stream == p.stream())) { + auto cur = it; + totalReleased += (*it)->size; + if (it != pool.blocks.begin()) { + --it; + release_block(*cur); + } else { + release_block(*cur); + break; + } + } + if (totalReleased < key.size) { + return false; + } + } else { + release_block(*it); + } + return true; +} + +bool DeviceCachingAllocator::release_cached_blocks() +{ + c10_npu::npuSynchronizeDevice(); + // First ensure that all blocks that can't currently be allocated due to + // outstanding events are returned to the pool. 
+ synchronize_and_free_events(); + release_blocks(small_blocks); + // Free all non-split cached blocks to system allocator + release_blocks(large_blocks); + + return true; +} + +void DeviceCachingAllocator::release_block(Block *block) +{ + static const int vmmDefragment = ([]() -> int { + const char *env = getenv("vmmDefragment"); + if (env) { + return atoi(env); + } else { + return 1; + } + })(); + + if (block->pool->is_small || !block->vmm_segment->fused) { + total_allocated_memory -= block->size; + auto *pool = block->pool; + StatTypes stat_types = { false }; + stat_types[static_cast(StatType::AGGREGATE)] = true; + stat_types[static_cast(get_stat_type_for_pool(*pool))] = true; + for_each_selected_stat_type(stat_types, [&](size_t stat_type) { + update_stat(stats.segment[stat_type], -1); + update_stat(stats.reserved_bytes[stat_type], -static_cast(block->size)); + }); + if (block->size >= CachingAllocatorConfig::max_split_size()) + update_stat(stats.oversize_segments, -1); + } + + if (vmmDefragment > 0 && block->vmm_segment) { + release_large_block(block); + } else { + SWAP_CHECK_ERROR(aclrtFree(block->ptr)); + block->pool->blocks.erase(block); + delete block; + } +} + +void DeviceCachingAllocator::release_blocks(BlockPool &pool) +{ + auto it = pool.blocks.begin(); + while (it != pool.blocks.end()) { + Block *block = *it; + ++it; + if (!block->prev && !block->next) { + release_block(block); + } else if (!block->pool->is_small && block->vmm_segment != nullptr) { + if (block->prev && large_blocks.blocks.count(block->prev) == 1) { + auto src = block->prev; + } + if (block->next && large_blocks.blocks.count(block->next) == 1) { + auto src = block->next; + } + } + } +} + +EventPool::Event DeviceCachingAllocator::create_event_internal(int idx) +{ + // Leak the event pool to avoid shutdown issues. 
+ static auto *event_pool = new EventPool(); + return event_pool->get(idx); +} + +void DeviceCachingAllocator::synchronize_and_free_events() +{ + // Synchronize on outstanding events and then free associated blocks. + + // This function syncs, so capture should not be underway. Might as well + // make sure capture-deferred end of life events get processed too. + TORCH_INTERNAL_ASSERT(captures_underway == 0); + insert_events_deferred_until_no_capture(); + + for (auto &st : npu_events) { + for (auto &e : st.second) { + EventPool::Event event = std::move(e.first); + Block *block = e.second; + + SWAP_CHECK_ERROR(aclrtSynchronizeEvent(*event)); + + block->event_count--; + if (block->event_count == 0) { + update_block(block); + } + } + } + + npu_events.clear(); +} + +void DeviceCachingAllocator::insert_events(Block *block) +{ + aclrtContext compiler_ctx = aclrtContext(); + aclError ret_ctx = aclrtGetCurrentContext(&compiler_ctx); + + stream_set streams(std::move(block->stream_uses)); + AT_ASSERT(block->stream_uses.empty()); + for (auto &stream : streams) { + c10_npu::SetDevice(stream.device_index()); + EventPool::Event event = create_event_internal(stream.device_index()); + event->record(stream); + + ASCEND_LOGI("Event: record DeviceAllocator is successfully executed."); + block->event_count++; + npu_events[stream].emplace_back(std::move(event), block); + } + if (ret_ctx == ACL_ERROR_NONE) { + aclrtSetCurrentContext(compiler_ctx); + } +} + +void DeviceCachingAllocator::insert_free_event_into_alloc_stream(Block *block) +{ + int prev_device = -1; + SWAP_CHECK_ERROR(c10_npu::GetDevice(&prev_device)); + if (prev_device != block->device) { + SWAP_CHECK_ERROR(c10_npu::SetDevice(block->device)); + } + + if (prev_device != block->device) { + SWAP_CHECK_ERROR(c10_npu::SetDevice(prev_device)); + } +} + +void DeviceCachingAllocator::insert_events_deferred_until_no_capture() +{ + if (C10_UNLIKELY(needs_events_deferred_until_no_capture.size() > 0)) { + for (auto *block : 
needs_events_deferred_until_no_capture) { + TORCH_INTERNAL_ASSERT(!block->stream_uses.empty()); + insert_events(block); + } + needs_events_deferred_until_no_capture.clear(); + } +} + +void DeviceCachingAllocator::process_events() +{ + // Process outstanding npuEvents. Events that are completed are removed + // from the queue, and the 'event_count' for the corresponding allocation + // is decremented. Stops at the first event which has not been completed. + // Since events on different devices or streams may occur out of order, + // the processing of some events may be delayed. + for (auto it = npu_events.begin(); it != npu_events.end();) { + while (!it->second.empty()) { + auto &e = it->second.front(); + EventPool::Event event = std::move(e.first); + Block *block = e.second; + + if (!event->query()) { + e.first = std::move(event); + break; + } + + block->event_count--; + if (block->event_count == 0) { + update_block(block); + } + it->second.pop_front(); + } + + if (it->second.empty()) { + it = npu_events.erase(it); + } else { + it++; + } + } +} + +void DeviceCachingAllocator::cache_info_aux(BlockPool &blocks, size_t *total, size_t *largest) +{ + for (auto it = blocks.blocks.begin(); it != blocks.blocks.end(); ++it) { + size_t blocksize = (*it)->size; + *total += blocksize; + if (blocksize > *largest) { + *largest = blocksize; + } + } +} + +bool DeviceCachingAllocator::get_fused_fragmented_blocks(AllocParams &p, int time) +{ + static const int vmmDefragment = ([]() -> int { + const char *env = getenv("vmmDefragment"); + if (env) { + return atoi(env); + } else { + return 1; + } + })(); + + static const size_t fragment_limit = ([]() -> size_t { + const char *env = getenv("fragLimit"); + if (env) { + return static_cast(std::stoll(env)); + } else { + return static_cast(16777216); + } + })(); + + static const int defragment_level = ([]() -> int { + const char *env = getenv("defragLevel"); + if (env) { + return static_cast(std::atoi(env)); + } else { + return 0; + } + 
})(); + + static const int auto_gc_limits = ([]() -> int { + const char *env = getenv("autoGC"); + if (env) { + return static_cast(std::atoi(env)); + } else { + return 3000; + } + })(); + + static const int split_limit = ([]() -> int { + const char *env = getenv("split_limit"); + if (env) { + return static_cast(std::atoi(env)); + } else { + return 10; + } + })(); + + if (vmmDefragment <= 0) { + return false; + } + + if (time < defragment_level) { + return false; + } + + if (p.pool->is_small || p.search_key.size < fragment_limit) { + return false; + } + + Block left_search_key(p.search_key.device, p.search_key.stream, p.search_key.size, p.search_key.pool, + p.search_key.ptr); + Block right_search_key(p.search_key.device, p.search_key.stream, p.search_key.size, p.search_key.pool, + p.search_key.ptr); + + left_search_key.size = 0; + right_search_key.size = std::numeric_limits::max(); + + auto it_begin = large_blocks.blocks.lower_bound(&left_search_key); + if (it_begin == large_blocks.blocks.end() || (*it_begin)->stream != p.stream()) { + return false; + } + auto it_end = large_blocks.blocks.lower_bound(&right_search_key); + if (it_end == large_blocks.blocks.begin() || (*std::prev(it_end))->stream != p.stream()) { + return false; + } + + if (std::prev(it_end) == it_begin) { + return false; + } + + size_t fuse_size = 0; + std::vector blocks2fuse; + + auto it = it_end; + while (it != it_begin) { + it = std::prev(it); + if (fuse_size + (*it)->size >= p.search_key.size) { + Block last_block_search_key(p.search_key.device, p.search_key.stream, p.search_key.size - fuse_size, + p.search_key.pool, p.search_key.ptr); + auto last_block_it = large_blocks.blocks.lower_bound(&last_block_search_key); + blocks2fuse.push_back((*last_block_it)); + fuse_size += (*last_block_it)->size; + break; + } else { + blocks2fuse.push_back((*it)); + fuse_size += (*it)->size; + } + } + + if (fuse_size < p.search_key.size) { + return false; + } + + if (fuse_size > p.search_key.size && (fuse_size - 
p.search_key.size) >= kGranularity) { + Block *last_block = blocks2fuse.back(); + blocks2fuse.pop_back(); + size_t original_size = last_block->size; + size_t remain_size = (fuse_size - p.search_key.size); + size_t keep_size = original_size - remain_size; + Block *a = split_large_block(last_block, keep_size); + blocks2fuse.push_back(a); + } + + int64_t net_change_segments = 0; + int64_t net_change_inactive_split_blocks = 0; + int64_t net_change_inactive_split_size = 0; + + std::vector> phy_chunks2glue; + auto sblock = stitch_block(blocks2fuse, p); + activate_large_block(sblock); + p.block = sblock; + p.err = ACL_ERROR_NONE; + + net_change_segments += 1; + + update_stat_array(stats.segment, net_change_segments, p.stat_types); + update_stat_array(stats.inactive_split, net_change_inactive_split_blocks, p.stat_types); + update_stat_array(stats.inactive_split_bytes, net_change_inactive_split_size, p.stat_types); + + return fuse_size >= p.search_key.size; +} + +bool DeviceCachingAllocator::release_swapout_blocks() +{ + return c10_npu::swap::NPUSwapManager::GetInstance().ProcessMallocEvent(); +} + +Block *DeviceCachingAllocator::stitch_block(std::vector &blocks2fuse, AllocParams &p) +{ + static constexpr size_t G = 1024 * 1024 * 1024; + static const int auto_gc_limits = ([]() -> int { + const char *env = getenv("autoGC"); + if (env) { + return static_cast(std::atoi(env)); + } else { + return 3000; + } + })(); + + std::vector> phy_chunks2glue; + + for (auto &block : blocks2fuse) { + for (auto &phy_block : block->vmm_segment->phy_chunks) { + phy_chunks2glue.push_back(phy_block); + } + } + size_t fuse_size = phy_chunks2glue.size() * kGranularity; + using Ms = std::chrono::duration; + Ms fuse_time = Ms{ 0 }; + std::shared_ptr vmm_segment; + int gc_time = 0; + do { + auto t0 = std::chrono::steady_clock::now(); + vmm_segment = std::make_shared(std::move(phy_chunks2glue)); + auto t1 = std::chrono::steady_clock::now(); + fuse_time = (t1 - t0); + if (vmm_segment->status == 
ACL_SUCCESS && vmm_segment->segment_ptr) { + break; + } else { + phy_chunks2glue = std::move(vmm_segment->phy_chunks); + size_t garbage_size = garbage_collect_fused_blocks(gc_time, fuse_size); + gc_time++; + } + } while (gc_time < 3); + + if (!vmm_segment || vmm_segment->status != ACL_SUCCESS || !vmm_segment->segment_ptr) { + throw GMLakeError("stitch pBlocks failed, something wrong happended !"); + } + + void *block_ptr = vmm_segment->segment_ptr; + Block *fused_block = new Block(p.device(), p.stream(), fuse_size, p.pool, (char *)block_ptr); + fused_block->vmm_segment = std::move(vmm_segment); + size_t offset = 0; + for (auto &phy_block : fused_block->vmm_segment->phy_chunks) { + phy_block->mapped_blocks.emplace_back(fused_block, offset); + offset++; + } + fused_block->vmm_segment->num_free_chunks = fused_block->vmm_segment->phy_chunks.size(); + fused_block->vmm_segment->num_used_chunks = 0; + + total_fuse_size += fuse_size; + if (total_fuse_size > auto_gc_limits * G) { + size_t garbage_size = garbage_collect_fused_blocks(2, 0); + } + free_fused_blocks.blocks.insert(fused_block); + free_fused_blocks.hash.insert(fused_block->ptr_hash); + return fused_block; +} + +Block *DeviceCachingAllocator::split_large_block(Block *block, size_t request_size) +{ + static const int vmmDefragment = ([]() -> int { + const char *env = getenv("vmmDefragment"); + if (env) { + return atoi(env); + } else { + return 1; + } + })(); + + large_blocks.blocks.erase(block); + + const bool already_split = block->is_split(); + const bool is_block_free = large_blocks.blocks.count(block) == 1 ? 
true : false; + + Block *remaining_block = block; + block = new Block(block->device, block->stream, request_size, block->pool, block->ptr); + block->prev = remaining_block->prev; + if (block->prev) { + block->prev->next = block; + } + block->next = remaining_block; + + remaining_block->prev = block; + remaining_block->ptr = static_cast(remaining_block->ptr) + request_size; + remaining_block->size -= request_size; + + if (vmmDefragment > 0 && remaining_block->vmm_segment) { + auto remaining_segment = remaining_block->vmm_segment->split(request_size); + block->vmm_segment = std::move(remaining_block->vmm_segment); + remaining_block->vmm_segment = std::move(remaining_segment); + + size_t offset = 0; + for (auto &phy_block : block->vmm_segment->phy_chunks) { + phy_block->mapped_blocks[0].block = block; + phy_block->mapped_blocks[0].offset = offset; + phy_block->free = true; + offset++; + } + + block->vmm_segment->num_free_chunks = block->vmm_segment->phy_chunks.size(); + block->vmm_segment->num_used_chunks = 0; + + offset = 0; + for (auto &phy_block : remaining_block->vmm_segment->phy_chunks) { + phy_block->mapped_blocks[0].block = remaining_block; + phy_block->mapped_blocks[0].offset = offset; + phy_block->free = true; + offset++; + } + remaining_block->vmm_segment->num_free_chunks = remaining_block->vmm_segment->phy_chunks.size(); + remaining_block->vmm_segment->num_used_chunks = 0; + } + + large_blocks.blocks.insert(block); + large_blocks.blocks.insert(remaining_block); + remaining_block->allocated = false; + block->allocated = false; + return block; +} + +void DeviceCachingAllocator::release_large_block(Block *block) +{ + if (!block->vmm_segment->fused) { + // 确认pblock内所有chunk关联的pblock/sblock是否一致 + // sblock集合,存储待释放的sblock + // 抽象为release_pblock release_sblock + for (auto &phy_block : block->vmm_segment->phy_chunks) { + while (phy_block->mapped_blocks.size() > 1) { + release_large_block(phy_block->mapped_blocks[1].block); + } + } + } + if 
(block->vmm_segment->fused) { + total_fuse_size -= block->size; + } + + if (free_fused_blocks.hash.count(block->ptr_hash)) { + free_fused_blocks.blocks.erase(block); + free_fused_blocks.hash.erase(block->ptr_hash); + } else if (fragmented_free_fused_blocks[block->stream].blocks.count(block)) { + fragmented_free_fused_blocks[block->stream].erase(block); + } else if (large_blocks.blocks.count(block)) { + large_blocks.blocks.erase(block); + } + for (auto &phy_block : block->vmm_segment->phy_chunks) { + int i = 0; + for (int j = 0; j < phy_block->mapped_blocks.size(); j++) { + if (phy_block->mapped_blocks[j].block != block) { + if (i != j) { + phy_block->mapped_blocks[i] = phy_block->mapped_blocks[j]; + } + i++; + } + } + phy_block->mapped_blocks.resize(i); + } + + { + auto tmp = std::move(block->vmm_segment); + } + delete block; +} + +void DeviceCachingAllocator::activate_large_block(Block *block) +{ + ska::flat_hash_set active_pblocks; + if (block->vmm_segment->fused) { + free_fused_blocks.blocks.erase(block); + free_fused_blocks.hash.erase(block->ptr_hash); + active_fused_blocks.insert(block); + } else { + large_blocks.blocks.erase(block); + active_blocks.insert(block); + } + int phy_chunks_size = block->vmm_segment->phy_chunks.size(); + int vir_chunks_size = block->vmm_segment->vir_chunks.size(); + + for (int i = 0; i < phy_chunks_size; i++) { + auto chunk = block->vmm_segment->phy_chunks[i]; + chunk->free = false; + block->vmm_segment->num_free_chunks--; + block->vmm_segment->num_used_chunks++; + + for (int j = 0; j < chunk->mapped_blocks.size(); j++) { + Block *other_block = chunk->mapped_blocks[j].block; + if (other_block == block) { + continue; + } + if (other_block->vmm_segment->fused) { + if (free_fused_blocks.hash.count(other_block->ptr_hash) == 1) { + free_fused_blocks.blocks.erase(other_block); + free_fused_blocks.hash.erase(other_block->ptr_hash); + fragmented_free_fused_blocks[other_block->stream].insert(other_block); + } + } else { + if 
(large_blocks.blocks.count(other_block) == 1) { + large_blocks.blocks.erase(other_block); + other_block->allocated = true; + active_blocks.insert(other_block); + active_pblocks.insert(other_block); + } + } + other_block->vmm_segment->num_free_chunks--; + other_block->vmm_segment->num_used_chunks++; + } + } +} + +void DeviceCachingAllocator::deactivate_large_block(Block *block) +{ + ska::flat_hash_set active_pblocks; + if (block->vmm_segment->fused) { + active_fused_blocks.erase(block); + free_fused_blocks.blocks.insert(block); + free_fused_blocks.hash.insert(block->ptr_hash); + } else { + active_blocks.erase(block); + } + int phy_chunks_size = block->vmm_segment->phy_chunks.size(); + int vir_chunks_size = block->vmm_segment->vir_chunks.size(); + AT_ASSERT(phy_chunks_size == vir_chunks_size, + "when inactive_block, phy_chunks_size is not equal to vir_chunks_size"); + for (int i = 0; i < phy_chunks_size; i++) { + auto chunk = block->vmm_segment->phy_chunks[i]; + chunk->free = true; + block->vmm_segment->num_used_chunks--; + block->vmm_segment->num_free_chunks++; + + for (int j = 0; j < chunk->mapped_blocks.size(); j++) { + Block *other_block = chunk->mapped_blocks[j].block; + if (other_block == block) { + continue; + } + if (other_block->vmm_segment->fused) { + other_block->vmm_segment->num_free_chunks++; + other_block->vmm_segment->num_used_chunks--; + if (other_block->vmm_segment->num_used_chunks == 0) { + fragmented_free_fused_blocks[other_block->stream].erase(other_block); + free_fused_blocks.blocks.insert(other_block); + free_fused_blocks.hash.insert(other_block->ptr_hash); + } + } else { + if (active_blocks.count(other_block) == 1) { + other_block->allocated = false; + active_pblocks.insert(other_block); + } + + other_block->vmm_segment->num_free_chunks++; + other_block->vmm_segment->num_used_chunks--; + } + } + } + if (!block->vmm_segment->fused) { + TORCH_INTERNAL_ASSERT(!block->allocated && block->event_count == 0 && block->stream_uses.empty()); + + auto 
&pool = *block->pool; + const std::array merge_candidates = { block->prev, block->next }; + for (Block *merge_candidate : merge_candidates) { + try_merge_blocks(block, merge_candidate, pool); + } + large_blocks.blocks.insert(block); + } + + for (auto &other_block : active_pblocks) { + free_block(other_block, false); + } +} + +size_t DeviceCachingAllocator::garbage_collect_fused_blocks(int time, size_t require_size) +{ + c10_npu::npuSynchronizeDevice(true); + + static const int gc_thresh = ([]() -> int { + const char *env = getenv("gc_thresh"); + if (env) { + return atoi(env); + } else { + return 100; + } + })(); + + std::lock_guard lock(mutex); + + size_t garbage_size = 0; + size_t garbage_blocks = 0; + + const size_t G = 1024 * 1024 * 1024; + for (auto &it : fragmented_free_fused_blocks) { + while (!it.second.blocks.empty() && garbage_size <= gc_thresh * G) { + Block *block = *(it.second.blocks.begin()); + aclError err = ACL_ERROR_NONE; + aclrtEventRecordedStatus eventStatus = ACL_EVENT_RECORDED_STATUS_NOT_READY; + if (err == ACL_ERROR_NONE) { + garbage_blocks++; + garbage_size += block->size; + release_large_block(block); + } else { + break; + } + } + } + + if (time > 0) { + while (!free_fused_blocks.blocks.empty()) { + Block *block = *(free_fused_blocks.blocks.begin()); + garbage_size += block->size; + release_large_block(block); + if (garbage_size <= gc_thresh * G) { + break; + } + } + } + + return garbage_size; +} \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/DeviceCachingAllocator.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/DeviceCachingAllocator.h new file mode 100644 index 000000000..f73eccf94 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/DeviceCachingAllocator.h @@ -0,0 +1,207 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// Copyright 2022 The GLake Authors. All rights reserved. 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include "common.h"
+#include "EventPool.h"
+#include "CachingAllocatorConfig.h"
+// Per-device caching allocator. NOTE(review): template arguments look stripped in this copy of the header.
+class DeviceCachingAllocator {
+private:
+    // lock around all operations
+    mutable std::recursive_mutex mutex;
+
+    // device statistics
+    DeviceStats stats;
+
+    // unallocated cached blocks larger than 1 MB
+    BlockPool large_blocks;
+
+    // unallocated cached blocks larger than 64 MB
+    // BlockPool huge_blocks;
+
+    // fused blocks that have been mapped to fragment blocks, in size order
+    BlockPool free_fused_blocks;
+
+    // fused blocks that have been mapped to fragment blocks, in release order (per stream)
+    std::unordered_map free_fused_blocks_in_release_order;
+
+    // fused blocks that are free but whose phy_chunks are in use by other blocks of the same stream
+    std::unordered_map fragmented_free_fused_blocks;
+
+    // unallocated cached blocks 1 MB or smaller
+    BlockPool small_blocks;
+
+    // allocated or in use by a stream. Holds all active allocations,
+    // whether they came from graph_pools or one of the BlockPools above.
+    ska::flat_hash_set active_blocks;
+
+    // active fused blocks
+    ska::flat_hash_set active_fused_blocks;
+
+    // active fused blocks to be garbage collected
+    ska::flat_hash_set active_fused_blocks_to_gc;
+
+    // captures_underway tracks if a capture might be underway on any stream.
+    // Most of the time it's zero, in which case malloc can avoid calling
+    // cudaStreamGetCaptureInfo in the hot path.
+    int captures_underway = 0;
+    // See free() for this thing's purpose
+    std::vector needs_events_deferred_until_no_capture;
+    // outstanding NPU events, keyed by the stream they were recorded on
+    ska::flat_hash_map>> npu_events;
+
+    // record used memory.
+    size_t total_allocated_memory = 0;
+
+    size_t total_fuse_size = 0;
+
+    size_t allowed_memory_maximum = 0;
+
+    bool set_fraction = false;
+
+    std::atomic context_recorder_;
+    size_t alloc_trace_next = 0;
+    bool alloc_trace_record_context_ = false;
+    RecordContext record_context_ = RecordContext::NEVER;
+    size_t alloc_trace_max_entries_ = 1;
+    std::vector *alloc_trace; // pointer because we need to intentionally leak this on
+                              // deallocation it can hold references to Python state which
+                              // will already be destroyed when we are in exit handlers
+
+    // XXX - maybe we should generalize and have multiple events
+    std::vector oom_observers_;
+
+public:
+    DeviceCachingAllocator()
+        : large_blocks(BlockComparator, false),
+          free_fused_blocks(BlockComparator, false),
+          small_blocks(BlockComparator, true),
+          alloc_trace(new std::vector())
+    {
+        stats.max_split_size = CachingAllocatorConfig::max_split_size();
+        context_recorder_.store(nullptr);
+    }
+
+    // All public methods (except the above) acquire the allocator mutex.
+    // Thus, do not call a public method from another public method.
+
+    Block *malloc(int device, size_t orig_size, aclrtStream stream);
+
+    Block *alloc_found_block(AllocParams params, size_t orig_size, bool split_remainder);
+
+    void free(Block *block);
+
+    void update_block(Block *block);
+
+    void *getBaseAllocation(Block *block, size_t *outSize);
+
+    void recordStream(Block *block, c10_npu::NPUStream stream);
+
+    void eraseStream(Block *block, c10_npu::NPUStream stream);
+
+    /* * set memory fraction to limit maximum allocated memory * */
+    void setMemoryFraction(double fraction);
+
+    /* * returns cached blocks to the system allocator * */
+    void emptyCache(bool check_error);
+
+    /* * Retrieves info (total size + largest block) of the memory cache * */
+    void cacheInfo(size_t *total, size_t *largest);
+
+    /* * Returns a copy of the memory allocator stats * */
+    DeviceStats getStats();
+
+    /* * Resets the historical accumulation stats for the device * */
+    void resetAccumulatedStats();
+
+    /* * Resets the historical peak stats for the device * */
+    void resetPeakStats();
+
+    /* * Dump a complete snapshot of the memory held by the allocator. Potentially VERY expensive. * */
+    std::vector snapshot();
+
+    static size_t round_size(size_t size);
+
+private:
+    // Private methods do not acquire the allocator mutex, except garbage_collect_fused_blocks (mutex is recursive).
+
+    std::vector get_all_blocks() const;
+
+    /* * moves a block into a pool of cached free blocks * */
+    void free_block(Block *block, bool flag);
+
+    bool need_merge(Block *dst, Block *src);
+
+    /* * combine previously split blocks. returns the size of the subsumed block, or 0 on failure. * */
+    size_t try_merge_blocks(Block *dst, Block *src, BlockPool &pool);
+
+    BlockPool &get_pool(size_t size);
+
+    StatType get_stat_type_for_pool(const BlockPool &pool);
+
+    StatTypes get_stat_types_for_pool(const BlockPool &pool);
+
+    bool should_split(const Block *block, size_t size);
+
+    static size_t get_allocation_size(size_t size);
+
+    bool get_free_block(AllocParams &p);
+
+    bool trigger_free_memory_callbacks(AllocParams &p);
+
+    void garbage_collect_cached_blocks();
+
+    bool realloc_block(AllocParams &p, bool isRetry);
+
+    /* * Free one or more oversize blocks to the system allocator. But only enough to satisfy the target size * */
+    bool release_available_cached_blocks(const AllocParams &p);
+
+    bool release_cached_blocks();
+
+    void release_block(Block *block);
+
+    void release_blocks(BlockPool &pool);
+
+    EventPool::Event create_event_internal(int idx);
+
+    void synchronize_and_free_events();
+
+    void insert_events(Block *block);
+
+    void insert_free_event_into_alloc_stream(Block *block);
+
+    void insert_events_deferred_until_no_capture();
+
+    void process_events();
+
+    // Accumulates sizes of all memory blocks for given device in given pool
+    void cache_info_aux(BlockPool &blocks, size_t *total, size_t *largest);
+
+    bool get_fused_fragmented_blocks(AllocParams &p, int time);
+
+    bool release_swapout_blocks();
+
+    Block *stitch_block(std::vector &blocks2fuse, AllocParams &p);
+
+    Block *split_large_block(Block *block, size_t request_size);
+
+    void release_large_block(Block *block);
+
+    void activate_large_block(Block *block);
+
+    void deactivate_large_block(Block *block);
+
+    size_t garbage_collect_fused_blocks(int time, size_t require_size = 0);
+};
diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/EventPool.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/EventPool.cpp
new file mode 100644
index 000000000..babbd388e
--- /dev/null
+++
b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/EventPool.cpp @@ -0,0 +1,45 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "EventPool.h" + +EventPool::Event EventPool::get(int device) +{ + auto &pool = pools_[device]; + auto destructor = [&pool](c10_npu::NPUEvent *event) { + std::lock_guard g(pool.mutex_); + pool.event_pool_.push_back(std::unique_ptr(event)); + }; + + // Try to acquire an event from the per-device pool. + { + std::lock_guard g(pool.mutex_); + if (!pool.event_pool_.empty()) { + auto *event = pool.event_pool_.back().release(); + pool.event_pool_.pop_back(); + return Event(event, destructor); + } + } + // otherwise, allocate a new event that will be returned to the pool on + // destruction. + return Event(std::make_unique(ACL_EVENT_CAPTURE_STREAM_PROGRESS).release(), destructor); +} + +void EventPool::empty_cache() +{ + for (auto &pool : pools_) { + std::lock_guard g(pool.mutex_); + pool.event_pool_.clear(); + } +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/EventPool.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/EventPool.h new file mode 100644 index 000000000..0e7212363 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/EventPool.h @@ -0,0 +1,36 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. 
+// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include + +class EventPool { +public: + using Event = std::unique_ptr>; + // Explicit device count + EventPool() : pools_(c10_npu::device_count()) {} + + Event get(int device); + + void empty_cache(); + +private: + struct PerDevicePool { + alignas(64) std::mutex mutex_; + std::vector> event_pool_; + }; + std::vector pools_; +}; diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/NPUSwapManager.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/NPUSwapManager.cpp new file mode 100644 index 000000000..5bd671cdc --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/NPUSwapManager.cpp @@ -0,0 +1,1442 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "NPUSwapManager.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "swap_log.h" +#include "SwapException.h" + +namespace c10_npu { +namespace swap { +SwapStage::SwapStage() : stageType(SwapStageType::INIT), microBatchIndex(0), layerIndex(0) {} + +bool SwapStage::operator == (const SwapStage &other) const +{ + return stageType == other.stageType && microBatchIndex == other.microBatchIndex && layerIndex == other.layerIndex; +} + +std::ostream &operator << (std::ostream &os, const SwapStage &obj) +{ + os << "SwapStage: " + << "stageType: " << static_cast(obj.stageType) << " " + << "microBatchIndex: " << obj.microBatchIndex << " " + << "layerIndex: " << obj.layerIndex << std::endl; + return os; +} + +SwapConfig::SwapConfig() + : microBatchNum(0), + layerNum(0), + isOOM(false), + step(0), + oneStepDuration(0.0), + policyStep(0), + enableProfiler(false), + tensorSizeThresh(0), + enableExecutor(false), + enableCustomRecordStream(true) +{} + +UniqueSwapPtr::UniqueSwapPtr() : ptrBase(0), index(0) {} + +bool UniqueSwapPtr::operator == (const UniqueSwapPtr &other) const +{ + return ptrBase == other.ptrBase && index == other.index; +} + +std::ostream &operator << (std::ostream &os, const UniqueSwapPtr &obj) +{ + os << "UniqueSwapPtr: " + << "ptrBase: " << std::hex << obj.ptrBase << std::dec << " " + << "index: " << obj.index << std::endl; + return os; +} + +UniqueSwapPtr::operator std::string() const +{ + std::stringstream ss; + ss << ptrBase << "_" << index; + return ss.str(); +} + +UniqueSwapMemory::UniqueSwapMemory() : allocated_bytes(0), reserved_bytes(0), active_bytes(0) {} + +UniqueSwapMemory::UniqueSwapMemory(int64_t allocated_bytes, int64_t reserved_bytes, int64_t active_bytes) + : allocated_bytes(allocated_bytes), reserved_bytes(reserved_bytes), active_bytes(active_bytes) +{} + +std::ostream &operator << (std::ostream &os, const UniqueSwapMemory &obj) +{ + os << "UniqueSwapMemory: " + << "allocated_bytes: " << 
obj.allocated_bytes << " " + << "reserved_bytes: " << obj.reserved_bytes << " " + << "active_bytes: " << obj.active_bytes << std::endl; + return os; +} + +// class ProfilerTensorInfo +ProfilerTensorInfo::ProfilerTensorInfo(const at::Tensor &tensor) +{ + this->ptr = NPUSwapManager::GetInstance().getUniqueSwapPtr(tensor); + this->nbytes = tensor.storage().nbytes(); + this->dtype = tensor.scalar_type(); + + // 根据tensorPtrTypeMap进行查找 + auto tensorPtrTypeIter = NPUSwapManager::GetInstance().tensorPtrTypeMap.find(this->ptr); + if (tensorPtrTypeIter == NPUSwapManager::GetInstance().tensorPtrTypeMap.end()) { + this->tensorType = SwapTensorType::OTHERS; + } else { + this->tensorType = tensorPtrTypeIter->second; + } + + for (int i = 0; i < tensor.sizes().size(); i++) { + this->shapeV2.push_back(tensor.sizes()[i]); + } +} + +std::ostream &operator << (std::ostream &os, const ProfilerTensorInfo &obj) +{ + os << "ProfilerTensorInfo: " + << "ptr: " << obj.ptr << " " + << "nbytes: " << obj.nbytes << " " + << "dtype: " << obj.dtype << " " + << "tensorType: " << static_cast(obj.tensorType) << " " + << "shape: " << obj.shapeV2 << std::endl; + return os; +} + +// class ProfilerOpInfo +ProfilerOpInfo::ProfilerOpInfo(int opId, std::string opName, int64_t allocated_bytes, int64_t reserved_bytes, + int64_t active_bytes) + : opId(opId), opName(opName), swapMemory(allocated_bytes, reserved_bytes, active_bytes) +{ + this->stage = NPUSwapManager::GetInstance().config.stage; + this->step = NPUSwapManager::GetInstance().config.step; +} + +std::ostream &operator << (std::ostream &os, const ProfilerOpInfo &obj) +{ + os << "ProfilerOpInfo: " + << "opId: " << obj.opId << " " + << "opName: " << obj.opName << " " + << "stage: " << obj.stage << " " + << "step: " << obj.step << " " + << "swapMemory: " << obj.swapMemory << std::endl; + for (auto &t : obj.profilerTensorInfoVec) { + os << t << std::endl; + } + return os; +} + +void ProfilerOpInfo::appendTensorInfo(const at::Tensor &tensor) +{ + 
profilerTensorInfoVec.emplace_back(ProfilerTensorInfo(tensor)); +} + +ProfilerSwapInfo::ProfilerSwapInfo(int opId, std::string swapName, size_t size, bool isOOM, UniqueSwapPtr srcDataPtr, + UniqueSwapPtr dstDataPtr) + : opId(opId), swapName(swapName), size(size), isOOM(isOOM), srcPtr(srcDataPtr), dstPtr(dstDataPtr) +{} + +// class SwapProfiler +SwapProfiler::SwapProfiler() : isInit(false) {} + +SwapProfiler::~SwapProfiler() +{ + isInit = false; +} + +int SwapProfiler::Init() +{ + isInit = true; + lastOpId = 0; + return 0; +} + +void SwapProfiler::updateStep() +{ + profilerOpInfoMap[NPUSwapManager::GetInstance().config.step] = profilerOpInfoVec; + lastOpId = profilerOpInfoVec.back().opId; + profilerOpInfoVec.clear(); + profilerSwapInfoVec.clear(); +} + +void SwapProfiler::appendOpInfo(std::string &opName, int &opId) +{ + int device = 0; + SWAP_CHECK_ERROR(c10_npu::GetDevice(&device)); + const c10_npu::NPUCachingAllocator::DeviceStats stats = + c10_npu::NPUCachingAllocator::allocator.load()->getDeviceStats(device); + + ProfilerOpInfo profilerOpInfo(opId, opName, + stats.allocated_bytes[static_cast(c10_npu::NPUCachingAllocator::StatType::AGGREGATE)].current, + stats.reserved_bytes[static_cast(c10_npu::NPUCachingAllocator::StatType::AGGREGATE)].current, + stats.active_bytes[static_cast(c10_npu::NPUCachingAllocator::StatType::AGGREGATE)].current); + profilerOpInfoVec.emplace_back(profilerOpInfo); +} + +void SwapProfiler::ReportInfoV2(std::string &opName, int &opId, c10::SmallVector &tensors) +{ + appendOpInfo(opName, opId); + ProfilerOpInfo &profilerOpInfo = profilerOpInfoVec.back(); + for (const auto &tensor : tensors) { + profilerOpInfo.appendTensorInfo(tensor); + } +} + +void SwapProfiler::ReportInfoV2(bool isSwapOut, at::DataPtr &srcDataPtr, at::DataPtr &dstDataPtr, size_t size, + bool isOOM) +{ + std::string swapName = isSwapOut ? "swapOut" : "swapIn"; + int opId = profilerOpInfoVec.empty() ? 
lastOpId : profilerOpInfoVec.back().opId; + ProfilerSwapInfo profilerSwapInfo(opId, swapName, size, isOOM, + NPUSwapManager::GetInstance().getUniqueSwapPtr(srcDataPtr.get()), + NPUSwapManager::GetInstance().getUniqueSwapPtr(dstDataPtr.get())); + profilerSwapInfoVec.emplace_back(profilerSwapInfo); +} + +std::vector &SwapProfiler::getPolicyStepOpVec() +{ + return profilerOpInfoMap[NPUSwapManager::GetInstance().config.policyStep]; +} + +SwapPolicyInfo::SwapPolicyInfo() : executorNeedMatch(false), swapOutOpId(0), swapInOpId(0) {} + +std::ostream &operator << (std::ostream &os, const SwapPolicyInfo &obj) +{ + os << "SwapPolicyInfo: " + << "ptr: " << obj.ptr << " " + << "swapOutOpId: " << obj.swapOutOpId << " " + << "swapOutStage: " << obj.swapOutStage << " " + << "swapInOpId: " << obj.swapInOpId << " " + << "swapInStage.: " << obj.swapInStage << " " + << "freeStage: " << obj.freeStage << " " + << "swapInFreeStage.: " << obj.swapInFreeStage << std::endl; + return os; +} + +ExecutorTensorInfo::ExecutorTensorInfo() + : opCount(0), + opTag(0), + dtype(at::ScalarType::Byte), + nbytes(0), + shape(0), + opCallsStack(0), + tensorIndexCallsStack(0) +{} + +ExecutorTensorInfo::ExecutorTensorInfo(const at::Tensor &tensor, UniqueSwapPtr uniqueSwapPtr) + : ptr(uniqueSwapPtr), + opCount(0), + opTag(0), + dtype(tensor.scalar_type()), + opCallsStack(0), + tensorIndexCallsStack(0) +{ + nbytes = tensor.storage().nbytes(); + shape = convertShapeToInt64(tensor); +} + +ExecutorTensorInfo::ExecutorTensorInfo(const SwapStage &s1, const SwapStage &s2) + : opCount(0), + opTag(0), + dtype(at::ScalarType::Byte), + nbytes(0), + shape(0), + opCallsStack(0), + tensorIndexCallsStack(0), + swapOutStage(s1), + swapInStage(s2) +{} + +bool ExecutorTensorInfo::operator == (const ExecutorTensorInfo &other) const +{ + return opCount == other.opCount && opTag == other.opTag && dtype == other.dtype && nbytes == other.nbytes && + shape == other.shape && opCallsStack == other.opCallsStack && + 
tensorIndexCallsStack == other.tensorIndexCallsStack; +} + +std::ostream &operator << (std::ostream &os, const ExecutorTensorInfo &obj) +{ + os << "ExecutorTensorInfo: " + << "ptr: " << obj.ptr << " " + << "opCount: " << obj.opCount << " " + << "opTag: " << obj.opTag << " " + << "nbytes: " << obj.nbytes << " " + << "shape: " << obj.shape << " " + << "opCallsStack: " << obj.opCallsStack << " " + << "tensorIndexCallsStack: " << obj.tensorIndexCallsStack << " " + << "swapOutStage: " << obj.swapOutStage << " " + << "swapInStage: " << obj.swapInStage << " " + << "freeStage: " << obj.freeStage << " " + << "swapInFreeStage: " << obj.swapInFreeStage << std::endl; + return os; +} + +size_t ExecutorTensorInfo::convertShapeToInt64(const at::Tensor &tensor) +{ + size_t res = 0; + for (auto s : tensor.sizes()) { + res = (res << 16) + s; + } + return res; +} + +size_t ExecutorTensorInfo::convertShapeToInt64(const c10::SmallVector &sizes) +{ + size_t res = 0; + for (auto s : sizes) { + res = (res << 16) + s; + } + return res; +} + +void ExecutorTensorInfo::initFromProfilerTensorInfo(const ProfilerTensorInfo &pti) +{ + nbytes = pti.nbytes; + shape = convertShapeToInt64(pti.shapeV2); + dtype = pti.dtype; + ptr.ptrBase = pti.ptr.ptrBase; + ptr.index = pti.ptr.index; +} + +void ExecutorTensorInfo::updateCallsStack(int opOneHot, int opIndex, int tensorIndex) +{ + ++opCount; + opTag |= opOneHot; + opCallsStack = (opCallsStack << 8) + opIndex; + tensorIndexCallsStack = (tensorIndexCallsStack << 8) + tensorIndex; +} + +// class SwapExecutor +SwapExecutor::SwapExecutor() : isInit(false) {} + +SwapExecutor::~SwapExecutor() +{ + DeInit(); +} + +int SwapExecutor::Init() +{ + if (isInit) { + return 0; + } + + this->swapStreams.push_back(getNPUStreamFromPool(c10_npu::current_device())); + + isInit = true; + return 0; +} + +int SwapExecutor::DeInit() +{ + if (!isInit) { + return 0; + } + isInit = false; + return 0; +} + +int SwapExecutor::SwapOut(c10::intrusive_ptr storageImplPtr, bool isOOM, 
SwapStage *freeStage) +{ + at::DataPtr &dataPtrNpu = storageImplPtr->mutable_data_ptr(); + if (dataPtrNpu.device().is_cpu()) { + SWAP_LOG_WARN("SwapOut tensor dataPtr is on cpu, skip."); + return 1; + } + uint64_t uniqueId = static_cast(storageImplPtr.get())->get_unique_id(); + auto inEventIter = swapInEventMap.find(uniqueId); + if (inEventIter != swapInEventMap.end()) { + SWAP_LOG_WARN("SwapOut tensor need to process swapin wait task, skip."); + return 1; + } + + auto swapOutStorageImpIter = swapOutStorageImplMap.find(uniqueId); + if (swapOutStorageImpIter != swapOutStorageImplMap.end()) { + SWAP_LOG_WARN("Tensor cannot be swapped out twice consecutively, skip."); + return 1; + } + + RECORD_FUNCTION("swap_out", std::vector({})); + + SWAP_LOG_INFO("SwapOut pre, storage uniqueId[%lu], mem ptr on npu[%p][%s]", uniqueId, storageImplPtr->data(), + std::string(NPUSwapManager::GetInstance().getUniqueSwapPtr(storageImplPtr->data())).c_str()); + + auto allocatorCPU = at_npu::native::getCachingHostAllocator(); + size_t size = storageImplPtr->nbytes(); + at::DataPtr dataPtrCpu = allocatorCPU->allocate(size); + + NPUSwapManager::GetInstance().tensorPtrCountMap[reinterpret_cast(dataPtrCpu.get())]++; + + if (NPUSwapManager::GetInstance().config.enableProfiler) { + NPUSwapManager::GetInstance().ReportInfoToSwapProfiler(true, dataPtrNpu, dataPtrCpu, size, isOOM); + } + + c10_npu::NPUStream &swapStream = this->swapStreams[0]; + c10_npu::NPUEvent event; + event.record(c10_npu::getCurrentNPUStream()); + event.block(swapStream); + + c10_npu::NPUStream currentStream = c10_npu::getCurrentNPUStream(); + c10_npu::setCurrentNPUStream(swapStream); + at_npu::native::memory_swap(dataPtrCpu.get(), size, dataPtrNpu.get(), size, 2); + c10_npu::setCurrentNPUStream(currentStream); + + if (!NPUSwapManager::GetInstance().config.enableCustomRecordStream) { + c10_npu::NPUCachingAllocator::allocator.load()->recordStream(dataPtrNpu, swapStream); + } + + 
dataPtrCpu.unsafe_set_device(dataPtrNpu.device()); + if (NPUSwapManager::GetInstance().config.enableCustomRecordStream) { + NPUSwapManager::GetInstance().RecordStream(dataPtrNpu, swapStream, freeStage); + } + storageImplPtr->set_data_ptr_noswap(std::move(dataPtrCpu)); + SWAP_LOG_INFO("SwapOut post, storage uniqueId[%lu], mem ptr on cpu[%p][%s]", uniqueId, storageImplPtr->data(), + std::string(NPUSwapManager::GetInstance().getUniqueSwapPtr(storageImplPtr->data())).c_str()); + + swapOutStorageImplMap.insert(std::make_pair(uniqueId, c10::weak_intrusive_ptr(storageImplPtr))); + + return 0; +} + +int SwapExecutor::SwapOut(const at::Tensor &tensor, SwapStage *freeStage) +{ + c10::intrusive_ptr storageImplPtr = tensor.storage().getWeakStorageImpl().lock(); + if (!storageImplPtr) { + return 1; + } + + return SwapOut(storageImplPtr, false, freeStage); +} + +int SwapExecutor::SwapIn(uint64_t uniqueId, bool needWait) +{ + auto outTensorIter = swapOutStorageImplMap.find(uniqueId); + if (outTensorIter == swapOutStorageImplMap.end()) { + return 1; + } + + c10::intrusive_ptr storageImplPtr = outTensorIter->second.lock(); + + if (!storageImplPtr) { + SWAP_LOG_INFO( + "SwapIn pre: StorageImpl of the tensor for current SwapIn is already destructed since the tensor would \ +not be used anymore. 
swapOutStorageImplMap.find(uniqueId[%lu])->second.weak_count[%zu], use_count[%zu]", + uniqueId, outTensorIter->second.weak_use_count(), outTensorIter->second.use_count()); + swapOutStorageImplMap.erase(outTensorIter); + return 1; + } + + RECORD_FUNCTION("swap_in", std::vector({})); + + c10_npu::NPUStream &swapStream = this->swapStreams[0]; + c10_npu::NPUEvent beforeSwapInEvent; + beforeSwapInEvent.record(c10_npu::getCurrentNPUStream()); + beforeSwapInEvent.block(swapStream); + + at::DataPtr &dataPtrCpu = storageImplPtr->mutable_data_ptr(); + + SWAP_LOG_INFO("SwapIn pre, storage uniqueId[%lu], mem ptr on cpu[%p][%s]", uniqueId, storageImplPtr->data(), + std::string(NPUSwapManager::GetInstance().getUniqueSwapPtr(storageImplPtr->data())).c_str()); + + auto allocatorNPU = c10_npu::NPUCachingAllocator::allocator.load(); + size_t size = storageImplPtr->nbytes(); + at::DataPtr dataPtrNpu = allocatorNPU->allocate(size); + + if (NPUSwapManager::GetInstance().config.enableProfiler) { + NPUSwapManager::GetInstance().ReportInfoToSwapProfiler(false, dataPtrCpu, dataPtrNpu, size); + } + + c10_npu::NPUStream currentStream = c10_npu::getCurrentNPUStream(); + c10_npu::setCurrentNPUStream(swapStream); + at_npu::native::memory_swap(dataPtrNpu.get(), size, dataPtrCpu.get(), size, 1); + c10_npu::setCurrentNPUStream(currentStream); + + if (NPUSwapManager::GetInstance().config.enableCustomRecordStream) { + auto it = uniqueIdToSwapInFreeStageMap.find(uniqueId); + if (it != uniqueIdToSwapInFreeStageMap.end()) { + NPUSwapManager::GetInstance().RecordStream(storageImplPtr, swapStream, &(it->second)); + uniqueIdToSwapInFreeStageMap.erase(it); + } else { + NPUSwapManager::GetInstance().RecordStream(storageImplPtr, swapStream); + } + NPUSwapManager::GetInstance().RecordStream(dataPtrCpu, swapStream); + } else { + c10_npu::NPUCachingAllocator::allocator.load()->recordStream(dataPtrNpu, swapStream); + at_npu::native::CachingHostAllocator_recordEvent(dataPtrCpu.get(), swapStream); + } + + 
storageImplPtr->set_data_ptr_noswap(std::move(dataPtrNpu)); + + SWAP_LOG_INFO("SwapIn post, storage uniqueId[%lu], mem ptr on npu[%p][%s]", uniqueId, storageImplPtr->data(), + std::string(NPUSwapManager::GetInstance().getUniqueSwapPtr(storageImplPtr->data())).c_str()); + + c10_npu::NPUEvent afterSwapInEvent; + afterSwapInEvent.record(swapStream); + + swapOutStorageImplMap.erase(outTensorIter); + swapInEventMap.insert(std::make_pair(uniqueId, std::move(afterSwapInEvent))); + + if (needWait) { + SwapInWait(uniqueId); + } + return 0; +} + +int SwapExecutor::SwapInWait(uint64_t uniqueId) +{ + auto inEventIter = swapInEventMap.find(uniqueId); + if (inEventIter == swapInEventMap.end()) { + return 1; + } + + RECORD_FUNCTION("swap_in_wait", std::vector({})); + + SWAP_LOG_INFO("SwapIn wait, storage uniqueId[%lu]", uniqueId); + inEventIter->second.block(c10_npu::getCurrentNPUStream()); + swapInEventMap.erase(inEventIter); + return 0; +} + +void SwapExecutor::CheckAndInsertStorageToMap(const at::Tensor &src, const at::Tensor &dst) +{ + uint64_t uniqueIdSrc = + static_cast(src.storage().unsafeGetStorageImpl())->get_unique_id(); + auto tensorIter = swapOutStorageImplMap.find(uniqueIdSrc); + if (tensorIter == swapOutStorageImplMap.end()) { + return; + } + + c10::intrusive_ptr storageImplPtrDst = dst.storage().getWeakStorageImpl().lock(); + if (!storageImplPtrDst) { + return; + } + + uint64_t uniqueIdDst = + static_cast(dst.storage().unsafeGetStorageImpl())->get_unique_id(); + swapOutStorageImplMap.insert( + std::make_pair(uniqueIdDst, c10::weak_intrusive_ptr(storageImplPtrDst))); + SWAP_LOG_INFO("Insert storage to SwapOutStorageImplMap, uniqueId[%lu], mem ptr on cpu[%p][%s]", uniqueIdDst, + storageImplPtrDst->data(), + std::string(NPUSwapManager::GetInstance().getUniqueSwapPtr(storageImplPtrDst->data())).c_str()); +} + +bool SwapExecutor::needGenerateTensorInfo(const at::Tensor &tensor) +{ + if (tensor.nbytes() < NPUSwapManager::GetInstance().config.tensorSizeThresh) { + return 
false; + } + return true; +} + +void SwapExecutor::initOpNameToOneHotAndIndexMap(const std::vector &opNames) +{ + opNameToOneHotAndIndexMap.clear(); + size_t oneHot = 1; + size_t opIndex = 1; + for (const auto &opName : opNames) { + opNameToOneHotAndIndexMap[opName] = std::make_pair(oneHot, opIndex); + oneHot = oneHot << 1; + opIndex += 1; + } +} + +bool SwapExecutor::checkMatchAndSwapOut(ExecutorTensorInfo &eti, std::vector &candidateSwapOutVec) +{ + int matchCount = 0; + for (auto it = candidateSwapOutVec.rbegin(); matchCount < 5 && it != candidateSwapOutVec.rend(); ++it) { + ++matchCount; + if ((*(*it)) == eti && NPUSwapManager::GetInstance().config.stage == (*it)->swapOutStage) { + eti.swapInStage = (*it)->swapInStage; + eti.freeStage = (*it)->freeStage; + eti.swapInFreeStage = (*it)->swapInFreeStage; + candidateSwapOutVec.erase(std::next(it).base()); + return true; + } + } + return false; +} + +void SwapExecutor::initStanderdSwapOutVec(std::vector &standerdSwapOutVec, + const std::vector &opInfosVec, const std::vector &policyInfosVec) +{ + for (const auto &policyInfo : policyInfosVec) { + if (!policyInfo.executorNeedMatch) { + continue; + } + ExecutorTensorInfo *eti = new ExecutorTensorInfo(policyInfo.swapOutStage, policyInfo.swapInStage); + for (const auto &opInfoIter : opInfosVec) { + if (opInfoIter.opId > policyInfo.swapOutOpId) { + break; + } + std::pair oneHotAndIndex = GetOpOneHotAndIndex(opInfoIter.opName); + int tensorIndex = 0; + for (const auto &tensorInfoIter : opInfoIter.profilerTensorInfoVec) { + if (tensorInfoIter.ptr == policyInfo.ptr) { + if (eti->opCount == 0) { + eti->initFromProfilerTensorInfo(tensorInfoIter); + eti->swapInStage = policyInfo.swapInStage; + eti->swapOutStage = policyInfo.swapOutStage; + eti->freeStage = policyInfo.freeStage; + eti->swapInFreeStage = policyInfo.swapInFreeStage; + } + eti->updateCallsStack(oneHotAndIndex.first, oneHotAndIndex.second, tensorIndex); + } + tensorIndex++; + } + } + 
standerdSwapOutVec.push_back(eti); + } +} + +void SwapExecutor::initCandidateOptimPolicyVec(const std::vector &policyInfosVec) +{ + for (const auto &policyInfo : policyInfosVec) { + if (policyInfo.executorNeedMatch) { + continue; + } + candidateOptimPolicyVec.emplace_back(policyInfo); + } +} + +void SwapExecutor::processOptimTask(std::unordered_map, + HashUniqueSwapPtr> &tensorPtrWeakPtrMap) +{ + for (const auto &policyInfo : candidateOptimPolicyVec) { + auto weakPtr = tensorPtrWeakPtrMap.find(policyInfo.ptr); + if (weakPtr != tensorPtrWeakPtrMap.end()) { + auto storageImplPtr = weakPtr->second.lock(); + if (!storageImplPtr) { + continue; + } + // swapout + auto tensorToSwapOutVecIter = stageToSwapOutMap + .try_emplace(policyInfo.swapOutStage, + c10::SmallVector, N>()) + .first; + tensorToSwapOutVecIter->second.push_back(weakPtr->second); + // swapin + auto tensorToSwapInVecIter = stageToSwapInMap + .try_emplace(policyInfo.swapInStage, + c10::SmallVector, N>()) + .first; + tensorToSwapInVecIter->second.push_back(weakPtr->second); + + auto iter = stageToOptimFreeStageMap.try_emplace(policyInfo.swapOutStage, std::vector()).first; + iter->second.push_back(policyInfo.freeStage); + + uint64_t uniqueId = + static_cast(storageImplPtr.get())->get_unique_id(); + uniqueIdToSwapInFreeStageMap[uniqueId] = policyInfo.swapInFreeStage; + } + } +} + +std::pair SwapExecutor::GetOpOneHotAndIndex(const std::string &opName) +{ + auto it = opNameToOneHotAndIndexMap.find(opName); + if (it != opNameToOneHotAndIndexMap.end()) { + return it->second; + } + return std::pair(0, 0); +} + +void SwapExecutor::ProcessTensorMatchTask(const std::string &opName, const c10::SmallVector &curTensors) +{ + if (candidateSwapOutVec.empty()) { + return; + } + std::pair oneHotAndIndex = GetOpOneHotAndIndex(opName); + int tensorIndex = 0; + for (const auto &tensor : curTensors) { + if (needGenerateTensorInfo(tensor)) { + UniqueSwapPtr uniqueSwapPtr = NPUSwapManager::GetInstance().getUniqueSwapPtr(tensor); + 
auto executorTensorInfoIter = ptrToTensorInfoMap.find(uniqueSwapPtr); + if (executorTensorInfoIter == ptrToTensorInfoMap.end()) { + executorTensorInfoIter = + ptrToTensorInfoMap.try_emplace(uniqueSwapPtr, ExecutorTensorInfo(tensor, uniqueSwapPtr)).first; + } + (executorTensorInfoIter->second).updateCallsStack(oneHotAndIndex.first, oneHotAndIndex.second, tensorIndex); + if (checkMatchAndSwapOut(executorTensorInfoIter->second, candidateSwapOutVec)) { + SwapOut(tensor, &(executorTensorInfoIter->second.freeStage)); + auto tensorToSwapInVecIter = stageToSwapInMap + .try_emplace((executorTensorInfoIter->second).swapInStage, + c10::SmallVector, N>()) + .first; + tensorToSwapInVecIter->second.push_back(tensor.storage().getWeakStorageImpl()); + + uint64_t uniqueId = static_cast(tensor.storage().unsafeGetStorageImpl()) + ->get_unique_id(); + uniqueIdToSwapInFreeStageMap[uniqueId] = executorTensorInfoIter->second.swapInFreeStage; + } + } + tensorIndex++; + } +} + +void SwapExecutor::ProcessStageMatchTask(const SwapStage ¤tStage) +{ + auto itOut = stageToSwapOutMap.find(currentStage); + if (itOut != stageToSwapOutMap.end()) { + auto tempIter = stageToOptimFreeStageMap.find(currentStage); + int count = 0; + for (auto &storageImpl : itOut->second) { + auto storageImplPtr = storageImpl.lock(); + if (!storageImplPtr) { + count++; + continue; + } + SwapOut(storageImplPtr, false, &(tempIter->second[count++])); + } + stageToSwapOutMap.erase(itOut); + stageToOptimFreeStageMap.erase(tempIter); + } + + auto itIn = stageToSwapInMap.find(currentStage); + if (itIn != stageToSwapInMap.end()) { + for (auto storageImpl = itIn->second.rbegin(); storageImpl != itIn->second.rend(); ++storageImpl) { + SwapIn(*storageImpl); + } + stageToSwapInMap.erase(itIn); + } +} + +void SwapExecutor::clearStanderdSwapOutVec() +{ + for (auto it = standerdSwapOutVec.begin(); it != standerdSwapOutVec.end(); ++it) { + delete *it; + } + standerdSwapOutVec.clear(); +} + +void 
SwapExecutor::clearCandidateOptimPolicyVec() +{ + candidateOptimPolicyVec.clear(); +} + +void SwapExecutor::SwapIn(c10::weak_intrusive_ptr &storageImplWeakPtr) +{ + auto storageImplPtr = storageImplWeakPtr.lock(); + if (!storageImplPtr) { + return; + } + uint64_t uniqueId = static_cast(storageImplPtr.get())->get_unique_id(); + SwapIn(uniqueId, false); +} + +void SwapExecutor::SwapOut(c10::weak_intrusive_ptr &storageImplWeakPtr) +{ + auto storageImplPtr = storageImplWeakPtr.lock(); + if (!storageImplPtr) { + return; + } + SwapOut(storageImplPtr); +} + +void SwapExecutor::updateStep(std::unordered_map, + HashUniqueSwapPtr> &tensorPtrWeakPtrMap) +{ + ptrToTensorInfoMap.clear(); + candidateSwapOutVec.clear(); + candidateSwapOutVec.resize(standerdSwapOutVec.size()); + std::reverse_copy(standerdSwapOutVec.begin(), standerdSwapOutVec.end(), candidateSwapOutVec.begin()); + processOptimTask(tensorPtrWeakPtrMap); +} + +template RecordStreamManager::RecordStreamManager() : isInit(false) {} + +template RecordStreamManager::~RecordStreamManager() +{ + isInit = false; +} + +template int RecordStreamManager::Init() +{ + if (isInit) { + return 0; + } + isInit = true; + return 0; +} + +template int RecordStreamManager::DeInit() +{ + isInit = false; + return 0; +} + +template void RecordStreamManager::RecordStream(T &ptr, c10_npu::NPUStream stream) +{ + if (!isInit) { + return; + } + c10_npu::NPUEvent recordStreamEvent; + recordStreamEvent.record(stream); + recordedQueue.push_back(std::make_pair(std::move(ptr), std::move(recordStreamEvent))); +} + +template void RecordStreamManager::ProcessEvent() +{ + if (!isInit) { + return; + } + while (!recordedQueue.empty()) { + auto &recordStreamEvent = recordedQueue.front().second; + + if (recordStreamEvent.query()) { + recordedQueue.pop_front(); + } else { + break; + } + } +} + +template bool RecordStreamManager::ProcessMallocEvent() +{ + if (!isInit) { + return false; + } + bool res = false; + while (!recordedQueue.empty()) { + auto 
&recordStreamEvent = recordedQueue.front().second; + recordStreamEvent.block(c10_npu::getCurrentNPUStream()); + recordedQueue.pop_front(); + res = true; + } + return res; +} + +template RecordStreamWithFreeStageManager::RecordStreamWithFreeStageManager() : isInit(false) {} + +template RecordStreamWithFreeStageManager::~RecordStreamWithFreeStageManager() +{ + isInit = false; +} + +template int RecordStreamWithFreeStageManager::Init() +{ + if (isInit) { + return 0; + } + isInit = true; + return 0; +} + +template int RecordStreamWithFreeStageManager::DeInit() +{ + isInit = false; + return 0; +} + +template +void RecordStreamWithFreeStageManager::RecordStream(T &ptr, c10_npu::NPUStream stream, SwapStage &freeStage) +{ + if (!isInit) { + return; + } + c10_npu::NPUEvent recordStreamEvent; + recordStreamEvent.record(stream); + auto stageToFreeIter = + StageToFreeEventMap.try_emplace(freeStage, std::deque>()).first; + stageToFreeIter->second.push_back(std::make_pair(std::move(ptr), std::move(recordStreamEvent))); +} + +template void RecordStreamWithFreeStageManager::ProcessEvent() +{ + if (!isInit) { + return; + } + for (const auto &pair : StageToFreeEventMap) { + const SwapStage &stage = pair.first; + const std::pair &recordedQueue = pair.second; + while (!recordedQueue.empty()) { + auto &recordStreamEvent = recordedQueue.front().second; + if (recordStreamEvent.query()) { + recordedQueue.pop_front(); + } else { + break; + } + } + } +} + +template bool RecordStreamWithFreeStageManager::FreeEventWithStage(SwapStage &freeStage) +{ + if (!isInit) { + return false; + } + bool res = false; + auto stageToFreeIter = StageToFreeEventMap.find(freeStage); + if (stageToFreeIter == StageToFreeEventMap.end()) { + return false; + } + auto &recordedQueue = stageToFreeIter->second; + while (!recordedQueue.empty()) { + auto &recordStreamEvent = recordedQueue.front().second; + recordStreamEvent.block(c10_npu::getCurrentNPUStream()); + recordedQueue.pop_front(); + res = true; + } + return 
res; +} + +template bool RecordStreamWithFreeStageManager::ProcessMallocEvent() +{ + if (!isInit) { + return false; + } + bool res = false; + + for (auto &pair : StageToFreeEventMap) { + const SwapStage &stage = pair.first; + auto &recordedQueue = pair.second; + while (!recordedQueue.empty()) { + auto &recordStreamEvent = recordedQueue.front().second; + recordStreamEvent.block(c10_npu::getCurrentNPUStream()); + recordedQueue.pop_front(); + res = true; + } + } + return res; +} + +// class NPUSwapManager +NPUSwapManager::NPUSwapManager() + : swap_enable(false), + swap_oom_enable(false), + isInit(false), + executor(nullptr), + profiler(nullptr), + opId(0), + recordedDataPtrManager(nullptr), + recordedStorageImplManager(nullptr), + recordedDataPtrWithFreeStageManager(nullptr), + recordedStorageImplWithFreeStageManager(nullptr) +{} + +NPUSwapManager::~NPUSwapManager() +{ + DeInit(); +} + +NPUSwapManager &NPUSwapManager::GetInstance() +{ + static NPUSwapManager instance; + return instance; +} + +int NPUSwapManager::Init() +{ + if (isInit) { + return 0; + } + if (executor == nullptr) { + executor = new SwapExecutor(); + if (executor != nullptr) { + executor->Init(); + } + } + if (profiler == nullptr) { + profiler = new SwapProfiler(); + if (profiler != nullptr) { + profiler->Init(); + } + } + if (recordedDataPtrManager == nullptr) { + recordedDataPtrManager = new RecordStreamManager(); + if (recordedDataPtrManager != nullptr) { + recordedDataPtrManager->Init(); + } + } + if (recordedStorageImplManager == nullptr) { + recordedStorageImplManager = new RecordStreamManager>(); + if (recordedStorageImplManager != nullptr) { + recordedStorageImplManager->Init(); + } + } + if (recordedDataPtrWithFreeStageManager == nullptr) { + recordedDataPtrWithFreeStageManager = new RecordStreamWithFreeStageManager(); + if (recordedDataPtrWithFreeStageManager != nullptr) { + recordedDataPtrWithFreeStageManager->Init(); + } + } + if (recordedStorageImplWithFreeStageManager == nullptr) { + 
recordedStorageImplWithFreeStageManager = + new RecordStreamWithFreeStageManager>(); + if (recordedStorageImplWithFreeStageManager != nullptr) { + recordedStorageImplWithFreeStageManager->Init(); + } + } + + at_npu::native::RegisterOpHookBeginFn( + [](const std::string &op_name) -> void { c10_npu::swap::NPUSwapManager::GetInstance().BeginHook(op_name); }); + at_npu::native::RegisterOpHookEndFn([]() -> void { + c10_npu::swap::NPUSwapManager::GetInstance().PostHook(); + c10_npu::swap::NPUSwapManager::GetInstance().EndHook(); + }); + at_npu::native::RegisterOpHookPreFn([](const at::Tensor &at_tensor) -> void { + if (!at_tensor.defined()) { + return; + } + c10_npu::swap::NPUSwapManager::GetInstance().TensorHook(at_tensor); + }); + at_npu::native::RegisterOpHookPostFn([](const at::Tensor &at_tensor) -> void { + if (!at_tensor.defined()) { + return; + } + c10_npu::swap::NPUSwapManager::GetInstance().TensorHook(at_tensor); + }); + + isInit = true; + return 0; +} + +int NPUSwapManager::DeInit() +{ + if (!isInit) { + return 0; + } + if (executor != nullptr) { + delete executor; + executor = nullptr; + } + if (profiler != nullptr) { + delete profiler; + profiler = nullptr; + } + if (recordedDataPtrManager != nullptr) { + delete recordedDataPtrManager; + recordedDataPtrManager = nullptr; + } + if (recordedStorageImplManager != nullptr) { + delete recordedStorageImplManager; + recordedStorageImplManager = nullptr; + } + if (recordedDataPtrWithFreeStageManager != nullptr) { + delete recordedDataPtrWithFreeStageManager; + recordedDataPtrWithFreeStageManager = nullptr; + } + if (recordedStorageImplWithFreeStageManager != nullptr) { + delete recordedStorageImplWithFreeStageManager; + recordedStorageImplWithFreeStageManager = nullptr; + } + isInit = false; + return 0; +} + +void NPUSwapManager::RecordStream(at::DataPtr &dataPtr, c10_npu::NPUStream stream, SwapStage *freeStage) +{ + if (!isInit) { + return; + } + if (freeStage == nullptr) { + 
recordedDataPtrManager->RecordStream(dataPtr, stream); + } else { + recordedDataPtrWithFreeStageManager->RecordStream(dataPtr, stream, *freeStage); + } +} + +void NPUSwapManager::RecordStream(c10::intrusive_ptr storageImpl, c10_npu::NPUStream stream, + SwapStage *freeStage) +{ + if (!isInit) { + return; + } + if (freeStage == nullptr) { + recordedStorageImplManager->RecordStream(storageImpl, stream); + } else { + recordedStorageImplWithFreeStageManager->RecordStream(storageImpl, stream, *freeStage); + } +} + +void NPUSwapManager::ProcessEvent() +{ + if (!isInit) { + return; + } + recordedDataPtrManager->ProcessEvent(); + recordedStorageImplManager->ProcessEvent(); +} + +bool NPUSwapManager::ProcessMallocEvent() +{ + if (!isInit) { + return false; + } + if (!config.enableCustomRecordStream) { + return false; + } + bool res = recordedDataPtrManager->ProcessMallocEvent(); + res = res || recordedDataPtrWithFreeStageManager->ProcessMallocEvent(); + res = res || recordedStorageImplWithFreeStageManager->ProcessMallocEvent(); + return res; +} + +int NPUSwapManager::BeginHook(const std::string &opName) +{ + if (!isInit) { + return 0; + } + + SWAP_LOG_INFO("BeginHook in, opIdStk.size[%zu], opNameStk.size[%zu], curTensorsStk.size[%zu]", opIdStk.size(), + curOpNameStk.size(), curTensorsStk.size()); + + opIdStk.push_back(opId); + opId++; + curOpNameStk.push_back(opName); + c10::SmallVector curTensors; + curTensorsStk.push_back(curTensors); + + ProcessEvent(); + + SWAP_LOG_INFO("BeginHook out, opId[%d], opName[%s], curTensors num[%zu]", opIdStk.back(), + curOpNameStk.back().c_str(), curTensorsStk.back().size()); + + return 0; +} + +int NPUSwapManager::EndHook() +{ + if (!isInit) { + return 0; + } + SWAP_LOG_INFO("EndHook in, opId[%d], opName[%s], curTensors num[%zu]", opIdStk.back(), curOpNameStk.back().c_str(), + curTensorsStk.back().size()); + + for (auto &tensor : curTensorsStk.back()) { + SaveTensor(tensor); + } + tensorValidMap.clear(); + + for (size_t i = 0; i < 
curTensorsStk.back().size(); ++i) { + SWAP_LOG_DEBUG( + "EndHook post, opId[%d], opName[%s], curTensors num[%zu], idx[%zu], storage uniqueId[%lu], mem ptr[%p][%s]", + opIdStk.back(), curOpNameStk.back().c_str(), curTensorsStk.back().size(), i, + static_cast(curTensorsStk.back()[i].storage().unsafeGetStorageImpl()) + ->get_unique_id(), + curTensorsStk.back()[i].storage().data(), + std::string(getUniqueSwapPtr(curTensorsStk.back()[i])).c_str()); + } + + opIdStk.pop_front(); + curOpNameStk.pop_back(); + curTensorsStk.pop_back(); + SWAP_LOG_INFO("EndHook out, opIdStk.size[%zu], opNameStk.size[%zu], curTensorsStk.size[%zu]", opIdStk.size(), + curOpNameStk.size(), curTensorsStk.size()); + + return 0; +} + +int NPUSwapManager::TensorHook(const at::Tensor &tensor) +{ + if (!isInit) { + return 0; + } + + if (!tensor.device().is_privateuseone()) { + return 1; + } + + uint64_t uniqueId = static_cast(tensor.storage().unsafeGetStorageImpl()) + ->get_unique_id(); + + SWAP_LOG_INFO("TensorHook in, before process, opId[%d], opName[%s], curTensors num[%zu], storage uniqueId[%lu], " + "mem ptr[%p][%s]", + opIdStk.back(), curOpNameStk.back().c_str(), curTensorsStk.back().size(), uniqueId, tensor.storage().data(), + std::string(getUniqueSwapPtr(tensor)).c_str()); + + curTensorsStk.back().emplace_back(tensor); + tensorValidMap[tensor.storage().mutable_data()] = true; + + executor->SwapInWait(uniqueId); + executor->SwapIn(uniqueId, true); + + SWAP_LOG_INFO("TensorHook out, after process, opId[%d], opName[%s], curTensors num[%zu], storage uniqueId[%lu], " + "mem ptr[%p][%s]", + opIdStk.back(), curOpNameStk.back().c_str(), curTensorsStk.back().size(), uniqueId, tensor.storage().data(), + std::string(getUniqueSwapPtr(tensor)).c_str()); + + return 0; +} + +int NPUSwapManager::PostHook() +{ + if (!isInit) { + return 0; + } + + SWAP_LOG_INFO("PostHook in, opId[%d], opName[%s], curTensors num[%zu]", opIdStk.back(), curOpNameStk.back().c_str(), + curTensorsStk.back().size()); + + for (size_t i 
= 0; i < curTensorsStk.back().size(); ++i) { + SWAP_LOG_DEBUG("PostHook before process, opId[%d], opName[%s], curTensors num[%zu], idx[%zu], storage \ +uniqueId[%lu], mem ptr[%p][%s]", + opIdStk.back(), curOpNameStk.back().c_str(), curTensorsStk.back().size(), i, + static_cast(curTensorsStk.back()[i].storage().unsafeGetStorageImpl()) + ->get_unique_id(), + curTensorsStk.back()[i].storage().data(), + std::string(getUniqueSwapPtr(curTensorsStk.back()[i])).c_str()); + } + + if (config.enableProfiler) { + profiler->ReportInfoV2(curOpNameStk.back(), opIdStk.front(), curTensorsStk.back()); + } + + if (config.enableExecutor) { + executor->ProcessTensorMatchTask(curOpNameStk.back(), curTensorsStk.back()); + executor->ProcessStageMatchTask(config.stage); + recordedDataPtrWithFreeStageManager->FreeEventWithStage(config.stage); + recordedStorageImplWithFreeStageManager->FreeEventWithStage(config.stage); + UpdateCurrentStagePerOp(); + } + + for (size_t i = 0; i < curTensorsStk.back().size(); ++i) { + SWAP_LOG_DEBUG("PostHook after process, opId[%d], opName[%s], curTensors num[%zu], idx[%zu], storage \ +uniqueId[%lu], mem ptr[%p][%s]", + opIdStk.back(), curOpNameStk.back().c_str(), curTensorsStk.back().size(), i, + static_cast(curTensorsStk.back()[i].storage().unsafeGetStorageImpl()) + ->get_unique_id(), + curTensorsStk.back()[i].storage().data(), + std::string(getUniqueSwapPtr(curTensorsStk.back()[i])).c_str()); + } + SWAP_LOG_INFO("PostHook out, opId[%d], opName[%s], curTensors num[%zu]", opIdStk.back(), + curOpNameStk.back().c_str(), curTensorsStk.back().size()); + return 0; +} + +void NPUSwapManager::SaveTensor(const at::Tensor &tensor) +{ + if (!swap_oom_enable) { + return; + } + + void *dataPtr = tensor.storage().mutable_data(); + auto storageImplIter = storageImplMap.find(dataPtr); + if (storageImplIter == storageImplMap.end()) { + storageImplMap.emplace(dataPtr, tensor.storage().getWeakStorageImpl()); + } else { + storageImplMap.erase(storageImplIter); + 
storageImplMap.emplace(dataPtr, tensor.storage().getWeakStorageImpl()); + } + + auto it = + std::find_if(tensorQueue.begin(), tensorQueue.end(), [&dataPtr](const void *ptr) { return ptr == dataPtr; }); + if (it != tensorQueue.end()) { + tensorQueue.erase(it); + } + tensorQueue.push_back(dataPtr); +} + +void NPUSwapManager::CheckAndSwapOutForOOM(void *ptrInBlock) +{ + if (!swap_oom_enable) { + return; + } + + auto storageImplIter = storageImplMap.find(ptrInBlock); + if (storageImplIter == storageImplMap.end()) { + return; + } + + c10::intrusive_ptr storageImplPtr = storageImplIter->second.lock(); + if (storageImplPtr) { + auto validIter = tensorValidMap.find(ptrInBlock); + if (validIter == tensorValidMap.end()) { + auto blacklistIter = ptrBlacklist.find(getUniqueSwapPtr(storageImplPtr->mutable_data())); + if (blacklistIter == ptrBlacklist.end()) { + executor->SwapOut(storageImplPtr, true); + c10_npu::NPUStream &swapStream = executor->swapStreams[0]; + swapStream.synchronize(); + } + } + } + storageImplMap.erase(storageImplIter); + + auto it = std::find_if(tensorQueue.begin(), tensorQueue.end(), + [&ptrInBlock](const void *ptr) { return ptr == ptrInBlock; }); + if (it != tensorQueue.end()) { + tensorQueue.erase(it); + } +} + +std::map> &NPUSwapManager::GetStorageImplMap() +{ + return storageImplMap; +} + +std::deque &NPUSwapManager::GetTensorQueue() +{ + return tensorQueue; +} + +void NPUSwapManager::ReportInfoToSwapProfiler(bool isSwapOut, at::DataPtr &srcDataPtr, at::DataPtr &dstDataPtr, + size_t size, bool isOOM) +{ + if (!isInit) { + return; + } + profiler->ReportInfoV2(isSwapOut, srcDataPtr, dstDataPtr, size, isOOM); +} + +void NPUSwapManager::CheckAndInsertStorageToMap(const at::Tensor &src, const at::Tensor &dst) +{ + if (!isInit) { + return; + } + executor->CheckAndInsertStorageToMap(src, dst); +} + +UniqueSwapPtr NPUSwapManager::getUniqueSwapPtr(const at::Tensor &tensor) +{ + size_t ptrBase = reinterpret_cast(tensor.storage().data()); + UniqueSwapPtr 
uniqueSwapPtr; + uniqueSwapPtr.ptrBase = ptrBase; + auto it = tensorPtrCountMap.find(ptrBase); + if (it == tensorPtrCountMap.end()) { + uniqueSwapPtr.index = 0; + } else { + uniqueSwapPtr.index = tensorPtrCountMap[ptrBase]; + } + return uniqueSwapPtr; +} + +UniqueSwapPtr NPUSwapManager::getUniqueSwapPtr(const void *storagePtr) +{ + size_t ptrBase = reinterpret_cast(storagePtr); + UniqueSwapPtr uniqueSwapPtr; + uniqueSwapPtr.ptrBase = ptrBase; + auto it = tensorPtrCountMap.find(ptrBase); + if (it == tensorPtrCountMap.end()) { + uniqueSwapPtr.index = 0; + } else { + uniqueSwapPtr.index = tensorPtrCountMap[ptrBase]; + } + return uniqueSwapPtr; +} + +UniqueSwapPtr NPUSwapManager::getUniqueSwapPtr(size_t p) +{ + size_t ptrBase = p; + UniqueSwapPtr uniqueSwapPtr; + uniqueSwapPtr.ptrBase = ptrBase; + auto it = tensorPtrCountMap.find(ptrBase); + if (it == tensorPtrCountMap.end()) { + uniqueSwapPtr.index = 0; + } else { + uniqueSwapPtr.index = tensorPtrCountMap[ptrBase]; + } + return uniqueSwapPtr; +} + +std::vector NPUSwapManager::recordTensorPtrWithTypes(const std::vector &tensors, + SwapTensorType type, int updateWeakPtrMap, bool isUpdateBlacklist) +{ + if (updateWeakPtrMap == 1) { + tensorPtrWeakPtrMap.clear(); + } + + std::vector results; + results.reserve(tensors.size()); + + for (const auto &tensor : tensors) { + auto uniquePtr = getUniqueSwapPtr(tensor); + + tensorPtrTypeMap.try_emplace(uniquePtr, type); + + if (updateWeakPtrMap > 0) { + tensorPtrWeakPtrMap.try_emplace(uniquePtr, tensor.storage().getWeakStorageImpl()); + } + if (isUpdateBlacklist) { + ptrBlacklist.insert(uniquePtr); + } + + results.emplace_back(uniquePtr); + } + return results; +} + +void NPUSwapManager::initOpNameToOneHotAndIndexMap(std::vector &frequentOpNames) +{ + executor->initOpNameToOneHotAndIndexMap(frequentOpNames); +} + +void NPUSwapManager::FunAfterProfiler(std::vector &policyInfoVec) +{ + if (!isInit) { + return; + } + if (config.enableExecutor) { + executor->clearStanderdSwapOutVec(); + 
executor->initStanderdSwapOutVec(executor->standerdSwapOutVec, profiler->getPolicyStepOpVec(), policyInfoVec); + executor->clearCandidateOptimPolicyVec(); + executor->initCandidateOptimPolicyVec(policyInfoVec); + } +} + +void NPUSwapManager::UpdateCurrentStagePerOp() +{ + if (config.fwdOpLayerInfo.empty() || config.bwdOpLayerInfo.empty()) { + return; + } + config.currentStageOpId++; + if (config.stage.stageType == SwapStageType::FWD) { + for (int i = 0; i < config.fwdOpLayerInfo.size(); i++) { + if (config.currentStageOpId <= config.fwdOpLayerInfo[i]) { + config.stage.layerIndex = i + 1; + break; + } + } + if (config.currentStageOpId > config.fwdOpLayerInfo.back()) { + config.stage.layerIndex = config.fwdOpLayerInfo.size() + 1; + } + } else if (config.stage.stageType == SwapStageType::BWD) { + for (int i = 0; i < config.bwdOpLayerInfo.size(); i++) { + if (config.currentStageOpId <= config.bwdOpLayerInfo[i]) { + config.stage.layerIndex = i + 1; + break; + } + } + if (config.currentStageOpId > config.bwdOpLayerInfo.back()) { + config.stage.layerIndex = config.bwdOpLayerInfo.size() + 1; + } + } +} + +void NPUSwapManager::updateStep() +{ + if (!isInit) { + return; + } + config.currentStageOpId = 0; + executor->updateStep(tensorPtrWeakPtrMap); + tensorQueue.clear(); + config.isOOM = false; +} + +c10_npu::NPUStream &NPUSwapManager::GetSwapStream() +{ + return this->executor->swapStreams[0]; +} +} // namespace swap +} // namespace c10_npu diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/NPUSwapManager.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/NPUSwapManager.h new file mode 100644 index 000000000..e3b76167b --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/NPUSwapManager.h @@ -0,0 +1,501 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. 
+// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace c10_npu { +namespace swap { +enum class TORCH_NPU_API SwapTensorType { + MODEL, + OPTIM, + SHARED_MEMORY, + OTHERS, + RESERVED, +}; + +enum class TORCH_NPU_API SwapStageType { + INIT = 1, + FWD, + BWD, + OPTIM, + RESERVED, +}; + +class TORCH_NPU_API SwapStage { +public: + SwapStage(); + + bool operator == (const SwapStage &other) const; + friend std::ostream &operator << (std::ostream &os, const SwapStage &obj); + + SwapStageType stageType; + uint32_t microBatchIndex; + uint32_t layerIndex; +}; + +class HashSwapStage { +public: + size_t operator () (const SwapStage &s) const + { + return std::hash()((s.layerIndex) + ((s.microBatchIndex) << 8) + ((static_cast(s.stageType) << 16))); + } +}; + +class TORCH_NPU_API SwapConfig { +public: + SwapConfig(); + + // static + // model config + uint32_t microBatchNum; + uint32_t layerNum; + + // update + bool isOOM; + SwapStage stage; + uint32_t step; + float oneStepDuration; + uint32_t policyStep; + int currentStageOpId; + + // update policy + bool enableProfiler; + + uint64_t tensorSizeThresh; + bool enableExecutor; + bool enableCustomRecordStream; + std::vector fwdOpLayerInfo; + std::vector bwdOpLayerInfo; +}; + +class TORCH_NPU_API UniqueSwapPtr { +public: + UniqueSwapPtr(); + + bool operator == (const 
UniqueSwapPtr &other) const; + bool operator < (const UniqueSwapPtr &other) const + { + return ptrBase < other.ptrBase; + }; + friend std::ostream &operator << (std::ostream &os, const UniqueSwapPtr &obj); + operator std::string() const; + + size_t ptrBase; + size_t index; +}; + +class HashUniqueSwapPtr { +public: + size_t operator () (const UniqueSwapPtr &p) const + { + return std::hash()(p.ptrBase) ^ std::hash()(p.index); + } +}; + +class TORCH_NPU_API UniqueSwapMemory { +public: + UniqueSwapMemory(); + UniqueSwapMemory(int64_t allocated_bytes, int64_t reserved_bytes, int64_t active_bytes); + + friend std::ostream &operator << (std::ostream &os, const UniqueSwapMemory &obj); + + int64_t allocated_bytes; + int64_t reserved_bytes; + int64_t active_bytes; +}; + +class TORCH_NPU_API ProfilerTensorInfo { +public: + explicit ProfilerTensorInfo(const at::Tensor &tensor); + + friend std::ostream &operator << (std::ostream &os, const ProfilerTensorInfo &obj); + + UniqueSwapPtr &getPtr() + { + return ptr; + } + size_t &getNbytes() + { + return nbytes; + } + c10::SmallVector &getShapeV2() + { + return shapeV2; + } + at::ScalarType &getDtype() + { + return dtype; + } + SwapTensorType &getTensorType() + { + return tensorType; + } + + UniqueSwapPtr ptr; + size_t nbytes; + at::ScalarType dtype; + SwapTensorType tensorType; + c10::SmallVector shapeV2; +}; + +class TORCH_NPU_API ProfilerOpInfo { +public: + ProfilerOpInfo(int opId, std::string opName, int64_t allocated_bytes, int64_t reserved_bytes, int64_t active_bytes); + + friend std::ostream &operator << (std::ostream &os, const ProfilerOpInfo &obj); + + int &getOpId() + { + return opId; + } + std::string &getOpName() + { + return opName; + } + SwapStage &getStage() + { + return stage; + } + uint32_t &getStep() + { + return step; + } + UniqueSwapMemory &getSwapMemory() + { + return swapMemory; + } + std::vector &getProfilerTensorInfo() + { + return profilerTensorInfoVec; + } + void appendTensorInfo(const at::Tensor &tensor); + 
+ int opId; + std::string opName; + SwapStage stage; + uint32_t step; + UniqueSwapMemory swapMemory; + std::vector profilerTensorInfoVec; +}; + +class TORCH_NPU_API ProfilerSwapInfo { +public: + ProfilerSwapInfo(int opId, std::string swapName, size_t size, bool isOOM, UniqueSwapPtr srcDataPtr, + UniqueSwapPtr dstDataPtr); + + int &getOpId() + { + return opId; + } + std::string &getSwapName() + { + return swapName; + } + size_t &getSize() + { + return size; + } + bool &getIsOOM() + { + return isOOM; + } + UniqueSwapPtr &getSrcPtr() + { + return srcPtr; + } + UniqueSwapPtr &getDstPtr() + { + return dstPtr; + } + + int opId; + std::string swapName; + size_t size; + bool isOOM; + UniqueSwapPtr srcPtr; + UniqueSwapPtr dstPtr; +}; + +class TORCH_NPU_API SwapProfiler { +public: + SwapProfiler(); + ~SwapProfiler(); + int Init(); + + void updateStep(); + void appendOpInfo(std::string &opName, int &opId); + void ReportInfoV2(std::string &opName, int &opId, c10::SmallVector &tensors); + void ReportInfoV2(bool isSwapOut, at::DataPtr &srcDataPtr, at::DataPtr &dstDataPtr, size_t size, bool isOOM); + std::vector &getProfilerOpInfoVec() + { + return profilerOpInfoVec; + } + std::vector &getProfilerSwapInfoVec() + { + return profilerSwapInfoVec; + } + std::vector &getPolicyStepOpVec(); + + std::map> profilerOpInfoMap; + +private: + bool isInit; + int lastOpId; + std::vector profilerOpInfoVec; + std::vector profilerSwapInfoVec; +}; + +class TORCH_NPU_API SwapPolicyInfo { +public: + SwapPolicyInfo(); + + friend std::ostream &operator << (std::ostream &os, const SwapPolicyInfo &obj); + + bool executorNeedMatch; + UniqueSwapPtr ptr; + int swapOutOpId; + int swapInOpId; + SwapStage swapOutStage; + SwapStage swapInStage; + SwapStage freeStage; + SwapStage swapInFreeStage; +}; + +class ExecutorTensorInfo { +public: + ExecutorTensorInfo(); + ExecutorTensorInfo(const at::Tensor &tensor, UniqueSwapPtr uniqueSwapPtr); + ExecutorTensorInfo(const SwapStage &swapOutStage, const SwapStage 
&swapInStage); + + bool operator == (const ExecutorTensorInfo &other) const; + friend std::ostream &operator << (std::ostream &os, const ExecutorTensorInfo &obj); + + size_t convertShapeToInt64(const at::Tensor &tensor); + size_t convertShapeToInt64(const c10::SmallVector &sizes); + void initFromProfilerTensorInfo(const ProfilerTensorInfo &pti); + void updateCallsStack(int opOneHot, int opIndex, int tensorIndex); + + UniqueSwapPtr ptr; + size_t opCount; + size_t opTag; + at::ScalarType dtype; + size_t nbytes; + size_t shape; + size_t opCallsStack; + size_t tensorIndexCallsStack; + SwapStage swapOutStage; + SwapStage swapInStage; + SwapStage freeStage; + SwapStage swapInFreeStage; +}; + +class SwapExecutor { +public: + SwapExecutor(); + ~SwapExecutor(); + int Init(); + int DeInit(); + + int SwapOut(c10::intrusive_ptr storageImplPtr, bool isOOM = false, + SwapStage *freeStage = nullptr); + int SwapOut(const at::Tensor &tensor, SwapStage *freeStage = nullptr); + int SwapIn(uint64_t uniqueId, bool needWait); + int SwapInWait(uint64_t uniqueId); + void CheckAndInsertStorageToMap(const at::Tensor &src, const at::Tensor &dst); + void ProcessTensorMatchTask(const std::string &opName, const c10::SmallVector &curTensors); + void ProcessStageMatchTask(const SwapStage ¤tStage); + void updateStep(std::unordered_map, HashUniqueSwapPtr> + &tensorPtrWeakPtrMap); + +public: + bool isInit; + std::vector swapStreams; + + std::map> swapOutStorageImplMap; + std::map swapInEventMap; + std::vector standerdSwapOutVec; + std::vector candidateSwapOutVec; + std::vector candidateOptimPolicyVec; + std::unordered_map> opNameToOneHotAndIndexMap; + std::unordered_map ptrToTensorInfoMap; + std::unordered_map, N>, HashSwapStage> + stageToSwapInMap; + std::unordered_map, N>, HashSwapStage> + stageToSwapOutMap; + + std::unordered_map, HashSwapStage> stageToOptimFreeStageMap; + std::unordered_map uniqueIdToSwapInFreeStageMap; + + std::pair GetOpOneHotAndIndex(const std::string &opName); + + bool 
needGenerateTensorInfo(const at::Tensor &tensor); + void initOpNameToOneHotAndIndexMap(const std::vector &opNames); + bool checkMatchAndSwapOut(ExecutorTensorInfo &eti, std::vector &candidateSwapOutVec); + void initStanderdSwapOutVec(std::vector &swapOutVec, + const std::vector &opInfosVec, const std::vector &policyInfosVec); + void clearStanderdSwapOutVec(); + void clearCandidateOptimPolicyVec(); + void checkStageAndSwapIn(const SwapStage &swapStage); + void SwapIn(c10::weak_intrusive_ptr &storageImplPtr); + void SwapOut(c10::weak_intrusive_ptr &storageImplWeakPtr); + void initCandidateOptimPolicyVec(const std::vector &policyInfosVec); + void processOptimTask(std::unordered_map, + HashUniqueSwapPtr> &tensorPtrWeakPtrMap); +}; + +template class RecordStreamManager { +public: + RecordStreamManager(); + ~RecordStreamManager(); + int Init(); + int DeInit(); + + void RecordStream(T &ptr, c10_npu::NPUStream stream); + void ProcessEvent(); + bool ProcessMallocEvent(); + +private: + bool isInit; + std::deque> recordedQueue; +}; + +template class RecordStreamWithFreeStageManager { +public: + RecordStreamWithFreeStageManager(); + ~RecordStreamWithFreeStageManager(); + int Init(); + int DeInit(); + + void ProcessEvent(); + void RecordStream(T &ptr, c10_npu::NPUStream stream, SwapStage &freeStage); + bool FreeEventWithStage(SwapStage &freeStage); + bool ProcessMallocEvent(); + +private: + bool isInit; + std::unordered_map>, HashSwapStage> StageToFreeEventMap; +}; + +class TORCH_NPU_API NPUSwapManager { +public: + static NPUSwapManager &GetInstance(); + ~NPUSwapManager(); + int Init(); + int DeInit(); + + int BeginHook(const std::string &opName); + int EndHook(); + int TensorHook(const at::Tensor &tensor); + int PostHook(); + + void SaveTensor(const at::Tensor &tensor); + void CheckAndSwapOutForOOM(void *ptrInBlock); + std::map> &GetStorageImplMap(); + std::deque &GetTensorQueue(); + + void ReportInfoToSwapProfiler(bool isSwapOut, at::DataPtr &srcDataPtr, at::DataPtr 
&dstDataPtr, size_t size, + bool isOOM = false); + void CheckAndInsertStorageToMap(const at::Tensor &src, const at::Tensor &dst); + + void RecordStream(at::DataPtr &dataPtr, c10_npu::NPUStream stream, SwapStage *freeStage = nullptr); + void RecordStream(c10::intrusive_ptr storageImpl, c10_npu::NPUStream stream, + SwapStage *freeStage = nullptr); + void ProcessEvent(); + bool ProcessMallocEvent(); + + void updateStage(); + void FunAfterProfiler(std::vector &policyInfoVec); + void updateStep(); + void initOpNameToOneHotAndIndexMap(std::vector &frequentOpNames); + std::vector recordTensorPtrWithTypes(const std::vector &tensors, SwapTensorType type, + int updateWeakPtrMap = 0, // 0: do nothing, 1: clear, 2: append + bool isUpdateBlacklist = false); + void UpdateCurrentStagePerOp(); + + bool swap_enable; + bool swap_oom_enable; + SwapConfig config; + std::map tensorPtrCountMap; + std::unordered_map tensorPtrTypeMap; + std::unordered_map, HashUniqueSwapPtr> tensorPtrWeakPtrMap; + std::set ptrBlacklist; + + SwapProfiler *getSwapProfiler() + { + return profiler; + } + UniqueSwapPtr getUniqueSwapPtr(const at::Tensor &tensor); + UniqueSwapPtr getUniqueSwapPtr(const void *storagePtr); + UniqueSwapPtr getUniqueSwapPtr(size_t p); + + c10_npu::NPUStream &GetSwapStream(); + +private: + NPUSwapManager(); + + bool isInit; + SwapExecutor *executor; + SwapProfiler *profiler; + + // always update + int opId; + std::map> storageImplMap; + std::deque tensorQueue; + std::map tensorValidMap; + + // use deque to store current variables to deal with nested OpCommand calls + std::deque opIdStk; + std::deque curOpNameStk; + std::deque> curTensorsStk; + + RecordStreamManager *recordedDataPtrManager; + RecordStreamManager> *recordedStorageImplManager; + RecordStreamWithFreeStageManager *recordedDataPtrWithFreeStageManager; + RecordStreamWithFreeStageManager> *recordedStorageImplWithFreeStageManager; +}; + +class SwapOutOfMemError : public std::exception { +public: + SwapOutOfMemError(const 
std::string &message, void *data) : message(message), data(data) {} + const char *what() const noexcept override + { + return message.c_str(); + } + void *GetData() const noexcept + { + return data; + } + +private: + std::string message; + void *data = nullptr; +}; +} // namespace swap +} // namespace c10_npu diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/NPUVmmApi.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/NPUVmmApi.h new file mode 100644 index 000000000..136ab5c58 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/NPUVmmApi.h @@ -0,0 +1,431 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// Copyright 2022 The GLake Authors. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include "SwapException.h" + +using CUmemGenericAllocationHandle = unsigned long long; + +constexpr size_t granularitySize = 2097152; + +struct Block; + +struct BlockSegment { + BlockSegment() : block(nullptr), offset(0) {} + BlockSegment(Block *block, size_t offset) : block(block), offset(offset) {} + + Block *block; + size_t offset; +}; + +class GMLakeError : public std::exception { +public: + explicit GMLakeError(const std::string &message) : message(message) {} + const char *what() const noexcept override + { + return message.c_str(); + } + +private: + std::string message; +}; + +struct PhyBlock { + explicit PhyBlock(int device_id_in = -1, size_t block_size_in = granularitySize) + : device_id(device_id_in), + block_size(block_size_in), + status(ACL_SUCCESS), + free(true), + owner_stream(nullptr), + released(false) + { + if (device_id == -1) { + SWAP_CHECK_ERROR(c10_npu::GetDevice(&device_id)); + } + + aclrtPhysicalMemProp prop = {}; + prop.handleType = ACL_MEM_HANDLE_TYPE_NONE; + prop.allocationType = ACL_MEM_ALLOCATION_TYPE_PINNED; + prop.memAttr = ACL_HBM_MEM_HUGE; + prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE; + prop.location.id = device_id; + prop.reserve = 0; + status = aclrtMallocPhysical(&alloc_handle, block_size, &prop, 0); + if (status != ACL_ERROR_NONE) { + throw GMLakeError("PhyBlock Construct Failed"); + } + } + + void release_resources() + { + if (status == ACL_SUCCESS) { + auto err = aclrtFreePhysical(alloc_handle); + if (err != ACL_SUCCESS) { + throw GMLakeError("PhyBlock Release_resources Failed"); + } + alloc_handle = nullptr; + } + released = true; + } + + ~PhyBlock() + { + if (!released) { + this->release_resources(); + released = true; + } + } + + int device_id; + const size_t block_size; + aclrtDrvMemHandle alloc_handle = nullptr; + aclError status; + + bool free; + aclrtStream owner_stream; + std::vector mapped_blocks; + bool released; +}; + 
+struct VirDevPtr { + VirDevPtr(void *addr_in, size_t allocSize_in, int device_id = -1) + : allocSize(allocSize_in), mapped(false), device_id(device_id), status(ACL_SUCCESS), released(false) + { + if (device_id == -1) { + SWAP_CHECK_ERROR(c10_npu::GetDevice(&device_id)); + } + + void *device_ptr; + void *request_ptr = addr_in; + auto status = aclrtReserveMemAddress(&device_ptr, allocSize, 0, request_ptr, 1); + if (status != ACL_SUCCESS || request_ptr != nullptr && device_ptr != request_ptr) { + if (device_ptr != nullptr) { + SWAP_CHECK_ERROR(aclrtReleaseMemAddress(device_ptr)); + } + virAddr = nullptr; + if (status == ACL_SUCCESS) { + status = ACL_ERROR_FAILURE; + } + return; + } + + virAddr = device_ptr; + } + + void release_resources() + { + if (virAddr) { + if (mapped) { + for (size_t i = 0; i * granularitySize < allocSize; i++) { + SWAP_CHECK_ERROR(aclrtUnmapMem(virAddr + i * granularitySize)); + } + } + SWAP_CHECK_ERROR(aclrtReleaseMemAddress(virAddr)); + } + + released = true; + } + + ~VirDevPtr() + { + if (!released) { + this->release_resources(); + released = true; + } + } + + void *virAddr; + const size_t allocSize; + bool mapped; + int device_id; + aclError status; + bool released; +}; + +struct VirBlock { + VirBlock(std::shared_ptr vir_dev_ptr_in, size_t offset_in, size_t blockSize_in, + std::shared_ptr phy_block_in, int device_id = -1) + : vir_dev_ptr(vir_dev_ptr_in), + offset(offset_in), + blockSize(blockSize_in), + phy_block(phy_block_in), + device_id(device_id), + status(ACL_SUCCESS), + released(false) + { + if (device_id == -1) { + SWAP_CHECK_ERROR(c10_npu::GetDevice(&device_id)); + } + + block_ptr = (void *)(((char *)vir_dev_ptr->virAddr) + offset); + void *device_ptr = block_ptr; + SWAP_CHECK_ERROR(aclrtMapMem(device_ptr, blockSize, 0, phy_block->alloc_handle, 0)); + if (offset == 0) { + vir_dev_ptr->mapped = true; + } + } + + void release_resources() + { + vir_dev_ptr.reset(); + released = true; + } + + ~VirBlock() + { + if (!released) { + 
this->release_resources(); + released = true; + } + } + + std::shared_ptr vir_dev_ptr; + + size_t offset; + size_t blockSize; + void *block_ptr; + + std::shared_ptr phy_block; + + int device_id; + aclError status; + bool released; +}; + +struct VmmSegment { + VmmSegment() : granul_size(0), segment_ptr(nullptr), status(ACL_SUCCESS), num_free_chunks(0), released(false) {} + + explicit VmmSegment(size_t blocks, size_t block_size_in = granularitySize, int device_id_in = -1) + : granul_size(block_size_in), + segment_ptr(nullptr), + device_id(device_id_in), + status(ACL_SUCCESS), + num_free_chunks(blocks), + num_used_chunks(0), + fused(false), + released(false) + { + if (device_id == -1) { + SWAP_CHECK_ERROR(c10_npu::GetDevice(&device_id)); + } + + allocate_phy_chunks(blocks, block_size_in, device_id); + if (status == ACL_SUCCESS) { + mapVirAddr(); + } + } + + explicit VmmSegment(std::vector> &&phy_chunks_in) + : phy_chunks(std::move(phy_chunks_in)), + granul_size(phy_chunks[0]->block_size), + segment_ptr(nullptr), + device_id(phy_chunks[0]->device_id), + status(ACL_SUCCESS), + num_free_chunks(phy_chunks.size()), + num_used_chunks(0), + fused(true), + released(false) + { + mapVirAddr(); + } + + explicit VmmSegment(std::vector> phy_chunks_in, + std::vector> vir_chunks_in) + : phy_chunks(std::move(phy_chunks_in)), + vir_chunks(std::move(vir_chunks_in)), + granul_size(phy_chunks[0]->block_size), + segment_ptr(vir_chunks[0]->block_ptr), + device_id(phy_chunks[0]->device_id), + status(ACL_SUCCESS), + num_free_chunks(phy_chunks.size()), + num_used_chunks(0), + fused(false), + released(false) + {} + + void allocate_phy_chunks(size_t blocks, size_t block_size_in, int device_id_in) + { + phy_chunks.reserve(blocks); + for (size_t i = 0; i < blocks; i++) { + auto phy_block = std::make_shared(device_id_in, block_size_in); + if (phy_block->status != ACL_SUCCESS) { + size_t device_free; + size_t device_total; + + status = phy_block->status; + phy_chunks.clear(); + break; + } else { + 
phy_chunks.emplace_back(std::move(phy_block)); + } + } + } + + void release_resources() + { + { + auto tmp_vir = std::move(vir_chunks); + } + { + auto tmp_phy = std::move(phy_chunks); + } + released = true; + } + + virtual ~VmmSegment() + { + if (!released) { + this->release_resources(); + released = true; + } + } + + void *mapVirAddr() + { + static constexpr int retry_times = 8; + static std::mutex alloc_mutex; + + void *device_ptr = nullptr; + size_t segment_size = phy_chunks.size() * granul_size; + + int current_try = 0; + aclError result = ACL_ERROR_NONE; + do { + std::lock_guard lock(alloc_mutex); + + auto vir_dev_ptr = std::make_shared(device_ptr, segment_size, device_id); + device_ptr = vir_dev_ptr->virAddr; + + if (vir_dev_ptr->status != ACL_SUCCESS || !vir_dev_ptr->virAddr) { + result = vir_dev_ptr->status; + } else { + vir_chunks.clear(); + + size_t offset = 0; + for (size_t j = 0; j < phy_chunks.size(); j++) { + auto phy_block = phy_chunks[j]; + auto vir_block = std::make_shared(vir_dev_ptr, offset, granul_size, phy_block, device_id); + + if (vir_block->status != ACL_SUCCESS) { + result = vir_block->status; + vir_chunks.clear(); + break; + } else { + vir_chunks.emplace_back(std::move(vir_block)); + } + + offset += granul_size; + } + } + + current_try++; + device_ptr = nullptr; + } while (result != ACL_SUCCESS && current_try < retry_times); + + status = result; + if (result == ACL_ERROR_NONE) { + segment_ptr = vir_chunks[0]->block_ptr; + return segment_ptr; + } + + return nullptr; + } + + std::shared_ptr split(size_t keep_size) + { + size_t keep_blocks = keep_size / granul_size; + + std::vector> remain_phy_chunks; + std::vector> remain_vir_chunks; + + size_t remaining_free_blocks = 0; + for (size_t i = keep_blocks; i < phy_chunks.size(); i++) { + if (phy_chunks[i]->free) { + remaining_free_blocks++; + } + remain_phy_chunks.emplace_back(std::move(phy_chunks[i])); + remain_vir_chunks.emplace_back(std::move(vir_chunks[i])); + } + + 
this->phy_chunks.resize(keep_blocks); + this->vir_chunks.resize(keep_blocks); + + auto remaining_segment = + std::make_shared(std::move(remain_phy_chunks), std::move(remain_vir_chunks)); + + remaining_segment->segment_ptr = (void *)((char *)segment_ptr + keep_size); + remaining_segment->num_free_chunks = remaining_free_blocks; + + num_free_chunks -= remaining_free_blocks; + return remaining_segment; + } + + bool remerge(VmmSegment &segment) + { + if (segment.segment_ptr == + static_cast(static_cast(this->segment_ptr) + this->phy_chunks.size() * granul_size)) { + for (size_t i = 0; i < segment.phy_chunks.size(); i++) { + this->phy_chunks.emplace_back(std::move(segment.phy_chunks[i])); + this->vir_chunks.emplace_back(std::move(segment.vir_chunks[i])); + } + } else if (this->segment_ptr == + static_cast(static_cast(segment.segment_ptr) + segment.phy_chunks.size() * granul_size)) { + for (size_t i = 0; i < phy_chunks.size(); i++) { + segment.phy_chunks.emplace_back(std::move(this->phy_chunks[i])); + segment.vir_chunks.emplace_back(std::move(this->vir_chunks[i])); + } + + this->phy_chunks = std::move(segment.phy_chunks); + this->vir_chunks = std::move(segment.vir_chunks); + + this->segment_ptr = segment.segment_ptr; + } else { + throw GMLakeError("remerge(VmmSegment& segment)"); + return false; + } + + this->num_free_chunks += segment.num_free_chunks; + segment.num_free_chunks = 0; + + segment.phy_chunks.clear(); + segment.vir_chunks.clear(); + + segment.segment_ptr = nullptr; + + return true; + } + + std::vector> phy_chunks; + std::vector> vir_chunks; + + const size_t granul_size; + void *segment_ptr; + + int device_id; + aclError status; + + size_t num_free_chunks; + size_t num_used_chunks; + bool fused; + bool released; +}; diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/PluggableAllocator.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/PluggableAllocator.cpp new file mode 100644 index 
000000000..acd5e8d33 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/PluggableAllocator.cpp @@ -0,0 +1,218 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "PluggableAllocator.h" + +#include + +#include "swap_log.h" +#include "NPUSwapManager.h" + +// Deleter handed out by raw_deleter(); forwards to the singleton's free(). +void local_raw_delete(void *ptr) +{ + PluggableAllocator::getInstance().free(ptr); +} + +// Registers `block` in allocated_blocks under its device pointer (guarded by `mutex`). +void PluggableAllocator::add_allocated_block(Block *block) +{ + std::lock_guard lock(mutex); + allocated_blocks[block->ptr] = block; +} + +std::mutex *PluggableAllocator::getFreeMutex() const +{ + return &npu_free_mutex; +} + +// Looks up the block registered for `ptr`; returns nullptr when the pointer is unknown. +// When `remove` is true the entry is also erased from allocated_blocks. +Block *PluggableAllocator::get_allocated_block(void *ptr, bool remove) +{ + std::lock_guard lock(mutex); + auto it = allocated_blocks.find(ptr); + if (it == allocated_blocks.end()) { + return nullptr; + } + Block *block = it->second; + if (remove) { + allocated_blocks.erase(it); + } + return block; +} + +// Grows device_allocator to `device_count` entries, creating an allocator for every new index. +// Existing entries are kept; a smaller `device_count` is a no-op. +void PluggableAllocator::init(int device_count) +{ + int size = static_cast(device_allocator.size()); + if (size < device_count) { + device_allocator.resize(device_count); + for (const auto i : c10::irange(size, device_count)) { + device_allocator[i] = std::make_unique(); + } + } +} + +bool PluggableAllocator::initialized() +{ + return !device_allocator.empty(); +} + +/* * allocates a block which is safe to use from the provided stream */ +// When swap_oom_enable is set, a SwapOutOfMemError from the device allocator triggers CheckAndSwapOutForOOM +// and the allocation is tried again. +// NOTE(review): that loop has no retry limit; it ends only when a block with a non-null ptr is returned or +// an exception other than SwapOutOfMemError propagates. +void *PluggableAllocator::malloc(int 
device, size_t size, aclrtStream stream) +{ + void *devPtr = nullptr; + if (c10_npu::swap::NPUSwapManager::GetInstance().swap_oom_enable) { + bool isTryMallocExit = false; + uint32_t tryMallocCount = 0; + while (!isTryMallocExit) { + try { + Block *block = device_allocator[device]->malloc(device, size, stream); + add_allocated_block(block); + devPtr = static_cast(block->ptr); + if (devPtr != nullptr) { + if (tryMallocCount > 0) { + SWAP_LOG_WARN("[SwapOomEnable] try malloc count[%u], finally success!", tryMallocCount); + } + isTryMallocExit = true; + } + } catch (const c10_npu::swap::SwapOutOfMemError &err) { + c10_npu::swap::NPUSwapManager::GetInstance().CheckAndSwapOutForOOM(err.GetData()); + } + tryMallocCount++; + } + } else { + Block *block = device_allocator[device]->malloc(device, size, stream); + add_allocated_block(block); + devPtr = static_cast(block->ptr); + } + + return devPtr; +} + +// Unregisters `ptr` and returns its block to the owning device allocator. +// A null pointer is ignored; an unknown pointer raises via AT_ERROR. +void PluggableAllocator::free(void *ptr) +{ + if (!ptr) { + return; + } + Block *block = get_allocated_block(ptr, true); + if (!block) { + AT_ERROR("invalid device pointer: ", ptr); + } + device_allocator[block->device]->free(block); +} + +void PluggableAllocator::setMemoryFraction(double fraction, int device) +{ + TORCH_INTERNAL_ASSERT(0 <= device && device < device_allocator.size(), "Allocator not initialized for device ", + device, ": did you call init?"); + TORCH_INTERNAL_ASSERT(0 <= fraction && fraction <= 1, "invalid fraction:", fraction, ". 
Please set within (0, 1)."); + + c10_npu::SetDevice(device); + + device_allocator[device]->setMemoryFraction(fraction); +} + +void PluggableAllocator::emptyCache(bool check_error) +{ + int count = static_cast(device_allocator.size()); + for (int i = 0; i < count; i++) + device_allocator[i]->emptyCache(check_error); +} + +// Records that the block behind `ptr` is used on `stream`. +// A null pointer is ignored; an unknown pointer raises via AT_ERROR. +void PluggableAllocator::recordStream(void *ptr, c10_npu::NPUStream stream) +{ + if (!ptr) { + return; + } + Block *block = get_allocated_block(ptr); + // get_allocated_block returns nullptr for pointers this allocator does not own; report that the same way + // free() and eraseStream() do instead of dereferencing a null block. + if (!block) { + AT_ERROR("invalid device pointer: ", ptr); + } + device_allocator[block->device]->recordStream(block, stream); +} + +void PluggableAllocator::eraseStream(void *ptr, c10_npu::NPUStream stream) +{ + if (!ptr) { + return; + } + Block *block = get_allocated_block(ptr); + if (!block) { + AT_ERROR("invalid device pointer: ", ptr); + } + + if (block->stream != c10_npu::getCurrentNPUStream(block->device).stream(false)) { + // If the stream the tensor block was allocated on differs from + // the stream that submits the event-wait task in the HCCL synchronize() + // method, the recorded stream cannot be erased. + // New tensor creation may use the block before the HCCL op is complete. 
+ return; + } + + device_allocator[block->device]->eraseStream(block, stream); +} + +// Concatenates the snapshots of every per-device allocator, in device index order. +std::vector PluggableAllocator::snapshot() +{ + std::vector result; + int count = static_cast(device_allocator.size()); + for (int i = 0; i < count; i++) { + auto snap = device_allocator[i]->snapshot(); + result.insert(result.end(), snap.begin(), snap.end()); + } + return result; +} + +c10::DeleterFnPtr PluggableAllocator::raw_deleter() const +{ + return &local_raw_delete; +} + +void PluggableAllocator::cacheInfo(int dev_id, size_t *cachedAndFree, size_t *largestBlock) +{ + device_allocator[dev_id]->cacheInfo(cachedAndFree, largestBlock); +} + +// Asserts that `device` lies in [0, c10_npu::device_count()). +void PluggableAllocator::assertValidDevice(int device) +{ + int device_num = c10_npu::device_count(); + AT_ASSERTM(0 <= device && device < device_num, "Invalid device argument."); +} + +DeviceStats PluggableAllocator::getDeviceStats(int device) +{ + assertValidDevice(device); + return device_allocator[device]->getStats(); +} + +void PluggableAllocator::resetAccumulatedStats(int device) +{ + assertValidDevice(device); + device_allocator[device]->resetAccumulatedStats(); +} + +void PluggableAllocator::resetPeakStats(int device) +{ + assertValidDevice(device); + device_allocator[device]->resetPeakStats(); +} + +void PluggableAllocator::raw_delete(void *ptr) +{ + this->free(ptr); +} + +void PluggableAllocator::FreeDeviceCachedMemory(int device) +{ + device_allocator[device]->emptyCache(true); +} + +std::string PluggableAllocator::name() +{ + return "native"; +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/PluggableAllocator.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/PluggableAllocator.h new file mode 100644 index 000000000..27790fcf6 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/PluggableAllocator.h @@ -0,0 +1,66 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. 
+// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "DeviceCachingAllocator.h" + +// Singleton front end over the per-device allocators stored in device_allocator. +// It is obtained through getInstance(); the constructor is private and copying is deleted. +class PluggableAllocator { +private: + // Mutex member for the allocated_blocks map below. + std::mutex mutex; + + // allocated blocks by device pointer + ska::flat_hash_map allocated_blocks; + + // Handed out through getFreeMutex(); this class itself never locks it in the declarations shown here. + mutable std::mutex npu_free_mutex; + + PluggableAllocator() {} + +public: + PluggableAllocator(const PluggableAllocator &) = delete; + PluggableAllocator &operator = (const PluggableAllocator &) = delete; + + // Returns the single instance, created on first use as a function-local static. + static PluggableAllocator &getInstance() + { + static PluggableAllocator instance; + return instance; + } + + // Per-device allocators, indexed by device id. + std::vector> device_allocator; + + std::mutex *getFreeMutex() const; + void add_allocated_block(Block *block); + // `remove` defaults to false, i.e. a plain lookup. + Block *get_allocated_block(void *ptr, bool remove = false); + void init(int device_count); + bool initialized(); + void *malloc(int device, size_t size, aclrtStream stream); + void free(void *ptr); + void setMemoryFraction(double fraction, int device); + void emptyCache(bool check_error); + void recordStream(void *ptr, c10_npu::NPUStream stream); + void eraseStream(void *ptr, c10_npu::NPUStream stream); + std::vector snapshot(); + c10::DataPtr allocate(size_t size) const; + c10::DeleterFnPtr raw_deleter() const; + void cacheInfo(int dev_id, size_t *cachedAndFree, size_t *largestBlock); + void assertValidDevice(int device); + DeviceStats getDeviceStats(int device); + void resetAccumulatedStats(int device); + void resetPeakStats(int device); + void *raw_alloc(size_t nbytes); + void 
*raw_alloc_with_stream(size_t nbytes, aclrtStream stream); + void raw_delete(void *ptr); + void FreeDeviceCachedMemory(int device); + std::string name(); +}; diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/SwapBindings.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/SwapBindings.cpp new file mode 100644 index 000000000..10cc2d268 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/SwapBindings.cpp @@ -0,0 +1,237 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include +#include +#include + +#include "PluggableAllocator.h" +#include "NPUSwapManager.h" + +// C-linkage entry points that forward to the PluggableAllocator singleton. +extern "C" { +void *gmlake_malloc(size_t size, int device, aclrtStream stream) +{ + void *ptr = PluggableAllocator::getInstance().malloc(device, size, stream); + return ptr; +} + +// Only `ptr` is used; size, device and stream are accepted but ignored. +void gmlake_free(void *ptr, size_t size, int device, aclrtStream stream) +{ + PluggableAllocator::getInstance().free(ptr); +} + +void gmlake_init(int device_count) +{ + PluggableAllocator::getInstance().init(device_count); +} + +// Empties the cache of every device allocator, honouring the caller's `check_error` flag. +void gmlake_empty_cache(bool check_error) +{ + // Forward the argument; it used to be dropped in favour of a hard-coded `true`. + PluggableAllocator::getInstance().emptyCache(check_error); +} + +void gmlake_memory_fraction(double fraction, int device) +{ + PluggableAllocator::getInstance().setMemoryFraction(fraction, device); +} + +DeviceStats gmlake_get_device_stats(int device) +{ + return PluggableAllocator::getInstance().getDeviceStats(device); +} + +void gmlake_reset_peak_stats(int device) +{ + return PluggableAllocator::getInstance().resetPeakStats(device); +} + +void gmlake_record_stream(void *ptr, c10_npu::NPUStream stream) +{ + PluggableAllocator::getInstance().recordStream(ptr, stream); +} + +void gmlake_erase_stream(void *ptr, c10_npu::NPUStream stream) +{ + PluggableAllocator::getInstance().eraseStream(ptr, stream); +} +} + +// Copies the values of a c10::SmallVector into a new Python list, preserving order. +py::list small_vector_to_list(const c10::SmallVector &sizes) +{ + py::list result; + for (const auto &value : sizes) { + result.append(value); + } + return result; +} + +// Returns one dict per profiled op from the swap profiler, each with a "tensor" list of per-tensor dicts. +py::list getProfilerOpInfoData() +{ + py::list opList; + for (auto &opInfo : c10_npu::swap::NPUSwapManager::GetInstance().getSwapProfiler()->getProfilerOpInfoVec()) { + py::dict opDict; + opDict["opName"] = opInfo.getOpName(); + opDict["opId"] = opInfo.getOpId(); + opDict["stage"] = opInfo.getStage(); + opDict["step"] = opInfo.getStep(); + opDict["allocated_bytes"] = opInfo.getSwapMemory().allocated_bytes; + opDict["reserved_bytes"] = opInfo.getSwapMemory().reserved_bytes; + opDict["active_bytes"] = opInfo.getSwapMemory().active_bytes; + py::list tensorList; 
+ for (auto &tensorInfo : opInfo.getProfilerTensorInfo()) { + py::dict tensorDict; + tensorDict["ptr"] = tensorInfo.getPtr(); + tensorDict["size"] = tensorInfo.getNbytes(); + tensorDict["shape"] = small_vector_to_list(tensorInfo.getShapeV2()); + tensorDict["dtype"] = c10::toString(tensorInfo.getDtype()); + tensorDict["tensorType"] = tensorInfo.getTensorType(); + tensorList.append(tensorDict); + } + opDict["tensor"] = tensorList; + opList.append(opDict); + } + return opList; +} + +// Returns one dict per recorded swap from the swap profiler. +py::list getProfilerSwapInfoData() +{ + py::list opList; + for (auto &opInfo : c10_npu::swap::NPUSwapManager::GetInstance().getSwapProfiler()->getProfilerSwapInfoVec()) { + py::dict opDict; + opDict["opId"] = opInfo.getOpId(); + opDict["swapName"] = opInfo.getSwapName(); + opDict["size"] = opInfo.getSize(); + opDict["isOOM"] = opInfo.getIsOOM(); + opDict["srcPtr"] = opInfo.getSrcPtr(); + opDict["dstPtr"] = opInfo.getDstPtr(); + opList.append(opDict); + } + return opList; +} + +void setPolicyInfoData(std::vector &policyInfoVec) +{ + c10_npu::swap::NPUSwapManager::GetInstance().FunAfterProfiler(policyInfoVec); +} + +void setFrequentOpNameData(std::vector &frequentOpNames) +{ + c10_npu::swap::NPUSwapManager::GetInstance().initOpNameToOneHotAndIndexMap(frequentOpNames); +} + +void updateStep() +{ + c10_npu::swap::NPUSwapManager::GetInstance().updateStep(); +} + +void updateProfiler() +{ + c10_npu::swap::NPUSwapManager::GetInstance().getSwapProfiler()->updateStep(); +} + +std::vector recordTensorPtrWithTypes(const std::vector &tensors, + c10_npu::swap::SwapTensorType tensorType, int updateWeakPtrMap, bool isUpdateBlacklist) +{ + auto uniquePtrs = c10_npu::swap::NPUSwapManager::GetInstance().recordTensorPtrWithTypes(tensors, tensorType, + updateWeakPtrMap, isUpdateBlacklist); + return uniquePtrs; +} + +void InitCppManager() +{ + c10_npu::swap::NPUSwapManager::GetInstance().Init(); +} + +void DeInitCppManager() +{ + c10_npu::swap::NPUSwapManager::GetInstance().DeInit(); +} + 
+// Python bindings: exposes the swap enums, the plain data structs (read/write fields), the NPUSwapManager +// singleton accessor and the free helper functions defined earlier in this file. +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) +{ + py::enum_(m, "SwapTensorType") + .value("MODEL", c10_npu::swap::SwapTensorType::MODEL) + .value("OPTIM", c10_npu::swap::SwapTensorType::OPTIM) + .value("SHARED_MEMORY", c10_npu::swap::SwapTensorType::SHARED_MEMORY) + .value("OTHERS", c10_npu::swap::SwapTensorType::OTHERS) + .value("RESERVED", c10_npu::swap::SwapTensorType::RESERVED); + + py::enum_(m, "SwapStageType") + .value("INIT", c10_npu::swap::SwapStageType::INIT) + .value("FWD", c10_npu::swap::SwapStageType::FWD) + .value("BWD", c10_npu::swap::SwapStageType::BWD) + .value("OPTIM", c10_npu::swap::SwapStageType::OPTIM) + .value("RESERVED", c10_npu::swap::SwapStageType::RESERVED); + + py::class_(m, "UniqueSwapPtr") + .def(py::init<>()) + .def_readwrite("ptrBase", &c10_npu::swap::UniqueSwapPtr::ptrBase) + .def_readwrite("index", &c10_npu::swap::UniqueSwapPtr::index); + + py::class_(m, "UniqueSwapMemory") + .def(py::init<>()) + .def_readwrite("allocated_bytes", &c10_npu::swap::UniqueSwapMemory::allocated_bytes) + .def_readwrite("reserved_bytes", &c10_npu::swap::UniqueSwapMemory::reserved_bytes) + .def_readwrite("active_bytes", &c10_npu::swap::UniqueSwapMemory::active_bytes); + + py::class_(m, "SwapStage") + .def(py::init<>()) + .def_readwrite("stageType", &c10_npu::swap::SwapStage::stageType) + .def_readwrite("microBatchIndex", &c10_npu::swap::SwapStage::microBatchIndex) + .def_readwrite("layerIndex", &c10_npu::swap::SwapStage::layerIndex); + + py::class_(m, "SwapConfig") + .def(py::init<>()) // for testing only; can be removed + .def_readwrite("microBatchNum", &c10_npu::swap::SwapConfig::microBatchNum) + .def_readwrite("layerNum", &c10_npu::swap::SwapConfig::layerNum) + .def_readwrite("isOOM", &c10_npu::swap::SwapConfig::isOOM) + .def_readwrite("stage", &c10_npu::swap::SwapConfig::stage) + .def_readwrite("step", &c10_npu::swap::SwapConfig::step) + .def_readwrite("policyStep", &c10_npu::swap::SwapConfig::policyStep) + .def_readwrite("currentStageOpId", 
&c10_npu::swap::SwapConfig::currentStageOpId) + .def_readwrite("oneStepDuration", &c10_npu::swap::SwapConfig::oneStepDuration) + .def_readwrite("tensorSizeThresh", &c10_npu::swap::SwapConfig::tensorSizeThresh) + .def_readwrite("fwdOpLayerInfo", &c10_npu::swap::SwapConfig::fwdOpLayerInfo) + .def_readwrite("bwdOpLayerInfo", &c10_npu::swap::SwapConfig::bwdOpLayerInfo) + .def_readwrite("enableProfiler", &c10_npu::swap::SwapConfig::enableProfiler) + .def_readwrite("enableExecutor", &c10_npu::swap::SwapConfig::enableExecutor) + .def_readwrite("enableCustomRecordStream", &c10_npu::swap::SwapConfig::enableCustomRecordStream); + + py::class_(m, "SwapPolicyInfo") + .def(py::init<>()) + .def_readwrite("ptr", &c10_npu::swap::SwapPolicyInfo::ptr) + .def_readwrite("executorNeedMatch", &c10_npu::swap::SwapPolicyInfo::executorNeedMatch) + .def_readwrite("swapOutOpId", &c10_npu::swap::SwapPolicyInfo::swapOutOpId) + .def_readwrite("swapInOpId", &c10_npu::swap::SwapPolicyInfo::swapInOpId) + .def_readwrite("swapOutStage", &c10_npu::swap::SwapPolicyInfo::swapOutStage) + .def_readwrite("swapInStage", &c10_npu::swap::SwapPolicyInfo::swapInStage) + .def_readwrite("freeStage", &c10_npu::swap::SwapPolicyInfo::freeStage) + .def_readwrite("swapInFreeStage", &c10_npu::swap::SwapPolicyInfo::swapInFreeStage); + + // GetInstance is bound with return_value_policy::reference, so Python does not take ownership of the singleton. + py::class_(m, "NPUSwapManager") + .def_static("GetInstance", &c10_npu::swap::NPUSwapManager::GetInstance, py::return_value_policy::reference) + .def_readwrite("config", &c10_npu::swap::NPUSwapManager::config) + .def_readwrite("swap_enable", &c10_npu::swap::NPUSwapManager::swap_enable) + .def_readwrite("swap_oom_enable", &c10_npu::swap::NPUSwapManager::swap_oom_enable); + + m.def("getProfilerOpInfoData", &getProfilerOpInfoData); + m.def("getProfilerSwapInfoData", &getProfilerSwapInfoData); + m.def("setPolicyInfoData", &setPolicyInfoData); + m.def("setFrequentOpNameData", &setFrequentOpNameData); + m.def("updateStep", &updateStep); + m.def("updateProfiler", &updateProfiler); + 
m.def("recordTensorPtrWithTypes", &recordTensorPtrWithTypes, "record tensor type and tensor unique ptr"); + m.def("init_cpp_manager", &InitCppManager, "init cpp manager"); + m.def("deinit_cpp_manager", &DeInitCppManager, "deinit cpp manager"); +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/SwapException.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/SwapException.h new file mode 100644 index 000000000..a877e25b8 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/SwapException.h @@ -0,0 +1,39 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include + + +// With no extra argument, the stringified call expression itself is used as the description. +inline const char *getSwapErrorFunction(const char *msg) +{ + return msg; +} + +// If there is just 1 provided C-string argument, use it. +inline const char *getSwapErrorFunction(const char * /* msg */, const char *args) +{ + return args; +} + +// Evaluates err_code once and, when it is not ACL_ERROR_NONE, fails through TORCH_CHECK with the function, +// file, line, a description (see getSwapErrorFunction) and the numeric error code. +#define SWAP_CHECK_ERROR(err_code, ...) 
\ + do { \ + auto Error = err_code; \ + if ((Error) != ACL_ERROR_NONE) { \ + TORCH_CHECK(false, __func__, ":", __FILE__, ":", __LINE__, \ + " SWAP NPU function error: ", getSwapErrorFunction(#err_code, ##__VA_ARGS__), ", error code is ", \ + Error) \ + } \ + } while (0) diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/common.cpp b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/common.cpp new file mode 100644 index 000000000..1e8bc3dba --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/common.cpp @@ -0,0 +1,54 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// Copyright 2022 The GLake Authors. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "common.h" + +// Applies a signed delta to one statistic: `current` moves by `amount`, `peak` tracks the maximum of +// `current`, and the magnitude is added to `allocated` (positive delta) or `freed` (negative delta). +void update_stat(Stat &stat, int64_t amount) +{ + stat.current += amount; + stat.peak = std::max(stat.current, stat.peak); + if (amount > 0) { + stat.allocated += amount; + } + if (amount < 0) { + stat.freed += -amount; + } +} + +// Clears the running allocated/freed totals; current and peak are left untouched. +void reset_accumulated_stat(Stat &stat) +{ + stat.allocated = 0; + stat.freed = 0; +} + +// Restarts peak tracking from the current value. +void reset_peak_stat(Stat &stat) +{ + stat.peak = stat.current; +} + +// Applies update_stat with the same delta to every entry of stat_array whose flag in stat_types is set. +void update_stat_array(StatArray &stat_array, int64_t amount, const StatTypes &stat_types) +{ + for_each_selected_stat_type(stat_types, + [&stat_array, amount](size_t stat_type) { update_stat(stat_array[stat_type], amount); }); +} + +// Ordering for blocks: by stream first, then by size, then by address. +bool BlockComparator(const Block *a, const Block *b) +{ + if (a->stream != b->stream) { + return reinterpret_cast(a->stream) < reinterpret_cast(b->stream); + } + if (a->size != b->size) { + return a->size < b->size; + } + return reinterpret_cast(a->ptr) < reinterpret_cast(b->ptr); +} diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/common.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/common.h new file mode 100644 index 000000000..3f13c2242 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/common.h @@ -0,0 +1,262 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// Copyright 2022 The GLake Authors. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "NPUVmmApi.h" + +using c10_npu::NPUCachingAllocator::DeviceStats; +using c10_npu::NPUCachingAllocator::RecordContext; +using c10_npu::NPUCachingAllocator::SegmentInfo; +using c10_npu::NPUCachingAllocator::Stat; +using c10_npu::NPUCachingAllocator::StatArray; +using c10_npu::NPUCachingAllocator::StatType; +using c10_npu::NPUCachingAllocator::TraceEntry; +// using c10_npu::NPUCachingAllocator::History; +using OutOfMemoryObserver = + std::function; + +struct History { + void *addr; + size_t real_size; // unrounded, actually requested size + std::shared_ptr context; // per-watcher context +}; + +struct BlockInfo { + int64_t size = 0; + int64_t requested_size = 0; + int32_t gc_counter = 0; + bool allocated = false; + bool active = false; + std::shared_ptr context_when_allocated; + std::vector history; +}; + +using stream_set = ska::flat_hash_set; + +using CreateContextFn = std::shared_ptr (*)(void); + +constexpr size_t kMinBlockSize = 512; // all sizes are rounded to at least 512 bytes +constexpr size_t kSmallSize = 1048576; // largest "small" allocation is 1 MiB +constexpr size_t kSmallBuffer = 2097152; // "small" allocations are packed in 2 MiB blocks +constexpr size_t kLargeBuffer = 20971520; // "large" allocations may be packed in 20 MiB blocks +constexpr size_t kMinLargeAlloc = 10485760; // allocations between 1 and 10 MiB may use kLargeBuffer +constexpr size_t kRoundLarge = 2097152; // round up large allocs to 2 MiB +constexpr size_t kGranularity = 2097152; + +using StatTypes = std::array(StatType::NUM_TYPES)>; + +void update_stat(Stat &stat, int64_t amount); + +void reset_accumulated_stat(Stat &stat); + +void reset_peak_stat(Stat &stat); + +// Calls f(index) for every index whose flag in stat_types is set, in increasing index order. +template void for_each_selected_stat_type(const StatTypes &stat_types, Func f) +{ + for (const auto stat_type : c10::irange(stat_types.size())) { + if (stat_types[stat_type]) { + f(stat_type); + } + } +} + +void 
update_stat_array(StatArray &stat_array, int64_t amount, const StatTypes &stat_types); + +struct Block; +using Comparison = bool (*)(const Block *, const Block *); + +struct BlockPool { + BlockPool(Comparison comparator, bool small) : blocks(comparator), is_small(small) {} + std::set blocks; + std::unordered_set hash; + const bool is_small; +}; + +struct HistoryChain { + History h; + std::unique_ptr next; // when blocks are merged we keep records + // of what used to be in the block +}; + +struct Block { + int device; // gpu + aclrtStream stream; // allocation stream + stream_set stream_uses; // streams on which the block was used + size_t size; // block size in bytes + size_t requested_size; // memory originally requested + size_t actual_size; + BlockPool *pool{ nullptr }; // owning memory pool + void *ptr{ nullptr }; // memory address + bool allocated{ false }; // in-use flag + Block *prev{ nullptr }; // prev block if split from a larger allocation + Block *next{ nullptr }; // next block if split from a larger allocation + int event_count{ 0 }; // number of outstanding CUDA events + int gc_count{ 0 }; // counter for prioritizing older / less useful blocks for + // garbage collection + std::unique_ptr history{ nullptr }; + HistoryChain *history_last{ nullptr }; + std::shared_ptr vmm_segment; + size_t ptr_hash; + // std::shared_ptr self_last_event; + + // NOTE: members are initialized in declaration order (requested_size before actual_size), + // not in the order written in the initializer list below; both are set to 0 either way. + Block(int device, aclrtStream stream, size_t size, BlockPool *pool, void *ptr) + : device(device), + stream(stream), + stream_uses(), + size(size), + actual_size(0), + requested_size(0), + pool(pool), + // self_last_event(std::make_shared(stream)), + ptr(ptr) + { + ptr_hash = reinterpret_cast(ptr); + } + + // constructor for search key + Block(int device, aclrtStream stream, size_t size) + : device(device), + stream(stream), + stream_uses(), + size(size), + actual_size(0), + // self_last_event(std::make_shared(stream)), + requested_size(0) + { + ptr_hash = 0; + } + + bool is_split() const + { + return (prev != 
nullptr) || (next != nullptr); + } + + // Links this block between `before` and `after` in the prev/next chain; either neighbour may be null. + void splice(Block *before, Block *after) + { + if (before) { + before->next = this; + } + prev = before; + if (after) { + after->prev = this; + } + next = after; + } +}; + +struct BlockHash { + size_t operator () (const Block *b) const + { + return b->ptr_hash; + } +}; + +bool BlockComparator(const Block *a, const Block *b); + +using EventOrderedBlockSet = std::unordered_set; +using SetIterator = EventOrderedBlockSet::iterator; + +// Set of blocks that also keeps pool_size equal to the summed size of the blocks it holds. +struct BlockEventOrderPool { + BlockEventOrderPool() : pool_size(0) {} + + void insert(Block *block) + { + if (blocks.count(block) == 0) { + blocks.insert(block); + pool_size += block->size; + } + } + + bool erase(Block *block) + { + if (blocks.count(block)) { + blocks.erase(block); + pool_size -= block->size; + + return true; + } else { + return false; + } + } + + SetIterator erase(SetIterator it) + { + if (blocks.count(*it)) { + pool_size -= (*it)->size; + + return blocks.erase(it); + } else { + return blocks.end(); + } + } + + EventOrderedBlockSet blocks; + size_t pool_size; +}; + +// Formats a byte count with two decimals as bytes / KiB / MiB / GiB; each unit's upper bound is inclusive. +inline std::string format_size(uint64_t size) +{ + std::ostringstream os; + os.precision(2); + os << std::fixed; + if (size <= 1024) { + os << size << " bytes"; + } else if (size <= 1048576) { + os << (size / 1024.0); + os << " KiB"; + } else if (size <= 1073741824ULL) { + os << (size / 1048576.0); + os << " MiB"; + } else { + os << (size / 1073741824.0); + os << " GiB"; + } + return os.str(); +} + +// Parameters of one allocation request. search_key is a Block built with the search-key constructor; +// the `stats` constructor argument is accepted but not stored. +struct AllocParams { + AllocParams(int device, size_t size, aclrtStream stream, BlockPool *pool, size_t alloc_size, DeviceStats &stats) + : search_key(device, stream, size), pool(pool), alloc_size(alloc_size), block(nullptr), err(ACL_ERROR_NONE) + {} + + int device() const + { + return search_key.device; + } + aclrtStream stream() const + { + return search_key.stream; + } + size_t size() const + { + return search_key.size; + } + + Block search_key; + BlockPool *pool; + size_t alloc_size; + Block *block; + StatTypes 
stat_types = { false }; + aclError err; +}; diff --git a/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/swap_log.h b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/swap_log.h new file mode 100644 index 000000000..5f9b0d960 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/csrc/pluggable_allocator/smart_swap/swap_log.h @@ -0,0 +1,123 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include + +enum class SwapLogLevel { + SWAP_DEBUG = 0, + SWAP_INFO = 1, + SWAP_WARN = 2, + SWAP_ERROR = 3, + SWAP_NONE = 4, +}; + +class SwapLogApi { +public: + // Returns true when messages of `logLevel` should be printed. The threshold is read once from the + // SWAP_LOG_LEVEL environment variable (values 0..4) and defaults to SWAP_ERROR when unset or out of range. + static bool IsLogEnable(SwapLogLevel logLevel) + { + static bool INIT_SWAP_LOG_LEVEL = false; + static int GLOBAL_SWAP_LOG_LEVEL = 0; + if (!INIT_SWAP_LOG_LEVEL) { + const char *levelStr = std::getenv("SWAP_LOG_LEVEL"); + int curLevel = static_cast(SwapLogLevel::SWAP_ERROR); + if (levelStr != nullptr) { + int level = std::atoi(levelStr); + if (level >= static_cast(SwapLogLevel::SWAP_DEBUG) && + level <= static_cast(SwapLogLevel::SWAP_NONE)) { + curLevel = level; + } + } + + GLOBAL_SWAP_LOG_LEVEL = curLevel; + INIT_SWAP_LOG_LEVEL = true; + } + return (GLOBAL_SWAP_LOG_LEVEL <= static_cast(logLevel)); + } + + // Returns the value of the RANK environment variable, or -1 when it is not set. + static int GetLogRank() + { + const char *envStr = std::getenv("RANK"); + int64_t envRank = (envStr != nullptr) ? 
strtol(envStr, nullptr, 10) : -1; + return static_cast(envRank); + } + + // Returns true when `rank` may log. SWAP_LOG_RANK is read once and accepted only in the range -1..7; + // a filter of -1 (the default) or a `rank` of -1 lets every message through. + static bool IsLogRankEnable(int rank) + { + static bool INIT_SWAP_LOG_RANK = false; + static int GLOBAL_SWAP_LOG_RANK = -1; + if (!INIT_SWAP_LOG_RANK) { + const char *envStr = std::getenv("SWAP_LOG_RANK"); + int64_t envRank = (envStr != nullptr) ? strtol(envStr, nullptr, 10) : -1; + int curRank = static_cast(envRank); + if (curRank >= -1 && curRank < 8) { + GLOBAL_SWAP_LOG_RANK = curRank; + } + INIT_SWAP_LOG_RANK = true; + } + if (GLOBAL_SWAP_LOG_RANK == -1 || rank == -1 || GLOBAL_SWAP_LOG_RANK == rank) { + return true; + } + return false; + } +}; + +#define SWAP_LOG_DEBUG(fmt, ...) \ + do { \ + if (SwapLogApi::IsLogEnable(SwapLogLevel::SWAP_DEBUG)) { \ + const char * const funcName = __FUNCTION__; \ + int rank = SwapLogApi::GetLogRank(); \ + if (SwapLogApi::IsLogRankEnable(rank)) { \ + printf("[SWAP_DEBUG] %s:%d:%s,rank[%d]: " #fmt "\n", __FILENAME__, __LINE__, \ + static_cast(funcName), rank, ##__VA_ARGS__); \ + } \ + } \ + } while (false) + +#define SWAP_LOG_INFO(fmt, ...) \ + do { \ + if (SwapLogApi::IsLogEnable(SwapLogLevel::SWAP_INFO)) { \ + const char * const funcName = __FUNCTION__; \ + int rank = SwapLogApi::GetLogRank(); \ + if (SwapLogApi::IsLogRankEnable(rank)) { \ + printf("[SWAP_INFO] %s:%d:%s,rank[%d]: " #fmt "\n", __FILENAME__, __LINE__, \ + static_cast(funcName), rank, ##__VA_ARGS__); \ + } \ + } \ + } while (false) + +#define SWAP_LOG_WARN(fmt, ...) \ + do { \ + if (SwapLogApi::IsLogEnable(SwapLogLevel::SWAP_WARN)) { \ + const char * const funcName = __FUNCTION__; \ + int rank = SwapLogApi::GetLogRank(); \ + if (SwapLogApi::IsLogRankEnable(rank)) { \ + printf("[SWAP_WARN] %s:%d:%s,rank[%d]: " #fmt "\n", __FILENAME__, __LINE__, \ + static_cast(funcName), rank, ##__VA_ARGS__); \ + } \ + } \ + } while (false) + +#define SWAP_LOG_ERROR(fmt, ...) 
\ + do { \ + if (SwapLogApi::IsLogEnable(SwapLogLevel::SWAP_ERROR)) { \ + const char * const funcName = __FUNCTION__; \ + int rank = SwapLogApi::GetLogRank(); \ + if (SwapLogApi::IsLogRankEnable(rank)) { \ + printf("[SWAP_ERROR] %s:%d:%s,rank[%d]: " #fmt "\n", __FILENAME__, __LINE__, \ + static_cast(funcName), rank, ##__VA_ARGS__); \ + } \ + } \ + } while (false) diff --git a/model/train/yoco_moe/mindspeed/ops/dropout_add_layer_norm.py b/model/train/yoco_moe/mindspeed/ops/dropout_add_layer_norm.py new file mode 100644 index 000000000..f0d0de879 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/dropout_add_layer_norm.py @@ -0,0 +1,49 @@ +import torch +import torch_npu +from mindspeed.op_builder import NpuDropoutAddLayerNormOpBuilder + +__all__ = ["npu_dropout_add_layer_norm"] + + +def npu_dropout_add_layer_norm(x0, + residual, + weight, + bias, + dropout_p, + epsilon, + rowscale=None, + layerscale=None, + prenorm=False, + residual_in_fp32=False, + return_dropout_mask=False): + # Loads the op built by NpuDropoutAddLayerNormOpBuilder and calls it with is_rms_norm=False. + # Every returned tensor is viewed with x0.shape. The result is `out`, or (out, pre_norm) when prenorm is set; + # with return_dropout_mask the mask is appended, and it is an all-ones uint8 tensor unless dropout_p > 0.0. + mindspeed_ops = NpuDropoutAddLayerNormOpBuilder().load() + is_rms_norm = False + out, pre_norm, dmask = mindspeed_ops.npu_dropout_add_layer_norm( + x0, + weight, + residual, + bias, + rowscale, + layerscale, + dropout_p, + epsilon, + prenorm, + residual_in_fp32, + is_rms_norm, + return_dropout_mask + ) + if not return_dropout_mask: + return ( + out.view(x0.shape) if not prenorm else (out.view(x0.shape), pre_norm.view(x0.shape)) + ) + else: + dmask = ( + dmask.view(x0.shape) + if dropout_p > 0.0 + else torch.ones(x0.shape, dtype=torch.uint8, device=x0.device) + ) + return ( + (out.view(x0.shape), dmask) + if not prenorm + else (out.view(x0.shape), pre_norm.view(x0.shape), dmask) + ) \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/dropout_add_rms_norm.py b/model/train/yoco_moe/mindspeed/ops/dropout_add_rms_norm.py new file mode 100644 index 000000000..b7eebbced --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/dropout_add_rms_norm.py @@ -0,0 +1,47 @@ +import 
torch +import torch_npu +from mindspeed.op_builder import NpuDropoutAddLayerNormOpBuilder + + +def npu_dropout_add_rms_norm(x0, + residual, + weight, + bias, + dropout_p, + epsilon, + rowscale=None, + layerscale=None, + prenorm=False, + residual_in_fp32=False, + return_dropout_mask=False): + mindspeed_ops = NpuDropoutAddLayerNormOpBuilder().load() + is_rms_norm = True + out, pre_norm, dmask = mindspeed_ops.npu_dropout_add_layer_norm( + x0, + weight, + residual, + bias, + rowscale, + layerscale, + dropout_p, + epsilon, + prenorm, + residual_in_fp32, + is_rms_norm, + return_dropout_mask + ) + if not return_dropout_mask: + return ( + out.view(x0.shape) if not prenorm else (out.view(x0.shape), pre_norm.view(x0.shape)) + ) + else: + dmask = ( + dmask.view(x0.shape) + if dropout_p > 0.0 + else torch.ones(x0.shape, dtype=torch.uint8, device=x0.device) + ) + return ( + (out.view(x0.shape), dmask) + if not prenorm + else (out.view(x0.shape), pre_norm.view(x0.shape), dmask) + ) \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/ffn.py b/model/train/yoco_moe/mindspeed/ops/ffn.py new file mode 100644 index 000000000..61035f26c --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/ffn.py @@ -0,0 +1,69 @@ +# Copyright (c) 2024, Huawei Technologies. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import torch_npu +from torch.library import impl +from mindspeed.op_builder.ffn_builder import FFNOpBuilder +from mindspeed.op_builder.builder import AS_LIBRARY + +__all__ = ["npu_ffn"] + + +op_builder = FFNOpBuilder() + + +@impl(AS_LIBRARY, "npu_ffn", "PrivateUse1") +def _npu_ffn(x, + weight1, + weight2, + activation, + *, + expert_tokens=None, + expert_tokens_index=None, + bias1=None, + bias2=None, + scale=None, + offset=None, + deq_scale1=None, + deq_scale2=None, + antiquant_scale1=None, + antiquant_scale2=None, + antiquant_offset1=None, + antiquant_offset2=None, + inner_precise=None, + output_dtype=None): + return op_builder.load().npu_ffn(x, + weight1, + weight2, + activation, + expert_tokens, + expert_tokens_index, + bias1, + bias2, + scale, + offset, + deq_scale1, + deq_scale2, + antiquant_scale1, + antiquant_scale2, + antiquant_offset1, + antiquant_offset2, + inner_precise, + output_dtype) + + +def npu_ffn(*args, **kwargs): + return torch.ops.mindspeed.npu_ffn(*args, **kwargs) \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/fusion_attention_v2.py b/model/train/yoco_moe/mindspeed/ops/fusion_attention_v2.py new file mode 100644 index 000000000..24556f216 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/fusion_attention_v2.py @@ -0,0 +1,114 @@ +# Copyright (c) 2024, Huawei Technologies. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from mindspeed.op_builder import FusionAttentionV2OpBuilder + +__all__ = ["npu_fusion_attention"] + + +class FusionAttentionV2Function(torch.autograd.Function): + + @staticmethod + def forward(ctx, query, key, value, head_num, input_layout, pse, padding_mask, atten_mask, scale, keep_prob, + pre_tokens, next_tokens, inner_precise, prefix, actual_seq_qlen, actual_seq_kvlen, sparse_mode, + gen_mask_parallel, sync, pse_type, q_start_idx, kv_start_idx): + mindspeed_ops = FusionAttentionV2OpBuilder().load() + outputs = mindspeed_ops.npu_fusion_attention_v2(query, key, value, head_num, + input_layout, pse, + padding_mask, atten_mask, + scale, keep_prob, pre_tokens, + next_tokens, inner_precise, prefix, + actual_seq_qlen, actual_seq_kvlen, + sparse_mode, gen_mask_parallel, + sync, pse_type, q_start_idx, + kv_start_idx) + attention_in, softmax_max, softmax_sum, softmax_in, seed, offset, numels = outputs + ctx.save_for_backward(query, key, value, pse, padding_mask, atten_mask, attention_in, + softmax_max, softmax_sum, softmax_in) + ctx.scale = scale + ctx.input_layout = input_layout + ctx.head_num = head_num + ctx.pre_tokens = pre_tokens + ctx.next_tokens = next_tokens + ctx.inner_precise = inner_precise + ctx.gen_mask_parallel = gen_mask_parallel + ctx.sync = sync + ctx.seed = seed + ctx.offset = offset + ctx.numels = numels + ctx.prefix = prefix + ctx.keep_prob = keep_prob + ctx.actual_seq_qlen = actual_seq_qlen + ctx.actual_seq_kvlen = actual_seq_kvlen + ctx.sparse_mode = sparse_mode + ctx.pse_type = pse_type + ctx.q_start_idx = q_start_idx + ctx.kv_start_idx = kv_start_idx + + return outputs + + @staticmethod + def backward(ctx, grad_outputs, dq=None, dk=None, dv=None, seed=0, offset=0, numels=0): + mindspeed_ops = FusionAttentionV2OpBuilder().load() + query, key, value, pse, padding_mask, atten_mask, attention_in, softmax_max, \ + softmax_sum, softmax_in = ctx.saved_tensors + results = mindspeed_ops.npu_fusion_attention_grad_v2( + query, key, value, 
grad_outputs, ctx.head_num, ctx.input_layout, pse, padding_mask, atten_mask, + softmax_max, softmax_sum, softmax_in, attention_in, ctx.scale, ctx.keep_prob, ctx.pre_tokens, + ctx.next_tokens, ctx.inner_precise, ctx.seed, ctx.offset, ctx.numels, ctx.prefix, ctx.actual_seq_qlen, + ctx.actual_seq_kvlen, ctx.sparse_mode, ctx.gen_mask_parallel, ctx.sync, ctx.pse_type, ctx.q_start_idx, + ctx.kv_start_idx) + + return results[0], results[1], results[2], None, None, results[3], None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None + + +def npu_fusion_attention(query, key, value, head_num, + input_layout, *, pse=None, + padding_mask=None, atten_mask=None, + scale=1., keep_prob=1., pre_tokens=2147483647, + next_tokens=2147483647, inner_precise=0, prefix=None, + actual_seq_qlen=None, actual_seq_kvlen=None, + sparse_mode=0, gen_mask_parallel=True, + sync=False, pse_type=1, q_start_idx=None, + kv_start_idx=None): + return FusionAttentionV2Function.apply(query, key, value, head_num, + input_layout, pse, + padding_mask, atten_mask, + scale, keep_prob, pre_tokens, + next_tokens, inner_precise, prefix, + actual_seq_qlen, actual_seq_kvlen, + sparse_mode, gen_mask_parallel, + sync, pse_type, q_start_idx, + kv_start_idx) + + +def npu_fusion_attention_grad(query, key, value, grad_outputs, + head_num, input_layout, *, pse=None, + padding_mask=None, atten_mask=None, + softmax_max=None, softmax_sum=None, softmax_in=None, attention_in=None, + scale=1., keep_prob=1., pre_tokens=2147483647, + next_tokens=2147483647, inner_precise=0, + seed=1234, offset=0, numels=0, prefix=None, + actual_seq_qlen=None, actual_seq_kvlen=None, sparse_mode=0, + gen_mask_parallel=True, sync=False, pse_type=1, q_start_idx=None, + kv_start_idx=None): + mindspeed_ops = FusionAttentionV2OpBuilder().load() + return mindspeed_ops.npu_fusion_attention_grad_v2(query, key, value, grad_outputs, head_num, input_layout, pse, + padding_mask, atten_mask, softmax_max, softmax_sum, 
softmax_in, + attention_in, scale, keep_prob, pre_tokens, next_tokens, + inner_precise, seed, offset, numels, prefix, actual_seq_qlen, + actual_seq_kvlen, sparse_mode, gen_mask_parallel, sync, + pse_type, q_start_idx, kv_start_idx) diff --git a/model/train/yoco_moe/mindspeed/ops/gmm.py b/model/train/yoco_moe/mindspeed/ops/gmm.py new file mode 100644 index 000000000..b7299bda3 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/gmm.py @@ -0,0 +1,175 @@ +import torch +from torch.library import impl +from mindspeed.op_builder import GMMOpBuilder, GMMV2OpBuilder +from mindspeed.op_builder.builder import AS_LIBRARY +from mindspeed.ops.npu_groupmatmul_add import npu_groupmatmul_add_fp32 + + +__all__ = ["npu_gmm", "npu_gmm_v2"] + + +def check_optional_tensor(tensor, device, name): + if not isinstance(tensor, (torch.Tensor, type(None))): + raise TypeError(f"{name} must be a torch.Tensor or None, got {type(tensor)}.") + if isinstance(tensor, torch.Tensor) and tensor.device != device: + raise RuntimeError( + f"Expected all tensors to be on the same device, but found at least two devices, " + f"{device}(arg0) and {tensor.device}({name})!") + + +class GMMFunction(torch.autograd.Function): + builder = GMMOpBuilder() + builder2 = GMMV2OpBuilder() + + @staticmethod + def forward(ctx, original_weight, x, weight, bias, group_args): + group_list, group_type, gemm_fusion, group_list_type, group_list_data_type = group_args + if bias is not None and bias.requires_grad: + raise ValueError("Bias is not supported to compute gradient!") + if (x.requires_grad or weight.requires_grad) and group_type != 0: + raise ValueError("group_type must be zero to compute gradients of x and weight!") + bias = [] if bias is None else [bias] + if group_list_type == 0: + outputs = GMMFunction.builder.load().npu_gmm([x], [weight], bias, group_list, group_type, group_list_type) + elif group_list_type == 1: + outputs = GMMFunction.builder2.load().npu_gmm([x], [weight], bias, group_list, group_type, 
group_list_type) + if group_list_data_type == 0: + ctx.save_for_backward(x, weight, original_weight) + ctx.group_list = group_list + else: + ctx.save_for_backward(x, weight, group_list, original_weight) + ctx.gemm_fusion = gemm_fusion + ctx.group_list_type = group_list_type + ctx.group_list_data_type = group_list_data_type + + + return outputs[0] + + @staticmethod + def backward(ctx, grad_outputs): + if ctx.group_list_data_type == 0: + x, weight, original_weight = ctx.saved_tensors + group_list = ctx.group_list + else: + x, weight, group_list, original_weight = ctx.saved_tensors + + if ctx.gemm_fusion: + if ctx.group_list_type == 0: + dx, _, dbias = GMMFunction.builder.load().npu_gmm_backward_fusion([grad_outputs], [weight], group_list, + ctx.group_list_type) + npu_groupmatmul_add_fp32(x, grad_outputs, group_list, original_weight.main_grad) + + elif ctx.group_list_type == 1: + dx, _, dbias = GMMFunction.builder2.load().npu_gmm_backward_fusion([grad_outputs], [weight], group_list, + ctx.group_list_type) + group_list_v2 = torch.cumsum(group_list, dim=0) + npu_groupmatmul_add_fp32(x, grad_outputs, group_list_v2, original_weight.main_grad) + + dbias = None if len(dbias) == 0 else dbias[0] + + if hasattr(original_weight, 'grad_added_to_main_grad'): + if getattr(weight, 'zero_out_wgrad', False): + grad_weight = torch.zeros( + weight.shape, + dtype=x.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + else: + grad_weight = torch.empty( + weight.shape, + dtype=x.dtype, + device=torch.cuda.current_device(), + requires_grad=False, + ) + original_weight.grad_added_to_main_grad = True + else: + grad_weight = None + + return None, dx[0], grad_weight, dbias, None + else: + if ctx.group_list_type == 0: + dx, dw, dbias = GMMFunction.builder.load().npu_gmm_backward([grad_outputs], [x], [weight], group_list, + ctx.group_list_type) + elif ctx.group_list_type == 1: + dx, dw, dbias = GMMFunction.builder2.load().npu_gmm_backward([grad_outputs], [x], [weight], 
group_list, + ctx.group_list_type) + dbias = None if len(dbias) == 0 else dbias[0] + + return None, dx[0], dw[0], dbias, None + + + +def npu_gmm_param_verification(x, weight, *, bias=None, group_list=None, group_type=0, group_list_type=0): + if not isinstance(x, torch.Tensor): + raise TypeError(f"arg0 must be a torch.Tensor, got {type(x)}.") + if not isinstance(weight, torch.Tensor): + raise TypeError(f"arg1 must be a torch.Tensor, got {type(weight)}.") + if not isinstance(bias, (torch.Tensor, type(None))): + raise TypeError(f"bias must be a torch.Tensor or None, got {type(bias)}.") + if (group_list_type == 0): + if not ( + isinstance(group_list, (torch.Tensor, type(None))) + or (isinstance(group_list, list) and all(isinstance(x, int) for x in group_list)) + ): + raise TypeError(f"group_list must be a List of int64, torch.Tensor or None, got {type(group_list)}.") + else: + if not (isinstance(group_list, (torch.Tensor, type(None)))): + raise TypeError(f"group_list must be a torch.Tensor or None, got {type(group_list)}.") + if isinstance(group_list, torch.Tensor): + if len(group_list.shape) > 1: + raise ValueError(f"If group_list is not None, it must be an one-dimensional tensor, " + f"got dimension of group_list: {len(group_list.shape)}!") + if group_list.dtype != torch.int64: + raise TypeError(f"group_list must be a List of int64, got group_list type: {type(group_list)}, " + f"dtype: {group_list.dtype}!") + if not isinstance(group_type, (int, type(None))): + raise TypeError(f"group_type must be an int or None, got {type(group_type)}.") + # Ensure all tensors on the same device + x_device = x.device + device_warning = "Expected all tensors to be on the same device, but found at least two devices" + if weight.device != x_device: + raise RuntimeError(f"{device_warning}, {x_device}(arg0) and {weight.device}(arg1)!") + if bias is not None and bias.device != x_device: + raise RuntimeError(f"{device_warning}, {x_device}(arg0) and {bias.device}(bias)!") + if 
isinstance(group_list, torch.Tensor) and group_list.device != x_device: + raise RuntimeError(f"{device_warning}, {x_device}(arg0) and {group_list.device}(group_list)!") + + +def _npu_gmm_common(original_weight, x, weight, *, bias=None, group_list=None, group_type=0, group_list_type=0, gemm_fusion=False): + support_dtype = [torch.float16, torch.bfloat16, torch.float32] + if weight.dtype not in support_dtype: + raise TypeError(f"Only support non quant case, but got weight dtype {weight.dtype}.") + npu_gmm_param_verification(x, weight, bias=bias, group_list=group_list, group_type=group_type, + group_list_type=group_list_type) + if group_list_type == 0: + return torch.ops.mindspeed.npu_gmm(original_weight, x, weight, bias=bias, group_list=group_list, group_type=group_type, gemm_fusion=gemm_fusion) + elif group_list_type == 1: + return torch.ops.mindspeed.npu_gmm_v2(original_weight, x, weight, bias=bias, group_list=group_list, group_type=group_type, gemm_fusion=gemm_fusion) + else: + raise ValueError(f"group_list_type must be 0 or 1, but got {group_list_type}.") + + +@impl(AS_LIBRARY, "npu_gmm.List", "PrivateUse1") +@impl(AS_LIBRARY, "npu_gmm.Tensor", "PrivateUse1") +def _npu_gmm(original_weight, x, weight, *, bias=None, group_list=None, group_type=0, gemm_fusion=False): + if isinstance(group_list, (torch.Tensor, type(None))): + group_list_data_type = 1 + else: + group_list_data_type = 0 + group_args = (group_list, group_type, gemm_fusion, 0, group_list_data_type) + return GMMFunction.apply(original_weight, x, weight, bias, group_args) + + +def npu_gmm(x, weight, *, bias=None, group_list=None, group_type=0, gemm_fusion=False, original_weight=None): + return _npu_gmm_common(original_weight, x, weight, bias=bias, group_list=group_list, group_type=group_type, group_list_type=0, gemm_fusion=gemm_fusion) + + +@impl(AS_LIBRARY, "npu_gmm_v2.Tensor", "PrivateUse1") +def _npu_gmm_v2(original_weight, x, weight, *, bias=None, group_list=None, group_type=0, gemm_fusion=False): + 
group_args = (group_list, group_type, gemm_fusion, 1, 1) + return GMMFunction.apply(original_weight, x, weight, bias, group_args) + + +def npu_gmm_v2(x, weight, *, bias=None, group_list=None, group_type=0, gemm_fusion=False, original_weight=None): + return _npu_gmm_common(original_weight, x, weight, bias=bias, group_list=group_list, group_type=group_type, group_list_type=1, gemm_fusion=gemm_fusion) diff --git a/model/train/yoco_moe/mindspeed/ops/lcal_functional.py b/model/train/yoco_moe/mindspeed/ops/lcal_functional.py new file mode 100644 index 000000000..6c2d2e699 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/lcal_functional.py @@ -0,0 +1,84 @@ +import torch + +from megatron.core import mpu + +from mindspeed.op_builder import LcalOpBuilder + + +class CoCConfig: + def __init__(self, rank_id, rank_size, comm_domain): + self.rank_id = rank_id + self.rank_size = rank_size + self.comm_domain = comm_domain + + +class TP2DConfig: + def __init__(self, ag_dim, rs_dim, inner_dim_is_ag): + self.ag_dim = ag_dim + self.rs_dim = rs_dim + self.inner_dim_is_ag = inner_dim_is_ag + + +class CoCOperations: + mindspeed_ops = LcalOpBuilder().load() + + def __init__(self): + self.comm_config = None + + def set_comm_config(self, config): + self.comm_config = config + + def matmul_all_reduce(self, input1, input2, output, bias=None): + device = input1.device.index + tp_size = mpu.get_tensor_model_parallel_world_size() + comm_domain = str(device // tp_size) + rank = device % tp_size + CoCOperations.mindspeed_ops.matmul_all_reduce(input1, input2, bias, output, rank, tp_size, comm_domain) + return output + + def all_gather_matmul(self, input1, input2, output, bias=None): + device = input1.device.index + tp_size = mpu.get_tensor_model_parallel_world_size() + comm_domain = str(device // tp_size) + rank = device % tp_size + CoCOperations.mindspeed_ops.all_gather_matmul(input1, input2, bias, output, rank, tp_size, comm_domain) + return output + + def all_gather_matmul_v2(self, input1, 
input2, output, comm_output, bias=None): + device = input1.device.index + if self.comm_config is None: + tp_size = mpu.get_tensor_model_parallel_world_size() + comm_domain = str(device // tp_size) + rank = device % tp_size + else: + tp_size = self.comm_config.rank_size + comm_domain = self.comm_config.comm_domain + rank = self.comm_config.rank_id + CoCOperations.mindspeed_ops.all_gather_matmul_v2(input1, input2, bias, output, comm_output, rank, tp_size, comm_domain) + return output, comm_output + + def matmul_reduce_scatter(self, input1, input2, output, bias=None): + device = input1.device.index + tp_size = mpu.get_tensor_model_parallel_world_size() + comm_domain = str(device // tp_size) + rank = device % tp_size + CoCOperations.mindspeed_ops.matmul_reduce_scatter(input1, input2, bias, output, rank, tp_size, comm_domain) + return output + + def pure_matmul(self, input1, input2, output, bias=None): + device = input1.device.index + tp_size = mpu.get_tensor_model_parallel_world_size() + comm_domain = str(device // tp_size) + rank = device % tp_size + CoCOperations.mindspeed_ops.pure_matmul(input1, input2, bias, output, rank, tp_size, comm_domain) + return output + + def all_gather_matmul_reduce_scatter(self, input1, input2, output, tp2d_config, bias=None): + from megatron.core.parallel_state import get_tensor_model_parallel_rank + tp_size = tp2d_config.ag_dim * tp2d_config.rs_dim + rank = get_tensor_model_parallel_rank() + comm_domain = "1" + CoCOperations.mindspeed_ops.all_gather_matmul_reduce_scatter(input1, input2, bias, output, rank, tp_size, comm_domain, tp2d_config.ag_dim, tp2d_config.rs_dim, tp2d_config.inner_dim_is_ag) + return output + +coc_ops = CoCOperations() diff --git a/model/train/yoco_moe/mindspeed/ops/npu_all_to_all_all_gather_bmm.py b/model/train/yoco_moe/mindspeed/ops/npu_all_to_all_all_gather_bmm.py new file mode 100644 index 000000000..10655c6c8 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/npu_all_to_all_all_gather_bmm.py @@ -0,0 +1,58 
@@ +from typing import Optional, List, Dict +import torch +import torch_npu +from torch.library import impl +from mindspeed.op_builder import AllToAllAllGatherBatchMatMulOpBuilder +from mindspeed.op_builder.builder import AS_LIBRARY + +__all__ = ["npu_alltoall_allgather_bmm"] + + +mindspeed_ops_builder = AllToAllAllGatherBatchMatMulOpBuilder() +SUPPORTED_ACT_TYPE = ["None", "GeLu", "SiLu", "ReLu", "FastGeLu"] +ACT_TYPE_DICT = {"none": 0, + "gelu": 1, + "silu": 2, + "relu": 3, + "fastgelu": 4 + } + + +@impl(AS_LIBRARY, "npu_alltoall_allgather_bmm", "PrivateUse1") +def npu_alltoall_allgather_bmm_single(x, + weight, + group_ep, + group_ep_worldsize, + group_tp, + group_tp_worldsize, + *, + bias=None, + shard_type=0, + act_type=0, + need_allgather_out=False, + need_activation_feature=False): + mindspeed_ops = mindspeed_ops_builder.load() + outputs = mindspeed_ops.npu_alltoall_allgather_bmm(x, weight, bias, + group_ep, group_ep_worldsize, + group_tp, group_tp_worldsize, + shard_type, act_type, + need_allgather_out, + need_activation_feature) + return outputs + + +def convert_act_type(act_type): + if not isinstance(act_type, str): + raise AssertionError(f'act_type should be str type, but got type {type(act_type)}') + act_type_lower = act_type.lower() + if act_type_lower in ACT_TYPE_DICT: + return ACT_TYPE_DICT[act_type_lower] + raise AssertionError(f'Unknown act_type: {act_type}, supported act_type:{SUPPORTED_ACT_TYPE}, case insensitive') + + +def npu_alltoall_allgather_bmm(*args, **kwargs): + if 'act_type' not in kwargs: + kwargs['act_type'] = 0 + else: + kwargs['act_type'] = convert_act_type(kwargs['act_type']) + return torch.ops.mindspeed.npu_alltoall_allgather_bmm(*args, **kwargs) \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/npu_apply_fused_ema_adamw.py b/model/train/yoco_moe/mindspeed/ops/npu_apply_fused_ema_adamw.py new file mode 100644 index 000000000..eda1fe9df --- /dev/null +++ 
b/model/train/yoco_moe/mindspeed/ops/npu_apply_fused_ema_adamw.py @@ -0,0 +1,37 @@ +import torch +from mindspeed.op_builder import FusedEmaAdamWOpBuilder + +__all__ = ["npu_apply_fused_ema_adamw"] + +fused_ema_adamw_op_builder = FusedEmaAdamWOpBuilder() + + +def npu_apply_fused_ema_adamw(grad: torch.Tensor, + var: torch.Tensor, + m: torch.Tensor, + v: torch.Tensor, + s: torch.Tensor, + step: torch.Tensor, + lr: float = 1e-3, + ema_decay: float = 0.9999, + beta1: float = 0.9, + beta2: float = 0.999, + eps: float = 1e-8, + mode: int = 1, + bias_correction: bool = True, + weight_decay: float = 0.0): + fused_ema_adamw_ops = fused_ema_adamw_op_builder.load() + return fused_ema_adamw_ops.npu_apply_fused_ema_adamw(grad, + var, + m, + v, + s, + step, + lr, + ema_decay, + beta1, + beta2, + eps, + mode, + bias_correction, + weight_decay) \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/npu_bmm_reduce_scatter_all_to_all.py b/model/train/yoco_moe/mindspeed/ops/npu_bmm_reduce_scatter_all_to_all.py new file mode 100644 index 000000000..eb14bffb1 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/npu_bmm_reduce_scatter_all_to_all.py @@ -0,0 +1,55 @@ +# Copyright (c) 2024, Huawei Technologies. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import torch_npu +from torch.library import impl +from mindspeed.op_builder.npu_bmm_reduce_scatter_all_to_all_builder import BatchMatMulReduceScatterAlltoAllOpBuilder +from mindspeed.op_builder.builder import AS_LIBRARY + +__all__ = ["npu_bmm_reducescatter_alltoall"] + + +mindspeed_ops_builder = BatchMatMulReduceScatterAlltoAllOpBuilder() + + +@impl(AS_LIBRARY, "npu_bmm_reducescatter_alltoall", "PrivateUse1") +def npu_bmm_reducescatter_alltoall_single(x, + weight, + group_ep, + group_ep_worldsize, + group_tp, + group_tp_worldsize, + *, + bias=None, + shard_type=0): + if x is None: + raise AssertionError('x must not be None.') + if weight is None: + raise AssertionError('weight must not be None.') + mindspeed_ops = mindspeed_ops_builder.load() + y = mindspeed_ops.npu_bmm_reducescatter_alltoall(x, + weight, + bias, + group_ep, + group_ep_worldsize, + group_tp, + group_tp_worldsize, + shard_type) + return y + + +def npu_bmm_reducescatter_alltoall(*args, **kwargs): + return torch.ops.mindspeed.npu_bmm_reducescatter_alltoall(*args, **kwargs) \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/npu_grouped_mat_mul_all_reduce.py b/model/train/yoco_moe/mindspeed/ops/npu_grouped_mat_mul_all_reduce.py new file mode 100644 index 000000000..fbf616095 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/npu_grouped_mat_mul_all_reduce.py @@ -0,0 +1,27 @@ +from typing import Optional, List, Dict +import torch +import torch_npu +from mindspeed.op_builder import GroupedMatMulAllReduceOpBuilder + +__all__ = ["npu_grouped_mat_mul_all_reduce"] + + +op_builder = GroupedMatMulAllReduceOpBuilder() + + +def npu_grouped_mat_mul_all_reduce(x: List[torch.Tensor], + weight: List[torch.Tensor], + hcomm: str, + *, + bias: Optional[List[torch.Tensor]] = None, + group_list: Optional[List[int]] = None, + split_item: Optional[int] = 0, + reduce_op: str = "sum", + comm_turn: int = 0) -> List[torch.Tensor]: + if torch.__version__ < '2.1.0': + if bias is None: 
+ bias = [] + outputs = op_builder.load().npu_grouped_mat_mul_all_reduce( + x, weight, bias, group_list, split_item, + hcomm, reduce_op, comm_turn) + return outputs \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/npu_groupmatmul_add.py b/model/train/yoco_moe/mindspeed/ops/npu_groupmatmul_add.py new file mode 100644 index 000000000..c3f794902 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/npu_groupmatmul_add.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +import torch +from mindspeed.op_builder import GroupMatmulAddOpBuilder + +__all__ = ["npu_groupmatmul_add_fp32"] + + +groupmatmul_add_op_builder = GroupMatmulAddOpBuilder() + + +def npu_groupmatmul_add_fp32(x, dy, grouplist, grad): + groupmatmul_add_ops = groupmatmul_add_op_builder.load() + groupmatmul_add_ops.npu_groupmatmul_add_fp32(x, dy, grouplist, grad) diff --git a/model/train/yoco_moe/mindspeed/ops/npu_matmul_add.py b/model/train/yoco_moe/mindspeed/ops/npu_matmul_add.py new file mode 100644 index 000000000..742d5b343 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/npu_matmul_add.py @@ -0,0 +1,39 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+import torch +import torch_npu +from mindspeed.op_builder import MatmulAddOpBuilder + +__all__ = ["npu_matmul_add_fp32"] + + +matmul_add_op_builder = MatmulAddOpBuilder() + + +def npu_matmul_add_fp32(total_input, grad_output, grad): + # 检查total_input的shape是否有维度为0 + for dim in total_input.shape: + if dim == 0: + return + + # 检查grad_output的shape是否有维度为0 + for dim in grad_output.shape: + if dim == 0: + return + + matmul_add_ops = matmul_add_op_builder.load() + matmul_add_ops.npu_matmul_add_fp32(grad_output, total_input, grad) + + +def npu_matmul_add_fp16(total_input, grad_output, grad): + # 检查total_input的shape是否有维度为0 + for dim in total_input.shape: + if dim == 0: + return + + # 检查grad_output的shape是否有维度为0 + for dim in grad_output.shape: + if dim == 0: + return + + grad_weight = grad_output.t().matmul(total_input) + grad.add_(grad_weight) diff --git a/model/train/yoco_moe/mindspeed/ops/npu_mm_all_reduce_add_rms_norm.py b/model/train/yoco_moe/mindspeed/ops/npu_mm_all_reduce_add_rms_norm.py new file mode 100644 index 000000000..d68c56369 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/npu_mm_all_reduce_add_rms_norm.py @@ -0,0 +1,67 @@ +# Copyright (c) 2024, Huawei Technologies. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import torch_npu +from torch.library import impl +from mindspeed.op_builder.npu_mm_all_reduce_add_rms_norm_builder import MatmulAllReduceAddRmsNormOpBuilder +from mindspeed.op_builder.builder import AS_LIBRARY + +__all__ = ["npu_mm_all_reduce_add_rms_norm"] + + +op_builder = MatmulAllReduceAddRmsNormOpBuilder() + + +@impl(AS_LIBRARY, "npu_mm_all_reduce_add_rms_norm", "PrivateUse1") +def npu_mm_all_reduce_add_rms_norm_single(x1, + x2, + residual, + gamma, + hcom, + reduce_op='sum', + epsilon=1e-06, + bias=None, + antiquant_scale=None, + antiquant_offset=None, + dequant_scale=None, + antiquant_group_size=0, + comm_turn=0): + if x1 is None: + raise AssertionError('x1 must not be None.') + if x2 is None: + raise AssertionError('x2 must not be None.') + if residual is None: + raise AssertionError('residual must not be None.') + if gamma is None: + raise AssertionError('gamma must not be None.') + y, normOut = op_builder.load().npu_mm_all_reduce_add_rms_norm(x1, + x2, + residual, + gamma, + hcom, + reduce_op, + epsilon, + bias, + antiquant_scale, + antiquant_offset, + dequant_scale, + antiquant_group_size, + comm_turn) + return (y.view(residual.shape), normOut.view(residual.shape)) + + +def npu_mm_all_reduce_add_rms_norm(*args, **kwargs): + return torch.ops.mindspeed.npu_mm_all_reduce_add_rms_norm(*args, **kwargs) \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/ops/npu_mm_all_reduce_add_rms_norm_.py b/model/train/yoco_moe/mindspeed/ops/npu_mm_all_reduce_add_rms_norm_.py new file mode 100644 index 000000000..9d1e9b240 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/ops/npu_mm_all_reduce_add_rms_norm_.py @@ -0,0 +1,67 @@ +# Copyright (c) 2024, Huawei Technologies. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
@impl(AS_LIBRARY, "npu_mm_all_reduce_add_rms_norm_", "PrivateUse1")
def npu_mm_all_reduce_add_rms_norm_single_(
        x1, x2, residual, gamma, hcom,
        reduce_op='sum', epsilon=1e-06, bias=None,
        antiquant_scale=None, antiquant_offset=None, dequant_scale=None,
        antiquant_group_size=0, comm_turn=0):
    """"PrivateUse1" implementation of ``npu_mm_all_reduce_add_rms_norm_``.

    Checks that the four mandatory inputs are present, forwards every
    argument positionally to the op module's ``npu_mm_all_reduce_add_rms_norm_``
    kernel, and returns both kernel outputs viewed with ``residual``'s shape.

    NOTE(review): the trailing underscore and the builder's name suggest an
    in-place variant -- confirm which inputs the kernel overwrites.

    Raises:
        AssertionError: if ``x1``, ``x2``, ``residual`` or ``gamma`` is None
            (checked in that order).

    Returns:
        Tuple ``(y, norm_out)``, each viewed as ``residual.shape``.
    """
    mandatory = (
        (x1, 'x1 must not be None.'),
        (x2, 'x2 must not be None.'),
        (residual, 'residual must not be None.'),
        (gamma, 'gamma must not be None.'),
    )
    for value, message in mandatory:
        if value is None:
            raise AssertionError(message)

    kernel = op_builder.load().npu_mm_all_reduce_add_rms_norm_
    y, norm_out = kernel(
        x1, x2, residual, gamma, hcom, reduce_op, epsilon, bias,
        antiquant_scale, antiquant_offset, dequant_scale,
        antiquant_group_size, comm_turn)
    target_shape = residual.shape
    return (y.view(target_shape), norm_out.view(target_shape))


def npu_mm_all_reduce_add_rms_norm_(*args, **kwargs):
    """Call ``torch.ops.mindspeed.npu_mm_all_reduce_add_rms_norm_`` with the given arguments."""
    dispatched_op = torch.ops.mindspeed.npu_mm_all_reduce_add_rms_norm_
    return dispatched_op(*args, **kwargs)
def npu_moe_token_permute(
        tokens: torch.Tensor,
        indices: torch.Tensor,
        num_out_tokens: int = None,
        padded_mode: bool = False
):
    """Forward the arguments to the op module's ``npu_moe_token_permute`` kernel.

    The op module is obtained from ``moe_token_permute_op_builder.load()`` on
    every call; all four arguments are passed positionally and the kernel's
    result is returned unchanged.
    """
    permute_kernel = moe_token_permute_op_builder.load().npu_moe_token_permute
    return permute_kernel(tokens, indices, num_out_tokens, padded_mode)


def npu_moe_token_unpermute(
        permuted_tokens: torch.Tensor,
        sorted_indices: torch.Tensor,
        probs: torch.Tensor = None,
        padded_mode: bool = False,
        restore_shape: torch.Size = None,
):
    """Forward the arguments to the op module's ``npu_moe_token_unpermute`` kernel.

    The op module is obtained from ``moe_token_unpermute_op_builder.load()``
    on every call; all five arguments are passed positionally and the
    kernel's result is returned unchanged.
    """
    unpermute_kernel = moe_token_unpermute_op_builder.load().npu_moe_token_unpermute
    return unpermute_kernel(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)
# ---- mindspeed/ops/npu_ring_attention_update.py ----
# (`op_builder` in this section is that module's RingAttentionUpdateOpBuilder instance.)

def npu_ring_attention_update(
        prev_attn_out: torch.Tensor,
        prev_softmax_max: torch.Tensor,
        prev_softmax_sum: torch.Tensor,
        cur_attn_out: torch.Tensor,
        cur_softmax_max: torch.Tensor,
        cur_softmax_sum: torch.Tensor,
        actual_seq_qlen: torch.Tensor = None,
        layout: str = "SBH",
):
    """Forward all arguments positionally to the op module's ``npu_ring_attention_update`` kernel.

    The op module is loaded through ``op_builder.load()`` on every call and
    the kernel's result is returned unchanged.
    """
    ops = op_builder.load()
    return ops.npu_ring_attention_update(
        prev_attn_out, prev_softmax_max, prev_softmax_sum, cur_attn_out, cur_softmax_max, cur_softmax_sum,
        actual_seq_qlen, layout)


# ---- mindspeed/ops/npu_rotary_position_embedding.py ----

def npu_rotary_position_embedding(x, cos, sin, mode=0):
    """Forward ``x``, ``cos``, ``sin`` and ``mode`` to the op module's ``npu_rotary_position_embedding`` kernel."""
    rope_ops = rope_op_builder.load()
    return rope_ops.npu_rotary_position_embedding(x, cos, sin, mode)


# ---- mindspeed/ops/quant_gmm.py ----
# (`op_builder` in this section is that module's QuantGMMOpBuilder instance.)

@impl(AS_LIBRARY, "npu_quant_gmm", "PrivateUse1")
def _npu_quant_gmm(x, weight, scale, *, offset=None, per_token_scale=None, bias=None, group_list=None,
                   group_list_type=0, output_dtype=None, act_type=0):
    """"PrivateUse1" implementation of ``npu_quant_gmm``.

    Wraps each tensor argument in a list (optional ones become an empty list
    when None), maps ``output_dtype`` to the kernel's integer code and returns
    the first element of the kernel's output.

    Raises:
        ValueError: if ``output_dtype`` is not None, bfloat16, float16 or int8.
    """
    # The kernel takes tensor lists; an absent optional tensor is an empty list.
    bias = [] if bias is None else [bias]
    scale = [] if scale is None else [scale]
    offset = [] if offset is None else [offset]
    per_token_scale = [] if per_token_scale is None else [per_token_scale]
    # Integer dtype codes passed to the kernel: bfloat16/None -> 1, float16 -> 0, int8 -> -1.
    if output_dtype is None or output_dtype == torch.bfloat16:
        output_dtype_value = 1
    elif output_dtype == torch.float16:
        output_dtype_value = 0
    elif output_dtype == torch.int8:
        output_dtype_value = -1
    else:
        raise ValueError(f"output_dtype should be int8, float16, bfloat16 or None, but got {output_dtype}")
    outputs = op_builder.load().npu_quant_gmm([x], [weight], scale, offset, per_token_scale, bias, group_list,
                                              group_list_type, output_dtype_value, act_type)
    return outputs[0]


def _npu_quant_gmm_common(x, weight, scale, *, offset=None, per_token_scale=None, bias=None, group_list=None,
                          group_list_type=0, output_dtype=None, act_type=0):
    """Validate the inputs and dispatch to ``torch.ops.mindspeed.npu_quant_gmm``.

    Raises:
        ValueError: if ``x`` or ``weight`` is not int8. The ``gmm`` helper
            checks may raise as well (their behaviour is defined in
            ``mindspeed.ops.gmm``).
    """
    if x.dtype != torch.int8 or weight.dtype != torch.int8:
        raise ValueError(f"Quant gmm only accept quant case, but got x[{x.dtype}] weight[{weight.dtype}]")
    gmm.npu_gmm_param_verification(x, weight, bias=bias, group_list=group_list,
                                   group_type=0, group_list_type=group_list_type)
    gmm.check_optional_tensor(scale, x.device, "scale")
    gmm.check_optional_tensor(offset, x.device, "offset")
    gmm.check_optional_tensor(per_token_scale, x.device, "per_token_scale")
    return torch.ops.mindspeed.npu_quant_gmm(x, weight, scale, offset=offset, per_token_scale=per_token_scale,
                                             bias=bias, group_list=group_list, group_list_type=group_list_type,
                                             output_dtype=output_dtype, act_type=act_type)


def npu_quant_gmm(x, weight, scale, *, offset=None, per_token_scale=None, bias=None, group_list=None,
                  output_dtype=None, act_type=0):
    """Quantized grouped matmul entry point that fixes ``group_list_type=0``."""
    return _npu_quant_gmm_common(x, weight, scale, offset=offset, per_token_scale=per_token_scale,
                                 bias=bias, group_list=group_list, group_list_type=0, output_dtype=output_dtype,
                                 act_type=act_type)


def npu_quant_gmm_v2(x, weight, scale, *, offset=None, per_token_scale=None, bias=None, group_list=None,
                     output_dtype=None, act_type=0):
    """Quantized grouped matmul entry point that fixes ``group_list_type=1``."""
    return _npu_quant_gmm_common(x, weight, scale, offset=offset, per_token_scale=per_token_scale,
                                 bias=bias, group_list=group_list, group_list_type=1, output_dtype=output_dtype,
                                 act_type=act_type)


# ---- mindspeed/ops/weight_quant_gmm.py ----
# (`op_builder` in this section is that module's WeightQuantGMMOpBuilder instance.)

@impl(AS_LIBRARY, "npu_weight_quant_gmm", "PrivateUse1")
def _npu_weight_quant_gmm(x, weight, antiquant_scale, *, antiquant_offset=None, bias=None, group_list=None,
                          group_list_type=0, act_type=0):
    """"PrivateUse1" implementation of ``npu_weight_quant_gmm``.

    Wraps each tensor argument in a list (optional ones become an empty list
    when None) and returns the first element of the kernel's output.
    """
    bias = [] if bias is None else [bias]
    antiquant_scale = [] if antiquant_scale is None else [antiquant_scale]
    antiquant_offset = [] if antiquant_offset is None else [antiquant_offset]
    outputs = op_builder.load().npu_weight_quant_gmm([x], [weight], antiquant_scale, antiquant_offset, bias,
                                                     group_list, group_list_type, act_type)
    return outputs[0]


def _npu_weight_quant_gmm_common(x, weight, antiquant_scale, *, antiquant_offset=None, bias=None, group_list=None,
                                 group_list_type=0, act_type=0):
    """Validate the inputs and dispatch to ``torch.ops.mindspeed.npu_weight_quant_gmm``.

    Raises:
        ValueError: if ``x`` is neither float16 nor bfloat16, or if ``weight``
            is not int8. The ``gmm`` helper checks may raise as well (their
            behaviour is defined in ``mindspeed.ops.gmm``).
    """
    if x.dtype not in (torch.float16, torch.bfloat16):
        # The message now names both accepted dtypes (it used to say "float16/fp16").
        raise ValueError(f"Input x only accept float16/bfloat16, but got x[{x.dtype}]")
    if weight.dtype != torch.int8:
        raise ValueError(f"Weight only support int8, but got weight[{weight.dtype}]")
    gmm.npu_gmm_param_verification(x, weight, bias=bias, group_list=group_list,
                                   group_type=0, group_list_type=group_list_type)
    gmm.check_optional_tensor(antiquant_scale, x.device, "antiquant_scale")
    gmm.check_optional_tensor(antiquant_offset, x.device, "antiquant_offset")
    return torch.ops.mindspeed.npu_weight_quant_gmm(x, weight, antiquant_scale, antiquant_offset=antiquant_offset,
                                                    bias=bias, group_list=group_list,
                                                    group_list_type=group_list_type, act_type=act_type)


def npu_weight_quant_gmm(x, weight, antiquant_scale, *, antiquant_offset=None, bias=None,
                         group_list=None, act_type=0):
    """Weight-quantized grouped matmul entry point that fixes ``group_list_type=0``."""
    return _npu_weight_quant_gmm_common(x, weight, antiquant_scale, antiquant_offset=antiquant_offset, bias=bias,
                                        group_list=group_list, group_list_type=0, act_type=act_type)


def npu_weight_quant_gmm_v2(x, weight, antiquant_scale, *, antiquant_offset=None, bias=None,
                            group_list=None, act_type=0):
    """Weight-quantized grouped matmul entry point that fixes ``group_list_type=1``."""
    return _npu_weight_quant_gmm_common(x, weight, antiquant_scale, antiquant_offset=antiquant_offset, bias=bias,
                                        group_list=group_list, group_list_type=1, act_type=act_type)


# ---- mindspeed/optimizer/adamw.py ----

def adamw(params: List[Tensor],
          grads: List[Tensor],
          exp_avgs: List[Tensor],
          exp_avg_sqs: List[Tensor],
          max_exp_avg_sqs: List[Tensor],
          step: int,
          *,
          amsgrad: bool,
          beta1: float,
          beta2: float,
          lr: float,
          weight_decay: float,
          eps: float,
          maximize: bool):
    r"""Functional API that performs AdamW algorithm computation.

    Each parameter is updated by one call to ``torch_npu.npu_apply_adam_w``,
    which writes into ``param.data``, ``exp_avg`` and ``exp_avg_sq`` through
    its ``out=`` argument.

    See :class:`~torch.optim.AdamW` for details.

    NOTE(review): ``max_exp_avg_sqs`` is accepted but never read here; only
    the ``amsgrad`` flag is forwarded to the kernel.
    """
    # Loop-invariant: both powers depend only on the step count, so compute
    # them once. They are the kernel's first two positional arguments.
    beta1_power = beta1 ** (step - 1)
    beta2_power = beta2 ** (step - 1)

    for param, grad, exp_avg, exp_avg_sq in zip(params, grads, exp_avgs, exp_avg_sqs):
        # The moment buffers are updated in place via ``out=``; only the
        # first returned tensor is re-attached as ``param.data``.
        param.data, _, _ = torch_npu.npu_apply_adam_w(
            beta1_power,
            beta2_power,
            lr,
            weight_decay,
            beta1,
            beta2,
            eps,
            grad,
            None,
            amsgrad,
            maximize,
            out=(param.data, exp_avg, exp_avg_sq)
        )


class FusedTorchAdamW(TorchAdamW):
    """``torch.optim.AdamW`` pinned to its fused implementation.

    The ``foreach``, ``capturable``, ``differentiable`` and ``fused``
    arguments are accepted for signature compatibility but ignored: the base
    class is always built with ``foreach=False``, ``capturable=False``,
    ``differentiable=False`` and ``fused=True``.
    """

    def __init__(
            self,
            params,
            lr: Union[float, Tensor] = 1e-3,
            betas: Tuple[float, float] = (0.9, 0.999),
            eps: float = 1e-8,
            weight_decay: float = 1e-2,
            amsgrad: bool = False,
            *,
            maximize: bool = False,
            foreach: Optional[bool] = None,
            capturable: bool = False,
            differentiable: bool = False,
            fused: Optional[bool] = None,
    ):
        super().__init__(params,
                         lr=lr,
                         betas=betas,
                         eps=eps,
                         weight_decay=weight_decay,
                         amsgrad=amsgrad,
                         foreach=False,
                         maximize=maximize,
                         capturable=False,
                         differentiable=False,
                         fused=True,)


class AdamW(Optimizer):
    """AdamW optimizer whose update is delegated to the module-level ``adamw`` function.

    The step counter is kept once per parameter group (``group['step']``)
    rather than per parameter.
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=1e-2, amsgrad=False, *, maximize: bool = False):
        """Validate the hyper-parameters and register them as group defaults.

        Raises:
            ValueError: if ``lr``, ``eps`` or ``weight_decay`` fails the
                ``0.0 <= value`` check, or a beta fails ``0.0 <= beta < 1.0``.
        """
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= weight_decay:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
        defaults = dict(lr=lr, betas=betas, eps=eps,
                        weight_decay=weight_decay, amsgrad=amsgrad, maximize=maximize)
        super().__init__(params, defaults)

    def __setstate__(self, state):
        """Restore state and back-fill ``amsgrad`` / ``maximize`` on groups that lack them."""
        super().__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsgrad', False)
            group.setdefault('maximize', False)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: optional callable that re-evaluates the model and
                returns the loss; it is run with gradients enabled.

        Returns:
            The closure's return value, or None when no closure is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad = []
            grads = []
            exp_avgs = []
            exp_avg_sqs = []
            max_exp_avg_sqs = []
            amsgrad = group['amsgrad']
            beta1, beta2 = group['betas']

            # One step counter per group, created on first use.
            group['step'] = group.get('step', 0) + 1

            for p in group['params']:
                if p.grad is None:
                    continue
                params_with_grad.append(p)
                if p.grad.is_sparse:
                    raise RuntimeError('AdamW does not support sparse gradients')
                grads.append(p.grad)

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                    if amsgrad:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)

                exp_avgs.append(state['exp_avg'])
                exp_avg_sqs.append(state['exp_avg_sq'])

                if amsgrad:
                    max_exp_avg_sqs.append(state['max_exp_avg_sq'])

            adamw(params_with_grad,
                  grads,
                  exp_avgs,
                  exp_avg_sqs,
                  max_exp_avg_sqs,
                  group['step'],
                  amsgrad=amsgrad,
                  beta1=beta1,
                  beta2=beta2,
                  lr=group['lr'],
                  weight_decay=group['weight_decay'],
                  eps=group['eps'],
                  maximize=group['maximize'])

        return loss
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2024, Bytedance Inc. All rights reserved. +import os +from textwrap import wrap +import types +from functools import wraps +import torch +import torch_npu +import torch.distributed +import megatron.core.tensor_parallel as tensor_parallel + +from megatron.training import get_args + +TRANSPOSE_BF16_BLOCK_SIZE = 4096 * 4096 + + +def build_optimizer_group_ranges_zero3_wrapper(fn): + @wraps(fn) + def build_optimizer_group_ranges_zero3(param_groups, gbuf_ranges): + local_param_group_map, group_ranges = fn(param_groups, gbuf_ranges) + for group_index, group in enumerate(param_groups): + for param in group["params"]: + if hasattr(param, 'enable_zero3') and param.enable_zero3: + group_range = group_ranges[group_index] + group_range["params"].append(param) + local_param_group_map[param] = (group_index, len(group_range["params"]) - 1) + return local_param_group_map, group_ranges + return build_optimizer_group_ranges_zero3 + + +def _copy_main_params_to_model_params_zero3(self): + """ + Copy main params to model params. + + Since this step is followed by an all-gather through the DDP's grad + buffer, this method is responsible for copying the updated params + from the main shards into the correct position in the grad buffer. + """ + # Utility method for copying group params. 
+ def copy_group_params(shard_main_groups, model_groups): + for shard_main_group, model_group in zip(shard_main_groups, model_groups): + for shard_main_param, model_param in zip(shard_main_group, model_group): + if hasattr(model_param, 'enable_zero3') and model_param.enable_zero3: + model_param.view(-1).data.copy_(shard_main_param) + continue + param_range_map = self._get_model_param_range_map(model_param) + world_range = param_range_map["gbuf_world_in_bucket"] + + assert world_range.size == shard_main_param.nelement() + + gbuf_index, _, bucket_id = self.model_param_gbuf_map[model_param] + model_param_buffer = self.buffers[gbuf_index].buckets[bucket_id].param_data + + shard_model_param = model_param_buffer.view(-1)[ + world_range.start : world_range.end + ] + + shard_model_param.data.copy_(shard_main_param) + + # Copy shard groups to model groups. + copy_group_params(self.shard_fp32_from_float16_groups, self.model_float16_groups) + copy_group_params(self.shard_fp32_groups, self.model_fp32_groups) + + +def build_model_and_main_param_groups_zero3_wrapper(function): + @wraps(function) + def build_model_and_main_param_zero3(*args, **kwargs): + global_args = get_args() + if global_args.enable_zero3: + return build_model_and_main_param_groups_zero3(*args, **kwargs) + else: + return function(*args, **kwargs) + return build_model_and_main_param_zero3 + + +def build_model_and_main_param_groups_zero3( + gbuf_ranges, + param_gbuf_map, + opt_group_ranges, +): + """ + Create main parameter groups needed for the optimizer step. + + These groups encompass both: 1) groups used by this class, for + reducing/gather, and 2) groups used by the inner optimizer for the + parameter update. Given that the conceptual grad buffer partitioning + (created in earlier method) doesn't respect parameter boundaries, + the optimizer operates on shards of the model parameters, rather than + the full parameters. 
+ """ + + # Parameter groups: + # model_float16_groups: original float16 parameters + # model_fp32_groups: original fp32 parameters + # shard_float16_groups: shards of original float16 parameters + # shard_fp32_groups: shards of original fp32 parameters + # shard_fp32_from_float16_groups: fp32 copy of float16 parameters + model_float16_groups = [] + model_fp32_groups = [] + shard_float16_groups = [] + shard_fp32_groups = [] + shard_fp32_from_float16_groups = [] + + # Allocate (or slice) each group's param shard. + for group_range in opt_group_ranges: + + # Params of this group. + model_float16_params_this_group = [] + model_fp32_params_this_group = [] + shard_float16_params_this_group = [] + shard_fp32_params_this_group = [] + shard_fp32_from_float16_params_this_group = [] + model_float16_groups.append(model_float16_params_this_group) + model_fp32_groups.append(model_fp32_params_this_group) + shard_float16_groups.append(shard_float16_params_this_group) + shard_fp32_groups.append(shard_fp32_params_this_group) + shard_fp32_from_float16_groups.append(shard_fp32_from_float16_params_this_group) + + for model_param in group_range["params"]: + + assert model_param.requires_grad + + if hasattr(model_param, 'enable_zero3') and model_param.enable_zero3: + if model_param.type() in ['torch.cuda.HalfTensor', + 'torch.cuda.BFloat16Tensor', + 'torch.BFloat16Tensor']: + zero3_main_param = model_param.detach().view(-1).clone().float() + tensor_parallel.copy_tensor_model_parallel_attributes(zero3_main_param, model_param) + model_float16_params_this_group.append(model_param) + shard_float16_params_this_group.append(model_param) + shard_fp32_from_float16_params_this_group.append(zero3_main_param) + elif model_param.type() == 'torch.cuda.FloatTensor': + model_fp32_params_this_group.append(model_param) + zero3_fp32_main_param = model_param.view(-1) + shard_fp32_params_this_group.append(zero3_fp32_main_param) + tensor_parallel.copy_tensor_model_parallel_attributes(zero3_fp32_main_param, 
model_param) + else: + raise TypeError( + 'Wrapped parameters must be one of ' + 'torch.cuda.FloatTensor, ' + 'torch.cuda.HalfTensor, or ' + 'torch.cuda.BFloat16Tensor. ' + 'Received {}'.format(model_param.type()) + ) + continue + + + gbuf_index, dtype, bucket_index = param_gbuf_map[model_param] + gbuf_range = gbuf_ranges[gbuf_index][dtype][bucket_index] + param_range = gbuf_range["param_map"][model_param]["param"] + + # fp16, bf16 params. + if model_param.type() in ['torch.cuda.HalfTensor', 'torch.cuda.BFloat16Tensor']: + + # Clone model -> main. + shard_model_param = model_param.detach().view(-1)[ + param_range.start : param_range.end + ] + shard_main_param = shard_model_param.clone().float() + tensor_parallel.copy_tensor_model_parallel_attributes( + shard_model_param, model_param + ) + tensor_parallel.copy_tensor_model_parallel_attributes( + shard_main_param, model_param + ) + if hasattr(model_param, 'shared'): + shard_model_param.shared = model_param.shared + shard_main_param.shared = model_param.shared + + # Add to group. + model_float16_params_this_group.append(model_param) + shard_float16_params_this_group.append(shard_model_param) + shard_fp32_from_float16_params_this_group.append(shard_main_param) + + # fp32 params. + elif model_param.type() == 'torch.cuda.FloatTensor': + shard_model_param = model_param.view(-1)[param_range.start : param_range.end] + model_fp32_params_this_group.append(model_param) + shard_fp32_params_this_group.append(shard_model_param) + tensor_parallel.copy_tensor_model_parallel_attributes( + shard_model_param, model_param + ) + if hasattr(model_param, 'shared'): + shard_model_param.shared = model_param.shared + + else: + raise TypeError( + 'Wrapped parameters must be one of ' + 'torch.cuda.FloatTensor, ' + 'torch.cuda.HalfTensor, or ' + 'torch.cuda.BFloat16Tensor. ' + 'Received {}'.format(model_param.type()) + ) + + # Update optimizer's params. 
+ group_range["orig_group"]["params"] = [ + *shard_fp32_params_this_group, + *shard_fp32_from_float16_params_this_group, + ] + + return ( + model_float16_groups, + model_fp32_groups, + shard_float16_groups, + shard_fp32_groups, + shard_fp32_from_float16_groups, + ) + + +def distributed_optimizer_zero3_init( + self, + optimizer, + config, + grad_scaler, + init_state_fn, + per_model_buffers, + data_parallel_group, + data_parallel_group_gloo, + data_parallel_group_idx, +): + """ + Distributed optimizer, for all data types (fp16, bf16, and fp32). + + The steps in this method create the core mapping between param and grad buffers, + parameters, and parameter shard ranges, that is needed for converting between model + param indexes and main parameter shard indexes. This method also updates the optimizer + parameter groups with the newly created shards. + + Args: + optimizer (torch.optim.Optimizer): base optimizer such as Adam or SGD. + config (OptimizerConfig): configuration object for optimizer. + grad_scaler (MegatronGradScaler): used for scaling gradients. Note that + this can be None. This case happens when `bf16 = True` and we don't + use any loss scale. Note that for `bf16 = True`, we can have + a constant gradient scaler. Also for `bf16 = False`, we + always require a grad scaler. + init_state_fn (Callable, optional): function to initialize state in the optimizer. + per_model_buffers (Dict[int, List[ParamAndGradBuffer]]): the implementation of the + distributed optimizer is centered on using a contiguous buffer for + communicating grads & params between the model state and the optimizer state. + You can find a more detailed description in + https://github.com/NVIDIA/Megatron-LM/blob/main/docs/source/distrib_optimizer.md. + data_parallel_group (torch.distributed.ProcessGroup): data-parallel group to use to + all-gather params after optimizer.step(). 
+ data_parallel_group_gloo (torch.distributed.ProcessGroup): gloo data-parallel group + (used in checkpoint loading and saving). + data_parallel_group_idx (int): index in data-parallel group (used by + distributed checkpointing logic). + """ + + from megatron.core.optimizer.distrib_optimizer import DistributedOptimizer + from apex.optimizers import FusedAdam as Adam + import itertools + + super(self.__class__, self).__init__( + optimizer, config, grad_scaler, init_state_fn, + ) + + assert isinstance( + optimizer, Adam + ), "Only Adam currently supported, due to checkpointing requirements." + + # Model grad buffer ranges. + assert per_model_buffers is not None, "per_model_buffers must be provided" + self.buffers = list(itertools.chain(*per_model_buffers.values())) + self.per_model_buffers = per_model_buffers + self.data_parallel_group = data_parallel_group + self.data_parallel_group_gloo = data_parallel_group_gloo + self.data_parallel_group_idx = data_parallel_group_idx + self.gbuf_idx_to_model_idx_map = {} + gbuf_idx = 0 + for model_idx, buffers in self.per_model_buffers.items(): + for _ in buffers: + self.gbuf_idx_to_model_idx_map[gbuf_idx] = model_idx + gbuf_idx += 1 + self.gbuf_ranges = [] + self.per_bucket_numel = [] + self.per_bucket_numel_unpadded = [] + for buffer in self.buffers: + + self.per_bucket_numel.append( + { + (buffer.param_dtype, buffer.grad_dtype): [ + bucket.grad_data.numel() for bucket in buffer.buckets + ] + } + ) + self.per_bucket_numel_unpadded.append( + { + (buffer.param_dtype, buffer.grad_dtype): [ + bucket.numel_unpadded for bucket in buffer.buckets + ] + } + ) + self.gbuf_ranges.append(DistributedOptimizer._build_gbuf_range_map(buffer)) + self.model_param_gbuf_map = DistributedOptimizer._build_model_param_gbuf_map(self.gbuf_ranges) + + # Optimizer ranges. 
+ ( + self.model_param_group_index_map, + self.opt_group_ranges, + ) = DistributedOptimizer._build_optimizer_group_ranges(self.optimizer.param_groups, self.gbuf_ranges) + + # Allocate main param shards. + ( + self.model_float16_groups, + self.model_fp32_groups, + self.shard_float16_groups, + self.shard_fp32_groups, + self.shard_fp32_from_float16_groups, + ) = DistributedOptimizer._build_model_and_main_param_groups( + self.gbuf_ranges, self.model_param_gbuf_map, self.opt_group_ranges + ) + + # Now construct data structures to manage all-gather handles. + self.all_gather_handles = [] + self.all_gather_handle_index_to_bucket_index_map = [] + self.model_index_to_all_gather_handle_index_map = {} + self.all_gather_handle_indices = [] + self.param_to_all_gather_handle_index_map = {} + + self.pbuf_view_items = self._get_model_param_buffer_dp_views() + for (gbuf_index, dtype, bucket_index, _, _) in self.pbuf_view_items: + self.all_gather_handle_index_to_bucket_index_map.append( + (gbuf_index, dtype, bucket_index) + ) + all_gather_handle_index = len(self.all_gather_handle_index_to_bucket_index_map) - 1 + self.all_gather_handles.append(None) + + # Store all all_gather_handle_indices. + model_idx = self.gbuf_idx_to_model_idx_map[gbuf_index] + if model_idx not in self.model_index_to_all_gather_handle_index_map: + self.model_index_to_all_gather_handle_index_map[model_idx] = [] + self.model_index_to_all_gather_handle_index_map[model_idx].append( + all_gather_handle_index + ) + + for param in self.buffers[gbuf_index].buckets[bucket_index].params_list: + self.param_to_all_gather_handle_index_map[param] = all_gather_handle_index + self.num_all_gather_handles = len(self.all_gather_handle_index_to_bucket_index_map) + + self.overlap_param_gather = self.config.overlap_param_gather + self.remove_pre_hook_handle = None + if self.overlap_param_gather: + self.enable_pre_hook() + + self.update_successful = False + + # Update optimizer groups. 
+ # - Also, leverage state_dict() and load_state_dict() to + # recast preexisting per-param state tensors. + self.optimizer.param_groups = [g["orig_group"] for g in self.opt_group_ranges] + self.optimizer.load_state_dict(self.optimizer.state_dict()) + + +def _copy_model_grads_to_main_grads_zero3(self): + """ + Copy model grads to main grads. + + Since this step follows a reduce-scatter through the DDP's grad + buffer, this method is responsible for copying the updated grads + from the grad buffer to the main shard's grad field. + """ + + # Utility method for copying group grads. + def copy_group_grads(model_groups, shard_main_groups): + for model_group, shard_main_group in zip(model_groups, shard_main_groups): + for model_param, shard_main_param in zip(model_group, shard_main_group): + + if hasattr(model_param, 'enable_zero3') and model_param.enable_zero3: + model_grad = model_param.main_grad + shard_model_grad = model_grad.view(-1) + shard_main_param.grad = shard_model_grad.float() + continue + param_range_map = self._get_model_param_range_map(model_param) + param_range = param_range_map["param"] + assert param_range.size == shard_main_param.nelement() + model_grad = model_param.main_grad + shard_model_grad = model_grad.view(-1)[param_range.start : param_range.end] + shard_main_param.grad = shard_model_grad.float() + + # Copy model groups to shard groups. + copy_group_grads(self.model_float16_groups, self.shard_fp32_from_float16_groups) + copy_group_grads(self.model_fp32_groups, self.shard_fp32_groups) + + +def reuse_fp32_param_distrib_optimizer_init_wrapper(init_func): + @wraps(init_func) + def reuse_fp32_param_distrib_optimizer_init(*args, **kwargs): + init_func(*args, **kwargs) + self = args[0] + global_args = get_args() + self.reuse_fp32_param = global_args.reuse_fp32_param if hasattr(global_args, "reuse_fp32_param") else False + # A flag that disables the value subtraction when the `fp16_tensor_convert_to_fp32_tensor` function is invoked for the first time. 
+ self.first_sub_flag = True + if self.reuse_fp32_param: + from mindspeed.op_builder import AlgorithmOpBuilder + reuse_data_ptr = AlgorithmOpBuilder().load().reuse_data_ptr + data_parallel_world_size = torch.distributed.get_world_size(self.data_parallel_group) + data_parallel_rank = torch.distributed.get_rank(self.data_parallel_group_gloo) + self.model_param_bucket_and_res_map = {} + self.model_param_bucket_and_shard_main_param_int32_view_map = {} + self.shard_main_param_res_buffers = [] + self.bucket_num_groups = [] + if data_parallel_world_size == 1: + self.shard_fp32_param_fp16_view_group = [] + for buffer in self.buffers: + buffer_numel = buffer.param_data.numel() + shard_res_and_buffer_model_param = torch.zeros(buffer_numel * 2, dtype=torch.bfloat16, device=buffer.param_data.device) + shard_main_param_int32_view_buffer = torch.empty(buffer_numel, dtype=torch.int32, device=buffer.param_data.device) + reuse_data_ptr(shard_main_param_int32_view_buffer, shard_res_and_buffer_model_param, 0) + self.shard_main_param_res_buffers.append(shard_res_and_buffer_model_param) + self.model_param_bucket_and_shard_main_param_int32_view_map[shard_res_and_buffer_model_param] = shard_main_param_int32_view_buffer + for model_fp16_params_this_group, shard_fp32_from_float16_group in zip( + self.model_float16_groups, self.shard_fp32_from_float16_groups): + for i, (model_param, shard_fp32_main_param) in enumerate( + zip(model_fp16_params_this_group, shard_fp32_from_float16_group)): + gbuf_index, _, bucket_id = self.model_param_gbuf_map[model_param] + data_start_index, data_end_index, bucket_id = self.buffers[gbuf_index].param_index_map[model_param] + reuse_data_ptr(shard_fp32_from_float16_group[i], self.shard_main_param_res_buffers[gbuf_index], data_start_index) + old_param_data = model_param.data + model_param.data = self.shard_main_param_res_buffers[gbuf_index][data_start_index + data_end_index: 2 * data_end_index].view(old_param_data.shape) + 
model_param.data.detach().copy_(old_param_data) + self.shard_fp32_param_fp16_view_group.append(self.shard_main_param_res_buffers[gbuf_index][2 * data_start_index: 2 * data_end_index]) + for i, buffer in enumerate(self.buffers): + buffer_numel = buffer.param_data.numel() + reuse_data_ptr(buffer.param_data, self.shard_main_param_res_buffers[i], buffer_numel) + else: + for buffer in self.buffers: + self.bucket_num_group = [] + bucket_res_numel = 0 + res_numel = buffer.numel // data_parallel_world_size + shard_main_param_res_buffer = torch.zeros(res_numel, dtype=torch.bfloat16, device=buffer.param_data.device) + self.shard_main_param_res_buffers.append(shard_main_param_res_buffer) + for bucket in buffer.buckets: + self.bucket_num_group.append(bucket.param_data.numel()) + param_data_dp_numel = bucket.param_data.numel() // data_parallel_world_size + shard_main_param_int32_view_bucket = torch.empty(param_data_dp_numel, dtype=torch.int32, device=bucket.param_data.device) + reuse_data_ptr( + shard_main_param_int32_view_bucket, + buffer.param_data, + (bucket_res_numel * data_parallel_world_size) // 2 + max(0, data_parallel_rank - 1) * param_data_dp_numel // 2) + self.model_param_bucket_and_res_map[bucket.param_data] = self.shard_main_param_res_buffers[-1][bucket_res_numel: bucket_res_numel + param_data_dp_numel] + self.model_param_bucket_and_shard_main_param_int32_view_map[bucket.param_data] = shard_main_param_int32_view_bucket + bucket_res_numel += param_data_dp_numel + self.bucket_num_groups.append(self.bucket_num_group) + for model_fp16_params_this_group, shard_fp32_from_float16_group in zip( + self.model_float16_groups, self.shard_fp32_from_float16_groups): + for i, (model_param, shard_fp32_main_param) in enumerate( + zip(model_fp16_params_this_group, shard_fp32_from_float16_group)): + world_range = self._get_model_param_range_map(model_param)["gbuf_world_in_bucket"] + gbuf_index, _, bucket_id = self.model_param_gbuf_map[model_param] + model_param_buffer = 
self.buffers[gbuf_index].param_data + bucket_offset_in_buffer = sum(self.bucket_num_groups[gbuf_index][:bucket_id]) // 2 + model_param_bucket = self.buffers[gbuf_index].buckets[bucket_id].param_data + model_param_bucket_numel_per_dp = model_param_bucket.numel() // data_parallel_world_size + shard_fp32_param_bucket_offset = world_range.start if data_parallel_rank == 0 else \ + world_range.start - model_param_bucket_numel_per_dp * (1 + data_parallel_rank) // 2 + shard_main_param_buffer_start = bucket_offset_in_buffer + shard_fp32_param_bucket_offset + reuse_data_ptr(shard_fp32_from_float16_group[i], model_param_buffer, shard_main_param_buffer_start) + torch_npu.npu.empty_cache() + self._copy_model_params_to_main_params = _copy_model_params_to_main_params + self.load_parameter_state_from_dp_zero_func = self.load_parameter_state_from_dp_zero + self.load_parameter_state_from_dp_zero = types.MethodType(load_parameter_state_from_dp_zero, self) + self.get_parameter_state_dp_zero_func = self.get_parameter_state_dp_zero + self.get_parameter_state_dp_zero = types.MethodType(get_parameter_state_dp_zero, self) + self.fp16_tensor_convert_to_fp32_tensor = types.MethodType(fp16_tensor_convert_to_fp32_tensor, self) + self.fp32_tensor_convert_to_fp16_tensor = types.MethodType(fp32_tensor_convert_to_fp16_tensor, self) + return reuse_fp32_param_distrib_optimizer_init + + +def _copy_model_params_to_main_params(): + pass + + +def ema_distrib_optimizer_init_wrapper(init_func): + @wraps(init_func) + def ema_distrib_optimizer_init(*args, **kwargs): + init_func(*args, **kwargs) + self = args[0] + self.load_parameter_state_from_dp_zero_func_temp = self.load_parameter_state_from_dp_zero + self.load_parameter_state_from_dp_zero = types.MethodType(load_ema_state_from_dp_zero, self) + self.get_parameter_state_dp_zero_func_temp = self.get_parameter_state_dp_zero + self.get_parameter_state_dp_zero = types.MethodType(get_ema_state_dp_zero, self) + return ema_distrib_optimizer_init + + +def 
load_ema_state_from_dp_zero(self, state_dict): + """Load parameter state (i.e., parameter & optimizer tensors) from DP 0 rank, + using the new checkpoint format with coalesced state across buckets. + + This method performs the reverse of get_parameter_state_dp_zero(): + - Scatter contiguous buffers from DP rank 0 to each DP rank (each DP + rank receives its relevant subset of the world buffers). + - For each DP rank, copy param & optimizer shards from contiguous CPU + buffers. (e.g., one buffer each for main_param, exp_avg, and + exp_avg_sq). + """ + self.load_parameter_state_from_dp_zero_func_temp(state_dict) + # Data parallelism variables. + data_parallel_world_size = self.data_parallel_group_gloo.size() + data_parallel_rank = torch.distributed.get_rank(self.data_parallel_group_gloo) + data_parallel_group_gloo = self.data_parallel_group_gloo + data_parallel_global_ranks = torch.distributed.get_process_group_ranks( + self.data_parallel_group_gloo + ) + + # Scatter tensors to all DP ranks. + for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges): + for dtype, gbuf_range_map_for_all_buckets in gbuf_range_maps.items(): + if data_parallel_rank == 0: + buffer_numel_unpadded = self.buffers[gbuf_idx].numel_unpadded + checkpoint_numel_unpadded = state_dict[gbuf_idx][dtype]["numel_unpadded"] + assert buffer_numel_unpadded == checkpoint_numel_unpadded, ( + f"Number of unpadded elements must be same in current run " + f"({buffer_numel_unpadded}) and checkpoint ({checkpoint_numel_unpadded})" + ) + for key in ("ema_params",): + offset_in_world_tensors = 0 + for bucket_idx, gbuf_range_map in enumerate(gbuf_range_map_for_all_buckets): + # Compute local DP contiguous shard's size. 
+ gbuf_world_numel = ( + self.buffers[gbuf_idx].buckets[bucket_idx].grad_data.numel() + ) + assert gbuf_world_numel % data_parallel_world_size == 0 + gbuf_local_numel = gbuf_world_numel // data_parallel_world_size + gbuf_world_numel_unpadded = ( + self.buffers[gbuf_idx].buckets[bucket_idx].numel_unpadded + ) + assert gbuf_world_numel_unpadded <= gbuf_world_numel + + # Contiguous local shards (received from DP rank 0). + recv_tensor = torch.empty( + (gbuf_local_numel,), dtype=torch.float32, device="cpu" + ) + + # Scatter tensor list. + if data_parallel_rank == 0: + world_tensors = state_dict[gbuf_idx][dtype][key] + + start = offset_in_world_tensors + end = offset_in_world_tensors + gbuf_world_numel_unpadded + assert 0 <= start < end <= world_tensors.numel() + world_tensor = world_tensors[start:end] + offset_in_world_tensors += gbuf_world_numel_unpadded + + # Pad world_tensor to gbuf_world_numel. Don't pad at the front, pad at the back. + world_tensor = torch.nn.functional.pad( + world_tensor, (0, gbuf_world_numel - gbuf_world_numel_unpadded) + ) + assert world_tensor.numel() == gbuf_world_numel + gbuf_start_idxs = list(range(0, gbuf_world_numel, gbuf_local_numel)) + send_tensors = [ + world_tensor[i: (i + gbuf_local_numel)] for i in gbuf_start_idxs + ] + else: + send_tensors = None + + # Scatter. + if get_args().disable_gloo_group: + from mindspeed.utils import _scatter_hccl + _scatter_hccl( + recv_tensor, + send_tensors, + data_parallel_global_ranks[0], + self.data_parallel_group) + else: + torch.distributed.scatter( + recv_tensor, + send_tensors, + data_parallel_global_ranks[0], + data_parallel_group_gloo, + ) + + # Copy local contiguous shards to param/optim shards. + for model_param, param_range_map in gbuf_range_map["param_map"].items(): + + # Main param & optimizer states. 
+ group_index, group_order = self.model_param_group_index_map[model_param] + main_param = self.optimizer.param_groups[group_index]["params"][ + group_order + ] + + optim_state = self.optimizer.state[main_param] + if key not in self.optimizer.state[main_param].keys(): + optim_state[key] = main_param.clone() + tensor_to_copy_into = optim_state[key] + + # Copy states into contiguous shard. + gbuf_local_start = param_range_map["gbuf_local"].start + gbuf_local_end = param_range_map["gbuf_local"].end + tensor_to_copy_into.data.copy_( + recv_tensor[gbuf_local_start:gbuf_local_end] + ) + + +def get_ema_state_dp_zero(self): + """Get parameter state (i.e., parameter & optimizer tensors). + + This method performs two steps: + - For each DP rank, copy param & optimizer shards to contiguous CPU + buffers (e.g., one buffer each for main_param, exp_avg, and + exp_avg_sq). + - Gather contiguous buffers on DP rank 0 and concatenate to world + buffers. + """ + state = self.get_parameter_state_dp_zero_func_temp() + # Data parallelism variables. + data_parallel_world_size = self.data_parallel_group_gloo.size() + data_parallel_rank = torch.distributed.get_rank(self.data_parallel_group_gloo) + data_parallel_group_gloo = self.data_parallel_group_gloo + data_parallel_global_ranks = torch.distributed.get_process_group_ranks( + self.data_parallel_group_gloo + ) + for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges): + + # Iterate grad buffers (by data type). + dtype_state = state[gbuf_idx] + assert len(gbuf_range_maps) == 1, "single dtype supported, for now." + for dtype, gbuf_range_map_for_all_buckets in gbuf_range_maps.items(): + buffer_numel_unpadded = self.buffers[gbuf_idx].numel_unpadded + # Create coalesced tensors for all state related to parameters in this buffer. 
+ world_tensors = {} + if data_parallel_rank == 0: + world_tensors = { + key: torch.empty( + (buffer_numel_unpadded,), dtype=torch.float32, device="cpu" + ) + for key in ("ema_params",) + } + world_tensors["numel_unpadded"] = buffer_numel_unpadded + offset_in_world_tensors = 0 + for bucket_idx, gbuf_range_map in enumerate(gbuf_range_map_for_all_buckets): + + # Compute local DP contiguous shard's size. + gbuf_world_numel = self.buffers[gbuf_idx].buckets[bucket_idx].grad_data.numel() + assert gbuf_world_numel % data_parallel_world_size == 0 + gbuf_local_numel = gbuf_world_numel // data_parallel_world_size + + gbuf_world_numel_unpadded = ( + self.buffers[gbuf_idx].buckets[bucket_idx].numel_unpadded + ) + assert gbuf_world_numel_unpadded <= gbuf_world_numel + + local_shards = { + key: torch.empty((gbuf_local_numel,), dtype=torch.float32, device="cpu") + for key in ("ema_params", ) + } + + # Build contiguous DP rank shards (for param + optim states). + for model_param, param_range_map in gbuf_range_map["param_map"].items(): + + # Main param & optimizer states. + group_index, group_order = self.model_param_group_index_map[model_param] + main_param = self.optimizer.param_groups[group_index]["params"][group_order] + optim_state = self.optimizer.state[main_param] + + tensors = { + "param": main_param, + **optim_state, + } + + # Copy states into contiguous shard. + gbuf_local_start = param_range_map["gbuf_local"].start + gbuf_local_end = param_range_map["gbuf_local"].end + for key in local_shards: + local_shards[key][gbuf_local_start:gbuf_local_end].data.copy_( + tensors[key].detach().cpu() + ) + + # Gather contiguous shards on DP rank 0. + for key, send_tensor in local_shards.items(): + + # Gather tensor list. + if data_parallel_rank == 0: + recv_tensors = [ + torch.empty((gbuf_local_numel,), dtype=torch.float32, device="cpu") + for _ in range(data_parallel_world_size) + ] + else: + recv_tensors = None + + # Gather. 
+ if get_args().disable_gloo_group: + from mindspeed.utils import _gather_hccl + _gather_hccl( + send_tensor, + recv_tensors, + self.data_parallel_group, + ) + else: + torch.distributed.gather( + send_tensor, + recv_tensors, + data_parallel_global_ranks[0], + data_parallel_group_gloo, + ) + + # Concatenate. + if data_parallel_rank == 0: + recv_tensors_concatenated = torch.cat(recv_tensors) + # Copy this bucket's collected all-gather tensors into the right place in the + # tensor for the buffer. The tensor for the buffer gets rid of the padding + # between buckets. + start = offset_in_world_tensors + end = offset_in_world_tensors + gbuf_world_numel_unpadded + world_tensors[key][start:end].copy_( + recv_tensors_concatenated[:gbuf_world_numel_unpadded] + ) + + offset_in_world_tensors += gbuf_world_numel_unpadded + + # Collect world state. + dtype_state[dtype].update(world_tensors) + state[gbuf_idx] = dtype_state + + return state + + + +def load_parameter_state_from_dp_zero(self, state_dict): + self.load_parameter_state_from_dp_zero_func(state_dict) + self.first_sub_flag = False + data_parallel_world_size = self.data_parallel_group_gloo.size() + data_parallel_rank = torch.distributed.get_rank(self.data_parallel_group_gloo) + data_parallel_group_gloo = self.data_parallel_group_gloo + data_parallel_global_ranks = torch.distributed.get_process_group_ranks( + self.data_parallel_group_gloo + ) + if data_parallel_world_size == 1 or \ + not hasattr(self, "shard_main_param_res_buffers"): + return + for i, shard_main_param_res_buffer in enumerate(self.shard_main_param_res_buffers): + shard_res_numel = shard_main_param_res_buffer.numel() + recv_tensor = torch.empty((shard_res_numel,), dtype=torch.float16, device="cpu") + if data_parallel_rank == 0: + send_tensors = [ + state_dict["shard_main_param_res"][i][ + dpr * shard_res_numel: (dpr + 1) * shard_res_numel] for dpr in range(data_parallel_world_size) + ] + else: + send_tensors = None + + if get_args().disable_gloo_group: + 
from mindspeed.utils import _scatter_hccl + _scatter_hccl( + recv_tensor, + send_tensors, + data_parallel_global_ranks[0], + self.data_parallel_group) + else: + torch.distributed.scatter( + recv_tensor, + send_tensors, + data_parallel_global_ranks[0], + data_parallel_group_gloo, + ) + recv_tensor_bf16_view = torch.tensor(recv_tensor.data.untyped_storage(), dtype=torch.bfloat16, device=recv_tensor.device) + shard_main_param_res_buffer.copy_(recv_tensor_bf16_view) + + +def get_parameter_state_dp_zero(self): + state = self.get_parameter_state_dp_zero_func() + data_parallel_world_size = torch.distributed.get_world_size(self.data_parallel_group) + data_parallel_rank = torch.distributed.get_rank(self.data_parallel_group_gloo) + data_parallel_group_gloo = self.data_parallel_group_gloo + data_parallel_global_ranks = torch.distributed.get_process_group_ranks( + self.data_parallel_group_gloo + ) + if data_parallel_world_size == 1 or not hasattr(self, "shard_main_param_res_buffers"): + return state + + # gather buffer res + buffer_res_full_shard = [] + for shard_main_param_res_buffer in self.shard_main_param_res_buffers: + if get_args().disable_gloo_group: + recv_tensors = [torch.empty(shard_main_param_res_buffer.numel(), dtype=torch.float16, device="cpu") for _ + in range(data_parallel_world_size)] + else: + if data_parallel_rank == 0: + recv_tensors = [torch.empty((shard_main_param_res_buffer.numel(),), dtype=torch.float16, device="cpu") for _ in range(data_parallel_world_size)] + else: + recv_tensors = None + + send_tensor = torch.empty((shard_main_param_res_buffer.numel(),), dtype=torch.float16, device="cpu") + send_tensor_bf16_view = torch.tensor(send_tensor.data.untyped_storage(), dtype=torch.bfloat16, device=send_tensor.device) + send_tensor_bf16_view.copy_(shard_main_param_res_buffer.detach().cpu()) + if get_args().disable_gloo_group: + from mindspeed.utils import _gather_hccl + _gather_hccl( + send_tensor, + recv_tensors, + self.data_parallel_group, + ) + else: + 
torch.distributed.gather( + send_tensor, + recv_tensors, + data_parallel_global_ranks[0], + data_parallel_group_gloo, + ) + if data_parallel_rank == 0: + buffer_res_full_shard.append(torch.cat(recv_tensors)) + + state['shard_main_param_res'] = buffer_res_full_shard + return state + + +def fp16_tensor_convert_to_fp32_tensor(self): + """ + res(0000) + bf16(pppp) -> fp32(0p0p0p0p) + + Transform the bf16 data and residuals data in the continuous memory block + into the fp32 tensor through view transposition. + """ + data_parallel_world_size = torch.distributed.get_world_size(self.data_parallel_group) + data_parallel_rank = torch.distributed.get_rank(self.data_parallel_group_gloo) + iteration = getattr(get_args(), "iteration", 0) + npu_deterministic = getattr(get_args(), "npu_deterministic", False) + if data_parallel_world_size == 1: + for shard_fp32_param_fp16_view in self.shard_fp32_param_fp16_view_group: + shard_fp32_param_fp16_view.copy_( + shard_fp32_param_fp16_view.view(2, -1).transpose(1, 0).reshape(-1).contiguous()) + + if npu_deterministic: + if not self.first_sub_flag: + fp16_tensor_convert_to_fp32_tensor_deterministic(self.shard_fp32_from_float16_groups, self.optimizer) + else: + for shard_res_and_buffer_model_param in self.shard_main_param_res_buffers: + shard_main_param_int32_view_buffer = self.model_param_bucket_and_shard_main_param_int32_view_map[shard_res_and_buffer_model_param] + if not self.first_sub_flag: + shard_main_param_int32_view_buffer.sub_(32768) + else: + for buffer in self.buffers: + for bucket in buffer.buckets: + bucket_param_data = bucket.param_data + param_data_dp_numel = bucket_param_data.numel() // data_parallel_world_size + bucket_res = self.model_param_bucket_and_res_map[bucket_param_data] + if data_parallel_rank == 0: + bucket_param_data[param_data_dp_numel:param_data_dp_numel * 2].copy_(bucket_param_data[:param_data_dp_numel]) + bucket_res_position = max(0, data_parallel_rank - 1) * param_data_dp_numel + shard_fp32_main_param_view = 
bucket_param_data[bucket_res_position: bucket_res_position + param_data_dp_numel * 2] + shard_main_param_int32_view_bucket = self.model_param_bucket_and_shard_main_param_int32_view_map[bucket_param_data] + + loops = param_data_dp_numel // TRANSPOSE_BF16_BLOCK_SIZE + remain = param_data_dp_numel % TRANSPOSE_BF16_BLOCK_SIZE + workspace = torch.zeros( + TRANSPOSE_BF16_BLOCK_SIZE * 2, dtype=torch.bfloat16, device=bucket_res.device) + residual_space = bucket_res + bf16_space_dp_rank = max(1, data_parallel_rank) + bf16_space = bucket_param_data[param_data_dp_numel * bf16_space_dp_rank :param_data_dp_numel * (bf16_space_dp_rank + 1)] + + for loop in range(loops): + copy_start = loop * TRANSPOSE_BF16_BLOCK_SIZE + copy_end = (loop + 1) * TRANSPOSE_BF16_BLOCK_SIZE + workspace_convert_view = workspace[:TRANSPOSE_BF16_BLOCK_SIZE * 2] + workspace[:TRANSPOSE_BF16_BLOCK_SIZE].copy_(residual_space[copy_start: copy_end]) + workspace[TRANSPOSE_BF16_BLOCK_SIZE:TRANSPOSE_BF16_BLOCK_SIZE * 2].copy_(bf16_space[copy_start: copy_end]) + shard_fp32_main_param_view[copy_start * 2: copy_end * 2].copy_( + workspace_convert_view.view(2, -1).transpose(1, 0).reshape(-1).contiguous()) + + if remain > 0: + workspace_convert_view = workspace[:remain * 2] + workspace[:remain].copy_(residual_space[-remain:]) + workspace[remain:remain * 2].copy_(bf16_space[-remain:]) + shard_fp32_main_param_view[-remain * 2:].copy_( + workspace_convert_view.view(2, -1).transpose(1, 0).reshape(-1).contiguous()) + + if not self.first_sub_flag and not npu_deterministic: + shard_main_param_int32_view_bucket[:param_data_dp_numel].sub_(32768) + + if not self.first_sub_flag and npu_deterministic: + fp16_tensor_convert_to_fp32_tensor_deterministic(self.shard_fp32_from_float16_groups, self.optimizer) + + +def fp32_tensor_convert_to_fp16_tensor(self): + """ + fp32(0p0p0p0p) -> fp32(0'p0'p0'p0'p) -> res(0000) + bf16(pppp) + + Transform the fp32 tensor in the continuous memory block + into the bf16 data and residual through view 
transposition. + """ + data_parallel_world_size = torch.distributed.get_world_size(self.data_parallel_group) + data_parallel_rank = torch.distributed.get_rank(self.data_parallel_group_gloo) + npu_deterministic = getattr(get_args(), "npu_deterministic", False) + if data_parallel_world_size == 1: + if npu_deterministic: + fp32_tensor_convert_to_fp16_tensor_deterministic(self.shard_fp32_from_float16_groups, self.optimizer) + else: + for shard_res_and_buffer_model_param in self.shard_main_param_res_buffers: + shard_main_param_int32_view_buffer = self.model_param_bucket_and_shard_main_param_int32_view_map[shard_res_and_buffer_model_param] + shard_main_param_int32_view_buffer.add_(32768) + self.first_sub_flag = False + + for shard_fp32_param_fp16_view in self.shard_fp32_param_fp16_view_group: + shard_fp32_param_fp16_view.copy_( + shard_fp32_param_fp16_view.view(-1, 2).transpose(1, 0).reshape(-1).contiguous()) + else: + if npu_deterministic: + fp32_tensor_convert_to_fp16_tensor_deterministic(self.shard_fp32_from_float16_groups, self.optimizer) + else: + for buffer in self.buffers: + for bucket in buffer.buckets: + bucket_param_data = bucket.param_data + param_data_dp_numel = bucket_param_data.numel() // data_parallel_world_size + shard_main_param_int32_view_bucket = self.model_param_bucket_and_shard_main_param_int32_view_map[bucket_param_data] + shard_main_param_int32_view_bucket[:param_data_dp_numel].add_(32768) + + for buffer in self.buffers: + for bucket in buffer.buckets: + self.first_sub_flag = False + bucket_param_data = bucket.param_data + param_data_dp_numel = bucket_param_data.numel() // data_parallel_world_size + bucket_res = self.model_param_bucket_and_res_map[bucket_param_data] + bucket_res_position = max(0, data_parallel_rank - 1) * param_data_dp_numel + shard_fp32_main_param_view = bucket_param_data[bucket_res_position: bucket_res_position + param_data_dp_numel * 2] + + loops = param_data_dp_numel // TRANSPOSE_BF16_BLOCK_SIZE + remain = param_data_dp_numel % 
TRANSPOSE_BF16_BLOCK_SIZE + workspace = torch.zeros( + TRANSPOSE_BF16_BLOCK_SIZE * 2, dtype=torch.bfloat16, device=bucket_res.device) + bf16_space_dp_rank = max(0, data_parallel_rank - 1) + residual_space = bucket_res + bf16_space = bucket_param_data[ + param_data_dp_numel * bf16_space_dp_rank :param_data_dp_numel * (bf16_space_dp_rank + 1)] + + for loop in range(loops): + workspace_convert_view = workspace[:TRANSPOSE_BF16_BLOCK_SIZE * 2] + workspace_convert_view.copy_( + shard_fp32_main_param_view[loop * TRANSPOSE_BF16_BLOCK_SIZE * 2: (loop + 1) * TRANSPOSE_BF16_BLOCK_SIZE * 2]) + temp = workspace_convert_view.view(-1, 2).transpose(1, 0).reshape(-1).contiguous() + residual_space[loop * TRANSPOSE_BF16_BLOCK_SIZE: (loop + 1) * TRANSPOSE_BF16_BLOCK_SIZE].copy_( + temp[:TRANSPOSE_BF16_BLOCK_SIZE]) + bf16_space[loop * TRANSPOSE_BF16_BLOCK_SIZE: (loop + 1) * TRANSPOSE_BF16_BLOCK_SIZE].copy_( + temp[TRANSPOSE_BF16_BLOCK_SIZE: TRANSPOSE_BF16_BLOCK_SIZE * 2]) + + if remain > 0: + workspace_convert_view = workspace[:remain * 2] + workspace_convert_view.copy_(shard_fp32_main_param_view[-remain * 2:]) + temp = workspace_convert_view.view(-1, 2).transpose(1, 0).reshape(-1).contiguous() + residual_space[-remain:].copy_(temp[:remain]) + bf16_space[-remain:].copy_(temp[remain: remain * 2]) + + if data_parallel_rank != 0: + shard_fp32_main_param_view[param_data_dp_numel:param_data_dp_numel * 2].copy_(shard_fp32_main_param_view[:param_data_dp_numel]) + + +def fp16_tensor_convert_to_fp32_tensor_deterministic(shard_fp32_from_float16_groups, optimizer): + assert hasattr(optimizer, "state") + for shard_fp32_from_float16_group in shard_fp32_from_float16_groups: + for shard_fp32_param in shard_fp32_from_float16_group: + if "exp_avg_sq" not in optimizer.state[shard_fp32_param]: + continue + shard_int32_tensor = shard_fp32_param.view(torch.int32) + assert shard_int32_tensor.numel() == shard_fp32_param.numel() + loops = shard_int32_tensor.numel() // TRANSPOSE_BF16_BLOCK_SIZE + remain = 
shard_int32_tensor.numel() % TRANSPOSE_BF16_BLOCK_SIZE + exp_avg_sq_flatten = optimizer.state[shard_fp32_param]["exp_avg_sq"].reshape(-1) + for loop in range(loops): + odd_even_tensor = torch.sign(exp_avg_sq_flatten[loop * TRANSPOSE_BF16_BLOCK_SIZE: (loop + 1) * TRANSPOSE_BF16_BLOCK_SIZE] > 0) + shard_int32_tensor[loop * TRANSPOSE_BF16_BLOCK_SIZE: (loop + 1) * TRANSPOSE_BF16_BLOCK_SIZE].add_(odd_even_tensor) + if remain > 0: + odd_even_tensor = torch.sign(exp_avg_sq_flatten[-remain:] > 0) + shard_int32_tensor[-remain:].add_(odd_even_tensor) + shard_int32_tensor.sub_(32768) + optimizer.state[shard_fp32_param]["exp_avg_sq"].abs_() + + +def fp32_tensor_convert_to_fp16_tensor_deterministic(shard_fp32_from_float16_groups, optimizer): + assert hasattr(optimizer, "state") + for shard_fp32_from_float16_group in shard_fp32_from_float16_groups: + for shard_fp32_param in shard_fp32_from_float16_group: + if "exp_avg_sq" not in optimizer.state[shard_fp32_param]: + continue + shard_int32_tensor = shard_fp32_param.view(torch.int32) + assert shard_int32_tensor.numel() == shard_fp32_param.numel() + loops = shard_int32_tensor.numel() // TRANSPOSE_BF16_BLOCK_SIZE + remain = shard_int32_tensor.numel() % TRANSPOSE_BF16_BLOCK_SIZE + exp_avg_sq_flatten = optimizer.state[shard_fp32_param]["exp_avg_sq"].reshape(-1) + shard_int32_tensor.add_(32768) + for loop in range(loops): + odd_even_tensor = ((shard_int32_tensor[loop * TRANSPOSE_BF16_BLOCK_SIZE: (loop + 1) * TRANSPOSE_BF16_BLOCK_SIZE] & 131071) == 65536).int() + shard_int32_tensor[loop * TRANSPOSE_BF16_BLOCK_SIZE: (loop + 1) * TRANSPOSE_BF16_BLOCK_SIZE].sub_(odd_even_tensor) + sign_tensor = torch.sign(odd_even_tensor - 0.5) + exp_avg_sq_flatten[loop * TRANSPOSE_BF16_BLOCK_SIZE: (loop + 1) * TRANSPOSE_BF16_BLOCK_SIZE].mul_(sign_tensor) + if remain > 0: + odd_even_tensor = ((shard_int32_tensor[-remain:] & 131071) == 65536).int() + shard_int32_tensor[-remain:].sub_(odd_even_tensor) + sign_tensor = torch.sign(odd_even_tensor - 0.5) + 
exp_avg_sq_flatten[-remain:].mul_(sign_tensor) + + +def get_parameter_state_dp_zero_hccl(self): + """ + Replace the communication method of gather from gloo to hccl. + """ + + # Data parallelism variables. + data_parallel_world_size = self.data_parallel_group.size() + data_parallel_rank = torch.distributed.get_rank(self.data_parallel_group) + data_parallel_group = self.data_parallel_group + + # Collect param states. + state = { + "buckets_coalesced": True, + } + for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges): + + # Iterate grad buffers (by data type). + dtype_state = {} + assert len(gbuf_range_maps) == 1, "single dtype supported, for now." + for dtype, gbuf_range_map_for_all_buckets in gbuf_range_maps.items(): + buffer_numel_unpadded = self.buffers[gbuf_idx].numel_unpadded + # Create coalesced tensors for all state related to parameters in this buffer. + world_tensors = {} + if data_parallel_rank == 0: + world_tensors = { + key: torch.zeros( + (buffer_numel_unpadded,), dtype=torch.float32, device="cpu" + ) + for key in ("param", "exp_avg", "exp_avg_sq") + } + world_tensors["numel_unpadded"] = buffer_numel_unpadded + offset_in_world_tensors = 0 + for bucket_idx, gbuf_range_map in enumerate(gbuf_range_map_for_all_buckets): + + # Compute local DP contiguous shard's size. + gbuf_world_numel = self.buffers[gbuf_idx].buckets[bucket_idx].grad_data.numel() + assert gbuf_world_numel % data_parallel_world_size == 0 + gbuf_local_numel = gbuf_world_numel // data_parallel_world_size + + gbuf_world_numel_unpadded = ( + self.buffers[gbuf_idx].buckets[bucket_idx].numel_unpadded + ) + assert gbuf_world_numel_unpadded <= gbuf_world_numel + + local_shards = { + key: torch.zeros((gbuf_local_numel,), dtype=torch.float32, device="cpu") + for key in ("param", "exp_avg", "exp_avg_sq") + } + + # Build contiguous DP rank shards (for param + optim states). + for model_param, param_range_map in gbuf_range_map["param_map"].items(): + + # Main param & optimizer states. 
+ group_index, group_order = self.model_param_group_index_map[model_param] + main_param = self.optimizer.param_groups[group_index]["params"][group_order] + optim_state = self.optimizer.state[main_param] + + tensors = { + "param": main_param, + **optim_state, + } + + # Copy states into contiguous shard. + gbuf_local_start = param_range_map["gbuf_local"].start + gbuf_local_end = param_range_map["gbuf_local"].end + for key in local_shards: + local_shards[key][gbuf_local_start:gbuf_local_end].data.copy_( + tensors[key].detach().cpu() + ) + + # Gather contiguous shards on DP rank 0. + for key, send_tensor in local_shards.items(): + + # Gather tensor list. + recv_tensors = [ + torch.zeros((gbuf_local_numel,), dtype=torch.float32, device="cpu") + for _ in range(data_parallel_world_size) + ] + + # Gather. + from mindspeed.utils import _gather_hccl + _gather_hccl( + send_tensor, + recv_tensors, + data_parallel_group, + ) + + # Concatenate. + if data_parallel_rank == 0: + recv_tensors_concatenated = torch.cat(recv_tensors) + # Copy this bucket's collected all-gather tensors into the right place in the + # tensor for the buffer. The tensor for the buffer gets rid of the padding + # between buckets. + start = offset_in_world_tensors + end = offset_in_world_tensors + gbuf_world_numel_unpadded + world_tensors[key][start:end].copy_( + recv_tensors_concatenated[:gbuf_world_numel_unpadded] + ) + + offset_in_world_tensors += gbuf_world_numel_unpadded + + # Collect world state. + dtype_state[dtype] = world_tensors + state[gbuf_idx] = dtype_state + + return state + + +def load_parameter_state_from_dp_zero_hccl(self, state_dict): + """Load parameter state (i.e., parameter & optimizer tensors) from DP 0 rank, + using the new checkpoint format with coalesced state across buckets. + + This method performs the reverse of get_parameter_state_dp_zero(): + - Scatter contiguous buffers from DP rank 0 to each DP rank (each DP + rank receives its relevant subset of the world buffers). 
+ - For each DP rank, copy param & optimizer shards from contiguous CPU + buffers. (e.g., one buffer each for main_param, exp_avg, and + exp_avg_sq). + """ + + # Data parallelism variables. + data_parallel_world_size = self.data_parallel_group.size() + data_parallel_rank = torch.distributed.get_rank(self.data_parallel_group) + data_parallel_group = self.data_parallel_group + data_parallel_global_ranks = torch.distributed.get_process_group_ranks( + self.data_parallel_group + ) + + # Scatter tensors to all DP ranks. + for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges): + for dtype, gbuf_range_map_for_all_buckets in gbuf_range_maps.items(): + if data_parallel_rank == 0: + buffer_numel_unpadded = self.buffers[gbuf_idx].numel_unpadded + checkpoint_numel_unpadded = state_dict[gbuf_idx][dtype]["numel_unpadded"] + assert buffer_numel_unpadded == checkpoint_numel_unpadded, ( + f"Number of unpadded elements must be same in current run " + f"({buffer_numel_unpadded}) and checkpoint ({checkpoint_numel_unpadded})" + ) + for key in ("param", "exp_avg", "exp_avg_sq"): + offset_in_world_tensors = 0 + for bucket_idx, gbuf_range_map in enumerate(gbuf_range_map_for_all_buckets): + # Compute local DP contiguous shard's size. + gbuf_world_numel = ( + self.buffers[gbuf_idx].buckets[bucket_idx].grad_data.numel() + ) + assert gbuf_world_numel % data_parallel_world_size == 0 + gbuf_local_numel = gbuf_world_numel // data_parallel_world_size + gbuf_world_numel_unpadded = ( + self.buffers[gbuf_idx].buckets[bucket_idx].numel_unpadded + ) + assert gbuf_world_numel_unpadded <= gbuf_world_numel + + # Contiguous local shards (received from DP rank 0). + recv_tensor = torch.zeros( + (gbuf_local_numel,), dtype=torch.float32, device="cpu" + ) + + # Scatter tensor list. 
+ if data_parallel_rank == 0: + world_tensors = state_dict[gbuf_idx][dtype][key] + + start = offset_in_world_tensors + end = offset_in_world_tensors + gbuf_world_numel_unpadded + assert 0 <= start < end <= world_tensors.numel() + world_tensor = world_tensors[start:end] + offset_in_world_tensors += gbuf_world_numel_unpadded + + # Pad world_tensor to gbuf_world_numel. Don't pad at the front, pad at the back. + world_tensor = torch.nn.functional.pad( + world_tensor, (0, gbuf_world_numel - gbuf_world_numel_unpadded) + ) + assert world_tensor.numel() == gbuf_world_numel + gbuf_start_idxs = list(range(0, gbuf_world_numel, gbuf_local_numel)) + send_tensors = [ + world_tensor[i: (i + gbuf_local_numel)] for i in gbuf_start_idxs + ] + else: + send_tensors = None + + # Scatter. + from mindspeed.utils import _scatter_hccl + _scatter_hccl( + recv_tensor, + send_tensors, + data_parallel_global_ranks[0], + data_parallel_group) + + # Copy local contiguous shards to param/optim shards. + for model_param, param_range_map in gbuf_range_map["param_map"].items(): + + # Main param & optimizer states. + group_index, group_order = self.model_param_group_index_map[model_param] + main_param = self.optimizer.param_groups[group_index]["params"][ + group_order + ] + if key == "param": + tensor_to_copy_into = main_param + else: + optim_state = self.optimizer.state[main_param] + tensor_to_copy_into = optim_state[key] + + # Copy states into contiguous shard. 
+ gbuf_local_start = param_range_map["gbuf_local"].start + gbuf_local_end = param_range_map["gbuf_local"].end + tensor_to_copy_into.data.copy_( + recv_tensor[gbuf_local_start:gbuf_local_end] + ) diff --git a/model/train/yoco_moe/mindspeed/optimizer/ema_adamw.py b/model/train/yoco_moe/mindspeed/optimizer/ema_adamw.py new file mode 100644 index 000000000..c23429e31 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/optimizer/ema_adamw.py @@ -0,0 +1,200 @@ +from typing import List, Optional +import torch +from torch import Tensor +from torch.optim.optimizer import Optimizer +from mindspeed.ops.npu_apply_fused_ema_adamw import npu_apply_fused_ema_adamw + + +def fused_ema_adamw(grad: List[Tensor], + var: List[Tensor], + m: List[Tensor], + v: List[Tensor], + s: List[Tensor], + step: Tensor, + lr: float, + ema_decay: float, + beta1: float, + beta2: float, + eps: float, + mode: int, + bias_correction: bool, + weight_decay: float): + for i, param in enumerate(var): + g_ref = grad[i] + m_ref = m[i] + v_ref = v[i] + s_ref = s[i] + param.data, m_ref, v_ref, s_ref = npu_apply_fused_ema_adamw(g_ref, + param.data, + m_ref, + v_ref, + s_ref, + step, + lr, + ema_decay, + beta1, + beta2, + eps, + mode, + bias_correction, + weight_decay) + + +class FusedEmaAdamW(Optimizer): + def __init__(self, + params, + lr=1e-3, + eps=1e-8, + betas=(0.9, 0.999), + weight_decay=1e-2, + ema_decay=0.9999, + amsgrad=False, + *, + maximize=False, + use_num_updates=True, + bias_correction=True, + adam_w_mode=True, + set_grad_none=True + ): + + if amsgrad: + raise RuntimeError('ema_adamw does not support the AMSGrad variant.') + if maximize: + raise RuntimeError('ema_adamw does not support the maximize variant.') + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if not 0.0 <= betas[1] < 
1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + if not 0.0 <= weight_decay: + raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + defaults = dict(lr=lr, + bias_correction=bias_correction, + betas=betas, + eps=eps, + weight_decay=weight_decay, + amsgrad=amsgrad, + maximize=maximize) + super(FusedEmaAdamW, self).__init__(params, defaults) + self.adam_w_mode = 1 if adam_w_mode else 0 + self.set_grad_none = set_grad_none + self.ema_decay = ema_decay + if use_num_updates: + self.num_updates = 0 + else: + self.num_updates = -1 + + def zero_grad(self): + if self.set_grad_none: + for group in self.param_groups: + for p in group['params']: + p.grad = None + else: + super(FusedEmaAdamW, self).zero_grad() + + def copy_to(self): + for group in self.param_groups: + if len(group['params']) == 0: + continue + for p in group['params']: + state = self.state[p] + if 'ema_params' not in state.keys(): + continue + p.data.copy_(state['ema_params'].data) + + def store(self, parameters): + self.collected_params_group = [] + for group in parameters: + if len(group['params']) == 0: + continue + collected_params = [param.detach().cpu().clone() for param in group['params']] + self.collected_params_group.append(collected_params) + + def restore(self, parameters): + for c_group, group in zip(self.collected_params_group, parameters): + if len(group['params']) == 0: + continue + for c_param, param in zip(c_group, group['params']): + param.data.copy_(c_param.data) + del self.collected_params_group + + @torch.no_grad() + def step(self, closure=None): + loss = None + if closure is not None: + with torch.enable_grad(): + loss = closure() + ema_decay = self.ema_decay + if self.num_updates >= 0: + self.num_updates += 1 + ema_decay = min(self.ema_decay, (1 + self.num_updates) / (10 + self.num_updates)) + for group in self.param_groups: + if len(group['params']) == 0: + continue + params_with_grad = [] + grads = [] + exp_avgs = [] + exp_avg_sqs = 
[] + state_sums = [] + max_exp_avg_sqs = [] + state_steps = [] + amsgrad = group['amsgrad'] + ema_params = [] + beta1, beta2 = group['betas'] + bias_correction = True if group['bias_correction'] else False + valid_dtype = [torch.float32, torch.float16, torch.bfloat16] + + if 'step' in group: + if not group['step'].is_npu: + group['step'] = group['step'].npu() + group['step'] += 1 + else: + group['step'] = torch.tensor([int(1)]).npu() + + for p in group['params']: + if p.grad is None: + continue + params_with_grad.append(p) + if p.dtype not in valid_dtype: + raise RuntimeError('ema_adamw only support fp32, fp16, bf16.') + if p.grad.is_sparse: + raise RuntimeError('AdamW does not support sparse gradients') + grads.append(p.grad) + + state = self.state[p] + + # State initialization + if len(state) == 0: + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. 
values + state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + state['ema_params'] = p.data.clone() + exp_avgs.append(state['exp_avg']) + exp_avg_sqs.append(state['exp_avg_sq']) + ema_params.append(state['ema_params']) + + if amsgrad: + max_exp_avg_sqs.append(state['max_exp_avg_sq']) + + fused_ema_adamw(grads, + params_with_grad, + exp_avgs, + exp_avg_sqs, + ema_params, + group['step'], + group['lr'], + ema_decay, + beta1, + beta2, + group['eps'], + self.adam_w_mode, + bias_correction, + group['weight_decay']) + return loss diff --git a/model/train/yoco_moe/mindspeed/optimizer/optimizer.py b/model/train/yoco_moe/mindspeed/optimizer/optimizer.py new file mode 100644 index 000000000..d79bd3e0c --- /dev/null +++ b/model/train/yoco_moe/mindspeed/optimizer/optimizer.py @@ -0,0 +1,332 @@ +import os +import types +from functools import wraps + +import torch +from megatron.training import get_args + + +@torch.no_grad() +def prepare_grads(self) -> bool: + """Pre-processing gradients before the optimizer step, returns whether inf/nan is found.""" + timers = self.config.timers + + # Copy gradients from model params to main params. + if timers is not None: + timers('optimizer-copy-to-main-grad', log_level=1).start( + barrier=self.config.barrier_with_L1_time + ) + self._copy_model_grads_to_main_grads() + if timers is not None: + timers('optimizer-copy-to-main-grad').stop() + + if self.config.reuse_fp32_param: + # bf16 -> fp32 + self.fp16_tensor_convert_to_fp32_tensor() + + # Do unscale, check for inf, and update grad scaler only for + # the case that grad scaler is provided. + if self.grad_scaler: + + # Unscale and check for inf/nan. 
+ if timers is not None: + timers('optimizer-unscale-and-check-inf', log_level=1).start( + barrier=self.config.barrier_with_L1_time + ) + found_inf_flag = self._unscale_main_grads_and_check_for_nan() + if timers is not None: + timers('optimizer-unscale-and-check-inf').stop() + + # We are done with scaling gradients + # so we can update the loss scale. + self.grad_scaler.update(found_inf_flag) + + return found_inf_flag + + return False + + +@torch.no_grad() +def step_with_ready_grads(self) -> bool: + """Step the optimizer with ready gradients, return successful.""" + timers = self.config.timers + # Step the optimizer. + if timers is not None: + timers('optimizer-inner-step', log_level=1).start( + barrier=self.config.barrier_with_L1_time + ) + self.optimizer.step() + if timers is not None: + timers('optimizer-inner-step').stop() + + # Update params from main params. + if timers is not None: + timers('optimizer-copy-main-to-model-params', log_level=1).start( + barrier=self.config.barrier_with_L1_time + ) + if self.config.reuse_fp32_param: + # fp32 -> bf16 + res + self.fp32_tensor_convert_to_fp16_tensor() + else: + self._copy_main_params_to_model_params() + if timers is not None: + timers('optimizer-copy-main-to-model-params').stop() + + return True + + +@torch.no_grad() +def mixed_precision_optimizer_step(self): + # Copy gradients from model params to main params. + timers = self.config.timers + timers('optimizer-copy-to-main-grad', log_level=1).start( + barrier=self.config.barrier_with_L1_time) + self._copy_model_grads_to_main_grads() + timers('optimizer-copy-to-main-grad').stop() + if self.config.reuse_fp32_param: + # bf16 -> fp32 + self.fp16_tensor_convert_to_fp32_tensor() + + # Do unscale, check for inf, and update grad scaler only for + # the case that grad scaler is provided. + if self.grad_scaler: + + # Unscale and check for inf/nan. 
+ timers('optimizer-unscale-and-check-inf', log_level=1).start( + barrier=self.config.barrier_with_L1_time) + found_inf_flag = self._unscale_main_grads_and_check_for_nan() + timers('optimizer-unscale-and-check-inf').stop() + + # We are done with scaling gradients + # so we can update the loss scale. + self.grad_scaler.update(found_inf_flag) + + # If we found inf/nan, skip the update. + if found_inf_flag: + return False, None, None + + # Clip the main gradients. + timers('optimizer-clip-main-grad', log_level=1).start( + barrier=self.config.barrier_with_L1_time) + grad_norm = None + if self.config.clip_grad > 0.0: + grad_norm = self.clip_grad_norm(self.config.clip_grad) + timers('optimizer-clip-main-grad').stop() + + + # Count the zeros in the grads. + timers('optimizer-count-zeros', log_level=1).start( + barrier=self.config.barrier_with_L1_time) + num_zeros_in_grad = self.count_zeros() if \ + self.config.log_num_zeros_in_grad else None + timers('optimizer-count-zeros').stop() + + # Step the optimizer. + timers('optimizer-inner-step', log_level=1).start( + barrier=self.config.barrier_with_L1_time) + self.optimizer.step() + timers('optimizer-inner-step').stop() + + # Update params from main params. + timers('optimizer-copy-main-to-model-params', log_level=1).start( + barrier=self.config.barrier_with_L1_time) + if self.config.reuse_fp32_param: + # fp32 -> bf16 + res + self.fp32_tensor_convert_to_fp16_tensor() + else: + self._copy_main_params_to_model_params() + timers('optimizer-copy-main-to-model-params').stop() + + # Successful update. 
+ return True, grad_norm, num_zeros_in_grad + + +def optimizer_config_init_wrapper(init_func): + @wraps(init_func) + def optimizer_config_init(*args, **kwargs): + init_func(*args, **kwargs) + self = args[0] + args = get_args() + self.reuse_fp32_param = args.reuse_fp32_param if hasattr(args, "reuse_fp32_param") else False + + return optimizer_config_init + + +def get_megatron_optimizer_func_wrapper(func): + @wraps(func) + def get_megatron_optimizer_func(*args, **kwargs): + chained_optimizer = func(*args, **kwargs) + args = get_args() + if hasattr(chained_optimizer, "chained_optimizers"): + for optim in chained_optimizer.chained_optimizers: + optim.optimizer.ema_decay = args.ema_decay + return chained_optimizer + if hasattr(chained_optimizer, "optimizer"): + chained_optimizer.optimizer.ema_decay = args.ema_decay + return chained_optimizer + return chained_optimizer + + return get_megatron_optimizer_func + + +def reuse_fp32_param_init_wrapper(init_func): + @wraps(init_func) + def reuse_fp32_param_init(*args, **kwargs): + init_func(*args, **kwargs) + self = args[0] + args = get_args() + self.reuse_fp32_param = args.reuse_fp32_param if hasattr(args, "reuse_fp32_param") else False + if self.reuse_fp32_param: + self.res_float16_groups = [] + self.float16_float32_groups = [] + self.int32_float32_groups = [] + for float16_params_this_group, fp32_from_float16_group in zip(self.float16_groups, self.fp32_from_float16_groups): + res_float16_params_this_group = [] + float16_float32_params_this_group = [] + int32_float32_params_this_group = [] + for i, (_, fp32_from_fp16_param) in enumerate(zip(float16_params_this_group, fp32_from_float16_group)): + res_float16_params_this_group.append( + torch.empty((fp32_from_fp16_param.numel() * 1), dtype=torch.bfloat16, device=fp32_from_fp16_param.device)) + float16_float32_params_this_group.append( + torch.empty((fp32_from_fp16_param.numel() * 2), dtype=torch.bfloat16, device=fp32_from_fp16_param.device)) + 
int32_float32_params_this_group.append( + torch.empty((fp32_from_fp16_param.numel() * 1), dtype=torch.int32, device=fp32_from_fp16_param.device)) + init_and_reuse_storage_of_tensors(fp32_from_float16_group[i], + float16_float32_params_this_group[-1], + res_float16_params_this_group[-1], + float16_params_this_group[i], + int32_float32_params_this_group[-1] + ) + self.res_float16_groups.append(res_float16_params_this_group) + self.float16_float32_groups.append(float16_float32_params_this_group) + self.int32_float32_groups.append(int32_float32_params_this_group) + self._copy_model_params_to_main_params = _copy_model_params_to_main_params + if args.npu_deterministic: + self.fp16_tensor_convert_to_fp32_tensor = types.MethodType(fp16_tensor_convert_to_fp32_tensor_deterministic, self) + self.fp32_tensor_convert_to_fp16_tensor = types.MethodType(fp32_tensor_convert_to_fp16_tensor_deterministic, self) + else: + self.fp16_tensor_convert_to_fp32_tensor = types.MethodType(fp16_tensor_convert_to_fp32_tensor, self) + self.fp32_tensor_convert_to_fp16_tensor = types.MethodType(fp32_tensor_convert_to_fp16_tensor, self) + return reuse_fp32_param_init + + +def _copy_model_params_to_main_params(): + pass + + +def init_and_reuse_storage_of_tensors( + fp32_tensor, + bf16_fp32_tensor, + res_tensor, + bf16_tensor, + int32_tensor +): + """ + init a list of tensor with length of 2*fp32_tensor.numel() in bf16 to share the same storage. + Args: + fp32_tensor: original fp32 tensor. + bf16_fp32_tensor: a bf16 tensor share the same storage with original list of fp32 tensors. + res_tensor: a bf16 tensor that store the residual value of fp32 to bf16, shares a half of the + storage with bf16_fp32_tensor. + bf16_tensor: a bf16 tensor that store the value from fp32, shares another half of the + storage with bf16_fp32_tensor. + int32_tensors: a list of int32 tensors share the same storages with original list of fp32 tensors. 
+ """ + from mindspeed.op_builder import AlgorithmOpBuilder + reuse_data_ptr = AlgorithmOpBuilder().load().reuse_data_ptr + reuse_data_ptr(bf16_fp32_tensor, fp32_tensor, 0) + reuse_data_ptr(int32_tensor, fp32_tensor, 0) + fp32_tensors_to_bf16_tensors([int32_tensor], [bf16_fp32_tensor]) + reuse_data_ptr(res_tensor, bf16_fp32_tensor, 0) + reuse_data_ptr(bf16_tensor, bf16_fp32_tensor, res_tensor.numel()) + + +def fp16_tensor_convert_to_fp32_tensor(self): + for int32_float32_group, float16_param_group in zip( + self.int32_float32_groups, self.float16_float32_groups): + bf16_tensors_to_fp32_tensors(int32_float32_group, float16_param_group) + + +def fp32_tensor_convert_to_fp16_tensor(self): + for int32_float32_param_group, float16_param_group in zip( + self.int32_float32_groups, self.float16_float32_groups): + fp32_tensors_to_bf16_tensors(int32_float32_param_group, float16_param_group) + + +def fp32_tensors_to_bf16_tensors(int32_tensors, bf16_fp32_tensors): + """ + fp32(0p0p0p0p) -> bf16(pppp) + res(0000) + add 32768 to each int32 view, then de-interleave the 16-bit halves of bf16_fp32_tensors in place; empty tensors are skipped. + Args: + int32_tensors: a list of int32 tensors sharing the same storage as the original list of fp32 tensors. + bf16_fp32_tensors: a list of bf16 tensors sharing the same storage as the original list of fp32 tensors. + Returns: + None + """ + for int32_tensor, bf16_fp32_tensor in zip(int32_tensors, bf16_fp32_tensors): + if bf16_fp32_tensor.numel() == 0: + continue  # skip the empty tensor but keep converting the rest of the group + int32_tensor.add_(32768) + bf16_fp32_tensor.copy_(bf16_fp32_tensor.view(-1, 2).transpose(1, 0).reshape(-1).contiguous()) + + +def bf16_tensors_to_fp32_tensors(int32_tensors, bf16_fp32_tensors): + """ + res(0000) + bf16(pppp) -> fp32(0p0p0p0p) + re-interleave the two halves of bf16_fp32_tensors in place, then subtract 32768 from each int32 view, so as to recover the fp32_tensors; empty tensors are skipped. + Args: + int32_tensors: a list of int32 tensors sharing the same storage as the original list of fp32 tensors. + bf16_fp32_tensors: a list of bf16 tensors sharing the same storage as the original list of fp32 tensors. 
+ Returns: + None + """ + for int32_tensor, bf16_fp32_tensor in zip(int32_tensors, bf16_fp32_tensors): + if bf16_fp32_tensor.numel() == 0: + continue  # skip the empty tensor but keep converting the rest of the group + bf16_fp32_tensor.copy_(bf16_fp32_tensor.view(2, -1).transpose(1, 0).reshape(-1).contiguous()) + int32_tensor.sub_(32768) + + +def fp16_tensor_convert_to_fp32_tensor_deterministic(self): + for int32_float32_group, float16_param_group, fp32_from_float16_group in zip( + self.int32_float32_groups, self.float16_float32_groups, self.fp32_from_float16_groups): + bf16_tensors_to_fp32_tensors_deterministic(int32_float32_group, float16_param_group, fp32_from_float16_group, self.optimizer) + + +def fp32_tensor_convert_to_fp16_tensor_deterministic(self): + for int32_float32_param_group, float16_param_group, fp32_from_float16_group in zip( + self.int32_float32_groups, self.float16_float32_groups, self.fp32_from_float16_groups): + fp32_tensors_to_bf16_tensors_deterministic(int32_float32_param_group, float16_param_group, fp32_from_float16_group, self.optimizer) + + +def fp32_tensors_to_bf16_tensors_deterministic(int32_tensors, bf16_fp32_tensors, fp32_tensors, optimizer): + for int32_tensor, bf16_fp32_tensor, fp32_tensor in zip(int32_tensors, bf16_fp32_tensors, fp32_tensors): + if bf16_fp32_tensor.numel() == 0: + continue  # skip the empty tensor but keep converting the rest of the group + odd_even_tensor = ((int32_tensor & 131071) == 32768).int() + int32_tensor.add_(32768) + optimizer_exp_avg_save_sign(optimizer, fp32_tensor, int32_tensor, odd_even_tensor) + bf16_fp32_tensor.copy_(bf16_fp32_tensor.view(-1, 2).transpose(1, 0).reshape(-1).contiguous()) + + +def bf16_tensors_to_fp32_tensors_deterministic(int32_tensors, bf16_fp32_tensors, fp32_tensors, optimizer): + for int32_tensor, bf16_fp32_tensor, fp32_tensor in zip(int32_tensors, bf16_fp32_tensors, fp32_tensors): + if bf16_fp32_tensor.numel() == 0: + continue  # skip the empty tensor but keep converting the rest of the group + bf16_fp32_tensor.copy_(bf16_fp32_tensor.view(2, -1).transpose(1, 0).reshape(-1).contiguous()) + optimizer_exp_avg_load_sign(optimizer, fp32_tensor, int32_tensor) + int32_tensor.sub_(32768) + +
+def optimizer_exp_avg_save_sign(optimizer, fp32_param, int32_tensor, odd_even_tensor): + if "exp_avg_sq" in optimizer.state[fp32_param]: + int32_tensor.sub_(odd_even_tensor) + sign_tensor = torch.sign(odd_even_tensor - 0.5).reshape(optimizer.state[fp32_param]["exp_avg_sq"].shape) + optimizer.state[fp32_param]["exp_avg_sq"].mul_(sign_tensor) + + +def optimizer_exp_avg_load_sign(optimizer, fp32_param, int32_tensor): + if "exp_avg_sq" in optimizer.state[fp32_param]: + odd_even_tensor = (torch.sign(optimizer.state[fp32_param]["exp_avg_sq"]) > 0).reshape(-1) + optimizer.state[fp32_param]["exp_avg_sq"].abs_() + int32_tensor.add_(odd_even_tensor) diff --git a/model/train/yoco_moe/mindspeed/patch_utils.py b/model/train/yoco_moe/mindspeed/patch_utils.py new file mode 100644 index 000000000..42ccc841f --- /dev/null +++ b/model/train/yoco_moe/mindspeed/patch_utils.py @@ -0,0 +1,120 @@ +import importlib +import sys +import types + + +def get_func_name(func): + if isinstance(func, str): + return func + return '.'.join((func.__module__, func.__qualname__)) + + +def dummy_function_wrapper(func_name): + def dummy_function(*args, **kwargs): + raise RuntimeError('function {} no exist'.format(func_name)) + + return dummy_function + + +class Patch: + def __init__(self, orig_func_name, new_func, create_dummy): + split_name = orig_func_name.rsplit('.', 1) + if len(split_name) == 1: + self.orig_module_name, self.orig_func_name = orig_func_name, None + else: + self.orig_module_name, self.orig_func_name = split_name + self.orig_module = None + self.orig_func = None + + self.patch_func = None + self.wrappers = [] + if new_func is None: + new_func = dummy_function_wrapper(orig_func_name) + self.set_patch_func(new_func) + self.is_applied = False + self.create_dummy = create_dummy + + @property + def orig_func_id(self): + return id(self.orig_func) + + @property + def patch_func_id(self): + return id(self.patch_func) + + def set_patch_func(self, new_func, force_patch=False): + if 
hasattr(new_func, '__name__') and new_func.__name__.endswith(('wrapper', 'decorator')): + self.wrappers.append(new_func) + else: + if self.patch_func and not force_patch: + raise RuntimeError('the patch of {} exist !'.format(self.orig_func_name)) + self.patch_func = new_func + self.is_applied = False + + def apply_patch(self): + if self.is_applied: + return + + self.orig_module, self.orig_func = Patch.parse_path(self.orig_module_name, self.orig_func_name, self.create_dummy) + + final_patch_func = self.orig_func + if self.patch_func is not None: + final_patch_func = self.patch_func + + for wrapper in self.wrappers: + final_patch_func = wrapper(final_patch_func) + + if self.orig_func_name is not None: + setattr(self.orig_module, self.orig_func_name, final_patch_func) + for key, value in sys.modules.copy().items(): + if self.orig_func_name is not None and hasattr(value, self.orig_func_name) \ + and id(getattr(value, self.orig_func_name)) == self.orig_func_id: + setattr(value, self.orig_func_name, final_patch_func) + self.is_applied = True + + @staticmethod + def parse_path(module_path, function_name, create_dummy): + from importlib.machinery import ModuleSpec + modules = module_path.split('.') + for i in range(1, len(modules) + 1): + parent = '.'.join(modules[:i - 1]) + path = '.'.join(modules[:i]) + try: + importlib.import_module(path) + except ModuleNotFoundError as e: + if not parent or not hasattr(importlib.import_module(parent), modules[i - 1]): + if not create_dummy: + raise ModuleNotFoundError(e) from e + sys.modules[path] = types.ModuleType(path) + sys.modules[path].__file__ = 'mindspeed.dummy_module.py' + sys.modules[path].__spec__ = ModuleSpec(path, None) + if parent: + setattr(importlib.import_module(parent), modules[i - 1], sys.modules[path]) + else: + module = getattr(importlib.import_module(parent), modules[i - 1]) + if hasattr(module, function_name): + return module, getattr(module, function_name) + elif create_dummy: + return module, 
dummy_function_wrapper(function_name) + else: + raise RuntimeError('no exist {} of {}'.format(function_name, module)) + + if function_name is not None and not hasattr(sys.modules[module_path], function_name): + setattr(sys.modules[module_path], function_name, None) + return sys.modules[module_path], getattr(sys.modules[module_path], function_name) if function_name is not None else None + + +class MindSpeedPatchesManager: + patches_info = {} + + @staticmethod + def register_patch(orig_func_name, new_func=None, force_patch=False, create_dummy=False): + if orig_func_name not in MindSpeedPatchesManager.patches_info: + MindSpeedPatchesManager.patches_info[orig_func_name] = Patch(orig_func_name, new_func, create_dummy) + else: + MindSpeedPatchesManager.patches_info.get(orig_func_name).set_patch_func(new_func, force_patch) + + @staticmethod + def apply_patches(): + for patch in MindSpeedPatchesManager.patches_info.values(): + patch.apply_patch() diff --git a/model/train/yoco_moe/mindspeed/run/gpt_dataset.patch b/model/train/yoco_moe/mindspeed/run/gpt_dataset.patch new file mode 100644 index 000000000..f7287f77e --- /dev/null +++ b/model/train/yoco_moe/mindspeed/run/gpt_dataset.patch @@ -0,0 +1,17 @@ +diff --git a/megatron/core/datasets/gpt_dataset.py b/megatron/core/datasets/gpt_dataset.py +index a645f89..7de00b7 100644 +--- a/megatron/core/datasets/gpt_dataset.py ++++ b/megatron/core/datasets/gpt_dataset.py +@@ -340,9 +340,11 @@ class GPTDataset(MegatronDataset): + else: + cache_hit = False + ++ from megatron.training import get_args ++ args = get_args() + if not path_to_cache or ( + not cache_hit +- and (not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0) ++ and (not torch.distributed.is_initialized() or torch.distributed.get_rank() % args.tensor_model_parallel_size == 0) + ): + + log_single_rank( diff --git a/model/train/yoco_moe/mindspeed/run/helpers.patch b/model/train/yoco_moe/mindspeed/run/helpers.patch new file mode 100644 index 
000000000..c909ddd2c --- /dev/null +++ b/model/train/yoco_moe/mindspeed/run/helpers.patch @@ -0,0 +1,30 @@ +diff --git a/megatron/core/datasets/helpers.cpp b/megatron/core/datasets/helpers.cpp +index 71299996..ee2bc103 100644 +--- a/megatron/core/datasets/helpers.cpp ++++ b/megatron/core/datasets/helpers.cpp +@@ -172,14 +172,14 @@ py::array build_sample_idx(const py::array_t &sizes_, + { + num_samples = ceil(float(num_epochs * tokens_per_epoch - add_extra_token_to_sequence) / seq_length); + } +- int32_t *sample_idx = new int32_t[2 * (num_samples + 1)]; ++ int64_t *sample_idx = new int64_t[2 * (num_samples + 1)]; + + // Index into sample_idx. + int64_t sample_index = 0; + // Index into doc_idx. + int64_t doc_idx_index = 0; + // Begining offset for each document. +- int32_t doc_offset = 0; ++ int64_t doc_offset = 0; + // Start with first document and no offset. + sample_idx[2 * sample_index] = doc_idx_index; + sample_idx[2 * sample_index + 1] = doc_offset; +@@ -232,7 +232,7 @@ py::array build_sample_idx(const py::array_t &sizes_, + delete[] mem; }); + + // Return the numpy array. +- const auto byte_size = sizeof(int32_t); ++ const auto byte_size = sizeof(int64_t); + return py::array(std::vector{num_samples + 1, 2}, // shape + {2 * byte_size, byte_size}, // C-style contiguous strides + sample_idx, // the data pointer diff --git a/model/train/yoco_moe/mindspeed/run/initialize.patch b/model/train/yoco_moe/mindspeed/run/initialize.patch new file mode 100644 index 000000000..e6ac66e0d --- /dev/null +++ b/model/train/yoco_moe/mindspeed/run/initialize.patch @@ -0,0 +1,13 @@ +diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py +index a645f89..f834616 100644 +--- a/megatron/training/initialize.py ++++ b/megatron/training/initialize.py +@@ -108,7 +108,7 @@ def _compile_dependencies(): + # Compile dataset C++ code. 
+ # ========================= + # TODO: move this to ninja +- if torch.distributed.get_rank() == 0: ++ if torch.distributed.get_rank() % args.tensor_model_parallel_size == 0: + start_time = time.time() + print("> compiling dataset index builder ...") + from megatron.core.datasets.utils import compile_helpers diff --git a/model/train/yoco_moe/mindspeed/run/run.py b/model/train/yoco_moe/mindspeed/run/run.py new file mode 100644 index 000000000..a6020475f --- /dev/null +++ b/model/train/yoco_moe/mindspeed/run/run.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 + +""" +``mindspeed`` provides a limited set of the functionality of ``git apply``. + +Usage +-------- + +1. Using mindspeed patch +++++++++++++++++++++++++++ + +To apply patches +:: + + mindspeed -P + or + mindspeed --patch + +2. Reverse mindspeed patch +++++++++++++++++++++++++++++ + +To reverse patches +:: + + mindspeed -R + or + mindspeed --reverse + + +Add Patches +-------------- + +1. Use ``git diff xx.py > xx.patch`` to generate a single new patch. +2. Place it in the corresponding directory of MindSpeed. +3. It will be automatically found by 'def find_all_patch()' + + +Delete Patches +----------------- + +Delete the patch in the corresponding directory of MindSpeed. + + +Rejects +---------- + +Option '--check' is used before patching. +If there is a reject when checking a patch, it will be skipped, recorded and printed at the end. +You need to resolve them manually. 
+ +""" +import subprocess +import os +from argparse import ArgumentParser + +_RUN_PATH = os.path.dirname(__file__) + + +def find_all_patch(file_dir=None, target_suffix='.patch'): + patch_files = [] + walk_generator = os.walk(file_dir) + for root_path, dirs, files in walk_generator: + if len(files) < 1: + continue + for file in files: + file_name, suffix_name = os.path.splitext(file) + if suffix_name == target_suffix: + patch_files.append(os.path.join(root_path, file)) + return patch_files + + +def get_args_parser() -> ArgumentParser: + '''Helper function parsing the command line options''' + + parser = ArgumentParser(description="MindSpeed Patch Launcher") + parser.add_argument( + "-P", + "--patch", + action='store_true', + help="Use mindspeed patch") + parser.add_argument( + "-R", + "--reverse", + action='store_true', + help="Reverse mindspeed patch") + return parser + + +def parse_args(args): + parser = get_args_parser() + return parser.parse_args(args) + + +def patch_from_args(args): + rejects = [] + if args.patch: + options = {'name': 'Applying', 'cmd': '-p1'} + elif args.reverse: + options = {'name': 'Reversing', 'cmd': '-R'} + else: + print('Hello MindSpeed') + return + + patch_files = find_all_patch(os.path.dirname(_RUN_PATH)) + + for patch in patch_files: + print('{} patch {}...'.format(options['name'], patch)) + commond = ['git', 'apply', '--check', options['cmd'], patch]  # argv list: patch paths may contain spaces + check = subprocess.run(commond, capture_output=True, text=True) + if check.returncode != 0:  # warnings on stderr alone are not a reject + rejects.append(patch) + print('{} patch failed. 
Please check: {}'.format(options['name'], check.stderr)) + else: + commond = ['git', 'apply', options['cmd'], patch] + process = subprocess.run(commond, capture_output=True, text=True) + print(process.stdout) + if rejects: + print('Here are some rejects needed to resolve: {}'.format(rejects)) + + +def main(args=None): + print('MindSpeedRun Path is {}'.format(_RUN_PATH)) + args = parse_args(args) + patch_from_args(args) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/tokenizer/__init__.py b/model/train/yoco_moe/mindspeed/tokenizer/__init__.py new file mode 100644 index 000000000..2218395a0 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/tokenizer/__init__.py @@ -0,0 +1,17 @@ +# coding=utf-8 +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .tokenizer import build_tokenizer_wrapper \ No newline at end of file diff --git a/model/train/yoco_moe/mindspeed/tokenizer/tokenizer.py b/model/train/yoco_moe/mindspeed/tokenizer/tokenizer.py new file mode 100644 index 000000000..46d24c0e2 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/tokenizer/tokenizer.py @@ -0,0 +1,156 @@ +# coding=utf-8 +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Megatron tokenizers. just using huggingface implementation."""
+from functools import wraps
+
+from transformers import AutoTokenizer
+from megatron.training.tokenizer.tokenizer import _vocab_size_with_padding
+from megatron.core.datasets.megatron_tokenizer import MegatronTokenizer
+
+
+def build_tokenizer_wrapper(build_tokenizer):
+    """Wrap ``build_tokenizer`` to add the ``PretrainedFromHF`` tokenizer type.
+
+    Args:
+        build_tokenizer: The original builder. It is called as
+            ``build_tokenizer(args)`` for every other tokenizer type.
+
+    Returns:
+        ``wrapper(args, **kargs)``, which returns the tokenizer. Extra keyword
+        arguments are accepted but are not forwarded to ``build_tokenizer``.
+
+    Raises:
+        ValueError: (from the wrapper) if ``args.tokenizer_name_or_path`` is
+            None, or ``args.tokenizer_kwargs`` has an odd number of entries.
+    """
+    @wraps(build_tokenizer)
+    def wrapper(args, **kargs):
+        if args.tokenizer_type != "PretrainedFromHF":
+            return build_tokenizer(args)
+
+        if args.rank == 0:
+            print(' > building PretrainFromHF tokenizer. Vocab file is un-used, '
+                  'loading tokenizer from pre-trained model', flush=True)
+
+        if args.tokenizer_name_or_path is None:
+            raise ValueError("Missing tokenizer_name_or_path while building PretrainFromHF tokenizer.")
+
+        # ``tokenizer_kwargs`` is a flat [name, value, name, value, ...] list.
+        hf_tokenizer_kwargs = {}
+        if hasattr(args, "tokenizer_kwargs") and args.tokenizer_kwargs:
+            if len(args.tokenizer_kwargs) % 2 != 0:
+                raise ValueError("The token name and token value must be entered in pairs.")
+
+            for i in range(0, len(args.tokenizer_kwargs), 2):
+                hf_tokenizer_kwargs[args.tokenizer_kwargs[i]] = \
+                    args.tokenizer_kwargs[i + 1]
+
+        # NOTE(review): ``tokenizer_not_use_fast`` is forwarded verbatim as
+        # ``use_fast``; presumably the CLI flag stores False when given --
+        # confirm against the argument definition.
+        tokenizer = _AutoTokenizer(
+            args.tokenizer_name_or_path,
+            vocab_extra_ids=args.vocab_extra_ids,
+            model_max_length=args.seq_length,
+            use_fast=args.tokenizer_not_use_fast,
+            **hf_tokenizer_kwargs
+        )
+
+        # Add vocab size (if not already set from a checkpoint).
+        if getattr(args, "padded_vocab_size", None) is None:
+            args.padded_vocab_size = _vocab_size_with_padding(tokenizer.vocab_size,
+                                                              args)
+        return tokenizer
+    return wrapper
+
+
+class _AutoTokenizer(MegatronTokenizer):
+    """AutoTokenizer for Hf Pretrained model loading."""
+
+    def __init__(self, tokenizer_name_or_path, vocab_extra_ids, model_max_length, use_fast, **kwargs):
+        """Load a Hugging Face tokenizer from local files only.
+
+        Args:
+            tokenizer_name_or_path: Passed to ``AutoTokenizer.from_pretrained``.
+            vocab_extra_ids: Number of ``<extra_id_N>`` special tokens to add.
+            model_max_length: Forwarded as ``model_max_length``.
+            use_fast: Forwarded as ``use_fast``.
+            **kwargs: Extra keyword arguments for ``from_pretrained``.
+        """
+        name = tokenizer_name_or_path
+        super().__init__(name)
+        hf_tokenizer_kwargs = kwargs
+        if vocab_extra_ids > 0:
+            # One distinct sentinel per extra id; an empty f-string here would
+            # register ``vocab_extra_ids`` copies of the empty string.
+            hf_tokenizer_kwargs["additional_special_tokens"] = [f"<extra_id_{_id}>" for _id in range(vocab_extra_ids)]
+
+        hf_tokenizer_kwargs["model_max_length"] = model_max_length
+        hf_tokenizer_kwargs["use_fast"] = use_fast
+        # Never execute repository code and never reach out to the network.
+        hf_tokenizer_kwargs["trust_remote_code"] = False
+        hf_tokenizer_kwargs["local_files_only"] = True
+        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, **hf_tokenizer_kwargs)
+        # Fall back to EOS as the padding token when the model defines none.
+        if self.tokenizer.pad_token_id is None:
+            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+        self.encoder = self.tokenizer.get_vocab()
+        self.decoder = {v: k for k, v in self.encoder.items()}
+
+    @property
+    def vocab_size(self):
+        """Total number of tokens, including added special tokens."""
+        return len(self.tokenizer)  # vocab_size doesn't contain additional tokens
+
+    @property
+    def vocab(self):
+        """Token-to-id mapping; entries of ``tokenizer.vocab`` take precedence."""
+        return {
+            **{special_token: self.tokenizer.convert_tokens_to_ids(special_token)
+               for special_token in self.tokenizer.additional_special_tokens},
+            **self.tokenizer.vocab,
+        }
+
+    @property
+    def inv_vocab(self):
+        """Id-to-token mapping derived from ``vocab``."""
+        return {v: k for k, v in self.vocab.items()}
+
+    def tokenize(self, text):
+        """Encode ``text`` with the underlying tokenizer's ``encode``."""
+        return self.tokenizer.encode(text)
+
+    def detokenize(self, token_ids):
+        """Decode ``token_ids`` with the underlying tokenizer's ``decode``."""
+        return self.tokenizer.decode(token_ids)
+
+    @property
+    def eod(self):
+        """End-of-document id; same value as ``eos``."""
+        return self.eos
+
+    @property
+    def eos_token_id(self):
+        """Raw EOS id from the underlying tokenizer (may be None)."""
+        return self.tokenizer.eos_token_id
+
+    @property
+    def cls(self):
+        """CLS token id; raises AttributeError when undefined."""
+        candidate = self.tokenizer.cls_token_id
+        return self._check_token_candidate(candidate)
+
+    @property
+    def sep(self):
+        """SEP token id; raises AttributeError when undefined."""
+        candidate = self.tokenizer.sep_token_id
+        return self._check_token_candidate(candidate)
+
+    @property
+    def pad(self):
+        """PAD token id, falling back to EOS; raises AttributeError if neither exists."""
+        candidate = self.tokenizer.pad_token_id
+
+        # just use eos_token_id if pad_token_id is not available, it is reasonable
+        # maybe add a new token, and resize embedding layer is better
+        if candidate is None:
+            candidate = self.tokenizer.eos_token_id
+        return self._check_token_candidate(candidate)
+
+    @property
+    def mask(self):
+        """MASK token id; raises AttributeError when undefined."""
+        candidate = self.tokenizer.mask_token_id
+        return self._check_token_candidate(candidate)
+
+    @property
+    def bos(self):
+        """Not supported by this wrapper; always raises NotImplementedError."""
+        raise NotImplementedError("Missing <bos>")
+
+    @property
+    def eos(self):
+        """EOS token id; raises AttributeError when undefined."""
+        candidate = self.tokenizer.eos_token_id
+        return self._check_token_candidate(candidate)
+
+    @property
+    def additional_special_tokens_ids(self):
+        """Ids of the additional special tokens, as reported by the tokenizer."""
+        return self.tokenizer.additional_special_tokens_ids
+
+    @staticmethod
+    def _check_token_candidate(candidate):
+        """Return ``candidate`` unchanged, or raise AttributeError if it is None."""
+        if candidate is None:
+            raise AttributeError("Token doesn't exist")
+        return candidate
\ No newline at end of file
diff --git a/model/train/yoco_moe/mindspeed/training.py b/model/train/yoco_moe/mindspeed/training.py
new file mode 100644
index 000000000..302302332
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/training.py
@@ -0,0 +1,355 @@
+# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
+import time
+from functools import wraps
+import os
+import torch
+
+from megatron.core import mpu
+from megatron.core.pipeline_parallel import get_forward_backward_func
+from megatron.core.utils import get_model_config
+
+from megatron.training import one_logger_utils
+
+from megatron.training.checkpointing import save_checkpoint
+
+from megatron.training.initialize import initialize_megatron
+from megatron.training.initialize import set_jit_fusion_options
+
+from megatron.training.training import append_to_progress_log
+from megatron.training.training import setup_model_and_optimizer
+from megatron.training.training import build_train_valid_test_data_iterators
+from megatron.training.training import train
+from megatron.training.training import evaluate_and_print_results
+from megatron.training.training import print_datetime
+from megatron.core.num_microbatches_calculator import (
+    get_current_global_batch_size,
+    get_num_microbatches,
+    update_num_microbatches)
+from megatron.training.utils import (
+    calc_params_l2_norm,
+    check_adlr_autoresume_termination,
+    is_last_rank,
+    print_rank_0,
+    print_rank_last,
+    report_memory,
+    unwrap_model)
+from megatron.training.global_vars import (
+    get_args,
+    get_signal_handler,
+    get_timers,
+    get_tensorboard_writer,
+    get_wandb_writer,
+    get_one_logger)
+
+from mindspeed.auto_tuning.module.parse.profiling_parse.profiling_node_parse import GatherNodeProfiling
+
+_TRAIN_START_TIME = time.time()
+
+
+@torch.no_grad()
+def update_ema(
+    ema_model: torch.nn.Module, model: torch.nn.Module, optimizer=None, decay: float = 0.9999
+) -> None:
+    """
+    Step the EMA model towards the current model.
+
+    Each trainable parameter of ``model`` (except one named ``pos_embed``) is
+    blended in place into the same-named parameter of ``ema_model`` as
+    ``ema = decay * ema + (1 - decay) * param``.  ``optimizer`` is unused.
+    """
+    ema_params = dict(ema_model.named_parameters())
+
+    for name, param in model.named_parameters():
+        if name == "pos_embed":
+            continue
+        if not param.requires_grad:
+            continue
+        ema_params[name].mul_(decay).add_(param.data, alpha=1 - decay)
+
+
+def train_step(forward_step_func, data_iterator,
+               model, optimizer, opt_param_scheduler, config):
+    """Single training step.
+
+    Returns:
+        ``(loss_reduced, skipped_iter, grad_norm, num_zeros_in_grad)``.
+        ``loss_reduced`` is an empty dict on every pipeline stage but the last;
+        ``skipped_iter`` is 1 when the optimizer step was not applied, else 0.
+    """
+    args = get_args()
+    timers = get_timers()
+
+    # Set grad to zero.
+    for model_chunk in model:
+        model_chunk.zero_grad_buffer()
+    optimizer.zero_grad()
+
+    # Forward pass.
+    forward_backward_func = get_forward_backward_func()
+    losses_reduced = forward_backward_func(
+        forward_step_func=forward_step_func,
+        data_iterator=data_iterator,
+        model=model,
+        num_microbatches=get_num_microbatches(),
+        seq_length=args.seq_length,
+        micro_batch_size=args.micro_batch_size,
+        decoder_seq_length=args.decoder_seq_length,
+        forward_only=False)
+
+    # Empty unused memory.
+    if args.empty_unused_memory_level >= 1:
+        torch.cuda.empty_cache()
+
+    # Vision gradients.
+    if getattr(args, 'vision_pretraining', False) and args.vision_pretraining_type == "dino":
+        unwrapped_model = unwrap_model(model[0])
+        unwrapped_model.cancel_gradients_last_layer(args.curr_iteration)
+
+    # Update parameters.
+    timers('optimizer', log_level=1).start(barrier=args.barrier_with_L1_time)
+    update_successful, grad_norm, num_zeros_in_grad = optimizer.step()
+    timers('optimizer').stop()
+
+    # Move every chunk's EMA copy towards the freshly updated weights.
+    if args.use_ema:
+        unwrapped_model = unwrap_model(model)
+        for model_chunk in unwrapped_model:
+            update_ema(model_chunk.ema, model_chunk, optimizer=optimizer)
+
+    # Vision momentum.
+    if getattr(args, 'vision_pretraining', False) and args.vision_pretraining_type == "dino":
+        unwrapped_model = unwrap_model(model[0])
+        unwrapped_model.update_momentum(args.curr_iteration)
+
+    # Update learning rate.
+    if update_successful:
+        increment = get_num_microbatches() * \
+                    args.micro_batch_size * \
+                    args.data_parallel_size
+        opt_param_scheduler.step(increment=increment)
+        skipped_iter = 0
+    else:
+        skipped_iter = 1
+
+    # Empty unused memory.
+    if args.empty_unused_memory_level >= 2:
+        torch.cuda.empty_cache()
+
+    if mpu.is_pipeline_last_stage(ignore_virtual=True):
+        # Average loss across microbatches.
+        loss_reduced = {}
+        for key in losses_reduced[0].keys():
+            numerator = 0
+            denominator = 0
+            for x in losses_reduced:
+                val = x[key]
+                # there is one dict per microbatch. in new reporting, we average
+                # over the total number of tokens across the global batch.
+                if isinstance(val, (tuple, list)):
+                    numerator += val[0]
+                    denominator += val[1]
+                else:
+                    # legacy behavior. we average over the number of microbatches,
+                    # and so the denominator is 1.
+                    numerator += val
+                    denominator += 1
+            loss_reduced[key] = numerator / denominator
+        return loss_reduced, skipped_iter, grad_norm, num_zeros_in_grad
+    return {}, skipped_iter, grad_norm, num_zeros_in_grad
+
+
+def pretrain(train_valid_test_dataset_provider,
+             model_provider,
+             model_type,
+             forward_step_func,
+             process_non_loss_data_func=None,
+             extra_args_provider=None,
+             args_defaults=None):
+    """Run initialization, training and the final validation / test passes.
+
+    Args:
+        train_valid_test_dataset_provider: Passed to
+            ``build_train_valid_test_data_iterators``.
+        model_provider: Passed to ``setup_model_and_optimizer``.
+        model_type: Passed to ``setup_model_and_optimizer``.
+        forward_step_func: Forward-step callable used by ``train`` and by
+            ``evaluate_and_print_results``.
+        process_non_loss_data_func: Optional callable forwarded to training
+            and evaluation.
+        extra_args_provider: Forwarded to ``initialize_megatron``.
+        args_defaults: Optional dict forwarded to ``initialize_megatron``; an
+            ``'init_func'`` entry, if present, is called after initialization.
+            ``None`` is treated as an empty dict.
+
+    The function returns early, before training, when the environment
+    variable ``OOTB_OPTIMIZER_PARSE_ARGS`` or ``OOTB_OPTIMIZER_PARSE_MODEL``
+    is ``"TRUE"``.
+    """
+    # A fresh dict per call avoids sharing one mutable default across calls.
+    if args_defaults is None:
+        args_defaults = {}
+
+    # Initalize and get arguments, timers, and Tensorboard writer.
+    initialize_megatron(extra_args_provider=extra_args_provider,
+                        args_defaults=args_defaults)
+
+    if (os.getenv("OOTB_OPTIMIZER_PARSE_ARGS", "FALSE") == "TRUE"):
+        working_dir = get_args().profile_save_path
+        from mindspeed.auto_tuning.mindspeed_adaptor import MindSpeedAdaptor
+        hardware = MindSpeedAdaptor.get_hardware(working_dir=working_dir)
+        MindSpeedAdaptor.get_model_args(get_args(), hardware, working_dir)
+        print_rank_0("================OOTB_OPTIMIZER_PARSE_ARGS END EXIT!====================")
+
+        return
+
+    if 'init_func' in args_defaults:
+        init_func = args_defaults['init_func']
+        init_func()
+
+    args = get_args()
+    timers = get_timers()
+
+    if args.log_progress:
+        append_to_progress_log("Starting job")
+
+    # Set pytorch JIT layer fusion options and warmup JIT functions.
+    set_jit_fusion_options()
+
+    # Adjust the startup time so it reflects the largest value.
+    # This will be closer to what scheduler will see (outside of
+    # image ... launches.
+    # NOTE(review): a float32 tensor cannot hold a Unix timestamp with
+    # sub-second precision -- confirm whether a float64 all-reduce is
+    # available on this backend before relying on the derived timings.
+    global _TRAIN_START_TIME
+    start_time_tensor = torch.npu.FloatTensor([_TRAIN_START_TIME])
+    torch.distributed.all_reduce(start_time_tensor,
+                                 op=torch.distributed.ReduceOp.MIN)
+    _TRAIN_START_TIME = start_time_tensor.item()
+
+    app_metrics = {}
+    app_metrics['app_start_time'] = round(_TRAIN_START_TIME * 1000.0)
+    app_metrics['app_model_init_start_time'] = round(_TRAIN_START_TIME * 1000.0)
+
+    print_rank_0('time to initialize megatron (seconds): {:.3f}'.format(
+        time.time() - _TRAIN_START_TIME))
+    print_datetime('after megatron is initialized')
+    app_metrics['app_model_init_finish_time'] = one_logger_utils.get_timestamp_in_ms()
+
+    args = get_args()
+    timers = get_timers()
+
+    # Track E2E metrics on pretrain start
+    one_logger_utils.on_pretrain_start()
+
+    # Model, optimizer, and learning rate.
+    timers('model-and-optimizer-setup', log_level=0).start(barrier=True)
+    app_metrics['app_build_optimizer_start_time'] = one_logger_utils.get_timestamp_in_ms()
+    model, optimizer, opt_param_scheduler = setup_model_and_optimizer(
+        model_provider, model_type)
+
+    timers('model-and-optimizer-setup').stop()
+    print_datetime('after model, optimizer, and learning rate '
+                   'scheduler are built')
+    app_metrics['app_build_optimizer_finish_time'] = one_logger_utils.get_timestamp_in_ms()
+    config = get_model_config(model[0])
+
+    if (os.getenv("OOTB_OPTIMIZER_PARSE_MODEL", "FALSE") == "TRUE"):
+        output_path = args.profile_save_path
+        from mindspeed.auto_tuning.mindspeed_adaptor import MindSpeedAdaptor
+        hardware = MindSpeedAdaptor.get_hardware()
+        MindSpeedAdaptor.get_model_params(model, mpu.get_pipeline_model_parallel_rank(), hardware, output_path)
+        print_rank_0("================OOTB_OPTIMIZER_PARSE_MODEL END EXIT!====================")
+        return
+
+    # Data stuff.
+    app_metrics['app_build_dataiters_start_time'] = one_logger_utils.get_timestamp_in_ms()
+    timers('train/valid/test-data-iterators-setup', log_level=0).start(
+        barrier=True)
+    if args.virtual_pipeline_model_parallel_size is not None:
+        # One iterator triple per virtual-pipeline model chunk.
+        train_data_iterator = []
+        valid_data_iterator = []
+        test_data_iterator = []
+        for i in range(len(model)):
+            mpu.set_virtual_pipeline_model_parallel_rank(i)
+            iterators = build_train_valid_test_data_iterators(
+                train_valid_test_dataset_provider)
+            train_data_iterator.append(iterators[0])
+            valid_data_iterator.append(iterators[1])
+            test_data_iterator.append(iterators[2])
+    else:
+        train_data_iterator, valid_data_iterator, test_data_iterator \
+            = build_train_valid_test_data_iterators(
+                train_valid_test_dataset_provider)
+    timers('train/valid/test-data-iterators-setup').stop()
+    print_datetime('after dataloaders are built')
+    app_metrics['app_build_dataiters_finish_time'] = one_logger_utils.get_timestamp_in_ms()
+
+    # Track if training is enabled. Can only be done once args.do_train is assigned after dataloader is built.
+    one_logger_utils.track_config_flags(args.train_iters, args.skip_train, args.do_train,
+                                        args.do_valid, args.do_test, args.dataloader_type,
+                                        args.retro_project_dir, args.retro_cyclic_train_iters)
+
+    # Print setup timing.
+    print_rank_0('done with setup ...')
+    timers.log(['model-and-optimizer-setup',
+                'train/valid/test-data-iterators-setup'], barrier=True)
+
+    one_logger = get_one_logger()
+    if one_logger:
+        one_logger.log_metrics(app_metrics)
+
+    # Context used for persisting some state between checkpoint saves.
+    checkpointing_context = {}
+
+    if not args.skip_train:
+        print_rank_0('training ...')
+
+        if args.dataloader_type == 'cyclic' and args.retro_project_dir:
+            assert args.retro_cyclic_train_iters is not None
+            args.train_iters = args.retro_cyclic_train_iters
+            print_rank_0("retro cyclic train iters : %d" % args.train_iters)
+
+        iteration = 0
+        # Defined up front so the save below never sees an unbound name.
+        num_floating_point_operations_so_far = 0
+        if args.do_train and args.train_iters > 0:
+            iteration, num_floating_point_operations_so_far = train(
+                forward_step_func,
+                model, optimizer, opt_param_scheduler,
+                train_data_iterator, valid_data_iterator,
+                process_non_loss_data_func, config, checkpointing_context)
+
+        print_datetime('after training is done')
+
+        # Save only when the last iteration did not fall on a save interval.
+        if args.save and iteration != 0 and iteration % args.save_interval != 0:
+            save_checkpoint(iteration, model, optimizer, opt_param_scheduler,
+                            num_floating_point_operations_so_far, checkpointing_context)
+
+        if one_logger:
+            one_logger.log_metrics({
+                'app_train_loop_finish_time': one_logger_utils.get_timestamp_in_ms()
+            })
+
+    else:
+        print_rank_0('skipping training (--skip-train is on) ...')
+
+        iteration = args.iteration
+
+    if args.do_valid:
+        prefix = f'iteration {iteration} on validation set'
+        evaluate_and_print_results(prefix, forward_step_func,
+                                   valid_data_iterator, model,
+                                   iteration, process_non_loss_data_func, config,
+                                   verbose=True, write_to_tensorboard=not args.skip_train)
+
+    if args.do_test:
+        prefix = f'iteration {iteration} on test set'
+        evaluate_and_print_results(prefix, forward_step_func,
+                                   test_data_iterator, model,
+                                   iteration, process_non_loss_data_func, config,
+                                   verbose=True, write_to_tensorboard=not args.skip_train)
+
+    if os.getenv('OOTB_OPTIMIZER_PROFILING', 'FALSE') == 'TRUE':
+        # profiling parser
+        res_dir = args.profile_save_path
+        cur_rank = torch.distributed.get_rank()
+        # Only every eighth rank parses the profiling output.
+        if res_dir and cur_rank % 8 == 0:
+            GatherNodeProfiling(res_dir).parse_node_pkl(args)
+        print_datetime('after training is done')
+
+    if one_logger:
+        one_logger.log_metrics({
+            'app_finish_time': one_logger_utils.get_timestamp_in_ms()
+        })
+    one_logger_utils.finish()
+
+
+def num_floating_point_wrapper(fn):
+    """Call ``fn(args, batch_size)`` with no-op layers excluded from ``num_layers``.
+
+    ``args.num_layers`` is reduced by ``len(args.noop_layers)`` (when that is a
+    set) for the duration of the call and restored afterwards, even if ``fn``
+    raises.
+    """
+    @wraps(fn)
+    def wrapper(args, batch_size):
+        noop_count = len(args.noop_layers) if isinstance(args.noop_layers, set) else 0
+        args.num_layers -= noop_count
+        try:
+            return fn(args, batch_size)
+        finally:
+            args.num_layers += noop_count
+
+    return wrapper
+
+
+def get_device_wrapper(func):
+    """Return a ``cuda`` device directly when the process group backend is ``hccl``.
+
+    For any other backend the wrapped ``func`` is called unchanged.  The local
+    rank is taken from the first positional argument, or from a ``local_rank``
+    keyword when no positional argument is given.
+    """
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        backend = torch.distributed.get_backend()
+        # NOTE(review): assumes the wrapped function names its first
+        # parameter ``local_rank`` -- confirm against the wrapped signature.
+        local_rank = args[0] if args else kwargs.get('local_rank')
+        if backend == 'hccl':
+            if local_rank is None:
+                device = torch.device('cuda')
+            else:
+                device = torch.device(f'cuda:{local_rank}')
+        else:
+            device = func(*args, **kwargs)
+        return device
+    return wrapper
diff --git a/model/train/yoco_moe/mindspeed/utils.py b/model/train/yoco_moe/mindspeed/utils.py
new file mode 100644
index 000000000..7fa025164
--- /dev/null
+++ b/model/train/yoco_moe/mindspeed/utils.py
@@ -0,0 +1,726 @@
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
+
+import array
+import hashlib
+import logging
+from typing import List
+import functools
+from functools import wraps
+import random
+import os
+import re
+import math
+import torch
+import torch_npu
+import numpy as np
+from megatron.core import mpu
+from megatron.core import parallel_state
+from megatron.training.global_vars import get_args
+
+from mindspeed.core.parallel_state import (get_context_parallel_for_hybrid_ulysses_world_size,
+                                             get_context_parallel_for_hybrid_ulysses_rank,
+                                             get_context_parallel_for_hybrid_ring_world_size,
+                                             get_context_parallel_for_hybrid_ring_rank)
+from mindspeed.core.context_parallel.utils import (set_scheduling_info,
+                                                   set_remapped_seq_order,
+                                                   adaptive_reschedule_task,
+                                                   get_adaptive_cp_mask_list_by_user,
+                                                   get_adaptive_cp_grid_mask_by_user,
+                                                   generate_adaptive_cp_mask_list_by_user,
+                                                   generate_adaptive_cp_grid_mask_by_user)
+from mindspeed.core.tensor_parallel_y_union_cp import TensorParallelYUnionCP
+from mindspeed.model.transformer import set_attention_mask, get_attention_mask
+
+logger = logging.getLogger(__name__)
+
+_ACTUAL_SEQ_LEN = None
+_POSITION_IDS = None
+_REARRANGE_IDX_TENSOR = None
+_KV_INDEX = None
+_Q_INDEX = None
+
+
+def generate_rearrange_idx_tensor(tp_y_cp_size):
+    """Return the cached chunk order ``[0, 2n-1, 1, 2n-2, ...]`` as an NPU tensor.
+
+    The tensor is built on the first call only; later calls return the cached
+    value regardless of ``tp_y_cp_size``.
+    """
+    global _REARRANGE_IDX_TENSOR
+    if _REARRANGE_IDX_TENSOR is None:
+        rearrange_index = []
+        for i in range(tp_y_cp_size):
+            rearrange_index.extend([i, 2 * tp_y_cp_size - 1 - i])
+        _REARRANGE_IDX_TENSOR = torch.tensor(rearrange_index, device='cpu', pin_memory=True).to(device='npu', non_blocking=True)
+    return _REARRANGE_IDX_TENSOR
+
+
+def get_actual_seq_len():
+    """Return the value last stored by ``set_actual_seq_len`` (None initially)."""
+    global _ACTUAL_SEQ_LEN
+    return _ACTUAL_SEQ_LEN
+
+
+def get_kv_index():
+    """Return the module-level ``_KV_INDEX`` value."""
+    global _KV_INDEX
+    return _KV_INDEX
+
+
+def get_q_index():
+    """Return the module-level ``_Q_INDEX`` value."""
+    global _Q_INDEX
+    return _Q_INDEX
+
+
+def compute_qkv_index(seq_lens):
+    """Split each sub-sequence's positions into a first (kv) and second (q) half.
+
+    Args:
+        seq_lens: Increasing end offsets of the sub-sequences.
+
+    Returns:
+        ``(q_index, kv_index)`` tensors, or ``(None, None)`` when the attention
+        mask type is ``'general'`` or the ring degree is 1.
+    """
+    args = get_args()
+    if args.attention_mask_type == 'general' or get_ring_degree() == 1:
+        return None, None
+
+    full_indices = list(range(seq_lens[-1]))
+    prev_eod_pos = 0
+    kv_indices = []
+    q_indices = []
+    for eod_pos in seq_lens:
+        mid = (eod_pos + prev_eod_pos) // 2
+        kv_indices.extend(full_indices[prev_eod_pos:mid])
+        q_indices.extend(full_indices[mid:eod_pos])
+        prev_eod_pos = eod_pos
+
+    kv_index = torch.tensor(kv_indices).cuda(non_blocking=True)
+    q_index = torch.tensor(q_indices).cuda(non_blocking=True)
+
+    return q_index, kv_index
+
+
+def get_ring_degree():
+    """Return the ring degree implied by the context-parallel configuration.
+
+    1 when context parallelism is off or the algorithm is ``ulysses_cp_algo``;
+    the context-parallel size for ``megatron_cp_algo``; ``args.ring_degree``
+    for every other algorithm.
+    """
+    args = get_args()
+    cp_size = args.context_parallel_size
+    if cp_size == 1:
+        return 1
+
+    if args.context_parallel_algo == 'megatron_cp_algo':
+        return cp_size
+    elif args.context_parallel_algo == 'ulysses_cp_algo':
+        return 1
+    else:
+        return args.ring_degree
+
+
+def set_actual_seq_len(actual_seq_len):
+    """Store ``actual_seq_len`` for later retrieval by ``get_actual_seq_len``."""
+    global _ACTUAL_SEQ_LEN
+    _ACTUAL_SEQ_LEN = actual_seq_len
+
+
+def get_position_ids():
+    """Return the value last stored by ``set_position_ids`` (None initially)."""
+    global _POSITION_IDS
+    return _POSITION_IDS
+
+
+def set_position_ids(position_ids):
+    """Store ``position_ids`` for later retrieval by ``get_position_ids``."""
+    global _POSITION_IDS
+    _POSITION_IDS = position_ids
+
+
+def compute_actual_seq_len(seq):
+    """Return the positions of zeros in ``seq`` (skipping the first) plus ``len(seq)``."""
+    zero_pos = (seq == 0).nonzero()[1:].squeeze(dim=1)
+    res = zero_pos.tolist()
+    res.append(len(seq))
+    return res
+
+
+@functools.lru_cache(4096)
+def print_rank_0_once(message):
+    """Print ``message`` on rank 0 (or always, if not distributed), once per distinct message."""
+    if torch.distributed.is_initialized():
+        if torch.distributed.get_rank() == 0:
+            print(message, flush=True)
+    else:
+        print(message, flush=True)
+
+
+def get_batch_on_this_cp_rank_wrapper(fn):
+    """Wrap ``fn`` so the resulting batch's transposed ``position_ids`` are recorded."""
+    @wraps(fn)
+    def wrapper(batch):
+        batch = fn(batch)
+        set_position_ids(batch['position_ids'].transpose(0, 1).contiguous())
+        return batch
+
+    return wrapper
+
+
+def get_batch_on_this_cp_rank(batch):
+    """ Slice batch input along sequence dimension into multiple chunks,
+        which are parallelized across GPUs in a context parallel group.
+
+        The slicing routine is chosen from ``args.context_parallel_algo``,
+        ``args.attention_mask_type`` and ``args.reset_attention_mask``; the
+        batch is returned untouched when context parallelism is disabled.
+    """
+
+    # With causal masking, each token only attends to its prior tokens. Simply split
+    # sequence into CP chunks can result in severe load imbalance. That's to say, chunks
+    # at the end of sequence have bigger workload than others. To address this issue,
+    # we split sequence into 2*CP ranks. Assuming CP=2, we then get 4 chunks, chunk_0
+    # and chunk_3 are assigned to GPU0, chunk_1 and chunk_2 are assigned to GPU1, so
+    # that we can get balanced workload among GPUs in a context parallel group.
+    from megatron.training import get_args
+
+    args = get_args()
+
+    cp_size = args.context_parallel_size
+
+    if cp_size == 1:
+        return batch
+
+    tp_y_cp_size = TensorParallelYUnionCP().get_parallel_group_world_size() if args.tp_2d else args.context_parallel_size
+    if not tp_y_cp_size > 1:
+        return batch
+
+    cp_expanded_by_2d_tp = args.tp_y > 1
+    if args.reset_attention_mask and args.attention_mask_type == 'causal':
+        batch = _get_batch_on_this_cp_rank_in_megatron_cp_eod_padding(batch, get_actual_seq_len())
+    elif args.context_parallel_algo == 'megatron_cp_algo':
+        if args.attention_mask_type == 'general':
+            batch = _get_batch_on_this_cp_rank_in_megatron_cp_general(batch)
+        elif cp_expanded_by_2d_tp:
+            batch = _get_batch_on_this_tp_y_cp_rank_in_megatron_cp(batch)
+        else:
+            batch = _get_batch_on_this_cp_rank_in_megatron_cp(batch)
+    elif args.context_parallel_algo == 'ulysses_cp_algo':
+        batch = _get_batch_on_this_cp_rank_in_ulysses_cp(batch)
+    elif args.context_parallel_algo == 'hybrid_cp_algo':
+        if args.attention_mask_type == 'general':
+            batch = _get_batch_on_this_cp_rank_in_hybrid_cp_general(batch)
+        else:
+            batch = _get_batch_on_this_cp_rank_in_hybrid_cp(batch)
+    elif args.context_parallel_algo == 'adaptive_cp_algo':
+        batch = _get_batch_on_this_cp_rank_in_adaptive_cp(batch)
+    elif args.context_parallel_algo == 'hybrid_adaptive_cp_algo':
+        batch = _get_batch_on_this_cp_rank_in_hybrid_adaptive_cp(batch)
+    return batch
+
+
+def _get_batch_on_this_cp_rank_in_megatron_cp_eod_padding(batch, actual_seq_len):
+    """Slice every non-mask batch entry per sub-sequence for this CP rank.
+
+    Each sub-sequence is cut into ``2 * cp_size`` chunks and this rank keeps
+    chunk ``cp_rank`` and its mirror chunk counted from the sub-sequence end.
+    ``attention_mask`` entries are left untouched.
+    """
+    def get_index(batched_actual_seq_len, cp_size, cp_rank):
+        full_indices = list(range(len(batched_actual_seq_len) * batched_actual_seq_len[0][-1]))
+        batched_index = []
+        start = 0
+        offset = 0
+        for actual_seq_len in batched_actual_seq_len:
+            for end in actual_seq_len:
+                end = end + offset
+                chunk_size = (end - start) // (2 * cp_size)
+                batched_index.extend(full_indices[start + cp_rank * chunk_size : start + (cp_rank + 1) * chunk_size])
+                batched_index.extend(full_indices[end - (cp_rank + 1) * chunk_size : end - cp_rank * chunk_size])
+                start = end
+            offset += actual_seq_len[-1]
+
+        return torch.tensor(batched_index, device='npu')
+
+    cp_rank = mpu.get_context_parallel_rank()
+    cp_size = mpu.get_context_parallel_world_size()
+    args = get_args()
+
+    actual_seq_len_lst = list(actual_seq_len * get_ring_degree())
+    batched_index = batch_index(actual_seq_len_lst, args.seq_length)
+    index = get_index(batched_index, cp_size, cp_rank)
+
+    for key, val in batch.items():
+        if key == 'attention_mask':
+            continue
+        if val is not None:
+            # attention_mask was skipped above, so the sequence axis is always 1.
+            seq_dim = 1
+            bsz = val.shape[0]
+            # Flatten batch and sequence so one flat index selects across samples.
+            val = val.view(-1, *val.shape[seq_dim + 1:])
+            val = val.index_select(0, index)
+            val = val.view(bsz, -1, *val.shape[seq_dim + 1:])
+
+        batch[key] = val
+
+    return batch
+
+
+def _get_batch_on_this_cp_rank_in_megatron_cp(batch):
+    """Keep chunk ``cp_rank`` and chunk ``2*cp_size - cp_rank - 1`` of every non-mask entry."""
+    cp_rank = mpu.get_context_parallel_rank()
+    cp_size = mpu.get_context_parallel_world_size()
+    for key, val in batch.items():
+        if key == 'attention_mask':
+            continue
+        if val is not None:
+            # attention_mask was skipped above, so the sequence axis is always 1.
+            seq_dim = 1
+            val = val.view(
+                *val.shape[0:seq_dim],
+                2 * cp_size,
+                val.shape[seq_dim] // (2 * cp_size),
+                *val.shape[(seq_dim + 1):],
+            )
+            index = torch.tensor([cp_rank, (2 * cp_size - cp_rank - 1)], device=val.device)
+            val = val.index_select(seq_dim, index)
+            val = val.view(*val.shape[0:seq_dim], -1, *val.shape[(seq_dim + 2):])
+            batch[key] = val
+
+    return batch
+
+
+def _get_batch_on_this_cp_rank_in_megatron_cp_general(batch):
+    """Chunk the batch contiguously per CP rank and split the stored 2D mask to match.
+
+    Raises:
+        AssertionError: if the stored attention mask is not 2-dimensional.
+    """
+    cp_rank = mpu.get_context_parallel_rank()
+    cp_size = mpu.get_context_parallel_world_size()
+
+    attention_mask = get_attention_mask()
+    if attention_mask is not None:
+        if len(attention_mask.shape) != 2:
+            raise AssertionError("The fusion attention operator currently only support 2D attention mask.")
+        seq_dim = 0
+        # This rank's rows, then one column block per CP rank.
+        mask_row = attention_mask.chunk(cp_size, dim=seq_dim)[cp_rank].contiguous()
+        if get_args().attention_mask_on_cpu:
+            mask_list = [m.contiguous().npu(non_blocking=True) for m in mask_row.chunk(cp_size, dim=1)]
+        else:
+            mask_list = [m.contiguous() for m in mask_row.chunk(cp_size, dim=1)]
+        batch['attention_mask'] = mask_list
+        set_attention_mask(mask_list)
+
+    for key, val in batch.items():
+        if key != 'attention_mask' and val is not None:
+            seq_dim = 1
+            val = val.chunk(cp_size, dim=seq_dim)[cp_rank].contiguous()
+            batch[key] = val
+
+    return batch
+
+
+def _get_batch_on_this_cp_rank_in_ulysses_cp(batch):
+    """Keep the ``cp_rank``-th contiguous sequence chunk of every non-mask entry."""
+    cp_rank = mpu.get_context_parallel_rank()
+    cp_size = mpu.get_context_parallel_world_size()
+    for key, val in batch.items():
+        if key == 'attention_mask':
+            continue
+        if val is not None:
+            # attention_mask was skipped above, so the sequence axis is always 1.
+            seq_dim = 1
+            val = val.chunk(cp_size, dim=seq_dim)[cp_rank].contiguous()
+            batch[key] = val
+
+    return batch
+
+
+def _get_batch_on_this_cp_rank_in_hybrid_cp(batch):
+    """Apply the mirrored ring split, then a contiguous ulysses split, to non-mask entries."""
+    u_size = get_context_parallel_for_hybrid_ulysses_world_size()
+    r_size = get_context_parallel_for_hybrid_ring_world_size()
+
+    u_rank = get_context_parallel_for_hybrid_ulysses_rank()
+    r_rank = get_context_parallel_for_hybrid_ring_rank()
+
+    for key, val in batch.items():
+        if key == 'attention_mask':
+            continue
+        if val is not None:
+            # attention_mask was skipped above, so the sequence axis is always 1.
+            seq_dim = 1
+            val = val.view(
+                *val.shape[0:seq_dim],
+                2 * r_size,
+                val.shape[seq_dim] // (2 * r_size),
+                *val.shape[(seq_dim + 1):],
+            )
+            index = torch.tensor([r_rank, (2 * r_size - r_rank - 1)], device=val.device)
+            val = val.index_select(seq_dim, index)
+            val = val.view(*val.shape[0:seq_dim], -1, *val.shape[(seq_dim + 2):])
+            val = val.chunk(u_size, dim=seq_dim)[u_rank].contiguous()
+            batch[key] = val
+
+    return batch
+
+
+def _get_batch_on_this_cp_rank_in_hybrid_cp_general(batch):
+    """Chunk the batch by ring rank then ulysses rank, splitting the 2D mask by ring rank.
+
+    Raises:
+        AssertionError: if the stored attention mask is not 2-dimensional.
+    """
+    u_size = get_context_parallel_for_hybrid_ulysses_world_size()
+    r_size = get_context_parallel_for_hybrid_ring_world_size()
+
+    u_rank = get_context_parallel_for_hybrid_ulysses_rank()
+    r_rank = get_context_parallel_for_hybrid_ring_rank()
+
+    attention_mask = get_attention_mask()
+    if attention_mask is not None:
+        if len(attention_mask.shape) != 2:
+            raise AssertionError("The fusion attention operator currently only support 2D attention mask.")
+        seq_dim = 0
+        mask_row = attention_mask.chunk(r_size, dim=seq_dim)[r_rank].contiguous()
+        if get_args().attention_mask_on_cpu:
+            mask_list = [m.contiguous().npu(non_blocking=True) for m in mask_row.chunk(r_size, dim=1)]
+        else:
+            mask_list = [m.contiguous() for m in mask_row.chunk(r_size, dim=1)]
+        batch['attention_mask'] = mask_list
+        set_attention_mask(mask_list)
+
+    for key, val in batch.items():
+        if key != 'attention_mask' and val is not None:
+            seq_dim = 1
+            val = val.chunk(r_size, dim=seq_dim)[r_rank].contiguous()
+            val = val.chunk(u_size, dim=seq_dim)[u_rank].contiguous()
+            batch[key] = val
+
+    return batch
+
+
+def _broadcast(item):
+    """Broadcast ``item`` from the tensor-parallel source rank; no-op for None."""
+    if item is not None:
+        torch.distributed.broadcast(item, mpu.get_tensor_model_parallel_src_rank(), group=mpu.get_tensor_model_parallel_group())
+
+
+def broadcast_dynamic(item):
+    """Broadcast a tensor whose length is only known on the sending rank.
+
+    The sender passes the tensor; receivers pass ``None`` and first receive the
+    element count, then allocate an int64 buffer of that size to receive into.
+    """
+    if item is not None:
+        item = item.npu()
+        item_len = torch.tensor(item.numel(), device=torch.cuda.current_device())
+        _broadcast(item_len)
+        _broadcast(item)
+    else:
+        item_len = torch.empty((), dtype=torch.int64, device=torch.cuda.current_device())
+        _broadcast(item_len)
+        item = torch.empty([item_len.item()], dtype=torch.int64, device=torch.cuda.current_device())
+        _broadcast(item)
+
+    return item
+
+
+def get_batch_on_this_tp_rank(data_iterator):
+    """Read a batch on tensor-parallel rank 0 and broadcast it to the TP group.
+
+    Which fields are broadcast depends on the pipeline stage.  When
+    ``args.reset_attention_mask`` is set, ``actual_seq_len`` is broadcast too
+    and stored via ``set_actual_seq_len``.
+
+    Returns:
+        Dict with keys ``tokens``, ``labels``, ``loss_mask``,
+        ``attention_mask`` and ``position_ids``.
+    """
+    from megatron.training import get_args
+    args = get_args()
+
+    if mpu.get_tensor_model_parallel_rank() == 0:
+        # NOTE(review): a None ``data_iterator`` leaves ``data`` as None and the
+        # lookups below then fail -- confirm rank 0 always receives an iterator.
+        if data_iterator is not None:
+            data = next(data_iterator)
+        else:
+            data = None
+
+        batch = {
+            'tokens': data["tokens"].cuda(non_blocking=True),
+            'labels': data["labels"].cuda(non_blocking=True),
+            'loss_mask': data["loss_mask"].cuda(non_blocking=True),
+            'attention_mask': None if "attention_mask" not in data else data["attention_mask"].cuda(non_blocking=True),
+            'position_ids': data["position_ids"].cuda(non_blocking=True)
+        }
+
+        if args.pipeline_model_parallel_size == 1:
+            _broadcast(batch['tokens'])
+            _broadcast(batch['labels'])
+            _broadcast(batch['loss_mask'])
+            _broadcast(batch['attention_mask'])
+            _broadcast(batch['position_ids'])
+
+        elif mpu.is_pipeline_first_stage():
+            _broadcast(batch['tokens'])
+            _broadcast(batch['attention_mask'])
+            _broadcast(batch['position_ids'])
+
+        elif mpu.is_pipeline_last_stage():
+            _broadcast(batch['labels'])
+            _broadcast(batch['loss_mask'])
+            _broadcast(batch['attention_mask'])
+            if args.reset_attention_mask:
+                _broadcast(batch['position_ids'])
+
+        elif args.reset_attention_mask:
+            _broadcast(batch['position_ids'])
+
+        if args.reset_attention_mask:
+            actual_seq_len = broadcast_dynamic(data['actual_seq_len'])
+            if args.attention_mask_type == 'causal':
+                actual_seq_len /= get_ring_degree()
+            set_actual_seq_len(actual_seq_len)
+
+    else:
+        # Receiving ranks allocate buffers matching what rank 0 sends.
+        tokens = torch.empty((args.micro_batch_size, args.seq_length), dtype=torch.int64, device=torch.cuda.current_device())
+        labels = torch.empty((args.micro_batch_size, args.seq_length), dtype=torch.int64, device=torch.cuda.current_device())
+        loss_mask = torch.empty((args.micro_batch_size, args.seq_length), dtype=torch.float32, device=torch.cuda.current_device())
+        if args.create_attention_mask_in_dataloader:
+            attention_mask = torch.empty(
+                (args.micro_batch_size, 1, args.seq_length, args.seq_length), dtype=torch.bool, device=torch.cuda.current_device()
+            )
+        else:
+            attention_mask = None
+        position_ids = torch.empty((args.micro_batch_size, args.seq_length), dtype=torch.int64, device=torch.cuda.current_device())
+
+        if args.pipeline_model_parallel_size == 1:
+            _broadcast(tokens)
+            _broadcast(labels)
+            _broadcast(loss_mask)
+            _broadcast(attention_mask)
+            _broadcast(position_ids)
+
+        elif mpu.is_pipeline_first_stage():
+            labels = None
+            loss_mask = None
+
+            _broadcast(tokens)
+            _broadcast(attention_mask)
+            _broadcast(position_ids)
+
+        elif mpu.is_pipeline_last_stage():
+            tokens = None
+
+            _broadcast(labels)
+            _broadcast(loss_mask)
+            _broadcast(attention_mask)
+            if args.reset_attention_mask:
+                _broadcast(position_ids)
+            else:
+                position_ids = None
+
+        elif args.reset_attention_mask:
+            _broadcast(position_ids)
+
+        batch = {
+            'tokens': tokens,
+            'labels': labels,
+            'loss_mask': loss_mask,
+            'attention_mask': attention_mask,
+            'position_ids': position_ids
+        }
+
+        if args.reset_attention_mask:
+            actual_seq_len = broadcast_dynamic(None)
+            if args.attention_mask_type == 'causal':
+                actual_seq_len /= get_ring_degree()
+            set_actual_seq_len(actual_seq_len)
+
+    return batch
+
+
+def _get_batch_on_this_cp_rank_in_adaptive_cp(batch):
+    """Select this CP rank's slice of the remapped sequence order and build its mask list.
+
+    Raises:
+        AssertionError: if no attention mask is stored, or it is not 2D, when
+            the mask list is not set manually.
+    """
+    args = get_args()
+    cp_rank = mpu.get_context_parallel_rank()
+    cp_size = mpu.get_context_parallel_world_size()
+
+    attention_mask = get_attention_mask()
+    if args.adaptive_cp_manually_set_mask_list:
+        remapped_seq_order = list(range(args.seq_length))
+        generate_adaptive_cp_grid_mask_by_user(cp_size)
+        grid_mask = get_adaptive_cp_grid_mask_by_user()
+        scheduling = adaptive_reschedule_task(grid_mask, cp_size)
+        generate_adaptive_cp_mask_list_by_user(remapped_seq_order, scheduling, cp_rank, cp_size)
+        mask_list = get_adaptive_cp_mask_list_by_user()
+    else:
+        if attention_mask is None:
+            raise AssertionError("Do not use adaptive cp with full mask")
+        if len(attention_mask.shape) != 2:
+            raise AssertionError("The fusion attention operator currently only support 2D attention mask.")
+        from mindspeed.core.context_parallel.utils import AdaptiveCpOps
+        adaptive_cp_ops = AdaptiveCpOps()
+        remapped_seq_order, scheduling = adaptive_cp_ops.get_adaptive_cp_info(attention_mask, cp_size)
+        mask_list = adaptive_cp_ops.get_mask_list(attention_mask, scheduling, remapped_seq_order, cp_rank, cp_size)
+
+    batch['attention_mask'] = mask_list
+    set_attention_mask(mask_list)
+    set_scheduling_info(torch.distributed.get_rank(), scheduling)
+    set_remapped_seq_order(remapped_seq_order)
+
+    for key, val in batch.items():
+        if key != 'attention_mask' and val is not None:
+            seq_dim = 1
+            per = val.shape[seq_dim] // cp_size
+            index = torch.tensor(remapped_seq_order[cp_rank * per:(cp_rank + 1) * per], device=val.device,
+                                 dtype=torch.int)
+            val = val.index_select(seq_dim, index)
+            batch[key] = val
+    return batch
+
+
+def _get_batch_on_this_cp_rank_in_hybrid_adaptive_cp(batch):
+    """Adaptive-CP slicing across the ring group, subdivided by ulysses rank.
+
+    Raises:
+        AssertionError: if no attention mask is stored, or it is not 2D, when
+            the mask list is not set manually.
+    """
+    args = get_args()
+    ulys_size = get_context_parallel_for_hybrid_ulysses_world_size()
+    adap_size = get_context_parallel_for_hybrid_ring_world_size()
+    ulys_rank = get_context_parallel_for_hybrid_ulysses_rank()
+    adap_rank = get_context_parallel_for_hybrid_ring_rank()
+
+    attention_mask = get_attention_mask()
+    if args.adaptive_cp_manually_set_mask_list:
+        remapped_seq_order = list(range(args.seq_length))
+        generate_adaptive_cp_grid_mask_by_user(adap_size)
+        grid_mask = get_adaptive_cp_grid_mask_by_user()
+        scheduling = adaptive_reschedule_task(grid_mask, adap_size)
+        generate_adaptive_cp_mask_list_by_user(remapped_seq_order, scheduling, adap_rank, adap_size)
+        mask_list = get_adaptive_cp_mask_list_by_user()
+    else:
+        if attention_mask is None:
+            raise AssertionError("Do not use adaptive cp with full mask")
+        if len(attention_mask.shape) != 2:
+            raise AssertionError("The fusion attention operator currently only support 2D attention mask.")
+        from mindspeed.core.context_parallel.utils import AdaptiveCpOps
+        adaptive_cp_ops = AdaptiveCpOps()
+        remapped_seq_order, scheduling = adaptive_cp_ops.get_adaptive_cp_info(attention_mask, adap_size)
+        mask_list = adaptive_cp_ops.get_mask_list(attention_mask, scheduling, remapped_seq_order, adap_rank, adap_size)
+
+    batch['attention_mask'] = mask_list
+    set_scheduling_info(torch.distributed.get_rank(), scheduling)
+    set_remapped_seq_order(remapped_seq_order)
+    set_attention_mask(mask_list)
+
+    for key, val in batch.items():
+        if key != 'attention_mask' and val is not None:
+            seq_dim = 1
+            per = val.shape[seq_dim] // adap_size // ulys_size
+            which_per = adap_rank * ulys_size + ulys_rank
+            index = torch.tensor(remapped_seq_order[which_per * per:(which_per + 1) * per], device=val.device)
+            val = val.index_select(seq_dim, index)
+            batch[key] = val
+
+    return batch
+
+
+def _get_batch_on_this_tp_y_cp_rank_in_megatron_cp(batch):
+    """Reorder ``2 * tp_y_cp_size`` sequence chunks into mirrored pairs and keep this CP rank's share."""
+    cp_rank = mpu.get_context_parallel_rank()
+    cp_size = mpu.get_context_parallel_world_size()
+
+    tp_y_cp_size = TensorParallelYUnionCP().get_parallel_group_world_size()
+    rearrange_idx_tensor = generate_rearrange_idx_tensor(tp_y_cp_size)
+
+    for key, val in batch.items():
+        if key == 'attention_mask' or val is None:
+            continue
+
+        seq_dim = 1
+        b = val.shape[0]
+
+        # [b, s] -> [b, 2*tp_y_cp_sz, s/(2*tp_y_cp_sz)]
+        val = val.view(
+            *val.shape[0:seq_dim],
+            2 * tp_y_cp_size,
+            val.shape[seq_dim] // (2 * tp_y_cp_size),
+            *val.shape[(seq_dim + 1):],
+        )
+
+        val = val.index_select(seq_dim, index=rearrange_idx_tensor)
+
+        # [b, 2 * tp_y_cp_sz, s / (2 * tp_y_cp_sz)] -> [b, cp, s/cp]
+        val = val.view(
+            *val.shape[0:seq_dim],
+            cp_size,
+            val.shape[seq_dim] // cp_size,
+            *val.shape[(seq_dim + 1):],
+        )
+        # [b, 1, s/cp] -> [b, s/cp]
+        val = val[:, cp_rank].view(b, -1)
+        batch[key] = val
+
+    return batch
+
+
+def _gather_hccl(send_tensor, recv_tensors, data_parallel_group):
+    """All-gather a 1-D tensor in slices, copying results into ``recv_tensors`` on group rank 0.
+
+    Args:
+        send_tensor: 1-D tensor contributed by this rank.
+        recv_tensors: Per-rank destination tensors; written only on group rank 0.
+        data_parallel_group: Process group to gather over.
+    """
+    data_parallel_world_size = data_parallel_group.size()
+    data_parallel_rank = torch.distributed.get_rank(data_parallel_group)
+
+    dim1, = send_tensor.shape
+    # hccl_slice_size B parameters, occupying hccl_slice_size * (dp + 1)B of NPU memory.
+    stride = get_args().hccl_slice_size
+    nums_gather = math.ceil(dim1 / stride)
+
+    for num in range(nums_gather):
+        start_index = num * stride
+        end_index = (num + 1) * stride
+        end_index = min(end_index, dim1)
+
+        send_part = send_tensor[start_index:end_index].npu()
+        recv_part = [
+            torch.empty(end_index - start_index, dtype=send_tensor.dtype, device="npu")
+            for _ in range(data_parallel_world_size)
+        ]
+
+        torch.distributed.all_gather(
+            recv_part, send_part, group=data_parallel_group
+        )
+
+        recv_part_cpu = [x.cpu() for x in recv_part]
+
+        if data_parallel_rank == 0:
+            for i in range(data_parallel_world_size):
+                recv_tensors[i][start_index:end_index].copy_(
+                    recv_part_cpu[i]
+                )
+
+        # Release the device staging buffers before the next slice.
+        send_part.untyped_storage().resize_(0)
+        for recv in recv_part:
+            recv.untyped_storage().resize_(0)
+
+
+def _scatter_hccl(recv_tensor, send_tensors, source_rank, data_parallel_group):
+    data_parallel_rank = torch.distributed.get_rank(data_parallel_group)
+    global_data_parallel_rank = torch.distributed.get_global_rank(data_parallel_group, data_parallel_rank)
+
+    dim1, = recv_tensor.shape
+    # hccl_slice_szie B parameters, occupying hccl_slice_szie * (dp + 1)B of NPU memory.
+ stride = get_args().hccl_slice_size + + nums_scatter = math.ceil(dim1 / stride) + + for num in range(nums_scatter): + start_index = num * stride + end_index = (num + 1) * stride + end_index = min(end_index, dim1) + + if data_parallel_rank == 0: + send_part = [ + x[start_index:end_index].npu() + for x in send_tensors + ] + else: + send_part = None + recv_part = torch.empty((end_index - start_index,), dtype=recv_tensor.dtype, device="npu") + + torch.distributed.scatter( + recv_part, + send_part, + source_rank, + data_parallel_group + ) + + recv_part_cpu = recv_part.cpu() + + recv_part.untyped_storage().resize_(0) + if data_parallel_rank == 0: + for send in send_part: + send.untyped_storage().resize_(0) + + recv_tensor[start_index:end_index] = recv_part_cpu + + +def check_param_hashes_across_dp_replicas_hccl(model: List[torch.nn.Module]) -> bool: + # Compute per-parameter hashes on this rank. + params = [] + local_param_hashes = [] + for model_chunk_id, model_chunk in enumerate(model): + for param_name, param in model_chunk.named_parameters(): + param_hash = torch.frombuffer( + array.array( + 'B', hashlib.sha256(param.data.to("cpu").float().numpy(force=True)).digest() + ), + dtype=torch.uint8, + ) + param_hash = param_hash.clone().npu() + params.append((model_chunk_id, param_name, param)) + local_param_hashes.append(param_hash) + local_param_hashes = torch.stack(local_param_hashes) + + # Collect per-parameter hashes across all ranks in DP group. + all_param_hashes = [ + torch.zeros_like(local_param_hashes, device="npu") + for _ in range(parallel_state.get_data_parallel_world_size()) + ] + torch.distributed.all_gather( + all_param_hashes, local_param_hashes, group=parallel_state.get_data_parallel_group() + ) + + # Make sure local per-parameter hash matches DP rank 0. 
+ param_hashes_match = torch.equal(local_param_hashes, all_param_hashes[0]) + if not param_hashes_match: + for i, (model_chunk_id, param_name, param) in enumerate(params): + if not torch.equal(local_param_hashes[i], all_param_hashes[0][i]): + rank = torch.distributed.get_rank() + logger.info( + f"[Rank {rank}] Hash not matching for {param_name} in model chunk {model_chunk_id}" + ) + return param_hashes_match + + +def extend_seed_all(seed=1234): + os.environ['HCCL_DETERMINISTIC'] = 'True' # 'HCCL_DETERMINISTIC' is a deterministic switch in ops level, set it to 'True' to enable ops level deterministic, set it to 'False' to disable ops level deterministic. + os.environ['CLOSE_MATMUL_K_SHIFT'] = '1' # 'CLOSE_MATMUL_K_SHIFT' is a switch of matmul K-axis shift, set it to '1' to close matmul K-axis shift, set it to '0' to enable matmul K-axis shift. + os.environ['PYTHONHASHSEED'] = str(seed) # 'PYTHONHASHSEED' refers to python hash seed, use a string of non-negative integer to specify the seed. + torch.use_deterministic_algorithms(True) + torch_npu.npu.manual_seed_all(seed) + torch_npu.npu.manual_seed(seed) + + +def batch_index(seq1d, seq_len): + from bisect import bisect_right + end_points = list(range(seq_len, seq1d[-1] + 1, seq_len)) + indexes = [0] + [bisect_right(seq1d, p) for p in end_points] + seq_batch = [seq1d[indexes[i]:indexes[i + 1]] for i in range(len(indexes) - 1)] + return [[elem - i * seq_len for elem in seq] for i, seq in enumerate(seq_batch)] diff --git a/model/train/yoco_moe/mindspeed/yaml_arguments.py b/model/train/yoco_moe/mindspeed/yaml_arguments.py new file mode 100644 index 000000000..5617ec535 --- /dev/null +++ b/model/train/yoco_moe/mindspeed/yaml_arguments.py @@ -0,0 +1,25 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. 
+ +from functools import wraps + + +def core_transformer_config_from_yaml_wrapper(fn): + @wraps(fn) + def wrapper(args, transfomer_key): + config = fn(args, "language_model") + config.context_parallel_algo = args.context_parallel_algo + config.batch_p2p_comm = False + if args.use_multiparameter_pipeline_model_parallel: + config.deallocate_pipeline_outputs = False + return config + + return wrapper + + +def print_args_wrapper(fn): + @wraps(fn) + def wrapper(title, args, after_validate=False): + if after_validate: + fn(title, args) + + return wrapper diff --git a/model/train/yoco_moe/requirements.txt b/model/train/yoco_moe/requirements.txt new file mode 100644 index 000000000..e2a4ffea7 --- /dev/null +++ b/model/train/yoco_moe/requirements.txt @@ -0,0 +1,23 @@ +pybind11 +ninja +wheel +numpy +six +regex +decorator +attrs +psutil +pyyaml +protobuf +einops +scipy +sentencepiece +pytest +tokenizers<=0.20.3 +transformers>=4.43.2 +gpytorch +pandas +scikit-learn +SQLAlchemy +pulp==3.0.0 +highspy==1.9.0 diff --git a/model/train/yoco_moe/setup.py b/model/train/yoco_moe/setup.py new file mode 100644 index 000000000..3e05b39b1 --- /dev/null +++ b/model/train/yoco_moe/setup.py @@ -0,0 +1,99 @@ +import os +import platform +import sys +import stat +import subprocess +import setuptools + +if sys.version_info < (3,): + raise Exception("Python 2 is not supported by MindSpeed.") + +__description__ = 'MindSpeed for LLMs of Ascend' +__version__ = '0.8.0' +__author__ = 'Ascend' +__long_description__ = 'MindSpeed for LLMs of Ascend' +__url__ = 'https://gitee.com/ascend/MindSpeed' +__download_url__ = 'https://gitee.com/ascend/MindSpeed/release' +__keywords__ = 'Ascend, langauge, deep learning, NLP' +__license__ = 'See https://gitee.com/ascend/MindSpeed' +__package_name__ = 'mindspeed' +__contact_names__ = 'Ascend' + +try: + with open("README.md", "r") as fh: + long_description = fh.read() +except FileNotFoundError: + long_description = '' + + 
+############################################################################### +# Dependency Loading # +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # + +cmd_class = {} +exts = [] + + +def package_files(directory): + paths = [] + for path, directories, filenames in os.walk(directory): + for filename in filenames: + paths.append(os.path.join(path, filename)) + return paths + + +src_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'mindspeed') + +if os.getenv('CI_BUILD', '0') != '1': + subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt']) + +setuptools.setup( + name=__package_name__, + # Versions should comply with PEP440. For a discussion on single-sourcing + # the version across setup.py and the project code, see + # https://packaging.python.org/en/latest/single_source_version.html + version=__version__, + description=__description__, + long_description=long_description, + long_description_content_type="text/markdown", + # The project's main homepage. + url=__url__, + author=__contact_names__, + maintainer=__contact_names__, + # The licence under which the project is released + license=__license__, + classifiers=[ + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'Intended Audience :: Information Technology', + # Indicate what your project relates to + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Topic :: Software Development :: Libraries :: Python Modules', + # Supported python versions + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + # Additional Setting + 'Environment :: Console', + 'Natural Language :: English', + 'Operating System :: OS Independent', + ], + python_requires='>=3.8', + packages=setuptools.find_packages(), + # Add in any packaged data. 
+ include_package_data=True, + install_package_data=True, + exclude_package_data={'': ['**/*.md']}, + package_data={'': package_files(src_path)}, + bug_data={'mindspeed': ['**/*.h', '**/*.cpp', '*/*.sh', '**/*.patch']}, + zip_safe=False, + # PyPI package information. + keywords=__keywords__, + cmdclass={}, + entry_points={ + "console_scripts": [ + "mindspeed = mindspeed.run.run:main", + ] + }, + ext_modules=exts +) diff --git a/model/train/yoco_moe/sources/images/FBW.png b/model/train/yoco_moe/sources/images/FBW.png new file mode 100644 index 0000000000000000000000000000000000000000..dbeca92cc2d620f3053a4c79bd9ce333bf0fbaee GIT binary patch literal 62176 zcmeFY^8D6YP&Xtbh#3q5 zp73I=j3FUm+CG<(Qj?dGqEU0SvwHr*5((*XNSrpRj`}yEbp4p<=wS$MG@d)YwEKHJ z$uJrk%?jKYxFi}kZNb!t%$-S2Iuj{v83IEL8i5WYqRt@C%FOJNu+R5HdsCu)%C36P z2G0i4#Gegb=Uc2VkRtJUG~me#6y1SG3aOHj6LnBzq_#7NqFTWH2Z$duh$ICN;^U)b zVxD>@oP9#d-YkE_T7UcS)>!5pSM(7QoJlgZE8)nI<`o2~d`Ct`fba1bNQ&SD@aQaUX%_!0Y9U+z190>8=c4Y{34E?aeG_^EB$tGftk*{jt zy&W38w)^%i{@!IpPy}miP&ez(cl=FhapYNT8o`Yn&!3Eh>PgM9t+4H2BPrnae#MVH zEx=Xn3OYf-3{Jb4TY?2g&|nijlanbre!i8hhTUObn>i-)CeTT^WtA0-pWA-*E8u9k_tGT) z&7AMKDd&`?Z(`ho9{T8rD2o0-E*SQc^~h$NVEh#gk>;4DOr0MFsdV2WpG>c=`|Umm zT%a%$)b&cW;Y?3NCC-nbR!vyz>|k5%z~E^xH;X-5VuLf7_?xsr9j8X&oITL&__9mR zbH#^v==E@P9v>v3B0hiKn7gtahoq?JZPYkuJ}<0|hT95|)}(F{XiI-?!A4PKf1z4g z_&&qg@PS2RdH11Og}iHG^vhSHzkKJ)`9g%mPT)o8k#{-uxYt-kCezd0_xIc_Y_Rq4 zq=LV6*qhxS!4~YeaWEFa6GJh5{iWId;EPDhG=4g+9$FNdAlhEQBX#M(QR-|8j2T+* zf+C{li<6K~L8<$0IQyv6w7i3DSibc8cW){Vkv69(ZKjLr2{lb`+ipH#wpn zQJ|8=W|+MdQvYi7)iYS;$qEsT82D!fQcZIG$KkTt z64ZN_F3puCIO1|lIOC73L}_kfl}NuHSXpV!en+wU;}k^+30r4Tgkk%fyibfjnJb+G; z#q`lK&NAklB4xmtG~IiN8d~@E=0v4_hCv2S##c1^i~_j&A*#Wm40zvs*5E(sWj@q@ z6nTpKT=co%7IcfO(0{x*ugt1wJ)5W~KATC4O@mEyHrK^qFL~y5tymS&bm*>BB-5{N zyghy2W_u#m*w(Nesbx~kq>ger)OlEbu~^3H^^*2__C9iUc7E+_cjB;3ck*W2a{Fq0 zs95~z@5d8aGwRdhAsP>zNu5z^L)_n8M#|>sjzONY)QHUR=<)9n?9rBL`b4@L!=;2}HVmdSRuV6UjN6Sa&`cg}y!yBF7pcalx9T+<%lR*yMJ8 
zyX+dFM(>fMlanN?AHhj}LXtxAj#M#xl01WFpT{&2nrx9=!u{~o=yL_Z6G6hHGHy}R z%b3D$3-0P9%l@(cSG@Gq_Ph<#MAbyqVz0@6L4Qg9YF@v`exJyQXp6{}J(=S(hcZXE z-e6TpWs|K;-OnmKz0zvoXD44~x`e;Ezjtmv!65e}k5u+mju`ONPk!mMr!mVqOYS25 zh^vvk5z9q;?}>-2N0w*c4I%3Im#-3Q6<@NPp{4<6u%qB2+?SCWa`EOZ<`Jv(%t7iO z)w9%_$9CSw)2qvtQM`Gzrv1!yu|6m!s5SJ z@~YrWj{07PNXCtr_oeWq)y4i#PwpA38k6I?5!LJdn|d$(>zSLR+rrzFn^}~8NIkM9 zauf;&KVl-1qf_h49cX~I^gk6ZeUZ|c?nwf$2qsW?R!5eE? zYh`QKM31TdiadKsH|g`v7YYE0$L z^+iA;eK@rsNr<~r_(nKSeVf;GROK#m5jNRr!=K|y>zvC1@-6T z;iEfVewuz8TV7kmqsA8JmL-Fmpqx+)G^DF5FgRx76?v_`{;p)Rgi`lrT7KH&^|<5H z*Vtjs9l9%H4NMJVuk{(RW9a0sG%^Zp^Ia6_Kh9bPzlbTJRVAj{=IVR-L;M@{kE;#s zwKEmPR2P%ar%p%5eZ0HR*prT$Xlj-clFpSD`qy=>s@I3y`BL~j`Y0?^W>VK1h+Y#9 z?j;fn*loP+y)L7a*N|j%ROlCB@PNpSHfTN2zjOY`) zgJG4|w@EXvqfWeyn9-Oa#W#vvw<6mcRT)oo`;~83(O1xqLiO2DwDt{hn~O{) zK1`TvPnMdNu^9cDoMEx$iv1k-N>5Ig*?7{3NcVfig5yo-Nk(7b4?#9R)>ADqP0qsW z3g6b{nXa~o+LUm?xEGf-x-}Q4@f94-n%^~Z^+WW3eG;3Euiu;FGHTVZ6Pz=uy6M9{ ziabrLe59)Fp{0GmGgI4ixAgs_(|&sW)pq)lC{}G`t^O&`Y5XqA)!LQOZSv(#=?(5x z$~`yA0#R${ik~<)Y7Uf71#`+8jdT_lqUVO)-TIf;Wf!}o=2>0)u+B1YrV=6Uyn0+e#_^19Fjbt~{dD}{_Xhlpn?dU4rPs@c zGUZ_>2_l>mhAypdHwDiYv1{FG#tn~}M(roAY|M8yzpqY0%F>#s-rnqD4Mr#m=Dey~ zP^foqd}*eC_40O)*tP1QsUgMt)4@X1Rlrr}y&9@mF+O*llg9Z2k6F=~C{c7TrwfO< zRS#7+RU^?j(VMx-L()Cnt=D546I=#H6nmI^O_z@CeUaKR+Mb3o-qydLT+E!Fd=W?a z-83iK(zqINto(a6_W*NY@4RoXd_6re{oTRqg)`6l`}~{p7b82X4XW}P>1WDUN|y$w zo%@QblfmU@SFbOx7!6olm+t#~3ChbyIdnlqsZ&z^$e;N;xpKx<-Pu#J@8Q_qAee&ot`3;{cg+L$s{O_j??h`~bi;NbH^go}(L?kQ@d=yK^f5~Bh zONG7smk1xIl9EqQe26d4Lp&Pf|M_HQbqf5ybYWtGOZ9cYlPLbDN1(v)uo?Nk1kfPk z<7Z}Bnq?|Q{;%FXP;2W-tbb`8?Gplo@Nla!$^F{_d^J;d|6jIu03k^v_dn48uhulk zWO^@9{%yI58HBG!2{ZQF`~P)SH1Jg2zXjku;08jN?)I+zyHkX_|1ALR|9AKQ$K2ho zXfjkLS|?Cvnq|5^5LEc#KSS0N0*3qH#9e1vWV{Ml$l&0hFS4&@u~uzYd!^DpAv29% zF|uKJr-!UEyZ-Lp%vKddg;URunOKOV?(+-luTx@c9ojznD{m=C21aY5x&T4@S z0?0yTX{9sl{a5;&>}-oN=81pe^#Lt1CnX=P5|a!f-luUMD=39VM4TKSAD6AC4Vd2j zCscLlK{$TbDQ3t2UxnaVcuS*SBs+)t<$a(UaG(8oO|@_Abbr5O0cI*2FYYZ9BD}}& 
zGCf?!ep=Z4ZDl45Vjx@)Y?B@HkJ*2HQJ`xh!v?5b^6`sCT)hTTU@ zt~=14G0@-RB^K!B!VQa$TjJl7yW~Am0=AxcX%)hij&$&|ea9lKZ8I{+GnKUIPj(p& zdU~bbfzKxE5FB9vZ6va@14XNcX>o5^jo{C~Rr7=;isk_a-h*Ncx1ny(i6 z51y^ZKBo=JXUu;{;Oqk_WGU41(9;XlA?#0vuF15i7;USOBh0E0hS(qMPxf-vcyHE+ ztd^A7vqu-9xz&3d@6YSg4DXzr?a$9Pz4=u*md~V=y0pA})TwP;kc^F?3Sr1}2U$g9 z5eoZHQvE5D>3*_J-yMEER46>4Z`WLFeMHBeo0D}y&2udQ8i;`gMZ%Gk-Z`!%X;nc9Qubk!u5s~{hDzN?gKZ;JIBIFZ1N|M+xVa__(eTV z1F{G`9teo?-z+*iZ6)7aopyJ3Gcz%*3^*#M2$Vi;R>-5EW)u~6T}@a_5P&JrF9C+{VF8aaA@1fvjt6IMgeZ=FtHzB1UC+gE z2tKoW%R;rX2|0+Z5+>oV_i5niW$chcJ>ZA4(Tp9J=tj^}7&tKek@16wMw;6Fs{TJCNEOuf)%b8%!5%v>wUc zdsO$yZt-^Mi@ytYQa7t25$D#46<5c*z+cZ7Rgc(F9NPy1%;lmc3bj#rtjYrJ)Oy{x ze;dABjS{zqCAIgqzlf_C^P7R*K}BtBY~(U-5plQW8{hn1no@(eyg%Q_)qEUNQI+no z|1ed|VXmX1yq%_L{DQIH(y+nN^7i_o;rxz-k{1}=aYdv??F*1ndjye7Wy21rpLnl5 zx-*?lQq6c*1_K*2zBEYT$?ngG5(E}9qlv_|efrQ!*lf&yU^A*6uypY8 z=)>`I)zPV1&jeVj`&Lox?jySv|F&}aR@t7fSNGBsgqAmYmg#e0iBSA!GIo)&OV`xv ztIslAui&>{>t`!C{TwW}ioZS3d>552WdoElgbh2sEZQcNhro}m&RIV1aMO3g%MSF7 zN7WOuf3Y8gw7nde#SuSpp|d!W*{8b$Mc3?(pkkIhB>qwKq%mY0`z-uW)G1lEH(yTwQjbT<~| z=_T}I?sRdm&o{WA6qS~8oHEzEn%4^r z=fKHV%^-T-PR(uP{|k(4-QH{{J~zdeud96tT-s#@q|ZX&w;V#qC7$R}m|SkcO1@Bw z$4mXwyt=h|0LZoSAme`-Ahns6r<|r$_%yeQus1YOXYyUMW;C^^dqAtbM({Lq$>!B| zak(_~`lRp&@4>q_%&O_^dNl#2A+l@KH2op%9RI33Jy zt(5UK|u_We<&G3Ynx zb{vbI-uqHZ-PCWxVXZfpI(Fo`YDbPMmptO9jRIu#>yS4b zN_3BSWr~T|v`X4s*sa4Z z7_U38Czz?dp4J)Y?uL*NJ$&&s2y4@YjGs@*CFEY}t%Q&Nm^Z}niaIS{=#zH2QDS^T zcDuewW8K~%{td%tp<|D!#XH_4Fx)hT^#K~!y&26^$D7`hI*FII6J;mgXYC@$#j!m; zS8Sk%gynR3gMRPdC_R4cv%9iF7YIXsb&ogjkQq@Jk-&!M7?APq!;p_~2<}K|!q}O??@84DRq|`V;SN{pO0*8WPbXwE$zlA#V%!mKGo!H4 zF7gM6@$Na@ibe8B%;>wr`A>_q5T+mDD75A&MD0gE!nK%quH_-D%%IkrnPo76KoWm{ zkquN@?2e+Qe}3N3{zT+{C!N=GUL>j(m2?qOxn*~ZC=W)gC7d@P-ep1wC14aUL*A9I zNU*Qh(kwkZ!q_twa=wgBM4xT;5HxE!ZzZNoqofT82r!xSJraBO$aO!s<0zhlHn>&% z_S}4oNI9^{@rMFP<+fXkPec8?N2$FIvyMMVO2ABB!%B3+u`yy`LyX9|IBiD5KiJ}> z^mqH(7YhpRUsLj0_1oGtF*WC+r^w{4XcTK<{hX<_b;u?N*4B33pKFNoX5*%2YzI|K 
z@IZl^E|IWsK!9Ap*Eg9?=munxM%7jkQ<)P*nrzoEX1`aUIxjT6+01Bco+(W<=e`c0 z?@&$=l+dZSH%-}?35Fk6PRWL~8Hi0$hvl$omDPLflsWKy9YsMbdXH%^KrO5r{lEvB zp4OIZ{C%$Zwcr?WO6hw%f!C{cDFVOe`hLqB{1k8iOQm-^-$C96FT0%Zv-Rh zELVl5gDgVZGTt!I%ry=ra??j0kw$>p7@mLedLx7S?V;~5wF_>*+M306v*%Tv#{Cxw z$q#MwoM4`0!i?;vUuU+M@1;8>cZLyBKeTy}UFD(-yFUDS???yT?+DWFNZ40vy#N`? zGFhKEDb??7CZG#W*J2MKQq03G7!%cv#yZ$L54rat+%M-Oc98Z~n1$ciK2 z6x7N*829$Yg-qIf`k!2+AhraD!>KsnA9R&Wx1*J*+~}9WpI*&Nx_#e6J%qx3Ym?}M z`fR>2`IFFOUtgaP?%enhetOjd#wy!M&K~= z!3k+}IdF7witi1;R*pf%qF`vkUSZS=4w`0e62stBy+qVfa}BR6zoK`MtoH^)O?jT+)0?1DbRa+?15xYcS_!dAri1LBf_lmMS!C+A48uq zI_@I?f1w;O0$h3f|uK|g-^xrCP>1FZ&7kt6Cb$v1!Hdb z(y7hFh061?kkx6>l3k-K(Bdo|6O#9LY@LUt)l5kSCCrE52oc3JVUuMrS}2+F+s-Ko z6E|}UxQAqn%FdNM;|_p6h6h47F{J|I2I*2_eCiw4&8+2JU%3ei4C;iF+kdX--mtJn zvqi<8<`$Nqcn@J`Ouau>u6P$Ohy^Ci_uSgAX9~i}*WWpeN3~r2%I4ryK`>QOs;~b0 zPv)^^kK6hqc3V26#vhx@Irf%Wzc(UU>?ItodmP-b`8b&%%d$O(=Ud`mx}GSp zp0pcPJ4^Gfi!u}?*`(rDO~K)<80#G8ZzhIDRnhOzg1z*etnSyP{yt!%D15R_vQT_G zRFATAW^?hqio@)9#UTyQ`@BW#Tt$U?|UsxO7Dl@H=V0 z6=tp}XT6&L{1{oT@|V8weCL3r`Z!Zl7DRCTda#0oj+a*S7NckK30H zzhDrvcQn{f)!L6p?sC1W3nSu`444aJ%7xPsLv%jLpY1msF4O+4RD}XWKu7p_+#U0* z$M7I%g~#@Ip*o{te94mKA%%5(l*;Sn_B$n-#d@_x7M-Sj$UL2NN%`w;kCsm-inWjQ z$c1Fy+E~c;(EExuo}SnK#Mq+Gw1t{dT*m=(ZXeXK?A1Azk3+yD(q zaQLCd|F=31h=DrmbbU?;Gzg9(%&l;Nq>fabl386KR*@7WBOMVDv-9A4*;1es$S5w0jl@m`b>VA zn$l5KRXwm-i0_L-wERnB0#;yAU}#3tKf3+(8~|{flr>#?mPtU9HR=DNi;ho2KmTKF zVgfMs*H6psKboEG|ARtjN+Q6vc7(~s?!s2Atp2u`<{l7tKgvCm2t4?%2w=a`Pp@=&{vMT{_efZ!6eEZKvFRW6 z)}nwD)}zRW=s-OAHUXh$O)+L7e>bb3Lxf-fk1hdtpo{2HR|_K!@hal;3jmE0|Csh4 z*TW&K44{G})DjL;0#Wq@5mis-`osSdRS48aWZPE{sG!3Ig6Gr1XUlJY+fO40j7X#> zqxvKN0}kl$gNX&*-zH|V18rh8jmP8v$6t@($k2KxHRL#HLf|n1Wnem?p{GXr-!J`? 
z^9j;IBzT0y2Fh@aey{UN=m-9x#)S!QS?V&S*TEs zCDi{9KAh-_lxq4SY{BL9SeN(X0E4p9Qi}Yd63L@49U(jqd2yy1UO!tq?5VF8Li~yd zPk=sx=46pde{zH)UB$CWs#YzptQ)CrWr_rKil9Ev>McU=*$;qF+ZZ3T?kscOE zQ##VwU$k4Ycb}Y`IG@-LuwZ{hNXxWGvzINlzuFiu;oYKEYqGA&j6+JAThobwLAAl~jTy6$U02%A{jNb=$|3*y=k_;jt}XqDeywU4u34S~={f$(7vQ+oks8V7g z3z`$gRvn|2qT=uPJf+y@i2UOtk08HTRG4HDp`{DsJF_ht+u1~;#~>lSqm;~tHiJpZ zBfZH706`cEDt7GqP>mn0(4_@Ww~oUP%qq8niLHp41(yZde=FcA0734fiI~Po+H5XY z6v3iulb`m%vL`vAS8Fp-rg?Q9vZ|i16xZ;xNHYo$2l^-MLTLAcfgN`=i8K-5Kf~Xs z;datqe4Ud6fraO#B{NF{^WKG39y=4KEcxP1G6R+ZMeCSP@6MMO9%AbT+zd8jL5!# zABuC;3J1#~9CCpu?=G^sUO%SM1}P++jfx7agwb~p`pNLCp@FDkV*;;e88oo#nc{-H z-A*_37KHpy6|j7~dDi*I%hA+`w$wHv@1Ty1;O&N$WeLzy7bi1|G0Y|t4|JrZgjg%; z9d&Ek5$sH$O!`F(V6oB=>p~i#Irtt8_ks;Kdw3NbQQ-B&fmFB=`VJNJ1WXEwXD{+q zK%{a+9q?(T*K!0stnCLz!s_a(r>Cc(r~!r|>Rc<8ye`xkGZE{m}v+7?9)ebSj->>`e~$DH}EQyokV_6^;f}V#$gxpa&Yo zpcyo{S%T>`KrYDEfHR&3Xrra}Kn(6#Gdw)JwOpdwKaoifl61I<+Mn8m7pLCs$PDC7 z09E|SoA6wyga zo8Ff5QO?8lpi|Jr_yv>Y2fPnBUI&BCO3T-UREYH{{j-%khb)K#j-X^xFbr!ch|wpf zRAun>MtiZ?(k#`Dy>bQ+NxjQY1U#4(*p$L^%^t+Kw-L(nmLP%+EeQu#^!yO3q#w^O zknir303`{-sfU<+F(H8VnJhE19j!<63Jer|!&S;Db2ejJo+j>1WrSleXArDVi;z*P zIpmiT1!ScBm1iFif<-ou?wgZ_a$8otznpH=NRFI;JHVK~mm8LkJa<3ar%XFy(XLD_ z+A3%{1ZG?^!IAQqfvfVK^%4L_Dk%d86o&AK65_*N5zm#qe1rtU!n(QsHAKv*M}m*9 zK9OG(`o3cGF7Uyi3mn3PXv-wm3q`b5@$slpii|}oN^UE7k|XeP!D;)L}1}2q;HTobrJ01vZd~WPzd`ePWo%oM^JZ%OV>h_G@uq z<_$I8bp})k;cVrg7&PGQ#0ZdItFtWIix?hQttHaSMP+WH3F2D~BhH3%;&?3egvMx%Ityc8c44asNgj zRHsgI`iuWvDUzcTSomF)bRo1P>Ip=uM(i|Ri2-|FiPb43fjp=IWF&<$j6LD<#g|j9 z@o1UXHZ}Gz~Vy|jIWX}dPUQYx=0K)yu zkJkd-hFPueQ@l!fpY)f)Saq|fW_j>i4Q4Pj+RvJ+$E^^_&#JSp12M9aAVD&H>5)b( zjiBNL?nP9bzSls-v+J1EH(+AL(*x%?mLvgg-A5SFo$yXtt^lHV6l61ELhP3CK&5Jp z7q0YYRRU*f&i4P*gFRfx^{(B5tk{`j0p2orX0SlCTkZ%0d;GTfNb)hIb#yH|>$DEOu0QSQs0eOe7Am z;DAj+Q2d(<9%n>#0bC1?tAIf4x0JC9IXycQeAA1N2yB%BxlvgrW(!D+hMz)3L`CV_ z>94P^tHk@&fcu^?t9>bkvogA>=A`w>XKRZmlj$Au{w!2XU>)pZAW4EDv|f8mv#(dEUbN_67=htwH;DrHUoOP>alH@$=b#WfUZ>%IcDxLXOfUbcWeMZc 
zWSEe7!6NC(>uXsB#4v$VPkjLok_i5|T54&k(p=(;1b~ry)=B(<-+syf@Ba#nUt#A|lE(x4LH?vG}et^&f+XLPspQcb@eiSnbC&f(VVsDXHjv-y)Xf zx2CJ|phqH5eEfGe4k8MjEloy15D`&=rg>oFs%8(V1M~-*8!Nih1h-woItj zamo4W9Q9O zGT|wwxoqUcqx}N_C+%ImUcC3(>;CGKuI8Gn*bw#8z4^%|nP1>6L4W$g*>59-1{aQQ zNvg-xBc<2+of+Z3Uv0a}?1s9zzu`>$WCA)FUhC~-Q)f~g$=z-$b}!fYY&OxW$&p-J zpW)`+UXLb!7W3BTyhOkH)%2+4$aoBB{dbl{`;Ofj}n0GwRqJFl!FxF)G z3*@c-RIzG{keDB=u_?T^gLwtYx1~4B4^3u1jU9?9l=m8it#j%9>LV*~-TPVl?E_Q3 zh)SVe>Gn{c_wAd`p@C#RkK?70`pVV)`DX1LtB6Ifo0nUviaXy6ZHsFu`5nh=*(Tra zT)vvFu^efanJChD?dam?zA>0;J6!PvR4Lxq%iZWdSnEGGMpE;2R(E_gu$s8#W?i4WS())b zL7d}qD?zgr-W*_y_JK*s{rb?Mc6&mr+@RUPR zp=`WVTX5E@&h>18di|Crc^dGa_o#$|t%BS;#Hi2F?Z)y3j&iyqQj+|erDV9Ic=gYA zKKlqv3s&n3zEY<6{V{2oBcm^YLymVWcdIL$iavQdZ&8p#x8`G>s&aQsB$D%1d#?M* zPKcG(x9XU%;;f|3v|!uuk0ZCDR~v&#`ZLWna?hjem^`cPqOy`GuXd(NNUNnq2h+Jd z`l$7X`v+5nd0%eVC=Kjx=N79oDa$L$7eB2HwYDmd<9ooz%O{YLteUJVy6B$lq`W_% zG;Y2>usdYvU1vQl$ybfnsSsPPLp8u|V;<}6^Sj~vwR&q>(S$XdW?2njUHOZ#26ivw zc0MqRJ+~^h$1TyBu5lQXyN5Nvexza+p~o*Rdinj7!>E1OFxaZ@B&rl(j3wKK6W=$m1U@wno*>)aZ_Vt$f#-aq=-qIYZd9`;^k!D!Z|# zE(uxE@}T#PofWzF?D^PA1`j{4+p5k3Z{P8hG_g0IO;>}mlj1lGzt4ZA9N2AZK^f5F z!=Dd)Ex%Fqqi^u~mDMqkl&iz#!Pug*$EacdZ`0dyTdz9>eZ-vJycgsnhQSLZmcQad zT=AL1GY*$-{kQpxeqZ*L_a)rU_P<)F*KJgJInr;VQNFxsVQob`WLz?txd5>LU*4%) z$yUoRs!`us9}RJcU%SI<8OawrOUbPsE4Y3Bos{Y!g=>upGzFGSe+Piy{@yWU@ z8&55J<0N=ziK++6C%pF+ey#Uky_|1tPO`RAB<3<~X1!{BnsYb8eqg1H!M*y$fkJjt zNaJV$ZjQ>WC(n-I^WQBuNy$l0+U|K)b*5i~w2eLa!SSu}{!!;Ggnwq69Dc3HgjK(^ zG9qkNXo#2OmEvNZj}w^_5$ScyOxn8`v)2~(T08G2Roz$E91yUZNxd>;;eS}GP|G^K z5GT$1)llBn+}a9!|2f$oO3Y!DWp;7!}p7vrC# zaGGP^@FVIMR4`GaJ>ND!c2e-{??h3H1Pyc6Q$py!($jwJ@iiE4vHlRGOtU2B3&%8~ zP2f^rnC&BnU=l)CtnN;KiZ54`Cx+Ft;|6E(uEBr9doKAzyYO*Zc7HQMjJ=L5dq~p0 zj`|$#KRTd0OcPx64WFg`YzHfEI!1@IZl2w-z?^Yn5r#=1OTapG@P8icc)#1#>8~_O zbH_0Wq{ETI>WO33`pJHPhV01+X}vmq)Dy8S$hxma0oh-Hb-+=1j84%X{&>>s+l)$c zd{sgXX>1 z5qo;Dt4wB$2^cbuek8Xl@%{ zgObOUU07sNZJrR{K5sqj2D66HI8in!pDaH(<^b(vso~`G2&td7RZc4vR)g}6K(LQh 
zP(C4!H1E!61BQi&EzUs6*=#c|zBg_#PJk4g0!k-lks3xoL&FcHv+U`)Q>Bi^yoP}+ zOoMX?4|+9>1F4ar;d>YUG?N`ijT4$39-U$Pi^3w-kdJp5PSZn!6WfN~u5Z^A2p{Qh zQ|IHwk)^SwhxAAVnauU8$-$&Ov%b@qm;}JGld`Oyw897mFi)@>bXTpc(Qt-YaX^C_ zq8{sq*5Wu~7l-)L@J7FIfuQ)i$fel6*1pjrWYlvWNXUEmzM%QMq%66tVvvtk1VUJC z&H%j{AFlyMwC!X;L@^4ajG&``yx(4#*4|_aF5OmpmlSS)B7rl2sm*9%kmV)~L7rqc zTe%Fx?5WSDUD}qVDRb(n_(GF|3KvI0_Jlx2#No2WzEmg>_j}N6X5Vn5JDwQOV_sOm zGBWASJ&R+R&G$5!u97tFtUa5xDxGP5K35@q=Wue&_ z(wWv)o8lv3bU31{kda17S=F#xBFLo?G@CjoT#Qtd7kWGjla9h7Z5FgRUVA`K3)XZm zDxS-?$JaO=hG?$P9yc#v4=8kw%U*b>MRK7Zcf)2v%`Jvl@v)>PkK5PqX9|0LOey zyDApgNC=&4zzJ?7v6vgNH}>~=CJO7g!RFcb1nq#oU1gyyM>FKG4odtye&|TQ*ws02 zkDE|n**(dtx;i2W`iS*Ep_>B4H9VFASycYky9)w!-i>2yQ!g<`o4$1Y+*} z#DU?%e8F)V+SRO&)>h!WH|*_}Hy7>U;J~8?*0eO98X4ABZcf3o$dJaCIYRC$?*(d} zbdfwUl|Y(gkT;j*p>(nL^|DQygK*R)gTXWHJ6vyZgEz=ve0(U)EiHDQ(m_7>xL`Jg zH9pmIB&&j(STCwU>1=z{PNZhZi+5+2(;))?O&^Bo$pTN|@BrI}aOy2@*Y4}8@A5CTz4#_A7l!x^Rrk7eN ztbcK-`|AY#?3a7(@7L8)#j}%sh@Jb^J!W0>r>!$8U7(3+snR`7V)y1z0b!g%IhVC?yW9= zY6mp>=nN$&Uy@hxHnXfo{g=!usY26-aM^MwDTJLGR=*)3Ym7v?PP9t_+x&-xU#v$?`8#{b49qp%PbZm%H3)xerospWqL>!VJG(dbWB0k zu#QAb9_XAgt%(vEsh}I~{qe6in7-#>@NhgDsWu&a$la&W+)x7F$5^0c16($;hZy1n zSHg6qG4FUm!nbpB9s;+(u~FMeGK)z&t(DYfk3R`vft^{#&gu}VdCDOpbzy) zypIi$5tW1+(%_sxNL|HxV%$lv!5j3$SzZnryv2L3I~*&h(|SP%DVFfstrDer6wmixFRz?u_J(l*P7wi5l;B zB)ztkfzXcjN1A4?l$XV0opu_J$*_b0kJ0 zSsB)Ky`UuW&5xH-PGD8%;X)?y(5WSy5qFyFa9AG*DI`urif5z8Vo$_`q2ISRQ)&x` zKDZ5wiaPs4MDd#oC?z9s6LiGA2@IsD{6FwF6$1XIqTxnI6Cnp1c5eZraAmMzCxLD{ zS3D@-RRxd+KLT9fpAhJ?`9CyzCKAB62$85%0dqp*D}vLufbt^k-9dmzhb16XeMJ!P zfQYFX1Nbh<2nf5sN#+6{B$1!pFT$RWehfkH?WK$H34{S|D5yM;$6VkK2Eqm4<)43{ z6VdQ=w7+PfpA>1h0E|diLApK5{twS5A}V112m3L_0q}c!=Aa08Wn3D`t7}<~96N17 z!#X|=fJr7BgK6M0%IeBWe+rg5GeTVm;0QF4-2Q@_e!~9`L?UG%3qbB3d;m9n!2s-z zce)ub9s)=qxC>w%cFgU51QM@|z;u;s2deaf5x6jnuNpMEkA%R5|LYNi1K?%^2)fVd z;Pyom3BcPkAHar(S^q5sF%SrT03Vn>^#^EaYXS_?+4;HKFRQQP@}NHesxS9fzfKsm zVdd)s*ihyJ$Xrft1$xeB;IvSBwR{!I-I;C`j?U*Qf%5P=D5r6K_&eg3mXk 
zVzr-*gZnOUUFhlVjJd5jIT2YNdu<^HT2B%j5D|3k^fup1Se`K!k^?xg7tp+!z2rz? z0P`_%`R{cjeh-y?aA8L(6P%DeFUqTD2SkYv9Uw#%-%rTqf>9%o&a_HP);Fs=z9@+D zbIL1Xpm_~pi!Xy=V=gqCf9gN^=nm|(0T+A}2Wp3^z=Y88`|nkxYDOM&DuzsK2r;KY z@HEkf=;+MIm8w{TXBa@}g_a1gf|$;vfe>>Ug;P+G&WP6l+z_p#$Tx1AsB^%itD<~@RVq;=qy&P={ae!Ge;`soEEUw_8GoVS;?g8j?yU;d1 z7D4PHW{}EUZQTr4>ow1o#8VaAkB2|{;uRei0-CnvxR zIW4H0C`}{4&1o$xLIgPn;vz;(?GI2{jf1m4KM+$hia%kaSNEdf`7fqe^Zre4UBwTn zsTADo?d7?~liH0|qL<^*F`SIviN8?R17%mSu z6sU|WKtL_xh7-bPGZhTj$)?a-a8xL?OpH|(aGqd17zqc=KmZevf$TAK$Mfd$q;mw^ z{-`~+drtA|dxSpVZr>ivpONvsGNHIF`3bH*IVeS-ten_rJT<0pQrozKRTgma#O4{D0Ei;em#b@puz z3#veq#;WbGnHT`tgNc)NGWZlR4I+VV`-TIvj=HR@%+Jrp!R4aNbHIpY00i!8W7hTD z_?=&Zlv3=)#RZ`VOCAjyxco&$Kv84fjjj7!#u9<*wjplMnv=zXdHA^P7*Oo* z%YXb&hVcI*IrdTl+{5!uivi#G@Fo3tG_O| zlmLQU+!w%%`7QqH%=$CavaH{S4WU%O$?e@m8@uszBC?J?=g+i8X`F|xF(6@M8)s$2%$PqGO#1YiuRC(H* z`B3PcR54F7+?MN~iprmvT#R>9>{>2n?ONkljG4xC6X~R2O~h8pYC_+7Y)*rR{!20=$`9rlK+F-cX=hR(Zb_ZZyJqOo zG#RkbiO@N^t@V`44JPyNfP;tN@>}SUZlmjN3pjrgghh^%n3^hL({X<_C;HP%-u@uO z!EbdTZ`MGN4_1o_UV?i_e@LI|X4gqOt>5!C3gLFxE3Ifty(k0y;B8@ai_jgU7=Ur>B zAKzH}#~y8;f6rOr7`Peufe()qpQ;5us&F1$>`e6th@c{_BQ ztAaQsUqDsii)l$9U^d|(Q7Udv4Nt&98d3rYAzlw2xe;=5a>MP<86Q7>R7~BT7Fs!2 ztbZLknkAA|``e*ltqOsoI~B3L#bZv>2(n4|OWQ_qdY~%WH{nI!q`w3sBWE~B3xi=o zlTgZek@ZMjq3@PUOF;aVwfVRcB&SzjKYidl-YqRK>p;|y`mc0M*T6u}8c?cQSU?`k zyiH?50TU;NUT{n6&E*Ua5`mAz?8b2U98yuF8TOn;0F<0R{Hd@%OSC@P4(Oqpvk&w> z>V*{GS5^S0qG~%`y#mI83r$wj!bK)o0O&mdIw#wRR8@J)h0eLvbY%e3SKSaKCnMEA zFQIUxU};nlx3#Z#>Ulxnheb5)LOMmI?zrjTj)}t^v;6!oKpyVEgnPALaYX52z1(J( zvR!Yi#Wy(sM|!!W#A7kmUXU17ggnv-`ym z7a;<;c*OXE);CNjo4qzNqVUiY0-rDhY`rz8eXv#>4G%^X&F9zM-3>}<6jL|O)tfn3 z(LtdwABOLu?5dk8`oNoK;9;130K{2pQ_1#E!cQu@(ZDG~-@Ixg8Q>j!rJi0Ms;H>g zhl-W^+n|F*u7VnxRi%UsQn1R10PDpA@<}!hi)=zO+nX9x zQ@zMgX^iuhG8|w`ltH#~T9X~+8wA$C=bn3j&3)*1aZ;IFARh>EejCp{u6`Ykt4a>< zh6!E>70Yf#(xoz@p>QjULgaMq(_QMJ18UHM;{;%p=Ea7YvEwLY=lKi*8$=ui;W;T2 zj)a!MQS-f06W)TXANGbUbP(FCQ*z_bg&m)k-jo%kf$qT99Blt~~NrptgWm^7L%B;r?n&o&5^-_@I@>42AX%r+^RVA47KG&#G}{Ax7I)lO^ACur|z`Pp) 
z?kVw|b&z|EC@pMVD$G*^8KY=RQ(3O}EMS#~7(;gHW1zCXa^@Q+(l;ogpmzGzV6h== z*2Vo){byF>v4u95qe~CQ25JVi43J3ISY;}sf$k6u9p`gk?tgj#%zZX*!8AA~dBdS( z_CEW$3Zul~#6}#b`HqK3rcCR%4^<-{-{J!_8aXTM$3LpU3(>v=&{Jd<^cLl1KPHV_ z^VkD?sfIOoplX*A{_1jzy-vhy$J*2xx^aHz-_g{JdKEMBr~r%5imcmWV3w00~kxlWN-dAVE&|3bDF=ZsT*C23sUi^~vQUwsMNd1b6CN&vSm$qDNGnu9jPA zGOwvz=_p~cbGM+$cr0#OT3QarWW^snfoaHao8Doj--N&Uh-~7DS)hw(Hq^kv!sdL+ zNzhNUIWO?I{O7iaYJd~bU&Q|{f&huDXj@lMXr4l7T0g5SuUe5acEozZV~u5b(E6Qe zFV^ADG?1O%H{K``iYTUa*?aX{EqqUPU)@k}8(9MP*`!TPlX80fY=PymOcydKtEnII z<>8eXlG$IUr4I_ABTz?BXJ>@bZSlV8cA%Mv3(qf@-(^2rDTqemdi<6497P)*#q`%w zdE>}}?Q?vcNW!Vx2F(zsg|3{P1v=mJUmn@P)-nU&A2-Nb-Ny*V7<;um%ietz^mM#jm2A|M9bms z>&XO#8WIno&{mb9=}Bl)VKgpVMh-zZA4+lvrBblRB@n<3-N64TMGQPGpr#FDFJaBh z{3OrhLx0SKg(=GX%7W`-P_W#Lr3KREAsQjqap9(kxSc}SU+jSlxb#Ry9Q059K&clt zqlV4x#ew-qX2&R~)<$96q2Zb>e|MBrc@5(#Z6dJ;OjuCm`dJ8n@w7H`b1*4sE$xl( zmq0)At*VN$Ld3+$W3@TB{n9_|-~W=0ft}jrMn7Ggj6ks((fug=VJ+1y!Re?xNjs?ua&*cj7UkHIb zr3OO7dWj;P;gFL=Ewt#;-Tv721nQy;W&Dp(lN0Fheptqq z9ZKV6-4UCB8voy&Ed$=QxU0*#@6T1bc%Iuy3-THaz^h(9qht#`e`OcUTx6HTJD+HL7F<=%7f;;FmY4snI6_+9pG18WDvc z`_=uxYm}|$B>Ynh28Ag;p9Ckv5MS{NAi%+E)qikhEMmcZO#qk3qDT0Tjc@u*o*At9 zTj&8{DE@gDn21^m+z#@3P|QKSr0{#Op7p(Dm3#?9I+rD8I@ZUXiJm(-_Gg%M#IOKt zQCVAwhJwbJzMsN~r1gKm6DJ8#dO4u*5hy&oJ~k{~mpv?riRJC5FTk#U0L9{$4{ggh zjOvov@BNLis#{v_r!3Wcl5hQIGI`pTcB5~SBYdl?oeV_{fq|-+NQp%WGmT|2UIgD- z)GPV~s>x~F$4g6!rN;-1GLdh(#&lZTx$l~N09!!Ls~@N_5Lt~z-d@uJ% zL*9(SqSdehJh!I^sL|-g3FOQ9fmvR+=Yhu;iy!2J>Hftc7nf+@7TE){(_{6IIdrE1 zcwI;b3SPgac+=|X=AcvXIb)Z%=tFuyQqt@RR__ZA$nlEt=bnh?D&Q_bLAoC9?m7nJ z*eW^Bk%~o;rOQ90EQ)9G;fNyj?`p>P4$k-&eY_cRB;R5J$4y9BczJeqc6z;^0m=9; zShEo-J?jZ8NxcT*>8X%8Eyl7H@`~Pk`NUQue6`}Q5PTH}R}uw$NIS1hRk=`flBlNA z$$Sf2TSMJ>3ui)I3%l^gbHbL$I&VcXUXMr~p@UG8{CL$uedPw%=eG#E7C08f|8@;&0zp8Z37;Uo)p%r-*`f?FI^GVYvPx)uviG*Y3S(Q zDvKPhW-Cwbahs23_4KGu_CC{8j5*_)&H0$sa+~NctiF&Ph#j2|xiCs*7`gT*K8W%D z!5NcoVk&GL-@1AdrqXEdM>LRp4fimT0(KG<@tSP(#W3INZV`t|L!%GbY~a{1DBKL` z4#NRuzP++yzw&zr>yoRGaQJb7%avXi*F=DQ?)%5Rkp_4oQcEtDT9#{`OD7p+##kc@ 
zA2{--=(({iq1& zKYs7glUiF*70Z>%cu8thO0I#5{LUaCutk8a>Wgm5@fV4({DOtu*pnfB!fm5uTGB46 z7xZt^=>m(1J-%QY09Yp}Gog;#_T%sIT%vlrqgPHux<#Mv({PxLb#uN}OT`6YPn@Op zS8qMI2-TWZ{R|h(_gSE9VS;b%sQGy^?@jMR(*f_^#%}WqWNB z5jW&*&C+*Bb_i&GLH1pem1nZeSzH&l>d(1SgOk-~EW+?{e?Q#zH?*QUnPX^-riF!; z`$TOje)oA4CM_oxaXML&jrv*W4~4ZBt9{@mwy6EC+;tKp$-IQG#OBz ze~PiJzM{k8KN?9V20O{V3jjL0dfEgp(a04S!N>LR=cB`L+51QY|MI>1qt1a?4fc$y z+asY~7A1U6d=2fBv$r1UrV!-MYr(Sg9i{Cc$CfHQ z`taxBj^=@gr=9V}n{1)uyV_D1Gs31XF@Cs*a(9o_Od+wb@v zZm*mV*YXxG$qf6|<{dW&8u%CU_Vzh(C5>=qcAU1=Cb_4JG&)+bEd|0-4e=bc|1R7U z&1a|#yalmys(~LSS|m%Gs#k3_-ARUu$nYNF_MUNyCt=wk^|DdkOGlB78dR;;l9DWi z&Ev`PYOz8RDNHkpChuJyysLjqSGFL_n%;JIERQS~G)wF!Y#4u5q9{LSic8g^6w>DC zSFNoxKqb;15IfblEDO4RzfTpXI-2H-vLsc0dpoD{ZbX>dGwiooj1sNTX(FbHqI4~qJch+Jli&7k~G z{ck(k%PP5zF6%+bf4bJjNEd+;z3t1Ez=RV7O~ZO<^n8G9lS z?ev;53JvmK@qR%5g`cvNv3ha`ltX9Rl}Lp2C81JVyDR1yx8%gh?*4o_VeqrD)9wW<+4=R~E)q^M|Pw`mRd=Te; z1#S~sT7LhK@2y!ZxASonejPSoho*6?FJ6X4B<6fR9Iuk=#jIrlgF0%t6I-(9-9|9b zZo$$)w^M)mNBcod63}f-;=V%7uh}@UPO*M3wCvRsPBsg+c>lWcXxd)%`m_3Smc@Y6 zrw)D?Mncx%#d3=)8Ou+ye4rL84ig^Xzc+pV&c|ome)&rAl#Y*JbRW>YR=S(NyX5^v zUnI+znQ4=>I;t~3D)rWKJC+3YPDkB=zHD3p@MdM!{r0;;@uJNCc!+)cWADQXwhep- zmh%oaGA`7h+&<~*-8Wzg3l9%uazYtzCH*$p$Fj#CkQec zo_wk4{bO112Vw?IPY~lb10JHGXIFn&HQq}A%gCnaQy6w~dvKN~WToA&HS6-V9H-d` zux2}W<67zMi?$S>IFaC&t$N%Z>NUguu4mRo^Y798-k#Fb6i4;3V8!WjSqPH)V7bY_ zizm=F0|SBktVoiQnNI~T7$JSN-SUg6Xgpom=lu3Me0h&-d7-VNm(0sT>nc7oanh0g zG%>oc%Vnd7v5P9`!*W9kTW08!q0EV%?r9@%?`x~=cQ{5JY0~O#c2>2B4~X#)&1%>( z5=v%=^qXrW7V|fM>Y)YA_>j@13%Ig;b2?Jrb2Y}?e_VF=c`VByNYvt~Zh{!9Ob=r9 zug&25&*W$Y)VYFr(v* zT4&yAQ5v(e{8Ps|ag!4GvcAtg-O|7uH`Vd~|Riz-8YD+*;N%WjkaGK7ghR zWFXmn>aK^rfavM%o$nvD-|kCi zwWxK^%|DW3rJ(4|TB~qobnejR{tZ_2o`eYF~8boT@rg z7Y4^jr5sJUp0$|~E=p+oREK=cKB_=N6?YBSh!TxF-t;@K5ZO`Etl|_)gYh}`(6g6C z&&Lg8)0s|Zw1TqgoA1(<)~m;UJvU8fEDn;hRA(c{-Oh0&%1$AS^m zjL8=!ymFpUHhgqh9;XN7MV=xGkN)r*CX#;+@vTP^sP$5QX(AnVR_Y5xDi?REP4q11A1*WQWU z#<)4g-f3sFI2@m#+Fl5u1e^w@~8-`|~I= zVP__dDdC@nx3UZw!XCJp&mHnKOE)Y9PZ9FeCY&K^jcW9oy1L9G12ebH@#ydv$AA7H 
zqavOiACsf~waA%)MGj5HTnxeGdvV%=sF-G%en$$Ry;_Z?kQmUQyB#Xjx`T^SWziXS zi>T4zv3%23q*+-bJ)!ZyQI>kONO&dcv3-uQo%4Esf^>T$7geIAySM)y;d#$qwNw`|)7V}TvX!u%_vmva)f?!tO`+QdkJTH`59vQ}B%yW`74|0$f}S@fIV zk72l#^@@S4$X)*M^{S9(KQqu&bdT42=}nBhO!r4V;i!!%Ck!M6u6K0xuJhO}zw52n zDV~A3u$1kigf7*je{cV4Zg1u*Cz{pPtxU6rQ(N67d3XD(u^2Q)ye2Es`TR#~i+itK zF0-0u#sn6#57q_pi9_NxoQoAS?%iiu+asiV+O3|l_a;o3EEen~_jh&_KaXoKRI5C} zy&QnTTjtND4`^VM>u#Elwy&B>WfQ<%2hvR#Jp;rC0fxa>mcRO9iCDUY{3E z7?wo@r*$3Z;!6iPdy=+}RKSZAZ2I(DT1rq#Q!lx(SuSwZ$M%HpkhaXOCuGeWeG=K* znKTz9`~4_B>3%_28cExWy`dnx^PL-%-{*41dQMU1m!h7w1SvasEI!tHptrKvoX9ms zpaqxZ$st|(q7hxq6smU#Z?b&ea`qEc+u9t64?4OECLGR5u(m%}Bsh<0Szf78bBB$k z=YAz%dHZhhIL7v<;48CUwfEXxmXXeI2FHX{c*@R8he}W0=^uy3JJoXxI<iF;MOyzKD@#k^#e4IeBQfT^}iy^-VTiG2luY>Ki`hcazROLrF6fo^Ae~# zu)f^8JIj>8i(3Ge7dZhbMH`?Aix*|E!^$H#5G<@5mhz;ZaLOwbCfx zT2~WNOv;)4+Do$Vd~m1fq$^FcJfJ!oCx?op;d`UCuJhn8-sAoCsk}YT#1da^i>wo2 z7D?aW8-q)-Qmx*_A3qTG5LT&P%G09uDg29pJ0W~dsFN~&%nN)$8>la+sQxAf7VFJg zD|ZBnf!j_Go`Lsu<&1u@2h(ybLkvE@MLXUCrIyAm>**G7WhHTe`s+qE-i!@UEYzfDABOb>#F9uwMZ;t?$D#} zBp-Ph2Q=^r7_q+n$w2CNZWY#xL0egsO}+JH_wDI2iEEc)@%8U>(u%bec&>t-k_hpg z6CP7N_k|D@6IU`}k@G#6;k~PP8IO=^fHfaZ;|>0tm6cq1guqKC$NtFWlXl6=Lqo12@W!Z9XBumYW61yr>HRrHFfvPX}feXE`( zXihOD99n&eiK~XLWC^+K;AIYNL}3&MvLBr{+slk!;5!@XTee%@om|bK4(y2{{Zb+?%(dAN~4nM53VB z<$|X4;$wVp)9na1XQea72hkwwHx5E(#2i`{SMTEo5Uv-D!t-f`lB5~5oiuG_{^s`U zt^mgcK-xhkD#@)PTTDZapo_0bz+?6P`hBvL+rm;7cfQI1S6Nkbt@^j|*hc?|7P8J)(Uq8G{ zZJlKyIWT)DQieYo5vEaR8%Rl?U9%H_6ag=R94#n^HRORcK5v?jHSdHPB7jTUp?B@m zQ-5>CtUPzjFc{x$Y_>ACp0l@%Cc zZWp=ytEm^oyzCdTfYk(Kn0lX%XsPW|P5FHQEXavp!oqIsfPwv|cpM(AToDv8QmY{n zQ7MH+RH7%0a=73^X!}0vYujhylfpx_h2i0m6%qd<5+0M`$(AO9RSruq2CNr=JP6D0 zJqL7}(NCdO(;mwsnZjvZ4hW3NPi&YQ)_natFn9V*!{+O4XXYlSk(OVe*@-_o1btaq z>(_EwT^3?tQToBJ3T2)GtOsB6E^yobKw-dXAXG05ARkWV-DfCCPa?{i_xBmz(Uj7< z0$z?~%!8w&58O1yV!-(Hd3WWmWQt~Wz(>#Z2GB^a)bRq7@+3=I}Y zzsFifeFx8{^8?Rc2FX+!I(ZUVQG$QKU>^^5YX{yW>3|%_w^)z^QP&Va0f0(FF_0y( 
z6Pp{7V}>FTn7M}?V*!Z-fO!1Z)1Ya4z%G#su3Ae|Cqu+;tSIeVB8Fi7vBWH`eQynh=4w-r1Klg|C1pA0A0fLh&U8L z7Y!b)i@{T^3Z=2plvk`yr_O#41-`!X01OyVh72$_D0>AC6#aptVe~c$nog5{rmlTZ z#N_;4Dy^i}C)?H6uza>K)~^{ket9Y<%Up!ZL-$D9p9o3Gda-riluWEy;vYws z-P&I}csksD3!`KRgU3b*lTi`1$>wUJ{n3pd$-eu`V!!hBK4rIlj+BY)R5vxDJSLW6 zHC#tfOOEjoPGne1`@K(Z>Up!k=ck8;M;ESS#>=f48E&T;jV)dRoF9_QJK#v?usuJO zC*yc;mZG-yrDJk5H}Tct+R^}=MRqRDY+{l@%Ox?NPVv0%?-K|@&CoWwleHQ|TX}eR zD}A0Z`1o)Kxo7qltHGap$J>Z<`g$&3sd85V~5 zY$4kgX#quYxna@i)L3X)BnJlvyJ<=w%qVAZ+{;Y7c#GXx&?TH87CGDyb}qiVIvT+! z3Z+JRFY2rE%yg`;1TaX6wqhRtISG>w*_$sgZ`JAOtR}ZVJZt>@+k#@e!C~D#xd&b_ zqa047T#JMNH>H7@-JsO)FAOp}>In(6mowyVM-WHl6;P#HIbNAI^I^bCW2nO!E?*(# z&jvm0d7*GrmS84-!l@uUKIZ);=%9e4i>a!rN^M0p{C9l=KYhma_}lsQSzT4EihBd>Nl?2OGu1e^EDz#QtLbT? zkJ#i1~g_WT)c*$Ql69O7}HUk(LatS(!Ub(flbmGL*t??sT`sK&now zkV&p3Xmv(sKxROJpumfOtguf}qlNk>^@HgBu`-*oa`AYliX*Euit~6iiWw zD-mPbHwqOqtG2+jjIv~5FaBM8BBI-({F$&AD(N-*g&NC#={SQOdf6aR85xfiFo^&-?W3IA80rmJv;2%Uu1>;Umj*iIXn9Hpu9_Q%}4S;=QTBKB}0iG2; zYioL5M%)bID$DgW5=>`I-j@h>ucs@Hnw%YWsY1&bP27no^2rZU6&gwHikdL(cU~CF zUVz{hi&o*=uk6+y_0Kq9b1E=k)A=nZ(E6>wYh@C_Yvt=KEt0P>4I!?lwgNhU<%U93 zT3*<8I2Fjyjpr~yuA^j!Ow&0?Y|Txtd082yVR6NMt!tRRQfNcnr+!R~&ZZ<*1N|!8 z2{ueN0a(N%@2w#M0|NtdJo9SKOa!ojeF0CTYu-i_(xQIE`6?jV?`A6yrcMJcpFUn# z)T7J~zqKnip`#wO)t!leV+scEU@BnN#sY(~{O=1;z%`t!PHp%Z4r5t}gNegIH;0c< zISqPa#KGD4U!F+9JZLFD17$r;1w+wGJrFM*N(G}%b2%M{K<)XIvT=To-F*On?dBmB z@oo+-2!EgMd{pE{g3xMvic&H^nSc9ht={6*TY6Wrw#6D-q zJ@9>Wbad!C*1F?71fi1@i9KE%EXqWuriNA1)6+-GCots21O-FYv>_tc6y5;L$!O6! zM7Cy+I(v3-h(ud4DpVN~u2n0rkuwC1{(Ku}?XBhMwMCCaoUTB&wiRgpwR-%e3#1dc zlun}I{sjIjr}&$!Tj#pO(I`MoZU;q&1j6$aev=}KofKL!q&cnm>@zM?9pQ!c95jjw zr`r)UA|B`8V;$As3&1x3MP)lUPN$}bG+mQ@&b!%9EI@k%%0gw~>kSaYL!M^cFOHJS zeFJfGYf)8!N(Mh0uw$1tuqKX}eb9w89}c0HA-{zS;4>&t%2V%Ufks zD*4o9{YPZm>?6qw($&Mm5k7-mPptE-G2+*4sk?G=6|;LMg&| zZ}Ry4HZLaR??PomPuSlp5{(5tGFVp4L2y-+7+M1M{S!2$I<=UX4|Nyy%xU%C3^w%p zNszvOJV4$AKzoXMNN%7z?7%ME#-7E`RyTo74LR_2T7_hCjL@(!anuM+=LsCZmjWQ? 
z9(nzeI*;4#fiqljQv!ueSgz)d@Y7(QI4Hq6=CHES;4X*ml75UCpCffP`?mZoeUeiv z>OSrtmm0v}1SiF9J>$%LM99g4z;Rbbz-bn;GLj|mxU%Z+>-$6{SMADdoR?X*c7L`! z%bhhKY5J>d+)c-MTomnIbC?`B?AI9J5?t4CTc?`CRq&$}s}pfqM4Qu8kz+2qwd`mDDN+tI8V81=*#n4QQ*~>NL^uVx_Tiss6ea0;>54kYoG)RVsE^Oa zCj%PbYzL!WlHg@I){ce|QjJ_=p2nAw8%U$xza=$RbllQP)Yx)R>O$|R%DBJtM_q7H&+VMaV?MC65W*!jh@`Z z-~cN2IW&osEKW>-+i1Tm?xvAdOADihAzD&hF~)|6H*JIcx5);Q=gm&R=RgtxuQ|)> z%RzXdW~QO)p0MQPWc)=~?EqrUtcip)aC~X|H4+PRT^Y8kaf;twv-&T-L|d=lT?>Pf(0?v zjs^^JQ{()gR6<9SY7G8x_Ndn5xw{RC?>GG8TZ7Z?vyab82Ccvyi{2QRX1u?>)QpP! za;cgLgl~mCtc(Ai@zZyQ8aAuLL%W;IFg9AmL?j-6uKJsq6Z2wx-7wx<*4{n6J5jLC zP72&5a2QE)Yv;&I(2gKYt@?d8q^sq#xwltr)C~@@MyWo$=~M;X-QLkrd5q}x7xyz1 zxbP1%B?L3p6g0CIJKe8okoqFq{XL#v9Q`tA^}Lo-XXr0IyxB{85cC?)d3`g9XwLZd z+gOPyr-W)z0pCBuc#1n(#n`BzlTeN{l>|oewZt96oq~tPC6~auB^%NP49u0z>DKVx z((77Xo1_svRDwZqIinFg4Y!smHj@!e|DYnotmM;LmpzT4YfNc8eVW{_;l2ta%z|g+ zLxu$P9E1uC?r(|NJ-fg5$HBGKz08jx4c^sHT+uda!Cf~nbMeSb?4+jMh<(_AhUNq8Vq@wnpW56O)-rBx7xMZmPcUZxYKAZ_X;FRS!RV$*KL zY7gqce~pBWZnsq8&`Lzb{WO)=vejWyN{icV;=S!+Ca5zJV&QM_KAy4F#o^1?T`qSO z*>8XGg0vD8h#gRJaUs5chL-Pc_1g`wwTZ%-Ns4;uC1z!1VIXxy&-JGY@9&+&X}@h$m%#zJpibV(z*%G`w;A!S&8n<9^%o8lU$7?r-PeM}aB^UfdXFT7Q=-}EfL;@d|AfHM|JyCuI)TF7= zDp{VI5k)xVzf*p}7{&}sCIVq1RgESBHo)xUT4&jb4Ed0zigKtTr*s7Vc&^9uNVjLZ zmR0dtB5t4i;bqKLQWu#x-@}GNwL~Zv+;B9x->C4d0@Y0*K9NbgFU%;Z$fPCbmgoJ= zX?Ys&2NuwPD!~61-HMus#|EF0Ks=PsV2(_6o$0v?X7mmra|V-SUTucU`#71CXjmkC zal@msmJiJ*QUlBhKDCFJF3E2eE;8ysQ<=!{aNS0$&C1-~A~#J=YO?%FL0ECfWp+0C zV6o1jHFa5K2W|@Cm+vQaD5J;_<32_D{{H^X%^c~Zv?WNXGa`Ajz{6LQ=gbr^I~Z46 zX&*3##Z9XCJxf73jA2Gmi+@X9Aw=NrLf}LAP2Ex5!EF7neVy`1DS+dDzcrL1Bz)+X za%?Pze1kMLJ~0|>F5CVF_Ggo;wALdKpe9ogpt&I@r>CqU>(6I)cJhEY8z4##GQjj@8ha0zffn>hq($E z3U+*#Hxmbv>wo;8h(B0Bv_IhhGmC>$I{7qVURx!K&mhFkH|pD$>@474-Fre3;ZSKR zoOdQt{kQ|CT5c|i^jqBDQ%yJguJ~q@n=4I}iZZE9`FDSd>fmhvD>r-72LK!|kIiF5 z0IY!X^j^0YUU@??&MWOO?=;KpgcQ{KR1lhFa-c?P4w^(YQ|gm#Pt$t0>#jIGV+Cc{ z??@r~xJv7$o*;iladtH8v-UOgJ8QNE4_v_U;pz5hl44}F<~SgqU4@l2#M4ACxi&@1 
zv4(cM`Urnem6W7m)!`#SY+uUxHklVD!j$5H!MHHRmD@rHI6|+nRzeb?jzJO`oD#7B zMYqcRw?%1CHaXy+uzOdLVVWB7q>kY;DbcX(K}rW2A|i$oiHJ8bD-#AE!eDJ0x9!YY zByNhHaW*LtrO7NFH99IP*D)q7CmyWbq1_cHp0y>k5KIusUQ#~uU_>V^tbJb1ysNXb z;#1rDElJzWF5`iHd_F_+wOq2VwcBhZyNSz4a#F>#n>j%5a|d+B#>SExR-PvmYXZCD zbV=9#&xR}omQ-h-kV4@mo5?P!jeF25OI6N@3us}puMF@CLO#JFZ6rdm&s|+zFCZ1e zy0)$N7rO4+jqq0Vvz*OM4~b_dZjHi3dxaH^r^%Hj2x!C`o9lUxMY+LI4;5#@M4BHE z``HdK)-?%}LSooJ^9XLG7RZfw?X#zsG&3v4!ojG3e9`u`ysT&|@O=8~544@uF&n%$ z-6bXAmb-aB%y{;M;qYAW{20z^OkoAK*KG=?`A|JFZ7p#V`uE17E#CuB4MJpYl$BU% z(!0y*ux5@$#BIgpxFMs-tGWM&=6+-6l55^j7tcZr@>Jg?HLaw-hikX7io<$xv zzv;zWTz4nvStASyRB3>5t6hhRTT%L8c@D(<39rwxXM?^wOGlr&j?Mz$)-Sa!nSE?& zH7!n*i70Nq=L#;p*q`@T1IYIA5w!7*)p2&2N*)5Iao|@r)?gn{!|-8&pwIS<`OioR z@^d8y{W3tfCD%71(%z;ysvp9ioSF)3qbY!spBU#rx2PTtT>V*X!w*;Nn;ZCkdAg;N zCgb5OgjLXN37;a8jSpkd)@$k3=*O$|=jwFg=z`Yv z>{$}Pm&&Tk$)g68-p(}}ix(x0m56|wRAjc?z@XKpvAp~UR6>Q~U)f08?bp zL)zbR%;UDuzs_1e#9^mB6bTK>fImkC-_IJK z$4|tek6{aedd30$6_UyC=W4ArZEA41xsXxF!*N*6%s;rEZYhF(09_o03@8+A)K-XQ z5XV=8;;C`d03borZitV&P*H*lPVal*W;MM?>+Bd`UH1174+4RYkfXfdwz-D0uNZ8IKf*F%X2cc=4BBW45O(XY7}x&mq*uVfhA89DQ^)Ej($tt%`sP|!ImA8_ac_jfmNs>|S_d>se+jc8Z6 zS2+o;CE9?@0mwbbHKd28`+R>=EpytuRJI-azXNyU*eQQmFEH7V!I-2v`V^pxORfex zSMiF6|KsG1VoLrBpr{63HcsidLl=|&8wZqCV;vKSh4nx0?fyzkbZqyRmYm>mUr*7$%%Omcf>c&_+AA0k~7F&NhfN<5-{X`xD! zy3+)+j~jd%q9y@_(_gB+PMCW-#EL z952B%IgXx`wEoXM2O;uznc%`kD6c>!PC$_4_Yf~<%SiXzExdn&0jGmc66IkNrJt<+ zba;BlyJ;|3OlS>Fp0Pv2c?xP(l(_QQpqL5xKB0@4+*WhBy3c=$5M8DOWd^HKfw!t! 
zzgRGVfkydY31ifO2ze*|eFxzeqn}ARV`ubLx({wE!xZ_3EAOb&r?UOOh zy}$uNwVYq1KEX|76|m~m^kH-{{Kx4ejHNX&A|Kc0 z1idzTQ*1R|RIze(vaVZacT%*)Z8KNl$rke3EY25%B-8NN`H*A++W|?$&DlX*9yd^A zO|D0orW^-wmq))`?d`R0eyXM;myty@r56_$f8;i3Q433PRJ zu4Ap~4Sv^e`FOrp-g-Ebt1Fz^or6a0dwdAVB_JkVYS1=$%4$?~M`>tc>YpiN7DFboyqKEVC!Gh&NCeMYK(d`7}@{9_tbd>1ig^j3|9EtNQV zSmN;k8c}^EST{+-tK(JMWxJ=WD4QLFPy|>@iJ*Aap7B3g_)Bhrb>a{?86x7Kq)J6w z+vC`(1UD1J`EfjgfODaFnzmOTmqXC=sv^1+c$!iVDI~^B#h}+QAnr(^$45W^T*WI=IIG-6XqAfy_Jz%BIGn_u4JAvEW7to$&ySzWv z>JM=|`#qkcAhrfKki<$R#Pi*9G9R-(ZY#!D2^PZT?Bvo_2wV^Am+kK3@w)V&9&%FH zv|$Utv}E2js+u13jty}*IJu%kA`W-NRt#}`&4g2s%r+ZNk}r%R6)IBcK1i$o*SCAr zc_1us^hx$FS3g8CMJELJ%<{=91(HmXsIzlTWUZCH+9QDX@qS~~ldzoMgL^mF^f{7X zaJ6K%@VGA>(Qu7%=x4Qg6y-g4HEVhc1{M*g@m0uxI%C^M%Cu)i={U#{ZwS0e^3jbm zw->mr(b@v&WkG;CWW5aZ5uRXsBC@B|dOmMXJD*PjU651gf~4~&djt)M?ZQRxbe- z(9$V!H3@Xb4Gs)yY-qsu4vkksHlSn-#1v2?EB+4`fc$pp+aSmLX6jV=!U|O#?a0v3 zkw^geSI1(R4Ow>EScw58ilfQGU3n3*-yOiAYb>NKpOyvsK#A$C4k3(@Wn9mcmGl~V zcwsm(gChtP1~4OuiHYI9(?+${QaxbG=SR#kiT_xrk{cAQtg0IO+hUajx4As;7fVm# zQX~B)aN(0ZLx@*K2r`wM&<~EyA?`Z`lI2}YRDy{{%dnoMsIKxi8oCXEAz@gUghg*Q z2F9zH4T-WU0Y{qo|^$ zI1DB_qz}<@(SmDl7hyI*0=vm!d_5v4f`Guky#YVBHUvKxwpduFbslrZ$Tb^;=&8SH z04}BsM@yTL#C_fOkjr*LZx^o?WV)BCn)gHef{u-GA&1@HVw(9R* z`8Hzm+AaAm&(F_~8gPawjDX9X_y$~V=Nd<(=%h3(zo1pL;r@5wbvSL5xO#1~43{g* zPlxcuWFkPkujONB6?#cYiIg;_RQOv$9JTQX3uNPR0o;$>Q(_K>ld&}N$5Rbniyx=2 z0w3X9w8^ z5&Z-6!RyrHyxW8NUuGlep$3Vd#hImm4&SYgC6R<83y3=WBesCZ)y5G0N{{*rIB`=1 zYSl6Wlhovjurpyd=X+F=wH3Qmz}NYxa&#tE;7)Pw_<HOmcqZ8Oe4>7K6_ zkv%%!O%4uPUk_}Grz#Thb_{;<1R(@tnlQ2UXpTRAO@R>kh3k!d>%5HT+>O+}*X?(} zJc62B(NL9&!BRcK2s?*@_}Yklx{dzv$B!Sz&eniW=LV(M`W9IDp^q>1{SsIR$Si7D zUl`{iU0bKOc-*oe1aQZ9Qh=mOw|E{GQb^jkQ8tq9Toi$T*;u=nzOcS~3A1k9ey#Ov zH{5z}RPQGiV$kGM?G2PQ#&)4*e8-eR44e&P2keN~;0jjCaQt;M#-Jvli<8Ra+V$;h zmJiOu+tJNCnJC`|yfI*envbMjYww$<2(2R3WeIqdfuje(FS0 zEdlVxS}~B@3tLp9RZPSy-UIE003<9x>gH?~yI!6!vh-~T4On!IUwW;96)8h*4Xai4 zPo|)=ByNDa2M`=Bv@U8~4Q|fV6C$3x;_x~2L^DkchHN%l-sdyL^)vW^Dfs$5t?+R$ 
zdx+|TgPDQy0c_00r19EGNl7VX@Njc;pAWLOVPhe7f|N3xSh_9^txRL)Cm5~_bo>b| zEr@I73^r4OZw0|m;Dq~VT|7N>)IJ>6D|$Zqs+k|;Auv+M+kjC*ZMPziR!<>!F%Fpqc#bG z3?{u%=ZCYpZwV>MYc#93hJ|-8#?N*pt363Ms3lNjs1D%+z7M5x4|a@;`Q$CQ+OSeX zF7^q`*o|g@!b=00qMl+UQV}UKK*sKI$+V{hYSzCtf+pU9D0mp+xgcA0a&qzs!O4N$%k|t!-HtHp zop&jh3D8IJ(@Zu8IYA$lRo{=W{AKsLFgp8)48f zE`FnuE47Mg(ZSp!TF_ zKjH(qlTER8C9vq-zR!p^33oxskfw*qr9`dP}hKDyGmiO!bnam1+q^D9 zURso5SLiiWkDV($Us(oV5H^AA^iCi(S5=r(IIu^>bWAK{_ohEzJdb{<*$Ju)M`UUZ z3PAPw*bR<+8Lq9Mbg_~9Mx4#C&IaFt)8y34!j`&MV4KR!cA)sq{rK5&lgkd9<4-*P zL2WY+ci!{o^E22yNp|+(RE_FAQfQfiw)K@oP7ye?B91Hfp6kZzyx#sVbgyr;ocMHZ zAejP~s~(%^jmAo4cfR_dY5PL^WIgZ* zot&H)HOmCt&b3}2+lbkWOz%c0R%B=i;qg;nsuLtsKM$a@ov)_te+EtXW`}N_l=$Cl zC$Hy(S$-(W+F=*Z7nHU5(3&92QpUS$)SHBT7rp}&ky@ANK;;LT2(ScHn_b&6R0*F8U@NfmBvRu za9@6+c+I(-e5D$FpP?> z2qM3*8WUNU7i$Nlkz~Q{DjP)n7*$OgN@d>%=|26|qVZf9!DjZX=eGr>38X_#mg3e) zJP4s4V~a|j&M!^Wj_x-&r9IujkrepU2~7GppQ6&Z?BTTaSTVG(&kq*~4R8fYKsW22 z{uM-7OvN}*h>fLm4GD%PEYylHdCK#>@fZISq?RRyfZq1L@xM$wLX+mI0Z3cM+2wlK z5EHz%b3M0;kpp6DNa$f5m8|GJm^PggE|AY~BUwG+2JP>OClJ=Fi-lSDnr1pxK{}U; zp7UM)NTqI(2QyZl79nHkUh9#I$#YIZHdR7KqSp+cd?KT8UdKy6#K$I0>3+Nf35yhd z@WA*_2U(*Sr;Qb6DF-Ffe(N5h7ybEhJLbkhu0uCcHHDAfLtBi4ed@a7>9OR{3fisd zM1i2L$g$oJD%T9x0wi-wqYZ1|B_dRjpOo$-g($Kw;` zWqVGJ=DW=eBGx6k`hXvtXSpG-&6uD8+^gZsx{;D;t1<=Pjk%g+ef|fWk_0ENM8Zd5 zYNl8(;NwWp!e3{18&4M(^9F86Ez#!#YnF}E9gM_EXd4k5e>)E_WwMQkx?$&-H_sghVOj##;$S{!oF^7H)hR=HlCTdP*uYKF>@ptPPV%W~jU2#*Wk%_{g$)fjWEdLA*F>HwYYKOnDm`Td> zOC5_)S=Fa;;U`EK0zw-B8kXwtcg%9i(yBHzI5m;gqh+_bUYRyY85x=GtTE$~^K;jW zUcT*`f-#e1W)E%h)#E~)po?FbzfzNAUqQi4rVuXODzJpO3%L?&*Sj^(vO7xE`uZeO zFV}ud<}zYN;i;*;G9+o&kR2-9P>7A%@wcwgrIfR&6Hp z&G&38qF;T3S_3^aJlxc-3{)Vnl$GL=X>aYsjLg}QP;Q{DV7%->`W@;b=&U; z682~N67Zx&Hb5jP(;8?+0kUZGxX>XpmXs#TL7pjX_y}ZKS2dk{eQisH1P^b;bqVkQ zSraLgDb9l14Rsx&tackkPRJOc>5ZVpdyX&4*L}@$DRFVVCa*ltn%{E{a=yFCZeC8~ zJRIYsCV7@F^j3?tABwwDc(C<-Wxh)HvU*25dl68aTLd9vZ-Eq5wU9vHpRMmJU;E#Q_ zCm{jaSVq*){5*k5>PU(4;70g8G!1~;wW&pgHZ3AQwb2)nO!35ZX*khgfiJlrh9 
zF+nxGej!8sAZi*NkeU{=?YLU;KB-`1$K20JdTP{D^78bwK1Z-46u260f*^HRtv{E? zf140>x;fx~_2B;TvfXZ;BjzT2qt5R&2oWe2KsS$RQ_|<+{6RJ%thwiyC&$w@gxMG- zK^vg&GN>;CBXu}mgB0O?{)gH^LG(F1aPU$pX#+)NtL+EN~7rhIF8{!(78Rk%A@9GK1v_CjFTJ^?rKWCE6?Ab#~s#7Ik)sX*$ zjiV%=Jv}D4w;c+U?w%FyJM|ls7^wL5 zHC5`a4~9I^*ydNka~WW|99LR-Ti@VezxF!#xULMyPAZ{>vvuS6vYnK0$R;@Y(>W22$bEJ}wU)}rohAweEj+WPf60xUZ0F934_&3_mBC(L%SCulh2bNVe7Go%m zco}6(ZbwTuyFV2R6w`!^Is--!VXG6VaV?_);^M}CH;!r*C6<+puvCF9R;bn%?{y+j zRRFkZaiI0&A^Abp!L{+g$ewPC!C{mpi$RMII=^<}D^(5XSgpB47y_Y%z z@F=M~S+|sl0;Ldmgpk>i!~1(`4Zet8jC7%^i(fkr-hNu`(t+K(TDXZZ>r7_ZXFdr- zoG%qOtbdhcm*k|^_0;SjaOU5yC-lGUiSugWC-_Dmt!>_@&#JmkFKC)>DM^K%h%+{~ z`%qvxE=IPE6qp*}ZMR9xADpgT;LWLJsTGi+NbRa63>1eV=vBIFO zS}xmhwH<_Q0TrQA)7mZ;=2y=7v|j(>3Zj)!RcG z<0oc5-mjHX@xO4p{-Ov#l>&eVmNCn3!?d2nX>**o&Xeo4RkU zoSoS%EgLxUvO-7^X}Ot%JTC5%1+0am?B~jz416P3JLk7eK=at z4A9vXqgGXD$Rp|ega$LZWvgE~9ThN&<3yjyf9kG&+2zcqU%CtUlal-96lWXLK$ zNWZBeIj#;MB4jMVa$ybB3$NJSUdp?>R&15~&S1SWI_#vCOFrLU#*Ebv_W+o^^6G0Y zt>bW++=czy7Y3);G*b8HlZqZU(|Nkdt?YGnyDD?dcU59eXXo0zh$9f+EC%8Ux|spT zMfXR)^TsZW8I(oBU1Eczp`ITzFW(gj_?!{=)bFVa9H_A~aEz`$_JmVP_&!Q!b>^cl z^VJ<*_~|zKVC;^we|dk~O*{M~uXlb!M98R1$|xSLiC!w|e{ilXt*teytO@w>oC8ne$a#Ynk$2X=8KJoNVl(GW~7`0(JGv3JMAaLKWF? 
zYnG5N-{bApyF(Wuvb3LeZW9?b7Zw-Yfi$mZFk*KvgWYFufU(hJJuAZpTB9F4ug+*B zqm>Z)xQ)&d$EpiQU<0!yqJW)UM|Z#|f1M*&&7Pg8StlmY3{08-zT#sqwliNUOmXov77c!p_N+O*p`)*-nYrxE3|uV&9T4HA>PkXGZ*qXvpZWl zww*4W`|cL+d+UPscH3_7&rsdIST4G_krbWR5_NR%Kotglj4l&(gZD~{M$}yc}*=!A-`-7V+rMg=QFa6&i93oXks2ZAlC|5 z0UvR=e0tyC-(r$-m+FVR<JDqeH~Z_hjd9kb}{vpZgY?sq=;+XwtOSvlO5!ugeMx(qh z>eA<_O$9{)HgqEmUwFCZRqg$8Bddm8G{s6meFr5$oVmQAe0Xx*(Rhd0>Ym?Of6YE7 zzA^LSs@%KK(Nv72h?x}W_&FZ>Tv3Pv(<-xipP}IU6PWIsT8C+`CyHt2Aj%?afMuG} z3AxmI6PT&_aBpyTrS!uw$!D@WeGeb6`>XKWI-{#`)buc?yVhtZaJP= zsaQl*)LSB+)*VT*J?$C`C@N;|Pt}_!5&Mr?WlqQ@x)pkL&NpY;qvf!r!h_tb70>0##XZn^Yu`IDEY-Z- z9M`ndIo&sm9b*w@y76;e9m7T^#gq0&+1L)z zOpNY5jD%W?QIcrwb<8TEG*i*B_% zKn4PFt+gK28)vE5!lbFcU!cf$4BKQ7MnRG9-&o-Om9`f6X`l zl)O5KC2KDBrO6nFAZ`kFVA?C5)8<2v#iXkyrbv(qJtLY3%5?d?#0xlPr3aQQHx%Le z{PhD=DzPuoQ4lGGop5M$rXOrNBZAU$E%(01Q~X$Q{3}VUi?DLes&%G6J4UY4c1kxw zpYVuQ5W-bSeA9XCEbpzub;$lmHCIH(bCweqVBargh>|hv0xfL1Y%wYI!_z5dL@yvs zqu8uwwecZ>UhDuIZqV+dt$(AUpbEJwA$X-Cepeb^Fu-X36%H1;!C^7-T_k98&7i5y z>qtUrTzate;|qxw#n*%Y`R#rMkWgP@QL_~~C>_NJzh8sl z=vvcqYp{fzV0QNQEs;`nkyU!XPRi0KRztR>qP5DP_(CD*1#M+-%wlzF(pRNo+qnjt z8FME;PTRzbP>`4H%?q+mHGa3}se~Hu++UL4y3GE_dT36dKTyItdiVPaA*aQ@d9U91 z9HHDI8k!(EpBrwhMxu=YH;Ve8=){BybM;V44tlb_P9J4zoerZ^nVI?T(9ls6d<4&e-;q-6cwRcU%to@aNA0Gg#YcVai@%! 
zwZ*PVSV9S^D2y|3Ei3C$+)f+f%3nxfpUpN3Abnz63*kc3jB(aovFx8B7V~iDEM8lG z7LJX!S<&%?^EC$uMR~lLp`78kLPxUfUzw@bA#T~9&7_o*_4-zx!eQB7q8pLVtTSIA zj58y?%Or79{i{q(HIt+NE0-Weql%{El21yNNQuh!5D~E|uiTRaxlfjwUdI=mxhiR6 zyb9+|w%q(7SmL8M9%i0l$q0+TnYGXQu2Z2sL*S5&mVX+x}(7GbBd!GL88h6rUI7 zIy$$13qlVL*IbQZP=s(1OBFIY0uXerd5iR#IJ!ASBMMI&2|P18ydKhpCTI{{ttaW3 zOpRn4d--;^QvD`eRy=7&CI>9H5?^eTI-uqi>a;O=<+c7b}d6hnPUtUXOq=Uli{i(%TC(|nh z@`a9x*7pmljOHYW-?5m4c1JwL3+Xpi&{{s{g5z#!L4ypCMILed=43UC5N8?RA`=eX zp64;Pg(*Zp(Em)fXlB(GbTs?b{M`ea{kC~!q)3BXmZ`T};BBn!?lGyDRCib(C_{A9 zrQYKHwQC1B&&1<*BG0zOL|khR4i2XFqvKWI^qOjedco91P&6&^$JI!49ZX7-ps-lX z-{azOQT-7yq}(R6ROGLw)Pfp*@9Y<_yDKvzOKjqTj2)@4Va<^86C+GbaU_Q6^ro@V zPWk9*+U$L`LQE64Rw3o8m9qtnJxXcg{D{}zk4la+kTD;}y@oe%b0-DIe)A8m|VGOP8HB+q6PV!%XmJL6Kr>QtK%ovPXzqPbpPH`o>4; zie=e^@SA{>-+W}EsL(Wthh`UjY79f!p0J-Q4i?{M;?b08ZPXk3U8^4jW!jTIbX2#> zAm15SXQ%+n=XB5uB#n(z8N&EA824}2!ii&NKuMrZk>%PMR^ye(lW_qiz&ZE9gVYE+ zwzBW6>3RZ|z7Wt6mWrt8wvOn$pSoV6p7DBtpM)OU>fC?%(f#*1F^VcfH+!7S-QAh~hTi?k@nuvbd>Kv{hsCBZC=!a;AA4Na6RSJ^D?_Ns zozg>)=Q=UbX){Pj~FNyV3hD^^n4DUEE0| zJsu|ncQg0tJhjiI83O175}|xJY_NeK(0051j=N!eelYI>l*!7=%RhELb|F!IF9bck z!zIGZHRygB*@_)-EVXaiR-yQ7-M(G*^9?O&e_Ho0h1~}N*4e_Q0K@s^rT!a#QT`sV z3U6;#ypOFjv-@5D2PUpM@U4|cVypSaYS_rspIHdR{uE=9C!RRu9Qk|!s0u3_Q57jv zrgd(OWw`or<=P$e*~X?)lit-{b~*aiB$vcAaZ>%BKzD0>6`-Z9b|Ts+mCF0=%nr>6 ziimMu!}}9xzgAT%w))f^MThQLTv@I+J;{EE{JbD)9rgnOk|xrMvdXVdfu3N-3Z2F|yPo;ZA3yR(eC%!PeIeXHUdMQ&ZFDMLAY zyLMDFwC9EDs^3O%5AAsQ2&d;M$-vCtd$PX zljfnOc*W=MI{`UBw6p=mqmPbsIK;w!_fjB$n=BXyMPncp9-00Tp#wF|%3fwJyDee{ zAQPVBnoU=m%dz94c#6Q7eg>?2)U=dsQKz$4aJu8@WL!#x$Dm92--(Itw0n>wG7UlL zH;Nw;fmW*;0fj2%Exld3`}rT`3ny!{I=4}s-m)%n@xi$o4}ho-9g2KJ0(G}^yEb-Afk#&h0?c;oI!6FSz7}le{l%y;kqB{$XN!A`0e0X$IXtrdcqE~aw zw2QYo5wYvmd9>C^H4Owe9+r?pjcUEMk>@ZV3-|G3-?`E6Ou3@@VoAZOeynJxpjmM^t^~Q?>2PTGB zfCmS#@7P<9&+axWqUBNnHVq0>ZnZ8U2{2`W$wqwaWX#dBp^_mRpD*@j z71N8qN;CPWlozxT9^rx0Q| zAC|?ljS_UJ{lPApxjHgOnO+sWp~l>{M>U3wY#2BT!j)w`9dv`4R5{ICv~&--tU{fk 
zi`6NlP;!*ML*85uj|vGZ>HcqT6D!PFrnAyt#CL#F$~LQik^)>9cBO{VX;@m?A_xUP zvL!A5?EE;2k4qXn$_|RfQOd7-BFz|b03Y5xCu?d5LjCx?$94X5o}5kkh$=$;$k0)_ zcFH9pZk4lhp~;_yRX={KweJ8|wOusvTQ6lk)SZWBj?;1!4XtpgHS)^P>GWy;R|{LW zjz?np8;b;D^}zhk0pgI8Pjb)EMLDGb#6#=3unlfQgQMg}~o!9B1V3?@xWsi^x+OCGDLQRu`q!7ZM+E-0|cGEnt(t2iDft zkCm(Dru!eg_Sm1O6zu`n1e&ciP&tpK9c6NU7koz(XFg4DA1Y&-B44}9 zM(>ZE@eClq*-%qoJqpR{T4!>xmR)#-0QP$BPn6m~K#&SZZVaPSmm7GEX2W(S3M%_K zA{}lw3*nT0^x|Py90#B~v5AC8IY0qQVr#8|yk1M$V!c@0_w>w|r&5DuuXMf--A5vm zmhw+F>zb6l_n-P{Z=4~7*ph(Y>v51$1rbPLOiCr$PesqQa_39jXf@!~ zic;9$v5lg*pm1tfrb;kdy+D=@2{`)x4Tn9}0t%A~K)?cS2cUj+!R>uERF`i168WC+ zr26Y?d$*$lUM7Qu4#y`bX(R^S=(F34=vS)w39nRtPyJAhqY&y|>6**+-YHLixV;Ka zo^I9t3liWvni5V0Tx^lVM8Ky)0SdJ8jQOj>eCPnbXZ$u;z9Nr9IZ2*Udykk34 zdSlp~{4_8(3pz?L0IloqO0WV^j!T7+NFQAQG6I06zg&)WkR+#_pFA$#MG>&5;D0KA z<9o}6*3;f|WBXZV-!^*$d&PJ7H}l6Z6TrYO6QSyv?bb@#ivvilzyqAMsjdSYQ!glx zevkresc{O{vMH_ZTAK@oLhJ`UUzZ@zZB# zCjXxrM?Q1Ev&%q#uh=%|rk86VN6jW5jEFv!NOA_iGt4@*bN)vDMj~qn_s+`_O6a6qk>O0UsQ;nCZqiVJ&KXYRoh6 zEsGH9_J3mdC+YCdM1k5I2kHj+KdJ*rc)z=#|9naVNHUCnCl0ZVsyvFr4Iku12+zMcmkjKxYB)O)jBO zA{;jMK%^@Qqv^+{$sKvU*glKngM;Yi`MJ5m+4sN%LE>Oz6r{j;YJ6M@bQTVO?-5W^ z8ZOk;-qs~B2fz?C8B<{{lxT&P5i zG)_oxkq4_aPval_5s{E++7S|peG3gp|ENsE8?PpVvL@{-JjxGGh3bDb0F77=q^B_s z`Q%gBL&6ed>{F*FJmKgNV)^ zSh>UaWjuS zGQK&VPt3xEF><7{klK}ab;xuQ#8gvC&_QLWZrjSe9ugfzkz98E`Q7~}&#GRd2ekkb z8RZWWnI0|(WPLdtje_Gtzgmncfy+lBYvK=G0^`HUtasPfT9O}_8VMlBu_<4)ye!#2 z^d=9=X4PsioI3Vr_DzjMGkz|w>}Ja$)(pts;eln$!&@N+r-^4ZLf0qE(rKGNcpv;To`<3(bTf4&=M=QH@n zk4J>M2*GSJR1?+5NdU=94P;Ti1n_c+QUVJ*D;2&2H6Bo@Bd&!&4RAhbu>gEb99op# z8^^3|jR`RuXr?NQ4qyb+Ic;M@r-sBzx&SibA8k$x59(q~e=9`;Pa8Zkn1yj5Eg`yS z^;w#Vn~(cOyxRSPqbMgp=K^t)4Q&(^8bA*LtkDb7spPN5wg1G!#lf*Hj`v?T;1~Zu z7i7piFQ(48CtV80N58&CHE(WgTr(Um&H%!yz^C_F(dqhCEef-~yiS@a;KxBa%l7J@ zk!1#B@f=Re1IU@dI2ZuM34SFoG+eP~p}Gmq&?ULHD~$ne{I8OF*0nF zF*Y_v>3UD4pNe59hXFrQ6$=EUcpY58X)!Y4u>jSN#|1a`|8$EwH9))A9PsRabtI@X z5(FUZ8q4wi8^v%R5-dC_Ix<18+P3tE>h0nBen({PpVlxRD&7$CS%Qg0Ri!O+nCjt(Wynnuww1_aQ 
z?n8km;2J`|nG9VaKw)Sa2~P&*_9IY6BKO~*Mu3hF5IXsEaP08DLRnnO|CUmnRwE4l zEDfy2e^ht2ADkWb#y3zC_B{>I^Y(s=0qQWI^F;yRIBC?2ztpgwz^f>FdhjL?m?CHX z!Htd%P#7SR0WdkJ@|1K0ki&umc>?H(%n$knF(_EK0+1xTX5XKH5duPOV@PXb2am9Uy)QFhd6LjL-S%ggDlj^Uo^$dD~G7?q* zlyE=jg9P>n1NQ_5bPYVxfs&QMP8SW%AVwf~=<4>T$|?0I{nNPHz7n~eZT%?DP(R%P zdMVQ53bFFG5%6SDd|{Fyzqadp>&?^8Vmc%78K3{hS%vOX=uZwt!jPAhLQVyc%@jI+ z{Mfy%+ZU}1iZ=r;fQeYNe(969AO>i)H4+;CoM0XWRK=^Ssv}5nXm#OA-yjn~Np_^< zub^aBHa=JdaxOj<3;LIE+$f+92_VD3tSncXM`^i?P&4T@vSB#Ms;R^?|F631>EQty zV=^khNkUnMl2SuxP`;%Cz?wi`n@fE+0ImL5_;K6?{Ll1CeVm;+wtx>7MS@aIySBZF z|6j3k9>=Iprt=SUssBiW4G|wO0dU?N&|&by)!9tr?D_opNnLdY5VrB^wzb!v1c6;l z_9di(*~A5wbCc6N7OZYR9UvFZiKk}XL)1#uCaNlF_S!7RDY7o(CP!?W)2Klkkc*jzLP7vYz5c$rCJYf87Ssq61w_h71HFm= zCrEb=mk*u*&}$xKK^VtB)riQwpepTKXmrX^YC=FIXl!Z*B2XhZ`$K%@X<)W3Nr_ph z0!yK?Q5qp08Q`IPf&$y0xW6M1h_Um5GQ86M`J~E209V2!;Tl|sU1deJ(xp)g zTwjiX`J^4+xew;+;RA&L)LDb7NhF(;{G_X)!NS6#nrj820nh_X{>(z@d;V#DItVch z%E3;9Fx4b-LO<)WUvu=rhz0(-A4I>AZn9rRKAA|gmU#E{0wArxhubMoQA>~Hmsv0& zNe+jaB242N`X(r{uc?-6Uk2_s;CCnSHc;3n5d7h%S|ASdSXP+H`+FVQGps8*NLYXNj3K(-6B z>`vgI20NF-+De0y70Poq_+_CDb`viRSV_wzkHjV9+`POPa?bh(@53m(S7$T2j?4zN zM3X&O2y#byTK@WAcJvAH_n=k+%#-G|2b40M0jBCopld{oDHbLOMRGt=SFcWs`U`Xd zzZjY4pFyd|XAb@|89-1Hn1hj=bSql0@Bgn%9}Dmvk6HfbJw69U`}zNxGvh#wEKcc& z+6wf#D~IC?Uw|SL;PO%&iZg;|Zv?7|mgcxP;rs(Cp7%f7jD`F@=AY~Zu-Pdg&}h=< z=8Wz~dMY?i!CO#E{uvD8#XJ)iXe!wm1SN+fyrwI8jt|F~KUP^)t=(c#riPYH4mGyl zq1+t;90sU?>@0)_ls~C336^gSKMR{UIB@_=z}@}opL`afwyM!K7*p=M09B;CWuaOi zJlT1Vywu{(`p-v=ztIeRO$A$oWi^{c{+Mfmeh6$C6|6pRJa@Cj$p4)>aU#oW&DKL) zo7)55zyJ%GXVMqnr;41 zMV<**#ss^`pKRcdsMGglfr3 zCgd^K)aVizgaEvaw!7nH4)bBXCXem4$6y7CZ)7OEFC64roeslnd1pb0a(8(Et+iH~ zt^;g|LF@afZ{AHnD08{SJ1)IU|5LUE#u2~A?UeOZVGo)SYOKC9dL$;6iF z4}1o7kNY*Jjmuubal<#s?}b1~p(@u@Df?^X)@jZFtz4pj%bEPI^%uaNxFH;Tmv2kf zX_nuO+F_yK^CZM zeg}KTBa!HYGTB*?gf1k4<1w{?#7Prtr7kao%tL zcUiTAd4+zvo6%}uH``d%&!gUDO%_8Z#$&hr{<1djOM*()wLj-Wtt}15M=N!Uvc~GQ 
z9uqxC7@5*SuyPdnLQ74imR>quPCFX|Ut%eRi#d)#P7-%&Q#+kXcMhKE3Xy5MLmDaCNl8s9%2$)MYR!Jf>q`@u(QB#FX${9xt=jZf2x= zvlzDOmHRX;@&NEd+w96%`$yGJ&%80e@qUoBlj>>(3U}Y$lye+UJV-_G<4hXl;4oH%jNm@2aB5<@uk#alSNP29h`I-Fe{ z9n0X|H(1}kQqCc-Ij|^D{HDP7`9*!sn1kC@?4DazP1?G}*`==IrY_kpD$pWjw^iRN z<)tgD1ZyE*aG zMmgmFbu*UcnQYhpb6~M?XaZvA*>XeM`Rwho1l`+~`E9x4#nAUWxQ>LDilAL)$YQ7m z^tZbkL=<@@pGtE7I=F%S^FxQMS95BA#pf}Hq;2TG`A?cMZE46IvLwP^_m<_0)HbAk zPEX@^j9pLQ9a-G^C%U@Tlq6T|J*oeiAFaMu$7xA@--6}G)#s4^Sdv=<{U-S2zn{!- z$luYFvE-S&DaqYgmwJ$zET!}dp&VtR^y_U&Z4$n>a^<{mo4QuVIkuShy=K3tX>R%* zr<*t;{63@+AxDzC-_> zc?)O&BrzzXbhDQD2cG@4&U{<0*^*I~$Q%Mh8tDh3$(}Wt*CfTC{)|9Ug7|u0gjSp0 z#m291u4AKj-yOGMnm-9WP;au6@VtYJrg4RHc)4FS=HJNl_n_6u>-Fe(0K(B@-;qT5^xZY$#eoin=$c2A-I8G{n zF~jj2cO5gxCNz46=-|Oc$#KpNwWZe(&vw7Zeo8Lb96&vCk>gPk$g+gOzbD2#F9`6D?9g$xV|eV7{)g1KKJ-#yfSK5T9$!QcC1o% zd%UtcRsal%QV^P%5*#CB)&EW~!3aVuAc_wMdCP0Qfj3BnshyfHt}7zaA`-zJ6nz;q z4g<>!$@@9_vs#Sjt?t_hHz~M##P2Ufl#VU_fHY-B@Z&RM&}mLOzz>1g7flcy&Wk+Z z4FYjn)%&h)te?VJV5`I6`VvWqL>R=t+b`CJqH+G*q3&d(Kh+OF3qCyzIGG`J6pe61 zH}eUmdfVxa0>^+sy!v|ZdS!hP$ry<8(TC0q@YLEBRV!`qqtdo_#GmT-^NuxK7~j~=+JhnB;vUP5?|m1_adYk0BEkNryh(J_I5 z(@Qr#2aswPA!eb@zAM@r(&2FsRg!WxKIb-uP#>h#NCV#+$38pU!HWGDRKmRv;W? 
z3`=a(Xt!oq35}n``cdH;k+7Ow_u$OOj|8bVLucFkk3~DtiDk3?eq5HnR`0eaL!NnP z#y~czbbsv#0d;^N93KC{KNi=#0aW)4qhHxkozryyNAi%W_X#&Hp}PXdPmgL-|Jh2r z=%3jZN*u`Y!LJ*+>tGyV6{QV4GgrGkBUQY zB{(KC&{+ZiylsbFL5aReg;W@|GSQK7&{zub`rdjvfYE} z^_pqo<|OzSj){G!2eUm(Tj+i*4MXl+_) z>SH+NlDDx;5~gtDmwlgWQG}ZbI*=dOU91qe0Lr z>#JD|%LUA$GuQB`l2&HQbxAFOTo*1jc3>6`J>c+=E{R+)sQ@Ax{pnZ0fEqr_%uIPF zyAs9n!RD5_1e%n9MI4}d3n|5iWPQ`+9rVZxPtZbAX)w^AgZ3n+XD0}RXTz-crHo}z zK+V`%9!9oCiA8{o*!ANF@{lY=A{EGRZ!qxlw}^$Ji0*2S;>uRo7%1_*d9x;n1n+3* zNG~WLK*f*mE|!I5gE$pZ9psIm7dr70Xtal+lg;Ovt!GLgViVT@ zNca+v!?9!^3@%~ghBRrE>4@>0yoZqmpx0thr!t}=VsUbH?cG){))19F-dmAY`VNCA zj+#xVepQY^yMR5kK^xU!H$dWNrRU127wUjm63_8n14hNhopUSBAuNH2p=;Y}0O%Z< ziZco@wSsDkhkgt^(E<4l~VN4?nLw-O}J+pF!sLMeMw6tin!7D>~5X7oM2B(^9k7Z=_#T9B=_ zCOta~b4s`aBCM&>H4b>xN*3yDc_8*M=BFFQZfYMmw}&0VYW`R#6*s3Q*2OhoR(nm4 z7HjsXXJljqc~*R=kxk6%{h2K?F8q@+IVVlDXwO1prg%{zec{jC9}B2b@dwU4{C0#t z7WCkP2P!Qp7eT2M+z4HLjv>16 zoL-R$ZLZq#Xeq~$1@UsJUXwE$W=~n=ix)3)725cuBN~pCz@%u@nLbnhw}7sk^nkDp z(D!OIe~H~Wrw_Y_7RF~QtfTU!BdWx6OLfW^&PtCi&KwW}=$97?qY6e*U)+%Ccs(-~#N8fxvVbF*FlbBhW6j zdu zEPWq*vhW-TiJphSf^{KwAlx3#al2L*dlUov!g2Le$DZb^O*d?^vpr3j>CwkK^`Mh8 zx6`M~Bc#U2J%cq5DtL)!TCm%q63A%fO>4MZLNG&KD%Kp!2hqI+9M@ zVk)pW+NoE8oD5r6LK4sB(+O+f$;%+oZ9^Z(d6w~%8XmGuH1aWFG6lvJ9Zw52kNedU zYtJ8$XLTPxc1c?Du5RgwDSA-bTdlY(NIlqDDo}}9P^6tt{P6XM=r(vqRe02_%!lEJ z##zw|^eM%fnoQX}uBbQOUuCGAzy8p0hBd^nNw?sTpRuOPl>ODo2R?m&sjo9+O_%8W zRYu#F;fJ-x-Q>Zo0mnzs2Lx*oa3nJYN4=&Vxh54k=M(jwl4WnyKI>}>v*eCj3MWok z69|jz>}AT<3A0E}JvPQq%4dI>lpi`PO5kFUxugLjs=2FMHrE8h*_p zL3;2r%+LI9J}F||XkuLt$B{`ZaL%g-unN}+k8XrZ%RO+FJUAJrTX47ELynsi^KRSk zYYVT2<6J8)xlpgSrJ4&AIIXo;lQWe~rDWuuScRRVRN?*a;dKU=hATd!z+=5H3b_AT z|Gm=4FILaZYbsAaSJw~DIjbl)SG=8gK#z2@;WLUm^56#r&z9QU8aa6dSIzN1qc?D8 z(Hg&(M${e4Wi!9|J(470iNL@%K;qGQm}x|Xu|zu6V1Kow@m2TYi-8xQoc@+3AmAN% zxfqg73xllz6cL9lCX`>)2#un+6fYY0Bo-q1}50AyB5^#PbiNPou zssUbPUF`gUP#H-DL?n^ZE>H!)z{L>8yM~@^#l66_y=R%jfrTFnEjOWUgl$26}goqmr9jb0QN@I+QVuQh_!O7R! 
ziA|uv!ed$GcE*QLPg@p~HnFEs2Z-pyohcOG-}}f!U@tP(mtRa#gUe?TuFZ7)jDr;f z->pG-(bm$ctg}LFGF5-e>2Y?h;RZG8q{rW9HKOhDJ3r~j3HsRbWVG0i>rwV3w znfyx?EF3yAxN~ai8JYl*KM0K<*v?HwFu~vLppz%qE>4I8oxG6r;O7AWG_d7Xyc^l< zVDi?qz=Xv9n1cxfjUkR#;<5IzMX1if_{D?~&%vce2&mdx<~QYBsj;cryrt)O>ER%6 zaMor-rS!NYp+iB9Us+}+CZ+lr4+cVtVQLU}tp^469!C&zXX#AaD5)%{ips&|i@I_n=nGXzMPzo)9s`@a$ zK4=&`32qdE2}`H5&ORm{0j3K#{``oRmi99a&F_VjpBLRRQVzuPXNvx}s(rhWi2My3 z7x?J)MKxVTj*1cz66tT8HkLme%LeQEf3S6RJ;&G7Q?@i^5PWg6U|V56HmY~=yZHRa zF2x?Vz64BQC5}-xzDCcxN(dDe94up-cYiQ3YNifRP*2NEJTuM4=H{%U7OSt%S;Wf+ zZyjyzR8HZS)I#eshO2uWGYan(hZk6UJ$Fef+Kk`T8u(sJ7e?hA*h!NnqoR>=Gri>B zx!tdD_Y~fx0Mk?hgIo5V!q-F!Ch#|w)oT00VDBeEK;noE_BEA%~<}5YeK(J}R z;=VVAo4soCQ(e!$^YdN}J?tMTb8K8kWMpeP+QD7dH1xap`4)5Z8@o$q%rhSiUVR^+ zaclOaD;SA?IsUR7KfWa46^*JJ-r;+D%v~bzd!6D@E?~{EAmyE>igCh8f9JwJkOp}Z zIUebMCe|WB~ zWG8#bz9d5p#^-vR&-e2=pTFUI{P4WIo|$=``?>G?dcWWI_1xEW1>O!vCO_Y0HRebb z#0qzAmt?3+ocK05sn)5uw`XmxuY--S95eNOano@znJ?`ab+SM`U?I0h?eP`Z|0z3s zy(*+TF0M^hxM)Z*zpJ~4#zhlAa56~gy6#p8p@OO)%@|?S=fta7aEaR ze8@vG@%PlZ1EmWK8bW3!_3%Qk>iwmy!Nu^P(fveEmzkNqn8iDq*KEN!_KTp^k;bw4 z&N@cxs(^`N4(W?lg`P8f;2=fl=lv|+$IOAFOZUsf8>eC%Y6^!447YfE|30qh1P8FkdKIOx9$ zPG*V|xClf)W5DRR5_;<*B1o?xNu^I*_2@mfhX*w5yRRjd+3;!lR88ko1|5qlz~mQ- zzdx>ExmLhnUG~O*;D#UylVJ)^9aTIbaD)_9GjFJ*qLr~Z#pu5eTgE`J47dLE$$Iz! zeYMQP+}u(;LfpgRUEkGXAI`hHG3%)Gm-i6+sI8E0mq~gwE)0MZ>Q*#$06i*8(UraO z{$V3GE;|_Rxv$q|$b6vO$oh}xp=&eZxbz z{EsoItex7lDRELm#N&YRrKFavuHcD}sap3-rdW|XoVz)G_jeMWfQA)RHCQ6C~dZfoPKp{GwNch@r-2Dqb;ps z2?07!2_@FG*a+#DRzyMsNTN!$!gbl%RG~7=uj-QWjt>L3uuFax-ty}ZmHpkFE z6d|BFxGl{VBa{gnGmMOk)~WLat@&U&Kfm5o=zeM^>nSTME8?*9vel=;t4~Z7f+m&= z-P(9ypw{XY+YB&oY+)+g&0@Hg$bi=2D`~IZv8iaU6&aH1Yj1DgBtxjca|o*Fiw#x! 
z7pS%!l#$UlG;H6w@M_(sa@?g$KX7O8ijjk7>V>NBQo%{-5t^zCe1MVn{FNt67;2vP zZbqNv;$|VB2w84Uq!S}xZIqbpUV2+!U+;agKR$?8`cFfTlF}|p)vofe^+1~%!FM#N6io?SND$XfUb~Z`UQ9nt?0Y#iqX2o75&qF}4^Ay{ zp7kl|3*IRvK5j>tUG6b>_8MN4!j0;=61B-F_EQLDOpQwZhT-5IiBH?lXuuCVgonP} z@rQfSZb6YNOFC9o-O*fH{JA+LgdjHfdgj=1GGf$@kMlgu5Z=kKhe>L3f%aox@%u6< zlZ;MCCEtmqN9F@-p1lDn%UZXdwiBFz7H{G7c zous4AQXNnT%w?ypc!?c_Q2~|_WIhSRD$0Vk3R#1wP$sA0(U(m%?}9D0EeytEIlH9x z*sd-=Tj&Yk(u-TVWz$}Lp|55__t%XjSJ4mxZn3r;J0Mx^LPldb;q0UTn7m zK1=QX1ZpVi6dQbe%?W(Su5wpAoWz4(yKZsE#mD2b>gcoNx-{EcO=rH)ZBr9CTnE&KCMS|n&4W^vrQ>hCIR95qSnDmfeWK+$X zK0F{2`cGY>^NYgNU}R3hViiFHq% zFoZrr+_dwN6v)WMa$|ClzJ z5pgKg#b!Nl9ui>^4<2}FK0+it{rI89Kwsb2ZEb1~+J5)cmtxWe^<3+;GSF-y0&Q+Z zH#$jZ&Cs=tF_A5q+K6xW+$=b3Y;4Z7G&aV~?;#=A$!@Z=d*=UKui#eTzpr!IW7IrJ zUd)<|pMTfV;Rejd0~5qa>GQQ|@2KFuA2jN`v>F>xaaE*Od*nI_>eHC3?zWW`ZSBcR zF#gr_Bf@W+IUn*&K@D_Zcko#(+qOMs=e3b^A%{=aYGYiR z3Fgh|NLl>oZ!`6Cr=uWpkn|A36dY0yytK{!>Kx-ds!jWFI(_ZV!v2x{QIN!MLMx4c zpIIMb=Ze8ugUY@P9n!#c7?mNeIMRSx+oFfxkoNSk+KSrz#M-=~FQF50(Qp~EeHwG6 z{KUFzAqbkKs&||rssSO?^Ao+CQD!WS{bg2U`^-Ccc;ch|n@@qo*CzRTUB_)A>T&qG zD&PZe6fcCc`x!Y*?Lf2iV$ZI&KbsCRt!TR~g@{<3&vMxd16Wbi92Qc^!S$6Y87OxA zB_M|qVkgW1@gfLZHqQ~NO%UNJsX6*A=blR;uv7}G-S7`(jy8ArsW2p8{aZUnB5HEeMMU)LHfx;OGsw^U}N@!M-m!s59()v%ALFA z%H{=;jCS;@hIbG$AKT#FXa(h;C?Evv{i+^wCk<__uV;nRrqfOju(q=tJM#S@j$~Cl zWWV`hW-Zm)zHda=i0(I?Wvh2ICSK$ne$UzD=rIx1S9Wg8uJ_!>hG zEw*SZf*Mqx()-H5@)9?a{31PpU+Z{)Gi-4o+6FyRKL?q)X~Yv*dk<<@<@fwdpQwlr znSk_K)RP}sF57_&=+V1P7e^3R5<8vb;QRo4v885o)>L|K4l!{~^p5GDt zK{CDfg0T_ijg8~CgnNlvv?p~qOUtf8^Ma2}O={j>B!W%|9}Uqh51yT`81GrF7z!j2 zA0Y~3xt_ca@8|bjflSY{&q;^EpG69ImwS4HhvzxBLP)3fG4@wqAmi_6WDq~@#wwnG zM-dp)-z~+id+Jo+e0SG4%Z;5;SXTX~Hz`{$&otyZySYKs_^-i$$?(4YiR;R3Y;AFV zy{>!sp|)dqO);T@`L`cE#iX&yuMjnJs*!0V#3$=Z>G{}KTM(}imZXGcX6f`_5W)(8 z07Cf5_ci*WLI=F4&Ug9;$S2p7IakV!mulTCO&vHpNXDPaZ8l^Y(JhU$|@uL*#BYv$K`yiIQ`X}J4SMbp2rY6r!(E349?)FV>fi#tN20~l6sMiQ7-2CE5_YP-L4-?6g}xe1X5$l6TM6kE?Q71qn=Z-?71Cr69DYbxet_88C_0II*>r@uhfHw#cpkqt5So|* 
zA&JGw&bb8z>*F_)Vylr9z+eB|(`}W^6h0$to+o#opt|f3Y+&8_?li(m*I9jMvz-_X zf7@mZ6FFa-*d}mzkm@1>KCl}dtRs1SeOsb|v6ai#&d+NvpEb45%g?6;Ma=d3FCI6u z8KJKEOz+%KD_};E*%jNCX)Ns#i0TF}j=mM7z{;PF{nb-P&r>*a3S8NmcA>xOni7RI zrM?pd(o^mvCvPyVOp-lBI*5g}{Y#ksg^&HFKqsoS>c6kEo4hS+uh3Y5!qJWn*zrcdQ0 zcH;Y-vR;+9hJ4?FG6#pkJ$M%pgG|HEDQG}-pO1s33YZ$n&&xHJ=9k6fg8fV59p7AI zCC-`Oj;*%%(>Va@^H$@FOy*kF%--p|)w{uVnCqC|M;Ok`0s@-72$p0cRqMb`Y9tek8yQoj62 zwMS(F)ys!|%J*5ML)~|Y0w8RW&Y0`oGyIwghF55kYc_e_E`)KLm}T+yB04niH-SAl zN6o~|&3XNDE#wqGS|~2owTlV!<~VNO%b72EBsNRQz^uDkhjHC$S=lenFd2+} zXqrl4X8NE0@O^|HzC!&38+GCB{R`i=<6#ardF9*r4v1r=(98C6TlyXC?HjVx40R#Q za+ttp4XgrnT3V&`x5a zeZ5<>uMS~Xy_Yy~!7pVsr2E}uH*Y9pS;~q1FJ?brZ9GQ&>K>Eh9j?5ocJJQ3TSp!p z2GmO6Km10TxALCi3+p1&yw=C9B<`Ye8jpk4yp1k}ALS#g0-%Q3M(Q~9a!Xd%1+qPD zxxL%F@y9-0x4?NSn)##04CzF^b!B;M^RZ~^Y_>Nk4N;G@9`SX@d;K_k;A)5c@qT%1yBBov^ok3YA*^3+C6ME@RzRe zfGrdhIuv}vyx@j-iKE$#NC+uf$}f8fYGOKUL+<8m-)j`{WjWs~{L>P~} zm(C{FxDZ@O`lJK)F&+mt>Kw^FDmcS9D>1yu*kAF+HL7=K1jYsRuF=xsUTjw&IUNc% z3Yxd~6AVLv*#^X$9O$wD5nYY=#cIka*wEe2s~cgJ0(Do_GB90@g|@m zKcxu`ymjfuPhvViKawg{qF??A1I|IfpxqCiIZa@m!4KcNPZtG4Ow98C^Pzn_&}q0z zYz=*JA)o#{Gdn7z+YfuRT-elb&KtZ63d9nXaFa?K^%MR(ap<=fYJnKoXxgD~^5(@* z2+yJn8_`9rUVuXbZ*5W~(D~u#9pCl!Kvz`o?%SvHJ>np^GyY#sbAhRek|#w!puRd=SNrTd~F;MeTwEyFxP?zc^M8Us{rhWZy1MY$Te;0|Z z;r|=&f8%|&yHEenq#airjSvMR6ua(t(z2a?yiFG67*80UZINhmHup@g_`7kdfy;KS zI^nmO?a!ats-}A1`9ac2MNvJLZ={TLp+RmbORizAsbOXM`uYY1VfUUeJe`ji`Hp2| z=*5l+$uGx*ktO;+pgSjWq*G8arvkSKKKu7@YN5=mEE8j6 z21Z8k^97fdlM_F$QS(~lrJZJ<&Q0&Wb(ujv>-C>`RW>vBhEZw?;>Is1bLc5)9Y@P= z+_)ijp`VyTpOv2O9W1y&`V!4$$**ty88~BYDEpD+sFI3`sLQ^oxB*Kh;eIsAKI@^c zz_Y^E!$4sg9&vgHQ*o%H`@?J=US2(L`5fgWSGdS(OR~{-PKgK`jnWrZrLJV=*C$tlLU(4qFaIMOAuHD_3Qk;i{W1L+`v?R4Hd-+aT zy!H!3dN~A|00{{f0nfd+*k)&EhmM6W>~YQ)iA!&IZ4}-|F1I+)ik0$gVO3t z38-Uq0-Pgf5R9d6rPV~G_0-E+CnHw=*4sEUXp1JFKL(h)i$gz_P^Dxl(%HHX=%lbZ z!N^ivH^)mGJT69yUb_AI@j;x25pAWiPT6;r`GOR!H_hfI?u}t=z6J0rzpSF4-&Y%m;@A^eAHY4e!`qIQy zV|WHcsi5H1zyAYKFFd?QuFZ8TrYwm0!n 
zXBK|ffcr0N;uHp&h)oS_KCc?~honl31E=d1AX}VVf+-~jg4EgS=3B^}tzo5$`Y#WV|3dBEx4`z8@oo|d5 z@-EnwE+w_{(fFRq@+DLvZ};8*hS2ktjs|J4)0zGZU#34(cXu}xubI-jo3~ebJ~~_@ zJbGSQ#Xb}bZ{ejACAhPxg~#xFB%~g>Zz^K^8Hr;FX?kDDvPQ(YCF1g|w6wG(kf6V} z_feT3+g=zd4%H%w_kh$0U1pMRdn^1Y9_ znfDGlZ9n!t<)c(meWR!}*KD!3mXq$l@0uLvwN_a|bZ0<^8ZS{@g>C4&A6BjN&TNTM z3mz3eN^n?fw)NmC+o=e_3@P8HY{cK?y^5pE&I;#H*dEGOWRwdp&@MIB)t#jEx->Ga z4BkqXyakAtam%#M*LHbY8rGl$6B-I47kB%dT8CwBPB8!!eH6%`5}?uAi4THpWG zt&~ZXqS9?}%;JAk5cN_?@0d8G8@Y=@4;;y246n|QiW|;fpKQ;7sqXwFWbfOT=&cc& z_5d^GQHO?v`(~B)NB6g9gadraTuf!<jB zW)$>cB&zd78_v`?9&L>63}$s?NCleq%#FvXfVUgfrq1D1$sO2Q4kG{q!y*hLQ#QLs zTm=UQgQ34%j!{jxYJ&YxH0GloJoD@0u`q$X=koczlT-a;`ZC$6#XQ}|ON^f9YrZTo z1oK7^dev0%i)4BIr;RjDN`ouzRIo#o--tEM!Kb4xA zTEu3$I$x`Jh@^wYMLsl#4&pVSqjWp5;Xy^=5x;bkiyS`RWu3N)xgY9Daw8w?HIpDY>E`AGqS#nyPKNx4}s z2;`C#@9UUkNr+O!;2aVZu58Hsqa_P1kLD{(wY}rU&8e@U34LmtEaYsnJO6zoU(2Av zd@xHcLNZGOtS2(kiCjW+#Ni@32vBHYI~BC-MDdOHARD*Gw(r+1@9KWp;CLM^Hrtn*J}(^Fcz6 zNl18m2!Fa5*`=1!hK(ulf;Lm{F{fw&)Nbu#z!jMq7?2Y~rB-3S>`gy-4+{?8b%f~o zXOD|xPTh*}N^2^rtjt;QB_U;Lv|ifLa+!^OiY|R)J1}?aFw_(oCLxCvB?Scqu;hn^ zhp(?r3}kuFUiz$q?2#2J4YJ3PR_X=U-;^wm4zXm~d(e0potUT#(o--A>uj~1--LF0 zxsrCM$^?C8QuAY>Nq+SpQ3W^Cjt);S2d+mJ3KLjF@30l43txOIbK05&k0#=@NioRE z+|+T=LOY#Saxj;Jv3EtUG>0ArhD76WJyuKE4J9+{G|5-{^S`9Thxx>P1Dn8b=z@{2 zwp)7D9k`CsXn;;HUsm?R!-4%Xs;4Z5Sl<(2x+ly0?2V ztg4%0Jw&ZCOBbGXeN{QxUU7F)49ZSf@(Hih()Vt^C`JZk%R%C#v<(qTbDZCczuYG$ z7Q!S>IL@@UHC2_Xnx5q>jwfdH~m%yd#; z(mJj2{_Y!HWFZF0C#Qk&t3`vUfVYtw+9gn^QT_t2w5 z8oB?Fc}g6rsIr*md2aC|mOU{?IR$bwTBs+j7bh%A6iM+uoB*PTqC)U{w}PYf#G!sq z7PA%OM+~?QhOh)lo;K{a2ObsZOAqK|Fd_CjXan)7b-`?`4d;R~hRQiS7!_iPh7Vz)1PQB0 zgIIw+FUgsAAbgWcSkI6cVlz@nz9^vWu&#=~V9zM`X z-%HW_y(@hC*0F;a@60TNcx^sW_G+rq+IefT&GKdLBYHSTvHCcz!p()GM2fx)LU@5b z%lY6|GD8MwOGIl&IQ7ONE)Xx(?G9s2Po z`QV!q<@lGcR6r-`(uNqyCR9PE>T`J(-xU~7fT}!Ug3_#5uWEaJB>(89PJM762o0LIy&H3zWUyhHftWDO z3YFW8h_9Ao?Gd!Jw{JgkSm^;7*j{H-NGiJtuL2DVxx< zwTl^*6YKEaf1P;tba=Yu^!lJ*>=rqf`|$)c5ciHf;?m{3XJ8)GWi|0w0MaX68qLFv%U_$T&&?ujV!U8{lDl_RLXHb(NR%z7zJ(n9(`$_nBi 
zCi~lx`1>3cykKf@Jz5800(3mk{#1{h*?PhtLu{Lc3gQ9f@3C5&FkQsk4IHSsO;ERi zV+to5TlFtDi^PCXUthFmxT~Z%zZkFPeoAtML*Mx8nq7B0Ii;bKgE< zlp(2fwlJLi&qd{6h$F(o!$Ep{|NcGr86Mu(*C#uGF&QY%L(>&$z$TjJzAq1I1ridH zlbt!+R7_Dya}Nn9H%wT{JUF}k&jSPn1r1ylgMP3lL;Qm9NkE1PC4JNCK!%iv>)~gE zMnng#S5MD(IBrFQ`iWrT+jM4c4|GN&^C{7H&&G5jI(R`}8s80?(C%NL>}HW277c1+ z-zkMIn?k9+=i=O$mQ|EfHf|yFh)Tkdg1@BS!;;n*K=ScHM zWAaGoD+iS*HBE726G!u;rqzD2>tq32eJiWYARd3#%J7HLS}My}#PJwuAk&nAju8|d(AB}sv59iFF24(>k`iJ{kDvFA= zE(d1mp*QzPIoOHJ5%wvns$#9Ddd;t{pW;C*am<2L(sg8}we_Pjf*-S9&v{*oaB>zJ zx4qAj4XbfK?&+*hH=fmuztiQ8skCwNEhyV?G(RRcU@NNp3K|yd9DDKkGxKAMYp_ z6crxE;8L;vJ~NmJUcf9JF63i&J1Vn|Ns1(j;rilar~e~wuH?I$ChegaBsn6|VL=&J zqNb9j@;Hl5M*N?0X^k&A_bLnw0Jwy~E6qAkz+l7ipld*S`G?nk6p3%>C53M&D$KVw z52gP;>FJG>ul}8O$TQSq<4w&Ocvc#{t`V)RtsnqdO;)fU0?1At$Cn;?mj{gR)Se;- z!hWcLfhPue`2NcER@EFnHKaoK>uZf1B~ZW7KP8XJxH`fo0q>PC4Z+199FC8dOfOfV zXX?Gj!0RF(z{22$XuLX}0`=QVuCXcGMvtw@N12cw5S>ZLl2mFKm$N}+?#;X<7G`$a zPVw&QxecXqFn#!Ik00{c{g1Zfv?`;>72z*2~Oy5`#9+NFX6Bj@#4U zK-H^NtnV?HDHA~>!k~Z&@+k2@K;Uk(j5E#^Xkp~LwkvxrD z&=tu=(BQ#OVtDAwK4CQ2YEvE_{z8s`2Ae|p`%Wd~qV7y>q)w6HovC+ozBt~}t1vfO z?T@{auOtqZz|z+R=ZfUYTj+iP2wC>*Gj*0=eab{PlS2`r*7|-+U zS3JNnKu@Kb{CKZDjB4eQ8Jz3y!Z#5y^!w)NelY$y?gx~big#Pq{6R>tQ(~u!nrGbo zjzwE{IHU;TuP|sBd*K5_{66*wixS&;yUy;iyk(CBM*mm+jc+$}d|!ep+x_ZjOz-RK z4hycn?PDJ zHZi&X?DIpL>BmB`={&gJrSzX5@B%E@XwyiD#U3TR0F_W1UFZh*EHi; zIk(e`K$qa>$E|jM?g0sD#Nyp=Drss^Lj01`Y8GatPiYtRbh+;?Xy8yJ{H`+XvkDopG z@);rf&~=YtEXyqnE2LgFBNtTGKc|FnD^$(3??{9!l`*B!e@>GT)uJP(G$!q@NNxbJ z!Mih6tk5s+9e%|P>>v(y31&iUlP!&z{KWG>!D5|i+w|g;ThB+F$2VT-OJ-Ycr{}89 zn|Qi#Gd+3XuH8?@8RxNoyS`|dG4Z*yk{y${fbS{B{Nck^u(ExBsD<`1OdNWQ9(ZVM z#*9hY$<)4vzfo7OYp~dec~KEDf4v^6Fg7iL%#lgCE&^61Q#*DwRHcE-7D>W z;#eHSK#2;T=Nkrw86xBci(|9N!?VyN%KJNyTG~(#vG#N?=Zu}(gv+Gde1^73`!xVR_&v3N1eI04EfO0y|jB% zt=!q((szAOVE&9h2t%2WsipMm-yAz}5O`!2r1$P`&^6wFV=Gtw{_*qwbxjZ`%`3;l zRKX3^|KET9|2PP0AKg3ZbStTkpG~l2AI)8J>MdNq?nkIjXer$HiL(wA%pr{oVkBgn z34i;=DkitPJyugl_<5!LV@H3#(emtzk|Dk=kkMaHSMO~UH#RtKjCvyyvO7-h&0Idk 
zZ@-S;jnJi%;Y`OmQQyA(%c(ZZnXfA2##?~Cu6LRz1{46mnt+n; zvwF5d42yD&-4Z6j<4FRhEN*UB(+)XyWP$J5!Q@b-!-jS>i|6I)=;m$f=9NU+ZlT-7e==y}d|CjxQ!I0#w-5>xqGD^WYW(6L< znHMkCuU}kToORo&pYyy33(Qhi4iYCk&$zv0i}u*=2`naGW3eNGHXc;4*FuX80%)&i z)nI?135NiHnC${oV$S1a=Jm)IW>!{OQTK!{P#<~cpr7$}zlPmILlPDLi}a}^!b&Z~ z7JYku98U=vWb+kBNK|i658x+UF zHSD|eIsbSa9DrK(xmvdb-~m(w*I=a8w?X>|JJw2Kn;O?1{Eww zacQ8K0KJbD(*vgg@qb!?%t5Eb@cGLy`NZ)r0Nt^a#Drn9wfcdUS&k(L62dV1%>fGF zA#k31ny5#^<-TGK3SeS30mP%|kg(hN7kGPyQ`2XB(-U;XfI%wM$n9Hf3jx{Z!6sHp zo7zLX+y%gtbHog?1043{Zz$?OI=L^C3IK;5fYZ8t?->P;=@S)|aP;f?m^vnWWdH`P z7dCsL-`p!fO&N(IDlD%?NfNMCdGf@3vSJNXU8Td}s7(BF@_eG{5|%K`~d4`7C@;vgX&007c!61z8? ztt3^=y58sKAKh}gK3$6FA&zAd<)vHjKgFOsR70Md4t5=&c^agfTs4t+K9b&GW}N`| zHaVv*r~@AWmcs;e^s<;OE95?yA{>GUm{K$2Ihjg`s=y6YY;MrpXV@B{5ej3ZjAW#< zKYuSX+34j3ihj_6w!OPWKHKO;n~lxXUhs-CbMNWyQ#pYfBrco(C`~|(Jbw=fv9EDC z0BiuSiHQjo{#_*e3QFM|MmKo*iQvFM)C#&imXD+y4oHI5sqqG$dqkYkK5jE_ab4Q)P?0SqLW z;O3%9;|;(e?mzid&b9;)PkbtV4i*-($qI|HqL*22O`or{KXOvf-fN|%}okGyw&eolQ&aO1(Aui;}T_Xa;0VdJl5z8j7K=dR+ zSX?i@=rT8wb#SepW8!-!M7P7R3K0+O0LvxRm8HE=Fgzc9* z0ZsZN{(+cTPXd_gxLJ(C?x(KbzNg$GQp;e!3B5n04i(WmJ;2nFr>u7*oZZlgSU3Va zF~CgsN3;yK0JLOg#v)4G-1a2uO2M)>A8L27W zy?Qa$R`E8VCtwv(;GzYhJd>c{H^7qt{^T5lRWxhK3#P2Z=@P9bb}V>-%htdkacq;y zTUm_+12V~*A6YYLZ-+%(Avqs)=PS#H?JHK*o&;urp=W1j;F%D&9@Gs0TpdKs={{!o zU66KGsS*cjQ3YB)k)s18F@IxJ_gML9d~%4mPyeCPT9(9_@gRFUld9o^}iXmOBC&Lof^A{q+DQ zO!eq>1?cSnWSA%pHWiD%oD*_Wu{^=Wxk)6qma%y{-VeCKV$nY#%t*d$dU|?qsY6a$ zI+R-I7{C^fD&N0G635yeIn;5SP08%`leVaU@su6VIbhMLb+QII73|$|Kz4}B2lNR< z6vuzxHA`#F2nLCV4Sy=O{tr`d2aqy>rmQN-6vA)cfNn`cL!&=Utbdz7d`!iCDI2h0 zfC4rL4`-O;^M`5zd0+lFFkZKJeR~c25`RsdU&+cV%#b~@Z|1o>OWj#Q!>RrIuN-huS+E9 zmzgDS4jKXJlPu_v>Tx`QijH{~0t5pk7nQOnu$mZmTKIK-MA`O0hS0jUab6DOp_d z_NRx4!Z}bMi~Ni1R;Vd>kYE9u>C?oWuYkyZNSOC9K{A~=C&_nIGm8~){Z_pny8Qma z=tS7=U}b}>^8j?+V6zb1eN4AVMyNrEHbD9vjcl~MBz1&{u!h0~pz{D7S|2N(13{vI zqr1^@L%GW$gj&0wGQLPjkL}&3b|httdqvb`;Xy;g1W@_nnCNKaKmdWnbc09IL{aA& z-(~i;1BOL9DIm~3>4Qj~midRhn7}H1%cLoY0C4?ke;O@0`4~XqA2gi$0(!Sa?jOC2 
z`>f+Hdx7Nkv+;2VF&3Yg5qp981OT2Jh^oVY+q*naoh}9PIDcPgNDkW!X`NDJ<1;qX z=xB5IO+fUGbW0=YWN{vnq9rL7F!JuDm7B-v+_D$Xg+1wOjs($Cc2ai$zIrwraus__6Hvu8ld*c?_bgeVN zNga~M=Kd)x^`{oU1uA+v@?hJUnn&P3z)`Y*S{y`33C7nAJm*`ht zG?22R^m6re4x7`So*n>v)}62C?=)O&gGUOvijz$Tg`y=+-s2R>syKm$b%>st+MDi^ zAwIl2K!f%9v(Irx9aCgVD^0{IDC$6R;{vM_KyrPlB9?jBZKo$@pi}=0`V2btmrx*m zq5)(&;2mz?z72j-Q>Ub)gf#&HXHyi0w^8sF*CRQvabioE3_{eCr0O;J7r?dyUWxC7pal4a+(Ne zd*~gmc+e!ocLTOaDdPIowb8pGZvth{1j(HbA9Rce&pU;KMUeKG|U?wjemw z*j5h(e+0%~=UwaT)^oY-A0B>ZRIfdvoTyW7)GdaEA^smwc47$dVjciebn zzZ>+EJugH|_hN00H$FjoZ;@A(t3ATKlgfZIr9RTldL{YQS1MTHaLI5cGy8qbTvIRp z%hokJ0b6CyMZ3AWM`z%dG*5oxw)I$oxSYD7$K2b$TDykl#G)L0LGK33pr9$ANI#VQ@bX2S#m063r)#Ug&ZS-ZyjR+|E_B=~(krc6tdHXET%E(ErLE0uO(2J` zmz6=T8g8IQkl0hzgktD$d{IPRVu{O?T2d&t*w%PgY3~$__Oa`NDiGjN7y#A3z*AFA+Pxu~ZMIMaZ&&|(TBE(22TM2kSC1Lxh0*ehq zQdJmX>!-QvWc{l)rwdWB*Z%Tc?x7zUsrGZZzj<9Fn%~_*c3c8;?hc@%w3py}+ZyX1 z{&LW{&X@8#(9g9eC34>$%6Lxf?Y$KaB%pPw zodF|aTYoePZeb!iH*|hBz}!8in!p3!@lxL!)`oxdx{iU^t*2+-=O23WVbme0Pnu7r zMHxEfNMk=gZ=O*{@9VrdOJYseS;QDAR(w@LD^&q1A0T>Z>*y%f{kjgoi&Knd?E3JY z#3a$}6jThEl!tePEShQ$$l@WQ(rB%OWAVEO0mmCunvf)j<=XTOKgs?t>kld~7CO?} zZsUBovv>ffp>&7$e;|LLpZw5B@|*oH5O^IB!b@0)%@ZTFoQF0;1v(b;8LN%GLN%*Q zVP2~*O^BpIBlbp>^j?I#i|F<1Rjahu?9d81l;-BAI(S4ph(DV>i#Mq8G3*;d-m z;($)K%H}1&;7N#y`K>2S>mJ5Jcuwk>6*XfK0R=A5><6@mvzfmrGSwFSY+oDiu6NhA zdLmx!H!nJ#?SFV}w?!k8YNT&wCnq)|k_l}rHS%Fr%1ppEH8VS28)k(-Tmdm_;BoYv zJBxIq%sHozN+hHBu`3q$ey5!Wi)xxdfsTUv(}V&jG`BEW(Aki1LM>EHS=Uj}b5U8KtF^HwwyG~IpxHO zX`;9h4Ppe1kY`Rs11lfelkv8eH0Gt4&i&C9_UXJ*a1{}j{`Pz?_9XdKHrW05{jL$+s41WqUzXI ziJ8LurI;W@4$X-pu4;TQw}!mm;8e3TpQiPe8y?*HY{P8#XG#uZTRUxk=X9iqCt%26 z%d2T#sX$-`M5chR^@0;#bUfje^Vj7vd=0 zLo4@^u3NP#7q;;Tw<_#g27VVIej6c?14?jK<`TIo)GivS?lCqt2J*N#PQByZ1*uH| zN*@)YgHex|oE(4IJAR(=@){bIR#TSA4cXLeqJ0+JYmQFcl~*2f_flwi4S9+!@{3qi zCeJ;-_R*s*Rw25a= ztbDCR-301!F9&u4Qm+e6j{3f1H%rM9Piif-&u6n(@*3*oiSZ=VUk#XG4UdV%-Gz>B z+N42EO)IP>%49;wsQ9g#oC!X+#&dZY2-{h_H(9LRD#zs~XF41hU65*ivBn#^Z01VjO7 
z_r6!lJH!>dX&obuQ~oYQ?{U_&xASSjB5ok|G2JElYol+D$Gd~Pg_Dj;&r2&Vk0>3_ zkDH$>9wVi`47m%8l29TvK<}RKwh&P9nGb-%uct@JL3?-by4d|7Fd}(h5%>L=rI(nJ z2jt6*)TnHF6|BL+wzhaZC^^?`wN>^K&DxH~?&$juQ=IZkeAGm8JcD3t-R&SDd1at5 z19%*`Bq;1IfF^5PmFnY!K|QbXsBKedrm&IykK8os9M;amPQ|C+9GyBl?^upvosd%= z-bsJdS#dJf5(?v3B@rdp?*g!hZbb3Q=sk-2Yhl~ULFYDi963h`wvtj`$sD1jl$2G z=dSNye2ISnCAfQcrszOVrG1V`;6jo4n`5PYm{XN!s#Vr_LjZ)Q2DwEXd}dhP(t)Y>k) zOiGHVhbs&QE7*BnYAh4q{X9ecc2Os0{5p)UxbHQ38Jtt=T9d;A#{g>0f6F-JF*PZ* zflW%7N5c#V6K`;!myrAUJ^{3qvoiTwP>12vHdW)|1rgNI@;J25Y3Y-$b)=5RAhsoe zs1G1b6_q})&39&MPu#aurjM-Nl)-`vw!qWlVH<}He>9VU7LPdIGT+Ps>WS5F}d8_>C28(4Zi zg{sr=OVLR0jQm@>OvHD@W%tu!Ru&=ZZQ@nd6W2RJ#IEJ-{3F)eu6+mI!!`Cw$!Z{HK%@&VRI8W5IPnRs%5`8^v#UHG|= z>}KhS(wrxn2acCWt-bY+AnrU0BZn5j(=6ews*-7l5G-~Ip;~oHI()H~W2CEhDOEPU zX0WwB=W5irXR7DBZTH}6x3H9CVHMgH7gZ&{tCY<9?u5)!xUKgJ{if2fF*N;I2l;ya z{D-GYKU%IuwB7ws8;&+fB8=qvPGypf?u6*ArL1|(sy?{HBcOPlvvBwFsp}W`W+rnQ zhu)pt#`k;s?bk=-Mb3m_H`Z)DP6zQ>o`$Mi$Vh5+DVFVaHjT>eTw7zioxctm}C8Y-KF6K#AL;%RM2iz9Owm! zL-Cf46I5C{o)EgEXJk|FpYfTpy1Us)rUcAvrSI@jN5!{dTMwQIDMNGnv>$s#Ib6T5P6*{FqwWnTXuLHD%^(V!@`~?Qx*I`o?R6S_ZTU!BtePAab!ECSfXc41DI7N_xE`r=%<}!w?Xf^+ zpU*N$4RYZTc#yQV8<*!v+?-R#TCeknSRl178wgK!#N2IC;^)~q6wkkn)L+iospNia`v#}+wq;p+s)a**=>b~ucV+2 zwG^x-9d8oDAJ;X+MZWKwD@6_w*RtnWsb2pOgO~Wp)_DDDo8nx?>o3D-MrEl$kD{7N zddZq;Agwe;>udsh3&#CfXl#o+c>%TI_ zS*RoSS-zXgBf5h|#3`JOPy{0-n>*_0VzF26L@R&7?$95V`H~v@fIJ5*fwkHLO%WIM zCUH5$&aliz?B*Wxh)(MWi6A*oq&NabY z`;T~%LdYi4bVP?Q2EZ=-7H4Kvv$jPu562NZ`MuX3=Q)K^7Y!r%Qe!~n6XJ3+#D=~$ zTpWQrMDEyyKL{pDv#>YRb~g8UZ7MqEk!~sESL132xt@b+nwuMdJy88zzgnG`Ws!Mk z7I)X!|5A9`#37qrj_Ym8s-v$z+t-gzK*L=q`IWzyxQ0vb!~S50pVJPN%lhUNJR)Z9 z++}~w!FFdt%oy~K-I_Ud$F5`hA9RmZKbE|jLWo_vHi>`wd>?n09YP-x1yT0(!A>GJ zsP#OE;k=No7pawqp;49lYCgOcCZjl~Teesj$|-g1DY_3De61ptItSZHl8B4GdW-nR zB-rNixX6W^snwmbw`WZBv(=N9ox z<_yW~dxOu3^$W(nfxiXY;wn?}oozXjap-;9$}d(Gb3avuE7H}pE-v2xQqCvCr>ytQ z{uSskYMgf(yska7J-gn?lMIO)MZE40elw!;Laq5t?ab4sPi?A_c9U@1)<|peemjOH 
zk?IRjg8~peOC<#oLdNO3zZ{H{A0!1#q{3?2e8kbg$t%5Ws^g%mH5mupjYEQ3?HGMomOpj!^94CG~*=!Uj-6QXv!G zaTV|9{nqSXv6%nU0vLJa0d<03`>+n|bpE>^5GWSBtUU(SGo*re~Pe@1r`h|o9iumpiDJjb2vB&=e0an~&INtNd>|jFM|>m7qV9fVAS4W|b#Aw;V070NyKO<3&(smjV^W04JSj zK%zxZyBu{&+wPFE4*@qG(hvnio=8_?T+#zzq*w>_o#KP~3Sf``HcUBv@HqfFuN)_e zlrIv}0`G|MCIJzq7~Dko;jhWyeW0|B%{H*+W@Tm~y^X-iuz15K9|aYQL2WbV&HM`m z0cP!f*MSgE*p!!c2KK>fzwOV@Pk}!I7%8%`9(mm26XhUNS|CMS2mo9BZ{y@#GvGU1 z6pG59U;%ONG^nf-A6WuZtJqt^V?Clxa-3`cjL)tD#$HVh4#*4Q-_8-2WG}l76Vn~q7jzU=)=^Vo_g8vs|PnRMy2wFXeVUss-kN^z<;%qgo69=Lw@MUlb zsU!&-F`C?8r%Md~?GkRr9?^kC0AoOZvS2w-kpqLInDch>FiiSxmvqcyK0sI>X)ZLP zzI!{4R3Q0_L+$W*czD1IBguWxMQ&E`dAF_*2Dbw_=kL*Je1-GV{1QU)lUujYdc^z=qx24%O6 zYyJ1^?xv#dxpW#fBAul^8HSWVNMOM5$G?me5)hF_|5{$gz594~0-Ba0U^@$lv@k1t z1$Pti=79e#^#G$F{Kt~Y5X4jq#3<9>9FWQj0vdPVe@pKWIL55>OArgU;4_UVo<9+W zX;9o!nEIzd(lb~1klAO9iKL%^0HaW^YPQBP|1oj;l!QC3P`?_|!}_)pZnHk?BeicCjBwmjFGZg@pxBlP9^z>^)swj!=(&`3Z30d2PI=e?8xQk9D9=-{-2N zC@3m|tLp-?q`4Y!E~IZAR7yJXh(yo2K$%%}y6Av>u*2DfE`8OUlYtt7@N>QrC-b&pax; zcrUCxyezpmtSYtzn&)hV55R_#78Ap$wI5kXiv47aG+gHS*%6Q-tmw5uyym>WX(J^i zmuCl{sZUK#c3*78St_3qByfnm&Ff{-?7vlUX(XaZ8btHsj|AyQVR^y^P`(21v;`0> z$;x8dLzHt{qbPc{kTx+T{=aTZeW*uLxx5a|oGbr*<$x<4nr;!Ae!w39v{XM&9ck18 zE2?c-={yp&{0_e@l)ZQUd3KK80x|K%oFDV6J3)*)uDigR{}u#C%b%a4Vq*_`1?DJL zOwEv1()&m&X<})Iv(jIETa4V$l@7`J>gp==)hj^k7Xeu{m4FSOAPIGJd}~G*EsTs! zn(ogh6Qs!FAR#MG;N^)JWr6ho_$uc%-9wk#I1Ss$T zx#YauNkmEK;0`}%!KHQet@tgG`Kv|py!GF-NM(^$fW87UF>@e11Se+5hhl3_mc=Fr zM2R2dCi#-zIYSSlE#j2T0?EMyY`r@9H^A5q$jfgpihu+j0CMu*dppE8^68K}^``M5 zva)?R3Bn&GUpCs7)E#*M)3pFjo?QQJ22i~L`%rMyow&-Ir-XE!+Qv(NJ{n-cH;E5E z8UXaNBNLJu3e?1Jg;Rlfo`asAUR2Z*=(1qf4@DFf{0P;QLA*5NK}DLsJ#YOpf47!c zG>4Es073_T9v*wJ2E;CR(YI-HGAKn$Y@0s$4B`NzPK!B6Eao7DvLIRRXl6wqGz4%! 
zG2jA!JR#2l&KUp|cjFVykoH&@v*r*UCs36wnTP=zH3bK0Mh%kmr|ml4oEQew2__z; z`{AG*tOYo9uNCCammg~51^`1~HsGOqet_nAZ^VKdoXEtNjr^F3_ZSl}(Yah~Ri(MC zd_*e#z+_eD?H}+0Q*Ni&^Y#vH%m<)~XjmfVYNU5M`HTU^Q zJ;YbjmVasKA^2j1+Ai)O(m~&iarW;+5Gw<*#LC8bOGqJNmYnk_U*RlOOwr5Ql|9n4 z579j~q~O~Te1e-)4}lQ#?~Yp+MorHXb`B$zOMI1>WE-*fHdL0)Tb+)wC-b2vt8G z77p(MqF%mqfZoMX4Vk%-wpOVLCa|DeDlCS9E(ByPx4lIfGj!lqHv;Z+w3IaEH@itB4 zC}^|s0QA9%d=rmE^*(lD1@2IJclbbtsK+^Q-ribUTLXoR^noKKqQqAG7V`p*$$Mg6 zyjY~J!#lk^dzMM;>QLkL>o48EC?CE435p6JNUgVqy$=8yrKK5*+Mn#wpP2s0+m43* z=<`sS7`0V!0p|?9@&M`=@Ck+uAW~)WO~Bpikdr_wd{6)ePcHDczxV$dr<#(WXiW2nJh&h=A5aKu7(zE0kp( z7)S5k1yc*eI)jks&8Hv`18rq5=sK?DMUu&}V)yLS)# z1pgKiq8j{6>geBv^fBcyGF$`nAE>pfEQVQlc{QZnGYNltSiwhX&*U_Pxo1BN*v z9*2hza0z$;kw^|_P3!Oaff%zG8n%g3L4fQ;9LJX%0UI!;@5TuVFrYhY_a$~pKe_>S z-KRh!DSFq}N)f-66R5D-8Ls&5EhnnM3Ic|jn@`L@wD_JU}c`}*PLQdHoIcccW)N$0LnvPB=Nm;tD?{CIA_c>3``RjS^ z*ZsO*Pk(sz!i@QSm+O0duj_h$t_NDo`{G7(1sqnK_le#u3G$UvyEjUh6(FLpw;K@h z=?|~&f~)qVIex*Rg9nLc+>$ZC($!P+gx$o1UFL0#WT=xNnZH9sOFAN2+T%Y%l3k5V zu7I=?1iw|8L1K1@b^IH&GCXV!3Ss2I6M!osxYo0PAxP2}r(l08vmvR>4TDT=6{x+h zR=sj)3pKxYz~VU!$`UW(MN*#uVn_Zs+?r8w%E|-HLL`dQ$_indZobK2OR=Lqrl*?3 z4<*udKL-Z|fd_dFd@U#?C759hCh1KQD~C&>t^moeK;75`j!QqC?R&|8v%8=19BIgz zap-lgsIpZP+(1y0|Jbytg>(RrL_1tp&V)sOA$JZ*dNO_1rY<>x2eQ%uWBqo{!RCAo zsrwgfw!lKU190xcV#$*v*tLc^V~@iI&p;^m1p;QzYa>$fe6RJYRW~8Y4PqigK-qM#BD6>fnoK!XOvgLZy6Tnz2iJWlgSpWPoFN)gNhb@4#ITE@{B zH}t_${R(^o)B(ZrODT1Piqcfaa#Jp}WXk+C!=emmM=fjwV&=Z@tvvej)vGM|6<|t2 zq23+@rOsUmMRk<;t8cP$5RDqY(eJ@$K*?5hbBWB?Z*)rrt-sDN;0|55J^CUf+2CSr z3pwP3Ha=0_i}+4$f7T5zI?a2L*(0B=H1O`>lrar^d;o-qpr)zdf22OIUu5jAFVx?r zV76~79Kok{QR~;$P-?jyY z1SSA#qH?sgEisH!k$h(L#Dq|+u9k>J4>dLURJi9afKtOeU7ASqy$){LVX%kM z;JS^^g7*))F=@#foYXw#RrN$$&?-L`|{I)UsDbJ?geRtB$pHGBWN9~0t)<+9}kDqUpcE-g$eE{@O7 zmJ*F9AasV5jW~Sp;(SskZVeEQkF+OVe0P7{Xt2YO>|Gb1q&w-ao=-1ti&?4DeS5N_ zkmFYx-J-2+F-Op0E5VKyBnX?;?dv#7ubd=l?YE|)np zpAMFp`&Ejba9v3#J7(Z^ zKWDv%2k0rqEXlKW|H)k01!BCF)Lz^CXLG~yD2~oAEpMhauQTP3)f37 z9$+XzAvBP^NU%=QTDEUSdU~7&;a! 
zQz4eSXb-9=V)K&b+;=5(>Tw|vqE9;9uobR5-Bi&$Pf_`?naZ*?C+BCrp4ywPCZDi* z5@I#qUCt9@p#pNI4+?sOtJ|67SORdObLqk6`G*praV1zi)vtl{Uis;S((6MbaLr_V z6a>}<1`&jkJPDH1Pm5>u5x-g=ZWcAKw&g|%2F<*s6^JzuO}}Hx`#uB@@F+(?L6MHI zaFaiN-`{eXK^pw-mf#?PEdd~cRT;B7zH?LX9x+IhL2v`p7S=S9mDqd_q@wX?zBZkjYwlL!$x07dBmMR@sZ{(!OeM4cRe3Wq@OjZ zt4rXn3pB$~?$j(2k(nXRCIk?rRdFb~tSAgGP~NCb$XUop5BV`GJ7L76QK#rshdXa+ zpl~K+cfJ~mJ_PmN!2ag>G{GhhkQYP}Adb;ddzn5*U6zz+5=u2&*(u@ou04sgfg=+L%-v(MS}9~4=3w( zSoWycte5qu(N;=Q(})<@`Pgi%gudUynQALia-JlTbwR;?qOfg=f5dgzcz&TZ6%3`a z{?GRe&*S9(Qu14#!-tv65a`)tw0-O45VJsB-9AZOK(z^f0z^zM-eQa zK&Fq9J#0_VGGnSYX|^0kW|OB`JLkFZZ>J5y>}U8&7db5_TzLy|-J|c*x#4WY3R&Rc zny^nL27eY5U+wyuL&$ei}CJte?#kmM0TEk}JAsGxwE+k49-)_H2$_}a17U0e( zVO(C%aLYFh>K8Bh0mgfM84n=`@TGjB`PN49WCH(WauRF%?Rfd)UYX7JAa}#`+48>V zEM=`L1V(ia3n=)N)`9eNWFGT7Po%BBTaqS=q?$QZ&&Ug;mpg&5Tvtk!G>2v$>CF{} zFNebeE`R9opj_=JnbmChQVHt=lhXtc+$!mhv?8DS&Pk=icX<2kfJyN@h>ddDkV@O$ z>W=nV$A#DO>j223reH+S_kB}*RBwqo&xnip!%-I^1Mf? z;Z^Z~C{F5IC|LxzqG~{nt0Vp}@-Sql;F@h@WCVULe1-qBSgdkJ=d0%|Xe|Jp8eE%? 
zAB`lr*=(KrJ%arz4)Q?${WVo;Y-i09j2h1HJrRsciuw`$@PEVgGNn1X$q6Af$>JQ{ zd;v!-2uX`Vif_cB4M_qr;cb}eoxiTwOHCCLIgGcG6K=(oe#Em6-`u8O`1Ci^O7I)( zPh8v3COE(_79_%(@DY^&xAgda-=M;YgoO_y2qlRsI7>q6Zr4nu=C0J_K9EVST;DEG zs)rXe@el?7$5UEGv7Di`;vU_Oco{oFYO;Jd)gepfi_MPl4p{sKQ2*iPDxn@QqFvE+W4~Pj{&F5ALISi ZI!I+7lRvpD0h-|8TpZl(FWdSj`~&~abqN3f literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/activation_function_b.png b/model/train/yoco_moe/sources/images/activation_function_b.png new file mode 100644 index 0000000000000000000000000000000000000000..04c98bf63a9d7a14efa6dbe159afcca303e242d8 GIT binary patch literal 31088 zcmd?RWk8i{*DWj|B^@H&EKm@nrIv)Kgdkub3Q{5sqI5SXB_b)IgaOjs-62SKgLEw# z&Rn|p`#$gazVq+=JAZKNX07|aYR)mo81o8LyL*d>fR^CgxpPE!Zp*8mJBJZ;?i?l^ z9wz*YQ|DdDbLW`P-I15kbii1t@OP(b7(lKiPHrDaR%MMQrKrhL+*|5-gNMzHnJh<+ zM=Bm5^p(YfG;Cbx*?H_#Y&mLqHIpE9*=v~k7@F*l$Zn(dM$Om8Bd+J!+S{$Ij5!PL z&9%i}mpGC(9#3Ccna_2+oww1_)|SWLhU7pZCy?39z7mybCR3Hzb4UqQDx|p^dt|j7 zMy8?w;i{kWN}boI_;~u}>n#4xc<;~TZv-Zzf5F34Ri3^=ZhR~;W$;ErDKESlfS@nr+_OlEEsAlxyFc~S55XLPOk6D+=tQOx#Q zaHrU*@{9Xr*s0=Bxp|7(-*?`!HD>j7KHxr6!CZdWmUO!SfvoaN8IF+gjj2`&e>>`1 z$?ejN^TBAFZHXXrn1VRf2_-+}qxKagPby2OM2Gw)bwE*VkrPEpcjncXhiE>p@}WXP zZC|KgY0*$^M=jxy3x^Ub2;-qZlI5D>6O0{<^)((IJJt8EF(cn9F?l?k_~vh@Hfz10 ztB(H#CEr5Tm%x?sUUO5Oa&0dCn2*N2^)}8;7+Ju4rqrD%;>8$;mA<_M=EQQAiwjJT zn_d>rC|$L`MeZZT$MM}0rcg6E1GKf*@=JsD)| z&i*hZx4FMgr`}drlMSVmHpFVnJqUm0`(PE{UqE0a!bymjEpzw^{Bt?_X$xUdW&9Pd z_NnRL8moSjl93+!Ae*b9DN>n%dY>ho>g^dHAK#OsR?yn4cVleSWWUbYB=XggM@XRV z$*(p1trj|co73diJpK}uDdcLa_~}9aY=SLLJQlyyV5Yu=WBkd&L8YKz_nO~B6#o|| z8+T@&_>h>u=`V+TLD&jlb8&#C-B)eN+--u$(koRz;!N0P)qZ zu!e2_bMJ6KVG2U6`uublMBGiL(5BB9e0_bbQ%iXgke4F5oB7EaSML}bAE8!PhYQRm zDo^~G+-`3e5gL&idn|jIQ%vu}7{YEUI}2QFIDJ~^aC*tF9h-)evGqb%Xw!^mh}B48 z=B-dhVYB{tL1W@ySG2CFDPCf}%qsAJ0ab<=Y;)hA^5@n_d(W$FH{Tv3YB|bJ+_Jcg zQ+%~qBzXhBHZXY_Az-g6AN^I1x-o(5t(TV<)s+XoGPGFaq4Kr6?UoZapx1o`djFjB`u5JMWC+3i4bITx3ZGR;w481u6X2&AJ zyB!@J2Yx(_lXToN2$lMfkZ`))g{1TU;$^C;`8era6zkAj@&;_FgO+y;%}?$4@fO~W 
zR=A&?Zq2os{rVFA;R9-SK$oKTElbwfyQ>U-NST_4f1?x~H@3{B+->y3R^8RqDYvyA z+o{&K7MTzAl)hfR`%!##ydq94<+Ou=6Wc7F54nQ%tNAK&Z{|A+d)t#kBCKrX8EI$C zT>nC%`*@Z6nbYp?Ozk4#gxck*6eZkOE#~;v#P2uBVTw9PB!i4>POowP3Yf{eB1_NY zq!eis&6Qe)XkPF8{t$6D{t^R&lrw5YyT}q!3wx7t<7$_A+K;I4vhGi{4_EQ$v6SqX z?T2cZ7KucvYJi%dxD9#G*7ek^&a{_HkER_UxrW| zrd!>G*`8?%4Gj+dC}F?0I#R@~;d*&!>fye0+7glq65CHfEVJOPM*lCgbR`YMIC>2@ z((?N-a~F9ZYo;njhO^z8pP#Rl^W%9@TYZh9eiNPkGO@S`I?lgum}&cgXXAqV!9(TC z7{z@3&eNIoianj}`HnKX)l&B}w`rlFDe5MeCqHFK<~Sc555iyDMlmJ8DWkaDWWPQ> zR^hydLqwaQmB*{j@&l*zuqTu*pZ6r77<7)e*0=z8Zz ziRm)B6&zTz@91=|7%=5xbIL;dp<%PHgW-AP4;y9_Yyiu^ri61se(oo`1GT;vW@ctI z)YUr^kyeXX1jK6c`*7;n>mAYY{!Kbp8EgG!PM42@AAW%|>unUVqwT}qd=8}#LT3Fu z*RE|=dty<_r%Vg8Fgo`oJR^-q=j2=mm;27O89l2Dp!r_vN z6EewlJ8_c4HKk9&i`AH*MGRUexYuInb0vIQ2_kJm&^JN=hODz~ZoG$WV7ocpaLu4m z^P48Opx|~W@-*qvSuod1g7||CB6u=F0M$^X4CRKj#iS$4X=%)W0XPf$!rX z=ND){;aX%f(+FE#^3`IP=jYauB(KX{ZeQ{*DYlc8v0gebc_xc7Q}-F22wvOXN`9u>f3{jQVL6zsw?1A$!X*7R?Y&w+lMZ}>%jzHyjfJ%6byxo@`gvO`Nuscs8H1t+ ztK8kup-t54bliDe&+sXmR~FkB6n4pL=9|eS9x)4J8>j3(KVC#F8*hw zCODZ3ogeAw=p@{Z_uHcQ=q`}yLLkM70i==Qb@@ALHoc)71)o$A9k&XY#K8}(1I$PISZ}4i%L`3;J zQ`PYob8~Yihs#+sG&FOqk#H({AgrETOS#h-h1+qV>aLw57i6E~p~eqBW_5cn%1una z4NFnk5VcuBvfcgNw>(jG2C>NUBHk2fGzRrDm2@2Q!oj`OKnAk2d${^_#C{WztoZ#R zHUZ+ZyShD{7Z3fSvYq$VAV(Bfjybh`;I&S>I>Yhh)=U`3pCi6V@NaoSOcFuIq?S(J z#x=i<`b&zRxUO9J4zJnL(y|}9fVtYT$JIS^Cgsb1X0%OF1E~+M7YzZ~3{EhuR!h>Q zmdplAY>gqV4(FTXS&WDn+=tIpZ@4Rc_&3iA^ay!O-dM}^mf1&0*pKxm?8KF6q~9fC zlq`PU5qsr91{W6>shR3c-A%5;gp5;RAzWEy>~N{~4ElTj&c}__1TK>adfr}nN02C&(Dc&hfAJVxCxSu@5Jg+iHE#exUz3Q8osDM(DTSUNKROk zME-#l>_O-%<7s&b2h=Qi&r%0M7$nGpq=$w$bk5h*?@7_({&{jB3H4*1D=q}wH+*3y z;CI(iUbAms8h6jBNw`6Of5Mp+uSU)e?{0!*Ol<7oFAdd9or=fE0R_D5dwvR2g3b7? 
zb1DT0k-F2*E5jRI+jm4>MlldFS#PS+?PJSUC!13UY`7mTr2}qqluTIaO=psF5}Kr) z!p4;)gp_BrffZcJlnBA}ghbKb*JnAmw4^THXWk-)66v8RS#hbVM7zMW59UhxMRyWk zkTDQfi97pt#&kCw8)EVH_XaOTdSViN`c*mJWqH=<_Gi6EvpY zlkn<6*3&W($F13ybFCe*!pl**ZY*C!=vs+AYmNWlpGXfF3qep(h2r1pX>N zGCKCEHJ1c70ea=pb&Kg9_M0z{L`*|pw##!!9A9lZgI_(|U4B^QcJg|;{~pcDk~7W3 z2>_n}`M{|wZu0AbY87q4@>W=l{ zGS}bd0YPD``raI-L{{-mjIx%tXyiLKQ{B@xLzBV6?W(*b4f&MKPRAJk2- zN@CI@?sx8|L6#qOG?V*FEKm)@0MELRXFQMdzbHmtX1f*A-qGQEf!%PNG+;~aj zC{xTTH(!SIgv)lv&reWd{q%)2$eHngRKVEwDY1gC>QVDyLZ!_xaX(}%3zUP zH_gJcCrV)|_XejH=H@E)r{7Rr){I8Fp8^E0?Q|d0gUzT|{T2v?0McmU)W+xi8CpMo z{$vbAaB@BeWR!N-1M8a`5BNUEc^_uA;CG$0=r4G7(*qgcB&i)X>T+9pTRt+#EGo-q_)*~<8t5NybR0d zu|oNp5xrl2-7p=>i4lKos;sQ6pn#LB{II!mEc=D3<$ec)l%$Zw-Me=~8Kq+Xg4tmk zb%aFy8o>FML<;~ZCG!TI@oKR7P;FYrTWNof`802<#ve(|;4oC%Z0Se%Ub&TH%&Z*6VOKXuhp7sY~H?JvulhV73X=cqn zs>axDMSrwmq1sqF?z%VD7)-afGPK>DEa#_AJz96{lBr{PC<3X73ja*-^G zhPe7bNX@Zd8;udQ(mB|g!zH4vr^0L}F(hewP;K+zSv+3<#?=W$JWc1^2!1j02R+n} zbIIw%bm@_-BM~w`p^TvuF&`|mUx#x~r|8G*?H*4>h|9W8y1td^1MH}TqZb>?xb&bK zm&=vom5Hh*yB~YR@L~xvZz8FuX*&F_C7Cr~mSj*kQ$=lUi*=TW{9LEY3>@Y zaMpF0@_mr`xB}|!-}A(253&uPP?{TF;ee7ZPVVp?eW1sj%sH(Ts);M4Qm3USBFMU1-u7_y&I8)X+qZd1 z-iT6L7l@&tK#F5>plky+3JqAV87OFK{(@z5Ki<6MdKb#0;UcT7HnA)Gcd^a>9Oa0a z+PJb6O{l<7LfMU|K+ohr8ZmMRqLgHEdyr(536syv`ZF8>1>zFZ7rUJt?(gqINf63} z9PeLiv1=Tb4u5p{coDdvc8b|065+ZhFPHYRpd*O=(@%{%1?Lir@ZR>ym^DrDRgFZI@E_sLQfEvk4d9m z=WV1CuDsfQ7!gmICsRDtpsK&{vxn?s0+2G^dt9(uOWEeKJohK54fq;lB}nBQc;3n} z=ct7962cbyNf+-TGK#k{*|z%X`j$V5zw|$T=<%;CUEqa)|4%^Fz&R7UlGK0>oB#bQo2VL;96iixb2enc?LYCD^-rF@ z!h+6d!;|P3_o7A7eA4C5JO8ud3g}e+U%!G5pG*W3C&!Z+&yMKzZaGF}4M${PSBI;u zdUw1wnZz|d9~Xfq(OTxBw8}lv?>`XP{KkiO_}88Ac8|+JNw*$??j#~DMmAO@f#%?o zGM|)Axm?+7_&;am|2L*LdnkciJk+sR`3>pm-tCx|XI&b*$OJ!@O_t>COROWXlQ3=* zQb~ib_zDXNDDqZoy>H4z!q0#qIlO8a70p$r{aCz$N4Fd#0|NsA^W46D8x4Ep>vD-) z2y6S%S_;)!7{;eUK>-1MeSHA|fk}F650r`bRsM>T2Oz%TJDg|=fpothaz;@6Fgx);FB>~(Y zN!?FwXt7rU$9e7AHO5f?98dBZlErdq8NWb&OV!vkptFEStMc1nkalsHtoH2tcHem} 
z^5GBNs;i-5FNApPME{YYoHiz9pw@qLW~%v=aXD2|_B%BSk>4I_u5LIKrP0<$nNS08&VXamsFtAMYkg@5a6Je@<1- zKazH?1zI%xZ@o>XzDeyl?T8XT){Zo7*B|NZCj-)9#>p# z_a?fZY9-5_Z;wi52{@XMvr1ErQUCFzLX z#APq$2O`LKip=D8s!r^eA7tz|F}ai8`{}9HeS0SmW? zHSY>XbxdhSE1fN-aClRwsVaXZ&yeTf%(L?CP6=S2So?Ud6r2EcQ(!hA0+x*q)T$Uy=cPi!skzIPjar? ztvoU{HEjr@>h0|XU>o%F3++`35s?=JiHc>(nxMWLvdf*T@l?VUz6A6uklB28oeFEG zn@voBTe2Qz>A>Ve;S+cbKbVLD8`GI9B+Jbfm&Wd2I{hn^-XK+zXcAgFWi>^w;|0F* z4Is7234=)Y;A;>F#K4z(Y#@)!cf?`mUZcFG=fRKtXHQkYOp0W;w}qCLY}-aBCR*8L zin$){8MvfOZUGU+9(vEGa@$N4e+NSrP zQzeg(H#JF9fMCpO0q^km@#EnSyxPq6GVGRs=%M=lqFuBGNN|de!21QJlVvj+@6#A6 z%;q6M7;@3d#fsuo;I|wvM?dSvS-lzECzDvznEga*diukRN7s0HSAh=&#c#vgUxoxj zPj*W1a;@j_dqY-CdpP4A0A2y{D&VxWjqz$1z5*#0Xh;~a^Twp7-nFF38@LD;Acg;x z5moQ~w{k1jDmY{tT9z zfIHa z4#V#JBb-Ry{15$WjQ7N7R$)EqNU?Y8P&HcY_3vLWP$-=CfOXR?eHHjz<|Izw@)b7@ zE4=!)5vw@PlE2g=LeWEk-z@!L>9{TdqNe;ukftq$AFwBi5$Xa!=QwpE4iE@3d$ zbU<&WeRZhx;TPR1H(}FXkAM|`1b|1Mm;Bm=0fVo`()m*p=1iR9kL8Awhx=g|?Z_i9 z-Z*!VKz>e{sz&kY{&-qecxHHkJjAs+!lCZ0Lv*4&PBaUsJ}ex<{n-druaHo*VS$}y z8!J>lMzpdMfRWEAX`m0lbY#`Xfw>0RB~p3NzLCOasuVeX*hI6hjh z3Kk~!a&QDGi+heJM+{BmqN`7|kXf6%F9{>_Rlez$Oh{09!&v}xG8De+n``w8 zeXV#P$k4_F>$MNXC2eiVHg|M<oKlqVQv5!rhLeYHhvzd>Z^i*jy5#z`t$c$FH9v$v1$kt}~ezFdgMKt8#F-i+ON>ST?rQ`A#E!eitEX(m6P|QS0 z-lb{bpXx-(Ye{MjjZ2xt_JcMItm+LRo-%fk?rWEhU?tPj(tuBS4b0C#macyBKobJc2}-yJ~zSuOy;jhb}7X@y-)>dD7Aq)DYufBi710M16dm>3eEP~T$Ofa^?2idDN(vBz2WpIq}%t(T zJ^;5`s^y-%D@_UCVgu4Kf#{7tEjXgzZd~}Jr^vjnowY9R-pVCPj`CP%%(k(jv zeF7Mrw7VZ^!AN5I>kD7iv3jpi!C!?i%6(yz%@*|k=bUByXnpWP@?jOQR1oyZ1b11i zEK9PbXp{zvZ43Z^fvhC#cI@z5|20P>YoiNR;x$}Gu*?zhyxFcpyD(M+lf(5s?W_SD zbKYt}e65OJ$dnQ-LP+_;Iz>cBKPogI3Zy_JAl(yg^XwgkGH?nBm}13Rqj`LO9iEt? 
za*IDshU?|KVJ zLxm{sHdV3dzcFm%LlIiH@V0*AiRQfRfBy=M?pk(_{;&ratVF*d2&RSu7;+;cGWKl$ za>{G6H-u9*zW(D;Fx&>4qT&%eIUl0k>QG)2o=eL3Xw7-@tzQv3B&*aKaO$TPqX z_4j_0M&Z7WP5wtXVH?84Bc8nq{u=j{(TbOIijZoixXVwKN?+ZG8dUNSd`-j0GD&^G z4G`b?3l}9f>nTo-_K?<-=Nf%z-;#vtYmK_U#TWh$b!b*)YjdUvEtx2kZucjo3|>0L zK979nEYJ?#r83w=kRi2TMW8CMTucTJ>dKZH1ge^2Fn0WT5nBw~y{xKJFFv+~T_o44 z_eRVmoF|yqnD}a(oO_ZXO`zc%C?rHQ0<(~6F*XD7>(xfxF~crh{x2bCOv5IjE-HLx zm@^22#K>QlKCwuO#%#3owHXL)AjZFojC_4|>iV<`E$R6oHDw@r%c~Fu^>~gN;d*bh z=&wEarg^6-Y6_(FRIDGh)8ub8HptD35DeSX4R1j2c3a75f_$(I{G)+OozQ*Ky+i^t zE-JLbx6Ot&jkYvfuL6t!eZ@TGbp9@EXQ52ps!C8`G9MQ_u5znLCgf_qO5&R}tWnL* zlQ3DG8VO?Jz2%aTFZVu!tm8M}4u05Tf@P)du39C$2hKg4q4d`Up0`KCK~_nSJb<{} zeg7NJ8j`AC?LP({5k94(cXoOL5u~x;sXn;{v&8ZAevO5y_)P$!ZLO`Qqs2Dho-4E* zQ)EkfgjhpjJyEx~@n&rgDKMhp_9p_Nl8g~ zsj_I6lFm&Z>pp<1WQo8r3t5fFtKra>p8amskySgc0!#rKo9V!hu{>kd={Nf_HO4$L zQ@7^V0?A&^grU`1zp0X|8;6OYQ`g`!kurl)hd+MQv{4H?>jnskru`WukTyYAxIgBS z6M^75&b}gRcZzL(EUv^GThk$4C=^MNK@NsCR*+bS{oCO|)M@00Z zv(WBx)}x$h*oFc-gf*U&`j49Y>kpcu)!0J1&>?4$_x=YxS?~;KE*IQ`&Ftlc*{T<- zs9>JNU~^1bh;X&H1~?ldX#9as*9}ZK;HUHWUYBdC8cp$~25JWE1HG1LiGK4P|AfJa z-7FQbE-(*%w}AYPf1G6A3AVgJZA^JQM`wX}M-YNW04|g}Zo}p|znpb1VB4Vh7S(7d zyvfk1(q&Kn5h<#R7j))Yi;We04;`Gx-axjmus>!qpQN zkOFZR>UCP6jEC}!1Mf>%`?B%m`cEIXL1GB($Ugg0J~|91#oXNd$&)9qUcG{!pl5;% zb?m3YK2lNul``bxn4}MCBO>=O8(f$})(cUF8M|o15nu{%YD~`9I&!Cs1WN+R{q`*Z zbMqTlJv}{5&G$`vfk8n8lZrp}!iOz;(>fqSU_K_hMn)>hC1E@O^L79Z)dn~Lz*Io$ zIE1fkRbm-?G7IO^Fg2vyHl@++f0GO!xR5U~fX#GuFvsA=6;@76uP8rMN`y7+c5t3T zDJKYLu)VXhu=cHV)5&q(0P9+nsj9i($*t5vfQ&A2h|mdk4f40HaA-h==*XWu`9|rC zX`?A}b1I|?;?=KTzkoCbE6_rEq8saAom>Na9oIyxZmD<%zly-%J{2^w}Y&E0X9m4+R6Nq>@)3~AME2LZx4+fMT znA1x?e=wlKMpv|=vAIc1b@@S{`zM2l2d-tijMw$e8+#(@LKDSsUBQ55Nyf+KKUvCS ziD@9K!re4cWTgw=pWyWSE4Z)5DqZOWG=}$)t|IHfMfT-FCVIooG-F30mXTpkzRT!8 ziA1UPS<-7?pIX8hdt9jUd)Q3=clTF{wDM9;vo^%7yJHncSu4l*t=qWt5BA)0CcCOm z9o^eb-?Kz8W_A}^G%lt^kUradptokv=`egnWChtr-U2mnNl-^$DHLG&LSG&Xc 
z5Aes@9$FyHcizT7O(b8Y4DEn)$v{ngtGYJ17(c+z$i^iKNsIWPTv2-h3X_7qhlxM##Ad^X;g!fGb(g^|8#@O42SROb@nOWgi!e+%kKQ9L44Z@XZQ8U z@5t#sJ)%f=Ng`8dPH05!Ov=>kyrueBMxxBvW3~N(%>Fd`UXszHgY^aN-igP^j9X{p zT~dkpoNZ@TPTsPkphqrr#Np0h%q2y8c=+&yfQ;HQ$u1#LrWAD*O&>s#2RaCRAr{dW zs7`;*7~_&89=Jrk4+*Sz-o>=>v&rD>SYk|(W%7D@r?P{^)8XpmlVQ7+k&M?prA2u| zx|d!lIk(#OF7%{YHL7+VTMd1~g2q`c-`w=S+XXI*5)~z&E*d8s zo7M*8oSZiG3K(tdTMQk=YrU7ca&=g0b=Wp#SmUE{7qzJ}zwE@B5C;b@x5>u7QdFhq z!&;Brp;Q9^3IG$66RuX4CD+m_+8o~=mZHgDtaV_H{q;d++5WUYXX`8V5E zei_R|$0kcktskreZGL-fM{(8}eqgCOSnIxML4HKyw6twtjJM}-5T0KHqgPDKyPRWH z|%;7&p0pdR$qdC*Q5)LF8~6cc)8g!ggbW#{R6rc)zht zmAxokgh73QkG7Tb-o}g17so^_$4#=kE?&y{sjCp8N^Qk-iNdsgldV!}=@rD=%FRYP zDCD4yJAvYovz-qiQrC|Z$qK^ODZ6HKM9N9WBvoemFG*ciF@R(=n> zp_!ROK(dvQ(QC82gG*d-JC&!nb)5T-Kk|eSf!m*L&MP&pBiOE@ zE!iUGk3i-$ZjZK^Yo(IQy0Ah@5#e2nZ2TphRetCc@gNXU6-zp5UgR%gx#i)8rIWu& zf@Hr(IUQG~C@ij1fy$M-Y!Tllky;fgMJZtq-kymB}_$5y-K)!nng)S^@ z+3RH-Iw6UApKUNR_U4~rHC0twrUWa0uy1WBZ-L`fc{nDljH%7WR-bIH$~!_NX4kXy zMge9GQ`(;(x(v)0Y?kzf^*GPH(dznjW@Mtv)*VGfMX)x)(Ls}f9xR;7*!75c?Ha2? z^9kOewcjdE14l=ePUdqW#$8XDFE%i?CUF_AavL?b#TC8%U{Cgp`3&*RNJfUmd)cF8 zw~en#5|AyajDk5d$awH-&z=qFx<4(p)L8#Epi5 z85%9xEx9hC^_N?1y^TSb1U0F*MX6&^`7V8j_qABcEN6A%UkZrx?wb)jC1Mk$j9Q#_{l`dijRBdNlo3Jyw0Ug_`gpUX8xgBS_#>Q0;B6LKj6kQuFW`(b zGBTX^j&;wj-6dPl^^X;si<~p_BOO|tp9=pxGyLk(vyEy}ubX5-Q?H!G-KL*>_hGaQ zXnGTYQ|lgudxzyEH6!b6dV-UpJ6-tO!5i9teE?rsDqk)2%(g_J9RyB0i&>S21fA3i zy1n@f5+o?a-!p!*3tM{qSH5mvti%~gYL!V2xM5c+wM)0r6Ehn4WaW13aaV;_SAi<$ zVz>XV(bP^M9fhmV81U(@C}bBPYCgCzYTJdjy25Tf)S5m%JKnZpDCJaAd_I)z?MEzb z*R$h$v*r^X&h01$3)PZ^tY22cH$@`*H=TFZy(cPbk3QzZ9H8r(>7_RYSi*l_d}#Gf z;sJpAP#VGcoF+!JZ};>hP4zNM28+RQ6la(2u2byp?5n=^kIoAA>k|Es819p*-XmOo z;T$VvwL5wqYqai2d)c3eP6{i+%l1OoH6bWCmb;U^-ws~l^x7)80Um+GemNHa8*s+^ zp{d89DFk{fe3=Pjs`={-PseY3_I1WXgv_HyyOKzPn)o1c+%+uZ9;^SeW zl?tD3fzy*UUR6q~;H85l}HdP#wl3aZM$kMsNL!k$&; zeG(CA99mt<)SXYV#a=hAq=w1KUR1q@sn(nCzxDAjUd%Dg(tvtwXo;pf_vy5!cN6w? 
zV3S`#$$MVLFfjq_(j)RF)n`DgUF3PB_WeO7)M5bm8m8ZUmmXRg4jq43?ziq*z6L>Wyjf&KYOb;AK(QvNs%iyh7HK8$yRUN z0yC_hdCNULBp|@5%WLFUkh}90B=^IfV*w^`9RfPflO|4*R|JN*1Zwb2%Lf z0euoz4K!VWJ8KL^cTnjd25RT%-vk$si;GL-<@@7K%e*SRjViHu;!4(gls=k8YEE~s zr26J{`75wGoUT~x6u(jq`GJ3%%;r-hx%CezVfP zC-doG>|b zQQg;lCw)cjr`8%*jG*_h-T=%$gcw!Y66o83nyoqY(p!Cz{G9y zP0c2?n|;^#`(`%#Z};5zY?F3ax#*#KEsNm^56<^~?jF?~a?FKciix$Bwy*0VgG#yu z2W1jM+-r(73xeN;Tj=TDqh0~NRTTCrbnX#a1yuoHT@;z1U=2v+CDS=Tqiu1Ao*SUNsz*Ttu+hqs^>AShStsy4Tt;;YQ3?xWtNj6hMglp!jO zRfmEl73}v4k4-5!9|#^+KHOm3wRm4nN!Qivsz?zOxIFk(kb<^|a5xW9aqbJ?{sNZu zA%DH|QHkKvx=y@(CcBq>L>$6F-i0L8|Hj6(Zo5X_7k)1(_~S%=@w;-VH%>u!R_Vod z^qVZ@U_CEZdC3YrwX@v&m3J0RVz1>YHGj}<`W`iF_2@9QdLU2{yEpNsOR6JnGqB6jNaejfx}w#Sm| zn%cbr&DiXx@(>KB`x{e$syLrbsr~64`M7|4bWWHx%Ad+?JdVF%F9}yy*gN|7rEjxq zEfa6c1>dage@TCacq63I8$r$MjN5~0L>9zM{Ql#JRHl<_TTdUoZX1uO(D>6A5$(i> zdvS!pSrc@tUl&=`@(qMJDazRMreZVlIj)G_Q1I$W*l2^+U+9xrO?ZYEZ|;61EiMvr zkZHq~6TBiURQ}a6{>6<*=oFEnwHki$?C09o8`}dRO%D84JqesDbh55>{&zDRf@;q# z<=U<;wGWm1@s`FE;QNiY6|X0Y8N^n|S%~lKuW%hy+K-Qd??t6IAS#^V*LG|ene*a1 zG{n#fOH>TCFFxJQ_u!_j*Cb(mzlKV5Pj}YWHh4}L%zP7vm=fYoY+7H&$nMDZ`Kns3 zpLwnp<7J5HVyz3&7}m0<>w-+*2XT6m+sZ4vhtZ5E2&YVv4j!8p=G}(%jN)kvS&EX9A$|%pZb*DME&l{oAQQv-vRwjs$o=a>vA+7UC`HKVQL1x)2R)^2( zJ@W5_5kDn<;Ow+oFaTPx7pNznUSdX;vxMEF>t>QUh7xZ%C#cI;WaND2fLmH`s${>J zAvU*{IbH7O)usBD(GRQS=S>Kvms;cwE2@0AF3OJ&19x#Uv7Cx-lz=$=Giij>0X_dx z&H~LOEpvJs8zxYzkm#et@poH{XNF(XV&m86It1yr`)|dcly|!}tBPVy_(;gP9#OyR zjPHn4tTxN4=m*y6D<_ZF!tCs$_^)X#&z|KGJ;V9e3o!5_Gy=D5@qW$|4iTHyqyj4! 
zX77t4X`j*SQTUu`MYNlWFX~Y-yMrkZ*mxS-s+XS1}`VwUjbJFK^X92<@bBW18YChk1`^H3pg-jwl z=N?@JOCDY7Kf@dwtsxcbsqoWI$oP?u^RK#wdxAU93@sa=7jz{1v@9SB%Cv2 zoMOPL0dVO7NuvhK;Hzh7n&D`GQU59HO5$e%t!!YvC)9ZF~=1d?LUH^K7Ewnphe z8m|uHekc~*GQriG;^xi6y?e;fjMG=Q7DjrnT<8_h(Xy~sq%zD;2M=&#ef_2RKl2AI zO2=VEFd&rctKvJ~e{?QX-F%a$L$klNGi{$LH+~5{wQrxxhcLamiyTO|CG@_2x86|J7;hP5& z2FVV=kve)dvW6kYaF|dNxvpc`cQsHY$L)QQ{UdYuDjHy|g<058VFZ7^irvlacS*U?pmAZ0Am|-w!ueHY>Nsd>G!^S2$%y6LyIEzB*fZr6rLdk`wa)^7QKbety0+!q4(m^6mXAF5U#+m_tDpI4H86ZceE=S(A#U+R0zQe6rWs zee099d+{;_-GNC3Hlb79XHdORs50&E$g#~%Cc)ytd}+s%`S7knRbdBwu172(=W-G% zRbquo95z|A@R#>X4KC?QK_h$}+IR`~FBr&nY(=i07MZsd^@nCB^vT3K?jCVlh_i># zON?0uMkU_1R0Z$p#F_Q>PXY^D6!sp;lw*}kw)t+r_#Hmla&5Ad#(8KV1yy-gm_>&X zSpIH_E_CC(RArq)*Kbk&xtY37SF3GcSsrH4pd2!(r6mrjc7mc3@vKzEh2{LZUPN}! z47N-jUoYR2z64%H%Q-W;1~WQ^kju6)i~X5&o4hKQIW=ESX^1ZTr=i0jk*p_jl_BA=$U$RS10K)rIHF7+=AEvDfJ*1%`jQW; zI4HV7-C14Y#FmePxli{zO?oXC&hAdeg`eMj5a7M-s1h$>{{y5dw3`Fn7|UGukGDs_ z(|u~X?Wr!4+&iYaozFgvt@5POr392JY&?V8 zD!Pp>@j9?1rGF&l1~hkG*I0EpwrelCrbu^vHBkKNH{_V!O1-BoHKD^vaDJwQg4@vUs_?Q1JzD~W$s&P+Nr-w6L!!w?Xkrv0V5a|m&1t?&2X4-vDmtXmV%=-8oE z`m&BXz0 zY8s}PF`D(!&h1s2qVS=R6WxwTNMNL(0Jq5{I<6sT(3(|^4k#(<72=hUkPsH$5FIv3 zkE-2Er&f8%iv;T7zb~UW0=Fm9jP)y8M~7qs2CLdC$yoLT9rSe|k@nty@530`2Ne7! 
zI5_ytn>QgLA@DOWa45&%7U|}-_c4tq^7eCb+dphpf`ff;Hh#AG7zFM4NT}PRFMi39 zA(MRCD+FhWg{(J$;KBu~i>@F=UsbCTXHGzdNqys~^MOlAl4HRzY+nebdT8lSDW28{ zME5i^MUDS^VN9-=H26wFLR~@QRfY>^!d4iz=6^UiAFh?H|L)IJvS~;sN5fa?tE)bb z{&7D+8_k0y+VQPIGCH`bp~!ra0&Xvu=Hyg~&9}>3VhSZg_wnQW^rZBi$84S6G=c`M zN-U*nbQ8NRxfj|oRu9Lq=+C`9x4(iXn22Y4i_Hzo(^W0B@oT(K8cbT@i=UrLY!}pB z)mH-*-y~BmG+AZC%=yJlbW;kref~N)=!=DN zyk&|xB#OM8XQ0ip_X71*v2tv*bCWfxUM>F5Z44B|Gs)=N72xh9{@e}#?J9-o zzw;zZHk649@d>{nJ$wV1=-7i7BoAT$EIcYp5m_A%Xcu}8a`$jQkIttRe1jg3)%iC4`1jsX+Y;|ML@FsGP; z@Q=SY*)86NA?xmt8w*I=Ov?j5qfFe>Cm=Uj@C7_LNBN!T3uQu&ZXh`tIVJjfg|-b- zjw2^;$RCD+?Dg?vqZwInyO!ly;tx9}Y|r}c6Obw8ecE3InAN>9{fQAW7}bw@wZEZ< zPCb4$(5dA`spmF%kH!ld4><1GT`^X_?RqR3fClU&EE);wZ(u- zeBfG;zvs2qdQWr5jLnY!e-(D-@lbzn|1Z0;t0W;sNT^7*p-@@MC=v--vPDQ_mwl(K z2{R~Li!FQhHIyY~-G10JA=2|`D>QoB$T2p`t&@xmD@(Be;9*%z3C5tq-}-w|bt_lQZ7_(b^wB(mixW z76O@(`_j=2+doPf(APqri}Okq$$iDRZ_Xq0kxAgZpEw!gcTCWZ6|<|3?|;y0q7j#| zM!tSk!Mv==@g_RigPav12}}Ehp+dSl9?n4p?n}FPIE>;;?x3Ls5fei27jf%Kj-2`& zbFVDqsA`(bV0Z@Zb#=VdqHMY7xPMapVaaDHeu4sC#RSApguZwmVkZ(PCuP0hmxH>0 z|2~K#ShM#EbLI6#rs;mN+_y868q)XlT`4CZ5Ql|hLLPXZ*Xgt*Dt;1(_8e-3)D-UE zx==~jajB6%>ZSMq&BO9}Bz>Z8jPZxRZ?2@!@A3`Zvft=Zs}lj6q?S`n9(yz4p3K<5IZPn}rD z1?#^UqPXkb8sB)~0iG#!h!!i1zC5)VR>wyc6XyE%J3*pKkcWqSA^a%S-b?3P{ta#`@Cyv}#enCScGW6K z@$Z1bJVf0wmVsgTVi&nD8VuhOr;x%FQ3i%~loaZi1p!fO{Drp97gbfmdbv`=_v{Y% zz~VV4|FT$_3v+NDf+Lw;$)m~DwGo~w9oaGyM4)jobdyL7hj_-#)gbTnv_c_4$Q({c zg4%h;P(%4f2mT@D*Tsd<%YHxsl-gT|UVW?(+cEAP+PB(~$n8Mad_pDtzrOMX=`F*HS4Vpz{ z#fB1*6sS0J&mcRF)*^aD20ag!7oSh*juRg{!1U*_HlvUqZUlcKt^|#FYNoN>u#$zp ziIoLS%F$s@o_omJDM7u7JQx1o2B@t@``{;7g3bfd+QPCwf3&t4xVy`2-Wdg1w(krm zioBjsLqX51>m94Yy_Pb3+05Ie`zwf`V+%HA;n@~wccE*qx3*j%+6Jx|r)L9A$m=(T zgk&`f$;TsKo8g6nS019(x2mC#*}Okd$AoNL=8|h-zkm$VcqQ8by@P)<7>V7_tW+uY z8G~|Zis1QuU~P{2=492u10aD9E~5XAA&Iyv4Koj!*QYpKNEqnv$LEAvmxbJg$#S0Z zCi4IK!?Ha;BdV9$XK}}B6<%!r$jZrC0*OqlkS<^+-N7mI&QINucIQ$dR6;M3mw6~j@v;qCofAbXZZ_pZ5O)wqlFStH(#t+bok?u_ODWLA* zw79e!gQX(0XT_ZkgXWkFL^wO;3KMCQU)#&zwWV>uJYMWjIEuEwN41ce(G`Su@TsRw 
zDmMm$R`AbzF~4f(Mea98O;4MF?-BsI;LKoSCo8&BXx?cu^EEX&_x(G`nx~E9wsm2z zuGGx+Mry8}bwK|+ERE*cwXYN@9<$I z`tl^*p}rUY!N|!BKEb<`vSYdF6Z`Dhvn42%yv*f}KkjSYP?p6q1?;tK?f=b>Gogh+ zz!o&_0i{3ayp#7rlf^6cUOuQy>TVW1=F#$v%HT5K!}TAi~3VXY{Vn@q#)o$J1YPEch?V3;4Oj zU;G?6>be3Vtv!q%QU^T8V{`rjRaPG}tblhA*tj9^9xai&vp_pA0Y|R$JU;*Gb>zJ0R)d@mscZ#$@$gK zZRDM*qDOjaagV3llk4gf#D|Ge%{dAJ;O4R?5C9i2^`rmB)KN$ZXKlJ;912kl{0r;D zHqobws{wn~xP9VQ6Y&$Q|Kk%K7_io&*T1shP8j`}^Z$8#2WDq6oH6vsc?+n2|JGhu z=ggvOdAI~!gcI>PU)IvdAJhcvx*e02H}MU)>jZ8CQSw$JGX;AvY^Qdu6!|gjwYz{H zU*T0^YcPf-A%Tmx%YEFDGbM=X7CVz>z9*@w1jB! zfJV|Yz8n3^!q1AgBG-_id8ezVT= zgQOOa8Nk1Kod~@`fh#A%!asP0vLbMyHIFai1;G8Mbw^#Uz{>Xi)>}6Q9B8iXgjMvpw1P5Dkhsw4!28t8esxm)XJ;rE~Co0l;Oe#CL!G zftvt6!q5LN@X^7gxuu0o+$Jen_DF`yWElZ=Kfx>BXOkQD$^3`jB!0=)e+^+uOSJ6P zl00nrlWzFUl@Vx6@BS(mOgLzX{rB&ZD|5@)hW$zpte8e1#xl!h0AqL`OKmec4nc#Z< z_@G45YnG@R4sX)Ue~>rteK!U#iQ(g0)ctg4S4*l0AxFuRl|B?#kDo){7Kj%Leo>OW zpE&gTv?1Oz__%Sx!8$T=vdk>=mx`r93Hwng&4Bmh-~D9y^TvEr(kq`4W2f71HRl1) z=lT4~(*hm96#5Dkt!NLkA-4X=`Wyx*0jrmB=5sv3S=2)+A?rqkm8)0c-1nQ!JXY5v zVXq3DHr}L~NL&|x$f=1z&3pp3=Ezw?vi^MyrfpATWxaN?@0R>}elFLNVfe=J?{4#f zaq~F!LCRUB>w;NX$KPpKTcw(~TzFU!enZ)>RlI&ma-3IaUxTql(AdU9b98;YB0(=N zfGbgW<&$H#`Zb=8>eJXT{@f%81RfjO*2K;BJ7BdK@<7fCaGA7_P*%v)4f!SpwKtm) z;`a*s745C(@yfL>&d=v=jkT0!<1*6Ebd5{-1lK(A+38f?SMmzQTGS>Jq&yV5nSVL| zrDZzJg1r!p0t#0_)e$(>HKwSb%5*LTBl~`U+`z{kiW1>&k60E-o$^8NFfm zsAa7l6l>s&%Fp9EeqFIyAA>m_JKEa*{PkLr45!x=iZh;z61~3C9erB`MK0ncTFloT z#k5a=f5HzpVM;l7j!Q}Aj>@~%3w$#10V$K6ZfEYw+Hz;iv{dCYu{oBC%XBYLSB4yw zG_p0NLo=>(g0Ct!Ufz!eD$dJ1@23a?%;D?;naOFTLlGhF2AKJ+%mq~Kp0q+ISW9c4XRI~z2rzNeb=h{|@o3QtFD5UrLZ>Z!;Ne@>y!&Cg_e9Br>M-(>Q}yR+C2ayvjx8LGczfYXfgyW^=<)()$2yn{4v;-H_&PGf zX?J%j|G?xpf=;3346qs?`Vi2_SQ3`PPVy)MIcb(8sQClI}A?5MBt*ZSbZpiTWDxNOrdoTwudE)KGs}yvYAIYM<8O1 zQD|QwEA)EaAhXM*EQz2K1`hV;?RtqtU%VJnF!%y?HXSLK+nIG8hCIN7Ghl_ZBj;K@ zT3H$PEwN`=O?_?o9(%3#Ml6nd?BfADt4SRtuJ4#Rg?(4OURKbUf%QeYKZC5Kd+sibo-aH;bLE5igA4oVt9pnDGWdJJ9ScAKy`xF8IGwaSQa=3)lN0B{+h`2zA 
zeCnkUnCJ&?K{)#{qIjk`-l7VAXJB%DHt>4jZAiIIElfzBDm^_dYpauP1m+VC#IxKyEQ|LBwex#@01^4>I;y{UrgaqrIYCJ*#wbA2PEs_ZMLRD_x zzfU;xwflHNbk9XTTi0sbDj=Tu54u}znw$uAvg3uSvMm4jq3p}t^+OrZcOOHFT41k*$bp0t^-td$=*oochqU#$ zz!*4mOTBMwmPjo`u}V=1l#s_g_Sp0rGIWBW1T(JMIivc&8ti~}K-P@yNtP(&i~wF- zV6_tiE7NDNKXw+Rr*;|Q+c&9*&7xtkwq2+*p~;V$iE*beni5v>GvM`^@=uhUye zMd`4|5i|cvCfW3?e%thEiy6AXD)C`=5bSq*WUltyfbK)QSmwB?!+fGwePiC$QM9r& z(gQsrYPCAu0Zu|s-Cs71?Zk7<{!SVPycWiN=NSufFg>z$Mof>aQ^_m~(|uZvV6SxC zhp=rOcZ4e+)9ys`+ySYmR-H?ws?b(>XVD_Y6;W8A!|5OkT96H1_LN@`xKG2YASAKlK;ETQX>|td_?g&UM zGet{!BkB7)Rc2pDfc z%n4G{(o9TEjUI7N3W;)mqK9ZwbUc7wH??^Tj4T-%(@SZiiJ2TI;)H#Qm!jQ|?-U;n_1vr4Qu%344f$kZ7Azbq0dEh|^Qs=!{&zDh0U8o*1lAmyMvbcf3JuT3J7 zX<;}b67-p4EMSoiNhDfrJ%tg#)sOeVn#kKCX~(!_^0l#nOQNUyt4t}q&lr$mFH(s| z!N42#jF(E}fg>}S(bx_Y7uq>?KoAKC?X`d8ye3FL(M*}BN{5%u2C4i5y)rbyEB&*)4M%8noyFF+sE*yTm zz$K-wC>9-!m$`hH0ld>Mb;tSrK@C{lNF z{b)rp*GK}+`;6(Ib|0Tx0z2G+f(N&~FWlZ}v)4%%uD-RSPS6G`WH4uhOAs8;SI9TW zhj6F(#oYb9mglHb479KO#(^a(D8Rm2HC6e)9jQSpDfN~)iAbZ8KKr7rICss^b6Ihm zle-ze+C}B(#{YD66AUw5NS6S7l`8}hkIvKHb-T!(?loV(!B~bpVU0PLn9cmh9r+!2 ze8UX|zAfG_lITML*8Zex)^WVTR>!o%qt*wUgDX}L0)K*%;V3Z$?2%_DaQfjw!Z7G* zhLwTp25n)P3m)cLhm*~*%Yod+^!4gdNcG5)z~ie0=u$r7X-~XwLVz1t>Tc^fO@6$3)Ax&a7|_cg^^D`J#)hXcA#c4@O?% zlRpz&-pfx{=LdY>S)}Nyz@SPa`+NFTPmk8Pb?&YAccNcB^}nRV^%GVx0X}?>mHfCg zOuU+WB1XtpSrxBi(1DR#E9Xt@qL;xRgQE+a-C>&z8cX-A>)b;OYAxY-D|XR?2VAKX zKOwak1)r+N(-$p!s*g#(`XO|dlFn%xYM6Ofqt_U?d3kTY(LhfB-z3zU*hr3!L56|; zh$xkzoc2*TGxRz^ne3i>PMcaSxW*GNV~E=I{0&UMo#BASy6>z-9At~6ygt|Wz<4~y zZETD!Hd{*qL@U?ttj%}1Oa$bzs@#4k?OsDL;dA?5#&ryDFwjL#>iOYX5I^y^nGT zl-VZyd5jjcK3J6t`ar)Neg3iThBRVioJuNrvVx4aAKXW_S9W5y`Q=Zd?j&3FhoLl0 zQ_BGsL2X{eN{WpB{*p<4rvZH-&CSWVX2Hz}(fs}_Fb91D7!$eoOtVUSl=*)93mb_S zXM>fl+LD1;MqUy@<61@?1mwEcA40q4%=2*K7vSSdh>u$v&J$!2Vl_s`dFg!AJ%3W zm-KYSPFVFfTBD`Yw9*qQtcjcgunV7oQ1u-xlDcdEZT+JpIo64{HIaIg-;6aLw#nN`J!_DcGjB!hJbNL!kihgTVrT zAL6|x6scMcN^r-DkR=*gBG}KvF20(gIGgMEa53t2E!Lfanrou0Ogb2TOkf$80~dje 
zbZ&p4B>}Fu@s|g$*t;(j>7w*gkFaO!x%3)(Fk-u;Qe}(stFe!o|diONFV>5KZb|Q^{?gH-~6rY z``NoWic404pb)QKcQOjsebnobUF#(h(JTV7Ikn(9-7csKKTa)e~xg_P~)d9NpMQfx#NQnaV+mS_EkZr+FbEX#hlLZ`4h zM^XRzAELm8uqWy}A&+fi$iLGpDZOuNKP$Oknz{dq-cfy*(!b(=lW9Op$FS2Gjp!>@ zRiFR2Wy4?V@-h7JdH!U(pzjT@?;miKQoW(_bu~-FYG;A&deFz44WM$rey))s6?M>z zl{!&c;<%@9Fyp@UY#XN4RP$9nuSf0IMF#J7 zjA~+Bdy+Z2KR76d`2^x}PQ7p=Xfj{`b)$<~V{TBpZO2Gv;Cx>Z*S=M)qeAVcC>Av)GSJeeF zM8jP3G(#GETt3^RPrdutRk8V5W>&_uwL*8jv(M#Zo8G2Vmvie-Iw^~;p8M9-FzK~d zwU>8J+f_Sfl^xJenK`}P&4H4%T^enSKfSS%eobs#E%0r)HIDn1Lt9i*xjs$Iq0+*X z9NsT)mv8HpFE!mN66_Vf(wKK)bZ=|{&$#fAkpIJ>r5bCG?r%o~ZP%8!**#l1zmYgN z8)_Dwp((iiOiWb7&B=U@UnwG*b9)TCb9bnyZ}QIf)E1Yam7V0M<0x7FD3J`Airz?d z@kV1a7nb%RVTzW_Ju>p2*i&NL#|7riAGOibORuio?dF?jaksVXPYcw|6moXI$!DtO z=IU`ZX@wG%Jij`T(V*r+ZZ0p<85{c*)0&XTcGtzld?E57UZ1)mB;eTEyA)18;M>FrC#RNguxcUem!UQ>qgARVHWGauko!xnh&fnOFyW`70l6 zbN0FJmGRi(-}s%di&ON@a{V(+H)4`;`@Z^_qzPw-^>ohk-nK3?H=?GP&^G+}Q3vNi z;EhV)!<$lnNY>N;z6H4vJZ`-PJKgVu<8JvAM9CNaY z1SL!JjP`B#a3bz8e=GXEX0XWe{gzU_>?aQMSd`34L14sA>&RNA>y+JtE0XaZvB~!L zulCIFKj)M}DXI2ci%c-Q_h93?AgV+bSo125N|_`r6Y5OKQQ-s|f3OiDpGxv{ZVJu+eW{kq%E zel}I?{O9h`?X8}|iU(aXTRqGrpD%b^Y76&B-W`*biI}|Kxz^OJ#b(S+ykvct2<|wP z-9wxCo=-jOFA0zE(IwXOS-P^NPTjIXx(`1{G>VpP6K4Y7{;8-r+Ntc+NMtrVKYqcR z1W&{DBtTCtT;B_IUi+oQ@>aw1j}+bS2-V47JY457JQbdj>;Ag**Q`rNMQTsPJHQQ& zx8Q-GsbiTLZH;wvnyPm^3WdM`Sg65ls?o0&s^j_*U83-A9`h1 z=}{hcD}$_KS-xpear*`8ta3k{N`_$N;nO0I=N(>`X|-Ag43W?g1_mC4vnTfE*{{c5 zUDP`->O9|UkG*r=75;kbcct>tRFT7gh}%SZW1Bu+px>IBr%rQ~-gZm0`#Vf=x$wm) zN+gCoYUk)p!6*wcO>=qtv!x>Uh;7$dK52 zifMOnRu+)nFE@Yi_4OFFi0PJ=xxinnr*bhKrKKTjcoTHQt@Ufv#F;ZwVEXhC(&g4T zgHAXGvamj#Ic6Q~7RXmU7C869g zxdiN;_lHc!T4LK6W-kdN|+k@qrpzBaMT%k`g-bkF*ix=irC-^qoU+vuuA2irJ!SSCfMzEUL zwfPi2**m{&n3KP6M8s!+%bB`%UFF^uUW()k{cb?*_5T}C=kyo&u*pob8|iObw2;L} z+y2jD6r<-t!AFeMCTNWUCgmuhShm@c;;5QC2~DV>}+b%O0&y*DS876bYj`rsuutuR6dTQr2U~^}S}i$5f}8Xo7Kv5}ECR zVZBfhu?Yr2&ry}u8Jd@zYWo#T&>|ut+uPfuq@?BLq$QOAJd5x+U z-nZORRPCYhN;w!iwgujClC5>Ig0s%$6_yYBYZ@+cQKU$tZq1jUoHmYs7QWMAoZUd3 
zSnbi5HigX+8t+-YBzOia;M-3{xxn4M`dUt{EWk|;X){2RQ*1Fd4UiGxJUiF5{@rpN%Jz|tHBevSH z&EgK57pJUoQQxn9x7hz0`BVbtGlma)OMoliEeYwKMT}X8P(yOTe&P}@3_AtWjJiKB z88%WT=sbx5 zH^5&zCZ+%rHi}WNU}%qdq*XEVA_i&ZkQ{QJp%8(yI&xF_hSwl~oX zr3xJo0=4aLy)Q)CAqIKDf4!WN2gH{n3lT2xy?Q`jssQobz~%uNQmJ#TPtfo}mHlNr z+vnM(Dtgs5b#J{Dd?wcGH~;7=jGEk{Cv&^&a+o1~fh=(!y3sfDPXsnQq8^sVcV;WE;~rGBVYB%&(puA{c;-jt6(eh<+-p#nBp%`O*dy+TM~`1#;0(& loUOnYVGLjBKVf$0FKxwSWtOC?!%fP2lrF0(WXaw0`hO5mycqxh literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/adaptive_memory_a.png.png b/model/train/yoco_moe/sources/images/adaptive_memory_a.png.png new file mode 100644 index 0000000000000000000000000000000000000000..ce611b1cf2ea6c999bc366bb73836e7e16bd7118 GIT binary patch literal 117068 zcmcG#1yCKq_b!NQAh^4`y9U>byTipbxRXGD;10pvT`zKRxwyLrF75<}kZj(o`tR>; zy{+2adT(c{x_bK5bf2E?>C=6_@BCf+`x6FBSzbvV1{M|u2KL_#^LGbI)ZEB;G*bAqnwA8F zJX-FdO(?w5+BzP|scDOJx<$p!1M>?@H$?P$R@R!mg6<+n%(}D&d1#Y6RX>wF!ch;O3Q!7a zON6flW(Ur?N?EGX&NXJg<$15W+1DmTt&qIc`7YXsH6#K}+h%sSLA==@-{rqB+OA7y z>wjTv!1<%)bBr}D0c{v_Nv{qT{B{>rc@U*Q8(`ZP8l6Fhsw;DLv(=`r;LaHxr+I16J@o}MU~C$}E3rd%^u*?mMBhbJZK zsfK6mJVo@V%I8K?mS7lj%*n>3#oMVH1)&hSv;`dI5F7st))a+h)+a(d`3rLQbSp3Rp}9&wH9V+G3f!x*0JO)XyK@83>M z+^f6n&ii`S*#E+Cax4Sfe_`6rJJ+M{pY{4v4)bEd(rQ4hT8Kj`?SD%9Y-R$W4 z^IsUv2{E^V(b2Y>2X}M3JOA5Dzt=w!5+GFSzMsjNp3LK%i6-#=NkU}r7Oub)8;5o5 zbfZSEoyB2~Pr5Mna%wo3n9e_ykME;hb%{}P1;QHznepo;eD%_?Fy%F`eodn(;C7g~ zVb5X4D56D{xIlDDPky!=CA~36q5{xR#>&gA2dA;I0Lafc372CaD~N1k-qFK{4<*Z2 zobu-#Og?o!hJ^QDoL+MRB)ppaA2SpbZykDM?bZi{W=@=A?J!N*FWLe50-K^v15wEjsi_xAQvQc{W7r#|6h5o=&TXppU#4O6B9h+IFyKU)PPkKAkbsAzuV$ z&$-d*a1q0roU9^Nw4(NWjknF0B6lFIUbl1?lq6LNUKV{uyoK|&@Rf$?IZ$8NimVim+jo*Y8Ny{Ug zoCa(gxO1T!>-3qUYDod>gCV@tm$_7_k!Ma_PNmt|F7<*nw-;#5UVJ4C;ol{sw3@lwi#Y%dd}KMd2`i}3CqbE_)2^sVVX0W zl@`Y&<)zYT9=}aA8q0y#vSGP_V!-mM9FCqsQJT+_LwOfEbvX`{*sj|nW;DRqm$#N9 z2meH}>TqWcrc4|p9&&Xj3l>xZ1Ji;FT59PWu1WIy!%lkn4ZE6hMd>1YAH;-qz0=Ku zBO0m~-Ys&as=nw{7g3Z>RfM-AqF$&fPG(&zJEtg}cV#WvR6bN@@tY{fHg8L7zM1im 
zV*!@O6&7sZ?2Gu;CR3mVGdU;aUDxFs*X6J0y*Z~Kfz@CE2ygwyW!y2T6NoqN9K@Sz zM+|W6xbdYmJ!pYqLkLpf4W=hN?M`Yn$F~np^3(9VkS*WqAqpDNhQtXWc3ZPnCM?1 z)*bRL3eYH)0mIM@IJ5p5LD0y4({z zgvaT2qFQ)U?f#)?f#!{MK}pcsO2`vzyV_py)FSNh0E%nXWK5Z9G+;znPdcvVEPfMl z`IxmybHD0LH0-S8c&esRd|-6|Q~z21A?C-K&A z)Ama(SdAZdhh-~s>N@CZ}n9w8&74n z1j|E=arjOo5zY~~)~g7%7EM=r{4C1$7vY%r9|>vmCVkOK;p`Wcl;FFoMAc=}YF$Y{ zqkv%dE|yOhL}`6;tux}at{Lj5mtHig{We}dUjn_b(#>t9u4Y|S09T&E-hinT8#qZ43|^s&G0+iarCyBqtoS4N^Jr0y>9 zR=bq_G-{6)ZlMDmp!elPx78_N2Ujm}lxQ28_E@DBk$kfJim6aI_;GZHkh{Ay*#UZX zY-~|0@=1VO^!(Vzu98p#)n_k)3;fomvw@#k9BwMOOg&1Mgp8`F@SQ#RyGfJlyUk#N zi9f*y@sI8h-ihTN0M!?Qz*xjODy>+IEomRcPVw30kU<}4Ew>&m-HKHNsk$iQw%O%; z+Z@Yp#r0%TwQ)}Q(W&wC!9Bmkd_Ryl`@$?tlB`ygZ1- z9BKHqc{)*q`okr`_(>TSc#e{)mv1%uYNY(?zq~iyCiN**?Ia_4Csy0UZN-Jpi7_oo zL_aXxwPa|sM=tl6C60sXh1g_Fo2tV$S}R7BhgxVd)+e0W1?<&_>f%7=)?3FqK0tQd zbh&eNraIWyFUA8lb)-41M{6I6&YTSbZ4qi|dq7@ti1v2)7QQH*G=GcS~$GE{@z6Pga*z*Pq|~<}|tC>YcPP5arJH zzU%1%-T(AyTYy$D)W1?k@Vo9wRT89&xD{7+rLk<^9i2Dj3>81nhf3cc*YQnPD`@%9 zI{*guaSpWvekpG!f8H||Fu-Moe{c7*4ssCJPQ;|?eY?1f0-Xbc?>9|bSpEM0Sm6H0 zRa1C=tOUEtZ$w&!w*O%7$EN8DE)7oq+(`SX>BFkG0gmfob&ga0+j;$FSuWJ{3Bnua zQXqaz2Bf-pTjX8rnw_YgewA5H{g}SFu{FWx?MEVa>HpYMM_2gugGBz?=C6kd>(&|Y z^R5Xr2kyPlELG5fmQ+cGyFGkMd)A~CU*Liy#g3i0hf zTd@aNysIZzbcP1Iv%ShcVw7)MOV+km#gIUSpA7gV8xBOB8s4^8TQXIG*g@RTjm@>i zjoI8ck(XD~Su!A#^4R_A6(3w*?hJ9jq;G3)W2SSlUZw&Di_baXh7k5`Jd!oUs=j%W z^U*G{Gq`be-Q_y?FU+9OQ^;{J_uN&UFFeRp;=IC!-mD?-EHSr>Wo+qU%7?E^B@{M> z$SH3(f#hc?-nS7We(&NcT5w&vxrkGuKgj1UKY*x>C}^dzY2-~}b*j+gkzzJcVilqg z)&3Y1@k}hWw?oIMm0)FlGrKf;0C(h1@ zcVS~k`xkwF-A)CA1|(>Y=pkKMMqu|8DpGryN1*iRHM@H9t$Y)DyA1Grn9(gzdVQ|3 zAy#`!KeMssa&Kt7an9-a!KLYT{}(1GXCQq5g%(kJQ_`d&^3@^kc_#ZDO2ICZz@1)( zeeZKW7mfRA|6RVGO?%9a)Z*dl%y2Cgjr9^3{KFvPnzo5h(=-i&KjR7FUseTLKP;{< zlFW9hrNjTmJ2Af`G!K zyd3`<&HI05zXh+cR%_juRD=QVa}ZNm4yPSV{dZ++>CoQX8I(4khj3+vrP%WG{3Njb z2sCnE-v02cN>D!-dxjrMT;M*9Gh^qksA%3#yr^1;CY|Ie5niCq$PI0LD;!732@}bq zCkq8L^MuLLF7#xMP6;MaEod3`mMTW#*I2J}++kKp+U~A_X|FdQS@(xuNGZEGyVQHS 
z2mdStr)nP=HcPBM3;&XAmpEo~Cwl)26IIYX_%fboCkCXKBug{~wsl5o=0mmv76#wc zKhE0Rfik3%riQr2SL`Ai_t$0xk$z1V&d7fOs}rL=qr}y$8zVW zB*^8)U(Ifzmm5uWwd5}x?1P(AY+FH1pUz3%Z0b+iy-`P6+M+wB^m`X=%xEnbhIrY| z-t>e};}SO=a$J1eoCnDjFm&TjT1ysO*uF+Wc~&zfU(BA}qMR~ii^ALD#AN+@*S%Y_ zl281b)wxT8<*$x+_7Uj-|w2vnHx)Y{&T2e2Z}_Q=VarVimyiZW=z`!TO{?Rlpv$-k;qn~kd)R5Qw)MCq6Hk3KT=x=Jl@HlD z{pMO9X}%H<{**btlezfhDR5=tLMspgG4uOPd?TCV90Lq(X&4j%_JJoNeoX2u*A)>$d=JAE7kX`xH9XEaCo#Fr?11{a|J+- zldQ|9TW$B^(z-qMDB?HOUHm>|OWKcy>QKWeXRYaKPb_MCFrNSae_6i6A4B z@WDr{b!+<~o`6JAx+b)9##C*0(iW0P+|N10eH}-Uf#?@FLri)XZU*0Ik}CQMFR`eBxtbG)frwe!Z$_g3c`}2WwrEHZeH5f`Pw2~EzLsHJUR~%e(Q-nxq&+tzC{XnHbi(}4sB`2&Fqw#ban=lhwa7UpC~NkRu|sWi z3yv#)?YDcLmrX_7m;GZC=r7N>Ujy28qDE!tQLHlyLted4$(H+1KNC7Itv@P4Yz;U& z2>9kz6mLq=_KA;3;-WA5vq{8nS#;pep)@{~e_<{HXh1$9tn#>a&O!ARUzG!0U?i1n zkzjY!r*tkxzFO#PUSr#dj7uMo64oQxa!eP8Mg3MY>(p03Jx$=wkLWn?cQ7%J8mPbF z0Mp;5Ds7d2Hp(mro|PeeACgaKb4}x6d^8kX$F+a|=8^OLi+FnNRCvjL@PN)dfSfmf ztQ+aVZzATC?q8T+c1bv2$xRb`&x&P3{=z^KVJq*#dWk2ewW@pPav=+p3DGvtP>IzE z!*j40FoZj2F~M=JNrgSWMc_v!D8yecF99Myt8c4-sIG-w@W!f~`+viivd|WFn_N{Dhzu-UU zwWH1MD?E&z58$Jehp1vIwbF>P#97Uls}$X{MaJOLQW;DKX8HC`(j&qv00f4Jsr-(j zgFp1GSjtch-bHa!qxqq__V?+fEuRUt$h-F_P}<=DbEtcn#S@e~O4D=Fw$|PvT&`f= z5Rw6QR~|5$HU9BM)?>3q?k&rjx7Y%0@fJIcFWegkV<1fIj0{m=gyqR&R7+zTLG_sC$nZj|OKeb%Y12xidt;vQCQph;+c?+yAWP*JOTgJ;Y7!w0*K zzHaDB8tjj;-7SdJ+Ej4ZGOUw!BI=8N#!g;%4}g>nf0%peMr^~`LDL9|>GL(u_P54u z!e@&av<|CHtEde{jcdh-U+;?VY(N+Pe1|T)bd(L2kz%u5fF@t2xwWDWHW1LU$9PYF z4Nw>IU5&;ya@APiH6EsHR#3iYFCfv|_IaHVCl9PX8>fS@#)mRv#xUMkz@O!7Y&HeK zH^sQf{O+BuCOxNE2L)2!>awNJu)TZ5(JdcuOH^%C+LkX5x9HPnSI}zBZ)cXRC+H3x zDhQiQ5_dlzRUxh1E{wIBC}A#hI5Nu67%dD+4If_AevQ<|ITYT=h>Q3=|JzD~dwq-7 z8%Lgz_}CxzWA$v!V*Tag}7M{(AL(CoTNr*=W1Rp)8l0Pka?hkYa% z;W!oD&Xu-bzNCqC0%mEQM52yQ1~WcVKLs_udjMumzuPO~Qq&uke06!_lDOnB2R;e} z-v+m({~7Ibx|X?GygJ{Fef#({3H=L$7P9eMaOThQd(@x#z`ro*ze}DZV`quR8Atg# zOWN0p@4wzWTzYe2`A=quho)K$aboFpEu)M=1jJ27wg(TC4P?fYlT9QB(G{%_A{)K9 z|I8=?t%NBz1}@j`A6=aKk-il~imT@#;7ISA!laJ?EACTZWuH^iFDl0%^P5Ow#iCtg 
z_qjUB?6M>z&z4@94Uq+S5EqI6Y-j%8?vV;+-cB-dD$8HXvjZ->V%$J{ivT`|&u;MJ zrqHUX_bTzG3uuPBo;&wg2qIkOG!_vlCBp!Qi%?-sg=?=>LgY9iB@WD8ZA(2gS182D z;AMIaQ(kwanlW{fNQQGoacrQIgGw8BAl_GijoV%^_sdpCBYqaZ57cX$c z6#*NpGEza_Eh}v9XxqZl!ol5D1PP|!H3dHyvF)A3*l#0(hGTHpzj6bm(Ohb{Ek|1j z_{(#cd*cm0I_Jo{v~IgB=?81L#yB`NSG$(4trGw9yUqAa60fk5VSWPob1CZg3vTMj z=Hc1=Ttt-|+FGg62DRohALfm?qGQ+k^HBD!Pw2&4Nb9P0Z)@BqUS{KV| zJxjGz$W(mCYp77Nzwx_wvg5G~0Y*<2_1;9bAHkgQ*^Zm~E&}e&&*~^=r@cBe9fp#Q zZSrP%mNgjRrlez$Y(QT?sP;s_)iAr}V_0+bKwB0F)>qKJm3r#1v+g@$w3jDyQz`%g76}`sU2D^~Q z+JvoQTACA6ijpqZWe$z{>h=&q#ww{@HEq0MVVPKUvVpH}z#0C&qZ648Fk25vz9USI zO5OBFT-yL}$a_9Ge6yWT!*Lr>HlIJeP0{A~P~*5Lw){Z!8tJ45&DhCN?rvK8Ug08c zSdw;X)aq(mR5O-bSTLuhwowbt80&Pc$?#Xf+$?Q&ql}T*IDo&q2C5|1C~Rf=XOp>NYuOBkq~0vfFJ#7liV2 zoOTNP!-)NQi;iBpC&@ z3{W_|&z=*CmWV7j^oRWv*+B}hMTNVQW)Ke%@;g%MVF*aQYB#J{%J`NlHQbm`*R-32ram=Q_IZ`zHPVzXMnQC$#l{C`jxnmGAUa9N#c{`cNG4O-$I7o7O>w z(Xh_?YjjTw^`%wuD1hr*Ubqu)MlB}h3%k~1EM_toy}Hs_0lk8%vo!Dnl>Hq4E{2sn zao)d2b=s)G%PIlKBX-h^VBN5*a+-e&X+x)rHOHKky<(KEl$k%AZLZpR2eiEfpae>{+}s(2ATkgM$aG;na9^;X*9?Va7;dEwR@j@bl5ZvL0b`QBT{tCwwo?9 z4;#}z8ZZ>{?P|=Fj>1}$FA;bR4fKYaWDQ2iQtJZFd5a^{q_8{&`8Mw$V#jVok1cUP z;oC%E2yZTwcoqsJp7D$eA)4`g^_<-iI#`#HQDa}oJNdO}w$wpTbLmfK`f=Y}{zHHsLEt{T{?l{JR=2Y)l zJiE9l4B^h*4=dI&APROs>lK&Igtm)!sN>1Ql7IBTCHoPELQC`5^;E>ZkI1~#zH-eR5iC2qGfE&)FxmC9{3g(~h zN??M+qihj(h?0ZSlA@vTzdxB)MAniz?TInnFAdmT=dyx)3>TlSwk3iNvCl;P>PGlHYV^QL#!wUsK5`|){XL$jnSHps%M%~att@Ss^|P-#vRv#~&Ja7bCC z-Stls%Ws`hm9uJ`waBh^yzsA@npypaL}|!OiK14HBDy8pn?h16Q4QIeHC5Qf3w~+^1=r>$XWZz90M)dCNv4qk*1=o&MQQ zS_VJ-oyYke0#mzN#vc}Wd5KHOAy<-FM}1X&<(qD9=&xa3$>NK*mlgTT^TzD;o=}MR zOiNGtOpqP%Z9H!Q@nhe|1`$Du8i_1G(A%sAMV<@v&&Rdus@<1f9F6vZqq{L)68xnq zJf!j_y{a?&F{i3lQ9n{$ZG3~bfv0@Yk9 z60(6c)9X6=qPS%Of7Opuiv}9LkYDfq@!)?CcFWyU`~9Hr41F6J$M8%%nzV9+N>1nA z)#;{Ycb87C?HrD)wY0_1foSv3>LYMy%8c^^m^8GK&+VA(b$dWJpBbx8z zvXU1eGB>~ap1a&GQQvozo}m?YbC+LSqW;31d|0o3oqN?O`7QD6y(@W8Ud8!mq$~G4 z{h{@~P~|!JqL}v|tnMKu#?s!THH{*S9%=6rtK6vHTMX8)gV!mLp*ks_{mE3PS* 
zc&mg5N{;^H>PS_~HtJzP)u3^z1{7ADkY={hfDazcN^T{JKBEhMUe)6G(=GG*{z;h0 zNmHS5cXc%om?Pm1Q=UOZeI9WhwydeFJ94Lt^Mm|ifZe9dPchcdc~&zc`tn^h_av(>%L$U>)CjS?+!MXq)WsP-)>#EmR}T9 z9hVe1Fo~Ks+^Ib;I0IKoxoRzCK9nMn;>V>Mikr$G!RpRuKBS0$Na4q2L8wpIJfwwP zXKv0uulHBoi8Ng~mE*O1`U}&X*LhinigiH`4Wh;{-6J-u%T|V!R=8yRvCfqqyiu1C zYUC4sCa*(Vhd)uj0a-tE@p=vxG#X?4At<{C&rEi4Abd=^Q3jF7Q)?A51veVKZ1;O&%)fnDm`P>mHtI9n)E5$QuW5Z; z&Ou6`tmrTsd>s?9N9y{5RJJ}-1~aP!Y|_VC^N^2+jqSvok=AfxAFz?#q$3eEdEyf8o1nvwys2;UWLnWJ*ipHt|$PuSxs*iy5FPtCy~H^{SCy>r~X}F{)It(^zIL3LP&0mb5XE0EuXk4*Eg+eM$OL_ z*3C>ce$K+$QlBQsGVLgslrqAaYxK&=V7PL~mm!_TKM8hCgX2y?FLqlI?0--k|H8b$ z;{BUo{;ChlHfsWT);O?hlSDWDSn_9LSiZ<}@liKHnr_Z`8ed<@59xKkI$?$BF7Y%X zBArdhFyGryeQVmK%a*A8ldW=0w75iAX7pwV;l2h-DT78=?+=HU*_;v=^ZuFOyJNBo z+y~zK9b-33P7Sl9n!ZGdiQxL8r%i|?*_+6$X93Y&VngEWm>b^Lhb}K>?PRpaf~2TV zQ;}j-uu1mWmtOq1BGSCH**i4H@5|RP;X&x@y+$%83xTt zLh@supTuuYvv6uLAn~1C$T$2PM<>2dWXo8J021{hdD9c`@)^jIX39Rt`NU&?WpLyv78 z7+K!my_S^yKQ?l#qj(Uv1lf`%Vpl9nuF8{LO7^eF6w8!LSVZAyMXOs7t;FDbB*kkQ z+PLXVpS$d|Q*g;rq6w^-fYA%WCfKOkv?+4>mK|C_mn|-CC%!eMYAtaBSCYE9|Da>6 z3wFhN&)3G>KIh|?&Ru~kLM^lBXNd*x>D{*HApUK`hU3{p3h||aFA_n}n|Gl@V%0bH zQ$yr*#K?liE~%nHt>zmj`(lRota+o26taXyV^5qqP^+deEIGLE5ipFlWw*&ivk1J)q-6770c_dW*6NB!7MNUa9 z9fA)d&Kj7I{?V{^HW?62pEY_j2!O)Ng1bvm20VmQdj~>F#>EUSA2Hr}+#udz7U&B` zRRW3z$X?%ADU4aZgi*-ecpx|6v%gR!`#U_g*Tke+;d6}7DQ7X(`;gF!#(=l7FuXTO zE?Wpl#+pxeI{tZQgk4iIA?py{0!cNu(j|b-y)OH`F1^>X?2@{RFF^KbmDHg(=REfx zd*(dXrGDdB^pfWvnffRq@*Dw*1x}jvbuj5{M32XiY0OVYj z&09Z&{f%kB*IzNNQ1!$n{gAemu+ED5r5AM5Yn&Dv#}c0VF2l(p95gRCqI)?PyP z+`ppZAvx>vXLT@9XT`(pTD4h;kIcljS`i^dJK2)aL?=|L<|qs2o+?jY;&#-|Zq{j1 z9z{yVvFsX)(KQCWG23eOC--AACeguzh8BBVkiagjFRhbZraOzXWO`q@XYrh)sUyv} ze^}e;d0$purkqXTgYNxD$vmy5cmInY%B!kdYRzG@+OfYBXq5RW2T<%D9V>$!Ce%0D zS7(8sXpA}f)p0FK@KUvt+K6LD8+@mx(Xf&9kIy-|)j}rfd8ej-8?E8Q@gn4U=x(8K zgfnI+WMf^KG^JUj@S)a9^Vh9yX{MNm!G@G?;DmEe`@)X?XkuSjmyvq=kT!(j-sB%c z+dOf@&gw0rbh0S*jd&4!pQDRL)`WDl*CqeMsAc`KJY@dm^fowQCR1N##^JfE(vD)K zJ}G_YAUKa!rv}sF$9z 
zQZQFB>h?REV{Z;{227;@7WX3o2|~pD7n#AEo1@38Gs3@)B34oZ+u}+35n}>h_Q^}= zTy$WkL+Fu{x8kEozbaHhC6?>Z2)m4a$|o0#3~<-ZqS`35t^8?~^U&GK7|`DX?x zPdC4%PYP3s<27BX3v-VBu*n8-4Tx6pdtE@wyr>qqv=B@z#x{6`+HVjScPNQc{q%)}&`%QKuW)0D6wegHoA{twdMf#;y`PjvzrcKUGA15=?`!zCkAq;(OLLvn~Q@r^xlqWewQ>K=1 zNHScXgMZ|ZqWqmQY9JBtVRz1*j&?lrxvSpVGD0PQvC53bV*|zGe%sn_*#jKGiEBa%LR~;^m;A5$%TeU=Rag6K=mBQd~d)I z`}$>zYND{j#l})prp*(Lx&n(&Mv)xvV1-veTdo!)+1P00%AtIyrSIu4ja=XT=ZVnX{#V2L`$B(A__^M^Y$vP8GSkY^|ldc_A zpC%R)Qtd4H;CX+@;N{4SiHI9l32N{ab(nl_=$Vi8Ay6X`!KrU7cDCX2=MVjmla}M1 z0%@BXs&5*I;oGe7HMJRuovctnbL`EbM@dk$(;6IX@yh^flFloGNJeGT z1u7k5d6`(_a^P=eo|sL(93C2ZHe$&hTih01v`|uGARdbTE0Q5WplTh_GS$Dd$&AYS zTDrfac3{BTqluv+b}|v^jMIFs);Y8Gyw3XKf&G*(zdIjN6Itn^W2D?2asT zp#=+@G_@Js!}!4nC${Z2n9S#rmKV>2V8?OWEXU!-3APen%gBiw+*l9AarP)WG#r(y z_jJ-sS@(%wI16n=kE}($bh&Z@3)U=_euZ)Q=SiDIBG)Ef4vKG`u>NJK?A4`#!{DX< zZ_U)2BRyx7ya!pT)nk*!+yPo=y#7p<5*p#oiwV2-v*=ZXD4$;(Os zqCcc*PEkMb=K=%uucXEKnnsTlT3xh6M_uGw0%c96IWu5WrmP0p)(terKUkJxZsIaSBv!1IJJto7|SPLoE8 z7N7T}Cff(nOL;svzWJ>&B~>>dZpOGO>R|*@>X|9)*#oq4!*ufCGXtGdR9A$%n=tlL zzcx-JKSI$rW0Ngv8_W-w$LRgNH1BVUX5yvl))Z`ZO-qhs%~T0{0QQ*W*r`t{-zIwA zCfF-~sL3>IY2-W>!7$}oiOtE{GG!C!nv1PrC<|qQTLK;}j1J{{y_-%o8Js!77VpKqmy5`kxy^ntm^rlx8C4!r zR>B=%DkU1bBwjhT1Y5c$D!A>=531eK_DfY_AF&e?wU?+BU3?Dx!nO7NZE?g1Y|`sI z?5v7Z9vK}kLMO#g<})svq*ThPb_VqYBGzrh64$#ZIxWRjlOLtc)tl1O(yVb-bz5>g z5CM77+;7Sj)4DpH(L+S^W{j~P6$9}~-6QQMr!WkIz`Gknj)>i%YRAB`mp=LvBp-Z< zHMb3VD&{54^dcjb+%UIxPyTvO-*XH4FnPQMox}WOx>8QUy4XcRS}hz1I5QZQlaG*rsrqSxs&eRKtQZM3#1$v@&JP?DX7jD)GT=`Pmm8!Ia$ zF4~f_`J+R3nb8vXJ?3sRGa#0El`Hu4;ZNOuiqEgjTNx838$4z+ooPC9LS+Dyjogd8@$=Yx!iNHB(7xq778MKv1nfM1ERn0(& z`WhVy7Wc%w-^0B8Sxq$glnVnW$#^0zL#xxoG0>`9h$I&tgbvgy(YqsSZAx9$Z~a_q z>(VKc`yw@Jm0V?JCL71{3y}8Ile#y1&)&?8$G|@C@JtE^#vL#gSYHw0XQ#(x z5<`XgJs*EDa1Wz{ri&3Qf}MLVCY4%kM(G27JtZv)h`Re z!|^t0SuD4YUMO$KAvaiv%b!07IyMVaHDTUgt)Z&0b0f69lkpsG8TniOFygGqD@|b; zSi_1qVz*}08aGI(9Y=8eGzA#8H{rPyt_Nm3SaPSR@6DqwIU2H5FOzLR66KW&gSxqI zZD)2c2!g-nc# 
z4$%w?eA#r3B%nxD_v_i1=oJ&EunuK5Dde45rTm9kC5y{{P@NI%9N?9w#z_Ymlg(NR zm$wIKYJtAQllwjA-RSk@^}=Opw?pnJenqJqbReoMw$+vfyKle`L!S&0IeY}dc{-9H zAESw?h%0sj5YTxNhT9AoSAr)2^Rqu&viX%~#;W7#u;il>m}NMqw5f&n;`=H#M3^Syi?jBJE@BDtMQz_`nt#crqxH zkYVZyT{w}w*q97llz*r4P9Wz`;+-G`RcGtMm!_oXYgstZKKcbjnWkEAYc-%Tp;+jCmC}Gt(QHymENX z!*#IUe_<--{W-mq)v7!Hg*70P8`+>r*@)cD-&^h%FLo+{iTHlwww=4~)SdvkxDXr~ ze;>`$S~iAXJ!|@1dJ=P1RV>sP)u}foybdhH;mh@sul2xPguCJCA1P*G_zkI1|{_VDG1 zT)H^hb}E|`Z8BP7m%$OV^VTh)O7)3Efk$O@U~}KQ5sPnPOcgA_6oTd%i>>?w#gXHiqGhox9j*iQ&G~-OB}FnQEFIkyix&kM^=C)lQeDdp)Yc^5iHH3 zd${)GE-Ndoa5Z&b@E3o5%FSzVVwYs*pu}Ft> z;on)N(Iu8WnZ=imNCQLDRs>6A_e|h{m_4|CF4L4~?xb*ckic@)$+{S?1nSDhb&VF( zP-tEJL0!b<7)oAxg{(axRPoli&_#?>Z_sbxft7bHRzo|PYZ4>r>TwcIf9iY*103C zLK{jp)4Y@{e{gYS=h^vTviSt-!q`Ci(GO~|%1p>5{jK2?_gD+7!x zwFY^sEK3@`Z&kHZcKyj(OVkGxp5(D!nL>(qUaWTDdHo3ly#Zx5OP2&BooEx8=M4&brk(rf`3@gLo#+=RynmK>TW7 zLjPQRDeQCla#fNI07x2F(JE$}LT3b{L+eC-bHGSQ`>kCB3zCw@B#(Kx2+EyJ^EfcX zRLgr%rM``AGe(Sc(^dzE=BX=db8%{t6w`W;%ak&)w*Z$G$oET zqYbH@xSH=#XQaQ@Y8XKF)l$f6#veWaj#Afxp6v~ql|V_7S)-X3OTD66V{w%MU#vze zQk8V^nL*Q!MwAHWlEB)GU!snaboz}PQ~i{36pEKh1I$Y=Ey8dY(G(G5Y-Makzt+wg z=;$9l5(Y+}z5#WPpDC|KU(p}i|C|ZEH&uK7{`drZM|?B>3*$}Ji|u$u6YmB1B!jmf z!j{V05@u`#aK|!JMG;Tfzq}bk*|LdnEt73P2xhFSGhnZq`H0b;exT=zGUb3a;NL+# zBJqm2&Y&gC5!Uz32;s^<@U;|h|LFvCRSGr9e^r1F7mP&}@v3+tUCQUaevnog8D+S~ zAU!7f!!@SbB24uwbJLTImUifZHmYPFt)><(hBZje-}LcC#ToT<8=b4|r%kQeg3Q6K z9w9Gg7b&Na93%3#;?ujrXI;A5d7cB7sD{Xt!~CiZfREHISiN zdc8RTy~30a#*R9#r1c!gE#P=kupoM96Pwy?nWkL#IZsh<&l>sTI&{teJeQJk#nDtc zscx@mx+f)7Ix+sjqh7wfD<3)PK90HAGH=^u2zXRQ-0++09)?YMUSzYhDw<3QN@%p` z6^0aC`F<&_EAgatm&zU-|5;u6qK%HuE#eUlN?Q&A)umLE1MAQfjU~=FM!?rOEv`e_ zUo5O06`#l`ZloF`yKpynLZ{GZw&lYS(@0)ROp{@+H&|-y;_`}H>;TJ}_zTqWxsVQJsUa+?nz`3q{UKR=8f# zv;9W2@*1~Dr#oz#h=>cLOR<9$Dy||e4qf^ke6(RYKwnRf7VdFH5;<<>`ba!N!Nm8z zGmYVFT1leJWJq1l7{3KouMCP-z$H6^Cb#XOrhGxW$-FQeThTI=;=t2P_+u@ZSoJSV zmo_?_;G!dvoQjjGJeVHOx$V_-`Do7YoTM)--l5eiF8QXiSxYK73QMG!X34njm}Xwb zuG3PaS5pbSzZ8dF{90GTpXN9A+_Ye 
znlcL!6!BLs!e7Fm823yhE`MJ!jD3j~M75-h@$7xd>(R~1x_6qo9Dll0Gyu<}@)Fvk zSJI{dn#78*hx;F2j?`P0)c%DHsyJcLMJfrSE&g&SYbXB!q+N8t<2p2aZsOFnsNahd zU!-R@Y#IyZCe^Z6-ic9}MT4Qchc+zTqC)_v>6Pr@=7a1(8NJ8H%}B5JmOXP`Z!=2F z5yt4NC-;qu*n2NKJsSNC{nN@P?R*XDE%c;U$Kiz?t zUM0(qA!-1|gnGJw!MH5)jqoZg^sGidl611hLK&JXC*_!nQ4(;CvuQeLxYrnZwouKpaszc3%kQObz{yPFK9 zk^}mle_`eV+bEy(M=w*gD1X*;{zJFLHxl1!4ySY<{F1d#0&9gL+}E>oqms!5;;pbN z{}1ZkDlCqu3)3Y82p-(s-D#k4cc-Cocelm@!QI`1yEd-DU4lCV4<0mF$S}_wnSY*h z&dgk%%XL>(dso$7>s#-)BnT2hzq@ucHHc*;)@xRpL{YsGF6BwUCn7Hw$4k(~+dPCZ zBVw8ft&3LM3eF=L>0s8F;dW=>JBySRY7d8Pxu<0@Q8(OZQ!z38@!Weq8HXCAP3o=a zM!I~=gyha&n+(axN+?@Rxoj&RaY8EwsrT|sk!Q^pshP% z>E3h$7Px`1S-FzvnF?goMro}#BdZExP2u$-;o z_YjRgs?JOA@7D2~U3C=GX{>$fT-{Bybn0#00r#h*RuEVX-G3OX@l(5b!mc>k2=AexCT>tY{F?aPCUA_uKls2z8Lmqti>YphNGR(+UpcAi-!P3DqzydX+c^;3` z>FTHz72s&KmGW!3#~?}Do5AoP)S={B8++{Wq=PRSsr4LHp%jL^*_w=;&w<5P{C-V~ zrB^ur@dV0t`AS-D&RLvyWB*bR8(ZLRVW73PoK$*^1oy472Mrt0vO+WxPI!t_TVAB@H12nu*$mpwl% zVp$E0(N<*FslsG>;AEY`OZN-xW|tI1!+C$Ae$RQ6Xk-5 z9pCEUv^x`>BrpJg|E{i;9e@45oLAy-seKCtZ(G?+wQK6E&D+zoqBfVtvm~K&oi3BU z0lW_^S^)lrhd47wt3*#JWwc-8yUNPhGzZC-`n88Kw(Qf&@O*5vrdvfb3+sL6t|v;; zsbu3r@c@1MRr}Lz2Jlq9OR$F0xjx4@@w{g`;lzXCuCkdo7o!7$_cR<}_J6d1`R|$% z{~t|acn(LNCB}(nF`t$5Hm);BzYMK^MjH-3)0WK$*r&OCm*(yCI~JQJj**JQE9BlP z6^yHn2~c<8u|Y~sBDNb*v0)Es;nXYUKuuOQe&T(g#9%=jlrLQG02%i)$^_E|Ep)s|8c=nT%$-2>D~FG!U;YP!T5|wm#;n#Z+2+6 zpUp>CW?QcQek0>Pp5+#?=LXy%i`w^BO2RLhn(A^AZZ6}n_RY&T2obgVVOX-uCv9c_ zWGV~yZ$o(D{7Zgrt1b{=EF;QBb~qDj!EqPNd;1SYF+W7);qQ0XrRXp0E$IB|lq@Ry|2wlj}%g^Qg5{F$vtGzihbLJmFB-G#d2}oL9@@LKG zgQOmRz^LgpxJ08^sByl+^t2|u@2Hbg->cIP-|>GT!{wG+7LQMW3L>ke?bqhhT!i4b zlht~dk~_wCwVG>+$%@u5dNbZaOYqY%oq8HuZvcDSZ4!_*JO^CUONG>}&3K+{+#{qTRAm2WtMY$a3;nlc;^KiA`sTse_}504f$V;jTb|M|=b1HHyTw&kn_8D*hpx|) z@C1mS-)R%{&f@Yix@W|FEW2mTFgZgxE2H_&E8X~K?b3dyL$oX5>z{h~m-&~`7s<=F zgEwL4?yrsH)^Fdv|G}_6ei`b0FU^GPU|;>#yb)djeE&apdE8dr1+C-4sY~skC7pLC zQO2jwVd9h_EdnO%=}Ij}2oh(&OW~^x2Z$*nZEneUnU2$34BkOJKBSKz*@CDq@dHgD 
z%w{qK#=qnnj0ItuRuczEO_3V|SL{Zf-UNnBUIam&OhE4Axv08PAFqtGr5K!n2LzrK|E9c#RX@Dgxe@;j$XIDzUg~SOYefOM(zddT9Zd1)4WFQW+;as26WK5jiOTHj4dzc3Lp|y5#hqktka8*Qu_*QeSuWuN&w8!3>{%UwcQ6q9>d7Q?vfi z3cc~?&A#Pezg4^bNAT!a{ARgxO8xc^hCD7l73djQ|uK3bZy43s+38bmn}eDvuKr!P{QCpKBi-3P5Yyv= z9ue5C4tJ-yg>GuSFXFiv-tKyuk#GJSbdot*_Omu~%W_?1WOQK73$HRSXeJd4?m@LIrCFo%U)o_4aYkOZ3@6^|^MeP4Cx) z9PED3D{YXN2;cP#$eAp{lT0>lWN7MbsS^aQic=8-tj%Y`JNAjdrBi<;i%9-VanZh- z^Z&VsZsg(XqDO{rMWRR9aj#3#FLl3@BLuZ;frgDB)vOME&FI#I>ub9R!0$ZbmzgxrPx{L}kG zRMSYg(s--I@E43RsO=GFqKrKMd5GjXP@O&4PyuKA&G*DE{?O?`&c*SgPvIK|zog;ts$J@W%q*QiBe81auFH!6>4WDZn#-9Ezk$nE z{aF8s(qh$Qmr;+gjqkxMDWl>cp5Bi=B1TDl-RNI!fuad`^++>tyIrQ)4^1IfpA?%7P|*&7gy zX)SL{lI4t(w4z%vDT^j&Ih1+O6?SwujtjRlVU!PuqvoPdo}G0?fjJ; zB_Zk8^|9?J9pw7_qt&j*X}({-UdBLR35|7yIVZSLItXGqF!}E3rRU zW5NEs%EPPhoz%l4}$6CJI9$Y7x3v$=5t?kx&AepoLFB|8mRe*6S|TXN9o6@;6Pr>;rannX1>PZz>79s zd(Yxg$wjV<`o_{k?6%;f{qK5DczfR}&tnjdLbr81UTTnVBul#_VC)AdpNlG$yPEOi zkB|F(;h4xINrlvt<0Qhiq4Zh+zxd+&4VN9*cAXMU7uPgaYR-gz5lL%%SAxp6K#EL|!l)TawwC+bjk z&V3Rcwxgls7{aF%RVRMz}kdJhJnH4U$}b3kXh+ ziR-Fl^aaruG>vj6KxufDUVSuKH3=CP9O9)O1}e;_+L8MWWW19<6+>ac4C++eX&PYh zX5zC=)>SQO$R$sqxhqK#!cVzRf0m8pkv&(MLtp^ngW;5E<1li9zGx^I~87B_?@P zl6iiv`G+c+cWHxHThf|Zfy`+gtGIf z&RU-Xs`Od>2h$!(zZgu>#L|(&z+`ODcuD#Y%rDg&G@O-=e|DZI16{TsHx%V5K0qa^ zTrewmS&L2iHZ3eH)|FGjZL z(08B$qc6amnjgvV(}LcYKd8K-T?xU<$s`=toiVXx6HsJ7|Ljr3CldB+_iksUjtfbBx_{Pa)?H78Haasyr};n%$%#6@pR?L39H`=L zjkqw`h@GpuVFL`1G?JsfZ+veG15YfYHX3MmYEwhZ@qGCgqO$x`)`E9hjh&9&x2xgZ ziqr$K1gk4a=kkb_0gd5w`@>8y%J6WWSR3ZmqGa)P(~QCKq(4JKzm?6(#AhIWejgu0XcD#LIi@};TCqs$2I;M~jNsW}>n|?Dv9vm0T-Ho6f2d7J??ZJIihN((ZD-BzI$zpU#^7INc z*$V08w&A+`J7B80nNg!V3l?E_((syK^P1yx>g-eDlar*UFCkpXxXAUhTye)aYDw+mS|v-z|3 zl+<4}(9D4xb%a%3?>}nHZ7%p}eYb=M)!0mpW^GH)zNtm7v=w{Ff}fF#?Rb}3b;|K+ zQB+hOZgdmTvhgmdRy^V*&foc$Lx(9r?$uuw`RgQ3q9}L0Ih_155-X1J6NnWw#pY6*OifQnb~a z!9gdlp~H|6mZ|>Rvcu_|*(!WT(9c1;d$9k|GSzU&`HhqvZx7xL5)y;L2IEf}>y&($@Aa zWjCtbK#=(6n%1-_x&i1}b~4AMv%TsrZz_lH`TF+^uttGe`yf4yLD`g_m-RuyNVG%O 
zv0cehHnZTItcsR09Pie<1s`uc-4$&iF{@by)@f%0^NLoR4NGUz4K8hj!R6SjA+~8% zEQ%b>LskafDwYLV4M$;f*G9pPVGYP6ODHIQK(B)q zY$bKsGcZr|7i5$Q*gAZ&>zxc=bFkQ>85JY1N9W2J8h@unP{>m%LI>C?!L1~byc9SB zV~0v~vewMCRcN?__%FOOf09SB6Ya6L#jxeI`|*;`2sf+IEQn=^d*tkH&?GItV2(-2 z)dk*1@3w;J?k%z}d#fvpd`e-HI*+cBCu_h5Ijw5PTK`~V;xe*NYe%=7pl4`}WV{&L zs0#|KxvjO^Y-{2msbrZuaQpAf=Ehc7rD^oUpP8CD11KuvPhO6ug|TDM36I?U(y0u? z`xl@)kP2e_u{Jy@KQ?IWuL9*c5_v4&H^mf$NV1Sk-ljYqrqq6oSmhbF^&4R-fxINz ze1M#uY`k0|RFnp>yH$C{BR49%@B}$838ooV9ADsor0{_7%rSXO82NZg6TLOlxxL3Wy?$J!!@;p$vo6=ku zS^E5F8rwRh?9ZafZ6oq3kcDcY+O}5E%$=K)Pv$HxNv+q*GfuuY907vjpYdMZ1q5<+>2z5$rJ&g$PZ z3*UWaG%P{SUj&@XzJ}C~gk2z1zlavkOl!{UlndM@h#C{VrA2D3l-HBQ{<2ljPlj?@ zW>W}IGo@PI0B&?r*r_|FN7t5|i#tb_7_59ufy&`29mGS!$x|N`rKzZ8sYpkLunM<) zFh>1K-&6Jl?(at>)hg@Z6Pt`{Ee*bRF3b*@Oa*~ia!*{#j@rwFpFEhx0WqW=+8idK zh3B1ICE)aSis4W-zpYkKAzgH3>r=U%It_eEht^k*j&@#lS0WZ6^E_&xSsGch4Av5p zZZCD_l5!3#DEsE%PqJ+k;6?e*D!h#e;z0X!wSXk{W z22Y@|)CcN75>Ppow_c?#BlYeS=J3QcFVle`#j%?lFs-m*8Gh@f;p&eUb(I`d*;fm!H4{Dyt$DR^cb1xB*<6} zU7d_jlF)za|6-ik)7LSoyv6ggJ|$lWZ~j{Ttv)UuQa5CsYyZsM8U&efPZ&k|`xVt~ zUqEsBZB2G5cyPzL|G1nNZk(M==oks|70PaKEE3SMVo)?ot~%&=fBV!CwUVVHvpDHX zqg9i}5;0qlj8k%J0o88Q-09L!nuIU!N>~=3(s8pSfGa8^w-=bBbC1vLHdBpnhM0B zyf7-)?l~Q{1(yvxwKf~+(dEJ7Q&TaIvXok@zxpQsK@gTVTY0R_yMH0$K(57Xu6QJV zL(pv(ZxY~=-9_8OFJYHCjJQ17X|ul4-N^9Qxup~%ZOfzf(=SG&^H)|~Y9_3~RVTjc zUA&yfd6~il13IBl!{S?_II9zmGuHIXS-Mi3Rw}NZC$qI-&)A9UD2d25b8M{LFVJgo zoQT~mwTu1fY>+)Z1%PJhT!%_sgI{l|0qL}Y5gtNsT(vfWAZt0LO9pkZiYu z7Sl;nUT^(rK&Jt5PBvEUiJ!Q9qN6F6fcaP~oppB!oQhOcr_@xQZV*d0E@66%p)#zn z$6m1VWmw`1Bp$6t@R^W-RgYYNpKzLjP?Y5fPJ z$>)wi4TLx{iEVGT=RSVqVFT_CH?A8ju!K)7`!Z2HN$_za#n*9x4Z8 z*KV;24}x)4$Rl7QMHb$bbt16=X02eP&*4F}EDWscH+A(=gVe=j_;61zV**G@CN8lx ztH62h8@kM1o5&2>Nr@!eWTyyDCV?;E{vglh+#^DZb>3mBw+X2q;6p#*{2`P0vS+zu z&@T8Ffk(cCsCzq4^cV&WDp+TSB)Ou;TNuqt*^^Uc0oi5KlX>~oQU6acyT-)nl6ebF zo72@FAHjJNH$>Qe4y=hzrjL9?8rQfMnNK5~qY>W{#}1a}8L1S;%Bpv*kB9rN{RriC z^1?2#P1!niSkm$$HPqLVP|TXEe1S4^?N-nlnf}zo+>Op@j}TRpmQ4$D?-(;^x|@#E 
zA=c>R;9AKMT1imoxN1t-pF_D&jiw1d<_(BkY{hx&#(2`QfF#*o7;!NLI+JW^L>k{R z%P?%enY;sEXJxo{d}F>q7BAp2+cue)>D+;8q_ij*gGPUdSrcS!3U|g0DuL;7akRcf z?L!(Yt}m3%#bs*_EIKHfiC{vzIZ*9zalM%VR~0v*)F}I>`j(wtEH1#={K0_qciz2K zx#~+I;f6QkDgz^H5u-B^&m4%O3nsA2`hrrh?%#!7Uc_ui!$wH&4#662+HA{lKE?k28W!p(0#MqU~(&m8n66iV%bK9?K?9mR5C3lyfP@k2=%+j z3-K8}tj5AkFVSC9lbk!E(G_$;tPvq){y-IvrdTFR;uhX(C3j@xpN7SdIHbEu?-d$f zd#^Gg_n0Wh^+`=ynpl1lH>JDCs?JCk&ggnQUF+*_TMOIW3urQ#DUEnN z7cEA7zXUx=bixzZB8rS9M>OWk%NixUkcx`y=n$sc^>P6JZZLb#vd#orFZFI#$yPQ4 zpW?^nxvHZ^pBe>E6QNkds03Vwe4&V~6l6VR6qGt6Tq29R?WP>@BbVRo$*A)FoBSdy z@kb>@Dnp4hN8{=Uk zx793^%vm7uRRiyeiXYBN<$zhj9|4%c3@Ep}=;p00W;Gg&axe;RFy~2+t@)@hd&MzK z6;7NoY2k-p4E;-emTi7w-ji__r}2P5Ay!h5Qhfu1rA^=#k|T`Yk)9$D56wW2O7K{% z8;-v`Na_~uk5DzLd9k*4R+tbq3JgP&sT*sH%{?af7_>ygQFXa@crV?g1{yVAd9MT)7%YjgrW%Rcyq^Gv%t(Y%%-U84Y zHQI%`&56Z%^7^U?)T;^^(G83)`s{s;k5)W2BtsHIwqv$YHgo(O@(*T7w$}JLqVoG| zG)gM_B5tt;ncIA-k6>M>f_^Vtu3GE1xm)+vPHZR{eSo&_OeW8xdT9DoN;8*|gCaI- zxVxj&9)HL6V_{NC7f9Wna9#PO-raBfP>fm5 z#D^rt2k@jJn-V@6=A=}=4$loO`~5WW$Wr(6uF0a~yn7th(e+%Nn@1B3pSyXMXnK<| z=u2#ew6Y_o41>j{C4R_n+OhKGbT*-!*ireQ?NCzq;e;)tn}vn|?VG3~1qVmC-qPCN zb4i|}+!iDd^q8<>S0`3aM^ok?Nj;dlNggMeKb1{sKvuA!l}8yG$RR~@;L5O1=<;{W zxr9u(q5D-&Z^cyBI>$4X$Dvw~bDf%~+wU`sl!X{5I#_6g6k^sW397}ZWi1||u`%Ll zqvk^%!AZv|-bFDilj7_EIkUnfi?Bhkwp33yQVZqy&m56=k+)d*>M@N9>X#J|cQkTx zm%4>{sW-JPPMJDbvf4~|*vPszfNFqDaB;!FWjZzjv{c4I zdkTX@gO2XSPFJ46FY6NBu^a=Ia+s5)DohJf%9byNOX6|_v(TQVG$l!%Sm0SZxaPPS ze->;mM_TWY0!18H(idd;;U{$VfOqi4qY)pqI)ie2xp+hhC*b7fQ?{<@{6x|sXP1IM zOK0cO@QG%FDVmoMRW95?X1*SyYKw#4dvwQ`q{yF671yS?o~p z^f5<}XL+VkZ-rAZEoc#rhHd{?x`FyrW@%YXa_&dMrxU~a;$%ON+mutPfG@qt`1xs4 z@6x&r7FmPFZ8;GejjZIPjAT6c>A9xk7~mUa{SYQ`mnPcJ=+^@y{4afP11&kY8fpWHE$-neEKDPVo-ZFPI}EX5W;zG;v(NWF-|jWx$*-DAA=Hy zl{4^4>Yf*r?TS*qHy9znD4cW#uH_Oto3ul6j|mwyj$so~7(Y~HiXNIG zu*CQ1h6KE5u;lJ-(>5OfSD7@hK%wU|jg%Vd+H^izbgvVxA8>V788qA`I_RG~vjD?ZksSdO%!pVcIG^X*a+1L~s;c0h7IVJoT1sw#GKg%--1Y=6>v zGjXeARz;xli33S08pi6Hadx*nzQvos%|95jLmVBit2BAx`^#ZaTBMM~Z>)}>h+(JI 
zV&D~Q5$5${5D)*(E*fvxZoxsoB!FY(Pova_S7a&Tx4kP+%4@!sA?;W)QMcRW7C40I;j)k^7{~6QHcpNyKug|pM3jXVNRCR=HYoKpmN7M zuKkdCg5qduhp0Aep->GDm2^+}hM7D;x=yHUeWNG+rd&gC!uLCew_;L2+vbBDe17n51M%@a{k zeV|xAu@kF+d_^o9J3cR$8*T3|UV2taQVdqjq3)`ak(qD1h7E`N!^um+xz!hTWCMB` zLcWtlcD|<5R&Xu7T-!7XnzmAuj1M_@cx4_^Q~s-+BmB9GS|K#>c|z!MI~uA%`v_r*Pz|N zw-)-bdctEPY$8LHc`TXk@>Sr+8)J?3VGU`yISRTJ+demxS&r`~M0-^`Es3y?wEkyC zLLOXwCAg$+g|KR@%-pz#Cb`+>ww!HljihQ0dOb292>SaGlZbF(RkB7En2@FmXGx_S1Hk?YVrXp#BEu!ZU9xa2pbjPXK;)_pf*=6(Q zrH#c$f;;=A@|6GW9vMQ87C??=Aa@B6!J-Er1; zUF}qC%#vSbcNLCViKDaxRPe*`Xzp6g9vc;v#d$#&m_!!LR;w7zZn9|TYDaqfz zyjk;owOpoeTuPvgD$SRfudPhBCDWl$1P2;U0tROWzY_mFW_#xmkF{rI53J9!?|~zN z?9r{2X`rjb^PwIT(}R4LOOCKg2YdM1HUWESFAwntnk3D}i@ky@1-%m9LMqJQNE%VY zEQX7N%%kSIly+=CE2}k6LjkhdOkIVq3V)m9y3$ks!HiJ@KiHcdNmPxuH@ec!8#P97 zUbMtBP`Usc$cb@Inuixr`@_Zp-CPr?Aq>0 zTT4w$oh zG_y0dfqkGd*>OgZaE^RQmvzN7k`1bq`-6M}+ljb!w7 z)HRkJ;&$>&d(_J+et09o`>BbqZn#L24V&z6A5fQJTgBAw5ZrZ{USsY2UyEyQ>0J_k z5x-T%2?jG-X7j1autXkw3I3iKyj*0LeG^^H4W<4XBG5jv!^Wh(n({>4v6 zapLwK^)z**LG8JBzStm=be*k6#qiy1_;~BU!Oba&!)YI2Ny)}srFcok=q9n%p1{u^ zEubwxYmJk9kw%07Sy~naRjk}?q+tZn{NNhxSoK1fB{09yI4w*V!_HJGUN8p#Xi)}G z!7W$GCAtY|&EzP;SiK?hR78&+l;p*)RUdJH^Md)keG;{ghf$M8Y2!W#tIrVn7er>- zQpSDvSc9LF3Wv)z8LGB&zH=L7&*E{Z2H`lxughTf=KaM@Jz?KgUF^hC(Dv5T^VMNV zT%K!kzX?BvN6bpX8;(|wsoa2NN@BWpRYj1?VkchM)tC8$vFLI~bi~`{P(`>#B5@%7 zr>w5=*FM(FcSAx)BLhyI6Q^pgC}$j@E|4WfGh}PP>{u2j-gX7;M4T6h{ zmkAc;IUV=IjqBBj4a8^-lc$_-)DCGRdmF+rg5?misVgd*U~W#eF&z|7aOk#-;-Qvq zjHt@O5O*uZ@tYuAd{%Nlxu*YUedF~2E)ko^Th$5Mx2g|Bn_;I>f-_Xa8cDT+{dDXE z2q=)lUn{hCilIrj?Q%U=ccgwi6!mRtAiEan2Z>#dH)v551x=b%i7|Z&qrgBIWSuGe z5?FQtt%@S5Y~qCFpJ=xEBpfU=Q4=kK!&E9pVO7;u87X{BFa+q=@XS1#tf?Nsus2(- z)M5)MLmv^srli!IrlcuJTA4By5Pvm!8Bkx@p1145gN{)Y8M}Mj!Ck}BZlQDWj{`64 zN;Q*wQkU^OQwgotXm1*<_OLK4%@cln-Cba0@n0PmyZ2mRQRW-)H8;!~rNNrO)sm+G zv4A8SK&*{s1*Ab{)sv2Gdwe^lsp-RETGIKJa+9gjX#<6cm2S}l@NDWRiJz#VRbFLQ zas71%s~#nhFGH&etoM|zBZ9${6~`dz@8xoGu078M`8Pk3M8$&4K- z%8xjb+BldiYLCFfM;BvE(!(sSq)widVi7%)u#q+U*y1)+y0b+|N@`LPip?vO`|Dgp 
znu*bLlaIEVkxAk^%hUlY$<+DDU8lhmjZG8H4szUQz&v2fa_kW|4+UrGAZ$NK=pPK; zg5q@=YjeUiR~s+u@gCDZ7_5{IwCm3zF030Q4^1Xsgha|^DQ84{mol?WUzC#l&9FOk z9y6?Nv-7>=b(V1i6M1tbt=jeEeelRI(AB zObz>?c@{I_7rK>u#>EpMOLnoM&&SUCI^ROF- zBHC4lr5h^I$EAU!xm%wu8_S+vc{W6=4c>L52+?k;)~|4&*;Cao@A!JWSaVGwirg4M zg9Nz3YLy%Q4t4v3pwky8H%;0o45?Y}k-C}2bci_hIpF}7mLSYW4w4+x4uKLxHQwpX49RCGJ0%JGX$t1Y)5AbQ)KdVhsW^`P_h$~vUHPkk_k`T^*a}I;_O2^*pZvU!D+p1{$ zvh}lG&zYr39q_9@d%|*b=0BLj<0Jj{(Y@f+zqZc*U`}>Rq2n92bQU{qu`}X=6vQEC z4N`mPyATqwv7Ve8$<<_gRrJ?bryDLTI6DDYRf;11fsgGf)RCV!T9;nYvTJl~ks@2b z1_e`_cGJbCiCP}>Qc*HDNPf6jmharbC;ePNP|C$kmYGc^L&}Q%FY)bad)<0%;IKsl zBHQp;9GVCrk%#}ei^p<7eW?u^H5SvGPOee3Ml<>9hh zo!60u4tcbl`Ta?&=%f2B<5j|_^7ZrmjOe2qDA|HC=}?CJ~A{T+s*% zkM5Ksz7HQ~k&HT;V75e)!)-B#qYrROwkgQ}uh`g%V@0%hTH?`xwF)SXbtO{={Q|Ib zr;LKT^4+wc6nbQrm`4+HXlh%=X_!XaI`LTAM&Q#Or|AkaaGfV8K#cKdRh8M9@RitYTN0@3CR%e zv8xc^nGoB+&(CT^^JXd1A&F(xQ)zf#6NXY@c-jr$F%c4l!bvpv?(qdD;n^faM}NIs z$57W&ILTmHwo6vlZ%Y+&D`>Jnfc0s$WC&<{t>$X_Oy6e~t^fY<|FG#jvG?~7z34D6 zj|f2eg#PP1{_HTmf1QWDb+bKiBY|*1BwjWu9o;(-V+egTg(32zYFHx#OeFI|y0RoB z$q=3lRjJGBVk31fdCX264pVv?SDBS}1;bByktU>&j2fhU)yEs2QJ3@44homhe0Ny2MMZ^XhYyDwW5Urt$p-o|*-96B4~~ z^~7miS~^1V#CgGC?8nG`Vw)yDi&3fz?TIAxDC?`@dP(9(iCPN+u_AL*DOs@uI~t4+ z4zOW3!>no+9CBq*oFu~8MBYnyHCY8QRwwpPj3xQ+#o}1^rsLm5QAOF&Rb^2$={XEl zRgQI0W%=31j*FW9O>rIz%C32>8xH%1o+IwL`vfyS+>z!+Dj+_ z3{kO&>%TNYaoVi)JmV8L^bxjm<#KIfQLTQWT5DWV&i!57L64raL~Ow8Q#3ratf+Z|*~w7?oX-yr38(X9-0~pf7%PXzJw$%ZmyT~ainq(~Ub!-J{d)35j z8Py+gaQ}t6vxcQb@_u{hH8+tYO=;@A!O9to(e|WBcNa&4%=Xs83vnR+)H=g$SvW|5 z>EYTTh9>cXMNOz_Qnfo^Db^zMNhL z(Kx}X`{+$w8J{)TPHrQdwA|C&d)ohZXAvLhB(98BVB*DYRJUNG5cPGic~2*p4On^+ z{!;r?r$qZXT=j6yat1wXv^bn-u(*l9?B5Q)-Sg^M2 z#YbM~>9(wwU<$EzLH%Px^>gNM76P%g-T)2#89&N6e<*F!k0bzNYBF9_@YXgWDH)Dk zJH_ACJA7#bJ(e@azmuMSRLjg)J27(f^M0KrH~Y@kIVm>xZUM4BfO>#GVWdl}v7vZ# zmzt3lVsv#ASgYGpcUE&Lha?X9;I08dS8bMb$kOxFdeu{WyoSZK_6fs9wlhaMY1z`q zx~0`KZ~)PCCd&C;Ma5~T0}S~3i3-EVs={$(9sy0DIZQf4JkE7U)mm@0UZSGfqwNBgO+(TjB6;umg=ajGicFVgtbtCb;#!hv;EsHkm$_?K~q6r@?Xc-=g 
zf1YIUGRouzj)|2#y?m<{;!B$mxyJJG6qLO`qp0B+xe@er&Rt+#^ZgjcZS7^YL zvIg9LdUy__&UM*p#inM2wV_-hQ5>4TZxs^TO|Yro20ZN2VPZ@$#|uh=g*XY64UAe= zgb}@Sdb)r~SGz_<1}-f(0mUt-dbXfLjNyh|`tu*L9S9&>$(1Use z0P>|o=CUt|2VqXeXTm5KeJz*2u5qMY9w$xk1`bP~*bLZ`^_ZYJ4kKDdzOuiRYqcI8 z)?@AthADWmgAg~f?B({V$Sko>hVd{S1}Ar*tn#IdAcpx^e2$!+&lQ>e$^E4Fj=7r| zW#5Z!Y!_API2UK)qMGQ7rb$9{qzXaqgo{%wj-(`GF++Q7EehgPc<`OsY&LMy7P@uS=ff)@M<{4htP62losm`?*qmN$ON=4WKqJv~QAXW?@Rr4qGlm6A zKE_y#4Tr7~ykoZ`0Wx0)i?p8@GY6)`q80=%Ne(&y$}AD7X4$ zUWluPx^um$K`x&#HVJuxrqPR*AmGI;`Qjgp$6;AHYD7_NRwt+2yqq$~qk9~uA7zfv z?h1U$xi@cdZy)!W>D(#%vqzz!;pJ|8cC(IPVIB79Z8IHVm)s!Q9%tO@fR4o}!K4Xn zt1YG*Z~p!$e=OgmQW3oCIgWY^ZJ5oROwFGyC^239hHX06yZ@DmuXu9Ww_x;>QH3!T zs#y56)InXHmaa+z8{HlpHAd>|MNGAJEBFOz5PUb)bdwtqP1yyGz9C?SGtXbR2L}+u z)#7=^E)J<47nM^xqXr*JXJX&PWNq_DE^DGPX5N;c$!V%#DgB@-4nr+(@4=qDfJown zE@PsYGxG6UV#G9MkISGUQD^eP&~}%J5wICJkcfA{vhuKKndIelj*WcYs#^*-XrI(d zo|y0h*EKsbmcXZJmWpbh>7WKAi7&ObysCl`Z)^y+rGoI(&Jl-vitj9t89eDD(Uk|R zv^MjjP5%pPZvhln5UmLlf)j$fYw!ej2(E)W4DPOj1qcLp3GVI;?h@Q}a1HJd+{r)L zt^aN9*4Ea0wOch)b*p-+`}Uo4Pj{blzVAC<{hZ8>*m~1-6Kwf~*g)W@SzdQ)Jj90@ z9nsZ53rPWGD5Pa354+ElF|VwoCzW4?drFSWtOBo^X9jl>0BBqk;y1{6iy8~fl2JZR zk<@&rh0TpO&ihliLRko8!oDf3%EpoJ?W2%SvvZ~n7->RWBs)~|z0r9BK%jEY7OY0o zDT(izh+Eur=3aQU;%l)}*tVF);iY9-W|m>7%|;uRAQ<5F;*65Ol}_2n5+bX*LxZ2kh$C1udXjN(;>= zUiuyaXe`B3fY*CdgV^t6awN9szL0^>cnM=8!>u7|pY@lH4kuf*m%c!n4n+fSvUL><++F)Q=c1Ok_TI>1?sc z*39v!$iZAFoTt(4665&LU1rjp9y>Y-H+isnpTx}IvzMvhp4(6v*-yxoU+&e3T;O4N zM0)WB$LL^rjzEyA$;mWV4+?R$q$#Fo!X1ka3~eyo>Jfu7Ww84h_yZr-+QBYNtU>8^27ls0zd^8I|K4NhqZS7AMg-Z>Dg?nvk0b zcSua3n5D!mw4_}I)g0T&Ix4{VJUFi_Qdk!5Zqe?F#M|bHr(qVMpt{J-xek;b9bf6; zj9f7@JE(KGMr835-JEMt$2+F)tjuC$=t|st@GSJm6x7<=QHygdSA=n0dc~FG9fv9r zU%+j|x#isXeyNxy%xXbG*NsD^#hjJKtIoQa^XY2x!YhxJWxNuv9O`-q$dj}qUHhF%oL987@gDBrG zofNVoh0}SRd`BwCKN)fpu;~+to|--A!oLVEOXqHke9w&t{@TDkfeQ?G(I}9RX_A#( zKCjcY?_%p-xw%n&hGU<5jmkoJuHk%q7f|V6?@qo%)t^P6$-m?|658=B_<9`#URu0eT(HLi%Q!BnadljZtS8D9G? 
z?k~*G*KDK5jsW+V+_Tc%Od4Zz+2Ufb`uexpIIf|8#wh#!_;L|yj4OWn)Fbq%91&M7 zQbwx0LX%OO!-!Jq;%Z7_Y@Ye{WVm)CnrN99`KX`s+ii=z-F$h42=$>xp$s(&EvTpB zg?g$e;R9D@+ z;d?B#m?bN9uVyN042EmL&~HT9pVqa-qT~Vq9wm5g??il7w;A`ib)fqyDLWUf>YCuW z;6@!-`G0t-I3JwxG&DU~RK>yn#Z%#l1683o$=kseQ>IpbsHeIs`?}~pR~^GG1<^>y zjTq}KzfrASq~Jl&rhcyiaGy1(`2E;pVLfyu?LqAtI=u7G6c5;MdT#=0=@P|4-42Q* z=#-mG!p2P$DvsCZ(&+?G%*o#S(!mBY;=+!PNocw;j7UW*Q!UftTepo^H!amP9v@H4 zDsz%7Dr?beMARG9N_s~5npFgcM*q@8{R&a`TamyuA=gK?DD-*XY{x@j660nYMLX$A zkAq{E4d5n44UbI&5Jr%)CD+9u+I~t#hg+61CK!pOm|!nS3$ZG%lH{U4n|;4ZpO3~V z3FSp&HrK&C)XF>bJ_SHE%HFbjk4*=30^zMN(E;q$6U0>JR=@vjTTJQ~5m9N}1h@Ux ziM0S&Iz;8xG+?e@`Q%kAoItc@I+Ma!bA6!CiL(hXNCxwOJHE_f$%Um);a2) zHWt_-1Rb)=U~7NVQ-*vRKpyM2CC3g(sFTJ9V4KlIeSx4U3V(@^rBPXy$YK@cgv_i& z+7`DAiD0&zbHjJRzz<%!-AQqD!PgGX%z0P_tQMl$QSOTQ#10bJ=e{&AT6p&9@6ekP zIY&kKdI!64ha|1cb!+EmnU|GtX)i6?HX*9QflaUuom!m+XlhkKt^_9*Qt4dWgo0^U zcS=oe+6JWpqWYX;zV+EV%|N}CtcW{o(x8&Joll5_Qj~Z5mY1Ej%ba0B;ir*>y$ox&*#^_bIb*xZ9Wo)olmk5+dH8zrw_^$s7m0W$!|I^l6~G#6+G z@TERl_prQ;XWst4m#DP3%v9g8iDO`n0EpO(o$Kr!X;t8!9zEv+kZ#hc$THy4Xcwdo z87^JkNauHKvp%U~b(?p)E^phhV~ZacaxM-1c~;=5buhI){S;{8(wY{XPYJEJb;=4Q z=4IpuVNjudz))0hkf6!STZy;h^`2Pzq9gtp$n#--2F}%sphC=41clal9$6SHQ+{N0 z!q3JXBZsoWu4BCyQilLv+8o&!$OguVY>B7lF{a1y9Py~u4*Tl4po`(sb+Qv>sf|Hl zHEoPJ{5#+~^)G}kNJtn`Xp#s>1ZwK3kr)(uIB_OWnn8Msi6017RP!0?st)TkGfDui z(}$h^sz%5EPaaF~KeQnC;mubXec7;mg<61;%>WI+#*XIZXMY-txH)$qOszFh7v#p}Py6?j%x?;Uqy(B;%|kBc*`voZv<^zEyi52pC>H|o1mshyh)@18oXPap3(AH0tJrecStQez=Q zQID@WLpw1tv{4E963id_5miN>kv5%N1of_!a+DA!buLKCf?wTzX)+w^Y&`%cWcTLOwQFei-B*KHC<_Z}`zhu(t}D8%C!9=jA-fT+2&_${8xOi3N3gUo=3{B%M?2;I9G zw3W^x*C+slaQQX_Mb}%5kD`7icAKA7AmLWw>@?wa_#t)3X_FkrZ4&`l&CPC^=Tw#k zsL9gY4ncs}No>OS|%x+sOeZ7txRC=1n@b0 zX#^=8PR32y-K3#G{_DBp`EUZk>o_o%E^j=SFb}O+gN4Lfbqc&#_{J7>vTQBZGWpGP z>#22hQKaMzU5qJ*Txm!|Nto?m(s?hc<(yBE@Al!QU~&2GnUn*v^7c{vIMf-kDD1j3 z)!1!#yGSr}@K5A%hi}q3$}(s1?Bxlt^WdAynG!p+d0XFv_Tty=FZF3B%6!my)b+J{ zN|wYQy-9%f5i07J*Ur8gGAN?Mn)|A$e;r%gX`bWz@qY2XJT5IckS*(643C`N?^ZeWPVho$I#arq*X?%f}tZ 
zaBAyR^sF2Fc=F;(X?sF_RD93dXD~a2H@P(Ed@9D`Sk3Vy_^WnAKKIs~yy*LD3DrB{ z?>W;vW86DQYmoU+$k82Ja#kcp7x(4PdXK%rqBr=-UurpD=D=9{BT;>{ z-nRjpyVc>lWBXX%<>%d8MlPaFf~V8dz=`yuDEq91MWVAz)%*0dK%ah+rBnQNaw@-H z9nGK4-_xCS_5bk=E!zf>7oH_k4smg@d{cdm{=rdqXe)>2@GFPUWdK#X8$vGm~nXGdmivqN<*itT1*B(Q&Re93Iu zh&-q*m5G=Z9|9=;UgT{3f#q;Sw}o5fQn1+}LeO>9cU|sdVnK;@lX>y0C>SzhSqqma z&0}NZ9ozsWUI^^}y;84;L~8n#F1!#-6EDdz+)l0_jcS86!Ot9+CCgm?MH7m|V8b^$ zjr>DX{VVZL&ByZbJit9?hPYY=Sgp=4fdgi`7Ter349zc)Ij;ycIXcWQ@=V7H8<}4D z9XE2c!?WdCgJp1%S)^ooWdk1HaoPgLMP)6s=$){IYKPNAOT$T5wZZi?sO!V|Z*{<) zns8)KxaJW2wP!WXOq3zc|Rh8w$IzdmlF{kn$ZgeHsY%wD@e+9Kbc9UkNTiq)^_ z*i{=36at3Fhay)%s+H8{Jy=Iy-6TR6C7LPAbt2!VQ=As;l4sl_)tK-X`C#J4u99}0 zF1wZP)TtKl2b%ZkO+5h^nS?wLBq@8cWDlI69AwVm9zPKI8N2EC6UC4F)iRKvMpV5IVLz6j zg=Q3jaOXWxhxFU5xiS4|=|C>-eo7@jOjZsTnLeeH*^YbHZ10cgT5b74>~HxpA9L>9>u>Outtxy`(}+|f9K=M>HP*HnM;Ny53XGYA*?-{d z61M|J>DA%oST(guHE>*73M;itw^QcZdAZeaTR*m|tJoBqv@Z`w0#S*478N5GML_h@ zaC}q>drERc6Ok%o5}}$?9|i2vNW~Xg8lw%ym<~b<@EGriJ?xo@Hrm7DPc7}?Cs3@h zj&>KAVaGqSgY^}di0;0wP1cbb%H4;HS%(K=7C08kmeQ(7)??n_1;&=gdM{9?Csn8{ zmZv9YHoGy#J^>iZ=&Yxf5VEf~>X~UhFtLx3nD2g3P)n>dD<`{v z)kupgi>oz)AwK7r9gi73o|7dkNH{;K%meln*N@coLHhB_i71xk*qc_&})VI9kq3 zyg?*mMsV zKI;#;aaJbG1x3%RORso^TVlRgjl+GF84m5ZY@xA>LM1o{V}x0VAVryym%>!~k3(`4 z#PgxJRs<&Ep<@IN@@uX=t^ptX)i7<8W3%%SjwJ?`Xx01RmZ#P|AARZXty%8c1vB$y z4^PVUPS$N#Wp8;`K@S4q)&#Ap1NR%qctI4AqN#@-=Un-8ltuy-?Yez+PU^NciaI7B zYRq(_3gI%DQ`9gx)(NW$xi(V5u-slWE!_C!DC4`9R`uR)MFMDWeLP$69+a+A{bN4! 
zL3EBzbW`_)MmMI8irtr*E_o}hyu`BEh7cD%096k~O9^iefl_Vb<~6|P`!|wdD_aNr zy2L<4r>hMfRZo|*i;m95<>)yoSfoA30up>UueZq7o5=`f-n`q;_U!0{FjrUDX@z5i zw0`;K=21zPJfVt+t%BFV{^b%GIG!MvKBh3U(dDt_br7NN;6qyZJfb6}bsd3Ig=U42 zS!4=wNu3PiS-H(c^+{1Q^jvW2YLkD_Tqz1HkJW7`n_P0Y!L|nJsyFg)Fse3D&$k8= zr~(87hfKJTZa8a~2EJPuqoZSzCK9tpAFOu+pOo5x0(umm?4$+Y=;!=;`5W1eec(^h z75(%nH0tRB4CdQcLK?!XhQ2;4M3aX1*6eb?vaO7#6&TFz z1|NyCE7;q_zjv{x11QYJ+uivfaC`_UfPnkbQvx<1K8pb!B zJ|fBaJODk0Py|h+)DWA~b$rJ-;HvsUlvH~y|LiD}!e6_s_m|zRv5e8!3PbxBT zQjXxBD>VSoF>uAnyUCfNN7*kpH<~{EW<$FE))Y-G$T~X^g>6sKbMg*k#r%g`rGcm~ zCXD%amhnnNQy0hb#*Y)Y(7Tj6qB~vl)*k$HpQ&i+aoXic2$&f5ti2`QCPvi+TUt@N z^5U!R7A_f!W>*%i(H~pgjq3Y?Zc;T|)`Ez-F_Ii9G8L_Eom+%>>N)vrT=TFh>*Wnx zb!!^0AE&qFi&Vhp8DdoZ146Z?bl&G$fBB|*(tWSX|(YYF(jM-wv$Fn z@hfP)W+G=!Y`;5T(>@KyW=Hn&uHrC=*i8Glf*I9M?sxG1o z=X=HS`9Wel&O?1Wbe4!V;f$n zon4`dij(KTO0J3#D=5b;V(@}8$1U1odc1qR1i|x+ z89Lj~Zz&|fl=W+;AoGVV1g%gZEwC@xieS*MDSYbZ#;!N-vZG%b;-gwOm#FOnI{}_~ zkXj^Jal^l9afKs_7CO+G7c%GP*sl+qcvsK0>N> z-#gOWF5De#a$_ZD@+J!070$-aka9O(VJWK)#6aXeJ4K`%gCbZ8+lS3Cw@P8rIkGyj z4C|SrDOKX)x!BJY53|E-TJfekGkJgWc61PjV)u4doq0K0x8uU}_v*|iQMtc(eD2uM{^DkBtm5ioPfIDfS_c z&+G=begz#oVqIeS4wPmxs&rRHwCHx}svu!ne7`73O?1rafFG|(YRb_Nenk^yD^SOdHpRdo9VRYkW)hLWVWI-j|CKm4QF#o7^WrtNHciYzN+AU zh3T14Tcy4S96r}DPFrIuv0S8Aq@G2L}&QYJ)&-{ZvCmVJR>pK`53h7o9L~bLH39*xkJ=NH$ySPeyT1Un(8qfH9+%B3v zQzuj`9}0jENamPQpAD7p`vA=>gR!i7eEPNyN_Zrx3g8KR>g`psIP*x$|tV`Shi}i5I=J*ooqIJVRZV~`6z)_#wZk99}>dh zB(eAUtws|43oDf|F|S-MF{=3U$%ygIH@qFTMJ59@DarALPXGAjU>2M9qIDpX@)Tet zAz&7Xu$2bC1c!#1tHCBmS+=c3%cz1uS&MkI;0kj*IYe%m#4FAr6rv|dCHtaruNxxqR!GiSz#vyQekhu zRMB`cppv}9Nh&6~STj??Sqh`>KB$B zQzn==mVb*-R`6?9Fh;f0tb#tANVzVxAie(xtHoeVF20;9S0&e7_bV4?GMu^|=kJ=W zuf*+X_%1j}yBavPaObNDvI5ZH*Maqf<$oM5|5xUU{|PGapE#fjUU>_M!wTQ*FcZj_&+Eqd) z>tOMx0-OxE+Guy#iOOo{*gi)7Hajg_`<$7q1w-Gap`GaZOv{rPuf=85 zw&KcwpK*Q7g(vTOh%xs?kM1mXW|+E4%yKA{rjTaQWW6e8K zq_F-CB^t%O>lEkGX(PpAm!Y!^4qy7DYL-L|Gu2N~%MDhi%h4NHPRkW$c&=?Sw8gC+ zB$uB-%9Z&#ux<4ryvD(cK%6%Uyre3YNH#nVh*_62D`E_ce?!tnj!i}$X!a2RE@ 
zBV+#_gI>_cujRR3sumWW_FB* zK89u`L$^sk#8fl*ipMH}&z`Q-w2VL(G1i+e+b=-qF`L1qy%A$*@{wS%hC+foTzbPb-I9Z+D2Mu}qs0g#LHy!lfuBK|3&mlv^GS$tpj3 z#X3vqF6<=JB;LF$SF?bkGdOWRW%-fX?5TYT^*aG7juNm4D;>*SLy@U;N#&!FdOw9u zYLoBW;McVYSCBlj>~+#FHVETT4)>Rt4O^?tI^=bcHMu92Wl{kRg*@Q2fDEh3(R}t@ ztFOzdBPG7MN^P6mXVl2*(a-9KK}8sd1g*9Crcq3HlbR}2Vrxyy7Do={>HL3e_{Zy1 zv*&fMBx$R3d!;A$bCi=KypJ@@0&QoULcH?46xe-g;;1-uSdLM~rx#SH3R=u#;67RH z`Tn3zp-kpzXRJ}~%U+H9?2DV=lpvp5ZkyZY%^Wd&Q6A2mkiO$KFALykil8gbF!lW> zH__EPxN^Wd$uV_7bP_AfH7Iz%Hx*?{=9)9aclmH2lEe3OaDA0&>0#HnXu{&qQm|~Y zg8eh1mNH4maN#;jEzMT*{X0b#8l&?SaFP3C_Mq1FTv!yV|LiNwslq+yxc{W)bUO?yxE^B~IMlX|C%fCs@) zQgcS^p-L0CRIU`#59so7s~(OE_;*Rus3gIR9Qu^^gu}3|J{qAH_4USk_Ly?egmb}-o z;HU%g5Z6(%8i69akTioQkuwEN1{+N{{)jJk^9fV{q*v>`M8n*EhmrqY0OHPLNhYpAg$Qm!c&GoZd zDIuwi%VtIbzNNufZE5;U#p@Gw-e(_D|2#_DPMdUqjNO-MABHx?S1&6`p>h?LrzQt! zj^{ri^fpIMHdkp#`DQ6Ec>OjBYB6)4c|9fC2X{5}F8N1?6KW%5wiyNV(+aaHO3C%= zUQMthLb#FZajy^*C+=k7<}?6_k}O>7=1H6U(E8bvYLJQs4^-D4=)S0b*7a;jFxWB) znq-TyMq{dO(08Su*v`a@Y-A=tsR#{OBa))P4*$vEd=A_jZkY(EzzW%ksPnvi(=wxP z7h2^5U2M?iS5%!X;&OP)$wme7pQ9C`Ocq}FD{TX_ntHUkUd^b$)wq%E)hnYR--+-j z=6YlZr#V6=r{L>$xjA0q@nU*pzK~rwrJg@2S!jA-R`+kND~l|=$EuLmO-n5&BXy7< z-bT~zeVay((oS(Y&irgNQT z1EX2WG67J=d-1xZL?Ke%nf9NzItunfpJ9K<&`_8U<;8Qjw3IulMvjaPi6_Jj&eB$= z$15P0L@m&e@;nu1gWX8>x~dzM6g~&D6}R-iI|HnGYu#oFiwb1T0c9Mjg5s8SzH|l_ zH>JxLRBc%z@!=|tJ_7*Z#diq|%wJtfLox@`I)xBTfOmztgH9RtMmGgDkVE%clg&TR zD|7y|HNJvJ?TUf9XU1+;B&*V?EG{hcW-BNt2tnBjx~*itee)R5)h2{3+h?DfJe@M% z^($j6WXz^lt(*vI9rp`>s4K%wyylDU!)(V4yH)wh`xAx)6Gs>)|1h&DETq%u5Mplz ze)>J#?j_}-ffKwv4(^rGGN~*NkPBrV#b9GZD`>sBxcLj?x#i*T!~tnmTxYIGQOvwe zP6GC&pkCBfu(pl%f>g|t)$D(2XmGE(^Q_w6uL@Z5jR$EKmoH-J!`^23)iA(O*js#K ztQp=8RMgzk!!aZ4Vij<1=b%Nx8s94ZVG^sNkBTseLNOmGi{X(!*LK|sqDJ|Htg;RP zM=ll%Fi0-ep{@tjS1hcxy~EBKJWXk$Ce%x>?m`Aggv2PoneBer&8Mujn&e>>^Joa+ zp2(t!m9YJKyr$<%f(x2;)W`(ddU@rf)@x)4bo&!7dKw35blXn%mF zMoZeL_Itbk{0f2< zp0N|scyDV2UOX+>BC#9Ewj5u|M8XMeUfgk*I)x(UE1$vgr&wWycZqq{awuJ){dsK1 z$vHt^DY1I4tZ1O=M|`5t{!j8rhR&);aR^jt@b8$lmqYbpnF$NTQE(Z%CNk}ypc1WM 
zyW4_lo}FAU*r0n>OMYq9qly~Ll3tE%$`3Oa5xqdM#1#G=n=!)SRWn+{JCbx7FdLn5 z-c@cF;kA%%he~$GG>;X;(%#ZpR5fx`?ATb!$%kBNe0G95TPIf>FKMu#XoT@l8?v=2 zM_W1i(&S>`VjLAj-wcBuVUeGI?fXk#(DMUF9m5vY$DkOeM`qh-r;LH+{EEWMYK~0X zX#7=QXXu6LP%Pi&7@B5CNnZMWO!c*1`gsvjqvF5-WghN8xT@DeV*yS5lnE0_UTl1r z44GLfw??2g^L6BUuT<^VeLgIXtw$Vqj1D74M)q&g_IJy2nCV`u)$U2}qj!hhXTzmr zCT&(mttsU4!4Tes0kaXd@q>vmYqGu@x+=u{NQBwKOdEFuS-Sa8Pn5-FJGK_IxZ@=p zQG=$B>H8w7$rgktJnWeRF4FstQTw9Fo8066wB1cB$b9 zFHBixcje$p8~J1rG8Cqz*nJE=JWRf(C-d&au>EgR@Bj7my|T1wWdNyeZE<#%R(XQ-(xvy~|08Z^^_=j~|Fl>FIHDc6pKg$iYxeW{D6o9xwc z*sz<^_b<#sI%*?^WQAh?TwJ6dRzVhdGa3iVT5_*!Nwu3yzgzK__^_USwNRba*tt(_&QA6Ry#+#&(pY;GaOZiL-8xN)h$d%Try<` zOQ#;CZRES15u%7iZO;K9AI&cZ_G~T74_mKG8 ziE;Ma&~yB<6MvB`a%G2J`<+{#N1}(nFv1)o{)YqAe_>Xlsh>%w?%r}9 zg{dE$)gIAePpJRGSU6TtHHWM^@tIo+V|MpC}ajW8Z@H|zD27x}FOLY7fk(8;c#{f@zdxUX&hsI}_Ad;0is|#Y&9OQrqdO=w60S;+(sonj!zmRS=U3jd z(Ox9ccy1TUNY_7~36*qbX~QA=v?)KZF0qDrmi3(Xr0E`%V#6M;T&&nk)_kO*GK`g6 zMhZamc5P$)uE}Sr^E6KF%>v~|qp6GfmhPF%<)dX;Nm{=DjwHkP-kz7I);p{B*+Q9~ zcduZ_)ytis=Cd8*XT#XWSlg)_Fh%-Cc#ZkDXR`baFw4td7_W3Pln0y*EEqk=9s)PpyM7z;Flb1 zKN9sfanq;azc9;*j9j{6+gKGSw&du^?0O1I);PN{QzL?pJz+1}3e55iD?*-Iaor)& zKL|w3;CM{d2lY;mM&&wy?sJomYR%I55F}h#Yo#9VV(+ot4ziX^`2kMf4d5E8iy{{x zopm6b%51IVcBQ)80L?nh{51~>&pJTWB_eGGNbOjnRFsyz?iO0rnN;Vi@@gceSk1Wi zvrB;veZ557#7>r%LbCV}*2KFrA=PFGxk1Sui^8)Y%CYHjEUR&{?L;uAHK(-L=<3!W zjT$cI%dAGnDRl^E1bvSM4UG!b5Yd~QR9FP;cQ`~nHI=&QEUDd7G726OotR(V^;rE+ z39`b9h9x9b7b|q{|H33WE`VS83H{g6$r}_9|H3eCLixGe*r8gKWNLq53h>&YVq=`K zrye4!PyV|Pv0DR5yWAp`v9D=n#J1H-mrZO<96OWTgB(#0Q<=UTH&2!XlOicmC)KV{ z<={#8>r<%QiYqwNb1Lc$dN?Tl=Eu9|_%`!}{H4OL(f??A?CsF&IabVwV3H$81FTzB z3oWTw6gsrt;T`n<@XUqBeVNlI4DI`cBkI+Q{E5wW^R7VlZOHZf`7g|Y@l#GHu1MU{ zTb;=wvnnqp%YrHgjA{vl@*SP%<1r&HVL zOrIeZJQW&{692!QJJCrL)W;0Aom8_$yDt)4Mwt>knGy^^{(YWIla4p1HyC4My^eiO z7%zr)E=kWR!8wb68X=Cio(FE5GwdJ#HL@Mw{kO*3w3CBJjz6`=~Oy6K=#|M!F#`YM0Q1@f#jX-0C zNQNZY+}M@?@WxwFR-f234_GEW>bd$g3{zEGWET6*o#~yGf$XI%%xk1bsFppW+2CA6}G7J*;o8 zpWZ(9SwlxZI1EN|o5yE@aPh5v+E**ccisKCJZxTtmYh{Rk{*KA 
zNKMIk;(PDn2c-6^SY{rTI9Q#Gp<&P8M`MmDqo_EGm!!Gb_;%ARdSlM|<7njxUI0s< zfpej1J((-`Y^_GC^|QIo0uC~X@&LJuSiD5QZv3NwwQhA0YlZXKXe8Nj?GRQ%FE7N8 zY52Q#f=*f!Og(9m)Gz1;!$BwdKM|@VasIDTrTj;yGwVWu}lCsUD0JNR(NW(OWf~}-6dI=^DVA+ zFFk+#lCIun((q#zkBN%(ahN|^0`rKvPUt6Q{Sxz_r48#AcixvD9<)r!CY`TTz|$^UZ| z67*R_{ZGFb*EQt}S~OZ4IYo}rMJ2X;JBb@NqcVkAd0e!wy`+t9H|aqiS+wtVcLwjC zH2Ht9U(=LhnAhi~8gkYx2a-$bq4C)HgFBv7>Js#t)K&hVj>+whVk z{YYK&6XR!r!t6IB18Uqq)g6I?)IL*tvc&Bl9d`OGK8e(yF_Oxw#|)I&Pui*!C`)hF zGv*~ou`uOJ!ZGzvuraH5*pyySm5mCIulWEJeXjQHXPfg`P-F9kK|mMbxcEtxmTtZdn9yLSUS$|2g35 zHfW^j-xnHc5*E0Ptg`XC&+;npxPEwc=R|cB0k6EvcGh@>JgK~NCCuSJwiQ{tHez#| zPD5TAk(7=ChTifS)N{Lhc=pIL2FEU*$+7LkQ!1vr-P(ETm1UXek%yko!7G5DWnwnLx3B(W?xQ!Z zg%ywYuUyA2Z~o-IizwDm@4X?`bp7`~N>=>OxNXlE+ES>}#U4=6x`e*tQ5lmY zA$d#+jNRZg6=7Y3_z5+;9RQ~W1GLs!l%;u7U?pMw{h@7?|t^z#t~?xq`2%P$-7E|wHO-lw~Z>To2JFn z`nE&np#i*C0d!n+J=?-otAakm`;1Rr4>5jBf1H=CJ9rL3U1ezd(`UVSy-A@cu81NYs<3x9`E_ zp})L;MtOs9_l*(nR=3-tT)-b_tVJ37jt%nDU#4ZV{(b)EmveRUf)4+iK>+#k{OsqQ z*m=jBV5n|))r;R`b-X``Uxj}mi}b7GF4JrErV^NUl@#+Rx!n{RL$q?D)qsf8)NM~gjYzGun2Xrd-9umy8JWY zmGSi5n@0uubVAPUhqN5%EOlN4qW=wFCQ|jXTe5zh!nCsvx~@3(dJ$TGRd+nN^ z+~tJiuHz~VwI5lkab7<7kh+vKUOsiEI`EA`^0O%aH4JG$tsEUL8YOTV>zY(#r06N=Bh(U45dP?T_x^t?~T$YA{|R>@}g(oTaGs zoMQR(rzEFueO;sJ&Hv7lApywytg+r0U8Xg^LYy5BaV)vL(1=uVxCD?os) zxEv3w{W=pLHn-nqO~yU*XGnYInufIcMN|JMCz9w4UZ0|2`t4`}Z7@&5A19^?n@NKe#U%}Z-rckr-0m!`PgM+YWmu*~F^?Ib5#fe%**1ZmJs(u*gDYO(Ui0*++wKgHLBWyqkylk#7HR z57VM>HE4-yWSn7!F%P*+h zcW^)uzQL#Rr=u#}Pfe|Z&YAYrC#JlYIl|7dB&i)&Jm>kFoim1*O9OM{9}mJPZydwW z?%;mZ3OI;;mtC@-Zo|KF7kTfeA^7sj{II!b{MG^823&0l4~8WwYfVOYu+( zSdPqIdkQ)8iLKQ`tjW^&2rVu*n7=vJ0^8*(w{Mf)^Wa5XEHfYr5Af9l#m8ea-;|Y} zeym||83yjYSnJlN?}wiX@#Kqx4et3G<#a9f;iIeNYJK{~Ea=k-M{ep&3ywS*a+um0dxDRD5Q$FtS-IcRP@& ze|^5$Qg=EzHajg7kg&ep0sC1d5Lp!7n?ju(&6D{!EIobokh+&^8RVHv?4d_%V}p2_eCqM2XY1|i}Q&T z6F+Q*Xg#VmOafuW1*07f3&IaTu1JTp2_tXU3sp%LMG(@3o6Rf)FQtl;xC$k4yAyGn zfk~)#V=m`T$(_^SixSPbAvUa)kt8JY@Fq>B8284;=JH>uDNcxMo$wR<4al2+8Xa2A<%VJ+Qbl`bu$HSN{5iOamf^!iUT1RESV#?&& 
zP9_=v`XCqaZ9KKkZWV(_#^EE#5)pFT_9X(b`Pq(bjWjm>x2y| z7n0V$ri(jTezNQoiV9pQGKrzf%WIr1I)k|$J+>MF(vDkTWvv-6^x@Y7_y>Ue{iF($ zD?yDWXT|Z%Qv}J3gC1v9rO@1ZO%;iYG)@Q~Mf6W?mq?41xe=P-o9`GEwWh}Eq-niq zu5wUlo(!U`nmL7s~M~Jga~^tQ}78-}$U7tf+zC zUJLuga*qEjHC=v&k9@B2m~QX8@h0d7;hcjqG}g2c-#&fvkvIzf1pI-)`NN@=w3N2W zOx3B_dtZ#$bu>KS^fREs`vQCT$E)>Qy~=u2)TjiUXyw&t1CpHi71|kM<4oDlePj1V zc=w-CC#XN}>6yFSKNcLN?v)5KqgE&NHzIwQ9OUiwz^bLF4O{Dhmhg(59*$P`8_{^i z_gd)2qgZIL)pedeB?;UfeOB({O(rbN{#ETcmtH4MlT;E?*<380&G5SxTdpQ7uTcY3 zYr2`6r^#XgE^jMmJ_S1`(hxO&WHXht0eg^jIMyqYlY>wezWJu zi5sxqbH;3q3pC&BUAaDHiFDQ`-wb4dZpafh-R6~iJs|dttAtb3N#LV|}uk z5oKkRFZXy3A;Ztc_DfBCC2JzgcbUWGGpf)#z35r+)Rr(NRtx zai_VK&qVD@`)@IWZ+>QfVWdBUbHsf^l3%oMw%M3X`E%hC{P5>56mlIDdo-1Eyi`h4)-~B&b_6(mUV4nd;^-I$_*XP3w0J<*cSm80V zi>E3Ny#q3i10o(-;N4K{?~`nq7-O;$b?H?4% zv;wuT9RCL^CG@{qp-wjP~nJH;EIQCPCh0Wv|GQ&uo zn`wW7mWyYC%+ekoWrPu2tu#IHDUGBP&mYhSq$@%4W~x-hq}v+352E#Qd{cT#_}Fgz)e{#^Oc8vNPW=n-4ENgszR%I}>;ZK| zl)1Px)o`*(n~sNJdg{WU_&W!->2JSGuu$HliG<+GHezY)Vdq|$QX>13(FMi+$tkF~dm zildGGbqVh7?(XjH?hXMOcXtaO+}&LpcMI+iXk3E?f;0rTz<2w9))}0&&K=yr9aPt- zYS8trT5s+B{GR;|gBpZ3IW}UgKLdZyQ&m^Ani6zxinPO~ zKx-K_8fEEZ6_J}iGF~ZHR)zx->&YOuL@G=HZyRMHz@xtz>1L#_L0!T;(qF?GEKLn7R`)SPG>@yFVmsjn-O=sy^LPn-}TJQ z@2X?ffbB~369OLi%S8aS6 zu)8*}45$hBB$rom9})D1ZXo_?v%u9AVebL31m;{{yl>j*8v6|ksF&*g~56@iyjOQBadbg@xPd^ zmG6AEGX=U?p5Pq_$lHfh+cs-oT;LKHt^@_GL#2yY8-i!q^_riuL7J z5p6Sb<+5VJWM91f*}AI=j%QLnXCnGxj|?#n$9Chz1j)1y)oPZ73Iy9!nWz8%^TiBE zMLu{~ztn32OuM?rn`ENRaqZP-I0-avkUnMbiRop881s5rP90TsG+$w(?NrAz5#*0) z5H8b$)7Cz*q-Y;Y20RN}CPdv(ZxwjCnT)8j@D)#VE}BVV^mXo05hZNpjM5@m@N7h4 zv}ItPBKRpkj#-vVYJvkAY{Ac9H$U^*2qv2Tshn9M&r9ov$zJ#4!^4H0px<68E??cZ zVB3ebp0Gh*iI4MAG-|IU(3hVTUT9gbkiVQd07+ybNKq)f1G~m4TTjj%7GRo3Z{fR2l*}MUZ39e_V*xO z)PSqqZ{4p$q!7zg!4#`ovM!krXbdE^tng>$Ps$d&%1yUK*4F3H*XO$q+=tN!mJ%n| z&A{+ui8t|`#g(bgS+$3a57XDrwJVgD&9{jEpsG&(gZdBZ(R2RgBn%-i?4Mjk*u(7C zUBCf*7<2na@bJxV8<%rGr~`7I9S+Ulu!Nm=6x9!p<6DrL*XM9`XwTanP&h!YW()l~ zFxw~FY%}?JipGp7>nB&P7G=@T?mK<$?M(zb#jk{-^*5aHES8z3$Y{AdZsm$@4I5#& 
zLs8iFBA_$IG0+S9m$#X~dz$LvQI7pLM`8sxa?Wbi$HfWCvtYpeOK^Y3)6zTmVMn4Q zWn{nPKPW8wgAewcXTs;5lG&^OpuTdvzfwLwls4gh61a*fw0<)9UBeD<4TGlcy}l1u z7Ma$RICY0s3Eo4Z;`^7q5>GY{xBqrKLxm{*_uU{>aqj=Q-CW5mBu@T6x6_Fal-7k5 zPcXXot9xz>?ViQ|gF5*0NpWJW^g4Au8JhPmj1(aFa=iRj1^GzzLs-2bf}8^$;jYLx z-|Ut)%uWiN3PTGa)@hH2MV`ld7g6`XASoKRGTu~iuyEoP6E^F&cx&)cmh`%yvEtaS@gmm$NVD{+e!C0ln${`2A7@ug(Teeb zYBr=-FwQ|26g&88|MTb#KYJmy!rmGWN5Mct1CODQ)+piYQ~)^X@FtYk?9{xioN ze+OXwZgYXr4N+k zqJFy7>!(d&c@L|NP%nIyTy9Vw&Jc6qFACTIqwr!*F`|lcqQgMTKqJAk1zw_axX#r* z8Wbz&Jh=iVf`bSAUu`*%Bga_mTMWelaFRwWJ-V1sQ)J7(z)#Uw#8Raowqjz+*hVVw zs8Go6bU3FA1I~fDr&?2_3ksy}3gz{T$o1lqQ1Hg-hS^p|-*?lM zXg?}TXg>=fC4~~I|Fv_1{1E<6W#RwptL{{cJ$sT;%ZT;VF<4cf3?157izS0RF#KoQ zpI`VWnY_-u1SZv6am#@Gp1{ZL3;P7$>Du}NfSFe@R~`oEOj45hhVvL^3q$HO4-(NN zR_^_IVkxan-Z_rHiL_p#MU5my7TWr6<7z2l!+{0w+Y0`b+=+B-Jr<_4fv~rWUY{~ zXOfP-D|><%aFVXS3bFKvQCOIdx94m*-ZsTu9~0z@ZhAwu@8Ut0 zvc{vO@=)x1t;;-0+P^_?`1E*lT}^YGf*%QG4ZW@z<7ryfk2~4&ziVOf8b|Ou8!Th4 zo6KF*VrYaCxmjEs1!V%hpfzwUD$`yEG+?rC7X;+C(YbHRWS`_LCa$Zw^hVa#4+t`e z8p^pr6A%>;a#80^DaahpOQ}?1v#sn)da6^koWvG$`gvpRj&L{a4KnmGIY0W`73WO_ zA2H$kUJHM$+QZ~5qc-OjYNB z^Lwc0Z1PI$w~qs^Qp_HkE!R&s_Bi6p$M~7~pxWCVq;Aa@t?lB3F;QWK9WsV(PbTNH ztRF1Oq?E*hWg`_-^Ge&ZFRH5N-3XI-XoKez|MFM;4&+qV*6Ybe#7_UiRy-DCA9APm z=naq>d)`YxQ>;Hj-MVSle^EnguMV6y_{j=|fjdr36*u6s1C8~vPb1X=g>$R!`4{l> z@bBF3F|y=S*Q*tAM z^bXX%dnN@f7lSrM-JUN>M}brTN}Y=@Sx!$tV#U0g_@ocmko9_5`MEl)@#1)jy2~c6 zs0?u*#Z2iK-MBq{iop}CZVfB@5U_KqrjC$`NiM09lcX8C*9XMf2+7Mm3F0QR>4hfY zZh|=#t1O`2Je8?MD-NEr2+W_Q;9eCU=Zf7TiWRiUDH&>u9sW1u6J|k~i|ET2r>k;x zQg%qqZ4aSiS`KK_FgdFm2!g#0MuNZiMeA+oDG5{VjoxH#nC-a&Hz(aEy`x-}qonE= z`Hk0_CZ>UzGi>Tm|Fi7)oGVFX9KQHCaj7fQzWL{tau^lR4L}kit(f3e1|y~PbGDw4 z8OYR(tK3&f5c8Leu}Iy_QWk@eZ6W#% z<~Aw4huK0os~hsE?;=UxyyaV+sH%KHBbC2Jvo#t zdxuYDQ$|Q5(Y8mYwJ1m#4Z1yR@Vv6YMDvBQ%F>d_Zj?9D@Geg#)&;;-i`x#!{lY0h z?LBgDA$2^Cn8tpX(V@`gUg6UT8k9e(CpF<`B+D?2mw1e1S{99`=s&nGiW)>lpa^VSy?37O>HmD(25W-+6Bn zKVqJ(gVvk74Y}355R=~;UiCZI)x4AMcFwK&IOf}rk%9S6!RMRE(aOd>5avv*6T3tK 
z@7p+S8ESh99t>p$y^N&{3a_A|{otmMS49>%HfcOlUHene71y<~bs6$TYOJkJ~xGl88F0g1LiqA;yfkmDn#4ZIXHv5fM0 z7Mvq6RPp;G7^pmCM$ZZ|t)973b0MKBuk@80O2Xc{%Rf)ryULISITo8bS9`lv$6X$Z z`D{>-!ePWjZdxeLb742gMYoN?4sga^8xD!4HTCtsSyu|wHeIjqpksDz4~ER^u& zBRSyC`IuYkp$XcHk?ER^61&-bZ1-D-m|TWbY}MKXvX63Tq9v%Tc%=tLQVe z=lEd$6y=eKd_TFLXD(^a;)ic-P1`gvB$jZbSfL*n06j7eAU}X@cVS0Ep~ff6+;b@c zxz#mBTUw8l>Y-F5KrB<7oJWWf0`R|kRDk7dYx0Z*Juv+c>)uUR=f!gXuxJV1H zrC&GZq3@%+)U$azkVAig9x6w_?CG0eMvy*x-Qdnuc0QEtOMQR-oEWzKkC+Q^wV~)5 zMKr3brNv4|NRrFsu>}}1K?*5LRW`!vQp$rUYKWe)u@CasjFrn{5(^e&jXPH4y!Qp~ z#Vhwe`R?r2bajx5@liBX;0H1rxbV{hlQh?)P3X7yl^|ao!465q&IP>b4%MbMsux?d zKQ|!u8t*Zp%0l0aCNO1|Twe961U`C$xHR{-)f86S4q{#4EV!RaP zj}<{;QpdUBua-*>&xCsgMcVTwrg|znrdil4lhJ*>W77FsaP^aST#iffKfdCx;wVLK z4ki1tZ;SROt&o(-8UNW}-{n(b2H2?$Uc03pajyoECXi)I``Ol32UzW}$uo+W+KWkn zKQ{Vo`pNh<#FxspTvPGpl`tDtWGN(Soy`u_>2VE9?5*RyAc?Ey5x z+I2UKY%<)kxU=Ky%ONZOdB?dm?LBng$41ywW7DM{Mg8b|G?@=Uj>&;PSK~%nFNqKf zp;Nf6jXzE5@vIXwJtur(q44tP_)IdSQMvxcr<)kH<~w<(K_@yi-ZQ4 z;r|vCUU{-x<*gdEd}B`m_E)R09JL4AXa$zR|DdF1Q0bwysQ z)L8sm={6na!UsHby5?Uqq0v;htFmlr*YwkE__t4v>d`?`7mON5-Fw;jojXD@rVPpA zKMmE7jx8a3RbLdeV8%*G`Q<|rlz1H2;&WIU63grWd$hmaSz27`6P0T}9##+J<7t5J z09HHm?VpYNo0awOzO;CI9r)}ku2w)7HmJdPdVB~}F}tZUBdtv}kQHTiE&=wExsIWw zn}d8*+V}5GT?dbLU2T|jHZ1JD`=Y0=XD1Q3Lo2wsIQ_Ua2p9f&>EB)&bM`6`S6U9P zT-;V5MGH1Uh%55mm5#fRvfcrAW+W@5iy!HyV$RHxB}&rtQd@2Q3HfOY#P#2li6kzI zU_o^S>zPrGD2VXNjn=d08#fH?*wt+JHFqZtM&H22+@RB*`;A)#$wRvkg+9)(5_#VK zA#I)RRxc#nfHQFK*Td7z=6qFq@=0WwGo!pk!uhvlAXks}iM(mbt7Ghco0 zo;G>jpPCydN1CSkkg18EKzlL+)kxlfzo)w%+=g@m+`7m!*Uh8%P5ZKeX8aLNdNqd9 zbk71$dtM~|CdwugyJf7d2~iHbLvU~1-k6nzaaK-aWy~OmUUSRYj=du7tU7vz92)gn z7OnruQlk%lVx$y|e32bZoWNX9GDNuj{M6cEvv3Vsxb34Hb=pvg!n$zmm^}=+72=X z;0}P=!CpV*ihB)*1RMEj^9u=pN5qgfJKW1MIyNKV?zXM%XQiSWEg1>re^5kLz)?^B7 zkD1r==+`AM(o0v-!##lPbShjIecFvXI)8UtGJZHnLWThP9&T@bvuZ;2Xg#3@%HX$n zhG9`nn9S^UC9?}1dYe@SjDA^F&_=F4aGsB@GejyJzl81*MYPEh!?yT`KS0Hq!d;|W-;B(?6b#?rp5gI>4PW%LH03V|tii}wH%TV+hUA`nQ zhZo3`54dZ<7mD~IuzE7hWz7TNvdwG@>gz-;Ki{t|2@Q(oa!&#dThk4~BSq4p9Y5k6 
zVE)JSPM4-i6X86Gr-ydX;;4YObNKGtP?<4LnM`V5(+AoDaPsF2lK<1I^hui29Yj*V zg6xbqSH|Lc@iej%FlGV-`xPVT1suMnB_mHN72B{H%A~>E=2T+4?vOv12>9*c|6ntt z3-Ml^uju~n{|8z3y`PwTlWi!_Vu_YoiutI4XgMj|Tyu;lgp35ow0+F^HirhwJ6UYV z_PY$#AHMWfX(nR1%;m9XFMJ1R`~o9DdE+Cw+I-efl6%?)Y_3BE!6okUN(#|=gB=ND zau0|iLAne`G^xOWod^`iKXKahf(wt=DpdQW-_M19&As7UKnXXUZE`*Dc+}z8fZy-D z2Rx_g@}47F=Wf50*2k((^@)WKbCN$Et7>DO$?Sk>Kt$E-Pi)6w;6s&+Nayl6N`Bm` zJ=TaR<-1I@fnU@!AlfM6cVpHu#F;a2ifCGICjF3-Wlh|~5|zx&l$}gllOM>n9^N`# zT1jQhr6X!oP#`AMkt$$_?RSjv%B)B`Ju6n$geD>q`m!J^kygnTZ&!k3)dnO&@&78z zM|t8UfCB8-R}tK6_sQ1NO$R~pcg}Q<%DGOaV_J*YuCmqYGi3d;E#+Vw2$V0XxPiL8?gOksUwcxeD0Krj=X-ii-3fC_Wn zfEtqk$$CUmYi-}&wC7OJh%_TutRQBdv);(N-cUzFOk01#u%~ycgS7{G#Bt*kZmVh~ zPeGT!G$Vy~_1i6|jTn1QG2ySX!18&FC!9hLAt=OjPz~01@)c|@MFnePQ4>YgY5>aZ zS&^#H5lA~kxA9npcP~aohNCts{ymPy5~VdlPqQ!J_>RO7tHhGTce_TXvlfR^6eZme zPL013YEhL>#pvwA<;x|6qNY$nBSKNS%bO!1IHpEK7(l5Q8R5da*1krd@INn~cOQp! zR>KiwvDRPClg1h_$&GP+Ia39#t#OKm|9-*{(sU~R**E?BYY80ZyZngAu9kAsWz2r? ztnTZ=pqX=bw{}mIsCV@Mm5q=*N~|a=m=g`Jr?!l5-{C+|kVi_JK2w_k%|S%HaPH43g=-V#H;qLV@frqx{h!R`^v;%(-bUyVs?m4bEcjKSVZjXGog4)fu^&s8#yeG{W z_hHyp^j$><gW({Ggbpp11SFvQ~$R7EMKRVH3oaKd(0I z@!wv2;8r#4#8;7~CE_-#U|Z8LV4>@JI~MG+-`224epFw5H0d-w|C}&g-)b&53h!a) zEwTZHBfm}$5|T5^*boG8N#<11(B}^}0dmL;C|l*l zM0pco>7ub&_{im1`jBP`#o}Aoe_blHc9QX{-s^4B_k`?H)xqvu)%))VVbCQj>T~YG zhEq)(R~f)iN($-2uETl&zcZh3MQyPU9V2%uS?ij5?P>Za$5-{mUDoH@334hkrzI|! zZ)weXHl%Eh7Z=mYhhh)L#pkEeFz><9-t5ojBD(s3wuqAo7z0*rR!t|tKQ|+i`ISU3 zlS(eDoIB|Md|w~)zC7EU+QA2q=#UNJ}K`YQ(RvRe90rpXSNtRzS6b(wV5Bk0n95PfZm! 
zja(SzDp)1*{4hA0q3M{FBgmdupT3rT{{MHWSEvFwm4QH_pQ_mrE&_yeTq-npF8B0m zJ-nU$ETw`f%;MqZB2DF_FN#^aiW0R#$`#Q!+RWJz9rQo!IRxGh-JS%uWEwa<1Dn#W zXozo~i>6-}pxgboD@6zwIxKM1idqc|PVeZaK^jV&nSTG?L?H= zO1wB~xH`+YS|HhUB{n`a-V6;>`+SF*YAy(!4MDQ=o&Hxoh6|F5nUWZiOwm-TN74$7zS<7|XxOf6RHH?D%PN32> zL5O6kf+eCH+!QO;mD@dK<5qr+uAClDhn}(@nzmHJID#*I5U$H2n&*h+|`U?gyTTWshNXAbcCL?`wlCZGzp6Q=}@rb%|E&Mpa7<(W^lx zV_VNjrTyYjZ&wwbGd8(Y*r}=P(siv`84;II;>MDrHo?QY!LQP3_+!>dC+PSUM*%ne zZx-|T!ZvMRZTj|gP_S*H+d%3wt=1?~ymesYvxtwKnmzAZg4P9kMu_m;@RS0#@t2)^Dp_NZoVE}~ixf#@3U6>?ojq*2;P5rs z^ym21o|i3t{#1EeghG@P8=2b@B`5nZUi?Cp=GCT_(Da*pb2iV8y}9u+fla!x=k^u(4WXH}<}|6sMCsnd z8pZDM>ULH?pS_c@*(9hViNtvI(Aaprs8&kBa2OYJMm<)`;SFmI_h&#SB3+G$l+EQ( zHeG^_H82Fratpm=%<`l4MES`0P##|RAWbv?}=f9N7IE1y9 zjUvouA*E%iT_+|0owy>SZ*j9TFz_q$XFn>s)S&Y)*>KJ5C1vt3T4zMrq>aXCFIR+1 zddtHp;9oRBo9A4hsYn(z<~?iyVtjg4&!3L+8pe1#!8$kFiq87!`puj%YxOs@YIIKg zECXI5$%4Q4onDUfKQ}up9+E6AP4z8106t(dv~-Q?njAHoM?9ixkH1rECAQ!?%3ghxp($tj{#D_x>T4>-Ro{>mKY9Rq+-f{Y35expd!x`OEpVE6 ztn$ZVsJBa^>gNe{M{~gDT8dyDz0r7<;Zb9sFCRffTT`*dm1c%6>?*IR?(M8+5v9TNIf>-!%R-|l}<86SD)EXe_x9&7v4H)p$n zb%BX=Z(`syEtcblfM?x?uM!K#OTvwhUpoWcic%k~9^p2Kmi`*#t}TXPjGcGdRF^bQ z=1zu@9xqY7G+m?=@-5vvuS=Y(mfUf>O*Si+C^cf7>9_R2J!6b=w9hA=34Y^VCdO87MBjJ%2s#s_!BFRTn+Un9+gj zz9c}F+;;= zWcG3?_Q}EBgC~FSeY_*))fF~qPi#t6k-4~6_-}RleiqmeRmxhdecX&49_1&KM*#g? zC(0Gq+-!u;#wO)NTF|ET%$d%7&6dtN zB26gLW|PKIqBDd{)tdr{kN05#1%17!3-m!fw~^P!qsZ7Xs+t1L zTgSx_#v=Ao6$V#i0)Z0{u(T}@0+x38F3&>1Qjh@zEM+I@fs8DZv+myG(>c(?*>Ulm z^s2Zt#;*ZAO|WSXLqy9eBrJ_-YFKnK6V%v^LM$vW?R6yFz~hih*^Q2&vRc=3Uy;9! 
zF-&@C<=I5iFZ(a)=uYQ>y4 zD5~wJz|L*^LIx;-D}gpuCB(k@yu;_STqHDvQ=gy2$zK09mtK1(cGlZLG8D14(k`E@ z>H(19#6%leIjXCK(99ZV_o-#C!39=g3e7v2COV+iD_4WvQ0MruHv^AP(@t_BJ(0l8 zKnK9ztJI?5iTH)E*NW}?XAMZMPK|9idW#EUpGd(iIQuB{QCE9Vuju+b=XmY^H)PuV znv%0b`}S3h!7V7^-!F+u;KA=Xz2hvWnkIp22s1PaCiUUCJ>q8%4!b@=4r^Ev`u?^U zXkGW>J4yo#lTH)ilb~4YpdtPbsz6*wt>b;}QhVtlMxJ7&USZxwZc&6yIs$A@5khaMV<3z7s{Uhph$5)I3DXTL4=>7VQPccAsp+%qoS96x|;;!pxMMC zmFe85skO=eW3`8U0}ga-_C4$NB44Ba3`AgjP<30~pY-oHo&961VMB7%VPZsKD=_TX z_LU#kh6k_7O`qR|Y)QRKVPfVT;H5 zIwAYp`q^&Ji0_T@1vNwccCEif>+&OZT>xi+KN^vz;|!<(>jU0Roq2Kir^f2c6XS{_ z8R4;khc7p6D*)^aE@;V(PI$^428yW(i5d)62WcqB(%KEeD-1m=Wy0)|}!1 z2v(Iuhf%sv$u%P>WPy#aJ-RSOhg{0~gI+fOU+Eklh;psG{-2Bsf#X2V|GCuvw-w|x zGH+OhET#gML$ZNy&);m*vc$3rkIZ{J-do^$GxYC2ywEixsMs789#>dw*XSxf?+i-y(AH z$m>RZV;oso3>9%$^K$$`FE-o6MQZ&lKVdR)Dn=nHI6F1DrxW&_!q;{akGN!xRYboh zyhm+{=~y{UtU;nu3?e&!FMU>i{Q_m7 z8hDI47mwvbCktZXI}_zTcIs=8=Uwf{>Tb~pw>8h$exvV3efnv~otKtR;Ko6QfhcT# z=x?e{%ZgUb+yrCVRxmDS%W+dIE8~EWI4_g#L0*iQQ3hPX&kpvKUFPlC9q_>xSH`PI z`kjmbCAfD~^C;rW{s*TA{JfM%q^+50}=%E7Ka}rF_hdKMZr7Xj% zVbDD*Vq zC8M@UqAx?8!(pMGThufYbfGPpa=l7yt@Kpl5Xi>ER9r%0bkcij1>;9;IBQ-Es%Yf@ z^BS`#x<5@c1K2=YEPhGQD8SIW*M;cmeJlO&e-rzGKB7In3?Gmbux;1Tx|8W z6~+xMtinF;GckKSA4<^24k8M$j(bd(=6V){FLNPvb2o>syq?0yp0`23&D}Zz^S7HI zvQ`*rv!ck;Hz#3u-Bp^3>fK~wIz-_GCetNz`7&j~rQ`_8_QT%uw$0Tj3fO6D5LQ`` z0e(%GkJ+IC^Rg;aSF-U%tBywj_`fHAPT(4+Qy}g@gJcKNDo^mz`%W_!=l&Hpmpcbu z;B?%(jw(Cl&Ql8YlBZhdSuG@Hq{FdMGli#T5pk17;^^(E^QZZ>o+Ob%}_ zh%`L$XXhB=%=YOr2O;OpUxHd@-$WetmT75!R={kEdKP{AQa||XWk%|^LI$#x3tnVz zP}>~ZtFfC=uZEGS6)?P*0vcPMA4GkL1RJUYO*-Q@Y9zlPryz<&?mknSqnf7IQr5i3 z0fGNmpR`~IY&W$Dv43~$w5vF=N!4Uv7;sl7ws=3pr5gP>^ zAz#`=b-o79p!MEPw=2lbqway9Q*wwUZ;*UZa{1=~u~v)Xt5sRdoMF1|MfK<(QAfHn z1-`_#;%^J|e-lVSG_`6@3y1i+NfLbsYmAb>)r~u5+ed zxie%P=1cOhtGoQE66{Lb^a=vCzI{IDH5E{ZNs9pUf1(5&+;e<`GpXCF{J<5_nz$#H zfJF@(Gd?|5_;HJBWgFS&#XiPjLWniTP3=C(Z)33^;_304c|Wh9^WuCs|KpwYN&xpg z=lsBUH(FYmT!ZSma&*t_U6q2rf!RMDrki;ZX} z44i5R`R1Rz4KCte(_iE5^u7naefTU2fJ~2_LwuGX0ya;-K&D>LP6)Zw##nb 
zJ8-nM!|i@M9aN&po-H1(a=Z}+w1|wWv@S7eU zIz(k{Qv;3_rpyLt&kRTe6ugZ^^iZ)dbj0hKU;5#Gf(46?qh+GA2GZWb{I1!F@g?7A z$Pib2DAOKu4H-r35>?|FBCuDeVoPZwwHM8(`CQCw5+M-;b`?rWya3`Fh z(OUT5%2W3s;@KprO4fWu$p(77Z>lyJ(QeGXwX~hoC&3!@(`ZUR_mDbmd$sCBjwVAC zKg3QkbNB_(|N02cGkV$gN5&RC!8f7rvkfdGs7%Dp@8o4-1flS|p(2(;YW=?!S|~a<$x*=LM`?qpGxQWk$8FA$4e^%xlpm_Xo5U4E~12t@FM>J zOdjWaVc61BRjZTPv@L6_1K)h=$9SME2H}E}mk}#dedtCHa+_{yUv+1Z5p67GbC1zG zBCly>*)grbGrQO)wm6c=f94td`HhPn$nD_?D zQMYSVEXrf&a87aH@G#Zsn=kCPH^kHZkt-R_Y>4*na5|IQY9QV8FUrx>YCt8#{=`;KXAEqYt4 zZUsvWo%LIbOnViMVn*d;t8Si`YuBUD2tE2e{B3^_;-kmoQ-dqS1jv^aY+@h3+Wee2 zSid3ER}l_jv~E1Y5?%I6o1cVbK2g4%1Q_Vqe%U#+P%hR2t2?L?<9%UKFwq?3_0<`2 z8sl)~#caHp`SRT+Q}NGySv>oEBi@Em^M#jtsoFNlqbj^C&$qS?JhtcPts+7&N^swn ziuZ3PZ$9P>2F?+(?kC?)zGy)au*{YQnbNYS5NVsr#h#X&n@*+-RE`U8S{W8tTqQ_ zrBkeN5duYw!tSVj{O5SuMV90$)z_ua@u$$0ow6blT+kbi`x(LyiT2 zI@=7fw54Ih`zXQLW%HiyMmZ43!8~S)pz;;W;vp4y=)I6g{-k=1jSYLm3HK7 z>@^nge{yuKUf3e=`F+fd3j30Pn@($Lj!pR3({x z$5~^ec2!NI_T-Kh5J}Lghn>2%g-6U*^L5wS6EfzLOu0=Es`$pjZv?K0?qG^G9pyz@ zHpa?5i~&8UrNE5y8{&-Fq*+7>E@wrf2nO+-d{1F1OJnN;G9@wEq67sO6|8W%QqbbW z;^GeeZzs-mx=J}zE0_umTE9!M?UzO4jT^4+l`TJz5}ip&x8 z9&sau_?f3Mu1ip&e&JU%6Vodh$Qd+i&q%l66bpO}*>}h-k++nYR#}NC+Gm$n6dQ_* z_U(EHHkd&v1G^et4Ln$JKH|U~UuE=3?Q5-ci-<5=3+IW58_OHR<;y(?20gDxL03NB zGUznZ!ja4eQYks5NYYbuljAkn%{S-BdT{kTth0GjO0=-WWGuj8EOvY`zUrhV2r z4YA>FMV;$JM6GSd`|ucxQ6rFu>5{;D$BH!OWOgRqZ(VgtKj}EQHZj%FRfKU{^`pPc zxPek$Fw+ZX0h&Mb_q<%+F%q9c;Ke1vSsTq;b1D8#@jnQjYu2z& zgM|EI#(73uvUb(XoQa37iT3GxMc01g3(5XuEeOApqhIW|J}0<9u)NfpKEpOv=4IDL z4?u3his@7tT>R?5o{3^@qy>_64D?{Ik;3y{U3D^q(3IRSyW@GQU%In7>0*Sl)(YOD zXBaLEVok{;Vd8HKPbJJ3xTs_NKD`(AS8Q7cL9Q#LCMy%Aqx2Nr*&T$$L;^I(Y*e)U zw%hcZR4r$SvVGemJ;}f)$9z7Stt&5y!Al3#U=;ECJ&zYl&O;i!SJh0ZN{tn`O2kOr zN==1%bU29>HJG%WIiW8Mjzl=rY?XgzjULDkT?B+Skiti98|rZcQC~Q;ulx^1QImM0 z#J5(1lf3?5pyrm2a7Rr4IU)6QlI9maq^9|r?7UgWG*BP4{nRoU75V`qXC=-ro5eC6 zdD%3%{~)5^%~i=NO_;#6X7c04(aZrg{ZN#FC22ng@dcxvX>KOG`*kLOPM;w{6 z3hYMzDoz*UHR-uz@CB4THxOb7KorL=K!&9VrZTnP)I0@zmlh-1}5 
z?aNsiN?u^keXp$4%+G^Ii7URm+mpgaU`)IPUFdFE{RIowd{1UaQd%q{}4-0KQlHz7)$KEA0C2 z6}|FcE79}$At`f-cbX7gHI|OEC+*_kvNDMVcMM>qN$-|m?xjc0{^-y)UA9?Z=Bi1p ztL@ts-0RzRT{a*-Rle6YuVEA1)8Y_N)*oql7t65hjo;u#l6)}?;9f*abW}rFvFc+K zwM~QuDz*i90eyV?VRO%mbbYEfcY@dM9j5*-^&R#32CwkoQsY#X9i>h?n=r@=J8D=3 z2UTu+g`IU)E-Q8GTGOKu$IJDMSYM=NHBztgyv^~QmB3Xg6_ddtGg1#t=2nom|MPtW(pjdw_W&u z&9Lb`H(rFkgH+nlK8J_$^;GYVmwsK}HtyI7-M+R)dx1mU$b&yb?~UrK=NQZTUluQJ z`U=hnZ&RE?Agw~K*VZ$k+mwZc`dU{JN&x;M&Z@0dqQP$Yp z<$B}m@(cS>#&g*fkSVwif-3-)s3%OUrr0jlVH{=2?4LNS#ws#2(Rp?B#L7Ii*ihP+ z%{p(ddZY*fOV8Olmt{$->zS+b>u#zC`!{#SNop}$Zn)2rYeQ5O-QaEBr3d0Du2@T4 z-49%ln9PgZYucLXX*droS>oaJh9ukMe~VuG4{x1G`|g8!FGe!^Tf`FD^q)3>S-cs-f>S~69%cm7n^oj%%TOy3dH--aJIu(j(l)eJ)QqYeDL$cytu z78ULIOP4qet>5vfi0iG~ohxtcmG=(&<85vfIto}Z%ez_HE0f%=RdFoA6CbCUB*W^@ zuFUb%ra*tR>1ApQ>DL`c6ul|yxAa5WHh=4Ip_g-5N@3{06p{!1#Q)4JAjkvvOxD2DYA=ari?-E=DSU2d$sZxn?pT4!|7fY{ zwRw3AEB*Oe;01Zfzsl?)hM-X133n@P@R|hHgo>XB-VuLf@>GUi@9^rh$f(UxDPWT| zMkvzCo(dj^I+R}OJ^<%q*zu|HMHVRo#C``T@Og8i)Z&emB}vumLVwAEO+GjErfqE8 zY1ThJ*Av3K(sPa)Vy^=f^>>|327a)RE_C_gCJAfC0tPMejN8(5@U{>U2`rxzb%kbU5cj^{o-e z%mrFUNYl7U&W$r#T)%*1C5(6tm#j!@Z&nZou0iYcZOa025g8)_wwqW<-~YqbTSdhY zcYV4f0RjXM?(XhRaCaJacY?bGcWB&$HSX@-IHZBbB|vZsZh`moH|v}^^UcgnU(`ik z)T-KR*Z%+Z^VF1JQyhQ&rQs{XrKendnCXNo#|S=89APpWr%gA|8tB-k~H5oR1_Z+I4ez=xS73Y6J?PJ^R`M^sB3~88si9T4(A{|y+0Ez)f1T@I~E0MXJ-p9shqOY&50`I z8Vop_7eOxmX|g3@);C<7yO1H)edC~0l7oQ2q})yAg!>v_3^m<_ZzHz{r0tYD`uM8VVjW%^K3+iqo?iZ78Y z628#5(7xIt70r>5<`#(6Rz~zu!a0(Zz$qtOn!9^^@g}_{f-va|B}E8c>MEp$N&(Xv zugnq2Gx3hKq+7alyhNK^lpbiTny5hSG0U+;zr zuCP3(19T>aEN}|Gthmf;$vB?Y7nG-XK;zfWpEk)bQ0oma zT}jjKFQ&>4tbQOPm$sMi=;aM0@_^Pk6h>BvU7~Q^+WpCf%UPIEe!CHLRoFq7?j|!j zua5&Ikmm4O4dmpM&(?`5Qdhrk=y1}YIt;R7r5+GkkxZ|qTDn!gf2oa)UmkMo(hv5^ zM)>zzs+o^8!$iOwyFhmgTAvwu(%RkM?49Jy6;=2e09Ux1ebW^e`=B)1M3C7Cj{ohM z2*TeXxcuxj+(}B2khy~WIn*2@zbI1Ib(Z1q6Gane=VLD79WZcIiDMvfDUO0G*qdtPE;7dE>ouFSCMBi`PSDz4H*(6FKY z)I^arQ?Y950rIsH0Z`(n4I+dsK6O2cTGg21@%_%zvM!0HMYEj($SAW&f~?KUy6_w5 
zX3ge+>Xy+5@uJT3L1e7J?>x3cSPcs{}QYksF*T)VtJ`CAN>^L+@)A-R;j5#_x%J!L8GtCOs zV*8MHRj%jI*u9Q_zo2*joC-oj7Ur7~t;`v&Wku+z?uMeAxg~ILs}aNs`+Lb-CDJ~A z*DjISfT5aD+cD>rRjt$6`3ju0ZfhL9`GymH#M^nxYN3vniMef!HTqA4Y3_2egpo1G z)T}rRQ<3&-UEd}}^Wc-m!ajz_*e*}hIKo$-?pc7n>foq{u_0L6*ej zeZg-vM&Jo;p0wq`2Qa8Mx1_X#Y6P@7H_ISY{ zw34JGM@68+W^jB~g1CS+ToP8tvSXnPR^9E|Hb>miM>+*sdzI??BHK z^~>YX%zKY!H*Maz>B&mwII*P8@wS2tZsUn)`g-1f&vTb<36{6d*HczCLRDwF$*QW2 zbN-L(X%{aiw8h0-I$pH0oOIq+d<>qdN2WEGNc36UwlrDai^~loeW5omc)Hq44{0x( z-!pBjB31idv7m%+qpky^(`IPk9pu#_VNX{}Muy_L zi3w>|mueZhZ6cW{*@6!+Y}Tw2X^LHJ^#}Dyw2TuM>m0 z5cmrb1IbG-BhjVNEhV_Tj_#3MJI>!7GjjYRxO2dd`+$EuYAO|!d!kwCGr|s!^%%4W z)ciEBuv9p6rO)JgYvRM#2CqmwGBip`3!Y(ugwV_xrXJN2k!71eCxD03XtbxO3W|_k zMt6HPGoKDu`z-^Fe#Xx|j(37n-x+SY=IB(-Na_4wv}-GDmBnRrr7(0U&w|VL@4o|r z?lw$qVGv_AA4D;abO&)%QmAL`3Ev6-8!UK9iG zV)u2bX~NS$)0z6BJ3!xE7v6#j=EX+E=_}Q;Y|iSjd4*T~Ek#`iwjTNbm6M0uG3Flc zPd~AKP0J#sgNy+QsS;IwD$9z7S|6FIuw!p-emd=7I+wov6sf7>e?sWVHQEB)KvH^8 z^4(YhUcG`rne!7VvtH^r!vj9c2z*SGYibuBnV0VxUIR(!E8A~TP@}OQ3H}|$-5nLi zGNnp^FX^Y_M)tQw1YsEY(AFSp)6A!bQZxjI*F#Q$!RR7_Rgp|t+RtLIJ5(IcAmpnW z%?6+;U4d6m{X^K`O#oX&`MCm%zvl4wk5WQ4W($jX~440}{Q)Go>-+_ddr@}Wt2n_^b4LmCmw^A5Wx+T3L!waIIl$_Br?m^KOor+pOE_5yCMk% z&To5E_l4%jY@4<)fD$Sb!a=;{g*~e?DEufLLMpg5)=v5n@z50G(jMNOCH#RuuSSi0M;13V^LSQ@_>sFv^B<80>H4R!)NGonTHlWyc9)OsNdj}{@HW=8 z>`x#1La_~Gebx(j59=^ zcB%lmWxFNZP;QGG*(cU5imfbGyzvEu0|Z-yj@$=zaumSHZuqektPS&_CXzvmbOOU4 z;LH94=QEly)g90-U)Syp=ACon_P5*jDfo7p*T$L8a8}o98Od2h=GJ{5VQkz~M6M>V z;q-ic_eRo@bPpy{>6Y2FbNt7isX15cjm~$q>p7LwxVfdoMpQrRIey@Dq(X9J^Rxoq zekBSJ1aEUs#hkQ2;QeEvSwBgvf60-0mgC^`ukXv?m_IrbK|A`@jE<1mtMyjokdDL_ zm>o(CEs5>(Et>;KsZY@CT=;oTQ+l2$R9{oX%zfaBQq3QZ@E>NxPv0=lz44vV4Sk7+ z$Oem4HY~n9UyGYhQco?XK0ZhwwL4?F*LQ9b)|gSy5k4R1L)fenXmI5%Z;?-0%&^V_ z)`)m}fn=#)02O()Ju zC=s!|-YVyfLngWp`< z3MO)el6!phIMuR-39&pb3sssI8`1WBlBWbe2`7bIKzYuWi3+>&3rM3u&_;QC+AmZ5 zKjfF`(1rwOdks5fnF#W`vwXIinNSqRsF#?l{=VTZy78XN8cI&qCrY*9NIZux==CqJ z=3e(0b=s`e8qo$guYYAEi;<$#N-N1(2xHr^{NVuP1ECq5spr?)6jX7>CxC*=swYZbIZ(~i}jdD6>ANRDj 
zcaRuaBnsH^(NI~npl(Bp=7!V1zI~#UqfGk4LK2n`?^AlsX64SGZ3ofr_bRX+4#GUX zF+8p|<~apA(DzrNCE(_WTd+2?yFDHEFiP-Ei&El9V4W( zT)!Zf$2kYJ65<$;wWM3m)7md(G3wp|gPPVbx7{$Eb|gFG5@n-H$Su3@SGnkeB1zb9>5$ObF%a{h};l+1lb=f`C$a^6h1bpO;#hk9l@cSqNWv*!! zy|H_7MHAAgv)55#YB%uk!<5;mxX-jC3RFU{WS+V4ZiCaLrW$b{q3U*lNqIu7+`H23 z{`i1M6Im#CMv(1-dUq;wkKgSc*!YI5i7_vNXG@V%C~n%>z*b8M!GVCA+2-rQ(B_}O zv&LfmDj6 z4J_c5YSL`dRGMa{5VA*g!%7&j(wMCCVfNfw;u_A=gN{$_6<>dsiH^7J(R*NV6qUU{ zv@7Y_inAB~VUQ|JgF=fkiDM9`99Zg1lRyOoaK>VeaPjFLo8%c{(ljrg&%y__pmk}O zj1cHW=Q_&*h-*W$_jNJ_^yDpr@6bC`mgm4x=cDqQQ=*93QV|HLj6@QD5)PDDkQv-p z?H;dUhv|6w3LGU4zD0M2H1dx|D|oaBV(!CjwY$hGJsiu|>)KXa$h(v~`b1Nr^*Rm; ztb(x z7CUc79RlPz%I; z_YW5=*1s&`FMjaVYbf4btX4zr;{IWC^i9;|PWj?||FJ)Wu7zOn|hylU8n@2FR|N6k5;{2aE8TH@L!Lm!g;+8IXkN#NI$$* zpM+VbKd2pBql{d@lR!vw@QbS@UDyM5=k>k(YQ02EP$xstPqF%&2bt*8<)=t^;dkuG zUL3Li-Vgg3es12XXua22ckX*8d9}HO2Z!Ygwp20dTKsQ<@k?{bY|{J5EX<%+EP4 zT1~dY^YIk8(onv<*t)~=DRV~e%JLJ$;1i-K*y^QTW7uxBi!fR!Y>nf|E1jPbgrky> zV&pbX@PACC=i@VHxA~1rTd_PL3ry|qYf}bnThm^5YqTjaA@&+?s=jEuigx;nGtQ!O z>yFV`{yxtx!(C?)lxBzh*Y@DJ-OKJZ{$cAG?+Ad^1=0ir%XiWF;A$ZKS#NJ)`Xv+7 z=WFQe&K2uKBS`<{>}vuzN?_IBhk2a6^+NSM&CGD|LPax}j+=&U!hny-fPYVp#-duW z2)}o13WTJTxIsoy(iT5{IO#`tABVVW{~ZMCwEL+`H4{N&^MQ5@2CY zzo9W_{n_#Q6&`S7-^kLM*SzPW@7J9D(xe<)%C!cjTUax;9`TqKS&?`Vxbt1rJlQ|_%)4ELw^@{#I9T8&ow|eQ z=>Exzn=)B^a)12n;BnA=o+KKLog?LIrjlZELbMH54x@DCMzpNe z0{v_@=y)@l&0=(i&SBOIC*y0smPHw5z9Yy zf4z4{F|hR8UIdHm zpC#>kE^890~&|$W_YfhxTvf^VQ+qJ0IZ9ebPG-=0t zLZF53Z3lZxl8$QEDJ%pW|e4Pzz;$Nv<#Y5`VgCkDx|J5O;++YmSCNKU6j!ouz zf8tKehfG!L$V8OWmcfA2aKHEQ6@7Zn(Vp3_fK}_;Z*nh~X2kkpy>hE|7AY?WzFx~g zEzG5-oU-{0Px=7K+qm*Cj&p4op6$jaIf;jHm-lwD`GrY~Xb%@mQG_kh%<;+EL{$N$ zbhvU2@C^V5$8A}m2r4pW(x^jFgj@}yT-Rz;iRoQ``(PtxoOFJ!#7UC|tAd$pUa;SU z|G7)zM&4wl5f6#T?FbZzmqRse^3IHJ^POB5?(l?8p;%fiyX+$1$XNYX@lgwE-5^{M z92ePQlz(ttOr>mP5w6wQaq7+>-u_lj@1TKE3VcmMK&1-|gJh_T0~BUPrwD^gPxbXS zO+r30xaAjOGKpV98k=p(K6l;~k8`cFbyObX>%8Z1M7a@5??SAS^o*)}%VmkV@T6Vg zxteR35#bK04SwBi|14V%u0YQHL^Ty@Tnc1N;n258m1o*D&YC-&nyIZVOSsKxu(I)! 
zlkq;muN2F>0P(H$Zz&ZyHT%tp9Ejc&PEi)z&;L2vYUt0I6Q>SpxRvCEt7ffzCJFh_E;q%YQOKz5D9;zep>cn4k6SP(u%qXN6f}t%I z?7eq)y+39R3kp_T&U4pz?~N%5g8K`^K+{k2$qe@#MG3HHK=izh<>}pMpJdFM^dw`r zbi90FK#g1r=}w0Qrz}Z{h99~F{~>vNsL3cNEh_$NQL7n)eDSQ&0F}(mF}Kxh_nc8J zkkPfjzf|yO=as5{ExSTU+|28;jV{knsR_`R7R%h)ApxK)@Q(THQqVw27e!juVsm># zU!pA!&~YhIoz%a~v@ghy(o(Vh=jbK=4i_9Z^5*33I>xUQvOj;)8nRSM2~{}#p7Hqc ztKC8n!aZ$U%}7nU6PYdCS%g1;nC3U5oOxWy%r8fkRVdUl!mT2 za~54~ZJ4U`I-Xdbj)Fu&zNl6rfCrL-i@FV}E{)Ucd3^5q;vEG}v0|CY4oR;^aw#@(Xm_ z64swj7RxfJL2Jo>`g>Mgym)(9_cAbph*DLjN2)6ju=>Fwo$%Dp1%;^p7bB5|6UHc?)G8sup?N9%A>B_*Dt0U&L zpKk&MzmlAV7aMN`%Enu_LbqAgU0N};gjaa2-QZnh1Q2NTyHZZWerOz@?Kj)=BsAXi3ax!+$4o+X_dtD0if+C3+_uEWL(aI$FEj5_{Z(0a=*x1 zT^QEZrU%(Oy3bBIyGZbcIc(W6q025J2m#kP^#eh`igd1iSmyt~ zJB9_T_&(boK1eFn0f`NiaH)SfED^>h&qNxk*~jx=oEO;S>>PGSxne_SEPeZ{eh1Iz z9~%Zecz|-x+Mn?somYz1O2TLJ`rk61PG8$6g&(B8L|9xM6TKQw*0p?QwZ+1Lb@Av=e^78j#(c*j)ToWz{h#fQK$aMXKo&re2rW{E`97T7d*0jrGbgi9AEYq4D?Ced-6t>8fyRI z>g|&)8^;H-fuVERYGm#W50LM%0RylpIP<<(P@NZe#yVwCsFb}vfEZd6OmQl*Ob&N~ zbqU&(Kd>iJ`Nc&ZOz(cxLkK4O*cjK@W`iw_Qy8zJWY%Hw^Kn`H8e255Bo-#$`o z%6jJv+`4BVaR%*QT=XvDq<~*Oq$qwKfzKp+a3(cp4&Ia1k|E^O;l%Xx*Pf*Ps-dYi z%!jw4vLz~IbkT1YJRSdQmY<5G{O+iBz1F)|>nbNl;3*nwHg^pR-ydG(U-^i-{oAff zR9pWQyZOACrW+xFxnD;8@pI{UfelL;e<19=%7(rfR9gfO zO*`SwV&(x-%}uPcfg?1H9>xT_g+xcpM>aQtmvacOY?vW=?SQ)CSCWF3#d>mQtXIt1 zFISwvDdVpCcj7%UC?(~ix{6H^;bpM;zF7!b!R^&|u>9Qr+N=DJ_u0H1H}+$dIhPXE zuN>K$({~b1ORlQ?!D*yB2QgMOm^HFo`KxgYp4|ZeTSBC^y@&0<%8$48o%QJ5^hv54nGBTR^PI&K=i+~BlW zlL5S_`%MHD>>xi0{|ByXm-I)Sr8V(H_wE80I?xZjmPZc7N~T~HQKds&C2Lg@4S#Aq zCg_$xMr><%klJ}_f{&Hnt2NbA*Jjd7r`VY#|fJ;?);IS&4kAM#pEcV?wHbFp+Z%<`*`@#E5wkuyMRbPa97{J<^$~6-PpcSHDAHlj# zDgU^=4?p@#-Hj~il_DLqk=N|MNzuJ@#A)yL(63gySU)71u5>Qi2qRRyq^*3-OLQgi zpq#p-;}hNS>~P6QB^BZ>KG z7#djuDS;~%9jX83c;1^ovT9$TKkh=%L&C3lRJlkdi8OxX+aKCj7xW1-0e)dy(PLTa+n;-OVa~T!L_BN+4#plAg$;qIFs>KjxCi94` zmq_S-Jq8|St;7}Ij))Zn0Y6%9{1sJFZnd<2bzA9roJVA*>HjbA|F>7@|Hz{K*M)ge zlLms00wtfv>X2Q@*rsT97kL!;eoW$G ziL>=z@(hMT+!33qd(elO(Wk1?M~NLJpw3{oK@V-i59 
zgA{?^lqYP;Ckxc0XfC_AqgXrh=U#0iTX`5Du}IV08ddktG0RL!_GNe2QfUW{*Iuld%mBoRSigK@tf^a%Dj~H z78gw;xc1n?fzGV zdBgHKm5`X|GGqtL|K*xBerUImeyoe$30k zxp(c`eT7kJBiq|>q(&u1#Q?XI|2h!*nGEM2Bzk*yx*?o^v$}y?@X1EL%Xfd1Pew*` zTIZ}&ckKa7L591&>!6QD!0yNfS^P2BDIeAZ!D7A~7;NLL!o zM&gnyPYpV)>Cd%qcRLOGD!bSn9O3U$_(Ndtqk?f<&QWSKev5gpyB`Wx{}8UW98on@ z^|O-EavNLm09|=yj{;4tWh5rY52U3ZZ8h{MRRrJVw+hx?fi08N+#Y_V4KGx@A;Z{- zxC{=IOi4thJZMw2ih)hp}D)%^CD z{16o$4bgiIUfz`;{zWHe(2H||bN!1mYGNo`;w~y9h^YQV1QpvznHw#^~MxdnG$mEP4NpBLzfD zDZ-A1D(LFzYTIVKCw>5jcMmkinJEt1jXNxM`||GOy4Jm$F@uH8W!Is7r6AJexCgpi z=z4rPd5Vy8ckxk*99#QS6{(;Nj|+@84geY1l^S6te)Y(qaau?A3#juK8do;Dlq7Se zTI}dr6aOkb?)|7l0|!6x;rA;-A?K{OR|fUHxU^S6a8>z6cA3za-pUR$Mx>2K^Xq|` z>LEUj52g$Bj|*BikEVhMtUs`O?)#sk4M7d#=c_sx`pE6Q#IvwHN^dJ8MT%HY2QAVr zg<(1UjMvjD?{8Bq&s!s^gM%q|?f@Ev>Q^Jg;K7nvsRjo2#L_BI&KIwv{1+(;+lC$W zS47z>_&=I#*LqFxMd$sOe2&PK=Db_Z2ualK&wsbFH3qi@MtH6xAT)Z_owqHCx3di; zcLYMoCJhzqe%RGxym_J}cydXYe2IELzIIWAYd5elJ4o|vh>8r(Ztu3kEH$cA`#_mQ zZ9P9*?O{gg)FoW@jYJx2<3Tq3%(DrS8%DZ&2B+M)!>!AxY^qxo{#8%g%KKjg$B`aI zj-Cjoiux2HC#;-B9@n(W4L!-0dmL+p+E1Q0_+0gG>_=NZVJ+<9e_fijg`eI`2}%dX zpJA38dOmWJJE8!wA z$~P$(MJS8*6sMuc=M)>zQ6#=8)Ui_68{7~JxBvIwHtavy3%>{b2d*+}1;(-!riOu8 zf?M9i-;@6XS2WKbA_M`!6GCAm4GhMgdumcCs{!CEUJE0K{@*bj0sI=ud=z7@nrXfkH_cxNjX!m%+W-5$S3D{o2pMC zDwFlzJFeueMFYHKB>pk`*Z0J!w)gjzb>jc12#&V!w~MQ6=iW=`HZ4z{nY|qA{rB&K z9}|i@R&L*Qiy-+P9@i7WaGoJgK#a3Xj5S;G1hS_8z>(>_gL2;5TJ(x3@8Oijd#|D{ zdqYCb5NKhh>%X&x|AB+4H#}=G)Xnuqih~B~=ThVZwh;L~14<+I*;)P3t@(Xz z3dJSvxT>TIwleS7ZWdH@=eDI94@X1(aTe;_hm7EiNbDl=1Bu4(Go7iOu}zu4@)4z; zpTkm)&pljJwf5!b;?Hga$wm|Q@g3dBkY?shqzEs{CL)VHS&37khY{$>NfBRSE zWvuyPm9zu6%Wdg5Er0L!5T+=bw`j&wli{;D)9J@p@IBA-7hFXb^PhBBOJDXU#GqAx z76o7{Bu~zRDrbXpwv7tRKE4~qYr;v&mS(eU{+TaKkP-M5wIFXb!m-zd7HmFRe$7=x zOob|YsO_ihbf=^o6}&%I*XFX@KQm!Vw%7{O2U=>%`*YoA3ATTaFdo7eab6s?>c|SM z9+etJPDmPNi#^lAM@d3wLU9>pl$-AEU-<&aTa zPfdv7rpA0f&0{;8l&pbQ$r&f{keCaDVaE{scdi`PxlAHFmuV@}#;V2Ujq$L^Ci>$r z{Rz*36{Wi@bfYzzplr7AOt;BPBta{gW(UU~7zAG=jB0njH2b+})~?j)lpFiayZAav 
zmYoyn|Iyore_q9djj#qc|21zPRN5`Ic#nK<(Cl5dHRET-!D?;B6mgNPS`C~VIm)DD zx@S8!5`oz5qAx&Y(L)2D^mo;z(4AI(P9+%b#v*eiexCv~HkWR`s+gh%{k>Wjg6K?l zsCcHUsc0%Ki!TmpcF8~?yt+<)H$UfE9?BL&DvmwL(tBbH2q}BATuEVKauvLbyt0yM=KfU(&)r5)OQKu;Z5w^l#H*t9 zDWbk)IHiiH0j+a~S$RD*u2%odrot!pnqz_ZSKGAiVYWh@m?o^~C4Fn_9cvc6v`QOd zmzs%>-l*FZmcd)eATW1vjtY{XoUYGk{a8UGkA!bghRz)z%|f<1-F)pMUS?igt-7H zHds4vG;gXDr(azctIm+|Ar$fc1m7Zz2`CP0`kiXh-Z4Z23|+2r zck2Y1}lBpNpT0P4*K@g}9hix}KEv~P$J_4F3k$Ie^+TAn|EDqf=HIwGWS z1DiRKi{LzHvHdovvG$!ytcdsk54H?fBG?2!A6kOG$;6Nck^}<6p23O6O5{R0>l!!1 z)OF1tV+R~bm3oc~%BB?3NDvQ!R z$J5@fkNLP~j->XM)6wx=ndj%W1ai-FlyD zUKxfcVQE4U}6H?L%>qQ^j=I=r}drFz&kx0gBD%cnpx_5A9I_Q?3<%` zR1qf5#?IECAC^&xL?H3dV6ykP4NshD$uE6Wt;!d-!ms zfirGpACE10jS(kHP1acIIQw^FZO1iu5547%c4l#|iaFoqoQYr#TUTHXX8oPXJ1Xdh zdbzY-l&f2`HzJm$(ky&e?I~cmR^bxY|ZW|C%ZEVRN+3i$43J;;_;=FC1HTv4kSAlf$V1W@rmdsL7D4Am-aR2LR!=2NZhN?`g{?c_}Swz7iQ~*dNeF2nU8a z9TQ3tpV4Q+<^M5}lC5BbC74iu?!fr4(LfCSY=W1< z@C0VXafv9uUZNgaO9VfCXhoPIwHIDs?@*BvvSM~vOo zZm-xqTDfGOyRLx@_d)^5%%#ePtv1V)XpE!Y%|EmzSj$e0PsM5xvC*WgSQ>$HFSON+ z`{qsW=He^t{ACQfeUPbv0_RFFo5*=HxYj9)JUyw*TlNnPCC=9-`f*XUx>KRcCWqzn zIYFvS#cS=LCCxZSLK(B@8)E{p>s9>h9!!1|rj?E*iu!TW^<{Q@|Gl9gin9f3!gahr zO)L%sDb5fvp`0qapW_Ae9F#0q8wqm_3qZM?4t~;uUKE+%MZSkKDj#Cj?{RY!ZEu_d1Lqc`QzI^z%f8}Z`Q+sM(Wgo4 z8d9uPe{k<6E3ICQb<_kcTY}nBwN32s1WnZ6mgNdD4=s%Wn*2!uarocC`U#NCh|_ni zUN%IwZnQka54cD~eP1y8PF&@W8xx&M6R5f6xP&F$RtVIqB1{-aC`0|*u|7m>D+*Zr zi3qbYo+NAx=L0S^Xi%o)p--pKF7+2~d>L_-KtpLy(%=Qy@;7WU8fY&R+b|r>|FGfe zl(z?-S+{B`s*=)w-`THVGN3z{U5JxEOh=JksP971!Yb@thDHWBau?_|JtNpTadX?y z{4^XRRX*TjHACr7M)@dmHu*(jsQ(WbRdvnpNA%j~DSmuOZD48a)#+*4r zSQ}Nn84-6EIE+)D!ShLxA&RC>glNvjOz!rDoo!YX*skY#eO_N_!JnvaRSm!oEOGgR zW4z3n>FLE#VXMS1#co*Mk2Y+@h*2=S*_491BZ)1XL@18an-DqMKKMB6g7Dw9?O^Vz0>0-D%m` zlQ=ea7re58O{{ej9u@Wt^(N3?IU;c_2HLdR%{FZQq%~A*`sO3DnYA6ok7WxluAn!( zvNoui*s=zhU;EQ-k$6g}M-@ZSO+a7UbuviA@?@U$6zaIJbhT7vW>{yi`B%cqUxmb- ztL9Sd>u)E0A316ucEs27Ix1-1x=E4yfrlovIz>&?vj6WSi^F*yd5oWMq;tWCz25?6 
z^eO$L_fKn_Lz3I+yB-KTJa{znJwn+3awJ_h=^&xIGepFI#g%zurZk&GVw#$0He49t zdrRehg@b!(dMViW7q7jy`nphcmx7^R5zD~#m8U&=`nYB7@9xp2eti|!%+A=UcpM$Q zB&;M1reCMddOZ|dX%8wpEF%wC$FV2ohI+q*9X>BU85s=Mf>Ir^zffVZwdPfk=Sa?R z0>Nk$!WY}aq?HV-%j{`3(g8zrd{Rigb_eaJ)cwMz2q&v&?YC5!>QhqXUr#)}i$(Q@ zL#$A-y0jLc6*K7QbPf`>XF6)i%X3ve)e#ws;_R^-)MV*F;ugv{fPd*c?X1Z&Nv;j5 zE~gU#P(=l21r_}dm5H)WrM$PdQcaA*i=`$~{A5s^bh1XXc z(B6Z7jqf?AYOCbWqRe9K;DZZ}%L46-t!Ihr;-6aZZJbBlFQ4Xz>qG+;`;u1XHH}uO z(>GpAAl(u_NUg$mbbo)@4LEG1;M0a$DV<8Q=!x5(GyM{D4$BN}NhK%@j&!#y)VNG> znW;uh7o6K=yf#>LGiA~C4{_9_xYkKno$ii( zDb$cU*stzi7_WFE>O>J9gkXvNLdD08P@XW?t#pnXenHfJBR1zVmSs90`rrXx;Fqu7 zVU}I%+|sFpV(1}T3|gO}HR{^;uZ~yB1xh6N555dRe5!T)LWfD3(ot-TC(3kf@@Ngn zNJL_)UaEmF7$4kt~dA4a4{o@~&%M;n$)?H`2lXcxW zI2m5UF*KYedz#}uVGCmv@L5gvWC>Mvt6)Oae3CNGE3cra(F-bUdqrwib-4tym2y`8m=BKd6?AczqD+1EhjOQZS^ ztcjb$cMP4y>jm${lLRT7%1I)fzkk+cXc3q7(w^DkFqsD%Uy|1d`&=lr z)~vM+etlQeVip$iKHD{xqpt(Tf*ti)2=X)Yn`6F6JCU&@0nw-une`R1eN>U;4(lMs z#TBOM=g*z+XATmY>pqf2h|a?QPUPo^F~M~ zkDHF_f00Gy^r-`BM0d~5G`p65sQHkX_$qHaB_MKJwz#mNh%h1p)i@p!uD7>g8>RHU z^BRj1GUGi96PnAOdYN|{1LxFPscLI7wujO&vuDLA$P`8rTEye)L{Cg9wQ?RZW(2Nx zv?anhi@NH{S(Qjj```-~xw3nF3L%Sc4dOGWaIW+-l%qx>%Z87Wd6ek^?xSNNK_N7Z zvmpClEAsT20Fts4fjt^wIqaz&U95d_sMMwlbzX*oiIgx)8wHe?v9|YqKQS-C4aa#V z*eavMv^K+UvbV*1#bfA`i2M%(-#hN$f@wmB)>!<#{8cL&t${)%K$BJNA((kh2Bpn^ z3U6;lcHsLnxy`Txy4ka|c{W^_tnl1ASO<7S1p}gb-fVioC65KmN%uX&F{=vWeQxS- z`*uH6E7Ohxw=7sUf>yrSnI}e>=1l(VXGU83V`J(4 z1b_+dQ$c*+qD4td>k>EGs1{YH9Fglg- zY~n94&knLZv`MIPU+ijacwXegXSH-y?xAjHu_~|DO$iKJ(^mP^-45lTm$~hp7sD5) zHBnfWH&-7bRt4mzE@7gMQl^v}gvVi0?+YRe{+5Hk8_6?DCX7t;I2Jw;7<_Azn~C?wY02cwQqUPJBuNiFsf%@a16Bl-#}SfR>hyVCb!o& zrGM@B8%LRUz$&)FxRMe_8jxrmu2PV;T9C&R>{!KC=px}g-&Wu-;yf|h2io>(U;b69 zbalU^Ot~XV$Jxo8x3zES(?R(Xk3nYu75Y0nLjf91idv?mRaEgOnon7*5g_WRHX@N) zImctjNvOCjar8->E*W61$DsrgRL<#jH@EuW(c`thz!}%7OYm~sq5=VkHYKLP)@2g* zxF5t9rxynr;J#%!%Re3u`6&Q~y13~Bj#oSn$NyI(dDeo(*7oe?3q@rQS(P`<*NEjOnO7D;4Ql|zdBF#Sa~&|)zzP0dDgyznnHGrscR|-?lML)78&Ya 
zTOm&?X!})@@0kDQ1%CB?$MciNpD)$52sr;MP^qhTx!tS#V`ay8w)m9%6xqV~)*Q8C z3yd2=e6~p@<8XyXnSOizlS{@tQGX=Ed%AC}`N60=1!fXe6aKWg(-?#pttuQJ^8YaQ zmQihf?Yb`%3dP;s-CYaC-5rX%yH_Yqg1fsDTvI6SgyMuiTig;T#R`=EPu_LTUT3{~ ztv$y6l#yp-WMn?$nKSpi=k>d;yOHr+GU(N}qm2w=|F@=As!TbR-#Ke{YXCEE$KurU zgijq4xtLiR&>j8fYoYOQ+QapUWnPL#(HGy!v}4*;GQIm$&fJ)q2v7WK&eGiw{%ruunrGa7IYVD}+|C;KjmlEU=0R)mqnaMq`dV zVgw_mwvLgDbm_^M`LeoJ_=n7U>qSw0HHgcPxEvg>sd)gA;twpEcC_~>zWpp4vLDff z7a7&fvOii3A2jWW@sCq*V_{>#q9HI8JwVH8o+6lj)5xu!M63mb<0>{X_`MKr>s5b@ zsKed+n+8}Q>E*pA0vTp*{N4!2^Yh$LCVG{U0=-sxoYWRo_T16srjv1XtIC|VFh!;1=1AF@{K0&0N|r;J0(|9u9HQh@ zt(!^8)2me<%;tTV$vsi&KA+VdV;jckgV+k+;y~NZeJA!qk+GTHBvWv=ikG9>%ub94 zBOs+sIbB<|GjE)^`m{!<)82#l&;-sM}X51B5^gx7_6uA}t zF+a9wfmc2cQllKlMbo)|b@bt_+Cm5O70+)AGnGlL3k~Lzx@3;_=O{)gQEH15pvTXH z!Z4=ee#XvxM-xF*cPrkYiH6QwODTFTinc@j-m4Cb<{7FU6JY#oe@3+j8BrX! zHFugi6^0YWn-+o5A0=M`8ONQcYp5mnQQ)5kJXy~7>zk>H-09amg1g5CgS`sF%7J&k|1@Zi~`rPg6$e_v@XJ~m83{cVs{_eNpZ2J zam`ufCh}j39DW~YGV-!80@l-9>t$+n1&19kg~BSU)9=Z=EV3~Gl$H+d^CfK>ZVt0|O7A9AB>sTYmxcX6vH=Opa!hFqFq)FBCjDSa`p-#%ZCuiQ)PZl~lOTZ+N=A}0NWz_q~@QL}<96IcJkJDWMZ;GELu8Wk+CF^@ET~_J4H+@3& zn_RrVsPG-T-35^0#`XRdpSneeh_P7i!}hZF2&PGq@gJ-_EF}-AT$D+H{YRlErMHG} z3%jJ)dLU03c_B{m;A8pq5RFSuUrcrHbmrSSJ!HsqTEqGq(1P+MwCeSr%M!=U zzPFbKLVw!nb4jCq=dR-XgQ95s{RYrgD!E@QATx;+-knfH1SmGg-!rXeynYSp@eja} zp##dLHr3=1?j-8q&Jp>cc9`NkTW;WIL?su!cIUH>WH>6B05R$p9$wahkiZ4r1GT{1C24%*k+lO03Jfkr)Kk&cPQ0@HtaGBr{({L z{-oLl_>KQoJtX-XaeS`$b9AxSRLg)?fjf>ImhNP$pXtwFwBHQ*{n^T?jnwFqHksy5 z)7q;QPPHILrS+gCS~57Jn{#jGdz9%No`d_egv;moh#! z<%no#%^>0qqO0pZ2&$;X+Si1x1eC;pRG2%Dl6NN>NSsb}*kbNCGYUjQ-03j+lV~W| zQf5?yr0CPpqqfrn6=!;NHh(#)iyD|NQptKXdiMIo6xCKnp7(%VsSei=Ah0J=dw~p^ zbhYV*Q9{6;#bHR;^%=>#o)){F7DtOLzTU|^cS>0qRpJ~vQJ=ARt;RZ$I44j@%_}t{ zQerA?HVSd@xOGyMYf%1>{Gm2-h0LXal2=l?OuC6NiA~~cG3*FQn#^t(aW6A zzX&+eB2Jq{D;hU?$;$heU)GOszuY8!bS-}lAuq<3roBJn+|CkH-5$<3sao|5#TQ0` z@1{s98Z?|f%DZaOw5|)QFI?DW1f6o1J|aNS$?%VXd?ijg-g?`2KB`HH0?H3vJZIxF!Mb4PVXl<*juBV` z|E;u77pP2ifvm91nZcPObHtOgfcz7_ZtcN%6`EZj4F7Jnm`Z2+CgSBmTnSFP%TQ|? 
zF?3})a}_U-sd4O{gErt7XC^7pq`1CC+6M_&hl_DeY#bR8!U|)Oj>dWv3oV}cBMp2+ z2j?%{8ERhtdR{HU#K!V=UWJ$WB{KmG8ZY(0hfZ^k?i&SMZb2`^2AIG_(CNPeUL`gR z?D5I+<&AGV*qdHNr?JH`aLlUfGVfWciV{e1r#3^rZv~__-$tlE(n?cz z`tMXA1Z#ocQ!AlEN@_G~w=tj_bK|^@WuYeb&JgekoKKqSTPN4MRQ%_{B-+n!hi>oc z&JbF})u-l7ZrK?0OU4BvOD3h+ygFx3gHoK?G@%{U2OSSmCMDEaGg^H?#=D`ZWpaL% zjn9(zUG#&NbMl`m=c!jP5)1}ue0aECIyKo$wPZ?I)ENE6mjDt(wSVe}lsUoZ`s(?( z&E`>|(X}PaKe_UPS|)@rjqD?|JP`<|tMXp#)r_0A5_?l)_a<_&_f?T|~mUthm00W;sO74>jO%FVZT>RU!-LS|$|`LkA}Ox^k# zwq;6=sS-&MWa-if?fPD0laUYT$Fs}<1mAqkGxrHKp!|$K?e0F}_$Z&xqr0j8#_DKx|M7uR{;d}nd= z<3s--hTR8H7=~`phrjjZPG_aS@Mc zK{WmVEyuo+;4wo=7$@6c3*pzounkJcJ4yb7TR{EBHVhZ#-229*XJJf>E+cR#X!G+BB)xXuznaJV&O%jv3mQ ziTw-HL6JVt>C2f4u1C8PD}huC}^9|+agCDl{1$Y%;sHf+cJ3)2N z$-7{BvwySa*sCZifcW3Pw2bXpOB!W|%*?2d6n~4QcGkC%To9AT)kdhKGq$KFs44Cq zBJ{w;oSE!fMvXi)bERCctqn@M0u?)+D{ z5lIcdNkI!ftC4T!N$!#1jb2L3{>$Kg_vTTKiiiB84X@`fK6ZTw*mz5mGDX0?J5L8= z>f6?@HeN%3s{Nid=9@S0n~!bqh~VQgxw`Z0?r4{0QTF6{>0r|&_~<#Sp`K{T46zhx zb_TNwAiz+~ZyH=Gb@JYMfDFN)(k)+s+wHpQ&D*|SU@7I;ifhjj&TL05k+Y>VG3>K7 zIgW3I)Z!hBLP}DN37T+B>@KA(psvz9)#CYZ&l8X|zP8L%&15$`m;pD@uUqC^n)Vc&n)FkRRnI%A)6dLR$qe4^*Xf5OpWN*mNMYyJ@vz9{OdM~JR^M&{}Q9r(X?6CAIyA2lX5VBe{#E638hW1#Alky z9oB(7`Sf6nt;;X)=37+P+5toljBV>dTx`Y0|Jb{;GyVKu5pJ$h5GX_l`Eu+}zj4#R zV&JIUXS?g7$md>VW|PPbVd0yIKbsVjN8O~|^@fLaGPhxgjx#OsaR&6-hY3p*TG!#oXF%d#ouO zJ+0qg+s*9PVfT5&Bq37-fds~@GP2=VaZ_sQGZ(h(wfQKhR1G(P(r{+Kukie^TJ#iA z#i#k*N&*uzWqq#|ox#0Cd+YJ@9 z2ZrOFa$NktYWItyMdwQHk_`QM5~z8i!O~ID=hoLI%u}%afw>wd$yhRRw<>W~t|{nu zhBGx@cZS5?8CSf`XpAC0{u3w4z`G;BhGx%1B04D}5Ssce zwq%<1g&b;IqH|8UH?_KnymO#&s7)Q|x+P`~Zpd=xuoT^Ib*)=LJ1?|~!m>i=?zecjHpT)C%#!?tmy0uB$9^(JO|uaC2QAJl{_ z;+(BE{bL9lo}*tCk7>!y5NB}3>f;s)s>jLT32>KpSiw9hwclnRM9|O5u zPg$ru>pH%KM=MSrPo;(Z+OF^VzIggmKE|=G_#c#{{ToNxwEaLuVk%W&Jr?$>r(*Xc zT)SVdtaau-_%em=5tQ_f+$px^kE>a(%qz~v2ybXRypha%WxqdPk8S0njMobjbzbs$ zb7EOJ($6cbksNPEV?@D8}zL|`FuE%_Pz};9^39Aq)Rs68F|rK(C$^3O3PZh zlDE9qQL_5BY{xpmk~tgAS->W`zwCvrb?pi5O0^xHlJ|F@F$zVAs~IP 
zO;0jUrDelVON$&+L@!=`d@O_~L{dIW6fCuUfcQ!=M#l8k@dYhXdCJGCm4;tN5FAYO>% zej&SWeJW_*!ing7t>{4XiYeyZf8RMWSQ?ZyKv0U^187{MZz zvu>{tqSdm72M6LLr`lwH9Ok^`c8=;WMH1E2=9n+`yz_k+J33wBzW0qSp^0v|1&A>$ z16F#2BPLle%-}IQ@A|EVk2sU_8qvL_vmf&WJrY@YeUU%$<-udOVhSX@vJ^|4yNiN z-R<8fcw3sEjtO7x(%a)=I``|@Jjte55ua@dvr6~T9)qx_@N0S-SmEtfuw_FNc6oND zwBQJkiiJCmRaZ#LMR~BeNuO1o)TJ(Wl+VmM%>MbKljIAnb@8Pn3tUpPp|5X|_+_wA&bvKnvWU+RT%=Z2Bh zm&O^NEH^VZ%=16mjecD%yiHWmIQe7X$iqvSCr2Ed)tvng3etV&mP=#G7?TvcX*6~w zE2JVR_RP*PnpOiYgRg&VO_ZnSJT5kag`1cPwHO^b%n~(Ev=jB@%W8?P-Y{ofehheb zv%;h$U2T>41DG&DC(cMwlAXSeIf7l*C?Ir<(f#%B0 zOt5V1M{}QUSZID4$RyUUsOx;{ZR5MPSu6J?2qp5i=*ckTuHw=MXIt3)Zhlut5 zmizP%%8cOayZ0i<35T4@|8|Yx-`@S-eZt<|bZv{~tWl8ClH0RPx<1>SI`X`11@tI1 zxw&`-k;?9}8;^K*`iUKoxtPW!>XqX3w>0YA8)^5}6-ZRHd}-kR)v>_;j(eAbm90R# zoLa1Pp2=^mhFggLF%;cJMDSR(*v%= zwQg*x5ji@m`3f^Sfc}*LsgKo_WO5p)xJNcVJ0kamkxF(sL6p8-rkdgFAxml=b(PEt zZIHui*0>ZEvxs4H0yXc{V`#hlYD7`HWBu=NIo9Z~o8N@q&83-Dq*HM(=Y7o(Lg^cb z$3hv;1RrvZh-LD9e&{46^_nq5yCYXxXMh$@&!AS|d=eKuR?(K!Gti^+$u-Swvh zjbK+>y`*8>1^cB3MNN=n%O@K-aVhX^#fmpe0A;eF-&z6&taC6Jg6Lwc zJc;ae>+wqsoc!-knDNfU1&T?XS$tjJ&8SwqTc)43Oo@I4IZb5OK8md?Xb#0*+`-edR1{Oj#eL;d z`dyZ~%D-*lP!~`9;wxVy2`)o7pRrESvdN3c?N7JFI8)27+|<@?mBH^;;&sfw1(=)b$f_f zQN-E`WN)>5?9Mi@fT>h(>%ZjvgW_ZcReQJCdA+EOU0zqlFm^9dq%gGc>-WR!_Tzt0 zM5c}o&wh1gUNWdzZMU-38_Q*=wZ-zpkJ!#+-n3tCh4P#wWdqH4$3KhJ8Qd22gQ&G~ zEyi4Mq`nEp0!>Ub_^=v=pdE^>U%90|sx}O2w6p8j_Nu2ZGbPr38Yl5fc0$Yd7;mc^ zV6Pn*pf*Sm<~17y+KTxcvKcG^j{b+S{;$CY)PS?T_Ye24CNz{Vkx-U~xH%G?S>^_T zO-c=;bqE-}^O%#&&FO1XYuBL_-oK4t)k3*mE=Y%T;|Q+Qpyzr) zwwdGEc;V3+n-Sa}EH>fw<(-7(8J-~s8~Lsc{zW>EHUTM@oR~_yJ9@p$`EhRM7{~#4 z%s86+!9n|Q>u9oI6k}$l8jo-)kliE&^SD%8%Vb*=dtv!hlJgKfWX(C{aNlUM^)7@(JUP)zSmRSlBmFE2=K6Lx8p|mgYLBR%KxG#*MiXC9U7xecGik z9WB6%QJ;#`7h;JJHGog-=wnE~10qOjZ<`Qxin42U{KQSx07bmzC{&xdcKoSP6??l} zomEx9LE|IIezB=QezwDW{uEFh$h-#LJ)rDUflV@Y?& zGQO*jeD_9n)PMC?x!k7yM*$}Fhr7|OwsMZEQoW}3tg#V%R_M-N+7AmR{TIt5x?=6> zeUCfk3E36Z;L67pKYqz2-5iMbcMa7NUaRuNt1i@j(0o*#;#(4FyBF)B7#IBD>@g80>Z8 
zCn(|S&@Dq$_s8%RhZd75AhZCP17J%B@sbhmx?}%MB;dD1QZ2;Aqi(I?y zSc3{_i{Rl%Dl?=>ySjPFTR@DvcsrTan!vyyoYeANvG>qVF88gJUa^u$VtXDk4sym1 zGMapM@$Yc++fUt_P>VnsRa8#)15<|Ba9nVgfYR$6n*hHm2m8ZoVo-IYDWTA=3T_}u z-IP3JtSPJ+P<|YURYnpnksr6U20S{pBz`=(GU%`;# z%$BUU8m|v)botkIR%*Ol=r*xmW#6lIC|u&(AGj4J39-dotsj1@$jM&eEzVMovAAC# zIokT4*zU}1YT5PYdPR|{cJ8#C57s{OcW6gMH=%W5elA1e5|OM zg#CWHCEX_AK<12eZ0VN{N@PZA&4%KH({Xf+xv_DC&N+pA(7+$(uF~TEU{-yzHFg1cB$lQF3owOeOwfvf@g9n^-p4`$5`c=9 zE=;9TJ`(oc36uK-Sy%LkKd_F2!H6@XWt0zi5u1(GcbCV7|FKYVkD^oYTbs; zL9&2w5OZ_ein_}s?XGb&AzO}cVYrnJiIuc_PLPI9jvkWM78y!v>QIM+3K7TDTpe8A zLw%D_4cTMqUBKV}cBlViizfBZvvm%FdH0BIAlHE&B-V&T_rf7CS9u|T^pO0?^f+ts5bBb$O234^O+wM4r!g5C zJ+ehT49}>6u`QNFhufKu19~Cl4-~m(o3TK$f*ua*L2*S4-Q!%^ zui(BecMchkPxX8BY)^2?me9r7I-6|fjp#1G#tf*5(z1W%^a5_={A=g7g+X0W`U%C^ zR-q`<0K|}yKI62^%u`xqE)saY3vV24W4)A3&t5o;?d(UZWAH{a*BZJlu{%K9>^4%c zV~nEB>h4gGS-u@^pZp^W=1@UXQkLsGf@c+t0>&(^y%-(CVahlwa&{ zvYNJO^U6k6gNJ>q)yGd;ta+>}KQj3kw75@=-1EW0vxe?ZM63HP94|ywc8<^CKqGTU zWZlr#QC&@R86x%i$0rYq2}O@}*XcQfI5G@s5v?!ii-{ZQt&F}L?|K>;@3E>ou^M+*yM51K9D176Bny>EA zF+HN{QuT{ah2{kAZ+Do`4Ca8-OBksX^?P+BHPmiG#IKJjQ*4GV*@Ra5mO<>*M(O6Q z`zoyM)YVc<3#Ww#j8jsk>^Ueo@1Y)>w%5Of~iA>;PK0 z#oz^avhp10$=>Iyg)L&Xo`De_$(W}(?J*vx6L@Tjpy2>De&)7#>>D#+9Pf^D8rxZR z;QBO`!Ww_0&GznV#F2Xghpq2p=&5kO4Vm4%sp zgAza${e7-7ZYiUD)0KBj1?#1yJ-zi}4OVy8Yf??Wh}?Kc7YyZy!}g_(4BT1n$6tBc znd_^>3%sB$OEEr=<$;PN&1C+oy~@A-djD(Bj*1N2 zo%-e7#FLg>ExFSgPE}ZA)^7Wa^IX1PrcwTA#&)MBHaOh$iHUC&CuhjCr*tmJ1bW{k zK_Iqed~N)e?SfFuy07~sVe+E)x_5#%Zj#O-A5}_0P(?;PiF*6EP)9R~7oK5SkvB-_ z@>{pphIjW`bkF3*|4^SC1e8TDId%*NZy8yuJ5kWv_nrl_Wsf2wXG8kayuP&=E)y~3 zbbdc*=P)|Rh^xdyQ-^n@h0Tyx=BcRaW(Kl^HyMwqqP zG@itre!-?CXgeZu%0@8Y9~5`p&a|$`$^qV$SLI^jwgSWk0BG@lwLDu6eeLx4u%W7$ zXlv$dr6-;#V;tp4E_x(G9LWAI=G{XGQ4hPiXf$I)%xf_JSD8={k=kq{M`IXd6VUo> z{D>~0lM}kxW?2I`9<5jA%}e3}xK4dmQT3a+vElndx~}F-Y-YXhbPq!comK?ij8Jsy z_dPZF^r#~c(3Fl{zUt!10U|NH+R7fP6Fw6ol|;$=^Z3^NrRgRI<`->&KQG?7SM71j 
zM#JlK5(C@ZN@uN%mQT;bZvzdry7#(r_N&WYa^zF|ZfMajleSbT9`m`;oi^bue_2?Dn@i^{u>r_()ipF<2HScl=t=50(EWxu>{v-0yQuIR7TO?z* zT{h2KG5piShu!I1T2;h8PhA%s(RenSuRwvSPIh`Gwi%%ytx4Zg7`p{AFAgaFz|#(3w3+Zky-6 z`>Ea8EI`C}HWG~U>7_CJu-On>NcC_FC?ejTugbzbW3XQFMwYfKHk}k2OYfA^mdvWt zSk$5bMETh>HhnULD9|D3^kz1XK8)xS^OF#c#^LqA1U^s<{>+%hK!1_2Yxjy#OQLJg zfoc#sAeT8J^J=IlUX|l;!m;8C0~XA!?%&D9_$k*fp}DozsMwJ!1VVy*-T^H%?M3^N zi@Rm~Oi7ND@AY>7e4hOf(HY^RKGRFO(sxtdy^R1RN#{+kevH3woy`&N+55u_2u-%it)tb!XC8+3r^T`-5aBH%A#{>Cs-7UDzNMoTVH8baGjn;!>LOhVfowH5;#3VKvm#B4fSu5Q zPtwYZK27)SdA3cyZVc8k8k$?{VP_TKI?J&}@N#y}#(QZ#v#C2yE0KVMp4QgI;#R9=UN_5{LGBfn1%oAv{qM0!tQw+*{67fkxrqA8xHXIp zXwSCjTW#MiH=M1REYrBn6YKO(UQPd;qw#0;$(%gzlMzHsh(Wd!*L!Rh^p;2de%%fk z^fW@b3q|4q)@K6f1n|0BBj18qO>Ad)9bg}}_~ojuY!)JwJm2F5lClQLnM&JPjD5^L zfD9v~4u0R2;!giy|pn}W8&Gq&l@;YhW@;29&ZRCg%mac zi|hgEI|Y|{wvX|C>M08}U4p-cgEAbv@y=h{BMJ{Z<)&ZL#k;t6t2m1SaAgatilqW+ z0@RG)@!XPCOAAx^_=k6c4bc|S;{1D@v8~{$H!X|-wM5>Oc?;C*!o9kIJz~Gv+dZWu zqvGbC1Iy;@KCIDm07GFxVjDWw%}fJlmnJIZ0Yz3lLrpgBCvj3Fls~$5=|Q)r?TXE4 zYyi)T_J9y(-L5VRG6^UvB?z6G6+mi&!GgZ)nnJybDT!Qgbho#3Pp9(D~6XFOmaHnm;FSON8)h z9k40II|Xjw5Wo*9*E{I~LIW;Zo%mKo1X>6PzXpZ2iWOJ0Oo-WKKf&~@=oN&fh784@ z{ea)?`%bk3wAgpAUJbq{OMAn_s4`JWB!r8ix?e%fI;^fgMsu?E;SP^z!+6ODihhHv zPXLHsz~vCkw+H5`qy0>Hu3M>U*Go`yqorQ*ppP#8v318)#VYwc*@LQLn13EgwnU5FKRoaZ!ETf909q;xBpICdVutT|3iRHX&+&P5F zE}*q2h{%2!5>mdoLF$V+7A)GIVWZ(lmFKgL+fH4;n_nlMgsG^MK0BTOy~uA0+gL8N zGQ#4sOASDjp!A{XoVIjQU@;Au{1zX}J29BTvYLH6vjWXAmRBa5Fo^@R5qlLcBIm0I z_=e_UD9IT@p@c1A+K+o9z3VB6{dNfcG z9GSH49aepWYt^aEYbn=%4Ecyw(Su-@hHK3>rtthq%$ovvBeAiGyZpO-SsuxGlhtu(R;Dx0SEaEX%)KpA>Lz;PipxH|oW>%2X zL)Vw*Ki4n(qGDTvt6v|P78Tu5 zo;Cs{Hl}M6l{L>v+?!0*2{RA#CI;5;)vC_l4eKgcUPrzYC66X(;@KLd-!{>`Gobcm zNL8Fxu0fgx0xGA6pSWc>x1rq@@&MFY=Mn0`Nz1Yk_Wkdv4LGAhFwTRmMZ2+1u@m$D zC;G|4Az_D!TM8=nxS^Eh->ar`ofW~@*VucxrRG)~6dfK%^BA%Id_l1~*_NPU>{Tb;KO+L*wm5N1#&trf}lb z1EzxnT>|d8oww73IjVit!h5BnFJwa{S-#f%Rk6HbR8ig8e?zL36RqeSQTxEaJND=AAru5Dn`HX??4Hr54R`vCd zZA48mz;?TG5dX<+~F5Tr*@v3qTyC#X>};(70> 
zdfL3=M4Hn&E57PwXP6?)W%i~;+OnZ$y--ZEo4hTZM{|Leh&$KLC7+3l5;L43aZlnW z*j0G7vgfg--uYibe0JeE?B*Ue2eObD;vX7JJYDWH{d9?kebW+NR-t*dmJC8?#EQ12 z&$S&B#``A@dj=VJO_6+F;Gk^bmvx-1 zp12hOUGLq1NxEvpN_=ja7q%NII4H63o8JqNprF#OH$h+4AU-B}&Q6s`0ESPC+&&Sd zijVM{i2kz)gEz+2z{whFj(QvU4jHM?jh#b8koClS>otCBb!pqR0+Fch^f*D4%;rq^ z#2-3scoB`k?>z2js2RRs;~HaKAFHF2*UOy~;rsr>cWu4ZHfK0@qUxsHygx7$M|nu< zw;}~6Q4lJkMIHQO>5t3xPMkw6CufBd^raQH)+M!{>ov?|4_ndbezb^!OW|!(9bliZ zvPRrJA(p=VR8OfcN8}j*<^R69`_Bs*_uVpYZqA>x29v^O9$~m&%@L_PU!k6qyoE2m z8|{TC{%)`!XDvuHPT3Ym??~>j<+seS$&_4jf zA-z3CkEZ{^QddxyDY$&#IP+bQO_2^K8i86-nvTR8a0%gt%zp_w_|Gk8#@(=L|H3~g z8wtcR@p|}eD^!6MR>mx@woED83ma*ZCPgyYiqDh3%cPnox@mo6d%vg&vCR{*0LjO; zpBi2+Fvw(kFrQRZg*Rm7xmeXym-c`b9r#x3g^78GOAT}?;<9pHPb2}H=pU5~ZFk!w z@L8o#B#g7y0N?%-hM0e6ZlYjd!8INg`D5vt&G4@K?)Lsuf>dZ-ciq#UNcDZlm&^8> zKYymBM%hPRRH@?x0}dP5bvKb219HvfS1)N=0FmY}TPYo#_B2U|>|aTUe6geKVuYuo z)QN*v*Pw2FZ``-(584`ziT~Ux)F_Rt#O=U(}0Tt@LWfd`?V~Ca9oqU$4 zz8kgnvc#(_&6Ok{UKmEpiv}pFjL%qf{4mlkUi~Vy1-(c5lW9EbA=-p77M>gWDPflZPvDChXWT6ZS*y04#*09`K3rnn*QldmH;*^lDS-z-=! 
zJPgH^uwS_;&zttrY=k9sPvE=VDCOUsO{M?VlYR|N!4SAqhd3w!Nq`eS(srMA%t)_> zkd`}*=!b|F)L zYYoBTF1~N6%q0^-_v;kcj!6VAFG~QZN}VhRnrSyAZ5j2c?X-6qu_Zr}O0k8DvObt( zj0)$EgT7>#8mV$PE=h-H&SSj<&d9}s4ve&nyH2a4%$bNiq%avoXa>E%6-~RU%iuYL5K6)wAsMmnox%)%i zzA1XGKgMfRw099n3xu;LQxhrII%TT3<8m_rQ_OOefuo5vE((bqtS(}22AG1U1Tu10 z&QaGzQ^=TD>B}#c?*@NMeOj(*QmtX0XbhJM#I3w?Xg^U*qMDe2-hmrL|NLHXR)VN9 zk8SdKe@-3dU;Mn>i%v`k(IU)Iq_tCUmtC*z>;l#w&cBghT&-GPCe&wY;#@jN;VYUl z%B}o8hoMpIGNdxN;q;{2HebEY@YcOx{A;EQTV~-eVGm^!cHT*LW$@B;-V-ZbRYDsEdWY!=L|+&(qt(giFwvxH}# z0RUL;)LEd11WRh5ZZh6@L@pG%EsdK~j5U2@_Y(#I(}p^tCi{i^Ls z_8^URKU^&H?#pGNJy*QZ?=`y%|6CJqHv<>Gqve~t)Od?5Q+$9sGS4I-p8|}r1;3s1 z4rxnIhz`BP^=RtTQ+(v2e7|p!nU&WLb_|MH(*udSZ84jo>B(CR^+QOZO9@!~Jp*Tj z)NCc$2X5nhNFLcVOEqow$FpQpz{(a{J+6;P+ zi4ZoTw~Z!jwfT13k96LQxR4s7JE*R<~ECo4D5wonRp+V~piVCTDf&z)7LTIjGPbggiL&1UQx< zeZQ!ZwCx172qRI&(Ye{ANjW!XINP%*e}qdSZ$~n`&m9dz6*}ZKzZrMGV;^T#TXctw z=m45&MRyh-envWu&z@dTD8#*8R?S1u@%t*=JqmIWeME$O?0C!MuDa7a`{Wa1F1$r3kc*cXtiS1X;h(!?~^9Z?dMzAFzI=?#3*rE`t;Ed$={qU((q4M-sd?PSkw(bR#=l((Z(3FeF>OX>1BPqMIQ(`C`k{QdIvSKqL@ zc4b!mLHhJeNwt4p(&IO32O);^OK`V@EFmf(Ld5G)jQH~Rpcg??Lm zQd#4!>q%$-38QK^;9!XjWNqe?^xQ$&61~}Ff9iD>Z#j%8AJ|Q)E`9LOj*PWcb5_g= zr_PI$ps{<4_J@xOm5#{%}iI6djw&qdRG)m zx-8FE=W$kisHJ80p-RG~oUY^yy+}NtecaSxsF&OJkd-lMnQu?*MrV_V*WZt^axt+h zM?&DVCe9`nvJV?_XpTw(%{TX->14`6*3{oBi6ez~%#? 
ztt4vf!MQ$Anef!~7^K{U-;L%73|0Kd(J{kz5VII8lYEytjfp~6>G^qw!X=H6fC@k2 z4ZC7+QDvZ(X(R!{EZlKFe|qG(@xOwWtKn7`>Z$A4THG1k(2el5oR^3+Y=o0g2_1w*(oi+c+< z;VV`&vgjxMdfV8!nZ?>iiAtWJ8=jEgT3a_i22(Og1e>kHmtSc-dSZR0hu*VoxK;4q3QdZgoB?B*KB@XEW9Giz`Ft$DQpcA%|P z-tR0+VH;dB5CbJwYA{Si_&XM&=YcG8c)coob#4vksJjh>{T{BAbG+7~KYXP{ZjRS< z#NX>kOh+j}EBmt%-@xY|6lFDPJJ58O70f_cun~Lqe{@bvfpQPLw!3YdFIXk<K766Tx{6kGY=C=Z!q)6FkXCfCBTG4(ENglBOHK$>B1 z^hWa>XnTgW2Fahxs|MP=^z5pOyG+R0x~cE)3-A)8cR2DLF7G=(SH)61|3AIGbx>Sg zv?mI|-QAsF!CivWNRZID(=^(+1PG7-!5tcR4X%wtaCdJsc<=zh1K)7(yf<%d)t!5% z>du?LPS>eZed_FU_S$=`^^;MJ`yuaacj5Qk>aytC&u-mZiSC;-;|}3T%B9xO&SnPe zhCKbVe!RN9#6t=PD>6FY@V!+MJ^ZR4A4mBD+6W3fG2`I&%>N(i+%_#WEZfhqEB)rK zq3$7tF%iWYZrgqdPbJ7;Udqc_B08D~>yKBoB}}iImss*Hrl4TD&zPA{)xP9MosG32 z2zE&)?DxL^Gv4`7h+;O-<&Wel%{az8-J<+ZqY z9Q^bY+GKp3+Pb_bLw%I#1H&Br^@F#1qrWcWf+GPF3HLPmANTVE2c z*iduExC3)kHbzEaB8;9EAmWpdAww(^CdLDOPm&S4PYBZ_+qcI5T(^A~2~*EckX$ivXl#RO+7=5}X;xvhs7I z#WYvpD6XlwU84KFx0TR9whIGkS0josRDYfWY?1*!wnEoCMB;8 zr=ig`5aIHU^}RAKE1_sY1?D4u1v)+UReF56Oex(Eqb%)+7U0MGlQwa@uFx6p80bAr zRyvXS#I>C(ak#*QhXx*?yZDr-#?@;})%ek~`C06k;K+*?JKI}(#Tf=*I)bX~SQI-T zvr`Dz$!Ha_3XvKZO*FAb3PBkYu^Gbce*l#;xx=YWO9Kqu#(u<38ieW1zN#Iq^ItYL zBGoZ`%?@eR!hPw$dFD^s2~okWY9FerDkgUJ@_nnL!l8f=jKK_P9?0qX@M4JV7xHMU z8j=^U(!seD@{=XD#r7@g=Rcq?zMP7Ng;Gilh~MymiDAdQ&Kq&R2xR;G1!PIr>{E2+ zHT=Hk1q5)H+l3+M(|kGJ@Wr;+?jNv%Mz_{k+I9G5JD<&~q%Z`3f{#}xFWX`Xku_Ah z)a2elns0rN`*YlQcW_!}3lgoPVH%zrtSech(`%*sbdKw-vt;%hxs@en7w1dMCU47N zEsf1mRS)sIF1m*sk5qsQ6p*2J{Uk7+v9qJIH5HyAk1#xXeaLX1Ki5hg2dlhQt86k} zLN`>#AM|Gt`*shEN-FBP+?Blc3;F=OVDl`H$|&FV(Axl>05#l(tnaeBd7P;!FPeF* zTzOX`Do|Bcao7V%FeBwOyOY#zL75UG>d4s9xCd3oJJ+{be(G; zTY|SL3Cfi6)NbWqi%4R-kYoXx7BHAO7g}U5Bq$|Z165Gr)K${uZ}l}TPCQ5jkaLv# zvLtS_R^)RxaD2ynP0IP&lK-udR9sitF_dKe*!k5^kQ8?af}CI}qT=uGG$u=&&^T*? 
zo*@U~z6{p3cV*Gj)(#7+$m~ATm(OFp7keY0t5PHS=T9%fJ(Jrl_2sq2Y;O5Wu8$4Y z(#oBs-OH4AKw-0o%FWgCY2(|9W|)Pq-6h-@Rag#Rfjl`~2>*>VfJUX_(~FIbCso<7 zUhq2mV0giYEKgyIMXzFSg;EQLhcl8mp@cQf`j24j@Jwcw%q_|#Bojpg=Q`EYLSkje zX4DrYku7}}-b8!*4DCGvu59I-4_>F)2laP*S>RX z`ZErNL6{6SL?+#sWoRCPV5!2>5Hdrd-=z9!bw`rMT5f zt(-XbCGD$cmB+?GO*&1|L*lkHAK2mMHuVEuiT-sMtoXQV3;(QNGk}(wrGMHt z4VlFI#FT8=?6CCN^C>#Qs~(*JRd4})C4*XyhoiS)fB@NvxL{yH8#P(mgEV;(YR}^N zGfv2j&5)$7#!ys4du6ed`GVS7$Z-se@VJW?&}PH1I*Ig#$uO2Nx)XWUji70MxPtqoHZ~o4lg<3j8S6b~B(sG=`y42cM0g|6|L;JCx>m~I2K_r_nV`J~IA<&mPoJwGiDUPd5OcvdJwWKrCwuyFz>0&C_C;=Z8DhALmB`-We+B~&4y{*E3`PX(aBH%G+!*$fc}(O>gy*sua;JLL ziOFn6;(TqjHlNit2FTJ`7X`nS`pc>mk86~@-h5}Pd?CbWXsav{Pvi;3U-cl_m^-RP zYT@S*042Cc(Z*5v;GB!amKU+M#VX8Dxf0H7B#C9x_CcE6$i>W|TSdRt%f4a3w}Gj| zA?c*l6r#)VEzgcm0X)s5GGGGX0`-C5u|*AEv}T)yo7Ob=v#OW5QayQ(#&25wWz_GD z7-8{~JjMvd6skIv4^{R#xtEj4HiWc_=%HTgx~NP9J35DMV{pAMo2^dUgLbU+Ka29* zwHj)pU=BCbbb$r467xd)NC}Nqz#Ee&F^+H8bEh5iN%cZR!Uu9FI!of-Jk9E`X#Odv zsm0up+suIR@wzYH1)6}}EC$~&`J`F3GO5v?y`QtHPt|0OXO~$*N>#?OQ1j(5As65_ z@Ss#xzQp0!x`56e`S1hdvDytm&YQo7j9DGVyxK~Bw+Y#(LmI=ETmCGRk^~^%c@M7KU zoTF$dLSnkfz=3@6?WS*00>PIJ zh8J#s+4nIS%!n4#Y1#esWZ0-fhEk}24=Qq7t`DOE&fX<0hRnz5Q_J=T$5$-5u?OM}t402X(cpisMyT0lG$L6ro-U||!67Yt0Lq4y^vVbvrsKguys?!7@!m8Si z!?~l$42IOsip{Yw9_~5yRo2k(-5A^43*Cxa@plXTvze3cW5#_II+{utK`5QM z>Uq6dcsO4(1AA8-j*ehIf8tdg=pOMm@a7w2@+>RGMs&F*2e@T@8va2|Q#JCKvfcO} z$85Pv7j=Kz<*S92Zls@L1k^tYBkCYBm2S%5jVuF2t{=Mc%67-sb}7Ec%()?@q9}tp zxk1H&P#O+00;|5&MskI5;j~~E8upS{97m21e2y|sy$z0GY#^)@0(5JMT=Ju~s_mC9 zgO_gEFc*L4^^KZRfBedlvLG_g)r&yo#*w*1VE3`G7q7mb^$K3lFsJMMBf2IT9pq!gEi%URLi$L4U*+KrtzjzC;|7A|Se0dE zFNqqQDr%1!s;p)s;+}Ass+=QqHwg`Da2fsUJokEgi)kHB>{-I@j^~oDz&`a4gb5@f zxkH=&tJUVjsm&R!?{P+opZw*-IpwMeoYE~Cn&ldZeDc%mS{q8Tq*ua^>|){u%1`h1 zt3b@U<8Q6evk<*u39Ph0j06RyeV@Ag-F=)L?~vvR<6o^JOZbsYJuDve1!^$v95csP z7dncfaIfQTTG&bV#VhpZiMa5|AY^h~uiWx9(Th@#^(e;)zSpa+i=$824xfTL&yo-X zr4CS5Y|PB6@*SIanWcJQ6gdutYIqQz#1xY7mm3|qnM5QL26j1Yr)c{sKq;=={l!99 
zr}O~#oC*UI*dbJoO%e8{^J)-gnb*bG?il=o#8u_OTfJD^(7n~b?;ehFn0%O6QrLse zrE9DNoQDfU{S;%S6Ey`ETNYaH#U~r;O^>%Zc8?tDpUpd<`QhIgHe<-F<|b45F!C~q zR9|K|?3+($<%t8UtMAh88n_D^Qb(m849Ytg>>I~==4|rGa8WtgTX!icKFmQ+lV+>W z(n?-l-klTanZB3F#gNwS&>r!;B(DH zUMXFv1YhWEnz+TpcKTS`qHcz?G1C{SJzHxO0Fg3gFxN5m&O+6g2Qi`S z-ei4F=D#F`R$xroSlFt9V9ilUbH?{$FapO`;^sv9h+soWLeJtu1Lrhbt+k`#7krca z(wNGY23wL5D|Tu3ZD6si)xQp#()74Eq*?pmU%ok$t(z1oO_&BeN~5e3J((M!rfyK!tzD-jan}TYkpyO_5WNG(Z@CKc?fK-eO-)I zUK;1*hPU3V$aVGOPD)FwUdW#@x0yFjgDxivl}YcSX|?UPhLvwe{z32|(0ZfsV?#sG z`HtKpj%liD7H_9Xon@VjV@Q0KAx!m%A7DQS53p6>avp7v^_~6m*6Xn& zV8lP~)i9`KcY1z0uZ(51p$me$=qS_>fKszIvg*XTAQiI3uI#WSrlF7;xB6_CZ#v*Y zR&4yPLZan9_;=scP&;H-F(j4th(yG>UG*$5Ft2i5hqhthOl@WRij?LN{4ubSLiErp zxnhU3H;u4jrwDgL86-^E*Ii39*e7EdUPCmep7M2!+$kglF8n*~7-h{Ydb7JoOXIkE z0$XU#-@3kT{BbME9<@0{utxeS|LxrKFv1QuN9~YGUNyLfo{#Xz838dYepD@jz9NeQ zKJOja6Z&}TrywcY;riSYYtsN=j@G4G)GzWRl$8>iB~H{7vI3MQ+3%VX&1-ngYX}Xs zziRUA5+EekA?MZMOubJ)-TNXv=S?@~zJ0%MC~#Bzr#q;oL-;JzMf~h9P1;xEGok6f zwqD2N!`Fw@=hS|qeqdox<&A_bIvm;aE~|7?%Cf^h=kqQtA>|TfJ;~j5cHmwJkofY@ z#Tq{35tWnC<*XYd~cXTw_R$Q=KRa1DQ>hnc`mZ@mH-a}1HG zJO0RNk8|?&-oJrCbBCXFe{<$m@Z>I6slVtl>#jj4%FPT)yOvVq3kdWVPLy8D0zH#M}pQY(>Z zzM}>n*YJ@7uwKZ9j#hU+RdE^?Q%uNL0`m;{I%272wlDUWfah@1Oq+cHbk2+mlRV3; zHi6VasT>Z^PBp~DnHgO?>if}61GC+RD2*&NvZ|*$O%jL>=!axtFR9%$1t?Xs7mH@x zqXBJ`F>E`kkjl{O*3wQ~RYEfZnON>N6+yS?HvSn&;@jtjC3LlpJ3PBVmTas}@|Wl* z34q8DvtW+Bh$VW3@v_ymeOWb8q+|;H@|HkUFbW!Ku>T zD*c-+VQ6FQVhnYqbf?L!7({E1$nR95lHhC|k5`?;QmwQq_NKlTKM~oRDCSr=NEqak zKrp=fMruX)mp37R*dJ&9xbSbky>n4h&AXMtk?is(2J`$F_noRzxhA;E%4Dz>kJ5nWD2Nk|I<+?et5#uKa}J-x1#+lzoQu zl}=^7YnH^)*kWQ6Tn_|X?x`Jo)JKgPjjvnlfP#48vLgRy)kp6+8?W2X#OdDZd3*Pl z*$HRd3)hhP-J?85o5p63`?Ksrr$ezR-;u65tr9G|yTUNh8bilKO=bszDrNkdUvsBu zBC*95HGzx`Bv!pz?<{V`0#C=Qv{QXq%7Aax%*3eMmy^JCarN$O!K(G;AUXuQ2X^c! 
zqv_YiqMQ2J{r?i(_unhJ{}KN0fAflz-D7@b_B*n9Ct;B(Y2R8;fgH*Ep?eHfWnAAW z8JYUjSEyUY<4e4>o7;LZ-N#t7vduhI{mD7F(qyhrLB}&}I)*t?x*N$NDl5=wcSldn zoGvb)>i)pC3@Nu@CNwDR&s0YL!{!qj(PP{&Z5L2v{TK|@h^>!;ij9Pwpv?=YlYeEU+F^7P9s9Jt!PFX%nw zzbSSKMlMIVWPa7mRAK%^Plx7W8Xt3+lt2M6dylW0^xgt@A3&?LJG478R&BHP#1$-5 zJWkX;DQjr<6N~3t+&3(Z&I2E+P$ls1v2HR^l+T8<{K-hzD(#dQe>qIK-WRBMj<;@_ z#{`j)UB=y&`qQHk=vGD73bpHu`f80E5()gC|Gm=B&8Z`>eX8-bN`gYfp{MX&rcj*S zInY+l1VG48km}~@+~i2j;7I)oL$N1Kh$XakLE z`kWEx>5%D$ES-pd58C;4>7m5x%LG>h#RiXZFbHt1$y0^_L^<~KSUwqDsVJL_$|B2I zxkvYj8e9e(%I!5+5@#(0!R*B3d!_`d7zw$~Pf;4V-F1p8DQ_6Ww7r?<+%V=86vy?PdtOgP zeKnL)rJ2FUmn?FR z;?6?s2mY9~8Nwwh|BK-oa|alUhmtaJ0U;+zcn=dA!;$w@gyV$in!cV8Sq9rN&Sx{! z*+lA3$#L{>=Ap9l>Ux~1{i_UM9N8&LL&f8vOrCgU_sUd|wA)m87TTPB4E3|ZQH%Xn@p@YdSrr;lU_LhvWyS&PF>CR|a0Mp%Dk3$u}auwO@>ws<{a8A1nh+UJ}UKRH@H+KNF< zf~IpT6`x}DGLC%xah|F@@_A_2t^2{ITqj;<{`}}Fsou0w%8(pq1h8wjQoGV?0F_}~ zIrU!SXpFPheHN2a`o4Uu)-p494ep~wqoxgAS6zN?rDYOOrFDEoDnv^iY)4}wys@;?aSQdGvPd8!(O zo%1H;3|eLs_Tx;P1eXXm1NC)ivu$sgtOsa%uD$YXqzthqsrhtT@T~|x)hJRnT>MT- zmC$)h|6s<2h>U94DBN=(!Q{c^5Ta2i%T&64-qB^w!jwW7O%HDlR;e1)i$hgl#gsDb zT?obX-T+S!fmtqzRN4Z68(XmQ@|&99e4VFwkju^DJRF`!E_u$gkC!Qu5UMdd96q>? 
z4N8OmU0)7;+&;-%rm_k3nHvd{;apk7l7Sg~6dm$7eKP8d}TpBd&k>eB2FamY^=~9&hi#*T9UC{L3BKJ?z$e7+TQj z7aMUU87xUtTdo$)!4mruR#W~eFr&X^Y^L21*))DX=3kX$SSq+^OBv(E=UZvI`Yg$hsF^z!IKbbjG1-?UTZhp9^ zFOq-pi<~9@-d=sC3+Iq&c&WBPnE$Tivz6q4384K<>(PF87=(%U^$^mV+3C)SauQ!t z^qjn2>BYqKUpMQ-Zpc5{{KympaS_D$&Wrb4RhPbBiweq!C0wbIn# zITge8)pgz0VIA9BI-4#F;nkdvKj48a@$}99V#kDIPr_p4u^_tSaOy<-l+oL#VJjJGBiANhR&(n0rf_I07 zP93JnWbKwBMoN&U(vhZuIFe2fURrQKVdPnZ8tWlYXAShqjhx?+;^I1K6>#mk zIc>yZMc;m68RnZloxJV?JURyXiHOS`JUj2+YD zSm9{W(;Fg(Lruy3CTTk7`KB|>VtPa#&lA|FK`x5c)UBzzkc{vHlVr4f>^2Q+(K-)w+xhlHon zhuIF3!dCI=Z%=;|3RMkU)SRPy{G-Jl`uGyVcn&Fl#M{R z9IbVLW!nyhLJ0A&soyDg@tiASL zWe9U5Wad?5o*R)X)@A}Q)&Ieaz~Ym!K;9QC4eNvmUJYZbi1^xg#R<|mb|h4wO(Y>a z?8jbCOYpzUJa%ubJ$-k?ZFmgm%PBupj&Jf=^M6jFcsy!|5Ou*V%HVF!7y>$m7Oxf_ zO4t|e&&Xp+E>1q?t-R%oja=U>lg_`Mb1c6wYxrVP)qa8JmeaVefgksg?|%0d-6c=_ zuY?$-on30cX8G>D$%dW3*zx#S?H|`NDnFs4qMH8U-}x+6HCxhJ@wsaUi9xHHDl;t7 zNg^O~-2&`X#Al*YXjV|_eBKYISKlVkMC94YjZfA*)>8U(@7C&RQ?`7IL73K==!~Fb|2X8J)x^&B{M%68FnswS@ zM-ShtW)lmJKcybJ-_F&+n<>wBG|iG(tR_HD%t`Fue}=?V6aro;pVlM-tbZO(xc;)v zK4}3v+;I$*qPygFPnT^V#42J|XJ}{HlD2})F?d^^o6XKA7{!yMD~80kedM=paSp+o z3DUVBRlJ0l{O*CVkki86edr<-@_vuWjH3p)9;InbRIn<>mVH{9POpeo)1(0;(lGT} z8x|Ob<7v6Bo6nA4qJ2&MN%eO*y*L1^V7Fcxw|~k4t(hn1U8DWuh{nhPWAU&{;_k)x zaey*+&y|R@z~Tkeoq>s;;CnzSU#Ag+LmyuB>Y8}@;gHoQ_eI3 z@BCD<6{ud$4W+s!&^^nk+A3DJ=B5U)uh-t-$j0d8sLuHUX z%0$eY8zYj7d(CGT5E~fmoSs1YP#6XK7vUcr_t`ZYpz}To(JLcaGr)T)7F<%{-XwgY zsRxpf)MH4ZAbyTX;;4JJ7HMBP-iWj<17m5mZm`q}dMNdu>BnYhu6RqoS|f+p{}bDToi3hMlnv#e2NC$+AL(#qkYp(?>2w8@|!y3Vs_~@0JmkLl5Wnf7MpmUTS zCnl+?rHRX$HWbZaT|AidKUQlA7e4-jV6ZHzJ>G9!6hV=d6;vpmn!IuaBTimPDpF<4 zG%#t30-4O}N4>2)7{dAO*4~8na*yoDVGWULjT6Ra2N?{gUqV;?rBY$v96^5 zj1HvMjoW?acE^lzWp3yNIWJkTMYYx7!7vgBGJ)|Mlm_)CDq9IPc?KUurAnOF{SgMU zJ@E5J1;cyy8=%zUwqFQ#26-=2c!1Vz;36e8j^s&tEr@segrGxByQq-_<(QIcer7DW zs6%_Zm@2X8S@sh)UV@$Qk53z++!$JE)~o8&@l2K7?d`1@Vw%=iO6sS*+o5Z$S++ml zki4f{!}Er}S#^5VG)4;*5oi>U+`iAp+Lls6b$u?EzU47ns5I z7o+J=(_T%)h@HvHU?9LkvJRn^6LlAgW%=ra+DkpC|7DJfBo@G`b6dN2n~l~iRLP!V 
z^yBT7p%vwRNlD$d(Swl38lCib)JCxIAW&NrK) zKB%=>s5K&f=+rZ;IYRTprTgT?niZE4MdU6lTcgp>-9eH*INSnGzMjlwwHD}Ipae$a z>qhlck>X3b3Hf7gYJ+wL28cx<{dv(5S`)^WZApQ7i;nua>#!O9szDp5lfD;~gz~7| zm|B3ydU0>Uw?rIVhp=pM; ziHlzy_P0k_#2;+(2T*>=PMDId`8vJ!w&U{oc zdIEbm^q9vfd^WzpIvxJNsD}R()^&v3(Ql-X;&z5me%O0D3j=OYMe!|z=7|aI9_^7+ zi^Y(oaSG4$oTt|2#%N!f|y3ZX3EC2r+o<}a$GHF`u z+S^vl+QkDC^19o-`>Kt|&DAR+qk+2Cl+H4!8>wl@ZTU4zA)muDT2%bMOwV767_+(dss%5rYtUKa*l-%^eY z`R@shCxC|}G`&2{F(F*Ov3FU8OAEpq2|mMxh-fq|dyIXtr+4~W_|`Phw>5m^8cMw- z+97dW^W;93+kL5qp@Hw!0(!^$?Gl(oo%W?}B?`oE)-uEETSYMi|1^oOgto1i7RbZp zNyAJQefPY9B62LhgEMA=4FC!xcPK-Dd#D!bYtB?IS~@jkv_16=XP191l}jGs)+4JC zFYZ}XJ2n?6Mm;64oUWljuwy;`LMf8h8QwG8<{%~H7>+$a>>uDsvRIHryNT|5NQDfv76v>q3#WZ3a_Wc zhsI7ls*^$Vl;bcm1rujC-hUh|!o^P-`_LotrsERwLhntb`}uB8rw+=e;>ZN_6?UXb zNN0d0FT8X~dBc4}So?}Yi=t-|oniUeW!g@2b7IVSf(^p>+Q1LQG>ptc;@|O(HIF>8 z^d^Zy*p?uIHtccrwiFpRxD`R>-4#|RJWZcWn|d&1xbXdUzG)Ysp+FTu7^-yl-> zBFO0wd_+C#h_jm3unX<*Pr#$sI%jK~3@+Eg@`+@MRD40I5y2bFu$FKNXwnLv1oraf@5Qb&VUI8zAO)Nvv zVS~TMH`bZdd}g~z=vN21e#th(27_-c#tDnRyY7Z{{IrRWHi-$0#J@Tczaz$$)=KsR z(~;+qq=o?+0LdRFqQnX9n)qxM5-2{ff%zLt7-L5^SaHY6NUWq;LpOGz`7=!w^NL`~ z0;l(wgJP**DE|E!DnFss3gVhX!PNwo0AEXetwVEuYSq%XUUsD#LKI2l#^*@Yh#ta= z!`2SbzfgwvTDzHjYwZfB-e00h8(NSpH zT1rtuX6?SPJ)xAnUDQ^|B57%hC}Dn_;yGq&vol9I$5u0hSDN@9C5Y73w6KEL^mgo+ z+t$ePc;yenB+IgN`}~GxDSZ8c$T%+NmmSV4+AceM_chEvW*IEhICzuj7$-|kME)Sg4n*&jR*8KxRQ#}0^!X+( zzEr-*KIPBY=>>@>A&JjgGL()w6b}>RKF3Ro#JBbuV)@`n|FzWjzpDrTC;t(=0sFIY z^bdlUh=ap%MJ5{q?nVavxtAuN^U7dt9YyJaaB)xj*GT|ByZ_u zzH&L|!~Cf>E4+Xiw<2@#Fo0!+zb&Ek_vGN)C22L?mkjz4(XR%3F&`B>ZJhX--?136 zES+vO|MnJDA)*I@d@c}EqLu}6a{v>n6 z4afy}rsro&9T@4Tsh_GOCCblO>a-7?>S;6%q`Hzn4Po&3A4A%*HT0hzX!GIpbtDJHBC<=8wlA~!*LNQV0E%J9yP zRSDm$xw?CHWqN)S0Z7ODJBRXTHQVH1P<{G@#Nvzihm%>oTQl~SF~y!vuBLS0V28(f zcBCrz$*i!kGA#MS6otxPVx$RChh6e-b4AH6W?xA&W3rsr0brfb%*xUe`sOH?m?O^^ zg9g&k^xl}&I7+cfZ>HgT=V&E$LqTPwGm1vLErl6}#R`nl5bs}Xh3rM8k!7Xrjy0|O zZj+3LHo3}~_RN`vW{_sF@>%J({ZV*c+I_x_Xl`X5%v{(=8DD3h|9 literal 0 HcmV?d00001 diff --git 
a/model/train/yoco_moe/sources/images/adaptive_recompute_a.png b/model/train/yoco_moe/sources/images/adaptive_recompute_a.png new file mode 100644 index 0000000000000000000000000000000000000000..bf862b9a943ab0f3bac88561ec0d914bafed8aa6 GIT binary patch literal 95774 zcmdqJcT`kQ*CyJ;MnsYznMOg0lA}O3L9%2?0+IyDSu!+;NKOJ0R6sI}Nm4?d0tuh*(ipK^BC8fdMiG zen7V~AZZXTHV!ThHZCp>E*Ol9hfj=;fA1bX86gn?F$EbVB?Z}|N7QsrA=EUCw2vM! za4;}FV`gPzrGh-?e$K-El!cY$?jaapFc=>X{~6bFHa`Fm_nyERRJ z06a3O*q@~>_Zay!4#`cOhw&dV@h?3+x-;z`%l_vK`}n_P*?%?c|FvrxM1X|>EFKmK z2nM>!31dme{I{;PN{0|A`N#%J?CNh}l^*-Kjg-B`U`|&X_qEKsS&tLFx=SC?MPK`^ z(COklvJ%+~nt^2)h6LRW`FhkX=;0Y_&n>7uSiWi|uC(`6nuhjESj?~f#imCKFE=!f zzTbjiBq17>vquv=O4MhAho{-DobSA{$=CW1oBCq4SKihoLin>+NR7?LHLh>0+LmPF z&1SEnGHar{|J>()eks{OA+;`-JN(uP(%KIdyhqruEUXw&*Wy)*b7d>3o!I;oWLa;x z!Zb*d|5xbig=qY+D{a>)HG4l-J;raInOq;KBPN1hO8k4YJy|DHI+us6C6;g!ew6$; z5!D-O;nq$r{pZd2U4eHd zyzAliAqC?R=AUcWas#-X5zUm+5V^DROgINu<`(1$J-n9T`Sw~}21NFs{(6;axwrVm zYE}Mcq*7fSj+Pp)0&_YU=xW19Yeku;jppI&dWcONCcFIWZn00Nt%ROu!t`go1~yl> zpvYAZ9|nDeAuPKWW5er8MV^l~y$I(`u0_ZjmW*&W=c%;?iIce|EjH9^90Zr8yw(Oq zpp*1t-=20e(u+HEqMvH}vMc>L97yp%Y5x-2$q?7ZVPGdrGSlR0+p$^GT>dSmdGug3 z25q>y_IxO68SgxIQL<S%I+szNB7BvfyH)F!VDY@7PSi$J zKn&qP+R5aQ;m>x^N9oE}vNDXrK;u?p^!b4R`nyTtaGxFnv zwY%j90;>#P=2EJTOPn#nE$7$%ykS||@o-bau6Q2Eu#R_%J21_Y`p4IWTaa>+qbNfb zhXSD-?6_ZCe2b?I)A0qcZBQ)fSZnuYTXTAFNn*#%6f&21X8NTF?UVnlJsCpEc*N|I~dG~R^d9$37JG>? z7OaM4k09l~}lxubCW$ynhy zer=&HO%d18ZfTIr65rx5F_fvkLNjQzvTvYxH;&*%gd;}R*`ihh*Rnu`&Q^{`YpVmH zb%bNaTA+Rpd@{)=_XuI2WB!$c5a!Xg`nF44S^e*6159Uhwe;`vjJh(joMr}pNo;yaGM)HHm?6@#)sAx321=&Ym!dC-3V%ws3~SFsHO{XU44=}O zwi%7sRf<1b;r?+X+Sdc#RQ(pz`hGdN zf;Blf*fpBdP(hM$FOfh<&H=q#MAx9ia8@i)bbqP!CiSHA(Q)yW^Ueh?xmlNv{InqB zsEL6ut9TIeJ2w)!wLSU2W^8p@uT4xvGL&MpS2dJK=Kwz!ctPwB^L;j=Ufnd(2Cli-&cPH(FY3o;N+f>rWj~YP@Pb zh}Rf6?oYP=V9c{mL_9`If1)O;Q-Oowqvyk$*E$a=zse4};kUyVo)zNYsOYcKcHM&J zp$VF=m1UQSYn3~G{~njruBoybeptg+6b$`71m>|RM*c$SjoChw9M<-J8qu>?*h9wt zQ9|?FzTTyS^V>3n)S5bbFKNita#S_pNh12$af~~QZ5~QU7~CjQ_=9w?{a|^4^pbi! 
zG+uDHXG0-A*zS`*cv@Ci?AYr(>u|0(%%}s(Ol*C{khVvB8B~$1r+>lz5oj=k`ywu` zpC_d@I=UhpMkSN2T(tT1@OORm2c`?lVuFBpHs{F)ky9FcqetWQ;}&q@t!cNUm~LxE zAt8+tjz^qQX4pP`b1x1HZ9alt;#7c{Bl?!s1pKo0*z%TEoxk4`;tt z>8M0J=Hp|sZjP;t;>~3U?38)eyDWOZ)%Hx8r_gV~!|r`$_2m!wC|Oa(?<6(2M2;?D z>!;73jCd1~-ItrFm=UI}?D)}fo$6&+=p7!q@}n1*AlY!Ts{msCWgD?n5vMY0STS7p z$gaP?@Y5Hs)w6UCo9%gy>R{z@i$8xN?QJV5cXjJ1bO?q&N>Cz5>G@^^yiji~1=h;{ zpt#G*KMm#$@-#<~pei23U@&tiY=XsQoNCi&(vkJCqelJ^c*;Up@h{7B>gaX_lk)QM z+^VW1gY8Ep{QY(=jx+KL>%#un20R9znJqyp*d@A~8%JM@=AT5Xlpp@dn>UbMMpk_N zED;=X4ymxCdR+T`(NQ$2K)$KEF*K`Y_`r1}*p}N+jHwCYec>#8ee{;AK+n)h-Q$~v z9bfxa-u)m;i(mt7`JeOHl8Jkd!_t!d-wAU%H5b1-L>EBw3Cfgm4Rr@75#gZ>aRDue z1jZ;LY*IpRO&S4i+0_Rp^yYD+lN#@cv$%SML&a5p^C!$yV48seXT6$qDa^-xO{T$Q zL-#GweZVvJMm$J_Og2MKgQwkU7Jjz%M6F<50sY&P3eC8KCaZCe8<7%*ob-9!SmMme z;3NkKo1se)htd0CiwEh^&2Nr(+(7zBo7dI4mF#McF^a2fhAc4GM~{u@LTukr?@y*1 zWclqcmx(ix zGgGaj_B-|Ib7=^HTy5L`toVX_y-`5F3EyL6rsEMweTrKqweEI|oWEYc%~&7|(JeVZ z)YG6#vZaOl^Sxc(d{vtll&hhgTJdMN{;ax2)7x&uhLiH;dR;J=DrDCgoh+UR zI7YHxuhX4#-wOz|Lpn)DR&Or+&!{S&v-IX|MY*uFxUol-gYM0Pnv*5FK=QdXJ7tH( zVXo0+R`Y%djs#9C{XrooTG4sftMq;Mz@-8Ys1$T(O$lir3`-Bkh68H?9KJ_%&Bc4+ ztJe!jZg5}9y?bLhX?Cz0i0m2*r4YGa>Gw7^_aZv~{FhhPm~0xy8`esc4KcM%#>_+J zxF9N1uuRwBtEfuv4FUzed4h5gFaTDjtigv}R)3Lj<3UiNu2eY+|8f zMVl0G8SP2!Ke<{-bT86dBjuuCn>27A(H;ff>6 zH+{$b>w})6>DphVXRJYC;z$sPy-|zfda(*Fsn|zT%KDvUe?5SCZ24@EU`{Z$K+f22 z)G5ze{VjqJ)6<&%`4rRQ;$x8&XhN0K(hunQzEY&}KGzU!nc=H`FqjRSk>p2iCi(Nx z_2oYwzx4fVH0z5JC6ZK1U@MY+{FX%Sh#Kn(-Biptct(V`9iB5crGKA2ztXCHreA8^ zBFy}<1;bQ#k~k03-_Q{}mYS$CjGO#>TgRDdU=*%W+P%;)f61NKbSw%(EMQGSm z?+cC3<=Aj)|Kx)}g>Ri@GW?2ZVp!%!AN|LTxE7vdE9TfDG#gAgGya!V5hXsJFUw1pUp$F)%ywSKW|`UPHOY`^J^Ih; zVeZC7-fc4v8WvENuy)&m#uess)P7Duzp4=)*cWVcnS*LV+}5iqWb)FIbIHzw4?o|YM$PC{+1O>GW(K=2g%l6A*$XVW z7%c41qT4Bf2>S=H316IVfbimE%r=&cWQyN&8e#hZF&gJE%KNSN*CJsvW zeQ`J`xtlR9D6uMR3?iT$e+`2Z7mbMb)Q{OK%LuN%_bV`mi0M>>J75|;g4dr|Yv;^BHKM*sof8D0!>@^!VNgkz!Y>$S zk00BPnpGi#`6JhRmy}+*obYCMw3!XL&^_f84#F3j>airtv?Z7c2n@W$<%S&sCY)gSHgSh(~2(?~c24sE8I 
zFr}E}in0gWCX_r{sZ!XX(xxn)!sS;mhgs=neVGg!pS~~ZvA6d)3XhRSLpX*l?-b<7_J$>gJ0pw*^ncKWm559( znnOCYy!f1(^F#qo6(#EG6f=a z%ota4-EFtdANZZ*b2M%Pp^$=^9;3OdF_oT*B={$;Kb;Jj8pi(x7q~xB$E(0cJ+AG| z+k?GuU+LH0B9z8&o=%2I^K4{f4z&FZq1)M1#!{#ps?dMVrK#dU_=a9Gb-zh zxrXWQ16XT{d31z;(L-sPA3Ew`!cW~8f?k45Jh}!b#L{YCXrKKbz((CMW|9n;3LruS zevE$PXa_gaZHx{3EBYm8Y#nikzyG@!e|a@|5J)d6NKK*W#Z2q`)1z16F3Uf>)k`aF zh0Ds}WHITtAg723SmO)4YcSOjJha$H*-@yuEYP};Hy7*TB}$G46SkFAmimi* z)X%W76lIoE@|s5#$g8y$+tnfG{O10> zJgFMQe$NdX73ul|`CP$B86K&4j@6`qZt=Na_FZ+tONUJ@(i>3vKWrW^IaaHho_&W> zda%l?BIWp(+t%G!95Y9MyS*&2YqtQLD(Yj(PenS!g`zV8wH{tlCqjhOPQ8b^1sz6g z&8Xs~>Tvj4I(&xW3rjCG@UfqWhE%^r79pP3)0$e)VvPu#WnJRG+vwIj;}sD7FE@9t zEcx<1Xt!B&_T8db3jP=jFim*#Np2^Mmj*=EyMAWOH-aWe*`^iwItbGEDbP9Xg!xJNm0}zErWiS5 z?iPV*3I@5y<{OA4cLF>|$)UCo(E~l7S=l6G)tdDfANE7}R=DxB&-X4=TC{!Tj8D&8 z#6CbVz|#hj6Q+fqF?i6eT64DUF5iNdbUe5Yh=?RloTxp&5TMdh;?U$eAIqb6bw3ZI z5GUDKn^5aLwDBq?z*_OSNy-cEg2COI$5H!ZhCx#u@Ydgl7ISS<22~`v)6c-$eqOL8 zEhN7QyiBgUFZw(`cN3#nWY*dH$tYCK|710P5g7O^=KvR)+&%E9+Oj`!>EWJB zbo=!wQBWfx7%k~g2}RwMIi;DLOjrx?RS7#o-O!7t^rz--!HmSkvqlGyH##QnH*lkI z1jQ`~zp0hsJ?fd^U@P-X5w7^)0V}xm01wkC_Z7KkEPBD|1R{y**^|}##{3}qM^i1V zR(!MTvC2Y>6E)o9YS@heK5=$q>twbR{pl98N#)dlUWQv59Ez(Ruo!)&Cwcpo0{X`H z0zFi78J&K!=JwHfbu9AC&lifeNJ%R`I~3$9tK<8o#%qc7C9g~W(7 zQ9ZCF9i&96>(;s4#q7eYvoCC1$C_;4{6R7iFYH1tJ?p7FHqjhwD+`)WX43E$1R%~* zU{Z??gMFV#9b6F4&pL{$4$1>&UQ_mHoqpxXYmczsxNK;=aoc#DWKVBHPg@_U`&M(&$p8#TSFm{)wNt}G zE8@ra^Yt=j&Atd3mPtRk*_=~lYpX_oq8=|6J>rmVg)&ziDJRdocjs{P+KwVFDACOY zXnIP2RTv{g@l-Ku-|`2*9{vxaYI$XYgYwYcxSjUC=m(cdveo+FVKVb16iC@=*hhPm zi}n^|w&}<8HxoPXdF1C)><`#qX;e#F(ut%ps*RAkHCl`>+yVNNNoL_~w+Rop>;SBrBL})@)m)LtgD+u41PdG1j(NgT@F_a{csUl47-ebR z)P2}Q?Eyg3Gx8R$@Ou4E%u6>DgBMP}W=P?@Y%)g zJlfv2Qu*8~3D0@J+K8 z?$U{hWj;$!V7~>uBEAJJSfmKme}bF^7o)HMR#~E;A?e`(<1I*1sZLavi|&c4k&C?I z?xNqC3J84(_zS3oGi)uBDCt~sO9B@Q7pL)U8LYUHm8p!l2_ULf_oFox64>f=cmh8e z9i$m$3$yc)fjQvg!h5?cM8HO`Z+XoP5Znk}5j5Y{X-;`Em16YOdehi2V}VZ?%(Wil zW(7^wM;Nr!89(MVIf>r* 
zRiyEWbl>IbXwTx@aesh4*y&RlpaKThl#kxb?#0!fL5)+s+zlL$W3(mE;c3C|KAEWSowJ;{nuI417F6cMEb6tteV?TtFj2o5 z-h}+rlc$wvP(br3d`&=J%G1J2#QiEFI<;SiJjmRSTF~565ypQlNbKC_jT*%uoU5iH z&fl7g6n#*AjGg7|En5-jXfAxS1bG5kqR$D^Vsl=o(M&YEet*pZq7f@WKo!&C-CcH+ zP8@dMn;;p|OJ|FEPf{6GRb6BS*9B~A*5yvqO$FzIFxr)RtK6XR=7G3(Gb4ju%)-Xe zzJE+~W_7c&qcGujzJh+k4X|@EnEyG(D`PaT5K^M_jO7Y9V3WB(Vsoltx(znR;IfNIj&n;}b0(~6DYcPq0w88LTn-5PB# zb8#i;okyLr^7~e=&TkPNE>;}uv0wIkBwzt8S7D`#-!4BG;aa{3>Yfvy3#vbcXj}e` z-nKreQE#%GB}*DQRo}M3S`y1}`26;fF;Ao*HUgDLai#Tg1g!Jl<2uN7UN=>ZoX!-W zCXV-hn_6W%$f(C%|N86L^r+2f%S$eJrTfKrCVBm@-<1(~1S7!vS4Y2qBnP>qk_>bD zn2Q|2!ZJw_-u(%I&Y$Ujy*rC=3fe?$*se)=z2k0=@wBRZZbs>hi?vd6P6tzb62ijW zNzR=vuVyTp?ijw+L4Y=U(wS+z%(UxtEjOv%tuF(BjYC;cm-_TP;N14k^eGv(}+f&VM`d+2# z8{F+q=aIgum*+W41h)4fk5%5ejLV*0!LRbAzFJ|xul5-3*7GsLt}l=`n}m;D4Fmm8 zBoX!|Y9kJkO!>tbjWp4|`PHh=1I*4ph`1fFd%@GP@1Ian!0V4sZ$bWHeiB*F>N96rBuMn)TML74HljyVwy|O`&f*^I zy=M2exxb4-A$axuT)-CKbhC?%+kkQKz}@^QHb~usG>Gyd;e+c|P0t3_=ZMC77A8{g zC}Wk5#y#QZ>PlazXmbZ-Cr#0tXZ)ZhSn|vsr-=AVB{ZB*ncgUIVYkbJ?$z%b&rHx~ z+uP{@s~?I0ve$WTUH-tDyxJ*M-6P zR3lXrMc!=PXS9(e9XG?{Dq}C`aoTkdsB*+` zz{Yy#()W!B9aHjG@9G}n{qr{XvZuhO~ZW*Y8s3-TDZ?bQDVCmXMp z?HLF{UjnNEJ!@)!Xm-vdXj9kT<&YeSy1b-UHi%yM$%V(g9q3<9Yu}A0C7U z@S&S$nQQnuOzFbTFY3JTiF&IawS53WNv$}z0gIuCf>4(6EPL#kgy|7Nd>uOnU?oCf{I2 zM(1Zm5=2aiye(Roho&j&LHZa1;=y=L^ROie&w%&tV2B7R4E_o3F~PcQL~AwuEU#8k z&$_k}Sea+uc7l_AD1~MV5qu1WGqL5`@@DhrgLkP1`Pu>rg_02|$KX%oxVWxF{-ew# zq!A9UysWvXZs1fR?Tw|h{XG_; zVo1&Ur%UOT-!2RH&%X6z$qrvX{e^TfQu-!kjMC6MsDvjitMb#@mdUc2rC~XJ#u6S8A%G zfQqwH-}|$&$s^Ioe%DV27!p)srw59yl>;YoN9#X)i{&!Kzy8iOG}uRpO^ko0-2WJn z#Ygo+MYxlY~Re-ZAZvs9q3jHNr8IeZW~_xJDVlc!9KEb?8Qv3~4b)nEI)HItB+y_`0PxlZ{=X($0ptPXgCLFi z)pkM!@ple~+3(p=2&&k|m%}wJ^E>M&!d9{*XYcFPg_#UsR8% zJ)c+eP$g8~Z9#r;l21!W*GZE33{Yp2>#cP5NACfnRjqswVYiy&X9mQSTz{<+?tkzR zo`dv#(FEDP&w^acxpeHQ$QO7>!=>*r@S&JP9)$JgoYs;#(KD*1DOt^W2teUq_tmYgNQUr09f| z!Bdn2vfDP2`D-lF&%$hXf@cE&-MMtl+gXggQ}nMS7GuY?PxB?XUE2)0U^tLPR*DN_ zdCj}10z}pEq=#-dgU1`Z-8<;6^WRmJVfiI>UraQ=sOsroi?c3>zH*jGicUHY^@Ub^ 
z`WlO;Mtel?J|p^o8uPQZ+GK#ryO=YVS6HjZCp|OF7~VD6G}BVJ*nLpb7cU#r-YEAC zWa~VLc$qc}nTMXPNz7zuq7v5i_%5QI(ER>nvfb6!e@c(}uHR2=U2iePwmBFBx60h1 zhl5+tc7_7qpI1JCH?Mq!AO{d@d9xEN1)fWc-iMt4viB^(i`Bv|Tb0da$^C*DyNXlk z4JK%&Ne1`iX4y!KaFoA@o2I-`{Vj_)z*PDw91~TvFiSsQaRKTQen~U-Jr8#Lx32V@ zTo8YqWrnw^pQC9GsM-ZlS~hA;Pw*18fRRMw$(4%nbK$NcvE8VRYk=!xpT%NPg(&7@pEZcPg{b_^G8-Naw>#C=7l_IhBPtm8HQFWgwV?V zSas0cQ@u?$_4Y2^l zU@_9qHQD((hS}t+c!tr@&mU8tKKbmm%X`BstQfT{C+jS5MBHG29IZ845LAb4y-I?g zHka$}psDnkb2gCgW_>qsc8YOhDVh}rb1MDf{jzD> z0ne1TN#~G_Fry#(pui6dtLK0^l4wKy$4Bw{Z?0mg$CV5s(IJj<(1gssg(ovRZ*26L z^M#yPVHdqZmjW{rzc+YrBy%{ZrLuPofC{usuR_it4{iWpZcnU{=0 z{=^VXA=J}W-t6s!BL4;)mbZH{z%$)}IIadZavB6+=4(;HB45@tZu?Gm$VYsi1&*+` z^*BnIU2$bukJ*5R)tHO(Jny7lr{7f;L#khin8ix7*Yw5i;jIi)j7O~4kONa>2~W;B z`l!3+@)foKThfp(@W*IjPm5L6hUTQIez^zZ@9#d(-1j@`d}mMY%Ni#1=)D-BUQ=t* zIf(?m;jxa`RcS7Z)jNhmFCIuMtdCMHJxrgGkOaumY08O z&S-i>dI;nNLmJMJldsx}Eysgs1ph1~0XoOj^-pTeUqvEl%i%u23(fN}2%oNet(b)f z7)bQDzFSaEpYSc{o!8KSUSLVL*b^_*(89Ad?cUUPa8(ltycwxWYd;A;vsB$HcqAi) z<&i_9m$w>s80Dg|Pr;UQ1ku^ZYQ%NHwiTYM&6@u%AG>eIX<_lBO+l~o$eXl0r5EhV zRn>ya@JGK-I`3_yB4zLNN+*Co}SG6Sl3&78T8W(kpxN&x1jMV$(vy` ze>H)=Q!CekEjn#Q;sGNuAx(kMg&Y#d8Vo*C+((rFM5)i!4J)3(S94K6XP8?s5`Z`} zEuoZ)E&%9vRa6Za6)gMUX3OdaNTgL zUi%EuY`1(9H7M>8jucn@Ef7i*C9f;bP=aom4R6D#XdV^o$=* zP0%FdUr{nvLiBh6NWydh!QDCi*e|N@5!VQyqAHhuk)H5a-e@kr-4YgYb;ANVyMe!z zDt9i_S>L)_|C)tQO~33fEMIM&7YFN8t zV#lcly-r|eHeT_!&alF2$fyZi86>=idH=$~SQn=0@VpqVU)Z3O(;eKf~;C*ui+8Q+&Nz;x~G*s-`c3SbxO>J~u#f3a`844V! 
zVDGJ$ULM4?845X@!ZrKm#5j|l6s^Vo2HuXR+Om8efL zl{*+A?}vddycRkCz*~1U8J<_QE&hsLhFLA@(3u|%q6A6jSx4>Ol5e?W&$lrOiTN#M z&59rP>(_!qP8S|tYX@r_?v2d9=}+gkVbNAAtYQ=)z@VH{5e?W(a&yyY8!$m3=opP8 zm6^y53s4EdQ9R?)JEz%~ZrrmzK_c!4j9wbodki2`3(9jKy{x*+jgKhVM7xGR_d?Ff z2p4w46zB|CWcn1HfIJ!4J>VTBKDOdG`j?81s~^m67LKyjHk(<1TpU3wTT$PvB|C2> zjT&Fq10P099w;zAj=-V(Bkd2qu5ggtQ+oC{GDDV~^n@dMHBB`?FH<{J@^}wqqEp5N zY|^U+OT)?g`25FwB7jqk@&_=-!Io-l!i4AY6d}0;YW=HJ;cx06f40!2nD*ClsZIU`p(Y8Z9L9%PMn0$5qUfM>V#b5K@xYR3>N@a2Hn?yOIiAq)mOEDM0p%_Q)w` zXDcz9&2LwaUV%0#o;^GAXd81uDwO7Z}oh5 zUepo4K4j8V_2=0|vCyGk@`uc8mHH9QH^7KsWh0KYkvp@LNnMgNsTj?<>&>TG(yYFF zi0eu~zmgtpO)oK{0t)hwIlH*hN!dPpYXc`cGb9yv(HuSXIQ{1NMwC5}hT??QoJiDV zHXtrwkUZILna)fWX|oJNCiS}KZ(W6N{Ygw@;1BusS8HJ`&ps#NDqv^Qc!msp_SrD{ zkmx9rC**I}6Kq)B+hZXBLw#6#(V39v@n)wDljxz1&Us==q56XFpVg)mw^C7V4185B z5pcBPnKlP0^Cv49R2BtPp2fcKj>zf-XHEU)S-wI z@hF6r<$w_$Buvy;7hG88c12`u4B`4JyZl%a2paItv^e@HIyitB)T5Um8~Is=zdl-# zg-qPM_U%4{u76&rKL*H?b@9wM68sSk%JU~mH#jWJEG@XI#E7kZAo{el9KT-DsgG2( z>(S@H?&D(wxDxQ?MeZHh7DMaap+F5GoGvuiYpN_1s-tyqa-nLzZo)s<8Gv$~OpjTI#L;OAtRhqXksX@$jpxe!LRe7+ZNeJ_0LA*0kq_AO${XD6;A8q6nvc4`| zw-)Jz^46U6rrawqXbOKknl(~P-?2CC;s@T>3TTc%jegUui|c4ppq5#x5^S=hj;5Co z1-zvAj>(MMELZRF!-+duw^QgzRUaTuX2CY&{LOR3Ukf|eTAq}#XSS0MSW{eG=>?AYtu>N6B1#echR|?C#P#UB(Rx`(h zjH;UQit){ZUvla9tsMJS-`&Hz4A~6dlK9l={&D^oc1q^ck!Zctw3hNu>fBr$tT05235Ck9m5%ua>{2>H>g6c-c8Nd43@aS)^3{Zi-0 znkV!C4JW&|60pI&=oV>J&^bbp8vAn(J)CPs6RSV32g?RY`uyCrXVhjVmL*szpw)T2 zNJKQbeG4KKd-{BbhzBwGZZJ6`QGb6 z(bfmG5&8E0dcp;AlbVPoHX?K%4Y=xmd<&W?$jxMczq@}_M3!ayBH_mED0`vF-*+41meWd+FR3YKP)&05od z=KD63h^;qcYT&2K&*fVEXe=`t|CFI@+b$jnj(^lozFK*wc%NRDFTyKU&x#^UVE7h9 zx(9i6*bx5y;lj%*p?78sA!?tb(I=(!33K84P977cctNAGPxx$UdNAb;=N#`b)34{G z6VgLIdvTmRM}%ZI9O3ko#L!6@v=+kZFmXbN>~cpks>`BAmpc>bes$Q{0K4+0&xBDT zfZU!zm~zqgV8d|F$!k&lNKV2EgU$8ZQU#oZbb8Ux17JnA+)33kyrroC02#><+$|eQ zdw?401=RdEtj@!67s_LS9NtfW62%R0Zt&9yHbD%{~G!tS#G|#=rfZjw` z953$01Nf6C*BZC0w%412Hkv(DvChi8_~BBCW8jHTF`*X&Eaur{{| z(`o&J#8g$m^YeX8jtLcEDhJ6?EX=BprS3S5&U_pIUo=N%uMx85M0!MEhK&9#dRLgK 
z!s2>0{QPw9{p-IbCn+bdU0oi={t*waAO@{O)%0_P@!yA#SXNQXX3(X_7xeQAPRN4U zdc3(AOe_6dnJ^#p1#-;AT4N1N{=|+rN?V5Jd@2HqWO!&5^3OCt#a#^RP>M_Tocrxx znt39-r2KQ!R4P9$+-C@UEmZ2!)(NrY3CT)Yko)4lVf938MZ92m=7t{joY34}uR(lU z_^+Gimv>T4-%|L!PO+Yiww=2*GJR_o-KReG*4=G?)m*^FW9$^M!Qt!@ZQa{=IG>p7 z;n41AF!Od>yJ|$i)0aSLUB074@g)c>=Cm+h+b4f~^{VXQ z_#Zt0zij+_Ew^+wS}69m);`URvmV%AJ>ailS!^=&F5Qe@_yWco8bV&X5-0|urbKJd zo4G!{HA;S`5xoqXfkT=UNI@>xaSu%OJo4Th*E)tHf%@Z*on?lVWup?Q_S3AR&V1*&2@)-vdj}9@n=GMX<1UPtz)4FK4|?;QOyO~I{IAvb7na_)pbFo-W2_)# zS4YKjp4o@2f1fL+qsbSpyNeZzp~&2OzC_BAP-lT}M1ras?`05Z@wb6`s!o8U;@=a6 zz7~_rjntPMt5_G+KAu)`_7%!E+R@oeCzM|L^YbQmk%(}Phf8R3B`79>gv0Dz@)d8T zYk*6QkWXeZWb8`kevT3_EAElS{5Ba+Mn3F(t{dgl-$yvk_S>tP_MfCL()HoW7wxdA z8T&1%kh#ZhpL>m;Y0V&)yym@DbklYIIouhf^qW8UgS#T6@0Q-6ga=zc7D{mvSMJNIiwr4PL)T6&gg^B%`Ppcb z#;y^`v~K5QQ<9vC z0HJ$4R}c9)>Sfy45Pe2uk1Xh9^6p42hTHeF+5B$Ug0^$_Fq&u3q>m+(9~PTcloTn*Ta8!Syr| zi%_l$0GGM*F?;wr%)Q_Z@wo<8)!rIumvKFD@gQ2Zg1z%F*v@~nXPM*UBLKZ%In5Cy z*OQeWpp7bo*RrbUI6gmF`9~+0Z8R5T*Tl2Te|o_WN~atE;l8H>%aDy$Jf?ca{ZFYN zn=u%tVGTHJrbdX_IIl_~w{;)^nx%9-nRMC=SAMGKpPm%Zd={IJ0KNFS=)!)5!dE}G zb8Zp=j*4uiV1)QZA)2Vm zDq=jKW&xZ9HkqpWd8~E_YqT^LEMs>Ri5+xNBowUUs&E>@WsW-I$?)LPAIoM?rTbp! 
z|J`^`RBl~H_mJ@Wm1@B0{dp1lLhuBsuv9*K~LZyb%b)XURyG{QnbQ8|Z{_z8PFr4V-;RYpB04#ZDi#`#8og zrqZpl)yKT7HnBCq4{Zdv8D&EqrU{P1xLY;L+wwIrtst8hBkZ{ud@Nu4rdV9+`F$o0*8SMHXPsMAW~$_V_aqK z3jTlYaBi0wrGHUfI05=xB?#cPFPBr(+p+BW0D)%qlRYgxDwURYbzl|IW|3_JjJ#bh zWE%soQU6FBV($546`fRxq`cjQ*RmLn$p4hkL3C9w3!GhDY1q%^mqI0+2;lQTz#hAH zJPKYZJU+b!UoL9?(<}DPR5Vt&?jW>G=xpO8`(`7#+TWhm=a-jkV@{k{=I;4N&a}{M z5sj|mle82oI8f}pTu!`$&k~HbcwLm2Y;doexds^GICF!y2=tKqQit|sW78Ye%k>uF zE$p{Vd(>{T2g}p;<4TXuiyPf98`zG)@jy8b^ z=f`660(`c}_Qhm>AZ|f#Q^>Ots{Y|FWq=#SeL?c0e#?6y2wo@XE~BsV z1`mO<5mg|Yo$A~MiQK5?8FT?%3q44xL)(`t-7J`>{xe>ZKcC(a_>|n)+EmLbduGxa zI_Xm|{WRUDj*?lNA$;^EQ{L&1%=?}wi$oLxmGQ3`5?Y<)`5HnG_I#72<-3OEcUcpQ zI3$up`)<+%vf^X+@leyp7Uu(~WsDm?lC+WWtwnsQ!s*qpBba4)(a8ov2{|6RB68S} z8+f+?u{1%x1a#2%W+Thj#iS{eGf|!Xzg~vvUMjk91Q(>K0%Dq9{y4|izY#DC&1OKH zf*N6~AEiSO$_>aJa$U$ZVAn@ggG3;88bC61lp*(8n&91~&Ba0;MD;gJg49ZSS$N`t zy%`G`jnq8snjwo8?=-A?G6FJfv%|HxD}u&qxpfsT)M_Rhp*BXhNXE1+S$7AJ1CFf#rr`M7RLEz zf&KW}|G!mK{O^2+A!jhcAuz{l2h*tx+aE9q4wGy-WjF&Atj?l3PwBYNA>i8TI9 zRBX18Ok3{{&%UhQtimVUhc_lSV|{RUReM(C7~JuuGQ0MHPRolc9;WVd!Kyj}zXAqZ z-`B5=4oj2-*AElJKP6ofZ@pim&wcPVKo-{}Ani@6h9`yeVAGPZbh*PdBRwa{#)iXg zZZAaTfv}HM2vqg6*qf(}Ymmv~B^-AoePEGgX1oX?7WxrR(JX`rS=GzD0pGPMLyTsN zq90-7lmk)tBCI{BmFt`XDY5DbSZi*C1RF$V)xSWj;!6RSboG#F+wuOg1tD|1shcYw)D% z=0xtABm8;$Ey&a(ZNMJ3N+i_Pl;B0k`la4cj%g&}r54A&2MA+qxQ8}44c^*4>Y@G< znc4H!=_ZcT0Cgnq_e^h}8HLsH%xo0>l-C4D7pdwAFW$O9Gmd4)cfg%@%N;dw*HMNn2}naZttL<>zZTp9+QVz$GX`q zNG(wmyzw@=!l`*BmyXp@oMnwrx<7H*u(Ada?{?66@4IW2NdXyUv-9&fBtwSp^mw^)zXc8FJW$poF z*9t1R_ck&BffbcHX2$Ov{CGp@h#C?DsDq!baIyHd+yKAwob3Q8>^?DRyN6~cy%pds zo;FJE^?N;IX{Ku48NXYWGe3sh-spQC^_|c~bT5qO3^kU!d=KLssnoH@v(`2G0yuY| zlfLMLQ2Bpo`|7Z$zIWdt6eT64yHk*^A(Rkh2m$Ht?q)zFlm-E5mG18D?(P`6yBY7s z?>Xl=_dfS`o_p?n?jL{-y7rzud$0ArpIYA)fzwKYzzS6%G4GB!f-_jI-tJ2*YB$c< zf-{x9`MTk(bUXqy#}A?Ym{G6J0P&LRx(VBI%E@J4HFcxvHy24onV#d~Ag3cVvy+{= zH=BqgoT(~h2DguMf_`tCeSF-2a2 z6AX>V8_9`Pew=jsuqPQ+guD3@kswg$I*1V6wkV>#N$_~630XBUS+OT*tD;7qti5-^ 
zO1KIuOUkmUAjP!ZgS^VI!0guh%n+f7ZA^+nLh7wjjF>vTBqcw<0Gx*Zrn|TummlUJ zeMo^Y{WrxL!kr*FiZTmS#}WVy0qcwpbS=3ItlfP(-n^>nMj8r0)5}*thg>YpPbR8v z)Jk)}g$Yr76!HyB=okrcX?dqWkOa7@FADIl4Ojfe+vXrJo2+C};%i3I2sEJnyhcr1 zmF6dDvRjf)p_n7}8muwkIma6nsTDuVo{e1n$P1|7poNAMLF8zZ5!Y{>bnlTM$GIr& z!%rh_V!1i)K_P(rJUtmB>@I`fi4R?%wdbojjhPB^XggpiL7I}f0A$O(xp!f2#(NSRKcXx;Yea*w)w#}e^KyVF zwh?l3RSL@l1-0;n>$ix7*krC&KA}K&DxI z05^xor;PwUrniN&1EXv>n-i&idhfi5rwQE+q3J3J zc&z7vmhoPj#|QNutQ>oMik12I@1!d(EnK!@e?X14iRPY+=M-a!ek!UG*?Mb?ccm9a z;yzez*rJDDP#zkG|A0Ct(pULD2YmqSIrPA4zP7!E1XY?SsHho8O#NkUT>oHng;QYR zKSDrhQY#?rR#)$x)KaJ2RZorc^_4Ql_cu`7fp(1WdS%+Pktul6@m-ss<47T^xTt8aKZphMhS0N3D1yWVRG)j zn4JFAdHf%~jwN`#Va&aR6L>l(UQvpp26$LI$&0b}YtUGIuTA6*SH~r`Rph zmj&oo1?jUq9rp1gwd1CIeFCq&5OW-A&e$0987a3OD~h(-KOleN&lX@z?zk{@0(?(r zsUbzh9`RvFj4fHY7+K|i*JAO1`R_?1+7}(K;k;e{Rv+B&^FvX^m$Cb#7=jDO_z-cHa6D(lGZ#FB zMM-YQ4%*LeqF;|j`6JEeWKVve_)tDtQEx5uzFN+b#R$6-$4P}ES2W`kLSH!Y|IIwB z<9M72Ef?cOnhskvHY`y;-p@*lIu7)J|2W0GJRS{36H_e?uJ%_TCilRFF zaOdTNhp?=uV(W}%$yBgb8)Fn7(l_0_!%@ zoq6Na-MoblOuDAlrslo$GSdg`4tV;>=M6r6Gz~2`%p%!7LcZ1+Zu0=o6!Z^rhnv*Z zM#j)}WFHCPp6|>6NNY>#)6oW8OK0*3(lDNhEcHWEs?5k}8D!t5qp2W}4{hS^|5qnB zQqsx9Tw`DYX8x}P25(Va(Y4l(6jkD%$q;r)5!LP7jY0m2?I(=~m6P%Q_i9-m@T%hG zQtBVl`6*0m9@;5vFY+MciRY-N9C+IRS)f`6Oe5b8M;pX#MY_VDwax45^;mcDgGz8H zVUe&@Mx^W$&4y-NVKl2R`;cnw8jAfk8f3sN1kR%0-m6*cSa;rG_=1TH4pY37-qur#?jF znrnTS2_LilN=_M-k~!A!+Ps>Q&}Wj%^A&?R>|uQOiMlpQ7tl#xL$p&OdcdUzfCJXc zg(A!)8jlehd>e(ytrRs8AIEgORVF+^ahVPi!k1Hv-yf#QGq{m;GYzneD`Y6M}}x zxKi!g7y*8q=GFVb$AMaT{Z2qKK<$o(y6^>XQMG@Sxbuc7@&=r|>{n_M@Qt^kPNB!A z^*HA*y0w$e9A6JZOPi?f+u=ABEaY-cY!lvK*vKDH%zJq6btpFEp4KxYnfj(`*C+b{dHT08 zuTy7*De9w6Wp8-X>}F1Kh+hAU&n$j?PnsFi#Qo?}x8oI1apI%&65h-o#I<;X*U6+W zp&erBaQcphWJva`d-K9$?{7#ec~V2f)Ar zufjBabp39ngbA&^;HX!-F{BWJT4?&E-4iH5P46Yzwv^+Y((IuX2W2P#81dEGgM6Jc z7u73(KMX~kWEzOZTozrc4@zz40-H_$eZy)1{-cTV`Vl{J92RYdt?bKZa)!S?i^Xe*xQ>Ibq`o1#4f-m9K{lSzF`$NVrztVwT~rSN@aB&i_b-EocKM> 
zh+-cw*6oZy+T+Nn?w4S4#O^!!DeEFgFy3j%!!O>eb*ARZ9`fu5#Kvc&$kxf2R3Xy~fI^Q+MScq?+ypkwJq%}{sekVn zp*xBW39`VP&)h5J76jJ}^Tgg9z~>TEDpiQs1Ecz|&sO1)c8;JEkXF+wD&B$Z)C)gJ zw_S$3PA!)kr`Z4Y&#h7pr+IPk2lV_&Sy#}L&{AFq@L5UXU5qu+l6~`%vnT@#y|?Eb z-G$n*G$ZSiX7}+2QY|mvpQlso;Wamm-9pPA&t(YRT=zxEp_jf%X)#GtE6MsT)_0Rk zZQQlzr;`oVMrC|-(9)=?2qo8V2WMds<#UA1z-KG0WcXkZ4=Wfuf3!P2o=q4rU)b>> zT*$vqhmZX$;et14$sAg8?^NLDW&b?3cIKA#4dVsBon_hL6^W?`RY;b|@XeZk0A|3= z-MHM&{@*j9I})Wff4jly2GuPOg;~AbKcp26iS7lwP=$X$7}ssx>J8R{Ya}Tg8ey(Y zDPE~snb4+`40)`9j0i2&=Ea$LQ5!&tyX**eD{}1iA%b9UACR(_TEvB2hFIPom}QZH z^m|Zz?MFQQWILURlXhLW)tHWlI~=Z)+DaC6FzUWwPEA|-K8vz*NBYJ5F={4SNKH*W z@xSd?{m;defm^>=L`Nd>B|U_C9h)b!FCuYn71J;ZVt|#c7`0Tj{SyoKA(1;|{@J}6 zd%bI1@9?b=9>avDY*rhX#-RovRND=wFE7hyTS9C447C5+Ydyhi|mCCFm z(~V&$6s!6=F%;CdDK+5!um`M807>Gh6gJk=89voLGz(15fZz$971nMcIv~{`NiO&g z`u*cCD9m3Ti2l(XKvZ4Ccg@RQ1dj&NnqCzNmcqBMGEF#bo&+Vg11{gBNh7oE{UCJ@V;LO_w8!4M4?3~G$%xvhr&fVcr`6g2hw zK<>x6+J6_%1H}1XK>O8SEL|(kh`Gjq6VJ#F0A~~DCCY3>mCk6c9OXWvFdQaswB^|5gbRF@66APLQ<8uu_~uAbdG1( zfWb2Ro!56Dc!A`o`W$avmzITV(*;0-o40CzxxS42NeEXEuhH*)G&TnCNIYvN{kP>- zCNEQ~9%z*NfTBv&e=aQ!y{P|gY7J=gYeWDG=i@~yM0s46euu$LxG?&||AAD7e#|3+ zYI%3AK>wcjbf( z)U#Mf4Pg&vZDnOoJLCq+DAu?$;*6-I&fs~kN2RaA9^awa<2MTmp?B_xCV-Ba1Be9S zMHDC4_IV2^Q%3`s)$C6{Zb}1xR!|Sge?o2uTt~pn1JX_Ik%D@)vjWG06w)gjBvVj9 z4#yk9_QcUjiXhQI(%z%x#-S#5nyVV%&!mAROyf~tq~W&^)(ccp(GZ!bM8q#@RH5&n zE&xgnm{51{5JShR3;<5>{@hl4pXn|o0|jYV|CDKm_%_W0BX4AUGJOBP|ZCZTi;lRX=f>PN5E?(X+WJKNFTFu3B?1m;Q^{eMM}{qJ7a z8jcYD*hDz~^G+mxd3Ux1nW&E9Mmj=8E2AwsrrjSSH z|Hfas)g$C}YGhhOgP5r1f+R@s#M0yJ2F`zi4=W`}ZI}PiYW;%JE8CXUTPXCxrqaoa z?f8=xpbAe%xiOSVRdu~$)dUy1Sl~(5mg%GTElFMEiW>WL{hLLk&|QhBlOmuAo1&iB zGKhjZ8m+fQd|T85r5)?pXA}59or-N7H*r7@$2Js56Y)d$`~hfv_fyOG9v8;Z1lss4 zdi`!F6SQO02!*g2U6>UT$Hsw#X=IudjhF7ZzD-wH*ICuod-og@Dn$ z7{#T^YlYn7By$#4y@QUJz^k9G-$Za$J5D9)`nFb>eg1SqeJHEEiMOL&d-*fO+TwcG#=Q-J!)_!)rtb8>VjCe@yL z4-acsDfgqG?C;uzEU8}HD#Pv8jl6$#sn<(Ys3Wvhw*khyncKvn6Z&t7Q%Q75{ToY0@8c~5s-oOgb+fbWWQ1{LJD)9wjeVMunbjtIs;V3* 
zzAc`umUd4(Nxmo1FwPJv$sX@8@=Ps52I}W|poT+zo5ysdsan#2G@n+sSBy z>57ngt#niE_XzE{Xh7lWY^9x{O8H#o=EZp!;2PWuJ!yY+oqoJceMoij6p6q0ju&{q zq{DV~fim!CA*Cs2M$v#*7yhFM>Rop)xgo&!gD}5|Fui>CTm2>3?}%}r2vF=KfzWUV ztpjvbm9( z{?cI|71IcML?OP0 z_0b1i{60}cmXJ95Ma#AnRd+WLHcbBMWEzJyU+W z@yFY|J#zBn!2Eo-$%kgjwn?

Xbss#8Ne+8n589?iOt=R~j zsQqT+Z5mFlLCBCTg#IGdx!|!e+11enKkqkQeB^7V>ggjb>&cwooK;5_+E<Q>eC|d;=BmmP>TaVLv(W>29%_1MN za`z!9m)q^7OWRS3yTJL+W@_Ov*%ExEy95gu9feMPzqRSQMw{lFb;*|iqQIT zrOc^F@4d$k`a@jU*-ka1#%G6cz1o7^zGAyTQiH||^$`Z@>7cOt2txllat;%_k?d~+ z^DKbHkrYMy7gOVB>kMy>0Co6LT(;k56@aT_B5;W+Bo=XVUI$Rv_&3O%g5PMa&2m(! z!11yMjSrvV7ZuoI0`%Sb^I-A{vji0*|FYo@od3V(z6y)-ZNe1;u90U)N|fBMJuBaR!Ow=8_aeKNjTfls zZN?Qe@iy6ODvl8U;59gQk#~$Rp%+8~stU_20 zX?CI_av4QAja6q6bdyKQV5ES$3bCB$$4D502$T7VT~55KS$%o)o1jnMhXNImzVUq| zUSvkkWS)6iIU`frp_r3=a{({HSxihvn0osHO8;vND6^Y&<~3Y&)3RxTWnZ{i(_fK8 zm){Lv6kma_C`O>iLx?bT4P3rv;e3fyI5FkeAp$cHxC7 z@?u2UulS&@dIdL|h#$COMMWHX-$4Z(@}_;}PpA-~;tSlbEx*Wis8arvxmK%B*3gcm zG|W3kF_1q2JBJ8KQ|a%K`f#5+;7?0=kjKU3TX`nVAnms??gW(4)z*rZ+BhYa&Ry`BI^whsqXZfk&?8o#((5@KM$0;;p)c z)MWWh4x1Pn3dc~O=*~>419tPPolGi*MKXRp za_pddT`c6CUDag65TQDQ!V7o~U#(~K3}4++-*kqO5y0vxG-wKal!{fx*d}YeM7RfT9SkL}KXzr$;ZQdla7z`scI|L`|cVHcbF8Mtt)I`i=*` zk`-Vuh2S#tmxlR;wsT95$zQGmowWs?JcN9Lv7L3aU%S1VTEO|--u}>D`CzKZ^o|2tg7HuV#eQ`=P9BemOi*DP;!USExTGm_0Gm3PlSr$(%}nq*fzc zR&V87pBh@JS=>=A4NWm~`(Y{uuSxDIwxvHrD+|`sU<}8s8F=UZcUP)r-|oHI-Qacl z>u2AUfo_R9w!%x#KNdXKdP68IMOT&ab8l;|M9gpNDsi=`S}p=yqdf+k`H^c6oxJYj zMi3BVS}Q{NS^4Z45~O~Y?y&(L*l~tIM&Li!x*r4Ky(!L`OMHzLUO9R4oYQ9e4u|Qi z0Hj<7UOi4zDAh#2&2?BH6yOIl8y0sA)TFToMB1?RV#L2pMAcMe#FeK*SrGtTq@wU| zzVQ0&-E5^Y*0h(!#_vob%&r2Cp3+N}BK7SD4u{1NzA8wxY9D)kcVPS6H1UR)o!Fh# zOuCLeR-%m{uZ-%9HO*A0=fF1<)2H0BzYy3(UN8i!!T_BdmV?INt|Y$ce2JeNN@f_l zD;And02X1Wrn*T_Riu||JX3WqCHY$u7Efu1pxxSeRc%Z2)^7R0G|-(L&aGeRi{$}# zIG>Xkyf^jmp;e?>-y4A_H-Y~qI=9@LcjfW0NaXpvO0RZ~a*VtPiF4PZ=N>y~yirP1 z*ISMXfCgTBVDmxFn)Y1o*cTPIYac9>9)E57^8DGuF4g08F4Y{|VS@QJiiBqyh3?UB zg|(r+ta&pbzM1UI=f{rS2#=H3KD#+o7zZenn9ro%=m2f#2oBtG8r$V8Rv&k=LH0uOY@kEpV^5#dP(V*#D0Ez4Z@oGWYayVhi_(ArIL1i z>?JtZV_Z;Nf;8o@)$P@nuLm>|W@Sl5@nrrdvM(8Ry6q}6b?TjQ&BJLL!NQBtW-ZsBAhJRif2Qzm` zUU1a@1I@~&lC>Xw`bV~Yp}jNGE^nn=U?18Z-gR1?fi#V)*iPiL46#SF%=j${ya2E& z09{i5C=AT4kBxr&m3}T3lBYiL(fd}+4LY{{5{VfqQxu)^^FNY**?c4itH|KL^N)PL 
z;pz9Xf}q+zn~HodN9ZT~J?TsC)&JjD4$l0)RL1!Kg^7;tidk!>0Knxqk#OcX%bhdg z|6x;}SrW?I*!3xZ?R#0}qjlbT>PHY}B5=i@bZhr(VQeQO;8;7IM^+pKHAgFIp(grf_j<5#X=PNw_k3H;}HH5cd4=e z4o~O&zi1^jtu)e@#FTE~NqN9p&v5?MIAoxqH{JxlHTdP+z$sSU=#6DssUs`PB+R$e zRzE+lCRCRb?b9t3G+yI)`g_5NB5VG~T*y@DmW;$G@EPn~{UZMOJxPlxU`5KGVs%Mb zik#q^8Re)3ekxxrNS3g#?m2o>)IT!PE*bEO2oxMfh}Qn5_TrbUKi|mFI9&D)+hw*Q zo<|bfS%B}tf3JuL|E;c8>vAx^7e?L*Z+ecc@o)-epA`4=&au6NGMv4Yq+HHj(VJq?XNV`YElQqQ*@;j{VY zdJpbxrk37vdlG)#HM&>Wk#W@9vBgyV*q+N%OkHP;q_OwqSb4AkR0P?`nFd;dJjAM= zz~rq}t-P2S*fQpE?>}%JuxKy~b#-wo$*pkdkQgJ)l6zaFlfr0(J1ujTvtr}h9Zrd8 zp`E^(sx>u?@>un`WKYHB9@5e}mx6f4q)vygRBJbc-yIo&wNVu$Je~>K+Ii(m_ASfMH>T>TyXNk$AU-kU1zZg-B3Nf2;Wvn-DaN}=|J#mQYK%{t&!`|kksXwzy2z=#!rg~%-82#z6tfcR-{xjMT zLwFyF^knFaCj}rJMjzGi&a`%m{=3FoCEG-*eq>&?vi4$Ws>U7v3UHNaFLrO@s`vgV zCQf7r&88LRo|qI`o8FkOqQk^UISgE%_u8*bjM7~5c3?0(9Y(>IH5Thk@Im=*o$;Y~ z1}{Ef4a|o)6^EDdvs|6d*_Gqkn@H;@)+n1WFXlXhL8e6eBu3iZ&vv@@h+5Qkd&;bN z`;=*_lB_E!rkMY(Ln}Xqj);vXfa8kvNitdWa6O~4lm|C|BEUvYz9T$~!J=J2`r>=e z0Ix<+-sx87&zOKUA7aSGa1nHN6TGC6$$XZev8rc zTa+~H0P6C@tYullWkO6)^dLJOBo zcP##-tSaRug=Ve~+CPmod2M}~nIyYE-|cKPVq7Omu?dOjcYC2&5Dz_Z3 zR$-GiAdmNuuot;N{Yszdh1l*Ok6ID?6MwD~bm1Sf3b%|mqf-x0T|IT(1_ghty;86; zZ#$!mY9*3IA_@You1(eXENvx&%}-hnC%{)xO%UFSt0s7rus!J;@ZKfM_oaq8w=Asj z3U@_#E39WMGo;S65+W5 zuEBkI7di=(2P1=%_#`+lY4|1ilIEKFS#WegXnO+({#*%Fpw%gZ@A=cXDqefq)lk<; zMhp8T*?y%(A**ek0?|(`gZXY5yBigdo$buw13K+Mn z*6->ub-IMwRINaK3Wty7T>rrg9Qc_1@%qH&eBw41+|2cNDOiByNVzQ@AtmP2#u+*b zjh>vd+PD($B%i)8dhD*mAT-nZaPDT^lU{FY*t`)*8>DDc3c3($z0+U*mu>)oJ8ulY z;EI)5)B$kqfroi4Kyf_y$-}OCWput;s;`@*FRzgpcnH`E7V2c&p$w9TIj!jvc3IXX zt`WUk;CsjtnA`lo;$-c1yI*c8MLZoiar z#ff1=-dAMe7#31?Lqf3nD+jc6WaIlp5}5|n+-TWbxp@ff9CiLG7_HAV{h`hPa$qG} z$jBY%HCmc`+5ROsvJ4aCIPBZ2L`ZNY1Fwj-dwfVjjeWdR((-$O`!ToLuC_uUCQiIa zPO+gw2$gb8?x!;)xs{y;Fsy3xmPl*C)|&h8g;m(?YwrvS6-o~bF-T2%@9&&+aYEhR zUJO0^%Jn)Z!)VO4o9_NPNTMBgpSMz@$VG)xv2bl{ zC^{+1B-(gK3M`{PG+N}7Di&s_31a!({SV)fdU;@S@)FB`xVV`)>exs0X}AMa>_<<< zi+G++4|c8GCx<#7lxmcvm%68#-sC`u`M@i69`VZX)+#sM>pGI6BYr}hT^wnm;U{6z 
zI~sAeQ#`{&T9QY2+Ryz?XQ+q#kQi%@k9_2zi&oo9h!4(X$)&z{g33u#qgU^gB+MB* z&&ey-5N6xtr-PKN5O0YOsqI@Hbj*J zWb)=J;qJCey_|eb1)=;G@cx_Q($;RX(^+0~h_^nx=dS`;{BTit&9D?_6Y47BCO*~Z z9xr(uUSJKnw`ELzV(s_L9|hGL@%w5>DG!VQC8kAl+!!xpkwn_`xk⋘wc4M;VTgl zhl6tpimq3tYJM>_o@oblURIL< zHJNhS%c_{kKRuy8YIHvAN~9X2U{D<%LAKh^r6s8@^)4SBnyB;Hdtes2Wx4QrLH&vF z6=(15T;myVN@TPnhrQe*P+&vSNtR>FmJ2e?eaFaJ&H2S4_qWfo;)5V@y6J^81%!}c z#XpHtr(_ftR?Le3#lQPSze{3&eqJ={_q{Y$7AX0D<9TU2cz-4Kd?IrjoO5`FTTSD8 z5Lut3Cgj-lC!u!L%!bV`ev7>xJiQ+LwltivalvH+&{{X%ewJJ9{Qs)G#nx5C*BS^Nq}&L$|Ji5C2$QM?Mwx zxA<;M*E=c0f4qqMQ;~uTf4Ga*SRcaZ|Th4 z8~k6X?c!cqONhBn!nyB*lzn7&{O@|GcL`Ddb~+k7$&(D!V+kJ5o;Cd8?-H%py^QWE zc*D|R=U+xXtc(sj@6x8x{ zn{968FA{0&e5@v{b|X@TI?9E_{E>w-$$f7Q>s`uOZ@yK0fl$BCZdNgmxY@F^TVBX& zx@?GeVS+Pf^-M}za8`{q5Fz?BB?FVY0{_)F3f$-P-Zbj8RDF^q>r34BGLO{5zJH-g zajWgV&Tjo&g)~xn{#SItwMTgvp z-ss~8##5mN15c^<517k4rxp{4fY`0U8@mUeOBSmUwl`mP>HUWvU-It@93bZuLx}=g z`*2c++-|eEdMt$Oy}4hAYh5SJNQEDmFsInxUqzBEb(DYq4`s~oM(gK=KAyXbZRjLI zIjiMMO2)O@RI^JTmX7qgJ0XY8B**_tsa{jBn;vI#U9x&|iKR}eXq|mm0F$NHawpG~ z7w@At$LOy)XT67VHb~P@j%r>faA?~5Z?~MXC4A)HooPK67C|}Wb&VO0q|KwM3HkjcOTfO!4{3_>Ff;7Tg?0(oQjRzHudt~-jp=l|lbXA~m`B8~VHzo(jzeD#BPPNr?cEpY7tr(d33uqL9XZM77e&nj}by<=mqXM`}wNmGNm40MA22zY^_7T zRx1{aT9ktNl!fjTu^5$gr|W@ATHOAQ^QNJ3c6aVY$e1^}A(B|p@gcaeD)@|wAKVjM z@qYvnufFtviihKK+0_`Vo=fv?#yb}13K{r6?nIu#VJIS?*BWnSJK&q_ zy9XKNYV?_>7e6WEAUu7GknEQMLCJyr^zw};H1Jvuoa>3>phjn{HaF!gb!XXn?^ZRh ziy?eP@LhsIHe}{BE;-aICG3k}Kw-{vp{Bolu z$s=?f0vfU~gj`x7u4aCydZAuqhnb2oZ6 zj?losO7@jspm4e}SZi^Lr30Y1OH#|C%Q-Y_kalJRknH9(;8dqtI(T;YlFqL=HcgIQ zau!JoBOPYA(4PG8nU2YTpFjQ9`=+L^=tIOLe_TpnSc*nUSnd(NvJ*Fgj`Uo%YTwXY zp?OK0v)Qk|{iF@jR`+bpJ9PXkPA;;Z9RMVZou)Sj z&P{v2R$HKXc(jLBOhjiWlkg>g5YBSh6WgX^UgLM`R>#?+D(XN3V4-3cOG2LJ+qiBp z^;Ybeu!)W3%v5y-lTlu<@e-J|@g#w?Ex8|~aY1F*lq^Cia}rE(&t}NK)VO_9cjN~2 zF;{V3+`gLTVB-a$el7FpfZXT4XG1s_M;G1ztrNC%iJ@tw(LNm-yE2fYp6CsGu!hOK z19X_S=tRVT_~RzDmo6_s@P5;>*JRAyWl2#1t-~=^yzL!I$ON=jG=^zqSMJxYNE|44 zI_QLqR#(nmSyYI`%Z*g0D-nz$jBh2?9C@bVD^CDAGm#A!8FI;oi3;X+Xz 
zV(XX&544u%1kvjmsStMh!l1lgSdxBxc3|Gso~%uU@G3+H_=QLa$BQvWW2-HJUAtmG z=BXcI&4D{?0;sT6KVZ3wzb8;lj+7Fi?&QC`3(a}J@~6*IyfWDN*j|>u;COyoD5qJz z{;|Bp@w$czPtg0!OW+=-_|y@%vBm!O%lWXME?^ptdW$EkTYdyN#d8)}vQ11L$@>6F z>29=L&uXj}vH~P9bFK4C#Y+W;M7eex=di8kVdIi*Ua%V!Ca+0#tIIFu(+d&qp1zj9Eb>c=K_h;#OTCR2J+mRXy z#+)B|Jh2ZOU;^8-i1#3y_ptpjF(1k0MHxhG#n-PEY zPCfnK>^Ljq`Pl4%iCfh}N1@c10Ch(2k(3L zjfS!t^#&#{_CP}?Bpme(V-=OLW?I<&U&7o0eZgr8?(s$;Cx-=qQ6moQ;6`P4LF8gx zPvh%%*cy$E+tm8_T1xX`y>p`2hS7BsM8n2C9r)!$mX;bOv#qWt*6B_R^vIV~0*&fn zK0BH_NU<4z=M-OnEVDDe)h~7d{aJ9n`L%R7@@gxtAD1JMLrg_oC@D>Ky+DDH+^-!v zZVZGTsX|*09NN@X%pUu%IZBp&#Cv%$dNFSKqp^6H4rO5Owdn^Av$Dus#OsAj`A0?# zw|gIor~G>8{-8o2YeWD8Wy6_>*7atRGf(Jmo9HkX4)Y|hyOvwiGIr_AxAx)(B35ic zq*lt5^-#QO!u?~P?te;RbETle6I`j;Cu@*5C|dpcVTmZ$rYF5>I*s_;clBFTr!;l$ z7HG8*{Jn+ez9{L&4q@|KJCf)trHT9L5QHyrN%J46s=Vhx)ab8?PhR&nCx@ z>bJW2i1mHq$%4RX)QDXEQ#X89wVQZjQ_{aX9Cz}*5#dRk+I{z0@d4EcD(v+;dgIR+ zUVzH|!-Ari1;-5zDLBW-^C{*4uR&I=9&)chYD*n>A1reVt+7&`KxdW+*3)gaOzm4g zaRXWxXeNxKP7hnJWq1`dIs(FN=F zyIT#oOvRJ>0w0l}y*d>N917NDg&$K&i8briK{LLEXij@NwFf*yc)}fe9f-@pMo6z; z6sAb-n0bC>8k5cE^Zn0g;0kSp5Zk7~Hp6O4NUG`wR)qDl1Rl-KrygcDn5Q6z?=BKL zLHf|J+h-}RHIrg~{ga^l_TyK@<3-(vsntT6v^RH;mr1iB)VaCSRj;Mbq%&p@Umql$ z&<~zjgIDZhEZ1)X4ne5_2@7+Deb8dx>+kV$n z7H2!0^p$*jl*Fmp_efQgsS40xy+p3H4Rv&bTpHPr*i0-}Ui~P+WfWeyaeH^@o~Ezv zHY3NNg;oR^d)r(_!608Tsz!R-)xKUEA626W2CP8_<*zP3{S39Aml74INet^nDSAHE zJTgrXu!e8wn}5P*wO1%S-eOi*<>gJuF1#JF{=o>#VQz#V?H<(>+L7^lc0|S6bob`b zM?PJA=)kjiRCthai5- zxWaPww;EVgrKx&Ol!KGD%~ohRAd$NjzMS!{i~z6d5?iB`$!N2vCZbR6Rx#-S_0;dC z?hlhkXZ={lJnWkd8Gui6+Fw{ltg%*U*C6>C15X!j*s&I5qxzk;j=RA&KkE;K=W?;U zJ#0qlxtxFvGl7@6lda1!4O<-f={GYCrDRmk?N3vAhe?u}JE)!ocbb_~bVj$I$;qjW z*c0tjMK_C!^lya`Ukp{*O6=HAvXJr0_iBHB%`e(II|ijHYqs;v4}Y>4D8KfE^ow+> zlsi=5SAi?1`q!Mtys?zBKFzm+bCwtE%NSUG4Hj>}meW7-uh6%_Z|$q(xzREX=bd*< zLx-zBi^pb4de+SgS;c4j`*D2>x9yDSC}(d|zSdL!;ZW(+xR-iq*>?wG{5i7Q+aSX5 zFw+Z?aBjN7!4zRzJ!Km6ZCNe(DbbKpo2LAA2ZSgxh5HniE2tj_s2z5xeLdLk=RCzzDElz6h$TBR%C^fm}-8mLM2)VoOB27a(Q|sVi(Q7qM 
zBIZ(y$;z$62YaXKmror?3^>Cp3k7FuLY_gc>?Fi&8pN`&BN>98_E4b?JCnKdcJE>e z-dV1H5nM_G;DW12%tFEA>*Xt{NNku+H>QN>je#MJ`6H%mAPGW7YSBC@VMIt_m71gj zXH!D7KL33I({ez6hv}I8JpWDC8(j1@(S&mcsQvC6Asx$SQ9qoD_#gGbM5iE`8xFWP zSwHv7c>Gf69S>(6qsZOc{BXj)U6Xa{f}%0|;M0}m26|f=e4|gLdc9#b;bwoR*>Ngr z`Hr}e1k#to3py;F`_zChY2#2m_LYfvnb084=6JH&!y~M%G2v9l zhggraYAFDco8TONdBq&iOUX~YVW1|V?~qz_v!OftZ}2)c1{Bsm-4U8;I8N#Et6LJj&2T7Py>> zv4{YV<3pZ_4(6;lvmUj)AXwKz<;9wXSI-FY@lSd0(mwr!%C(j~m~xw-c>O?H)t=0( zcIR$^T1kaUI7iG{q&xs}_MsDjo&540WMR!TZ9V!2+)yrK0s_3*wFkhs9ujNmT{9(^ zR+&O=tfgoc#YI6rlCIJm84uqp$397X7Pa=iYKrPC{x-}rAF9iA4nCE=(g|=0({F6i`m;BFceWkbNr4XOC!&j{HVtD#~0e)p2nt^FMa^%os z2+_5tTllptult-W^vNCoY>mGU5Ghpp_SPCsO;cZfc&YuOv3=I)upuq-fjV25_QTUx zkdCUzVapp|3mPssL^G|Fb+Tx`$4=q%6LPicGt7HFH;PfCMHcIPi)_gc4Z5=r$#s;n zg$W+dEw+c_mb8|tz%(v8uQV;`OqAiPL*h9<&-T}k&-2n9`{oF;Zw$m-Vf;Nu-PTN| zJoYWt%P5b=y$4Nx*vi2lk zVqQ7_z}@P0vG4Z$p|gMvXivYMbN<;AbF~W9K*5Xx<(X!|m&Al>TEv8oVKoJFy%-DM zSqL_b$7Y>kB{hpYRnS5ia$RKVH>G7kb|wODLwS35GrK@nlG0<*4EqV~dvAmt6Yn)K zdv}AZ(_~aCOksr`F|&f3I!$WF`!0ZVLtfq0A&)3va1Cfq&k@sh{&b+lzG#1Sjxm6< zPiQVQH@?(uAjR>oNFcg#6bOxpm>9vxZ{#L1KjhXQ)T>5C67ZZ4xlDJ8-{9; zo!CkW%ooe}Dm5)-JoJVO`wqa3UU1QUEoD3qN?vM& zHCV;;j_D}N26f){+0nON1rCH8u|9Ggia6XNU3{w%QBF77iCn)eNUZZa=c`RQ_Esip z2E-G{a|J6^qHQwBuT`3-!KuXcT6^HDx;RHc2!BwORao3RfM(BcerTmpJ*+DB$>$oa z+(2xSp*}JUa^bQqkjED>H*ot^LcJL?T6HX-=V4^)*odf!Pypt^-lfh*wVB9K=a!I#p>u-+*FXWgs=O8Sp87v<3JUa6cnXc(aLKpgM zA{oJYF+^|jHvS{`A;zuHq<&3vXSjM6gEUa_tVX1ISNifjS3M7$bt2`@-kb0f=y{Hg z4hKv@S}q=dH~LrSMhMG?i29vA0KLu;%Mx2N&tIBHnT?(Zvgv&vPgPB=^A4*cr;?OChA-vU z>y``nkri|o1yJT_e=hYsr(t7()7O1bOcrtj3h!;;1I@^k%;Kdo(DQ9eVg$H-q5fjq zBd#u9i}t&2E83{*mz`zo1cLH^M0S3B`+)-TdcOY4tzfnDF+Rul-7oB=-S4ek_+)=2 zZ22{$iogeX6;8;nOo2_8blAty?-C+*95@B;Z@q2oH+?!TWe!cO_09H=eP!n)HYCfe zqV3Jy)K`)TBLdR`BB{3RuQXSzD{#qEAa$m%t}0VC=mgoC(DtJ)*eJeye_}z}&75VT zn&@YB=u2)NkgW3rBjtT!#0xozMX_BHYu{o|q^x&Tmt-iH%_HW`b6_mr6tISHMVtYD zy6&crWu4UuI*|A@2wiOKsWB$eUe=dUGGI2@hSwc+uxB)UrKT)|`G^)jtkYJvRttWn 
z8r7BkBnIO-YMQoL%A!@RJ(7ze?#*at)XX;1(DdESkt1qYU~FPk8)b)2x9@H8GTcL3i|4?(S2 z^w^Rwvrkj?vQA`7LF!_G>P2-9J$QrzQUzK7QUyoJezBT_nOLm*Y?RRpg~%^19|)#w z+qDYzP6q4T_a;~K1X>ph16o7NWf|M8vI)5Uv5K0F+MrwCfyzm5t(YU1w5ufn%?`nd zoUJ#~EhyBCnqylG$C)5dKD13QLLa#&J7Rp(7wAP+U)fn^6&Xmo`u$q%q>u7KRv=7j z{4ORmQQ=EoFi7CCEbiqKyk@9?3aJIH&xAPFZ5$2j&X6;_>NSu}#ejb#p7n8I@R$DA z&Wv%fih7Kz4SC(9l-3=Q8is2h-nz|Z+N^;@CuDZzt2Us&92V^Ve(B0oWy5;-P_KFd z>OR4JnIz-+P7kqSIrQ3}RVVo6Afm9DIb}`AX66Sot8l++j+1~qTh60W_&V6)xA4^2 zOa*@qM7V9+!Z@wmX}A=CvFsu65G^7KkcgXZAFwP?q7N*qreH>nRk-a~2hy!p7H2$X z@an5!9JU-+EiscCy6avO-1Iy6_!gZ8DxH(Hyd9Z6B%839G~~N8u@LWtJ!wvUGRYvF zC~l@_emk@fMulPhik(lPfzAnT*PZ)Bkl&4vV{HqzQWRHm;M;geDQB%Tc zlxk^YTM=ugOjPF?8%icM4=o@NOVu+O2zU9DIT>f??_I7H+RO*B%FdWqX39(VFW{jb zg!`4(XNgt%JDRlG%J4{n4U$CnUKdML_A!qz50P{rIPUZvF1K8QUNOtYjl%f2MZyo< zCk9Yj;|3U8rU8bM@0~cJHoq+8iLXWZz2fr3Ase^ahS)$27Np+~aYN;z*A7r|+D4nN!Bu8{&I5f@CUAO}#Xy$h4mE`jti`yyy4up-R`Ce1*%^lkzxl*+g{W z)ZkjTJgEAG<*o%Sp>xQDPlbEUIzLo$IeiK>ZrPJR)iJHI{JAO2ID{r|n4_EsGovTi zrOsO63SU3&LzF_>d<*B~KL$keJvh=AWV0hMOW~FJGXDErpyz)2wGdox^G2P_3RC1c zsAb|;x|Gw*w?{PIjw`uE&n#8UTi#Blpo$#YITlCX_%_*h$<=>8Nqv;?HI7*&Brhb9 z8~NelQ72}ny00x8+qw`$z)Dj1W zOS>)YENjOtHvA^;(w~uI-JoiawZPi?njqo1p3$rtsWhoB8a8CsK!#iiMFweErfoxo zO>(7HZNl$K_~-@9_>q^(#tI?k0ku!6nDA1&J8+tKO$}i`6o(Bu8o|f)CxOm?@bM{U?d!FanpeeCBvfEY<>RXabDc6cLf zBi#08)v#oX{9Z7SITca`_K;& zwhTj*7WKWZsFIlNr$6GH;#s4@ly=OWGUj)Gqq&Bqu(iI|NTAaFbs9zoE}AjDay)z; zidrBx!!toL;Gymsj|sRdc^7@4ge+I33GPLJdeuuh#`LhYHE5AvttW_|Ed+G8d3-C8 zFjrEbab{@rtx@uHIAeKxLVEsA1cF}TvK5y>d%278tS%gJHv#30e;p(NV7SZk-opi< zPqK&EVqAv%@}zoDFtCWn1Z!?Dn#&74G%xxM!v5>6EE4?1H`$RJ{6L>v7(Y6+&)++s zF9oB00DR!HusB^~cgaPuTz|DF`|A0h1M(!F!@_3spW08G^9QVODAZ3Ax%>fFswa=6 zX+7}z_V#^ArQ}gg_9SY8y|#|>EPD)}QJtcjF5Ydz3ZqyS2|pXB2jqrFv=%9^DL-KVP%(5OBtCKeEDM-7`&b zTazYvu$~>fCSpY>#VU=C)18{q-LaAN!ibf!s_Q%Xo4~V%4P+>&N9+Fk^GiE+7#8xr znAf8x5dKbmhcP(tI(XQVd~F5$4v(zJAlT$I&~<OElKsz(oZpVTY)gvKN_$cw;-X=-AQL!OJd8wa89Ep z2DSBvDZXboN7z2(RV2T=>nmVuuPsr1u@l6D57>74@Zh!Gi4BZ9)2H?DW$-$tc)#RY 
zC$UDtqrenSjTw&>onOoi$d=BnZd}Xe(vr{);2j@Y{93kHxj0f}J?xS8qzv=SmQ!ST z=B@O6ZeieUL&t;1gME4~9T(G|U7NNBca)s>-EmUF$ef;z9MVEn`t`oCZ(nOtdBPZ4 zZLp;e@yEV53*|xE5iVWf!o9pju&wqP%bZ_Tp9iOH8Wz&qLxqY>VQ#|Sm}?#Og}?5{ zo=o_7x4Qb$keKg4b*XbG+%xI=elWQ^HNdj-SVxGQO@2ixBFLvR!GXkoC4W+0>!jUq z`C2J8Uje=XrqRx^|!)xmQKxFeQA)k5*e?sld)5R$@* zW1YZPvEoO|z46&%Jk3=D{uOD8nL%qJf{KY5ot&{yZMawSNG|PjwQ7 zl4NWNA(5rBj5S#*g=EbxyRq-PimZcVNtRI&S;jH4m$QZuH5ziWrAuN)L{r?zZNh^^tjnou=GPFz=W*@& z(s%0S4;U*wzx~nrM!2Xv{uG)qVN|of&+MYZ1eeO5*1ncCQzw#%sDj#P(^NjDxsJL< zgL9v!s@rKN?IX0+S%$h;SN4{$(Y}J{vn9#__rvNh8hnFm%b8E?e3;?BKW=+voO@Uc zj@q%?Ep%+()cS@DP_Dc(Ie&F0mUY^uL&iyOo$bAC9Jf98hPPXve8y9l#2sG0^D&^< zJ2x(uK2tlox26R5!0%x~l1d}}4Rf=5WhoJI@A$H}yCETUiMzH238`M=&RdsH9&^s9 z3B}z{Z%nIaSdP_=8AV%ma!LnG=SXosTT7ah>s^r0e)+SD$V37DYm-Ycp!{h@xp z2S+y22up6=?ME_XlU(A2TNAxqS~HcB*b5nQrMqX0lCMn2`d`m~aUx(MfoKH0kzR@A z+%df#xBfmiUwd&g+u2QyPIe6bUjL>+yz2n=$(*T0n~0mT>OfGgeb}p=>6}bi2B&nC zU4J^VW5;N9fhCtYq8Ce_P%$xbWfMBaYqY`F)apviS=7gi%u~}1x7pg$S`l`en*mHQ z7jV=WxGR&{qBrE{rK#+B7IiAe+LvvI2^WqB_~5Q@#268zGvd${#LizmCI&R3)?Fgy z3y~YCA_Sf>Z-1<}MvS{1#?bF!wMZ&b-~(<-EmB77v__Y$!*2%=r-Li$f>S%}B12IQ zSu1LvE9W;dvGT!H(cdTwg|gC7*I^A`J@?RDW6!oH$+FLCTnB6`mSh6Cl@eVocFyc) zsjra@Tn?=L{zf;RDmVM%qrBhe6~AIU*b*w<6kOxjp?KxZ;UDWi+qQ3=g#zND*!h~H z^G-@ePiCNxTZE6mT--p>xZ&`-x2B`uV2cH_bcC8?-BW*MA?Y``Pmh<(8}=lmg+1A) znZCQX7$zC2S5H&Z#!**;)6&EWn$8DH*bJxNw~Z?xetTT)#`)@r80I; zRJ#FV9m_xe(wU<%eB{=}$yt`|qUAm>wfLp^x{KykU0;tDdQ`n*rk}kgeBoW3316F4 z>PK~U7I1JWwi%sC)&AfyC$?8)+J0*9to;Xrt~7Gm(j?XZcmUXJw~p>}M<%HdP~|pr zp6aLT-oU3VW}kZRY08=KaN8GGxpX04Dq>o(MdKowYeSI$SMMfT;sD$PH^bB}J8=Mb zuJ{RamPqft%S73E_)BjpAQjx*s~BtT^*oGq+N@!G36qk~!GRqlu?Ly<2^-h7O;=Am z9%8$nJA1oYO0Ds2aSxXgD89R{<@cehrk4(Jc}on3LmtmFy>{WnZG&T$t#|bIno{>^ zs|_3!kZg}eiM@H6ZmDp(1m{@$%JcXAGrE{+Uhe6f5gxsZq$?J*i)5o1c^64Jv z`TO5?iAJ~~2H%D|69;l~=!0E-<`rQ>AyHKCsDrErktHeNgW+2xV?NfG=HKp#-o{oq zF{`JAip)&X?Dw+7OuaBtr7*(ys8@@yw47aE^VEEQ!ETU>#~gO=x-R!z+{0HOR!{kY zQdjdBSMV3M;$?4>cS;D)$*(^QQ{Xu>MZwxX_40cDXxT5LVlWq_QT@7hF&f`Cs_xz6 zDDu?j9j#et!F35T!aOhe 
z#~v<9Dm>ZNz^02tl9!gP$9Z@ER-YD0HI(4o`XPs~}k-1K%D*Y#inKU{(1j`W^B zJ*Mv7neV@|P^>*$|HWbmPTK+0(hirZKW;yyJCo-9 z5L&`dxG7~h`zpsC7-v$I&9v7;_5SJ|QG4LJy5%96YSgGiGc$%S(01QL4FCS2lQL4^ zNIlCtHdvK&=hS|ZMN3}^(|k~+%lFu5T9ffn!s@(u@|N~~H{zF0@x1o#KvAwyBu=8# zktzJ}01v8g+7i{80)}A8j+Q&q`)ua-t*4{Q*})NDXN_}3{x2Kr5?}1f+pC4-E?*R;i>f5#W|~hbJrh~@gH{mtvjCdr#H&FSW{uPfpmUo)ZRN`NTWZY2!{Ku#v&^v zS&~a`6=TU|wES0U17>|b(%)?vk(ikZr@lFD_GDt8@}w|j4dL_asN|^ImI^%>7ArLt z$KdnlrFxz|t97?JB40*%<54K5ihv|hxo?L>uk`_U-TRU3(=|=A20fWHW8es(Io|V| zCm@;Cvu7gB$9z??GhB_*r(-J3AbRKoy4rD&7B8G@--F^suF8RnASetHipcM?wTn?GiosK(~zQ zy?T=rf02%D=LLAy8Iu9evMvvDRX8H0L=_4k$DD_XYq2g=#3&Y1b~okZRS&D`n+=@$ z?XO!YK->{;G*M*J6$;!27EacNWj4OCvnQN(1`3;G#sy{7KA${ve*2p`!gR+{a&v7{xul-e8xCG?1mjwrhMX;0%!F*4Y9HI>gG4|?7x{lBr9=EasT|mU)Xo>HSV<}$M_{_ zmZDc48jCGADN*xv*_loIuD*Mgb&N_9c7ZZ(?0n|680|%r4Py_BsK#*4S%)irb-&4W zvVTw8@TKZMz?mvP@{Z^&!u8>ULhtoyy?!Fr3uBm3TYr5_K(A=?`#0J5RK=?AXZt21>4WwrL8FT{3dC0yS#^|QUxAO|XZ4J*ES z+;{mF>+*(5CZ5`l!ztG);;polhW*gs#3yh(jR%o}ZH?L9imZ+wCCD*iUpG9Y1}hyw zelL+Z5Ffv^6Rj87m7~m|q$}6-YT~6JIx+f#922*jy~6O|sW)zQ7JCSRE!q(AcD2s2 z{UjCTTsI?a(;2Z7sqcSmX>vMDzhRnC3G@6gtu36uKcO+$zLQzC0itmnlNT88otf9{ z@R1+D-}QDPd_f2IU!G{t7&@$B%HOhUOcUyH#1`L+^U8@bcj+9}AVjT7abBhAM{2Up z{>CVODSNsifruZNzCMEY&~#{P6~BVGv8RHT^J#HA^UhZemv))36nd?A!m?84wcCY& zkk0nRmVImb&M^Os|ms`X|A)94~CmSh&kJ}a0-pE7;4O~94ssu*Jm5m zf~R@=Yx1aeV5Hhjn_f8oY+|e?%3&u4atu2rD`E>d?KK6Y8Es16B4*)AMn4^f8n@{8 zg0j*yJ%&E0V%P4%vwfoabyx{Jn{Ds_iouHae5N)NzvW;1jHy=_xLSpD*IdB`*?F$C zz&pbyG`73NO-j`|Q%GV;i{RuRSgPPvW-#>W-heslm1gw2NN7Eb9=ThZeeHo(skvVK z)9WmyqE-z8W19vNg2^}SQp(xwJ7xRx!kc*muRXqrj z-XDUq`*tcgVeB=(VtQW}_N=`3^FFqxvE_0@_%&{AA~BkUP=rhQnjOA1CFW@BW0Et) zC-^Ror>TgJczeF#^CG_Mg7&~0RG{2D?47_bM>Z|rabt7%I;{hfI`oTAk++~GMB*&; zf`c%p7&^0eQ}(lO+vN_Zd=Pzln#T#mP?b>3C-yKcN1u3eO4`Zy4|-?KXX_~ku=G5K z@2}Ts{ICEs1eSAwo5VFR1+z_Ou3=mMAT>R^skJ*E{rJPTudbY|VlfZ6Y;>&5#-ppaH1zX zMHHJ45^^$%+ugKoXso5+C6m#;|cDthb!E^ZKBrNqIHT4jtI$t>&rRK zS($<3xQgg7SDOkjfg+Row|bzJ9R?M^8%4L7$rXA}0FzAc8XmZnyorS|9q 
z=cg0<+#^EPc{^oI4pEI;x}7$|P4Y=$+QK*d#XyZ_9`G-p?|+Bi$jBI89eh6PG`nde zj2^fpFY=gm;%?*kNY(32Z`;U}%}>)Iu&yR~{g&^m5$*-J2MVZf*CLt~^R^yj1=w9_ zd?QZ%V;$7wA{8zZaEu{@W4)i}b>cvnw0Jk}+J&i#PuSp4qm^+1r^_5}&AP9b<`WhB zK%sxDvOmr8C;t5TILEl>`yp{Vh-&MJqzmQ~9zg+2(JNoD;lFLl47Xe~R?+YF>~!*} zoGCw85=O%XUFx0nKOok={un|~)wQENTDnV&a)S^Gn*$$HB9^Cz-tjeLN#n1(D=T0T zTQu{M=N;}e%)Ae|tS3M{Yhd&+__t7;Fy%rwW=r#ZG_n0Ho1OQbB9BXlz_h1ZE-n)N zVYGYGDMyLvKJX!*eG{o#*w(Cv>4ns3dKj_{RdCvN%&e#iF9wR3;&v7r>*fVICy38y$okdU#2QHB#P5cbI|NQK}F?J!*^&$LC2|ocp!7|8vXbMx&JG;3)uE?`0!gw!$meb(x9WkD0>~HBi=FqGDAwMfu!t@bBgUx;y zi=GvacZ%^X4}NoB4kvWkCs__>LGICcNotQS|s;4cG@VK=$5tspx$*6W39LmOOq zqn=WubX?nczRVc4efMFk40 ztxa*+^^cWn77=2dGI*EWoR&Q&7^#0&NVa1sTsDUL=4$2b$iS&v{uvBJls-!*7>F(j zD`C{&g{Hl0jcv-{=^BKuCK3|XzQz_xS>0}NyF)h6o$k>|Y8idG8u?JkE)$tX9(cU* z=@0{Ju_7`zuP7|kwvn`hakbFug%G{w;B;l{%I#ckuQO2 zp~?_8fTL9JcrZK-GyIx<8{vue(&yq)UCbbLeREDT0iuGgU|$ZV?%M+4sZlU(i$>N+FoX_2~*_gpkIXRlVxd+Cp%8!W>a1*d^EC^NgW@&HMBw!is^fns#RL`xhKe$uw!<`D+@{4)lqX)gS zQ%oR0ZGi5#_lxI7zILI+gk~AX%CvN0)Y96?m}JBM=Y#K`}uBS+B?PE~9U0tGxZsOS$}kyDLk`I`Gd}A`tPM=Y&~9jTP$N6= zP(WXi04EIrNIcRghZ@J~r}JS&S@OETDacJi{7OylmX#MV{T~v{Q6S%F z5}pagdP;_g(<8HCa_Mm6Uv{e(7puHI`6aLFt)^|8ExjE(=p%22v<|eB3y+32D?-ik zYOyy$C^6BJ+pWz?uQ-T*J}68h8ct17?usTUBgSc6WG|Ka=JeeG=j5aej# z!Nf5-Q_Gcxjc}Ut9w^sHq4UGb70EI&N;?LOn$wpj!4N4@+yI)!0A?Q`N@mWmAE&=Y z(!+nKBWTn{jm2l_64v#F@=7ptQ|nR=tMaf0-aw)DcUA13N@2KFg7qx1(Isx>{{wtX zjrTi|W5ta6S%#d*&@{2l1zu|~ohf3+ds652-FyF+zJE7%|3gOo^7?h_34V({7LJp| z8iJ&O(neLdXDYWLE*^}S(wE+bML>2*i)1t9Px70QB<&vp2lx%UCS=uU#aPNX)s2U$ z)BfMz3PO|&H$$m$X6X9{LI3PM|8Ee<{~A~!b{+_O1^MB)3VZ`Zc?dRj(1)&wk;!i3 zBn{30f9M|S z8_?vG=7N=>Y5wybqgb7)abEuOssUS%m-|eoFMw$;h4`@$`UpLH_&ysn`=!w6M%4u| z@IY7Pc_JH)|H&@jci2obpD5F$-3_VNQ(~F@(W9}}fB!s@#zlr3bTHc4QWm|M<~$jO z15=At-YrZgTFCgTX2PSa*Cy7@k|sz$>64~**9E5Ui~VvwfBzF}4R;rF(~a$ApKEPS z|MQa?u1tkF(ax48N!K;kwD|Q#lTe%g4(h%O^FM#4q-KQG#%H;E))7a1=vj$HS#A^5 zGam>e#{UAYfmKr3s(a{UP06(W|Hb6L5uHax9Q{HJilr<6x{0G+qaP z#@I!SOncA95saVgr#2L7RlX0s#!eq}Fg7*j8@{~5ukL3{?sGqucR*b1e^&l#E%={j 
z{s<&oElpw2?}ts%)r0>ll(4Vaoy#U&))RB;*1;IvW`(Ct!erWpejjSV+GYWcF0l?T z&wz)Cd&&@+J@lVX<&PBydyM*7IAT{-sPZ9x;zb;y9INK^h7 zFuy;4W(?$2tspTKPHkV~SKzr+yUKd-k&wfWw78`Cq}TQ=mV()d%=E2e(NgY*h>ss4 zeiWwfP(Eq&*^$o=(>8US+(wfmEGl_>QO2j|<3As{0Mqu0 zbfnCO83Baw8z%nKu}4U-yn1o~OSyQ%&lWDQ}}KFIS^91Lo9 z_6GW(StmMNq%-8R*v}gbfmAd`SKqpS0Men{!>I)GA3I_v6Fw|33>U&Gy$*O4!%KlU zS~E?$7wT2$0S(~%f@Ld=vLf1Eg$jYtg;gJ=e-17j!1UK6)EoW?%6!S0S=y`MKhCbC zk}gP!xp$~O%p$fKD`vc8IG;u0HX1y!Vy4647?=Qo)697|MVmTSviu#SfbyATtL

    ;#{2A}~xoqoapd}d&=fy(8>e^}h?(xj4kc|DkmcBGeP?IxZ`mFvNLy*Rp zk>T>aK9N?_7eD;gig94vVu3YeU1F3?c0UhZ0}ldjFdTAYS#qt9_AmPrqdYd4?>li| zm|lWGW)SxDCq@k;YU8y}Z7bab0Q8?n=I8PL<0FLufO02qQs%FZlR;~+zdF>c{;M!;p~A+W;mZ&ZW~@u8p1sVZ zCr%uSYCjcBf3uoVHPzVBoI?Y8C~Ya+#84PA4D8N-916z#&mRAO7$0Y+r39*8lJv0u z{V7UAHfBgqx9o%gX606%1Myi0mzPohaX_EK3ow1J_$6i0R4Ma1lB~+}AG7~sbxHtY z2K>x{^=ZBVpe`G6{`UGtS$H#r0A33scnM&D2P5>Rb7)KZ+3W6~0 z#hU%rz83X-ptbm*Uv>jm-uL3($CNrU!?7ZoN@(!;7`e|3cOg2XC>u4a2U`f+ecydI z##~O*#B`Yyt8KR_Wh=YRG*a7>Pk?6iDASmSgEO5??BK;`*Iz;sCwYWSTSQP8Kty-b zGz9wJNS4ykY8}aqpPSJWUkdJI&%4f;g4y+VY7Z7_r_2KuAR`y(cKK1Oxd%291{DQ? zZl^}Qq@01G{Lg1T`ijOTL&-X6a)%?1LWElvp;x6 z9D3!(0pj`!z;*yC)y6NT1Cfm34Q-@;U()rSQKB#be&k=CrxVP#h<&+O3anoYZ`5B8 z>ohIB4x;@`LLi%jKp+X@=IlVHcpQ1tf%M#Pzg+yDzo)M@>+C|!{!WwAgic1cdqsD_ z9eMeT1B~$+VDG_Em^%8A=t^@Wl10fb>z=T?qg+4m8vYfB6h>gD*Jh=F8K8pHI3waO zul%%orc{FhaJ($ztPZl0bqk(e?=C8!vrp!Mr4fz9RauC5%`P=%Ow&yJo zZ)j^t^tC3@_k`so@J5X39}`zp-IsQ?fD#Yh2mZ{hzi`P5QTe5}7Nr1@j|m#QE`Gpq z=4(KIQZFzeC5PWai&!2=Nq9kE@fpH9+B(u2JkT1=e;gnmLO$vc32h*%zp|-;63CL4 zc!*@FdYqX2LxsGF9g-NN0>Js$;TmTl9ToiB>iQf|7`&1fT?U^Ia^!mmB)LTgkm=f! 
z13_VXQcFzG1Iikze=*B`&U1u2b!Hhex6NCO=Ks=H+m$a#^uCp{rE}x`?V6vqdEWSb z)isG8C)+#_l>g_K%%3{NN6Giv)N)`#VxJ!6~03T&EGm{>netrbMv zsOnvI&Jie0(qk>k9&@wb(qu^M^>gzYcs%SbCO;_&DcqkWNil*JBTh{$5m1YJ#O(`S z_#q+%$8-tfb^Q^S4ykVx@jIj|A5l`^vVwf`&1*+iefTrHz?_AvQ0m;}l@n^S)so-7 zdUHzCFish=!sI0PZ_pHS|EN*=l|&H`({(r91w#!Kk8V3-qF_otZlNht za~PmS{Up-z5+%ClO<1i^qbf4fX;cw*E#w24$_+D5zn@P}*pr7$NrowTLovfE)WIV> z-o!QNv=c)cqI?qxwz0>)bTl#P7jbr2_L30{sqc37||vDlONHT_`d*nd42)JFiaW9dY2)5!?5j#$i!p?@opc=X4fY5uE2i!KnEfvr$Lq> z?ICLe=x4?)hZJz^r@xhqx|fW(`Ea3W>#yhaS7&~sMj}QZY`@DEOfDKsS+&Elv%0c5rVpli zfKBx($vT+QPP5r`l#8Q^Rn4Y`<{AFz5;5JiRvJXUO71?vsXAWbf7y*I(y%!nCcBm0Roxg8p)tz^`Bb*T9L1_wc4Qb#G1rM^g2Nvsnd_su{U@Sx zwl?=UqHIJRJ$Sij^WyBwt+`v!PHDDeP*AxjeZ8!3GYc}2R*wu`xj|ZE8XUV48+_CqjrU;txKOdbx5}FJC5J`A&exWaAS%RF;wLg~ zj}geSYuxHev^rX`=4nTkR23?L&m6 zIEcttVsTB6*tgN%#1`62G#f1kUt>4h0Jq&&%-)DreD8;Qek9jaU)J+EIQU7p`ExM8LLfR}_p$7oY2CTPd6T+Gc|U*PGf?QOGT*38=p2UCs zfs#qOrh?>|tR)xHcx5$Q7)@bng}O2y&*X7KE3-f_xkPU^)O&hRI&O#m&k@0upMldd zDiN9z?gJYd4R9}pV4PjkH@3*06Uvcx#Wia?Y7Qx15ziRbZTfNyeI8=)rC%fYL-McYdd z4`|kRQNpb+NY=jgXN%$}a6GZ&QQDLdq0M(+5K2e57Wm`Z7T;2>RM51Od8zh$VkR3> zK|ud27=6HaHh3sqH-A=IboBC=kb9;ydIhf~%=Z(uqt|oI^bciU`6{-+Tb(I-Yvh4q z<{bV~=hd!yoU3`ereJFFu-9$SZq|1&1Z;Uvi~GVxHcCiaLHe&`NrjfSV=0(yqaZ!1 zG?!Ah+hJX6@rD&@RLPFXQGkM5Z-NMsG*)@-Y66i19m|V6s`%A_aW#RV@82V(oAUl9 zMT7bqIm~hDjZzF}e%%vs3rDJX+7c*3RNmpwz{@n6h&F$RD!Ti86lSFdokHO^56{cj zp~D{ht}_*GuwnRlrbsVAaf6xj!pyRdm7I)HRqqQy{DmH8&Gqt4MGBeQ!pf)z9QLZ{ z+CKux7lf}OsnJJ7Or{c8Cap`7_-7te!$iDiICp?57YN$rW1v*v>7-@$MraTpu~7_8#IYI!vB6@ICsaJK_$^IXbu{-dD&| zW1X$->NJ1Q+v9R~P=6{A_3{Y`QcY36y+_rUbM;nOZH#W5x9D=e_c{!8@d1uv>ywZU zRj32RgtDVpQljU1F(K_g3S2~IB8gc-ZS$&FUlH`_q!tf8`|f+DDpe>O1Mj`2{KKsa z+ji&mQV9*&x08y^BKL(8O8f~qQz5lCFlVM5IyUvV|xzpYWC8-Zm!ex;WL51sZ#;Q9?}`*)A1%$P>1Wj zTL!mXbFyZua|DF6Ivin5>^@#0ZE#!M)f$CM=U7AUb`!_TM z6p+hH8dC;zXvjp)z#bLIY#^&;D8(nPHBbtSIQ3Fv;VU1E! 
zu%Vv=rUL?SGZav$M*F3{1IYt}GZ82PXLFr6P(Lr-7bg3epgsFo-p(lC5tYXWVI4fr z)lRFL*8`1c|I3PlhWZ**N9^D}ilM}D~w^1b=Ot{uWr_ykY`a-zO-6Ij_f6+ z)TUWsNK%tss^@j_A#2o#lBNq1|3P+OeZVQ3QFPG76=0j<&YYZEB2zrc$Rr?&KHU;q zg6{`Yq;~0K>dur*gNv5u>k?)6dLkVgWP5hO=*Yj=3L*Y$6U~c!3oh|B$tk#@nq;4| z=2qt$QOtOAleLsBeU59Kr~d*zrfQRr3q|kTKZ8K#n%_s5{;q#+XmdOl&Kz>OtlAsJ zTe!r>DXvi3G<=aF3v_qEGM*x@=aN`{eD3}Tw#>GL+n~E~a(|=8lRVHo@*Z=YX;5>Q zk+ARZ+-j|rrZ@kj$%KT`{cKM$2nP7^1R;Iym(u^V;?!I zZX#kWy5qC3t8YTqwBl?p{zt4=iVU)sbY)O8d3|Z<2DpBs%iur_< zaqZ#t>^MsdDKh3;cc2VhTZR(+Ke*v(2d2#0R$N71Zo=H&6V_rGyCpxbqh1eGVuL35 zBbwE;`MZwd<_8BV8R2}^%0p9Kf{GF3vT)9&)px7^<56U^%kBnCj!#cD73Op7M`&x> z#9SAwJB!*;Ll$mEuKa072o^eT{QydAaQs)0I)e@O3<$`#v&j=QOxsvNG`JXZKd~h2 zTVp_E2e`VLJbhYx475VQOWURru)*m&SV7P;7B;wg+of(Uci8={ChGuq-q?nG1BFYd z>5r0@P4QAy!#EqGa3;SiO>Zi+Iom6U*}ERBex=}c*8_)NHOHI*eGV`OTo<&+D(f*E^Z(37I*HN_#ca zD4&GjUPg;i)v9r=)m-zO1z50NY0x|gmndV7C|pMYkH>T>bW}7GDOUmR|CRTdAWE_Q zjEO)x#LbNB6d6q>XU*sOdtG&?P7kM^FK)J}*75@}vwz@5mCiNx4p@C^n2y(gYwMEk zu&7#_z>pixKs1PeZ{9T%~ds<(jd+1||W@=h*O%C-iP zlWJphQ=~h0l<5!u=V~BQn3c+b#CTwr`(rU!&sonnQz$7!ZY#xMawjh$;lZ$OpapS) zSnJgzFzSQ$x06gncyeAHkIwc>qANYA`d(|_=8_*Mm9Sr$82t2L7|e2u+-gLhz=%ct zcvXW3VI604kWPalP3=f1&?|3tA}{}|I0>4_dBj{0NX_z*Vv~UGAT~L$4aKMm$@Dtz zcsXBh>3R)GYKcF(R(L$SGod_Kn=)Cw$hvs4AyIdiAZq+qc}_349gQ7$(RmVxtA!qv z=7B;q%~3iAvEW=2|FBD!_|~tMn=}9O^d(Xp(1#TY(e)ZnFi;#r2wzZyPSGoO)pp+5 zd<^Qv2tOW|{v9-}z-X9#j~LBX=GW%vcihP>&D4`K`T{5vngUUD=M~_DRibT6O#X_U z#tg?0&?8*=4{daFrHfAMoL=CaKUNRL_lj^($kj|)1Qny3ZpMoW`qCWgj-30hWQN>h%O}gY(OewT&KCAkAO{hql?m z+a%XWQ%M=o6S8)MaC5}l`8X-gH}3}N#iQ6r%Z%Qo0qNoj-eT1t!#Hr0v2UaCK(3`9 zr?Z=?H*rRD4`ZhtS#nhpet&m^7p%~^baD&?q&kTws~)O=1WB6yE3lCt67RFw$Lm045DJiA>R=3kq|)|#7_fP(8= z1JJLSQ0Cj_Z-QVs$bUK;0GIVI_!YIQi2?ry{;Kd&zsI!eU`p5GUChL=Muzrl(7%@b zaHmlobi%BK;cNt{B>fmC5R2A5LAUQ>8nYRF20+>bp8?K7xf3ry7@wy6lC#}^EtHl@ z%0p&FLP2M13@PrK1P;(9Wwuo5Ql1ZsEH_dfF49$&nRG|mfN?9&^YNfi#S4#MXnAXrOZ{%A3r6h0pqBoX)sDVkZx`ggJwu0x7ZuUX9f9Lxs= zKmYc$p5RAdM~xsUaehOJHqW>9)eb(?xovn51ot7ur$r8uEAI6!Y1=`y@om>?MuGC5 
zILI*oXe8%B3J#oQSxNEc5mI=biWCQpng9H=MYIe6<%5Ap3}^fcC}V?7P`B}~?+|Ot zKt_1ffs}W?eUSDrM6$s2g{nXf3d*hcFhDvf(m+bZOa(g8aez!CDPbEGXWM-R1ie*2 zk~Hr+=FI;lh%y1YK|X%#N#w`F^d{WUJGo?b#jVS2*vp_pYJc!lI`ut?=gB(@)!REw zJ#TQCvmbx6;oKJ8u#cV8L%2cQ>@e9BJf`Vj>ud6N&c2Wp__ndy)l40chU=q?S(N$g zrKA39-MP(gYn6|<`9g_=nS)F;ijtK1RenJE5p1^Xl|e*BBwBRX)jgKpo>31Yk|$%} z)52CCd1UC~XW9h)>t59$?|nzMp73<_H-Y{wR@;&?Y)Sp!N;7;^W7|{~ep;-?N>p@+ zN}tY8IreXlWy0HeGQRQ#8BJGs8cdtwh)Zr#>%~UfbJjQ-Qvy#nmb+{2v)5 z+MhwCFRnJ$BhFHMn2XgHte1bfMPeCuuYK!}kht{kr7soDpzz~@Scvb6#{X2rKtHph zy$8D5L9E5@*^gqFetmZD@V^(vP;)9H(WOT8iWEq+Zz-MJak2>Iu9TP7pjq8|it96h z*%f{Ys(0z)$?{rvbGlA3!ECBelnZ&-&hww*mjAB!=~Br2|5Fj8X&dCMy(UmS_YHIc z%y-XT`2X<{^UN@{Vn#N>l9lA*rcCRS(SSBp-?B+HA_G*_!=| z{Cyqi;YDSxO!)Kvt|B@tD!LJu84QJEbIBSt&y)GeoC5yMI&@iefAE?Q+IRo^VsT5w zQmTq+y7o-#-1;*tihsB?MA z3xnFt=O#+^*pw?UeTM7s4lmv(sI%^t@Aq6Naz8(xSZ( zWHo+|@3~Wv`LOF9qP>ou z3VcGKD8DQyT@XxHp5wT8eeW9eHsfQkV7dZnL7{q!%gGsD#59jPP*9lpu`SUD?R=@ z=+-e)cfZL>fH$)wi5O87{MF@z*O5F6&)m}?=o*dD4~*F6)^#V(j7d&1)5S=GMcS^x zO#DuQ=pz2j0Y;QdS3bk^`z49Dpi4Eq@gVtGVi9|xKA}+yu2IJDB>AYk8FS-GRdAy8 zM3x13gqoU^Y!9KN?=1iH8h5|k1ejv>tH2pFHg9`U+oMhXO{jLWWs{eEb!_VelMBP@ zCRB7nee~cpMG@8VCyl?L{F}VIt_~BvC)T!V8>_-J+EY0sp zWj^=^#>e7ud0(JJF?1jc{S%qv_1qDO_QzFcpc%{(fQ=D?Q z{>AMK=&I8{?Hj@ifCss%5i4r>mFH<~yxYgnxu`0d+AL#0V18y2Xs?blW$OR==|=;h z2g)^uk`n@bG4aAj$#7M-bWC$o*oDr7H7}Sf7)#gwLEyzgFaAk-kzRTDzBxxvWivv|Wy;o{bN)e42M!Ip zHobk-W2px1RNfI|P&Y-xlv9rA!-;_7mHt(oQ)b7Q0?!hT zfY&;?9fE0M>YtQ{!%rRKb1+}x0m<^1VvAX`bJb4tl@e9 z5Vb9(`KR~@6;WNVh{`LN-mYP8v;R*Daa9B&feST<%X2d6f$BaC+e9SHSoQ7!$~r5p8rh0Z<{*5^cdijv#ub{MNSim zmdu`5*Hz#Lg*z}CP-|t(--ZY+kNL`!s34CIC)FWdT1vVkVjxuJ{FYQ2h<6Pwh%dh& z2kIIDXPmu^UyMn_usc^cx*W&hTFa}0b)du}Nqh<^z8z;{`Ur!h9=}oB7jBXTl*lx< z+2m%L>+n@)VPpPw^^f{KgX>_iAQJJk>4A;?ywZKjV|>d-7SMd+M7j;|f=qKq5ui8W zSJrPensQxZ$2Rqko2Uvvg)kC~`RBdbE;32kHXM|~`6mtzE*6hZF$*{+F8yXwq^Bvb zz(<~wJ`QA`8(g;zd|$?o%Pqr2LSACS6;(l=V9iCY5(eAXO9QQiz#OV zSRp()38yTI}{Y92>)lDc0&Y$>=u&%p0ARCEh&pJYI*(w1hedV03Gy`<5`+4xfMUMt&*{N7T 
z2&Q8$?!qNoSS=v0dr?Qg$&$rW3T_$#ZDq|*Nd_y5NyL6-OO~WFO-EMUs&Or`4z?N4 zJGn;??nsb3Oa83;ROnBjY+p#q1{Z`Kuh z=2QPXbnfgLnP-Heq1+r3xHS}imPEOb)5)LoCBOWfRfq$$GpS2yo3ccrTZjAq=;^z{ zjyyIL#KvAIuyT&i=LAXa_>Vvu9tWBL$(&Sx%CfL_fj8g^Joz*_3eXc%A z*5|u&6Ks;vB(m2Z0cN#S@@jo15_9Ps;1#oAPOmmO*k6EWfs*d}#m4^X_%`LErjMCU zR2=N=J|u$Z^Lt6L(aU)0Hb$_56~A%2xFXR*ut}hYW=ybqp1WD0&E|DmnbpCGqkx|h zB+I2wZQD0xuTGh-|EnwvI+K<+^tEth0Ii5Is4p8&GLomJSmX3Ont-K9Yo@1(VIi4f z{_`TbeV^Obe-1SRcc)xGG1=>Aq!)bkQ==A5zc7*qCIYrfn%-MP9-V3`@YTX2kmBMW z8>_#Hc;!pm-lh)$80aMi-1h@2N}C{SKd`YoH-q{$9v#&8j`ca9-Mu8l=3Z6hzfd+1 z@x&s4M}T)WR*5PQ9z_r}_W}6?rv%BuLYYs&3|1A-q&)*t{+dL?-Oq~l{13|Xx|I#U zS`p97OGo~AUMq|_<8F99CL)--$rH>3Yc8lA;K8t2@R|C7TDfbaa=-qo`cFQB;tFW< z)aPrh+*Eu-G87Q;67gM+*}+Yy9WhJ;zR>3GO7e0BW0ej0CHsCJRQNjK^~_r9g-XQf zBbwQ2+gOpR?Ce>$>|G~#bd2Oc$>4EFmMdkAG6p0%yu=J{3?9 zE-&~%P$euhg|Yky|28{hUYsXMf=j5+w6CUzGRX2~`-1CQro1aYXm)2lMHIwd)1WK^h7Mpevx4U zW{SO#iR6T`JNJD4{24Syusd^9jRgt6g^W_lIS842aiOv)`+K5KfH=7ybKz4J60EU6 zefeWdpO;-btV$aAs#_mL8kWM3BnWjuj#LyQ#2;<6@A;B#sb)TWN+G>cqd-)^2F`JpR}CR2|n zKQihm;B)H5n#+JkLx@q;0I!bl0RwOY64&x8AFZb8LTr595auLNYc?W_97JUHZ2HrGPj#m9YZ6ijB>%ugYMjy*(^Jv`I%DseF;Be0ZwI^Z6YVb*`ELOW-W{v5W}eqo(4yxm{fL&e8PC*sfeB1IB#vCDWNt7uJAh7=tB|@4E@q zJdxH8`Epz?7D)7EfX~7?nsS}Kp=oPg$Xdf*_3MoX`$N{K{}d#I)N~@D{e10&GyQh} zBr!}T)vbu`vWQWze}poV={J?V1%i>0m3DErQ^rkbu@ z6$MnPC`AxdRFqznA`nE11qB;TAOR6j=}kHT>C!~0(o{rLkWi#aNa!Ff4b=&?qzh~unF!2%EnyWDQ4-xg(Lm}_1<+=j&e++e zhb2tg_SBXsRF=xfp7Wq`H`!_s2xFGeBj6b<#ouD`5tndI&XX>FV3*GsHLTkUF^Mui z$`p~bLz%y&t^Hm^9r}^k4kT|NW>1wzi>UZXg#!Dm5;s%yn;E4&Rc2Bn`_}2=#0yxRwTl^Iz#)wmk&Byi>4}Zb;wtJ^Kt2hXnLW7;*!OcRld9N|yLQS+ zK(IphY;Un7(3PNO#xIOGP;*~*y4rf#USp1YA=0GPW9;#1pl)vg2kUVT0}K%>lfGp> zpOGg}54s~dv@2T(`U zpvb@+WhZgk^JhBA22|w&gK~cG9(=8#oO9a=`OFE3l$GE{K;++?bYZ~LKW(5)r>JJq zQjFY%w^+1o5uBD-xJcYwYY4sLc~yPHso*pFrO=A?ScqEJXmalp*Jw` zDoj&h8oeYAT5{q5^Z7oBsTyh|Sp!Iy={x^-7qC1) zk@sv3AOQMsDDx=tYJ{G+(bh92zWOlAPZrUS;k@A&QR2zG_lv``ACs)7U{@}kJ!@(7 z2g^gbaFAE+esLnuC8o#u0|oERHMS~dcKOABI;sm~!OkO(H&dLc$8;m>L3NEqVIUJP 
ze(W3lG5E_tCsplodS-f|hj{N*#L!unE0t&IbFAWX8bB$g zja}h?lHdW>B_(-JgdUFjw%n=E_S5q)R%Rtj$4o4pCy8Z({Po;fk3-f;$}c69eg4jT zJHziNy^cG_1NwSy!#KOGS}KrZ_9wD6Hr`EH@d@A(-XYf6cUL9!szGO+j$kE+I;Z@j zmQD-4_u;{6072~9>b9mU*#IcZwjO3}@k&B7pcQn(th~nxK?Hge4o+WyJ&^fGR1|sc!J z>`HI?Ae&e5u_R5;pr3$34>S4FwMCh)kEbN@VgFPE8yD4FX;f3W?hV$|dGxw%yfLZ; z%xYJ^#m)V&WUdX=ZT!@nl6J8Si-}&2SrU(rZs+C8lM?9R0(H-b{aUnsgDNpfsh97M7ri!st?tPJH!*k!m9!chzIrJ|naBL*R@?J+)8|2`7Z*AN83}nr?YP&m`tk zmbmHT+$J+*q8NkNLZJ2#q?_oA2>PO=lML2~+H52}8oVw6FSC#x77AXf3SNrzpiPL; zC)P=u9a~Fa)n*|{i?N}F_R6zN)Z8BI`;oe28a!iaVguhzgZnLP*PtHf73CFEby7AO*JG04i!5w<|`;#MiN<_p=(qIk0uibn)3mr)>FBb5a1Ers=r{S%51r0E~w;4lgp?oV}L(D4jP%T5q} zN2VzlOv*wZXHcI+P+uQsUjM^f@}K;XT4ZOfH0RN^=egU@3rk?62w1gO00}G;cHJ^@ zt?;5GGKgBT@fQSTA=DXJ~~Qxfm+W^tyh#@ zn2LmKJ=F4Eh-`>hAJ^ZN_o|nGuS*~r!GSe_zx(3Jbu$*InZ6Y*S^?wdKn4cr{WM`(Kq1**+vnrY%%>*8swm*c_-6cnM>D+^x4(F~N2Bd7;!79BdIfUJumQy~sQZEXIv8yG=`9RPW zGOYu7y0C5Ux@{lpO8e_-6-i?GEz4>A`?6x75q$S zo5{LqF7rc};O#z67K}QwNnLt$Y)W&?_POt?L`rp{MFj0Ng7i9n#3y*pM|NydX6(tr zcBmLB)IgkAxcF4YA{mn)hCCJin( z3f6Sdjm_w6RQ9kDIEkBKBP0+e@cg{A-#$OoZYFxSFTU*Fg${iocZv^V6)%HYszo^p zrT#hx@jkeCXVg7U<{2NU2tzJ19>h?P7>i%Msjiet*IKW<_~(wpS(R<^w!|y2jVqlc zBH{@rRz=hk&a}O?1*gKK+j5Eo;Qo;ao|KRsRlT8V>NbU&0Bb?Mh0j>B^l0{I<5VRS z_IV_D6i>F+bgFS=^)FDema;aoD4Y9n^Lz`G%@a7DAolr(6@>n|5J-9nY*P>R(<=}0 zC9yYZp>sM7Wdt+AGa_9{k;Y;Mc!wfi^iBXef5Za4Wg*vg>}yc20-Ipi3w)dWkvQ|i zoo1|fN*%sdWXjF9%XZn8BE4NRb|Wfrs~a=9GKs zu)Y7Rin1r)n1jz_ZM{$b(PDDi6I>AA~S?ViV~@8yz1N;cX}&KK5S#|<{# zJ%!s0QCHDjJE*9_;wFB`1$X?d@RjOY`&BlXYSKW3ItIuH0K2MBKB<DSRX0 zH`Q~OD>f;YpyX&y3cr?H%t_ zm2Imbo;>5sol6wTEj?loux~|2ePX*5n2^rxyi?DOaL@3#%X+mEkTw=oh=>FZG9Lzc zwxxd{LBb`=pvFBe^NT9I+7sdibqoHCanpH(z)a893_H*};k$B$kS4#^7As2lZD)Mt zq7rViaTiMApq&Yd41Oit!Rd~H(>Ky+M^s$GKf3l4s&8>t4LhKK%CQ@8m`CHX2DXL9 zZti;+G>fKV8NZzPTsRiDGF-`DTpzcf2`Eaci*xSp_+Z@El#&Vzk^&f|&-p9p0?#Iy z!_{K^iwtgzLBqqnZF}bed*+H7zhKN_Yp1#w{=lRs0C&&j!qk_` zJ$okW_Q{oqK`;i%JiiEJDjFJ*46y+kMv~PMg}AO?;*B zS|t$&777`twARG2#SMX2m9Z`d`_=0@zfs+LV?iaF;oc;t!stctX-#RVFce-+w33X* 
zl2gjY^@jw@L(rF`mi9Ngi%WPn$9s9`Ch9gW=>dkN&eR z$yaCz{$WTX&N(1%xvBv^-DE3Csz(Wck#TCU5Yn^ceL#9M?DOf#KA0L;cc; z94Z~}-QUwgggXX*3ucw|<<6VN4Rq;M_Y&se_3>FTMp}nvmC150n+~&(S-DwX+$s{R zn)V57^O0`yD1SaVH|yT&*Xjd?TWLx|1+3VzRW6CNrZ>fxrtOAj24pz8+h#GCzJ;CH zEVdc*IK3uS?Xb$M_w3`ptX;RN@0?apf5hN@*UB_ zaL;#Y-nRyES$D2}juLimP&*wJ8i|$Sh>b!P$8&Wb#EcII(9DADPKC*L#oz;GM$fc5 z>{Fj6)tQbvpUjOpJWP_-{Tl;=C7MlnPPkAz6EUxRMoH3UacBxjdaYox9}X~x=@$)= ze3@}hsM6O=jE zl^xT2nWgWHChn$|Uy__lvbf{D|I$r%+#&uFPH`eXxmUu4SM({p2|p!VruEcWR&q!~ z86_ePa2C&1#VpX zJ7j3tIVZBJU4jMqly|12<7{GO(SpxNa?195@ZoAlM@^V;n}qGbHW{0TCOs|5DV~?@ zW&-QuH^e;;%=u0_WEI=ZAPvmb#Q2mQ_svFD|9JSfE2XYX(F7wSMN6f}3ftKkezUW) zTuHgIanh7%s%D_fzAhZz$d!`fd5w#O6JaWN^&H~f zREo2ov3S^NpM5}tj&X3+TZLDDL@qiR2D18(7iU?u&As{@kFjpD(zYdfwpqnvPWcF& z(DM1t)iP)JQ5oWMB)(;?xDfm^TM`ngM$9Pr8-r;PMf@rxX2jJ5dy+puPVvR_^VUT9DwdOCos*dt}_OD%J&-=Kle-&F__Gbqtehxq7RfcQK(A1s?* zGxfrh=(%%Q;Fsr==M8KYNxHbkQJrXh@j7^<%+R?(yFmVl%+@&HqGO;PEN2(G%rNJi=Ja<42JEu`PJ1FQ%v!p;*&j(48pM)^g_5zoDVP@1t-6H$QCc zpC+(sx8T8|Jm@h19;g_m<=ZPJ8_eKNeqgF6M{d%(N}!!>>z07b zjqt-aIJNxU8DaQZ5*6W3>R6c4BKR5PZ7>4I5_6b66Oqa#p2t^I&rPBIiQ2iOE2f5G zfmUL~1$;FrPm4UY@fUz4wiBk%HH*$nZP-F(A}ZKEB|}J`@M2 zT3zd@+%i%dc3?x@u8y1;OrE@gUw_BXU{q13TME6YM&^IgeleO<7(a^{-+Rc6jbS5QaWb0%G8iCnGJbL#Nbz(YJ&@kb{OeZ7RW_3t+_?U6)Z1y2>hGr)s*!e zvZ#cAO7t0<=i_(X*-seQ((4aKN-bK)WG?tD#4u8dVeZmp$E@Nz-0F-Xt0 z$E*>?cGR$l-w*;7JMgN&3TDN;eE-$NRtl8xPyld`7MdRWgH)K8%g9f&L^h(SaV-qe zEOwrp`%APsxC=G`3u>L@#IJCka zryOHVcd@x;)r;DDnQV*nqe zYE0{rLvHr1kx$?^6PX>vpo6;*WfLiyGlegFgC+>HOF#{Y^c>_B3XL7H7+W4<4(JLN zuIYDL)4Ohz4Y@OUm)-z z|HK`T0h9ohYD|cx0F>V_-`l_@F`S5#Pzt?3niJ8;9EfMW18d(WwqD$7D$D|#(J*(7 z$~yAzyNEdR{e|xeCAaJ{@`%H$=s1vV0mNYo&vPO?ZZmHMOe*=G_qcx_RY8Dv^sPx| z9X_kL*Xl_qBK0z|;}T#h=7fIRdqi}*0?h`48WPl^r&-8 zQW1*(Pfu?9E|<3PGv>u;c`aIuIR=%i*S)DfcNCyZE!dz@tjuc7Tuw^>vMoRwGUo|# zAjYCev2&=jrqYv8IJp4s?~V`Jkr`VV9djn05nDeK>q?(>g(BzmHI&U$8uiWC9M8yC9CJ>AWoG+i;X$i=3zRi!=bE4S1$?>gV`*9{M3W2U=nDWMc^8Ro6N6L95tsnk zVyoIp-vQI0ffku6Tp5+YpurjVnL`Xa_pgyyfGOaifjd5MAMCz0P^MZDi{9Cxk4<8z 
z8CvBeW-uTJUCf8RAU=`XeiEAqh8;?65TiEuef8T*08RyfC|m$QBCeGrW@zr2_MC|r zJts2Jidf+AilI=E^d?Vir!6+pbnHb#x2D$aJ1xJ}GBp6Op;4q_0=c*aAVA#G!GxV) z0?yk9JU6MZi|O0SB7UG|9Z|D5JHcWlcFYwVo@@bkBpT6wYnf}%7K~ujR#$4PUvw-} zF0nO{*j!SKV=TDj7%PBU`^|hHmS2u|49gF28Z19!8xOTWP8&cia9;ftEg&#gTmE>H zd21nu!>jf&zZGooN{bdXis80{^GEhzA%On`&uL7zcmOeuD*y9SP39{V851(?S z#<#$BrQmi%EPWVefdsJ8*klv{sCslPxoVY>sNb2$-(`0JvH=;#*(pJ09zyJhKJ*^E zITF`dhnqyu2hhKKyRm`YElk`no3hFZ)Qv}q-{10_3K0ZzF6p>@Zk5AzTL6%mG@{06 zmHIZR1U9uW80{GD}HkZC2+$8q;GE;kW09CS_jrIS5eM}95uW?`CL?91LpTZMQ zrNy3AhWhnIx110lJB7elfxI-Wk@a9+&cp|D-p=lG*+C2w&8DS9+F(TsKx!*ma_Rkm zIdSO9nNmO!t7<{$uB9lM3d0XMA+zzQ@ ze$F%%aH}k2F%R;B=#1ZPKAUQKX5GpdRZ3=Rp;!!qih=j31P#j5nR78CvH=jz%uacv z|Dq7aw{`tQXv&!;DZ~kSqa3xdg+a|_coHdS2Bov$u^^kjQCA?aI@CS_sKD;2>1-+c zF|h@b8jLzp7lztp7jRa2JRoxwM$U)*BBi?Srb>+J^%M#`Cona=1%0-n^QW3mPQPsK zj$dxori15u37esPZTCh|IT_0E6u`VQoaSh1CpvKT>Ru*ZY4#*^D0%h-fkB2bmV}{% zoVx^WVA9aZj*6;3U6G2Wv>7xtEe%_PUYA09to||!Y!C=+SeM~+q7s-ToE~50CEU6P zf+gb|mA5y|+AyKul)>%Hu>!qWL8zyUb~Q8EqVA(P)d`vvfUg5Z*}~M%zzvfI(}7td z8^eGYV67Y|;qyUbbHkG(sFhZEkg~w?yKWf5ftbK|viV)t{s_k>0z)=b7zW@NOz_5v zNgwGUd~tYk21Z}z3;-qxTMSEtSx_hCpcd2tw7)YEpi&h}bGv-cKLBjJJW6M5@rR3- z*74`SgYZw8xWgm#bToGHqib1@0+K$Apbbx+hpnGaL!7tIt6YD9*nZK8Rkmtf0*R@d z9u?@^MZ62`qgdM-`ME*Ul9&uMkcgWDE$wQ-w;v~-qyt`GHa-nsa<#GmW?6-Ns*tG} zGzyfl6+^3OVeA4=1?SDIGy=IgwQe8%j3r4+F?SXYL`klv z)%Gg^q_t2ETE>$Tf!k-G>f-j@?bMC{V-jYGTm`9*{2&Q}A_t-i?BYk?5MXAl;H&Eg zah5Ip#kthqxq+)5tSUaE{!2fwh*NaxyJANbW&yv*5y6LGOH!x<@n?aiGjR&IznzXD z%p4aKy0cHEY5iLYngX%}>%xW6!uJ;~maBt((|r8WX09~urO#ll7~$~h99u#OE!7ch zb^dW%2EQ(ohULNpjBTXmUbJ1Fd0!f&!!<=>u1v zH%4m+qy&PQ>>b}HObG}w!QY>`o`m&8GP4Df*&rpv&md~i+rZJEiw~tOudP4gJu0;- zX1b=EkAZNtC(hWq_oPCpOj&xSz8GG_8uUJvoA=jbN;L;=TnIZYA9iHHRvp8>Ntq9xjt3QPUf-785xj@W2GrG7uSd!>Egy6?96Ga%yOq$MLVn2uclCXC5 z#iK~qk_Y)`ljbCC53o*bVPkc3M&dHQ1*?rGYRNun6EkhzR{cyeXl$wYEa+&rjGW@lVz4C@{kz{y8c&b#5*|}LoM<6jUT7qti3-1D$Kpu;cr+JpHbL1Rha59)azY zL*vv67XG~B=N-V@p2S7J9yDiF6BQ}$I$pQ%CG)MmZ+?Qk>*Fj@T$69Qd)L+jxQ 
zsnO^!?`+Na;|)oPoDO#fInKZtp5^3-aMLroXB*$Hs79F^htSQwJ;*OI%)VNx{bk_0 zv1eE?h)6{<6>K9Zv80Z;PvdXnHwVm;Li}Gd{N$8;QycXDz&~#_A4P0Z1a-sXQJDyX zL@l~+?c(>s%J)|c>OehM*LN9?^~b(;*SN2HyS+rfGD3~{pJ|=$@nB@sLfnN>FLJGe@_tIeCJcjY3(|U{ZGiO2xOPkq=|R1nA~|HJzH)A& zB1A{>FhYxA*({4Jx%rt>U{yFvRiR0x9aR|E6Th~|(zs-7dLl-nJkxoq)vhbpx!=CC z8Y1HVH5{wUaSVEh$wq?Nt)1ECI$n#s_ZO)BBC(XJuReU(lim>FUuU0q3Lo(Go?f;Ys zZO4mazy!rUKEibv@$C%qY}qO*pq0bab;BTF84c$ZR+hG99NG@CGg(SdKVBNdrQZK~ zVATd6TvcoFB)vLQ+1bzXdXg27U=%H}urCBHOYj z*aWQaYJ7gZ_ImtIotS0mEBXa&yLHhAw`+`aA!v$}=}YHhCeN)a6@rFIcy99c%F-}r z(di_Co4zNmpF6^i*gRzRBa~4dhH9jNrMP6f;7^B?Q>E-bwY@!Wkk#E(R*#HY@ae_A z8jmwIx$uF|urF==+T&E=3tWYwEr5ZwzMK45T(mljI3)bwmT%BT_t~Ui4_+y&t5k{{ zx@%2X+4((jx8JDB%2#iCtCBdS6pk$sLUbh$#9^ENhdTfh#to1(O`m6xBJc8~-fwij z6UD~^H5Ub|xDB{jKWeuS!zj~dkU4PrZcj}27-t|JU?78v;`QvR;W+E7+>>(8-FS{) zJ_daV^ZoJQyzOCbSBFc9QnGSS)K`L|vn}%Tf_Aq=O)Br`<6KcgBABQ%I*JM1>taOw zdD}Mty1K&K@cZ}Jr;HVWg#MqpF~@q6x|dI+w%b+mh?h1oJABc3_@_IUnZ5ywH@~-l zl8O0~H~Vi$KRj`;V82qFsy#K>Hzb}bcurTji^R8==Md7Bp zk5}&+hMQ&$QiIul0S%9aH$`%e5`Kr2W}TfdW>1Pfja0OVKF|NnQ85g4%+M|sR#2=x zy`dxria@6W>McIC>7S*|9^-Ce*N^N}g?co~+Fq?yy(B zMrDDT*JV~n&-=gBsBVf00=wBl3{qbd8*6IFCo+#4hPsxco>|zDP#WYtQ6-0e20G z`89ULa#YiDp6><2tof_t23lCUjBZ%$kfbWxpo;RmQv?{`llP{F@=VbJ5j-*MjZ#voC3wqwq|LP;2AyUlcEp`O>dO9!6TN?$mQO9B}*<-LRx9sT16UIWs${J7R_90pQEk( zpGTq<1sN>@O51SfzfJ}LpAH^I9E0`?})YHtAq z{gRV#y+;%Ax}>~Cx_^#P<28#{85XLBZj}d#pS@+vl0W77>yrJN0_p59n$vACoJs@&4`xE$+@HHjH=Vj$3K4>Ie~9i_R4Z!t#JRUQU-=rd z0$Y3T(bagZ(eVX4LW}mOxd&2v^YhBR=D$Bi$dvneuXV{~dQNNOaTr*TakH!{I(ntL zbHZdblFOpGZllL-!W&WdtWe>aUwQmYxExAVXVVHD+)1+fbv$OXJ63nt&tjPG{7v7K zwf8-1=b|q+0mItJdA+Gkj^$3?1=XVwten|>z7rLJyX&i-Z`eeAt)&2qL}n;l_L#nj zy_Tn-Yb(S4N@`9&BtD84f4Pre)>5$IxX9krpHhF>Sq;?kMCh8oNd(t679F79VW5x8 zH>irEV6^m#On$^^LWM^8sj}vKzHiJ;lSV0UWcxeb0nyaDC$8$t!Ez6nGM;}ZwBeR& z(Je7ca?W{h+*>Cv+o>Y2wTzFC%I&-F?Jqyg{1NYD^mj{#D6UhLZqMXv(;FUfBplmr*2;VIjbtoHakJ`Ui$0(a+pthwku<5mgWjPH5*srX>JJVL*@*hNc1njhFc$etRMOuN;^JF!+p?Z*+IqU>UDCM^|U;Q 
z4>lTnb>TjcdzSjYYR#Nq*jClTP^vGX1B^X&a~)8(bwo;{1pnfV|JGv7jve(uQ_TtJ z5O*p>KSk0QH}?Vd&Q_6+p8|@41S;&u`?Yqb-`zK8I6HCWO1Np`+Jq3Q!DHsN?yx7v zIoT&@n#sBKE{Cq$dg(OkMqD+Vo}s{(LTc$Wb|Xe7!jjI%mbM*5L>Efrc6A5~x$|0; z@+Fc~#?lppQ*Gs7(B=fyPtn=%;MTcu_hbJz2S`(d!_6h6r`k}?s+nQq&szPEnvGJN zlBsMWO7?M{gOeKEtK6N^LVXHN%BsKz)-h{cC@Sw6{;qAp19Xz>#8%gO-0hKm8J6p0 z3IVUU$|iQa^plp(!!SyZds6@$4fyL`9fQ}XuvxI6DnxV7H0`u^zK zv#xF>dD^q3(@gg>;RQNZ&HH#j3)FWaLV=h>?jTqlR zF}%9)`A1gOq2It82+@h~7oOAdc1x4B!Yx~g^!yq4GovjS)PuUKnvpTuv21J7X?<96 z)rab2jh2($Nu9~8`|#R2n6`zjC6Go{<yUXxl&ZUj z`tl7%DFX2GBL)BD+Sbby^7YDT|CHg_i%7xn3Q0{XD5cni8l-gaK>Q#^npbq>Ia|dj z^}Ll+vQagJ*b`DuOfBy>8>;_^u6U7>WIz9p%#_bks?Cj%y4m+=TzX45Q|20GL4 z2lU+%!k5krEfffps^D?!f-%SULOloHW4iB?GS(IyK8%0rFpAGIgb@ zQA)|~!@~kYS#H&%NTrYsg(C@`3bw2bDI4Wt2i@H#`CFmD;XIF|(=a*PTUfp2EYH`2 zp=@PO&06o6rqZiv+h;4M67-yUvSbpIy8aUgT>W_K-G!G^@hYwM?+^Ns7Yas~JKh;V zhGgD#a{LA-rS}g!j0@bBHY=(B@#`AZ*X&qqhDLBI;K?$EYC^v?t%!!N4@mpROb5N!!_w^(4Wc9r4Likt`C)wut&&6Co zDsH4@{NUhv@@3t^wQh#>i2`W}Y=r*ulMeI@(?z{F{;=6q5ZBWZhDp@UMBYeLn-#2D zmhh*td1u`v@(BG_$m@l$3KmqHb-t+`-7t zp@O^u=S(>mAsCq)0hPGfL-jgaRa~6hdm-&Dqlv z#*L0;jr-0v_l=juMC>;>(XX$C;l#fG7-trFJCyI(fOnJ0%aCzCxCc-lI6;vm79*cM43p>@O26z9AUhj}vr8J&JM3`0zTHop# zw=U|*Rh6aSua=e=#B=HIOhp?%w3N*G<+Y0f?NJ0SH(KJ#+< z{x3iA;OrP1m9Hs6#YiRIhiP&!R14)@s3wO;jF@E%dTnU!1 zVr@j-ld!z-=WFtd((}>zwwMRT(wTS}R1j2&L)wryph;++Xmg#Ix{aC7wPgzI`g-;!jst^WtBbN&P}i zI%!r@;K80vlY9_`J zDr{VI(%U!b5r}@m(s(A!a*ATi;bOzjvum^O9h-!jQfxbm!6N_mhYRhF6DRhEEX|Jj zR~xSKHD2dg%|qjfVG*Ptv$3}+V)k9>NqM2SYxIEAfBl>sAjQ!5#!ST+8grRCkihT|1 zp7}iM&5WLn+0Q(PK=MQ19R|d)Cn1>+eEe}Y=p(+se=oYup18$x{b^`8cJnyY6czL~ z+tTLqiNb!ZN4|Gwm?BVk^AuCPpST|Vh$*zDXToplGcTm9oumgzvK>aelF(*PI>c1o z0PjP9TRtCSemHVQ_xs-cfHv`ezvW~=Qb5-l-klC_2Y49s;ullUi3gx>OeQ^R{GT4b z<^A{DNO*1sXmtGF9s=AGL?BWC8~GN_1VI5gl1y?W!+Cy3GcO#2HldA}onAY~ex__Z z7tZVw@L3JPGj5*$|E2=!OWWE(e@fk)q#2?1nz{7qKkdftwL5WzN_oV!hobSE1rhvS zW6-_ESPg^6xa5XLWq-X)w+dWY`*~F0@2QhDbeHXEPl7%nn&*1-(?b&#PzqaKZeJ{l zFQ>e`(11C}PY+IPRV#|G^hTuVLLw+<9$az4{Y0Gd95AyeS*9{s;!YxUw#`!WuViJl 
z$|mIzclT;-izME|XWnOl2bg;mjYHR?rLXyF7PKA|Ub~jxUho6<>6@rv_M?rz>N;qH znG$lH?egcZYbUpZB%>H~smXBct@7JZ!MTecA1_&)N4`$B>0S5!Te%~$^bZ_59BCnr zWwZ{<<@xBcpV2MzaE^*vE?c^xPqxDpSbYr|`EwKf^y?m5T<3ur$!gVisG$4b;{qN& zjv$|UJap*u-j3H0i{nYL+Jif0EsZBo0^I^43Qlsd~n zf#mCKxvvgm5SjpfJI?KZ6@23C-;;paAI|Kd?%B0D7-NtCr#hn<895V)H~;PT^>Yv) z8twbxG+N@TSJzKQps!?JV-mDS74t^fB&G!X&&U7#Hxk2~5+HgfFc3zW|)zzRn=4&}GE9c2+Ctx2wUU44&Gj#IGi>FMD zTh1*8(cs?!%TPQjvCx3-7uezzC+CsM3Rg7kF9==bmT)!Y<;F#3z5H^}<;j0N)0Zd- zQR1zr_$efLu~2sP=kxxoczLHmc?S!0aNQLb+~80ebTGmm|*GqdxedTsZwXaHP=X?(iJHSs?+RiN*+%kJk@55dQwiqyZW#-KX>&M}Jk6 z4}5#rHs(9UDd4mC=Ub1ypqj_$@*fseJtvi&=ABer`f$y()jxz2(9KOdooqzQc*V&MYq8q44cA&?7&%r zvE>7WrkJt7jk^LqeV=cYR@=75xmSX7$FL_aKO1)fn;*Ul0-k8kSd}Z@3};?$uQqa2 z4r$FyJXA8lao|l#P-fC#%f+>;9+lsX3x~_>IxTT(p0mw;J0O-ELXjqqI*uW0$_EfI zya@ujvOLw6nAkD{A6K8WcI^EWVL-vd88x=M)mbd65<3Tl{mvLo$7?(h$4HqJxw$*^ zWCRzZQvxD(D`^^)?c!p{4Ep1K|P-Mb<9_PJPm0{#vD zE&jMLS^<4!ZG^JP!p7yLoWq-Q!bwA2L#JM8W@^1?KSmUDS~=%d&fT0|fbL!u#>isi z*X~ftFy%c;m~tbbdi{(KPW$W*3EQ;4In$$`v2K@Jrk@e^Q75W(@_}+_+2cYJ83!he zWo14a*idGa!8W$v#twZqQ@KZf2-w!Rx**>wYw@+7{bSn5*pehP;K%kMKZo6FWal^- zu7s4JF+U{5mu+3P*`&t9ZYLr9&{2M;``e7&IkAxvo3M%(106$)e}6a@n*8u^moUm% zb_&0rr>{0LT;aGvUPAaN2V;yNyUjykcJS6Xp`Pu@Sh>eriv!s|bfUOqVgDmOx>uQy zFOZyDhbE6Va!od4Vel1QnS&8>XlmaO4;#XtGJBMB41x6L5mybE;Kd}EHP78|VAGM_nWb5xs0 zxNR4(u@%l5rR+RB?G$|#0s~B6!&XSajb(%q0#VicxpE0J z;DLv7XS&w)#?GY6F{In(VgnA~#Kly~1Kkm4NYWdb*8+vajfj{sjH#K>(b-rB327KA zE=i8jMGCYtZhf`e@jdR)MoOk)>P3(P>yat#UX z!^1Ck+;@a{=%u_o6`^>+2~pu!DrhburQU}Lrj#c|{uBwbwV4`Za{64WA+(*(Pdst3UCEEF_>gb|x9+6h z^l+)WoNuacFu%;>X(v7j!#K{j*5E`@0-NAHWl=cQ-NAPJAm*z4cx&4hXdp-EX-NEj z`DBS4N7hT$Q;V}Wuvd1G4F)3*?Jshpy6zd4@KUd7p~STY zZ|V&7AQYPd8q+c)a(o*{`GVP4FWGcDXAqsUsTY-gY6oW6Tzh2L*@dg|toqjF#^qEa zqh$FoFymwQxw1(7yZwP2hEMGZHO^)Ui^!FN{nQq;ejVTNf^Pl+ zN7_u*9N(=5TNG!)3o>Q`i!is z9d0fq^c_;A8o`m?#NQ=_QBiKB?rvnS09E~xD`;gV}<)oiq&SL5+mS_j=OV6l5Kk`8!wPfXR_X{JF%D-qIRk z1;Y{{TMK>+mMvI;VRqIED9VVhQ~$;nWVv|tgNw}v8D&<~`dGePZ-J4RLoLYs+p@%b 
z!w33n{qxR0IkbKdK#4Ppf@63N`*is&;Xd0zqHFq5Ew2>+h8tuU-m z2w|h+n%|=Mq7myuTR=Rgb{(O=e{3JCC80wCIi}L{b-etx(f{G=yW^>T|Nbjw@09FNsjQHZ znaon8vN=dbISAPfWsi^w*+r6h%wrv7XGDl|IQCx0IXLDqe%I0G`~BYc{ktFcA9>_F zocFlq>-8M3%R0HA(50Wjr0LDBqgV?^)5s}k2c>+?=KWyiJ#O(;EaB?nbG)}e4Jlsg zqZn2!t95=g8Kg>kg3|;|IcB6*lXQ4eit|^Pl?<~p-XinCWa^-dk@LDmb}AFp*0L#l zA=OjHr-%u8qsgPWL7lyCYGlR`EralwanbIM;%d9##pg7`WKyKlRBpllGU|t}6OCcte z@^Nf3^lWGLb{@;bObc6-Lvt8Yw-~id9L#jwpgH!g`ALp$4bCo^3rqt=z4Tv|P7oM; zz3bOc&_CrYL+>n~vF1c7I{Mb1;%J+fPl?%h%FhE~pq^$2n0GG&k|?1{88t%zCX zN*v)?Ju5X%F|X*NRP5|utN8=Ay|}CJdOUQTilaW6wQWbCcumNwmmyoKUderYeC@yi zdh42m%$MRbowPmI5cwUnJ;beewhpd?)6m2?whkvq9NXRuI>4H|tyl-wNqg#vyLD^i zo&ES7)1ujLU7p2Jwp6;OO^UiM=I&IEDNP&*q8V8|L!7xOI%%CRmWl^Hd@WomK23bz zARf5zUN4a;u}NV%jqk}t?poB`bSNm_#Lmdkk=CwCohKW5)OA8 z4Fd6@QMMtrOpHsurnsYtTbay8;XdU+bfT2{Ji{IuLZ<~HtuO>UrG zAkeEtr>Nv$^tV(n8x)Eu=Bt41v@G7z0NvES02yD3tf3|!77Fk2541#9;@pvm#2G? zb-aOS>$=dooYl0)w%4rJ57L?jwyJU62ITt4+J3A-ng&!p{Sfxkb-CY1O!|{SnopYl zXZfAVI_-!a!P$uIGVrq-RsK;UeQZ9?MQgVuq&@=qh=P1$i9$AL_S~Ah5In3_v2gp> z24u@i?Z%c@*5QwMBgg?!MiGvzX&2nQjPGBE2DR`4ycA^;jpy)SNh4{-{$GVY}a=Mw`8p;r9+Z(p>@ z(zRL7QHgq}$5~{)Sr$BXT@relT5RTug6JetTPOIF!6K7-V;_Y^()lOevZ`x~UhQ*w z-b&)4iVC80*atdl@2)N`-nNcQ3mHLt-R7FJ@RrR{RNznb^)?Uh+o9Bo(M-}@%t6y! 
zSBUCQ`IfDlqURm7wnLC1^KBFUG$viWA*(?59eY=YAM)RFA{YL@QQW`6y5p<}ah8M` zfY^@j@-8-}eq~@FeaIf?mxi#kp4mhBCl#~A|0j+5X-;C{f*7CD=+~zg->+e8?;ehk z8ZJf=%BBk!tu^9SW8)_m`bM#H-4Yods$qm@n?yE@2I=zcA$vCMJ>PKt<$@ehC4z-K z_6dVLP6-1%35t`|&ic9Sb~W;J@YLm5>G?e^DeklE0&!$0f9p)K=U!M5)wF{tx6mPU zoA?$h>mrF%D49(6KPunzxL?{)aUXH-SGIB1`R6QrD3kJmXL*CNE?CR)?3dq1Typj7 zKR9FKRO)nQe{8y7^&Au5`}O!<`sk$CD@$k22_yR|fF<@s9SW$rY>c&yqn)k2hd$?Y zLWk^mvaBm-s+k5$F!>8yOneG#yFaOYwq`ebb?#i^3FO~I2fKwDuX#81i9bx}6+Oqt zr8_RPyE|87zVau6tl&v!w|=P>Mc&wAbQz_j?6_r|?3hEg0Xx*7V)dgwN_)avYqW1j z8!nIT&g<_PjVbcXVXr)#LTw^8mI)7P?BPsQ1FJRt+E+21OUh3 zuiE66D_B1v*jt(GLw;vE%Wk4Vsr&_ly*qBYSNgWMi*}Uq*5v%Sni5qk5`b5R5xo9a0ZKR*MR$xJGvDrl;vAb?)*bU@8N0esNl)eBU>E5JMa-KT zs<;rw#bD6q42|}^>ni!PLFc6=o7SaXJeVz`v>MN|gTf=E4ET3<-M z{gs0R2apG$rpnt45B#v=>{K{)NXB#K?-x4Du+Ym+tFgVeDZ@WRys@wKa8!}2?RzK( z-@46FlA9FuZJjne`Nf-M&UJN3xy<#4_EH&cU%#+C=L~Mv8u;Plzeu-@q1(>dUb!K= z&ClZ{ws$vd2FQpu5(=V>`(pYYv;pYtIYZ;Jmm~#7wElptoHEvwaAD(ZD#!EmEWuO_ zMQeaWMuE1feFz4~TI2FBD@W&yuHSkCeSEb3HMwp;PcCTeSXu7(xoV>-sD*S>*aq4p z1@CRt4_WmieR4y5-6x^8ywq$gKRe3M?D9?R+z?AA{n9sr_8_Ei1(PzC4&qsWBD!oA8zXW zxvshns3L$@O(d{ixt~-n{^A^;*D>DuRdgx&veMrN05E4C1GwR{V+a`ls?*<|g0=n) zZse>@)|#KE|NdHNElA!Feh1z}0E=Yk$KF4Fn$xF)QTB58BaMSKxsQ?7#YlKP<>Ih( zra9ty{}oa?oEd-I0H}Rth(>;4TG8RXV?dXS3Z8L$2ha}K>~z0~S1cc#UZnwg0sIlX z;CUvWrQLbD4w`|M61)ljP?MupRgI%L!;nl&RH``cgp3 zI>_3G&g#fpMzY%9Dr&qk6pR}8BmUy{#Vh+PW+i0AkAQ9r`_;HyB^g>-kINrtJh%ee z0q=ta|2(?wV}1W=;c-#^ulD26;j-t2=3DEXq8h@t$joQnU)IVx;H9m8CQZrGSWYRtRM^&#euio|Ckp_`hkHU5e3B-zEx%QM_Bl@keYYZ6uQZ(?ELayaQ`lJ$}y$5^vMjBB=kgz(8 zA-#09x-BMzp0iQ%QOE2vLIfTpJ@Xjz%634%lO|*O6drOFv`oFKuou{^V!JE-*m$Fe zHnrZHE)G*_k>}Nfjhb#7l8eM&*XxK@I^6kQjwg*eN$Ezx!`4GlJp?b@iOYYcMsj33 zbuO+rGLOr?23u9?mSo<#D=8s3>8S^-WZ z?WUJoxq$%(Iy)qoxfg@+qO@0VRft*7Q|-y@5cS#HK-*&Hw)CeM%o5DjkS)*rGj;{< zR|L$zhMXE5&ny4VQF%qs$TwW)?r$ry$+<)SyjMu~T@N39*+hppIwWEM+IX%!qVXa; z7)1hEf!6ot_Wk?miV81ArO?rDPWDT__){d&Ez_h(Tq#|U{@e5qwteO+pVf$MLYej^xV~kKA18(=bd>vrc8`8ye5I2{g}p 
z!SyY!tVH^}Tfj2C-0DP{vu`Rh6%z!&(@SF%1;*tiYtwCVy(DZl+)t1?QuQ2! z$8+N3HnJ;CLGHy6ZC=pDv2kz+S>Xd!d9ZLnc_5y(Ul(7=)vwxZA4TZh;$Td=UUXU! z_krr{6;)us&{_cU-%=n4uWI0tTX{G6=L6oja;ELALE0Jpd5hm0h##~lU<>JWtUquU z=-x=-bo-+V>QbhwTg`AWL)2tdh6KL4jtgZsE(7p((v)eT1Kh zC)lUVCeM9gasCth^GLaV!e-Jf$ZRm*<2ydWyf2$=OTTpuFSd>xSK4NmwhY4E^25yw z4!5_^yNsK8VX555BsD6JE+el%CZs>$uU|q&oZ+gHG%Qds%PM8lRBAMnTSt?XKrfvkkZr!T5VB-|mjnjJkR~={+(kkn46o zG9S=vw9f@vKDM3ig8^(jX4g%<`pDc*5SMCK^=60#UbmN93n2aOYhC6x*xyxK58JxZ zmBz_wFH9`a$7ekCU%M#e)dN+D_q0xzn<^@4taP4+VuyuWs(|FJ-hi=NJtZ0sa33^S zgBB0xcYLtr2|=jS7CylSY3cLn{$uKIFH8V$312mu1HRdN0k- z{?ZX{SllS?`4Lm(}D98N^S3qYfhixh~nSUwLAK4?QXWcu1`pEjE(*^A#8&j+kJUg zdb%9tdM>}Mbaqq%Eaq+jK`7dQR3h^OZs#Kpw#3basr`^{8Lg>vqL7gg1|CO1ucQn5 z3!5Nbjf6+)Dk?k#wi{s{CjF_xs-g2H&wdE0i7AEO5HbO6t|ABTX6i9g) zAZ%E4kzK;nGi&u(kIU3JaMhVmIlK-8QPR^Qpf|hY4VH)zr(d}C(#E4UE{_xpYfpvQ zXUEa%RXNc*A3MRdu981&7fYr-Uva#FUkSl(%i0eLZP$Vw4-xYjZ=AdNc=d>RGgM;? zd!%N?B1BvA8DjP0%*4&PgEwSDO>*QXBOBOh#i{+~Eib<3IdPg$eu0JMim&w*qzm_+ z_o2*8@S6vJ@;K`O9VU_*glfF36l8F;kC*oj=wWdmz9awGvXjDTq_Vd>dw+tDTt}k+ zK4}Ef9FM?KfO7x%P$pg=f!1?gGvI9wdgy*4?u)FwNgmCQxdcjMUArrWMhXA_*1Tmz)|NkGiM z1KUnb2yN!0P&$tDKC%5n*&8t`$swJfLgO;~T_I65fe76f+%`TFm-3Cvb6p>$d(9;x z!2Vi2{qBn2BJ#$h8xYQm_<=P63l@)XV`&wNXyCp$57AVcbgNMCDCrlK`$LtVIryhc zd!w+JUwaJIUAh0z{?-z+-1bV<=0p`?}9-aUCH$X^1h#vlKC3};v%Go zl8PB3LFvA#rOx$-)m(1mP^8)Dyv#|aB5nCKRn}?`+^X!NOr|q?4Ck#rtcu3yo2D41 zK?p6)s;#tj2aBzWygjNgH4k%PVcNjxb3e|uojlm!wyzrWG9-|k?oR3iSdE?0`jeX= z7An&cscJqLto7M;FMar2fJ7lU%ANKzPgBwXIv2B%zrvr_y$5A)>>i&a>-;4AtK@BS zJ&3DEoj+ZkIP(e2$l#wG0gZxgCdGTxBlC^MAt{PXWIIQ0b8>>mt3HgAhKHf=Cg6mqstX-OEc{sMt{)lM|XuN?JZ=QnOq3-n2j^2u4Xj1 zByM4`U3Cj>O;N{w^>yHe1}r?TYZU3)S)z9(fnhxQwsJ9eQn$i&h(m zcJ7(+vW_kzAMLJatrab!7S=93V8Ard%fHZBmZ|+11%uxU!B>x0Ay7x|l;&F_B+HZm_#Lg0K?ErP%}U z9g(}ad4(>e0ccc~>wLH4+_la#Vl{yWErw(C8W+3mnhjL-l}afl2gC8Uw~B{|ERZ!hqAG@N0rw7%Q;4sX1}doCDdwvBGruDPl0sbFo6!M#)9 zi%KRN;VzP9vL#_6kU`zbQ5e~`ZNn3Si#B|Ey9i2YubJ=~c#!`Oh7L&On0cJNQ0tHdDH#bjRZN1@n~ 
zP7#xGU8X02CJmmluNEejn3wo2ax312l5&w!ukYDKq%cso(`GX6FDcz&&Bb`3>v`Cp^~0b;c*>F!tQG>?&M5|A7vUo#d4 zXEX=p-E@^1&{c7*B~Af9Spxs?2nCL%QRo)3%YR2uE4ihwGe zPY%nS(zd2WLFSOv5hqyV{*k9V-2^D-(P2E}ArTs+tqHu3aW(e6p;^UZD1Lxu{cMnd z&C$**ix=+n7w+}WT5t5(r0tVr((>i@D0c)%h#Vt#By74~x(De20~{$dpU1cM{<2M( zT*GvDUP4}>-~DS2bMF#sgYqKxY$Fa}ffuK&G$!pF;XMFzWd#t?)F)K;} zyy9f&6BdZk()RnLwqcPP>@x=e$m(f>Bd#f1T-G^kGYx%7?G|Pr1PN(yY!AjS`9{P} zY&+KC+U@z;@|oo7K!2W9I!<&X>DC(U0R>rM=y#nm)rs@e+b34nV#KXGD%o%D!;P4`E%$xGmcsqRTCW?vyjpti6xV*y$vd=5mPUd$ z&x&pifMs%7_3=_B*dhT*4MsS?Bc@X*^S4fwQ$^bK|J3W1NV|yu+@d9xPVepI5q|xW6JO! z4N3^Dw{^QZ194e*9K`)fFWO(9Vj273e4$X67(erPGBq}9e(9U|iLVjDS_A!#(eor_sl=QS?2X5%Qmfrg z+M#1p8znsddb7{f!&GHrlDEKp&&|?jMv0{RNnx;wrR zfsnkXErLAQwEhNod;&@02$pgFNm8dRgPKds_RXFPrls&xo%+30O}szWRrW9MkZfwu zc=O_?YTLIcOWzs>h+akZU9|EN;+&Ex4GArFvExKzd06`kFmfQv>(*u60YTuF>k2d~RshXhQ;XVsY(HdEak^ zvLsJb-;*xymhhhwyeX$prmhce&e=?o4DjK`QcdDS;9$y=Wc)7S{K#W@P`E#loBO!) zsNpNq9ew>_{A(8f7Rne(z@~5SSE2M+b zk_*tu6IAf$VXTlQ&c>D6jyVFrIm$W05w8{>0X z{CP2$zo>5BzmUP74m^1@N~9QuFco+iseUZTWE`xj&uB5V&Fgi)2O4GyxEq0oY3?GS z2&!l;R>{D)qX4PU@Z8j*BSqqL_xOZ;CavdQfh_oVx|HcKUb2JBgAG)7+2-Xs-ZY9K z_|AzFRi$@Tm7m=B)J4NC6XGcaaGeE3pTPo?d39oc2=yI~@Eyv$fA-D$p0(AZ;&K&( z4{eX|r3KqmJvxjK=hOZ35upt#gi^+$)!J>ycAW)H0;JZvYUoZ=C__wZ7b9dyApWNE zOFP>ufLR~XkU=`9*#?81C=Tf+28d%mx2Ghau&>^Iq(rjMCd`sbCos*5zZSH zuah6(5<18ALB%`BP-n`KQDRJGBJX55Uge&M>|i9B?e_M zyTr-;_yM#6rWt5jPYOv1nFY<>*AeN(4ez~WGAr~vtq~NZ8tDGGZqE67P@lQPbod@v zj)6A#SmG;Snd;N%<)$t}%*&H7KQC7Q^PZ>mXGZH{7;#HitJS5b>rqShqsTM|CWvS) z;nDUkz=}OeKWO$)l3aI5GSEmoH6@Fm|Ac2KJ(fA`%~-8Zn~jnLSyZAOC5!S)aj{Zb zjaQ6EoHhdA0S?aRM9B(Cz^Kt>%M!n9h$9|fy9zAK@59$_REBQ1qKmCE<7qveEO^A8 zs{Sc)XcR1sl94)koNZnHrx-N-F@7Ff2c|&7UgSeVSShftLMh>qtfJAzU3Eu?KHGku zMZ(Qr$N)zRS?p`S^XWMhFa_2a((;jW6*)zNbjGV8RAzQ-K99so;b*T)*_b_;H=l88 zg0zg38m)fIkn(o0w!bwgg*n=Zh?O#B${Spz<*2DcoHc3#kw% zi@Ys-(8h(Za7G2#e=`mK&s>4MDe=U6gYBZ~c8xnnYZiD)+#{>$fjDVqvd?T$yI z+WJ|2rP)jW?p_IC!)vlMiP3F)X#22U0c0+YdbP&oFRH2|983bV=pG;4_$dhxOsb0z 
zPb{VaUfSCe08pi&Bq@g_XjjSX!8wLT!70c64vAo(pl4x*jORL%L2{wC39?-Z{__x6 zkmRaamm}w^ShInJXPEiVqa$xog^?Y@=?DgdBf0k*l*0dKG8fKFyp?EHlH zt_7tLfYswTBpvfVf^9#o1CTEm>~=dBgVwnMu4m>_&~q+ucFUwWvzM5Y?PP5j4)b$w z30qMyagKAdKauHz1x65L#GOqkrq385poyI{URUn}MR`2FdZSRzxR*H;gm2v5`-<)^ zYk_xyP$BxyLb(c7VYV93L>hP&T&Z^mgnRy)l9K}-eHD^mi_}6GudF1PN;e)x{b4&p zMRY9rBN{zV$04O!keqnS^;=#Zdj6c)@~}O;{G6b^;o4M70f4G|4Q)XB>As0VdTju& z@GI=_z`z3`U0O;cr2|lGV(wVLz@|7V)-Ag%2MBF9tnjGe%Ixf-Vvg75V7fUG2Lp&1yWG} z&$@=&MjdQdoCECMKo~sF^oXdTrfvVWgN#Acd zxor#}!kmt6=iyd9xcW$JS!Xn&=OkMvjr6klNVJTXx!mds+0#bBx>iV!UXg@|QkVM- znz-`xGF(pViD&J%p=*fcRXL*sVDmUD{7;PhB~zy<5Ed}?rD^o*-Ln-&3emSyP;YZ> zq{tz$uYAi-uO@rgV02H&wkzL=GlJ&4P?)jw21$PI z8_lDn_~IJp@o4kLi}@H8M`&PPCkSe=pOh@3(I!@FTtVU0Wh}Ei)Gbi-J$B*$K=BIo0j9(uL8Sm0zbO4Fq&rweWpU z;occb4da%s zv(lB4Y^R{U-V+jJ;gpsy_q)c7&X8xUdVED5^o-EG{_=)};XR>FK>a-q}JLv5Fyyl=muwefE zx|qN9jf<`HzL(!`MG0fWnMr)_0}sGdJX{c}1)SrTQqlFmoe+IK-bu;wAOrPpEz*WN zoMp}h9={x+KpA2)k7DD@5z6!tKHax-JFp{0Rx22!xst)CBJDjyh?Y~zV)RgM+scAb zkf!PS9nS~u;M!04;me5k1>*6%A_LFv6KHoC=%0~8ig0EZWWq`0L4_nQ;_P651GI-Z*+{Xd_zRPhZ|p4=??Rtrug z7DcO1dGYZ3OKeE1Pg!$H@B7@*A3ygW#*!otcpy9}DPUQOm;KY7r|_E``MwID^nw&- zpjI07Dim4eRuee<-=^ISYd%&C#Cp$YjJhk=7O#HlsY5DypXX(c1e`0$zQuR$XUv`! 
ztaJw8R!h`yqhPQ)5Sh-4>K!v^i_s*l(0}Lujjs9~Jo6Z#k}wbdA2+yL+BPC4N1wu@ z5Xl%0{OsWByX`T1k)P{~rv6Wf!z~(vo9WBZCPYL|uLM=>r(>f0QOy2#%)Rn<#`12s zWV7FH0e(B+EFZ{d0AaTNfoGFd8CKd1&;`da< zmbBY}SI_#{Q;l7p)2X&H$5!&pH zu2hi;xo^hzzV#M>y=R+W2m-V|vEw!OF|?ECc*TW)^oJLYDmoM$_=r!M1KSQC2O~r@ zS7P|ZtGfO@A^5bqeU{e3rmvacXi0M3B_ELj4tA0vY}lsbm$t8}C$_9e*$(-H&J>rZ z7h-&%h%MM&C+v<=?QgM4t^(yn=$BWR`KR;fqQ&_IqhL+WLBGo0=yeXO`MsVpYBj;_k`Eo5#Tjwl!T(s`vXy}IE;sapAi*8hSA^^1<#bn*UY2%# ze(ZfCX>UN$t}022p*8D0^cso~8C3V2Wp5}vmi!cNZDFnXSKSemou-A_)c-f8z!CN5zW&WphdJ~sI}ndF^WZ4E(CTu zjP*(O%JHeLe;9Yev_$c^wDH`uHX^*e!=h-Wt}q+lU7x#np(b|-T}Jbv-IIfMQju&}dD$j8ko_p{Az3#%-feIt(%0yi1^aH^QPiKPK8~xV=S@ zX?qR`HP>VI3o(VzsvlhSBfq9IZCNt#iFNn~HIs*J1zxDc=WK@J_0vCgduw=AUEYtA z78wbWxD#koNq)GmSI_iT3G7(2jhk*;6S~0lR1Y6Ax&gvlqsBGgaMPKP4?b(&(q;VD zH#6{mopj)v16B2vYd1Z(;~z1R9^^v+Sl({J{-kF1<6C-5NxD#shVma$Yk)h2HIjf+ z;6}}0c`{!&BOE#6RS3N)>up1N8vm@&A5b0n?XQ zLyy81Q6#GZNu?gU&GKYaKmSn1BW<^HP`yV8+z={vM$ez=kMvpaY}Bk^#DI5;c=Rrn zsO982lDdpg)KTwA^Y;ZdG%3J!f?{mnYJr?*t4HYw4a$^TlJq~NzY-h_idSv`Of zsaxF>TaHS>hVu9o0m;rSOk*(3u@q`cdd!pmya(Xqx)GL56Ofqsj1~_ColqeM`hSkue|*TkW%)mNEA~D39!1D`t`reaEmK7_}d( z16;KzD`!;UFUAux^r-NtOyr}=(>gYONb!M2QSljS@L9RO<6 z0t0-gm9s&C@hlV+j86r$@Q)#2H-rL7QD$j9@E8_X;%XBn|Lg)WlGUzp^{X7B+2JA4 z$)wJ=s+43*0!kSP)@`5xc$?)9L|`l6@k`D9c3VhXhHbL6QyQp`CM~iG_RQSlr-0fKmw8i0|rHK8X9!5qu!g3G6m|UcQho2v%f!cY-g@ooqEi&h zx~@9*lV1urwBFdSEmo;dBvrXXM`%{5kRVwAH%4wmmpY5?78ESbXXR%$`TpH9_oD&yCZa ziq>y!(`=Otw_t3U`M}b{`}4ujyi7w0uJgiti@M^~jURG9<~tL_3i%DUyb^9}chrem zjp*OG{|f)({5)*Nj1&MCr*=zzc;C%uF0E8KwnvVa7_1cu$vcCJKZ zAL|eGv48`ObX0=5pI77|BT&3QPq*aXQRLmdsDxh7I!%;82Y6)(FZNSlyc>{vhbO%n zz&Eg?`9Q`W;NS-hvfz;VWH64Snv+CFgO-FG%lK11jQ`oTCTaM`)2BTqh8lpbJ`ii1 zZZOWe4!a4J=K)S2Y4Bj_OB)@o8W1n~KrcE33btp#(=#78uZXr?bA~CQw(E;~c__Cq zMX?6s3}23B`zeTpAI+g_hxE`gT;PF6dfMTMox`gLPo9vN8X&Q>%`^vg0$k@VvM-c= z@3ODA@5$T#Qi^a+ejzxa0_L4=uxF?UCph>*7F~BM!T>Vf#W(J8LY4jL z+8WQ}{PBy^#C~*u;wR8rif5ujqkI}9E;54xAOBuG-WQy2>)!sgU9ax?PU$xqoVg`e z03bRkzkN^C^UMY%-<_AXHGreG;v%}X3Xn){1h;~BGdmK{Kw&Le&NQ!B 
za5XqZTMi)RuutiKmn!3&Q923~bH((`{=08eap4+diLcNy(YQzYC$Ts)uqoxy@9g9&5eH?>WP9tVP@lme@^ zXfX=vRQhzkj*r(012qKPCVsr>9t1Ub3~dAG<72caC>I_AVE)$-sL9B&{7^etO89(2 zx7hDuBuL{@9Dq`xa6@a}7Sv?xZuO{ptTR8H4bGLONR5hq44}7E5;-pR=6!U2Oy|*$ zf1E-r6NZW+^C!C76&lq4aWaxluQbg5M4zMcZ^y2u06mu|Pl6A{D?T~5r5Wgm)vDz2 zOz+br^W#q!^^@~c@ze5q4Bc;q{GJ6f9q2!GZU33<(_A$Fn(S+|Pp1=ShGy7ijaphu zJ0)lvN|Y=cgA9kVX)VA6PC19B1jnp?D%JgV)dYRjzHSAqqtnHeA0K?q-z!miweZj& zUP6#4^J=P75l3ylRi%h~H8mO#HPNX^e`-t(?J4cmW>vJZAwrhIShup#-I=>GJ+fx) zzdqBxy4~7-6F`2p(_VOO4$g;YN}0fm0@gelrzW0Fc(q7@l3)q&{fy5`$Jrdm73kh% z#lq0W`LuH#!aW@IXJ&Z-Jj=i1!O?xW0~n!R?XUiD#ZG%uF!$tDtfg$w^C>PCo#_eg z|Jk-le~Gk)`in$26a&A{iK&#ax#fb1s$+v_OWQ0vucyfBne8A~U6CSP_x&nQr5(bd zsmvWO+|y}Oq{f^1fws-3Cvak8x6m)U*9*Uxp6;1VH|vN1zNKe)(?2?v^Xv33oJh<0 z3{%T#(=j$3_Vh8ipI$f@dg%VF1im(gL!K2F-qvn0%ugfHKQ+)*zKL_P`&E^Sg=Z@r zc~ElStCS8+M=FY3ozveCSNkcz<)x!{z-)M8|C@5x)Wt%va^SW)mlez{b^#JFU#~ z+R2FmQ!bymf7npR=rw!mjRhucp31Y%*ZO5n)R$st>0eaa6X zLou%O1C3o%w>oU%;I^{AR*_TF?GvBVS2vbm+J6cNG{t9#y2|Efo(ev!->a*@X}Qpj z+h)SC6!tH*)6)K1sagTY(&YlhnDEiQU0rCk{h(*{JR<+iq zeAoP534rNYD4^Y=qXxh5HLK%AQ5N3Rp32#_ukQPLvr~cU1X}Tn2Q&!;P$n)SYrcgg zeN1})yD1HyLR5Sa;mIeXtZeZx9g=?(t(k1$eZ)LIMnF!0vH_KPd-1|1bFvCiK}Cc* zF^>DV)as!uxJiYhkV2fNuQ#xbd0*dmc%U;KOSm~>TxM6S=Y4&sh;Z)ulb>$hqMW=} zp2gLXsI7ZCWUpJzHs*d=EyBpS+MFR-+ub3_#s z0-SA)t;Myj?_FISS7bu|@spQ2^*aj#`)5^{Riu4eYwc$x&6@KRBB0u{eYHE!M9{{f zGvSoB#9j;Wwz=NvTe0kW%-K=g-CUBMBW*CNESiT{1>g9DJfgpDx8w)D-uR!-V`A%ZJ z!LpI*JnSM5-gzFNtGw+C4NKZw%fWlez-PlFbPTc{eDu^H6cWvRs-Dna+rdO zF?K--`D}E_bqRS~)intMCeS*h^n%bu^%GGL=xL<@N$JJs%_MK3Rd`C8y24f1QdV|gS~6km8(}e( zn}U3CBQRiP#GZ#2Bx7fkl?NP#r$K!ORmh`<|4#c}{+F~*)T@)vBMtcWBGQb2m+vPJ zb%vwhfUH&GWF(vmq;JVepfnXoE?Lj${yPH$9-O~1)WcL_B!`ac{GtS}RXP(X`>Q8q zf2#c&qqk(~deFEB;F4$vbp{C(x&kgkn&!bEfn>@0EW1rY7rHm*szozx&x7L6(r#&% zv&lL|CDKtkoe#gXsOGsXKOr8()Pq7im*mdqR-~GHl*zS4=<#Uukn&w9&)-MVk~dVa zn*L_a!ot!g1l`%PCC*kT{U-%S`#3k0luOyR%f;dgK|z^_P@|rF1gimz8Af(V8l=qi z$lVXCoNiA3H@x*hRT}3_y6Ke+`%t5PpX_Yw%G@DCxzgd=MC|rkSOviL0DKT9%vw2} 
zP~+tb;Da#L&zSGo1zrh>gGl?m;PbIBdwvJ^%JIIB+y9HCUK#h@j!f0Hex3Tr>ob=* zFZ1Xh5$Hl5^VcoE`Wq}%3`5Qjg01F6H&3M%>HkeCXS;!_>q03nF?Xvs; z?i&U`2ocBqcXEdpZ`}>jyxK7)Ge*Tq`~7akOa09^?u(oJnI3m83GSMa#)H;as7hT#rkOi^9qp}y!`5GVm`Ids38P@ngauHOw|dF|`ug>J2%A^K%Y7IO827fm*A>zEKN0LDsVI=}qEdpJjr!f4y4 z50_}pvy1J7pxTl~mH7WbjLfUCuEnlUG{N<_IZgpDjrbz<)KCs+g$U3J`^-@!tCT_} zBOlXM*M85MD%Tffy{?^H2lGQ&bn2=~;mIAo(f;4 zE+(UED$ZK_d1aUwpY|1^T~$IfS|lr8JbqE_2vD+1j}tn}PXF>R%;`VxRvscOer01{ zQd%D1oaX$K7`9PBf88R<9)X6_y{0CHqnLT?&Vzy5m%(Ybhx+C1Cx~)U*Y6f??uvl| z-cB~JD$M7D+8&huaC7vOSMlT;lsriDVh@bFgq)LMsc}z{QzTl@@G=9}Gw8sm{d`gf zez18SuFlI0rVk*=Qup(ZQE!pIs5f2Ahkq10|I_~WFW|du{39b(6bW?BQTH2gFc_`A z=`zx=iZ+Mth40 zv0Q|GSFgg%yRC@de0Iwv1!#+2b$IT>1s;AN_&A_$jU>%bVI2B&z z<8pYQcfJDy9o0Ep3Ufs8*t*|EhiE$< zGcvcRTHxs`(Q=k^g1r)1S4hSi5O;Kf@>_PthDOD}-1NfU_W zflID5QNnike9Gw|`rP zL-NH(E>OG+$jAD=Uv~YD1NI!Z>iB0T-{V@{&>#)wa_gP*mIPc`u};h?16Kjh}AFF@bt zK9^`a3@R>hmIrMBlj$v6H*iYD3eb<20Nl`IzB{#|a}rStViv#2&2cptbl;&I+B522 zzLb!*7(YV^+7yZ3!@gZD?Cr00Ho{nEb78~Cw#!|$Wkw`lkqns*xn?iMdsb4IC4d+Y z3`r4Fw`JGjX}_wqHqtNxM|l43mYJp%gdjZ;je@B}FepYyHp62ZBslS<)XZaa0C?B7 z?%)<830HK34TgO!t*3z?t_aBdhk%Q2v7_%K1*b&~7LiOUn^t#RdprdQC^vt`RAU3Y ztwzPtVO}tTP)=LKIl=rVIgg~A#*p7H+ket)w#Wd5%Fw`tkfF~@9Rs`>=>QrfRbYbw z@I$u~@#bf#%j~^mCI7nGwN{R;`E)NOdkBzBzT??6s3!aZ>eXpq(Xj7E$|l92bg#&2 zuLh5U2AA7MB9H%!mS`JeU3yXER%1HTqur{^nWv!%vu|*0O@h)2MIO zqGj#ioQxg!^8Btd*=S(Lv~Vkw$M~fTSm#au7xI-lNU*T75qQy%LbgZyppKjrKs7L+ zIO+=UczI}7fF?mpz868_8}BURwMab{*V-APv0AF8kUXUEST;liGf6gnAmOAnxmjbR zuu@8aD|s}ZVz)h5-|@@a`ff}XS&~T=O=Pw0t0`=`nbp;mPwmH*kU^h$Pv7_G(pRw2 z=y_l5OX$^b2S?5fXC*+AvhaHV9Im8SbQ>tLY+6aQO>-`V0Mp%zZNG&-IUmj@23@6P z*7YP6=_>$bSAnXt4V-5JKsPuC{&#_i^t@@Zf)9dJ8jfV}1+~tSTwl(Eg)m10f^xWQ zBJ9#8c+8IHP)YpE`Oq58#ZexllB&+-J{2xdJ){Oovwx;ZFm(w(8y9p{2pnNkyHe6= znz(?1oBw$)XUFkKgLi?`d*O>a=uV6>1=q}QpjNz4cxwk~@gw&FF-~8WllrYJq__p2 zAh-7dFGwCgEq|y5@Vn!~)6dyhe1@NA50T8PQAR~z7Ce|75I!P4V_+44-x+^; zq;cxba$O9`d>PO?E{)K3JDtNmn@=(4e zIeGSl7CE|}#=C9hW;9fn2PHFk$Qg@ed8tZ{cBJSP8GOv);60w`bmPZAJ| 
zf8)kHt}yF+wWq&n9DaqNW_pu-8%m(fX$SW8$qq&>QJXWRra&(8!n)&Qarjw*VQ^0E zQ&R24+O6TvTLsgd*^Fn@AT;o}Zpvxc`eP0xNs;eFV5Q&{NClL6(?rawD+wQo$A<`ju3}^wRQ%+dtuS6+ zMU#FXyWktpaM^z?CmPqV$uM%y^WeF=0daM$duuxXtm&n$^TM@)Icg=8|Ee~$X z#oDAa{nYq$Bw>+i=TeQSe-sw649csSZI+si!ax68u`o$#LQf$ep>6XCDw}2WE z2B#3Z@F88&fDGpf_&-VICH*4mV2hglxL^D5M$%tu6@J%6*D7X2oz-EB+8S%7m-=ER z?Bg+lMNIY*p(_QF1u9s<0-vTlhlj*?_ZI`dfJ#cUFN```Q^gl%3zd_bMysr>OT>7z z(WOa)dIG3FED(}nZRJ0TwF6xJf)O?S);Ini-yS4GfgQl)t+CCxY8!`vsZB4a zmBThbDY90ZUreEFxuQsXK;A~cGhht7za)5St4q~rid5$E*K1F}TxEjwG9G&l!gjjt z?a7XF){bE0_Bu&5{gCMx0DKa6IWz?EMbz~p6*;OF6nb{tomijktic6Xi%S4(Jdr=t z1Sqy2$Lv&Jm*&0isZigoxjkx+WARnWyx5+D@A!X|y>(bsd-v|G8wHe>4M?Mipnyns zBd92fK}drvK)M@L5Ku}~N=iXQ1ZfuCDc!Z`?q<>Oj){9;&vV}2Iq!AefA;0}TEbj2 zzH^N6z3=-|j$hs5gbVV%j_oCk;0v;N-8(tr+9t9H&QxP5N(_R`FtC9ZgCC^|PbJa# z!nZ-6Kaw=D4YTY@o}rv2WB`Qv?6Er)zU;au54mrS;3d1j#;S)iLwY;YmJt0}cl9rF zF10mpX-8?0pi+-YNm_uLO<7UMquUo&^IM!}{ozECX_E=62Admg-HkOZS}gv!RDijG z#@rCa47)C(ex+!=(79JscP`trWy!>}E-&w6kWe{REGs|MyiL$XlE=-bNhPK;F+@?c z@V@o4kb~9*#VJ$A+_g;!qWm9 zzf}SBq;Ix6A6S<6!o+L~f!x z^kgiaQy$G?ikRgq7aH;;G2h>-&y>o2RERFY%qArmu?bxqv_0CLDcbWk98wY*JEN-7 zA4M>m!z=1Lbz7YVp=H}ErGxRL42Q6+?y!;Om`Ikvk*dK=7rh0&?WS{wJ zw)ZN3l9O;=uH!M~8@3ak+Nj_Z>rRsa1k)=J#t-X_96RyQ-pp~?dLc-ThU!JR5esDs zKYmJ_&4mlUEPa05J^q~@Fp9C+A<}F6B5l!cu5wrR!ddg%s8`vAF}^l+S%XG#?r25d z*5Ij^hne7-$-X5CqC9ta8=Hak*->#K`jmY9jvjl2umM)oge$4N}pYdnJc zl7^LAEt25{Ya1*w>)BH0_hYZ)y@rf?;IUj#nS+e0=+qCdq3dos;%=5dMzU`HyAbyB zRdcQMq^k4I+^gljFN=Talal1+McA^Q@P0x!AQS)|H=~(iY|Mq~eyCu2nq?r}HqDe{ zOj80(zk`{XXN?+a0m|aN5hNJxqQ`3?{;eO(a9VXN9(*R!C*Q1BVL8pRXeBEf^JADG zJ$MLHF7Ac5TogX!3H2iGqVLC*9I{_Oh4)J#hj&JG#bBiAd6^Gq@bB33v`HhD@G7(r zU3^$gxoJEdL>++iIHoVorDN`bo_u3IzzVCXYPri_|0X^&*B z24Kf=&7XU$J=n?BZ59X$IWFm#?=1;HYM3(F=+4J)G)KO5I!;k)+`@s$77I zHTvV{5@xsuReI54;G@hO5$*{lUGZN1>U^pFG9?fswV^-QSnF5 zFD5WJbJJw&e(|)0dadY)$PfpQf6AK&7umTFWmB#c{zZMSS|`>M6^r+kAt5%t)C974 z7pj!e_%Wi6!|}`K$h9M1wMc2;Ub-J6!vBH@FG*ky#Q4jQ`}3LwVmzjsqcBj_mDRjt zKug3IbCbRwhT>x?@LGL(3qv>9{MCwe5$DeNI$u&EL<}#SR 
zrij~HFm5KeZ)dY#7*eY#$qJeDs}kzx(`7YOtV`w464^=n7zx+q7roJ*AIP1TB0V&A zsn`0saj+kuNWM-Lw;aTsYS`_qfr>bp2oxgrEIm6KE|f^0UE7@1zLIbtQ`j*6@-2m` z$!{N(vLMNXS)hY_ElhmDp-&Ibk#ObPRdHS)Aqi%2ac!q5S9u%e<&Ia>Vop|k@ZO{;9Gge=FJ>{W-u>hN{z0}ZY~!q^V9;=9Il*`F$vFVu)9XKmjk;#7caXp!Jag! zFjyU40*fg!M#J<+w&#v|Ps++9zfe3GicJ9`yfYDF!s&D<$gM{nTRu(G>~C(9_NE6b z&`HR}ds8dJuP?fP7O2YsKqG|Q`J!7_iYiyHqrvEPMM)R96Xx%A=*oU;K94KJZR`k} z;&wQ(&~6|gOHurW2N^LvX+m9Kg5~bNBahEM zV8ETRIgrqbhMc9RA)wD0pP}4`Gmal2S z7E-us5VfX!3KMAS_bN?;Yo`Wp^N)9^Nz0i(y4q1N>})BTZKkmApu%O8L?$=DS|orb zcp%*1FlddpFr1%iYSn(~ouuVINlmG?ItPC|g5e37=PA8a=o;--$@qD_PK)}eB?3Zc%q69-}`)_cm!hxf_{A;MY;93i{S4}>%(TzR6)&z^(F zUHzKTVkC{`HT+rIn)FX?iHcOJVXyVCSCXRlwiYb8ndV@1a&%C!UUgCsU>AQ_iiEI8 zK#I9UW8~y!#lcY;M8E&SY^(brsYI#+?!cj&r3AQPPb}xB*HeFG`nvc_Ur^s=`H}J? zo7953EBxQv#RrbNeA9o{^-aa16)K~{H~xVNI57qNyLL+(n`6L`zF=JchtG>^uAi4VyhK7&+8Fp z4-XjFEDM*UbvZ*`z@u_!mSU7&%8>b!g?aSMa?!vY1v+DrLG8L-F7j^1Ysl#BWJy8{ zhSzaLXBlX22Rt({p%)><|1K0|lDeaGj+i&hH{t8wBt$fHLluUpX~pYzXNpmh>L>zk zm#C1=$co!9Q0FdTJIWibTX-PDlQ*hilqeI@ahI_zpDZf1keKba=hDk=(F8R4#I&WM z4=vM&milC|*{m=#C5n4OrUh{nEX6WzGe;mEt~HBuk2$@L_O`SKmAG z$V6)E_)I>!Eqo>OXd}Sp;)bWKXM~d~d4E~GOJ9FaElhZdE8#n@SHCRbjqx47lKv(B zD{|6c^iyOr3-iZdKZIv2~bV3X~R^!fC)trj05+Cc$|)QAfLJEKso<`L72paf^E(*}+JRFCY>lckm)F`}%G^tQC0F;FECxEZ zI9sNP>L4m@>Rd|1<8?ioMU3N;jEjf>Ut3=z8{wHAs49wx34+kNtUofKn%}Ej$k6y{ z!BQ;XPp)mUde2O?v4)GvWHQyC@5_;T8-_n%yfyrUT&0S5zF|VN?W}=)+r&<)Pby8O zayTr-Apb0z?&f~z2=EUq;tG+pq$RTI2122;V%knb8l|y@4B+9NzZ`v5sQm|RKZyWg zbSF$YZR<*5J~S0gc@FV@i&bGR7iZzwh&4@56hUt<)JW<@8o~A$UkL-XCIGBW=g^A! 
z_CwzpdAxtW_MZ9t>Q|SEM`IXM(~-1sBC-Bkz4@!ry`TI;m=)v=BH3k|&KogKIZL2t$Jx=c- zGZMZI3_Lm368o$oHVH(j{e(~@O@ORncmVI4R|gmskGOaOZle-M5o^$ zE>1F&WW|l6J|79^C2=l|42ru_D-vznC=xL`)|a9iPbaDMf7f`Q}S^MVwoF0dY z*>x#1B~)Tt4kL7U1k>(=J4>dR<)URX`*g9oQnwxLeO8NF^UE+`gCmEaLX+Fjx)8;| zOtI6x+WH<|#c6q8Nr0m=A39Zft5y#pGeyojbshdfnf-*4_#hCYO-QFMpn&DSPm1a5L^9O?!CXMJZo+e2 z$&b6TSO^G}CF)WTrN=7K%2{Xz70 zG(?V^RrZxmu1H)JPf<;GND*et(% zD18brjN*i0PYtORf|kPnri#zFc6*G!)Qss2aMf%8V)u&^N$$&QxLZuZ!UZqa;VaKZ znh4TUy-7**Sr1uo%W>y`>aoCH)9TdxYZcs8k9va>f)$(t7@exWNR4NHP*(Bs|7Qkt z`CV3$>6E{czgVMaUj&+evFuh!Oj$eSR;JFhUG`pLYxa;se?KSl!N|A-wkF4QaehN7 zKy01j&qd?OOAP z1BOMR%(+p55M!i7kit=6tN3~=iVL3?uP^Gu`fTGk3_tX zQrrVu4Zm~+Vbu7eM$=i}=enW4_%l5Xs3b2}{7_EN--?LKsp#0dH(s6;c{?5g2a$RF zOU95XACf+!2cpI!C8>|SQidp>@6ghQUz?8oEzo@r4`=!Wp>3sK$`&Q{2`8xV4^lBe5G|3R1fDBVcs{E*>g@|7hZ^D{b zV4HpKilWb`lpo2W>XSA&Y@)HP3Q^v$Q%eIs?!l;GCvC4qV0HSo?DD(|!OC6Qc#O5~=jCHr_Yh~!Zvh5g~Mah$no*Io?|sMUGs4en`*1rMu4r&*cF2?=?p?{-8AT4Zg_xPOG{V}B@|D(bPG9SJ3>FNDf->U zm*J50t+xeWZ(+Zz-jv^aOur+P4G_NfNI7Y%rk6Ol7<24j%chofLEHM-W7O+?bXL~u zyvM}I4Z0`sSqC?oXDPgnm43f$oR8$4vdopg0A%>chmC#Bm171hbtkt*`*!7w!U!wF z8C5rT))>1Fw70uTTpd}1crt?cJWxVg%?O$qm&==T>AtYpV7~P|sYHv)X`>GPnVmkh z^$7Qf-&+Hfn5K3yo^j$s9Nk(D!@otkN;^uU z+OMI{WO6R*krxgdxYEU5YFN&$vrx+U<-V&fFozvLKmL|_ldPH`{UBzfB$?b^c=+r7 zib3$OZHzn5R$9cvD0Mc~RO6=fNVfl!5U-6J6eCMq#w)J*FbAykPsS3h&ShlRQS!PK zcS1m7?hS^RU?)&p%|I5q2^G6aQ($!MTOvNYzKK^(LLKT1XYd5#vd`-Utsj7CAK8*^ zSuo)uE~t6-q_}U|!E~JFE}yqH!$3nIpX<+UqRbFI{NS?YxfkvlUFbul{Dzj~t{M-I ztn}ukfxa5^XV}WyZ0M)P$_3u;tL5|--p>wpKVCxyrROVJ4*!XKG;IteJx}AUI)6c= z2@2uS1O4};^ya7uGDp~^v(V}JvIH%>d9P?~p281^l z17u(I-^Q0qFFPtEcM(RZl}uH=za$%<{Dw^p4WST`I@k}-3m)U%Tn(Q=(pnu2*%=!C zBIb*AY-k7Fwk5rYT?Pop^X=C5_a9T5Xj~?Std4eyH9xGr+wM8s>Li#sK9zb1*4s_x zZO^1J|AY}rG4*rlAFtlGX%*qEq^_1?d^@$6Y^zqvZ3Y%>{B#dXRKpIa+2+j%OAp~! 
zXca;DiIAj#&|_;&u^@l3b*#7O1~Z09Jv5!?V?c2$J+|+1e{xo0EHU9zT7jN$qSfi>D4vN5k&< zxMkY}_nQR?8+d!}>gP09zDc+*=0mQ!Tq4F?Iru7e{k_IufLf_hx=I*5cI{kQ>yq9A zV}2FoR&hn_JcUWzt(L0E_vwrUtu?J2ZoG#c;d!H(=SBZB6`y5e(#_)2zX%BI&$RbX za$@2?hZukr1+AeHLX3Cwu^v#RTl(opf?lAQ&xo=&eb9YQqv+{md}ZuZ`C9DJW zLyGb6%Io-GS}t4Nv%PZDCcvG)?@|-)w#?`=o#?(+rC;0)H-3os2}m$xa?Tdi z9G%m6Q{$f3rzZIa+&n6}t(p@tbEIM_F1&YtOy=+nj&ODhPveu!5q2+cH3`Na>96fc zvVGLVUzM%B_u27y&QI^|+2SOGyG8l>=Gt!bQ~Voc*X5*PjGf{;6U|f8WP$dpziTC3 zuUBwuyo2~#>K@Nb<5N;P#~iXqvX<@Arke0L-TI|cDBh4|YhxkE;W%9_cZx9T^_j6y z3d%lNF^2g|s9mO`gB8zXTO7X#Wu;>?7>wln@o;0(Ls?!`!MNrg%-OH1_#!SM`j6rU z;$6X83r4A@yt4NY#}sT26}Cp6_5QhIZsN^`5A&47XysA}@AK+(ZmII_g*efUU-hF4 z70?n{Je7F;s)A%kcpU(UbNA!1%~`JnXiKH%`M@h3>aEJigLP}}+YWs>`>u~NV8Xk% z-qJ&}=8{TjJJY@^gEjM7Jh_18XVOeQ+45$gG|8*z)Yp=z%YP`FP@f{g9i%F@rOoe| zsoTtX{F4w(@Pa7I66G!E^9UlrVFiGwUFE84%PX^U&nzDLJvVfH)B79@Y^S)2wY#i- z?rSVhH;)_Wl}q7Z`A`l@7YTdxD1~{wNZ(w%SpDT`*d#IWRehB1Ej7cTAfRYF{DMyRP!`JYt$f8`et`;L@ecb$m-6OpdeT(P{u zsx#fpV`AuXe9$H;+KZNN|0G0 zWj#x=Pu=|*I0!Gb8Ho!P^U=fGKw zQZN6f^_r=#AZ}|jY`^y0KdO})$fdI~_cc*YA-x=VGN#h{^R555prfDs3GX=38T~m> zK?=B-gl=_5m4&iPt!3*d%j=iqAoLev0s0;}#egNa3y}0zPtK6)|IIhV87V09+!_CI z3qK?kxfQNA`H1CzCm9pxl!1^X9C$=0k7ulm(HX)+Dr;6IB||xy)0tXTYJRc_d2cT$1Ts4KI4~% zf|7{1PLg@dSN%@|cVey;wpVzT?z$f#-d*!HWbOhYJM1eidul0CZl>%M{e)0+p>aZx zARXW4_!i=ve#~hVBiXz>?cLix)VUfNe1LsQ19h0KIaxZ)L z63i24KkCKiMp?`mG(Y?HVAO@{XZOgv?paGgG~tOd^4CymddxHDmodt@W&9QlnMsXX z3TYn~#B*&t*)d;q2E;cGutrap&F`x9iLdDKvYj)oo_> z%!l9wNp5EfA&fnynTj3gge3Rw;D-b-6%P*zi{mK_w|hd~DxW+=6&Ou?k6ZH2Dp}ge za$k(#nWG2_^#0heojK95I&kh_TJ!krRAt$NEi0MH!p^OMbG};b&3pGXY5p4mGDP2* zI?hR=RuJ~F^Trbs@&2FkGJTH8bk_%Y`hVs{nss0P>Q=v*=HUz~A_ z4_8p>>5h-s!x{NwkQRz=D-0V6{p{rDTjA*(*mS8pI@nkN*vTAz3-&=S)cm@XzeTKxH^fr}%9s0+vybEgq z$hQ%2%zys#FTV4C`P)7vXw?6gkHJ5vcK8Qa`0qcK3)%ru!Xhve(ZEv}hy_sHK9oW+ zhak1H6{tjzn`WR{%K~|b2qq=H^aVs(haZB86$`LIQOpMxMKjPQ+*n3LZi;2l-Weg* z*Tpwf+E!>J7afFysz#nL^@~}8PA?O&y7nVRVpmY1#TAqPy(tsbzws-_*zseuA_ zjF*2A?8qB2yX}nmwqW7t1`xicnMArLe2UWf5QamVgSy(iX<7>>c3w`M(m{mKnb=nB 
z0XEwfEPR>5);`3P8?L&{J_jz$-V;7#y>IJ+X?h7|HJjvrK{JUZmD?_C6vBqI z5y5omOzaFJ8*to^;Qs6y05WaC$gXz%F1US8oUFh%g;T7h zxjx^yUwi~M<*&|vOwE!YQ#1mIr7TCfW$~Ik2Lf@hLpQDO!9Nn!a63u+3hxCun6Spp zbY#%aaR?Kr3E<>`*Se*41WBao&u(O|V@;3dis*%1$CetcG7onS@nf8k-Qo{dInsfk znDk&}ZlXO@-nKd`oJ&zargchyJ=ZdKzaDsc*OE`jT(wf}nJ)5n%yeCpNtbF>v@ z5f`|{xu_B!6mRm0e3G#2*l$potLp5W-JCPID2tja;Z*MMkIuFllPLlv*&>v|XpBWs z^U#Ck38eTnQ`NE$HQlTN%|T3HTY>_KQNoxuf&xtew@~Ut{|O`&g87ALV+s+U(@C%v zc$hITEf&JCglpwW9muyfFk;t0r!EkdLr5GPZJDR1Q3CU$+!s%cQXsFs=~V~pxvLGr1Ju?(s|(wL z1N|1IyQp5__{W3Lcp4QQtT&p&zVwadrbu3cUFa~1G-+Qdt%EIm#cZlE+%ZPsgiA|? zOTw^dW|svQdC?5!xBm8@L>-su{P*jbgW)_(z!NB<(HO?5RZ_d34;Q_~0SQtj<`Tp9 z8DwrElj-dCWbpZwo)CBKjdj5$o0)$=;-)}n)~n?hqo_zd^T%oGTtlHM^dCN~-TWv> ziGvbnXkM94Bizhl14K5FnGY?EjX*RFnA>gt#lzOsT5>~n09E*i-^h8<|ChbIPJVW{ zq$qnWU^P{DqJNC~uh?sh;YLJGSSQ@fwR`Jk>jA^CMcN`?>g4mvXdv7@);`ub*1h4B zK*JeB95(hod;|_6(D1h*0j*)PsEy0^GQGbj691k66+9w>Pz_5CXT$WdEqrozg2;iP zqEn(PxD663?;uTANF$QRFuL>XYr{B4;!(LVE$=|bk1H7)E$i0K;irQx4_o&+&8#g? z(AO7{(w5P#+>Q`;bw~}cU+in|e%uukNW*V~*6nBHMFjxv7!*@xc8j3V8#g@yYf|kqcEU+~Db-h88nN!TW514DyIYY@C6DUP{&X}OXp{gfc zN3wT1@ld9SK?7D+{Uy_ZuM#CI224Cd@O-z(i$-8C2QW$w_k(ec%rvUQCxJ6C#^L4? 
z8>IJb_dyf_ZrwM^l?@ z$+%H(LUiw*l;}WGlFsRzC1Ym&iRc`i-|UF{-3{~kh~K$RNi^1}I!Vhf&=hNaD=0W{ zbpP8R?=ro>GRyX-gAbNwlik~f`4VwQFb=R6SjO4Lp>>{;ur8AiW>|tF**`a^6Q_z8 zI2}i~%WJbI_k#{B`tT28%d=-U8h9JB8+aQxUIxxo5d?!%Uc8mOEiCDt?nFU?R+DZ0 zhF@UuUg>Fb<2cn7uP-Gnwn2f#E+L`oFX14%@FPAkn#uYQT07r5_!jpn&}L>|e>2(5 zrQzB`3wP}`(f6u*`kQ5$LpF_^N|R^y65-_Yj&%YF`eXKx>q-gtm<{|nGavu8)JUoQ zLQe}K1nvz_Sn-!UgMqgZ1FhK_Bso-fv_^1O_0Kl8Q&q_y6I*}u=mljI`1^8JFifBK zHj%M|z+Tx2kh)!PyOQOQgRyu2%&aJc4zwmQ`dJ2hKy<_W;qA?@?k;Y0Qj(oBp?u$1 z(&-3joo~_M$vuu6-ghM}_{}VE^uck|l`-7(Oj_iG-h}olq2I|OEc?crmoSNd;>y{N zR2*DpW^ky1Pure6@53*B-Ezl8$K+ALq`k}aV#e@35*x8;{GG|ghI@#R1>GAEs%@WJ{ z=ix`7%nwXV8vlJmJaiWQJ;_?yg(imYuA9q9{#Sh>MV;hFiJD27z+cucS0*NKJ7)yaSTFV_k<@O@RbXCQ+uEcJR&Sk|fc%Gi32`Tln-)qf z6qJHRwo_Nr^3KBN_wy?@BWs$MH4EM@m{-96NLbtyulN{cJH=;lP~bMGyVo@CV&ih7 z-ehnGk`Ry&Z%uA$j>2b!g;g!aU0z#57K3ISLow$?LDrdu`JEuh@#FrGxsqYk#(Yn))9ODyjt_I!Pi}ScifkS20cNP!tEZ$g~(b}AK*I&ej zd&Y?Fp3-z-?B=Gmm)Dg8sVf7%Yd9XSaZE3kk9O`$>kCmGGI1ZdNaNt81ux)9oWY|4 z7O|Jt3H@^t?=?=xK{;2N*Fr_@mUEr671PI+Dl5)GUJytbPUQGwOH%=hL zIpHO|kt5`^Ubl)nFW}`fo~hm#-8U_5Y6~K!6_dfxIxQba{m3}2P{Bh^Cq_m0jZy{0 z#tKAR#&YR}#sEe+xq=Oxt=ERQMp<0EpGGoxCJt{_@J!%wti`Sx`n;0fVVWh_uW+p3 zksYZ@N@_j5W>~xJ>}6Oh+I5ZBgS6wm;LzXCP*xXZff8CRWyPdom0@62-U&mP=)faG z6~G^h_lHtT=3fU;=$lO4$O=!d9ofhVlfXc?9qH(kLKp+V!2g!Y@uX5Y*h{xn&|k!x zaP_b^X`dSs3{;tsqDZo(|3Cgxy&uhQ5kZudb#_Tl06*KepNUK1k zuz%@kct>5~wM^UZVEDZ6b6;l%HGrFT+?ie)Jd8L|!AbkxZF|B}<=58pl!5uvDP1=4 zPnAB7rfr4YQAt?(lvCvJS8t=e?O`|Oo>JpNI%UDKU5Ud{Sdq!=`!OFg6^j4C)Eyt} zr^jg6zNgnghsUU;Inww@w3A}Be)`&#VQ;$5`!Xv!=J_~^gP9asm)od4CASvhlHwG4 z*ZU7dvGaU{y_;E8X^Ss5Vs*$%nBm)h2uw`(FrF|f)+cH5;ODHWR%bV_eQc?KQ_G&Odxg4pU5o$vS6 zR;T4tx?J9Axp$t4DtjSod@>FgjZ9h0X*gUw7?54eq=*}c?kznZh{UHwA62qCxJ(XM z(jI)`bYm}x-QBe`vx@nO+7_q$=M>=HSD%iiUywZPkr-K(9e6XNbJ!MCP?*H&qNlYP zgq$UT0c@;+uTFn+5En+26K6~9cWpv~qpaT~4y4Ghm;A!ua1Ery9qTnp>|Eol^k+QQ z&(=PwRJx97+!NpX{Of?jJuN1KXI;ZF?jSzqNm^cLsb2A)QAqK6zop6s)^};1mbFhY z?tq4d^=Z{QiD#BkuZK~R%Dkml(hdL8b8>|zfqYUihBpJNAUJphyS+ZZck9@PXue~eOrHEJIa)0o+Qt*>=-V6U<^ZE 
z%ICnh!$g-pkbnX5^Xlu8)iKxx|Gd(1c~r;6ZkJWbX=$sC9ev=nS>RB)UT-<%JWinG z#Mx~Y{pb5KN%3@U{iLnMbZmPP0zdO~yZx93EsRM1byW^`SiT~^T&4XZndZlplGVL~ zz0Kq+1wGswBZ24AH%108Ci9h%{JFAuFTBXLSLh0^05Z~hhB;6gae0S`IDtiQnXr{|^0#ocI&kB%EhR&x8;CKJjfL zzvG{`6uckgO`@Aot=Va}ib%*xg30U>5FWyKyOu`G=-&u!OcAJL2SfIAUlG7reK%N`i(B>u<$W4AIq z1c~aybo@?MD(O09!@`BVsp{mplnmgJc;eI?Z~e~!pZ;$+qS>1#BHKV*`T|9M3+N;b zp`AlR;f(=>@85x!KneNbzLQ~jUHmQoRE1y$74Ykrc6@tF#30ru0ogdl0fTUYoJx1b z30!Bt`Ys-Bj|4xZ2QP6o(-teqe|z$7Qde*V>I4879{2=g~i(d<4=a$2Tv$?E z_~eJ-qHF^1uv_ne_|_9RxzKzMed!1OS3iOz8qMu|AUTvcb+Mo5dovgEARywB{+0a1 zxV1UQvRvOd_r~RA(b{ zqw3KMAdlH7eV6va?VoK=K;WJTWD)b0H8#JQ`7jTH`I4E7C5o_kQOC96DqnlH95&Zj z`(+=bXR%lZUX+30a!;z#-Oxan1NEX`ou^Wsl|}L!dlcC-?!Iq!J=|!UV0nD9N|;?u z0{P}^*k+oh>-I}UhIb(NYHbMPjACZZBfWflTW%~=T^^90c`!pkr6hXmIE|g%ZrM55 zXdNo@E`tm&`;`l*&g=s#gnA_QKy1z1HR$TS2vRVyf-|fSnsp;&?;Vj8izkUL<$R>K zYm@%j>dkfqU_G2qe}1B!+yIHD!mok}aF^x6qlamcJa*eoV^h!i=2~~N=>5w21<1jK z<4h5CgAqmh1L}+q46XApxU&FHmwDGSa09~L0{fW#(l&Z_i3nSM5WW;*G=ert(7%3E zE1d3TM)ir!q-Ysk6ywlxHsoQG!+^69X4?}_ZQnw6G15`6<{fK8vaa2`E z81+JQF~f`AdGVY>1NF?nC11w5*!Gm`y$*r#i7UrB-P|8pP7d~b#`9;|98qD9$M~~t z5r)_DP#ZWJV5qO$HVI!Qoc9Xp6B8<43|nl*_Bq7~h)~qFz_PC-BKy`(HrI=@rU(?L z06D>pc4N!da>SSm<{fyb9124c<$Fd&B4#OWQezGE0-HpQ#M2@Ej%W3kL9#XEIz69~ z?vOpR0qN6k6*5XHd#)3T1TxZLyH#}x#MRd}4ADmJt3reV%o0oAoIreW4C@RB#-z&t z*{cQ(XARg!vVY|2o+y~vOZga!%{AWi&PeWbFV6|~1WE4lEr^)?jM{w{)1S(#?bK>f zP!vw&>J%bg>K4=N=j}mmBwRMBy}xMsn5tx2Wzt^~r3a{tKl>+@{2#2w+tkj1)m zA2gq1bX$Fed2q>W1UbEn(Gv(&&{TH_5)PFWDoRj_>}m~V+vaF3E^$e4CP69 ztx~00Q#>zTEnKV`N$u^M>se2W#g0C<>`Q1ZG59!Lwuow9j}|a@4Hn<_sJ&3VR(Ib= zs`NygBljcW2MI#Ro9T<>Ht?~ZZL0vE_hM<%4ZZ%B(>S4>=&R~9oLl57rnfD1$^R}Vku@N@*AQNH?{Vz1kXMZa)`7wR>j z=H?*z_MF0qvz~N>3Nn0J7K?83kc+|%fx=C>0JzZOY!<~ZIZoKKye#S68~uUybm(c? 
zoG3Ux1!Pp8(6a=hRDEg2x8@b#pr75-SxjhXz+Yr0M!YDl#PB3a=0$ITw`4&AFFg1A zO6_`=-zHrGMCUt{pT^6VmiU&CcD~0Qgp=+(1M-tBDe66hH%I&UO?<&YZ`Ab&0*0 zC(>Y7q0H#syp!YOMc(WuUX=?^1@C8tOCF0dYY%e!t#qU|lUnGE7{kS3KpTA1+@d@5 z`*qMTxLs|Stal`FT*1|{?Z?)5+&kRY6B?rA#b|{cT_XVD7CadzZam{FncH-YVPoY$ zBRD*P<-!UjTE~OduX)#h$(K%Ffx-FPg02n_4lOpb@j|u$@MRmwEdQ;YOmejx#~k+I z?G&4R##zc`=mzk*B26IXE;=he&IgWQ6qY8KId+Q4}f1VfE&TakR zP^Oyp7%4X7&eZWHd`gb%a|}`vHawA5j)3M7({1iYZZ{ekTD9HvR64|4Ner8&RrCyU zwibUR1licr3(Qwtd;DF_zz0lN^IVU=2LO*rHNCAuo&i<($oKN>`Ag>!2Ak*Z6xFsz zh5BvLNJCZd@>{85AwgCD&-=Mq+jWWGD5->wM22&uJ@yTl5(`?_#xm=8(jBK;ViuzN z2xXcq%JmU;5BZsMWyf@0Z%MbUb+fy2j$`-mE8!hWrhm8U4+=_^uEWcTL; z)(5;>M5XPz)vcP4$x{!)G;nwiow`IfrC#x&N`cWmXJP*wSX3cud<~C#r;b25$AncW zdKT?2V^*V6)$^eE@j#xXC2TFE`G->w%IOQM4pTJoG_ivsgmMRmOaXcO=8*`w|D?6Z zuRuIA1J{t5_fia`G3KkcBV+3YBkqMySPIf$51ROE>@WFVbk*mEE`t$T6kMt?4{7LT z$+^D1mP&}pX*PHa1SIeSxyTjq4 zKlo)3r>v5t$&YcK(MoN|ZC(u0W=@@nP|(s0yX0``GXo<-ovF)0Nm{@r^>#k0_^K4V zJ5Pv#FB=*oYRRISIwR9n(_?8leQZA~8=XPHlU_RaJxcQc>=bqKqFDj>fvJe}hd(uv zQ!a_ow_>?yWf^<^oInmV9H7QnQi$s=Fn^i*#Oq2S8vo1D4DXGqu7fGe z(KV51J&W9L6bZRT&7TdKTq^b0g@{KbfPt9`}Iu=$RmGsNsr} z>P=A!Rz@$WgDhuAFR-z;7OCYm>9~Rcw<6#RM=ugSeBQ4+%$3vV9DdeF*GeMnx&o=` zKu)qP<@f=@0gmUk0d!im_ejLdYnWoNib;BdRK0M8>~Oo!fP{62xv;}%qt(DlgR=VJ z{4JD0=dpep8O}Su#+lJmuK-h3N{OTSw(%Wly-jgg$AV-8g3)i$B_5TNTwGjO zEn+UO?WKUG!l9K+hGTTUX#QtPbYO){#VbSnj6UZe5b+zL*rBd`CSiNDv(#|$wp?6L zDf4c1l3HI11mxToAERjB8iSh1Uop(S^>X6@gTo{@dcmTo-=b(mFLXfwZ8S1utabI5 z)i_3)HPm5EO}e#JyXF|%HE+4-{a7i_;DJaDgAQ6iAemVr+HFAH;)37yt?>jqav6!8 znpme@X3>daf~7uk|3U-f+7mJmSH1=qvUvGtv~{Hg7$af-B24Ms}W}fr$nz7 z>tg0}##Y^62M5KyH<3geeu#1l7&Tx zb?UBb@sX~r&S6(e(TTSj@4M=-$r9Uw5L4m^~k49g;{4I`mny zZgc27&P^kK95YHIPRVOvGD&L8H#BVSw?z8ZRU=&5Legi{`HzP2^N%)XF4()n=m?iC z$0snLC%JH3e=N*)xq^flww8W{DHTnAP(=S&0Pxc(=4wqk%s0ax^$6a#{TAPh&Iqkt z%=p{Rh2YY8gg6EJfAqa7hVS+IKG^wv`DE?QQ6IQ^p3qJ%-*DROVv~OL*iy{Uq*ujz z%w;E_F5e@?kaek-ygHqMA4Mp}A_ZBBnmXrac(#ZtJ=GWqE`926!{6l?yImsh?gUL& zVLl-ff_|hnFMmPUr7C8+W>3+|Rw;=?6mhNkr8=r)$|2eGlHBln#!B|1i*7f}N=_3# 
zdmeUqDL#JPnT|ubg_L|V8 znJ^Y-o@=}>o(Av`bcti8n5^Po}ISO1au^vj?>@Gdd0`7C7WcybgQ}+aiUKA}BM&%Zy$K7|G zzAyM#AOa87&GU6~TC{NB?XA`vawXv^AMq?9Jvr4Aj2nb|fDM`;9g3qS8&4cmD$icX zPSZQX0y)&ismcWqqE3e~2RTY~N+)FoCH(dRt-WuErBJ5zy$w$>{B*l@>lY9Y-p$GW zJ5`NF#d*l7^&0d8@TyzlEy{mRExH~tVUAT~toH6r2o`PUoUmTL72Y2asc1Huz%_7A z6`d!J742~tvZu;R#oI4J(ak7CWvxEV>#fvsm&bHJo$ou1$M~X`-*N`kbalb(&oRF} z9~1`-c#(1=#8sfW3R$|;g+9RaU@2OnOUrK6`m!_JgX?i?HwS1~b5;)U-t;aH6^X8S zQ=f`_%P4e7Z6JgtL#;3{@}5viWWiN$HabLMKP}=V7IBLamv{LvfT8}^YQy=PRGL<= zqTY@fHRFzH)w%|qekW8=QWdpvsd@@TY;VEdS1H2Vau)-ZGSV-$p#g;@-W?!Q$->{r0L0Vs-!(&F#;>wXhiQ(`zY$fHr?7T{$HP zJD%iOFu_zlJdj*Q57q?q$Z5QSl~$qje4@`Hz3#1MegCzZB`f@3s&;JAYl*(?dsT$V zFQk(YD{YnNg~Z7lr#4qEfXV?_zd$ntT%8ZhQ#Y)|=5L_>c88dV<)UaQtve#9`K)m- z()iJS%CVCe+?8{DN(*^6tdIONap1%1SbCjbHYDt>wsVbxf4 zq*Nx7u<#}Hv?PaZYtv=#-Y@mrcE6=xlYnSXS;C{)X{iys3$KsfDUA#m&S9NEDDh6V!|`-D5}{kKey zMQ$$rgf;XW4WWlYE`Dr(w2Zj?9yAYEUR+ay_gB#ByH>)Gt->fv$JVY?_$Z@tj&^TkcV-ljmu<4^0H`Ozu(A zn;m-*10nqeisXieJ3$Egztb5A(jnTy5t4jRa~XtqfsAJ!$UTvJgtajAeg^lIU%ivn z%{S-Ln=}ol*%e>kJkOKVrfc+xPHD69q9Fi);FhTh66pyNPyQiZ_}qTd@u<7D$+K4} zMXX%uT6arm__IKb0mZn(r68{i5rbV3+U@G~+svl1n4O~!9aGGKKT3XSh-R(LO;m3)&JtgCuT%VjLM-%tfQL7w_ePO>V8x$V3 z>@$m8^i~>&GPYJtDCB)b~-3Y3M=?G`v-ZNs3uXODZFPgFySF;)|571Jhc6 zFn=sGl?A7nfRpu_%Bu>saKfiNGe$w?CGqHh=WUmx-yWgMty6k!B=&bej5HL$izVA0 zeThoe4XX1xH96;vD9H)2bWo)R!0iN@T6^QMM(!!vHVDKP8Q5vIQ&oJWZJ=EwMxPhba~efVV|X+qd2VPVvS1C zZ%*#9ZhC*a5TBMT5>NX;x*&U(Z<#r*$Jp#-e{I`FuQRPN94BTHIdDxb3R6hp`}U`N zdR(@c8HNN+dS$WLNQG;!HTS<*B5Rej^f0!T7`V4e8ldD6>BD*44{%Q`k>wWM+g@2|8QYd`gcl<@G+RF61_!kqG#X4yOUjgTZHj~7NcM1EAD}|i}M8;a`mni1DMsW z$Xk`mis<4^rWgI7uf9qN?u;G$InT4I1qM-P=9y#tcM^SI;o-^tEJ5kzyVG=qtc-`j zx@>=0bla130JXKy>*!@X;U04qu%`&AESJ`32e-CKv#>h=hIPB0*Sk%g<1x6s?>6q2 zyj~oZIRue4djWBoZ&u&=xppKQO+9lW!gc$+$mu$GCux9sMr$^Rn({yKQoVdv0Nz+j zO018X9yP;)f2chWjS@~>BGWc`nAvr?<*N6S^_MtbiJ=Zw+CU^Cyt zPiKwuaHz)WK~w?s<_rhfx2=|>pCfi9>>UHXKr$D^BW9h*4%2(birs8;=MKAOTWSZj4c z0or*aab>W;w(GJt8{3CFPD?{Yd9JFH4qqOsAdeml6{+c}S6c59ArmL^yND_=?;U;0 
zi1|u@>YiqK`IRt?5h+>97FqC-K9sAb>Zn&5KtO(ro;%33W`H?+MLlA5p7u!q4;Y)Hm4}nd>-kVMS<%e&uJZMnBvsdv zqQ~jR2&it@tsy?s3)4*wjR>hND80D|b z?$FPRUgGnJdrEh!>l#Nz{qRJWrBST&ciY>Y)MX$5P2g_szW-@1ql7kj|26Yh(8S8l zxar?{xbl1t+R$Ime}7oM=$B52FN0sP*6bnA!QWuUgnN-2|E=MAZc_&7n>u*JA*AYV zFYXk*%>Jj6-)i#|NtaflRJEO1Xn1i{Z1hm<<|@NDNJN81S_qnFg#bnEVnsY`*LlCC zW&dsBIuh?S%-d8ydNFzdBth1d)QaB?44lOzN12o0THS+7E^0SB8cT%oUrSxp?zD{p-BYFVLP4>KgLd4E$c@{CWX62`}!ZxX7{^oegR{V&S zyWForp_qNgpJZk>A=ts%gm(sr&Ey178#sUcb%CT+8N6^>?o|$kU9_pAFPR#KQ?=jE zL>555rWtpQcZ}j|7eP9QZE)b}7Yrj@3CFcZQ=@p-tZ(2pv|A*^BJ`1s&B!LslQ}<2 z#@^7UTj>MJb+NjxEBlfQk|t;kA`+c3UenS%RNj!K=&jhI@8-)n%a66K$1!(Q(GIQC ziKe1p&4`+dLt*E!eu z_jq0CILQpN);(+1to#0zYfO0AEVsEIjgEas9`V@wXC<~D2CpIWTWUta~; zq}iSOO>d!M@m6pNnk=e#=I z_ku^!ub6)B+?}h6ES|R4Xry@fxx(2YE7c3SfYA8TFX6zOaG+S#RNJT|FrjaA73{@z z!2P4>Y895NL%DJoAezrNRK7jFb?)kyn(6f;eMEq|?Cy4C@fmjN#m)Wnd>ho8SoBx( z6eLwc=5fUF;*)`d$y-Vr3v5Q!Ws6hXR^P$;Jk&d(Ogt^jZ&MdM51L$(4%y58 zll8iFV$V{8M1Sp{^2KZJ0n=+)i&)O>2<}l|7xjlb_+w&8IVJ5fIwUMnMUVH9q|y zRbu%3{LAY=VY4|oF&`+V*ohw50mrZHE@nlY{x$1uk#2d9+_ZUjYSW(%dZ`BTfBIvq z{&EAkEyLO4yrA?~f1w^T-1aWXn|43eF@(>%7!dRM(LUhFW`8w#1tK^aiCjuZPhS2< zv}Y()H`Q6>+}A@{X#MK_>wz~-N&w6D?#QOQ@zFkKovfw>EM`xRa~4Mh1SlNKDm`>Y z0PBeO$sAi(-Nz@-H{|pH*x)kmTqeGbHKXsZVtJT;EE%wVligSv>Xl0iSOc?Tk3Xt` zHRlWh)@6#)ANm_7kDcbq?_t|lISsmNkqsosSN8r?NIvN27_#dCcz~C{{bbX9l^%k? 
zu|T->T`aOE;pXhCT&M3BmX_o3YutC0aAK)kA6hHl8U^jRQh_-`VCWBvO>~O32^}dL z`k7<0x$E5(gdG}c5-*rGFmdW!owr>#J8Ur9oADuoDW??5O9~yR+e@{3uY|k$D^nv9H}3A2`{W+z zln$N3#(0th@4VkiIUqVx79*GiEB<54`iD9P)I}ftWwJ9j;B8EL(_iMLmUy^J%A0!7 zm^mfpGpw_$pyp`)Fpp09x;8w~^|gEGmN_E8iw|Hy{w=X*RijoI$NU$3_i#4kL!2nMWf z=Pp$VbKS<_R)I11r=vbKh`urUdh|A~KY(zb72euVm>50_I-Ei}{1mLc-$2%w^)t@4 zB{~i<*8aE=cqf~|E!O5u*lO=dOy|eu_v6ECaw{z>WrON46J%l%<~1xDWk>Ag{4f7i zzNuIxl|WU%*5_dO%ag+C-dceYXmW(ZBY4vjgmSXw)+l3bW#=Le?Gyc;yyYA!m3KL3 zxBYSzB}nm-lOZk2zKypGee^m2z;>SfDaaqeb{<|fI}1=a|Fol`LG(EEig8rI7yf94 zsw2TNbXi#x98E5qbZa>A;G2T8iHQp!fyc<#(xlA|Bxl4M*lelxe#^d zqr;D`;8{B8U!wOV{Q0f==z~hd+JmfOW(oh@#AlVu*F;BNe5~GR!>9i7Fy*+6f6t7E zRac7iSnSA2Isbhs&x4{cAwRXba#zb~)^1_5qjrOW*WYah@;*Q>JX&iY1FGjd-_tok z$O?@kr^f359P9|B1?ayd3+Lr?LJxk&<c2gZB#hu>j(NW;vc0<~UP z4blJDlX60Kn_i8kT&tRPIQJd^m_?x|i!Q)J@ejZ(_}r9NsOe*u_*e(p>sWh;h|`0i z*3A?>kNo6ks1LDDQILVI?`A{RIhO#aqVBiRiM(>(FR>a}GRB1TJBmCzEK%Y75U?hQ zB17femvvSW`2U3=WHZa|HS^y-i?G`dE&aW{{ zFdEyQTA$2nOl3y05vhUBZ3IPEsv;n0+fHOV2-u{aO%So4FFIBRnrDJn64~7 z2YS>%;z5T3_(b5-&?7(N|Hat*2g&Owe00zKv;D7=1hTyVtXBRXQ2gYkG0=m2phi9d z5XN5|i7EhEm9VY@0B$79GX)Uzd`dv*o%%mTVgyi6U0$s{ECFm1TL9TUvLm1&{`V1^ zOne2v;Bjjm0vrvw5sBKp7e`gLlG1U%p8zG##}2RuL)~5h)`-c=i7qu^L*s~5MWCQ{ zSkg-7ajCKd21?O;o(?iV^qX=_nDsg`{!}#sybBPVGL=;|`&%P&U1Z?@6`G( z!2E8R-mUuhkF}@414A5Sx$qZ>iWttfp+2JoMS)#TpzTK&Ehn33U zNAbY_P>j^}qJW5rkO}6UF{BC3AmtJ8^91-|1pE}i!gZ1>z3X7*J9w}UV6KRPtN>U$ z^0f0{{x#d5I%a_7r>Nc8pJc!(G?rNP)LEo=ppJG8qM8As+{c9{@=2y+b=ZaRxodTPVJ>I}~3b5Wp~8MVe`-3?I3lodO7UZ&p#G z9n|)JycO-w#SR2Ab2tJ%fzm!JCI(@T^XaLkW3S%b+3SkunVGa;#6=(~1h-uvbLZsYcWWf42L=d|5_*8xuKUKNoMxD@**#JY z-H>YF2k?Q0epiAXvn|*wIl=s{SOQ#)UyHxeLICbb=Mms+1vKk(D?dGM{PHxrM*tH@uMz+>^h16aSg$zV zUI7X@$q_7FN5+yz4vz(qTRBEWFWmkV(b7}^K!=1KabL3~h~kLB{fx3QV)2#ob!n6gCq^)+2N)zBr*mXQ98UZj;KL}G*`3p8z#zwp} z-~4Zb*VR!V_4(ws_%=%!c2w_wc|H!E`Y+GNz5Y6v)m{Q%-a7zT7F^1WEE1RCr{!;jaO*bHFdr2DdeNIHG{LZWK*iTzRc9d8X{7^J}Y{4;*s8 z_w&@9lkB=Pb4(?^-r`QU1(z$wsQRJT-@YJ;)m2JZN6w;w(RO_KGh21mS$+Zf{k=`R 
zKkA&y!A2HTF_<=dc!Zm>jQ}?26Nd*}%}9%y3(fqOt2IS3V%%%D`qW?}YC+4~?;L&E#h+dVn|2GUX0XK3M@jsI({(jEw4s3G^d{h= z5sW2t_ZMb-bhBXM`Vt3j=SQNV1R@iLTGYc#)zZgS`L}JBEYVvhbB;z_vXlRR_2Xn(WpBJtA_4p2-~e!?Sz_hgX<`=@dWVODH#8~vI%QK=Xi8J z3{8!8*{WYUOVgD_h;O}GV({PX#X*8mM4kzFQK^mpBQEMuOYVj%QaNkZf?DouyP!IKpQ6bd#MH$g)TyT2!~;HuG#sV zp9yd7JR~t}W#A6?NT9}ft=)^JL*Fh0=gAxNLS{)OCpQ@OmAUpC_pBV{7}$fIhg*v9 zv3VN`757*=_!{E1h7%>71Fuy`SRlFW8f1mtAy!@YKulKXdEPp180~N^1PTr=xEya6 zcy~44f4&`gJ)D+Zy>EU%#kSD2WY@U|1CG9fJf~ui{M(2XIFrg`y-r18ilISL7J5B# z0yuibQ=+aMMl;H)Gm75S;vy%g>34yXc$n!x==bMx2)+h1Ii6!A0%&iN5o0+)>+%^- znvi1@$z8c5%is?L1a*#>)HH-GYQy{QlUwp7{AoX3R?f&)-h`4MliG*B@#xLFOW4}I zS)yE`2bd1nmjYCNC>PA%eOXb~{DdvIexSjYL4`}8am|d>vS5pr ze(0FQ)X2=+iCIsel234`!N2Wsxxh!LWGeL~TKaFtD_|%OyY!<35DBS)HtjR}hL{o`ba%@&DvM)KBRX*}J8!BjuDs(j+W8`Tt zRNEDjz@E_j7Qn-(v~<=I?CsV{oNT=I;m$@6!^M#6v!rg892apbCgNn4nw#U7o_5Fu zwJQy+DrTQ=XZ#kF@Omm^IsaC7J~%QkwTLCK_z}(P_=7}y1f{-y=NjM%YMi3Poe zd8^$|kgU~MeN0t&)W(Y?E|unwcZI_5_7f$o@OxnI6CMv8!pqYb73N}k7DF666z8s5 zc4*Aq@aP-h^58-)I{*(bJDsIml)2S&`koTZ-fODpIlWJd%dcSWIM3(%!Q;>ACQ|Hk z4vycq3LXXia@m!DWPV?D&~^avz&)3X26~6>Agu;&^$)qeREw``IDpI*b5V-ad$0`7 zb|0~2=&=MaIy^}L8aK0+_RpqB22zt`^P z_kQzJrU62Y!cMZFk(xuA{!&8VUzq%K@$3Z#*UCYvf}x z$T8<|8anNuis7uWoxz|w=cl<9Qtvkggou0iR$c8FILF!AP8ed=cL)Z}-ds+&FK9mJ z5Uj>sXGhsl;^JKNo1e_dDt<1RR8(w>1||=19{1MC=aQiPd9{h9?}IHjYw_IT<;+BO z%jc4Q$Ef!kgP;(H@3u#i-Ba;YRy^>}pY3+ZMwJV!?BL-w^&f%^Yf8Xv0s6ca-)wcx z5*$P6ZU{Kf%A@&B4Z+_=y9BWWzl)BnXjS8y{VL9njKL9+$pbn?WJG zznfUKzrGKV<$uYPCcbYN40?F87A@5zNc@YF)kyPa0MA-zBgT=5s2mys4kQG!t6f>E z*kEfTUS@p}NWreY7T45*D3Q{AcJ?2AvLNPV--| z`#ceA4yCqMa6v7=vHD(jSL86+HV%8?lSMrJ1lAg9k2MIzd!u~XNq?c z9R@n2FnA-$;;;%SOm*I$KGT|SbU5n9KmGUbzf3ehW?KKfcUE(E?fJjI@t*?>dO`;N z?}LGl%0EJ?;{JPYd+f&4_x~LC?_Zgh{y*XS&&l0|P;jw-f8#%UMHl}6OZ|V-t5w*& zky^!^h7g<3y>;jzY*KQqNup-GtM-6gNhJY0EY>Ct_w$mbpoi=JBLQpsSLQ*Ud*jZEQ7iVae|)nreQy=`3q=V^LwTB4%Q5ZtNb{x)wd`-b!m~4f{3YkC9(4J3#-4de|NM6gSg}mCEws8R?m*&q^UDd>zOEHzx#HNG&toT0 zzkP~TxQxuP zPd~AF1y5I)``=!7n$jPwR;sK@-exLs#ybr@+Ph|V3)$8_&|_Y~s6A29u)mwTb-i>h 
zg#`EgU6r9w@YL+M>}BZ1DQma4*idn?oh6%0rp2d&#m6nIUQ4QV@#j_7RChLA?jzge zirWr4!Yc&5DpEFrR7R@7EX@KVJ_|Ncppqy-M*5GI4>|)67mYQ25$Xt3MD=Yvhp9>C z&cU&Ib9H2wlj+u5G;d(e-e0@giwbW^8XJahTxnOoo=v{w6SjJiW%Cw;oJWFbyF>jz zXteiC==DM7rt;3RbVut#1%6yruJq|dU1PSzlqKm0gJNrLN;9nBQSt-fAQ%cV zdv-=`2OZcU6Xpqt*8k8#svFPn?srQ!j|Z7C%6}j{+Mn|9(iGN+VProD2$7uToa*=` z(s_NnITrkzRYKEOW9O5_vaGS&_K9VxcTJ~7qF%n0GiJ=6ZgeJL>HBA1SY<`X>yLy;GzmLSX5-o4j`atssNX*J!35(eTc< z0nRHI-R9K%eIFXN%fzCBN8vfr7r!y=U(U2D5c-*r zn;Bp+d(pfC?5vlY7LR{9&#mUzWb*E{|p_|8|t>3M?DGJI-tMUJZX|dK5g2UvOuw7&YTe=88^(;LvY%E&$}@0nAcHo- zEPWRj`g+>Ovh&2EwL<2JkJ^bhS=e1F zBicN4&N!*|jRSW@^nSbd4#wf8`Ba~9H-WCWE@Z|s%^Gjyzo>5N$L0gZ?LL^Evg?)4 z9*I%4e%@{t1!!AUvd$`aUE_~jL&bXc5Z2GQY;(62Seo=>aLw3p*s1pxX z^Eb`dDCRNZGtnT?BHs~h)^7e?h`-dbnFWz9(&LAPXxo1{7%qz;2`@)Dy7{8>10a`0 zBQp`N^lRFeZRdo;`>d-c)kg>nWl~V)0obT zQBj0L3?nCIgle2({(;|k#%3+y&NLXwO1&wDo8os2A6Xn-6@E9uF<|~UYg!I3J6@Wu zHE%ba8S>-}rcKMbh0gBA*Y>T1#QIv%d@HOWV?Cib3^%ZV`#tMgVs%A$3Z*-7XRf$& zU5|CD*Z-GPhS$~{v?5iafrPvmyTgiP@6_>k9sAga(u{skol&KatS-<`2D({kI4w-j ze%!$X+U7Yx^6qq6TI5x($X>cQq<)B%D|zUpDN*_}uXMv|;!Sn`_QjrCwFg1rNLN#h z-Cp1($QWg2m8^}SI+pcDwJw+n@aAu=Rh;y_6fD^Jo{1&EIAq~c%s+5k} z3vw?elpMnrrfI@^6Sqvb+oE1L6+@mo^SA6NTK=6%m5Yg5lbmnd zXLDYVuxEVhy=Y!C7@y<0p=Q855ky-5X|*c4fV|=KmcQcaxWGt3?0E0)aFp)MCP>|_ zyc4{UuT94E1XhZR%%q;%T-7va-$cH^?>Sh1o6s!xL5k&fCH?8;h<|#y)_7R7omyz{ zc3F3#S}0wJaFKLkDXW-dG=(-=hCV&G|Fk{1d&s2S!+j%l?W=a81PfJGB`k}xrhLkE z){o-5IbW(KZ(f6&`nsNJTI}&I@u8o#n>fdK$oRxJWJ(HQM!67kX=NGYBc_uNGRq&AQ)Tar8o`lL>{EQaR}NC!jP}e zHwWNr=yE+2+T?z;&pAHl*oAG)0N}P6GFx>t7kq1*m%ZRa4!34?5Kx0g9_0EaPh>JJ$PRb4$e(p^;#E~D$`^e3vS)*ILn%NPD zI{TkxANs9vQvAOJsAM%{bp{NOc}@jgi)^u}&|z08P~rujh#~37%}mZVf~|bLE=fP>n!EEpaE(lWsekZ#Ph{-4euKuzOyb zkBt0Q+*j!pn(}a8zNuV$9|wI!EzV}Nu6=Ee_B$l)l?5@e8h2`VZu&OtwN?j|s#Wwl zw`=b+t4IZl%YiPfpvZnyqpA51`+i}MRYmY`s_(Z$Xdmwj{Z)dkw`&Nk^sN2}GOLJ? 
z(~Bd&xz1nO14`8$CR<`dqn97kllmBN<=gXT!tc&Hg+Z-hl)f~A=GjkB@jzw?m5=h0 zE}5R=EUPZlQl!Xt10j(QcIm{wd#>h~_rOhdo!6v&51HDZBt*l}hY|>3k=Yarr%Aoc zedvSH;yLhn>H48}3TM4lsmnV#a~aoH=yUakG6LQ@NbsNdX| z>BI$f6`m@8UKrwRlO6uD_ zn*Wnu@$LAS8xJ~|Cr0O+ar6(z=oi|wJOyDm_*Zw|6>~<*VoTO<`o2uOp1{&AKE6_Y zp!yO=uMmlN-;SF$DLmZXP#s-muz~MMsXb~Qdk`3sJ7?WxI-mnb+=I|tC~Ilce!E~L zMsc)}Vd27;7=*1uMB$B#y7K+o*T3yeQjnL;q#cGHP3f7WSepGEF;z6Z z{bN(<0w($(NyL;fP~M|Toc%7%Z&Cf}q5Wa%I>D&wwnQW8(XK`O?t~YiOmcdCl+Kep z|6X!>ma5oB(MGaZgNow7E|APRvAOr@F^K8b;rOnn{J1T5(Uo&M+!8-eB_`C=Yg9@y z+nBgFzb&pdu5gEbGI0;?lzw6tHw6dWCL#5tj{ZRszu;5hOOC{o|y z=N$(jFVR^Rc>EAB_8#f>@P&guxu-=X#*2~8rC^H1)ueTzzov{{9D8cv&^YP!O)O9c zb<8YsTY^UtqjHm#%GsdSlzKbpn~7nN!no>ykv3MSVcpp@@LEdTYDSfG{PuGOb^cL#xNI>*;h#OeHp7-p;%ap#^FW?lg-_l^eDLeG-hw$mXh4iHj~ZunKsW0?T-^! z(0?sYT#yjmK7R(c8(o{ic}?4Tl$%1?@^#JC-k(84h(xU7)yZYf8^0cAQqT{PEbv3E ziy<-Yu$NAE2(*sj=rO7|FyvPOVMpUa@;<->YN9+!nfp8%se!#K3F4?z==H)JK*|*R-%SP=z2|C zxhS}{+3+JVngVIo8`1Qg0(UF{BeBTGusOA@o_6~5s2nU(!%eo}Iv@4P6QjN4X_ssH z-!0cFemhH_*kJ7fQ`~PF%$$DGkP+_E^)&#P@%)&y-1LdWsbTfZoy;3Qlf~b+3hpFP z)P8u~p8Y;ae$w!qbQKWL-Teu&+EG3^3dzrVG-ri+npfA108cwWHnkAP_*YAgQ9ry( z3B&r@D-b4UVY~3dJ1i;u2`ql&ryXbdV6RaIa)U$Hmr3sg7e*tqT-v>T6L0Wzm32$Z ztTRxhpm~>~X3yYzNjy^>VO1w7TOU-lJwT~=B>z3zKAcCdSKQ9%RAo;^3O|Hzm)%?@ z?dO52ir%7 z5rXdp#>Bh1Hf2rRqP^CK*5HFD>E|JS!5NAvto3T`ht_f_&Nd`M=v>%HZ*GAw>jzdv zk8@$$8@iFu2E%OaHK{zcUET>+xs)4jV}%j1C|?DNDVI;OdRQ+jcPVWRHw0&yxO|4!8D4*x7Sb#5Kks>V_WJ z7_n5xGESc8<*WN~<~m?q6cKtS7S0ohVc5(YPSVC!>hTZXjquuyI;q%gC-iOCemiEo zY1`8Hy>@$D)2J7@p3_0arqrS zOwui;@GY|+#3h_R)`4pmsS-8`FI-*1`&V^S)OR+pb@OG1oBGSMQ za@KPVjJU_={p(jto6JNW$*fkdriWS{GZ2hFZtZuK0oFKdm0X^FzGrwn|MzhjuQjjA zOTu7&(;vS|us@jQ+-#oou9CDO~LnaN92Cl_u?Z0cwf)?7j?6t4aX0E??R z$`<~%#jCBqe{X{)eVy5J<=FyumglG$i1(}69*{8<%!(D__h zmw?B-4MVSf&pq_}xvk@yS#mABu|H$`a=4-u`QXyTSHcx~8bFU6eI)^(xC()Y>Z4r4 z;EC}8dRJCCzYA~LI>Y6gt%Arfs9xrKtmJaQ(UaCCzobmhohPP2-qo2ME8w1s@8239 zMz%}#n|%^fAWseQx4UbgY$P#T@7Ib4wf1!Tz5Yr{J2-~oW+=rYZ=5SseCzEiJ8g4y 
zk;7AuYd@|cJC7NLUEaJK-`qOz4&x-^^RI}yi?1wE>jYXP`K&)YGfVQ9AHAsT7u5D^ zH!Tfnk#dbpQt0AP9QwRekX8GTB|#eDzh=a90tPD&M98BuC0Tq)R zD@#NG-Qwh7hl%`%zxdnTOTjF+=ubsv?-x8z8zi;$uMu@7w%zMVWJdZVHX+5}L2^tK zedEV^P8lOlv|XY`UlxBHJZUl2vDuM&?QL<20V)2OUx?>?irpt~Sdf3;&rhbNa735S*S3UxFd*P2Q;|78E|QXHCk2SS@D^y$T2cu@9xE?F)d# zd?gRL-9}TK!#27KEO4i?S$AsMgXF!o$%x`yY2ONFSR%Y!E~K7r;liuw`{vKHa%N|) zA^B{`wTT!vSzUeKH+w&{s1N>vb4POsA6)1$qT8H%Fw&jSxb~yik}a>OG!a4tI||e_ z_Nk(Ls;hF%TDXm3>xCLKYy`~2=iTkz)fT5XJGVOo`mU0dqy;Eaytms*g(|*Mk@9Rp z>D!)pjrvJz@X}KWoFg?MSAdFJ`cVjjX2+G_e0GoE+NBN13H@i>LS3XutmmLgfifo^7=padNwGJfVO(6;?tJ=H{ zZbAYBa?)r4r}yy4e~(1Fu6$#g8wqi%+}^6Zi_doY2la5>Y1t>C3A?WkA9w7Z@S5hy zk|r5moe|o&J-c}CW~0IsXPd?tu?~zQzs7&z^3kIkMI?W z*AYIj_4jJf=T=NBWzuT?UNwIZSar-hYRq02tqD8bWs zVP*5>`u7X9W%F~HRjNmCu}tI_4NhD*%yh}c1Qt9q_*=HFnW&Es}SpU`ags-ke3pGchb?`n9Batf8T&?d5 zi%dK@e40lsgZ!;R{DhU|Fwyt@B$C^J?5apXZmnXEqABRmH&D`FNtf(5*)82rDc7x+ zVA5FKTnd9PNH?P{d1dlFqtBZ^=KTmaS#dKV6_aZGf0ae~Yr?=EX9LJxb`k;dty!z; zaZ?MIpquyi9~xab`>+PBs9QX3oiTy(u6ch8uPd0T%3Dh_m0w$P?^A8J zAkJ*UYePqQb?mFp4Yc&8VWn+{d9TI53^kn{v$POBBIBEsgH)vF^RTR0gb4opTy_7k zhKf(>#`%8gji)R5y58SzccUH)DPut&k4R6}+*cfDHTt+vH?wIv+$EVq^{w9Ev8ET` zdbvby(;LZOprZ!@VIvQ%GstH0H3;y5->h1ZqS4knUh^SWLap6EY0oWQH@<32+v;Zy z%6NY^%fTzLn=Bfco7dbf&h5ghI}pu*-f&ss1H&`x;wUc>Jahj|Ec~N zMZ+ZFK#_>)eQMktydCT$&8#rO&0PDrgx>xS^P%TW*NhCBUr7@M?cE zQz3@N($hkwpf&gw&iX4+ihJjT$X@W*n>C#(x!8~#=?Kv=(^bjB0c^&oVEOo{zFc`G zd;b-hP$9cnFkX*&D<)vX^RqJe@GprH_t*)ixi-V>-rH-f#(;*=O`{tzsSJP!wHdz-o;ru`bM3BM*biAAzKoO1nn zl_Ykv8-*Ey0$?06`{C5zgx-7n#`eiMBSM=U-rB_Lp;{E08X{9FbU-;J9BYO=4a42( zms{0yKX~W3Dh8wrt`1!EPTCA5xQ)C^A1jwyxj8>Ido{-`S>qlHJHf5lJu&>cvG-+# zSvRIx4Yh~zW&04T%If4|(7i*Tk=X`Q##XBpupQY{{G?+<=eA#F$y0P!??q^qf*g9F zz27i~dcrsUH;_6I;#(E7o=wt^OgbOXAU8w&%B)hlw<$XH?S1t15osqjCJ$1MmX++V z45G`Blk?*b@{`)fwq~-fZ-}V*{kbnXt*;sdsgzhkZ?FNjk0W|z67?pur8`d%!PlsB{w!mUh&jcGz%8Oz}Izv9k%zYj=l)mqZ%TdBIE+g797 zW2atdj!9SeBJE1MmB#9mGIV1#RE%L#)b;uV~L$9z|%A#xG1UIOK9tB}fk)Z#HoGr-uc?-QING{pWRw2{v zLdr^i;o{jJ$`dAxt$nt`iJZ$r&Ih9DU2SerCZ4{so 
zW(AbHX8)ACE-B8L3M}loehmD{t|yI)sh!gB z{6nYXn{NVF%jLAIQtZ<-(7P9I>PwR`PxfN5P$wE4Df|lAZ>2c=9Lo2uSw(hOt*zFb*HLSJnN6tjgl=)4Z z!G{p)kezOi2D2U$q$IBI^rgYgbt+=rNq-Wq#*AGYS%JNccioR^XSJLbpqzbkk*{@h zA@k;W-G0_SU*#$RnXAKBd1vQOt0ta`XK4|S_JENHwoenShnG18#?8|+Fig=m#TcJv zsBoQyX^q=k6$#BhgqS{1UPKoYwq}+LW2sjxlZ*7PJB#XDzzFHEsxC=RbC%7&&z(#z zDN9dQTdR=;s*5W6aAU&9-CWsEEMUIEfooTGurk~ziQ<}<9pQC ztn?BU&r^myK8$bkZX{&QR_@)JwjXOL#0cmv%qvU`@Ln2zXHv1H>6^VH&sNT__qrOt z)pud&O%?A$cM$$QsyR~!zNhv5?l@y#6kIN><=2gn!iynYzL2(!`8kS6xaD-kx}2qU zRk)a&d8rH|`=3dYSmsrqmpi~|@b^%H(pJ?+^pzzw|I8e5(4TK-}{VmLdx_)zn*cD1VfNNC| zwzjTBgQ$Kx;hme{k8L;@LI%mTFFCY+aR3WR>d^-3PiuG9wM1O_5SXx5PbL$mW5{)> zjQWJR%!i!U)mT4#u%VxxVoI2(r`;O$dj6fez5pf<=a=(pnD-f8-nn7pz)*JsmUw0{ zBPmyh-G1J@IqK(?8rZ^zXNX^R-}ddS7qb9JkPGXMeJ$9_e-=UwPseP_Zl5)->0VFI z5zkRP-+dXGYJNg_x9rkz(M>+u*4ER$5cEuGRKY2dX5%CGD_?!G5?maO96 z+MKBLXR_qSNd@*C+=myO!nc)&U#;D{fblS9u_tgdB&+DNTBNiDx%mL2X8Rdk-=<=x z>Mbw+PmPIXUzyjcnIu*iX`l4betd>Y$gbyYR!CoTxm*O#PQv9UC`e{$oZ!B`rtg&= z8LPDR%ChP$v__(zYSKnaB+0F^c2tT)U#Ji8urrMk1HAJjowdRAM5cDSXXfL?7LBWQOVy0AGqOLE>Oka z<2;oZQ(ZjYu+v>Tx#{{^C`sq%A{$JW@=DYyaEB{nrI?)$AN$Na_9=iOsROm*WOo>7 zr>EC0iBir!TdGZS4J;4nDRiqAS3s4_7pP-2AIp39$iJF`rUtQ{y^F*oigU zQ6AQ~GW&8ZCHpwj^*Lk8Qe=w8)8|yfbb(y9oYt zIi4u7K?WGZBg`IuL^82>pbmNjAHT6KFYF3K9kpGo_!ndo|a znS{S&cG=xC>1AS~}2m}Lt#wSbvnW#2B1S#Z0sXhahOo;FH|ovu;Y>DBHxe(7gv zHjTIkN0onlBazwPZ>$ujO$ZMXYTDrMM%Y(7(FYZnS&?@V<5KSJ={0oX176Rs`i8*P`L90UGn NJkoub_rT)q{{a~(z~KM@ literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/async_ddp_param_gather_b.png b/model/train/yoco_moe/sources/images/async_ddp_param_gather_b.png new file mode 100644 index 0000000000000000000000000000000000000000..767256256f46de0302ad7fb21cc6c9c9db96bbf1 GIT binary patch literal 134747 zcmeFYcT|&E_cpBKjOZvJDuPsjQ7O_TAyhFGN2Lj&2gFEG2rY!r83pM@6r^{g1PHw( zAShr!s?<<|NC|-eK}vv7zo5?VnfbnNz3chs{o`HhJ!^@JaPCvCeeLV)v-h1aJsmCP z-?@H2cI+7QpLf(B96QDYId<%q#8W33pNL)0N@o0b9PvQw_AyKk&jRC@U+vVi)s7u2 
zk2$kv{Tt)=(=K;R5yy_5cR%`f{EIEW=dojlZ~s(RGxW4v+V?7}8iiz>@egp{^_X;; zd`WbzpJ-&8a9d>cZ6UC{fNt-2w{o6KEPQ|K_#5qKQCW!=SIV+Df5o3{{rXFwwt%^W zCi|7lTi>Juk3CB)l*u{9{_)DqD{9iGZeD3u(M_+^DRK5&{aY*KTgQxpQy*3t*d|K4YQ$8aQiD1iMp+WRLQuNFVW>9f!1)0!4B^%B0U z7pPZ?ZScq5EMPm-=b}eTo0th$&$-SPvL|*_Pb6U?qnvg3z)0jH=gY_?SBdy!_ zlCX|0(@2}^jdLn9CrAH_m>b8%%c0L)I~1}k{!g38Q?*N4t`=ipkwGo#0JCwER}Z6m$HoGT1kLK!*sy2LLpc*2b=aR4r$y3Txf46A(9_ zOzrWYtyrV(l(vGohUYwXU6LJ_P|Gje4sKzeemVYRR^gj}aStd%qV(Yni#q*7Gfb8| z`(92LHij%^ME2wP#lVQqwEijGL6t0D1<^qWD>rNGS?Jzmpjq{#eko>+a~csBQA54b zO-){YFG-GX`_uT12&uxz4SQ~?t-z*}*6zRi*B)o@yn>Qo(hkNAN;*iSBsf_xck9y3 zQj-l9X)m5HaXcxO(bCuxKGX38>5lB4qVqh_?=7zaysfdJ_L^xn+de&MNg|BXyv`SVPF2sB5Rcc?vlizhvN9BS4BS<7MpQ4P~r*v76Ajw6@l zfILk1tM`k7#}Zo#cKB-r4ocnQ8>!|j&{WpYQj(zqWa0GyynY_KS4AYRXxh+CC4Y(i#ghhUh3`2A$m#kn&; zOC>Xc>fW?(NjvV=ioeOR&~1a~&aY7^DV-X0hj+KZhZ73JT2RxxLFmk44;1WYII*SX z6akN(0mCL1f3M}Lv6NKf0B~b6Q&cNBd(&L=>aST((9@M%xE)@XOB12sx~#05B8z}5 z7TRGA*i{>{WtK6f!&Zu88gv%!J?9#Nzas@eNf|i=yq?RO-8knpNU!Y-aWTfG8y#lV zByzLNx9v@Crzs4-^Z%QZ-ZHyEwR|$gx0Q)^;hIet9cmWsR8;0;F+X0NU9ixO@DgX_1Umy2qu-wde0PFfp|9~<&8p_wi9FBy^t zyHc?^VuT}%X6kCJyUFK<2e5EyAd#p50bu`M;7BpRxmfre9NIZ01J*;MPx*Z4Z}gFQ z7WMkgp3?Lr<(pXxtH}Jc zJ1yb}q@p;12&Kgd6S@1{Vw`6fz2jyE>udC?dxsG~#YwXNb>8L+d9qbmcYr1|t`78*5^%|+#dmbNrY*lFG(t%qZep@Xn*Nwq|GQg&~4whiJ^mPm4d;uH4zZnE%IAhnKwb^%^{xZ+^ zKF1jXO~bV$NLVm2y>YJqRuIW_pms_x!r|?fEEL6-@WluG=omAt{5;0@Ekc)#=RNGzIGwb`d< ze*Z-dyi!##U`*92!UBwJAXFLu1Mi9&{rHwp-w`QIx+Tny=D9HO z%v5FR_q=A;ZNvSdXC*m&4_GhXUz~cRa@H!|`zwT%_*~(n{@Wer9PQ^^we=@6`Rud3 zKPNDN#ckjk>RVdI-i)#`4_WKfVJvugp3h0q^ezhfuz;1HQ_uNR<)jUpu!Hrf!p4Lc z=(sNLLAEwU`uf>He>5z+vvg5t?l~9cO;bJoDK-!9qh+U56dVNaf{|n#th29)4EiIGC@iW_yuU~ z)Z_sN$mwR5JGi1JC`et!-w<=+S7H?SLdHDU-Mq6Dq65oUdGvaCf@PS-Ep#3DyLs*8 zEJ`j=It)kgG_jw7AmR1q#mKh0@qu9lxaDR#VywLCvUiGKm43co{ zt7Qwz1oGFaKUvHNmixICZejLAWQLpzSP;!f5~U#IGZ-G*k}1zqg5;8UqoyohO0uNw zbU(?;T0Bnq;G)ugvcLBr4jKCN{U1(OcXP5Di}wu&l4TR#8}2{nIXUDu&f^7M|900={z$I-EO#a~2T@*5~(1pgu%LR;wTQvPd*G>6ymDC(K 
z2*j1c7e3g+-hNO&4AEz`LsP8vQSVLT7n;f7o`*Vh-rETu1uh{AL*lA4$f7YJg*V+s zFnm0BJ>0Fuh61?Z=Lf%G`7RS3Mx^sFCCOh#1m+FD{6JEh#6Bp6VD~c~&*##XDXOty zUzHtd;(9G(u&mz+@F+8H8Fo|K)PnM>i5qJd)nh2g69x;vg3h8zXe75N&_#Us1d64n z-k~Z_(HL}?jMw9>)$T#CBloMA^TsMRO?kN?)x1TA*9h#IZ<+O1vgQ*IBE^c7b7zaYmQ0M361FLd2{p*lb6>8e$ zS<8Mon5Q5_&|ped&Pk=JXb}7gvNpK+O+rA9gmkbw67z$ZEzD8f5X!Fx1S9Rz=<=@r;P@LfqZ_gTMe>Wv7LUDwVBz%@ zgw?pDr!j>UHcv+|y_Kq5cT3+C<;m5i|3uL2Eic;o$^2Llj}tp-k6g7YaU}!oo4^^I zEzzZC?)du?vPS{h&!|n4ec`w9MocCZIZb`%eck+Br5idO+(An+XuRKDS!09RvNw9? zbIu00$@>T`X|zCi`d$;ZZ`LyGQLr4hjxNKXmd{V`6vF->*WR_P)@Ce5yGV4XsYXNU z`m%tAmEl%aLuO4U&hO?WryN5%P3oPBZ6M5LFV_W6*mTMD=w1;lWm`+9WUC#A-oGv+2)K%k^2Y{f{h%jq(N;9=N{Cvu zg@ND2!M)ETFi5R5Y890Kfm|fG)v3q591fXtsq0w$ox*A}J;o4Nj5xBf_1l{^{L8g! z#eCn()8w*NR$;46@Su1tijO1fKDk$ksK2tNAy+!zqJy2pRm9L7nD2Qfjp_LEWpAny zRJ_Osd=6U@2j^NltsSuKvSW64;#2u3&F`~H@`o~dd0|sHSJ^F{p?|rx(ubLISNTOr zElgFv^Qh0(fkRZ{sqY`3?eDfG&mlzJnX>6GWfK+7E`Plh9fr;P}5YN7j2U7YWjh<`%4(vVl*Lxc=) zevg5q25Z|o0HtCDGcUZ#dYGUM3VC6*qH@eP@gv)%s9_ISo%yaTqSUScuU>Upt|ZIf z23}&l$Z16~E1nTq@j0=h8uE#b^lRT-y38NDUJ&vJV*RxIsp(^`^S72hCpqXXP%C&8 zE1J>E2k!?=mhxfIA$mr~9SLB~<>`Y2F# z-YAK;xL<;oar`ctEbY|GW)fsd>ob;jU^+@aB`$XLLW8=pugMGVm$Dx7uM3&d44Q70 zGJz25$62&zUcMw`De|$Vd4K*ymOnT59<8`^n=Tc$wPA{SKgjF$!ZlyfcQ&(1_TVL8 zAG;u6#A_bvE^_9jzj84%1RWs3y+U~MI#W?9x@yg2amOyQ(7~agNt&zmxGmqVXN^QsRx=V~FO+mKJq;5vJ%luG7KPkB{^$uySYDo$oN8Y}3Q0h0c`5A#{I_gJ{X}0NL z%VF8G*OzSfYQa^;~iJc%snLWX5R{QkCv86MK-xWmkQF7TThB zZ7rfo*w{EjyVUCM(6mdmJh6E`Ozy zHb2Nq{rKVPY@lp&e?S%wlk0r^0`>8&D!T;D9uzMQ+^7RhV;sxo@Lvq}&kstc?khY! 
zKz5-Py4m(k7FZq^;XJ4qFpG#oT`^C-qKQVG!6C(JVW+@A+=qc>5<=OiuO}XhEZ|C( zCGf_?2@OC4CghakbzZh)jxkm9RvdmN%H~-NYm+e)Rl_#Fm%BFHGuxj>hLP;jmwP6z0hqVX(6sdgA6Jz2SjE>>4o=o2K$t;FqX+B!5r1lFNr^D8d0;42Ak%sb))Y?FN8`f4!v2d z=%7i(*@<(992g)eXJtZ-8KYs2LXz?LRS-1OLZl~w?Nw6lS81v47 zzr;;khXg{w6Q5q+TeP&{MyTtsYDdw)X1e*CPEZACihe#ec&OtVq4Z}`)k62A?l*b+ zug8ufnfT*DA=X*tV9T||>HC)-W#OTuW4V0A-{>YE9`;Z8eXzdhKk?xS^#V(G9*Z4l ziA$#8_%b10Ac%io_6=TCN(C3FLXkEk-=$4@q_{-6+q8<$;}1Cvm=d7gV5XOGFiNt3 zQ}>0w4A(N~esDjJC=;5XX#HotL({o^S@((l04P>J&KWHZeC8Gv#Xn~4XjyDCBRhk; zC%g5Lb1_nDt^0oW)enXWYAk_I`6Fri9vMZLVn%8O%Kyn}bOYDneo|}4n`KsiS2e;R zoR-DKzsTSbC-iwjrs7&7A z$e_p;la`8IL+l(Q3Wkn~>c0V-=+*;rEWitq)uV$%<%_6^MdP2zXUz4$>z+JU+(XV} zMN3%ZQy)HRzx9`JCy#v4r`2E<*WggFmZk2qf?gbQ`m6Y~UCA$g(3zFBhoa=l2!Enx z%3xit7yDqN>02H->b1eDydCkF#m~eF=LclC-g<_4aCsm}yL8>+k~?H(_4EAB??8~! z@H2YS}mc@PKew}MzERy zU=epk;{)YP-o=tmy(MBXdTkWn3bpWIh9ZUle~Y-&6xh#3C!ozdX3suVX5ij zHC6kh1Ivd3vvvLp!Nz~5`(ynWd0h3`_kD~~LH(RbB%0Wb>vaqJdn3Od z<*-?JzCVi@E9gke-`hVCLn|S@XIAC=?Z)5L1vkk5rt-v12Gsv-k(o!E`A2*FD`Rc{ zYC`{aDeC`gRASTCafEkHtFICo9*Z!D5DNqUqn5P96Ilp!BAQT4JgJC``Wup^gl8#0 zXNmNk!F-8ydY7GL9m67b7>0QK3n13<+Nhx=6J|W&x1Wp}Wo@%Jj9TL}NCG}Xlogbs zE=Gig62mniv21I$hq%4ll9Y(Q?KjsvCq~8|w$FxK0kDu35aSqrd-;F*kNsIKyPldcg^;iLTsNch-&gVbb z(ta*gj;qc9R`&~kNpq9`d2;p*8@3LYEHx?lj@bYymtmUTTSJhdM;>}WAu?} z6{{O%SQ7eN$aYEvS|4d_)3cy6KRhS`fFc>3J~ zm0r_RWG>+hui5qA+g+vFH7y!L*17z1`_f|7d0J7(>7lxYUe`s`9{QovXgdn+7(`xr zUKzS%bXs+KST+u%K7p%zEnwpTvF$$%>}VnT>KKkoHG2s9pIfl4{cFQbbv4~ZIN`OM z@u@)mnFPVb&Q-NMLn4C9nxL{01tyQDouHXrb&__Q^R3y zwiU-;x#^`_umWBuSp-BzuJVzpykfZF+zWHwb7i1zomOVSiKR;q&vtV|n}eH~4|~4y z9ymsy1|JU9*>{;p{17U)8;+d?9jN`C6LXHlX4Tid$F>ik%ocQF?hdJr#x(v{KBr}W+n?k`pS@xaO`-MV1;yA=B2&H~m<~0yaYhUm<*-^gp-Xu*> zV!@#HEUDe@A+l?(Zcrp4Wvgzg*DIF6q^3U6CGpvRcvb@y7pVec&BMb^s5>i)Z(wxnaF;InCV?-SKnYV>QeJY#{Y zva$SR7T7YV^)u-7{UKC>4PVh2I&-{3F=K{&A_{DG_kGei>);*MA=eXh{PP|whP$3) zxNB{Bly-n)9Io43s;xl2YJs?U(kD{*UafWH9xloraN7f~8@f$*8nQSn-{c-wYCBmY zW}K^Bb({}vP&F|UobDgF;gHtiH_s7L^&zq8heotZmzP|B#vFSnA^f_RJ-BhV4fV~J 
z`X03(3Z!i*v0?nTVk!f()+6C@CA-H#vsJQFMj00Y30a8qoJA{;_#|JHwPBQhl7d0R zHqH0?+H%LQ*muGBUr;jA5IquZV=J&v1Bf3WUr9(AY?WWx_LS z*~lI!R&Y71h8Jy+3#!hF*&Zt6rF+;s--}TH;r6jGJ9AA)kD@$q$_Frsona!HCV}ku z4ajV>ApGZ1wFDNd_B{|c{QFuobZtX?2Gi%f z8FGKrcofH_&};9Sxb*3bQEcK=&Em-%vTqZ$vA%Teo$vh8!}?=y-{P%NHu0*p)!=QvASP2Fa5o@hMJ#x^Q9Q2z(k!*QTML8CS+ z`9jn9SCS{l%2q1!m=ofn#K-BZ|mBo$fBK-kxqpA$V`*pRu%ofxiK z`{i%ez6YfLdg&x2vVazFEu>Mam+n8|hw5Erq3aAm!aVE_lgh;aoU32PGDe-0MA&EPWjz^x*O!M=~sLQ`xDfE`hOR>yippx zH>s&uS~P<=Tr0=|57!pEoF@95jA`U8>J20PjPA zt;0-=tabKvoY8jxe^2ZVy}0Nf3>p@J9`F37~8hRvDAE z@@d?EPqNkPHnO+Xi%E6ElEm`oNUo!+gnln~Ug%zMk5FDHBHCN$7X#V^Bq`^<0o~)^ z2pI(%p#u|h%`bBMg}&B0I}DVK!+x%=?9;V9QNRdH-4w^na7QU_uSH2sEv;q89eR)E#B-o~^|!M7*Q?Vvo@s zMZQ0YX&|Lj5Pv7mX_?$jrF34-0`%&$1oo__^W>Z zHuHol*AtZMOhb$>y=n3+DYT*9Evg#!Z#?0g5a4Lg42Nq7=h)X5ie@&I3Yxiuk4a;p z_~2Na$Ex)0SDLmAU?-1tgK)!~i4dwh@7U~v;m~`0NJM17EVT2tx}2;U#=9{; z;Ejb@bCbnZXIxw9$0vto>QQ^!SpS&x^?ijNUEIpbsIb<->^)0*Lzd#=vptx@v}VJ@ zfr^hw8af%@>e`Ytlu@DIz+^T9^>kY5uX&U+^}>jrgq!oM{a5$5Bl*1yE7Z~}&P#5Q zO?F{-qGYrYW{+X-H8noVydJU8(<7T{M5u{ zA+Q%R=CAYaXABkn0-jp>Ftvj$9J(^&tIEH_r%c|Jnu-;f&w`cKEAP?j%0 z*)e53WGI(vMs-lOBkjK6UI>F#oQl_Wph7~vjAOoU^$|pMTgl; zNa(<2Lu)jzK)>k^@^^n@(Zq<4%b=SX;`LoYd=eVxHL)RV*UGEVd0B|yHE|@O(aa?rZrtO<4W>n>X5dw4sz=gFYLxwi zqtnbi4IRYLhFeB47_*Lh@`KQ!siO7r-&0hvMU1eU7@Xq&gCK0N-QRqVfSj#5I`F3Q z7-!%2_E=Wwdw-DlO>_smZh_-x|IxF(-*4PHXQI$<+)g_`ji3%_SwDJ1BzEBTe?2^i zmvMbP@LZjjJfwcS&UhG$u{Vq#cNqv!_@DW(TuN3TUQ61zv&rUl@1B6&7awa6M~V{c zj-nd3cR$vA=n&;CX$w&>Xk(T%GMWL3Bp zX3dx}KZFriGXwnHxc;%??P&4*4+>;4Qx)m^3}9~_E%Jk{uFeVI{*Ofp8iTi#%?kt8 zR{V+iFBsQe9uY1G34mkHc#f5U=u`VNb4a{?o{pdFd@WXkP-4|w=y;7X72&mNlJ4Sz83;XlxRrFSg6 zXYKbp8Y1-n*S1Hj`rk;g|I>zdnZZYq@-mFy#yz+z!uTQjhv52D@~W?f$lF1cKLmGM z#B`jV{W+rD^yANW3q{$ISP7A2F-GrA0k%E9KpzK7MiFsgd=jMGm1gS*S%1nxt;E@R@cHx|w{Ih`IC zV@F-OJsY#*9z0EOi@8=2&@erFiMS!g=n*0TJc!C*tfN{S?(@!T-oSfHKzR$?vrevF z=kJF)`|z#I+0hQNkONkbgk@pD%D?v^LE_VXeCYY{lX%^NG2?MosM8*|Dr2)x&Bx}v z5tEfY132=8&!NsDr?S=B3$klEefV9#JW>)@?n%i|=2)qh8t?yaw^4)rqO`MzIB{*E 
zTdo-M)Q}ncB+lm!r{#>(un3Y~jP9&be+;mYO)nK@Z0RjACv1toR37^qaQ_Ii2dpFNPm>x|?}uYWYtV-lI)TG|}*t%3D~nCsIziBgEh*d6t`7P%RE z_=CHJ&<^F~_Jh^%*XAw*rHn1|H|tymR;tmS z#d!6L^R4WToJl{+9Xzqbhw6KEaz$vVM&19v9JYvdvV|O{#@;Q5UX!1a;5pf!fB0gY z`E3xH?focRcnRP>8Z9Wpjk(-w%wWax18Sf8W{hzv@p+0r!w9>dN%*E!1I7w#-WFLp z8(|G-b9QM>G4d$|Toyn`uGoJF`M)qEjzY7==*x*sKjef$h-%!f^;LwS|Jr8NU(^*$ zt?Oo0jWQUf_vDeZ_FCT4_p@;3gj9}O=;v9~z(puG{;P$I4ST;aH4fWN4F4{Pal=ku7_{-8P2TGE|Y#!EfGeW^((;v&Z`-hEx15Lx8c2 zBvnz!e>HS>V zwN~aDB3<@2Co({;LpX^bsa@q;S zom(mA*f=mQ1koNTowI#?V4;P;V38WQ@jHt6ox$y_<-{}O=;J%TEkBY_yeOTgN3OlM z>}Da|^lFnR_mDjn0GmEXj)gu=TYN7!Z-hNrk`FTpjMla=*Z@)XP*?G+l)Yg{WG4t2 zR@UHgkS3@T84qc-^miIHVtXQZB$r))UgHs98DGz6&bc=r>({i``(%mD>d*@9<$y=x z={}Yfoz?ZCW2j=u5uftw6IINkozk_h?Z~QHMQ^Ors?V&ZC3wbR*I7$aRI)SrM15#} zPjudK0)s|}s^xM)1foLt%KPtoF&sZ)M;nLKyUWlWP_B4 zm$3N)p7+2EpOl7aeTRM8>^8fh2U5maUp$cX!C<;lRo7I zH3+Zkjla~XvLdS*SADv+FuRS|jorBr;!oLA{8$GzpsOGDE|2R)zUBdrd0|dv3`D9} zX@El(43;$;>~r9}?XL+h*XELXZpL)V;miLVGEsW!;*R|rL*HXCbvQ9No zJQc5L@G3SzpW0jWB}lW73-DU5%C6lnxCz?t_{atRv48X?Z%T(d{N?exes6)SgD~we zHD>jh(f)HP*GBFg0>d`3gqP89W(9yoz)C%}zWueUb}}t_2}3&7=|JN?zo5A-UtGL+ zo83B55;ie9EO!CtsKu7V{KGyeW!F6XO}!d57K?4h8KUP1_vYdA(a2 zFE9tTwQgUD`3{Y+T`q^i96Ed(vK`x|m#E#8Z}^eV`VQS&(2D4`C)!*(b{(?lu$BJ9IkK?awI|}xf>Z?<-F^)u^_;vzsj$eE zXjWID|0P76bGrSgB_4>JszKucyjdfe?kQ;__QjRuCE>n#ECIoByRifi$<$7W!%yMb zSiI{f9YXRy`%zMEog;G|HWs!8<{Hz}^e+mn5a{n9r9Q0%!EYMJ+LAdskWmdY2uS1w z{1SRyW0pWK$p$sGk`%6Vjc`!W!dAdM9S!}q9_IuuF5^gL;$?%N%jaj{1w>b6k}Ho> zQFw#fm@oca4^v37Sw8aOet8FOmSH}Nz;wghQThuvsL;x`1@e@`=W>x9pGoATMtUTB zf8=VH=T`;T{j)Dh2oKn2LO0NlSiUAN?SX>?4tt!rPa5@W3Ci$elHlp3FTV_;!sR<} ztt$VK;ZU4?C*tPtgmD&i_gn^S-!3{_rY-rd*Xe=oA~0o`irhNil?F;fk1(C9NtuLE zV-%dS<%P^q621HFX8kV;lj}WR*vGWP?pHZ1>E;iT*x<2E0b2Y9!_pdc#M*|KJtH>P z%Y@vN-50&v>s(G*SolpXX-r;6#$pG#e3f%1$W`04UMem7>^SFQp9+lLhKj=iXJ!GE zJzcc~+J)G6Qw&Le|3||@UIz1T5k`#n&O7}aU$LFBaa5mqFL}u=uBC_Z27lLQ>9_Ll zL!$y5-+S^$c&g;)eyJ;|hz(`tSeBUsTx(`i0lo*Smid^&s_C_9}KoL?|Ql!L1+v@<0c#?l={FB_g1xBs*pL5Nh8_t9oR;}yRQ|~f-+FziXb4J?A 
z-RJo!#i0c0Igc!OWX_#?M-fov0{wRfmi7C^5laG(sA^vO*B!qwO~ah@7?Z8uTrB9 z6x%cPFYYN0t-Lt}>{rV*-7ga6KMD^j-S{6&mhX!mg<|{;Ey=npDHG&P=_lu-0l(Ld zZT5YM+*-2KAhrWl$V&JQ-HEXJu&3OHNF+(t6;XGNVA;isXxx1q%e2awGpqPEYn218 z8Xpk%`dc4A#0xVmJEgBfj&nCvWgF%G5n`t}?U?=nyS;KT!t;ygkVd7`Y zzBBN3?OC>Dw|0Z-8P~J}^R-m!hCGB+sBrlvP2=R*-R3qEMQF;b8gt_}gGS#Dd=d{^jF!mheUC(i5)YQP4I>P$^7s4SF@L z=n_dIZ89&ss`jk}D@y|no#AWl{^NDg+3DZaK)cuYkuukVF7a~b4^e)%bc8nfy4V)Gn99HOj*`!Y(WOgM zK+1EySu!!p#&;riTFHs8XJp|rRq3oEbB@#NUg&+G{_F<_auX(aM`SakYGCldhB~I9 z&_~^X)Jb1jREa*gV&M_vdXK9irVJv@hr?;`Yc#Qn^ndl!7P=VRTW}^@1)7@bT*q8( z{ss7W3tAWrYb9g$1#fiF(#6(7w-JV&pI;P)mq`cb$BnBdND_6lzsrL3Zn)1;)$dF9 z%vDLRPvU0mYn`iBTQ~xAdqZgp+xu>?I|V3dfnu`pW)u)5RMJ@PH;>~CIZx=B4u1mhT5o(aqh#sk8#7?YXYp<;!JdQ>v%I$`cJJBM|D1U+GZ(IvHBK<;q z8S4q*a53jlgcndh_rijav_Bp|5a`fSn7Ae_hG$P9DdkOf-rARGv+GEj2yPA*WW{?L zZ(6K?(DV>tCa+n^tHM@3AYX=JERKmcj(knu4d5lYMTM1BJFZ^6s$wu%hv`$`KG4h6 z?^Mj?0d{=k{OV`ab%E`AfEU_Yzl*z}*X!$>0g%Q~1ON{QL^cGgC)TOtjiojUT6lCm zfglMJai^@>)MxAODj1j82lhA{XpB|0T}u|tarnGU4`G}i8|IK$P4ShNxSP@XgI6hE z=qn7lkRA4ErO6NLPK;y-;hiH}K$CW-fO1Um?1IvxnAFxBg_a>mZMBY_dxUwY9q!<6 zA8>9_)?rC6QTpT78(gDqn2gLJB=!=mkUAJ52c*d6-Kk#(cZDI9Galq>jrm4zPQEC( z?)5EQseaQ$qpuC-a2$4ZjrYPr?4{DQ0O7UbDSIHrU@JQ)zmahjaui;Bc-F%vMkT!{ zIY4;lnQw)nP0=p&B7JTH=h5kd=)bauOCCT8cc`Vh%pmxYMmk_K>M>={{6b%hV!Hv6<(V@Y9u{0(#?qP}?r!9(?#pB_3eAhnWcN%U;HdhuJj0=fH_aD_FM#w$Tqaj{YGx{q^q z5R!HaKqCp)4WBiiFdnMu&q~b&^Sd9Iu3nXycFZ25M(PtU*Y-IzoRCj1@O<`v`4pOpdj@k+eW0ooPjdocS z?jX>$MLLG!1;#j4$I@$x_g`wrHxpdBNhxT%bltpWTaxDlEv@?Q2;?GiX|F~E&%J*> z+_%y^=yVgP!%Ka)RX)1qmy3vIG#Vy|=OuMf(*&Jw-rQ6md8P#nr{ zE56iOuGPV)>Eebw5X@#!uV_n>ckiRfv)wD8eaP@%#2$T#FV2-zMD?;#p?Ty$)}l}m z_bxcQgfqs~PhdDs!javY+@-cC+97mwcN`t2-v#nZ@qhIG*l^Kd=;a#y%&JimVO?Q_ z&QP59*heLy$fU{~lY6)8LRN!8vZX-!(r@}r_8Ab3J&a0<@UUS=>V&IMuQvG`{B7VI z209qF&%I*2yr@$>u^tl#Q~kgvy*VJfBd@|L(m|k!KWXDECH&?!WjQYQ9omTAa$ro= zFvQrS|D&>44;R8yMT@r%|41^>ocDE6sFK@QQFvX!dIVr@Kh^i?ZcZpLPRl4=*C>e} z(NI`padpY!2hHZ$L(10*85MZa$4|Rhl$~bkXA3F!FRpF8wXGjLlUCH3^)EHsfv-~- 
zwc)R3>)4gX-8WR2;f%OKbv>kT007u`ymLnk@S!}D)Y538{*C)QH%otb;0ws5>X9p% z*buhRy==!&8M5Sc-=2dT&&#bB*23HjQR=*_LTMCDGpu;iHY}jQ1Bh#|g0#M6MkJSC?uE$#p3^!{K2cB90efX`XMd{J-`AUsZF^it z+7OJf2jXR6fX3Wbxd_$O+qs>zw26xBo`#r)+jUa}k&u|1k5tqK6M8}KkQWz_{s!T0 z$bAlEi{uv+{2Y3jzZovf8srlv6w#Pt6+9N|3c6U@Q(6=R)rz$w*NSQQpy-+b+THKU z`3ARz`>$IdbHp9Yz~SFO%kyXUDt&}YH6Rv1u+<2s7R!(z5*UFoZ3CeLPrVT>t3)UKzC>Z^G(ja}b7<;Z98) zaJ9o+KEHlx{pM7X!yM*GYZ&O5#sNYd+YXG20>uR?*c>Xc4*O>FfDzDQ)iiSgUz(tj zP1wx>Fh|V=Ys7(R?q!<=A~3C`Hl8Ir5^CCZ(N(w&9scg>3L@FpHnr7+6u%mnT~hDA z64%KHtaLR%X#Q}ewZtX-T{sYrIym4V+}M?C4r*5oz=S(wi}}Rm7_ju(HRFAs-%-r3 zj&2v~r|#unv{~58cB13`WOzt8r!WZG)ce3?Nt;Ip8!FY6qD}3nStVRO{ z-Xa+g`_)gJ`$z|8dy%~UDdFxy%Mf-FSCM&_+Q!EbUyfy6Ra+D>cr8=BSv6*hQ!;c* zZc;wz)&{Rvc4)%7+0`Xn_y*%{9j0)W#O()!)GA37Wvi~sQ;8Mdgu0hBQ@!CDjZlT4 zZh~KASZLt4(kC*SkD^*tF`pRu#pYlaEv!>B)Ef0TM@4x{+Y9@acw;BAnEY9t7OWU(2g~;Ne|nDy?boA z(ySd;9Y_-xF7xcxj4;X9w@f$+L9f?`JW5M)JELLgrJV!aPTdlUUEv{BZKv*v_FqOD zv)ntV(-qkg%5YHd#R&5FNAfm%gjc5!diaO@OsecWQi_Cq;>5+aG&ofe+z&1hpVHCU zI&v{1tkQgca>VMkwn(Zc3hQo@HnN%B=x%f&9V+n4YYBglqfsmOdPGGI#?cp_wiR@F zOsf<5(PatD((PEvRsH+UL@vv$4P_!7n&# zf_|>}57x#^Yak+D$n{(-;xg<*Pw4}GS7lUvOH|R+MaoEK*Z9arTXyTN$zX-5QyMPH z@5W!-#!Uhm2RAHQXZ&hUMTco5Mlpv68uo`tUc3=Bs@yzxGkeLgy~0Ph;|)+D&D=h| z=88q`fXJ9|K0C5-;cNOt^Sd*OZq=W&r`gr>l9WNmwJtynLvi8kkj&hKu zmm}lCJBLZlGCS1!@)Dwv9EqKWsgS=@l`T)SvFe;e{lJ?#gIa1PXbrup7bjNxr|*+4 zr?@beRj=rpzI<@FM}!*^*-6WjsA8V+m@gCRNZI{WBV8`WLUpX%f1x=;<@Dhahu>(o zR-9bzzNj?;GMKYI9>2yR^o1<>3I+o>I9hn@k78}6fbRP?8jW(i_*)wwLclV&N}I*) zv`VY4t969(Rx(kgD2SPj53&U>xTi?O8;&awXDS^&zedwou_u`wxcwAziB9?eidCs{ z$6r3JMeuX33u4VShjlfsIm-y2pn%8-oXS@Y`uS`9zcapYwC_<;)V_WZ`6G9o#3=ce z9fnuGB6!9m7MThuXe8bsiWo!r@#+VyVeihZgfvc`6K z{K%r=)|I~W6yc8HFlByg`^6<4Q*bi!s>Bt z2XQ#$WRYMgCAxs4!^z@aoY+cRsICHt828f#y>iZRi`t%J!LH-gY~gEjDpUJ)NVkG#&qf#daL{_M}wejx(dawEZGqB~tik{g!9zNkRU# z!ImtYps*JyGWbWlP7S3YUW3(&xe+d4o8B`j^lw~Fxox|}?SU1Bs&O2!=B!k!OH&N} zCA#r~W34>SY)Rbcu<4-mQ~FPr^l!1%h%LSpWZ~xiQi?9D(C`_y>OY_0X)wbUz(|JF 
z%jUV-Elnk~=`eS4+7i`M&1{=vs<~HJSN0zH5+vn>@lVBB!#xVA!X0(d?P8S5GaB_8 z^m?2Iy+yKDbpo$?M9-;V5}Ow*jU(J&%`5VSd3Vvk5ie)^=*zcPuPWhh?my4A%3jHv z3p6Qxx^pUE?@mNbjKf9X&e<)|Jz)cKLQ|c_{O*NoRhB=Q+eYLBw>Y1c&S zQ6rz(2{griCN$Bb zx_MWhEbN?rM=YTPALf2Xkyzow@di*py4`oFf&bq1h_Y?642wwf+(v#JqtsT8n(|8k z%v@_U9;h9Skygl0&DE~MG0kzcD0e&$$W3ZlOP5i%gl5bD2c25moC^Z7br-+zev~tu z=rc*y$0ODE3VJ0jE?>$(V8Qwa+xK}?2KXlYWf@lZV|tJgCs208F7Ce~=cL;9{IEi< zBttHT*+X1q4{GBGQjXDV92>+lirlx?18QHP>RzLWS0fZxS{N_>$Nx^JnoYZCQ8-(# zrrkLa2q%_@;}jqL2@JX1U#>P$!$_qHy?-7HYz%!7(d*S0u5s-W=cS&Ji$Rkkf(h&q z!t`h?vj_mev)3xv?Ld5YeoENbxBS!Ui0=z?j8C1-ivY{^oZ>~{TK&3MRDQc+TOs$G zJJ$({^(gBibEAY|6P}JRD~Xwxf{w;Vap{i4m<@YWy!aN3xk*g-;QZ$o zVczQXY>mUQt4R|TbrRDLmBr5LV&N^GLs6CImIGD87Xc)^RZsYd!ah`1b-&e4zvnJR3$4U33dbk(sRVM4Ft?vUr#|Bvgk-l_+Vc>Y+mLq@ zw8GzuEZi34wMovAh?&>zpN(&rZ{cxfXH)Hub|aLZCzx%K6vd{=NGZyFuY61hL7tab}rSZ4zYU)N!)$y3eyJ37^lP%w~SWcqR%EC+ps7Y+BMKl*dnY=IE^ z#UXN34C8oD&;A*Ft_RRF4t#yIyCGfm#rjZakK?-Rib05Uj(`e+A}BTFxz$p-arrpdd&}NHc^%3?YrAw7>vT zGcv$X()GIosQcdc=kxh}=bzn|JkNbj-E+=8b-BXeEL{yDx^i#j>mZ7p0nB#V9=bEQ z0A^ReluHqhz|aP3sw#f#k(Q-b)&g?6$Z6#GV5&^ctl!%dyHfAJhQ;JRFoz;ko9XS! 
z!(<%4lwS#P&C(1_+LFgLAuh=Qr%NjEyP~OgYvs7zNO@_Ow{OEAvTHRBXf`7e<43jU zj&%^3YDxmUk@?f6sn(5O5{jLNk^N=^r#I^1g-4S<<(EO&dmeY+@qS=0qZAqRTA^!7BJjjeI+j5y z?l+;A2RVvkRg04Y*EQ|#!S9vQVH(Xe|aTQJUAJ-=x}tM4p6pf33>^Nb^ijVk7j|Vb7IU^rgiyHdEZN{~n zu!ni_Qx4yTY@k*yc1<%NtP|&XRXy;sb_4ks+5{fkzNh88e$rc`9F_EoOPg#PYID3M z^0m7O9ck4By$+bGHzdWtV~$WZsJMVNM>eFBO zU0`5;y$;IvH2vf)3r;uu9n9{9gnhoaL}Xgl6V70AWBR}^7^wKF&CN-?Y&GI`{FpFj{VjPhqi`-GL*3m zL0E$Zhl-qG-Do6i*1OS9ZClJ}i@(#r81$W9nca?|cZ};?d<8-WBy9N;F?efrvakmr*u_UlE-&3L|8ZD0Yd5ycWI3-lx0p*;n_xbK}Q6LL?$n=Vb*&$!|GoVvAM_fyCIr&;QzRh zW(aC2-NeVWb3J`$i9H)AN3H=+RgZ=fCj4E0)};=c*u3W8!&!OVTSq!L|8gYbY+L1X z=~|PxSL67xigMZ!$L38&2vHq%r}nt(-3Iem!9=98wTGV9XCZH;n`gM^Z^%<|X^_fj zl%f3Ax$p#Ur&DdVr?yY%4#^5uQZC4iFTUEgmU`a?Jo&icqOy1CB9u$@5osUv1}i)(-Yu%J-_{vkT5!4_;^_Ye4T{7Zc0pI>;6t zt_f+ZB7A}UZse&&Xjq8FkVOzMA!ltdYRwI6Bq5s&Om4Ep1X1-B?}J^TK`@Oa)gIP~ zDZS&*u!tx_Z}!Pvn;Y!)jw;9z=%SC<5pk(Enw^QHe(K8eMd6iI-DKg%lDyP-nxtzi zu!+LTawq?`P467Ew3Fb9yn#q==wX2QOKj@Kr#8)1uRE{=cJoh-~QE2>Wx%+HCe66oR! zA)>Yf0>!7BW%}$E-@IYFT~hglYNc)k4I^!#IVbAp+M+S5gLEu z3&ttp=A+!vlst{e?v)dP_Rs_l@9bTThFCkaTuA8!(eGwffG+%9#O)7R780A$9Wax( ztDK&7<~7b^EJ41>mrF*bUo;68yTeDb%VeaeS1#IRKFHA4ASIV)QrbpVeWT8cLewbf zPj2&E@rUu9c+RjsBUsHy@&Guj2Dd|Ri z7a7C9tPH%La(p!FR?aOx z1?VFC%$EATp&Ktq{xlVzlUyVs=eS9CVLVrGT)9lUGpSssYW+Fu#N}w#y?7^`$l|T% z92+7!(tBwAVHAT|s?1p5o6cS%&WK~iq_cS?kLWh=j#`SPWx{aTY;U{#0iMt4>}+TJQ8*H-LD~U|9%Y%-s4?ty}lX(XoY&9oFeH zN&TfDc-{;jCgghZk85VM%h!p~t)x|9g_8>lI+>vIa;Y44z^MC{y{ic+b2?cir0R4E zxdTl*P@oRfZdd1W`5)JH1a7l9CwSN8kzj#E8j1EZUEhcWp(+%4Z;#c78I*~in%YQD zXIx>+hJ8Pg;?XO=+Q^Sp=4=?e;AMJuf^;9-^p+jF$Z5>jyLgK&4XP}%7n+5Y>(frV z$g^BpZWn891+V8go%HHflTW{W(29WEvK*d3=F*bX)zu1KuOsyyJsZ4TJo!!0MaxjZ zuB=h`IPQF3OxFBaaE6&>TIAU)ZMIZb!eyFYcIU^Nju6VRm4ReF=@CQSwI`#yiCv#! 
zyVsJB>$gayqcvNkl;K<44Qjb&@3+fG71A0XpuT6P$ZRy8Ox~+$5 z2<*4p^Mh656zNAR>7uk1aVl{i0zBQ&9i5L7cnk>ry}Uv15bnqovIAi3bsZju za@;O-dmaC1r_-TVz$k-Z8vQT%m7g{f2}*)M)mr!T1((GkhJv|!mk1`9w&=z z{~f0w@g}?Po-7e%GmkOor0xsw!Sl@_F@j}x5Ro}ETNV?A4iSuFGZ&#!APm4KTimz6@GtAJYCAE7GPMm zs{of){-TYc?ti`LRLiNo_8AHHF@}Ulc{8kBM+PfY_-_hqo1Ny2Tym3K^T(JZ(IJUdhxqdAn35$@tRog&iErBi}WR&sFixZB4#EACr>` zPl;bF)dbQQ@&wNx9nle)l@5~|dpV3P&;;Wdr%maY_6(GI-n)|(E&a5T%j%6t6PrEs z@!H*&4%czX71Y^N1cZfM)uZA;ntPEs);YOCRMSp#N-Q|k<}nS*njZ1g%NgT38l!BL zw|Kj_%69RYIqW+uD! zn$U|xZg6B@2n;$^B*4xm!bX$TFjM1uwKubwGYK9KZJyRj6<8R}7%2JVj`OF&%%HLQ z(oMWe_?1YmJ$B0S1U=#L)47^9O2?T`>0EjW3%Y&D-lmSgVP9z95bfKPdWt@X-Dr6n zvO&%1f!|cKJ$obCUSocJMV_R&LeneySBal{My9f=Jkg+gdhlXbl4D%g!O@%-74ZGW zaIoT+WI5{k=uXu-LdydJ^T|{lf^H+k?)~lD}TF`LnYWMOW4GI!#t8bU&opFLI%1Z6my-}|yvIygYCC1N)4<;^nTu_rE ztiiG^eN%9~3D#np@f%q$>GooZtwwsN_h$W5E~`l3bWQ{<8L--UC_!;CJadD5nYXNa zzjx}zW3CwZxGAp3>M~i5fs(wdeNL$f_RiF06-meGcZbCc*9Lmzx0He0W`8hT&9OAE z;>c&$FFEi6%zGrl;5Z9i94odQa`&Aw=N3ZIplf>D>HLxc&()-sne@_hyg=WsK;Lip z@Gm(|fub%D#Ibx23_`~EPQZ4VSh{B79twZH&j_ywDhvqcpjD*CS^n*;`Rp0j__oQ7 z(i~@r0cBpIa8r;Azb zN^`^pPfo4!Z0l`&qTI{cGaTmAShaheD9Ch*>8dzIdDtJKs4>KdX&n!95W^|HN z^>dRY!Q&I_;1k;S`X!HVfeA03(vwPY8h}zZUY3o8);1~{n0DH!HgDT*Fz`>JlNPaM z%YPZ?-}Pyk3w6drBvsbrO9^7`bfP0(awV3WRN}eZ1M0hm^{um!;3;TK`h0`boDmv7yP5w|cAM8D1XecVJGEIIQsm1X zSTJCFd-D1^N$Oiyprcvw#gM3rYktl=1UYa?0@!+R%=U` z*!zY((C0D3%%5qTNM$h)Vq6dAsW8B>IY)yMA(#ns+ovnR%kLhSbmqV7?gkm$iXMXt|5L)_ivCF}Ap67+wAO~nm|j^oUM{I z>&tyTd~*0~pd#NmG?kEDZ4xH?p~Q6=U!Q>x zE;xhQOcgF^FjpvVr-lhioimv>NU12h;RdhAb7L=WowNPTI3g>^dbMS2rlz=A>6$`3 zy2H3{inF;&^)DMyVsvmk*ZjS6t{r$g{y7Ww?JRnui>JKc71C3pMbZa?n)Urew{2*8 zS5i*llktxi*Tl@GE59nbhuBV@b~vHW7$>$_woI#dPpT>zwu%Xvp8ukNTwx{lXE()( z+;Hhi6Iy)3BO0k1S@?l{D(IcAw!xKm+9h{5rRWo{niu2vG?EP37a-UMe+8MI4Kw7l zQl&|(ej*En*tV9G0Xv;U&XbX0SQ`&<$T2jq~$0dG?5dSx-t!Iw~(wL1?GE_0ZB z3VJmP5QPVHiEAZOlGS;bvN7}aY=+)*4 zt_X40=MPGd4sScf7?$g_aBxd~SEZqli>Mj=u)2XsDFmNZh0)wkKk+_SJD71PFK`i% zPtjGXYOuq^Qw6$U(g6x2`t!A2_gJ?~o0%NmmaSW44*OS9v}zcfD)%4yvf?K{R@x4j 
z+`~8Dn-bq6yJ<7aHp8&d##-f@XQ%N}q*pS5AHgiwEb)?N?(|AWpsWqG+5-`#>~9$! zclM2)8&8q^g?vdWzm2~~i#3q?u~)O5Cf8XkbM(^&N;w5J!t9Z62AVW#h)9Y9ZHj>q4HWb13*V35pA&+muJ1D!p1tKNro{7*gO`9qckmKL?^l z4g~Z>=2?+yBv3HJrG-+1QSorIWcqIXhqLd$hRS(+kNqE zF8s|5p7#}ZG=>!xp-WjiH`ymeB+m^7iGERe;cXLme4@8AKQLyD1N87RT=jG#eVb zj{1E{X>(+?+A{;x;I!?cjN{*Y+$5{jXu$P&;L5fJ%UDX4t6Ka**gFDRlxIXBCCRZ0 zX_W1_p5-9Q_;Ndy2H85fna8u#JU#g?drKf?J8g{ZNsc?$%=0+7CK$AlTV^Me`fqX>|`_;zb)di$*r>O5C=zT2UiTUV^fp{YT25Idb@bnaEsUNVf1Z&xoB$s7;sjNB7zbB@NZvuXG1 z+ISQV73H<#?$x=5z{HrCwkJOSR;AZd8|H`hW3(+Aqb*#2&U8vR=~ISXF8{3u>J#b_ z1?!3>Wf!k+1pTpcPSu0Pk9`X5Cx%Pm^k>+hmm{!SnoHF98zY`ibSBbW3G#51;@{5R zic#UqU}qcrPPo(A z(%W#Lt%9;)`*`w*au>RSUGBk_QJ~_|2#6ZyQ0cfes%A)-eUyE>b2Yf z7fm=j<`~BKPtBtk zHV7W6Aqcm~kPhx`Ynf(d(!I3kTI6(xM!Q7NW`Dt~Sqg2Y`AR-dE3V-CDP#yjR@C{i z!WXi*^{(aw+G|JV`P)Z_HJ3PoSQD=7OlnyAsXUpZ6r5F zgkg&bmu0xx5@?;v4ujoy-u6#vO)v~zoPD`gs@?P0$|F7{Wp`h-m4I{xFXX!6{ckcHh4GU%*}-BDTstez^2o{IG^i6U^ozO8n>yy6sjPGrK*Li8z-@ z9C!0|Nl_cu^D*;Pq-KOj~0W)7lfVi&<$5*;GiNn^RclAW2!iko%(W z&NKS)_l_@Ls5>8jH}q)VVe6N+zOgf|TIu`AP8&f8fiWT&W^PO5=MNIz9Ua5YnD)Lq zJv~|Z7v?(VdgF&3E zr7}xQ7LsvB*CQ0_*B`T|@nG+*^s=s4%xMYl5iM7^nb0}IRhdXji&4MvOOGtB1laT4 z>6(yrNV%i8*2M1I)ilL2r28m>W~^xFb9tM2Go2Y`?&hw>2le7&mYd!;XSuC6O}2oI zD$Y->Xy`iCInqfJX%w~7EwGX}gjy$vU!Pqo6RA&hXkV-1?)5OHaxx-hrlT^QbyrXC zG>eHaC3dXa-u=LxC+!8(Ahm~;^@^x?k1;L&^?mVzr)z7c8{7QAj-U-lW4flby2WnF-?ipp{3VB_wR+=o&{Jrr5v-Q!Ebmkt~@E&uZ8I zE{3`(QblqL<_!8!F=DUBVd;n-;Y^!AjqzapB)`{s&r&k3_rvOPp}WC;?T_IR7k5mukh>bK*!R1HFXWe4Y1KEqqK+V6Q=-_U5B%P;hiUO~s$w5itB2E| z9^y|&%3YV4swlZ!9!@6?Hw0&wUgE@!vClOLZgBns3nGkZ%rbXP$9o!qXhE8FF26L(*54?tp1EsasZSFCLIJffiMomHX3HWtu za&xIrejGlQ+?)M@o80(mz7)s~xWa<#^Um&Dz-rkyaysM2+j+f{T0=*8IitBSbyY^G}}g4p_r z@QLNm3FYtMsNn9c3(ehky7pU>C*4xXy~^a~+4-vp2nmZy{iuZvPiDCwd)&&+O@Qt4 z;F}7J3VlG6$A6<-8_vpX&cAE;B$&;dQP`yqt5W&`(TYCR%Oa&DX5}yPPHvTX%Hyp& zi}o@+qrAhch&VmsHM0oic4kbq5@Q+#*ZEk=cMm<5~Wj|!sS9WE9@1g;P(do5=q zD>fwgx;PK0LUv(L>gC|GwkxF#;CGk!Pf=r@X#FVN=)sLmw0`~fA8)TrgL3Q`$6f<`yf91{g6?2$FEgZ2me=`iC&yd>0psErkbmlIH=T 
z-;x+ni(r^y{C|Us+W%%3RjQSEx~l95GoAc+-Cpi#0+f9d3 z5ww!drVUw-L6``HO~R)z>NSlM&vZ*Qk=m{q*-gC#K^w{dt>%9ZN%)17@Ll6vDj$oI zDdCLt{lN;e#=Bq!!<(Xi4ISV<5d83&Hm8F|eF6lK3;Xh4J?Dp8)Uje9bibBoVw=2W z`+t7ln1{TL0KdQ&6inrY#nQ-15cCg|O$r?ySP6xpwXL+py73qKzVLa$bDh9F3w| z>j}3J9Nn`M&R5$U{_)2Qp*tbjx~<7H!GZtR^8ftylbe=ui;)?Z68q{nyDs()|F3KR z>%+vZo=mrT4&ukm{{;P?YyS(cj7U7~URzaZNpXO5nr1fk{}sfZH(5Uhi%w>{PDa#^ zUsNkyKYK7F2Ic?bO0WpnS4%bhOI+UD<30!Dd^pbhyyc7LnfpBa5~*_QfBYL?;sV{r zgY_uEG|U4AM!=paACcc)_{mU)p94=hPSVPK<9&KC)Q9Hu&mokkzr&6kOzeWH{CfBd zE~}X{=U{n9;hz&wq347%M_dU%m;e1AZ^-<%-Z0R=(g>-tKfMo4L=Ko6b2xP?xtjOs zeAV6$Obo<+VHtvgJh+s&7#u`<__^L+pHoO&6@?r<7z^uo_8;rLX*e zzmhyfcmL=3t&aC8#CyBu`T;(x0Y8y2c?yvDwTB1zev&ry8FQ6Oq03BKvJY@m6qpx8O*W=i%9s|8e`N-#+ghjTL9L7L%l27Q;-Jh2Sfl%w z^XCKg-~ad4yy3pK_#dCEnHuX_BO3;6=liRsvw&lmft$AJ)?&&4s5r zz*E8?=crhnM@sc%*1y)H=Eh?4dLETIA;92IbW^~1s493A^E<5td9Kj?{`T-^*BNVx zFCE%+tggYar7Glw6b`}roAMnnQ%-r-=1pJeKK4ZSUmpKI7wZalSQ`$i-&r&C_(F34 zA@l&kv;TtdGBqoipE47y%#aU0)*tT1P2X=Yv@k= zpeM7tUXN-!0CVU7%sTr6D*go1x~XJsBv00O|%OxIPO2 z8~PrT$2}3D%oLO%Gc{g|FL@;R{uO4<;2ik0qj5KbYIlx5r z0F%@I!X%HH73fY9_KKX1{{bEy2YC4Y3lD2p1HGEf-2cyVbNkDUp{~f~hV3smc(~l( z|60x+*5E^HH}zvV`2*;V{|h>>oW9%d1^?GdmQ9jA(%PjaND70uQG>S+xRCMeKR*W^ zqVORmI6v^vqwTkOBmUw*oGico6sO+GmFHJv`ez>WYze+mRb#u_4n*c%^+cGVWNtgs z@GZo>dSi3(+w$Wz>E8Edx$RcSL(w>P2yzLD@}1RQphzRP10g?BN*0b0>*wS;>an-O z4ACJJamI;P(zuaO@%?*X{tedfU0YV(WfP=Mj$p&SL&PERV;va-E1E>$EE%_2YH1TU zzd7cWl4Q0Qj2DB=4LtM`TAXYK*QHcavj??A$a?`?-*PDKV3*xRETr`aF_@^P620nE zdFS2x)_Knnd^M~%0XRi^KrvX$|F(VrjFcic5UhEAHL<(IwHY~Lm`ZeFoFFf#5X$cS zge*_z4Y-8FDAj;_CxREvCg~yuhbuL(cY`lOCq~@+y?$RXlLj*>IG+FyfcRHwmOde0 z1BnfEAa4Vn#*jk}2+>Bz`tZGRN@hW@kydk{b9SSd@QV#EYOc-osNNEr#`KVU zAJOdWJ3j!3yC+@N$mKpeKD?_Z)q2uwMd=o6WgBn;mU-C*Bk^nDIw!E%JhnX>x_Nh+lmz z0>j3IK!LdH*wuDK(iX11J8AcOOF4fW*xb3wxf#RNFQo1`2*++E>^TN?tbAU@n-A;B z>|BwVhKzelplEa*stCi6!w~&LiCgvN%@GsSr6{XXX@_-O6mv0~Q)av`r$DU%^_!4} zU5Ur41JFPD=D-l3q1y4pt@FkA_?F`7FA&1)b2gS=6>8P?d9kxUCO$zzk4IKb#j-B~tL+;QHi(*rI6{=wO_vfn>n9{U_G 
zt#V^NRGL&0>_g`qh#48l14h1xtMH{0r5aH*iuSfhY=e6w_|LB278r)+#UDo->(DaI zLzB8(eR-O@rh%}KL)ir*CA)o&t2c5~jf2fKNz;^Auk+JSO%-s!9OS4nY#rmlBw`i5 zuJpMSkMjI6-HYzN=WRDrS5NKIi33`Ux1V^ioFO;HyrHbmGbn=Um<|JSFc>HoeJat# zI8XVA;RloMhQj+I|C%86sX{N+{5j}%>ZohK zOAoDyp#S}CX{1A$JvWf9>45j|q+RZ2WO+Pnt`?xgIDBT$hu*Gu`y6XK9$vEP@HJ3+ zcLl@I)r#s}`O~dQHnK%)N0Nc2ilqA4>THPP-90x82<2xE9rLx)K&%_uup5uCjn-~y zPj+FZDOSnA)(UR>4>I5J3#i?}cq4W<93o3c{Tt$>JnLD)e?N3Bm-lyL*qv6s%ytUb zkSyoBk0?w)b&I$O0v9QMueL9XHc%!&e@DINm1voxS*m5CyGH0tyeB4;EXZgMiB#z_ zw_<@Bb#&)7b{dgO<0z49So2#}j) zr&opwEhkL6zZfw2*GQSy4R*Jk-^VXz$v`Z!;S1XbRZsQSxZ(1Ul=jRx_fN&k&g~>d zBrvUX5fhIbmHSz_mQ!8cjz#rb@lUmXwo0q}7?}J7?S{IvG1U<#ph^9q=V0PKsnkF) z&W8^f+f13B&?yZ0+$XL77c|5^NLuJ^@_T^{IBgmV2ZJZ_)5H@-?y*fxhIX_=?r=JW zU2Lv3T2+VtmGjc`Hj9PjRoj1i12hc@>9V}QW2@-S{beprwQMvnru+?LA;XJHlbZ8x4@HB{ri{!yoh81I2BtH8d?#>@Ai0x>TU>FlIRr1UpCnb9P8R9-^ z7}XN&aBZA-oSO5tax9{SD`k|QeGy^k>sSgKljy0mxOAJNQd43HT-(^ZBsV_g#eTIb zr&~Ap=tcVs;>oZhm1H9krqLRW+xZk`5F?S|l`Iqey8Spypd@ma*hM{6Kxa2BMwNFT z!Z23m${x}S?RhiyA82zrKcm2NG|??;kJwLTO0DS8Z3J;6UKI}h!cUXnPsUe8SRD$f}I?_x0M`Mvw6bi>nx8(A_uVx z?ShExmRo^t71@!Q>e!Q8ks3rjc7aIsex*uxIib*ZRAEI(pDK&Rq4DekAvLkg=Sno) ztOlsB7=z}z9N4P#B7E)U&r)ge+k~q57tnTzLo7!DqgoaPDf(71Viks$7eO%cqf=@ zwUeX;iqEg?>WXdgHY>+`RXtOwxamU}9R3$% zjB9lnO^dBK;EiF@if;}1*`;cqmSP<5|13uqU{4k#pR6a#=&c_*FQlnzB4}<;oMxVC z?Da2MelEPB&RXo!dihw_=6bEop9(8qrAd?AA)AmlVs@)Aqwu-$6pz;iEd!j1njcET zCi6hKl4$Ck<%DdDsG`PdS6PD&=b99c0LzeTEFv78vnqbecM)_KADOWDeG2h=539eGh5f_iHkl0@rF znB_y>lOTI|P^)9kPYTC!PD)j2`z2rd>dAb8{jUj{PNX(uvBf*-(Zq#$2llHa7+#Xy z+}W_EI9dLpDMfieSjQ zyNavq%~^FR^C?YMOJxy6i3)bIs!~Df$QsZmhDpm+99)t+S5ybiR|^2 zjzYJb;C=fSnS{nxbHZ)bv;S^?tnH@})j0WR;t^BB=hw@IiUa6_+Q_Azl5f-CuA3>7 za2T%lW?Ay~ag!Rtl5W#}aU=*mKoyNEW@!?8EUls_G-(W%pV=Ij*d}s9(G=rsd^Hmg zyWwgHJ0q1=hZ~4&9EVk7HBwvdHH}egE2{ZkiStw(FLB__jS4$29f)~Uk@K|XgH7Xa z4>`vm{?F8@B_vwa?`83NM%CtD1-vH$-gnTeU1HilR~V^I-#3=awf+Nf9D!&>v|2*j z!=(d7`x3A)n6=*aM^pYJB#}NFVMQas$ULSTBW1`-nAQI&1s9y3u$(kmj+S^jKs{kB 
zeyM$B0ues^*2z}u57NVmpY?T#ppwU6;##1zVGg@L2>xp+D453RB>`D@^A|^vzvNj` zjnCRIho|-G$xypc4J6!*;@tc^cE0irofv9hB_X?HYTNNuk+jk%k;zD!ygOO> z_RSVKncy$f&&Enr;kt()bAFPl70(V?v1m zAMjGi;XZb@E!7Kt*m*9GgP6;wgFuyN46d`DGdj5>=PE$TY$d0m=d`nr4xUY0#-{00Bhx#GjP*HHrQVf|fA;GYlhK zl3ME$OYM_P7F$;opI5R?urE=UILb9Q> zWaIzmzVZJlW3*gHx7R0_A&GV5All?XV2lkI;q2O1V-0 z+#F%bT+k~0_g~2oN=G4boEAzAEYZL>7XRxwqKUPHb_EP!sMNb@SSS<9GGUo#7)p^~ zct@~BS{(?Un{nPhf!qL*o?_X9K;l1WFuqu_qIwGlarydeh${k%QMq!hKeKuG^?FDE zxz<~?*M^F^#9EXpXBeJN$ED~7Rqgu%&jz}pKSA#bS*4L>gshw7WPW2g|Ic_^^|w%( zR8uiGCwr8+c~AV^wJ-WO;V=G%F!OEf2<`xuPjh_R{%0S?& z>4nRmIbhZPuJFQRYBCN(yiei(R~S!^9`JXemKj3+xYqv`?9+Qe2{AZGUXeqG&4*(A zpYc`@!?nK4b#(Th1jlufVT$^CG!JC%;kRH7$8fF$u=ULMhXAo0o6;i5E@PAsTnTU>ZWlB2g)Vpf7z>>Gu{ z&>)W^dg0rh-R=d}4!ui>tM8Vj8@OC2ac~7wL*YGgWSSCcG45?IGP3Blx3fwPH=~=F z4igHF{11hv+6#(Z;}Ec8sBw?JZ~-3O`iiwgcEV)I!?~}Gk}P`nG!vKOmmzrai|D37 z3#|&{M8s-+y8F4-@Z<7+|5x6!{ra1L>hJYeK-^VDM=eNk=Z=9gc&{+%rzw_oJhe_KAculJ309t;6bvHi& z4O-b1lemsv;ydoyO6#eYVTY64Q=30Z0S=6St;oZ)V$+9e3pRhPG;VE$P)^b#kM*%2 zTF0e_>h%R|$NB>J1j!Gh4JZ#&Up5q^r4uj#=O>FLx2dyu90eYlVqJbOFt{e1-o3`6 z=Q+NA$DzSN;t51TZ^;PJfyW&jzO`PzMgljXkn?pPKQ@Q?vz*FxFDHnQuN?#m=|@&H z=c&;%*pzVElAZ!hq*z*DQ&ugl@^;dl6XWu=_fAcmpk~wCx68hV;(YawW12L9BK-C) ztrN5&mn>8mBW4t~JJMDB#Gdm>6v!>bJfvPAf5GwZ!_3+AaRASVFLxA$2^j&n(QfU- zRbXOhc&LM4*gUL1o&Rh<`KS5=q*nkM{#xNQt3OZEK~d7DUok7<&p1n>p7kR49}HzU z%to@@-7u8gYdUbS!TTxq<=OH@CLraga#;-E1Two!)eg~^I8T+uF^l5v zrO=^^7_OC>;e9$KFc!d&y9cz+-cHgJU?=GYz^#}E&KF?SUP$w^N-{J#&meEt`E{Vb z3l}t0K2_dZPOR9MwV#@;@hB=4=T~Et<(vitR(3zl%fVJ?`0-m>uvOx#(NNj+Er=v(}XQK^Uk^Im7@pf)8lw7gMG zh2-?66^q`Ew-fpCSAQO)e+{TP3>;sxI;T0l&H@Yuintx$C@f?4=cL|P<53(kmr-f*J#^8mB~ZOqoIWkn3dwh9%6pgSj3 z?sneMJ^KBNm-xuHUVz^~u!|^{v)x&+0eSjqgKx`#5jIkPACSgV3Ps)|HxBdZIw)ls zf9@#kT{4Pjpgf@7=Y+$KHGE`Kn_DO&*AGBX9MVK)T*H~=ZYg_!vaFQ~BVEK(vENLd zqvL&^MeUr~y0EHNPB;M}i@amxAvKJ^&Q`&e@J7`-Qf0|?f%KaINVJ>AtYd#))m)qq zJs^kjVgA09k*2=%?cd6M4hxNfNn~)E7`Btn$v4nN7zw90bj!Qk7M8znl7F834a!4I zp%O8U9~+Y&u-t_*^aF9@$@w_=%t%`SU;W?|pw1Pl++AUEoxtS7gWc;02ii3(=@d)= 
z<;HebQNa|h2XlFsWgXDuR!2);^N~0L?ur6WAM8QL;Y@$6-O-uF#w#*l$B6r8aJgh8 zG~pLi9%A^Ya!CVZui;0jNdHl=_v#i$;ST(3O}xyB080IHyV3dzA1M17F?J-wI z-d63$#VZa})TGwFZ6Mw3C4;{8H?mQhsNe+K)(f4X;>|gx1E?QSad7MLWaL-*;+k+a zl+8ZaqXoMQhT%L#`oigsd4+2eBGocNG1&Y2%3$Vz{7@(1v<4$K=S%i6Knps9rE;Dk z(U3LJACH#h4x?-!Z>!y^Lb5a*dM}Q__7+Tg_fXtXuxoPGWFEt+LFtacWfrXgUjJu| zJ~{4+WSHI`67!P%@dn&;&SI}HDQwj737%t+Gb=4tK-T(H2l70rj)vu#%S?P~ERtPn^2-3Y z(bbp`T_-cq9CEXY8{80g4WNPE&xG-ic_(y;NYqb#bzniEswGY$1xqk_s$Az0*InKd zWUQ?ZRC$DFt%*l%c=PqQVCRzWGHXC{$g}8`zO4O7^xg(_PEBIu>%GB$<jNeV>uoc?y&dx2{b8Z8zxiB5sR9E1`D!nltU!7C zPxQ$KUtxoTJL^}=)<)cSzcW(hmam;4S6Vry9PoPJ0tbhQWIb73)t)DP?%aQmm1H#$ zK+WS=RzQizXs>=`xhGOEP7Y-_@*KVu2~3_ngTL0E`Aq=Zjqy+5Y1xO5IlMiq6NuXd zf4lw(Xf%mFAW{*{KsZ~N^u*UDWp+UE?+dlO{Wl_JD7PN1ewvL2)KXwhoCK@$IGP?T zf6vb)m$j;l@DRPW`#2KNLv;J}Cm>G8myh`O3KuL5@RgIPM0X(IrYD2bjZ_rL(ygF_ z!fLse`Jh!k@;-AF6+YItUY+9nj!fET2W(FBsf`D)AISyUT)`9p&fjRNXV1j;-XEJE z_9j1f^k9RXL+`ntgjsP+u1j789Gv?&*bKMi;W4Q*v|E4$e~F&hCx_oYyb&|h$GRyY zG7@Qy7PWg)mO%XJ2B>rM$+bZ`M@_7@7b1XSuBAFq%mdf*#(A;ADeeMDFnA%3FLi0K!5O(+i8C-uK_M#D@;YnYIl~I>MZ#) zAC7LhQso#uM0%4&Li#|A;)Q@T$j7+!ye7*VxTFaA1rV|6!&t<}?S~X#ZLi%ID~B3N zLHwXPES?J}M1=t9yRIF3~Ih=%$A-!u%*5aIc^NK_TY z*cL_DiH48O@I0~@TszRAjenpD98RC95^W48?!&^tEc0887+hge06otr!^J`&=-jcT z=LkN+dVPQ2aL94E*ngx0*7|EQnZJ`IW1%8_NT7_4Pza7*tNGY$UiBD&uX0e_#c_e- z;Q~LHJKO$DFyCYKAq>3nX;5*VAsueo^NrDmn_b62Z-q20yK^POx{|K6<^!0P%)B~ z*D)Q^{VDfT2Kj)Vgg^$shAwX99xocg_St6o{}J|`QB5sv zxW|f>hy_$i4x&;N5Kwwi5l~SO0Rd?OA|f3`T8bXf(4*3&DX2iCNbdyQZ5C@y71_ojB zuX|PO0H&!s9c#wVf8pa+lsmXm%0F5>YVpRDZ^t^@;Q^d*!B0nW7iZWeu=Y+YBSa1(0NG#WW2`L4H-@KuYjcT z-~o?|-9wJ?M%MY?zO`DI#m;A3i1BtEm0mxXDO``0sIEM%820)KI z74k37OB22ga>6wH4~+V5-hi&HvIY%qZESd#DbpN+YXVBi;=fF0@R-<%W1#8b%riE; zN7{y%B&A!NIG_Gp@A-b?ctX{ZK*bTaGmrf*vCKy9s3PnKpL<4AD%1x z6#;n1gIh5a7$6*squF1;`|f{I6d0sXY5OeD9wcQ&&l*5#FifWwO?dY4eYP3>BuLYn z1z8A%*!>v^%%9t+>CtJ?Q}OcMpl1(vC%N#j<0;<*Uy)QVkasKg@4klx5Fi1MFgOaV z;Sc0Kpx@-kpA8{g=rl?nt~pxeUWoQ0S~^-eYnoe2c4(?CwlmHJp|yZ 
zeZMZo{uOw0CfNb~r{H#D$L;}E`rA!yS&(mLl>eXv5ed+aGYZ_ejwHJY<|p1^r$&3Z zKwp2zeei(9JY%f$4aBAdN+xLKA4{5f-0)wX4ZeEf0Ot5TFsb&WA-N;&HJ#J$^bZdw ze82Q>sexQX+iKx{G!>8(++m(X1N`Jd`M$(I-8!rcdoxpf9aeq7 zr|tLg<|zv-crMCGY`kOqx?vs|7#kP5m(FtJVSzVIFRR}{8^c~CNS?n9=i_OqSO0W{ z`;ld-7;b7iasQ)Tq0+BRu}>YZlNt^+VgAI5H*r96%U|Rd44uSZ)k`#NuXe1l8>~>Oi;V~ z{f+N5k>_p$yFwpp&Og3&{Lb`X&7ar-khQ20b;l*pI-?7pX8Xr zOo<|FZ!|X|ueDD+2=x;D>U;24%GJfjz_imk_oCp*#$2EdcwYGGXz6T3?w8a6#{a^% z;x12ryy;!(3Y)Sv=WN~MdCeujs_Zb#E2(@Gpoi>Og|q7U3AGI4fvu0*fsvjgy8DAX z@Md^rmsdiq=@+`M6uAAVz7>`IW9jPa7W+AX_kMT%wn+)RdKA5{wP8LDBx8&3!<9fk z7f1=y_?xacC^}Pc>fb70MSiB77-e+nlX_5t+$rBJ5KMAAARctP?oB@g3_M_oK@S$r zdmbs6NotMmsVMota8;2#rx50y*-@|gki45u=hK{&A^m#S3Wd6u$1SJ%RC!v!L*ujp zb>7Fk7hmgGt$B6G4PcIvn&XokCLZ~{>s0i@*1T)HYw85~sedE} zY*i=%5s6=2LqEAzKwAOf>7OSLNJuEdhX#v!R!X0Fi}knDlbb||yBh3LgD0OkCW-w1SJ za)8RNy0+Uh@eh_C@#9rBZj2aGP^rM85Zc%#affj`8IOC*m$V2mg@+{-SoUm1A$tgCY6bci$U`&CyyK`yN@G zASsc*ooW>tPH~z8qjb()^gSXx3~J?MS2L$5rSCNB?3R!t^qq4Y#V*d0Zz z4&oqUX)w`8_1ZUdnQ9sMz(_=us?DE;Shqm>0|^N&Kkaw32!hI~L^3Or_^eGy zLF@&4oFRmTxjOtog!l!}>H z?`e^sYeC;rz5P0M?|-~e^?~-@yi?b%S~LDk5}!_org_E{@WlF7bABVQl+ndqLk;MP z2M1O&EVg1dqh-GdWCS11R0xL-?it)iInili+fiv$oGAOuS_wrH7eNH z@u9klW*NFElI>BOrmLqNf^?LP-@3%3O75JUnAW$FUbPI$T}`9?cm)P7QQ7X3=9+^n z1g>>Ytaabi+!rb1&R6>V6h;Qc3Pu$|QcDVP^e9{vj2oxJi0j*n<2?70(BqmY3oVtq}Z6WmD%XOeajw$jKaBoCX~pX9^a$eI3Dp7GW>RSOjO zc6#HyD>l5fsGM3Hvo^_tVQ#-}9tFm)$fm;2A|rDvFY|s++{bmEt#_8;mKU&|R$?QJ zPxA2JiXtQj&uKP<{R1M)-gAV+Nc3VPcG7_VN_2Y5hYWWndUS`-J|BqRWun{>4s`%K zG!WnJc71c!HO?R&Rkzo`O6mwj+6i>V5me3V-Tv9JVc9*u(l@I$%LGV_LdsU*C#c7D zYg$G{4DH9PiP+XZ+8@037})Ct&FOB@7|Zo^H;e(RHc z&3U|G&&EKQQ_KU{D#rjZW8f(b8SM4oMH^Yw$wJf+4_-VJGTU)36TgA)9IO#^_NfGD zF732SY29M<34L?^Ce;VT$#7k1{xa3SroDOpDOYVK{*|#Q^3dn;R=+*FKgx^M>SZIQ z?DM$I;Rwo6DV1G)3roK?tHG!tq3Gs5XCS5dMlFDQNBk%JUq=pIM#BtaeuA-%>q3%ie?&HfPvk2t zL)Y<}7MM2ebbDUh=AeWS8S`!E2;omYQEFrDbG_?Qt4pgYCrm<<8fdH@k9wM|IfH^S 
z41xk_Ba`WZ`;p2<^`o@T3SgH}~a~8t^vC?Msi=uT{Z;i`YJ%QY7xl!&e@aAM7}vbZN}~foGZtGjWp-414-@KUSq>a0bITY*zCp*rS3B*;{#CpN7d9RXP3I>Hg3mW z;xv+^{`Iqo`An{=K=%9sckXsnW(dPgNQZ5RE45&ME2?`&Rf=KCt^3Jyms&c-xCQ6F zZ~7bhBWiy+;i#ig%dXgSV2#+!T3{Uu2c`=_HitK)V}cI8dmB&?8J>33MXC0``BzJQ zf|jUbrMXpNA#1la_XWT9uKsr$Vm2WP%66qKu>-O#WD%U=L7QK&9Tzc$E%NMnwJcQ1yyA;x?5N;D-AqM#2d^ruU#_%+UE{iM) z+#tg`iB?eVr6@=hX_GWTpqvV4d1(64Hc>R!02w5f8>uf0rc?In^<#5(ca7j>k?1sj1;d-vzT51sk zCp)p}gj-x>1I^1e3c~v6Wyu=rw|Je9 zfSo)?2%@&wuXcgH3|?R07yutqJcxcUBWKQ2FiIen0hWI1p}%c{2EmM%iJx*@Cl8VC zjmA*cf7+MP#|c~|m&~fQjcj?&Jg%N@3}LSgGi5)3r)|jHvfD%q5Fy+-?$eDtsATPr z9G#VkD4{V{Z2_Th?Oah{q<;Uoo!RpexD~E;$lxYkZf)Bp{1-6?`I|eswRBSjIawsn zStqkgd9CZr9J8#$m+Dw$KMB?EA2$T(SLC9|Ex+rspq*|5L2H!Ppk5*B8lcn3s;Q06 zE&%jiJ8o{z)Zh-58ruXje+t!4Ql6OA&w{Q6OGn+n(Ki9=yRzs1AXvaiRBT|uZ~uT* zknjn0^fGm@r2{|)DuP}xCyG!o3*bHiXKAA7X7;@2C$Kj39NvWEgu8$tJ3Sb^633+EYgadFy&!E<6D@(I%^@2+G4< zDVsUSU?=J>dv7PITBe^9Rh~xg$YEpvW$@xwy7{s&+Vw#xAT}2|Ozrob@6x0s3W`JBHuIg}C@gq~Ff?h$L)^f0q zPK74xqu8af_dggPytY33W$$#NL}BN~_WXRgAN0@c4(-Vuf+}7Ub@Zn_WcUPGwrvB2 zm_q^IW#YKs)>V_8%$(EUtuAW_05EHc=^u^Ng>^C!PSlt7L#X!N+?s8|$IX#Tg9s1m zkP})K!AL>|F+z*}AO;p4kRT8bVAH*sQ0z+6bEt-)JmPp0jLmDf9afUX1NZXFb*nuj zvw$F7)26EYbEZ`Sz)m^ z5tNuxfAT<8I#|JX?~m>SG=*z3KmJ%+m`Ory90H(bAF6~7O(M#oe%?Haz-1Hc>*y`= zc7Pjm=00)d*}d|zF&{tT{xQm$%b(ZH5z$JpIUvOJrI-w9&voWi=cL@v$ zTo6s?<7NZD%-2Qhe7mZ*RBfT7OH4xO*`MnEV({mj(Bq)-@F4mfclqBNu@Kz|^qvILrJC*4LU zpYQnn7Yeal1%ef&fBr) zwbNhg_a7VrcCPHTs%+S#Grd7=gW7XcCxh@I6GSKNS*Jv&jB18G`wOz1&ZCATrVc$+ z#}YP@10NxF;#i&kVsCl_gw@buXus9l!o*UTa(zt~n9CH-vZ3x02J``92sLgAEQ??X zmW;-NyGFA_glznZ~Ey3P@&vuq|N6DpNJ+yHBC@33btWj350=Iqaui8$P+AK z>lWZ2P(?*3Z*fAZ&Bc5@4n_P<@{F%rC>)4|>_V6d_+m;2p&6*Y-3zo+& z)MDMJ#+-H?D^?GM)zh0y9q!@w8=7 z$L#!WjD&gyjk+cc zn~8)N;eci8;|1gjgwxo2eAifi+YsOK;w);XCXsCk!aqr1Hgjb!nG0oCat0V8ZK9hL z_5>OIpylo|}NM+6&UKUlpfKhzKp#HT|c3|Ud*I?BucUjZnIrPV0I%1Rj9*CVA zNt|B$`<&;ytC|wv{9n(mWGl=v%&ppTw-#M8YpEPvowZhCDvmMs6)ha_8%JRp=@0=1 z0N}!ZbLCeUl1t&TX+2a3^DD#}A+#+67AhP`5w3ufck^_nw7}eUGbqQF6_)V^4fa$+W&L-Gt9Liq12} 
z^W$`*7Sq2$rqW}{bBOYGh&D&;)<@&5kDrA7PUy_-*my6XypghK9(3xECo6pZk75-t zyE=Vu%(BmEVmbj|HEpc=?FdArHv;D;4j;S2s=P>guUTcH-EkvGIw0(m_GNair01{H ztv>#Z0=OF?L!Z#0${KpNffy*wD5tMLxGSW-7te(cdFv{OJvK#*MG6yef%ioS<=P9S z+HSw~xot;^RlkJOf$*`Koh81eyFBNnz>Y9AJ{6k)wBT%N;2`O2(P|}J#51WUGK-&P=lk6D_0VJS zL1Es%TZ2v+$27*c#pO&Zh^e6)W9rq7_v}D4^tg;M1TLufT|lU1HpUb~2Tu%M^bE&6 z+3vagKy$kFcq}7yvuaAnSZJD$DxfnflW}ju1iEfwsL~Ta9Qkm*%0$`o@;1Bde4CuG z>AHAzqQ3e^K21?Rm1gw%3rR!jjMCU+!;$zB+76d7sfBEyQy(y7pVHPr>>hcs(GauY z!1`IqfWhTx!0^*13GT%TzKZ~biTzjcur;PPFKR3JUe>S1u^R)hd+9}G@5Vh zI5heQ{goH}RVO`7`mgCIpI5@uu~Y-A6X+@h?Grf-XonNHDy57UrTaY{8)8ZnIWHEr zsT@T*HxnAJSyg9L6(;v3>-bF9M5)F2QNr>5`&bv!7lc#_U3!?R=g38Gyla`uwMoWm z|A16Q!A3)ovP~zO18$8aqk_JPe%03-@?utaS=UgV>^<*(i&=d-Yv1EX|DfTSA3)Vg zQc4ep`e`lv<#*AORex|{r*Xnet(<3k0`7b$i2anhXl}y}2(V46=E7-DJYu6X3HfD* z=DPH^){!pL4_Wz{t7@60te!4Tx^R*AbX$k!!9!Kz=ZHukvaa_Ytli<@Ey1r+`g*tP zRBP~&1zDZ9vGg(4t~4(b{jtDLigBxtz{`TB=xD_Nz@L9I!C_^5Fm6rH+xGlCdZuSHE^pjr3VKY%z!}D zYVEGobn7~@=lnbU0e?kINinJL?(7q6Zru_MUxQV5Z+pd?TEgzz@DJ6_}Oy* z6X67KdI;65!}ui01DM4t$MGU^VBcZe&d{Bcdr!hl$nPc%)mJr8hirskH)bxQB|1EZQl;IG@eFPizH9AzEDEFZWt#CC?8N32~&VhZYb%jDs|Qi^R%-T z^mOa>yn{|aSFu$)_#ymDnmS%=f&Z z(IG4*RE>X3^}@H9CI|khs!~Djg@0 z(n{v{3a6``TOJ>GJ|pH?ogWRF;`~V@9(*daYG84Xc#TzMbR$*MTf&&OyW_Bfw8WiR z@BO;+{;4u<#~a+NT>EZ!-VC{&aB0@LqhqKshVvowIU?$-$-P#`_ymQM&d!>Xt@LoU z2n8`})Agjzj;g2Xqjx^=Ue49!eT$Rbajoj+|{ByQeO;Rz9 zYDt#{G-k2y>4bSZCut5tK z*;+;LeLUiPhJF8Dhr+#ktgQTI@nspXFugiIUUUWhRoB`1KzN$se{gHbUrx6ivmAWM za%TmN5e_ZePDix?KhTgDx-BRgU3hDvs(4%5-^_o6>K;_Tr!Gh;Z)4F;BcxBGT0kBp zBbH-YkaUPj&(``_@=JjzZgn%g!O2QzKHKxW-CC9(oF-{ZxZwvJ6ES=gcY4|a&-y3k z;HW(-3hBc*lVHEtl8&h(51O#DUhfD&6f=w{Yet=JFYUjGEA#PA=8@##PN1_hoU9a{ zu|tvGh|Rw#+zo=Zit0*umk@TPl?WO;CZ)WN3XgJDbi4DRfoKhkCV2G`)a2R!ZzeqmjVXSZ;?(o6aJGEs{vqNQE-1u z3joj6qiu*qhULRFAM)fck(__w2yI6S-Lb>c#fU64R<*h-!@i^S13!iFHvCP3{Qboc zy7>jBAirppZYV>W|1u9sCUEbN+WrQfq#_8Y5ZSZN#co=Q>(Fzui=4+e_^(fbIvC#T z`-jH7my3~mvt{@qG_%AJ^^LY7WA!NE0QZ#zDgIH$vAujT$L}z99pD_dP)j2>WbA+) 
zDAT<6s&;GULW-Li$s|d0dz`^n1bZ-3SOme{#8Z}uS^@l=B(5)q>xHDF;3ut60H2%4 zvTF7oZxX@!wOv9>PI+e-$Wgy2PHN#UP7B?k>Tctot3;yUkQ&zLSRvHE(Y5 zAtqUJSEBcM1vW84mS>3T>qWTYK+YAyfw>@aA@=>LO$b*wuUn)r{ZpU%eE9OQkE7qdImz?wJSFAo05 zt$Fi*A@ZQ&_y!QMzFf_oKM6Fe{V|x@IQ@Qy3m9#t-?DnhVC8t>8Unhh5JiA>XSxC9 z5`|+T?N6F>=VFkbdFtNWhk*Noj4#8%OvjV za4i=3>;s_gXdBkI|0t7fJY|ypzm!Q=GY@)U5@(B8=z{z~8x0B7CWHopwPAN5a1=PsSVZwp+VVN4*Wy=^P0A8(^=U_O`IjO>R+0 zABw{r*4TeRI(7}>CFfgRYa^G`Ij$pAU1hUK6xV+qHYxb`p|Nq4L1;!(S4eLlYK^2@v^yCkoh7ZaOs_P+KWI)_eW%@2&@467fReuS-B zMV!QxWt=^Bzl-;?JH?~}c_WYB9Wfr8zULcjxRLA%wdr(y9!&x zqN?e9K)UEFhgh}E+oS#BIZ}~tZPXC+|J1E`oC^}SwGQHpK64Np=`j3ljrv_(sYLE( z;Nr0{c-9?=ygFI5kZeT9$*10|ePK8z`yxwW4sK5_0%dAs*v2VS$CvGJDaHZ$q@3PDD{ip6yEU1sn0DXa#di0mibaskd`5P3_(V^) zp;J}cDmD&dQu5i-(L?Gqb6eLPLW#|<*r zI#TLAbv3T-(#-wC<*{KTav@zJ6h_X_QCVy^i1I7#Qx3Xij#g(6%>ijqsTmfvib0!eep zs@UV&5f55k*fTPX%@|B zF|8*ZQ?QZRfTH0E^~=K)#wz(X>sOv>YtXS&E!9Hr&jRQ#TB^fYrKqZMa!0~Bg^|$E zpCh5BuYHS${pc$WaRKrhlRHo2`!f1+j0vS$9~~ghQVXsSrekJvG0I7RJftwJYemf1 z_=--*jBcb*2O^6INVww>ss?G&1d%tpm;K%;iwf~mb(oQ2d z5!ioqXNJ#o>kS-&eQUfW!bmN{%^H~~XM55WlC^F0R;u#t;EhGsc?Ix19>XPPdc{Q6 zhC%~htcr%r;xoNys(7hpQ!`sEEGW&LG-74FR37!Epv-Dxq`OQ{fC< zjDnYV#Wof@mhz<*(3`5gjnah4*VTH4N1DnTcDqlVbP!g4`^PCa%TRwjazJePAgRg% zxZ^MBo+Co;^t+O#9H!av<6iN>!9>lTB=}+Sagiw1wpr2EQbP@X7o*I#p?qo6V^zy# z$fR2=;>*=jm0s;1LiuK9BBno&Q3+_Zwy6}HEE%7EiBxD``(!&6gu-Oa4i<-LQhIQ- z1U=e=m9x|I>aeYbwHGsMBK+J{gV(<9J>-fueO1ydZ(H|ZgV$+aH@w|Df<{7P2E^R? z+#uvuwY#x?vttnkBYrbqMKtE$6|S!Rjyp>k=oYry7<)SG*Xw~V&i0+TF!-yOoIjee zpuMPev{cKY$Oh|#KLOoHi)_uXR!L_DUp7kDbm%aVbZ37k^ycPi{1ONKH80|v>-HO?@w9e#UR>GuToZ`=tiM=Y4@vLln*52(q`OQQs_gqpv>vLGZVDYk4<6bw> zx$vWV1XZ8-jNRpTk&l%mhHJ*_HaCh#=^oBe2vU{x;T&lzMmP2LiHUFr#6&9NOO1?) 
z;a7%2zdJ59rbKC_37^_V^4>UgO~Bwht&ERcTFysSy2|f!O=72H+OKD8JB=Riln`+p z5DVMy1|c4cRbDLShl?z!CKmHGqI|;*_n0&bM19?6vR53X8tXyx9y#??z`#tG-YB*c z;uUh}6^QAoff3drhg!VnqQ>5~L zZ`@8%40`K#Tk<5C*m11$_QbGK&OugWOqSLSMQzeh5U@nA8=Yg&hik^$dt9{rQ zcS~GPJd-^nphm=Mhr4EMc=M}rs!llaa9T@^V0kt7SKbldC5+qF9gz{Co=hd`elorzrBCv< zh=D4}`>826e0pWK$hutPmFQ_!S)9Rm)38ys@L-V|m!RmAHtoH<=xU04p>8M^8<4it z3?B~d(=Oad+jKya*FT7^)M?uYQx=;1(N{I~%#j2``wCy}3eTU)1{Re=vv!LP!d@X+ zwa>uMn(DVNlUnv?Z1$eXwW;@QAH$7SRwb$2O1l^yERPTRRdDX}ShsM%`~b2EU*eOOH38`@TwXINVvtdGh`QYYIDTfY1!;`Px>{gHA0D8WNs_zwkv3e;c ze|i!dg-@k(nuOB0_!RmJiJ5dIq%EnZ9kHn#j~;8iQ5zzRhu0GM&}yU53evLsv<$gE zCP%BVY8*VGTHxY8BC7L2g+tyh!W4L_#~6v|j&4(y3z1U`a>sMz^!(tO6%j3B8g$=eKmfSP!emL^^aXz%GdDL|Plt9~(KFZEdqC6S=KJtafW3rjAsKJe znfT6(P$B1qLemmB#cJ4Z?ydW%pFUy~KGGbptRZO~kP$!5CzpIV?IKMIWJ~u>?I9uN z7%_gdMR^%cd5{{~qa0$?p0fXE^xa5^I@_z{A(<2RIra`J7plxjbs5{++^#};mZ7*# zDFm$_+9>qU?C==X-HMPJ9UHEscpUb`OAm((oq2L5JTMY7)zj$Yk{&XJB`ZuAN-p5);Wc zSVV|l`^0Rwa2CEf(h;V=4MMo*Dn*&~IK`IC_xUfwsr5F;%JkaawYWZ3vw5R&fEH9N zcy{?qG*?%bQSwx`-P}2s*>&W3B!-$580tf_@*#61?`lnFAYbPce3a??)eKLfdcD_1 znq*bO;&Hc64J_%?{99hVJ%w~pd-m;;%z%7Ycy7((enk77+E;61Z>?%PyAmIZ`&WPN zb-!Zw8!P<<4@2_x6pcMsqTG-5D3o+yRyr=)E)|+kQU} zU1mp3UmJR>SHpdGY4QVQchBY=uL=Ev1Ue3O}CjxWAZSbPU<-wTY zBeEZ*3Af(K(y!EbUEQ;0+nV`e^_t5$xM$|UFzvbKA2z#iM_;f1LR%ze$%6da6L;Efyt6Os zbNp{>?Vn-%VKJ?%SPPR&d#{)kXp#7Szt}T3EymFJTNzo=DtUbvklb>~qZAXZ9KM3<|G_|KVY}=nQ`L zcVQMLFT^L;!lxia2J;H)BVf+9!UMMccf@&2pR+9W+ipEVcbKCPORa9w=S@&7ttCo&)Wi=NRu*CZ5J{-r9~7b)8ze z_b~R^#oL?qe#t)&=8Li2cMh57^^x(J0bHR3!z8(s;G*VRY1hniu;Qv^R*BMEHI>q+ zwINn|UN)Ad6&i{0nI5b5GaB{oX4{*2YfGv)9x)d%yRF4+&vVhH?XAf057!mW$$GN= z#zT{GqjdG&ayQURa^6Pfg*I0Ej!I1$RR=g8GfxjT7zYIm{(0)BKlu@`QLv=L?$x_Z zv2Wso?Bln0>v)fRO*7e?lE}?HSp^(tuGo(Zf~-=`t>g=h>4!CPj58B?#J4p@IFzUo zi_`0OYKy(mnJ9Z(Bl(W!Se1CHvd3G%iaV*(ZR2?yT@d(9jw*OOyLK(CD?@8E{je4w z-4{tZe%LSe@O5*>TA%SJlWhIg%p@~j;&@`C*AEWge%ScWo@!sxa@~Bt#LN^*Vg|{L zdVSvN)C{#vVy3w5)Wcu7wVk^Sf!TrUQ&o70WzX~6$W=B9inyRPex4QnXw87bVmNoi zOD~(9848cejm)mRTnd>%y`oI`Yv)|yDXsLU_RM1O 
zqu2WLOml9gC_0aEt5t(&9}gVAaG5sKE+Hgmc0`n&Z9l){QcdL6IqZlC?tB#%QjmfId!zopl|^X*?I!#q$qx3k#)FlkX$B#M(dPM-5AOgg_xU}AJr4+tLXK_r>fx7 ziNd?DOlE?D4d!Veko11MaYO<`Lo#Tj7>K+yZ;1Or)RC{7Joz%11N*h++IK1~SVTOC zcI|tfsemuTH!R;;NW7?CP~2EkK%BX0APYVJZ@6qB`~mfXCJADLeEcF8>LXM3g_#6| zXQ!%o#dhfguDo|AV8>OZ85f$`U!O4)jMES5=-m;!1Q@+!wgFmsRHo6u=9>9AsYBi? zr`{|2es2A4)*EE2tQ7pEr|RO}vLyqWL#s>l(kW5 zoD4M&5f8ET{N?7NPY-j9XF}KTRrAS_n6uzfwu)CZpU(Yg{AD%btsnnW&gJ{-D}^@I zr+^(_@~3})BQab1lAHy$VSMyaM${Q_f8pPE93Pqk%#y?Z^AS9OawbnrCLw;27E!mx z(dnvPo6hIH*$3ASs9+myEqBPOVQ{k53%R-TCqN8kZ6k!JW?7d%AsJB*QVcupCI%M6-5A)(zjyRkTI z%GtzmQx*n484BU{e&J=BtY%v?QJ`XhdaS0qUmrAnU$>xANZ;Qyxl_<5_7Oo;{csKg zLiIi@ZO@U}oHyh?NNj2DR*sI|^^r1@K1+>D&Nq_+YaDcc)wRY*+#2^BlzS5Vbtezk znjb|r9r?U7((;;l*asB?tdBL>G8`@e2fE?i(nq&XsCcINU~ypLUD18$LY{4H#fhax zh?92nAyc25y-}kaOAe?(GdDZvNIZohHg{Vsc(j`{MI}77*dv6G4^|)|3TlE4uW!s~`PKRN#0Xh^>pwt{*X`Ns;Bu>%9rV-M4gZk2A3o+^+BD`ko3m zYc?xv?z%mF!WhZ}X;=T{&u8Jc))xjh%ty1djpFh#N8rs&i%fLeWahiF_GB! zRP4iacqAs^>emQN8EMb<=cZh+y>Ak+C8`tb#_Ti@d-b&5`Io2kBER>3GQ7=hqvFjT zXPi#V-3nS3`q&y?^wE-uCY4e$l3wu9RH^8hFHpOS-~jiO`VngxOzd{PK+XT$P80Z`*EZ z`Ms4{es{Pn6r%qf$(g<{U;e0j4>`a-Y!xmr19vv*DK|Cs zD!A7vOTfg1z_7uOYc)~$y^7z0t?wiQ?NZ)|EBBe;mYFLPt!%j^J zMFN89f!q6X^XA=f42yq8g$U9iyVp>@-NGL3Z;_gD z900caSx?wo0<^hofu73CVX&1eVXHAShm}v

    0deeM2^`eK$4M|K7w6H118p9C3!; zA{(YJPmHf4^X?XMwSX}nn$olo=)pJdVYKO-N+ZwrY$z{Ih=L=k4~yHwd^Y178xQ^O zpch!6kNO1}h~zG#Jp=o`O@%9^n~1`GDcpw`Xe`7Q-{Xh6?_wOhnx3tW#Ha>Z8bd6R6eSl8MD7LRUKi`o-3PnkUtdm> zCh$~Vgb4L~U|}W*V`J4WCjM4ugQh#VaRWNeiqyfhnn7Qa-KIcrApCgc(wW#@)~~88 zkoY*REmqWRzS5(XK{Ylw*_*ulj{hR7VKZZ11*G-Q5A@8OJ60>P!M*u@d4OLZa?g?x z-I1RgdHvfjY*$_5oTC2T$+VIzne00$v-u$IDL(vusIRckq-)=oa7;W}9*`H)nLr~9 z#86sq1a?|_6)Ob~AS?UKzdL4LY8I@E{lch-#E9|uM`(Zm@w7yjkNRruKR;tDn$DUo z_C~CN>c6BDm$uMIR+DP@Z;`3JrE;)Im=N*W=YxT?cFFFG^=Pe)eU=QP zj?aoO{>|cL>xPYIA8RGc&dh)$*^&OX3M597F#V;FU3keWbvL|X7S1%sXtqYt?dLRF zM-Ht?3wI6h`EE^@{eH<=S;Fg(X?yA5r4ImcOWpk17%&4C7q}>q)UewpocE;OGb35Y z5v=xnxfPS4;$_EMv=N@E@T4UA@geF0)+%M)i^2^Ks|$?V(e@tOvvI zYUok0Z~@x4RRLNK5)MnsRd-8t)2R=s<+$vePdF#{;-dfj9<`v}bsbZ#Vq}8ri*pv( z()xq9v{-ngzAA2)#7tL8?P{Dlk3C?r=)ycwot5rO=}& z+H$gjHQ;hhxVM|>HGs*PC1)LS$w`-)2asF2uclAouQw6Uv5&(nc`W_gn&W2+r9Rs} zw|C@5PD5MDyq4Y*-@oOuo=n|?-kkqO-cUFP!6tXNMdyyd5W9n9TXa5=9FubIyP3%Z zZKmd!nXT_>Jd3%TbYgrrEXRGU>Sy%Ftfu|k)g*zD2h+FqyN|kyI6OP4Op`id|A)gi z{i<*6sV$$){dG5gyzR60Vfi16aF{qa>AxmjObtPWt<(sjmdIpGrA~;o+O( zF6uw5>h#PhHZ{ zRmT~HlcUu);?C-e{GCofZrrDK-+FiKkmi-w^;{0Kd(U}_Nj~*|F!mNuQMK>iuZjv5 zh=@veDj_+v(vs30(%n6T2ntAdgEWkE=K#_z9YaWo#E>#Hy!Y@tzu)^m?>Xz7v(~fL z;~Hm;Gkf2A-`Dm1ey;CEOOXnn9>Mvpz<4gZLa($_t$G41AE8!-|iyS0?7NUY7%Rg`6C zA4_F!e13qtKP-Ve$gS?bH3xOQZ1H8zh;f!XxzZI?A$qLRe98Nxpu%vq+i2 zLW2kl(fW(?N6l_+mR-ObFo$s1Zza%k}>?#14g>IRFSj^#Y)Ppim1?p9$dZ<>fndW-0 z7F-oOO-BsKoCTMPb>VIhD5X6&Kga-gQS_CjPpPUfPV#tb{jo5Kh$ynYbsXjH@;)=H zQ|vv>zj}rKTV*lwj`iSD^T~s1)})lMw6xu@AM2gU3vmV=UlTdRk9KuT-=-Kp#TNdC z|F?m^}^dCz)d=oOZj74R4C zkhE#2f!??MQ3jBZ_NDy~NS;%ej~ovQIkeNnJbU^3^s2@8I2eXi%T?s#6Q93fnaxP0 z?VN34cNcsG8rFYXUzK|-;PgEy6sbY56f5XtyLaB3;}&ziCGq@|tg=efm)by>zhp-1 z1b59hu@qiOg4;p06HRtcRPdjJ6XV~okr-`)sD&1pGQwyhSn*UYNdA%gI7`W4Y^%(p zgfr%~UcPP5C0>(!;dR+EOKC}?a!Htpz!i34-1oPBA*qJ6RTP_gE#d=LceOkp7p@#kzbXl~STNPyJ?V<=`xazwPzm6myf` zWj+D=!H#Cb(-%2X66Cx)yMmcY_bFfPF!7pVWHPd2?aeRF{ki8yY-T7Rk)o+k<+MH` 
zo?;Bs->FVApGC^toClkGT*Bv%3`hT7tBjV(muCyOiun;cp5pO}@(-T7T=+;RU#u`I zI1i@O2E&4%i9v?m`^PZgRlV}Xt+0w_jI|^**g4MzGCbO5y!=8ZbMGnWGD+J(Qx2p? z6<;7!Ts{PvB^_F`1ih^+IR!P$1g0c%YBglEp=d$;?kp^>&@5t!TVl&3tgoOq+;8*-pFOL8>AI?_a`J?b& ze{}?}vjzLDKxdi6-_P@s*E|+vu7V*ACIf`7y(E&RO)9xvm@i>$!T}erS4;7=nF4*V zd<&^IEKl_&$V|!lx1*m@=jQju^v&B{#?*|GO%`c!&uq2|y#3g*Sb{ciGO#i?K!Kvh z&Q#BhR!>H!4&Wc|QN;Bc%-M;jQktMylp5_LdYz{lKOEzqfkuXE%MZajusH zzOfyU-xwrz{?Gm>YyP8ur%o6<@+^9d;rn22=+3KhW@}$LFlV$$5cpvF&zd}NEXYst zCx=a>rb&2Q#Z9jX?7RM1C8pDf?Hn<4@y0t+>0HP-s_s{G7JYY8{DCCCWhG07T)0M7 zgW4LjnO#c-EbM_U*^ zc!w0_wGmjW)!3Yj*b0Tw%y9mmWqdfqvfhVMP~g1r3nJbqllUahLisVfkmJk^;&Dh6 zL1M4+A(Lh$>!y##{g0~3RnmNJPg3*0CVc&N2859lVwygzw0K>Qchy$YOLBD>wkL9mOmLEymM09x_V4_u#CsyhTv7Ay(<{)SzQO8%~ z8UF_+i+SUFpg#Bb{ysyPcJS4$v|9{)o_nOmUiLd-(lQH!97kL1K#k+%5FI~Q174yl z<+TudhH5x~9)68D+YXKxOx3v}6Ck7@Yb2xaaob&pf{PB&8~cEPi2T#|X3=i$1<~#t zY1fg2C2q0%@!uLO)yDQ^lSCm4X8mcnoC50qH+LWqwh*^XlY@yLWwCr;a{Toj3{nfZ zeim~})tLFL9n!atZP5%LIm}z7F-!S{>U&tIY-9bAb=AziPBcu>P}n=&_?!gBH>6TF zFGdZ`;oEHLAi`H!7R%r@%y|>PG}t6r2u;%yWSUU_dPpDZ)T?x8p|VCxO0~ zU2~xUPYygvOBV4*;9{rgE3B}&R~ZhZx~A_WCg~yiG+uj49ox#!UCy$T?9t80W0iYz z4Grx-|E^OREx@trJUE4!^}_5n8Z9{(GB-0sipwX9>%=1zFPHxk$nx~EXe=d(Cpg(E z;4l7-RiiX&yQ`*gTV;en;v|YjCrf?$sN5KLe@qE?|MCoUkw@|Qb6L0Sprrc9AX=AU8Zf?^ZM6HB&kKVsehE9}Wvu%P?_Jj1vMY+xz0?eo;& zi3;W;LZcM3La%zy6Nl;24}SKdLI?fq;y>Fmtb)SxXJ9HX4}>g#zBZ;b$EwSdOJ!S! zBmtwqawM@Duw0>RmJ_es&yY2%LRTl@g?c+(G)&u(`gkfgBaH_dGV z)MQ?R@(%@I^v3A>F{~uM^>zKi?L@ywA_47f@GEc|&5@?VXH;fIuzK-=YyEgJK>U0b zkcRInQXuw}`y|F=Q~6ADkBpiMh`4QD+|>W`Gs9thgodi{J+=a7z;iTR zniQ#0_yB@NOCJp~dYq>Nf04_2#;DgC?ON0ZI&J~`rzhdHq0Eral1!O6DTn1! 
z!;jkpf}XWyn8Se%0oWvLgYrvLj(baDhC`rbq$EjK0B7vJHlO&H8Xz?UYOB3QYNDPxqR`-k@|Kxl`*ke$VY&jOVfKP z=Qxn=wseo(0A`J-c3Ay-Wx@qYJ@i`T3fuWKkj&knl<5GV&x5(Qm?Yx4`=RaJUFF7o znf(zhd;!>eo_TIRxUohH54?;&E5ZXL4@k{>C6(b#$Fv9jAh55+@ zM2;=R9FqMfva5?OjB`JXn8!fyNl=MKMJJ@K6caPOM!>UF5gTgv7q%1H#lP)+n z)rLt$78`>APKEz zpe-I5xZJ*9jY;>~UFO1%q`fToDkxf>r?-LxQ9-tUjN5e?9Dz@iP zZi1VY0`N3v1gd=%yuM>PS1ph>RPTy@+Zun?V|j(~Y7Dqls65|#pX!jyNQF*0k{TIx zR25vTvu{IZj=5EJU={BSEw~I1N%KT zG@q!w;3a@s3GCAW514~&l|zMxwwK+D4BhNGY`xcUFvvsUqOiu$b+WJhIPVcfxapVb zuhvs>UUu+GoR}=HrI{yd8g(5MPsO^K-xKZ0o6 zW9==+H$(2fTf=0R6C-|ju1*_v!w)Z#4MGEiCXBO|jCLIR-bJe=pCt$cz~kQEDg>~l z_U&N5R>;Fd#t@|5japsCR;c$}JkxDufl!19|9jMp!Oqb$|}*xFqB>_{7yHOi}4qFZOwI|p|bv8f%xC(3I0hbt|XM{}iB zlV2=Dn6w?W;_IHT*q4<0LXJYN%5b#=cJL97p$uK#b*Oe8`5kD7-p;|wyvM>z0M&Pn zg19te6p_;0UL{?1y&*Dar=o+);VgHy-hFN?$Wl$md98mv+F`Nr`?fYje$JkUw?wze zs(0~EXPqo@0C$RjV{d_JaatW@zsGTW^*gQiGOzPAj}VTx@Nl z6rTmA04_c?FgI+KIrDfx?ZzZ#miMeWRyB^`dJ2Jg`AI)r7?_=3Mj)xrq$K&?&H2_= zO5a|}zGb|fQ)@Wn3}8?_xYO`ThYijrUmWP{r%@7U${Obg z=_F^9q5NNDVw2u1(e~@3uTb$>Ixx)Q_j!}srf0s*)MzL2pnPZQ&a~HY;TDhQVEYj< zkLe{H{OFJSwAY`)lZKuA@3Xpq-g4|kSWB`aAjX+ihDXTx-z^H&_B#k3E->A=m2AzA zEbDsrP2em%)in&-8FV&Pqa^6Cps;06Mt2jn#kUkgk>J&ZJf)r@rMud-pLC4UM|=4n z0THy~k2b&Pd&tl>`G9(^FM4$2|Jbu)cMT?emG(Ec@ygwY-c46-)}Gh*;jit`5-lKm z5nJap2@dT8Ty%}q_Px{AE*`eaF^)^kynCZ-29MwE;HO@4Ym^%{f4>w}bCn@EDCFBd zb;;AMuPIyI;>u0I3oo}-n>({+@yS|nm2u{oM?pU9PzcF& z`bnsHFOR(zb1zphf3GbXXiBcWrtUeknzw7VzN<#S6@}D;(hoU$E}L-NGWW#7 zRd8OfZ;GOHg`Q+MN%SZ&?)}Jf=aUY5l1gn$NfLh`tyde}nBGva!nocbLw>94qF$fw zr1M^obLast1+)wmRrQ1XYRnzzT7XNurqJkn2(Ebog>hGJdd&Zdch{YhCAW>#myrt^ z*D@}}d>K9lb>7{KQpXe)hpH%VyouVPT~M{t@(t?LG8Q~IifN>ok!lq!gcj^ONBq5m zNWP{j`PF#7R>o6`FWORrq8x5sjtc$esDno zX~sor_MGwX^$wQ}tn?}`04d9cugiKNzskvaW~SXMcF(nTBLy*!D@$$KadYq}8Z;Vq z|341E_eoo+;`I`dVCY$nk0Ecda>HZ9=(b8 zWLL#0Cz@!T5xu2cMX9wrfb6U>;fO|xBumY2w9WoI%hy2|5}$s?8H-@|TA=a?Knx17 zs8=@?l8udAf78cnv$;;@G@P&+_QP2<0-u0Qxmy05f)(aDF>Dewg>jNaM{^iBZAx)A76wCbmwy3#%M+rb^S;{d)x%5t^sn0P&xwc!%H 
z_n1%He$aX-FgNOZ6gUpM|KG;}<0WjHY5d>g2c|-r4ZQowSM_Y6*BTXg2m>U zyo*HR<}dxUYT}aFJ>&OW-%L%hsf&$g-&SxIOV4$%uHS*K?0o#C{!IpY;{HbZTHNvt zyNAbD6=g9#LzC3v)N^l*mNQlxK(^GynHxC;0|QlmVHggy|aR-zkwur`GL zdYcnW=qa~*Qd{j}%3_soB_aIfx*X0Wi7AOc)vF=Swrk`Yy8WO2gKLVL?fY}g9Dp&X z2Bjz*AgPaKC2VRqDh-(}yI=!A_`kp=7`L5Gn#9FF1G_fee%xAnn@Ak%A!Cq*#F1#d1db&_?H~Gj|C%l(-X!6nu z^h|(!RiSwS8f&!W6|)k8K5wx64L+e5@b$hvd4X>Kac?N#uR(!&gdXkNei{h|4W&UB zpW}0NsaUA~db-~EJD}dL^-`_aX^k2OO~O{>n?Bn2K~|*c*bDDSEhKUk8&nTv=P9jqA62mcW$_;M&CCrD zfW;huLl%dDiJfreikzX*sn<+M&HPVSkq&EnT!l=f^|=LkB7GOVyU)!Coasp*ZhvV zFL#jzKb;8&5anHFpbOllQdNE=5k@SOcU_#ZVS)zF7TTLI?YX(Y1owAxhHC=*~IA);$%h- z_=k`w0^g%xL>XX?0UX@io%Tjy~&1d7=( z?)S3Cd66(V5?)AS@zx7ek!3opywZz+y*z@q6Kg}OLMwUwb@Ropiqdlp zpP!SLKV+42ymFf^2i&;;^4J4d%E(7VTnPksUqnhkn_YKwm6};}8_lZEKzNYRKj`Bp z;29@#k>tuIyQl?x(x>B6i1l!4DKM?H4)hPyPWIqC>^EOFJ9vx}9-13!X*kn5b_MOJ zv>%h#J5y5N^re2IESt<#RFE#jv%p)~S~(78NTWRtHxkzl}(^}uRo zbNch1fZZfyoxHOq+!$EIzug1n=@M&0lyiz0<4=?{Cdvv!3gYzyeGYgCG+&0VNFSuh z*s6|Uodteps|l@qm846Ee+JqAEyshEcKd&hQx6Y?x z?>fQXMrHUJ(H~?zEz)Bw6ox0^uZ)MJQcIsC2_A?D*bVVq9XV8?K$*W7O_tPiR;PK= z|KRX_tqp;@p!I`w$O@gS^f&siLg3yI(M}0toB12cJs$yJRYE%cFcc7Dqy~~Wjkl)C zI!`V^f?99shqi1N#eu#;#F;DdSub-#ZfC^-k-Vkl(QlXk6Bf zwNRDxWbaJb!Ab_h{p>Scj^lLUNaJpe;$Z{JYA?qmZ$eFKJ$H@Dy`GZ}goBM@v{^^7 z9k@@N3c2hP2!ukU-n_CN&irh?oM`;<95A{s77H3)!P#!yxcd7}^sO>5i%lB9jA}2C zCkMJMXS`qL-!hKYqDurebaI7e&(9=xnPe9yColJt0-uE8{73s2|GxPZ6{%UUQTA1uouBdXp3K|Azwazw~}$Z&=I@W58M+9>jJ~NFpZ#y_SqhAg`>3QX}AFHwd_#n;K}|KkXm`H)Ba~ z&q>^1_B#C=nI%I|6@U>HnV%yS%l7FB<&#W?tOGWaelzjc#sf(z<)%aEx4lN(JZqu2 znsZ$c3;U?}DHuRt1g){C3$_0Iw-crP$9tP50xIJ7!))Kb5p-K6(3mfxY3?(HgomjA8##%mbDdVzqTOPX82+V1|@`ryxT$CXpe$?0avKp{pyYYV;V z8)cnJv&e!;fbe9RFED}0b0@)AxC8d4ahCwVR;!G@@MT)$Z0SLWs@eBG={2)(uT{ko z$~76?4fGPR>{8$P-y3_~;#kK^HQVl3%MXMpj?M0y)&ImMfU^unTj;>X69w4h2~tC4 zQl+_69>-2BiIAl~K>mBmpxBNj88Xo}JXmjxB?(!?)zpI`IhPx6PyEXAoGv%c3UL5u z>NpUVDC9j?c+ja_XnuJVm14hCSa&*(VChY=!IyRamww`P-VhDTCWY92tC%)5i6I?y z%e`sR)CBiBOY&SQeA$CVo3hLQ;5l<``)%Sweip0m?SsIgk`Nz{{!0dD3=kjV+*7`S 
z4xfv89{+6~>ggR^4c%P@K)pX2B_8yMs2q3l&uDv?ik0B}@zfe&)2_J*Kw&bkAuyIh8I(>{md_a&DT}-o!z9d9cz@ z%03)r2zz`@c_y&@Mp{&Dbv_8xzqD?aJ9eT@Z0Ei zD)EA3Sh)2cN4@`4s$iZE1RY%ehn|;uL|qmLD@^^mOrmfIpn8+1c%zG)R#QJAkdZr}F^J2SC zTv<&2V!!CQQS!l2n_7D0DoI~b0efOvgn}*{C5FYNPz83ekqT3JJdmnempK0vcVjhP zFHYsVD#7a@h9Y0ct?#L%3Bxi@!un%eOHZkk3pOFEiJB@wY2Y^G2V3J(x*#Q08KPl+ zSYo+^rPutom@&Qiq_N*tXQVKv-d5e3{i3AA3m3g(BTfD?P$}Tb5)^e+ZaYyP0~9+X zUn*(fYAKkdpI%v9Kb%ZxK4e40%HXRq@6VsV(|Mg4d!H8c_=1uIWqoOCX-sLV);>#v zrAn{a8(l&l6tS88BxlFKe7u(YC5#nR1>DwLtMSY_7UJgYem8e8CO#i&v-1^$THaXG ze5ST&5bC6nS1#Vzyx8a%gyS?ad6;jM`^`p+GOLm??(N<2|9?86>kADXRPyyn z$iKHnf}U27fyHnsIqdr)Uam|c4}%>pr-e_Jc#Ie3K#Scdj+`(OXo!H>uQ22a=4%>&lr9 zQ$M=ph{|h=j(;E3>fR{EH?oMPbg=NGT30&Z#$o5M zNw1Y_$TWo%Y=4f<@1^HMaA6qWE{z1@QiXgIMab&pG35>9F|#v?s7k(g|S z+!;fHkuEU}Zoa7?S!K?{Lg_n+v$ds4&4h_uA)HEyy7fNe+Mw>z;Am<{Gmf%ZEl-T} z8y>%I25+G$FD`}gyS(A72C*J)UMA9Yzp`=ZQV*5Zr`juj-=6P+qZUCgdA-;D_i6%m zj3@}Uc@m7CE(Y+;y~1U2edR}4^^z6#{+|&QT11$okKzrc)IjYjHS!pM{QCZPH`4I( z;0iTXDfL}3&Fwp50t>>=DP~1gI>f+nr6^D1IqZ}Busz*5CoO=k3jGuFH+FzG`U`Xu zK^T2sSgEkG`U4&kP@?IWb(vLJ!VloyNsDX`!pqIVF?Yf$XJ=qNtYK;}AEk}%JY904 zrwr9_27`eXx9mr{D=Z@_Lg&)FZl3*qk!>#%_GrqW&wxswy*j$HuJj8}#re;B;T*h6 z$oVEj3@AS>S5PwVu$?S2fq&%WUv8OLqFu*U{;ERq2K$5Ilh{g7iY98%|2YJ$`ei245aWlQr4J$Vfp25d_psl zK=I`*$*YRD7nF~*thL^fR9B|gdW*4_XkyM5{Jp+cg38q?+j4{ zR;xsQZ^^SGO%a3}4RdMLx4@VXRh8u=mpO6tbayGfmSkHsx-Kj4_wYkzv+w#l#F!@H zjzCONm|N)_x4P~W6HJD26w^#Tg_b3ASgm?~Jg7i~6R>w?+mqCiW`B4&8MHax;A$KF zrKno=*Qusf-S$+k*Dvn~ap1i$9))t;I>O5jo>1M$*J1vA`iAh(&*Rn@QG>wYJ$1z} z(@z{5Bp5{b0b>#)rQ_HZ-;`>rVzGa-6xQ_Nzfnp^K{68E4#{K zytnZxAvDBSE?Xu4J;gqWXr6n6z>b1`wV+8pb*S-B!oWkbBWsN_I7m>@$u4rL&#Mq5 zIU!INBdlh7e8){~8cC3Ur*X2d2m~2T^};3FRPR=_{P_HUs>bxq4c6m3MDLbasxeNg z3w8Db98G+(?V0j1zCP(i-$+5b!youPe`5#;W48d0jYjDn$pa1IlszBZ?S&ERAH*hR#NA z>@Y5AYvQXoM9|(M>I)tYCUKZD@u%!X^$Kj8%YC8s6{WC!D+$80vM^@FrcP~|znmoHx2{5xP5N%WooSk~_^i2&;T%x;$EphX&^hu9tK zpWQpZIg~XwKhx{jcOXr;#85qQ7dm6X_xiXz7Bw1QgjI)rJ8pGRQDEzYs_5I1%!+ZB 
zsoiRPg=bp0t?v#hV~;+~&BG^4W94pT@5S%f2M(yB!i$K9Mpe&>7gImf)a>BwgO$A8 zry6#Xc+|$$x`RtztBj<(#u&+$;8<;)R;8jK3}k$C$kI*+=AJpVVPoi9M6`M z=^Q{XFJfm}6tfAeZnfujDj>fXAM?FGN*c3%*)~({vIOrt=9^8MmT`Fqb{dAa$~vPZ z1>LU}j=C4#ra*N|t)SDa`8h#njLSPMb+X72DGOy=(~UZGXaZK9S}s@YERb!D<~IbQ zYRnw-wmBnLo04fYCDpza$^7}Lt$MhZie^o#3K5F)y)(MMU93%bS&tdlc1kCBY^oTG zR=PHdWUhto*g^2|Q=)9~>)+8j)qg9Tx9FM&sH`0rqT@0-r`=OrvPQZ!xomlszuJ~a zWpg~IX-B+5;z#)@zy-PmalC|OzZ{Xxbuy!L#4FpzXF+IgjGHk8r(8P{2Mye@6a&fK zuCdxmvD8@q5_#DYS+wOXbq8sm_W5VY=F9LDFD*-OT8UDviO6qDg@3b!Y`J( zX~GN9FvY`)vg+|mlQ?x*$vcoAHJ7Z zO3do=g)MH*;08uNdk%%%?V_|nw#FS#wUrr^2k?UrisMIh2nJzR2oNI>q*fs7jkLI^ zD!_8v90$>Egmc6}fkFrKY=9aj8-zP_UM{2@-P%h}MV8e8{S2KKsFVi=!I{9sIG6J| z-5Meg%;#-If-)%a7)U@N7ifhgl1d`UI#(u<5)^>!Yvnd`yh=7ehyG3Y7_eZ~JPWOU zn0;XWvHyU}bz3{Si^q1Ze!P9MP{T0o?N%Zfj+s6JCSvpB7lEP*bZOWgH~fMyV#N=Bg%LdhfuKwQ@kbj(gA3PcO`lraP_O^Ff{mEi+&Cgs6e zI~*d>YyWUC6*74XdKa76tQZ&7pTya>V*jWv4kX)Ih6rdoq6P&1AZkj=0-z#1TVQeN zfef{>JRms#o-coNLwR%52EZ=*LBS=6a1%0-*BJ)RcKAVr14Ot;076=3 z3Sh=aR6huJSXoTyu$P_XHJOfg<_{xaryU_pNsDR2l_Iwx$v|U?rQo9RQVcuAV*&_6 zLEqSGz{^A)R8D?@z9lul;Cg&@IH@N)M=Lj1w$@Yi0lLlOj)grYyEJCJ@#-)N`gylS zZyV#*ZRcQ6w(~8$o^IPK5;*S=T)M8Rx=>pI{HO-YNoKq?H{$qZ=!~D&R1k+(kV#pqybGT<^JyY2elf^uWdYm_}7;jEgC=mRNiOro~Yl3uxWQZ5F>#2`p zO^GB%6@pn!^amUU)Q1REQdB;eVt4#4V~FFNSRLUoC4vx{oe@F2g9Zu!xs1a-#q{YC z18l^m%jkQP%YuE>RHc*tIG{ zqIoLR%&?78Saqi^-;Ag{G!S7KAW(?_*QZOAOOmT^6=q~mb;J?Ysu|Ti@FIWjF18U7 zI=8TIMnp@7?_G-PT`uzK$Nykd(kWaE2-2_*$i=CcgGx*y!}0*fY~r>4I=nSaH$)pJ zRk&q$74=}9U1;6J^|k*?eHIU776K|Ig}0+=CC!0p`U`YGU~OItnvVyL$bIK8&<+8r z?PzfG{~BclwwXhp1?Q+aO!>MirI?kjVhp;~4yU4=_o>5*c?X51AeL4>{&~v54v)p> zv#<10Zoc`N6&DYWe54%iC>&Nj)XARxSk9&|e|P;*E3OpF%hf1~s}CMOIpQ48*e2cc zac@4S);Nkx!Vb>U+2SA$krWXD<~8!lDR)wC;tK{@{YR_p}Xv zrAmMdYJjLa!PUpQQ(;K^_NV|yvc)eDIju11Sp;cy(02hyNKf#0f%at!oUXuNfta8H z(l9Z8L68MnJ0+5oCt!EjD+XY;%Af_fyUt*`^HPm%!c~x7vqukzScJTMo_b@lMAs!2 zBk$E7g1aQqXL@qH@44*xe#e2s;yD3cv-`zYXfncKGk0?aidc5{f^ZLQ;xX}Flsv%( zDVZuUMq;4mfSeIIr*Hc61Q8}3X^@9PskiNuQ7Lpxr@Y`9$OqOlmTe{}m)D4^4Kl$rMsp 
zRw#`9=T`e?eRju5g|IKaLrW@rK*KBU-uO>s{MLy5fGenqq;6c>S8tV7egq(D#59le zsK;mI*qivj^Z8g-Cfh8XulnrNy4Foq7c3ek_ZH>$MpZ#){{=!JecF0a!&gWi3D>Gn$$kMQ6A8?i8vDy zd*@9KBE+2^B-V{BbH90Ef4Kxgz+y`~zryf-Uu?)JL{VKsRsT>#$ABFAr-TLhx)Dw} zn3BtHU*@#cJ{s}fI{g?hi02cTQq_6v3S&gqv8IE1abq2)Qo)NtbeeOUOJhx2{UvB~`vM zqHc*bv2_1i;S_ZE`e04=ns+S>&54=V@-p`wY42(bE-ad^)p9m4#qc)8Q=^Bbatrw= zc^GjX5h%o#T8!Rw1t1Cw5DY*9nJr^=iq*VfV(%lTJo+Bof1s*s_~HD2v@ct+0lj0+ z62{Zp2jxy{bsqy%&RE_Z+C0dq5hA&Prl}r-;r$nsj$G*+gBE(u9Sa|-l#Y69QfUV6 zmc*!>F`v9z-uZ~<-M^0!mDh>&pQBTD*Uw1oCAfS*fbAw||%U~GAF4%X%w>zL%)aO|+Zm58c4zxap`BBip*mNp0{|R0L z$0FfgH@q(_4YDJr5=Hd?xK31+Q)^G(XYvT!d#fVZ^!^!{W9PbLeo#Y`h6lIWOdN6h z}#MO2Ub`3^rzA4;2R)pHe! zUcb+U!EolDh-;nuD2`=2qZDNpdT~p+8RlKlJWOkxNQF^zU!UuLc%MB$72bXFFL1;> z?fy1ytNs^z_(zfBJb`%GKv{C7e`Yd+T~oppWC9hBs=HpHiIOy+_dAbQNDCfd1y?t0 zOqVyQrJNEvUaP#+<@qzfcqDjDxpBY)%Ifv8g6J4#9S}Ol>AmUHAqJcc%W2b)hv^OT zd8Ja`C40fy;s?beAG(#5aGLtu{_`tnWV9N`Q(^Qy;02Hs^n(bXhgacQvjvM?4yoxX{`2;jH$Z15zxOfU1K>~uU_b~=y8K9`yv`#R z@_{?;%+H-Sd1*mgFsXppjjPP}D47CzSB~w6pX+5N($sh=#aXfL19bT~^{{XFg;+U= zn)7iC;{;(rAQ3MA3ZNlz!1eD1S>$sCQcmD#w9&w75HV}?g37Xu1O06~SWXf=6M{RN!#7*sFo^b_#JRm5+h>^C%)nf+4FoLvbv z#`5nZ%LsfQ`YX8}FByMU4JzRms&!ms^!siZW6u;c66E zrUnef;2v=Ia%;fjQGT>tQ>{U0rCyjm7`B=$?~wQ9`S}+fD9XwR`t84Wi^6=61IPS( z2j%5@uCUk+47k*xm7)HfDyZ+y5(qr1yOO@n06f$c8@EYzIF_L!pE?A2d`=qivWz(A ziZeysTiIfIf(5`2K<`WB=zZz-8|55RZ9pm*0kmQJ=_DqYllr~_^5pk{1D;s>#1oB; z+5LI#0(T-3n8Yd!8T(NXjNw*xAY|;pdx96Y6QjsUDBbG7DNC?(G4=R<=EhW+A&q<* zKftK0KsF)BF=xV~m5BiPNC{zgr2e+cDhtqIn{rr_WIj|bo<#GXOBn^JC ztm%!J-{2QZfn=g=Fmod=3BZRSbbfxWd^$6TLur} zW;XAb9imY#b}FwytFlf$QdtpGJg%HuJZ*a0OI)&q-%X@^^(6(9N9fnG(Cc)<$Gw@1 zi;^#jB_!zTlnWGlb{E^`80>-D#p$rW@(vV;ijT1rVr@a!*w+Wz@L7g?rl7ejH|)Yb z@<9g^(#(RCXiWFBq-O9y2)i}^=O9ZZ3Ou#~Qlm9lqQ`w%2QJdS2OxM(PWIn*AgXeE z9QD0iGXBstey8q`FM7eOvnGo2%yGXN`mS z5M-xrG9?NE!n-z#ADGOv%8BIlWvevgxW%3-R=BHCz51J4ITlzPqv8Imo}fUpR@j=# z`()-hIwR?JCWx7n1ZjY);2@xL$ds;dKT4Ib>1bb;G9 
zyuZb~(qP*sc<+h#T*0{~>|g}?!+?sK)!@6^_+7;cV{V4`)nA-Qc>1vP19e}f)T@GkN*0pC|})SHTtNxSxEpbQDG6YH%}Fy|J!yO1kOL;x584vVqw%7r({ z1K|$h-?@O*9kNwHF6kaVg6(2JR^ZV(R$(F_(cAwhAm{xFS&nMoc2u*B^SYo(n)w2K zg~1i4`IPy$0#oT!j%zgV+W4gm17ur!d%+b*o@8S;X#|ODCV$s)gwDy)DfuUN?mvwH zK5L@sM_#>3v*B)yyMNw%B-a87ZRo!BU4t616)2NTx;0wHD!!y*_8`bS8Am6!;|&I( z)lLJnfMz^?2cFNaH2=2CEm?YDsdyb?gJ6&%U+F~lVmBiEG(Z3nY z+2~R$LZx4cWp6ezCyx;)N{z&N4g?k)T`USJbrF5fCv-xmo!le-7X|UKYQ8+(6o0iE zKuyU=rC?t>S)kM(Uk4r-kz79zj@!-vHj@2oR7Eu; zDhX&@(4Fh;$(X~b33WkZ+tbTtjlAi;ouihsR(of}XT#y-mF!(5ll~Y6WN~Kr2r^6h4yoaRt(jdR~$Ve%Obf zE{eIcAeCsr1+@z>ZhRWMe<=!kaW);6PmI{NJ#5%LmJznioTGx2xxWWf5i74VsbrHl z!~+dZ^*k1Brb=6z=;ASMWo&ej`EXduq60g{#`m4I-gTT>8?AiOTRE_0>uD^a8*ute z=5s=l6&o>jWoTUML*82xBSr(YuO;wUb4wI~@8QLeV$Vu! zgKU#Abb?SOo+`Z`;Ym4j@i{sF>(6lV_J_AWCM1DWE%v6JNj)FyneF~W_T*~f*Rd{X z9$J~A_tw6aUML2}sHD0R>neEOpDnKr*MT4NX36M^ z3GkWnMmW9T@4zZLsF`mfBSMFc+u*XeU=&p*`P}y)3=}4pqp$D&xh<|W z=!(s5rCg{!e4g`MNY0tJZ}%a%5<5JHW?$MrQ#p7R`c%d7IRo8y?0Kgp5g9O{7H|Gj zjSOxr{?13Q&>$ph?;g0LF#E&uPi3o&_jR|oY2oG)ZbkRV)D&V}D5T?F{V&4KI-shp z-TFsFv1p4HDG5R82Blk45fGJ2B!;>F-&1-h0n|@4ess z?;I8O+H1}=*L`h|JA{+mjCDzW}g^(WqyBE=Lv9sycVuBk`+w`YL zzZiD=o;~y9;*V=s{1jfk7!x|UbOQbosRA=o9o=VI2KC-VY)DFR!^w7Ak0JfFg(A+x zg|v+Bw&9MZj`W^gAvt=sUb2vLL+A+lCqyaxs{1DQFKGa+i#3Wk|KbxNaHu)a#!-r# zXS=I;w7;DKGId%%ZXwZ|-aZ!6o@XnnleI*yy$Hp3m!-%6y^ok`J#*>{F_oj=ZRWbI z3j+GtwYy5mAs5?*CQ>R`WI~qLXgU zJ@%dwR?7zYw3tjkl0x0gWQ#8S6Dy`cr%VJ<{W`r>WyPi9FXjC1RJJ2?5U~4wL&15- zEYV++NFqzOI@`-o^Kb;MhJN9@QmkySN!s8EBD8^xYg~y&Q#m5j2QRMy+pKi)QAZ$Ub=N*^b{ zE460w^tWp*-^bNLC_5x9l{EU}j-Hq}lKvq577TnM|96B@L}Y)F{8DL>rQvRkjC{Wg z&(~87N|e=V*=n5f$soIidphp0v%X!yOAilLri8Dn=Xf{QI%YEc_kbpPI??4+-Tb}N zmL^@DfkBVEF-ztk@Uv}1pJ$i{aO+!lnFd}lGC z%m2ILr^v#ru?a|J|2TllMl-HaxJtK1gDUWCg9XZj*Jmi@KL3mB%r1_wFW0)Ds(9^8 zVNZ}R+C1o_Rm9y%c*T8O(DB#AfgG9Z{E_?3K$sTJT4v8V_NKn=2M(THJc@0B0#DBIr(4g zYm1wYfBLHU=2bn`EqS|C;Tfv?plhb3FyNz###0?QZ*WfNCeiYPRW3K#oy9%{qo3(7 zFrU!;hUMXD#tXiTuJm`lPfJ_8QoT?**UdRG7;lTd^FNzxR#6>P_=sqpTNkDN?mH-{ 
zkq2P^=W+9k?5a!G7$Qz0aj-ne&<+c%cm}*IZd;Gg{=KQ;^ouL`W&~6k^sCFI%83Mz z6|Y$)NH*}YZqFTwv&U|1K8q~ZuSu2+m2bRz)ZsgtxH3bprucN1npbj7cX_cI6vWS@OIztvr(FZ0y82X z)O}uSRTQ&dq%eBI9*VC@$24R%$S2yfFHZt{WT6(V~8SlIEO^m%~cgoU8!_ zuh=~?O7y+nA}sMp_v5{}#{Bjr?!$%`OI2vD8ue#DSD*6AQOl0v-~W}gG5!vhSb}}^ zfkI$!czWw9#b_)7g@j7{$J{RI%m6HC$uxM046#PX0PL1H=KO2nCfm>NKR(dUXgf@t z+v|=$rrsT~kKZ)fXR;b})E1>1MrgUG^Ls{X=fSAp?1`Ars$ICG%UU?4O7&Kk%&co; zmoU?fF94+46r9+-E|a}h;e#V|MR+`RXQ{AL+-^E)7$m$j2*CkihR>D^A+ZY)HL_b^6Pu;rLSnQrW0$B-U? zg!EneR4q?-YBqN1PN+{DW;>OfO^oI%s4`_#T$i-=+QwEk=~2g1^FnRixkqyQEA2Gf zw61rF+KwJSbJ?ZP9|MNxucb{}`W_Asn}Klx)q(4^{x$HjGe_JR7@i=4puVnnEkA_* zVb<9*O&C?flHVSfZ@wX~q?3=irzce?;1sF$p4;E4cza1K4HHo!;#c1EathgXwsr?1+Dgb=X7Z@1SmiJxUi8be(D1l-yAcTS z#=R@zCI-c47w+hRrH;Z>3!%nfcmiKaecf{Qq&(K3Oom$ULR4k=uXY`Sih?!gkG67E z8R~_phIFsoUt6``5+#-~bbL5x+NECC{8CJnw_!4OoUHYP$+F&@PaF2sTa?sdJsmvB zHYHcB#(+y|8FnJs)T8cqYtY%5875Nr)y1{^jOciMcX=n%7N(i=ssX|2gU%ZA^JO%i zTO*+|tRapgOG$fgudqJXCQn08`@V$Ub1emQEknL0KvU0K47!0ILFA0tE7Q;-6BEeB zDbRIhX_{r!QE=lV0-<965vU5sT_faQ3{U%w*H?bS4AM|Q78{CZDqvk=KiyPxDtoSz zRoVVC7C*&$I(-7TPWl$^ebc&a`9MXFK~(&t$YfxHInd`vrkx+}Wei3fHia_~nDLU< zZ+=xE4ND&rxBDJzoBy(j?m3p;ub8RZdZLU;UM|^|1^{K^zL1>WY9ezzY`+xXnSSG) z`<~b9KXQ#ah>$xk-S1v+zj3<%S-Y;P{5Zz^_+;&X*=oC-VzhhxFU3&KH|n_&7{fO% z#4?rDbv(a?MW-v;eNgQ&s-+e$ z*hJpVy>k9D7*~PqGj_x@XT32_u^Qfbwub#BszapTq>4JYa;uhbBfN<`hQXHE!_DBA9%5*E00o{I;2vzwj2M&ZH3 zjBaPC(;j?lITa-%DmCNFJ=W!6kNSAM^NQOobnziKv|WDGC!LkJ=WisQR={-jc6!=# z50OZY1@Qp#WqHe56Kw8uA&Y9F<*Ogn*6JunU|zGO+yi?sk*o_$7>iF4i3~5@-(Z{@ zrUzH=^Q(+WX|4cmf^P-Tnza|z$F!ykiNDfjaHzddUQtZ=^;wTcLF@68DRaA(F*m}V zAnEB|TXQd>C1LeKX}hd@RKkGTcf4(268S=3Evm{_UIg7vS^*$=%kAT#j8WBxwL;S^ z;U`9-UE~@oUU3(40HhZhP{?n*U^rKV_;^n(rR$JP^r!otmCkEBqDaL}$&1OGHRvvd zve!sYG>Xs>>*0x3XzkMSR^Cx?JgKrcrLJdwzuQ0WFF^uvO@LvR`EEZ#&B5$tTCK1Exy*y!(E;YOyENAf`Qw zZMUT6sDnGnGGgLU@l~cL-({)V_iceRlx`Gmk@vO_0Sop1(7l?u`|1fyUt&_A*0*le zWF;6$ushd1_53NuCs)Mv=dqzdxjnN=l*;U9U2q!mJ_m5hyAgM5NqQg62-G`ef*Y7) zxdReDke-A-?YUl)^Ngs1IFvmdRhV&t+&T@4^+@2Y*@r7c?UMZwuo};c{PIC5GzYVc 
zRHP5S5?4_yn!&iHD#8IRqdRXz{uTLJw8%xHHGd{3WAf2n&VIV+MIK|{uz=0&5VxtL zUtS=L9k5)fyS9h2FmF+EeBbFr;3YhAQ2EGQN-MDj5g+b8(g{gs=q1rf>im zsJFP=+Er8$n@9Jf&d2XyI?c9v6NAt$dQ)?+&26v6!0mILnR@k$TqcDr<4TR*6T2s7 zV|HV|-s8PbT)2^v8o@r;QGdSa{%u3hs*k|K@yK;9*4}p$TF>SBay+S7Ng@R*QzD#6$p$ATCLa6J))1y*1CYyJM zyxHtiUn?*HC6;)66seFa3mXF{R^g%jxhZ>J@a6kVLNgaJ)n1huSIHRpharBj&;$#< zpWdpe(d9DEU_CN#IM$Y2HC0SIF)gA_s$5%{+W19?D-({LGw)7ueBivX7EhHt>!s4A z^HJt-2|CJqmYT$l)2r?w2G4fyqBvvTJc08eD?p0=U24L3I()@CC>ChaJL(njCjafQ z1BZLGd}gENcQx#eRtn89Sq}@Ay-bGf`VF4l`i4HD`&!XU!}84t&6){MMdz?T)V#NkRE|;?6dAYU_rRsDcy6&HM3*M$49l$*bt$u5KN6AQbkD!`b0~-qin!YTE{x zdgFu*r2m#VZKJ%|V(I+pdK2or-xn@GfiQal6DD$-kiIGEW0t!ZDcw~^FP0F!Xdnbc zoA%%64p#DqVBSg6Xe;jK3-zlF_@)`}q8-YV4X+!USzqQ~EWy?IN;?MuE4*e0AEbZE$z%Q%lUpMq`PE z6nlL1;24i{aj_1m7yi~LLr-d-PVwnSS!yr zXR)GoUKc4e`4UmVraD-&%qp^|KJx9T_5N6=(23?K@V8&Vq&}GyxBxOe4#W2Gzg*g` zGOD#F-{%c8Couml{;>22fX6GR8Clqrinhu)NVlXicwb8SWb|m39(Lg(^aJyRd*y44 z8+~-guf$QDvB8l1(#)W@uNnf0K#Ip}U%zeax}QiDsJbe*Ztu%>U3XP|=5#`_(Nx2K ztFw9n_TKKfW7Wa@+hZIDZgd?rDX)eCk~hAxq->U(bS>Wqw;-lRU5EMh1m5F51l-73 z3$V~EsmTOgp7>J;nF;>--FD*gWqsXtZT!JhOx=Jesl-+PBqZy%h@}5RKouC)iLeMM zY(u`y*Pql|vgTvY4ES^7;A~h(u1`qBOufK8DANc}zQFFf20x=QS!{Q}7L#Z4AK6u8H~X-0H?M%xbm**%lJf~MZs|B#fFNWG^MctJ2@J`Qso_~YH|fRLKV0Q(&sN~aE8w*CY926h zWjLTPWt%~cW08}+b#UL_JU|-d;y8KO=p9&FBdyg-%wp-Ayf|~ttPd0#L~n{k5V61d zJ6F4Al%Fqd=bjkF&i5SVXO;aS-`2g^LVv^g zfLsNf+_OM-ahuj}DV3thS1#|;sG>d<49f<`l?~t8#V7EL0Hy^Mm8zvEBw>wcT9Y=w z167G%;gX%<#FB`*R#~cm;Cv0?rg53ZfNBLB=Z|uQta40lmjH^n2oleLYcg0#XDEgR*rijUb3XE$PQ z8a6A#1Ifq{+#&9v9XgQ8lZd0OS#@Xr;@E;=cPfW*2ptD7s(YhQ7c%eO&Vsp8czpHcr|H9sU9N3yB$4H9))ek60f$Eq%1&y9nzbqH<@PixkD4}q zG6uA6uJV)9qgV-wk(`#T;Ef!34*O&g=cJ!tXt5IoeWq-fTd zoqEfxiOR`umJqw%nBnp6J%5ZjpZyBEWefzKyy%28TdL*SGaoihz^;}`Dlce^>(2o} z;7h-b=1e=`Y(|zR#+&<%_tcZZo$n%e&QF5h@{58Ik+8 zf)@Q<47b0a;IDj7@)Hv_oyIFVxCevaOr`gn0$QO8D`W$EBS6BYq#}cFAF%Z)mM*8= z03#sxBc55GYmBlyqy|6+gJs0TfTXzSbSbGxq0gDxR}N<*TN<~lIlP8Iu#l{{@e$U9`6pY 
zPeB~*$-|<~>wBX+7SE<9Bu9&k!vRxxU@~ApR9%7JHVH#uwA&be889H*Knym@2pcxzKxAefLX~;NlY&}NoMNkGe$%Qx zke;?w#BP~{1A@>joPYTR$q+Nmm)NZy<`)W@Ln-}(QiH;i#j(kLTRrkhVWnvTOTn%O znX%I`Job~C5Qg06#og!ohder0$FcLp)A?)FcpRkIH&so>;it)4vXrF(agbv-##w>v zGpTLepomri+9Eg4vs3{nj||cUl#*hqbq9`4k(H{^iM-ZlFD{P|D)7G$M>~A8mjdIa zAGgQ5-$){hZc9*u>`b}+3)GfMD;fY9D=B&`tIJSMr8aOY zhonGiIo~jOe=Rq8ChD%Nj{07 z_V(~6vsv1(;nt9pcvK{Bv@>5Nkasus#jmTA#zG*}E`A*35B;4Bgu@yN_;sm{eT2em zX`|wFvZ9Zh4&Q*_>(0cgy?Sc_Tl?{8sqvN|3$D!x8m7MF92l8|OOi??{E+J8QxrrV zoqcmL#5#bJ!`*b4@e#U6du;E0#75BHc`%qz1tF!s^qoY+Mg!aDKb+%-cpp(2jA*t4 z**|KtCZ{|f0!++@DsuA3zaeoQnPK__!lSQ^=41CxsZX5;0?zrSIE@HUu6BT1{3Syz(L7hkpcr z!Hc<|wc{2ys$bK_wPZ~pb&>P$LLW|M#r~zV`|n@$27FNvLhJ~<>LT9#ah1m5&mPp~ z!q1AO?w304ZivuWFo?k9^i%$iFWMpu8`pOB!d3nriPC`7JGHR4gU2aBv_(XWr&u4gXa5cLBIws`vC=)LGXc>be_d}7l$F+^FSaV_3x3my@?dA+ZnzX zQ$y%7m2R>Q23@{#;S40wocmY5&p9E?7Q_Yei3 zx~Rn1IVmvwJ@DAP`T$?mI^8}Xd4c6FPv8?%VI}JQ+Llek)~PYV|(vxfy{#U(p`+ z&=uKOhm|_j-9Q8@(jbJnueD>_lY&0xi`F6C*QyyfrHxz^__L@qw(foGqt+;^R!SD- zpx4fc3faV$WKk`Ta#1b~Cgo41n33}LL7hp`_nZ%^626=9sM?6B^P$9mSz__<;oIi_u&+IsaiV==;g&P5aT-PqOp8bN?a;YF`v>Qh?nfEC7JimW z;TAGmbpyWTq$n$a>P5SS`ul~1W`)T8a|pT#Lw_0Pe$!2EOTUBkzi%o$@25BQfl7yZ zevQhV$8tuGGZ}>TFC;(CR8eKUx(f3Ad+r;5eT0mF9>g`=M z=@?1Q*uDS@NWgXXho%sOPBWqt6FdLX92)TvE=Fh@P%-EMR5~LY=J_27- zNG+S8$OW@2vs|oaOhL=$U&CImr7umpF(D| z@I;Q%lz~kbIK_LdQX77Edc+rtOx@5xSMZwKRDVktcDsASD9BT@DQ-ujgjy#=aOZ~Hk2t0Lhub&BdW=FdAv26DWpgbQZMWD_{EjY{`WnRgq1V-Io?78-}?S2K;HPx3_ zU@az$h0ut$@@!AtJje%C_^tjyR(;V4CuU!>XoRWd$!E)~M5DNuw#m<9Ezx!)x8o^C z=^^w!%p=pjW$$A-&YTuCmtxOF5Z0Ob_vrk*wEF{j7-%M_dn=g4Ch%vuwLaXnrr@~; zN?obz%$j$Am>&e>h}M?ioUyZ@lFO8$I-K3nMyh{S*L|DdVw-s-q!%~p2~EXH5U#_{Aos4bi| z{E2JKxZP-`;EJK3kzwC2EwJ?^)alYs4d7i6q2q-p3>mlM_Hn_yzXYmKIpX4t!%xMK zE}_HN*$1XqS>z9D;arHSWRB-toJO2>7m|`3&Z5m(38g zCtY-!{g3F9#p2Heq2MJ*BVxSAWT4Zcl>TO#epDFs*xN0cAt5z5K3CMWWg+Bb;h=Hj zX`|YPVKsSI-VmvJ#mr~T8q4m#AI0bL@fA!-`eq@Ts-7*zkwb0H>d{iE7}(SXKRzfW zyv%+Q?{P?@)%j35&f`kbM@_2A)D}hErYhs8uCxZecM_V3a}PX!>d3} 
zpJ`9-QXd)B>hv#abA2PqvE%QL3T_zQqG|2*F4j5orqb$~QHwqL;BCg+vCiIh$AB-v ztC`qblMH0WF-@}Tv_w8A`nC@CrdGu5^y?VQ0Lxz}<^%Vdg*1l$U0`ykdlH^cyJ*N= zXc2Noo?WAtSJU$5WFq(-G1TSz_Vw! z^C5K;AtNH9_YBY1cn^rr1fDCM#U^IIX9&vxS6@W=5S;G?tHBwCWFS_4y_t~Pp+j(E zl@Xu_r6Bh}D>>r6%WsQdHP9zh(f)-Y4aK7{agY`%Zpo3A4g-225)Sy|yP%D3zo zb>|-8F7)M^cR6kSda*#>sS0)qc&KhC{`EF;#I5fAMOr_D5(w%3L)SqorFmkL=y&B^ zQzXFqk(Dj-;mZB(z_}u5CeUJQIO6>;13De!*y7?VCUI7lH+*R*CVqtc3_&T5CT>?==zi1oBsnm1)Iz5JFmt416IOtkGbwoH4>Jd^v9vQ; zQKIbRr#h@Fu#%iXpdkC0KUv}~cBR|@6k3=7=R2$zPuG2&j7`bFFfNs3Y7D}9sc&iv zLcSzi=wy7nh1YY9gM-iw+|*o0kMmIZ&H2ya;sr17@TYyXHa4AcOwF$EMH=E?C?KZl zLh|cduSzI*x;PrRSEC?mCE^RvYNI$w8~w>h)UW@ziWj^1GeZZ`8y5`*jYJ2KUg#GH z_lx4ZiI=jxbK%=pz8pBOqs>Nwg+?A62t5D#7s3UiI{&hC#0@&3wAe3dK#NYB4ZyX) z)nc&UNQ8+^W7WGD8-qq)j_$8f3gqO^(2tmdb(L25SJ4nvGR7)(>!(IeB)8&*A*^`* zOJ242mv{OxDyQ-T*-eULskm=wgjr((YejPEX}>v+*hSSpi_ZRZeWZ-OuHtc&2#smA zzS8H?&2_Z~j{Ul!Nc4&CVCMYv4MtUwt{G!$9)_*pXF6)FwK_QzLZ_yyvFAS*Cgbg8 zJZ6KZ7Y|LTA5RP_r37W7o%woJ`7f&`zu#nXiO$WRM)3a@ z5i9#Rbz3BWuZwcdHVI&#TsHg)MuMo_s4K#-sKc_5MZfa z`Waj>gNiOUTd3Hz9+%J6%#WI8R?_IY`iknBhHS{WOuLrQ%)Sw4RTn666)Z3(f479) zjh+UeTUY*vvp(z?IEr8Cl?!yGg4N&{Nzzjf{0+C;o_bPI-P)LL(dn}IJ)uH{N9V`i z`WJ9~25#h;`#&IVnueEOF%!J|PL~XMZg61CO16bEh7kXrf*Yd9`n z{*4M#bu<>Y5IUG04TnX%zp8qiGj}=JSF&rQvh3AgdjCx%@%H<(3BFn8vi(6;t5Qyy zGwzn*MoDOlhmlOmr5<0A5#Fnj3+xJkgFy?PVM@s>uZZ~O9`o3udk9sfZbPyw-(mk~ z-g5fSd43J0Yc_5U*V6%s`uL*@jstDP4!ia`1&v&_T2Sung-xcIs?P#77$2YrX$&>y z(%lbQRw{3ymQwz@&7A&gO6ya}D^sZ>k<0Wd1|hz2N0~AFnX7Lxp)K`2Z+DHzkn2^X zF5{SX*YS5rpRg?YB2?+1d)ylenBA;;#*!Znv8hde+?ET=%=)#XYy({l> zGI&mFF$(0hP^CH2Zs|5O6?OaZpZ&PXpZ&N~=Ms;NIPVp8S{_E+uvypZtsllTH;Wh_ zKx~3HMz=0*ROK^hd)Qd4UFSf=?<>e2Og-`;IsP#E%`NYFR z_bP#Epb`v0NaZ@-6v*l}s=xW{(WDeBp=!+?jt0`SC$4=ek;q-1WFeT++Ce-6H-&!M zWZS31Fx>GWkVZOx5KO|wOTglVlu%_TsxQGHi+T5!k(YrZ5B_@V-rIwHKaG`>5QpG) z(H^8|2Z{yX1Scc%8|aWisdOCA+V>4%Uugge^DMmJ!lu%gjlO@TY_D6TV`*+^>l?oN zCh^WO6y(B4Gw$C)$3C5wwAaPS=VEeVbM8F&wo^rytdg{@R6H+APxv7r*?I-45?gI- z8Xm%5_5*#Y;k*Z{CjOSccCA;E7;3+Ici))l$ReBWVIrIH@Re`AN?Xg*7amh%Zm@dO 
zZBE_-9lkkxIMzP9KW0$&2phduJrQI7B(8$++4O2jcFN}W`2X|j5P?-wde4gwNVST2 zmXT{E+@R~rCzeY)A*bDqHO)aarR-&!-JVB5Y;WAcT@{TA2HZ0m35O+A-kuZf=_{}a z@=+aZ$t%v*d!D&=;oZ=eFn_tNi)_e?+eZjsJaPXl_5VZ#AuYelEs`onx6CQ5eW4&K3xJgG zdwEXXXB=l2uNRoLQQAv8tQ%IZ3=U9P7j zmm}rA-ELk=EpWH7-cx8qv!+-(#PYq_B%2uCoMW}+oV?xl?J?X;))Mclcl(~PUb95I z`meGk5Z7M}PvZ-r=9sO!a^2rD=!8>+GD^=2t^+%7=jIiDGEY1Tc>hL~f1g z88VDT)*b)dx57wV_!!CC^0hbHZ`{gKx-jp;<1~(7({CEh)R z^{yKW&vQI;trCdMI$kk&v=)c^dhj)G&sVp-H!7WeA50jP0jiiY|Jyv26+IP;AC-`m z_PdHgRy0^s0IhrtV~_cuvL3T=)yOohEFcw=@NZK!@`eB7Tn(>a$_L?4+CTK@d^ zU%2M!rax8~ZfcJ-4gc=9j_E!8FS?!o_M}`9?O|z?WjjDCyS|48ZsMOyA!dLEZWwBk zzfgOjHG8P!A5G03`^ZPw&`14aS3oAnzx9NnO}ahxR;0ZeOYZ5EU{vCrT3Cyq<$dXm zQ@HC>6W;;zq!sHC38lB#$i3JOPj_-OvMv)$O1bzrm19?(_V4JN*y;-EycCVtQum0i4B6&(-DS640UKy*a3>**9z?yytqKVrcv@sc1G_v(g$OR53PJ8wqbdYX3$P{R zYeu-tJ*E5}OFSltn%P&1bDK2Z$ZaAIbm!kU*qhUF4}3W<7^0K3*cNJdH*tyX=0DxT z|I4idk)M7|_jPkyxboi}dv>zHw0^!f@k=;#@U`8KwjW1wnnraq9Wj;Ix(d>Gua~4t zlbQGB#M&(Ul!8{mio72M$df;T%MS6u>VmF)Mdbtg*01N3BggS7p zBMlHpi`RplHY)_g6?RlQZt4PxNMCmd33e!h;vO12baF8|+R$V{G77*5%e5XKv&y`Y z=b15pXmD=e+}rV!d?!F^4L;)3VIXYHd`}Re^?wa)AfOf9J>a%qiNSZe<0-&h0LgWE z4TCzlxO#VFVoV1-O9UWp&dQ|q#br+XCh?8Dfgy6V-+$0@o;a6^*AuBeAn*G=_^oJ$ zxR51>=U|o`{+KId>X~Jymo;u#c>lL5JrX#@1Vzb0J)~8bO(JRxPNbaafA{+JU&7=& z&?Wj-Ut%)wa*2pl*9*xlU2+Bi94qKIsDytP7yv7*k-VOF>4oUD7G{2WR5`2&omcnD z`HDSrEJ~pnGWEy|N;tj-E_g==K|`tSf+02zuzjW*rO!)^eFWFnEc0 z3Rkv`wvCJ|ZH6yP3`<;;`*oqV!q+@CVmx{pPD?|O(rJ~pdZvFTzll5;E}+KEU2w_= z)_!_a6uo0w(Y@Nfkg9xdRCuaWee7$m{jW#6E#c+@EKNi6>O(CUW$>rF?n0CB)>c&i0nvHe?8>u*m}4Ef(byKkF1{O+kmyY6^fh z?rYwgXN;Cl6E6*ZB#O}nkh^mCdQDM(Tki+?i6k9BqX}IHzu_uQRxd^JSf_4*b5(Ou z+ikA_PWbu#B__F-^RVV_?ciybS|k9y%59CnVBtEcYuwKG%`ZFSOo_t)U-wN4Kr|~I z0a{D7`7NA?6?!Kx*N=oN$_j#BK7PUS3@q{tumq8Tr*+q0;frtQ3vqY!VxvfPzJGo& z8v#wPdoxIs2hwOsx>8(-J@nMMpKhj9`$rJyyLrEEPsCYwcv)*MeHG8)qQXa7p*fWP ziVOHNrMZ}sNF1yqg7>m8ZJvr#Huu^jwLJ2&HX*hBsfA&w;Cx30fJdL;o(u)enc_Cx 
zCVvb-Jy$%1bb*-D$Wi8;9SdyNfo~kU#ek7}=CYI^|HS!KO8JU-tN?*@u>f;U;|Z1I(5k-c3cZ$y$@dqJ2oz;pS`9mQ@tsl6v=#MY%&vESU=st&SSn79_xQ4o?7M*IckdJ&{(29jF${? zcV9c#3QUYX@<$j}^0AZ8tQ4{ZhK*NvcH?-3c^2Lmd6qwnop|;F_cZx%hUD$kh6{xJ z#TfYWqDwqn?aGQTxb@4ErIF6n+tQ*rCL=}PCuNy_Y@T|$5ra49!gJlyUydh^qdM5j zc~C@sa`z?f8}6M?5|O|AMZD1D0O_iSyOQ66khtJKcr48zw$NqZTakBw!0_c2Z%{ee z=I%VA%M$o0XYTyUy(P(Q!vr7hRB?1llVPe^ zMV1yax4v;N{Y&)`one^EOJ<%46NCP7b1XAf#?@u*N^C*d4C^hg%vDaDXfWs^sxZEa!7NU8Hyj5 zgKm5K&hMchM4}8Qo|U5PLjuzh>Vwt37Y{spI49HQ*ekt!j$(H^V0$+TIN$hT75M;j zra9WOoR$G^x#{-zv>VOwLxtFUMKaQ(5vP!VCsVB4S7e>Z0;DZDc>UhJESTm?|#2#yDV`X zhqiT{%J0N`YWO7yM0r}qs7+6l*4-ETTNyYWQ@k_2_Ag!FJL^u~BN&MQVPjmJ!e2|Z zFC=cQ7zSK>(%%~nuLXx&%aqW3_+5a&?sNbUX*f+2CZHMr3Y?HWNfeZ?b*5Zs{DmKk zh}_G&{S63>a!qH@*XT%REa1Z}MR6A>sf_Uk2RBGJ4_6ChQXyHGKzSswXdeGeiAFGQua*;%V165;tIM#s zE%Ok(^ z7j_=uOtcJ!0oKp0x;Rkpv4NQ#!$<5Gdh9giKlRj`hpx}ck-}7 z#`h+#2}wA-Ml&#OpvX~O8_u?W#9t*l${=)+qZZ8bs-qCOXLwd%LgV$aOelli-~%x! z6PT#9SL4p|uB;y|UL1>uV}PKV!x@&@gR#@O=};5BLgS)lvZS?|uW$*Q&C#`ycI8X) zPg~v_UF$w`Z{)dbET*||){bO68PDHCU^h0Lx@&K^yyBA!ajQ%A?n)V8ou^xKU6!&* zWT=swCv7Qll3lBaT%NCtyoyZzM25N<2oj?KRVy%P=ovc6Dn|g=aOy$0xq|$y| zMoO;U>6O!$S`uPDXz7lo#)(md^6k5ji({GHB= zfA;@l`6Vr_wsdaf52f& zw(NAYnsrf`khrMy7K|V2W~ox0n#`85ncV*K^&(AIK5w3nP34bm0=olqx8g_ilTm-Y z=gvDi`7`tN({k{gi^p%$^`E#tNW(8afv_N1O+^9{$R-`_q;yF8k>Nx1K0?t4{dCm9 zZ1G|&1%=DngYL-h|9R(!X;5lMQllLM4b#g!HuhxL)E82JZh{}I*QU1!125L}&9)Cw ziyYRwk})@p;)c)`x9YU8ed+eY6E+JBU%G{RLmE^)XuSEM1cBLn>WNpz4 z?p8*cRBR|4)JUwld zWYv9;uwzlF;LACB-XF1ru53G#XuDhe!Ta6IofvF%A6-RY9`rgzd9$Qte))Du^$q1Ctb>pK#3GQ~*G=hl~ zvNU{6s#>aNOW+>tQA!;gw~2oZJ-58_lE{y*ah zRQge~sDqmNDbFY>y?7r~%R3PB3yAM72}YDAIY-1%VBJHHm<$w1p<{R;Ow%V&ev;pD zLlTndQ;vqv$BdbfzAr}K_bcw@_Nwn|DJDEsd}~;_1Py;AHfX&~ z)nXhajOpMWx!IsYua*@d1!ez7^?;!|UK{trDTbq(1isex+b!*PybB>)uRwM`0~{Lfox9C!u$v_vGkf$?kamo>G_) z7wcJT{D;Tr#t01|uBdA2f>=Ml>)`J0%}z{M{Zm?bQ`^6P{iXa~Zk*`B(zN;^Lc)#m zpkHhuD#Bm>7B{MYJi{PfVo-Gf+7gJSlVv%vu; zs?`UFa%@$(27-07`9HSh1UlmP1*oi81#)XdRl_X?HIe8=ceme^NSaym_`+D5M~j?a 
zzfs#c<^O%c!!5-F?-3Q^6}odG9L%Y2H#wvyfnvRl?%Pib13L@}e((mA(QFR`=C-OX zB>I*UaTta0HJt0-WJPVCor_)v_z7)?4+>NZT#0y8AAPRhH4@Jf$8z*#cq#p&xAJ3) zn%G5lZ*aSDa8Z4$k{AHhMGrrhY|-+^j%Y5Kg`XK6n_bYfki>63U3z{Ks0y+0{76ivaG{SoCjC>cWMiygJ^Z~P?ey+2#kpt3uf zxPZ8fkDct@*4~@K@25hH%N_SYhA@qm0ty)RT}6($8|E$8bMBpo5n?^KD+H=^T+Ak| z(Edr0&3s8Toq)9(eubincF|cSYzXaU;&u=E=!lH(6zTV0`-8Uv#d^Ru)$##bG0jut z%Zq8CIiJ1V5DpR6i45|le5e)J33lab3$Z2M;{5@6-nycnyitY>cYS5~>?cqaDwA&{ zjl?=31+!Fn6%l$ELULt`0a;Vc(Xx3XyIJ+2Z{K%btbjkXCtwb7>ZsQvg*+h_`htP zUa0?>2(MDEYSA9t&2272HO5YN#y`IiFXz0%oQ6~*(KUu~*o z``Xe~_*_|@C~h09+SOC?DsliDUR<&czX}N!n!~O!M)pH{O+v>nFcXm!UF)kf^OCXv ztgskG+CypSgEj8%ROI>0$+ydd@1 zbk6FbgAv5SlmvDY67$WoM3*lZCojyI3(_?;vEy0pym=xsTEQZqmwG6`m+#%2h=Z;k z6u;PS`J%vlD3}515u*}Mh2ByS^b2hnY3LxfZwZFM5JhqLMG#~{w@JU=GZE8Q^r~u_ zT(rJhoTm8G{1U9 zjAi!3i}G5j=d_B9LR;oE-U~Z-lq-LSw=V>^$j(a*fHN?fU$g_>?~&V?E1_To5$nK^!wOegBlCfgIY% zP&2~|*iXsaLG6ig5kv^AT)lxHIAOacR12UEVFU;tGYA`lUM(t@n^N^EH5X9)(Qu4{ znFNhle$f8Spv>G5jX;s zAD2oDUBpxm+<>_gz88<5&tGFP-$k>j$=2bPhYhN3WQ;NE6;l6lJIr+tqa#21$j-3l zCu^wmM7*zqPOSRq`_C3>2yJ5{B#u_h3ds##)d)Urw>EFSnKnEv?vX6IA23s7pve{p zwgOqo!@cOvwj}rCq=DTpsbab^H46pCkM6QP+{tj~H|jE;lX880Q*+9>j9fSA@u@ER z?`IU~8%>nT#uG8meHbw;dxa<7T}gWg{RCOu^(YSGsAogI<6q~!KDE9);AY?ZWDK&J z0Xm#k5adOdCStya@voBLn6S8`lcLR?z1$XVhT)KK^d^9P-6cGOt z$1E$Nbf{;g7PY>D4a}rE7lc87#+Dd(&G+}mo9%2d45|z|a|-BPl2zi*Y$S;&7K0*K zA-!(FATkIw)z>ih_Qgj3uBS#7$TOgeE)vp~Dr>UMtjT_oPvewv5mg;h_maQCT}d=C zRpw8|wsC8LHTWGIz|`JB-9CRGbpYMQl~Xi@!_-~WAo{^ax6x(hz~68R(CPM2`+Ub!y(rsK@mWra%1!Cy__$9f`bP?`pheBk z_M_04xbJtYo<5Hpd3c^Zo6dc|M8@K1G5+`!=D@Ia^<79H3_FN5NYU6c2~1>gH{fK^ z$JE%_b$36>L^5`Fa5D+%>%?z#x&Mx89xhfi`EMjn+1zaa*!3MY82W$n4xSQ)&Rf)tp?$Ef6uj#zw zmC-^@B3dLKz0@VYtpdGtS>hQ;f_oL?F5A#7d3(t4F9RNVFX7!*wYm^z_{oMw8*i@^ zSu*J=-B{k#yY1mD@%Z}962rBQF@W+?_E*e+Cy`yxf}8EyHDy1H#ip6&z;AO-_CAw0 zUR6xdqYB6q$zC+Fm-%w1&w+p(ADewt@-O`bGKl$XLO2Y%Y zT$ocEa`mq)F2BU#7cR0}m1oBSY701H37%m_r?)zPG&tw}5Kb^$1z7;*vOXlqgRDm2Tn(+TezcJ2 zI$S1ow(}7b^2rZ9E+I}wtroVmy2~sP1`9~t8e8Ppbf4ty_Jc?MXi;C2qE*Vv|BtaZ 
zkB9Po--j!OvPX;3ZX{}-fn0#Rea^z=Zd#d{Q50c_Bz=oN+*H9`Tdm}m$r-R48c z5}5I76MeeJe$XL1J6(7>LTUjv&Tz5EJodKb?}_7JHf`@Q^oh;fdF-|0{KwO;1=C5` z9oxXX&Y32pN~yDnbyF`$DqQQliefy~hOgUdr}R>#lvrjMaGq+F2x5+|ZG|ts1MSgd zvR3E^HXs0VIR173O{7_wmKl`C4CvAltP&T<#SM{~rRJfa;RVPMRCWABJv>-Pi}`0m z@6O%op1;xB&Q;xYKVHPX0gJF=8So5wP>qBwa(DP}J^t7J%Scpn)A4fj$GV zVOv}oZmQE#EXj+Wh#mRzWze2O1tF1lB}(mrQg2SG|KNTK*q|&=XFpCb)P#8odS5$tEvg7KNJX%+>q_ug z+hpAn!PgW%SaI@WWw!QrG>k4O5)sbk)E3Fk2>sP|NqFePc-Z`4{vSab@lzFqXJ`H? zLkgmJ3eM7Ky}P=yeK%CBp_m_b4oD#{Fl>?7{bt-ivrgT*KvP%UCpvFjA^IBHyHyV~ zl_1A$pqZ`CU48~w&<*b1hPTdEc&+8@NRaa#Qv8Fa1?^I|Uh9WeV_gIh%u zA@ci)#p4q)onq+%a1c^{MMZRtqYin*o(1mw^k6j;8-3(zIdusd=KDEkgkA^Gw%~E52teBxa7E-or!UdqSrmW>wR9LKcFV`3(dk-lF<3Ih%K^FR&aMTqxgU-1Ugda`6Mxb6&mw^nh*h3*P!dHp1emT>});s`0v2^}?Xb z^`{^WQ4Rcb->76k)Qm3Ka{I8B)9w_PtXoOU8m9mC3E!qp&f}$yqYl+xBC4s=f68GfVsu6EjgdqUv%6#7j4$>G+tHXo+){` z_WE1qfbl5G?qbZ*!0)1IfXhQEaE~b}i5F2__hCfNmgmZPpy?_uABaX^ zW;jFwOWSp>NtQpl|2>Ne%2M}jpi^u*)?*0ebU#U>BIX$>SyKkN`Xw+#)gs5*{G^F1 zeSI_0*Hu3*B<7qki)0*b^;jYK&~qB_^%CW?fYKyxkc{F(*G(M z+!bNP{*TVc%4+Yy=F931Joh&6wff_S3QntSR!XeMRWh3+%Hi8QL*ds2r;k^T6L|oc ztM;B!4fFRQzW(&Idgqb{7I;Yh)(vdw3L{N>z?>rN~CwS_5+jg6~*?~AY5{QaG6}%Us7H9U*;5W;rq=;q$?iSH^YC=8Zb5@{&equwzvAv z8dtuHSdm??@0rbgMy)rK6Fk#ePJ;PZ9@3sVRu5F!!OOpYyUMG(gdFkTzW@}}*2TG< zPMH8IJSCjXRcL$y)WyHL^8Qbk2GA2W_8UKcn~OjAW|nbXXtY4{G4uB5j_u)ATl86v z1I>MevCvT2kx9kmE5i!^Zu_vaJ>ZuPmNglIc^y|;{@REbtjZZp*mp!^vGc)#2LIpY zr#mM=SCSu=_GC~Q^kE}E%B0G>_e<1+{a;z?mhI$d4$%*8g;7P8l3r2{!(u@&n+dJI zB9~l6wImWImo=)M%T3+d?Gh{NERJhgrCHqmCollESS;%n8EO;T|wg{N4 z+P@WrH!!db-+*Q(BA_-%vg0CUo+vDt(p8=$glkdaj$lD&fSj>8S39Ro66Bt@ykPN8 zcUYRV8jl)Ua)yByrrrIwzA_J^D7Q`nQc2kg%&zNA0nsd>vTRUMPQV8^Vqo_S$!QvP zU>@A~N@0W_zL5tM07dxe`@q6WBFqhZXu+htQe+{G!D25@o@Vji1r3f^^B3_a{vhxr zMlW-d*BxjUOOjEx6>O-5OOh;1p|=gy#|uH{8@?rDavCEuW&B|4Ac-;&isHCHLuFe* zx3cFT3lgaI-gsed0%U{d``at{Y!?1|l(}hbKPU~(-VRmyd?$nYrv?t;r_b{q0AG^@ zKvI@TV5r~y>5k;~y#UILyJ6t{nFL9j02~b4a)~*tsaiW$lu`C;bFl_JZ{)a|S0yK?haYhT!UJCsddh$ftF;!KC6Q}N|p5zlcZ4vd6`!#d! 
zp_jS~UL|X>+GDW|+CIEjMQ};U&CSb$pDQol#bB#}k)OfikDDI7{Ax4*3S?I!Nm4jG z)!|C)tc258t$<${yPFiq{t_I37+?yTB8kgcwk3F?ZUWK*J?)e?D*QHyAQu-Gr1u~ithq6VwTh?nVIp`S^`(hXRns@0bX1(tH^udPb$~c z86X_pr)T1Y%WZZ6dzF&k3@Mw*&mHif;y|dF=qETSal2dRis8Z3_2MxSf3VVVWo(B2vEp8&$+k1RI8FBiH}6nB6{t& z%Eb)>x_V6!H9v9cw?*^m61(q^Sz=(YKisKuI3s&R-hINW zKl?)0a;r*++k4o}G(IDbNp!0Pq5XMvn%b*$Xp;ET8GEK{t~vMVgH|W3nQu7>E&G99 zACk3Lxz=IL{Q-tFyUA?Y@D#Kdq&K&#heh8#Q_nK}>@BV!jAd?TIyH7y?7gYRWt1rc z>D;}F#Xl7)89+A67#Bx5f-QkEML9#vx|-RPYsjOb$EAHTgd%t`<`sOn9nbV3mU&efg9bWaWkmQX6sZEq#NduVRX z*fau|pKT54`r8O2e*95OR_W^F@f=eaZfi1#})PQRhCNFRNL z_E$GLJ2eOs0L(etk|55Q_t)8u?g(2izs}3*yuY$37cQgjyGY)+ySknxH1RrzsZSue zj|RH7o@S@K`%o&IX-lZ-Q~gCh>kuJ7uvW8~?4M>b`K4dpbz;)R64-TSblSP%%c^+= zIpaUbB6g82_?AlKRjAm^hT*Iv36#qf`mcY&kF35?5)DnfPFsMBUuR{k>P6EQN zU`d)<(;#|>b{|;pYtnYHi08~r*hHTx3KJL6tCg=yec0%lIn#}YlaD( zuL2e@-LM%a@UmLb9NbhePGkR_NeDhD#2giNLWsGxCA|LF^hpG@d*v%eK`KT`oDi&5 zEvzW;)M-oz>zSN$h;R;LPrmH>l~5P7ScQR;>)55bsJjWuXSfx4**Tc;br!n?KO-yk zp9f}~^h@zwZAoB^;8f^e=VP#ujxbWsW{QTYM!;e1wAvgqfg;(ly-YT$>Ip6_A(hSD z^YyoWNX}NmXCm`5(?p~rzC9FZy~nAhl6Y0tbnXPyX;(`nG*<$x6#?Ru?e4?ri3ePr z?8dxb^SEAS*U(N5P$P}-Rp*NI&>Nn<0Tx~0jCzGeWr1{K)KX7TZje zjyWIs%?7-s0lIjpL&4CQNDW^H|Ng0__=u&W{mtcW^NOBi->DqEaEW}*!6#hlBel8S z9d6E857&|5ZFf(n^ zUMP<}$Bj9UFff%QV*C~gw%=U1b_dbRUP!q5r~W}z3awwn?IBC&UzeGbrm@xn1_Qa? 
z_}&mMMTJOo>-rCMTwy?&D8^}})@fgPL$GQ8aHMk~$m+bUVh4u%MzlnUZO^|25*KLqfJd2O6kc|a71v8l=foseUZK=wep z+#jcGOAR9vFN zf%x{34mlH!4(w1af5WtMe1|n+V)kXq%)DFHV%_}xA3@$9;gPoJydK$3#&ziUs%zHm zplVfFR+WmRymgxK%Di$iAyE~s$9T}GAUOn4d zq5VFu(&y@H{@E;-w}cLnKH4u-7!q+D}L%GD1TLd-CIZj^a5)0zNo z;nU_rPqUhF2Y|X-E|U7}{ll5ojmo_1kL|p9lL2Pw-U$sd&%QqWe75ygpzYvJ_Vscv zqXpql46{z>XKjA7jC!cd5DB(^IYp)2r6ZoQ~E znj>A;nm=a_7{!wWDycfBI zGv>%XIKHwQAu9}Qv05?W(1_noSoro>2=TpvLzF4%Y!3k~yV(Eyl{&Zwg$bQ=VBNns zi+K*5K3=eu2G;r?(o;PQEoWih!6={j8*c`OYs6Uoe|3K;w8z~`f zoIVgB=bOo+6&_2FCc}kbI*~{uhmosR9BCr2V(}%C;{VBDrE@AvcKxL$UJa_4yDTH1 zOW%HqL?b~@RpYoK4ChQzH#V;QepJAq7-?& zXVM@M^~+K;>D6mIEA{3WFd-zzSAuA2EJe{&` z6=|Fj(qvbFyB|HP8qjh%z5&*1hqhT2WF(f~N!Idc(2DQ5x|=SAoO!|cu+?gMYiU#- zuf^0ugPgg4Tg${_7^#9%)SF}dWYYc4TSad$X!8QP`Z=S{U<@pD^W<*&?0w#>{9jhH zOkiEN<*L>cuFYx&CI|Th!*-!)g?RJLS?TmVJgNgdPhQ;P=O8!xZtMV5~%RUd2)nGb`e z(cI94K>n_d8VZQ#Mw8_aS7OB%J!=-pp#OW9Sdl&>P__j2jcGnZtetZ*Oosl!nLim+xFvfk=NMx*w{P{p1OP%0UG4m?_NFc~E<&1Biw}E`OIjE73$KOk-10<1 zZdyM?j^>m9kuQk-E5RyMTfywvYFk%-KkT*A{qoiv@tjS zl*7}|AFG5>iIzjy#Hc$vD)+zr;vh8x1KC2T9Cw zojf2>eTUb7n}{@S`g!FZbaw|lzwUMC|(FJ4TRH-roSV6lWhR_+FVp=xX7!95ea zh5zl?VtmcM#m(%sVLYwsFjyIX1k7Qe`q&d4hhE31aPK6rMDIr_mWh0% z*=3IKpNXoTvA1b2Q`xy9hY?VFF3e2<4#5p~FPBc81js@t-LYejY z(34xuKQA9}k}4KdN&dC99cQHWis=t{LTQGG(T_^N2 zB~!a5@m9|mt?+Y6ut!h2UfjD%FY-mHk-2eBYZp#uA=a}jG-Ev>t*g(_a^E}!`DMJZ zPzgGaei~1c;`SwTA5@O;qG98aF!;lK6df#u+^q^AMrj~BO!w$J(ZBZL{fe=KyHEa< zPpA0Yqq&H_`*`u&eFzo*!UJBb-fPrP4}Xas%PNqr<`EDZ?xZOZjU`QZ=#wst79gJrkK}SB{0Iq0%v@u zMl5MErHA|ZoMzG@T63Q=JRQ@SoQG^0To-arW8!sb6E93%JADb0NxpMxS331AE~uX3yWEBv0fhn<27&lTLeMn_BBTZ(Bi?3)N=Fm_)`FX1EBa9F;f1pKJ~AK z4&mPd(kca}1`v-Q5pfky9Y z^VI$M--xG(Cl_+5obGsx-#2m}xa>bZ-)|nvz`NJq^Z{OF+4DyGoXS zmX0z7KXt(xdlJJQ)Kn=O_4n6$2J*j4#`dI2>n?Ohf$Zbm|AwG5>2t1yOS%h~%8|6a zeNVg|U9CM9rQPek$V?Dq*^%R}DQRMQ$xrsb(IGv}eUOd)1V1OED}G_f@%UP!CIEvK$~3JzE?YYA z)gJAR!DcQ|g2)bzHuVTZQy-Fu&f$$dl0>asUw_@9JOkm;lLQ2Cr+@l6=-UaIkzr(Gl@M%8A=k61a1j&yrw3rDp?sLe1 zq>y*z&roAqx4+fGP_c#jX<cQb67I%Iz3`a(@--m;AT8yL%ZDT!0p&~1WW=NUe 
zjkL(CJTRwc^mJP1KFsQr6ec3Mo{$yfoA|6rFjKAy;Wjkf01~3zh<93y0&QU$om4Io za?ijz%OcuysUK_hbOE2Y8TUp#LS9|vHd4e56NbSGQ=Y#TS?4P|$fhGH;jQapwcEBZ zQ38A`Ux&WWbB{`QRwj}0@yt+84Ax}$0PAG46-7)`b_^G^%+IAuggF{{j^*m%N-K;x z^A_Z#HJ(0^w@?wdRlH$wTOi!tqvG2Bc7KJL-}9``x?bMIU8gbd&^wyk7&JQq_$3~E zZ7q|w#m&rK$Hi}ziwyV7N8w-Jq3t?P;a=}kSg2S35H4LBIoN(BkiI+x>H7%^7vPL_ zzd%W4BH330fG-**Uqk*^j$=HYV>6aIhgGz?rv3R+Tt!4tK6LO?kTqBcrIBi)BJ{DSq^|56 zzWXRKE24>i{FAqoFAtW-->bU%{=Z(_Y^pNLbiZf~7b^MwNA%#Php;U*;AE;U{N0_j z(z=s6=b*fl|NOnfe{G!qpqao^DDKQH2P}92aQH5Rxw4^P*2eYlQQx)pW#F{OFPFQ` z<3Lph$5EbskwCbVT|aYd6-autXaODZ>x)r^WH>1Qi19>KN;(Wf>J39|S*8Xv`Kyw( z@AIx+GICijyy5BB9OtCc+0}Trx5x3E~59vqj2qWioZaoyn4`XJH0Tn1`Mr?T7kSa2IchD z7+kXwB!nfL|CbOTTvCdF+b<#F!Ln*Z+RLNU`A9 z;DnlNk%+5)sza~WiBChd{b;|3!6$SxnabH_>wjc3d9lxiFbm|V^0unVthXprmJHY= z3p1qWxyW4UzkmMNb&C1TBM6w%&a5A#Odv1j?0)2LQdKlM>mclBJ^yIeiLt!XGE~AQ zO?!hF!OJK$?4;-kigD`IMXT8AHE(a|2<-u0iyJ)a>QCAr5wExZ{)hc1Wg2mlYmpU! z5sc`LCN}Z-2>pHlq##N~$hb5BLIGv7*-2*lsa{iw35$&!Ca6jNeK3cxQDzRzpedWr zt1#Pst-=%4)M*8}ql@pmDtdjgT?eJk&)O{7aVEe z!#WfKSd9~*i!!C~3~_vP89G3k%s^U$D(|j4p)9t5trDk5PD0eVf7K(g`=q}aGK{ts|72*cEY5J{+& zx~)LR10eOd5^ir9YDA1t>u7jUUSNp%h4ZvsRzRi|hlY3VtW7tGv08WM1RNc%U_ciI6+i1eP!iPO3dwko z3dkrWKkvT^;$Jxm=v()37muBG zw}!v%mW|cmyHZ?kXkh{zQRU403VoxqBQ)1`*9bT<%~Hp>7ehdTF7ZCZUqxo_rN2so zWR|hGYWEhc3$GKs-;76SuHN9le7#>Fya#%Kb}bfHfcU|};hB@%i&U!@M0cUHGwQ_> z|GiojpW@y&uJdUE4GZ}o?}1D{lZ-Qca;|zunh?-lFF`3nQ7)b9M+dR{K7OAR=iZ3z zK5Xi_MG;+(Z{4sr+wFW@%xOCKsS?gatGo!vuDh^NQD+JMG}}T+ejn# zc)6+FO+LWay?%*@J3F4G43y1)qYP%W-yg3AhiMGeY?;n1Eb*4e4)Ry5x+LO}|J?7` zI4{y88N{zrNuYyWRR?x}0hpS>2ly}cu{ws~i zFGS3*aXqb_*6No6I2xefkoYbmv^?g8rm8aZv?4+;d6-5rr?-C!*M7IdxR&`I7c4P# zzby+3pv#H8vhkP8dI1}HUHHc*mtKH0)nprtoaBYCL}`Y`f=Vl*OX{9s8Q>Ski|lRr zD7)zEtPXTtM(iA(HpC0G4tJ^Nkg&vQTH!?M&R zIh979d>7#H*-HL?zQapmE`-#xo@RfQ%3J^7;dJItEYQDpiQMdHC;FVT;jFmF@}b?k zMBmT6nEQ|A^>^*=0vO6C!v-J~3{-&pacT+_tZz)_VTd0Q|JFY72Eku1Au3|mlTLLp zXYO&tsRxjB(F*c6e1e@PyeMH}JX(~I2I~3{B%-%*S936DIntdpXbat}?Q^ctF&}N1 
z9F>^1t^}+ke>|J8R}@GFV-OgD89{IOF6G2&3j6RihVnYAHV3zb%s-!J8ovUSaB;DkXpVm$=AU6g%VFqZ1zS(( z#yQY%v!G??V~Lx`GtQ$OoeOr?4YdEFmB<$qc>Wh-{$ob zmpUzUx_jE;jHYAcbO27`{5UP)!f*n*tOiqb6*?`M-2G}A{KyEI`)87iUVi%kw2uWa zIQ3#hX}l>O^@?4ceea#S7GlG7ZO_2pjr{$$LQ~R^X*_A^j0dCVjdkdzT~Z4nU`XC| zPQFkpnh^kQ11Ov>Mtld%*Rw;lgoo+Kk_!~Kdvf6CHi77009f0ISpW45qT|t~V+Qi+ zzaFXyt?4vR$3ugYgNEBT+DG@>M?;tVw<=6i{WedJ?*9Hh4l4pss3||Rm;zz0^yRiH z9A`i6v;C%H^x)5E7?>NphrC<+`wj&gCQa%jSKpvdEr+=V%;Y)|W^zqkGcwi-6F=9F z2H5+R2Hi}vss&YPgv#xSZ9#%*i_C<`6?BN%qGEJKoMWb=I^XA zT`<=BF6OE0!wH=Qr!N6(U!9gYt8nEETwLaZ;X{jw06$u@Ov>o)44zCqoVGpdTNf;< zy{x~h8L-=p)Z$VQEaz#L1#8 zYjoLZw2agAS%{&}78P;y@Iaf$A8^#wX}dkK?KO6j@Rjc`rc?rp`@iH%S+=SOjnfox z4|G(d`shlDMPR&k$)8~D4f1@rx4~%mks^g$JO224!qIfSo&VFl3k7;t|Yw z5fis%V}$B%-yu7UIKtxRb=s+DPRst^4JneUcHFQ!1-=RhPrvt*`qa~e#mX$v^?dXw_w<0Ye>noUhIpnvO`7rB4 zKkJWd%kuNeNGW2oCCssL?+Hh%TPp+exP-hyYI65B_O_xEyTsH8rvdbv{GM%zeth0j$m=@?+x7H-DdZnqM3Mc|#l;J#;tM zpimEc<^TzGt?W#S21xtg`=EJe?aP=xel3*Hv;>&`6%Th_pp7kqfot&nZutZ4Vc}Li z=0Mlcy^*bS#kT~Nqj8QJT8g~nE3)Et67!K9F8L_uTE#Pu0gL!$K#$9Vfm@gHoI>}< zIn`^XzT;*=zo(_6YTZB#Pk&L4+ZTU@|`n`znD2SnOYzfF?#W0mS+IYW1!QKwmZKU_Vb>U=oDn7N~-Tzk9A zVJ~c}YU#iR$a}T*GzcXYNa_^D47~0GEWP335hr18d?LlWiQw`RtEJ ztPDL0k)=6+efGu1iw7Ya;4Q5p+WwlH)9ld_WlJ?HF^&z;@XXumor?9l8#^^tgne>$ zlEWF^?OK|2#ZqvZl^g_VpKF>1FR{98xoPQ&1voYd08_9}*yx7#lHXd;2pw?V_7d@?SpX+P@1P5I{NaL> z_lxb#Soave*P2WIAr)NbLCHZ82!%G)p&C9RLGFcD0 zy;NN9;)o9?IBxYWC)$6f+XBPq*3XMy&Q_m{HW`4+$=KV(DV|fgCR3okQI|6np6>@x zkA%tgqc%lZ-S5@Fg`u=TiG>ugTy}AW7Pb2ORDfP?Ut^TM^vOs)oz8j$MU{=lWT~iSb^LgCI|M zRHV2)u+E7yEUS$iZ5 z=+>0?E7EL{SlTBt1wcG9v;Ay=r?ikzxYs+<(w6&D`@PqEg#11c5`QZo`G&>)GEiYf z+VSN{Fv9x!Crpq22F95|XDnRO#ceok_bcz>*}X}m)q(gruY|Ci81nIq#OjZNpYRC$ zc3ejd%y4jRcIL_6chQCOhnxJor~6#10_bw)^gQ~-eQov}Lyw#1+-eYw^@1Yz9dUc| z%XLnJOZg0bDz|w(%Dkcd3_dHV-r=krRhN8;>PMXLnm>Iw;VqQ46r(O+(E#c737jsE zRS1c*Gj$v~CS{=6d()>V*NGqhh7zfvoKlac^K$ z2nX}K`W{@@P=OQ0C&(_JQWa)t{n{8XFuFIt)m=YCwXF!%+2M$>_hN3-FqZ zBe^e<;TF?F6bmKEsrcFU^Bhi+Y%2XNis>;XZ5Dl^P#W+&yCS4D;qSv|!x@I2Z{z#z 
zQWMP=BNb85pdrT^KiD3*NJ>H>53ox6pvz6W9Uu&B>F`qiCXD9Aeye@vjEtHq5s?m*1Vjybxlm)5oK|*c(eWeZfN5q#>Mf z7eAU&k#DtgtPT&>UIT|+jrq}7V#Kn>?9g{dje1N9;pk0x*bonQ;rb7uoH_r!1&*Ut zHKKE(oR{-()C1l|(+UbIDg_Tzd1v8Q%}&x*SQXQOuBX{17qah4h05tsdE(@9i)Mg` z9q>{_NaXQySAQcgSV-&c#d?X$!ENn5or$u<%r#La5V<07m%nc^lBTOT*GlKtILAbN zz4n$1V*_sZ&N#}SU5YYE@%D$n9I`Rk1tmZ&r+fd4g+@LUyIMcioa5jb^-On?V!ZH3 z|8|3w$^uX+q~|dtjj2t*D{T*mpe~Tf%i&C@4-jFBZUXeJ;Z^50x|5=!>sIdgemdqY zs(Y|;G4hFn5n~K++Ve1Ml|~1@ORa2+G`kPPCZ@SJnoh@Mb?s=bp_%X(2gA-)LWCoW zHtWiM0KCxCl$U)Lp^M>!?IVM~8AQZ8XWs!Dtli}Z{RpREP?#}P;6>#N3D8unfGXgw~*ln4ZZ(S^HT8*fOg%t zN1x=c>*MJ|;T3?p!8aa@dZmxs7hctM5WPw}5*lQUs=^L{b6VyDXyjL5*p-0Jj%Tzfki}&tBSgji!X@A%2;3n~9!x z?|T441jgTGn{ePLGL$Q`IeBYkJ8IhaY%-SP&oDnKxx}K%4SIubSn}EP{ja?l{(gw< zoY(`~?bbdI`YIKTfwtonx4=&SukZ!lw~tM2R73Ph#AW(5n6kLgs&il=kZL)*W;*Cj z;PXQmWv-mZ_v_Zr$x?+}mZb+AUMGZK>D;Hz++7my5$sTs@qks3wc@j^+ZAv%^9yEO$Lue>YytR!J1fBKl@<9T+H2%jERuXYoB@*u zQS85^T%jM3eXuPgVE*tW2FuqW^@N$)S0v`ti6(9r4f3?muUgfY+UIAcVc!PQ@QCA> z&(w6cU|{;VK~xsF>@N@T<$=6AQ&B=a&ooxeWeEDu7+cV%$GB6D%lX&`Jph+1ITdd1 zKgyu!yi4Ddrj|;CRp<|Bxzrw~X*g^Y5o6bepb9gIYjszrbV}eBh+cx^s?C>2KAtIlz^(mqt@L7oi-ZcXdt!Fl&;V#Z3rsFp%Z>Oo zg1o#;-+=3w>G8`epy5W;(VARtLvE{Pm=go8E!FN&_Y7RNO*+@u9>CR-Nga!vnIM&9 z$evD3m#AH#?0S^+d_A8^Dp*2$$)39X{zvj4u+(gtjM|lIyOF=R5h)qy3WJTHh_9@n z(?YUV>H%6#aw;i%^!$I2T56|3z*nU|qtThrwh~;Mcgg<>FBSjiB5`jdy*Vf1s(`KU zVf;A{OL{79-7h1S5y8y5S*O6_D{LI7TRaKpXzHLb#_F5CXhMD*Z?4~|b2v|L?N7rS zZtAXP^IgGa6@%>&6#clVX5yYv5ch`Q`Yo8lsox~u^BhMf8*qBtn<8{mZe4{T(7yT62RVjf;Zh7f=^)F$oNvyy3b%>yer-r?`AxVM0M6|_|#3t zP3ue!G?$rCGh*%uH5U+WQiV&yD`H=t3ORF+nVp*q65k&+9Q4lE@csyU`e)NuqmOM@ zN$kR92aCYh)8v%zFKpkZ2{}8fUt8x{zZ5u_aOvCC-3Pk<)9pz!aWIoITQPY79({6Y@hDUMWEnC8wdw@Ikk-LrH}vo)n3BWv_szLrulHW0R1 zH;PA%SoeDK5j7W3NkuJcFXbf7-5_u`M}Kmd$s8aQZh5d3Az zUH_ylGsfsS$JR4iNfC@Os&cZ(lW*9mXb`!A=TrXp zp6MSvregA9SMj%LI{Qa*HfpTKMDy zNsMrF(UJYfN)px=KlG^~_l1e+Gs98AQChkPeM;$?C$r)S`n_MUQLku8dE$$x&M07``J@sgx|?A%}` z2w{@<*{`J%Pwg^9|11y(;n+(Bpe4GHNYa)*1AFVL0g-&i;`NPX`peM1afuz6^T6%) 
zJ=)ptz)SEOt$ohR_jh@x%eI7R62TX~|3~GB&#=$_p{H2~R~Mhm9N~JsxYa)2{sYAt z+1=tboW<=_G`hq-!1Dz;Y4o6lhnQw%5>=s&?BqcXJDTn59of3%%w6R$?qxpeBkJXt zKs>6)11YLbr&lM5m_1T|zQgDoPtp&8Y~J0r;DYHo`Pdzd$E3Xh8T02qRUXQL80U{N z(3m(z1(Drpn`!=e+*IPqGq(DII_Z5)xe9&p$79dO4MRET{==v9k|kO5Xmu~QfplTA zyBE?P7|-p`pK$IYY^RK4n@y}k&{)SQc=>D$OWyFPXs+H%f9-eTO>@5<~Q<#L^x z@>BYgk|`4SAqj=&Al%)eh=RM4kZ5b(l#(f5Gj7}urGQcwX7|-BIvVRZZx8ItuZmZ5 zUQohH>6llD%Qz+R;xa!xVNL?kkc+|0_g!$R>2|AL^{@3Ltfd$=`=-3yP&R7XX>QU`!FBcB~$<7oP?{~pJvy>cH|{tH4kHygB!A#SGRaUCvtUD0yufyNA|{+2>|p1!rKo<_M6Ga zDH5vALJRa|zL)&ZxOhz|A`?iQ{W8F2{#mU`TDS3c##`+YaQI#;{g?MDsg9AyeSW#( zmwti4ThXgys*_T$b{?HF=_eL-9Y({1aO3J1g+kT&QZ%WDaD48^R+6e`r(xo*Yd>DN z7nt?qXWA}`a9=V^&9_=sGo$ZTi2SNzE33=N1!3KGjubYozl&EG1hu#gN4JGsNrI;P zkOqgl{`W#eW5tXMm_rIrPCur24?aYkVnhlXJsx zyVr)}FC$Fv?d-0YZ0`*Jc6|fX*)SaK9VBOvHVVu&=`yUooS_0L5E^!fT9!ZB%J94$ z`X`^mxbHH`6RB2#{Pd6W=6_L1{&B5QbYlZx&V_tBWnX1@myy&PIfypr<* z`ex(mOz8TIdPRYD zCjJ~@Ito)DzaX2J_7#bBs2I{U%`qu1~#c(mHFMpcLZ4aw&|j| zR+Bl_E~D4f_L@Pn!FfKI*-mob`u}2JHWW zj4lf%F}D2)6huFnMS+s-p4|I>4aE$zswQYwHo+HdQfjs<`b~qBI=6ox*&svsqzu%y zhd@PI84ofYUt1f}@psvHRaTT+Pod6D*R_jq-($;4(o#o9EuC@7RP`R{N?{>yKiv#= zC|J$1+)LG4#@yz-oKpPsyrb?n%y4Z51^inc|5(!g{;rvA@QjBLy>cziSW938{67gZ z3CrY4r?#b~7{tfYw;5b{(sM2G|9sOy(o0CYxh-A0YQo=M5=%^>AeR9rI#hB6cNNo6X~D?2#C_b zARTE@X`)D1iWCJT^b(LDRR}HgkkCTgGp_Yr@4L@?&iMoOwSRG4x-<`&b3SvpN~xJ|3h3@@MGoOyX2_z$z^uO40pMGC2?Ahk<%k zIFOVns%;I!2;1G-u@L4}01YYyYMS*w7Ho>C-EZYH_JJR8Lldg{xcz>INCob!`}F`S zKr`@8Hvw{76U9zl3m|}@6d~6-fTA@6x6B2Kmph#P;<;uU6i8&Zu2XbYo4h4}djI(C zKi=$KipQ|I}k$ASq(Re`o$rb(o^1v!ec6!}Jr#Q^RcP{lh4U zd3Gdde8}p*y~+jX&unLPC{`kEkyw{d=}5borbu9If8&(IE)Uepz!+pv<7F>b6wkna zQCqeNQmY$LMw@d{yAbqB`~idHNfj}->V`{qExnlPBmo7Q9u(5=tFFHVjt)C0s&u+-AgHL8F~bOr^TL@Cw*5M1{pdhHCY6! 
zm_=VMqqZtg`w)M>U~FBwe}4&7j|S%{`Zl}+aC4qTgi7KQ-u1A1zLncQM``gNzh%Z1P@u0adBuvT^SlYEQ{WdjVbQeQF0H(l@#QwK)edLdx z2MMG+UdAqQi8Hh4_9D-Al8C%SGP@A7hH*lE4eoL+MXDnJ%4O z+RL%fL239+ieEa7vTFJwfMFW-PTbjSF~Qr(Wh9fLHIgvkAtUjLdURPINZxY{3lXqb zse{C&n%mQ9qDe6gXv8<=E+b(MZ`O_*Ds%FKEOx~QK+1Vt zNWhz?Y49T26w${2Gs5L9aO~g9rYNSss&7m5S!n)DlL1|LK;9vPf#}fWo&kDHc_SX5 zYeG#eZ(Tkx+Ha8mau|rHl|sDd2Maze?{6f<2S#GE?^)2WhtAHkd5kdrv^~HoZM<*o z{=}HF-AIfXM-N<7>~8Jl{JZu3x0{Enpy9U83j|irxA2=ElAuZ!(j&|g<~;Paa59qu!b9ym{zZHrNxB&> zf{>!|DpDr@ems|krH0viZ}KuAbhWH3_Q@`1vEN^jjkP8|HjOD)G;=HsDa;V=&`W}@ zNBPae02a9qX}#KBRy8>?{A#-vVpHLN+pmN-4(w7bYEwVWS;KaTa|%GDBx)`eho1=a z*3wcvy20Zkk0=5h2a38f=V%6V0>lHyR6vSxiLF`4j+L>G8@MRlydb}IYbn;ld*!d5 z`BXBhu(Z;eO#XmBV|n3i@yY|#Sy~lZ+w@iF+ex%dN_%)JB|!_}t9t=}a&gYVoMCpG z{5$C?&q#8Xe6AJ`88St{q0z7fEvp{8YbVIB*uKWxDMe`4(|cty0?ZiH~v8dfYl?7@F07{s^m)0npF)-|BVWG!|19x`U?3) z2H~VNQVyCzmATeKE7kwt0#WFnR`MlECk20l38u=>L4$F%gxa zMm$Z0Q0oEGH8vkUL(bY1b|GB#xRju5kBl}IOaZ1Sm4f#CYWG;km9q+4&PACbyD~ZbZc~YDPQG7fQ6cFQ^i{J;;gF@G_o25B`Ym zZfgAADSSNk#XG_CEAL~Rg0ol_&r-bnlEUWhoonD?BREd2&JwiKsoHeSk@QIR*L>3I z)T;{rh^onU%kzDl|J&9BGyWN~n$0BG6TlCWKDY0+;VOng1BApPjemY58=qmB8#4J7 zPkYuC;aNp%WPE$^kD+C!W~u48qbn%dN(Z<)q+4C!T;R8{rd$tiQ!+Z>dgxKNxut41 zY4$bZz<7z7LH(miM*DW~goqD^T;a`_J6BOh7wp$}W@@H@Fez+uf~2do)*8`VgVi|S zJda*4-AO%)(pU>Onj*Sg%FO3CtZmH14G^#!&???QSnr*5VIZg1|F%;LuoYh*-XBGj zKD(`|7vFqwhjKL+RNQ1|I~;Vh9xf%UD6p`ybNc!BRvUcz@%I`$QLuPQQGIO3_P}%D zS3Sj3kS)-O1$mt(w}+>`Wi(YGtHt--x!REt6XzPL_kK<_GSt?rb{NB*y*rL(>x;V$ z%{n!A8*6(v`^(aN>xO1CA|~0Pn>n3LzT>S5$df?g|L-8w5LP^XyZb8`KE$?WdAbTz zkuI{it*E}G3<3B_PZvE`#bc;}&nVbU7BBx$e;C(N48(u8rM`6*Ug3=WwEQMxyCYh> zb?lLT95ss8+=qMo{S8qdXcK11+FG8p0P>oUK*Goydk%Vm-<^(6?XWxkxE3QXP0egwQL7G?g3 zP5O0kU!!I)rgvG+busW~$mT3w25Z4705YJu>3+9hajA2N;#^@`IoS|I+8r-uKuzpM z*CzF@cqLPm?tp&52cidx*is36o6YOp7k`8KVE}B5<|kwM!|w7iTL$v2K0ze`a1y zun-xc{^|8W!nQ%J$fmxQ&`8mIJq8RLamLIuYjqVh4N9qy-r0wb8gF$L)`DgwO8N8s z{aGKX#=Hy`uqC#*;0y>5wNXUEO%y9;xK0dIZ_vj1r8Ll8*)p;Sxplxm2KOxhWB^IT 
zjuyZOaD0JKgIDuE^bpNTMT+9+Hb|g9(*UJ~X293rfGMo3FHj5*06##1E8KN@b>VK@ z5O~Xg9_(*M7q~;O7ib)aWO_8w+5x(jv_u}i>TQ7HX3Iuw{c1f@PzEr=G-j2jtDN9& zoaG#PVdce;)VF~KK+e*pZ`_^BD2tl}1@oz0&SpOypAXsoi-lADIy|oX?q;>ua|J_{9GQKk1hQE}{;N1C`* zrEs|20eMrpyD5wg*Ce?!z81trk(oi})W~}s$r+2K?voAuWmiUoZopI!$z_iK&)w)q zRuSBNpW=>4ROD}z^N?bs9^N7`-c>3G$3OZIq&I;t?~mIUDBUZPbk7ak$D{1xX~eMH&kS4=(pOhv#fDkVX!JXQQI z>*!DVE=Y!Xkp**erDUtiLts>h{fbzSo%8Sqs){BsI@pKbNInk>7Tp0WW8tBc@Z`W* zY|bRgcgS)II5S+;_(nKBP2J)G(FLe)(mmLr7)uzQkjX%P$8}`#D;;Pq1h*xO1)kg| zXvElpdL~O8?&0sCR{T{poPv}C<3*Xv;dt!WHiyFF$RmmXWD4AGW9|AIgh2&4s>RRk zAnDYJFTbN2uBFK-^#lVIaFZU>Q9`XZj+4eIQ4t6O zRjhq~-Y$!!)H{_x%S_yf5I~QCZmq;f)CyDB0`!2a#_83@TNWg&=o{AT3m{!DYht#v zC(;^#`D8c_&OI>k;`G_*g9A$4m6Pt`QV>Y(wZg?>bf17?{qW(pz|{3$j;Qg$BXvq- z_8){!Jt%kmF))@kP>L`7ms|6NP=PW*tc*E>T8!%FpYjLIt4>Ctbeiw~I1@~t3>fmk zW1k|J7e`!5Ubc$Tn^CAWW3%7gnEMZfy*_Lx*O?hB3xJ+i{D)Zzu2w$W+`th|*a7U~ zCg*}I)~8dNtE&z7$) zg9uTst-_4)5Xc9@zpmDbs$7z)uK4!t+pp*{n@ST<*(Dusl^8)0qNQWE9N!+ZdxD?J zeR~9IHIx28lF89KRw;V~suAOlbu!E3f#aJMh^u+Rh&`&36D7;}_VZkbw4m;$NFD_> zM#!3oBHiiV?{ko6fC-Z(OT{;ufx1H{%P>ufcY>&taeXhRtuUHD%d;+Ed$sJWrwS%t zsn3%B#|ieP<=-=s2XIP`V7I5|b~Y90b>vycW;&8+mVDS#N9A#nh__)q9*E1T+)xGP*f>41eX1VcpfI_mr$BnPz6l z&d+7VP*6kp&USlXm5ul`AVgws#EWeRtOC4`lNEi8dM{=qRW)tTg7b^U1W#RY*Gu>| zRmsO+Y8yWGJw|}t@qFL3);LS$^W%GE%CCzvbuzKZM!YFQJ~tRiybK3@7uqp^1r)e( zVq=cI1^8ReER z_wX_KqKHACJPjtDqtdWylBX)_@Fai%W#RRMD=X_VPQ>tv7c19}@J)-|gEI=7o3Gwo zwVm#}uos2Bz+sS5JbWW$`)0|<`n&6HZUARAqr+Ld!Hq0-h`3?C7bQP&dnIu6SaO~y z{CL=V1|1Z{rIA8%8;vknYKb~G+vMrLT!z-pQDwAW<)9~u zT`FZe-GA67`we%$>ANP+@2c~j#eIi?hw-V@g78>hL%-b?&}{wjbn6k|iC=Y~yajca zN~t<+uKLA4P(#zFhLGKPyLD|?CHjsfu=S6*PA9|03XJPA+`VzB6?V2`! 
zD^Gk49Bq&}`{#tLR1enc1hoQpz*B+}?8ASYqG)9e4JLV1e4IFmkiD+7-_9mrGnJBb zc-A1cW9P{jLk&SwTH>Hhs;PQqS_4>*AvO|GkG-1UZFKV-o0mp-;4l?i7KqgStMtS` zM}|%{(laWC$X?Iv(J?Ag6eUk5A*XBC6F`9}9c}|{9ffLiT_g=wdMu;lVJ5&=c^;mv zGhvRP=Twqvo$|4pxHcw`YJDz?zsuHQ*obq@p|1Hj(MuJD8a?sYN6bTrA>*ZT;|`o| zI*o|ru6Hi(QGLXO)8@-o1GhfqJ@WT0=@(&Y4Z<$PGD zVdMGZf;2$bgtusFtMYuk^qX|w`?TjgDmW;xagHmzB%2v+jR3kjX>ko{z)Y=R%HBRd z=h;P!#Bq%+#}3;^YVBy!{;pPCt;RhV11%D`CtaAET0CRrp4E5u2uc3tN>}Fd6obx< zEJRDRyd4GkMwr!Mda(mlbD`G7D1M#*A;)-8&qxF_H8snY!1Q~A#q;+l9vw(TvC{0b zjbFQguGH_T7hfnXMjYylZZLpX<0k6cUL0P(K1X!r>LhQEg2AyBZO#XDX3n-Fua#IG zU}kT6F+7nu!F?vrYAUnh#!ptWEjYIHuPp>G=m%d$9y+hN1Pa6+8q^(F(;3?brH3o| zgep7VzkZmAGnwES+%xM%LX>f|!>aEh)L=luN$enx;H+LPg^dnQTqYj^91TxP|B zE@yB*mv9V)LIedHitn}mwhzdx+dZ6p+xmi8>oUu6?iBha!;@0iJu}-!DD<73QuGuF zZxXBm)iw2dv;OSDN*o{NWphSW)Zz#2_^>6rDUl_$ z)xT9cJ9qy^BhOFtM>cynw(89lwh$&9T73WrnZPN;g;$PK+Thi=6gGhoD@grf$3e(N<{YxFv+i@^*9a?aUiR zNp{a)dY;QU->QwQ7+{)2;sAA7mKV zA&(#5E^>5Hxm{y|?~MJSWaO!I^Yk4$iYy*!e0%k{Dy50NMoQ|vuVZwBRNL~Rmn}ba zAYO(DxSJOb*WSO>2|mS!sBAWpnfyuY@|$~j90X!{&j|IVq=G3LN1`3c^2sfmgjmU@%@qOU3r7FpA8T*OQzQ0-YZ_Mp7a&5n_|KhCMr>sMbpEX zlDxNk{eJwjmErpW%82K;yBws@N^y4SLEoYuyCC{lTYGxlc$P=GL0YrJ;s*Z$`H zTB&6uNm)>pI}+x2mf_fMt{<2Y^)B@u&Sa&AzHfdJOoB*Q?(v>7Vmn=1h6@u~-Zdyj zB4vp6+&kG?79HRpKS&@%DYf#pUcNtJH$SQ$Fj1f8%@Uy%Ixi`8`mPmg^f6rRy%_0? 
zvW)rT2d|ZiZ`e}*y=JmrZnnh#%|IL;j^kd?;v~NndQ+8`r*?IB*$TwzAHiyQ^X1da zFO-7|MA865^N}J-P)*sfJ|!t?I4rGCur#|9P(BhLrP6r&$H@#@=xH6BJ4zAlN~qq_ z$=5T90hygWf|!^s9iTm86C6qBGFYC*or;Bi7us}s%31v7L`te8G^(ld%2!KeU>4?i z*lzd$F?9#g>`uEbS;ZK(cbOigYll4({sbD6NY{!daixPQ7Z8()wEA7kcpa*QTNR-- z*Ez~no9SOEvFz?MqYNG_^K-2rsZz$zRI0}?Kscmx?fb!g^RA(dV}L^7_%m#jU!2@2 zRX%_Vxq$!@nNNb?h3F_&^JbdbK|6nQd}o1=O{ATF&V_!fopzya?$!OzQpUn}FS?3m z-Pp@*JJvh@!C7l$5VvE-CVfl`$Jh&9uKQsH>i7=4|{|A_}bNf8A(UbG8k{vu!=Hkx6VCh<+dehWA$yru;r)y#=sgpYF=>($c zW(?~ec*j5A-|8O-N)usAbuGN2n2SB=Nl@c@3>~@ZHRqEjD5+)7`ID|dNE|IweUMFS zSCF$tFJ!G&+C4MhSTD33^Z>g)^iOW=oJ8GjyvGdORU>;+weA9!7Wha0|679P#NS`@ z4UBma+|^BpV53sGP#(f)4$ ztPF77!vj`pgeX*wt_(%VaL~(*bqKIMPt!w~C?V=P08vB$xIieVnuc#KVIzPXULrpL zz#y6^jMZj9Xr%=)KMjR=;RnLEX=NB1dG=s$o6%*c$k}gZUpYTR*3sCaArMaQy+T2k z&~vHCfrjRn!%^U7>PL^D*eshtScYLGQ-XOLIs_%}L*cZ8p12Uf9#8|Erx04_ZtC;+ zNP}7nDCSVe1~WIZ0o>rfs=r$Iycq0x8UHN+11iQ_B#j9OQn`tM^9@yB)VgOzsp?S( z6#(QN?o*kJ*P@V+)ARmdkfZVpQd`#n0rCQclyaJpyDkhWdZ&m6Kt7OmB+Gh#-@$p% zFlJH{1t|Y$pM?rwh=&9<*aZdw0Bt%6uDIm={lBLWE>NsnNmI8#ce~pQpoyLzAhErh zasnx?w@=q5>_B<9-(wI6(4CT+sVvTC@bcEZz^pSPwiUdVh2s5BnctCz-JG~c0irdsx>+!@sgpm8tu|*JSN_6wm zk)m3~gMx4AYXGqzZ>?}}1Ja#emJSc1QV#{AOYy=ju7&zTz4Xx2~2tV>hi7-t%`kl)_+3LvD3YwAdrhpgLds+-U}?z&$Yw(puW# zegX3oPY2P)Q*q`@nzB)F>zmgar#X9OA@+PGO-adrs5d#;%ztCkF%d2g0Xjwjp_{m< zWpmezXR%y<^KFS4IusPS!mtsb>;*CJ=#PNfk^6X*3zW%Hr&B&MVmJQ=1aMI6zf82m znGPwnEA?6ZjhVsa@}xe_d`Pc{763d8)|Ungus?rc*pWP}6eUskhlk$1J7Dz>0R0PI zi;G!X2e7t%^&p7%u*DHjbTRz1DlfQdrzzx$_ea#uaE_!a6s+K>09m5Xyf35yMt%tR z5LXVyIIYG8Lh&f;IRAGjFCNn%@&!ctZmhbAQSj=8jvMT&R6XDEI^1JbNijZSC6sb= zHzGFi9Is)Pr`XR3wJSd(D&b_0v&A=*3{XYnDeHjXLXAtGoU91R__9ow^i ziT}^p5XyAn4+gBcFq%NCC4Bl8y$-}z+R#p^sT%1?Lw)SDP!pt4>bjj%6Xb_7q)JKK z22x4`tNbB+0|I0DPu6WHNC9II$M{9;GB-(BkkOG>&h#*(IDCg@cL+F`%1AiW1QK!F9$4qTPHP_T`TolT zAU4nn4(5z_sR>LK4er|;#?r{+jwd&}PaH$uY!thGsq4Y%0W>F3NwAs~D+ieN0}(gQ z6hf6?ZB&S1ycQ_)TE|c@JPXg?Wc)X#dtzNpIKKHmrCLzupXUTc|D=~P2}qd?=-?^x zZ>4w0z6cc@#8>y;xqLUUMDlx#P=Jx!>Vi=KHNBBosju-)|G;~|ilRVKu1#K21O4O# 
z5f;NCP^B+Qt<^6=vZP|JEmSSQzM?FAoU?w_GvrT0;%FBt7FQG&FZzMfVZNe zHRKCmCux5|CR)doVyX2udBvyHK)DutCrY0T7V{n@z1N-n5h`+s_jebomC1eKh0sdh zcqr)th2n4W53K+H4Uq^SCk%)~^Fb=`oUnzzD2EmxOa6PY=s)v6^-BN0=OKleeqj@6 z-`?5$kH?)XSTt6ZKjgpua=qK#4S(xkVw?Sl`JML8R#lbHhjT*L#lH_{&EEQ%Ch_Q{ zk0IN?>-YUJa1*5kwwFM`P`B~R2_x~rN83#KChp5KXYW6objVn)Jg28~fxJs6DfeXK zG!08?@*O_>`=)Zj|Nn_*zXUV52Tcf4tFKEaBGEC;LBfz{@+3UCelJ3PDqasOYHmw(RqP~rdk zhyTvqpZn(jp#yS#Nfz!!vprs=*Q4T+xcucV?z@cg;SLj&+|LB+DNGFNmD1%W`leiL z=|Clnga6ELtKgqfV(CfE&rk90ql5QnIXluLdXHK;IQR{&2Hk+0L5oqzJhS<$x@IDo zaTArNG~pR^web;p;&um|A_Z}@@i+PrkB2RqmHIOGUIkgMLWDt__S)3@U5e>SjlOsn zzPuFvmrF=?ouYLhA^+)_=0yeqHm5;%l(SkpU1R;w4)M$*;JFNTL)9qYnaWV0UgXBQ z0#A#ESv2zJnWXkqX+-wmnb@ejj@C4{-tKpfniIr&ngF*;E6htvpNG+5OktG+H}z=x z-R?jCc3I?tY**S&>bjb9kHHmfD%Cg-0Vz8dWA2Ho&yuJ%fA_Y35Ct_yAC*w@7Snt$ zAs0?2Z_e{38*d=E_-5IS)e0p(hf1;?=tRyn8RV_$z_%fig(;_De#+Z?uoKQJYO*Tx zFPGq^?Fh+GrcY;a@h=@i&$i0NNSg2OhHJRpN~qG)&pzzSp7+VsqRw@iK=s&(9?!`Q zc7vlGhCc6m7h6a0Z_X1pHh)i^N|!Am&d)~S$1*Fm#Ty6SxOfZr`X$=lslBzoXP@0y z6~>fD*p{1K+kFT7e8PUio`kasXyyzqd{e=uVM8uYwu+pcKdEL={yPxmShG3XEA8hN zaIf=%hRz$~a?HahRO(FBL~7W3Rrb|D-caFZ?Z!j7Z_kN5{qa;~>e1?=H^1+lq*^G8 zAgbZKVP+s%O^=J?MuM>f;=s{Jt-i!#}_)2 zZ$IJHk4N&yUMRe&B!Ax_Pp_T*>9Rmo4X&2_h3TF8D@CCPLBG*OUYA{rXX7}MXIv8! 
z3L5)u=6z=8y6Z+$%XBxgMe(xz8cTs{D<|-qeyg)TG$ec_q>{^uPi(cF@80{C02fxg zcmCL^2Zx+F7_Yp8(y@6d$WiGsSn(H99%coU%t`oMIkj$!g_FBUsEk|MTEX};%mG+#Z zTZhNaT2>iCv$MaR2&D?y;ZYEG@(e1J~*=!Jf+U-GFPf7c*L-Lnyn+3 zPTG6|(vjXbdFWbpE}5f&vnx*hFi?YM=5=b%N1`sO@bDMSA=y%8Ln6_hW^3Nk_G(}q zWl3=dt4Y3$&li8LvhG~iShSt=sD8l0+Bxp2cD#YD- zD%-Y0Ykwyb73q*CuM0a(yEukS5e);XWSknY{xrA=MEC25m34FLo+-&i>~wvv1CwMyPHq=~4zX~6Yp!5(eO z)gR$EN5mU*24|yfdwtxDtxUul1}-f;`>P<+{itBN^Py_KspF&US0#;4?xc$`YtA&Q zKiIy|gm`qGYl-Gf30A{JO<1UVNYIs8 zp;H?1-Zk2v#m|0`BX8jyHYPT0CG%@c%+x=QcYS_MQQ+-mH$gQG@@WSGf1} zTv<6$3j#solU^I{Q8c_81e|))ffHpIb73(rm9xDtk6kfOoQwh+;7!C+5L^YaKvP8* zWmou@#NuQ&9o)Cqdj#c})X^w_;ob{jdUr)`i~!#8XQ;?ux$ouM(nIg?1!*P~*!D;J znyA!Vc}~k7PK9a^nD#YU5=^f1@;@l#Scm7$?~;#Tl@r9{y*TEEj$Vo=MQbUfJjjxE9Oz#vcHr90iyk;rU5 zQ7S0uqYihb3t;*6u}|-12Rr}RarPX;0VYJhuwz%zL+edk@Q`O>O6;${7WeCFUZ7N6 zyOpzoPxIl&usr9XW3%Vc3$H!zqM)?9{XKatRq8!UMRJyW*VUUWdepPtZ!gsE#-!DAXiEZw&eW;v6M{kP5h$eGEMAe$)-*cSh7g5;7ai~PUz97 z@)LCY-XvpyLm8=I!G^T+!&ptH9FvR5RAAHeabnj#O*vPhLX0l^P1(zs7N&*pq&>E1 z(9QEZs6M?zTg8@f|DDbUj%nKk@kTMi4k<|HHu6Sn7d4s7&HuKNSb}1(`GM;p`AZpZ*f!6dGh{rk1|w3{BFOr?vmQa zRbjJ{FE;z?_P4Bhyrd8qvJ{YrvWbxGhLl_2S?{T!_>|NXmjH}P*SW9eI_# zu*_Sx3i=?td|Mp3wEW@baej5l7`Y?0WvCR_0i6(mlMQr!Iss$F@lB@+BUcD#OQ zn9vl!ihkTqsG?26pd8NzulNf>#bJt`cVWg654k$e4GbI#EfdhuB%O5caWw+AwD#~-lM=?eVA#KtX3ov`BLBzD(l?yH4f~{H6D$7(oLad&jS&h*iDZ*_O+B(zOJoq z8biF+LLU$5k-Lr!v@3SKCrn0+e`iHB7|tdY)I8Wxnrt{sUs#-%7`&kpw6~GAs-W0$ zw++-R*n`EEYJQLME&bAjrZ}eT5c>s0Uq&&#TXlk51WlQq4P{2|(sHvf26tUDQhva~ z)|5icBBM)GXjdsXN7(V(a7UiMh`x01XT*fIfW2-Wy?zm2336BpJ-G z0;;rf=H)imft})$a5~Fw8AJ1Bba`l**iv9Kk~h3}RP{O2QI}C1Z#Gh;AcynNFWv(w zX<&suggCz(bc&XnuZ2&SpyY$~Mrrywk&fNAZ9<(MchA^rli!ChI2a18CDr*$jf>({ zZPcLI`x*R)HtViSgxvx}bMM!{0|+#DPyOe4)d^EQUzT^=ZMAHsdXu~Zgov_l!YV_Y zeot8I4f0_D{zqhKb(1Ubs*{Igzt%#&dlk!&+$!7`^vXxagLR4e~F8!1Ssp zqb(~dL6aeZ{^ibN@$9eDaz}GjTs}L9=uxI7h%b|E4JyT)XpNN36R=t{M`-p2 zGrDEegw)>5(NRvOo#zWWu@(o9MqWz~w!-2KXdYm7`mn21@4>_*sIa%Alt|i)#Z#N@ z{Ed2x=X>!J#oy^})!;Av%#h18xtG74g;jU$tz5R| 
zXGBS?P-~|LiwMVIL!>-s)`%kJ(IoJ?Jxc6}@-v16S&OoN+*evenTK;}(W9RF8yD`Y zarqjfyulO5Dk`pt z>P*+zRcT|fbx*?b5V7log_p>~>L~o`Qsp_qL#~qRed+>PIi?C3Pmgad#*Nk==OiLR zzw|bt)|Qy{=D*`TC!u+(_PmdnzbaNM)}SFUn!SLyy_Q6zMv$R6n9phr1oZH5v-WlF zc1hMG*h;V3KwpCFFz%r46vi{&JJJBNateYwXVKK7{F|l>GtR6Z(HAc_->L3$36`q><9fk(izk`=LM@ z?l6rq!ui-p?^1xmgUNjyZ-u7<6 zW_Uy)toize1>9@!-JP zr^p(!-AJmB#T_27>8(vZYPg&443L<3&ukQTNr3mn_<^6x^TVV>qWPo9i*j}`njd`$Z^U_?W)3Ee)nww7-de}72C(4UF zz&Io()zSK?eAg3-Hq_tuY(`7lO_K#DUcnNx#SYY-(6^`4!1r@$MW|5|OIFC8rMy~M z&n6FROLvUtM zc3p)H=jf3)w(0boUgUqmdl5xkPlu z$aqQQrXh{l`W!{f%E1Mt=7swVf+o=_Q~SgfwZ&Ml88V>@BpR*6ws1neGhrWNhj0oBLKFJfbTu6ezP8>N#1y>mo+t_2w3fz^qC9%#g zpLWWfU27GAZ^e?vippo}zZT#EZoVE+c}%%&KIo+?k)7Qjo(}Fx&2w^_C$7(rZeqOl zf@bvwK3@-I@{Iry^>t~>kKy%Z$J=GT6lfy^k=Z_c&O6nl%p%%U6wT`(Zq@MI-MpRs zc&~N*UB1Fq{7KF-mAu3HkPwxt`^qzp1e-vvoei>|;HKsLoZjb4?gNZ^1azHuXX4y7 zE>kzj28msbrMezoBhE_U7ZH;&jb2ad_B!>FYnOV^b|&tmnLgpBS!JZZpN za%=LIaEVa8jsKW%vV^+v_}v4rMdinTxpoJoH&B`Xh6CnlQ=hE$0d%aaq{et+>0sPA z*P$}LGJ?k1FNbOL57O)3n;rf(!#$B=2dX2P625M)nRG0d?>$pnwn#^5gg1o$%;M4t zy!Z{(xp}FiOC&`Q5nSfoH$Ku#XD`}OMYGsBCkySZyr-AwxNJn&O~Q!QyMgHvXD(i8 zmx(Pvw0Gb(^Nj+(^55{C9f*%(n{2RFRbL$ySgFz+q>8m9Ay%osvX!^QspAhU@zRoq zhX%gwwMXq(kOSaQV0Bb)HpCD++N(06t{J1tkiI)(EBl|SEFv6XPr1k>E_791bTzvxtqUBn-n=pHM!tTS*@myN}PG(T~ zRY;o8UaE;8$3$6rgW8^6V!Uw>J&z819QFr@ulOF4;meSOxm45kmbL<+qq)cup%PV= zYcbm_${mjqEn{4YE$TxQr5@t3oFdpugPeh*d?JSycSg0+bwtVnF+^b)E7a(NR=>1x zZFpR-Xhs3X>%>%%oUWDIq@tOfVX5ac;=VLV^b{_7EzxG*??vuzazMQ)9)p{W1e0j> z^i{sI(Oa&V4ao=+NaqZX%ydUtkkAz&WkYe}YC1cU1)t?25v~GjaC^-|>(RIvJl1kw zy2)~|BU7SNdhW+!CFS(gV4PA9QGUjy)z^%hzw(8n4V{Nq-wM3^GEZl?@YXGf5*YX3 z1vtOM`=0bp3){TRLeF1FZwa(mV`bwm>Bj6STSZ+kr8AS9?EwYtvax2+yVadFr{Q_d zMuD|rfg|Tp?svYvi{la!yRQCycm-u*gLx{oD$qQsD6TyBV={O)7KTro>Mhy&YOn0P z(Q(y^$5wy8`nGXBRH%}*W^hONubFGY3$f!bV+0lOCnMWg65l700-MqCY*@K=OjskUW1>PFmsYu; zX10&a9R{fSyrNDZd}ngq7k5_jjzeknoW2crq4dX^X<}HBTsQN&tBWUha2FOO)Xl$Q z211&|QjK201ChVy4qSBi3gQY-NqpTO$Go0jd|7dMNgz~-Y(LbgxUHJItw>Jd<5HCp 
zBrom37UuLyQn?lN^9Y**zuCNFU*9t`8qq&ZIhE`k^Vk14z4LVu~=M2kB-SeyUTft#gqdL{j7 zXM|eqxm2M;<%Cp6R;c)tT(R%P~1dA@V zTPwrMo_kNX_E%V?xS?s5SzWRC1>Wo9Z&ht+;u%@YHE!w(2^bn45N@TN%wyr^&)R`N zi**gzG?+IM6`E17M0K0<<$@OYlNFqS=uS!2$7C zZ#UCoA98em1es?~LYL3EKA|V{_Ek=N8gJt_>XjbJaLFSgNi}UpP{yX9!{j_zI`?w0 zBqz=?mRoQuuwXo}u6MvdC6$OSs#`h8YVDm2oNUMZdUIAhix%k#C)m#VrYHPOvckJQ=$rnYIAxtzVsTmv;U!$wg1`RuAaAnGtS1UD903}4O{RS zNcZH!yfOa!1SHA$g+#9S;?e1qcwe4A$@Z_NO4yB#50bo#7lPd{DWq`VEvDDBf=v|X zEo!8;cGMso`(H1^&ca@p;Lxl*y^tBbO#^gpr45G2Ym_*591bfB{=(<{OU-#aS+$|S z8#2&~LgiHy2!Hy)4|na|<@TAbmUJ1CFLE33@8Dk%6|B5#Q=jqzEAC1SUdaI|86YKR zz&|SS(R`4|hiVO_rAd6F_9rl1KBZ>x*i>s6Q;?{zz6)=sTvK$ptyO7RUC*mM&l6~5Yia7JSe}T$#u1;0A zKCp|Z;3_N0f`<3<;X6yC@T9DL44cwrb&YxnQ=f)pj`#C*E^jLEbE7pFuM|4C2Xwd8 z6oSBW*qAh(M%P21WU5?ZTq?QaHi=>$Q2)q{s0o3lKnR5$s1NjvwdSO;LF~EUkAHHVc2=Ek)bjdxM zjGZT+mn^LozuvNn<1z^IHd|&FgnMvx#7AQVvZ!9~xV(ajp6zqMtLAVK-^8HI9HQKV zO{^wT^izT~2l?<{X|~^tH(G8d^JMI+k7vSW6E>k;N3nW`zh-|-ehuQ(vf3L@0887M z`!6}x0{_1h2AFvbFSa$CB^3xE4o8h`gM(6fvfov*#?5?2d`@%zCh1RjWvt^=6c}fg-*X32i_2fsR zMYz-O*`E=qa*Q2!2GIjgbbC$KcqL~t8tC&6>IXNO1AaZDq0E{VI>i^=zWl{^GPo9YpdmoO&i)U4bt^xGH-SIE;K z6PXq@vW|F@#OoslHon}d1Cyn4W80*E?~zJSQb3V%l7Ffp>h0l#k?%SNWb*bm36}8l zFMlLAd}W)(>ka%O4xk0qY_qv5Sa!b9 z_o6;nI^J?Uq4$QGQeT)*?C%ZJGFiD~Ur^^JrkP49Pjj*e4&xsxWmV=S%3XHRw9jSf zRjXETuHPY6Yx|G2nCx$0Kv*gDG3a%qqtUB@Ev@P=Q$&I`uSvfF{k->B@jcp1gI&|I z+piD(Tk>{r&h>f2pD|?Fx7>&$xdo!7q-uU#Tqh<(ZNSA;VdDl5k4dBPcc-K+Z^#*zJn8T-8QvQUlp=gYv@I_b|zxwD=6iP1dn7c1JE4MvJB4_bJr=N!S^ZLx1Lx*`5{!Jt;H zZ~=|2xn(Ua?_&C&s@ge>9)u;Kh_;Y6kvHR^l%7cXdqM&|US9uZ&x|CZ#=&$pE;apn z9#%M_XhSBd^F@139 zcbiP^Lp>Mc2ko6PCCzGGX;yCJ^TGA{ztf?NB#|(axLfa@vNlMhCrWNdYe=ryknF{4 zZHepT3t6^$wWiLU0`=7KrrQi?1ljbB%T51Q#$dGzM;qcm0B~G`fh;YX6#?hv>P;?^ zTV$2eZRU7t%#y!G_Z zM8toRpqb?#7%&@kuY^(%4lW>Qa^ybzEPs%x%#M{17);I58MV)AKgYXB+j^{(`!bit zC{=z256?yF{pwdLmqN;i;mLLAw0KWlK>s4z~oM&`$@_v zXJFoUUL)As%6XV|RN*_|-F=008Qy=Nv_pg=ALi-hE!2pFz8CpZ@o4+TapCcaWaIlg z8t#~@f$0+_Nom4lSl+tiv}J-KZ3V6U5S{;-#7ihgk+k=D_Lt-M0+sdiPN617LiEZ| 
z5DuGQgN#$Y?kv}Pre-f?dHBDL9pu3pE(u@QaAU}$f1rEQGISwwhj2jz9fC)n$UUFe zIy^`dub*-g$XC7yT6*)fWf(|R5s0^X%61vNIry_WU%wdOlupqBmk+(6F#&@kq(Xt{ z=HvD&Ia6_-E9X_iJ-A%VM|fS3CYA$VWZV5U^n81-@L)wgXe5#@T7HmV{$JZ22W(zs zeVlvjwdY>(v$lWs99=hkTV{5*z@pm8ebJHUyQ)-VFW2r}a=yJtwRYL>hL>)%g1wr8fTm zbuKKvvnAIO6l9s4cJb?yzsA3k^o-K4d}#@sZs0y=zPwoLN^qEM(V6GKk-nE-b#!yX zwAQlk&3Zfc*t^(Y87p5i*3QkoX?yDDlTckjr<~X~Kc^nrJHM~^U!|^D?@F6=;5fzC zV}}1_yjnl&=31#m**HZV-oN=$ogA>D_3>%9`BTbs?qFt2$vpPEwYCcu%fF5eK6Mqi zp5GOmr<&A0bsQ^D3#3N0s=%P21r zaO!wu85n&gQDS}fvy`QM;xF$9nJgEc_w9$@OdL@PW0yZFaJ?IQBo_l^)Levc)I$ztaD0e F0swQ&-4p-- literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/async_ddp_param_gather_c.png b/model/train/yoco_moe/sources/images/async_ddp_param_gather_c.png new file mode 100644 index 0000000000000000000000000000000000000000..aea31f2b3c2b6091c23896b00e4490d52283ad2b GIT binary patch literal 155110 zcmeFYcT`hb*Ef1Rb`ca20*0b^REmJ020}d$5F4G)0|z6W(2Gbr7J8Ed2pBrj5(olG zC_zw!1Sz4n1dNmr0Rz$r^$U2;^FHr=pKpxs-ZAdD|J*%>V94HU%{70&S=L;0N9pTo zbDp|z>X%=B;kVo2L>|ckx4Yf6X!H@-K*dLBKsOzZz z@=HYm$F}WJ_UDrxcg?+j`Q@C?{=Y+C>_ym*?-ktBR5yNN{e9;o*4N3L789}_ALu== zvOabCvrw%Lxu*8XTx?p>{42AoJt zHa^cfuP>f^zT%k#@M)XIQC{)4i4C4(JC=cJTY(Mhf%esP?}w}!Y5aQ?gHN~I$o>rr zRnr*&KGFYnRRSEp@^b(CHNtc2M|9-BU*At&c%r5G@7LN9&iem0jA&p}I`E16`_;kr z-~Rg-w;kNRzV{e!vO70&ZN6qQzGYHmZ=>DVbBkFmv-iPc2E2M9xqZ1^R*(aGltc{3aj%c*5a=@4(oquW%l2-mknp+^;lp*-Jk^;wk89i) z_4`f6mk)uRw}(ng{|s}ZH{Sbf+f>+=CMy`?PoR#G4Y%Hq`C6O-DZX>|AhrnZ=!d9NMBatbdb)h8E@@h5XIw>r4Mj^>s=cAtE%kE z2t4dQr>3P&_%Rkfbwfbz$D>x~o$~i<6dv00Nq-Z(9g#n^BwoD46S8?9)37<)PC1e} zARrW4DZ2XyN-0ZcaNzVvc|Y>MOykjFu1W z`2}uuGlg~`b8>*gG6>I4obnmlib`Cbkf)By&R==KYL2(4?p&X1CjXK2^haM6y$M?( z!`x@4`)ExQ(=F{N9N%9LhDJZ1tDs0#}afd zRNnYwda<^oQ@1znR!@U+Ui$8bKSQ$GT3DA-9mv35I0?Q*il_?FT<(4XNWU536aVHi zGf0`?Vn{-@rU>?ee}Bt?8C!lsm{aKfAi1wCZ#ZKx!Oy!;55m#!3wlba2?ne&n?^XV zoFwu;dFBxr%Ip3YXhA1l=X6hUIziTw0{=DhI2m3$2-af)YqKD}c2=?tc z1N_#zU(kv1KG%g2O#FudGkKXJb@7KWtKJGPk^4MoZOD7JuKBNtP+AkxPRm~ghFud* 
z`Uoo|&@C9sIQOZi*1Kpb%B2^BE~P|n8FWfPh1DtPo!gIZY}4cwbHiXG@gl$XL}F?BT6oo)$k6 z)uwGnP<&tA%3Eh1QOv)|udIOu`mY!fd&8wKK&_Uwccf}Geg=p70&YkKl zjUPF+?PVR1VQeR0DDCn<8Xy`Zf*A2qsia9?R3qM0d`U7hqKT8B^9i{jB}C{Dt3-t_ zf?UE}414|^;Peozt+g%2(dGm-_U#;J2{{>bHDCt|5bdMc2!WGGr!9F>X9#Z%BZ!qX znegnE*S45QqxW7<_S`w&lXU!v>V~CB5j!Czk)tbnsAZ@5hz7ZM&eU8^uiCSo5kwcI z>F{_O;*;db31wA(M~6+Zb#t37?Tykk1PN9Wa0=9-GH`wlP0bvT*HzeIM&k<7KJMcX^0xSwrG7XUz*` zJ0Mr2X*U3~I%k*ya)YLG!x4_2G+vQv=WU+>cV5Rn%;ceQ?QEZ(*Ksu0Ke3adnopT` z4zHFpnVh01Od)3!Q(yT*X;#v0v_Z-KRSc~`71bv9c9!2}b#!1ryKGCk6;bioGw6%i zYE4raCskNRofqW(UEO!L^^r0u+^$gZs_nqCO2F5_p{v308pUuA2+YZFP)9?EvR)N2 zk>)%`-NZ7_FIuesx#>$65Oo{kKe|i^UW!n^Q&lA=BdQ)Qqs!RnK zVCnk>%MLnhwe?3}3aj!y;X$H?4T zzS%O)J?AN54~Bj%$%_L5c6i?+Kk0rhdOyrqZ|K3Dbr#@>*J6JuhIWSYbwh_ukQO!Z z;x4!+Y(G4NNb*x&6EB`HJ{B%U?XG|p$QN3z*ggOjc1nKZ7~1qL(IL1`N}(m>Pq!4= zP<0Y8n>x?pkhyWMu5VURzB_NF(+1MWdAb4l@P~~N&p})P_9*Xrx2>zh0KgCkFfC=g zxXgP#A9Wq#oUrIihCxN3!HpbVmadNkv*ia>F#Ms2G--$(^2{XYrl0ji>p)ji(v%`& zOPuuk?Qrx4RKOD9EL{|g>ikiP=hUZi86~txblzuzZ{M6AHAr#U$IEjRZ;R{-NYf+X zTZTR0bN!X%MFPTWjBz(LvGCkDWzlRkU_g7t6DSSzjJ)Zi@*FkUsOVtA_`$!&WMMCK zjWp`P;FX*tux)Xs{Oty+k7;Kh(A(Um@w-Ihb&B19%Q!SA782Z(<<7k*2Ti-rT$C$_ zyWAzg#q6oG(;}F}W?n-{4S#AGIRXwd_pNNyFE`N%KsH)(0;p_% zkw=B;4_L#Spd1RIyG}-YcPlO;sA)P$AYt3f)^kI+X9|3vqv3p|Fz0=6cvPuBV5d+D z(49P22x%}`?lF{O3iR+F`-(S`n$90Xt2(v6cLuVJ>l9p`r?S}c0klh2)cQt&&~uQk z?irN#XZ|=9d#X-j!=ftgx@*VO%8-gb&TbAhxo=8QY*QL;_8eFP1j7L4G3oJbH)7U;cViOQ_(>y#X5L--w5O--a86|!mW>t8RNL8#v`;s=zFq5m z+%fed@|_eKZ<#V^Ou2=wn+_tk2T`U62Kd;^^_?}eADi^2PSthARfMmaK#t|l&J+AP zdP(UuerF6Y3dgaGPPV%UM_J46QY(!oYgZkvpDUNT7VUErkye4~eRDr$o*x&$@GDSp zI^NXHhbG$v?^MQZ6N@E`3Vh=h=1Nnjb<@ke$tn^UUFyfxh2r8ZFFeUl$kzv z5UCA4r_oz$qM$zO?P@H5T#?iE)uaSw)}oPTh&F{I~ zXN+SJe`9l;4kj0lmQ>j_7uF{`Pl`o68*JRSD!^*l=5N*7*GpMn!d((N@$qkkjCY9q zSH}Ka7|?i+4uO}n=_)Ze+c5Yp`U`xv0W?9Ptks=oGSTZX3adY#4~jbrS8ThZR@Tc^ zn{fB?&?WSKnB4h&)VGUmUzbM3uQ|uXQbkj5SN8}yZXGJ~vXZ@Hxu>vjB&8?2;v6goU-_(P-S&v z3{)HBosS6YUo-cuuiojZmD{QgKFzF1jZxA`;-H6r#!DSW;CIML{@!+DTQA!0R5gT% 
zL*w-(Zh2zzemZqmghBq$el)~7XSGKJJYWfm0-ns@1NT z5^1yRbcF%pVJrv1@21@;=G8;+vOJ*|Y!=Wokw*}tv;2ShJ&STI$H;#x?_e5w(N4=#uz2Y+U78e z{2m&!puzoaO?mk30olRVE{^r4SjLrpx9_eKqtI1@0X}57pKA(9#jd!?C)0?mgjB@* z=n5$o0=(?n1*;-okDP%5vzSEZEP2W#N11nn$ZX$Wq~i1ww@*g~p4dcG>{#GNx&wY) z6^el{2Zh(MMmMJGk52W(lQ8&94PBZ3_{ki1<>SdWBXT=FUS;&*6NvFuAS=sj#wjIs zJfwxfi#Bbe5($L1&`cLb)#LB7hX)q3wn~`Ue)Gb!j5W$zML){2{jT?komV?&Ne@kF z*wqqZFpX8LqH6G9PK|DLKX@=#$9GD=5$7H4Kkp zDVo0JYG>g`-B-b`NVgR0p^18?TC&ql-RzC4R!~f| zpHUuK%8UOz@ChQb?zDfm+kGrO>*@A1R)1*m7v?mCQ1ScbNOq-PkK0A=>ML9MmEf|E^CqI+Px3pHOci_iwCLkuey7OuGjV#?!< z9OZ2ds9bMp;R1ozy~sJjZ=>#_r7S^9&^ukOzqjc#1mm$cjmlXUiy}6s!|}on;B3Rx z?=}OR;Y~P_<4}#$Nb@2=oT5xbp*sidQasJVK8tqdpxlQRKM%Wnnw~6wGwE&343Jp& z=B<{{4z%u=Y*2Jq>%4~;b3&@#&7Mqt^7g@opa^|aV~z&RLLok1yErx)kzzqvG3ivQ zDyc0kw~<4(ovG&QZL_8LJQqBML-4?JA0(?369d+|ZVXc1K~;Y3C1x*OTAD}79B!iD z*v9ZvF$2mO=Kk|92FqQ@XZO>f892k_^O}a=-C!(WiK%h4au4roqQ8Jr&h>j{B;{&h zN0fd^dgvbSX-jRCI*o`_-9f&<#kzPw$ly)Wc)|53Z-%tAy;in^-^Ec zhKi_LVnAiHHO>h@{`=QjZS^fNcsCgRSbU4$ayDoBEI*=82%Qm%ykjaPuS52ZMukw0 zKoBEq=1L$79CLWH+}jF=Ob@8Sjh#nyWkTrlhL|pWmIVcmpq~3SG)Xo74~2jUSC_)JQ@g~GL0cLE!}8IsG(9EQSGuxyE-2nwoT)txHZ7cPYUHiU-_ zrR$N_dh~0)M{E*x3Yjq#vl0x&jH1d9SBbXh25$(>SA6PSvCxCEMX7{mYgTED7}?eD zhWZ7GbClk}_p=akt<@y%PKh**h1(7GehdUvGt<_boOVn*fvurQ`+3s`=_O#_-m6bJ zctqq-_~#3=BykryUU;q}Ro!wI%TJvyD-UM7eSdL2F_r9%XO)+`+jC#%Dohj(|@)x8E)hqQuo<;5=S6 z;%%%1r**{ds%1;z@<7XvW)X2lG-7*Wvi8f; z-S1l?kXTzZ2r|&ACdB&6CIk$6vgIJ}X^AP5|H}jDFpn1A8cM{;* z$6=2@18Hh+mem%hS~jV+xloKe4gECS3yeO>N+!fv{EcOFy-Gf*aO70Pt?Ik`2|XZ=uGwL?y)AqIi{Ezu?SvY9Fkw zpbAGcBR=m&Zc9?Ua86JX)OYlB0nff49?};%iU=v5OLTn~DB0i>%pga zaYa-qV{&%-bOkM$X^xE7YeAn2?S+A`Hqw0Bl3ir>r*mpKK_h1jsz<*BSiO`>2?a$y z4i}~?6xg6E*H>MDuUF&{N{fUT{2qAiv(mc14Z(DgaM7BJ`Z`8AKe(>{uRaKrU%`FZ zX|8J_y1`?hMj-I`!c}%T0TN=+0IO_MC~bO7pNyi_GPmLdT5Y)(Zw@H@QD4m{_NzBx zyu&vV&Kom2f`nb2=Cb;puknA%_L^SySkp#?l!XrA^n7(&5rnAa9*F?co!Y3{QnZa< zx`=OwEoJwN7k@wgwzu($dO?P~Ep=sSF--uaeVe9}F7pEP&{QcFF(hI2E~@qne>_^hdW28T#=`jkZv 
zQp6{OQ?762RxsxAG!t-0wT@dp*kH_H*tf%&6zH2ti@!$n3ml^n(PaODMH(sD`zm%) zI)Q=545XFDmw_4w8|N~92jXnE+El|q@1aXyR;}>u+nbweNuP2seu5AvewP3F>~P1b znSaA+$|Mdg#7=;CrTaE@GPiPc;F+oi6@X9l4%2mJ81p=ZgH-`&VQS>N+!S{N=o-m1 zm@8D>{YG(AScttnZ|t9K!ciFA&>4o`F+br}C55guj?rww#Tfpb>I^ozEY~3*{iOXW z0LgPeBY^6G5QsSaK_9_o41XtKdfSAu2@P25ar=>rvOzBy)3Y=JGkk^440uxU!Bno| z9iBtRtHmHM$_;i2mAcC&abE*$CykBG8H%6d4jLZp_~lQ05ITcEsq`K21WbiEQ5|U` zQ=Yt727XQ11aT~0Xe$^y$+g-eze~lULPX@+Onmd+TbihlHy!1}2%EUN5T`jsx>?7# zetWks6t`%D3m(8D6|}KVCIQEdCe7}_``bGUt2D^v8<#! z`k7UzWet}bzgepm`3U^*7VP)mHhw6i&8$cyc_U4HgW_e|@Vou*ovV}^G@v!&*qO4w z{Hb@K*`;!+zN_uh%X4#k54m2T;ITg3O13-L&c@vxCP}<;zOidipI`#NhFA4;T(z5c87Z1<>MhzWuekQ znyFJ#4y8UGyJUvmEZR?7(2cTxG~3@7-ZJkAS05s7>MJ7Bb13JFXm?|R@^?_3rK61k zO})Ybl$tm_f&9*`_$bt(>C1u@?2|22eRGu3!N=*+3V63EF(}-t=HmjE|pJT?2lg*Cq3ed@DfRM zfDUDdARabW+Y&n}4iLs862W@QPPc_;Xt|z)ucPXzH5Mw5}Iy^d^1Vp}0=YECmZ*#mjQ3mm?j|0>4tP%!gvU%>gI5wO{!IgR+ zl}>r##!-G{O?HE9ql{~G%RWeGY`yji4QvO=Y<3s=~JVp-Mj zhWqZZsLprJ&qh(sj9G=p24msvZJu=?A&;Wnvd{+u+9{MuQ+ifg+GGVUb5st`RaQq% zN5^ZoC6>fD&ZY?;~)SuCHI9@%)Ebf*%(i`jTDCIpvtOqD4c#5A_$sAkaa!!wikRTaAt@)GttZ z3tB+@J3=|WT`EwP1a!4yb+|d`Bn%-OtxRHjOH4r~Rimcw3|l*E5JJ|OpT+PD)@;N3 zcR~+*douSH%?)51Re$j3!4!0AH{1Zf!IzX0RuCo^wY4gd1hwqw!umdK@i^@LpSH^Vs zkL9#~eGSF=Q?=P4ELWLC=7ca`h6a{Z9t1DWxYgxb*<3IJbjBEQCzE&~z;cisx;X7F zcflA0$wK}BfvN$VZ{eKTRW}i%wjMTZGDhSXy{YWo@0V^_gSer67&;bJZ*|2L==ifY z?-ON;sjI91N)?d`$4l!>$a5ccI#b`83&cQr{NPmFsiDewb|J27wf_3POQh1h0ZH?6W3M;a4`i5n80Ph*5C>jhUBD+d75h9cYTu0)#*~u7ZAq^AWC3EajDl zt<721k8AI(6v!SWYU9))Ds{7??9F)=7v)2O~Lws^*pbAP-`4LYA}-2j%f-|i$!$3UwVo@3sl2GXaN;cz(#=~=$l%Z zO*1>?zufJ=S)KctA~IhzsJ6|+p((xt-lhZEl`wVzTW)}OHCW}?lH-B=2NP^19{pw7QIv4>eKoE$cJD<(?1G7qcXD*P^ z#k7I)=r?+7BZujy3gRy$!AP3@`KvC=E<2yA=AI^OgBfnopdILPVIvyuHC9_11Eb3q z?p=ydZ>AoBdBav(^i^^tUjJM@b7Hr2Mxk{OY2qp$-JN+2{>(qnvW;26gnec9v_t2zxV)uMB6tOsW^h&U1lzh^)RPS@P0!#UY#|yk0)r}Tp4LLNA5RmU78mLiRGdFQepD$ek2&McA zU-txp4sHZBa|AtDB6Xb7Jp7kuMk?OD{h6PRprxnk8;!t_4wrGOB18rfl<6|MQ8XU)JwGSLdWG3(lnqaO0+fzWqolV&| z?xRI(yic>>^3Md|6Xdx+m+} 
z?H{MQMe0w*s7BTDE;i%W$YUoC>gJ+I`oq)gMhLGpb`wP+;*-6*YS+b4KP|gvCeNKe zxVdXJ1GCs-b+74UZ!aGdR9pS_568W74(_j#Ic7Z%qS)1HYOaN?1(iHA&0!*?WzV>b z9vqpBaP9X8aGn2W#qC11^?5G{5frbl;n@pE&U4Qcs`#@T0lpe^z(VsT^o;6X45{fJ zY)4!H|D!+S`7!GQCI9myQAM4t&wsyKvvPj+4eVoka6Q)h`zU|kIUQV;*VvN(GsMB~ z|A*}&|Gzl@C|UF0n`s!4mhSI;O9=CIbIm_nZl=!tSMv>-8z60EcIxtis>k9)OUu~2 zUvvk?sqHSdbDmQDInY~V@Xa5U4P(`E<%fM^ zH--N)u-q4ce1<(f`A>+|={v!R2Xtt?^hqR`-4nB!WMg{Hr6{=W#X1U&u^9FE#Cc6R z^`E{%MpF=m8k7%dsgJVk;0^lO;2NoLD8b7*MW9dl8#eyzn5#lEY;Y7f6aRuNHhMhtOHpfC5#TG*lOqSv)C zqsJjEKXtXU{yaOE_7%GP`{DBMv54)PTcv#W(bAhFxq%Oa*+(*za+h2Y_ERSxEfS(Y zQOcw#%L?Z1flVZKz7szzGqhWO?AqX!T*4UBEprZa0%A3&TEW%s8F0o0P^$Dq+a;bS z?|PZ%UtY$xkbs@P#_HMz*Ef+8Tuo(CfT`PJyQVh5t48-kSuR_5VJcW;op@^&)WK_*DB`9117xDJs}Zi1QBGw4bHe?4vs!am;!|0J-h3 zW-gVCQYe<}QWbTxK0STQk{;&uVIoP9;julv^Ii(EbCRdgn^JMFr!dB&Yqi74q%DU2 zt=#XB(8DpolcJ~X%<_3(j@K#UncU1?y)bubP-x_?c^RZB0mnUeY@#yDap?TO z1@7v4`{*ZN>0Pk2-HFEg0~C*9SFrA@_B+ah=zutq1EC?>hw2A`3QHNt_ns{5PxZB* zdcYEsp_Atp{9viezq!7IY#bd2*}Faz_onmx5p*VWmM1uMnI1?;_}yY$#OW+1?g6ZD zUVWcQt@UgVr35$PSg*1WqX(VrYLeOG;I9_ftapB97UgAMDVywtjWOn{Nw*5+BQ6Tj zv8;F?0ys1Mbow2*^L{|C;Ea9sA1R%Q7q?AEl7E6%WoI;}Ij9kU;BBsp!f8Ax<^6Hl z>T0=xh1y`uxOPKXMKY}d1P|{0Ysdiha!=6?y`5)OW>cE|<64MYyL0eplVZZg+djF% z2Jjw8tu5My`a02}Uwp3zT=Z*#X@a=a`5$A zKibH{%}qOOu7Ta=QUOCod}fKlPSwp5c)4(m>yJNI-?o~5Iu#dgb)2uJcE?NUQAgQq zVqaNo!MN$b@w!>fFFhg2F0<2jryg8DWUnKFe%jkemH1(I0_e1vUh|t=82V)(j{Ikh zB)0uss|EaF19MZNytD4a9<+UsSB8<<+#J4**$2Zm|Cy;D@hacE15Q6Wh%nL8^w$PFsuCNc z9=VOua<(2d2{((}U`cc?3p}#-cZWH_gmrny{I+UIG+3@M2zTu5BNv zM?*JbZ$k(sC)b^W53@dNr<=*+^>mzPEVQR_=78}==aHqcQYmhkYVBm-wl^T2xjN@E zurbZ7YSCd*8nya(Dr$4E0=5ztCmx(UX&CR}E+Rg5ZhYy2JcIgLJaP?1vDl|HSH!#| zG?BAv{-@e$vt($iQ^J)dQ@awra`o#|vXOoUQW$T62uk^{mt9=>jM*|1D0sB)oLjtR z12jFyWfHPKjwU%v=facPH3!&7%B9_mHv<@G-RO(@@V1xK?mCo3s=8@i`VSkNYiU($ zwc*RpP7#6GiMRwzU6X$BW%teNZNn0~QyD|Si4HW1LYg8dyzMk}RM5Tk+e7mcsdKlk z(i-r5Y>p&;%H4`Kc>Jl9;Gu>NGmMW2EVyB~R={4@y~eOxi|zC?d9l|YQN3P1`q;L9 zaVWoCU+AlzYIBz_2b6xQ5vBUO1#~pBn$3{rg*N%2gBYMm1|VwCm` 
z=rn9zeaPaGm~}TUT%hWXKpVW+6`tHTZ&4msfX3V~rnLsfda*W_PMZh;A$H*s^J!1i z2d3c@2_@D0gzLy2m()$-7n}O(`hrh1ev&??C>U`B$Az&5gyZ zYH$8{w3pTfO0^qWl*=AO{>J%G*Y^h@>+!m4(+8jn=V||ex*V6zZo05f*_9@yGVLRk zw>$>3N-?XOJq1~X-Ck%=esMQ&cAuEWp5Dp}2PsM-qBpojtkQPcgIzO@x^=IyKqB?61PJGDwRPaEBBYDJmsS$X9r0*)UfLapN!Cl9+@EttBT8jK&OVFcLVVQ`w;i@pOx z9kJ&5FZWP(=t0-um9kZ0H_rM+?j|ui7F`p(LKo0%Fd`9A!OiHnreMc7;CP+2)Idwx z)0lmu*q&*4bbjQ20iO54uMiY!?})RuDalekAwpHbmTL) z6${-t6^~7Dho)>A5=XPJHXN(Yd*??}9nP|Jg@AJ_+~c3hDUgdnQ)90s*KGjsS*e3q ztQ`~aJyz%XzADep7sE~*<}hqxw;ZU7zLy~5hO7xIa8FC_824F=760@MbBrmEDw8Y(YD+cW<*I-b+4D8Z}YEyQXmrtV@BcFOPDqlX9Ve!T0r_$sp^DoiP3K;vX} zq+Y|+y(gpQKgO-4zSl)!Q3?T7e?Cn##Te~Ic)!fO{<59k5nB>UmY==bFr_7&2nxVW zh2-jOWX6pLdk&2Vp4`LWMT`!b-?+g*P=)T}%(Hh0*9UK&ZKU3Yqz2x=zWyrRRd4_K zkxIYaXxJK;VRE}ctzCEk8oQit`BUZ*6{OX*N7$)N5gl`6*#j~_=hDPGVal<$ z1m_X_$Fxm6vo@;1%E48$9=avA{t>@vzVC3k&PWMY-UQWpoFTIb5#Xjola0|I^$v)v zk_LB{%+OWM!~A5~`$P7%YzLgv{2AbD_rp{;hyTGR?$0&4=MVmu9>1R}NssY{xh?}+kG z!LrZ%E%RfZ8r_UD0hwv5-69*GS`t(j^!Um{CkeV52mSo5b^8*3Y|@>6_V?OHj+9=0 z%yz0gRrBkSsnQ`o7<U^N6Pk&7FeI%KE;y~WY3;XlG zdy*9J^=~bJ4!ImnKaMWtsR|PPx!rxTpU?d9@#^x2f0p#z`F&15Lv7kW_`=T?BliBo z&cQeaRA5!J?=K?-rAtpf7B1b={wlLIPu1N!@jWT~K*yWxRA1@EQDnzqz|Wmd)F1{gS$))l*s2HcjoW$TCe``{^If<9g2y#@i@KUfr)^|<<#AggAVz} zoCosBUi&ZkejVhJV>{U>)sOe~$9uf|pOx0E*mF(c=U!Wcv%d1CRZZ9Ne~D#vjeLA} zYK|x4Kq9{vKY7J?@h`Ri-zCfct4#d=dT{p3Az}2UOud3+!~5dwAE#<{jqi)&B-z^S zIOzZyIccR?>tTBE)qU|4x}t%2K|hB5mWkZ8jFHFJJh*W|D}&`CoM85&&JPq}HGmNI zz5Rt#K~(}jN8-pNQ`rV~vngH2?>UD*`#wVVOw;=xM(q1QR~X37R^L3s`z5e%K3~`g zRysG?wUWoGMzSE85~d5756fnYg%d|SZK!1>;sR5J_v}clWpkGQ3`I{N-ha^(7wR~$ zQWULhbxnf(2tLjI6+7x@c-SXpvh9K`iL_6hhs-v6aV0uVNz0v_XOwm4Hn#?MQ%qz( zeA#?EsW24ukSS6(2L|x@ZxKinfxPxcdCC}`6vO)n%MSUwY!d1!RDIU<7{{7oR zVn4H{*NOyzM)%xH6a7)o>2o*(b4=)bSa6Z^W99%T>9 z)F|RuZ_;Bco01Gtq5UxxcZ;Nq35b3Mn4BLF@fFM>giXfVo~~_n4-*+@2@|a+u;Zxu z!e}h)|6qy0WzKL|Z*z^}isR$cr7wl#u=bPlIyQlyrG!5Qk#5R_abEWJOCZGDd$GXn zXC$gN6vc)CFEtJP7o;`&XyZ6CrAIl`_CYnG3+W($uz-66~mtOR2URze_wj_O}s7?FUKBXOGX&2d>2=I8vRV8+u`N{in>y3TO 
zL-vBEft>aG=Hu5K^n&fZreEGfem?@3XaC7Bw=XPkyok+X#C>X-yPw@xnD&Jy`)ta8 zB+SFM8=n7cH}Hm$lxNjTz;kvNEb-!^I{)B}qzj6%jrj7u5&uubi9uD8hh?&je0PFL zsfg(Rq;8~jO;v>l^O}{}=x#DaE#}=&P)Ei5Q(YnZyUmmDjnKq}KDMn%Thak_tVELS zM(MVt)`l#z3gbvat2rykG`$03b%E#BQK267AB|ZFJuM_1$-}GgZLU=V^b7}7x*qBEq+wDO3JIvZ_q%m+EgEt68l`+mM3VA2)5mMOV(0Eo)J;2C zzDX`lI?s+8-W?*V>ow}gs1Oa(y}20_UB?#-Vcw~C%-2CEzcNi-Dg-o&TgFqP*I;&& zk`(+BGEV^`=0Z;pgHOrdB5QDm%s!!B(-M+wM4()(TJFr6;T8=9^#r}giJpDJ3Gn2E zqncR=%9dFCA&R)BN1*#&4t>zUv~|bKX?lq7l1Q2N;Ub(sj>^MvuP&{jt@nVn6rwWzF=(|!`LK2c}hU( ziR`{u)$BZscZ)uOy9-=r>1ky!*d}5?Y;||c2ov4zU&QN0E3%WPf8r3rYn0HfXu%MB zndbc_k#^nyq-jx`0bzm)8Th38|%n|z`c2ch!{SxX9B)g_|k8K6c__paj%b3~?z!3rrk z;4@pd884*8|7&<(;pv=Uqm^~cTtl3)faF_&$@Ik!p2W0or>MdY%J{Y44oQ#?2_~h4 zM&6jRVIi6}G&V6juxs&Bp{b%G3`fSd(z{hl9Q3+7Uls|_*rckAe0DV!Q=I>g*ZQE< zU&rAOI{PT`_or57B==;Qcgw#@@qsEFt?7>h>9@9|3nR|Q;A)1+F_C8a^cMbAVwuY! z<0!3Do{p)i)vDa_KPtYC%hMzcteXsr24!YVSz%u38(*$$#$z4#+8Pbt0`o-RJP~Vd zJC=#ZY)Sag72Ez)xtnej#hYA@l$QwQoYc-Le#l$o*vXhc=t5~R&$#7!siQkzWSLFm zT5OQ_(~WTxoMm!$$cIoEZpJ|i>@^bk!jcOxF?yB+&h9TiHFRY{D+F#_z)ezS8nGhy zC11k7PpN)(?(#KI-LBlqIPY7!jW5A?YN;hSEjM`Hv)W!tD;W*f+*nJC3MzW*J(yRY zJFBb#(_n4iCZ6TCl>{FbYG?EN-zQT%$1W<^7!Muc_A8cw zcOW20HWZVJi)Nl76ILAyK@A?6PqyDbkabYd7Kykr_FdO(6#djeEAs@c&Os|sNjm|s zDB38C>Wzdqyy2<*4J>DaW263q8^C^1?+)K@Xsa48Zo$PO z>uv|nTp>Vz0Jl18DNkiIPnz{yIff&+ji(X5*!rvZXx`37R^P(8@e?93F}Z6t+5v}* zSmpV5m~$wxKQIZYK(UMAdi62UQ?BHxO7aVf%Nvbc4)lkgswq6kUq<^ zddcwff&zbdYL` zk!bYgb#lKC6(*~b#y5!>-re(zoM02|GG-1Nr~K&^kBh)d%iF@J3Y)0qhk~KzvR@<` zOU3l))lrj?E3j+Uf6X3$UgUZXmqJja1TS6S6nO!*q#&#MX_H|&Z==c)8<1G^3T|Q zbH+*&F`GS7p)eteq~#9Rw*tOE6)S$ITXpEYS4fCU^?RoBTbo`;*`q^4bw+>*I%k%& z)<&C*CcoLY)7Th^S)9mMXXHg#dqP9@SJ0DELq)jTEvU1Zb}}Sc*pXK zG}P&Pe({6;_w4A2Pju*5kDRP1&qh_K-HD~~N5BVfz62^BFT339pZO%iMsg@BHQU`g zV94Mt&kWuxzCmv9EZrmCTIXOSdX0fJ@fTW^)wOuje)dM_v2MmlM>zNL=3KdrF2?9| zk?f^%Ia&B5|!Z0KfAYN(y}ol>Ib za5l-bwf_3s=p)bD(14Fs_Y4Z17X?`Oc_UnjkJz;tEJx1$im&ksbHxaMqS$;u5Bb{o zx4;l>k)+Ugszlq+K((vtSDN%60uBb@tvp%{;P?13u~0#8tkGK>3RRhOm1S=b0aCVW 
zid%RYr6rra9q)k_YcpkWinl>iE>ZZ^C|P*dtxGW;FWpIqWcT7MR*>9zkR%BmzfdY} zTO>AX#zGjz%5Ppd&$6#R!4m>cUXEOg^6aQp8J)zJx)>w&*i@n429)-!gK!56E&+3zAD zOSjArih8qrli0b9RunNL)lrfN7qlsEZykw#2Pd=NHr=UQ-1WF&qDs~-|}n6S{`JADm)k(30SkY-W34AWPkw=;Q5AcRM=ssmS64KwBXlO zyNBbr`>jtHO`F}g7~&Ahcd6o+@}(#*?0lr+gtlTC(20)!W;oTk4r0o-1(=F5on>Q{ zoX3isjb`){FqMAd6H{?X;-b=sw7$pNH>{V;;=+XC5wzxUc_+=Z`>=owW!^gx^Vyv4LX5iiZqo;o_QIVd^x5e-=>Zd={dq%uF|M`k`OKWzRle-dJVG_ zw|hZxofyYyKRs}>0bKMVCcL$597C2{UTm5KLF>lMzFp5ud#4;o+s zYv|i1J%zoWcdYgkT(Wv9r4#H}?E;}qtIw3^0edsAQC0p-!(t*G1m0S{_1L-pVG ze`T{GkD73Mz|Qa1oT&H*veQc&vxb77UKR;$QR<-0CEK7&`&M4!8Y!n8dMU(VPg&lS zr3tq1*A!a3oH-bW^|JI4Uz!d?uJP0BB@_K_PkXRml{Q952+j6D2c@+H8Q&f-rv+6? z#Y_i64dk~c0tb%e;&&i5zVv4v30{wT`~$1^tApv>4xlO=aWI$Q^g{4>T&~>w+Q}C# zqcCL?gOJ$wDI#_sPK;g%hd?tVtQ=`Mx=6DcQj2P~=i-|kliDBl>ISu@Z833XK6+6i zZ9tofUl>VR5Be`j*I#Y~agN^0_tj>JHg-v!51@zrLjUl-Ok|uWR>TXKK`6pjoj)1I zU%~;l5sCvEm)iI_qVCsbON5?}={ZZk#Fae~TbNYft0#bzc6% zHe4yX=^T-Yf^>CF%gKA3Fh5h5emqQOkI9+ z&JU`!nL@*B3e_#eH2_hT!l`9>YRd|k(aIl}JHOw_SR0NGLi?ORCT#KZa33G16@KQA z0-xJcxQebXe91W;?-ej;pn67&0#>6S?jnHLnOAx>VR>%Ni1zc*@XI1eWf?ExLm4Z1 zoRI*x+HGzUJ2z^%8u*q6Arj1Ir;Px<6!DGwff)V<&`e7^JR$h%lYPr*#8SoCMXy>) z4Yl8Vsy6-`4LT@ALANhomo<5agvAr8otT+NRv{xE41kq(z;Amw9n$S+kVy zfIZyjv}4+y ztSugUG7GFS51y=??P`7zY)@-udv8|lEKt8r@`1QAMvh#(Rj&ODPtHWoHji^$7K&G_SFUMAU z$|L+37}(duwhb#A=3L`J`(t!s!VA3mH5M{_EzlEtaa= zDd5jsJvmM?!cgAHHtuwPiWJXm#`g6V5&n=YYqQ0aOJ}B1{G|kN7QRHU&Bu4zMEKXw ztXqz`oLEFgOP6!Wv?fdwS*n7hD@qyN!agn3!=zhQ3&~QHAcVbsi4pFE5jSm0>l92S z{+zySs0pbcBPF-ue1Sfbn*}A+?s}1o;7(e96FBJl64~1aEWw6yYWmG$@`TRs){JKt7(Q3GOd$7vnh2k`RbGMsMj7V&G{942tnL|4yx7 zTRSusaunohh2c`P!ellVD17)cnvh5zDi8>%#iru!##St{94!#fyrov2ky86p7~(Gv z+SZ$}Xm0o(cABch1Z7ina3dTuBOK`CMEt5KZ4K6*n_z#3eWrcdqwqb0=_9FNgp;cM zN73<7QJr3iwyZ8LpS!7t7m6tm*w!}KMT<&VZz#&3s z=zP>bwxsam{oI1$M4LYK=tt>KxHeVUnJE(fg!B|IR7Ap^aKfwF02>r-%Nxz1h_)nG z>%^&PVy+H1DKMjN=D)OaQ-0m^~v&{D{Ot#wjPVBx=T& z5UQMkcE~Kxx70*FbVc#=xfKQK_&nAJ)JLvGmgT3E=T0-}=f~H2%m>$|BBiAh 
z(}yK$j^PYsEjZ+(fHfI3F57##W@z(;Gd5m121UMz)fgX{ztT>lT8*(Be4vQ2Efz;4 zdFmiT9Y_VHRg|cp_fi=Nu`UXFmELzSI7)0sY?mqXu&1-{d|lD$^`Js}zN0uv@%8)5 z=lnO5WgV>KOT9)jd~-$&($o+FxhMNmqWWap(6rUQnVB3ic^=?pjv@ zq{zuixx{8Ckxe*fpQP9u))4*;(wt7+KP0oD*u^am_##q&Oo%BZu*L8 z@C?KBTVkPd$-U5g$m7+GjXJNgX254Wlbg<2T5=d!Z5miCi(btS`Dhu0Me(5`+__lt zieSaF^sU&l6RJ9fL5$2{+K;>ozw z*$*V;G*z|tY3vMIh!`5gb6R{^b=ZHt?rZs(a)$inCd>F6ws*8n2)BMZWf9)yUz`tW z9GJ$B7a1W)Cr5CL>(9L<`f`)>=ql&pm7K z*5$7YuV+h$4d)_z_Mf$Il(uyQ+F*@y25$a=-ymApR1SXKGo6V9RA>V5753#0J0@`T zq8MEXD>@o_JiVs_ql@&0gFX9ah6pS2hh*yRQ;h7F&sio5n5egZYz$ae?B!lBy)dYJ z7?`INlB3%Fj`N$|!1go)xiTinv)Mf6(Qa`d>62jp3CKco)&fPme^mUS>c={nu*KRL zgW-$HEV7|AJe<><2Kz~pL!F}$UV4n^zCB4uoRsVG>p7o#D^-M!RB3g`m3QjyjW~zYjog5gh=)q|DiK^dy zX4h;{zH7lSx9I~6tC~d*+R%dBuv5F+g1;i zHqCV_R8h^ni5x9+FnHmoXI3(__Shg{HtWuN#s0d?dq{#nQ1{%hBYs9rzL;O+PL1&Q zL~SHtX;~}xvcF4;^fh|nkFSZxJ7ns*lVAxdL~VwNhp(WMcBne3;HEsb1-CiF7HvUj zQQSQ1!Wf)&gQfQ+@v2q#sDl-`Kpn{$Uc7!vqMh?&dP2XBlj`wa-)Hh~b!K$mT-KW4N@+7bc?1+ye3yLRC8W_{&KkOcKAY$l7NoyEI;; z$p=LYd}Er-onGSWyj$zs8I2xR2q}<|-^O!k21RGQMe9aOQV@P(BgO_2rry1u-xEwm z&fwT%3FH_2D)ymyDZ}NLu=5DLyVVCwwcp#c_+{&L9pJAESSSl(68`IOMU{Kkfh#j3 zQ56daK_(OJu>usYvWxMKdW-mbR?Bw8LZwxj4I4ie3OV#=pt1-_46ZH38v4909|Y0` zVos1MGc#vpcQ;C(15bp>5A4Iwi<|W>C}NQEn>PxOOMG$ZdP3QcOGp=pacy}21@(a3 z+K{Y=6vh6|(%Y)ybqtnBHYA5Sz3FK@$9u*aAs&Kuu)1o7z#U{rr%Zu{hHHWlJVp}t z&f($*q^Fq#otEmGWJJKCz?~h2ZEy4a|vk!Br z!-fTSJL14bY_aZ2^)Ivns1-KTYC}Nn|NjQ{JLKrQ?i=a7LSao377tZ3Gp@+Rov!Oz zuUQtWn^E;C%Relu14H-qD#Uhl2VJf_l}C)Ybi+1qc(d>0#M%R7s4=M^WjM0VauZMV zbtn%~aZ}vN(IXTZuw#`=q^75I&*8I*biTm~#AC(+I!lkhh6ItO$Z=0#EV#OI{eZD0 z0)kPU94Q-{2$oS~3TL{zWoE{@oTkkcl-sqOhd;$gEIxlvj06nE&xX|>>li3Hw7L?5 zP4!3bIG_&E=nDH5jWrxSBJ5~j>ocpJmT-uH`O_MA8PQwMSwEVQxmD;(N@H?bHw)}Y z-*P5s9J+k|$z8oD`fbm8X7c9Qtz-#8VvK;irVU#to+VVF(l58y&brBRNY{(wUbK3l zNx@wWg2vXHzSQN(lSjE*gO%8a=~!>hg-H4K)Af_`jjmWeUp|-F zE0B7mt;@17vGZzs-V3kCFS?4+=YpfBm@HlmjBRwkPYuRp@wh2R+nGj`JSS_@8}JI& zA(vfqQG~);|GDV_9#wDgnC^!3b-`0x*LA${KKymf5yn7zxI^huHpSCjrtTAuAYB!A 
z+JeP|*Z$Pjz*f)8MOxQW=D)2~;claZ=B~EZj_1yZX)G_J2bWkciYIa=FEmXYxeZ6v zlQ^P{`CF6gv_CLfx<0M3gb?hAOMXf|gdlZW(@j`O^s7#CUTKH(!)^+?pIW{)&EogW zei^yMkg{pp+jBJ|(==`>meHasd?g=51yZx3`z+JetECef548zwgj$`c>%J_KmcXw@ zl0b$9EL1rTzXk`!l0P>OGcPOrKO^_wws!8=q2PzB&GGma)r^$c#Gs98d38^jG5Z9t z)dY4^8kiWKrke9tJ@5G-uFH5f%=B1E|6#2v)=zt4VQ~DF=>mAx?D$nZIYjeJnAHDNE|Mi9DXgjCSN?l z!Tcpuuj_Y_E}T`o*D#Od1ji0djcf8?Jm|OuWaJ!3KM%Q5Fn)jf6tF3deS$Vhk%-U( z%6X^#KI^!$J?4JX#~rXH6exhZD})iiIcyGfzLDwwB%AmBnEf(1nz}@5Fqsa4Q0#^L zX$8a(o+nCF$A9S|y+mDzcW~p=DWoMo;j);q{=;*1UB$0C?fq^4c)D?}PQEl7;qR8b zDPZpvQlhn*j7X_IK9o51K4xo8Cr_i@C&=GA8uCtE1&2@&XtrOLHy00KwW>U0o_lqO zq7=J{Ct%x?=>2U9X4q3($Q|C7wsm0A?diQ3|NPSf;9Vw?tx@4TjAZTVpW7#W4f#;5 zP03^cp&x(bDyNZaF8vnxgY7~pxBYpz7EO6?W3eo1HqZ%6` z5iMY+@w8g3<2k=+`&P9&e*lvx;$`~3qpnF!Co`_Zn~2jUB)j6wjrh}RQOyq??w`%) zY+*O#6(PJ(6eJhMr)GeJY2FLy4F#?EeqGO5K^e+-nWeHpos&kH!r9gSRRYXt?X~%< zOuleh+@ky$=IBBo9|431EIxY9^!_{+l?N;)2_J4SAfpZ7+Ss2(Y(~`O8?t<_Ig}YZUxF%l-GDYNM0;dZya;+zC@x`rnKhURFa;}WL?rBOK z!e_}k)Yudv;r~2nYIT}cVV@TH;cxbUg^kUCtz0<=dg{$v09G;J!9f>36K5aWB zU!;XTMLE6h^Vu^%(s$deo^zYNEJ(IE)iWqartYLAf4Omibm+mMRN}i^f_32o-t8Jm z{ml4lTmEwvy15Ldk_UA%_%%qNh1BVG&c$h|6I(BOr30TK$Gs!9c-$KHhdQ)#YPiUFQaO;SW&+Q7$aeJ6lV=vVOc~tzqcU#TAy*mN54l$Sd*vH24-OO!$_( z1nqFHiD+oU`B!2YXZ`eZ>pu_t#%RxaOUs7+5>so_e7@5`D{Byd6cW}gaa@>W2`sIQ z-{ud*&BN$3Z7RkB@UMp)gj#evOEPrbGiV-N70;JLhMvsN;9O-RxfBjztJ zK0I7dho~GDDCZG2?(@2<95RbSHOgCR^tOnvsqvNHW7Q%Xhjzz4 z$f&!HSB`4TnAP1oe?74@CEJ)j;xh&6f%@f<=fW;zRi`VmzBaM-N zN}1kcu0DyA)fty#?Q=+U30=8gHGf^ptFQTb$Xpm4OOp0mAF=UvCx~^P8IG?LNxdbp zFx}R@P|5P7;`oP(EP7PVlYk6^;?(}af~|{?r{WSwR}q-#EUpn#?6aY3a(zu^mrgZc zqJJaWaxhfGlZ>#0E2oMF-F@HDfm^!mX*<#9iWXnDL#tpetuk@eL_+p zl#XC~1)kpNnzG)CqPwD;xLogF65}Bpg7}jPiii$g%>uC)y)evWX~CvClVN)}p>&~8 z@x4;)duB^yfA@nX|HaC?{T8NbP6~a?=X_G}*m%im8=1hH?not$#BiSUIxD0+!cfAE z?BIt|>)-}#kTT(|wg|omw)V9^LfJ}yt#YmXu=Tn@M?|pg5{fNdPkjWqI-UBt>5n7b zG?B%jR&mvD_^SVcx$$!Z2l+5!^Rr{k!u>~njr?20E*eA7BGM~M&I`>o`I zL%tT;2V4>?a6JPZU(-__JU1*b()ti^TUZ&DDtxa(qI;k6)_s 
zJ+RNT(pWyV-)+fBPfyVqX*A8S9OU-SDpgc7>)of#hw@PgDq5=UVSViLhy~M)aPeA$ z&$n}#O+UZm+v#RMU7Ge_u8jsBm-SSFgWq{cOXIBtE02_spXmG7A&U=Qjpo-)dp_h< zi{nuyNxZLp?e^VSJ@3TFAbw}9*(c5uZ`$H`s#V@g7++J0CSAB6tpJ>q8lbJo=ir2$ z;(pob$a>=W4KEr0#_BPvG6ud)-f1zOAYTdjCAFCXfjg<}Zco^cd7arwmPHubY$f@Y z*igm6pI-*j7&Eao&-zw?L(|94;X3xM$3s%FC(I~7DtqI;m~?M$jqfI}u6=BGo_NP# z;JJ17Di7NCUL2h4;eQaF$Jtiaivd=b6g%#%ccMRX+xt?2rQdvHnTR7CIgO&BJkv zLAg0Fec6$Y>cp$Ox=fT2ySr<7Gp;-_7)xLAl=7P2!yW36=HDtR71&W$muI4A{m;8NjoR)*3B`G~g=vmGJ>TmfTEYmYeWNn>6 z==NcB`RaML`B^qu9sgWG9CsBj;=}_a!`iU#RdQTW8-wQ2F z2`&ow=YzvVbqw*93dmZE3*#O2vkq_oQDT)fjMe*?fkQC=`6yzMa$1G4yt9T|dcqy# zhLL=sdRY1~kR|wG?tjLr^=l?nqws^R11r35#U>oQ>Sv^EitGbEhxe78Tuv6wR!4%+ z1X#5kMLFMnr50ZbVtL)jYeLG^Lcl_hzxJN_C=C;*>`|J!Ym)5+kA}-JmkxU*oAEQ< z83`BpZXhO+JC@3H}H>g<0R9_A!#O<1;o>mwbROUiyUA&}&sUV7}a<`(Be$ z;NQs8t6x{{w^Y;VY|eFWr~j<4!>8&~`Ww}~uf`U|Gm*^G$+xV=3gdNbq{=kx6GA%A zX~m62WNmc|)aiyt6}J#9wor-EswQ#>dTAfTpXqG}Cw=oXL6CYJhUH!l+72KT=@A`G zgt;3JdDKRT_M~@oFIGD-l9Mxnnm@TJ!%mc4QBF$oy1}=~XhUa1*INL85|}-dxFB=M zBC)h+g-@ya?j&=KZY-a`j({b9T}kkItDbzQC3FCkiW^u9LeveYXwW74vk&L4CU~ z&F!%TV`k#s#LePqv3!x?$*lF-eV!`G%~c#J?A}t+qWZKLBH+l9VfOO=mmC| z(6cF<>fs=0HoeYfIAGh{PhxlNURaZKj=@Vn+4jQ?#`$$$0*3xZJS3v!gFULRyS-;k znW>y^1-B1$u!_ShFG6=SC|4|A^;MK0=RQi)%2ny7XkA0x%UCo#6#NXh!k|oKkjS4H zb}#iDKJE~a$6G3}tf%y{BxiPlk^l}_83VwW=0q=!iiaTf3G%8gA>;H9?#PPHfmHl^ z>B?jnNbz7r8DG>g`(E|^ZBT*)sA9iaTmKE!obpJNBU;&vmh&an+;+`8u+(vW*vL&{ zz^<+JDt4LG(NlAEGH|TM|urekIhH;BtMC( zvr2iQ)#|@es|})Y_&v znc+{=7M04J3oaxq zC=Zx7km&0?hhngsmY~X!2NUu{{nj~q-zzazKlTb1YXh6c^5?8HE&k%0Cs-7;@=WIr zkp#4pXE{nVzc^djcDo$3DBEB{Nm#ua;7y-m3P}ql=%6JFrhARR5l=OCg6B51CIMpw zvdpKiN58dN7$`89`MVK4e^S~#v)qI}82jQTu9d}IIyF~=(KMlMw}#;tpJ56ek=nBJ z(!J8Xl4@VpdIxptIWk;njGi3Jam}Ua6m7^4F;;*RrZ{N^T_l7FwmmthLND|_Tc1mD zWG{ZSeJCIu$Zy9>P!ryj8YMK`+s`D9v+Nfc&KRuasv0PfIw*ykSv_MR7`jqE2~ z>11JLSZMcStuPah^sZ(J|H$H~Rk2(Zz#QS-FFq?eOcSnW^vCg-2!w6TNm7PzHkj*^ z(l?3%R3rit{6~k0!CY+(t64Ojts0;4!WZke#Fc>q(NWJ1T8TH%^t{s{!G=^>9Bnaa 
zStMx{xt`PjA%NynHJdV3sLsN|pp4bvJBhxri?Dic+ZX$M(fOHR=*p`b0bpZH%c3EP z`CqxeTT^3qg#aqF8hlHmdqne%l@27Hn=j095Mr?qEB3B_eyym{ql6gRXfQNG!0dD* zVwzxkB@uh{v}llEQMs8@|HXMjEjXyib7%nDuLPU@PI`@fk?P1_hdsPJKte$NwcRp< zkBdQ&KKw^14OJIBW$vwZX#+x?%;xlXq^dgrfu8YhVb<9=oJ!EYy{W`zRl{GWL)oop z7 zGUnZ+tZ!;FZ_c<#l{?jOl&Fz7po)wY`Ts@@kFTdHOj%4S&ih0~V+wXleEt*hY|459 z%@u(JfGVi}L>!EjIBy?RJS4owa%%AqiW^!U5M8N1uy3Gf2gIGbI`@D4wl|g&^auox z_z3LPCVVlA=?YpyE0K~;Xxo6TPkYL+GQ)eCV?b$B*UBx%-U8ou3%H0Hql zZMtIc)_(I=Qcs9!=e%H)vCO}G=`iQXC&YBzsg@!_KKw7={Fm2SHIpd0AFoc2Xavy= zMrEsb;z8)A)D+$X}oj7K!N!&OB81&;p$ zB>sZZ&z3(vp0kAACOuJ?e>RU9t^Bru-YrozjQ!cfCyIgZ$FreMP)8){2V}LBxbOXY z6cmE)7RrVeeg83n@m>!Wz~TRwzl8rBlkWGQ2-rSfoz+DTd^^57ag^XMEcVS3TzlJo z%8>St_Afja4!Z0>O(=k1UHjR`$dE9 zLBt>U(b4>bWh3|pf#57ZFUFd>SmMt7<3092euB5c%Gub!`*xSiBd33MPs(o6M#uad z=k!0uDchs-$h%(;XFlrQ@(q!sP}|Rd=2p*tK~CyvTKS2Q_pjf3tQHw z1WVjk-)Q|L822yQa5?q${iNA%znIVUi)O!g@9K%3UH?fl*&n#qN)P{}*~wot^ZtqJ zgI|!qe&YJ*C)cvA)_x8O_4lHFYRgbf&YXm*-5y&!2SE>;ADrVE#MOV?eTtp^7ugTb z^poiJ9)jQaH6=rd^D6go_)kjygJQ#OtBXG=8Tf047ppUaTQfgFef|qnk$#){$EXlz z%qF!d5q+pEC^h~8@h>sv{6&VBKgj^Bm`xi+F~EyLiFn{_*vX zspW!yNyKrfp~F9aPkpwwq|kckC1e~jOk<)--3y+oeQ39X3X*Gwz|Ab+?*{*;ZB%p0 z+X3G&h||jZbnj^~w7f^=<^({Yp`3BmxI$MZs0qB+*Am5o1nSvgmSB%pe|k0MTM?)_ z1^rA8=pF!I*QQzLJOFz^zWrH+aMm*SF1XgGWxKOv9te6kvm+xYX5h&wZ+r|5)d_|& zUfI^M{yzO2g6*h{pFIV*EFV)Dr9N{DdfW54An6 z{6y{lvw<$AG_3uAsAmr%j(>ybV&aimLg`HLYp%r#hq#?ek2Q9VexLvq)PGh~%-f}!O=WRyJ4>1|u zlWDO7)9y-_`Rgd0JF{|M))S&^GZ8mcxDBd8!`)iMZKJi;C(@Dx zt0544**W(JBsnl9ApSM1*ZXmU>38IW^}h}Aibgjt<1q$$<=47u3(9!RaOaZ+e>H|? 
zaKOLDd2sH_c8yE$;ly@&BsUM1!P?bDoYgm zD~W!{k7u!EqerKTOy-%IX;-{*JV@fQOjUO;+rhbQ9Z8FtgrHST zgC=L}rMXeK`9#svIwu$8{@L!O94Qk$Ha-Y38%NAHQlQ~`Ft0=h6Vu2A z4gndkWC?>$$NE;Z2##ut9pfzBG%OCszi(l!dxPa+v(b_o_A#rH689Bul>4|(GNZb# zy4waDmH97SN4W(NETq*Y+q}QkEt^eh$GFeeleg887}={Pah*z#nxqSk(L3MK`OYK}yh-JAtSed!>NYfio?gM-j+n$0E_0g^YL z#@0m_uvilj&|f?iTN5Ey22bOo2MbAd^f00mL)6jm3CCuRa)FgN*B@6xHWw8xM;qnnGovbH#T zun63%Sh|k@i>w-^;?@w|<{;GTNT{Uzkw<&NBYF%{b}7z#5* z&fOSg&nUi)<~U#>?OQD8Dh^7;=|90^1Os;Q?tA*0y~_I{3>)rl&Xz-gS;7Y#h=?$* zLDgSnpJuh06zW$n^tLMY!hZSV zh??bdajC_vW6>4rpN{)48ru&lsG8i5asF1Yu!_+UoHz?90-u^FodfC$&6|ZI=6{Ul zT<*20F*g$hhyIrQfEB#{LOrevnRJ;`4Rm1vPxq1SGK9pxwo8ReSPAYo9u%^iT{}#= zRDOO&u2BKpMlkJQTT*}ZKFRO#qVp6-fvWP1XZmJKv$u0xf|Pk0y5r2HyeZf62Y33= z;V?7^=az}iCo4~Mta~L$_wc{kuwe8kdh7F;)*Mq%I{oN}X4bBhLNU<{+LJ#EFLxX~ z)+hn6pd?Q8&%&|l?Yk>B+@5m)XwKFq;0(yKFSyGgim1Z#a$u&#v2e z>_8{-sL=;=eY;U*!#d8VlRn7?bZuuKw)w&HAhmUWuq2(fuqRH5a4Rb+kcV?-6Z8p{ zI5pqfI4`_`+~9uDdageH4pMLduAZ^vUD^w0(uu!DA(O2&!ydGmj|Co~Qe+eRkqzwBcG1ccDFKvIN$q#@^cOrTsT z4VcrJ4p=XLWxRIWZ$Yjk!%fZSLz~!VQ^(br7vKmtY9~ZOe@PIPvk4BcHIR;YCwV+? zhQHOcNzb5ZVDs#DGo?^44msiBy)d0>n|zCRI7n65d4@dIwz~koqGmFFCl1?jAQgx! 
zI;E6g{qoGjl^KJ;VwSLt)4IYcX8mV^*pn`7JA56e3-eO}$jS1K;-50yX;hPqE=aYU zp|iN>!?P=t?Z}MU2&K)b!4Fxaeos-(&qZ;wV~w-DF%MU|Kl7DN#Q3|~2W(<{nU}r% zO1>p7PpWC)K2@}Wt>w~c33*ahorA=fFjIoF`53|@x5+M8Ni*uGX zm>3ya5Sju~5dK97xX;&Ggk6D6w1X5UXq-55*BAa763h1H(evmy@13w98_t_IF=dU0 zw6Qv%yP;8`9E$?)9fuIa!;Qh{NXDU$md`SI#W#qmb--2S`n1P@0at&T-RZvf@BONJ zaVuWm&>!>MT-n&HCPeM7(1_nGagS))uPU2za^X&R;Hg}QIR#+T8l9~_Th;Z&EQID zF^iF{Zi$b?25+5Wi14@hY4<*UT9W3p(#&{khQ1#<;uU#ol7}NK$SJ)|P%|8iCe8iu)Eo{+v)*zR@pK z(J~OXGLU5@DIW1DU7-)}iS+ALS}UyZyVQ&B%}3>M9U)pXGLsso z^D!L?(nhe2?h*A|jd0^!(=rccrE|{a!v(M{3Y~tK1VvVrT=wjkm*3{$t`)9?!Ttjg z1J!|GLyHN1p=9%Dmf%&yew*Ubjc4rf?Tww}(VYzDQMR{aOSB*TjGXis2AbM@>#kmm zEXvZdX=`W(zVY`znMEAVLEfb(F)DODpG5)*3__S_AU9Zy|EK&lSGgQuz_W>IP)!*& ztP>1Y;foCsUC(j~bT)rk0IPP|j}bLzXAlVt$x_v+GD?KE%3Elc`RE1vhz5=pR&qH$lEO-F#Vxcl>qKuJ+7Z3y*=uc3VHX~LrBwsI88)9ZnhLb& zIXZAQ7yW49x2TWMu%;X8XD7q)rf}s5g#RV=t7xC%(a&o+w>6|w+xgmK)ocsn3lGVZ z=aS7v;2SnEKTO$+*dvusSeX+jk1t{M#~KvyS#Fzj+0LE$$4b7$l!7`LDT+O9wQl)q z8Bl$_sen?no&>&r(?a0j96vis2H1W&0W0GcPfZm~x53omR3M!> ztUA+Ri<~T@rXhZJ#-!U}XNJ|cpWKPePgARpr@Q^pwlV2DUT5a}RnbvNlNz|;l#`7{ z9l5}oSivS&H`3d-se2}MaIpu~UQfzc8ZD<218UZd4EWI{!w#TvrVi6tm}9|j)jA9D z<}A+_bqb2u*9t7DjxcpT>dpdO@4cSNcWu`} zu;LBq&TjBs2ie0?w+gc?)HiKz)X#T4C}Ii6X(vqu|5gm81VDGHFEQAf0F4NI{ub8( zxz0*9ug<=-U2$NXeQ&`P+CU5>UM(0axvKmysB2sd^zh>0IS83-k3R<+CrGF*T_cvNh0nYuz zksAFWOsS6i2G2}qRdPU!1lC|*#&F8k-6haT20F`|I57Hty&xj+a;lR}8M`U!=s-Du z6`vbLZ!bWx7v7Q)_7$eK#l!=?2 zk10V)<;nO^?J_@kj*-w64G|bu~tE*Y|H})}Te#kY` zvg?|eVgzWhpcA}@E!!Zxv~ZoDVZ*{bZ$NmEHd!{;G>=sIBzkKR@eIp3fEnl%n;S*= z&|3fw*xbP2UswH-w#k#g_XE6*J{9HE39|Mk;6UJxRKAK13X2CJ;b%@{K{p^&UY(hj z-wY4h*Gvc>D``Ko;q$e~Rm#@Q&enm;;QN^n|IRPbi@^U0|Kl-T8gQII5zH~W49$o> zOGGy$82R@mO$~NkgFOfbH^y!@87-8FNcEk-GLtr6es~M3@$!(EWut+{{Cs&XA3bDw zYnmeidtojg3h}BhH-!9+p9$41gGJ01O=p7$FCzXER~&f7N1-A7|2(EB?5~O&ep(w( zhU_~Ai;4yJTWwhCbFv0;7o_V2?+@3DR)2#Y1%A=?N^MPxmr$%Lx_MkXM;MLU_*&-9 zO!ala5WBvvL1+(m^h7xy6qtdM3m8Mhbdm2PSk1nZgf%zU<>0oM6}mz5$YX`k;(;X{ 
z0|oS)^3UMP>VyF(ZsDWc5b;lVt-t@Efx;dQA|N*jO>5^qt*p44PSgxwS%-se%E-cANh|2x?|=>^!arPoyb9s_-FJi~O$75|#chPC z8m{!0iw{gL; z@R%ipyL!y8zw*(3i-=T=L=3LvzupJQGf%eJUKpT(UVKo{Ll}MhX)_}n0b(>$7aPvA zprcd@ZI@zKrNp-fZR&6CgN%3jo&#)-CUrp)gN{|a&g*h(-2Y<$^GDzif1xo%vUMxi z4p!Z7-h3vc`W>R6*k>>L+57PA{#n9R2@?AkC?+}@J9;&w(g5Ssz7tw8PQ;*OR7g{= z|9fC|B`5t-Ts4=VGZflgF@2?u!v5!ou9H!}=$Q1-jRKBw3&qCXv`0FR>f39{)zqflhDwG^6<+V#GhftH<>(&2jQ? zT)L53sPmuVyIgH0hRs1iwzS3lVpX%wpqcU~)8OzuR1e0Ttqy{N!`&GjKlETAFJ)ULQOZAi8(NOO>p41x)!9MrL)ElP(7~p~s$SxpmNR$ae>3kh1 z?I#7ycga{oZ`HEy1$I}M1Np+Qb=92nuXXyeI_uvMF@eHwuB=mUC~(*dCHHU@(-vDM zgGW4V8BT^$DwYqUm#UZjLTsrw44mH!2IEEan;T(CYxt4W6W@#j?2c)xgc>sP+@%9@R_~&6-O$1}=~J z*Ghfwucq9Qdw)i6!}BZi{<#=*>>t$V>bT-=Y;}r`HoreT;hKirC>{CopFFJxyQE7} zBhV{0T{2w?Quv-4EYvmn26T%J#As(A`Ju92h=ohdzP0yVc7-o+FUD_92c(^+hI|F@ z9=j-Ks56Yh_k0idEpig1ssNK_FIHVU0mh&&I!V9@^7Or2JQ`v>io*LUA{Kp3GQm|% z!2f<^H^h4(R@y^&#!}edmpzU_>P$MDH$L$atn&QV-9o>F%;kzgFrXl3v(CAiH~K&eXlWBWB{gzqO4;_vOdL%$d9D? z)xd4fllPwXq6&Yg>{;4kslnqO!qM)4O>*i!wtQq&`q0Hcy6pBQ7vG&6e#!Ap95mqc z7fQHoif^sc*`2QK=s~BG ze?jQu8Fpzss=#9B)b!8{5`MY-gJhj!U<+%SJNQ=yjQDV21}#(eW~qr9!zb^hQ*0=k zHOu$g#{&Ww7Fgq#!3-B9c6n4=w3|~!E&~d{<06sFfg0}$|UD& z#9~}mJhW9J_MUd~sTo1i=t#5zu$VE?uAZNOX8^OhW_w%erni~;wqxz(G=QG3`GH-Q zk%NT6zIqE{FdR1*dkf;R4=wqxZO_^~LG-Fkgh;g>7a7t5*{ZwC5OZ0-*=X@LP(!tD z<&<8qqI|XKx(Wf)N~unID}rxs8SJUkmdQ23uUX-h3}cU@^7 z-hH$$>GIgZAr(j#7t5gm?|7I!+M9LnwkPpp(Ma<0tO}%#OUAq3sCYcW4=Xc25Ei(eEy|3#`yYe^Zk>`n)F120Q8z&Q55N zE&>MadZ`Q$hn;_yq23V1d0e54jL?DIRX*}%caYXrljbTNKm=dINInoTec4NOa>2`sYT6<>~avw$UvB$Ml zK)segFGL+d`|3-c0D^m1<-vFDMxE0Bf#{uDX!PulQ~?g9FwhJ*sxa>icR^4_@ALQ# z?d1oUaw5dmSmzaiFiBk)h9SG-FYfm^54!{up<)hr=qwfc4J9dj)bdLzGk#ou*03L< zPPQ49$R&>kzVD85e0P+Mn=Ez|A6KB{#iFEjfz|Brn=LgTC`yEKS%iYZv$=q!I}*4{ zq>XDpydj2+2>CgI1PVG2unoBjAM+3~I~B(Q9gPv-ZZ1G@jEZP)_KTO-BnEvxfVBWt z6qMWnHqK<2ip`_+a?BSZMzew{x2#l=v5|h{@&3ZtLWm;-tFD~Y&9Ytl-erg_yx);R*>qMylYbIN{ zd~J{Rf9}q3F_%V1ybP#!-&%HkgIF11aNae4`9Lvmc6J$s2V`h{K~-Lw$7u66E3_f0 
zVrMh2q^4x`Fg0AHbcoMC+}HI0JZZCgPaLZcG$_Y)JyNzx;pma~icrCr%zgUG^j zxxvpeEx@-{^UJ0Ob*(J^4Fv95{0qoj2&U5I0_|cvv?@avub1E=E^D*6i0#9=fg$%Y z<>kPjZu_|1| z41yMIcLymG_sm1|nTt`AOTI=$tEy_t3kW~D(t_ryZzELR%V9=$K?#?yp zBST-MW~m8r`6^QM0eRxXE%?HsPGD3O(Na-!{p>rE5hFUb;HW^Qgby(LA*ipW6%|EC zI7MQeH3JMbcUK9OZNjq9xIpV}6nTu{5i0Cp`jI=nA5F6qn)a5H&2tE=&)`D3-TFQB~~j zPCy^J<*_VHLs;2Wc7|uElR32j!FQjbl05VrHHR;Bi*JxnFMW-=q!dr{@HqQJtGvf5 z`x>*XaP()PZmL+<7SFBIWC#K@45dG-e8E0*G7)h;v?+$U`bS(h$ar+7TJ=355bYnf zLb{BnkGe^MOQlL`sOov|T1qkun1)x?SnVh^>5zJ$?_r|K|HvHBc^91-z?`NgAPVpHg)r}%nQfd9QS<;^r082iWzfNznP?$1%4oCbauF*xlMlS zI<5U0aKQHuryX!Kq8L0Yxo`^r??a`-0K-w9?Pa!|=N(i?$%iw5&jDiC5`BoRCD@b( zy@6O$mAH+c#ntUduVd9(;Fx(h5z_l|c!YN9+L}CoJ?c7GUR_ea$|;yHJ~zt(Zb_x) z4A#zH_c(CqXo%8Z&xD>n1hz{#WV_IZx zp8CxDg#z|VYk(8iq=6qqajv2Ds@?a_e*GK(>Bzq@93ZzLUJN z0f`ztdOAPg#fbpFH{fVizTZQ5M{<`yww^(18Lj{nu#;{G^!wGnZt1oiLeftS-0$LD zLvFxpWs?1T*=ZF`T!BG;sQh#N_4tQ>cn)(=4IMednK?Oz45wZ*7*#X!nB43lV#Izg z*pN)`u9?qj1d5qF76|$*=6sw)HRwE6Q=_hl296B-h2`V!b`?RhfTuiUuQi74=D3xd z!i@RjX5eYk*d6GlGc&#E!b=;Tst|7sXRJ-`*q`_8{>#W9OjH*JbbGDSyJtHyuTnHn zz@`|qwetqO-h1f12hQzGH7pE67cSDh5DS6RI%!aM`7=|?X~j(yY6F`UZ$T2J;Q|vCv?9jU)+6IT>2p> zlgj4JT(Pf<>&tj?Pp)AA8)F^$L<+!tAe zJ;%(=M}t~#N!LLR2K;`hqVvGuIoRY?Wwo>NcwT0wDWIyxCGg%N{f;)482nn_t#Z0R zgvot%wo^a6S3@jG?9d-BXiRF6pI9SYiU`+fRGo(F$T?KRpmjlL;<3)qS)jn759Xq< zkY%ZT)>_EU{K&`JJ8NShWiGFm&~7GKYlR^Q3ikzu?PZ*F*`(lige8yd%RMloc&X#< zukNe+40~42p!~ak?di>L>x3QW52NvX(({Io;^w{|>>3K|m2h4ED8dA?@V|YL9`15W zr~BA0J>k@ZVD09!PIqs-Z2#cHiqdiSwQpa_fU=6>V{;^(0M-(erwu;^b_2{YR~p?x zP4WPihe#P9=m>3plhG7%n_Ug-waR+2{_bE5?G%IbiUD)LN9jDe%=WS|DA2L9><*q> z&s3pVq2JlUi7>5}p*eIvxt?^_UHi%<1tu>UR|){xZ0DIooNjU`esXPj9h>{WWtcCP zcIq~`r3lL3r~B<#iOnJKA6am_Z*7k%E1Ev~9nS?$+7hmvc|IArmB^1$MpElY#;~}eAPjjwXdXA=t|qM zD?3)q?#mkPd25R@JBw95(o6AGh{5^b4+Lkz#$eUP;A$CZwE(v@9RSE%5rA6@z&-Fj zLx-B@O~+B9ahh&?A;*DS*3XV8VQ`!EIBow~q?0cqU&?n@txi_gMXx|=M4o-2S|q~f z!aeo662FCJZ?$?^y`b~Q4W7@=0wk5|7xO{xPWHkH6LY7*%~1&z;U-3)%HasdCd>cD 
z)^~L%9aih5wTE2luja|prT^g3P=+H5h_?h;-rfCPVjyw)Ncfpy7p7TLmP3MZzZR*`lt?y2p?3RQL&{@7-tC-?*?}{gK4^ zplbwc1ui&R9KzAhkw28pADWCeS+?`@D=J#68mRHB9BMuwPa3MYExnK+WF^RA=^EJL z*v3eiu=mZP3Z$V;Zj2ae(V_K71DtCBB{Nuis>D7Ln(pS)MB{%)U={+CeLtB}Lk6l- z6a&!RWOg5%-`7PiWcOcJskJ4$P9g50i1d_l$15uO!n3(wO^ax0?GcJ$PGr@K4M|on zqj2H|Wv1`Xym_*481m+(3cZb-qgiRnx-yePzhC?@+@S4LcAtJJ3N)7x4J+N|_L_?B z4MuzVfW;PmC$nPBd4`sTH&|VE@gc|VJZqGDUfYoE|EK6xdom&LdX`-K6GX!IFQ%o8 zkMV1cS?hxe*RqNT{e$#xLEjMbqm6bgi1Vn)!FO4yMeoAPjUoSdjLo24m~w{mskqt2IgkMJoBeAi3Q(PNZoBh(u?HrbH?5(Q#mJy&whKz&+!CO6 zYoFVV`TYof|1l-^56%yim|nnWBpU8E37qxzsQ(qZTYD4;w;T&4Xj?vm6rg zzl#%;>RB^fYoHwMZH-0uK^_9|uGJ)d;FPu~DR;{D^Z{UxW*T}PEKuCAs>ISAe-|!`=(!Ek!uuGG2BOik$Dle<0O3D4PM3~y9mf0 zs6lirokyhdiN}=E<9#lG9Zs|bfir`Nv_uKUA%a`-@q&5xiz0~CJ?Vq7XIiGa`J>%h+~<fOs^u`2Ut|=NQ zpU%W~8`H^aI=o?eA|mt~qk6$E&otoMHBy-=hG0yNb~6KyK7Lm;)q)U0s?0O)H)2k^ z)fh9Iic~0B=x^7u4EuwCy3d<1cPKCXsf=>_YR0_W-EP-Yr$`CUVN4r)IIWo)-FJQy zlGQ=^U}zI4<<)I{B`b%qn;gx!`XFn1bRbLPkhOzK!#|XBz5ACgW@>!adq6QkS)uN# z)n2pY9fSbMZ?jPB>%XGH+1qA80> z0Cyeuuy8I}5(hG8ldAiY4XY0Pt~*i!h~kb{xj$-w$ZCOg3A?_zjqe#UL7y&B+OjbT zZT)$g9Fnw_#nf@8amqCX<3F%9*J|byNX4wi226TL^8%>~xt582Aj&5G_y_bD8l!n4 zoc^(K%mOovl1iqsm^k-QvL6w6peBpc2SD^X0|<_d`E4Fuz-b$S6wwB`97+J5aiV#t zKoHYx?I&zkt3i2kGT}T108^XU{eqDm7fojrA>HO)t*8RhcJc=T*{~@qvvgTtv#=) z$R^gmkVko_j=SiYZTNg6Th-ESuvGD4b!iap2I z+J*i}$B|bp7DkxVA=2}c`-z*tCdgoOwj} zkk*sUaEb}WGqDHxo)W|a!p-#5pEF`?Rml^??H%sE9o^O_7v z*fDbHe9`9PE6d7Qemq1IM)9u2&~hM7mEpA)&DO18-n71MPoLj{kzeUV9@b%0=n4;6 z#aukTlHVj7*4M2C#1fBP{l1*-J1YUDjW>U#@?17XW1CJTVNx-BrLGr|UbXH@Dn(AE zlY&+;79Fj1+!G1dx}Nf?v9b?b_+n0s0d7us1q?yAv2ot*aSWHlOZXrxTWx|0e>X$; zf6G??5bCd{2J;huLd?`zs1)0wo2)ct%6m1E&N__2M}EnUu3V3SBbS8bKrXdwS-G+bNO*0bYgw#Xzq-hl-scc#oU;gRy-iN_+(lV%Aw- z$6XG_htdE}Wlj&M3wYf^-d!jE(xY}sMWRFiTpN0-a@QrsP?V?54y2phAH8+ePTQ zdc$P24X%*YMFnrYIyHqzcKd?Ay#lfvoOPd``BZ-E9Xl|1^S1a9+6MFodPa+De;`H@ zZn!aaR5hpv*LLPj%IYv*u%-=`S<>+hU&oU)#3L}4q*8M zsWj}AL6{qDBeH1!vC|vcP?X&a<_HPCfcTb{A6(;2%b=K2W;sjh?S?w&2?<$%02neL 
zpyA5ugbuZ*Ob{R5kFR8OnWU1~(*`7(hSEf|6}nf|tXCb=kOXDZGXpQHw4Gr;vp^j( zAm^z($#wpnK>eK>!pMC1*6gnAa1^UlcFB_46aQqy9m`t>LwBq@Ux4H?b;6+|a>&nM37`j63;ES4q_4(fAq(I09&mEuoLrBJ9Ao}) z0e^Us4PeHe2b?LmV2aE%z)eWT091o_4#TqBU5G|4gM{b0>l^4a=HIv}EYFwb7R#Rj za&oLot9-QhKbVP)*g&O*bXGEeVC3MI!4Z;Roy;V0Ye);qZU9nyr%Vj$aK8c&OlHD) zAUZ1B>}#IBlX3uo!aDJ7;L+h z{GlVXLnaU2YX6~`k5$J*hBd+KSQobkil)v(=pB4>m-r<}ynVwD{w@l?4Bo28wCZS# z9U6sBpL00Y678}kMzCBJqkL^2d=#E)#zPCz+LZG2gQ#x|FgZS7!U(-?_A)?UQlfXeL>*_hk0;i&=ZgM|LDS)=aCLe@uwu>w4y zwh-ByJbx4@^1v-g5gpmrj*i8*Cb~T}V=D@$8YJzW`J1;+M#fl-qxlhVRzy4X3FRFK z{HsaycPY*7p6C`&O`X5;j`i2IL&b<|`4UzrN5iVbYXAFoE`~Q-mAEyF9bANn($wvibrOMRJ*o*r56js*>CB;CWu%evyJ4dp4Ez1AF zte%Z=7>)e3igh=B=YhSUHfT?a1b@fAqJW>l`NF|l(jq!V{sviAm zt6FrPQc=KJ_CXEuhtZeWAv@zkuE1-rxy*M44~L^S6r1|LihUhh7%)5loi2(K+)7TN z`eHMRABU%gyttGv-SVQ=xCo)};P#lgVZXxl%)z?lFVcL6Khd2)Z(w#KJK+hs`tjm~ zNWDoDUqw`loSL>*qKwz>eDByI!9FF7^3@hx_;``iBInx{yu-9~?X|9@ zU9Gf1%i0&dxpEaJqDp#MlM`Y3ZbOZ%2pv_TM8odbxdg**pAvX0(7#`nPJHvM(g2r# zb*;793`R%6p-?uC6pa358Gv406C+LGvbkzqTqpjOOlA~EEt^^10}UV|5)%GZhnn2V zt^nQj$S$~BfzfRo(~s-JJ4@?wV?@*)=w1pZ|5shkngVVS0nhNL{`!6&JoH?egD&E5>TTGn0NI&<-CZe@10>8%gx8@?MyZs_^RYv_rl zA{T2W_Ym9?iXvg$l7vRUH_6Hf*>D#_V^sg#6RDqd?M?NlH1Qbm1n~&0IZa3N!0rP_ zyOMF>f_WiIa`=^aFdTAhRDX=5R z4|qVqeDel-q=OF%U}}-*S}{;=5!AtyKW78KITbcRhu#NA<)_XCBWtqm9$ zTXgf&+?Za87*9C2VbTr0&SF$8#7kE3D(4+)$%dl~E?ConKy9DC*8d`xUAxZl@)Pc+ zq>Zb+@yPe7196idZCB?p&l@LJZr5=>C2&5GGt1>Fj`rVf#jqNowa6B|tfEqVAp>%kd%sg@)$vsv?)e+d-yKrm)tl3PpVrg+_+y{&e=3+g6` zvq3h{-1Z313)Glirydbrz{514~&JTFyLFxvSv=aI=D-Ip}6NF9?mr;nyg5C#c*>)?9h8&Q91 zf_S1ISle0N8#D&vZ6Cv~cWWy-ML8Q9mpdBHr&^09Y!6-fL5QqW!v9f~N3t6`@f=&> zJ+HdS(XMP+O#)Oz-u^qaKF%MHg5g*b*@C59U4aAHZZxuW z;T)XyqB)evtWUn$T6z1rh)(&*lAfQL4t16kz`b>A!pPTP9yE0qx_W!H-Qc2oQN31I zMUX8OvmBJ2%geXt%_gWQ>JA>|3#dW?IXKoOn_-k)@*hj%99wOg^3r6q!ukbaJkUxf zy|bJYhL-hs(p+b<3j57)A18N9*o5>Zr;v1S22ds;ygG=cfJowowwoyG`ZU{xHL0l- zp`mJ`;|Z`f>0Ipu*}8)(8$__Ri0%r}a+&^WL+AjbmH>^5nOI7O&9UMGmcQVbH3@P1 zV(YhaQQ5(!DnYy(QQ%Zpc4wqkN+`1;x=OOvzI~uQhoA6Y7EId+ru(Y@1Wd(Ss=x)= 
zfpe z*hAree7oCg__0kkyC#O8FitF+?t7}Cv*M{Tp0H!IvJ2WPpuq)%b~s~Guplr~q(ht+ z9>?JQb3=Cp5cmW?w2-cYFJv{lmVx^tt^1PJeXR|$vKoF!`;k^8NrjG~D=c*ZI$Y64=&CZc>x>I+k4w@0sf72m zyZhtqNAqjehx;_X9YsvE=qlM&hgWs;V0Wn5)l^G+g+p&taoO7M? zDDPN6=T6MZE5QHTu5Pw59y|?Qq3DkO8ib8M*mZerv(3n7&ZD&>es0`H1iBTpFbk}E zH9n_NcTLsb0&IJ*ZRLLS#WizN?ZWv8D>$de+JN(jkg^$zYl%D749)xk8~^DrAw$ih zK)$EXMu1pGXK7JHa8z)Sw0hCH{19em!QT@c{ZUql(NST-oX9%7Wpy)_x3nN4_|-MM z>m8&*Y7TrpnV|Umhua@9${REN71*MQOCgfQbSz>0bg3nDt=hX_#wKN*2O0Snj7&E4 zH4TE|Oh$LJDgrxvt+QEYATGEW$=dEP9m6+`Gq=b2j}^0iu-2jM1oG?A6d1W(1qWy+ zrs)=kt`vEBB#BRiXyvneH8iv6B~YjMX5U%)Zgy&n4d)#}dpV>!*d*H;c#z^&_bbi& zVxCUp1p~s6O5M82Nf6{XXph=$DiUv;6u_07R#r5gQ5 zQzEYKz-0^O2*Eo8Y)A6YZM2Vi2_6SVqu^*B>uN0CSHYh0vo1zArCK&TAjUODa;pGW zcc9Jy%P`O}*!_sAB!N4~%Z1J7rUb7BX84<~Xha@;Uljd!dcak^KXoy#%xmzqhMp61 zG$zorsp^9aWnVN!o1q`~&EnV!z13s6jO^GsL)JhyXlNQZyG=a{GDvsyxm=XC$MUHD zxM6k>MW+xYlQ85db?Y0oS$Uhrg%c}bJOVVk9l1jfYd3-UgrYg>Zu~F|t z{JzG5j)6qrR2zC6gLtKnLA2#&P%8u(J6)=-6T zCH@z}l>YstMwJ?5`^Y-wP@Guv7g1+tsaRJDw@si6AoSyCWdq)RXa`yM=?1%Bb^NaD z-*uZhLPG)?-#!g^jnxhk+Y#KhqH{xqtR&2T#b^eI1^|es%}jLn)7&sK{co<9r)oaQ zokHGUw#EE;+8mSx(oTYz36t2aWxIEFXT4_lGz z^E6v%nB1mgsQ*i{CYzqP12^hbXOuxW9Mm<_CBM9U%QR^DPAvVb15bD5w$jw47%|iF z&Fw{TIjJT){io1fgEQJn#OqZyfQb<_*(j0BenPkB*K&Xg%3=dt0}4WwtIZZ&32sRN zEy}MfD29?a@4`rsbX@RP#~cwW$`^z>G0d9Q@yvkqq3 zGIL$lkfWtUra(9~83g6XZ5-@6$dlfqKRcjr9A52E z@pYHMvin5*QGG#WW9mhl*heIT7kt=LZwBj{c?}kFMXY^YEd4avVi!vXZmWJgK{*;J|=ptXB{v zGMa!)fBX*=+t5)kk>(s2QC$V3#yJtHN=|sNme;r$+&GLRIIg-RZsgTQEml&K6p+rNJE;azlihh6SvFKiP{ThhAlv;| z9_tTUQDglmHp_t6A=spTX>bQ;BKQo|wHuN%mqw2#PkavGiJLg9mHefE{E(`5bOpw& z>^8$Zb*eL=qC)oBoOWFMxKV$w&GXhcxkWA?Uh(-rr8u&J*A>05$bBWX(Fs3_91%)VJJqLw-N z6CPC1i*Vp91oM>tEvc&q_w+C5J!fCPL02`)qU&WfH`1%*g;z&c^Be3$5V%vhdu(`> zv=%p!72rRg<_0=%)VDksOcW&sb)*0yb<6TbxaATmdo)s>gu6Lo4hi{1{}KEZ@b@7` zA|g5k$m!^}n+w9Gf4CF?QdY)AxCR)zlE#1U%Mk2fV>G=d7$>8H6fm;!N+UegB>@P) zJSd!^|GD$+B2*K>j1ay06*gF&lO8rd1!g80$|2z!B>!&{b%W0Y7`sFU&NpqbT*6D% z&^m?5qC~x|BGeW>$!3*God=u4&5lNV#o2lHUL{pKfWTv 
zYqJ*I4_LB*mLh3XzVP7}8^Mbxovbw;_Bx>S6DsHWWQ41IP&|w5AT002I!sE<6>yik zwLo$uTlfQuW0e-L%q4NfyQ$~_u8a<64DLM2DK>#v;%Rh~g#0xax_8UIV&b3=Pk^l#wcGv#c>;PO`N$o>3Jz%1 zt~uZzutN$rctJ-F$b|BHdhZstIpF|eEx__=-j9!thB1R(>`d1_%gQ8#{C?W3Rj!2p zI1ghQg=I>(0&pi01H|{pMwl8Jp9aKjk5%jlm6K03zWI^Y73f3f(-N?}$v6ahYSIbR zN=&AM{Ch%6`!(4FWRqG-0dMlvS)k%Ise;ilR-+YLw<}w)ny1r;Zo8IMY^r|)(DSPx zQwSQ3XOh^N?Y5|zxr>OKrnG1qN7EVI1pC@rDMOj8_0l}8TCzi}2TDxM-+5;FemrDy zQV5v6dvnZ<1Un5-_((S9Fi_IJ`j4;Fu6;*1nbMOI96>uj8@t+js~&(*Z`2}yB+P`Qb>ngk2yDsTOJS>4*!yfw??%B)-juK>Aq=cDh5>3w1{1m3 zjp@+A_JUd z-YVr4Tn#LDaV;sESJ4b-uTpQ~7-QTu!I52Qb*DuD(;rGATh4JwAfOYS+bAm{JLzoD z3>B=6R0>XQ286wYlok@;*kSE)15v=O#z0x&P&+ipajZ{naM-;oF*&k>cQJJ$D*nxG zy9q~I>bMK|Fjty~%R^UM7SquVy-0Aj1H}@uGN5gmD@{HdytUb?V8d9A1XuV%iMFQm z+L!+;n3#44Vlm4~I1AOHR(Oow-h>2KSv~q&xWV9j*vx_o8nG3~PH*QDBrV1Tcl;q{ zTm))23P@B31q^&j59a1|kXu#2t!m!D{aSs~2@-Mo%;`vr9GgIhJ0l*7|$I`#WqcJ-{y&nHEP+|*3Gs(D(g-^gDFrTD1}!r z5hQqOJ2~*DdRKF_NW8R3v~oo3mKe?A*Y+|1c<4EFbaZIi-?i`PD1JS`XO=&_cS2!p zxofemYZ=!aHeW)$ZyStvTD$S!5BKl;E~*g+d{=R(otGaQ*vpxPrYDQFJ24J8=A6NM z^u7;v_pSD#?&-z3xRXj;6R!6@T=}aKwUDLp!{X8LQ!9u6K6gr-_`1 zf>c{ipKTv{7Va9sGYW=Oe%M-YD(T^_z4Z?7PK(=D-T$1#hD19Q-9Cc?-RvX})$OpV zCTRx@l@!^R+@ben3w|eV+w@rWg`ARE$Uo0zb^M-1ot%KwS3g{`zFKl;)VZ3?!0-3$ zYEtfgK~~R0aZKxD(6hf716)ycWUk!iojA?z=%vYN&pX zU;a7dHkDYziyFV3DDqxMM8z(UhZl139zE~Q%ouNXwnq966*vFp{tVxZdoH_ke|`JO zqQ#+-^oLu+!}VVsz8?5RXmgPp5pHZ0G;=&aKI3wkkG&fe8`Sn2^z^$U%G>1gMt)(F zgXgWAZ!bT#rEPgRe!c1MjkkY4ZF#&OX*d@v<-&@P+TlflTx8DVcO8gV$b_1GnXj&nktTM}py9V8v;CZy_ z?=(*b%=XsVR-;KfOhrm4a-mTVDbrobc`IS88nnZyM}DkkZDI^*HA&k+R34hQ7`DUM zy^9HYuj&*AXr1pO9|MYDv`BayQfS*m?$^gmI!GR`fLCO3EXQwiDF4$ZxH zc;lbyKDTVY{}qy{ziwf1=krJJfqZ$)Ymd{W2#mhp=-o-(;Z&)8$2j9Fw|;&E16rY@ z{CO*)WAkI?E6q#~Qc_HonwR1=nWYzS@_hb=(4_#OYri0C;#PDy=hTd%xO@r>6T+zJ zO3_MZFF)RQ=K1S1mqjl^to*alDMCW}!!69sno+In={o4+;LbOx??cuM9}j5P@5tfK z<>?qLrBv8g+vYcE4LE)qv<_r^T~|k$*LEF%3Y0lcqBBTH=jS9 zY;%E|P^S7s?xED?9jMD6&hN2^)aABow|6YM3={s9nbQ-4UPrg;tR2Ici0bX_3;sxo 
zyig;#%QEe)OVbW#&gqUjKE3aw(`qi^ldImyCq054XxG;eJHK++`Q-NP&M6Kp*potN zXiI+D59b1Bv!ERgVuP><)QX0C=bJw}>&he>c9U8AHd!Z@gjoDX!XB$411csq`sQG57g{glrzuL(q(d2_CiG#*S~8< z?^yR#CA#!ig!ixZ`H0#k3v8RxF={g(X+uo)Mq*c0&Gw2vi-NtEEiQ*Gn%Auv{xXV! z848mEB1aMqHf^qH#u*gOZMgv~SbZy(EV5L>Z#xVUGP=%bPM%|M}rvYJ8 z*{GWUds(^X$c@)Qzon1=lT_|E#(4~jmC2KiT$=o)p+X)WhCuzlLbi$ybfg<}Yj%e# zTO8eqpb~3noRczdA#sk+Hm$m%6uW3y-KuF@b*!z}xktP_AN=s56y*Q5*I$zRV~O$* zS9vD0EQy*-fB0MdcXaDQ215G5ONZerR>P%krFswaj^zrZd!snfk#fgE@7iJf_o09` z4>4kpzY$yA;{&ALMaJt79IYCjZ|ptN86F8M1v-^f>u3u zpXa#M{{Jd|@_jbq*8_N}Jr2sw|`5G6j?*UPh_rvr-Zz+OI+DnFoBKy>Ec{PY@RV{i!Gy8g zO5c(^@5#x~zxE zubd4!u^E$P`jEc%(y?Z@!717F!u5#niNZlC{U@;(U4xqH`wO#W-vSF)zZ+K#X?ZnK z8Xqn3@NoUt%kLj-J-c!v-*Ko^e6#Px)_lLQDtXFbU+c3@CK0JaM@xOv{+xwBf$cA8 zWQj88`Us0I&x2kGd;`aUANA_9?tOo5ba&?mYy5QSw#n_z>%^LnHTAp#?u+C1R$r7N zgfDV)xn{VC-MA~Uo3=Znm3JI_3qJ{sNvq&8ofg8Yae5cT28EL z4C4R3x(O;Jv={yRaq2_&z_(iq_FXZ2sIa5ny2eM0c(UUvv&?VoCY{oanD!8us!07)h!|dV-3BYZC3e9+JRFAF`0M1hC%2;Sh-O?#y_V-z&1$iW+noL(&Rl*I ztLw}4uXlJbM+;$gNH*pRP5ArsHH|Q#^Stn6&c^??TCCjuW$3b`DYadxb?0HfjMhX; z!*icYlT@8QwFYc=@Ps`PpBBI5meJaj8*u4E{Ems!rrXssz+&|Qw}RqDA*Z(<+b_~} z4T0IK`VD576z!>8*t+xDtjL7hT8?x$45#>H8_nOe@9n9`3VMIkW8byst8P++Vo~>azZvscB1StHz}HecF}Nkrk)JwrihNch6YN z$lVuy-y(86;>jtfu~{wW4L&`Zf zACV|cRFl!_DI53R6jJM;A?+C=lRFv}G%D+Gx+VVec#%fi=7C?W6a#`0sE^h72b8PR-&!+=N%0{@dB8E(<aVcA~V&miVo{9`;;zStd%c* z8k$f0qmff;@vtunw`H2VKM~Rc!74_NyoTj0vAdca~SY8G!z9z!NnPVFGhw?F+ z@*VQNbW0DK3!{@2reovJ4vr|O&eT1cSJm#TVA^`(^ax1V%$9}$SH zO8Y>?g`)iD(+$wa%${7sKwwu=mBzBP2J|d{T3%qju_|9wCYucO2 z{0a#Dv|amU|Aje;TXoj&`O1ywHC-xlb$5soS$sX!zr4Bld-ow}$8^Vo^v!Uw z4{ix{SB9ak#b?%PapN6dMGy+wi}Zr0>x#UKg|oqyF!W^Pq6zv;d{&jmmSY4%E2YVO zL0Zgl=h2`7Y3lX?hR z2er^H^^8>50J52p0?FFjRe}_El0J`-GF+PvcT1g`s&sOrE@{DKb&|Yy_fl4Vex=U_ zY;pGuOtjoid@y3u>Tss&Y|0zu{Xuc@yCkSG3+icK?!VAHv5(onVoy<6)&ztfmMh9g|3_G6@5YA zk#rD58oA>C9dE>OuyeBn*ggSKfuS{rjzCgZuREMe8Gh353b9`(eSw$tJ>t^oZyb^H zcsK5|l!4z^>Z*s>*%pPQA2m$t2T2KS!N-doGJZwfpr0e(xLSB=)Om-wxj4~1=Ty5F 
z-F2IWP|e+Qnhk$`KY7RXVfi7mOLs-qd9+mJ>9O5TYIjGzlS|%w-|rL+3!Dg~d)nYO z0?qtk^i>$5_oV1jr#k;3W>-;S(M=OC{JT#(1~Un3=ue=o!}s(ac4&B3FXo#095ja_ zMNG9mzxGAVgWLbYJbqC|<9B%QLd606^yd>D9lxX$&abp6fmrL{-(FJZS4966+=KOs zm%+g70oVE#+}af>b!5a}=T|I^z<#Uj9%oKHgU9r9jHavrHo3=iGs8G6R#MB*2h>&ZD{fmJQy59Fp_etmdE z;@pJj`IV13N|hT}IdkQci2JoNW8e2{?_O9wjFFx{>W$UvA<*TDFPN_Y|}_qsd(D6E=psp-LExL;6pH6P)dmvQDm9I8Ct`>$5nA5ll@zn+!( z?}&VUJ?{HE=!2#m*9t{n{qXGR9Soz@2ZY$t;1@08-ET@TwwrCbgzdqh_hmo`ws}1p z=NJFEFjU5r&DWRDzg+2S1ooEqiXGRoD^GQ-5s272?69f`rG{QDbPfeS?E&J(3s*;) zd_S`DN}dlb>pCzU0d|ke2`T7aVS4M3b5gwIhql7`U20By?T0mweA|lnYs=&EpBcB0 z?L5`1o^VQeFRY~BPfbhq<#RQ2r^zSAJ4TbO?j?NP&%UW2BR6zrwq!~|9Vz_?zZD@w zWC*vW2GQz`5R;Xe3?S-}I^bcO-LlbH7Z!&?Kga+>a`|Ad(pBC15x{W#yjt|?Z{J_0 zwyaFY;P!ufC11){0d7`^$i1@pT2w0l#}#t-}Dg zG(cI_E0cE1!i%thAYzVi`TZ2Qo=t+?xR!NZ;SsE_jW}^I0=^Y-aOr|LaqlkI$HDC> zT68ceDe}%j0rN%4n~rL&JmdM`rAdkp0=Km-fd81EEF(p`NX; zj}RjRPQE5q0ZatiAD{#eb8Lx~SF?zcdgWC8@MmA1YAfD-@5`-J5SG6`o3yXD$jz!d zX?kL`GUynJ?k;cW|MoKk3J+T}k~ZBCmD`(uJk(&AIT@8mD{V*2eoBEM{b}_K9N+DT z)1YX>kJ5*%nHm11R;8&7<_X!36Qk{wMCZn}bp;9>pQPhlqrrrl((k@~cKH46$IRi2 z1WRSE-a{{+zlVz2PcID@Srf|Q;;IJMO1zh0)h*F(?G4RegUPhymtHo9wwwmCj~M+3 zTN~u;A^7ksc-2wE4$}2He7DYM)ve$+6j6N*wxu+E0Ht5RV8=f#@l{WJ3nAx z+)`T}nytV2CtXc7)Mhc%pIvQ|m3QQwRMG)tfeCbnK~!C`hD`?jeJ1DbQKIJ_|CRK?n$U9GYOi(gS6iL#UBM@(*Jpby!syA_tsvXr zjbf8i$zc9z*73W)PA-pCQ=<16J$W%chC`L`M)3S}9Cx`oOD#OSZ2tYZ(9Y z&3|$ufOY1SD4?>m`$21}j*C7&&|P#UpfkrJs$=h7^tC*jY<`xOfWa$9fviL|vlGny#6~e@ChX4%HAXh6RW^e$%B>3*31qFf<}#wCHQYh0{ZaUU zHhCsjk*|B>^T|6WyBnh*9;8~#ox0rvN=AWAl`f9Mh3~e}kUB(28k1a;bWW@{;=U@T zB5QS0Y0k=cpTSOk4;B@Ca94Ix2dPu{-GM_ z@#~pqt+HE9#ZA?5iT|RJ7 z@nw>PEI=-hWm0mwLB1lJ^MmT>+cEs(hc2->1x4%`Y5C?=&8Oj#DK3xcS=)Y|ea>SD;q4@4RPfA(aVN0uScBP;YRUW);k098h0kPy^J^+iQ* zYHb%4P_ugaGvAZ_sScr}*p<3>*M-ZE2jJ+r`#!Q-H>cTq1_oUt z-9DUu)v+V{cO)<(^CuQHJ=X@N7Gvd;X%9ukQ*Dg%S&p^-iz~?u__%8AT_)Ea8=T#0 zz9J=mtjTKxlCn)g=Cx%{Ym(ZBQ08*6?}&a-Si=+XV_`g`1#5%YQt}?F`q78yw>2S1 zwKZO|J*ZpjesjGaE@n*Ek>qi{G9Is%(*x7$MKTAb6H|ip(%lAcQGHSGi506aDo3I3 
zFDK*M#`))0h5?7+6M@+6?w$?9m|Be%x&PcPU2rmdOR*b4>YD7yTY<5heZAY&>#Yd^ zfVJ*k!*8kY|L!13%T%|P$$QQpzJEaR#69=kkJhQ$|F4-z^6zX!B-X2VIVe`r^H2Mg<)8grr=A@UUE zOXlwL=JUNk5#La^bvsrDez=gX-)XqXo)(TYlnJog$KdA_z#q*2qqjHFTSnXQV2Nk9 ze!Qh9X(xoBatZD(U?`?Bn$%ze0cOmJu+dbmbOMFu=)QUqx_SzpZ|<^ci6urLakA8? z4lbS8`d(ztyebj)UV83KRZ`e{HQK2(=tBQ@>*H|NLsgAe&?7g#ZAXQT|MP=gtr?f07=z1t~<5!272a?`>fbc0KwFN28smWpou-|T0cOQoL? z^!%Twxw*|gx`6*VEXO~JP|S6C%6Nk`91VD*)~Br|Ih(BIQafrI1Rg$3-!eT8IVdOP z@2*7i)7M83O&Pqkg@JUWI9ZIj+6l>uld5Hh=eJIaQO9kwyb5F-xPfVZBosL zvy<+I?kwJW_VGZ!-un*@VTtS0Vkq;I>s^q^ebrp;~P^P+Oi3Vo#lI;Y;X!{XSQE;cEU! zHT_b&i+phtFeRrI>%HO6(1x3yLPp3DBA-rvT0F=rOT6;a4iF`WR{oJgKl@8Y^X_5! zGm{|Fd+NAQd?2dS;mt+PGOJ| zp#8`h`wxFi30)b+@x~AnZ+(W|=Rq#WuCBW@553-vI{+@3n{`bkWPQ=gC&f4;a?cu_ zl8yCKK%nT;y80KUUOki`zXKTbj;Kz=!$?WRI|}xXL|yH~oe7bjN_u65na49;njc=r zxP&p+vFV7pP*j-KiO8^N$8?uS>vQ6p(qC+{h|(e!uf#=t|1r>*vE(bD_=U*v^#SJO4WKZu@2s9h}zc9R%N+gU)K#OPpH+ncg#;^(AkmY{NCzTt$7K zyLLX$9^p-%x{p=_0E$&wPeSy?UYA;QPV5d#1UUU zz9_i&tSr@qYjfz&1oXuR9#lQJ6W(5D#&(~re|!~z4nY|V2tOD@#HVc5fkXHHmiKc%8qf-y0_c!tQ@XSKhAptVCYr$GWC4f z`GPd{hN7B=PT`$9tH*MEfsZ;iebU_PxKS>SGMKd+*B#MwWv8>&&C;W%wKh+t@Hu-+ z{lgqieZvX`AnSTDNcobB<=qEM!+(DB4bhuRg3~0^h7J*_eXY?RI$V3_4qeLVa@`Jh*PSuo@TW?5;eL~2_ic;4tmRly_|K=y<^s+DUZHqP zgUqDN(U^p!Cxh^-I?=g}iL`-r2o>c~U!JMqkIco?+*_35 zb~sQa)sYwwq4$UlTLF+}AaamJJZh3<&v+EJ+4m!{CN!Pgopz}0>~+iOn@69$go)(Z z$tRju#~?f3oX}HqtV!Bg-#$wRw5OCsy}z|?{!V z-p@II+~Y0C%*EdO+Iy|v`u^6Xyf5HqzCB)Qq4E85ANp*%56y`_P|AB-9w?CL^Wm%_ z3&DLd6oZr0-1=2dtl%vj??5UiM}iKLErzhPMzt?bD0y>|)nU4iG<_Qi=Ip1l{F!7y z)=4}`_?G9-Lv^RhTD#BDYHZzkjz%gvkFS9WC!0Bz=&V`LilLK?uag1Uk#raUKlBRx z5ILQs57S9qUkqtDd+p?V5j1D_Kchn{lZC_8nD5%73<4535EChQNgJ4USbS=<*eGzv zMG?Qhzh4C&F(|@lIz-zF5*yr)no6drU{oOerT`^t8w$%fY2RbVje6!{y@zLg_;;L* zRjd5Z3}#Bxf4m5wpspm8v#{mw^EqB0uyDaf$kKdu@l`PUu_%w_yWfg!@q<~=jr$SV zi~Y-B8QfW0YKI>Bxhwi|dR~>JrubdUpkwnOYC-O}d2bSYjjnG`t*?7oExwl@V+2x& zsL!N|I$8*RrLe=#<^80KND<9(KUzH~r$P;M9TaPYOEolKY`yGNokiq3cFhUgFd+~$ z`Va}$EYb!k#z`EbZK_3V4T=bm#SV9ZTDUO0na>SXlcJ=Vr`(gOOXG)X62+z?j?p4J 
z1xTXlDSQ<6Egs)^3Yt=g&QLVRGh)Zht33jv>YXTJ|D6$w>iS|e8Tv4Zs={w2-UmP^ ziBF5+m%%!Wy$2&0*=-M-zc7z=QQfC(b z-2x?go$f6D-=-Dao6D0vC{%{n(Y7~{KaIIN7)Z0v@xk!={rxrff@APT#j3F)vO@PY z%XVhpp%J6z|x9r%~1nIGChHJw$Jttzui@^?OqRTyb1Nfixip3>EB56CPC z$IcVni<|0=Ka}rLD44ZvEQri5p4A_OYg)_>HxC5^`BHntWk?R)6BQO1)SW$Xwk z9{SaRcZ}bj@y|i$Rib2GC><`6jXSne< zYV=5wySJ8JI@ym{#Bk!shWp571-AIUfur-r|;E z@UBqvIa2(N-=plu#2;lQ+z~R?Ws2n%W(w6HZOBF~x6`xgKqE~#^8jhPT6$QV#D|!} zowXX;5csX!we85SxQiWv(~ZkkBA>|R+m2hAf0;SBD47r9?pB zT8%}c{n%5GW6ufWAO(8@YUb;q?m-V_lU!Ovyu$7&MUTvswEm2C*`AVF^_UVF*xVky zhDz%^?0ZaC>fzbtpAucJY&uc06b1yA@5t}U?EAd3{OL54XpFTRuIfGOIz|y)XyIDx z_S|xxEp(Z*K?D!fZZc%<@lj)0;=>C{hZ&RK3zp*rP-yxM%!?2);EsDGB{e}bAPFBV zUSFIaEQJ?@NCAOtweXChdu*P-s*x}MdLuBMDxiAjJLIsw+K2Sz{wUo|XhYnIx%{V^ zna-dO%x+1IEe<{?0)8vox<-+DM3BhvCH(hz4DzRwtSNdPkx5&T@BOwIh7(2elxV;n zVP4;08gIH=#gluzFq8fwp;9#bdUMW~iB_kBq~WA9H=&w#N-N;Zy~K<86NR9Y{ZHT1 z2}_G^g_Id$BjN^{@-^w(bJHbCOEzZp_ZGaGblYDDxe7Px!)!kK0o^VMv8U9b*_==W z^S-&{l@`K)2Ct;Utv7BJ#R_4}Zq#Z$48Hx>Dy(je$dhth0N#K$$-&j^alHOYUf>}4 z&P7?5v)rk?u$<9#CoLtVeEGG*JVoo3nv|)hO$yYyO_?gk>+CE{k_lv_RgQ&2U9&O9 zzlbiBK6;;&Mj7s{+y^(lF_;{nuO>i4c)9qu2quk?jRMRpZqa(^mif$zQmyW-YuUXF znbXQuPcn_4H{bFDQdJL99THrRbxFir=C$8ukb?e&Ezx+#D=}$eKr26vNGLAK7bxS(t?ZEd#4m!*AY(FY8(Rrp@J;Xv%3LX@d`7%rf5L<qhE7#9GhTk!ky*G1{BZVcMg-B+2gE`bhdn$hOvmr#>B7@JezLrttifSJ7E1@{qCE zD&wVT8Hi{U+(OS~45ZXu?B9h3HGovPydu@`F>xnjCu$(A-RrjjvQd2Col99yB@2a? 
z zNGd1;(k~`VXo_r5^|skt$bxlh|9xsAA`e#?1W(3lG~)SU>++_?&);4fUhZE1=IX=m z?qGPZ9cI1(ckxyP+YYe}2J{J|a=;iFr9{g-IRsy>dJa)j*Y81+?T%8#T0nq?w*yFd=kJDA2Vv05eFndMXA3 zS?T*iDT9GygmuHW(LwgtuUl$(egG_J(p__O)nT&NsaqA>EQ!;|BRk&P#ba zamDq$I;;)N8F&wwQscZ7mZ%)V=^&VCb7(zI7XMhhSzKZ6O*3Ad=o(h zAq`D8__$I`pr-^l^9-Sst3307e?qNKcU591{~kX7CL15j5$-{DaHVj_(}*wvQf!BI zr zzDi%p=u+hvHVf8RWJfDc*!<^XZli{CJPc^z+e-Xa_&hTZ#FSJ{56wG%8A^5q=+)f; zo8|6{*Jaqe<-tjX50#&NnK!KX5VfX6!_^8Gl0Yd-b(1nW2Aj}anbL~gG>`)s(9KG7sw7KZn=%DZeJg8 zZwj}FE+NbFeF4<$zTX9Gd>Y^WT3geRVeNKXYWMyU z+eZhMOxNhF&2+Oaf6ExfX*3&qR=1c?fM!4(Pc}z>*ji9$T^y~!0cN^1brOrIE!jT0 zBg6^8EAzSW7+N)RrnETHgqC~s0-;_tWf;U>Sv0bw4dYKvo^m=4TE2htXw$KZ5wllp zO#=OaE^Fvzx{q}{mtOA+rmNE(!79X;jB$}b>hVj?N)U5CLPnpyVB;-HPE3lQ@+$dF zYu6LMZK#JkO9}1?Pv{*JjM%O;P6HcK{9z4Cb0`0!efC!sFo|L#J30?u2MA-LwoXBO zNT|xB6Rhbzi{ZPZ#T2SjTQ$~v0`2ud#ZEL*WmEm8Idm(ZGrbiX-V`C^Mk=SYxa@O_?RD#Uu)Bdoab-8IOTEpB8Ch%)wsFUn2x7c z5spU{9~j*A$G+-5JhFY-kr284CK%`f0bnW7$k15gQ9${&;+G7tqviL@@C9kDQY7?iN=5U-K^@JPLN{zsb7IL=Bw8%nx ze8P7MVpLoCc>eW1&&u!$rTwzQHyen(M7;)^1hbgx@+Pj&-)_~Uxb~^@__vvJ@s|@a zgYjN$sNY4>jDU~f1QmX(m%@r0gGPP7BOAt{psItv zkt}GbZnE+j0&wWDxpDXIXjYJ6F@X#V1>9oi=s2?lao8#=uX1u`E21@BU*wep2MMYu z#Rok#?=s63qT(mc*h&xH11TQ0fER=Z#Tl>qjU~U2*i+;yL?reY4HCy}NBp+Ws z=Ae(?VOry9tZ7y+ivmmqlqfZ!tnWup0=MO}C$Awp*U*#21G}Ivh@;1qX}cagDgJAu z(}>TUvDQ+C-%j;dBGN(r@fED16q(-aKcemHtCF6llw}F9iYd}Qf8vJG(*8=ouRj$G zm@u71OLF5{>MlSRn-kGtbW3_eX7!A$6)xa@w|93Vy=2B_SYtkg@VCy0$VW(;B1T< z+y+EW><`f=g^BCAD#OABP0wOpO7X8H4(OJ=la;@8AVAMM$MR(6CT)N?#}>CzYu0QS z=d4Wk9@hI_S07)BMGRA43Uz^Uwo=<-DDS_o#_vEK?yp!Xb%dnI zifjz!3b#}4xMFE&NY$B-L0!mB_5}-iRW0Oo=`Pcgk@<++7!WG=JK36v7 za^&R>9O&;lVYC+eXx{xurR|oxlS{V|EUavxc%UMa6KS{W^UwSrksT!7|h6fgHD zCCxzT#KtEO#4+&ty(a5jQHx$m$1s>!I|`iSZrf5i2I&wx%s0Myoc0>_1c(69Sinsm z(HC|PPo?Z?6&3as*!E_&JuPG(5>fKCEaPsV;SwE4zHbkq7?iI4j~WSBPr1pr4*6ae zhl6G2D&;H^bC^o}Af!S=#ad$!2KjS}B^&P?Iba?BO@to`t`Sz4NqJ-gz4vjHUY#Ql zS2{Wv>lqpo`~ChM^FXp@_Qf22>y`Fsf_cks!yrU~ z%^jr4FPc-;L3+&`2qt&ayvo3YlU&hsX4eO2CZI9IY&&^^+wc#s14+ zf0)!(SUnzl 
z9EBLgzR1l6iH`n2zC8Hf7wyOnkhi~(azD&Y@?XJ=XEp2KfIltYWDa_ScH7iFf-{v!ds+$BRc($VIQ1JB4OR-p+R=Bw!tOl}-L{21NdjPxUfoN-)eI zQpGK45KZ%Rz2mGm{t(HTV4`8gg;5J5dgpW0Egulf;kBspfh$2Z z-*{=ER5pm>Ea_+V4&~KHBrAkuW#i2){-W-Xojx#s=^4LQpkH0Y*s{INgJXn)9>NFX z$AjG6-hkU+N_CvdG}2_-^xHRpb83k;bTkH@108(!mRd37u!>Z$!Tj}$8bS*XpH>#GF3>7859t;e< z7&f)VsVku8MjkA6Qj7jO%M}4i=j;S%54E{iACjF|kllUrNHzbKR}Sva@*~N2l^j*q zCV#M02+Lyyz4dlm-%^pPqXMi0LRwzrhKC&k=E*y z)um1ERC+mn%m^C^wywoKTTGk1+*i#fs>>h~==>WelFd4{&(lD**C9?CVY$ECmHl}n z_=%hmDD3?QOV2I@e_xwjMs@d>S|&L78!RDj@r3)5&v8hN9^&UVjzNgC3?jiq?`z8E zh+f4MNMuBcJiWpe)6$2aXY%QQz({lp8~!8eFXe<1su&!pPA70V-Q+#B-UcD8-y3#R zG6ieb3cwR;1pj;g4oa0=eLwQH*bg^kOM?oR|7O5PnX@%Z$@i*9oip*2v_YXM&jYB& zznIdh?R|NCM$Szzw_}(K^O_Z>e8_;d*-OR6_j4&(hP6%`n#|}}0&Ya(F9?(zh-Mov za=iM_vNhAsa%Ki0hwgFGKo>a)D2P6ROFs-q@b<_jywrH%;x^S~dsYf=Fa}^#`Z}X& zjJ|0MNo^u@esUsI1>+u?YaONf@Zl;UCD+%gI2YQL(Kd|n-`H_@V7z>%cxbicMD+AU zdYS-A!5k(%niPv45lT81SW{9vG~yYIPm%GWA~3TMd}WjmqO^P%9tlp2WP%|pgQ0T^ z(c&z{h=-jI1qcQ1VW>YzF*9iU+3Syv-7}J}6MjGzp@2syXh!4Wg<=N_r<0uYLU}}- zl2w*V%s54adpQp|9>LCic<>iHm$_^NTQOzC^7i^dfnH(!)p$tfEUH&Z5Su}GVXa3dqPLzCiVlqtnIk^$r?xO=i9Hz>>Od|T{ii4~z32!7?)p0y;8jgutV37~M@+MuYb zb@|zHABltU*VCJ()afD)g;H=D<|Ud-4OAplx<49hxzCl>J9aOxlnP5EX02J=PLn|-xyhV;Uox?`X{4N_EM%(UE^X0&>ZID0*6m|se4}@ird)tG_`{8+1l)F%k-E9hP%O=>um>g{yq!5y_Wykije>U5|*5tZ66ICb$&?e`xHQ)zH zlk$e_I-k6l4r}{L(cih{xYY3!d_)_?iKyr4@zG_(f3KGCLkOh>R)kd(uq{txsVWbQZz@|SCK}XiJG-l)Y&In4i^Y$*+z4q|$jrKR< zWGMdwYtI{1L;r;T{k`!qweDZ zC{mb<|Dq;IED4K!&v!Mlm$Xuq*j1TiK1yqH|KmG%#sc*|>d^tdfYDB4{ne(>30Sq1 z@&s&zz1IAz_40-(n8(c0Q0%iM$l9c<&hluV$W*N}*;J@?%UrYX7}BkfMN73NUgNX# z%hb}I*@*GJ2!$p5Ec|+%v1+>^otc`c4Hy>2Qnn(VO)x_LMB#7mpc2sfw63*gSa+hX;FaHgEFoZb78*6!jbeL{+wWa=kk-~Q_7mhv`< z_hLMSTAGw_uVjm$H{C;Vg(lCnD$#@rsV5%51QBW6e{A1l-5_Oeb*Y;EX-hEKc$vUm z)hNpHpUo=#frQubGgx{fTW9}FEE$Du_dxmB%J>AZ8o2BRU4(mGU#p3FtkzxlE_xX> zEFs*<`a_|`3yHNIp-0nCPiNDt)l=6V_BWYXhoeYkja=fz9DgUG(8 z3EoBaixbv_S4K}Nmi+XBCuCBUgj=$P7W9(03B6ldMkE&=R8YRnB`kxA=HFki1 
zwzf-a>3J1#B)#CtZUOHe?ZoZucYFFH=BpWLLn&;CqNsE1xe)Kt(WAc&PY!4%Iy`s3 z)i}0G+od?pj$!D&^Z>S!bZ>;<8Li{k+C-y#KG6Y3X9OL)%qSbS)u(yG`kR_;&SCj4 zZ+2OqKYq^RrBM;Fd{tIW9{$(Sy?NW*13vZnQmOUJF33{db}e%}R4mw^Jr4gW#t6Nj z1HkalzvB(04g5f{yMOQbH)OVi8c=;XJ}3ZGGx??tnL8ayuKk4 zUosIs0u=Qi+Fp){%_<xWmX5Vg zJF;&@2zV(>KRjgwllC7gK$tm}@&IqWavsj4QP83!t7jhFKa^djwnwMxD|vz7=+UYy z2p0ai0ix@j5<*7oCD;uY(lj%?B0@&XLDKB5hVbxR+SLUR5212rh(i~DBL|<4p0&bbl14w< zwAN}R-?~Qm?9@?CaI61p^In^Qz5Kw5`4MI}`4WWuXh@X{*k3?!gecS66@m2yBNGhsBLm+CGb@eGUSiksMvY)JgyXGtcIvkH?~kv) z_#x{TKZiYjnEf;qet+WPPxy4D__=;}5N)o+!$+i_L5USLK-lq5Yjl?0PRPibaYYg9 zUfenlMR7-=*iR)4a5>klH^C7(@4}0hrqW{uQ#69v@h0J|r90UlZtVX@E?Lo6Pbdzq zj>*NFIeIj_kQcL-^XoK{HcN~OIVyo7dqTNTODm1$`clVgkxW9OR6AF7agNuu#VNJf z7{$DcIJ1aDH(Bvo$z1PtxrHy1=5sYLhq-S0f4c76r(MHkdQ4AvZ659NuECjGCgIDJ zwbKtXZ!RmhKAIul@GQkuTqrY@ya03rU~)&s#v0k8;E*A1l5{e%;F;kqkEe~DAi)8D zIQ*6pb>z4GO7WZtEL{f3xc%?a6?Bq{l|TX45uaLn{V8%(kYZ-+pH$~E8$tVxN?#Tk zXdS|lOh0*=g!f}kA^K2=chhkO*IQSOU1_)F0l6rKe@c|fep<>nLWF0A=36GA$a~L9 z@ByQ!t@;@~47E=R4WHrqkE+#ouk#o#AE2FqtB&OIROD<;j`o|M5vfR(NZdUU^c!Ojm{pb++Je)J z?SZsXnj-BxyMf2d5r@_M20Q~%07(dw%SaHpSx6Y9&mQpY*Hi#I=DhxOZr%Oj9d;<|?cXFrhK#^iGW zoLTU}@EZH2WuSpr$j}~YuStOrF%FnE;NPErT?Us_=63M@e~DFhApQP`KvKARpko77 ziLmD>BuUiq#|9ig+N14RcV)hJKFsu>IR=kmo!N3H~DuL_S&V0$r0dQ9y1cNqN^DkU)?)Y`w$E7aB3A zuCD7fVSzQ!&%J2LW4G<5hWCt}4tV6d*8Qc{Kl`kQHm!%>Hemn%2zWQP|KV9ldx#GG zEu(pFS35yozjTD8O#4?lL1u3TNO}cA7U?SJob4hjyY#*qorK%puGx!HP`UVjN!K~o zI=6%GQAi14)~Y9#qs(qW?nqrFp38k973ekW>@U-vlud~b&?oVm2D!64v8C;a8#Z}O zbWsc2dbM`M^BUXk218xOiVWFiK?1tcn* zw%6M&1RTgS?9J4Vou@_3Kp;S#T}^QXoGSQlPv$W2vS$^0Z~3K9E++7zSAbGv6chsTPHhwRrmR_&y>Cf?Jx9M z`(dDEbO(3~&&4Qu4CG%u?O!gsuVMEqV4FEH(TQEg6V3kdA#+!py+098G#Kz?At^hG zn?9TN!0RZmLP4Div~{n(&gh-EE}1h)Q=?Mjl7=+yCDX?$C({q#x4rP^HXTx4!8UJO z#XcEv1hnHCTT`z$v!$F9S~i(mH%O6kuYra)LF}F@Yz|5xOB;i z#%l1?8AIOnyyraB=lsgs2d3k2$nYM67gwj^!z(RaZlw}X*btoyE6xT&W~eWOoC6E& zSegv#G=SQ)eBM|s;2iBlU0rXLr?YW-Tf z)xa9@A=@y_#uDgeAZZ|JT?{o`%kePS{+Y*Ym!AXxiOBGl*hA5IaZc? 
zs#9umHs>vU^f}1HcYwf-Y|oGd4VC}~w~dr>EXs^l<+aZp4>L@jU$qM%WC40^{x?Hb z;YX`tV0Z^d#?Ij%3gdQTdYtRj*802rO9%TGDzy@oaVZbyUb_$!F<%9Z37hc>dQ!PD zz&%CgM&&fyN~O0)0N@KgqzS2g>KfGB>#P>f^5TurhJ=@#`f(ZgYS2j%%DTULL_QE& z=ug&A(P`&3u=+bQSd|)f`T%B3(kE7- zX>RRnVnu6G(-Bvb6$|bX6jYLs$qf3j3|nX^MilzIv#{Nf()N|uGUcn*)L)_Ux!D$G_41>6V{(5YEvZYiYVPO zRZMMz8Tzi3Ze4+f7&9Ob+m#SZxiHxg1CEj#9UT#S5LxdCdT&w9iBVFQn9s%&Ewa3p z>e`ZlW1ry(NxNCR2XyJ;7ux&wGNkT+(>ytJC8m@)J@WZ%1^R<`$HegDX*Cl1P}{t9W{ozS{LU z;hSV;(m4@pPa*HZ9s9ZVChYaN#756C%-Pf3Q%Q!#g2t)6Ry-TR?VEE!q-Dn7q(Zc0 z#L|W^qS14b#!m>gcQ${p6s%6?v3+yO&2IBMogN4_%@_WCf0%1Acwg+xh5ep4=d(}< z;#=%VV-Iv^$P(7TX9fGD4UB}oljBa55KA1BpiGCv8+nS)i-`RwrEx?nA18!;V5r9$ zi?5W-?+Q<{SsC4S=@8{ssq30uIAL|I8|y1zE^@3l1{joA_D?s;P}E1g;}V#78l_tE zl>Bm;*kKBf4eKhR^6(#J&tmccJI;@i$D~2Vf^M|bpo>Bh&BF-|Tf|3pY?`Snmk)+| zQ(0e<>_fx(%dfi^c`R^;3ihrC#+k0()XngY0@Dlt-W9Y=>W90-!rqh95KvyBE(Lz_ zK3YKg^X0x^)>=qgwI7kNv)H@XsJ;214~m)?ye>{XrGlG6I4U^RGxET+w<)qI5z>nn zlebj&&4X`g&c||7WP51G2N`xS;LLLAl~G=qk&8O>IXeUOA4$3XkcfTsN)FihY_QT9 zO~-7es^WlU>RZ$=eC?xMKEB5hhb@)tycxDU-^6kBsdOp&xDnT$1pTWc2N@wFWPkjYXk{+(n%YQNTBl0hgWCTFcY-YcOkQcSztxKtMoBFi0 zW-D?B?cPvvT|U2{b~uzv@mjoABa0MOkXW`Hn}*eQM3FUg)Y2bXt=+orN;NaR9+pq| z0P`@Y^%*NQw@$DZ`nY|mnG&Ang(zCu9-Jv^gv%|uc&{$1xl?{|@qMva$1{*JX}7Z~ zsRcC+yd7&5UfP;JKlCs3>`~M_xwYyX-IK=gdgu9t83Sx>hc#{|Ri>WyrPE?J zb9=25D#9z>F}ZXEnz+$`zQj8N4|6F$-WuyCMiaTw!^$ve3$mCLc-AtPD)&2fflcjO zVh#Z#?%@jDGGylQn_pdisOY`?U$F=tQd<6Ex&(EVE3;P=8`}KA$3J7?#5U0DRbO+r zR5Z=z`P{)DuOF0i@jSy`?wocp&r!N;PTHD|7h?rBg(=VyO#WWUF-kU%)@4$rKEi67 zZ!q9!Tv$bcoJ;p_WUC7n?+UX{k%5I>AZE6lQ0~5a48|7<(4h=3iofU;iIss!ph4oG z=Ir&(-HY^J?%DE51)9Ql><&<)>88Awv{U+%7b+b42gwem+EW;z)w;}XR8IS8j%j); z#FZE!Q!rZbZ(?Z!S}A)I<++TlA4dzUia6i=iq(4%NVPG16F-gFu4aSo0uCyA-c2r3 z>IeMJlNa6G%V*8kP(hVy^Mn5ESDD8aKWYk>bYBRsT@@$$bN|2R7HEBD7B7j4uPd`V zT9|x&QtbOQ3lBVY$Tr>(bzBskPQ*Av1~pH&^gET6RjVdPY{W7`_bg%lM&QcRg0JO{f*@2xR|AHY_&uc|IKX3?|qhA0O(g1ouoo>oa&!C zm4nBPIUiQ`v4LK&uq)s_K^K9Q2U1-HNl@3zj+Ow7%4in~KOqy=Y|d&O>#$ 
zJv^GSQIG^HsEo$sbbDgSa!w$b#PX4#6BtFaE^Zud90LpPRb=&ZwKp}j9v)*@RYt6^5RsY?_f5)_qM(_BW1QM`+A9}Ihvp`|Hm)Z zZ=jaR0@5CSfua401iu?f-^-!&Z>U5_lYtqfj72GV0|aG8HQ*lQNd-W&R3Xs(_t&~X~?nwPEe#M2-39$*T>)722F zPMeNcyml#d$rG!+hnO^3MMWB!0n5EfL&R;dxL@fcJt)z_bGS6g&;W#3d}eedt<_m- zV7u$&3rlBhs?ox9-*Rn^H8*Tht$_X41t_2!+-l(``{CN+FTyh2+d|v)-3q3~85-yZ zgw|SQ?0xb^t69HfO~oU(u$>`o`8RNuFRu0D%)WJ(cH!B!UDix*&ij=_Q%mVw6v zm8y|eic|lGG6jkKBl3InAdETrAPfl)+nOG&NZ9Sg^>wSuD0KQTZPdXNrjN-t0yLnX ziW93t%UcUJD+>~?Q+Up9PnuI>^C#c9(15*+L~nRoz&QTY+8Xk=Pe`8jXSMa%HVeF2 z8+^-0!sGDuy)inD&W`H1E5{idf#HSI?JCV0O{l0dzXJFzewkJ=!AZ)Dp0GS9*T*8H zc?7_)r)CezDeW)IgM`c;d_u|ILw4b#$@4qO2{i^R>zaixnEGx;H^;^^_HteZ_=?5l z)ZX$p=5j^orLb{a;f?jTb5KO{hdqBn(x8?s6rOG(<*=-@Rz}pf{X7IU{-cF&2iT^P zj=HwxdwO|rPsqY`bKGkSW*C@1B1bAMtu zw~ldPde;=aib4fc)I_d=CvO-j8`}J^v`t%@Y-R`!;C?hP*#Q2;HKgqzsTHp#K4}|x z^kK$z5>FY41I~KeR}UA`jidC-CzDnZsoEWEHStU>!4u=CCx*vL1=*XX{E!AK)6D zhF)Q?Cb)M!_LnS4zEk<|Cb`^m6N58D}<>7{6(ntRju+4~e;lJ=81FrWN_RRslOElwO^s zahy8epcFUc7VC}>;5uCO#7>Os=hvqH&V{aBJWnZh(1?ydzeMTZn(z1K%QknxfR6|Uv_9m-*DAsJ!=P*jM{7O~e@~^~yz}J=$>y4b)8+U|zOxos-S)$q_gbeA z;~B{|L1G%vv7!-hygz@2(yE`0cqA$&Wy14#*#8aX*Vnsb;uyTC^hzjVoKCBWGtYB# z-{fm|P`wBaO)_*EUr#LYVubrSgdwN}y=`(sO`Q-pB2wf7*g!Obt&}UT-AjizEu1Wu zH{ZF|9eOR< zpGGi?bqEUzuuKcfZ}uqqUW9Y)X%1{RdcHRxD}Po#)rd20#07Zgnckj1t)B(M^}*S=7x7C1(#55d;1tOso+Wpn`_ChL3CFo%WECOmEk3dVapj>g{1cNfILXbPx7n6SBC}WqvBgDZVdBx$X3z;Y|08;uf%bH)^!i z{THyJJ;M=v7Pn}*K9F>7sn1)#ffzrk5i64DW1_m0fP0}PkwT>6X!j<=oGOf5` z_^~cnYD3U@-XVt}d%RDuLb;=)#MQ+;8AW4yg3?qQsl1Jj;IGms$+z97UDZt%vgy62 zg0lG46a$9^g~T>OHUm|uba@%|re0|U6@#i1C#n}+X*T;1`#Y&+!-Pdl<58{U=nwn zCN{gb{8DAel*rg=;rBht_&U^_p+*oLgI)35PoM7DQ$Qqp0QQdkKYu2xB0<4<-d4Du zef5%1Nh2BRT;P;D_0Q_K`8Ur%s~#*rU@!^$nB(|429xhMc`!$(1tP^N<;rPs{nLQ* z562*_Sap6{A+bXzC90Vh?7@g9skNY&XyO&v`N3>%m%^K3IP0_p;0a}SJj>XJk&_qr z69Y}E+8c(f)gY48x_kpSjelYeeO-NZfV?Rv*!^kUVctuhG8|uG(<$Q8T8mAhP+1bH z3(!FQs2WA4*QXu1Pt{A9d+zUoM5s}^ISaC(Bfnb3&50BiGb@kaaaM(45;fXVKo2c- zx)BII{*|KQq20sk{cBaoB`I;|4{7UH^uEzPl*N0NBcE`|@N1vXgcoWhXswyTMT+gj 
z7Q+@rU}9K%<8d@4HYwU(=$d#SJ`7JiDP%04inZU)ugq&ZKQG5P*Kx-CCRU_XyGKJ%N;F}C$Ce1Pv@qVZLoSPAr5Vk6^_ z*%y%cCaW>~?mNnfP~Z>p&^G#j1Z8XpXV6{Cj)YX!(kKZFF3gh&Kd0+Nj6?JC;e)oZsJ;hR3)q*Y? zMrfyOrQZDi{*F+9QuUglAA-omXSi^qVgPtd`zFI}-4{2=u52Xqm-e7?TNLz-QCPUY!!= z^Nu*=2y#Lei>svyJ&(i^=Tu{@jD?b)a74>a#v?u!sdPm=|2HJ>@=2@O%44ih^2;lr zbF&49zh7jEMKYQoN9JE~16d5flA=KtkyrtaUn0rsW=aG`3yJ7sg+6($Z9V%%9S7@K+1lLU;K4tu%MW|hT33jiS|M6ntOiY7z;oAe1kL3g?7`&YKQMKuQ z>6QP8y1$ED7ywo+ixq#yP3Ua_;cuf!{*$)jeu`yDZ*zKT!N$y}Uj^ytNb*Nij+Yuz z!KV<&9}-SNwgw$#G}Buln6l6y$fsdKuR<_rVEs&&1%z>5PZ~ZX!=Pj(I5h_$qy!1i zP@)7@8CBJHC$)xwTU~_{JAGU;jcr>=al%O>c&NQXE|E>Dx?RU-bMRIYf?jC#)gOV@ zuZEh&TI_;dL6=nev-PgUqA#y7t}IlvAn6mekd;;!*Ku)ysgQR|4}ftXbk0NmfC))$ zBZn9?uj+qkwf1`5o*>Zj%3EnrA!P_(=-Cef!aXg>Y*=;TOu#Qe?lV7{itTVX{Z!O@!J`*r z_H43GYND%;yAR}tY#Z(Cp57FP&C6F zAs2soDJ}lEiNBhdO6E2v$Cbx5k|sfEg+tnre0j&I-F12R7)aIo=%#u#J~5o{Kz*&N zRNe>iRN-+%0*+Lkl$_US8WL#fpGv`{yI)Hc@r43ZS_J@jewC+1h-~^9@)nC5XesTG zz7Rt#X!FUa{_5F$YhcdDkzcF2pb1gm?Kysn$sf$Um2XLf>a@jRDIn%k=Uz2YY3khb zy8glFCU~>sI{4GhgG#SClDz-iwN|V#JgNK2&BStIryYJ7A>C6O+SkYbZ&7%He%kIQ zBRYK#vhd}y=!{C=e+HdKJ0YE2EkQ^W*rM;7Ic&fp6V@Na94xBX*aT`X{WI+lT9bZ` zX>jEz|B7}DZjRayA8XZD#hKqsb(5~4lkS#yyc5qesw~7$TwH2Fy5xR{8PvNQm&NVy zI(k&a<&ab~-g{H8bcB^^7WZ$ubfq#)~HwfWyb<4e88yimCf^X z8Tr^p`gFks{%pXva@7OZ8%;VDL=%nKT{?@}sy_z~K_`vT7{MGk- z_sZ-0Qs;>OsVSc!)eB`w@b!z_U%2%qEh zVlBOs_bU!nuO2;6^B-~Ii7ACqQ@H;?7 zllt&p0H=Hqe$f8=ZhIwOEG8MOhP&IVgFqSvS4Q;9vpuK}Cn=~VcRvG6@`*q;1^NTZ zFb&YbT(8D1GpSaoF}zT4-TzVqbkA^D?WK|rXarBPMy4K)L_N6XftS-fI|WLVK%{Y( zIxjESdox1Qe>XsV=nNW4lI+*<*Ocm|rFcB#`UJ`QYzdVcdsq_llkF(c4$(#81Z&&b1}lU;uYw!X z5k9ijd1&VcA2bI|(Chruu`Gq}$wFFJ_du|#a^Yp9WtZ(`i zBbgA~Stw!3kU?_oO%^b;Sq`7cO!%p3%$Q79wvwjSRnD(A=nt zYY2A$bK&Z8t2i0Tg%|*p=j0<|OfE}25(%q8mZcBo${MGYo@=SDi)UMsuIkOcp`d9S za;L`Qhi^ZdjHozb@Jw16sdKhwF8_4osV*LI%ItDaw1r=sELy~I4e>Oa4@_Zuh+_S# z|AaR*)C58EIYt(Dj+cUEHtrpqE}A1n`n1+^&;oirn#S8r`~9UI_?)5@8bG;s4HOXdpN?SD zih}mSR@Ntsec-tZ-$38|mw^MFNTAq>1vvPuk8}B9Va%AR6FQ$)hDtU7W{`|v6GBLF zwr9z{G^IVC_p(4dUQOq_))*9%d4$ 
zJhX>BVd0J%2!5hKKbYbfgAFhZL1g!MdHO^AB+0bD!Hwx%`bCVz1HlcQSRtJ_1`zZK zefkvgrb|9+9I9X}^IYR*pM%R1DEZ~S+S^8t%Fxb3buQLWRrZK7^OMYm20vo5hCnuG zY!0<^G5z!77Se|5U6vgKWzO@OVFp07yb6!>T{Gm+_L91(`3(n4~ z7U85lUK<#*A}mp8HH3uO40t&Oyw2^0surl#B__Y$bN%mbXch6k`ZyeA$L`cTMHrgK zrC-%sK?Ir_+HObtkOV#qfj>>jlI*)}sIaO7&6;%G7V``-{#5jr%WO#ds9R^Ewp#;4`m1Ie`+-fe4 z&Q2lzPRpvt+?^dy;*m9@$hQ}%k^R@w0(DW-$zua2KBcPrnuLy3 zxIHK>KMqRY?VUetDKf0@Eii9LL%m}Mo7O<0Hp=`myK8gv`2xS#z&i; zCPuYVvBM-(N@Tl$Y&^A8_naJXk? z-+QmP*7s^a-^z(1LENZc0SUhcQU=nMic55z2U}4he&YB+VbAo|fIr%uPg zTRgg6Bjl-94MB*& zNM#Iu795%bz33H-!uFe4`OG@O=ZsXPD&xMv;|A@&v7e+sp%7sQ(tlh>d2M;>HuU2M zOd8#(xb$TK^20^@z8qh09WeH;9T(VRAHQh-{T^<@6)G5&aA4Y_JGFsxNN?5T=cKerrti2J~rUekZ*`Bv|;g%z*xO)OT#}Cvs)OR=vj(f^Kan6a}7h6%d#&c{FD&&q*?uQ)o zDLQ^Ytf@}7(|Hd)fcjsAx8~x>Nl_5ISK?hC%Oq3NgyZ%X4g z>#Ax+NtR81>aqGt2kAv|M_o}FHj`Gn?THm%Rd`2KZbh}GrT-9IBIDUI_VAbA9ZcBXvPRnhbe?w3Ctafi~*Pl2A zyDttnl&_&~Zk~_-aIOvfaJsvNvJ>StUgI{};$%G;ts|IEdbCyVmTaEf7S(KPwvip? zbi8Rbu^dHXA7~=WPJ;m4Rm3+Wqnti7PvP%X<)nf;pHu65i26)k7^RF__3$hUb+}l) zn=BijnpbuAUNPfAl)$_g{hT;JMt5Q|lg&>WHImYyDwCJ&i#=oAM0D z#quJPQSPwYv=Dx0;T)_>Mwa@hay;ux!H3+jUCC!4FQi^5Xt}HiS(CD`Q z+FaI23)NiwF|YdLzF~8h1tm#V?M{Jpnceh_=*in>x7$N>Y}=#WqasD#g+~Qx`FC|i zREJpCnk+(}DrgxL@KF>fi3E?%xFV}7Y=2(I(%L@5(r2Pgc;3GjH z-@-T<_A`!QRu2YT>h~|yd+*{`^2Vm~_K|hf)FC;IG&3q}ZSO_l|Wg_rHqc^p9$h zKhK^hZID*zLY2c??Be^_T{E#)CA*H7ieQgKr|dq5RChfOl?IE7Ov0b46Pzirl&q#) z6Psy{7|4YclYpz-8p~_co>ft_-qIQHUYEbQHCQ#%cy?`YC9cf_ z(gk?F8M=`UaGt)jZ+6ruc%(xrC~0q*7;xFF43vvXb2uyI`RrQ( zaIIkb*;O@`T)XA{Mq5`L1QMeV)6F!|pSo<^W;9 z@Ma-bQ;a|(6tkv6g-rQ&TUjD2eH<0fyEq+yF@!H3t*A!ZvW2zC$s<|w1+6Y+di{F*oq68YeactGzyDU~l40u-AwPu-YYEB{C=1F?eP>FnNoP(u!S*8i zoqlc-7ViIFbUsK?&0ron_C{R3R=8??=s8Fv|3bw8DLen;Z^fu&A!h1^PRPUu8@B49 z^k&oWFy)}#yT|CWs}^TlZlP6-R+s$+&|ia2A_9{znJc9O@9%g;S-;j-_DRo#%CmYP z$7G#|U?W{iCC>?e6p$)^HZ2pZDnULV2KDTlNROlFtDQJ@-|6xJAWvOyS&8Pmzd$+_e?vmFM>c z^#w~e_eZV$-Q~(ayZuVOrG^N}+fI?NeDdOl`%w|8k+=Km5I1ju6~s|Rpp4U8fb%5u zANk`PuW47jt0%zrkt%GvPUsmMfjHR2@f^7Ns+G{hi{B1YIjf_=or64~1$v1|CJTcL 
zI(2(c3gbrcdCD@?5J<>Kwby_gdvym{cDd6=a$vF3hMpaE_Du_iK5tX7+!$)8aQGXm z&2R;AaVocP2};-P_V_`4Fn``Kd#Swa@fTWZ=3n|SvDEE`7j`gn7Ve@yL5{GWLZsbX zuHQ=|UY0wK2yIYaJ{5A^{c7%MF;O|3e>zlVol@Q;Dv3iWVl)$R4#S6nMW9!{B#z^P z(0ILGG)dfTpe2s;IS-p3&8QRI_^N2HpRHRaD56P*s|lCmheXF=)_E_ffIArx9+D*H z($_$bU30Q7N^jRO*aQmSn6!{ol%j-?)QS>rodp+pbHnB^YQa*OXW@*x(qu;?BxYuo z!F;8Tsv~xtpix(yI;vYh7s2lqmFxk0Zq9RneWx*)we3x};7$li37Ju`*C~%|QS?Rf zm2|5xONRTl;#$;Bo^Ixug3?^4`^UV$QR(4F(q!*i;#i0mp4Ltfw&QMWNEsYWTW(ArO6-Jg1;7;b{b0X?CvS;$s@==6fZlML zS-|`m*=`B1sA}d2U#B-EC1^M}{pLGpJQlg8QSlge_MP5t=aCF%1yaZC(vd7ay2FMu zbg|U?B^@%?3>qkkfrxouz+!;RY|u;Br&RFeuv%7sM&@tpNv+$fwV{NJ&=B3jg=NS` zEq&8OaQh`CI5HHvl?{jX2E*)wi2{zpYL4n;ubO8S33|fE?*?Us#m_h@+CM*EB*>kG zbdEp@uhL%am3wc%HNNAefFS`4Y_^%6dudm%Kl}dbr3Lu|C%Z&B>Ui+KggdLT#hcE| zN3hFpWS5fOkl5O8W;|lAdDdiw$B_6JvCdy!&s3GC)yh1Job2Lqooe<4NavKz_rLFN zo>$$|-?^B=%ny|!LrLO-#yqmi>RfYiwvi4UFgYq7qkzR6y0L%^ak%>>kLCysQNSH9 z8t9tZ6E2IC{bslfu+PYdLWOIHOsMXIuQ^b~n(Bh1NJ!C%E$^ZYPYcyIH+Xs4B_iqD zp&~|!!Qa=QVL`IdW9WC$7Q+^Tg1tz(^=hz}N~Vc4gpd$njTu@G8lcwWy}zBjwyQwv z8WO^`^&`afr?FHvN^}ys31D@jL?1D`wULq{gT@M;Yq;QasZL-eX1T=8Dw zs&e8DDY1zU2hs~3F4jhU-n%r<6{wRHT_dd0sFxMQ`uK&9q$H630NGD+dAgf>l`HS< z7{X4xaJE&4#}$5B%c5Ed{QI3P;C+AKMVxG^?Q(@LdpXw-h8k=*{fcN8jR5rjCPOc- z)V?RvUp_q$hPg2H^{yq^i0p_UaaCC-B26gY7+E6rV5npW&RuhArY<^C70rRMns0&vl>u zoZE@t0)qzv{yJw-aAPl?0R8)^TN(V{vwx7>7H85U4fVZT1~7{d+37+O<9$6anD#;W zd^gxY1TBHB3fEk7ZB~2w#az1IsxcZlj!9x~cG6@6>0Dw;;dH^H*FADdkIi4r98NU* z!HuhyB=R`u7R$^L5jv|BYdk5hi30SWRHYu@!MebhAm@2=&bZ9&8cILO2iZURRUeKw z<2|I=#v>zV8dYXYEP7d1pNl^j(B825_B7p!w4XOHZvLTtNo0^4Kpsq z@67#iC{?~B{VWGtz`hzUFBYhhn*AuT9DULhO3Ajf*d6!l{mq0Xa%^=ncVtx`yJ?OF zrk{>K^t|kmK@^$fyYrh?{WdO8s*unqe<+Hm*n3B+Tidavnn~SC+*lJG>dVCNtQjAv z^;6(L-H;5z;k`gt8Cw!2Bz@4dH1xE`<%^h$uNo<(SR3(tH?gkz;*FldG;1mUu#IrR z9}^){^Uv~%4?caFC^l7}b^pZmi{ICS@=8zv?Qc{rOY@C=eqX0bOEl3`L)C<_GD@{Y zDz}%P$hH7l^ZW?(MphImsqg+hxpf=@P2P(pi@6B2*6ZviolKvJrkW?;;dyzd(C(^K z+^Z*du6SAS{dT0(c=s0C9B7r6JW@Ibr)8TWF`N7n#P5Ia!V}0|*%YXHx5~dI;4g18 
zm0yMa9wa0M4eVZjs@$ArA9*uufL7KX&T7vF)NLsI!E_a4Q=O0W;s>tkyTL*og6(7@ zp#qc}$d1*Udmwi=N856A~upLDygKw47z4OT+Q0A6jwu>DFZqE076 zj!?P*CQ^Xf$Yt~x07*&vomx#SLRMo!h8N*KhW2-t`_q9zus{+@OQ9I9AC594Ja>hj zlih}fhqpHN>U&X$oCYa=vPhfc2&AF$4zvEYWWg5Qi;H|ru22*B0gDv#cU2Fqd?UHiudfLxjavg+%v~cTU>5YNA2qk2q~c zLmqd^phE5pFuOVVC+(0EnaapsrZV}zuFo#4hf_S3S4wrArN1qa&!e%BJF(bHLyGJj z`wXDOitNpiAT7399(#OW6(=}!yoHG@2~aQbJ0{J26KMOxSWk4QW-+ptQixZU9Uvm|#uO_=p+qlPHriUj{$L4G9N_NI?eZ-?wEA}?DRZl0bWil6k8u@N!h{5&usVWBufsFl(ir4GYAC!~v z9Q-BRuHmYoqhGX)^wx}3^jmc?oiQ9Ff6#@#Pft2Bb`4YU8q113x!j zv1p`bxO?npeJ7u?cf1iBiOsa$-e=9#I%D0XjQFUU=J=ON#CMP|8qnzjW*0AyvbG(k zrQ{|`4}uph4!MonZb^UQi8?Rtmf$rGs)`zsJIP93IihAizl$;Y^2H}veTaA8}qo03L{Fl z9u3t~EDFz$c1K0<`o=$n0@JVSM9@}kPxJMz1Kj=(s6POd3~)7ncB2ETw|kxBpiA(?HzN+gsy(UF;{=jVG#wmi<~dWgcK;A~YtF$)on;cual zv!*_qsZ4QGjofV*ZY|GExOfucv^j#~o9gb%|3vzIQ}90LfpG@~?$}r7ja;5D2S!S1 zaMH9Pck7%;gJU0D;r2|D(oH_(@z^#BZa-W|EV7s+Do&fLNO5Pp>D06)-YY?!3B!7+qr+kf9yeY`$9gQ2WntG)T93F5x2?-5 zLuB+IdA6&G^!H#ch(mMOmQ1t;V%vT<4Q1_HzLhs4!Z~6{|A(EFd4#rAinnOAw}`_^ z{JzSk&4ekAXPiJMdsO<-LgH*FqB;IJc!Nv%kG1g0?+z+1-D;E1*|+d6g9KI74fJ_N z((;|n$H~Rh=xizXz9IYh-5cbyZ->h)CTFnG8u2H)A{S5UX*5F%Cqu8JtTK;=m&Ib0 zqH^)>9>w+w9%XtRGa&>lKMHREC4@c!D?Wk~ z&h@{~tY90(&%P|~>Ak<{8vKld!!JJ$yV*Kn-|9!b+@f%a8&}lBV)tV_Z)gp_zsXvH z;H)R6wmBv*)3xOK{qn3(Mtrr*T3Ia_4SfRW;K8Xj8}_jeRX&+={4E$%4nF)q@pLn?}{JR3qwPb=BWbj zyS12Yecy$iRVJmbIy7&W{<9oQGLq8db~yc}&S(rBwSO)~B05j@(No%d)-sJ7z0FM! 
zx+WVL-RH8O@94mETnCT;;kDJWAz7c7KL_A}hs#xr>g&#n@NrP~!Yhw(1*DGO^x z98Y|}`+#Z5S|j520UjZAy7m#)MOu$xu82CZ4vX#t!Ht`0uCY@_PuyPn z0OtHdGJV>sMiIX$kJ&Z*B8v?o#5y0JWJ~yC6DzURh7Z}Z89BpD%ZY{`YRbbujkEp} zzrw;Qz4@ZteZ?N*6BG~-q-!1PpRukO?e814+6G)5;RJgjL6kr7dscCR>Ulep`aTCA zW{3bHoH0qlqFe1p>pZ$k&L(^_CIap(rP(f&bcd9Ok@=4pNQ+-(o89?2-VIBpo>+sh zE+O@pfZmi{@VA}7X#uMBec!hv%nhuPSe2jFWNK#6B@ew-KJv9D`hPb><0|I!I2bc- zHH{rN&N6+0sn^ClnR9WJa>1?h`p)GNRDgqVDcxiCCfe5NepyXO{*DLKlj)w|U)Jky z5Cd<8x|Gwvl5gaFi7UrrSd4G(ZQ?xtAt|MQ|7-mSyjJ2ps2kCL+^~jk()JY% zVweqh#NT3mF6$bCFyGIyuORkT68i`z^1Q+9TjR6WzrVipyy!H$vNlv$kB2hEbB{{Y zF=v=J)Yt`ZiRB2dQN299j`y1H&W4BYEwmtAg*HJJ;NHB7DMGKb|LMl zzekC|mAHpB;f?x5<;7%pg(K(fh-e8 zEcxsBCCC9;U*sLs;ksZJ`F*OlxA#yzc=H!Q<>|q76ei$)T3`saJPjBd)N@lkpVvQU zkdR2DEatS7tS8UOnUUKp>b!D6niGfgkQJko+BA@_W?Q*(Trd;`im%2U*n{Y

    9Bl zu#@JIbwN6TZ{BhyREm@~t1%esdZ-LyFmE0&VO2N9F8i|~f6R!hLRHMRam6CZk-QYg z=~I9NNDCxMg0b|c#!b^hCs)1)7T7Wp|5I#or1KjUz9u13`jB@kGTuGnGb&e8>v>RY zqR(8qR3~@KH8-lenp+^gWaMl<#D40BP1^U?e0cF8ei8)PjWme#iWkkKyu5MqO2X9^ z8^(x|r@!UN6_dD)VqnI{hgu`_gs6Ms*Yrh=OlmY26voaF7wq?-PgS=QJK1 zGDFeK9dzb8INn0BNMC=W7lm1Q#ElvkH*fS^w0_d{mI5r zME{}6Yurc3)FK$alJ7=gT?~3&vmjoTvB@fr4MU+^#--CKZ3#FCJ&8B*6#QG|@I9)Vp67#)xlu;m#ru=tg?F!J>HCDg@g2GgB390QAuo`>S#Q^yW zeHb_qVe#$DLV6Ef0cKrj#mS-qc=bs!Q^j?UD1Z+jT$Ac@gdP#~0yae-)x&oGLnftB zT^a%=vp(+Qql{Hza?9F7>uAoq&dI{x$a|7V$CU1B6jr=gMgSxOemF~XkR{bv~R zKi;z!j7{##?#empkMYHkZ^6cZ^$Z=0R(94j@yw}x>}JTxq|-0DtVcB=x3V5+GO|x1 zm|NRRSmge|=yi7iz2 zvP8~5%TgeF)|exR%<9?7n*S4g|Nb|a=H!%*oqA^4=$BvIMLlM`3NE#kTWBZCMYrsr z$^hmMJ(=p%#n$T(>vwF-LFKbVXrI;@Nd{(8+J^ldMEhWz^&gp9<8*f{S581gm_8Ow|Mv zw5hxG6emp!S!J}g!eL2@Dvlo@7% z3NfKNdI>*6+Sw|XCerGtP4e%lrrpo`IBV3j&|kFGU^#dR&tKfqC--sMT5m6!9^x1T znZCYgx24dFbUw-=Mgy~siq2tA&Oh)eI|X?ehjuj*Y76@Uo;pFTWjp?2n44Xe>XtIHkg z1^HV>M?+)8accjBq1PS=$H>uH*pLj*ZoC^7B`t zI;>^wL817QeFKrZHuO1^NE8#RBbBo`Pv7gz!Eh0)D^(Q@0p)iWgrrwB>5nV6a$+Mh z(GgI_kd!}U{fttmvfypV?@^TgAQyN2zU-xuQrsZ1M!4cUQ`kr; zkRtBp*e}@Fbi`P|Ks7Ig@1fv|wqdy}?pPh2)%*1JMm5cbd-TI^0?RbAYy+`SQWxIa zvD7~8bSd^Q8zxij*!Kv`-TaC?y$et`#?}^9zr+WLjlJAy71gM@P=YxOirA^=MT3Cy>6Gm$&^ol>LAj7GEh=jCjG$$;JZN2Sf5-c_p;oMY-i{0W@0A5}g)b*^|I z+4^@#zc*w3vQgdReaSXkvRj_ZQOMivb;Lgz*hpPiXI0&rGwQ z4R?E*P#jl-wcVY(GPhc<*);MRXk(dp`XVu~;vTeXOYtW@4AVd4D8EKXf5p^WT|hcw z1m|9%;H)|RlOJ3^MvQfghDD*InA*U;ZM8x{w_J(6}0tJ`?SK57U76X|Iv& z*5}BZ2#t6+fFtY60Nxg^eJlT-k{5nh@1B8Og%OyxC4UKeC52pj$-GGSvsPT1qJn-E z1P>{>>1I!+?%%iGSWtewYm3-wyqAvjue5vXNFbno5-?mFv;RL2Pu&NC=u}iyx9nj`Z*PUv3eeKvug6r>f0D+*gApHZ~JU(n9D7U{R z6zbOo+o91GU_aPd=v4LcEya9cjE@#!u(46O6T0j)a<#?6tvSaL?b8 z9kLCZQ`3g6Ph8&3^QOx4*F*U3Z|YUJs1IxiahjJ=I{YdOcpn1Z!9Um5 z7k?3kemof4?NroWbd}JZ{%SGgqAMUH782GLdS;r0lgRYNR|*wlLZw72!SG%Cpp~0} zAG(ldyyCooA%Q85vc*DsT(`aRVh4gAN|iiGCg&)ze5+&S#kB6jxD({7s)GR7ayAGf zMHMrrJR{>O7())KsJiQaLK!Rwj-}(l;I9+4nYt&vV#s!7@kY%W%~k6cH-`w-oCWXQ 
z!naZJ13wSe&y-NpxP+~PH3Ty-7g+o|qFX=mXWO37%9Pw8b`zpQ!qz*k`QF;pOC~L_ z=iRHO`-yiGV%D+(?UpfIiNo6OiZr>e7JKv*QzWVoz1Ll8lfPgw&ycPD1i@^ws5lDm zAH?!k-`>E`t3tOdS08WK5;x!2|=Y2zSd9u5W|&NzX}@=*V#6mar++0 zpo4I%GnONP^hL+YJ*Y87fv{Utc8;yz$>?n%gLBRKv3b8^-2~|m6%TbW{hk3ui@!9o zZMD=sWq&u9BWoTRYya3uMAV6zwsSL4#MzS;wdRD#8?6;q6fOUjCKdLIa!(Y_%o!q< zpZhtkj$BZ+=*P8%%0-5ghyefg$@e|IAh7sWKg*+l5*IyY#Rk`Oh3d=vxmxPwY2!OzfdK<@ zBmuS=hDxnB(ZV>xv3))BLk2gY0;P%aZ`Tj*`S)W*%133EqXR<65N*6Rsk_TY%*3Wl zd9CHD0%`yk8rrmsaV&kYF%1b#e>_7|Q6QW*IUVrdj7e=x>eW>+S3l0ZuY4g<8jZiJGKqrBU9B< zWzs7!%@z^Kp!`vLg<@86PgGgBmvw+IeEZLy%!9@*hu ze3jQMe3&|HgXP6O)}#|f-s+&@!Jbs^EEyE=TOfH9`7r=y8AvpH`GHjMbnzzaB$b7z~NXL%c#JE zBi3QT)#R$--%<(KP7PwF(nB*;z0a3>!Dx&_&tAgqznv$Ss@K zZqSqbR`2^X3{bOJw-#udr$5ZR3Q{sz7Oe^dGG%-hw5QWPmdlh@|0z%--4;D-;(Kw& z+3~yKQY4P-7Lzl#OmMv1jTr8WBh^M#!;cL1cXe-E`tSi$iYh#qOh( zhA%Xmg4wnY92x`VDe+RUOcdwm!RL@F!4jQM4Gw}j7paLd}^AT zzh;A@BZY3X9Xz?z;xgkh%HyIYWg?IK-ivosq-gE@gj;}6pe5hSS^Q7|ouf#od{Tv%u(*M2qROd>a{gQqC?Uq`c8gW)&-qO)msHw1L5 zKR4W|T{rh=|!hP!BZMVhI8MOruSCF!%>X+2Ql@NNy&f8YsGdG-nACteRW zrLa7(w|6*!yl$&*?eX8o5Ns8c^QNCWGp#rN2ga>>yLk>9Q8I8ISmyhw*`s|qW>v?=!ba-C_SDEMA|L+){`P| z$c>wIs7-J{*SrD{MAb^D1b@V#=1CG=$*of}CG=qC;-bNaNffs?zWMzUcH<7)2iH70 zg`e;}V5UO?IDdAItH-WWU00m?_B$bu$Y0J4%K3dtcuj)=4L07bdwoP+!Z zNn~v8s@{3NguCVw%AeD}|CA9jIAOA=ZO+6S9+4+L6?$zu6atp>Lf_&;LwB(J$wGCF z65WM7Y(rI(ME(d>+k{W=)=M=Lh&I&QqI45E!)dUcZ6ZY{$KmOk#xVMg%Y&m# z(YRC}$@GLe(N<`i1XD(gA{4%N7aFsnD)N7yFOie3CPtAUlonyTt?eJZka*8Bm*Qhiy1mLI`hsw=OkgH3PT*%oZzhAlQJ8O6@_wojLHrB=@j+^%-{? 
z6dYg*@p2af{wGD;#Bo6b*g-IG2lx|4KV)tZqO=YF+zk!VPJSd$KAc&0L6qLO&g<~1(?8DbHt2<;-S!$qOhprzL- z&5?+9$j0617T#6-nnM`2y9)^Z*(U=Yf2k~lg&o3bjK_1=w>bZnt}9w;%crEZ{VbCx z{V(GF>7SVII|KYZNY;K?@@`(7#mLcs@&t?<<;MBsGL5e=>~59mDSb;$!;vZ7yW^0i zBd(}x1S3gI2RM(#TSa&NHO64<6p|;4jBTmBX||e+>vB&ooC#bpTG+O+NE7I|S~atA zT~w2!4uG>?fSHY3>eCsnwdA`Bw>uo+e`o+1KsW&{*Foq|y+FaF zJ-KWU!I0iRwAjK+WGp3|d#PBkFj;S9;ef|B-R*D{(AFzFzFgd!zEgV+)l7o!&m6qV z^}R(5#nm4$|H8ezBj{MGk-S;mD|Mr;4QRt}j)C^=FY3M~mT5IP4-eeO0r-b9Zfi!A z(`EgBCrec0`!dvui14Arnu3^==@>YYh&C70LPj`Opm?}LI_!K}3OyweP+BTif92JK ztN#V=O8*Z0A-^M%35PJ{@vFDxi-PD5HTPwWK!ScBUr(kHRKv)|LZ}`+bsQxlgHBiU zOlPLQ_-5jPGEOYFU8xJ70vZG}KR(@WH`{gzDb)Dfh@}OLqbXr);$<$K8u7JW8dP}M z)97To@mwj7$%wdhGABGUm#%0R(OP+p8o?Cx`;^1a3$KnoucNZ`itz~O9i|F-fo1hYf#|b!*-V+C*L@U1dNJQbm4)I8nq) z^@+7Ot2rG-mwxr~zujgJd9JpV{P8so-IwG0vu}1BF-FzkIZtb+qqOL({z5n(l|8uO z%BR2xujnxFIeM3{fM>&>+w}yArj}z4_t(V%v3%LN{_j39|7=)y!Pe{8z^)rI%xy8* zR17Y73g%xQz8)gihXNb?Qp6Kv<(vavuk0mVI39`BeF}R0|9p{phpG2#RMs#4#A?Xp z*Up{ybncL3hv*#40Wgjmlqip72p(?bUOp3YKp+D zw7r-m8^_A_-liurvD})jA1WW6CtettcYLSD zODz+ysKlP)P)o3zb|>?1gOF?QE4%s|%)<)LWmdfiXo6-YsbC(SeXvU>hc=kh98iNMYAlm7A7a7!?QD zN(pu+iaO=F!zlWjzXKe@l`h4BP9Q}Qc3h2*C==Omn{C1-6|jhBe_Bwh3v{Yt#r0^_XLpebouEI*S&B3r$*iuUrqDA6?w_`bUI)rCB5(K+-c`Zl2JClQPdr6?v^sno~HC{Cl zHD~)eUr@H8CO&J4^}Nb5*lD=hqax5h1U$)`kQ`yYQa*zQ@OyD9b%FLbZ%D{^xt%-& zA*M4g4IvZZ*@ii}IWYXRLsK!2KKKTAeLj|2 zOtf&@d%b$z*JJz_L=0<}9~iZlks z1iW<(gOX;;p&5cOU&5y8PoEb-5=f00UpT(|Dnp zI+lc4Jy~K;Of?k)*O6L`tEwp{#&dSCS!K#&SY0JG0QK{XP9eXI4{akasz zB5-5eXgs0asRmaO^qJxB)2@9V>>4m1dpb+<3Lj0Mb%n`jwHjymBu|fW;AwF9Co{3Dqs1| zqWc7{!7v3u;f5<9xyt*6MREqY0slX)Yu8Y@rjrcnJFAHEC6`=20`-SkgC0H*!0^I2 zoD%GbkC`>D=lvz<4C2Fj{#iRB_J4eu`=}f8`dxPq@&+xF7tN6LAcc^B&ZGaABab|C z|J%s@F%qGDE z2Y26?*GS}eG7~EjWK%->Pdw3g1CEch$H=1yN!mxg1Zx{W&}WJ5)8nb*Ux%?Bix>UY zf};o6&u-JfAwrLHb~_DlJ0Yw>DOYORuTI{Sy`muHb10 z`IDvn+QRel-EqotcaG`WBN(GH(_v+RKl9XP9W;M&H!3a>;v?MgkfPa*#jJr)W{FKSQ%vLfX&phd{eFdAwF~# z$sdkY9X$9}lE6^@jq#kL_WXkY3709?_@-_a21FNfM6p&v@pFQ2!>jsiNO_E-7Fplx 
zBVTawPxGXc1MY4iJMQlegCiy8=`w)?8gMuu&miGafX}l)lq)O%d7#}YoPip^MY79* zoNjxmmmW@t;NQ7AgB0fARuBHYny{lX0?b~pnT+oh4Kb%~IGhd&&@AXI1j0)zFeN(0 zPj;6LLu0t~^&B@s6Knfa|f@@2?TU18Itc z<#4irt(y)~Di-=D@3{`hei_23C1cmeD-sMrBp8W;juXEK0l^Rjn`TBZK;k>HXa<-Wc(=y-<5IT`7n{!Rd;%3Z*PPVZ@t$-fICzJYK&4DAccZ3^OoiJxKU+E}?J zWJ#u>*CWvdt;U>baRGCZ@`7p?nmQEpxYb*~xi7t&m15Zuuc+AdtHsWTAYcN*jo46y zlXawv&B6LO@%9uvN1n?jJo0J#1!(NInlIrtxjJQ>y<$s{oC`YwA%9Tpt5$xpu*uF8 zL_L8EQeb90-x5K;keu4(`wou?IYdO*m@A5#rf_DvZ5BAOt&NbtMC&3pzbmyq<}p>S zfo}^d?Y7GE(-8Lue6}auPk)vfv5P7=Qj2VqE8%M(mmy$D^!td`=n>B?#?iW}^nb*u z#MkuyyZWzqh36^c%UGstJ`6lBa_A=QejFqN_sX{I&OXY~=aIWCC&uDgdb!E{FFmU{6<$BePms}GV(?dY<0m0Xg{B;S<5~qSgMMdz zn*H_WF2ceKC`x-h`I$)}nTkyi{r8mW!1}Bore|&cXj}{g#-}Fr5hXs-bMw#2q@FALpN}b>Hvss0F)sM!99T~g{ zJ8sV@N>@sq-lwp|q9fYA&J$oRgqC$3flgyAg!U385VgnK5Vb|=N~=+rRN~e1QNuVc zeSzix>pj2Us1U%}1uuG^9Bhz_3z@XC?#k$grb=$qoown+%!-$X9{3F{kU$7`rG-9p z>r|&~1}*A-tZnd>VbtoA^@>A7ym*?D`KudO=pxW57R8!1?5Obke^|NHRtd|IZK>d? zP;IVHccFx9xZL}B6ovk-Vf(GyJ2qT``;XjVFjw`K;D%WEDZjsR%r}R0sWlOeNd~^J zn)0k#S!~zM!|oRA-(rF0dA(2Jj-z510Y}=)q>;RR^{+bFrABPyvW~03EhnM{6A}Ez zj`@>2I)$-)%=ih5x}h_jiPq+0G-|SV$jz?j<t<_8*rJa@-VV z-Ho7!A0u^+x9}?8uLSdp;ng?@zM>PQ%x?CXEu5WLFk<{k`xvInD@9R4cN$KBHjI+uHy zISH%t23&5zi?P$^krj`WGbeSM|D0oDy*aG3(1N!J@)G(WZMOZ^MYW4^Gdh9p@-c>l z_OIHb;#w?`;H&!ujnY3i`o1wa)9aIvBs*;-`$s;5`anQ*7jxchFnbqvm$#U};+<$} z9)Ja%`K=!1(OrkXIn%wiGrcHOPG(}eJT_@Ak4-W$ zm+jxE`eWq>%rri6XG$Jhb6F=}RVUx9w(_r0?$8?o-N|>)Nq>yg~ z4Q)b7@#~sN)QzQKQ86v?FtEv{?AJO3Tr~ZSRUX>W4LC7M=7#uAigZyU}`v zf^9=C5|cjfE9N#8&u*h4wIiX;VJU6<9;Qa$4KMUN51%!44Bytmp>WF^^@x~b9%gLC zc^$E>n*ONYtjj@kwjFOWI^V=1B1R{)rE_$o-1X|idz%?-ymexk8$b5`ulL^FX*O@` zD*t-Py%nbvCRI1C=-9K{N?($1-Z5PE)W|lWt4?=yNYAj7Fhrd9wUj6S{ZTD-aAd-t z8+g-EGqgJ9L~WFb6;;V>p&Dohc>}6DHJ^_j7ixL|1$+S8R>Jv^7@alP+7gM5X3SkFX5nO#1gxI7H?&0m|pp)Cb zvy%#>{d#&Z8^!liQWeI^Xr32dRYNf?bU#~Tv5ZG*n>A6{F@L=1^knI`2s-80w?0zt z3XLpz%GRn=n(nS$V&>yZfsvT~&R7- zU)m$aq*W4uTqQv-?z#P(JlyN^`h~V5nTtLVW%Da9C%-z=8sb9IpXB-l?q>>X{aIGN#X0y?f~DvFd1OWQtvtyI 
ztdBoVlW?ps0Po;;{V*alsrt17i^cgK1?4<%Sm$8Q@{l{h+2TkEnL=w&2!l?q$+k(} zeq{A3-ZJ;=kX2FFhg9Mk)ZZpyCzz>~a9S0m&qe2q2~)y}6ns+}MbY?C3855{&UxP7 zU@?h#7#b8~%}#oQKThzCQ|#Cu`Sz~}oh0iPhfLk9#U)`__avP96^16dF|UN46y37s zVqoKoM01#t`*SEDbJ)4WD~E{5|9!8B$eQEmHC9}mra!7TK&vat>0wfxqaeYg>IC zPlJIxG^(Sk?9)5yNgp}ANuSC;{s^_TR=Ytlr$?xeylCFy`|7kR$;NTQAu#%&!2Z#n z*LxwkaisZfHU30!C^S!XC1oN#$Qi znRkGOU?x$C$d3b1A92$2nEU^H_<2L+7m0}XdoJ%iBfmJ`%!-9$YBJ>SK*YDlf0frY zIPO&R@4x1?$#S>btXxzJ3V^0MXF3n3j~zuKl(FG{s>M=l&q-_zB$G} zOe%_A=iIC4j`^b##_$96D;>5@|iZ5>k;Y%B%ry1AY`jM^02iT&gGRu~_MeI39|bhR>?>+=UB5B4>!mlrv_M z(w*xuGuU;V$OMA!!pjesp753D@#>Vy8gq;_VkJG`_`1B!!Tc;Q(8!)_Gd)2qvh_nv z4BOzI4jys+~)9O^Aw zLq}>k6OLocn%&WGyhWcHe^g3()_wfj;(c_skzMPP>YDFl1uBlqwSJqh!UMd$F|*64 z;*P~{D`iBA&wbG7Ox~fFvLdo+lgfx3Pk&v3D0=;uyWCEzHTM%TXm~4yQ~YE-?gmFD zS#^$0IitoPZ_Cxue3O-v+mY!O`eqYOfvZDa6T1wz6yfQo-rBwC7tBmY7}{&M??ojt zd{lnlhJ8nWIU}+zEA6j6f{Cq^;!nEZ7qGzdBl1_%i-4oGWiiH2VSgsK!$OvfAth#o zkw~XId_EzJ?7j-V3EW+_wVH)TULVet8O&X(uXA*i(ReMJM5fB%X8m!=V;{ADv@Y3W zE!6^7`+It2uY$+(oRg*Zm5&kYL@#@$uP=|?mOy0Yh!)R_Z4+TnIm*bZ^fxdj%5Lln zb`N{4MlQBOt$^gIj}w%|lj+UKZw^wy62Fdrnem)mV{7VsS~`ezqw{G_)Evhj6`r`I zJB?(aam(%dLY5p!L+8N!9R`bt>ItNXkedbgGAAP?* z*!|iK@c#Gv$}d{?y_RGxuW;ZUXDEnE1;#y}eJf4Bd6QXFoccixMd&Ne0J15~Z=GU2 zJVWrzrLSOcbnzI?jeo!n#SDSc)itwz7wki^jxhT9D_Y?6#Pu#_sgN1Bsv}lzrND{z zLAmb$wrC~^ue@E>o)@rR-_2S<);c&_x7E>awkypfW1nkytg&|T3M|21WlJ6v5wN&; zHrG1$VgZ|^?ALs;dXoo4|`wEX`$DR2X!kXJQS ziDcgi7h|wTq%PF;V0pgAog(fOqc);9&4729>Lk`hk{9MGc?{IqrL_ zcqTP`F4D5 zYn1mcR~wK$4j3H=72`lz&r87jEQQdsfbB@{A13g={$HjOU$XWWg#6nVvk6oLJZ~cS z?|;aF9eCFh$HYnuNi#`nOs=LJ7(C!WYdiU?^d~6L)<~1(m9(FV1$?zSu(a(g1!ZR2 zP1-z4K~FBDG*d<0fh}>R%SO_Ynz3;ippwlO>U;^`0uHDWIG2R8$&HODv|xhjR&t2! 
z+yA3k3(6Bn7IG6#%Ginqce1g*OhGiyYAPI;A}6Yrrl)FYp4i=p1x*})cv`k!3|dCSkaq5@z5TJ(b}IpZhXyf*Hk&p1LM|cj_t6Qr%Lk2%UsG6Kmw^*1AdpR z_tPbiql7GEr;R(Ear+J^AGkGzmpD64gM$A=L|r_3fekdLxzn;bTxf40e#;;Hr>*w1 zqgE|d+S$-j?)H`X>#;-_fPQwJSJM2t3QEnTn(W*G1IMLGEk;9-eBP$%%?T82TTMTJwe%%7q=o@sH{t21``rYTmfd1=>B%5{Rl|-Kmu~~PT<1Y15_c|Y z(aGp|2Jpxn&?nVGZwIK$lMw(3j!_PxW^x0lsm16n0C3xXS`BpG!B?%{^&?KatBluy zY%zZMX+13Xm&f_b3q`I~LJsC?CejhIQA?azB*z&H#|~7-nUcplY)y032!&Tu@dqlm zSk)l?0lRo=$BFAqO?5UvS^;Q<{dv||HSt_+b1s!f zO$-iCG}{WNRXWV6{NMO8IKIQ88#QR|D$6bx90Xeq3J1+yJB@j!PwV9AI--DiQPpA# z0Eb_;Le|-&zMdLm&43pOD`5FXD!QWRxHWDhu@`_Us~pr8f0a4Wk5bnudM~eyGvLa8 ziw)Cetnq4Ux`uDXbRgaXUI-Mc$%v_a4e6)*?|I8ne1GX;$Ldm7n}Aa;HuGfpOQOzn zwydjHx2MOdO6Au55hGmyERX0s(Qe@oi;lF^KA=H-E_Iz9`8lJ%DnAc;-u@n!ap@K- z_Ha^U06kFmBMVxA(mSNwb=59y&K84`*=lv&W*-$$C`wk`@!M6`0Inde9gy|OMUs8H z9YPOos$RaYr@;3|%|i#cLwJF%!$ayO`SGVm8Y4fFMqJ;$T})+mPNX@^XSuxVVwZ}6=6fBvzF#{WcrF9H2n$0f7UGD^!@R+CHs zPgnRa#0$AZyVNGjBovBc0sij<9#4=v{#6quCx zU!@V6KAAe`R1EU>ar~tAo^u-ml=h<5L;O8`=>J;52F}F5{0~2AsqI*%X~dzfR%?pC z(LA5OE6GI=CLX6CpmA21>vN|$Mt*e6+QHV>WBEtlWyK?)MI7&<|ISo{CE@>XK>@T0 zyp^L6-p)0LT}*_QIec-Hi*y}(OG6N?(bUM%ElI+3nKDQs(s2xC*!W*oKEoW2?3*oK zl?uaHS#|GEBwMd{KCz1)&puNHj!my2c-|Y{5G1W|oyK#A@EvEuY28-3u6ISlgK=Qn zA>qXV?ImRT(D+&CMkt^{$7C>J>$aGtg=+;jt1B1|Zm{*B^;)MeX;W`&U+(<74Xe_ z@zcz_w{S>(CNKWjVYk{6@4!uFBgV55?_hHMqzeJw2N z=)*)}Ncf;xJ5ik9DwsB^8$u=jD(x|$6JU`&HTfITV96$b$80E`Lcm*pPzA9K?s{x*)X>BDT zL4wQv;gKoImM^^!l2h*yC^1arBYj;jHLh1GdT%4ee;!AdWs4kec2!Ec8SepEr7G;b zW$QeLTZsI;z;N{Nd}X=(2oaUXt7|>Wb>WcRPsLr!Ho(T03jXREX1USkvI^p@ecW}6 zt5j4Dl3vS(89ksKPPn}I)JsKwGH__45S-7I zPw}18P}}W%pSczyfEKSE;*BL7I3gV3@3{s{v?|XPfG;hUE;<5?^*xfpA!y zsyTULH{s3F2Vd(}&V7t+$_n88mZ2`r*MB?3J6y0YU#+AU9Qv)8@aD?TM{wjz^r;Fe zhmk;{DclV$)*ZVp(i;Qge)+=VId`g~K`L z6n`EI6I}C380wNz$wSX8|26@d$Q+A&WjgNnz^?r6`^jCC_PrvL#z*>Mw|x=e&^oeE z8&D$n^N0GZ@ItcVa^q{JS1ROar`;hE?wmhC?@5{(?<%&iC%z|!*7cHb^9|!|gW*Cd zf;3Tm)Gyep9uXd+^Y1M(F2Q5G#TI#xMR~@S1`|j z0{sTt(Z}n+ouK%lj8cHO@?zz(Z)CP4tD;JFbRg8Km}~ZZA<-)p6mSOyOS?bNabC*B 
z?~xQIu)&jv)Ub*70a8YX_c2+J7SFvI{q7@RK^}efCkVI*zb!h(YM_`gYO=^Z4ywsr zjxH}S525iaUU+%q9)q-Q|D{CHhRW>S;z*>u*B8lRFJOAyQ+|x{d3708#)i#>{n*QP zt-hDUbz-2YE%ct4?2md`+I;l`x>qtu9jlFZ%3$PWX-;|Zy)D!3ykFfv$g2Dw(qE3z z$i{EqKe?DF8Wb^5R%9Okvw0?Ba|F7D;O=Ki6wiF5M0$C6=za>9055kdN~lL^akMEb zzfVJHe<-M;)puE0QH8ymmRstW=5jxNXnrKi%O*Qc9cN@E%cJNpzhy4>JZA5|C3PIl zFdqU`&*ab8Z5menY>!u!&pM>mV}1mHQFfNv;-RoR#(o* zLd8@|54XT;ap;`1?Mo|4l|taeH86Z$b-Jok`yu^o-7@g!f^?na{vVx}cf=?k=-3Yy z7>!5Q4?5IcMy<;ne!OkIJxax5>83h+7rQO+l&2v86y@%wSKpNb#J+UN&q|>~ z`4Uet56vAaYTy|6_c+G+@iK~mHMvX6`OB63@yFzU*4T=pzk1s1mEN=~`oP&99ZEK( z%q^(i)0dpx@@0dT!t1xUz}YPu2RVjclO^qt+6Vu@ zu_Fsa@W3y90f@5gn|5^AH$NuGL$7@&?0?M8;s;)IC0p^^i3W=!>ZZs4Up2uybbymj zEP53B?mdTkkz6Z*|O=(>Yq`c_IDmq8npKh(Inp#hb;v%Bof@^Gh zx5oLHT#cU84DzK;QD#qRYNZhdWe(S8DkKS65m%2D6KEmaq8FJ6e*cy3XZR({#|r%Q z>rR$$;t&+YYuK9QKy!mcS&%XwXK?*;Uu&#_L#85JZ?IuHtGHsWE?@Wr1!7087>g7x zs_*iYV@1c&ixbJRbw>g6|57g}%HCt&jzuk8+fI^IWw-G`j1cXSii&l8vCNkrWT!!Z zJ0q2S#nKb&LHs!bvUR=QbFcol!xV>@GBtD1GiCPnCJMD%za z#%G9CNJ@gdq4E@s5X*4|ry5?nd|^5DEW4>vugP=gfd1dxomV}d za(ub|x+9jC3y44@9KgBCDlRmq`XVShuc;p7nldiq93D`)5Qpvp0K@4p@+=OK4g1a(jKR1HyL91dW5U66GrQKmO)CWEdq4UD;#Y)dHLZhv#D7wIvZP(UX4+%GcN#e0s<}aA-*KLoqtYT<;6hvQ3_BD89-!tQ_0^4l1|ASu&ED)R-2=|vZTE!nzT&I+?b7rj zs6?R;eHH*}3)$q|?kDZScp^OR3-20Uw(&&`=M>VozFuge6GQlMAV_3VkIv6dm@5^12`q%8B}cLj(t)Q(4k+Ze@Cj9- zwYx`G-|>x=|7ACtm{{MR5Oina_gup%Dz(5*Q_rp8J-t@Y zv&UGr7;0sC*Bh=?6so#EW-yU*431zpi`>`jbM8jbN9L1~TKS-D<&6D0U+( zh#vnCyegd!rtwdKU;m)&C!6lzo?%4;Zh8voW1y9Na(*axkm8Zfe7-d0xG1@#ktQ|+ z-FgK7&E!1DqkU)L-Daoyqv5CB;$k5hJY@jq=&aQ;+KL z;Y=Os_0ofb9ofBn{mDN@_BzjXbcV~n9S?aR=^~km%{FT+Dy!I!4|*-tQuBfypy}nv z$xxZx*L`(%iGGFDb!hVgO{qh|+mojP&ej+aiB31KmBjE&3T-9y_kcc~?(U7BFKyG; zX7`PvQlF5HvV@YMJWUta&u7?`K#Oz=1H>?lrEJ7Bj8Wbwv+|nnL4HYhC{9yqA@&e? 
zvQdt?@>b7nUI)|+Yl~eAczw(00ous=duapvIX0*wNu?yZZroe^RIhK5Kqz(GD#%c5 z(!tX9mT!4p|001sPK(*H=Jf3_rR2gseS=7&YwP4Nzw6HW!VT<@k|ZsHyXWxnVuuiO z7o0n4gTA1`cgf)VT%`)4aQyPqfyr5eDto!c>k~wuLcn*3%yR;JhKs)C2PT#lM_bb+ z$RoF**X4%;xT|HG9tgyeA1BC>l-d3++SRdW83W5Ffr>EpN{T7!qK}ew*+7_aJ1AFe)LNT}Bkt=5ihU(DiVgp^e z3>M2eyAWp}c!~{Dpu}sg)1Qnu4;wXVf}>=`J+XAT3q-Jn-OWi4Uf+W{LP+>hnLT#f ziufGDEWv-SIyi~Moz0bTMM60RyB;yt?41DPJLyezBT#ZbTOJ?q`bNZ61a4#cTwuNW zU@Aa>W5Q|RadFB{8y$oIL(Z2^a~{g5g-)J6wLs<41n+EKl&{nlx{Mf)3RK|JzmC<; z-I0pEV)1#&wvdPnWy*U4`vMGa8SQ0+^;_^R@S3v}8LG?iG9uv1)qhW%_jTizt$P8; z?`GqTLNRsYfmXFUt9{xdi3tg_pJev5^v+Fn zA;%Y>vz2r47yf9!J!3F^{KD|FgmdTB8g>(5U~dFw5f^wU1wV%8D>gEo76|z3yYL%U2$Po-R`2VZsPBAn zR*9$C1K+kB{NdlFqZLr6KG~T+UOwf{*Dbw-14fsOmU-jnM>|U`H}^-+LGz`ijkWLP zFa$6dpF^f`lux`>2hwci*&4BKXqMxhpf}(a;xaE02)(;NcvUtx&HW9_t^|PNi+bcw znwZx9aQf=RW(N>ph?#A$rFSXqQClh_pc(gUyL5hQqff6sxDgg^-*So9jrmEXUW^$w z6Ygu@XRwWssM$@Iaa)FQ5%3Y^ZW;uIG#h`{&fl*ra20|1wN`+|Hoklr2?tun;w*E} zMrOtjBF|&Cb6#+O61L3Yqv-mdGfZ)}1#YC+tdTc2 zyARnEn8dILB=jg2H98 zpK56HkT1DsXR-CmVz5Uo;Y@V$XS)LpFF2AE!!E(%YUURP(B_~Uc+J-R{nfleYS2m# zo?E9+6+wfj9CiUt&?8p&a%otV!h=6^t&EsOMTi+`<73I=tB7IfYn9X5h9S#@jG`&$O2tnUy5PI#=qec=%kvYv6P^K|bV7$_a=aZx#d^7`}wCJ zw$LROIrrV+nn6Vpj%zAmL@*WF63^JX%HHMqTRr_r{Jy`xJmyGJVI>fXc+IjWo^_ka zx%2fS*e%~O(#YdupsS7=XAF1%Z5ZfN8tIapmfch~p0p|HGBvBwe+X6{L37}MhKvAt zTU|%7oWY^_paSb%Pj08>-cMUqVJIHLgZV&ayAi3eX)fRLh}S>rPU7Wm{l>LW7M^+N zC2g+x=c8U*$cgS^;GgYUqpp%@eNQHAkQ z8K&kW5Z@ft;fb7WT!h3JPoNjtm!zSfYV#TiTC8q{q4w~8F0oHtm>T8ogW`9}r67){ ztvP_4CLugH`|@YbmKjTbChMgExXCvubt2I>zQ0MhJeDuqA6&>9fM?qAO2e@>1Hm(F5SiC*xa8SEpyKR6RyDh|`t z2s%!2Pd{eb0#8+t$I))C-(Do1RO!aZycP(6m3H@zj zjVv<72of2zukB25IsNLrZYe>^72!Go*jFZH*~QAGG``ck6jK5r){jn8#tcHlG8;di zvV7IFyjezAOFA&o7@qQ5L)srPbU~<(6lJIyN;D;r*+zh2FPD=%uie1>LE=M*<-~mBa{&UF6t-qt(*RW|s`fhKDidg*;}^kW`G^_~%MtZOxoq zJ#oxiB$jQnc14%>Z_K@d;LVdj`(Psa%X>UT)K{KLa=8~93SdSDCo$Cruqy#fhjl`G zP{Kl68fs^+?=hEdU?o}}pf_j`Oixr2RXcM)!?Hv-4ZY8p*o%~P-?(<^KSelDc8Ctd zlXIuQmH6iralB`8e>b**7BUq7&28XJEPCGWt2vjXMR{c~D^mKl%cj`Fi=TX$Ze6jt 
z_YPL=?kw0S?7BI@!x_#ZC7Kq~t*w$n)+hev3L?>{dtJbSPjodk+Pg?k-pK_0p2@{2-C5=;lD52u3Gty~lRx}OZUXVsLO%-J(V0(jCNg5OTVS1D=_dh58uJFQmiOrigw9kt zhe}5UfBspPoShttyC?@zuDMQOB0`H=Y)ykiq)FmE;>BXfr|`1Nm<_k|e4ZeD3;b@RHRaykC7j11qz54VAs;QqY@|ej-j2S(#_0*`aA{(*k91HYb=`f7BQRkl! za0zxCfRH3Uez3wUw^=clIdWev;zB2yX!X@p6|EB!DxBw0Ivt?{f@Dc|^Wt%A0`vvN zWkmAa4Xg`)-y?SJF$O}LgrChf7?1sVs(#Al;{tt;JTTR+HZgF@gNrz(IzN@V4E9y8 zTMyqtK){+>tYqWH${)?gXl0nrGetH$;^tuu@mMcbU7x>~pEK~DHpaT3(jvvAVLid# zYt0}v%j5MI1G7NdxM!zI;t|&_O?}jXNl`yS=g>IBwI`a3*tWed9n$9mOPGGkJZ2%2 zru;Qx?sWxCoQ7+h{RKACXWO#Z{a-`D2#FWb=H}Br<|cC%iTv?+vMcPSvs46R)xrM; zQ8Dr;9!Un=^2}u;<8_D+F9>C&#QYGv_w1bMR#D^heF}%DRaI&VjyI|x$5;htJ&>@DfBc(rf(%QZ^EjH4 z_~`(o$%AmibE?##PKXn9h1iGH%aCBE>Q1CFqosfIPhww&1Yu)NdQ1)~YTLWh<@gD2 zqfJZC$v4{)GaBSpmVmHc=Vs7eoQgLUc z*Fid3ph}M7H&8Rlc4&AT^bF0Qh_z7aM6?R z(kI>E&ste^YnC35lf-$+tN3%UE$g1*y}WCkWd!XCYa3$Phs@K5DpxLD@Exw&Kk$I9!ScNqY$P<#v0O zwHOK8PtH#;>}N{i0n$XU_8mG+S=uww?ED&^*X=)$oi65AMAcad{f2cz{C|0#9&YtT z6?KI1P0yQ`5){J4&q|NGhVFA&41-f z_#gg8OZ~qa-FtO$v9L=QXE zUm&fK^HmH)1o~u*Mr87W8xeK?@V3K;@ZQaKf$1m9n^!-t9sqa`I=5Jj_RNcOe z$ipA4Ny$)mz|q?F9nsV;kn}B9n%~Cu_o-RtK7MS;L@Ny){r$*DT5Zek@q==6MjLWz zv6_+J$jH5pIMTi=V9(HNi0OW7{CkL2m4(*QQ6!Awt`8IJ)&()RZz5g}00EAP!6n~m zd}uEwI5Zb`Hkrm1f-!jLEBQT{*5PZ+*K*pZ{xh(G$yL?k8x?sCZ<#6o zMt=b1lo4PWil(Mo0+y`*L1;__}6w#!t6R;87g~f3v{y zGQabQJ3O=_AAEWi;X-lsJgPUZXU1fP z`#ar%yx!?cWsJPvO{@zo4Z>rO>FOeKS97%XRMz_EiBp?G;8f5a6{Z`9TiCP@tg>4q zpQ$J*>%hY~XWi2QTtUU6=I7@Y731gUbMWP_9pFfkDex6c%nN=Y26f?rbmcU)r`{Is zGnVX1Ou}nn!ga+Ghad}ol2gZb&j1{01^+cf$l5>H!;^JB%$0po4*ljldoP-y@e@?h z5Sv-!YE|UQzTL*UwB(4ZA%L02YV)}D)dApOdmG#R68iw^()8HDqS-{iDJV@^K=ynh z`4Zo~_{@k)Q&uHu6nX0-3hVHDwQ-dZ2MQwj#pCD+udcUizzz<2z7Dz#2;Bb6Op3}) zQ{c09WQj2lOj;Nko0~71sUduOaHrAMEX=FwUL791u(w@ZjqrXSR+xHMbAk& z{15*whi7PdQ(mwr&Yap9W)_AOC<{?r4-$43GqL^F6eYNHf1PPV7MSvuDeH4O%nD=zYW<)wADX_5lVyaVVd~<1|^Rb4!^MGF3!mBM$8c`P~EXTmDoCR-oSrJIppQ zF5y!z(71+KLCmE`?w|>dKW(R1*5eNHAGe3T1o@_2f~fgJ*}$GVMoxYA9hwUb?5pnt 
znmHHZ0gdCM1qJVYx=1Wb%T`LzjsvV)6FSQUJ00#s2(J3B-W6^yZ@qxz@*e7NZdk(e^*Wc=!-FO%{T{odb-W7 z07mvu=VdD0M&iLx^2|a!jPj-TmfIoxoMa}4;U#GpgV$wwBNWfE!XdgkdIPM6Q*Fdr zm*lKSVePqo!cKcfBBld7RfTDSrwbg3m+WON<6hSX>B*+w$I00Dgtc^b&3f2w=JnzB zQRSk{u+DWHKmQ@zo3ea4CaUp+CW9*O1XG(FFgpe#2e6&H>|%r@EDSf=?cG3o9;10x zMPX^iGbMh9aEn&RMcYD+u$?|Vu|ZQR;}k6v$1a45HZpjBiU8*&>76;?+%J^G-lQj} z0F9j6I7g%AC1--8JV+H>bW=uJL`K3UQ^MA+vpB%L4l7|cb(+#}%Jlw*nVuZ5?Rq3t z1S?d(NUw1lZg(ClUHs^Z{NCJNWwbV~rw>q*ntna78fDB2OHj4A=WfV>_B0_3hQ593 zL;kwggw{R}=H-?4lW}|3Sh@DeiAvT3T~#+EN1$F&Mi$D^#?q~)yR9x+iI|>MeKm*iaj4nGmT>*E7U5M995vpZu)c20fLwb&?cgP$>e7v~H-Jmt~ zJuMsMh;v}qcS8clm-yS5iIy7F4u3uFH#4jAkhZuY`ab&i)jTeQN`gTCL`b?v{rJ1H z^C|0R)jy$pwQ-uUYu!KIe{@9%bj;~2VFhqPja3*FYopis=!YU~)}_8$_{_V*f2L6P zkF6nQ)`Q1!xLCr5qa~t+=FQ~7G&;vMZw;CyhSAbk4-KcOymKj&gExswO%o|6fgbcH zSY*m%p8BNZIm~NdGH-O}13bGU^L~KG9Z3b4z`SZ}S5(xG?U;iUp69OGi_-4_41MJv z7#cush;qft&3RqWn3JAZE}oJ8QxY6_2J&gXD%UM8B6x8;5jS{I=1~~r%97g(dWjVVstXuok$)d0Ce(vqmFHU9K z9mukhbh!T#^wjB^L9Q@s4~y~=BelZ#h-wS3$(AF3>TCO2IU)X|pL0*u*#!=-d;Tcm z61eJhs-=rH2_oG-`p`Vnc!V*RzZWBviQDOsmJVa+L@MwfzcSfHqOgtYEUbQO@3(}vp`hUHMPHpNLeMVIvCQnkFLPM0u@*JLhWMt=P{ zgOaz6d+v=chkA|arliC>ftme5{_ufNSd7y=W4eS>K}i&JQvEFRAz9dbmn#3iIMy`I% zHI+x#&PB?Y9&_!F8~Xu3R3P+l9y1rEz}Cot&>a(g?R%<%P(9?<@ILBs`5RV)(s$tU z-5p0EL5GYsil-xYq1!AcSTiBFP}~%MPycDf%ID;VizI&3+uL&h-;G@H95!-ohRZ7~ z2N&K@;Hd&X^tSY51i#st1K6WrB75a4E4{qB4joowW znDwD9__)T&G92O)fSUq3kZx9oBrzx{JG-iJjx6Alxld8I@GB)D?EZuC4MuC!s@U7_d zdA=>j4S%lbHsq!3w4U!gB)b_K-QOPZl?4=Htjng z{qpg|q$pXTR-r%YU0>tZrS3|VgEcvwMmNEbEpzbT>k`oc(*d(Lv2D&#z1N$g`ZINC zI(uv$>NJBTt{u`Y;tl@iI-7l(3x2IFs9+O){mA6t_QA~=wPX#j-LnPY3C6!9i8Fvc zp7g=9OM-~$ysv2uwJ8(6?`FSdE8dMAC^BP@kY@2ND~=NOZ|*5xpEDG{wnazcFvD%v z&<8z%$`H%r)(5(&6{ShCa2?ba5cVyyN%wkPiMEF0(j>aF268m%T z!}*tjUoEXe7sEPhayIH`deqqqa{5-3ljeltxH z-ga#_x8+8Q0TiQf7|T1;XBpb5bh#rAL<)|vci_~^@k@5nCd7iHMnw*QsQNrxUocLF zTgj10S#zP(LZ?o!zL4ftYG$jT&~AC@U(1w@nz>AW7tfaw7E4yk^{kzUZqZYxQ2LS} zv)iR4&1qgA$NXwz!b@Py}+tU7~lus1GpdREoXzNI}{^L?|{_HZqyKv&b0?%ZNTS$|f@65`fa 
z^Je`MCAz012qo(A_Ulu$dG<6A<<0((3Lk9jQPW>%7BRiYVifc^`~&lyH_uR4RhTYE z(t3r*HTLi*_6IJEGt#$;gcGyKdm=o~#W)k-;ViFiaCobDeNX{ykiyhQW-OZoPl|%H zY7UuvU+)Y5=v(Iy?2qgV^fzkL-B4l*RN4AswgH=vjh;mG z=RqoH{{=OJO|LR7h6^idMs*Uaj@MJcs+|5}36CWy+frv-g+!V#FQKI_r6k)n-B0nM zDvJ;$nP(n{57o>MmuZ#xVl5<{IS^GF`oYuV5ndxJ8;jEU1+SQAy6lhwDeS>fw&(gq zVYbkICe)-M!(bALh3i*YZoSUi8Sn9mgwF(+HG8isFGkN|;maRo z0_Evk-h`}4x}6k_`Fb@Dj;HbIa{Rht%a5Hp7|AnP%HsByi%P$;FZ;>au8^+4&ML)l z?ov$5!9A_Cb*Vp_1Q>cr*~>2S-vZ2K!?@!x?~q^59>4WS)eSDWZ+9?vD^NaHgDjn2 zQ1qmV6-Wy(x#?bP$EorH>)z{46Q^(DteqBdA4dtm@Uto))j z>%G3qwK@&*(gcYO;px;$u;`)|=|2hWHq-Y_MlXQL^R#_yyj*6v^Od_~i*PE72ZGm( zTzY2NX}+1Pc2t>!b06DpzTZpMKKSNzKLR4XXY;Lcw2^{w;5(Tr$(K>(TrMGdr`Mv@ zDP{?wk8^qUg)b$;N0fs&y`@>Z*lO6{$vqmkVBar#cF9}%uG`n${UUzC)Q?ka(&mZU zk?9&OgKVEy%^+?)PM!^YB6j=20_+~kz2Y9azkR)&+WSpH(>0!S*H_vk5`V5rmV2bb z&61h{J+%SnMbZut7VCYuMuV1jsCH1Ddg2XWnY&)6nY8^bGQyBu2otN`-z$BaR8?Ag z2s<7of26S_(gGqMI*CWpKWpMc++j#^H-W)z!rJnnqQkIJWy2{po3@gmqOWp)_bkK1 zYPky2eYikMR)Sp9W2iku;%Z5dHJ>@e%4{Fr^CL=}-#kjd`u&)4+Ki+}>wxzDlUu9B zK^|XTc*JF@_?GKd-#e{I%Z7a`OWWQ1@F%8uHRosA{IBn@+s`$%A-ZxaYdI+Q{?(f= zHMN6`qYC3a<*fED??+o6eh2T@hRHpcRIRMXtUX964-$6U%NB?+-Mis%`JeS1|Bbs+ zB9D7R=EY$MGqW1=dcB0rD|TR!JG-Rc$!2iNU-r6C5%7p+aWmhK4zhq)wTaB%af`$+ zV>b^Tz7ge7jvnlib}m3S#(Qu0y3TO3nl}7+x4-i_+3uPIkWlEVUf5UyJVvuEsP2gv z|3ZhSj4C#o=rqH7U};)-V0>bz+{FmfDGI0|G0 zb7e6a(!BD8ZjezxuFsa?2^RU6Uo9a}v`@tbIJ-vLl8%w7vN+tEC~IN!XmU@p#Q9t# zoGsfZpROsX?(iWfvn~De!hwtzZ3H4fns41nl5LiJp#C=6pH3s#S;{s=+pP%X7A}kR z4lS$<<)ybFfJx7<_?{#S#nutdNpujV8I{go#MHrO0vLYVRy^GC*@r}vfa;dW@o#N} zX?a#`y#@1?FhR&`_6t3hWwvFb7OCnJA3F%gAa!C2U4(IT{P83|IXu}|oO;y z0~LXDW%}Db-UQ<+GGvUNPj5ps0NGX%QT7nV2EM$!L-N8!+nl#5Ap0N7g#5!FTt@?@Y3KG)kDXh*6WS?Mwa2ajY(<%|VRlIuAAh_&OM;*w$#4)OnqD+S$FVe)!XFzl534~760KV_ z^^FNr*|TZA`@_AmKaDDnCCOz=E!~2&QrJV>?x)RgLyCiB?|8IYmjsC_LfeUES-hoh zu7EJNd?AxC%4GichKQK{B5|4+SEoBV=iQ}D)1>QI(ODgkrtzrYfk%F9fk!V|cy^|% zK`+W?>auJ1(`5fH4OxJ{>f`|6am_-Op^OCqXFOR~F&UgomY9ria|Z}%jzOhy^}%HlQPgIlW?) 
zS`Un?Yz)$%h>714{b~ycx?dx^bzu$IE!RMx(VGx0$JJS9_VRtpCL_Rfm6Wb$(I^Ga zg54jCrzwy$g$)2O`YIuHh&g}Z`kgTmt~0dyKAAx(;+guK>~eqV&3|-=2TbJ#jA!$o zD;Ki*BHvbT@SZi)EV0tuFGLAPI9V$E(qnYDFD&})?bon!uosHkX}Hs$dbYf%XK0{? z-cU-YlXxjE+(rRQ7RKjlUol-1Vi!gV&DzON}^{G_v)DuOWELHQ;Nx!XHsd^b~|K3Ny-h|mgYWY!n8ZDBS z4VwL#*{Qgrs!Oq&&mMu}x@x*x7Mf0plw%vOpSna)=ykiPF;tyjtb6GdUfA|m_2&7_ z_fP${@%^HS9`WZuz@_!!Dmf}&yXBiRZ>-lzBvJs7K&Y5H1DX7a);AwLO>YEfZWkZF z!oM6D17>H{U#%(7Y&O}o7}2MBa^UX&((6QFn&Bf>KtOr#Ik-s*qA4Ais}M#2-Omeo z0F}vEN}xDq#Em3fC~8qt&cyo1Ev5 z5KJBBM~C2LggO)m7kia_T{{HIDP1#Vt|%@i;bmL5YP_s#t^)RI3S5L$-UNT_{H?je zL|WzfA$mio_VlfZwe$aS<3t3QMomj9)YZ2GlR&liogRERm?9D2`u?vSEv62X4%fP~ z8sEsI)4iCqa*BfInQsm{l+$Y~KI6g)>ixLE;@BxN0|Y@3;1UcGsyQ@;0%27new$(Q z#MT$R?#qNPe+v4|R0R}56i;(CB3f*J_Nh30-oyi{0m9vcpbHZfo+#VZQO~+G^*+j$ zRzawl)PHw=EBdQaFdoZZ*+4)YT6Jc=JjhvUiQS;h|7U5>gZi z3tVxR>z#<$84!Pw1ErJ4i1*3H+`j&6S6_zr9QIYNml10j#J=(M#8Gnlx)s2Kt`UH# zSFQ@dSnO(!7Io7SKXr6A2eErYhbCjKEixLf8raZo0-&^_y#X|0zGk^8ztbsJT$3hl zrA_%1u<6!LH#Y^yP^`WC#xzWO!Xl@DHh$aUsamOmPXzXtdiYYnxez}om&w3{Ap^{r3Q(oN9;SnQqM^1hV_?x+ zT3|d}1ti72$o)~v03?b*AW}#i-xA2?NE+M-pe$%IyAmbVRk09Ks9@bHC ziH$^jIdlV(2fR21dxk+O z7y@wKWD4b03<*9?lK2g3epowjuC*tVP-EX83Z92psE@A%`3%V6s^KlHu1$_wQ#Y}3 zE*pZ_ZPd5j#6K?*|{=zW*@d*5rt2@NJMx|G?^hrHmPm2d%6v#xQKutY-NY9e~+*y(E3}kxLcs-Jd z`bjUTZ~Q_@xBSWaa<1RUT|JWk`kJAytMBU!xPV-l#YrQZ z4E~KlE}i`JOwYl!RDq?Yj4bYSXA1|a^I0UOJ$HDBuKRQB+x=*}<&dZ>t!eAh`J36E z<$UiY?oRiO9dQG6&K#Y{K{nG`&{Mnd>*4O1$|G`TQ;(&iWaerfn^xFH9vL0(+)^O5 zJoOsLERg|Yihty^gf^At7EVThYH(0F29=C{&}8e8A`=r}8sLEEQka?rr$lI)%O+jM zp>_4f`}VGMh)SVuw>KTaztAHR+gkz+U%1P|Q|AsxyRU~lcgH~~(@IU2e!*sn06#HgTBeg>Ywet?3aTrPN4;*N+c6Y*v*RWHdUws841%$4jc`E3I zDXFEE@0leYfgU0CDvRuAyKyuAU)TKqxE@+D?sUF*DKiClX=>eD>GhOD(=8V+ty&YO za4h#*eDWmp%B(U!pgJCI=PP7+p=bV4cbYjxnZVv^`1vK6#|wv-dM8o1buon#3Ax?j zPd)dMR7+PX3p-15>eNXjxvsvBWqh`e7o^z4?s#1Rq%*~(DWJ2wV+gOm6o6#KDjI*~ zjSIh=X$2&ylW+3AH{2OEe0n(CS78||GR}+JP~jqi!Md4RiBR*s9L)!H%fap7;;e%l zSQug^V66Qb8b)>+oO$s*1yW}Yu z;^hXS5G@%>DOjR~XsUeb>`PJ3Ny$tN&Lv($XQ)GXc@gg2gWgE8xRwmwt 
zi!@u-5qE}`ZTy7&7`BFy!_*F4aSjD>4~by+zC=);nvd*8zXZwhsosbimfV-=7ilHo z=R$@+g$g}g=QZ}Axk3Y(Q|Iw?|Mat?E7i;UUG*gMD1U0}sP!A>aN=Gq^o}}2X zaMEC@8~9-vr`zXv226J|aiR74!>lSYl%+zM8+7OVabu+4IioxK*?1AA^<(2QcR8gh ztW*UGU%#ydmwq#wG!yNR7bwqwyW=_Mf#b7rSuSjhmXQo)U0FGT<97)a-*{hAelhC7 zu2`#8>ot;38WtCXF=3>}pw}M@Zo?i)0aUl)ElglLu6lS0Mr4$xP#t+{JMv@NzCmO- zpZXRPqcXpiPh;`G$Ew{Kdbpm^m-KD2BygP#( z$nL4W70p>k+Svb#vA2$EdVk};1(cE$C1ix4f|3Hlh;0H2Dhh%@h#(*(-94P7QYuJD zi3rFcq$Fh2Mo5m5uF=it0R!&$^qk-K_qgwW?my%_l%pH_)cbv1&)4&MN-ald_kYf= z7=1#o(d&2a3f6s`30(POZnscz2f=fX*3$hsN)?^<2C5Yo`WP|1mL&Po1N2o|@i|-KeR<#^+&MhO@C1wXJ6^$f$6dGJa%Ozv=Ah!mkogm8B zAf+Uf?5ZPrz%V2)WFs@E@lsLy9w;VW*PveyHw)wy;IMu{1^ve7*;hz^D*cg{ej1Yh zSvCht-Ty9vZcg>DmyG8j^<8zC4SCnA-O5l;!UWOooYC!OhZbmrM|DWUcfbCKnckn# zwXNZsAUC22?`LsK*jA~Gbe-Uxt58?JIw@NWN{1u@(U&TA-gGDY{Bq4E)cn=#tp(NiWYP_6{uh3SF$>;zDF=V@%Lo=ZQIj|Xi z4trklP$t~3&|{T+-u+qcFkCg?k$)h2Wmt4&?_m5GUhR|WK4e`OPPsOYcG-w*zp5;{ z-Mp-RU?efFU@qyF=L)hlU6EYDOKXRl1uN?{%&MzH(A?@NE$orm(HVCxQd2?4YHQGG zqd|pb-wi&jw|=~d|M*1`7z<9T{_IC%KQ5ZQq6D?z*6vAPe~$UnX(Sy7>Eiba=e6T@ zRrl3d+*m!ksN=IWtJ-h89IH#yN`*TWxa9$)exjZo->WFgAYw~AbR?SRgqVHT7a>`v z^Sv0BeM=R^biV8*L}@)P4|$fU;`tB9d2q8t81EeirR`1+OQ6cA_SHaq8^LmTv$vtG zH>2&L@k1?XEC=UAEg1avh;#ncyViQO%K@LSbY@{h-JfQE1yH+e)5kNO)@v*BwhaYC zvIsg=O9x14bIQ}Dwarb(t*<{Y^xVD(MfWhrwy)Q*4o{9X8IH3e{NKS^pU?_I;Ti~k z@ilNkXw;GCM6#a}qn8VcZTa*zAy|%aX`D1Gr#k!56ghes`?D^z`iqW=5p zMZyN=e%2WYAw!OUI8Wp{gi0-E!|tMyeR-MJ8@cs0un?sOAX&u8F^dPF3OMzt&bqdi zyG4>GIL1ZcG^?$h7otU8A~iI@*CnWEV!>Y8leNH(OSMHT{XSP-%W2xhPL8>xjDh0F$l6pc zrVzLNc6k0aV)f?_>{{QxeUfrR__uRKc0%JnkpnBeh@hGkLi7iX9RSu%0-miqvh?w5 z;b<~sVQaI=q|C><*1uDsKyoE+`l;_5;5!u8mbZG?cUyg*Xy~5tWK9bmD|RBzTpfQ) zC0Opi!&_*oXBm|m1k*fr(eocq4RMli;Bn_EQW&C3`g?g}B|frmN$n$9JHfBmX#Ef1@gHAZCBCVdA8B-xb@p6*!XyR zc+hDNcL}WgCEtZ_XvkCVQ*f22?#&*|Z*l0%qTrXPH#*~5uDy!Ak$M$JvC3_+*Ik+o z;#loY87Ify4@z78$n!tL)VwOTnggIU4rI-ahiZ*TU+C_6+&8oD&qPFPR(DOgqbSZE zL>S162dv_vf0~OEy05he^U%wliq^LDS^1$0Nw^_&C7o@aWWmQXLi=KbL39QT)b*2y 
zbS8lNm>8J}WQE?yXuqv-Gj;rEr>syL({_0{DqdrzV_w4ob%DXhDP&L^^u-@$>r7-Y zNWcCtm|1K4D3&cPGD|aSRjovsn9)&Y?zYorWLvY>?hxE6QKQ6b=_-VZ<2aLIE*tPF z&Lzdo*KFV>tme@ZFkEFSS3N)m1m-R-p7peQ#}(kra@et!V}!bJl8g&_#I>I^_$@`2`r zc}2Bgz9CGsW^8vdoJpZYcVs!lq+T^KX#D$T)j(S0uLG>{u}0GG)@~%QJkk`VORo8X zo7%IatEa~DZ^oVRg*5Xo5yr%Hs@;?KZZjVwMv z=J{V{6{>2B1)0#~G+MN5`kNKer_E=O_e{n?lrr7yVnEk-V}xITu1xCrQ$`BKE5lu) z>ThUkzqNwhwjw>$SMZnGV}UJ69U7EP#Z2sJbso1zBlh5Qxt~Q1$lfr_GMY^CiXR7_ zXq=!9gji$`BU@k?W=aV+`E7;P4Q|U+KliiPp$H{ZobugJZ`oG?=0$q4p~?^?DinoA zsPU`XB^nWJ#6Vk<5_$a&n0)hrlSZ*TmN%GYVvBIMy@#${CVV&sdBa5$0#&+rLWBJm z1f>LiI09rcM7|f~;_KhAO)VQDt(OXuAh_!mJH;dHb_)4CsFB!dm5EDs;0eI_XJrbQ zYp}21%~h9;NdD$!zxD8GAXQ^7L>5lqFzHc6p40qkURAyVKHFf%ojv`5>f^PkoeLgz zoSHS3=>BVq2vy?HYZJ#lvLmY@HHRQ+C4j1}|mDlfmQan^&mdV;o zTH|8+=~Y!eJ(Hi6&vJxbXD;$tiZf{|uELyGJW?MZ6`?H4jY`P^qq;zNp%jY4m zSnD5AqajJzj?ML_^=sEePziBftUFVjv$d~{9zU9j8fOSG-Y;7oEYUvexyyGLHv%@T zE8gS^jR@Gddi%CIVtU;s4~q{L{F!VlaU!u$A-f}VGc$IF>IhJnn{y_6Zq*z#PY4Sr z^1cRY-MGJgfIc!D6LUeub=bu|RjbX2j_SB?;Y&DVvmk8TSF;c+OikcxqZEWR6-06I z`G&-ppMT{*=qv81W;qASt*s|c#TuW8&Am0PTz~kikEmRWS6ynGr>Yl^HTZ0nln@?c zSKVYY9#ej_zciI=1X5UFVNT`D;>hZ(_vIXGNF4)%{c~yw)5`&Fwq8$SBfeD68GT-z z^So(1G2G*;_-$0U^rp1NKl~Qb4-;fe*d?f9Jo~N%ygwMOH|l<^fQcOxxF5fM1yx1| zwmU^ik-wYUZfC$j6grlEh5h8+r{YZ{2Gi%W3Z2wN%8Vm?4JX_4?_EQ2=;?%Jlyc@; zzSC419%$Si>c06k=*=iY=yUgCG3^upv#NnpkztLO9NS&v{a7pgZa>-h!p$kGlG@g) z``lr3nlJ*oXy-=y``X0LM#{BM2gzqNt9-%+-W#+fJNbWi+#fHa&$6shT^qh?Kh{Fo2>5PlTm$c#eydBizmE@LD?HFoxzVn%5Pln6(Zbne|5BKu!>GU5uzn-!{Os!akpLgB-WHPaJ9kVc4 z<<^Jg*&TVO*hfd@U?Bb0V4(tPPsAn$M=-_K4nLMe?S|)8Jt$$2>C~oXaqCxj{v0Z3 zGaT-fx#!gQlT=iN?F)E|K%8F*EnW+RO@02l`2kuX*9b4u=iJ%do+`Hf#k15n7Y9p> ztT}SPmsIve;6zu;w4)p#`{M;_O`9T>9ETNX!4B-@)q-WP0S=y@D2uzQ@I3+lOIG2= z0{@RgS4x4`UHTZ=qvGT>4Lu?L1eIU@!?y)Oq(sLRbTRu zd_6eczjDZtT#Ay?^h^|MT=wW&;P_$<)+7d__o^5f&}m6MP8QoxWKLx}?w&gL*MM3x zH|TDlUqD8(@MQv}DerCpo$vF7o9C-QrO=6c0n2dvw7P9+g! 
zYD08FNi4eh_#R3?h?>sHNpT0A91D~^=Q&qZCTUx8yk{~Mpw`gQ8(Xfb7IfX6kIQ0o z{Msu`*J}VWu5xeeG1*Iwq#sTy^`D0kYcnhDf*~w@t1d6}och@`8Y|kZX@cxRc+Zx? zf7@|)7eZK!jBYY3wG4-_*y+V9(z#!Qv2HIjfoY+VqZbak)P>*?XZu# zPFdF4glzi|IUKDqC^cxyjqlOS^fkl#Uke&u<&Go| zk)RbJ{R%QZ$mJ*ddtJBHla?UvH;J3#waPQ zx@@a5F@`YZZziWbLFtB;epMrX{c;zj>-ymCgdIEB!~YjLCtv(jE-ap4?v2sF2OQ7> zV#$WBrgK+`tNS?QPBDS`n856G>ooNu?S;kkE=q~rS)BjTW{1|vM^qwxglv>CX_CXq z?HXL{25dc@?@$;BXewF$L0%+WF3GNKooK%mh6pP*H&}w7PW9IwXK?Gf2o90ob~`g> z<{=-W?mfREb9)S)YmjnE5@o*Rf+iuz9sZ3lJM)*=NN5`f*o=f`CVuv0-WkT?hKFuEbb^)j}TKNSFz7}l3 zDDPe3-6Eph{6X-U}JyVSyhj6Il>ZxD)KMQ?U^l0d?b%^tnpNNkSHN<}IJ zPZf(Y@Yf!b1Hp5{prgS8kn(Cz2C86tu+s7rk3A^_!F>nj*qewNQWFjUkSSBpl)uy> zow{#*7@uMRO=UrpSpi3-JV(DWG(T(!i`#Y{CElUw4sgBT2Tj=$7`=_?e(%`c5e4$; zlHe5)5dgZ=F+xh#hPl8<1nf1)!ZD~@-mJkf?|6W4nX7KmhB)}oQ?R{T4gbFng=xQ; zP~cd|v-3MVk`?Cm(Ny9(bH-p#pBHwJ8qb|MzHO^9dfx-VZS?8jw@2=+AXjNr9Mf;| zD$;#|EP}uT*?=9{cQFZ9&Myuu8a81a* zG&uH*~pA##l{90O831oWgGCG4G;BQ}# zwwUe$O~)_qqWG>MF@bc2q7P!})Q!MQG>QFY|CWZYccQ-#G?N${C;q#7kR8}PsKlHQ z@1I12GIS_zAcW7SklVk4^f{cRV~)nI>F!1V1)bNPvi$wcshwd5iA27b`^!aj9A0~M zRd*7(`~iKxuqhY^@W@s3NdY9FnqajsuazJDjmmr=8UZA);r&>^nl7IFOND+tO(vTP z_9Gm>3b|1rSGB+=uub3Z4KDO13LXe_{J1BxE`fOm5vVB_f>q~77w zCbl23Jst8L{PDY;3INTUvwU96nEkD>lk_=h*yZ(x1Fq8mkhT7?d@&I+h;k5Qy@Pzf z^d{OlVU}hl&N;m16KMijmpV(c%Yn~Zy@)7h_CvmtO$OVw|3pV+ms5OZk{x%k-vbB5 z+$C!IhSom2bCYv5DspU|-^(Z83jZWGTGzSbMe<5?{=h!fz}pZ=HfL*>%L6tWOHO1S z?H(VEn>3u$4VRFwZSC8wIX)ECKY}#G1JLI(Zxj^kMebB_GI{Q;Q%XprfoQblNbrp| zjpVv8w#c{#Mmql6ImWkGEvKBLHwViKx1tZw~d?obHapJMhW zs-nl4K^~N|<3{}qGRrKu<2@j|;iRRSBb(({wt&=UTkSv7{w482K&=f=QLlb4%+;_b zAx!T^lN^T8?up#%EJz`Z_B-xn9eVa(qlfwgflX4JAgfFIl5@jR~bFe{IO z8?dm8?O)snD9jZ#aiX8-rErVW6N=6aEkVUcECk!pXc|MZmb|!;Wv| zj^CkDn6nmS8QtPI(AP(KkS=5+miCpqoqqpWG$xZ6vKmGEe8i{K{)!E z!)=v8p9>;B$uqEFwF*-BMoQ2n2#6PulA>J@i4E&_**REtb!K42P9|ySnz>dV_xput z%l=<}!KomkG7_>`e874fGI+RW?VdX658^;Y&tv*6v8@tSoKw?B$jcgu5=B2Jl0C*E z@$imC3doU1vE8y@TaJE}_Ep4P@RoRY>6!#x=RSJm2vw|gZ*&}5&mtz^T9RTA zB2=_KmK(Na8;ZLkJZW;Zxc4D;9k`oKRPv%<34_N^>Kcf 
zH^r>u_zNc;hj&bY>2Uq{IKFiwu~fdzVyWlg|2=1(VK{mxVd+o)kbIR>{DHKyK$Z6m z2id%G2paz}y+fcst(I@GP%ef3jq@P8?DtzFfh_`TZC&^D@kVLCkihyW2t2x=5g$ zv}}n86KsdcCN{ZDzgKT0gyz?VNn#|2)uMQL$|xi{YmT&2@4@hRB#+GvOU?jz zc@3AaX{Qr0I+b*h)Z|Z6Uv`v++Lw*Ghm8+qjuFfJ56Yl-tKZ+AxNr3kwVNKr5%ONQ zjY(Dfr(#l{fV!co1X@rDF*o}9+!u|mMhzMNqg%8N)tlO$ONY1cD|=zsr8Q9ohw9xs zTQ%#eOEcZWDTM418H%#mpZs#y8-3jx2}8I^^EgH_NYZ zPF_8iRTjPJ204D5C`l5UCvinm>)L4${Xy&JUc2TJBK@0E$BOy` za_e*Tcj66Ne6A?2*C-z6Y#3eDT*ZB#Y(r#zfYNOqy>3`fF11^*(2%K-OO+~j9Tbkm z#g_e!Vcfx&nT;Wjmv&$AMEjF;XR=3JzUU<>A!f~;+=mt>b9d$6H(Kwu>p0Dhim5uMp^)?&@rGs$IW9#TsW(-V$w_Nh)dvkuUOhH3Pa2-p}XdB&)9E?bw|z^(Kdx@s%_yd7ua`LXS+2d zp0)7C#$SLqN$#^DtW9<%2xfu@v;cGlz_cN_o3Vuqs*VI=*=;dV5f;p)E)4yc>rQrt zg80yYzxboIOWi3BUl1gCS5mJvl)3YcX8M$u${0%|S^!6g4w=5>b$85}!7Bnb4Ksh` z7#@35vKFVK8q)=|J`zK!T$cKLJKC|Vtn05i@>13F2~Zq^Zn&afyPKf z76U_~JwnpZ5aGw*#F6mZQJU(4V#z@`>6-!J)J0R9#N}IJ=Zs~mmAwy(Gh7G zc$98`#qh_s_}rCIkRIqfZ5i~#9V!?bfZph`Qm-)bi7UoXS0?{1g8HmT3*`R94z9~N z_)Cw(`XqD74$PsU^VxO_AV}~<3voJ;*U@Ca6esPnnZuRu@nq~tYs=^pACKN*FO%~N zp=MZf0!fb@m+R(no;^}=DN|*&?>30t@VJ2a=L^e^(gSRl2MP<*-?9>2Gdt57V|+=j z8(rG><~Uo6ofvptxE6I?XxtwSOrMI3d`AA(do`NLTO7~MDh|x^oAF7w-dk_d$`VY* z445Uq9OLcVxeVEn1zcFNVvE#NSIAoLX`Wk2qk8+JY*1ZD_~Gh$ zBFuGhJ?})v{U+9(PA-r*pm@2{xp13?pXXCH0ey@s3T)DA{?~da}upFDbD~ z*pVkj5%c~@j3R0J%EH|_s5vuYmjb+?b4aS|rPM0e2+e@%koGFAI`0(0j2V2R0D|?a zm4|Z7L^{S?+Ti-%<*2GhG7JMxFc57L(44%SR7S%+7}EarQj?1LCMYdL@DJTn( zxhS@7#%z7hAbK__QE9^HvQ``Kf3Y5~op-w+44{4a8x?p>^YLNuitt6*RW4Gm*U0cd zfTziB#-M6~5X4P*_cF3?v59w4Y&rWj!?y<$KfL+#;$G$nZIYu4``*oqck)w9wzppt zdSA&CYJQp{d8n! 
zg+QEdpY`;*8~{vCgDvvQPitmAk14-83h^g~ls_tA`2sfp#de6afL#mF(6OH~9JtGRm3S7=1A-GH?+Egv>fioQT=UF0Qq-n>|+(^rWdDD`@TA%GU zOH5Zt@>zJI6u)S>#q*xg$t9ZL98JU}2y(z$EMjx0hb9n+@lBA^# zWj$}`NnNV(l^N2`*HUu6UG7Mjh^?<6459GbbO9K4pCsNf^vegNRF3@jkUN}sPr%K9M9&u&{HB--uFz$$y2C{j4x38Op$Qf%cHZ5X3BWXog>Lrbiq|;L zPdMHj?}H;QGb>)&vR;?ASuCKyclOu|PuoX~e7W@Lmev<06vwqNw@y|Bi!?#njIl0C z^Hkzq@UL5zRLPOFoPlP~DZ%3}o>7Ifo-Hy>5apK?MnOje`D_B4TvO-z8`{@3IzFN4 zNw8&&6+GnQz@Nz7CsQXbAo1X4>cG)gOTjF&7lUdj)!jDN|lC%H# zU|+e|(*-oPStW3Lc7zjZ*?b)`?k{8l-VO>=5@NMGJ^-sXCw&X*BuNop0Q2Gfk8sCa zG%+V|8Yjod3P`vrj4VmaudM_%3AJlh0Q?HY;qFf8cgV?a^B7%NwA80Z16So05d!7- zOlI}-=Z1ps{x(=-4;Z476HkBV?me^HP=dmL!vdO6AJj~fWi}I_p0A{0b$8CM#NOjM zo6jo?g*XvAV;j8=3s7m}1qzm`+c#&+YHEEZ3X`5bleL{T$6bGk{3>P$w^6EO!6oyFDbdK?%gppu zMb2__Ezy`S?Dc#4PvVT-8nf}=ka>PcLq%W5`Qfr3PGCMH$aQQrf~QC*xt7*>rroXq zqZJ!yX^d1SQClpZD9q$_-i}<81J{{<$5V7JP>Juas zVVFf;N``rW7=Kj2_m;X>qSdX<%f{6DbQfM`@tibz!m$UI-A&~xntu)sB`VHRMS=Jc zRm3om4N&ku(%fa>f&rjgj(d#FU#HI}z~s#WzmG~U5d4%*E++B21$bc$`-Pi%u9r}8X2hhx5O4FmAAN5F?h%)7{rl5-00Ntt41eQ| z)vI|y#%rCcR<6&^?c4tm#Rjwy?|le4SN&~CuOa6NsHA)-c>wm4$)!`)iNo~n7EfkG)poM~ z@J^*t8&DD$Fi<*thfiTEH}%D9+``MZ!~w5BYcVO1BrxyIVu%F0q(<}i3-2stM=vMZ zoY%UoZTpP3|^xzdtH zZCReRe#o*@y&mBZNUk*>r9z!di477+R+_+Y9!1$C*X}zeKmri8jUl^Su`XE%;`tB|RnO97T%AoIu-SptvnO z3lzl;KNm8I74*z2{6klKb;{fh6VXDcR+{xab>J)x3l>$h_6Qo$& zIj#Bt{Y~711?=`vP*Df=Xdz%swwZZkJuTUFN8cq5PB9S{Pc2>_8zAtRcQ<_nfYQ^H zYbIIuuYgvF>nA%GqB)>oTFGqn=)H-mOFnyg0Jp{fg#FpT#yK$1?d?H)T9}^86yxU*$6iD-gvJDFVZulB7Z$TX>q(!21{ z^je0&Ywiyl+&RGAhG7i-Rg zb3Nl5Ac00PuL8)<8e~r@#T9b3J6_JgXe8=H?Q`lUT%O>VPn81N7rqPv7G1}aVG@lB z9VaQNWhI64VsYxAYH=xBd19ub`37WYgaMP5eE3T+>ZUqR8{R2jc%ooJ##0B@wGzq(muhpH#DG6J`|&+Ms{tQ3KrY+>U7KT0g86W$kC?u~fWOX8 zqo+*+ij6H@TKZXrR3ht=1JM+T48PWA5-8-s<*tu6CTD*@mT8z`EsKHL!jVh5Z4I1b zdU4Xio?rr#Hu`Wu%m2LYTX_%&HRC(g8u^r`<8v@@nCCl|vJ6WxSQJx){w7COz}+vX zb3O%B(jjRUl9aFQ!aqA~ebc`XOU^*>v@eti5^Q;IuX-K;k08>xL!e3=A+hb_Kp2`j*L$0T`c| zw%##+ue8|k(~x9_bxdt*n;6@$(EU$m6hkx52j~PAU+eQSL%8TRW=Crs^3&5 
zW?-N6#Y#uuNDFBxpSXl!*Z879#F`5OwW*pcPe!O&TO05q=S(@#{uFHjf{E_{&NICN z*!G3#agg;reXMf!lz7!vH(`qJ7+NW|8inD|XrL-)$03eMWR5Ka!{rjYrPD; z*=~fAlVgpFWRmc7@^4k!0?tF$E&u#aKkW46PlBY9W$pAU*v^39&SCMHbLOLAl_Siw z)d4ij;~fp?Qyk}~cvT=iGmbxdCd)&A>ZiGpC0|6l?JE+*_L4#Nk;{%eBf z^`WfmiD%F+y60K{e2`Xpu@NYcmkE3?s2&Wa`N@IH>$=lZpuBB)-kYJNdLB<})H^5Y z-~3Bf`x80ABGa`V6t7_xt#pBznh7#c@gf;CenZ`)lZAS*u4lT}Rp#h5?$kZjeRj@r zMyh4t{G{kPA95IhNoCOkF%{C4<#sNKF&ryDZX3;J>)GHfnP%(N@DaaCc1IB z?vmbiw9saC+X1hLc)=sFCh{Diw`=}44nbR3Q5{v5;_;FW9Rb2-il2E@6a$cQ`>*2@ zkecrB{Sl&tg0z#h{|P_#(!1EyVh^t&aar2e=DXRg3jzOt^;WwROlRVjD3^k()iUWt zm2d2?^lO-TZi{)HQ0s4<@g^6k4!H8Lw5QZQuSbYkpo!+3!XvJq>HnB^7j9-sS?ZrM zlDdLx<17lM*`+$CG;DiExdkTu_SM^z;EL-Vd(|!ENc!Oy=*&r@jewG`C|Vg_I}34v za6eoy>qF7;X4zIgHQjZ>3Ttsy3oTlvgv+5vi1c0eEbK2;6Fl|kEuB9Vw`g`?= z^V>fLyJY60z~loALOUFQg4>eg_aSojhPDVP+PCJB@hGeYoWHzBzOfat7cj82`6u=+ z?Xzj2tub*(C;;HXyp*{PrGg9XgEv zGC}+{SC)UvZpM?hfBf7)&MUhRM>Uqbz+rx)gRD*;K-BT`v?v{FGW;@mBDm%fV;PIf zvbTJvFg*V>!?*wEV%onqf=;>p{@na5=GX4jGauoKPpm#ys|h{tQz(dmn_nCx_jr8QHHM8oUq%j%p?;@|MC=@~m19@ic0N^LS|FcejO_ zl$+pRE^0k-BT-*f@x8KO@11JRIr-Bi7wmQ@n+OaOgEj$v!0k_ep}y!A<@axD6DxPY zon1paqqg;}3`5H)>Wlmq>yhqYA=HHI1Z0>*b<_#u$rJlYo;B1r!NXmW<-@KEya6lk zNs~(+EAr^uk1!>(8Fnho$5{eJo_-c<~JJ= zIp#N*{(Dp(Cgmz<$Y*LrP|U?V+ZPwEVYpk%Ok3mI9f_20QF>65&Rp<;%KP<4w}Y)m zf0jW@@4+)TJ-1j(2yrS=)55~$niQ%-v%2**4dh_HT$MkaE%MT*8Zswtr_Xv*TJ3w+ zbJj1}pvY&Co)}D54ABq%19sTzN#AYX5uDXg7Oa>25)x^ND1INzP~1e)nGr|@7vMs3 z(K8uZR6=)~r1_P{YW0+icVp?O6^^W}&eLxBRtB#LOn3DF!>{BdeFV!y5UZ0@m{@73 zn}S~203K?J>LXzrWM{Dc7hu92nx>Jq^SomEMq;8NlnX%VCate{9BV0@B(=bJ zn12sBSV2~#Khy?8bu#aw%0&L`L8w8~2V@J$Tfcsg$8#}^I_JrHa$d_iFAU&Q`g$r_0Uf#}|u-laCFTVKa#u_N&o_Dhi6Qx(lT-vXTXFXBIgr$XYYC_2fEcUyiQ>yBT0H9%??Yc*0^@ ziXX*_jqy`soufn3R`l|kCBYJ6qk6QMW0cl`q5)&w?vGm?r$rMDSH zq#yVf>rhE&uN;{7ZcN)MQrQ#H9b74Zgm z%oYkiPz`D`qF{`NdyFNhGz6X;`v+_bWK{rB4L zMOsfP*G*rM*N1KNX6&5!`ETh{-V?RUsNwI%%c2=icl;Pa-y`<~5rTt&IhS(#3+=Q7p2X7SNrWs5lCF+N)n_ z0vzL&Lk2Jt)D{E*t802Rb?+&dzFyjf?NYkQ74x%Nuz0eX4Fq@QNEv-1Usa9wM#(xE 
z@qqR$W)^1FF|Qo~tM%oZF&9NeEcxG!CcrB7^9MIhbiJP|04uMi-G?lVDD!mA7;G&! zb?$p3(vWij?ch4k~r>e&E1;A_L05vzXgjo+9_GO41Ee zU6-x#8);dk7ds>zIM0sN%zo{(`7Vti*=YUMMzA)$wxJFl17`pOE&3@RR2Hph{amA^ zmO~#Bimn2++`jtgqF_H{xthY_hE0dkb%@VRAdl$jSmcc9ZGl=QRWX3X!`^~bafgq> zR6O2_=dGt0FG0WBSAwxF%*?5+kL8Oscl0+nmovSW>J@Kom*14V51YAS!OsIKH!|7-RQgy~BGCB%NF501f2F<+I zFWmuK;K)`V%H-p=YE_6?pP+`nGep2*%tPAeQ>~3#F&!#YD8|KN!2q_pWM4XEnVH_M zcT-YOG{7epo)uYU8TbpFUpDVR2VlYO0mplUlFfbEbY7MDFG|DLpcc@~0M1|0DdL2; z=Bo0jGW~zAA3F3V;^lXy67K=en$sYaJM?HGx#qbx)MUCHPcZ3`_ITK8INp;LB$VQl z^E_x?r=+;nXX&9yYch4PNC*Q%TvE5M$6hv-YK^KE)6*4&@*hsTy$Fb`s{#I>TMe#M zs9mDAG^XxgH>%ikv*nNVVm$qK6Z%+M! z3x$ZeTiYe6^K>I1O68t4j< zR6Kt|9vw$vW`7zuj_>{ZGkeTdN!0(vaI-9Cd>o&2Z0=mgCcBR1IvTJG)p#8w;;AF6 zVBu(AccN=^zb;T?{d`}0FgRx%=yG|4d-D?6GOgKSnA6I$`#2|+uXQoux?OC~KYzRq z+Tfa|U`qWY7>6xZqU`o} z&U#pLx;*aZ@HGeqMdxQ$W`K150FIGv=ojE0(hbcZ)3tz_8m2!v_7LZA=ux3=bnPif z$I#KD4>cVQmm}>Co&gB7B&i`4$=)}%i7ptgZFYLDngel)Ode=VwkN-?N{J%a>G-aO zYON8t&cIQD$6(bLmU@v3G~xk9CNi0Y%?ITLV(QAW5CkvJgh*Y&_guD2@v6knq19TId_D7_> zE+dkm+=2=x+@UGUhj^CR-B~<8+7Zy5;i>m9|5;JXw2mFg=;;5@Ocv^c0ydw*L>DKX zK*1=GJszDG$|L%+A>qR}mA>N1Ce*{}2C`nGCsB*Y8tHr^1)mlEE{+*Na`fC^$TCYJ zZTz1j^;d+<#tclz$Yp+ry9pl>2v-obnpY#=_8fM##YVM zH#&QDLZx?S)X8jFZ1qT)dtNo#Ov2p^(O;E^Nj7yI!2gf9MRur$K#+>T zkxGie9a1~2jZMw|m_P}|`gb_QfsF6ESJTyt#uhcHVueh2i6q2XlCkT44e5*79gWBt z$ou6aW1da(x?^(^$6*lk4Ij8sKUA#f(6OHS-qRgCwb>o)S}>*2F!+m90DXg-6piKc za`IN(oGdZ(P4cpynvzl6+je)?38>X*S^<^`HpNi+elnNd;4pb39!PqZ(qXPA0Gwb^ z4*}XyGb&Rk2Hr#-aHs7$Yp_7L1mS6QrT_oQ52sV{6rMux%(*&FWlbf>)PSBKMyzb) zOq6A9BdADmD~_>J(T3c3B^;RA;l4qQ^6$&lUkAGTzxrO+Z&O*=#6Mc}qSYMB8jRJgq>m?sluS_S{rr3bjEMCEuo9-7cS?`4|AlIgpz25-Shqu#QdXBAwcw$6QVmp zHHmlpG1sgD>i8iJGdy+FCc%!h7ent8S$%#vY$Q_%KwQWHR%hkG^dy!@=YUs%sY^e$ zGLqhBlC#aOMDl>QjtzTj)rub*k)38U$>pKZ<$22{Z1``AP@b%i=GudXgHrbAvq1l<636WU) zdnI^Xejc`%(b%s6alp~FRQPKSNSBPbygqpA*eZY!uE_xSv9UNJcU3fwZzL{I&`zN_ z{|*of$&(*I76{Vafhpn}?RPIThGAiweLkrYdR@WbTikGxm3H}Vp)Ph=-N-t*bv1`3 z)mO~-=ht7qM%~F`qATQ;2W{8t9}}*g-*g9!F}?JvSAIsMN0-N#v0j0S>y?t5#q;@K 
zFG=4u(**!tMeua`za;5F-J2#adu|>Z33OB09g+5g~E!F+* zy+80)hL2%!zm%5ohO$Nd!`N@PF(%CEr`C%ZeaJ)?kj7f601u??MZwbeKQI~#I|4wf z4$Mw098RBL1F!EB2ZQqh>)aif`x~#HiL{v=TKDm}oo>4GuGAtbVxpdRgVN_0H|D)A z05C#fV8Fm)NeKEfuJEtiP7tv!O&`x16RvsPhx1J{Tuom}}?rmOj38|JKwQ|d@Sr>=3pm}x{(cg2mNv^qU)yJB=m=R;DhK8d_tC+`O zZx^U3*UFoBL)w{tLdy(gTD7aMkTXS-Of{lgaM8-FqVbW#`;LZFt4;SA_$038s&qwy zm4wIo2i(;3S3M@@jhrj?TrrjZCWJbkJDcBiI<9i8FDB42Bu>HK1Fi9>+Lg7 zkoJj>ye|Y^X?fli4V>hrbG$p9+FqcT)DWpu95QpVjaPwX!L^<@^BEg+vY#*aNuT(g z`_Rm|o`uj|+nk31mpa~#=Ql)MgeaHe7)Vuy^ zc`q$4e!~=43{SBT84PNXx1(8B{fgDvy9n=o^YtcRon1;q;a5WDpKw+5%+ z6P!br4Y}>u!Tqqr=C%;2zWsv}ZDxrdA==^(xsna54+08UV1s3eHG}po`F^Y7h_m`X0boZN(unb0gErFPT@c zK?y#3Q8QqoW>TrBRNjvSz#POdwgiVLol_$-aRihSg2@2%CbY`8ZgHCQ2F@oKghhz z*TT7gl|pHNq76{&RrUa&B~@l{`JD!qM?#d=qk7=H$MJ|3CPnBz<3oG@fBx)^3TqYj zVwgH>{yKRhd9#>Hc*go8Sn`8zJ{H zJT*PR2o6k=u2F}taWM0kzXX_5;i(;qGad}cvxv(0SL6Nvu=k!}O{V?-riub8Rg_*u zq&GnbC7`09prGhTZ!&;Lmkyyx6$Dh8)TlHOq)3NQ14Jo_6lnoMkMxjGLfa05c5Nne-a5B?tWHsoWgEL|K#(f<*DJY_zUIhuHOpHB579sheuguwRJuaGzJ z5pC0^YMPMK{yz+CxtRfIXH7OjWm1}YoR#b8EmtX93+A{GwnWeUwpjY%P$We>Yz5Bp zD2(KEhAkuL2HaQ}OiIrJ7dc~k4{vd}!?Y>jgllYzc>uLJ`TFno7+Q0cn zlUf?7D&Rt^Wu6M174TI8;<69b6THN%V^!+5U|^$N2$T+TbDy~NFpgQ0AgKM&nUYxB z^!SF7=D;o0&8>$iZb36zuA5t~@ltnUPr0ZJ=uGfhFqW3Q+j2!~FI%OT2sc0*&09g6 z)dLW7vZ_CA3~spvKB@t}aFr!XV3n0at>3jltWoBDs16VP0EmvfVaB_(QsgY`SX9jh z`;aMJSS{D|8(Z?&1+(y_8Yew~r8TEyx*YJq2<8_%zF=`a*)w}gvz{*)ecz_2&2k)? 
z0`12gOvv01+#Vr8<|tbaxyL7UZVg6o+XyPPbTd7C3(8;)8XVORx95W{Oh|^spqS#U zX;XG^(%is1E9H5=>IfOnxp8U?08Lrp6gMFV`--Ou4Xz6q`0~h&i%KD+N(p0HCCinz z8}Frn(XNtpO|-k}DL{EGDhz*eQvkdMibs6Q0#A;faO48{K(MNl80V@?&q1Ry9;%9y z!x7mJs8s8nxK`Ldt}26d&WysS#tois_jIQUu3$TMj7!x>u;3zW^t@v>J+ht!BW?St zJa&u8Q^ivt^u@4deMlV-dmFL}oS1gh@v(~_6}YMBg14fbtYw%65@-f+xy0a>8?isA zMnTK?D~+b2_Tz-=#|Ir%&wQInbTD25EYyz}jJGHx1>+^3q|wmzl{tZrC^%O7-tf=z zx@pcULz|bZYRN=DJ9l#JEai`se`Ps7 zQ*Um{2ay6-^-0}{dJZx3`%XrXIm|UmbX!PKEVPrP$ylI6k)r+(-Sf&;J`4HfuUu)_JoG2E za&khWWTHNCBTt+LW7?l5m$}{egC~WZ<37oqh-}J$X2?}W)|`!m{wrZO=>(0+q5#T` zeePeQ+4|QDMtj3fUSe^6~$vYFQp>s=!C zSOeJ)hOJ-ajA)FMZG5_)yHI}d`Z>gBd@$`_CJj!gE1O>P)qd$RtmvW5nltCQhFz1J z#l>bnJMhs4VLk8v`E3X58q1?GznHSMvrM{7b@A&bP^wM$=GLqd0JO5-o}+ykCFE#* ziEpwt>Q9Hl(-V_BQI@X=NQE}utar5HMe;2Qw*|9OR7uQs!x59M?kw!=9SUu1S>J+h zv?_#MVToc29gdKD$8Zx}Ri=HCKl&v%mMJN?iu#ru_Wttcm6mfC9n0q$?bG@9OJj4p zIZBc4@W0rHBcA`g!hKt!N+7KF>50>#RkDUXr!PN!V#$R*eYuSv+YzkzMTn7!+di^F zjFG8)IHFLV(eM)0;!mp4547(c#ZzdC{93BK&{ScDrK-(&`o6&;FR&7fOn3TKKQ@g1 zQ2jWL{h}&U#%WdegqOeORyHx%9W>9YPw}ArXadCxI8I>8?>RF8)l0k3WoSO)P@&=T zF*I;HWT<^)0hA7`M7jS7iv;_1t^2D21co~DM9tzYbFh&;V?G-6gWjq+yd0S+8Rb4N z#|IwzhD#vQju8`ctUqQ(RcOrv`4x~K`;Sw(L|~1dQ0J*1>JH0@dk-BRY`S90DFZu< zU=sw|!%+1`s^@MP4YK&4#A!tC{5ov-cRW~XeY5&$2S>ShItCKj-Jry$wZFa8giU>% zs4NUCZ9OaI_RI4J{k#}EoC@*ha}`r5m;6W ztznSBl`*Zq59OB3+;L#R(Rt&bZXtwC>My)Pzd&hh%|`L7GxT6@^~>9-nYD9vW?lw{ z8w+-l?sR^`V*U#t5&#HWkLOd2z@xnO@n)#<9XHFj(xGQ{K_09~kzfTSk@gtkhl#zV7|y{J0Zdty2<{eeX`UTgem$QqkC;XChy`7ElV#`K{?v) zspL}J#UQ8n(s&kSqq0hQ<2_-^pRc|1%U`R@U90B=rQv3!pHG5|b4doapJ{b;b11B! 
zJj*$iOMUJW$OW7Mn~>LU(t)3kd*EBQ1Z-|IE>-5+OE!z`8xR{ul*ES!AZva?OLe}( z^e=xqL&Un!&0y7;YDOrM)#@rQWuD9oe2P!G0=&SP#64T@{_XGC zIq%3v29zh_jO-+$a7`RAi)%sTd!)7#XA!L-!*xAVDeW8)-)7%0@zI510PJGS=((%J zVk}N*++5W<-rA+jJ{Q6!;C*zk%}6P}*LkT9M!qS)6w+z&1$bn%L=98I#!*hd#2l*= zkJRLHx@gx~I=e{`l%(|94hr6C0Y(JY_K6pOTRmoJ(6Gkpp7$h}bM)Lysu5aR732c1 zyPucXcE;^rT@In)Wj}~&kPvbyt=Lney>uO1_{1C_UT!?2rgw`2&q764R7+Wkao`iI z>DoSZ>v!x_v}P!|PrI2omfi8{8sj=6qt-h~L!cm5>!y_eJENSu(j+>m_1?_vK6f65 z4t=RRf>*PAduq?Eh_iLsR86OKQf~Az-L)BWnPJS0$nA+Ga*@S<(7-ydw+`@r?CX=@ zf`Ic|r?<{E7x${F|J2E6>*lk(N6o; zBs0PTg^??Paukx@3KXfw@A+@9^4yW8R^SdKPq~$zD(4Wzj8Vbd_dbR)bej9V_x^Ye zTx)Hxc^4DB2UH7^89vjm*G-6t_SAC^pE3d=vmyLwtGX$6JER944uUK02)t=gjQX4I z$Ia98fs>{S(ez3GqRKS1brkW!Oz`PNBB%$?ZAts4R{`K=rr*^gFl{y25XR1uEGDteaRJbq{bbe#yg`QO$lvwpON`)4K8hP@M9qZ7qPa4_5o1uLB z{L!@)LcZ|eDP&}1Z?=|}>?9@Hei$%dDk*C@xQpw{peT?&lGpF~#lf!t{5S?J57a1n z?2GlmHbh^oXIS9tWwjIiUe}uL(#U~J{emng895@sk5o;4=aSYTmQ{jl17p%Kc*VaO z_Lbn>l&uJQwHtq+bBT{zDHxC|;6Z_0wnaJkZ9RZ1JFXqzm(332gZ8aWzn%OOZ+(Y( zmlfzf^L<>N0H^;vkl0;neNYN86Vxpindp~eHEvUH&Yoj))2iiD&*nje_CLBqYNgmGJ}L7X%( zyEi$Pd*NnU3vIB^_NYV29ra&3x7alW`cqGVBNJ0f>E?zz_gvka*1dFi#rPwLq3G~AjA1xBS?QO3WgUsqmTZo;OSP7CCLcd*E9#ZEl(8r)(G!^4m*~a$}B>O@2b(*2iu+ z_mrgDAHL}vu~GFJriRpU49EF^*pN{XI%Df(mrj7mJ?wuuOTp3oa++fdb&#)4=~AVm zrO%an{=o)n3LyYum5F8l>mJ0Jc5uQj-7|~$(DF5P@1}?_;hI=DtP<_4+_)k*Ju5A3 z4$*X8FU{*V3{8w^1|(mW%BixYgX z-@(-ATEu>|6wQti0Qv;q?2dJchF)?cT(Y53xBBzNas%=zm~ zHt+eVF+pFd=i=bJ_ZV;gP}K49%%}(@&tRD`S4svStUl%o+;pm4O^fHPx%b^Jn- z6{KHD&1Jl{*tj#hDU|pRYeeCaEkI4evq%vh3kk{AzHpDXy@ZdG6yV$Kw9uL4R7Q_9 z{nwA(`UNyKBI4wgf3nB^TxQd>jV{&_4M~Z-M*q}2jcgMpaBDs*TvIw$KY4iaQ7ldEPtcRBJ*`-7+I#Rj%b6+_vAne7pJl%AS5mOzaSscA=YZ!JOx80`w$&Y6pG)Y!;PAvzv4drRIm|+@-eu z`6nGS^_5?}tp}4d(lTG)+g+ODD`P}$QZ<>%g?c^Tiz>cXrp#l-HpMG~r#+(kdnzSG zR{0f2a0fs2;!E=}I`hsyVJe5@8KL9eXbzy{J)u>$-v-=^{(X8XWS#oX|1#)!;j)n~ zdpD%<H4YjmQO5n-Bg`-?4 zTa|-zi9r+f>y+T)UcHfdLldKd{~0xYN+eo1JpSnL&|DOEKQKox*rkYSF7@!?uD;XG z%g*bg9NXst9znK-K)KO;4X-FcKSYk$Ve8OK>kD!vr;AvV$8r;f^NRcLTFvK4R+e{3 
z)wk)&@}3U<7#wwj#{OHj$8C2}A;LA|F3wbs;L8_ejnAyp5agoIlY<+3`ZSqlkkfCz z+#ysDCl|y{Neg01-(3TVD%gTI91rdZ#Tf>dYK1p|9OaHwbyK#>Qn&a{ouKDh<+^=} zT5eeX_vfWY_vvmd-&!uy6u-r|gNeGW=_)`yynTl~ww>-~o8YASWszskdAsx{=i1+= z+HXz2{%|uO?w1x6ef?pFX!%(Fn;SEAwIv)$5r3@c{W>Apee7)-<5G3Jxd2)2YYatN zN-TPF7Sf@oeRW5M!93eKJ2`}b0wU1qjKGyWUlftP?D>S~P`V_W;bax0IW?c`uc#2Z zffcqy z&hCs4ud=^(#_5rM&1ZtDhbbIoI!@_?fWn{)FwDo8aQy`^!@LJp5%ruGe7M#?#TS&a zoS`7O==;jBh5cuLpUV79@vUZz+jNceigpBsF*YW>IuT6XMDiWVL<@>`zsk=3%L57+ zNDm?@mP?c6l$`Ox@+_MGA5*m9^o2hndMRZ&k3E4E3C)xH`>)gHZ70ISKW4*2&e2vk zb#bcG@_H;|tN*IiJnhmJ?-HyS)bxFfkbSP_$VnMx;ZWxHD-lm$S z+1{l1fzWI3{n(d3xxDv!pbhxIt$~pnN>_2r2$3OGjFXa3n|ZR?=PaEOuyA>m&ApSLilc-7vE~ImgFnw;-dYlbJi+ z`(Taz&21ZjkXcrH4$2vd!LMivNeFt=-(VfV1)u<1umtsWPCbvfuVOdInVH%>VWS}3 zdP&aR==-}kedQm2$bYNL@iV{LIlNL1gCwILpJucr_ucF0YoK_z1gLwHM6QRXaMVi} zb-z&~@~yw$7mjD9VtUQAHx*395Vg0!e@AAH-S%GZ%@=Y!P_sJ#fc~D#`e6!5cc)XX zoQtD57}9YqH&~{*`%ajda?B2f2F$O_`u`|1=*DEXnvZ`>*aOe zFK4(vnaeBBw6%k_rrf4lJ<+a#cG{XK3sb7DSW5NTqsckm(gRAl7sA|D&RzN~!`M*w zY1`Bbs}9sl7t_Bb+}`{{}oM4!U&XIV=iF)w#bY2@BHMMwuGk6Zl$y!Ds8)SvgD=wP8mKkDk^|H#%wJIe4r+0H&)vpw+PG82Q&DIAJ6svQmwZ!c3_Hd) zMnze3AZ!;;Y&}Lh9At$(mjcgHn{hIWFyqcu$!Ayj9f?lkZZjS2b@8sVD$ita-2!VI zCvJ{^V0bQDNAXdD*1Ta0oG~D+%@v3!|L^78_E`TNqsE+(u((^m}!K9W6*L=DQvC0l>zkt;HJpyeP&!$DqD+$ zA+wG!Ms-k4dWrn^`uo!PRcmk#s7F^1HnEp|_Gbx^MPp9h-M_6;4v#nOzz+D?Lfg6G zBur5(11I0%_kJbGLJhB<)21#*F&gm4O^u^YzX87(oppyoE_z%FNg0X0`c#(o{QOvwIC0vwV^m>X2QJ3aGeD7xY!BR^(>brp+lm3V6HL>gXBRizg{!LiSH>CDz@Bs%(=B!}I%Wf0W2hOO5hKc&R})T~rtpJnvtJvdJpC-(R0;e~*OllBK>WcL48 zXpwutSSRppJh^KQW#&D82?;W|#h#Oaa1is;Fp#hy(Ymw$QQ~(3U%6eqnkcw{^%MiB z9oUkjs6QV89F&XC)#`)ik#$PGcYrg! 
zz~oa6nSEbVp>5uPmGCAGn`Aq%gDFn<{kpmJ(vh2Nkt@4qgV%=kN_CJS!WLX=N1so2 z86Mw>V@T-opH%i5*QwS&L8KnV?2nOz$Ui820yyrS`xha7ka+vo?Kb~KVAT}%gix$Hx^+k}Q*wno&83F%@vXsU* z2Ns9Anz9W=!k6ai))N@o?$y0jgv%$86?UU(vZ2oQ~oa)D80Vp&s=g=i_;Y_ zWIylKgSv2;bEDj$HL-;IoIPpR#UljG?osXU6OT1>K5;WX|9UjJk3r35-sSHnss97A zR$QfDCvk`q&mVQC%31pfe$mT-$npk(Lenrxmy7sY=G4>=bs~%UWffo8Pob1BN{Kcv z5}XB~k4>`#^|Z=(B?>3ob`QpK^?uYb2`WIGL76XiSm*?*Zv4^^I(Bo$PksFv530mv zv*L-y_JJ~x7UB#D>3B2sg(@0c0%Pzq*HY?*cJ*i?sW6Uj!bA zdeh0bU_P>luhIPgQ}VbZZ|Yb>Upv?HZr6UD;VPY*C{hD2(|C#nu5ziJ`QgiP7b!a{ zywiDI&L`aUsmrvmI$-c5)kPE&`Qn7tix370e2F+)JdIaUC*$f2VxND--9EUvt2}gH zzhQvv_j+bU>84UO{`4F!>r;-RP3T5-$n$n!@pPdy#JvIVE5-HqEX~~er4Cj&U!pbO zY!lr(RZ!sB**?2{K9Go9)Lj){1)jf7j*V63L8C@~1W$b3<>i;% zVac_Wbj$no$g|%!K|_Ah{72j_^17G0;Ml5Y7_Q9YqGtAtN zWYDT=RD=6Grn(~rdVqYeYeC^)vVq;k8@O^);*3wTD8GHsrYZ4kQZf@qD;72Aaf>C6 z;2|=TpPe2MCLnQ&Dsw1GfV{wem7C|cN7c;#LbLJ-X zJY(bqk31Gm_0K2Qe~HE;^-$OMXf;rZfy%Rzx@{afT*`y56D(p9j$0!zV6qD1^=8@_ zwwS5+2!JP^0kH0Lk-WMD+IB9oedNvl522X)ry)aqLl)v*BbCWwYlp^NN#6B?FZxno z+WkW*&}Dv}v(`2x6C0a0n9h1>e*jK?Hz-E9(~|>6${_(GB;aFJnew|8INNt3M6vSD zroJ_BHvx6n8kdE_Vt{U9y+L6QV-=t+Atp(DZ9_moaqOrTo9yvs4fE$sWwBK!YU_;8 zU`WAY?2i~JX9wVT?0*2c8BiSoaCr$!~aeq1}$*h@RXr@s3|Ge|;fGFWR?0i>? 
zjB;GAEulP-SrBjdJzOY`od-qKlE`npdnfs|0C)BQdqX6A&Vce69X#EV>T_Y>!_VQDt7oF0?)CaT?6A6_d*}ETVrv z!@(0kUXaKMjbV^9sZOixHL6Y%?eQQo$AOr40-=9F6QbPsN5(o@e#rs=e7zoDr}B|; zaY}>Zwq`GuKe3>ftpZ-O%7&-@%Mm)XjV06X^EQSz!A5)si-QtmXHx7!qMUg1YXeyKgG)HYM!b`VI}*pWtns#(>Ob z;&Ka!XQxdVMb;+gLUU{cS3VUbbO1)S>hD)08UQ&O5KkJ`NR{yyiRXSC3mpha8r48d zWaWN&1WaciO`)j<(sREP2B>1c3U~e1`8}A>NMRS1uY61C=AD-KOWG>_w~p`T6;Jqx z_2!kToGqrSC0X(UUmyJoFFFJ44=59tk-U8u)VJPt$Y9X`d4A8BQ5RD!r(TENx#FV3=-&+v%C_QpjJW z251`)ul*x(99~?R0(tSp??9||-zl!=Y(U!x3a<`T{eOfSuL!L5@Gkzn=)IpbiadA) z1(&QsIA$btI0iaqF}8Q<_X61Sxn2LR@~jXc+Bt+$i}Q10lk$6T`rVsZphejy*ELdh zUugRrnyPzEx$MuM{OohGuh7!_9&Np%n)z%z*4!FeT)@$qX-{6bkrzs*&SMd#A!6%A zgX-VuP`gc|{FBgMt4U*|jyEJ8*1_%w!HjtF2-FleaGbKVNwyLe`&M=G|zhOq)_^ zB*(+B?8oVVtY;k#mV`K#Z-CJ8f11JnyZS(4T2t2+b9ZGkth4;3?)}ysw zBk}MM(L|HwG9xRx;1=G+2Vsh>BMl+&kZS{l)~LAH|G;zR&|iyw?;&*#(0iufp4SY3 zh9phl`tm_{IvfUfN*YKobr%9piY=qP4yS=Nqwj}N4fB-mk>77Yh@HnzF36q$kUU7( z_5!g)e}Ho6(so~s~w6aT|fuly;@7(b+u0jR8%bNn&7gIjO z0eZ^+4V(Xe`(9c^lqD$YVl)3BO;7_UYCMxuXFl93%B-Nde#ksr{Z@r64fk7Vf>aZX z@@L&V-ib^;Lx$M|gDB@#_+dpwbK$$T&-G<}+B)YEjx3*5ZjNUP6Ta%QjmK8#vVo>B zO5aujRHICT78bh24dc>59IcIetmnK!{(5>AsKiOguAh}LCJx`2z%g@NDxw2SN(n12 z_{fw52jT7UwFnt!(@&OcK1<)24x5go!#sz2)HQXB&A3@OW6mLt4%c|H`)6nCVzSGK z-E!g2vO?HFelI>UDZ#-&ODSl%?3Mtg8B>KQefe@_@k>4FwE6Fam*YLTo^@bXsn;2{ z??zjV<8AFx1mB{%eshq?99H(Q^!zW~bXh@lM22ZSB$P526jVV%1e zrG_6w^|MlL_5B}K&{qOfWy*dV5{`-IRoaLX>*8h6- zcKY#b@qhbTqo|8@Ps{vTe|&v@cup@3Eo%6*sj&j%L>LeNsIBDuH1MCsk=4ef~Bxq8uC!PSxqIPCVC3f)T0m{YttN| zkFQ$lQo&958w9Sj^B+M_jDf$bG;-~yfgL!`{liRUwA=;anKx^@T!}x$ixk*nY4C zIo*14ep|y>an>r-y!$WWsrx)rOSb5qs%M*iqQYTJo43rKZN4ip=<~WIBvoQg1SxZ9 zC#l>I2X6;^8xwx=MT!scN=EQKj4G^f(tVU zk|B7oUkg(v123kt)Sc0+E_Yk^>sIt|Z&N#^K8RR|9YVc>IHq#9>jn7Ye)Ntc6#KC8 zHyx}@hP1TlqL?o($ML*?pTdAeqdCFzX+gmF*`U|~xY;E~hrG-MxsqWy$Vv$^7t^AB zckhpwubti$cVp($pGvTdq#Wx}JG9e<_JDC`e)4*jH`sSpomgp1=ZQT_9(%Z0DuH1&)Y#WzzFu0|_y zCLQ`|*H*~R_p6tDN*=wRE#}0;%JQbqnrlwK@7nF`7^lh_W0gDU;Xwi|XCF!WZrXjSM+fIV^@zA~m0=r5!T0-D;ZRR~;_1NRPnkZX!Bl&WhJpKWn9KCry}c$8K0 
zsdxU$YfXrQ;!{88QM*G<9VHd-b3=Hf+d%|QxA6w2g%FQl!P}qiJ%l0S?haM4dxGV) zX{DN@wMNC|3bEPT_H}ITNsaZBeb>y3_V=vCJnbiP79N+7Y`~a8x(h#OM2l6Hs7#fV zrQxj#*D)#+fBl3T^AJ?8=r!oT{WqlZtcVlo{W7dtqT2N2!U&Ite0a%lMi2**?^FQJ z62+V>CZlup{1}LW4^rgEiLE)amG(tz3HwBBS8XvY9_SZDjpzk#D+Wh;Oi-0xKxu$~ zl4*)!^R77tWs&fo5+tPWXX)JITi%z}?Zz`1KG0&qi8fPHsOeyGgH^J7s)ETKH+i7skFiVM)6I z_1qu+BcFJ`GhmDtJwa1t!>tl9B;&;##8xoQC1*t>PJE7twjv*pV=Ll{+jbOq242R! zL{AgsF6-MlJNhY7d?ZqimlYFNi=rqanC3e zZ`t(QYx|DGYnUs^i_s6=N6^Q<)HKFV7R6ojvHHGOsSo;+%E=`z94~2wVbr9m;Yq*@ zmZcFItnTD6tPtcJbYFH~3G~<=#l)rf!nWL@1q=}E9r^K8owUD!N;~jV(eLw+n1=*K z>p7T?8ZB%y4{9hUUcwzN+p* zT-)#Vk0~oQe1TWT%9o=15v7{B+tP9Jb3S`;<)8;u%8$jCS=}>+P?>C3F=qw*t_~|c+>`xY#_XhvT?4~@d0sEN^BmA8EV=p4{Th|5K~%+{;Dt{D^Q%lu zdbU-*$5wsN)4{3fAoFsz4FxlruHu|!Q>rXVV?gs8v}L^&6;>3%{F&>mS_ao^b(d^Y zUQ?>flewI?-zs=PirHBQIgP_AeH?$XVLJjjZ$hxiY=bi^KiqxkSs*IpzVr$oPyTIH6#9i-Ez2*nx?KFBSil{J^_?4y7qk0Zd)lzSL}(BmZfkohhc@?V z1&rxexGaR{l=!YXiTmR3@Zxw&oqXeFx-GO}*YNS-9wv)Eab$&eN)~MWYS!s%OfyZ9 zM^*8VSZ#2(_zdC@4jW_zlQeU+1JX2culDdFHj*^-oKPh8JMTW+tu5u1Srra@*MG1o zowRml!EGLf9d#_e>UeHJXZWR)9Uo2cMb<%_3hmCma4fL#zhlVxgu%)eRmUDo#HrG7 zJNQ1Pvp8Hi{5HG42@$xBx$?~fmi}p=(W?cEk>iioZuK~W$OP?xLz7n#Ir8xu?&*s} z@uv(wRkF(FCh!Nf+H=(|Iy+S-0^_pB@0qkzVJmm5ZgVQ|IBK!GfK(B)#}l<+jw_IM z`)L<=>hJP>jBSGhjwm+r1rKV6bTRuvNGNH?-EF0P64th&BEIOEg*cd)n_Ah9t$T*r zA&`}*@hV@69$)ip3!KXmnl}eG$%Un;j&TCMbCz*UM+nZ3O!r_Ml)x zDWl`jxv)ifizT;Y=eP3?SyUFfB}zd_8~$c@i3wBC#2mK4{jcCQ@0#+gW@2HQqW-YrC+MM4AYM?MP1toSOh z=WdWt*XC|=+tO5&y2-ak)e$2ft>>#-Lweb zxEgVKyg*jP^7M~A1fsJ`kJxIVBMS?h%jD`Kk($_to$z4NMQs8*5DH`J>F_?cyZhF! 
zvueiCKNmfeM0Q&UG?m0F(9A98>&^{L%i)pwsPZ25$HVjY;8gP^sMK-aJz~a){n5Jr zexv@@%-7e|A4u!?%y=i!35NX&*?rlXCu3)DuT8o~NacoxM-!G-0XysHX_Q(JgS)v* z`&@X7FU+T}m=jeCGmWUmRo^){IDZ%*6o8`|g;(*VKmTB$GM@96{@Gb8qe699L zsqV%;!~}AoTjLn9iJO%bUxGsV!fKv9Os?e>4{eNX-a;rufDFXDuSf4jDYUASmpoM+ zp@DGdto@1I2n|`g&ktX~r>awDAYZoAV4-0S-&f=%h7AgIw6OShUQC?aYw_O(eD5-1 z^R2HnoDGiHsB5jWQ1ztb?e^gzvr=+9H;; z{R(3euO4Kq0Nd-&kDtQCH@}TsDH+PGIx8^BWl_TBzOW~`nTA_i3fJHJDzrB8x72vb z;J2iKqr=+wqnY^qO7cpZS?AL%H_^%s@tmQ=L%n|fhO1eZxF5%mp1glL-+dKrgBP(a@h0`SGxUE{?@RjBgClZ=~da~60lwf~*WHRVFIz_m=;UX(5YTY+y$oygQ2raK?MVzD%9G$gxz+EzoZSOE1@O};5CrsXimt6X zgFmU0caPxiTpV(A$n8ia{dD&rW2lOys?Rl|8td4WEU$-Hu@Bru$QaPMq^Rc$L)DLT z-U%0DukSkjhJg8IQpNZ!;jF18#YN<^VYyQ&1_q;Q8B>@{-p8x3tbxEqpLC_Sih_9= zIwk$;G_V76$9mM(jDFuusA_$HPgA^|X@UIu9xY>r`kHU#w*u0hFYk_BVx=&moZ%N@ ziz!Y$AUczMk)K__CRiIKKKXd@FuEMwvz{dqs@aadnBV3+`P0(8?|G9Q*ej8e>`V(l0#c z&@pBcJ2$nwimG|kOEFcK>UC8b(;Uj&cSt4~hhq~QhaX;Kj+~3t_RknGKgTzbpDWz3 z^)kol;l5+x<|u3z>`8|dc`;EYNZdQ|*={JRw-(ssx(baB@A(j%md}@pmS>C>aU?8# zQ%}Zq)j!V(UTGwHsHwRf3)-8(*Iy|M0+u;2(S<;F`L6+^FQt}=Q}&2_uNhTtJjRN= zB0pc-F|ZTy#BbtatI4pRDW~3q?@u{mWXgplAELCt@QTG@b5roU2+dvMtu%ik5tT5S z(z;1LP-86ZT|>@!M(xNKfWcj;QW3)&ySBWE2X1l5zc*~}# zY7>;>qN92-_kw2*S;3OhVXU7Ta=;=;LnrpP_gb)75sG~?h%S|@4;bBZN1?t`Tp-Nq z!+~vm($T;U(RzxMfr=C^`Rb-NF5v>TPd|lfSac#-?JI$O26Xi$sM`H=>%)zzdoXTt zIMX;y7T$N08uO@r%?*L%Xhb*f_^@h#ogxmUNh(~7YxpMI{VVjZ0|V`$TGe> zFms9g?o)F^e0JE!eQbi1j>XtZXYOX11BpyXG8cY0SSp0s@lQTdWAJwq(eyjWZB1@9 ze+CO^t$hqL_1%koIfWSa-O2K@HQ~%%&=~p1?Mtg;o+s5%g^;licxQlPw>Qkt3)=UU zx>UWhwtZgUwqy=fAR@cCyRf`dsgU%=pde)jWXOCtVf4D(?zZTjTce~U+3s}-JZ1{H zw>#)k(w202P5G|+Q4w+B!|2Mk(P++!ko$t1U=NLm7fAM@!qsnlcxKbAydT}-mTNjX zi0q~AJilbdS)$%JCU?DOTbnRNS{9vy!IG<~ZQ9@}o?e-z2{?36XQW;eVyj>({u{~- zdMj#`ep3g8xo-y=^Ziv9uN-sWTl@hM(N&1;!y6b^&-eb=Hs_1*g9teRd}OK`Z)Q>! zc&4w9&tx^N*1UCKdcQpUC`&%5&pJRofoI97`dW(`cD^4429$ot*_lk79iE66ej^OE zP=Xnj7hF8tpS617qJ-}fIig3WN3=) z&rpKCbJHWK3>;?DcsEtY;%X_In5dln2hI9t(khVh@{0ulN!)^AOmiZC!5?G#6n

    ?MlBOo;&OFevdrPgT^J;Y4*z{+Vn;byhIPV4*ohb?IUoIK znFFm*+^$>+iCoda;J2+xtnBUfTo*`4J{{P@<+R5V_jjGGP?r7^61zM<>Q^Ts_Wc;_Dh`_*Ke6dEqN&K_!NR=(4&`(EL_8%aG1Zt@z-h3!Z)+#oHzTg3Nh_c|g$--ut!8+z$YHVl%XYSj z(ZR~KB6HYog(0ZOI|E*GAScE1_Rsd*?~#Yyfvs3MOx0gYunhBN;*d#97fWw6)S|~I zEm@nT31C|GdVLC&0gz<^)5O+0#f2()M5Jrm1NQUr*$+$?>xfzFqB)oF9d10_Go;px zq`!*gE~PoCqrPL%Dand_jRTvn^teog$b^h0PJDlFetMAbdfZaE_szKL5ChJ^491j5 z#`I}SdP>Q#_1^kDpyJ9A!sAI#m$sg?Y^=1CI7&6)vc9S zgfb=)gJqm+y;vWiZ~w5HgL0cE(2b&WZN)5>WdFx$+rIxf$}>qhNGJydxbL zhfNhBMHuvnDAa-t5Lt9DnyB(;paA8wQDkojj1F9tbI1ZJB;n)vETeeEjs z6Z$*$zkf-o?1vrW}fspZx?5M)*^c2gRxoGgIiHsZR+`M zs_M%-bb;`u?SA~7+`Z_A!m0i8U@%V}Nb)7iG&(!A`@7W2EL^|nKlrjpFH+TRavKcs zOMN7RrDbFCgK7?YZqdLdN?-3lbR#>gTdk1gbz6P#+aqA)KRYApO)skEn8xc%CXwFE ze|TX!dQ}Ee(&TLF$464951m1%nbyJgI;yeSoB6Dvq$mDwm_BP|r`_^k+}B8;DUQ_q z5(JsAMd&gkBfA`C6JOfu)JDXs^D(-Y>ZT@cSh*-9eWolVc$JM^4|n@E6n%Qv@%NCs z?&PJdDwez~hkH(icvHyoJqw^E|0Ex$+80-kfPv{6c8o^UK2ivsFp9U(Z1GN;DuUe_ zrPZ|y$pLGK&0EU3^bX~}y;-OCbsTsbvlXNpbbjBl+>2gO>VK)z$kvs$nzS z_C8zhaBWFdd&I0qhuq`?IaN%Dyy|e!+HmA>EYJL19e&)tSCH;nAHEi6H{@WS^**co z0*vgJk$aw81-y>F3S6_F!sYCokoq~Ofko|+t6|Tj_etVBu9=NXWPkMl^CxvB`h9rg z7hE}OyZ^cIrqztrnB0x=inPG+XZVer1&eRx4(pgb`GbB4q@y0OLFwcz;ZCHXY2@ho zZ_|{_E;w#lzK_HamhJdY+y7ctouT8TcW-aA2%0qAf60Jum5+!p(0K2bZb_{S-(9yNaVq7Smi-@E50*m0C{PwCGV$U&k>uQ(D|bJT7TM z>A2*crk=^jT|-4dk6Brkl)3dP!b~n??uoc0Xp)gwQ0N3oS)#}&q5>|spzpVtx!&ts z*ZJvu|AX)I+3x#(?(6y7@-J{AAV&|YR{l};@9sF;;uhuoi98BCLTL-Co>rV4*V&5N z-$AOj05>}2vt0G@qSjbXhi~6P7ulk-vT2Us6IXM$;|Aig< z2A-uZc)JK+B#dVFIbjT5mW@J}cvmS**rbuYIBpM97~kR5yS#xag6YU`toh~H7abXF zTlBj1*7kCScXtwks?k92(#UbFFGOT(8dAz(f=czk`Gcu0J%_jKb|h>y_7Y# zm1>6Bj_wrGpaZ-d(18g&a)L_Q|NE5b8t5qGfdy1wk;-veGl%q%%?oLZ?a^IOCL&%v zhY8W4m;*o6{OGh!!(NeO(=$=@h1Gt#u=?Qb5ghn#X^D=RPS^6pfW|=Cu^pxLqICVk zvGjm_^zm*f8?N%+QL>`4@_^3qw2c6-+gDd$N?jahSw+w++4yeif)v@CW8rc&BD_kt zMx^UN&rR5|mt{y&PCy-JMneBbeX92C$_1b|Y>_S18}876gY1;8qosf5$4dkN0Kw_| zw>nRi-rA%8A@10opG#6JWb{n{=$sPsZbLTVlggGs4mRopU~$bQhXd(Gla8gYfa~l% 
zz@%6;KB6aeFL>|#!gV~K_TFSVHxyu<0~_vY*?Rpbru}1vq(tXr;=K2thCvumXU zc%Tsw)9nq}st#93n{2FR;H&~15nHx=x2|Qd;{-~|H^N8J4-t$hP@JxJc(DhR5DTP^ zLB#Fi)PtHk>nr;)ILbE<^HIoM*3ow_5-z4^CMFfmPi2nCzfE+^W8>S4_m;Py3sL?$)UvO|0SYMp2^^x}i_S)@$u}Hz=WoD8*Y2w; zZ)Y$?Q@>6Kf$96Ha$LD~ehq2IIloWZr*egH@**tEUjt<4x$cxicswS$4gDgKDQp;wiS%4diF71`k)X<` zb6;I{RDAO3(`BHKw*{p z62C*CZB_lA78QGRE0dfE>D+|S|C!fQu$l8c!UsHCuwTPT8-ss#5C`5!jEb%+V)O0NWdB{lOX1W|L-nxOTP(Dzf~KPa+CFvq z6gUny6|)J3i*^s7W*JB8s`}+(v?)n3pSfkl|0DP)R%9O`gWc8oz7H<9eCAw((9UAZ zBu+->)4lC4^>;=plJyer1355a`C3)fhN*|Mc1`GoojmroC8UZNH1U9liZ^3_{+u_{ zMPAnP0zxkoA95jqYv;O1*^)KPM(>kr5mqHxtn)l&YO{CzQnSw@B|8gf6QuKm(0aun zj`-0|=S(-`+#0~#%B&&fTx@$x2}_x*vu!VyrzF=c%`XdxAFVRV|BCVIdTZe|l%!7Z z#D6C;B%9$AU*Y=yVa+N|I~ts?t^P9V@e%dqsLd;&WUtR{0I^r{(*wcj-%TdTW!hv@ zs?TNYRIjf4Ml1O8p+Z#iZ_i1|gaw=O@nX$a>F?IvRq1u~O*O|vC*jbrYHZ_%=Hp(b zo5>p61&vTX-w{NNo@0xqbM|q5r=R&C>!8rQIcN)E zoX{P8wVMeuxu9nYmdrS74Z&SO!gjBVUqA>@nzx+vir{;^g)M5b1g#g(wAjT>QzygR zjn-#};H)U5T8$8VKYs@}9xcf_sK~rdQ7;%#2Hy=DVVuA#* zvUko&OELp$<*~;FQaml&N&MUr!yc=I79RhsI0?-$=x|)r60q(}IdMujWm5X)6gPn> zth(9RI-TC#>K+132JTeu6oq@7e4Rd!DZFopz1C;P?hUiO57tS`Ind{H-)=#@S&NQh zE4rdMc;#6Hy*6#HL5~dz10C%|P_C{Vlz$y7zdfW@SI{mm@z_0gH%?#U~Y?7jk_?T}VON9x~k<*Bto zFCG~;LbF;|*s?$JCMA=wF}JaQ_H~b{@s~C!vRS+)lR=3z#aaQTvZn`)|!$d4jr4U!*gA zB{j(((o%Q;gl_G#;Eg8q6ihRCVeJd!wrW1>rHS z-OGkq^svr(ADv5G283%FkqZqQm*6z1VLB%;O0{Gn%gSU$jjxNCldNC(qX#r~^f`J2>9JO^3E?%)X>C_W>2xPRLIRcg@RhO8n+g^mCuMEWKHp14~dv(@x>~ zK6T!W$a{=Cj{{R)^(xgFQ?M!Wr84!n>KEm&ro;g3e0QZCd$668j;q0*n07>X8V)he z;LSK!;HDq!rZ3jVe*aikvSOx|)2%5WEtmv4IoB`sx@{=x%|7e}&`bxPXG*2~bnf@+ zgc_=)?W;L2`CRL0AISRU7ook=Pgj%7`cdGl1t`Mi9T@Kozi^PT7n4Y|;Dt}9?J zhQFLV-Q6p6t57mUajwWQsQA;od_%qAK&e4X{F{zCR18!2yl5Psj(O>KO!20+9mA+K zV)7e~Hq!02%}~_3=;gEhh z4f*Jo(D_U*Gh15HWun;yW8BD>C86=OTQHf|?KIkg^Fn3Hic-xQ%!tl)EF5lN-R9-L zNM1+*#{oo5m0HCYY2AR5eT>K2`?$Yo#-^`(X~o8b=w4nijBKjfMQth$WR>zF-fTl! 
ztJ$qwIB$3y0@Y)>hBo+DT(Y4jHEDqRP;vkb9Lx#lw=IJ?);~6;4-T{gC_x=dEQD#- zV6QRY9;@pzz#nM|RfXeARYfnFVGf4?KR4`4&wTio&afV$pb-RxWVdFOp9-;nwvz9C zss^#TD+EB=1~1`;M2DtwjwQU0F?vehsE~q?==!uG?;P6p$OI`hBcpk9N~M^CowU`j z13`P!FT1u0jajg)@G?WkM56~@X>SoB(=SlwSlB@5rbeKrPA=I3Ky@B%XzSab7wN$h z@Q;8=;H;PU2REBvf()O6An(jjPr&dlYwf)=0DEKK3hcO)H#Tz5cFXEu{(;9G*H$up zR1CClCdSl?hc0(iL#XFVIe7EVz7oX>pRh+BTBP?_2e!?TT^_X z^<0TG>S6a)QL@3)!A`BG2<4^k60cQSpAqi{3bWX8@ZWsM2VAbH7QAIFW5=#nn3@1? zR+9PHNZ7yIhuu2=75s3uyUL&Y^U=f!>D#Y<*DJstlm9R;|2N)YjT=il%CxW`O=On< R^9}HMp7C;PcDZ!@{{Xp?Az%Oi literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/auto_parallel_1.png b/model/train/yoco_moe/sources/images/auto_parallel_1.png new file mode 100644 index 0000000000000000000000000000000000000000..e6e0f13c7c1541499078bd9f9499324a2ff51ca7 GIT binary patch literal 104831 zcmb5W1yodB`!nMEukTym_nS59@vPyTGkf3rzOVbbu1)Y8MJXIiGE5W{6dW1p*D5F|sNN_j_jb|m zgEQUPvK`(Ie@f zLqYiyMdtM@RkxI#IkR_!669zHQ*%~~@$;Uz&t9T8$`aw>MbSy)zs4g{EkOyvA^)uM z$17Cjd-o(>h75ZKy_AMP$Tl0}>wCuL&RgL2vvkzi4c~8l)4$f5nVI4C?TL;sL@c9& zC%AjP#QIL<{NV3nl-~we?r8rUn=^I_{d4$I?$!Lge~z^=j~@MVIH(fk`p@CVxX%s$ zUZjF>h4P=%-*f)l|K|{e74?6AX?&g~Qomo{Rr38;v0F5?wUdY}E49NVjnx@qx>}7& zwK7!l-)oIm_-8U$S#1ZV=+Q(==ctrJDz%{_o4uIOq9XIFCTpw?d(42$99RGzO?(fz z1FKP~CS45Ao;Eu5*w~oq^Gf|t#k{wLW2OO-2ih#GtWN?tnF;l=6h_q5)lCC%nu%j& zO*lw{U^SwrTP(}B97kc{8_dz`hz z(bDe?4TqwvN=r)cf_srm7_gjiF(Hq}rrg|I>Is#%Z?h)0soYpAZ>QY-Csel62Thyf zw2}Uza4U;hpL#tBCm|El)OZTR%{;llLOm>DOinHT?TOTEjkHdO6&f5CvX!FuY2;mX zHMOasL#-~U%ho8PVZibF0Ewhx61NPg`}R0%ImcqUG(;Mv-{?|ksfd^p0G1q$&X~R-Pz3Vt$(9yNC$X^%;P-k0B6vHtmpgQznvn7Up94}tH z7-k8}WH6i@E7S-e>QCa4BNeb3XE9v4`+qbXDMm|68(>JzXG^GFV>>5k_OQf0TmR`g-2%mm(f@4-WK+Mb@CKDg~d z6Du~jc}fZRMkJWDkrV3~e*g05ifn$=eYCj935OOKvI0AEIYG`6& z0x!9HSZ2gg4|_ngkjoC9@{#DJ;EN%R)QG-1Y~gFe0mLP^o{Sx0zhYwK$;b1_e- zvt9S+;g}l5x{P77V4yf&u(0&9$kWAOwDhM6(HDK#o2`X+T8!jk&>9;X_rd-5>HVW@ z=jt$+5oa@Y0nuW&7g-!--q&ZQhTIBREycREG(`tF@%jXk{V4)(NuLlAJrCpdY^3&g 
z+8BIAT4mVs&@fKl7V$hIlq^tXDlhT(|HE){r7H$RNiv@jsry}AW*@{dK700>w900N z*D#m4q`mEXVo0-_}(Buotx6BFSj%2|>a%wEWw^MGjIo6~rX;l92;mfn$(x1=CS zM8UjzpG0#UZRVvDORlY~Bm}IkuVb|YKB8nQ`q1_DIlL1*G&-%fmsc-5B{em`Fz68_ zf%?T>{XQmbN7xhEFnJ{Fe^4T9V|FfdTW!O9A) zshKP)v6-nd6}PXM7Bnk@LIalBF|n|?UcBhVyt%p2)79<8TwYr87q|C+fc8yxwJ({k z4-=e4+}!kGj*N_ihJ^HD&Qw^?Qru(cZP(J)1`#t}R8mvJ&&t|YGzjhk?pDMhDylz; zHkl(wQa(=~D@#N|Vn)2SvqOQ0_f9<_I$Bv%(_EdDn3#?N|HEoGQ8{vgg@t9h(#nij zT}6dvXY3s0gthC(1O)yv3t%B7;}GQM=Ld&}^I$nTIT4=yGBq{D6e`xKwzUDvN}ZI5 z$b{EL6|6e0qMV!@xGe=06_sH%0;*G8P*Bi|DdfEMp7)V@))4yjHU$O6XFtC_Opq&% zkB)jVbt)}QuFgYTcc*#E?Y&j7Ad~=igzbr>E!Rn<(Po<{s_u?=L#II^8y2cg;vXJTUd`t@rs zrih40V?#qPCYYeRY={X`ai0gjv*6t6_Cz0iXJ-c@F5ZXf2wn~jy_kM}egS%pLqkKR zdb2dqUz+z53W|#4<>k4VPYwBTUQ}ni{8GK5Nuxrx-j(x3k&X+k@Yh zc5IDHjySFNr}n`M3JU|~hAQW$LH5;CQ87}V1cmP7M-%`*%HiO1rY0t1xeDav@XXB2 zlM|&A;A#2?=?D<>_BtrM>Wy zs!&I#W#Q#^Z6yvlZ&5{sQ%@Xg480Pa!eDlTxP?Xjfjj=+YYpXgb9QbH3llRb&Gn}+ z);}lB8N;Ur;6p>v25WC>RGMZ%x#N2R8UxPnUHxXu*Yotr}b&rO~xiFll#V_;0xyV^b8 zwveCCsHt%Sg%A|>F>`JOVUd5IG1Soee*y+eBI*A5$pmr%hxZZ^0WcW2K?Z+>=cMsB zSrd7AZ0c;ecs3gR7bO3D@J)UDf3PA7+5PSR5#XF-_~$A=XF&ni*3%O)g!^}PcIJ+b zBpg2Cvz;yZ>&3v#Y}H-+4}AT7c(Pl)z@}5hA}ZPxe9CRw@Svy6va&9i{^8#j`{*lD z^nZn{|AmHSjmKlZbpaS?=#Lj@m{kAo;2mt-fJzJ2tgvuH@aY?h4jwtbe;ya5_kT9Y z{{_SECab8(dV3tC7+Ww|ee-|*b5XYyI2=wvNeSkAs@3ClI6oaNt#S7^M*pY`@gUip zVY;e+@BWkzd;h<`{m!;}56QcYy{^ME;{QEM-ykdHzCVSgH*giOa409}HbGhExdgh>o zNOtU#v02OK2MBLr7FHJ0+#Ep(Zfz(eUsk3^)KNgNbHM6k@D@jygN^g_XLPw>qdrq? 
z1Gh;Z^}v7>~z6Y5@^{Dlmh>1(YS-v3k_-wXZEin2p|k0 z4i(kA!x=aV$~eNpu#y!&lZBjYGbw*m_5`jzNn2+|rs7W>SC?+W=e>AXi6@_;1ygV9 z4P3nIt~$P!VM3%M`|%%mIG0wI+IDIUzi@~zzNwYXaB_7{;=W=up8vGW-@WpJ{dJqq z#n`!j)SqJeGFd@1ck2u3ghkBqhJg?tAOEii3Ice>2Tj3h`{5k)Zt|&iA*wqWk4a7* zitF9O%n3n$TCNd6Q<8mVRQjXeEbNtc-Rb81uZwn{Wm6SJAAIT+by>llvW@|Ly~`FD zjO+dztCvQn#u4Y^xGmA`9YGRB*t9?pxsImF?^vFD`n>>&) zE|6}^;e1IsX&{Y}21fT-Q5oL#)ksmkrI>iwLeaQ6IYCOQ#>Lnb>**i-lT*S&tX~I& zHgE!ahWM;T$sk{<^K@QocAieQN>-N_HEfMZ$URmWwz^TxtFJ~@L)4D1t_^zLf7PZ< z_sZNp@0pL0=@VPkRD?Chcq!+PR&zNz9EGa#kTGM02H6zo+tqwvJqu#3XV;5lW-R z)G091@%9|oS-@=xPS4EuLmqCb^n}ZED`Cxsm-=;nK_YUf8$3M*HPr8>`Pv~GipcQn zw9Uy@N;($pDI+L#n=e|Yiy)ib+5H3M0vG;Q9dtv%${(E?($BJaGr#z&GEqL1ff*+3 z$e4tzeOo`Cw#U{UIjc(%guBBU{Uv}$H?-0D@=C!_pr`b?3DIT1b6o2O9rmlSYT0FH zO3X}!7#$crJxqo32)DMPt}L)%bToD2Vq=uhODm%4>IFp(!g32@3lwI~*|bVai!z6c z8np9e2fBqkSKFoRnz)^J-sJyzKIBjVP*6>)$u=+jvm8ouL^mN?p-80r#z%d1;uppV>D3)a z=LXf~P?ImK9n8F=Nv`Jl?x=0S)m(@M5$v5*O2G>4xxHqkr~Do+t-qUSABxzUp`c6L z3c8$C#IBB!A9`t;R2pp;H`}b7kGrB{VQdWLQ;Ole%o_79x9p6R%MH!yDK-tXJEF)N2yDmv5msI*QYzLhj8@5k3Olh zyIXBt7={a}w|>c1=?cGCF*9S|8sv2vU%943-8gy3M-rstYUa4ILezM9$TaeCY51sG zOv^_5XnvdBX?rp*g8|aH*Br@iP@SkGIN1r!3#SqVu{rpsLENg*V-HyESm45g3y0zu$Sk~KmRL3+vW)>#Kb-uJ`{!Zc8u8uC&foF3jhSZg16*cx;`9+xp zi&C7}BUNfqspTnnD*9T`Hw7%L?~U+Ve7APYky{n^W^59X6tiYi)yd&f_MdOP3^8EY zMMgL5^-P6C{&;d&Q(&HxU$`XLFUd?ivKeRBdsbbBu;>ttTOKZVKngo;g(nUqs=>>2 zWwazr74_XsD&^^%UF}a!9pb-OB}PHeIY}oGnO_m`rK4`kh~=GKYhm5gS-lj}K#Pls zqnAzkdmwHK$WHUICcygZjobkom86$SEJQ|GRa5$eCp6kbQ$?RQsgjP`@BuM)>HN7TX#~m*yY+un-vbmnwE~%ZNpX;STv6z zOoas))MqZd?N)@r2_$q8UiQOWIjcl67KvWL$>p@o=n@jKnMDm>@o0#Hg=bTqtj+To z+gY{XuGDzYiqcUh;z5+(U;Na6ER|??f9svu@p#YMN2v{(S?W64nzQl^Ht)37Y#;DP zykA{IRv0gQGF~4pKkjA*w`3N%vq;rQOFIB}?%I*?G z9rll0c%Ry`DLI9D8LzeeDP|Xv$sZx)DUf6^{z59#KoaMaiLP(JdxAFUi69q}!AmHQ%Z74iXu z3%aDdn z>dI>B%0%hkJUE%o2GX0(4xN^1anowv$zP5B;K^gtrS-pXtL0e<%|t&uK10VjVP!0o zmhbEA?8n8$`O|;&$C!rQvl0q3Gg@4%L)fWi#B!z`oq~t6{qU~*NT{r04|of@A>e%J zO{Y6w=oPBZqan~(DYsO4PEpX 
zr+S9=o6g`uFh!yaS{#(`yd9BL>8j2ycyXO&yrv#vJ520LGM zjDp`K&R5Qkgm`Crwcj-%bT9WEL&#UzXK;O|I1^*LoFg>Z)=XaL=U&S-PHe6pN_>Xy z-~lhRu)dCzp;HqCyS>|I`*kZihPfJito&rkeU(UU->XiW#YjJa|7#{g<9x|k-oj*e_^7WgYbq6zS>a8ICzpK5>OKflu9LI3ty4pyG{P&0F;cbgwWZ3* z@n5&x3MQ+>bkFkeAq^sP4Z^xsA}8vySFQ02tL(CLF^@LJ1SS{qGRw9uY73{_15Aiw zVJzBA#G&mtRGq#Wm^oW9YUJTEK_z%d(K*48*4|5I&iLXsGVG!QLJiy2<|?-*=}zcp z=bIDdS+8koNr@g)#%HUJd6iijM>U_o!s}9 z<2lIcM~xHMMbv!F=8NlAZ$})bD;_2B9SgXU&^NJSn)F391~qF}2Sip^mnZUUCi;j7 z;X_V5#AUQl6^8m1&rI#I$1r0d8JQMICR6izw5$qezkj5w=dm}|^JIl)`9Zq^NpQ1= zw#JcVG3oDUs;hL5&UO-G(n>2TI5u$j9>2{SFmM2(?7Y?k%3Q4Gd$JNmar+5y5{YhVTNP#6py2Ay!Yi&6d{~9`U zTjGo>HqqWM*>ut<5YvGT%IM(csfJx^bJ+(Ad5El95+#l#g zOaUq^#O^hMYRs1MN=sWiB|e*(3+GfftLG2qgsO7LXO(O@O4?Pi`UY{we(AqaE!*#W z;zI}jE;j$6Gu;cm%WYaQlJ)!olT+Q7r+*f)u!6UB(f-P-*EH=TeQg8V87?E)aL5CS9cxFd z5L>!AHds+vb!DxSNCY0_t!nZh3bP!R99Ck1q>YEXq|1Cp49SHauN3a?`RqpCMJPe& zYdxU~Z^yG0k~P*La_mDH3;F?rWYJ%Kcn-Ig^2#z#sE((@Frc3i7qFq37U%wxLo+sg z<~rX33ENkPhmIQj5m^jdwL{|5m5*#-V>p%TW)`^M{ixXeV=yuDcM@D-FB6rEA)S2+H7V3D&pkJb`5PcK_|2lm_Z5`Mt9MypMS|Xk(-*Cyzr5=WLK|b9 zgktb$Vq`k7cafKw8jT`zVK!w^UGwfHTQQCGS8g=`QYNQGWHR9Ltz%?8M7we?XCaA+ zg2$5(#vX}*{?dP?;`5TqGIg=yBU?6wX(%nVz^;qrBWi!c4bZ11M!f#cSYQ_!G5@p} zlq+tsT`&mxNrLU|(@c{K>ylnCsNXL{b9DePfR1G`Be)T4OX=Ynj*W4WZ-oq%JU;y3 z=<3|f${^d*-IyBvnBezgXWCDAOYiJvs+^+&0!F8YhgtUVlB3{}umAMN-kfMT$s|qM z8)?h}fY?&9szB14nrZFwu!SE2(m6JZ96H9q>CUj|>`8MbfYWyDeB}+|+UmAi?nJdQ zx9Fqhm1J|N;`hB8SJc%pO%%elW1_y!S2^#FfEoF|`4_kH5v}k?!Mx*vs;Q#STP8zu zkI9evdbBfco;9_#{OZiAMy1mEbzDimVaDsGy#^{!)iGzclfGvQ{Vf5j>Ha!7|=fkHsj33)#e zwEi0LXN`3g^!^E}ppR$i*u6OJL`15OPWKnhe!g8cO^)-rS!}puS1=;grmHVcfK)>A zN4B@?R5HR+yecK-&`Zms(awl#)jlR`- zw(Ju!-`_JevN7pWvg4F^OQ?{nM`$v8{5f|su=hj0$`5%yGia!aiRd8cu+gbYOG_X6 z{Td&y0-PM({_p+$X5)?sx-?RNTW>Q&bxtf{Z1^B2?03kP4P`9cQJXjYw6kvP zujc`d2&b6MyM_Irwk|4S2nj*QvUe$AW-F|D8lPaG31^=jWTcT-mwMPS-ydj@R^E_V z0EtQ%B*#X)L4|yI&63Fwl4-uY6^W@9uARlzmsmp4;jqT|P>9@0)cw`SNl4_Hvn`dr zm7JIGocS-YKTuHkg(lLdd^Hoy?znZ_j-mPSk-pX0SaLaa>mOoAwPu0xo@W+GnSCy> 
zZ(k%v=!Gd))@`PZ=RlBrYOFsHD`%-cKiJeDW+hsqL^RCtK`%I1xL;0A>8b1Is}y2T zJ6Oq)*Yb#u7?u834x;t&`?SxSXAzQ-D1`MY;ad!vbM{+85ld+*AL78FTuM(3FC;T{ zYs;8>&$JT7?%wGm0fFg5v6KBV_dccO_*jiHmU{1j#nKiux4OCbhId+LlZib5A6juw z*XZOw5x9JizR}ImI@8kEJq*b&$X92u%X$A-GW{`J7BU(}0YR6L25U{T^+0X&01HdbCtxV>NjDtgbMf&FkZqUx}iNxM_6*; zjZV!iQHh0RM4FUgG4dLl{pZA6TN^E{8~~3PEwn@gE4lOxGk(;t@HHnO1qKI_(+QTA zR({44#>d60sR}OK7-3MWjdiR1x}s%MJTof&GI}5;5TUy8v9lDmML!%;$B3P=f-93}NgQ7uhf zNkH;dxLtM46H2tvWn14a!$wbbT1Ugq_0u`sd*3=(754A82v*~6&*#q_FxtI;zm5;7 z5Y*%9XDca>t+pX@TQ>b#+Bz}QsP>(Ty0v>4h$dci5}MK<0iz%${eJzEaZCEn?#c)H z$nPWDNMSEQ`sOH@%S%|lSJQXL*9r1W%2pM%mlAnwuTd{O$@>k8I?Bg5zrQ(n85rBHL8_=eOWpAs z!*^49zLX2KNYaTP7{?xsKFA@*auP3nRTaq*RiwUNL18QyI|2xpD6~sYr1Q>BEi-49 zSwrJVV{OaBlNZHkUG};v)2V9DQg(JzQg>#m9ubTisH#*Q1=*lxf5Tf^U3_6U`p&1; zf1ekrob!eNLP5EcD{@_N=#Ug2Ab)kn{MMM@Ys_7*r{hZ1Ko-Mcd-2kj3b*kW9#Krm zHtaEAVZA;?D&m6__1o9QzARn%D&Lhr1x#siM<3FB^=q}+wMRX3ERQE{`jykEq~EWX z=$lglnZ7VKAc|$2QwqdT{We}!wg-cnptMHqd-`Fc zo{Q_$m!puaqFFC?+<41?;a_{yYbUbLm!785Rv`|nOP>u+^!#e-C-V}XO`{ap7*QkF ziWBl!JE}MEy56%I@otAf{d?42B|Qo`n{MezcFjw-zOhbx=Gs(lQZ zu6m?@Ha{z2;ML=6X<~7>h)4`(++^IG3eJBrqK`Bm|2D)fPx{N%W8w$Tp-lCqG+;ZJ zLxVIg%L=q>EjoNEwZ%tHzPwWMN9PSQmyEU%Zw6qg0apVAcPWb}^K|a4atbY_}n9tW6&CQGz z&@mnzBy!rXNEvv;#?6;whUuD)(t--PS_XM66Puf>m8$@PJ6`qP9vxo}FoJ|#P z!}!Dw9b7XQndqB;G?ay;hZuQa2Il5PP`Rx1=1lq%29&Mcm~8XiUgfDWf!x&F&%DAP zk@*7v^(R3oKx32po5M~5pWcWbStnmm6o_DEV>DAZ4>N!HoTcaavm91~=F7+j9e)cl zLEG!0`ZgrQMqwG~7L+^4O}K`>FS77+=5R>C7Hr&Z-g9F#g`|BT?FF3>jAKuoiE0Yq za8>PV?Ld|e`}68Aw9n4e9Ta^sVsip!yB)S_7O~$hJj04uwj3o}WW#6|q1B8~E3hOy zw?k)&mA&93qzQBf`7GkB>R3!0-<4U-xNM{~RXR9Z?)?SHe*fltX{tqSR^wb9J8|Rj zCLd`1-Y}?bYPq*ZzQ%q$MR(HE20PtwTE`346!z>+uncfGZKsrtEvTtTQCgi1CgHTv zLG?cUMV@ARPK>=h^_&K@96&0ge#S*F#znu-)}?R9!To+CX!fcBHTbnoHhK z)frfhCY2p^mmSGM&|5feJe)93j<0-rY|G#J2mRr7db@4sb>VDenX8hcC|z64Tu>Ri z+N$g~?{+Al+&E$hfLl56(P_q2R##WMC0IATF+O@!TU)!}b&Ny3(t;~iQBh&ty?Lh( zrWa=xXxBXXls{aC3H`l09VS`mc^xwrq@=gv>s(QNbJgPRckM*~x#^5k9L>D)icj>S 
zprYZcNi6{4rP3?}!HNbA?E`PVGZe^Of3T;t`+B$XP*KAeARf-a^8Ag*vQA$17Vhz9 zhNpaL8LF%o)(Q}ZIxTlaeQKomE9K2^wfo0Ck_Ycqx@OpwI3KaSBh7W4=zJ{HJmYzB z0$OHR+q|s8K(2-0;6R~q{kZs*GZjxZdoya_6JM%Gs8t+=n5*+ZMvNJ|L|(QT=-X*$ z{2!4|S`BY_GK(Hq+Z2iUc%J>7*F#=xUjRm{%ObIWtTHdz@wMB=cTqihQLfh2+lzy! zpQmyp(2K>15t8D3o}@p0(~F~R$oMVWF6M3RXzeI>2G%KASh7I~jEU)f&XoS1e!fQ) zkMu?$ZnMk=3~HlO;#SS=W@^1J0I&opwX-2U6p``4t8W3ruerkLZpOn#25_H_$ zXU@B%Oq((8OS5*f38>0BUXmYk@5n)K{K_a9;{HSzu0t2wSJ8RN$7?+4C!6!1GA#Vw zY2IrYd@P0IQc~`2GBn(qf&dM^n2@(&met0nv;hQI`EsOJtUdKp76eF{VwGBLH3#J?P;6Q&|Y}Yr7ur+ z_Uuf{>3E_Z;wu%+uMY(IuaA1)-bg;$bKD1Qenw)Q3Ndy|*wbm>*59HJ{XTv0H#Ns^ zc|&+QjWy6D?94*#pUW`PfGhDOuf7NESl?^Ea`R!_DnDd|u!RE-tMg8N)3)pAT8!%V zC(tZ`{sNHvOJgvchqLok7}Ufsg3GfSH$96!nXpT?-MDXjvk;bu{!$VJNM~oYNvQG6 zdIzo;;8KRq*Uok*l$FE;S_Z)wELLO$b!D+chg)rdwV3NxY(aVZ8O*IA8$I8;Af;Ba-l!=tud_+ST= zFn?6v<{iCT8+E$aBE*FP5QO@C5hCs>8P7u%o8X}ELn2}kTT3&B`jE$CH$`ri9nb$s`kneU&ZSH3>J#J3?VOc>^}lMYqaE3P?=4N#(IcFPg>*gK$r;M)NJ2ogVHQVw+r`pIitsBpa>Y+g{~$ zJmMmQ_D^>YcSGo6Wa+{vMx!2pWcu-MlRnPk$~!)9_?N(rYAedwV&{VgEdV8+RG6g~ z(|^qUQOhno^D{;^=j$gWC!)CX9g3lH$JY;-@hunVD!h{iC8_Q_CR9QH*#?l{dDKM@ zT!+Aen5?M;68W!Rzoa6cKKJijJ)%qd*9iAiJekkVHqL`A#x<&E^#Ph_tF|l%!n@bO z!pYz@?_G0w_u|I;zwbNP$-?;i!zgR&MVc9J7$n2V#)|do?;K$b;?~yI%|18i)WFht zc6O#9AVU2fIB9|J(AwxZ3i3~8^&+=Xg};B^?c-_>p!W#>_~Q@Z&kcV1Fm`5UTK^9e z6W<(mg%l}|@ZW1gMc&M<9&`+i5A>V{G|KL$)7>gBcDZ^MVTa}>Ka$@*ObV#p%( zLSSE?Y`vRGb1<+T9v+XCu>YBlNa!y=G(I^I(vp8p{2#Z2{Fk@xzjvB|i~jiWV-onI z!FR3DwkhXBNBJ!lu*wfr%p2Qu5)|pMkE|1@bYmgE8v|VE^EB>T`~eIW&vSb@7?Qr? 
zoO2lXpQ)(Q=jvVgO4HhRvJ&Iscs)+cA=1FZ>E|c@SO??{bc&9fy6fMy%zWt7zwU|J zHC#}%%*thh7GJk9`WhGY zvU4gdMHtc<)il>4{s5JT03v;~+sh}Wt)&bFBIC`L-vs3jS^0=VEJOg=LIl`BYy~Ax zm)7hdaq~KvmML4awUnk0tUR>zhc4|v*9Bs}ys~Nw?!@LPszlC&ksaV38#s>z=+WwP zxj}M4Ns3sm0d>0RX7l{fuC}Y?M4e7Y74Zv;jk5Rx z^V^Z~=9x0N3{`x{ypM_HcbEp$5NMs&^X@P6CVH}m-OJOCPaR-9@<7OO-*|8Pij(^N z%5TwEKJ%vcC_3;V5)1MkcAM2>vTCn24?ymp*3{uSQBhD;Wz^TYkkB_Q)qIx3W6}%M zH8SNcF-3iS{a3F($>vxZiG+6ni>EMP3-!K)+yGn%=^VT#Vvx-FiSx568QhK58=sv_GR&C~tqMIGm_y!KcY|i~RS>vSG50i2) z&0J_rA?Q$^dQBEr_;?q zB782`1X`?&%_OqQDT$Can#7a6IrX2^=1hc7fX-L_3_|gxkkT!2r%D?qu08z z?wyj*lRc=~6Rp`euFMSzdou2OT{qBXq-^273IKUt?)s=cSI62KPztdhx&UeyEarE5 zHg3*WH4~=t-z~KBCbmxl>)@Yn8XITzE*o0Hle+-;15UPWOjA;AnkKsqDL3=)>V?Ky zaVAs1tuPIuED9w(-B;)XVm7vpiIZq$Kj5yyAwqWrKX@%M@AEXK0mkWe{wh2*+-tc? z?3_ZF5&OG^$I_2qF&)A=Bjn>ZM;&((+R@kHkSE9S36k16T5QPQrzlhdfhm04gA;RO z!2?3?*H(aG!h5U6P@y4C%g(pz_$MbYNw{;RCyO{+XKe%TPrC1d?-slgxI4KVk1(h) z7&l|2#7wX+&el`Yi)PI|I!EnWe6(&jsh`m|n_rPrnXD0xYIF6J30z9}&RHCej0FY(JYZd&=sOf@ya`7-Z;VH!Ou@;i{te_g%>s6@Dhd zY&UY&Xi9bIR;TcUOZA!2xb+n)tYmU$WTyeXy3LzR)FkMQczfeH9^=YU5@_Ih%7XglD&eWjXTuxNE{dt5xwJe6HYd-}7~Iu_D} zOi9!k^qt-DXjc*yvz-K%sfjJ8Ja&C{X4o@-x0+gcvg6UbLf$f#__)O5xizsSH`o3L zI(Hiq!++ZA0<{wO_WY@UQL9kSGj8>aN~CcBk(%Rz zPP-YgKQ@b}&~pExtXki7&$!*}nVDXn&h_PI7pR}*jdbD2p_q9^-A)4pq>^NVm6dnr z95F(Dn>c|~)%%U4L{=;UE^8sP-38Q{Sb@wfkj5&Imv5y`h zpIKH4va>7EL~Ckl^7HeL@;vH($MG#TcJtH&@XE?AE-o(;ix$m>woiF@c*e)aad}e7 ze{(9o(Y>0Bn?v{_ANnostZsmr-^P53Jo$NL^*A`;PhUIWjk%rZF4#h3szfXUGa8^~ z5J+0gobH|CUi*|^KeuomF7t~#uo~RPY1bm42@FQZ^~u!f0Bwhp;LWEjvr7j6#EU7X z51i@pOg5c7V&c9`S78FBq_XNQjBi#}Mq}&;W5|(%>){7yI%mZG_Tn0NTy*6^o*a`{ z!N#UD{>Lmm^NAmn(SUb{HoHvraDLbs6exTMs9A`$oi3rXr!q1<4$k(f<3DR4Ieue)OzCp)(9(W<$(cd#xOq2Jnv)+ZZ8eQDdB@fRL~itiWi%Jndv zQ;PP**VpnNMd|>O{aXb~W|d@C4nudxNMB!DX>nDp zfY5J=?!Tt_@bD1Ua1`?E-Nt<5^@M>hYxL;tLjn?#YPZ9s;b9fXm*!?!H{Zx54LOD}~iMW6QWf9O)HlB}&iGhp;L@OXqDxSFr`Cfd*i@*bWf6v5ln*nVk znA&8s{e7WH$C4T>@mo=QACMuE)sw%*oo;SX6%rO~)U#@pZ&u$(4Ji7aS=^4B(^g-Z 
zoOU2WaFsc$!&(jkad&u(m!u(`ZC0fN4F|*v1Zj;>ub!mqcJU;p!XFeId#mJ!UEB)I z+$O#7+K~j0iCAu({<_6uf&HqoAEhN8U68+ayxLSNQsw}EVorYINDc4$w<(cUX?3} z9ji8jCKBiv(J=<^t&DG4j$Hi^UYZdK@?8nn^A5Mr`IbKSnYsZ%3ID_Mo3NdRE=OXL z__z_mu{#4O`|Rux8L+!H%A`vHCrg6=Hv?BHUH*0V+fu2#iT#V?gT)w< z$8WhV`W~zCY*AJ1xj5HVC%=;ZH~<7|a9&jJ#yDKtk@A0 z?P;aYH{R&f^aD~)sGLE+h3GzBMyCc+KPBIBDzMCr>^pW`4Q$u?+qUH9n5>4hW%8LS z0!N`^-^rZtIM~jC^UzOqy7ReoIKaPP57xyBusKtMoq}MOj_^0If2+4wJ^(UQ>u7fC zE%T$GU<;_jLheWX2)0UXJvFtlsP0Pb-34Fd-A>88yA4P2$;tJ`9gj3LCQPkFUOEnL zpECPij5!tDr(W^UUHT@l{}`eVjDq_aBMAOF`t*%yAQ@iprfO&&b$nVzFm;1=-ao3d z3Q99mWu)aY7HAVYCJXrzK5IENYq1(T226EaPdn{(cc7{ftLW4q&q^s8tdQ^kgYnV> zg15ufH|{^|A{D`TV8G|3r2X(*@V32g#YaT&kTHy`(zGvkA(e6m@H}wH-masvrb?6h zx&c^;DW7Qq%GysxhxlTaY;(p>D;d;yz(rhXAM@=CzkFM{PWhT9*C0}Hj}DhuCD^TN zro2)3>4}(->%s#iEs3$uO3nFZ63>G*xsf?tmf>+GT8;B>CbS6%CAzW+!ioV?h5S|p z^_w2&JpH&_1J(eWL303@!LG$)3E0C6(RWj*xO`^7{^9?vB7_N*`dQ~dy~g?(ufsO@ zY_DEJ(f1$U@QloiZ%%`bJzOGw+trgn_sYkTZD-}_{44Ap*Bvcix>Ox$Gw%=O&jEgk zRC(8qs!~>3cINaP1FCMu(#6VF%;xdB5 zZFE2LUoSvTf$7|Ukb)dtxX)H%#*JJz)2*H zI5f>Gv#@-#EETDF*C7AZpMP+4)M;?DFHd>;RMx?vOeLQ%_%$Wi%j{h00adSm-}M#9 z?@#1ZR8)vrG)qkT$b6B46aHLG%*;J~eXBx{tjtVecKrsAQ%kuX)5(u`zmdxwHlCNi z9Bg#Zpf_HxN_<&qy^CLWP>H%n4+td)dS134o$6e)<*W}RaXY<)0h>ZE(mcQd+tYPD z6sUgjBy6h-vEjyOiYbwG_~{MH^#f3<6JOw1IlhV0^N(VSwh`4kC@XEKsX!!CxgN+7 zCoWYV=w3`E)iJH0FOs*H?EtuX_Eb<$BReK*uAi!jyHAJ%n=9Kl5|n=oYT$sLm_u_s z4h8(BZ0UWGd8-Rfa_B z+R@Y3$*$`Wv76VoIE{$Z%l^vAzzFI4<`t0(NjzXP#$8L^&cMW|1NN&RhSMRA78Nlb$Q*-cs8kV7m_{7RH6^3S|=3nik^0hEpBby$g0r35trQ zAxBC;TyUB7AEZ}Vj#&<60xtt2VaPt%eg$^z4O>>Kr>|hsjadELI;5cUNnj0<0 znAl;{t{tl~&%VmU&@sk#vo~F2mX&=qnbNCw0l(?M7jbdu91aB=aI!4wfX)cq@7hqG zh3FB){Sa^rN=#30pP2I^87CZSCQF%=PQLK6f2vWmTD%52S|nl!)-mju9}aZWBk19OWrm9 z`ZGrMJpo3Ceg5WDA)hVMWZmH51G$mW2CbNjQE`)4qqg(rrvl!W7gL4g)i(krJA)w5 zv^6`YD=?Qv`1JSKoN+XMC|mh6K7FfBTvq2&_}P~#Vp)}m$8_W;@>BbY4cc$2*Lf3m z$m0Wmy%8||z2cEH@48PjG=zk)X?1uK=+R#!{H~ozIBA4*F2P;F(kb{+MCY%n&S;q$ z+HJ3^sBZ`qjyoeoq7rc@6speZUap-%*EAar(pDp7H|g&=tBtE$ 
zparuCad47BQ5bG7Jn1@Jle3e0fN*N!aJf-`|2d=|G{Otz8E; zt%Ke5`D2!WhJXyLblMySq3L_M8qXBF1%4$1tm$eC05wWHyf@n~G163-QhBV@Ei5*Z zc&tV(D)PXhfIqyh+dg+QwPg-W z+yd(8TWL$$)00+^c*PJ~LK^fz)Hij$9KLa`bI0fab=RdM`F)=UF7s=cb)dpT*$Fto zwm-mryV~j6VlSh+B4)>^s&>Z7u|ZENmLL|>Y_A^@p>wut3h4y>Rd158APvi8TbJS3 zSH9SSB#iQf)#FE8bYUqMz3)=>n=M;-Azx;vw>pTZ{4cIKXy~4^_PAjhu%GX4i!SpM z5G5ZVtf+{`suatrgSKa2@|D%Gj{d5us`~A5t+>?) zOiguSP-|o^Pgq~|X<|qyFH2#2dAzanIs2n!P{);0_wZ&s!Jp9Qq8)rU^BF-mKot2$ zfqfr4LJmm_gk01DHiyH`%?O|0z=Jr!uD7wY@pG_3l_`H67i$3QD`KwZz3Io2>*k;R ziYzo$qT~YpM?jq(uxF23is_shdpS&J*`mLsm1vv20M}(jwhm(%s$N-F+wLocBKW!~H*e*lgIFU#zv}oMVnL zrfiBiLfuUC7=}v3wv=Gg+s*B5u7mG|6YSmGmEByG-P<{ZU>fvcCr^3Ky6HAI*?TT{B|=m7QBYp05es734=bS|36lSe)c!Mk$zwDp{AoSz zLGW5gW21n8^YOZoHg;Is$B`_pL z2NnxVYDKNx-4=(dec<*0LV4H~;5xE4Re1siEb2uc_r6@-=ZN8RzdQwGHjd8)mmHgv z^p=R{bg=B^MEeM#h9ZBF7546mG=5)>@Kw=wvx>BPK7XFlTCxzm zL86?sd2`o|ivd@NhG$3f=pC~|WQpVkYc%Oir3o22vvzf&5oOJL_mNVe9GU50H@)YN zDZE4F2gFfJ0~owD$6weNlvd;8R9@_kO`MRDdov(j>RxdN6OB#{bm5c#RF*0FbTG~6 z5>#rYV04vatVDsz5nw>z@EUo~NrOMwlm(9>c_AWa#%`}!Q=iT|rm*3X_LI zCbCF^J2XodX3sK1kjd|f@=H7Ox%DMf2FDWi)o#?0YzbT*_cvp#9bF8182xPIb9*#w z|3mGbW}3T9V*Pd8#0#b!+0Q8N6BYUjM-fg*mX@D(tJcu24b@5#-Eh15Ht3pbuxVz< zrKhAlICF}?T?LqA0wY$AW(b))Rjk^5Mc3J>mMPmICprdz5^lz@o|U-Ke2OZOC5nu! 
zDd~l0$qN85=8#OUZp|f)5RM-bI-D)n#&K!ac4<@}eO5S5^i?u=VYm2sIIzLy0EN{X zVE)dUZ$Neye&Q^oFC`}Mz`oa3p}VL?K}o1LHEhjQ8U#V@HiZgf1B&svJm zR0iF)Vb?Dl931>o0hi-V@cLOz5Dk5?jKSx2UIZGaI39<17F{779UbHJC*R)MtaQ7b z?HlOnQ3$Pqbqu3g(Z~Ljw$ch-1b$8T#1B1vHqDW=8EeE3Bri>mCY$X{CGdtHG~}6I zNb|d#p^$$tCXeDV#tUfs?l1a>Xt86-mD1C2v)POuw+-j-WYs?rO<#lM#XeKfeN8vN z2i84*IkqIF$9}g;yMJU3r$v&Ek{*#u`Qwm^ua%N=jK_gqU7N=huI9GgzICAdtVgR{ z_#Fy}`>4=17UtFuDOsPBvo0-Va+;PByN1OJ%MRAdM`DW|MJ%!VlfKK5(Wsl)C3@a*;E9rDGEjfqUc(~Qio%{RG%I;cl zJY`NSd*VC4`WA!Ke;F~WMLcByR))yfSnBMb`<%T>r&MNt(CKbLFdyGS2K3hnG-W|a$R(?IvA~ zzFL<&UlZqiB*Z{fD4fhSca-@-MZG5AIOWnsbZ_$K2Yyd$V*^XoSAj>-4^Qgi{wt$oYam63k)3Cm#e;jcBN8qFA4CbN?dy z3T9q2)h@~uA=oohLqo;T2{s+hYHMv>9Y~j+s<0Dq<<17IZQsIzQdkxo58z`yUUW7d zdB1LjCbZq2gkOK$Ff`{*^gh*9sR5y8?55lR8)+k-F!GsmMG*0%&BPaZkC`%k7QE}x zidv_J@g_`~*@K8~0(BY6CRPcGFlLRyD?2r+panD}3v+|=lb*9fKV0F&fk+C7PT3nQK|0tR`}t zrrEM6mI^T~&t0=~nZdFA)dFG=*5d$%RcZum;bIeP_cKH61I=!7MVb)&h6l*xL@jZf z=D%M4nJSQt8})um9B~y2Da>i-e@F$EGiD*G$iD5)mvf9`PTh3p}p=six-OTg0} z|H^pJjql+4E3vXFfnkq_tMCjkNa05|(wkFN_#u!(|3Ro_7;g=vvu5 zzb=CQO8ImpbOEbCO3@(v$wF&KbW-V&dC9W-ycSDjGvw7gwAg=9N49%W^`AUmqxmkC z9{fG1qqY5|#m@W(?Y0?lDtX`=@rp`xx4S)MjV}F!>Z|+2?}V7A%n0_5gmlOuo%zKV zd9m>EmWHDh{3F8*dK@#bp%&kaT4+pWvZ+HtES>E2>bwte&E(QM3N}Y3M@a znb_~jk{c;V_o|}!mF@DU&V?05!V#z4)wM5SPcdVwRF*vu6|z6og)oZS6Y*v%{8i_` zXz&j#Jk9HhcrANtp7WY6tVg@pZXqYR`B%`L8j=O~Z9dm^it|*J5BVRO@tFLoS5}>X z(lGo{*^=)^jP`8^YUT(GzpRcWHVc!VaX+->3ercGMV1|`w3j;-7$AA?Pq&v0qC^Zl z9_rfn^E#$e*>3FI#Iy2qT-yt4>kDiCSQR&`-^SGZli*MBe*DK9SL1!?Pmy6Zcu^l> zEpVd6VY(Sn-LjHBZbE?i8aY1-jnl|4o>)Cfv*U;d^Ms`Gw?YQ-mbv@(L|^;<%oq`# zFHWs^JxiAKO2o@dq`fktaJ9mn(;4b+ZFy6aRYfg}?t3mJJ3|?%f|hukE@w@ICm6A( zE^Ar&Qxh-s-NZ`!NKrZqPx%)Y7kh_rO$Ptyf*)OlSo#1!iD1e!ZT9rtyB4tE`PQW8 z{i9xIwASOs>j7q&M$TIbYkT|ay7Kp-yiWTmtTT5YN6XQww%brDH0VA&yiQ8TY4HbiqN_+cQMytGF>!&FHIJ6K~<=T#`_! 
z7oe1`UT(ZI+19Z7<`H6$Zq&4nO6k>X(fti9U&xcoG^CuBFoaqghT4YJY|rPrSlJ_S zNJYd<&Y@A$i5o;Pm<*?a=eGqe3nkW&HXXz>r!%L@9?kNl)@b^)OkhwZzV*ify{w{F z_Z!IsDfs!8dhKsVNfatC^6)m!ZEojl6UwPjnGfV{R zg=pwGJvuuLA}y!YSzW4(?^%WT?DqJ`t4gGWRht*a*QOYJh`iGN0-K@g_=FbeUK`CD z%WQW|b;>EqI&?l|KWH>TkZe0_H&XZBB*4)Beg?nGFq4WYHBciE$ zf|=1Xc%-#r*?ntAmBI1i9j#;K&KOf$|GLI%AnvqP3OsheLOJ!=(Kb?3?w@MPFRBO? z87sekLdw_1{L^YQi$ih)XXbV0KwLTY-nyjoo}w9Up-oJ4kUfWi>l^+%zPfl8Mbv$`Ht>vs0^+w3c+wRa5B&c9 z8#8tTtV=Bqr+J-$*5qUcdL$aA5#7&Ve;ZtFN)Q|4!OH*zkwsmns~O9!;#WqIejUU$Rxy}RDru& zW{pr2m`rhLe4y3ISJ~mep%F{%D>0TkF=&s0iaJiqWBy+tl+;Q+F@t1TzgQ(?p0{F> zQ=WOs4dMBQKVSWf^~u57OfGl1Hu89|-^`gBjst zA1)qBDTqbF6l4}w_BpT^xrHS^{Jt+aNo4ZC#u_(vOk~$;Z$Y9=9gpL_oN=i-q@zoj zCl;kulQS+9nZqBMXTEdW(9W-CuXe`rhO2>k`BUE3c$#RK^UG`oA6Rqy*U!r3d}zcn zhKtOVZWPr-4DI8U(kC1){~e?ZF4YERDrdI8Q~!SRk!*XHm`rqkYoe4=6#RBQ$-u`} zin*t&%k6Mg_VsIotNxaj^f!JOc)oE09@k@qM#$vFYHkb7=%}cu#pA97Ll3%(J#O5c zNgfE%fF0%Y=g+CBACTJ~EO!A$UJ2Ovt~z3S$mgD;qfY@I@sqVKl{}d963U_m`vML` zX_aaOXp8IF7s{4XmL)vGvL`pu2yhU^sXkR=9Gz)d5Y^kC8QEdMN4tHnD7mUWZ697( z%2bMkJ6Ln0$U=KAzTDPJDm`~`rP^i6wfjQwHA!R_8M5~efC&5X^U@k5c+o5%zRqqk>_4{P;$UL@OR;{%nv*G-$*q> zTp(ANT?F-1V*uUTwI(lRKP)xm{gSP`wVH?$lh1$5^FEWx3MPISh}ZYn<}x#j^3jcg zM78<(K+IsK;LlI#=Cs%nM19h!eOY7nKQQJ>y)2+d{sJxx06&KTT=r5IGZJ z?s|HPFVs3ctj19Ao2GrNo&(S?*y-I}PXibNpU8bjLwaN4JS>2dJ5xE;Z(>(_lh1g5 z)ka1azC1b9`s8U2y&{1(&3V4k9x5wd>*}cYbZzHo-r~q{*G5Aml{}WiDSt2fPX)T6 z+}djs4M*^N1kdEYVh$`J;NF2*r6CI*EiJ9>0b| z>Rf%Nfu)ev!=7W_!aRo8;x2L3JH=gyrTLA|UK6CR^Y83L(|x@pMR2U;XSJbikR#~G9TLc806#aJ)?5YrW0CUJ{a z2F3EZ&sFTDYUTQg-g+atOVC>OI}mZ16WAgj9!=(*jv(nKlE(KjEFQQM-q;8x+K$W> z9(L7DJ+OWtrG=yML$4}+?Prg zC4uA{$-;ZLWlED+JS&}M)}}*c=XL8!G+$3%F|ID&f4Cd<-OW=?^=6T~1mO*)3K8v# zhEIcZM?H<7RTi1=PxJ>u`2vr6Lg{*Wc~gg|Vp@mG|3 zSoQ7Xx3i7iXt8T8Bjy>ojF*T(1HssMXnOAxf;@=%W6n9k_!ZKJ13?y(GvlN~;J^|d z9&8Y3%(av~U+vdYJT@|8s#sgd{3En}Qlb2LSCIu=IE`+QxUGcRhrj1j6$cfIyZ-Ong zyA29gCtKUU)Sw|*GgIjR8gwuM(Od>A*&DlL0rQXy# zY8)u)d>#e0r7o(+O9y52D^d_1pbkN!Jd+anFWsfe#Ay~xzQI6Yp%^IbdgO&KvJ|H6+Lq+Ll|4RB!-yNDmsg>qDqBcLMT 
zZ6AQduk9Gy^Nq*b+xvr|A%*u_ioSQ8NZJ?;ES#LB$6GVuAL!}mD(p6Jg<8A17aej_L%x&BJ9iLJ(l+tOjH3WQH z`Q6T4_YCgrMiUHXDDn2qPW{a@3C<;6w0=dz5zwZCh4>`~aT#EZ2PfNB^Z$<6Z6~NP zVwmc`VigqPDx)8>Hxs|Vm!a@?D=V8|BzteCkM?JD*a^j}@D!R5pFFYWGxKXU7TwGB zr}IRc=EF>}NkeNn@Mt~a2aOgxucs!4bBQY!X4e2LRWv~lYdg&_jk!M{(jUk!35X2i zFMc|@<+e*j^@2~R&6peVJTIcIJK2%+tcVt@#~7$K4x7Fv%Hy6om7{M>lml>=ey#R= z*1BUuz85Npfo$)R9;FDO;TG;qSUU(f4})~8xr^UR_Gnbw-Te8&2e7ZaD@EM{zT7v_hyV_%8f;9jkM z3rL;{$?o9QAVzn7Gt*Z{vE3X+Ld}z_GjSben?^14K6!{qtF$n*RvAFyZ*tp>*B+2* zW@*3Jiqw8OQ}y&>cW+?3Aq{lk~6BTf5S?H#w3UL)<0i z__dc7r8Pcl1L+>&3$9X=U)nx!bGcql6Sy+s`Mv~L%zB~uu;T$H$qVywZ1TZBvdJ(5 z0N-OPE;e@6)epj2YFdPbXzB}W;VE!x4Hmfm&0{^!H4Q2cwPF)pyk;F1SbX5U{_W2n z8t)$pbeeEzy?_56Dgfd0z`HY#?gR&5C9Pmb0ZvZ(hrDy)lNC|z6XJKu0vyZNAWh|K z>DDZFnkic9f8C{hTy{G!i?Re_ryg zoydAQ+gePOH9E_sCekR3pCI`s`KPcIc4H$aQhfXFP9PTg!Cf%KS`9_AZUNep-eQcn z!aZ7J1+K%p837i;P0NcoIiq=gZF>FBi?XsAS}?cRk5{yC@79b3QHoMki>a3$^v(?^ zuTl5;X54>B?$cQqMOuSeY89OrReo*Fk4lIva7qR2T;YFB%1cuq1Fyo|({m(7p5rM`kmb@~HXYORzXC z-^GmlV<9#d4EcF|CaNcPac4nbcMo4HdH)M~fIo8!i`jbX^;JcQ2(l02X%xS7EA!8) zuGUAYDAq*uq3t&ND8~t>F-(2XmJzE80R0jI-D9Yo4U)MNYAfwBzO8Tt#YhpA5#xxL zx>z|_vl%)QEa6~3LQDEBvwtp$B!sN{^|}#YRrSIxva7z$xpl6~^qfqN9Y~<1d)nj5 zKQuIJz5|G#uGsKs_{u{&*w((Nf%SXRAJ|;ZC=Vq+(8lg}NT)7&gVp!T35@s}Jlzs# z$^hta?$^g2`3vWOq_5U=hAA7>KgE*00IoN%Bd}E6B#`PE{mJpwJKGss?;l?%(VHh2 zG1Sv!pvRB7dpvtk&OjAbPlhFIu(U|Z{{So&sIq`c*|7 zQt08Hm86s#eHiUeIy>lM*iMEqx&&ixjFg*Bqzx-YV7ak)mNhuM&K=P%vzmiZ4?aGS z#Vtd3>u~?x1B_Tc7klufKJ0*z0Mpej`gq3~!q4U9op3FX## zaOC@?ot@Bfy@BcUZ?w|dDZP@MVm|@1{4_8f9&^lw=bfG@|06-UL&WaiUq3VgfgY~< z#|?SCIyVNjfdP_^*AoSSX1Mf>&o0`t9nJ_4rO3RUDl0FGoh>d87UAS1;&BKhk(eBK zy0DV<-Tl&bA$09=G%q_(Zjo*jL`BtCXQZRADZ(R9i=YY_n2t_kD?C`YBM{7f-yJpeoVqIW&Pz*`~uH7E5u?ay_gVp}iJt z7h^(@(g1i7!`RtIUv(>meFv`o&7R@AgxiBBlGutsQ)()e!cPemYz_huIRdJowYzxlt$v3dvHQG2NK3` znA6pufLZB^<|rF8Gc(0MUb$0UY+-;HU~UNu)AmdaLN$ccvBLB*?dP6kX_7IxFfjv7 zI`)-~NczAnYnX|NDZ!o(;Zhm;(f@^@e=!=deAOaWx3!$XZee+g1uX~wfvLjsZHGst zvI!^#^ORd6pkc~yb2dECh#jY;eat>S;SV&8C^X;XxIorz9ZS=#xpkTXEPk=~DFZ~` 
z8GJm)hde8bO--KRZgIOF!-i|dbg!P?-B(omWa8Wt)CKJjXe9+2g+e{elS{AH(JzWv{$i?PrBYidA3L>|Om@9&pAlIlG>e*$ljtBP`7Wv`s9=ng2 z4^}}j?ocLh`4h z-m%2}>J`4ngSQkR!NHimx&RNHot+&bK`>l7IMc9rNDekGyFiVA#t*;qA=3y!LmrTV zq~o|1X&A`c#+sT8mOG=t7azKuuaQG2o%ZINU>H)CT5!$&DH8rquTMrS+2bkGD5=s7 z4OgnWI$3}CVkYEU1k#njI2|FN#f+7oaRsTXs~&F;(oy#MwJaTOj#AK3eQDC;9-k9XB{oB`na>?n43nOqFfjarTU^h1c`1HKw?H99umC`xD)K3`ovG|hyNLuA8|?lweL_YB9tVEk`y1 ze|IGrX{#R|7|MY5V&K0_m-1Hwmg1SsbIkJ78o{o+Cr3QS_+#)U9!Dpsef7A?GZEsq z9Q?fvT&oOi9$T3n4A^0Z=DN>4n3?<}rC$jme`D}IGEpWTPY+vjj6xF$7jrj#DuF*) z37s*Q_2Ddn$Fm2b$_jICQj>z|153-x6yDI5^j}&3oEt>;74Uda4)btveQlb*yu54* zc>2uy7l%2wQ8y8Fy2JA&laZWi(-9_5GG9zmAfZ`%|ER3I`1XW)wb)Sj6f}=8{A9T& z;by+6HhN-udKv~yF*ADHX~?6J{3Q{^qC1?WYMc0g`%$vT%{8>_69Z)2V>l5$Ki0t; zu2r8HPpn5K2ejfY^PEerOdS;Up=%PnxM8~H&mW^Wrrl@?NBpxxO*?n0j4o*-vzqLi zZtj~-lR9c&9q5YZI*hzCg-qmNUj*Ek)%{SO2vLy zhLg>N+^Yb-P_0zQ`<`~|yy6m^06^}xyB}~c72pI&+^i30%Nlg-s{*i7Pd3VsR;+@( zxudV%?=8&{pvXw6b29FPbWgPB2in8Sc2d6x+<&R;E!biokja&)FECq9t%+i|vMgh$ z>{!tj@>J1~$Z=IGxW>Vfl$D>=`&M7Cz|wtO0=Pz+M-I`VdUG|RYy4+REkuHrJpge$ zvg7H)X&J6Xxn~cYJeYoogkb-}`tqcZ8e?efNQl1&P%6B0qe2ycVuvbiI>-c=2kwo3 z73R>Bo%*HNDfL$Nr&KO;erK|3`9~v{3GH{708_$KeW&h8W}=Lmqq$`>{(>o*HBE*B zM)1MPhl=))PcAs9t6ZZ59u<5QCKDfA{qXzIlZpFS25xtSNt>8c-Zqvt= zM2Kc|=At*P2@Z-hPXDg;q45g62&r@<7G!AU8|mW^&B|A;;R${1&>hLpa+*2!!4~do z>Y)D&G!9%GoIrf`G(j&U?N7J3Mq^8ha;_FJ`Oky|X54_<#lznWcxcT($lrWtPEJl< zEAt8rM30+&4Jj`Sy~|e0yiTto5)BOr2|*NmPZ@e~vJ*pc0HGJq5=NR+htTqXfR6TX zdV$Gb;Z1sw&oPse>%suM!^i$fz(6V*mwsP-cQ&(*g#k`Pm7^b+W?l=9ss!q6$P}$T z2OH?<E_FwY_B*w;1RU*(-qniYCrYs zGEu8l8Y{ioj6p+q{p}E`A(ik1#h)@eW{`0ji<*NxBflM|#$8Hx@rO=jd>L9Dz(nA--#qnc`Qn6&%g9hh7u&vR zoQis;9#LdML=y93)ML}^9ZyVydW4#4#5lhQSY}pXbn|WRnV2u(9l@jjydEI6ci*OAwed`@ME`_lUJSZ_;sdx4X(*utSdjrZ8chtyAifBDx35$Zae1{T<) zK!5{6#IP9;i}@geGn0cg@K?q?iycUKuLpHURa8l4^8d|DT-oHnZhb9`XzjOy8P*0< z4BsNFs;Kma+|5_nTwVQWZoaj$va+@&iuLwreF$jl9fpN4(+j4nt?9f~Q5gkOitOy{ zwFQ_eCK=BgP4<=ijW0M{xXS(W^z&=+$Dt?Tb?mKnaq#@yHVRjK?Ez%Qr?eSR=VOuz 
zbTI#fAb$6Xx8bkPZ47x7(8+(Dr1u0jYkGTWyGm!BIjwlCMq5~JtmG=4#-HF?{;9Kl zDR7k!S-qeYU{At3RblOD%pOnUPZ!#{h+wf+O2VLn}3OCgqVpzN$o_zjf2Y(<(T;&<>AiI@bJC z3Ral41)M_mpX(%`atTBLDNQ?3k%)M!w=(_7`To#qHV{ zv9~KRV#81Q(%IQodogQ0IS}b4(xvvod@+i=y0bmoYo>AW&aKHJxS&{x-G4>J62rV z)nGuL=?DgSBZGPujHHE1=qBndop!O4wwj3jQTvw-T#1ZZ;_4?Wv}78wQgVavy*) z$(#Uhn;Qm+WFA>rv!RwDo|Yx+x$Pc0yWtlKfHQ|`Rt3C}&pRVfYM>!CMDnhrL9DNz zn7HVSX4iJq`_I)`QZGq5D@fc8tB>Mwa#T6O8Ymec!kCzua68+yW_{SBT`el$a;D@M z*gj)?_jjs~PV;EJ&*O|~ol&RN!;YUtqR-C9CcdgN7y@__QB4gCa5A%@*FDa9Qxn%q zD1%^cBdIZEFfMbYjB7lGB(Os_X2_o@wADTSkf+qFix$qwu*MG+ktCu3t;B2QpSQuR zkl^O2s}1aj*II=rw^0&-pDgz`FtQ&ZlB{mX>PgghjpeIesd4T!(J7jMpwRWNk6fzz z?&(-~jO=}%P;fYkKyTokiVY~Wq7W6M2%B-+<)XS)byi{utA}Qi8Ie}QL_^bI)Z=K^ zu9ABu#rFU5_<;@`b=rtJLV+5PQh#R}Xdsnb__G2U0b_m>jTSfJYNpRIN9-SAb+S<*{@{(QCWD*AIxL_K6mVi`*PHa zc!to{VGDsW`-}QZp>J#3Xr8N+6?>u4`jGLjJr_$oUyaU`D^Jqz@KryF8?`&ln%4QP zyjK#J-UnG>&xjIlS1A_u8{$?Kv@i}i@x8|$K2{{@X^IfrU=ivf$IG-@= zhXD>qHCMug41hDpoK{~`G+N`8&D!~hB^wu!l8DlFqI{&KFrSG+I(H*@a?Cz}hTZLr zDZF;q+D@#$^E5|TN;8jxC-*miQcw;UT;9FpX4D^DypI=zzBZLF71$2!KscxT{UufM zt%IStQ)^+4O!N-f*skKfX~|9b0+wIH_E5g?);hVKctB@t6`+96w)uJOuL&YMrXe3$ z69uwvmH}ZhNi%1Tax&aDrKo0=^@;PM*Yl0*BpQ?RD8VWZ!xtY6R{}50oAyvg%o~%G z0@nv~a65Id9y#a(WvB9ER}j$Yhnash(;J+>5mB=Ecd*zlSOd^oG>cJc{cCU<(68}Q z%6vwP0*P1ph{;XhM=~^4H3l$wfn3mL`MzyV~eK_R(Y&3BsXqY7PA8 z+vkDvBRth=|EoJ~h9yu-(mS;a4L9z9sshRG?@*@VP(xGSv=YPYf!IW;426w-k$S0) zNwu2dYQ#`APcadn%~-TJCuTy;WVxL_Dmf`RaMzdWzSTxcEdMip=CM8Tt;<=tV>a@7 zYB_7P>N>Xp2m(Ew-3|`cEF`aTf3Hf}PNqD%g8ql80qt94;E38EY%dfln~fJ;s5Ed7 z#SFoGXEwZ6#*3hY3cR@wmIov2`KvZ^cqBFKo+2RReUztT9+cwy8|Gw*02KfR&nPg}U1U||$ zwYqbYI5D|1PN!7QE{-NTO+)&)f?n~A`XHV|gRRnGB?2@pC(s#D1ZSbwXrfV?##&OR$Hr6g zaraQT9Y8O867GQptCCthEE(*Gpln3!r}<(LDS8Fj4;oje}rZ>&_AzkvRo9jOnP zKHYU<^XWeg?U;Lkk-K+tRQbd(chck1QYxDMZ7B8;u*-z>3WEJi7j_9|jO(S~fq=!9 zf>Wx9ygdIIOO9@c zuGm*grPMIIpTO$VD8t+$@z~49Yo(=V*5KRG(;dV1nE1Z=&_?O+;QAv0J9b81WAa7R`_Fs+^{zvkr^b6p zO;a$0lD>4-POZ7)C+AA8$TBWAZqK82=}JYIr}xY$cL?>Eh}6RKUa 
zWis&5^qS|7HuZv|P@c<$t0gOBfWr)&J)lK-mQ1Q$Wlhb;t_+ku*mH|I>ZOJ_sENOS zzdvu=iF_F1V>MLu?SKkX`R@?+QAnU79Z+{a6h>2^l1Db+?m8Jrhrlh#3$?95V zxUX#5nt@)}mB2QSDvbiW-1E!-b~!=0T=CB6S!r~+`7iI04B0U=Mql#8&ofnmTRsEt zj!1bf%th?sH13IdBtsc~Rbn=f(_jP*fY){UtkHmpnEx6?M?W?yRAtA_RW^OmbVA2! z31ix1ky(*FyC3~F1(EH!#93aDtD??zaqr%shI z7HM*(D9_pSh5R%U``5lyc0&gG-x&c%*rH-7`$FH0g%F!pK1E50xF?Aa2S5kj7%cn()NJ`emmupuxi42K&^5zQpmduY6Onc^>oN-2xhW(WRe2aExr=uxOsk6UG%n@>=apayOLvVf{)p z6znHjmRwI6pd#4jcidcCR-k_|YFp0pGXfFB4J5b83EW&C7GhtuTOsVg+bGPW!ehD5 z0>sgQ_7H5JPx~miH)qO%Aw?ElbWNsw?mr=Ks+1}iWAJp$ePW~=V!Kfj_Bh(JX8}7w zF@LAg2N%5oGOio^hvB=tYo})$|3waM$;c>ssk!PdloU4qc3qe#U=%%8#dGNCVxBD6 zag?~Up%1wqd=jF9kS)EU1Uz+vO(Sw;nv3tTM~lMk6+ZHlnnaSN9|~2_kn$ayKQ25D zP3qRDC|;P&05I$2A9-};{62u!T3^PB^ISOILG7|dqIblI#4w^x3p;4~1 z^V2|?PP^=%Y3O>?ve&s;1r4rix2+(VI#Rwrem*)c zOzb{Yb<3`{8{br;5^lMZPi{Y(io|m3ovCLKS_Q=EXnx8xx1)fP;@5Dl-ICHEG@+sS z1Z_@MTE)x(fOVK}%&V%mD$mI9eC>>5Dz!+S3Xke@b<>-RIzUUU|I`)WEy>Y}0bb&Q$4Dxy=%ExkySxHAiP6S^b)fjq@jqC5J9+2L$FFUCaH`8`1_ zMuOoi^+GZCMDHXTanvuZLLW=e(f7pbYT+X6aDliFRd zf6p%zj3px-(d@_~>~`fxb=GLADWg7ojSM|(?~yFCam#=8N|S|eY_yp<4~yw##wXNf z_zLCIITx3n{w-F6hqSg1aLUz#RHi#L4$xeQt-zL@PDT*$7Hzl)-ZPxtM+dqE%=uML(^zL0~2^Oa4i;N{f*iA0KU~zB1e7VGjBD41`n}K{A!5xt+ zXe3jy7(k)tIq2vZqs4|Kf>^0t@uy~pK`v$7T@M+~4z_s@7wA?_BV3YS_(6xOzT zV%_pkqjlxVi27w5{vCwV3S=Toj6>Sn-xSS}p{jLPpH4pgMF)d`{gZ&^r|_R!1d3Kg z#XqmO+`6H*xxS^V+pA|K=5{L|`ML)GjLfL{8 zTlj^UXcQcWk9|o#!I%;ScV`+wp9xRwf1iYP(0>9#H9U+Web`XGEL$>W*_`9BFXrC&PBz=x6UuDX!JXC3a_z+ zWs&E0r#*8Y0Gq-uEFnSQWAE>0?}8NI$ydDtcyk6Np(nCq3Apwpg@vXQCFV|;56D6( zt*id=+YPTB!uTRGE8H;Ec_d)kzf|+aifPKK+i6ga3=Q2^3_5sa{eYS}T{r;OEgY_( z##=3$1g_vU*+=M4It4G?daP63(b@8GZPtzV$=Mcq5%mjQxP8k0)q2w_Q1 zRMS$PK*iNYQ706ZzDO56M4aAa(}W_4Z@Jx%#W(_r?d!2IVuYEiE@t;SQwe>Kwk9*> zJ91wx-@LK4`q5?BlOW&$ZqKj?VUzG!&3-N|b#TJWCXzUF>xx~9>41~+ZW7I3nY&HF z?f6$K7!7%>`ps{Jg*9v4-Bx=Ojk;qql(GqUsaWM@D9vX)gtkoT#SMdWS!)lS_8$QM z4a5&OcB`G8)?OP!e?OC31}gDbO-}w9^-e8pwzpK?*W3fd-aXkPQ?5f(^(SqOa)u62Xn0GVczqE1a3nVEJPqE4>We!XSe0!>jLi?enT1%S$s|yvb 
zY7~iBj)L%H;2QW`DC2M4FrJ69q8%I^0jbpm^ckf5u3VoMTVctntE)$ztX`YSMGMh+ znb;sGNHo{(LXQD@AH-Z%vkiWj56}s0H%D2uwP*H;CvFHsDGxvq&Gt$NY>G1ETKJrF zoijf#y!{-^FASEp*AT82e(jC&B zk}3@n3ew$3-Z}TK%b&Ac$AI^H_nw)j=0k28KiCa_^tZ)OpY%0{Oo-hPc43gr3ZSxr zopSQ|EQ69r9|gR1V8_ROtedEoqwp~8jir6l>IgF{nLRQ|(e3Gn+(7>YXd>Z|7>i2w z1Yt0^Ng`GH8*1R=OR7pJ;p5zmjdanhoub|go@-iC?k;X!D8BUmT_=!4&0!QXl*+f} zmmLui0VkQ^tZU`qEojf5mjVj4!k6dD#bHg*|mr7l|i@fmKkl<-%|C>AO2#2_SF&D(U;0LOZ} z)Tf}^Ng6lpzk`f0e20o>M0A71WdA{vN>0jmv-%B#0tKojE1u}jv|t4<=3mVgIk%Y$ z^V)oRu68X_j1DN2I7VcU!9ZbJ$;;mLLQO7Bf+~=rpd`DovJDv>uZD|D${_44M(PXK zi7bjJ65<4du~3yB7eWJz3S!$Z)pVv12unj}nZe?HTq<-M8*A(kk#p^9nRgVH?eCBv z(tZEvNapox?MmaG=%=reg_-5{g$>R~H64y02@_O-rnrs=_g5L8(LgVkv6qXX!5=eMo3cM~@}9P{eQ$2t&lw zX@C*Yi}uNZ?bRg3r#^a5GFa>nwH=1mv#zttX%@VWuU1=LiC+rTvZHAzXD^js&U#MH zk_E$Jwbp6_25$>=lHYIFM2q^ePwWcYEJyyf3XwLeqa2+Ne=**fAHKQR@QK*Ly!*_I zMEqJ)H$s+_AtM{9PqHGKkuX6#uP~i3JL;Ji(fz>>WPr{$SwsoYUC54qTF0A*G<)= z`K+3n|6F_k_E}lGe|nv3C#(O0Dmt0oui5L&{zs-L6bnoYjAt=z6(Q>C>OmhqsH&-@ z@j09SZucL4f+zVC!~_-=77D4n2gPo|kbVz^_NAm`Lw)@>yMcj$WHytav@|_i+mdAM zBTRX;*~p-vUVr5K(EX^WsH}~okq{ET%K7=(5r{+~kD-$O93%wGVlx&W>0OE+m1!%L zY5w(h$t9y(o{!)eO}npBqXm}8<`fkde(cWXyGdEHr}?^auCjK&R^Mp((1RrYJ2K*9 z_fk!QmV@(ATlFiBxtB-?{%Oq;ZO(@*e+~jGd3HA}t$49^W0KO&_BUjrq@R8z+B~0J znCI6@eq4Hz!m#$5B3_Eh#D0J*b*6ox{!+qCzBQ>*)CXDM;19<$`rL!j!1P-4$xAE& zEXmio%E&@HC8<$@t=L_xjfWOtA}E>3o-~MdMTIrU(JKl)EIgVrzqFfc^vpUtc0yzO zNFLC+yYB~1uMmlVrVOyqgZ&8|FRZ7^pjwr_=XT%?rYJTj{wPozq1Zuy*RRiE>*Iwg zBO@bNP;nsS3xuEH5fM7&e!#AS+zPT6`S|&LHO^XtG#Oi+mhUADrtvyarODqGu3d&PN{`B+6rXDvgB!E?w#%U?(P$4QKQ84U@rKf z1!R^M6Doc)MZCio#Y}BhROz#oAtc%?8eH@Gjrr8upB18l>Eo98hSj)fzOm3hjA|7& z*SyuA-#)-~<$9zRx9U^wOG1HqcyIta5FjmWuGTIkIe9i)GOX2rQQQB&moyNajQ<$C z=dG=+ge>}S;_VvmwE9DM(HYz+%_ObLrwe?mJRa8-cp}IU$jK4O9@QEk3`_OEs>M0OH&u>UhkKMO% zYcE-D@1lr;xWwgVAZ^D#%v|>K6tR13QYYKuchsk zns7#uax1rpk%yw6^kU2%J+8WgzY`~nb7(Pe??p8w)fm?PT7CBqjQ?C zvU;EUm(I&(Pj5_qzRB1qCLA_$sy|YGy9#2L~*~kDos~Shpb?MZkU6#mUKPez)DYawJ`7(2&)9B=ui4 
zUM&1eknA(n-E9h1B$2saS5bEZG9Td4#3m%%zjqI=-VR$bX>Q^+&tVLg=^)+emiGyl*r1!9E-~V^qo0Qgj zU&Qi7u}!Va$LBlL)d&6@F0MSHWuSAlwLrpY{lhiBFd!>Wi7Wc!YH9RaVoLsq!K2U~ zj`^2&8B?w7?5VuM+gZ1{%@cEzey;k!k2$8_+Ml$^Z#(&yjm(bG6o~@mj#2I&`V~iz z|MAfF-+Y$Rp@=D_lP?Tovs%fu)mMVe=c-!Hzg0O=M3+AiQh!BFEtRvs9yB3Rs#Y_rzE?vl+sy$tEsa0o;;3;8bqDUThtzl5 z&*GbYXyia(?$AqvW*Mema1@UXIBhs(jtfNNZp5}`zA zss*+1992{;ybmk5KFe(2r+jxrZy1oMl2SiM#qn6om0*p_Nh5m|If zQ_M*@g<0lC#+G(QLxia6wzIc?CG+N1AT*so=1bTecjc4*&_d%Itn z3%?!>2aU7j7C1zpSw%t+GV4l7OCKB`lk(V;w$g!;Ur5LY-r{2Q(oUKEpSK#KN}OzL z2jKdKG(5QP8T6-r{P+|T^WIDUTl)$nga-Ypb4a;>_Ge2+Q?ur#0#u4@Y?Yar?HwI| z{&WSGs^rZHa#e(sJ*nBSBjEGr&yaPnGgIBy-rhbocEscUpsQl`A_(j5 zGh*u^{7`RFq#z`GQKWkxs48pvTPWhFWC>CbIF5dj2!ve7>&sC=r<0Mcs?Z|XBFA^K zQzmx%{NWx+!uMbG-s#(0@5c%8TJDfGP3whCi`(O$zoS^cj0!xFjTgU9!ZlGYD3613 zt*Yg7s(dvgbKr3|R~>fq;_51?HM#Xgn{OT66|;F5<4Ln+_*Qmq_)8WtX-j_8o}o$B z>wK;o4^{Q4d-ftj#!l{c|D9|s&^=`c?pomymM4qpS$l|t@)(d+i$%4oMN%?(8TC zXfx^sh5!4O{^*aQ22}Ct+6CQ9lKuP|hdW_PmFP1<^a0&>Wdl>@)SbNY%buY&|DKtO z%k#eMQLeDH>92LvovhXVOxU5vE}Uk|NccUY`es7Drvo+@Z-W{)zpr!MLqt8t4(UO> zyST?4kV#efCB1*?;4ZMcnyLIW5BEMM(_a@aYod28tPspe`|_Kgzx(7WgFvjraUtGq z;OQP7hV@e$d#GXTB1Vf-u5n@eI4M5`YUtF)OhZn_R1G}dO_J%&BKqM?>B1jz3IAk+ z5zt)|-IFk`Y@SBBhm8V$X9|3-iMH4j>FfN*JodU~W-AcMCMYP_y2g9kz1wcezPz-A zK=eg1d^YqUF*i47e-$)4I}0zajg^&N!yCyw+W6Smznh!|u zxOi)4hiU9C7?{(z?X+E8U8|c)bQ`Ly{y>)C`+xxLI-RM>$=>d6(4@oM2Vd&ewGcoF z#}2Kji$^q9*?o6TOIzC$V5xWS5Xi{yMW~hBrgvSR1|P>Um^#(9P6oZTrfnI4IHb7KA+ zzUPUvjv`C1o`%d=3=#G7BVWG{Z*ElfGs$`2{XDWU(uKe3Q!!YBK zaU^mdh1}K~ZUgU2(aoLvh1e+O^28Gpt=AbQo-yLs2jmd2$kj0luVD z%zV)i6)d0cFyHvKx%mowCLoi5{bAxk=>H}nF1XgWtapCZ%hKXnta#5k(N?vY5PCc5 zYlI;oc-{Yv~sKUDRnwkG#2(ysOfSiHx8?O+Mc*oC^JY zkKmPt4xUDW<=tqh@XYnPdxN0#p6?q;ZGDS}c$aOdCZ}h#kq=XW?vso@LirPn?tENr z^pmapvjH(tojU=UaFxM*WnyC||KU&c6Pffcey6YMoz9eWl4DaZsMszXy%;#I7=rxk z_{c*frYC1f5fsk6YFT3=_&z920&&|RF@jGE%f%@>#E3C$bdJSmyb^_jX$pMQmlns@+Wjh<5N=te0*GwsCJ>e zxVhCr?KU%e26<+2&|+DQ<^3_=hGBsWU@Lz93)r!i^D1h0s@yH>Zg 
z_-HGDvv~{O!oUFE7wCB5J-C8Tc;l0_L@rIM9ej$_{&M+aHAKX6b;{qqsQVhC2I$v+ z-d~-+pMN71jNu}P@5|;G-2B&6W+;QrY09(B{F-F2*cufZdZe0u-YhxOmfLl1hl$`$ zWg)WRxK<8=AvX@L(O?Q%C?2M_WH6+iu_&hA1}k2mj~h zyT()Y!=L$TY6Byl(h~b{WmOj5LG1GjO!;(Z6QneGbtzwo9F~%){y>9^HR`_TCm5Lo zz0a``(59o@5tWwiFV(3-Ab_*!>B$AuE-&w% zt*wQ~i#O>GxFOUcA|e6;UcgWw-$S?bthu`KzJLF|u1t1GNy(Ql-Eew%GGW0Jum0%rx*zOtx7yp=}OjKEv{>@+rvi+2*)_7z0aI1;;by{ zq*cC~wjZC-1u7VrXVIeJP$Z%*Ge@33mDiJ7(fp>(z>yO_>#=f*obk@iZf|PuLt5HA zKrmn{JzeT~2#;?LZ%2dY^?!4)uOKMY!8HR*A0U=ZSXdaCy7>6RLw`S1s>;7gv&DHG zKVcXtJvl-_Pz(7AJFKW9-YHVs8#<|}#TwlaWsKi{dj~CWT$TB?intvBbJj%zeURda zN9bVvg}hnhYGCI+%17)s)Yf|k)kFx~kT3qjX-oXxGVjW>S|cUs=lu@7*9#w-zr07e zm4Lo6RXwwU$G89Zm5w#HWLVadD*Fd}TTIwQM=rkMiPQf)a(PQpmP-Ut&g!8BBI@jk#W z2%wo|%9wr#$vHhcyN@8{b)*m!^ac(DJSD8?;@DrZvzI^#v$r7h=uw<{_H4H?TgLaO zj~^2OaY0Z(rw;y&a~j?FH_+WzeTpb5qK}qx`O_I#Q^N`C&tB>@x>#*R;G?2IavgMw zivT^Or#}fYg5TW)@nR5J00}EcSe-lIW&!dL1Mvde6q29dzQ;sEBZi9`MU*Pq5`H&W zY~Xv`ovwVkV2C0LJeah>EKZ=9k)B>!ax$^Bk-=5|^z7^tUPr@$SUTV;bXvXtflIVC z`Nv-#fVAG=!QM#b{dbbjjr#}_y=j8MT z^Iuu8SY3EtK#q(jdj?ySIV0n!@}txTjN=*>{kGz=Ja%ux^2sz$+|Og3BYIlAzlM7X zSrG4JsXV;GpTEHNfAK=;_$p2S-c&2EHp|!J*C`26&QDB@tZAK_y%RJlt_?rHP6t7A zqL^W$Dn8%>18wb%HP!X6vno(7y}o9Z`S+|mfWT(2>tYR`}I3rdNVXtvP-)*eI}qMai}O4B@&pZ^ z_MD@#wiZmY>m^#%pnPpGM9iK}KXYMtZDujKOWWyn>C=!HR^S zI0_*^l?065RgE9f?yn-v$^^;v(}on+6bhKD%f%2su~$@edg5v_2+4m3|n~WU)WU z=clBkEY~4DY0E4t^Zok`=V=bX)HSpZO=g1xfJGQVUkyg|s_%$*K|%Mj_RA}rF@xV{ z)20mfZqd@wIq{}Wk|Y6gLqbwqQj(gK1aR(ah!ILnWsH_mFV~;v49|{e>5aIn%gn>U z0VzyftWVBP&JULVqhM{OM!LG*gU*e%5qL6aUu@?aCm@*l01g_gFqWWFGIBqE;(5CL z_{kGt$RGvsxJ=6^M8amf}DMgEmj7V=Ac*0}R z(AOQkb=|UGdcrt~pTRYpZ0@N2I{YFS(Y60YI%c8GkDQd$dv)lsp5A<^&PtB-BWO5& zpBVz}08c8F$3Y*`F{{i5L9KCo-+E$MY=qdFJ_KtF2H+Nl<*J4GWcSLyjvllDq1~UO z0(19-@n2KpzC)Uu`uyS@`iYAv9|aNu-Bm5)F&>JT(q*ZmV>p%M{9X6E0qPA-9I zQvDz74!0h=8|8p<#r_sWLk+;(o}ZZQ{4%4$bGNIaXx0-g$@I%@-1a@(5YD6PU^jw1 zxhel?-6sisJY1tsUkq=(It=W@TPut8EwXJaeAe_nDA~F#J&nX5RQ-h#y5y(kPUVI- zkIa%Y&N!KZE)L}`7yM{#4W0jcM+4y&#-pRBANcmIU8H)(78Wkfpa)4&!(^zfrS-Lj 
zg&E5qkm@`INYJ=&LA-lGp$YdYxc_u@Nq4~t^D!(8SX%h9OZ~nCOQK6_Fs9<#yU0x~ zoZP|3;N%a?TgmoBG0O)y*M;Tfgke3fZ<=wEaEu!4dBEy_YdpQV_G&)RWv<8r_q&#EDPbzk2wcR zC5@h=2AC^6F|j{%WuOd^@ZSF`IrfGF4|!3`eyQz!^)%^1-1+?hPWmL-WI?xSxaB(* zZ4kgCIVac!QLHb8kiEu7L%ay3&^gVZXJ$x<=`SgDsE^-w{QIf3aKiz}L!KXFXneT2 zV)_4u5>XMEpZdiw&c+ortO^S~{`EZQw=&rIxi<`VtQMoXrR6##oyf|{f-CbTHAMqj z2|s)|@vCnd9uhF}!mfwBJab3K3R#P51;9S$)!EZ@-gqc`-vNF^xIR%ec&gB)zr(Uo7i{{8ht3`ETPdoA~>y z7N+6wR{E-5LR46C({K7Yxi9spI%MB?jQuRseb z8?ie1+UIViae?ytD4kj|0owd&g_Sq#Y$GX@Sa^mf8hu!#?XQUMb)?^$lA71Y#=++JWbTp&J7oXx5LBc)zx>Ki zCq*g+{*w=gh>#;Pzw!Qn(>T}Uj&&vg=!dJjdj_BLDo|<%r5G3(z-C*UJt8Kww5=6< z`m_s@bq(4)RLC(=L_stL1Q64TmaC&9>cfW+O*1+^?tOV`_u|F7va($O?w#_(kB?m( z92^?-psxwdrlh2N!w~_A5vlyHuesUfU%!Ty8yjV3u7QJ{{SAnGpz#jIB%_Myhd!Sw zTDSQv)KoXn9ZgOi&b&WB&w2g&HJV=kUf=L=DbTFTy%7L+R@Bt!LSSLt0Ol>BQDwt@ zc}b0laRIDdJ9}HANA82eUTT*6XlorlQl%I32X{L$nj%{UtYa+2?0qXQJHO8G-OUL) z{Z%p41cax$IrQ5?uf2K*QJdU1Ur@$0=S79J-Trmi9+u7tuBl_kLzVQ-LPe%nGf^VE zJVinEOo{QMW}h6_^!s-;xtKOx>J?SNBs!%yk#y7d&%dlE zOxlZ!rA2yZZf5Ssi~HDSaRCjHnJufc#Z&i)XXK#<9=_ZCJN^~+^Nkl57vH{p1Mmqh z9SBsr=C*R`!IgH>I&$EF-kXt=zGvSitP;(a2^+=Z?OPaGZ0G9#>o4INWY8$Pa%L>; zF?;k~-{SJSv)=_(Sf0eeQ8HQ=3dd6))PSX!^zP?E9mR#TW2^}Du17Sl1`1Zb(E z&2)Av>g|+2dWbJ+(j|t9P0e666x{h4(x9Th*(EO+)1Gcm{8F!J@N(b#;740t^kDkU z3*iv+r;bmRR9<53h}0{bEw=Ac9Ch5v--OKjLmH;0=GCXM+g?ZBRABmGt*`ghIZlQY zutL_Bmd8842>*kIJUl!YexlfNH(6y#P=!Q9FwxS&*;7`tWBL;Mf`V7zzpug9eY;86$Qj&;@GCp*aIftRi~|~3F{OyBZR)f`nwkDL|Jn&u{$WDD0ezWN7 z0J4O&7nPL<;g30HiH=UmC8=?P)0%L7xRm4qTirtPeeN@e`|Rx8f4WvW*XXh04X9ws zYVEnrtIG5Bjr#hOGmH~gM%jqZDh3F|7i9^?XwFNqB9kh5dK)kIY7uyS!?Kw1r4i?h z8?UK4-`1j%at9~GN9X2=^|d?}(A1HA!eI-O8Bi+o4Q^~PJ?75!L_vBoZ+S;NJ}FYq zx=}PYMm9jVVPCrH(#~38<)EpPmot2i#gNGDME-EtD{7MIgq?>Q44e~_4^hOPBJ17p z-(R1xG0$0#0!TV|Fc?wqy2loH)ze~SZTq8t60I$8i)7k@rAH@`$j!Z?XU_^+gWC2$ zcXHt^LlY##?}vf#KL>+e$^E0L99krDbv*%W0dyG)tvhIss;MweJU){fY ze+tVquQ!8e&#$#OEe@3(&J5r0V@=!L_}`4XR1Lu^J8i%gUe>J+HR}e|elA&&RGQOKdKUKX*JJy@Sp&U$G|bNfST;J5d?9cM_zi 
zsVad>wZ%;6BQ!N#v7L7IR`}#M4eQg(7w3sRt%oPyhxA*eXk56sum7+e%td_zi^XRf zgV?$*Q);pIDBeHEuHeyX zXL}C1v(W%8dnUdNVv==b+R(?t7QSoJ-^y12s$ti9gNKJ_V6X^?B5WNC3<|n~fD8`k zwsb0bNN(WqfV3=8bECB-w$@!VGlA=NEf7IVsdqH@rJYnu%cg{Y!Q-@Zf|XQL3$); zegEWN!Z-op9nkFo>OVR*2I=u7`T1ujISl7+u)Z4>Vd$!>yTPA^h)Mu_qKMd*+)tiG zOXUJyU@JgG87~$8{cL@{1rHrheJCR(gWOv`+sT=oq%N=TE3-{4IQ zHi|k^YL989Q&U&9hJ(0*J~mp9-1nX(N8;&JEXmxkJL1Xu3L0zv>0`nV6Pi74*d!1xe9Ge(P1FrW|{8Ay~_vnBS zW)}*5AgQaNvyL$)OW&f1>y5U`Ng-BXZUT5GxW16vyT46HVTqT5k%fx3YUjoIb>Bu+ zGEuu@&#NfOtrg6wZkx@>_8wMrB*d8(mx;r5t>CC`bUfd%@?0GRKLx)B_hs4P~XSrXtql( zOS$mB`K_QpN>=vx=m;W5Iuq7?9X1n7>UE^&1!guku1=l=XMW$HWR!~t!gNGI9B=&j z&23(gylJr#c36}Q0l?6-4KN`CtY1)nKAj?+W?QPxlD^DujEEB}EGT;v(v6Mscw^(! zM+q)9fW4pT*#)8LY3ZrmMV_Ac)z3Dy%a66m;S@V281*1hkDvKh&7P8WFI4y@*D5>y zPEkcp;SK4x$F9k^u0>mpd=HqKg@e05nRBaqSXh-;yD%`=6@Tdy4fD@o2j5>^C!NU#q|JJ3Gq!T4vXGD6XayK`YQw*;Im}e`K-P1y-ET+Ur!T|5 z4Gesa1OC~|>#{c{31EJQ>52}RW8fq#l5=$}AN+UP?Cju*jkHgaH${ zGB{ZPcBc#b%#MyiGX5*LSbDX84$x z09Ih&=B@?hlELCm+e=rw>!A5Qh4i-96xgfZ6YH)2$m*gdi&3(q?!2VIK@PyOo%U(2 zX||s76lw5LLN6{VgqF>Q;Cm96$8pC#QFC)q-tNfzd{XA}+?aTc$ykj^PNurbtCAsV z$xn$k54^K1r`z@Uwf#zu>Z&>Czb=%w#3hy0ns>hEqziZ}OZ@ne*Va|sudM`6$?e7I zh8($N(I8R~S~NrjXH0R@2hI`r1p_y%&XUZuwt8RmM+H(OWw1iuq941br5_cJ{NUSS z*&KeC3YtzwUb@?tc{JF1bUR${0^Sy#MRIcc#C6{5*mf;z?3U6!A4D$)%YA-h%yxtxTx@L6 zdBA#=Z8#>tkJ>M`UJIB_&Q%N*TO^dXeFMNsof>yzq$~e`4)|vHHx@7IZ7wzCRa7__ z8ErSmVSp{%-Q9(c((c*X#a>5CtOX2apn6#9iZiDxFA~CYxA~%CFhh(SkX^TzA0Mv~ z7q4-$K4%1oRMmhgy&3$scG6r$H@LX)itEFVvB_qfuL^->;W8>5TH6>+Q5vcfWm3oRu}c zot2c-X_7OVNMiO84@HLSm4mzY6?*XF)M`%da2Xr!m5udAeDeAFm3wb*dS$oE0^lfX zwsDB;916!^915pE{j5@ur}&K;7bk$i8Sj1k;d@%8LQvb0*$(ae(s>l`Yh4wG+h`W3 zO0TApwfypBA$~NK^0Q~5fUnv-6+AGCcgsPI-94+J;+x>QDt9ELF4^wgh!2({;@kWx7yfR@rlB<3T@- zx85DQCX1iUH9P8z&CMrYUeck6CMOT)dsv57RysjY6Og3NZ{B!&dxI3s#>C`^Z1}29 zJXe{&w%XBl;!0av|FP$x*hpV?*Dv)yEQ_1T7e^YqHL3<0DSw1~yqEJ+b&F&Q@jc=X zYL8Xo*7tJn>ez7T*i`p66%^4bJmIL>$f*K8v-x)X&&RN@Cm1HL&~^9ra%C^YP-0)a z>qRGV+&$cZPH~49pN_%7!m4rnS4*XbBrPtm5{X2ct>3(#yAz;DG8AuVH_-5cqJxw8 
zq}q995%e<!0G|CqLpm#l^)neYEc434|X40f}a95X1}Q zfLSe;o}Qk9f&!fW{r!DtY~d#Gjy%gqd2>YzNZ@$9P2>^i7Hi381_tP1=NhP`z}W}6 zV?TfXEZzEi6Kv@GwvuPh%ycFB@kaUCS@LN278c?KHLgWN#7i7gpKol*xF;+te_yhW zPe9p!j*S{|LRhF8>if_W`w=?rg3teJ0Vck(`e#by60KW*HGUc+l?#9Ae*#1&vW3cd z!em}-6acbuoLfqo|I_zeVyZvE)8pRZbE0Oi1@)3%Ll0$|1bvIot@P#~4^5D#pVxz| zmADBBs*?Piz(oP$$}h(%xyi|)@j?7gI@*^X(id+P;J*RwV4PUGpqu7v?cTy84bYbU zQ&kdj9;HTv;`4+~{JZOm&gct-P_ZF(;??c&Q~iifGkyKOG`| zmv#8G6BxDLfc74u*yMOv2H_p~U7DZ?ocTOZ- zhK$b3=Q>-!dd)YUy**4cVwU$&#BaK$II$b)6K!b{lKvj6Cax6wf}*0K`R{WUZ7ddJ zXOE$KwTjfOu6sB+6M}QrMaQ7DZ+_#O(UwbY zlxS#WZfj*;Gxg=m#NX_RbE&eyjDu)G&w3J@=r6?`JMQ=yFo_=+HG0s`sG%*^)dzTL6U|3pWI z{+Qi#D3J{=;}|_7AS^8`p5vo=G@FyQJ;Rj-(Jxp~fT%119isAbxP(|1_%4?6?xTo? zhK5q(K1djplb5H#4Jj%rf*Tt!Mle&pr})^n3E%!?1v&>T%ffs?kk z-v#(nPHv@VlMTJNuyCfq}_6%@Y5#=;Nv4ide+$1Hyvg!!kRCQDYIxN;&Bj8r)|OpLMw zi_r|H3-%68XF7ZzFgC$?XC%am|N=deI;@KTw+p{$D*B|Kg?U}%(SbMcvNXQ zj^1Wf_H^=MxaZQ9=b4?6q14zi_-CeboclNumelZ`ShJ76!rTZ$+J19&{p(Dn&5|kT z@Shmsh165mdyiY1+yUt)3Xve4t0NiLCD`R~I>=|%jbVs=om+4IVXbP|6c@*P#y~~9 zMJC_vI(QrN<;$0L29uXGhD7QAE2uk3=CbMR>~wK)xr2=S=!ubCKA=6I1^<jTzFhx{^{J7s*>j+z^1zK_)+Rlt^(oWlR)g z1iB6sNgR}Ds*f)yfMPgT)CA)BHmjm0FOm*dMt*M*uo{vNR8ALL?)=Hb=X!L=l1d?} zvsK&LHlL)r7iqA<0(#dsHwt<1tQU_jK4=2Y(d+^T$KV@bgpYXPjgC+2#CFZKzOg0bkn1UK_ zgIb=g7)TR+pr8#h3Y*%2*sSj(ZpfalR<`axMipUw^-rmM-<;r6o0T0~ds{IoYE z5i&c${04442yqmY{ysd;GUn zIeqymChT)k(oCri58?&Pat_N!|B#h5G~nSiLj6o*b|`~mgM*5K4uU4babRI5CXf)_ z#!hl_7-{n5#l>BC(o2v?Nr|eguAY~Z^IpX5=@8I5M%`Kgx{f|q?}wSXrk>( zO5nhszDg-BW&q<09D1-6b8>OHZ%_XmRag;0L23Vi6e)fo;5(rznX8<7-*0O9Fa^wI z(4czLGJ@Qs*#YZ*!=zcu?e7+28RW3)ZX4_GBhZ6D6_shpW72B#+KGJ1O+W*OwuYUQ z5*rl=d{6#lxSVM|83J&&Va>768@<`+i5dXCcUdh>D|S&?(dc#uR3iq$Ar}K|O>KQH zn?p+-8)y1J+{cYL|JZ}wBwcsvX9g)zJsmgRj-`V7Um_RGc}#o-EU7{sMO*v5&+cSd z0_ODCVPPz`Kq9f8$uU}&nHe|Q^6+cPZ$C^^_L;@Swt|9nfWLqdHf1FO=H~)rdJe#h zM`kp{9mXygmN*B1)rM7w^#DDlgqmI{$w~G zg$39DQ=j^(#;Onr$D5~Y-^yeKUgj|a3l5U$3D=>~!y<)86LM3MYOz+5W{=pL#O26h z`W*C|RMjLKoZifFA96nzv7BPlB|O!css2f>kXihqgXhoguD+5|rSJ6xh^WNHyI_^a 
z;^;9zks{-dLV9QeT1Q6GxJC9ojYA^>t;);XLg>B&S`7sa)={G!p4nSi(Q`}!EOBr# z&ric}F9n z=5%(y;hTzwLj3Ne!WE^$qPGv2!K$yN#pkmA!@$s8abIn2ZZ0_u14KK^fTYL`{__?_ z5oJFK?uCsJFzwy@?kJtBEGH}L$nt;>BoI>6q?oTXH0p9)ZIO|YXR1?PP{^k7KbrEg z@oLi+6&E)djAwfJa+vxsSGf>sGEDwltgJuA$6*i!hOa$GzNV%Ijxq!y$9(<_>keOAui5ty5i4trOD>zCMccSfA>;qE~gQWq%%I0O$0`{czdo3r!sx8Hg&GLEW;!r zPny=3x5&jT`7b+_FOF<$Q*XTM$w`9_E!Q$<5{E=x{M2C*JM$|KX$Z?~r_{qhVn ze|4}r3Q?{&N-(C$zotBI;@Z+OgP+v(3&`$6pKRmooLNaB=jD|q;v)p zQGipjp0j{gPEn)hJ>55IaQI56Ykt)Ct%SZGOqkjqwcUM z{PgKlz}9a+E$q7=vL6Wr_qDmLQ381{FzkA*%)%UXOLe4oI2N9`6#{*!(0rdyQ9=Af z!fPDnjB-Fv`V}({b3@wbK(A6To#Nf<*eUgV*{{Wk{ElLE4u!wsabb2)p2Wn2@%T8~ z%EAX21y!d)yKw>kH;=%~z*>ac2!V|q^6Wr53J2x!Y+F9}@=8QyUzKMLPSrh&3{o|o~qmR^Pc8s{Rd(azvAKS1;J+f?=bvw+>pNzw>>j6 z1HQ(WISoxsgIXBtE{!GK)6?gGZw{l)OXrTl7yykpFfr&vteG>_c&HzGF=9yk42gP| z59sOXc}XLof=vnhd&Y`RLc)`wNO*>~`!ff`nt4@KF!1Lp(z2p27v~TKB_$@J28sdh z36vjf6v`jF{ksu7*S66oMim{;tfvj&Dh^Ji`<}ju$ucnKS5_J+eT1*SD8ryeuJB6{ z-|mz<_JE4R%VwP}TC@30$B;ak2m>xx1#iBI!>)i+D~w}nXSEjdXMup0lwo%jC2BG$WT&X*NHO!Mx{1(t^9n_2L8up!F$;=Ffb4jYDpLJqAydE0hhr!tKCZc^=8rW4$2j! 
z_aa2uw)N~gPT%98h87i%u6f8NvN?UR-HJJuZ98c?_>No1*|x6YWF>ifpnv{irm}cG z^dHB)lZ}_Tit7Wf2ROC&Z(`s_nPg1o08^ce1%61kE%W!G>Gt1y!o~ETFqIsmH4PFq$6qC{qa6+NJs#c`+yt;DL7nL5f zrVaHiIv*@ec}=;sWer2Q#^g9xT4ZQoIDGWBK^@s^3P!-7>o%>9a#zn zrZbv}^}Hwj1OLEuB^Ld05RX+W6-2#7$roU^8rl}b7v)%7*%5MLejCTK;_fo>?+GZT zZW(f3Mi6stR>!xh(#uIn@p_*--rigTwCHyDx4*Hm5oo#M;$j|w;!dR8i?NaZbJ?3OgGFP(*1% z58n!+cYQOhGG@I#QdCx+ZFIE(R}a|U)ys6P%*|K-{yio!aHI>EYdnK~AB2)|VnIgd znyR{TWMiG(nO5Ik$b6JAPJe;GGo-@lWo2Yodzv!%3>$TLb`S}1w28jN1W`J_efNlK7{Gi2P?t$ ztPXG)1DpJ?3r#m&k?|$=mvfcC7qoW}<6IRIxDO0yJI#oZ5E=AEO891#4Ujd!igXX5 zIys(0+q>j=6ec2y08k$S8DI)8u6%n^{x!M$4gHFbTVdD%F z1Am|xHxC=Ap;l>v5buD9Z>_IY{ts%=r26^O+*Y5hE%%*5i0c2DMJ0L`8sm-=dsf4V zXZ?Wj(0yw)RDlGI3t$!H^G>`M+MRC`I`Cd${+uv}%Jfk}(%hegm6LN0)~mw`Y=aih zm7N_jJUkN{o6BO769-_A=@dAc;~hkmT&6B^pWZupspz7(SEhsY;Z@H(KN0D>rX2{f z0t!muLkMUMK@=rMft`B)q&5Civ&eRHX`F{hpZU_fVd9T*$^tEF<&B-o!Y%Pg^Ojpn z>o0B2J4=;M?{SapzNElby;XONm7k0-rG+Gc&!q+l0nbs2_3Bu1GHmEf z!Jw6nuH4$eYEpIlU*A%t<@MS2?sO0H%^Z^89}P9TYu(#nCiF~100jv85$~$1l)Yre z0App+CSkW4$G(~wG<7m|m;;h3gLhO~E>NBd=O7Yd1PM{eL3L?#cCbAs<`8wDx1%$} z5vYS#p!%koj@6*m1B?VC4?8+X*4Wq7B{u1@@yyUtPD1A8E_ZwAfq;w$P2&c zpzH_BPg!ZHoSfXr;bEi8hB64c{9JQ<2~h6*>7E%pkX9fjga{Cv_ZKxnrWDVZO!_b8 zLFe@S~HIHFLaiq=jfDuYUB*X%5# zj)f=Q^6*tHK-C_!f;6YSEn6XU7MS+ChIYbklT`tqzyG6yAb_WJCVisz5p6*raBg3)NRYO$);ry&RErM7i&kML6pE6)~+C_ z$Hh`<`%dtCu}F;d`IGfXE^Y!5$;NGwh|*F!0Qw;Z0O)E6#|rKfxrH}xtDvY*=P@aG z{GxWyN!J8{i=#sSp28MAyy8R4_y|?lP!!SXKt?WXC^*CGDRi&99|E5aI z%Ng9fy!&k^K^M@zk+FefeplO}S%=$qnlZ4k!`1VZ!bx~<63_v>2GPvx-ou#`xA?sK zp4>1cM!)5%FbX7GaB*3;w367I>Tl5|{Ih47ng180pW%&k+}of}V^qKnx~M*7@)LPB zJa#`sUp{7lfS8{hW;wv2tma4wxg)L0LAfji-WdN;+_zXCAGW9DNcY8%dEXEFJJQ@V zcHmgwE*eS!CP#`sXM0yM2T)C6`O=oZShaukjmOiaBA98;fW;+k9JgW>^2~{FS@-0b zH+do3&>Ru(*Ib)kn-`Jy9w_#>aeKl?+k6zEdDw`P%`I0I>3kVl+RWhKzaf9e{V_sA z3&w*nvi(cH(%*6O8MQI4^QWZ1rKI@l;azur>Dc@_T6*Ia~K*2b`bX+>XG&0Qe>VY=cq* z@G&MPru!(mUt9_Sw2X}GgGAJu4cTfi2UG?9Yc5!=z*`4Rk+ystXq0GZP&;;^Ic;li z9~s1z9Dx7G&(8;QkHn4zn3BM48vOY)4|ZdikN_ehL*>*klW%?mnU?VfL0~vGZse+@ 
zq@=1E1&wc5d-Pqn$vig-66ckO`1tth#M=k*z;p?E_YR9(=wg4R??wUPFef%udq+pG zjCGZxC_Ta=4a4V_Ki1liq8mOcThe$-(B*I0CyT-f5w)lRO5ME-AMdhhXsajjxfOKB zs*aT1x{bIQ8q(Y?KM5Tf8mO+|ke885W3%{mv9`c5qG{72q@rc=O1H*g^Ro|{@^idU zi2zUcB}^0sgO?8V0;i^BfgU#EGy$*dtmHnaU5v7@4qi8ZxSJ+LI$1Y0JAD@zNUcjt z%lPr}&Q5z9v{^wE!MP2NhsQ+d{lmsV((O(j{vl7$l*olk>cpS&g{vKojg1XpjaPdO zY=%5#`Exe&x1;sUm78n0G*28I0fcQ3u|mL9^Xpxc@BX(zCDzqd!xVHxI0*3f%at=H z=5qa)8{QqgrBjY7UkpFIKyPb$h;0LWPaVEaJds|wp3@Y_AVKjv03!8Tr z)^PR%_nKG&6$NGGmsJx0+!=m0_N%CiC@`K=|>T20%U3&mzv=|vOiLfW2!kPhV{_Y zvS=rN=8VK^ViNKym()gcQ3X=`dW(b%Zc<>}=m>UFz;U~cih8vyYTZKuHd2zmBq=LI zsE!i3C9iOe=&W=*`V0v#zh>tM)C*iCF}S;tpetR)Ph_Q?YO&P${N4J+`N0SW{hgo*oYF9z_&YEW+ZDNr&e3l3z(#OYAZ?cX*?f7=Sm(YRpA`LUf(VD0L> zj|rCa&E{sl@_k2e`3E0`_TF)O4$5>O2^Rg0OG;bWocolQ?Pb}RP|5ZHIMHpC+6^WI zp%P0ZWDyJQWX;KZZkFxDPHKHKQbIltUlrtYntpvkqKCza|L9Q))Z4yfP9V&7{r!6h zTQZZaMoA^^{zcRlBWlxn9a)HsL4^bAjBY_$p4ysnmx{9IslU~sAYoYNc!dRPLPx^& zIjQJJ8j^RCl(>YA3QkFEMS1tkZiSXPrL1kU>ArD=l9TskGFWtEth6}T;o{#FU-CM< zL_gnu!9s27RnFiAhB0}+IG8=D7 zE*ch*v7Mbaj6`9xhHt?@xdjYYpoZ4~C?xv>EIPZoFK3*u*i-S6_n zKs^B;{HDMdX$|WepXx8F(-uz7@ghy4t$7n8TXEQ)wme%nu^AByU2(HdHGe~4w~fBN+0#%SsF#eO9G37|j^Gq5g`#ZmpS zI1pgR*t7l$1_QI^uZNS0-Pb({NILGJ7#&QIQhoo7@dVZO(8fq5&~l873w`^KKlSmCaD|L0m>BlAO>H%7O<01+ z9~?N4+-q=YEU1Ufxsj>k2iK10Xqs5$&90U{GGfsqT_dEZa`2y@;F7(4 zX0c|sB&k0|xK|$rirBspbI$W*YXuif*;WpH5sCS}|HlQewzdX*1DqB({X{-jR_+}h zQpa?Heq(*TM)38(@UV-U8_eh?;d=lshIu7?{QkYzWMM79mLvf2?9dC;Sq;7KDQl+U z3~Nv3bG1%jHY}n^-`d)OuEJ3M8!9R)WPQ*xGcWHpM?L~CZg_H-*1{H*=N1=ZQ&KjT zmRy&*@SUBVK@4&-i|3y#3~fv4u@A5Vu^(|26c@w0C_f;|sZLp8*3oQynz3WPf)5T< zXX&>)cFPObmPpF>7gas$UbC&bGRjMQQoFnT?*|nTf`Y%?;b3#A6w5_v`>({~D8wxp ziTR0Q_Z?wxFoY=l)LC2c>{E1SNasvmr2D3sT~={vS?PBz*l#sevxf=}ld&iXR}P^z4STmB~>}-K$KnhQ(|>MDY6d~EFVXIY9Ipsc{ekXp`jtr zc=`tfj0_J0#(KA7SfrmEZJE@Ei7TZE%!WhBZ|oRO3%^3!Mj2<^s}mOjhN_XQ^4(UcRvONJq*Qd zCn{m`W(Ojle;Xw5T+qaLKmsk)rzUXR92!#B*Kd@LTz~rSx(Dv;mt5x2c+Lp3vPI3; z!yCbt6HH!y4uQ%da&NTkg61e_gfI{eb7z#Jmh{cpEKXQwu7D{>-}W!n29KFV%TuCGG);vNPkHi{@j7w+%x!|iUe 
z8lq8G9|G%4u97djD57_e_V41LAkoA$KxQD&jy`|>4D>%M8=HHNxD*CNMR7E^xl#mQ z|C3eodRz01@^7NVibGU;B29{AgOzbbqR+dAtTBoL-K2pI$MLhEy44y-7NqEuf zu=%s;Wj?cyk&wE6fFOLClm9LW|9MOE-_7y8xL;Ax!PK&aYlydQc6#@=w;Z~D9Bf{1 z_ofRBjuzSFUI6+fKb+b4UHT3D;iRI{pU7$oTGX46L|6miSLTZFI6x;%Ku`~duURv0FE#$>IT5&zhDsC+t7@nK-CAkaQegq;* z#)aZ!zIA11lfQP~fTzjiYXBThYF)9987V_s>0cw-;I>pDR7mj=W$qq6iLEL_t)uva3g#R?f3{dl zTB7?C?|`lWXlx8DEIUK_eb52K#XYJ&u8cgEGa8JZTfn!(zdv0{}5)w*hinQ1al5afgU0HVz=(Y=7s3lhV@T7_RJd`;Y{;SilP8vm6`_eN5z7q!B+oZex z!Ua*w6&OZV8lmPXqd9W8KY ztE$+4^NXnb=fP1{Mn+C<1hhcl5v)2;pckovWeU*wV5@=H%X_4JC=7?A68i`sG6laYVqQaz zBa42d%*oAV(ygSTr$KZmI+Y}u+V57lu?P@rN?~9*fBBM4r>3ovod$6X)MIobKir8TsN~J20R&HZs9)URTyqOjU=X;I@w+rqm_f6w0~suzm8?<0LOc$Eo{Z^~F5On+`TjJK-*fejxSN9(eo}7Gd=|}SH;Qc7$ zlEI!NBf-5T#OFDYr|!vZ^;vUcb1{Q`>{Xzk(ZaUD%iDK!54$h`$M}?A4qM~Fmc>|4 zuasLtCGL=Ylc%iJX(<|>q+cy!!H%OWV$W!{_5JME* z-{=0x7^bK%KUQmRhVahP(rrW{5N+TnJ==)JWO!<;5m&W~X4Z@`+98Xk8t*0v~jS*jGs6{WIG$zSo8oCbtv1Nx|wL{x6K$ z-oqKz<7tS56L_0~TMkJFaL?c-D^W(_k!mUcqa8~n3x#K$8+0EN(|=(Jcfa7 z&A6e!H_MZ*ts;@HdRXnt_~n$9Yj-~(HkADKtrwiqT5A}Hual$?Cp=Qz(BhoyW2Ej9 z_g95&GH1yg92}y}Tz*$JYJuLllEeN|i0>7EJCR0`ubGlZdU}&PGrJwF(Th;QZ$3lV zsM!ldunj;JX=xD-REwMkLpaXI0HgW)-<&|Dj)p#d4FDY~=p#VOPE!??A4DNj0mcUj zO#1QQ9|V+2IG=-MAKgv)D{_Z!8Rqz)z6&Q}HG$*oKZbf5C?p-nwr$T!Ov0edd0rS8 z_fHfqynm&`RW~_x8e3h$9KjBN_IllPn$)FZ5D-37m3OFdJ%%2D*O1-fQzq=%j<1C- z#F|_?5S^i|URX=}Bw}GcL;3pxrrKa7kxOzdO{2-eX6KU*{-cEEpAMVz;h*0zq! 
zFG=lQ!Vrn8%dM}rmH#FTyQTi4HKwGEoHmmS!WJHCrhVgo?Ja3o=&Wduo$93yx~Qd0cNKPd-@5)JWdnbBG-_v6!W;^EfjPr5eZyuNhi)HFfe zjYm7CXduW5p$(t=dHHhZ(?SVT4~LnL(U2A@ehnEF?xwekS8mzfozUnZwh8(!fPoYx z=^*-ZwlfT}g|?=Wf0dm*#tA(lZ*iFagH0c}+2qxn7M^>MLvAu3nbzR8YoYU8lr=!f zIgQsRcS->nl;+7}L;@07d-KT({;(SVg#HeUA^ z&&|*)bbc&gR&$ICZt-^wiQA*;e?3dW9(mM1Bt&~)-JevPm0+(O3*$6N=#mD1`O$Yl zs!3qdZokjp4+k~AZL%U?Tx?ewNB@di_=S}G@yZXhsdR=^UKEt?4yi0S*|guS4vc9o zJAMZ%JIa1vu?L+*Utkr@2cowNC(!GikqVq1wLT>?63PA8fwi?!W2q%w%vEXQoA~h2 z=G2!#*eY5dC)oe-Ouk@ZUp*M98rwdz*I{#CA1@N8rznU9bHSG{SAv7lVZ;M9x!P_@ z>t^d2=D69BucQD)0)e2{$-gYvcL5dvOUJ_1VCcJ^>**yI7jGo67{LMNX#1}XWWB8h zUtu+zN|H@`Mym}1D z)kT&1isq|HVQ;g03{&7;-q$>@rGX}-#e=+R&=oaK?59Jj)F%n=tL!$D2%_=s2`e*g zej6=Ue3D>^;jSMpeKhyn6FTo@v;Z8gpebo}@Ugk&jWf}lLi@%H3wrraRrGTtQCkvfSgJ!_=dya+bw_Mo1NWBOJ78_+V}cON@}EJL8lbd z$d6GD9+6-BdF}_7*|o|N>Rb& zf7Jcx>9NK|FfO_jDgeNns5xATSZ5{)vR#Zc70VQ`K8vaWG+5*m-+f|Vi(sH=a`3SLoI+{(DIH(C(3eod^q1o(WmpTwNt!w6maFlK^zVt3SAQ03AVRlKZ!3 zzo(|oK~P53 zh$D#+gbP7H#T)>kHP4>yAVhudlXSp#)$-~uA9PkAr(0RcFW5PZuG|xU%0jm|tP)0e z|M2uUHTb*n_{2o#gbo#M`_G?UGrSQC;9dvwZYc9zqim?Dof%=rtR`^6jZ_Tu{ zG&Fdao>|FEKjz{Bo?TZ;uMy2w* z_xAPOYkdZ9J3fQfw;X6Y?*L}JbX^ZWJ_YLE>(}+ETOe#|vFJ&L&=lB{&(6=;i9Wo4 zf7-Zv`_^k#OMJvbsl%LPnJ9n|v_lAt$C4Zl8axemWc z5%~#q6#^N+Yd}L?o!|HBYq;Xjuvh>^L8tfWo;d&lH+f(Bbzr(znVz2he}6696u32q zM_c-UAH(i-x*;usbMVv}!vmx*lXdpxtF)OdH{RUY;sxAfkt^HVSE~b0G&Mn8bNplE z^W%aEpg}shxkZG9v2$<;_*~TcA(6CAf%}XSMfJz1GMr%&aJ={Q@CXbH99TPmjtpej zhBv#xflUoGaA9F#u$r06!F?;|XJ;VpPL|6p0~{NMkDvKSQ8<;s>5W0F1TyA$0Rr6L z-xw*L#v~AHL4UvC8JQLqa?NJHPIMj-{-*bFlrihB>~Z?nbRLtpQO!zC5#Hx>L*4%h zUb&5?@#~Bp$TH)G>FLjizg?BiEtKasbbWfTRctWE>a?rwiz%(}KmeojwmeVX?dNhsZoi=gsa@EHjj&&RtRz$#04CB);po9AKkDS7WM zomCRG`AB)On1rezX?S9N&X0LD4>3v{^t-iN6GikP@B>MB8U|gZ9zA%1BI-c0g87ap zLfaS5U!zAw!1qzZ{w&>)*868W{qrw~2yk)dS5}@9g#884qJV($Ad-JFJ5&O=37!_IbRz#uqb-;Z+KiUKHNtZ^QEl}gTuR+V0_qEL$1YH!){2tgl0i{P z8%obtu)L}-bp$tpW=&2qWFkL>RF1-IX*pnhCjv=dF-bHZ<6U-IkQg*5px;xZiE7Dx z*1SasW%g__1RZc2eLvga|O{hF1{tAoU;6O^GXG#0WZbaKvm?LKDGC99n+ z-va93Q 
zICa48oRUL+^6!DaULK%NjD}yTb8>Q^Lcq?IGqka*f$g6;vj!+Xm?MC(O4Z9gK(MJ| zIzuB0lFo1*2t!^p&05#b!OfYWb6|Z!v?*v?YV3N+Cb_^b`&{{XM)O{|4CA!NKMj%o zBsn|KB(q#EzQO2*7KCnYY1g+N{@wM7-$eYeqN8Yx!`YOwj* zLhFl6yEM{Qf5N+eSNz~RKA?k|^dys%fB~&8t;sqpU=TAe7SQafjI`Y!$-t)nJHFfvH+TjINAF7Jg z{e304wZ>sx(*wB-;8+u=KsH&PBH=&BLw0#8Z#zHc*YxGHre==autw~BjGEUG%77Ke znC^pzY=*MWHUac+dRo5bLLYK4l&b-VL!P1Z6JgEk0;AEBYy*n37VElf1*MzcP& znzODBW~~KEQqPf_Cb{T6wZY^KQc@t+Jr%r2PG+?!tiuhPYcNjE)wQv~WS?jfXr67K z`t|i98@yE93bVf6PLn#+ZGhw^l(dFg+l4Z9PY~8}#VJt zyk#5kj|{ZsO;Zw5ikvt@3>9nF?~FZUv}g)@F~@WDue~vh`-k;7X4$mQ0Uza?m=!N| zQ)GmQt%=iKodTeors9H8pp%k!z(O9>KcipZQMpCWgzPu>+SW@=>G_L{S8kHjxsNXH zwP0P>znJ*b`3d!YNZiT?yo6Y4s{BIQD)1+<4lzh-KR8N_oU2FG9~l<`n=A_S`;s9N zvE2)%JWBa$!4h}`%%?U$6=u?_UWIxK^Fy#;hAmI)rZin@-E?bAa&V?dNtuGK5;Q6I z$OOhuh@ZiKQo>-E-QxGj-cndY!znn+%xb<~aIF8jb?(2=oC7}Jrr{f-}TD`+!OBHz&& zNV&M74xd(fpuvD)>F5bOGgz%ih$LPQmM7V8iuM}&5+l>bf6oXXKVf;95In&#Z_*fG z;*5mY6m;9(D-Eg?RHrqA0q`b&H8KJZKd$6Y<#Eq>qF_qAv`=V@*Z%idR&lNR-b}6I zh}@S;v9a;4bya1~T<>7Ak*-LF0)E?huX1%5bw69{Tev-Un9W1|`dFp=+maI)xnEwK zI>UJl)@^EXlKuuW@!`ICW#a?7m&){N)`2(!HtfK1fIVFMzgo`%V|DNko12>h-T?5K z;^RkVn#J~79WP{7<5G?@SRWqr6cZ4yjvP5ONB4zvcFwR_D3Ux@)C5S*Fi8Av+E)dt z?Awymm|Au*cByIl{|x-p}$hTb3G048L59q|RL;VZQF%IQH)7L1PQRujU+oeD0;A@LqzF zZe=f4R{?>@m2<@CENR+hPL;yRAR(ZauxOfQW>N6qzWQ!S1Y2F4XtQjQ%mRjgG6-xoc0g<}+7m z#wC5&$@KtS2L3`lVq4<4J@^?nV?D6L$Wbjsd_Y1xdsr>m8-sG|almtLtB}dZ=!5OG zO*4GQcej4$UOxqX3?Y8P`iA8lNfQJjOg4kt_4m}hjEIazp&aEpmcq4Gv;%`XBQO8I zFF;jWk{AtS_THL1YI^$Wy&&eF&573L0NXxVKNVFaT+xq8qwyVX_KCHxy_H_U+nomP zUI>H(qB+k(!bk2IS3h-BFcofvVag~1!JNTj2TC|=J$SVqzMq4OolNThPSDKylC`N7 zYKwcO9PGUyB~eNjkOes}=tW_4eUgFVf5iwPGW4{x<6k}YA?uNNNLjoR6-Hr@4S<5c z!^0~m{QlS(C+9VIc$;wAYHN?d;Pm^BD}clA#InlEcP6S{(&2RhI839IiXL+}JRv&1 z##-L^l~0x|!==^2`zV=1onVfZ&#PB4V+0nZdXLsH-%%nEfns<6Y$nB}WW+|3xtvW# zHz+NTo)Y^L3;U~iw9q&}V)BoyI2V}}4Ly>au5U^5IM-dGMLw{oc<Aj6y`S$$iUkIKCQtRiBA90}ZTWjxx z@Uhu9f;a~qArTR9u3!dBA2Z>9jY$4_{t#FKv!Zu>D-i8)dUEnD`UGeAk4y~aVDY~c 
zZu@y2F#TyZ>%>iTvHbcYJ9=d5NYMMzPc(mN5&uI<)%?8b3~bRLxFDG|!!0~&eUiOk z@*oCU;;>OKWpbR5Rd`>INGEf>x+QQjOM73Lno#xdTNV`~y zykn}MSGKUP)dfp#UXjP@yXaBsVrQFT3Qs)@=m9_`aX6qi(bR zWJzLwhyXiJ*=uS-OO39xT<3RU1JkA}Fc_I`p0rzTyB~gim=*t+k#Q4dwqQH}WLy7@ zLy>-?yEh~3$YW4Tz|k`4h;MvSI-tatP;HmWm5-7UrYb zP(m?^HVP3oa|$h&%(AgT>70k-HoPZ%t3a+{t08=;=~hn0TUsAD}d3T*;M46) zw>x@4^xutf{Te_gyf+(+u)c!A2Owp=dGjSMZUp!app=GL4e4N&(*kY`KI!e@y(pQs zX+wTGzR-0@6@26A$xd|m-0Xf)PDyH&^~Q9S_D;WL>`0hJB+2kbTf?J4L#WeZJ3`+0 z_isiF>-AY=`+1N?r9o=ACqyH(xu%bJ>y6gkf2of^parnJn)3XbjTA9g5z_ghZ<-Qe z%tLlf4LilJM3gRH-cRoq?Qi+1oF%ts+Uv2B&(^dv#>*H8cscFeA^1%qJY7+{-hy%q z_d!N%aulbvgFWh2zV&@9KD(;}M>AZjwzuhr$(s2kZCHYzbs4Xo&pYnw&D4<#-99a^ zs^y4OJmK@+eIrPUO(gP4I7}fy6&Jj?$LJ>J=B}&#kGWDxS9Pvl2e#){+WEmsviP4@ zr3@n@8ZwI+?|*&_ZUc_M7Js$6$q0*QATIiwN#VgE7h-%#483IAC*E6xJCuQxszGkc zuDuF{1TmsUYkV*Z553YbK<@C!KExOEg^Y`i(?W zkMagr*2(Yu$1BmgvFA1@0h{^RZ%E*%Dj5Xs4WwAu*Z@#M{wyBpFLD;{7R#mhZ~fhM zA++m!Ha{k*d!d$c?ZALKH)%JEv^eN+V{WdA|H8B+EU2$73c0S#X=LcsXMuE|cWtl~ z&93JI=AAGbJ{X<=ygJ+8c?Suw3m9@fZ@6>-6m{#jzWk#ca|^Vz2Cc0MfR6Q7JT2kt zL2j=(H?83O>aqEmcx*XXQQ_)@PLwZD8a&gVQsv%4fJ1jways>tjU%GRt%F zmJdMIGUTg8!}c!IQe6>?*I{I)V?aZr$Bn9RA{>1g);y0NxR7!HPPh)~>wd{&%NF5LoCagv@s+5@2I5IhW zW&I8$z6X?E3%#u-vYekV?xxnOSaqMwU7{Vk70~Ud=osv+n!mPr(&T2RP_*5{LQexO z3dF8hgQs{IIlWq?8ulPebK(&|4VTIvu;#xEAfnz?hP4`I z6MS=j+WyBacqI`CDC@4=(c5Rs2;bf%vA1dk@DHtXvYfk$~P=d7m z#gBIwp?6XIGx8ba%t1Cz}&f;oU1e=^Xy0MSohGTbqgZ?ow7Hicl2LSBYG9AY$X4 z=bLjrc2dbZFBEy8F-Nwzw3fjw(D2-Z!vc4?fr{rGDvBuTLG_q%@h`0y91O|)@@Wrz zvZZad2E(ZgHuST}WJ}kD`#G_RzgwG&F~j7>i{vzq^Ik6Fprh-T)@HYxRJ2_?SV|19 zh>$BklCKKg0?6M=m)E;-V&t?BrG`j?a$snv3XqZiuGS()vZqQ#T|J=tmUxSdl2XK` z&ge2iasaFzTLDm9jf4^9o$?=#07*J1`GfqFnMBC7;X^B{yq($k4)lFf8n2PVueM*d=H_b^|SNZ z-df7h3amjiBNb)4<&?UIhekO#%PjP;{UPW@XQ=db%>#m>gSX@Ewl8NY(l(_))6T_q z1dze3kYUi_&hwW~D_?HTv_3y0K0obNqQHZIrh!5Y;HK!kp=D+W3swC6ySG@Qnvz;j zo&pCFFCZ@R>rr%yio9t45D=;R+34qB45p!{G5sB6=wOR5pen*N1d|UKgk*m@K@!E0 zmhJ%1q<{98Ci>ZfZ}R`7fR#}8*KzhB4vsP}F5lQ1Zds3vmOQto56yPtiYJWN9{2bc 
z8y9E=^$`7*4%QnF-nz*no`{u2e25}u^6SVMB5*)B?PIcH?N57iP^^|zKMd+%prtQ- zbFyDYekV*0+FI7N5ihx+g5k7{gkg=5eq~#vWk6!qbg$~XF@AwpX(27tfg{IyLtb}bCaXL%MLE6XQ(6ZNu1SLb3sRd zg>habUJu&)7ldUqz{}wn9%H7Z9~dZd%$qx3?K2>o0kr^)KDd-3GL{f;FMiAyUkgd$ zEH31qZ2q}rr+5^vS=Pep1q!-?hPNI-IDG;H=ssnj<5IG_>PnKEsXTo6xCdx4+C6K%t@dC499ZVG>eSdv?BBfBBSo7U4_y zaD&IY^XHN?47V)UEFYh9hqap!#4rn@bnHsAv{DmU*qE!b?n_b;WHh>Q@2P(+s#N=x zJLPDkc4VtJUE!S)di>q~FD{u;roJ;XGK?IoS^wil!DR^}qGW~#E;Nq|fOGcrw?4JE z;^Pe5N6DZ(KoWx@^7tFg|_%hPh-NWNkEB`1GzT#!P11EW0H#7vd$ ziGs-=PL2cVWD-jnx8xZyHO_%6hjMCT@4~L9dkruhYbcz@AZZma(-~-!nR>!uUOX_W z5~eICi0fn?x-khyXqrgk-zI}8eTHs|en4YF>zgi}XcngDas-VTc*JU%ur^NWHIyF< z6TdkWSGa&@$-8k%6H6dOQAX+Ftf%+qpo-BWAo!e29kVjjZT9@`1%{~|5FJ6hL$q#V zKko5*G&=l|;Bte)!}gPg&D#;2DcrZ=8lzb)`!iw6^zq|INX_HS%Xq&?dgsm^I22`_Ae^UI4FjN8{9t0xN=sh7pg4U$G1O{A{mO{j= zgGbVYN8F^$81@A5YsQ1U7O^PH!2(gV+EO)D3nT>mvKHT~Z-a-8SMJBV^}G!6EQz&) zt~B9t_rs`PD9^||D{*Q5rYrLk>+=go8!z)Rj+6@4CN5z|S3WVD3i$y3#|jKK2rXJFpYAmQZ(d&@QxTciCU|28-e z9+!QZ@Hk~4=(V)VF*?K{LAwDU+&4)A6(Y`~DRIO2YwZCi1v)QkORG7_b}HScpIBQ@ z3}gJ|a!*Qho)y%!4)hmC8U-5F`xs4z90YC-;uF1>ziK?D$zFW{>J`n(6PyP-n-zu0 zq&y#ui4ewp1MKKoPP`a=ac6Tro;!xpF)ikw6Aw;U3gI1`N+kZTnRXz##=Aa*dzH^T z{kyAkY(UMZ*?96#p=l!48_9cL^oTo-Y(EPQxUGCw*FPdTk4{}~dGRn5cU#Cuk^+ZM zSKn-TaSP}|maF(Eq6EK*R+_w?OC}>rVthh9h6gpi^%g#y>Ja{N%HM_w?MjC7VHp0@zVza+cFsOC^D3zsg9yJ6cynlPMVn*r?N^Mmy*5uRHmzhT9jk+kAXM1sFT)PeI2DU(&tysC&yDjd8yodjKn)OkoP{#%C~^<0 zqm-$qi}w$_uE01gcS&J&o_D~f*2?9sRn~6Aly;-7NTiqht%b@5Y@ysn$fii>bu8lQ zMuuyzLEPZRum&pfU^EbPuw`vCA|>CXT2oHsGReGS4kXRVRQ=h)WT~!S z#yRCoJ&doJ(gmG=IMgQJ3B|qOX4ceHt*ojjsbxm>d#jW9P~HeGnAi7=XU#?vpBny> zjLJnA?&4?|{r?O5mN_)aD2RL=Bm7@Um#NkLJI-`!J^>;g8zHa9^H-1h>p*Nu`@w13l8mn`!0bw z*Z?RFDnnDvLEO{TRV4erYTiGy{B0LgG)wyiBw$-wCZxOhc8UTvp1xZ*zch1(o{rOS z;IYU}VLUkhj}{A`wjhVgP0X?qwmy?YyH(@0Zi_D&baRI<5jQe}n@{dy@%6xCb2D{^ zFZAzd^$d2wC%!l#DCam2$eq^WMQBTMb-mQqcb&(=55!|7BFz%h!;J^Og{&FKD`M1V> z^Xof*I0~Fxgzt~-Ko`eVA7lS;;wngNTFNI{hTgZ&$>Ty$x{nby)Kh)blN~ksr{%Nd 
zUpD=Gs@{Ka|K@1GNPvuxp;A-tfQMH@7^VxrDf=Ys`yI)FSTIqdWZ5*ldjUv+*;(hC5lra z0pWX^hhQ`(LP7EV%_LAC&z=Nz^LsBTnHJU5+1pGu7Bk5e2rZ<>Xn%gOG6$%?3$g3` zFTc3y&bCE+vgt-%0iXU}SJR+FNt~R#Du)Jv^3wC6DyQi`e1(d+!^4bRa~q*OejuDC zAFR{I2A%+{|4eY}((Nl}E>>P99^-|RIb`C|&c7YyTxaBKLp4LWz899O%0?ZySe_Sb z7iUfRG1B+gxQ5U1GC0~@OM!0d+d2pw0F;wbtj>RXd-P09Km?6%YV!V%B?G+gdytEL z>wkc8A?NaeFEB>H%L1&k&96T=l0=4w;Sv-Y&D71WpKO8vgptO3z5ay)YE%65Gpt6Roi)E%Olx{MFks11`2~H$ zN|{l^WkIhD%r-|^QcR~OpKZ&_^$j@R36)zHXlph-_tw`^NUU4emvS3vQb4_23SZE& zYhg+Jy|nAT$sc8;Q<7I?;5gQ|fa$a!216$>+->(e4Wt&!lF^Sb+|>$hi%$J>t1ocK zbJ877$;JiwRa2f5JPG)ZF78h0B*Pt6t6+HA(%C zaj)13_?3$;F|ZG`c|G>bU>X8aK)jFcZ`T>v*`2{M5@vb8vB2j(g80*P_Lq14DIk}U zjg!-^$QkHJR?EMWmY0`7v^q388vm-z@dB`Yz^vikLY(EJ0M*k^RQ&u?JvWh02f!2q zY%KaO`1=5(s}4qAK*mu@=D?5~K$QTBJ&YQjH+wYzSP3ap9IM;`nd{)*^S!C@4NFFg zll<%?DcV8xh^PA&)zqx|5ABwKd zmrEP#GZ@Bid}tHVwmJ0&n2aVssGv^VidXuJAU#(_K~70=T8#*W*DhD3q5cCv?bph< zXrzj|N}%8`;a`3I_b5WTgW7zBKtr&br{hY$$v`$|Cu91C>{2s){*9|V-H=bCT6eW( z!P#UF2Js^ru{rv1g!=iVq`(rKm1$UXE+5l0tE~{d$B&E0kB_~Zx=GJJElNeOLWpNd zxtH>hu7iV|JCgRC7uXr-z!|wLjCTR%m zSPcxoSxb!WQQnU`@>z?tzvCD-r^n3~aFlMsrQiV?4PB ztTaf_+{s&9^cQR_Y#omJxirmljqsQ>m=fJjmOyF(nQ$<}OG`^D93~%I0xA|b7J}vX zbgkVKa1hMhHRBQzR(5v!7Iv+zjV&$f!L9-|;Dxm{pT+M)uq?ELY>U3WNTqya1Vp?v zH8p`P9SAPMwR)b4LC~C@p^xKa@ta828IC-hZ}9uR8?uI0K7vW zyh(e2h4Am1M(?+7^QocPW+!5&1X4^s835{=DCn_xOoa0;I2*(-8tZbD-?d(02oS%i zS=TMl2<6kSnRgNmX_r7`XSWR-UlWx`JdO#b?b6bOyo2WhP1PR)vW4G+*9~xEiwd>a zPh`>zdop1T8IRhSXJio4UgCWbphA0gb|XP7L`ci&(Y&7nq=?+~LCC7I_%llmYa4OF zY%gs+C?!Ba`|)fO27w?V7|~c+osJoUhdsWETYi7^($gi%$2C6AIr6>13)Y8KPs2;N z2t_1R*Md>8&V5S3Z4CyFVn=5J9+y5R-_k3?!Qc6f)~j5W-8-P+-4^ulUR}(E0M)I@ zdIfEH!``<`+Byd7&97E~Q`Clw5hKdL$h@@o8W$rzJ}E|9#UQE%4iT_L)eMi&`xLU> zeagfRwA$kqE}jGynZ7P_Rw8`6%o$XW?KXgr8PFRA5Gk~`OEMCGg{9_Z^0OB|KvWMt z(V<~s#Sx;Qr$!gc9|9eHWJja-x!bE(%J0)X<))_bLm; zl|NfZSmZc%y#n>xfzxWeYmI)p3=#w(W|hIei~E%5l%9PGBItumgVz@@BDTAKhgZ<& zp^$xhe5mkZ^#@xFnh6hDs)Mc;e7ol@k#zKQhLzJ9dozn+l}<{cs*aM#kUUh`hlY5I 
zjId@hR2}+!ipqHe{~cn7R8a@x`6o%tJN{JgV4dl3k*yvouv6$BCn(MYBCxl zjb^+JPP2<*YT8RbzSUy`|8RJ|^2Kq);N(&+S!LG250)H27xM}ZabGnXnfk7ErG{y% zX+G#1K>8!S$>WN75Qehw&2Y43p@<0o3GpiEPL~$B@*;;tNm;d?4j33{|TJ)O({Px-n5vNQhPN zYpt<^&8imY;nwaDNC0f0Mxagt?hMlB+X!F^0XmQ*m;1cINm8#+lnNIN@sz04ebaj^ zAb5gI6z9>SN4NYwCL|=Jr3r%E<2EJ%AkjrH0h$E;v>yawMv@6aAPA%~7S%%ZDFiJ- zjQZC%HOUH6lSA+vBV;8+HPAi-74%Bj^Cc-x`{?^!_9{ac_x$Qin7}+gPafCa+#a=Y z-x#q$qv0~aG|9#tL?1N7v>p5HCTre zM{3NSD`xJ9P)8&;sG2&IS7>s079?Xgv3$`|;J4@+82U5$ML1>9eKdiD82uv^Uht^^ zl3xmH1T)?9e7|oTjJyHmF;bbWhS4E}eCGCi<_CG>@`fa$k?KDj3@<}7(Bw5WWr!zf z|BMda@k@o)r}>}sw0!3HPdyRBUbbU?oV6&P0)%f!WvCuN+EUF?s`ed}*8Y%n-gvAv zUI%)lz%`EbdEyTjfsR=FX9X4+Ky1(KX6DR@Y-=?k!$$wdvUB{z{~oT{mqNt;Pj~jNr8EKQn=Yrpgq$rxSIYS+DNe z(}p>eRJ5?jQWB_)mS(aMBWCvtC7%=Tn5~m`F_=R-uuNBe%j1pb;XB$(@6B`TF8kXW zqp9@X#zTTpuWs$ezZ%k&;wkdUZ1e2C?sttT`&}f#vdh~BRc_Pb5>6}MSR3Vt4>r`o zP()I#Pr@DJBfK^bZORv|Po%D_Aw8Q(yP5@hmd3`;Ms>ZPd0iv>Q+9xC62kCl-Xyf> zmvV#)(?mfLC5m{aALNwZc2v~V zxFOvD`XHs@ex3(H;dwg9)d!KYKN^mcjSU?m{+Gl=fn{Z7oK|+Wwu-s3A4=7s z9KK7^6bX#r#=YM`UvD-@j20wC-J8g7xA_Imf4p3W-{Vt|zLpr%vunPT*U-KFbftL4 zlb=EVG*u_*VHh>uUtxK)=8{L&N&o{zsj1T!<3YF-e(--Fm&wq&d1+5i|7zOu;3x4D-*NhGHsZsr{1qbN)id{+ zTxK7B_baEX?GIw-fp3WZ>U!!j$?tADhJwlW}j4GJ=Yv8N! 
z+OqoM?0bQFrG0qWOk)mnv#(swGZm=AJ&#`nwVo>MB8$+p-K|uGr~$Ym=%~n0H>obu z7~lm%`Syf5_8MuH{;uuS^g>4Yrm$|r(+Ra4GIBpuy=Skmr9#UT7uo z+4YpMF-Ms(683rmuocFBpg;m6ykBr+Lwu=T5SJG(V^&&2(T}9)#@NJ|!I5D-rZ_g< z^XtI`-s6MO+%(H8@4)Nwh9Yd@6QxVqgBP}Fcm4lny#Y3PZ>I|H^u)Kv2)e@NAn}4O zAa+!kue%CaefQv(A(1JWR@(8L5PAwc$(KvENIYH3+G*{)0jLRKjF z(*y!tP3;l%g8x&biGs`xgyCuFA6xZv z4U{_kpOS=14nfA?=?q@@aTD(1{CgBK3xXF^r5{@jAbbhu5t^%X8YZkFf4z?0aZyi( zyND3IO&YnBlH&=rK1bVTJbjQelC`_1puY_&-$aBw$P|ICt;k9{WgWz?E|_#z{tQKr*n#l(DRZ9UZ% z*HeI;Nq6_~NDIf<-H;WXKET7_0`rliR`m7B0=`l4tg(98fqDf~leh+T5A^e&N~lsFaB5{TE}6ZLg(dvmzf?N`Ct zLZCuNO8tiqL{gnD&ihIZUs8v#jtUpDRjP>um*2y|;XZp@Xn3^4+*_dLUK#)^Ei;KlMy14CoA#lb#tLo&%o z({)WtON&&d63~0h&X$qZu4DF&j&G#;RTmZ;F%o;G!L|%x4-nWFAbM*l0g>DA0XZ8x zy9Cu=Xd&QO=7Z=kxb}VNG>DWc_qsX<=^-+-r<#JL1GiwIZz7Fz0@euR|3wh6y zZH^D6l(cjg4i6J{?3AM zrZD5Ottu1x7d8JER{ekWu7=OzR8y{}7OsXpCD&7YRfF0|Q*4m`bF&u;$)8)uo;Z>^ z(NdYFV>>C0ueS*BSW47rQAMumr|us*96G2gW1jt4n)qN`?BrOoH6W-U07I;=AiAgs zv{a%anwIvzDwPFL?ez*hg>X!;i;47C#k=}ne1sK9q3g0g(9?4vr>Z;}`3d5aRz7BxsKxFcurFG-Pu%Vka@)KQPy_77UCr_4htdqBej=r8* z0nv}(=82M%)47g(GNu&iCt|n`H@5$!7#D47vNH>;csl7kiKRWAWmM{D$!A1mucUD#@cIgTP0eW$*`Zd3PnJGKrVg`Wk z1@=W~!L_S^2MMB*Z&5UwfzTF^A_&b^oVcGTrF3*CUe{XCxr(;;b#(mq`|n+nuspg4 z-=s46a8f+4c`IaUM*#LJC^T39sW+n*HuA&1qeDah34x0cM~)Pw`Hf4>UGOzg7Y_p! 
zGd@FoF7`=BRBUJF!{ILfRJw#{t(;S-v@dEsiPbjaCjI<-%u&XL|G1vi-4tLnYpvcz z>gbvPTv%U9bU zcdud2HM5wP%3uR2eX#Pcjoq=M+K4A~yd};nBK5!iX~su?Q%kDxxcFN(8p^$CIlr&k z8|dhx-*g+Xta9K#Me!T4OVm_SQ0B7g-FbEH2DcydUE4?K z;%ZjyI@Mk~BmUXjy~i7mm{G0wU|nOP83);?zAem8QLd@E0M`Iw(#h6pH4P?5B@J#Gd1;~ z)M5dqULk05*q`mp|8*YN=tlMO#tYeFvVJP*W?f@;)Y7pRf+Mz528dSZ0XqcyS6^Nq7|HTcF?(8(jzxCoc6t5ZG9^*bKRrE(2|zn|2sWms z3O`#ArjLfucWCDd4Gj>zfA)tOuXAinBY7ybBO1KcO1ZbGgimOL({nCYp5Tae3_o7% zU?^q9ijvOk8YA343<0zcHaKpM9KjpNgGh%nQ8Sqlt2rwxp;%HWS6L<(Jy?AI;d7;z zFNMMm3+jd^ZVb5XvcgE`ca|^_C8ZpmUI#ArS8A35hpS^7@%VxFcV@)?=FDqP{cCwa z_uP78N8-xmj_|MtgCarZplSZ{s}4uvCC^@+8uAN%#jDh+4=Ludzj0+U_dMFZXQyvkGPd2{ zVqYKCf%y-TVl5soPSncq846+ZXeZA?q4{vdrnT1M?=wlje@AwB^AuB2(N@}jnvP-d zZ~Gj`r^QTF+xG39NEpgFjd=cYq-?VBvW*Wp;|NKU$c!cmFEXe}+1-n*jr2cJa8VzA62Bit<|Ou3#|tA&`n6mic1mSkd3r+o zquo={+a_@cT3`Jc2b+su9|P0Oqv+N~SyijZP6Jm7=#CbEx~mshBFz^kyT_-S&XPZr zAQGM$hxZmH0|>Bf>NMe3;OxlsVB3AghBfxp^4CAmS_QoisGls&hoMbx_ZsZq$V=IQ znxInEDCE+dK6;=+tRLQO^9@qiyHNzlbCsQV&L8MflJ%DjLO=om6#)y@h1;**c!CCp zA4dM7WIV9avV}91qTdqf)Vpz-^iv*LLCp<)bU5H8Ke&sdqTV_$8ZmyJ^&d($CR&Gc_wV8N`dfso!!iW1R$c47zc(*SL6!A zxP}JD44Qjlyq2osg1|tqI29fSfMgi2*F?MumF)=W43tK(MH-TsLIxd5N{LuYvZHIv zwB_jt%19Hk!pR3p!*uQO-_ETkRmTPO8L;(y{Q}>~QjGt4SGfKjt+Nh7WXs>9Z&Pby zXKVUDUue$l)opyVy7Jp<7BpE8?P4C5lN;tY-w%FgqUmkKfH7t{g3G1-H<%ZQRJCU4uH6DG$i6pX*3dN%Pm~e`M!puu zjQ^rxPy((F<~M*m7_3gj06ZfkSFh^{svHZrsg4UPXg0;S4nO{4pza~Z%2Ic!lB~?HxReW$-KyGPtK=8gIWQbJ5 zITBWuuK4`Q?ITkhZ^GVLMUwOrzA-I^Dw{Juxa$)I{&Kc>DzBr^*3zG<7W)Oh9p0G{GnlF-7HMo!^hpb*Z!Th+vjZP3|>j57MJ6@a%Wn(($-1 zo3#zPK?CXm`mq%r*uoWGtw_7Grw0Ub{lmj5Gu=QsrbQ^Is*YsJ#ICR7Je&pNEfnpB z*{w7+3EwudRohFdxE2AEo}Mncr$|STPNxa=R>pl41QNG?0*^DhrE36R!otEdlOaG3 z0=(DfAK;?05@Ef#VuT<$C>)Lww6Rgfm7$J$WLZsmqO>p)vw=La&HvMB<-ro6{o5aL z?CzJEH9tvt*ki0~NKf$I%Dg(I5Z8ab7|rL#TYznONs0cmlEvP+Qpjs=dG8*#*7O6v z)Dc07LQj4UD~C85)F+SRtdEo(M|`%#JZJ4r1^jN?fx>h@f3i%~8(o(?m+_u)Z5i(Doo-`$ 
z*|gC!QBb~F7jAhaH1nw5jjysO&2&D;*NsI4i;F!{&1#3&LzViPfYJX6U3JB1!kwt-+BBkLO2Ii5yH_bz3`J~r4s~c-MLS9X`1IdL6O7Frwv-L3qVyPVcjuFi(npby^?vFQ!U7?-D zREt?!Zb7FC5&{mk*n7CMK*;zvgJ*f@h#W5NfTq&O#_hIVB16cc*-#Fk#E^zCJUWU) z#3H;W9g)7eHSu`84=!ENVt0X;04TW^{Dl%+~ zhWdF5jHVki(gIG{h>ngxiKs^^ z67miA37WUtX|)?KUg|F*0B=F~V29fB*~x3FCX3U1p097kpwKp28u%)37>%sk+Ix*U z(a_~J5A9ZT_)>NZ{S(${dLAQXCE!9&DD#Bqmo0O$uuspM&x_on=Yef$QL$YHpCWxJ z{HTh*vV-t4wl<2&m5rg;<|bHXnxgj|A1XPYH$KGp<#>MC2^qteX2^RJcAw5*ph+j` zLkewjNQmj9)gJ}*hg=6!OL3CeT!cTowyrE^FnYbwu$rlro+EJM#V;o?@wyGzi2c5P z5;yD_YQ)&z|I@go7UX|0gCtRkCXWZPs zLc^I4cYFL-ht}J)pY{F~2K7~v^G~%E4-aT81fzq)cA97c)JfmmDi~+Xg_{(}^Fw3A$BT zCMTcWL8ebszyc%_GPh`jn7#RNFKc6b@D^I$pxXyJi+6}uwC1s)xCf#pQ4m<9Tv!o&CSr1Oy`XhgL6v)e`dUT?A2VPA zs`HqbeMHm5#l<10kcfl?vL7llG8TdNf&c+X_)z*MPl-;YRt&~@`bNF89gy{coo{~v z8w+c5D0dLR;$azn$V{bulaR5L`yg06EUKO5bNfQ)(u;;qkYFmEsk|^b*o&BY=j4Cic1FO7gI8JhOr_Fp2 zU!v69d0sz-_CH)%%BHAS_xH_@M95WxIyu<6WMzBu@^VoS@Cc7LD0uaS<eUpd(%+mA9jwM?b^<#kf3)=7LXJ8X>r zZ&yI9)1YH;^!eYD6gi^5eS=3E9#adtH0}HzWic@e&(zO<;L&4!rIKO z#+{qJOw7qCdRM2Xiyy>or9Q)h->myAn_KMx2BLpI{>ih2sje5^9Cj{{C^We_BSW8u ziu@p!7XAKl#AKtKhE9SkRmJhqORqVjS8`q_{qKnaNfN5>jcx@u+_X$gjkW%Mbww{p z*+e~XLenjV3ri8vj4OW7NyUZ=7a_$2{Kf11_x(8>^qB=ef6du@Gx5-cNH+|)UZwac z(?dJk40Bu1sK%vGPC>!v@bHn-&-zwY*8q$xI}MGFLQO@~Gab;KKp72y!qK`%7^wek zj+He!Nkgy&BuYB`kAqMiBY*>yknp$yC9i4az_=$N(@-6=ndQiIc9sK8WlNA6PbT3d zKX~BZK1FFIZD12eMnd)xr!t!ye@;!w@z47t;Viib8z;H$T=AfV1afH2m!80@wk_EssG991XA8< zHN^EHvnX7wK|v3R`KG2vl$UcD*<}E*8v;cRn~Mb%&_<1SYCn$hq5|SegA*L{73g|j z&^-td?~-o=Ku*nyhiGzq<+NADh$b-P7_eW%H}(EucS1W6i1&yRQXUwrT`uL1{9%wu z;H@l-jt|fJUT{M?<{BW`t5e3p4paF>ZyIQC+63AkH_Fn}7rv}E%+39)_R`F7i4X?P zHgKBR*NzCUg*=JuLIie3FXEyGyC=JBULJiBetbIx6}NNSb&O+5-1s=Bt-?2fl_MLv3k9r4)lz*m7039y_q=XljJpl0EzwZFJSP2`q zsg>k+ZvkaY*S6ENDhk0V$5U<6Mpew6Xc}zm!_O+zczaHZ)5~77n|FIV));^w*jy6I zCHSdKGvT>-S?NE1E$mdBB*drxQJV`()m~GP=4F%l`<>oMwe9aAzc{`3=>ka1;j${j zwOQ+KrF*+sZMg$}8|E&j&0AL^nf)FiEMhyh)t6-0NQ@$oge1WC0Agu=b}bW_LGH1m 
zV;syrqjUD0NB1HB6wbw+eWADAw0Ia?ghu1cMr>fOA8gN9scSsi&AmbjwPaW+zuYoy zPLk~VVI=@hV}e<4{M-z-L;Ex(IzYtRS%!xGm1KoqEs?{S@s~+|QTQF3+iHn*V=n^BSO4Z7v}6tHU{9jDO5};6ZHDT#?~eJ``)5T z*Z0?R^wDKxOhVgdqzGXKHA$L*k>l`Ql_phWaant=%f*4D%u5!QWpD zpgYg2Y$+|6e`RSH7~19Py#JpTfFTs(sfsjyR#Y4v9a(M*!A`d8!8wqr-%7seEx~7q;IQiqzjhQTMVvYi3?j9U3XK7I0Z#HkoJ$E4G{T6{kMl z`TM+Mv@F0gy5?~TZnjZp=hA+x#iVZgH4{<>rBI*XlwcjjXNpUivZW+xi2JqH<`odQ zxJpP0n#nK{@JgnOk&@rOuAf>7pLuyBQa72jEQhIEx-=Uo&<=xdKC4moeHK*rU%Xgr z2TyEqT1#SsO&<lWiiwd%$<@Lj`KQIx zky05OPUP>ZHgeWqGGROS?YuqipS$-Fn8$oGuRAyrtTOEQK1f(7q$ix_Jdhqc3Llvnad#R$M!fyDdSjVcT=&cti3l`4W;1QrRP^^^n%fbA=PuHlK zs8#l0yY`9{&-~JicASy3HgZ$(r8A>c>;m|>YOfPpn5WK9&{Zt*a*C#Y-h5M_Zd<^9 zal8Lvf?*z_%2#YOyMDlqW@TnJ4q}!DFB!G(5M2s6_xDA<;M2-M^&Yg9V+Pnk3@2KU zysAA!N7i^=R7z1pRGF^FWj4JPRx`vXiHeFUDSh3K!>5tm-P<$W&SHYxrH~K^d@^ST1^egg<*UF<)}Fbv7$ObxZ7!92QiEwp@v2+U_(G81K2hjph8QL z5aR(YS`gJHm&D8Yp$!YlS_vC)$;b>@utGhrX|4^w=9Y?#V*7gMGkVeo@Uwc%!hkTu z4f$FRuXg_qj=qRccsq(fu&{F~Y2;Gk&T@Gr`@1>+wWEPmvu=xq|H0pwJ3LoeEuGHk z$O+_clE0kIYOVe%FR3?R7QbZ!?%%-FDBXoI)x^?iX>GpRYH#RyD3{K!GatDh+v);U zk}I=~!#Cu;x~;3a?u{-{>vQ@y0{*zcAED|4{EJLCacT+!_KUJlQR5?KYLr4au%0%0=KW_{|j9n`1&uH4GnOHredBCRdZ?PxPAM+-;( z-LGeaGHp0Jqf9wtYSE3D=zim9=UD4gOY=QwWvUk4%uM&pA49m|PrG|36nrUXMYB6x zbB(08*dajkkX)CysgsKk`Le!l)+uSccJC1rMCSkblMp3pvJ(P1{eIR#eh? 
zc(59;T}9UHlmCM}wDTnykYVr5EdHg1fhdwjY@LLp6>j#aX~Cf#xCHNyHedPZA$-o; zXyUP+JfA9&qE6?>q9F+G(+-Drf<#U|7n2g9Xwev{j9}9-!2B8=EKekk?0C>abb?)# z_Y^Y&Ktfw#=XpuBw-+joX!1!;LLi8m5~}Nf5kVkA(vj295uckEM!mkp&)yU7`r!79 z*Fs0IY-xdpI(w$>1{Hz(KydctEGnm`9SI2`dsn^DCE@ZL_$IY9-u+K672pJ89L(4u z(hSZ8$?Q)&5Q=M&lsp2)fF^`HsglOXj*eywni}a>yN=0!q z?Qp=?^apndc0kG_+G&e1u{oAJv!gK_|hM%wyl!P*;$ST>EY!K15-H3y>(ikYk zXvA$tpTD8RHZ9;4`R$+~r#wBcvSDvN?alrSNuVCwpy6k8Lf-f#8WbN&3IrfXQ8ZcY zGJyJC&8K2lg!_#RXoUU^9>>;nA<6Z2#uLBU+k0n#uH>?F*%w{}GSd9fo2%Mqd>v^D z9UFx5OM5UE>oRH9w*MLWB;{kiP7;STzWM0^n!N6HKJDpS8+&_pE-t8EnVsKL@#uaw z(bl#McUy$dvNn;;=GGRdYw~sKUJzh{oUj^zO5hLif|cLBv$VEOiH?3qKv4evH!^rI zj@qs6`h$=j6b(?~Li*r=qLx--LY@XyxGatH+Wy$NF~Fh#$di+iwS|+*TzBa*2aAIk z+k7tmGbnfJ6ae(D`JI=NlH&G+7!n1zEQnJ2bfEzP093fsJR&0cq^AyL51=@MzYlNT zKRAR^cpah8${TVzyDCUuz9fNJnR&Y8L`6C)sp9MpMsvBY_05wG12zg>1jp-`j>tsS zAv3(HW|q(0SE)6K=92he;NDG4&Kbp|>0&aB8-c+;=E{pE5l2q@pe5wZ?3_v-kzDU} zyl&a;sX*YF7+?E-Yx~=0XRxg(Irp8{vB&H&=>~TEk^TIsM>#CdUyrCZ(w=^OXb=c^iRtqZh>!BRPi_vPfP4waoZcoOT*CW0Ag}$|$+kS6M=y^5sv@gluFBOs zLn$6)TckIt<9~3){&sX%?Fe|~+U7?DL5Q?SzhNRyi6H+Yz35?Rn6n&r7Tp_}60Pui z1?mt>@9&+=^Z@52@aVa8i=Y{j@JxPoK9f5h$!dH2kpjdNS@rEZEe%BdWp6>8O%(WA zD36(wYY)I?O$F@+>t-r7T0UYw7y;3h8iEH+hf2alVvKZOH2S5hzjlqbgPe9Ar@2=? zv9gjm`a zWACtR9PC~pbnFQ4eSHT{|GZ~pA?clKfC@}|fA9a-qS7eIPF%h#QXigFdrkf1Syk%k z)P+8(#A|Wncvi zA5%}j-Wau1DDEX6Kh0${1-seZFaXMJ1``5s?l;iGfQC&bB`==>q57X5JXL2@xnapwUfU_;KF?#K-i8 zM?{Ne5ww@SM|3C3h?tI5Og6lG&FE{j?c_AzDeYQfn#T)Y6o(Hv9PeR z-XsRDyjv(2w$(6fxpXY;!wspPw|66o97;>US~D>*Uc%)gQHaZ90*%rAii%EJ~YDbsy&1sDk#!o zq8c-m7UjdblPN%Y*V^e2kqF9mY>HA&`*?E;- zgAMo|zPn9Xt;dg}RSW*SX~SMCGg^tPfGmEYYQ?eR3ZD1Kb0>GAR^-VGZOvi+YY@)a zcXzNYQCXuA`;xW%OicyEJg{fPN%leVca|YL!4ls1mn5yzhLXdoxvNzU5;$z53a4Bb zJ8Cu{mn*f}V|DRaSJK8h>D#h>b;h>&VgF*>YuNY2-O|YDE=al!zqj=G4W! 
zV{LeH$S)NIZ8<7DzPlI9@q(4+L6pJDs_me4J8Aip+kI@UJtH}WE3ywA)h)$2r3Wwb=79UOcI1Y(no?yDd?YipGCijSkwl@(@{WsK=J8Or@|mvnV) zI^472s0q#XIxm42ie&&$ZjKx+|L&n7!5IN2e+T@i-v~rC--M+GYyQt)Ost$6@4Rf` zCkM=$w&2X47L(z+Pw@O5qCKvyroHzsj)mYqBsxqYqUfv8@#xdt<8?B!+2Q>((AfB& zszFpr&EuPpsefQiy7=fACuFEBl^QYWKiJoOwO(oov#Oxm^?Cl{=f(2x(OTF?J7cD; z-)9>1JRh#4d;)oLNFH5?o@m-9xgVmqANAfJ)ddsvTMSYLb>nN-xjud`Y*qd|sClVy z`go;m)LIf}TOd`E)-XQ+BvS2y?gyt517i%%S)b32LhlH&pgw;j(fuH#MI2dV`?^10 zM(RY)Qj22IR8(5j?pMW9?9?qb@19(JHEW70Li2fbrb?@wc{^R0K1#qV{-d6{iH*D}I%84XW$dPIq$B}1tLScwpMDM6>}S8vE` zOEUCfvzt)siDY&R5&ycBi1WSH>7=D-SfKW!hag+VMeKOw{Olz^)}bSq2%h139hC_L z$fm!24*v=OT!}%U^-Q6!)4nW?a_7oc+I9c_uwo4$E^H*ATEv&Uzd0_b(9i|}dH|tV zn=F#?+B+^DK9s#1#*7>J>7$o`Xk+EK{h2TT^g!6e^TJrFu>=qZx*1!5FQdLb#|ssB zx9MH2y4rY`e|xvAiKQSn_c`>H(9^GjR}a{2Fuo+Q8^ci(A>D}*FMC_bTUB)e9OZ3V zu5m9JC~u)H4op!XNdqAUzzFQ@>~Qa_@q3!mmjX%ZKN*anRt6`1^LM2|leC$KX8-|HAYhYdP^>^qRf_x`8y&#$OclT6h>u*)6 zXni6CF69ICV=lr2nh3k03VC^ZA^c~|4=YM=%4{@-)Np>_u;}1 z`gaY6O5KK|{ojaD9FeoUW|?7bPNs1F;C{ebjr zNrij6746)m{QfR7B9+b!uE;F`iQZOP(_Oh<*%C3-?~l=JN3GXjZb z-paE|;Vo|@c)1otaH%;tk3fSCo_2XTxi%I?Iyw_*OaheHM2;WNj}jb)peVmj!m$d< zbLgt}zT9h{YYu|f5$<%*O8`;wXd+*#n4u#UV@F!0WeSk{rke>u> z@njDk`06g(Xd;j)o7z2Z+9U$+x)o}ZSDcv>Lj-tLko^Sni(WC@p(-sj277$@N0L-? 
zWsU0SFyf1fPTbuM5IyT+2oN36V@t@tYc@K5u^RpmM3EsVcdCvX>S)}T2j>Da`9yf z9h>BqPx}H62QVY9I?($f1hJ(b99ZP`D+>cfT84&_ZJq?U18mw9N_z}ggaIA@Dx?)Ty_c26`cYvbmyd(8hD zH~1My!sZ5JyKmeuU?0Mh5=ZtGrfg#Im+O8(cg?=Pp69B|MBej)PNVjq118qRSg5;$ zb%*!1>^%{%eFkQ#(`rfkAw2kXQ%Rgwc?Egze%e|qX@-LYBB(tgu5o)HCDGjp8-Qi~ zU~yfw>?H;reNCs{3o4RmUXZ=I-ot8kA7!?&|Jm)Ti;9$*-Tae<+xH22D~kX*4XTfC zauHQkPGy5)>2%X0^jPa_F;ezW=Jeuh`>lUYK(27-Y5?qZE7z+=@s~>!ux){(n|V6p z?z!9b?mcpt(C&~E+Bbh)qYEVVs1MBNa=^r{b~THIiPT<(6HsbUk$%3caQ0Gs9xZx& zZ1Y!e{_HVQ;2cGo<(38Gz|Wb8?+x>@CC3eg+O7Jp?c92&svp6)3m3l#@_``Xri~U( zf54pii2?yyHV1M;mWG|5|2CJ@?rA6UwiG3 z+4;-JkCBle>q|CJ)zF>LsEzPNjDc1{{AGnBDlY2%&Ed1qDFDA8kA;*I$@PdT%ebn_uAi@uy#%L^czSH@;98Cr@TyC;L2%1`Lq0LuEI~<-Ww2=H z!oPHl?guokVy*%EQ!zu`xCpHWxw1yqA6N@?PxwmvUi8SqdGqG;p$1l9aH;SAk;=uF zrujBli7Jg3;}a%@|KcprtztIXemSmDN_2R*dBR34N{t6S1Q+Q_qzv|eITuWd4#(PK z4gDl_@kpx)eh{5E_q#s0`K>i70^A;i1rwedzuDm!=^MY}{YnQUut>0v|GMaa$zz^S z7IY2r>h?Gl`oV`_ZhS*V5ij0?@IlZO7W6XE&F!i5P?n2qn-9Jye{;{1g&>O0^LQO^ ze!A}Bd6;xXCCk!135^|0M%g#Ap9N)fAdw19ULv^VE323nD{IR^l8&UD)<1loe(dRj*Zhr3urvl|-|e!@?~UI7SZ@9eC0`1kMMwST3M!}$S>FZUb? 
zA>|%8xvak?FAi;;orT+LPHUs?3M!gkHU)d~eE zaK>nvX`&nkZ>#nl`z0o#CrE zFVAU54UU`9l1jxM#%r*sEf^Jt|2ndPi#Tla!#kTmf@}+UVIaV_>2%eCP2`9D{j+Ad zS428Wh#p4gnlk+zKSZGK$3+K%W)OZVi#tho8fk)Z7{!6Pm#2CjQislnmw)_f3z|EU^J@j_wTbrd=M%0j@9SJ*Yr(i z7_AU0yb?O_)q96Gp&)*yOw~U%vF{dpU!5+sRAJV@HjkyW+tzl{Fh~`Qow^u%3&AD<+zKAn&Uh&JUo2*SkKhd6;fys!uObuq4)^c zQP53d4r4%>tI+kSSW$2Pf0vLcb#7p&U!^$RE%Ax(a~YkCj#N3Gz^VXp)Fs*-$uZuk*4rE`|XN6aiO6o zW3;rbQ;BQ*HxO-Dr)E#I%-OkmJjO8wjiMD}`=d`?^!b*IJ!F=JMB^BVY9m80v}-S< zWf}eDwAemO0s@91q2HtW|oYv3SsH1xiVF9?|x;%^!_hk*(me7*C|X=2i&=^VOQPTS5)tU3yE`y zKjms@O$gkJN5d_jM!lwwSZ2X;t>LlM;fl(tP@}uG;^^K%1+9hkqs0liQ1NkZG_V~N z<>m20n1U{@GuV;9*p5JmM{PU|kGXtIc$U`pqM8%AnI-Bhz8uR}_&rPc?n8e)%U^T< zd#L4~eUrn1&a>6XRHu3p`6=_E%3U|!{crD?BKnp*2b_f!4%hMt=mRYDjI=f0<+EW& zi5RmIkB_dK>ITU8=siLsEfv?)R$W_LSJ0ZF#FJTcN>w1N zLz%QvuBSX}tXV%jH)Ve<$;ZGOmT=Wj0AXePCLt(sRyLRt02=8qb6O9eTn($1^-r`$}&p z-ry0@F#I}T&$Yw%Ji2p;`(21v>cBv;q{(S&GW8`f;jQ&Q1;wiecAVSK83RPCFoknP zb5;O1i;AqR_-^C`md+6w>X+1B%K*=kUq~0?Qj#= z_YKlSeG0+1SaEwy-uYRM7bgLk6FA9=Rv3pzWP+)+Tv6p5?0Re_rW&sMSa*l#hdLGXk$~pb z8$59@dfprt>i#QsZ)Y}oPC~qDsl&=%@Sz{XJ-3I(bP4nF--CDN%RMP<WA(-s?dD~p1< z5=8I1@cRaZ3V?ACJT^2!u?1lc(Q2EH>avi&YQ*hn`_=82EIDCWMx8X> z*Kglch)LJu6Fv3~SI2l<1_y{iH#CrbAI>f3OTkeE-IM9tnx)NBvGdR5JQvgyS32TvLyIr7YdOAuBT*(*`1~1w~p8UdzjZav$kkt|NDGz zBmyxAvokGbfU>IM3xZ&9tEJ)S1zD5zfakrYm5wz`^hrDk>EYH$DbK}uj2_#n-6IU9 z2wioh?fLil`OW8*YkL9zf%#g%l{G<>CXC{;8??*s$SfMnfV?NOc!6W3F%H2Lg_-f% zrhd{-xE~3MuomQH$puo99+OSDZ}KB>@0MiIP=us!I6a>7T>M^_D8-mNY_Rnp<%@E= z6$dLkg^ z|5E9?7BEVz?+aYosAHY~-D034umjRT=~8~{Y|#9W*qSPjzLOG{!0CFJ z$|`p5Q5Jy`Dw1+G!D8tqO+5fyEiw}J#=pSMRDcWu4VMX(M@KGPUNT^b6fQRKCe zSM&c4tKGxP<=A-lJ2MdnutF7qa}Z&4R8Jz`o?wlUK(Sz;`oZw+z4CI#E^7DWr2SeD z2N`Y_x1SrEPpE!Gk&6#w>f|~JLRz}HAG;iInSKGGvKZg*DXn&k~iFAY>ub^^}A4#l*lLqUNUYcak0g z@BFpgJ^ymz!#;+#G9w}`-U6Ou962}={&x%*-b&*f4n&kRot$(c6{yV?cd7AArQ7(p zjrok-HzxhQK6YUQ2>+ehOv~Kx5a&=SDFg{<)C4R9o>*>yy+}ti@^mqC6?AaViFv}_ zXk)_}i~)z}mL=o(opXi8=T1ON%433S41RN^;GEL>NwIB5hnKE~)=d2eQktY|6)~0J 
zA!{LmV9E|~09zKmpjs}34SXtm*uJ(BhiVQ3`+pBu(|?Bj|Lh?)*KC*}-8kQ?Q7NT# zW0$rqW}bA6Rz)ZJ7iQL1+<|*fb}`<=#jBhSaR0wAAWNLnYo=S}7^% z8^fW^KpHDgF?`+K-Q=dMwZ>L`mniFY3@%>j4fmPl2T5B(d?kE^sgZ zr=b;>NR(-`2L!P_F zk5yu`L#~@dJGsx!&8o@&VRQK^o92%e_x`BP5401|Vg4?ck^0SoB_H{}of60w^8f#v zMD`RoF48qIH=NtHFa|%$rBjH%Y9Uy|`>)ZEdNSTD&o5frFr9%*`n%8rEZVRLAmFa5$WFj8wO< zG6U`m9JGKsC}5c{t(#ctNz+6%tfMM|E7OoFGOqm*Vz*4U;R%)RdxO+RBWYn&EnmJT z+aKuE#RuyFq)Bq}zmP$MFl4R7(xxiXLF2r#+xkEu==Bw@N2im}x|DCc$!`95op{b{ z)tc8I7mOLo;A#B)hL<~}_UtQ{Ob4^LG<`1a09B{v^IUOJgPNF)?Te&%KK7N9_TeqA zl;528?GO9fbw`bL4g^!|V|m!ES3JUXZvLIRy-#%Z`Pgr{xifQ=`^!{bl~%Hh z%X+499w%hJQfs5s{SG4X#kNi$5UE|)Lqbom^%u$CRI(r`nhoNzUZz52E6O0IJTo`Z zxLw5i7jZ<%)@FKYl5jgbtQRHz?|~W_pUtLe5$Ypbx7D9Kjf{Byj3%6hHs2!07`HLg zpGm{zqP4j<8oA@~o~*#w95GWosOj{|zq-0d&>$thEXM9JswCUTawHY8_ezC%C!jit zwNK7(*Rp7F9Vp2XYvX%UJw22xQkDa(5`U1Ui)KOuP$kBzUftZasY|nQ7Fsu3;hTJ(x5irdX0`cx z|DpOyk9#HKkB+m|3>P@9NAmChjb)irdreELS`M!G-ryLz&xT#bZLaey*8DEU(Eo!D zXU}YLKvf9_$k+EyuW%p`pMtKK-v6>a&1vA!1J8x+Qxdg(g0m%~rboGgHb<^uU2w%$ z+uNLaP#zCb@XIpoXXtz_xJ0xbUB&jzdQ=KoSo{^78Otz|s=3+~@_V2GHeTqjCiGBH!c7x}&v{(a>JPTj4iZ(++l%M<(uzweq-+y_=BWcjolY1pr zgLU4hY~FZXKhH2El-itF1b|6jU)Nl_d-n%W=>#(X;zJK=7GRVfJi{1fk*ca(;yi`V zeO3-;5RA!QVLcMVz{yF?eZA~N-YZs|UhvZT>e}j0X61Y0bG{|n=ETxdmgkg$ND|y? 
z`Lx<+S1MBxO5v*RIKJL&=Gxs>nY-L^iie2JA4R%Q5{kxJE+Uaq(;7v_dA>==3;d~x zeRpP7EB&^-h+RklMc!rOb%KS*{mQeE_@L$Q6{w?^Z5j*&4f@uEt4du2f45anjIfjdy`21f^aOB~>Ir*yZ3G@!b0JWQTw8D z1Aa`oCkV4Y;Ag9}*x2cobv&ci%Fi`5lqu=hff%*ZXPMD?HVO)KR-tQNmWqW}g7BZH zORx+bDDmBh{9V^YHeJrV1in(|gkn$1vKOR`%0r});7+=aI6;co4l08 z3GG4E2iZdp)cz5FT|=HNYfh$e)@<07H5<@FAcnVd4cT05$9QYG$T@`=?YbE`MCM^T zk&M!ci5j>Pz8rPzSeUoip=idwJNvyMyxgYzLje#TkQQjT>8P)%v^Veo;FYG97Iyy5 z*pzjNv5?;}si&9MSE{_Uv6R~d5sp;@Cp~Y8ST&{!BHJtDV+$q;S-4Y{iJ7j7JWjCTxv&1)uF09+N6detdvER4!`!xjoUe>}6?docjXVB%tLE92=QrO0;~wz(tQt7ZX} z(ozbDPkHmUGjf@@Il$;QSi1-dO@jb3I-l%l;;9*E>)H0T_6VT zdlwLzn>hSD*EAIOQJ(JQkGI(l6^H-UVM=dujAA{+TGE`_$6|7kIjm%*;9UAmonV}; z5WlE#i@XX?Fedb>N6B%MJjtkm-00X-YBvR1@Md&+ZUqdVt&vE6(b(si27>gDcC&#w zuKX-Tr6Bs$beN(lfIve<$MKn^I18JGEqAaQ4=gOh!14iW$s<%7At8r1H+?bh?L6;| z6x&2mOJslP%}>_mQIwVLu)yq0kMuO81S;$XiO3BPE9_KNSO4>P%{afOwMl3ufRsU6 z=PG6+4NK0}OPL#2qDG!qxEM8GRI)A_Xfyl!xPz{`&y&b{`# zQ7{qe>P~}&V#R^4i`bY{Xf9ih7oF!dLy3!PIrgi!0B=ws_ z`I&#a+7|?}&Kl;F1nd>Ch8xr96rr|3`j3zY)aW0LcGuU}r;<0io#>Us$fI(85$yh< zTcUeSSpLtpMPILn^kEJ1#ND|iCd!m{^(TpE^HnjRXSzP&`!JI!k}3mxLtOS}@9YLD z%I1|?CD>j)Q1i~9TdZhW#82tm#f^V|c*PbgKj-Z(Tvya_EM0b#=KmA~$|6LCm;3tF zyPafW?Grd8Kaiy{`^xr)R^pcbP?p z)vlq_rv2VSi{6qGFQ)PO^i`pjy9(+XmhjFB@{p4 zz@RRV_#`|5Tx5nh5~8AA?O4+=q(y{SKXmz| zWvV7&ar&M}h>m?(fv#Iw>i9>>$qkPU-CG|&7fL_i*!*$=^L=Xk2&QI!_AY~R4=5tn zsfpddI89G!FYeW>IE(hCmXu%f85ydp5h=0`yX12(PuYo>78~VKseC)%pcRw*&{W$< z-^g10b=clE@s_G>1SXVQRNi`PSSlh}S~$3|RfHc|>c54{SK5j^6wzuUz$oZ@5^Pqo8zh=!kmjgEyQ2m%5Eq#E)fuuHwZf$|1##+#&@cr+L86)iFNw9 zZXPkY^?Wz2UGz-m_6jXaH=7p+N~%8}e}xmkf;dC4=VX#f6SZUn(U$1NMI9lY)9@ix2sEE3dXN&ykk)w5cebQVX{3^zysN+)$(G$W3wG#x4bG@2GFDx(OSI73 zXD6v~>BZA@67mlIZKJjC&O+p`Wm-hyv&E6(`d71 zmEpBP1PUz_jp zRTmL0?%=XGyFE^h1;IpV56{jaF4e~&!*U<&X17jIrXWX$esM@*n?$~|;q7ybTerJN zEw-|USW=@n%A;;?OPD;$t^DYqlF_oI>zEC8BEWt1n~yraYOd`dLi^difK<_7Rj~e? 
zSryFiENz8t)f=Tc^|)*b48)_{;;2>_7WVCl_L(Oi8b6H1XD$^#&lRcJbpB#iEmx*- zjQ;pB+7oogR@Q&5{#;w(-yGhhSJ4U9^*uzZvQ2z^N<~%ASk=%`-`hv?rS$#DFVB5i zR%I79PClZeHd8ZxTz+3Lm=w|jD(-vNpq-v>SeUR6RY)zTqHV`)2m_Uh$|jXcL<^@v z;QSYR?%-CGocp5keQTqpJDkciCB_^~V=V06165YOKE9?Ng9t>l3jP2XM`;xM;~wR0 zGW7*Z)NDG4bX)3Pu4mk}YTfF$;)chs(){dwqhaCy%ZzNO@%Z?>8zHuC_tA~yq-y=0 z^|a?eZHP2GB+4UUH6MM)uHC|PuXk9mK<9RHavFbVMDpJn?fOhMpJp1=8>))cx9$xH zk^GB&wi4gHTR$M2htFQmEIj|z!=XP$;&p3EV9C%uQCR+?I}?@dwXZLsmNG7TS!BP6 z9d#SqdFLnG(&X;14$2l8x<3^J{%6bN&f2wI{|_rE)5r|)qdwN|sw?5u9c;bs;fj4@ zjQE~$ZR{A$F2C^H_OCwfJMzPd?{@C!#x^6p>1uT7MtmdpPGwB8$jgL;T90*l^JGrn zB+vc_oE$^qj6o>|Zb%-d&)r26R_DxpY}2k{7?qir&)SKY+lVGCl%f}v=hj@-SLb_d ziHlsUxSZDs7Sc{@o^(SYt4TrSrUotWkY1s#){z~sOqF7y~Y5dKLT!B7Ygg&Qd=Ym0a z!V^fawz^}HLOapB-{vt%M7foj9^d8ix9?ai<_F0w^L==M&B2y#t4Xr}Pye#Lk&=`V zFRzsBpOEOV^7}Yrmj{*@}5KTriuHH_uQ#ACz6QW zg>E~qRylGdO;!cnm~(g<_fn)BpQZVi5JSN&yFov@-daZfto(@ctklXzlbG0)a99M* zY30L5s4w~uvObKpjEVrs`8uZ6eC|sb6=Er+$ouE`v8;((_`0E?DwQzi(}kP5KKt#o z3FB|a0hg{v9F#Z}d@fgV=tzqHCM-PMc)EozN_h;Q(&fP+ z9~wctN4-gY9hHDDayJ3Z`|2NS?|Tist!lj(CS_||LTt6d^`Q*a1!M8iVw-6b2T29; zOn+C%X=~=N)vyKp!XWuqPv{MZEYNZzGv0&Bxcu_2@)HYs4q?$ZYW35)UEgihHO+TN zEl!->HPgf3zNxB27wdsl)#uo+Tdn43x${Q(-6`dstd~oldqQEnmE^^ZXmpJ9i+_Z1 z)F_(ACE&1eGBFVEYsuOsUVQcH)%e7Okl(&uNYl+aWtk2?!3bl-Q}CwMcdtZ7jYfBZoDbhip)daGqddMU7~?sjtE#?PPY+4?B%FBlUq*g@@0>-4L8Wb~;LwUHvd zgyf__>**<4ZU6&c3>ps+A}KdHNx+G7OfyMN+-b0w;@1oo&&q8bQ;#?x^>5)``16F( zWG?z`A5obOd^@THhFu-xT+938lFkF09X+>~muYh)6L4T)OFj9lYXYN4pkIGt%e-e&bt1$_LZGMoVj11+Jh71v`(9n~1+@4x&F=o`%nCXik1|c-a5ed$yTYx&}H#Uv6s0&+q}nm=d25pV#V^ z2xa$KXx|@ODzGn#qv0M+R#Fg@&j6S4z-~=}&W}#046i$2u`o>j{JD&iOUbeS6Cgoy za{5*q<_NS`U$Xgkw}x?%P2OLdw8&wSg*AG6r(PDsr}J>3JLs4YhFW%c<66y6g8Jt)aaY4>uCkGZLUIQj0jel%j3)<+I7B)ob_zSA5{GMV>`%O`+WZQ>A?D z=ySHjG=BX2rah@WbV^=sHYrlFK{Z5+)2khnScAMGae=Bkt}QUzQEzM`t4R6DCq!+> z*r&pU$6!;POBaytsNLp!n1!2@t1K1KYWdtSCbpU?Tw;ms|vGvbj#EM9Y2QggojW16VoY_#{qSAw9z6R2=sTxhyW&4jN# zv9{PDB!s0~47S?ZqSDOWjXoJB%qtOZ?=v8_XMJ9C9&xQVE^{^mX!+P8Knr6r_r98g 
z3tZ{E5u*@iqG@eyZKHLS2Hd>n_J1y|knpwe+KAJl{Pib*V-T#tmPQc&aY>zrKTt=*NP)b?< z<|mtkx@#{Y}H|gKT|LwHzoj{q#<{#UTUY%<%HrU$IWZ zh21|6YbMLo?hK_&{)CO}U$M1knf|$@itkhSzfxj-7EO6R27;&Y+k1(x#q6dCgyUyL z;piW4evjLG(o%~^ZS-*aIUXJ!e*VUoR{+ z%Y?T-7?XW@M1+6Uz4ZFlpgQX@*h)ec8|T?rQRc(y%OtD8xS@zo-Qw_J?qI|8PNIi0 z&8Gg7Q+s~iJv;SBVmX>xh2o36aa=*k5Q@CZeN~U|`Ewe20k)+}Gmj4LB#nw7K83q9 zz2X7C;OWz+F&$poOz+CeH|As@oWu5tvKR--ya|JCP6=M5{>u4ls1jemAio(-#O2)~ zWNQFGsBX8Y`eKBRZ_?M0o|WNhs&^Ri_*UB-oaZKM++6f9ru%I+o$ z0t;s5mhTXLx~sjxBo=1^SgE5CaPrT;Xk#Putm z#bUhDL}&KT&Ng9t2o}V+B|*TcoY2oi=(n)j4hY~pE0?ii9jePo9gyxTu|Tb^=#=pN zD_CQ@vjR+eKB9N>3?XzAd&1KRkDjYf7k{JfBsxv^0SYUFDI5V$ocz{W{XI5grs1@W zKANqiw(M7A5sR*!NFbk(lvI{mHGWq*uzdqlIoxYG^MLc|D(dhS`7GVmtD(CPHx?}D zgqW#diF&|+u&-OwO=9kGHS3+ zPrat21v;bVoHkN#q7szs2NSWL^9w`OJL7Thoj1@8@Mx=bkA=A$`%kf*PBy~YdjI_* z4s%1)`JP7_!Vr;hF~`(D*Dx-N>m?*%8RaXinIseMBec~`-)NkJKK+_xc5(e zEZFLSBXZvp6lLHcXf6kSNrhN_gB|kBR)-iEgj}+ze%LmrfPiYOuOe`EKHRrKB@CX1 z`MOS~E?Y=+eAKDlD#;G0*dep#kz+XeP$l#c{>pPp! z+FN`#>9*aiMSOMo50b|@I^Pi}!!s7o+Urr@14a}VC4A`(O!sK#YH5fV=K77t%@PL? 
z{QDktxr$l0E|t!Yw?Zs!-cDcN0G$XoGGMUJzJ$YAio@-8%B!y&vSsQhe0G*IB;SA4$N zP#GQFQdGN5rh#xE&Oxjeav!*c1h2q7cd-6GD<8tH0h#KjteQt~?D-M(afsO5SigG^ zf!K)D$>L;{X|$Fk|4o#L)y*-`VC(hq+oj7{NMXB~X^>s$5?ag}USFj@_;cmbcW*~i zolYgGiVll5z%k#X zEI-U6QN|c>e5cFsf$nbgu*0NiVi>9+kdvHYoTr;3G*(}F6)QEfK=Kp_i*W5Tpxdoc zRFEqZ*ff5pTVkJVv^zbwS}D7y2*6yfdMH2v3+d6ZP=NP_HpbKz0iwPS?EVvE@}?n# zs2KqqBy09gi~=pXR7M5u>4!brl zY0r5hz*GYQw77%>Vsq*5*ABObVKv% zIw%oKXp71e*Bn|oEoBCvjUk6`fR6;NKS9<51<96l)o{~XaHihsq^p84rc5IuBEljC z(LQL-SN7jdwrzz2l67TeULlms47A}dM*p+tKN%EPC74C$K%x?UccJDG%;^}jrCxM% z@a9=AdEKK$ue+n=j z6}sTz)fzz%?o)6k1bCJq-tFra8 zbSa)M-$H-Ec^)1cU~@>#0FqY5+u>>t73pHimu({Vl_AqVKjFvoz#ZDZ*N91Ag6dkzzQ ziZa#@*xID<{&2i8`4v`}x#ihuAzeK^Auo7W>HVO?r|@?G7E$+*N;SRU{^U$&I;ncy zoJaF00)e5der@dz7z1$rt83Dx;cl)uL&f&p?Mi#5oK&L z$U`!rwXd)5D`l=uojPTRHD4Hi4DaxlSVC(Uv#{XBi^Xhk26>*cr#i0DXWNdTDh)*i}6YtDq5wMB_BnvEq_e8i9)O7icOo6*f+}~`&3_z6t)I^R&1Nkpx z?$31arTC*q-XH$2&jHk^21ZNBF4vIwd$h3a(GYhp1SoMnKE4)p4htdJaZn48AZpVb zEud$LhvH;JFas48)!us0o}D-=_Km#%#-$wibN(Y(72m#o1=nJ9e@Kw#AEhbf(eBf+ z(;c96ALKwmKsUY8wsx|$o7+$L;MelvdJtW8AD#&g_hgdXg!LiQBtmlF&1BHQzDBr;>DvB++iF*bV$;Si;o9F zy3cTvONxr-G?)E1mVn+#ai!xL95n`TJDR%%Mo@|H1`5_*M^PhE{r5*opEtde!ebZJ zl%boIZI~wu^mD+07|EqnGiiMUY}_2Du8gKMm9SFh5np`3`sT(44fhFl=?{?4QcIEs z;L-g}pDGHoaeJuDg~G}5ibox;SX!#5KSE92d^tufCO&>+D#2xU`&T;!fMX z%aXqr47TO(T&SRix%nsnKAFQFz}D{if(YW77B#&S&ME<=V=)`nb8>)v?B zuNJ}tZ+3;Mxvg!nGra+TKvp60j>(yyg=9J8Oq;B`X7Ae;RTNG?Y_6kZIS^9fMFZax zJU|Pu{Fb9Hp-2=8FQ67w$nP9N`#=wB(cYCfPLbr$#*izC`@cM>a;11g&_0wo4M|2d zMaRdV;ow;KtyTlKy#cci0CSUpF!Imb+k;Z5_Fbg_S^ZY4Qc_+mvBJtv#>XxH=s?Td zKSsj;9F{+`^h5E@{>?v)o{yA&m6C!_y?Tg+n8~D4(K~H)F&3EoxUajqelTu5`&<2f zx(t)J{|yA8Zs6GO@BZHmlRx)_>url<_S=AMk-`PNxJdH}Sy%`~Ogbls_Z?mj(S_CWA3wG9v>+2jF*c2gBUW zZ0`5@pm}{FQpq^4x1&RQ|4p-A{9cBL{(Uw3|LQ+XY`ti|BZO2L43CNqROue4ek4S( zZvfea7HkBl)b-WTc_}pXAG_r};EYipIYKFB%xBikAAumadNPqj;#w+u`0!v=e`orQ zLl>+XB4TJ2cUHR5Xqa-eC@=3fWN)#(o(u>~*@`jfnJ~%WHO$Qi2I7Jxg~-ebaJ^s9 zw#8q*rWyFFRSI3G79;3c0205k001wVs6gik@d1_wKVNrO*SsZ2z8I_ZVfRSWrLQa& 
z(NO>l@G`0dg)0oh<(rvA1Z)!G&~=pLspg5ZqSrN(<-FREz^Ev4>Q&lN8z)f^`(H#=K=g zK>^wkHb1(~F4;#_Gvch6swStL=XYdmN{UE>@7lZoi|I%2f<#k*AWwAcObao)cF!$snPsI=sU>_DoX#3Sisj> zAfw?TC#CsZV%JkL*_{o%_32HJGmS2y_3}or?$=DYd&07M3ezX<6!paNN zrJoI4aZykelb%RNOIs8M!=o0FXI^cLEWbaiD>G2-=OuH;$mkx69D4EzJ3zH2nl5Ew zl3zuEiSjZEN~t^xY+K(n6_hyiHB(vmEl9`=V6?boUPnb)v5<+_+R1(ZD+EYaMlr|?BWd_RKtg4KFCy{-q z!X0Dk3Wf(7y)x5@UIufnI1I|fnhJo_pot24^pM&75z5uvl#y|v%E;O}gPjaE2H~NW z;$}vo|Jc7rCMUrRUa$nVw1|;F@&dZfrX{wiX?@gBOB3{MFOr#^{bEDl)+}GeSbC(Q zARjWa$~-zy}1d{oEn=pHoU8h zEG^U6hd$hWWV!^(I3{X`ydDo^`0fwwV5vNQOO0vg3Rqwk4#}-U~=p?kpCM7vZ0bVsfU)Zx}lNmUaNBLJcl2mxU zqM`!*-BGfIN`p5l@q2rFTofEZ3Xcx5c&?hBSZs6=Eh;6|hdj%`@K*y!QNlyjbRA7* zCZ@)l39Y=5jRhcPDpKnM1dzzssO1`OAD^ZIVvz{=P3Wt)*Hm&mmh3DGjG8M%fa!}c zz@nl9zrti`zG=0U5g>(qG;@0$;N~say*j`WsucI}qkL6HXlN+AyPE}j&3v>w5Q;v6U}ig42^et}(o5)lz= z1>BJ6I#|GEC|tAi2SANPqoMHDv-bFjT0O}2yp$RNr70r=C7K|ghK@J8T>M;LPk0FB z>*zYa#R?2qxSnwMR5CisrFu1XBrK`EeGb?YUP!@_!T^~~kg33iI_TVi$K#zZT)1!@ zdFJ$K9+r>9{HjS8W00JalZ7@d^P_Rn+qe2bA^3Ek2L3L;dyZ1$B$5cr z^CwSU&{8aF@WVYHftiFxLwwj13ugw*BY1dun-gNCxP!gt3mT~DIUS|WFf-dqjV~++ zvqW1dKLINkOg^0R2S`k!(Vb~3o-XIvYdV26spjJ>u%b3M0pP_N37$}Um+*6G=>_fM z;NV6oztuUOJq`(%QB%z}@Ua%7;^Eyu_{B<@Rr`We0GDlbab(C@JMiDGd*bM;p+^#6 z{F<65Lb3_0*Z%70IPq5m`xYt!S2e>pNDUGM3w&O|l?!c1v&zPsp+RJk>J?pE+xH`> z#!f+07XJPpO)qhCUqwp$uFODXWdCv5ZKv(ANA&X%9pG7_bAdp6`8}Hi>H;4?X2$?x zh7|waX53zfq~(_7nsDtF z;Ks|)J3(JDL9aQSw?Iym$`#9O=Z?Z$EOQM4Mxqt;o7)m}uL^o%WMo{R=GgTWv8Wkd z?3kLJ6=;A#Gu*v41f{spU(Ky6SFS*RUZXykZ8pMEM=7Y2s%r`LE%&jr9iAY)hmJpf z{CF@N=$E5^b@YOt0VsrXwK5BjSbGJwPDScAQk|v~;y>`{^Ser2aK#~d zd5Zg!!)qWIW7=`I>erhSPIvCyS$;gP3XXWQ&pkFV+gm`nwLtCAlgZ83mwU&`>s}PT zDa=8>*v8tr|9Eqd=>8H1QCKksY7IPnD=F`aYYApbpP_%?hX79Dd1^u&0eD8>3HStp z0VXegJGuMEk#7*47+Mtj2=4^8fGzlFo{yH_3X6;DBfh#ezHy50V+sl6CA$5 z5@mQG_l@qn_1d*-_eI_b$X>g48~xg~8|Zg$ zz%yyw4@Tghd!OGaTU@*L$nxs%^$8tfTX^!JrJ$0foT-kb%|~ zF<}R@%AfB8=~q)*Eifjnjkt^Q1ao^k9F4aX8&?LInbr-k4W)|ck`ET{Q)@iK#$tSQ z_tr}a>9h0g@97nHKDhhn`oG`03jF)sUk}KxF2LVk9?u^B`|V4ltGeb$0itp8;-DlYCB>nJ 
zy-QeBObqeq6A1~)iG%Q8ywTCoS=rhCLI{`5yGQM*ViE9yC8zq;qsH~olFoQu-j9F2 zT}(XOidk7PKq`rgi(kDaAtE9oBqZdriSn29;rZcYAmM8^7M8*qn_C~eXsD^n9FKn; zVYBlkg@=a|XR8p@2~T8Kx$-FP_jaEo%uSbk7Qb1a>v2#X|H7IMLvN~7!^*%mMQHph zg5cT6nuJ2wASb%i;bFj+$vy=|CZx%?gzvWle0)%q&uaD$HLjssVTTPqjh=L;+IsmU z!9`wP{+))|V=@B9PxYVoGrm(ZZ#L7{R!N`ECT5g2e^2iH`$TgTrP#-*_(|`R`44tv z+@FO*@ECvZu-3;yMQFY>ehT^n>hl7BD#hyPk9{-C@6K8^SPzGk<)|NY@F|F!J@Uwmt%(r1^= ze-3sprcHi*hVoN&O;>4EQ(w&fi^!M_w$Vv?rl?#D zq>@gS^#ay-ZO8&b_r;+(JCj_|p{?)BA^4#heXCG){f)+j5N7pwZe_(Y|FWf3mFD>3 z7h+}WYeOzMX?}E;o4gJ$TQ_A=!b3>o2 zDE5lDBOL7p!?md&etbS7sULA(E;!7nz;05ePN7>qk)VKA=p(J%HDc3(hxk7IT;F@8 zP+*^QxjSr7bw9dGN@`-sgh3IPda~GHOq1uL*($mG-SNuUx7<@2DvAc~V4^Cs=|}?K z@kR+fg&nzwCw4|?9?@;`#*3M+bB><;u90-J+6^J{x?dsPKQpVZxo2g3pf$cT{1*P7 z7CZ0A?&nHFC)1ACt3St3X_nTyZ=@Xl2hYM7x9Y2oZRdZs*Go&8$v`XSw}ni?ky~Hv zw-@AQa7%EbD7X^X^stNWm#9w$Gpm~#n}k?y6>nq@ZGT#U4|uO)sZq;;9AXaiR|$#H z%Q7}L(zj6emL$d96ZQN}%EVRAI{e(>R{*1qq48vS{@a~&iOn!(Bbs_c)_;2f8R z3d2PuoGfvi7aN|xV@8htY_e=R>L*-Xu+auM0- z!;%B3wey$p2X481p%ofb|3mvKjyv);9m$QCxvVZ+l6qged-}lLMk?vvp z=Pz&mU3*zi_m@aD;^IZ|LETirnv+_=63l*TywDml<(=~6+&cMD0$y3_CJ#yYi6X728W`1#_dL0_N}^%O~@{QBEY$RT=_Z zBK3OD3?G`7e|F+TDg5sJcyO7`<$@rOs1CPGwdU zi9l4Q%HU6oRll{an&s#(Ix(AEUbn2V%p{EZ4-*E%o2|AYB)6Fht%oaoD+%|Q=|mt1o5Loh3l4VlCmi!+h_5`ffoa!9phL-9&^C6VZ8!|ahCb2nO z)o}KZ)vafoLCosBtb(&_VpJ~rztnK#HTut1?%*zmLDuN|<@j{LTW4tVKh+P9#u{5u zOcsx4m@AC&y>-I^qP~$TV_=Sg3AqwIb~sOl#>+8Z?W-7bU!_5qyeLkn(2juY4_?Lk zTEZm(ih_OcNUI0z2z&%_m}R#Eiz;EmWORs-r^xLo@h92JJgOOn<|+i6&dk$350Fal zRrY$K%*>U8BF+y5vV&nyzi+ry%noKo%Ff8oT)IDh*F{@}ImP*Fzr?SgWtA>?E>k9uGu#Zp`=ns2*FI9Tk%(j=hur5}DI( zoLARg|5lq16PY#lE5YpE!`BxdR|e`_tdyF{k45VEJsXy={0hFwe2j?~``+rxC770> zgSsm2w`1b)U0=r9FXe+M?1oldwC;z$d<^MwEY4y@@g4Up39Y=H7+l0oH4z133x3Jv zBbsqV?@roSOhY$4uXzxlhKWYzVo2RgKDx_5Al`zPoYoRE7kYR%<&p7@glj& zZ}-*ShJME}l_*jzyksh`y4FYBurwy_1zrDjUKx|zDXA{DK2u91^R(u-c$6cxKAH*j zR2a&lRQ<|pUgqS=74!XtAY@=7EUZFCNCw?ttHj7&Sfij=59K! 
zxa~>#iPk{Hp-kr50RFASvK5Q8)euA-VF!i*50~|Ju16*D5A8WZk3ST2TR%p?DbkKk<2 zuU3jaX}4k|SpS0;6P6esmbjR2s9;|9f~pR0AL7kq5cAnWCI_?3H5PqKC5_rL#$<>G zoER3p)x(jt#tx;U%nHcOVN9iY&BwL6ijX}`4&{E8V8+aCs5R|kg!9p z8lHOBem;7vekW^TZfFaR1Rp`!LNGzkRJQKqeVAU5Dsh%>@cVpkUTBc81Ih{hx1F=z z!x#fqsM=IRWPIxFCW+4b>6E5i4`QZH6vLQ~1eD!M-}{Q<8yOl$yby}*`Ln;Q682-! ztn1rkub&K8oM1B=q)@f$b#%@Q{*XhZd)Rr_zLI#!2}!#KeO}W-eB_zG`J-&|7Ilf6 z#rt8$bNnDQSAM|T?0{1s$h;=}A4G~S7?o**_DH>QWGt6`XG>b5Dt2(cNi+CIcm`X4 zF{=GM(j9@W;x39`dD_%ZB-P+_=6JI+?1LXZO20(J#c0wR9&#uxmZ)O5uYc`*!6;tr z44flz_Bswx77nhCxu8`ox4&6(j?XVzHcN>_-8cuD0LFz(9K8Gc!2%x~0)uZ6b_6K$ zmNv0x`G)34KQ_%WR-ksfuOe7f_QbyfJNhH<+y9hxs-)wsZ^m22^bB&5yClNBz-9vdb zj%U4v112JJgVebZ?XU zs5g8hBN7_MabusTkup=FC>PEvkN?A15@*pTNZ8HVd?m4jF%z`uiCTju6Du3KDUv%@ zt;UnSzqdw4xlk9hn*sw*r>v=-PI7;VOJqrUtbHGgJSr@465ycf9hzkqX1St1yuf(V zgn5OQ2h9Wlu?2mjKg8vWJO}M@b?xO9^YsB)lK!r9 zx$|mo2z!0a8x*@)lh-ahwmpt(e(vGaxrOVz?|a>-IqI-$gEbq^$eTI+D|KnV@d$b% z_Z_bmEiCc!;w>V=QyiCC*GYQUB`C>S-a2)7pHba38XK`T&YJ00&?7n;p-ab;7JMQG zjY0p9qrkwygICv`x32PZ-SBo@s!&P1lXk#^WI3-!TZf1mG7fp1LMu)8qgzel{R5*v z4v!)+v;mt{!jW$Zx7!?x1tTnd&=lw>U5u9InK@VZ4I0qFM_qK zl)TcJ?S78_*8kK{6wV;oU*^2NxiPopc=@bARy; zn)BJa)Kz;jTx-tfP^~6iDB9k8Y8m{Q_|I~F^1ElBnxHtqHii=)4HyklY{tX7*6iqG zCO<}zQcv6HAAxCeT7K@-u9~A3BEMvEMXN8B{9Sm__TIqibkvbvo-|`~USH}0%<6N; zqZl`;=TZOd+&@8y83jWXm8WCpJKvxoYJH~yS2#g2u6%4M^8w#or$u4cR<~JYJC`1>@$Vn?fE7D@sDp!5} zfeiEBqdif#_hu`>F&`&LN#^yv4GdnGFob;q5KKc@4z+1Jr718)7)tq&%BT%-=7FM= z)YCnibJ~XDv5TSz?iIf0oNsXmpBw%xVtn6)ERHF~E19==a2%bCCY{ zvXA!W?rY*W!}cqyoj%BVHP0tu`E3olmAu{$6-k;}Dwwea=E_|MB#%)%s9fLFbi{Sc z)zV+Rcj8C-1da=hF!J_y%vL4LH@_rHzPmEdKoVf65Y=gO9&ot?u#j%AxeG>Qd z36G9t>0BDxNx*1+`7={B%WrLRVUGvw>g8tSp#i-(Uzg>4Lj88`vo&lPc3S>?+1sWJ zOxme}X!faE2Fy|!UEa?{Yw=zmBivSVgvMDdd;%EXBMKs-jq>WNk( z4V#d|DXy(u;XO=5?HYyZ?lGpT*%%*^aU&ED8q>fNG=R&u` zlKYaKj{l)2gx{HU+3*C;&~?|^nIZhvERJo{yq9rQ6)|EWo;oH282}0-V)=2Y(;qII zq&Ej8APl-_cxW>B3qM160xThyoFD9D{2cY(>ZWHt- z4gOv}1Zw3YGp)HP!z)x=@>LY}9zl4!y^%}5zo{}XV4;Y@MWbhgT4 
zp}yzA%QlZmmPZtHwsfVFXI(YHWBS`X>4tChzDx`1%FX_K7U<|twss9C6yNgZ2=RqotUh9% z*$S!~Hajs-EBp3+p=1?)aX+-Pz>d$-JpzRTRavL zXwiON7j0SuD>T@}SC0ps z(&d)Gi~AgDlaEf+r}s;CBFNI~Gv!;R_5}8yoYMPldWuRE-*St&#l>iEVz<-i!E<`6 ziyx~jGCFDc<0cfN?uSe(wJRj^eLK8IYgwDCd%^SXo~I|`kaJlh-)d7YtImIwkMk0a z{gz2hV8+`;l@a&DJl^p6HIeIFNTrMyOSMX2YB&Nn6Ulk?`!K(MJBB<9Bmyffzf_2_ zXcsBUHeJrEXK#D+S&9qGFpxOO;~}lCJ~fOsi=zZAkQA|mK5ne=*#_6JD5s*QG1pRM zTv(Wy4`au?I@&Ngjk=HtVyEX|rw|6g`weTzs z&-}6G{1JZ2Mi_>>42>$d)bTO;pC!^$(FjkiD7el&da~;AwfqX8Ck_>6qvfA8GR-eH z^ZjEzUN!rIby#m_t*nN5>z<+FBE?Ij9v7N~j+UV@W@KqDF4oQ1=%hrg`zxNUZ#|&I zv2a=9DfoZ%6Gic8lKh|op@YG92Pl7rG*!Jy@ZDtb>)%65w=j8YkiKRoJ9inSZ@TuK0|=0<*qx^Xs0QTK;)AegqrD*? z*G)&ptWO!Tx8kV`gUC$F?-TB9K%0q7%$%CR-mei-xiHsu4J>N6m7 z`XE(cS}xj~`~6Vj@7)HB*-~5L!4;L*Jy466ELj@y--@#iwKW6 z4hZF<*SgwE#Du0V@(l*E=2`dEpGpVBO^Qo&_bIK-$mAh}JbiIT71W$oWM?g;8psSY^IiEvC)tx<^_Hj4>15Og=N$Ll ze4Ty|`Y}?VZ5MrWEJ?sW>3)XsNG6T;aeGf=V4lX~7SHy{@^fYv-@4)M-_?8#^qrJ2^)kZ1N2rSyYR_P` z9P|-%xt=wGl|&Pr)16|}Zw0tD>^@5axXoe7<;(v3>h;lYy}^_)po8L&qnOH-W(6Im z#gI$;u-X9(IN~R7WhYu|xW1sZxHZtug;BB|dg3!v?QaPo6ysc z-Hwl>DtnhD+ZS+J>CVYWkoMdmVIlE!&$%mc%2_iQFKp7}=uGx2U$e*zKj z5`nhrOHz+N{AnWtvx`q!vZHPCBqCZ}B|}*q&9XL*(>G14``@eRCyPIA9w8HM6gC+R z7+!hVU(ES~Q1eIDXzAy+%M0m7yciw7PkzH|CTUVhm1Nsiwldb}-2EU&%nH*SUwdrf zhUouctD;2o6lzY!TeaSg!9t$LLuG_|YKElrvG|k`DBhhK+wOy>C^Rl2zZmMW8}#O9 ze9MMcK99E~twqX35J+7Ny_xlZpq7h{q!iQ~ERQWIZQZE@f@(?Uuf}uFiG;QUP#vVC zdb2}ofm4I_o{MT0Xa0LbAo?tM_F%hIOH>Dqfr_aL;p`(4JcN~mWVK^vbGYK?j`gK? z>M4~w_Gtc^udOo8YW=!_Y5;n=&riP)gE|0KRY^GpVa z!aYCB@iVe=*15!5=yEn9C;@&$N8#y-k-TybKu1X|Z?DAe6%okSeHJdyMdN>Yx_KO; z&@u&um*&g;>M2qQ3a%}&(X?>nWxeTxOngAFT;F-n7=)HOft# zAE`5t9aF~}#kJtkULeJ7+OVW@3Wm7dLqsC#+HYW+s^u_GF3RhUU$?&@J}xssVvA<( ztHWU?fnh^vf|RL|8IuI&Dp>Qz3HdCpSylvD+>K>8W8mdPjIjnM{pNqWqcDG>0d9%e z2X%&rM`SRUMEEAL6CxDly__lTUJKCt&K`|{@}fcuJ8yto7BFj3&`FCJEeUn}VyO%t2yajD$`_FcCrnXPnIEYxzcenPg<4}cNuBHg z*F>31AtX{1|83n9{qxYKh42?zLy#))5UZCe6caO}f)wS}-Zvwlc?nct6V2(en#i`z zjC9zuR0m!jTI?grzyUc#!DJz4h9@;<*(rI~%W(R}vXuJFrDiIG(c*}uhy?mwBW_x? 
zbsX+{7RBdnQl@=NiT`>5oP9cdJU@jhvQ~s!D#gY;;ALu)OKhHKrF|ug@9Vh2BE|eX z0UzU*+pBl=oG$u82K?mtat!ZM(Jvw-YRbK*j~@ny1WyJ_KFOX=?m4>4%Jade=0-KM z9`-~pH`xJkcB0r2sEgI?Vw{hGnuVN*N@u9BBgH1g+Fwj`v~n_Zs(=&Ywj$y^N$$)= zi_Dj}Q(eBHPdv~rWnO4?W_gZ-@FUsNz#;OhJp3#3b&KjP?zAZ&`>BAy6vyxHf$DF< z`ytrKx11eASJ<~I7Pqer7%RV*(Ww42gno>6@1)I*N{3exl%`k4ZrR^9KXY_b!r63DX6=jYC96?WOJ>=^WYr%9T87H)1j5n_1QNW9s^Tbe=(8^HJ&*~p>kjGv_d z$#fdnZySYBa%5GCA{A#A^4>kla91H9%u^Rer8ZF5Y41!0-~~E_{Xwh5&H1Mt0q2?n z=o2g?GpVDM+RCU57;pWXjI8FcecTwB&7e?b-jB~d-Z;KQonRS8A+uWo!q5VH45kveZtztff=Z^VG!T@T|J8Qu06SWPw(S? z*wSitRobi7KJY4}2?(G$nZ)$DW4`)R_lON4DvQS917>yfV<%sbbyzI$<%GEJAR||| zqy{*i@T-T&ah-VZx<$q4Crs;1#OKV356XS{E26&8;P*J|pm*SBfwH8Fw}U=cA}=Aa zy50Esm{2N3%qTj;@BV`2svT(8AVGlO8E-Kl96H*H=g?f&vYaGwbL^3jyVKDz>tKV4 z3L$Qm3X)s^lnXQNd+qN7-6WbIegDnw`!5n&K=F&uo8)w^{)$|*SH8!sj}|E+lPCoU zIPQe>N8PZDoN4+U^9kT+Pf!p5ge&5bfG5<9A|><3u<)g^{8@e?QzYT zEv%H@`8Q}77xM-at*0J^men#shmk)+O?}MKSIYgl{2j->0vW>sgGAc4DC}|#gJ0dW z3%Bo#chW^ZoaJ1k5A0`>`Swm$CACjWwelEid_;Eg*YHHcCjBaxn7ZbsIaPfE|GwYx z&54E>Dad~M9!xanQxz!8LcBa8jjdfwNVvv=!cBSs(|cq9a{O%x2zaWda?rWY3z?C? zTS1vOR%HzcU=1Tli}jXmCR50xPHu1gV1Cb@f+CZjPd}YyS2BT3LG-$qnEd4?rd9rW znScltC>t%wZIBM^LT9*k!z{+oQIdoMSaD9fdvVIY^vs=n*CG(sP6CL3kp z+(#8gt@dzdBU)Z`VoyF~NMTAr@ID?Kec$Z81wAU8&I#H~&vhL4B(C$Kh}p+gYi~+! zL|o@uB{}*YIGit)%R4=bDkj0kpu82AM8m^&V8Ti-HJqk?+WvQ-R5&ShU#%_l&IP)z)~V((|uD`*r8EuGc>oU zY;=0K@b`~6t6r@GSB++=YoerByw+^0gqy@!NvPC9d{({c&ug($V)8KJFV0npconjC@|Z%NI-@EJi<7YmzPX>b3Qhbx*L1GG|QNS^GgT{pPs3@Mi{iE9IW@{p;W|OOU#M z^6dMo?pZa#-sIV2V48Vo${Z|SEI+j;W)LbehrUSacZzDvD1dUOwqG}qN`M1d_E>1? 
zMjD?zDl_w7NYi+cavgbyS9u@vr z`$m}F{lZ6E{mxr1v+k`1kmdj+e|9)HnVl1qY>p_Xwbwk=zzCsu3u0}r^v9JQZ(na) z?}yR_yY#2WfVov-MuBQ;(dPTA&fQ}3F!tCiG=h(z=NS*LH_1F)$~-cI~eOW{6;Fv#fFoN^%b%@mIRH zx#OeaHh%cjusW1?12^^vpXd)B?C?z{>U(#`$02puDC@?pJnKOqfi-lh9A`(((j|IA zP9OC>F2nlOYvu$_gIRCCn)UJ)~HPsKwhU%p6wL08}Gu;zHHV^RHwTT zQVC`J096#cI=vC^p)#+()~*Z`Mu}8g9d!IbkFsXWWUsr$w^e?Mr zy}ik0+aOlzzrt@P`Vwam#jBPA#+9y0XwY znlWoe1x;cpDm)`?rWPd0e#5tmIs{~%{d5HP?YC0wwjVRjd?SsH^72UohO5kY6WT*0 zWu{$Odz(D;=io4jyIkI2_+x{c!(}Tl{iwzIUSxp|G~+zebOT%jyH~WV@7B#R3dOs_%TAro;E3L3+z{8va3ds zrjTc;x9XM;JZcs7U08lMskv1CZRkN`h&PD59lcxf7t-C+Yg0wcj%qHM%@mSmnxTaJ zqx%%5SCpb|VUp_@P_`%+GXw{5+foC5}B+&N@s#B+v$PADCiVTgqpgeutF^!v@F zJQ2MP^8gXWn=xj>{^9j26pkl+uNh{A8j(k!fK{N0--=}(pIAajJ8M}Ah;9u6hLWl# z0bW;@CE%j0Pt0croUlQk5ZVXT-7a6a6@imyC8EdMejkG`|7m=R^^=DW(usd{q)?iD zbM1UBgD8;0wnoJ+i*2`gkQhYlzID|3_#Ds4ZeWI-P1o4uafDIGl)jT=S5)qPM0Yh} zQ_)!M)dDabHmdWX4zTz3Fx}n*aoizBq6>0di(520A`}3a!>^8!u>o!I%XN~+?eD*b zr3qds=^viqjS(j1uo(L{IG>o4#`gqnysQr#&v-|9U#B@L53GQ`?a!MH68-Kqzx(DB zJpkJLs{`k-SG8Yh^t)5+$9=TXwP&rCE8{+sP;HEPSxEB+U>xLqz3vo~MkBzbp-s5ULIcjMBvn((a1 zAX7_9bf-|lo{HUKqawP_moNP!zhKDe>C7#0`TE#2&#A9?nmX*eG7UvIG&JJBsloil zO&-{_M21u{H+t1+2*Q@fBUz*$boF;{g7(VG$ulB9T5hF@a{mUo3CnE2tg~Js@tGz;P;Iytxp@z_y9u~f!k(848 z17*ha^=l%y?i_-`&z~5igbR6cy@D>gBQE7Jv0T=94Ol|WIn?D?kAqTy8vI*={-hl+ zb&xY9BSx^k^x0Z^%T-7e^b}71mOK^^?Bc;&Y46E6@g}6YKQCIkVU#8Vf74I6$M}e0uVsiE;hJ0Dhx?C!VF%*xY6<$ZX8B0UiD#sq z-Lv8o2Ic@8KP^oU9HR#$A%~eP-VF;6Cab%$UTOz#f|1wO!kzJaus8bXA+hT72$V2I zRynF5HXCr^?I$12iXxTNLwdy$z79_9X_rQ&i_*Hqrf+&0w0E?0@p4OOp7}@dyxvE@ z!fE;fAotFOSoR5~eCp}fsgNX1uX`>@KHEDw$!uAAvzkIib4xzNt9}V<71wW@$IqQH z96*MS(x^+04rmXYZ;^;Q+#41rt|S<4oTqILEpZ_0QyEUV>4Fs?3_AQ5(65mf){_eO z708f!^a{jVdNuT>cK0Dn4t1fv2UD-VvVZr6ZI_zrffH4#qK(Pn*h1b(Ga;H>Ofn?2bkU5~>kD*5vyb;g?cP0@yk9cw0!}@grLH_m0)R@=gfi8#g2NcJ6%SV;UB%WFURVbA&R zl3NkuAyO%xaA)tej*MZbf-q_K+4FV+cyfTK-R4VI2AKi?L#8h=&Hz3NfmQ(k3f{T~~3uRKe83esUBjS74GXe;(a(ocYNFmO;gK@5|0V=Jib< zp1nlPx>Wli?q+xDpt(WF!jC6?Tb}u84j&0aJ$Fze3ou1vJ#dQg%S8USl*SuY` 
z)LU-i?ZW&nW>9omz*0W$ggaj#7L2vzU6^tMrQtqxj8Uy0-by~_sPlsIJ)kdPN3FQ0 zQD$*VEdQcR7YV*o&Ivs2`Rhr#x?;D{pY`#*% z{V#X6rDlq*JghZ~QpH10Q^VLa)1-EiA&0f*vG^L!gDl2j)jW8-|5pffdxN;gM)Q7* zuuh;lc>Z?t#HCiPwIWL0U_#BJOdn&Xf1Wg>;u~>{N>xX)Wd!^LW3k<#VDl=ZXOZMO zs97bs1dIB}ZLoZHqwo0@44r!TQo4fo#-=iI#K;r|4R)5BpO{Q;aGu0S|C;vxb^9Xk z4}U*&Jpfl72rPx_%9o{gCIh60UtT=b(+&uF`b08f4`QqmvlZ|TDdij-Eb`b0{lhqp& zl{@%?c1pu?<-=Wk9KX2&Zg*``S=WF;XV9DBCU*35be5Z0__yVQ-Z5=ax`>!te#-$} zi#fqmmDS|E2aou}HS$fjh_i1R!S3kUh9~_YLW5dK(!}<9z!Fu1!voZGd<9q@fY*>e z^~9q_OMvT2?xr22D>(u-dzs+@_-#}u6B3f zMBVMmTVE5Onf~BkFonJ2e8hbN)anU->y!7r|RbjcEY%Nk5oNPj#d0@(wXKB zImskj_BI|1KMOxEYLgD0G+z=3!DqM4v;$tU3d#-6&Drg<(6z#5Y$O+W%8+}xlatE+ z9ig6bwy#YMjDp{*kOV&Ry-iY87uq;6!5`ve&Oubl;(pPMKRks|?aA8@WDx}p1RSMQ zuNRi}?LnW8(fEg%t!z7!Dogy^~4e6`19Dyw?JeezD=Y52na5wruDpm%R-@@rJ<<@ZJN# z1l@n|wi>Y0XJ?xkcf^`8c6V!5=hG7&<<9WW>c8^46;_g8IbmREoGgzeyr}2O;{wQTZzD;LX8Ls&rn%;h#-xTrEFmpctIH!5Tcwz{JN)noC7V625MpZ4ZNd|Q z#?DKo9-oz8WgT_TKxH)f+g#aIe{g11?kt;pu{Y;`%T4y_b6`x|CU|K18J5Ce?q}6l z(kvuhgbW`6?K3hGFlY_S@PpJNxGHL-J6GhWS-dj7J)%ouqbk!~)zk)ti!mpEZz7^tAGPD?l00blF z_T>9!ZkDk5w6zZmnJ!XYuMaV5+4}r?Nrk!ys!2d~-pksE;^~j9{x%XrBM| zY;gI8{fFHRUoffVHA{qY(cXWl=b`6ufgRNTGQm~gTiPb805zG}o!|Y8yqB-bms5<~ z0Rmexlf`{j{8L2nP~`9VsM~blC{0Q~!Qc;x$ee^B9*v}-L;cOb*aF+Yk*){7^^qA1 zs>Z@ngWj{k`)ujSxo!o;R_f} zS7czB>lj>S*$ee@Kcy;v)(wBmGUj%!e$K4mux#LKm@#N%<>k9)N_V%)0Zb4qXKWQ^TTfn5fctU}Wr|4UxQL0s2FuAI2dg56STY zb*mH()7sK9Cn`IPdek_)Yc(W-!ZRa{70<3tk0$z(szLK8Z)OElcKsb`A5M@xX6}`d zQO$*Ni|BgEwzad26>_${zvGS$Y`-!AA&UaJCX61&<2+J86kO^~V^>QOw18?X3!b0^ z1+|hfqRQQSKg#<`%>(|TR7&VH!IDRF;XMJXSHl9;#en$a`vs=k?INnMHRyJ_qlfcP0fq$32%Iq)7IQlKJpd9gc!h)5e1B>Mm!73oGLr7_l|IV%$6c)YOA22MD}6B(vyTF z4{b#w)5F7|4=#|SA`MhCU2QvW59y>Cz$g<~)MpevOsI#n;GM!QvixK@Oqnv%SZ!pe zXtobFe3hE?VsS2i#o$|UH|{xgf>#Oe%7EiD8lx^clTo>`ZIu43f~-{N_ZvsMfnNZe zVH8d)@)5$w;d|!r1Qb_mZj!<{NQGc6IF-F0I+&GCo_V($hUdu2Qib^T$K&l{Bta|O*dv-#l96J6kb)Jw*AjuHG|GbM;nH!c=qsyV99kyqn_(UXfm;}536 z)q&$sp=FNgT&&l>V}UIZTu<-wIJfB`RmbYez{fzVj{SBrxDnE?!$ey(;K%zdZ=X7rxd`p!2#g 
z^FP8uZ7Wm%d)oIl2Em4|$v=fQz43_odypMnT!?SJc%}#JQu>yWeA#-%CF6yh zU@e8t3e;o^I=tto)0fz)=!;^iZCP6x77Yk|HMIDM34ZZpTML zF2=qtU(bG*4v=iO79VlF1bYBT!@XK)=?v@vUZK5u)UbtYH86x#g~4PM`YKEmf6ZF8 z@`#&(O;OHtFl#Pv)>R+%Mi|oz$8+|7y#Tgb12((2uC7HA{>klav8zk)g=bEC?Cxx@b6!wB#v0gRB4S2L_ z@jR@8hyo6r%ANCUVl~UybT}oNrYC>9yKlk#Y6D8XBMio*ir`tr&JUgc$_4J6+MB`1 zKHssHcJlPx0^3Qw0+?B3B=@tE>A~O_s)Un&C{ibM|4{SwANy@KY;(b{+$xqb6Xdr;29pfU@*)` zI|4-ws0U_bTwL0rS(HN=FAq%ab=6tZR5*i8zoEpLRW6XYqzD?Ph2Al{t(F32oZtOs z%LZr&ageI$!--3*Mu(r2~#Olfvu4<#$~oeC)9(G!HNF(jFtaC16n6hwmK8U^sG!sFa^WV zwM|4>#OZ7U3jzx)m!mD7GPd|E-L1ylAP3`R8=CKEj2*`t0g(m%v8!x4X>8@ISxipk zNrF#X4+B#(B=vpVX+)WkuQETvG_p6-a>PwN5AKP7KC|zi+bk^c4=fa$&~*}P3VS&G zARCR)K1;nbnv_Jwo$+97)>tM}(Hur&S`v<&RJjMzN0U-+2cSm3M1sn@#|;C~z(8LC zptu~`+(ICIWet9n#j2~OR#rIV5uUPK&Xu#w9*j{fjJ zN3JUGFatv-rw+o;wzgSrnQgQ|>i|>DAoy;}hyX_n0}0iTYX|w*?&z{I#GV`BI*T+K^y{lZ+)Evt*tM6JaiPG-dPY4 zQ9CEY7*;~!O-=R9)z~#$JrAC@%xhKueI}sy^&qBURCpAn{+yqNT@?mWTNG@Sby|Og z!h4=JFJw-fBHB`Iz$KU8ETMLE+FiXF$S_J%@d@FFUx+&K1UJHPf3r9P-6uiOXD}ox zt358oVx8#FT(!jHvJI~{%X#@;{@&*|4mEU>-Zi>NCD9)~RyyrlPR`J;&6Jep%I!jp zDGaF&4aWEDLhnYesGyrfk!2S6XMioC6PqXtl(C-2{U*C80}OclnYX}$)UN=B zR8pB|m!vrT)q0Xw=Zo2@6eX3>fwW89w3e}M1E&dcNRzj0X0mf1D1gE?sQi8f&>9?& z5uQ_nG8-808>j#VZycC@3wZ&vhAvLZKJ>BxfuI1(w|oHc8HDd1O$H5P9a!XffE0yU z07Id>8HiHmhkF%pu#HWx4VCBrS=YscRK7>a!mnA(sulBGhyCVNa$K^284^$hqQeql z{^_UfF7>v7d%rGR8@i~cS}++fD1oBeDJ!?w_SI2M~D zhk!0X4ndL}lPtk{d9?>%7yM`1_php-RZaC*W44`|!IjHZ<`?C^*B5aoUd^6YO++xN z5`ra1`Y^wBe97FvpvI{6ZwJ(>x$apJ-S**d`sj#;R?_T{%_z-D`TH@`J@<&4E3G2{ zT<{BhfPk{W7JICF12g*Lq%m51S8x1n#eetR6cLbzSp`G5ZC;G1S+XH?@;LR*>S}di z46T5!Iko#7l?<(2p8|>l zfae&9QAr;fm>PNi;pl&GNuz6v)BW}bqXaYUTf6McWPHzWd3)Tt<$VL)Rp9F#zNBkz z&t#_ff~dC8uNGm?A?WT0FN^z+K3X$h?aZsM|J*d<>f8T^o96ug=LRhQeXF1U`%(K9 z`~x|AR{FC`YE(1&>JM5m_%!vZ?UWOH?s$6a!%cQv3MWkpMh)tO$que=xie;vff4T6 z92$KEDFXwnbuGr{?>dQIq0RR7v?qQAU0$bGchoV3JL(7rNUm;HUBg?6Hfa5m{MgM^ z8y5Z=f9U>-zKAr{kX2%2WTbGKl>KsQ#74gcU%bP111>I|N@#TS$Q771TWE_?udp^A 
zJ<$34?N{?_F1q+JwRK^~z;TV0n$0%gYMyOf!fkt=y1Tpk6*Tzb9_~7Sxzy6uCVWGR zvSixHwcA$Au35dw^KI(-iSYaP;d58_9rE(>I^C#XU}oOcSbI43*r14$mG%Gd_SSJx zw%r~tDk>@h(jrn0AuZC)&`2tcfG9cA(y3Ac0@BhYUD6FI0s>M3A}zv5OC!x$gU|E6 z&pv1Gea_iuf6n|RINWpJSFZK_{nj-kZQQRsHv44xQ=b_R73t#(uVRH-ysNfHE0~)z z;hED2I4zMs!foSz_xnPpA1C|#RqqbJ{I5=6 z`p>)Nf9*7%cVrBaxlPUxNgq7eT}M~H!RMx@r`M~roL^~p>VCe<77KZM1~bl|7A=p} zY%C3z9X60*VfC_H#$+v*-yChjh|f^H`4peU%eA}cU{=U_Q%J-0Vep2 zIo^^doiS_|E?g-8T<^M3E1U}N?CkFTm2h~r(s{)of3VgDfOKQ{h^Y;){@uGsvtXGF zI}wfUWRb;f!Gna!gDf~3i*MAAz3+WX@#wp$%o-Shb(L2_Wctf4$0;&|$lSL3@zG$3 zb@NPQ`{sHl$I7=7$@SVvoAm8p=0Y`uPdAZx0=qG7cOP>)T;^9|znAh5LnAk*u$*;J*J-7V9 z_=JT0z2cU(HV*yTs9Tjz%euiWAIL}F8a2bJ{A;Y{QHgf4_tCD{@ovvt$DS6P#)U$m zG&R4XJ7ZU%pW#HV+pP?WuQ@-CZa=NCn{GYc2|NB)X?2Y!$SlQkJ2Tc`q}ndSC2nbH zX~@96@a@}IF@5ITL=1|n>(dUa-``VxsBsHoI4(;UHM97ogC$2%V`ar7QveE#-PTPVzpUm<)G@2;;+Iq$;Dzd|A+BH#n-CdhVwGceQ9J%q_aY_Sj`u8%)lY7!i)QK(bKAS&wB z70<(@P2?iXBYA5Dn%2#`*81DqsozbC3ZhjUa&pj6v z9bIVHWU&6w$cQ%cit8<_k#f<|T-vxT=&ja^mo80DPt&Unz`{-~>haBrt1pOz;Fi3W(SX*2Bfz=z$qJ_9Z7ui10+xwKsBSp}CyI>Gyg+dWF@mqWW zr--AYquBC5FZ#}tYo%2+@0N$l82R{ywiH*Vl?GtBfXgn{@)pQ0!3tB{uss4}SD1|k z=Y#JBBBx(t^RZp;pZ>_bYOkM6rN7=ezXt*0Eyr~+uXOtiLwW1056*8cC6cehyyRE=bJpV{BqIz?G^0=|I zG~zh1PPA|2t&z`TZ0Z9RVd2D+!5e5OMgEXxalL!Om}}STx1ph-_v&|vs6&eTFys@G zlYeciR0r0YEi5j=5-=PlcJw;zw(VnBOvt6EU)6I<{@j?i!#C%ShjDU;eZ7cp)YP~|KWI z#!pWe>#+aV?zBh-;N5C<$#|<x0{l4vf3=DT@QN{OPhp;7f_7;n8I(+Jrc3JvXa#((^ z_es-km(7pqX^#8V;#-AfWo7g1FBaT_s0(4)XG9>3&CFn0MizzpMMMzhT~)h#_eaBw z@Yo}U$aYjkt&(Y*(m=!J&jg$9B%!L#`$KTNqNb+iL*}Pg)0tNZsa3+j;7@a&BEdALilSnSLe z!)fK$_huqhPPUosm)AdiEbEh}^6V47cDzP?yatV~#~NePbjT|tl<+HxK`Fl6b;GpL zccGXzTT*mwAnf4m3Z}V`^zoMv{F|Qz*O1O}EgZdfP zwSjS70voG5N-3O@OD1F%N!96eEPXBoPj%B;zsb~K743y zJ_iv{&HZ0LyAlkFwPxisatK|;t4Jx-cp?=tBii32H}-He?$FWB^kv8n784IBhjSuW zYJ^em9+u>^u1_|4j+%3`3l%E(==Ck4);KNm5b(yZ(&43}!+mryxBK>z9Y1A9fWB#2 zO@4l`*ZT5AgHS@(m%fY8M3rOS5-I}BH*VZGUO7H&s&86%YbaOxIx(S4X8GaXuMVz+ zF4((RJ3~W5xo{tG#T95S);yVA(C6r-C-X@&Jvbppwlc@NLy6+(W$ON1%st3|t{{FRj~5}o 
z_uU6y*LCwEao_jY^ZLA^#20yntJh6E^-22F<eY(c*ew|Yn+P*JzOz={GiG0M2gW&%D{_Qy2>unB8-);$)-zRZNPDxCB**4vQ zcY{$SnVTY{p`oFC#XvC0NiZo}If*VvUPVgn0HU(KptG5_{4ZrE9mAEg_1T9uH@T^v zaTgR76*V28Io3-f(o|m(Uu)@h!+z!N?!FaOZ`P9xu{44( zP&1je;YTR7Sfl%I>(J@XrW^0xy_;?e(}`JLT+AQdtW}NIX5|$XE$Se+f@CG_gu83z z9Idk9BgYNEr*5=IcR==|OgyRQA!>Nxu`QA)^$dOmLZ0IKb<8f19;dg~7^QMr&NEN4cn_rnz5S!7Kua<`r55Ic{ZDRajUU#Iink{?7cA8f>z}ExWtB5R^-swQJ@_n+%7ChZCaC z>drd|QiL#P5@ir067$!+4zI{VV(FlS1T#R;J->*$cEh3F_B!90VI+{}ZQq zirdGm;Sq{!PNA7UXwL2Rd)zs1*<5O{6*95a)wiX6NUeZ3_|1vR*{EY^F)m!6n`8-! zX`J`x(Q3ELi@R35e%Jl%B$#*AXQT30ibh-vXf`P3BMgLLyF62^!EEN_6hnZl6PPpR zO$aWS#AdNynGi|WaT~Ver53O^_VQ{}j(%?P=58)Tf2e%|l9K9bIh-SIu(c z(BcL795r1b^?$l9K#6BX+|M!var9lQ?AysS8fteU5X<9rWS%c5sCzb)vx$=Ybt&8I z`uqFyLf{!>gsHT6bmu-b#>K_mXD175)7V#0R(5i99L&g4)pu>S&5V`pqU5(t-Q6R& zGG3hD()>0zLQ1RXzL8Gl!5vp=^aFO8rzG;({gRTBdAMqVNt#9BtU>7u`?x(D)>U;T z{kw(p4ZkODG_e-2-(iSk?L@ULGe)+L4C3O1Bj(>dJ2Qf-wo$>5@NQ?vg)UsxkwRBj zmyT^Lyz%VvWRn<0$V27Nt+JT25+6MO`8Mc7@AO*refB{!HzJRZiyrzo%~=YB7oMxm zRPD*87UWRwX;gmtV8)!z#6Wn#-^k$?eoMnfw~*8ZPXvKI1bbg(D3BU`MdwF`ZxIIi z+GAbmEbW-_6^IJPPIayf96HCXpg(Ct*MQ-@Avaz+Jduwv% z!rVI4EFM4Z)KgjSY!t7lxnkv%gsP?G-dm`aVnk3O*naYSVz%v z?5XI{&NQS?i$lfdNe;Z4h88utW*Ko#hd8+In1xGIn-^*JjGsXTW?7Pwl9o^3hrsG} zmnl)8RCsyluw^4;e{|ug)VmHB*s4bLXCySk+jY{OyCfTGYrE{M8m&$qZdWyR?OWjR zs3OLclZ9{MAo(e?nh2%WcGjkQl0`CZP<-O(B5~E+L9)@uRl45D@XKL%-PTnjfqnW} zc=#1GNqX58>9W=4=4Q9<8&-_o`^!}$Nouf6Cwc8{=A!DDf`ofA&W?U8H_D0ry`D3S zW)4apTC{rn_@3v-e5$3wTF!ypTAUBBz9=u-Z7pcTc4m$nXt)Y_e!Bf~ZjzAzr&n1} zCZliJ>C$sf9v%~JimU!tuqD`O32;7H-3VyDb^CUPn`BUIhH!e%>_bRqtfMmv!ku?sf1ts zURqwx%qZP}Gm^qbb*G=y+}A2LD0F5EqHV$r8-KCi~ z#YQOcWD$gY_mpqX&&|24&&W4CRughIZtm6a8ch8s-j z(eg6UOwznVh$isAd|&x7T&w@>6h?CuKLbtqp0WxHD_@_9Soi+@`+X?q2!wvU%i5RB zJ0?^VXyR~%%$G9L5Mp!`;1*oOX>MgbZ^Wc`Zv~EBf5)7fxJ8PErCA)^U19hA-LpHf z?9sitEyCVMi3FuyhkI^X*vzJOb|o1Z850#)=8h^UqT_&W z;&~oJIV_D;Zx0`iW>Lq#nYtQa8f zdu(IVCSb+z_-ny?_(eJXm2Qj3_ZQCQ)bTw2YB^SYKPThAdh8#)^uN(F|G3=$ zpwmU9)C!zQpm_1LFcrS_d{5k 
z&gq_w%w-V1wzvcZ6>v&)3!(3@u%cef?m{l#HoXRYw~ph7Qt!7UFvQa_EB-0KllpllOJ z>RuQi!|B<8IENynXcOPUGUmygI*d6up7^&zAaE^&h#9;eC|wg z(_nHkL5}6?7wP6)ahAM#^m+IdH-di5_h(C{sis|bN#-!ik#4#6$mDBwUq?8ieR}QF zipm5BS)chP?0@DTc}q;1UlBwoXJ0-i$sEl*D}LF2SSycGWa?h-9u2O%l^dZQrEC*| zc=uwqS&Htui?Hu2V=jvlZ7DTaz{g>V9UL4e)SVSfQkIV>x?h8&Zfbj20wD6}aK&59 z`4=Uv@q-870ZA_}FN>yxSwoPs(IfWTtpU;`cxkwpC^1DV!07k1v|Qrw-VGkMB$ZzK z;CG3&GsdqF^tzs|ZaQtMH+7~qD`|+#bv)TZ1F6od z)YOc=IL`@!TQ-Gtq}uRgR+%oGX}133!;)XWtgA*v`1oXPT3`Z=WNFY4s9Isb zwt^_r+>9;1)5=8Q$WFJ=%0MO;o3aWHjpDoMHA(?{sZs79u17H0-MQitQ&NTu7zH)n z-qH-;&eA%!tC#N{hnp*>$uV!7PwVlaBAKQs`stcnMQSdI; zW=&Hy2jtN5sP0x~|NPX{)OmcW-tydGOHsE?rg5h8e#xFu2zf!5)d$OlcNPhxZ-vYC z9(x~d4ejku{+ejeJ8CBEy}fA5Z%zYX0JaTG_dvVnO~`LnUf`*Ow5fjMGO{Ny=X*Ou zQgD~~>K3Zka@Co`<%m&a6%`f3mX(m7M06HgJ}NlCG?i#daen?CJjwh)-MP+|_DII7 z{sOM+h-3%elpjWNFzvaDzqQsZv1o)It>&tG68OKau1y6(+i_f_&59TBD6^s4YiH_w zKyzAc<=k%;!Bh8WAsgQg!W;O3E%B|%y9qVj=((?3AN$}@t@8>M?F@jp0Q+f>HeS#% zZ7yXv9PF$?M}-aMau6UtZ7mL=j~5d;yRED6adD$|GdF>j4qI*mjMpOVjatE{m*_MO5y)e6%gbHv^tP0IoSdmv3WaiKuRS8kRmq5sRAkWDuz<1XZ&bb# zhKYN(VTg0g+V;K<@}-Sd&X$#tfmE?Qz&xk3@h*A}KSf~>RycJ`P|IAp6FP_rk)jh3 z5kZsj2nhub`=P|+`g~+$gt{(YzI>C<8nm^{{ZB3+A{+?R-c}!+nVA_FAf`38w0vpw zTANifVcKhJP-`akV{HL6F#)zo8P@#w+G^;ev>acGexlQzzitGjAE`zlZjuH^3mBdo zAn)0*Y1q=#z<<`NeT5>s{|rJv==S z^dbAMt}w3Fy$HDoMAIaP3WHVIaMYwmqOutz*L2~l`-cvDvkrn?8^TH76?Ql4Tm*>Q z>^fLEu3Jsn*J!FwB_}6a^12`F*qJ25e7}Wz&x3OK<44~oGnWi{M2V9QRv?MZvCh;>U=Whf*xeGS&Bq@hiW+^g6uCFEZ&x4}kAeb~h zK0fkNoH=@Z|ByFeSaPX1=BhgY&x(efzhj<%(IVPib9kUs+gO(!Ojd5swe0-jX(?qs^?(T*~e@Q^^>wb2cl}5sV!KVxQDyx)pmP~t<4XURF>K^suRr>_p-|rR&JT(oO2K}jEs!H7yC5z zNn!PDTrL&0$nPUM-SRta_}kHuSN^@Pcm1sZO}hXY3ycZ(z}{ z)wlIqQ14c~WE#QGOJ|TT5c1R>s}d~u*1A`GRW29ABl-Z=lLkt~Tz@7lOQ-58%e=rE zKtWCp!87DrG`^N2uvCDiN3?s zhN|#KrU5d9*t=*>d){GWdJSX>GSjkQ>naH5&%%mVbo%#I+ex6`1%(p?z56!qo(Fare!V<>zQRS>bhfoTI~3s40VTz z_^v(`2H`1LUnZ$C>!H%ve|hu*??wg83IYbUUw&i;Ioq_o0(4o8EO z_nVFHG`G|qMIh`b1L*T9o+T5Azs92VovxmrpJyfsZGEi_RI0cC!b5xe()|2bw`)reBGg`1EnAY_UGwy80N|NcuC@CtEhscQ+Sd7 
zS`I<(-{)Nrvjs6MLD!HR@)P(E6t0EfXwb`l0U^Jl>=AQ=sHD;FwYhR@T3($DuTHuU!ny?ow(|Gj@{IkoIorKLc0p30dR$sr33&+M)Q)<1~v z9ao!DZl2M%kLbAz5CR1t$-i=AcFNM+e&l}j_%DbUZTS_wU$#7?Z0qXoW@lpyOqU~j z8w+6X>fd0`H5&u=ppPNQ0t%nySthN9d(H=<=np5dXkfI%`6K;Do+&KNX{aAyV`9luM^GO_;EjIX1SNPhbrAWMVv-lXb1*Z z)hHXzhk=2pK67Jo83ODYUvV0mDtAh}w$GnmF0_zx1pI z`sdYu5kVf6m6a9LgsRb`r!0B}15jB-MeFT58Ax&Z;f<5x{zhZg&R~Rc&df7+b8`a* zy;UTAGss~@w@;MIe4Vk(l z+CjrM5TEY;vq7vgtq|8^*Ufw;;mH}z+>zGS)^fgy`fQDwGYwubo^>%+FXvga?`~Nn z+OJn?sE5;0vk(Rn7prXiM;!RYz;d0Wbm3pQUCId!RLYT#z*#7okCZAj-C_t#eZ_Ex zP=W;!sH@ZrKN)v=C7CM#4fKryJqn{7WMm}LL5sq+)5MjpoKGKU4|I+_o2OZI-=Z4z z6JrY#{Qm|6pVpXSVZ}Eq%#QHeet!pFAJ}nBEG{7ZZQ04AyOP+~=uC_cXB6#U6mPd> z+Aivzk3Ck9d3k4rPUoqM&0QEmck=o--V^_yBEA1HIKsI#V=pk6MZ~CluvP4BWn)u( z$1jxvG{J1h>5BF1Dkn&ub?nT7+X@uZ%hz~;6RF;cQ#1v92DB_6gX50i4-Wwzf)-c+ zKR-+s^H#>uY4Y*}j7U;eUHz@EPi;jPtGpI`Q(&nv0;3IZBJp70uj=%E>r&I#Z0e^R z1Tn?~-NB1_Ma9J{;JP`q@!Ckhz`to&YXL>3+z6M$T=2=lK(5tj<<{mV=2or(Uj&g6 zEFKtU_3&sjTkHz*Xf;6W4t|3TJQwT z8PTStPRk?kXx|fgL%_lZ1m=e?Z=~bm*NUpD=|tX0Ab;O|d5J~>aAwQfzx$pZ7zdgO z;Pb@;Y;(%jk^lOF!+(I+|9?2{f79ttsnVIg)&aCuJw+9EzG_njl>F!KA2d8ZRpG;F zDUKs~6+?{Q*7t}V3nwWL)dAL4C2g9V?CUcs(FRhx!{=1zX=itf5^o4v`4Vt&U(Tq) zT$jLfCeU-jd|yEldExoH4G)Wb@D)#!U%yW8t6lgN`U?w7;p4^uSmPj~h5mB&Mbaj8 zZDX8+!)4hMj?b8pLm~QeZy;Q4e0AzQDI_a=4~Y0Knr-=z72jNoGQqrm4zp z`3OL8X_G+sz=G$-8af??KkM^8RU)ITtRG_12zBM?cs;?SpZwj%Tvu_=MYj9mC0;I) z!}+x7`zDVvadTnHRWzXsB$((U+K?`Lyd;IpxoAl9E{?}VYd!Cuhs_a9dBx%G*_#vF zf)NP$(9@CaGr#vMgj|HF(3fvVD@1$ns}UJY`#t%U5$H1aKtph9IuA*{#9# z!-Ob9?YDF3n-SV-AnGh7$Jd|-OPVi%1c3v?>A6(v?0)!b zo0ts*4QVhSt8pN)y~&A*n=Zn6o|e3$Rg+#UhH9k2nRRv7E(D|gh~i)+;l=`E&BV4F zz|nb!4mE;{e*OUg(N>1?5xiC-QaEWs##|@f=gG$Fv-yKA!pS#eL(OHYSA|!8?_2Te zfqSdnPwNj8d?26zf_Hb__}>Un;W`e5QW~c^vx6(^9pVd~w3+az>Y_sHoW4I#)qejZ@4*wozQaKHlUl*1)69 z8UqHfF4JhOv|&pI-|~uzb}>9$T#yMNjIvMO=ehMSAUG)q4B?_5xxdBQH)};Vq_~RQ zx^Rjd0`mQvD?}IlDy_#Cw%iuiO5+omf&!(UA00zv_4POa@EiMVbp!$&yuDMlz2~h- zgVCTnL9Eqrr2~Z1P@R}!CW@n#reiOG0Dlu#;bibD^-TqEW35#VbpjK;>K~r*02Ei> 
zR}iFTc;i%T7YgoUnJqR4J;c5Fynw?CNT3A%>srO>KjSC`yb>71DK7N$4v9XyrxNOW(Ek*zI`80SZ2WO{gJk6W!;-hG`@6F#S~Ir2sz4npme6YVx#H9vCo8pCahqvy zwRG`5*pe^?BINTvRY{DPnVFl*)r5<_$mh==qzIG>nhi_C?s#*Cn@fZ?l@VcPW>%4_ zT^lwwdzRV6-X5x6f(W>Hd0ROZyZm!J05|#)41RpZeG0+ntjKT=7?Nj@vxCDxmVe@d^8c^Qns#VAmQ$gd^m=nhcYT)p%}Z z_0L<{*r*@~y_!;#m%(x3nw;J5a5Ke`j3a>HgWY{dd9Y7+gaBQk^R3?=c}(K1PuX^J zqs@Ub0HWU`Eg~W!nT!&OEsN9ERqn>dQjWTitd%X*U34t5#F@L$4y$?2L1t!Vhh}vP ztZyMQ7Fj>Hh77~nrkfy+c>H9}n5(_C0ChtN%M%jfRhs!;{+EC73SmYsZcV+EpTUp*7N2s=hpz1{2HxNaoiXfIRTT} zrCIH)*Zc)J!ssH}XTE)VU5YIq0q&X@Ha!LQGLs4ya$fU@7s{nzT$o1${~J!4a{u>m z@~Lpyd?{L$vMo3H1Nenfda~UJwCw(7l_WZc0qi;viVWZa$k$KNSOBLfZnFb;)fUZR z%>+;uRg*) zg1>@kNH+Pj523W3m)8+s)QVihE5jJ>g{38RJiF&3srR&c6Y)HizYY)QDs-X;uXb526fh}A)JF>DEmOQtk1J{}y7mUGB)tEQkl2=Gf zOzhUJTio1Ivph|%8(Bk(g93L+@{BV#7^AR4N=#tB8gu2E(QE^Pmcwz~oKxFkIa_g0 z7(2NI$K>4=S1>4! zC7l4fVXST5Fj<*Kn2Z(1SCz?^@#+>N&rt3`Qk&S7doR1Z zQ^Frf3p=$O0zy)2uDC`}rh!(ZaO|Z3w-i*i`pbD}__y;=1miq}5~aVLhXcVFY^iM* zf$#)uT`25!HYn!{npQrdZ8}_w4Xd|UB2o&h+`0%t9Vdrq`yYYz(`=4084!`d&5|mo>Gc3<20H|a?27#YFHrVOdVE&ozDgDE$rY5n-_TF0x z0B)KkJI_^F;(RKnmB#_0=}n(G#%^+R#6fV`K~N=0P#Wj`c_Wq$7hwnaBsZ-$8ofQT z5(I#?+Dcii>m30W?(OfR*&Q9wT!R-2y_&=lx_;W%$RQw_N$<-Uo7K%q+$;PX*iioD z`NNxmSpo2fe;E^drJmL2*}3+imAFcKY!jv)$e zX>DuMFqF;zC&nZlwgN2;;JzG!uu!YZB;hSdlH%eKms6>mtYmjXUJr$l%K2{F67`~J?(4hU~^Z?{ddOmMT!*A8MidY{pEXaw8lhZ{}%Jk zZ||#xMRPD)?4%b?+3tKRdE_b^0&=d#wl>yfIWyQ0<<*E`)Vd}+*Nw9!tlNkJ0R@Q5 zqfQ&ASjw7z!2DsOz7k)JpOW23{#5VHBxP8)`el=tAvZ7f^wibS(uzfqdS9*8c>V>< z)){?Fd!am{wKCbL5N2~c9vn&7SM6)c^UrCmyimGe^et7MPJ$rlK-0V5P(|Rxwy9Oe zdWSB&H`F`_N2AW_;lmH++|2uPsK7t`Ggrn_v%-18HNd=4h7bw$68A=Sq;j79piR9E zTaE_94oGIa2_mlR#yl)|Brpjfcv({(0PPhPU2xy8$EY*Y;~>4go9sKmHeBKPq7m%M zgfYZVNielH>#~9mqqbF^#kUJ}tbj2`CL`P_AILmz^H{Cpovi>6lK`c&35_BzrFVXg z=B>+|TmjRtG(*XkzgV4Fbd02awe9;U+XrgXS?YtFh$hza1qv5(?eB&-4fE*=5Px_Z z^vCQN^0Ji$qR-o zHn0$W4fL{|TsLPIMC=tuuA0qS$!EKjzEvu4A;0U& z5oW@1TaCEEz(!9mAy4g%8%))mpwLrCw)bFX1Q_7mS3}0g@z#Y zy4g#ajL~H%{z9QT%(=O6Z{R$?kA(l2=C!y0tTQiGvp$h-LwnZqNzgqi{jB?+MPoZn 
zxUN=>T5(g%+kj#||DvC6<>md2=&!PsPiDITXh$5^ufeR{+}{3D6>LOaHJa3Jyr7@U z9G{#F#U*B7K1}~X`>OwX#EE%wfV6a>`agnF+R2#cC~xJJ=Dbla1Gov+&M!h~3r-4; zqhpgJ@)!QXqlJmHmzqIhu}|E$C-n|g?ftEzs#_c_0A z6D^^e<0<|7v%7Z;em1U$fbOO7=4%pvBh#w36J&b$-C%oJxAIn2*&if*Q$2n?qif2R z`>;VhKOAW%H{ zg7Vm|JUS6Ly0r2GC<+02?99I1!bXgr)9Qn#PPm+}$?$qusC|%a-8eW)4mt`13u6!+ zw7A!*6Tjk0E@vD0bYn>7WfN1=bN#VcSOreth=I~g=}T7>A{=0O#&{uJ&70dn%JmnbWLFN zF1R(d@jj^f|7BX70k624^5u?GOzC~>L#Ax!nQ;FcvVdWx{|0PW|E8z^E9S`m2CALF zS}ZKukFPq>>IqQcyEfExQ~@;^>?F@oJCHSeEjDZdmUu!fQ;rX2H)ebIEWfE@T#A4^ zV73+MRTZ58RrtfLeibpRvFZ}|Imi{PD740hds`Uu<7a?8kdyscRC{Ri*<2R;2SY^(qGJ@fy{5&J5S0h0H- zRv=7Lg)t^6cj`fvi4&ZC^yigR!psn8J=s{OaDk1DZB4j%tZi}2dbCoh1|JKHn?{Xz zQU5F!Rt{EJn~$Db2KeIusZ3Nn9g!gCrSVNGfK4ZK+G*+A8UQrV@?58(aSAnA4!v1P z?;r@Fc*kwZk1S6Mj=W}?gaY7DUs0@gsM+89kaxg40GuATT32zfv->ws!Ei#IGWzzK zz#r6owxsE5z}hGmmmN4zD-W33=F$F&EZvpR={()@n5)grG0{qOub;tsm)HFN1jKFy z<_81>c#5Vt?YB{TdrY@p7Lbfm&epimheL}o@>9Vc&q*#!rNT)bMdS|txYWMk;rTU} zs7T7)L$77)s5}lGR?@Oz%S|obgu>6srV$A6`XHOY#0g%_O>Mvf)ZOQOBt)RLBeUNk zbb5x5aiu*1W1M`g$Z+DhQD3<|aJ^^4{-Ygbc0)bjT`ok$@AJqPOhN0>PswwclV{=b z+!Xrr)L+cmnohTLLUV7TK!%uOyiW!)`2)^@I?}>B!$sieEYVi13q#1STQfw;4|lp} zN~v+jDN2tB^uCr-gOTsRM9f@%re^~^hMW)4wRY1m=2j9+;w8t;RnJRAc6jKMa0C#0 z>HopFL%W;bS1Ybr+6ZTu6ZhiLW zPava+lmP%``yP#8sOm&($2js+I)FvrcDDQauC`Z`H&h$}Ej;nzQ$oEk#3Q|4gqYC7 zp0diKKMOEr{XT#JkT77e=|p2ofcXJtroG?!bLZw3*LOY2^WqeL0t^G-sGR*IbIU>j zfETzrkGBkur6*JFWh+~##4rUV@LDZ^LH~ppgRr96(4%Td6*dd?s@&iWd?AwoMgY}c z-yHt zY7Cg5y>0NGGAk*pF?C(0ooM@wjSZ-gr3%ZQxq-X@6>W1FVNizf)ue2=@o-zOMjteZ z+PXT8G-mmT{`n~9OMXZQWj^IpQiQcVnfY{O8HX%+_Ry;d{6UM0EH$f8Gy}8l*GNS$ zZ-e>m*CxYU2?jus*>`PN)@zxyL+_>ynUrYrht}CnAv5u?Ll|txH90*{t~FmvMJvF; zFNZ@b5CW2E?S1xTrz9$27x73g1=u4o9R_Ab5N(s~qXe#_EG-o){0Infu%qt0bw76uVE$L2b!b;_0)rN$izg>LY28$w!HT7Cuq!^?%6QJ?#hn>v_V zQFG;T+1>b~bd7XI5t~k4!a(fPr$*xMR)}pqBA0QOE@9Q{Hj%M8qUU@1r@aey7SmH| zSPt<@^8nTh13j3i$nirOX}@31T|?`Z?G02XYF@%b8R&onokC53R%76whUA(q{F9zd z!{G3+JPrsZlx=ImhD>4|8(@a3&tsf`DpPO|1W}J$@;;%IZIglmHW1n&!}{??=zf3c3D^Fa7RxsKZ*%O?g%h;RzR{aXbp0Ir 
zY+!0Dbn^w;Jmn~`e|3ymGe-x>WOPgzIMp4(SOVKap^iw}F9rq%a9?aFM|Ikby?cjc zr4+45fNgaF<2K!u1m85c?q-t8kl8`?MHgcv+4gJU5jsmT#(!zN^OrMKPsY1P&DR1^ z9`^G_+FJsFF3FR1&c*i6QFY_W`vTrDQ*oPTlB{O(iyI&6XkJ4~M~Ie&hT`ptgPoAA zd=x(P&~5nZR~ekE6cl%loIsT5q{_k8%;zBt+}zw$&sZhBB`mDy9#3q<6$hnos5#5E z&({ae9V#3{bu0V-MPE(VVD0bF2B_bR6^*>N76~d$8Vv@Hb&gU?kC3 zqZbIxxZV?W&1Sq-8pjpm!c~vzZoUagA>I{C&SQAuG-^c+m<^^iOGWEJCzAUE+FOq? z<`UrL%^IVVze^YX4VwC zB*imdoFH`4e<1Wl{oG?iuDCa4LpS?uK23TbdtPTAcnG4RFcjb}h~ic_fIw7IQUWIp z+~iYMR%V`l2+npFEe@ae?>ImIp7dUE5S$+via_*bior%?KYTvBe;77=2f@0!x(Y*! zzp=TI;I2Pi5U90M1DL@$p{pI@&{zcI`O@>J7NiM^ z;hY7QJL(E${%Oj9a|aa&`&!?B`SNA*Xqyk+%%ZUX^sc+B3*-1{!rx2zVjlx0;YfMG zZdI|+X?2xvzzTb2^PQUcNQ1Yww?Xov%ir#;c?Xg$)D%+kG zIPr%b$M>na|6ZBQK89FtolYpvul*zn3xu4r(kf#dw|{=~JhexvaHj)*8Z(`Fx11=p zx^G@e?69TgINKsiLKn*BWdffKgRij1&RZ%T6vk}`9ypVPu*)C7Y$xKTKFDbPho-NS z3Z7@~+V9PLqJGKeOU*Hmx--wj!4{g7`_EPPGNKMcMfK&?yLGcFs34LmR~ zFfEO?cIVwc{JktmVrkeg@lD9H>gx0iq9Jaw94^iDJgcsXW_jlqmgwH;cy`Ch@o_M!&z!mG+F+pT0&Mcr zCyYx^((~41?9KMqKm`5QsE4yqS4*9OjOZ5?Vd3>FlBBN)zu zD7Gn5TKO-~-MtKHFBrn0Y9hlT)O+4< zMn>K|=Q9N*KCm!@%axsx>>Y-x8w*p#EMm zujLV**iJumg9aER86q$EAz>}}6g1>8KB>y2-exSEV_c>qU}jb|3T_)-F|p!$fmLB| zIQ~hI0hmLXT8#+uA`SryGjcdp1e{%2#Y(?`}hZ? 
zQ;K z*V1mv*2Ti&=1MWBG{%5?NP?Yh=i`j^$r%wwua3wcYFy<1j;$^^+%6j|J9&y zfny|qWIm3ejY#6Rdk7;$xQ9&M_;32I>uPRJ3j}`3Hc`}5ck5h-j;rwt3Z)5x$dubq z;+E{C*wF-d*}`HTSTLk2yJ__D5%zq7mM-uZgL!=12rK(n-7@NfH+A(AhZei)#vgDJ zAfb*yG!GTFozs;-OXnv$jX7*gqkxDT7o6_g6ism48?zE!jB<8H-y9qlP!Ccj__e_# z1;)sQ<4OT;?kG7#6j~LITwwI&rwnxwHpmqm`l4Vs1O+Kq{om8 zl0R_!+2xhDmXckTyk_v-HtHxIFvYm`FFdEose}T5tgGoVl66$XE9+3-Q*q<^oZ`&7 z`bTb}H}Q(Upgzd=6`a11`EbzBf7X_N?oGV{RF(M>?A4#jlMq^pXHS+s4#QPjCYhI% z!Z9L0cb)CoG=D9pIS-sV6?U+Gg#+B@;3w~!NDqdBj6C;C*WFW&Ic%&fL%dPPY+ZpzfT=lyJDO9w}BZ| zP(C=O+Y1M0K~7RJ3dPgwvL~2b{VE;e<$^VcWxlVVsULe+m~@gJqI+%LfLean`pucI z6RxvKrFl5{9bj~p#ZjXVjciAN?FRQO+w<975G^z?jo6uwa&UAcLEY=i#7ZOiV{4|< z|F$!C+AmMTElmKd2bX+}&{}-`5s3NC`FM;sm)T}B%}?%yAJ#fP6EnN+Jdu> z@{>L@h0Cd1lt0f?;=Ogth?~NKIv**eridWKZC1|^&Buci3aIB3HlXTGZ2~!z-5)BN zuXdQq89M!cl=tTGSg-H9ccWC2L`5nhnMvlPj76De8B&puawk)UlA)B8G4oUrQi;qI zNp8tZ=E9vR$xtH0K0dV8ckSQs?BBDW=h=I|UeEopHLFJdXE?7&H;v z+CC`mCM8SNd6q=d`r$X(eupK<;r3@77Tk>XDEGK@jX37mmee-Ei3>lCMSp}5>XpDc zt3tQw%)&jPDr>!JySvRSE0?9M7+LcKy&fb%$?VKl`}Lb1VH*T{?SW0H9B=$T4O9%;FCV&2M}^mqOz5Hg5`6a4_A5{D|?} zz?YWT!1S!z9AC7utn>LJeA8q5lKK60ziwq;cd~%dOHP-DRsXrN-xvNgTLtY4zA6u- z_a?8!a{0eLc@h1)h{XEh3KYR^51QA;iyGUMCwd*p)W@WgaT zQtsgSK1iY>SsPg-UJ2c{RRri9p(CA{>DKOjv8_> z&yB=^F=r%qLz&)NRMjPx`3+tiuP1Z&U)D`x;Tav??R^8?0JnsMq}o+k<^}=ZAHSyL zr=z+-B+Z@Ypx?)gTM48Dbyd~1Uh#96B(IS(zv5hL zOuJ*dVV^G7V@ExK8)3gKI9JD9_>lQj|HO@wo&}erh{(vG1Dp5JY{nsdt3lU`yHm#LruZ37+UZXGDpT0bal)3&V!wC8dQJX{ zl#a_EK3_bps(P-9z%-zz-(OUdzC|E>7<~)K?fd$|KM%O3e9I3RuStIN2u+K!&s;B9 ztSv2lyEfleI(^-H`frMYQSBwK$g)qw1HQ`yy@3g}?Z)IlHQjc3mv4QJM_K9auGA3V zzrd>~pu+Tw#KAL_KL7i<@CJj2*4wvlZ!q2NqMk-t)8=UMa|B2Adxt`peCs>}hWU~; zp7h;PIV&0JY7)wGwzWmkGwnQsXc78PK#V{K%MM;#<3tOL_0Me+IH8uY2cBn69bRRI zn+hzne(fUyT?P!RFq2U3M%ibpyKmhm&Si3JO8?2R$!f7fuvfWJig~~CDoX9N=woyU zzR|a}|Bo94pPA1KJYjM8^%2pqXF!_aY<8-(QWh&J7lV|`cpqYECtR$#mR+S12br|Z z=4sQS=QK=%t8dI1XSG8GB^=6I1x=Am){5{`l2m-4#ohFkrS=VC=y0ED=zZqUJlAj- 
zq0loomMvqBAjxqpTV_zCC)M)y+R8NoCiyz^$;-&xrFJ>JmtH`L!Pzs}HALfvyj45ftY|G=qiEHk}hf9W;NvivRF4xw3{#9pcO3r#s8;TfBa<_X$diY{V#FkrMJ-_=b&)_UVr$N|CsM<>5+Lq za>TFv6h4Vh-5*DOLL$8=VT@h~KQ5Hbr5Z0EQB}|>jzJ&d@FqIXbgN?fkF`Th$3x}6 zB%Qmn zK^c|`{ws2lcP>fAAZM+!HK3cMaum|Hzwn~;y+8Hvo$^yZGBk&^x`blX&W>GTQ`UmT zP4eDu|3p}qS~0_Q*=4pr?1H=2So%?F+bP(ygsUZ2Nz&C!gyu6QH=Miv`ujq*(?Y`e zXqWj{yJYiDw6p|qQ8x|Efw>@`sd?^&{1iq05R0yV!*KqVrlp2##JlQcoc#jxXE-kG;E$Z!dFGG zgy@7NB<@#HsM%mM_mR6+tPj%2$B*0WoU*IV2;=k*-p2v}gX?^73}+$YZk&PV%j)Uj z#I|v8n8d2@5EmA<cXQ05=ov>xScl2vpc9mUD!}qwz zNT}>K2I^cl`==osO^;hJKY)EMEp5FED!7rKpa#Igx^>-ZSvo9?)0f6xui1t3Ru!|C zEcQV{R-)dj$sm5AtD~(=WxM8}gAtuK0y*oLHKl9TtXN*qS?swmpPhOM2G;VYCTTN` znJl-;dmIaJT-C4Me(fF`O@J9~>D`lRi9v`A<7AHB!VcOsY;P75fp3O?!0w&SqX)sF zv7rIib$luB)57|ema}3}gEEtc5a0;$WMN@}(~2NTe6fl0!#TpW2!(aRng*f<*c4_F zCH*o$`PjuV={S{#1O#LhtR^%zK2QBV7RIeykCn{&=iXOs=pdwgp-g^-19L{>!>Q-S zxfjiCZD~%2wM5nQqdJbOwq!p<#a)$2*oC@&SW7`Zr>#%UVHp?yMCu3A6z&6^=|^x# z1D*@*=4mEN%0nFBRUV@1)@=tjMC!l+h%4vV$(Aq5kyMp2Gi0}h3~BSRoR5XJVXr6S z3E~5ahN9((U#u>*9aoYmS$5lQWt~!V+|;P25pqxMAaeb3)G-puNrEM_<2b9P24KN` z*m;BeG4E3HPp9RwbhIoI@ol;ABU{qi64wTPZebW^dE*TFh6o6JJ>J;e4R+0fN0Bwn zFa-44Mi)^Zbm#WQ)$oq`;R~qBgtsUq-|Lwd?5RASUx$a~7ed;OB{#tJOZNj3Lr_1w zgp0C?wT`4v5Kb$I_xIc+VR_bdm*gkJTSk$@4!bGT686FA!eBQ#I5=2YrS#A@r$WO8 zAq0dB3OSUgo0f*!+&~Z>`0YH%0&0aOiF3C-J?rdjkT-_X+w{C7@olObTA;%PK{Tjf z(`%#YuPKMeqlp6d2vJ|xG%GHBA zgzMj66ws3s@7^%U!Z{-or!RU2XQyM|+{XJVaVZCFOvJ9f6AJ7e`n8iz8G_O^C9n&Z zw}Ul;WaN6yJnBC3k+~tvCyvMn*%vFV6If#LJRo`PK-G(#y z#u>2s!!e3TU9#r~%NNo$4QpS8IEhlK*t}7r81OxEZTyKAJiXjY+g2^rIYRf}G9|<( zCMj7dbX;__NvF_s6xZYs2jWC6y<-D9>;_?BpUC|aRYh*7hNs!VLV#93tf9??11vBN z!D<^zIF+Y)RMpilu_U)09lg8jkVm?_Pb6Bh8keUEfKY*>HBCl!7>6*#CQ^5jVfZMw zhu2X-lP^L`JmNTCm5j&iM=%~AKRtKs=7+e{+YYd;WE+_X=;JZg8-W~nzbkCC_tm;I z6g1!WPwbhvj_MT8M!p!v!M(_FDe`61`3=k@ng+@F??eL8fnVo!RneB2ru12{h|9KS zRUjlD;G5~6aFG%{ouDCcc}NGou;@#woWU#=CixD7P5Ng14D0mL5NMjh+lMC-Ewpjz zN4#P3kXm*3#yd;Ph=}N@?u|!U0*C$(LD`@O7bT<*9>hVV?(@+k@R%X2G&SuJwEi+N 
z4ezQc#Z~#4njVe7onHT^?_dz(XX}HWqhU8W?S?8&;Xyj1a*!}{;oQ%5@4HZ&u~r&B zX+|IEbX*K>)!F4AAw{zFL4|j1Okt|~F$k_FPMbe6jx{9gFW^o(%ut?}mxpc+w^K03 zAfUqhrs{r)Hdd>yzt-Ra4FK1viNMzLsv{qt_d=Z-kF7j24o8fIB|E-REM90QLKAZ^ zoj^_rU26Fshi&C7ERQ`i;3HD!c90lp%Y=A81U&OVoMv*;VgJ;ou4m>e7ZInZo|>^% z5T9*vywmC|!f`AUcj)4$tJ1eG;j=>@6O`NF&C!d=_=d*AM~+k={%3W?3g-?|_bPr0 z!+O6mnzF*Ft}Bxj$2niX_aU4e6A{5JaNfZo6e?#)0VV+v7b{@W`)krop;mt z$L3zUww$>fcMmL{)UW=@@-78R=i(*DizQ@nRxY27Iklnl~7EvX`1gTov2`n+kdZq`{ap1m>f zDu-g1D3@?|whf${H2u;ma2vO-Qs$t?4xrol zM(-0ckq`@ves&ENnujT6_V$G$Xr?Rp~h}q+?e=v%KNi+=_uu3MouzTB7y` z&K~yu1~o3%mK5c>$DTSZO3Fb3riENn!`Hq{_S_Q8$c6U-YAZ+#Ql{;LWo#l{13B9` z_nsG`E}NHq<{;IQ4OcM&=P)~_y{D^Ny8`107z2K{ z+lt-kg^-fw(!hDI+7fI`lGMc)tv0^8>q|;%p#>}hB=i13z+jDJU5)d<(s=Rj!41*fiJCTify+y ze3^E;V^*-MGrd=wxjqZ(^FLPL|{^$9su4)4PGnc*`Lnws8p z$qjiO&gurdxcfEc6p$&gf6$;_1%|={>h9jMU{i z%rAEoz*9Ro3XqP}s7$9q3 zaGAK8-m#RnxB1m#u#$!r^URba-*Aq4O3lY3ib{*Sf=@4DlbNrPKrUIy}0q zzgD{z9IL3Z>Npgt0t>+FE22?={9R}0+zsb27+9@V3I_Vj*KhSoSd7a+E1sCfyuC)?ak{idiZ{`}hES7(G*ueino7;2UTEENrdZ%A|-)|y+* z5{jYf-i+o<;}+6y=b|o9eyv1Qv|Vp1^q}<-9TU?DOA432!}2S@qlSeI%%ij+X*zUq zD+j@m8%6?Q^jx;lp_u`8i`DqKl9JMw{(j#13A8(MJGy!Gqx879(O_Li9qQkm<^rd= z0iWYQZ331DM<*wIX_^eU*u_Dm`fN${;9%}5*?0_TID}i>&^Fl@w%uyhE&pXdXd){^2_I{k#b7g&KV?&F~^v!7nCl=@Kg0LI;*T}LdmCQtF!{3^_2rCYx@9XcUw>RtY3nUfI% zPzC)pTS(x(lLW}g3L2ZL~X~TLENMlScV-jH&FdPenHxy_MbH%9TWM>bW+)}Zk$v{`h zr3Sl+)Vh2Qi>vxf$unr6A*1HnlK7}^eRj~eaRcv*pMe~3mc3Q z*OpW6m1d8;>F)G`+1n**WJdVlXogJIXHzm#_N+{n7#Of4FEh>%RXoP!vLU=o&(Nb% zGSHfIRPXgwU84&`5m%#BIjYgb1Y~ud@rU6b8P2&X9?~HPtoY96)%Wo<<;sZi`h9Eo z-co-GzYxC`FUyh6m$7k;d2WXPMmhc#v;PA4x zDpa)=GJTrkb;gxI_h!sE9g(!aMV*k4FvP3v6v?lwb9ftbZuA$LvYrcN`4NAT>ZzM< zD!{X;pG}v*q1;~+R{|GQ=!F0h8FeU4S+wM(M`ZG06Z}oWxtspz(h_j=>K*^HQi7vc zw}{??$J>1&+NgtHMcb*aHn(~OU)&;G5_(B|)*0cOBwPfCnUsF#idjn@HqT{RR8sJd ze^@yG*W)tT9%%G|4d$wkSp3qXIh==mQf_P~ub5l&n~n+&#l zVyRS%kpguNR@(FkI2CdTj41qWaL;Cgl>M=*3z4^YC&iQ11cdbNg3ntbU+&{Y@J}5} zew7O%XKiDX*2TEoN(-jM?A8B=pdbDr@cjoWT%=!3|7Jyi8(tm4h4J~K!Kl&U5muTF 
z)PDnRiN7$>!G~@fhqHAAF~9=E z06ea4k*@IsUaInp7KFI9_l^QJ4$JhR?iXhRfeERX)5b^eBh_lwq0BCdKE-uO2@^eOsBK9NY}3B27Ouv>UpeIQaRh9{A$MKGF-D+1yi90sq zAH~xD(#8(U$0LU_ItAsZ$qqbtoi~;*Di1&&uB)qqyRn`G61v14ZHs%a-VMCE5MGsy zLz;E$D)9UhljNTE*h38G1N?88@9{Av8E)_RqT=^Ks_UlaR;0-BE?s*%Wq>YVew{r z9j%b|1FeAUtgMh(U9qSg+qWOM_-WaOD==CGR!mG6j&HUk5S{EDuw<9;bgNX?=zRa@Etlsylzkn;SxXEv7+WHw?0;*f@N+>Qep(^mf z923|}e-lGd%0{PJehGT1gmOhS&2H4-f;hl1Nu4q)kE&fmoY{JwLoAw)+HPsvGu+QcT zsmpU=4$me%XnT~p%$0kM`3;VHdT69~Jqo)(=ZO&}HvyQI$I3(%ni^jF%Ip7#F#QS{Ncs?;;(m!{hTo;YaD`izy^eM)V5Q zpv-h_JitO5W}zM}p?P_=`Rd6@-H0#OdEmvCyJ21Hibymo^>`q`&^oTB_H%NQK|Q3v zH-HPCF2n|nnaPH_`1X8ybD<6BOgo-Dy!7%vej;L3){YfcFIrDMH?X(c2FzyEO7^7A z0fP3h9kZG&n(XjO`Ad4rzJ9f}&kqZOde4w5h^9%oAHjC$UwvMav`pttSy7<` zoOUy9x{Iw{cRpp$@qfriSO1ca_IUazh17j*&k@`i2+7~z3U!$jajNMiUdxJWSEUV; zI_>mnOP%tJ;bMm8>Du9|YMX!&K#E=4i+l7EIXo$*eMGOT)jtt^VVK&d^;Ol>glS$_ zRE4eU+C?2;Fmb%^>iQg(i8K4AkHU~qf1@6v zhltrLC+mpFB|jM)gu7j5`{0gojGefgLU;IljEuXho7*gQ9dZC}CUUO9zT=Ci4H$^v z&~+HHX%k4CNakVhq>x~I4XaU9A_8@fDx!{GVxBZjcGV}6wg zbJO!0T$X~x&d`rT7Xg_EtygJiJ6MWaFx*I#5EpkEYUFmQ@;8^$Z=$BUo}j@MUIl&` z?AUFNZJ0R(MVO+$C%6**&X9-zuDBW`w(%{}Zro^A?3Sm&g%(cL=+ySqHK#UfZ5Ys# zV<%q~v;BCOh?w6aHf`^}KfEPrCw#9z=VF+A(1B;yd7S-#nqfW(_7y(W4{gLvl(*L_ zjHKt2(bd`1WRcU}4gW^Zj#-aGBbi{caOO(~55+R+!FEH$etUCqn}=CI=L8}J&iKiU ztyc83eoVgHcS4{E5aZ~a5fp$<=lJ!$^=c6$uEfQY->X8zi?(0M^Bq7=6J|I-DqGZL z=|FmHYi&K}ZK^0dQ$$-E?Ish42?`qocpuShj+6tY<5{aMJZ|uuI2M@L9XMy^2C~Sz z%?CMxmD^ro>JGFj;*2=Uud&ifVfg^9Ikl2@g>oX`0YEr2`lWCk`2wNP=(J=Tyh2;n z`Ji5wdxsnI)@TgozuewJ2pm(-n4Hd@HHNV|tq$Z2JByCz!Ss0_L z0qR6Y0J_x)%bbH(egg8#9;zZbF*&F^G&&097p`o4P{>fgN(=|q!u(vQD&K0+;G-NF zrbR~4&;x*dObw!)3_Fc!Df3p(4eAF)sxraOLDvZ?b-19}JVnq`IkhO@n?`(Bt9Ctag)0=1V`H5UwqkpOmBTC$jk&{kecU;A zBh_`F`puebjm}1T*AcnvSmss%+64O0)_Z=cb zA-Jp%;fbrs$<56*_N6wj@3R%#UM7U4`l0m!X3e*gVAY0M>@zucU3Ob@I7E0UHF|(4 zO1gJJt*SgTrjwg3=(k0e)OdgV!%Gf?ur0GWxq887-mA!d`@Q8Q^wv5$qA^Q`5fvFa z2jO4k6+BoD=)EorUW{Y|gJbHx!CaD%PFBwRkxojNq?2Wn8pRtom9C;@v&%u#C`~MJ zlf`kYnaB`Af8HSg9>M(%hH-&gSvc 
z+He_mb+G+fuRXiZ<^Nd6-=bBwXp;u)zEs@SzZdn70=?5z<{OlQ2tw{rjzkp-3=S@h zWoW?3QABNPao2pCg&-RsS8L}lN&v+J|Kr@{^n;ChyYE?@J7=F~Z1HpO+c(Q? zUl7I_4?E5Yr4yr;X7uR}duE-swBMCTAc4?;T3`oAGWUJ-kl~}ZN;<4`V7`z=m3XiJ zrH6=oa`V(U=$$ZDl|bh!ML=wf_C@4($YA%gvlCzrsNV%MWcx^LOpJ+<5xwHQ4{E!W zVbiAXG)JpM!0)F@{2^=ZBb@#{!FwNxg_Qo~1yDcwxKGo`348?%U|T!V$?A_`0z0>F z*Y4Wb;1rUk(4B$QwE2Cao;yUV5ropM{1gmpK@~GM69*=yA?ov<=vv*}1m)w2-Y~eo z40_b7RuE{Knwk;EODs+p8j>#@&!LRq@Xo-k$zjk>TGqJKa>e?$>~Q6G$B)N)ju7$b z{t4u{j%Ri)#iyY_TFQz=0c>shv_(RjQ&dbjh=}!2Yr5;pRl`6~(7AK|NEQ?O=!;YL zYcHs~@C;Mh)G~YC-X;Rkj3f^nGhB7u#hjY7mXlhlKhln3Tl z5VdHcMcRsVF4Z9I2q>MWPRWfLHHNqu>S@IuaowS3^79xJmCq7)Of_}GEsFT~__AKU zL>B8uc8`<8^|ipTozxgLO5t}>8w&!mq$*Nr@G3Aea(GCfln zK92#8eP@&0*!^E#(3dveb7&{mkt>o8ap0>LN55@oYs)9j%>0;4Vbw=kr^@78zD72V zV^gC++1{Ixs&kw-PfDuZ=diY|zRO`P_Vzfvtf*dLjMdTqvfLpR`$z7`x}~rihpTdS zx2n4wgV25n{bKxyrWz9>c;@#Zq3IYhrL3oxKTYr&i_q{{Jf6lY=^bc!3(GG|2CV8n zrb~K9N-?L#5jx-hZp@1cw93UXFCpexfi_A12aS1=Tc?QJN4nF)#dm~%oOFLIEGNh? zirZ%%X<^J^Y%C@{Q^Ro+-z#ykpg0;!#(-DMTY>pt)c;lFG#rY*)6C|`Xqggn2tG;- zo*<+ubjjZDn*Y9L+RTau08$OE0h9GtEvIgo%>qA?LtHhRu5wxcne|^Z6)nBU|9wUx z7xc{(u*nR@q0^c+g5^u0?a@q;5CAf%3M)O}u z$!%k1D!ps;d(laR<|Q5wTERtp>KwXy5=Ju_S&Rfwn=2AHJ~?Zt8<-K^)Un84n*?HNfPdo0 z`g*+jv=q?+G;057AdXE(jvkxH=5STj@L)(|QPnPv(08I(Nq!h}p2fHlWNBZ~`Oe@b zYqS~(R=^9#dkKa^+^||E8-0GS*PndWZ-vkSv*pOhkKc3G4xh}Xz!n2h2{Z?}1P~8d zsd4RD$e82AzcY|G2L&6}I=OeXwHgT;@jnKKhhd!5{t^@ka`#{AgJ+(5W-DKXJ@i#G zpg$8d2EKkpxDT<2#})_8CI84G+=K)JZ7Eq31H`0v9>O4x)s(B|b=SJlri4qVNtz!` z3e=`?-`+sRNkz@{hQGUw4pc9Sev`3{UF-K=dmF%_*xrBEZ{ZP@%-BoOaY{j61UP+B> zR9xI%@Aow|yy66695LK9euO}kx^`2qV1pGXtm9C4)jk?|YHk^nIuQ6ffHc(cd8z^z z2Y@wSv#A(vm4hCC@JQ-PMG8qW%%Y;Ag2F$!3{@|)-lZLwgYxpF7$_Pzo@Mpybpg=N zqSaVn?*%sRXM6>1q#^k5Bktd?E!ZL|x_`g?gzFx`>&$iQACFV8hbdA2#E5OED(=J2 zGPe0@nx2Ls0SxWZduE6JR@spr);*E&O96n&{WM9gY*m@5x2!kww9?q=Otu+xWRBpn z7#!5JUM#I3 zAfB$pQRf`{w#YZgA0Zw%rjQ5mFts=uI*mnZljA|URSCJ~Xqy$U%NI)j_&%+Il(p+D zFy!_fc-5&c2ZT7i6N=#x%1%Qe%b{!|JpB&-fwgQ{bI1XN56Zggd8;i*bV!_4eUPp> z(X&~7A;C>!9Z#ARFo1Z 
z-U*IxRsUFql(%3#Bz-SL&pqp$Hc#lfRVy6G?`_4ZFigc)#bu**F^k-d_K`R zG&iX&cXV1$e_u}f6WcwvmUN*&YjLU5t>}hlhf5OC{^>v`#&yAT6@nXkut?Rk?c0nOYGqTIXt|)e#@(Kf7L+U;OBg*N0J0Jk+ zbhUN1Qj+G5-(UMCrhC@2S8Kyqp%KU|3cF3h(5oMnd*d#$wDcW&J9Nu&l9<-bw?$hf z6E+UQ^P>bI4PraO+lhzj8TSLM#>GIUkdQY9^6uqRSQzc0%1rw#8HwOHG|Ql!cJshe z4_qf`8#s0P$bE+64Jas$gLF|E5!0J7@rC3p!`6ohJ2Yh}nf|;|?laH~5Q`hPR)KPk z>vP+n`ja7v3m7`h#hg{Ujm{S`hV|Ln#Re`}Dvmy#hAc`18~w5Pr2D7X#>W zdZc}?53}B_wmj$Rl=s6mD`aKcLDxyEa4O>6i0aXKL}-ffSk^89Fx#P$oC z6$AMlxpl<62+mYkb)GmMf`n6O`m}=~VLAy$BlNt*qMW{d1mlPDGxTTqnjN%$(&h>X zD35IME5nIsw6j3JwR^RwQd}`ICQY=gqEXfO^!p|306DN|h4~g4!7uM4E~lO?O8X@t zzaKUPs?ujPAg#gam)W!J%6K6A(6Kvv8x8Kz*Gxb$Ku5{r%a@B1sqypMMNVb0>c9s_%1-Ex~x<^LV{qFip}vn&fz*_85#^Sj8z( z!GAXrbT2SO{(Pu^-mlCa1axfAlowV3l&g-sro$=F&hmxX%g~Z_LKYZj*mtMIU)YKb zk2yn0lA)4S>8qFfl_75u@{^Fo`HDRD9el=NvmV?dD)`g(uW#lmswo zQ<{D#Qz9{P=_p5iYipjooC;x#1m@$g}Yh5YWPR0ojzWZU!HBi zNy4sTJuvd->(_MBV)*M&{h7PthFNb(C#(OhNPrmI_Sc-4&Co zo5gZ=h97aw?KGve5_G=2!XT^CK|U2f-{$aRsHwBSslj{%^b(UQfW5cTn3j0BZ%Cd; zuFeTs081cJ?y@7)WDr*jb#8qJr$bKfIdpYZBgN(&7f%O*Ki3N;4x zmaXd;Rtb9GEd61k(0*qj!_@uw7n?$(x6?Q4iACYJUEpGn^L;0=ByN5DXfU?Ap?`v3 zKw!6gqxWCp8HIG9)^F%S@wBBy%TN5rz>3g@hq#akl&!7Mw)6*YH>mX5d;I#266U@? zAqH+(VV>dxR43+vj84J4dq&t$I&IQ51j0XN)tFiAO0*I&%DgUKWWz5Ra-9f3AEP~? 
zjp*07G^@s2VqOcip^;HN(Ciqlcsb9QvdgNL&62W4-heuTC)0X zJfpZ~zjB4=lCSD*?^saD`8KudR2Z-d|F3l`cUzpcsU#dko!)oR@?t14HciwzCRs~E zIG4tIdxkR?+bJ}m@2!!N05bcTg#4Lk0ZY|HtX#Mi_FUP`>nHYI+N{Hmi#Z@{u#{%Q zmp=}smD}QK`p2AgUf(h>y~G^2-^5KLn`*NNm3P87I$vhoG#u1&5#81o8mzeeWY!Z7 zCK(0DMP7C5Qnxo&Rx10NhwND3VfR196MD+?w&oy zTl)@!tFFZjSWz)?>XCkEWW>-kVJXF?;O1y$HVBUv5m}rTcwc)IcBk?h;lZHd)GHiK zs$Gk!;&-)h9yxXv~gC@ek&+K|EJuZZd2<7!Uh`i6n#nRvL^1>Jy7A6%u#s{f*Rzi8opU|?`2q5cW1vt zsD&&Y7-ZE#G*y{itnc<VKKYG*` zOLpby&NySy7y0z1OeO-U4pJs#RRNFsG%O*vxO!oC1K|b%+R770d z=-^<&?~5#3Z`)g7k;UA{8-b<`XtH!=se-fCi~KP7Tnp$EH|lN zxh8%hqY7`Da?`wDJc1MrMV1oS0bsBiN zlIbtoqZ@q32z(c)>=Fq9ec<%~+$E8`fv6JzL0YM1#lA=}T-BfT8beYBzWMF)06OO} zTu@gRYIL|5bfSp(`@+oA1suwPflU1I9nUw5i7j)L>K?EVojgxGIRvkT)*6pTgr5rS z#<sS>_J>-9| zng|}@tIrqt1O#yY!oNUE5rKs8b+t*CE<<_AEJe`!F6MUX5Wz{_Kf%eYPcHLlgMyL; zfmG#&!fqg}h&e(yaZF~cPiOZ|Kwgd8-IX4#eYEcJ0JREp#0z`etpodA3Od0@heC+R zJcsghJM>PTJyF#HQzsUwwmOuBFlYCbIlAHdxcGK9rSshrstBflUx zAi5VR7ylF7K%6Bx=(%5j!z3DU9GnBM@xgNAY0Y$!$cCB;8UfJQ~Mw zuYCklR4dP*8>9C^Jpfh?^hM2bFG4lLY~{cT^p8^7K$i6N|7Z*L&VTOt3a9l{cSnJR ziix1x5AKtOePA*5NOo7y>?m}(gf&s$%w3m@0UXOkrBrq22o;9KVuF}}S@H3jPoSgW zJ0GH%`6H^HW45V^DHi@r_4dfW0*xGh2O1&9B7=jP2MuH2AKr{N6P>{Xgdgq!5CVCG z>))drPO4kznkua(8n+4(KBE!yx7y#X!j-f4a<`^=Wm@%^;z)Je@Tff;i~D6TLGw1-exf83Z-R?yF>3 zy1aj{KzA(l2A9iBmiLJrV9x#V3ZubhWg1AQHKqvg@IIO( zx9RuqY1lZn+mve93m2XnpeH{zn8w=i zN&?gAme>VR z^?!#HGsjoU#*gfK=>GesZtWzJyg($EKWw@z*%pO>=O1U;gyTtcP~0JBTemD)%bL%4 z(YL*tf#>3J2izDsvL2{xk8787!F4}yx?UJnHF1#OdfH@dQsa6}4@wSY?`MRb7iG%= zH%V%7lcParQY=^C8P$MK|8$G#mGgPsDnS68|H6DGzi{pxcOX#GfUIL!5eYs#aG&(KH^%k{ChP zb5WwX_vwAYL-5FL{l+s6_ftq!eSLt#kY6C+e-P+4f)2MR>so)2_@)Xv$=Zjb#L3dX{mn6y!5|D%7M~MScaIbD*scGP2HDU$>)ZrLG_A6N ztj^`R>l{8yGio-#fl@Uo@tjNI zmzmgM z=HY|K=z4n9DJy{|1$cCAQ&YC*f=6V}^eD=*Amw@tD`hVq{!xtFWcdNBwc@qkK=?${ z&8X?vDp?8FNv^6)1Z{*qT!sKymXEy2Tc@u)FN)#6o8%C=Dy$oH=qhm9=*0aNB*;;Zse+*+y08l)wu2-o(dLgK0i9NE+~Qb%0B0U z&jI(-_-BJZvZ46`bZ5lp(Zx-z_)6w(1>o(=n6p*OH%8uNDj3@pUIp)1V1>3Nw^_dQ 
zMGkt|iAGlFkj`VL1j3^*M*8__KUV50_j$zJ*B=saeCaY_+4PAs;4@X5BnvG+FDsgR z`_aHN-}ZL+$A_AlLoVZQB$#%mY``49_oshs;EFhoJq1^T=j>PoY8fx3aum*d3udj3 zsfWip5_5&OH-m$B5d#&6M(&-JgHxErec!hhW+(KaM z>q!f-C~$D$`VSe|?5!LO4mM4h+cYISb!5pOKsI=kROmF)rkE_$V&H+AtfH4ZSYK^< zNjTz+A+XjNO=fk=M8s5g$+*4C(Cma_w#ZU`tEC$j?=tIJ*EsyOyBTJ`q0#ht*$S5dbOEQu6zeXFTr?#IZK>(j zIpt&zq|51cd(caFEHh6mU5ln+&DGKw?}!Bx z8m!Ltb0LqF4yKR}BHj+tw7V5At#}GMb|{!O+>Q)?`-Wdeq=7FWYU0I8Lqn79xqA6+ zIni^aOJZWjerEF(eH9j8c%qi?r=z}q{Lx|7XC1VQHboVMMVn$;^egjn5}*=Pd`GS{ zOk$5D|MF4tT5O#sI4)TQ$C};q+<{ZTm)ub$b?+$n3&`kTbP6)sAg#@`;g9)c&=QSl zN9+%KHfqtb^Z#FP1aM0(6{8>*l zNyoJ;5atxeBm~UfVNpR2gQdcKymGHd^J3@*8F#WqJ-waARxAJSSR=|O3J$vc62j;; zt;fR3o-bP6(ALr6QH)xR(P==0ijT zQ&XH-G0XG_5|me96I?HO2Xj`-JtK`7+@IoagfHCcb26^#J@Mel6aE;rr^6>2>!Aly zJ}B^2`N}20)UlxF;-QP4@0})7`9X*LCr?yIjdX5gkn@c1zeA4J)P>qyU47$@`%f}Y zNnU=_JBG6x4lR*dU2n)Cdm~N#bi?O@aD*BVy=J}*jI`2HIU*1T2>AHk=l4(vofJv| zjZH&LjV!`L_P#mZ!QbKtt0+IO>iE;Ajzc(IzSaP9HadM(6lN~!`=2)&rZKbdDA!{)7(pkZoJfim7LK4P z6FU1L`f;YP##RL5K$z#cB0IX2LOdLh?wApP5UpwMMcG)XXQ#N5kqy&ySB<~(|7)ob z#^WI&Q%buEe%q?Z{!m7%<;nGDo-;MT2Yl7Y5_z2jS5- zn?z1CV3)@))K_zm0UHW-KeHrym2&U!6vPiBP`MCusGxYox4Hv$j($1DaV&|2n7A@S zkkrWhd;X=^)!GXE5riR&{X?7khrIWn%pReh_H4a-K0=G4-bX0I%8F_V8FDAR{u=-d B7h?bb literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/auto_parallel_mm_1.PNG b/model/train/yoco_moe/sources/images/auto_parallel_mm_1.PNG new file mode 100644 index 0000000000000000000000000000000000000000..22cf6cc4b3fc0ca6fd648c0f7e5e1edb83c48eff GIT binary patch literal 106292 zcmeFae|XgOweLGgfG9|_QM=RDnzYs*N8KgcZ7W5Fq`Q{7tw-?ITFyPi#BPhW+iH}k zWJod-v}!H2QPifbkkr~1t-FHQ3P_R(f<}mvDzrd>OhO1_AdpEWKW38oan|eootfa- z?tAWi?sNaS&*?K~A4xJZ-|y$MK5M<#d%f5CK6c~6YiCaX)O452HS@ZE`RX@Zu4()1 zzyEMXw*B9)p0jD1E30ARy08B8H!HJFc4a+r;dQGoyzcB(zyHP7Yrp!{+i$=9TQ}SP zmh-={cmBWl-+g@a;>CQ+?YDpJYhV50)@Oe)z2m>$zIgfl%a&b{tH&UO82=z`5B zUz?Ztpf7DYwCr=q@Sz6|uFZVVns48|<;moC_*%YBdxk4&oyQ7EhA;U*ZYUBzbKvu z%&M#0vAr-6S~vgX_ucF4-GTbyx5{I0#V)C|j~M;qy7`X>hri!A`hELZ`8yTk-Gzzn 
zMH7Eq)Z?jc-{idRam>>92R>sUwtFD9${$(PbmQvI$m4;ZC%?x#|DkWpmmJ&D|NTW( zf1BqI&r2RDN?cI%^l#>Pd*&PqbUz-5Jbuw&5tmZ=P zD=}1*T(*39eSX)X-muJI_7*TwZU=)qKT2{*v=r6Ho4k-N$=V~bZxXgc6@z8uj1&=MaB0YKi9==PYf0&2RHd9 zHu<=}aG1R^=XUTC?~zMV?j%{h{LsB^H#Ls$-RD`mFUoa4T0H!mdmSd&ICN>_GFu}3 z>Q{N=pDC@yT9R)R#ojpQl7&ryiRQqIH?3of4&T4;mZlpI9z0ky@{OW5WoJoNur$$I zmf#va1;LI+jP(fb+;(15R`OdViJp?g-gU(z`(L@O>3c2x?JfN;E~+^`ucc?+GJiZC zXN$VM1EY5jjIQ&IuPYl~P&WL}g~R{cbZgn@(y~$Bc~R++i%O6Gw)DpnHSb?ib9RU& z!h8xG^SPKKbCzq7XV*=&bx2B7_ws8(xxUb&HTxfpoB#Vq*CE^F51loSw`F3he`2w>cQNmr6~U=ls5DQ< z_xO_&{v?-lXyDRCp6MZux4ZSFmg6tAnB8ApwBL^Yy7-%Av8neYVxF#V`$xWQx9EEc z7m-)}PFwXmW?h^6uWvbez4ln~`K82+=CFCbfyAi+@18j|d*(IonWufUKd!9Ji~l*V zxpMG`9nD9p|D0R%cSWtya1gI{b_92fb+^_fBXvIRzi97y&F`JZyZ@;kAJ%Lpy>eB4 zQH%NZD=aYBH+zS#^$j0+bJZma*Hms=UNxto%}l0_^JhG}eDwD62m0Q>$Q2??}cv{6}r0|6=b?K{wy4xQ%K) z3%}zVy2LkR%ksNJ1KP7*vv-~OfpBqMepf6S)}N?wC`-^QzQTM~I_x|kL zn*(=LzjcRu#M~RpP;b`K>v1Or>*Pxxo!|eR!pO?Pw`Xkd`;&x`rdt(<%?v&sF>m{o zhBi)(nQmThmYGe<)W{DPMW&Q)SnM-{7<#JFE;sg;&iB{X4}TqZS6RR=pHbbF>-|j8 zgLooyA%)|FVMR%ctsd8}ED9uV@J8!>pLDeh+hXIDIG)F~8vQJPiLG@XZYjEGbfhdf zVt^+Z!V9+w)fA7ssPmvj-^o!ktP-*FBG+g?J=axi>omZX>PI(p9=)N|wrXv=c@5`P z%K|heqm4<7$l^qax#_VgyR{KPF#lMU-|Y5Yzx-DHa!=6{h3%$82f2H}c z<;SKHa39!r%kr#w^X8QdUsH15o*U)>^%9+(o$IQ1t;6fi4}|9%kn%OQ>ta%_a+kiVczt0`^jsYHbwVUnm4tWTzu}A42M+Nb>)-|T~s#o4U2kO z4hvfKD_*QWQZ|G++dH#sH+i1j0&&M3fG zFIm(O``h}sUcFw7=8dJq*tc>N@$gVtwgqr=ZIWIzkQv-;?LE}S-*2zK^Qw-ADX^C#2vaz*x50F%j(V# z9`F2syET(@?65W*?C|YcS3R8=JHxip-OBMRZK@o6~`aNDmx1AP{|h{p_sz*|!|o zhzm|URT4et)Y!s-;e{Lo0X|nK(pI9d#a1OjS}XNS;(y`9D}@uUmP}lEO5WLUM%f^4 zzdZie^0;GYwn5mE?{w$Yl`*&6C#E#LXA$y5%c&E>3q`HZHXnHQ-jah(LOasf-{y<8 z;UXRj4yJwd$1 zaI0Paq9Y!CJx(~MFnmQ}#qqtDo*Lty*yC9)ob#@5#1O=r{P_wjwPT{GV}fWFkJrfF zg_tW2n5C4sHg&Ez!TsiddYAjgmgktI=eCR(^Z;G0WSPx{c<&(EnL$2oJN);jHZ?cL zxjBQksrxQFK@h}yJscb~Fa1n#Fz8DJ-RnpWa#BaDg=HgW#9|i7NJf_V3FBJ~0mM^ zTln^sm>uvqxf%!+ClfHFj!QVx66TV#UEegW!oTa`8Tpk9S^an7MB&GRN^)EIyqaz1 zoGb!Zde)ZH`iD)O(_`e<^>I6;Irp6S!=?oP++YW1#)w%%pXH0&%f{|6%a8oJX#CfC 
ziLShaees`#m^T(p{8JanfSrhMo*DxW*}pF9=)293c9BK!YM>vl!8Hrle2G06VJQMf zZ|Iy-X^{4LPQkIc^X+ZTCto|bxu2t98}1v!X~}z$&uN~ ztCw}$R#kN#m-o4XF3hX)IVHz@9A}`Wzs>s(E;7XU6Sf~D8skPtyiewP^x+leE%p zShK6kf~a24=0?nYTsV8eP|i|^rW!9A#03h1#f4h=oE8%0H%t59tXwEeNuI-%#QwEt z^j{ARTw!JnNWjFa*~#=-s6M{;MFWWe-{W>+Mh=uGlwE#z{pi1~A3g1lF6|S7a<{T+#)_#WpIg{KH3zK0iAKFk8H*tfi9ALt`jOMO#u z?@gJd&a=ED%Dqo3CKbi+0dJm|IK1M-A6f|m)u8#JRuJT}1KbNAnQaiJ^M}5jSPUu1 zIU!OPWxmF!)=hCe^U}-Xi$m(YNpduOS#h)=B_{FLws$(Xy2 zQ`=-`p;k7uku?c!bbH&t!5(~vNP(8)WyIOfIiJtb>%BR@CdK*805 z^!(Wq+^KvOC)7-9`BZi`uE-1g^s*5v4bIz|H@>~p^6o&KZPsy}5h8fy7nL4OB|(?T zYI0&0&OaOC)Bv$|SBz^9I9W2A+%MgKd@tmpd1TVfA-ieu8Qotnn0nw~x<1Ri4nappQM z9Fq?O2rcV)(aAb4@dkE;IwV*?6*-+@u z!Bu2Q7O8y}4`+*_Beb-xYTm!p}Fn#J5Bm zUSd~Jc;U#O$|k;;GU&#W09~To7^h%MKZ&D&7=IqEZ!#+5N{1n2U(GVA0Wl=Zdf`o{ zfG2<#4i5yFcxT3lko1IcnWDVpHAOuV%wyMgM%M;@kqcq5iO@+FGz`vOGD@!EUS~I$ zB~#0BN4Dh&U7T8O&d?5TVRtmD-KYglD1}blOj$49mINrf74A01B153m7}4@)a8-Ey zAWT}*jYMw2!hFR!T~2=g3?m!xokuZ-TEJcn5rFTR1}aqiCv^$CfQ2naE&O~!#~Ba0 z%x@fOVGNIl>+`&5sn&;1Vs8}QJFa_J5P~x!9!-g1h-oI}A%{u19=znPj~nLbI)C}0 zLD2BzX^GY=2178oB5;;$R+4`@d<#~+;_$wj!vy9jB1MB!BRaFhJmYHLT8gt7u*6Z^ z$NudFqbz^Z+ZP)91F^_mnn4{01t<$1?%U&D$M=xZ-(R4}=k3n-c0W_v_l(ghZ!Q&) z#PSx6Ei1xNo{Q@ofj+9S!{26mIvA@qvz_N62G0z!)hWPQ^3F{SLr;@YDOtS5k|-Qw zIsrYq=$8sL?7K_sH6Aax5v~?U#3mcUYd%yLd%I4@o!y4FF&6Lt&L^+1B=}jDV(z_1 zcEDxB1uhYzO~8u5PaYZoJ+H(XjuDr_Ztux`-kGA_hbTjcs+y>ALb849xhcC093x`X z$Km9RPs?qzWDf`KgwqALAH1sQ=fBC^U*ddx>iI6YZTWeNVP{5+gWD_mU~UcW%F?wh z0URA6km`P;Ctq^-`|2Ehbb2np0M4mch!$}zTE zC1Krc@SKnI0bzIV-i>3+o;))LI9*F@kIwJxKEHAsS9$chp*~E+x zNf7zR$6S_(tj~#Ap7>X?wCj#|{>1_SAzyARRk;jU%6gsIi* zck{?!zwIYo!N^O^2i6DgGf(Xj^qV-j$-jS9^Cv<-Z*w0bEv$78Q+?%Q^$~+S!_Cd} zOJ4vwa^f%6!K>*qUlqa^c*#rhNGkaHJ+jYJCt7BA^!?1j0wBl2xl-1#3C?X|L`=0I zFLSXm*e&~gw(fcD%1y)NW3NRj;MK;);&VK-W}q{#O2h*D)Jyp77x@$j5?BfIu$6Wp_Jd3{O~ zX+qInzDOw#xk4BJs@x`AZ+Bo=({EIS&UUiWDRfjD#o&J+mBkAbazJH{q`65W96m2k z4B7XK{4bxKr@dB%=g5x7pkaCzjFL7V8X&lqo$4+9X>b@T$$VYx^7*muJh{OFaRlt` 
znXYt&F2cTu*zf4b5c^`^R~mLNsWCq3w$cp*b9rvERGfi%m&+3X2n)>}EpS>Vns%R% z<*}=SE|UBec4r*mBjj6up9YJA;@#4oXuPwtAcV6fuIc4glyVWVMR&MMaNN6cD6TlA zB3Ecyx%0ZmU_j{$kayV>Nt2k!*yw02RUSODUO|U^tt9x%S45*2*Qv2I{hCcY*Q>&x zj-JmD@#WA;jPUh$KXTsY0GWgWd?w91YFR#!-E1ZH24|q_pAI&;fb20P2@-IOlpVnb zjE;?YY7Gy491NS=(56(|JWEbTAK?dZXvu5dgt>)^Ef+*!KVd=6gRUH@@2xki`SvbqbgMlN!Bu@u}K&-4TV3Ci!6;Ez_gFj1rLrH)O z(7HTS7`ye>JMIXI%s+Lq^QS5#a|cal*qS7c-Cr8KAG~Tv=Njk>~Omswg=+kNEOROZBek`sut=F6fhj z&4X{9=PAHV=Mb4MNM6jnE>P~!YDgw4W|uwxo}S0&47pjBX_dxJEND=fU(rWg!bY7) za|Iq9WDnV84mo2-4n;mEQ4%dgL`dR*yYTgZv}Jz$cW^31X6hmrN1+=r=x$H7m9+k` zsFgSzUXw8=0Z`lCYa@9>P-#|zU*BF~oZa|uGS!cYaRRtLWiFLVk=WO-usTbc^9=@tPOzB@4mt*j)#5RdQidsnv zAiSWxNdX($a4bHmkg&x$t-#fLjVi86QN_wP$xk?vE%)`E?P}UN$dd4}>Dm7zm!h!(vnYk?Zn zMehI5H*g5JMwO0QuW#CMy)3As12;p6@mCIBFgB4X5KfI4BEGQ;w`kl);=y*{xyQ~H zP%}kZ`Bqz4zM_@%RpnNE4EJ~LXN>9H@aX)X1cyZkS&|#!`(gjz4o7T{zERMXDSkdU z-zf1cDsiVkLzcpQOTgB3ox6^GcdsKJ;6!(1uzgyvu)@+23(_rbT;7x&g353^G}hb$ z(+D~8LDYUv_V?d&zMZ8u3W@ zuslf-@TOzNEcM7Mk~?RC?gZ4W|d!eh6ZonMx268xL4OcnQ z{=4vSUW?nT6-eV_4T3=PVLOcPTI`CTHzC@M!&;mgBU2-7Xj_W;j~u-%jXOef_R5+PBUHD+JuK;c#qLg^YtHc(HWg#Vc#9N>a`=~gM<&Hg@?TZ!*sVVZ6= z-+HxS6uE6OQf=3S8E)PLJX{z2R|XHR1nwC~S@FDOhWn=K!`eGZ#W=s@GAE(Wl+;wd zyRZ6&F07+cM2?gicLCO zfVN@Mv-XE9JIKS|0Q3p(C`TrA1vkZn}f8e=l}YY$i( zTZBIzmlp_v?Kyfs@$ICRo`av=?mvp@dKRjRjVZQTQdAtJfMposBHZY~lp9qM#FRhn z4d(WOg9IISDxEzC-S8u-vzqY_y|C83GhSwvmgf zV|$+S8n50DM{C}CT{gjB7@AUO&`EmW6IEq^)4lukzKdOvN6I6QaIZw^Wqt4eR@#MV z)K=xb22o{@k&YW_)upYW79d!d+Ky@wG9Z{%P63LPGsddW+QHd4rkQpwj%gB~IF4sH zrr89O$u#V@31a?Z~H59l();N4~-H`{2y0Q{RyZ#*@EUaLl!<|$Ba=2^2)X#lVaCa zrLl<8anoU@OKMz+@nyO8=H_!*0y|rH?%ug^(igt1%rGRDNMMdqmdNTdcP$)Ds-akk zJqJQF14I*tX9rzt=Yqp&z1JJzur6#J*%N^Hj#Tp0F{Wq)C^DjbW+jCMLRnuS{ zwjw=2W5-g#EnA?(7v)ES7e1E1&8V{FkSnHJSj$BZWmglYp|%d0R77s4g4F-Tir{X{ z#kbRV5*9ZBy^|uIe4lV3tM@quzkq#&Qou^aO#%AjCKuv{wy>40!&ktmBrM0Sa+g>U zId;kBd%EAi1c#ss%sHQKoD=gcO;lw}1s>b)ug#ukauezB>;ldRWe{bBdAPks8y8tR zA_mppNS5*IL#dmTc^$#^sOuJZDySt0K%ueqG=JIRi4SGn(v{=OkBH*jxchEu8|uT1hDKKxCwB 
zZ(x+IYvrTDfgsuDT@|9dOq*kJkM!-uEIXQQAEj!FvT4!<88S5%v(%@PUrF=CpNN@9j z^xmzah4ZyaP{LO7ahz{ja&U@Fl01~E20U?;D5*qCDx^1jFW5c7G30X5ai#oTpfciI zK{L_I(0fYKBx;GBsKGYv1Z{O$4d@)>0Yg*W>$KdeK|o$CP}RW6Qmc&}{V$V?hQA=2 zPaoP2%BkXWEaHo6Oy12Nsp3vrOz|}i;KzBY(qeg4`~Zwm4RkkfS0oX~hmBlgV7@fR z1=wD2cJiO~K`AZ65DV6HtDPa;@><`cW>WrK4mHX)6ZM#+K6#itSORpzC&?IoRc>8C zw)d9KUJTAeXB|l|TAR6tO)BULnGF8(XqSZf?fVR_4h(a$ocwWwb&`B3Rkw+O-AVb7 zqbx`>(_N5$NVj@`ifIOqU_FE%^Tqf1;(&qJvl48>JSs0x?Zs;~?5WqSTh!>KN&>nH zPL-xxU&CR5-GD@s+7(=A0=+T;faKSSiROr=&#vO#s?8d|cV6Negs%8mQL`+9R)b|x znu*8+t1*y9dT(oLRep~tCzf{^ev?YwM>XylNgvIa8*NAD_p`Su1tV8+Onxx4M*IN_ zP=K_V7-5;|7YfAs1Q33G=!Jz04#>w)#$0AG7g0~o+*@A1@39H4o~2l{=1bfBZ`qmi zEaU>uRC0<^CC^c+blCC+)K@{ikR<|wgVu^^XDg*3%uD8Kdec!5Nu%AinhjLcIJXOK z%Cjv4>pW5eboBi7+vQWCgP#fx0orAsFps*^VFQ~EaNs?vXxIXH#b{& zC?<0t8x~etAU`9gz@?~EY?Ym5!lfxf`aB6dLVOP*A;>A+^_BpqE}6<|Z4GRtvYQ|f z&FPmKdwU9Lyvp9o58t2f>OXlhZp_kjz~k+RP)Kq}@y*E{DeJM4x>Hw&aKx{eptf-w z4qM2W7{cp5+Y10r zn%X(&ig2d=_?3`F3N>EFAx=W_P!1)Z5@%Jijf%-YKmT)~-{PM+xErU*V}gSSTi0S- zJ0C95p~BDx@waI_CZB6se&@xz-I&r$AWOqP)WjZ50oLGlWxOL|F7ReCDfT(^Mi!Tm6 zpGe~({Xr};i1fMH6w#Jr{Vr{ObLV%Txv28hIaOCWF2SnkhHLcMrlck70Bg9U1%wy* z`jL$+yU1~DRqhmD(`wl4X%$yE7k2$+am6r*N#}!rD@Ie99`=v=wJ36y6OfHVymYL} zP;!t<6IpP&<@1Hrr(x(tqXY@S_*pu6CgINuHTD4D5$`^nYfSeJd=_9b0s^F@n#Q3f^>36Z6PK#bsCHxmB zlBKv0tW}^vHoGOjNub{$xF8H7P=*Ctg(X$ip3Zn$%<>gDpmqyc0FG%|HMBM5xfZgO zj-K>-4C{zc8zx=|yW3qfTTKj@8@m&(vsGm|D;s~hz8PRxFMf(J<)n(@xL{vs8Og1* zq-{|5ae5BQ7i1Oqy}8G zF3ls3m(QiVmayo(PkBg6UZ8;4g7BBE=ZC&dOFC^kBOQZmrn?m_Z3}t5{jgFs&+5#L zmvJvb)L;T&jrn`Iq}cU82(K~ifOH8`HlQn__>`jBfKCPu-&~sR!CEDi7NSycwtHRr zuvr_)wb`x%$EI$zrq`A9wOZzzcfX}CfUQ6P#mDO z`CX^~%RXx00c)_$RuAo!^w73iI=0h-*+|KD3)To^Oe0scJo86tv`zg{d?GE9tsCR2 z(_iUVFz+956bBt@f!wrdQ&}o+Po6Y?GKHSn(tmV4aaZXED*?~?kFB`YowIzXWV=a) z&iD^Kx%%^HVORQt9Q>AQ&efkc-72T;Gzz4D?L;iQ>y9i$f2oe%^iR|OOn&-*zg26& zGHr8!jZ-h#W9?v1zF6gZ={( zCWZ43A>)X4jQwBy?Z7nGHs2E@%EwaHqyEB3kyHhHzA#vJ!~k}EY5^`I$C#RE2YtQ! 
zg&Sf)2(*+^_x0(pUn`SmxNeN*-EStJ?)1D(?dX5n-OxN>F8z`~_xYVqCNJZlI?E9+ z;p7elx((jOje{G(ogVX}*5UlJQ*}nHJRZxUsS09c(~@xZjx zR{vD$gYQqDljQ5lO6|KcKk|Ltbw>KvK0nAhrIdUrv1ne!u}{$?_-w`uZa&Qn;)eVZ z-L{|mGQZwx%icNo&M)pvd{F1SGM733w0HLIUbAId#H22_r@uM*@_~P+c(J_8HvYVi ze$#*c*;~!g7gMUHI>S}1OfN@S_G!y;_f;oA{O3n3ahx{!&1=`LwL1FPln<@r>2C}` zQ{+7Gzyn^BdYu66WG;|z?kvCRS&;saRqWD}9pdG#%(;sXYIy zvo;gyG5=U;hS2%Z|3o{y%zxz_FOv~2>au8dXXfi?I*VEg4*{hVFAu@>zIuSn7}^HD4d zO1TS~CAaMjoJ1B(`U1h?UT5;}#RBh6fRri&>>ggD>LD60R92z&V}Lk%5irJ|`_#1? zdp1URM{-MY**#TljDTyerLJ$y7~wm3aW{!Sh$}MzAMNYA!XWJ1t|N)*@V`%G;OmI^ zzmb@y0zY>(=$7gp`qWBc9sYyPUm-Y1Tm-PY5T#8pI@j>X&ZjQ1t9Ud#p!Qjz79(sQ z&p!@gbtV`TNsDm3^DQGzk3ZR=y$9T)MFBH3y)E(teI3{JPWTBmkhp7qV@ism-iOCo1ZQ$V>Q}Mr*|26{vGy-;l5b^}fK|S# zKINc+MGfvK``Gk7>59Rmq-;MA2Ki5J`)S`!%Hgt{Xw)w6eJlhKL(zlmvFu8l?P2d* zoa+f3<-T--;GD?53G=*fK6S|=+g9(ThHA2b66QpM<|aMnZ96~upQa*c%W56jSC%v~ z{k1(`sOcE1o|C);xC8rSTj^^I4s!=GEbZ_%u9pC%7YF@V;Ha(iL&eN+wn+hJ);C z_Y9FFgbc4Gk5rPu#Hb;Zn%AoWicG?9opzt?=urZVV06-d#;zm5{^EXi#ozOSR^O7szE5;ql7e0j16ojMRKzG{p z!v>)BVgr;VnP+-CY1E35S^N$cAK?>p3=owx-Kdfs<)bC$X6B!Su#L{KO76EMTF;dh zRdryi5w%r}Q_Z#|Y(cwP^h@js0|;yC!64K>U$c9ah+m3iJFn{Kv&&G`G-%o1WwWE2&{8Gf4Q}vfx}*-HTckZpuyD=y`YHKrKr&$9p-N!$;VZoARI*h2yJ5A&brVAqGwd)k$ps?q@$|MZ^Th1*bL+e zDt2lTQ1Vi{eY1+l{<`>oYiNW#&Q)(ua;Bz$1c!l5s{G_A?5h|e!piQwbPDClis0Ji z=z`}Vx*)wUaXW>dr47Y+qdN4hmpbD=;9jlcF^*C-c0EMk?LJ>&dR;{@ zTN8){+2itd!1|6Jc0+uGOMwSw-2x9k6>kDb6C1eC>YU)bU6?MdDZxQJno*`lA0CHd z*77WnQ>@%?o+O8_Q{@@o0klUJ2)phaE0kR$aL187u9xjF)9yEg{9 zAk4kURw-cV^f)ymLwyn)^YJ1}=EC>HGtHe`tTY2t=|}$~A7SzVN;0NDVas;S2yrt` z`@5=!-C6cok2a$+rCc;vu6ZahvlBQO)ns@Jhbrqsl_4e+G*Mmk*>t7r)h4LM6%lb< z_-O~<7$D=!(%MlL%c?<1PNEtT2{`T&ohxWXl{8JVw+S?OM`A2uJxBL;LG_x@#Nv38 zIOthO7|Dw#LgU2-%*P- z&$`zk7vf8UnTGy`HrO_>?Uf;pJ%l-xL1C%6neTB9%Y=KE{-$Z7F3F!cB4sk5?mt zl4mG@0)*V_Si))2czzhcSsoq?BF8$>+<78tr+hHe_Rn1kWRcSJcdFd=67vG~utG1f- z_nz_L?>Fq>iv$qAI!lIZD!y>{Mm!s-SVY`67#Ft;VMMN(8o zxYX*Mf=v3f9(oo$8WW>AIT}4M*;S<3U+=5fKo3wo-P;_99m#JR2^Q!_LFb^5L7bSJ 
z-Ky5BiuPmNY7$Z-I0z+|{cR|&`DIXzjFf^+7VpP%;qppq~Q-SWQ74pgi!DtjC0lgfXt1dfd$jjvY_nH2{x$3US zV2@0nk~yoQ?i}-hzrU+MXyUY{OGoiENX7^bSC0}-yVI8D>4_Sq4zlSgU7?H{%qAV> z&rz@7UlgR43U-6+nr@^Hrx}Uw8_Ft4rhDe5o!Z>xZ_@heT7iyK#}%#P;$$?_&a zlr(L3@-pVQO=|-8vRvV8Jo`BQM7z?_M-E~{KM3rp3C(R&J39Gny}*w)o$xoNnIDUy zPcK3bfledrrf?58l%5Ah+ukt`8n4#LwUuqHG-D*lT`kQwPG)z7v7f1W>eT&#fJx|& zpf<0e8(6wOJYdJtjIK(pB+}I+9T-3etbDE|L>bOro~$2K`0->(kUL;1N;;q+F`z2W zThoo1F$%Gwu~~(K{|D4q)GVkOe6-@&J70_q+-p7p-3N-Q=jr3HJB;7h(5RLUGWCWw z1xf04V-uib(`M%PWZ9}Fx|tNGvYQ0klT#bqtsHMDT3hQ38fLZC6()&Vr5nsdKg6HfMu2g?IHRYL>J+RE#G$-i=ch4c-{KABp&>V zM4RAUDVb@r4{6Q}KRUm}yT-}=iMs(tB~js%R|%8X=v0R_$}I7@NhNx$+PI6)7X6a+ z)?$325xLE-!uWfI6ZVdFv@8*Mxt zC{uSGY?ZVMjX3_!dckGWYO0FV(>Sb`l(;y%p|3EVB~HVe90w{w=|JW>6CXAzC9*5K zu=E8%Ux`jK_vKlao}`w=HWP6^(b0iQl8@D#HMXGFDF~xImw3X~Zdf3vz?@k-*Q5?- z2G}r)P)%H!xLBcHC&ZZGlqI9wm(!JZmRf9*s}0>Qx8|SsrOby^rjJN>5jZMl&+tPp z=|!ozx>5XmAsNl~c}+QE7L50&=aF%FMYp^@j3`A2#3tZuJT$Cq$pv(Sep$O?5^4p_r`q?aAPGpPjgFJl$;c z^l;|z#hvMrlU;`uE7Dr{Lj&wh=HPA#aLO}{H_J5dj9%$;k~dlZe)4swCQ@rtmpC14 zD^H%hD{(_HGcdU|*0_ezcO89c*}po5J36eo@kUr9M;h7y@D;QUi65S5KDf2{JZIX- z*aXTsbx0|zWRZF~%DYOZz`Uw2)A5^TCWy_9buuT3pa|$VSJ-guv|QH+SmrsU zX0OHn`zeZyHva`+>u$|BZ6q@pDnqBwzU*tKG$&1QbJa0`w-nf%OFt7F+mYNIw^^X8 zoF*MeWy=JAF=6w%nm<`Dg|R@GrR4J=z4sCZvfF-oevj`FMO?f3=}r7`(a%pJxZ;(X zcYxlolDsk{B9JYf8l%w;Up7pb)|D+?U0Dl-K*_=w!7 z3e6qb;KlX#8M5tyHt{@1U&Is1ctNZy*WY=fnLhg>pOFUbRsgyq(ZKN}%pY7JD=F%A_*+iO__ z$%#y8$pefE3D{OZkmlO{I80pWU0pcgGC)+32!we>oYA_bIboP;>N|^c! 
zi9p3KE=*iR4A*PP#eGb-bqw*kaRb1ulJa?xGN#7sV9Lt22~*6|>7Cn57dWogXCM1< zH8r25I1hu76Jv-A6hc841!V(AHHjH<%5`jwq6@$6BGdZm`SV6G8svfmgDk-{qP#nU z*Fq*#Mr`G(z3|jnTHdrH{(~HH>1y~AlL+5Z=_u&Y@-W%sR#Z$@08DUy@&~ zgfSL323zJzA8Gh|JiaxSFxsNh?BUKXVq4`x`+#H!a#?CNn_%;DlQTLNvdUcoI-PDs zB+bOFRAbnfByXhonk|+m{(in5hjpChj9G~XGkuu$|#x_gqtQsD=S@3%Ov*nHZ@{Wv$xwhp=}o6Z=q!CBr^ zjil0b&<54QG249|BL+y`d`n>D7Q2Wto8y#@NK-UeQk=6{F5TKXCOFIFcLhIl6!9=j zO*qL@GZhtLPk|Na0%|ITPc~w7nloKCVbt<|nIA4lT=p8ZR5|mqD0?~wJ4jw^W#xMs zR6;R!f0m|3V^c?`(lV6oj2zn!Kv!W!!&c?YWSm?8IhqOgfg4!0_)#n{2e%Yu>l(vz z8^hWwbADxLQAr_5MEIf6q8f0(%20{y5Kt;{Qha0G=V(loNd26&i@Ns!f`YIbOLetg z7|tXmq1=d-S1wiW)#iZUM;ZhAY$RY-2zHEodL=-V`Zs{B~Vz&0Gt z`IU5rn!P=f{4^}d`qe^CyvR*%m-){Y-BO<`ts^ccGO znHs8L>^!Oj(vPf63tmT`#h*E(z19Qby`X46pH{&tI&%?S6l$Egi1zg}^Fqw(BE!M+ zIfOp!ix@9xAHt_Rsn03B&~oHS+<=)lZH^09Zw{Q6*V;#2*3rXB@GP`w()h7-cCXb1 zIPhuGJfctBhgjl$!T~E6tlk}rK~qr(%4ro=AXmZqKCjy4QVhA+8o@$PU8G2wBKv@8 ze1V5X2+Xq$B>SQ)(W*{mgQ@2h?Z-QW3y*SGHPm(jp!DMlGvi63&;={WFHLs=9Ke^l zHD4FShTATWPb+)?z1(ATjXy!UO`Z;Q!(w`vnMPss z|KU2n!@QEOoSK};+QgJ7hrbdxgZej5v?bh8o@P?H*BK2!PP4+Q`0t9{ovXVd2}|J} z&Y}(UC=Ht5BHz{YGWR+qineSQC%$MQAkSKSf7~vp144(zPe2OfiI1zQs^ZVB^O6yM z>8;XSK|rE4kPLFBL9kO839W`o+?`>qB_`~Nvbxhy6DI8^ZmF3w<7gJRc+SIg8Exgl zG&8*HfYJYY_=LMPO=G=pLzk@ZUpzHJQ3o6_f;ANKSZxvDUmG5|-BaW+c(+fY5B%%^(R5Y;S@x7g6HG!lpQavqdaa+&r zcf9(^k*S)YA%oq>UsZ=}DvjE=PG}lPCKz*H_Z&dyAkCLeCPKS%oZ7TiT3_Sura}~R zQ+!aDfJVPOx zK3I2>5q`ekr-$*nR5H*o-fkxJX1Pe+Y*Km#7>7R8f!k`Nv~FT`<$LrpIta?F+pRu& zn}KxU;AHB#lGL{=OCOml=@G}(?%aUCRDYn9$q<|xx^KzYg~?fZQ}SCGhS-L&SqWIM z6hu^Jkld!IXpPx7_&TP`y%i@Lp&hWw#^!D(_?^Dq2;`gxK9J!__)rG8HwF4yc2Hvz(PmmEYGSFGJM<|8UO%mS%Fkg`E#7l|SPmB%;CCn_SeS z-%_wm7tV30=Cz7O(wxAKum*t?bP*5<^Eq;V;uy#7E!04ASIE;i=AmBvma~`yXIwHK zShIO*sHcO9N=Tn z$Wf>J$Ml=MKn};YY!OjSz<+gt8L~kuAZ-X4*jCCHHA+-$vrUU?9`fh8(s*F)E)-p> zn672~I5h*f_+uN9YgF2O-&T;B6Mdd?D#*@kd0k=!SXToIYx((#EjE3}dUR||I9OI5 z4S}Dk4Su?Zz*&+Pjb3fpJr!S-M~qc1wjv}(+Cp(qf0jw%jz*u{5#DU9>*uVYlzv2w zkDWbXK>V0NmyRE#2ev@SShbWa@wei!+ls{gSSp{D{ 
zJ9#%LFyCXdHz*ea^fLn}JXx6YBf&vd2&u-I)p#5}FdS0@7?foTO{~0@SRz86AITfR zP4Ieq#weBMR@ue{&IHOE1e&CW!2_wNaB57l2U9-0;Y=mW3-JS!JL(}_Lxn++aK^*U z;_PXt&nxe$BD-rTfL@G|!D72$FN5MvjluZjMy#B0P8X3PtBWdx9D|X7FnagePRzD8 z-MtwDBx{b5^gt?Nn^(K&NxMR@-LOEwCY7UaO2aR5NBwvy?dEx4XAD5YkHg&mBn^Q; z6*ADhjzWnN9<}8n+GuDbHslINyEL^| z?6J9DXZC}Lqq9plaMWUwWN5JwlBRR$D1=7Z<^G3~LyrZ~EQwHhHhyUIMowm4R+q?E z>x2GGD&;gy%9;BQ9Oyo+1U6+d4rypw_ks{>1mCwMBkenTY{KHeO3d%TQA^w6)R zMl9$8cdg_INea`jhx8;{amh*hj01qWXq?>4snCsQC09tv=Av3=O#T!aJ8_&_CZ7&O zLDh!tfEdljG)WJiZBI5#C#KpbW>yV4X+~1{ig*#FYOQoy2*+F4N&e_sx23)3P>Y&@;KBsgx{cJjX_Yo2nQH8Z=9&`cPZVAu={l z%b@|7|Ityh!GEL z?2kQ?>_H{Jlrw2KbQ-b8iAb5jy>13(QGl>1oheyoHOy7$(ULX6pecH#QCOPBZJHXZ z=UG_+Y`f|KwtyM_cFE}TCF9R4?vY%HVFr3(WlurFV#x1w&%eu9<{VUXUSJ#d(OzSGMJ z1kD&G`(7#G9ckEUr~%#!grH=KBvCWQJOp0KFoy=-cz6bthcEMZ-QJO9oOu5a%EB!A1`&GZcBTT=P zJhr<;1dQ(mKJ=JZ(GeRly+7%9@-l~?A@6aXX)znWRlHVc<8cD%3cR85 z#NrjdU2Lc7+>k|un8RJ^O7KehrzXd*ZMO|(My4{7eF3{^0^Hc0@JasR)k!5-{;IF3gdjIR}EM z=t9|g297~Kv_P1PLM8uc zR_PZqlNOhZlIJeb!yY&RA0utjBSHpsw-FqeQ_a$lEJ+HXc?^EJ^Z;V6`IM#~NKYz6 z3T&U{QX!uD3-U~=V3HL>79SeOJXwP?E3~g5jg!;iun%}L|G1uZz<=5tyyWwxy*E2A z`BD4Js(>{v*Cr3Z)7tvhoRh}`O{KHsvZ+bs_osEsVc4*Xt;+>!2;$bakwaxN#qsB? 
zhnWr1m?xlB)A!Odl+0OpgV=&lsT8Xk`%M&+Jg7ZGWdF~TCB0@IYBMP4g4(lZ&t@uL z-V7DDIRLrA^X>jhDW~bMjVn(-q(G`PpDngc_OMm{Go|Mh5G=whFAu{^FGBBJ%A$G#{TEM?t?WkNYy>GAq%eo-M zJqxW#){OcHeA~T#L0HU{TuJ$bC23rT!{3UBfYLLbj}Rm7wdrkrHf~Bv8J(f5QtJ;` zhmnf-L!iN}-RvnP*ia*)2pUx|98iYl#`%4_-G@0Vb(7-RO zRBs;3p)cMg+qY6mXuY0&;IIiwz8GEFQo#UoD4Rk>TkNqFD>>XjJ;w9#cCBep z*uxfG7GtYgzPWpv!)_9<%wUurz`mbT+~XBioO@GE-hfSjY9+%E@jZX$rE_PfAQvQ<)?$RyYKQ zAq%2fWa_?1YColNllHLS zGc}>f9!AGw&hI&(q>Vt6r@|}PPkHo#IC?Al#E#Pf?La6v%9YxHOs-zEuwH`4fw%hdBBVvA{bq&oI=yQ6$}%FPXhHiKNi z!meUzL09@5{m6MFfit!%8v>AYP52(W|{-_zawSI^yJMrxn&3BU~Hf>3MA67DI};fOTYV?9XChtR+;sK z903K+1^b|vdmYB&$VJ}pJw=|J13J{@QF`LR*MH7HPE|ZXIb>X#;#rjOZD-$Oz(8nq zBWXY&5J-Y!PK^oQQa4pXB(dvczr#5v@VH24q09dDu~_9PA~ zq)=@^iQw?}mI~kNS3dT%$@aNy^v~ZBgz~**d_EtQCGs=fjiYYUaYpu41K2qJEykT5 zI6aDPKAuG^>1X1HXkC+I&8ODsJZce6%vh%+jBzj0sW>h#vRp&28ycYo6l6|aa+-9F z&UZbT!G`f$>~bD?ILY1!2v{n5nUnr{37yI3By(!#*})uO(_#VRfvaZ;>Sihhsy*Xj zxAdR1h;9!#l&f{x9Z+d8hN(#p#KNP0n&updY*Le@HuqB?fDC*h802tz7Ai?yP&T|k z_JZvI@P^^=EQlJ;VYZt5t!cb}A}qYo_W545`{an7q!e*Jd@Ais)Rsog13g1OE|LUn zMLo1}B}mq!Frp~SQQ@FF-8OpgpK}9)C_2R=lWd=$L;-X)Y_IXjivMi90G~@MJ z1K0PWcnZ7zFtx|!E1-BVL=#GU-v@$CBhRcM| zrn*=I?JOSWCjSTa!~=z6>g6G^U`ovdq5!Qm8WVz`O)fIn0fO z(4JyzcM4AGvIj8U(20_dAWcu>h*|xF9K}ng#_YlHR|_)guvy>Cpp~Rvj8nGQ00%p2%w?#ThuN8+WI zi8nah*?Fu?oufuw-fO`D>leJIMOddlPhH@t;t{k-F~}0;rS6IV=w`-7H3wFRoEX62DvBAE5=ANPt4XM5;FJ9gWkn# z2A#)~+2%@aM*?Q!=h}wW=n5>XI}eIBo*o)R)dj^_ycIY$3dU~wo@RQoiE^-f3it=W z?0cMR)RUqK;VbNE^A9;%Ae?oE)Y}nU{KkU2H`mmYqtUc6DI*-`%y+qV;p@QoLZq`R@Ivn$~wRC+A^0^g}xV(d;0$MJl+ z#QSNnS<0%JXF;!(fs9LwHo|qqXx^e<4jxy}v~;9C(Q*_q-%zYuIvL-z8J2sP%|mTJ zlfj%uLzx|`j$1lK?2k{m4B=HWdK~a(=DE1*s3bxWyCwyfdTK2^K1FEjQ^XQ~8>Lz2VRU`ki&+rxsT4y*0VmSc#LzP=bD`38b1Uc_tg`ST7?7WXMEUOCgNMQC9vtz=gPZ5Htp?hqUt?ALpqC zuUUCI-F+{?t4_&BXzI4~vW9&bFgfd?w{#Phte0d%lYnos%57T96H|j~6hHL2_hn%gij8 zVI={fw15rUzJFZEpP}`PLr<>8DttrsRPFB+j($hc!1AfvGalMqk~s&l5~;y8fz+bS zOiPUh{@b0sC2yy@D<^1Jq2Nw?$&Y+bajbBZz)?H7HRlxVhgJxcogm5aJQ=q;zBKtB 
zcXaqPW6h?}@t>B+9I5B%=v8^r7-%1feF{`O4MunprWcxga(Y8UBUC~h1_7yNOpbzc z7gC_0=q3DO*_AYiB#)H^eAqkT4GyUVADU$dD_c)hPJ&q_T`&VrK9S!=foq$@wTxD1H>RLh$H#b56;2{lF*s%rz=0OC)N#!l4lzz$3Br zDjm@Rh>C6wGqh|!?HPP_CHAyCfZ>=^_@EKj=+8HFBy2j|m8X`QTyW@uNRt@@{nOU8 zYGbqxzZu7?kb4Hbe0U5x4lO0E3F(act+1pnHoycC1m@=qhg5DUA%n!%w4145`!GZ< zBEKQc(fa5Lo8_(RhYh!%Y=vX{%YTp7h1pqmD}KL5wdlJ+1|3UrEcN!_U+Kb~3G%9g z0KKD|M3Oi&u0+wGEnUI`XsPXYO?~xUr>VWb(Vc+!L?1fRFtLqxv6nbKxH*WiRL?~3 zju^*z98!jE&nB0*XTX~1K9f?lal)Ea z#<)RCh9-3JB?qjM8<{iL8~aj62aL2It(Zj7bOVh|SW@$?E>_K^Mj4biQxDTyiUW%h zhVd<@<<3I5xG0`)+?{3mx7Xy6G8(2<4IXfdT9c*QRj*CHNE_3BrUwd9LhQ3!&gmEE zrzvmuyn!hqr9J4`)2bpp!+An`VA1xQvo%vvd(Dc^p63}lig+B3V9D*ygAyd39nZwU zsfdyGa2Af~`HPo^sNp&@q-627A?Gm2bLOOwRP!A1D$?=fZzEo-_Wyy;2orr%(&5mB9OY|%7v&Xuf(G;v1iZKx zb|!YMlUY+h*b+#0-0p(P(Sw_K^5Rc3IcP&82izePvN^FcLl!N6lDv_On7il$4Orsf z|2v5Q5eh;60_Oo~0s=Wm8D=)|Og0kN7?E8Gr8Bjbu00(x*7Gb)U!W`#pPUwNXBxU^_mL|r;IR=(lapz+ zbFPv}a#cW0pRA7m;AOIcmTsL28k5`AgoO6i@~*PDT^NhKeEZBf0@xeHI8x|Ov;ALn zk(t{l{)nNlmBC@2(yVX^yJ5Z7Jn43j_yZM2W2dM;(=;kIVMGg7*P<#4k^+4$G=fUL z(7((^vL{}HDW&bP;S5U+&kVH9pz6W^iOZ5jWHh-|JmN$wg2nO<#HL@YTuA10!DGp@ z`TyVWJt6!@*mGnmI~@0dd89eI5ZFZ0xHO3KP9is+!)`W4bz3_Qx=oi^F7AT3ve6#i z6NWvOj1t%fYSXRSC=E8D-kB|EZX@Sz6SaeFD?6k^9X`mbsSjW)hk960 zx`D&ZhYZm`mwxUZWzN3TZ1KbgHfg-mj_*^+nbN>G!X0-j9*O%Scc=BYz!CkWiL%5O z9kyKKWo_r$VQYj$qZneVW*H5PBAZuUn<@esWU|cG=_YrcM4PMNsQy%tQMeKX8zn^N zfu*u~#5z+IiR~$LWCVsOKwFr|QM~n*eg@bA>oHi)`ehAmVqdeFEI2pwWx9h~DRT)rp|r8b6l zJ*Ee>uBnF&@DgY+!*;Hj3tD^XcCfh34U<(64#+y2HzwiFUA7rV% zmw>`vwgI5d_nk?}ZwNT7G0JpOQ?t$AMvvy;8VwUmsSy~K+^`416ZXoz@{@wkT$za* zXr>BiFsVq?sM{p}fVsQ>@UT39bVmn-YYu{%_8Hbzo9AW{ddXg_F$V`d=1ixdD?C%! 
z7PEzOQC>$3Zv}|eShY&s$9^UmiszNn0ioAdK2}c{zM~5l1h;M}0TW;j3L31@Wd04SSKff+M?Jz5zPSLgK!%@KDr=7`$x%PtxD$$+3bprQe)YN5bA>V zZHT}lN(m`uL9=`;LzJqYfxB_n*~Qg>8`Z#~G!~D%f4}n&-*M8f&&@ctB4KeTCNMvpV{) z36N8WhnhKk6c{Unlo_IN1VfjU%W zeWVQBNj4KtDhp5os9yqj37Pz~nquP?Q_l!-!m)71BR1>-)HRvNdV(D#=ZsQ1nP=@U zmIF(uW)c~sweL|SQ`bgEPg;-=B}r2lMz^}9(S`#;2S{y`s|P1RzNi_@gg=X`%-`Kq z`dzM(jjhW?h@gou9-bzCYLm?|q|ZHukV09jSn+Q-sp-*&ZP0lPS0Uvi*PB8;lPxAX zHc(u+2MsqE25hnfJX4N>(72h%tz9rqK*+~}Hs{VGeGMb59zShJv&}-gkp~gvH0`Tu z9{a&nJtce9#maPuu?fqF{x0!3=(RKJ<0L$MwKKLh2VcTS0HQ1m!9NyBUFbYU?NhH= z5ymp|Bmow}FPr1ZvN7t4M`0bKnT)c-CP8j&o%5ZSi3DOGS-m*N(v=TG3wD@v=MMKY$Vxk z9Ct51Gkx7FfQ4e%^*kvodN)ssO+Cwbo%xc%#(rF>fJgqy3lq;*>$X{#x8X{2NN%2ODn$e@Kpb)2i-)%KTGnD+kG)W9ONtin-QIz^4A z|1X~ezm-QG=~458E<|Oft43LeSkIEaz#tJblew?%J zvu|8ss3^S?8<>b6=JiVE+e^NfE5n0y#LJdU8?2P%QwTQy!Pz(WNSn1WBa|5fQx&lJtS$|K$hgiWqr@|%vlfX&mVnil z9Wc1I)d=xf)LZ@sYwrSG^$%UdlKns6``iF_BSw-pKqr^YZg zWq<>HE_+d~^5h9y$V>*O?F<%h4=S&4*ky@{ z+}OJO*iLbR!M_ckR(5aG;B|>Se(Mbv^V3yQZeV>BvHB3X^I*(d&$x_ShERANS01 z$U9H)lPj3>Qq}`7H2Qnf1A9o2i5tYwA(l~Jm=7BnI#R5kN2B`$fl$T?(xvW~Z#64- zux*m2m!IQ8G6F?r>fs%-JKXSNgTmd(Oj=y9vK_U{M7AnvKmPeG$BAV*Z}68KLspyu zX=|b6#<6Ju`kc~?Tx=r+)taZtdek-0Vl3E>m^^54t!>asQrkS5}Oi!w(68k2&W{m=9!Z718;T#P} z`qFyn`R``kSpT)i!W4CaSO~~W73xe5#iF@mfw?Bo z?%5I>uP5$1Xz1SNWRWU;v#5vgyKF6!kzeNeB-o?T8&J9Wc%kpjb26X;0%MY~?@E0= zZq==%86mkAJDUT6paf|w6hz2oqG-M}3QDYOMolxj%6y~N9pomiUpd}YTyW#?rZ1nk zcrww@{c<~u3F(+=p9|HW-rMuug_-xZ6pV;Y{2M9h|H@x~;Sr3nb9=3_>qf5nQdcF1 z=4OBo%8%ye^do}NJ`K-*P?ayov(U4X9T8rzz_aDK2hV1_>RxP8ER01xB`w0-WAmKN z3x`lW{khsPw1+rR#a4v}OA9@hj!xx@1&VH$Iz-g6`Rtwf7xheArq-<+Ue?$3h38WL zVjPDWUGDlu2%~H~VK6!0G=R|y`e|NEc6g3bxX_O5TlbMiHV{;>umzDy(mg`W2lE~n zW98B8b=H5d_K#WWL_%5@07++Q)(@6tV(@ zcDxts`EMTg)VeR;ZMX&5NRUuUlgb3&Vi3y}BdUwM-|-?gCC54vC1e7<6y{L&qqrTv z5*_awrjW~fL!<#QZL#qJi08D9!+r4^Q;KrAjc>p~YQS*mT0fcW;8>Rw<`(?QcB`wa zo*HUA@L2-eSXZi->xRLrgmC|nqV~YLD^;T4xp-}#98yyM3lBWH8eAji*xYY5FA= zy%_HWz?=$3k5E7|q*U7j`2YzDND&HgrJrz?_83lu0_~_So;3+?QX+rBz>LD*~UBD&J 
z^z3vsp+L2S{y6fL2pvmJR>1ixN&n!Zc-{TYQt` zK;AqT{NQ0S=Ec?WMNG-De5qD zX}nMDq#i@XPXXLBDe(QbgmEr?!riSqh~}v}9b2y`>2^yG)jSnS>7;r$_fTDvL4XG7 zVitG)^wkgMr*W+FVD6G-5_xZgWj(;ezgGcbXz`rjgo11!Ir1%=R{}_EHT)vN2^;MQ zni*FI-R74uhi?6&qnlgwGB}w@6@FuRvZM3($xHK};5()CWPr3%8#P~RWrnhx|X?8SWAP4ZT&zaBwI^YmnjeytA z><47=StEWYS8Nip1S&H^bW7#$co87ABP+(5wR$Fsaq`a2IQ{1d84*Z1zoH2MFJ2)G zO0~X=9FywGlnmG$iQ;qke7h$hfPq`MxZwpd$lD@zHUZ$PwO7-h&K096l=lQ(h|Uvu zU7ScL|L{{?;O(3}>k)v?cYrafl|ZDWF_3H@80rzZ$6~_7VE#EgN>&?oaPQu|*y;ZC zJ|5OTVpk^^#u*W5Phd{pLR87OGPa$Kap>E;i>YrpY^vx=U`YnOj^!=WpTQ99r*)fzW|h6Y+tz zn>}?43dw*kFk_k=CEuPQVh=j#_?SOI6?BeJ1OIQs8Wg`@aydoh9VTS$QlYgUPiU=b zs3WITapD}m1(>#^wGD7B%dvsw)l4hB0677Sy`Zx>Ms+q5%^Zbibke@+u4VXqzlg9{XVcFyF_Q}$VS66sz3{ZsQ4>+ZtObH0-Xcrr!usz@&DU%? z0|_ND;9MN(s9b6tM5naPDizGdwYd{i0*H1V8Rz-qxd5DE*S5)0 z>nr6G$@Eg>6nwKCA-v+J(Ao6y)WaE}+7U_Yl?L^Nlk_ngTqPgac!~gKWR;z9eh$OD z1_|geLjpz>EWa~Y)}tQ(_dn38^| zdXm6#hDs(Zf;b=`*-Krs$~paO3>|?t*EQR&vxMe36Cs@*#-Le{tR3}K;+w(y8Tnb_ z(X71#M6Vp30pdf-srirCEoCx<{Z8t!cA=5RsjW36Wb;-rs%AGZ)I^l21SB1My!Z^3 zGvR0qyu>sjhgoXV?V**-M!pd_Uy6^WWV@y#qw)6KZs)fgCjVnpo|sQbBIf5(Zl)os zBjXFYpKuKV4JHE??JDcZ09=toPXTUaTr4i^4IUfy^UbgReDh1!ZN4}^${BDpmH!qe zX9@^?>YjTZ$UlKWq}1ts0Sn}aHWE~gK0H(59Z*6KQ%~}bHSpc2_7peAUC94E@o>zR zdSQ{X9d%kpq6#8O>&8WYv51Z6l`d;GH8J>;P< z!sr|D%Hs@FemJ^53QE4ZWITA8lHw<^N@>0=sHPMo5;?H#$-KxB@iXF>;3Tkn@Cuo`l`jS44YTCgvMV#?#Y zKIyP7qF%M>kH6fs!E;7XqoIg`v*0$)Sv)u=xe{b`f*6nU4K#A6KSJ|)UU%>iq=(7n zxg>*Lq)m{tFEY^JlzdI3v2%bS^@rR8V`s-O4#G`SAmydO&|5GNN9J+!U)uTD)pke> zoeT}P=;Jb=^A{%Yu=X;uiq*mlw*mzmWY4hz8z6W2&5!}H80Kv7P(}_=#dkl=@Hpn1 z3!Qf=Kn`O}xx!L5-hckJ1+v*P0KN240*3(ePlkQv|GH_@?%GMl?83na!PsstQS~8w ztFK;Ul}|{H^m;{^^PNuK4FEkkHg7Pf{S^c_^)+YoW3R0zsZjOiQB@_>Y5WYU!H7wy zlirqjFg8*nikk2b*-rERJE$mSvJRc8JM#^WuP2DI5;8Nz@Gc{KgdKi#!?NH#zSZfY z;9|ziE4DiBqrRo!5H?h6{u7F67oNe|NnTw`O!aFhpEr)nPJ7@F2mZssy<#iC|^yrklv!Cdg*Ez*oHPwY- zHc}QuB9x(m8}-R>x0+zc&-8Br(Mj<3;A!$)EmR70a(ju1;%B*3^b#&rZq8Pe;hXz#8zvEx%f=qdkKw?4IOM$XeJOiWi2=`k$pu7n|N;CW0 
zJKD8>e~HJ+_6`yM7R&;lSzk$7Q;2l2qcu+(@-o-uhdyPa=iNZuSj~Ind#iJJ5J|9s z^TPruNaKVi9_~~@zlFN<-qhFqULmv!wBb0;SI?<#$1=J2lsuphxE1=M;`Gr_5LC!! z#@ze0PDB7-E~vq1?kL-fpT*`7(f9>KMxCRBBX)0>3|EKY$JwLJj)E6YiqJsZ_)>h4$ljP;vg|OX}d1&C%K41znZ5p0Tl7)1i?Gg`})Z> zvK6lR3KxWJ+F=Nkn2Ckr4qcO9=I|Vw#kxXuoM1@}m?d4{hsv^rpq)C8fp-$6xX8G% zyRg-dlm+_r2kGt1AVSMR;zo;IO$6m-V?&jCQZWRVO$Kyybd;ttoG`JGdg!uSF|32< zl-94w?ZfK&%G$4|;j>-PAy8#`>%7_v^1F@j3Z>h43ReYP`6yLbzR5RxdpRlA$3I-X z4M#9I8->Visc&mJt9k=eje+OP@r=q%j@F4?xG&{sU<3mlViHT z3%uqdRK1m`DnZ!0CrsZ$!XM;^kdAm@SBcp+&P*to5yT`4@e=w&So0joV@PGR6bm)9 zCk$J2>5n$=yzJ05Qgzu=Yxx>qT?Z|;t(}CG9zqibq^t>s zeVqD@R+be;Gn`bVRY*e$`+iLa(Q<7Ug$M^7WJDAJ2Vuk6pc8Z?TsnY$!Hm}+wnF&8 z>u9|n4K&3q-MTEbJ-8UHtgEH%2}QVL)D5YxWh3eqb+5;<$rFfRkXsi|4oPjMg0V&T z=!S0AUOFc_a*y>6_`I&P&m{!)&iMoNZ40OVaZpGy<+Wy`4@tiWS712}awAU86hAVb zfojDuNI1^DO<=*AnQmbIDAfg{r~ZTdu`3P!Cw&u5lMaPd^(e#F4&5=+{Iq%ZWpiTx zpMPa}|2+SBRVQ-pw~xXh`EOUH&hB5FN4YLKww63lEVy{&`|C!(52Av@skN#YodSFj zkJ+eTH_0HH6lJ7#W&%~@zH!b6Z?=p+9e&uUdTb0`w|1FdjxFKJf>Uu~wV-5ZFwQzq zd5QSEO0MYfO40iXS<`)A>s~^%d$`F*eEbC>401mq@JMs|1m$C*Jx)ETq2p9Qntsln z>stj7%Hy+64|CBRW)Q>}fi^;q`%NFkeH|1JAcNdkT{BVR;qF6W5sI5}X-8d{WtDgw z&w-9J!O%a(dW96I(~n%&Oni#`a)1X|3d@enRVip(+AsgD2Q#z5TEdPv%3e8mu!+a*5KNvQci8>t)J#)c(y*Jt)@Eh;a}!p}Qk?4H=>~;>g9*zh z(*K!JPX~L8Se6FK7H@_Wuxdh0B|U_v3BGqndMBMC^lx3)c_NZvdRshJ_I{U_wYZ`a zMvP|zp~mu@Y+|HLZUWSMC{~%a+y##l^X*dPZk?SGhTdChnqm*c!b`^kWeEjm zd&~Pt)T}! 
zlw>C=<1T^6>>k5N$j`S)cn8dk#k3}=%l_&yD-uY zfnubkbOb79o={tJJq6KrcwdP-@&>eikUt5r^}PZ|t7W zSLbRA9bq!6g8N4(!4}9u#WA*$TuEL>ifN*D!m*Y5{|?K3C)eUjL*$wNYrlQQY9Qf; zN>0HOHH@K?$BLkOhNW<9VyseM*xHg#K+;Ds3K^-oME&&S2|za~AMx{h7@d=lJ>iz# zYJHN{9b_4qgslzXbot|B-pXk)f0JFcT&Je@wb%ZBF8S*qfw{gk z0PaZlDP`aL*M4r<1NYVKxc;7B9!fnKi&0}!6`@RVS`2)mIKR|$Xb3nP5kcb6fVug# zB%RLvF`ZLTaA^VM?oPB*{QSzuzvL-rA%cWWjmt8eHAYT%^-J_K954x45_ z`@yn@L+Aq$$e$C(d(nNvhPzN<6cJ7iFMYaJO5o~5U8S!I#+Xei=I`ByAdbuF;^-K~BL+8Xb4lw{lY&~g5;?lw;PZys_tq|YQL$$ha!BC5Pu{rtC{ zC6oK(nK4yge_@-HK5&lOL<6-@qP!lH-(HS0V@?8gahL6}Z>;n5DuQMS)GS}V+;~B_ ztM-BOXvZngn$Xl$PoPK(k!n}PuMK_Y@ zY6Pdo8)M4o8Yg?C@cB6wY*A|SMDLeXT%9{^gn=`>a1gw;{qESZbzIWgKgGd;ToiSf zpJZ2O$!Tps`GdSX!EX-RHTq0w!k~gT8is6R84j<9D&&@lm&pif6tWWIoHGUh*k!;- zMMH$^+8Mh(lX{Gs(wqJ*(LksmIlyq4o;UK1$JM$!EC?%5NO^R7N$BVCFsYvPu|z@c z>LLNJw-^pVAVPS*9k|k%@AMAPO04SW;z7(Woiy%$!bwe$wqO>rkvwo1C?S0B;>uVL zKpZjF9#3F4{xXmcztoufdw+{tGXMqtH1b}f|Iu&<+H~M?GA44QXT|_f~^hrqP-`izUr>qMFgX%(! zGTTcHWpQQCSJbrYkrj4g%&a8XCkCDUArXCd8VNKp-9zLLT-+t^3rK45wMP5DEm`)}O5`P1& zPVJN%W5y()&#Ouw@QCS`2pODk%frdLmKwp#1I$*9Qm?C=9GEapY5PTPXFD(p*r0#9 zpIyZmQwLK>dEYW1rMQK7kq;8^rgpS#Sc(7DHt;U-r~){KTf_J*Jgkm*R(oSN;@B`c z4zD+_1H;I92-*QETW$qN*Bn)~N}Yr{!t0UzErrU|wh1a0jp|EyT_$^^tyd@HXz92J zDz6~rQL?vnbDt20vd{m50p-u;-JsScCU=qR_*{6NXJfa1+mICx#lIWM@#Pjxs>}Wo znA12L$Tv8Wt>I{w78vVFn{0_kO1=8pYsZ>g3+0F(=mV%VBc=CNSJjjhYFDO`ow4|b zx_Rm@wHBO{@k$|+c!yW5LA^|sJ_wRY=cn$urx#CYDkUbyvyOc_75z0*@<6hYEPWId zBdzN`i!N@Uh>KR4d~suIWAu4mJfU{5 z>XGda9e;6U(-7QORb(k>5YS5o8iM?+Sj%A$$p;v?h)tcWUcc|xNO|AKsxp{1wq z(|$zAgi8&f9|70u#V&~_6XwoDC$S#lOY07=Ir(dUY@E`aR%ynIS!*Q*GE+YX<0Pzl z1m|X87%E}*Ca6Mh*3K%2;2tK^s1L zyVEjFjSap;87x{axgAjo?E-3k1uY&Z9lDK477|LWMsjAObhS955=Fh9c{fdI#F(jX z+131lF5-eS+2|+3KDLqBcFRyu+Rv?}7v`%HRHRRtx=Q{+(kY6$k=8=dpFGO5nT&>e zU0|VxJM(ii)$$!0a&+I|Es`bpR)@-PJJUxoeG3N#>gB(9=D@TAU)}$N^pt>Q3b?@^fRSWH2G)sYHA*t6p>+k^ulsfV0c?Hb zE(nJ-M4E(?`VpxIO^DcXemO}&;WMdJ9=}VGX`Q_0UrGh@^ie!+VSDC7ni!CGew^nW z*Ml#K{|4uu{bKjoE3%*0sXpf$PLYP-ZIdg=zMFEribQL^JD?Vv`9V}sEsA#fan{q^ 
zx$5UJ-7vt=pyS&tJgx5~Kx~KYS)cg<9{(`V##h(=AE6*j?(V+bbADIZk$8{Nfu6w9 zFF2F#j+V9kj(CsmFKYfm=Kk|H%3nia`_F#<%dylc?VSIj=BfC^4v(tx`Aq_YY~;*j zXo+Bcym|9xu*gRG(sS>PS9Zm}AN%g*Eh2M2_H(~+Zq}ygFH$?B$I6a3=q;Ybel|&? z=9IXiWodEHvdU7hZnW0S&R-TkdzEPL|C=|hI2k|oI}=Zds(+^d#oy(p)5N9d2wk)1 zkUDzKe=nu&g+t0rzovv@-QczB2BmiP&G?rtPTZkm;^)p!TsU(k3HTKwk~MpKSfhVG zng79=Wl)g*vPT*>M>a%1d~@ReUK&35&iwd<;_x1Q5s!lAi||1D^fRd!f#lZQygUpv z)U4EZm_h z4n7k7&BAGkYvWZrz`^u?Ox%!+3|BRx`q;wr{Bo>q#`$k}Vd0w=PPI-i^~l1<``VuOpZwbrZ;bX(KPwNH ztv}~8#%7(6A(44_TVhK70YgGSgz`et(=nI?%Ypq_UEGCtmYDE?UrN~0^ynFL74%LV;oMGxgt2liTk%c@?M%71vt z&nYu!nS@_w+MvrU2KY$t-?aJis?c}Hr?4H8Vbp?EonVDVSI+sc&@MKE^=)D?0%H0o zY5aFm9`<5r6qKVC5mu2Bs)zV0BR7n)R3FBC?@DbnAjJj+4V5%)jrwts1x4p`#}-pK zG>;mf)yu=~R52+O`B~htiYj5G*uUI<6X5(P%l3atFg{)u4xYMycS4`{`;ro5l3o+& z66}xW(&|rFu?_DGErlPUGw2_3zu=7YixK9pzRJJv!*>5>J>svf%zezC_Y$W$Gwaag z5z1fuVItWVG7k@N#FEWcBsmrirq94Uzr&9>)JluMEct~jsB3oIAH+rt%=W2asgHZS z{c+9#&z%2?FAneSt!ng9>s1WZjL<l)Xy=IQ-flG}l|mdqUoIuNF;{-8o7a;D2rTGnoj)S-!*DF6K` z@rUG7A(A-`%iJbI3x0JMi$aQ{QG)=ee-gcx#-gkSUr0PA8)oQ?^Jk^^^IeJv`4ruv z$4glqID!ze`zWc8Az_w5ca?7`w$VWaknPWNF3HHM1}?4|Fi#mO8Zf>o7YXlT)Vw~8 z208yYEgi>7$H{I3aEO7W>?Y|9N|wZU3(j=kqtKNM+x*Wm(|Bs0B8AnrmzqJONkbl0 z?g?i!S*@wqswivF7}Qz>@1R$px91K+!5Vk8?ql5|UN-;4#Je|ESq#*Al`7KXNa4t1 zuN9=ut$C^tXBYFGA#%E^LUOO4azN^xxYbA!v2gv9B;7OI*Ge(L6t|kEHY;odFEyV7 zDs+=C5N6X*b0CK)^4rY;Es0mAimdQ=#_U$Ssv&E)l}A<{h#a38cDq;ZbM8xW-b(Xl|9S|Rj1qJCjzp^^6?$& z!CN>bv(2?AkfBCIEKI5=U2G~q?NNSLqJ|Iw_;ghjg-hwaEJ@pw%H(`7cUAO#Ghx0B z>_?Q(*lWQwQKB|*i)Ohw-^@ShinQ?;N}c4ifN2JUbq1ev9%g!hBKyVX9;DtON>#2? 
zBQ*pyAjqw`ibM(73^j&TXYr41LL||#9C>C8oC>lMoS21uGU;a3Ko&DZ1kmQTIG6yT zt1FZGz1&1#wP7QTRdpYdLXbC*T3D}zB|X-p{SGQ^>Ic>4zG?0_RSIvh{Eepo>(1~m zt@|wM1-4>$#4}?aaK&3*|F``ktE|C{-6-+lGHbc|-?;Az7F=*^>%#$=<3__Rq%9MH zFOh)97cy1u!E>RFH|H83y;Z|Je+AEwP{;2P^x~;*GtD&DC?CJY6W!0B3WXNqC}w27 zLXmI*3(gCumQ$WdO-uZ3?w(2Dzup~+F~2kaQA~%a+O0bmxGS}t?A*LI-nrF3_ z&B)Y1cL)_FluyIucICOBW=3r5t`NmoS_n1o8@ukGAc#mO(IV zminjo*0t{A2$ZOxle%xx_Nudaf-A~hg>VWrKx2-Go9qT~Bdg$LT+t6fs{MIzF;iUY zE{dhUe>pbmIL}3`4So3JO6Un#gWv~2cU;d>Ee(D%s9^69*2R1Uz&p3!ve#VDqNqpL za52DEdjfB#i0q?1945+m#V%g(rQ7dt)q>v!C~=VGddxYdLQim?Yo=A-$F5Qe+_lIN zVYw%w<2Eo|%$&QqvlIU9794v&w$cv=@zUf0o+pUR@ts`DY!8GR42$JgB!_n5#A;Hs7Pvq*3R&)_D=b;bxGCzow|`e=LHD956tLP%5aouWqgdG} z)bfTq_xhB0+tl(rFpF5=0;9Nk9<2gRECM2as7NWP01f8kK+@!u$Q(S_+lx!u2z7gR zv(TW|y<)e;fNh0ieR{AS+-{P}0lHwYbESO6#}fmeQmCzF~xLm{AWpfAlo zxkY>tYybX`V{p95i%BJlH&q_Ry4q4vJ>nm?rJj@>PHU|F75xnO4K+7=m8j6@3=JaT zVA}~o?F}?A_#$+>2tuPQSG-cOdwPTL#SjuI$ic77suSmE+1SwMTAd-;z6QpnXTj&+ zt|waerOQW1Z@A3s&yno+)BEU5V?$0t0;y4kKF0IaWF&6GGD`Z63(GH?=?b1Tf~E;^ z8Bxp&6W`XzH13AQJ1TGb>M87jsecS090t6-Gk3K)k2WZyqY}coMMHpu^2&|OAEGRT zCm4g~I(NZ_jt~U9B}djI#|hYY3R}?(cjwr`o$s5RZhi4x6Stp{1F{*C@%&n7j zlJYqZR@iODB~G5a&>IY=&(Srx1yjI|loE>i*og!Q+0BP-w@oy*ukJ~mu{WO{#-LhJ zkDPvhV*dH=aySMzO*TR-$2^g18IP4 z`Ely|@3XElAOLb_rib#4R>2=V>2f5cp}$xV1m>&Biua}u^3KLqxTt1Z8;k&hfOeg^ z#bm<*fN258mDbtioFYz1ACoXdq6;Ssi`oX>+BN_&4MK2@zS(zs2cR@@WI6fNO5a|^ zpctbP_d^}pZN3UWQC}Gy{qIyT3GXtr^2=c%!pG@BD8p?))PQ#^inW{tdPT9x;d%bT zD$wBom!{XM@4}!6fJdR3HI7-TJ(ej)=Lb%a(N^W|iAL4&(%jcPjg~DCJVwBV{jEbA zdM4zDmu)e1R;--sj}6-un{HdL`yE$vX;|XePaO7r5C`J@Rj)X;hUe9Jf}Upcm_u8g zn}x!}X*|kvQaird7(srX(Zvd;xxFH)Q42@#*r-!;;R6H~qNFP|c2M<=-KM8-u1L~b zXMMQ+d7|BKq&9PdQj7+IPM%x~J}?C$fh5@!C(Rl{JJN@P!fg=EHX%`7=nWRKiQ?(_ zlJz^<{m%{8VH%%VZK(^DWvzO8PcUa-ja6#aa=1y1mcNz__7JNe)&dX6>;rDP$(YbS zMrTTPpu8QQHFJ_3oIXl;Fi?98Qf{xrZ8b_gdG*h8-Tt%Zg?e-;e9@N8n{UoN?t%bOK^<*gZOQsUOa-s+j?+mr zW0eBq&6m%#<74q$$;V6CRO?P0)BTjl6m@^UG~XT8kq8?ptDuWi-ZM&qer4$2d(xpY 
zJG24D2tgVHQmwSQ7Wiv}7Mp)wnkK`-7-|^eL#UE0Z*AeWoqtYp_>v?UF?%;nuE)Eg$Bp}Uh}kF zIyc^FczPExSqYHg%&3|Fw>AoA^aJq zxhZQT29xq>ZukZUEMIH(g&Lb>G%roin0Cns^fKJt6k^85Lw`?1!$}`y-#5A$g_to9 z@!IZpHgPBAkOK70+^S|4UQ$OJow#d+u zCqYU~{g_&xzad^8mTnVPlxRV-uKzSj>$PPQa!(WNO|f$D>JbJXptSE8rErRR-Pi7&E^znt@I$Ue z38dTZM&uF?&JkGI#})yGuNWt1^{dD!2T@h~^Yl7?Z9Md8=)XlyrqeGuB*P|lQf0cf z^9aYY{x;XX;GprY`kOrA-!Yx})LXex3Jf&bRgjcB%P_)pV6=_9=%0wa?_7ogf2Fle zw%X<-*bp9$yy``x-)rX6<j%zlppl@1icFeg z@Si*;7OjOKNY^2MqapuHhFs2mp~MpAj3!Zqdfh;jw}FKjK=|3uYu-c4?~3w@ExL~* zq?jAKO%=!sh$DziArFY6IpeJl&okd7>UAKdYj%Q2rs|kZeZ*5e-BoYtmQYTnL2`I> zP(HBogy9!@OgxW)(15i?A8BM?NpGBj$g}=sSVc?>{UzMdj`;Aq1LRF6=u47P`}jI5 zPWg`Sv1;z$qmR>BazJWF9XO$WhXDkbZ@*stH~sJ=9%+^LVHzGns~4j0JHmVfnfh(z z*20<$y5$?@KUhZBkp?k4q<2!tzJ2qHHBS-Jv7`0PPdxDii26nq$3TaSGTqHe#1bn; zZ&k?!wd_d$hV>Y*KKwPF12(5W3Ph%XKkPP!Fza;?GSM(`fgIPv3y(pV&LYxL?g-pF6pQ2QRu}cP$XhJv;vuGC+)Xb9L47!pa7O+ zPz<13%`hLke}J+i9)e1hL)<2=!Zb=V*A_5F&pBG?Hf`elh{Id;vW~U9{f-hjM8frF zQ$N8(0mVKp8=ga~#pKMSRhkqlnFw184Ye`B2x@e(b8dPUke%!=XhdjeOreS%qztz@ zS#l(XUl!VYX=kUCBQiLaeluzzsJ*-7GW^)~37I&})$Wh3PL?fPa2Ax>S1RfkpXSQ< z7)7AuTKrUwzBTqH%zU7b*@T120YTbHg4G!aMJ4Pvb{LbHiSL=8NVY6#y^lxhnyws3 zPTH;@y&8&ncn&)DWcOR5d0mJU9 zWw;txkPx%(HU|Jawm*$b@IW7wVn_NZ-j&2mj5wD>m>a|?eBvv2Dwg?;|UVW|AG1S#d=iCYJ5G1}(k1GeJ% zml_L@dq|D3*IRb3ZVw^%Et4aBlC%Rr4r&x~_6`eGu#SgU@NJg%ATw*!%u%2#mp=AD zlh#BGSkoWa35kMpFsnLZc!+ItRG|q9qXyG~=Q!x_3K@j(ozP&)5pdZ~-3`eOFz?5Foiu`w5LZ|#^c88@#NWNTq5EI3Pb zzQbLHR9ibgtQ^3o(+%}J7;9Y1Nq}Ku@&H$0-tS9J#6C{=n{!Ktf@LmGzo;h zM3sP2Tgzim;@LazV!Er!ca?NmWT%0oS|4|X;V z)54tlvJq%5=Ed;<gtIGLW3*fdqjaGbt}64K z!NAeM&rCHyROGxV;ndq*^A?vzm6|p4>wANd!Q)Aq`N<*V{FE*fPW~dE7*LR3Q*D6bb`E5umfqttBZ|*o*+uWP#2e0f$^~M)r5epb|l}fD@ zn3$Y<-{EpV!p0ld>3N%tR7LVTtynSM7>yntoDKEhEmC2JvSipROi71Ju?uxGPQU73 zO-F2U7Y;@t+8e0jR1S}m|ME{o_LMxLMe+EWlpio&?oaAwqFo8CDxT?Og-U8)VY-M* z0!jen_ucur?autij0)^F!etY0Sfj|eDfO<-BZWa6Iv0UrfU+~Gs#$R}>BD?6@R1z1 z4SSGKWe=yKB_5#Gx{u;|v7d`~Nx~?CqTZ%ERq#NilF@8Rm3iS`|i6{q!ns!w$7-jB^i7k(E^&wn2@81v(); 
zHqG-n6cN6)A8_Bftc6`!sA`Eq(+=BDpy0XbOfTWJ-Xhf8#ae?UVvD; zW5)ob7G5MO^~6r0o0$rRlJw%{4^@x7*w=qyp9SBN9b91x{16J>+XWrO<-;a^CV`K; z#^a40(3PJBOzfc018#sv%Ce*kx@aN1Lbt*5u;>cjiha4ai{?;w=zo3p&y@&`y5VY5(YFoK!VcCyz{I5u0AkUo{cml_0bre{6L}ayH zc#Z(k5ffTMg2*@OCcG;qGgr7`ZzsPPw$W8~ytR*R*UU+IkV8$A83d^gcwKIZ`M`Lm!DLHi%8*T@$((*d&{J=}tDeZPpao2w5-@WR?%i;rFuQ^W-R_u`zKtg;%ETJD)ACm?LSeKHK z(gbGwJ`Gl26G{w}u(Yfffe;t5$!UNx|B0AmfqcGe3D!N#aLoNbt40xB(f1roPHH6< zU0Ku-QDhdJh496)co_2TF9~lal2$Ue?1xHweqTzEQ1i|YC*#hTG3++1UsV>ZZ_hip zb`yBQ0X2$6St`hmMyiGjx<_j1!P)i0s<10Cd##F*<68jp${5D<6*_hureduHC@^>r z?wxD#?ot>l2=C^ zqxAxFbN;oJbNqky$2QODjAARqB;0)XVXc2~ZztefXPfXvi< z+Av8_Jqa!y>oLu|r0JnxPmWVHs{8q62ZHk{NRaOlX?`=v4;p>Oy98*$_m7iCIt@kX z8bb}Xl(B(IAH^AO46Hkee81IpoUYy_I$UwthH=}a*7~y{g1C3TE3cgSMp`~NeZ>wL zfV^UsU7(ulN=v%>!W!ye1M7yt&HRnizu|!YK_iy%>#m&gIKUAT^02rllySfd_BqE# zA_T}KK705}>#mp&qxer94a(pk;=M2&#v6*?LS`g zY0{^X2Ex-S4>T!>shj~u#kQR!-y8WoS=n~r_XhL1P0uFIaufmunGmA$AxE5}ou8J} zAs5(R`Odp;CbR$8n=CCtqdk+<}@k@;{;s>+lr96GF-J{ z8MzvZ*d8=g3lSG&SZ17X^-CE$cFI0fR`p%|+$_F-h_3he;wR0ib#RL{(15}?xDROp z<^znK|IecDnwYGiumBdxTX5F)34u<8fy3E#+20g7u(xUL?w3|nT+28$gF-X=v1ZBY z7)#P06*Bs39q4@DIpYpvCQIiLd^B+S*YbDBzU2EUJ`$jsr9<9qmdY6ogfxcScNT|9 zp0cgHA54+Q5;nj)RJ8oamx?lTuC_p5#QYPJWpRc0pe#{ZJSVEgE>&kM^_AT9E~Qny z>}o@hr{_9Q-~cBnG9*gR$T9eGhW_C?DIJ+cm)N`@?99WP*^(9-ePMr`2-kXx6_5SK zky=Ru4KcQF6f%#~s~$`NLBbh6ccGaF*>CIUVC?$TUDK zschx|q^ic(SB-<=CaA1}I+YNj%qR-HH0qYvJ~Q-2;?8-9UZAIa;C!Z2>;Vb5LtWd-4>h$0S)$Mdt7}T zoOKAB$V>5xA*_Xe;F5a6`_WrHAk5#G_vQ$^!ED?x7Ps9badLSR876HG!Ja2N!(QS_Mr~t$eq@K1{x1C-F7bR2mCPsY;BP*R zvgf=E4e;eSnE4epmiaqE<%S5=j#s`34_6G+W@^>D_q(SvbOm_u_~K2kzb{cI8OK6x z4^~5ag57<#gtmv^uMm$VJja2e(0tYN$x0JON*;RQeN`{}h!M+}Rr$L+J5SJ@MjRDp z4$=eE)7?wF66!Sb91yXMlRACwH6nR8xKasf_NCPoe{Isbd0F`j`MV1f{5R*Q%)|*_ zZ(0_sByN+;f62xa8Cgu>h<%>?*hVufF_cPib0^|T=48L%blr{ZxPoHO$?EItODSHv zco>P%Vj^rbaq6r;L}oq@tLI)&#&%FjnC$WC?Bw_hsFTtAXpa=#t{Pc4NT&#;kSXrR zhsz||7u63d2^urApn{m+@?A2JgD*hWHg9MdEO1 
zAwr|u7yaAleLzqbhT+;@72|DE-?bFHa$Qj+>Y2#&`$jxSGZ!9D7%u)qN59W&U+Kaj?S|A{BLE=LwQO6jj+}ERmeamLsyG)MZI5 z^!H1w#bzQH_$j}E^Jjyu&wq{)9GpaQ&Gq`}M!oM|vSN=XEY$ZdCGh`t!Af(jVYW<= zM*jhpG-mAUzNusWYDBCy@=7zVsrr7@)dWe_j2_R_qGDIIo=RKA9*pH+Le|nDUWwI7 z<<(%>N?S(~2Qb(G2+gI+z%|>@Eii$YbVOzxu1VCIhfEEr0EyyBBDsOtPT2JHTj=Sj zG68A-@zGJO&!V~G7zb}*fbZm>G;cSKyP`;JcGb&n*g(&_>V8TEhtmIn%_+8Fjsv{( zQRvb%BPT%WI?B($Z^3cDA&ygu!9o0M1PR3D01Y_RED?AIRl6n~o8KZg(9X;zB3P3_ zah9PLCGG_WyO=<}2z&B;c|Tc_lg%>#^Iv~Im%wYnyV6p@2>Mz4-vEFOwb|LpE zjyr+d6PyUm_K7q2TD)?gdoLVxMtFx4DgsQ%lEoGhO&Elrkiv-_1J<%fH~EidUILLT zVq_H}$MKL`bK)=!tfuEI`IiZ=M+sw$qDN{P?bA?E#y-Wlp+pQEIjcWXrQ)e(`FVc= z@$mF85pjoWX54EZ5|v{tDx|ZbvCj-(CMuW_xL0kQ&GY#ltq)w;+D2KUiL99X7k9e% z&bwagv(E-F+D$C-M)~ZK;@lBflhAOiARwF9p;DXWbg54o!<^K;c0I`zi`)C9TK6t& zCFRl~V)`~k(t&7(nG43iS580_jn|fa^ssKI-)hp`a*FI><0*c5V>dRhn59xtPH*|0 z@y&pJ958y~!<0Jm534|X9Y8od(Nwc&Y{Q^G$Uu24lLD%@6pE92;|mq))!N@IO_Soi zcJScwg(r9_8IC1lD`OTNnZpF&HPy8`O^agriftN#Rif});u9K}uvFEu*M%vhd?&C} zJ}oWZwUC^78*NS#Qoy}lJ>L4`6TL(HPW#<}_-vHPGdgw~x}o=Wt4%(8!zi#ULFyRl z_vA*9Tfix{P(vf1FOsDfX&q=bL6lc&YHiKa5-H96U{rC|%{5PXS;)m&_fc`c8wG4# zS$l7hl@F-c+3M%dE-tHti)NM{hpcYfC9Mg!9d(|7rihor^Ew=ipj9)eo9J$XPE`bm zUmd?E+W;up^x+_i{!X$XVMP0nJ#$u9D$2OTDZ8%dWZu2t8q z0?j}={D~vY>TMXgCGL5%N5=IEY?)A?s*H<&H`pVof`#bMVw zh(Z*~u?8Lyr4X_(6X}u0LR4r#O=SyEd-BWVS^^|=CBk$gr|)#G^bS!y#LTiI#~{_H zlGy#v`S%zOgXB|Ir_niA&1Ehx9H?SO_?*+n-#6OYH_G=pV2vdOC0C2sa?*tAM=4SJ$K1wHo6@qmiz zzkIKvCIRikBpF_Y(x7DoSfRIQc`OVz!9o@oE0uiT?X}Cms4?>k&Qy&;iIOE*n$~}E z%;pJH!83eEn+a-G!fh6gKkY(>^7b?>T>@NHVS5Fc6rkwxE_6sysyXjEs0t0 z5SxHIsk1*>CPt@3RnMjFCRJbfL`Xqz!(uSgpdo6sbMby^!Z1a-xsowN(f-*2#f7A> z{vuFockb4_#~Z^DTkzErFI{EMvYSuV(C~8c2(m$Tch%fZ@j&HyJRcNO^9R;-edAOppR8TAU#%ORblc1*XoY1Ufe{eJXeg{ z*5@8NPFM#3Xnh*;-6dTJx=nyP>abY!r+0v+O{XA$!EafG#)2esxD6QG(*coY#Z_V> zp`~hk3jb#il|LwfNd0qaGr-*O5XAi~Kk=ugvRm>^E0Ks-2kCOj5}NWEjR!h*YX+O_ zJU^TsF>URU49Z+|9&^dTA?BjP9cGjvcq!@HyibzY%eNGC3YzhI4bj*RLgCQ5h_Ta- z%{;a~icV%U^t%-pMA+zv2#+LCF4jG}2*>-S)#Ht>vM2)yR6g8}Y7hV&AZ^44cB&3i 
zQlj;ijgaP0W;CY&fV4qMp{q$Va&QRMxw18`%)NgSS(g>%s?w7N{YRk4Z#hVWYPkZz z;PB{1nQfoQz=01DMzVuwnvk~rv+C$fP>REFUf7^u@TUA#!b;}9&U@Ew^_|Q!;;n?Y zIf3ufWQgifT$Q7(iZ1R$?i{zHZCAys-@koFHZfSNoDI@fK7&y*>XUAOGSW4_YTFR znnf5=zmgs7dysF}LxtYf)58UFdUj!vI=`;+Qj|H&_TJ!F*&7?POonUZ+7k^Py#wGU z3oIS2Gc=)0@q@8i^_`-B>#nIoX0=Ft0Y_uqo;!Td|yQ@{L2-Obj*y&JNo?Z0{U5eP^dMb))pfim zW}U{?N(!m2JkX6KYWL~}j~U}A7XtdT_bi~KVNHBm2)1gw{I19AFbzHWTRGyzrfht!N(Qa4-~vF|*{>T8Rfa z$4Pfs6l~fZtvg7KdN7=IMdrGiSF4;o@SvjHMeL&H=Is6z5;{jV#ju;VxUj*hwlTlFxLql9RNrT&v zCAx{9%ig1mPsi6>2t-6E`s}8|h(OADtkA6QO)3sK_mtuO@i!r3a6uf%iuwHZFOBfR7}o#7s$vmU56N2(-EzN{%3sKZuZw8J)4K z`un)KcrXUYP&IDoQZKo3_P=#ID7Xu1n077ZDUcKyY9KE6CvtooOi>hoa7j*hnBJ_lqwW8}=LsqmmrZ0_t0;I&{O1^MbvlUF7t*R=@#78ZJU7OArAF;A3ZJH9|Jj}r(6Mb{ zRPZ>V$U$cWsZt~M;Cd;=26u1G6asK1;m^u& zDR^iv%74UQ741Ta$u>1ezRzVe!1p)#sp`kPq{?H2dJ7$NWVwEjtWk)n`<*5f@olHq z0ai-cy(PIag2qEr>t<~a1g?$URIvhM=o1A1hqZN3Q>gcE@Tg5{!1s1>(p2Z(Hl9F^ayj89k1v!@N;F&jsTQeJx&~3Aq_6Ub`T_gId*#nNt}Y?_i*eW z&Wc9tS?oB!x~s&h48!#JnihsX5T^1E?n&R+O^9QZNJe0I*a^~f_iH$bj}~r97~}LU zI{A*wixM9@46U$Kc?u-S_-{V&8N)TsM6~b8e%zxbSn0z8JAPhcOl$}HF-%}6+h0Gkn6UJq)zQ5hh~FqZ1Pmj#;;`{z8t(8o)d^7a2BjgM**n$B6^bwbgrf=K3NK&{ z3fPNiG^W_M$(`{1L*GpaNlQq&^PeEw0W6qar(dZSuW1tArJ$8LdUZ82+wFJj9z!T1 zvuxdg2~ULU;mYGH(qjv5-<5YjYnNd`GxB$uE>#vw51N)0_|`>=*q%#mp;D?JJq+$` z8V@DEd(#r{yx_*Ko_b6EOTcZvS;T)Wy;30fQ6v&6mUw}9Y3_2$jXhMU4URbI7dY}( z=R5cxh2XPhx^>fsfxAHSKQRc0_i(8@HTb>rii$EPt?3T+Jdkn+Kv7|12?!Xno0$5AYO{(+Xc|t`SV{DQeP;Ex%OShR{&iOv6|FnASCn$ISFEQimgk#{Jxk5 zet$`tHA~U2HJAJFQyr+Cz&rB{zrz~!x5K&ZPfYgJrXJ*w9le#=$H!>7&F`cdI5~k^ zA)+iwwHYotp+sY<8fBj&-_I%6oWv%|0zZR#&c^OYiLV$WltRF+VD^e;^%o@r6h ziDV9UGk(~~;1;yCNZ=G!jgWsH=wrZ}%V0Fo#A10Hy2&TD2%f*_u89Y1P8}l1|61$9 zHy?&4m#Ni;9|-lQvDn)l5F1dIh`k&+|)2KL?V0*RRJM z_rsoJarx=13;Z?*mcyF{p1V8_uJ;jjn{W_1M(Ku*^6fEjTQHsoF?Z9OO36J@`n|z5 z!H`FvgN-|=TJi7y|9ws(#MLe<&Z*D;EIKYij9L=OhaHYnAj+f&=M;w%6 zTu2wT=gIG+-lF|$+k?IYD}A8P0H>P94q;`lgX$yD39ItSFa|*Q6 z=Y3rI&SQC!U%F{73V<)Jc{g1x3+kD! 
zt*W8p1Yt2PLROHP`Su5UKXkF~Uhe?Gj&XIqkny3o9%n7*R3zZXG8j}bAIYOk6V$z| z_JMmd?13wHb;11te_}fw<{|4NXclV zQ*_)P$EZdRPp(~47na=L28PF)gG}foZ=cW7T*jKODI?wdM-WpJODHJ!I=6pjPiBPL zJNWoy!aNT`6cP|E#M$Pqzy<&k7+wy!Ky_PDH*{80sfXR#ZFF|#a`TNXnfw?O5g9mLN<9NJ z0QT7HH6>%jy&siU*l^vMwIf1)q~aunNSdg)FCDVEwzjMZ ztdvn6ovy@Th1A&eDQ*V5m5;eJ;o5WW5Wjh#kBq&piCA!ob%K-zG>ByL?oi;51Q?TR z9jy{C=_)P-2n=K5>Dr2*oBAengiVd%YhDD;nagOY6;MX_7MJzYBok(lCV0xpo~cZd z#0s>VRHd%c*eZE+04Q1rFR{pbT|hPz71Xc-=?#WJr4LHgf(;J3Om_n17FD{$)z&f$ zRAY}jmFO)vHzj|WJK&s8{h$dj`?vN}R+9O?HbF=i^u)8~=sTJwAp-j2e9%pkpZ}=3 z<^&{PF8;rkprm8BCR39lx~JT-Vf|<;TJnh!@;K12aV#X+QfmP2vSO56Su=%g+g@%_ zXo(D{4?|SZXA*{CFc{HS__{&e%5yaHt6m|-4XUXfBw}W#x|oYs2jEy4MiM*d@B!jm z_$G{siJvtm9qd=MbUqWr$Kx7QlVZX{`e0=-#kIlvMM(+Uc^k|^o0Kz;t z-bmi2e+P-m&rIAg+Re;JlKC01b(0c$6@Ywimp>0+A@Jv02vNzjX0%HoVD_poBu*ws zHiUH35I9GX#3^MwFxr5fE|~)#`M%Jg1{J%+6%F{ZJ$RzgzO%S6SE#@i952<5DX4mP z%~O=>RLMOI9U&B`xzZkylUFtWk{*IME!P7}x{weQFn$u&qvJd9B2(`Ot^5Rh94x^n zDoz3-8d)k-bHj)pneWa;*dzRJls5P96~}l+BA+n+hShDqBQQG7((eI-Jg36@KlU<& z_BXt>uryF5Qh8m#cXb}2E&%sn26y|)8q;DAk9yXaw;OySD_1YwJ%HM%NA64{v-c23 z`*^+UI#kV4Tm#jI>vGp$Ax^NMza1VAA3y6F?e!U!&P6!}XA)x7H{)SVsGqOzx(u!Z z>ufzS2v^REg5r=?2l~$Tk_Ju>+7QvmZ@vg~*R>rjh%+Bs&)KZ+|Jtk{6`GhQZICoirvrO!8tB=&wpp<>CRr5%(mV#Enha5yV29@B)UdTLBkWzmlp0XQc>91(zQ-AC5ySq&V- zP(~s_ORiNkQ3A#EES1aTS_&)Uu6^tSLn`(P5)UTL;n*O;Vz@v(1YOO;4V}ZrSz*R{ zLY>biH$`fZ6N5Bnv&2%hEM9b1&$d$eG4L$m_H)T+4(13>vp@))`Np;5z)%Kj0Nqz{dwnM zkT(gzW9)xv*FBB@aZi!$BJ_j6vu&}V(I$Pora873wPU*Vd#IXAApr^)j<+GyG^Ill z_4pJKsdajK;eYmtF`IHN7hnQf7Hdg^Y!A`WsJ7`ZTOA-sn{Vb67bfr!RtLgE0EQit zM+C8^BZHHi5Li?1#oX8}#u@R!fQz-y;F)0=HvEUartZSu_SDYrt*tA<;>M&^V5vw` zwKnaBU0t*}J-CG+OK$3oC^Uya#JC*6_8OOrg63M}y?^TH{@0q{ho22JN}#IhF4A{> z<;H!A4@h?7iIH&^@^qU%8`$)Ig3&w_mIySOU}Zv21P81BNzm9gk8D^ueq+t2!FTM% z0huM0&&}GizkLt>50(OXJN)EQQ$qd4ypHq{r6?Sbzk0+;o}k{wF;*DNobR7FC9ucF z;QE3Gf!EF*cg<4SM(@1Wq{Hlzv5p4bfyXWM6}UDgH0EB@ZoY@3w#GX#6PcnW(l^{% z(=3R&Tf?iB#C*@osHMBvccSKL2q>HY&VLR`sH#bMQ#^60_k=}s0?z%fuQcVFwOE{a 
z-{K54cY~6(T`pgF%|ua+n92W=4!uNPk0s1BMnRdC%P>Z>`yG z^3WHfM{?(ByG{EE9EM-`5>EOXvUpFhuDH9OgsOLdN@xe4SA5^W`d)^DGL|>ntCpwB zNso^HiTP@|sjofrq14ca#9e=L?j2e3Ne(#2#Ke>GvvSW<>l~kAXqu?``qe&-Xz|vI zSCQwKk5Su&&4!@rlEx|FhIof9Qgo*{8prLsp%zXMftt@lF5yK@3Cb}FaaX>Qjswusa+~3^H=HU&pcul_|%}GZYY*G0JG4#!$~7mJ;Qa` z_H%5AF2st|PO@h1r52X+f9|A1%}pii*?5YYgj;XMK^BpRu+BRV z9S24v(VO5aI}QMJx~p!*T?xaa*vEeMjibGHVm^UeY}x^}e6tM>E2Y4&dI9*#*x&j} z>Ud+kfoDT-#+=-1v>>*d&}j?lI1x+rVc{BbKhK(Q5=A;iX8v*YMrKV4qOmf5@i3ID z-Z;YLB9(HkfyrtQLf{x~TO#_NdV7INflLO?rba6e`|2`uB*hhkJvjXvmFf_A1hrkl z()itNp2?wXHB0DPJa<$1M8l84uM_QHBQ0A*3%D-?xs5-a0Nei5iPKsKKx!Vm)Iuw< z%Eq8t!iAlEiQr)UrcL9ZqW<)Udgy|gJY9H=7JGV%Zw9boB(%2KtB%qq7U~z*xpc^G zp#@7vh5AL}Ba{si8Xc$uWY&CvB2GWH&^0=?PWmG3Vz8ft5^ZkF!#(t2cZ?;lPmYXm z?IfN!*kp%|qlz*&-AgeM?sGiVzH~e$8oF@w^W3lkgx3Gn-r2`id7fwdoWqAn970$V z(X_#ChSF@RhBsvyiQ?dPYc@?$l6GmvAES0OjYiRw0zn}@3HTV=G*VE-MwWU@g1trB znidI~tl=Z$a7;)b;mgV(3lxeSFc=7!V;{g@$3DA$*K^+2prY;nY1L6g?Q@Rb_j#Y^ zzOVbbulv4*wl4oIj<*lr$Vo2JG{`%%(!X>F*}t$KeX^dq^-0ug_2fF(?@5=FMd4W~ zBCyLXf*bnTzU5YlEjI81%ongN z8Oc8@LUe3n%Y8)W-wDW|S{x$Ds5~CT4eOhN_uIj;QA?X$^jWIL@+3YF-iK~mk2ziVX-`x+1$!`0|!9H+N}y18zFNp>VwrMmIk8H-vY1b5ylL-yKQ6<+?9Ej zk_x^lt_BZhiZi4nyPnQ2Ag@GVKip$Y2CSA(7l)N-D+@!z^3%mMPg5CFGh;Lzj!#Pni``!jHW)D`BlPVQA<*%4xF=%5`IcpIy2%9=S~}X_#U_ z6CP!BJce$kwk|5RUR~kA&Rju>D64l6L3!f{|MtujY9m+ta@l~Hub9EjMq)(%i+a8&Be}rzqL&`k-&P20R6rR z-aQiee-6kz9{Tiv%p>~LNvIL4^00Rx2)kZdrZ@>bj@Xq6^8`&#IP{^qL{{GIZs-nR zXu=oxg%QE;><-f;p&HEX0GM!Yz(>UMe?hGXC!!c1Y#@bwN#wWpuR6#6I)*8?$}EzI zFlOgB0BhYYy5_`<$-AN*@{<|+SaHYRNuc!^uF(x(;fB*yu?z)sUL=aH{VA1H;5%ni z38ZCETq9MS=k`QT5?I>^B9hONMP^$kcs>N$K_7xMVp-KU(Z z!-sQ}vre+)@W{g4C|}jgdRAEDQt^v@m6tG+<&@{0t-Ii#u4~I*=Y!5;p<9HCPPUz< z9w-#-f{R}Wdme^Q&5`Pl(+1^&q;6vm>G~OqQljc0Fd*20C^@K}E9G3oOEcWF7@(z>$N5#)kclp(~VX>4-{R!M&)DYpGbbmMK zu0i@IQytSCh?V}5T9+URTa-imD?X%ccdi4Z=w`2Cy{oMrJJ$Wsv2Og$md<0xsvK`X_~nR*XbcKXWmGykPtSHsDxWGC zKO4}xgSN2|vs7YK3C)WEpUjBO0>Xt`reA1*UHL$|nJ5mN zWLggZ9@fiM5$?lKCJJ7&NhWdQFdZJY<`8F;O2<8hEZQk`PH&r{2>Ju83+am@R)tuX 
zel4DFP;-cCB(F_+JMv7Yrr(C`4JtqG-?K)%H~yXohJS78ba5rNh`N_Djr}_vC+vZ) z7&-y0hY{dT##*x(~unE66cT6SqYu2lsKo31swZnP+wmgc%BA->xUao!tJk!abcHr z1d1SLf-0Zz$unYsK`~EM?#$nu&E7wIgm{_HI;|g22v%iHc~m6o1ej%iDO`7Hq8>%v zR+Z+7-B(AJnf%`y%8fZVmRrtUrD!O3VUR`rrx|XO(IlVWklZR8aChJna028!Y?S6- z2A8YOEC`lq&cKUE%}u;WZDd8N<5W^S`z$XeTJjGI0=Sj-Qqs`LYo2WfYcVQkoPf63yWHaAE#byZt2V2gaKi?O+dh>iH zs2xlLiptQnT=*=BZ`rnKlY@UVifR|Yb-?x8{zXiqhHml)khGMMuxVccf!H9Ay`pDM zh0W~yMEl|)`mJgmBPfUMYbe~^Pyj?J>HeaAC+Xub*u`=+DWa%sSM`wzc_N!7RX!*V zy`OlZ!s8wCKt^|&NyXy_VqpRyfV&E!KM`M$D@c{E4!&y1VUhF#nT5=U6I}ZdA|pK4 zG1ZG7KAh}T+L-@48C}BbJ?;a#rIXvhYMmrYpuvyT9YIZq{~T-yUVEiYvxad_G~&Xv zVxn7?%SF_5xR;PI22_}$ZIciP)-bhFNRwZ%{+_^rjEyLT@bafH%dm~06; zwn((EGG!O-8R{SoC{02|Z8b#neN5t$!xXw4{VyIjY~xNaMclLg>BA9s@2x^$GK5oa zJwO8+I#+0%&+oT`by48U=?XNft4#V`D)$(Hk(?qf zVP&R`r{3){?&MZd#85{QA_NQd-uU9PjW2rNs|Woya`I~Fqpdw8lexS#=vetF9WNpf z(K4ze$?bV|L-D>MSv?-Nd;Ey2`Hp*&u#Y%OceQw+;Gk#%4B8XNq~&)9Dxbm)?^%ub(5v-pHmi2`+V0Ok)`o+6g!q`^ZF1caT5tV z>nxYRU&ifoZYClLh4Tlfuk}!A)$Zy@0_C;`vIwE-L}THIPU$#-lZSG1Ask*egL2|) zS+z4_mjkAQs_HqX4hR$jna+9U{S!>RL<~fFyTc$17QielmCa=o!~mGETqru5QjB-9fq20-#{n@pH5(tXFncqvmOI(#>`bj$2~{%&d7}V$o^xQ65P&yU znG0bLllz5kyv-H*VkZCHHewkT)klJ?GX{kvxjhdIV7EJ~1ZIdJrX za`$PSw1q(j)5$WU$>~lrNSQtM{-_xT^zbX}VOCTRCN(YrmqZFRpCF@4e?Lh&Ag8of zA9p(sOHDGv7oWs3d@*147LbzuMlKiUW*X4d0gB`glA#bLn;!c81K60HsVxZW5doF-1ZVjya*$EHEz$bKN8CL?lvBMcoNXDqV6 zsaEx7%`G#pJ?qY?4ULROw=S7 z_kxW4{ZJpU5^$KcSEf*$zJ1*EIscsRetzwI)_DUAy<29v`2*(3!aHF@tAfy%kR$ag zL4MI9JR)0_0W?vIs#JTefTlu25U z65gX#S)yr;E(oa7eCC9-HXB>>FSANheaMGQ3Px0xro(U{K zg_BvFb+<8QL3J86tm5#NB|V!v2~|_fdT-Ai^PZ0E+M=ATouqf0{ny+r8hzeQERcZ& z;qUkh>qTR$Flg;S6j5LAPH2p`AYarw5DOB4*GDht(J1O)*ik_K=~wp>S5HhJKzOR7r@4<8PTc-M1uKOJX=s!q}MDH=#A-q7XL5% zc*gRNaXk41Wm7^?!1%a0`Z@eGtV^nyeSz>fyzqrO^?8*pd}sZ;XLGwD3t5U3ao>;j zaAEbE7bOQ-6Rb6@lBQYYvFX=KzdRs;{2?Abn!C(LV1^=?R~IwGNr0L|WMz#S{4+oV zsLj^7vxf~_|Jg0!6@A0Jx`jI(ZHNU9$b7XYQ14@@n9Eo^i;D>-;r%?a?!mM+q! 
zc)}%jeX8z5Aeo-~!mYx}25XshgocCVl;AS$z_aXs`f_E~0WE=<^r@hL<+{?7;6I>B zZ5U++KCW45S#p%}isy-T*`gIaNjpyi178)Z6GXe<=phBxCK|@YNCpzj%F2Q{?_BoY zowsR`AvT(6ctGVec?_-#OYq)=D-}kh=!KA_Qta>;bYtw;xfIyYKXb!IeAb-w%X1j4 zyjiKqtI%C*L4%1k9yo5@3F?s1`#z4jG+$cYE?Yz&~`-UXZ@+gvQ$jsJZ!YZ#rAUGs60~&eW+YT!5sFNtasn{dOajvTVYj)*w^2_%OjS$^Zjt-|={% zYFCrz#3)$iN^0X-fWZ~d?L9HK7l{C5WwKp8p-{;Wlb+skj{h>V!BW%~SV^p*UpS4+ z0|V*l!e`<5gAC;08KMUPstZzzRvT4?c6?8$kP(ubJ}nO&=7enSqy78$8_pyQhILMx z=0EdEuG=y(Y~x%!bKpX4OL)TA{{9W>75THWsV%-wset~XRd{1}Y)n7AW^*UftrVaZ zL9LP>k0jNGQXf^{7!Dtt<+8Y)Y!lbxmwg{jpA^6)(5@}i=kL^iPKc(mn?Sd_okoI; zuT{QWXV?vd3}bWv!L1qk5ulq(W%ppQ7x+C`gP>3?J}B79FA9egyR(ClWIc!2JMyRQ z;OWpd?n4NH15Owq&?J_mED-*jgS@v;Amz+p#&k0m0Lptb2#3uE(Q}lCl>Gj?XGo-Y zyoBtE71SdR`$4fL3kIH#I6Qe6|Z-9P6ceo2>}eFX%k{ZG%PCVp>U3jWO7X1 zG{-qYpEzZ*eWR*fbplkuY$mP@C-OCNTHKJ8USSZ6vG&`gSZ z$DnQKBLEB53tvJYK7l-gyQZ5e1?o38>0yRdP>i*LPbQ2n;36E%l05SH$VOfF7afiw zHo(I->)+IgcQZK6V(D3wTf25!o?S(I=R)1$s|{x7|hrsw*wnX z$uP%o=u>E3*_*+JUO0U-7VE0Ium@};8-5gyjB4TuSi@ma{vRmw@uyuIkMwr)D8IZy zT9TJbw_fXq6U(e?X2;Yyh(1iiG7Y%&PMK^T+i*{~jAosDQqoYbw69SzPN3v1KQ5}! 
z_(#KWuC!db^AbJie1mGpx&x0QQ_5~5J5c;HrzaFsV!w}f_LME77pBJ*FVRX?=7sl1IQsibzo%RD9eqjv$f;2Q zS2FemtsK*nZZ1`lkLAHs7)3_hUFoZsrzHXivc@lEvLt=MCF=++PQ zOe|RNgKI9p;TnUeDDsa(n^7_{3-e^a>dZXI;rU zE&k-Od85_OL?5d;sQR!eN0%O1TkMF}bo__1?T<+hP&yR9>%sW>XPTp{x^hF$`>sAx zS}oTVc6jCZpZa2ABHDuUq>A9~@O!n^^btnUztPT`S{M2^c{A&;`x37DPpK~P^^$NJD@$KtmG+kZ!gfwx zU*;erwWfvpjd_I`-z#Zi; z_}_}5bG=vn@}9)Ewv^vkeuq91y|+Uxs(<6B(eF#Q{r^Y37Q_GRGNq?uSFioRGrtPE z#k*$1Go72MCYY!^-JTEtc%Kq{Fb z$xH%OYEw;>Dk_kyS{Ju`8pT3DCYgx{5dvfZF@*fg4cKfdS1KfdSkir>F_VDX>-`D2eg_QMD9UlIQc z9Q%*>@8|sLWy^S##~%BOzxeZC9N6~irOki&*s`Lhii(yt^OAr0cYBUMxa{Gl&V2dD zsO3`4{rk`Ve%HGne)&hNMyvVGSAXuQGs+#BKkfVK&%HZto2q&Hp|83gUQ=2$nwP%v zdbkJWTFu|D`-=PIcSZAcn!o<4`{ZlN6PmK4>0{b#PyN=p4{vDQTDrgW$;0?i^Vnj` z$Y%R4>(O1wFOF9JJUmqESlc)>ujA+bv_Ycf5LNSN-{$$}2;!R3?{lJbR#030B5F$NwJ8>w7RS^iuxQ z$>lP9%0G6R!?WFx zeW&K52bGM-*~;kWxK>TOvFADKcmMw<1&O`=DPj4n=Q}n%;`H~)woXVD0LN?8y(RON7o`xd3G>n3m>o@v>i+{ zJHwVvSLuoUg)2>22Hy<^+(qPh8*apIYu;u(GRu5sS!o~sO3M`6Rl@i3E+XCfSc-Cq zIZ$MAoyi+|kiz?-n>*k($=ANyV(AF}2 zwV2kt*IeOzzp`|2;PgtPwtjq< z&9}=oev>UwY;ms0A6uc7pMkNlyy3pQtYDKZ(v%;3x;p&p=HSh;1@)uLZGA8hyGpw< z%_sRSOB{hxM_{uhm}Y;XzW)jDpOuDdEp2+mr;i>qL=FZ@ngh9!uF7DCBRX45z}C{% zt$p*O+#kNH?(~8>Kffr;F#3SO1RH4UzN!9ty&Zqg&$@2VpOY0n+gO!bI#nZUOjFqD znc)bI4BSR@t~|JR@7@Aibnm><)AM@r-Tcmi{eAtXcQ$)Ac9_Gf^nrEyMqBS%7^DN( zZ1b_klJ>@uV~y)D+~*j1uGV1=cN95}r+a?8Gbi+p zWBe9J?|q#kTu)<`BV6b32P{g_y!JIc{2renb3%ue+eMyMEZuNwOIhQ>;k7x21q<4W zI-YX5W5ez>t+GTp^b?-;!vW7;A=IZzs)gFG~cVO4Pf7{!~^J?3eTN8mlE^omF~9sq2)>!R_cN+ zHfu?B?!ojm<0G#YZP-~9+glL+tgXN{ghx-~U!o^=EC|nbcvsbo|Gs7n7kSfqKSd>U zCO`U;e(dk`W7u8Y&ZX7bPUy@_nS&4JkDf6HH(P$|8hI;!{Lq303u^q|tAP@Q7vzWp zM4=ELahI?*CG9;~fxD_s^Rd^qZZ!{N^0u_iJz<-YmhZnKKV4J*c}2YwYhl{IwRpei zrFr@CHhCb}lTkx6G|Ay#=orF#V%V>8te5C9zPeGDGjPw+Zqs&X)!w$%7I@enWl_Fw z_N>FU=h(SFolmvwsPHlsEjIUV-aojv zZMb)S6b|+ToYdHHN8}wWURNqMDEup2ds*-oON09+??B=k111=lJzTiH)G)qwM-hw! 
zeuexU-?3u{cdtLys05U%4b zMt!hbv~736_&*d($yNNevdr^TkS{LinpSFCb#Cc1nXR>!aBY;kYJT_6^ZE3)6Qtn|z>ky#Ix;$3wo>IR$Y zZx)f>EE{cVUvqEH}L`t{slh?EHb3 z@pCN{{!2|y|&;I_s9wB{k*a1;u*G<9Lc13r|QoxfWdEf1B3W}{~LH*Fe&Yp#IL9sW*-ZwPlZtZ|N+Qk^!X06`W#V=Atq>|!9 zx%!*A#k9rFD{{Mfa1j|bz zr=k5n|LNAu!K6i?@!C({?Q@xjS6W(%I(!Qq{yMIbF^hW^`Q_pqA4ON}UOaVBO2tZ0 zJwmnP1J$vCy1KfS4ezw{=li&9_6v2L7rfhPkM}h$eBK4|9XSNg0qO2YEjgK4+JL0M z_sQa19}R=CEU|i9p=qUk)Hf^BgSh5AW7$H*h4(F~eYEj;3*7Z(9@wDW*&c0nko#3L z`nNSvzEWHqhLl*`0kea`W1mxcYL0l1#U&qahUema_kgrhdl06bL5J*({kqfk}4nV4DE1S7KCm3D<>2Mf8F2r@+?!CY|{L?<89WV zv9U2x^|Z7!-mb`Fd<_rjtP~cdXCl=bukq13Sv3X0XA0m4Qu93_**|*NhsLl+KZSpz zu?)jSYz#mA;UB_7j@YVjIBfCFv6L`jLxdr&Q$7+8?=98bjxcXD@4bY{Se94j9)H(8 zMZH~7cLc)j+PyUVhow{V+@pCJ8n}J`PpiiU z4AIkurx2NnRWP2~QZ+Ki;%k)Wzoy#zqwHY0TY*YEDqCD~ey8~;1Tez=lep^=#-v@` z?J)?&OI#2rYvj#`);HZU&-Z5wz0WQ!p&89`Qt|dX;K#G^Y-??E77pBF$JPEg9v9eT z|I89HL8!}{7{mB($n8E@?<%ej&_V>l$oBTqXhk4n+N#qu(h)i-{ij?41+mk2n+Is3 zE!RzC>&kmueFpMZ%LUsbBvC6bC_b&*w7GDot>kkiNV)!?VdfO_~tB_qn>?(d}QP6S)orB z*DGO*MpME#etF%QS<8GBXtV0W#Lw6(QGmP1A>iy87%ou<+AYm!3W z?p8fMU)E8l8&}k}uk)WcF-H4==rO9%NH(_2GP=w>u+BKo)DiLJJOzBTAoPF zlnF4!c9f6di9Gb-ADYtFK+|*#;u@o}9Z`p(lO1#`F}J*Oo5BA*ST(!RHIwT3cJ&+H@4OaXe#={g&GHAii`HW&M!3Z8Jlt?o`G>rWbY~cDYbLJgam39Y;!#epv!B zNX~e35iGiIJGa1hy`)&1HkA$s&Nk+bQJL1attv|S3MZ$Uc_2O6ehmZkQrKkY%ga|s zVs52DgKa z8JFLnq-QZ`G0dKn+k8h>G2Qb_LGW;np;`&$ufd~GrWfL>G)(5kqLQ{2OLU%w3pol8 zZJbM&eAK3dZ4-LK@OWk*{7)It_cEgYl+m-uPc5>4_SqOkBL%Lp8n_f)#adc%t%Q2q z6#ZG;eQ>Ft1_!&UcupIlg?x^N3V8Vb9{TyKWCyhL72cQf23|7YwU=EkIE)-STxp1x zf=ig=;V+`MOO~3BZZQm<&NZ!EnHGH4J$?dC_5~3IQG-ERP)F~*wvh^6v@BeTDBLNj zTul+yt*ZLot?Q|RX+e0)b}Q zb$zzldrkGBgaRy+OeHp3)P$jwjaG_E!GJWx8#S8gb+mR@X{U=e{GiJc%j7kk1_z~@ z(8nQKF-Gu%iB!SmVw|V+E{!1_3@$clsgavqKOZgD!}4OpFdh$J&yR$9@OHIjYMTqT8({d{p9@ zrHHt^vd4z6Ptt)YgpineMyp1CH%BYMyT5SKBe%PsnYEgG)u1amx3kp8sKz$1t^S(k zp1-Qx@O94(2H!zQ-4zmHaToSEn7OS@M2Qb3jFO#OpCYzwQs9&K9)PfQttU>LV3=lr zRqhGRzw0hd9+K$`vuxg3WnxKP66FJYDbf+jnaLDU`Jp5DoI^}}&Q91XOYnv4(2uho 
zS-H{$GwRJr2Hypc*1>2jWPi8r3T!UM!NH!V;PSMYYqBjpP zO*O#gP7lK4m#d)BLIO#j6-37vF@#_~oV>YcSw_&F5&BIAtkCp}!n>s&MAE}_Po^bt z+r<@EGlOJKu;0P76G^q@)SS{k3CPMhU_I(Dyt@Fv(ipcuBi;oibuh2_DLi zJWY{{5MmK|Geb%H%#0TTRJZETtJP6T!aYzt0DF7jTdpebzAPC6Z%ksDI}7}KOlLtrss0(L_;NDhq0!vGs)1uMximnmOL1{k%57bnI+!kw$bID>>-M`fKZHckV|(mB8zN$a@DHR3$^BO z(gdoProsD`;49B_r?nDdWcPBT#t3^=2Tav|rkawob7d?_z!E^#a|mm%Pp6yBY#>lg z%;7rySw32nUm6I%ToCG~5YQFuA*j27M$p!a4L-Pj{rX%;#H-4)lK`?gG&vcbOR7C< znTU?iQ$tX+bbq3?iQQsY@zMp-pf}-W) zU#tj)^va94)cfL$2Ow#;B2CU_`ue^am5XIepqUkE3{nI&Mv<$S`kKJ!l#@*Czg_J+ zV(;wi+%aq*So4>n|v3%I%C#L|eMG086IEN+ma~axYL%O(!3+L`UtbYCDej{VZqX>|8qjyQOA?k55zS zi-9QpH1Bri97e=3u{kM|N)Lm?U(Eb+0^fN+cV132;xh|1K^k@_tbmYkxWPZl_Px$r z-Aoj2KlTYI#rg3IH5NWnUYR)ZNKbZJ2}=XioDy945VfT+;r2>J%n{G_Y9&yC0A!E9 zo;^nY?T8II#t`OeB~C+i!^K!n;?p-m6wCxXn!k?GSR)n~>9oCqP=pHBs1on!Dub(mUE;+Kb#7mDtTON_h$$Mfx|FL>p%toJ}h+RaL0x0O@*l{Vr1~p8{ zJj~F_mapFLO-NO`@gfs1K2OAsK(}(YHd(nIUn+4lfloSH3xR{tmVCi!;^C1*E)bed zAA*q)W@IsjWvFj$t3R~d+Q)!LE3rHAjL5z8a07*gzz5JSOBa7}FleN3B`WmF55;^k z^>24mdiP6`nd{W$dia5ITD4+){8{mVi~KZ2qH>JyBikZO20TE_UbMVL_nGZF^;U-= zw8b!0d@fBUS0{uZ+;Z&Etnh}Kz&P!68dE*uFFQ-m?es_5fh#f^D@00?BIz^{RaO>l zXIPAb%nvY0G&R5ioe)o`rX9(kNRSU!1!PBv6Q%?wyQcZ9(0o_GN;~CYt&Mi9FO_Eg zj#w#!T;=|W(sS$zmIgm+DeY*Hn40Kaog=J6RA*y^kO)h)vgpa&Wb06;zm=_~Q1S)2&?wo#W-2=lZVu)n!UukA8n`1~inukP@1vlz2(?A=CTFk441KcCm2wf513ft>+ zI_c{KjA52<6u>sci0w-qF2cP@!~kK_N#NKyJk;5hfY{gACQ$j1ZTch}cjK~>L(AIR z+LTG5;xXj>f@V!J>PjrV4tsrr2Tr~Sp8zV-gSZ`z{$3)rSF>QN5MJFnd%^laAWZwQU%4oP z_NCiA6%;31FKsZ4SFDQ$p3W|P?1u%TFVmik()#4=WF{}mOv#NDEr6L5?}86C(85a9 zAj2aFGxIwE20%Y)6#S>TCy_S49Hvm7U}{EAN^~B=@Ie{~!V2eTMUi;qBHz#fTr~4` zyR?%+Xe_4j;EZ`|Y^>9>B+ML2E3!^7u_pMd8oHRg_aw^G5`+9f&B)PtjuQU?YtsR> zG_>%F$(a_GKD5aYykVF*cHU3KxwG;YeGV$iG*@gl<-txd6grwMj7zGI)oW#^arXw}`XMyjWSJgeV0bT%Zl$Ni{a7?l$2D$lbi3S^U04 zueLKR!Ih3Kv-H7}B_sXj9_n1S_m_(aqIu(`hf)`lK3nUyx;bX5GyWMiHtytQwThAp@6f!q*GF@m6syY4!x zAX)69af!X5AMa)W-)%;rmbQQIo;}CY#{?K2ZJ!hZt{i7!3FubUY2JZ2E@M`_x|^=y 
z{=%5O#$qVMDU?9T7?u2JI3R9fQl*P?i|BOC3U@^?RyEjxfSr4GO@}(5hsF z{!gs&c~Sr$hK1(A7?ci&ddo<+AB$W>%0VCD2MMAOv!%(_xqiN8jpv&MzGoOwdzc|G zIyj~q92_L*+`xcloh@7OX6xunUz9Rrn4zdO9u9x0(llfPXBhv4&B{ali54ypl&HK72fBU<)FL z3w-q1j3yWWrW;vMCm7J~mb?|=qd@UJ1O%RMG6S0)!}c<`V&d#!5LZ3^kJVBVfnGKd z?dP*&!|XTJow`Xu1UZHP8tEbM9WpiWutP~x6uB3roWD?`y&eIy531#PLH73z;#xpY z1x_fF5&%uC#{MWOdjft4Qh#r49te7SxSLiar!-}hxDp2CH%eT>m*mf=yYYAH zG|JWG!36r-tpc9TN2+Pd4CkP5AJgp7mF#oS5%_cBv<)h1yL$^#Hb>vXokL8u)+e4j z$@t-eaceu<+Im7J+9rHkXcQGB<7Mrv!y1@MeWGI#8B5ody~f7n={JjrM#X0dA;LkA zl44ILy2_?W^|$7c(R@XGtmC{?Cd{-tL$qyjZ`?_fSSI7AZ)Qawc8u=WI(NuSsz5wG z7d6nzlVz&KjRt|jGnLIkAVLWsHFcgsWqNjf04fX-*m?RKzefKlUGl55*0-NKNDha*<=`h)6hL>@5kEQ z8o0sdFMd7asew~j^kW$hjVcFG0sHzR7E5*JRLyTz<&|yD>HYpY_a*SlFK(-HFeliM z5rZ+*<$Xi*_r8s*o_uo9?XE+#(R%psH%ZTGtBjtlOd;|2lfCzqFJFaqt!C4q`yOG(I9=>m0 z2e{B6O;b?qjN~W2t#SmKaV7k}!FqC5-AQ`Mi9c>eRop)I8}L2j zD=lL?GfF?d+w|j!c;&yPIcX{qF7p32vVB4X|E)CZjY=VNJi<71(7#t%a-fkO&WP9p zE_FG=!fa)t1A;>*uwOd(BM++F1OhfdudE=X7|1w|FXec~D5FoNe(?_?nu*O^Rwp%F zQ~ZngXHtxrc-1|^6vx^e-i~mhkr1=+p&1RUT)YxMDaV+}c`R3Hz80zW16TSsgCxXm zMxCIViJKKe}n zrmS6wr6c~LKg&{pBmU*AC;HzAF0o zhZhMwNe=$Anz8RBHb;_ec1`HW{_3eMZL5`deV)Z%8$31gOMClaWR68|kiBd)7k z7<-9@5f3vGU^FlDsdQ#QDIqGqg}@1sRuXd8z}PNCF(yk$!VirRrISh(a-e&YWL5_H zxa_XAHpVm>)+U|{vk&X)HMEX{gH}B2l&w)d880gMK#O^(r38h;gDNCLoJv!MB%;|w zVrx~kl&`sYa}NM+F`g85C+-Av{zktTWlEfLw?HSd)5!Hs0XyWKkaWgqPM`v&$ceIQ zmXqs4cE`phWFgr2yUw~&+Z+y;@RQQ!h6&*mnYI=Q!cx@;SlosdkQbW}G_H@0N?Abi zrZU>ziE=N?QBFXCTP+{~`iMfA8U&OEJFth)Q8Zdw)kp`XGQLUJM4HBb$o~kAa5Py; zun{6wdVwSgt>k;m?L;SNCnU6uE(U;=hB9Ry@Y;<{Qfh)c>=l$DK`$u9D>0+oZe zo!Hp`5i~v^dldGE`nT;LxQl2wVI*SmSNR1d-4ZBkP`TZ z;&0W!*w~~2K(JNTU1c};M}b{g$>3@?RrK?MG=Qt|jqIX;xEv-5856{iKu%(b`xf~V z9X0GRBIO6kz-$s2;b=pm-qcmVHOtuNfoQ(uW)CBLY@>3Cyb1J=tEBu=M_d}EO9z`3 z8n{Tws-izdj3$fd8UR@OF-m0e{zb`&mJ?VxwnFrXkP81f9kTkc)b<$D2nB3~;5edj zi8`rjv7}>xV&|>-gUyzK#K@oD$3MQmo5dP0p{S&IB_S zupR}%84~=K09!86L_TD8q#;`f;4ExKS-{muZJg9zk!d1!puZ4BON%XFe~zDzsDRmH 
z7i=VlwW1AB2^t+@w67@BL<;ELNX-USRglz&1b(*OFX1SKlmI5JWt1pca}Py}cBM-Q zwC=E3YQxkZa*W$}Ra>I|>5WL8njn-L_C0Y|z|RnhxQG@PtzQqCq}no9j+m;REi2r~ z5>`G0s*Nx#h9OGxryH`hDLqOrPv64xi+?)M`E znJJg@xS*v4POP_>o^fl$UT!L}Na8|xM zv1)yb*k(yDt4(gs{=tMx>_!1?7VCFAGLQ2D!$U)pi1nLGKG|G}HYHuZ;5X$UxxdgP zAvnHY0i|65eF0hfI%TF5^65Zn1j(qq#o;@iv$NJ%Q9H#9U;d)YOM6V!7Gn(NV(+3t z+&Nv8HPpYHG=ohpFAWmGe#T?8T%W^Do6PJ5W!$43_mg*MMj+PGG09!Db)aBG)I8a{*2TaaisJiz{AMyM767*A6 zg*HMh0hCkXS&ukY;bc<<%oU!$Q)(#t9pGdZYxpQXmtHUKy~f(!RCgWO@yGa$Ix>`) zEmR2XPBmtz_p&r2q>+Z=-B<@o+-FdRN*cnOH7vfd^jHQ85tqt(KVy{NE)DfE5(!IA zP^r@ZlMxSu6e|L=cjs^bcJLLZwK=RzBX_A!Bt#@QFpumJW(`vys%#1)>0dDy<5%GK zAE@le0^c_a64@7jN2G&Zb9(|-5EeAE9i~%ErBj`WyiRtJCTenW&79l91&nqg>bf$B zEm>4`nyv*S5mrg~!3wfv3#yaOrLo>hQyDY4G*WAxbzxy!g+~=n3xiB_8GJG6PK6h{ zO}UIk2QbHeD&0QSNZ8VhAXdG@@(xW5(=`8fGnmw0Gj6LjEw5_Co$L7o0)-U9bdMj# zTI&)QSm;<6Lp_Gp};Z^N|nR$GjyFZl0%RNkuYy^jc|!z z0oq6uTD@wOce^wQa{ZZHN1mX! z6@-PnJFl!D)?dI#?j%c=Rt+8EH0x(i)ZLlkVL{)HYU3D;)vGQSGm-L)@xRYdD|xmv zEZxO6i4CB7Yq(8E3OD@AYecYvYs75(a3Pjvd?W6Q^tk>Ism1THykANJTWl1jbH8IV zR#-YLg%tj;StH4jD0`e>5L-}!9Xc!2VWEiH$pO$Zcd)aUwV%NdCBx=i#I_184Mzs( zvaeFqbtLF}-$2`nmdW?VG_IPC-THl0zNOg3;Jo?f$XyZ*vCrX%+JpYiJFA@c*7sI$ z-SxqZdIo1oqgKG;9~!COF_kf?B3VN!{!D;CKhKkJHkbEGRp*L2hzAoAKv&A0%4&Mjr@#%-kZNFD zLK3nj0x9`M^8nKy1cbDjue!V+&&j$c+%%Gofv3v-1s{3)jMUjgJ}uZa4Nqs$3S~dS}pC9>g|5W3y|mF$?hV# zw2bTNG+7}6IuFCcoSh81I$K*+D5H_?Lt{fH;9;Fa&HQ)C$YP63m@tb^fbNTdxJpA3 z+i-wU4r`#6G6_WLam6hz@`E3+lM3X`sX`xD3OD~_TPM^$29bLyLHc+}R>0OtC4>im zCX>xMX%b9r=T1)FM?{64kpAsZ3Sk;pALrJ7UEG1{CTMY)tZx=~Ouh*UY6?Sn+B==c z><(xnPh5B1bsC5v?{)V@2~!NK{4|?6OQ56m@;z)9BvvUE2FW4FdxaoAleP~a1G!O-$Z-c#yp!^@2uv?^ zqV|4MGk|uK@+@pS)rVfxBg}`(SC_EQ@DXljy7a|LIF%~BGCAV)+(=d&;ZjX=GkRdZ z<)lU#%QzQfDj6ToDtAdBO%G%zJYf4bZaIp0<;t9(urh;v%FWjof07h zu~EGEs1RD&xFB>*wxoCI%kRN+7niY2g`N(ww5bS!J99vV1X#>*7(`-BMKIx;{w9W2 z{*B5!Nd{>lHBKfgFbXt=d2b7SOZpbVOa%x;l`$CTS80t*158;$t0GneBOs(jp#JrA zuZN<{zOJRIe+y<~T`Oa*L`n^AO0dWXv(ieJ3u8%UC-ZdnTHRV{663gpn*kb{xc74Mdw~C`X@pN 
z9)=<}MX@vO_@@b9%f2K6TLHEaFg8Yjm}a0AHgNRIVOtwBIhG2}0`HI(Yh2_6#go0( zQv72l#n^Q5rM!K$wj6y6y#_K zDl9W%v?wHb*~AmY#X^L{AjC8B6q+Lf;BYOH5E;&i`;9sbG2TBvGj>~U&j%#SuZLNX zTZMaiQ4+=26(Ak@3r5MmB;XmP{7INV=PywJgJeX66hU>8qUALtd6WX?2Akv=Se;2HQC6ilMWzmRU=shsK+a zmR7<#4tHBj08_aVds@jiz`~#Bd4pAJ`ct(aNo1G0NHfNQ7(Ia$-04+D>4DI(<%(;n z*^gf7gtcLp1IXb&%9|oU^0U=JubptH3_p@gUj&9*Lmr@X^Dxjb!<-+IA|}F`1}S1% zKvxebLyT$}zWe>?F?_y>In+ki`*n54ZD*h`Cv;X|^)bdB470|$E;?@DKDb9_w$^DF zQZYs^=u|xqeHFjN$Gl0f_jo^md{ZY!0TDT6Bd^*-ra#LWi+FO|&taE*)%^^K-?hay zpeP3u#3+c;AW;xjMjsX=4EtW}c_Wm7y(p9f@+`lRFqWdrKm((3!rE9G;U1A+UKj}O zQYxaIUI)8LYkr+_31X?g$Re&zofKNL(Eqs^kqewm)oW;=AovQN8a(TzG-?X`cNR#| zj2VQqc&YU|*96_y&Vfi!iZfCqXy@dT3%3AW= zgHpgCqQxBzX0x*}S>s67C1{l1hhQ@}iMWc{X2c=_)xvHe5ofF9+p!KpG1NfHYm?4O zS_qQ37En-_N{R9%(&AK7$;X&Padd=~W4F6H~`wHHSQbY|11$aXcMKi6Z%VmWq8Sen4lo4>c7oA&i$j~o1|U*Czm6=1j8R@ zd7x1DNXEppfeqEta4s=2Gt?zQ*u9^TWFaz-At1lZa+;fwcrVZx$oD5C&1Ygzt48I1 zVNoxQe!5g*f@E+Yk&7S(WE2V7WjiHBGCEj1L?SOyT3Ot_oUghCqRE+pl zK)EHz^dbj{XU}dPAU#U0i#l9f2-6bok0uABF8WOTnMxy3=K2G)T*xuXV+5YltdOHi z^!9)0Swsu@ZA`q$0hjQY_bh-qv>E^qG*@!r#n`4fO^gSeY&v)l%~2EvF(wkm_K#$k9}NTQ16W|K z5m3_ud9sBX=DA)*Ugc7m9y+Hp%y3>qtaX^Pqx1*Q#8sEdfo&Gy5h+ut{bRz_7Hz10*Le|_^ z#>Q%m1R=Ou4kvzq^Ky%@!F(EMnPsN5g{iq=I(3gTwaVD2>q=y9H6wbS z^J91Ln%v8a9$K&-wmUHlXf(vuMxGE(pAG=PqmJX-AP2$r=erHlpY!F60{JZ zj8WE;>`NmbS8`BUH=MjQ!9>Ecv?LOFrN)-9H7q~~O=EMskZ|)>Jb7s!c6>eHJ+Va- zWH+%k6dwR$V$1Z>PRu$9#s@J2`k_-$MU@VdfO_YKnI=t26XMn~!~%{TUVT>wY$ohh zrEAd`GayDTwJ--sQibizMG(H_SqgV5gMt(z!l+q!@3C-#_f(o7H`A4C8MjfioD$f{=rVK0 z$7j|@*tlN^3ZEI~W|kn=U+^o5g`?S##E}aJ7}s&4Vxm2rAvmHGDd|HXnPh=q7bJia z*(!=6Nog6m2^hr5T!}>?k_$sE!iW`6P-Xv7tkeZDC(s{MazL~DA=++hW<;Rj;$aF~ z}#Nuem zNIuaC%?t}_`4*BB(-3)4`%SOMrnVd zI}sp|s=*qaj_Mhs0wyd$wu`tkt3l#8<6=-z|9LtsczK#oRH~{ul@8V`W@qnE481^m zSlOORQkM`nJjh%6a)j<93_FA$_ewP$`pWKaZ8hA`30A)gd<5(1LO)rd{>?E};DSud z(~#Jn`2Q*<+?OPF%(Ib0>ZYhk_;XU+BM?gURTelCH>(W-Rq7?ATgtXA`eHjXQH1cX zxJD4}9AT7|F027shBd9?ZiRURYc|84WDYPdy$p=do~Orgg#)xez1q(HRh?r5oAwW~ 
zrX9mLrB~9Dm+X^NO-AqN=$S;5H*zQ>ohI{RFgO9Gl2ZJIpm&;9;=A=B4QXc&tN`nFzqfj!Ut|Y|o@6S{rA@X&8KX=^xbYR=Aq= zGVTiKwp*q%3W-mO*WB~aPV@OW5^Z4dZ=qDrPzzVdB73Iu?%dwHr5M8-BL&V64ZSfz z3vv?U=2c%-3lv^+q|q|Sh;9EMFA5J>##Iktl=Ag?Z&SxZ)K_7#y;2ro* z6wfez5V2bHNOk1a42HfU3r#Bwu^z)1rzz%4?0$0(T?NC$S@4C+g&3AO2wLCu9@^W; z6U?N@dX)elb-*c!#&J7!SYi+}*HQzri9e+THnWXS<<+QC&+z6_0qK~5FrN&PBSRU< zL8Bvu5;KkjkK*X4LYrvjSRi1M(aui+gW$rKA+xigB#04tGQf!tNV}W2DeP3%inA+4Kc8mJA`7P`Ai#(m1L98J8uMObwdU!M|u zQ6K7-yMugDE%6gNqFiZZ z+6xbey`~xj7P>Hyz?hhsxSdTEd1XXZ!WdaE1U?Pp z4%UmJf1fe#qyfvRgg1oy6I2g_HwB4fK2+Gss%9yJBGwf!SnL@IyNI~S>>K1tExacU zM30obE%Y!l9(=e6xFqG4E7vS}r?Ek-1t_sK;SYrpp5cL`nIXJt2^^^0GGc5xgB$H7g!H2Fdtf0awQL>Ne&gdi*M$D$P3|fPZ zK~jGe=w3I6V~V}mrA}E-j0KF}rlpE$m?!|CzANcW@M#(cslEwjX+yZ)yBTA6q9hT< zwk+hJPNJ(Y=r!s)4$2ddXH4Dpx>=i{*HH(iE=rkb#x|TFr`l^{N-WiJNs7p?yK33P zY_7=UZfb?}j)R8S(W-1=Ou#x&$nhlSn~G!NgUK{2z3>^;3WWn8%rAikZn4yox6|#w zaFf*v-#?e8~71TVg9S5+f|+B7R;EV(d(^6DO`I-XyY$nECH zU-Xwx`id`f-Tto+R8k)+eu;@j&;RZ{|KIC9w?1fjW(+$!}Q6fzuhLV0L!`} zIuk!)n@yfg*%1UVY1Ti*$gI>|d-mS>uq8VB7UvFF_AVISH+!Et=0N>uZ1Fk9qzC@+ zK_P@V(Ot`#2CW|i-f6jT+sb!eJ^mQ&%E^;mPp4--_Q5r{5u9&zV*C6;+@OLS?$bmg zIXg5{=uRe%*fN}wP}&)Xe33<@;3CNR(TVB^`z^SytK@8C6C~0G!HH|48t(q znD2Yb_}`u6|50aIO;ZJD+}2HqB+-2gv@OxLoa}F_V;YtX z5`CnT-cyeVrAx;6v7|qrW}AFYwr&OqxiAdxm8$sU?nu_E@}^we!Ks#QqysZfSmu38 z3#x#aTS=Z}Cr7e_H-CVzPvSuZ7rIP_k-E?qP3#H<2C@{X8_H&u?rr$S#QWn5BfJq_ zKr4fn5`D3gmMu<=POM`5pS`<@CMDWE@wkZtY*IHn_Cj?#LyOL|9wgz0( z;VlZQWG4qA#%ddf54QL2{9^7q$B}B#RKzjm3Gw z<@wPw`7zFkR5J+~MNEt=#uyz2XDe7GFo^pEYKPfocPK*Kw3L!0IJ~nR zV|-+(07aQXtZQX%Ax%B>{+@`;0|M1onE(0^!B(R44z3TN2DT$68=ja~#|6;LgBC8%NW2$Xn}Y=t1Y%*dtHRqd-%1iVC7FE}$zSeAQbe3pjV zMQ$^*ur4B7e4Y^oyy_`MOssl=~ zwF(u!Pj!B3-Dhvy=0Bai|B=Y=P~_VghE$XxC5i#p5L9}mWnu@A7_dH#9p}Le%QQktm-d1Y z@M32Q9_`JchH_9It~}d#nTU3RgTzb{2Yvtqp4_f9M!qAHt%<%P+bJ?KiF6QQ1oL5S z>_%3<{oFP&B1AZOs$V>*P-?0C(v#^V_6jB;*d<;-Z9~+%>6{Op+dKfojkD_a0QHbD z(KtU3#d8g+z$mG{nCv%_!OE{zhgutR`rpT)afvHaV5Br~y)d4*DdTLoldcE$C#e$0 
z69Tz$LavxX@RF9&!U3PKC6!qV)^AsFS(~QJsfKyVX-O=ydnq9_Ff+&{{C?GGR;V>Z zMi2Xgp!zRyk{e;nx~R;e9tx3cy~s!>o63oJFH1uXcZyt0RToB> zDbuI1-qvL%w;mX&#X}JwVM?Na$_+Y zx}D;2?ow{YZ16-kE7Qo1NvC-Qyub&I zE&|SFa=_NVmey@J$B0mBe1gKVS{%;;Mr#7JKeSjRZg2>1WHD=I<*cyh?7WEFkYi7D zHG*e%M{^H!74Q-MGYPEt6MhR(eBrV|0+T~v#8`6mlqva1B2(Oe?V5;d3POqc(UfGJ zM2Ltu?&-J6U2y$5l{6wTP9oqsZ(!w$&^lS1#J$)mBm3a@!zJ#DY!mJsOT|Fu>RIi> zIYP9%7J5h0Nw|sMQeG!rmMmtD4@`hHnXMFUnnzu-WWk2UIKNn?JPfAoD|DeT5-+g)sNDbK@Vw#b~a(AkKVzFVI zB{A`FNq&{(H&Ri8$J|V=B|EGVLUX;4+cXzd4P?Toaz704ZOuKTujMBikfb6{Hc1(q znzU>ud`@}XhL|$Sl3Q4q3X-Qn?i~}fOiVPaw$NFk4LvO`Ez2i#M|Gl;=m=a4Lg(5@ z<9-19<-|r^F6_|5<)lRS>?SoH^&b{xRU#~c8HBM0p3!fuIeVpMfe=Rw?f z#O@&7+7qXjv9|j;V@>s;?;@v!bWUGHX6t)kqf6!ZC*G29{%8UE44gL%I*@cANxY!; zH6*=g(xi{qP6G0-50k>{B;tw_Z6FL1JrG%CH^z4vlEOX6X|usfKTg27G`PhOdbH{^ zAZ_LK#Hay`WS4?kG=?Rb8DwK1Suw`uNhi^$)167tVdn1>DB|)84eIwuX(0Rj3za2*EJ~TPQ*?lp9dGt|s2%5l7?+4`wt{`BO~`Ei z1;)HE<3!PAJJ{!PjD|tDz3iTkv0{ar6L*V;L)}mwjt6W|E-@_!wB%G4?S8F1&bzc1 zB1IBL5{ek=LyTu8Wpr8eUNVDdVQImqGcaqX4CF~}gbs=j+~6jW5*h`$tyF0_WY$Wn zV0wY(M#|6hY=~8R%#1!l%z02hVt@Kn%|(n8<=Ts44-Zx1pCOh`eX=_-@}-PAH?W{?1L-d1}kkBlR3%lWRUVTQ7SS#ans(1B-~*hP9}9+ zstPBaavG5&i}iGkoqD55c5B?M=k(A87GdvTo=bh)t%Gn8QBUm>rpuQ~Qr!ezes}&-$tby0ac`A0EDoRyk3ZQX9 zveS|rfpP|RrSLB+C-jC;gl0{G&;ZGxr;SOZ@(Ap_KFqqZsurli-lRW^vNTSNNlT7} z(308@Z}?pqsZ&m48Kxa^t^${XMBHnHrc6UE0#6O*HrydJJz*=D$hDQ>PtY^r)UjT3 zwWQm^{uD-mhey&#%pb3iM{W|r3O=*X!6g-!E3>C&Q_@m-l!1oiXP?xLB^6duCp_V= z&<)JcGP41}ot~9EJccJj9S|)Qb~*v+Ph~ZhZ^-bVGW;&xBip$cf#)PEfbt273H!LW zg?Vqn?#Pk{H7=19lVLdRS%CQtYnE}dVfm-DQjbKfr+HY19MtN;=Jl8nzKF%}7n1m& z!e-PIj^cl4?|}p2PjlD|&dJHznvoyW#6etTC%DACYwI*tRJHhlb&}GZ2EPjxfS!R} z8tR#USOXTjb#z5;f#+Bx7tg~*N@H|~PpZeS?C$93p!sno%HA`KIK6SjaDd-Q4rHK* zWEuu`Qi~#Iu3l>GOnke{2M!#NhtdI2S%qboYXcQ;<>L#d%LVFh?Cb5%0W0-M>(#xaL z2U%3(NY!ZdSdxa!J(cKj5(|&;4^Q;2$ZRFi(y^>}gY&4JbpuX7IHM`S$Q+@X?WBJ5D)~Lfr7YY?+$CBN4_~J@mjhDn{sgskE zFBv~`{jJFo8i_@Hmsg>hDQbuwu%t%W1yxo zpIZ63d^{hjr!yi?NT(;fHj1JSby6ZDP+I{`rZLl3@nkYIF|sNtkskR@;2koKltiC! 
zifWxUyPAYe5FqWAF#3ssQ ziQkl#i80(?D$pp`{$Ev~Q|klqlXf|Y11<7!j*}2?bwalC3kK?mjyDDlMBUl#e>+V& zJHJ4m^c4sMH>ngrgz!f3@XDML4*!Y6-bAkxqdG<-n5jVZQI4@afqx>@cFZpm&j7lY zpv&V*=`WHRJ(*K-9vi2Xd;z)t>VLlAS_vo$#)Pc>eNxC*s}hro0!HnZ!Sm&Gju+Jz zOe`jFzTZBn+{f>Lbp!=LMr!OG8N8B=H%t=jqpJOtV>5%b<6ODKH!*&B(X}?-UI=19 z@k{Z-LJ4n_@U=7F%^LZazKt)Uu@u++=ckv}j4=!pel_n?Uf^FMH}PpnfzIP} z*U7aI=8C}Ci8H${Ph_Azq8rhnxmPD;@i zSC#ZNJTq2xlsK}Fb6yi)&);dJU$oS5U5tsHjEl=lG}t)M1v!@`t`I=4P!|`Jt5?P>LQcBp zFY4g}|I5;m46NnzxZbzfGK79i_ko7cy6e=Yn#866y~W{DazLsnAtH?8L$v}`TwE^t zQt3i0YqT_b~GKl^Nq z=;vCTei62T2q0894A1iGIg?X8;#Y{1Gpv^k+w-^D_bBGWD7bnS_{V^ zicO5s)!rQ5C^P?IS`$*k2Z&#g9rUW@Ik*STO*=|a#b|9zWa~#phjc~`sYh5BW{fER zBkxXNqkK7&Az7<3$qYpYqEPfma0n+aNS-S{#_)L)KgFuZ%DkU2$CL?*(lwH(Z7?|@ zv$1tS0c-ux|(h4-Dgv8JG*M#wfnT z9f;YPIQZU>s0Te7FC!L5_(d^fye*Si<*n>VRIzqR!X@sN5Ia5&5^->FGYX%6E}roP zY06+jaSFq=^=SW;r+p!N{}Ab$z#F7{!}_>bU>y3SBPi7evw353F9ha8Sgjt>lX;xA zDQ+8%^&(aUAI(}wlnITzHMwn>tt>UO7e*%|H@|W;ZN(rxGDcsbtw1zzqrjO|%N3M2(CV zkS|6k-KJI<*{V9zii@Q%VPI3hZITkwq9Tm`CBjeiq_7~=Cs^Tjv2Jv-8n%6? 
zL&cMF8JhQ{Vy$}e+@vADvHPUw_H^#h7K(zFy$qoc_2=Ru^_GkDWFtK?CgVMzHd<6u zn&@yZRV&zTdO`U*!74y><=xJd;J@{dR_EWZhgS*IF^}^ru%kz* z)hq*UKo-aU#|RVTXnV@J{AfhIxw(hdSf*O(*Z?tz{3e1Pp9f_&74GYJuo<8qCs;dmk%0&NL9*T|wdN$gOV%p&YuJWBiL zsKnQBHViwPnS&}kN+Ld65}kOT0ej|Pzc3c`j4Ts0^8yQhL`^meqg1AQl8})`Ytx;4 z4BicqCK46!@k7>6aslAtC}^3y3lp79hxw)j-eZJRR~eFswBY5;<3tLi(U9~JhRCC` znp{?{FHjlDkhz*rPo@eS>dN7j=ziW$@J;rWODFj|`d5U;MZ>f_iW0X1GWMN>F8757YLw>0~92P9Zc9BOekf;y2 z9A#qvCToqTj{u@^`-hT_%;yv5&G^A+7OzG28+TSpDBM|Zfe9$9$m6qPF_$m%Nr zg*QU9jr^mOCn)LkZ|ZC`DgpMCl!F+QV|L!{!qMQEA4Fa(qXbx?6;pwbv_0iQldU5rxRer%FX)M z{__`>R0xpR1=dkkN+Azz>sNaqV6)T%PiYP%h{ZbJto&38r;OKWi9MWBob7+5(H9Yc zN?32F6lXb>$Z?(nP*@xq;$aXD9^vS6vG6J?#G0&}K+aOsRCF#8PwZbb#%4}<8kirY zL5N_fAjt>cr1R@BlWB5j#Zt3xi}3~YvPRn}>g3&U<2HcxQ^h$Zz5EvfG<8Umob$*1 zTKnTes)Kc zvn(%h)LgbZfr7PIE7YEfep&`NIWZ_y7gTg`7dnbRM|-@g9)j?1S_q9A9}9NDt^49a z^zHtSSO{R=2RLUx3&Q5fV62Dydu*V%GDgOv{qk#} z+HtVqzA#4mOGGt5bReefXL>}knV2b*xpDS(uu{3&!wMa7`rQ;NaSl%IvFRj_fWBFV zVX`o$p4j@!%HX%0ln~=dRK+8QVNw`039u9hbNHB4f9OOFhQLz4zm#?E5QNM`Iq zKLZh=xNgck&JJ2FJ34U+)%ej^#0d}Q)=cv>t@DaTz9i|4ciiYp%F-?9)*E3jIjf5Sqs z(pvX+JS0|JY%nNCtuEg+KMJVV*>-6?C!th@-c39bfblLie9H%43kIcNgTImy>jO_GpfM7WhUwOUz|CT;XJ48c6?IN zPc>Az7Fs3`(@ASqj#TRmt~TS>IFjT9P)|20AK9I8%Z(`$4!{_oW^tzeGAt`r%Bp?{5t zj!ZEJV)X-PBvDx*^TIlkny1P-j z>KN4siPF)NZ)YQJ&ZcK-3Ei z=|ei1K~A)0v-NQrm9H5wKz&t~lT1nulO9ryj6v}m)S;|tI11>PS@Jl}c>UHy<%S4!Ocdz1LIG$`|MQUdY+WXj{ig(|I!3(<7%bjF3ykH<>OYQ$Ysc zdbZPKrLfek=MmpCBwDCC&0IpunV|ZOcy+cBC#+-GE!zv7=%eUXXP^HcxmnN`m1)$! 
z^b(HIl9Sk(Yk!!S&d`L8g>szJly^JhO-+PjAoQOi7i|))d3pm~72As=;p7zV*AukY z$bmY{4!uTsQQ||#5EV<PAp%jskZSRaQq{nFHKj0@z@ z4f4902k1!40JGv`DQ>3>-JHJ=9n&d%Q+$hH058wkNh?9##Ni5#2h^$OoR2W`k#V*a zLY(>H9Bp+w8IV++mQiW=s7u|V|Bahve7`8p%vTJQae_Ba_6F^Da0ilbf-p!Oy_|$9 z{qvE6F*whKc9PN1=}XwJj6g9%_E2RZ7SR{s;Hc{lfY)d;bG<7DML~(&m|kF9mcwJ8 zt)FlfT~+lv?|>VC(FOCGn_-=eFG#4Lii5%L((k64#K#v*t6$Q#b9#qvwpWY zIt|0SMMTH_ytMy%9O<^eWJYrh3(I#=Tj9+(e~Q@1je#&{zHxx{bEtKR*porB@BDW- zW8pvLmKmKyFQ{C+QRNidmox@U)jk}>a5wWzBgcZr-buoBfYR2-TE-ckOb+5>w+Zdy zd3W$mj?SsTY*ns_o5shh{><)z`1Cl2C`1m9%|wSWe;PibT-w_3^MtJpYkf%lY54`V z;5kbzHv({o-YZbK9#j9<+L-g|ItPz;ZnW%TGWH<|Z*+oXyWZVMg|O7}r!tN9M_G-Z z3zXHgI`N;EQMFRpOR&|8m~nTtb&|m{}>!2OD))tu@2;ap`x~P z=lTsF$(!JO@TP(Szk4sHi&%akZ^A85qAo8Od#pe!x1bVT=$dGiUbw~l^> zVD8|T=2KP=@T3Vr?-^n9;ExLBaoX}wQ8DejO=gIO*<@?usJK{%$`GC= zX2sSfIZ%g~Blnn05M69!Q6tuoh0Gpv5pW_TFw-&6A!N2TcI0vitDHlf!?U?g->59< zy#d!H)*rh;9G$ZI-*GSGbY$z|U_^~NMiSuAtxk+Sbg9Gn6ixt7PTdmjs^HAk1FX>s z>zx@nL7|Px?Uj}YPLN=1pe4L}wSG4%7(Mfeh|P>2>cBGPjp|H^G8%5{CsiZAo8#Rc z!*r;tjTlb@L*%Wrps3FEQYTd|8eqKaPly_rZNO_A!RMHa-4J#Vt)dCv%B1BXiVlj_QG+@ zG+E)~v6Z^pA|23h9$ZpBa+t=wD+Rt0iuVqhYw0(zq)APtu|?0*PH5#K~Gf znjKWTn|#+xC>tkh@+8pdJliQpzmX4f4(%$iUjSC`&2oaoQd>?qxMz`{-FIRUJaR~+ zN_V95yi9219%Pksa_p#MNYG@&WRFF8Q<9mLws~L~jAr@P;{97)i#&{lVC{fC$^Pp| zC%#jvwARNv!3mhcQ$N$Q{wH9ND6zx_d!q{#5hd|K0dOrXEp8a)h$2*fhAG087l3D2F`i%KA&TCDF8EjPO^loo zqY!*D4@MSCsbN~}+G#s48Li1vi?HE3ty(jAYV_Du!aVlqqmK$-^cHTFvSCIZwYbti zk$$%!c?Xfl+6ggp3PgEHGlr?sgCz19KRl?n71*B#jb+&0K2rICXBB0>qLHj! 
zhbl`4w2szuS;sYy@*%e=f;3##KVE@Qk9*x216n*kXG`zKj1juX2Rz<`l;2#9Y-Jye z1H$H5E8z&m0{TqcU}k!-#3;=vHP*!QM;nZMGvO%ktaM7MBO$N_Eo;T`D-Th*g3;i@ zKt@owD%m@)UlVS2jd!?gYoi^L$9*1z@Oq70(l-a>FEBenHL(<3_^gaS?TK3=zc6?L zZt&JFZI)onrq1OWP?Ph=*=@>c&Jv>kxMFCho=vU|4ADiLXO{U9GKc(Xx4%#sY2Rgj zuz`gXw@gHv>)=3Zar#70C1LTC51YMrkZGzF_CeY3n zg=`^_aQwa(=&Ohe5IbCED=}VIgZh>_NkNLv(ge&B{~|MG{7O7$+$sxfezNOmpshce zU+&1=WU>h~Jt%4&?b``&S!aWM`x_@=Suj;4n661bC*fU>@S}$cHusGG~OTMOsWNc74$T;2;33MNp*dcWpNhhtolqa zLXXhy{tG2;%rq-e2?yXKHis^~uianQqeEcjK7j9lSF><-&9uN)7V0LqAbN@b_9-cy za(8n893KN_N9SHzEy%a(!1(8ub^?*&OftE4I)qC}r#@^^D9%$JQJS@sZEK@g-=??8 zyOYW8x?yu*I4|rGM(bu7@jUCt>NxbddGJ}Ls2#S@eM&Qz5b>;^&h_aBN_%w+r&c=u zD_Dg}nzLagwPueTM?3R8mTZ{io+9*c$%~uduTL=ggD=O*ee~`Q|KFxuGbMF|5>_k! zm+0s5mD(Ot4#*BcU;vJR4;}6|&&FC3h20<0VeR}yrWBFjAp$^f+9D$wsT&=jRmg-L z4vG%S*)s>aOEZr+qsrQjdgztn<*5rTx&e$oy7;8|N}eF{?>sT6a?#%Q!_)KuRrl2O zwWp-ygR|Dxfo+6Ebk6QJfmzRVCoCpgz~-1K89L2wC~t)GDUMjQZU*N;ZU4{TyF;}&a*ds#)}`Q<4HBA-2tVG zIAcY-$a((_JB{KbI#ydx@L_LuABA{eS?Ok#rBVT{E9|2PWoS3UKMQyb-3EZ2gE+I} z8@6`w_XY;IH-QkLuIN`0a5B%Z9})zeM3_km{qA6UY)e0{%D$qlNNUNC_nPTJ?Wsr$ z5Z|r}RVkC@2KL~{)#7fBy;I}X4XSn@lCw$W!!%iJHo{}VR(b&$ULGK4%&rWkV~$#m z1`~iFpOZkmV{=gy!vnhGFIj%U4eG?(t=GXjp-qo3+JXMa8#jMIPhuy$4jM=E*)p{{ zkG~W0!g5eMe3wUh4akDJ@0u@4|DBUck9=gc@QbU127ohNDlyX#g6Zr5^9?mU%w*kf z-CCqSbrM~7!YWhk^DT9SNIHVqP_*#!9!@0M3N7ZNU3Zv8%CbB{$@Xg9>laFYoY}Zd z9#Z`zFu}9bF?R7t9T%4~Ah72#gXsD8T_D>_x&ZVbq9m)k0a<=Mb*qhgY?!=Zycjix zzv>{Um*+jSN@86`%FUbnmkppAZqWthqGp0_aYKz0k)3r7XjLBVT6@%5IqEJmC?rqx zuRU#COAXtb!nBGk4`ecV(*`itSPKNIA|c)R)|ax~Cz`MH&UX8; zn!+vD!;o3b4K)2mU@rN+Fqm2A%fb~hCGmTzDn&e{O9 z*L#gQAoC5OFi{igYpf`CanClfqdtK~78q%&Sz6u*xh;%u=S9SZh7{}Yfrf|hki#2G zTHCnS+t^JtM(gGE&-EZoJ^EamQ;6wRW)PH=Ok-{nzmmYnk<~O0N|vm8a5X)pw>MfI z@y#Ocz3K=P)R!xpBi#x5wO+N%b<6*+9a@2$NoYyy(h!3b zcX^LPk-7ISSI<2cxA(s82a3DLK;rn=1L?AU(n#mU)V@$gpU+%tZ7~tuF<)EpZ>US`loLGr{ZtSp6%15+D$5j+Pum=cURuC z;oGxkvyd8sQH?Yh?$$rl0HfbtY(HB?=1(7B6>yQ|6>O;1#7=owB=f7aOas4IH-J-V zT}nDH^8*e4m{*(>G|r;UD@n4YR_w!D-ZmtH1UhLz8b?xeVgj&FpR7OY{df=*GdxIH 
zXJUv%0*aQOQ>%jFz0F#o>jE0HGPhGIALb*5;Z<3G%f}k>aOkC~tSJc`Z3*-5%tIE` z<1lnAMiGO+9o12|4L#@Xe42bp-n;5GjiTGzmGJH+1#!zJzvbMO9;%+>RCv}s&?bT- z0bw9DnAsMK*P(zdOaTeepkvrrMJ@?+MOQ zn|T&#G;%$SzgYsL?yYf;8YyN~&pB0Ts&(FIY`lZC?@GfFbQ2pn-P2AK31SXvFczSb z-P1YKnq0!*K7nZu$WUpi6CuXQ)Idz?uWszu!#TkYMOran9cHC!Pdr8S^Gluk!m_=| zZkzJ!+h|Dp)TGvyAOe*e&;;OB)>!*;!P(^%aHaZKl>lX}g}3y-KHJ3cA}#1tQH)%Af3vGgs+d%NU8A`sN2 zR_>F0cPi#Dhl=K{b^Wi0-^MSrpF7sypDe>=0}enxK)TaXetjSrSBBE)PQpd6tDtGV zmN0Nw0qAZYZjQHC~E$~gc^9@7U1m|dTk}5&lbYNA&(!%tljCF_aJW$fn$e>1kAWBEZd};XhcN0%hiQ93MULT z(h%8ld1J7wbtZkKR0b7wIuoDs5vrfxpk%55Ym;?=jF$(ynn>UQaGJ<=q3NPJQDKen z>q-xPwiLIUTRUYTc42QW&Dpo}`d<8^?&`@x+xt7TcN{~`jhRm}e?V#Qrm5<*Vujqx zOTz2CM>{$Pn0Ka2w!~iQdVEFR0G3#>k1jwqbE^r4#z@gJ*D(RW3ewymG6>hQ?IO7h zQ20~YJkruK3@Mcve0^}n%RR~^&kp#Rzl$||vp1Ktw?`(ea68vs`1(J7GV_J3@}?e- zYf`iBvxY=EdfdkI(9N8UCVs`%^pr&*%SgmyKSCm zP=z&KeR(LX$+LBJ0zk{M^Dx+Nd+oI;pV@?G-Y#ASPw;P@;EwmK&fF*<V)}s0R$j_XYQV=Oo$#{%OcatXpy}e6y<~pCw@_is|WAq(glD)+2@jUXN0b+6@ z%hp(tp(asf%4>v1FOl@*=GT_AtXEdXc}0aT#vb}h3j2hNH&U#B@$N`9{4vx`h<3$3 z1tb`+&+kexAD8E+S93t?Tt69g2FUzZd*ipi!EYabtMMg3l7wLn!*8%@)+bCVL<|$!QAQGgZW1WT;|M@;LlYU=bV<(T2 zV|@pYlGXB~Oh2dENaM(_8(Bu_t15S0HBj~I=x(dS=>*Fkf4pc?pSPEoU}ySAi<3vy zpXInGC#rXa;_YckK}oez26j@;#H?q94x3$XEB0ltDf?hEw;$bPAsJs8_08R=jo&Gz z%eC=%Q|8acfa=(A=C*aX*C|y@|E~6(B*x>V28uRQInS)!X+N-kTaM+62ke;UvWIBz z#18jRR>}~6u$qLV=bA&AbzPY$I>>|%H9x;sbmmDn-PF3bdD46P()G0yg0_R_WFjsI z7WOl%P;9!k_v^Y2gNbIH8sY=yr_Xh?5-_n}Nzw-U*uMV=6nVz3FfQ(tB5D zd*DYi26rpQi4|CuY~Rinr5!>}sENBLWWESwINo|9K4126OKV61gOITO7@qY?fzwL+ z(pSIP(n6HJaW^Bmvm7UIN)quhSLIG(Lvj4${af==Ly5NYnseWWKQ(4PlR3xnQQ78@ z1U@}B+S~0dUT+^#1eAq;YS2vXx279QuSD z>@p|I!Fl}6ba}v{b5AeUTV5Xa*5ggv(SzfRJ+zkv;Xhav#u6(IAe{YMRxw|PI*5UV zCSn4}A7(A;>>Sx{DwBImcvPNmx&Ta9)QtFDJ8KzRkcXD_s}t@`Q7|~p zd}ic_bqv(-CV6^(?Vn_x8TV8wHfMui1igmaiYg;vlB2iP=&Mb5gJ&&rCfja=OehP( zC61hVks?5NkNsh3&wc4&XGX3%i8a6JB@DSoCD{L0bU$|gA~OsaO53Rw)K(S39Y-n+ zu8|zUN`u4bNsTg)lvF-US9ksWwLPN`Y~9gkxumbinmG?61tQCAogTGkze5p=&~X)} zi>fZ4fOjfWi+-Vp9kSThFg z@>lv)F`s8D0e^m3DvLE!eTs 
za1vkpi5#7h!`m9C#c%od?-UKh)CjIVH0}ELa?cSv+IhlzPfz-@hZDgUus|b z$|+Y==SnFizVRcEt$ml@p67{g2yJN5L+0-*`o2b}a8KH@9GG;>Ie z1`I8-qlG!>Td7}arK|e1%@|edui;i{NOrTLY$fvtPy1?P{RMgn*L44SYVTJn zo;vy6{I$1Qo4UcsfUU$bKMMQJ=ee~ENICc_g#R&Gfla(S=>Pas_{epFBv|3g8pnt2 zuahN~mrkR&6J;tNK9u8NuxCsByQ7A#S$Nb2Y(B z2EEE3RnF5BR1Ut>CU)Q+!+;%tG=TP2^EEiL=TIXL9Y)OQbiCd*3rLb{7U5bybCoZi z5%gd*fUAeMQRm=Vb4v*}6!YgH?dYg@8uV8WBE5!>3gR-;Dm8a-KeC>z+YzN47C&6W z?u*oWzfmwtFVHg>owfe;>`rqd#Bel5=>oW>B&OoXUhB$ z-}I*n4*`(129qQarw9hgGh4Vkm`R0Lv#kR-5Z>z!c8_ok&bMNm!dJA@r_E!p%AV-& z0-lh1GVmB>H+w=f>Rb0!wRjyZT6}7Hoc1d4Z6zG3g4l2T0Tk3$ian~_mazU->?36h zsZ}&rj{f*wdndoRB2cU7Cf$4sjg;j)Q$M6TnxdC|!hmY7TgeN=LE9RdYSj~ymA70s zG9x$kXPK$!X#cz@g&Kom*<a@*R}xTWWkqz0&NFg*jlLjxSijzQ6B`ht*}#7;o?+x&ChLQ+vLf6dH=kk?Zhq(INU@9Pko&N=@BEyFbw} zdDVU@qM8!8=zHsgYj1JF5GL3EY+Q_RHfZ=V(Ds>AV%-QWR{Z!e8%%||6+<=1W=FPhPXX5`(c!4d!Th~fMjkhP1^J}Pp z9_eN#w-uX#L!k;HW=BPiz;A zDvhhBg;d`6NaL=H>csT=MC$gcflFFDO8P&O6h6Ll8PGVrO9rp5zKUs^U)Ah6adrPZ zbB{{UUb7e(9+1&m{NohsFI_Mwk@7{Ra#^If2>O4H$=V(RL4Uz4_95jB8%!1d8&ji? zx+WWYH3;?t)lkOGmSyFcOJ81bOp2|Cw)-MdtB+;wtL(g`vRF;qfk!w>nt}51Ld{Hsq4qsAF?gBRlq~I^v$O#5#*stB z$-wn_Hzjzo)v6ZP=_TLn)lRDk2p(hwC)OignCSE-nKyvoAHnM5P7!QaR|}&R!)P2S z@7XY(*}Bur(NSh=QHnIii;}rDoULo8Gx=hkC{5=N=~or7-!vEDVQ!ZOs=bJ>bCrL(xgOUoxo7509N= z&&kf(iKUbmKN#{ur2!bk1ciscV_CM3;1D@-H=U^-tUhCIPJ{j2y<{YVpZZ(UkJL;Z zWT15{9IAoF&oiXVBz_0%GY<3jG$VPJBI~#r$tT7*Pkn~-mbHt1H#3qe8v=<}@5Sv4 zij$lpGi1o@>`F;u!JGQcG3Er%=>K*XHUT$hJDB6i1KK_*NUA1tDB})XjC+w5J*!|! zr0un5fud6Aeb+wJQeo$T@(S(cx+Tl!Y*3&-FYN+MMlW>}648KLwkB2~l#d(6oI_Yq za`_Y$eqELi8qAHOvGASFnOkn09auk_@9ROAAg%HlBHw0oeh6j7k|$AE}OE{ zxRFo!Abpg9-=8PFd)-Pm96-lfo5sQsP#bder$!kNm<@m!af!c`l2=C$Zi`TJ< z4Ret8xZR}Ihl&;6+aLJX3#}H6=9W! 
zr&SovY~=!d#nXo4hbR@n70A@gRx*!W{FYWR4Cd-InGh2Ke}^QOKH%!fo!*jW5yOaS zw))BukXSGvq4*u}Q3qknJE5uA^TR3kWbT!}cjjge3Bw!Z4Ej)`osU>QOKjXZou=Gg zq;!ch?JItV#Yq&oHT6u{O@a~x#`?Fgztbk-csJ&7LeHPI( z<-zp~hbnv#mS9R`p*&}?3p@;iI`4o!Y~MbIIK?{X2?hSvY}v42AMziQd5-M@8HoET zknT8}6Y+E8ka(9iF6n6R1ORK-d4X-668@LBsePS82G|dY!VXOf-2JOAiKq%?%fvj{ z&No+=a7C>PzInq9w%yU+bywPw>lT=|QC&i+Kxn&;Zz>~|zyaOJ7wF{Of<4&}FQ zys`8n`)CUFh-NVZQIs^0MkunYNGITbPpX($7?-N3$F&0h)?5X(M;!*(mU_YQW@TxW z5Boo=+rD#+wPKXP!vcPD$}1=vMJp_@&A>#rX9{dr#(9$O7y=uVw|REO&0Dlg+qEQy zso>?6qgVc%0Fa}VqoIwb$lHSF-ih!h_C^*%3NmxxsUVjI_ z)6%HE3u7StU6B*Qx&Dskwyh&HOHKZce#d|hd_|Zipy3)Da%hsOU=T_kGulj*6i!0@ zT?mksKW#W;xq&O|!W07mbYI=&jlfjHK{q1%$HnlYIK6(F8H(lz#P25dG3ke-z>QkE zT8q~)Fzzq~I%^Ec+FH3UA}$PYiM)z`Eq+eNNY?X?*LRc0A$XFRu$@ly zJlj|%hRAmj+MK&6q~Jn40b1Dxp`)|iY)6tA&|)uuYvS>V(l)qNKrcakART*ul9@!l zBf}-l>u3QiFyt}vIXn;-ban$slO;JhmTnY$iyNr9oV(6anB18@3mcNn-14p(1XL4r z1=WR>7dQs@+Ru4CH;Y8Ka)`1d=6K1=A>BcNMymwJHzo-6pbPPtF2fvN9@&szz zQ)F1i)9dUe`P2@O1=Dj|G?hbapU2ta?rBE3#X^f|!sGyGT~8#Ng~rShRpFkQmVHX< zIJ6IXV>go3aMayqdMy$!x!R*`Kq4eaWL5v%xDNzmKckX$dkH$HBQNDd&4Y7h!-atB zZ%7oZ#<#X$w?awQ$`p~zihW=j<|aX8p}OqSS`HIRp0}flAg%({|H1G=WxG#MKc|=s z86+X6^P+Dp8%T@!I{o_V9Vs*&Y4m6uCXwYxfyC&tbV1fTR?kycqiqJ$nJ&+EBD9Y}R4cCor1nc%Emn8pwk_nSPs6(vP=txw10V zicrqTJOFA)hBaMi5D{=$-i{LZWd0!pY`|xLgGx-rG{0@Zq0~E) z6Y$jX$@jsZE$02hW1A58vu&m$MbLc>x7Sr3Kl-4%nhLodp66#=@zN+GA+d%TVlj`6 z3|hq+^4kJF=9arzUmS!l=x78n>+MqW?9fjwf9&o`$*U@_6`5jBmrkdiHOgPWIhdCs zGZ2m(cENvyWYECQb6lr4z65ek;Gjx1#vDVEJ!0dYTj z0yrUN=_M;?9^JNGReQjP)kk_<@LuCz%AzK7QoRb0STz`VKxL>7xzkA2WW|biDx{E* zZhcRDe%IlLx{mW;!T9`OR;GgXLj74m2wa6yl8#@NW2ov{?F6kZJh zBg~x~nWCaQ*a@N?S`zca5ZE0a^oS*159fKt?_-I_?BLhgjT$c4Q|03nx|I(f(^PB0 zdo-*&YiOK9UVJ&)=m{dBPx*1?N;VIKB3lXanl%ZACrDreO_uKCCjoOS#}h^aI?4pK zpr>e@Fi&um4qBIIuJK3tNDAO=`R4-GLktjObwFpzgNNO^U>{H@kC3Cw)R!+32npmo zK^6gv^guro!91L}XAuMjmVX2db}uqpQfJz3qEz#h9M}_WUsjZ&5rz8`BpW$!Hf08z za!jJn13x3n-G2}Sh@$>!7rvvEH*xC-$Xb8^Pz@^F`bE`&F*i?K_o6YJJCdEi@Fj>L 
zVS3GRp%}uns8)d(b5myxX*9}>NnP5{>|`)^bYZgixd@;^lOKyx>>$Ts)IWgG@;^PKc4u+eu=d*eq112(yWdtOr#2<~(YZDZZ1Zl%@fDB81#k2(Q}v-%_y z!>u?0jdO?->{(b%|7(^y&L#PLQ}aYYq30)cx#v`0ZbVvFeP{emLOLRK^^mR`xh?(w ziiSU$wOwd`gXc-;q95J_LyF>9MReEQzc%x`dNG%{9&0_jr2BsW6lY3ef91KghpP6Hc+La#+L03kcO`E{i<`Laz+SL; zpWboj;AXi+U4jDkO@?Q24_Fw`QS@CF@&N25m`WtI0?4#--{!3#o9V?k4!5f9 z(TzaOCl)N$T9gk_<_%?{j8gePS`DWQikwYEF{w8k^A|<6VtOT?u+#tB`o#@QJIty9 zHZ*!eBDbsJN%eEN#8buVtC=PSp`uiQ?F{s1CHX~8XK60|PM>j=Yh+2%hv~a|E>ifs z>RAN5FBakOv}mGk<}n{fr1NINK#n9q!{x?jWt73TwZX4&)l&8n2uWfK@h)L2x6B{u zu&KH!JNw>&AB`_)r~05?#^l>UL8^-o{%KG z1nX{)5b#8x7?b+g^MR5@ao6ZX zu<+f+)53pUL&VT2a54)>zf*MnE)6Q7@t5Z7^lIGqzN-#`uHqtgg~INSF;rVVQkj<- z*Xvq{G#TVQ7T9X&LHaa>oLs{ppm8Sg^0Jq3$s6mB7Z3dIK8oBpGCEdAmG$;JemJuO zONMv;c9!--1vG;T{Zwm5X=Vjm3fO;CIO{-pO*EP@VFHK=&{kjK`p@zH%?+RaaUw>sHwpI`g*e9Hq^#o4w!ws~c?b2p8X;LKKW6$)y5aZq8+ zcq}v>{3!ryodV;)>@C~2$()ctYy1jpU|H(ik@;waTm&W{H7Za_5fw?X%%rguk-^}E zwG?DC;Pk?oZ&1mSmfL+bC!GGQsBlWP6r?fzS!GD%WABzlP}Y&qM)VaSL$>VJGbquY zvqt5rdPDK`#K5Y?J#4||bp{JCZEI3DpVhQvWJ3#{Pbt#YYx;PJ(V^f0l9#2r`mnc+ zFUR^e-PR{U!3T8-0|g_W(v&T93niZUJFhBpsH&r?%PA@E$IYEDX6p&%Nt+Bljn)(M zkjW}@bw+ElV3^Uq=O5pBc<<8A9Ak*;9+L^_Xgagd?t(jekE|`T&dv>5-@S zk@`p3m6Pu83O_!wyap@ZKnnczxpVK0{}3!7NXS`u87o{of4D4ArkpS) zA%`zdLXNPSu4SNxLnHGsyJZC={1a1nVd>AJ@PBel!b_e7?8PBv>BF?ZY!>=&_2rW% z%K;Zt#hiH0x$$Rb$?T6Qz3mK~X;El$MkBNHv|p9M%F^us{DM@q0|MsHdvaE6o(Rc! 
z4((bvJ;Zv4z5K+f~kJUgxyK(kh%D<(QIMeRWKZNd`;RNC}Ktnr75Q z1$*F!7HzN~$2`XV1P3>pBM4C}#|h*KcSps(qPg+}zk1 zQlfH!*HwV784gwrVYz|uhI@iSPo(~{QlBk7xLVOvt|)($()#;+5LrLsbw%Z?I+27o zT9$%kuMY|-8U(Ecb^otGb+$&m{jgQ*Z)*3_{$p^knkT~TvWL{V(DM@)Q8J-T+HZkN z+B+P8!8x<<0QLy+gv!DIQ4=D9qw;|yuVTZrE=TgTcr(!=vy zvd8~2FmZh7aF<`M4!-QIIuiqfX}%U#an?<(Y!j!IDZ9TP27&FJ<2)k=J!T1#HG_=$ zIsbmj-*WH-SvW*O`n)Qh0op}!sZ$AXLhVsp>K?stZs8HOh4+CKkR=$V>?t1yw52bD z+L==g48OSPh|vv{wNVmhvak6hS*yR>y`7?@2%o56OpxANXzvGvLyk9rIgaPFwW*Xi zgW=M2ww@_Gr26B;R`sVQr_vrDm%YC$Y3X3%0wf)PRseiiTh6kV##TxAM0StAE!jc5 z9anMlxQhvFg@BKA!$%k@Qb+-FvB9@c>f~U#`Ac0MtG@Z#@veFgz;%89T=im>R`bm5 zwm4|Atj@*h0?UCrRl@^A>e9r(KTu$^O4M$LHR#Vdh!dWl-v&AhyI0hdiZu^+RbQh&`L&dIAfwz$Kz zb5IFkFmBlK@4o+%%UB}u=lA9w&Ejq zY)b@#*<{A7HTkkP7su7T+#{It=?wlLiE>6h#wjnj@V}X|^KSsN26$d?_D5u#6s^f0 zZ7@2lVz>E9cqT1jEN|! zFCNRfk#66!LfV!dlEP|sxCCV?yPrdRmg@+=yrLBC9`_S_-F5{9rfpr|xKp!QK+{87 zE=j;=WA?>Pj?E5Y-O{r1{LuzKWhbxhB?bO;(#n-ra5g{y3J^IgOax?1ZqyWME>2I_ z=|C_yi$~fZ{a&T5H%TO4C0lmr)N&s5a(06V3#)lU7mIOu94wq1+MzP$-4pO#DF#7r zcZ4jQ7=3L^z8A`!4?{s66kw%?#Y$KuG;cTd@ zyiR98E8m}@Q7D6&dlY3_T3Yy0GDq~Pdy#%U)t9Op(RhVL&t9NDST!n^R~;bt91WBGKt4$QSpw<)y~5R?Hl0PzTkvtqu03 z=D8_QXI|ryV_-SZ0zinuFtq)SGpcQerr`0Y-3TIL)6wkEcHe$jf{u?T;o4TO)}5YG z`9Rgc?M-=65!ie!A6>SInwk-GS)Auqm6XplUrP%Y3{y|ul#YB#uIl^hcbr>)LUmhz zZCT|5_PyF?dzA?JDcfW>IAZyBMkBCC_K!{vqIQ`vt!_SlKo;S24|3FH&4*bOSS6NJj` zH&wa^vmMjoeLB>4=Yx=uQNegs(w<+oNonPig@-)r%MjRK+i&gKM5-{X z%puIJ_Re{jxTv}6qg`B9&Am0NDM7?dx{PHl@K;m4RC1>NdRYC=Q&Wq(1}mGd3|FJ( z6^j33e6qT=bY!9Es+4{`Xk=zB0F(s>s+z>hZZM;W zbp~bOip`Ml%nL@F>InJQ_1jTON72tS2}nxyG7H>K z1!ob*$eC|4FPk#DMiVb9#oCgQBf>^7mBjib%^#RHT#@=pMc>a#`}LbAb)~*n)qQf+ z+QQ?aqM?1@=mt+nwQY{}n;1MXmC>)`s0L5Fu3lB6Ft6d3g#wQvKv=?}tAodsbwzpC z?o{vQ;wAV!+};891lQnR7motQm<|BaJr z_L{o#Z~dTY8dZE4-z6SJ)n2-E8CBIgn9CDzWuD2t3VL*A&;{00r1ZZnkDT8>TOB=~ z3&A)pH3!!$#4;5KgWDB)5|jk-`B4ibt)Aw|#*~BCLHbkgK8Ih2X9TPEoTB*P22tb_ zH9N~HGeg;=&%jQN^u?Xrpp|B~3CdS3*8hY{0Jjrfv(6pI{Mh$zth7vJRmbJ4DI-|Ry( z^W{u^R;qeUUp5?DH=KBZ{s1C@{9taAcoj=;e)+wG#o{^REkfEm)ePU_eBpCeA=oA$ 
z-UKJ6MZ@|0FOSAzcfZL#Aau3JoF4n%>QKK)-i$nbxGFCU#|Co7|0i=pBzJi84jd*Eo|d8FBySEDXa=JO=#HNy5lY(Wz} z?SgweG@}O?ny(461^%lSJUxkvi9Y~X$HbrTU!UgtGi@~AUmyMI0N^0kkhv?meOpVZ z^ZSGz_Za(D(+||P_*r=)$t8razp@-z*mS)MgR%d+i0|tg@o>@U57E1tv|~a3Uds zkDQ6bhi!BD#%0iuuGa+n9(u5^4zWAL2wHlaEKy6_5-;#X>M#px>%cE2m{t69bNf-^ zyFi83MH(cBo&|W{El7&`c_uqhQ?KQdrP*1^v+gEUT3wjR?v@2uZSKss7R@=&B`Ic| z@GDyj$#;9sf?>CwJCqx+5Z}O}wWiJz&Aqw??x!*4d0tH6b z%bHYvKhNiw;#Bdav>XM~EH*p^mcR-bW&j9hckoaw<)+@$Ls9YLHMP1leP6&Tc}nGQ zV&`S6>;eTaVENcKsQ~x7?Y;bPq|VPBb+_7Rn;j6a0;SWDzwNBiU|uu&8A}hc?HPs`P(VcvW0BX%pkSjH z`CE#fD=)}K_VPx~jQE}6=-PswMT${Guq8$DeU*lIj)}Ie4N#hq-m~;ui72I-Z4ksT z(H_(Ebf3AqhLvIHUkx z&(p71#k>ZN^qP7|gk4IiJC;Fh3=|&HmR_sU1R8cM2`Hz2|F`h%SqoLJ4kBiuymT(K zUqKO5>+WFX#jF7iAC3N=DiS@sQ#rykk@6$U!hF{mzgq#Sgwtw|dbHRz-xlipe!R6e z{f?!NBoaler%g+0 zS8s{T?vz$+aX4Hc`@$lxwGq3aO8Ai2rGOlU8TvyUVP-TqeP-z#byX-2`B<2b9V)f( z(On=UKtWWnS&@Gkzk>^VHt`IdXl_7r(>SIq@PjbHN$vhw=5&?^JS%>ROISUfpLJmm z+n{%jI+uWK!MV(u8E=6NvWCceKZjg9W>eI3)hC&WK~w919D=+0M|@Ygf?(?eO)uv{ zjdKMty_ZuO{S#t29wu94ePWF}H@opbpXA`d0tHHQv9h%ba%J^hCOW@MRcLRJf8oY} z44LU`KPrCCcKqdbF>w5bjZ^%bOGewL?eWdh#8JSlLAFqN9PgPq6w2nv6JrJg7wiV+ zsVU)jh%57IVMJlsPcZo2QDaXV&MSJGQd*;qGf`Q%w9F|=X)Vcs&86_CfEnezDa*dO z*#!d7_a)9+s&~GqI|Rn1I~%!!dqYw6;@<4d-}vPQs4< zZW_5)8Vbujid43VwacmZ>a%_WmzZQ_zJOMKWG+y+XzUnK#it>w*o#*yAK-W`@tiP< z%H7M{rKJQki(H4>=PNHbavA2tj=l`5I&_fWixk)1+_&$WHeSt~?nS{8F2A40X(JKv z_Fye~Fs9Z{V^2AhfV=<({t54RS4}N!d-z2(_nN=<9yH8LyZ^MQ=j-liXvvCXrgxiO z!?KC%Rp$4zLOu|>W3WqH2}dX6ba#Uj+kAOlDTF$D*V#oLv!v1(AO!|A0vXPoM#Yu+ zZ;?!;RQUH>s3lJtRW1h1*bbxv+`Zbq%(wbPzlEV7FzOx*L1K{t8z!+jr?sys5n8bi z$}^JL(;!X#o&Gr@7kgtLsldFBp6X5eHg&_PNMq7EaD6)?i&87K_ zvSlo~QLX9V^1~dbFzD#DSrn`+J#oONHA2shn&>0U>nYtXd=Qi0{Iqu72gT?6O`g;H zqg4GZN>MX^?^=|sjSA42f#n-6{AerlJktF&uv4iDR1yS}%G{L;=|>RAZ=-gvD2{Z} z1tMhDT#Zip`kKq>%n=ZxEjv)OOq};whsi*)vrkv69v#Vb2}m^(n=x?5j1$s>O_|fm zZe_hd+|$A8K5{HO;Q9~CenK2Pu5#~VmG>4{+LBc6G*{RA?1J6WF0Zkhtbxd`K2Gj{ zD_QE{9?(IkB#y0OG$S|E7m&|$6=R6o(Xat#O~?W1L4jW;(5ke#SQxSiw8Lr&j=_^v 
zzr7q911WfN;@YXml*=>8>Q5WcoUUv2G&A##uG=lW?uHu{bRGn0Rq+3~3i-zE`P?yb zT6P6TYSHMB>YjhY5b19`tGmYS?L5SJJk;iiBl6Nb!4s%U04F$iCa6wK=a=0|iWYNj ztA8kO#Sbelu4w>IC7$9$;w*BX++n}s$q19xP=ru$fF@!>C^!K0J*_$v9KdklEh!Cu z%qp&6SJT1J!~v!Qek$`Aw-mi~P zdKQ_!t+zk9>{g}ag_unozxc(kln(tZ>#gIGS$g(4&I$t)NKzn(I7VeXgZwX~laz2` zA7wZ$muoh(Y~5gHS(RH%4O7c^+?AexSRM4s-38a85lHmeF$`>)(Ry~LRx8Pph=bT@nNg?3%9-Ed9BWigC`^gfnf;*ZyDrx7+dS%ke~CA7<%mWoD)cBT zY=pw9#gxIL;cbOAky_?mif(lbiDYxG`@6w0G%9MX+7tC5a*;6F>ebTm)7iqY@*O8N z?rh&K%d@jm4@|xICz*+0T*t={@K&nB)vZ<6{Is>S$UEBhK){9zCLhFc%Qdp=bbbWu z%CyDdRePUUH7g1-jgjT7U0sw{TH#r5gpVL?sd?d1<*EyCEDiLQgMop3xrSY|7SwAL z!LR9yZ?u3y@I;2@AI13!@vz5_ugmB)Ar>P4lh0K>W5>L~`-^Rq>5CsIy7|EQ>q#j+ZDdPzLAF3{nVjSdg{;9=6VCl)3s zt`3uFRz9GZ@YA>jn8AND|5Nd7TL<(UqHT6Meg(|B|YXmP(%H2D1@h!JJWfbLBu8-{6YcMB2E5?xMyr}0Tivz{jj zT2hB+R|wE5u5_O~I8C8?pw;-c$3=L7mIEi$PN)uK8sc3hAf--lGEn0DF}TUPVFj2L?XboJ6y zVtF100E1^<;08FEcWUXuF=E{(?6#kI)cWRk{&~K;?V9q!?4zO+C}6ZNFnXanxsW^bmv*6q`Es3)3RA zp014?--u==7lQm#(|L_dI>p34J=Cn6<5ya=jRdn_qZ?TTzHI81*c4}HE)8V6K|y&o z-+r8=yc#MnIQw@Y<(=&hK412RCF>GTNjW#M#N3buhzL8XI*zJZ9(mC6KyAfL>w?{+ zr#`#SjWlaThd_z7TSx?af>_#Vi?@pBSu6E8_oecyQiJJx%SXB!J1!cggfx`g{4}ki z?{yvo1a@uE9dsJY?`LhraNX}ocxYS>Db1QZ1>Hq&E#4(Lrq~5)3UkS*`i#;~M*am- zv?H~oSKIhOQQIm)+W}=cWub}4-A9OZFX_WTh$C0?h{e8Tk-Vy>`_F&}U}~TnK2AZK zcYbc$L51ve0mkFl)W+Nr!V4kVI0H`)zIQ==Q#1ouSmm~+eJJL6}bheRI`Talr z>5U_kk6Zz!O{|PHAP*8o^peVqV@OW_Mxbho+F0#3>*eyAK9)H>3-xHgvy|`)=P{?F z^O6)1Ac){3^Uvf-T9p1Ytl4kKlTzo=QI)G1J5|lvOx<320nan1e9!;9Vn+@D!-oSM z`Q3OUOmZm==pL3X(Rv9GM;d|0*XqH2IPhV3-xR;o=BU^`Ug=im%y3CyXw^JXW#KGA z;<4~vwYx6z@*&0hJZ6Z6$cT4hgPiNkH2we=&zXCI#p-&Hnkw*^jXL^aHkG$xe47X0 z^VEXfy*g5-CTu=Rye3&q;lE>A-iT=@sa9Mz|{k7?ycVlzMrp z#p#*c#{PDm>KtdwmOPy=BimW%6v88)dcU>NXG783y=>i-zDJtBhVxf$HAVt?X~FJ) z8U&-S9c#)(e904CtIqHWsJ!6lYmURSEk8OuHzHa;(J5FVZ^kRnH&4O_DXv4^*fN=94pvB_=f`+8HYNA>`u*f5w6J~=i{6&uJ$ zi)sZQ(h01(*;53|25xOD2;{htVbcJwUO2~Cb#8e;NP=+#p1HD}Sju3SZ2A5sf#&mj 
zo6q;CB(F!0uuCKhr<7Jdv3zbp%$&jZK?gauYLLO{q3RN%+2|8`NdN+KZ%)K(;E}S`9bj;5Gfjf0+NG+$yw#r%|4KIcsU&1HzDNl_epmcnYQ=xcyDb-Yz!Ebn zIKiLwJX#$MA2!6JVKpVB|Hw+?3ovoZQtX=r~WyeYWp`^f{M92X1(cjesKMCe*-$oT;Gz1(uedE#_Cd> zn<^sAvTcw|JvU~IW?)J*a8n-MZ7c3*alreOMM;HqWce`WI( zZLA_=aYZS`{hw{^0(hutFo*-}xVXuyz;L217_S)L7l}9#r-bV3V4|!^Bn?a;83ADe zSFUD6HeHYv;@JB+DI_kfR<|W57MORIlx>1tkRhSt8^BN2(saz>=VOL|_=Z^t|DAVG zAJzR^c|)GP$>CWt`P=CT0zr$$Ji?WM8hL1Fs5wTJ0F&n-cN>Vpip{-Bb;VqwlUOVC zAg_P!jt@0WWQau#|m;qGb&sU4AQ%A6-V-bn)n}E+{8Qtjk_o*dr9HB zKL`I^xnrHSc%7PVk6{_N6I@4LyoP%XqSv!Jb3UVq1Nlhsj`cA)-V7FAacknQMN=l6 z=>=Wt(OplO*hh81GfSzOKP6~&@>v|de@vexHu?rsqyQ@&Q>V=#CLw9@%+Z&SvkE|p zQyT>PL@Z2Akp;pq^I!-!w>-_l7g(`Y-0XWhimuh7V&plY`VfrQ0Z8>c6}LD1kd^< zg51@Rl@4k-pC>22_&&CxKYFg#M!N%Pje4UeHpkGH^Nt$|f)b+%ZDnUC*J=SfeH!N7W?#J_u1zw3==Q3CU^X7uQz4Tt zQn%vgLHf)y5EJg!K!7?{wtQbGD*bK9mOc3J?S;g%eu-N49m%eVd(XqZ=XY?gE~+7}^^e@M6du zuttk)J*2EurfL#P);&io!ku=55X}O-E*Q^IeYrKdt#C$$Rp4)B=nkjV3)*FXdi=W5 zgP$!;qigE?NA1AgvviVd9~9jh>9}~}0T4hXd}}BpRtbV88$z^6VMv27>^=JG*#6E| zM_U`0s0y)et;&F;8z7~r0T)hUG z+MRFlfZm0g--d^Z*q!Dpvl(EL(19jJ5^p65>R`ZUPb{{1wVCz6ryR@6px`cB=ie99Q(6D+1VFOGI#2@XZ@OTD>M3Nou z+62y(vUgcJOswao#WP*f^4b!PeL?(B&`4>2BRgtkRLQc$znzbPv>Na;jTWJ#>*M(K zh_cM2rG4L6db6@c&|htsJL12=PiDE2oIW1SB4=^%I4;oVk8bTNgMqjGcs=z{c1j%G z9*ZR;Eqli2Kf8yn0MnR1qBJ}K%Zg&xb5Y`V{stdAxf#1>QOeaWNyz9zR_Ndv2WVOg z+l&XA&!b)NJN!vzYHs%hsW6W9)v~C6eX~N+^hpEvCQ!>4YF(VS#s#ZM*5?PagA&mzfnDE<0vda&tmI{o0 z%-amM2}Dx&HpCH0nt2!a`nv^VP55NM#a@*XbW5GDl=l4Q65vs5$dNHxYq}0^aeLWr z(DtH%hN56IooQQMXo^92u&tyBYJ+0E<-Q;KWqX^>&$7+#A7=a^dvKd_7zgbINwnO~ zw=9tgwA`*MzF#zWWq1*?=CLVN`}XWJ=QN;$Bc2zvi?c|>^S%ksGNJb?e*pF#1*8JR zgg}XaZ)plPXTX`Y@$J}4V2}NlMZ(J56KNGu`(eBo_(!wDXz05=vr&>hMD6eEgZQ01 zyLWxnu8-xox?H~Hm+SodMW#B|@J-p=(WO6g_SO*mFD5T$%+%16UCAm`Dfob z|B3$}m4|t%5+{%Udc{jWzih6}q;%L636cMTO~6i#{ng_&IVC;cFL>VBU67VmAop| z?+Jq46A0z~4-aY5{EBtMYjimTd<=T7$`mcScy6Ee+4{b|No6O*uB=&o`A`4ymUU-N zrPiu`@pBh4*!fYE)2%u=8lg8L)1Efmcu-saWGdr9Wp&>pIYq6{zeyeWhQizajGoW0 
zMZcJ(%d5P7$w=wIBc($d8;4dj)*Sp<>2rnuwN&ZvedPpC|IHHm-=6f|^H9`EJek-s zRze@j{I z(OS7LUPeoyYX8fXyTZ?FVp>X|_T`(zu&B7BI0?bTzer;6=u zb6p97Dl(T^yA^r`aMRB$Fa?G#I`#qX$^^?ZQ(+BJ_#`540h&Fa*lv@@z@Pc$WBwI{ z{pf!^dtnQ+U*7xxZOu)|S1w20Hu3m^J_wJFXVs<&MTM4U(cD?7htGq#y?8bKIvl-u zAEf1X7AIjd(PU4BpCZzEsio;Oh^m~T0dVs6pwEh~173)HD*KFrxnea~8F2+X;=qz6 zGcl4fABwLPrp%B4b^AUTv(QlwWVtjr!#n!xyNWE$;^>;02!YyAz4p}u%LDW(htPsf zI2pPV3LP4-2Z>WZYwGGlT$Hk#@y*t?_4bNwStqpYjD@%u+H}{pT@3Og`S?{~Uu3Wp zwCBYDnwZ89fiAiG4gnfok1et5V-YIE$I!hz$=%M}teeaHz_NMyehr8xi%Vx1HRA%t zO_>LJWY`zSc!2>B<-*s~MtJ10$6SVP?HUWh>f6ijhjU9o7K6B+0d?a71~l~Kac-Gr z-}MLj-(Da7484n5S=^;{dV?(XGE(yGC!a1TcAE{Ne7#uuo7&sOp2rJb{pki>T;*|& z)P!scbiEZaQ=@?(zsOuoe|?8;d1?+5YUC5y)dy%4fjd8QS~AK+WIFJ6irJca1Np#s zF@6%VXg{9=N#+Bd0a|1l2PibJfskWbW3L|%eL`_35j5t7jaos?Ni`=(`_lmQi<_K$ zQp*dPI6`z;`GEQ2C@~`3LfP<_18Fc6CQzj3pYP-PQ)PpZp8?pVF={}M@a)XobH(7l zUf}^(=bHkQt*a64P=P!MHW~QdvaSo7dj7Pj59UgFMeuy+;>R^n{Tc4mCSiI3=Ih1G zmt+OM^}x|{vL*#wxYI--Tpw&}JC;?s(%gS#nL3Kyp-mKP~wfB%AMb zc5{E)bzmv23+xrFSsx=9gScB*h>mq_D|xKBg=Nbi-`X1uXu6KR|H$36^HIzXsp@$^ z-oU+qw*;8=Vd}@G!F!tOm}?dW^n__fv{3RCV&h`J!sUV#8)x4o3s)t3YrqaWc$`d7 zGcm0fIZlrGbmCB`ZSuTd8i{sW`q{QAqR#Caw5x^DaxEBe>RXF9lg7JAP9i<>Y+HIC z!j)jfF)EW6Cj?BB@=#x{BvL{`C6*jiv>+eXY~qM{GHtl254nApwC5dz{W*4VeGuB1Ir_SxCJhkvIL-6 z>}RP^MQWs>2vnQEtPp?xe|qm>0&x`vjym!F;chcj5p1sq(wT(WIK^1}=GdG;gBT4LDif6i@4m{7s|7q{iM2W^3?$C%fKLc_+SFY({_dQyf~cAjkmtN?n|$e>r+RK`>-g1ytT=} zz`n^>-JsSP<@d0-i6e1W`%d13_Jrx7csmtM>Lc+SRE#+ft|te?)WM=d>I~*R$%uF2_)g`e0obY6ii;%UYej=fc0aBe18+K^lcBk(( z*lBfO`%qmhHL$MY0I$UkwOLd=^BEXdj^`9?{luLx|9Ls0cwCiP9-n`D<^lcpCwI(>-W-x*ivv&9O z^?m9HMAd8YX1*G0C6*}P|93wHNJi=TIsc3w;jpG@ZPIF$mgyPcnPCd}C zM>M-NVqUDlthXhYTM?iFRln9VK!O;|)op8w2tl;)*l;rj84`rMN9IazPYyE9>Rw{= z)-L$TQ-?fBkjXAMwSO>%8@JtyD1fvfNA2^LRGvC&iWiPmb|8_1`eQXYe2|Yy1`T~M z1`YGh%#{%S=dmfp>Nc4`^3U&PuB45xqTs1nL2PCeQM6_=e^1V8<0n_dQgkUu^yYbT#dywS z=5#*9HM5GrQ_mmL6v*^Z=8LcqZ2<dPqlOZuJBj~IcA5S60>y;>h$Uc%Ke?ocxAhmxd=hq=nkfq!Wz{` zgC?H@(*dNPMDXaI-MU3JxE^L6u*;q3(IRI4qtrSgqT+L};HehNaFq 
zzeIqZbg?HQsSP_*fkV7vALYAOS}<&roPjr@X}1cBnX=K7aN0L-yfIIQck@J^1NNll5-oa}03^9Of8>Ix#`tpm!-j;IQh}SHgO9w|`E&ivuPZ_~82crH5YI+NEoI zk{eFN_^qw8%+YZguCDm1x8Ue#O27eJyB z%w5s^#A?YBr-x{8#f=f+Lx|6s!eHp9z3F2>KolaJ43Mi{AurC+GngmBQ82UE@LV>L zCjTyPRd%PJ%yVwMaQ}e=x;0tfBa5f#YCTV8m$MT5&bKm6SEVN2NdK1CUpKv~1VrTt zQI#5|&H4GX-8p9XVE$E!ed0YsugjvqPcKh5n$BBz=12A!q&_5A>!Vvlq!<2HyY9^8 zEZ*+VUHCeqL)yMAFfUg}?ztSonYoS*<7~ByH@i=&pg3y3UAD{{avY_AM_H@{qj%P z4be{>(JsAnsrcPB5==h=Ct_FFFnH*fj;-Sx!`8c}xz5s5PHA|ZrZVVtKp}f&G^Lmo zQQCCf@uxY+{h*Y=*vq;3Y5!c*60PbO+usbl06rNBV3dR)igs-kecm+syaL`FN?BOI=CB2GXQKBuJ#B+1ACyt#ndTOZ!!=oSfb} zEIZ7Tf@G}aud;wxkQHaIPVK9=bFKX^*dMqmV=w?ze}3jl5@X=85ZVX5o|=6C#rr@Y zfVX-}-{1gmAA?4dV*enlBVHX{4Eavo;G=?w*cCTzE~}Rs{czq(4RfWZ@ zQS?G&M0wqF6;ve#ATHijGH;v)<)}yMc(QcDo}!k6^ky1 zt`kR%mfFbkJZ4qjZWJ-D;?fvtkx9LDWR1Ta*V?nlcu!tMYM8{8HqI)38U#en zfE$MCs~x2aN5^bym$Ew_G?1#I{uxfjAQaC;YwcXp>}g3#6HTVoztrfYRyokJ2}H?o z84V@i<<3ib$N5tQxF{d*texFQmdv;FK8_unpi-wmvW{CmSynyeK1XSf@w@mb{tT=;FIarxb2_3GSFbI8M!)wYm&^A*%X?9}|6@*JiM6eQ z*5SW*jEj0Z`CxpD2D*W`x_0XaEWEv?#S%b9Q7=?f$dOlQ^Vgit02b~K_C3PUpT643_2`!K(T~<0+X>xIaa9IhDWbMAy1A(cKxuMplmesc>Ab& zRm~W8yRBj%kKD_}7nsR*z>F2`Ab)y=GZU0@*tfSTFGpI)yr>WS$>`ZzJ2`q1A3 zCR_ZXg8&%`DacI;Vl>%1sgl9^WfTu{HQ#*!F>plpbT&;j2<-{wuC*wsGC8Q2AKG$n0yDH5FPRrFK%gheKHXk`;evKG();+4y*$guGABw z26F9J0~d`~zKPw>Pu)g_?i(#JiC^EEbq!zX{K1L}9+ki;o9CN4R!__$r|_E~C&+WR zl7rNl9JhB(A6V^-LtmURsP}p@tq`N1SvAH6$4k^y9JOq*2{@?A%f4I1N&8Ssg>A{M z>62IFjUbo9xb6FZ4fa6UpJjVrzZmkd6k;R8iXyoS$I1l)Sv@BN!x>yV#x17eO@81& zKe3Xl_afyASGIGw65C#jPMFrnqJ`zazuuexPl>71P=8ma_Vz#_q4Xei81+NU150?! 
z)Z18qs-p}_$m7Rbc9;%da7=#bP}3egi3jI#lE~snj>5W!C&Orf7%cunvSlz1ZNhkY z=8tJAr1^DQyI_gUlVG^uHmZOJ=93H60zC177kXUt?!+?` zj=eFkGWD{Dx!;{swBJ@opcF~?M6X$&AB%`j2-}$xmlh0WBJ1WKE}MiKic-i=(u^SAF_2HxA4QI$ItMnnT7qNH2d#Oog8!hr)hIzw|_# z)|4cTMMy4JlH~K;a>|oymq#pYzzrhTAb{5K-KGu%8}RV7SazVsp%|sw@XDqQ1g3iB z%K0gsReM|SZP|Qvs$vZjJ}Ha=>^`t~ovg@!!;74P0Q%*$;W|_>AkEpo>MWz5>7M&^ z+EVjKCAqTdlB=)Y;&!CU;P^6>(HSkGpX>Ei1+CKvwq(I)P74k|0>a{T2u(=*Vl3)g zJ0FouY1liEU(60PnKckm8ipijr$COw@LH70QFWp#pVZrSi`Oq<8jp~ZEg5W#5}g(zA5`(oO0U0zm*-{K_Q$l2FB*hstv-I zIy)y8wpbpTb9vzGpkC%nZ2n+SapqL1@GeFGR<`NTtL;{tT74o|szViv>E8t_q+VrU zK$P8P{+JCYH1-R2Qi|)y_>g7uBkPokUoAtpJgd0S?9h13&qBLmRSdK`g6BMx7@43w z6j5MWB2SNBELr>_*nEy3q$ko}Ef?lgS1%ka@=0f?U46t0vK!t9PI@0JVkdCWCYMpK zPw??aXs>CYDNI4QW;!bY6LSp`d&E2+&)>jJ!6zv2K#jdAh`UihtJ-l(W#=vPI0@}? z2XBNdX&@MBq;LH(1Tr~4Q3T9GGr}PABKL>@cvkKsg_>73_=B?D7uk$tp|vW64azXX z4727)&Us^!OVCAUeo$cV1Gi^`9>&txXi{)dc_>&t+9tpQMc^ zb`37>3asd*svo>`>B04_9i^ESJk2>0c|3Mu-HR9IIUS-pc5Q%Iloi>2)0#V5m&%m` zc9APDY(QV(N4+gmYSZ12kd)}DVOC5MY#-5Kv!+>W;WvpSHN2BZn%5XlcR13KAldaS zEQt=c^+$COZ#3u}C)xtkwZIe!_hY-Pw8A^q9ZEvgAZUxCNocB%n@^ok|D-Dk=mUaW zGgg&2^yv&fvq4PqUII*5rJiQzs`H2R;68SspNB(_a!BCn4n4y240Ch56PpK>ZXV~E z87wBUEzg^9XqmGSco+@=@1;8Qg@770NF1pdwMr-D8nNB=jEM3t2&)|4MGs40o}2+^ z@2=4vdiJZ23Ub6Os87t>8XKGOh=Ojp=Xwf5&67byp7Oc!`#JRJ=xhd$BOUhtVqPB{ z1r^NZ8q#ZR={@vnl`_;!9f{=%yg5k{A}}OYqEX#?owZBwGiZ9c_|ihRLIhPSE6UU{Z`MY(#8{ZG`qJ&Yk@IZQf|lbQ0B zJTqp5Mnr@W`mrQBGsB4skNJcOnd866JYmoPDKYExI~)ALS(v_fAGZ}}VhV3xb5fnp z4`=mieS#bAv`G`0qh1J6BZD!2JXH|?RbH(af}cpa_urHsd-~I5G=0u%pSf;7kF<6i z>sf6g3g$yIKKZ$q)}OrO{eNG4w)2`t{{G`lv(|oNZSC`K`r-wjKaZ%D`HE{7 zPI)h64k-?ww&_24;O_Ijc;4<=5B=?fKW%{3!$w|FcPzAtsy6oUlM?VI3bkn|+uLAj|wu@tGdpECN@;>}2 z-CdeRaa%sxzg1!Igx1wLss;2(IymJ{xB9((DW{aFK6+>W%Y_xyuN%;+=%0YOc1SPp z5&wY(J+D*LT~c&MGg5R`)C5{~#~I_88t&T1u2=YnG`ytwDC#+^CtMzQMb z=IVniaY8mBIyv>_(x;wiE|hW6HNX5-(eNLr&7@E>bI%9mfh-d*nSrnU`DNnnbf4e# z2qQ2|D{8>ZvDp(70vkrS>US3sD0)7?#}r0Q)`dAqUHll zhyG5c?Y$?Jik1B2^>wEx=-2*gs=BSIziV3Cl)30#{m9pwo(i7w{{U+ib@$SwRDfqF 
zj>yN>Udu%N@>U^sTg<}I;rN-#2gv3%*`Q~%Ik0XNy31*-zd-Z+Vp^W%hc~Kr$dQdq z_phh+ey;sE+0U+((BQ(l54BFKJNX@dW0-Q>@^Yb4l;>pFc7NUJI(cgkPVIeQYPW7; zN#ZXx1GW5(&nu90zp?cBsqqVxnW_Y8KD?y&-Tz3?uc~+fCe#6Wc5*6kZdP8W(A8?j zMGQo1QuB`UR(;UNK&#H=XQsO%(z%qpY}L*MLYW_zLsS`!p@t}qXC8xT3+#{k*>T*{3>ka37t_5Ew>fbKVhJh2c^s9WHQWp59z?ocD zW~PjS+1a-itm@2}kg!hOv~Al66bAO+D!PdNw5&Hl{%1l@KaK=zBONM1>o{B4!aed1 zxP<=aGJHQKkcLdE2DvLsKUs!-@VWIn*3kiZ&2V@oca$A)L=!191x(f~5dqWP>M}%- zoWHIzfLUXIUgjAc^{b0(yJ(2xk2xwPf(HZAmUf6 zKH^@egAYp0E2_+0LE9S;!jWpRC`9*CbDE(oND%x&hB`M zlnib!CxQWSnVKzb%2NK!dZ$$Z$dbj(+SAXqn?Qrv_EVY9g(Fd%T95lrrcSlVoD!QP zP)w?Z#luU9iDK<~YB|3GGlAh{JCnPoX>hauKxRZatYIL^j(6$2n+8SW8O}HPaoLfZ z{?$Q)N4W7Y)Iq7l$aZR#!HS8aJezEjG}sD>HAJBtw&UtquWgnjKHwvfoDj#RYo*vX z*<-!jFIyCpit4bOm7Li)_aks>Z{BQ|d(~c<+eWLd2#AYtiOJhq`J+0`wt&b>Hk zs-RMfA8!CNLvrEeBcy1;IQO@}kx#W(J_GtbU6KUj3_Sz9BkSGsV^L)L#@E4=QY0b5$L|L$b_jl$13Io2M z-i=ov-f|i4KYx`{_eDbhgkPAl>xbFVxhva+g*kPlm~fE@qK}`Bx!2}v#s46T7jy2a zi*Qg^POh{(0qIdu%ji>O!r<%+D0ifgAPW)whj#92sS#1z%(@9=3>yMfXiL+s)Itvw%oc=FdPv%|jkIaAlGU77*81+> z{jPWA&@z@$w=?_m^5RN!SnFNy`~N@x=eeKzzMl%Du8rHW?@}=>n=Ojwpr(*M%hrXi zbxGY1&F&57Yd)JWu5x#^QqWc%-l^dAhBt*O<9#nFiNv<7clVnak7QpS6srx<~DX-P?GFY;(Q}G z?KN3Yu5nv-M05GHfWtwGC^TXVThk{uh>t$fT_s{lYutj5nm3(P8{0yUF$vVhhgz^p zx%Q*b_qz?p{H7CQ`*&jh{J9OUe2%f9>MsOrLG>#K9AWriuFfAHOdnyfs&)4bx@BG$ zo)Dy=1+^M!(Wd}@W(12Qy^0^=&GxpU@Nx%{mL17cgu>eG5DIH|fricT%@AvNoq{;T zR1e=H)oOA<$H=hs%ch7VQl?$`2pbeL+US`il2w%Rs@f;nJj;3Lk3;>_ zhgNQd-lJr=VFD`V)MauAGt+S9?HyMX$ieExWgP|IhBvuH&7NTs>>pp;E7g zk7MSp=u$lsTMjBS`+1(oi;44Rs%J|E1@|PMQ8)cNrF0bRZAblS{%wEjxAzj3>vwEVR;#Kzoe8q1cf-%m>h%JX+blc?Puym0g}WR`Ktr{}wu>3&ageKlhdGr+Zyfhj)LV$Fn?r0(dOTk6_Ez^EF$lt;;Vma({{4{0Vd=;9kYJd`qemv`!^MXP~UaQvjuUHnd2})v-ca zVp2l#sBnv8)T7&P7BRBO>dNE_ES@i1E(`qn#m}k3D;z^ST<#?Wbq-mOf0>JlZcv+g z7>IOT$06pkiF>&$datX))o76#`oG;!j$>I5;^SWKM=i7;e%Yl}Lm#{$uHo(4(#Vzr zietkfH6)_G64gW-_Bb@m?^arV-*vr``y64k44S9dK2~^&qLZS%kzY2Lav4}bPWiJJ zOxqp{dhtfPzUo2>?yY5hD1vHwkd3&x@JsP69#u8ig0ny4qEOTgjOC<1>C=r 
zv}UR9fESDjMtSjXUR``obgBJh1%-SuHC3~^x7s~oORSlAtUVXFUA2R1Wo8It1(^@h z7riVTMOJq7D^hZCWV#dXe&&$ae405sB5So-sC90W8Ei)a+o&AXW<=3dJehHG)nZ%N zdZtqJc@9!@I$6Y-zbUpwhj%Xs$DU3?Dii=8re$w%kJh#*xLh4Qv6bpIlK!MmvOH&s z^ylm_cvnckSnJtbc*nW`JJkrb)P1yk{wd#WdpH#ghf1I8lTx=ee~(@7!yx4v40@3Z_b@aa%Ysr>=l-Gua5Jmn# z5+YDK$H9Udnkf~{anXA4R5q$}_%9waf!N6+oYI>zF#q#hi3*xL`m-}aiL^!I@YBE( zBYi`UcMkqsi-aoVusvfbh`cw;4xRR$W#!mk_IMxFf+mQT87GVWXSwH%U!?Vuk3-kD z91&E~_HZ)hs=;c_VAhM*b6&Hu=WM$a>{*9;U(mPxsAXGCQ-0>T!<<;Um6yoMN3E{a-2{aw9=am0}h_lWEIF0+BDEBCZ=(k z&dtlf%;~?SR>h7m%yX?dV>_ryTNAKGn{k3bYzOSrG`WYqt&j$MtaaL9XP%VQq&Fkg z(@jeCkaTh!L)zs$>pq{rV`t;_qojx{!by`&r25Xu_S5Wp<`s`A3{tg0*Ab$XZA(4~}Op&*U+@KXqr*aJ4dN6(3DDk-+$ z1BJUFs*(Togullp=WN#|16c6^r#Y0&!30O&nEAA%nJFHD_707DK$E{Hy@_qwGw!WH z29L5bNJ%zbUG?lz&2Ad^JE{K@mzTPasdGJg>Ya?7vwGD}f~Dpkk+tgJAbn};+ zMk(e@ki>xIx6hiwT`U8IDCKF@AK`4sW6aTcak@@>Ize0ksA2pPI-40}_N-9JHof`u z=IFNw>P*@%3@YsShkaeVuRb&K#kJ$=GF7?R)6^(K`c@tx!X&)avB1 zW+^oFVmaYp^yMtbTNsSqzP4k;1);@_q$~;>+41a2A4GnTyJXF^5f61@7n`9pZa5-O?l?}c>iIMN$DBPtijEwkJzHn7m z8SBfhC0bl1j$F~|pmoFB4*^$jV_`06-ufMl+xSZwK@q;-sdP{jk>_cF0U$p1Df0Mp{1D)=6Ipy>*5YTL6L<*Z{Fl6A%Q_Uy zZeG`7n6jmB*d82gTP#DG!k_ZMbQ^1iIKl7<(Yg7)qSZVJ_sIFe4P=_J;0rtB4P5|3 z4wo!0{}e2fuCP+l!7uB?ht~FDY|C%qnZ>pkC>m}lNrB-6%SQ4yE!mpe1w!+Q;3?lz zxXULYCvLbfI#>fK){OGaM|JMvGXK${2Qxt2kX++#qPR{^%QUV9uONA^V=7A9YUYaq ztiokTO$GUScfbDRk5fOi*;Oz|v*n03ia4{E#$k|0m(nn5B5T#Oo^ca}x3D)5X!=*j|s>BYp)UoSkof?*ep5<>n-TyBxO8@CBNuTdQ|t(Cg6 zPClq*0^b3@$h36QocOupwA$!BEqNKr*6{L5@L*Q6D>D-?s1y110NZkhKvk;b`R$-g zYiY8Km?6Yox_Co3>kkI!a61x@CmsAj&Rk#CY^|RzU8j{sRJ#BkK3Je^9-f^bcj7M) z?<<-3lBK63<_KkP+*e9->PAt>(+vn4>B;a%w3VY$Utbz$SUM6CO$(W263`H)rbf+# zOik4{Z*S#N(o+~^ZplD$<{p^_1hY#!F%EfPho_c$9!@T;r7f||`20ZgeRq0Fz#fNp zk)ERJdS)C;nCMb*1Kt^on4HR-_@I+wHu?)U^IA%4Y;Z&bXxlvyI z;$I5jL~7`UU0cikN%yc&1w+7J#!NlwNzj3yVv-| zAl9s*eqqT0DoZt_<=7m&@@(6OdbLQX{08L4wJj81!M$B}`0o0uKcw;U-Fzgl+@OeW z*xB%1CSc~H`Le{pCRj416SRSt;K2iC586cPc%L%+_Ta;wZ-yWo0O)ZxcPcmoQ-Z=kQ19Vl-O3S(!=jN7ZcP`TsVLx$lF)8w_}BH0 
zk=#zS?=D^Mwj9$B0*1Quf`@9xDUwNYYxrPlX#Bbna5gwJ59ay5S^0vMqtR!tu6noI zFh0`R*jWplLUGsQ!{|=$%7+58sSWD39p+ygS(7&pXTrhXN$7NwlW7YWaU&_l<8MyEmU zvHm|rO|XRoX-Cg1Ftr&JA{d6-H^!ByqK1;Pn<066B0B=l82(jj2cMd(=`b(V91NL2 z#;m6JO@Zp~f>{SPTefEDuvK@gE?6$|B1!yN!{jRUJd|_@@Ua#+80vZa*uJ_QruU%n zhH#*f}N2mAqoceu~M8N%aY@aFtC}N_8Xco7$Pb z&Sok4b;{=K49<3YG?if1BusXX^8@Ch=9eQ6J}yV*|Nm_VMC~AYOU?fRl^{$}=HPj- zcC2mHWWlWcDY^t(^0LjtJUoP+50uR;aPYE~t3Yd$Fjv(kyDjGCp8oaTRUzKDbI~p|-Q~*`5Imhh%LdHhjRX1nor*meh7>ziJQa|RR zx!yE(if(f=xq6Gj@wn&=XJ|P{BHwpn>@+Hr^`t-P3EFz6vx+l9X=2{N9CNI&$0)CH zVeH%H&7QIA!k;VCCLq;_FcR-A&Z;A*gI^$RcjoV-O>F|&ueZjON6Zt6t&p}e@FE>T z36ql-X1yVGQf)i{qpvS(vp$eHQeCGE(40!oh(b*JG+a>V~!A=D9dM z$>sSgj@lXdIG+^`OrZ&}rMN7S!IET;apL^TRVTLF5i1xe$xCgJTJWe&!`moqeDeW* zJ;M;&02pMO^dD$!LvU?4sB-%yEm{w0hU$+(;MV`(L_aRvlc%U&WvLPi=R3k6A5tGr z5kDNLIk8-t<)3iu8sTG&(s{kipSc!5p#p8wjO=AY3<0^~jO^mzXM(UEz*z4Er$l|GS< z(H(#l9>XKc>9TODC>Vj50@cb%!m*jTB)-&-i!+ontFCcC3r5ps=gj{4jkKqelvQHg z>QbX|wvA&M<>?U7c2-9X2|e)E;Xq35akJ_pR{e^kz*DhuWMDdt%tfZm*cZ}JYn^->dZ#}ZY0dcNO{7UjHL7a~gs9azD;$67k?_dB1l=s`(sRn13{6l-&K@|*%*Y@QaR4&e>ll2+#? 
zXNurG3G+!;Aj9;-e4M~U$kTBckLUx*D|?E;t;WCR!Ix{tEr^9LGk|N$?`k3a7kZ94 zmJ^^X$n4K>Q0?Z+tJ=c;cQY)x{S*6>&FLlX&1z}h!PAJiv{W&aRT*19tpTMhy#^ z*-P}C-$SNE?asNMaR+>bv&M}-A6GiGE=hj|=!O2&P*!RYT?%5bPB|RBS(Ce7u`7ZY zrE%!icKG;B{?gaQ1-jWtgCpq?hBZo9ZPFJ|bEl3s`%Hhv4NVZ^maMY+a}aalT%h}s ze6;S)1PnE2jHHgd&5mo+Ujnf>z%7NM(2M$D?UCq+m!1HDd!u*xkR^D~d3{U6P=QuV zu4b!S8FbnsP5)C*2t9dU896+@leU2rmLyy?V4M0-`xHGmd>x~*jjUA zk)vVQ=mbF0aTNMpFuawQ%1ZK*Gp%}aa_b#4D!4%oVmnA6wyw_sC=0-zqOO4D9 zohRz(u*oCljhe_-r0IVv%`kALW}53#>{FP)-EMFs82g&l5DqNr*9!d2hI?k$Ojsp} z=PYxn2PrFRFxyhI^3dlt4p5iR!k8rLfn6IFERpAD9Clbh|Iw>nMQL8H0;->ekXOq4 z&C8XmP-8F^^+K;(K>~mxddgtmAl%S)jeVqp$3a6a7|P%_msOFPr z6N-$bM&c<3RfaaNU?L)Q;qo==oyOm?)McSlZ+N%H{_E*pP4$Ns_=V7UFWd+CbP z*Bf?c!~A)9`a zVU&gfV_U{);m3`t+!ot{oP!yzqdr7!Z60Ed2^DO;!=0S3}vg5;T>#3~VH#(oOmjF0D zC^fuh?ufq;yu1~ze$b<)xiTdsP5kH&KFP>kyce|>dV)tcy#6SiD?Saqa>2N%-1K5@ z&_L)es2k^Lufbj02h`h0n>BMo?G{aKB*%sOhpJCp9qk~v>{WY32(f4$g&ZQw&Is+4 zRGn{~41_+p_t#J0ybB3!h(l+PNRZ6wcAJx3-Q0oYjUiX3J83zY3SiAL-5T5S8|#qY zx|CTujXXF_LRw$QLqL_BCtBVyQ9=X>PbdfoFMKP9=PqIko)DRqMmqp##obaMk9&!W zPfO+e>qWQG4Ml@LC}Ogz`ZQ|;4_xJQ9V{;UMY7Qk%LS$I?`aWvDCOD?uS`v2ItE>> zVp`}GHG>ZS0P5vHs=R}~)X0!C82>g~h;GNM(kZ;{D6QYr_Pj9l*hy0J(^IRr9Ax+q zHZr+bYNS6yGf__*<$TwCs}^W2cn!S5YNc!c4n~oPIYp+eWyv#A=z0`cTo!=RTE!oC zZhXW-L!L4fU?Abr)6IA~rlyeDv!Eb9a3rKdHwmcaEcY(hlY?MY+3Bc~@xaZ&zr!RM z+jR(FXc^YEy~M#I(rqw-A>mjt>Nbt@83`}jpfZgemjNW=joY$*+_fCh>oYwv@S^={ zB_x&AouQd0Vg@AW#P~|4&b-|-5~OSTswOd|*Goq-9qzKTMEhM)nz>3iBZT@(er8%H z1=^zOor^54YW5Mq&1?+?#s3twp}KX^GlocF`>L83(KTHPbEc9mTqdVtPaAyWu#pFFI|g*#FMns)Eqfz8Sf}BjGp*HrT-B1A0iGKHitb)Wq=Y3M#80 z_%pUlO?>L%8nt;ZPEW~>yOqjXX{qe@3`@bV2g`gu`gOptZt7KURyl||e2gR+m)t;@YxF$L` zW&|gGuuE3(c;8~NUM>7QhuZ70eMdfoa~fbT1R7EP)jT9r_Ue9Y=Wb8t_w-7^3=-Ht zFa@O8@ez5_U3x!EH|%ZMAncT!NERCOMp!Ij55%h3vZw30&#{1b94hBMccCS5Rm(BQ zLA*%axi~vZj5sc=r?W3*o3LC($E>Tq$0~UZ!=7A|m14aO|DVh*K6&RhGgruM{(}qofX%oF0itiS>U9h$&0Ji-P# zTX8H4NaAn_#FhJWQ+>L>p20}N>m!j(1|_54V^E`^-GS3VSARNM!VxwRDNb0N^?OYlJyJ1Rub=3Qk5vfz 
ztW_8r(ZIJH;i#5x?;0O?+}ycW9&$e{-f68&c`!@i;Oqp4OA|wi6A{8EOoOk4^>*r5 zI7}}3z0hu)*ahbXhG5?iOzl=!r~~y2wIFj935vD^J#yIWPqB7d?=&8RUWeUrFI^=Z z6~uK;y;W`YJ0fGX*&2i}v-lafE;gH;ge#aaVH{Qhmbuq4AcMf<|3{>^Zd zOS>e$*UFHyKnS3^$H_0-=Af(-?J*6ccc?j9qoBRcB!RLvTZoFB{h(wO5*=ii(5~jK z(9>D)elEC&NCT?&w~Ub(pfWYVj!jnFgd{l!PTuV*8KPeYJ&nX)+dAw1fQw_5fbYd* zkA6?)vmt-uTe$CJ%Z0JWBrESR;}b2OB|BOXFKkqi7|9lgmafQDLEZaT93rBRvbLQl zh63x6eUig5rkUxRHsy%8zffyGh-}ZrF#euc%oy}5acszS)SEzHjC14{Kf>s444$_v zI}G>C@6~>S!;BCINz4z7!|3uX6YtN7SE}^>r%y$c5Fmt2!=ooa57gr$kG|N6kuI7; z`%n#=V?@dyJ2>J6(h>-J^tQD_-uYP}L0Oxw&2titW1B`i=`tAKa6&ieoIR)@)aSX8 z=38y_M?}cql@~xZf&Q(F_EVR6KpHW5Z42jDwVFYjY~J--7KpDV9h-!k`nL%4wq55K z;Iny7SwPrf*O2Aena?MfPxtjc$+1+4(2T;qW875C3ueG)|st10I_5kN>>=jp~R7XCN zRi{et0o49c`*^}L=KXQ9PGT%8+B&gZWbUl zJgZ-x3D+F&6GSsPJXzf)T*@&P4&>s2qy!~?+c!=Wf;w*8o=$Qr=-`y#=82-AloLxv!)IuJJo6*M~`>GE2t*>_XQ zFu5)aqLLaysj!QsW(PA+SczYHkmm#?@R*zpNj56({(W5@nTZD*WiAh&0F;Z>XtsO{ zzgge^;nZd0e}}@rW?n2iw6&`kr+B{CGOH(4^wX3~sYkPRLxzc~MXZ_bq)HlZCS@@Ufz8ks6crMEl;56;Wjg%6)n>+1z19Ys7*5t8OiWXjqaf&wPz6aRz+N#|e=d&+K}T2_ zN;p<0+kanX2cvV~^pul7VDwSq8yewWZLBX-JISwNv{O`imc{3sFa#$XEUEpxe8%<9 z9D92Az}!LQegEp4$j$`5Ik39&__`I*AF&c>)qUGPC0W!mc=+LCP#}{tlR9~S1}Fg) z+RlzoaV)3yigh|8_^%bJ|1DIgPHzWAb3d7LkgPG%uOKiDO7PzPK7>`~g_146U1EqA zN!4dl_l+*eal6m}7@o_XPROK+pEUrbwnihrXr1kOFnc2Qj$M|V7YzNYR9*NJa44KBjaGnS zm*4G_wU3wIg=!{B_N~XkvOYMwSN;0~I-Rcb7urBKmLx7MIj>(#`*TZgQaQ*-y-hem zyS)!I7jgJf5iFw!J(c?dXGommaQPx@@>GG-(5A;uTj#t+ZQ7ChOiwFH03 zPL)FWos%>sk_ek~Y z7bWI*6B`^7N^vGy7i9oh4KoPz3Vt;?#=a+gXqu)S z%g0j20QW@KkjOE#dz3Ev6PnVs0k2%P&Q+3~rtY7whlX-}px(jSLrYkySg57VdzoiX z|2T8K5EO2|S)`d8Mtxf%VHlFt;PZ&Pr|U8 zZY)>Vb&QsiEKrGsWeLjjuVkky#g4Aum_T*Fl=4XoFPofd3p_c9z(jXwTGkPXH{y*J zo_sP(JhzXHP>1QlkglUGJA5*AAqVrMB=fFlYTULwP6b%Rk{C5QC zcXC9(p9MzdE!U0BCdP?~o*)u?w zO|BR-;?7(s9!BFf4p{}ZuR4Z>ZSWVOtEef8A2Bk6NbNg?*7 zV6#QYbn0+klz^VWhcKtQI&baUyU2db>MfGyIUar%zfW+ir0G$N(`KntB9o0&^! zzP@wgdBMe3lee6{G+*Jt687?6yxCBMq9s?)>b~0#y6YipC9Gm!)f>3So|>xE>l(`! 
z%J0TA#5e8@#S+%1Ze01@C*4bZOUpa2*|ta$+*Ox;l(z>;S?Ku~t@wENuX$$Z_CN#w2q9uTbYBC~|(UBO8Ft1^O zp9M|l+@c4=o{Te5_e6pU)6ZM!**CDjX_=B4ks)&vyKPZ`=oUK_jxN#0tf|J8d47w}mmP07~FVzW2^C$s1vznA_ZhUSp{YV$ zMe)b+;f~gr;TrK=^D6)XyCUZJflM~7mz|j$l(dPV7f}n}^Ol6zzwXWjvti*}1S)k~ z`;%1_HyM*86^hGq{&D5~qJzPE;aEHv%wnN?z@WuGN{fg2R}M$?^0CjoxJ3Hbm?wf8 z8gGH`j}clRSalL@6eHxg{JM)C|G4opDd<-&765p-i?jUBuxLRcFsF9h4WuX_fN96F z&EMOX9gvB_iia~}+_M+mV0tu@3}&)RlE)h50q?UUjBmE2hub*j9@eVgS4bUqMyvXg zU+J&66-*d#SY_jGXUj2Ki!2>`JOmTg;XYNJrZL0sNQu4`=$-_4)MpO=NwmoUz+pa# z4%M2O%g1c@;*$U9c2ADZ&%?LNr0cis%6x$?`Vu#1;76$xtlvLCT2@O883oQ{{)zeU zW!C9|*Z_M)#;*!eZ4U0^wxMu<43@NDKnxN-L zNHFSCI(LpmRUAo@?u$N!C%^`DqmyItWot*N#La1ueC~1QzVvdnR}y;CJZ=^8Rf2pj z4SW}{G0+-s$;#yK&M#lFree1{;C6qb5V>($Yj#p;dzaV+ucBE$3w$SV^CkNZ?xK1OX{2me830tOQo_)2JC?d)QA{k#ZSDLoQ{gi>cCdPfKm|F3FcmYpnwPFC~5l)cgHu z&x?Tm3Q|xT05kZ1j`_Ir`@bD9$mn7>(>4QCb1Qj&)T zid1`Ev)NfkKG8SqKW}3fyO!Z0XqMMx#@@(^AWqN?HktOijG?B-M?RJK$uhQrf`Nip zuinxJF3W)=T!v#cs?iv6j^dRs<7=?&3*zo5=yqzh)LHnAa=!vrGwYaAm;sr^O;`2p zIZHi}h$u~aI&}~j5~O~TP@r=Mvyk$HY>6DL|B1Ex5HQq%nH$@7gA|A}Ko}6>l$q%L zOU0M3^Uqd${nWO%2L=OhFa!lQ&@t=b=e{S#Zw0yS>jn6!n5Glt($jy=A8LFH&-n)$ z-*tGRy&-noNnD&NW=odXj?g4W!tnw+R<`$0tbZKf)2pj*PHx#OqOGHzObD(A<)u!+( zHvEr3QeB~ZHBpkDte?^y0Qy6w64*}wflUXLhm=l&C8Yd!{qdrKvRlCPMv-rNyq4TH z;_?bKV@8)z^&v9|CWX?A^iN9$?k#~q^gQF0e8in?oj>Pw%#ErkEiyky!u;cycs>x= zW1_`uFSR1+Oz{?+oxv2H%^|Ly0GAM98-czLn?IV~{e2M~ka{SGwWpRl3&G724q^{F z^?@1OJL(eHn^nJK^)|Q%Sb4R$jc&B8U=~{k^oQTK%Ack5Ta@hE64>ER{Uv*=WtKSA z?x|e^JpFrjOAFRzh>cTUi$pXa8&v<$%;RgxKkDZ{^Trq0Vn{j9X5#NU4U7XI=ir`O z`eSp{lzeMw2p9GLl)EsL=Xv?Op1N^+qK&JfWmmkst>0zkPC`6z3f>!eM%?T4Z>3Ud|T5nm02wx{jNsUz7+82Q`y38|*nowpU zHfHghS*0Tjk$OKVm|E^?F?EkqzMlNgBrGLcjdHacx$IE-$=7a>JfwVA|FeRgTk887 z{wI1?nimdaag$Jj z`FUCmCV|nhEw;cYLL!=8P%4Lzg@GHM88eFT!(myspG=otz6+kG#I07bG7MdWRJ*$v z^3B=h8k}||z*6)sf-4KXe3}#EyhgRf4-0{V`9cB?B*n$~|I{Z0)4<|-+{gd7^kXu= zj@Huz$$WhC)EHkgEY9?!yv873_&XW46y%uO)dQPRDcmWqmFlqK8ajtB+zdzI=L-jw z^1svevC=9?n8hhOd`C(D5BbFYf)iwdk4yLn>pBpC)Rpz$xvH(g#5H}IY>Tz5laqvx 
ziq-ekCx1~Qj`4foW0?uGLmb!?+*7f0q9=40e&{be)P;N{ts8rxq37}c8J;iJY`QX> zYYc__?|cKOo`*ay1q~2KHzR+<1erCXQfJFoj6apv>Mk0!F@C~6d12uoZ)Quy{`v$2 z6eD}X9{jmuW%A%f3w9lC{Q1#b|8}BZLF}(uf%(PBP&J>|L}%%?*eF5LdscOQ_PXAK z7eT5%X*DZetjUcmNngc`*t7aYF8#pDRY2|&Ym!@|-`Ro7De$6Jsau-E%y9a{^7U&f zm-3-#L!;MSDFU#;&9n&F{2XB`?Df;H01PN-XyGjfbIM& zukBHEQrT=tB+${JKG2%o17A^YoqO=mSg3P`lh4MRvNpqZ{1WNxoG|4{*z2f7!QXSsE zL(GZAD_^Ezon3-{OLiRXtmoRMr`OugJ9-axRm?>7`Wc)q20|x7weODoe!fm9PU)1V)3@@sQBLHRoe@rf<7nq*^$o4v&fU*AoAL> zoo4q-*?ZR9j3r)XH=CZGNZq(_KJimt`t{NaDO-Ksthn72N7MadFo_tJ}H#oh#$z zrSJCHqm!oMZqNNz8o}d2MXG!o8UV)+U_}n}p)=`PUl<7|As7$`{~`cit`kmm%PjnD zHJXR?rOKn=ge2Ga#@Q&iQ%qdFbR@oMdh+|koPW?slSq2!>4f_joTX(IwPSj143)YQ zo^u`qBtSTUEkT3gdvM_t3e8pLSu{on0gS;qt6$?T|8C{hY+mHiQPo*~k@AIOE@h(M zSyaAs(ZTFJn*Ij&K=W*(T|c=6!>#rg9=KHZvgxrk`vz}` z_CE5+H4h)YQ*FicBVPF&EYKoVcD%7-SUY}F{M!xvpS-S@Q;g4kg5J6zMvvN{dADHK zy(>@rw^puz1mvFfpUSNv59AE2)dnP!=?D~OPfQ(DSs$?GH;(N)K2?94+OPO^IV}4# z!T-)O3}5jJr#P|&=wq4;u>Aw=C)!FbDmq!iyLa~I^DVojr?^*h-Ud()zMax4Tq|zcE36!44|Hx^!*eG#DSTb5fls=y zGwIjn#g!Wj?&)Op)p0E;;@dQHEcnbCNtSgzAR7#&_oYvu(arPXr}b6`ud}se8<9Bp z#EHb4{Nb9#kFHVhNq%aEc~HFRBw7qhPW=S863{BM+~F#SCC*|&Jy-)d;^cDV=PhU5 z=r#an6~w@)ZfNz;)&%V3@zi(&m*|zsCMXY;<$N}jXi~peTeHR z(=_noV=bXc-1L+xh-^G!*up>JQMW2q7ZP()ft0m(esD7{R!BX4NBS-mEK1XG%Cyycdq?I*ZFjc?1+o9ot8u3ywzp$s{X z#8^qR zh*F;X$Ad8?wWnN5iaqI_p+~~C>A9wsXAp+bzGz===>4E$q2S&Jc7)zyC*u{N?OVoW z%kYKgB!wPfqyB{QB1w!4a2NioW1=1-Ik1-mJE$?D4KMjk<$ak~GzD;3E42&!Fy?E$ zyR?>U$1kO`ZENVMjHtMI>_3Elm<=zLz1XsBmZ;(fTQbTBpdsWPU z(k8ZaV_2!`#++07V?Nz@Lov+7D2op1JgoS>vv@rCxUmMPcm+;vphXRU9Q~@nT&a(8 zS|)hzd~#AGP~Pjmc1|Sn*O|SS{@Q^`M{KfhbCqngGPB}`9vE1v_4xpVYH#w`GzKVp$O%E5R-Bv z8Dity;g)XP4>()S@IIBKZVjIVb@>5uL~=hFD}i`gD5m0O3L$kzBJn4yy1ztiGLgR= zMs(y0mdx=ptq~hWhfL#5?icv$Tuz_2?K@dMC>DvQ+dWAn1pM2!HKhE&kR+VaeIDFDCSzgU#l+SH=<#UEh zyua}mX>LxXQZDF(1RQmB&8_{Mb`@4PP!3p7(gf*QJ4wRBUTO5Ha`zb3_!J_KEWV))vAF9 zR#j^%?2gQ_ewAW@`Zz=c4-%EhGiR~t!DD1TXI4pbIjfm*0L0U$=v*4{n+D}0f^GAK z>3bYNN|lU#cL>?8YT~=?lMZbr$k6OnRMb>P#pI|0aVBPc(Pz#TnlG7LQ(3qKjc#-5wP2WFx 
zn$Hy@Hf?2Jy5*J?(e0x(=|vAxjqB;Bb{TRDlArd`HwqMl>QCEI6E>G8u#r45BwD~&s zc#>;7)L{5TU^k$>`LP*8G3^AVB6O4PWgOiU5i1gswyC0=anJ)g)l|ij^2r*GL@kI<&I8Q zL&66fEq_SY26AfX3fvdH7qobzai%4r z=4decD-1LJ^&-D+O_Xb><(jdFN1(&dk~e~+by{4{LU++|r7>C^51g517wUnov2aoG zATTu>!KXH{Y4(%#P~{vSSyjXNqY?y+m^dFImIZ-<|$X-w>J)NJJb?_6wL<#3vI*YsY{hJ30y0fBpicvt!uvjuqS+@f` zh7DKE&G05E`6aZyv18$uy73OWHXlfQFun z>gYpAMAi0NQBi{&SZ#_EknX11AJM2v9i|=+|Tx5M1M6eNJZ5wf;SubpGBZPa{i? zQ4IqXf4&tIjQbBvF};24m~BDn3Z*BM6DdRIw%k30?q+Is+r-hG8|Duq(3(F}H~`?t zZRq*ih8KgrnqXbWD^efmQFZtT9vYc3&K>~@y(B=IupgYB+%&x(+}o+{WV^1>QpO6C zWX54{au4Z#6U!^*LEhIt^V7-R$pk9`7b3FEX@6Akbp{ex{0qBZ@(dyknwOy|2^_1O z1JHOv=WUB2Up&qMo(Ba^R)&)xBgxh7-|mxL_NV zpns|dPaHdW=xSc=7_C_JE}}!ICw)ixTM^B*t{(lj_hg>XV38fcS@D2Qxo#AdK0!#y z!}R29x-lSF6`RJSJP{`CnF(y{u-J)`akfcrI#IeC?l|7ZM-3Mg;gLA`LL$Ku)j1r< z|5nF@ah+m3tah~=h;&`AOFB59oT+?<8?fLjAOxA(zHHLSHXImGt80nD?pm*!S{g8( zomBc`lIN7GHshY$I1Q`s=Ve(9`#1wicL-X6(78q-=c?J)fyYZy^cON6N=3kzpZmc4=Z2&US> z9BwM3wH-PnI%{!x>!2LsFMNeG|!JVnFEgk$_m8Ff|yYT|QC&|!GL*%MrCTATY%CBiS@yP~R&a2fE<-w6ul!`NnD#=N-hKCb6~Eb=i+wnuQKJrZ#+D@Q!p5%j=8vhC)w2* zsUD@9sc&XJ$T`{Vohb-Lx`;jMD1;F0NxgtnL#RtW>F<8S9zvL)jI=f~U!~6-`RL={ z^w-WX3R!)r`&jyE2@R*DzYlmY3X|`L-5G}RYl~h3>>_VHT=Gkz&uG>pU{Qt}H{6h) zJ>t$VKrWc*dPL)}(GHBXZ&MJhc|^j+YIZ0+YEHh;j6SGVXXf3bskGY(W>ij_%LaM2lM!9h&lvn$uDeCn6KrDyMZGWZ%$3co(fJo3hF;kV3 zzs=q5Y2n?ZnY0N^`9TulzL>$~T#}By{h_wJj=DeF&o+3;T-Xq2TS=jCxKlt}9K{X5A0D zo}~1H00DZ@#4e{PhhIzcvcqExBoYZSG>|CHz6iT-ff{_F>&@HUOQZ27_-!bpi33_W zf*3C;SE{)+>_2h`SXKuse2$KWy4-Non#>IxR5E`tz9kBF7o_mEmkC>eB<`HTi^P@8 zte~BvLUoMB4|(>hI4+LzBB~6WhOpV4p{S6jlC0tv+OHbUt(jI48sz zbq5ZdqN(rs!0)}`j|dFdir0b9L#PxnsSiq-7D^{h)+xF>ti0+_ah1~qzo$xcr9v2Z<|GI#gb#9l zE1i654h{dTDEVH?xg(l!RDihk2kVYLzFtLVZ5H2IiI_I7q+)?x*~!FsoV}lHIPrXw z7W|p|GnL{aMr34=K!1xeCf@z{Ac5TBu>mFVRyF$OMthdqWr{mTa{Z4(E=-A>E=S2~ zGzEuM-7R8IKDn3KQ@KK1HoF-ZqpZ;*Y^!n8Q4Gd9<)g?*7ppRymFfzn&(h9I3iVytwaxF*W3%BoJ#`Xdvb^B>t>nN9>ibklAdb{v_B%b_!8kkuXWZ zRs4v90Lh6~P(y1CF&g|LTW;P=&Cm;U1-_!rZ6+2ypNpEfwgP?ZMtJB;;c7# 
zF5!j_AIrQBKXPU0!3S;P5$RU}-uD10+!jnTfuro4PNS#c0K4WLCgu@s7>2u_oM4i9 zC43-PH@@8+fR!yr9@}TWiRIOTyq=wjMhyG3<&KxM+$a&)VrS8Tk)1=EAZM!jd6aFg{>|Juou6@Re%=OIEl;OZ>U%a?llB=-h{aOhfc9OIUhDOrShx#PR?eyuf4S;~VK6KO-6>6+bcu&%9ea7! zd%$E4Xahx5Nynbly5=GsD~Hh|MVu&Q!tmTIo_&2!WPac6ouCy3&=3Yt!SIIjO_F*c zdN0}iN(Q|BW& z@Y{zLmMnK*qEfM7rVdJaYxEno&>;)e>NuykUI2pG#|Dz@WMbFMg809#8vGNTWprnU zK_Yh0Aj>6{0G&leW2*vgC0B9xgi*R61 zY`(Fwaz!Be;Dy&EDJ*!&*s#jZ4mf8{Ey86z0>c7MEDW~zS*?EPH8@usL|bB=)qu++~DyUW-2Je2ME z{i|C{O!KNse*_UmtNct$pE%kO%K^ZE(V7)Dq%?NB;8uA|<%zQy(1pY=`>PN{_B@fa zj*R!m-!;GM{$xG$+em(Xej^iu#5&ATVYg$lJtzT6G!`K<**j0&$g`TXhl2WXSS|&@ zSf6X>UiHYSn5r%9V+{IQJV1}chqKwugK6vp4%()hfzi)$sZVI{HrlGXE4p;YEUhE8 zZ99KhGI*mws|YcoQkTsFvngxBTbQ^T2DI3-Ry=VM_KMZ9EZgD^Mmdw{!E@VR^0UB2 ztnX`O2xO+UQtVOXN=Kv=Dz~ir!ZHc0j5!UFmlHN9!*sgL`p5PY8mITV1?td!E-bM7 z8!wS*GHKdYmh-oeut9XU`%pICZ6oGF?P$eZj5a2D}Cd>x~(Ot zkt%1bA=D)}7Jeopy8@3TJbC6MM4q z=Dw;mywZ?j!FD3)q-iLRh;$@Kv?t$_0T6mt*?FBhh$kB1@O>b=6nGOTS8UnJtk*R!*X1ySwB7o74%iFfDJFWjd z_+w*u=_k1}$QUCrzIJ` z8fZH8L8-=*4MwvrFc}pH@Hx0bV&-Yy*5u(+>ej>K1y9f7{cn%x(KbH+X#DFJ zMXGw{CZu?O=gLU@m9iHfZ9HR6d2Z9(=wK681qw=KfieL@YogIK|^jVLqT6dr`0b1v4 z%54QIA%2&HP{ z{Cd@itFs{5hWG8fe(UZHQ+4M$j(2}))t0OJxSz*o$O(VX!h+xY+kR*g2(+PSGf60> z3c^8xkdqSn-~zHcY4DZa;R1f6N@~{Eh^ayr40niLxM?z4-8lpQILG3kU}s-3f|ECW zZD-dfN}gR5y1K47Wlh)k>Hnn3a2lCzR^Sy5sU$J=aUcGzjLM{w})xs zIJo`hIg4Mqx0ckOLY59MXTwa2JIQ~tje`Tv9PK|i=kT|WKHhe3$paTZzW%#&Qa3%m z_V}O88n}1ezx%_`^`P_*wOosL?2cX4+}%6B`6rsqnz-boZ+Xe|ldBMCqIa}FMYKUz9itw4ESN+FvSGppOWtO_>Gx%6 zzT-aUzon;elJ)Aqyo2Jjn}Wp+56CVg!UW!2+NDKHoDk}qD{obnyn`n(*&|Fy=;9%j z&63BkziXxSMuudMb%Z39YlN)Q~(9T7#L4O$ZLq*cdez zcJ!yq5nYJdwpZIeWk)3ARD~Vfbj}TPB5XIcgP9DuauvOr)mmuW`^gU+$nz9Vqxj~vcxjARA3weV8t5>(WBZ;JAKiG}KqR6T!@XVyXk zz0C_mOzN%!6LuPc>PY*?Wu7<5jPfLm2wgPaLzPPf(S+5QXt0oL5X ziRj{KOoaL-J&T)4pBNslw))y}h|XU5HFmt=?|4%2hCV2gr;slxTSB0uW@$=OOL&iQ zt%}kAQ~H$`-5_zbx&>wmcQ)Vw z`tfI>mWe}p*Qdk4^^NFVe6&gyGx4K*)7amhO>P#F1WV$1EvpIvi+F}?&>B6AlAFyEQm9M3xthHt4jO?id|0_nhmuSvxfHz+8p8}c5&?UU8l-(LcQt< 
z)k7K&b(V@){m-VP=5QK81biO*1+TJ}+@g=hl_tj<=*!tCgLsQ{Oyp4=nTK7?3D~qF zN9TrZik{`0_)pHiFSLuSRV&RjsgG|6l}!PDE&A#|vK0=rd^sAO&P*FoZoiu_^=Vs% zB6>*lzQZ|?4N{ls&BO-7%X&#TzDM7(-m9(U^W$GaMzuo9h3pEen-KWj`dg+(YSNBG zd3Ft!yLo$#Qn`w|)o05i*HAM{EG*qe$vuH&2+G1ls*s4i`f9Jr9adRdOv@WfHlgaV|nrZttH24xs_C~+4z!*U}&OJM%n!G)Ap>9xursQog5lEO`sPTs@ zD(saTMTc8dMcyg@a^5tN&_0`JN|qv?7#Txr#5@?q|%Hsq|%b zmg=(UW}W!OCGNbDmC~ARUgrHIA*K_=fG3|_MP+Slwm1DD&x{<->=jK6x@zSrB13YF zj&nIFQZ_9y>_ncfn@dJ-!a=ChT(TqR_@KAYDSwwNmP?Ls=jre~TzCXjPxw|_MJ-4K zaOWAR3)$HwycgF~;VI(I)D*i+DU<#nH{*6ON?2Ymku#Bn6xTBujQFWFVNk5K%*viw z5|Wi?(kyDI`=U#!3_(plB!4^|nER1NXcVC$*0)$x0%?VY?el+9pWvWI=cnhLP?=>oYccyL|CQ@Q6;y+Qy3zBQX@}Y$(!`K z$-1rCrMsE_DHt+73H!552fOUdk-Ag4ZAPg%zlW9{k6}k_%c=F5i3DdhYXe(u<94i_ zuJhfVWIl?~!N+N2m28HqZtu9Fx%rf@F*~6pe-`67Xgt`nb0Q?}Z|K-GS_wec zb^g#v-^#_)WwTCKz~{JS(sx%w-eIMpcj@??S_0j<7QK!A-VX*4g_o8|t+~7kjvg7mXK7lsKzBo5B4Ujm8k7D>qtn0`>KeYD?Bo z?08Bfn~d|Nf~r>*!de#Z>Hp_~!5j3o4!QIC)%y5AYAP*=e^4->JL6VcXD@~}%75I& zSE(u|tXE%s705(w7*eQ70rx=X+0*DS_P+l5fh!HLg^6ANocL>Mgu)==WIigrTVtfY zy5q*|?IZFn>ww4@9egATZjo2MiB8FoEVFJ=*e}s^hp`INiIp1vdJF&y{!>Jyh{AUV z9RXLlg&mT~Y}UVOdTGSVQ9yhD35$-2=cP}Ky>H``wneW&ST^tqu2y=Jd-Z1Kd+Mxv z=PrE*3&I|JYBH#dt5_nF9SrCxwbhVE3=q7G#qcWa_J?m0Bfen+{fna4EOD5A5THD* z`Xv`beT2+b^raV@B&BovK6PB2C0Ti$UQG5%QG44itaogtm3}{m{Pvsc7yt5yrCsvQ z((O(j)gBt|Fd72UiC;E`RK|$Wv|{fkXH?4gb6wD_JE%2#@wyHvxqd0HsEa+peFK8% zVe${u0OkBLr>S09o;zJ!L#~)FI*y9^6fssO10v?Mo*!SWk+^HOUNt2%WKml(^?5Oq zP@kozuqq?P2hOh7q*yAXxTbhM7+qrwg0>|bU(5)poYDn|s0UyZIBEk56}y?8L)%T7 z+vbJOjOUq^{y6lC{^Sh;50y|HG9RjtVYTi^O%`e80e|i?d*aINB%tT;Syb>l``a00 z0T=o&V*C8GiYlBL^aLX}x0|s#pI(Ymky~THtO~}R<5$VN4KMC)JVs=)cU91L;)snv zFutSRo;xlqJaMr>Z5pvBlq<`B5HA763@v7b!8=%G8Sw}*M}+PDI~j>dM!IBZuP7Cs z3WJP5sW+^PKA8#)$b0=Yps3ceH+)Wsz&uJs(Xc=XQ}D~S@{<<1-#ciolBvNnjxz>} zqKbp2DfSYlP$J?Z5}Rz!n6ft3Iv+t(EI6O#vsLIJma`KVMZ34Mge5F(hda|PO3EaY zp3pjOghW&HAca01K@+iudJLj5F%4x0$Q+izMz*N~(rwv2YSq=~h@_-pgLeO(iHRn}UUe*z{D7 zQ)?%Bmw_(!0+L*-y;i=gdcZ(rIwzFv#a;z*M4Xq&b#F5NjK`x2<3nDzi3D&iQ}YPL 
z@l~B@VQUf+i)TjDktYw5II1r6TyU+<^D6GPvm=a_z6oK*i8*%-1lla9Ia%I;7>n@b~!Sz$aRdruPAdN?XI+8rt8XYLBNG1r$&! z?h$O7#dtx{YC;ARc}KrB?!`TnIo`mi%KGOHq6u<3ucV`NA|~tb zk%rXsx`^fd0I%%I3H$Vdo_AH8#eLW#fLlr-Yj!Bt>u%^0iuky4mnd$$9)xJl zu+u7jgqzUj*cMC6*?#JOp9f66ht-4ZzyKNtP2)Co#5dyig;ysefz&GP3o-HW9$4GL zjm<^DCleWnz={W5dgJ=`J1sopU|v6Jswyo5cWu~+vkqJ^>pLp&<_=;V+{$TVAdxR@ zyaa2Ana=^;KKd^t6t+b6iRjANX`Kr1PvpqUrW!)ThsR=Rj>V$A;Hq&W$pSo>#PgYP9Db zjNa9fe6b~^==uuFoRKuIaX-`g;tCl%r-!tP;VGf)8B~$-zx-=_hbS=#H|e_yU-Vw# z$(fu1JHV&^rDo`cRRt|Mvvss7S-Xi&4;R-6Q#Bi5+z!%ljPWEaeQaR5^M!S|62CY787%;?n>v`5 z8ZaT%(L|P5U396ZKyzBhJKc=Eu2=ccccV+U79W1nyk~tM;WiI1KQ%SrYvXRxHJ{u> zN`Ut;o_kwti&!wtRz$ukPz_TDL=}zOkh@I7a2J;P84|@LKUTC_Ma(MIX`Tr9Ig`Em zce2hl0fBqEks>3r|Q5v6)K`Xa&Q#IP)3}N%gLcwG#Qk-z=1(?Fp}wC4i@#X$HJu) zT0hdDrl7!RlCq=O&09LYn-*$b^x@ejjeLh~*+gm;e@G;G^+{}v;=WF8+t&I-Ll^i* z6DuPAkbI@2(9qdz%tYRx7n>u?>bjqN93;V>(}*o=6cSKb(=J} zRr=#lixg{;VsazQaeAE1S3NYdGCAsc)-#U9+O9$g++^ZC4gzFyDQ^Z9(SojUBdLi#NYLb|32!+4CXE<7Q{ zki}UL*LVx(e$yM)0+FG?nOET&d#LJT20o0ku4oM<>_`{?0Nw>~af95Pzcq2MA8stx zK=`Q6P8VLNgb`nz7>@11ngC|Vp3b_Vc4BJp;+bZjF+#tDhT4vuU4M0PsO0B0)IK!T z1tK^fb7ZJ&@iq;$6C-i8jUrr~I}w~e0)nmEWkP^*Ngpn^Z)yMj{2Q&8iF-D_);ULT zB=0I;SVf63%WHg0_-wiJ!4+0_;Z_W=;{iRB~IgKN}V&mG z5oJz>S9q%N(;K6?6@7>c7oF0f@K{1As=iH?tE6v@RuuiLmlxv0iDS=WnIW*lNUBA0srAe78|;rhI%mCHv(1le>a( zC1w@nbX+B^B5fVjl6;>H#q@9`T4Rg*b@$GQtuSi7<3t1sEE#L5QYgKvtR6T}jShzg zruH<$J}_dbFE#T*aA={EP2k;X$QC|0cGWxXi7kM`368S@I-e8c$b2u)C8e=ry;e#X zyganKMrc{G@bF(Od{d|JXY5J9tV(2XL=Vo>gLw$1{9K`xwx=7s-QgpH=Ds%EXNw9& zhBgz6LjqrXeut05D9-J07C|9$p4rv`NdTtoC{%B2o>0_onmOZWDmquoe61`6n!O5{ zxg(Ofe^0(zTmlf{WXQmK7FIY=5jRf|QcZWa9%P%p0kl1H!f2|}vIf$O2M6H= z!K5D{pWlh1u;6e?YDWXxr6ZRa^!U=!w2GrU(xxfY$mBZNV{?dYD%d`R&7XBSVniB`~wKGm64 zn9D~jtKvoG_Ad@ID=yrR$qVIEA%Xd9%uuY{Clt&zRHR0qV zuo9z}GOX(Maspt7r!D{a`TDYLT^jwcBq$o6Wo_j{KmPDS+s@TlLe@39S%9_JQ*>tl zLDF1@{;uRef!kGFM-T5L2Pj9qu^Kwq7-^ahTfgwUZHMM%0AX|z)X`tHJlO_>n4@~f zo4R`|hF_{dv>q*M_|LMGkoT^7I0n@zu9NQx#G_lbKUAT;dDl8eR@zObvl~B~QQ+IN 
z?wxx!4-gu>CVnvw0AhPKO`0!ly_r;grIfcKE2n#WD{y9Fcd7=-@vy4JZqj)l<&!Rb z`mhx~TBZ2)0HI7YbiQ35roJI^RHYn~IegPLb)-33>3v~eS{-e}?i>H`3!d6jVArO? zkUULHZ4u<{JgRH+&`jXWpVDTGGu~MV$c(_&gH9#KPJw`InE!&L94dv|Zlc zt0kZdyCm469S0V`{O}ZeBD@#;8QdTnW4L*p4%rW!cxuZQ)~ouNYp5U>l$c(+WXqN< zyk{Fa(_6Kb=k+0FuckfrzmK2THs3lEh1f4F*^!g8N z&K{`Ej)&jD?A^9cin|u|i*i8HR!Y`Y+9Gz%U@$bIsk_~Ms~yGktwXZuJyH?AoPxSE ze3sva`O|jY8264TK(PH}Fs{>Gdc_FrVrCwztU{yJ24IZjq52eV=B(jd%0qWl9qvH{2u~&WRLL{ZI1Thg9!7PL0~_Q zF#)}3Mm9+MbK6T%Y3kHxQ>T|#4S%$^Z%*%sN}M;e{&7rTj_zId%j%UoxovOytoquD z1qA7XAU>>&HShl6!|lJ*To?x0wK>-S*OaHFVU6JdXp*;pzucPB zle-$HUPht_gD)?kakP8tE7%$#`lEO{(vQb=<9k#wZYh*eNi7|!6DpX@*xQHG7!YF) zHNTqaqSGT83vOO>NSohXGhJX2HluOyP=20j5tiA`ixp|)IvW@(oehlwKYFBkdK7HC0N=&Nt$**d_ujpY+tv#NxpDM`sgIog0jg;i9*j4oUL=Y4 zApxxJ3jM^i$LZdx=u=aMbK^zo8-0qIdre{Rvx`Xd}vKCyUTiV5Ek z_?M-M`%z_gi|&wEUY`&hhKMh)uuS0_mGa{dF|@X4l8U}Fo4e?0u>Hw>Y$r@zljOoQ zuZ=$K>Xnhi?3}k%w75@8Q7RP+p1kMupLgZ4_Uv>Wh-X40{tGjbV?0$EX+Eu5Ku3Wq z4pHCCbT7~J1C!b3dq=B49$sc{nG71>MW4^;@)Lg}NV5d)9BH-EO*n0P%z1>DN-ShgP!xEu zByP(HGiz}$2M#sMfi!D0{cyZQDIMS6(*q#MtD0#s3K7J(I+#5s4mD3yOB!NhpJSqZu8tkU4+0d1#r7lJZ7xN2O` z@AH#haMBKqr&#~$*>9a~?rPz^LZxJvcF1BsPP`3R$CkwKFJH+}-qR)<-NfcAB?$>P z!Gr^bigG*4A)c3?5`1u92XlCcrkFZ|Q%yZRun~le0a)Fez86WAvP2hE3nDSTzsf$o z%0F)uQ)|y9TL+ksfpx_oc++tuQt5BMl}>-})~#Dr6Vg++bd|V@mDZQ8a>_DNF#yd` z7m!SU=8H}%c7GkooxYO0$ zYWM*5iL3bow(=-eU2gkmYLs3)Yo|(H%>D^3W?!^YjTfHN^8zq~IfBEIIQjqhvJp7d z-PgeC;K2}PkkiEdo7@^#-;BRdDcQv0ZdNO{$Bn4+q)%8Z9;vLcMRfnQSSvyX-Y&|} zt$=n2S^T5dIBcnsoBC{Qdf1Hp3eszJXJqJAyxt*QNcq~aVb3ygR?ZQ?~SoByyvmnA1^u-AsTZapq zzF0?CF!kfmBpcJPL$O)Xp0EG+cg89ui4iWUtZ`8rvo0zz*X77pi~7pMfV|kFF7$hGD9$=`*##- z8hw*R2Aw*5t}|L2ec6d~`tJIWv&j;@3 zlpXWi4*BHgZ72KI!I0S|yzdo;=}y2nACwoxEwD?AdojSzo<^9F9@cw~K=aUHtd}@A z%4w76Dk2#i9F${wp+!N6sBi%evZ0-gyl$(vMPr$Y$%{a9bnWIWE7`FHeYi8;rgI^k z46@0@GcPV?m|7+*{R{Q#FBXVBtjf<7Y&lPdH~BA+O%;Fi2zb zvnMD}Xewd~O|v4BE4c$-7A$FTraGO9zvlP4Kbj3$Y|IC*>l+(SQbtg^H%6(8t?;_) 
zcbku4{bij8h%tbfn?nNtP#g{a<CgtZ&DY{ZsIw4`0DsC0W*~=LVHziw@c9(K%lNCb$LP5x;9X?|K~qHeM|mNbiMZVs`#-z{dcN|qJoAW!XZeVuy4HU1w+V>jhjBe?46zj)ce-xIWRObo0dZ2VB)UvC_Gxw?%_ zLodo#Ot5Z(z05!F2LEYwKZr9vaL;nZQ6<{ymv$KC!K5K?|k?EukQUP2VaGXzbmbk)`nW4RCS0l zwRyu}OJpl{fU zWrM41B}804u#ODrT*T($MJXHlhst9@D#H=fbJ9FY{o1F?saSV@aplT+b$INTD`=bY zS424?Yubyo|Itx}n|A{?^YEecc8n0vi6HKd0MPef|5i4C^qFfXJw$+=?5%C+(Fzn(`KH0DLt-qr49UV$Y(Yr(>^Q&sI2)}d8p<|BA1P)h`+M23 z3!}k|n`1?D*;35YZL|kJ3=uw*VZP1c>1xy`Qk%fHuq3yXRy7}`_xLA;Uh`Wy7l`>o zHHW3=i;CF7@nU*|LNv13!zvOE_(K+$kjh-ZrLfr{lo0~R@SE@DNA%%UN~0rSJK*UU zc^xPMxosQs$C44HJkI4b7DLe-M@YPP+tSRbrGtxKhvS%R)5TwZA6c5Sht_ArV7dO` z^^ZS(sn^iwZJf-;(v{yhg4l6W)@w=RCjH_IqE9&d)Zmm`NmQ*ZFbv(&svXPag3m;2 z0^w%%(8O;ou@#LTC@S{vd3uG+ExJeRMNTbO+?w0Y1rw_FU!=ERXfNg(#=wWFz9#bg z!!N+-`4+^l61@IeDvwQ<=p?_THTO?FpVv(i$`jYe<%!huTjYi2aI{uS$Ja^1?o3P` zmzO8c&bvfctAe5l4<>UC8r>)u4o9xVXE?eC@9JQnOp(v(9-9a%lDeVh76N)}y6F2YOT2Gg$ha@517M7V$ z|Ib)Wfl9}Fg4Dv+4jtQ+3cO!&PhEMwL0Iemv*4^)CoF~k|B>X296jl8aQdaULbQt8 z4RdMsp-sj4UXBOk-KejWvo zaDr25N2ZfQfN+#Ko&{+IaLZ?Lwug&gbCzpfsLVK|0$WZLgi_&O`BsI>jboq>N@>T% zm-%4wNn!vNvHxI|2Wnw-+ZG(@K)M0+Y1RX_NO{CY+}Tj!9nMVpO#x&8#r9A zl6_Om_19v4L+`p#4r^4lY)dP-2~QbQSjA|~9a)zDiH=p*Ncq8YLKS@Jt+(DfSS4lYn)Fk$n)cwI7#7-gg`4{YHm^?< zhR58?Y!Amh$;6vl+$A`nyov^S^hWuf9!c%|$xp6`F7^iJ6jzw!?$l}Jc29#85Lq09 z$Y~lz(hZRf+h}%joKS42e$HrS^faHr#0mWYSz{*3O!?%3(OX9c$O>4=A0(HPy0IAd z*F7>7&#+waKEhv?Jrn~}4{Ny$jVda!Eed0b>bCW z5JvGOuGC0xb4-V0y~>q&@>UiCF`7*|%YAjkcK-)eD^;(F^huV0gKN$2fcaD)8j-99 zUyoNjnks)$gHW<5fBKEl79ajpF3gFldjwSC{1G|!{bv4Stl5G{?(15oBrPr~9|AX0_NTiZ^PFo>3ZUyy8 zx+@fsU%TQpBDm!lt*Z-3(7%UhyHV}s4jaIAviW1~GG?g#{p&}5%w%?FkAW+4FTcuK zQOj`M8y!H76s*hgYrQ_G$RKF#l_(50!|THUfsx>L4Y=e~!1eZjw@gi7uzZNJOp{EeF_sl~WS{;ZX5%OK~6`_4N5HP&AjGzsO$@{!{krN4$o>- zXV&eJc84sdIy4}_F<^R}{U_X;#W)-3C$higA`cVBj#dm2Bf&{np<6bT47EE3Q`;3k z&G8Us56Q0n9PZSAP)ixL(hWucA7kj7dVWw~lqr;MeTK)rtEZp26JnyEj&nxIK=;S- zfhp}1fqz!(7nFL1pS@kbl7vM+n_J-a4tN5#x*;l9dtTDbUHU={YQ zBc%Uz91`lMN6=yD%0goMSc3xU7lA_?sP=HGNTEhGfK_smkjw|pIB-UZYcvXVgogjz 
zZ+R3&lEqKZDMOy+b}DA<6Fe~MQuyj9r?0uE(q2uv{44K5Hm^R8G1qUhP;0b)d~bTS zPe$$Y2Mp6Sa@I3HU1DPWTHyRCQ48J8quumOK5qm?fnYP1>5)`B@L@@6=r2;c#;4es z3u;bxlli`{9`O_z8YaMcz2aAJUNL7=yz+-yh=v;QcyWCx-U#<2x5ca`vTAi$c#jKO zj+UqC0(EIF0Rqw*Civ`bWFkg6!$HaoT8EoK1*2C`Z0pJiXl^gf^ z=f}wt5p^Dc{}84{`CUP8NRo6H?zla6p)E?mf*PZ`&D1Y~VV$Uu@Kp_fdnoFu8mYT6K6KW*hPY)Yi)E+j81Yd z8AIXpZ1V+xhU65;P3z767tQ>6i%w06cuIkUc7<(T6cZ^w{2_o4&C8T=e=bA=3bN?H z(G?ruCIRs6oexMvj{@R$=9B#b>T>G^(L1`~r;5sGA-6 zI9cjctx%2kvQqQ&6AlR|l2|z9{_K=`&LvFOx@^6fzvbv{?>q@+hI4Oo=Y0}{$`E0R zyX=hCONrbM7~$BDaiRQGug4)!mj97)FO0+|nr2JbC-qam5Q8mNoDm}&RCH399*$Ss z*<456P;8Dpm?$cx7TJNz>ZejyzB@bdVZi9+eZ})C)lO~)h5l(E9mk<9A!^bhw=Yvi zc_`QG+Qk^5jCj7^Q_3T`b(W2`_xJL5I(MHK<`wmzZ>vjsrz7|z(}wC{HOr(ZFO=2E zNSj0Yc{!fS5nHgDRhKWR8lmd4zgJ-;SYZhX+*cKlLS=BjEzJYj4+ax}9}`N* z6{lqaeMJ|}V0DddY^7x&$bsMmf6XNzZ$_@Pfi7`^q-mQhKC}u71TB;>o{C#TwkR0~ za$%)Z0Es_wMwyCxrEnqjn&$=QfIW=LYEAmvYKQ$Wq9>nWl=ETsy-Knl+^!imqS3Cq ze>Am+wY@$|I7e!lh)Qxo4)$?A`OQKX6jgFYCNwoVcIrwDbaNZ9C&LLu+D){ddVTf6 z5h{GKRtgb}PKm|Q&|0jB>63s&T8!Pj_mAtk*PYnk@{RiVXfT2 z#-sZV9y~G=gR?rTVdOH-9St1UWw1wxe;5B+7asRTtt&)4{p-+?hUdPywsx2ws9_k;Gb``;svr_kN*3qqHXsrn&ZE`a^hTg;{$lFMpEJ1oh`EL(= zdw1^TcFP1op;bRzn{M_kQ4i5Lz55l@b z1oAGpfO?MBj!ae>S&R{_vBZU-++bkp<*Dx)G=j`qQR&qB458xG*GoQ0rZ-vE)cTC! 
zK^^d8lp?@ZE*g8wuTTF`p##WJ`sMYxT~**bhlMjamJ&}I{TBF%QB<)3BtA1o$ek>* z#{RHHz!)O$z8-!o(Be>o@BsM6r<|u@;|V;dNV&yjq3Ws?=)}Oq(*ZAN_@OTL1qs=t zNAP=Ug`(~kB#MO%qeICVuZLCXFrYx(0YyCof@lPtesG!tW=`lYJ5%;mr0ia9z}I$} ztTo3ImB=<#1Owueq^ZYRd(`^H77Ij}B%azm%bVBdFSi4olIdN(#WI~pN?T+5N}V#g`5()DcuYCa9kOD2jRd1GhO;Ib z9eO`b`cJept~vNqr~Ky^r&=nu)131AprP)S6MZZ_k*tc3aebiJrJMo>?n+a2EkM8+4n8HKPeU4}Ok{a)cJNE2`)&NTc$~&oz zM0Sl5MJok<&Zz|-9X&$a_&3XX_Yj=DdHjOze~rqd8sGLW3x|FOKw)P&3w-KhOgvTe wJ2A^)(v+G1AlhPP*8dKP+s|Aq(dn+a literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/auto_pipeline_parallel.png b/model/train/yoco_moe/sources/images/auto_pipeline_parallel.png new file mode 100644 index 0000000000000000000000000000000000000000..d0c15556c553825f47f902628b85cf94f9bf97b1 GIT binary patch literal 30474 zcmce;dpwi<9|xS%ttjLWN)&PqvksQo(Yy!GW>yM)bm?GlRE{fFR8AvD%c@J9%K+17H`>j9OYf&)?5S%s5z3i)x`>U3t*M1DWucklXup*Hp0G8MQKWREy_aZ%24^+SCGz&fdJY zj}`bXeEbxU^v)x;{Uf`BcKM?2%ctTmib|gf{*yjy<0Lq|tQwES?|cCkRWKJEUPSDE zS40~sbs}IGDZXE>*NLA%^Xzmf7jM`|EBfkE06)nXUCMRWwWpWQZ8Ci?5)ba&mciS~ zzWn-};EBO=dQd)zw+_Yd`h0n_3qv$AhTTR&@xIe}>kC6uEi@0(W@(iZhC;^hSeOuz z0Nzh0C=#^^ss)}aAu3ye*-H_8O2l?8{fI|YvqC^vv%=i3xM_sLGsF+lSYk~GX;k}0 zomzh-=VTR^X&EM5 zG1bFBA)J8eu*0f;gf6|O4@p$c>V}X|Qe!#U8+)E7Qtv=#z$dd;O@F`Lz=xT`A)|@o`b-)>eF4#Y~KHxk=uM*rt@E3v2c< z28#P&e*GCC{x>_mo;ENWiuEW;P-@1-)7*F!kz)@oINzskK!$(9?mJ1iFHtJFp090e z$db(bsV!#W6N7%@mln;YlzuJOWL~7rs@NBoGmdy5R$tWqi6g?PuJ}>kTM{Z+YoX8k5q(%9T zE{s@o%y;82N=QmDGv@cpJ8?eKBfGEKj4+~S0Fxv42XVest&lEh%~STfx(S-kSSg4nIer-1=6wEZikMv({zCO~(9vaXO&kM|}l7A)W%gz1fnK?QAVj@MI zB#QIi$S*hx#;871$!sOxS{mdOshzp&?;i-%C_Vd}%|eBFg1lsZSTBG`VEM6nBrC#v<1I`%hJ6|wV+^2y#8))QE4z%!l z&&fTkJ^&>+rV{0T6#BOGNo%XkLl&aCWIKfC7Wr{qCOf7-1xiPB7{Odcj*I&kJIj4O)gAlSC8SZHokG{)CfcVNRkERtx1 zQ;!v`UUR_l3>}{ksuo2&=^C`ufmVO6Qm z13p}I%__}=F+V#SH|+{w>G=70uKUDHZ7RfZihuBK>p*ffzD-%gVu2`Iof7J;q?6C}dA90*gH;vGtJ+=r{g1`E&K*N*z5T@G5sS1FF0`^!rx!lM z9(m%&<+&9yvVzO(Fal@cQCvjJ^4yb_H9t5>x;xkwxB8)5Swm`JwGpoJqs_Fmh_?NS 
zz`xx4Rj?(+bUMr9!O5!a_qX;Rm6gjxcu$tTxs|rLUw23)RMjfH*9rhO@;eMk<6ZD` zTWeG^?YexCC}_!OuNYGMgx26}_sSY91;6O{&-`y@#j4Ec=4MG=Ge!T8)e{q z8?{d_zqa1-On*W7FQ1lQ8g#j+EO27EIq2DivX%U+LlHsd;!(^TBS4o|_c09o`1E^v zG|A2MKED!{P7-_7xOOO6e~qLk16xIrNg$7Z@8>gLq)%Mw_jwYRoQ1{1j~p22k8`)4 zxQ;%znT1jG1&f6DlX~w*<4#igREL)5kn{wBo9#W1;Sp?Y$m8inw8|sm4{;BS({(^^ zzaG5&us{d{!AZeQ+a0E#Os!gARg-(=O@|8T_jhjVF6M8+Gs<_pj43=WDazA3cTk(z zr=~{l6Ylh82mYL$ef#2Xi=L;x@8Z-Jhxbg9O?_!nl^P=ppKQUA*Y>6-vba7${a;m@ zNkjjHx{z>C5sm|(GKI_2Do7tQl;YiVB?l0v+4A2r;6KRl7pA| zl)R36FhfjYz#OE#TlloX9(>aq@8(Y^l5v};wzXKo`H@=i2$(JWDBIt+YWQxOqiwYwK!g_}B0I#H^ zC8qu}#kZQY-mRR|DbGYjgM8FyE1dmrn?%gilXC%Uv`L^ND7Zf~ZNe8{M`{*VhnvQ8 zN&=iFL8qIQ^Q-Z|^eIDe$`a#FXF1yi@?zbj6tiqWND1|hcfxasKA#9lJg24{EW zS%ccA?fR>~~=de&>4rmVs0cCj|My&ZoC1hic ziKC5qpd)+^vSQ!xVX8}JoQunOC{!^fH%w(p4p-heGebL991tC<`LBfK1nqnU?@LuGvE~J_X^}3WT{7mX4q#EWx=SvHJeiVMsFi@~# z?{D3O6-5#^3KAjw8!|va`yyzUt7#}{LAYwP6S6K}I4}wX2lvMX%Zi%~W(vb)0fw=k zrxJWEq1-bcmjK4FW8U5}y;0~ft@|6p|GTTFI7QX(xD)5ueDQ-Hxz^)xFN$dQIgBnf zD1SAZv^5^FU|(d5Vo&>C)P=p8*QIA*@#5k~+xU5Y$3oIJ?F2~#&eF!oOCn!lCz0?d zDk?=D6>}8Z$u4%RE$`980X06mOYm*)ZVZ6sgSoJNj8|ktMD*)#2M3_7E|QK@NLSFJ z&74E-4O5!MC`BksDQPKubLp)nMk5uF+TqUG?c1i;Y}J_RHRyu=!;%KwF6Fznxby<2 z1EBx3`^sVa%-egLW-M&T;S+k%e#fLPuhoYXqE1S4%b0hCGG|Z1cw_%aD*Ek(j4oH? 
z^f8|g+YI#D&Ruy|l*k_&iJ%MeoUXUF+wb2mi)B~&eva7JF{c_p@rm@2h`OpUfJ?^s8}7ujfCD2$ ztA+0MGi&>u3Y10kZkbG_q!RPmSa+|S4*$hsm?ibp_mfl3R18Qwt=jOzCO*H9D49ss z)rekZ1)0?nK-n{rGdiWIgmd5w*lxqvS||^6OR2p1w+qn_xRB}C!Wm1-i~^V&EahFX zTr52QsU+pf%Az7T5Zr8d+a6SQ9t6qmWFItvcApA}FNyCa-GbG&UeW zd?45!Wf_;rDgVgo9xV7)72QEWh(UU*ck-go1U{+l<cBfK=v)>xJf0&Z&`8yHR(U9yiJTUBHiCSN+lrVx=;hDg-{kgvR8L!*YZ{?w%5&N za@!;V?BmYm546G+QirzI8~{(P(CBDmk7b3~y2PnW00)|xGK0%Z>oL?5E`G886z}S>g|8>=9!4pf*yS)H8*-Ap*r?Jum3^GgJ$bt z%fNKPK;!Q$@)eZ-&*+^CmIr{FH9EI+;Y6%TTZj3dpYw1YeyQYr%2xwjE#{f9ZWZ7k zbaPVBS3;z_{ldNe@sw}50OEjVAU?N^j5UYzudbL%F>W8=q1BlR2rBGI_E+y(ZeDVe z2}f5v@nj|tp&K!_QtUtodojh-zM1xX1x-0ac^eP9P5qtl+ir74UjDiTT9^?x^fSDi z2$*~(cFV0c3eq_re*om$*$eok{jOVS=*)9BXgF~rc>;LEx3gSEwkzd(E?{WVOc|c1 z!5Sg!(Hg{N*xt8pyI-K{n+zx?)#aryKQHSEoBfA8{r>Frt-g247f8nE zZ|2%AK_(d>(YOzu0%P%|K4@!R5HKq*YqeX4DPpsSbRU(Ab)n3?$+=6q14AxiuP326`Q- zKTl~pwv=7JqSP7~R?!AtmR63_yQiADky`oghR302VPC*+zn;;qmGU_W8L?5BN&dfa z2PCV=n6IO$l8eQg04T2C!(Pyz0EfUFc<;UtRzbVw2~T|x(3a{z=(=P`oN8aMZf|Rs zkLwj9m3{2`y{ZLx4gLX^e|*P9sHbq^{ABkY8Pq7e6~u1%XMRYl;0<9=ftg1!7Ztl#6w z{D$%0DkY#dXUOHwhmfrAcYPO+9}!RGkoY_bru z0f_O|w%JLp_fS&ty1f=j<3#CVq8g83^eclfhUg%ZSs8vyFP}yod-r~<_|I7R>)1)J zMq@L*r=Ak2BlkF(CO?i_OB+|&!!J6k>wvo+LY9utYTXxJ&Mwkgn!k|@e4M|y|Dnu+ z!ON%mj88?hXX@ISrYjL9jVr=vW$rQS4hd`nE%W@nM$P`|g}Z_D67<;G5;nCiz5M;l z!1v=GulFoXf@t62Qqg%|QqJffaGXCOKn}RA zvsnw0J#NX7=&bMVhAChw&oVdjR#<@kMCoeL?d!7&BKTaZu)rJ3=8)?ej;}%6hd0en zuMPu2YgOd3-IxmpS{_<1c6d7bDfjV+m#p2+*Mu+^ z6Jl2j3Gw(J3kc))I23li2LUdzcE*a6$=isRf6fXF28da}O(G!ylXL@kWOwl0G+~l% zbRK;v%fE7G;#7UsUdfd2E`L+mAL;)>{dn;`$Mb7J`>)+pd*CD?v{|!HQ(Y9ztGKf# zDmb;9rXsx-)|6X*ORa^eDeDR4jY$`J10J`LSz!t;vo`^_zPv3EJnn=lLa->p-Zp)G zjs98o%E3nU?d~)30iC^D`F{i;7%cS1+(?)6KQe_Ac{fC&7+Y|2S=smrC5=_YnDk~{ zNxz!_uj$XFa^t0`;qj*^FOVfMqqW-c=#MpL?Cv{t`s5N=ZvM}I_x{S7^Iy6-K7P;L z3ZLG}#hx^NgihWUP&(gZ>0qF!nu$x1&_*4TeYh$E@_^7{R)}@Y23x7jttfuX2KZQz zme5(1rTk$UNMbCC@L{9^yz=qRlCu;Ji@A1Dmm(opOZ&@bp4r3&91Bn~TOw>VIjOhO z@&bCv<2}!bDtMH+;jViC!y~JpE|G2a9;C@1@a{x!uksBcW=^xTs>i2CP$CfLBe%Y1 
z=@Rwtrej*$%_G+vI*#5GqxIiqLESba#VBoW($)u1cSs$XdS1)s|d z@%g4HnlnC9OcY&kXqh2{ROjy=6~c@2erC3wL-*P)1U7q%^VWvC@=jhe**xLVDb#Um zr44iDZpTsaO^?VbsRcv9#uDHue5**8OAr$*0b70R=uAulwgd}5Fs=UqU!U&(ykaCH z>HMf#Kg-@yBB`7u6`lw0(2P}~=*h{N|3LV=7<#m^iUc|PhYk0Imv=F*)nmQaS}yiK z#o3W-o<^P2i}lGZPg1S4dZzvgiTuAn(B<1(e2NjD!Px5;lF2J-X@z&Www<2}hLZs|uP zcXt50mir|S234<`M`pgtjmnw^uIu|&&Q>f*Z2jH+`5eK@fMQhpK0=K4Mt08&_~=p# z-0S(tbLet(_o=O#k4vghUo`=WApjb}cw2NV)BFgplA+UYc+oWnXYC}BtB@IiO-vf4T6h9|OEXqDapdNKsFiq)Vn085X(LlF`=|+Jk=6Xo=yO7AbqK@N zLWcwoUi_UOvmr}*8}aW#t5iDw)}en=v8BRF$a@wdR|X5#UgCyxX62pIUC`&|M<6=y zHjWT_T4sg9Ip4ZbchoR+nAIVAt_w(3o9X75v-(XLf;;~^W_XF=k-htGI-Tu!JW|YQ zv=BguO7o>}Vi?n?wEPcH$w>*y-z=|*`99A~f?xQHtpr8)BaiTiJNMgbB*4rSWC{o3 zIHLo%1D{Ux?9F7bb)1~97S;8!RExBs;&}Lgc1dKr52_?Y`y3jd^f*)ng+@2z;Qpm< ziZMgQt?j{dG0RWcHr?SVG$%FZ;T?^3w6^uX#6YEhED4$k1}!7nG7 z5=#*W)?jJN+0{%=>by|p{@Lvlj*woj?Smp?9=_#}#5KTym0bHs)eJ$n@$vK&lE}VZ zH#YlCKm|5WidH2`CdHj?G>Fx2lQz@C%I-Ch_~?3@+}~%)n%(v~VaRkdY&@g)?8V87 z^!k>8f;~hTjLmXFl*U~g9in6}JR|W&`%J`hhvmWnU9xAKC28OyU`=_ArOx6G&U=6; zk|h2%y{pwWPWNTwF%KE@q(h2@`u~@yW$(_>oC%BWowT%Kv>urtg!TTCea%`H<$|8pe z!Ibw5q63D!bE34<>?42XcXp=@H*t|?mmgGRSFh1OZK}-7FQ#pjZV9?2PTe8kwggOQ z6es@*xLg{#Wyg8-${4|TG3snkLUR5)U_R!kd!xarcFe{7a(d}5g>KDeN-2of>Rq!B zs{GV)6o)p`QUUE>YRG-a_Q;hk@+Fm)$rzQ%s$qwzS`Yo&b2Fh8cDy6vYVwk($()= zLw8&{8;Ey1>NGfGoW^MPHvl|QsRGq>UmNtLWS&2tlq!-pH0Tq$&y2aH4A-i9*w~%v z@tC%KLX9f?Xp8YRZWG0il72Y+?9=O*bjCdubMaeH>Q$6sFS%@9*UsOi#pQLotCH&t zoS%WswP2Twaex9KF~-G5z6;p#xZ1@L+5!WpmFqA;1T+Sc#G-t@3n@fJDegdetttUUW)L=(N86}O4KYB|dG?nr}EhivHjkYwkqN&`nqCU^DoNVx zw8CEODMRmy*KLN2f021gDeHm6U){8aF)l5x(`ND2E@#OMz2*k*Xk&MoY(%v;l>Ytn zAUb3*8vbZ-0%oX#T7PBj2ySpwsm$W^*pYj` z!I`Y)Kc*BZl^pFMADw2pbk&fO*Eje@K^&lpE8kwMQN=!4B}7hn2D2OZO|7OowlpFg1^ zSf^i^aAw^vFAy`2XR>s71zZ`}d+7%qeylc>)Cc;_b9iiiD>aT0lGVE+H<{gWQA{8d zkdzJg(Yeo9?TW5geQEj{OKKY}Q=-O57!vD4(s%DdDs)YSq4fUg{fbgS-VM9v7~sI! 
z1@KtdK+28%yqjbL!RvFXWF@=r4N0K>ng7fmQLgUE8x{Jmc`E6sa-i%8;L(>V{ZL`2CC5`CTwvYa) zv?JA8Om-SgLfLk4AdwAu6x06o%!$)v9?_XU06v|pI`)OWP(G@mrD}`#%`xY(Q3A+PV+)}fS$(myuYBYr0d6OLs-ACH(W~SEWPMa1@ts!p+9{F3Hq8NNE*84++HLmmLp@U}|nsdE!+nwJ5 z@4oBNy5u&A|KF2xt)1R6AU3%7j9)YDq&m=MiEa472lG9|dpsuN?_mIr~O)vsiD_gF2soxO4w|L9^ihHG7QovgLCBk=V~&8J}{Sdl(V1S}WN< zrn8Wn4&<&PDEBuy4!^XTk5Aqz(JsVAglOJMYyLU-{(NeSX5N=mk6ZL&C$+b~8NKut zFbpYC(u2hjF)L@;rVSCXUzUryGIaFl`Ty%ZF>zi9WhKJqo<0%EdHmQ~S~zs|?!tR> zI`03xgm&88gdcs4<$%j+{^<`HgMOzPpSKF zm(aEc3ERIx9Rb+31|$7W!32Vv|8{JL)ckj7eq*^im&QDs=Lje&-iG3(FtsiSYIAK} zbPA7tG&zt45#L#gPT)! zw42}IPKS=@ue{44m1!GP9qHkmFmW7F)*MDQ@UNgBN4Ez!CbtLm3ayO5erEi)t&sUD0aPivAWuengothX z-0~=3Xs9J)bv=A(b}zOIsG0;I6L>D{gKyYy><6;-$|BS8)bkE`N$B&Vf2BuW%C9a3 zmmsPfZO9^m4Ecw7GXBp?|A>1px4RC;mpRz9NlNOJUUkmM@H=CoD0R@RF5^&1q)+|I zNDW>_R$UKgW>1}&n@L_odZ=SQy!CCukLIQTukx2KXSqbSf2s%&$d7S4^6*^~EvLsU!b1;*|PzV>TiieLI9UOYWEoBxurC5+xI# zb5t5jj|Zw2B0jLZP8AZh0+&=XQT~`CD00rg*|<#Z-%X_v@eE$BUnAu7p2#h4wFK#7 zu>wO^8tFJTKh9{A7GMt_BhbwCpH&qzOYG$aIz*t&V)+sVT2!Ds_UuUMds-biysJr& zq3hDlHH^QXLUZd_^jnTD+!J{6m&%Hnp%StJ1eAS1rN^Ig?cqA5amB#%o|&sxIreX)m&w3t>({gW|H$J^iB+`=1zJfcL;UNWBh30SCCPwt^c}7uQNP*(%mijVOyb8*pJm*JT zEpW;Fjp0-nrUQp^79uyHq-5nyT2%V0(5p7~-eqGI(H>EPzsmm;!tD`(MKI#9_j6<8#`61exToR%qQc7Q;`V?glVL_)KQ zo2CR)CTZwHdf~C*T|bd*)#5zp9C=x-ge9-!obe)DPbahNRY-wEcIOaYfUvA&8f_D8 zdp{25t+*W(%w3>;5dtpNLBMK;6Vo(l%$dsQ)%A%Uu@dCXV=QBTNK_tmY^k_PM5&yt zz&>L^W^;T#Ka0!ZVx_6fV{5>~74ic=1Lja7zc|t}(mh|2rRNFrdo6FC=PP#2O(w*p z0Xx&%$kusnb@jljuZ=7NXV$|z5QX7(HcKn^;lSx>mFObdH_bIgT~MRU`IZ&0+!^Bh zAg=_$Z-ThAf3N0#BWZi3Pc+^GiyNY6E*ZN{xZXHxljyP8xFWDWLb5If)3dIx=FHYX zy;hyzJUe1&fui9IBBM`zFB^7rZ7a=SHaHURrvl$q)%$H6QB`+AWgbQ4y4DU@bC>MV zMA8ocoOEa4=9TP?@G~0Ss9*y)`+d2h9UDaVEt&Cs?Don@o`!nJ0yO*v)VF=}weWJ=cR3P{0< zSjOf+1jt6(sjXd8Zu>}3Vc9oY@loy#w+(mUPLbvqL__O`q1d_XyBol|?j2wU6JMH8Xi2^vxzd<|!{k|%Qf zq%gx{r8#D`iXY19S*srs*+j=rjW?wL@-Ezy%CsK@92ofGF=wR@9+LK zEWYI#iG*hs+cELR9#~a{`W_vrdjv&^$In-0L{cxQyzpeC0>ssKgCR07?br~K$8@a3 
z9QwfU%(>-Mz7d~7+OnE*sXvwtoCjvSnSw>l8I;2FJAt?XK*!#t%|&BJLOD=?f*rEm zcA|vP??Smo35t*7EbWJZ3OxovUW={;D-Sd<)3Ym#A}Ljklss%URa9Z|#8IKnMtpJC zfRf&;X4)j3DTzdV1UsRns!N*4(Un4d+p&h&1}^dfo4tjXJ5^~#FuI|wIUh(*`Wg2M z0ry=8okv3Ki8FL2-0(_OmBU7oihIHL&WQ%UrJa2@@LDN`AlP%R-WCaVQ!6n@^0C>> zeC(kmRZK9ObfEw4vwk?tX}WCIeI6+{a1;uZE1-A{wt25Q03JZAEeLAop{BAY|9vI3 z^~(k?y|de}OuP*aukDg?@4{32m`I^cok`7$-g|?O=3(<+ps=hf=}Z@YBM&Y^9-rAK zN;};#hjgbcziIwCQNrjyHU*B@O3tz;Nb)B_F++_|XqZz%{cn}E$LyP#wR}9rz`N%J z%XLC@i6y(w>Hdy+=w-)I_yyitx10CS;1Ppu;7#Ow&CgS)=n}FG7QbZx5{N4-(*F05 zM(x4bbA=z?es_o|%n{ApnciFpk9em|x3z*<5YfKzulb{|^9cw%>sQSKUup)S;c$Dw z3?w0xP_UCYeKoBxrQ58TX5K24IX|lTD=<~DGD~;h?_2k=?@72Wg|6bRf=+i=`7@7#S}lNE zEjriwmaja^p3Shv@lRzMU{Y*Jy0gzn$-s_px#;q9G7+qCbJ)=h#~+`v7vQV!k+Pdx z@0;8I{toJ0GT7Yr@@io9yJD5Q#zwS~n&$WEnrPm;L)?p#X5Cf`>Qz||9Fr;d#@xl7 z;N(9Mu6y0=|Z_U*jhfS27M{N1R(**pR3(~8XOzLKn(EFVDQ$=EIAZb-O-PdSjV(g6vBo)|5*ioa-!?`qsD8OqRTLX`+4oV%?kF3_Cras>|8YnTA zHhj0Xq(%nTJFWT#vT>f!^gPrELH4=uBIfGMk}Oyq+IMDFcOdIOu`KwX`JPi)3yHiN zf&E^CnK6U^B$VaIb)_hXeU>~(XJn{>77)Sx8YLjPfPYML0?Jk#sT}&&XU#uksf~%A zN3)CTpkkGmin_=tkSWY&)bq(_aWn7a0GoRQSfk|vK>pv*b@%Tr#xO+3m>t82 z9JHDmxU8Z(nSlU^uI6ohj#BvqQ8*j0EW!N-s<~w{@;XPdQ_pDYE>UxFK|XV%pZw8_ z^LZezgiSshVAU!%zLdEoitu-`q5Xuf`h=l1|2s80u@i6o%8lz^`o$#2&j$fSsdksr zi4L=VS48Xq&x_IN7BYH^jtAfX%1iaen(vcCbj!1!&P}yH1@c%2gcmahk~}E2K;%8| z7>T|h)0|R#>1~VDj z%z(VWBpf1tTr<{nUfW$*!nIy|K7Zp6twE7;Px7&mSLS|dsPNS`HrSq!VvXLjG{(%T z+*rr{C&t!`MN292b6aO@Bz2r+WimgEDxgnO$usYT`Ju0aC|xSOh{ULsjcU3XFnn>X74_$PI8K)&c7*jk+P5i zC2kOq#&~#Be*?Fo;z$}T4^9Kes~{nhv-E0r!eP$xjXLOmUuorh5lz)O-Sw46$AmvW zy46~2{9}r$*l{j7M?$R%>;y_TV>+=L1#f_@$!Tv7^7+d}Mw3VzBZ(3Ux)>8_!)Kvr zqDMbqPpU>zDZHog1GkZ|aUtj>|MsvxKr_nszvSI_k zKBAdJ?d$O3dT#t2Dy`CHZ>#~=qNCS9JMtRuoeM7a-uX#J|F7!jKe&p3q$uYvF1&vl zO#-Z4`f>W!>giOiwQFp9(U3+wENnRyVM#Aahe_$aCokiR*H7-n-ZV~FH^laFoI<<|8z2m zlpu&)b<)a(rF2EpR_Yj#$Cj1mIh*ZYNG3DKDnezI<@)mh$_uTnkpiVUfdD99??I9f z8)QofrPCZhBDqQ>G_Du+Rz-kK?#Kjd^^4T!onMp`?Up~Xlqj*ks@W;0yEQ)nELk=a 
z*D{CNOe(f4&{Com!B#(_70vMk>-O3RpT?tI!r^Ya`Mm2J*IF@@o~5wbm>5~_gPyBTI)ctm za0IJW0emJ98>Wqp5JSzhPvBp^;`SO)m{_%&$^TN&q}E)rapF zZeIn>6E<+8&9;qQ;I6f!jsxOzEv8GTW$lo|jc2Zi1SVB+YNa-ViMEte0(OPQ-zM={ zY<^_RygsGa>S&=~5Tm!~uK>dpC$++vXJY+5>b6MitISB(`NOL@5=)}ZE|GKJM&g~W zZ0Kuaq~}_BgU`5D%WKIhB0RlEdO<$(jcRjk`p)%hkMBDlbye9UA25AvK1;B?7a97ks$k39SPky< zsdT+ne%zm=WJSU~_zJFF8ZHR?vm@79nt0t;dw)&dAq@3v`Is@K2z?fb`D4b3knugX zPr#vFv~fb5*U>DuGU%@UDOqGP@e6aspm(laYxa)D;|-T*H$WkrRHR{5+Wn;xzdj|uD27lqd6qC zv$SRK|Js_*J@*))RpC8?kh?ud9@HeEXs|rnY7zd{pkLmCW1oN!<^vZL+7IAUZpiMx zLaI7O@JPS9S=rv3vAer&W`Woc8iDw@T?p+VD#JJS=CpNPCp{!IRB=O^C}072SX2tu zlz(#7`h#7hovM6~0lFdHFfdJHuvplDX*p0MDNA*|6g{0id!*|Op8GVPEW+-dO7JsD zR>f&_XPN3kWIM0>q_Sa*NRT`8;$nK~N?kz1`ZK0rB?y8~pFLREfvN+wmwj-8qd}Km z{U=kY_EQ!Ddh*fl6M}tvR8ZLB%p>zTPy&X}ox)V}c~gAu<+V@R(9WIO%N?p0axtTAVI;JESE)6(Q&UXW7xiv(aH5s6oP1I_)BeS%`*Y)vhKK+Chdv~oYyzp3JzCtQ|AXkrV#h)&xx=l99?0lcW_EF z!sW(vWXwnMuJGPAvY#`eHD*!jw09xb0=$jBIUD@n$MkpGZ@>PjGQb5TO@RFxD z_cG?eM(|eVSHKhj1DS$rq#2IbP(a3vdBfg!c+)e=` z`W=T5hI{b$u&5%>(2AF=V5a&l$;$9M-C@*YcDoh?`f({}8iTmj;B-08#Hk znWGk0sX0Yjvwo9wMlarKkST~8$Ru`$kw7o-Kv|Gh_vFq@Z+PoydY!NFvwByF9SE|0 zb8V*b1tH*_evr!mIrxlZR8D(7%}JGQI)@|BPckUBW?|_`w@&!tGjH8b51bCcbkrt7 zhFQLJuWV|xJ8ha6&#iV$ezqn;XL+|@!gvWBWc}7fU0he#)*^h`r7>-h=&Y1uXY(~> zZ@AA1cN=@1KT2AGEd{G)Zws6f_U3O5xvNhb^tTmQlD+2ZaIGm-KM=iwB0QL)Ipo@V zRXXhE3Ny#PIvPC|wse0(&9e$3SF4|xrEBz9jzdX&qeZX}na}`G8IuxEkJjNFJ7#A5GakRFIB-aZ)HU z*XScRD-85+%Xxy9w-9CJF^#8uZdnj7c|YX7!nnmW>XezbegpLU9V5{>`OjUv;vA6C zJ;DkWgRmU~+9DZ0>;q9GFDQ9TZCw{O8Ma*x9#|}-LnBWJ>U+hZ??WBO(yVc`Co1dN zgx>{Eh$RBb5TPKhkQO&9cHTyy5O7hW>*^&d9y>eT;+x=5jcmjgzZm5xRqF@ABsPw7 z6QLmcb1IdReKX)fIJmXrfmObw0tb;FrKR>fVdEL=3MNf%?5^^x3LcJ2*7fx{3j=&~ zXrN~c7}F?p<xH?p2$){c? 
z$VpIrX<<{)*mNY*jCd`>y}^L1?OxPnYN~yFzuNDEJOis^9 zDnb?2yb8QJ;4%n5i)K$Jk_EW>(nx73bEj6Cx5HLKwl$EacA$Deb^7z{Q=P^e2C3ZN zSbbTe;{Ry9UDGX~=Kw`OdE^dUpUt}}AQpvZjq8YNGo9l&d*aiP#BHPBGDx_PAbvHK z1@!bn6C@#x7e`n6J*QD}0!j_lO3b?4oAqj*xQlhboOAqfeu^R{S}j?Xm*f!6)+Jx< z?{GogErdNDn%Z-vL$@l0;3f#v1%-l= z=>Y)=DN3H(+XuIgg>3FuH~!cHWEiVKm1oT#VI(#CNXS12neXN+c0x;)$?Hw*NvL2f z1HPv1_DWXmTmdIPx!Qga-OyHF6WG`H`9HKDahI$n?8dR1vK>#AA_W}ey)T#({1e)w zESo9KTWkBCQVkN50+va9*gjrupO98?Okb@Cnw~%vMpx94X*6K-Wj} z`hX`Uczel@zoSQWUGu(IidF5rF#1WmP^j_ZUZEKc(*9O$vB#YN@)x7|kqJDhGwXdR z%~?R?CceL7)Xc(Jw<*I1XDw}oZNnSStP%E`^NtU!9e>=s-QpDZr=YAJa$P^diuEr` zX?QQ?(P54Q*_u~-nk9^P**H@WNT)-M)`AMu`dA;f$RjkQxcd>mvYY#0!B`>2-UiwUbd>*9S5YhDNQX#!&^VXJkydgQD;M z4`-`lsHE*B5|a0|iuY6S6AVW5Q0~sJF+|X{zwjHTwiia8k)ANLcx$C<&FrmdY5oQ! zm-YKc8xT%ld@{5}iB8)dP)jN!iU!Ir9O!25{8mGiQ`m~jO*6Zs%BEdQv>;+h=@Ync zI1kj#iC*aQOzk80oG?7zm6Y0roGUNzkO4xqhsM+%w_OeYhka$2nh2DJ=dAYt(d9r} z%?gPRE`z~zo2*j`hZGvz_75sa$Uu6X7$0UqdN~3U?-moHp%q?XR(RM&XqYKLJL5Vw zw&ZQmzs$mjPI~S7wWK|PJ`KB_;GaA5qPY@_f8veakjCUTPM|!wh8ri&84t6bO(Z7( zu8oo0z)=Md7_NP~^3NTBEVWY`!X28a$gr^2C3 zU+N%vd8C%aDX(#ujxW3E^(gg0aq!kR{7tX38>{$r$Tp^E%J(`8|*CavWzo0ZyuY&*oj`JRcH!PAi~93rjdloAb!i3VT#DbNR4*Kkx%5 zvIXKVAL9IVXEElxRVsVgQ4rTT9Xdc*bchRi6U&iW2UVwd1T87J3EV`EHCC9cKOKr} zF**{4zM-4EyH(X|rdl_7@Fe$qv$7fVFs^F;2jF}9TG+#xll;quflx%VqD4v;rqmLf z_u{3>$)rw-%`G*AH4m#sUSg6lF0yi-_TA^&Nr@huLo0%m!WKC$<8)wnY?#+YGAQv1}s>W~btHN?MEJ?w_9H@!FyFV|8W#xMXF*rOgSx2KK!UaeKF3 zpGH&+tEPH%lVfS4t~KSs-BogeH#^8zG`ra3ntATS^V!U;tEB>48lD?ng$kK3%3EXbvrGwSXyI$v@6(3HM)3Je=OhI(((M{8#S_1y{M z1eyE>F^!`J`MYhYMs5ilDBEUC->zuOFq0Z&MB)0``kNN zpQaf0g*c0f$;m-S@YSW(KO}!0Qm8)7wb$9rrPyb}H!1|DYCc4pDEax4*VefQ7p}l= zUhgmFdbaxXku74*yQ!T=|H&X9`Z$9mJ-^#`NFtJFKt(5r{BaIdR3F7IgKT!0o+4#q zm$`G_O*Bx`zw7DO1n#d-O~*G;^Sj}p{$4HpMrzdRC#CQmFFYRT`2Z#-Bw?!Nt#%1$ zA7Ut*#p}p;@uaS3T&-58a<@3fY3BdIyho5Q8Sfv43h?dEJqn{0Oxmzk-Dfqh-VQ#+ zbVou4-4W;11m5?KJm!&xTD}BpRNHk6SSS}2$>wv;`sP*kbr)<{{`PsW@V(qqa3Q!?jamI|hm>|sxY_q3SZm6n%QMZ!!N2xn<%AM=5(IHo 
z=U<+WNz8n?sFJe5(^%~%UzqGxYH`-sw>Y9zmBxNZGcKP$wpexduAr#M!GKg+J1SRV ziZ!PaFRiW*UgzxBVW2=Jd8WdgjYoG>VLE9qvqtxbRajafpQ;m8Tw7m2_PXEkFR^bq z6ET!&rQy3?g?Zx>=Gdt~L=8(*gJlP{gS?GSH7 z$jwo18ugNc%-b&1l7{T3YHowa9L-i`s5_-x**#8;G2r>7!aKiL3i`mf?LMWzhthIB zPLOc^&ja6^29}I;wTj)_9^SVvXkCg3o@^LKgkx93LIP3t8yZLmR6QL}y0q#}H|baU zpg=FQr_afm2_&%7)s3!dGWg2R0o&?ROHRubHEwb=LQOh_ze6A2TBM^iS##lFHU6C zXnhi!j;>b;Qqcs-Xg&71h+gz)xvz1z3zEjN;5R`@|I`~8IX-5jR$LKRK^$*7+UNdp zVbm~i8M0^PDZ8{XXAk(JnE}$l1a<3&a^r9b`7&wA#UEk`ZrbblUX9D?1JH!27*bGR z(w*(AB4?tfw9YEyI`d)Dof>yGNag$%ROLyn&e(`PDRJ0&8twHRzsNC(^!e@%mhwU# zcK_&^bTL8@&KdsBhLvx+=$DX2*`Jtl5wAp-?b`d6f`%*F=}fo2`c!4kXBsorIQ%0m zhvaix~Z$B zt#*A?*qXB2`K}-I%X^wwAG)JuY7)r(Yer4ag0nNpT3LOA?aXOqklvM~<@lU+9AfSp zt&<(ElbueRts~#k%6irS@X5|Isf`q~Zrf*MTikUv=eVpt)IEX1@{!giaUR1aHx22QN7 zr*1<@1c062@*9K9=qh3}4D(WZu=Xw5iP-gbZnlesvQ5pt-cY%Qo|DrPzOTP+R3y-5 zfuk=0H#*yXcj07U?(SBE;c#SvjL-9^N#t=s$xL69eYI?J>JoHW?fGNW{{v_#`@QSu zp_8!s{%!Hq79?@-8>^ zv!ox>$}3%(koUx?eqKMHU1qx_o<{o^TjMjZq+c`OG_pe}s2&8T1YI*;6y5^wGz&(E z*RE{cJc%04MQ_pXq?MX?ID6j~(*YIGKvjpQ?=lJse?C8-oyY6IeAkn0NxnXsQWfZ@ zX(l^{s#J2o?Q8K+;6sq>+CzL|HXe2(!lAdKD1_tPyEhxTO{#@$FJ+N5Mr6KwhVsM}Wbk;yAgW0uclp;ouA|DG^;wZ3)|z)W8tVL@ z$)@jI(4$yM&9Dog-9|BMZMUB$Hf8_>NLL+w;;}OkWmkR}zq#00`}$JnpPM_;6-?6_ zQ6#XeCY_tjW}s0kve@rkZ{~$b7kyxFzO_I;6adMqg0Joe0@yw!6b(ggPs<{qp5llE zUW7yMQ%tR;G&4XZQfqN!p4beOHUp=JXrCkaeZpR`Dpulx(jT@Nao_Ai8lG&TS>&k3 zqw-8!5KUDkv)Hc5@(^lptaY7SMeBclO#-eG*^x8Y^$;SZ;4MN?&fJRxo~i( z7NV3LlLhNyUMU~KBkcpbb+Qe#v9+{D-d6n3U_u5U=`=$vLkGaLfEsIbjffZQ;rxni zgzMy@i!t2jV}@p%^(4KE5J_8yl5i$2+wltt9-HKk+cU_url=<6V!h6{?BG{yONod_ zJot0?PJAcSsx6x`QQ^b2ycCm51nXcu=8X6_*Dw(gU+&*-yvE z$784mN^39yO>%?z$JLa}KV7je1(G{u*p=M*<>JU;*>adqhg9SW9rxO`xnD}HBzF=n z_Is8*mtqJftzpzn)-K7cY;1J-GvJy1t~{?EOOI}EM<_kmJcCxauMX*Oz?j3 z9+BoJBOw8^#GVfdi4+SRCQEJ&OUyj4Yjjn6#M#ntax@b$W!YPq!S`(3y;dJl-a9h7 zVKU(#-h64MTo0Fr{VLkG^9djpD54wt@vz5&P+S+agQ`FI$Y1ZgmLE9!|)BQOaWZD}#m5<)Wo-uRTPKXfnLH z){kh&$0P10f}HEJh^7(Zj-%Ky;=ww(pF-h(IeJTD?ANB7I!s%JhPat)W|xrX(vILS 
zjt)+%?Asaa&r4a6y19Ug)mpqw=z;i1{V!&&Ie5}}DxH}MR&Dc4nlZCm`>YLnbTgf7 z{h%}F9JGaJDwtU8Q)Y46j64zdaU{XYxZ+?5l3?CWOuRvY*kSjuQ1U<7tCL!~jou46 z#3|lkxS7a!eAV>E>yF)SgKF4O+4)nio}EZSex(Jx3K;EObE)*lQ~0*K6qBypc(k6n z^fUXf49`M}$h=5o<21r=A|J}h9TH3`Fn}=cj-Ac8p}pw zD<=1+pYOS(O!w`It+-6(q{`kT>6=Zv^{nRlX;Aubcp+FVbiU~jjc@w?^Icrj%efpu z3gVSfDCxhfm(a?S9mn;i?m2z;T7!MuY?)E|gm70bkMmaTaNr$%Ea3ROj(G9)v>T3q zUR2Mx-vO@{9TLgbK%+fkn=8$tF+f8Fd4FNnSfk)W4WG9@qTiqwAG( z9%8gXB>}1P2Qp}XPH_2#&U%+oq{S?QQ!7T?^A&gB8Hgk*Dk-JEj|z}Iywc9TfKO~@ z+ffm>tWjspB9LdgChPHWh^=f0;r5U17k@kDWPwKtI)KYDB(N2D+`SQau&BFD0vym_ zTQxW*xCzxBe^gk5*e0=6NDWsSL7=rl#Sffd(#cN&c5iJr0PfW~QWb}3H?25aHh&a|hLbBf?#O^0@6--Rc7k>P@w}pk z$e^CcZ>Zg8qd%>8+54QH3Z+&$FQ(j}%zYiyi0G>Q1UOympoN6y*p+lw?{UwZJFS{w zl0;;u)_ODyySr-%)T=jd+bZJJm3b`#ug7_Op~IT`Pa{1_4RY!g1sO2#TZt&OTeTKf z;HS7n97Q`?1P&n)`-A_ayMKayAI%c&bH66oSj(D)Pf*%Mko{G7zMvVQ+J`d12MB|n z6X9#4(=g7_CA8IBd+_te%V|c0ev)U(;z0?;IEoGEYVT9Iy_Id8B>N=-0+T^sin-Lw zyqjgM#F_(gI)y6(>m~5?RN)AR>UZ1jy@h1?c}cS1RDZnRmewC|rJ)o5F>1DmX?CFa zo5F1hNkLWdhIDXmWIsv=p!U+H6qMqcW4X^hCD9$dyKRBNxJHdfd|0k^8ZBU+VzkQ(mS~=jJ~?bs7M@aXP-*t#N{z?|$}}bpxhH zOQIvYDX6>G#+sPn$yHixzmrur-cjl=oOez|nk8T_8Uq0^NDVXW{^4~ZW1^7D20c^$ zF_&?e`pgnT&PDoZtSOp|8W5u;0?A_ZzOkZRaw)B85{>Fi&o^XWj4I@m%+t^~hS};^ zdE3u&QVHSkKTL-ZPrpKFl-jk?9`45}7rI$0?LR(KD`JYfPh0t{JuV2iQ<)Z3#xJaL zopx53joZ#4<5NpwBEJarFI%#JxjsXo0Gh&20kbScpq!qU>NDeQ+b7{W-}@mmJ5bY9bu_JrwXqo`k4V-VZ_Md|Xyr4@C#) z1y+$=Gfxx(tRwoE!>Npa8_8MPNio!IcyS0OL!wh-?lmT}C3Hr)*rGk_1{{u9@suj{ z48KKIpPJO0k!Su?tT$MN6cED^c)0h%5vAU$PBGv8i_rSlt*p+7m2=X$hX8e4gb7Lm z3U?T~<<%JuL}Gwl-gz#t>}dGw_*&ui_)i}5T{FC{1i1>*zhSdiA+JF47)SBH^sWj&;|~V3eHwUiIz9(Hf>dH)i3`Fa zhcNS2Ph95(DoI`6d7i@NAq&au=jQSc7*dTs!@gO;eK^jLg94Y=OiW~Jdjysio=WBm zmvMa(DDKoav-R8Y)eZA-cz^qwCIE74ZPs9YbKEW7rITf)w9G+oE}jbR^g*{4`|Jhj zi}k#$Thh?U8;wF}ig+L0E8?S@d0Mcem)V1&;(Q>L(#w7tS0lz~1@T#pUe$rJ%AKBZ z#AXQmxh%pA{_aKe<)-C3z9}y+>%`)Z=#=QVQ9TyOzra6iq+^{5V6i3|3P_7r+@*p< z#WpnV3btS<={?jq4FO}72#YJtu2Uri-GBM!`^{SJ zz!5H6^+IDMMu-)+g(JnL4+6Fq93(Ed)qt2;3&21iJ5qIX 
zCL9iIxp8I#7}@G-0vCI8L|EQK?B&H%4vGk+w);klwr$vuwap>g#R#XRy~#m~pJeH9 z$2`w3#jBXY%4$`uv_>h{bvlFny9&%ikNeQxvG21anRwPEPnc`Jp zl$o!=k-yTl#v?9c20Qr9bbK})hHlX#7ZhuMAVeoBQ3+h=Hf!<<$W>*fgYF+U%b0 zJ*DBILk!;!_@ycLdgJOe0ssB2s8hJ?K%~^akHC0*D;I|i_*JfmiHtj;jIW52&=tEV z=F;X<{ZU|9Dl4<@>rjwdQC4g``J6H5T99~BFG52=cgZ;^ZuyHFCGZwC&y4*rWG2>& zRA?9_8`v|9j%dlKsOTTpYvqaYQCdZ!7gw!c3sTf7GB`$8aEWW&oStHK7|B#!i}*YC zJ0zhp57?bMO9UO;1PKYB0dy2YSZE#7Dd&gxh@?k_C$Bj(XN**HhlVwf167Oc-f_!qN^&d*q87@ zBmzGiQmm(?_377}j|F3+@yvy;b>wQ+bp2t|OyJ0){W1*bga7F-FUundKXq=htvy)+ zk@&easK}P>f$~q?+b<7%Ff*`_rQ<%TSK|e*ttJZ_KIgn8z>mBB;O(Ehyw8Azg72b}|8)p$TLHU_#=FQz+n`3;uV%;9yk@g(Bq%OV- zZuPsuYL|FN-n)!$IXhI#D6%1aa&v^4AX8x7~F>W z>GCBdQ~}iPuwn`%zuQNP-j|hg+qD_buQQac>KcsfKpp^uq~})5*J%^)L{zd%0C8wL z->t4C0U&1R7Hzwsi6*}F*uUb|Vl;L6GRm17HiTTX}!p;5zO zo6z(ly+WkJZ5)S1#5?8+JIsIUCs!i)|+>A#HrcQ7f!vEJGMvZ6pJ?2ufM8 z2$UDs2}M9<@P^rm?29auy}+q{cPaO#Is^Z$9y#vbG&pU;vZL%JJ(Mw>iZ>eSdlY&(BxM)zNZ;#2L0L z%DqXZb-~^g#_TUcDs$5ZYlSrBa#U$F3p;Wm$6Yivz2c8?RRgrinN~Z`$7PFI1QGbY zrD_AI@pbecU4RObJ3?yD4&!_t4ta zRl0{PpQbxT4o!%}Y#-FFg|jvuoljD+NJmzNt7$sB^psHA)?XfQl2o0>>?1gEdB<5c z3yYYF8au8yD@7N; zWXTCFv#L_F7B{l6l7haZ>9nO&_;}9}D#(&Whbuk7Hu}gbDjZ4(e*5KMKq^kDdy8r18Qj`Uzc_mj}Ig@8M4w zOYFc?T7FG?F*6w7e~FtDMgW~pSg}7*?#f&@O1GwVz^%x>zY7&ZZ}k?vrq+x_tv@Pk zcYfQ}jML*R^bl&dboRhy`J}hD+8!4gKF!?fKNuQBw#X3!JbDFMdU+Jsj{=y_4Fai+ z$KAv}>+yU6nla*y!ogUEl_ZE9*9Er(x?+In>=%ZCnJ?Pp$$<~>P5AJRLdqphL^^`IlwOT#OM;IJ4WpYb{W=^Ea=1 zoByT*_@8HK)lFlSuWDnOp1^9^+-TEJkyc_FuIgS}e%8g)^2IEB;LhF+Wq`7O+&aXi z$X+`ljy=E8alB1$AsgWHxBhLUA$2y6yIT8_=rf^q*5O`_>8H<1Ra-`QSdo;q@D( za%;}K9sV}ZEiG7I4lb;HREhmJL~~d~IHI@orr)ZM+Szovu#@ak)r&vBc96tPyl?QA z^5Ktc28@U~$dv@(zO9izZm93z0VnUn&2HKF?8HMF;eDQR6L=cF$1HrWe#Iwd?@;9l zYhPD@`24pI7h42b+17qWQ?1ZlWVRAfzcI$6C3dflxnkiUNK8~L=EXuP%Cjp2r ztu0O4w)eCh4vxmil;3-kL6-+c^=u$`Z|g~GGLQ@E1rj7c_4A()F4s!o6pyi0EPn&{ z{V7Up#14(R^49T|i?>w$xx>}dG77A9bggGJFR;oeqVIqn%nPLd+O7Wp*MguhpDSvs zwwNOl0_pTq2kNq-K$0J=67*`+g>GPu0zRo;MNo%Fr)FOxZbwt^0_lIl=lq&-Dg1_3 
zP@gEaCcu0GdKkrVu3Rx8Z2O1hCY8Bb+7b^K4wCjo^d)59j&{t9^nYV9Z$;!qSI^Gb zfOv67HBG=a0D(L+!Io9mQ-2gfK|~~7%*hSd0-2W9uiFDE**U+%kbf@B-v`uxH%l4V z)3}jdxaxEC-G2mkX7Aw4#;S;!sZ=16`lm-IehUFYy?}~vjj2(W9<7b=H?!)DOq1Gw zvCqNIMV&?XSS|-p`(i$CTB%He!~NmQ|J(>abSq1^+92?c<{`* zvR}Fz1lqlWpEjhPp%1qf3st5EG;uSpdIE)0kgs3Likkeu54%up2jBod@oQ#lc7gXs zkW86C^t>ShAR$k!1pd;o3QUp2`W!n)wNVvHi25^6+3B4?hS;<&UzWx&PGW01$0sgA zwy;6|37}r=*%I>&v8MEJY8@#K)c6O#r0hoe8Q<(c{ZN=q~%lL=VcDFdh;Vgde;0hQDyk1%5w%g6Rr<2uRu0xp(~eFy zDGAapSG6iQPcjFmlPyUf^kzv&|3aY2zrkm2-_-JGjr9z0rO9ta%1sT;0Ji1{_v+(K z5|;c*H!XOa8}l|)immGqS7egyouJ0}+%$Z!ov?*n(#l0r7k=tZGP^NSyJk1jZ~ZLG za9YdE%j+QXdP8j%)iollAJtZ8l8Lqb)ume#xk47#;GbylcECL2#{+F^pbHJI`BM$( zQS-Q9wZ*(ji2I%?!dEJjn^ScHdU(cm-Mv?D#-XVq-K~^RY$`wC0PrCub%L%5P58@r z%TVl}_#%rpliH3k54*GC@GhAs(Fkt^5?&mRbO7 z;laUgU}Vr42J^%=5_0roByz5$YS~ z=+@f@Pa+-Qx?WqejFi^rkd(fy2AGPoQ*@d=&w>%+)RDJT2@zh9dH!2{ba63HuzX}`RM}*ZZ~hcz{-l%L>=g%` zX=uV>U^*6L?E+P0?*lXiJ*Wk1jz5I|P<$n{Xz7T!mCTY3ANMdiX+se$$sn7Lf^=7l zCJkzQqh7R!1o!4R;ZMJMXEdsq_pp4_Nk4#*9UlS>|Hcs0x^T-W2&D$n@CobP)15m} zBJIrm&4Bpgj>FXtUH8gkqo@*Fb`B;Gg_gv(8{G5NeB|Vf5`^Gp>AjP4#OmVyOFWfQ z?-baIR|o?|gZ+}!d@>*a8U_$=@Jk{E#KfOh)9Fh-nu=lE3i3^+@=y#P<%K<0b7E6- z%y3qGofO`%S~##{h-mYEqSW+2hx`Zfpt8$+vl8iQ97r@+7fUI5tYBWVRPOSx(8E>8G7~5$-eC6J{gIQqBCF z5P3ae??}~20o4$vO0aIiAKZJ$>4-Fhn@%$GL~T2mrAF;NvcJl{MPeR4u2?b(O1}%Go zJ`;H!*x)nj->16u2EMw&v!&^Rz)>G@a=I#i&@$>QcG}_rvXfIpYl9V5!rk=T7vA9u z(na3y_Zc4v3$Zv|Ky3P+Z6hmjRAHnI(CZ*5Lx+JFKsY|arfF1}LCb(8ajGMMbMpmY zGFnzkJFKd4%o5Og2#AQovM{D`63%4}HpcEs*;`T5q;rwjW5CsA0JZlo zfU^sfw3hW$k~HrubBxYc3-1N_N~kp#zd#*o$%YM!u6gbsJz^kVnZ|YB$n!Dp%eO|R|N5iV)$4WNg&tjc`JOB6rKP8(NrEqs^m1s6*zqmtAyA7}~q|z1v z(iEV0E%y5|!0pAKh;kQ{#*~Z-Ky7#}X6lJ}{+L+IHV23Vz}#GT%L4x3`AbIk$=DvC z)qO{{Yv?m74|u{3O7aL30(#y4aWAfbdEY6DGtHG!5ah{(NFlA zeYj6&?X*L#yUIApYLEZPINI|RhiQF7E<0xBupqy6GSm!$-bmWmztju1TS>D#4@}IA zn&%l-89$@LxlSosuV(j6wTL_l>Z+z;tOR5+Mpbf_X@_SiS;WLS~Ejlf>qBZ`OXj#n^1X9}2t+;qL7Zu_u-Dyg$G0lVMKEkOp&&3ryF;T|X9<#nIphwJ-G-^FMNqmyF9%3+#G%xCnB 
zLS_akR{@I3*t$`G)@Sr#*n+w9ZNuwp>GC+sq$wY(z`eMcCK1zv%6nQ@ho(=0f0;w+ z9i!gUju+%FM7w+_a6<}naSWXfAvD?)LW*CcDMw`QGwP$o>F0kVszS7SRKw_<1Z6^2 zg&e5^T}JIX4$c^LO~C1wd#vk9)mILDGn#<{c#;;5bm=(P$U^0K60LVEEm%xbIZQH zNT9Z>`x3JYxpMC)MA)?BJDoYX;`oa%A9pT6M(hYb2BSC+G|gxt9CjCc#PvL}>Yn;; zlz6?h!|1qd1=L1#SZ^@4pzPpF(tY)zkOZ z%)rCL#GT80cC66?kmWB_LFRK|`knJsY}9-drV=z<+>-c~?y5uB+Xtt?Dh?JSX*OAHtHV`#+T+zS(vN0nm7G-K(GL6`C==~o>^(j(t4MfibC_;YX<6?x(yl%wlXvvDA^nUXqVl@1uc z8Z%5oi%PhD7-+q3VSgG_gA2?1hb{$+Q_QSc298<1gQr@AtYP!Yh;h@wtwZe5lIRjt>F{;ee5@UUTGL(;v?^x*dm1 zc%}`(=#?LZ+r5brRc2m;EEy+~CS1}F^fEO=#{;Qw{iX`_px$(PQuyJ$ z0-SUQcD(s-1~c&Mg^trZjvV-D-vpDkE!3|vcmsOES|7N-c9*S{gXJp=@0kAuL|fBc literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/conv3d_sequence_parallel.png b/model/train/yoco_moe/sources/images/conv3d_sequence_parallel.png new file mode 100644 index 0000000000000000000000000000000000000000..eb62c31b6f33c3d9f179503be76ac1675e101e50 GIT binary patch literal 161292 zcmeEuc{J4f|G&DeBrTR3q2;y_x)nkwlq5?rCfi78WGscTFGXmxl#8MvOO|1bee6O- zWD7GGX2@`F74-W}5eFdG5>O`F^6WtI5kP%+1Bc z#jABi;|3SkRswd6maQXcmU{PXFC z1_QeHk3T04c|iAB|MP%{|j!68EsH|(o|37-i z`$|v6{H{xaN5jbZRqGp5A7g|xq9fW-_^NSlc6n^Vu%yuvMGjx&Im`Fr$mCp~JCzN6 zbnZmHI5ITkPA>1EIaP?3xa+o5&E?UjTu^#nypKB5-%!~7>QwS*rytwF+j_ea&X~~5 zD)KC3S~xsDX_;o>xA+nI$QR?oZ>**puB_0)nlCAQjzId?e?iZOdeA1H&H6een0h-? 
z@?Ut5{zXK)IV=&o9Tv@zF{w@%-xJBCZYgk}g7GrCk5e4nbg_FBI$zb=N(;2GJjG-` zSh!^viSbozR%H9F61%6!`6vvAt)W8fhJVB5J1tOD@FU2@G)ZUy_CvJXcyu#*K9g64 zHd(Zc%h!(fgO*rSi1!_jf*)oz-yKMr!{sNWxM2GJszSTL6iM9#9zTq)Je4*{htA?P zWkMy7bXN8-(BR4rYDlr7cX=h4LZT1F`$9lVVb?;6pOkBv1%EgPlfhxQC*va_Wv3l zb16C^lIk#PCsSvNv}k;Qp06_}bvv~eH?K0`fX)QLI%8yle;aGwjz>`X`&60w$RQ8j zT@GhyVqY7uvgbZvft6VQMhl_gkT_B&m=s_DTTI!mHVj^@P+ALXG@$H1rvG zIy-H?P=dL4Z3kA?-VxX12CcX~GWF(oEUzzJ2p@D6Mxe_(g@N0pC3sh>4Wkw*#}MsP zD;ssECe@E*LZ2jI$%*E(RFT8l^%*vqMbo~FvIL99Npw-E@ABvG^?TeJSc>faT5zb8 zFl+-ema~GQH*rx2P^Vo>`D<}ALRq1_S?+s06 zI_4|%$^|7X+7Vu!q@9V&+4+%pmIczZKv8-eU4&e#21@3-^m_E>$5I`LL?6wO^h(ms ztk`8m_UG>c&^7NJ^QBEvH<%(b*{fF?Ns6Y~(N4!w70n_okRp#r-Ek15K3=aoTG2c& zeCiKi0)i7f-Fk@KWO^y->ASla*jC*<+N8<=+AXz3kxjDjKeA$MhfyA_c?O;we>7bY zi9m)sQjdTqrK=`{B_qSpL>Udw?{{(eT4Ex9WsZ{%rbHrMB9NK#IVa?M_W;H>h1>5% zAXn(4q=M#ou~TQEO))Pj(QZrTtRW!nXO(G_6hP{S#^INva}0oNegfP3SZ`Or+`Ian zHugk-rA&+5=1+G`km!xKvp&)*6Y%Ky>JX$R(1Ha2 z)z@=*1Z~C`Wq&|9VnlyOVnoqKfKGSao9ju6?D3~C$d)YhvAz?URq|+x z>^kw!`267-+N9^8^b;U4gJsMD39UK#kir8W*R!7K(wClBKEiuL2^_@76+Q zvcA4;+!X250?gO#;cVKZ=`*U17p5=swy5`5n*tz`RF7 z8&y4P?7V9ix#}HAclyKnXaI@J({*~#sPc}LQo0&1nQ%>(M*#ooCOP>z`rOQ4^*5X+ z z*d`M?LY><20s@ETs2Qbdn|Z56QzWcfq>)1uxS6s--lI2I5-PS=k&S04l5t4jFqy&; zz)1$10u@=pEXPvO^V!}=NX7j~#+!ZoTyNaUquEEQn`%yD+C2xMU%`r06|<4Se>`12NtC0gv``a@7kapL2V<+l+VOD1Lq7yj7UCa#R z9uP?M_c(C@@f0Uag{%eM;P9RgU(;O{jfu)ERY-`06OS$0h>mfeTHfzz5BdC;Q8s{nZoBff-XNP3K8pOX7RRlhxtJfM%%uxeo#;{ zJM{^;(HpQJD3qZ47ykkHyzCeUH;|1NhaFt#Qz5tkLDm2qY{W~?Rrn&NVl4cCKTOku zXMYDlPLtE|Ae<=yicV84k1k)USdj(+C-Jl=X2s14#EN_>X2QTnKzk`;h-l*4A6qaO zx;qGJ>A*0(qk~-mwe%Lvkg}Vi5`1v^D#$u5FH$$l7lecD`jWZdE1IRd(U{K98=*B8 zgT|cK)cM#H6~c)7oPiXY&;%0V3h0IEaMcGU2K#L(tQTsO4ToBXuL=$xQ1bCG=`z5cJ{tUXP&X2~;0qw;p@}kaMK0 zd9_|K&a|-$O&r~%)Mjf27>Dcg3|~{s96z}1m;j#zDmpa{MgOA~*&vQOws|K+ql)>< zASkXo1k8a!9pE5-MDv(28=O7$?&;g(8gf-2I$ln7T#!tlWd)bcMH12T!)LR91g%rg z!2dq`mX*)p*y*%W#BO(p6_O0}JiZEw(X|2NXxqbH($Pd@X^am(KeN50xe|&1p+`P~ 
zGhG{i_$w~31tQo8d?>NOjk71WAYkUwoBES16-KKxp<<{(%k9y|?m&s|_XEkj+Yi<6 z+=SooZ8Kg`L`@yDp2C5DA1KQ*V|IsKuiJ*m=cz_L*!52t#9Dqcp3G54c!htE{wftyY^jfs(IfFZBDJKdj!~yTyejJ>Xp!W zvQt$_Bf+uHZjDV{I~0=|E=CVin$Ee~p4x5UzamiL3g;1$E^uj9s3YPn#7qA0G+C7* z^xlp!cYE#2?1aD2-pgXstH|Y^vy-zes^MlY-v}W*s+X8Fq;`ySsM*WnUWy1f*Z~-j zj%R?={N+aJ?Hy_#CHFw`!~Z7;;0eBKE3EnHGJ21xQ^j!XDz4GhT+X1iz`i?A6tBHz z)W0V@+u)P{*>8EK&L`wZDYX<$b34D4?)f43cKwbS;==^Lr4y)1K6#2qa**0VmVD1( zxazdDxUAh@Y{!&1;`o{7bG6E!tiqK?M5Y~ki`#S+$NzATBT)U6obP^@bE4QEdvh;9 z^xpdoMQ#q?MYZTvYn9$RE4*252@ZMXtQ+J8kGVclkvi4AZ}rc^m(k|3O*lk5x-^tn z7O?aMqQuD`{N^axQ^Cz4eP!g!jrMYn{zVnO+nWsCcfP!tX`f){G0b%WfSD)-T(#uZF!2$VHiVe7Hp2}XO zS*$rfYylbh{t~1vc3I@io#$n^=`?QDK13ppLLCfKYBrBCE!^)G?m@fI6f4`eeLF?* z9t}qrkz`2j*eqH;k;02PyUYCJlq~Ee!o3geM)4f&{`T>iiy5m2g)iZ{zrYxfE)T10 zMww*8qblt>C8*?dbHBymX<3zp{=l;HVHVz_7)5(O$gv?I95{s>~Y(UH^JOH41w{*F7BHZDh?>%3$ekS4iKk!Ip%)PyMAWJYGB{Y-L=HS zz#ne==@0pNxe;f}?ET=#4n)$gKb9QPqwtu73PYw=an`s81eUQaFw zRb&slku``967v>FD+XIO6e=O_rgF(pLZ5^bga?JB2cs2he-$8cb9M322tYzBV&(&i=@QK;_=kD z@5-w06xWa~@eUvwb!+_Iv&yE^N1T@D+Vu_-zGzhkN_?1uH?}T^w6Wj@LFue={;aEKgcKIMGZhnvW+GAh?`?@)DSJQY~Z>3}T1m8GehM10k% z9WvfCRphBr?bo9seiFA#3v6GY$G;w_^G+VPEX!w8-l9|6FQMN)y34o9n_fZkJ&I9s zcau`|`C5g39q(UG%FL0bAm6esbXH**p?+V0OX&gS+=|92=I>r7rS#~9!MQ}my}ned zFojP|v-`rM0mZK=EsZ6+^y*E4!YW;wP6);jmrL6SfN);;<={45YtdHkq+1oH1V<&u zWR^v(K0ICjV4c-?MXT48Q2nJUAun+$Uo1 zOKOQ0$vNqxcXLu*`zSV*eEht$QC3%2V>$h$O2}_~=kL|A_v6RqJcEXz_#h8>sP)io zQ-WHEI-hb!L$nl4MDP!_i;Z{FFe5b&0(UCb2k#V}P)mHW_n67b64Tvm6+{V|)47pC zTTW~;c>LDi0^JFlUgK^9z680p*y3+iJa#?8q{P`qX_-+w7<{xvg`Kqm!inbHexH&f z>^jn~Pv<6!*F|0#6KoPGx+(CX?P0!9L%O+Jw@u~1EAb$MGk3n+W08S_Hj5&6WEhz1 zC$nHNRG-l7X+y#hsZrA!y2NX)gWGgH4@m7|-O zNn!uC^W1yoPj4><_LjbFDez)WOy~V+^=T;mRfpABMJMXI2cCS~zQd){#puGj#)5cY z!((1}Pw`i*o>-WqR#lbZiK>`wZ};*fB@6mip8ig@be zk{FpyIUYfz%@Mb5eo|_-HlNAgiCz+sBJ|5HRp8+Gc<+lnIPUktw5b?HTs8vid9jBs%58bcum6+Y9upBEyWa9$kJz9*< zIu)s;ksZsLd_%OOB!+e8T5as~&d)p|A00c5ivbpJ)>qW7RF}|JBsYKNw;!H*-2(6O 
z;npz>%6KcO>QW|Aub-ZpbFj$y20!<;*t4I_{ktqfruLW|U)J9Sdv{ep=!!#k-Vx^} z)hM^oH!fkB#E$(1+`8qB0#9vU=ig~^j=5yepVI%C?q#0Vpatu_drp6|7~%+3nbkXJ z`Y;9vf(|{Iq6cfe9CE{p0yn!KLtWmyLs zM8Be!-ng1mD}7zw^uA?Xjj7v75a}y8znMW9^kjW@VodbO5mM2LODegIZG#@2Xu%g|P3hS(@4(AG_X#UK&p;dwWs9udSSM7{SC!T$X)fXU|OE2$Ry=U~j)I zX*gZ+hJa<804j;(F_~Le%J$MxPpw>AWh>NF+HzlixgCKP=_5L+z=J$XuhCyiP_D*E zPa)#4eMx~T;fOdECFAAUXf+{Thb&q$)pT=Q={49(pw&7Hvle_hvi&dJWC*6vksR1?J6=F~; zEYqlIrecP|vl*}KHFpmd?e`6r>E$$sS*TiLSM47t8Y@d3{as{!(KkA&->9<{iw#{7 zdqPo#1xvgS_v7`#&z+m8(NqqWP|*w*!w~yn)C|wrk*U)uxjmkvn9+^nW9$W~xs}uLJwA^!%BSO#EPe>-PfXPQ?n3ncFN9y)gX}^(g*e9F&z=%Y(S@ zIzw2oIF>zd1&RiDUE}7yu^l1sc--Spxq>I}>aKshZGSJd|01iZKY+(9=+;T)V?pL? zOQb62NBa;LM&R9}LE_vStbW_R`E@LhVN|y8Y1qN2jr;l#0?+(hj3(~S?k$(-@R`U+ zI}|gLazSSxbVQ)@UITVx=U|{Wg_&hebAulAAhnXNNd7&E`k;8to+2c$pC@_JVXj@r{1V=IP#vo1FUC&eW0jY3 z=e%5BOWa;js|875ZH51Mwg-%hE|m0?>bopG`=LxXs$!{^Sw2_sOn|PSX|RAtYdtIs z{z#O&5)2oxBiD)OLKIh zs$v5>l3UWOKfj!Iu5hO;N4DYALv|Wv+&BV1m_bU~b(#J!VA|ti>X@g5D=bGu^wcX&b}axU$;$ttGxjeA3~iq_#KPB zkYGM{nz_IoTw(cFavVjqX=NOa!A8L7X~DThZ!?E88(^&VHw2SAbEtU=501~T8gUkO z8r28y&u&Gum^OB{_Sv-^O#Q&l20F9kmmy(1@V3TPN_D>C>ryK72E7wUpiE33qj(Qj zsLq(o4UVgno?KF>2L~KjTVY^+kO*hO@sxCn4PbE)XV{+J7Kx^s7?{Dg}-RY zme*mR(s1@9QQUudpQHbfV$}}7%)FR@i!d{ZQ_tiL@15t~z`u07;K|Nw%f}xxcw_|b z9ZzqyP%Q~^#_l(`nNeaXaPBGALiTL#NZp*sRXH)oM9})F+L54O$jsf`D(V^m z(%i!U7}K2QTu7&9y=3M*fF9Aqh9&mHPGRXu z*mPN^EQH0R`)P`M6FtAfV#R`n39g+NlPp>pRb9L9pB$4hifeSM+mHX15iE4`8-;1F zCaQ=O#bw@>okFwlnYqOB?Hqk)*^GXD`8P(9Rok4OC`jiVJ?_v~AUM>^LI8pwy1c*T zKB;hP?22i{9n!c)#A{CW|KMbj1RQ31g6pZg4D!6N@J-mwr2#>wF{AK+Z9!Vx+5CTa z@ZTF-U7~3cLWMLkOblDhYpcA}$zHX|m*Le%CA_+Z#w^1tM&qwAu(yUL^IO~+OBgjg zzM~GzWY$Vzh?sL!07enrbKN%!s~wc99=zkeK$O?AhmHV7(ej4nTMCj_536us@Opa6 zg<}W?mSxn)926p+_yGU?-6%fstyO}B>$9CFnQP0N@Fu|$TV#gQcE zl_lw=mkqQJo9MnXsJ4fR@q}q1`XcR5{?b(MFgP!BQs}RB`!wW+8S`GD;j1pM z5)8^mPcFUT|B!k0W+x7-UpZY$>mBZy$4c}#c_*^qwWf7(Yrg}6UwITjqWl&T91hp+ z0%coH5(?qjA})1|1LV(}fm} zf9cS#trPe6sZqa3Tj^wHkzSe}$gVRebeS7`vT9CnG*_C;sr|M^k1{GKO1wLEOG;ay 
zZf3-T8M!X-Nww7)y24jRG#=i3lKq=lP&p!JLWmo+4_Gj!uru49X`FJ{Lx5L2i!6Ea z`A+xQq){AO!0lsZFgZ;AQ33XIoGdDOP{prtX2{LlvE1P?6`3~mFngBRInoqI|86`c zm)l}a_2ornEt*+bB#eYKwG{0H#JR-Bsn0=G-Mr^DMon7SRy`L!M@qSZvkpaNnV~L^ zOjX;^=^XpaAW_}^2sU7=xtPcW5B{%1f*Rv9vUMrZ_=8Ha`?E$cI!BMc97_zg z`b|pN5s?@trk9wxagWqsw5j9gnf-J0G5yAt{1%lmt4eeF83xu%AOidK&+FVD)|hyI z;{+97vu(d?w~S6?;@t*bs<6i%Xl=k_jhL^vTW;U=w1Cfe9>P!{ypw-wxbYCg@h4yUfXob>yO`5> zS~>Q1({?O*QQ2QgUhztY*-NZR=gQowCY&+h(y>1ZBt%aD0D$(@=Vj^YmoqJ{i7zx8 z+m=&Gbzv2`mY5n#UQ^uSwb7P@1S=2owiR?7lSa~%){^poox-sYEt;^ugu@hK#q=zr zSxd}rY@R8hY9tob>n)^1$Bj%qHxK!y>~|gkOYWzTCOr zU>*`V47C%RKX^HMg$Y^s?X}`1hUiMlxyqW2MGPigdgiuW zAcq=AYpiqbV(I)C?QumT*#*q_)$kcJt??A3S}wnptB@A+a45M8)x(W?etFb?i^WP+a5LH-!PlLCiM1!V6YS+#rnA z`KHiBXs#FC4V76o>cH@ImBBw)EUWg46Du3RD$DV>kq3!!$i&PCiCLVJb#2Vr38l$D2nUzYyPG^>Ok;T%{bc%0`9XuS7 zEHp=R(LL(X`FXbq-bz&*u=l|AA7y#5cx4jk9N8yUH1-rIAvK^**M*1^3;MsmJ$OUb zaB7Y1zt+M_&EV3|sjdMf_*`wSx7^aOnpNuB3N?(3y*oy2l&R{Kv+vqfVx#MIa$mOc<^ESMQZ8#qk+}5I}vta<_fRS zsvbG%yQLK4D6XA=2~hWqZVON2ngYhA;rhcYQYw}>)>4B4V()VBjy)QQ2&Ze(pJ5ZM zs`heQ!4jX8?lw!X2t8|_+~ z{HP*;d*U4K1;SMU(-wD^?Z*iTPn(8kM4se-K0(D-%*WC{^Jlb0Sl=_t) zUAtYI@d6WP3-xE7a>}olo(F8XtK8D%$Rf2e+vU-4DHY~#;j5Z5j)xP1M)k}r@R}y*hjZlb!o8Ix8h*Y_Q@kYAA1WAfJ@}&b^Dm)~TwL#` zFaBSB!HrlmNGt*^+JE2xTyJ+lDfU+F=pF&MUePm|ss;cPn&N~L>xF!wo$dA!U@k^u=pVWa#p_ zDWwHhkdinI30o`vviAY?;~rDW_tJyZj@b7zfN2C+gny<$>UVLd`d?qjUI3|zjC>2t zsmg(%y0-kk?Ys+FAa053PFmuW-rRCYcqPPfuIm@-4(ONlhAv&3KWZ{A%v)>m;p;Em z`Sr8^5VYeSH~=3CdJ6yJpAnRLwO_ftzUKcHXmOblTk&s*1XR!Xr@_b-FiHmq@zv#7 zJ@z1*b*k;`r~}2)&8!7PZ6(=KfOv-SO-n(K{(wqgtdl*(SeiRrXiVRipcL zONqD73gcSI)lnx6u~^zdtlOw^Hn%9W&T&P`vfnIU(E!cKewI)5-ty~9e_v-?G=QQa zr*zMKdZOgM_#=ie*!n?VyB3oyDIo zII{8J@BS=Tz!60cW#ps>d!{bo5Q*#fH30C=14oz)7Cp*g*DjE5JUH$lbshH*h}gOc zb3NXPXsKk}4uId1zs~Uba390@o@?kSC|S&@=XfVvBo6;YneRB$a)|>!iNI|`Jp6Kj z(;u!WK+{_0=auNCS)&*hh2TK;d-6k9RoBX!h}rB0VwO2lbyXt-s(3_9{rV`BpyS;A zd4kl4-mUuoR)>21P529U4nU{AV0nYlUpVw@Udd&BJOF;+49DueJ;f0t&M?)# zo?P$eLIjRT{AFv6bwL~gkeZ@=nc^-0_6t*9``Wm!t=5wNx$ru0PV)LYzlCQvzohml 
z9Oi&q9@qo)jP(|d0`mhzECAC8AK*qvcHFf~{3gJ$XAii*9ZvtoY0`279{&SAI&ky9 z545iB+XVl6e@+9QDX#dF9oYS|jn1`+r&=;% z-?~u!0Za`*+(ZLlJi1}sVRqwB1(E>XzsR%Yyk4@6jko2hWRX`>PkqlDKh90L_CUrQ zz^4nMGTOS-1njz%&xk)YgXZB5mr|ohm$LEEXzQM6zCXJd-zO?x)LBn(Ry`GE~M&Ff=_9Uib+wgoIq>14kR@} zX*mI;eJtf$i7O2z;}U3K5}w9YKDyla)J^{g8E-F3PP+!9_2SZebFA-L<8)=#Wg#^x zQzbvrYd&B9xV7@u9Vc4b%p(A<09r93kz3NE!sU~%^m_@@ig>2@`I=SlL8-GQMg@}D z@%D9Y({_o0lE!J-%9DL8)yG+#;~a$6gBrj)*84<3?x>uHwVFWOcdz@6di@~bazVVU zjosOttzv#>olit8%B?X1G3e?`f<1&c=m{?gj`VGO$g!W0DZLOZG1*_}%3Runub1FP z)V)y+Qon!~eQ8b8E*9j1kT^+RaF|7*{megS%vzdRnjdP&-2%ItkL?L{5ht(taZ@45`+ z{5xWLnNdxPMbm0prhJ7R2!u=cs=K~h`3S8S1#oD#^nB}K7ayk>yLR`Xh~%*R*6Jwx zu6;;0<&Jv+OJ$+8a?#CdC~i9lK;9ns1vtXBN&Dy)e5MM!*K1Dv_LSbakBV=)Fqg@p zxRAmr=e$lQx8kUD!$vF-Wryn#rAxD_d{<5C7~jLr6-B}_$X!?A*;c$J`}b9zHz2Nw zxs8-qIrPCTE>$K1(rnxKgK!)&UdXY&%~0gVALSf%R8AO<~#K!cd*YTxy?3+)XeKo z?-BLSo3hA`?R0Q^q^v;iUU$G38B?Eh+^B*1_R9i$`#$5TPL%yWw2s4+2e&?V^~?H@ zxeJBK&)MQRQmU(0lH1(W%e=lbr~i0d+7z}PZJgfpkCS})xf=D+L}xNla}?x?`dyC2 zHdwYa2gAqpIuKZyA1ed92)rJr5b61kvj!HHYi3=PUW2#-n55vH>Gp~G>qEEOxc=j> z0i%;E7Uz|~RFG2-m_PZ)5pMXP&S{OALF^6=+iz7ULddy}#328US~+vO`=0g-TsY6+ zSv99Hjq2Y8W+U=|DtAxFqSnzX>#7N6d)E(q;oga#!61;?9h^IGoZgRol(M@)Xm)QO2|8W{L<0MIv~(C9+kf%R+t>nEVBxsl{lxk9|7 zI{(OA(ekCaTc_+<6SqRiW8I+yT|!fwKV;lBA?e|=7W;{@;|h3}z6B2@7;U5G)`_hf z1SO0=s%?c80L2DR7Xa6gZs<|L8)R>^pp>+CJNL$aWE?E&TJD$_~X!)9ev)=nwi10d!;mi0zyF7MXDN;bkX41g>se{Z@S9 zOJZ`%0n?)8_IlY1BL)lD27my&#tAl;A;hy%;o2?v8VKfQ&x|cxU+$;FiY}P;-Lj|r zP-byI^A$)`!J72|30|>vC-2%(3CrI?!hvoa;(X7E2e%_$_;OhKqXhX=RipX^x`9&z zk7xW;CJ^Qp-I-EkJ2QpJ-v`70acBW>YjH^LxKcp8%*mo$WWEp%liiB$Xo{l3zj&Vi zJ5;>Ncsj|k77q-6Y91KJkTl4D5j20&s(t#+?!Q&^h{vUQj#yv7{wwif4{)LZE^E~Q zIJEO-dK3j{FBXi2umLCvt(ShGVOi0ppVil?*61x zb!Z#8$_z~-=2z+Pt*Nle%&|Dq`-9VeVu?<7kn!G$bO$%hcK7R4EpWr3U%|v&kc!Uk zL{w4ByQNDf8yF6(qSlL1y}%c^A=&=#GJC3{?2H$MnYf#a!j%LqO@lFP-SK^XS^60g zm4p*>sq|Y5W$E;k$~cHi;{Wz){B{hJlS+Z4LK>0$b^zH`X_IMO!kCq@NKTBxcR#+I zEa1lD&zckMfKKKs5BMLU`fn?7BCDTSCEsiPPSUV&4~%AG!|>`EN0u(0H6_q*lc?uA 
zLEgSI^s?koDm}h3a7lVXNehy(2><_BroNvEK*fcNv!i5VB*tc|XfZx2lk)y5tg-ni zT&O(w>dlr7>h>QSU@uTy{6~E`XX4~#2V}VsTX%}wg-x6-vP8ZCX7ATr(RDK(g11ky zsQv^{!0lF98aoW|Co~9rXOw5H1#%*}kt`Q5O>08jDLc6tpEfTJlwk7c()73z2aNF` z>G?lSP77!{0J~2C;5z|;dV0RRK7-RsP%#1sxJ>=TfKHq#398E?-~8DyU9xtON7V2M zu*j9`W@{53#C?$HZ_@S)Bta;AqpX{jda5f0<{kq<#5@~qN%PWzP5^lmmb147fZ;*_ zV=LLk0ck5Cdq@2=fg`P7k9;5W%j*9yl3GVS`idOla3+Nf2){*}GJpt&z~e?q8rw6C zaIu+=u&#;1%b~(0b|h_?ijK;q3CXfRaH=0Kum~VsxXvi`e^)vnuE1V*>l-5A3~b^f z65hgzNAP5hsbk4UU8SkB^6sdQPX%ypQ#}eb#4)36GX)V9pJIouoNB3%r*KGxAr}dJ z|L~MMCEw*!7~wiH!3kf!WU@ZR&@BO4uoJ|O&xFVT(&-4GvU#9xXc8(cn*}avRORQ3 zf*M)<0Lq9SYpteu4Dt%~a#u@2S`1uvN((Dao;;|hY$Z=oY>M|9@#GkY_3@A)AUye@ zah3T_@{V~9Rm*!y&Q?Idgz|hxdv<=%<>bv}eSWM8IeIrk*H?RFvctbC@8gIvD}X-x zuFV$IA#&ovo}qorB9a10E_LR&6Z}eRKsU|i@a5+@(&nYb6GpuI%+hp4<$ZE2T7~=g z8z$&0EGoUm6ofp``3?yrJYqSqde`3SfB`Jw(96kf8{0YvrFW)E>*J+8CE>nz`q(>n zh?{a|UpQ$Or;c^y)y6)M7@u3dM5O}nzR|RV zq8?Zz-es%Entfo`PUy%E@^XZPUTWyMe5eaYkyrso&y6v&Bq5Z#0fcdM!~xdT0|1fc zHJ!b&!M}?sdliVppA-IZ-YON7`6seYeH1nC=)|fg;Jmigwg{@taqFs;BIUd~?J!Q4 zxzsB2?=0w#$$Xj0g37>PvfnV)6vU-jYPieU`R_J{Ugj}$q0)Lnv#v#z7Qx4{a<{*D z7A=N|Lr2#<=Kn@z$&8$T4JK)k{4fgN8R4)Cq7QdPiLYMM)b?QV=)mAm zz#h?ZiJ`tyCK4+DW*Om3Dy!`rb<}sbPjq}1lrpgfqY&;xyH_uQI1Egd)Z>LL{63l# z*rAR{c5>5EA|{1PO<8s;>VQC@mm3?5aDFb57Ws*=^y{iVX0AEDtHTrZVlvRg(zAYn z?FNh-i;eT| zxUu@&L78nz@#W^9Jm4=c0&O_8BNC?ci?x$46vD~LE_XQxrx3?e3~=@f1c7Q)n+D_Q zBT&$(6?wL*ewp8P>)r=_s1>+K7GgHJH|O_#LglL#Cum-Fs-}5!gY~jtwkEr!YWb@u zAFa4JClC%R$C{w0drOK&KI0lAE_GX%u^C(vtNfqRQ`$Wp)JCbEUz;ugnoq-)3=-XWm;Ou(S-$8++8%;7F9r1GLNPhvT zdMnJOy6A5ECyEzM%H0Au7p@zixW3%5yIeOGibm?axDnb$M?Iv#>3$m;=yt&wGJ^+X zZl939W?KsE!lkWHTsC8)#=v4p(CC1IsX*3JP_~_LSTV3vj*ka=fz`YsP%Zf#Fq~`G zS#Sj~n+A$=P<@rQ2{``dx9w3IHDb`5iKbGQ!*gieo>uaQUJ=uAmM)C3I!<#D*N`7Pa8Bh zpJ#6@8=NPbx0(PKmT8=yit$?-p9kIRE`XuN0bJtrX&H|`6u!b9G$mdf83uvsI(Gqi zAG|&2A=R^2SdpWa5hep4T|?Q)rf5lmxL51qVIjQ6EWT zff}fvYXnd!0A5@-38n|0nnlV3?f_?X$q#ZX0Y?Fsw3OLFF}w5Pe+vdgVbxepLF6(FeZgrmMdOvUAr}VzJbym0GjPC>CEOYQdt~|1h0n~sXy}Mpoh*E 
zNRqTwmbxguYrhFg(R$e8Cv#;t{~ipY<|+Arjg z1xT(33*4tYV4V|KSf>5lSQ~@@%rwkR0!WIE2BYjd$;~QMFd3ItFE!Yx7b07YqI8Vk zU;Oad&4&WF z=vXmF@0biuHO-dleAZ;XfT5U)CR}`GG(UmDYZ52s{w8*Cf1&N+^)lwq4R@E@+4e(& z8a)Ijrz2PY%IPUYk%8NqtYjli>avwjf>#Lykj{Z(9)|U0mY8EB36%qtid^7Iw18*! zUls>#3!Xcfzzdu0`Z^V}&?N_U{Uq@Z{dpWf`YJ1IJhm1Eit`bBIJ&8gWGn&rXAhYE zn0zXkopV$t_U6pDk2uIq1JUXE`cAvgCuvG=(lPP3hP~yVu6I>uhJ|Mrd*vyqB(>#% z_jq)cYBQc*gI*W*w`883>%3p*o?jbIvvv{lKka;X38>c(1NBbQ1Hf)Fr4Z#^m!JPW z5GsEflV?KAD0FLH9*k7SSB!AkaI^ zI6PY!jne5K*^T`I|K)GP{uf)FvqePN(=;1E`MiNwz#ejTRa}0%+CZiPU9TXy~7^l^U zF8#ay=^gy>ykFqBA|%O=Wup3Fn9rGTd=V|XthuMi0cFz|4F;>Q-Q#Hqj`RvvJg2VR z=)bER#Fyjx#nmV+n*5M!032Ebm}G#F^yb??;WxTtzY0k1K-cJyC8#d80Ur-Ua*-F| z&z_QJ_XqOzF5_Zb3N@bWfr90-32dg>?`)tqnvNxVIY-|KO0>~gx0}Mxe@fMjn=bNH zU76c^QP}wx`ysF$*sG^IIYQ6~5Y3Bb5UxYy)U5|l@cvXt#u1DQ6#@9n$nPfS1b?i$ zuUt&OG8mSHs52=z-_d01JMtRY^L*c#NnjdxI@S6;?-lL>f&NeLBDv=>gPUKJWBy|7 z8CW}+pq&7YP#`R8cQg0)$1Ha`rRd&(8hddx;V|L5CgUoDJ4M}c3!WX5X$hm9&G{}+ zLw>RzX%ma@TcO&Lf|%p&b&h3IasXUSnk=JNKI`3k2`UBuZE%g3r`c}fQIV4Y&t&qU zcb}NWs{BJk07ansv2o^}!%@=ra8RZKvJ76?WpD!aPO1IR`avdsC8P%MVP5<;K!%uv z`vp$MqK=t@Aq8`WNw&Rlrb!`|?GA7fI5(V0_Jlv^(aZU-`lxjsf~p{YyVdiGtoxEM zTp~!c9SBrkr=hQNz;peexAq?%5Z2KijJL|&{_vPFr6zsv96$?B28%f6@QTQ3I7R_1 z{3I`e;JDH)YJf9c69_FLlNThj^YouI7^(U^_g@PHuO2wsnHpJL7T?pN<{gu(5*XYa zgCcgdko|H=f>L=PTj|__k?X?vBn=R0oj3>Jbc((UNE(-XAD+rN99A$ZQ`|aT=AY`z z7}&%9*4UZta-Hf_Rd=;$B`4@$*qMF5QuXI+9F+7iZlllZ-;IN^!cWG+8A|{N(*?~x zf7q*65Xh&}1uL_AHPD@pB=c!a8-RlD8IQi@TEQ}#C389JA!9j@53fPv)B(JCrpU-a z2{bUMrD;I~k zl0o}c#+fYC!p+rJ^BCOq*;+7+9&J=seb6Z5va%#BH+TwOnirK}QYTC}SJUHquG)94 z5F>id)o{VvFPLsM+NsNe?9$wdp38^84JcT_rxx7%R5FK?((x`3zgsIdreXXna z*a)cC*Mq3;zXF>>#|_b`6Q@_2Tw`f7X$gMLE%K@Z&(E`ZUECn`J(l3Jr{s?`XlA`x zk#^EF_jZtXIRJEbhssl4(d(Y2CK`nBi8~_S+z(y>Q*dD1J#A51mJ}DSXW4ox2Vc2_ zg@)H(Xs?!BgFO`%b^~LKFSH+;f82tHkm;c0)~T|(U?rmJR}Ra#@$u?k$1SByiwd5} zRGyo5KfU`R529l4*&-*ZBN%0MAy+iT$(NPft6KQRV_M}`@xR#m@_4BC@BcO}BdQxI zLfUA$X}Ojfw-iMxTfC8FB*~U7#y*rnDO94O3EAH>W02h-REq4z$S&DtY}q6H&f8r+ 
zpWo;4{nw+Ic`vVX&T~6ZhCW7k&&C3r_riY~A0A%94V!qK1^9g0>I&p-nPHA|4OO+c z0i&wr$fJfiiX9dHIks~qadtw-;3)jhv4Gu8=)>0I{M{ps($h+rx>AmC8q>XsMvLa{ z!Kr+}SWNEbi-KaMF9sb`(AF||1&3?bd!<8K9-AkyS#j)mrhL<82_i?xlxst#bmk#VA>TAHC{D@C%6wQ zy4>Sk#PEq3(S~3Cpjl<7Qrck@7@3a zFD88@SrfKU3WVoX77-Mn-VKbw6_cwSB=Vt$f2kX1*QI5-u1$RALLkY&sqjY_O)*7V7bA_lb>jTcEggQSfK$-V~>^g1{QoJ(SrROq%T z6>VKvPgyr>-v?~mVWV$C3O8}7JLd|Yq9EwUoz?5|XHGk}aqd0c6Z&U;q@0}%^ck+B z+CRs8O)!+owqrhVST*lfFC4x6yuP85tzA!WIu|XSKESzx_ih!je<_?NaN0}gqUX$#0qV+{+A66rStzD8)-sdXJ?)!-8uUv`8iE#%4xtfPwN!vx z(Ds0TFmm*G*j=`=;m#W@Dne2|c?>G0mj#wq zqlvz%;Rlkh9b4bWT3#RB$$$b1(ugVy;l3TCra$I+A$n!1@0%LyG60MJwti#7-kNj! zc^J5DQ^p}D`nC&6y}z)}-K5C-X9znbMoYOL8$lHM>{)5~IOVzNVO!AWR@K?(^=^wg zMV^y>UK6d@*{(0Zw6SmAs&Txe2y9R@#;k1nb`Gi}4+giMjOIkHi@3w0A>%`J0au7Q zS2u-5PVl~%A_)5QWxQVU&v~7L zPGcn_?*pRj^FLSVL{`5$6&5IL@BV2^6DN5O{mk99MQ3b|sXKq0=CCR$8{hoAC0jH8 zSlhg`QyEIa&zwE(hY*&^qf@PvHRWmukQU2b+j=4fq7h##1y$vau8dl$piVbv8vVy0 zjt#Bnw4`oV#&;p%rkEbH#Qb-Yh-l~*1JN(KSzlYSmR>E)mhv9XWs@P5VPw6!pv^$W zK#cMd`$%;9bLzQb%Z_6*##Z2QDW353Ev$KWSSmP?dr*(|qTe!sD_A=Rc5Ng3nto`* zvN88EAM%RrMT>oocqH`}j#nIXS#d283RLUJtD|MYGWOS?aHR>es9%rTwd6h~I#W1v zE*0Oqvh;c)|%Oo z&)3?stS9-SfG)hS0KC@hsabQ#LFb#D=6^UV5y|?^M16{N8DsPR+g(?YTOG ziH`pF(EX}(CMh5U*)_m(-9|X|a*45h2~|nP|EWp7%V#IkLl-L_@o8r|UGJ@$Vey$XWW;uQ*cpbEbHS(&M^rr{lYj{-pk+o|9I9ySGa-&N114SXRIKv~Nl4 zIC&s8FrO=vdCPvKhfHA&W{?*=sQ0BE{x2j6DDRCMsNGyDWa9F~NXSN)VP6#~lU>2G zxsvmmjGN5GPOysZPBFJmw_?7XN8b4-ceoy6BtF(d=H_y;ulSe&_;uT5%#XcL$xiE7 zSexH-obnDS^7g>Gbz8#9vlHLB?wssad*^Z~Lf`3b3`F|(iUZO&b_XOYw{>CO5|R2- zC;XYxV`z11ND&9FB1gu3mu7sMAe%+x#I1P9Jy*EAZP0gvMa-=jgzZ3v%wP9uNYz z9x@H>E!;oqgFC2u5|0fhUX*+~dvX?c@7#nsCj$oXqnke;H>z(w~GU zb^=f+fAiMOaEHl>+GE!~6p4}70!!go$?8pyw%*OO(1mvEkg`PjDlgRZVTj`5Y#Cz4 z-Mf zu`G|SSTi`EFd1^4fdR2)ZHp~7C6o^sgbN%_}l61zV3q~LOJE;ZTjPbZ6cowhw zkaAZ?^g8_gTk~g&J(Pu{jR#V798sH3R;V13n;3Iwj^KX}_p9+l(tHPGXq*VBWEDFA zj^IWKU>E@x^*z#eVDZ65Sy~XUKO?=8t(Awn1}@|gF;#}X#J3EKN@J}QuIw5V{e$!p zotQM%l|U=L0vD6NLOW-w1tysR`?%h7rm0vxKPPs=rz^#mTR2XYnjRpK*Lp7G)lGrR 
zx{`FoQ_S;_@Ui1h;CyGy6}P}$j&d0gRh+8bQCf{--_yHLso6emq;1AZ>2c9nO}Z-jFbvQ zl3~?K08@7kr2=ADZo7Qi!t4lt8+{#mXM4kylt=P_M!LO9=nfS8i1%i>nwBG2t`ip2 zld5D|zB;nT=*;isE3x72ufw`Ud=Vz!REP7vUlng`0=)0%;(S&1WSqrx$s_)iu8>=TiHlok;7kblCZvq6)+`1|*OC&398?LPCv zBc2W&y4tF(+Nz?x_$=wX?@xEeCabbyIwpdO7z2ywGadKmMq`GFmv$&XaDsPWt{3=y zjn7eE7ZZ5n;KivvnTq})5Hdje0@t>R;#tBYlpADZBHk)+f7V3Iga?at(x`ZS(knm6 ztT@r;H-BXLPIwOIK}gNzI>$;Z7lk|asF#P+;mm&ZByw~b@Rw{zBzlkJ%$_V~Vr*r5 z=uWx<&iHWQOR-S)%LY^RWrD|}j&yE{v$ z+4~GT5woR_A`ZX3;pS#x0claXom^u)%3KJ=|sz_Q=To z5AvkQuBmf=_kRFSPXo@1%e*5LVlIXGm zEe8r>g$}zsF64N-+m{zdPn4c%SA5ci!AR4MI-2LCtJDN3=s0W4t*)}xwh%*=5@@UX zt%z2r4IdvLZQet@EQynch8>-|l{L_%4*?!Btl|RKVNOK1Pnp}i-wx$;+JfKG@{`+7ODgJ3EV<9wlC#Uij; zo2eiv)$lDX5et5154y5g?3T1Gd+6jqjcz+jWvX$? zQx-#?9Vu&jEsM*_384`3a98c_X4(NCYZhX6TS*@o=f3Bw3ft*WAOg(CX~JG%rb>g= zIsmNG6<*h-Uk7N1PM)70S+n!-Jyz}e{7ZWtws~^tBQF5CLq^>1)E@Epr|-p7!7~b$ z|0?DG!O*S~c%pH;G*3?H>htUf70Nyr=pp>Z;7m~IX`UC5AJdwE@wh;lT!$&CIo+G* zE>2LOsBhbDn@@zQ6V%hk2(*hb;Jq8~BM0~LFw)Bey;h<`qu;^P;V3Ylo31?ZF>d~V zOE6sqDUV;(?ltnD^GOPK+0JNbYyaB@qU^69W`UwNFlU*=hji$S3w_b%2JTp7->Z|8 zwaO+jv|Unj1jd2nCT@e>FF)d%hXju-4Ml(e>=j}-m-xcyT_%!}kn!Q9s_J<4F8kZk z8pel%O_{4J^|GD{qJ}>&gws8PIeIx7cKW^Ok$ml*M7_7f)l{v**Xo5m(lbTglXa@8 zb0=L>*|&Nt@=m;xJ)5M$b6#NijM5wvRxjU#)0Jg&Gp!riKCL6jjrCER?yo1@xY>{x z%qR9|^ZrXOt^(D;j7nA7>PiQCSaEtooI0v^Sp>KFVh7hs~wN2Z;^ zAXefywO&_w#OC}-lf=#EMvF@g7CIS_%%zf3WhYhS4sxm{42|%m8eS3HkN*B6%xFyb zVr6~*6Qf#cKjWR@D5z7?tP6E(Q#}>|tM4yWs`?>+v-t@7tC&Kj+vdCM=R&`Oe5xta zGk8<_e)TB-oP$*~r?|Di!iZ9@#N{Kj%dCoLjBESwdN~=0tx<{%iLONdC=K4~Vrbq* zlUHp35tW~58QJ#!x9b?k7N~nyKXiI4A+at!TqC1FW{vQUSg)Cpk-b~1)={6R%E;3a z&1!Xd&C5eb<>pZ5T<5ooDwhD4_74uCzxVgByZV%Kb1eS_}8hk9B|Sfc!^-gg(J9^oA;r*EOMj^ zsZkeoM;9A=Jj|S2bh7*jsRBF2+}fNl?nJ1g76t<6eU5(%qJJB+YH(~kjAdcbZ>^TZLg`T9D z1p>Sq?P~l$?%;Nb)(bK})=%r0aj=fz+|n}r_Z}+0vm_hsynJ6fpu?sjZxgAFDDd$j!UiR8156f= z{W6TxRNCS%ggbieuD+euBoXh<@=i^;#|sBUeQb;$+p1$=C&rj!-L{=k;Km)_lng|j 
zQDME-a|@<>9$OVqdp1*OPuaQ~&w|E57EYBIB3=?%^6Y)8R0(8EM9!n=@xsGcpONLz*Z*T&;UZu`1NcMZB23R3qfymKCZoMi#1%V4O%L2=xqfXueBc=?{o7b~*)mvF zP9oe(nt~!)_MDq@{Z~qk4KsRRl$xtg7on5gW;&BQ&0(}RJ5!Dv~Ni0VS&O2^2>GqzS zT21zW+~A^6`SN6?I>?&L>&NCTE=i%CX!aRH`}$mwtfTY~7RFmgz%tm9P( zfRT7Uvr)Z?=8uS@gpyM23%@ z)E|@fH54BQ*_GW939(wv+{Omm{GWYM?RH^1o(jKIi_R*Vfb?aTP6tGVYJIX0qFTaE z`FzQQ*G>~j^bDSpw(%(ec~f54#>NMx;FX4QpB+_+u)f0>#F0jRe%|H2&z&dZ9Qb3z z>MQz4{1M8%#2#b2nY+Jf zVM0()H~+2HVB_RQE`tO?$dCOVP8NBbh(PTD?~AUAF;{D&0x$Yz{jLZ!VV^%WpCP6b zDmvfyM3a%5cs;l?-EOE^tjpmVo+xPTj4C*x8SU5t^@2j|)7r zaldg$RU(-f+TqN%h00FBS;!)y2d_5`r((lkft4pDm61O_+5w@Qz1H^;z7J?(jZPJ> z3G(3SrG0ykg`Tn{i;H;iSnrg4FeTI>M zUUr=1!=^<{>O1xWUdXFjIRTtvWQ5u64a5z^J2%ro*0PkEcl4J@V_NacPp$gacwW&~ zs}>xEmbCSw%lISoDycMD$sjoQ%@du>O$}}Bo#Yvc!eUil+SC6g&@qj~?)lBk{XnZ1 zM4667ZU8Q+*RRw)JfY<!n~I!2fNu%!E4YhQwN>T2z;Xd+ID zhl&=B4Ri{tXqE}6cIv4k1;XEC#CX!6^1Q_`@^+%Kg6e2z;R#RksM3A@{bGfZ&V65; zoKprp`MPI7L-u{WJ1ae@y0Y0K<-jvNi(MJd2^mwOb(XXfku%Nao8(_*1?*)LUUzQ3 z<;KJ4H+E0A>s%}VPO%DN!bf2T&_eBzHexvwZzP!Q3YyJA`>H~kSzyBhOk?nA8xgd| zKZTnL^-bNL25DqBq`&g32JoYgyXKW&;+E5y!ECiZ2q}HQsJqM`wd)~y$D0dKjGZC7 zt|j3)krU!3g9}RZtv$876_d35=MmLk_6J~(=QX#=#xB{zaJ8kW0bgFNfxnka1F5(Wz+)rRC3L4CS%|EfsHw6fN%0P)Ujw0%40`4IZIe_f|f4EUS$9xc9V<*(;ZNwUOCNao$py ztn>)K-kX@W7}yI;^{!YoWj!f!3~Zx+o3=|1W`NZ5+j?Spfraf@B%Z|a5_~g9#Rrvg zHtLjHqM-@>G>MnmnKwI_UtfdlNT>rcZ%aU`*0;(RAtdb4I{c-J)D7Ilr9gs2XoB#-9&XRbnxpE4M*uD4HmAq6)(lQ1Tig2p2E~ninPvZ_2rTS>NwVC>l}^Z29=N#H4AH(<#T_94=90T2eX(&9{1B9vIqMQPev6a z>ksxVAS5bkEGzyRJ#7l-RVK110BvzK2_aLj4VI?&(6qyDq(-I?uP-)XfPtCh;rxN! 
z@?T~^Wyza`KIoGD!%v^G-T7;H80LvblJ#mTnyju`l&Y79%NXvLrzcsI`c|@&E$5%5 z+_RtK+{*>3OKobLv)XM!;S@ zl0!yZjDXBPSLGkg;D5bwI^}whhiU@Ss$*e&!#d^8fT7=n=y@kZ5o4#hQUEr2eb-Al z2mpohYKB32Dd`dDu70LoOL2-*(JQ#YXWJB?&O%Kpp-KK8P^P!93EB`!6FXeWkG&B` zWaNKTTBo0l8IOPH8{2LA^Osa`grHpoUycFQm10txIO=(49pj15Z)67nLMo^y_&%rp z4fUyOl1w7)tM9n`wa+(-En8}*xZ2upb?!WKM$`wDX zPJM$%;;Y7qUIG2^RE@s4&{HipzP#urG$T8nX$4E`k> zglhOu*(f(}gV(f(mFYckTeq68s$-F6AP97;?Y1MdRPt_9NnLp?x~Spg9RID(AE4RF z8V*WWxT|zA_*l%5Y4|f4Nv}OC_&QVbG8J3~t|F-J*P|`B7ttUB+m1U~L14dq-Ft;u)FlwvbylNr8f12>MJTNWU=gz_Z&3hu@ zs=xl?e-jz&q4r_cuB{jkIu@t#4U93Jd zVZmLt0q&O%$p77x9+g&tFyQ01!^ENKKhWK*4W|f4RaVlnS#UVIwc6Pg3x3SL`L&*p z#E5+)OSt2Q@oqPBCUpj~tg0QVw5jA?2v)MlX?Km{+j+e9q%WVUF^Fr0_Ht!Gz3>Ss zvmrJ=CwhKvPfA;*_22#Hn=V;hrp;{SP7sV_fJqjP$CWG6q3}6?EWw4+&iU2klop`b zSX;_-0928zJd8C22TsBh!VpaBgLrgOSf(?E*ih=vYmzkr;U|XkTMLX}ONZ;UztC3Z z9M-m-oBrI`#R^5Q30}oNMo2>dZ*~Xl&_ie%uj(Eg>qvVK&Ldank2VFV5)ZS2+2=Rn zZbGjV`yD|F6apU1NHsP(*CuaHqPRx3E)C>y8LyrFae~H2s6j-&5*pjEHazXW2b=5d zbBv*%LZ%Q4aLRO7tQ06feZa?tInv!5f70`Qf~cmK5X7pZdzdf^MhN<`y0wu1TNtzl zQLh5!Xuu}%5qwp{sl;9m7G;DAq0 zu>25g#c-N%#$unPy;)KlZ2+cpo5!0Zn)v`xXewmG$%cgt&w;}JG`uI%pM<=7Zw|yUCMk9-+g%=& z6AoREEck`5zug5?NTYb1)-6y!E~5r!Ub2B4j4TRx#?CgC>>Ri!F}z8pjS-pJu=!nN zC^9n-2l-8qm37-KhT11z<;mbtM{*&L%q5iZ7@4PIYP^${d}_3Q>OwOXo)@PY>MHma zd}Xm@K;-Pj6YVblEpZnu;Z`8gR~RdmODmge)@~HBrm|_l%WCtWdw{TlI~FTtE8fAO z3$jh00&H-v>P4$bi&_TKh_HD=IpQC;I_0ztlnJMd= zaH;`|}_rrGJ{-@ofeX*u+08zsFOh6cVV*$n&jA(II?s4NbRe{Es z8!$Z+uwQQ~wJ($9GYj}%27LDR_I@~k+~&)!FE@uE?YAAhquf6F>OUSPP#m9ETpnXq zovusZH%7XX-x^Yiz?9}jDa3eq-ovaj#xKB2wWzF`+vNa?*VJ3c#9JF_MTa6qQldCy zpSoh*D6C@{0wK05>&yeawOg3`w=hRcS~NyjHK%6Bl!aX43wmC|pNf7X02t~RGcwM2 z&u**ro_t}xSoStKuvDFr7AFx@xNv{b#$XP@g7Z&UWF3akUezJU&CLLfK<<%gP(?|n zrCqKx0eyJk9Ea)(%SQ719lt%0%wI?g&~EyCZk@Pynn*~l@6w{2S?L-K&x%sjlf1O= zSoER)pnl6b>;HV~`0l&1OO$}ClYQkn3AI@2e)-f$Oji~xW!l!l03ls_r#Tx%L89K>6NTyWc_i_JviaXCK*lI^wx>~8n+2@ zC;$T31_G2Qdszt#jmGP#bz%bVZC)&3%NQ{2l#sFG9qMs}#DSK+2XBs$3^$i;wS8$y 
zo(ks;mOV_)`*kc)^E-GvWbrTNEYQ0~#F{krEkc?dCeY&Fvqk*q8`W&ZF zpT^I$3&JG)C_h?6?q%uIdp1a3>9xr+mbLBKuijSs*e0^vp*H4B>egRX@|`ZCjVWKe z;AK1e0W^HBLr!*d@T=I1DarEnr2M>H^XJ4R{b1T@J#J{JH&4c#_iyx&s_8BycA{X^ z*Glu$!tay+g#!G)!5&{hiAjQS{61{j~AMt!@qt!_s>CsZ-&+*!;hQ~PZm~< zSWQ5pD>gxl4}Vc>-9vGSH{%M%GCruUR$sk zp9`70UTtF^FGl?CgrpCOi7L%22Y1C(G=Yw$3Hink+!aqLVWnD)X4#N!JnrOSWD0{2 z?7y$$E8J+sF4v_FR8 zETYe5?=4_MaWn&eP8PP~2zi3mlYrsPb{V`Z^h{6=*<-y(Kw4afo2tN3do?&pCfiUJ zEs)q%;4=6v;FJ%D0NT?&abz@MW8Zf__Oz&i(Sj$*MAbKJzdFsnJzuZD)lPnA;;I#g z*FenMT~G_w&~>nWR^-$K2tzZm#BqdTJ*&ZIPQTg|R#if^R~Y@;Pgm(trguoF8Z3-C zi3MjX7!4-|UP)--quZE;swx`LS8!?rCqT_isj zg6Zw;o!W_ej#!x{l4%hyrCUB)g*YRv(qKBFc+1!{N-03)X?N9^)I6}$9t?QAA+o;~ z0Tt@exOb(5DHg}uyStA-zY zy@sdc_ZV%1RE}kj+H$1X*T~>QnGPqwI{CL$J|3t-XpzF=?sk>6x}<>~d`K|?&7FD# z_RnwGckzblKru50Z8pJ#AMOJc8LpxM@IQ$8yg$J@&_9Mk&=Px}p0oD&x_u zSDeREoQ*`AaT9oiW<|1PXQ%+E{X^mYFrZLC{kTrAD@OgN}#)MDV54QELv5nVa zI++WdeJ@A$;VX?cg23oDEedfL_4YKZ0j zVR-A&W5XE-CG+~7wfrMfD{9FzVCiqdFT53tSTCL=Mw>eN%eS*Np}DlxpAxEbI`qjU zUd8Bz3O#4_R#$|tH?XJyuk!`2N%-cLc{R2nG5+kDJJFL#fg+wUz2B5`nJ$;IjRwsiaIXJC7X zvy(jb`v2nt{2SR0cnt6iblcGRsaN~?KODMmu5cy!>^@s|@AZOTsO?aNS{S^_cFz+l zo;IjP_XL>Hw5U$AFUHe9MZ}L;^EZP0FSaqL^Sf9_HaFH4%@oEC~drO1zfO;<>GRSK@X(8QTF5! zg`QWuLm=|TB6HgIYZO&ukvR(8YOStzJq>4CR|l_+&7)yE2jriP>S&&{kZ*Im8+F&X zY>BgXtAARD)=*2w;B^6*Be}>9>XZFo28RimbW9tam*Yw1#AzpE_ML+Mc(ccXi=HcK zDp^*|F?VFG8=Ij3IjCm+HSB4WwutUAUh{QaQ{P~w=x*ER^;kSzb@A-6PIF~2D7Pk< z{PAHepJLg<4c!%9koYrjw`TRfGISVyz{7*xkPu#CnPOs=-A=`B8_bzuT)qyf(g!)j z>J;R~Z6m~#+QUKtSDRcmHfEx_DX=a>&>2y1D4fy@@#w+@5uVG^Xi6R3Wr3{{fjhtX z$s_&Am(NcOSa&c1qHie}Ep}K4+~}rwd!Fw=bI&zDHuEl9)|UPCo!Op4y696%?bLN! z=Pz}sDC_W5^~3}BS^cR&G;CEM`490gEkSH>kE92=es`j@JFU81nq)VdXI@@l=ShH> ztz*$-VGrhVa89>be|5M_8nj1TDAIz5iEYRS{4ZT8>ekl+A7O~B0!9P z?v}+=jYT9YcX4WLYFy|BtoY<)JJyNl7E6*4`o+n!y05~= zPS)UnjbzweI@))~DQSN3O+p`?oV59g(Dp=dcaGG#)rNoqdP+F|a(Eq%BMA}^Sdf?F zImI;$3kar}#RW9wYVXni&?}VK$8u5h=NJB#53WS}?HcsJmQXO4t6U+J+XAZ81GLuF znOZsH)^R_6;?`IXqU^JL6*sMSDP#uzn&5jI#pJM%N&O|Zoc8?tm8;h_V;w7|Zdcx! 
zWd7P2c?18B zWACw5h5FrZ#wIYT%k1VEElD~dh)p-LT*jYsP*7-8a6n2J*SC(fGaB7kUwc1&BKtRH zBItnv^Ge-h$#Rft{*yT|Qs6qCLZ;~^2)ka2>Y-`QP7E~Bqo}8m@Cd@ULi{1R{1lw? z6Uvc>SUFFKFinm$ng|eX8gKln2rCs$n$GYr_#ibfepeXqwJp;&QTEAejf5X#+Z zyLOY6L4?3YI`pI($Qtyq* zLqvx$4=V;YAq`NGBaLCzf$6*ic`byLzK!B)jxta>7N;T4oB(%|6=t@G>eMg$^Pn!> zgqoQ3XM75aZc>TrrFB{naM=zY*`|GRdt@F;*D+M8<&Nq#0z1o~M1Tt^} zObgIjcjcSi^;MO@^urc5V;t31>)`dV(< zrD9f|bAQXzeHYt~EE+}x{MqDMSp(fRGdS{ybx^?&5Gom%I2@y76hMeV2eL}~KFigp zgb^8_sy9p3J|^Zpb_Sb^jc?P)AmvU4{3RYR>l2uTlyB#6#E&cGt~Q@v-oJ{a*#74P z<%bV?R?rnv0)&yM61-rKDHyx-&6RQ zHWC8#21zgnK+OywSeyOtdPump%eo`$a;9ZDLmffO1}m`^Y!^A}-W*VgO2?7UeK(aC+ETkEP#r;YKj8*y)X@1?XORd>1r~L+oXDO7Z zN;-ew^?oP}kU`2WcXbVj)@^tA;$cqzpk3bL(IcHs;uZ(}Yi9r7n{ZSp2@dq=?rsV^$>I=>!q#}kJ_QcpL>p#ScDohcw;_~rDn<%>2C(Dgzl{9< zg7W*prE~fX*yfQB5Bjd@e0UQujU*)#E{MB#-N~%#p99Yf1QXR_19FuTzr&On)%qBU4&nP8J>5jtB5edr>|be?=rD&uBtOi1bg6c zm*HZ!>I?@8I@_AQC=b5Kb&D$ww7Ov zQ)(RiA|`gwS>dQkH$&sK2fVO$Kn>fAGs0{#HOHsVMHUwV)ZLV%DUO*oPmX`uV;`Mp z9(q8lp_qzgR09%VfQ6qhbc>?;? z+4Ihr58t*+`jJ$tJO11pz*Yi)Q_#b?*^0v|X>p6D#bgUotRI z7uRl?aKee@L_hM@kcBqA@z4RPBo?7x_E3F?dLTemSSbv~hZ(=~fxwd7AoIc*G@!mM zk2BQNpHSXtUe29dT+XJmc?8Wzhvn7M65GzPey7fbKiZEpd7i=<0`XuL%=)PaTP}ri z`z%Glmh%{WRbo4H8YH=yWPQgu3-{=hI%yCr^SEUMtL&@Ya71{Ufn?vnJxWD596GgH6HMlE8nzw`;Bc-zd?K`>UBQpiU?R_{$@?{4Nq6Wh!gkxbt%}f? ztzW-U*y2n%4$a5#h9O98cb42C=zqctE6&)l{&!f*f05$Y&sO|~JT9T4rUDrUymh7? 
zpqjD*F$5FA$zJ&t(AJ_yg+O)(gL5JEnzmH(35BWcSTn4=g9xJ+rdi8B(Si9EGm-4; zn55nYd4hM;9ZTUK3KaKg=6M5-;8-kACDqyRP6|G>QH?%xd7>{x*_gs2?xRZEwp?&~rXJkVwGhpb_c*Yf39mPY5H z6hS66C0~y+#`&eW&qWfu%+NMJsA&52$$a=D^hB*yn8(yEPF7$9j9;sFtg;cOZMAnf z7J**LV$`k|fbT@1w7h-JqIMo(`dZ)2DxDWcKG<}|UDcD+IS`K+pzol^VvrCjsIXb7 zKIGFaP29p?@Rf(a$U)NKwetA^4MV|Je-~Pe+2bi+zK3mZEj0@;;mM+Q~@P*TNEFKF4Xy$iF{5ZbcysSE}3oVpkYQUmSc&vOhwa-YWISFm}buT7ARH+fv zGL6VJo5flP)yS>Z@%O`#WZA7IZtcAU5|*5^>gytO3~0|)WIh>xUU>%2jb0AOE{0G& zSvEa@M{nIUz}cLDPGl;S5R34|&uB%55#nWK;yO&L^3WN>i7}h?mdG|!xo*5dj4?vh zEr?Yw=#duUpyS7hf|q@U))Qx>dZy zzT_6YP{xc&iQQu#h$3EvEsD7Hcf!t3tFMx-m|4v*1=ztRc*fm{IS+@;O4 zg^Axq#@09mCI@)1(uy$?W-Nv9D2)ES?gu4cpWcU{h`4I)s;{WTL&5Df?nYI6l0vNg z>G|X{dc1-@V=e7U^Wizp z!Sv)@)mxV>U;a9vi{*uOzd$5t6)cR5BxxM{u6I5al_1;W^s|Zv_G*e2>(W^7#XMa? zyfg;;{Mb*-e}`xp*_%Gkro@ha2?#w^9zgMM4Uzlx#hJGBcuUjwv^YBeHfjC0CBW5k z(Ae@P>`!md>xE_Gf8Dt7lpDQNRyM#NxnZVE{3X6}hKHuCZhm%WB*Rc5u1}TqiC;MU z^d3U;xK5m^RdI4GBJGVF3lG?E3TTgZuLScIJc%u#EErdVX|YGLFw|}DT1q^~WP_Pqb*IkEA04g{sILYS z&&Lo^ZihL=J$}Yia_`^*yLPT6+D3Ra(wu6XcP!%>q?qG&3BsfVIV*FJ<&xh?jH?l~ zb?b00w!(wW$ZBz|k8cz_X5PF^+~#o-NcwpJfYVMRxtm2^R%m``)a zpNaJF)XVJz*O6=B+i<3gavtj^r9~1bEE{&}aEAylug6Y=;vH z$x7-6^@;*0Exz~Z26^b|CLXM`wj6C718i_iZg_ZngMJHaqaXI;l}>BWP^hA$#oOfu zd|_)9w1jveFY}G(&+rvfhYNFt*`H-wgsh5V62rnm2pp-LKM(0EPuT2Tk2rGn?)QHQ zxv}K$J1hd=h;()Sc?K@fHL~XU?}N%<&i zs{B!#yf)os_oHcE%yF^j9LSF5*ZKz%y3ZqN!g(>U7+(6>6#hn*Lv4}%`~}7>oh5c} zhn;{FzHtHa@p@Ix*M9!fFg3y#^jFP))co1gx5^Y)E@~vp2R*XtqdSPhGZ|Xz=RgRS zyP^R2E!cRe&gfKdtzE2Z)yKbT8qSQ?ZAQ(7>@-gbcXvbia48oSUTat;>Ln>ZNS;FN zNS9xzJgDuD-(^JBCWNR-^1L^^k$(!)0x!=j`G0NEd#+u&mS6m@)KJWEvuj~l%@Da0 z{@sHM5P$^h9=%{$bo-xgI2-k{sHoit1ZT)68QM2~O-Vy0nyjUYwc}Ls-sveXro`!% zEm(gt=A$Na^bpRF{Ruy|#EH;{g=QN`m(ZDWWvgsoDEmjFGWE6ZB6A$TcGOhh=D@y# z1q2kZsT$n_=zh|6h~2Camb0=_7naSQX8ZNYeTllC>4!EJGm<7ntzvzj?{^P|l+RBaVZPJxzRPA>iN~SpJOL1a zmd`5+O7O8s;8fkZX?+8Fy;LP-M#`}!>SI>t0ID_YB_L&^7=NJ^Lj9p8k&cB<4---@ zoJp8IAsj+<&b9sI#W$E5b9p|0;K|m>oms&bWR84sfTn!@s{cpA{yadchnW-AZ^4F< z7kf`uARUxd+GSyj6^aba( 
z?z%(*jJ~*#iOal?IK0%z!z$2$OsmfZFjw-OB4=_R?{c34 z(aiM#$PVz(wn|xGOs7XqAbapu{09Dskz4hYt8Ix6V5|X$m5PB@O!7}1(y_-%hQpPD zUpxh0w^$A`J50@e8Mjf=+0b6|~UZ~NWgD(rxK(kgFr;WpfG&+=E@ zIBS+$kN*R}NzAQ(3ac0(%*g!H-N~#+37(FfIo+L0bh9s4{xNV|t$ZutRBe9(`!T1j z5G%^iEbja}vCpTqs9nxsv7~6;$GKvixzGO|g~gDJasu*X2Lq;c&T-DH6a9{!ZrlN$ z+)-8g@G!B9tfQRudh5~x8WBszDm9X)O$dRHD-U*i`Jq(%=hYIo!V>t6NZ$BKBn_1a zV?FrwT}Sgu`BsgLjBu(^l4Fb4}i9r;mj* zS0?tfHWMxItvo^|-|*_Pl-qM|^@62A5ee+Vn_rK)`89(Do2wF|6`rOc+;IOl+_kAl z0cmbl@@@mhnX+TtdwAqS(PaHa`^*i6rumm<93Tm(fU(M8G>c23;;xh(=aOvJTT}iN zR4Av3Q$8h_^NNZL{*>C-^uS(3?z`geASqwQ8~$;tlwLe+6r~~g)y$62JXHSr(O*vX zzd7{fK`k88KEg$WJG3OI_@j;8XvLE#hoY}Y_WSx_*}dKHT-iQ1aec{sj|9^3mbyn9 zKozQg=S{^^j4UJJ6TMz%wpod!gcu;-lL@`><(!d2sNPFCyGZWsCT0r91vx@v8z8I=gh+N8Pf6>EbY?yR7T&2|gX%2%YOEB4Whv?~&COZ7+h9+jmy zx{iOAEHGcq^SL2O(@i2Z#7Xx6^&Ntg<_+Oawvc)K0We@KHHB-I7S!Fq)t(`pC^9|d6>Fg%BrqB|u zK0PXDFGIp{GS2qRE4XnGI2@cU#03v${77u@0R?}doa7z8l z*^ahnG{uq#NWGg{7>ZO|=SL%pmbh$pJXG)a(}ZI)8`)%fzLj(U^;4paBE z2c~Gbu$koIgMHnd;_+dxLj&Y=lc^f5Aj~NU(e$SR;y__?P9y#4ub-D**-tMEX&Q1x^1)HtGby0_bvxlmjQ#~vNJuxjrs;6H0Z#XN# z7*FM$2Vb#@Oj|3@4(94J)R1&sl%NdPY4QbZ>vK4HSeD|RsHL~ZP|ARH^xm9(Dp=g~ zH&EH$kRO%ncx7c~B=N0Ab^U`Yg!GG};eK{g58N9MznQsEenGhUN(nVm4E7bi4%GNw zUt0yHk8|jx$z|Dm!(*KX*8q|D&#rQd?^dnV@Kj%2hx$f)p&#q!N8Umzi=xQCs+iwORsUOl;{1Wuv_qpf{MX5#3Rzy_& zPI!PCA}OGloniW>ggQMRd03<5O~5yuF4Tsf@;D)+X&@{|Ko0M%n~ZXVNWz|Fqc-pL~^^bBV&m{D>#s5&RiEw~~#^M*4=;=&FR+ zRd&X?lBF4aKE3bdbqF6;Ig<*BR`#k9dQ|C3P*8yCfiwBHpJe5An|#@w^X+9jkZ7zBSHk;|-G07?tD(l)QMxlz2P-%DFjo|8zKUxGW4ylOS?40x%+7-=Et7759yE zTLVKN1LA;9`|GMc&~h7R_8cM|k zp>*-UkaU78W1?>~6pug%#m{6pdvouG_W# zPNoe&RKI@IpT&2ap%{hr8uU~}s4+HgpFp-O1|LE0so$&C8kkfy-S(akNYWCVvx5<< zUxwzIl&;K3pe#a2e6&jal+?_#K8D zJlx)&CYXdEMsi2-4#2MnZFqgf5c}Kq!6a?yoP~GtrQQ~r1Y^Qi26*=mMJ?atWu%06=ok%-(%qO0 z(%~r|GaQ*fcqatjL>MtPb6L;WG5AjF?%m?)&-k>HN%ecSm;DBN?DOh{4UZGO6%G>G zH|OO!$;)p?Jn%^Vn}35utaVCt0rJ88()_p5_9Zbm$~(k^|?vq2%*9j%i!I%+$hQ(ZJ!<&yQsW%Won!s-COKc_hkW)ogamD`{^(z zUzuoGRQUrv$5_~*P+*Il2aWx8OMJC$a#?bP`=^3=KD4j9tvf17qgXS?f!Yzk6VeO3 
zc*oPC#$X<3d&{Mb%=51lbKz-Un}C@|#n&hPBgxHD#{#(GwtWk8wm|JD&{KMz&_;{G!w>}J7k3Syz9K(b=p(|_I86ITGbkD zb#b8ka30Asu;in5upp{LeN=hATu~Wle7c-AB#vtc2F(>u^y3vIK z)J?#|i$`60!5nz<~@C|C@_F@bx$Z9ZYQ zJ+H=EmE!DERUCnNt;r6q1^3F0QfuSlU?=yV{)2#^GnCkCaQ#kyH7SI6;J?z044?{- zc#cU|_!z=B>16mdQD|G~ad6QG;3Fbi4@P_+f|hy;iHtf%#dD)K4TH8;OdN4Ie%_;+ z1_z25-9IgC$&eR7S^1^i2>y8i{?brJ>_|bjr*g^MSJFjG#8+)QOihOCjs|kGFPyu| zaY0&a z{PJR2E;dAL2YfuIU3(aB?VFP^;Li+UJv%`9{f)Sehm^j3H1J5Exq8C08zMc$-~N-g zLSM%_zT&GwZp=?;g}2`7CkXqDY6a0Cm@|z1noB)d+b)^7a-tJhyBmxg!ktZ|d_7&9 zWEnFym$%cVUmnk2G~dAV6jM#BOax9tuXUzS;Z1Mx;*QreQ>)j=JFtZJvCSc&0K-oP zzAi37(|xvXr-JI|mnz^4(KOzr7Ay1q&7EP?!Kq!o;7srB(sEvqSIGYa1%P74CrC3L zq>xTGJTcU%Tq8ermIQZ~dx~MM?4Rh*j~V8{k@E``n_}oASyX06#V31w1KXlenhO{d zx~L2@G?dz29&i5)%j-Dk#kYJ|o#IHb{L0`L+@%`mVrIL`Uwy z)bOGs@#uZ{8MMJC(cwV2Q+S^d?tH(vkbb7H`sqVEQIU)Ef*qmvE*D@EMEYU61Ni4YuS^u#>BG$ieeP^oC6RLj(``hu|~9`H>@O{J_Xm#WQ0 z7aE-P*RxmdK672LL{he2+Wf*`sDN1-;%&CfX;@Hc#@+nl0T{Cuyr`eWd|`epIuWU} z%iR&I-tMa~S4!6YxL_^T#BYeUU8GLMLFSKE=A^Vq-(1B5T}m2=`*@B$BwcNg0(Ldt zq_)605)WIA>)3q*Q#bfks5bSV$}`V}NZ7DyTO||nr16EmW67L&#qWTTN*BW^Tw7p{ z;hkDIZv4O(%v}y)znd6&p519kFyJ?L82_E=##rFF*CtHVWo#LA(hNMXCY@-P1CJQ( ztAF}7{S)49U6#GbH6p=*UQ+@KUOYyR*FZ~E^U}z>3-r zoO%qv((gOTA#$=^6VvlQw=*xYBl02Y6zM7VD{GFN!tf^l;oq~b24D5@<5L|5MK>4PtI6fG_a48Sp4t`nkL zy!?Q%ipz}=a1`+Ul$vMSIqrm&_!7R|rK{Qcl=(ge9GN+!~v_{ z!m|T9H*kI7iftE6_)WTQHv|B7KU?z!r{s(XbSJ&(^Usu>?-!ew< zsOi|OY;^W?@6&EGuRhnOF34I4aAkjaJKJuuMC@&!x4F`2cJ1KzyI<8^%*Gc3cBu8h z)`^11Fdx?FG|>x=ZT_P>+ze%nS?qsmHALaUj^EuL2EK9QFTHLU`QHX;`BOdbbZ7Bx z>*@LqUD1Z5t*K96d^HABtzM&|B{+r}h)nKk&KMJI&H*wDv^IsEi;ulqi+&z!5}^uDY|@V^jZWo-Dxiwfx;u-N?D zBxHZ7W9{Z-YUdm%(|+y8ZC-8)tX*>?-$+$nZoi7#D`w$j0bHW^;~Xa}O=`#)BLF4=T>)TFE^6!<-^+Ed{ zK2yqKZ54J1S1Z6$Oes@6wEIZJ&|X#|(e$V!!Tsl|q9~^L#0&7yN@R1Cq`rW7%QW4U zapKv25U9R2Gz{o9x&01aNK&3)o%yKx|g}6Wls4`$n2uWi=h? z020W?h0d@4az~hvYd|F0%cWBKw)cW)GNmpGK)73iscyalSa)8r^8=D!)9IhOU{FnV z|77>gSV*Wt? 
zh*AH5PC~J+QynrtM-sf0N@`u56QCgX?ufV)I-^M?yvF`=t9{st>kiPd#z(^>ATHTS393r2yO z3(w$)1uf-f{{>YKdfHz{jE83K3#x>TAHak5%a&NoAh^us=#DphJv0|onlf)}74$1b ztmgBA2w6K;;;e7*-cRLic3R&*PJ8=_ylq34R2{_Qr*aF>YVJExm-(^}ef3j3wMGx{ zc)qs`+$t5HCExEvf&Uk5q7Rq6-2o1!btt^q%`~dnqUZR{%d-r2Ao*bA-X@vO#Qe!u z>0Js;bZR~;+hJuqFPecH)>z5|9;CD!z)?ARBp)66RuQf06AD9+^R}KLi{*!w`Su>wf6C zbvd{#msv48`Ye~3x!pRrF>g34sF-fK_&zz>pFCMrQ4hX^6nnG65hva-V*TK^+g(Q& z;=Tdy*Lf4cc_G(%E~pEswc}72V(0S6%ssfk1!`2%6^{j<7^#dh1PO-6wp|v#O~gWs z^E7YA{s*$&vWNaN%I4VEP;pn)gn06@AMX}@gegq>yfgFB=ltaN z5+eKW=NW44mC(qK6=7eez+C`F`!?Z&^(K4tU#h#A*EEG#pr!;n+}W|R?CYnHNG$VK zV5r-TCtR(vAL2e*wON*1H78{8+(&=2%ntzRTg#pz+q+!0H9D^T;rh8Pq!^XG1a33I zDjz)&Z{0V=3?b!P%h0J5{awJm_tgY75beL9KY5)7Keu+&OSkcNeU5LWLDSxE|N3dQ z@0Yb-sl^50{lPX5v{I6w@^x|9wDE`|ePQoI&zF^4miqys*pZ-@O6~8$M4q|_|K^iB z`}F$D9o&xRY4sY!MBg`2B#lm4M?!0tpn-R*DNw5@w>V~aLhfUpE1Lr0PEe&y;E_l4 z@lO=y2kLgCnz`v1&e|u2Q-0GP=GhB=(+e>J?kaZ8g-a*i{c-6=a9z-wuFPW${_f?c zd?qrnFuM7|$$de)>xE-T)5o7$DIK0wjMPgf232NLDPpk3WG3%-3(^A%(2Ez`GeViI z*`q0wP5MukP6V5G?T=3MG-cj_9PUH*^2x-4vdcpG=`%;jPHf_G6CeEK_VX#zimu_W zOTW(Lg(|K|^=JEx^7|d!9k1lmmoT<)aM1m9NH7e#W4Pj2STwWQ3>6`hO7{>;=kR?= z3GX5MmkxqY#Z}8BPknc}(>{oQsCNKB0bmmpxLp2zv;UN0pPo1DUFSu%LFJqYf*Mij zQFwaHwVxvQMOX~GNM=?pKhc}x!rB63?K0+GBhe_c3Cn$f#AxBoIp3o}2shOC=q9H$ zrXzL%+4!usWzZJwnPHB%rE)QALJI&mkYs-5@dnu#^WrTC(N}Y znEPG*tSwRy4J}Q5V&69}#(rT{ye&%pJ>WctzWe1w@6toRJy6=jq#Az~BscbS3TqJj zL%J41eCP}Kw*!ywcGls=$EUcr$|52)s5D_LRU5)T0O%*2Bu-#qB_CL2`Q_v!hrN6D zS=Pty9%sZlTDV`566=K2nfVhi0%Yv1oM&~zHfRW!D;UyJ(WjY?f9ovO z>?{SZ5DgBQ`XH_)^!@p4g}GIcEm?_%Q`E^T6^HDd244lIALpz);@UxndCJ~-qoYCF zPrjUM2NN0OUkYX4Z_LA=kxG}QPRKWjOFkvOPX6LuRa ziMjn=Gg`LC^uNytkDUA_Y<^lC82afHXqokbuFFG0uTisuO>?1Ca~J*VQDVozsW`-j zx38LReWqWrr5xwih*)?kD#cW8P|!z`yIw4Pa@usQ+INMUE^FU5@7h&|dR%%G-+q}I z=i3f0HZ3g~7cH{qvBhDt&k`xGGHR~drhfzt#_xa+T8n;BeRM5Pxh%osGQp=;-Gf;f zGLa$&W8xO4gey0bj;%NOcfgY};Rd>TZv^Xk_)(5R=bvtwKGbf8rsakwa#Xnw!VGyZ zgJ` zkK$e=7|VZ;L<-*?2%%i0IBMOS)^LOt1Y(!m2Dt+JTJS%}XYI@SQT(y2l_iWIApe!M 
zFKFIgWb+cjDh-9rl|LlTBua+{W9zjC^3&{ZQzr9=h_vMmHZsg5Gh{%ZS>hssa1dzcFFc#Q#Qs<`rgLkRWAi9A)aE#5HqkZ;r`BBoLO#) zTH7ATQ}wa=Nn@G$nxt`h4v*Rh`P=xnl!1N2Sb)^z4r7%dCd`nYC8T957ln&vswpVM z8wmNVXYovi7THjOq0;+_)#9F_;zIlTbSH~b^jg+gKJtBdY?<>@^y{=**0Y4iTj@{2 zuB&w*;DyQX0{aw=c7HZ{+(z}c{c@SjT>nS;`(`rzrQ@(?ytj=T3>wLI@;dxetZ( zBvAXbWvvV{V-Qeiwa9s#Efpdljgq7}Jea35}UHEptoqJ|mL>Hxm+BPqMp?y_loz=}-B)y1!%&S~m zKEOm`xfzOLoW2Z6Fur9~Xvb1JPcUTHrQa`1l@BdFwc4B-Z{7+fzL3A>41u3|UCa{` z8miUGl2gk%nJ`2|hhJd64@q|n4v($F;|-kn#0qx4lJVUwk{(d)x2zHQ%3M7;>f7v_ zxV39f_9h**5yM7a#Z!LEYUwEbdV9`^lvdDP0tNm{(Jm3|`dNEhZhhfr%jq1Qld_sFKO z`SFW1bx?iAn)9!*2wJ_2dQ*}0zUMe+w=S0yo+aAjzv#3_x;d9{CcQa^3&-{rjRQ~w zV%LM9#gqV+Ymc3|k~B9One;1!S-ao(h1hpvvoX7E+VYc}H0f05_2?w<^QZV$4RcO= ziBA=xD$pS(=+uW+&Ej)SAr*I=>euIZ+YoUM4kKV|_d2LkKYyeD^n2Tn3s#}4FRB>> zFyUGEN@G5|nyI!Qqd{h40ox-^OZpzlhxr$apQLZ&%f`aDz4M*2xX0fvD$9Y=K)JxBb6*9!jgqlW@6XGk_ph79j4dxb$9*4UT6{<}0>Yu`TGoigPwu2Dv^ zwi^)?@bjx|RKgc!6}IkMJlkCHEp|hPDEF5VA353$IlwaVthiGuDe0keB=t@2#s@Hx&)#9YIob3I8!&#>cE5BnROs`8v_|2!; zm7sVHXM#YK$I50M1-vu5(LY)H5#KQnwSSA)ofXygE6-Jocs#F!e6hjeefhrK7TLaY zhK)cCUR&0rkh(a0xgC62;6xC3p_F@!pnvSXN&O;DCAcTyrg?lE8;OpOKUu3rHKyu8 zqdqTk^qvHo)iU{!H|ThI;VblYGe2pK=pgP%ge;!s8SQea&^PZ`s z&=kT#RwUx=Cbx4AR?D+=zhW=VMyXR(iNB>34Il{H`R;t-rGSqXChAy5AQ_4iwOv=8pFn(v;}5Au zt>)*55S{u)Kjl$X0oSCT+f$ljo~R0f{i zeGqrS%^)Kpr*Wl8mh-gGz_H^gOwB(Ub`VPP@|2eC1AmSAYIGAao*QGg+G^t#qh^2E zDBY%TDzy80SPCk|_}$8SF-#0`HqMk%^XMGEEjanb~< z$I*_mc~TzvA32g+njHn5Exqq;a?j+xu9(S%j$8;o>U^)f084LIT3!<_a7z0g_)mPs zcwy9Pdn9lVMC_=Qndr{B`i_G{tn-4~hR|`@m<^q9ae$@}_|d`oXIHrRe59u1LT$KK z?8TL(TF|~9?Wpfh*9(S?3BHuC=9Nnsj(MTFMxD*dU)-+*zgc`*a@&9$I|yx5h%Sh0 zchHUmffH&^-=pa;ALLtd)$~*fc1!nyQzpc}m)k6%9o`dbF5-(68OI2Z@j z{C=B10`umd=u*>_$gP|VnQ>?KV|@wh`3Fn_&L<cB!mU4jy#gG~W5oYrqg3(vDKikV90g00FB-_=A+th7#&uOywDc3!BMkl8?A z-cJ0Boo?4&`$te_4lR^fdPidY>2LjSsyxup4?aDK4o`flKki070S~_H=A1xJV*Z)b z4F?Al#eiCLb%nE*x{1fn=II;m3R`1}p2+oneaX|`XDj~qIx>kqlVz~~=_u*E9d$kR zrPu>?N6?6dvGoFA*vu&fb$84BfIrCwxy9AmWvVKshO%`%x}A8^5B`7%|89TsjCE}( 
z8SS^@Psnq560Iyl%r9)KQoPAJFK6pTKNX2e2qw-6jgi3-^3UgobjfH*(?MrfRlT1I ziIzxy@GBy2@RHDUm4E^sm6a%(Xlxl(w5)WS7-B+Q&~`PA;2cON$!|NFroJt@o#3oJ z9(L4VSi1^4#2BT{NSwB;5w&fP6YG2R?F2$>C-<7To*eFU z;amJfc?>rcd)Y1tetVdIyBGphCJbHv;HvlLbdd`F)Boj?dSXCFKB0ngQhHI3fSCpL zyQfw^tYlZ?GCpakf2rAQzMq*Y{^};;H`PA&T&b9-gD9uVZem(kSO}>%Zp%A_$-oeG z06GP{-#OEN_xX=+u;v{kWrp3X75mP2tm|))!<G(`uT?rk!Hf{88Lr>+`KjAEX$&y%-5z?!VXA};OzX=3kFa>$^9H0;3)#!z#8 zt>}KGD;kzKO`1(F`EAo&?IgvZ03FV%pGDk?Egp67MJJI{%h06p_mx`;c%GaC3i8#v ze`j2UedRVmHPNq=8+zPHnz0wS9H)v=g%+LD%^vqO_yM9aG%Wt9hP&MBRe7pwE{;y0 z@mq*@VFIp@h8`=E0PuyEdWW289^!#!{2qC`Z{|KsaKWIlXj>{6#lvB#TlNYdR$Xi< zgIa7Q!!^i~O!-%1TjXWRZu?7!-6Po%_S_gqu+VB4gU8{fJF49k)|++Dj=}uk^#rkP zAuUIWfN7Gdtp?FrfE*Vak-&|jOr_(%%9n625_|S~yR`Fgd=Vb^zU9F)jEszV75t3+ zI2%cs%vBZUBvm~PlR^wbeD$sJtSqr(^5^+1;Q~Dg@kW+6wtpjWO*=}+Ib72%?Vbb) zc$L1l=)EiketSaRStqw=Ei{dyI*R^Tfqciq&v%CHuTKN}ZYQcZ%TPg5U#M>fmb+WA zHW**?(!f{G*U)Jakq3dg_4wrLT*NVYwrLx?jbps*Y-Vh>WbD-*GKPPcE`1ASdclYa zA3=eUiB@BW6D3Kv)~vz!`adF>fdoF2NU%bAQg@hzEEi)@@ z^M^;;>}iQ(JCnp-3&EE!AS^L0$>9nygmco%-;pzIt53Tflj}2vhI$)GmY849F&r1) zodN2333HeCOG7>z{e0y$_~JZ%Uy`bQ6G`sP)^^l?PB=-9PH4(T^To!T?Jr~UA>(Mi z7-um!FfIXVsPcw}l6qXGI6ua1;v2GvLT0pUbFQv-*et5(c2Jx!@q;H(Un~9w4-R?o zC8sN{OHj?km}uaXN3*omY8?=BvdCnRt(CrZF^x}2FxZe6vW(fLEo{R;v`(9aAp?*( zmBzouOF0Lc#6H=%7t6&1ugS@zSfE z9R*|h0gAI67GJL$#PUqfc{*}NVjNzo->Ynr^k|Dl%MJO5~n?gHA|0a>~X0-7URl0?4L$}^Zh=4=s$JXs~T zz$RLje=j2JCT~lSoIFw48ZC7}0IeP?lGZ|UMZ}`kIxS^b`PyS`fAzG5#rtRp*&-G* zA&WEEt;j@o!^9!yWzzsqdJCZ{%mxrLIL-8GU6$K1B^NYz@I8XN^HmeLo_`6@q6viI z`xzJdAM^3YN6!4CNwY=A6-3{phMh@qME?z!W(ab!^`IoM4KGf#C{A={?2!57f6SCa z2|wvxW9{L$A@GWcr_7v)Ug@`gI6z5KpVcjA6+0{&%tlogpZG|l$`+l{lr4}0NA13C zsWeBjZVF|_2a=b8uQxEWN*Yf5GNd=;si ze>MA@?<>AtI30%zD@S^ALX|;M085oF8EM7}N#_}poi=;B-)m+8(xoO#302K%7F_`^ zy=pUG=LM1AG1k{ID5#D6h}i`a5IOh+U$YtVQ$!8N zN0Oq@w~|lM_i;<4mzeEOkk|6Gj`o_xRTQHcV`I78Bjycnu+^f}`W%RdZC|)0j-ipS zip|*yGR!@oAy&}aL{g*QXH$6JN#lyl!QU#xx6<%KddL2?Y~7YpEiXt;O~4pV`k`Gh z-;G2oP7HYn)o;9{B7Pel8LO6gl4q`+wBUN?c90up(Cs?7F3pag7r85sao4$gsEu%) 
zAW?-GC}0sYasc0DT|=fLJmNrS3!c|K$7mNpN5>`&7lj&Nrgji9O>Ywa^b;!Nz0I0*a6%QLZ(t!~g<27HKeEHX-{@MboZb2bK zHLGT1`J5^Dwo))PnsnL&iQ^1k=d-Yod{MT(3jh9c!IiJSk=ICos2+1J;77dicm0)+ zrWLjJa!IcCZ^U9G;ouhVidNNr;3JTb-()?WYFO72){CI#5IrU$ z3?0)zC8(-1ESwZ6=a;`?haPu@<3F(qN6rMiHf*;oVz`eNJc&+DoRv8UJIRqnZHE>x zBQPJG>`jbOg3cMocx~V^7ZFQc?IiOxw-L$qW~#4oyX~aT`f!Xp)x~>--!+0^&)yPz!&Xq{zwOtQJJnlM7XiZUFEzNj4?% z9wPbrEo3k0Bv5BtH9668F`|!@#MytjHcOtmV}7j>Iu+Y+)825z$gh~fa4!Fxmt_i< zjgoi>t^vjh;%ziqO^PzsW;tSGb?4)&+zlr$!0OwxL6bq&&Yx0;e^_Qj*vm>PU;JQo` z!$dS+NbJH<49&#Hn=g|;*X)XlOXR_|FcX4oX|Yzhvi?VB=al4#Vi17Flq>%shvGx2 zktA@+dg_d8uIP9T{&pD>Hh(z(s*LA2iK>R`+sVT8$;KeQa0&t;=(0zDcHs&uuVxP3U7_wnUDigBZI?76~z zJhQTxjA=Slg1Af>yI*8xl@${$x_OBbr^{tdq8p_Z8$)tZZLD1*-jOQbJuEHCeW(Ad zZZF=`_0a}X^A8MIaQ{J7!M}MjV&6;6;(wD9N6K>~Z)F#h8@tawpdF`KtYN;mm*S5= z$4>g}nH#6%JsQ%?XqoY*Jr+45%mrnP+%Y*4rWxGq#7Xq5vy{^cPUndDK=}#8kgbTF#0*5tvhjv>e z(={i?y-|8d9DY^p;ESxxsnb7LDZDR}dCPp$4AFGaKg7ffAP(EG-61-mZBtOnInf?`ej{Q#8sbZq-AE> z$7{zK3lt6F+ehn8t>In2cfx_>Vr5jAX4V&KjTp zt9X-{RDJ(V!|kW{TSD3VV>PU^tS?xNNe%NL z0dd@SyE2+(NV>08dkjtKPgf%vsP}l@^w`q!;K|7(=(Pk=q8)t>q~ba`8^|KQ7GP(l ziZM~wNqSgEf_E)`;d`{dx#3HiP!*bk&0@*_r~mvOzBG%%?(_4FOilA9HAFU{0kg30 zpT8F8^Z|rT8r_T3()Wj4c#eAf=@*m>Jz}j90eBnK&t5%ynJF=FPI~I4+$o=ClTJpp zxzZlCO)zU0Y|aJkz_<5agAl%gvUU54enz?nwxu@$1Hf%j_lM>JUWf;PG}LDjTf_FpJ>eMv&~}f~L>lsXq5m zdZ|_5e{4CfUBsu@m82>n6o-FBL}=;(0XKzW#EqCeA#!aIWg6vEM zNm+V;67?)3eiYFaEePAo{LT@e08KCsvThF#!K++leJGCCzw(F-G4e-Aecoc^ltnpW&+in$`-VC>P=QA`>^V;%Oq(pCDQPHZJc_; z_&9-TVpd}=&#Ou>Jv4-R+I;Cd&l&>oJ6WIt%hBG{RIG9hwhYOb zPlsd&&M>Y;san?d#P7cD6@ zs;Wy0sBJn5cuLPX(OTYO7Wpv-7c@9d)VSsin+@)EqFCqz**A}o2b@S@QIsQSz&+JC zv9`8lH}K_ejeeU!b1a5GduR?`b4s{hLY;azi$Tql7($&Pp^T*LQkjjO2%sLj&nACMFJ`>@T0xRrn*@F$Yhf!-ODhuS z$H9?T9nHg0QtMAJiO+qCA}1HkN{h-=QXiFz*NG9XarJZ62jqk! 
zeJkj?TG*i88TizEs{66APWdXNTjNriTKVFYNURNM*NMasLeuf4dTKj$9HKa)qqin6 zs=>YMn8p^D{zF^(N&N4}YK40^%#Bo!S z-Ugbz=IEEO5}9n;GKI(4*srlIu_mHkT}VkjyD^r0_IS2j8>RM)XjmTwJKidH2$Rn9 z|KyLzx~GaIMr7tF` zb^yKM_Ok2t+mTq3KL42GK7`oXA}Tn$nmHD8?c&9J{k@T;#hhYSrKAy0iO1n4%*=#| zuTa7Dd0#wzG}ZnwB^b|vgUAEK4^G@%H%;~76fYV04Y4^2FFZk2fk3OaSJ&!y!aa)m ztRo&SA%Z=xE?~}4uFA=%!Lt>~xKsb|EZ&fFNTeqWJLBX+c1ZW>V}104oX_NNlb*x{ zFNPf+UZJYZJ|%2-h1Q2v-G_JO>3{Q!VEUu;&9OQl5?M??wia5Ake>64=4VRu#=Hm! zN_~32pW#NDj&DekUYiO&8lkEZC-j#&w%*MomslWLu%UeY-n~QwS{A{~Q!Relgk^C$ADcpyE#U)ZHEJGJohl&i_ju(i@B%4c)_(YN&g zKf-#-x`bh09;h6xs{M}qKY5&D3a*>k%t6NrZz**`@;Bc9pzTeC{*5aYNXoJM8^JlZ z0#JEOYMwSZN=*A58#!u06M>rUE>;VH+3u|SiQPIPbfqW9|0*uplOOOe-z$uMOen@X zic3;cz8QXq4ht#J`yc@lJU+3FwKn3na#b7sN8gRU)6x~n5RJTb6}%24pL^LVfE~ML zH;MuiN|&nKk8`E{l3>gfiJa*Ig+D77^je|aRu7wd-}gtnlx3CGQ;tBL5XCM_51O+z zF9?m#R`ZWNN*h8f(;+Y#xNmf6hdx=p6@<~o4Mm;SG1K$v9c?^csEwFjN`QDogM#Nj zul*M7sFi0#vA&PGzfy`{xff#w7{%M)HTo8#lq^7_U`R1uTEo7Gf~TK`Lu`WO3x9fo zplg;Q0`E1BX=YXQnb~EJ+17kVGsabOi_hMSiX};LzORG)j3F2FGv^qh^5kbVqLS2g zw&wl(%bE4jHzaVzTr&8OZEwF6)qQIlG(ky_<>n=s@I-1@zP9ou@yPfjQ#g8jcm6XZ z%w6^A_jBLyj&6`>rfHSOguOW-sWur(iRL#M$E-*Yr?PJ>+8^*oe@SsI0`s!gw)}3n zzV2(C4^7fN5Iyq3tEcm>55@-K!Ei6$ujjU%1g0fX>mvV66qfI@12DDZg9^kAvO_pb zYqyR7m-zD##bOKrm+Pa1m(ESx)1LRjuCJ25s+&c2#mX^oJYwuR=}l@{jB_uIyGzN$(nm_z2gV z^n4_x;gj(>TQ7X7)`ICririHBpdDzBXtjdNq79n?zI@|z@)OKKbXxl!WSs-l+h)Q9 z$aUWOBSO|ly2WX=(ZEH4K1@lYVto~+g#Q1%eNUi=_(NJ6bPmQR_xB|@&K|w801^AY z!IfZ)B)H*#?iRl19$3mN@4d&G_y*zZ>dDMaVOu65#F9or{AryADqAX#7YcymkLv2SkdzCr zt=~`xc3JmCJca^r*r)npo%BE87cN$2j<%7=wRv;#JN8s%$eikX$udlfM*TN7VR1$SgiYDpe?HQe@D~jD((r!ImU3yFJ(+d}!v?$!pe7ZYCR@Gz$eAu3A+x3$|ej*l8X+tW%m?rC^HF z#AacFNDCM1&l=l=j5QTL#vet|1j$zwffXW6!}L2dwx2~i%eObp7gAInzXzDAqG&*- z4w9c6LPRC897+wP)=r#Yyixd{w{QPm>H0Cgi^}euhfqMi>oW$6f(PFckZv;G2PTpx)8fV7Dw!0# zSSN<}+>F(XwE?&%@dBEGK7MuWbMllm^$$@%Z-ia*t@ds7gm2!9zN~u>Ytwa;ljeZg zXT?tIod)DFK7Haf|B_;bjn(3tM1LS4sIhtH|KdkoABr zpQ%=!mKym(_^;G&b#j@0Z}2C%PpzESkV4;DOfD-@69rUbRLzEj?Vs8bn`)e~eenYO 
zo-liBKyEjTAylKb1(j|ZvE)G*S-n%1j)GAiz$2GLxxtq@O2H0_o+Ot%#Ak~G=mYgX zkPM|MQ6+sal^X@QWV=5h7X$lK2PtP@CiOpxhjSIJer`o>K{H(un#&?v)@h*NpAn~6 zz_9M}%dx6E!`)bHX=zz7`+)1Jj{tR%ILpZ(eo-0-C{hxEr0k`8LX#6RND#`zx?rJz* zR3Iz*PcJ~?DenLVEyqL+v;%BH7Jpxztv3G+__#*2`{W!NX*buGl>|S4%BlQXCOtk< z)fU`qyjpRoZ`b?TRaLdp0aCMa7}*nv{mm{_iUU}tqyyqZd9b~}R#I2SLa;QV5+7;! zW&f$SRm0V+1s4=ofcoy7+S`{* zSDPFEN~y0B^N=66O*Q(Po1mnBgO1>4W#~~h4WPl^=4Ou-Q-Sqpca;HaDyh=sIY=*e?Yp&|{ouTz?NlBU4Joc&H)GAQYVi~`6qh39Hw$eqgNA^8_>Z!C9~9K#kZ z*^-bm_ekJg6%=p~bRWU+L$r{|Uc8E1HueoSU?vsvQTQ234QCkl|G9?$pJ;vR(w)(u;$zNil^S; z+gol<@G%)=OahBB9n*UMzY&=D>wqwn0nh|PKR!GD2B7y^;=R@!zcM$P!_ppWgg?D8GdQ{;Ibo(oV$VaF^>al z+?}45)+hyPiC9oLw{~te?+{pWW$1dZ6__!19MoeN!dbLE?A+E z)08lW?6jfIeKU;=S1mxF6r#usc&JGQ*rm;PG;4LT`Q=L2xx|ZWGqiH}xZ>ogytI*dxdo8E@C`8%exV|g;6{hJKIyvle?Xn6j?~SKEKVsB5@hGY)WQCIQ z^k<_$7n=7@mr_#O-@;t0snr3K$E?>PvJ<37H(liI+`;@vJ0EPyvjYQp5iU+60pl$U z+XBlK?y8b}PnS$6-T+?{v zOlsU!Ax~ms^!a|T77Da&C&V!3DvFBg+hbC#X>nYd@(YZ#^z;R1U&h9eW!gKJWMEX| z!Eq?%^%t-gIGRuk-w9Jm*$n9Pb-Ksvy{oEUcdJfZ+%@CyY2sBe{8m%9dCZNR_$}Up z#|iQTpVqCJEbL0@oP*n*mj{i`75O4Qu;;kZir6QRys%f%SBnr=p@#%$F~0Zm2h2 z$BBUJ9RLf^2BR%{8D<72C}Jq4CFi?CnvS!1gY9A#1u8_ilceZUi*c>1enj%6oGttJ z3tq&?a;h${eEpJhem2ze?M*kE>!|?1`~1@?J!3&F9AG}d^+>|Fdw=piy#^(92~6GD zb6`GkfQ*hVf34{R@rk(Q97}d#b}&roTA`7Vv4aU`I=ZYE*0g52>tJ|OziN9xlR{$a zC9kiWUdtf1)!TF4i_*odZmrUme0{LG-0QMpvG%b`)E+3T_!}xJ6zCjN98LAzr=l(| z5Vu}NeIA{?G}ExRDzI87C+e)*cD*)DMRf1%*75YfCDDul7(|!)EVkP>|4H{C`L+wL zOak9XuHR0tS-U0KiW@I?cygJ5h~|QVnYp>PaNzgvx8u5#FbU7!%a9K5lSaZw5N+y$ zCFqquJV7xn6sAui)@khq0gSW)0D^8r=NsqRG_x}09u?Y#_11eQJrzsw`b>id-w^x} zagmZB3j7`X2_Uh8n1^1Cs;KbxWHV72)p(RCE896XX_vw z=sY#lT0OEbku%k7!UXn=Pm)<)A_q&D1gOsT_9~T3JVO&ul=v+Xfx?GC?P!RKmi;CCaYfewIT? 
zTDq33grxM2Vk?ZsQT+VoVJ1XWO06u0j+@I@oYb0!Qqva_@>+ABc=+9iKPc z6%UjxzZK=Q;RVwqEUVs?PsKmYggrc_)A{-(9@0zNw*{SmeE`)y`o}cMM+$ACi1HQr``CB-H z*T?5ZDvn=Zh`njH3vG0lThk{q^lk(+vau<{G=!xwb_M*waLiIwFwnf zN?GkFrH{Nh;w@vQHU!P{aumHVw8q6|56Ma(7LEh)dbWgZF)`a(K~q5ed8F7)y64+A zUAm=*YEYQCg~RN+huc$7&U`;##1{RGA}QIkX|BT8p)f>u*HOO$Vh*v6CHoO*5@97O zeRMFCoIOUJzX~)BJN#HZU(fz;s&i`_n zcC>3}c|E;yl5TU6C$kh48D*aRq<)lQTT(iMMTXL>4AK_y$|3jt1%C2rQ&%r5``gy;Xy%NfnJVw9(04T zqP&aD6pzOfTg*WZZML?yw!}rGdq!@jSYskzR7{Zhg&kV<8_c8yjFJ24wdL<0o}j{5 z0q7cJ#uwm@V%$C8EjANIFrCK{Px z#~ZT&6MP(%jjlPsAck4Tp%C3;7izDMA`@js-rXLM6Hq4drUba;5Zu|1RcnJ= z;6Ao-{W-5hF5S(0L>4@_6ES6g9`vNj5X5DMhv`_zm`Cb*r(9{W8;5-7??D;?F=Mo7 zS7+RenmAX@l_9&$0Ocd{koabZ?3%)b2yV-q>G3O))hLavV zTD#r@j2xQw>^DL6NTDX^0@&5dWW(Hh&MUHoS;oCBhF$6V;zWZ9^oc7xOm&v&&MIxj zS+>irYU!TBuc(@MWPOTIb){VrSKGYjC60R$ROw>Tsx#19_l2EIkUB=BX;=L?t>8nI zI+#|FqxW_xI?-e6hfN1-&pnK67z^cu&x)#B1E2~Wqg5__f=o|uFjPP;H!FG|tcJn& zhq{}II0tL+;l#d7Wh%Fn`8E5dNw?x50LDiATWG``#r;PKiU^8M_}8e?7VLlUdn6$; zH&9e0M3PKVv=Jeld-+fR*Iq<{=PtCE>|3W^VpK{PIpj|qndSM;gvr2hyf&vqANDlO zYy}$MXcmvdyab9X*{J6N&!uKQA&E#kLC`exl<0sKjG0P(Ce^NOGj=3UXL|4N$9lwTFi6nx**bP%Y6vy>s2zibbpJ<$eUSkks-o*)f<82QMX{*Om zkZ%#3qPrnY$ub9xhjoWE0hdui!GvT52oy8EkP2m=Yl{ryR#}$$#AHV-eJpkdf~_b* z^0P|VM+WV??9!R~q2)g%K#o_hPYrBF;)Fe#cqDa$=@SZmPnY5t24q?mV?j#D1^w0-{wpWQ%V z91ORU*JgZmFcGX(j-=j_zq;fZ!GMwP$w?4VKEVQcL*|F_hh%N`j0&2CxhsgIyj5)bZGS^skP!{|Z@xb01kq6_0Tdb_eLmd`)qf|Fqv) z(li;hkTr2CF=Ub%#cWz|Ppe0c)vQ-XND74$sbgnjYlVn&3P84aZ;PWXE$avTiHqJ+ z(lJ`Ta$?kN?V zgeWGsd@6o4@mquE(oF2HBwh^dqL`54J$QA~<2)M|Sfks9wUnn~48oH-da1EQErIs% zM}8sNDVFVpk!7E*W-S^*dVg@3GMsoCO&w!4Fikl6Uy=U&hb?)P{Bd1wBjHABSokNV zAGfw@K=PIPN`Z?$nHQAo4l{~D5}c@apqRT_Wj4=vf;n{lYOAJ&d1@n11~=Pp?IZ)a z+J4UiA&s(GOU!v4`WEw2G&cpLEe=cLSk61{?QAMtvY)6oj{7L9*a=-tnwB5C zoNBUL4A!-?3EsVnCBIWiFMWEwWpbX7euu$~VjXa~;e|=*8_bgwcvk@MR>cgNXd$k= zPAzEGxG<+qN4(vJ^W=z#A(yVj3kfx-qt1ACQF$MU_VWCM60@`=9H$obW2gaSR;crwi z4g|^F|JZULS^xK?Y0SgI<^k@fPGG1g`Qshc-j?IiRnR+jq|OL!ZJj-gCEq5Kwk~s=KrK*QhK|wf7V07{0BJe@_@)kH 
zPaH?NlNIqv-Z%;}!c33o@L>C|^B&w5ee;o*EWlyOFi?Ts(m ztBIcQN}MPLWclZD|NA;moA^b`q%cOW5BPE}6{0$to14FCZ3CvPEFeuMg->e0^~EDZ zYmwiBvaa-m(uGlD$&cgbEl8!cO#M4y#GJmRrvx->T>cTH@vrKArW zXhUQ0Ngp-dUiOTrB(C>fqd`&mjYA(b{ae^u{axMmwfeOzc8qCm5ZQC*F=pPw4{1Xa z2T0Y}&z-wb*Ya)Y!$7LPOl?}P@Y-*Z5cq^}1?d>iPP4-aJ5=&9B1N2i?xLT?rBfE> z=H@Oh=r6e@tSnszVaG2*=wC>b^fBgV04ETH4D3OzSB7uJ1#KnuY3R7FL4A)Tb8wr? zyOu?nGBGlgNFAqJN80uQB1D`(#>O7^pSad=w zJxmw$kyo3U+4Ac5dD^Wm-Wt_gXt7N`GIz4|m3%)-KsIVkkE&yR!oT6|z@zDiF#Yra zQ%u3}*TF}QgA0rX=Q+mjD}MXaRX3AXi5XtO!q09%+X3?=D zdIfQH$CwfN>)#F}A#;G|ai~z>MR{A170KU}W_&{$bGqjMZWe$l?nygf#kgfCGev7W zmI`GlzK~n+%JS~fpFW)-)QrR7*qE86EI+3P@B|P9%qIw%TYQf)jk!@CGpft2_2OMZ z91j^Y2hMHco{UnGRJ_!giE6hF=dP#GR5%Ln{RkRirR~~7y$rcSbrqa7#}yOCcJbc6 zk-7?3cm1HD0p~!-No`D7^TFR5&+HvNoRH^~8mfCt*dpiPjzo|d9r;ki+T9P0_YbDW zv~jA3wHQ9;mM`ZYx3jova7C?FXH=IeqjCX(5cli7R$hz@Or9=Ud+i;z4z2U+0P^$> zZDN6DSDM?0Sw)H!4*QaCasSW+H3~yPke7v_NtLr4pY+v+X~MKVK&g?%3)IJzIXq?n zydcx#N{FriR4@OHK_ZhrhCdCuDMKVY*o{hzB8zgXEWA5^)yCAK6-XL|v)7+y2k-{) zl$=_XAd5B*%rD8#<_wOAhzD#%@wx&AQ_@DpKv~%CC%N@4UCG@KhR!J^eI67geYO}f zV7vOHJtDa#&Pbk;lWlo^2-~;P#^uF~y7x>DkOj@Swx`hD>It%d7O6Oi?v8^Go`&c) zl1X~+_*!JJ1b%-`6oJ#u!tkKcvd^Ewqx!mS7rZACPO4T(wB2HGg~92d*Q1drE;}9+sbih@`ukEx-n*4{ zALZw~OIwgJ==DX{nf`aO8)exZan1+PmS{VI}ESc69MlOCV`wV%Nwy547A2X?)>=1 z+d~<1{0(IV6_9wpF=Nn;f_k$pa^amj)RhZww zC7vw=xNp6C;E>iD9s&58v8&U#1qAFqh6jo7F71Cl+8`H*fEmxq_K1tolYe$+?+7%@ zujSs~d@r`d8U3*H+B|_Fq|G@x1xaN0Nw878Q39=!a_%f_CoMRY!dMQSN(|P_x*o@_ z9N!95a$8WhIs5b5)Hzz%d->b9(_>0F`D}UkFrLXt{3tsSpBpG&hpGz>J+fwIX179h z>zKVsaRioLx<#l0rSF${Zmww5-dBFi&ovah{3^=*&fB^B;^h>YgVSU&Ct`ZH_B^}5 zVA$#1qK+Y3I+6q>vFoQ2yW|JbH^ie7gSMa3os-Qcjk58SXmu_1^Rq^9w3J76T?dOO zDr9=}ubjeB#0-CP`{c!JPP_ir?t_1pveC%A$xSv`^1Qu^W6?z*+QcV5w>Y;Jy$5GS zd*)M9=wpStqeSf7XHgntu^C}&5-lF|6J9|&KMqCZLlRtBcWo}7&#)Ku`dQ`0JpKP= zxlgg9I&93j7JToZ7enEu{g!igUfY6paj;ls+;7`~-J+O^v%Q98o5=feS}AXdC9@2( z6_aT+eP{~j#iXYgPP|iaOcaL2cY9Trx`b0d?4+!Pr?d)-=GtQu|2Vwn3!WVHylxqx zrKU}d91ZrkU28t$)uWpa@|7_Pft-!?rI+ICyRsyf4QgjU(U6#($V^gq7OqjbI?9+> 
z0AtLwErX2T&A)SK48^ej_S^msMOi7W?>;nX+fVpgIk6ew?*ca+ij->jd>1`TMm}JhxW;z&bP`h1G-3tqUpqzTw0qHU$qo8J<>d3U77}Hxd_)h&vEHd95 z6TSB>JrYCbQSqe$6pDy({)?jRuJ?n!ho>Op9zXC?PVvhba-p!SVt8BP)}zM9(w=^6 zkK@s<8oI{dB@4GRbuB>dD{IN;sG&3T6kJdftYqesf0%%Pi&h8r+Xor6@{iPxWq zs8aG9u+?p^J+29Tq_}z~+;X%M&&_k1rB%mT{~~5tuAEQEX?t}pYpwL*u~LqJW#wA3 zf!9`-V49|||6PfjP};G!aRqk}$50E=5cJ0+*pbDK5$h22tRvWtAVSLxRXU|}+a%Kx zwTRF{94DK{zZ>0G4@p8I^wM*k>5+>gy4H4+H~uQ2HKtVF-J5-3o0@{0gKMgzBcm-g z&bo=^q(#dX4=8#_q<#L7msGm%R?lDY)LUB=V=^(>Z3ba8py|K(m_O}r5`&n{dvD+={gqMDXVBSB zCyD}@U~jlLZG4FpwYr3ZK8DONf2WxHNZ9Uu{P<0&fO@O*YxK=5$1L{KYR|7ZFu=qz zUer#B>s;d5eQq+g)uak7UvY#pAi;S91+4b0r`gt*KtVyJ1SnaR_sI=6s~&n@hA`a* zA=?>$GNi~4b0Mh^`q2=s?-=GT{S}+s!>5U2Li*;~LszJ>B_siqFH(APia}hjjR|v3 z?x?-17KYm~WL^*BDomC?B#b$G>eMM6eQzMMZ&1H%UHb^{DJfpi>Fj65hEhk-rcrET zZy~gt=PQPHoUg;lUT;V~pTDTYZvOfZCC4YR$MNwm>K}DH7SJ7=Y)PmC5Y*+B`M;ZyKK2dK6^RaIa$8M za-?N;okVVMggi0yS_EA*;FUQN1|#nCyi%s>8aYbBIqG%Q+^qI&y2&=J@p4Cbg?4Tn zrQUAO^_7dH&+=qPh4Q2JHF>pOTu)&JFB=PUP^I|Hz9HHi59&}#CWC0zhoJ)UZQcGc_3a}U>O7n@Snppn>wn&cYpCFXwrbCU zWZ>{OCcCa!j#^l>992%1SDkwO^<8g$fxJfFP++H|LUwDbLfqyf-dCALb=>a^${yeK zR@ST~E0UoqT91#w6pr0d7^`;cJ9s1r=E{aD6`M6_aV^1s?QDritImYfcE6>O3bi4-#;FXa1UuC_N1X#B>EJUGl$3kB2xM2bj2#B6H0=^i_boqtzXn^Sn;JS$qb#Q|u}5s@t8|x} zJ?!BTk^&^hLNp-~x4M$WI3PQ_U8QxtDNpe4H+Exo)Jwn4JvraUql~WWVH-rGe;|av z(xlTxL!YB@tHmwgXJ7W(e9$7pnQ1lWM(fGFb!ZI=4+)6@CXUOvx)j5!uK|IeB)f5* zcHWDU0}#D%FN{Jz9<~msHFiW?k{#XsBgH**VEaOi#m~|sL=B*7?|(J@s|@Jf~NXw z1Py2PR3t@F)BQ~MUHO=VD0TEDNl6r;&iv5v*CD<o>Q3^@FElL+pUE)5B~f2LzMCw z>TO7QPp3&~Bl7(`@wvv8SKH~XI4P-?zby8Wc_O*ZMaW45a+$+FH$(hLh%N|-zw6-* zF6iYBc?|@CDG6CC-mlX+m8dCo(vF2fift+a-Yo{D${X z8wqF{-ytmp)+v3>Zh>6vBzUF(5)**Be(H3YP>0Ohb3+gNlNSloT!{5n3Q)8yyWCT6 z=V&v*r>a^F-^+Op1n#*5jw}y5WYudgANctIH9+@q!eZjuwd2OTdhOcTys=p-acWm~ zwIJGk5n)aVwE~+Uc|cMRA((pfXlWa^ju+A1KwPEoH+lKe9?%(<%U{}@H@Gl{6pF1u5Z91FY^Jp`(@L*T5j_8_NeaCrMdl(N9_&D^6hAXSW7R2feauq z1N(}s*kIVeeaj4kx)&u2lgaUhouHc#Bq@4ul&Hr{`0BXty0J`E0NHy)Y_ook&g1b? 
z{q}W41XCuk^zg*f440!Ii9&>!(m_R_(fWzB>~W)tj2rm7-g_(d96c3_+C7@5NF^fT zXe+UON@JVc-+zTbS!?>r&wUH>Mf6?Rx3NujeRm+&TZx8&S<3H{gv&gMqGCDJ4E&y) zlKcpvecs48@NIN$S->bNf~~Mvn;$xbwNpHb=W}?)x7t8ac7E)|MLMb$f~@PwLbb%hptdMolD(U+SIp8eA!v)RnL99_akg^rIB*dN zL-Ut_$DaZ|9E=v&Sm}8{w7qa;7ZFhi0lsdn8ra(WCX2f>L@iDD*T{f1PsTb5z7Pk` zFh{HdAt9mj*G_6TBmalc74esqPGCnl`93H%sE{OvCZr$?Mg(Qdjl4fIW|D0{g~L6O z>o>adnV`yND&(#Ek3pxIQF5Nwp>r_i)0FQuBMjDJn%Jbt7?NTsX9I;*TI^LNTiV z@o1OOG;1%j=AfqsoJf;h{jZlfCsT_80SWrOm;7bxflEj-Ug)L%c!eb#rzU zGf&v?#IP@iYI5qLGJe)~2WmoFF-KDo#(y*p-lY;4AIVt@Hs=1`i&E zR7il?4f;KtXI=wYurL13>Jvw>@7=@RJ}EmjdO?RrjWxC}w!fe$^;QCg;24+Q56D;!cZwQ3NzPRD+99<0P#fny!4xte{srRT^PT6)9FTB3B})R% zq6A<6A&ZH6-}s3P^Z0X_o~n=jwKKnDO8?xBdQUk*(8@_yH^0z|c~Nj#q2|3rro%Kt zB8}hrjVJG8Zq(Fr*YSRPI(WqgvyH=G-s zjlhGa9iY9nkn6V(DmaQrC*9iFb+=wf5QQWHal-WfZWBm~4m^C@`|vazP(%X>f&R}u zlEKs5(6TI(emOk(rb)S{W}jrCUT#OJ_C9cT9a$+0v61EWE`uTtWD_WaOz3+l?h z>@6s|IMkBkPmOu!6&tTh9{Mu6c_Q90C<}c4&lqVRp_ZkO0egc4NXW%GXq@)7g#iRI ze|yZ&=(RgS9dayMg`ZMnvMfXg=KtqiR>CLIBf6TakF(3)Xc3f9^z_Y3pvcL|xlm(3 zKL25ren0y^V@EvWpQZ@ncd*h;JRjI?QC77_j~r11gt%|7xV!+MKaD~o(f_<-X=@$k z9A8?y1*V2q=YEmkdx!q&zYsP45t7W!;ubp)<4!>xRuP8Ju>Yl0JAC9{jO8E7K_FQ& z%oYM;^m5{B@oU^rh=+Ku|GY|W&33M?rOIUBz5m0Jb}TevXJ?IN`&1Ys^m|F)U!N2y z5Hj#E)FjnYm=yf1?sH`=L(^RI(v~?uhM*Jwa7Kk(xSzy_$1&c z|LKGNY9#R>We#?*XnOZd!jMM9mbdAxSqAg~ZOS7=&dL4x#yPOvoDigUXxW~b=m6qV z4$#BQy?w|7JF?Z&zZBLiY7FJ~CorqvJzBzPuiSZ^wpH+yC(hJcg|M>#)I5&WdKxCxb2bC4+w~&bdQ(af~wJKyTL1dsDLdnN% z*{ks)F?pY@yFM>-ESrH2A zfi30h&&0V9#W-XvgvXD^47=GX-?n&q`b0Qv-vMO%0B-^oh|hQHM-`t5_y9i0IV(n! 
z-UCbSI1v$egzPcDami@X>z+(y%U5>~*pfcPUM<8q4m0eBtR=5_{C~d1#n6E(dmAzl zbfPRE#K2TmUgFZqjw-#?r_`+Wly{$>l)I_2CGTeze9)p8rk)Vo5#hnx|Lds;$zUFN z!R;iJxc*Ksum?ct#_wUCBSq0*2Wg@QX!_zeHaxY{O5NuAOS}j8^*A>X!n8rH zr_)S#`Xf4V&l?Nt$C;p<2Dz!f!~Fle9f+Ah|BO!p23wvq&Oy6l3$QOepugeLFUdfT z8F=bG$)f$8mB=a7=^`|k$DgGs{}#OdHGY{?#ks-h*QmSW#%|2Y!eRj~&w0mFCs+}0 z3*{TX^NH`S{4{bKe-ynOz~8xcvvke!6mm}=vt0Q&;eH#|-lUS{u zF^>r+`vpEEf`gIDkK9eYHE&s)yqfUa9{UAA{ox2Ep>C4A4-{FCt6Zi*o_T*~$$vlg z%NL78VY_%xD0e#xn8RLy`2FvD1tlJLmV0e%Ml!S9!+wL{&*Pbx|7+(Hc&PvQsh<7Q z1>$A#TX5Z5r#c>6zJDs3n}7S>!6O-vwJ*OpA<){Fr^~lq0%C-!e?M64D6!RV>4h%; z#fMGW;8sDhtYaCFrTXM-aB#3j_6=r4;8zRkJ7&uoEe3$d@g&k7Oh_k!cNk>t*Ina!E36y9}bsAC7=?e8^c|1|GGprgzQ9W@AN zh|$g)F!<+5k*ZCr{SHZtCCsb=4Jj*#6yqTp=>YO!YQV%_d)*1r=>Pt3Br!K54IZW{ zPWY9W!x{fGLWoVltwP?aLEhuNHXjWvo|1w>5QbA7>WjWT_ge=})qr6JZpnW?=p;q= z1TFjI{wS3B86>u;;g3u1ZJ68i<;G$7jSb4c1GQPD_`WE)IR>dJNUwh{5Bfjd4uHE6 z&(6t-*l*mh9Vp}j;sXOg;!I!O}6>YPu(*}huaoT-5r zPRINIGMxV@t@r}E%9qX(QsgR|HPN^m#`LFYa4XKd z)Fb31o6o|PWw@$(>q!pPsGVZidK&(YXSkCiB|65m)sraA^BvfZ-}&C)0ZRI6>y~3t zYhxdQ9|uE<(7gXjV4MyA3?;l^`ZPNim!g@u85{JqP7-nCvU>ltc(eod18X%Fs7h88 z9=XUz%Ds*`rnz*lZmj{As=+P(UZBZtl5*ioy~bxUlhmXOHr=SCvc=ZsulNG5ClMM| zIx-91v_*yYz-azK=7Z4`a5(7?SVEQOOo>qgK6wXx-tY1D-~E{y6kr;uq)7T2yiiNw z2hnsKNM99Tl`^ewCLpyUqqUn4rk=rNNRj{@ZJ3V20OdkWYi!s1vXMK)XIN5;hs*R$ z_qm0`?Bsop9yrj1ah3mS_bQovaIiL=f}7O?bF2@yetoFa@(Ft`Sld7F(ZABR|H1;G z1CQK`Is9acB~YE*AIpHP>3ONnYBTsl3Q)9+vvYzrDYVEq27VNNm|2&;7`<>OSh=rR zW7v0BvDVLjm7pW&R^t)PM3?R6v?2?z{S3MOOE00nr7 z@3yVudp5Snx-RH*Y@QS_csC3tWnrt~JIRu`5Ve@bmk0-Tm|7h_B5`@sICX zqmLq%9Oc|5ri(H0gJGdRouBV9A5y`Y+lxC7jS{2&nsS-H@I>OF3-*IIEUV(7C<%n@ z86_re=eX1$R7lc9zSzlst^H3%oaMEvUXy zs=KqA0VEX}85t0hYjJ>a87@Wx8P}9Yjd&#Z&mX|?f2;2JBX{P=!U25g9|w!A2uq{= z!cJB;0IAxIpcg||T?5PwA}*xL*6N?2D*N?A_+h>U(>{IIxfLjoVwODFXy>yzt5kc6 zPH@<~aqiXU^f;)IO@-)%IUvgC5RPKfRJWxmK><*zsZ9A;iGru~$IQnm z(742VTON&F4mT*(j{U*Pm>e~SE0Ya4ffF8fX|x-DE7Kbw+p_5EtX+GyiU~ETA)#D* zJxN4HG2C`F3G8i*+H+z?Q_`YuBV4GD_dE-<@|b_d(J2RTWqJzlo9w1hzI|?BqCEia 
ze4z4lt1>M4Lf?#t)9gw8hR>`W>E|oKGKjCgQLb1T@c4e4OSi22G$-lVsoXxD{R+v5 z@%XxDZfRd@mK;GlSR_>2kRGU*Onb2&OUIrJNr0w^rYGR*dMT( z_}F;*Uvrr46kW!@AB$(TxUDlMYxUl%`MY4maJ$EVUPROZwvQb}yyxL@z5H$^0lZvivbRrR0zonJ_aF5aBRhLOeLKxXcRG{*?xQfe*ctc9 zgk2pJYRllow&=$&^EE5|mDgJ;>knL;T&jw+DwBT5qC-~~tmOs0=i(nV&dZaVuMtfq zG<~V)hHrF4pdqcv9$j2$=AJhvxcy^ApcPN9&rx&5LkztR?{qI@Xh`2&q-O2C_mW{m z)a#2AK0KkL!9xQiSbx2Um{V)|72*0;G-i}eH{T%@Bz*51$c3iJhQso_Bw{EoQ5BWj}4}`pEJo)it7}rEJlVp%$wvmK$fT zz2ig|ePu(Jt+&#wo4ZGQ32tN36Oz#zXS^NY_Zf$ShpeU(Ph$;6X9V+_n}$6v)i?T!AoHz)*_8c0@feoPi^~MNP!VXTAOqn@Cy6sL5p` zTVbCYYYSJbz2vEV{2Id(e9a=}2jfVwGdN*|`{o)MG8GrzwtOqF$vdS{se-{os9@q3 zYH}wM>9JS#m$5&nFQ^0$xF>tJ7f*6ntQt%vEL?HTz!ag>j$=;7Vz|pPyI!SMIG#U` z_Fkjs432+j(~qMN6uag8H35Q!tN`mJ%T%H0p6{>w6K~0A3f8otI^TCW@6RXIB^cxT zmE6QKd<*1>+YjA~CQHa8Yet%(T9@o&ZQ4t=v_@#z+K6O6yfck&B| z45g4@=T5?wy)(Q?heZljJpI%Ai?1cgvSD2yF?pJ63#Oxp2fz66DL5B%L zl(eqk_jy~mX}AwPk}!iia!FKN+yd%XPqi(d@m^Tei0&zMyC&-YIc7$_~Ot0mWa`+cdvo$b5Eh2qwq%Q*RAj+AZ@A1Zl|m) z4f)} z4s%}(I^}7_-&YQW>Z>qSGGIx2YTY+fIYu{>A?Eq=xZF;%jlR7QDBpeLx+CjLyyol&l4q{o5HII8uDJyk)RUK zy1HP6f7#h3DuFgc2M_s|CiwC(nwKZAqphy&*22?orj6Dm&^y3#1`X{!7z-K_NiJ!f zP4VB9BjPoV7%~VN%BQ|?tI6KAglw}MYCmz<=6pCvO?Ug^V+}gXX9tZJIjK!IE~UGR zq5a>}U?&~SL~35NQ3i-ud{YXS)TE<=TctaLT{hyF)I(E#BOwAsj1R4TkNe?{Sa*{u*w|T+`94p z6QoR2+&Xc%&%qjJx%HK@SCX7~a_DQcD*HDnW(_u?*q}3BJ9Nj@6;BhV zA!XK~tz&*5y#-f=^P!_l&IqR{^}lAl)If=yJgbVO<2sc8;Z|Sq6J^0|M?SQe`w|Zn z5k%dx;hZ*78TOdrx>hfKmYm@{Ll>?pzW-S$4eO_s592&Ls3!Z5@wrPSM+LWYLj^;Z zyfhxtXp}oUxb0f-p|m^4)wkAsm8}ah))?ZtpAkPJ-*ecd&P(9?%7mUvBF9qk8~2#G zSB4Cc8Jvvkm&|*f&9;jAEb(TgR051tkcZueN8+nHC-~Q~adyI5<{XBS8@JO91#1Sm z(QcC6TSLz)o#Ef2F2Rl(y5HtQcXye=q`@7Sp>pNcvg_mRp=wpboH-ghtxkUbx0&lF zckQK{XqFd~h@7=GnNA{o|ye>alXc&}P>+U19w^`)%QqB?6O#+mbi1EJFEq38qb2o z6e4KGXs-+2$p`-{)^rJNXf5cS1tu|1wG$=NDfv~W(+nm&!ndg4H4aR0t4n#PfR$^S zZ&0R|e4lMei*@S^)z?-BFDNHWW_MdPYLesIZ3*-uJ3f za-&J4!XAR?f#2QCy-?W+u9dn_$Mz{Rod)!qvfpT$$Io=d2diqIShK%3Ah>AtaoTE$ zkNT{{)~Og55tsLkfQqa#pQ4v!IdC9TYu4-PRVNNZ!2*i77Apx$QKh5b*kT2XNp5iN 
z)~^%f&R0YwSgexocipM2$t)82R(gbLcWnDq@2!TnUm>*H)c~=dzK%24RRLR}OH53! z;BO5~gSid03m8f*`HrUZ)8}Q=oO}o32iSlQibpX$sQpJw7 z;f3{~(Ft`Prgy`xF^v9OZD1@5?WlGm!&36c7Tf21u4>rzc9b|1<0yEDjKy?Fxfl{z z9P3t|fe*7ES}S%Ji+HGa;ZD8-M18?=b~Y7;X8k#64s;oLjhLX}MC9YQsfMnODDbq%3seZq0{^ZxsZsoMM6_*>YkFYfoEv&*sX9 zCufY^TD`o)#lB7;pFOR0;gUgIYP(N90rY@8s&7V__ZwA1wMxxcSppx0!>vvh<&TsV z=I$SE!|UA;9BHXbC^P20+t#PAb|q_U(V(;0S}-(U#<2GYM!5xSlV?UdQ-x($mUr1m zX+op#NpLi!34ORWN=zDHJcbMI17?)5-sPc3&Z)<-qp=P%g5KX>I&|)_hMMRXBF~G5 zP-8(m58mxJ>h+lxAue>;31MCH@|5I+g?_8M?lpFThBd{>3BI%X3+`0rB4tTPsJK^6 zTsHD9cnC~8%hyKUlIGC}{RNhfG%8wEj98HqM$RNy9@WqxmfYDOKF`7glJk7%kGz83 zDd#86O1Cp#KXCZcN_A}tV7z!JgSKus5mmi9t2q74X1F1FgSTwOSa6~C5#iPwW~bbc zU_NjZImhCp3SA6q;|n-n7jw<53;NhQRi(>j-&sgtBj%(@+vUH?K>he_LX%gWUN$)I zcmtv*%8?8b8BldHTNmglw;FJ*d5F5pc9PN}9=$7Gnoux)b=A0^32ovxB@C-Hjty#D z22=Tt7w5K8zA(HXB526Av8Ffsp3IcGL6>jW@BFm59TFdKG-`j?3se^Le*A-0{0$t- z_a?QR#(g@19<~JmqsBri5-DVyUN7p7PMW8tIOO2QXnruOiEoCv*YGSQj(4Gr6@tRN zbtm@k#u#++zYo%8@!3N<3Bl^_vL}Tw{gU){Y z1WWb)34=}+@6j@In_YV?{kKZOXNoczJL3FAh|P4w8mt=g!PFVt=^W;#ynPX!UX~zi zurmusYOniV~ZB%>0B<9gbUT;FbqOw>LjfHrh{* z7q-|ZpKTTtlcDXXT-&3H*Q3DdQ;}6uVbuc_kJdw#tB8J{M4QKo6S?fA< zLFvd+MO8yOxEDC0^>yy=b`#3<6@n6cVRF^u2yI!V7L2gb%Y|E=H0;z&XmgEsp+o$2 zTR9`)5H|Ig_q!MWRv65fvQmmHu0C@xpUK;E=xC><;o_3up@VQRg`~#9hfEwOPdu+q ztr4`Vi8{w2Xjghuj1?UYo`nRiIV|;k{blfBkG5vyoX2WqGCw=flo#o= z**=hKF;ai%(bHQ%SiJ8cJ#f;xUfdnGw%cDgk)3>@TH)1h=2WV?aqoUXWctPQq$y5Q zm$keVBo)2=Idu76|J5{~-BoQQH8b(?Qn^JlAN+8cUmpJXoY#Y8PH6akk36;8oKE}9-zq^FqcM?P6 z_Xct9oqlsZkn_y9%-bOE^u9U&Jf8&myh)fKn!e&k!aJi9u4{tZ=BUPbs=!F6UU6Y` zWFj55d}{OFw%~Rdgp;4D%p7l|w_LIvYT~7oG8GS#*rE#$Ek5Twaqj`B*|fNH3!jdH zC<xhnA?>%_lOoyuIUNqry5dwaB(%;{J|#Q8mTT_<~H^)mbvK(`;^YeUG=59_31) zieGi0I@2%~a}g43&u`jjAEll$ULDcu99FqgMT=db;*V|&>FcwC+Vqd+A}hY?3vAp~ zG2Bl?o#z`>DpIH1I{jWW(a7pv{mi6TfKsy*8-awCkr3!UqC?(7}WjNH~L?s2=G;Q!qoUuqH|iDIJyyYxO?q3|4nE zNPl75>NJ)_2|1t3XVLzU*hbfyg>!&cs9R{tM-se-Nc{!h3)Kzd5V{snQkbfLBcK(@ znSjMopnDm90EZAiS)#scJTv{YQ}xG{2TJ#H*Df(PUc5IL!DFIYAkJ`Zhcr@8{o%c~ 
zyQYx}ul6Zo0V1~0*H#6=$`=Z&`YomKM_5{_luJ(1%@OoGPEJl5&?d`f@MUW4_F~QO z!&No?vvCW>9us>%^1HW{I)VhrdovVn!6eNN1^jo#ebKbJWH~$WG=VQ&bHDvg(PMh@HI(v?d-?y ziu6bhmM74^)o|f?WYT4q@{iLB2wAF0Eo2sUS|*cvA*ow|n|uPKzL$s^?FLZsgfGSs z=2IxKaqgx1Ggj0igGM{CpM2crBeJ5c`?6w_IKaG&w$_&fK7anKQDUKJGhCXJZ_Q2|u8Y(O6X3xfcnvlAded|WV6uE;nqMR}u> z3qPE`dtl;E&)#b}{t>&181{`O(0pakbIV(eXw{N9Ueg0n*U|DTT7p{Y~}K)FtwaYS&i z`qBx%cxb>9NJM+~$wfyI-UgMQtZRTLK*i0la{sD-!d>_))h!eZcVt*}mUajlvRwc@ zyJa;?`U-&A@t~W~WB4pN&WYOJm?W;cvzkj*C=+-xIw0V1S(gK-c#vv{qr?onNRx?} zlC|fa!c=f@{(q}TjVsG`RrA(^dLe!yS`iEg)pL8Q(yv9vPqr+CO6z_gKjRCW%0mx#!(b%W<|twL-P<_4H2>5>2Q+2Fap05!GBk1bvfX-u0AidACRKy zEfQEC^GIRi;P}~^5Xh`(Xway?n0y|{{zA`fdTosjsN+$plw59Q)qqkjc0(vt<_BGe zwq+o*krN{{pZcHPFALfPxf^>cvod^B83s}fp!=O=l}nCNx`GBue~LAwQPF} zIf{#&epU>%$HhV&a~r7ERXSYz+U)QKEG$y-4fyyw$!o*+<&@c;-;R>yh>ztTt48_| z0>dXyIAGhC+xA@QK`%ogNQGRscmKEjJY!LcRLPRj)3PcO;>oBf*sC;f#MuL(+{BTGrz=c$&0YV^w> zPHMqBVMfqx#IiC;RQ>w@a+wv5f|!4NEFC8P#z#-mvGwI?$2A6c21D({|6}Z{qpIB2er>@b z6c81VloS?9NGc$$gmf$#q(P*lMCtC35DDq-mRPhPNT)Q?h&0H3Ufkz==iK}KarYQ| zj2-N~7H`b?%wHnJ>eeD#vZPQ}n2%%_fq8b(ojSN9{~ynzm;va5dh&HdKwvH75X}xt zN`KD5<2T!6VfRkH!Kv$Gh#n>uwpjg+C~{!9!dYEi>D!6w%ONbTKpS%@x>Pgx7G;>X zc<{i4bMvp402KUxyn25+CtQx1Ds5ImAbdzXT{b0Iw>1RuQtW}0XLc@-KW!r~*J}XY z`c~M`vfAjnVjsOgv$`V$&L48PlgATod_&iXc|N1XZ^FJ3E zFaAw@VvI4#olm9<{i4$t?*+H~sGz2kvfb5~bVp8Cd{ghkCw-PXObiOy~P`ic7iovrp6W;ln z^JzJniT}^<&`GC4W5t?3!>`;rZjNzO+KhI(b$)6PK{$t%vB-E{t;<|`Ne2c4Sr71E z=cw13*-b1D`>z2LNPc$tNbfcLo#K2a0DaL?{39oai)fgDi6@;q{ve1wa0&8opwxk% z1ImWiZg-P$N4-=lR46i=5z#qBQ-XD&Ig%t@A9z9+y;0u)Q|!4`NxeW{TR;LO5(!#PK~@f-9Uq>^Eq4#*FsdxpIfDE47p5^lawN2p>sG;6PJeHK}Sc|Q{&{I z%;HkxSY{cz49kY^80D{4gM-352FuCKGqo7O6iM9>$(XI={kS2NujDA-S)FBn;cyPm>Rt=-y^X`pa#)rg~ zGd>)q3=;0HNCIgGCV8DgWnj}{M9cHDNla4q<4eO-u3c23prUp z&a8yX+h;4NUo!pqV0p8cYSy&r0)^%$FPB`sR*2PEKMjg&NaDUC3m(x3(woQ@J ziiMNIO($}=VGQA@aIt}NW-jqKN2UIq!rI&818H9!3_npRW~6UL*Rj4@76K8Qeup+H zu=}XcSh$R=uvP^a;;Dv(nxW!RKjpZyp4V;z+cjeQ<#~tE{A=VHl=N4AkD1MB%(5;V 
z=vuX;)DN_K)$!*;CCb4UwUifqtB`29F{=(9Xpx%To?iy&>v)#|s zaIl#DteI4nmGLZtUPn7}qmZ(WB4Vv1ZmW zMPW}GM1ZFz&hp6_TOd!L$O`W@8I(*6VGs>w4PLzWCvakfG}nG9AVS1o4th()xAup1 zOjg&@simi>CshAeGOJ7{ZbPsAl5Ig zRj;(Xyrt%I?x~oaV}@LA>R5l4^=BQp*}Cl&*9%(~Q1x3;rsVHM{PN-Z(;g^R=08Fm zG_}dEVwQhCKKA*Xt!y``7yM4qC3g zuPF6VN~L;*0IaJo8!!x*F;;FnBB(Bq5TOVz^A&0n5=pZa?R;NAZGT7wBt0Uz65jOCER+LSZpM6vFgDsH>INLz z+cF@Cc!2gD;sOk#(`oqNgIsz=1!qmET$Egvk_;k^g5kED*YMZA>YAwk{?}qFb*t1S z%*$=SZ|jDdvTWirRZ6|xno>l&6JBBmJPRf;&nW^f5hl}nF_vfv>l`Gupz*UpglzD} zqTSQ84ap`$H;Qdns_ZxAz$)vkYQ1Q7d^8aMVh9*yHc@$4G4j zhcN=9{zyo#!k6aEv~xKK0LaKt#3;*s*ve<0o=Uy~_7TefH?MJr-HnWO4BO0(#^ZwtNQT57k>GTZOM%z+$) zHeJ?XHx}eS*v(316lswhe)|d7rfnUY8da39dqjyN2*CEQ0XTsMa3X5mD)pm!*$lEV zgwpB9y|gDTT=lzpx5LzuUw;5uK1Wd8+*TcnQ(G<=LHUWCsfJG5?ux$e`y0=B_kqwQ z*6oWSD2#zcLh{ncJB;abOxE^3L!Z&zC*7^T9&08SEf4ldSMd#>nu&m+_{fNVp9% zF2d@+d{55$l>v|MdCd~dN_~S4XJqI5?QG}edt@Tj=OH@ zE|csZdcEQIC)LSCR>K=x9iNF2aoCnln$~SpuEp} z2=j1lpuTI{3@w>Q6Ba5dsSNo$Zy#BcAuA(&+o139h2iq61NTxWipEPP@I ztpbhHTE>~uHQ!c0gb8}^9hG&oku8`@J8t#aAzC;X8wo6LeG|3yWSIKm8 z&yQxueYvPg8W+vwPu63`tbYhhusNL=twtZLxId_7JC~|#jywPv%u=cD&(?8TpA?20 z{n#UfyZOUI>(B5#y(JlQ#hDOFtsJ+o^I@URcF+@SHBe!Es&jjIUm#zlZ5_3(@o7=07hggS7Z(=qHi z-F3bT`#qsm4I;1A@qjUSjX)W?|M86HZj!n^)GMUj66ICsLSk+=cFFn`}C z)!fajA4cCd3K$X~%j;=d@sFnkVa>gAA?x(2$B5NYIKG{b-Sl#NU+YNUtne=0fh?0ag6S?mG zaGq}dWVpt`N`3i)uy}ru5?x~P+e-2F2VFdrM9gTp&=oFR*8Qc|`f6E!e`%`X7i-Fs zcj*XkWdBk4qE6kn+epYG(x5ZxkgHB~gO-3C=Ve!+m(3r!TokA9YPYi@4J%0hC(l!> z1+(S6D17n&dpGbXib)!wOul%XE3X7T2Zx-<-%fXwY95_x{e9JCe|bo9YAccbkE264 z{HPj@Ebl!mJ;(S`^Y5$2lb^pv7=)e{%4zS)6b+&Z&&`26O^mUfn(_)CG3sDKn;f*Q z&v{eNP&YgX_{7m@l6eiT?Pr}G6Yc&Af-e)jDqpJUubJGAPvyF3Z0WWn#6!lIb(t1F zc1!8a_i-Y|F9qjWzs>GQR@QdUkjBR>HSd3fc!p7UHG3Z`?7FBg7_7mt6>bD`Otg#+ zEcR=)T@^v=z^09HMaXo-*IIOE>+tFu)B)k9Khoi&EFs8vYolY-&$<8S=dokoq-I#h zzQCP1Tzmz0=56DvBcQ%B?NG-?|)S#_8DN|&@NFn1{Ksdre~F7u~F_pmh-&a`+EKO z`$|DSJM$xMHZoF0z;BcLn~W6*tPagdt1riDO9t%<>3Xn23-fW+We?bI>i1)El~~p~ z^Vyxl8lw7QY>o|W2+KX2KHubTLVxcZ2r3rVl=-ZTI_R}xF5BXg8T&%W=x95yB#b%$ 
ziF<`5BTk_5`|~5KO^g}+gYT#qZ3Ebjyze0oyf*0dwm%RS;*|WmxKpSkdE-WS*q+nX-BLuL1Fv|xLTNr>AvG%r zWx`{+@{0qi$BJsVFXDtsL^7+cKKtu1{2HD~#qv1||N1kCUFe>sX{FS;r#~WWcn?RC zf;eBPQk3+O{yUpmu^z=rP0?%xk=4HYxU!l3!GDI2HylQ${o{6{V67f1aPm|+uj2sH z!-X`FKB0If&io(jN$?Wyj*zX5g3d*MJr-GrCwc9y_in>OcH!RHs#IZ+10@x)&st7< zzw%1O8-5zyJ-y7POG})c#$~r)RXmJL4YyX!{&WC>t5Z9-0x|*6!DQH8KXQqC0lbh5^u* zPS4csZt->^J8laaINn;W)UmnY19N75^Rayw)SiQO4TVw&~by9X8-qP$^lW5=$pYd9CDkPNZ3)Z}{pA#Htxhda=cn znkjbOp4Rk3aQ-CN@J#4}O<`s)UxnNhH6wZIuR(Z4b&p$q*sC-%qKRsD&U*TX*u+&z zV2p~XKB-Y-UkMBjA(z1qRYNbbo87-o8uM9vfB3mYm;Idm3U9If{Rj_Zzt7p+X{a-- zTryKcT~-++<_kFdVC)4pfm9#StIEzVYM0 zrHwO2`3{WLNaTVURzUdyI(WmtX#x;MF;O+28}!%d{7FqhXNNnqI_}%|?KvxO-!I?| zPT7cJ=K~vp_sG?0HpZ$3fm?D*@D!?EI{(?g&~~+txt&woT(Ed&`B!+BYSG=KZ*k83 zX}7#hJGj1o>!O&h^BTIQ-JkZ!rH(H|7|GmFJzCDaFuS}>#;L2q?|c5?>hD710dwRS z+K*b2g!{wvb$sv#WnWhXA3Fd~&Qh339*Zk~*(V-ppRbWUY%Q!Un?grtVKqanxj43 zLWQeGS>03SZyDv0*Xx>&%?86~3?u6ZtgczA7ZU_LV7=LyI8N&t`a5wio7aK@b=bj= z$;G7Zoy7|s1z%$1zw~Ogzf7nM2TVD3IA3s>vjI`<xG@{a9bT3Zq-Ed)+ZnA`pC`v{qyrs zpKyBd^r@6&@o;+jv%l*xzQ~yqN1`K9h`Xtw)6cj26@KAh%=IDu7J8B_^+64eH2l*R z8h$%pBAWwR>)m~mwURt!Q;Z0(uVIG-{lGQsDNFB&e;~rQH}j(0anEOrvjOd0($!GG z>uX}Y@_~|tUOD2vgZfeDaulYY(D1D1IAJa#_Xc+I?Ls#QP$sl{K1(pOq4b<8;ogG+ z8u8N5`wl8p4L2>f$!AiRs8u98O|=>T08tvdU1;a{re9p9nBaX|tYAs5wV-=Y;_#a9 zh0OC67=c82du~ZhVv-l;L6Rk96J7daI=2u+O277_^_@at|D&u8kYiWfyuEOgmAOK; za+Hq&p>~N~HNW)M%EC0F8E|~0Pkx%dH!XL+qxHMNEj|udAw}<>{phQ_V)dXNfCaYg zlWW?sY{2^n;lF=z==5uCR)AL4+{Pbe?CA24@hxM3k@&FE(V+K>QkUZxLMh>!7{(me zVcVxo4l+Nx1E>0+foO}|%y-~o+u)eQgL|5@>H}!nX%(f4&s!X$g6_Cex9>i0DpOkj zr@eC7M^UvuOPx7?wkSW$Mq{})>h=$-Se_N^@JB>9!;{;B6mq^Rt*GktCWbQ5?Wq)_ zuMcf+Ja3o5BDZh-H~e#5_lIe?58G|5lHMom6~S3|;a9ElCZ}u??f!Ao`m2|b$f^a{ zDb$3F^9o*wwE5tw2cuACPR4@KxpNH^pOp z6D0t+ZijarS>`{O+Je+IqFap-Y?dGmv>QXs#r6j?X|mR@x`{JXJE<*flqRqAuGjX+QiAH*a!}7{zSJkK`Y%! 
zqE7=OsCr`FzCHD`e_rd~p{)P*x@nvZF*^QpggDUYUVQco$$^Bma1w4CrPWEEJ!-Uw zU-VsYjmO~Um{)H_AdGHxL_)${c~)YF-se#)Pav5lX*;lui5AT(UDaO9E z`QJycG2+pjKiiP9C&;7+(H$9wN9MIh`bO4gz~=`ZCa+vA54&mzt>M||E$qp6AhTaA zw|@ng3R5Yr6ok%0bnO3pON8nULt=ywgQc6Xg+*T58_e3>Ts3N_idMxg%M80h{77Zo zZu+X2gv1MKcL)<=+Su4Y~8~Da0j`p|oelK(vfbw7u;;$G3 zTTWtu;7eIn_FClQie*TvXAERfBjJRX-+e39F*lRbx0{PL=(WG(U^mS(xAb46@qZcx z9Z&0CA$Ldvh7nl{#6(4dWRiqi!Hp(Im{kdp%tzqgLrg=g4xJ`qqI3+c*xtZOeqv3! z#co0itg>7^T>YAS9*Q+2n}FJ_tl4zqjZ4DAH+lY5C5Km)^E6#wFZDlzc`cRjBEfw% zB~bPyXAoZ@;gUl;tgxL_@wj}9(VN+XUS?1?{5wtgan*%UvP7q72r2;w+uyrix(Pju zka1f@iNqLOAEa&dWesQ!B!I0 z;oE&2*PNHv68R7R@sH26aX_&)SlxjyJkWME!Z-51>VMgZtD=G3ZfD4?q9`^Vh@g1t zL;I5pBDTVx>;oU8x;o2OPL9rxoPct-S!ic_QBTmtOF<#DGZ|>wi}ZR1xUDgCY<`y~|^ukrJ=ygpiB-8Z}uo#b3XeZ1~Jq>3_XN4BZR8 zS;{XcC@JM5UO+N-vc~Zr+1icw0PBf*1d^uGtZYIE>&f-x#Lw+d7(Gmpi0tLd=Ps!HAI!Ie=jN)I zk;j9}?mx}s<5;XN`kk?K5bZx4#r8NPBxDBZ?I6z6*N(6|f{Zfxp%$df5PNH3;UyAi z0Dq+Ms>#UO4aD=p9#&!$LK+Iiy2`B#8~XrdO2|?pU-^KO0iC_K(M&cA>0$H@*1+`j zWXLmE*U-Xim*Mx;T{tL@HWkt>*D_yijbHd@Y5Z%`|M{)Dw4!7MS^{JLSC{?+x zk>q=D{+CY&a0BdG0PD9lo0cg%lEO`04k60n>29p`gSE_HafbavK6(-!>o)jPZm$Kv z>5Nu;NW>?~MUvQ%m1eZneE2iWyJNk0UYNXC&Sq!E}B^EsAr z4UnHLfps^dUJyc~Q^Z1P=p}5RgmLlD_^E#{@WbgMWY%%tlO`UYEb!r+{`Y(bCXROn zpU3l2dImbyHO+;8{r)`m#PM9^4OwGtpngG;$ya{*ER#u!AAj=4vpNgKCvA@@bPU0} zN`IS(+8qkJlA+eVCsm3}xz@S50>|ycHJVvEw3T9&_B%4Yw2c}vZW~F=H7aK)FG)uB zSCHQnH*0^r)t6!vcVXBVClOgio<48PytbbOwvHFhfA$O>K^aO%42Kj_Ni@T-?TRT! zm{55aQCPSSFcg>j_eZA;|9g#_n#18wW1h~xes2~!1(f041Pg7h^JX@qQW5M3QlXy1 z)(Qe(*Nqu|Z^|vv<@{cr(E%dck-5PgUi-8T87B?77S^z}uV2Mfq0%7!BI`Xcw2OEvNc3p-Z*0e4{k^!32YZ72^xYfC5GQ=qB$)SRovV*wV>z0XyJQ? 
zTslP{?{*XzlLeg*Ryf)zdH+T%{ClOWDt{GzZj>O|3Gb}#pG0bi0rG^BFRAU-(8aA7 zF+^3Wff1nkp@E_PaSZP0WN=6CL7n(uM@MUIW*z^H))B1zkbYP;e6Z`B_ZHvTj%wlBzbF4TROJln;x=5PB&vI(afq z-+E|FUCF;E7uVHs?s5p)ewM&Gz7CzI^cbIz@U!^v#fuj+Ys{e}Y0(*8O|968|GwpC zrq}@0sVLjIx3RyGGbUT8^{74a(@-u zV)Ar(;iCj%E{0W`)nt%`TBX!vhyp1j1IfeKFGWyTjNJ7mfRItREORAxOoSAu)^Od$ z-_5fS4WxKkFi)vj5<4TZ^dm~ggXi}*!HnGZZkR{ckI42LP&!R%8J9WlMIY^`pP0V? zBXy7o9g~UeIhoYl&N1QK-ZaOyl0L5y1U&4G+~+6aymOD;oFFLxp@|{F*FlaQXFE@j zPQzF%B4!ZG`loaX_L1^}27;k5s0a*#knv0tHQh=Z*xvQ-#({drTL*MiU(r?ZM~ksG zaXnJEOT$74O}QY3RzoiF^w2lljFxqQp|2N7?H-4Z2@U;#7TokK<}eqJmt!!|w*9G( z_Xgn^q{7@#uNqgTkQ6^BB5t8nrjqbL^UJAdZrhx;oM!tesPX(PCart)k;_^fDl}3E zBOgL7gqRYez!O2`EvT(*c_GCMT9{Qie8SnmubqvMvU4)kB5{U)N3ydZb~UtTpA_~Y z2IE`CekWvMK>!$}J}$ra$?8XP=OI#s>i+%bkhe+&bKr~X?e3p@{rzS=Rgu}h44WkVTNtnr#1zJuaMgo^5XTz{>0QL;L*|eq3vSQ z(!_MoM@@fT4O+{?;{tuKL6a2@Nsq0N0rM4;R}3JhS(PC)eM;= z53XG8Pq_3PAU*0!U%c`N5up7?5ao?rS5_N*$i2CLQf3MN?(lZl<3_62p!$-dq1&Fi zgVJ`Rr;Z_-bv;uGGYAdqbr*AO@g^RK)dq`C^5ywT-}M<;0JbK07;%{!q9lbFLSw#}|1@Mt064lfee z8_Wv%!ztqO(3Pw(6i%z<>ttX(Y1A}RR_im6+Zh+y$=fqCj!)i`AZA6(`7)@D)T}4j zI|MK2Ebh9OXI`Fkx^zvI5T;Qq+=y-Vr$KzE6yH$N)HL*?urb~KaPkyDoP4&Fd2eF& z9U%3fa`PYp3PlgZ-+|~=6{bDw$~R|NcPyQs9DDZc!#xXup{reHMa|7g&5D9M{Bj) zBf*?5wg3~XsFG5YWE__~D667U54uk4iYE$94_Z!Bq3C2n@I$ERS=F=?lc0uIoP#2> zr%aTKj*K)!+~~4v{*W$oA#s zV)^IL-X01>Q{YfDy!+rWq$NlPfk&~Q1Dhc!RB zO;|8NplFKMo%6A>gzkfj=g%v*UGz*px+X}Ol=A$2Q-RGG7h9+2Igvu=T4uE>NWRC( z_Zk4X`~~=;Q^ar-FooC1ohIlL((JA0l7GxVNcE)t!i5VNH4j1jz7>bV`#XpSx}A~s z6M!rWM&@aMmvXL6-K2f$ck~j;qh{5VxkT)8eRe`7{^mQv1B=QRTmNjrfLicc@B0DS zpC?3>+}0o{+_ra_=zl6fO-xt!>D(;m8gc};T~%YNyMpZ{ zX1Z*P^gc%KHD2#2=T;ykip()A=f2OEsh(`RJT5|tAg2D{`7}IY#5fmrFa#^ zi={Z*@nFDfXuC5a+Z#!>VygA;=?AEB>m=AHg11A@PL-S^!s0QcHg1*!8&vHQJKtxz-NkbW5+= z*1bF5Q|od(a!^O{fDMkdY+lOC-CQ=yO7@`JMvhyH8>?@jbaPT0$I9e1h;1*1JNa%> zIt$b%-mwIOo*aL`p9$rEFfn~E#i?pUAoZ_Exxb<28#s5>0d-8L@e9VhuU02teI;A} zO*sh9tssE5&nu*U@I{GaG;7AEFiKVpRIuY_Dp|P2S6|DG39yQb1US<%OsF$4jH~>_ 
z%U(;EL@M5>(Hky~?6pVRF&~0NTJ^%>Vqw5Sz>8{6+|xl>L8l?lioTK{I%n4;KXM85 z<$7K5yy6rKYW;kk<7YiuTzuWdU&}sCAIM61Xg((MVd&vuj@BkS{)?P~(HBUS{53dT z*j<3rU%(gAX~@fugaXoZr##EmOE@@+PRxYm2_H@oKGa%MVAL0QdQrD1k=auI_0%6A zgJA+4wk3%!c|)wts8C-!Jw4r*7y|={!QzDfe}fV*>--I}i#r);9dCu&%qBqY#Hw!uWXPCoRZ5?QwB&VjZeeNRI)$mH8stV zf7!;^&d)@aANR3o2`M11R|4Xh{8U|KWocPtFGU0-*cBsDrSVSyx*_>}6@*SQzM3?W zQFjq=r=H;G0js8EfV-Q!-^zG(WyWfr@xc9Zo0ShIwqTTh=G1k03TfdQ+qH4}0|H-@ z-?iuWd<$AyLfn_1(rJ%i5FIQ=i2w1NMxdB>5cg+o^ySq8&nF}fBo z>Gkp13=%yo{c`RBs~Ut>(gUn1=W@FMcPJ{z4I&d?=-n}Gj{HM&VxvH?xZd%1w5UfG zEi7~-ckX+)>ZNGAUn;$~Av1@6eH9?6P{FU%mcBtqttEOd%hCvK8sh1XNm zQTI@nnA5T7$}PU8GXI;D)uf~p>3xeWEfPt@=ZBs0)>$n@-nE23o_x_=UXXt`hnmLi_*K^>$*fsNe6 zE!S63^4&z4rJx=Q38%&5XF-To8>aYsFs?)f1!>Z0+<)3eeNSA-uB`1PW^JU{_#T8U zDJp}2+*DSNc>`T}mS&7xqxRs>XSG`A1}$#uxvbYRA4LGj=RR4yXNv8LX90zj(Y|px z;KH>#X@o7B%vQ@?K29CvTI9Oq&&Z(K)_$u`aTfZFj3&QA26RHk3(m8vo}0^Xjzgm5 zdJ>+TO@}sO{3_0gL@G=8WVvEDm`K_xGY3R5Du16PICmePn62Q>{8LelRCU}vzRl;KUU}{PR+a1$1hMeGqM$ctUuM=7Y?&eC0xYR>K}Dw$pyg&qa0< z?J$2FS}gWwLZ!l9KJC9Q#31yLW+s>*JRVO?96+u2ks;SYO0O#Cs=dT zMEB7l=%b4-knFpujhRxr;tNM?gab&hA`>UX!G@{`z2_r4LwLkTf9vswL`k0QM?TiIy~APST)IggeBY?`tI#!4PvG$5=fTLM_sm|3RmZs5xL>1QH#-DDk4Ql* zsMO_!@r4+~u>`%6UbAeUxx?>Vb>-$mp-bW3L9BS*1)3YO;l;5ZPp0)LCc6*-z-;K~ zc@o4=6L$0Lj9Rh6VbPlJ3&riQAD(yJy)ZTNoJ6n@JZ0(`s&%QQA6oNs!<0`Q?rkC? zFcQj{VI~6!rq*gEC~0%EGpf{MBJ+yVLcKFLu+WQj0rgjS=AJ?ls+X>Y)3SlW2Om?d1y!h zIFlT4JZ<&DC2(eu7a}bh(8v}9Q;j;doa7rp9>okfF+Db|W8S09#~1L;JOMmnImc8G zn}e>V-y~%0X<~i?2_sL@E(av-8Z8jD3aH=)Ay9;%ymv0_Cnblm#u_UO8%aP#2o+Y? 
zgXN%sHtc{7eUZ=I`V+c>%#Xgv~3}b4FpG5pwDPB;T!hq^V zW%xNMz4YSJpgc>Yx0=t-w2hVe7Fj-?LC*gF8Y>F?)<2ujS>14L^G9K~+|FIS(%WRA z)11M0^T9oN)ZPJNFN9q*T*ytbaUxSOH=dr(?^P3S&gwGdJku0`vS`bUdW~Z_e)gag z&KkE6!*&moLRM6p2}Wx9yN{Sz6-UdJCO2-H3fQ$(iwAPH4vK!vrWhRxY#2d(gMb%a zDaJS-*ZX-6ZBmn8@XojcpkiYTk<1#MJD1LsXgbfr0Z$=iMkZy-`(C{Cy!`TjPaEEw zEiAc^m!oC*T!Bc{!7nJ6Ps7Lsl?8k$KY#fRsF{@U+Y~}t(q5*kr5A7+2(D{OoC@P# z?Md-v1j|WWrOaFugeN1ptw=-Ji9;_6@>u^)#5?a>`=A*}CY)YKpIZDfQp9vL&Il)} zv;xE6Y;QkeEa5{Y-MPCB1P*Nif97<_WTgmlqx?Kro_a8EFBwCIYHym1VEDDB88#}8 zTSCT?l72--ed2mW6iGjU?lbBkbNHj>qAr3Tiv-N#PLWpFRTm0Hg)9F~V2s2#tzLa~`O& zw<6|!Q&a`@vWvwDjn%yDX5$nDM!k`5>z}aNHPXtjmRZerE~>ffBu=V%Yz{;8yqveA zoYs#+9C(4GAcFA(eL?!7@fqYILHHS6`_lWLnEAbv!eTga+Hthh2oXvGS~wf;uqs2b z9&HdW6I!N!%HB7N97|Dmys@b%Lrn9~5l-^*bMr6*J)_fGrkf&&P6N-2Lg?_IInMQ- z7PV)hqSCpywQq+`;&#zekHYf4g}$3akVtT5ANW)Ss03?e|NNJ+@`Nr8KflNjS3N40 zyrRWSnHr)*y!mDT?)yIKeCh#oL2tauzQAmZEBvXCGk5KiCURq&Sn(VQt-^<9K zIVhPv^xb^!=vc*dPp~m@<``^{a^5bYN=xFGz`u*o(rJ8Fzow#FPD}ev;{el_f<C#4Sb0zFiHUS2U&0zc_5O0vz@uN&EVi# zOaBTtQYI`nlDUQ5qTgX0B=!jUCfW9G;o>IHNNT+KN$okr-1+Xxv#^IkKLxQwbjuQ# zwLS^B_v#`cv{`7Y=d5a`5FYEpYdirZCP>;ie)j$hJIyk=N_P;k+d!af5K!A8ifH?% z+iMV8-TPkisa0xY!ASxl<=tOWv0O}0TcfM9*=7R?WfBi#x>jTxhHdkm3nL0NCn@YF z-HCen=4pKc-C6h1GYegoe3xO&)g~mk{B2?YNTs+5Sb*-r)*MXVXFm6+Q{IBFE|I1K zli*g14!P-&&JP^vVyxQ$9tjB(;=cVfRSyse9q_F3zibXV)qhUOJ7|3FjCDFFb1`%x zI>%g~8p)`fD1L3)>C3@&IJOG0&D<~0vE*)i?Oa1N{$)*t>ZHy;FmFJ8^FMxuS%d< zLAL0nOnC7-zxL&AC#!QbI^c&BK^Xeqz<}vp{2BTkH(-~o4^$fws(CMe5m8^uk6HklEb^0} zX9nxu!Zi8Ktr~Q${;4Ovyb9zL!AIU5-%e_e-^K${O@gyY85Fs|{Howb6UlmM{yhfq zdpd!ZJT5$nv68d+7T)gxPatp;*nwW|#wHg?wh1l8IOO{n;Rj#Hy*(@or>Pn4> zcOU#8WcF`Z@yVl}uQNcJu9ljA?#6w}(@)>a$7&UX`}=>?$3w3M4Ck9@DAOh@bgWoA?XKV}#gOPn|MM1!pRf5?*G$qP$yxb~v zSnyhkPw>yPlRmBeHBJW^VuoQPoxWmy(6Z~LpqnHR77tpbYG%Z0$>+46#%n@F|ZvQrh^NX zJyoU#bR>AGIj$CQ5x2|e(WZ5KxDKD!#wrzoNuY*$^jI$D$E^Li8X9qed7Kx|;LqbS z4%bQPwoo__ro54HKNF58{rM)fqfCa<#3nbhF5{fZUHJ*%yQ0}mKfD>AB@dp~NpUPM z02nZbUiUHM^kIOXvYh3YlJmIvoSrK0I~100UvAz1@g9dh-S<~Rg_p+A39t 
zv#_z|gN495+|B~SHb2vojf=Vb^?J+1nx=)3cZSil)K=AcR`UF zoJanaxT8$;d1^(KZ_{mT;G&p!zc(nUnDui7#t?kX#F1-z(R!PYj0g7?U zW)h=crNAQ?b#W*@P`6}UCh~Mr0YL>b^nglVZ=rqR)jA6vWaV-o>ipQ~C8AW}qLH`U zb+7Fq_4FeX)z&SdSh^T35p}0&Nq|^ObP+bqbeQ7W6a>;+LE4bHgLA!aRsV7f_Q^Wd zgE4;7#E`sHI*^lTNp(AhqsU!CLw-Q-G>%=49ow0qSkBsnEN0y-^ye}_{xZ&?2v;8q zO&z|Il&%&+{RwQt2HH%PmC*N%Mb0BAq8gSu1YL>!tIj7Wp3#N+5(45GLc$o)S$g?! zp1xTW6{9X|GHTmbqfp;prN}M&%K9*>6GUi}@96t@ET@E!AP_lZzOJ8Qo4!Zr`eZxt z)dfPYJ=I53BU5Y%X}^g%x+5dHJ{tw8d zj}0;y=mHItiJxO_Fgx#Vh=@6G?qC?2v;bQO1+i8eh&>pzw6qo{a8sl1SQBC?N~URR z-v~_&3TjcQYoyzL{l53<+$EY1n+0--0l>T}KZ=qRR={Uf0emFAAcO@w0NbL9(5Y78 zkc^lgICNR+FzfC&Gc9=GKaEwjQ7~hr*cBrp94Wf+rwiCRq5`pcdokv!jmE^e__-yT z|M!fGmu0<+pILXK1z^0ttn1A0tKb4`4wiM=3`7d9N)<>m< ziSgw%+}u$Y&#{UV;@shsuBHjo^Jsi3n6A$_Ucc0EB<)&HHW!kGVIZ8D?MFAc;ZB^J zDwZ@+_EEg|zwXKis+`q&8H{K*m&rYs6~c&wl?5fCIf#p!@4cvi2SdOE`(L_jGPhv= zrx8Oy*#OU@QE7S>{)1x4DYoodwepa70yl3B$yc?I0RzVR8^uIqG-WdPWr(t)Muoj+ zfa1(tCXsYwRx6WcHy_UuVnElX?hGN=BSKD&`{hD^OkM)^f!9YKds*)`1bTVO7nG^g z;U+2FBFY$5Z-0rEaGoiwMr7L%ke~1(OwCFc&!%5UIqk8_@*t|%+~hgY6S@tRLf6Hl zJl-?s=mRd(`8ajuV`V~ficv5=t0KAP1`oNGj0YJ3)q0h*$MHgUl2JGQV}9?R4y98b zysz?Y-@u)hoApCD*dt9qE9jA86y(@=jf^jgG$vC1;V=9X9w5Qa`d`hdCjza1u=xr^ z&eC`b)EhL6!EEn)WCI@C+@lc=j(+E@5_ZAuwVUW`eKNK?WY^=6d8-~jG!2D98Oh$z zJComhTl^BG!#q^q-lx|>^dc)mKD9U-KAVi4r7b_6{?v^%ZVn*HY`dRy>Ed?odAsT~Ay?T(M5SZ1 zGMvt+RK>2&QffYdu6EYl)9wf+J4lns%3aFz{+<0&Wf> z+tWLR3ikDumAW}iZhXOEfp_O{Q#M=J)Z5QH!iRuCY*suxj8^!LL`VN9q5i!1oQ^9Vy}YK&E4j3{eomWTIl}!g_M_bqXNSBMNe)S;JvQgN7}9+< zE_N2~#_ZeQ)WOnS6}UzjN~HHkM){1+Rk`p}N^%z_{bU zxx8iBJ)zSK?*j*-FKESv^9v9mM@wt9#Y-z|z44A1P&?W-)~&cjOu+@(b6!p45)hwpyuLbiHt0(m!CwFv``Ec8%N^j&{oF4 zDV0VN;@cnjw8}Xpve(jZlgDm(k0IrV%eTU4g?|1hi+U->6hncM2_ANkQ_~wOG8vN9 z8+u$MprTP_ClhYdX#AFHlK=rxR)7|*B6cCl6;AkA_vp&JVC)DvgXL<|ct%iuP$+Kf z&9ir{ZukZUk^63(>YQ(($cC1bTlao1_DRfjL?`Jo5>jeu$R8c9W&u98Q>S!B_Qte3 z!p>$X7s@@-^3tqcJV#{$)Oolh2E90FMY4mLxY*M9v{g5Z9{SWR#B&u~U6QI$tR^x( zH^b?x_8g!8#}%X=heSD?6Ioyt&PfX}@{S95u|c)f^07y&oMgPV06}OelkZsb} 
zGg@$;d~Uri#q-H|4ePha;10p{5?V^XRvGnSsBe@3VOb8VDNFQ+fqDX$O+;j(T!LWT zi3A}8*K|O@=os0@enr-OJ>HBc~_*6f7AG7(NJ{q`>V^<4DVJo#Do(^VH2;X)hh2!XI!+Z^V;UJ z!D=jedr0qSTg9PzYs{WVZQ;C1l`>wIVW7g|p~c-v`Fdk~eDu>tl#^^EVZ6Oo9i{;< zTkldf4g%&oz9ZUCD#@u5!(Hto$lBJGq^dw7$ed zJQ^s+>9-p5YQ2)P2!QE`=KHIT2S4(gNi#R&neI_lT_V5r&TZf@XkBGY)GqrjE?N1qC0%#I+DCuscxDR%B2`J_A+UP$ix{*b{(``i~-!eENYh!I?AV<5s3wX47vz?F)a zI9jzGfBUADI?`^flS+qDb`HCfo|TtbcldO7u@nnu7R?$Ljvh4c=CwYZniHxw z{)v^FDhgljrKla#oeQ^RXk-SyuW8~ElD{#p_(@|6IM}|cWN#{A2!ED=q4DdEBJ4N6 zl~@uC$E*jeKopdd8?(W}q)Ylb$8+iN9hwgQU-NZ%_;zd0&n{eEG zrjcrebL9f139ISZoETTdBJY>C@y4TYn~F@^FxRNzige>rRn2+24J-=R8=v2p`QBCG zwDa+Y#|qo<*v=QhVhLYQH$`%vK(~S4qIO?#uyX~xf*joy>rZLNHa-t}&|;YQqK zYz;kH)?576eAfJljJ0_3*t~x8jCY%Q%x?17<>6?V)T0g_$e$VD0JSo|tqC3Fe_zc^ zQUCw<%DD{y_}R4rx~}n4hKZTfe!e%wHl*ux!XJ&S%dtISbz>){nAeL`pPLs^)p92| zpP#m%nK>{f%>qCC{Qr-9S<_dm!5Sp|d zU+s3QQS=T+X*Lo1!HO1kE*0-v)v@X`$t#M`q5JdATgFZNbug>3T z+&nOMIf%h=YTfZ_9>~nCS;QQ}H5r5lPMt4#K3Obuw8crd@&kdY1U$Cz!kDViUfvLP z0{n06lPB3~#n!^l3Y(v+Be#27iyqxPrn%1+o_QKDna19ihZC@q!Y-F)mLtE#*T5HVNk zMJU57o`>`40=5hf&`3&83Z+?0rt}cne@)$Y0DHbpHQe3^6+5NnTgsG(+H6Nj-5zJR zTYHs_rGlkyhc_osZ(MviH+z(LUhT)`okY6LL%jIgbzybquudteJ7q8R7$>N{*|J$} z(Fxd0q_aAM&lMUJ2}5Ibm#v+2+^aUa)rF)$8z}l)CuagMb=782>tgB8&oTWm7o&0G zXn2A??ztRv*sw7a!>=iX7qmX?$g|xTYWC#^liF7w;|jq*t|pu)is42)kvo$%qn08X zq_XDUFOZ+PIF%XhP9^j`&Stok1pcpRziQ}7K1?8J(`$Qqq2e2vIR2C9ku407#%qpo zyChIx#D1)ePa_Jez0#9N;+ofHeaCjx()m;eWgyQ~`vxAR@NU5en`{h&Y450V9 zMqNNP{l>lyKH44k>XUk7=Np1{N1AhtrJL*3tj%eb4I=)QWL@zuDuu(iU?(wxbz8=f zs_L@{{3xu30&dB8=#8cZ!LxN2*Zk84Z_&)L>)gH)OQ?X8M3Z=3Sd>Qu3&-nIh$+eg zZBciA6<&4;YLmHldGG8lX-n&wu~n&e@*g|eLT}w06hZJM!7TT)jW;WxM*d)l^T*q> zRYdnEJy31hCSQrLb5rhBtyUgCiQdT%`DjBOv+mPP(*4mJck3?F62{RKz29=%M5TuI5g17j^WOZe)UR}&ll3P0d zhOf(Zb+mPBp&zrgHZo$p@U5n^1@3(J>%G~?or%XW?AhwMq+^68BQ@1skyp;#HOCjY z-oQg%wsB@&EjHB3>Z)6p5bYPQE*`fT>tE~7IL2noDp%OyL~Z`ofqVT&L)C@^EI?ON z)7=M{Z0}G^7DrG`vRyOzKWx2qRFrMoJ*=WIG(*VH-4X*xcXxWzl2Xz=bPYXpNh*!E 
z2q+Ch4Jj!hT|)~Z-3Z_1^Stlx_pNVz|FB#vTrB1~ujAOqKK9B$tdhwP|!QPWRi%kYw{YVq`mbc7{<8zH5Lv8HhOViDmEnCOUOBSk?) zWLbXsQaN>#%G0|t4Yr|vz(a9}m3y4mZ4Mh{%e4jhu&(&#NE(1r_An z$AiIw6f%SL)oQlU$QJlt^T7mW>1anFQ&9MZW4ta+5x5&Xk>Rfi4FPVP{q}p#Ci%SI z3#AA=8T#1{eR5&cvLpU0imoP4&I*O5rtpD|OH~_$NKeIH< zZ1WmjOxg-1XM80CKOY2~o2@C1iVhw^POwQvEFURMwJRriE(ctA0QZ8p2mWqoYC}g; z1;tv);R}Gm0_7q1`xXy-?M@V=VNKlTO^}V_BSr+2@zF|)N8~dj-=Uv7SFPs%d6246 zvzd#{<``Ij3teO)rlf%1HlB|8q?bhd$`cI;dM>>r1PlWpoK1S)lwpaeMhUtFyo`$J z2MK-5neVRjI9L%}s89GAUsNz)U-&`M?%lAO!r+VfkQ4&1PCcHfXiG}W4b+R_(ebo?;6X$7``b(*5+8yC*6m8$u5p0B+*^HlZmADI z_5Tm9+qEckmskF^-?(i1F3)p3`2QEznW4DMKfwN=8^CO}cIpsrs9mGe>{voEs2U;# zPCN%aoRpuOeH6`MRw7$8qUXV1BKh6x24cVP5YQN+;So#v_^rb&SGPo#~!Sp(z zd4g@O8_NvYc56A%nk^e|&2g*F6u-So2$rM{&?UoA2ulYN?Pkw`x5hz4lLN`qVIn^J zHJE(%^T0v~A9xGAPQOJEqC?vUUln5^g>zR zUu@I|bl`yh3<5y8R}n&!b*B>rfGT56bgf<*GJ4aZxKLy(4A-D)*g0DCq(b`7H{8!~ zP9=AFhaRuzHtf;rPX&==P$TR7H%Y?CXHXr$(KbuPk-+`dwj>R1*efn~vF_5dBJ#jv4Y_>FLW6B*MF?vR{{XMoABK0<`N~(yGW~6cZV==L zm}k)94Ej5C&8q35t9xb6`@S#xhg3K0civ6P{v_K})=>Q^PwTHC;6qdV`=ONq_58Vu zc+1Nf=Pj&+caI>3n0iWj26}dSA$o@Tg?iPDbFR(k=#{qj;mjG`Bf;_R2_JwwrLKSD zNZ10pv-rV}(}>#OTx^@%6ATcMNTcEUM;Y9jnqQ)3MRo}p`t@cyf!8Di2y*8{FM(}7 z4#27}XkqtA+yxQ5`y%s=7Gl?T04c&AaPzaG{kvvx_m`Z6EPKS|3m=ACrrr~Rk9DzW zG9qs>7pe-2CP^IlH)vpwpq^dSI3iMg#v z9R3KO;3a)E#fr#)(GjbWZMXOwQ(@y~N{M`A_!;Vmf zM%&?C#FoasgKsHbDZS|Xj*RIxwzC6ZkBSfb%FCY-{J z7LK6~0fv9Lb$v?5rl>^1AO@uFIDiI_Hj!f!t@F0-GKMaLI^lW!i=ZM}LVXgoV;4nPJN_qlf7afQ2;ot0w4!sBQ!lGUSBp zQm{>9Mq0BgX2^^5L8#cO%2i{_VSM~p_6|X1`0{h|NPO@eksjC>d_0GRQQ|Ldt<%4V zVIzQP4wpv~f}_*7o1J+muL-gVW&|^1N2ybJ0Lu7_ zLIFr9^oflb8-qy{RW?!`8?x|adYk=*=9dB z+mbF!y^)BpCo`9agSUe%JioE5=nDzX*i75;7zQ@QmOu``D$u1=*y<)tuU;W;1jjnR zf7l(_%fWFVJfms+o`nC!`G z(&FJLM*8Y8FIRPQ@x$tn_YjQhl;c6O3V$E}lek6~{yxR6=IvuQQ{>B!dGBU3#KLqA z6sYMvpPW-1X!M#C<0wQ?@*-WEqdpn?Y6>6!I?Z)C67|7546{$d;+l;gM1&39)xdF; zDp^k=B;^;N5_Ef33i_Db$G?t6ln`Yu3J@zyIjc+bpIYwmf^c=vI%u^%9P+fUuE9Qe zO-sBWATxlpsEq+ADt*k~3RUO4$Lcs<6*yKr301UP*vl 
zuRr|}L@Y-WZLr;}@S;~HE+)p`-BRX_s}^1xgcF-0K^9y@rh5#iE-qK9wsYom4s)!- z(84@_xV@4C6Df8U`vMbJR{wBMrEUj6nQcnT4kE4c`DtyqP)tvl4qk zvG!kkiPd3l%W72fUcSf$*ysQE#HdPJw!w(XsY25Y{hs0Bg1MtP5 z<`iyCTWhwte}k{)aZ!g{mtqxDm6$aM9YuksSfD_W%%}47DdyOB+TPQ&fbalNQC0wZ zmHd^{6Yg{Ta{`~FAChPV?0zR6;@8 zp@|AEYq=CvJ(4;Ddb80SCeo&ytobqB(q|R5qx2C|$pX-MGB!hb9C{w``{l?(k3X*zRY zIk!E1#INwosQ(NWtum|h$k3O-+!MiWizZ8cMs@D@msEXlL_Ws=Ch<7D{=XKKnJ03BYI!sdrKoul!H=!vgmB(|));_XuxI z7E5WZ4c_)k{{8y(Lr44T0fK}5Jxvm)kES1`zi^;c2aqRtq^P#6{i4_1OAUB;07R|` zqH$emm_D37eI8MGTf!~#$3B(p#lLbz#oRk<#=5I|4<}~L=EUccmui8aZc3|MTjt+b z=p1`h8m(Pxq`kk?^uxsqNBiR!{=O0d3&tD#F5ng1Lr!j>1wvK7+ViUQ>K2vuzeT7SG&K0XiZr-#S61dvMJOQ)g0V5e9-E%3| zS|>Agn3m(VmiPmZ zFH67*eGC6zYLYJ**HSr`>AqNAVSp@jlqvyC4TZ0_s0QL{r`sfnmX8gpR+XB4lML|=hpV(=)|<)4u_Ze-|d9Y>DyJu!V115ja&)gdeaSVa%m3CT^| zE(Cj~xT(qR>Qs5YYH9k9eFS}kPV0#=b5Ez9TPD5!jct_~SWM@D@xutX>|BEGzS>Z{ zb{Ot_pF61878$~96@QG}x^bkNI8=X2^0$G&55>j`bMSBSe)agVPgDcML@ph%IXS3i zdKQoUt5_glUAlC^xcO~&S*5^?G$7idN1mkSSS2a3;%iX$v7)}xFdZJtsoYMe!nid7 zAOg%j0i4D#{9z>oj`s&BwGmq`xX8Qsl!J-|i4!RXZw1ydJ|s9b2eY(m+rhU5i+-!Y z?O4&7(tRrRhkRMYa)hZVV;K~Wt`mxgdsvh-@X}zfC$+i;&H z)Y($LWHv2647 zfvjuabEg(voBn{qAHYsvfpDpKrNXsOfHaxnsm9*xz5CCN3qypK)~>HGg&6eGl56FO z3duc)+eiv4rZ4O99Y|8DG>LE;@3h6AglB9phVh3vqu|&b2eQ++d7aeV&pmQ3aIUjB z8Y>qar})Tu8@UxsFq0X^CJBm1<>^vx^MDM7PP*UW-9~2O>;Gbp)K9!nfL?#w(-4u)Toov|$Y3KO`Y zx)odQgcSm&1h*wnP_j%vG6(UMJ=Ru zh_AsoHOn_{Yvp&2@w^rYXg3&yEu2|>zrfR59gXDX1n6`YNXNSS}{4O%`c)Dlb9B@;L{|F56V%~UETeB#b%Faw z?x_*{A1X-y2R;QOq`jl?#0vUmKHACpJ>cGKqjEz|NV9b)@8Rt!?`dM!{CIqr?-t!* zEEVaxJ7D~t$5gtG0mb!aO7F{X+~0+2vNd*%IN|DWsBn++f#C(p>0Pa&j@y^Q*36FU zzbiLjv;K22ZtiY)UW)lIdNM@6)L<2)M@<^_@+pbrD%1M+CQ~yb)$5CN)KFZsju5gI zLL3EkP$m3JkvvryG|Dik+spP1D}#r_;Nr_KV;_hDf4A)1#p;@BGgjfFuhS^)5n5X} zu|H^kD;LFEg*_4EM4klZxioq(DIty$n?+FjNo{^v6Ghgq9AM_ejRpK#m2@s1gRQ=sj zl@TfCRg~-g)(S)QS4^zB>5~mcC0!I)&yH~ta4AoRvcV2=Ed!=?mHi`GXqh*;QCN(* z>#1^!PkLC3VxXRB<9rJ1nR2n#PYF&KnVhE}Wm0$SQ#`b<7kS3+Mk`*MVILu+ZeWd< z`5aMasQT=s?~=+bxrzoZ-spzF2mB?@&*Cjxz%9EkKQ#^q>AR3?eX92vuHRjoahUx< 
z=nIV0C*c<~<`-54Z{u_}&+2f(ePt)RjP5J$lEm!=b?pYYlgpcLsC%S!ag4O*spMJ; z>Misc&<)p>Y)D`i6p2Xzp^ntK+Z5mPFLp)(8)d1ln2{KTK_{wo)>nVG9}Itvt!=-P z39&DfD^q*GFTHIk!`d$vh6(57UMbf0-I5k%`r;h?s>PSjixv|0B_?s^&hg`>{R}D# z+K+9(1C_kCaiLsT$J4Pfc?vg@Fb3N=E^f>_t$vN7OKRLPQWnPnQ+Q;3B!fKldzk(h z(q*JM$_aot$U^-Y1qB?EH4LH#%zf?$2DmrjR-NfLQQfI%sZEBCM3Ke(%*8wWxVoQT z@Q16Uhy3K}vQ{_u9om`I(sf|%>u<4ZTG*qHZkF$^f7kGK{JIZ%i2U7`ZQ--}a3p__ zdqSUgM9I3x&!|<)?Za~Z`|nyDOOk{lvK6_cFjow!fdo4uQ&vJBg5UF5r?``_9*3!r zmNKJUQ;r;%(e3iZ3<|~IBy_3*gr8VV7#o8&4trA-;mb6U!lh83|Q^*s75732v%D8n~gv$=^A$c1hC*=>_lOCt?v1 zN3C{DxHW+X;yn6BRck<4aCXzt5b~dv-YCAD@0RFT=vOEqRtZLq0Nih+F8FuNl$sK( zK?w`Rq&-NN=?vW6~+lm36F8poOQzW81l6+ zrfwF~+}GEjRe@-o8GSxag5Tj|kA`dA#f+R&8SlS7N=uR{7W^Hgo^ja;K`%QN#Vg8w8^g%0ZG zTsFopdhEu=gUx@wA=MK)p|QCtPEQ)_0=Y?yIEM{QjY}QQEP7K}2muI$8>WmD<{Du4 z`w0;sesB9%xv&o|E$&zTBrrKGkU??{s>!&X{^G3dKNK+Jt2C#d?T$B8$sUza=S#OF zTo`p%&hvND&HJ%t^6icj+353U)_4_v!7|cF{xy_1S_%;#TLFQw1J9uA(^GWDUE%yo|~eIK-UvHb915QrrXaflaff&>VNE;&TnXM%}Ew+87V$79jBBmG3g4e`&=%r-p}A>*SDNT`N|5RGx_tpOiZP7 z9cHKK>@CS=Nt-m^tuGY9O?Or*Z%h8Dx48k0MAZgV6D6~n?)!8V;^KQV5kyxn>JVtT z$XL(kpQ8e)FMx8|2o&LKydp#~PIU|jg||oW#n*px#A2A?9}NG9j|z9pddJya>`%Ww zxl{7G&$6*rvJcser&32WEX3wouh}n!Hv0-2ajpN^Uv1Z?A#H(jQj8ew|K$nuOy_kp zoT{4fKQ$j6!fMO!;#-GX=sl^5o4Ie`dMeC3Rz4uKsR}U2iP$xr z77Y#JFQ+j3R5|Oda%t)q0(8>2r_Si>?3K>x>m17`B2|hsr+($wO=*;1vCO}eU)-KM zj+w!(3B85+mN?3lX_)+NcPKr=Nmmx@XZi$CceunrAZN^u5h*&e|0z?g3PguUQ3WA0 z5F7hA6>QDi$g+fqEstB$qM}hfuEIkcfzO0=s7&GOPXSHH7HL{JNN}fE=^NBt|#10H~p+}x-FsC!xnpD(J+#f%4Y0a zl%#dosInP|1Yd>3%8`$MD8+BIt{Gh}-HI=~I^&m=YYN+1MvGF?gooQdOT_sdiNntP zK3c8A9Z$iDQjgRV!l~I?{NxdwZfsmdeVZA(`!wz-$QuXcL+TFEm``@giXU2=L;CG2 ztQ8RhmF9Kjks6C#q9P@HBAVh-sm~=*mQ!_zu5o7(Ke^AX!(&F*K!GYnGYY%rbK8Zl z%>WINX_hCUL1BMp3G>i*vn_{uyavzdA#E;s|Na=7fOq>dB~4CK9d(P|r`&fZW?O8w z6`Ai1T>CQ%GV%<3Su^uup$2v)E&d^Pw=RBj-#*X0 zRbvIxGSgLUS++_uGc(c+3`{5}xY@v%4xp5y>Ex_b11%GZZ<3S79I@8k zgMi<6x6@~Z?lQ0^G1PnCtu=Gp0>CaYwbUg%B^Y2i;{@4g{yZ(m=_?}@NqPTl)@i(9F)`puXh1_wu?XOp$?9j^ifCw#$fvt|>Xd_)}BJ)cczZBC3AMx2H 
zZ90o+OGJXeT>5YL@8dWq>2;^Gkvr=QuAl2e)d#L`KiXhyCmKM6S1nY zrJS@Q6BBym@45&0$gP7J+Ghpnw(P~L;nlQq%;tCc+3$^}GPPHP=_gZ*C`v7d!-O}x zk-7v*MN6gmW$zVk`7PMp>ac6hmWzWYi4ZvoDsn2t((icdDRzg~e#Q(~k?xbL!#-=N zDZ;p)F4RC)pffg06`Jyk(L#cl)*X&={!;`f1JtF{=`4xf#CA$LQu;@9x^E0p>8?noN{d4tm0=mv~O~WE;Q;Kqc297^Xw=k zL3ws*uSo(|vrOF%7kqwi8<=`=`}OD8`!BL?g{0BG5gMxuo^sBg6W$Ok`re;Yiq$tS zIyyt~BNC^E06X*?$6JRFiGLeQ^7tR$)j$$Eh|AuG3rO~T8#3J;&s zTl(rS)F8zJjhMbSDMl;E0m!WM_)Yq5`k)a+&QT?msLDYnwQA~3;wP=;8DFRR{oe+r zpW61PgI#9X(nTGOzy|PM6*hkMxM(V-ExzArM9Uyt`#7){GolD$s!H|d{t#ez_OnFk zC_;7Yj727s>H)c5V7N~LcjHmq19neDZJ0+grsWyNkg!LM1TW_v5P;hpNg&nh(mX%2 zQaQ1Na!UL1ZUXv>v-5h??fD)v`J|32?g0Jxalx@T4ov{3)R;B{Wz=Eb<0n-W{Cp11 z$HBxnxf6=IbIGXpF7X^lM(U|=^yAj0QS!An;d zP{>SgR@r~*L21y?gtKjBW+=D7wcP>j|5BK^a*7MB4qy3s?r1JUi#}E(-~7%WD=_OE zdFeBH=dv2|Ie)7xGsnOnjXg#sLzQ_bvo>FYlZ%6`=W7~&ny6n@{-*Fs2=kD{N!C_P zZ9d+x%Egy4kNT=%VGrs(@CnhZze;&{>piRKHNYb}{gmlRxkFge()_a4(LUn2PAcgi zklUje(jC%2Q3Cq(Ntl+VS%G{>NE}Pnkq_n$KMuG3$J48h>u8Qe#f``#vA3NV%Q)Qp z1%(}dkoHdxI8Ln1a`YV5VeoYfs?C4m0Ie@~t{QAK=C)jFv-RTjYSSs8!m*YyCFo%| z_pd*E7Y#^jZ%onopCLG+!k9`6B7q@=*NAX3^SVR+fp%vc0Z)%?iQ6Y|kDl(qM5`(BTI=eoAZgS6f ze}0-AE3t4|=lWxQlWrce``o^ zfWJ7aP@&kZ9f4$Q#+y~dTRFQ8=h%WN=YpaFfw%j;V>!U9_ExSOD^-Mbma=gJM^nL_ z{=H802UU7^fw_;b9BYr%Y#6Acx+@?c~AE2_Rck-14a%@BwkKC|{i=0&@V8lf@NAdD&U$V6@>5W+#dQ$vf z=;y4dMAvDkl$TD(*=?~b4np|_@yYoKc?$`{lEagp#wEj(qX}b^ze(}nU~|b-Sz_a6 z^UeEap`Nh4@V+`SH2PfThMavd&wm;?I?`g&`&^+Vjm$D@Z!Fb#w#5cZm|oxLDEq;f zw!OT$_s?liF`dRj@2OoV4oy;@cn|LHd5xWe!oWi>Dpy@f+8WjGFuUn7J$3Gelt(5i z@+{ockvrNe$@`Bs9Odq}+8CJSskgyS{V>Ych^N*R&&>4XlWa~`*q0I*ke;-G1_&WF zVmic;7#MH;L%O_ZpHSit|8d3LpFE~%JmBBj$;!E$YB%+pZ3rZs71qBmpxQtth#yM(Mb4~|y1rDN`SpI(;VsY?3w zBkFmmuc@-wNIIqOl9x+$zpF?5>VY@z_KpSJ;Qc|Oc*H8hPH$eze+1;Whp4yJZw!b& z1$3a-(GWBQ>lW<#j=yKg2r%WGky2&IJ*^*TA7c*#8BcyL6cyVyn}V{_#}XRGn)ix>Ir&e=51%GM+>h2 zUho-mw6a8{m3zP`#EZhrkD?IuaL`C+$R_qPd^#IN^{AEwIjlbv>O zNL<2qzdFP>%7^*l1QR2w&LnFu6-a_XnCCWaai( zysGp9Bte(|qVjM9If8wi^C8VYlc%sMRy9NOq*#I-Z>EEk&jdz~v{&DC2=2GpcRXkc 
zt)hNZ&8eZBq9SIgFzmiD!GlLJ{gXoI8-JGGQ(Kak-c_v1_`5RyP@VskbA~++Xn6cj zJ`Fep3VUBl@&|W3^U||P=nBbs)8qU?GS(AXZyy~%^bNq@jwaWRfBWS2WF)&iSa9R) zLnW(%u{x^yXMwT?Y&2uIzJWbmUXopl1ji+Bj_hYm(_qF%d0%hYcO^%TdYf%EkK2KQ z0loZ00UoQ3#09()RSBm;-f`0ILJ0+bMP_VF_DoMvy$qcmh~|rGA?&hwJxJ&x@sku5 zYai3ESIb#PX|->@bZ{Hvxu^9#V>7ODCoYKLGdA-`>!WPy1@Ws}nRyaFNn_a-s#6$D z;e+l(vY;MC{XA$|tycFYeqjZ(-lSxhH4i@j$Kr9SgG1`k6RT4Z-ibaT>5r5D_Xyz#wPdhA|I%Y=Mv3iz`k@g8}pDJAX_$@`dB;a z@@HM0$H&yQuHCdladBkzVCfmT`R?+nN8|w2R9U?t?68^B$$?meAyI;7W+&QA=;cj6&(P2taT`I(sWHY;oz6!XGm8DTO ze8sUtJY%)0Qbj}+)f4?@m!(Q5R0M?fL8D3{xfr-D^A=OwcXkBlhIkwo)&{XHQ=;e)Ne31s zLePl2m(NOwor-+qxV3>7*u`XFLS7LSv!8-X(rO9#3Kn&$1W!KOvnx-5YE*YHvg!&C zs(!huh&uVuSp~s(ijP0$ZTOVUxPGR^Sn@mVyxe+%4(3RxJp5^v8dF)ZG4x(AZjAk# z(#GUP38Ex&$aBAT0c0jtR}b9{nsOY7j6Bj*SfYPF6;eiB(XK)DgqKJ{As6U^Wx&dM zkIekzhC`^8NQpT%NrYnkANgg)YdK?5*I`R~x$%jaB)NHcN#%867Xw5%`uE z@D5N!h`u*#&)9yt{Opo);QvwxSi>Z%k(*k7VXU%9d5b3$!Je_)auY5?6TZx65^B^!GebjUKP4bScq`aD!;wpv zw~;yccE=p{(T%Bho>$ebhq#-!To`ggak*=g+`{%gfIl;lL~`&~{+x&=Nqn6b zP?8wpH3`+e`sg*iqlJH-5@cAI)s>9x1eMM4$>+&dD+H1iWy;D=2gmM;c=Hp**(=TK z5*)w*vaeVq#i}?i9?COe9H%bi@Ab~vddH#IhCJKZdec#884wBui0V@vg#|%;0ZLn7 z67IwWo6l#BsZ^g*I*kP_a%K!?LUdE)bA%e0Z7?IVW93U_HC8kte?i^MRb z{HJ?e{&~^SLW-x*$KNE0%E9P{&fA+j3*}Fy*Q%xo1fjUa%4l%cg1CS~OfNsC5%diapW(+Wz>_urp+G#2+XD3=!t6E$z3$%mans0& zrF$#=9GL{$cf1@rwFZ*ujCLE$yJGgkMOQAI{gHc;6tFZ&4ZVhy(e(MnfobJMl$;ph&{g^^(j-v51$!X7 zKvjW!;NXJXHiM%0;DTm-sKf0MeiVkQsL&R83=;b!KkvTkJlsDdOKv6jFj)AF_Rwf1 zu)pG~56*e-`K9mq=@w^P2=$i7JGRL)jBv4t#~XN0K>LJTO+Bp`l~}+go7D&9R9@9_ zx3_;z;N||^O1Ul5)NgF_noQu&fCbiW6nx9)U6n*=DB!0VK1$88d9fFryf7Xt80af` z%gXU~v7WTS%>?M!R={hO1{?R_c)P6AxuxC%4U3W&Nwm&&>5po)7!Sk-BQRQoqfzxUuZ?M#c|v3 zQ<*+SjykFQzXu#3!f|vJVHGe-9ai1v;3&9Q!NX@{yt;P=#AUeB>IA=ESxf^|3@RA` zM}gCh^kIr57jgW_Z@fIxMY5w@*6;4ND6ce8DvneB^kb7b0*A>um1WCeJLJ(CPsvfQ z636NTuUL;_I#A z@>P;O7EfO+h%x5@_BA0FUZmwQHHzyw8>U2!gSvk8%Zz8J{mPLhR&)d92X1w9mhlOm za)fPIv22GL6LQ^3>At+f_CtBUvS0X)ez&vq)!vioHJB0LivtIN#BsjhRoc)N 
z?FA^_b0!2kIuSHHav5l{9niSo8`!kI=d#==ez5oZ&uRP0aMWQoKUEFS@3;%HqGw=_ zT^Z}Hz7!`6huof8Gl0kVEQKcfl712`+8mpdkpLu_ErrfzP^p?{!@reSDEuMKvN7Nl z_<+G%=kRCrAp`f9va#(xSYMRZ7LU;xwA%3_3T~T_^=x{ps!qXQ2_lTx^>7bDhKSN# zf}=J_c#>RBz~WQ7_eTB+;rllNzJ&%G>M{0SIH`%hU z>LsrVD=R4GhGg(D&B3X$-A2G4sPL&l+=1|Wv<$Ks-^vARf%q=DSuGHQiT{8BllnVP zQ6j3TdyQ9O2?)@DtyWvhaJ$f88h*boD)>(7@&s78U>|7Tq-|Q{zaD(B{3DZP@6@SZ zO_KIrg%~YpXW$y+gY~9PQLn#F`#+?}e>F;>Wae?6YJoGDuWU|xF^BHD@TMC*Kc)fu zUcD(Dbfy-4qAq0-q>sq-r-XIsTTmH2GFs_C#7zUq%4eh`Uaa$X_-{!Hhd#1lEQkt1 zl*Vp$-Yy>oMdCAAlZyR-zbwKaKA4;OBI~f8uZ!{2T|(?P;4hSkO*9J zn|h9`j==}Kr0(U?m7z$ptEjV;Bpa0DZ~@=u&Q1YJm%)3U)qN0R3F%_T6Fr&Obj+R zsbtsTxqV%vi8#*mkGm?*q56j6wBp+^<*E3trU^(#_lV6ua($*nw+*rEep%TzvhQT} zh};5wa#o(V8Ytt9+(r3~b^Q>jT8*+=yy2xz{u;Yc0C=s=t4UIA2LM;ufpo_$d;Wlg zD%giZUxXm8!)Q)exa4V zz^M7xs0rARxlo*|m_pC?w4eU7_iB5iAb0t74HH4gz4wr-a55onU{*PC-D{_lbOe zJazT6SyKqTe?hFEhY-dMtp4!xJ@yw*@hWlpJnqRo9L$$+#%gxL|9W_Kuqe@^@oVv; zVK?a){JJ!gV&R!we^r*VE3m18;QPMc83JP-;}1S22b1d)z?7*QB>t;bzTiWc$48^z z>t)-=Y8ArHRhH~bW_bI2lZyE3QG%}~)Ak|Wb`ByQ_kh7|wKLlRD@Kcd02_Y~f(6M$ zg?PU}*-W)430l%H$(5kE7Wv7E_v2>APN6VY#QhtZ{;^#jCekUDH4 z_Xs<-&E(hi^*!V5{QthxdgS^a?UY`WA)X0j``DR9`Z+EzkzsE^m{tI-6bI=VW5SDd zeDS~VH6S`z6PB0&v84Ge2cMN2o-b|H7No@|Gsj21gqkt>u04x?{JxvXyL2!#HoojR zacq!EU<5KDOrvcRAQF6e+t1Ph0pCn8L|ZHb)QE?p(9GAJ<(4q<=dWi{>7Q)_h5BXh zbWO7w^Y+)5w324yg-(IvCMdd7JQa{2#0A@Dq-n$r(R9}rHoPW?FaszajJMX@19YOE zrG$MW*TCh2Z``Q+PouB>pqrlUAPOWF@}9`yYK*ApxN(d2_-olge{hVob{PPTTj7Ry z4e*sds`QWgo>q$jM;x=3`uoyRPdIHcG5Cv9gXGd~UdKv;geKfk(+OtM7y_t+^bgb} zHT^GjLiszZf)^d%W6f&ijJ7KrMm`)Xfc9-egkQm4vM0wvh|bgSNx-SQ7~3bcEY5Z| ze^n$ZOvtYYUl0a4Bq-_w#mo*YWwHWQF7qdjDBe<`eY7bu2H!{Uy!OX8IA*z&o#+8Y z2Zl3K?7w(PT~?L=!dvkT$9fUT;J{0SliHC#$~OKovuBMRyC#$Hn*S}=#-cwUdkhXq zK80;9ez@z;B5$Pn>BXOt>f>?h&367XZBOCD@=(*j2Xu4zU~xTQ9{wxtgdts9f0l>1 zV+FgSE)4%82o^72I(Y6h|IIMK@O7a4|D01yXr@zcYW20=TQB4!+SZ(2tOqh=6uqWV z^8UNeM3MsoerA2CC?-C)lD{Xuw+!D4&jW9E4SHKawK;V8M8AsejFmgv5A)n;c^LNc{yYzybCA$)(J^MhM#w zW(2FP5T-!~n!|5-t8vlMuxaV7%d)}Z;VuvIi19&_KZd*C9O1IQV 
zdFB5+pRIuc=kqKSTYqe+RtOY#*>A}*G~%u`Cupf-Z;6HJ4rg1t_XTdF%D>D$kb)r9 zL`i0b@5*1iRjIU!bb3RTkV2H*%zMvo$?qZWc-XrCVLpoi1ubk>`II9bK% z7Wjj$&f|z3z|(QK<6!UJ3uJKxdt+n1B%a9*$P9XO&VkndsQ+z|8B>L!%G%3P=dlml zx!@{yF=qt#0j1m<0SK{dwgYLbCg@Yy+Lr|#VBC_L8Rs{phzsX^!SrW~H}vE3$!rMy zYTUuVWv|+WWU_>VDHX;O! zq6Y#b&d0U3Ym{=(Vs6`KhiUtaf~4k+5F~ca;Q7>84^d(`!Y|niA~c!vQTpgZyt0^*yJAr7fi;1yNEZ&kW9>77Rug9qKm@G?{d%t{;p)tc z6q1@w-?S+f|&xqSP;QvR%qDc6_fz>>u9 z+o=H-V-^29?Axa<&d7f#UTGD?>o}eMyZ$l0GXBHx8dJwJMn#?Mf8;||QBGSu-p}Xy z*|mEVBvm>H7oAv<{O&RL)K9Pv`rM`WI2S@Z4v?i%H0O_o)13p%u#!l8lagq}Z)B5- zfmO!eD*EZ~5)Tf*5ugcD?n830C0BP*)moGVHFEjxb^shb@2>dA{++x{cwWYT$%2dUZ>F&xP zovi&H&_5(QrkZ2woZP6#ZQu8VQXLpzEK;%lef-z2(1Y(nPLf5$BsiY*7za{9VHsT+ z$HuvZVcrM=k6<}rzQxRiIwT#r#fm~2iL{{nHby;*)Tv|`-au9cQg=bZ>6#{BY8~AB z0Qm|wJ_0c=v0=I*?0~fFGa!4vk(!)IL`e*Z0HFuFMBxwx8#x^&)LKg@faPG_&x=TI7hTN`l(Mo-{@_eIx%jj`Dc)qcO$2Imyja?BL zeHC()YeR&w(?umY+n@|ZlO4yo;fTD~R1Q$ef)|4 zd3rw*0qPeBq;Uk*$ZZ0Q!~cAH3%9@=PXjSMAj72qb-+P44BX?^W;M(`@`VkiM>ndSE_)rOuu7nI!Sm(gm(=G{n@;}zD!ohQ7fW{Y+C zcX9U4BEtUiQHBeae`CU=rC0w3$M3e!Lf`J--k#zU-;;x9nlBcdmJdTN#ijk|`Y+fw z$r#_%@<#F(Uv}S{z4xD>jBR_N{Qofa<$+Z0UAT=R$rwUpCYeIWJXfYP61F5{p29YV zZHQ20oop0(EVye@rD z$A{x`bTPq=tZsuy+wBK|)v+Ph;Pgl}_HtIvRSX%QyfhrY7Ix~WO4!C({~NEVGQNzt zHF{KYH~KdEFR(7D07Bf5u6xH$cKAYA(&P!7mgXY%{)#dXp;!*~mpR;iTWs=7wy=R2 z@$v^DYL)nk)(J`eHi-+5=~Q=117^@Tn+7T>r++Uh?5J>osuljFfD(l9-$_PTUyQ~4 z?FZsVlZVWbAp&G{s7P%$io%e)G^$2HDk&9rOHWx+vD=bk(Rt?R+EK>cOu=U~9rhWk zw$dg%lqED9QxLNE9+r6*#;)~>cY}NQKJ82L_3#_42jBACpGhFT(8%L`#dcHpp4Tdk zL!~ESF2wl$TsZcL|K?gb*tz<&8Ov`HUt12GsuNP>8kItSh`&~G%=6e3tRC5c!n0a` zoW8=W##`U)Jq!?lqJ(gC*@?u8Z;9y78frapowQW>o3+Y(@?){3lRnC0<_fDq&2oU# zcXK9sbvw zweV6{^xTv7))Pj<9hfV4BHf$2KlUHG&uJq1g^F2OEm=qAVx#R`q##D1_EYeo6E~Ej z<4NCke>;FKq~ogeTVGZT+Jh>DN&6?%B{9#I_cNJ}tvS7ZV{AYEeXc7`Z^@nBonesg zA?HfMrzR>|5-cqma8ML-nf>2hsBT_PLFL>TrA~`q={M9%FNBjH#jqU7HiQ8Yk#Qaz zDzt~az?}^BK-He*)U9w5Mm6oj=P<)c zP`g>%p7+Nqx0&5vW+u?O&p#LuCG)wzwxNIaR>JD+^%7Up$~zvLYrj1DPX*Kx8=D`3 
zkIV&vO}&(}aI@AYlK@clx-eWG)}>xK{BU4r@UIG8~fGqI^faN=UI%rbY$OJHyITgy-O5CRC)J+9?W zn^t>JCvf_`#05W;X%P5;c1F55d!t^_CfTzA&bJJk82Ww2?;9H+@B_8d^9i@0o|A#?xD~l{7VlGR4?Ml3<@s%%zdS|h)t8fa zAwq1RAo*&H;#yv+yjKzFs>VKEUwN;vqB!-{^VvF2o1Y$yt$+K__f1>x4 zK#HZb|^xqAaAkYLv16@j#i{u z`}D5_4RtU9iJb1OOw8n_c^F*hO@CX^_#F9uz7vK?+KmD$V|#E4Dk)uz&k2fDHoRMg-2%vBA#&sL&x*&WAQD_1UxUP%3T;iO97 z=#^sP!REp6u6qq79XUJ$3T~WQ>*^RjjYFzDR9OBPq!lyJByy08mp$@{J@6s{%~-Zh zj`4K*iBt#j13CjS!MhLjwZS~g6~fwCGz&;1R^4+rGZ()U?lbI9UKsw(h4kKiSgO^d zFIZTDR(fia(giQ{+2f5H*jaeQ% zn*}d8oD_BUlh-FV9Dn2S1+n3YWTrIP_kvh*iIz$pUs3Y_CS!YXj`Vjp13{7i$Le7L z{VHq6+(R0-$EC+##lKYNdgqf?^zg^YLjq4`CekHtKd=)3$qQXbjtks_TULrd7_Y+X zgZO(DxECREsBDFCUytMWHl8W&rsH0_vKoH?R#HJ(4!-0xl@(8Oe1eJVIj6C@@VetH zxGTh)k^JQF zro+QAs&ux0DC+dQgE^j)JA>}4YfjYZ1xpdPR<7{RxthMp%F0g;*52ZhR97fZYGMrB z^Dv=?r;z{~5Cf5=Gin02_7#ASX*tZdmn$r@4Oft6rmvUI4r$KG^-vn26V2KDz%2@= z9C$nOLahR4oxxW71V4_CWIHTH>oBTLj!C@q!UQe#W1WUQ-s@5_LHtCSrnwJ}?&#|X zXO!(MPeB;&-O@vSn&tL`27Pjis3Oq@w47@;mmpM$7jemh(F=?C~)sYrv_ipbIgAUai zW25>T42@^VikubDfrXH-757Pv^8}l~%B0juoK88EMeO7WGrE6v>`*1VX=kWT%5D7A7dR(nHQW$V`K z-I|;AXW1&do*Y5hTyxt-PhD~pGYo!!BTwNgs^@K_!Yz;ntnW;)<@ZhW&W>dGFUu0j zqi*1_QF-Ud?^OGdjSc5VF#Q5^mn!V3MU;eC9a+$jkzKHgfOQ$;K6|owv5y!}4YX(D zDfwP<;}$9U2o|R_xC)i0m7a%nz5)r-gueqdZ{Fn088 z$aJm1vIZ`P3CTWtw>WL| zxSYA3NnIlaJ>LoeT57&&pILM%$?WgA_c~Ul8&~OSVduzL%lFpDkQ%c4-t{s#`9|9e zytv9YWZw3e3SQ*Q{0Xb$>pEvx>ino$6VMLQ#X=NoMYAzkUw*q!6kY zV6aXSuGMdOA%jC8fg<`ljsn;P72baea>A=}0qLRZ442R?Om(~0IKq!&4%J)Scg7}~ zxx`O@b-Qa^sv##mJf!Alcv<}cvRv858^OdnkdMjG5>R>D52>Y!G<{fZW=P>188!P% zRWFU*e4Z5TcRE#v>8p0$g*|iq6}SAcFBXH;(##z->8+Enj9*GGO^P_q4*2!YRZ^3J z#6hjj7wM%D&P=@4Fgg8&@qH*UxY;^#2qVzoH)0yhr1z~7#Bo=%PLCAwP!x_Wfi`BX zPxz^_Ag9;|RW|!O#e|98vlDBU)xD0UXDq8b7(uTqVR7VzU+}jhUqD_tE`loQ<|HHn;Lek&s=Y$X%L3zV9P05uTg_C5%?TKz4Nj^#Xca zWlYMn4*dEVGX)h>FDhPtZ2pP3l2quP?*HcgqpxFWrlZ~A?U#B~-uq(4FGuD(a1&>b z^%Rbi{hA%E@SL4daA?V46g{)W1w7V~H;fNG;&e~XXX(!8a?1;5JOuPz!xTY#UteZ; z9|vRdG;q18xWw6~iKcUjXVezBMfm=fFHbjf+IyORo*`-0iY`BC@UFaOu4MT4?q 
zG?rKt?}XErtpid0AF|KDvO@wE7CwI+-ST!Z8*j%sWOQEXz?YEMug&~dpOPHqHu6UM zovt~IlO68)*1yt_>SuUsBG@weEN+-s^g)|C=KHKi*QYSvIpBMG-LagGU9ZhihQ}mt zKVZU;JDoSYwD^jumObvOq4|C#E#c{83%V>u+K5%#?tr0pfsVEBgZod*&CC#tp|F$I zL2qupnC<)0z)pyk86hWl>D@M)aC3YxbUl^)sN(HSs=h{tGv!gI2kZM6Cw(?I3Vho@ z$~lU<-Q*tL7W&Gksp$UbtNy{DpnDshxYBCs|FyG<-pAu!DpGX z(`)Xh2fixIf0HP&W5g(1DX{zeqOVEaPM4cni92PfEQCc__4*J}r)s&lS8#|%GC0?| z6jm*#WHvGC%c&kty%EX8zK3C;pqaZIWPH8EBb;A`za}r@Yd!p9u+5>k78H;)j4n>j zSDZKY>`At+JQr86Fpvk=_zlPH&tP?@)U_+dbwSBIc(ks=o?_UYIHclOo^Y&>QSOk7 zXw`!4Y&Sw_0W$leLHe8H;*x^jUqJC680;XY>R-86LyEGHKb2Tv;5RJQ7_hZ&=83P9 zz-8H2>0;++F#>$wx1@`OgJ$76{LnBaTcTr3_A;IS$Sr@09Sfdm?%>`1g5Lg_Uiwt4 zEJ+?a?jcv1VrdLF(6XU0E7Z9BT<5^hq({0mh%M~)2SR&QY_K^>=Z6F4+JqIaz4X8c zI8`oZ+opA~zic8=M7h zZMz*eva~9?-4vgja-Tm2)#LyC4S|$&RbGLp*8X)S%0 zXMS!wzTT>EEIFOdgCqNwyM=7yB6Am{hdB}rhVP^LoN(*t<+T4Pvi#?u!8bt-OYIwW zx1%V`);R|+D@b@GN_xN<zi~TQF!{_CUIJ;lG1o%qiCWEKQ8Qm&aLZn$(?CYB>M^GFd;qOEZ{LKs&+@1Cn$P1rhM z)$;>hA{8`2Bw?wwU}TwHqIGjNr~A#UCND4V)3RSfIWgs>{p;t^JH2m2wcVxRQKiH& zOldOpn|f3CS*m{*_XOduqLPetPj-tU=ecyf#|#eaFdu&_aVd z4NPYHP)MhOX8DJ!Hlu3=*f+h_eftLW?mH-*5EK*)5xjKu5J!3D^yi$KLVnTAJL&dB zSwQ6d5c5Fa-`Jz|PV1?VOo$DQpdqz$8VYgCp}nt5?Oz#{yHAwoPucpveDdUSiOrWQ z{jo2xZc;)8Ki8bfLe?jhCOZ-svyqVqMmiKs9-jgq^sMQDDj&Xp?@R_c6j||-;9e>X zTBM@yxazn*ay1Y5ug^M8H2?BSE!VteWO>9BVA&du%eDP7emYBwgJ7HRkjMgA#%ATm zXt(27(tPqMYhG-ft%KjXSZROWlP6CY5*f`}gnP67at|2|n8rQl-%hnLi@5dAD)VpX zXcVN;?lfe$R=^Y}IaY1LYrgpr{6nMeSUsypYS&l#$H&hip0Ou<;pN78%!2e5etOSv ziD{+BTsU_$5miu#TZmYndRgd8Zmu1&Y)*SK2lX~VPj>uKv$Vk|XenLY(eibq=fkDCSC}2VMyqse=#1U)_dl zaJ>T!UCe~=TCC;8tFN(hbWql3t}@4mBmKD~-LP0#ndiHYhge(zKY91?W~?LTN9mh$Z} zDzF+qU7$wtcbqi$DSva|*u-+G^X)3hgqj2FQ{Vg1LfDUu`S8=l!;`~vKzP3rx#yI6 z$Yj(kZNO8*py!y*{qW=!|KA@)Lu5xRwMTkKEEU>9o4`=?jmH%%t~z%!&O(ZMY3*Ma z``{|9G7q%of31kabn=mjy9HC7c8(OO$eZq$Yd0%=?jmel`YT{n$@seO{D)1CCo(bJ zGVdR45tTqo&vH`in zur*i6In|$oGV&N)5$(+SBPd!vl<(>lX?udW<1=_H>7H#F-ucB@#kP~x-k3I>>j<&_Qs^r`fTx2MDMm&3$EYqI|T3L zG7E?}C}l`A91Cdt=@>cd>HaLuB^tqKC3fo15iMRgO8@!cvIQ~MRvfo{rKf9a%CXkC 
zIEiD6M$Y3gB-a*cJA_|~KwlAXi`d_Zd3D+-|@pjj@76-R0==n2-HK+i8 zQQouyZ{TUnHJssU)TH1*;ko1TrXWAyVonlvzERbD4%RTJA8$RY=KM4Nn zU)`sdfc}9l1)1?{U_tv7nK_QU$biz90Y@1(BH#e1myBEcU7LhUwA0qXmCq#X@UcD+ zGWl7(K3CcEbSYWZEtkn2+vloH0&{|IBKi8U#0W#7VJS$;NX*)K-hYLpeR#W;gR{Q; z+Hfw$ff|6y>93o41i8J#9q94+YrY)ERKAf{p`1WaE<`;yU~}4R?se_q#c2W@|DLCm z@O}`&?%xkQpK-?8XYRR-b#`Zt<`e#5=iq1jC7ld+RNx3~bDAJe)wT~rg;3;Cp+~G* zzawr7g+5^Z1l1XhB@7;2tVvrnEM9VgqtCA34G{y7OABJNf}x2I-Em_17w?dN{q#5d zzk_vGC5XNhdVDH8UdHu$dAhT$B}J-fbD>Fe4%!L}O~BAHkvguKbvPb{2i5c9iV(Kt zbZ;5>HJp6323pEcsRMgS_RpWHDUP`|Q#@&UCz;@ey3>7Su1Rzo^`9mU$VIyd81^GH zIVcL12Y)?KJM6;hSeCU8KiV#F>)^;&*zg+-`3^X*E8VSCb_)z*s;X#GBYB2F6#JSx za>JGMOjIAm6D!32c~X!g$P?;=Y{k@quHu{*NtIhvZ1Khr1e5mFWUiKi6(H5@`MGqh`a8zlK>56csk zNnoi^whhuuB|hJu{Y%(3I0lhw`}roX`uLv=*(W~kc&)fAr8>@|qeS|fh%h@S>9Knj z!DU7L>t)?}3g9_oFet4wdS7A+(@v<_9thiG6@7)?1Q9tE#ix9}g?hcYP9jYw8s4Pn z%+aFu{CbYR+&aUn-VYv>dxKEujq}^9(A6Ge6 z<&Zq7m6pq#-URfbq-tDUTc2RqrH>qg$s7EzW?)UD+EvFYdwRteA$7dAQC3XoMHg0>ziH5+j zxgEyldN%ljO|=bv$PEX+6;cGJw2H{xs>7A4QEd9pC0_4|G6mGuEs;DGvKq8?)QRp|T2Q86FUNBe$c%y}|9r$6^-gwIl171l>nurKu2`5h3e-;BcqC*!$6= zQHqk{sPi@|CJIw|IY^Z-^|kIEeScrMn;kX}^*dr~xLt6>Pm5H}jBJ6hF#`_{@HN)| zF4bPA5uB8f67(gdeOHJTDl*}5;Er4#p!vp6 zkK8eJj@E)FqU$LN)$QK}Cr(qgL+h;UbCDPj7G6e#ko7_M3mc_S3i{4|-4BR@$#F*p zVNmrJGERXc;kkkQY{|c(ON)viMPc}Tb*&r9x+cE@Lk|{WUQD9m>-+>Zbf~CiQo(6 zAjsq)H~Tg&v-lAq@*|}OI|h}_0dCrDkx}4AZfRP9*sF9-gG}9qnX#hR!kG7Pm{%wO z5O**YDo_%+x9AV{DT?#G31L4uwpYT|XNueUa9)d3!kJ0|{#CBW_0UnlOJJb6(5uDi zON2TlV~H8WJu?HS)j|)sn85*?{krBn*c{U20r57bQRrd#PIk20Z;+1{y70l^i=be< z+X>zsR7&#+V*5^l-vN9G2+5XJzAFSv=0NQzGKk?V7!r=0z@=b?Q2ELq*I=@t(*3(p z17&u27&ug=Wv2E`Y?sQuGpaYcSavw;G)^yL7;ue|KC#Zi9T964{Dq1FxRN&QZ3WVqA-2&Z5GH@oO!wv=Z|Edo_=w1z6ptuk zBT|bZ0yOVl9p& zJ%#w*Pw7VPyura=_wc|o{mi*4saik8(xIScYb^w$Qm2Yjo+r~7vT*#C6Qfu$Pz~1K zsM}`pirv;gm@GB$>4=QH@JdKwZk*ReBNmxRL;>$H_UnWdj36K8wV%>P6_}E2oT>W+ zjjk`0NwYmAZZH*=HaJn#B(kVFeqJb_0Y8$+6)ITto#(1IPV$3jv*A6Q81m0j5sm-=CqQ}sVl1E~|34Ex+r)ZPrO0xA4 
zG}*H>*NDOIbiH?Gdpi6c*)&zd+*E;X>S;xcQ!EiN&+-YBjv`*SkGY2F1lO?ll;gej z#Zh*>rTHXuI{0m;N?5n5qN-d3w7K>@;!Ht@``{)2IxKnunWVF6^7KXD?oD zp6`JZ9x`x+d{UWbBI7H0;RKZq(iK@?G*v58R6Q+P+I4dajpMjin9U450W(OpFh~+Y z0}LjhZ1RWqp+?$)T}a2QWEC!21V#2pnae0{S7;J*VrKQq2qhLF-8m&07HI}s71!-+ zME6+sRikT*wlMU1dGt!U%o@Jxd3CH32OAW@Mq;Cyyg17_!nw2D&02K8k{%_y1*)fv zLbnD0!a9PYjWg24$*h#ZxVf}KxzHS@LZ&(cT8f)N_EVL2Gc z8U7fMW-il+syLxZwC0zpIWm;zGIB$9nj*lC>p4XZI*02nU*Jj@zX+;$d!<1M@FEk6 zR)to3R2etcg^`MenG>xoP4U9^+hv0YXKcp5m>b%34s3S|N$CrRXy3q23zv8UpV7*g zm~i=6>{be*zha71Kcu24`rKK?+DU6v0b6H5JKbcYNTfD&Rr8gwGzNco8%(Ftt4|FZ zJ^(Gl$^Q32ud8dxQVa7I{m7$ZPx(O}r%W+f2^SK?-}=5pF^k{K!VH}kxQQ{Hklc@v z#(Xj1;@UF;@nX8defp~Y6CEL(m0UGNp*<7(Ug~R@n-f7)RrXO&oNL&EeVQ3hFdK%+ zRDR-1{YD=bu=bN5u^Fv^S>}%I=wRoT@Tc-5d)Tw2c!_HTpRX}pd!By&p{&Nu>s)=m z-UaUU`*E{w#W|gqKDWI&Q0N8dChwH@L3(p_#jS1)>AopG6Xi^{iHu17f0_D!Wd#Er znl45&Ul&iXC%ko}4$_&rEbb~Izs6{Ei?&8lkn>RrI)WzgD%upTn+dopr>LVfE~v}& z-)YzFha#&VtP*EI^@W#2tfk%=igSvC!3DsbZU-Y@jk5JIaYpOb)d?j$sNwN1KAF;$ zu-P3$Y(BS`%|}s4{)%sTHLrvy~~j!yT#;gD@3YqRmM5 zKG80Lcp?_*-stjW_RB4)cz!w(*Y_=q-DR-|Js*j1{5uAvFAnU~I_jr>L;kr|VeFm# z8lG{{2L@F60X(-v=LtM_&Q3`xcg0T+sJrB{s8^B zHBGiXvKW4+iosIS(9^u-c!ezPT=eTaDswNWvdq*Wc9Ssb|c+M6x*t1y6+-_++{Bw?V!nTvUIL);!t^@F1aYSQS&XsY zQpYp^eeZRGK5JpUDU5PFU(ALuV+kSd%^SD-bmu*xQ@YS_+Ue3>+8AQ3fxcnRg<&bG z3Bfgpck7!cyV-J^2U$e;Y0q?JP<0MedOCSR|D@9g1-Lh!bauRp{M8rhPZAYiP`9xL zjiP4*8?Oyy4lH8lpylF-%!aMMb?E)>^J~w|Q`tfp*mEPogk;awH%0H$%EFg%l>YqY zpe7xFAs0*_p79+^Y8Joh`ftvSONdY0KkJ#lr`vt{t>8-86zRrl|2ohI3S8UHle^5J zsKmNAiB#IZat>{n0?vofgLNWIszkNQMcnv!8dD)f!%ZunZ1HH3uC!DBe4w8Q7%J5` z)>28|0b{>&vnl(BY~;ZF3H$xJT{-gVJbo5-&9h!YN3kbbJV~zohF@?^69_K^_&+~Q z0IuMSN{Y2L*Ds`c1ei)lJy@_K0R~%fm!Fhq{rJ}9R8t(rL;m8t zj^#E?C0bGwqAhe&Bn9n^>Rf1C0c`X94#dArM-_I6AkJ90>aGxre~()uOrpjc6=E+4 zq*vW7Z9MU^42EUOgbHzAeR7Ispw7GlGjti-$Bwkp9HrfBgl#_6v@-gdYMlc(kOIr5 z1bte@ z6Rqjfhk*d>R{nr<$DJvd+Z9UK5vn~K!8#@i4TMS@#|-MKlH9< z*Oq6Eos@9)aMC;KHlmJyGpIwJ7PQ>VPQWagVt`vr%e zf3wExK*kULVD>DcH+aI8XF@^k)xNzo8LhN*CjbnFL&9NH|92@D?n8bj+^Ji2qCW6l 
zeINL}Y?d-{)t2k3(QxIkKc86#0=o>JiKt7Bz)Y~%$N8@jvE5}*w%}yW-W)KN*N4C0 zu`NGibX*L3`%AWCrI@(BW1lPCKp9));7+(p2o#8Xa4(TKk!oE%+vBHq<|j|hD>v)9 zy!}nYUsb@Q_uW4Vg39zw*%@!xGKtM>dubn8H+YFc*c6p?WB-cSOP6+AvDO?LD79z8 zyH!u5*i99Ld09KKMw4K#qa6HhSJm0Ml7F0mLLmgNNooG)5uwnteZ8J^NWl$u>;HT# zd^nNf`s0#xU^ptwj@AMe80R8aubgu_Lhzklqis6C2J_2JK(`ANB7?dS7KR5G@ z^sp5+JJ{~ANWl<4`J-b`If;PPW=08`oVz*H{4T9uGHZfZJ@i7+RkOWsZANREB%Dtk z-e-ujyasbNH42NXp+RS|0%}K%@S}{>rh5&JnD^jPlFg)9){=_CU$=d+cnA$dOG5X_ zzc(y7)pvXm|BCh_Sz22oXGo|{;Sz5-?2kqV-^~4GgGMcUEVhU9Pt2wBZ!>s$h|=bHzD$-r5&vpt8vMB&ObR#|3V1dep&iK4 zVJTq(gR`LA{`$h%q53^BU7-m_Os1g?Xt!j3{i6i!|1^>2QGqcA#ML-TQ*hGpr{BE`Kx_j1WgBIM|X&$Y^F; z2dg#@(7(DDd(kNO6L#Ast9%zYQgwp09T4X7xyhx$?X6FKzvk^p-+vuN z-P!yJ1pe>&p#QJG{*STCfBS&&3mCARMMr5N=r(+3p$uU_mC>g8wLJw^^Bp_a8~K30 zqh59&Tw8UU|s{2LeqATZ~n_ibkzWd*@Ya@KI>LXZL z8b#)7{?R%AZ5{nL03sm$ZyyUJ6{c(aH@;M@4;>c> zCaX7>jWbt-M_?r5cgryz8e9khT%gsnv$gGe{V=D$p?LCJ0qxW1zb1mW{(B$r?-0El(8{jG zAmBRu3xL<0i`5o!X_H^P@3S%-w&P*~%?2saYIZ;*b4uH*`m^lFWX$2;7XbzoZ2p{o zeWi6=@Al)`zJZ*E@&oMtZ#``0k@x)X??m~6AHY9Gx~sJ1h`~f>wgx2@62_VXvIBGG zpXc>YYYXWrVJ}wBZCO-L^|1bzYS2|HBPwUtE_dQNb(PETq(6RuLT6Gln(b$TsfQ5> zx>V&JHefHB3*;!`rFos$w+(;zbD}M?(Dx$mqpxy>^&2Fp@>fppA(@_4zwa6<$da@- zvyOl?wUKPTGaWQHYnxzp366L+^tu1UO5`+B)gE$UKGx1WD5g^+Oku&R+2l*!7WNP; zI3$lOd4b@Hf8r&yRIS9P`>$be0Opl5Tp3ZT>rY5b&PY4A z-w(WpQ>)oNV(;6LKhno&a47lvev-FX5K>Kxk39GM>w@~FhZE^>N=neT@*W(9^xOyj zs)aqm5o%vcoHM`;w!#71|GjF>c5D31dc%0~O|t+_=VM^Ql3YUouKlTpjk~f>=sMq# zucd!}N922+o}5in-p8e!M*B3`<}XzFuTun)Mr*<#m@+;@fT)G}i*Oqg~6 zzID3tQyhN-%YE^Y<+=XeV^FlrA>&>EObVxaO6>Zpw+Y*uO3*%;?2xFg1`=>930Obf zY)%>#25zwi1T(lEG(wn^K4^7LT?H&xccE6Edd>;vn+rSxz~?n?eSL6y?mNoIfud0M z0g**t_F!SA|Jrl8wI4bC2Zde@F@lR*1tIV~B|ONo?H25{IbUFP=Mb)Tu(UsiS7;hW z0~Dm;#`Gw#Fg5CAh*}yvCr>sxpvu-ZLLIQ8W`FZor^o_-uiWG|80B;?O?A8!F}q&0 zu?hq9osRv^u>~RCZ3V+4k)j2rDeTMUgBnbxZ|ByK4kamdaHHM<7`f_`=EtwO-6d8) zU9;kA5(O&9Wbv>yyaNgUBT2yMCWri0SXdm6$~-3LaVL!3I}aI_*)&TCA-kiV4gdWy zGU1h)i~{sYafaen&CFQ%-|OSxWKUEb5Mr}_{xo4>Pb;_zjAGR=AIK|#imyPHctl4H 
zCu}06XAp`ks_|UthOFB}8PZ3OtS_Ii|6n|em{cgW0ijPTWHt1@vnC**h6=yv(r|3D zkoHZ2C3H!N?Beng6ot?TiF%d>`^dF|mvUEpouFgt;fnB}+j`XU#CK;1uT}RYT@3WR z`|;-Z@q6ndih`7aqoIe6Uc6R)WEyo;asdjvO6aThLl;%QL_2^tmAuo$`Iw15pzIBk z?nCH%+ULegKI(myd#3ml#k;+^MXDjeyxnAmaiS9KNuz5zZ^U{_h*UE*+go$M`sbTg z?5gG^}`kV~6>UrLoR{Q!laFYb4xQVT8~QkDVg z`LD@tG1xgIn2c(HtQQ%i^vMpraYLg^QtSx~PknL<0Ny}E@&F5-pWXZw_5RE6!Xw`t zdE)_Xo;#U-%SKR#?xF6r80&YgrJ7EsHwYSyVwHB}6lbCdiu>M{CR;4RFbWm@I2F!5 z@;!4~rvSS95Qk!)Q*fT7(*UkVitxZa%(fbxD?y#rduy)79Ea?4f66%EMxprkI*W+B5Lp1k!y zJMH!LM!ZlbuS>}aPnFxt1IXU=0d0*%{_QxY(dsX%f;`)Aj=A%9rtp|>;ffT`+zs@5 z{>+B^c(ooBy32C*Iy>WNArOj@;i7-(%s@`F^QO7lyvrMU7at;0IKy9s7uyE_0Vu*# z!GdHECVuJeTXEi+H|SBlP}M^b`Jv}&%>tawHQ7u|~i>gRr^^Do@ z5dmxBkfp+GEV9Dnq1z7B#sB4WvxN3?iG;mSDkj# zKFP`!;;f$M)B1Ltm#XJ%khNsMmT!${K;%+$+2O|hb8ax;ac}Rbg+SNlIf>4a6r%Uo zL#_IQ#x8g!LeN6oXu~YM*2)H&8W;1nYjwtC!vXf7-LTe=2ur&<-$_YHn&k$%b(c(} zlCs-Gcz&kWRk@XB6N2L2s4$*7IJ?SIH7I7*TMU&B)OLty0OZcU)fq_c{HdaB0_0`Y9Nx=2BuQ(dWWi0)`ahtj#6Q>;JJA+E2taV!gDW8i{HHr zr>R4)_Csf(M4O2$m?zsVtNxQM^c3Y`0Oiz;n@Gzo<1y8zQRc0}>NVTWVr%{FDx;eP zlb7^lKeVLFVLh76hJBGHl>SDBxP_)^xU=>KM}iTC&w9=X|JsUPn>;3ZQa)onoQf?| zh2c+^FE+>!)UDwI^7Eyw}q!Z|Z$pg8VPIVb{V|CsGdcS2%%-lc~kS#@tr&9b59zn_St)H^?L@Pi+ zR}BltuJ%>$WYHprsyTo8FmI*&*l*I-ZJjxbH8=g`bFkCQtwoA5e=?0tu32uiTL0|g z%-v1q9}Ir{hYpUEAu}#+>7h6IN4VW0>8XNb8y0Dg3Kz~NZW5C$=Un9o(0&|kJ3xNaObMpK3Q!$+|XmvYlviUJhE^l~(pa~+I zlj%;}=fKQ$>%tah{S>-sBBupp-Ij0od(VX}kU6a)&Vt+e>=8{hr2~y=G;>k%O>0DB zt~+w$7EKAl)27xy#BZ!evh%Mg=0fil+EVgt+VoT5Vn;>RcF@WNXgBW)jDAt*nQA% zLM&(s*xr!Gn4~^C$~lr`5k23_HBqNh{j@mz-ZJ^hDDI(NAKiV9`F27&wA3l@sicsOOnoq9!G+ znv*J4-KiB;%)2QWG?(M23 z?~s>nYCoM6G3c_#I%%7xZvM_>GST12ibU(_q+BiiCDYTMkXbX<@Z;M-Wt8TfN!dCo zLvGPi&*TU9W{VNi8e%vIZMD*RTSM&St&GCgSH=$=r)ZL?v}#@KvWQvTX!elzBs*y6 z6E%4*x#>*fjcUOReh1|Q&N;Vq5Rk|d3SP*9icx8@#VuxQoWgUH!^tJUYvE_JM2gw( zf>8*{?*FKSb0JjKI@ieZ&-Q1nZih#CP$v;qyo%JDATsUA{tS9mo8rp=heGgoN6W*RKdgbS>v}Z}84V+FgQ8F}x^CNuJ7~Qdt^+b+ChHBL3vjj0g-t^y)-XxK<0dVIrz_%1er>ZKE6Sl 
zdB_j&w>7Gx<^mzIcMWwtDxQhCYx-kj3C6fJKo4MNM6BbW_Q8(Z8Y;Zau3HHQy9=-v zv+(`^_2&$$2^jha_?2TIO~UV%2%r1kL!R`jXn>m)lqf3_EitQ#b0MoXTUMfXRHBxVVn4U z5^5_Vm*8Nk*W*2d41>HkS>1%SdQWaRWt~;i*T;}Yhpn)r7pGX;-=rQV+vYyrLu##% zk>OLNVz)L=fpn(O%R1ZigcRPTdb?l7NxPA*HS;px!iJz7bbdHk4<_#x>yv1p);LxW zb$b_^T!HV{b&B0BYZpwg)@`yBpHlMEa@iltpN^>)8&=U#+|$#GzU?Z4DS5Ii-5Eoq zUt$|O>dcPQiCeHR^a%5!C{w~&&|Wm+P^bsgS^e2R1lMNL7VT>r9M&qo*xXervzL)* zuPzYXL@zH_kh$@Wq=Mp?Jjt0Cmm%bpYR9GHaQUuu8Goq?E?RM26?3h0QZZ-xPyq$C z*<9c(9+SwQF=z0lH{8cjIdvD|>)^(O^wI!tr^QX#tjM@!>1>jwqVl6#0T~*<$ZOo9rP`y7`)p4NZT{(pZhm%D}yiFQpE>-h{6O$%L@LlP+!1ryib4UkECa zJ~(M9whM6Ykr3dzk%_{1{G(8z{cdmf6bEJ0JXI~7&X>GB_IcMNN;v*&_+}{; zwfnSI9`xb4(P6KELP3s;>m(FE(snw zJbLEEQwO@IpEtN*=>W*acC~VO*j_gi;W-+lb~}=0;S@wZ7jZUvE6HDtXW=MXz1F2e z9hEs{-z7${iI}9RsU<$J5#P&A3dEiyGv2#bV3Cim=h33Za7MZ3p|lVd{+By7P%!cQ z$63{oYt^*-45|zBKGWb`F;_rW_-*?X7(WP-NgdzE>s4iB6x3y9Nzou7P8%q6GL!jG z(Bt5FKurMvNV%I}4PF>OQ{B$WdxNC*zVAw$ta}{zcAR_lK;RK-BGGuL>J>Cs2UumNC|I@7PH!bp{;QVT zhstvENL7J(=T;nhE-l~xBs8X>Dr2Fa(@d^l?BG{A$$WvP_naUnNR*+sMC^98_cYi#~@X7p-k7lhjs)mwIU37kznU0_t%r_ghquLzvg) zwS^JM%Ff9;YrlCuus$wiPxhOSmH9A+D{_}&L;T5?8$2i32r25^{VXZSl+^6^@%>cV zXK3zk6|Wc7Z3u2yKCyfA_~CgiX%_$JLCIUPW}KE%;{xj!bL)A(a|&GfO6tB0=TYE} zD@DKu4|#%^_(F0Ga|CnVSB58WChL265P;ghIBaaO@6=wjfOaE?NLKbA4VJa5^jDO#9VR$}j$g&wu<=95edO>#+p zZ~v<*drzc_*?mQ$JID4uA=cL#>YCC>J+MgnAQA57JVQJqn?O-256gnxz_aQnP^0!ADALca(v8O&te9N2TClcLoqZ!JJ&rvK=jaaYz7|o>4GQU>=iI@3${J0A0 z^R10cl0UgMj3yN^2hwm#ro5gWqBzgdt!|njolWWY{q|WD_s_I*&TRXf)}D7`E=9BQ zb1kNNhDlq*I+JI0*=M~p3YBTDtWAAH3c71|Q<4<<<2*yik6mAU2a!Wsl%e3W=3R^L zZ7?Uu3+iJYC_Sg7h>7`8zaGsl7nPqx9PwrCGyB!uV5(iZ^=O>BHThd-qld1%XSM8F z)FJ1H#jlpTBpr4)Qth)I;A|K*jl=sqK6ckn$cCuOP$@lg^HQ$PJYKLy*6N~>Log)` zI=}hmQLj+vMtL*5x+m1GB2EvqXf2YNq))!QyXc$-0OU)rIj!-$>o_}YZq66=nUuKP zbFWV&6Uaq-c}`L@*&6?DJ0o3y>ug1)PP{4&w-7k5Ij)K~i9aZ*3Nz=5B$yZeF0y*$ z#1>+6EiGf?T<3niG2WI5<&`KYAgFZm%WDfBfj#t83X6w@o-XhTkC~>%1h)o|=K43Q zzO0V;lha#c~K6g7X9)gBg@leBt|5Z8J%y+9`GRHwT%Q=bxoYKBJgDXA%}+F{u`QR=89^WF&8V>x5Ml1*bX z#ms}4>}G* 
zEy2libXB7TA=CO(`rSTh|Mn=D7UW%RCtty2B%{-}Cj!ZTFTK!g$}O922`ASyX+MAs6{+uclN> z&N7qjeg&j=wJ&9+Ln&$Zt7nAtmpY~LY56lP$m-Jq;NR`GP1sr7mR&SCpYP9r43qC1 zPS+z_@q409oyc|Nn78*5uzzxSZ^ud8*LOLAwozwTB3*JW*vt&ku0z|rLqw3H*)GsE z{9&9+=w8hlI=)gvhMpTOOH>!5T{+6n*l`6ERY@!;g|gCLnctP9;NxM>f47y5GaMN~ z?%|~hwi`|ZipgN+|4{apaZzq<*f)w20+P}pjY@YTA%cK}L4$~tbPp{ugi0w$Gbo{= z(k%l>BQ=x^Js>eEHRJ$8JZo_8``-J0p7+D+Cw{0i*R|?g>s;sYKNLhHOQfW=U1?wl zd=^r@nzM0&)m%@zs#5QVz<-y=X0l|=Ge)5+O|2Sx%^x~R?fi`-Ul!HOOuS%u%Q^Ta zF2XTzrL41q`MPO`e6{vt%E1)d#_QG1W6V27;utt@tjG3&R%qOqYwc}ohzxyy^0HnSP^U8$eAWGh;mt-U7D{y&0rJ04jM}m^e)ynF0 zuHJpV%D{j7hI)S}kos6@ZK48P~ zK64pgqcs6fM%tqbpP-u4OkiKlqxYD}Z`(09OEao9I3c`^QlY{%C$ zOl=)ZHHP$)3@W&yguWo*BmZEXGJc-TuTj++rG*yY1z8UYyf#@$e zrpAjf@L+>>PUAW2D+Zj4kF!z;Q=(fmZ%G;SwOor+Y>68v05O29x^TmWu_W=!a?{bYU4aeCF# zq>Hg}Ka_sHi3t8#@GjqMg3psK1JoWr$ESzq9&G0|XiP_swXD13Cb}Z3S7@0rI_H18 z#;)~#?>KbSR&!PsRfBoKMOxR z1EbHbD-pv3m)X2Vow1j*MRT5~kfK?Fr-k+`KTgzNbnv_G%A&}AyyCY9F}Cq?9Td!x zA7-RpQtFw+(Q4IIgA)l%uiC+(RD{s5JTd^GSq4SUad$q7FMGuz=Ld%(#ql+ndUwc5 z?p~!UrAs-~dpu%)P1pOL1*czQ{;jO%Jjg1Iw_J}nUa`qc?$DDl^ZD%Np>K1o$+KLz zL^D&YH@Cq&<1VbOrIn#*A0VJNFUK+`A>6iS^izrcRu!-|+3nTgbh@1xEbtxcwx7{& ztlku`_AIE!H!hj5PX4p!!U55hn^*L&7}rGwljewBL7{E1ISI|>1}zSHTww*gNMA41 z;5=W0#Hhs8*Vw0!1b-iO?K|u&7G&A8`Tp$w(~+S%kt0V`Sh#nUcGvvEq6@f7+k7%j zb|r(-WOU3msd6ESA(u`f2MylyzxVy`S)s-gJS4?2Cg4JC+iGRsf`4+DzU^ApKI8lY zxt7bh?<3DgfuY2sO5%vU{MaM3jjV;2R$5YfrM$U|B(#o*yK|@0q@%7Qfukw$CL4l$ zgNak5Sjip77cg&Eum6B)IrkxLE*FBGns@s{&y5U+9G`w32nRk#*QDQ8nUC(N|Dhy0 zE4eM+w?nQrmY~?)0B|YOz5!oL8`*~69s)G@2BF2T5@)qeVQzND{PXmMw^1HrG15Gnki;%XdJ8@bA1A4AaO7 z(JF4_BCM;mdiT5~g3XI9HAvk8pM+sJpohG(xQ6nd@}_)NFH;7eR`##Fz<6^*B7>qGMt-t}K>X+cVEoHYGCg)pDB6s0MzfP3zj||RWZL;nQ`8}UdTE+T zaEZW)j}g*hDp*Z+Ti6Q^m8;I1X9J#ha3D{s3z~Y*w^$o762-#X%kw*^kzHH)o^&v> z!6*KgQlp=HCQuZZMH(}I!t05#3-jX)x9&9fGpDl-?>>uI4+U^k-o~81BDWQW$*jl8w0-1)&{@t-J)H)`EWLJwNr|(s$g#vA%|uS z3tKRkANNVySLuDvSq&>q9jqqCYRdYLMiV_zn+S_bM|gw#(`1&nm@mig@F|Jh^y?6% 
zm7laca7_W?xl5RdhgR32hESFR(@NglfzUuub{b;ddVQ}u#Q?~3t@M-1@@cxXqn#F=)x4==0LQ)S+=9{eCQZI-NcY0F5lZe zV)s*35Oq_otEF12ZR!IhZqtve-if`64{85 zMD({juub5Ob%o@N-7gy~XJ4PigvwDqy^7$vn&C-4Bl3_52W;sNY{!^PqSIKTdhAdD zy4Z~MrB2G@3K^Uf?xT#kDaAbl)LC%q`4(JAkrospB;0QH!L{@FHY9^ zoN%E6Ztn6Es`*Zz(TX1tetf0HapamJn4~Tjgo|_JN?Nhnp>y?**ic|*8j=7JR1DL4 zBf7Zpn9qOw9nVocolSx)R)KZTjku{9Bynd6AlmHY_V}sQYZA*aDZ}>}$IFT#^~C4}+m*FsVt~ z&ySdr25c?*7>cveX?l=?T)}NCS2<)b(Ae$SipoE8=&e|?+;sUh0Og(_U*h1pOM)B3 zXguId*hsY_5nCv%%+)tyuQ*gEe6rI!-C17>f3U^XvG_J>##Zcu=G<%ls3*-^@zS}& zLnG_ingWxTF?_z5pP$9WUxgEmb}d>g*vZo*6VAz`Q1q*s_Q-gEV(bIw{4;%0L)!D1 z51CLpm8z~-hNE}$-I(A;)mR{6%cuAV{(RF`Bx8GhY_8NRN=~m%vs)0>{PF!ueXS%Z!6u-PEyR5w%s|_4@A{aPVCsx>VT?+?( zYG!_GbUv)NL2vgJft2({ogqKK?)x1{ zkBwOR&-bmpMp_vg89n~7jbyO)3yNbxKTa_NJG|?4Bqi6a&vL%0XVbUck@za}O0e9aXrX1515Gt~x|)UV4?(+9dAVDs}qj zC`SiNIBnL;8qcgMvFvGayu?*hv-LoPSJch>CtQe-+8nQsiEM6;(}&~obSKHYruo6P zj{m7wF$k?lYeK6hqoe4FpZuk)I8%t86D*|Kn@^Sfrom#?Hd~)?XVrjm)iH_dhJhs1 z0&)4m(QTN_y0UPvB=&~Dy@Rg{C`Z{jHIze!!%b8(ObHtsUAWU^(t-7ukCQn-%^i8M zZ)VBC6FF8{+Pq&mJjUH@;xA71g@jKM-0SZQ_(p4w;6&ZXamUa89|w1$NPFRgyTIeG zk^H!G*JhVeXNvg9+C_$(@wyE`)=cd+CRl~ z3GZmE5DM#xDo?MvIBsPoGSa0<(zQ}Yj$gR*Nq3(`%|<)?R>X53z}onJFAby%D^7H5 z5)4z1^74uSG=Wy7{CW2k$*f^W8TCWPi+LTIS6MZ$7@8VW-**aCel5w3C0y_EBzsv>`$s$%OHgBF2Y%_F(`UF=XVAa7%7W6Tv^^&s=BB=Ry9PO zH?J*c&qf^YoF`Ib`?V?>5pA2)`GJ^ZuFV%3@T61^V!8=Zu0(9*XjqWjTf+~r!uHUs zGw*GP1^awARemrazdgzODPXrEuCqg$^td)QK#lW2 zVb~iA-AX|X<_V^z{XTa%W2-yOssUF(Ta{2uo2r8R6l~<39t-Lw`Q$aC&bR9{$S^TS zB-aQu;;uU6oyPTtEN#OUAu11Y0Ab~zw6>C%w5M+D;fwF}ZWCr%3KILn_=+jN`EIhdESI-6XiQFl`CshK*Sxi8 z)kiC@JGT3?C;I3HH&P;|rT)!ymQTpGI<<;v!Wbj6^V&kly8yrIuugWHhlRYGZ2OXH zp>-Xg=Dwu3NdLkXpPDrLtHEH>Q4#JTDy-c@ctL)QLgMyHo%>pj@8B`D2b(maf>$k- z#ex>6Nn1^LV3{SeX4SZs$|}{aJ&X;wB%XtrIqTWg-A{|yo{EUMryb8 zsPtBBcT(w$GuTL*RaXf4IgM6i@1WHn{fgJR$1%*m)*nVZudlXPZ=F3+ zqc%yJlK=10*vs(hm8r?^f=%}07fR^98s_CT$6Om&_xY~`&(NkLCf}s!u4)b$*g~l_&6(Tl-q^Hh zi8D`n&fHq!WI%s=m6Goq88vmSDZn2RE7lxp@|N%<(}VRn^I_HD_1d^vA|2LheGBgM zKQ zXQm8a!n`P6MyPU;G+5B)J!hI@pa`@ 
zn<{BKmM_y|A?{pa<$>KRo7+`t@T*Ycc9Xi^MZk&_hn1qUNf$km2!ekFFhx za>{kVB231+ETY}!dPaoh8tIA|byDubJ0yqf(qPHE{&e<}O*G+p`B@7@|QrT+9OD4RNlmTHJ(c^q$Xz>oblB`g&G}z z51i_7a*cpJ?igcZ%v{1=bQuNplUvlSJVd*)t>%rODxsQZ{%VwmVlIi)6O)^er;aT@ zpTEuVh>oo$jeBNO8zZ2NWMQsIihXI}f4BDQ*fR(EvuCV$>b~whdw2usWQYqGy;qi0grSn`Jlb)+)>9)LdeR)uH*|Ics zj5tX+<>2$36ePsWuf#J#9Wy!~!bK-eNns~B`6FESLXLj%vaY#12KM1vtACV%JLfA5 z^I~JJhLY6=!A*mOjNEkrlsw0naw^8Gt0vif+TB-sBG_hK=1`a%b`3#}xc7VJL#jxA zAEDDEH>V#ZNg?#3n(U%Xr|wk+mr~LQ=@q$(*t+{0zxjd{wupT3M6jYrgILBM@7Rk( z-(*B2?UnU`P|+~nKsh+mY5j03XA7`5GeEisTjU;9;VEA4P9MAHXluu+PPu@N4ucQ(NujBe}$VJp#R={%C#ZpdjPS*)re~fX>UX^b1q5G&s>gC+GGdxuOWJcLJWFn> zKJ&_$jP{C@az%KK1@c?q?{Ew3%k6%uqH%vfgYkvMj%x+pj2zkjT9r?2m4{@bNlldl zUJp%d3a*VYIAJviJH0Cxo$gj2;r@bm>l!C3=`5`$r2o&imj23~sub-e1e({oz3{Rh}pM^+;FS!~H@-$tWNmZ9Tr=aVQ$ zdZ{;d5^&yo@9JV*pnMviABYFRP33MHwWP&w43KAMiDcHW;@T`laJB^YCp?6l$mcb8 zA=6nob$ytjo+T$+P->aV0puQ$Yy26#m$OC2VV-ZX7slhA+n7cI-b0`C(Q`>%>`~Y5 zSguvd2>nBh0QT;yf(gqArZ%?LD06Mu zNB`YJ;TR;8o?Y4{4aLC;nSlpXnG3~QW|9^r@#%Ttiwc9*4p)&S)|QPOnJKG!A0j&( zPtGi}ygJRT0pYv=yB;ux5q`TW<2q59^-S)hWjB>77p^@lLYvF@x6W!&82+$LvK4=I zL)B_-WcS=gDAOj4#nO_r0g}hp3sVskL>+#tl?W^~cXXkfX*}Soecc050vOzbE5pR%hFmvL7 zK4<4)kjZ-kB5xr)LvtFWpvnetx$u7JH__g7K%`)u5w5oK;wv-7xK2o%3>RT&Z=MW` z=lX{Md5&6osPVa>T7OtFQf=j^<2&vC=#kUb4T}(JwE%BGjX7{#siV&z7HAsdH&&Jb zB?*Eyq68=2vEW9X#}h+@NDauRmIcMtA-fyHOk-Ikhip=hld%iOfCe~)Cqe*n(p~ol zI<}Q3Z~ml7@RX__gCi~1R!LkaDrvoVmnv#@bE$F7&*8`juzKjaoA3e>RAF3vct6spYa36dH`i37pEslFZ zGh*u*wr!+FaQ?Q??rsAg9q(+CftYz{67RYO=gF&tbazR!i-hax!Mc{34Jk#saX~i^ zh4TA}iwks8g{>fQJt*w}A)Z2*K>nJ3rpS-qFA_~F?DyBB{Gf~VYFh+W-{6@p`-(hk z?6vGm?u%0Z)qeN$Ubi^>H&f{*o`UC^hx$n22LZ~6DbT}K3|5l@k|Oy5YZQIxas1** zrAC+Vj@}B$Y#dN)5TF3^;(M2JAt_6o{Lh;h2zKmQJxe>={PAx-jzWw;L}*#B#Za+Mm75N=3Iecv-=3 zsny;mKU~^T)9Rs+nHq4{hYWoXd%dNXqdc8cVc?}SueL6Q^yKV+bCESw^wpG%TDTHr z1X>yZekV@o6MLKmt|V-@rALeWvcT&`%>i=c=pbmV?!8rKIVaXc@Z&wmd3bdi1C_h7 z8nJ$stXQ>bzrXtU*QpnpV7Sm8)Y`X%Y=(4_KI|n6z}uY+@|jg{uo=V~#NTMQ=}#~g 
zddTVB9o2HxBdoW*OX@NtzplyRhjy`hV#ru3ejUI}^)BN*Hhlp6rP#89puf4oMyZoW zh0zHLqw20+o36)grem8WNNr}xo>H+t#iui|JQpL!2PQWvhi`xKKZ$AWW$2NgAzkm# z^3I0%b6b_G?DO>MFEDM2EO#M1pVCbx0?D~tGqvCbW84ydV-k=k;uKr`?M{~qSxT!E zZ9)R(O`%Ih*!O)5+B@eFB!<3#(H5$t_K_m@swAa{Zl;K|aE`b2$kzQ6{%Jx&kw4k_ zj*tkFqLJ|h>)rPa)Ga5;1;PIK?0nqdu>^Alq5|BMoUY~>^QVwjpuF~7LtEGO6n`0w zQ~2p2^J{`-3e|M*#qSgtbD!$9Wal*^jo4Z|1u@%!#|A6fJ7<$Lib;92plh4lE*lML zJ|+=mjFr2Hgc3aXamyzxHW@%f%4NhK)-E21)USxwfGpF&s(bAm5l1d{Ldwq_Ge;G; zglq?}XIMtaxJ}76tpncET!YPzVjST3?nH%Dw`$#TyuQo3;t3q!8gAZl^k~~Ur_ML& ztypb~11c3{5OtZNE16bY7<9l{y zuk?9A5A)@TspbGuFtAAo&E?73_GmMSf+Y7g7xn9A_(ig!tutkbi4&|zft2nxzk$H` z4JV4GY3uY579UYl6<}eW`}uMh_nx>%__}OQbhT?SNx|ENh{>G=(e5NsQsiIrT{NX*yBWpB=%SG7 zEolKjVJ5tAq)L3|EdjSxtx|__OUlD1x?ilm8ax+eS?j3=x)GoLL}PxK|6fH}1a^xdwPw6$4US9Uj(i zZ{{@b2N2;U*O@9|jS9c=lYX!Ed;yhMD$>cGLq~SkS~-z2UCJvxV!9f)2&qY_Ugqvl zL_DlLsq;pUy6s=9E0((WTcrBtlY+y9r|-F!si$%mDLqa=tzseVp&&IoUetgM2tNvt z(VSRb7#p!2l)tF@yjjs{lV?9$>kM>UbHzcFVrFP=& zJ-*Z9jRnCVcZ-<({lc+j`1K(*Hug=T;sb+(J2?^%&Z$(WL(OzhbHyK?jGz0wO>uaxEyzq@oFC8)eSA_Z>GTvl~YvPFv8LM$>7BZ zxh=kj3mP6>->I+9whG7sw`MF*s30YbmY&*rUxcfP$}UxiV0vqB?{?M;Odk6QE7pV3 z5)o;N2BaG4dS-CSa`r=VbO`-s@QOuN^p=fJd{l`bC=ncucE2E^4P|RAl3Xukolyu# zSIpkU-3)#fNV@5YIohH`$ma}r67mfN?%S^P1vy>3&`0JZ za9xH2$u}hP~(_l&d_z!47+vze+|A%QM!2TC};}84z)KBLFJ1BIL#Xm|L!1Vv~f$*#e zl=ATyTWBw-TSHupPiGe7BW}9_@faCn1TYx@l!p9m2kPJOSbe!+z#dp>JUPZqG-$}( zAq8v^Msa>;3#&N3r9<0em6yQ3M~$QmFx~%it^?Q$K-VY;Z&@amaHyNcvS-V67`d$< zUG(5U!0!B)XK`PI0f6zA8juURIzm;+IKa1a#?mb6@{*LC^jd5YvA9rW8s|BI!#@qp zohO*?tqf2|3Ik`3kEd`AumO^v^WB%4ZobLBJQH+6h+PX>zXq&S0QALmB4mNF#;jhr z5iv$J=L=bqm6G}&_<4~Pc*A+T?lK zwrzEI(=0#zmu;sgJ^<>)0zBuh08-A6cy}KBarQc7RR!W*v+&r}615}kS0Bf?7`0Le zY*2t93(Tp88u!KRXw+Y5^f%vJ)h&>YP(3^Xe8`@X#d@UW3l2p*n! 
z>UHo3cJAYRB4+(xhn#&K2`2FDALZXR;Y&Khg(K) zW)S9OC%&OXr~S~D#jO*BAnRSEqYtqxE?|N0SFe3^E>39h`_oGuw`HciGTkYMg#n=O z@3$Gd3|~|Bs{B-F+OgVz0wxltkQ|LWBqc#>5~D(O2WG{3sjy>>Pua_vFmcj>qLzF+8gev+;r#1(}nMsNBu*P&eL%ZORRxA3`oU`(XKw2Xw|Ws zXb)ObxQ~y?r|lGztCn97aZ#mX$2^SIzpae_i@x&F=f5|4z>K9o8&}7YP-EIB4#wJO z`c~y8+6a96K&2PUNdt%HiZ-6I;_j+F5PS#_-2N?v4v&g|)sS_+xpbH4bmmDJcaNx! z+(-}f!draR<%AiUo0IKTNBRlDN?|;=z!Mzcp21^a{!M+ z#Al)N8L1~nn=fz=Mx)od@r`-^Ph-BjZEO*e9prpKj=D|%ch^Hfbv^z=|8Cv?{2mV} zJ0FfQt1wb9zH{dkH1t;o{M8zKz`g7I#&NId+LHOY2Xb0+ykTcIcB0@vpYV6HbpQtX zUqAmf-k?;xEx^s@7cdUWQ`n2Ko%WTPS!p~t$o$JG>|b95R)}X9ysH>k2=)qjZ^q7` zLKK5;uQD6{{xgC8`LA;|uH)SrV!`0}7RRb!b?WVc-?9Jm0b*!?e*9FXw}&^J0rlJ~ zv`t`A|MQ<|1pM!6z_Eq<=Z8%UW>$2pME*r8{N?f0dC%h_Bp22YDpELhoPuI=zj~V~3ztIQ)e5TV??Eh*U0e8Sp2IdB* z;?{qB4|vo?5VY`&p7Vc>H~(mgT(~M^+G&ggjDh*6RVB0kn&tfKHAoC+zAO0QkRhd+|B=|9Z3eE_+<=svixFg zATQn@$KjI}v>>8CB8*>2|Mf(Y(x*}VU%wTgUJAh#1p%+NX`z~vZ%Ri~TW-gP>&|~7 z_dnkH^uu^t@DbQDYJK%F;6Pyvadim}h`*p(|NCvpBqVsWljpwz<$oTYw*w%2M6kdW zZTN&@A8@HwQxQ2TD6s4wS+5Q)&4Xnjp$UeMm4IjR-P+m=P*P6nfK0eI8*Gc$DiF$_ z0F$O)$@lkNENI+O;cp3`By|KJep>)A-MmU?sI`jh^z{cpW(prI{(QCJnbS@IVqQcn_n+`OXA{&O&bop0f$`Xp4dSlQig4zqcP|fJi&v-i=2T z?EqMiwN4rgWLYNimX`2X|4HE7fG4BRFZF&yC2j-kX=rmH+7@5IO&Z#5){;ZLU?VR5i`-%yFctr*l|i zG2*KaH~@XYbE(Nlz;0yy$a)A^j4z(Jm9^8Kf5zNLp*9Xq&*uIrHh|DkH3e8DjMs8G zb4&X;fWbuM5ID~K2D}b_5w3!bQgxF$pxd_ujA;5VQ6RxJbb|LNaju~+P%~mT0qtUm z+g-(z<$pIw=$JwQD8Y*Z(32p8P-8{SdjEB+)S4t3wQEjzK73aqrt^W@SjPmusq3pwmoe7av?15Io)R#-M;JzcD$6ovCs* z4nv(XlQudzPB~tw?!XRYM`Kj9EZ(&XxD7a*4$s{3zr+4onpcf^o~^!d#R4;kum4%Z zu3mzww^5!8zjWWE8P6J91(i2e=i~s6XbOm`*6=(;WSMQ}Q*}>iehZ0*?Ref0@T;X? 
z59vhV>Fz;YW$^}Y4XF(8n>q2GC{j)83UYDkR>X#cVe3%=&BX3avKV`%z1fJP>XjN$ zk$A95od#*dGX|_v`^;}kQfddh0r2(E>rWTM!A2Q=xw+(u%a){BU;Gf zXC2i;b;=3RY*3SY%#LTP|JRQ$4|VN?``9ZBU)tg4Bo8>vc<{?fKY;3*VF3`J4m9wX zC4jQ;v?y%pUX)v)vgX@}ef5BCvb5-QGX)sx;CBB1y@=nH?R`H607@dvvvK=?v)CDh zYVXqjiNK#Mfr^JDsW(IuUACZ%7uvQ5Su-T#glJPcz zGMxG4l?yXLpF8U**SmiDVom^{nk*yJNK#lk;EDe($P?nt7W!ZO(h)4vuzKs6`3mGH zjm=%FBKz-=uPDEwiBSKM(#Z?=ng>E-J>a&uS!~@DsvPVoT_&RonDj4Igj?KevuvS> z(IB5)Ij|)*=L^<{o-6S3xWC_%{@;N^!}rNRS5_1W5ya+x9tI0?^!tfrBtnLq9U3*3 ze&f8K7{x;ld}^ABSoY8{mudQu2d$JUwP3M8kXb;P(C3yPYjx1*=VWZ((65nF8h)>j2k<`n(lY<&erZN6KbNzw?Bb(r{EGCxNsC z(z--rHU9TDK?`hU{~3&O<*!3H;TW)3tSU7ypMBa6svH_buV2-myIxs-lzHo`TCgdw zys&K*02jq)5VDG9)aH<4A{^&q+G1%7n8@7L4N-lC_X6v03v;eqR`%`(7f;gbRPbtd zWo)|pG1-<`wd?RgINm5cNNEd}H|epiNSK1g34-eeRe=ST>i#v($$F^`(W|=!m#l!B zbA3u8pjDFYv+}1c5d!PIVb;#ghs8el%2tP1q<5|2ft88ohuNQ7bNk%yX#5TVv-|m_ zSnbcYAZnk^OfI2+ZwF$!iS>KIL<=mN1&qEckkDJP_}s*)S6gR!C2(`7SEp1O!8-|6 znU50BouL#v_99TS0VU7RNgQCg5#wqU z@2Rco8o^YWAT#CGjQyZ6inq1p(@mRIAEZflju`y| zqUhXWlH|}5A~WI0jm4)(*2+X#y*C%W;qlI&YdC%ZthM#e09wC{ec#qql1sf6&&l$# z`6w~ERHJx|+!t5KG4!q5c)4AJ_l95>((|dso**P1NOvMp4&X69DF`FO#(^;^D1;kT z5Z;f+#br9AxyQQZKC~8|yEOQ6{gv}YL^}?+9cixK-{GX>jHmI-l#3ZObOe>(unXj96@>YN2P5Bd70oC@uhcM8G zS2;AI!wHGMfP|~keL*e4Ee3p)wjwvNiM%ug{7&HlJ{?RBwQkd2z6*r`4?ryJUXQXi2-x^90tgH z5E4_Y{eWf!xs|rSs%XA;ego!8#Zq^_m?`|Z-NJB!^1%nc5^K^ejute)(=TFqPsFI6 zgrA?2!W+jN&y2woHLYxeVCQ=v_KNk1k^!?R=58BUJy`2M@u1_Q4a@_3 z>sHwLeL~M9v@;Aj;1%fowwVGReS(_FCYUrr`k!rztW6I3)h2e&ll1k~t0u|wG4U7UxniHENIa*0 z^tzKPF@}U%)k7#vAm+NbS;reO4RYa?*$<0PNMxBGd)&QlN3A+ncIE1)Hw8-ANx8n& z;dK;tY7+e%3&ZZ;Jz8$?|AIqJu4nlpj&_?As=j~zR5&+;tGX8AFygkPAA|Z%p_@ml z$lzS~Ygc?a)GK~+4p_XzvrcX&4UVmn5l_&QS+R^ajoesThP@ckTvJONRjP4KAMHgJ z3lR|fT2eeeo*8d^68?+trQlx z^qlFhfMU|EO|y7YO}Nrdp?(ooC~tZ~)tVbO%35zk@_K-`M!J7{w@j}z9Vvcm6UxZ<>ZjAZU2U4OTg^(h8q!+PE5{ObLVM$hhNt$ z;d6P+GZuORthFAgQvQw4cFu31 zA;oniDx|=hiPTMkSw8P^{d!*4Mo-tkxxHy@=;IG;a6XIMLTiB9(>_0w;>+uZI9xUO 
zG8k-M>0BM44}4?-SQIIxRtMB6iwHrV6@)SuILR$BnZj&Mm~Gg{<8`eCA$MmE853klGo)Kck&wOK$6hUm;Ab!&S&iS z-(=%=>0iyT-pW9Z*jQ>yWR#K$RUrDb+7)YR=$>dE^lwZ85lY9|cZVTbn2gn+{T^#8 z?D)lRlNrADK0d?#paY|Ug_s=iD*C;BQ^tYZ?wBoqlzw_BKrbS<-n;1-Wim~t2Tbgw zG@*l+{OaWxNLguT>?=Lv)tB_O0zll2OZl0)is#sV0?XN3tLg^Wk_k`pvQ>U59qEqO zHDH^u=D8#H*g5{5!pe)Gy4a-#%Rr7BR?eCg{>azN-REihzKFY59FI8Xux76KFD=hR z)x8Y81=?xs^LfX@0J(Bg~hd-xbHwjB9qbbm>-hO`P7); z$#|-vC+QjSgJO}fOhWtd{_s4j>JZ$JJ=9V2 zrZ9bIrCmDT^;?Of4QyT&t2&@_av#w-X$mb>cf9x()0h`8|1l6u8<72 z;ZQ&9?lzr0N}-OMaW3#q#~N$$^0Vru!H(-YukNIL=`B0xx~Xg=g47aPc=*)sYc?5= zSJ3KE(K8mNs;sZlQkKkxYy}3Dq>7UzRj*U7G>n?Kn7O^rej()*w7ypHg#UbrTyb63 zBT#Kqh~k!u0=Pgs%LG87#yJ*y7Csx%(_Fn^a)#i+JokV4`o>peUz}qt5dBbQB9Qvhmh{?|%h)5;wzE%6EFEqNY zCw&#PneDOKE$3B|k(sm5yrq1yk0>Fmfqv~HX=O!19s52^wVREmhTOiKp6FM0S%T&& zucSrctkiPgW>SuYD;Un-TfzDNhqQFj5xlRx-!sqW;-aC?Ibr-F!<(Wh6{`-GX-n%TO5&~GzX>qjgi}#oY~!$`(^Sd zgddcoMI7<5>WOAxA8Hz+>&nKP)`H5{8}7@e?n|89o?i4D>-+L$l{x{UapX{ga0#+< zcUKP@afVS&vh`)mzXr=MZ~$@G{v9>c$srEu^8;|?Upw21O*KX1@r~Z8i1)fD-NubZ zP1fFxUYT3)l=?H0Ikv8a&j94%i##}ip9d>cDK!neKk-~iSoyy9$2$JQXS0_G9%$Mn zJ8PQr68sJlcF37xmdz`rVcrJ2(U+IGY>b|MI!7AE7xOKL_r-ZbCZP;h#Hw|s`J{Bs zP1l5{mLKiM&)r^~WS5z2OrDi$?gG?ABdj-oi!rj1YIGMm#dS*AlF#e;Ol+OlK_luE-1*GmQ^xzAr)>xS}Rv@lwp~&_kYD_ArA~+7$}%BJ>Ijh^UQW_?pN@2r81Dm{pJdvdx|24jWt=Z zO7m5Z%DkrpvLpOtYm;9PrFg%+jx=(9^D?o9^uEqpj{`;;xIG8GLX8~5HEOt~8%NeY zEq!(CbU_zdO>-~h_vLWsgOHgmm@(?3Ot_C`;!@R05K73OW&GF(lQHRvomP|2`<8>7 zLdOf;`Tf3Ztg>A)hqC~T-w0jj$5J?y5<~j?`qhASbah7n%-R2BGkAjWp{ zJi!wwJ{xD4cE*4Rf*lT0C7O5maJw~4mMRhwZ?D5bOkNFo1<*UFMXza%oTChAV7V^I zcizx63?`O-?A_Bpbx|6TZRut(()is&X29&$&y*crAHD0Zl$8(XC=*=Ok7q7c-D=!x z7g64WPcN}b<^|Fw+UL(3@$OB41MnwcpjUM9Ontf1I@stgdt5;`vWfD}_I>#NS)#^! 
z?N%6Rh`4+W+o;`$tv5{KmX{ILM4C^0-a2bOth|vmuhHRYwba%+IAk{N& zVtqpTyP*>_M|OhX~|kF~3t=-~D_-pljbcODnML^)tG z4iJvmtopTz?5+J*JNzzvtDUrO$gIu~Km?{wz7l?qlPM}G{&Fz!``wO=G3DU_GVjV~ zKM*VlOl&7qwOU?GovjEW8}90=Jba~6bFS}ahgR26ELZTVv$Z*%i@mcP7t5*ysHDrk zNRz+hjngPTGLw1YsCywdqjdee2t?I)#PH_#kk>6OE)H&s>8r~`Pk(Xeg;uy|_V}s4 zN^GVyw) zsq%1!VS50dh!(SWk9IW_C$hdxzOJCeAseAE(s73ogbn|KdmR z_f^LJUXAi4jrN}W9qR}3%3GaoB`l`OTT{RcF3DWp*oBZ*WE>&t z^e(6EC&(?O9gI~ijxE(?Pc#2E56iJ{b9TdKfo=YL;uG1)EHdC&QG*Q_DsxPLC;?u5 zXTNj*uqrXzUHu;K@b?r}cl1fuaPh7SiZHMT%?mrqJof_so}X$OcZzI}supEsY@E&> zr@|L_zXPK(iHt9`E@4+7eHB5ew8<$4PLC$?k2jgdp3`f#p*mszr0E`-AkCWKbX zFDBpl-E;!h1OcKF-a^tEW+H+IX2#aut~(Ri*bia$A+0dneI#o3Y-5!gIBF0$8?-z* z7ZbMY5O!aMP=b4!=JP7Zb?kI~JWRjLWj(Zci154oQgp|+B*;=psE4;wrwh|e8Cu#1 zYUmL~$~vcA8)!{XmTVxL$4I-&+xDAfF8ymRX#owD!(@5fdN5kylTgi6d8ks&RUsu> zsbxLX_=(F++G?K9-ClY<%2vQqfCU=&@~F$P>ZZ!U;0~PhafYEYli}Wnn@S%;l{(8< zZaa*}YV>cPy9e0O#!9);lc;m~q{ZUa+~^9@;mk=bhaB&$3bp{HPN5S2oDF8Vi=^x2 zjfy&zrqazmqpztHLVQ}k95#@Ct(aNSPwA;%@*B|)ni&4=w7Mg-$co-+oZSL%{3&NW zJyhapWnt447d9OcY+nU7CvmRxsZEEA^4q z;zw*uaoI>Oh2?VSaZs%4F+dA$dna*lm zxd~=$gU*rNf}hoTyqo*N(MznhFppML2K(w}6PY2?rrJ z=iV?ejkAT1cP;a0`wT^F)tDa$o^KpSf0p1*o7{4+Bu%Bk@IeI6m+1YU_MY^wscTzo zMQhbkZLLxS%1!5JHl-55D{B{rLWXyFZ3wP-o*_)(LB z?|p;v^~iTuw>Ve+A}5q1bwaLHsIwxMk#Qc4RGe$Rn>1E%xj$dqnK-q6vm~rVkZ+;@ zJ8W56Tr427D~C1h7e=5Wb*<}DdCpWpli~K}{2P=hrrr7%SRNsoo}akA(GmRfVC1q* z%Fu3q*DVc6CmJ(=^~uKPM$SriC&nbL9@lgXCaya;N510UVGfy z+`Vx1?nL(3qFZF5w*V^|mDW0pid_7Y_jTw?chFw#-59)e6G!9-e!=>#_uHWtY2hb6 z(&n<)fo$xU=f{qeRlo^Htk`4hl>&x>-L?=i5<>;ebrn$wsFGi^y;eVCSR3VZfsxgvS@oP<*-8(h5=Fx zTYgVdV~*VOv`^XGai{NHO0GUQcR%z8yRlgBkc#uXv_o&ot^Dt_r~Xj#%*nkt?vQb= zOX{w6AfLW`oC;C`$^NL>xK29Q->@N^_}Oco=i_Ox^lqmpHZW4XEMG{^UI~kO$jcjz zrOMA)*D$BE<9V_RcP|P0AH04#o&r7|2mntj)5zePIp?lDexhxmV8#7KxL`+*Y>ie@ zW^8qR=k61bwK*j!+3?H8>~K>P6Ks$FB~(!Leq?JJQwg^256BXk>uI6+E*A-L;*1{C zjbNXkTLP!}va=r2Ny+eU}n%r$(2$NtYhHD(24^>f=`$#WQH&$}{sx z(L|BBY0TM`8y078%4jDOz1_^BS9O88+I30Ld=#wsJxC+w{BMf1wOW^V?`v4bc{?A* 
zXP^jh;A7vBzQ6cR1A9vrW-E|rqq_}0&a2?#H275vkxY`6O*Fcw&|tqAwSq0E3q@u9 zp1u5TJ0ihbv?Jg#A8f=OvmMZPb+q*W*BR%|*)~>x<=y}67KLNL4{sRt(6qb{z%e43Em~$` zat{T|hjb)yA$0CytTP2p1QWg2^@f zTLlHaOc?c}YM>9Z&_Gp=+w>EDu#{4~pmn6|`g;ivgPF`f`TGCJhkJ{WUxee9D2986 zouIU~wMRB~j(!Rm6ATAFxS3hPN~C5`(c|PfcRtuJ4-Lor5)@zzw4X&Tur%OSyB?0A z&2cPNwPBWP^lTPgT!|8hKE8kbZJ^uVK!6eSTsb~&>SlGajurf|Bl8Pj&Txy!t-LJg zUd|x=`Zjq5ny6Rv&8V{_wyjJTd2FjFynyPjQ^Zr=|CrtP%hDYNp)H|m#t&OF7jb~#^s)TNV_1O53UPNg6kLqqH5JFB>2}c8Q9FK!o!QTky{8yR{cM6Z;4Vk_r&<1g}|mJ@`J*lQYr& zJA$OE%u28ihJt;f4Ze!GaLBAbgi}Ak6&AGC7l;PC?uD*qX~AJNH# z`QP1!l~N~^A!`v2ls>lhp-}4vDV=u&^yo0&#hh}2!?0XnRAXu^CmMVZb1#tJb=3o- zHJ^DRqnsfdZa_8Tni1B>Ir`^$yc7}6_~C7<^{x+*V|Iqg)9RC&!9=n`-Kh*P+6^YZ z78w21nJ!bO!^#SCOA5BLq`l21_%FN7A_D>+IR)&H41P)S=2OQxwIxlAoiJ zmNy;4u(htn8jOXy;Rl`q;{x(3doKVlQ4ZW6yG_#j;u=Ig;NQVoh@p$1444XFeLDlIPt3Z*G&; zepa*lNFoLW@rl$aF)AlLsKFo!&BH({siDaURgX1FE{kboV}jk9O5tT#+WK0Upj&^; z*aJRMtdwaWU{8YP9DTk5$YVm2lUpOCZaN8R3-o-(6f7`MCOieX*C~@yQaN$+N1KZ2 z5T+0rj|Q){7=NR$H59jEMa^;=XSZ$e^CSEi4OxhA6A6fnHx1aN6M3P~zPSo|@&K>| zPrOENy+5*Pwyleqh#(F8M&pn=?4Ktb=18D&dnxfGXMnmQ?@y;BEkZX1tY{|KqZF(1 zkoOsPdE%YvX6Ppb&Df_&Ta-rodQef-Se5aRfx69MNRUoQF&JwWSpgyC3^hN7qOq8P zdhm5Z`O)ZzXMMx+g@eEX_SgbMqUIwr<1uC6z+5%{5a^VwMwpRr=?`hG26L5fJUwtK zePt9NiWKV0l&;klaFmBSX7owm3~>)y8@|65%LU#e4(^B8YAxTaKy)gX@iFoaT(dYZ z6lG?dqlVOt`hD!k11|T4abX5k-QJEYioZ#ZH^%*sVw#`SN|^-m&^{`B@Icm&tm16& zIg}x3a{3T02DTBV&Q-Bo!JH-|JD5d*;pGgD+OQsJR;8WzxC{z*@)NP;0Y zJ2!L~lej1%M^P4L=2RqP8=m*%(sh<0J3S=rKGPXhn9)~NV)fk^)%dx(X=XP$wtk_e z)e4{=15OZ$)W0s&ZY12S0bje10+Fx(c8##*EQdmg;DrD52T%=j)7_;E)}+?faEnIf zEulyI+iP48S%#@08!0%{+ zA?r_ZTquTPMSRyl)A#TN!0v8cM`X?hR0}gEtp^48dLhWpNPz!h1c_o(YZV5$V|ZCS z`B(AV1acV>-A>cLU1dyyDTdBK+8K{h2Gx+y1y*zG`oYZE(~WVP7SXyVvVYCm#UjTKK*f{Yl)Hc>W8{rE z_wKH+b~PXD8*3b2oE0mj0VoWJXa^~$_LegI3Og<#bOJC)kFz74&McsjW7PEUMGS1* zf5j^A(V2^}ozkMUut>ET?sZ+qF!lmDLSSfE5)W|r4@Ms)i#O^#7hJ_WZZ_azwHJ_j zUaKW6_0{1sjoQhZihfoQDW?0F@9?guQ4j^l)`O_Y&#o8Tu_22e^KKtRbh0hLh-}%? 
zjB+$z3ArHjWKsq`+2mo2cFp#eILQoNvrCOj^7UY6#e_~?QY>0Kl`{N4x-?-`?E$-i znWlx@=8Bf)M`HZNMi*QyT%;lZ2uFfr*^ui#&>X@RU7N5+doYx6i$N4~=-VTRxz zFNx5nyWHO2()V+9XzdQSQbvZ^WFJuuNYtJ3W%`Lp0cq|2q+|N-x~(ErCLrLQAj7zM z5JmAB%2oDyF7zpLj%4y;C=~&>MHe@;D*o`Rv(oZ75~JbD2S>B?(CM?tKs%7yysUia z`4tt_tedE_-|}NqSYhK|R|U&Z6|*-}$2&o&VsAL$-L=DbtvZ|XR0e#R`#bRdKjEqD zOMdM-Mz_QB-m8M#s^Bf*?AS7bv+gVm11^;1rRluQ26$HmS{eerBlOFt8>xzT3qMXC z5|^Fo(Ap1h%n*jbSO6O#;XYFnkLX_qgi%^RdL#eRXqb_tP>DL>heS=Ett@e_pp!S(?Iix1ikP zaoOAZ-Kx|c@0-E7zy(w6pDV?Ki+4hN^wOwZW1fth@U5G@%zsOBxDjp@0=3pR(>P8KzXWMW zH#gM*j26p7Jdn0+07)4yCe6@0a&0(91g%^R0L;8N!`uXW0HbFTG_9V5l!v&k?)t_hpnOHa13YLo4sbMSq zei`t5+L#^pyJ?CvO$1=hfnH&YZ(#vtEi|T85J0#~wNU`ClHp>{L009iVF=gKr)kPY z7{ymH!4bn(=cOxJm-Q6JN5Yo~}igbh2(9#XkB_Q1@-5}j5o$nshbI&>V z`@8QS#^~(*)F;J#6{HI40dnf zovBPO_t>vqrJH{^{Ad z*Ux(z1ag0L(OIAtUYjq12^VaswH5i!K>QN%u=4dlNyeR~2h#m4!G}|LerppwX<^HEN zpPg!sCMm2AaIMt0=olC<%bHwEa(ko4KDEp(mNe^?;yygk8MKZEnW;2h?_7)K%gf4I zu0-3L5j7KQU2={IW{gP>=Sqdj!6C-d;Y?q;>f#}Sul7!wDzh;et@q00*o$*N) zQsKD@c$PJd^bQT35h7kDh7LcwIrcczZBe%O7HUZy2jgQYUp6#Me6TZlJk#uC)t90? 
z#IEI1t4zBSdZWvL2-(bR#j#Q?Pp=H^S-H)ySH96hOtr{8&mCJH@CU?OA{S3%sebzifbw=EfjPnZxI$HP3=qh1nM%WT@r zLCZ&bHgKzQoiUeHSkGzxfolNULQS&oRxC&Vs*;bQ!IYB^*Jf+LOfUOk$`tUw&w|6j zaqNY~X3p83Sxwxk?nbQxiaf(R1WqPO26@_CF~aF5*$6pza~cy&VuazPJM-Syb>`vm zUP&H2a*o@Uqms*KgGb8~Q*91>g~l){(eC4dW|NW)`!y^!rUKrqVSEWZHq}J2>aFQq zTPEr$rZ@BYe2WCst32w-^b1cvOKG**(j}gP6jPqPtj(>ylf3g!N*F22y&+aTfO$Ca zoR55xkQu8=mO0;w5b&|(duFOq#B|Rd=msLE*h{}?6JXP9Z zI58x8u+XuiN+wiC*6HF)kx>(SjGEEZo1C)F{mk#IzU zqigQ6FFYUn0O1URW4;!(O11Y<%y+#CK~tnu2->!o}}+q0jR-v zT%m*A7=JxeQN=hjEg`AKr)gXkvaOijzdboFCOl$b-b}edUPUoDPxY3s=e&={6p4>A zPkD}HPttL)NDIzEktEuNEf>9T4V$9;9Y@cH?TOfdI|P%+T)_LU)mrR~YwFwNlNcOO zdJ;X(3T}uQ>fa6#tV!m6Tu9=TU@1OdaxYhO(D4oCtcm!{__Rdx!MP8`emQAQ_{Tu4 z{;1NNnAnk)_(XE`H>X*|+%6y^)?$rd56w^tGqBTqGv%zqH=+uKyp!B26T(l(cjrH` zZ(L1Hf^*<(IgtY{y)YCVO5}slOo5~nAgxxPD2@VqksDD>WKN;v~ zGEL#=ITZS6m^ov7cUctkU}Bh0{zNt3Iklj*l9&-HOKrnE#4IRp)n@lf)UKbWS9%5a zuwn>aeT&?Qoc!%yRJTGHZZUIMQ`J2VQr8mp5HH;d)NTPn0kGv>77OJi7NKAYt|sU zMl>m>Aw|(wu7n5M$NZCk)&fU=rJ@j?-w{^2LFAW_toYX z0Xc-`_tzh$C>;>+D{DM_LMZ(7=k-+KCi^pV96U@|Wgc6a$mFIQ{qe=GmaX3ghbrpR%UM3WKn$nVJknGIN# ze%}z`z}fq_feAX%_vYWy7p)z4cO(@atbOVIuPJ3gXPJL!u~cs7`dAQ}XQe;GzW2fZ zF=R_an0~DR6GwiS3tJ;5ey%d9SHNfP3XQyz7xG_JmA}>@t^R@^e-J8G=7kE5OyO-~ zU)HVvW8}ZTLKFd!GLNng$PW@%mDBRMRJjkMi~p6`{V}|1`>SO-lK5r%QU3j;bdt#o z6(a55hXgh<8ZQvCg8#e&9kT;6pXlB{+lDUs3J;QPF?D` z?cIR11l$Jn!*E2u)~afD0?!+o)lN%5{`em+GX(~dkdWjOMR>>pF~1MAKm6{8X2Y>s z*7k-5WnO-0UAe)|Ji`G&kH6Sdtvm7OJPY~yP7p&g<}LWOKl(!rK=H6MA_r8Is!4^Qz#6`m}!#~WO+428(*Dv1IP3K`lhkb&RD7Cd`)vI?t&Srs0`^* zcc=cZ`AXvz+~hPVpV!(5l;V;rrt?#h_^hf-FO=WE0HP>~phs9j>5R6V>4qO#aqt;& z6Y$rS;&Q@)+cZ9_^zmimz7d?tgR?yW%`o_6kSP|hEwU$|EDTYoznMog+YhcD^tY$O zQv2{*-3rMe*XIWVF6fGmOpJ_j&kys3!w_ThbeVJW3ksy4Uk_F-ld`r_6ls+k*SK#A z|Ij*`m_5*zTlh}InBpd%tCdBH6@d;7u@z~PU%}1)Q9j>9g&J?e&3@BCSatPmr2@ti z$H6rR8RBPd+AUv!X_usrIBp%|G?bEyN%d!3qY+iQ&v99fsaM;Me$_n=T-Todzt5iv zWpjP%Gi=z}fs&@XXr3lNdlG2W{40h+iWg-hiGXnpc)G%|u0X5R_hJVIEtHm|oN9>o z^Tbe!#RLKx4BQi3ZlUX!tLy8!%%V)%)#{$wYm^nP2SUJ!Z08y4X~mX@ 
zp4PwJpWf&SRsoe#+r5sAS9qQ`!Z;iiB8l+m?=IC2q){V>w(9nSV$Jd@_h$QKP(VPy zh;#@4=FhGv``At~pSLegc2BFOYQ6HOu4lbfe*R?bB(!U>i8zFlB&_ALyQ63be{~H; zfSP4zcbcL*+O{emSZs2>mzj`LQ?UbEV9m~avafpSBI;OT4KeW$Z~1@u6DC|7-zq(W8{`OF}cn>TY_sB%@bntHEn~Z5!=vPXVxy%?5CY>s8W#+1LeSi z+Y0k=bB~gyl8mnRq%N1$UFi)meY{(9pPE_kn)42Czo~e9(nmf`o+GeqxwtURzZ@J9 zrk_a6WYV>?C9(B>FW_m!R%GUT(M!7h15Yy17$5QXsDwf@K(UoI@QIaFtCS|0+ir=Q z{T&wAy!8ozB{QN{(6PRO`yVpJvMN*@r#)?sMB!t`a0#H_J8={cjRN3csN9x=|=t9c-;6Aw~IrS;;y0K7?8>J8l#M67;k}T;54;L z;;ygoZG*UM-V;l(vEIg4!)jfBX{A<12Ens>rrmp;p>JJZ3Od-t!kcVT z5-uKxL8)pLI{f&0E+;7*Q;fKx@pL=P{gg0RN%(e;^~@{R zsP^Eyj_qg&!yww$|Iae}h+szZzJ1tZD^hM6cp5>#9S>S(efg@!jnjFCX*$ZL`R*!Z zxy}o|vyNGBpjw7jz;!RiY-W=ks|2_1QBZFMzq1^H5ftpT$L-jp?lD}zC5Fa2<2K5_ zLy<&a?to^9OlNni%ab8m6w*rJ$Z(6gW z&0l%^71W&5e7*gEE{9(VnYdloas=wXtV;W$ueUdvQ>geHCm9l@Q>K2TujD2g)J9#O zY*lHZ*+XL?bL%a7+{2VRcF~Lf+~InCQDdKqY1z=Ou2z6>uWZ85mFaig#pn1mR5BuE z#^*rl{_0bKN_JkVQ{Q4@`k(Ijzf;{3d=+Q$Id<&C4_f)hhI0+EED20{^!(2IG~E%r zQhLVAMDhuK9I(EY5t7L-uHU;5aatz6f+kVLyctU2wlmJ4m3?Mt;Jv1CbU_(4W8(*h zRyE;}odP;&KKCtH{K2S$1cOCf=puee3dHqDv^u_o*jUOU&Fjwkbk(Z$#e`s^;@;oI zPEzK-f(Wv(@XcmA7d!?xTi4+=5GB8uTT)ZJ8C(33=qpyDX=m{El-oo8Z3@Wu40gRQ zFd~Ifoxy@`bz>u}yv5eKHHfX#{bZL}jj^h#s#L`m1{I5zFZ;mBad)!U!fM)z`@aRPdAgT4;?XYVeu_8u3q+M#5z7W5KQ;j_^K6oqTGudt&gfGOI+JV zn{hOaD4a$s7e-w1u-3GEYwm?3P;yOZE41>pi^Af;#_zL~4^1N+0wPLLMZn8_wd5OdY&`wV};LHz}L{2vEv+R@E?b{*1-4}NOdaLLOI?12$V z^U?b3a)WtBUC$S7F+zX!fy@rczQyrQK6ILIB6>sLT0hmU!oi`1{%F0L**VjecOR{q z{KkSnpG46M`p`9$5C?WtKsg=UKy_jmZ?K)9VQul;!@a1gpL z=*^MLci?lFNp!7t(85uK*pof-1fzKjsCxLqTsVOPa7*V{u zj!sP6I6Y5)_BMGO>-pUv`#UbV@u=lnntsd3%jJJ@iA95tDyJDILL$h+*n=^@QzAB>0D_5#~GG#oEeiWg_D8`~($v`Ka%z zoCacG6`}n+xSO~e`4yA;O%jMr`u^yc z(dE8WZ+&b+ItfIplvYu9y_Q&0s`oXo65 zQ_)w;`x>{ER$$0&wkz%-h&`-DR-{lPcF=O&!nK0Ss>4FrsxBbU8lQ~6+j7CtgRYZX z9wyC&7cBklx}v#Bkx)IizvsAFl{d>}ch08HXL8%4cw9`!Dw145CHKO(TaLDW_v$)~bz1W2c1Gcz z;MpPN@?er!RP>i-f)>24ra{KZ{Jtu=SI$dBDx2$gNQc>h z_+JUl#b+UvEVnEsq9clmWzhmY&BtdRXP?K#{xnXTXtosv!goESo%fgbS3a`02f{bQ 
z&jPbvsXSD0xI#`VE1i9#n#Hc-_~CRpFSg^xiNxY!w;^>-IL*lTS3!XbR74AdsR@L}E>v)68sX>b5vR|LFVH)2Q z*<{fW9{q-0O=%{EDtzuq?`iI~GVXP@h41!!2zr#&nL;0`;C2|*8Rb-i=0s^y@g-+! z@sR?d=E$dX#e8t1qNlJU3(eQ7{Ediy*dnxPO-^;!Y4m})>^;%Tg-Dm#ElGtuPtO!e zgZds}w+!+?RVkg4bB~ew1)|jTP~?t2RwW(>8>vc&&|QE0wjM0aN*fG4S8J0WcgG7@ zm={pBLz<9ae&?NmBWWwHTZPmM&YlwlY5icD6IfNc@n^eX>2R{Yj)4m0<2yEzg;yG= z1YDnn*l%nJ#8fKi1So~i{lLk*K=a;O54MtHgEb(^adLlF8 zb{=!|IQ^s|TA$jRb=~Cwo6tDQ3qLi!qPxLY1Bx>jLo(3zMoa4yCM;pIz$LuxjQq9#L<)59wj-lV%98jXb?K zjUL~Qu$AT;>itYo4&VW zk;3nOvS@exM7wlp79M+U(ma+cx0<1UfCw2^T#mv^)n;R*XnZvXIfv?3d;j4O;<%ka zBi-KNao}Ur8U^+mMht&EPc))-rdje5RCmp=8JUDLj%V(B)7-jOcYVmKE^&UVg6lZA z*aBrJsXB0=JG+;M&@kh?J$)NXqg1EcVJ=TB=sriHw^h-z#uU%5S8H2$jp3UwRTp4I z`el`#bN2|H;+}((v(x~s!a8z^XmnJl^`_D#?m!aH^*6Z3Iy^c(&mb_G}B$hXJ`H)R)7~+(^({dJ^$vBrMYbom6Ob|^eMe-v=vYO}?TM2qP%b9les-w{><8(io6*c_EJN5+qO{j_B&^M28#+Bdulf}ikM(O znkaTsOL#CH?a~}Pl%EXiD^lqSlSqZMb(s}Zs#~J6KP6f_1johv1{Bi*U>%j_AiDZ} zDBY`kptWS8721k|nX^Acs({#&W#Eeg5O!)6lQ27j>;3 zHt_92WxCB@33PZInZCDNaDITwsusFmmSa^z8J2PzBILEz$$suX3l?pzdjtbNfG;uL z_#K{96Qg*tq)zfJ*1nhWm$`O(P`6ACYVJmche%%X9slcsdItb~k-SU+Nx#+Dfx{eP z|6MEm$2ecXgUx~VE6yTR`kXT+!9_36ILkYsP%$1FLlqv4I3eKRV4>NnCNRiUY5F;S@aw5NHcr;z#M>qQDp72rbaJFKr8tm&Vr@v4r;-;jt%Ocp z*DV%u?#rhN5rJNyl&+M|$<18NJ zJXtAs_z|7dLY6he5tV3Mzp=EUaUqN-;o|~%#NknH%y}ZetvJ5pw=wJ&#;j{hRyGmn z6y_o;n77L7;KZ(5kYlOJ!s-Fx0lvVEg-V07mZ3?)t)y&7qxp_+0;r%SK15eVc>x(*M{Rwz zkh79`Vz7eSiua+jWC_*BMY2oBz!%Gi>sI?(skwYt@iyzn7n7K}V=4UMT;+A`ukCs5 z&faNUEtMN%_9>cTBu@kArAlH*)P2^c64#U_lRo6VR&i;piRfAVHixqZ%u@$zfnmR# z>bfd#@{_KN!~+ZY2l|w8qcDwJ+t?UDU=j}rc=g^m^uh?y)S4`H9dD}c{-fs)P0k!x zP$X=VQo&HR8iC_3(3h<8-wjq6ozvdeRoISNY)ff|;RElm+p?-bP+4^dDUms7oOMO3 z)1AjQo%;}|wZ54ses^ZL22NeGDKS6`uxQmQzpn^4@C)VQ9CtqDJ#R!#Y3#viCHW_0 zDI?9I-$)g~wplZ2Pz7YHvYCN?20@stCf=m0Ng zNY_7@(kd;PxkQc!Vy~JgU|+GpLU8UdaES=PxS~#n-obP*!Z_E@VbhJvUj=dM7(j-stG)R4$6&Eewx1^%LD@aaDU7 zbM!!s5Sd<|83I5JY*1d4LTN5@z)nBQrAK$LIl$<(aJNS=UVhviNInhd)fszvx6j|& zw#~fI8X;&pd%N;;O$luCPwJ|+<|Rrs(#n5=4NC}3G;r5BA^pMbA-|4S8>DOLMz>h~wwP<*D6Z{B 
zx5hnNtNHH1s-I_KJ+{4Y%bx<48Q8sgseeAwJ{XD)m*+3!r zRq!*%G#!fw2R|Ja&cH8R9xuPXM?9Wzaw0S=o*6I=*cro|POmCRqWf$=;~v2~@6{JH zw}V|Cmc)7_uo^BziG%fbI)KVp#4@)J&u?UOv{WTGAHY{QR2k5v{W`^%N#3M=IdMNr zGB2ejAL7B&R79@`urbnu8aCI~gu@uYH|8P#(EpUJ$LM3*=l6ZJF7uK)yCmi1Sq-V{ zQ>SBu4%2Z(A?%wSjl8yVA6n6fdHT1RLpXXqk*Jbm?BO89@&#@Rh^O-HL^r%-dCOEf z8^`2S?$>^C*(4P_XkR`}qrSi&WnVbRE}pa{se5))E+?KmFWmCt%v9l8yZ!^3^x6SF z^(=?fy}|^88a7cf!>nUz5<6#4GeaV>oj&%ZKv3_Z@dD18S>Ix2R(Og7Q{fb(qWe$o zKpmk?>(=&zLoHdT9L_(0y8MI$>Lq1MVA}@>63wun1>y5VF9zt{crC-9p;T(A{9clg zqeHCXA>{6;@@YJBk_nfyPt}Gh^y)@TLTN^*d`AHzSVMlw96P`-?IBKTSx7#D+RAF6 z>X0BAz|q4Xd>I!G1{zR--0!3`!1;ffJphM|`>5WkCF$a$&;@4Hl~v6K6C4NAHM;88 z3XC#@jg-s&M7DWr)vTuDh+Fq5f+;pyw(teHPj#WX?CA_Fe10##2|i_dY|Ja^A@vEKvjGdKrc_jtQ4Ltc>1lV8rEeR8*swe7U`4`W17(;~ z?ZFgvo$qVbuDC)@Q$ovxHY#Lnc2ka?cd{vu6&+Cmjw_zK%uzLzz=%EZGXI*21*tk( zqwocWA5K+S*!rfE{@Atpvcn{jTo5NP-6BC&{5Vrm@6#rIcv((!<&d`H?k>Z5{gP5x z)|~vbW?Tki;_#CWDYK1aDsk6Z8GxyHB< z=MXPv*4nlD0^T*qa%qxFYhwQvc91;>jzU2--yEAT{mjM&fDKdv8@P*6pd z28ehRL(y5m?dM-CBFQx{oIBAYawpfI!3vmFTFYMV3~pu@CE^)K6Qy0%H@SO^thsGR zDpUz0F5CtRVNtjyuSG@E8= z#nBUb>@Zi;Wu&bWaHemrSw9)isfOFjBXM`;TZtBMKYkrpz}YaLCbr6v)yWYkys{8D zx9>jvWH;+hcNM>|{yWi6xGL29#rv&{95xtsfGfoL(^6TE7n!Dj=tf8tA&lCNe_Lw% zZaf{E4{%O8SSWC#Tcfs|c~*4>BXgqFPvx2MM4h)M64$2=5}#LTv=jO11k4ygnn5p0 z$%y#bp4~`Aey44QQTj!IKCv*lgjv_#<3H`3c6_?(&8kzYs$O~)B5(g3XvT>^482k6 zGS$vo6KaYSZ@4d$H;1e0ti>=qVJX_b2h!CN3%JTrq8-3Q!T`9!yCqn$z3ix>&_Q`u zt^};p8ENA#_vz_r3SnWxytwviG3MGP*Mm@Iy}E%`F#tUGN_Gt1e273U^WSg)BH9RDJYS$w>Ki|I zW8u?%#ycQgoWU?X{%N+7SL%R*k_JK3br>~#=G{-bhC#F@th1|RB?N-oO(GxRSzH}8Urx+I$X3nrAkzV@ zkV!+IQ4)AHyE!*k**t(L3BD861Mppd+F}zkE7f)0-6Q7r6tB(CTZlWrLXo=U?-Dk~ z&YFJ;)EOazN9D75@m#r*tyIT8ge#`V0ESS$=DFyE+*qGF^cG5B?xYMiqawt+BtD930CuIMX5 zz~{dcWfxqi{y7nQSTlvr-Kfm%c%RQymfmMc6|B+noZON~^F+v<06k;U8P`g2;U9ew z1e_^X4;@1bHrnqf@zNJw>@*rL^zNMWj_&Rz%57jN!2>D zEHSJkLI~RH$hSb9pS3J^^gtw&`*-5xa`*IS0S{zt-c{qLC#$!7k&h{mTQ9zNPEPgT z`-G{rxvrg0lM$S`s1+6b$wnv~Q}orN|6UM)YS`@kY)Ktv#cZ#=WRpb*f&D+mhU`5A 
z9RIKF09bBm35WszYq1c4hwAWuogStZWH;2g{?`!xuOc%IBNQGFqvR~B{Ac-pn*t{( zqeNHHGO(Hl5ep*(cVZym>iL|>x6apsLD@645y-lfIy{+K@S2h2cS~6h(Q@Qui8v0X zj3k5N(>*jk>*ssTZAJOGf8RK2x&7L7*07{>#gC0+^T*cyHd_D#+`Z&23W@Sb#dWsob5}g8_d zbsbsX@$is5mN8^^>4=7|YtX8tjv zSYYG{!EB>K6D9}=8M#Z(u62dmcD6Q+x*L{KJP(zCoa+1jG)QY+*FLegs8&wX>B zRnNp=wOIRWtG7U%<1~h0DU?1j1ED+3{Jm!$#ZUX!RyBvZ;`#U0hjQFc=XOwbXs=&P zEX^|<`^NS0hA_fJ1m}awqtwhj#a{!P+?ps zZ86$zgMiDJYcqhEN7l<%VYnYPa}P*rO__PXrlHmJ*{gS^fSPzzHCB%&7BrmDVqMD} z8a?Iv5DNx-w&VEC$<$g^uOszW!C&<~f`O!>cDK*f;=O+zW=#q(83iX(E63+=e?EO1 z(csLe0Fj4CW}O1c4e)c*KnMjh3D$2q55~yu?mx{Wopr@@TxY=!!(~mptvkpWFr{M@ z(0ke7pb<_asCcqDE(IxzP0Y;t>^D1h&aVo~@z@>YLny9-71d_>+;{!87N@H=_F7VS zM|WM4K{0CoiRb{b)mPdQ6R3EVwFQU_%1W4~(MPOiyaPneDG9x_n2G)Dm z#xj~hiz8YSY}jgzzRahyYn~bFvE`TX*ihMiBVx6UAZAL8CE!7aE!3I^x!A<*80?H_ z)~=jafj^X z7RA68^n1Gfi{z(5=$MQ^u@$%uzSK@r-9Rzf*1F2;a&a%l~SJ%k@)G5ka}ETOqcBdstKKBQ_Iwu7BwHABkF0NrDf_So73de z<)nTkkBE+z1({Yx%gMrxaf8dg{7zrLbb{9qoz{6^zE@q+ z^oenvlpz_WY_edOdHVl_ z4KFLd$&fiR9h`HXRcwQdkg`zMQy6b+Q`0yvXY(o*7B>J%sY%Mq`yVXRfx$fEaFl32 z)``ssX3BUh=|Yc>O$5j*fYb;mg_5Gi>7Bp6$n(yH&=c&fC`1&vEV?v4LDO++H#&aa z&Uek{3-As)_@-h(5tVf@8HQwaoZjsw6+n4hVZ35?WHT_~K$H`D4k;y__DAMz?ZyfU zA;PQK7jm+)wbvxxhvYgNCFkF6yQD}pYGw`Hw@e#+f}O0Uiz4Qqbja_a4OL7%ln7H~ zCh-tirSt1ZM2p9LaowzzA>pV4B@-XWs8u7U>j4NCAe^?TW{jlCVbu^Vl3z=?(?nyu z>>5xu_#luw`qUR_IRi6E3)A__2IZqHT;3#d{TQ<$P(6)GmD0Rhx>xmlrqNWfa%;#(~i%5l}jNHu9+ zb$alXwhwntS^mPOYoqSM8FNUdtn#|ZZlg@_jNhi^zRgyOpL*tXWLE+t(TaZnM4Sez zhi!6zrm>TQt7op@{dHY_(waexj0=)h;T)%xQJ+ zQXC(B{HuogE0kbTRIr%=4TQsG2Lox}nsJBJfT>1-VUj6BC^CkTC84bOH~nY~QwV0J zkj#@fY>0QQUwp8~4shRKqar1x9)CbmM?P!lwdY)Z21p8KcZh`TWow6t!XEN+@M8%S zd-8W85M}eP1->p_wM!1py72@gI$1g2Gfp&MbbL_g)OzvR1D4NT^Cp90rL>tvS5u#p z%^Jy)s}9hM8$2?Nq#iG&eNdN+$;w%xXlYmuXQh#ZFzRiy?4f zL$etDyaZb!$E0z8A;zY`JaURg?Mn5V;Jc#M_?hyQaSDG%N--+#Hz_(ry{#c{l0p5^ zeh!iQHM)X5?{C|@+nD??f@=k;Xmzurk=*-Zz2CakS*xw%Ex?~JstXJX8*+~z(EloY z|4N@?(t7G3Wi%9^a(%;a%BD!#r-_t*#wJ>Jcx;GWu*hn4)81gE 
z!!**=)O41+-`n~i=`B`!CyeFu-@C5aHZ1(w`E*D4JAnuSc?G?XqW&cRjkMMr}~9GDGPBK$Bls* z2T2+1e%?o73KH>cMva1?5_#Uv<21h55*5+BS4B?Z2ykEu&?u6JnKU)_;5Pvu(X+&E zB2Jkkbub^MWTFsmc}9b2{O)AmT%#_5l_k-H2o=8Bg}U1agFt-*?|}U0D`&CGyXcWo zKt*vW>_;F$qwBqq-uz@stL;6-k~TcryyBSO?Tqujj+Ap{=(i<^hSmM#B>f!)Ev=Mf z)sM{E{rMj)=qv|TT>nf_n*a-QhG)toqTh5{IlJig(?&&G`}O&f560d>^FewuCZ|S` z2ZvX;^o?PW&Br^irBtT(c;uLZcv9hB&;XI&r^VM|?i;>-LAhH&tfHiVQrYAA%(&a{ zf~MrX+6t#@%d?X?HKmzWF^G6v5>N?SXNsPaUX`~71Ja6+ajS_iKx{e%`zoX4M7$S` zjqXRNGel6s;rs=$g;-)~TG}`1qk`O{K=A0qsB#vYWqoLa!^NYP`s^lC+o+ygpSD~0J*A*cz-qzF5--! z&aAMRRlL53!!YDe>N@xdLKCLAel!0hL9=wW--@&4i!?;~)F_e@wpD}8wv%3XQ{6j0 zsNy;8yHmOqhHm)ZhWeOOCPJlam;AI9wor#5KFUsVP#pakAR5}^;qHSHcVu#abUzwK=+2oeNa&Wu zpMP!(@0xla0!?#Fwp7%t@$o={hg8Im6e(wrUr?d4Hb7&4wh+b8%}F#jH`f#FLYs!& zw-cvSiB-w<O;J>_Q~1Hz?vNvEVe*E47XO z5Pa6JBt)HE_kL}tZUGmN{KcL|b%&ph#lp~4M@;_gY7+}uvntbWkYvfu!%Xl~hzgJY zw^d2=CPX)u%)}%Fw_gWWb~Y!rJZTZ{ynu*G=Uw*Pi(X%7PLGg{}heS-fM>lvW303Ob{I@#m>p3Z+e z@4EOjp2Xwj$z4M$_xm{5WDpfPR}TepmbJZ&)x?}f;P4qS|G=4I+I;MDGNfYj@(hVt zQvP6pF0^8rkrCSgXXT$57CK-Z{iua@bLi0b*V!|)wqJAWtnaEZ>DW)J*s_9f++J%N#MF=A0QVVt*+hMa+}fIg2UPCEZN z=MQQRm$Gt8Z@`1$04U-W;#l36Q^ZA@G9lY8D9la|G10hIz>_gd_c_}bTl}+@h%^r8 zyt%uZfb{m|oRi(OhH0K27Gi$88q8&Kpy{`FT!o5h@#l1sf{u?22%ZfON0|;pV8T;- zz4+NQ#afKqnn|o+%k&jc*Hw}h9?K<^9qN+)FuQ29q6!*YX^%~2+4207M|xjRMx<<%+&1(uP~7MeQ$#dpbz`zibV zSF6&qZC4BgC>W2~K70Bm<4QAssmleoAr?L=C|!A6uGZS7RryI+^#u;tUd=~Pm1jS1 zAps6QyBmR?^i8ScHdUEbk@8*b-oCDhkH?Zkk~`wz8nop-*$A$RKFu3qsWIG^&371n zgYaWK(7`+%JjThFVmNy}jlo1m5=@5KpkX30CoUnqac_~}cAm^|1;Z@9bwi*GPbH<< zI;e0Y4h_wN)W10Bo*#?Cs4T1G$=%^P$O1dlm;R&-dqJCuPTqzVQBF>yu|Ptk7+FVyo>e z8G+Ag$oftBFcoO|V}Mro!=!-xM`-l<08>vL3?!hUjjdXwkDdu;l`(QF6kX0&%BXQ4 zGFrwRixY1qHw?HI0aoIiO{qCp0jLgQuKk>}G@RxW`2x?;Z|UqmS^S*9L^@qcS_f}j zM01pUMfDl^gN$jmz47*YM*va;p}aTF`(`lRV1;Id3xNJ|r(h8qV(X4Y6zhU2zZq`DH=r6vfi6%mh!e= zX52&TJm%)d9b*4{58gs6kQOmq2Per!IPpSd!;> zHgI}mr9zSA+Q`KKpvRke&ZiklRLb`%5=ZUm9pAJJJ?4wS-el1(A@~gD-*k*1S3Di0 
zd&Iet4M;~2k-W0kR}V3=*Eq7KA-6#?=shbuBO)anYVEXoU$h%5DVoLiKzSPWU9N%Cp0dRh3hR;n~(dc^Pu* zefZAz=}nM+wD8)PDWqu=O9C~jUOQtYHDP*azTs{<+%WW{_jx5sM+@8q!G-WS&i$!G zLZ0en5J<4N3w2~WhT;-XZh7Tdw5N)V;SX}+=mdU)wnpRbl!utX!VtUoY9?vMFuFWp zPLsjS{5dxP4Vc6KhCZ6_Z1MioFk+{x_|5aYm7D_YUzYGL*|;DmTS#4i`?B+OAYV;Q z)U}^{pJ4sgust2lmyqVl*%yS2z320ix>B1LwY3r=}~P0*s@oQ3pr zgfVP(V2@@q-=@qj4lXHGpG|(4Z>yRu51QX!h@FXs(_F&56+QNkl;gRtOE9FOW+H3t}JUfCSZjn{$@lGjLz{O)tR~ z0Pt45j#N1+0I))B*hkzQV?pSaAxWW!*Q&)=8hg$$34O~qKsN`4rgOT(TaA3eNIrsG zl{MeI)1aA&il&`sabf)0Sa5VVu|On{(0%a5*6Pfu`ek%LFsGnIzTSC;36i4jj16#6 zYeGaF+5ipBsGesf6Kde0sEnypHCV?BXU{(pQOTqOpkbX1CY3MF+BlZH=+^91fCdz(b0VoYby7Wr&IVrg~{DR;(23nbbj6vE^zxz_$J>Ab-G0i9=_V~r2jfV%IxtVjPhTPhB zdDeQJV-q>Az|6-E4d5+=z}rtZr}fSR0oVD&SC{pWGh_@?vr|gbjb0ZGb6v zrem0a904_50b*WWeW17PW#RBHl7{ra7;7c+(+Wj60Ku4o$rL7X*#-P zK$3jf2+~%PyZxQ_Exc~I{KE?n5P3t4A_D-nWF6!zo5mSkn30v03vprS_qs;g6u|_> zG=9Y;1Xo_pM|p1OvHKkt8KJ+^U2G6pru`n~|ejH9lfA;+Qx(gsZ~1#tm|{&0C1@FW=IHsyYB zi`EI_b5%9B@o+Kf^73gG5HUJWk{ml*Zs8@&%<{PT*}jGtIR#vKgKL+R_yuUoehTR2 zw%&5lk9~XgFdNbN^Vcm0zCT*7yQ@)iLxJu8@)yQE^3C->hXP&GdO~qix~;`cptpoM z!hMQtD{<)(i;n5DIa&0Yek^F^cy)ii>D9r4*DJtxD$Ps*s=f;%po)U&)&YbPVG!~F zavNxZEgR*rC&@FMeLAwwPtG)$cdUtPMtFK6%NQ=Fv1PHNivb^#Nt!%E%Wt{1HmUI zkpZGyQ6cDQp-%f->v(0^c{^6m#!TMmUpp`FHRt|C=>2_M=$6beR__8tGmfhEQkdI? 
zLw9{~V&G@Klw=y_%cIFJsb+yeZ{NOsHGQm+)m2vtQI9E0MsZs$h?eLywE>ovjP}#* znK=C;tR=3VLjq59>a|Bh6^CK#+RHBH)d3anV^$()&cHxR+Hc~~$@aW_B z>)IC*j!Xb^4FNe%e$J2m?kSiaeHUgTAA{yh(vHCluir>_AMy4Y>m`GX(khuu0&dTB zeMXPAwAxW2S{Q>Qb0D(nhXj7ViKfV`TLGRS$Y+QkyyS< zLMf3(wn)9|ItO}^i3I>7xu|#Fi^O|ongXtL%NWNbx;pJU`GU>TmBQx&lqrxq!eK#o zJFHm%j_~RPj7YI%89GkLU-996z3aN!EK!rw=I3!)X-}0HRd>)dk>kAFQ(h?)bc@(P zJ?1rFBEM;hTTk9oAhJWK&oD>j6}i?;fP43r`Dxh;qOe+?1r)(pF@Q*9qBQ?*5JY}61_ejCEN%t2xhY!c zC>8D1KdBYJzVG1NW0$}$q`rcPs|SZ!`|m!Z3Yrjl7deyRfnKvdW7A>j0FSbM*_p7b zKC8CIUT7pkdZLJNN$An<)w}rr5%$($Rc~wew?!i%-2&1`hk$@I64Ko)I;9&#y1PL_ zIz(DpKzgCl4bq*`i+Ja9@3YVOJXuK3P--eZi<;8gzdX}8*Q9`3@}&(@?r zK99n=Yz~o~7v`7x!eWOnfp9rN&v{IU@dFFEO=R&3N$Wjjl-*Zq1u+;08qH7LRh&1% z&%czgGA6 z?B^NW`K!xry_Ri$zRi0fQ)@e^fBQHKo_`c@8>uMxI@gkhUrqFGMg0KNPTR--4uRXQ z_xn7^wT%}22w!7Tj{pmu2!|EAup^wJzH@?ECT8YZ$;L*Y=o8JuV}I(h-5t zFzQ@{5;&o^rx5(34gBZNCfdIvhsCw{a*eq&yP+)uW&&+uIBvLR2Ig%%Elwev#zF0f zT`hx|m3$BzWvQlZp{X{TNR-5$X!i5=_Yx3EhhUBHj|)-G*D%9fuGf#}C3<3eTlF>i zT{<9brax>ANgskNkOc5Mnnf{(?F`se9)*7REdgxD`Vgb9X?=@xv(}kRmNDZv{0x(y z2HQ`;IsSYB>^R{{TUpVw-|MXLm<^aWYxghT>A+L@?H~m#vE8W~b}yh4{uMm+e{9{KlBqfzMMOJID5e zaCj&c2G;$?b*ju3(Gnl#=5!rSHXYdQJB)`xG+U|C_u&z6?d`6F;~nONrlnIq*6+%` zlt*R|;OczI_&&|xnj*A{^Whw4CPUKN5}RM|Q?F*Ngatg09sY=xB;lq}(GcP1;m1xDap`>O za~RZYey~?@%`FvugzD1^!8=e?>ssE+3}$NUDV6M!BybOb7z3nm_W7%Q;=1R$5ug#1VCvtq_D#>!vKO}K>Aq$ z-20`EgqW~!KHK)tk<66e=W5HGV*q6s!e)bI`u>b}!CltG#H6q44c1X0yO-areO+So zYT}Ng@Pa3dOrQx>*yw5LjT>qqWnRDY)g4FQzTb76)ZU^2a4kNZ7!t#F;@0o4#elx{ zL5K2^nMMn$Tw1b&DCOmcGa%x>yiqlsz6E*HFtLyr&5uFwV1P}fUeSkfmLz(9UeB=`2E*`1;wb4KJ6=;ZE2@2F;$UtnXKit)gZ$FL{KiRO~r}h&)j~N0; zihiBR?i+rU@-WQs2ED`#%hM*2fg(~BZydyrN_GdU#$~+jvFjn{ShTU^dL|sbvB^bO zapVYwk6d=g8dP%tzGN!XfbWp6AzczfIWj;LNf4xK*~MpxxNw8CbwJXwSiID)nM-oN53v!W0%q z-CLd<4$iW?!*g;-u#6hG@+TfxOc(-DO?ow#5C7kWoU2Q_?Z2mC{E!^qINw~Ig%#=s zrH?4w-EW_)Gus(8tX0_1=@unVDJ!XskPbG}IpT8@T@Z zK>(K+o=31n{;R}pQZcSnZGyVG%$y1QtIX3LMUuxW!_m&N`Fy2UW^EVO5v*(VhHtl4 z*VeS!Jznt_$XJW-J(B>E*Z@zek*7EjRH9bw7QGvdfOUEWzH0L0k76aXkaqA{K9Vv1 
zR$S$X4izCY06|dkQ+>DXxgjf;(S~e+0^-?n3t*2!^*cD4P-3!mLYLIi`u}@#lT(`d za(f7%1VxGvG`d{5=e0AjLYk?LeOVk@b1Cf0TGRS@PyLQK!WSXQvL~}j78W)22$WCI z_7Grno^@h40;=atgdy6M2(v?kw~=06UD?Svk1o2v?r_hr@Xd+ok59IBp0BQGt_+>; zo7!}YF%CS}$|UQJ^c9a?W45RE`=NDQF|Kp%85Bh26e$p2c{sx!2Vzc_Ubo~5;=^!9 zK(u4=B#4Q(q(9qnh|UKVdu5Z<-&<&;`BxKsh6~hcxoW$6r;78v}==c=5-FX2U2f zOEM!@IW#5b@Yrpe;cq_SC-g*(qG9+jtIN=^A@a_D&XJWs=fy*Jo?}}43JsR z+H(G{@2z=phI0WJs^4btVU$>~LK8Q4oo?FH&OFi}d~WbzOdN!bWFn<-@p?86h}XxX zHyNh)>4hp)SHxnrl+rPeuIFcV9(#!654h<~nVfn;;BM>cR&>G1t@u%UbNLb~nBos~ zLswbug+$aal*d1jK34teV?Ng$6Rt7)_RTATC*SIUY2upwZ`_;%;6;J?i1?Hk0(4yN z8U7V9x0MxGqr`ciIyiqw6p3?s5s zU$|2=4q-T{q=dJkU)?@<94+ z34M$29PwHYOP_GQ^3iP@-4Mey%dBF)-{K?d{E*I!ja}QO@6C_t#&}eQ_4#C5&G8;F z?mfXamvH;!KG8rmo2Z1Wf>GRg9V)Hzb?V~tfK)Z2+UL`$2Wb#E!se_)aoC()JTQ)< z#!wD{3fBqxY;0eYF*jjPz_s5O_v>VbT)c(-rJi*e4I2rMUGmNgv{{&_yy`n?c0$uv zF;}9_DLDBed6j#ZWaY{6n=T7jVH0&atd|1Hx*hY)Zfjma$RcCHU6++T)bnb&lL=0! zW*hw~<{)#2l?HwyKTA+h7*YR7Vc16XFd2qm22!RAuNX_Ws?R@a)y2T@)~$H*%_^sUn6utMd0o>!mx$B8Z%oI zBwf-b{I$m`tqwRN1qr=#AK=L1cT#*Jvp+fjzKp5$R1uD2`HX4hxOT5EUg%Wq`}469 zx0I>IuFa5E_~hSqJx{&rIPuMGZgAGmfmH{@Ewq9yUNTd+)-eM=5Q|YQzSKABSbj({AJR_UK4x&{ZcxHxxz&MU=SpeMq z9C%L#2bf1kpPimdr9nc=yV`QrrxjXZGdK?oz1SYPeDKx?`WWk5r3w^%^kF0U~zGeQj~p&B?^{F|Hqd*Gso|#Xz>=!`KXg=YiG>S9jTDCk9RB)7t~MW?LJ=6MOH(rDhYd;vWO1jCw)`ajH>7b z^j0gZ?Ck~2{fidjIeIyITh+bGE%Vs++~V5pGs#?%Yfc2L^O2zMOX}BTKXC%4tDPzr z@pifYwS4O;NBAv40f?t@_O>v8TzHpRRsH3;xe{lit~U5e--{y;eBePU}Cv;k(ZKtg`wKuW8VO2xvURJGXQ-BWOe$f%K zs!6Em$w+OLRWdm?`qck}I-lV$!?L<(+pV22w9h}bTRUvh=9})G`Gxo+$J&+FM{J4& zRi;D01z|SMrLyHJUE?{uctMFRWkC&@o3GtDD2Ep$dxp zFA7-+bINeprLw1f3+d7OH-yJu=w;4ZSbQ6(7lMWLS}NK-xV918ysAz@D?wVQ`R2B& zv3khGvbou|55kd--oC?-I9=10Xglq+t@*rwn$Ss3Nm0>IYSXA(TS1}WL_!P7et3&S zF(8!6kD97#L2^+>=O~L*^E&UaRcKgzg|=--e2k_mRlCl+y$HncQ!vj6Lr!Ekd=e_@7jMq&%MG@2KP%M{3PN|15a&9{097DfXR()nPCXX@#0 zis-nKijp-RfEUCe-BAE88V-fGrUW^XT$LdmrpVP>=9jfuK)d4U1=?x7HcA+)@!A~& z@&9^WFfCno5*VhwvI61K<21O2&*62y&QpFzQI-DB1byGLf(p5=CGj;5rsx&j$gA<& 
zGNOmF5scxL;R_FeMyGEm?3Z11$nUGM_laqLjFQaqUY?)$QHX+wzl}WJ)IN=aolo%uUf>%5Wk4MkI zd*0!6QtX!Xg3et5pwni2eGInVPhngs5%hn9JK8oIgAKUjEsbdt*Z|8;brpxyeuGb*K-0)oR0BJj!0fXF8LFN$XU7FAFfaDN| zu+_<^jLy&zS{{fJ5yp$9MubetIT3|m7`|p=q9YsljiT!04%hO3T{M!$ZdzYoh&Dos z?2e~9YwaYk+g@_jsdT`%!uTssA4ta(UzQdMSTQbeHOO*TMlcKgdKgzzUYE&Uy=hks z%Vy*qjsE`rSpC)_>clo0Z*60G>!YU?HC5x!U}vSI2PFa1^y52UzE`H$22DnL&XlJc zKeb=OboE;QqS1PL>6GlvH0wf**@I{nEk3i-_?w_Y{SIU)mS(d`M$M+VfiU8=8d@Zj zAuMAZ4ft8Iz80k}*n9|v?nc7iX)jVeloi}mhcz#-&P=V=*4M*(DLho8oro>i;;}1& zgi_VnM!?Tk7Von`v)#wmcHr+%v3-GYt@qO`{Za+VB`tI1#97{A;lYo{B7pF0m^~fi zO8A4PI!CcA+;o!cFYz=_)%pzb7tGa$)v*WKKCG20;c(nnn$ix~x7@~De54YD;yeNG zYmfm~?-TGz{OfKlvwcf$5SxqfHoNViiI0KYX2C?SAkTib{0?1p;ZW-Hb~YQ3o2|-5 z{bN=kl$s}G$PG^6FkY5tI~#L=+x~Zuhj15OuC%sMN8!|cSJaVP8YF!2NTy8Lh5{>vUz%nB26hWybuv%-Psy5!^H@jT%+PM2BEic?l3O-aEhFy&FJD z=u{5Sldk!xPtXa*9-t*F0+XrTLF=7AS3)8s0^HkgxqlC86H+e{5Ylf-mTDZz72tsw zfU?8DiR{vQor`+8Q<5jGrn&=5kf{>SClNYP6NpCWJ~$Exj}xH7eEZ&~N3-RCpKa}D z^!e+UGnuAz<97VcF+4!`@j|91_2aCgz&h);%yZoz(K}0{#gTRf`wM)+7FxC~DR7N@ zlzEyA;x0?zSo$AJ*2?_-0|A$PFtEa0ycu98DoLio`}2P7s~_en@{sHN9CnLKY_^Nu z!wvGx!6LRSFA2-gh^^chaA?VcoE2S|vGOv!{oh~F=T+vl**x~sUai<+4Ck=Zrk)2# znB(*4`bjr1Cx5TCWEDW&XF9};?7Sijm;q`S+kJoS%ERQ|a{7V6xS;czLRIWft2gRp zBx4i_JLpGa!J;6~b))8c%;2zl@fz3-z|mM-5#)=TE0sSCSIoid z6WUPdyK<*Tb#>Wh&#W1yauO@4)yfBp8LR1bxg0@~DegBT7r8WYmIV0%8I z_{}Sbz9SrQsOtFPoq%WQ=P(N)=*kk-a2~Dl8$Em`ukTMtkXRX`7fy|Mv}Z=;m-f&L0KlkwLDMa(bQ)9y;3r2-U2O=eizC_s6&q$V;_Ck z@gDOT-52D%b&Z6=p$dFXcZ$P~aND$c2v{V`PMmsHkKh=2P{e%*!!8ZvSs3y@Oeb*g?iNYPNoHO_UAvYTv-Dq>kC=l_G7VpZ4$|%Fa3Dw}sQk#a~ zW3`1x+zNgdRVraFhq`gDu;@vCdOhXl`tv)UK%WkRA+7H7ajV_yAlY4CldR2I%f;Lp z8>vhD4b7s%-MmU!1h|EOS#ntT9_83BN1FT*ZGO;p_a#U~a1+j1HTr)K#f83!vI3p1vEme(LmKkDmQKGm6|Ux6Orx$6ZXs4i-!!^dp zyiAYco)CMIDB7Y}DzhVCf+nufx|DLyA{pzJ}gSn=&)x{{4*Mf#J zz52nfl#$1HHv6hlrBFV_j89kg(e`N2iK2@Q)`%1e3ZD4qUk|Zzr;i*vo--B0xBybs z1EQpa62cdZR4H8oh{Jo6VMt(vWD4J*dcs8tHXP5qRaKqPCzVD|xk*dsp?UD-ix5 zrUn3-GzTnz;NUOHfDEg$0R>3@uj~q36k-D46Ut**TQu%(KMMzZUIm1Be!!vh&Mmvj 
zl(@$BD7f<7o}aXn+B2waJv|ek*^b!y(YG_vY(AuV8WA^9B9$W&`c!4bp*?^n9kN#R z=d<=F*7d~p9oytclWnRl^kY=`(y3!^`b~2p-%aBCGCq~{u~)SLpy_pSNVPu$>o+Fy zo$m>*#JZn;cN0W=F(?P|zVSR20U}Fy9_%uY{@_F3oo%^B_MF+o^K)11#==v+x|Wa) zo%_fX7l2)j3?6$MR>$0zwmX2}D*;EO)D)INzCNbfcK;RZ4lMZh%^ji&<5mv@3;86U z_$z)F1ROAx|ME8YoVq~x5E{RdvOi z`73=$ZdXXmOl8n9qeT~im!{oZzE;>#>V;gC)EEhft|Vpu`NV^pu-_|Ypm8_Jt;c_A zsmoeL(RgcE-MGlnd(3n4c@}6SyLKuy45D4x*b)fS%eP0Da1>6puywWDSo+SGvQnat z4j8)#B;0%(W&zpe?Limid63L^u96D#5xC+TgNF&{cOw_7NrnKiXia{leEPTPv$Q5k zU^SJeM!^heGb=+c;_I1aeC?s6G{j!0mj3UICS0H_c(qPQd!r0tV`{US9Aby|RaTBx zB@|%^k5HYqU0*-&s&@w=tO3$8unM1amsS`Rui&gLd2o9{^@C6UE?bFHu@vP|4p>5X zMBP=jdOZNt?Kq`BC-|$q=chkVb9qwwRUKuBR6^GWbVij-(0w|~7EPk(g}2&XnSdQ% z13(fzpLNuz#c7;4ke*b%iQQQ?Q*u5p?OWVs2@IW-^a3gmkTRnCw63FIbrY_~+CTE) zk~9B;yKn5XKyf7e)fnXS*qXk4CH|@c>3{qlE;)NaR8j)u+V}Q~aV7e)`FKENOJULV zpl$Jt>@!RimoOh%1*gQceQLuP;Th*It)J$7-j*;(pB|IOetJ|aU)}>z_pAjdo-R8v z@fIQdA@?n=-UatjpP^Cex#FL2drbu83CY8;)uyY#QAw5lXO`-e^VoEYt=3c^ogT-& zialKfSTIR?d0;U|bfl&lOMTpMaykcl&`B0jUG4b+sAKEHIx@LzfBf1Z`V zQ$F(4;j!prWxOneI@`&GvicNZCv=w{#e51M`oe5*2->uJoqvE+x@|&A3MQ#R>LyT& z0oy3pRlKH*3u=|E+AuqAlcorihP{)anhloF_V$Z-n_jsT50Cu~la^#+13*___S}AK z*|{Z2Y!=k3Nn@H0mu?#MT4PZ4|I8R29x9yRUbFHzOxRT3TW9qk<0*CD+Xw3hL=3Zg z@2}lh!aFmH$`}0zv=W35%Y}Ct8ug84Jcw=(@D^TD_ZkPwaDQUkdk@xka^F?865|+k zQ|kLCyIoog4lWVpHt_fU1pra8XA@IjO>1}NCQ zqtM6@qToerhRT_fBXC(?<|dzQabxYWM*n;K5@CH8nSn7nB>i~+P;_wdn)0WEHrdU! 
zP0vW(K7833arGj@-I`R;*9(NJX_dk4XP^o&J0k~Om8o7%S;Mk@ue8F_4PpVfv`z?MsFXkwbk{|w z8Bw)R~%k z$i2$9-d>?JN2UAwO?c%tMme_3F(3J;;tz+V8K9CNBilqECP+6WrNv@ z#k@AKdV@FK_EWy#{wdaT_04!rZR2U4v1+@1Vl#^_^fZgEvsHc(RLlJBi%Wk6oU_)mqQ z)Kby?7G5gCYX$RagagH=CZi@25S8_E^zGvZ#sQ-5w#hwkvmZFjTYpiMeTc zp|=x|rdyqNy%Qtj9AoaP=J;tiQNrFK(>^Vl&Jgh%M>G@u1Oy=FZtzPs@qAFIE;X#f zJT*!9Vj#Qe`aaJ#3Tf`rW>mXaK3N<-#bAJH~4w$!qXT7*ntk)1m%Eo!MhOn zs0^^V+=^afc#KU^?C^7b75o^g{8C8si74RZRc$5!S~doS5M#tTRXj8Ng@6Qv7p2M5 z3jkEz1+ppSGyu8zKX!{%w7^PK2A`<-@)nVE)|0mo^YgYglCx(HP)6i^3ZPh`j9_pL z?fj#1sB#$(lgdF!p{{3od`{%yN9`yffl5 z{V4Th>f}AIb)jJ7|45#wrh=P($tQc z0za{6i{#R%ZqFLpJ&25(ZcmDY!zT5;hKg=4LWD!_E*kt&X7l;X{;QIo^l)2J2c@oF zj+X*Yi(BXfny$W5^P|Eo8W2YS=M(pKVFf^UeOg#j$+tO4%t9<9K@--xj*j!gl}@fV zDNc7h82ZMfMY6o!pJNKyo8Ef!BjN9}MUSp=g}t>YbZBz!LPAUgf2oy|SLfrr;WYR= zc2Aa0)z30bNs9t~$w*TyGg&r#FZqR?w-B{FGMQHRG3qJ>W49I-%LvQuKP z+$|PM8PH4dJ(NEsMZUieB(?4v*m(sF=S5_MdM&GduZEt?{5^IF3_h(<^G|~M1d)U; zaviCPZ>VVrYkDoeJ>u|sIv?60l-{%HNX*``O8_O7V57y&Z=VEzh|SH}^^LAM{Jq$F z&z}2qz80+~u+w+*5d?i3rk3tRL)bO4*r-gtK9zm-7}bA@{-%mzvU)jiwLJyn+G4Z&r5_0#+bI-dCq zg+4`au;t4@p$#EplBwiB-?-Ye9?_{}yTbdaGPh@Fl2K0WhMkwQtn;J~G%4cV6jovB zFE$!zo;4UueXwkogHY~Pl3OBv7K;ACMc%n1@K|j(r5(a6L5Ik%@qW7U(x*sf6s;YJ zU!xE+)xyB*X+jAeOPWFg|5(Lc;9tB@=^oYjG%yeo%<5~+>WhOuek(%_ z!07>?%qe@A!WXe$JUEA}p1)>JgXG?G5=n!+35bY2rUKL8hG1d;7;1{#dg}Lh8rg3l z6uz5)1KxLt1SPMN2d4S|DzoAa3tuc&9Dh0z%Q#NH%|W@8$pyuP=wNS{41#gS*Ma@Z z;P?aX7I1}ugV?&Gvzoz@=g2CWk`aJB!?>5nNmX(b?_w#%#0zASz~KX~(|;pa{`149 ziH*-Q$UI!Kx%yey5>z{?q3_r2y9ke{w|)$a4MzrNwLh^jTW9C%@>B@h7sVJ9hWPKQ zTsVXe1E7iIQqmsS2kV-Et@eMYnGcGSz!mslhJbc#gu};B#f*DT@5fGObjCwb>25F) zUc7KPu7$S*l@}ahF#(D!TZuHAN)(GI{4NMqbL{q@TcCSi(5oA&_4^zAvI@q=2mr>7 z0LH!0ZrS7rrwvwZY6$pP{^6iPVLaOvUt z)Tn|QK3I$MmU4-Yl6H0FR;ERs5*v3&23#`!ikUP_M1+Ui;j8UHZ|V7Jmu5Hv6yEdT z*-AmXB%3JYgA@uJBU(D4z;6j>THM%M{^O!CQlyq`rC>*!el9t9lT}_PGS?c`-ScC>%}?4Y1)ns0D#JdoO#172>{p{E`8ySP z7-enlBOiVyV-ufxnL3%JBR4SRX@cq$xVLY7B||o)GBhlqC=4ECs}uywq%zQ7_~)hn za1j1Bb4g%rM4?)lrwJ98{CWhzM*Qe03_GQ;XdMMF`vAPG15kdH_7b)yl-)!B$I_vr 
zBrgL3hID0Po@GT%fD%n@Fs!OmZTt7a@n6;@5i9SrMjv+3A?DE#F1+*;Bn9L+Oz;9= zkUVr;?PDjx!$R+8nCu2^JPpZE@lomHKNCgREBXy`Fj{^4uO4LV2i#RB5!gbJ@)RSg zo1SZ@8oTF7yhwqn%$-Kj60%F47Lm`GG%E1mZmlZFh8^H>f_@`oc%ar@IC_v`m@6Jk z0)KAQEMqumAd>kcBG7T4Z>XxmXCWrl%KM`JhJUH)`_{T97an?;O*9}}I)Dst-Kx=n zJ!}n{1d4zd|1>xC;QxEy_OG|~-dLxoJhKa>Xa#dZKMn(T=26LohlCfq=`35-+XGy7 zrwu}OGl=l8se^2#LzI~>?N#MAbC3P4n%umQE_8ugU2cj)y1p(|WNh6ZMV=P8G z6_UYYpESEO`$MHv+=qo;@Ma*-SdBf4pjH7j7lP~tV}?tVASF`P{*ZE z4F`wkj&a|tfJi|aSu%}y(MgMBg^-?CSgM@=g@(6aO|al?(c0ebtrE%A%wALd2Tcap z?}H5@SQkxxN$L3o-1WcUAu62*Zl5iHL==vZ)ieW(BpLwE8wq{=56n&1z}VP-29ITz zt0g2QnO3A}ZpN9x-SF`Fatjy!aXlvI7qwIH8GdJdH(1$X)&3oxllKd57BYSDms5Uh z@yS2O3L2-zC*9K~WQl6K_blQrS+YWQ+1_kLEC#~VXCLODon_C`elN&|AJf`uX(B$Djcpz0ZvE((tqy3ZY2_m|o z3Y^hQH5xuyBDDhrjC2d%Rd&P&8)>XFeK?zwi+OKwytH_Goomn_z5)&1`_=;Oxi;W` zfL^-TRDYvS9fh}59vJ%#+!Mu$4kVxlF;CZg{3XK<)SE3j^?{F@Qi8OoVYR#zoQ z0bQE77<#yuU^jTQ*27Kf^9^kgnZf6+wQj3K#40 zwax(SYVHo`)=%8_^K}G|5X_3vx#gb(6N8W`kc?Bo_nGY*`Ne2<>jDn45(G70HShV_ zj7u2}&z}%%YkmwXTxqQQS1Kw9d#p{F;OW>m4j+0cQR(b&C@FDSEa_0xwu^SK5R$l5 zTdXW=9;nyk?9J)XYqfWLyYdS8wJtc72%ySg?_qCB(ijZ$8lie(gB zwR1I8^jcDy59lInU|)A{TXy@P|EocWwxH;>?^d|`X8SFbId>1Vy9zvv4j|$t6yLEE zcxpi_q9^hA9;lERRxpaWV36{ua6A_rQaiTrs(Fj#eTP55krevsvY-}V$=PV zd3#;67oO7Ekuk6DeR#^ECv4kL$^YWVf_`~A^{}0@>2oALIz-x4ZMUo(63MV-A(${_0qook2otc zdd@`B)o0Pi%9rSX!_xZ}g?JfkC?u`}aw(f})- zYv?V>)7Cn!?#odQWdyUGK$fBpb=s|b8Qx>$30ViPTSdkedOpf_AK%n;ceW<1{jjCe z2t#9_18hzclf}A*m4j2%eFFC=*v%C~8QqykZ8uQN-j6JGNv-h%Sp#oSqZ}We=UwSh zh+kCi-2L|0h;8ZmW3L<*0E=P_M#{h`On~W10Ji`pk0j`Li$#)%T-zrilEMI|^cxtx zRs;CC%=bmBKqqKicD8+#V-4V3Uw)7W7`HZx3%Iz~UY4nadBvv~c#I2S zV@}~6sU(I`7f3>^J#WZlek*z|ANzWFvE|8uV{(GriBquK3Qxc09sfAQlEh&x_&m~* zZTf^A!h6=>+0wzBfA!+pM@H7wwH_V~DOO&Cc>I1(jMMSJ39;~F(~-kAC~X6-z=`tt zU#GlBMFxl%aXavMLluQ5fPa63BaL3*&_B}$Z66Y++uY^JBQ*fVc$rQww~uyx(0-Y` zA^5_pezn~FwT8+ndx>#FDu&P0;qbBFGfl&HYJjIZ>6Kfi_1nH8&W`P-$&yqRYz!Ta z+Rn(8U;Wx}!Bz&tyilvX z{!|CYniFz3n}iV!fT~9p`CYhu|Boq@Y8YA-=@KCU`hw3qX;jPVr8C&L@?rj<*NMK( 
z0LZp5<9M98^IGQH^0(2lvdy5yvG9&HV3!?Ut#Bh?Cdo1+ms%iScI^j#&_x&acgJ^8 z0yo09F?U@;h=pp(f90czhJ~R@uz?d1QcKEoWIA1=VV_~!ezzX>8rP^g1-S&ib*3kB z8eK%$1H>kTm^DLOh;hoB!rl%IRl@Nj6f$w^+hzDstYAQM(HxUwzO+q`BqymYY%X66 zK1+7m35RBauyU+f{tU*L^I6LB?F|fobd`4e&WL#-y%uG3T(olOjJHSCMLZ;90QLj9c|=fSxA(xY|-B;C7qG^#Ocj)Kfqy zzU04sM;W%sU5A0r(_%l=Zpkp?eHQ>11GpYylNEl1R)L~Hxi-JeUU%Fv34t>P>Mkdi zJc>Pd`OTPL*AN$XtA*5?-)#^Ace3qOYDt0i> z&PGxY3ls+n+zD*?y?te=%9cN-WCHG@?De_>giT7YhSaO}X)- zoxiE-rAC#j+BxB$MpP;9&YU?u?;>#1YPECx?IiIrExX^=O?yr^I_)!P2qGb7JT87c zy1qQzF2wAI3GxK~>g5577xEbm$v;6U?S%B=Ca;JrS#JC!KWc3 ze6=a{xS;yoH0*O0m<+15Wge_xK;So#Rs(Q`kfGU0{|q7NXuEl-Vw-_3K}M)!#T@w} zfg!Yy)f}~oqZi(6SNBB;hHf-Lg#g7MH2=~#G&oqJ-Sy*|TdGyA$|_5+U2KpCDTPB0 zI4x|ZzDzHeX9|ZtwSb8s9mz)=n0zbT4xo%s)x9Q1>rShpXj@qM-K;QUHaV~Agd6Z3 zST!u7;{pp0dn}ju>%>T68I*7 zs8Njx+@}~1h#p(Da+-hFs^|cySSN4KJ!^x`l=6R2y)r1|(s5>mEHBiAdHs;3WEr!t zWtvkMd+dmx`sz+;5;H}ak0*9tsrhL^t4d}=W_NOW@S!BN4LUzHiJdH@TC3}SlO${M z5?jqQf0#tNZ};py&6D-1D%p0ThQ|Fv(dppC$)z37KXDS;*RQ;?@r1)~%lP{j+0~Ka zNQ>TpwegzC#O~!8x_DByGf5~gFGr*>r|fW9&5c>1WPy|eOUfXR4(J9zQiBVxPdXfobOf0g3Te&*uIa01bCRR~2~Wrzidi(C_5Fv-htA%|lwP_gv> z{ev{2cdm2vR}cJDOpU1?^U%@tZlgp3cM(x0mhy(P)!5r28dw>FvikO!qg3SMw0b?= zR#z#>o`U?s2=?D+eXin(OIO%%sUBex>vZTQW>27B)sT)Ya?xwiHl8wxkawzW z3?x*NBP;YxQa5Rt@W-Y}n`G&=mJFX+{a~|wQiv|Ewr4|lZ!K3EOzU3 zA!2qzhJ}F_x(ZlWyqcSUZw-&q+^X>E4A)P<0FIhy6&g_y>qy}%w0mc?OQ|h~)I)g{ z97C@+WA}7_ZYbVtqTyk(&hrIlqQ*C825&G(m*DqGV04t-w+5W>)yqA!{DtB%L%%In zuoF1UA+);kw)|=xcr1ZNxs+Far|@a@Ar)jwWds?XaqC;0f)`cjmzPl4i(xMDw}ZN< z+h^f?$$;%f>b8=iuO4B~;r7YR-s#QT^S5BkU;nQ2tYM?Y92pN>wAYcyW}C-j z#CcLNY#&0sH!ixXZ|nm>ErJvdxJ057@7A7h6 zMB@i-jv*t^d{d2iN}||8%22QkxEXS3@_!Ppcew%dJay5PT54*<%+b?)B&JV6X*4J& zUh@iF-X1s<+sG+x>m? 
z0BjEA8$fe-f0cB=kpzGWiayDzz7_@^;jnTB3KVGpYum*-E~44p{GgY=fVC1f)`!c ze8JUNrI9pT4nL)QG1;pe1a6GI9P$my_ zBqqjUB6@|juZfc+7D~_Z=zrX_Cp1)Nk*0u>1YLE|4aPcK_2IV&gA$ZIcI1jgOM%%s zyQr-QXEIBNY7K_(n9*2-p;K`i7RZOVKleK2oN0=M*6b3jc_lC5pDNLiTK3khf;{ri zwg5T-QE-q_1F5$Kj>pHR`hKW7_JzaeLV1lTzY*9S<#T7?H9Az*?9N(x9nna>yo$^WeFLpN(ibL*3~0#d@k*CpTCY>-12cjw@hR|-hhlHlpV(k)gHNq4Ka_IyZ6vA z+MaV5sS5?+3a>)DA!#p##Y>s-{s{SLhAr}4d*LQ$P?NtGxV)6(^On*gMZUt_rQo+o z#6?XlvpwIOp8YgkFrt_>d*4%=#!g25YrZl>kxdT?kcF~~TMK(Yot60w))&M) zbe%XX#bBadyEBHdkD}Z+%j&iV;KO^3=EqKaEiY3Qnk{n-=v_PHA@>-ssj zVwfx|XMZs!@4OcvTXZoim=C@(4Tni>xcBJa0Q(N806VH|?!8%W!@ysg!yyu5?5_vG zWz&o|RB4c>b@seoWb`Haf(~{4lMA+wwnDw*q?NNN|hxSAtoWHj6o_>`7AaJl=?J?dw2*A`FMPkJx5pCLGs%$sD zYke>;E#67`(00R*2Br-yfz%rx6F1iFi`wgfb zFXAENAlCaiuX=#QgSG80n)g~0TBWuJRLG!7g{huo@->8N;J+l%oxK_~otz%uuL?yT zJiT@7<>@+WMEgWdgEJ^S{^*m6?EGN&xh(CIAyP52`V&(papf*V{oqt(tM>hBcRzaY zt^vE~)B?CWhmr8UFOxOz%;rN>9g=~%mve%G2(Q-Bq5xjh`+unhV7U3$$;T|MSR=#Q zX;8Cd-h-*fSRYucaU*u07au-jt}{CKTd20%tMf{`p$CDtY@YDA`}SeT?m1q`dw)>kdj@pyv#aeM+3^9?^;5Mahd!>A z0@3ldpDLIQwzCYQI9Ve*m{gv2a0L3x5jnMH{Y}y>i#zve+o^QaAB?dJC8%S|2QJ&- zpV0vP4}?Lb$y!mw3)rEld$@ag%?Ka#%0NL1J`t$Tw3UiY3$%FO6*5%viGLWtf8K6p zzC}3gl#?W&8j$;Xk27lV?E;TaosTwl8U=-@fPc; z4-l@L6v&%I-TW`a_Dym3jLu~pelUkh7irLCwJ`OjEu_f z#J?z82LdMlD5B4{5hYN#E95AeRCoWF%-c!Kv`!?x3{}-esfZ|gpbl#lMeU%kIq8Hz`9RgJOYClS9CjML<2KQAoEaVYss`OZ-> zpzgbZVtn!Esg;A7s8W49)2k?HlR*j|~JnOKGU;Exd!9M(VJeXg|g$$rCvk7?|5~#GIRme=oo&rfLGH zFYa+_2LpT?oWZo<^zWY#@?l4=fhN^uu6&SzyyV3373?xykDYW~6TW^cX2bneDNvCv zSU+ggNa2^FlNbJP)r!gcoBKT;vv=~RgGBFU-Ayx9ssgM+mELJ1e z%MAY~lBq(j66b090IeK%oX5ODK5|uEsFAR)y1ogF(LQv5Z=&w zsAD>#vggxSC{E10yc&|DA0-+0iGtC|EF%*`uCe<_AJVtQuWe)wes@|CO-C!5{vQ+`tb;s)ymXVaGSP&{c6`H9p8!fh0YCA7A z@HcxE#_N5}-_}X~$@myX`zHySflb&#+5+cX`a8;X>z709%KC|?a{)>;=E=2@zZcCw(V^J3Wx&I4T5xc zDJ4jUNFyw|VF3~X8|hkfmozBdjpQO0Al)ER($cYrcdWhL&-;Au_s2gRhfL<2W6XQp z_jR4uOaAsJ4-!D0Xs`aH&X8V6ZFBr?akSlW$JG<7gbNzbF9rSZ?)X`k9hWo-qnlCi z5V6i6m&j%1<-2Do^=IF%6}A5`)7rBBr>Ohu7>5Rfw^)G^>~xrcW^4U+>KGCwg$|!) 
zjb(EbS}&UFJyt%+HhC)I&*Rux?N$D5f0g!NY2cCCsluF8{WZwO$#eI&zoogumUiCr z)G~(9r;TNhBll?vN*0o=`Gr(c$$_Q*LLz$V{Z=8P#{D@UpaFv&KaEc)wfG8yQs2Y- z{dW~=fRd<(_`x$b(OlJRGZGm2gK^9&1-vqP(j@*Ry;w%Qk7M@9Je}*sOx{`p&ZN$Q zf)~aIfh*Y_iSF44!|l>kqhw*W#@%GXV8(CjN*%-UgrjTx93@)S1VkSo4T3?-UNA_m z+WuNLRg=U1$^tUxtRrxNXXAl^j84z*0#iC|Rqs+>32u0x7`1s8g6K5P(_Qv?bX!(a zxCAt&ZIeL|tt`@l&k*!J)|pz(+Yxb2A9sa*>J;pEM;Q9x@}r&9x~3lh58RkEj{4g@JsvR6T55FGC(*a|&Hzw-_|mxw zb|~_}gP-f&_{pX2Cfx>twCnM-gi_v0CaoI5@1C@`0Na4}*kkTV>Y9P$hGL3ERVe5D zdt4OC>OE^W#qy2MC)GgbzU-#H?B3Vp?b)7F(#s{7Tw4Pna59jW$j&4-g=wGhO!{Jd z7>CFjNb#e#SND&`wRodntZOaqo3GZd?x$c*=`R_Y#0P=_bW#hc3&c_UZ}zd@YbosA zAZ6`8-@zZv$iPy^Z>QA zHfuCB^MEbvnyXu#RMGtEs%3QN%^S`s#*sg6F}cjYa%q6U;r^7vhE2Q>DLQMyQG3@Gj@$1!-4t;ScJ-?61n+*G`ZUXGVM7)qqdvVNND7YIrOy14+N!h#WqW-0u$S$%rc-N(vq zU#^I4e|cBnYcc)98j>mJ+q1%Z?dY+sI)^cLzD+~S>uNMC=V*2Gw8H$7KH9O#q-4Mi zGF%-|nV~zXdRs&42jql>uZzAZbQG3Mp9VeF1o~(#8ZBt?3wGUn9b+5dZjo9(`Q{^h z6}dSzY{5tbAT!@~i6N-=F3p+3FK&)YTC(-&qzb$U?vLmLr3;>2m1|#%^A16J%pc$D zAF3)Rw#b#!QZu-+G{p_q z3OiSi{F>FwW(1jbY&lEur6o*627H=EUmrp56-1?gQ%y?bt=q*jYsPXxf}Q&eL_tWD zSSic*?(K`l%)0y>(3IP0z&5G4$!==h{O% z)A3-TYMVN;rVj~cEa`JHWih+vb>y)~s)&l8wuRR!UZHWZiQW0sdsEtrE;0uUq#u>*fV@?TU+{=bI=C2$7u>Yp%8rVabNu-XVWU@P;Dv(rt=hDr2BM;n z8lc8({iwA@%lUgMOA|AM;PRH|U%u+G-7{rYqZdR#QZlY($TU2|#2I$_n$o?Mt_Lv% z%;c;drqEy7r;uTFD>(kB#%B&QIZ531pdT zB@k>~Lw(#?)A%5@X+QdS`0&$3Tx1NYPOkg<`}tsbxTrok=e;2ui`aHtOz;@_tRxMa zT865ve*Op`ue|m)@_qM(ew1tEu{z7H!IvxYz~rqUlA%I2F6dtWuPQLUTuqt?TK9|y zc{X9MKX*e)>qQXw0xXKOHfKo|9Ur~$lqy|>zLyl4V7l?i{nyg0y=D8ilVbZIBQM3x zPH&}~EOk1o@j$DC+D5kp))u7t)_i9=mEBH^jsinV(k@z3>r z3O`JlRI;-pk&4PG8%qZx;#o1f8lEgZY<;FNvXMY9m1rXlz-{X;^W80_EPqL2qlLcP zL|+oS^3C_b+=6ZFlrF2?_;&iNvsSL5Jf5zM$K>nT7YRN~J~FTtr}Jj5MVCgc1y7hj z<;O@7w(x)u*L}@KvK1+y0kHo&X!jcZai4Sl+2ZZDuN-ZCWXRHqGp=i=j@+W3Ke9Rx zmv;fCKZGRHYwy`ZR9;Vm9dNOAr__*k1xDza=p;+@+nI4#y~l`gcJMza}6>J z67pjB@ZT7|(c1)I+XT`bDL~BkhAG#t?d(ijSP84tw^w0xwSu!l8|9h`AenD&wq5>w9--8BvZ10UH9o9F-QQ&4IC>J zAVPuC>d(UkIs~at?L?m}kCDDm&9B5#I!0-BB+FG!@N?cu^1#@;s#>GLB+XqJn@$^D 
zE|u7g9*FeWBoDC}JN0NCj;|VXz5&9fq%W~~CcJapL{a#Ks1ZoFE$G&%2^+O$U0-Ss z6DnFY1uJl8AvpDe@1_DVEpJD}7@1#Rz*@r*L7(XZiqjD>5G>!&3_>ItM4XNV8f+wz zA$9gk(N(8EjRx_(WN4H@-|VJWzVXE&TUup;1Q<#rJB3aHL*Gk zV7(@}+Qli||F;OC{^&TCYYFuvQ?qy)G1m$qBo+-88}n#?#EtjWc13RtIh4^vMN7E! zaoZLZtXd|@sTMvFZvShd>deY7yQu){`Rpb5naB%9SdO?o~k{yf(* zn|w2wK2EUK@<>yV`FF(%!#|@de70QZKea-36vgJp_pisvXxeq_EEy925l;xOuCMo- z;&H&A1O%eDor#~T%r`(_PPEYl02V#fbgflt86WZ|=LKa<{&l=rb(#cn=a@!t@Fr#m zLg|4**Pb%E-6ONHM-pIK{t3Os7Qra=Q7Qx)(jIU>qwQee-1o{xq_NuH&U`-Vx)o6& zYR=d10oi`c{)B%_-DGZUyefP5lZ-)Gz^u_PVdzP)1$Ucoi+5z6t%-PeP1?r~i*7(` z)eVcaUft)|INt3_IiAI99-*RhN^6Z`vT{x~C)I7Va!qGYO2aM<1VgG62vq`tv-~D8 zU=%~`u^xVC_Wg=2S&lZs{Z}u>O zGV?o=_(hz;*!Y;BoHN){Ru%xq(m@U6!4=3_UPthBIP0TSX&05XSJyq?TPI0S;)VYO zjQ##)Vta=(xPv!>b8~c!*?93vnycUD_xGnx!b8VEJeU}4HmEy2mkH!w&83h905|(b zQ0#>Y7w`(c>Tcv1C=cH?CO-MDf{tzIU949#nPKsq_ar}Dwlk4|<0r6B?@qnzAL@H* zJ!OjzqClrpNjOUOo0PyTLCoL4O3Skm6*B zXYr>i*+fL+isxxBIo@QyBXZrQ2Iy%-z2$0i#FA24xX9LtXhwc(7gr6v@fE3Csn-*$ zcPr5A+1@O)1%=v95Y5T_+nGQQ2**WQW2(m-$z}x5MdT@FM~Qv&!$<%r>O7Q)+8}fi zhG9`;bFnU(x6M-DhQI?vMdVjaJRpMWBDHUR9(G`EVxrYTmjX_Qni^NPsaLk@p3UOV zd5hUY5AO}{eEB!f`ai5VMs$-Eq9D_o9tEykiF44GzLxn|^x2+L0-vpP+lr<2qXW#> zp#){?iSVk2LG*KnA;gS{vJD?#e$;`5{NrKcbE-{;FoNB{>u$bd&NoZd=R&zk>vR)vCjH*zXrk9dm|D8E$Dfob_A zO?FLMW30RUDUu8R-dGW6@l*8R?E6)7MxQp(lHVjtv2wDMi8sr#LC9HvYV{o(a}`#! zg_crFtcg9X=3CI#K3$?fD@y4OeS%A&qKm~CCywPf#q2x^MW9)_E)4uuy5$10u63ed zUg*wVgs`j8$MHwdArbes^Bx^mv_|sL>*-m^wwoz;JuUORCbq%^#0is5;uaZ6a z!irmTZS$(F8DmKJ$`kdITL+yP{y)&{viq^J5uJ2c!KFe#Ea$irt_|I-Nb0&j1yzVT zL`A{yeN^>zb`O_s6ft|Kxx%UXXlssIjqr>(B9*5KsdDt#tKJT>ARU%C;HsCJHCwaY zugx>D{dw1(z4?V=Eh=bl*s3w?M$%2+D>J$@Qpba1t=4vl(eQOd_YT~Oj6SH{b!4#x zu3V|;mGRkY-+nJfTH_b0M0l3tGySqq+Vt{e1g~^9)ortAMp$Q8iQn!ikel__>K9B6 ztf3B^0~cq|?kR3@ygvQ~!eL4RzUb?Lv*CmR;+f4;KI69$J(I_mNciDBYU+dD-D4I@ zGwk$=POAg6+0lEIw?E@`f!+gjwQ7M;$mbxL#L{?>>>fvcR1ZchMosJFi6ND1xGt-C zfxn}x@I^jLahV#2kp;PK%P?DIq|4h^UQfU*sY)jM$L-!`s)-57?CH=v3;a&5V+WD? 
z+=77!0rHb9NA;#m((F4MbX5n&f>;w_ES?h`;jp=AgHU2-ejGzKu2F=c0C45wh|8L` z-+R5}eb01|^DRFX2&22Y`bfNcGAhqL;9&|f%bx*A9w915ping@Uq1{>T-AgU#~lPZ zBv^8|eV6Qy0wvzJ&(xi3*3^nG2!bbX)mfsz3(-Ci=A>sOuDl>Z4&X0<$@5?1BL0bG zz;>1x#S}zpc}rC*I3Kw8`YYR9(>=Jpyg30ORB!`A`Xs9$#=O~h%lzEZgD7r&$BWPj zS4uO5x4RxBFS2{a`Kl_by1$mSu+16u79F(amI?~)GPf_ifzc3X(xnTldebCZ)TdZq& zcsM-xc5=<1EU>JmJ;+(?=OWqtZY!Prcz9Qf6aH6tzJBcF%x2IVj+D|&7g9Omy8V8@ zB$fL@NspUdMPn*vB_}VLQ=SLN5AX88f7q<%8{JdXoqm(N>7y z)a|dFFO_9hHOf1+qbZO==KM?7oq5~3C15{^V6LW}eP>tzAbpkBsNm}wNq+nMz7%ti z$cf9^YHL~m>DU>AXNe`h$&u23b0sA@kT$PZ=BQy2iq6z57+9{D_WRBGcOI{#o@^jv zbkRLbU#L6$M%JV+9mu*QK47QLt+qJOO6z0nqmh}ifNLf4HGhHPs{{$_sO5zFXCG@6 zJsz^Y8y%=wLV?-!xoECyJP*AqYExKhHv`kub15W4mcWNDMetPl_G%d9P3Rc+;X+58 z0$OkB@fh@&0QOO)KOvgA>tcaH8n{^|woiM@z- zP3TSgiB2R123gU#RI@uMMWV+S&zNjCU*I-;W(ie5XN(0}lMsYHsq|v^SS5wFc5tpB z+Sp$Y`WEU1#kA6@zAH|?b}QDFXQp8|^4SspMqhIpKiPH!uv?35Z6sbNv8hfevJFs;#C}FWuU~h22s{G$Vap z0;{?o)=l<56WU#oAE4VayA{RxE!*++7`PH0lCG|$ERAL1tonnOY|r@Z8fXRYMhCAa zjZ)tHrp$KPp8PqQC$CwC?scn3uT-y*0e%v?$89=m*k3fDX)pJ)os3x-wC6&UQ3lTu zkaV3m82#XE!b9avDgZ6*6Ie_Ak^<6{>*rFyJA%CY~UIS$z0t^P2*=vCO`e<8wDrpJPjS4C+yYo#|5O^s7;k4e1Tw( zzR`QG&!E-0SMM|VQ{zhyAcV9Y96mB>2YSS%c!dh6(sq+#Xh7Xhot_&5FJ0y0xSAf0mApkUH^mR0EEjY6qxU-%;$;iEJPNrJpdT}N21;En7fFaofW8xDh^q@Kq)Hrk&d~6!(hgQc z{1>~SK46U*XGba-&)RICrL0WU3ddES69tqrYJ05Cvw_yjFBwl4M+iR^oZ2kPU4B~ zI^eXZJ!)gqlpOi!T2lWajS_pp?li#5?nidWv95&d>Ura^@r4Q@7Kmsq1%fxi3bxOJ zGE8^dQy{@4>QmBsv@6k4oN&Dl`@N1u*9%o|x7La3MWN00YE3YQc$!Bj0|ElW2Ir~p zHxpP<$0~zz{4L!xAo)Yp4MOHr>f*Ttd=g&>5^+PPwx7P^_(Flo6`3w;wPO)0K8zM! 
z)TUbk8Ul|G%9|D;|>qqvQ&4#|+%0z~5wfmeq}U2!j)YZQK7pM2wEF|#mXiDeM%tB+VtYA z3+Y-VXwF36xp#-rY-Xk+zA|k1`};vKbg@gmq|SYcilwp+e1jo_TWO(C#bX>3D-#hm zRWZs&o%>@bjdz$YB@W29zXKfnT6nh=Rz}Nr3Q!7;7#8y8t35aR*uJV$;S?cCP(^A# zzbbZ{b?2VwNmUl5w{wA+#JTtgvhW8!j)p80t0s56@7gZ@Lj+}pMt8!^-EXG`TQx#M%ajKk7v$nrL7$)uMN$j(s#VZWJmq9OcnteAm*vL?Yp!e~CDB!#-> z$r@+w?AIM+b4?#I=Yjg*9>gXJxgO*Lzt27CjFqT>JL9O2S0>S(OMYK=rL`4_?WYCta25_;^ zxxduclW-p$ZKUJu32o4U*Ue(Cv}K+?JCLK3%G(E|v6yB}TBdbaA#vYO!nk5Bd_N-XJG`dJI42ny93zIZnoi1{*IISF;u8 zw#4O3A?Hd-bj5=|-f7W4-f4iKRp%#uV)Z7gHg3}|HaqT%x50#%`J(rR{*|9#`@ zQs`-?PWhuiDQ@^D=|49lQILQMaO?d*YLIHVPj>GT-^Xjf_ z*UAl!6U?9X`4L4V;_RQ6g>w=w1EXLV$~d{b)zA39CCER%{z48=Hppxe2FZH=Rtw3Y zr?qxZ=U@If{hTg(ruwY;yb}2-3Lk7C#&cwno1ZL_WP#*YUezwYH)HDiIR$_SGr;R( zaEOrHfg&`3nusTzZ0VGy6g}`9U=%GP2bY#hJ6GvEmGp;`6kCclaY~O!cW%O|BXvtc zl*@0GdElSk@gOR|>D0?2tarXj+hos)uni|(HWhO4$E5yWUYYMB2-pT13!lIBF?LxA zqL&s7IM#L9qhcl8YyjEHpcu0OAW4Z^g#tlM*F(+PSa_=7scH1>9FgPg`N!xsXnC*p z_&mw7EvyftbZkKCCUB1ix_VT}YVL>$Xz?KGcK4?_2pB7&u>a0hC%VcR&AY7lbwU!T z6>=Yvw3NV1Cc#=mfo?F0CZ(==yYv=b6bXXZ%fm#>xLDzfFZ2 z!FkhkiuCwVU+dko-0P%I6Q z+qrNVad?M-yZ8FKJJCe{^T502Z751wk|b;&tpzqfHiEnvQ(W-!GJF3CN~VFU3nfW9L{F)1iDFJw^(O^$#$XB`(+bc-6Wo!49Oy5Cm4 zA5lA4*R>Br`4&s*k+538nQRv`%YEHgnvj-019D4beTIJ8&u_C@(a+(RenpyiPWsoa zI^DEB+*{k^U9G&^eY)lvq=6ey#IOLNZ!tf4gkq!HNE!lQ>Dqp2Zi^=Sojf2rI)5J^ zr%59RHeIUypOy^~s|1Q~PCgSogxUcA>M5U5{jhGi7Rdq1QkJFlZ^oyZ&wmomDAb`r za#==I|A~q)@Po>!w)eVA21tMv^$Yhgltm{rP7tk=UTtf9H8yz1r+$Pom%+8rPJ|#f z-|IF%u4tGJOw5!(1`ObSprW$~0dliKXP!=9@hJ+}^YQcBtCh8PU3ii2Vn(P!7d%h< zuQLU$L3$M~{%Q~-y&{vmtRstpsATu%dVkL_@`ilO@Y#4!w%ao@k94$-UHj2MBWqHnV`GZU6R?!D4?wW z0GZ^hBT%@im3nS!TK@(jSHET0F3Owv^||*Sc7)))7V2GCju>r9 zj2I0%g)-Gxjh{*dQ0XUjUp#!M01ib-O_#wMFHT|<0I@COXEzz9=Iw#~E!)jz=B@Tt zP=w1olJ?&qDg|70uLY2 zXn92o+k%G<>%pHe$RXlCLc}OgV_HdU68hOUec(?a$A1@$3;oMjf}9oq~$74)XVm!l-w z%?r6Lm5O<49p@cf*&~N_Cy8xMF=rt4ai(U~30O&-lI!KFKV5-mOjjwGkebSDO{@th z=_u#?S6;FIXa5C%SQ6L*!co^_wdcY#PA# zMUDI!N2Aqt*9}29sS@;IrsaqxO9AX`Tl{Rj>0ITbz<(mac7=ur~}MsRcH3MFqd@0 
z4GQZC8s`4K7kfSu`8H;X$mvpTG(dTDYMWdf(Zob8ZCA2-lkwAQ!tR6?LIA+KfsfbV z{R@eb`O9u!cf##yEEWDCj19_^xq=i!1|*OmIVc70Nn{@dt{Cbi6XcO z@d<2kmW^9i5|?b$UE-;y8HDLUW$`%n5k3Yqjp2F>jj%&a4*yOv?g@OXM4GoznQ`~` z%NQrV6S_sIfg6BB&@AJA!}^x)3k+kN{gcY$EdiP!~!&$0TQ zzOEv#Y=TK@D8_!lw!2JDL@Ik-Yrz1sBzoi(KODD)%10ZRdJ>Y88Cxw6(x*Is%Ks0~ z2K!G<_7E6S2;v$sKFh?7{PsGRmdNL)YERq!G~agKODaVLwc%&A!t;P> zgT$@mFxR@y;Ad8w2vxp9I^*c5oqT68U@DK2jfL$e_KZhghCwKV{h z-#}6!l=t77{p2j*k0$k@IjksE>-l&vS28L7TTL&mniQ{$2~@ZP7M*_*5%-5kg(5QU zI<2EqqGzCh3#J)A6Ug|dnapX?>mX>IC)Bnz`IQ~TK8kQqXM*k+eCgHxE@gt{KF^o? zTR6K;MHm$e(Ya4dn-RZTNEjIR`U>_FNxwr!B7owNltRj}TW?q;$})i1lyv@BfS z9_RO=MmBq)uHXe;{vxQU;$Vm{aM#k<=TeqI%EW>;MhwX$j$E?vl&q${iw>H4MtmE^(s*lk{vLmdM6R?}!xuja#F`>8Cgxbea9*%IaT>fIP=xSta)w6Q-LW9yrt4MPPXlpWxJy zu`*Vbe0RgTYLc2M6k}u;C*jx zwiq?Lfm76ZWD|4m=e#c{#O44Q>(Pm*(tT*S2FoYWvkptx+S;>V883nX+l9t-%Nm&s za8fAcyU{V(T_A~jWS6V;&&?vbi&}<06VF|? z_%43uAu6cHKJ=~~H_bTBP(JG;V{UdCv7Qvu2c|)t98}hXzkj4`3dE|JomB@QPl&-m z_W>Gx(Cj{FIzZRijy=S${#59pp>MnIJv0KwwGi(E8P*rEA+J=Qj*JHUoZw%q9v1iK z!;=sJ+JG<;{q+!|$sKcf{){LbL(sJ3^ky+^#;qrEkmiJ&92^-b%h9qx$1zceY*hyf zkBaj#bz68AH1ATg%lp-5>iWHzsT_gm#ERy?$>Nb<#10D!x8XX z+0DgM>w9g@0Pk!sXKKJy2G-EN=f2 zNBH4dy-Y@6hK5#y!bk+av;4Wgn}w)DK;e{iw7^HEnE|r7#{KNVN%v9p;t>i{hZQliM=UTtz?O9*Y9TXd$niPYBx%@ZEBLh*+cdm{0-jHon!;k^?D|D zI6ltybchPKfAmO>QZ;o8RtA_XKaOI)S)= z$rsdz{LYECz`5vpo2II6qImQ2=5aem>zT5fJeyzkm0mPwaitAT+>qR#yIwq4 z#vIW>NSvfdy%Vbe+{f5CzXF(~o`1&dv*5s;M0AGtYF_%)iE!n75hx zpvqHYCZT=_>b8payQ8P?R zMVpYXk8hl%YHo-n(Y9VS+Dyk$cfEJRRLFd}`P zh&an@Y$4F0%|V^O9@MEx_n)!uxH#)?U-~($t!jZ|GEgj7g`*gXVQAQGwTUNZotefF zDhSS&^=ZFIan_rg#YD$~+l!#v>PTN+c%hqy$dfuyyWP2ywI177{JF5uYE!~;{L4`# z2&SGSmdChrw!cJZ&bIhO*SqNjEKTVtF>#hCoyH5O&0Ei}-3rSmcur&*aeMkNg{?iX zXENGK+Vo^}o+pfy`c*5|M08l+-kvUY%EOp+&#utdTlu$#5UK9lgT<|NmLwLx>?|zc z-JVkdzJD!0rCM%zNkL5K8}%9&gN}}RzvaX8NWDJK{TAj1TwL?x^lhH^m_v;8l`VB@QVdCS3**5 zf_SY&1tT7Xez{BC^6oYXY|-f-CUGU0k}v5p6ltyJcl0-88?4ZXbZlJL5+n7_t?$bh zBnaz#-LEhxba-b#{oS7*DmV_2k%?9D&*i4 
zVz05xgS99ccssh!NcnE&!-<94#SG4mjK_-Izvjl-gY(uNW#!vr_g!2Xv=c7;cO!9!Zp#ThtS$9A-b9Q(d(jz2V8Tvs&f zE=|w(>ciSdQYeIfYVh6N6Ja=Y^=e@Ya6ZWh2b>2nS_?#J&AIlc573@(Y5+B_C8x zCX-;J&$*<_a_+Wp;fD#jE_XQ!?|plh>B*|<;8A;*YI#t-vvY1+y^sQLYEOUm*YjUiZ=;sN45P+LV$k*Cq+ngN= z3{N08E_^H2+R{G2a@6ofz1H+vw)Z@nx$B*F#GIz%_NMLDZ5A!eY_!bti*+Gy_)7WZ zKiF`4N-iUf`1a?oEVpwn40rC(mir?1(D3XetvMw07}(^Z0!oCn$0Pn@|JU76(nS@`GG5v z2a}ME3#i*_RjU>nee7>x6x@r&+D(h3Pe~C{6Q7M@nM!I-D__sj!HR zd-Ed|qO#F8-8$xHcsl2;{WMMg9pSq8%yQ)VRBTXl#ufDh1i;DgnKzRl%6uexuPR|0 zld+7U-DLMXNxkqnduMBS3K9^!8@0vU1qAU)*Cnabr(TLHQ?cOze4te@LI&L}karq- zS?c}8>ewTl|8AM0maM5OYocLV2YYc>}eZim}mjk3iQ=xXQkgB5*Lo8;a0idirJe;cWj7;H)tIWXrf(De_ zL|^?jh~pGk8hx#{+Wl*+#a$nGCweX_S*7N8WBrmHMc#A z$=+;IYqPKyQH+s+xuR2hIcnV}8uU1ECah$Z^>qq#W5}~)Q&k@XW^O0PuFsF>dmks1 zDNUsuX11Nk5FZVE$+|ssY>nk4^iC2BontFs%S00N3%fl2Wh zFN>f#)jRRGytvSvFH43-=8@RjK?YuF18cDsu(zC-VR*dh9NeE3rK^gKOJRkOBbJ;No2GYEf+73RLPXaWD2+Dac|}w`d)Tcbx|$% ztVgZrt5S={jbNDb``icb&^X~Bo@oJ%UEfBgd^C7?i}4G_drhLa@S^%_@(n(nStmn#V940JP`Y|R~T^jHDuqx;XkUtLrvHI-L}+vnS=%ZO{! z@(khq#<_8?72Bg5bjs8XL${G3s(@b{5lN6p@f-imgU-lW$`$t8;icKrIr}#5e8Dq! 
zfO;mvUnpVvoqYJQYB}F-J9N{xNG%<~i5U#;@x7PZz=IGXm>~@+i zF<5=p{N z()WC4U_+fj2$_;wQ6?Z3^_4wiRJ4@QdYmKmhIzx*Po~c>s^JNACu~!_IpM`NTg&97 zi=@_ zS_xZ1>x%(X_l@3yshB8^WVT+zXZWdt^j97?&4P8%um*?x!;?8KOITjcN6?h_H0&HL z%lex6h2*(4Z29t@>ATMkOTp0)8{yJB9bRBI!2ufUoQkP6Jo-&%9ANfP#9?-O<5#PI z`KIhWRw=~$VlDB}%IApG+9XQPGj3L$+S=g;`&%Gke*4ghudn?zD?`I)rMI1lufyg} z4647~itEW^1)SyK5S|_0ZY_Y7(KwuGYyFa3^jSoskh+0gE!ngo^(VAXg6QgZB&<@Z zaSU4#_Cf6jE5k=NQmMswyBw9rjX&jRdNlWDO$qEG#wmsMy-Ot;=`Kft9^Ne%J^Z+L z_Xe`-*iqXnz&X*unCju9qm*fb#-fj}_4p*8M#i{HWXAE==cS-ud6W`G?m8TS?~gZ6 zjEKieorvv&)jFn=6HjsSiB2c%I`gJ5uAh_gpU-WDVo6qp*bdc=yP8L}q8_u3 z!KXqX{}`xNm)mNu(XkG6m@8cuJZ>A_uV|yy(%U z$>H6s{ONW)_|EMd4h2RyUJiyTx2#dDsY;?;q-2)oX49|hXV*@f@CR4|qTatMyS;Be zv8Gb|ir$C3Nx&%>=@ZEPl~o!Sj?8I?V@6^NSRS1HYfv(DMT~0=v&o&+ z&Rf*8UnU}}{I;xLj~xxDK2TqCtj*XOT^Tzq{}WZ@!8#(jD-VrJjx!n3mXMh0m#%{x zIQPg;5S-_~-VZ>15z3`Q0yX}BAB1V~TgRLd9J0y2SW{QMFW>{=HgIm=wrzRu;deAY z`5vlJg*R0{WG-Uz@?3wi$NeQiYQ;F9l`iPFA<09+^-hIAW|o?mQaRw|A`pA9#}Gd9 z*i8R$9k%IwKS#!ZN{X%ha||5!;BJT|pQm~k)>9~Y)w_NojFSwNRII=|*#+;PSVAyH zID}3(5(N2*c?9&CX|lfaIc(BclgJ9wIN;`SWy6xfU-y$?J#r6!!A>0Hl7lUSKT2m5 z-YbB3Uo^#NM%|O0@eJz;z#M7zad{Z~{l58j-?xEleY7&TIM$Wy9g$8fTM0TEd3JLy zj#=6wTCfzEI5|3@rEod<5YkPeZ5hB6;0x>Aon8@*L<$Pm0${Fs(Km ztr;v4k(!18vmB)9L7KTm=XS)@o1X2E`x(U-aX!UwJ7F^Yb2uK|~4oF4pbn7D0p3kMd2?S@W z`rrKt6r~sCw;~GN1ML63ORC01IJ8)qdFO3!hm-S-66H>BVd8@aGV5rzux04@$wkV; zi7$B9UU!~Lep#O$xwC{^IcvVnpPAZGuR<=Mw2VcY*Jy}b!Jbc7cK4VvrJ=?3bYN*v z`=PC#58 zpxD8bik0&ed3M*`9+o8&OXfUo_oe`&EuoYfjaGh&O49#X&*z(@>JUe&z{d*AEv;G- zI(fyCR_w1T5EWT@@>j084V$JaVe+rvd94*j=06xy?s=0NJ5D8BSW8(HVlnrT3Ku)b zL#;9`iR4K*Q4)p4R8N>Jq6yDj35gcINLjsoX+WF6euSiwvXi3>XS00Wl~^qqL-B~Z zQR!^i-Nh;&mhowsc9&zR`XSROvUI!O5~50zG5WUTo#ENAY3{=U^)zu}sLe`i%MH}H z?Y_IMgBZ0H?sk-;fVp1P+t6ekpHTtQx9HB*ga+slFp0Tx`^bwKVZ-f&q zVNWNWy4f7QLrtUj1d4b*t|sjEyPTiv(H?#lrK%eVqBP{RZmDmW7y}RVS6JSGB{u%X?H0 zO0Y)6ZESPABiU@@Xjr$C1=U{%$(@T+<*RAFde<5gvWSWSAJ625jSM8nAnlR;OBwtY z{)N^f_==#zkuE!Q>)2A_BdhnjvpTwvqaiA>mtBg)2x~Ss@ZEIz({>d}{AY-}4tOE 
zi9yx;g*@N4I?VYE?N)REvPKAzu^AcPu_0?6%O|Bvz4dw&JFmB~6warXERSnM4|yt@ z7vLEL@Ln2xz&0veh7W8~2gHtNu!fN^g>x$r_!jD1YbNoOgd?`Yuv zJlPKGEypu|5<(E7CfOn+bs{GozTF@7MG)7LJzSDxn5 zlfHN354M8k{E5vHu#V=Bo?-KUlzV6lGvXV#b1x``V(E;F(noEn4dUk#I4CMro5iko zOesS)dtizV!8UAdSpBh#=PJW9rC{0j5*nV13Ggh%xmGaSC9f{n?mYV;IRzj55@ujU zNhz9fDJ8h6#9S)2MgpdSm0mf+bviq9-tH z@3-vNsCQUj^Nz@HieA!|Pdl?zIzd(~^UZ#wlA*tjLgvodu8S086N4k!>#5lzHK7qS zZgmBDR*^cEpgycz{ZxntAH19{=5fsU8^*=Rjmc=u&8-r8#VaU@Ss#qNskus2FoqMJ zZ5atEc4Q0|4789-s^h2()bmI;u@QKLC6lx*U~QDnq5U94!v9py1N$%fx^EEtUx~ev z%?y8KNP=k9>Ha8Sl2hZF;&1r9=8xYcb$ofia9h(!ct7FQ7t@@c!7JB!-;f9SiiwW! zNc0>uPF*Z3U&SPWZpsxdct=rcg1xH!B z?u%N9ZR7LS>ix5u{~xxlG9apLZL1(4UD6>bAdPfMgMbpEG{VrS4j>?*bPgpAqDZH7 zcMK^F!hnc$gLKZfM$f(Hp8MT@`a@^$wf0{7eV=-jr+VTYo3<>G?$$61avBpuPFB^r zWr`1eB`iHw;NU&ll(vMs(vv2c>ZPEdxT&8PJc1>4-F%*~Iw=!@PjBeLAMM^`k$brO zt+kN|y7}gK=N;OXzI=Zv^G2R6fS5m35wiX~ZL)GI$$IhU4L>;DUeC!_>`8{;Q7M+t zYTBC|&pPjORe{ANWW}czcQm_$xH)-K2}V~wd3_`hH_eq+Og!KgoL4R@T;Y`Rl14*f z;cJ%c9jAd(fK{JjQp*mo)&>`cYY5=-;P+p*8FeWqycG~kc2Q!b_I~Nk*nuo{##iV2 zB$nR(FUK8>L)dkQY?FB5&JRr)p=yI|A*6~GpD+?KEi+t6*Zt;w`l;6RA5rUzt>Y?U zWq6sXfFUx=6tI+oTHwQmsP}XB%d$K0<{M^52;J0_Q=6_7B^&%P(%0V63RM>4%J^W| zmszOUZtw9WbuuKhB}l~_Ha~N_T)@mwfhnnjW>gRk276jFoesg8snU!?v7|_c$OV&# z(<&Y)d9FwJ$UY1*MbYO#8rfz9si8$?t*9HT`ugGtWWC#I=WAde}i zYD41gRfq--YEG`$vAz!j{=hkp+0ER;!n9An+NR2@wq*5^M1?Ny+J$eO?m8giIl$tE z-b#jSd*P|3@1U@{_l2Sp#a?%jhkkl2Tky#AFYHOy1St@GwqBmT1y%_uB+G-3keuSL z7V^9ci&TpN_^@9R#_(TDxyLTey4^cu07Q)nV&lhK)Dd z7_6E1UB&g!cEKtG9BEzT#qR=uO#~eL!w>kMuII{Ri8HQ6eyiRxb|wqmP?ZiR*Ly_1 zJxPMg?BS@HJ-`P9#>$ZKz6WAAwNmuJBN;Vj+0HitXPUZvq)1;G+26tTkmVP{2hqT- zv9&MCvEx8?|B-2c@a5fZwE|v0P0YuB$I+aA6{a$infJuWPh}|5I?M_M$9ebj#^}*v z+Crh4Bs>bk3jE6yYOhrkJlB?{mlu0}prfN8x+!s?Fo7mg*aZt%3P)!RkX{I z&rBe2&>2m;+-6z=o5F2a^J9{~KyyK*WIT~oJq1y9`yR(vsr=Tf#>=m7uAp>cPfA_& zSiy9*@WyZX!!N#2ibIP;ooo6xR6wT6sXBK==95T1aAI3CzuC^xj-qFeGkY5%iZQRK zF!^qOpfilz!K9dt6-zzDL_2|*XvPr6;i-HGp z$1a&xw34>2Hqs?vziYS;UxdHKs!U$=tuKpNW$%BGi|8aJ3c$;+d~&aQOW?p>arRKCt9pjOzTD)6t 
zplDz&vkgHBNz8#R7W~1qHXr<;lthkHu$x(*SKH&qywb%!wfm2rpTMF*;53`HPm)(w z=B+mUagRFwQsg>A=?&u-?eEozRuXTC%vBN(fZU|#AlcOn%x7fq5o{^EL%Ep?; z6zNh%ypofWYBC%;*1PO7E}%|J4mtc7NIBthc3@-IwI*gA(qD>(i4Xq6J(J3<=ZL+4 z07~2K#!1@FK(A3a>8K#YopdGKitK5icFxynYkz|2h<*K$pk-a}SXCzWKlnOT&xbbv zVoxqB6R=z!G}!cj(exRm@u`E}?ng(*yyZ$5&q<$*xQWLw4SkB?`-(8&^VHa+R(z{k zbM5i%|9Wu)KQgFkv6e6t)9ci1@*ZV z>=Q;TX>s8CSiy?PlV?wBf)mJ|+^@x^u)<(Ok3AxwV0Po$Eatw#*{<{ijy4**YoZ=>cce2rg1@W` zSPD}mM<+bx#INx08QFe@z35-bpS)37!Ay1QA*fBS?mT?hB_|fjtu34UC3S%?JjHxy zg#}kU!3I-+B-+T-EC~|)GiHHB^SQ{Jg;y{Jlp$8~0dMH_uU-<0{)vi8Y-Vk@A}Fl9 z$P}zxd`&^Ru%U(&*d~&Nqfzi23 zeJTz4%)4IJ^w;UOwn9|)Fq)#70CJ$q0!na(B&KJ8N7b+gPMjCN{6))uSwUa@!h+iN z&_L&Pmomz6W@+qZn2E=?iMh+PL9`U=4Tn$th4$Prge%`7$Lg9Gvk_1`hZ$``z{5`_ z*!%nic$=dw#8^ngoKJ!q*SHlER=cR3_$A*Q;)N?*yM6=59GBnJdvkVBF++uVMBtRhRq=f_gLpX9@IPv6SoprxDflc9haD_5`r6%GGF|wbac$aet?cE- zb?y44ZdEE%Z;Gbw1NvwBGNAD0Y(|MymbkFF*cQauQ4#V0(+q}*cfApn{eZ-Wh@ z3LLcU-+&&!O88pk3p9S6|KwsQWQ)FlNh#C5XY*|rrVNL~lT;fM4qt3yiuo94YUXO~ zBGWpFCvWYIFhx6tY=p($?MuB3d4jbVCz>sNz%XVR6Skv<<&wYH`R+RLw$gpd5w5Ja zHSsVt+2o{(W21(lWXusvD8{hB+(7z5wQ0N*WjYR2Ek%5O+K^Y{OJvm8BN7H;wfJ^5 zzd8btpg5HxgnsnQUtgzmL`lE~CS$Y`_l=UPk{zS{CLC)6Kl1T7&1ku+BVLk!66r{Q z%S&(hdn{<7oAlh`rg!~@ZXSo>^?wge*4Me&bWNk?d*@WS)fV~JWiEt06~rQnC(Er| z;it$C9J_N^+EJh5+!fl_l}=Hd{q4%h4G%8ID5;S82fEKJs-^Wl*a`JFyM<+$e-w?M z1MMs`)O?gCcCXZbaMMt$ds$bnM;==9da`5vFhcY~&I)=0Fa92rr@4wky37#dS2SFo zH38!GZ^}zy>0!VF<0Ob!9JXMRVW6xO_OPHb*L!{QaT(rE8VlIV)FXmMkTR~Z4%sK< zRSYz1&nVZXEr~VbxEjZq#Uy$BE6ij=qX`g^0!}`nU5d;#8Mi?XEeG&8Sd0{a2`v9eUKx^kOHacxy!sKkVY2m&rt(>=4jnjpZz=VCXwS4l9$CwkD&*vd`?en|Du^@ z7@kT0S-?a{Ze?cJ?xe?|OLg=EO+8}l;f*P7%|10-QwsxS`Uw-mlRcnZ1EV|sycD_E zAUFXBA{i+}Vg4w!-)JpRRtVlkvpP2YRn6UTpQ?mKcB;X~l@_i@aG3Cjp{?Vu=O9+vRmH<})Uf zy#U=^ukqBDTCWt*n)7~MITNEmm^X7O*`^n9h6a}~L*i~Iy^94T#TDQ=?~*L>Wn`V4cuo15Mp%j`&9 zHonGU2t1qr(z-+&%-vDsAf%`L%1!{zG=7TSQd z>(`^Q;A}kjT7UKCx=oj|Fl(lUUY$pEkzQZoj4&qsFjwDBvw2kMPy)Mo3K?GRE4lP)!<{y*azI#0ml(;~T9jE54U`5O 
zUJijlZ2eKDOkT9o-fg1za9GGZwSbm9=<@sON}7d9C{Z+`8^4Y<)yUOv$PaJ(=X>=CJ_&m{?umBumCSS$C~i7 zOJiw~fg7Rujk} zp^oRQP+QFq(Q2EB+80NKld*K*x@M6rey;V%7oBlJB0mAuKflulK7F>my}b?5Js|HB zb!jhAmlhUZbT~Jv|BA_s)nm~bL!;Rv=kzh&%4MnWVFwH%hxP@=-+0`Mrd?JyY0z%p&@qF}tKY#S6pWXm=5Mhf)_CG_dn9~G(pTJ`*+MuzK)kYc_M_^)&8hmqbdAVYAyQ@tjmX8~1|NY{o_n7|HGZUVg z;4+}dIy=Ab*^&5Q?qh5FrPRfQ(*5}DL91b!0`wO$OFv|COBeYu+49A30!zKqcNxV+ zH6D<{aZHMnu1WuVzPx~QaU{N7_PW6A<;iYziyhhHL5clBBar64lXgj%VK-b7?R)^} zw|)J55L*)*wa%9fAGS-UBMEZXw8L+n-UF_tcR!cfP;bsgCSqlbaRM!|bOi%z+;_Ztg7QjanuG2mT<;&w0;# z?7kdz2Bs;odOBYpl{FZmx(oLoDD^rgNKA2T>Xh;>Th`{Y8tbGNVykM+zc*WI^y(l)73wb4A1{>NeD$uSYs{bjU> z{$FY0Ks7@>P&0mSKp3ieW8sWb|9MxTp<{NTkpoJ1W@$WAXAWc>Fn@VPtwQZ0nuif9p`uZX~)b z>U~io*jQmMI%7Jzg-0b++Gh>L9ZiB%-MQc$j*YwcR-aCbqtSB2GihwqJ{3tLx?cGd z&{0I!;pEcaVp)u!xPz^eF^dBC;W3~q`uXnPFb2KHZmZ9fcV_MOOeLJYd3U9=%EU6S z!%0UXelAbBa%|&PLE+~6bl*JqcOcHH+5+>Zal$7@mMJEf(us~|kq>6-o}O|h07Eu zhPE(n4&+jUjhhWkmyfUy3>v;VbNWiV>}8{1@xi{mv|9^3$sm@Pw)a>1AO~m!5z90g z76SkC&W7G-+Qv7}=jl+<;rX&yOWifCm9mU3?Jkq)o#!>HyHW3MPsD5)pe>X}6u%_I zeIQA8o_TKC5tk-z$8Eg6A;e>Az8t5BS8fCUV({;AoHGUIX>TqWJ$`^g1vg)?WId=Z`%!0x5I)*C3TaDqi-|EIuU3FQu^z ze>gBA#X+93H?`r&!wA~B!(9!Tb3aD&!PR0M4t%eg_*_@91PyG0n)t=#`P`Lhl_Nsa z)c#Yq1x$tNo7c4GsQ?sC;ElN zg_?|Jr{;GPZ^t%99v;lxhmXiB$$iX@871sS!!xeyZh9N^=tJ(%q(>KN`O3FwY@VZN zu+;ERen&fVf#D#EZ+N!)CR7Lg zyaF_ty$JgY-X*{k+-Rq15YWXhdiAd(ry6|HMPDvEKR-3iQsv;h=ex$Xu;?zliCCby zrw?5_>EI)- z_0v0vhHlrRU2_pBfYamcdj9p!ZNAjNVA5__xz$-@CBto@mMD&3EY`K10><$&2UvhQ z1BJSBn8b^piP9K~CM5y;K&&U4U5!KZdQ@0mPNvi({SnQ^v0aF}%g=sxyU=;03Ab^0 zihk{3i?{gp$y#oCg!A~W2n||;(CQ?Hxrkt;Q4el20dk|sa|F#e|4s+JT&c|Ebs8r5 zD4XXGdJ?=Ns2)$(^Rr>6aO!~Jn_Q%obVJ_(9&c>t58zRRYo_OiqQ#2cfPZfQD6Wpy z$(aU}w%~6f^XZZyAz*gQ-xw=Bk>ZFn$Z|ylDlJCv9Sfgzu3q@Nwo1y6$)#BQ^*dgy_94k{slEIm(R8H!le!RwFx2Xv&r7y!ww? 
z!{w;`H%Kx^ehPYB?A^U^rGe^^zu$z8(x?)R?Z^x;i(2PqE%IOgCF5YFzHid=b~5k?Ap4pwmCu0?@5I9(R!(?!DC)Q@0STm!!Uy*+x9S^6RzL4AQ~;SgTS1#Q^>t7s5Qvoct3LF6eW+?q z;m>$x-M#}M?Nds*SoSYv>g(Q&ui@iz(8{D=4ylld$)E(ch>}H3LnzQf@&LDB{~1;$ zo0qiffh{vlGwKpSG_aKaxysxoZwro_b~3=T<>nQo=i3CT%#$rGaeUd{d$6K|2w+&) zk(0kO@$N`F=h1mdsanmotY6@q(n7!cg@$)bLzasr;Z_uYP&gah?hdBjs@Sk*X46#X zDw1EO$be`141BQm(Mf_3hD(ph#+K&Jvv>hqTFvavrI_y{|JD0z@RuE8i+lQ&NK=)L zZ@otK{cT0jXOJA8tv4b@#FwiEputUJdoZyz{giLenaW-&KR&?>v(-9qUM(Y4{}63!utr*z&jv3h1mm(NWm>UiGzqQuu6YWY9~_e>_Z{iAv}||3nxArZc^po1aAoj z9KQq1wCZ8Sc;0#ZoRD{$xeu7JAq2m}{^^F7R*84O9x7nKt9s5mCv8|82TtiFR;k0v zD#!c4txMJXYd184R+w)nC}qzYz9;P>rp3x0w~{RyeKWl;hc=nAUn{?f$UEm}1O-*~)``hbADeLYsWm6SS5}~PI zD(QHXgl_bE8RA&jkAK$O=@6!T{t=H-XoQC?H188+9NHHrI&q1hx0mU6-4Z{2J$1*k z$6qGe`v64^+vQ0O(eyd(O7)I=FSwgEIvIg|SbCL?HH_kfATaf$>R}TMk4%|ejQ>D+7kv*QGEYG==iV8+4>p=#3>@eiXZtbU7!gh*!+_Shhd z%{ra;<<{?XwUWvWaLB7NaFJA&YCv%z8n4wRRsh-JE8h#dGkr_IuJ=SX3dA(11VP@} zq1(uLnM+<>5=b4Lo2jHk15JXZXCocRZDC;?tQCDj%BRYt1W&du zmy_wt{--W<2p5M$}rqrZoR)(pLHCLg|H=olx-{IR&Dy1Zq0;h+Uf=A@B(3&_-M`}K`Z z*+7VmeHdQ1uV@$_(6zhd@t(! 
zCmvIbLA&|MHD)72I-R+5HK*8s#Co!_psh=f(MArtTsEU=yhE^{|EGfiT3#}5+QBQZ zBY4k<91@MR0bw4v1>fID;vNnZGxIQFyu66jN#jY=53Nvc#6>63JW~}?TrX3>EAz8` zsu^+-3kkVJVlJAnHUXSC&CR2!G1!;xS%6{JFKb9A?0v}ikvoBg{8Nf zN4Qc2Wz*s@z&s$O!M`K?qks9stQ zah&9E9!VGJQ_dc14FzrV?B7aaaC|&odIY}t6d}q1^5W~>Tt-iA5vFv#;paO|$EMFM zAF`!BO$K)2-^VQauK;e@0p`h0@1|QfzwiAF7~o5L8sfVT4%rHZ06lJ^y$WQ^bH+G( zL#Mj0P$f%g0Mh_g;*f)mqfD5VvS%LxC)pkHUoQ=rZQo{SP#gV1; znDb+nHi(Pi-Fv5egtlJoX8K$-?MlRSfCGUcpgJ8G$IWN79DfqYqM!vdLKPp3?`ATM zc(YxD;`$DA?i5fx9y|E{Ql7E34TCK75IIy3$;B4pJtndWn7!XI`{I+?oqpt%6LNX857Wr&aC|wx zpqSqeqgK0JWj=-rL5Wm$| zjuFTC^fYFJQJUU`S5ZIfgDH`XA6NEc`~!HbP^KY3W^X|(M2*7KRF$LxtiCz31OIc+ z*e%ze*C|u_?c>z5!wC{iwp1-oyoQ8(8y;qkq>10?I|62bopy8#Yq{%QA<1&l-kCN( zkwv`?V|}K}4{gYAj&NAPVPAn6Pg3u@fyQs-+W1gWsznp=z^|Ux8Zh6%T_99$-y;Sv zn-rEq9h~ojtz(5eRbW)Kh3AagdT@QKQWP7|V+Gv=$jsqDso5Cv2rTFpJ3Cn!Xooz# z%_P=gV6;si-RJ=DT%HD;o`>uIjOfP$cMbj}sL=m~`NWB%YS+9a4419%?M_NYB4hTk zAvkvonHqN6RI4UZ!uu+~a_#@MT(910Fx{$*F(f}dWuv&~ws)`@PB?PGyhJQZmAM_6 z<2=A;MCfyrt(;c$rQ)<)N`{#N=wY%)=_AvnEU5CG2XvPyNS#tlAF0r5Y5V9m@ZK?^ z#|2o(qa+<-+53qV3BWvvh@%xh!{lc{_>^`TKpN$c`~|hR zw+uqLvt7dmEB0Pj%yWMcO$|I#&D^9c0?ymyc(M;!A7IQIuZ?- zb*^0m-+orb2Afc($wP7fF)fEzYu1$(3C0FCOn@cX-Q zYPGQN1wNlv*%B6{sQa(XsmeDcQ)Mpo>P6vJ&Oh-7mYN&;$rhVm%==F2H=;ky$Sq%z`3_?MCh5PrX+MEoIhROYT+=(Wv4AVdDv| z8fqReS#&{q#w(y&S^!x4ogV7lcR}BuKB48;L##nP`tCMOkxnk=ApiawZ?DY}dIj%k!dF*n=Xt_DoXT8RF&dZTSYg0UeKdLbueU+C^ zfME3^R1;YXDyaN{K2iY~GFfYB<}v8G&`!Irgz49Cmw^L7L5;h=)D!SHX8#uJ{+9CnX326z195~dG2|VXCl$P;3bqPuu5eaE z9osm=?T+Q`-gOd(-_^_JU)DwW@vJ!#qK<;i4vLrBM-ar1gEL3gLiG3g`LGmY4^)fp z)4Dt>8%|@yUfXJ4#NuThE>Y8e&8G**B!MMB z3ID%VR{Kfu6D;$7nphs7i;mJD>2U$~CA-dO{6OBG<)Mx|6+=dS-N3!la@q`TUH-L* zHS7%~+M+MF-iK-!;PkZRB`xlyu}}mGb>_d}7I)ghZ=k+{&#vydH>t{&|I%z$-+%CQN6` zP|N)jM}ar;$i1?fS2e>9SLd6pltWBJM|%faTCAGNz_an4N#WhA0vP#X<{3&T(b`EZ z?e_{3wd+&I+WE`6?x*f3yB+XT4lj3?Y6Eyi@;ovLd^34@1nIXc7lDoHpFm7{z6XkO zYih<(NtExe)%HKh@FDyl?%$f?|C;Z8NyZwym+@%JvG^qY z3wTVdQ%ramQ^93|DM#*1s7LL;>Mor$4#<6-IF9mxfCtmd6m6@EEgvT|fod%$*Nku^ 
zWvMZw!isF=ZYM}!`p2&%L)uRFn-d-C@lyn>dK=?sKyC6*FiKk|rxs46aHgV!8M|gTRd>L|9!2Wgen?@5?6=7 z#b#C~Eq`OtXjjI^vZWT~=?544%4@7C4QU+kD%x9Kxe6ttB29*%GedxT0oqY$SPg%M zEX{KDETFecWGQf+s;X73N;*bF2vi!t?!P&r-;e(vRiciIn8#)f-t)J+&30-{Doz6Y zv4#+{JVNCjno(0{LR;!u?q2X#5=iduGF_clc994+zUF5L0OkqV48Zu`N%Otn5u$he ze(sTa%=Qo*P}&>&rYP$aAa_9}Lh;*5fAg3~u=OR(KWp%K7o?S_-oBxd0(pUqc_5{mu^r?nSiRwrp>j^L z|6qUr8cAhEYRj62CVC>KfzgpBxd8^TG^#ug;IY>*iYcCo2|*y$sFxrHc9h z`s}p*kaFFuIWA_+-0cR**I}RS^PbM;T&#Q&jW^mgtkLZka)Wt#aoNCb3s3RGVd)|e zIvd{ZhB45!`0$>jJnw_aEw8c44SY^G19{LJm6r5>yTcsip@o2%CYzO}Z6Q`oxLlUL z@h5hM*#Z_I$3N#Z+v`sBNb||nt+m0kO9NV-Zz_9HC?8TjY^TzkXIVg|81|C;m>p=| zZVRInu$elHVz>+GHezGWs{)7vy2=i+EuZ{7Ld2C`&LYU2s1S`;WdZ%th5zp)70Qea z-yoOqof2O4WFZ1hInCOq3{mezpX!+Ck}6mzDaW#A5`y~#8nHeu)Y^jB9TZmMq95@q zVbr6x6RX^AN7KptUt6aI!mpeM1h2l?Io5Q}9NIVfjv~(nb}qbHu4;~AIdIjfq+9P7 zJ{z3bO=u=<_(K1Ue$qP-to?oXfpZ$E!tEfGA#{iKk+LZ;TW!uK6IiJW&$*T(CGNw^ zREuFKLAMT2s-XciR#@(C5pR*l*ZcGp)KC>zvE{^KN-5s2Uf$(E{|R#X)k%5eV>5EwZhM;(X<3`M(Lit8 ze0{GQl@2i4Pohs}%n>wwcPoB7_5P{^pK%}+pqxG`*06cM#EjgZB--~>7y#7-v{~^6 zsQtJP=jQ=6%5Pp>>lFO)R24|uu^9ZLJW(wOIO0GZea{hiP>jLa207coF1Ob=`lWZg z>V6zZU#%clcI0YMlDmcdwyePvFVn$i!3mUVxDtPqJzGnkD)28C5Y;hy|Ji~H7ypihn17hD|| zj-!3Pcx>Qz_a~Ia245*4dMllg1!hEWCw~!V3dcN@P+X9?^ZO7(z0Oz+?gP^U=UX@F zpA@WVAQ^YZc@)pKLWZRAjmXu2_y!13YQaPHGIsTH>I_qL`s9yTzsM>Ke zqL=m`?1r|C!&KdDAwaL}5l!t=>9>Yy`b>T=Vb7bNM(AHJ+{1)9gwEifHhkH&^S75N zdcYfyFI;~_=i<#O+cR^&`|ht{n$flQrfI2S)6{X||7Z}%9cC&zaRE|2xwmjPb-{Ie znuOx@Nw?-ojqLbu^1oo^gHP)}e>F;ft?rs<-suY`dC$LZ52Zp}91!}PmLBo1ACZ3rMw(Wlp2HW%>O?-2%RY?=j9{iUeOKX^?`QoR23HC6JDp*d`6UfowLKGZaW{=ZnE&0BN2jG@40rk49f1m(oRsa z7zJRPC80P6LHRlq(7m4u&=sTCm|b!2`-^zFDH-^SjDo(p^1n+zBX|YN`_G>@U(LLI zoBKxe)AzV%L8+)E20{YyzX~Y~m(I5Iu%7RzB_0Qp$jradh*#UhO{Rd?>MzG6stF^n zw4R!VxubBrxq_00-c$)aGKz8_Ge;bE#0m~9Ax{lJ#O|$FG)T;N_JG0GFG7HJ*nJqx z5LEAM` z;GO>)HEGWMZiTm%h!K&TjBu=PzoHjvYO+3zbJ&?~460owc!oe?5>V1nKv#!M1)NXV zs6&oG)`?mPy$^q1LJ4kolCaOaH2|U2OB|>)o>~6*Xli*9%vXi%9)=w8IRm$+IQ0tf 
ze+r?{ABBHeZt$(=6*I;O%y4ea{Xaa@txkl;W+)@67C1ZDOywpC_Zq0N>mD5BcD* z2)ZAY%YdK#&mDGZIEB&rVU?ZXQ6Z4hSD8(7k#*07=Ux>!nAAHqo^Igzlp|4ypfpCg z#MON0J%{=UP`4A<#rKHl`RF1q;1N|m`|%Fm2>%f6ev{%xZeDzZ$HBLk+Q31AwuLg^ zTK0@s<5d(PTu%U;kxA>z&g#|oFyVX)2r5W^pEQ}F!{qORnN`V7ucOy;C-MVkt(NK8 zZ}A|yv9*xI((eZ~1(FlbvkRneYb6~2d{88Tg3Pu5nS*$uX}b~vX)2T^`ex#&@AUOf z<1qqm_TB|F(I!fu8ma$)vW21~Y=29dGTp#_EU5Vfs$U#USDFYrG+j&!54mp7Y#kfz z)%Q81m6MrN@S#*ObKVOP!c$vvnFz#A(@u3eZ|oL0K_!Q2n83mM>upX7uAqYt=N=tP z(8V?1e?D`hH^5Ytb<|#@gS~L>y)2>zOgT7<|8pl$))Mh-Xo92VO0nb>KaP*F{W;*I z5_R1?fg1N4{I`w=hdn9=$auO6M5{oCNi*>8k4cHY_z@K$8`}hCsGjsSAY0+y;v?+C zicrosf8K?~>or*N=z1L81b6=Zl!!F_M`=TJ-K9&b(I)EgV!f!Va|=P0h>y-cPadpj zU5e3#Ao{HZOghhQsqL}a6fe$zvrf)fHg5U1J!10ILXK5L7U|0x{Nn)!l@vj6KgZE9AB@jidfb?0%ts-ELIQNNJw}T{e!4K zhlS!RFsL(wy+igwwkZX|)^9TY4+NNp7)>=iAC)>0neH>MP;Pf@d@ha4{+GrBffDkT zps^>?dp{Yh3cGSU6kPr_XKab|Y5Yok_04>MFVlKmO65}n@*O7HR&0*ieu7@ji)aU6 zIQVNfu}S}a6qyrHj&RgvoPuxfUv6Ji7fU!oW`Vo@?h_u7nHw$yXW0D2BuAm?WaM5I z-eFrN9+-Kz^*bjjuA@KBI>EXGR98UQc$`24WZlmm8UPo{jRj0xn)PFFZUb!`_~N34 z8a_(BKfVs)&y4L3Kh^C>tI+;+AzAB=uq7zA86WDyo%d)5G#meH?&)K?d9f4@hFz~N z+{NE8PdnCJl^pG$+>;jOu6=jyq?W2xqOT&QuKJz)m=&~BAY%C&DUNeDyR3R?#9h`a zOg?YMdJHECGpI@2SpeBP+{uvfV4$W-`OtWF1}3#sPHIPkF6(C3(U$Pg21DN4w#Zkw zK;61o`4pW#lOLbnm&$*%=}ez?f9thmWBku0AY1$;J#@S|yHZI;7U$j=YKl~GjNZTb z%81wE;W!w&A_bXK0j1^BnI}2NB>&xO5)^?rH+rTn;<4P90?o9|jXcH;tH-U^;t8BwmDzJZGdeK- z%y=nMsXjbMG(Huibunp3Xnf}+v}_Iz`05n6`{{ueW_d(6Ux!hE?ka)QW!;bVC)g^o zz)G^~5gIEi9K>-BjAcxDj4L>cbqjKB&SU^>Wl2!kg63;h@8IqYO>CeV*lc2|; zV3vBhu!B56fVk(bNsv~Xr*rR@H_0e&UZMx>pajX~fPbo50E?so+6z@$k*yY4$ojO@rIv#{ccygEpn69Q3%J`w|^i=a88Vj$l_ zr8!)sV1U@s9~ra^$*Ys(8<+%iM~YGAJ)j?()~;#Sq=otXzAv6Uy=!0}k{POd0 zj+-tqH+3WWXxb?3mH6bjvmvnCqgS#0Oe;7CF9w3jvs0YHzCQsU7o$HVQ+`HWGUSBD z#jD9u_E^F#5-AbjYH#zO#Zt~IR0D##3u3;&hPA*5>1res=Y7v%F86U>n}cxj;u}}I zk2KsazyZ7u56L6@;80?MY(4r(@5|@Q?ovs)y9wpdCAoIdU>%aiB7e4BcTOizU=G`Y z@&S>s_I_T@rKg=eS}1FVYjPMWM^)-L!2w~Qje8!tL}*kEU2*ggXGmZVPLj#=>FDM& 
zwo?GaDua{J2RoFq+s@xPQo79#9+7VDSa(WRucp#tD2`FP#wMSqPLqk5@sZi`)vefm zv6~IqiPx8_5Zk+xm%xE}UtQX1J_0LQj#t!YbsT4f|e$+z^Ye#eUgj`9+E_a#t~8!y>}mfIM8RRu$${Kk+w`m zFPXpr6VbM;Rj+00iH9pTSPjTsE+$K@jqLST9joGI1tfD(K|4))?!cQB&K=nUs%4>f%J3TIDXJbl}I*Gvh-@MU~_3H|0hOUq&A@<$X} zTus&so{bEPeK;N_hgH^+7u}y-70KR06{bxO0wap>X`A=XO*1<)jtWZ77bhb)hmhPcXJ+zCSo*u^d# z+8iB-M+v`N5E~PD)W26#V!JdBTc>tw9CWE6t)eUlqg+4%T||4ugv?p}$=CRe8*igp zav(0uC@kzpUbC6WtZlSTfCuvv6W$CEN~GcSs&l@apeKj5*`B*7`D{iup6fwwXo~!f ztW!juQ1ZUWox^7^vm9I9%LcuakeJG}X+}%bNjBp3#78+(*)>1P%+op}bLu;p5pCgZ zu94}yQx{9q8y8LUOuoZL#FQ@Whdz})rd{J80I~k@H3Umv#`DjL)Ou_mCjwH} zn5vl8Jv}Jr_=5C{A>L-ABkE=R505i$X!;dvuQ*z6)g4Z_`RnDY4}e07EeboSU+m3s z2zTV_-NYxrYF@@lYHUK9Kzh(5%Nxh4mTJaS@ujeW$qerY+ARe0mgzorN!mdzBmK=( zKmKi$MENNM1iFZi6}sOSQzsuETglou)Nk_4lFTVa_8of90Y8yLO7VHZ`W1<9Sg4^L z3fC+riJ?t=suFs0X3WJ4W@!+BUiHj-jx<>`aPE6ePuSeaH}9EDMMB>hUUiW1kBU6_ z?XOz`Fb1ksXNW3hqrX$6uf%Abi7|yr_M}G}yCX-Poq1DvBW9_h)&D(@ifQ58$I?kE zh3qevE7Fb!cF*xpH6Qu3ONS1a-g_y9MI>`0oXQ4XQFmj6mT17=)ss@?VfHnF$MNm^ zHrh%#N3E%{$vGH#*>2pFpSABD_TS;1|M_X4)I2U+G=5@_o`1%Kz<*Lm&)99A#ite+ z(>;npCPNnTBOWK3?G2q{LzG9GAmq(Eqh}Ef6F)@H^e$IRhcI|?>)q_98^-BDJA6Yb6ZqcXt)1CMUIvd)BuArACc+tLjFq|~4y%abC_7Y%GdqorhP?qj&XhvO|f z|AzD44@A%X*vtGIzhD4SFH~wZ-59yU%npdxn-yzLW>_IT@1BTK`jLuVS4^saa2Uuh zSu+@Eo|QVjRi?DD8>y&uAmL33I%uuIhMsS|KD+#7@%eLoTOe`EmGb31ER5if>cP4g z&rtDJ@YS)0Q)Kej14d8Utf8AAm~LRK__9tr@u6-P3k0TfbiyRaLnrby_<>v|X%!}G z3TMIS3z8xd!S2jv;F4*W__pfRDf&t8Y2D1uo3k~2OzRfqTbMg_ zQw46U|M-L#?Os+qU{EzB<&L~FqB-f?_g*x9^NK`PxfGRWzApDE88Tbvv*RS}ov=^` z>Hl`in|gg98-Q@DS8L&6y0RD%#)0`_f6$hIArlE+$!plbs?_1=UVB_N0(C-^P3Y*f zoiWNWLOBtnq-T>YhEYvEk{7|?VIf~hVB<@M#GlZhF=F$jr9fHY*$sTc+{AdR`0y84>9`7a+P80T0d}HSeWSwLsot z$V>z*lk(vw04R)+#d5x{o^v0xi+I};I|S<~(@JZlMz7U7?zZeJ-?gnvvJ4UC70$-R7IcmN;i)v?>;2gI;Za}SXDC*NVVB(o%X zFErNk>Qj4~QY7V0WtT9+eGhkuk|tIEDRPo%E@)vH++>iD{mu=Xa<^WJe*I8!)5O+a z?UC!JTR?p@DW(~0a;Dh~6hy}4{fr!5rch<%7hjcU#>U$@P_woG^F)EyP0jf7B)zrB 
zAAZQPOO){>k|+)nq8ct>^cmN@k0z`d3Gf6kxQQwAOe1)iqWHRb3eoeAGDf;7QFB7V99<*|ft#Gj>DPvIBD))8pCvIM=Jl$;`TaA74G}Fk8|GFMB zz6jya991Y9IHlMQZo!`rz===pK7ZA{lLE#y92{PEWVCx|Ie2*97MldXaUhBL1J?oq zozO!_O7e61*N7kuX%+G+ld_7jo%2j%uafa^JU49MQ6xXX6tEgN zIyX&RU5kqagOp9Z?a{E23qGW_kNm z3^wm(T;i1D+|-U;@*|Kg?Hl(#bkY9Y6?^1!VT~rz{Dr_;uGI5F!PX+t?fg#c^9QEd zcz@pzz}Mus45pUxsD<2%hzPk@y<&=Du~8q#hfB?WZOZEOV-MEu1$g9nAi+Uf7|$C? zq64*Pr>4W=NvcneW`%+MEb_4Jfr#_!m$YHRVcXnUX7gzpw7Ol15t-qjSW-<@Q~{-aCk zhNoIaZ1PaG1ifJr@9SRs-T;gbWVRpc%lEIDHBfFNAf9w=D(6TCk#}B$-%- z6k6H?;HC|$VeSc0Pw$$y)A&(3?|4Pat}#;Zp7Gf#fOu6||JH;L#wInpmJ&dcq_ccd zAI>?(T7OZUa0=l?%cRJew(htY1lU6qTI0j2oN(}ewVicXl5iehLr{>RySt>6 zk{SuA5fo6Qo1sG*l}1nmk&uu^1_UH!fI$#Y1OWl*{_b(@{qD8)@x6b2$LAl*1?tT3 zJoj_Qb)Dz$TsO1w!ITxxMIj-Dr8u9|<0vAT0o8v!ZnPmVfRHjG}x3g{z8-7+W2weR|aYJEw?5-{50sE@3?dpsyrBZ*_W3T5HJ<59=m)y?uL zOD(!bO|q>2vuMgO<@1|Ip3$Db=exHyu`<}WdClOERNbY!S8%?9bH{*)VK(fVVD5SY zKY#uTBRCl$#oj^zK^$Xs<3a9)1fh6NdL(oGB7ie|dPehdy*&SX2XfsyPXhic>E(Ob z(z_O)%gwG%1{^#NW%IX{cx7I*fBNS2^`%YR;!((OIl0VJw3;*6Z>q(I1O0J^s>)Bf z@ih~3Ra3TYML|FD`z25|D0+W6MHyDWlV5niR~+wbzGKNNf8AC_4e zmVce3d~Y#^=^?A-n~*$sVD$*kfk^R?#A)&`=!o6V$onxD?LJqvoSrO|gZ075qhD%0 zk)+Urd-+ib$k?I1Wdmw%SFw1=*B&71)mmI`Vc}x?ReK-Gm^le}E?){WIu1p|FPu1R5K*CYtSaU=YfvM3DH8;MBom1K#+&ZNRBAeYoNPWD z=&gX+kr0FaB^*gUD%Hk&wm;nhn~Gxnu81M8vgF2CCGIFR<_rG*(V z+|8H77y4qgbNKE}^Q0)b@Qg^F9&*&D{V;XGS?_JB6)Nr@lg1oHdwxg8lD(t)-c-?X z^6{0Ylb1|1jbH2=o2CP%_IINcAjkEGcIEQ%D0}rj33XD>FoVW}1z^jSl4GIr3O*%I zJbjiU2xOfVkKpoTBCx&UkjB>wbHB$jG4X9c13zO zK*tZW3kVw`_Y~Jlb-)pS&In;-XeH0Zl=7a^@`FoR z5B}M9HbSWxq$^TxJ2YP(t@4r~+C6!(PA$>hDMtZt#?Rg|3BV&f;?!b6EA0xv^-_Xv zuAq61)sIjU^_&ml6M|v9;;S>z``N4wFwmc&y$(izDHaRCEd}Fc6I6mhA1iL{_#d7) zI@y(q+~8Ue2h5ZQ%$x*Q^UdR|UffS>zdu1=!xu5x((A>-a3`71v^e5yiz1+_3e6N( zNEMRvdHx{DR;)9HVqNKV!zN2zQ%A7PTbK8_9rs?Ib9^|~-7huc1-POaOX1_=PgQ?Z@np0XEIiGSyZ5XYPt#t zkpmwj#|f9iBhGWWOe?^yV}V$t%?2Gvi%&JosO++VcPazayxbNxD>yFZeMj@bCqSgo z%jzMsWH`}UYNkTHiGNF=U-=sy3qHYweqYhH63HIvvn%i3Y?|rC=GziVp)VKM_ug-w 
z-tkntjkxSxb95Q&vufOW0vqdBGF1d<5(AZC$H_EJ>xZJ?G5baX?4ID7S4teE_Xt_= zcBoDV$l+%Zb47i-04s1E?+D0-*Fz`qPI5{pQr0;F=HkKnDE&2zQCxeHNEm zjTT(@Xx(F?hhMYFKb*Ngkms1gCrjos)O)1+y z*kN>rtFMF;a=aCncaJ2M>^C;PJ@O4vd}?Ww?bQ+}+~*5gN{w7E4RC!nPC9J74t}Ax z8Z=zNkuGT|VvK-y_(5+!f(p-J36?*4y)X*bJ(8CKSe+&w~!F%(RD(OGcAU} zdBt;!DM1yLvc+S^XS}?Ks-* zyk&jLsP!!$x1a%TZ-gjsVwY(gY*a-WaqL^``h{V-o=$&h1;EdX$BncpXWU!3K!@&X zeRu(inNwfW6E;0A_wMfg3gpNGVE_R<#ZP5@`pPvwT56TtOO8PWiOy6_5pwWpsK;T0`iZao=U`~Y&B5XW43 zJ&hEmO$eaN39mp>D(8Xz<+t-VL9<--mMm?4RYQP{eC61FUJYU*Sizgq_GUWwh+Ex? z;nUaOTtEBN%8}D&n1^;-op_vNBGUxwdF{qerP+5Ggvb)L(_raJM{hY4oSHD{G=Wr~ zK^sYamnpgK#-eH-ISGGB$Z(4vI2Nc8RfZ^Rcj96Lx~9`HQAjM`$ql}!-q@9#e2f=h zHX``sKpF_Ioh8B4UgvA&W{twFke6|q?k|;a{^{#gIVTFhCzlEWn7u7EcHQ+>yKBP4 z<435TlN}uKC3}T}`>O5Hz9A^Ki(ju5D@wi;v!WQwh`vYs!8K>iKMZ z$hO-tQp;u!+P0r!{b&J<-(9XvfG}|OL+67-zqC&kMr@Jy(5j{alwb4hLykYNWU?aS zNoxr0n`%d+gNkk@X;C|Ob{4Gw4!`zSfA=m48t-tvO-vKigD)!eC!tH(B&ezm5OI9~ z%~k0d)Z-rI>oCOwOL8+Yc}ZO2x|Nox@6`O91E3qUROh-g-s$m;noJ^fYlX4T{_uy# z`FGL$-Q(RKZFbT-?hz3h=QG{=@5xz0?;Vn&euFLBp7pD&Vrk@a(2(6-L>A<=c)f@B zoRqPNbKZ(Z7mzh0SbJizrc!2dsp;N-?nO6(CRO>M^q0*Rqd#q`0RUu08BJMqs5;gG zfJOPO^~R9fKuhski=VL3K8p?(YEx+J?JtxlmoB$_#ejlU`@N9|NU6azoHivcUD36U z+0wJq95`b1;suxuGeMnMH={Z&6IZD}p!8}DiP}um9~CFHDRphMG)<|WV)RjEDp*cp zoqOZy%gMREet|#*@V*Qqd_4kHwhKb``gRq|90WdSi^o)m3dQmylvCf5K($Z$Q0i^CPO%|cOQ+b6rm zKF%V6!kkWxxAZQwm3$dXSBfc-Dt*D`9kr-+f*4-zLescOTp20(N1OL!#y;*ONb`kY znkZ%O$+1SFH+8-@Qv>zyz00F7eus*79=~)C8^G-J65MTGqi~*-mvVLJBm)5 z03D5o<6`98`Ot7CF-wOSUx50^3+D7pUJUTt9;G_qSFE0}K{&LH zb9CYnwr?L_e@%pQ!}zkrtuCwFqlBJCrV2kgs1m=+O6Mik1TRB5qrP^1*d=mZ;RBxg~YwR3d$CKp`q zbArg+kdc~}dU_fVRJ@7fujDpy%KKF@&W9NZ`JP}SI9};#CU#RE2oFqj7St&fHyX4$ zqeX8Zm?fV}SlB<#RHoo)6yPT<(U}1BSC1o=3JXba2xW2O9sKeT&DJOrB(yU0%*%*T zLup{wff_p-JNnMfD$=J*gUD-;;dT4lU>!|);6wsNCfm~+7Y{$iiafWNwUKSF^kj)I zcT`NrA%rcA9XJiY*kBApj&}r(DDA4(D1|%|j8f`2osSjN%n06IymE2!%`gU~F}odl z^=qVd)-#F6d6%J0S&T~CrADDx$oQM;B>e47eD7JhABzQkQt;BCje5Ey6a2X`GT5gC z+Qy2)EdI*3XtkJ5H?@^QKwa~uacdQCrjOYZr|ogRC>3Ai*4Tv 
z<*H?EN9=^8hFJ8nn%*qq_@PpD_R#hRIjo@?P8>p*?<}!vQr5uCl0>HaHYETWEM{P;#*9{eX zJbPjcFLqsvlBqwot$Ii?USDkm(OVn5IHuA}3_CbvG`1|(dt=CgnGn!RYTgDwx;Ev? zg^3S(pwVw*hEZRg`ysZiqX(P61{i!|5#AI9{2-LYNgi(ZehsOC4!`Ifn zO9F&1W?a2miERLX3v0}6Q8cvqpruois!{y}^p&XlFT%Iysc59f9)??*_(9*_flJSC zHC5A@&XhYgFsu_*9(85ETB6IkxM_PP_5 zm~Hpi%Zrpa9Fo^4J*(tBrZJ<^c`C5^tX z);KHlE~D8$#w6pYB*LZU=K8km&)WNv?u(LwTJ7Ls9##iD6mVqgk6%6uRjeEqC6e}o zbt|4sFNfUryS`sMEXw*0Y=H3UwG10q#E{S;blpgs!(By9W8&iO`2cy-kA&kW136c` zE?Q{%X!+f{Qv?t@FPoBLsplHes!ffgJSfQ~DFPW3uRo!4F&>`TS7yt%c~F8Bt{gQU z?UXvV55H|dwc*}r{n#PUbIvH)oG!8U25MJJQoZMN&hhP|1l0{nM3y@Ikz4Fm*m|o5 z8Xo|HXDih47?UZpXln3L=hXMJKT7ebHwg5nL`JUC-4EY^Mb7NleEk;Q90j@cUUr+{ z8#8Mvn>|c`|NAAMW2FeB7k&z{(RWUex(qs+036k=lq| z@cF~@@b*6n6PYo*XIdvKx3>ab*ACbEHj=~2kKS}#h@e0_96A~!ol{sq)1A*wA;OKW zPk64^n;oT}uw4{+o-G)FJ^vNzOWoWDXc=2Igj_jLyKl7=HS`l?V*&mp11xlX$plC{ zGKRgyX`DG&z}K!=PkGkIY@20C+(A@48VvRkCpWScIL5Vh!WpEZ>zku?Ejz9ws=J?w zMeT7{na}QYl__Ukz<_^2(xgD;1sDTJzyR4YRxZZW{% z-rjot!mPxEiL4rcb}x{{#Bp;v;_&0m-nWe=^)K|==M{tazQpIL8%21w&FGc!sR2Pfj&EL&wU7Ao^C`;p+R zdWG4ca0jKcf0RG^yBIRHjUPTsh^-@%rKDjO;oB(4uIZKBjQ3776DI~5>trR9i}Sm+ zjIA%f^ZO`6VU<4ip7;yf)a^}+cp{e>ynGs3woptF0pnt7{>pbbP5B{Msf+yp; z?Oh|;D4X@G5&KIv&;XFJkUPgD?drB7sV z*m_GmBFo&9*AB0kic9w6WGKZY8{ExbGT8&((iVs_K944A5S6=Yb7w*%C41=z*8<(x z-M5MDgM}jFw#VIw$@Gv(9-7|X2;MuIkxL2V98!f)KH*s^7%%4@4Vb@a6L!ck^qH(0(YhF z^c!k0L8jY+#+DxlYi!XHHJ~j_63(GywjIZ_5~&?sCn5}770UT({m1}Ni(L@0gRJU! 
z)3F?%cTw0}C&oBp-tEMI^BicB@NO`+`DHsFTll>U06 z-{N9YoZxF?soya~6#m9Q)CMy4two-TfQ(jY2X>&#!Ozf1SP<07LubtDI^dUew)=)> z6Z8>U;Q%u+n~K}y!xId>zEleo_aJPB$MZ@Ew0}Kbnx-9s-DuKxN2=SuKbGYe8Ih?2 z4eX=JIE$Ymj&5CVY&jN>mR+_+gxfW6ca&mgA-xPG1VwY>gz5}m_cP#~>px$1HG(R7 zsO))H)d<|Gfb_UClDEP1_tef@ty=eT_-5}ZK1cD}d&kBE138ZV=gteTr$6N>*V_M= zdz7_$qRN9!Mg;9_C}g#;aKJ)1Cl8R@+K5Dr)iB(<6Y1ZV)!tlXCehP-J9bht29&1n zCda`d*Y#`;iyzW=`cVUX3qfjJq<|{1c{rq&-{cNx&kOc|`Z#YBHfp(veLUTV3R@WH zvP6m7J9{ah**58&PyDrLhwRpXOFBkJ(T&ueltg32S^O57epDm|m%ur6EOD*o%|4qq!)&%3AxcaLT){eK}g#kgS0!Gc!1GZMI`0N zhcEQH;+jMcoJW<-IV&$SUC|EA30+CMTxYmEUr;fSFe2Tj`LH5$9Y5fv{oWg_XaAx7 zUy~X?h})C)ttAWLC~=j)h;61y0Fxu&NBEh^IN!JnEmV>~-1XGmywWcz7zX^yN|}<# z?Nbi8n0y7NWL(cm4gnMN_nZI|E*Q>xc;S}zjJ5KB}5oRCs3+rz=-8K8bJRz z6=KtWi}eO-frP~v%E9~e4M^33ldxT6-;`6JWars54V78VyUUqnaw`}9gp!8HtUG2P zL$-X_FeuD*kF5Ak#^SyP_K8NJ9abL?wSfvu&8lE1Abd~vd`0W#^F&n>6;-mlo9$IV z;fihhV#rV0V4-Z>#T@g-Ryy7`zIxpJU%U7_O+jGBKuoH01EImE^^eV#Fc+JvCQ{pdz;J6t|-Vu2FR zT8<;)AZ=;iRR5~oaVEGuLrTPpEk@&=#!3Xz=rM|A^=g*wFT%?`ee`{W<~4ggHc!&| zj)o%^#5m_cIQ6Yhtm;nG2CVgKRdhpVBI}bV{U!=Uj9XG5r|*iD0VsA7eM;MDdEQ)D zb6n4aXRk6_X4!Vet|^taS7bw5yjV&(!nK8+5vK+T*E+>sV>H=m9Q5;a_@)absa}I@ zU_*Gh%Bmf-^vxl|wY-SC$%|Nnu|-P8(3Dd0qD1B)YwlnGT7%LQ)W?c^+g6Q95MI+} zc4ic8Pqw)oXWh-f-$^W7o=VH64FnUGTDP1VH;ZD6l^J_nSoS zz-UD1shwk_i?xP0OpY3+#9^MmzvUqrKBL&ffUfNal_w_QGB*1hSm`KtRKG44kZx$( ze2k?hn{(oE^A%Pg53%eIFws_1(piS*OSSWTpV9;yfB~=4?bmaaxc=u6>)Auk z;yO?1`kPs=55>nRjZ)xRwkPf|#YmmAMQ+j5uZ;vwDyinL`!Mzg(#Dv*b$Cl5MAQC? 
z*xQjX_=qc8c$;l4{$;7Gx$OOdaf{k5n!nB;AQKVyBmf(QRn2k=_Jw3RvQ#q!XMGt7 zfu$d~T5P~G77no7xOG*_kMG07_P?&MfO1a3N3Gz;zOJRN+C;zT+lA3T!Q zE3@q%e{R3Q7`dzR=>=%bG&iEUn3=28ah!dpQrhXG-q^ORG%GORi5tAYS&zeiLpjZE zFdM@O~%N;d7psOvBT6bS^6=HAQZx$pcXO$RY0f-}DJdGj6n^f$Mmg2T5C18j; zu}Oa!O$l*}^?t{}U`}K)sX1(>s|Z&A;T)@_ck0kwh#n~oh5f@85rtPgjG$h!8R;8e(A7v{X@4 zwr~cCXX`ud#%;RIv%IrdPRyW5mM<7kuG7{*bWU4B?N-!MjL=h)GP+vQn>>c{r{cur+69~+ znQ{I!41t7MqRX!^bf&Nltwz+s!ZAH*kB*4$aR@>iMRNwsV_>B{;-?)9Zy$0mZFEgd$Mum!TFW4h;5 zifOV05QngZ4<~)>A;_;Te+>g*=snz|#7pQz`~=C|At%Bi>Lg8q9PQW%g~Y+Tl60Va zz%3?a=Z(CwU;LzgP3S6{3eMV$!Tt7y$EYtMfVMfx8Hs($kJ5&Gl6TTs%n9>Ns<1@U zZB07_FJjTnO;2?Iq(Ch%KphG#;*wp7<5##HnWOJ-)`{r8l&Qpfzv$!%QlnK2p=+f2 z^FgVDPMx}iMm7p51YIcveQm=F0kPi(sJtNABZ7&|qql0M@ue8;0Mu1BG;mu@yIjIOBmCvfB! z`>@~)U%y-dO_dX#S=h=?9|bDt*KLKP9k%CwOp8bU?FNw}iaJV^Jq4h3_0P34%c@r= zVkfhbMUSYz(~B^;N7i=xl#q|wp`N#8z(|ZT-5+ubJ7~+Pi&>)(PLDW)vGCwcNwNPQ(JFvv)XG+uO%!2c@S2bDTo!9bBJHG>60aS z!fgOTF#Am$&v#}ahImSaJ1wg|hIqJGlHa-s;yuvbzt)_Wb)S6nP`^q!bUjE5v54oR z?b6xh`L<#tT*rV>|3Ujw9id6Z(U4Y!{QR;dg;rLWe4}N-^B`5FK}WjjXc&LZyC4&Q zl-*K}gGOSr^Mlmkm>Q|qy(-$iueDsZ(+-7->1$J3=v^4&9`vz6C)+wbHBWrVPdD?N z=?ZF_U|~f1*vg69Gcoe1unBvWQlas(9rmtTmYLbD=u^Nws&}@?jtFh(pGqr|Lylq zxk+>VAIR?czrHV;)T(sHW`ze~^JF=WblqtLX%K;;sv+%u?`TW*BC;A@zKb@)W07$X{wV(qW$)Ctyxi)~((Di+ zm66fN{U()J(UTo^dNb=RLg?U~=%pTi^*!-VUP0z}SavLpc#FG)bfMfZ*E@ZjIT_F= zj$eF%z5X97+Aa2=cRz=yGgKXJ(sOADSc9HtI?EneIbwX^Ze;pRHUw72Bx+lz4(rkV zu~_-$^tUf~sP8|=W|SR|Vc}Ha4*byDZQsnpn*4UM-@sIitsd;D)2YTB7v+dJK?u?W z>1M?kr$eAqH>F?(WgT(K{>D4RN%T$>PEGB+_O!Xi{Nf7!km}@6#6wBQC#4R)WNQ$d*PPw$`2|2+Y}QaEbQ+;Tj7i16cyhnOmYIw6rRY3YNH zWY-~yl-X;lh_Hwf84J7M*U7I3i_#=~pLHW7oa?`YoIYz0kCK1?;FIn*M*fV><0}qT z2KtJ?SyR)%ojxx}kecHNiLceW|0VfZ%j|`d>Iqjo!oAf0>UaruaqIT21ACrl7D3y{_YE`kij9ij&+>$b6;}CQ>ZeVkW2s1W}LfOl$g^VM~9lIOE(tamCj0+)E z|F4JX-!1WU`X&gHQeyIMcNKpWFD`$W3fJSO@_#(a6N1?lTmlZJc=c{iw8&l^&R_O> zh&C!Q%{sUeo8obdEb1j_42$nM5?JRBn4?&6p)Gd)JI)=3`Chf1o^p&S@bSXFT#c7q zkRE%+=z%J;PCP2;JOMCeHfVGiV*;FnXDkch+p_@ZCcxbUB_?x!70uI#mvhUKqM=7ZhfVVv>(3XxdJ|Da_&gAT 
zSz!O%48REE_6gDiq}jM)ti2EirxTK)d)8Nhk@O$Ci+^8jHJz!)z$ZJE6OPTd0&%Iq zGxg`FdrpGrj9A2V)Z=aqqg^Nv^J_s-zc!?osv)3YUSYY7Y3E74fWY`ZoRnQ;gF|K~ zaAQw{@KH*X9_g;xrRqbWMnSS6fxS5BwYMwC`EyVCMoOxOus6!9q$BJ?(Z{u(GB3%# zTHevRuF*A68?-c#x3YsQcyk%sr9mijuY#P=jNFAknb?IqseuewM*lIPfL`xI%#+B) z`-)X5h!^T7N5-zGm;F5@I z>xGAyfD{ol6Az4)7zpZ1HMq&Cg2lw0grF0=jhY9Bkw4P+#sYse0}nt1&hhjojQuV5 zH<37H!$+HX&;2r6uW9d|fS;4W6I$QhdJM@1K-B&FEKcC)KQz>sbU1xHM!h?Q-ohWZ z0`l~KmYBe2ZvCV1PNxBZR<2T>(@vO_nXDn~Wg?eksbPr<6n+mm0jo=T1}T}w3k(~X z<(4eek=H#UnrGZ}@oQJlebS#l(m@+KCn>*?w++O7{PPS5oYer5j}hD{RK!J3b_13{ zfbdQiFEvI4RHxiM`4JC9?nr~?kX)-U2IrYcb0`>0cd9f-#PxT@O?-r6;9)K>hCEJr zFai|!n^HF|zzOlj+>5bpPQCR=hj9@4wrQ`qWPpYO<_TMYQ|J=Z+RK5Rg4gfEj zsb$~vB?vvd^4;k9aNe7c$JN$;1rPpKZ>_QREB?QHlvPsc_+qiYw$T4G$id42wiAHh z{Hf4_!7~GhOn|DOc6AtJ9vc9a7vN68Fg@15?6jX`7*4^zE+j^w!<+JF!2SE*fXn+0 z@T~slZi2rNm8yVYNBu{z1oSq@f2e#g&;S5L`kM|1lCB!UTH7zNuJm~b@JUc=I^Tg2 zHntWx8jd&`R;MKVnVJyWOhwaR*9Y=jT6OSQeL{YbjG+@MXp41Ef{EZ27)R z{+J6Oa;!}6Ow7wiq<;Iyt3K#HvReNW;iN73OD@_io7WAa3;NeH6_xrl48fGGt77u` z7N8rzTKIHH{{EqPuW)9PHDoJo%A^&?Ti1k$j=ROE>b%yV*6w9|*Z@!h}I`o%+f>(H8r>lm=h{`Ll9y@kYsn?liBp^Be81j+Iq z=DWXZJ^hMrN6LlKIsWbD=)_>Y93c3|Fc^S<5@yHQ?*!SP(xE`wf)b=rE-=vtrp(zi zd5^Iw-|qw8zGac&$xSok$IMj47UY7p|wVa5uWv7BigLZA| zU@pJs$X8-v%K3Ro1ZYvrg%`4Go?vkv>gu_&(ewzNESmq0Lu@NZvM)KXYSCe$Xk>$g zf96-4wqZ&2li>-&m^^J}GBK}ucKCV>G2o@m><5qDd-^&$W*=6S;51yjQ#X|dxQjDC zBIiBDXASI|2G)~ITOPhUt~mGKJt~7E1MJ;oyZ4wHdh?vShYH?JZ>|ICUm)w7V9Os* zl45fXvJa%0c9T+n;{5Z1Mh&vA&xJmRzxwi^R5EnFb@I^QNl_#DN~(qrn{2?x#xCQ4 zq8abqGrl-gf-W2CgAI9?=jTB=lAq-E^F0iut4P;Htm;1`%K{3JEyc1!)8ggF9LEK6 zD_3Oa`;)gQp^3Lh-_syh7A5CQ>5nQvN2MOca0>NJ?*?C zKL(A%B04;Gy}9)zGniQ^J=ssx)8ri-jl^Z!EGHp%y3+qyETVsi`NT4NoLyij~PZM2I%u58K$K) zG~H6u5eG^oJOrppv%A4&t3T+daET3WLqyx$C0I95%P;qjq3DBUgr>`6{b%!Yq*mh9 z9B;-KDCvMc>sA{fP%uT1sVkeRN;*5=4SGUZjw*)}p8c4syUdZk+I_m&H}g=VIeaDo zkSeo~0TTY~d*lW0$33`813=W>f1b!xGO7_)_4p!!l(BioRkc&gd@DpfweKVy;1PGW zDJ1ii-u$^QY_aZ>)(?t^3@6I#5@Q~55xzI}l5%UOj54DZ_t$GW0(iJN%cz49QMd?| zp5-enqJ@wLBQlrliON&%HfK@c4el 
zqz3-&(Lw+PbfF!R_~8CK)iG;Zn3!8K*KZ)ubozW9gj7?R@vLPTNwVMZc-kfOWPl*x z6)~RFJ;ks2t`)%qy{wV-#N}ude17Zya^w0lWn+kh1ye6oTklWtu8TQ5cRLz(x}C!T zV%=a;`m|dpYTO(A1YyU~zS=|GaVIXW>Uw2imtzFtRkmBy6Fx`W@~8Z>yk*p`G-T^; zbAIt|K9wBe7RBUGC?IOw|CnKU)j>K4uB&!hU|fHMaYg7^m0zPWo6iuRXyx`Cw8f!N z*M2jLXZTw)i)V++$1*8K4>5{Chu>i!E~Jgh#D~mfIyLUv*-aAzh3(<6$5JFo5NZIx z{M#aiPi9amEKiXOpsdpYPBR4iT@QcBniiGkBml;I4&bKCyaVkG!T!*TGZB~dPd(VU zssH^Fyg#DAgrI|gApXx915tZG0lt|IM5KRvGu|H#{Cbn(9t)2eQEEhS`2Uzk{=cok zYC7!y80fS>qP`(UbH%XX4+JI8)@4X&r(hR@Xt@S3Q-G%QpHalSH3K4N*JfUMscp{J zjU;o3d6tH9&Ft;cHwvTYLXfs#SkAD_B>OT-27t^f3U-B!eOhH?GMh9eeeeC@g|-VO zU;(`D-!H^K;L4}lU+o$DCc2{&HjdOJkvE ziq^khsPw~|3_hM4O>q};*H4E-r#l+#gTJi%nE|&Q=KO-mSsDUafA6ORv{Tr{flx$z zpG86n!y;)gCy#3^D|M>wxCHv>-`;Imn9;h7z)8`FQ)YT;`U&wCR(jGS};4_bu%GqfyX!vn+MFikUH_Cu)Y`?25) zJ^I?owME5s3}hMqS?<#hfdx2H@OSXYe7RQAmA|Ml%|D)>8mvpfU#rp)lp9@Qs}I>e zT3AlemOv$lwB3<{&;LvUJE9!Mud#N)LkpJ&l%Z!BK!tR%Cy0%?_>D_I(NO34aT%y# zH| z^)V#-;i2RVJdR;U+U>UlCXJL_=mkK-yvqUYjkYnWlfBKYLurrsm)+~a5rWz}O1O@f z80*D|#Uvvycm%Ef60`Gejjc>aQeF)1{hH{A=u0a2mXz%ER>F0<*C1E!iKeEn{r4nB%GA_b({3Aivm zzg@hDIoPrLZ*!q9vOQ|V0fOeuCYDx46-~D&yE5#p+d^%E2WH)c7yB%a zUz_Y*tW~4}*3;?c*89_N6m#iYNhB{0Il(*#pMF0@G;w$6cXl+kV5dOF!y$#rH30uw zV3Mjm~-THC^L2t0@TLKdd2M!)fhjwNX-)9y5AZXx@P<95Q9Rhk<W1%(k&i*;}ZuhH{;u6(TimqYmovrBIAO&)w7nFqDZKC z+4WTpBaEF-IP;<%0DYb*1TdgGsMQT_7XeGYF}8!p|#)^ljnC%yi(5|79}4 zys;?b9ar_SBaBt%b>GH9=qSgQ7&0naS%f0Tsn(R-Of=DfTGu{O%Mjl3V(L&a@bolVV&QUj-LK6Y zvq-8D{oAZG5Qs^u`Px!n6qD(45fL3?ui!%_WJ{m4H}3aoTr%-j+g3;iWt~Wj>TH=* zER?2f1w>jOoZ4u9M<(a59=7u1R-FG_4^igF(uwgslpJg96|0*M`JveHh3>vNzz&{= zXb=KpJm%b(4)US@{C)_SukyfAyFkG8jJj_kp6(S6{(_%1JNHM(V*SG}0k>H>meb+| z9a*H^ekB=}LG zm%GJ$jadvQCR-brMPLdp?@2rR$u_X+zKr{xzr48)idDZ}xOPa0^@_%65nCvrzPW#T zuIQ1>CeNNT^fgVJqi*xhF^LP5H~-uwuoC9#y$ZUFOgVqJYJ@r)yU;v`mI^Ih%5Sxx zS>RPLZby?R`h6-#9Ob#G$*@U~{@c;!m){7$OM4fNw?)zAPynRtFe>GlH7+3)H;F-J zWDMoVG$;1 z$oH3$E|{4fkb*phMI6q!;8bqK)$ph;0vd>0qE3FICfE{K0lyLeY&J-?zp*LTeR%s$ zxGwRVg8X-$bt{~h$x6rh&lw*xcrjnT1dP}}|MTZRIt~9S8U6LG%ErHD-+%pu&-8x{ 
zdd$BAkKOFQ9=pF@>Lx&)V&X3JTKQH4KO#sVH62-Km6tARR-CgdiYYlG3F}3P^W@ z`0W9Gp8xxv_nh^ev%YoKLYdk3-uvG7y|4IPzw6qBs;S6A2%rRLXlM|5IVm_A8oE9j z8b$yf2KZEoxlaNvpud32N}#>(rCkPJu%3%6i=&~HMiQPGVT14ZwsMbNprH}Bp#Gt^ z+vGh(LzD89ml8*~=&z@Qr)fSp{rwBSZ}af9zFt*QWlIdTJl)9C{YH7yWW= z__+RJ{q3`jU)7?gi4})BtNQJ9j%^u|!M{h4FMdpz=-Fg!Ml~%k3^NJi&7yNS%8|@b zwG{3sl--rPUX<~>o4jYa-#6dCBc$wkAANzQ;V&h|XT8a0!dDbM*vrbw3JMGJ)dG%u zf1MuY&S^)5K8PT%dxP{gBz64HpDOBBn}%O93S@)+H*Vi%%_fc?;xTH*g2V7E*@$WR z#T3YV9VG#zBRSPd1%1)`^! z5hX-(bKzWU3)hrn=@_A{&(l0*x+vO-=6co`*=uKg z{Fb!b^6TqZmV}50&HN!h)4Wq-iQ0*b#gA!_#@b~L5Du#VUUJc2d*mGL0mJx|0nVgj zk6%DIo|{r*P2qgP_Rquo`_uALy#Kdn+X4gUs#|zT$@Z5?-`BIkuZXDQs%D zSL=v=twtwc8(g7z3A$%9T8LQ96$`ieHgCi|3b&Ur|Nd znkgNAtJG@Zo}vrmts(WtkCnP^qdRB1?k*yQ$loZr$SyD>@>$3zTv7FR`SFThP+iaP zdTS6#WfWYt>xqJ7bZC)oCDM{XB{Uh*Z?EJWUYaY*J2~lV%IukrzP6OT4y)2l7-a-kTv}~4^-~=U zpG^)HQ@d(5(=(PS&T*j?J{kGuwl&D0{fbm0ajE~b>i+f@9Su^#E8=ogJl5Z<#x)%& z=;ez@$7F>W$*^8BM0~iNg(RJDdo-nfj=4(4MrDVSxQRz@D1}8O-iU`GL)!Ur%OZoe zh(%`@JCs3**Z6Q_GA=GoT3XukWN&%A^oPLDDnwGIF=cG6`7xxQFq$;x!#`uYecbY|1R8kFSMloy3dm zo)f?gWxy~DI8+Q3L=R=~GYU9#)YR~kBIS6g=Gd6;49u83j9y%LW?0PcX~65iYI0vG zuv3klR?B!V8D3MDmUv+2BUrj6Zw`D0=FK|o7j<(M_g!nI9NMuBg7E+=0f%DGC8 z^o0_>F$teZBh+-|WBvN3r@P~3C)+xq*C#dBjR^WzoO3!FaB{t$A7NZgneW}UcPZ3= z2Xuz4)xNwj(5@}+{IW3``z)KKWMsJUC{#6P|62KMV<29>UPotV>ABAFw?Er>N?xHgF*@3seM8zH|DAn$a*cU(tMzWBigC$jjps;K8u ze+I&odB7%8(OUluv+hR?gXyiz4Aix*Zo6VN@v$Pn*VK77#d(U+gd1P4zqGEz zs09bEV8z;w{PXUY>3?Mk5pJZgs)M}`$ZnD zHOpT2l!6gVbUnN0;bgQm(}eVp?#1@Nx&3NY7=uLQh=GH|^!xP;fB6`;=sP4Ll=>dq z(r})~c5M_$kH9|cnCpMgmC(Uo4r^#k;LmR-y0R-A}OrziHZ>0UuqWms3m9to6eI`E5b1KWZRi@P8+W*)Lo>$)q~ z88!bQ<}=G3rh39=0r)J_z^)%5nTPG{a&8(M1xeAvMedHkc6#@hR|RpZqh2#g21UV@ zqaODMClj8CgjOznz1!9s;eCD_jDYcaUE%6?yV^cxkUK?h=Y><;!gJ($RhD7vOJKwK zNjSoI-0A1hV)LWtDbihNO5W02k2_&#p0kJ3-t5`00ys_Gp3`*?W-H3OGP`WPds2P6 zQ6rz7Y0%(>ghtD3z9XS~@U-aTTr?0EDo#9|3Ulc$%%D$(MW-!68Jx9;Q|?G@?qTd_ zKq>_&k>_xQUq$9U+wIp?yC~q^xeMoT0;-RRhzyHDj!Ew3K=8(WVpXYTn_u0ZlIvpM 
z3PTBP!d&yE$y2g3+Ma7eo}m?o05SFx=REr<`kS#$MS`;a`}u7S&hyjh->;3?>j^z^ zu14AWv{=lVgWY%Tq*h+ho~vP)a*vub`PrL-+>2UreY+qLNiQt7%t*z#gu^L?_mZ%! zEmfpiAtISjB$Wp_f=%8>1(#^-rMg2+uD3K*C&+NV{Vg*?^aF3)FT&7|@PSAqBo+LhL<0WzAtalByfmsfH_z(%kj}Y`^5IG)`=>WR z$%uTdBI^e@OVGjdZTPi6X`uDHB?Y=;%wV*rH>2D7&W704GBL3@FnU<{)0B_q3cgO( zW!axt=;gk*J{eoE+q0E=2PieE3#Bb_%ANVgjqKZlhJBuLV2%q5fX<1UZm!0#L!X-6 z^!rVJtM>oYeHT>@$dpD9Fcp^_|5y#>Bpk&z?-{d+=|nXcXz`4$rFtF2l{$aPln|YB z4xN^hZp7DwPU+V~$6h}=oroiS%{#B^;ukxeaQ6yj03#&Xn=;w(n@ckJ5$|{5(tubE za|Lsz-`yo0oB}#=l$x%N2KiYzEIJSAPV<=OWlgYrRVDjG>^HH?K744~dmc_2A~ydh zeCOdKvl{?zc}dPIvVI*9Mrh87ye3-}|B1EN(!I9|iN=rk9?lYyqXuWzKhorrwJ6J? z6ANf22x;einRdjIFXp?Bvu{|_^n5E-0-sUdA6vv`OD=EF(6^6CkGz=|ZHxhuj*dj< zO&5l^AFSmU6=gHINjPe0U888EXbC1^*K98{=?>^axQLlRS7g67czfF!=;_h@dKsA@ z@FHhl6WPQ#r}^|tDQUA3BTC+_WkS8;6ITx(nacYII_5v)Ih^CbPd2Eg)*jAnYSzh> zXq6B$y$vn&SRk;x?}HPC(~p*&k!f$G^^@$kxkH6$mbLKd%e%7@p}k~T6i+@16AoD$ zPpuX&yF7kfC|7 zw$rgfZj&@Kx(JhCwVY&vIP`2D8KVM&!7`iCqHsjbM&ErcHyg_1_HmGJ$&S}oAYhTP zf%ZHqlj5D1rFRV=h(gOhey!rq;Qa2*4}^)R{S7WI%0oe?wUREetmPlqzDmnXBAaO6 zH=MgEdYmlhzCqK+bAsNpw$v7G$-8~*b7hSekF#n911=kx>Ur=L>2nh!Eol5h1GcdH zgHNe=zb2VbAdqsuo(;WUwmo{7P_R7HPD5p)1abXXn?L=bJ(pK@}{ zyo^GMmpoHWJ{)zPa%Jb3LA!6pJwL=wQ(aw+jL3Fe9Yqqc<)w+dtxY&s`}#qiuG?`; zPZsv8|FpB*l6m&&=(~WMLG@eBgvddnubmK8D3<)atHjqulwc`xtGu=6C#%RSTc2rc zq7+C5A3*%{>)fIh8o%71R)Rs~4tXu7>r)WMD-U&ugx!WnLztH)%FQHUJj}8YEGB5; z!hTcDXCExikAGf=hssw+H-*}Y8L{27no8}nby8O2|MBv!T=rZ?G&6EAwLeFEtBuki zN})}Wt4<15hyMZ!@Tg=Lx<3wtDeiNC00bl7u~+w_r94az-xc~f64+I0=kIp7`QOf&;TAtol)A`HjJKhxy1$*l)V0D9tZ?AQp6Z=S?e-Y}kr(h+yi zdoiR0S=VJgj2~^x`I0ZH?i9Sbm}Vaz4BHbveAj!0vNBr5-;$XpKqcit^lPN&=>gI& zbZtII8hWrXsi=^O@{(3<*{**jEt1} zE{q(NLMKaENDRLz-UN@Ha&nQgElIewU7h#};c{7Gu%LC-EDoH0X}pw~LmF$}BaE~J z6}`~AEM)}C1%@~%`4{j_n46o+1qOo8hI6f){ey}cavwfkmx-|5$-l3(-+Ag@L*vDmR2Mjr4iO%x`D zNCp!}!-Ff_mF`^|&$7bGO|~cCkVZLvYW2JWny;P8tGsyizYcWaMp$N1S-O?h$Y#7j zhA%54h4O*KNb$r8>zY@HrT(NK?z-HNY$P%u_Bz(SaG;@h^+}t($8jy=8}?LSIaRTC 
zc7sUoI<0BLmk9>@8@u9e%4gq{YvBnnH1Ma43VMRqu37fjpLZO;_0w>o7<4Y`<4ti! zh@I)!d%ApHOtX-_H@CY+MTtnkJ3rZ9m4&6C-gei`;wS~0@3|R$J{a31b<_54kwcX_ zrIc(+31VS{v;MX%i;*?DmHMtpm0rrCV?1-$6>?|0*L8?V%)T{ODLxW8n(;^SW^GoF zjncC$pe^%u$gubKqzJ|%45L#XBN?etc9KFV3Ma1i_xUN(4}UN{L6NgM0D&eBy-L0m zCl!sek9F83ixU5v)XQQP!Hsb*q5XEO%nM6XZs0S9O#ie261+Kvr8HsGZ)U7C6*hFU zD1WQIK&~n!NvO5-XqMS@&Q4JWMo>{lxo7aB$cP{Gd&16jo7{T5ueZq3nX{gJGSAE@d#dCm(~IK@ zX`IJe2KK&J7~Fj5v^?h^d0bH@#KdPXXclF~VivMfxa=zKWFL*wmRC2JBO8U2=DJb+ z^UmG7KzE)Ncw(DvrID-fH%0B*0)y`}#XI9cKnJveAlyM@Oo6)6BVgD*CA#LV8+YHrvSbQ8s@Fm26#fy%ji5+1^>Rr?@ zQ4vod)5-;z3zB%!924}$>NIKxe;RitB?>v)Rqq_NLm?S7o5+ovtr(kT1t!#^U0;=I z&~_}KU7p%U-9MUj%j`x{&C6IGDt?w=ED?xuq9eNr5e{)DqpxbqE};SY!pqldmUael z@kZ(|_VQQ@BIv*Liah(Zwf3^Zc~N>Z+r`U>H9wu~h}ZJ$DGkT-r}LU7KNqB>cnKVb z5p+dYd`UA|7S)|Qh&ySUODL9;6JJ~2gcyGg{77H^4KO46AMr2(#RHiAjuz#()T5#d zqu$hIAZ2>$6Lyw_a0g|)Aw%CMIeMhu zc+n{T8&dG&9UdK(A*K|#cJgf?Yki`!lKw4#IyVlbO6#$%M90KDd8^eILdN`#3;%E_ zBd@bar{ao)lJCA|r0FIEWeObk@>~=~Mn>fPDFfVJ{nW3W2l4YGXe;Sz?jxn@s?F(! z22Fx@pBcoU$r`^Up!YfL(VrUWSrV_@rtliIh|E*ixfx#dZDMUhq>)^@AK#t#(GSifpYT34F$r}y%Gy^T*QLZM5%;MjSdhepbr-(1PE}u$oWOXt!8AI6>7SC`B6Y zRTi-$Y(2ENu0_Wnh0A8b5=6qqsYQfQO~G+K6Mg4OOK?0EQe0RL^QN3XTNKVd1LPe8 z1X%H5f8G*5UQ-M+HIl(bg28nB|L>h*WPt2HRKJ(&N6=^at#+~vpKexB(#xo{S%Z3! 
zOGgBdu&}35iY?!db>5$HxR*Wb7))P&e4o-6wDmmNmCt88uxyMC;DEqY9}^5P$_adq zUB}5Mr)-JE$?6^FWre_rgE$~E!jz~b`ag`qAs2(jZug1)*2XZ~QotLGijKBB+ink6 zVu0xL3^jfG_KgdWVIVGd1yu2~FNvCJYTPbcGwN9~jw{2a4`%7@=+Uj)ce2K7pEUgbRm|EW zeh=d3$3VYsjMuyY7-vQ2b@T768+ynlQolq%l%Ah%>Mn$H)tll=ib1tY*FJK_v)pg3}Lu;Z%E6{N5W#KL66~y z$ubi~MQx9x*c=D-Gsev%cue^qQB!cu0cP~K66SK4P81#J57Rm4O_{)z}_YKC8 zko(gfF;^+!W{u|bc2V{v+xM>v*ask(*;Ji&DTfK^)50SISRj6fV&pZ z)*j7;%O9g8D>6-K>;jyH0v4E=Ls(9kTWdGOzr9Xte{CA975;`(PeB3m;<=n^HwPjX zLfKOp;u3>i5t3_b(JAL)?G#Pk^0S0uX@6-E?p){O;9!i4Ep9kZ zj5U7@#7ZIorwJ9!z&PH%eT$6s<1KkTxN(6aVfU=QNh#0)JHoMEkD}|djrW!Yki)(g zT}BEY#@LIgtCMj^gKg(8J*_*OVvY;bEYymIV+}Tjk=N8XuF7C%t3L{#l1Cz z44Dj=N5!tYU{;m2^MeT+c>(5yQnFqkYj&oGFZzUKyQp`67JelJ#%ivALqkO%F!t`V zzEtHd8Zd--JyqZZ8%O)&7hm|0LO!6&_YD+gw*MR{dM?)mV$z$v>-8wINMLbqNR@rz z&7OkfKzSKR{%FKkjQQY4qkKv8)*SgDAa-&?rMY(X zeT`fvGoKJ;(EuYSG4pH1mQVXDhre|~MC7~B;Z8N~Nl`n)FInUusw<(2vvYWni@eH-N<{t7%%rVcqt($OFGx%Of`$-?OA)b3^3r*cnyae-M>`O=ft48p?_bhg6kor71?dnlq|zVc z5tv!~W?O>g1u)IF!mM#sh4&Hg_f<1~gmMCC7cG>xw8gq^-_&}?5Jvz6l8D$%Xv7)> zen>c&BCtt(v(RVKzWX-vwI7(#Tr^b>7Z4fpUAKuvM`NL>5p)UfqZR#+3td^nJ1aC& zZs9WU3B6l1l3h2amj}tB`*N;^6CbURR~Q6Av-pzs7iRp6c#H%oWYgZZRgxI&{y29Q z2n74WE9$$iD*%4oP1#du0g|fFMH3g-d2Y-EdFN7kTG~#}yznZV^HT_d8CZ|n@yT*C z4S1@&LlZ49E?iC^>o$ZG8p>9rgtr4}o*USxb3c5&({mBb;(#uZPSb9yMIfW)oDIMD z>bx=>8cTqp3E+-EP+iJ(*D3BaoNfqSeywz0*NJ=14`^pU#8oh0)5&@LTU;!Y$jK5j zeECffT1r+*NHnwTYq-?f^C`}t;1}gIQIqXlsdqUl*S=MR$39poUbrnNL>J6ILu)0_ z)zu}(F$a8AX~6K7ZG;o;9%}sJm!ueYup@k|AyMwJF5yqpc5!pRIAl{B)QE#x#KT2K z=5xbc9r!vLYo9d5auQBclRvI}{tSpX%|;61z0I;2{nyf&W7f2NX5Hl8px>MC5_XXX@usaH&s7Mx|4ZmAAa|+xn04XR%#G zr4tT}MnO*LcN82vDGR49p0ileTp$B5Ccc$C*|_Rj@?b4On6a4|ScJqiuV4FP@~)U>hSD*UbPpUD79e07 zm7S(tf2JU7fjP_U+`z;oj@rQiQ;5_&|B?vkWH9h#t+%I#WdR*@_H^n?6;6u91p-(8 ztq$Zq;MTxm4VXJyTXS+$0--UjsCn_cSdmdp1_gi;uM@D~0kXbJeDVOJne%3m{i z6%eD(4_tt!1vy+W5H!PJOc%{fjwL)O%%# zY>L#c<*qg1g!??X;tcyu{^7F}SFhD*pYV}o-mD7oYXn%7<7kLsKZE7>7Q&a8U&1^X z8zAO}Nsx^K#01zwgEc3wDon7gTuy?bR|x>hp{z-{&K3&HP55LlNrntmd%N2@m>sCz 
zyCajz7V;7RYfN@wU9n!*mgoEgy}qt*%_~v1bd$_Ik6x}$W4gwkYtv24m3PW9_~QDr zTPkmyqM-Udhej(c^FL^e?aJ1|hqo|1Jp4tFw0k@dl{WzdT`$at1{svQq<;X-#p-B^ zB~)O7=;KHEb5JHpLu;=v;A(J6z2bFY)h1>$D4++hxrA29i<%I;@g{rjV^(X_u-+yX z$|ecWn;$pkF#`$cG()Rozug1))=&FMa`|i^-i8x%H-T=WaTkKq+{S7D*ZaX>W3TOf z7>jeI;-}vHx$4V2=*u9+37ZsykutgPB*b?yS0*YyHeFx>l=w$uv8zH-YJ_* z)lV*VA>MLb4{=E7l~%ML6R9RJyvwU>>sD5Jwfclj>1$Z5gwS(%eVM2GnP^WCQ6ILP zKl`=YI#Mu{6@Rg2J(L5?zyz>R02JKDTXPEdP8b0gXy)V%;tXUa*hebU zLqO5XF+zI4Ctx{b!M1-Et7!-H=jA67ddn(WCulZf%EJ@+-k!I2Z_gULXfNmbq807M z1ONlCE8zsbx2vdWoybc(di0hR85A7nHOJtH%zJA_3S5QacSVS5J7d{<9#jMbO$q{D zCG2!Y1cX7XAmc}I2o$0@I;g^e`8g)A-T1OlESvgcw!xXWeq@G5uFB=*6hUWwm)SsS zYULDzV?47EN&Z3ZXFUloZy*HGF1|5z03X!B?@NI47kJ80tl!r5cQ`J-Xh8i=A1ldA z@{7Fvh>}3jx1aTIvdclZKpaydJAq%;LbyV2CGMvG#DIXZrG@WclDwXm3S%&x9&8Cx zoLtV|4Qxmhjxqye1sF(iD6cPX9{GEn9V{N#kznTmaF zO+fp9YD##GlmE-bNWuRSpJ4X3IDlDg>2v6kgXI-C_(_KF9utxLH2TEuVaENx9W|q? z_&1YCeeHMldVPiHy8h#LRJ8F1M**6+c%DBwVNF}JZEc@qtme4_b39EGxmSFAq_B*m z_J?tOKGN9lJ_})IIG=gzK~Bdn#wEyu;4z%g;9h5A6Fxu-7qgbmVK;dZzh8f^LH@%= zi>$JzmyyZB&fz_;GONQ4&%Ff(2AP|mZ^@jWHIKD@AnlVDV&1M@>TYD|hrX0Kt1xpv z@Tp-FI-h3t9+Wha5)#~|ZM*BazVE!uICI|=5@c^OEtr~E7ZBzkA!M=rJs<7dchr#f z_Qb)l$Ak)OkYd(n%a>pV*A#*iZEz&zX3fDOQAO5T$2N;r)O58~aDCHhJsV?~QHZQp z4V_Bdg4^%6aH&O=sOg@ct;~@m_6$Z*lee2eQiI!J*PFy^h2CLLr*4k?*%yQhjwX{KuyqR^DfD5>YI!CPy^vnO1#I+94TR`0s-E^~axbiM$}a-6h*^PuJ_sIj}`uWRBh z@fHd@E1GMcC9jI=Vos@X`;X4t{U#dp&ij~rZ%1TN)6)HxQwvkUs+Z4EeL|YaX=kD? zrby6J9PpA#cJHu`yLN(Glg&isvdl!W8;DtOW7FQCqolpDP6;-^cc{>EW)F59yk+9# z*xQZrU+eux4R2_7VIL9d_z%<`oYc&vIGx-=qin@aXd%#%G~{>sIzD#Ozs}M!+vS?} zz0s%rP&70g$I{Bm>i7At!otF$G9G~F z0obfMx>tbSulfL_*@}!7d)`u0Qv-UAa$1H2eTJHqj3P3mz33@HN=%H#@4L#4>7xui zRwD(PPbYm`Lbyh1(#`~>1xxC`NO-&e=}jHXt%#fuoe*4UXSqGui0kB)P~#di;UAx0 zh41U_EmyG-cw5zn+sIg;nXA*NmqK3W863%R>qvfoYm29YsTNTX66Q=QW5v;G>|jA! 
zymC@>XW&s$@#p$s>8y&W)RBfm@<`{+DPgCeeyERqtQ7CT^r*&fNsso{Q8ctIx__Ve zzrO~kPD?*o%r0SCLWAjX&X)JQHWh{erm)=Ax4iC7krXVKlY)If0Z#kDS4{on=l@XMoTU7 z^kGb|7Q_Lwunz5TyyTsX#SEsFJNibc?`R|B-2uk;CzwUPG}`OfRqZ%BnDD=25^X-pyC8_l^Ih41x4t9(cFecUb6`D`zz60?}W`TuXC(NR!HQevv}C_Z@r zq%199H8ht$(q>Fq***wY{OCC=mbC33p@XEs^-hI27YV!H4mgpKEEx1Zg>rAa|=0 zH`hG}8oyCP+@J*f#kL@8$$scsBkA3y+VOrXJRN^JDq{@bOH+hgcK|b{%(s|L_6Z_x zQIcgd{(ES?he*cla6?&vWbS9B<<~NhYK(tuFAoayPWM;uTaK6TJo&7YzSI4(}k z)Vt4n=i0&_zJGi?<-Te4wajF9@e4@GO#+q?)KxjJjcwS58lX+5!+`EiukeUEGfMBTllU(U0Hvs+V@UxhfrMoYp~l`ia>LmZx}|z}0)@ zl|c-tu8-1>13m&i_4cE(R#fJviznRbo1j zS^A_Mj(+7ybuLXb*1H`RudJ%)<`6N%N1lLm?defkB@16!S)mtpQ$xSvlU@dtNV#q# zQfQ|GlSIhSEF6?SrP_CK0LR0Shf)=C3&%pOBCpf+`>+-M&Dvd5;uO|c`uE~KD0tfV zw8{xHrV?`50(4wH8qejmCm%imQVG-%q%#HOYvhW6nnZ-p15yM^5lbKGJ$fVniWp%L zCZH||r1lX$H-NGOW##?&Nj8!mBqR{D-EVH|K5+g#%=>4VPYTD14ZKG*AF2<8hyp6Q zm0T6cv@Wjp6(};>`w0{8%d_41)SIVEZ7Hs$-1Qqm{BmqV@;hKGfPg=?fy zuFX%%YX7-+^$0NTNcaFKvp|()g_Wa=oA_7oNvU>mw#a`)&TY@fsH&t2y$wS+tPGn0 zY6^kG?VQxWR_VGHGJRvCqB1CO;buu}xU@ncV0UV4rbS?IvKm9ggLJ_> z8Ae-Tzt9DOdkr%4J#5V${x`2)NxNJ0AI4P76*Gdph4Pw`QuH1zCfT6MePPXne&&*u zjHVDLkNZgA6~J<{nt9mMJa(>opNUITiK|&%s?BDQ<@c&mppl3UhV!_iN<;whU(HJ} z>;SI~p{_0-NTd=`O?>080`WAFa$UbJv_GtIIO%+yf)V4dx&=|eLAK#}FD;A(=&LgD zrTKV-|BPyl0#>F@I{X@<SP3@2$~C(eiFfW_gN3Bwjj^8Ek8H)1Qaa5 z$!!0*9LxA6kx$U$h@0Yl)k;C(Xt9B+bksjjp&k_b=8Z}Mw=Y4Z%kg+K?LWg38G%Y2 zfKocuKwUtDS8xAl*C126xI8cpwIvROIBPY403FcdcPZ$#E3Nf5r|R1U94V}`TeRw^vU z^Z_G_a9Mo!q4!&jGZ#gZxU*WRzND~HQ4`arE)5PXS0>gzKw(a~t=@(S4HX+SfD}V; zn9od`@Xrty!#MVBnl@n#3p5NaEt|bdy#c^)3drF^@TKe&J28E73M z&v3Rm@W6z=BlQt1i}Fecmnb+upUft6O?n-)B^VWq|p}>al-@z znW{qDSQJ5*D)kCh__Hs~yPeKiEh7;q-tnleO?ETOrJYbWbp-hyr?qkMK=XSxQy#|) z@ft=%m(Ovd-0gU00hpL<3Ct6bO$W&3GR%v5J3Eh;vLf^PTcj!V5E>eifzy{3DQk2E zLUZr2>p~pD^vQUM5hxQ!91&a&>nftZ^Ry$3;$~id&><@OuCzpAUrrCL5ajnGI#{QGu_1D61XL98ke{QS&ivpJPa}>x=f{Oag7eyBdRl}v zKzgn?vm)4ilkBhN;pZ_vPjy{Hm7iqC5H4U)Xj=4Cb;tS~P(wk`JW+ z=c=jX$-z3vL2SaC6hWRI(DD>eu|K9qB^`1MOe!cn%s0$MBA*Vj=sko<{=L`_p%$>8 z2W7AMhChocr=Z-1XcP+nEV&t<( 
zRoEG`sHitc7b09F)7*_eLxczg1ug*BJ)2){{r#Z`tN_1eW6v>;pOz4e(piRk*YmbW zwnt@Zn=+a)Prj4iO$8;^(qe{xEyXLw7RU8|cn3g(4r#va-=T;)wz^%Q;rq)&!332{ zPO3d#3~wwA2@4KJ4b?B;rWt^nbB`|3ri^9W!s3_yF`p+Ws7bl_xN`>_x)A|Xf4(ht zq&pKn6A2Rq9t>qcyb9wv^&Z&I%fW~myO%RARqn{x(aGrmxB`hlbX4-lV(ioLQ1FFk ze8r^&xc(Xd^|G6ZfN4nm`r*08EY+n2IV*FUY4k08Se(;LbnTY5?df`@#w^j_YbSu> zZdfbp&f^Y>6gj!U&K?XXl?&|gBWol}Hqr}-7`O(~(*O^^0WiL>N*?7WKw^Y;d+z3Q zzXU~K&&S^DAy{xO?TvR)Jt!tjbz9+p5k2Wo7YAN0lL`ILLs;wsXX4q6&kpj@;Fw5T zTW5rp%mrHb>|6+Z+%`Zfa3lIo%C?EphGjX`a* zEeXk+>uhJ6?S8zhkC*^d6R7{m5Akghjz)jXq1!kBx~yp{nL_+Pt|c!gCx}3!8Wbl2 z`)C2nG<8(%ac5AeDWAayuE6J}$pkl&kJ})sm+$_R%MakjR0351mJfH$Wn$pQ(tKjk z@2OSIwg;Jr8fB;gm0|!(iv>0bfr_u=<72?b*#RZ{eLo8#TmS`<2}Gdxrg)eGx6}cH ziB~;@s`wc}Zh{EL%BbD(UasxlTOF>FCZfYCp;AF9j*Rn|#w-?{?6PBy6} zbT98eJFShEW&wkXpy~sFy>!G-(phZ)x`vEo1a*t;XUD$B$y9&+`SOzvUrbz`N^9l7 zZHBzdSc1KX=&gw7NmitYLg3eb8YhVawp+;ooKuAn;ruiFS;yTzp%>uugLkLUCUYXJvk&fJKr@M8}~!&Vst z4!c=2`k*7ILPZX3-QpnYMdCg&Wlwmj`S2baJPgAv+L%SIY4hv&2Uf%?<~J!;&*y%D zbIOkca7Xd*v3Q^>cNO~`HNyiqXR`Q$DWtL4qY%)Ciw_Mv;|N;*cu0@J22f-GWF_fu z6C0_PY65pgd3h{B%pYhR16>?HP!a z5DDm)ozTnNdXojPv9bSYX(~H8+wJ=e09+n7+NE~*nK=G|u7T)(pgMmcIO$ez_d^DD zcBv&vjgRWACqGTqxogbc|8u*66>)zYP$LvZVDJaqhG^MLd@Op>9`TQ>M6YlI@RO@{ z3xneP4=ffdnyR!h0N6Eui{#G&EcWL+V*qHBhk0OJC}onEZc22tco+MnYsW7iX=`f( z`iD6B^I+nYC*?ybryx8k-{_c_D0%_Fm+4y9yA-g^h)kE@Lw}V{-bzET{n3v=c*zc3jY=_OT}_O~et@a-%}!D`GqU$Y`i#ZVq}moPjZ5AVW--ntgsKo)X$HvX1g z)+te~ZDr7(V3`1d@`LpqXcRat*AVMPO_?7L39y4Yc@WG6F6RKUyO*B^HRSND0xBL_@fScrJu^m>;Vk@r=`5? 
zZ|Iwcb}9emWo|$UAL=<$P{4rIFtmaUNTzn#c97@q8G;i}Hy^%7Hs2N6n2lohJ$f~# ze?b^w;40`Yef-8-4`P#d?{2|_F8CsTwQmyc7I<^Zn#cTg3LLJkt_uqbe_#orpOYH} zY720ZW38AWx-Fm#%>J8MuKFy*b0d_xUH)q+2xm|tOImT2f`Wqb=26nieWIIVTC6h{%ij z52exqRyzq`k2C`6i{Y82d`lvtN&qC&_2a>E)6)%sokj4Ye3IaQLEWG>FE#1DKfy>k zpEk-g-_|KlZ{|V4HO|G9FrP+J#H%84Zf7r(4Q)MDF#&KYhx$SD>UOQXm_^=mP$5Mk zyaxjA{*}~(L$O<MO#SR{eTTA*DkdGKa%a1fuIO>GwKFAMq^6x9FvbvUgNP4G`s zVR60N?w9-jHhtoG2nyqKlq0n81O70hG#@~CZSlT3C=B_RHMOJWHf#h(#igZ<|1hd6 zRiM1NQ6*~t0_2C_{vDVM&h|Ngsu=_?#^2fH0S^HiKOpXavYiIAyIpo}YQkNhjcj3_}0nCsU|lFbR2Q>1qlBy<4+>~)6zQ^F#2E5i|JqECqQ=0_ z)P(J`^c2@ll*Too{%(MFaTCa%5q_F%af9r0SKx#%!Bn%g7efE=>i^QAXcx(Z|6>_| z{|!O_bM^qk-~Dg&pK8_iKScj@VC_1S27?0S^5-HSFNHF zhw$+(oWYBkgMs9~hNb_9vpf|+sh$|Fo)HY0r$?i<{^?~#*!xus`Xe_(OzW;Zdf=13 zqjr7xN2_kz+Wdplb2SJdM{z<_rCxO0qvUr literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/hierarchical_alltoallv_2.png b/model/train/yoco_moe/sources/images/hierarchical_alltoallv_2.png new file mode 100644 index 0000000000000000000000000000000000000000..c3715f4cc6ff88fe7b8d617cf65eaafdf22a7634 GIT binary patch literal 19435 zcmb`v2Q*z<*fy#JK|*w*CTtv(8rXjftKGHPgO z=r7RFFhVY4fGan{MpAGC{e_yGG+J>V%`*6L$y7>73Jt9+3jf3a3;f2lmDd7K5VWEG zqj%cm8Ka>o`oLtQ9=hnRB}W;N4%hA^Whj$n50aSH;veXXj-1zDH$xz2xmKX^0}yw zA3u61oR!wbB9RrHv$KYY#=%N_4Iz~lBmDgQTdapXcC#{jnvWlU7gb(BJ`Cw9c(&ej zm(;bzrOcDB*E)MRxHj1{J-;$v&s7XER#uWz_ZIQ!F3e}XK5Nv?y@#W^rr7g-T!GhW zW8!!vTQ1CL^r5=W^|?Bxg%9PdaQ_r=@YOkd2+ zF?N)gQ{E2LPtUhcpPxC0y&bxTJv+gg|61rpkZ0VC-NEqcSViznvI1_Sq8xkOtJ{SO z!6`kMJy*sE#iAo`S(Trbuji>Lou8eKWd@DkkNlJ?e=0)miZw^;0KCx_R zDt*w;Z4(sF2nfF_UHm=Sc`BU+_w9$s%0!0dMmk{!7FkwNGj+++_YpzEU6KPfB_Bz3 z`R17W(z^oIbi;z6_xRrV@rlYAKbl+AGA`!QGdFpAlL>42( z+3f@_9Rv%CtQ>X1$E+m<8DAp{3rf{k@WL9%uRR?Y6<#m(vQI2Hs;v4_ zy@Gtz{*A^aNzJfq@Zq)a+*!118rikAwa)8P9UUEohKoVpqTj0N=IJO$3usd)J(yo0 z^Nv^UmB+-*rd#^)!k{8& z_GMkz+^~k;g47e%T5JCkEc`A4h(c)R-0HxSu!RZgy_j=XUz5z-C*RH7?~~zoMZQuT zD>uQHjZ-6kIIKpBUBI3eh))ru)g4UlC9i6+k_C%-sJbweuWpw8mHV4iSSthXqh9yh 
zG>^!y-TkJ8emvHek?08y_04&#wRg1Bd0fv)S?ZZc`0Z$Uu-z5>j&H=cWy zs9@Ar?S7$lEgYwyp#1b@BWa*iSApo!QYPuUs=WZMZi%x^!Z3IZ^5!m^F^{HXQ4zi8 z4v{SD!$(+pH%M4I2oQ$B1=;#0%qn5D=&`e+1%*6^5khPc4mw!`Cu~vug-@S6QHmA_ z2(~3>q|rIR4L#YB_X?|C$%nUMT7{>pym=vca2?$MV;C|~x>F@Vv3EUb(RTMREm+U5 zGX|P47k4b^R1>vf&_#7}Nz==~Z*&27BV4S%gKn^6&#&Vons)*%Bu+1}0Bq+jTS>_e zjg*?Y^bgDCgr@fW5ZxzFuG_E_iTCbxD;R~wtlhjH@5Rc>`VCoCDp%FK@|iy}0kRfl z-#qC{E3{|YbBEMI}hjD zIv$RF^12sIi?xOvA1{1~u@Ws7IOR-8TTUO+CD8o*Z8CqGQ)TY`1nJOF=@AHZ({je{ zq7T@1H3Wv|2;I`4+l=IF*FFxtM~gavgW2iH_9xam=sM$?Z=Vy%_SMZhxaMX}^acdl zc!j=qI?B&o`dTPG(osW=W;#-=BkH5KYB$g^m#l3Pfkcqdt#6stvKVg7e;`em(Q7#V zj_k7C?4YRVY#lb4YH%gZuCUf=b%WN8PMO`=mJ598@SW^9LE=5%iIS3%XczWTu~=YU z_`=UmRV|!*g3ErN&|B~v%5ds5$Lbu}+ykJGg`I!)(xr#T`r{CmcZMttRxT=$)|NF` zW6DMzUkw(bnWO&`$7W_#ot^h zl-o;NXV$E9V3lP}16!Z%IL@0CUcor%Y_;BM7QS$johcWkhIk<3u$k=l1yV72owOMG zNo=1+)=6Kiyc0&Dtvri)-72?HTmMN&oy$7`9a~Q$DKwo`UnrP6{PVRhRn;H+DW*^S z)}m2@WtoeQv1CZ-P!{a)&c&GAZ@Wj4O+W|Os0mBglUl12%$+2IYXQ*1D>bh_Nq#zH z&_T`whwNgB9a(Sq8}tlAW~AagWs0G?*b9*hg8j~10ojU%5t2FbqVMHJH}Zd68+U7e zD}U8_O6AG(x&zsvBPiy?a-n{6AFc;M2nKjRh0c>FPl(+qVVdLh&Ng($Q>qLOF40%W zE4Ex(=&@|~@tdK|Wo6M(1-&-vX=%{c`Rcieoo(dBpWi@NuXgwJX!X*$CVHp|DJUp> zDc0%9=Eg^`EphHiQn;g5uY>NR1MX^#tTe^MQ~YDU2B98iwUy{m4gt%}0>!(tihOk% ziiWv&qWLx+($+v1*19KlNt$D>cGl~~-?7>+g%xDIhKj@6+#G6z?ZV>*5j6d>!j{3w zhf&^V-8fa)CU4{2_@i63FDp@OTW!Sy&Cp1xdu!2oq>uzU8(^-;7&w0<5Zf@8`3bk^ocTrNL%^ZbyZb#rh zK-##9aOzO&(0Lsggl#dJ*OjgfeXV`L`oVv%Y@5~o5=s&C6h(h}eV%4ng0DDEi8!sM zF^N2=3%OcOhu0MKRL^#WDEDc#HK|Fuq@x{H-Kcs^ox@VU?@bXr4T^~gDJmdnxoM8e zll20!RD#PH180gat51;$%g3AmNsyXc!)orL9Lbyc!q~I;UN!TcD&j%<3jC$?$O7;Q z*(}N6B2wh%(pbeU2s54zlC8MLFT(-v0Y%TI&#e>}aD6fW`MQa{CFJ$?#o~j#gYDGw z*SXkOixl`Lqq@)bIUID#(NXf;KJ(6Of^|u{;p}LI6vFDD!(Oe<#p0l|w>oxLRTx@V z+Q?AlzjTt>Dj(dsZ&CE-O(u(@spbRH6Zj`j=3qkl2dWkpNT?3W?Uvn;+Mo=kyFpjd z^D2iuJgWK~wW=(sWS2?npkqGJJxL_*<3i1M{(fIfj&_zwvu73`^JJujZ`i?Q!^u?< zhPyw8MQsPM7O~LT0^ca6ihHt@V|oDtF{++c7cW6RHk+Y1-)!1t3?R)aH=fsrMfafl zaqL>Hboka{hQ(A|`e+cR`-!}vV&t!(W$~ZQ;v>q8I?FpThO$XywJ3R?Uvp<~-wnxN 
z*LE5+StY8|TplfBMA;p;ouzklnQ7t86J}mp1>w-6)e$j^{O-NZ9xqynyA@yqG=4T6 zTkl=gDctZiOAC)(o2agkyU=zpIm`>2fKn92>rbmJzxl>Mb=;?9qN#gU_*JrmPLKB& zrASY<##cgLoQT<@;M16@mevP$J;t@!WdCs95_KHN;C4e`^nrw!Jh z;NP$KhT4KuY5^Ok^DmV%4xt@}6xz1vkTF(4o$*d$q)x3`$O@5cBx;Zc6j6F)hne_& z+bpENY|F*ufXSt~HIU>H`Nkv@toZ8s_&*C9E_NWAOaUY}3bj%f_GrmaS(?HXC;pM3 z14qy^ujsdTUkOd*70FEt*(~LJtY?Y7;?B6|gdoKxDpRaZ5l|x~PN%qbWqe{mdDlB# z9=#!=DWA`6BKvz_B`K@FALKA(%IWYle3x{g_1llZHqxrN_w0y4l8lox+4JVhYQcGH zKkB55<*zNO&*n3b+svSM1^t|aR6i0*sY|EvPx60l1cIXo&9={2tMvz0_H+>U@(CFg zJnzRy$?WNDG%9k*SZPp9|4@LKJE5!j>PQl_KpAl6$4zG2LrE*E;*dxI5TpnQs1w)+ z=Z_2yO2fjY8r{{I=)G54&#-@Dl5;7Srq$HMH|;6FJsz>G&e9qw&G|dtS(fi+Iw`jC zxO%_oHiJhMe()*PYIKs(XsN+_(iz$pM$Za$k`DGtva$kT-7{tz`qNnRXsa0ZXin%t z`jtKN2PZ@Vb<=vnU+Ev;coTE`aS97;{29f1TTOmjL|8718hi;B_AYdhzwbUQY z%K9>PK(jya=qf6{A&G+3^R~xK579EO&+y6-GS%VRu=PS$gC_z*B6P0L+WmmSY)U%? zU&x6Ln8H8ZawA;LV8yCiZ*A6Ej5@)0ya#YRA!Wk;2fUOmr^4i+>b0uT7#FFnSSgh> zCj*g30L&)~^KqBrR^x96+i3@3-5AfNpAa)cYC>7-hi+|oS+2xfso_a1AXaTv7@x@Y zwF;d5NwMnUkw6+xotTt#xGD6EBJPf+InBBWE1_RbQv>Va<}DMy0P2DxyL~2#$6Hvi z{nANYH(xI&v%CkZiA!*-B*|6X48;7cypjjb!Ak3+2@C=HV7(!>J|kzli_oeZq?xl_ARO+Hfv=^XTx zY$}L%oYdy)6ICmIEO(@{xji4)cuW8jZfzy!IW9G-!cWrF*6z*WiYFVbc=5g0K)H;f zG+yc3WQPkRscW%~E<2+s%g^b<@;ybN^nvte-UHHV3;g${xA+e@HX@_otYIX^K{}*Xe{!v**r|Y8B24Af?I@r#m~#pY^;B7se_~ zn-9NJyYUcuobHZTp2^C}zF6$bh8bYi=k?USSp5DmNiOs{7i`*P?yABTrcBf>q0YVe0A^;P@Osz1b(Ta zZ@Jst`5z)&=E{=3|um zQpA#o30X`jXbg8~abVs@pHsb#mU9sy)mF$Ps*tR>j!gqE@^m6kmciTfrNKK>~ z>WA2$?iZGfsC?|Vossayl^w*yAy63_7uIh<3oB3i`u_5z-UJ>~7KK?4V!>4m;?!(b zbE{-70}Nn`p=pkbeaWN{*X3*^OE=KvBDqMKbe0dJWk#&1pZq6lHr|;mw4>SmWpBy* zWGQrHRE0|w(<>ZT9*5n3E#@AV;n2Qz$9k3~>?E`0lkR2NL9L2ss))!nIVP$43lHI? 
zen&8kTjCKTJFG%S?vB+sZ`gg7YVKBU#%a5_p>O8LQQ}7-8Kk9(J9cpnpHjp~O}L)- zGlZ$NKnFUd2D25BYQ}OVCJ=8aq;m zY9F+QxW%~~=5}_tsE$b9oavG13+etk?xg{H^Dg4TQ66?}kv#m%cT`uo6 zd>{lXBa8hXdh6Rt6>q>l$2WT(?`29o!RvI> zmwYxSp{vB6`8ZbXi?CUy7#hsrnMdWmTAd%IU&%bwqZ% zemZiDz58Ki%a1QC!3WZ~H*UeY6$D;=jZ*h#$8&TiR&8>QCo<8d`bdlp%HBJ-^pVo- zb4dFHV4%9#N*bh9BU_+Ya=>n~bOY5ZtvNqCW+ldOVr39NUgZgsA$p7dQ*B)Y{lem? zF@@Prb|AS|eW{>k!34A}iGRH#P(aqq8?I=2^xXB5!9RkhW?dI~d6 z|8fA9m1&m`0{*i9=t5}qJ~Tdm;Sn%wr2H{Kr2HaUq&$zU%yk*OKSo6Ld|uIm0tSOP z9X4vjVz8pgK+k*oD<1f+5{1$)n^HpISwvqcJ$Hu_v&|rnq8+?fml`k{FiOx$TG(3HHt;qSnsiy& z*c53}_*8Xu`%=xVt#e@c+tNW-so!J1dKni-4)47rHZqRl2*gd^`C+|$`I24#=<|!M z=uwS$jZwV^FblSA6&MT`A{i1&a!UK zV~p+V)k5wq3Al}tR-y;7?q=9GvYZ=)qvWrPf-QwfV%CYU$5aKy-2>Y`zI*-d>_N~s zvKgW_HX3V!pVViEEeB{sOvL+EQtdt(J{mJ*Gh{AUE?Bzgy1u`D#`2aFK5Z^Cwyj+# zl*01ntt;_BFj;MfIr&TF>Djh3K}wd&6Q8Kmt}ek8bPYH{aNN@E#|B~$4Xst0qW$=> zJ$)5b1zw>hk9DWV#}Q5)>V{$WX}W_njH6AhPRF$x2HTZd8wZsWO)&Np?9rNsu0`>3 zzzpSS^g@d@61$Jf&VLl?=5yMNK+C#r&XQS2W8b-Nn8#t1fu0&Kp}MQ!`RI{7HDX7u zm2G#oS8QP|@t|U1En`rFK;uPHd}C|ym)i@NJx9E=jEn`k23#dv%1k%E#|G#;r*01o z(U{HN{Lyx`u%uM*BU|tr>VqY(jj+~BKVpN76}hq9j7j2^N1bl*wJ=(UY=qT;_5f;v z&uLM(B}~-!$2TIE*zib7HT>=tT6#?h`eVK4!#?q_r`x93(2Q?@m>gFWEqqY!BVf{d z<&v$}!e+)RAn>%I)yPW+%l%<5n-YOY`9_#VdpDGm(==tqTBVOo=@AeO#YF!G_`$tw zo!QTK2x~ZwAV%aZY`R2j?tQO?dht#aNGmiB0&P$3yzXykC5K_yOFq@h=Umh)ez>d1 zUe3IyJSgDi4xv-@*n@f0&xUj&g%}_6S7S7nzH|-j@ILI(yHY~QIa9`ESXsG(Jc#@9 zo~>y2w1C(e*1)h#{S3L}66kr^-d4Is7vNQ~X3cO{KbxpgnKSljX{vlKafRzlQZA6P zerjU+mpl#j@!nqFs};LEw8ZloL}y|xX`Zdz^yhF7*NFHDZ*Y$K*t3PMyGeVf3#$qX za;d|GlZ;6WX(+r|(~AS5zTUO3Am75)gjvEgGsiFJ(ejPa=YX$(ydi^JxBL5aX<5q# zp1ZO7_uhmyb@FW~CCwJP1P#g)jRUsBk@HWY+xH*bO=)2>|5j%C>@tw(rmh4`&2GJx z+>OXq`mvtTiuZslo$HF0n<1;T+#gB1C&pDiZ0 zH@8c}{&eC}9FBjK=KIqdQA@Dfv$5TB<}6#(Rku~ut1?_15sWM zz~d?<@vl`kpIYhYOnxbPN=PfbGayG#E8%rA{~;#T`|Pm!Y`28RwC`m7{EV|PK|mM; z0@E0}u(S?(a&kPkg|p-J+%8Ol*cLW%9)~3rx;(~Z*TrPHXdIf`z8c2qky#?nKeK*y zC|6M!Z1Ox-n-97R0aHZ4ce>e1vbwGYE66nXtsgX|C1y=j+vIi;r8fE*1?#FPsqGC! 
z;Pf4=k_CNPWV+;U4&R!HtD9}spnURrh4dz#g0RIG_ONos4z}CZxJ$Znj5;GpO>lH^ z(XlL^qIl#nUkz!=kK@+$Yy4s5Xe9wYiIvIVgGf!#K_5N~3kPZqY;a{_K`m&@0B}0n zjWkOuq>dBS?`xaJ-7g?`V*P*yy2V>7mp|vOPgF<2NyXi<7AM@KJYnHsVO|^l^eXUH(~=oc zFU~y9Wk7Hu*|nr$c&UvUeZ*lA8q$Y%#SiC6_G*ke2~zBTZ9-ty8;z2trHpVE*sKa} z!%ch+CR%Zi1CaLor(tdQ4TbyHDl2MbD%56{_e&3otT)7nB}!ZsdWccdC5;cG_dc=P zJLqDJ+CY5!YZ%Y9$`O|h@GUVozn>zx^$%Rk{J;%&ZV3AWk$!NMp0#QzqY<#cJTOTeH0E-+aA|_XN>L2>zJ7^ zC>*RG9LM{hjea!qT-RVm`J+JRl^fG_Z6*JY#@hVj5{B zn$W9@=kJfy5$Is)beJl?Vg$rCg4j%95GCQ^S(Ek7xh%MBA`>+^zlO6{#(BIK<+a23 zy9b4Cd=*bmcQK%1nP^>P$?N13Ynxl?K!~oMlK5+oxKC0ItFGJMH;1lVyoFZ8iCq@v zGG_EX;#)qIUCGT&37PXQdW%wAY?ry}DOVnjb7_l`elhs*_cr)e9*$<+(p8 z%E!1AOR>VQHc$WNi)Bt1Pi-76Q=R-1^%y4wI?m%*4a>e9lFja>Q+kzofs4%M%Ptp) z42;A11y}dkRu8AEKErJ6O0n|F6Y+BA=^J4UL{udKEXyN$-b$*(nXBXDPv|H^Tx63W zq@aB_STF*e-+YLLj`Ek(Ec77N5v_UiV4y``)QWL1C5w9s%d$o&pqTl;x~cV0*j>ql z1G06y1r4k^`CoC%cLf4kg$YjJztaP=$jHb@v$-_NUF^met-!K9kxvw2YOaM7G2T-_ zpoHrP4)E0*(gZGz!Av=jzXUT-(HjH_^Msou-xYE|l1-HuJoiKKB22#@&o)-Wy>^WZ zOllXRgJQawX>Jp3;=TW|={E~MJDxtzVWCwVqvp3vlka7teA9^^rfBZ|ow`k}%2iOE zsDz8_aTHsa;&_Fra@b%Q!fGlPAUaH=UBS$nEH;heEY!mwqReEWFUgZ^Zy=^zZ`dY~ zrPA!?Pko6GRA^zmxcSU2>`p1-BTnu-YO=LesVDQdNKN!vCr&1zU|z8nB5pe@sAt4c z5zt1MD57ZX!>!b;uE^fhpNS-!X<`4)j`TCgd)8|3p`_6}Bh6=ys;be4Z?#PCuyF@z2LrhX z$$Im+C=B8X{FE7-F(Gz!sxb*Te9+wiBpP~IP$dFiU-l|QyyKf8(*pjelivG&G|nBi z!;LmVSG1C@n|I)J5?-%S&)j4*Q&hw!1pQ?04casv2K{PW22_sAubwHwj$t@Ab>D3= zaDpzt#cYD_-69C-*JfciY*1u%~yq$ST1AO$NJAV4_z;HElRJ;EOLYCEqc@lZBynOmgrg^=Io_=wL_P z;6SGPbq3GR*3Z9RSe;2o<3ik17k#jc(#|j*oBdxGivaIoqE@(0@o38Z3#SKIwC5c8 zcFov4H+6wwjm;M_vQNv=@jDnWg8sOGfw$Ss=4wZYh8Fy})xj@Sz&j}j#f(4=QUA?M2D%Us3@By^rD+X^v-NJClHUjkej-- zKaZ-80}gtoJgqW$-EZ8{#Z?^?m~!#YU=nvXnf5yJSPn%H(0%mE`$! 
zo4nyPd-RA*mU$-jnu?y<_UZDKzpfg6FRru)f!|g|5Ht{DgeiiozJjXoFUSY7nJfw@ zKpJZ?m-}TL13=L3N-EQLCcmycK`jQU2@05&S1QGGfjl#6+^QqqPn_r70{4H#CC z=>@w!e3#I5lE~o#*WHAl%~CXnbQl8k$%CFIQ$Y2PpDfoU6e)(v4pw`$$mScu&8bE$DT~&OS4rj1XzsxEv zSP++!zRN>7HDCKTd|5?gZ+I$S(7pr3j(<%GVEegCuZ<*?kLnsM&|Rs%=&--IEHdv zRx0g2%*MYM5v^)3YQi1qh4Rc>tgeCSeXOK(s?9SYu<`F?*xRHQQkLP_2ZU^D$Vz?% zc#bO;i~L7?3%P00OwWPRP>H1DV~UXy?}ZYZ#uqblQRgt_6|^^T!@al9L1uNeMBf|# zIDB+kR@tA$uP@`KrGNi+*wfteA0NC^PxTT>Wk>Zo&+hFZ&+^YWd2wa19LOsvT|d7) zcY8(#3A}XtW^DQaZa=*P`J<&T+wb=YY0|S)4Dh&USrqr@Co4K@Jo9}j1)rd_ihvK!ebm?2)}L?tvbOD{Us}QzR(u}OJ4=WkMYdbVXM&tN} z!Xb|9U4lKq%zdFEFd)_INgMpWpChymaU1W*Ldk7q4KGQWs$xZCw6w6`MWZLna&T>t zgkUeR-`>hSYTjYb8o2F9g--2)lPv2vC^uTHSF(S3Vr5Lvm#7LK-GLm<=RB9M?Oe(f9qk}WI@ts;LkkNIZ?@v%05ez%3NE8_69A#4{8})&F~S~g9y8qS zR$?-WfGpE21VxjpCcL;l4VxVqmUu~h$scCR%kLPasNOv3YmHL^B|P*A>?C`LvW?ez zAL?+use&78va&8EW6eh~h1-wHe9N zbIZasLFKF;Fy_GHqe7p=OI!Y|A9h}AmBpJW#z=a#WH-$r6|IEvljaKel33C0_3<*B z=P|XfqaD@^jcYM?E>=X!;=aQDoRF`C+l5ub%N3wwzQ)dI{aS(OmGap0JPzqRs8zua z4c*kRajJe^Hmi~^q<}F$?Jg71(Te!r_z# z%Wa zyarujmm*&4BUZvtuWx90?RIwy+h%2?M8I12-FVo5mgY#8lRAD-tw5mSD71((*I)b| zLqt>s97{Od>ry`d%H_h`jVAq{3{}Y;Cm5OOX%$&rmN-g}PTQ|z3gf?$)HfFp5U9v$ z5&vG=7z+fctTSJI8DZr>E|*x)tseYhw9aM?qbeD)SyyEwY1O=7_qf})FN5AUU(9`P z6+{7ZS&t2O~Kua@BA$#Xc$s z-TbO`U9HnUg5hwOBe2$xr@HjAnpGxesd(#pQOCFBK&3@zqJ`wZ6vbT0eXWDBlR{Pm zQsgbB8?7S&|76P&g|2D#b=76|{5G+Op#d5TPW|(@Qk-{A6AxKl1=j|Kz&*7wQH!5l zLHTwqZgj`*t!JD}E;qe;)puN)MRIb@yX`*KQpBNshCgu+oykAFz{fAVVJKP65GrE| z<3~H%P+R+oPi)0_C_E*s#+q5ASQ=3)WsRdy#DHEUP8L>F?4gL= zHZNwaueRKIxEC3AQfjDxjZ}{OqzAkw*=?%59MVw3sR4a=k}+X?{L!9+GpwjXo3XPx zlPb{Q-x%~MhTDD5k{_awj0IgN7070x@cYTm`NfJxz>WZdS7H9~Sy|GJ`2fqx3WF)g z08tGhvs}@Ch5dNX+qN?Rhjk!;iYz?an#ymL-BQng4Ngh_^m#!_OZKxN2Q%B`g}Rlw ziRn?rQ0CQ1=a4m5wVS>SB8mtf#gBQ*gEPY6ITq1F<$S|T4l`=_cy*-79avaMDj!7- zmL)X@atc!IXK2vMv^2#5Eo~Ls1oWCAKR>9fsh!k2lgBEkdJkGqtbhAA*9(=7FK?X|r>&FeZi^AS?K4JzpNiFuW5Fr)Z*G`6w(7 z@nZa^z28@nqqKZXoYCK2kLdFyum<@i76q`SmTYpDG0SR`4{(S_SX79aU*D|X-;33- 
z3W{J5p7x@SKS)!TC=3(RDry)iEtU9iX~8O~@=us9gOedqr_%RhW`yyJySil5>4OQh z&EL-s0?zOo2u^a_2_LSr3+u`X^GJ^f%i-1x-Oo`Rv+nt1T};D^Z1w! zRRw)+_)8$ORG0CmCk!L!V31nZMm*%Q7TN?xgH=s~627aP% zqo4{6_^gEntUmf=gNmBkRz>P*1YA_obbv6A1KVL+9>C^luVX6(8L8h*U8!d~xfvNZ z|7hwfs-6-yY`?4!@cY4@(AnA9#T55Q_}AYL_Jjgm1+;-0Tx5y;2)(ky$GUdwbL!b! zP}~Hk`Q3|T29#7#9N!NBnyT{j$`W%+!J{{QBI}~Q)J7Q_oVu0o6^bqU=|S|0HqV05}>T zPP7Yje@{6W1a1+uB732?QqsO&1xmYhv~wLmohii4SNQLi$R8ogC*yuj))P{4%>d?z z_X82o+6C$Uk{w5+0%?q2F47lJ?8Uk2ZJCLE-SE!)GNA1!hAYIiS?$ zV~OU(!nXNX-^xK1LY?U+I&yZMy0;k_e#&nd_+J2fkQG;y&zz2ld~yxy4vKa`0oB|l z808e?X|_irMtgyoc=h+k)U1L)a@za+RMgtil1AJ^FzVxfrW(*QRc$l<>c(9bGb5OW;!}LqI9`TAAB)!n3WP`LXVPV zJKxBg&OR}P_$q#!Umh%aS`EZI4o(UP3Ef(INYtZs_SXxJ)Li1>=%~$Q?W92d1pL=h zKcH$34(6hOwfblzCX9be=*dV0rFvu|t1JVB-$0S1w%dGHG%8PJn30iTxICE6u5Z(w zJ1--cjAe_G;E!^kK%@env*8sn$vN0&AkQA;X@6EQ?z!qy4Q;r|38Yc;o)X z+w>yJNG%VoSKZ~J=(5{+me>t2P}+NT$9}#GWTDjH@4VlxuZh0S=J@9p&`?x)8XR4{ z9qoM)QQ@41NE&$Rf0O;a-r zPTvD%WrsiBlx&TX{r+{W;XLolO zF>3HwWI`XBaA`#AHhsR9N^F}d8=3yopbvJT%y&(RFrj=gxr!w|pE`D-8WQrBqMwNMjWkDGKr_Y}K z*3<938pYZ|u3N?mC#^5N1zCMd}I)6j&(PL7TWdK}m)lpBd`+s(bV z`dSlBEYkm{ohV#|#V|LXZ1o_FJDt9>JuZv~BrL?~U;b2b?3Qv$!-1jlUDJua6L4rv z%xx#Fo#0<4iRHMglkRDa?F~qzaZ>H}Qtc8uRQ~T(P;siZo&@|EiyhNMF9oNrww4wQ z2EyBay1cR&b98hBHEF~GyW0l)(Hi=mT14u|rEv`y#J_>Xzp#Yt4*4{Vc7 zrq2j}#&0k{61gf2H|usMSfbG`JVk=Gj@w7Rkp{r5B3xRe);OM4p)5Hfi$vS>8+*wF z7bw?Xn_gqXTe2YO_~yDrtX`6FZoG15Bi<(E6A~_fM0$ zQs;;B!DMZFw{LYWDo!eMxBd2gFy!9Sz#Ldogo{5gQ%+ZY`9MVc<=hH1f(xgt`smT_ zQf8FeER|{H+@=etON220)}l)XAQxBfuq+LO3cfi71S~ia=ub@YTSYNW0Fifay6&xv zy59)Sg5J0rq+*Eim(p`+fbvIPklI(UZ-2Dbmn@PB3R2;uzn`wP2Pxp6lWZ<2k#EK` z@4Xx^Gs1ze{(iOnEXZBG7IB6`YUc{}{IKxB@eh!!{;mG&9tYooQRSEr>6!war%y#d z;i`ffpnY&rALV*$gvXl4?veY-mm;8P*-Ti%it&^%d}50@y1Zk6h(=YS!v2KEJm93075z$=NDnAD-T`BN zCK3N88v~+Q&3k5&f4&!F^xF+-fdCDn5RpemI5RjO2u?ixdwPNWI-o9n0E84OZ~)K) z`;k;#m!Or`>J*ieg@-|URsjz5^xDxK%mjeirJYeZwy?0Ui%Cf@^;|K=Kt8!VnVowJHOS=4`^y{(pBz1e|LmaXgq$b2LD^H<+X=Xc+CdENrL--Yh*Qp 
z-J^P70Z;|%f2-TmSAuUm!68zB{o?;I8&b1{dqC*j7L%RT{xKHvuZO)>%R0%S+VY{e zf2lbb^sze#Ai-B;w_p1EV+G$NyXs2d{Jn5AL*gzQGd}2;EU-Tt0b1E*j1rZG2?Mu3 z*QxAvUa(0harD)qWIYS`eQ=f;zrcJweq#o|a>WdOzcdIa|qk1bq?7EtCJGbY##Xh6XRvq7{`d-y$x^#cJw0a8$&86q&1SJ1}wrsK1N zP6;(QEg0TU`&kPk7@k3AJpFDkU=CFO)!d?8XTQ+>A?B9)to(lnj67poAqa8*P$!5% z5^)E!tEb#nz>xq`1sb5Hs}tw%PzBvGB+nkR>jLrxLM(Yc4!ue4|;S$P11{?kMV3Y*S9HwD18fmdhxBvtTENPbm` z6{GfAgthH*xk*osa%$fcVoUpST}6XtHPNCHb=PB&f|hKWG3df#DIg;p8opRy7t22+ zt{IA8LPDRseGU$Jv#T^#%pC3fWgWY0mw!iS!y^A;mYR^Kpge*)b36RsvFZuOG3xZT z7pS^YyD9vy2y>%)6(kWYC%)`;y!>}y`L45kq)-bSPTE(2%UFK@6T;0Hp-N;d%*@i6 zLjUtnNjWO=J|FQurGT*hCq!&QOb!h_R5N^Wv-v+yABGO88o0m;nNFR=tgR}g}M2&kBPFO zv<1#_pcWJLR~s^LE7^1tXi}9!Fa2^r^K*lMY5^|>iaIQUKNmQ@8%@S zea3GS`_JKbP;8(T`m=}Zag*wT7vkSk zFrm4ckpnzph5biB>!$%`YQt61o+nL0%LIzitW7OZYh%c9mBSdfA?)HUcHK# z_kEC@oxKp=?aDWTn0(6bcH3P})=QHcpnm|eOP;1WzBTYRuu&lD3ej+W4ns0~=O%?uWws=FFe7f%h^8 zMg2@RfJppT0Al$m=C`W>@NrS%{E+@^14rRRaVPsP02Zo2X*Vy7YVW7W!}{6S1hVj3pu#)G*pE-< zO3g!0B=)!#_T^qrQ znQxMb3N4oL1CfDW z&lP|u{{MUc`xg@Wi6L$WNYrkti zu6eqj{$Zk3m1U3-2oS))z>wu+CDp;ez+=A7Rp4O0&O#U(xxO0UuIe)4V71dk$6pO7 zYcVA;FtCP1#5XhOuQt4stez_v7|OtZ2l$9%xg{7FCrD0GOw-Hw+z7T4n`_y6#puyB zr>dw3uC~@_v6+W))ff8M$R^}&vjYpgV!rB*TuC&0K8;>h21+c6d|sgSHt#db(t($20+nwle3&c#LfD;oK-pt4>irgNV+#vU%!cWKW_e`THjOL-&Vbw7!F z`pND!v)@DDB4kK0B~o(Qi?2LW*pw$Y!{gR%^4Y|j!*A&1j}Hp`bPXKwk9rK>{j6B9 z&Xp#KcWpfIP*CehxaGU}?0r`PiAE;oJ2f6K9_FpJfr2uKla3d+UQvjm^NI;H44^+8 z|Fw^dd03%ewn}BdZ@TwBk~QlmZpWs8ZCzHy+yq9q{@%4%bNjWAM8vTrfLh)YJhEkT zYAC_=DL$4ky^EdmOfE9SGkX4a45y1DV966aD1crqKHNbDrmY z=2_L97lv(tXWvD-p4@DEg=Y;9!KZ8+kG#;}lFkJyT^e(I-* zn*yJ3;L)h_7UW`L>LY3B&AJc^Pkdh4@B4r84+&KJ*ZX=5gZPi@AE7eOo*CdAamQJ; zbj;K9-fzk0*M1b(!AcBWbM~{0JQ5*w>ang5WH!;0ujoD<3W#65M}2NRSsHzymOKdE ze6%LE?rzc_-=g1y+dQ51CW0;E?mWuP2YyHzU0-9C<@Ri7Z+chC3Oo|H7d9di-wt7X z1io7|cfD>ovJY1Lm4{oJ*vQ`Ak}Rg{@>C2tYxWdz*7wZW&o6ek>nGeg*a{tHcA{T! 
z-RJo9^0xtXG_O6ia>48OKBC`1-@t(N7`SB}y{CV_j;=x}KQ7h0Pbfujl`{LN>sO%o zP|1DHbA1+aU`T>0AAvS`R|6g8NW6LA`Hs}VG$T^D zUU`27wyg`{*to8n{QQJ8{uXw1vzetqw9uija&d~3^Hk#-=UG`9c@yvfr%XyBj{0-t z#5*YQXE$KPwObE#N6Z}=SVcuw7kGcpzs2_b_|ah9BIWK$*Bc-z?LTt&T4b<^DRbeR zTRR+hH25rwAw4WiILnbjklU14A0?yylg;g|pYsRwad^)>t@s~pBqS;Xt>fc7oeIsG zZt|hO;n|VB=w4`zMuZ)F#i6%!gJ;PWG*4bvtTyJjCSFlI2Jr zcv4}oTGCtSCbuISkG3435qsZ3J>1j4_2aj#sY^Z`5mlw!=$*S`j_LCmZnqC-Uufsy zID=m{+i&N~F4GNrG(%Zxr#<4zm{~s4-1_v}&OxtU{+9Tn1k^ zp%C$p%kLwrETUV#eP8ou?2TY-12cE0luU8^%OWSq{lGqwh?4p@l}LNFsXO5tx(jW-?Y5Trovecn~ zZQw5$+2xNg#Zb>U7mt^dF|;ZGBk?N^=e>c3rFbL~6a z$#rqb5c@Q5{OM&6>d#qk02H)H{vptKNlwMN{%?{#;|=R==RNwiUTWmiCC=4Twr>9v z6f*kl|K|TBy<`cCqFrkuxpA(AN=rf2{%~bbGnnk_x4Uoi@vd{C?d+$&55A{KbZSV8 zt`}r2U10j*C!y!;c9CqwAb~Q&fzilmZOOShK1y1Toh)X`Kpsk35fhZuu`9f?Ls}fN zBqc_#E2UkVHJ{70=)l3A4qIp}QwulAxjh_bj1G3=z1GwKPMSdIfya-`j;MK6oz6&J+}7HJsn#4VQsXj;gAZ;N3R` zd4m}L4!5-0jl|1y?(aZeOvrC}&dBfI(D?82$DhmR-O70}2Ut2#c|S)r`q~n80on$F zr<*!C(N(*l(cAbx;nqCQt@jQvq1R$K0yOP7Gk-1lJ57RBK;Rgx*JbNb@<0BXel(Z= z0Q%LU=5U6C6=H)EVx7?hRl|8RI``d|3iWVo#9n4c8hSRc9Qs|_z1Z-4e$4FEdOtV8 zZ(XrbJ9@nPBn3|Vb;RONpz1U9Ux|%>e#CXvb|DB_IH2$QK;;q9+EVEGOghDYae_n# zWz~AT?*`#2`0sZ-XeK}S8F!oPp2uuFwYq+?V#9bKG{>n8Qbhc zq<{+kKQOMR>^Z% zmj<7!0ix9$_1%@#ggT^mGAy&N?$|x;mmkLza9eMf(G-UHbNO!sWNL;CDH16w?q3^b zo0xLE7exTx`qwM|X!TlS)6qQvLb;eXa*Q2&I7ZYSyK-HFs4a=;nr6yHue(q5GQ0qj zh7PhnP0;aLI+;F#OXO_(M~aqG&=iRcB`aanUgxvHZNr#T<*GP~4pQhH`$hT?ID_2gG1J?-OU3&BFqh~vrl|mgTYW2mj z%*rW|z0>urShmQRkio_)xUd$i+F&WICr-F_W}#8=Nt5!Kz0<{|ifvbfj;doS;G_qL ztJ?(B4!YG1Nx=1gq#%KGdAQ6Z_vE(UV5*H_HgHN}Rsg$$xcUX@#3Uf3Qp7;Vz+2`@ zx?L6ZiH>-Tn$GLC>i&_64gUUyx-vwM6i({j{ENCU@^{B$`#YuYT5QH=qpyfbYBsWK z(#>?k!a9E2MW}T&#|3Ofmdp>GS8qMiXM~RF7%8?Mr4diTt*AushUd(fl zbsd-%@k3#Ylh7Ao%TLD>Ppy%ezzwn+=CAQG(1?40wNke^MrX=GX5nOcVUuCXrGQnP zu88so9(zEi-1W`%TvYEHRc(Ar<2{9N-MGo7circc%iPsW^sj+3ME$KRX zD9#={wtvK7a2(d{+}ispK0wu0s>yXeI9OleQ)91oWWYh1V5@ULVV7g&Xh9vrVJ&9? 
z%0)GTa~Ysq!b;sJL&=k=_{;}Oqzp*`*ejW?@r*j?>IZ3@7Mr%t2kYb$<56)i2Ix3A z1dJNGy5D7WfJx~^w`P9?ii<3R@*dkeo23vq=(&h7%4l9_t=Y=t%C3pZ$QgD}fsLdo zO|qixtV3punA-G%>{2Je-iCD;b+l)8uS54Ks$|6zLG@r#d8t2XwYyrZo5Q5Ec+#w( zk78};d9G)M%J?+WY7h94%Pq6W-SPN82pxJ2^A`(5-t7`vcOf+9T!buF_jAH>RYM8Y zwW0biNB_Vp_>?Re(SgrAM#Xg8hD$eOU|6}Wl*h#tU*Tc=;1kmwFE5fSb8g5Nk(rZi z=wF3R%%=lgH)1%9>#Q&+B#mLxD`gf@81^4Q>(axG;ycQv)2X9WZ{Lom5Zr3-7}6I` z$A@fhTbKlGV?aHyVcdf#UZ}YmgOpVx-G53zytEVa zP$bkc_HWXOiPB2!`IFK^K7EJYDOJy#d<-rZ+yUPMEM)Q=5vosOEgaVFMp96gq2$rI ziRfdu4OIsEv99a8i5m5~x2nsVb~U~DE#c#5KHNWcOZ$|v1D{(zGuYfi=>y+^dAl*O z-4s0!7oiiAs0QZU%Do^H=361cp3@dWrrP%JOG>QryMT8ax|tu`!#v%AhZT4Vw{;M^)rf5< zVbPjR#UpP!LJr!YG4nhMuwn%PuWSuB1;dq9?nLBDZ2{Y}Y|caXeA+_Zl`VMGwSxt< zE7nEIl8?Qb$($MmIvL4mMe1l8cL27Pj0?w>bkBf)-8zQutJm50Hnv_EKM6XFZxtzp z+L7W+psyO!_%)MeGLA+;tDeeU-XWH~0)Q6bb9S=SVJtf>jgs(_5UT6jY5&@3=J|c5 zS!u)Iy?wEEoC!!Gl+Ymd)MHf`c4Axhq$=7@5E>aqZOpR*_e;r8bK)>R2|yT=MgKfV^Ql|L3;};Lj7|pKgJzlReJBeHm~?GJWlu_HC7$=k;X9rj`;{45ih0ab zzZM>)#9kJw=QWH8bx0;e)r4E15Q$o`%(etP{-gB#9{7ZkTY*lmE&u%{ZApYP_-t?2 zX$j1zZWO&`nmn~FFUE*jlo`!|GiC{p1Bn)q_`-WD}zPb^7aa-=kLWoi502ZTr% zI)DbXAy=boF=@0~YwYV-DF#Q5PEyQj zVbxcPcXdw2O=C$_wb$N@?SRG?qo>?WdttB4rNikgB@F;gIMq7{Zxq|(5qn=0A@9l~U0(Z1 zR2PaimbX6=RbLmwY#TG|FVwh;PTha)^EIq@SwJ-pk2jjkC;AxCo|~7%6PR7Td+A2I zG1NjXSO)++#V;g?%8R_qu0U#=M}r)#8bCeTFkbrUzs19OleGmr3kK>K z2ijS&K^!Uxg~F*8Md**KEqo|HpfgN`Q6GVv6UxrM5K@6p@mQ<1=8&7w{IQaA1WNooC~$% zsT(6HTaiuLm2iTVXJR=#&n%dsSz^~fk2rJGl0lYi=kH9DXK3Oq*!6GjCC6!WW{>LWaxryOQjM*=tlnkTXE_Y&CCk>*u%ra zVJ5KtUm5zJixF-X2q?U$ku2n~X<{b7&ef)O3tzKLr2r4$2^}hh9maOMj3(z7;@MgS zbu;ND0ze?5#?=e-ajSI}`Ox<0eoRY)47dQ3nKy%A%HBNe9QL7BU>0__J2nEDeoTy; zS&UyemWclZR>knR``Rj(LAuQjeZ$lsmGG+}e5eEfgZpl|;d7P^b|Uh?wH}L?0ipwK zf<0)3as5oIgiph?FzHb|$tp&3papzlX(*R(s~S_s&~zlw(O$B4ce-z&@09VRnQ+QI zXQJJ|(ohjB(+>ut^Vpl(q}dl@ET zZqE8?rITIF1UXA9Xw}l#TWw7m#f$4sGUU=)b!6$*vtXab2Zc$Nx$HpU8XBhj^Maq)Uo@!N_6C48=SKKv}{okETJmMn-g9F=&r&?P5Dprw}@V zqVQPSO3Wl%U@vo-*Vxw2_aKdNESZFSC)?jd!yVRp#*DRr6>c=vZ+8de;rMWl^R7ok 
z#@mUHWL|xzm;U{)Ff{!!Tm~CP4Q8aI&8U6--CUB*vSCwe0q!xH?~6=c;{zKkJnr9Y zlEFh+ohJQwXt}ybFy)pEkU+QHLQ!?hCycY>&Vz$e*AdA44h#N`vdR2N1Oi*$PfAdl z3}XTvN^#%J^`#9{v`@s?SqEc*1i&67^i$D7Xn=2G5rLT>hWh_G|N zDnA{T6nJy@F?&Q!t=k7shy+w=44t&H#~z&-Q6ae=P`An1cQOP+^aTh6D;nE)sK%I^ z5rrEt7vD;D-6KpB_rW8Cx?(T&kH&-)G0~W2~|QjHpw#Btu)n9yIlfheK95>_Puy zmp)VSS-&V-ZmUQ%Cs4n^(VO}`CO>du;r_WZ=Bvoe-de=tQ?F1lY~-UgL@TO2(sSO) zJf!MIY@`j`NV{!SP3@U?KvZfe&?q{MU7EDorHaUI9E9f%$h=KPtm@*@pTPf__z(+Q z5xe3`R6ANr(vBn$0s#8&+L-s;kM5<;jL8>xNgaciH{DCl*FP1sfUrI_Ct%6jQ8GL3f_Qh1N-2#Jrqn3t9DA6M&|I*|EFm)*3aY2;6&Z( z-a4wfi^~3Guk#gqI7{~ZSf=<1ktbcHHr145#jxzqP`cW`68})Cm1YSblOE?BGdaZYSc?fdW(r8m?AiFj-+bMFWGU48@jW7KZEFgn9X5&dg3Vu$k)N#%a8m4 z49XVm^v}RTnilZ&x5d6*)zT}yzvhC^DrhZ9WndAp8+*e*2%q)}1B?mAVe#aOcpq|7 zSIUzMu5)HBMj3xeGOU;uv|<39pszQnqHmgP-#wE1pQ_W@E}OqQ-YStfH#O&F<-m1tyiuT*m`q!Drt`*rYu2_~ zt|Vh=)IV$OJjqIuU{TV=Im}7o8I+SfQ^}Xd$CN--lc@!58vX=Jj?zTe4b&_T|A8=o zT3(4+{T;(cQ2%KQYNrOlsLSmLs>trI6+{(Rw&jAwYGoc$zYomLzs_d!nL=)`ZziST zJBgBu#QVAfN8rgpmZm3xp!-NtlBu~&$6tT4)|-IK!7rlq5n`SMH}Gm z7j`M7dQu+5y}hA;#YxN>{HUuLhw!qh5^jqw(H5?kM@h~G-Ku~SGp1r9Lk>I=8QWp61m0}mUWn)t2`}P^_JIYNd7AcFS-RNJWI?t$Dm^8)iTCwN8yb8p;J0& z;()-)N?V!QPBH5o_iGW^fSG}|c5p9(CT+=#(wVG~vR(sKVFwJj7#sy64o%K3-W=d> zYm}z28^HOcm+IrAi`=l2Fy8s%VZyIneUIwkZxBA$fl7q1Ss}Oe5Km)s&?PxQGk6&e zhZNE$E*A;kM=Lvu#mFBXHH2h^MQI)rxFH8Ej@%Jz))N$}nwkbfTcb|JN=Nxiu2Lg# z02l?Po#H%DD19|`L$WuFnVA9CzwOLfU*>l_fyuYuT_oIT&MGmJTUV~U7+bLi@D~ie zx~IO%jV~&;P*;^=$h0-XDdohp9`x`+2x&J%RLr11`3*Q0Rxm^*P;p!4w(H9e27@`X zHC8&bkA5|AH?_p0P6AYZ9bZYS#Dz6RTl^E+S=v@C|T1Wiq* zA=^}8`%e+dZnFHFv=zcd;p_kuEA-kmm4>R7-Rc?2xR<(uFh>xb=}nj%+Dt@Fq~xE# zgU5>@6d7u}jfNFKlj^{-R*uX40PZtbrsLJTDs-&|cjjW2xQ<0o*Xp}mRu7o}>|`B( zMN{KXvVg&38ol#e|~z!+B;l^Lg@Gmb5Wp zt1aOz?Gf;l+pD>~i`C$6$WR}65{=m=rKX3oeDZ~$awO7?BGNRvZ)PAOAJfdz!7x@} zW-zf}jmvjhHeSst5VA(H@ak(+t)R7WI){2W;>|lf-iKh&Thj2P?_>Bkb^jo8ti}KrmUuLQ|DeXyQ#}aFBHUb*_)rd> zVMPm7Grj|L#%IdZSZhOQH_E3QycopyS-8321(4jDFPeb5=ES|N7h>VU5Zd_Spl{M` zx`V4omnab_V5?NkE{M{26czhN7~ooji_4WMcj{Bs)?soby>DBhPv6~p`@j7rRu|3= 
z5Hv1fhYbDki-0P+`||#=o$_v!Yni6&@k=FxV9K_!O=%t_dV;nsFGBg(MYn5~MIH-Y zdzPd;iA6~@u-@B5k7N-8wlthldYgOK)f#G=)vumjh{X{ubvOh&6)~(yLy(kxXu!y* zVM+IND9`Lal0)|AYJgpeTITwst6!;A2b-%lcy#;=6paPrdr@$}^;dEuiu`Y|2nPqJ zus)>!zaYobR@MD4!uk;q5WgZ;RaeZ+%sC&t!{1B<+s-qnUVasnG{AqLzASdYJk~dI z>D!EdNgCd}EiugE<^czTbky2o!7 z@r2*J2CevCIB=<5L&h5uv&H*-=M46$R1>Gt&;5LA-MZWE`d#t)W;^FYV>gkG#6-&) zvs&+6<~oq-{$vqH0^j&G8X}8>_FsHBPKH>Xv)JKz;#xk#Qf)fHyxoa=0TPi3{;IvO zbVFCsTl{S#HTamqZSuSs>w04m+~efBpL@!m)BW_x7&if_WYd@6)HKmZ47BxyQ)LpM zE7rBAVEVa{Z#`w~m*-y}lUcQJNAz~MV=(~zUZh$hpLI_FG)!|koR}%sYbuPA<5CKP z`Hc}|FVzVkWo9_yJbmL+i7~JY_)j_VoS|s8hwoozS9@R`PYQuR#v{Sy%VvU)iM=e} zpEM!p3~daT@+rI=t@}GL3Ua6XM6giDUx5xHH3U9)2DU%f`0|dyYjWc~}NL=y% z6MznAd{6}o7vFuD*je=b*vZ!S@6LVxtW%0dU|^NayeSXtO9=z}4&CwQEs8le^VHr| z<-W2sm!h|0f2T!1TjsG>bQk9KQX-WL@YUw@e3I{&Vu12*JV&2DgGi>FJ535V6q2cKTFhtY*q{qV`*FIOpHLNJ{=ft zQxZ#<7Zeq{xrhj>Bfx;J@#mf#3qoCUtmGEaAJ#pMpRNC86tWTQl-q~=xMFT}TEyM= zfz=NIq*beIp0Fh)ZTiwa%Rf@f66f3$p-Re&o2EW-<}N9`Wg!q92|sJrwnk`I``RM= zfWCjAhK|4N5pA(N{B;d;#c3m3eHfOd2@GBQhGsl)V)z+-e-Cinu3BR0ul z1vQdk21IO?rADd{6jpq zqRY0c;l{Jd_a8zCQ_2sfpK)ejl+X-xStB@g#rexO#fw8{A_+NLO8>)d?hheWkG@ zB(1FaO1&dL?wy>`4#I3Ot7eT~HfKprefQviI^{)Rtj^^*k#cDhn8$b^=$)9EC2O;I zZp(q;(hb17px5e$`6vP8q|M;oppoL{lgq^v3ugSNx5{uN^m9YWT-1<3r=QnQHEn8P z)5ze>C~AtDZwp6fn+eP>-#bG&?o^dykS((K>9DU%9(A<#DQoqp9bU`0-+wibzA@c| zCSBo8gX1{I)BUkYGo0Ww_GpmXVd6kwv|XBhWI)U?JFM5>kuxgB%NhcZ6o1xVGU8-9 zJ}H{+s^fIUk+h3-iuEtrj{Mg0kCu|UOh!qsUk$McVn?L4i{|8PAx0Lz?F)UMRpMjg zH5ff?jSr?)j25HOV_FB`{#|VJ*+cOF_LZ|oA4J*d8lICPV;l(Zi{KlsWtwrt4VIZ0Ve>k(&3v6!I6Pv=?; z!r6%lHlA2AUX94*MtfgNAFt{p=|u|u-Vt)GOf-3tkYu=Acy%XpIF3mt|3h6JcumX) zF(1um<=`Ewo71xPp!h-Jtg$u?bv2dTe!;aiS03=xnpxv`Zm&$qaX-ed6&3JQ{A>`3 zNJHP!v!S2QwG0@ix4^DxSK0>{?p>5N_I@XwGv|&g*k;LR!fh64Cc4VmG$~MjoSh}0 zEvDTHo`yD79fP0TTsR9*hJ^aWyj$TL17_aHd;`-z`j0R`LgG^!e_c=>z($( ziS00!%3%`;nFJ{jPuwhs^n5WREUqGy8Yak5*zYdX>1out%=mSVQW0@@3$-W#8wK&p za|Be=u1~+cWqHjl((=tZ73qsI-wG~PJ^)HoFUHiudZ^|q-QsrVYU3|U+nJ53SRN_(eUS2p 
zzE!rm06R@lZ2ngM!tki&GKfb5+8zYREjkps9R~&V@(@Bp(Rrxg+RxaNA?PQAh0}3g z@TD9X=$G~GQ#xxyLH|bb@)||qnu@09)e{5Hkg|Z4hDJE|d%LqDw_>e_?pTRd-;+Fo z=A_5YX?Z_czj5=wZPDl^@#<=8K(Em4G;f+11fqoAyu!r?{COin90JoZhhr>$P?+iK zluSF{Lub6m8yA|q$U%o10&(slR-d`&ezsyQY_?#ZWvd6Ie zy}qkR-)LdYH(MCo6l~cH$_olS*Kg%HtOI(ZSer!&@!1`MJG$X6IoxIW=Xm4XfajA{N@l4R@^1f!zf-FeQdHO3<1Fh z_D~li3pkK;5dx7NY7i9xA;(Z;O+!JetPXt#5DQ`WZWKW&n8CSe&Y4D{^v}Do*VeFj z3|2B89Kv$OanAhFl>Ms92Xgf-xz0&|cshU$L zZ&(1QqN4>{t)A1v6DVLU@_c`PKsXEd!(~c_eWjI|R1h=qE~{j{x%+!bSf`eP?D|O$ z4I?e^`;C7@j==DMj56>2Tt&9D9cNK+P_8YS3qLD+zcB*b4B(N-77Ggw?) zk$c8~Xx%%T)nb!?X2vys5^>!uwf)ASBIOFJ=nhl&r-r{AJ?Hl#$aEhU1Gn zwT2A7XmCSPLU5H(oBcCldHV(j4+ck{8RBM2b;~jIFGq<^(*%#KpNSX0R}h-xXT$rP zZ@uc>tNS7LBHoS}`J#pkGhGtB@a%tPNSfCTtx1Xyg-vH4B>0bI(MR1AUA9M<#|_IM zCki5sams><3$odgJ*X!X=ZLsx;BVZG8{pBoY^mo>!}n{k(cW9VcCo1@hu7=uJ$s3&r}KJB&6HFi2c{lEjcwA zcu}Ln)+r)rwBh$X8kTCaA z*2%h+ZPK?8i0(FQ0;C%*LAOntQob^6RU$#ZNjiubd`?Sq{WzyDO-0j?3$ourNX`qX z4|2*lyWq zqz&E$i1q1mhuulMMU;9^sidUd&cqy+P&6ROEPjl9DwG-N6@}QH*sCjCkFUU`=PjT9 zES;|5i!KHomO&5+9<9eep8&s8@l!YL*g1_E{mhL)hUL>Jf4grF4skY1qQP%&F&^la z_X#HMe63H1-g?2IBhPyc9JAL&-I&6B06yMy#6s1QA&QqL6?D2422jgeuF}eWZB(aJ z7S*8O*z6vFE?9*S@%dBu)N90QnFT=)31i>u_6o;CxWhx@AUOZzOMDwar1wFwtJcav z#iZNtYwYQ^kyR1LQd|hM^!g5q{pN5|&=wqAz9&ODOPx0yivob;9lA!U> z>cvje8P7REhYHpz0pwsR2KD#$E8BBuqGUQD5e7^4DZflxH)xB^5W1rrQ5OD&%3|5} z>t$OC!Fl>xOY2S11dj1#FczQ9a7B$IP_ePn>iV^OsB4y1zH{}Thgz-`bF-RGzO~-N z7KZFLhOGC|KvthwtMJk~dBH68?OpzMN5L~JfrN&>M|1*{f^{3_ek4;qK1( z?PX%pzM0MyHk?&xw7>M(c1EkqC_FE|Fdx9-65<5+qOmjLu;?r<{OKoTw zN@elCPsSov_@#WkAA?RyF^)HB@V(+33K7=|)MT*}O%(_~Z7|CG;N@2y4Wd6@Y?nBUf|5%^s3ZlrLygI3vsKT3HbLoUOO9IO)4=@}&r4Q=T>#Y{{uaX%G4jefo`fwL zh7%LGue$8=+si{tw>Q_o!>M`Z_cr&aW{+etSQ)QJ__vBLL*~OQtpzDPW4+UlSdwxt z2sy`NzPgceQwC9t(27DRgT2UCU=fq_+e`m?yj z@_6ah-|inI9hrv=Z)CU1h62S8x1(z{6z(@=Gr5Hce`?wWfS@8lvc=`kr&!QDLU%5w zD|F5hZ)VXr#bSi%hj@;>CJtWV$OcRFm$(lmR{O^{6Oru`TKKN55W|G*1J|693WGHg 
zg=Eiv!T-yQ3^@c5GrbI`)mhE&g3V(yVLW&s;B7$BEj>Pa}L^pXE{;9tmb{g6k~%|f14hYG#ci%XP7@4J>p`xR07R;sGu%aeyiL%0yk2*~P>tYFO z2RQlJlMjhw69^G=TP5VEE}!54v^J^bzrNTD$^$g;RE+{2JbxPMj2-$MRfDDIi*PxY zo3p3qAljhD!Qywz6FH0}TqqHoGpYfKUN!X;)daJ{Gu5I8lWKiOoEWvR-!Z7!{y1mT z1mfRwkOku8E{y3{kWPtn(1Sxl-kp2Py_Ki52l#GimfgycvB-GQ@-Y7iPg?7CpmUOC zGS+{GcSw$+gED9&jNAa=sdVN-Rtv0(NA#Jcrsu2C>s0u!$L>R#rg zABz!F-yAD%0eB5c+0FfQI)Pv^759?>&CA(gGORDD;~>XQ300KU+@bWn%%naj?C&nj zxQd~SlzSL-CEjs*#YW~v3;??xC1N|6Hu&JzmN|lh`ZAZj!Nn%sq`E|I%d*9 zn4{cKz|OSJtVId|ez=LI4G`?Rlp?OE-S&uWM$@Gd4#yg|gqFaYl}7tjqk51(t@M0C zcSyN%PJC#bNr7$@LQ|pf_gMZ-ULM7;(~Lc`tdvK+kRQCqJP&u$SJyM8npOUXCH@!7 zo2(V=owY7D=fc7;dN_3FB-K1BC@U!AeCV0tM2Cl?-)K3OQtB$f7fW+~NC?9~%c$3A zN!K*@7w5}bg9D9o)Uf*G!oa+wG{r3J5GiVc8AJjshKtOheOs-A-iXh}K;z~>!&ezUmoJLs)SzUj?%U zlqyA9T~J@on|SRJUfcOJOAA&st0m=4!8mBwSPvKSMXCK4|DVW)O1S+Tc((tTXcc1o R`VR_BPD)v_R@^k?{{Yl&8|(l8 literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/megatron_tp.png b/model/train/yoco_moe/sources/images/megatron_tp.png new file mode 100644 index 0000000000000000000000000000000000000000..cca83408710495c20b823a2e9bfc816dcca05e50 GIT binary patch literal 35137 zcmdqJcRba7{5O8uA|n|U5!th3?__1qY)VGf!LhPcB!pyz%xod^7$G4md+)Nh?9Kf; zbluUG46r^ymZ(<`52;4`~;wlKlMOOsktS;7B z_(>(*n_uvs%Qn(lb_m2RU(CN}f~QuE;fGi4B{c0-txfEmo}!Eqsz@`Nr*`(ndZeu* z2*ho~Bk>1nFFq}fcv^KFPw`ezsLHgU;DTB;669G z7|yM0x6{RWl<=w3t#pNHE;$kk{~D!xRpybH#&=dNvoj=oLzJsroGN@JbTXYUU?}w$ zvG5Z|mBucPTa=BV2fmw)+ri8g_kx!CTdl^9R^71FTVsc|4})aCe_Qh~h1{J^j>9=tp+? 
zDS?)d=k$}P?EilTH0mx#s0qCuz0jR1QGs)^zXZpQiT}6ckVwtwZ^Q4CG^=;%WQE^3 zXprgBuV4R|x#sAqHHq}6zH;%8J$A6EdR;@>!qL>pOJ3?6ENpD^dgn4@9;`Dn`F|aX zq}V_FVbXPgH}FF6#}pwFtyy(6nvHAV9)6+k^CoHCHrL4)%M$Bx!%b~$<{r#Va8uRp zln{uO=>9+|E=k$(a;8g#2AsTwxOHQb4bc#sgcZM1iO+S9kI}`|j$gH6f9;#~#Lpi% zO6*0^r*C?pM91IAd=PYDtaT68V2`YAnb30i?bzjaD&)>*nMC2@vGhFwOEiiz|hf za^sudrVlJ*1Z*462@b-Q2YJAHw% zpYM!u%bZ-`&<4{(y=p7ldoApZOA5yWbrOREC$FSa(I6RA;dtJ-zW#)CiY{SQ{Z=&2 zjuxnr@h|fxKYa%dlR=d`=I?$a#X@<__sK~Wu^F4$UZc~qX2M!1Npx#$w?c6|8;&r3 zri9yAUXa0X^3{f`eq>G2EfLw-awFe~PJWc&_P*o{^WPV>dz+IGWOcZ2LuA+8<=Mot_1`zAAA^Iww!7I zeR5Qyflo4tBsh6^`i6CD7a~n^J7sj;TAtjU)dM#ws{B!ou3%uezrVk`+lV`+YBkXM zW6Lz=Nvd2qkGQQ!gKUpWVxV;UTiwtg%|OKTBBMz6s|L(2n(bF-n5e`+gq=ceZEill zyzH?xU^+B7_)hXD>Kvl|h9~g@GCy%cL&NbAr?CphC2bnQkW2D(p#=Bbak`Y)SRJ%l z8I5y?4V6O`U<-$}hIzZW-Zg*Ns}4{e9v&tpbmVyWxR{5FlhfYGN#t;&OHWTv+!}$X z_{&4Ud6%ExWwIgU$z2AKFp(#2dvj?5{QTZKo}5*&eauA0jNd5 zlFd~)gD83zd+jUJwQJWL%iR}xv#X!9n-}3-x#9*Jc3K|hbC_2%H8oxJNN1Sf4mO>8 z7%9sedqMv^OcUPvoO95!IR5xx28|pkb6DtY45io6))q@_Umhvf(b1u&quXF3Kq%RP z+9o`?TLN$3l5sQm8*<0A#R_Z>n8t5zZiZX7XiJp#sZQI3+0(ZVZ!@J7fum2TWg%(K z%B|!V5O9S>1k@2?o#(df|7zBiDtbG?$qeCj^M@KEw(Y^r>NR==8bZIlJ(tMlo~|xc z4GoR)n-?#w2rVx#8;KWsJ-}$2ntz4s_HuvT#Nt2!y~3yKWMmdrR_?n~k&1DGjpIm! 
z7vXPqvPpW9fTht2S9|;J=bV)XYt2%@6xq4C?Cv)d(*Eu)yK8<=O|g%M7^PNGR#p}l z7YAeF;o_E#EE^n*liJzYMY0=kN6EQFUPPp$=vsG19Z{50878XK|-aXuokkzStqus>r`)p1N&$o3w+1>repArpx*sM%KcJ|Ul zun5x3?DECeOG``7o;`EXIFCp-p+Pn@rpQKuAPM}$L9}V9sVpP`Y{tp8R{4r8b3~5i zHl_KjQ>nsvwXQ75#s!yiHI6x8M05FP&pwZ>tSm1FNq!dDsm5=O6UuUuKzLt1{ImvL?wXtu$KL$4H-0#Xf; zCKr6s-dz3=A!_@+?#@}{R-|g&(9g9gkEBSewNjCpowmoO@=lti zMC0Pk+o;0yU!FgCl2Bf*Q#leS`Pn7%93q_?L}IeNJc7#dZjNMM=p0F%Un2JK+$)in z!5eX_OeyBo8|UUjEZC3oT+Ev2O@~3G7qZc#+GwQS-Rog@Zk>z*=}g1y&g#VH&!54h zHFEWPjF*auxQDPy!&@A!6;-(AQRT8DBPIox(fj&m#J;j(TnryalpMUu71ILellyJW ze-xr*Cbl10C(gy!prm(i27*HMT9|gaG5Y^~u^dq5O~4lU_l1mJHQL4`|?5eG~yuc)}7hlfaTd0#?RaYrf$3?^P63;0S$4pi029DH%#OMzu1-v3=vj044VA2p znq`^ZWf|e$c4MVIQeF79KpnN}4krFS>xE#YNlUae4Qg|~CntxkqWKI$?HnOb@>BE> zu>_y<+9WuDviX2)CR^EMy3pNd0y8mAMm`JK(uUtp_R`Y3-;@NB;avWPEjz4|FZSs3 zigaZzSm&C}*5bgqOL&Eag>DC{^?S?L!5;BwkkVZI{NSV>7Yw+~w8nx3s~hD0t!= z{W->&yiWpuLtjHf!`9Le85x=NNLfl9e%u)ZS0IdZgN$sb&@8ma{BW_Ljnh2MKz*D} zioNHpJeJk73|iD9-t7DA?x)g2_xARR-u~OGV~ug-E85_X1BtP1;+@9aHL&tvi5yxL zNoX{YtNWm3PgoHMCP7qWw^0#~ocH(oE+*^F5)UR@UT?AXxKHSZ@%Ak(EjRV@HC@B0 znNJ1DEh2K<{t!P(E>+}kPVq{_S;XTB3l~x^jwA*d8bUL3^G-I5RU()qL4##wWm!mK zK7MR3CqalB-?6O`g1GvUAPLNEMGRg-xLXe7OJJ?Wj2%c~eh`k?@4I*Ju&ClZ_RK19 zFuDImM^6u}_w`%1dddl4kY~4eSfXNLT*v+B<>Vl+3+4IgqnL8|@F* z*VjF&#(pkqBfP{QL8GmHqJ;IdV4%q;5=ttBC@IHt93L?kQc&V2G)PTCKXGq@z`#Jv z55OBWT5{u1P0!41vYkgLRnMCI_y7S~1#9*9Yy5dVEHI-hdPj;CvJ3`7Lsi*Mml2)& zKY1#tymuxjkJFu~5MJ1l!DJB)6zJ4cx_=v>aUCWhA=x+?PkQ((4LM#w?x;S@h2WAs zBG5h(y@~Mp0q*Yd!b!37$^p(n`?hTj;YlSU>K0yt$@DdG33*ujBtMJ2g|PPD$YX@{ zhW9Z4qjqe17w-R-P9e#gH>^r-DY;lKN1UKN#>4I1KJS(J~W z2&b&H9e0i@uhRQBg*w;2GTU8pl)2;e3o3gjG>!f zO46$T%&>@IErdEch}(mlOsjY?^CMg^8{%`REn};{-*c~RG23+Bl+p@mApev)eh~9U z3}%JW&ulS|@(PoA?5FK-d&Tt4e2t7TWehsn^QJgR^Jf<@_4wplOsO8mSagF`c0Kx6 z2dZ&;lH&8)ue0e~llk=+!!swx9EjQVD;Hl^CpZxoDU7t-{_j;IU+wBUDJW&WkQoXb z(J0={xUs?%_~*FWRl=-?Woz>cBAs{f6O7UnR;D}HgBFUCd|(_@Y}*`c{90!xU@S$? 
z$m-zGGXfvGxw$*ddUT+f8mZn`m?E-&`J8K8k1J$j5H{;?yiV z_@zsLrqbGfhEM77)8lYbc=u1g<=a(oj_)s=90FA2@XYI7@RrtYVl}2z;ZungWQaD? z@SZZwnGL+wb`AZsbgQ1?@k5@0mDKWJ;R=i=MBD}!1!~dTP0o5yxAGylIiz#)fif4V zY~yr9eAEaTeVj(0`yJ#%|H&npG~{Sc9wDQ1AXKSJ!Y7JU^8`%zVnvo0!3{^!7GZ|y zPc2gdD$(TV!^v5`ac-8h<(#{>4qjJ=gWcxWwZeo6!&ajMuA5eEaN_3X5oSu9+xNR< z{DulMDb-V3_E>j+e^#_M9Ykz&yB>wfEBFwc9yW8SERwG)!h=eDO#usKeSXusfBzh# zls5>GYPWXMm9{Tcn0D%g|0FK88pv_QbLMI>{{_Ttb_}4DFr4u^1x_`8j-@N%dFp+; z2g4WT?J29?n{}$vX$@=>FIg6)E5?^OJ9UNpF(fM0jc^TYdGpicA)NL%50t@H2?1!? ziEWpvN(s~$94ud5;CF~Bo_d|cXqs8Cp<5eRf?KNU`>5(TR!T)UoE+WQdv{aSe4@YK zGJSu?hb1wl>2Trlu4wGD@WC%*AEZ7E%|^tBc9Guk)+9mG8ZhmW!~|WrR(1yQ{?5sH z+(Wj=TMe&0E0xnpeb6yk-APVrTYl>k74qS7z9oL<8`b8}jGDIoo+lQT+?)>U$gNrP z`RS3o0j0q=C2+_QZB_*yvZHwgJ^9fYid+wMJp`P!xE|RbyHH1dfxUdS0~1I`#5isF z=SqrCby`LZ_I|h|Yo}EU)-YY|Xwd)dUu+anL3DO#IV+3Cn&hYmS6RnHp4{e3 z7>f`HHIy*_(mP=OXg6%3G^q8l?3UwwhyMGT!b%=5B2!sA0#w&oWt{s9gEO3rttQw?ZvTgf zKI-SRI`>#370NJ5aTDBM!>#|^7S(wxq_M>G^EKP8Vy*##G)Xh9R<)T&J_Peg)};-V zcgZtG#URk zSwyNZ?)Vwd*_VD5Hlw%AdYsC7$Q-8{eaOE)XbE6S7+EjI*Q-k}7v5fOoAXk?*}tx~ zc=+-It}EDmlSJ^zV8NPwv3m|V&C;L;6S{-#sH)rM;;CA#+zaS6?7_*cvy|>GIz(kv zX>0(;#1o6TZOmCcwmOG+!i6~kgcog=K_ZK@lMgnB!}@aSof;Pn5ZUG_V8!{KP}`wc zyArf@5@p$lYhir7F(r)i>kOj4G~M(GtU`WD~&YZj+wiz@mPiT-tJ-E?-dbRfNT} zoxF!=0UsJOrD`0bv9iNd@)WE2xKyx=hh8{QnMh}V7x_V zna86LYrw}I9ib3HCTj7a(gEH&9fE!28Z76uX3aJwgbo^ zlI+~r&)7aNSB-(g&j8ix9#DsnBJi3`q-Oht;Kam)7*>2+8ihSCIT;TU(2t51ljbSv zi?6BnFC)`n^T1M0SXGcW0srxh)nw)O}$lqhWugJp8vp-qZrTafER-L8X9R%#M=47 zW@8(R0ku|OZ3rblnXnSRSOu=LvI2~ZB5Fj&D3r=ix%>*|NXg}?LkqLqX?y#zv*c(NJ!!;ADG{FHb6{B+ZZp&dKZySxB5bN zg%GCXRNAPOw^Fg=B`>u$cuyXhGNN*uz47T|E!xMZn#WYN{u4P1eYuhNboBHAI;ur> zLVZ+}>3Q1zlg^zg#1{f3J}XC+q!RekX(gT$#%>dvoB5{kp!&Yf<3N}>0nhL(CKDFx@3v5mtuGjNg2aT#yk zO!cN}*nTjTjYOB;Q`DAE>Ui$yc?`RH^X5%q(@3{TAxnz|hg36dOgR@%O59~NlCt+o zg0z~nu&spi{xKyP3#n6tOswdVl8>%Bx{l_-9|s?&3KR{oIooxzbhm%~GPuDM!mm>{ z*I*+v`%}*I-A+6KCO(A!m9b1kmIRAVdg!VkMDAgGSw_926ifOpo$A2i-teJ z(Sb@WRsBsQNGj;*1V8FR`f)U;YCmRX@2JWxo=Zd|UYcgjMTR=$V|;wuwq(ciLN9Lb 
z)v4yn-BNo&9420mH(T-&v5 zfM7o0W52gC_s|#5xM{?t*t2}YC$XtP{j9#{p+jVIk>{}o3O%^$IeOz*L}za^`=yEL z?SVYN?I}faoQOs17!1c#@kB_tRWh^JZqTV`&`$M#O^xFg7S3^frF8*MNl8&D}g zzLgt4tZ{Wc+4E3zZ#V6wAt2uV`lPzl0@lj-R3dQRFq2;O{(fpc~7*=TlnH4g;)rezh2%Rgf6_Y&uoN*!v zmCWg^q;_a2!;LreDs;jH@Cn=3^CG6rCA{su(=1dDm7nxfifOO~Q^FI1CAj3zF!y8#~PdZuTo7R1()66xVLM5;V1O(od{ zJt)W+AOpb^UWuwJyBaP`O(D%wWdKyJPSgk7LO+6{0-I{mG9&Dyw0k+Pq@mH*)ny01 z6W-Bci2!HnxL0G|Fs6LkmLjsnq`S8z7HxGPy>PoLEF9V!Xx=S-xw zx+*9rkZekpDOIJbOFrSNjA_H&Q-*z>ucTO1!98bNB3fK9|>s~U;2E7g^#=rHFc$Jl5c+v_)pj?b zGwu0ixMLh=)IurVt|;!bSK$Xc&Zu%@a~)XU?ZTmde%MRX$sHpLmy+>FTiEMpc%qJDY6ytq%J!odF(NU;fI+^!cA-~k2lW{Ds z5RAFNjT~mEVTo)GcisP3KcP%X`fcUq-uRai=T|NQB(^$%EHG0)nuu@kTY*MSE8Xc# z`PEi+Au0~p=wZd%KbS8@99&Y80oNa`Imu29h!wim~+Jtd;{A`w>Wf z4sut|gxR`84!{`dEvFv1k9L_lO!!QTyPAli)_6b+(kOe?9D78)}SO@6c=< zjA-Wu6+-RSmR6^aiM2t1fI3q~R9M@L9e*reP$Yk>!hj1M_N}2`v1K){+h$~u`m(ah zMCI-o5_NxUh=zY-dTN#^neLf&V`nWv3j8}MSv)Roicm)aGU6Mr<=nA{K?3Nnp&u}# zzN!t!cb!R>|3dh-u>000qnIK8nXG2xh8Yqx)UWU9ADWH+rA<>4!vcHvINX!dQb+Yp z+wg)X?Tllbarc%K5ow!B^-q#hP@rQkpWfcr2rcp3H;5|KSbrfne=A^5BU|O&2D|cRRy~iH%33DPqAfY@GX{RkIK;T1?jW56ugcC8hCvAF|2OrP1~L zl0CH4S543Qv+2^)-ZBFjuke6UY6SXQv7GV~{SO8=b`6UDuuWBF-7vU-Kuo>%Y`k-= z)aOUPrS$UeuoOGi$ioCswIG@9iBy^8{wC^3LD?Y+p2mq)7W}>lyYi92oV19}b05XB z?AZlehMCR^A)(WvL$Xnla{F4xdG=@8-&ms4$I&-XI2WkIBYIf7?J`TM<=ZDNhFxj zv$k1S8ztF3K=Pli=1qk$TBx*j_nL z2yijA^MKp3L64et+9l_qunoXNQ>ybJ;0?obZe!Hq1dC$1GIQ1^M27(jQ!YRl$gCf@ z3vjE<0}S4&q;fu_p>B3nXzc+iCWc=zle-!N#h&oJ{C)oDKH-`q!3K=3RQ=W9;?Fmb za)yauM)mqY{Z>2(TzO7LlkdBTzPbf{4K_jl6z_ER>k{bJn<`|#7!r7Jq#>b^r$t{P53E#jI=C8?;2~@ z#S5sm_F>nqe5EvTyLkMff#oUp3&Hsj6qx?ji9BI7osC~)aGMvmEcfvnQpU*f@@yyE zGa|}pJ`rS%REklxz>Ql0)11sM)|^cJ*)puKi|eXKZyT3>*$3%sLPF z3E{d+WGY8NvT`GHQ6r<;>!*xJJKoyyadQl-h0!1s7pV@dT*xv5s-L`HFNM9fNA{JY zLZka!4VLM$QGd9@2bt4?Lbf)iDqJ6K8F*A(41A$|(tBZGd?gLL3?ToU9yZ)Eb1lnr z=7ZyF<(AL)VzPC|xvTj>0TH560Tm(%fSh|VO|BXZ6(!^IxL1u!M4hN|4Ta&0m)6rW%}-PUbG!svWs@`xG2M&#Ji}8-ZqtuW zh>)G{n=L!$z_z^1r);p|*(ZKtaWFP-Xh~dk+;2iFg}K_s48>+=A;X`gX98(WM@sde) 
z{n0Z7R0SWfsA^pv&L_DlxAE~0-0@;RBa|YrfE0pI!tqCIWVA%oA0;lTVep@>RO25+IBy149Urc+lE`MrJj{DusbO{o)Aobz zx){M_1cNC_fN@`r-b`D(!Rjw4v#z4mUmC)IRJEsUST)wy)nDD1HDNW=K@>vfoH^|Y z&p2-eQwV8CyLib!haObtQ6td@1s;UxKvPOf;6-$%^0x|^_3_oWn+rJ351D$t6jwo+}JWV-F>pvZJK14Js z)Y@-q=vl(g)9)l@=Re9u^K8ByGYD!iHhxu$EBni&fWXkhLm%^%p8py~2(JTi>Z?@Z za6F?v2xF>J%$oc^GyIq+MdPNAu$|IQrLX{0{A2SrkgQ9jZ-vyOYIdP~C7~v7!vGyq zVs*ic@a2a&geB0&ayKdm0CMFCgesWq_WEPr5_ho50W88-9qz*QwgNjg^nA9Tj3~m07a#$g^D~wt&%oy3km^OOdoD^;}ONY)D*`v@5a;B{(_|sXJDy_q)Y8i zHmct-~%18A)< zCxO9cNynSl98?cE=YU}dj4{v-6RT1|y(RWg+~>gSz!&^0Dua{OXe4vvv%ZH>>6G8- z+8rpEDT2o;Ts4Y2U|pIlKx<*2__28BF^5wqg~!exfFyqY{0V=Caq}CEZvtuAhd^c; zDaK-ze0#qSB{yvJ@Kaa?s{$>(Vu37$SBpey(L4r1j{x2&4wZ?C389~N*)@nAS=G~! zS%oAqfcDao2G_r?{=H_zYCNiH%z4uo%a}Vxp04XPzA(dmuHC24o=vRXP_)XPb`@91 z&OGz!;4h$5VJ$B6q?KR4-o1Z6bPWSXgwf<=WHg!=HI-`6Gz*n|Fg4G|)?uTgZ_c62 zzK-?yYjSC6X+{PkIi8}T;*j~DZDD}8sNSA$(z;E6m=_xFKthhcSASUgY%^~E}W7RZzFo=48G>Wi-#&wT_HhA9m5 z8hTmz9yAh6p*o(cNkZPU12VeeTLZ-0rGiLB*jp%ltgk{moJ_@vm1Tsb#>Uan>TMp$HV}^mBRC26}YTLg@v;! zrqIu}e=z=rTAGG%>|mOQl!OGhu*P|s8*G;lN~>=pI>(LAH1!@F96ZDqGUA9j+DH@0 zqa#`)4z5o-d5)LHjsPHf&KZ3$01*XtGMOi}HO`XxLNHFSkPPd*%2srGZmzY3#r{y$ z;ZX6(lYcPwU_2G4Wi&A_h-~@Z%|O^nO-;?6J9ly`_cAd=LWt>8n=U>Cv9YnWRt?HD zgvvw`l9q6m&Q#n1b%5v_9S>sIaKn4o1~ap=bWRz(-FLvbEtSmpBg>vQ1{=1+c*lSTrim{{PV~h(WW-pDBg zOZGB;{*;Vq*{WD^Z!;+XNna0i*<|XNNX1E&Q6`7*3qd$hX?D2O4ydO+<>V)nvu4z% z?I)t8rH)I3(=9Pj`2k6B*7l+Q?(VKdUyl3V>Px_KZe-r*>+2h`OT7Em4bP__;RF(( z1Y-EQFGYXUlgZQF3#5?p0L3WIV-D_-(Sx81d3t07R-y^nEm5!Fo_8-*_IhIzn87a+%rY59pz3T*CP z@9*rSCL0?Z94u^Icyy}kz)|c1R{eiyG%E}%dKJ9QEn&Bk>})G@bD0%wC3A2p)>VgF zaQZ-rn+yTiA5^}FUxDE zl3Rs+ymg)nb-U^Kk>n#A%ORRAr9>IG$%fWGEgkpNVbZ@t)Z8qq7*=g z9E8mlvAi%G%gl&GtpxENa(HwOnb=@3wq!Lx`Q{K1b^pufTF&lDmpCnwi1 z<^h=;ma>KhJ*msW{Cv|An(}lSrs^kOVVawPTbrA>SFX$g7a7dtW&GiysZW3#}tt?7{9o#Zo9Q;>~7d3kx%$OFH+iPesd4xq92Eo?STq7xIJu8dZ& zSMC_9s)mmCB)$b&B}Z;wx%2w8IQ4^BOu55!8*?jYsyPyEHI(Ya*WS6Y0r-5Hl%(Y3 zMtY0M{vUkL5;)~IZ!T8IKFrq9)C7iUh0UK=Nep6GpF8?{dN#)c*c}}m=k;#-L+Iw? 
zbKP33etij6>EyHx{;RId2Xiuha56aT-)tZwq^iwOO>=Q^*~q~fd|N*Oem6}qJ}xTi zs#teVPa{Rr^t4HZ>^T*PynP0q?k`?wD#UQ}@Ko)L2Y?1-XJvJjb7C&Em9Du|452Yf z3@CRwI5=4hr!yg6oH_)W0?hu9G=p5EoOB%CpGyY+4xt&VqrV@9VP76X2d8p}l|JWm zIXc+wN>lW$d5X!3YbxFM_1#wM#CW(R2B>TwQoO4E4dxGc&46X4%jP`zd{{Kh($EOu z6WnCY9~;O;da^XM%CoVPue>iVD!RpQ|9q&}%F@pZ`-3!1j$Zjzv%!(dEv}{IWnueS zg7VqNX9#&F>2C%i$12?|Eq@ttK$A@tA`eqwE{x1zy87Y(E?OKO9_DF660cY37G)bK z!^6Qb?pQvB$mn=J3~nd!p)dH9jT{^X-P;5^kjQ2%XAO0BqM%0vbyCP}4K|pAHAMQ` zS`+ibhY!WDfVaKapXa{6wVDQ#Ae!W)u>X_Hx`A;HqS?!rFPE2*6$@fin9C_}M+VsPpxRhe zaPI`Jv1nmoeC5;x7&L5yBihWuLcn>=NJ`3gR3g!)@fdM0riCpMgOy(yPGAC+1wW7> z!unhBD>ht-P=Dk>+11&(78-w$oRt+oc@MHzQFh3wz}x-s!9gk0cyQ}pfK}}6?@LKZ z`4E72W{b>9OY{ALtFEX}TRwsuSb@Gj=61@>Fk+|&vS?j5A#s|SnVAZK6oFNz*xtb* zuL$rBgv=cREL{jT;6^cSTuSpe85EVPxXye04t?x=oNj=N^Cy_*+JY3v18p9ujvI@EMGN2t?KcqAdVMocal!qoQ3om^LLMT>!4s99 zdw3iLdmceIJ){=B`MVR#0}^RNOaHO;_dKuU6pf`a>IAT-(v3JPki zL>(awqJ)qG`UhT>O|J|?ric^q)L%)mp**QC5r{o#Jd4=;{tki0n*iRouG}@Tvnv>t zI1>)JJq54L6GKBWEO4N!(=pch%TJ>cFnm?;Ugmz{;~G_aJ)qIKdgUe@Q3|pAQ``G~ zLtbuL*}Xv0`NU_JT3P7 zF0MH}BBcQ!(Bo4GGB~L{H#6h2o0ikji5ZpX;rzGYl>35$!-IqTLP8_$?KaE9rI;&1 zwN~HfxwyIYOHos6Q_WDgy%hb9L?4fnR<)O>M+2)KGi3r1L~HQFQ*V+NbFhdWao!Yi z+xa>y{S*Kd)7DQAOiv%Q=;Av#vX^XU7_ zbE_}Dm6q;8)^C}nemxaNH`e z!W*kl!JNtZ<@5$FLz2PayhaSSmA^POq!p(JX9n5Buhv%nShx{a27#%%`T^!E|7J;g zTW1KFqwtHrPpCs?=jKX14$GibUCxm32Dl1vcV`o{*iSsEs>&0>00tvTGQXgjf)N2? 
z$*1N|whkJKkkG~rI8jijhb#VGgAS1Yj+8q|eOAuuahYl&hGwjjM9&F=34+T5D#Fv6 zH_|kQd73%8rJcjWUz{XFv*9KL&*1fTCoe)KT|u2CjTt$BRYMo)Db@SVh>9DP6@Hk$HW3rTeQkymo=go*AMF zW+C4*3D9SVecQA9Ps@IU5-+R>MQfdG1C4w*nmIk?e|$s)(etNI6CvXu!oCXMk-;5- zb6i~ff)Q^L(+N%w@d(0cKV+O0}cUcJYK!30d(ts0gJ0CIuK3 zIk5flH&Ai{GzM5Kn{1c9XQp@T!p%C5 z)c}K8Jq50RK>Yi7C5G<7!J7efqi*Xh5W40dA+pjfv!4T5g>*c}BwbKXZm^~0|GUG{ z@DSEo;p)Uf0`esTImtUh(6b&B-W8$h9u6W9cnMAB zMXfC@xA;)vE80wCphM^Qvwr@^1pv`{>B1$SqT*s~DvTHk1|awM+Jc(bscL^jocbOY zS5I#*WKiM$6elMknggEjy)+GEI&X)(%zGd_FZSP~sy#`Lyho?hnEn|#UnxXlfMkHu z1J&GEj2siD|Fhc;2Sjm&bTS5V-{S|osb4lqJn4jc$hEfLVUa= zvhx5qL7?>D_(3vjcQNoQ0--t3-mXMLIJdY+gFI*cvTJVfS3N-fTRjCui|J!vxacymF5SljR0i6Q_Hv%PneSJ4`QsDB>iM8eLMtbg&Qq{_` zS0n?VW^XgwPDNGggG0K(mi*)2mKzwZUxDna*N6kIUwJ~6-UWqj3b}w+yxN9O++V~NFt*;w~YUk>f@Jt#CgiMhSlBx_o$DR zXWB*qSBnzJcIm%^8jr7*j25X~IXx;-7bq?O7BDFoFe#V=MH>O!ODDS*fHWbYY$sOY ze`frCw7>r&@TLk>0kEBwJK}?DX0RE7Wp|}uR5o0=c z3UuENY!f-wb=$OF986~Lub;R#+y-5JL5d1l*@vu(LLXGzQt8=_IdFDvYB+}w2gpI5 zX2+)$ksSBQT0Qz3i@K!aw>M%`6*&L1XzGx+1WDYawyH#RPww_FIRYPHjWND$22~2M z*zn^dH)!o{=6&&WUS;HPkDBlM_-TyT?W&#%Q*iwq*|lA_-Nz}7%_`wW^;wG^b^h=E zB@ql#9JVKk#d{SP7>UlJEE_>?uI}uVME{0Ks>ZAf24wUM@Bng~dObO=I=N=N=O&!i zHMi-aVRtpMnM+uBw69O2K|_Tgtt*PW&^W_9_%9ewB?9?IBvwG6T|9($F^3~w_ zI*sF>IgFv9p=&Q|{zyMkcX4%H1zRC&0XjxWQ!cX7W zn+o9TO}6A@A_-;F%lD@k=U1mfVPw zW6D2j7C>+tNHgq-YMNq>obtH3`@tkz`@XrS;nuOcxp}e2q043h9lNXp1h!WhouB?W zNwdht>tc{ zu)~x4TFV2Eqns{GGw(N_#rpS}HY$8WnkLDKs{I4qWo74|a^~t6jDn_iiNndFs+w6S zn8%GVr3NCIDotjJT+2#@NaEr>AbArP8fpYRi-SsNbKiG5&dO}aSwj$vpoi)D6>KP_ z0!Dq*F8;TaV3DI8xc52}@0sI7#jkX9ytT|*lCj^qeWaM-_&MsiwsBuuHizvivlg}F zRzs&xIuED(p2{c7{W2u{^rLq8JKQtSLN4O|{`-VI^$iuOGtchjWwVbsoJO~~&J?`4 zB(Qq(K7VydUb>bl&*$*@kxyIZl+p2i}4Yb62U`-C)aBrTwet6k|WAZ*kIW*E`hy=Jad7A6a_4rEtVi4b9jM??T1mu>OC_idTKJ_~iK zHt7f3aVJY--d)cH7AJ$)UWxHqqb?yk$iG9~WtJYvB{M;b!yXt%X)dxVaS<$A?8j^D zVsD$8WI%`~7sR0MwIn7c7JGYiB^-Q72Co{F{57+n~3L|x<3Zz(BRN5 zS=taQ8M;v`Ar+YyW`3No_N!Hr|&A< zFb_&UU|>{~ef(kU%;OwgGwzt890{H)e{3`tZoVHu(XO>we)|Qv!QF?Dl+e=BIy*Zb 
zuTmaEvRcSSKc%gsGv+*VE4Po?8yr@f;QB9U#W9LZt-ip!#ZO}mznUT9P=1Yfo*GbS z&&?iZ(g~Fnu z#)Wd-9Djd*=uT9{F7jO!WAIl|RlUH+@v(-`VEww=``66w(7Vo2e+xjVC(TL9l~TxJf&w0qlv*FVvyC^|T7 z(#N|I9v!Y&>y}bm2d{G-E%@NrR_3T@J+9j=J6dexZlAfI%wX8Z*3r*^nqbEyz+Mp^TLtlOj7wuxo>NWXpq3mpHA!hw zg+-Sob{FJh=9!qtHF9qXAMLb3pB0CMT)@$CIa>#Qg-{OMpkFyyIc6o2upCq~ybFKx zg_nSUiz_Kf%yNt^Gpla2V*KT$W#{Em>*Lj6de6fJDGCqGFZ!%pREL9=7i3 z^*J;F*qQlJ=$ktH z%vh45>*@n4Ms}MBK8p_bNbbVX+x?@SitWtp!>x*KeOOag)j_j$)sErfK5~$TpTF=Z z?UNgz=EVf#rXu@M;JmN+k=dJp(7W!qxOmqjx-X|PBz@E6B{ys&%zNJPE5OW(7}cX- zC=SPA6b1+kHy79D_BONshoWcp@H=8&)Xw+(SRqBCd0*HZv3pP*c&zqljQ@UMyI1&k z->3c#9)(()vu?%663HcefB6`31{!^L^@H6ue0xXXyTWXaDvASq44+LR+UaY z_~84-q>FQN)a-6u88RorevkN5)d;-UM=Nx-woR_~OwnZnBfGzPj?blh?Zp#TWCcS7 z^?3ya+l%*-M!(KG5eW&AJvMMq^L6qC&zgqux_{uV>v^N(62`!E*y~csP~{OJ>x%-H zg=g2dDK!osKu@K;5U`jZ|4b&6ZMq6n1YbVmL}Kh{*;Hke`oBgUNGWkMq-JCsw2nZK zVZ^hOnoFLw$>4yeuEqF|4WZtj+R=ueZh=jo%i_=e$;rtNA3ltYnP=43@=E$!QMJNI z4A9*itIjYq^6@3`6Ywm@b?ir3Gq#WtuYMNv*#dmYeZMQG_Y%#B&dkh!f`r2j`K;}m z9_fSzYMPW@VAy7@;D3QT&-U9H_DXPnAR3Ipg(C3DGPd5F_N@7x>tRoiHy^K;;*f}X zJIaueapwf!1SFC^C{^y8nWZ#+NBHq`UM7_^4Uz*kMn;t;xmLZUm&bMHyi(LhP7lsl z`S{e9yBy$%Gk?+(a4;})Ka;6`QO%Q)(H)YKZ~J4liDZlu3U82pFS*(O7V#r(O-&sB z=K4Ea2jMXS^&(5eWcT=NJbSd#AVDFYb3$`Z(Y=T+0YL)>?_G_nsj{5A@>J5VZaQ#> z4Jq4g+hnPC%nt;zL>KF&c=X=|H5Q4@458W?#Mv2k?J_8~2gQbkKCP<}&8A0Nm<0E- zvKoL12`>Lj2Z`fnG(44)hA$3$ScomT-SgTwgnoOZsG%&;%^ErZ0RbRjA+w0J9k(Cx zYab#hBqAOK3M(cC1{^zP`rc*8ehWWuW!^_N(>$|$aF8?9qQzn0QWJr{y8O;a7>NuJ z4k4Sjh-K3W-N0s6TvF22)rHUv(q|)AqJWz-;+EgB>0M8LmtN*>+ped-6mJ$KYAc=# z^&=7h_DP_dz+zJavRKvMe>J0io!>@QUsDj)$a-iS-p@}@S01;sN8F9M&bqwhL2|)~ zt$L+*PrLvxokb4Of^Z{pEZP}5jMN(uwMUXR(LsV_a*&QX{ZjVZsb43eJ|F;pLtj1X>d6PU^lkf>C$GRu-kV-h!li59=@fjqMp!E?KP}B-df+`GEOfGT#p{YM z8XETX_F9WYoR2!LdFfKJgG9$88~0`*hrs4hbF0@SRAWKEf1{06GunSnfGmc@wgti! 
zed5k!?V%V>`wjMIeY$YWnIX41fhpcDBkP0k&226mXB7N z=;6aQFdJsRnB>BMG}G6~%b(tUV3K+b0qrjHBtu*@w6%+q8*h}`{yM26r{>!(Vk~D~s(CPc zIGRHtno~t0Net$Gu6*xq8h7~W=VMOwbacQ;M@;?hm10f-8?d|58<{v;mBD~*L z*Xif$?MFZN_4T#2Aw)s<7I1}M-M2n{`gFO}$CGfr6)lJclYjH#K_8*hYo|$j+d7=FH?vt=WhLkb|0!nb(pB6soBBdR^`KjE+}+| zY}!=Scw_^l9wA8_AhOhHpNdVJqLIB*T)i6{z$z|ZwC_rz$vV;b1Dl_>(p>OJg-TC-Czl2Tk>M{aTx?6Zh{e~@ z-Z)qcqwo3i=LjGS6wDQ#9bX?Sg}9~fsoysM40wO714$(x7V-8>{fJ=$+oh-%`SS}; zwrzVm8rd&T$k2WK#u+9Oz$K-nrMh1wc&mM$Ey%_m!Pj>UTeJORE#Y+wD&ua$O#w!YW z*^$3B^oKc}_{d4~yzqa~fcG;mtojq|6I zS3{r;;5bV$UGVB5)Lt-uEGjPP`{a|OU-E(`k7BIdYFb(gqe(G>x&@XHtp2y|NQ+6df47-+1%yy^#X==mMjvsFN4 z_6)UfWxm)oAM?)kj{)tIs@ZrSA8_gE5`IPtNcmy_$;r+Z!uLA!JoA@Zl~+5M}buwU3cp zdrCCvVo<2}i7{?qWeb}@3gz9q~yJ|2j4l#>f1gvxC=ln4pJ82r(fVCAB@b&$>Gz^L}~-4bks51YrUn8L;-R!w{&&e zXJkh1!G+DzD^gced*Pr~1PExhsJQr@+qZ{@hKyymo8KYdEVABz_Pv$5#(yk-H};c% zbS6|h!m)GZ9OdAvH_u;KSg;1^*tne?nzXdGF0&sE=&=rcErD$P{L($0&r~DIMp?<- zp^i<_l0XQJ7lTpo(n;1P7)U;6t`mFhMBq7HG7b5}?QSml$JUaP#IJ?>9DjW{{d5A&Lw`>F?K6F#kBu!gyD8R^T`dd z2q<0@C)(O(x6Aj7K}KG|vbn&GV=7e5D*5&whN9pempc^Vz@ioq2a>mhj$8j<;=sqe z0s{Akg0%(lOW4d6n;kDb4Ll9i}GHnZvIw(8f$$&*1cxQ#+G z$T|5*+b~}z181e>^OOvr83Elwe|JyL&XC>6``E?@~rQN7^w%y z4nlbneQ=weo(A?~Y4&@4)S31y!v}Cv;C!3uGhI9C_U>wih4dyQlJv5#xqmW$4U0S= zL`Q`4+W{eNufn@()YCpeD%AaUp7P$-O)&)pUS(w?nd0G<0+;Q6<#in0Z7pR#u%x50 zoYXFn;m!Fp6r?9_;&EUuFU`I-m;$Z^$4{Dr1f483U`oLIpm#_J1lD48ld_^oY(>j1 z51gIu3_%N)I>^ws++%obWg}u|7Z-WLFyw?`CEO zEDdG|gDe~*z#ibLt!|f91u-ZP3O9@NayoKzauD<|G$E!r`|RT&=|HJzDwl;B7V3Oy_3bp5*5ZP6iP*zHH$an$zr z_XE8}E-26%*ZH~a(W6HYQ%D;nq6AYW-LO2Galn5bGX{N)2+!#3k{caNGoY?&lYea6 zvEP$r(g_C=E$c~O4_gqfD^2^5THQ!D-rIVvgKOu}`?pF=?NmBWA5tM`71f0Y_G~$f zif31zy?{SJ2=epur^^0hw&$}w#^H~Xf9v+`Ti9u~Y`Fu4WB!TPPTVNr0=NBeLpTz= z{{BcGsHv&ppd$QShpSEs-8VmP*naW|;DeAboYsT-4qBcOdtYmCh_S2FE7czCSC4cw z^QJ6317I@yokrLJBzezdc!V65jdQfOt&Q{7k;bN`2croD+v=O&)-`+J?f&e4hwxnJ zS`E@^WSc$%U0tQZ^s_)K0{{%C@!`XVz+rICzfI|sSl3!n4{!2{x!tQdoK>iKAJ;j; zBVmrD5l#iVw?@Jlz-0k|Q_x|l6S}D(^7D94`Gd9ocTa|TsbNMPMpy+f!o>})E@NW1!SQu3?6c&Y9N`qE#A<<^ 
zKZQ{c64u$=+Cd`TF*xl&ts5X}Le6V|p6Su}# z_(ovLHha)C1HT9O86bKn<^8>~O4ww;tN=Mb2w_D~wVi7H0pM(;T<*R1V%!b}O3WP$ zq_%Y}P(?e*#`^~t5&BSWU;0Cs4|b8C%o$~YlQ!3=8b(2rC@-h5-MSOqGq>A0JvstPc@z{`&7fVpc>d^Y?Ot^#8>I zv{M~8v=t8J=g(3g(Zq%awC3iK-RbG+C+$a?na~J~l^tYb$(H-nwn^0)v9Z$QvKSCa zpq7=4`tOV_GW5Vv!3V$x$`(tA(48}IWgzjziDD=tBV%Rt<5rWTX>LcJ;LeUJfH@!7 zjNNmxwVlRK$=HUQ>g{Mw9D=j4>##w0U*7{%1RxPPVUc@Fmg_5_P5k^;S6APAGihsR z!0j~{*YN{4&G*;8dQUjbB{zK!B-6RGUB>=9A9-wg_0JSe-a6fx*;xw3d5t7$`GE-=ovg)7uKfZg^KB8e+S) z^6SsaDt4bRRo6?uLgV!Y!=#qFD<0F0hGj!xiz(fu`=qO8{;a?cEAOyG$T=%NlZuCF z>n5OIefLU${3vbG)I9rAr0VyJ=Ou^ikHfvp%b*Ldo%$66qi>Vd7+o<1^i5E*!n+6N zt$^{sPQOAKj}ztFcr+hL+-O@gJ z331HJ(|9-IM{G2LYTL;P9uQ2l((KsSH>37~hGnI3aSvm+`73m@ZS>S#_(n9mXw2xE zIjzE9jXe(-7`$*TwIEQH+vMqFn171^!lFPn{vsRQ$kbG**~G|5a_L79h5uMcigv;a zy&A=j;GCnD83nP9r@D*9=Zu6Nd?seWsyxggH9?dV;I*GmXgZJN zRFTGLi~1M5QuMhR`8cC&SffcvkDzm3U0vPCzjZe&uH2-2eu8ZK+3)g}YuR|!^n-&Y zfc8@Q^YQZj2GE&%!?BRA^Bb~0pAaTKc1ttHl>8R`(q&j6dU|^B)_^(+4GzAX zp_MTvS8(8eYyR-=t+d;>^Zl)g5T5g-`3g+H)81YDm#zR*prk|yWvbW-I0&GV_kH^G z-s;kK?0r^Wz+&X0uLy@5O%`3LcnB*WX_`IT{na_G=^ox;B)P^9RG(8mDd1}G(_Egn zhBXbRZ+M0Lv)FTp`5>8zgX`9C0}nTMO;gih=xc87=D07)h~r?F-#-P&M2W+ANYi%~ zK!f)0_dSlVNy?6hlg;rugZvPwiV_eBs!w01BE)m686cb-8wz2KWM^lGnVw0s*?wv< z_9R+v`aQG^K=kwUo>Ir7!p7c?E^w?7Vf_mgzoe&UncI=G$|L)jEj^1239fzqeCzt-@=&LD!3e0p>ylns*}8s&Doe}A2OWTh*>uyqB>Ze-AIAl1)z*_HiDN#>W~pDti6t(SK{aFiziq~q?GFO zy3&X5=C4n!*(B~MCe9{9;fv)wx_j%)FDQyw3;%7_0ew?_WSbbRdD3xNUluooj=t&D zxu%i;=Zd{|`uar8aT5in+k#)%T^-Wvub*fXcCvJH0<($k6JaDxSA*U8f<))TSXWa+ zw=C*%#1?sOvxe3C<=+kefH$edkUo|JX=yu!1|jh!rU}oOI{&UNmv4+=a{7(G@(F$c zX%^U)jpBWg6S+WQAiI-Sw9fX`GYW{7wD`DoPd8`BaL5NIQV;6Y{cgSOW0egn)zvv3mO>2R$$EH_H-v5d5B5ru7XA-bLn77w&mBOVJ1MN$ z@f&m%F=2zW03Qze-8|M{Q!2gZr_Ki9b6tvLN5?=%$BXz2Yu0v>D=TG<{ZPmz(>xmD z-%uYh5Ys%fywkYg3DYUzLz45bhWP35l9ru738sT#sFU?)dA_(y=EhXHS*>?3n*B{JmB8v*jl#ebo!pB36Y*#k0<0!|9M0_Rz~+*gALstb z>}`8R&tpfB?E0x;tp@2_dZ=0+ecqE3R$THn_WPYqEd)n|nXy^`@&Fu2C+jCarXT*} zs^d+JDbR|vr>ON|YEJym>GCCmWy#Zx^$~;NWB;+{z8MWk)^(#p*JhrjhJ26lxW_gx 
zs75}NW(ltkm%g~}eVbJx?etUU^oKo#PPax29tlZ(a<1*dfA)j#d>7JvBUODgE~_Wg zdQ}x^v^3`Y_dWE*kJ}+^j(BHVQk)&1!>R_gE&5{~RAAp#A=kq9+2;2lgR|6XCeD|7 z=^YzAzjMyLkB`OI+5S$or&q3I@}4UBXYS|hcyo)FJhr~lDfsJypT@qQA0!oodycfAeTHmQ8EDyZZqpD(H&X|ej} zdX<%8Xm<{H4Pc|-K?Mbe*&U7-F01dJPazhJpC_H4XZc3c?_=5*roP;6k?fEV{$hT& z;F?$h7q_P|0^bx$W)dN>BT>ioc(^>@(VHncFWZ!bUP-#tsimxB-GOBUr?fQf<|pr# ztG2|i%MvvT{5MU!OSKpa&{kt6#zLL!yr=T=c2(ssn`Lfo4na>eD5!kOBH9-li_Oj? z@-!(nxkR;6cw&ukppYco4Db9q_ci}3O1y#sg>j{mZ76dmo=WJVn1eLR2xYZ12qc3V||45!hySV29gS_R&@Y^s`Gq$B5juv&ch_ zjf)l_$eK@W#>g(Oo$D%NYu7dGAwio{tXn+@pI@_q0o?Q+oDWirqxU47;Mbtr|JfeQc ztN7sXC*ktT%2v-e_#Iy^-V*;$mgwM~qJGz~dsFTW@vV@KW_No|Yk25BZwBXXXtahe zUr-A|w6yMuYC-A28JGLl)^r-uB%(ej)AqzgALs;8K+P#Tfh_|ydy^jP{f?tF(JYzb z;)*LBYHUBkGpsc^`Q!E<)NLVo8L6G&9MB+{JNSSYFL!cEn%Xdyhrq zM*qNgFtG-f{DedtNq}PUo$VMK<}gb$(y`YOBay=S^F1Rv<3T(`-&e*a%oVZHV&d;z z(I{N5wByOU{@W!xyW<&2Z1Yj8nk%{P^G&mc7J?2cy~|}B+tA3rqct8n z$WDf4C%LTbl;bPFqS_>BNNjgXF%6xaCb}0?BUt9#_0Q!Rth5fzpARKR*{AhfRmUL` z)WNFtL^|+BKgX^0NxFl_Eyg&sLKw~zq^j_&5VE~RJ*+T=BjGtY81taU>bJ5^;+f%{ zdA^+d#)8=8_dlLwIzLwE7T%R5I->N|UWq=7YeRatq~61qv8}H^2OT_Ury7~Fvj|)O zMO>Pm5yfXK8qS;xRQge{fi$9K)Lk&jR9$Y$_o;yOiV0Dc{|v`T&9;*oq^8~$!kso7v^24%#g5h2BvqGP!B2$j%oqqJ*koMa9~&Pie1vNA9=#9k-b> z&|b5=3gMeRUoy!YkVJ^$^3gx4Yv-aJdydBUE=uZ-1`J7^mn|aR=C`XO94e!wv(p?m zwG=iY*YV@FRocUXu^7QDejWx{k$p=yq3CK=Z>=O6vNl+bUb``gOMMUvGbCH@E7Emp z;L{1KF)M3YQ(@I(VfJ{C=(PTar8F&|4S#djdpi$74Ybl}BLWdwQeCP9rv@Y!PPDcx z@sCaU2LIiYCZA$`>HrHECHC6$GSCop#;D!|!U1I@t;Lc2+CLxduYI)GXkq?LY-loW z#?81XdxFc)TPyvNnxP@$>KF!9-uZ6zx*48% zB?qaW_&6Y*Rl64gqgUBD+@`25T620++!nK{xBW*-6e|&MKKbc2OJcpwfc%S$1 z;U-J5>gAvg%MbPS5fjgN4m@x?fuuKIR;jwScJ)+o$`#!UlSLEHT1yj$Z_Ik_auO`b z^4Y3fz#Vb?UY&v&7xJR+jV3_Bd(7%T{Jh76WA|1f>xy|m1jbHZKkEIpkHrUIH363)^qE2-sjWP6FN$-AWJ0H^wX0&aO0n zpM8H)fPu#6*|Vx;jU&4w(?9V62;l2`Hj01$0Inx`{>2#i7AoB3v3Qq_&cm?+o=>R|lfg2Zi zjK$uFw9FiTt!10UNXyK}TQf`*eC$yLn+L~EFy}3`N7Y-eeq5_wb+NJ-`CLq5d)Zy; zcncVbi!uLveWr2H+MoS-9Cuy>`>mRemPC&Ibsa5O8sZY807Tzdd z@HQb)o%7-{Tf~jcwQGD_1Oa03t}Rf4x3%0 
z<6l)b4%yZI*aGNh*nQ6~#Pfz^8vHLA6()DIY>he8xbNt*9Vl%z1iI~!EA#P~X(~qo|3D&#`5ruwW!YOAF0zql;MX5A5JxCc&Oxm> zyq{&IdaC>fx-$b2K01=c%efVZ@#VwA^P?j4s!|cI+%uY(NP5@rmzyRsFto>^z#iu6 zOvuxyL+^tIl!JkVuJUh@rVXju%cOZLkEcl^+aqFfab*?R0#p+RKIV)oDv~hy)?m+6 z^0eH;RM~d*)eeX?0+Y@=rKCQnM+>Pe2Yj0ffs3sLuAI+1g77weW-=;s)%5Cm0bCameNNK*00q_m`t?2?uVQI?weX zE>h?|LNkFGQ_QWEhdaRht^Nrxq%)z1JC~$FxK_bCMKTNNEZ>&;C;=e&@S?DK*l9+9 zqVJj3(Vfb?6x6SHV1PSUGM!cYr0xrSc7+@SJ5cfJekQ_R{Z?MGt(rr;2yRZH4LPIF zJ{-PhY;w_1a^G5-%iACJ?4Fgyeyd|^b$8kewNRuuY`x2mWyEDaIt zQ~zwU{;k<_PmUmH4JmeMZThhJx2B%nG6XkWC!g1)JtVxbphIT(hMIU4GlS3Z%ZE7< z{!W>Tkzv`~Xn9lajOkX+e`}38UtN6%;+vB_v=^{k+j;`W{y`OtImaagqjADmSfs^q z2S!cVAK$j=(bFypOQfnpDr{jb@YMSvRu-vh+AzZ)4}iKRh!W(kzclGObTwpCW^a(K zuCd}(m?yApKu7p`(CUO@Yn-5_wzgrhy=S|aRmwJt`$-QDZs%fQP+3+Uc~)x#xBx+H zGFE*`<7lcEF8&A zC=Oig1vL=i#0!IybRUz?QSRd%;mwCT++nX@$=7kG8@RZj{3cy)%^Q2HIYMOq(U%_! zH;JEByoi(muMi)fJlrnJ94adY-^a+^`~tkS(9WjyxQsq8ElJoA9$ijF#;<@ou-rEG zxgg-uVLy19Yv`$X1zp+C*foie?Wj9a>~zhd=Zf)FA8?cQRYvo*kyk zGv>p|JYrwJ+x&E--pV$TU3jN)%$10UV!g+uHe*|0RJWc+;7B2niM;>Z@?b&V-N3th zNF&ejltM&A=U1@~7i?!~UCLEfq#NgZWUtoki=^+^jtni4OTv=WP@7vs6d#mThM^Vo zk7XDhuljp6wy--mT^(lq!wx0?u%Y~d>u;<`SRT6>)awX+sKx9nbh>Eu;QBlCMHno8 zu6yZepL8TMko@}FE~_~>Z)9ZQLImo+D^oUkcFVHoH2M&vsm(ix0750@j;0Kx9AP|JnIX?2~ub_FAeQH##WI;KVw- zFZKa3F1GJ_zqZy6i`phdSqUGumg3x=>9aMylphZmZ6QJ_ez)$$tVNvk&?~VL&S_Lp z8NqK{dH(i+^H$!&{mb0bC1@idoj{lyF(Pi|clt{LLo<~_`NPEs#$?_7m6ttK#!jrH zW&VOP91D4=6tZ@5m7tSx?VwOV7=>W_?vzl8+8Sl&E;_%FncVn=e~K2r??#H0?aANE z*c=#1iLt6qReh`)jUN;BuBt@))dX1CPU1NdCtz3v&M-fi-(~js*0;C;$Tmm9VLIPT zphIkW@+j>D9`bucgo&lA!tIJ1~wvV*nBIglNZe!$%sI zpB(cIVUH5jGH$lo!P`r8u(ZV1aW$iGvW6?%PIyn4Zz3Icc70cNh_^33`r@I3vCZh8m#63k@QANncZvJeSs9@@|pfd<{cJGE&H2wOu^FD&D zo@7E{XfWaqe<| zocvRACPN9LA7Ld^^gZVfj6vm{mhR4N{@`j)k*xRKy&|g2MCVBg8;QgOn{%>f7@_N-ZQNkuS?F)bGd;4~ zjc)lmc89!k_1ec`w+2n^+54F9BUHUC8?fMiOY`yNa7z9J-*q)2a;`)&nO%;JuDO^F zKa*CP5y-AJYQaBT%P7#7bgeYHYxedU|G_T6k>Yv6#&;blObFRRb~zW9GK#vD{s10T zV3zZ6^A5c@E@UG9iBd9R!8pNc=SDYB(zlXH 
zH-J#14&_eN8>3(qdqg*owe~wJ|9{&2dl5T(WfZG8L734%Pkc2lIaz^PNCel4yQ3PylQ3-m+Q1IQ^Lng9*~jvM(nJdh`J^5n$q5};17S`aEE&EXPQGR(~_?5Z)R zGX8c>j3^d#16W3QkB`ph^`vHTpyCP$#F8A)-|g;bhtzq@c_F=UwrV%D8$fGTw@V}u zOBkPbdTsFcO)Xa75W0D@*;4F8lq&I~5mrwYYgB9lP6c=e(Jqo8x_9o7>Ee*2HWD@i zv4Qk20{R8m#X+~bz;O`6$I_F*|AR|v9fz3-*o_e`DDKw8=hCHPCr{p6{0T52F|?NN zU;y-rLzN^*LCnz@8;f6IzaWT@h!Uzw07{W@Mhy!-qjWUx0O`5W0UUlM9>*~Qr}g4v z;Ant}2T1?xn|1#+*^jCfU~7>m#!W<@i*YAe2zdQKI{~&cG)UxXF^9-gGH%@jFA5m- zTvOkVH`-7~_4i2>Wdf>z75JMG#iQ!u;5j)Jla-$Slo5JbQEuSzZsNij_R7N#mg@g! zq126yjN~%E7JwFl=!+5KX^_ztZ!(Gls1j|ZeZr0OG2s0{X%8CpGeYJW)jde#As#RJ zw^)aigR88sfpP{Ir18l~+%XA5)g<@wxEH?R8o$|H79nyig17+4&1j-CweK}}u3?FQ z^2BW$7&tJcA2AkU?-<y zsJdn=V7yRg{kO~Es1*S}PoOPsw(kN-43Op|zkE>CY#yDQ1WhXl#}X0}=6a#MrSAX| z2K+XIJgPfTRzaxjcm_`vdX(!Tl-ECf7fwEq?7$qfGZNCX8MQE%3oOH%zSlQ3fdlbc z7{tOHcgU*&)OF=bY)+1yrKKf`aZoFO8;-~0h&jBAd@XRKDAWctqQ`m-NO(_7jTtb^ zX0I;sBb)<#(3Xws5!8vq|41jx0(?!$m45{I5>P>cmfFy8KlMhUU;?yuT4&dh;{Wkv zNdzVk#k64X0|e@+kB?P%p)KN+1lLNjAYT176S$v~K>cDqP-6q^CR&ohx)Z`DNRrM( zF;hb~Drz9+V9If)1GIx+@WZx&-lBAY?JQzyAS2ult{OE4b~ym`iMj($u*SP^T89M# zpE)=35fPV8h1@&7(m{(t%4=-NNOC(b4Cb%g~GqgRwymCKa9 H^6>uv?zFj1 literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/moe_dynamic_padding_a.png b/model/train/yoco_moe/sources/images/moe_dynamic_padding_a.png new file mode 100644 index 0000000000000000000000000000000000000000..c331ee0729e06373d9d18aed83f31e75b889a638 GIT binary patch literal 43548 zcmeFZby!tv+XuKUP!v=IQPMy}Qo2J7y1P-jyGvB$00Pn-(!He{EI_)u6r@|&bj-c* zobQeK=9;@Erl= z85Q`?1uGF%TNDcK8}iSoa9TVP_~uo+rz&>xmIii?&u#Qk@)pKc&u#7WHHq5CP^eod z(I<}-oZ^c1+wq&fL{CIHILHY|+Mn zv9XdefYv#vw77)5SDal#LfS&SYN=1G8`rd4<^sF0ty#oy#XU^J9bEGy)tD468e{hW z^aYI{r-?Yf63=W4Z$-aL4W=IoF`%r`))^N0J-2B1EM<1LecPEzM>XOe+%obP+0SEV zvOiy*_33tK6ZMtz)QKIgWX+Wl-38>L)fx=SJ&Tg33&KP1wEb`(jees3LB?p#HI}ki zR1968qgRqhh;#1T=~Lc9ACen~A4LxQS)My7$BxR)-4E%zur@xF!J70T1< z*Y~TU(@lb+F;P)6oTR14=Ghyq(rk(<7L)5cl|3Qi84R(__d>(ODe)DR7r8Qoc(hm9 
z^#_+NSrrW%Iy+^$>Lkt5)+_levzgjls}y;ZSon&%_rs<39h6+S5o>hrF8%s0=yezfoS>T{CNhxdn7X^ovOJ>}w4Hb)+;#EN%<-`m}HRyXG9>FFUR zCZ-uqA1PQm*cZ#V5?LzCOYRsjkYzU!Wgg){E*3SQK}S|);Obu4IK|L*%l0V+cz=$Yj0=oe=ZzQiAWQnyfZJw+*U|8kQ&{SuUVz18gZS7XxWK2&$qO=SfSXwrI3|_ zBg2YQgH^w~*vpQifY85wX~MVuMaXDq?u@;yt*y&KRz~v+ZqK9Y)Mnq7lHo>J;KDYE z3#7hJmIk8O}W3MXR5lA45?zR%F;A7BVw4vqYJ*yFZHWf9=|}t#zGYGYgCSIW`!SrB&t& zT1DDD^LDa4=x_-Jb4IvZUS6}t<%6o$BWg+NsuFvKNLudN_&UQt!r67wOIYDzX&M4& zSXfxLYS|kHT3aRR>#}Y#^kFXR+e zg>Hy^cYJF1r%#{WzVO_e&$yRGy@IaYYsQou9v+6QND1c~(4>gIiQ2Y4s#CwSG99U1 zdhDsA^GWI`s+8%1d?+1FiPnCYlmMGpD1G~zlwHr+FzZ=%vF+fYi6*^nj|nd?FFz(_ zn;)ONl9!|Fnua3M;^bu|g$k}NUW;MBdE>^%iAFcPRu8h9H&x5xafFAViOY?#kqOvr>xsL zI!3~s^Q@H3?nbr{N`}L|bZu@HyUSCQ`APN?{XL3DpCmlv;CFKVV!7a7Tgxx%i2uZ3 zA<=ekwLP9E$Az(oEX&O#C%Vu$M7ABLRDw!g$yq;ryMN@{w3x(A5{hF72X2alHl`)g#<96<;Um!~_MQaC<>7l-)sy+Gs!O@4cKOH3ZN1ip zgi%F8-MP07vs*^PR(#|$q!iuE=^fwa+h=DF&9*=II%Ak8ro`Ysp z@Re+n5cwH~jP?gPAxm$>X~MJj7{2z|#0C;y!dl*D4T{c6?jHM<=-bFO-E%;{$VBM1 zsQ8_$Of}0rwW^0{AbZbl#WeK5TFQ{!6Fq_}5x4j|dZ2vl>{lzQrnhh3`eidnzUV2A z&LYv_s~q7Ex7b`SLA&S8S!Pa4l^G=RWL!dHw9*{=*b;LZDLh9# z0){9)>wL5zd^MwDE!Cm98u-0E)xE?~+QLo(^AC{msi@!@F#gc~t9Nu%V@RQ2gi=oa zUc^wK`j@Yi0bhQ8G_?)vP;UPjPApl@^YiHf{j(E5&0aPOdPMRY^Xx}T?M{IOF-c=RT|_vPJjaHLdSV+ z)D)IAbqu+d|IeS#2kIIr@8$$cP_Z#FefatFXL3jQNI|jePpd(D*Yqz%-K%DH%`Pb~ zvLj!Xbct?e`U~Fr_|yK<$+k7(fU^&ZcE}1PK8MShaFaJex1p@0^~}dup6;ej$LW%0 z`mDVWES~%~g4=nk>S62eiQp^1Zl_pXBzrl^&)AlGM z^9l_q<$MiUhR7bzgGNv4$d>t;w%AJd{Zfn3@(&#X#~M&?w0tfUd#9z{Gao6Hba!_j zF4XsTadqvRm_Q30_6YPHZcm_>N6KgkaGc4g;ND1Yu$*f98kOa8s-=(){d!K%OHeSc z^kU&q?#%40{obZ=UT&_Dl~w1F)tKk;;lliUFlBA;ODt6_t^S3DY>TnVG!2G~9A)&v z#PQ*T{e1U5(QbKZzO=f%oVxBnGM?VPdV2~$ga*2;XPt7!8YM8_?*(7kRH3CjYdKMy zr+vIPu-Aw7NMkW<&nzobmgZf5v$Zsof=%JpxwwYn)TnV$%#e!jt~*++Gch(6?J`n8 zek8dov$CqH2JbQol|;{Ea9T~6=UueO`O;GY_s=RTE8B0*$x2E}J|MuEM<4H@OAI^6 z^Ix2zf8p$`njxLok6dh_J5AgL)2EDiO>rQoUTU_^%?fu}`YGyyk$E1s!JXTKSO+mz zcHp;Xlz?JW|Ml5TQSqGGLff9oI42@70pL40IvGN1+78;VFwF 
zm55n_ub7?j5I@gAM>qH5^J_I#)h<(XO}Y{5g1rc8;rs4Q&qK@rX4%qnWbr-Bff|l3 zRZ>mKmnh#iIwWi2LTg*gBQ7)1hIa5dWZZTluG=dZ_*`5&m<}>tCmERKy;;vAL-Vo9 zzLN{M1u$LfR6XD|$QEXpKkj>Qz4#-{ily5W$-?UDz-*#tI}u6eDP&+f`1d6|k7hmd zeJDIsl+jwqby28^7p-UQ5*#R4WN*(gp=r++4dp(2_6&Kn<%zn9B2JXk{m`bldLr%K z4$mXo^lJE(FrC)3pDcB$(l3>rV!ZaMsHlkBX)URHzL?y7Q&#!~>c{0s2F>nJY5pw0 zBoqffu(f>7pin<9zKcfP@c1)8T9n9SVNxg#MUZJ>^xPE(=$c+Dfby)go_brmKelRO zWhEnUI4|AT*EbDAhe9PF9~+?EAR#G!9_zH1WQ}N5w(y+}|2Htq*x_vkn9tZ0nN?L) zS*vXYzuNt)dH0hGrD~;KTyI&KpYN(#tzER|G%8FH4l?OT;9FQ;#&8p$P$mxhTjsFN zd-Z>MGB!$}4%#m3rl(V#<#B$wl&)poy;KTVMUgqW_kN<=WD^ldqZ}7{_>KF|K!=Kr zbK8TuLGzZ!Tb7tF7a5(%@M7ko4HLQ&kLD8>d;|_Y417SLWI|y!;CJPaN8#Wr8E9d; zMzlO8EBKQ!o?k1ZngZ6#rVK3=GVZn8HLJ$f)?Gy*j)7@5lxGjU9V(_80!a26IBGvQ zPPh*c`OhSCF=$n1uDY+f?v1T_G=^ED1Y5rAF}rLH8QQ~>xbOV*i4{BV!e@}os1~>$ zQrF4g=*(r1-RQC7Y43NAKeA<-m6Nd-ROyt1NB+-V32D_y}CiGVnN=H&$T-*ij5hH!PDQ!PQu1kd? z>1zISEoAIg#;Qs@k3G*IqtT>W!*10Rw^p9u@YESFSQFWFP0P~|bPu|1D3 z^QE>#GD1Z&Zi%4ZvgNQGAMIqn$#QBiQ;7`OiMAInUaWN6E!1*dW~Szfu?S(@bI$h>a<}8Z6Rx!26<8j&C1jo z*X;$^45OWPuv<_IX?+b6^zm2THBOBlX1(g`Q-nhDzR(e6zcFlzp8r#%vUXwwZ|NLA zN$H|`hHW@9FjZx3W+ri8DlYS}9ty?U@bjw=rdynGD4Y{rvlUZnF{XxDWidD{W7+pE z`XE{V+N*UZjVy)i>;Wlh>5VlcYkfv0+mTo{IT~kA5 zrIXI(!&9GkZ?+p0ksRyx%LeIkkOvXf<#&+Lc+9|W$ptb6;$rHC5-&?X~2y>A|pA7pNw zDtQEF6tHc=#EBBq$gy^Re}AzwxqS0@5z3_j)rhp{=;&u}1MI7}hE4aHq&(Dj z3oZ-)ENZ7a*0L6=tgM~SPc^&AsH{z)Ds_Qo=buI0?_NvMl^eg2p;m}2b+E{gp=M>) zQ!q}^O*b$ddZSZ<@B0P;`JNP9Hcxz?e5raF^JAdys04^M7B+SbURoCHK7G}QJHdY~ zZtys+6tD7AHSsgVX}<{#4He7id3r3BCKso|p<`howd*#HsgR&b(O@t<<{`zSz3xB1 zG*t8y9h;D##2jsA6qnhCAP=C68^bohvIvQ4@@AZXlEx$^mR-b?32p}2(s_yBx|pe$8FxKXTZm4ND!u_K~I2# zgY{ZXLnBOImD}ovplWjVwTl+Sw}}I;Yx1_wZjhKP%221C!NwaT%6|R%maa0z;9OMJ zP)x`bR@1}i9Ncuu5eW+0SOf=1tOZXT`WEmwsKJvj$X!nd zC6DCYqXqPlSRKHfh0+?6K%eb#S2W$w@n(;-sfS0cg|$LTjLJptS8`claQ3Qs#E&8Ve^@nbIUBWGKJcfzN+}{^7G5N{Q{10r^f{gmI5)vR7 z5z&9>_C3deQKBYb<>Jcn@;bF&V8@;3#;+aLs*cWU^I+O%gBim2E&is^CndAWB!8Ec z_x7B|A|rXwNFTqq&?8uiFujFz;=n%hzcDLQrScG}g)B9rOK2EiFS@hcCB<88VXP}I 
zlW%sOH03aCkLya{bFIevy3K#Iqxc)zx&OPwyK{9qevM2IO6f=W&!uNlDqN zgEP*ZjSk=U;l0no2qHlsAZ{KpEgw0K_p3!i`QJyUrHym7-7(?rOJdpO*PrLyAR+0I z_V|4bIXncr;}<>wSTvgp{2~?4ZeUZDdGivMJ#e34WHk;~*cx`3z9@*|yS~u-D%OPr zScJ)A(S^muqQuZ_)5W17)#c%m93vjM6@3=v9{~pMqS${|Sy{oge9hDo&o9BBShoK#J@G}utQ6Jf-mG&RZB{?O zcqhA3D)jVMXtZN)Fyyhm;UMNS$o5IkgPMfppUu3 zt|6O?r5s-l4yqHJCNH4BuGPSv?dt0CBNR5ucjDD@5FVB@$YbXwIa>R6O?poIXj+@N zaM>gfyDpQeAE?;VJHZi}E;?||5|fej73elrS zNy7UXj}(3Tv<(94Uti{*v!||Aq$aQfU34Ia(SqX&Q)A%9v%8szaQYQi383|jjkC-{ zck|NGXlbbRd5f&wC4tywB!9&Tc-Bs8)|2G*>z2t-7jHVJ!YPiNc}=1BYmia>fRH)~ zs-pP?!3K$L01eFzkJA>jNxX$VYh~mk6|T)ZPcW;p6wg}-9)!jvAlhVffhc9<>D>ki z%r8};HQ1tIcH2qgORR!Lr8@IuWT8!ZT`yLldYh{gyH7V0;^N`CkiE5xW-RKScQ-f` zoo|nih2rCP6fZ%=^@06m#iPGAUJ0_t<~dDW*>K{a+E7>IB<)WVk6b9s!vtd64zSvC zxY*v;W=vP=by;edvgSM5nWFb3zX?0m_uBnxyfg3^P%*YvC+g-GK1orq0KoTK{LN!| z>qRRlMn!4dc|X#-T3gx5(mJw@O_5>Yam-(Oc6UvQuTjF@Q1L@^WQE_8I}w!o8Fp1O z_pkd`9Mw_MMoUESm4fO~xEx{8VRdA|9Y4M{AxB)5SgNZUf!SL`tNY-QWshtTe@O7O zMAlofu3$)b$1E+s@_dvCRp`@3trWIG$!fQW?hZ3b{H&e05w0&46fq@?bS+hfU4G*( zi}Z_t-k*2rhHoj(A6>$Fy=B`+difcs9s6zrBL+cZ-2-O7TpQzx;#M{H_-FR*t*r~_ zJPh~yTM)pQgGP7k{R)*{s5M8S`fU#P>~k_Q8Y?{x_tMLzx1r$fCxZl&drSb5X(LD` zvJ-crC9Y$hw(7Qfz-Q!++^7n1AZ4UyV2CKm(^q{Oq}bE{S*LTwHTFn1g-I(rU?eJw ze4&8iwC7-F4D70GCspAr<=EaL^F;S8)x&!218?-eD?p;Nim@LVN4psMVgV?_cHKPM zUmXbokKkUYsKxJJADnpeK}%Jt-mAMQaI_wT(LTm#yKIj+E+RKh2GjrN zwo~?!!6L(~tI#R2PTh%Z8|FB8_vB7KhqQm=6o8zNR6FjmwNkUPbvdfg2QlGD0aQ^i zSCv%f**0ex_Fb1VABB}E6LcsjWcB(W>WbS^LE~n>y5+75-z6YB%UkCL7K|bnV7u|{ z{tDe&D(YBj6glsL4!-btdRm`v?bDe`hvnh)46{=xnE(S3~(>EA*FD}cGlrhQk_38>ZxkNti9RSX9GwGC3OnyGYKvg>x{)Ka=y^=9pT}~i& zz!TN_jh+XoWN)jiuC47U3MFX^0s@fw{(Cp6MS)J}wVq8frU{5XNCGuTMgYFE$$b_@ zrnlIir>3b%@A59kd)!!QBnT=_xUyE)WskMj>D3gdB-^n{d-29VvYw1Y54AG$J$oIz zB)$r`&MTZps84V0934%aore}5cIp9UEVGuwQuA2;y}7%v0V~Z1R^5r}8fVX)u#DdPtL6=oPkDbezR{sw%24{N zU})#n2GMXDd$F=%rSnF5srg7cW9^O*lrIc7DN2xJZ#~E}_xPaW7?A@uDlPu=a)EDj zVsGCrXUe;xTeN;|ma$me+z8#y28>f+zeu15iW0)zUa}Vz7ACu{RAj95d4(MBGk64b 
zTznUK;O&s5*>eA^EL>%oGh);ka}L|l^9C?EL>Sow8r6RCXfw-Y6aAK+ z`y-Xg*kVVkRh`0)jZRJ42M0HQr$k7dPqyZ!zkNQb#%g@t>@x4#OJrj|O_2o_h${94 zMumuE6dpUz$8yfx+hah*bf7OhT44t5kX_HsO#_(E8uul5U@n)&u()5Bo2^xhj+qI}P~7 zg6eQCbRh~EXb>)BIH)9CQa%?2sQX4L%vVrWM1-%%Q-rPv(oR*sM)+dW{lwl?k8K{H zqt0Y_sHCTP`S}aA`zt-5mbAR3Iy?VA7aqAuN~&OKi3VHbsqDLGVTnFyNPjQtJN%j+eTmDyQY4{G)nauC!(_8Jkk2g86inmm902q-%8U86_$ zr4tl@`koMib13e_04bJ#vH)0!s{ir9*w`3=fi$rzC>a)cGA9s?V0d2!o=C}N!-IoS zz%k&%7LZB5{v!Jf1qej{{u3Y{AnIgKhO{d(9^TcvGqDQTr(a^9+;rs)pw@77Po3Pf zH7H=9dWA0tqSgdI71FgV-r`ohbw#Ge;SpciIaJ?|>N6S0=-YPi0UziE%CTzCi9#j1 z&VBINynMWNdAP>av1dHA_Toze({IihBl+BEG+cpAi{+`6m;{U!umxk6pA0y`S|jD=ZD9`MTXMy6OqW3 zVFFR8;u-|MzkdC?S&tbR%C!gbWP?k3w@^PAkXS-e{=Z2gI|Po`u~4guZ2Fv6&a>W) zh>#Bo>5z(LZbdY#!-m?-*44zj0><7Ocrljt_KMr1wn0bB6GxH=JAfVeGKda@Mns;2 zLbK4czhAQ|L@`D-b)hac^sn@Q(@(3yzsy4Z{<;{caaUvQn z5bh8mSAk|45mhLhk+O`4E&!4fs_Q2HeQ%Oo_{{!T$Vyl3c7wsx#r8iXCsPlk*~@&J z?*XKwudG-gyI|+jmoKjp;52eS+MQikSolqeRky8qK%PkSr$F=6QJqzNjEuder)M1~ zE3jPWk&jR0bldr;<&IJ82=PL_tU6f)ZxIUcH=y`&2547HOUrPTBN_pAPmsZ86=5ar zgCoIieex-?xt*5UI3`lLub|vc>`$Y=pz;HiTpTRWJ(-Kn>6S=DtcK0;Nw_@gI~IsJ z0mj}pVgsJqfZ~>gyX=h_Ew_F=Qfl5)Vmi><(=&yvqszR=xzt})n?|JN^m$RnZNhQg z)lEyM`WIBW~n{+JrU6o=}Q&h|Au|a?CpmOdq}d!n%jNP ze67d);KZ|4-Yn-L@8==n2@2@$$k>Ro=E+}31Bro}pio<$nt=Fv&U4iWx)wRQ>!v%d zea%aa-rnad&zwTB_JQ1;iMJUH6)c(mo%+P^P)Am;p{<1>py#gVW%P#H)-2{@OZ@kT zg*^`zP@_?EWstLds#~xH{mPrlXVTiQ@<_x*@d*~ zgNz6^9_dHzYnQNiYF|GKzlE|(;Fxd=T@Xxs=uS0z?pt>)e5lOB8@-d0$)GEFoxyI# zR|HRo+vA|TCLT=tP=e8yMy%O@jE0w^I%YX~`!~4?9w-rJyQ%inzhI)hajE~Xq%ZRX za2-}A#>HIKrNl2lWD;9o%prP!+ba1fE@9=_aho5zf|whk2P5$In~Cc9a+yK3CD1ua z!jdd~RsG}A+}|CU#{1m^3d3cXZXDR<56l@wK{By(&)Ypv^nH@6Lf8LZWx&6Y@o2y9cm#}@f_&||6<8@Rsdgs)8FTa0%hZnHj~4Bz zpB{`4m@2qgxJPx@D$-r2T||d%`H1bCqzCnjEVFBY(4>E;*%R1qJ?mn=teDudns{5l z6*Ia`kNK&7p{XKvp7PPV_J}rva1qKdsSH9qAI-#$ z*^Rv!jyhw3>v%qdm#{GFjT9U#y`>zyyc%07o}QjviUpv0NC%6g&ar_iS_~Ew2{}bf zhZ`ehsOUh4VH%0tuZ`Vwt#f%ZgCM#0K|S%)Vu890Q^C&5FECPRpX0DPj)1hJ#}bl~ 
z#&Af(@dI1BV*5EnL{@6Hwe}~n6vRNSqH~d?yWRr~^4?e;v)xu-tHVuCWlJ5Mq};lZ zfdNB-`-%g|w5sAtpXVOfLl7)PHdZJPz+HvM0oe<93urMQN@!-&i2`Q|SqWnozH^m~ z%O-Ww74UOa^^m!sZ@U99KScT2osP_345epe6t8r}cVO0jWfTVt#n{fS_sH>sjMW$s zK3Sjx?60LKmUn`!H1FSwY*9RLM_=1YYqVg9l!wt7T8xRW-b6Smvj4HaPum~@wU*P* zbD;B{16>A0HYw(4em_wOuIRHpk=h={K|SJ{_$vjTnxiv{Ny zlo4Y(_ebD3i!w5wv*a^i1Z(ilcv^u5(AL`OM25vJ(PKe_j|da)tpTT@+=CndBdy`t zq56fc2{UsG5HO6f!{n9BU*Ci8Zdv2Q3i7WvsZ?hpzDphydh4Y-VXs7A<_t*;7dYFx>KFbG9%x09Z6SH+|>ljUyDi?(^2PSn-l8-@4KX!GdMK_iBH#B)GV^ zve*4%xWscMOHAgip{lg>cMBeVo3eH_MrA) zxPLS3R*C&gR@~aqrLihyfTCNoH&LBh<8B+0d~hTyVDAkgEIqqRf+B5mze0w3aItsH zb+tzS9Cv!1g~7oGb_>&UVtplS?owtEh8I4A^rAp>uceUPUjNRGOEi%!4{lsiiqpL; zB1egDUGw{eg~m^ZGgph0;*~G>IB-Ce3^cS#s-01bTQ)}ryM-hHNtkG<0QGa_qQg7` zbGE;;zZUfuES5%XK)kE@y8DU(A0HnvIeCAF#}*56DE1D)3CYUClLIRZSy2#_nPk>z zC;96J#K0V^-;CfsX&v;vxK+|(P=us*79|=@hGY2ev$qtz`2nl#!MsI;Ymxu^n9yN0 zManrdm@h=VMSmf{Id>1=HyNsza3Cn*sdErp38AIK)v>tov-;NiHiPBHuRlLReXMEF zh0044{=z(fU2m(zZoB;9tX34Swj;4e@|OPJW%DZn`k8=nZCg3gN|sBxKT7pO(90*(H55v1#&MEdRV8c>kH zUFJS5KLwycIFR(gA_cdlONAD)NzzpnGk@#n!)@JbI(jgczl-D&jv1Sv4(JChk5&|! 
zRi%v-(X~K%d>SNP2X3(&Gu z2GEEr!M0n29?0CG!Dx%^>$b0l6j4n6TDr`ZEuchPMMR9L4c}`B-?LvV!5Hktk^J25 zcu&EW+_q5A_KAYMI2|I2G+O<#3w=}=vk|31X@t2s_faifbOi1uHbM(I#Aped(EFDm z=!4|UChcz3?F@BzqpDw5=GFz#YGwMm065+FD>N))Inx98^Loj%Dzv8yabbsa0yto0N%sjG~vIw!?MkW z3ZH@-{*UtbQPi^TuoPShRh}y-we#|CIsSU^m81uUiJj!$8W?E3Z@0b}XjIrd4dUC1 zLcl4#dMQ}6D4jD1-UaeEpAy7x;!DE2SF<{~y;J=Os3VZ*lZ@}D=lfgB!?o^JQi=S_ z$n)X4>@SyNbuQRWVMj?E;d~*7P<}#WoQH6FPI?GQViW7GCEKiAydbjT1 zuP%!uL1bP4!LPKL_B*j$;p)V)Wam+N{y3j;lmCI&ev#nZmVBs@xs0y&XP zQdkB_C$SAH6QPN7fN27|A!?wEH)6emBtjqgUsO6t3ruGF3HR-J zAZ-J%(g%Km0%bJ4Am|H}YUYGeCA|U|4_XdV4b}ile<5`#f| zY7XRU$~!uiAy*O010fa3ST@&Ut~0^jR+NF!AN?zmW7=?{e;P4q3o)GSBr-LXj-&t(dOWkHnDVXNq2d-26gw2w(HPcT_9Ox@%$Hq&A%-ILZ--h3klk9gJy1@PlU(ei)~h57@v#@ zD44wi1GC7O^k2VzeG~qFVWUF;QZI_vn$`UNgIULGUwd^D5_8L|s#1qIUGBZ}C`qfG zwdaULV;=oyzx=?G{aaeIu5bCnzKtSimGkEqkAE}TFAuX}Wx-(m6TO)@TyZpa&BwS- z)UU2Pd!0lk2+_+Yuu@C5lIWtmkJ9JlHAFT zN5P*9fuoZ;4`Pz28vE@VL5&dZ?RVE?kEJK3l$4g{xXtZkocH7j5(oDdYx)IYggPqn zAP(lob$aL3PMIl`xi2f*<~vZ;LIitEm!ow(vPn-PF)<+_|Hflu zoAv{RVI{uSwl+v0K)mp6jgR&y1m@rX;o%@U^^&^@93vH#&ZEP9*wnZNdyJ--G>K!k zy!8-Uo_A;t1_GSEw7mEc<5~YNnRS1n2-Mg8^LAr|J|qFG`d&!qniM8N$!T2jfaN8c zl03+Cwmp7p{XIZfeA#G;XAn8qGR5&eE&3T(&C(M|>;mX;1Dv?At0*48q-}?n3|e$nT>Zu%K7jYC|?PlN6z++jv(F40jr61Un;@aBG~BrBR*N9c!XwB>sU)HAKE-E=N<*PJh! 
z12<;DhTk!?W0qbtf8uVt6F|Nea>j6}jF!OlXQCNBxB`Z|pHKXqva$ihouZ0g>SMs3vyhT`tp7 z&*+&ibQ7V(H+HJ~UrF~g+KwAtgx^xl9@<0@-juJ1E2nm}q!hsP@L>|LmA&KOwVLk3 z+T!BWrY6z-iGRx@Gy=tf#|AroD!u-~c84*0tRf};I`th^xis6t4bmeBQHL)dBgO=P zf82mPrJq*iO9dOV9mu*Of6`N1Sy>5Zd^1pOozTA>X-LfgZ+Kl15b*k6Tsu3r1K+>i z?&!c-)G_B${2^J<*fWOdZVN0dtmeIUV5@A|%GsOq0qxMNav(G}gwxHiBf(tahY`>J zBkJ8GHF(mFkPN#(!40j)Y#hl^h1*PtG{%IP zdL$%YG&fO3)gJtA@e@DN_-~#)Yr}=v-j3u0)gzkyT{dJW{*!VGZTuJI1`As}1do(6 z$=COSX!lf*fEK$5t3==Pv?ob+hMnc61Nm&)ueS`c<>XHyAS$cL-tQHRJ!jGdLWT`w zp-dI6xReb>LD7buhtD ziZm|R`>5XFC&fco3|1GtQ4NWwwnIe9xVYkLKH;QXHg7Jg`dsw+x|N(40##=C&V1*J zNVeTuxJcCt4LSAEPD2NWi0n-)ViFSF;fE-*Pl(tGVmyedu@a)?EIQq7I+mKg4Pnp_-M!;b)+6n&i$;K0dMCROFCZwC8q>>Mom?{qq z4ic^G-?y9mb^-A?BCMwxaNFA3X@W(-SKz&Z<4wcxb-LQ? zFD3y2OYJDfDzCq8@>riLc7Uxr+mYxPKd&OGUNt883tF`FIfzfutkd1zfqoIO%Gl}g zr8}uJsn>D5{m31woS#sI7OJD8N9W80!uS z4^=oxl|Y`%xJra`4jxon*@H#1oD0dp!5WeS_VpW}w9y3xY=?Z93IAgh7Q(SGCF%^V!X z+E%DF1(ZVqX(McK^t57*@2Mj7xc~%=2=g*K*R)l0k5`|6ygep2T*AnpmwR2+1G>N8 zAcA_kWzV0;=wdnB0pjeU!@qOtRhNC}Q;OtUr2=L>s{B86y2{-mE-|Nj!)m_ZV?P)7 zjZ2l}(inw3_iLa4BrqOY0Ik;_fiADuz5EA<5>ZjWz4cWcM{+l7+NJ_bNa~O_rQF1$ z%?t$Q)H?pltgjsS;MB0{Uh}MemYu<2QJiysEzSMb*CAzQ_t0l1W$Pg$oSEG>N@7a2 zEO`|zJ&u7b-5t&)AGvh!{IMy{+0)sR`ebQZ=5igS=H7?B|Bg1(i)+BO4{7>U$>A)d z3q5Xd96Mg-skb2yN30(vI*TUGrL5ZlZZ831C*=dpKJ6vgMNDm!3rt1u zmP|30Nd}VF_$OgC@ej={WP=96cwZpY-@<e zop{icXk=r9RP#?P3fPq?tEZEOp!q-#7dUFC63l&uZeyUay?x)wPe7cu4+syUL1>)N zJ#&2s$%7a+s&Q7Z5rhF*D2qAhT7;dW5>NQva3JJFUrHg;-`}1jvb5G7{N15~c$+_z zX=!OIXA@jE0k&t>)lGo6nPiOQzCWG0JohiETl*NHx&?26iVB7zY77t+g$t+WIRE&> z6fo?cVltug1?J}D35UQm+KdFD2%;M?fGMhBr4R=$e8O}&p6WCMXBEu5uK%=vS z^Jnts;TCsi6AgzSz6|{Mw_gn*NaXH~P9c0%a0G1EAjD#W78FEqVeOso$>?rG%IZC z5vrAskQ9imj04R@(Vx(IBUepL&EY!Fx)W`H8YRPlWY!_N22vRqIAEM#`$*TKw*Ayg zkmZFjmm=#}CG29&Qu$SLHk`%~9>y(_{bc4k&%lFJ!gijQ1=ZELXwSpNlQBmcWw24~ z%LR?(J==B1?kC+UVl7F?G9$Jg&uV0^%67kfzwHI3JbR#SR#apo{(drPqIr@#Xd!>g zBQ7fyv&hV`>q9mG%vS~M(wlo_#q~S?WC4;_sFT)+8iJQUEZ{tHa4GpwBIaT@GuC@!-DIFI@f6@yH7du0rX 
zpFo%$kw-p0hamy12GIZ%`#VG~n4<<5_YW0eH-i~PY8@;Sty4Itmv>Iqhwx3vDS{z^ z$P|EGW1XkNl{XQ`zBQUf%jXrl1GLz{ym*($oI=H(L>BDAKw*K52po=u4_1jz+PR^l zTY;K@hCi`Mk5Uw}+7KH8zxBboK6EFa^joJ_Yu|%|(t$@&?fckJM0(A^FDQmc6d&ng zS>T!24FsW2ZG%0|Q+J+8XPBBeAip9&=!1^s*Il*QDYza4+h=yf%BCD7gdrlo4HfIkp8`m?@j6KJV-|~3BH%lJZv%^6) zW{uWdTmXr3VOFYEv!S&LnmP9N2I}&awVd@I_hkYYgBW(m3YGaa-*1(5M1Ks`v5Jeo z-~>wL5v|sFn5jZX1=g!n20|EZ#p&wND9(e`=C*fW9hlJV^cj6#Wb( zc!ENpuN>(>(5e=(zlGB2Iq~lUH|A1;Odz=cQ;6%Hh;vHYnR33~dI zGn$LnA$tI+_%#5!LsF4sGZNeJ*eXgicZ!8t1D{o(V%9bg-P{bZzCxbMF?yR|@!S(d zQrP!I0SRS2xZY9-$+NjV4G7HxDxsVQu0!vV2yPr<4Pb}CIwIdEVbJcIgDd-P0gj22 z9=_fHTR)W?=_V7l_R?*a1;d=dGE42RIcGutq7?-*Kin>3!8E0ZH9oG#KzzF*+D5}z z&4@i#)7B(^W4DCdc0_5upe%UG26V@eR0SHvO(&XgxraCG#sY0)%;#OXGVa8_2_4bJ zAVSrEwx=cp^`e0TBNNVy*e{Sn}&wj`j#-mzb(cDVs9zQ>+= zdQSk=K`>ge%d|f`1PLQAUKi&<78z(u#eX_xP{m#ldxD$*79Z*di$P3Obh<&*H?I*j zxwJ(!#Tag-$_AB`^r&b`@ieyPesvOYHP5kf7Z&-|{+wH1H21ex2VnZ)a36ZV9Qz<2 z;4pMq=Kob`^;2kUT=&GO%+AWHNAVg?Xavn$q^LpYz)W2#t?4VI%=^4w0Y$0G zYc=eoWV;M>2i{w-#giugD1H?hIGv|Fw)a2LW6#` z3_)y6OgAWNXe51tY!EMyP%7=6#=`KJ56e+(3#NI^sohWW$HK^2!nJ%beRs1aM-4Hg zACoHJy#+D8uYsEmUC4ta<+mt$5C6ixJv7OmX>|dPWJ)ChSncr0z&y8Gmjzl1KY5+m znt*UYG|uc}aM+LoNV7)S(!eh#%gGh;|M&VFhpi~_gQ0_C+6F?#4in#<--$V1dv4%j z^zcTp_EcP@sy{Fb(ZYe$q7!-KJxuwE8w0+tk%x!JnC)(NcL?-3fp4A5YQ77mZTV5) zMzv0QGdJx-YsD951kvV)+qHd!cLxkSDK1)Q=St%U7GbSf=zFg^ zL~;cE+4y^@q)mJ7&=yF=qS+J0&*v@Vq-YW>jIpq0c+DVjTQsA=(M~yGf>6Bq>S9o7m3CHZp4Heet~)*}cjn;vmUsyffmmoj$$>_d zL)(#hDoUcBqn>USzHH$_N!M>rf zu@6ZEQ5(sodrrp5$_$sWb5JlZz9;9jN}`ixgU5vw$;b$o#{nnkanRAj?Yfma7P#x? 
z@z}iWsgc<;$j9VNlj_AtnarzE8C^E`yet-uhlCc0MWV7_cQby5z{7R>@dBn;$!uU- zNO%32Qt0G>-liBL0iWXv_wl{`{TA-0(6*C+GYif64DEH#9US`e&$GbNX~BCN3^&SqjZs`ANeUodZZ8A^;e?cO)qa z=^SwOF-T%=?_i^2`I}Wrc*5g>%A4XfH20uM1+E5a87s(8Q1e{{VxE{mexZJXqsf+_ zE4LKLY zY@Lu?EMX)1P@pO)dOoJ)NMg4jx?Q#MO36m^2WN9g7Sn`kF8a9`@7m6P%9Ej{#`kq% z=Q}%2BgyTyY^qJWhqjv)f0)?8Qd9joTQs{G4jcWZF%Y^c^ZgUQ*1=(+tWXlxX5gPr zwx{nWcIyhYdIw!(O9h^jIFIvM5P0M9oYu+tBN}L$oEUv+lRgNG9D5q4#fUn^PzKw+ z{F%#4r@yC}c0pp+d34B=B~g5%+P3-QZq`;7qENo72Rw_?90$V#&5Hx1DO6l6Ab|)d$2_s|7>6VX zKr9VxqV@;n2I>ax-ym&Ju~bpgzhghei+%R>XZwL97Q$~9rb01wJkb)vicH9hYwC8k z!E{=1o!C!^h=?fGZNzim>=Hidj0P^&k8u~{+O838j#*Wd=X!x}ZJ=YB9u-?#YCHBZ zpwyqsA?||ZhR2K(^8Sp0P*%vhEN2VNuk3ve48MUJsuhc{zir^|L#{$yHy7+rQ}pE|DyRw|#^i=2yI*S!$`M zm!)Un%vKnq8r$XN#+FQ*4KiXiX=H}un(@)?tQAx72_U*i+xc#XXMPa>o(Ke%Gg}aZ4t)2{2*PhG`QQ+sUJ3|K;Hj2 zcRJfyI`pG>njF$ZC{HMvJHu9>lA9@Qu(uy>Av7g19$VhV&rX<a&YIxGAB3t15+#z&0bSz1@x^Bo6xHd>cF1n|YGs}c@(hoXmOciUE-p9Ka7Ulq5A z5Dtq}dU;wq0^WwvOYpR5pBIJu5;0mNdsr0}*-&Z_PSqyD^XYkUbp*5Pwzrx`fQ_QR zaVdWi?W|K`F1NBA=Bln#aQ9haM+TS~)o9;mDaQLs4C_w3tF`KQh5Q^m;{!{a>~{mr zr)Fn1b_)ahoOlx)5})aTV<+C)7Xr-;e#v1a zWZx>r9(YcOA-YChZLQWmY&1O|IoY`JfufwCgdydtjlu>3t%KIuZ0wdwDVz%VJHekeH|-vo-ncnHM7g-RwPi5IRt>y)G3q+uDGlsmX6Qc4jXnOw z0y>6rWJ@m~RV9pGsY6S>KT%P?({00{YG;<1n3(e7^NKZFTU#M3yqXp-aDTtZ5u+95 zBz<=4ijMuQ@G&?6in6=D6+~3Hy;X6!i0aSRj8#0O-&>TL3#|wqQu7jgZoZ6hDeCVc zln=<+9ToKsjkxlz{>2S=!IX)$F^gaFt!N1$*|2Iy>1)t;^?Q8m9gR9o_!khYKTi(= zX`Q+?P8BUmSVZy{*l;Xa+8@3Zb4+bNx)c1}dFsk~`O+^dns6b=)YobeudP{LVKNwJ zij9K?A#4y9LddYLzQlIdsEWM1=EVC~<5bQ}ppOEK8E1SNGqGHeC-GSn(1(($%`)3Ur_pfs=%T+TJ z6_0UZmM zKq22%+9_8<3vk@>I7@u|Hf9BmRiP_tz+$47o`>L0whf3c-H3BI^jv{5V;htEuEUJj z1GW!EOB_2Xo=;JOShTz<;nN33zTthD-`3^wr*FOsBJA#OF#bB->iBDFQ}?@Xu2GSV zSYSksB8p|1#?>ik!LaSotH8K93 zX;#QCkdi#_QadW6MIMBGt~xh6EJw0=sMKO7ZuCmZm|n>Rfs3|gk&}9*k&RQ2I7!*# z_XM}jdkd9Y{d~2!n){%owe=Dh_JPp*pPZil%z^WXf;We#1vqYp;7rFTO zZ*pqcT>n4Xd-HIr`}S*iSDH(R28ryD&_EHHqOBy9VVfgEN|LFNv6KuMD^o-%lzEQK znWt1T3mK9rsgU_uUtQNdJ@@Z9p5r~Mop$T; 
zYtq)yDV>=Z9v_U2d^0>zO&d!V$xj~m_1mc1u1d3If0^}VY5(8?PmdQ@T15Qa&GaOY z1#M$xHJ0}GxMB+l%}8f{Sk*ByIgN#v=5*=0;t$#ECvwNVg40vZd4XY)153&GYi~F8*nXwE@O54iqX%%3hp)ZKDgD(c~L)?vHm5K>teEeDjhvNJ@rmon-AXHDH>UOS&Ty1cN9iE z$;wj0ped(Hzq1D`ZGYhh>>n<;tGo~Iw6(N%6tKwaNMCd?MuBnc#_Sj^v;uwe|>L?^t(SJ>ULggtyG#~ zCMS*Oi?r`~#{;k2xUn7eWhL-w@qibJ*v!{w$Roavj6B@Q9FQ17m(hd~7)KoxEt})_ zTO()VTpMQh=Up=QUCH>irw^MW{1IKkUKKtH{v^FhNx6xG^su1d)0mi5Amod_e?N%a z@Svom-|N>$TiF?uR%&W#Nei=-mcfqbg1a<@LcObUm$UbwKH2I}gzXDuM5f)1 z>Uwqdb0`=)GaKD(k-dqwPnl-Ypv%o;W8s0c(4U`~5IUiziR{x70%aCa8+jXU3Pm+KgpPwu zCRaaAOxzghJhK<8;QhOIH>am>w6fD{#w8`u0x=VIp0j(~94>Z)0aWBMT%J9TB~8gEI?y!SI&8enhR?Of!v zN&x*{PoJ`riu(FEaFN|hN=kB#^eZ#?Iy`*3{R?Dt8v`~6yzl8DqXgG)5#P1{l9?d+ zNPCL@3gYz5o1bjA10?z#xo?d4VlUg`wyCkM3aC`Dxw!Vq+3sLtJ9+NhPUy#oW1Adj zkbp5tljRANo1Ml*DkEO_Vc4_M9%{6fEVCU9o;0LxKbN^@eI6bOQ&1~1BIOWvSr9{2 z{jZ8-og7z;)W@c#w4)YG;AnK6r!TbX)6pvh8ZHeQvdzUOi;L<7KbWBTO@1*jHQMf; z?AXs6le~vK42WhU>Tya?q`%MtcCggLV?+tdlA!KLHvwzPxXeMow44MT*Tdn}NpJ9c z?8B|buBJT zWn1;Hi?AQsi(`s$-Vql&1Qvo?$xVKqv^wEPax@UVJ_@>iVGHP`E3$8Gb;E0lN z;Q^6tN8T=3Pvg0o!nyUp@4%t2CB#UyewAAU1qJPf6JNdS?2MTF z4FV)&gHCjL`3an+Kxhb+9hM$Bt?_cO!fV?x-M0F;xFG)5LvPkeRcWxxQm&T&q@}*b z>5-uQiWXwyih$iG5AyThBCbOl4NBJFw{Iu-wyYp!WM;Cn`knTDA&??>mUZ%+N8bz# zuI?f6W-hvY;Q`dQK|(56H~K%n)cx*SoYx~2nL~CuhdMU1HX&?KR|p`Mkru|;7U?Ve z*k0|O`5>Gr?Y~y4irAJ%dT+S%Kn_#_0?SWten?#dmlV{OUbqpfY424EAqmwebQUM} z^u!SLN=LSmjV#mgg!)PzDTmZCX&^hOqv9Mjj zp`UXg^uc(}czR{sAcCyy&w0mH=4k5F)J1}|+r@9L1 zFP)n+cXpk5G+5op)fhr|&EDJQK_k~3Uv282o0jt`*AJAj-=Zo}j9!A=qXnarz>wkj zA6G>PUBW>kMB2#T=Cf9cxO&}3Wdbkb!o$kKt&?_~J!)MA22upFy?=`XzApyu-GVbS z%8Or2%p3Y3Y-c5=@AmmX&)FbB8aeCX@xm)bI)#GE(r1T3#*SpWr8wlufjA24s7t2` zro;&nmH2&}8l0EdVyDi9vqo-UIY7M3jyP?|WUE^!Klu#YHNFi??RG_P)y%-uYz^au zELo2GldKRrxO7>8AfK^0{`73qPu=9L*wdbrsDv?7rtwz^Lb~Ij4*8t|GHN0MQwq7A z2f8eSZ+Mp*q$V`qk$#%d{9Ji&je&6>5lP_Z6Tjxbr$QGtsFY-O(WMy8Vi2-8Wl&e^ zwXidwmiO>DKhSUOo--U5YZ;D2rwJ4v-vo(=u{allmGP=0uER;HW&3} zl%AL{A697Kb%EKC*(nO)Ni}xPLzQPGm3q&aE&hIN=zg8-vy*0UWc_M}a}{K9!wQs~ 
zzB{}Tg*`KdprnHG^$s)FIdnj(w1R{)_prk8gWKdC!~*38tBUNzXcZcsaIYhE1CvkT zyPDj550ocy`vm2$rl$`mx`RQXe;*7;abIp|<>oVD0h8&LLy0ef1l0Ja=NIxCn0!=N z>tbGhc_a`nLj5o{wHwAiYd3XEk<6qk43ekcEx^$L( zrUSK8^ntdaeis$}gv$NRBda^u>K4A#3emS$&WYyhCP!H8U}ko$=q(;jF1*SdX&F{- z$fd5FA0NLSdVnHA=C0J>xa=6F& z28Qmy9=GS{J_?1_pjpf0Vciuv#j+fmQD%TmDToeFXSonQfQyJoK19@n7kr(X!2zo4fYlc?G%EFf@S=)%X}N8nkI@#S2eIH{C9lpcI+l*{dbtJ#ZJD1ctv@z19wHZQG4OUL zsR(!JL{{YEy>ESgd(r~cDNy`m;RNg5-k)b<21cMicFN)#7&K0l4}Ien(X0pR!Pgp zHTrKYz{8AfwLN=Jn42G9lqm)_F}QMdQ7wv-PwnPOC4D``JsvL0umzIj6-6aH zxtNvwa}1BhM$YS>>@{}19%IU zT8%_w4h9GEEgp}0L2Kq@&Gvg;Tg?nQd|$k%3Ag8)ECvJ}txzK9e6{SGc8`-x6zsOY zR(%eXNS4=4R+qdAJJ+_ZzH12#9Pvi16~<1Rdu8f$4$*qI?=`AgBXChi&QQ?Itb^7( zNT$rfOgcrwxlnSrdLMO5s-1mOy$FNn^XoyFwI|$iZJwDh?qT!yh;+Yu*Un&HG0H7F z%LWhVWcv$c(+4t_4{G27nJJ2jj$R257pnU2L3X95_iN-TBF@d;7WV6V-EAaq;|Pnf zmER98W4|wqdq=@RXQaKu_OY3xBMX-53!EIoHa<*7fsVbzok98n4x0M);o05eJI&=4 zCB{oP&uF>#qz2xPwIS>=NMf=EUW19`02}y&&3BM>)xA}D+<=H>%%?o$F|ZuaHJs36 zXL=b;H;!^HHFG}$a~;F*sQq!9R$6`&npEE9-n@+$z3_RV*a49`&%^-rir^w{PFBube{TT*wCh_;>9%{O-544Nvr7k>f5J zx9j+k`&8vZDknR4)68b|9i@y3wY)*)m+8jaH28vonCjK_1BNf3M0f}NOA3Je;RRW% zJh)>eICbMA?3r$5uQLmm=4je25TcrfBxLJ+_ypqQ{V(w3Gwl^A}Df5#7 zrHlg)b*hZUB|@1VLC*fmx;C@V*U~=4GV+qCY0YJ8%g!y{jSl4tj_jvgYj4H#OYs=& zi+O#3!Skz1LfwAKc0;CdPtn!)oGMykdeS#XCZFqFi#z-6TSZKe=Jt4-vMEo?#71zm zD?r^#c45=ToReS1>e#%k-U%LiD--okZ-NFC%GKD%e#90K*ick$wohW~curPkCXgXb z`y@jdAH*XmFnbwfn(KRPjzzZLq`5Y_v(ul7IcULaQMtH%J*7AeQl+_kFjEoJ^v-%8;6cO%#EeY%*=|9B&R%jasG(n_P08-YO2|uq8VG#PNcm^SjoCU zpmfZ0ytBm*WTkrChLZ$+$;3Pq#E8ez`Qq6;*2E<|*c(c|XTVv}V>8*VO~+cWdZE9yCCPU$f5BDeCw-CcZJduKgF4?2iNWQ2Kc zn~HX9S7ziKvkx|KSx>WElgYIwv)oCbc_l-)ew0SW(`_wY>!qVyj1w;L_3ba|5wG)Q z8y6+3r5^B1_1b0|;$+kFGt*k$5?(@cFc3W-xGT_2)8L%a!;Epj3jWTt40zm5wnMD3 zq3svgVl8lMX?7j&k-qbL|NiHR73Jk=K@Zl@EH@cWVdyu!A$>MsA~uc(J}mFs+rzTn z<+Y5`{@m?OMZMp=u-YTOAYlDdw>e`Hu& z0O!O#E~Q#t<#&9x{bqa}^JH8xFO+IWs!qEjPh7d8Gma7sg-!+rWK*vs6r&8)Gsm;b zWM-s#Sw_blPMTq`&9@lS}i)7y6GDQhN( zo0JPjw+mBWKg``QU_VydyNlH?c61NoSj}0vg6IPYDVDt_)Yjg=P}&wruHs<$Qr=4E 
zD*R=vBj>@-Y(X_<<#EUW!)<6Wc3?Q2Ez7vmKlpP8Gv{`4TaO#39u)Im^eql$I%gcX z6~`2~EX}GSqz{5lw>`qy17zXDajfAm$5NP8GB$PS))`-DzkUhoQK&&t+)3IQ5v&>- z8bvtV-#W{_gpg6KgU#EC{Sfb`ep?HRgS@AV6LcADjp|SX7nWXal4&_GJFiptV0f(3 z+cJ7mYw&uG^O2vH>0|8a6}1M3XWQ!*3gnGPgNA?Ba5m2xmA5uF1|EuVOf}BB)w&Qm z{Ifo@J~6}ZVyAz!ztG;kZ!OaX0#?$Ieef@7L6!<&T6I!T@#SkA-!_d~Z@q8?A-_(- zg+B1!$1pEMICf&J*REZ|2nJ_tq*7s>{vOGH;7T_5_twn$G_ctnc|P=Q=`DW({0r2RSt3EJCe%yI?@RnBGaXN5uZU2YbO-7Nz{#mgkXF8o z`M~s+wt<3SF$nR9j1N991UjY1(BS;|+Rv?;8l19fveC*Rk01BD$-8?{$R?&M8Cqqu zq|a`M6BwJL@*&_b5e&VZf|ZzMkr~2#UZ?a?oIAm3xUJv%%S71t2pxUZ-rhRe7h<(d zTFQ(vXMTiyI%r^T|HyWI@-a6S2gO#l+N;L?A_@nZNkOwohahh3aoeiv55#_%$uK{2 zvvHyg>H~YVTvgSeuc(b%wIzL;qNjtWnmKwMh(p~_$-VVi@%fr6nF89pbleGv`MTz9 z_NhfBFUxa++;<7w529nd&6BEGnL||Q7F$> zw2$Gj&YL$kP+kLjVZV(#gerj>Jc9o4(xWdPgQ+LNk52i&U~oj;F;G>eqTz`zVg=q9 z5~PC@+jkIyGtctYm0Nd-Im;L<^OJ6C8(Bq$1ibe3`!!06TFcfshVBKcxa{5Hg{$gw z>#@fdHyuNvho;~4YuAj-j#xW{zWMw;ZXu|)<#EPiEAtv2wQ>~9HB2reJ;EK9tVE@{ zC@L{FjC#ydMjX^0Oyp?J7lc+MV5i$F6#Tr60O)Of!@8@&zTu@Kc~@;Zw)w^DROn%; zG|nD34mTcWv`TuP@oVfk%q3M>dc?ge6uwxJeIsklKvjkh9Fc2=bo;T0^$7x`!y~L7xSQ*gLZligLk-i!O<(qXhIWbD3A2M&RY$(V*38UI0wmKSJa@+v&7IbRzPi8DE1q~js${&BesIRs&DeOvHAxqT6O2VXUHF9L_6B4IALxH*I!xTT-Y4Z<^agn3lCZrK4x+18IRWm#rYt>eg?M@BUQ~@lUqIZ*B3qk~@D$ zkl!l@2s$H#lg3R)J+O+oJZtNbj;IbuQr$O6&iN0oW(bH+k+YZpoooG1_5)Or{$CHX zqp&;NZB&^hYikbdLuh~-K!wWz1UM?{&-?zBH4zbH=N{c(LZBA{-oCz3a1JMCQ>H!J zayRt;UeH9JBmnk4D}X@s;DmbbZW}G<)qVTMW-o2~y$<86S83sAtbctYea@D68uf}n zr>(ISdsbAOCSJkHyTm|nh&s(2MmAC}b(lV<2?LR;UxKidf&P8me}$K<9>6k?fGpWu z=J>U=wJ*3Ir>LgE@C>ed>2fJFietIL|D?;pwb(UOYQyLQS4?pZbYGOBG#VKgEKf{K z1lRmXo%$&RbNg*2U3r46ACp?V_h{iLkB2c6dD2-Fh*y z3{Vfq`%1?0$;Tkyy1gRvQUuDGs=>b+%>iDp=p{CdM=EAn_yUneH*U-Y4j% z4)1}TR`(!{B9Rosh1o?I$WcG^1J92y;f5mr4v2P~2T5ewlu!(i)$7X_qc_v0zQEa>H{5v2e^d#z7mXI=mw4`FT#_Z2u_MXsSsFQrOXls z66Gay^r#m+uB3>G3ZPKtty||8oBayQcGM> zWfg(0T>W$uogbJ`+O`BuGse@bFDY0Lc#a9Lr%;5B+J0jZGViPf`$t_#jGq$!U(by$ 
zKYziH^sQgMzJCU~MtDqE9%qZe61T?#i|-967*2JVx=QS+)-RBvQ^0V>^t|#DEi@8H~%DUSz`(9@=LQL3IhL!r!;H?j;zgllsI!gc2;9hlh?|<1Gp$ zXK9uM{(NFmR(7@rVw7XMb}hYpY1}USnS320pLUoWk#cg%CkB;e7%Kz_0GYh=&zD_> zP%3=CJSD@GfI-26E&U^D|Cu#+5mV*s5@gl;U@=y|h5E6n8rs;)zvmY+vd%;+ryWnL zL!k?agrciJmbLT&>ClO$5IEwsBn9HlNLQJDwNc0cLT3#aQq$nVnYW@RFv1TZ(d=nB zuL*Q`p;Q>CC|1i4iOA%~KAz}_qkkb=mFu&`wns7g2(U*jE%)OuV+vn=Shz12{lQnk zuR>+A#m#Z8f+0&z!5Qy?038=|5$L)Q$x@yv)pXUEXzY}wsAgmQaOQbk@U^a;X;K^+ zP~}N()(*-Nx*Gdv|GI{u93ggLVPOxr%)B#Qy=oPmU&T`bcRnraW=p5^9>4*X2j*Z>zUd(l|smkH0Gbd8-neR}<2h1+p) zI2SG|zl#Rz+1B1}Em8PkVL8b>Z+sPzd5&vdCGQPIV{{xVcW4%AURJl2)+6c2Ik&-Pk4b#Pnlkal{cAzNXWA6&1#j=qSye2S3l zSbp!80^|l%TSh5_@%bB*mE7MiWFhEz6vcf*Z!-j0k@f16Q~TA!9;TZ;o)OvJN^b~h z79oGyGVcgyV@fioIj!RbadDSP8J7+}nh1~LHu0_rY_1m4g$G*nqkrl}PZORw z;)`=XmzH~A;avZLN6o{H62IYT_*53+hY0F;=tQ!>>;=Wq0o69UQ`E)`p^t}uN_5B( zR(R<%6{w9IKE!tiVOW;x8n4U+%5-YT-9VARW{sU3x;yTbeW*72NnITp zD7k6FhVxcd_Zv38+=WVPkizNLc*x?ylT>OM>W`Boo0^(d$tZotrL~MiwACU4*e(Y^ zNC!nkOiUB{HW0I=O05%(ZlQ(L|LfPUvJQOPHXs0JE-Hy1L#{sa695&dXl&PdT>U-Z zXqJ8(liFIfJ@480l^Ite#%^>mmaHHw&P~i9!n53WEQB|7OT9_X&p#KrfKmuBE_Z;w1oNyo=HeoGb9HnbPz3-QB3-=j zZ|Se=DH=qhn^#?(oo{8n?Tv*4M6ltkqePOY{!<8)Nt+74dsEzZ2g-#?Y)UCn@`o@BVWGge0>{?Z-Er^0%_E;l0jWw{6gj>cr@y0N zN5IZ?r?B~+#KlDz8AzT?Lg$fK*sjTH?|E6DC&%%`Zd8%tJ3~y2DzyyIKannM zd9-y;IGTOHW@fBY9CyeuDwR}(RgZLjJqdZ;$+r`Wl&AxT8umMX8y$6vF$oC|H@fdG zzkTX4xLg8(otWUCru30|evhEYxS6(Zqf1;nbR6*zK*({#(fF}R?iXde--o(*>WxDJ zv1r!vxoyHowEZei6t|pKreC(wF=XPMWnX+aYe~N-PXn-1U0ezyglqRO`}DxDM&`ts z#urHbV8NiC>%nsA$`$Vh_P7>s*T_&(P++n&5KTB2`HviNM=aI))V~e@5bjfjSo#O^ z&^Q1KGIE!q$_|~od;BS#80)5^M~~t>Io)*tZ zH9anFXgCN%VkCYbhWVn}a+QhcA@#z`vkqSOvt7N1l^;zH4vmpUd(V7EC)*E#mqp{20|0MgpN9#rk z9L3VF9+qw#L;|3BkGs7%J0%NyZ%vCuZH$`IYz7{MgKN&?dkYYT^q~5Lvi{;gCN0mQL;h;ybJIk15pVhTp@4>J`+3zW$flP1Z z+*`)>&F!epnX!EwVAtVmC==u~J86RQLPD#jRQ4MdK2e~lMxilFJD%#+Fl(NdaogZhCe-P&=%n-d z+9@0EAZvTQ`oZz4BG|M+5_fUwAXhf$kw!qI`U2U~YUrIW3ix}xsD+a_+j(Q-yQYY7 zsq|`Vf;^a9qb-kn+w?LWAb9$U1nINiryWB=Lj38~8pmhnO^$qi3CZ1m$$czgu=pfYt$|`91an{NJgRTa-nagq 
zzKa|Sz=%cYYDXnP_W%WN)3wgui&v+Wyp$VxCc0X3eVoxs>YDr4;}Q{y@~p2HHOhoS ziBKs335z85S=9H-GJy%Bk`%TjRN0`_#o+po9A7Wg7i}V1&E`u222STC*l@U7;;ii#zX*dm$gc8}9 zQ6M4I(M(^3)^0M^*6AY_cnoDuwLhOoyb9^fEs(kH5HtyFm!)vf6NwMc)<5yDFyh~@ zXz@qvAu5iWGIaI0NmMH?>En#U1^BkdZrHq(2C0RFh8DxeoKU-1&qM851-G13B5`|t z?Irn}aP3mQrgE2153WP6OoDYeh%f9TL4#t4k_k;tM5g*0?A+Ut;8)h!bw0J)d}MQu z^ZZdrxQ&ohQC=kQNI^$bdG(J9L`#Zq%W-Y(9mFWmLV-h$sxG-h{2-B4=3LU0K6zj= z@FQX*(mhr7r5uZVGW*6)E|hkBo|;`sUAI#LyHRzz^xqeX1Zg;Xng3G%h~z_~^SiCN z;BMqx-ySGl$mGr7KnYEe#QZOnr4{t>4s=^-VaS(^q|XXKa`@-5b6gfBni4c`Bldp^ z%^`HAl!qng_ji|QsUQps*O9YwZ;0Wjd%3pwkwDqfK2L)Zj&PGE^rRvkPmjB&IM3S? zF1Sl@K=YFBW%ah5JNt>?SLaDV0qcRh@5`H!tXPRCZV&Rt$o~2EY$y^67U#!$f$k8G z9ko3B#=bw}rubMu=dAR*-tmMX1s0Q zzYKR519Uq0jY2UH8@mVufX83hMK9mJk@}zen7|=0n{}Rfbd^(sc43K?-7yg!0IM2izq4~t4u)|vX>~W=zq$l{FnAcr45?4l_Mt( zL2?{NLV!*%?ywyu4v!bdmavF`QD|@8H#F=P-GEVDks8!Z_*D>bn9?ns$8;Oqq?0sa zT|HiC0r8)Y>>3N+sYvC|%5Ny#5eraRMddd3c{K#n9@Wp2lL?t=NMz*Fp~+N~xwaUA z_S2_Nqn!7EV7wiTd{fDYL%V=-r)%uYj>BQN)@FV_JzH6QmN(*34T#za)K|*Q@>ykO zjvwl^9H#weW;#wGFY~Lo4L)T;RI%q`%f#PWfOfX()G_;f-FZP&8m4Kigxtu48CY)r zSf&;Z8@tbLTM;i(C>K7qw0Iobb<;waPn5B`SeW%~W3a%awbi-vhp$#7=$&-qpJY;R zXtJ3Sd1;lsl&)%ge1fD5q8ez7q#F)HeyF)^XVtD%-*do!fZ!ayZ7iddrppHO1^S3W zu0T2dy&7bsj0mX0>p>f-m=D;A`QrB{WKGV_gGRd*V{-Oigc8ya(YzgaoFVzpuxr9T zUl=a=3y^NLNX>o#E+aABw;5_^m0i~@h6o=6Tpzb z7jV(p2L9#MApwCm&(E*L+J|`S$*X5^ah#cgv*8&$OKANl1}BEQ+ADd*6O;m_71#ok z2{#-5SYK7?yUj@=a&Q2dV1u)7MK3dHAFJQ6-(Ii#lSjvysnmV<)R)ROpqp7%d3m-r zQgGj_-4?UR=|@&8ep3^M7_g_+8-W?bP0LmLZpDk6Y2u9$3uH#Q7CpJOzrPvHN$=M5{qrp z$ELKIa7D?x;gw!OycI%bFXmr6s)fPskW}kwI`Y0GD zflSgw(gjKgg;{uH-;9g1Lpk+CGp1HbiO~^it;v;ID+MIilP;m-TrSTfGFDL5(fP<% zBM6~M|9C)tM}{OL!*EN>HguW_okTywn}7&3SR;3!a-jFG^pt!rMebc_?aV>1sPkwC zHdFt1YO8Q?Tj{S~hi|%5@0PdP803bRsbmuz*!FvWXRUP-dlG~C*Glzjxxi;$PlX&P z_cT9$=le^Hk7C$aXbq8Ez<&Gyb=6I%*h!n@!ki@7pdPg;AP11;k>WU+yn?h5${v?_C z&^g6LzTt-q_wLP@T^ zszF;$oqa zpm-2S@*iO+eLxukK?}HssjWTQLP$4!PF!%8(jyYpmWNj^wkyam%AAV-Mz_A?Rc2=Y 
zxhtI-Z#En|!^k{6?@VlH&yw0HK`#0-DI0+2_ZMj6HUN0Kb!^wgGWu4ukF?a19K`Hr z?^SEXjn6MASQh4qnC}*(&QOyh=b?oSmSiVlz&i2h)Z-soV4VwcyaT;Itkl)%}e=@hXGs_tp<3eWx;^H(-P0mhxAcl=Xy<{D!X|5|{Zhr}dP^~i7$ec1pVG)zR> zy@^xAWFu{h>2D#pz*)Ey?pBomhPHF(&X~13(|4Re0ouh+8m>I#?fc~o`Xa4TDm z$2&IDW=e>5byx~}zV+faVE^f8Ka<8VYV1`T&;G61(H-|_3cW7ZqO3x{^TjfR87*GrrcW>MUsH#Ie7z3&YJsQd6i*taKO3;Qd)&}MSM&9R07wqb^46cAwSLD)Zl5IB(a#;1{NepYqTVbGBS;aQRoO}px*I^;@4YJPWywlj2@fnw)%Hk>_L4Dn?qylX3I zZ+m!fK=ILrXwb<0DUl_{{kEFW^X@fPIL&sXpILMVPF%yi@dm-gaZ~W^6p`GN?)-@; zGLyKtH{2OeK9iZ6ui5{jGBfG7gU4)Ob6kQ>ID~&~5*xyEw|^?mSpXoUl~_9dkC|(r znVsFXzY|wE&rF&#kB@Q^35vjZxAUHexR%P$bjOMEG|O5Znw5kU#CVTpMayK*UF|d& z_8Qhb63o6f$nY0iN4bx1_)2@$#z>b15p-~jF7NrbwAGIwgH*?%k6z^8e(8W=yD0ni zH6*ZG$4yOniLjFTc6h=7^_appek_GlUr+y6{_#gEC{=W-Y0b#p3?}KimE3>Ed}>Rq zP$OgvW!SALXxB&tU;!(Zf=SK)8cg=1l_@=`2-OxGClN?0e!jya^}nT?U|nQe!MH9} zkri}ThW{OskG%SeV?wbm?z*6zw8(NQiSU#ts<#`>Y<`|s_*Y!YMe~@5OV#(#R2^*$ zqT`6Fe)E$y6tUay{sS(aM6EV)H}o9@pM-*2QKA4ddMRAi%{WrAeBDLMA1F?gUvq!& zU-=CCA!KwFMB)cTrHTk0SuVjz*$z#Co~!z;U-_D_Oi6DnO zSiTZOHLTMg{*}O?_VLFOL98kS%_o*P5tAz^qJA5g!o~q!_@7xU$5k$(X&yFz z)zat5s|D^_;!*$1gbZ}`Ew8UHO>zdANSLb1{-s)FJ8HZ(!X~W@8|x2YxhswaKj9uq z-LyL&5INkAB~tl?!Kb_%n*lzcC}`<~2?nZwn4BQZlp6Kjo){6CCsBHGhbYEyK?7Ky zW?ct`r2!?Q{;LR8EyQMl^j>Sh`47>m#a?;mi4lVLr8JpjvAEzw(9kZwOW5AEv~UyI zkyR*e*Bp#Cwa%ZkwP>NXl~yHq4QPw+0Sw)L%=oRxoNky3D; zMh+D(fV*``&4`mj=Q2p!2xw)gI!5(XS(yy+JKz`DL!_ja8sQ|Ud;Kc)-)Ck!8=HIA5_siN z9R<7+6gt77r7ioH>1`YZ22+L(;cgHvE~PSVA&(wOqZr@<)pcnQ|DZigz37Olp{r{Q zD9oceEqVb>Rl&Nhl2?R#=?uABI^8S>5=L zngcb&na>lVaoPf%Y=~vW`1E=~W+e}UOv zth)x0!poPQ#1L)iR*;bcY5j01!M)dal&Ji@l01}#GEnffz1Xgh6bBQh3iTFX{JHP$ zlBGx#kuRnXjsAO)4_NFPeA*rT%q+tb4IPC-2>znC4+YA9ke0{9k4qYD@s2Bfe+$L8%T7 zFky@Cn-|(K9@PUUC1;d0<7iAxP2+7sISm&%)Vs8P)rhm6dE2uicUB~@dUzrzI?<;+ zwI}LeqGA&NgUx@kSCFkCQ-VOm;HeRQjr4RL`xmCXZyi8tjj0t4a zykhUj*REfeYB-FbFfSQ*g@~y3p9H~DuV`xS%Qj+lryYziyV}Hxr4cG?mRtcOM zb%rs}-pwz{u+5@d{EQ(%ei9np`XLZ$sL~Ms~>yB^Z-a7qn{r_1Zwo?aWj5!IhiN4!qV2#dD-(0`lPJ}iP2(8$=oO(`V5LHB~Fb* 
zL#}j9rhmzLH4Xbw)s3b}q`=ntfTsYDdDYpQLN=dzBi&|y5ww^IYs2LX z20~tH{6iX!+ELOb24_gSQ6UJCKc)_0Jam=zF9p7zf4A~ja!aN9PMEV!{P?2NxEue* zByw&W3Blbse9|M+N-Xv%Ie&WpJ|IYkYZ$q1!}E3DyIJ(}E*qE9x*wWrcA4K=Fz#cn zu^{m$L0IEnVq*HdQfs}M{P;moeLLr8c^{0g%L~IUMyWS_*b#H$-i975o<_vjepjHJ z6!hp<_;?y>uR8CV5ywKc^di{WIwL+pBFusE?(~TIgu@&;Bg}^a-~2}^YzFb>n;Siz zdq^aNkjGD-z9(^ggs4TxY(kBKm72RpT!~{tCC`o5aKmm%Sq7QPgPnIx&*j_)ES@ zPk*yfeXUjPuOej1`|`JMXS!%)#BuQ{O-|T3!x-h3th-M@lda&ke0DP{D=Y6_-Hy{C z%R-X)`=@5#cXbu^9Ii*Dh}h5CL!6W`TCye8hx27BHn|iOi?D9?VGW_HDpQtIwK(um!wjQVWmuAp}Vv9ptFdVA@s37m;P0!07@#AzrtJ2wHM5UnUe5?}EPPj_=Ju*k>{?(~Ey zsIpTgqruY1?Qxp(%LaDg2@;fk~T9-B+yikYlQ4a zL181P5A_dY>a!sxNE9?1FZD#le>GnZ$VM+V>BEeO;`uDKU1v3jGpzRQ+nD?l@`A>n z-3|%s*FN;ixN{x_D(~LW9ahk83|uRtceEw- zxZaq+=FMmO1^M}(o~aS)oYv&qM+)gWW`y=9Wx0j-#R)plaVobfvp=8JWzLdx@Z6zy zZ^I}su8J9b74=|?&xwhCzFKh0DiNP-ST%KZ%Zc-f0D_THMez|cfHFtj4`XUSL#iMQ z*120_w}M{Z%1U|6oT@BYu@<%XAgvWH9^LYx3FttM-MB%M&k0#p(F*x!{~)?N>jB!E z?wa2k_e=9AUe0N9i2pVC<_$Omh{e;BM&4DIdbZfT8U~BpwZ?s<5jqjbh+SBF${BRD zj~5^qL|dhmXC$Vd?;C6W@F6OBJGVy|NyIYlG_vDdYa8eac$>e9fIuU>G$7R^q8R!M zSjwA7zo@8918uL$0StC_x`|A&-*K`r{1wb5w~23TNLGPip;>?ZNP%KWWEDHINn zC6q0_obD+OwWt%oB+wqfa$aMFIEv4z_3W~MdL}woT?eIjy}Q_8Az1jj80N77Yl7H z0J?!b>B+6Vrqxc0TYJXsv{Qcp7ID{ zd7Ei;3eJG5{*WXan3&vagUV8c!yow{P@Ue_F8H)&Yc_nCmCl(Niy!@;=<7G*%KATi zLaU#$MBe^O@O-wYCQdG=t<))2j8e$ZL5tRyEX}Nil z^#%6jaoz z-+xzl8gfm_q+2eT&dlNVfi|{ZRa^TWA%Lt5*o{mfYMmtj1j5Ops%RbhXZ|f}rSHd9 zybI1Nd5AWG-RL6rA+!ev_5hTG^V`ZyrkIuC+s zDbLkXI(~cw(G8r)P7vI4n%%|^=GpqnClp=)7Lv2A^%iKu7`3H^lU)6aa0NlfE$*Cj zg>M9!3#P6<6dB|*c*!K^Pcjo9bXZ6zC}X2_;DqOnt$ci(e5}z zACQ}ES*!XES;$dVzlrr$`}z3JgF62`c*eyGA@k|3dmcPBkf=K&zwQBC!l`#1fjQag zXsmar-L1CpgfK6Ek(huFRoe~)?hYB37q#j;F|FWM%bW(pz$E4**ss)V{QYoz9r!fGAoGA$Z@v4t2}qC;NGK0%y7RUzOe+~cp)x>_{P&*hSe!V zVF1xZf^ZPQReOKSYWk{@pCr6g>FNkCeL%z86&Szem5OVYZ%XSx2=pesUg92spIqjn z7$p;O2e^WX5b8(u&h7VbNmtThMN<##@eR$syDLxzp*%so&sDh0ttNagaQ)ZNLLDN6 zwM{&--r|HFVfInsm`XgL2z`Y-r7`wHg*k!kZJbLulPOof!zQ&Gc9yIN#`d)?z_kaG`&4+cTxrW#>lM|F 
z+nJ!I&$*oBBc3;Ogyqm$7EvX^^vffvb;P{xjfLF|sFGe-Ay*Ju5~K+Ra62Mu#t8XO zPEOAKSNRY~!!2heBKO~DW$lI@5Cuh^Iz6z-DmC;Ize`BjGn;iwkf_JK5`oZg=bWIXkgRfOl zQTYu$x(_Ds+0&=Xpc*~PxDv<^d_v%gB89UEscV|E3$_QcO}mLkbwZvWE^N6R_CX8_ z4CH|j0MqalG9-cy^MYwET)PjZCeX_eboT1+-E!lL=3~B6&%_pmZ!XdbT(h{wBlSV& z$ri1Yv+Fi`-VW4R=eB>ll}*)I>CJR)W$_lLi#bi6-j*ponmuv%+eA|0N^6a?#k4Ew zf9T$Rs7Sk$#cj@gp{qGphr!2FJb&Qlt>e~HU&d|a9Dh5F-H5P-xfCwJt(aC{fa1Tu zmu%`KoM^D1%h4n7hhE-F^z}9`6jxCkqnVG$vSpr(7~GY{ajW}g{*_zZYA$o*Cp$19 z_zd+qe{pzGu!`F=y39|g&!N`=zmU)tdiqftQM8@h3HBA%dG!_Op14X|Ebsh}$N-QY z6>v-M1)C(EY<}fRDTF3OAw9o<00Z%I82>;G<^j=;IPw&sYqkZ;Np`KH(HE-+AD_1s zolt!FOMZDV`}effu4gdVyAgozz_+0*a6HD6?b@mDZ|qUAs#D-7LNq=?&7+4_Vo>e< z>!h0>2OoD2Mt6v=rExDxS?#)G$BuEFaut$`bBDevcSqw2*aCDgO`L{wz7k*NqCE7N zRwB{S-w?m3-Ptg+3xaH8nLG1>A5aD#H!lOSmi5)hTS+3qdht#N~5y zjtPm0ocvFhk)A7#^?gpAa_<~$)npewUs52Q(hho>1;E@5ctw}N9+bGUZ|`5L%U!s* z-~fc`D653yHtc=BmsniEG^8TZSN!ysQ;KS9ekhCF_+f!!p(sX^`L`yB)^=ej|nMowS=q6D? zW*_cAx%Y$DCI#PO9k1UU!TQl5D!6m7dkxL>8FY3jC5{l=u}&WlsilBDnZUuaUei2C z=Lf%$C262tuMglDPk0%X6MjhmWp3}^NV>VKLA2P=KKAy2x$iA_mRLB#&fgMeJWh~n z6pACtDpn*7b)U<(zy6MsB^z5&D)`{xf`yr3qGeDImVggN$NF7A+D0yyduoejy=<>V zc{cSrFmvQL6sJr3-y*sv^t|eFb}-?}NNm5*K57GF{7bLe%dBzflnH(Q)^$>>b4HY( zpB`1?aM(#mN}8M(nKsp~_$DMOn%jHbA@S#_UR!hZh{uojmFw#2etW4OtMy}Glfmq- zgVQh#?ZNL6z%+iR98ej{c>S6g{p$yE9^0dKybknD~ z_vcDfmGAh+}k<=PfPywG!3FClD{&JNtNC z%G63lyx6;~@vhbLE|F>X<$MfbX|7GD3d z`Md-Wuc=r0-+*3;+uET8b81?ea`HnuJsZ{zgs^nMQa`D*O9s93j@Y(o3h&Lhk|m1zkB z(^&h&ibZQk7Fm38?8P_3Nf-Crco(b}cIM8fF74#zUdWKou&gJM>~5fo*U226{aZK? 
z03X8F>G@gplzQ&0XldJu6r5=ufG&QTYo>>bLsVd3UH`P;RlN{MO?moXSlW0>+%lpUrFYoxF(muk6*HIWU5@L0y zngQDnS6>moLS|QJbLa)l`n0LR!9kBTk=K@yiu~bX$+tWY!DIz)`6%U=zkYE~ugf33 z@`)@aB&7CB^oC?=yR`S?q$=A({ra8GL$$L>Ryld2N@v@VZ2O|TW;2Vi=9b3GnX$H) zg~D)XjjI~INw-j;Z+V6X%Z6GHC)EjT3$9x>x##3}9dMoh7|M1tEONKTOI4eEsp>ax zr1|#koA~r?{#4g$5@|y$?7j`wo5wfuWPSc3E|;Uzd7Zt?@ONG zr)Fg}HE-=__BnAltPV3dmieh*Q^aas8ht;&|E!n>2tSqXTt@7G-eNvj`7j-tt~K=Y z@tFu$r+;7g=o>PKGP%O5@M;^IW*pr$lxIJ2*+_Fsow0SrakHO^Z(z}37k3oRUOxxB zYfmdlLluV4(EP=Hrfqv>nAnpV_3{4K>K{k1DRegXwL5N#dW#ur>FDr|j*d2~zv=h& z6HlJt`H{}74U0^-c8$EpX^oGE?IMqKCiJ;w$a~1-$%&V5J34f_^mDZLJei)J#*B;G zKCgZ?(lk7=;$`}5Ih%HU+1IR3YM;MJN|I-Wt`V)FWf62Je;l5Oimh_+hpS#0Ot&Y+ zZ8Ej?6+o%vG!U4akU+`Q8r3uk7`r+|+aHnn_QbRI=@tgVnj@dG+Ihk;>E6WFCtA=w z3$jb1as9sG$Dz3&o(7jM^Q7^$H^=JGFFG@4W@T-`rW6)6$W1ZBb>7t6?C|zWP1=?A zeYy3@1Nc_^a$Rlhb{m75F=a7v@lV#BG7E!kz^pMTJ<~uRClWc(qvmb{#H71x(jv~9 zA#QH(ypRk@7ae3Ku_z%|2>_{IT3s#ou%^G~HCjs)MD%G6het;G;r&d81J}(0&6&P^ z^TDy2TEd&0o|_!kJba)u=<;246TofO?_{2%o4K0{IwX-u5#>@wr__jUNu*=aG_J&7 z`P1FnMf^VpWf$5P;gY5Q|Nj}o@c+qSIkD;-30J7eJ5FMz{MR5BG(*+%Ypfv>iNT-- gjiCNtpU8#P$qW2$Mx#H+hz~y{qew}TKJWU!0QDk0TL1t6 literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/moe_dynamic_padding_b.png b/model/train/yoco_moe/sources/images/moe_dynamic_padding_b.png new file mode 100644 index 0000000000000000000000000000000000000000..deeaafe545e7132242fa6b71688814af1963af2a GIT binary patch literal 79984 zcmeFZWmr{R7dDFVpdw*|q#}qk3P^*B64E8oAR;B*ASDVS0@4ir(|wy zsb^zrphem?iidXvPxAg!EX$9GwsNGo3LfnjNj*S*^b3V%v7#woR8i}cr{{AZ--j4mN zvVZ@6=GaM_fA6`1e`fCJ?;qbGywdW|@4tGG-HAN?@69(oIQ~5237#I?zdx0<5g8>J|3c#b(2)b92v}sC@{DJtq@=KAp8m2b8hLsmCC?4#EOf9losp3d+`Xtw zcTS-_UTCvB;XrwJcUMbm?CfcuTJ;k1xG~plt!#}FSs59bhY}JU9f_jcu6wo@FI{3{ zW0UQwZXnQcUZk&Dt)~d4Yt9dS|NaAoMWq7gvzCRXZg6GZXXNDM8&iJl8KtEI#8gzo zK2-M~1~*Ix8K^NaF?GHC=d|tisKc<(g&|abZi2S!rb<+*F%2i-wP&^8ST%|lK1OIO z+SwHeCEwn|)N;v0r82^jNYEve8>(r*ap1((gNtQ3^Qx<#db`t-h#y+uY^x{&7PG6Agj=H{lom$6znxN}!m*Ar*^ 
z6EIb)<$*j=)4r^r=GpxI+f-ae@7-`Hoy?pZS#Ye}s)I~0v!tZ&*2-w+^767hrn{?) z7JRIm3q6<)u3^m|hpyknxp8xI54xAINHH=pt~pnkn3&`nT3@H9Z=7D07;@;Y%F9zo zl$WrwvH~Xz)>|G95j=Bp0EHDBX9quk#}FJpYGQ64PDf8icSBgXy4&;&A%1UHSCSLi5c!pgK3VEa+K2$U{&7=+baaUwoUbq{z*)+dSwZRQDyq zYoDi~8K3057W4)gxCGr38k;iBhcYr@25KfoM)JIrE0(MR1@>rlFz3LyJ}+h8t5fGU zIz&kpaogklN^(LU!^1CK_D(D=zHK&Esg`PNzZdVV_k)8Fc6CNkkxK9d1a+*?dbXyTH=?e*6@ zdid}yVeMykdsgthYUmWr5Be}DhIjc&QkTz8yQrfTjZu&p3< zAQBWCLl{hl3XPYBOL9k*S`$QcYIa*KmgAYvoLn#z!A3c6EvKt$J3W949^0FxR$$x8 z-Tz~L-S}XC*BPQnkJ!}lyhzEh@;g^PFy~e(l4{uo zU1HPxL?XO%2PS@=LAHm8XXHbosL#+{WO2NSC^inT7}LRg115HMRoM0rvAqc|C#|=O z=)m2|U>3SXh!#CG2R~1|zIT<6lK7vaM@C<)x0X-5JiW2j%+4$i*3&n*C`{#gKsdWjPJaF^2wCsm5kjCasegBwf4zkhZNNs?$R&e#f!`(8 zE^!*yl(jYMsMPeVEEa!iA$3l}ww_N4$;;4O{dFPzlUf@?Ic#&GmeMW7J{X z6^s|6?KFE4TT0L0vss>>XL+Vt1P9w^ygo)e#Le?wX!=z8qA|y=BhnppVc@GK_Z;D=RC5 z_jcQ~3mJ|-e@|C3oPCk`(L%U}c_+=`jyMs;?PZBW)8kjf049AomGSA5ghdUm7+f%> z$n)5Vj1M2~KnUzlm5G5(`jpK3QLc72d(FP0QkpD!DnkH#{;i*1!LbEP-jT&+$i5!e zdWVLF7F$t9mi>b*Lq?GrIy$)w2`?1a=i4KUqwQ;G(HaSb@D^lARw^*U{!{{e#IHup zOK{cRdJAz5W1aA><-sKISLfC5q)57{<=6nb4+|+kn4)%BzPbBCIL@YxZ_7EWlcS%5 z5J^8Yt{XwHP0V~kVGR|{%(AJS7p~6E&O&50o&5HC4-!bq(2#nWCfh$=clqLE(W4cr z2`|=s5vOQotB`gWvGReCxjdAtsH9Y6+RuiYD@vPU!P) zu9VGF!(3hL(LOY8pMrzW39bFO#&qL`JowT6?mBD@Ib+ON)mHaQNYRi*_jaZOH({K{ zTg!vxsGN$>*x2Vf>kwGm33AiZJ27tC0+9KnOGd2yAYvQtP&)>E)qVf*l84f9JvI-e za04|Y(;@C5$|C0o7dp+<>$7#)lhtSW6_V9?autOU*x@3v=*xHWa4 z_~px&qxtPJ>{mw8doxu5T0EdVTZLVzKv3ti*67~FqMe_$Bxrh)YV;8I?b}P)C1VJj zp=ttKJ@UdQ>Ox#5BZKL6tzOl^9*X?-^T<@`s3ffld*hXnG6>KS?ej_ywbcycK8{rZ zqjMNnxpGFH?p$Z3(|S*Vp`4c6?rhvvArTP~;&BkDgm-7oW7Ss%3&c(Pb9zG=m6i=y zlfF%Uh6Fau+W*r`x_jBS(l{DPFb>u0R2NA}O>ujh!CRXpc6N4q0MwM_Nt+gJha9GL zT+l`K)5)!xfD7y(N1D#HM8)04gc}SxV75uADB{rMaokU$0Rk@ZbHx$+)aKwY?uQi! 
zUjbYKAjxQT>U%l5c$mw4gjW*Vx9&U}rdnv!NsAsTYR(y)UcCXio{x{Od?_OnaSNBK zoZa>L%%r4m=Dh5s6Gwrww8jajs;Q|(^I9b#)`$EuI6q}wrmU`>0Xa(*2+J>?a%ehU zsGVV;wk54u%K?BMK!J33=s2by_UlE0*US2lx5^h&W00ieg8!)>)eBLvP_Gt$c5cpo z=N<5d)9jV<5!%k_fSiBbbjcKQw|VvMGh`aQ87fM1*p0T9x!n%Kcmj5?4i$kUVYgbX?Lz%N$XAr7H2k?{~+9e zii;P_1;N6^)D*SBFIL;Tx91EndN|P-JQC0emwr7V#Dt8bq(^dLiwnO`?i zQVMzbRo92Z;c!Ucyzl*|C@lXT?!Z3eI^xRC+bh|S0W?mNvMIwGt0=vUqqiJ( zXCrhns;kGw8m@seA!(+1XX-RC!p`_?3g7!1y{bBkFHf^A6xJ_biwg@s0oMHIpMM@o zOZNZ?S>8+1TLGN(#oO%*<4X?GFN|p)KR$2=9|SUHF&Oy#L4O94dvbF6;O?HDo|F^R zj&~Lp7m>&l@3O*IVdWnfSVH3k(EAbWngdkLfmBpf6giK1EE1{b6Wq1K!otw?08qGj zcud#lTAinS8A79?()08i>_6Q+=ldx&HC6O{#i7^cszdb$Ga5_AI14)0^GE5aOEAWQK^votG~n2&BiRw_Bo7$=*ywzdYKDGN4? zy^v&~JOAhLH7w5l%}~?=5$@QrqJ_0nKg&p;^KdT>u@4EkyjX;M=|-27T^rv#(D(Td`pFo(3H(|hQK`mEjEw1{s3eLwu+3o1 zd5k;tq-E)7Fp0O`Cje8mn}L0cKv;b5^P`x-X-#pva^;{fu)v#9pQ7wQ6VwQ~y&BT)Q5Wh z$VQ%yB`0>np2L$>q`3XO0|FV^u9!CH%}ULo+b`}6B(-@KsBup)z0`8+cVveEs&%Q# zS4b8^5P3AGVE}j|Y~v*qstABM0xThFAZ|{v*qJPjLQwQ(zm7%w{wDw}Kr_*UMJM)B z#lOO9j(Xmy5IJ$XH+WfW9l^TI>@``!u6s8w5DdnoAR+}I1)%0#Ne}#b`ueWda)d@l zXNXff-47HwD5Y8wqd{rpJ$e0Y(lRZu8Zryw`Q|qaTcbbvP&-S2V{Yy4;M`@8TR!4XUG3BSpTAyKeZ(^zpz)iq~W&6d#dajaj5!SBQ5Z|sYxMOuV?L%>! 
zTAIt2(5DuOy9q`=oF?XZDTxK}gylKN5R`5^?a#B6RnVp=E8E7S9cYwGIcr_Ev6Pu> zvb#3B)pf2@q;uQY#AL7DwAeikcqC{ti?+L;lU=Ejk8HfZDvDrjA{t@MYhv#@JdPZL zyiB~u7uS1+*rylR-ieJkSN6{SmBR#*t*Xc2AA!CsVaE@vA)O>cesG;8w8cv!el~ZM zcNX|;W_o)1sCfgB--nWtLGAMe4&~f3N*cw_6?iF&X(3qIamX;RLy%x}(AowGg z?KhJZuwU+Q-5yP|nyz0QG)(A447a{Z*Qh--B~R7*dCSqSt$y^-C67f4@)0NpiXaqK zv4|F8%uP&|epcN#ygENSn+~W(%Z?vURvzKbe4TbpTMnO7Z|)flcm@t4lP@*#W=<^bvl+l`U{)Q;0`fI}3#G7j{H1W?KOtissO}=aV2#8{{P{+&{Rg%kfG6 zM$cmEnG>c-0xpHD{A2U$i^ap{-3Ad_&vXjTqa&#C@GMPE;d>2LySk(`v((*!q*bv} zF6A(47m?yhfpLR0?!3R+unAMN<1g5Zp?v6Jq81M3;j)>NM&z`iBGU-hb>VZq19HW2 z?gzW&fSr14+z+n_*GzxqAOr;^y|S{h_SR`9Cnq1F)hm0-BY>QKC5>={bjVjRJQjV) zfx;Pph?}@tYoqmCO2@BxgwP-WyUi1&N+oTJ?`5C{D9Oy>MMD79UT?13$Gcwm$V8OP z&2u2J!EgY%kLbNHHcofjZPqdA_;9xzG>5Q|5MZL8fck!KXrS4&5xmf8H>9qi0Whg* zRn(UA4CGkgaBT1VZ&!BCl5H7Ar|xZ{(m)&NwmgdGF}AL6!)T(R~p>0bJIJa;$v6CB2z0Ngu6rchB< ze#(Ar4pKd2zdckgl61-s53t-%>fR)4H*`+fS0EN%1T{bv-d1&F=*`({8^BhmI=SEB z59_$?ehg>TNd4|dY6=$krC_2yC;T|(acs4Q797Wif~$Pg9Qb{PT!QdWnN8X++SUQz zK``D|uM>#YvA5z-V`?!W|M)RN3J~-#?dRtw@J{4xbQvHTNPbJ~?z?_MN>|7T5kGeZ zbN~UMBPWOm-)o$gGoq*x5AVxr@!@s#6F}OwQV}K!@{|INeUi{YjY%()(9Q(nNfm%W zAtTs(r>4Y)Q)o$rAS4x=Bvc(H$0PL_jEl=)Fs$F)Ggq^>SS0DTDLn)wj~ z;I;y0{7j_x1fF;j3c@V!S5Pz{!lJ6NE8Xq~^X{<0d*W9iu)%_)YEn;2*TEW!U|}uWYj+9<<}37veiG<*VpUcvLXMH?zyOh zXL*vawgZ3+7wCCf0Q8&M+KT(r>?%g{S)>x&56TWU(nQkB%GA8|jMiovqXista;o=C zKo)1FBrc(-dGeh=Xm=*WzSiY~g-NA`hK7okRyMFL5Ct;J%hh2w2_@Os@KO@^)FNn% z%Vq0%)mjrX6Dw<&aWpxvL*HK#HaiIo!9Y+8Q$5gA|tIv_6N(hax*) zNx1Bn2kb+LfCU&w``G!zvIx~~8(D&zXAR>}Z_Mu$J7Bb3szw&9;MI#YX`*L+?+<0S zL+%04)03+s^7>xl`6)3xkAeCYO}riaPVrTwlG4=l5QGw%%Ah&|kd+Qt z2buTn=K~)=b#oG=26y~1Yi$gqcWJbuL>RZB1Tufl=nkO8OmKzTxij8+H*Vfs{m`my z+nQ~pL}jKJICJ(k1Z%{JPhVn@zjEfi9vSSHXd#zkHsC^|;MD+;h4tL5z-GMi&d%kx zZrxJW(gNbWe5NEZ@h(JW5RE@KM5m@hA!WGg5v1ipsQK8tDsKoSCGpIhKX)!*czP8O zX7?D|Q09XM$|Y35CD8m+Qs`XL-|BK0`FsZbhU%!2Pi-Q#kTa!XT&G$db>RFbwzHj( zP=TyBVzz;Z1EcQk?>7Y*0AO#P2=kuM_EN7D$mZgeL71=A5&E-_-RJRSS&$rP0($iP zdXa!VuZ-H0CrRLQ8WvR_5s^Zw4@1(DX`9j8t~ 
zq=%TSWj|=(yff*uxxe0yvT(=72vq}i7>&c?P%Fz;UyaTW3=NrB+RIIJn&hq4e?$ICx3MM`g`O|S_2mZLBa=T2#xz)lAw)r~M zl7NLn0U)2pQVSLsrwwo#L^vj9=7K<&Ia06!z%Rfye;(Zqnk4wW8BL&fTxYe#p$A8~ zf*ItjrlzIN zdAK7`9Wxf;P^t59%R6=`2s&iLpk9GR>AcC}Q8dMo<4B zLd!l16fs1vu88p6dH~er0zu=f^~ERqZ8BB}v`8s0T3s zePL66DlAlnS|t1r0TIvI*%mUZ3g6k3eWJA!a^>31_s1H0L|^s z<7B2Gc<^V}Sd3RM^<~E(A~jDvBC&&3kql8ElD5n9d-q;?lhC-;nOaiq@M=N;8^@@E zODK%;UfVkm#|lB*s`sZz!(wW5a%&FCku3o^1Kuc!bie z{UIPfFv0SsOES*T5T^l|A5vXUnjB3Ppfp5TreoycQ7w4Zs;I3UPiNc|%7~OqmwGc{ zJG18YXMX#3PbQ9E#eIL82Hg%E9V&nMEX_GW<+9HW<=&!oOt~%uf+*2n8+Xg#Eg)8; zEClj2kjr819A-$nuqAqT=M!=zL#|5>@x*LHsUiq!l;E5YNT9mF1QI$8O)ivYE?&5> z0Nk$c!(A`_9w=FZ&WrAU^-IebrieznyTw`YQX&%B%%=0^*Ap~G!^LJpRW2ofduQz{ z!h(Vl5ixM7Cml%@LKWPQo-2qushti{rH9Tf5J?f3c3q|mY+nRfySJ~e3AJeg^;B}< z7p0e29)Ez60(Lv-*f=}CWE~W6k9rc%cO{FHw8jf*KsL$%Wfm<-n`&IFkxTZa(GLV1 z$<|5B0hy9vba?Y@7Yy5W$T-coHJW?t-Mewzio@1Q0c35cS-?Ax+7!K{s5fCJ@To$m zWMWsgfvluL#nQ#=R5us5^M(=7Fxc_w?uQ46S_ViL5tp=_7e7+fCs@^Aj?1}v9qmH- z9M%ZYy6FZ3lOOcw`X9WUur9NOU1$1p;;T59*H$0(*INJr45QP_t z9lZksHzXmPcNwR}^)0rjLxzsxG+crPg&trQpwK-#5dwS=OzG$|dr{1v{G&Uz#J#{J z1vYljHSwnD)rD`T*?XX}kM>}NNGasJ@xEcY6VYN*Q?I)iBV};Na1JN0%5*Ek2Fj^e z;g56O8dNVb38ORK`8f&F^Jmc+;N^?3^uNjvkVZgTK2Fy@RZ7Y) zRyGY04MBrRH*Akbw=uG_%N?%L93~fSA=Y~LH@KTS%rX}VP3f^#?8ew z(4Ps^x9P`^_uvwN?)&-`+eALK0#?DV(FSDJHa4@Xj$AKxED7D1zVx>(yaaGMdT|?|2Wgh>4k5QU455 zEHo@T*l7?crs#vsx9B6XpBV2ncNrMv56pHZR5Sqf{Fb+oH~``&qW&H^hQEN^c>qOa zQ`p4F!x|dCe}_yj3CzD66ae>*IBS5a=2e@IfayRvX24-7S;o}1st~C^Bby6t$wNsz z?6At(91N`x$7TSv)rjA?pa_`|!Vo~36c8{F`nrXG6+B)kRk{=MrHZEJ^Y%Ay%&pJj z{<+HUas?hCGOSbMh69-VxN5yk0MS;VUJXbDxWc~MVdw4|Y={FI2!%KG>y26>Z~APO zn~#=Ltxa#%`jGVhoU9eV4Hg=A6YrI0=2n+?PsfeE56r>1Kqy!&gbt+zm+RNBFU8ul zEn4oojAOOarfbkRP#~JL?0QwkdBvdJ!VP%EI6qqx2cbVDpW-#~fc?Haz@I0yZXw&1 zXZ*$bRreLoT>mz*gWIlq8wv@+uKc(`MBjS)%tcOp(5Ke`osrt7e#^YN1>v{*tsyrHZAH5^*b2n_wY5&j6E z_L?ln7%Dagk zRs|RjvA!=vPBwfWIQLRd5| z!!}ApfQ8tG#7qEuCz?0^O6yr7phk!W;A??LJw52aSC*i%5>z`0odA&C^4*apT1r#k-H_Sc 
zd1REbv$N6PM%wBDBm%$!$`1`*>7}KQLE!0zKA$mj%^sk`Kxvqur$-V5Eb2g?F=#-& zB1Yz&xH{d?%n`7nIduMZOKW?(s+(JNJZu~LhU8XrU`I%LM`~kQ3ZPcv$=-!MpOKqe z;-UyWZc$Oj?O(#SHZx&IK=_4({Y!ELu+~q_V>NZ(pPc)l$X;h4==P~<1<#63+d)eb zg4s%jStUW0?Sfzj^WT|vEQV+ED#nd|79j_ts}AvWCvtw(m51iGN~FjINzQJ^Nkt_L zX@acS4WrBLZl@MQ2?2nfGi7nm~w zlb}?Mh;(4d(1?gMgXVCg8YchX!Rtp^j}wtPIO_6cOB_95@P~QTS&$ zNH%uR-2v<~6FNz^N+F4;L$b=-_K=X4UT#u?Zs$9Me3P{!026>POJc*d9HEoum#PY( zFPshvp|XL(d!O?2^NIJUp*;+JR1XF{Ap!CW;Fa9kRnTRKr(vx}^?($|!VGP)nvTJ) zl3ad8Y(rGr5zA*Ygu@}QN|Z$CsmQZAOWuINm*3za$Yq(tn^-&2cHe8bt*_}%!*$3- z-y6R_#{2(UtN-8XGDkX~7YK&6_x&=$-LLPSyKr;lEw5I7tn7nxigtUknPEZQ$(d46 zbVDu=d)Y<~p5<+3%@Xl|x$A1-^gi^B`A?4@-K*STCMrX*&AuaNiwN;;{(U>1EMeNy zNP(XxN|-VA8Z#E=Dja9zJ7|<|kSU_q!I&MJ9=mTZM^@I zetGVpqGqgyl@(*K;Slyls{}u98@7WBw@jWumpcozL#x%iNoS8MM{R7UmKP%k~?edOYOaCXk^v`(y4^{Ud$&)tpQ)9wnqPwed^e}4?| z_*I#Y?$4f8>sgX;L+rDQ?v?qO2xveqO9I*!q2uOhqGb{8s^PVXTBpaAl|pJVrTz@? z?bQ~c`?%9SjJFEqgV6L$xQXN(HYAw9^@Y)!W1Hh$tW7p;;`=2L?WE|pqgqSkUo)Wm!Z8|_H9x^HR4}KnYeH#(j~HT)v77(*4?&PVWt=noott-@^ZT0 zFThK~`!;gz{TPqjpi*j+f>L7crD_JhqCY1vDD`iioueiT2Xz&=D~Q8v=?&BNj5hnn zzi+mxP7hm{petzn1R(`Z^nBKDAV$Ot>Zb4kV&?x9pj%GR(;DJJ-#6|Xk5E2ZyNCik1?*Rj>tw*HoH}5hK z8=WByZeR`%FrbNSt&-}zPyihqL0J$&QvlkjfePJFCG3;Zom+li$f8Cge$2IZYwT$D z{;t=M_C^?OOBeVlFC!>_y2ZZi;Cpd2!;I;@S0*AthTX~IbZ=uJ5k>gvk+EOG_yfWS(vANezq4}-6(2-bLb%c)9o{|05W(vnD?aTFfF|T67VzRRm_TBfK2PDvL>zEAZ)tXuM z(Tsuv+<3sCA*|NKj-Pj_0#u%${S-QD-VWzliCA9d097BljF7ftfECKh3((!!v9Z`K z5Em*BO?sdLRrrh+7zP6WE;8=Eu4et|pviHpV7$ITO#P0RA(1@7ilX0-HUrv3cPp@0-dP^@gQ^*1x^uWzw>8Ll`eJ_Lh4T?7)(g->i zG4n@{9#vRXId66WiPvp^`V)&o8hY(EdsS(Mk5UoyX|b6(qgh{%>3kCQHD=mrE7ZqE z!CV-wc5{V=hxhdw*mjgy=m_C9x{;n1C{93E74(VR06oARPB1_@6lrqo>h4BLe6R)} zGAsfK=>{@MF9~H)d8A$V#m~D?6SdvrL;Cmz^7PR){Qw+6UhJoWW-QIN^N95~I2RLlgpvhtO3DWecR620Ax7L3Kq=+#nqlxvS1q=m2msXe5TNjP=qU%`g%93X!)V zrrRsiTgdQvx1dWq6?$eCfWa$3TLmKAGAgD1;F3YAc!(ed?T=i*Jb`G)gVGWQg&m-- ziNK2?kSIcX|7WO-!DeG@&PO`!7ofNOuwISA^UjA_B~#=Lo3R~wIZ*Kf*=BYdi`Iv; zA(_-}J9@2AoZ^TYg0#qER|~xO#{4Y;6OZ+VE+~C|F>HlH&a+W!0B1nzw@9Nbcr)}= 
zN&^N5h+Tt2TcG`$6crV1yJdJ5L4yYsx*sYeq2b}F03~r-B{eD_bpZ&blf+@SMN+ZQ zK7uzfWA)9a4CG)FhZa!$6Md+=AfWX;P;sqTj%6{Ed>EEitiD;a`4DA4B}97?0^+rh z*jS|bu2=wIBT`1TZ$OBj`ADgK1Dsruh0Y*-ZG>gY%1R#rxg+2DBB8fRxwEKh#J{<6 zf({bWySi8;!h*07A$B$kTLp!z>Q5cm`*EmxW4&Ck7T zwzy>d`%8Fryx&Ij>#TWaFhLB5V`3_1kn?B%{p^j^tFi|9dk0ruM@RSM+Zg^G!Ipu#-{h8^LHi3!D2UQpCT`rEOz)6!ksu7T&kn4&!d`*fi;+|UB zkcPs?Q3GILM87}sAQOvpA>r^!Fcu{UE=m|m&+4>^!M06X%XKNr@) zAeU_*aM9do;XkX~0IRSFbE^%C;x`qgsmr?C{5OkV52a2MHLkLm;gCTO7HjD>FD(WC zo(Nvum2V@{S3=)sNH~Wx^;;-@{yzSnK>;K+crtJ*jZq8r9MwZ+ZKwt6F1a!+)}gG=h8oN5A+)RiwcpWr44G<>iKQum9OD4nL z+!?!iAN)a`lV|7bJqxJ^`KKASe@M6XwACCu7(Qr+v7Y)pkuRqLI7<9%DME9Pl#D0x z>b$%yf-8KenvMRuAhBOg+Ely|Rm2oD+`wj_X8n$DY`P2OaK za@Z!6=WeI$U!G?gxg5ii86)?6IyAUaMQ<~z(=S@+siAL^TG{cOh0!=Jx~sl%Im-RmT?vP@Ou4xA-FOUNZ!c!AZ~M)! zYbNsB$oV@rjLt{*NElBaf5zrSN^!VBX%@Io?y`F;T2-iFj+1TXWsFD^>ehq%|6X)H zT!t&B>WT^9GBxvUV%TTDVaBBv#SeU6S?K4y2;b@p(2QW9{d4i9aBLq380OgELaN^` zoSJYXeH7+{Qd<_qB&B3J~$ghsTk?~V;XmS$bQnvEl29ypBsWG`K@NQxvgad;9-B`z<{CY)L+GZfbmX zdzCa(x|7Y{WhL;1Nh#jS3z|y3t{9{JI`o_7Uvu0`;@}BX?pIOdwmUbO<61W~{J-XS zmsCk;qn}3xiixcQY2WKF(%oOuqdZ7`^C#Pxe{!4U{4t!w*UvUuUpk?=bP|pW2xdOY zPRhu5o9l`;Wzj4>Q_EqkHT3C?Y}CWg`ETQUnZNGf$&;Zk!65h)Hnz6b;47519K7?f zHj!}l>}~{Vad9#|Ek3ELD(*D&v#>TpQ8&brS6W7<)hLt|F^c}3G5;2XqC^QeumOsj zPK`02;;GpsTD0ZwH}H>MEl5>VR+2Gle{{}u^|v7v<)7q{L5~+Z0;NC>Boe_6whur( zPa_E&ph$8o=1o1h8$+>F3Npd!sw zjN{lq&=Xv(J71b3@VpEZs;-fdmYPFs&3dbaTZr#@hBeTs!=hm`AbLuboT6d1@?tDFC#;iJ5egW(p z^fKRST5PU8L1Y65#cfM1hsTj9tT1$5eN0Mv2^9%ES>fHMdsysELN5R+Yn~)8Jm8q! zG;iCdlDmkLuBna@8RKTz-5W)4-6B?9k_>qu~_X)SrBJCa#F{eFqvZJ%}?)8ww{QTg@EMnI~_-5?! 
zk4=3&aw=eY0(+o``L$md!tY$+m_oH=a+H*mM38Dbdcr~IAkH>kFX+cYn#%e4`MqD4 zfqpotb15$vJn<3$qn;NLu_2u8WYH*o zXUR+Ww07j+z%86ji~k-gviE|fq1yufxmV0dK$%@hU<1vV(4{>EWu!VgEp0f-O;}6j zj2*f%TnvR(nmyq&Cw0{>0p3s6TC=h_FcfZU-rS6=*=5u^+;p|)yDVG&;*|ZB+;U}D zD!N=QZthhmS&_JocXi#9ks*R(YtT~mWcW64JY{9&jZQ+|Ci_3aqtOG(6x(B%JDE0jrckUv3#3wKFm^ zn}s6Pw{2J9!`*fp-~>lFjJxQLdgAHqHmR1vxg;PiC!kq|lE6l&{aTbX^=fQc4l1B= zz7W~{*0yWrWQJB&H=)VaFsGUKD%qC}8`-p!jCcJ9>nE0R-`gdMRt%bmXp-$;nB_E| z2ZLX>0anrwQN2OGQnjru*gg%Xk3`@oLc_WsQbteeyitwegjxtvqDGtqPLeeO=lwC$ z7~<2Xq@eKCWo(@qj)Vo*_8}&bsJ^*>)X`Vuk{^`3S7}}>O6fG&F*eBbuT*6ntjet( z>|N+v-tDQ`W_lLq{zQx8SDX)!k&-%1BKQ+80Xs5pt@Rdp(2TscOu-eOM>nhEUnk%$ zJiyd=w#@Yp9>iI!HOaZjA{oROnewme*3k=d0arVNX~$M4O(WfgBd%UZ!m5(2Ek_*2 z6{9OIuh-`O4f^%Gugmn`8AM5|J2GK*mTL6ltnoejwV!>j8Ec|&cMEuWD+IOnYgou1 zGh~b{RLM*f6ui^p<{#&lNX~9@J}*NNKRVnw!f#(PSI&ia?XQaupTMT!vys9pzJ6Da z#RQz;%aGKrL(~biUH3zgq2iSv|1)6g&ip3p&UBbhEg>n=>N>0R$M~Q{D|+R*HNyc zCqC6|UE9HKeO?J*FVOGh-`6C){`Wo$DLr$~Q$v>+bhh2db1IIoN9oq8VsugK?9)u0 z7yrDLFOji@_0A=ZLz=PXItuf;Rhi-u8`9i^Et0yabj^tW9_OPwUyj!p$A?jq{*dyj zRb=N#Ma?$r6slj|rL!V(KVX-vj7|2hExi>YH71^EI^KRz#J#+aKEI5`h1B)uKEW)` zb8Kd8EAmvc_0Reui;rAC@LrdF+ZpUmkKSOkkGHyHA!tbxl}4>!yQezfd~gnW^DZ7g z$}j!*>DDqfa|C#kMv=zz>@(jiMrAs~XuhD($rw_RNp_yY(-xJbAxr!k;&!=rXp1d6A2l(ay8t z#(UhBX&8l#Z&w8#1V@-3(#FK^9UF~vEn}1Wd;f&dEoDBsW{R}^&3Y(JTX2Zh`4QUt8CQKckUg~T5ARWb~q#)CvvEY^^|tY=}n1?+|8TKJBYW)M{V-_ z-|qPU_k{im>I!vI6(itO_Wp|#4` zWd05?7wkHn<7|B`&2`aFS#$Hqy$1^Rqb?-uZqwBERr&Y~{ontZZswFg|34CXH*#5K z3j<%giCdp9*vw;i!+v-z*JAYk@B`BG@iqiX?ZR;%!ca_q1Fftk7+jf~EmCT5MsitT zp5?ai&nr}i>*G7^`>(6hu20LvwAP(&HylZ^Dq~anGq9Og$zWeCW2c++82*HekY{HS@)`k>fsu#5POY;?fkPnZD;SfG@56wg@i8kl&P`qmsGmb&g58p zC@s9vKkN6$lircTlHI%M=|mcPgf#Z9g+;6x^CP|I;}RC@&z#2`{j=E*g651a&l)C( z*YX)E*mph8Krz{9761AE534i|RoTZMKPt#p?@&%q@^9I7S2=M;X$=o4*NE1%%05OH_Ni}qIXrSOZiH2F|pEuOl3blCm7 zAG?rvV^T9O+BM>ddbu(JVSZijs2*0(Sv+`~94Z(4<#ox6XwH?Yh=~UgQ(IddHskL3 z%bbTTbEJ=gAEsDqrT+QSk2ms>%uelGGEAnKyqGz;o3rljzCYCF_3f5F?jDZRzjdh= zZO)AO!KQMf-tS&Wn8YT22=Yx>I$- 
zRpoEzc)o24){J2KGdF8x2uhJ$*rb9*mj=c{L6=Lc&GApLULs!!@bfZF854Lt^Xpfi zKFmBR9r+`aQ;weUv$aFY<8(0(&_THh@%r+ zquPil7Kcm8Eo#NI52vTI5(NDuF=g*0PtWO+U-#D2B`Y)c{4y4q&IvwfHtQQR{P$4WiUve{QnOMKKKh38JIA)H2%Xm41d`{=^E|V<3 zuMt-$YVJ=Q`9T&Ep|=8kKfCaopW1vUfc$>;~JZtQ(j?zy4my z4>hNgb7SIN<2Ep+4d{WGs#?!=_JK<)D`r{PDbgEG+v7Jkh1|t)g{dQ4e|G=<`V?S* z!$U(ma1ICrbIn>f3~j9y@a}G+M`I{ow{BZu;;`U{w1U8cV&YikwJY1VEV0-DEH-Vy zd7tO`KxupxrutP>1nc#4!~yDPFp%|>4W$qK7LcyN!N#6+r6@rs>)(kkNHEAjZv~F% zH37SpL4F+v94xO>t=Utl52Q&z}Dqf+bKYP7;{l&vnIm1z$Nb>gg*#VU5G2!ZY&w0Ha zVQKF4cre6dvLMtsNRs*S$rz!O3PC;>V+X{Y(#cX-#S@>O4_ikKx0-b7j zu`kvd_u&B9^^nFJEf*U_1Li}l1aAfSOgopZbo!Tb5Pmib@i2HYjDMuLPHc%jx0U-doKeyr-)df4d;Dwcs zIW8n(wBDwVd5ion^va~z*DEij$$VpB{2qA!RCu_eCV8vfd59%5CsVqOS(?9bfgS^A z2=?~&BVbG1X_H~FN?dQ``lmiQX_@iZOYaCWgm>e;Rlf+JpBBx zFOA8_ygiLy8jz8tdSZ4$mlPi^qPfqg~slTR%4Fgr+z)y zB*RbT=J(7IDLmZ52($|nyBZM|O?u#-kWXNb;pz^yus)~!d{#t$_FkcQd%6BIZjNZP zMC7yUw8z1KdxGeyp!;JXAJdZdJBL;2Bqz!-tT;n8{09x^=ynG?96yPStwi45!XziU zT09G!{_4tInlP55<9}?$l(=|NX5Q2NOP+u&HRXW)`cK&Y20FZ!nj{VD{}+2-9*yPt zhFg&#GDIOn$&e%@L#9ZDM5!bpWQff3JeD#l#Vx!on5q1Z3uFy3~uiIuf|_U*Oy z@2&Tk4h1|WQ{fR48Ju@*^F8h~N>+K)Qm6S{u0ON!;Kj2EDluEGAqqWVr8Xi@h;@rU zjkEusr9O}mhcQ7+S^RRUv;76`lj_>?78jO$a`oi8^tu|BF0#At>8kC8rYjrt`wjZ- z1m+S~qL%~i-Jwjm7k1k^+NScoOo%~`aY6sw(ADnDe*es|j4{@@in#4P)sBIn}n*UdsoTI&HQNt66zQ44IB2G6u7oI8n3<;xAds45ho)LV=tD= zSsJ*zl-O6q96%oLA9$yamhH@)lBOyvmu3^Pibfi%{ORpfA>)@h7(P~4Y`WpXLM=_* zQ#-F4Qn12)q;R{nzSJHu=6B~-se7uJ+osr3Jlof$vh?h0nn90b))kichzZ)m{(bJJ=R2$F-d2Ce ztQMM~VEf4_tv`0!RkdQ!wn==ZXJ}c}({y8CtL$>SC40s=)!sYDn!-N$)*aJZbgty8 zxKf|HkxNC*+AMmd*kR(Bh8S?$Pp&FI+kn`2IkmLC4y+gY_%`l+I080E$9jg%B>(lzDGkjQ_W zRe5Qu%GD{#Qem*qac<~bkIORCabjmLLjh#}uHY}t%_oeFx$&qI%gfgfT@cX0q>&Eh zica)R;cW48dz05Y952~ollq!AdpyHmRhd|~ zJ_>&^#(7|_=S^m~-9DOW)_PCJ7Yj!eI+!!A54%_oXcc(!k;k&uvWh*OchT9{JM~t_ z2iYtP{~m5Nz8ms#cGmDiL$L2So4cb~RA5|JVYcjhBcZbKSE*|JC(~2I6xHQsx|wB4 zoSKVAxyN@N)_?Ty>*WiLpWg9&WiT})6WCH)`|#Vv3)+{@8lTvnsIMn00I2x$Hcmr> 
zypvn&qm7aCmc_)bG$xoPu9ER;JvusGUt!t795C}}q*if%)V94gGV!NwZp7XF30y0G!fI9Mi#ZOde~MwHcoYmG zWGp_$KE^Sm`}W?w6>Jo-G%Pz~NjAf;z2aKNSu1S6uUG!m(y@5;WAT+| zOeq*oQr;HLQBqXgcHzPW2DC@4GaKmsLaygRm!JDY2l%p3i~M;bsVZlQ#*FfQjrpZ{ z`l%7#c2=&PW4!Fa12qiGX^im^zN%zB^*&>UyuT?eI#ieZ<$9YcR=5{}1r27mnlhx{ ztX$I3h>MG!k<}KrNHJbG*6+Tk;r+qmv7kikxoQ{wG5*u8hyR8CKY4E#?GsVmzhmDH zN_o!(`kqwdt5k7Ka%Vc`nwr;xS4Uh%DTA1v1VpKPDRDibIVMO=Qu^|kLZVPt#UGAi z@gBFPJGY9)x$`Xk90`e)3ht#>JJj68kW^8|AU+^s^=|)`;`M*a6-Kac`)0*i#m(3T zcyt@PbDNT_|7hp`nLOaOK1;rqqw+Q)ptKM|xpvyqy zu<&l)W?}0+i*K$l*CzWoSN_;u>agu~YV7g!JI8YpCOz3}q>63WqvFlZ7BZNglk}E7 zo69qJgZAGq%I3Yz{-jaOH}RwAdcGZBNTZrAE}1@IklwEm^zs9xHn|?V<*vxoxKrjm zm&UcSACy&W$i!86Z`mlSP~cpCwD3DKI6=iQC4xErbk2({avHb#Om29(G3ecp^lUJ1 zm$KT@oxbwXV`=5$_sN9vs)fXojEMD2J%I+pw{PB^Z)5$pF5)_4ka;(xFepI##tWHo zk>tVJbF%k>(k?{*iRZ?FpZJW!>7=|7Wquu;93S=XqmT3=uDfR0?5s=5)_~3B>dpD*0{v#PO!fgmcCIE1JQ5$8L#Gs`B0&D|E>uC$pfv6ByYdKOv#Qqi(R) z8T0Vsqc8bMw?F9~s#$#Vz^vy^d$aQX?ybx>fd!%$iX7DV-i-fO=Ew_$k2?$)6 zlm5?5dp`rpK+*0)zUug_hKZhw*bzYq)@jQVugXw{hTh3xG1_O7#~oa{JhGR!)kvCP$&>F*n6s9Xm)K$Mx4V}&W&u!Gu zXB-N8PgyP0+Z#U7(bGgj7VSE8;G^^$<8Su?hl!iK8XBs~Cl4g{PTwgu;;+u!e)ZRn zeE<1wnOnz}42xMb!Zk|!Mbd**o*g}^c<_U;0M#b(xD;#agK3@_pPQ8nDn2klZe2!Ynt?D`iH_>px7-f`Vnw3Q*)hR5f zk2be#u23c$*OFW2(i-V-@Wxd+cKWh!R@Yz}@pG0f^M^7!?smk0JA6VQ>`V$#BezlwpH$}$z zSM{REa==L*<|?XyX@3KX08Yktp^6mWTj==@@LbdhF}TvcsZBJnAdkMM(0Ags058KK zZY{a^{FozIqjkpBrcxAN_E8B{DPXhOkG)@-&%P{SdR@WP|A+K} zu!Z$3)+n__MYSSb3P4*k7MC$jeIg6*|i;2mt?(@8<*}Rf!yqrQAK#AWX7xI@+ z_oc1&xEUTl)|aADI3lWdY-TO-GiLT-py*Q! 
z5)Nv}Qu&be%z(P7)<%Rm)W6TDQu%tj*rkZrXbIEZi8eN*?*HMC_j@0}17ndl-pPHu zUbshY2jz}HQr{f;u#$$^hA3=O0_a0w*Ub?^A~qeFh9+^K(J;<OJLRzppw)bhoM$zl!YbmRV+AE)SE- zI@Tj(t4iXm9~3x#Z@qHElbU+R=GgD&?i-z#w9@5Dq-2OG`6bOi_VSL(jDfzF;Xvip zfc2Yf^L}nu(i9(bXX+WomDRYpDbLS`Pfl>N2m4Q5zkNb0kuH8d4`WdO%mO&^-QBs3 zKIGPcw3km!3`xE`=pT2YT^16EC{W9^U0Y#mP;LI_$vy}po-E^2eZEbJn;t$ockyz= ztrhZ`r|X|n3k;~&S>MS`M@$Vo|2m)V__?S1WNBH1(PgHXNx>)aR%?oDXFEP*Hrjf2 z8u=ZclI_%@E&YDVZDB6mYuvQ@6kmynk3$xPIOC^KHK(syq5gxwTGWpsBrTqf8T7L9 ztXFOwTO--!NOthpQ(w#SI;WoMye<2K`KMJrWPjkNXl*@zG%b)aH+9E>gS7<~%xo+& zbQ-Jv26+kW#uKqT0hZT)h-q^A?4;1=*tdPrSJ8V*Q+=(komq(LQp^&+9x~l$--xBTWiu}1B}8G( zs{A{8gf#IYwH!TfHxn0oa725Te6cIL`=eU3Gj+D(bTK*RAwlLJ&oRqIR5>46W+7)I z@1zbG?z{6=D|wLgAU|JbMq+yAg{nRIL=)1j<~O5f z(Nwi5pNvalAoZN%hXbmvb@k`Y-_z9&m_o_n8J#9Elken|(WBTC|9w1tcNJ#}dkqa4 z-fa9F9T=cymM6YuqMZ)A#UtN2>xSqHu}YBpM<;Ib)xUermfm>b(#3Bry+1ZfOOg6= zn(#@zl%%6%2)PyZYoEL9eN|(n+8staluM0v$i;1GYTy}7DcYzO5>{n6LigS(iBCn; zW14PNOY%5>{M#juk3P4J+qLq2Z=X{5i;R>ei7*<@@Q6}W?dEe^?E!i3_(e|o;E2&P zy+wkWe8)MX<02&Jw$VwbrCojJkBnm&r@PH(~$zgiT`mIB`*pZ3-;B^NcL#+|gW!ZSM0hXc4C+~UMHh+A!WBjYevzDETrm87# zQXc5X=Q++F1gA{eqEz&cnTho6D^;RAWlOnOd2)WAv9aR82EGjb zx}aU*l((iQIIROucyd(aMy`8K)Eq0m>+Cczzt~^^U(b{(eN&31!O4BM z@Z4X%e43L;c5pX01I57AJJF`?`Zq$&s&gve2nn#N9${tU9f(dNmFHEyW=iWB*R`A( z#!jEBX_{$l9I zU8|^a=C0;Ly6t-rmOPAdZC-Tj&Il(Omp-$TXIjwPEJ z%voPPO&mQWccw_zlf7$yiTgC$Fz1r2&(uJ&#(fpTdE0` zeiyWhp6>vUwx)uunFF8VndiFZ`ID8FH@S=4d8PDpPI|Per9aU4ugVf}C)-b0r$;R+axnLjap|)gKAh~O zxYqGb=unm_b;CQMv!xEZ<>>2_xU;@es*+GQ*XVoqPSD*^f@gw*vd?^=xcu!-U|}-7sZ$xjVc|5I;kHGKln9~@l8!{W+pa~`Y|td$eo;FCl?tR7t^^@{>kc?bKZkcUYg?4 zF$sw@Mm7G3IEA#HQst5`dHL@0u)sT}(e3%PA-mtv82JyY?&gZCnPoFH&SQ>r)Vjc$ zBRA*S8FIo~o#{`9lVP13h0%`oX>S`2GXJ(jx&X_*l&^(pjL$DH$B3p3>+dED*sRR) zn{4RR;#n%|r`z)*cy{vhT+q9B=~A#{CgrB4g}aV#BHG$iH$G&JTUH63+OMK+MwyY! 
zEs-2qn0N0MEAM@)smdFAr;Cl5Mi#}SX`dX(o!(*lwWHv3o*8>6`#V0t#Dbg$(^2J$ zC(E|z66q}5AHDm1`$Ph?j=P@S`Yl8+nwsv-S-)C(zEHOLVdLYCBfakHm9^h;A`XQZ zz0)BRby;6eK-DQty5S#um+Hp_|9HU)a5u?mb!AkZ;7^m$_3|=Q7UD|ARlJs+%6joCmqV3N&-c5HG3X=VydFw->gN{r@ceXdXbv)II%^3BTFF%qv z=%vz%gOpR7INBbn^F*3B&p9oo_b7REXns_RJ}^~!Uzu0EPD1IkOj?d#O7k#ni(8iQ zKNSN}{i3r&X|(^D>h(77Q@m{KrTTh8Y=*O?EUNM11AJ3b^Z$&6(8#ya5JRi~{8Nu% z;QtN*D9-x)&!JlMt1vwA_uYyA?5F+v-Twc}{-56xSN5UBg@tUi;&h_^Q!63n*>$4N z%Nka<@_OUi*cL5 zeALyIqGxql8yycMeag4gX+2aZD}EMe`P=Z;=bu+QZJ+2w36G*zv-HP*U6!o@zn&wo zu);jFD*NTe!*hHO$20JHIf4Lp^!Y5YfCFm#IV!dv1Fgp7_boh4Rjxb(xz237*Q1`b|Z*#<`?&PbJPs4?M; z)~|_)BpsJ07UE zXwSJ#&f#$%lH7qfqe>{Z65>uaU5>5WSq)0yOF*=+i4o8t%ui#m`{O{E+^0*Ib!#Dm zxH216@e$M3g;3SEYuioSih6O^okRI~d7zIV!ZQFJ41?B8-4G7B_*a?5=33EGuwcSj z-A^Ynhvhcr=@4M z^>T5nC=ox0VCW!JZ4VX-IYYVG!NI`>GDyS?abBG$uN75?AmEt$G)|FSZ<|WU8~S$FK?WKM&_9VN;n(Z2jL|SH`~(e>~QF&vDL9GJ|ko) z`3@dr9ZiKDMKGMc*y<8-SpM~KBK5~C_#-nA#ky6J4P#&xfsKVl_A zFMRgYyNkvAK8;Kx?V|jW>t5L~$@30QPEb+{omyoMkQd#}9Z`S3-YDR$PR~+c-2kS? 
zEu>Z*N-&k~B27~X-wtBU5-0V-T*^CbBy?;JpCblm-(MZP6z$I0G$%o5KaTj(hYm0r zy%<^bs#>3ksyEs;8|KwCw@u{d=532w&bkHq)jA8VeFwhD9{x5gTtrOT6T1`h#>6OY zmgVZ9rUPWd2fd}p1(%#E5+G^ns^`+T)vt>4e%vuLJoytOtPsPO-$PZUpT)_sgcrsS z@6;(twCT`$F%j_A(mgTXefngR!dl5CPua!QP7F(z&b+JE6IjyvX&HU>f>R0XbFL4juy2PLCZ95{nhL3r0tl;A1%gvpg zAqHSP)J0;RzkF%lmd8w8{q*_sTSg+v$Kud-`ugLC%7qKfpE-oAO)&$2Babxq-W{d( z$MF2TTGHR3&?FNDb(S!XBz65T=~)V#zczp>SCOFcMZ#x@STN8~#VEn~rT1K6LJ)0* z2T(j#Wec1H+;Q4DTFx6sZ&)0myupX#l7=|0OS~ASDj}E!_Zyo-3-tPxUC`wJ`)d$L z7rk`pl7LngXKQlEOALY2Jmvh|DHJj=`{`xrcKeN85br4x+{+8ec6mJts;3Y|!!1#i zU=u8)`m_CIMnFal1$@OkN*h6 zqy`w8sYtMyK%G;^uAZAYpbC~CD(ICZY^)7Kp_ka^r8tuR@$u1McnZPbrqG~`La%9# zX5~{NuD*KD0FI@r#){p8Xx}}pt8-Oo;56Wz>o%)TzhFVq49RetiGXpAKeyY4u^A7v z$wD7{m>8_7%JYPbny8rAE}fDBi*`TDj;*a$K9C`Sz1BG!%bt8o*5g%q?)%O9|2Zna zMJNQxVt?nrFoh7JQupdzwSIwYbE( zHNhQqw%htA0<9lmU*vb)K9(2qyL9?R6V;i-by<>yBDNAgwwS-ewsr#4)x94MzW#}S z*_=v@uXMs`uiEM=qFON1VcB19JgU)^Oy$$JuV-F|;P_U_u2UyYoRBZ4O3us-#d&vP zEsxH<#rHT4f!E9iX2;t1|M zPcFNRXV+=r8X+kl303~(&lC8;z&loJBOGdwhAg#Y`tJnd9Q1Qod*CFj1bWs(ot*tZ9$Oz&T&B{CRa^Z1lh^EQ3>ez3GlFn8_ zFq-C-lxmukPt+tX z3_d(b1uy^3rZH07gx1>`d)=BFcWVu$rz=SqckZ0|66W;~{#bw}O_)!@K`mRToWHG; zT@=>ym^mp32OuQ=WgOA^m}A`VH-TNh&PAMJzy_U+E>hCm7v7sH_vZlGu7_}9!wAU> zL7X^kF;UR-35lu!8Wf+fJI+Y&F~y`*k)Uf$DRkn;yUSvcM1x$UKf$T1PICB3bv3OV z_S?;6NGPefx=PZ@%+ulgP9OcXvr-AA(ERn7a__m06Z{BPIWT7tEEyLOEMGQ^_n1{i z4;Ue~NYK6}UAN>BVA1s-9UUueb(r~DCC**x*i>z;8{~gn zv?I8-5a+|8qb8vJ=I#yA55NLNtCJ-wcf6O$VVA*1*atSZwMnE1hG<^8R10~>chKUv zW!X^*Lk$n8%l;&8z$0T@Na8nt`6BGF9=20PMrQ5TX*ok27Sr)uXjvP*Tww?eI|oO{ znQD&p`(v*w;xDL44sMuEl~*V6FOr3N7Z~S`#Ks?2Qg}g0`doa3?-`>!W9d^LA0G#* z50{Qt9t<*+-&z~wK>u(bztZb{iOI()l$F%QJjFYln^wuE;-fy7G3jNFtw@K-7Pret z*$+Kw@9O!u7C;1}Qdj$eE18g>CzhAv=r;pMKtXd_?TLgu&bbk&<^1{c)Eu%;pq39q z&eKpBnRE>2-T_n5aLfeN>ki3Qj9Vu@ed^=qw*{yDPa!3a{Kz|ahlc&FVRq-(v#y

    N?U8ou1m|v5GVHl=u#k_ zc_UAonv4WCEpXj{Dxo5*MFN9^xsD!f$@ozL@vRVMVLux7W1ov1Z$5~KIB@VF7ccJ} zI5&MQvNt944zNJq*Vh-}jR&C2iVaVzq@wb+xjABf-oeAulSRUYQ@6y4TU7KRWS+EH zzpb@EGT6hz132O-)I=0dpT2PE(#iAZ>7n5G6e5iCzrOfCdBO%^$Vo@rDdyDXdZ>Hi zklPS&5TpI6cscL*tSljDdY*o*S2n%4$U|JIrU7cb&nahFVW$h9aS*B8i*5L&emV})>6bfXDIlGJhgiG>TfH|LHk%l|&Dz@HmymVQNK z{+s3LrWY2rK2?i5rX{eeVB5G(eQRqJgc?3$J@n3+u&S5z7l}$9znrSXxrkSnhNOSC2GFvGQaXtBTX^!9E57N=H%wG9CP5qslD8G zflsOzU$tOzG*xpOnX|q~%ra~eD5$9+zPJ|`i#v%^i;6rEo9S3s0(AtPU%BhyO@=#`U^o1wwyYmcD-8>yH83!UR!wog@=fX`&iWxW<{}Wn9PurVW7Rh`r4|-O zA9EbN+XNj^LABK848J15;iiP)Utf}>;Z=w5@J#bnZTkx^FE7%qTZ=Bm-ptd6a!nNk zqFtabk4xDvEKDD#=6p8xYQ}eWt*=rI9UZX{WQ7d^tY{|0Au(M8*;jc@&0Pql{X;_X z^743?MJ=0>e-5Q<9z-dZQC`;>)I5^Dupt(^J)-{SqY@vfIJJ)^M|dJe9#0*b)Ujp? zuJvoTSikzcYw*KZSJ&9Y?yRFRs?m)@+J7FGw6Y7f_7FDF<)fb#oLU-Nx;0;2D(#+d zlKT7J4Z4jGHmt}k`V?I9z3WoNtFb@MH|9zPtr~)OXI3XwZ>DB>XtQ4eo2}1Y?{UFO+wdo;o-; zVM;k$OP~}^VWgy_PjS=ooxT)>llSCHY!8RllI4 zi{IyU^T;{=$TeuJKasHYd;eaEsB>G)GcGw_mYcGw(z#Pzg&M1tW%%;7X4D4M^MzDN zUz897EPuSIYHel0%2QUR^yBV4T~6IW?Jo;j7ii1hZYi zC$|l)Z>R7(uA#95c#R9Yf#rxv;hzu-|Efwvqh%C;wRM6zg_R8I(()c2(n+VooL=ob z?Z=q~4P*GxDJd!GBMuu@Jc@{@hLz-5{EoWx%&n^#nfA(O&l*Eo6P98`rnsA6%)-J# z%_nK;+8PUMbFNDRzYD4z7+jj zzp06d2vlg{f8jcsmfD%q*l2Qu!)tjj^tyLW&Ee&Mz$ftGKq(STxZt|vQhQQ)eZUlG z_4z@Y76RorzP+HYSevE7VNYos>aD4(J7Uv|udYGFB8khpOL1kGDSHcRi_PyF8tgm7 z?li2ng|JCJ!0U-0wC9>`<`Wb&LAD`YtY5haL37Ghc|&_$tV)NLmKH{y%%vG=X^Dt^ z{j~d>c`3GRyS?S!DK4)AJ4iRbYEglZ>6GorWGQiIRCG7)HMNbaQ-37ann`XZb6ol4#yxrooABsrv+4iFe(B#q*g;am^f|?b0@mf zD>^zy9?D2dPa2p_rDKx(bC$u0-r10vwl*z9Hs}vgk4mu7K*mUdtA|}8PHpYapW2ug zamO6rjyRy}t9$e5TO$#A#1MtV!pkSi4q`z{I*mt1YgRTioEdNGn|)uPox!tz|4+SL zbD@y3*~`s+8-?~`jJ9JPyoJ9f!gc1(Qfx%4mWs_cM0XQb$0)a0e-v0Z0e`6K=&)hI zvE6^8o#Eo{p44JKWoyRmd{th4GrVes@>=z{jvR^j_3H*yuv{C{FFBt_H4Yuk80cv) zvv-^4&BDfI4!Tp}k&(5%RsOD^nYv7Tx~&ShBI+fa#+i`;_w(}yVXqn#bYM8}02U%C zRH0R)g?u4v(~zOrwwXv15cmv&B(3P~?XXdKXOcUE+#>{0OTcUb@45`fId`TszQ!!s z2WS)4^c*7igY3FJTn7&ZY_J%$i{0#orw)jSbi<2# 
zKw!qao0p0E24MK>Ycdty70>z(alZc7!f$uopHk*fz!Ww6dJD?sID6iN^Xh3YU-m;} z)c|TxM#jd%#vd5FJ?739nAw+feS)THHGZayD+VYq##V<@)C`o1nzOH;#`lgEx87r2 zzLL_Bx6(CVKsiLVbt~@vp-G1_IPYN3bxodLZ+y{WPAs1-NCE*Z=G-D8vwNb(KNZ{b zZ2_n;gf6&Y7O#lN1C$B=*pG4_v)sD+7%iH5ee3gp#6!EhZoKBISs`G%w_`a01w8Nc6_J&@)5MQ8U zX673TdsO%u_~)StH&!plCRD2fFio?PI#>b%0Y$|$3a~&DO#n7hGP1AfSB`8lG&Hn1 z7Kg{x)$puam_ITI!~p{vTS#@^@nns@iHWlSS9*GSh;myBb*zhYLe)}B>bT2u2^3>w zw4Xn6TcpZuPKuvHLp$i`IP+aCECl>G3olu@6qXLxqfApr^1@94`oxBS%|PWwo~yEQ zy^P>UTmeq>5O$Ank_^EE2Qu{W*(7YNkGJr-nzusmdZ-nJV;j@2RqV-aSH+dY$H(XW z&`G<9O~QKb6p&>Nc4~IV1X2vdg*V>~4Sfgj2%^Sa=Y!NiJYV<-K(GlnmWQ8zTb$Y* z?TpNpf|3id9Ts8&yMiTtu`x20IkO*c4rFI%uZ>;Xu#45L=$jpHK=B)v;PpEY zJi;b;R#!qBnnlV<00O@?O-*5VdNY8zdbx}nuhXHUC7@gUkdlE1+HBSCQ)Q>|-LWKz zY6}^l`Hk5HJhY#ha?4}i(yH-QeH$dd&%4PWXC)#s6OaiSvNRc*4JgD;*M+hpBPR9UI%nGl$C*Vd7LKILU5Y(4nlN@(e2e{gt<(qL@H3>w|41$QIaE2w8Va z7wj&)(VoMN0$~WJY7qavz@#i5yxc|pozQ^gmX(d4o(6!7IRU*jA~P1EYRBSz55{n2 zX{m0+K?K1PXOzO4M;PUXGu5*9*~*#ZvdGib$m+godDc3X-ELvE&^`%I0up3h3ETc# z%UX_(N6quJJISj$I#>_GK2RmQ-)^!~66mQC)E$`XqbkWe)Ya8{XHzOl+$gwG3gQTeS`$S3;IN;^|Y)6t%dZ7W0#@Ta$2l!@UE6<=VAt1r~GI z#VEGwP+JJcsEV3@qqui!Am37uVheon-d0!pqngfgU$TM&$;jtKl0AF&w7w~l`lKoC zI=dfmoT%S_4i6)(OwZ145mZ;0TUv51xmgC;w^9h^dccvarnYtl2FH8Bur>_l{pf6J zYO);7{Bi?}kOSGw1k60bteDBmn{>VbtpptX1T0nzKm7=lGZzm0qQCe0;)LnoQB!b>E z&IOCKG&CjvVX6tT>{$C`BT3JmX%*RVo8(@RW}5tfuZ!a0**U_kIb`?IAZT1O#;69s zH{xhdi zN6i`Sir9!-O=Wc;QUz<}!mEu3^+u3e41$)nVL`{28>0PTp3W$2SY+Hpi34S7skxf$ zu(CZ0Dl=3e(dJeA`^#>yGQ-c`v`ylkWa^gKEl(S;(VWh3KR&zEuaY#V=HGm|q$S7v zvYvzU*wjsPb0Uo*W+q`@_K&`ORn+0kS|}|A86iI+jzmPf9_i@HZuDN8IgQ0ku&Su0 znW+O%Q37CLWpFYb(> zUg>+(IlhQpeSLkcjjNMt3*{XIOk{GV((4Y=5v&*@?~df^m>l{*Up8i&dgXnqeNukQ z4N)t}>%-Q{6!(Msa8|f2B z?zJmo?z&}1r>=kN1J=34MM$_9h{fV5U={AzxpQ#w&!5OGBqZ1kSXf)OY!TXh4=jbB zzyFlum&q3d1V<^3J{5`1I?c`8_L|OVo(M{LI*RSv%c5#a4+6n3BcS$ptsWu(w7Trd ze$7`qlfK-=Ix!MK8JL=qA_AW|l-eBy)~VRJNV0g)G`)+KlPZauqN4OyfB)_u8^hgv zcSAI2sZ=VjNG17gPtUbAK~_4t<62tGNF8kxPPSw@^+tfC#k#S8{10Md&!nowZJJ(M 
zI!#B4pW)>uxkZ=ED{(l%Y2rsM+F3VRGuhGjA*TJojBCe!EtxegX@&|qA{>vfa5ygd z9?Ug&aBwu0d#+}h**pIj9<;K5y?4)lIGDhiv$q2Ro;8n1IQ*jHi3psk*oen=Ea^(m zc!etJ(?_c9+qnVD2q*$bis}HIpUtby89#p-0jt(S))3_ycmO{?zxPc|!$#7{Lre~zhfbK)k*836tpk`Q=rID&LqW#? zl@nA!?*J?i?*zZQwjp0%-=*UB&fjo0^R%6v2d{+uu zTIX1?)*GTk?FFcd$3jlK=O>^Dl2$}iR2^Ymm{%k<;kh|#sl?@H`01LPS6;PfXllNL zfA$yJaW=~4=X?4@2%{@lSfoz_ z+EaWaemRyoI9eUYw_4>c0*QsQ}HZThY>NV4W&oDE1#b7##E(u3 zF)G9-dK91LGJ0BCeDhURSJ=~2dVUDVX%q&JjHpTuVS~?P!z`f>*?kxBi7?|RF^=syDuG4YBzQC%6~boe~8II#eSaW`kDX!;eY<}R;4PP|KmMYsP6j}{U7fz%fz|W z{=Zb`zc2cE`1rOVmHq$QZ`WV*Vf+{gM)cob`}@fcDmER6K1dLy|I1%{Z`KgH{NGFZ z_owSy{%B;e{05fT#h@@r=yvST7_&wT?I$uYC$KOFM}nNE1qw?@ zn`z}3QDUbgEP<^*Siu+4$j-EI(@rYNJmN15RevY(gt zdT#srgxy*RoRNUGsdohrs#kos5VQY1Z-(-5N})cJn}Voy`_Xa%OQBOxY*h%`8^jmX(t3~o9RboI&g+2)>xGT{YC+gi0!%EyCiXvatY8534JQnu zq+D3qlGQQc0T>3ajxZ3YH`n!0=)kEr?Zt~_d?#Fxot+&-#&)3?FvQJzo}9dS8ttbX zlg7szFV+N|N_3^W7e;5Mu$Bn662+X9o%DZqtDQC9l;h^kxGMc#giSQhBO)SBC@PXV zm9VEa2R52iTZj?GY`xL5;^N)673B5xId|V9!>r*LL@Zp~&htltiavkkN?^MM4<8=|U@9tE+~$4Amq^6;wpCSCKs)dy5sFa- zon{Ai9MB&%VbaX=6El$B8e#Z_!Ri32cVx*eg!1Q=ghGsCxcI}KuFq2{jW`Cl_+vB|< zgKP$j^Y8=fwMjqDa~c|cwTARfk@)!3W_lF5I)Zgp1!$Pu#%sl5&stEy)g}In4{UdC ze)7dlFw6!}f0359S4fENwt}x(>Ir-}UdKs6P(hGvJN;yE)ZrM~7**zZAoV<(V~lVy zw6tN`!$qZ~Y-M_3e>pr-ytvUMsjv_Y7$R41-b^e+5F9miKzde^H*JvrJs_@)UHZ+f z39B485efbM;OxyGRDw_sU;4v2>m^e#paz{DfT7`hjmJQ-bZ!)qjZ2S=mczI_`l>sj1(4ZRd7 zkkga)1uVsY-g(^`Jgk|lLC?k}u&3HFY%>6v-Vu@oH=wy=aWr_AoD}G8VZEUrm8_wR z5*qgf4ZoRXGzFwh#bWOT1#QC(QcGn(VV$jBPT%TUKKs~dfgS~~VZ~1y2|g?oliWbh zIP5khl$ZhnXvTdN}0dDq_p) zBL6(l5hUgXh{sP)Uxmky?$q7 zzU&_^`EsM?FNaGM8`OkvF&g6~h1-`M;Bmuczg&*965 zENMIc>pamGz-)>Z<`Iy6xQM8WyGv+OVtVBZ*j0FGvEvz`|Je(E2X9ME93b1`ZH(Zt zC2INOArLxxZi+W<@Daz!Q&Iv@eGo)<*LT+)q`t7*3y+BKS4)LK@GZP)czAdff_kt@ zvI0+pu=zJWx5e>#fQI*AJOaV5ndF|Y@bV-GCNP@Q)6<&-52vDmpA6zxe7hKhUnL5d z6-4{NYASB9XJ4;nC@U|!;7&Nw)Tf=e9lEMfqHA-FH_+%5`7b^s*Pe$9`_^FGcplvE>D7sn;>SgAUhEJ4w{6^8RG#$EfRP zYZ2=kYkTukJQmf*q9ZA-C3>$-(II^P{CV%lh_ad*4Hg;Tr7)NbG+)%UwRaO=8RG^7 
z2|PF41RQMEoU)x5+(mkWBO}psN3Ir$34!J3R}c=dG>>%QeIA3^C!}gTOFW5N*qqVV zyjBRJ?de5+f|15o=!ex7jL8{rp=bwvM4e~tHjXkR2Ji9}p2py(1Y{?Fv)OJ}D<->6 z@C*pa7-~?UBL#yQgMe}Nf_mX^ZqTlgbZ9NH$$32iAz^#p5cY$Fh%#D?K?xVWeEFgp zFCBx|cE8zkhnB+xjM(4#Bt}M<+~Pixkkd@JT)t+I+m6^ujNd*wbafWAK7o4ChJi+I zDk29)5+?(EM=|1pQs2B`L|+2{5_1=TqJg=Wa&vRhGb$dc)3|bl#i=9?kq9dQcOeC( z)2{_LJ*cd0%D3uL!9}2a#}EsV7C}(gaC|=<%0`sb(zEZm#=A zSn0%g58`RDfvoc19t#(G7g)R@!~_Le>g9hHuR^Gp0LsJw7o2wYa&eKwHlBi8rVtt= z+Ic~U9RU59uHZ)m$Ct&-9!3pU)He}h5qK@Lq}_fZef-n%eRh#w`OKLC4BqwmlJ6 z1cG@@O^tq@b{ghFK#<9zVgem<8=y80q*bslVe(1LF~HxAM2F^+>Jy1@xMANWoSQ|h zyPsg`XX#gM!SshBN@^lrAOfG@iI_nLikLtlwlbQ8N@vb6cvJU`>Hr=QR!LapHK5ld zJeGyvL%JP!VGZS+^!(=&L{k@N8>QL`sv>5zMDPVtN=w&Lh7W?PBOVQwwnl-KE+EzU zS1mwsvTM^PK+C-cKPTxj&B-cm#Yn;nZbuH?fEvSWA&D2nu}{I+7tq9|2rikCrHr2#}Fb{OhBVOUVIlFj1p8_mJ%V$?ts`0s(y&Es^`u2H1m{ARaw3{S zP^PWWmJ4gmbnrvu{Af!s!c7jUN<3UHvUk?+uv31xpSj<|sqe;J)mM2$Af)ii_37mv z**y5-5;I-^5i^MD*kH=&F{71=g%yLiiWP|!LxLAh<4A;t~~NMYS0Yg?(0n}A1!q5^3G&xNRni4!9j zF|d+c)k!=j1hX5b@wO_k8I4eoSF*DDD^id4d=h$IxNd)t2|=DqFkuuG`KCeI2l#DzC->vz{u2@393Ld=EI6!gkwVsUE z?-5>NzKUo~;2yTOw-X~X=uv*D@b*S|W9_yE@HLA$z#~9{fMbYc@=0G%x~^b!#|(Ww z>;wUf4-kV$Shu9|Pl29*kC|^PRK2}TwF0#ym%+%(Rz-GCK}`{DQEYw&CZ^NiXe8}^ zQXp1^uuJVKlrAa88Y;y}7re{Rr?>{NTGg*Ux^zSSH9vP4a*v_^R=fep8$ah~c znbPrCSrF{ugCEF3?a)0xdGaJKA6W}6VQGXCZ@1U?Hg*9eT=V`DIX-TI=S_@j!c0kL zW;p@f1~wTt#v&axq5%nJ|9v>u{CImrWo4z*>>f~(c8AwmN$n5v^S2dPClsF7FgNFC zKju()u;JdBGiQjqhsv0E2M!Jn^n*|tG9g2f2yQHc1S2*fsA$Ye*5shIEnMlAhYL{U zi6|89fmN|p!Sy`tTH`Wwh{0|oYNQdJ1=x93q3}Z55*8YIys$OObTc^OKws%R?X$>g z$KobaVoA`lusW6k2+cjiAE~0FW{KY3Rui{{(OMKoD+mIRC{wRFo2s*5)40t}f(@-q zOj;d^kHzNX2(DF7QlcQ3ufYL_$m+<$Vp3A9U?ILA29Jm-Y)mEsvEv@12;F@z03)UM z!32-S}>CtB;tKtpOlc z1=IuXDea45m8T<0Hvl7Ie-a=K4*|W@6TlF}Jkj30n~y(f8SO9jLbU-)GGHDsdD~5G zio30m!H8>t3Gx7_mPVPuVp`piGw9ALD=9tA%A!kccI~Qs(_(InGUAD}D@39xKqDUD z=QqSFL7}3mtHylO%=|nth=Sepd2&ggKXk4vF^OHrD({p=au8pl5v5upkS z#6k1`35`3Iv|Xd{^8uJ5w?L!Rt!zC-Qc}0IbZi}&($ITEd)>>p;%9LH%94XL_fgEM 
zmY&jW^}{;@G~KC|$Abb9-ml*Y>d{w6kf}MQaI(Im@3H*oXcQ`BbQ$|S+W{c3oe;6G zxQ76(af(PZ!#|Ur@9vglaT`&p?ie7ZMP*DHV-lhf8G%_&>O5o1) zD#Kh6E(4^W^%bVpUPD*cBA3I8Cr^S?Iv^nM9H$9zric};!OaM(xE^_UZWB{q7!ARi zS_?RGmS{egxg9k#Gg|{V#&iGy-_WvC0kbbDDM>);O@=|VOn3m8aO&wvKml&^>+@b} zvg$+LjMl(rSXcn6CqXJ<3T`|i^QTJqPrwR5(HiOg){-U+sG{lc6Yv9xH6kHF9m%xB zd5Q;m&bdWJ0w^y@FQ2i$TTrQhJh!bKU?Xftm3{CJP>L@@!u|1J;4h2iIIMox@Xu^eFDX_oY+OjQ3lNT3K zX+ZhzxX{SR$jC{tg+Pd^zUtMW)Y7kcc9*79XbUwbr0 zZglc-T8hM=gIQyeYm4TRcGc%s*X)gDfXs?8V+Sy_pO5b@O6OL$$%)C%Nw@e{R+&Ee z2i23t{JSoZ*eiKhC?kRXRx{m&?~j=YSn+bN;YoUN8D| zZ>$r}sl@z023Yab5WoG1*eo!vcPB$YZg$%^?mmB+!9$k1G^Yn80yOz4@K?g2kS{`Tp;cNo#Ix);_^>}JcrFF~{!<88vzMD2vbO??E0M^XX*g76OFff#T@J{OL%M15@T zc@2#q1X6}Qd%PNF4($q#j_QfDQb;^Uw{s`=C!Me6dB7XQndk4Bx@Qpkh$9?0hoO+6 z2}YOzZo z9*QUIgjyCpgf%n~$ul(n2YYYfm1Wy~i;5@+0xF`UqO^dBbfY32N_V#)-6;l0!$Y?q zJO~I#cS$!A(p>`5APr~T-?x8b?7hc1|G^pKb-Y6ao;$8P&~& zgU8mYtP3hdoq1l}Q5a23x|?njR%J!d+XdmtWj>hu5Oye3Uui&lz&!(A`<9n?mz%$8 zhXi(g!bf9M)Bn}E1Gfe1F3|PJ>h7OgSA1YoKnj3rw;9ZmP;R`25*y+>SWH1Ye+SJ# z{t0k|D<8lBq+SF7d?fzuf+v!(=hSS-)&S2wRvQB(hV zQ-VlINCzxX*B0*!-nrb*6Tr@U?gyV+tnoH?S$8r|wKULz%N!QZNRjCcSmqh6RJD=M-GK}ok>d>Y)Gpzu+xZxg&jI^ z|Hxu$0Lnoa3NVg%RTPa{&f)d!?AQT$+!X8z95J9wv$&t$BN&JM3Umjw zhEgIVEmWf*If4j@6d6EgT>f0dr@e`M093g^IzGF3Md)w_69hYTIwp;q5cj4i08sUq5;+gJ#REDs+(bl8|6g?b1|Q8?80 z@c39q5vytv^wU&O@gc)OM0$#>c>uAV@MfQLm7{bA7jbqichuw{|!<>S-P zlARsf+5*G|cYC`;qZ^6@=q)pVI(slz^C0?+&_?!RHNIvNI5?zO_VTJNmp26Wf)&pP z(OeaKn31pznP3+NPJmkxqMAX9qcPgXaIk z`@#z-Lr}PAz;p;TL?0w23lonjA#spT+q$~ap-SdX$}%iaL+Xn650GPrZH;|%f?I#ODQ14;X8QKYg>l>_LlmuDN$e@_YiJZrN`ACiXQImLT9xY(%v^OM!q3Wjf;4 zM@Vca>tROuCqF-Ipd|{+H9%k(t zm5A_g0H+B34H_w;UjSez$Iu0~W(34J5HD_b;>BS5L(`ZX=r=&e{e;$!@1|hu2K;rm z?&6-HTO`mVh)&x$3yA6ld=hwHBxPi1goRT8J3@8_*Fv*NktE^~+KGAi`Jz&A_(Te- zCQ#+(E+qkiMWe+gPQJo&12`rYLeY-6W1u5^6j%~y2t(RLA;9fSxo^rVD7*jzJ0$D} zuzV1~0d#1{ov>LWFMd$THsvx4Ygxzxc5!_vk))^ zIXfD^Un@gdqy;j0E~F$`I+r1gfsaP#?ZXJI-t~)hia*(6r4;vu%gwNqt{rL0aM}K12Bs~r3Fo>h$j_cZOtbuIq&g1A*&y11nA}jA1DC4 
zZ4XT7`Ge;b1cnYsATt;E{Uz4Z_o2$!S?okC{)k2ZdO)?l`^m`(Sh76f(EtyNfKo)e zekkG#b_G|MHvoOKh^m4BIY8{JaEu2hLO&vCihi1 zr}Hj1HgGE>V6sQn1uUcqm3uQgUA3?vA%$&kZzI$@IDkI_=^F;&fZebK55hdKK?r-8 zo=$Vg_XZ`xm^BmK$a|I!h7(9mkXryX04&wpn5aP39Xu~E?@K|4EIe~Cxx&kQ2eAp^ z7`J|2FRPh9^G1kh095cpB=@~fZJ{{3MnG_Pys2bm7O3=QizC>K268rFwzl>CoKp?5 zJ1EK1dx3dU8QJ?}@TRsswb#G=G6PS@oVWE_@c8`3}IsuN>7Uz~?`QB@C=RJvX;1MJxjY!_9#btyfSq8ny(@0tamj z9Cr2bY7QctAp;O#pusINvJS{<&3jD1#p=TXhZTYBSHOLMwGmDn@@64Wz+Pwjs<2PM zi8h8mQ2$Md5z6&uJ6AV1AiE)8BghQgr-;Q9_Aw?F-WPZt+;2C5J^^+BIsFD!8{%G} zypDr&>x3Oz$bjB@30MR!Sp*=v!=Kkk|1V?!5bO~C*TuyJL2GbVwgdRN-~#^!Vh&^? z*tc-|KjEYs66iVx*A${>0KWGEA3eAgf)uihfZGLl_iSQ8HdmzxG>~zEF;^Ft&kfI_ zq?*Cj5XWtu9P7sgCLf4l5Ti3+rQEf*0CDIH>b5zwF77KpUr=EH97HH&s8ONgstqUNWPnSG?o5L zl7?2JnD$e&kw$b;!Q&Xz2(9&7uBndSZrW2C?wy4OF^P5x75A1Wc-BU?mrT`5jpY4k zr--ZxXE4IbHw{^dmsaNw;D90%RuRHT25GIj6vp@UG6yFhr!6 zB#FSUag`h@=J~p_vWpfd?RuSSckkVInZbYwSfrh6&hSQ&n5D+t0tR{!7_;%qY|w3c z&@l=0Dd3%v!Wy(s7|pZOdvW@omfr1cTcQ1|SF`(Wl5`OWhq$p}Rqs){2^bb&-`v58 zxtZAv5J8qBKYSofvgp>zFh-?;F$B2FM2H)#92`xjrvh;1Xf=#`P%(R$KP=4{MMZ)K zg@lpop2-V0BLhuS6&o-Tqa8}t*s2$&%J&-C?M@7?x({g?z(nlZpVGkJ$bKXXD@6?6bTubfr*JPjG{@F zpm`y1-_*e1eRA^s+TEm0RzALtkr5K_18476NN-0l)S+Yg;%s^sI-hq-@;!rLU<4!! z7s#)OW5K#oo&FtJFq`en<0U`eu*i&dT-weUMP)tZ&XkLzl--(5&rlf0#8m15hy^*j zC@v{!5r)ysSzr9KR)>a|i=8JIFFZX35tUUnPb(GN6NqCMgp%D{YtKOV+1j0`{H@T7 z!%*bRq<7|~Ioa8V3-rS6DW~lzYO|MGTU(D_U91QfKx5qb){ToW7{_yRI(%`g>$*Rr zn(KM%)Jquo;3Js)(PS#L8^_Q0{nIlW`mbwzh!qnVWie|L3$*7J2(3?Csh2b}a&iJ| zYlQ%ebpr;0ux1Je5-%yH91mhs$yFFJbK1i0BqKk56|H&8s^YgIP%%u zogc}--SM1SRZhXM%)nb=S@Sm}IM{Y;<~dRXKe(x9X7(BS;$SWt4`2s;e0;51k2n~6 zp*d9xGWBoE8TX-Yh3zhxUESU4H=?`#KDV$~L)~wxlgiVn>1b>F%y|=|Nb&g0iwSF? 
zn-%W^c;7=_GcJo9^3 zR?LBJ`-nE@-CuS>dLz2ul3V`U+fyshV?Q#B7km3Q_YS>%*0<~qWDE*SEJLQE9OdQb zhk{m2A327M_a=TBjsxUmE=Qb@X^?CZS?LW+yyBlf9X1qy+Ev+=OIjLeKw%9ztJX@1*neS>AxRu)J~({Sv`1aChhRo?Na@ly}*UC}WUb_ihU0qvX>w3I`LFxap++{nB z_UF4%7GSyV-Me>qc22ANAv@c_`9HJsR<(v5H8eE7yOeGE0*AKM{KP99d^FKLJ#uTi zSy_)jPKvdMW2?%wSdbeWUuy0jAMXT;O`V?bftj^aL3nsS2P{hw`P9NWe^C>*$$)|sO{3Fvf3KrEw2M*bTKFneVlq1F?jqodq7_e+J^fU>q>Jy1+5vywEde+LMTZEYG0OP^3c!5SA;TqoYjk zWBO#zf9h^-ZcwOvDJqKi^-KOgKMu3%EG(Fy2mgk3^)?nnN9f+6&>kiwBbzg%JR!Ls z>PLAxgAKFxz^~B({Ub=dix93*)SRYV?0R4GYgECTK!$t4iblLKhM|Hjuz9wQJc^5p zZD9TW&ewef!N$cU7CaF(%TETpk7B$f!P^I@Wy`(vskN>yC9+G9p+^vD5y}Y43q)#W zqB{Zt66ttJeGUEzIS|%hM%U*%ge$-sg7O8I6d&OPhs_`gW>JwMGt=E`Q{w0kwzd0T zckz0=3?r=1DdJvz^Z3``bbsBlP-ty^eG!)BJYcIHoZZ|0FAsGu*1UnIX-5uCuJ)!q>4ZTzvQ(Ok%E+&wR;&)evOQ@K?FR65jX!iB@P*#25{=|c(|_CDD!<&7oOns z8H>H?E9%EjYv1mNBo%va?EanxE6iyFHn?BocKKe;q`wR{v@jlM< z?tt3<`{6&`vsG0@shmEUIyjtI7HWl;2YkQ`|MU?7tc%kn>vOt7=3N+4lnCz}<`VIq z&r+Uah{_-K$J91KHGc#n3LBAwv#_wS;SnDZBQdmZY^-DYw0qhH+$y!_`}w<*n_e-H zAd%i6eUeic)Wo0q`m^w9qj2?Xx%`8h)y_d7A&99E2|KVdkQxu{2(SPwLQyb(ZE1NK z3`6j)J%lJBrl~N9Y7gf|Wszz#&O5NOvo}Ie1ST1#?!jAt8ci z_Vz`}c@Rp_nVCQoz!~1R0-H%lNuSFT#U~`dTm=;#Ze?WvXV9ziWsMKgsx(1eS?rA< zQ!5CPSxNzUdB`qBI@?N0q98+SP$@u)0ZdRN@P&;$qNJom9TOJ_cqr^4q&6?wLI1ZE zj+j>+C2;2T^_O5uSt^{`?(dHS+Dqv4rYY3gydR#2wl+dN0D@qL|KkDbux#3;(njgSZK;*iyF*oTnc!f>#wgvzfTy=aQ zmm@~S#B&1@EO;V@U{I0XY3gj+>tasnVHt;cQ9K~N>KsFi`>s{76MbxMGXMSP$W=+% zb2x(2SD%%Wvl)B@^-y;KRDcuO8pdq4U0t;)z#dYz>WtYDu4K1HJOAlNd1 zSMgg~TA~dM8>TK&S2JNR$Gd&|Hqz9~tACTtHdRwo(>gFVCz~L;}4l${4s20$NQ?>W+V61~^rRu=D78tV=O1aM$s_9+TkL?DxO zjlOnxWHg@fcX@Rm8ZCae#F8Q*2KiHO+33B??cE65`_J0iSHJ<_S5VN_mK7KGbClE8 zzISmXe6gJ904SnYi+j2`M+yN|YX=vHloy6Q&dAWSo@wB(et7kMCWh1wgFv;p>&Dw* zh}=~%adD{g_3i9X0Qkn&O@P;uKs9Y`{n&po_4~IN15si1A55Q1umxW;7&RwbSy)it zyk@gL#tF`dw{PG6W^1tF>4F4#w3jIytWN^_E{6h$0&ZkZ2XWour#l;FHa2~BV&!n) zd>Bo;6#sm1(kR8sJDdbc$)=pL$r2l^fb`Z zCv{vp_$zq^IS{6bonF!&-gc{jfo3W$i?~-dsUg$>g0lGV2gIaXp{=~onF$LcUE$Hc 
zjmb`swpP~HMTZ>$rXW@z07XEEvBAOxatKLF2!Bh0r%}-6{jVtHR;rezpm!78|KscE zy1JBCkPx`>)tv=}SPfX}p)qSG`X^Y;=Xt$B@8#P(09pj#<6F;U(nzDNlQXaZb|3S* z?*@aB`k-g%hIsXJXvBqn(Wy9grT*a3$%ba7apwpeeG*KL!RJ?=)#Lom*lnrnRB zDcNNX2mbgwC!Is#0lX!R)NDp=ZS9v|<$(tA125|40n%ymT35GsVuAv2)99!=89BMRwe>Re z5va6DcR-iC3uK`{E|2T*Z|c1Hi~+AL3xkzWV& zE$m?z7ZOoOKWDrv)>I}?3y=-6r=qduGUnvApodu-UJ%!B;|G`;^CUDvHKZNQ-o=#)7V)$C|X&Gz#UJm804g^#K0 zwX7z8|NdujAwYC(>J^+f!j2C|-p zh7sf_NZB6811VTPRNT+cH!p;s%A&kDY=s}6bE-Omp@%(PU3g&UocE_so&gsZ9R2C6 zMM=rY1xq-MKd4AG%SWemA$^Gf{fhU+%(nGdxu8T{%7hf21CPqp1nh6vV!ag zBlM;^5_GSU7VNuEpWJ^^vquQ|0{gy8yYtSbBTG~d5!)R zz56tDGawC!4-kxjt-z|Awq7^fw@Pe+n7g62HQ6<5^i09zT4Fu6Vtm-tb+^a@32JrXu@p9X^3*kJ$ih^qTY_<8Jhgu7x1%RheN z`1`k-fo7GwowQY+n93T;ZXohI<)_}pa(N+XOKV>YR1mX|kL?=yFbjqxDlA%u3RJUe^IN?$m_k> z6cydS_)1sgU-;FcO1a%mL6g+FsUJ##Y*klpSA6@TexOt}eb3eISq0(-EZa>5} zT1zS?YA26=xRhnz^{B^9qGGuwx68)|0~mTq8bYA2FhW}mKaICp(j@phPmYX80J258 zZUH0$;73Vi{Q3KL0fe@;!Y+_cpn5~jt1BYxQD_%!O2NG)QPGc@mMFs`1 zTj~x2>)LRp7j1aJut^E#GHOD92(LdOygkMH01Llm?A3y*fA2EVr(sL&ej^TMQDGmT19P=+32cHC;=S zz>&amM(8YWWma%!+jK2B{9}nH>J@mMq!E}F^6#d}lH6aO)lksUZ9IFg6 z2!hanlmbEo{39u)rCi|1=z{c`i?74a&yQH{VR9>yvf?$TVT3_=HGlf^+}B7kkB-Dm$AH_fsDSK7gyvMJfsc&4(ngj4Jb} zJJ}!%8L|1(NJ4)uJVU3d$(_BFganacM_7x+6Ymjz;Qu|+5 z_22((TxxO(`aiyc{JRbIgC?#2Jn8@ayUQ_e8ss65|9}7D%?XVE_uu;w!s!3`o!727 zUwYAX=l}lVd;UugmvH}&-wEH+aPQqs`>X%=mCX=5`&{vVfARmn-2c9+|F5SzexiK| z`HqQ1pP4GzhI&@3GiFKDj$(&2!$oXDbjzh3RQs9Uqh?NAqZ)>|%ZuH0EW2Uk%VrD4 zDu1YU?B4}Z8gQ8bAQBiJ7z2$s*(qC zYb?|(QM;DHFfB%1Z2tJ1DR7|oM6<(MaadSfidg1_?u3_JPk6%v?b@pjjcSAMqyOQl zYb87PFR}H8BsvV7F`cM8BoQtwCzKwbXdY(dD;dlleOVLh+1@NpsgB?}&w1(Xr?P9Z zsX^L#kW+a+9`$PWy!NnYBHB!6ZBB?@U6*X>#?G2q0e-Le@b3EQ+N={GdA<~lc{nCnl}8m`|j;-;$$`lRw=kt=7-_ zoGrP!e?jIZOQNNogn~wWl$}hP5>Ki}=+R#2X_O8UmDc_nX*Mod?U&@w{8S;ebP5~j zoL0>G?yNzrslpk42Pa|jR9n_34^ymmT63fGZ*G)4su1@_d6&ZM@Im4({%ax}(Wm*n z51jRjQxlBqvRJFz)=IY?24dm%<5yoQx?U0X{I?%>C7ZpTp))g{9W9QM@{57;0x9!% z1~CPXzN8EHV>~uyG1unl(wfR1c_v9|Msj`K!1lh%J?ZPM+1cuKqvjo`kKI@SkC|jW 
z&p_T(c0XC1dMpx_~?|p@9!pkqs*4v9CDwX zJB1ase0al{w3N1Ebk{@*HEPY_`OsP#D-t&|%iN?gPU$TdDQ`|KanWm?#*QcaHC>15 z!_Qg zYb!&Vu5-g8(^2i3u*LGYxdJt7ng9<>z)p-OQyFiv7zGuulSli)x-^pfvdTC z`6$LTHS3GTRk{)5q8g$%OeW(%xZ$Tyb8<|^3q*v46AdJ5MujV@$g=lwc2`;@$jSvK zsr!?CdGjS}HBB{(nV)gdk%cMr6K<6be(W+Mm-sOihl2<=f2}e1GZiAEW)ow>OY!k9 zhKJVZYZ?AZ^jyH2-(m?5y5aWaArik?T>_ul?_%u{(kM!bo#eAo;?LVun3<`E3@nz) z{Sdg@-ykQ_aA{t|Q=(Ajts#?+uDOPv7V~|gJ^gYa6fbMrwE*>uuBw0p!7oITR6P8R zdkZ855+sv--7#An1ThIqrOM?KqH&$f5)C%xuFX0QlP!)nx_Oy|x0}BPPCWn2ES-*$ z#LCw(5+cNEd{#R8&D<=b_-80H)9{@TOF93z0vvP;mSl^81nOf>n~1hO|I{4e-rl8G z(X>jk#&!;2EB^9$lh5Rd0;D_5wMyAC1p;KLSUI~#UQ@Qa;7LZ?R!(q~XeReYC!2H_w4>gx{~D7h&jhk7U^+`Dji zt)ae{bEZ?t%TeU|opUmv%usmkF9UpXi2Z#(y!%KaImlJ4Ag}x(t?f_cP%)0*63L(S z(k8K(s?WYlP63{@Mup)N4!idGjsX`sro_23M5~Te6XX(YrW*`GH2s%VpytisMp$9Gnxdh|yN}=7A|q8^PZv_CM?+QLjJ_Et7Duu|&%+#DaB0qLRmTIg4EZ?!1=X~a%f%cEp4Wo*X?nh+UNes5?iX3JX#^Txg4Cx z$H74JYns_4kGYd(ZubhE@8@3eZ{OawXe!j|5k2)T*@$HRibe(LR7rRU8439jk)I!{ zNOk0d8ILk2Orfdi6AlQ?; z!^dTGcP|n7;t+o1FbA5x-hoC^Y(QxV-5N`_g4wP$skHDowbYR|pgTnWPglr|+p!eS zRfhypbyO@R2$cpo-Q>*rtguDGN>XQ;Hc*w-2C{jZb?^~8e=GXZl)zmW4{Fhvp2Y`Hsv*k*$Iz!l+&`>FaK+{+zq5RWz%--l_EwWtJTe9q?E9sx5)|NxjIoq zSU{2)dX8%U^>8BD;J(LS;n&*JK{Tec$^TRa@c&eK_>@h%UG*Di&l6AnGNLN=+Wi{sC!pDA;^PLM@bfs9JS!k6kM%}>bpUt6fGt*XF z_%cgjG230OTtE5OiNOB76zJ(23QQE*A!8}S7A<(nFDR!t&iLD^HgqRD{{7UOLgrh^ z*MB+g+>H48fhUn=Pzhzk_MAtcbxig^&za|WbRS1>xQ|Zdt9!I0u`TnDAKnTzB;g~- z3|y_GOE0dHx8Ztru<{j8r{-mN*$7eQdyKhl2BzV+jk9(cf;A3e*lsI)?m_i?SCS1V zx7COwQMP=ON<&;!opC4lyWy^MVpy>xY~co$-$Ii&*wJ4;ztl5%@85y}x8j#QMn@#t!uo&=$9BJ8 z-S;?8j|3||n~f6Z76F!8i4}SBoB548 zI|^5VIVOag_6A8NGedus8r)lq8j|2@-szD=D+KKw*qnC9ytZ`Gvbque`kkFOKI-%m z1Z2EPJtw#K4LLD=oSqH*n$eO?OE_IJb1XZByQ!h`O!-DaWJ^sAXH9AP)wCHSe+lrk zTTP0-W_OyY{SGg3?d)2>BYs6`Y`MkARYm<+V6wU3#q}jO@Lp<#&neZ|a*@4d=FE=fJ{a$Xt+Qao7-w`e)*Y#=Ld^unBV25L>d(rP^Uj2cbbm?Jb5>b*;i@kn#Fy!L8MT%F_;g_e%Os8!yIoJ-PTH$*lm zx<7ojNTifv%Fe~PE;4Le9{7Vl5dz%Q4;gN{Np$)re97JWNmp4$%cqC~JG{So;5X_y 
zd4%OrKe-)vW%S3|)9Ts)(NCiU$>8hG4<;tRq?)hA)Gp$2+kpG6_|$^X3}ucD5rQpZ`Nxs1+?C0jR4 z-z4O!`jCBG>QKAnt{`xKzJCQ%CKP{mKT6-Zyx$;G#?_I!1j>nD<7%-x(TjWRwaWsg zH)>u=s?!<|?V}$@8}C*O*eBO++=gJ=L-i!fC8!-@^W@n>k45Db9PK1{4kD^OPx*O+ zm?ghu5oNu^WBq2nTEAgKh~LBd)Fw`eE&R4!J*&gz2x@q0!TbksFhyB%e*4RyxB`qN zbW8bf$HeZcgIUI%2a<(Hd1#9(nPL?ltFkD660U?J3Y@Fx*c(^P)k{dN&94SfMaaah z&v$7GH*0E`3(h;reoaq{pF^=N>_`np%9%N#?6Gy4W*L7fD+SSXl2nEatT(LKDXL7C zOMHI35S%c6geFg&m9& zqN2Q=miP~%WLUWd{RS3WR_q>T4)-rsM`*}pTMmpRsc?=?;abiYo5zwQl|6^ADKiP`0_xB{+MynVW#UWgWm9P@^g$I?dX zvpzK`>E`OxjhKd~JEj*M{vS?``ou3Vt@4REGwjZrp8Mu$n+&y7jaC|B@5%>%$Uo&L zo)VG}RlZjeoI7$4*RZiMQP4I_iLF88L`d3E`wsO7>gs=UH+W9|X*z7U#TEwODm+S3 z)$9YxvxQ|Fo`-SohQ0sgJ0$T2OwJme zm7eq|EB+PQTYY7=uGW|r>VfBcORLi6lc9C33=f6gT1eyJ^V9Mw@ZYI%ywA%j&Yx;K?m5|xB97BBadXSEb~DeT>HN5C z!n-r}3J3C%{>j<*(cHW6_#ZX~#8)b8P`@1i&J8iclg>L$8 zn-KR>g-B?p$y#yaNi*x0wWjezo~`RBe;#ta&CD*{PkD`h7f8f)&mLj%@l$%eU-iW@ ziu+DG%Do-bJ<;l*>sUE?*dg=O@{PweYyT(WK^=STFFlg|!@U3yDYi+tbD$@CzSll6 z&Q;|Ky4=F?xZH%;=%Jj^@29+J=Fz;}_O~=6tMLT2Gt0OBVUWe-F4X${lh;uXihn=4 zqTX}=jJCUTc){&~^d&ndy6d_|f4@fFmBk$q?6mkr_=y;o#kk5JU2|)EHD88b>b>cTM98{;)|&XYMsGIyHYz%DY_k zX_I(3ILry!Lglv^Y**slW8aD?k3%ijpRD zzFX1JMe7Z%1^jw;4yXw0+WM#>KiOMv_xU*P5Ei(~)gE2mKDst8yd(K0VU1*+a&}@i zW<}6Da;_iu)G^pByJv6Yl9ONH$^9du<_wn0iuyg2o}Vlx>1}u>zfTon%6>5NNM8Ac z@q4NU+gqE{m!M|(mHkqmbfWOi^$pjyizXSNdzXyHO+Y%!Q>bmH2{t`9|IQoNuum zV>Ge;z|Tz z)`&oAJrO}>g8Tg=jyx**0Q{bNUV)vN1n^C2CHojs9ZY?WQASk0jqCdJJ)sJdgC@Qv z5(zxo?0w4iqRLcvpBId(7aV!TtG;d72z%VCBFovYf7h-ia92?O`grUMl1eEKo=&}k zr)WPni6$x~&fI6wZ?7*)5oW1H7EG}R{Y%#0uL)Hcu1$6k__;_eY+AT^xhKtFRn_Ww zf5{7ljkIC0hYPq@-jDnJM|PMQzJ`ONI@b8DE;Av%MJ1pbL^p3qwiwq?d>!h$_<$VY z3}qB)F$YG_cIPWOwKHppaEL@_AhZUaq(z#^8)44pHGZ-P{z$W zH)}?tR^r7N<7na-D7;?{C@Efw>i=au@WDV`rH`{Hi1da&CZ?oLIdJ2@lpJS8Cs<>0 zr3w?-)m{%p#)VYdJT7Dg%_!bP?@#=r35Fv~b4gzt*0w)=%0#02>(TcfNH!V%u=AuJ zm~RXU-IG_Ima%uo9-Rg8(f|mUtX~hwG*$ch#(c8lXE9YKNM3+i#J}UL%uiYEaeaSq&cNb^N2;;+yxYzaToRKp#yn_ASdA?K$M8@MyP^+d-&) 
zq3x1tRd4)MU#WpFw%g~{wC`$FajRpD3X)r@Lj@lDVk{}b4PApg05f0Mqswy9<{o`RF0En-Z+osOiN?M!dPjt`ZAHFW}RC9G~MZ-p|Dm$Oq z^ZVIrIg(9^T2&6oyYvKGvz{r&4qJ>+K7290I@7(Evr?e%AY!_|wC*D9(UkUjctY%C z6(9NTxORE67{_$%yz9dZorL!6&#~O9Br;INxjHEX_w>{0MbeAEp&rCBuo;T7ka2Xd zIiv{4FcTWg3s$X^Tio_H@|h?4G+WCQw5OYDJBrISQA8foDour6yvNPU)wI`iZOE~^ z_zE=Z;?kg-6xvN`X#l$phEy|5YXuCV|kCU9_UbU2& zQ@n%?f%n@xMT!`C+I*qe86*2`5lTwKspx3Bsk=?ASD}2vt{P|_+9(?2Vqq7YWt4MJ0{vR{G(IUV0~eMh#0Y>(#=`z8}$gLN9N2%KOz?~6W4iZ!Yg!G*JSYKcMij@3wX1&p=4b|vs^rWkTy{nIJ^cC7sYq#q5YEPv+yf;?g=U7xj(}h#KhT~Mo{FIV_opbCJ zWZg>Fe{MaTC=5exxI$E8D&M`zz2eb`xbi|6~z{&Y~AU;`M(2TFGsDPZI1$KZe9R;54?82>MmK0NwWfr%pUN(L~pFDGy>SoId>l<%#HVR*UbY2?J z3urM7IYp(`c1A|VV+N&K#AM}Jo1AX?*7w zANCjFD9+P6NfsxbB(=XCWyaIib$G3oO}Lh=&b8auCvC)r*flPRjGBkt|MytIOj^8& zox>&YsqYTY{kyCABBc>dDs;G&m_$X0mzE)L1x;t;9hCDa84zFHS!zUXZ zOGb&V%=0gl66P1GzM&rN-0b$X0sr41JRR-NO1lK&Z&k$4wLNxFFftZiF+xiDdN9=8!-*|;Tyiu`w+{mGRwl8o)GUQPPh8L zB?*?3Usux>L#xgOX;FmX?|rLE>l^D9T(b^w$+eXPXEl=)p8NFZZ#0rT+Ez~Mo~Vv^ zETV+2i9wF#kQzxnyTp(xu7Pn+- z28(!cuwr_X7f!d!K6i^3jIJvw4gD(gz=wjiWmR@uy(TNde-;A){BmV}kNJh(=l@Je z9z^jTx4gP06H>4;B{*2ZERK3`D|Ep5h=QJ=+H&$8fa49h1U>0~3STj(D-(mhSIHJU(vB*EKCZYFXcZ>afVHqsyy#-{XBpJejY% zdxgW!W5D&Y7^F+qr$?Se46=R=TReisYkgQcns4HWR3!O@0JxM_y zOx${-snTD%p;K*q$CZhbtt~)C&meO9TpGyxK@LaO33ry|sb#INcm)z+^$QGB3c~PT zdLu`hIX^qMpcDhEKRQ>&T}K)?$AAD&bh){aU_#;D36eP33^qz5R|iwe{Aj7Cf@7hU zMT$B@dcKDeb`jY!VRbG~c?M zpJ~52u=J+bg(F7`9Th`E%1KJk#{Zo0eujapDWBlI$xEi;*v+$gLL4FvR;d2f=tOO$ z>m_8fG2kb%qW`ang8#~j+vdS1EP|_pS|S=erKuUgzZ!JJj`Ls3s|VMiJNB*&>I+d& zQN-#u#|LXwdFamuD_yR_M7L)0{_BhZ{q1n8RVcWusr9w)4Q=|+FWHX}n%Y1Yr}4}a z5&`Ol;IU?TK)`E7M)7Hv@B#6uXDDNh7-u~KLPx4p-kubmCf};f%b29i2b&>3yLo*(&785_<-HlrOR8H;KLl>#;S=uOQpmbu95pnOM4C^8 z=RpgAv+8z;Te@W47)wt3$mMA0JVBkkV zV_%=5L)uCe(?W&(9RGLR6SD};!`mh9(gXJK=dYI+$M!rYB-BJ^2ox>EN`y= z=>Bn@)|d3}sO;mV)1m#smpmlK4l|5;>EmCz$Fc~iT?bW7R ziWI39pL1eG91frJCQzqu+#YM(Nvit$yU!6KFF_1>Rih{yM#HH2($|Np8e;|g>3fCe z4w?!D)frpq!cv!;&amewcKTtmD`+0tfzfl% 
zJL09~eeL1?7sVmj=pR`o-fUH$9}SviOEE^h9NsThMYl+J-+=l~;B;X*%=nWUP1i4O zuZ|V!{KA#m8;S#JfmVNLB5=}Dc`co=-IHC&{04$En_4}1+EzqP;u7NY=4&X?1)g_` zZW|^FRi`wbTs#@yxG>8TH^N|>i|2UM3}W^=A`n~K&jJp3x45`t(v}# z>Ms{oug>k>N}Ii;jQ`f-Py3Sxol8rom{*kZt4D@PgIopr%Q}Avh<9gsf!c@@5x$uI zUUhH_Fnib?cDhv$qmV<7&&S3&qq_-p*uFsnE26S={^BAu^G^p1W0+IzemVY$@0rPn zXrg*ZHnt(vi-A@hFe0CJs2X-;kNzGn^;$va)4)opz{hn0oXqjSJad{Q>Ob)zd08*t z7n#&KmlyCFvER^cd98k}B%I~zq?Ipo;pnW!Fs;}zi@f~6OZ!$g_6dJ0^s=3MKe>U_ zb-TY-eZUGht8^Q}x=_IA$;8r7z+VaD{%G4AvioZpw$j0g)e8t`ReYg*#; z&3C#LAE&opI?4O?g}jl>tDUt#Yg8mbQP^i{X*MUn+f}z6x)Vuyb<{DI_T@r?SX~w= zxDt}kA&w4pXJVC>q_5oL;+F`};wPl5yyA|A+8zaUiPdq4^xG^;~ofgOw&_GPAz#x}S!^qGaVB%a-rPb3F z(x``pxv7n5_N69gb%(CI#-*@OhL|;Wh`HoJO7E3-$$8)D*(no_JiV5FtxI=NxW!2b ze4^u^(1;$hOD<*}(Q*6vHGw!bUii72O;^#o{hwafe9XDjE1iQ)YFZY~4L?$C6U=IV zvFNX?8U0a}{95B7cJeVEozeRWv6=wkn^YvIeREB^#rJ)v-bvY${ER{J9pU%w=7>=1eUc!FR-n;FZ)rkd`8qfe9qWt^Dn0>ueeaW z#YY+PwSfqUUpzeg!q8+zam|+k{S$gi%mSj}E-Hol<8kn2w@njay?kDAJ|v6T6d_#% z6RG;Xj3WC}W+C3ro%9&VX77xV85UOCt&N)C{CI2~XF~(y71#SCnU;f}I#_wR68&AI zRfAZ%2b@!#qb}-%^%0M;q`ZO`AvlPUr5jvU4kyoYC1@JAzfz`%pqTtk=o8mVTDe+M<)^tlwEu z1s%YvRN_lHSc>1=Z|**FrtMYW%r{E<_9m4m!Ec#$Fu7z}*i7;lQBIZdm6G~EF{8Xi zcvo!wU=3)ymT>a6205Y$_|G^&399n^6-0o;Uj2_J@+65Y=) zdwk_jQX6Zpd|iz#or1Bc53wKS`fY%sVWs2Vt1}a4SCnL9?OMe;59wcMp({{#4-~A9 zMB(A#3%;_E9t_e4$_06xEa7j?rLM(0@Ab4|!fx2Jm zc-!xa6eYToO(CsnA35wv81}eh(OrY$FyOql{BAN)ud~2HsrGf7pKoDj?cs!ecp=*1 zg^Q_6*2y=g@ZU)bCmdw>)DpKE(S2-BKCT|u_}#OX#}kw1{ZVG7v~aV|wPsawG{-T? 
zw4A#^PZ?j{U^c?_tRG7GE?nL|c%;mh^+h6yH2!fX<}c>cb~|^F z*grG)x#Qk6*9hAjeTSZwLR=;$1r&Kj#W_$y5(y+J6qMFS0~WN4P|c9Qh`qnD^RpzM z6tkrK!uR?kpT%;;P>Jx=)?9rVKy(k&NWW&M{LLGlpgD5Ry5!(kQab@6qsBh-!Ca$&)yiFVcQ#(<`yh$|Qb2kNSptMiZE)s1%^TE z7K|Ip+tbrtWF4*SMve0txhHeUnkjR|5Z}a;rb7JmQ(xO5N8B*)RE!w|_>Ee;4;AaC zRw_jv1IZ_nPpnslMol~CcN+rERZ_M=%Y1#CNwr?6H)C%XXUiw?jQY->%X{uN1go$= zg}H5cV!(Xtr(b?PGRnO~hkkaqi=vkhAK6>YRJES`q5-ROY(E-kT^H7Kqo&dj_yPU|#9~<}j^)ls5BP z6gkO5VK5oQcY`a#k``8@YNPCrY`PQCL8MlmHl8n+GIYP0_H#RyG%An(8%=CR-`_v| zfCzd+InpAjIaQd#Q|)%^`AaEJp>&zysahkdD{qLJ>JiLN?DPzEw9-eGdpQyM8^y{X zc`_)WejpP)_8cyxQDod-MC?+yD=t4Jc3QD)V;`hxh%&O6qkt-~^+s0hYr zE&V+5RT$;i!QY$fMvrctoGT=mr6l7vM71DmqvGx)-@2n4lR2ua<_~OTcM(e9ZDJ*y z$ZsA-J+*&UDGX;ePo!7-4R6xefg^b2(F^Q+dWG8TcTec4&$VFpS}2*}ok>QjaxHS& z97amwcO1_Rj7+Vq|C!|JQ}dtODa%XN{%Gk``vgp7Q&#dc9-M9GFT*Q1$z%uGul6kG zNi-Byh?OLV<{mepj{hzZzAR?k&`C9H2;Xeirjn-XCKbR6mjjAjFiQCiZo=qClxXG~ zc?FA5R{leI{D7|z7E)%X?(*U8)InG{Y^>8-T?8=;M11FuFtx0I5oR8{cPJY6^_fR` z2Hx&aGRJz882V3BY|k2hP~^tSF}@P3twHcn%HjfRhR1}4AIo^KbTpeSVC*Y5;v!16 zd!{IK5Cn}8c6^A^;YD)3z0 zYt0QhHA>JNw!H8&0Yn5rV$u_W*K@vqnHwAd2S57m<=!`{M1~z!{Py7? 
zF#!z^<+Wd}>jTqOUJx2-aq&gkZLZ-N$kO128ZnAblz()(ok_G&fuW-}uiOly`o4eY ziw0>}LYMRWi|sw$_8Jby8UFZlSC6}*0{+k=@3lCuK;+kCjFt1pe?Uu5LaX6Cb6dwl z~kHvAYNuIN$MNjU5&BgI_yXC84ncMz?@p2Z0b>aY)d*ka0L(9tvskAJ)v@EXdlH-+8p~|2;h76C?iS1kd&HYRc(Zz~S7$wBi zs|ztQHlE&bevTjA1l9REGyr3Z&hmfQr44Tqq!HH3MCkj=^QYU`-ih9QY&-W7e(h); zyMOxJapP$cW}G@+H(M`l9qzfS=@vR-e%k5zn_d%bNyg>(a@5HK^kA zTww=h%=X=^0XwyoeNbNmpdz<)_Q_W?9Nj2Xi3Qvt?NWsl%ww23B+J_8J?o>oVsmjp zX=N@U6}~3bB3%~pxCdXU9@R`^#0{s^m-wvveJY;pc|1AkI8joC4RNt|miF&YZ_QM( zg^|zu=E}0myMAkUl{;$FlW`F6NjNCPwflXa79n5Yx*(#4f~!1^gt9#O;OJu1=9g{6 zEo_-SA1q(bD)>l=6<3**{_ER{v}MCy=UIyEIpqM}`QU@3bP1Q%6MUBR!6eIzhjU3R z72t9f+6Bq6l{qd4Ca}3&tPJj4?wF-J=wR;S!69jgw~#_sL9QWdC!5rx1ETa}Hk{XMpd2TSB7^eU ztG`4IZ&EKamsv^hj*a8z-X*$Yw-mnpv$%1Wq2=$j%ZSPJE@70qhmOF)$nVz5%~`20 z>qYpCvadl~AkcEx{NTc==N%@wv`qKA$A^!*U5GGa7WajO!7uD*%R42km0%oXFMmn5 zu!zVJ=Q3uieBCCAyH+JisJ;73lp#+9Yn&jASSlNB?5uU>7!TMbTTEuH&~R67mTB#H zYqQ;!*O27d!ut)eoD}lJ|7ZDV%U$i+g6Y z%emepg8RB;?vH-4&P!flA-ILqHkj$FX}Jz(rn_5n@F~7mH^yUtdj(zc70WLXV$vhu zOZ&UU*cOFhK0BTl+!3ONb@3NhPiGN~(BTAFU{lXO=OLs4I>b8*^Cgt>7$jMxQFDE7h%^|!T06cZC#ODL3kwfoz=>wU zQh4&IRpR{{?5AZ9zHTtOoaD-WqHmljTcn3?Q6xmMkLr^U!>&2{crq z%h9^wi{mPPV#-*8_>OS?dL3kHtnDuboqefY?$y%@>UAUuBsAJ-cv<2QpbLY>O3Bo# z+D;M>6hrw(O4Pr5ZY;zsK>g!XxVk#W=^L?Y{Bn_{T7fX>mit%%)a}Q-3L%GJac*uy z??rRJpj`B}d)4rmOh_hRu&11-Ta~}{hm6i0`McCwnH310al+2iB>4hk2X1Sng>{AJ?gm0P zHlqXx5;X%~%$_<%O?UFfm<2>=_k1;WHW#K^fcoUy^J`ojs+pX_=f5>LBI*v0SxKv0 z8?Wu5rrx9$19uh%{7R0F^t%w^Dk+1_;jufQb$Yxa(xH|K`B8Q%GN&5-WQZ8$al2NU zkTB+Pvf-LWlO;PP)MSL`pt?6l9k5NstmE#5dDF#9Hg=N`m6I9AB0xZpY++>9B@RzT zI@mjBYm2@V`ORvFzPxd+s+x2bCeQz}lrE`?!*K(wCq)C~S;p9+eyffb0-;}Q{MoY% zB(~@5QhT-j0F3Bq1mQtITy-3a5F-mbH*6k_mY}8+sd=!Kl0BtuU6_sVqJJyALB9kq z=LocK5zdZGM2j+ zK=P1l$}-q0?{wpJ7KAotF;C;%l!ikor-be2-90g*uaogoW-MbSB(j0USjWXS+8KA36i)^;&3BI=hOtmfRgtzq`#=w;jc z{vs;rZ*le4(8l>FmkZqJZ+k9PlRxyt+bmAnsr(uV^X?XC0v%{K7A1)*LzS3QZ8!gx zGe1t79xBnxue|0B%I17IztogBYgU2&GZmQ_@PYNpOM#gpM21PgGr__0$WV}+x&Zm7 zG<4{w+b9yL`fWhu0R^7J)e4`WhKv2)!DooaCffqUi+43ghc8z3 
z)9fV^UC6DY`}&g-A55nI`oigsj6c`+_lt-5@&M;xTg(e*SZaRyP3&3=ZBgOuvU0J? za`ewMXVEOuEz}vTgWPmdX(fT4Xb`1QW;2o`ojE(>pipN5adraxcdq5YL6(NLm~{rr z0*_C0jZixm9~$wW<3+6*{Fj+F#c1ElxF|n3mguj3%!Hg;fP|#(k9_z&LGCmZ58w=w z2!^JG#^-6W(&kRtKx<^1?NuobjApIU=rP#g#WS>Zwqsd8be9Zra~T5!(|}umM8Lj$ zkx(hC-tAM_Yu_=AFVt=84S>D@Vt{7{jV`+t59Vv_1$U;MKzvQN4lo7CdE=kJ5ZDwm z6otc3@xf$wX*6>aEb$XGZ>sYxiMYVzGPvh$p=#ko5J4Mur;`)I9C>)Efcy{C>eC!QjxPAMn+9-6?Vh8)Uw1DI#t;1=R~ z={4I!7ZBhXe){ej8pp~`Ah&V|LzFX|-mE|k@i+{dqZ?~Au&Bi@#fRe05li~T`u*uN z6j)BEnyKp&12eI%#a%MMjagV*_SIcA4Ev}nXham1F8#~(D@Hze%H zabg!UtIf{04h%cBkVc@dciOOZT;J0ZN~|HjCeYi(ttpBEvChpa{_^hU#i&Wm!b7tKLu!5&7t>iZJIX!sI?fauHJzK4KBPcTng^fyo)s%Yg- z=}k2;PM-CJMTl3uL8nBAi#{6G!r4@Songm2mrq4lkenK4@B+2`EoDBHuUSMduIDN9 zB;w$Zup(p)kL2`CR&k7P>t=_nsJiG2HtP6ybBn998)N#-{#kwjA7$zL6R3z-JD}Uq zhkr?$>_>`0LrO6BYEZmV#hbdfY$ODy?)MqXl{55Ub@!uFIKuaWS%fEVW9d{a_xiB3 zM~9+Tp~yv)f<~i$WGS~)KgYYE`1}hIr|^ST2)B!=Xg4w{hgm@3Z%Y?&dlq({8Ah37 zKF&B^pBL+p?2*w9!U#hnApD!THlFhCDUOr0LtSS4;U>#y~f{*J#VYsBL8Z zM(w>?E7xNA`R^Sn^LIXNlqpw>_krxQ7U7CYIKnr+?1bzFUe< zp(3$;ch)g`Sy!ZxFh;XF8^!I@1m~mjCV=liZF&hO1MM~shd$$rHNx|?IpHMo-Mm49 zKQj*O9x|lY3GaLm)PRi#1E)wLUI~z-q3z=#8jbK5WuK1}*BoR=GknzI;pT46(U782 z$Yt*sLO2&@qS^%H_J^E;Vmw(e8 zs@vD7u1mYb2&U|8h#Qn^p;eL{xfVsUmFiK;SQ-1ZTlpb>V!; zl|eR{F6U|TJn8-#*Be%}*9!ndig}-g?v}6%-}}pp^i*yRUS-0Q*cGHAggmg3&_6wk zkY%Q|0njL5T^dg7YJil||& znf{uPy1+lMU-JWmY(B1E_)-HJq8X|s;;M^7{0K{-;@2u~T`JXhZYx))9c1#~b~hh< zvo|Ci_yUZZUU$sJy<``>!B^s-voIo1chB3U{Z)XYDyXEPxHyG}gg%32O(gW5m@mk@ z$&^*%4jOnX{0~jG_exv7+PY^8O9|TU2;EJpZ@$7Ku7g%OjrBdZ4%GOx%9%GMXP6Hd zW!s`I7nhv9IYI7LJmvn#w@DyUJY^hZWbo^-rE9m5q$}E=+x+i8j%}6dqz!&-yj<`!&~J>AAz@Bac|tI-ZnI$-=20eV zYp?Q2PE%q)g)xJLoPabzaYOAbZ~?<&l-#F1u+0-Z7*3?Ti)A)_hEA#uM8(4mV;;=M zTl}F?fic6qqUwCsdv|%ZHG}E9P(wrnt6=@?b>6k5^~KF(i#BP!HK&Fbyxb;Sh6Jn7 zA5rP8;3^b$o{7Zq>h44l`{BYRO_rh!6TwX^wg^j+CJo+a3{s-uyq%7Xl^wT(v)~EC z4bjQkmI-F^I6vH8SwknMY*vL&(@y%}xMU3aA8;xlL|i$3tU^R!aq)HH?%bG7s15uI*4Q2(pOkypl~Ny_8S$VnY?;nO z!PcHUq(l9{&Ikx)hNCx3V)j+u$~=o(#g96?1{fL!$Bd^lI;qpLFPz0#x!(1;X)-^? 
z!TPO&kDNUn$gX#8yDo!s>f4frcEUCXnJV+HQ#{-=yLo~m^p-C*C}xY9&^cpdDOP=S zopc1qIeKdUWof3578mv{=hL+pSY7XsTW7+rR_27Ku8Hez{?YjD9m0BZg)b;1 zBA{VuW(;IB$|K!Tu!_cPL+>va3it;)P08Uj2OJ3Gb1m3fF3T<5Ey=)8@M^AR-`li= z4}>P(GduBl_p6I!`G@mb!=PT8Mo_#V;Y}gEvQ}uiEPD?b-3>RugnX}4u+P>kGA%OX zu*mki#bJ0oKI<>rXd{v26T`gi5-oN>C?Why%D;1GQo)uue?4dUmJLT^c4qTWnv>KY zJJMgLj~7Ing}X`Xa)Xfb1OzM&ZF|b*eKbCQ3$FX96cc+8rEd>UTy)kM z1U*r+MHm~JBhgZbwWJ78mOgl3ZU6X}WZNj(^+q^?)bnrl*y7jJ_m^^f zthvy#qDe^6_wf=qwi63{DE$|WHjn|bDOt`tmuo}u?i_Ek|In1lMLov3XQtX@^>1wP$ z1vFd@^K%dVuHlaSixS@zoqR3!EU~5zGI-Ya62O#k7}&{%fD{Zjn(6za=qHq45_SD2 z`Gvu>oevskGbsRS$dNwYM(i`bzmKr)fVHx<2hbHhuFn8rO5XwPnw!o|7=k-jH`d33 z5Cq9uMO2R_nuFf{K^PY`w<^38_-G@(x7pb5>dGr7pvC^Qmkx(Cj&;u1#4^}FH4Z@m zuUNJOAo1*yR2oV)@=tqDJg=degDJE^0LB|88|%iq9$cl%@s^wHK}(Mf4D8az?QCRt z1=JIWQYwph=_OQue{NV#j0eEE+OMADL=`C_RYjg8M}QDGP3jEh!P`%AoU~#~AE^)% z`MUZAA*388`Kt`Ua5{o0Dg3TC5S0dF##G9~H7fiEm-~c1zQU?@$|@y~uJT=hK&bf~ z+9&~HXiBstH%)8RW{xhrW`BM>{nLiA1`A`vHsWS9Ix}~e_Xglse`Y-0n0!^XNB=O3 z-9@A$3gH}J=_Hu{klLqlmg9v7^CYHz$!mtDk%Q6b1TXgrFs;)A{ zsrKqZT7GJPAQ3RSChl;TcxHKXBUVU-kf26p;qn$RLOI6tI>bkZfsw}}(arOztPZbR|!eBih$_|)yZnT+rLu5N^qr=uA z6T9eTKj$&U(gE4-`#QZ&C}F5P=6SzdX!Q3eWLdnOYD&<=y;tLn?ST3bV5e7)3wNc1 z+DTUJAI{g5>Aso#`D(?NTQG-9su_t}1^DHpKHS&eJQRwHFac-hxbZE=SY_z8x&ag=}`VB^yBWf zyT#B6DFJ zAx;^xyz2Z+>CrRgHrXRB&+5c2Q^5FQl&3%Imr8{SvQ3-{FhBb?mBC*!3v@^RcBh81 zW>W(gb`9hxR?#stWW!LR$+c1U6~HFoMiwbra>$-l zjWX+2RKG305zZz7aM`hmIhq-cuhn&O>`&Rq!MoWQ!%B(2N2948Q!B9&^YZytm*?1v ze?EFG9eRHKW#rNF&C6mx}I8_ z;K82ogGZmO-VTE%e!*i|1Uv-hf=$cx)YfM+0YZ_vuE;)*>JiN`LI6bp*#sYnAzr-KY92$1ifp*nr zoYxxxHWxPpT6@S%G8B-{V|4rWFOs{vah+4PT@HOi)YwaODwN=9CuefAOXiiS1bg9nE0LBHl>d;fADLJ@}4zmV|4HhIu^p%3LY>!u==-n^L zma(E%kxuu+$5>RH<$&npdG{e-1FBlR7X3NKOJnK>{O3ZOANc<4{`N@$^uDMahIj#( z06;482CBPpb(zdZMHnRA9I{FZ2vlr|sI9yl!G^_sP$#p)%=^3h)#2sa=U^K?alWCI zerAABIt0je5`GtSUilV~_NBXq81?xJ2?E4T*VcKR+b%z6GTAs`Kd91uchbOiGnt=X zES`jyPhf1NAFDi>49Fv~HPThBUgi3eb;$IuzN;c7cx^4;JOFTrB*_O$<~C^D>_-3^ 
zve4=ucN?Px&{s+0*e@ot=U+$)5XV|v=m#Ffs@W|+mWA7>FaHB?7tS8VXhBC0F9;G_ zMj~3_V%Mw5P$U6nrK#u;lqwT2WBh6&yySr++62}69m;`j|jZMvdXHv4fzJIaM*59hM;lvDBtM<7N1-7|G#M)-@ z*pCmKSPGP!@x;wO2UgjM;+`>@E5{9MD&2j9q>c@EVcPu~0d-A#Y{UoEz@(fSX31)o zR=0Kovo@!1YQ0?ejvz_nK?)5mi-dUB?-4c0w&3R}Ym0+x!9YmJDj?c7ej#{g{&5=< z1`tP(;B5hq9IA^OU}Hl3??{0qc`&XDSlYRh5Ws921lD$T&+APr-nnn=T?6s)rdHdl zijmtEbv$yMql+a%-2$#~l3!I^M=P^7n?(7{0+P`s4htNZpv^8WD30afsd4}MD{9f8JF#;~7tvQ}mT zz{y~4SE<{k!XNS5^aoUmrx-_TKQnQdJr7ZvX`o14C zJjU16ZEJ@tA454N9XgN9KjM*s)!$E-KVQ1<3fHdZtrGJqo7@A|GfsK!iuQnk!thIXef?x@nG;Th?~Y3IH3n(=>=%OsKhm(hN)?& zVMK~zhJDp!l!{$FPoFm|)vaI>Of27$Hs9`|8Qz-1n)Zq)!&G+vXp}ixeX=4Q_O;&B zI5;SoIOL*$Jri$?tD948YEcMFaKuC{q|0W=ow~MY%37&pPCuaY=r^JBkPq_vY_tD? zl=G0K#~t&XEGNV)G?IkN_&%ZIIzDt{Mk`bv@P65G7260U2!Z(B@`3_G3Mc3SUH%fV}2A(L0NG92J});KlI?S_cZe3PkJyg$g)%w z<=!vyl4mby@lu~E&U)}^F*K>tQ}-TTOLghQR-GjBz84k?>hDDd1p|vskKIYcy`?lN z*J=(uedjz6JXlO|9IwZ_*x$-MwWObjK1DHlA-Mi~f8ZEaWNxMDBj# zgZ%%IeAKLn0*`D&35tv&O55fFyoj3%Kn0e)IwqKMo^+nB+4mWqj@!lzLc&*v17+5) zfd(x&Ca^{>+pT1`zda6?am=YdMsCo0qSs5w7wFK-b8>0ALX}8B0SEEi56{8TP-6V- zZn!&H+q6;kY8_v*?k&)yxxi(F-jGM0l^C@Iklr=*lwR@ZOU$;@#n8UaLvpag6Q=Zr z^H;PU+!DZsJ4{lR6+~8gMLOe7$D1bh|6VMvhnH zKuKEnSnRZ~3VD241B#KQ?;Kjb5pJrm zbH!26e$GTvn`~Fh6tvrPM^W_7vljK2vc{USK&HRozT;lx;SaczI!dCaFujV8S8@sV zX4V_q$4bNqjBuCX?^{fw;I?l_OxbM=6zM#_I-LV3uC!|1 zj`>Ai1+5uQ?hez6ccJx`yLf}<8oLjB2&(S;P@cz=r8nyxH%0^n>L<^V`Yscf?=zYC z`FGo7NYD9 z*qHqcq_^Fw6cKv!0l+n5Oas!jnMb!!cZ6Fq7d33P`9a4L>SJ`u=eyIp-h;glSM9^i z1()DDBksGk2>Y*8!WG8vllD`n$r;I$zT(Q+y7fTweL|gu7efyEQyzK7XfZ^t93jpx zhh%P<1WgPj$7sZRhi}}&3waf+1cs?aM>9NSwmR;6JAj~wG^uCKcTZ4+CkUVy{LU9C z)%GLpj}~;xwX$tvmd2e%C{|~hLSu#sphIT&2)aO*iR_Ye3hIr((2G#W8VU2`X#ee; z<&D8DnXS6>`)DBec@0Q9xw=v_m)8NGx8YCyxVx$j)vpF5gDz%Lu(eww;EV+zG|<89 z2J&RCq2IOU)*j$nNM^IwS@7MC);*4i2(Y3PMyOZNfRZ$au(Qj@IeR1^lcjwRDa@2! 
zmm7ebHrfIc z;(@tIQ4i=&w(=k8gAv<2M+kj-u?1@8=$gTrqS3}u*b&6(Frj&d`0>G7l+uJ(()(k& z>n&}j^wk5I>#@+iC!Y)GEp%A z2NtdG@i9w*=XjO){@D~^FOVN(&4qr3FqNuWM>}lb5-20v?(*(4#_kWU0flBq%8MP_ zbUJuqYmRCRB%TQ%LG3Uj-@&g;!M)0`+8&G5GgyMsXW8n8E8CX7{Cv>6mjR z-GBEi)awR%i9=*743FerSu2-{T?|U}q-9k~8*o4Wq4y?1eGGa2_}nWqB$ET4o-Lw> zCrkDb^sef|oT(Ym9q%SN(3K$495~y8TJlsl)$h+PU4za8*goC6W415A$k8YCU!1coxJ*E5`In(#vMYqL8c>iNwBGE|KlT4C5WVt9@KsNiB2 z6HPi66ruSCO0h6f+n}m|Ue_H*T+iEozh8tcylwvW!Nm!4zGlEWWe){_eGH1zw0RWA_6|oRA_%H$1-orWieB8v zS;jpBbj$_IT9O%Zq?A#M)H5Eb*5YdHPfthRzIx8F>3MMz^0bVgB_{)N-Hrv|=avj? z*lYQZ{WPY&U;FSXGIVGFV9m3*Ct4*?E9ZGzJ5NG@asLhg%iSi=**?B0>jwnaY?FBO z`IG3WH0U^el(Vb$ zWu#=wCzqDGi;f_m@ZrymhrKsMVcK8b+p6GsW}5?GyMxG9AM1KJREAeh}&_VOkDPEnYZaH?k>^Y0fj z0~iGPBf0_FxyjY%Ty?0PeZ4pb#47Q!Ld!%$)9_5}{U<=@3x}ex!{vZ1!+OD7B%uT; z`dJOTQ3T$G^fFh zHJiO;8qJ`}-9P!6y}BkDzP=nUjv|ZrzM-u%v6D!`ZHq?F`hJS|kc8WF$?@mCp7Q3> zi&ek@h)%&B@3nz?YN|>Wy)Dsk@4UPb!19=g_@^cKXAKjq5kG>GnXi7Vbb3F?;^W)+ zqt#%Quv?icivg@YpiO_!T*DPF?-K99z{CAnFwtD}O@LbuNR2}Wm*F2o*WYjzqXfXj z0)k2sVD`T*xigD5FJ{sR(isF3Pdclg{d9=S4cFn*3JF}?140`L`};-Q9>d|57eTa~I}2fZ{T z4cB4NDobH{mh?1^ZE>!a=KOoZu8W6c?!;INsBrRTM%KY z5E)>ZZH3%1CAHmX02w3zYIgC106Py9IgB}qfK{yiDPrX$vlv#QX#nuQwvvmK#DzQ) zV3)$QMU@0ozrFpZmSONpobGgs`N7${Wm}|bVHfY=0MAi>SjdYBs7BtGYc@H}3l6we z0kA|h_f)(r*LAU`D3~5h`PHBZ5O2~l<27$ghUTm)zUUT<4e3Bdi}5vCsnaTEr^rMr|g+0mOOvL$Ajqm z?Smggc^zIUyV?zHY8CJ4!PN95Ni7yZNAK~(g2G59BKMkz_*eB^tSE*)Q~iuy*77jN z3gw+Pby_?H*#!@fIz*B5=om28d{5xnagi4qVKs6}E*5I!Oz4|5_=XmgFXC)VC*hA& zhw2<{X6WmTcH{-a-KiUO>8ykumsnkZTg#{Q+H&yEEl~*ZjCkV!C#?;>qes67yK%|G zmW%>vpJ;Puu#e|OSD{k7oAM{JR3M?5Wk*z|iVLeV@7GwSrBXObcX@jHc z=vI;j?^CwyLCcGXgPLz!m45628iSYxiytZ0`xqO>K&9X^4%t8BN zIM2qXLv*|4rp>|keyOwxi?_v8Ju;8tG@!&|jSIU6Sbi1)Ff42Pf zuFZR#rUQwj25tip=m$uty0R^=>x{n|yAlpTla$f^k_R9588haSkF3XEAl|c@KeqB( zXOhK7L0M9?i4PUWpdXEF$M73SYF%0*Wm*?u;uBs?9X``qt~pISavxL9D;@ZsWPwh_ zG-IW3qsJffwHw806x8PoRy6aEFs3UZ>=$yBIW>*$E)nkIhDx4a&(_Tf% z!iB1g?HjI5@tHN?t$i_jo`K1v@(L(-y-Z~L{vEfmlLfSCq~hTFhuguOJWOk718JW_ 
zwvt`5BZQAJwDJSz0^~+4`M`9e0(HwUFuan#>sp}vm5Mul-o;&3WI=RP_qy%3LsPMh z4_h^9wo;XHwN2QJdsTMj0N;w2aTE7-Y3`VF)6#c#&aF=_PddDvx4av)=&vhf&ooAE z^VyW<%+F42{H0s2l&2!E;XFQZkU|wj)?$9~KD+XzMUAhc5REBCUn)d?jTsC&=<_*_ zJufM2en&yPg%$pFR=HN|%uIRylz}$^y86s;HxpS&lg-*`e;=y%p=CD}`4Qn(ZQ}Y6 zma4mu%{u=(2Pf|=chg2)eiXG7VDm?6D+M`OGq2ioHqN=(BIDSy=b9J%-CJWHJx&{M zYSi4K-yu|c+&tTt?+|X)kQj|!3l*PloGF0@n8(jbBKTXHBxch_`A_ogQqO|^yvx7N zouYKAxZZzmA|XSTH^#8JB&O7S9K7XgO|uUzSn{+FxJHy68wzQJu)gjb^_Y*C?4sd> z2A63SMEZ2p7u3EwEfVTDYlOg;@(jz)3j;ZH&%Nt6hhFUOroifHjf{@1w-&67pz>uH z$_l%6Nrp6qiq?<=Ob(BUbSKNEFY_&ItD4&2MX%&3w>J9iqy)KTqAkv?$)tweZA%yB zB8Jaj%C*_sh2MX{{tpKj?zOvnqjAkKSG(TrTSerYH9qD)?EYQlPv&fQ8+Y&OESfvO ztS{e(2*rX@s|J;BH6P^IOPL#ikr4N@$v0`o)1LaH@6t`YGp1O$yi8szQ&i~9hrkya z7wqrC;U~YY_gWZX&;2$24tFjjmu4c~!5bsC>wDLD=3n{v1zZPiHQFri-J0^~#D<(* ze&tq_A#TKOM4q3(j{3iDSPP%22+^FB%6+>#gGo5v-)L; zMvdiU#p}oOKGuu{`44QogT~x*D>d>ekzCh9uxO@h|8m!Wck(h_eG+D{x#zzLE$YtA zDTFb&$`#N7c#ZofprQj_E@4|wRKQG&In0L_RI?RmmNcbz=$&iIFxV9`NWRocdm7>* zvvGXjy=vg3Df9N&zlIwhb^tAy6>8XW@whS0Cz8-YHH zEhTWE!kB;yXzFZsQ}+w5^W%z{(Csu(#|b6qyo7pn+%>A6d&>a+)u{rO z)r=ybp(q4(ZOR_qQJ{A9IBbeZRn4f_4sI?JUt|`t2JW~Ads9CSrSD&F_Xm`w2Pc6B zD5uoh^NjvYj&D0Wm@%}4??0ZgY`M_5%=0gm@6VYzkSd~yNrjX{pGux_VB<(`!gLY?I z52;vOtO#cQ2taZ!s1evFVoDw9cBa2|_prD*Rpfqa{OI#VZOy57vQ)#j;NX+v?JzRE z>Dw0n{D2Ebfr}}}n{o!Xis%^?APX@Qcxn5urne145&^n|ow~CeoTouxAk&-4{EI-V z+%x2^p~KlL1D9V2T?}oQTwehQ8+7aV>d0j4>UGBy)%?B~?Mmo#@Y*Z1mVRIeTmFl9 zMB?*#ia^b~>(hp7&xxtoirs)&zWX`{7PIYdc6(>>&vXW=o(bFnB0If1 zpOWYcp}_H7k#_p5l4(0TKGx3HiPieoOuQO!EBD`!v-@BGjLp6xHV9?QN)ZW<*R4^BH=! 
zdxKk#T2bh+_EO#4npuILZX(d{TQH-#_R>%TpujC3 zlV$TIK(ql*gZ=-$^x}8Q5ZtrGM$w#P|NJplpk|N3y$8n$d4UODaL=S>i>T884t#P!k7Pf`M+QWiKKHQp+&lM{VgNs?uQe4b<;}zY4`BRNqyPW_ literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/moe_dynamic_padding_c.png b/model/train/yoco_moe/sources/images/moe_dynamic_padding_c.png new file mode 100644 index 0000000000000000000000000000000000000000..590e173b072ab18abb8b3a05f10383babcb3134d GIT binary patch literal 81597 zcmeFZc{r5q`#-KIX(2>*l`M}XAw*`TWNA?nL-vs6v4&(fDTQoFV~I@BW0^5_#;!8< zW%4j~rZ6Tmw!s)PX8BI_Jn#4K_#DUY_s8$g@ArEjuHl~RzOUtVp4WAr=j*yIiFapD0W|i{R^7J;kGG{mPy1K zi!%FfC|JjR>^<%xeO7$lMDg*V&&X$a_>EiT)aa}z3K3lAv+lUp7CMU|a3@j2*BqN4 zGw;=TV?)*nd(5IxP8q6Pn0rX5zC7ZxtA?E*ruiP?P;nrJr~NqDx$}= zcAD7>?8)w_wKN{x=<5pw_SwL3imjBOFxX=>$X;83)xz$8urZL9O_u^c*| z*v6^!KLidF^xPhHWxGs2bA`iE065%%)K40n{)7^*^7eZ2ar#f&ZF9^ z$HZFjRotA~8ZIRy20T$g8_iZ2Ir5f{eYl$#8B@-GP>a9*p)(w1ZpTuPO++CUB`Z>+ zoFQ#e0aLYI#%`~>ACkTu4KOt;o0_c=e_9ZQCAHX@oe;X!n(P|qYU1B-ix_i(!fMOv zDweg<;+2adv}I!;KB{TSG{-x*V357_Ak$vv5~uzrBA3mgxRnxz796DWO-klY{E%N3 zGA?N0KgghNImyzlN$uCBsriVhloXwZ&BRiyVTi@$lNiXuFgB1h9{+> zovq)Auv7=hH@pT4R{e?bT{oEHV)56q!~5NMIc=-l&%uVvyrm=cWg_*rOwto8hp&*d1 zvei_Lr}+^O_`t!|kWn~DQN9E*b^+Yyq2IJR5cKu6T7_NT%0uTwbT3}dr^K+`d zFLGQM$E9U_bT*50m&p}*#mvYo>O_Ukj*y-5KGj!YulH1@`z-25+5HMC4=Lx(uvJ{2C#1@eBuw8#+LW_ICV3 z;1sca>eaLehC-g%D_(A;ZF^b=n9~6EtDwIL$of)#EEaBxFC6Fzj$SjJOFdE_RrI{i zs;YdfO)!QWTWh^&vDmDT-}j2{7%USOvtu%>x}t@(a{vb=p1I@WSn!64;#j zWV%L7(r?iV6?YoRC*GJas0tpbvp%!j>w3>5{+zT|1m@Lp#QO2fe$u2xXq#w}PisfD z&;(py(k@TD`SR-CY8NE;S`qNaleZvT?hIisaw&x1BT-@2FYxF|PyFb4ki&N0^-q(o zT?X4oW8r^jFN zE_SSuQ9?D#Ar8&o)usZD*a1fj@rbs@Ql@>#Vx*OmMiCe_X0jIiJgRSfB$l0S6;sUe z|L~aV`pj%RGSZwhS#3S*>6ThT1YF$_p#HTY%YjDocDcGU?hFnxnwnfQW>j80^BYj4=`wS)a8|&K99|PMwye5_r0WvL3)7Zh!`Dc=^f2x z`EzmLJn2gxxNlJ&+td?3HR<*aQKa65Y{}(8)P-?5{N@#Oj)s7}6|hVvWj9TB zLJOPg4_(%SezQ!J5+ZJTBDKF3yv4ka@aTsV@>heg^>7SuEpn=i(DbTNH|78~TFrT4 zp+mDXnRxScCd|K6jM50ll)8Q$raX%C<_FvcC)4fj$txrhCkugf*e+41xs*uAQ-|U` 
zHz%7|NalF(j&6Io=Q7n-re6gDaEuyp&C+FP7mi{2py;nDACc|A9JdwNw7+F7>XzS-)T*gAuNnfKYP(fwyz{-) z8eqx$m%|WK-8lo~p?pP-YFk9#xdQ-+iC8?KHEPivpa}Yk;dTFD19NNIeJTIWxkdr_ zUdyd3@)9Z8Q~7!e#o*~Y?>u-zga{NDp4Yb`BGds7EXfCWOQ2en#=r~F)(*YQR};dE zAtJk;lZ5Ja8f7dyJSVC>V;vJb+q|T-11~?1Tvc|ejj|24fQF7xj`u}LX~$#RLOd@x z&FSWJnMC^eNL?rbgK$&7k?1e(i6Pj9r*Q+@IiNn%ZKJq%@0U4XC{1O4qsf1_!7Zeau}KgUO?7&hU8G? zvnU<+@6cjZ`4JM`bKjlZWZQ-%vQ~s^mRxvJ62}FL2z#fkKm?uMO)UgHD5uYqX=~W_ zxJVk;*AV%WT&>Rl+g1SjChv9vhP>TXK$?c6lL# z^fHL9OShy|Mj4d%?`P)qN94v4%NfS2iB*<4na=pC()>S042+}csJFT)wgQ0uenQ=2 zZD_?_*d=7(9ce+&Gxaw30<*8m#Tp4ZUaEHZO#{ju02JiS!lZ`!!_u68XNAt zl;>H!)Jv`PFXyxsL2|iMBhAiUgH|N-B8*!=UnI%86>}N*@p11Ng^SoQu=9TR5xbJG zzkZqNFm3+CMCf}f`AiyD=_00=_qeS|FKO(wl47p@wFiuddkNswd@$d`xprljaw0ri z`!P1Q%KI6AJy}e7$#G}iAHki>}>tMkLfl8KxwsmcAvOlsTJ;WNDwI^vs z!E;A}P24iNZ5Pt}d+WbmEFJV@k`6_7#axg{gwHV!-Uk=q+rbXrgMXS(z=jpRUD1n=*?4%MFv0-=)VP4 z8)YVyZkz(QzrJA}=icrp)y8Ntqt=$7l>mxfd_W}6Opf+r+SD8?S?Y;0{Ba%szB5^1 zwiZ;@YT+KGQ2%UNZQz(e@*V7V-h4CpL)%Nhq?57q75rl1?Y48II-fVgAppDEi$SJ) z4uGwauLXL;Gu8_~$Y)CWmRT2(eJ<*-;}NV#%At9w+eC(KBB5l8RmAK zvXY4bQ`}|C?>KJ)#dL1A3I%*K<3qo-v=boE zCMT-(`D;*O9FjXDm&h5GfMFw?6Jb*?aINm@s+F`z<4SrJg8yPG%WEZwHn$IMN?9S^ z5nQY$+M93;ya;yRM`HIi$t_AD(7+IzKj%`U$g&RUJ&CBd0TQhiCO4-6KPDUJUJeIo z?UiZL=n043YwG6_S;QxAk#@p`EEPB~fc8+dO7o>4O$Ng1(R$`BwLb7;!p7~ZkkvCe zl5fS2>8mk0{qch*ZUr^xtqA20q&xXG|_hJ)Kms^G3G)&>&aZGYKxt_(C9u*5gh6Kloi6VgDm z4N>ZKw~K%9LB}6zlcQ#F<$Bod21HPysGIKJz+z?DkX85fn&3%qe-p-KE8aM`teoG# zICWyK3NbZ;C>+PY%K=NtsRSnzLU8TO05AtW4Q5n9kVBAc>yBu$3 zEqYg#r*9_#hmrc};BhrHW@l$~o|lgeJb&kDc4rsy&iuH06UrF6XpM(P*+f6I++*Ic zfr{6OH7mQ|re$IAfR*US=j;auGi7-qZlANExP45a(PgGDJqAZcgsens1*Yj0E;MEh z)1=gNYcPeFqjnf;z+1%>7jTiCaAa5H$Dt@kKVlVyF&vZ7y0GhO(j9{l!)o^)Uc=(j=nG}`-_dI9shXJ7GHpW6zY;c*j(PJjC0p?W zcz%5ml+*jg;T5Tu@8brkdJKjCk}+I4x6jD$ZZtAr3%Y%fHkDpAf@#ceWm-s^mgLV~ z;##c6&iK-$F)3*oG%t`>Dk@(>%*t+ zgm34>%HDI|%u7sm&lJ6b3V@nM(Gr~Ij6)%Ax%Xcwwo;h z%_px0O?o@oL#7OFD)m43uDY%}DZ8I-=ALITMvFLq*JKqDS5c|@b?CSVwJhr;Lmr0e 
z?yiB4tRom5oq!e)>M{-l{`j^_sl)`tY$nv>{L9F%1(|Pb{SeUr3ybeA4HRXq?=9b{W%)hsPw> z24)JR-Ki_a_X~*!*-~LddgjEi>mviIrar6?@A0$GSI*f>KNgV;YV^LfBEHU<=jeY6 zABYINo@}5saVZt!fter#wNBg_Y&Ehkj(B|YLhqSG)$wA=E5cnJWwg{45|Uarlm7{n zfw?$|+oZg=FF?FC}y3>N(cLu6Z_+@%hv$lWIm4+ z$B-v?r?s%LrjIWq4)I>L1+M5f`fBUi#~e>SydNyN^}+S$WM5|*KTZ7hv1-7*21n?y zL!2x2-un@@=KctX2tvjT5VyRhpE}fUrh5;;ulj1wVc&|#0UbKwV|HbI9_vmdr4_8|SN>bS0ER!y&~gOh#CEq89V190I1 zq<{PwJh|%>>JPK=&1{mbM7MX8J8g1G){b@hf{mU+FMRjT_q{ZFdiThST&-cI)1r&j5gy6u`Pvd%adc?Ev8+`wt`HYI=};VHNhJgk z-A0$&NJV>5GiZ*PXkExAR0xffKS-;vSvWOv@kPLkG`yw^=^1g-LTBkZ+uAT|>5nu^ z%8Z2%rO|X}`o=byG?ywj*#KF`IzgLxMB(wDcwKh^06|l1<<~EpKzRWt`iHvG5 zdy9BE6Oa}h+udbw4k#P3H0w2ZE4gs#+5&OhJ)B?JDSGASY-&CKDqHEzuJK(R@0@EB z_U><#mPk-MT3hkemx}c&;=^p=DYYc-+vnX@;BnDur2?j<{QLc#Lffo=O{drfFtcQ- zSifW-J6>>yo|MaPB7)7Plu1yB$us>wqCC>i*!yoK1Jml&c30;IYkaZ#WsF-sI$8aq z$@Gmev3k|+vl?`>s!%DF4`tpR;1vqx#*aBo+7(KXC1u58gyA70P2F;^(dez%Yudt~ z%Iqp6%+(k6s`e0TA%O;I;cKYAuBmeLa9=yh7Y!5Pj#9IV7H!0NBciV8rDGsI(Ll+R z=O{=&+(2c^y6ZOq!Jm{^KwYwT℞>ePiIyXLI9FFaZ2|dE6ZyO1e{|>3y|G{}IZ} z%_{KmO5E$Ebf6XQf?Ryx{hOaQnzrh^{p%U0O8D_$5S;d>w3Np{hbTA_4)pY)rHa`~ z`ePf?epye;ZThQegF;N?l!D3FgbZX_sPriyg!ruuYl6!BQANm!><-k{uvig1q0QQa zz9dd!cinwb%)yUGcgolM6oWupG2ci-AY63|XCxsT#P+JY>LrQ|mXI-|E#g!UBd#>1 zuq`iIxm|k2m*%VjYCKX%@~o8&`17|^zrD#;dh}{S{~IX+>YFBY4Y~owzxmV+Ygi=F z?wnynys^-Vm@-!!56QEVyx(lbNJj_;q%TFa(NeMEUlZ$F5c7#N2)mQFcqObJA1 z_Ak$gYAt;*MF8k6LbeymTX*!8ZLr^WrXk-!e{}!(dlD_EYjgJw?aTqL|7m{;Ql4Lo znHQ(o1+eUR9S0-dM{H4?-o3nTw~o~QV}vrdFAAeYO?5MW7{Oy2?dZOElQvg*4*_r~ zpsX;8tg;z57O-csaBgzZkAobnrs1px+KlzM{Sx($F>CiP7yLIpQ!?mpx}9UnF&z9>ABuIOQ4(!>2j<`zg zB;)12O2NovPob^9$jKAuiQLMtaY%CPUMvwvvZ?7U2poVxG6&sqA?I97DbL;0d#Q^C z9!;U7Be0q=I|wmgzyh6Pim|M{qN8%#;I$M^RxbvT_LK7c=n3ss4%? 
zPlFiR=n1Ny=vpu*#8}e4y>0#7XL}iay^OW z(B}@x{4JC0i=Xzl9IGAMqT84~0WzqozlvQu;(F#&C+C?K$K%33iDRyB*vtRc#L4^h zP>V!>iP>oEkQ&=`c3-(6+pl5;lEB9mz>TD|u$b(yqraZ2{P`5muRl2-PlA8(_P=jG z9q07+-|IZjKgIt1;%IF4p?@>DuKwRw{>9=KVwJh=Y$}`8jAJu#?A>V{XVzvGh*g`0 zW6t8D1MHJCe~I`XtUsNLS@Mion$?XS(QVz+f-GT<^1ZZR*|*{CAM?GMZ*Jdf#^Ls^ zg0`>9f@mgZt>6Q?kS*Qya@l{X+=!gh6EC^_T_Sm<2b!iDz?h z8TWajw;$&-RHJU6nHfgyx9>s8&!ilB>7CZ+$M;kOuDizUvuWETj|3RuTLAwQ`cS$= z>*Og7MWa`JXE!$vV#S2YFF0vD{#rx`6`J&5iB}S*gfHuzfE-?_dTPA+#A z3=0^yx70O;x;F-3`}&K|PBcERAc@o7s^%jJ#KA97(MR}RS+EZ&pTuxhlzv`Md>UHy zYo_e~6gnn9UxyYt8S25N{aLx08zqFC_p3jD(&fA4B;C@r{T$oi=~|M$hphnZ${WkQ zI_t%8KnAV{?&H!@_mBQQe;`Zdrk$HanR=VWFu5`vk5r=xuVy2ol;R_{8!5ZH zwNdIuD}@1wF0Dm-dSi9iizQ@}jHutLAikhA1}AR0_fN0CT}((cKSh58t*qM2zHauW zhZx|OdiYKNR1B66dMDk{;LNT+bULiR!gNZI4d@DUqIiy(ng!_n)2~l~{-s0LF1|`Q zwFL@H!8{N;sC-uRhq<`poe@lg4_xu=u2!KxbUd2uiK5%IBUZ!<{Ok+*Wp+|?Vp1`M zpok-P44q~wc9!YG?4dc;(HB23X7lErR~x2?>vGW#1jx8^Rv6H3`6P3tF z_bf>+d%h&!x1F#1C3C$i>P49J*0Rqjwyqc!O4ac-q0 zKi=-^DORp$-1@mt%DE~JqYOINqWf<*h!sUX`e7rU!*igbSm=bK((B6i5P7aJfbGLe zXDXjJobC@Wc;~X7y-ZaSWb!uaW? zW!#K2G;wc;p0GwHi)qY`7>>{9Y*u+SFKN&bI=Ft=o|O_^}?5()WM&e;^MMa(J%ZEF8+@)K|aQ$-`@c))kVm*~8gs%Mr@ zt^c4lj3K&VbvAkFl_y-mH)pax_hf?&uRl+6vz_2;$+z&qg1`F#lBmT(^le)QOu?3! 
zELl&YP7N{=+|gcLid54Gi_7N53LD<5KfWJ7jipq2S(>Z{YuKd*Hc?o;t+i>U<-$x8mVRiSU+qibctS{&FG=e zp67o+5$AIzN3#HK`NLb_G?t&>!pN4QpkLo^12z97sa03T(C1wKUyT(BoVYyd1-swM z)7WRQ5wrcv@8qC$Uu39MOW^)ZG0E_)?nIPuq~WkNfVDK4a9Q|E!ix843*VHS4V^lk z*Qbo~ouX~#{Q%F}-N%Ho;y{=KMJmyYN4?#4>limz$FgqhQ4x(Er` zB|tR6+ig8uBGPf1Zb?uY7y4{$P&2;NO6m>VXRkpZ)(QNU*F$ZLMf~H+kXKYxnu$?v z=6O-WgfhkKL+aD?t?Q>xe||2-oDr__sIp;9Na(8X9o6_KKe{51me1)baVIb34s{rsw0k;4~p-dgZ zxVF)zh)}o|s!0j<8T>~|Gtgk-0QZkXfiD7NIo|ED5A2dVqsl6uuNL%qz0;bM(5mvB z*?H5tAHoukaxfX(yEJX;@hzauk177>&80K`bg6;r%_|$(H@wG_L$tZB~imton~Uo#~GUANUNQ z^8>f4q2~)pk$#x$k|+cAQQ)~ofZ7&oXj?Ek6ISOA1{6Y?)J%-3L?hbPQcy(Yr1~Gn zI*AeYYEh;J=4@+UW@!!TFVot;bQRLQDl0O}gnkDQ?CJJe$GOjP%)_0jT?56L%N(80*VNZ_lOpJ+L zk~17URQ$5jrFzFP(l0HnVAGx{euXKu9GqkeJhRO(P_VbIo5VexrDsg; zj(i(4$d~W(^pQ?|&w`)hs3$`+EvY*WiPA1~s2J49YSkQenp9ddZ;koKI?NM16p(B$NtG-LDKZFV?XaG(D{)3~>X zPnw|6PLBQKp1VE1qg`Rv<|~`J@}>j}rscar9XjZKuYznpI*)2pe~)S_1!iy4#$XqZ zKr>3E)gJjF1}tP#A|sgpxQGqHsFT9XS8My<9RUcD4bT4FZrfvNTTvXLrE z=nsY-4XJoCee~dLySshBZE2SW11-sBUy@MrX_E=(gGJ_2YZigvg^KXZi4t1icI1r8 zrFl7T&*h>%4f<@ ze(w?Cx_`i5UEzjUA~(v+HNoh;s^+zxBZ7)%@$BRKzdQ<%FKT#L&;c3!i%^>C5E`-@ z8hC#+!nN1Jq-$WB7@!GvkkO*FQbgzEHtL5?p^GZB7iQK~PYi9OBR2Hj0NiBUqPAyh zILM5d)^XPHwnL42QmR{Zx-ahZ)kcj*cQFT|@kEr(KjYLRC{n{>ciPDX+H;D0>WHz6 zeh)Q{544TQGdelD5Yls&rySdqcS-=O#5`)gAYHzoJ|H_myom` zC)cR66Yu1c5;fMejzWI!mf&0$`CaEtj|;mTL8E~-FB9RBnXS)t{r`cce7^=+PDp(2 z^_{rv1{i8dE^-*QcH`P8+mWs+;gh1##`n9Bhw8_LK~J*}+5ab`_+-z^c{L%i^1y$> zkCqV5Lqd>(KlV1W+8N}LapUM@)+QuVdI3rMH!8V&P($!{k(|#LoY3gsnWq|7_9BdNQmpYw(;L)Hjw_nbN^MOKPqh>AG-kYgRzl`vXP`(Pu#{nW*A z7leHt>6epUIIoj7otOw?Z5PimKY+H^?~A=~`R~u;FK3m8mL9v#JMZ~`w%0DTEtt3O zjkD>_E)%bJU`$Uof_h}9of*K!{BB2I1!IH?RCO*dcbhn@Aj|57F>>+RN9{kep0U3{ zHs;_yoc1xl3B;rZOV`&1mUmdAE{-$j)mihVoqjSXVPVKhcg*4}Y%iES*Nx*EyILXa z^VyrczpN*KITp}Lb)ji~g7?I??{>nNyON(G82EOEY@6;A@OKAZL`NgPHK8w9niHBn zn0R!bBc<=04L)($E1dLiKCg!0{nP*3WX|!h=bafzp&;f6h`C09un4$cU^e3zgE&Um zUnza86%1>o*_nlRj)AbH?~la(M8k$lL(rOogmrwdbQwQE|DH4&&AS=OUc*Imv@Pnp 
zfDuygijR6b9nnr_={YlrHlUbI%C!^C^Np!=rG4et7cVQKSAF8wNFVgy$?nF=?&>E6 ze$Dm*YQ9xeuLz9R@zt-?Pe|N2d`jU9@!#~Y{28b2s8bPgL!G70dv(rxVeT>!8AF{F zssoLecRE0fj`hy&%Z5&(-*(Q#o+D;0hs({s?>d7EtyODM@0xmUS%KSA6SRaaJ97dd z#Xi`d7_Ui zpFO;EC|$Yzb7uNwg5eu8e1)j+R&JV*rWRPz@Y>N!TZ9(dGAThGa2Ia>n|FfiRxqQ_ zB`Wk~%1yITEru4vqwTTPMBcGn{xW{oGcBC$UO!9xo$^ROwU;;jJYoKB3)Uw6d5Kc! zU)k9Q8pAryS=L>lcERW`@VXMFgpt;Bq=YJ9sj4a z|08kNdHzipx{u*k|B2oFTxk5MN+()16}&2h)UkT%ib%ao?Z0piK2FNK{wfLMu6&Yu zktOQV#y?KXuts=A= zBPQNYe9T@9Q97r@s%<;JKU-cwS;vztNjp>CXBuJ6{A+)U-_Xm%O-Gu`khGlo3iE)u zNqFX3zw=8KQ1g7dI)BCI$>4WET%BFk{KwhXKZ?>otF`XfaPB!B&rTz%OB#%B%c>_F zMP#eBSGYC>;Q<=bbNn2k3>R!4Q0PCRt$5WgP7|~ZH;c+&?eSO$Q8`q5=X^p?L}uZW zMZYYH&RuoX_)~*?+Oa!zp#mB&ZafI7$Q_q7Ebi-_2PX2)DWsStq^n&pNjqF=sw-VL zya#gSvN`WIGW>Q_qMTt(ir_(RNv$heMsf8F92dqTpaM&+jfU=<(Ev%ulN*mODk~_n zl+BThfm?0wwic3u${Ek^xNlOC4oBx?JcmMP4qvOC_|My!PF(YiM|Fy8R6(yk$T(Li z0>Trpt^RVH&d4Mr@_yB|2q$;x7TZc)a*fo@ZONhVTvoRSbhD-IeJ)mfZQLlQaAG1# z>Z;M;)A{_47F;O$!ehBaKb;hkT-clJlJI>EPit~h=dY@VOV}nw;uOM0=`TVq$|?A- zYRTz6>9eBNn~@HF(mSJ|thmyN>$B5ao@+YpdMbF^Aq&?sRc+1x%@Z!TFlf{x1sG3B zfcUrWHloV&hqwX+(CBB^9wnXa5n;FT#kfZb6{+@6=5CybN$>}lJ}oKti%$A*e;#u1 z$Ew!>322%RYp{NeLcj;KQueGP<8Z;~&0teMZH>URw0#4M14UXvToZOldLDhXGV@8o zoh3xQw1_>iv2P{W7hl(=p*^AG$ZKmZ>ZWKTj64H$;6uaP_Cx$W_`fFO3*I70M5ds85so!Kt&d*K@&nl+ zpSwkvh(+#-`5Wks%NpqZo_gYQRHuc8MT{iyuU5_VOV#PSE0mrS3;T8+8kt!QeaxQR zM2$gW&UD|yX7pQyEa`^zI)2iND9;c;&5f12Ru33=pCX)EoxwhkigSh07TiH4^aYC7b^h$8E|+~Sr#jOk_Pj!nrE9Hv zr>;#{HlO{04|q~jx8Jl}E6Ly6vqdfYIr_$r5VALTfJ zAw$0Uw%>3n3QPCo5kiXJ$@)i6aR#He`aq=9PEGo5nP!ZD>zNh&VU4Pnz1339!Te)= z>InKq$f0umad%h-I6!mB;6oOK%gW%c*?{^1ursT6Xi<;wO^R=y>-<5xWV`d4@|Wn2 z3S>2)vwhF!uZN&r0POG& zszwG(ga-1pQ5^_n-;Ua!Pkb17XDglk(ZoIIopQ|=Iz@3?*7C4zso^|`|VX{Ma@_hd@& zyRx4ha5$|%jLLIt}6*zTVShqyi0)3-(z3i}0FVA8~U`PEpL=4-(omq37sTuerOQ}?|sklH$y7e~OfPV66 zED!onbyHFO^E*-v>RucI^{2*YbsDh^PnF3Zw+=C#;rclW8V${?<`!fA=y(3AytJeV zh&`WWTp9a~BZCP0o0-qW(lH6U3M=kK*-EldwzawxSVISv6S?1ICq(?oWwV+0b_V{Z 
z58rrnHwdS7a;Nq_Lem9_T2MYcEYRAd3y~bw!iFySNy^Gn)HghLPn&)=!=EWBZMC?4 z-Ah4eq~vOBwU890aa=z5w8c2nu0`#Svr2TaFg<~`b5lSS7mi`FE+Y54?K_|A`7$x` z^#{0rn)^n5V{Uf#t?(y1p*Rv(o(dYR%pQoJXsy!DineAQpe`KN(16uE2psFT(tDme zbXxzC*D)*Cv!6KgINent*EL8*peNi&ojQ>L>X_cI5y-7Jnzy+7^ySB*8>vHf3hIkt zxkE7Ij^QQ)#?w_Nc!X^hx!nv)j;|{X?{I{}U~Hv$A01fSo`#IpV-kjF$ZvQDNrI&w`S(pzo{aEzNxK zV&H)pN&DbQq!j-gKSyow6l`uC0J639C1Kg@vorfiu!)&exR!=lfh(|?F77ba57=46 zY}8*iGkBt2GV!4Ha%AtTBX-q9^N452rp*xZ0TceyPEL8sOT5T-x$!mcNNcsZT31+#@ z`gy-Ek*`gp-&?`Dz|`mA@0!-eIaWos*1^FtS|&Tva05Fq_h|p1DJ5k^9LH7Cz|l{E zeOJJ`qparfYzzf)A3~$Kcb`gJ_lBeY@a^)zQ_^Ha4QS1V zQb|qjG_G>4lZkHp4_u}usH{IOBg!qd2>4SgxO4UBc+-d2A~`t9_PF_d9n1+~#HN~T z1zPOMw>6HLc&S2sKJM}&`yOAST)1Cbu?R{<<3$V4Q*lC*G8H@oQW>upJ9POWzOg*w zI=a1KQttRtRzZ*ADUOss$!J*YhaR!8o1jTn*5qc&zA`v1(i%*ZtcjeH+83gh8mWPA?y@{*o^`o-AbP>pn5VV5K=&VEVRl*5T< zWX<0lOZN`@|TeTEX6`lyUSwD&P9J0qyM!JffUD(5T_SH4jl zf4N3l&C>YZb;7^du0pd+&;d5j?6=q@eC6$1NDa*sI0stJp<+4 zp?UZ%Q%U1BL=12CU`;2QQ7nO_kGp*7?+H2@iyEBmMBi8;U*-8^p;1NA883OI1ZU{B z2bRz`Pi|^$8Qp_%oR3wq9sxQyM?gp!>v4pLZ7lC|bJrVM3b_*LaSjoIICVY|Nn-ZJ!6#B=8*HNo1-fE6#f9p$ zB=^$QnOM%k31=Yibow%Lsp_ldn*;~NUeE)Mg92t4ygu1`G6XhW*dR$!k|C(aY`FAD zamH?&BI#X-@*)Cms(#1Iu_2%j0!^~Q^lC`X>6i|Me2;7mcEm?QU#Wox@o((oKsF(P zKpqY|#MQRpdSw}R<5jv};xoGvTK*Vp4jdMQqJAaCdmK0-a4oj{4=$QX>19!>pKS%Z z(6ykvG+HkAVTB(t9%CtT&zQ%%UDpgoqHA&76l>;>937=6Di>>(!&=1Zl*_GLZ}#Um z&V6r_5)sCyL17ftby6hhQ~Zbd1WrWQW??x8Ee*ouuZl!|qv!>dkK}yZtPtGNx+`B6 zAPzj5BS&bGYb)MrgDr4JTQl_@_1jTCO&`z*RGhlTxGY%!17eaadPltWujfwBq1nmV z+1z*eg<>36gIp?w%qg}kEl=#QtCiWdzA1N38*(@re5nJT0^l8kn`89zq(y|WP5}^1 z#1_}XOs(A8_zHXLP9r2)@JgWhQBF`}p)pKt0W7WP9raP~_|@XDQHZ;UTw59AfZ##p z)I3sZ)Cb`uf_NtuZsxw}^6om9CI{)irFQa2Z`OB)2B9x3a^~4RG|4&9gv4iF6F%mPb+*Se|I3^gTT9kGvm^*<$0P9(A+Rt5i-zR?~pW%76Hte#QbU z%diE?f`wKW$cH$`sWzP{76BrQ$F4GMaHA)6_!{0`YRM5((!ryk?q$3a^Cj6~i>Fi| z176O5&mny{Ryce$ytVb@w}kA8zGo;i;FKY+EzM`>)^>A#F^Nlr(_k#*i`OV4Pi`@{ zPvk^!u*_*TI41TuKVT@2v>1tPyiK+Sa*mGl*#zJI03*Boh|RuuSv1?O$QZH69N4#g 
z?e9m{W+$33duJ>mo2NUci(w(>;dBx;`DNl^>&A1;d;w?g@k32EpDU)m1w+8M`AqoV z=U*&obI3AV1!@MEUTj+Cnt86GEB(YY(2BDO;|M26dU8o)L4AlTpYJ!jbL?!K`;}Ym zH^08rzAGtrU7IK*It(0frc=ju3=M z^+Y}f5}m&1@WcvaYIFt4cls_BbXZ6FCyf(!;Ag1tZ@M`CnL||ZpZQ!ut$$k>L=VRA zTH=qtQ_k2GbAQleI{)5xyS;Xus_H2?|y zzn8pUmFIu*JU=;}-_`z~k=gQx26+U=8r>Um{V%`F;hG=MaLBfHmPKH0_bM8ViP?$7 zMb8UE=9$wxV)X^loSiJ)>nB&tS&KN;3b%>bRa3||D0;nP6HnBCfxN%@eVsYiZuN`n zwuI4Bsn(7^X>TnUt(?tnT$}FuNjU%FnPr*7vf=9;AZoBPqoiT*87TN*_R)%t4&sLA z(8l4Oc&O37vcr(2qrYh6U&JVT(CEqY|Eb{rrA+^C=Ff5T{~sZ-C1KcZJ#}ca5fjbs zHvcwUx%}xTTdZ&#s~1BrhJ+F}QXG1dIZn*Mh* zeK{X=LfmzJE4y`>F>20SlpP>twQUOkWAwY!nSW>D+W(RWDC^s<0q#+p*#y~}6dKWn zjo;JhDK6lj^aO4>u7k!YGOQKA&ZYLPOR_$@CIz>=pWzC1yp!8@>L6QHiw0K3X;&(u z5n7QW>9R7>K6Wwm1rW<^9Y`q!?eV&)k2sXf6_z+rZ*dM=M_pz?$KR}hB~8cQG)%=; z+zUl~U=0WfYoGi6Okx^PyJXi6AtymR!^%C8&o|uCJgbMZWd`8I@m<}dwMiA0ahI6} znIAZ_DPHGuR(&|6y}}a-B+|e%wOqWjMkZ2Yx6g((+5NiW^FZ)BMt8R#CI-6&=)|^? z`$$_%YY<7xvA?91wPG=CSAm^-AGv7mx%qLR>|&)v9%*WO^R*gm1GrLE6HY7~>wm`y z)LmRHyaYRfv+I_X?kyd&j}7PXI(2>YBVVcLpoQ)k!Oe>2Yo4Yn&Xylt@y894JNN9cS zi~wn&jRvD-iY-jyj?lmX<(}mS0G|D6DA%TJFh-PCIZ|UY5b2Sah}m&D?L4t5p0iYi z?*@Y+tgBF|mEtUI`ihO~SmTmUBD!kQv!^%WQ`oQIOn}WFgjZZ7gT;rVs zu*=ue`fi@MYilc-@BkT5J(|@J(BI++HY|wQ)vBxNYHhqlq)rZArtaw`_P9Mz+4J!_h{oK4c}?{KhqFW2T;* zg7@64z^uvF2|8dCy5T~*vO^woz}OvmYY+38E#57nkLt85_|U$TMVM@~Az1mXQ<9GJ zW!YgTWvyurX-g`^zVO7DijAETei+*qc~KQo)@QS%VGDv?SxspwU;+<%SLA;P43p;s zHA71-$_vyUPq!ILKzr3WI&l+~%2x5bRO?O9%YdN6#GIBK>(xF#3GoV1 zc)#vR=1M!FwW4qGmg$1b!r5S$RN}3-(@N^HWem=OkKG%|cLMSkzRP)MDH^DKp^jF- z)h5?R%U_!AZ5*;E`MU#?aT*#H@n*32yg2{Upsmff<1dlBz+kn83Is?TyP#2CXYGmA z`qD#>a_0~qp=6iu?h$94-=u@v)*}1mcJ5O_DFlmALWybc-xB>civx|f7Z$6GIRBYJ z4*0FA3LgmswT3~XYoW53PtmP?718Vdv^VRr)p9-_9>36WP6HvJS%=mMIa&pKO=E09 znL4Mn!>(tJ3Lp7qw#iTU>CJz`Vh(@Jzmx2%Z(%)lc;kz1vGvos=Q{Bv3uTya93o|? 
z`-A?cgW(+)Uj~$Dti@D@{dG$r*kgqJ(IK}@#}}3%2qFirNq>u42#~**vr%)co?+|R zbNw2$#9%7kbiyuFr4T4BC=9R|+l?3K@~l>iDv!KgD?18r%&!xjDSpF1eSNzzk;M$N z1{4n1!4N@KW-S>4Nk~#GCbX_QA{S25B5SPB=%O`aU*H@jgVrG3+hO43mw5c`g3|O` zImpg%OZbWP=CONS(|=DlM-L`1T^|~_SgD*xYLf|otn0P0>_Sq`%b7=WltSf~QasU0 z5*-Tk7#VK)Q+Vex%5!8=xB*}RB&yfw95y$!yWXrA0d~cQO(GxE7}s`Y+@O$sTU8!< zjPOAyAA>7kMz)ip`40oWuO{&H5Vnur*eH;(#mlDcd{;9Wo9!!cHbtX1glR7n*EFoF z)ppQ_&EEu1?mE;P2dM3yc2g0jR)=G9z!;^PF#?69^G)ZsotXYiD0Ysv*psVix3L4c;l_%ua zqq5VBfkCZ5|6#^HgN?2tIYsOHNj&y!VkquSgwj6CR>4CBF?t0=y$lDbE33mE#kU5g z3yh_EU8CmgYLzQU-tAxSmH5t_gn8CBY)h%DSlZVP`}s*!UH6gVo-K@w_N6eK?%Ty` z<)+fhs~(q6Ju-R&mHs?F+p}foE3J>m1x7#6nuG`NPzD8DzAz#E1e+xepOyW(d+A2| zjFzGxn{nzJ6(&zi$JZNl#G}H{TKT>4F{>wqwQaC088`W9*EDDEl|tYA_@9ui=j7`y z0u)A)NHs)p_a(YRdZFw5yQq6%`Qux;3B$@Ts`Gcur1yw8XZOx+gj(T>tyV>kxCYr* zrXHV_N_<%jZrRkWhu3Lm+~ci=r|GMpLsl2;eX-*UaIzEMP>6Hdk#zujt0|S6$a%W zsxL!bj*f{7=!2%H?lcE^jFfIvO&|UksVhw-%Uxrn?A0}L4t8}j5q9R0dw4gUty^4x zOT=jCu_D)x1eu%S(t?`@NPxvgg4c&|?K~Oh!#|yFsE!em=Gc%(8-EUkxTtI8#6Z~QMUPKhK=fb);9-Zz+ z#L4L`mHiiu)cMrn%Vj<{4o1b6n)zsL960U{T`ZBwxs+1|6&p0dluQ*u3XyY5)}Ind zKvB9Lu}?B9;HZVQo46Zs^>u0LVJKCT96XSwI{p{NdhuLt>XLU7;JRT~^wl<9xgHdw zyZwM9IjJ>wUCFb{M}F zJyi`S!sDgnxgK@p?-SJtj~eLqEZ^n9!%JTMe9!lX8V>$(A=p7p-fx1ijNt~u03TlQQie_;+njLU|dVX#q&xK$tOZG z`D%df$0F6}%U*rmo?h&ST~~$UMMn>OYFggaXX!PSA~q-wMM+#DEl{&|5L`?7Q`CL( z{}M*L_Cz8HxC-m{?%Hed2U_Frc#Gp?CA>AM!e3nH9M-hoq{BnVGXfY{qxs@r(E^yfo`Ygx+<%#hjJ?>n9Rhi<<%Tt z3tarPZiW};(<`sYAk^X%j#fHjkS8Cw3z_n?xha|5JG3m+DGGjdA8LzNb`;RN9AgW&pj;`qYso4_yX!&w*uR3gJ^D7n;) z)T46DK^W!$l|LT#!v&&CIdsB$xn*#{ggy?GKts+%_eRRHKP6LEmEvbRH|jK-wp=UA z>$f(9#HsJvS0bLVM~9WpJgR@z^~1tMOmx0W$y_gh{k=~U5V)C)05;{%h_Pe~zG+N# z-z{7JP^^hi)5Nkq2}jg=wJOWR;cL)%-c=6P?=fvUC zg~~|l&oGD9Sn`Zj0e`tug*;?otlDbP))5xXZM~_fpx>Z#X=DL$F}wl;WsIV}(?2LW zj0VYTd6``x;NmBI9H-1nb_2i?FkZ%HB$Kmmzmaf|S&HIX0RX$EZzg++Ci@_kl+qK^ zd-z51kr+QdV3GfErvDMF!?fO_o)SWcU>9MEJivyQ`o>J`SmjyRbgsvta_3EqznRN@ zE!7?$F|+%|W?5Wj**YX_q1FV`JqD2(TLzTzzTkqfuCCd-7E$4kiPdY*Po+bejD2`r 
zEW4}m0~yjC&`fmQOhf)+jivHC9*mgkbICh-pvEG0bGM2Ky8esLjf7qqZ!qW9pq8%C z*C>8GFOcPqRQNYgo^am^v*_)k=WDDlwIjn!MypLBCb~jb7)P17U|JUO@xx6mFUTQs z20~fAvj+7rrv~6~@f2cueGF)-qB6`fF!mxnc4K8!M6-xS{8qh*jv;wR6c7uQ{?IpG zIS<@#uxk891hPreyyv{8%1OKAsbL@YW)>^r8Aa4&ft|mSSN64Jfl4DLQH;Q8)fDV)L?E<+7W-S5Z(m8}CpzvZ}Dx z$~#k=j=lsT;?nLfY15G^Ntn+KbIf2uT_$Bbr{)$s$?qCPo+Gv9mhBa3+CXyyAC z)d#`l9z$7efw^U*xM_uDT?@Ocy*zl6o{J#Zwvt7;C8BQ0IrFT<3Z|Dk73%x=96oJn zRr3_Yaw(Ay<#RYpC?z>_&d3ya$781H!D4!5 zwp5@`J#oDgk*0T+Wyk6~-wxaWpn+fOhv1hRmr4DjDv#i?!&*25Wq9L|_AK`!_UGAN#)iq|&2j*eZrqBO2l|Juc+T}7ORdPZ za5n}iHdb!l$~-OMytXv#quA6GF-Aj8#4ixmy(IK=GV5i|Fr9&)dLN3kHO-}zG#%D% zUSCgd)8orPEsHD;*lz0d0=s;5$->B3Wo(Yo+zi#oYgV|YIT=V8?ql<{OYC{(yt)Q= zBbX9jWHtsq52s8t-XqMrSQZBOC6p_raOP$c_TWXsjifJ2#nU;s124(&fIEm96K&c@ zyK@IvoVlB-+MDe?lYNyohOo|x=+oIP2jNNNe0W_u@~h9?q9wO{lN-m$%cIFZPoE5* z+UxVkWHuTbNxblO!1JtSmlCiyt)AGI-`#8p!1AhN)gsnKl^-UUFDQdj+oSa(a4HLJpDbPwJCdgg|mqd8h9!O8=zr2F7??*KCcg{h?z;h@{E_& zJQGEHeR|}+l0@{qX$J3PM(v!BB~$Pj@P#|IWq-sUQ(JG^eJfFk)4yk>lNfz@8rS|5 zr*GzGJc&&{a3?c1hE%xZR+u_Zpg97}rSBTwyjmjXZ+=k(r-BN{miCkBql&vl;8Q%@y&f`OLwOW3xQ4<<%lbo1I z^dm$_ynaG+DiEyF0b6W>iGp2yrhf173TY% z^^aDzduQLPuyTLsqybf52LAX5*t% z!3JR_rf!eJ=x?Q2(AH1v$jqVxo+TEsY|FT*$6PRI*2hmelh|PhsoOlZ|8jZaLe{t5 z>q8F~lEET*vlq(G1J|jQ7CjMPcfRh9bzBbRFchz4*6Q%a0Hk)eV9M(_3?@szA`kx& zV-(yV=A)`wwqQJ2-7Cb)YkhYRehAXZX7;$kGfcBxL!v~ACNV!WOL2fwJ-7c_k^WgqT6E>)7)?t8 zlD6j$9Tpq(wk%DaeHD3Uk1u(6^;2m+nYED-y-4xQW=^;Ky`-u7Uh=6cGbKD^;t?Z! 
zY#$xTx<+*37W>x}6c>-4z}Dv|~15MDaO4?8yG; zbf<~{3L{yeIl;Lw`UEZh4#L6Wd~;amho^6OUxtzUVw|!{zU*F$REvSU#Ikh31Mr6( zJQYl1GkIKQ9q`+i+)Hg>Lh08S2I7`z7eD)4glw^q!(1d-C=p2I=nouMLW8|9Ix^(1 zw!;0~=)jylhbv^l>|+CPyF*l*KDNI1yl;f&HDzi3!C!~zv~0{Phxn#+jD%l$M}DO8 zf!gX~9rDoawj%R1*|pE+@%@|ji9EcBi3ehC)0M+egSLkF8%(D)JdVN&1CIz;npF5e z78TTx@dEpd15iUM7fT_{+GB6ov|&vpwk=^^vAa2A`&@mq&iF3@!BBn#_g;6|r-?hJ zJ$!u6E#a?_J0zyKt~j}*Vu68zE^E2XUD2vHcL*-lBDk%>hKoRJbGunKhfTqH`b6Rr1h%raGGj8sqU7!{-&-j$_9FWSipI zdxYhU(~l@jim=90Ml^$FStewfNk6t-ezyDejWjSt#q3&zs;53?pOd2&}QKO z=Of!>dQ2}8xHHgi|KKNX_&1;4;?Neix+rlLa<&}6L3U7|0+{KSz)hh0qa^0JZWpBkhK9zHU1Z|eeD8(upaskl=XiC*ZF^r2>mbr%Xcc`628C|RO{il zf!)ewC2+sYvh|yh_aU?QvR^<}OX%t^hlV(_uubVjAJa+;%lglJy5Nm?GRDMfyQN0Y zmkxB)cWpPgfH!CK0Gu&Kxy`M2cEx9e1teaI5JnN?yfb6Fm*?Hc+9HW9<~PAj+T!P` zHZFiLQ#ljBZG=Z?sm;`r?jGz3Nz&#&ns^|zV2J)tU=vDWVk?VgPXT!6v14jI52OVt z>W*%qsG$>;ODbUC20zG&J0+^AOP|#@;uXtx(I0SH1dLxoP%r=0_Y%1E=>-#Ij6_4;bCs4V#BD~1k2B=xLcGnK0=OS&%H#sTMZBb^GqQhi?ei?CDt_f*Czy4Bm|t(? z@W5XuE4J)?W$Q6sKZ=Jhg}d$!qhjCui&GOhL3`FbSLiOi1x%B_Pl7CrR%3WbkKv2;=5%(TU#oQJnz_h$L@>Sn9Kqi9Pd1QOP3t02J6PG(j}OzTypu`O|Y{SE_Qt;L9fO zgn+aKyS^Kugxqw;C*|IlN$D*|+iJ8p3{RYC;3hs`tA^KE4oQ>fXz3`?FbNQ%zoUx# z^8=7;AFg!?Mr&ZYN`jYX%Hz^p8m*OX-OZFX!1UdU*j#E?jHZ5|IM(K8`J74XHn2hT z^k!D(G{`8>`oI<-5(1sR5;Ba#xS=na>xp^!cQ;M@79PxRb2E}te(RAw*s!|Da}0QU zhfhtydFg=i_R&N;@_yi$z*kaT;@s_}+dw9uhgMdt-YZXed$axIH0*UY*F)Tj4h98B z9YClh{HRh%Iw!@S!SNw}wDlKAh)|wCPZJn=GBMzSow~yObG4};c-0N0BY0UGMRB?Z zDpi?n|Js_v>Ys8;vmh7lD$b_3du_eBm+l0#`_o?%To?-9$o<7#IFo+QM~>>JL|dO> zVPT{D`*bD+6&Bu_^7E;0 zi~5;CE;(b59Z1XGhIbj0jM#wVKF0cM&u9PI%zR+c z`S3AeA5*t!R?K|(HT%$_Grrvv+2uqjl`@b35~iu=dum(?zv511nT*Yq+y#G9peY=p zwz|Bkhk*shVYRQa)X9ZFj*20Ga3)h5+6o2reJf$_-F`4d!gmwsb12Idfwe%kZp4`i zbvM|Xs7BJ+?n(K5%tR1M!1vW=SwP)6T!oDlC34Vnr4)J?ko71Q+u5>>%G{@XwRlGM zGr2r;^@?&6OL`S9{?m5>yRSfuH38_&UGSVj@d*O@s=H{=&uZMUILD5V1GPy*LR z7sdg(=wu&79Xlk8r=3hf7%E)Hui@^!b1=ll3cS0$mewN(Avd^9*6ZUp*!vZGhC+bW z6p6$MoKkz0xW`zZFZHA-g0KlfHP5^*Pb$k}R(PzS}h 
zfc{hD+3;~`0QsREh9*=Vi;{&kulBN(QF^B4oD0byzFcE5JBBh#l|2a31mcN@$p#3L zQS_y+tGa~&SRba%oge#@K+@xtrAmSNcVlSYZ)A!Z0uP64N%V9Lh_+oIEJ~w(Vxn9* zw;AeO*u@NwWi=CG>RDXJse~`ASA>D(8^D-o^4qlm3IyMoQf1;-i-Ydff?NtgRn0{> zu|b`2vv`5|zN!HiX#nhv)I0we{drKZw;ZDvL~A{p6EI8+6E!!t(V_NN@um`ve6C_Y zS%!X&269jTEwAq-^>_dmbQ%TgDoD?N*!J*O*LS7T?GM&bOPZbEQz8 zni}DTD+On4JB}b;F&fWQd5tmVe&jwbm@eXv&!(J{2)b_3vwJU($3xKVN$(@Vwhzun zuCQ%plfjE!qoEg$RIKZgBUY#|=9zt=8Hv8EJA|`!z7FjKUm}DYjr-t003xUTs|?Er zd7t5b!@BP(xG%$lJSCo#nF-gkl)a>^ua^T;d0Hsj26|+-9Z>%G<115J~ALuzxfSM0e6TsC7N-GZ7)bZ=4Q** zQ*mBkw5m2AG|T%yGU4U*CTnPI+5jVL*30EiA)st?q9p)E=)agC-~)?KhQmWoO7`(9 z`V{l2V8?u?$$~y@b1TbBpj(x18M31`88m2opPx%%y}S>usICh|0kIjlmHja3EJi|X zUk86~5ePb$j+6#gqaxYu6hlg5j4O-LJM3K^E_duv6@+f)O3pV=t5hX4W%Q~wbx{`< zfb;X%floqO3HkglIEN8WQd=U%xMU^23HTZF3&I~XZg?I01BHH=g(!KSdU4&-=}qHk z#o;WZK)D`AH`)Yx{PYHDbMEGy9E(sXF~X9{qldGBxHHXTGa&$Qc3kAB4bZk&ML&SQ z-KtZ!nKB8DV3#kn)+n_Sv&+Jrk8r|bJi(!!cgbW}yhd;^6uuD21m59U|1k$0m+vlH zb38U}nE^c8XkW;>?>+^6T5)|0WIYz5FVP*qVOqSuweCh5(YX31R;_ZFej?rEapB@N z_)5#Z#Hcxw8CopO=G=#WUjAUEPYc4HoNOV0K-1h1i@W32ITYumI>U?C3k- zdhz{X4KAbT7Y-lln8sH>kdwn^TUM{e{HfO9Hse85Y%(N&uUh^nmJD_x_Pn?fCol5B zyIl5xTJMBw)}2~v!-Tpk)Sv%x*l5V$72V@+@x^*6E-a(`?8LYH$AKQq#e!_7UqVd|bx`F6s#FnE%K1j@Vj()0L z)l1V?OTj|ZK-X|CC@;^djZj)XCxz}GFaCTcO5W3QeeK4EWlL1?wo(`MEYvv%aqj{cB?Cw;4X&1)Lp-P~Oy z$FYd-j@WZx?Tv2b(XkLHk%RGB(G}Ejs9w5b^RaTcpE}x@ikl%j#n(+mdltsUjmIqL zYwjQEAfIArsw7jbkw62lL9IEK4SKX4jsVvwY@G>oNm<8JF8lI=GfgvuPLuDDMU4=5 z{i_WTp<pXy?=V=Y+0&bqvLIgmv=mG>Aw$ouFT#A1FT&Cf^wu zV3})FZ+v0@qnJ=?Uf4d3y;UX?a&Y+Kl6cgX{qEWJGKenI^Ii=0VL|q?mzDgwt`D6J zrJm0Yv=cu6jCRA-7oPBb*o7@@uavc=s~)xIZ}(Q6PyE zgQeXA=_9ZL!1*XmY%YW8is(#50JbRy3bDxM!9X(4{Aux2pZQ2X;r~tv8(Nq6t0?v$S zw;liIm{sB~4no&uI} z!GdtD&&^yEze!eE8K9)_OasHbV*OAv2OL?1 zQ6MC_=sn`^Aq+Ip-KZ5-epgOwHsRZ_SsRev98TbBEooXEY`nBZhYV4{eh;m>TpPykH0e3*n?~rW;ZOIa#D2N-KdkO6l}Rp>)==Hg2=fm$M9j&NPyH%@BB8VS2T zjyDG2prc$RM%rP88p>a!2J$kF_-P}rkYxZ~Li{k%WE5a&95K+h`kF_-;*;W_FUMZ9 zD$hMdg2cTTfke9nma~doGNZ3M1RCo(C@8XhEbk|U?(<>&6 
zc%f>W?7Ieho0_|p*Bd$MBR-Zh@j2&0Mz*nK;8RSqN1_3? zQz~EeMJsK7IAy;{IB+%{`!ZA<8mIcaUIwwKIH{p;r69B9Q$dNs`2gA#THi=%a|EUG#{>F zW_6T6@w=6X=}>!He*UzMFleN&%I9_r&CSSZSKF{cb4^8sn}}zb_jtuI>EPgn#pF3V zESD3YAz>I_R1JYtb(^0Y-Daj?rpcSNHMq-^9&c0>BOvkVPevHE+Q&w^9bC46VEWh? z^YEg<<@>NuFva(2*H`17^9*)Z$|^?sk3w4_;&@hMv7f`?)syR^K5oFT`ed)PFmWrM zm5Jn70VK95!nJEP?1J=>!EmFRnqeIIq~G1Q2XL+ctRu?(OjK zjZWszMfi1i@5R5mxnGTL;991=L)$Me%y*w{_Po~(H`H*wT3}Q^-P^JqC<3xA#mZur z2O|;A>S&VLJDT?EeZ#}0z1t(v$M~Zkr$BfN8G=8l$f-FGPm#=rUhX{T|%)Lgk z+$Zm6q881)Spop>SDy@@VAUiICp@*%YZJRD(h(h~%4yw<$w{1i)vI=$CHqv^W3=P=_5qMGu)S(d zQLeRi@K=gl-N!@g(;uk9qo;IdG~s{G{fgWxDZt2yMd^K z;+E?1Tq{7_Dwzpuv;LgAz>m|k9TKWl`I<40?)Nr6=LYGDTe;uS z_b3MzF{a=}9=HK~ge#xa6#6HGY_EmwR32Ch=+Iy=a1OkI#`Q5Auy(!|(rks#*5HUJ z;~$}D8>j`JHLew4SjBKJeq|hJwfnJs1x%$D0hGL2Dp&$(YO)I8tGT`f{|k`~nzJM0 ze9VMqsC?s!0Kx6C3J?TEZPq}78=c^7Y1K{EMBi~+S#p!ocnvZ6^~yUp1m}ip(640> zGoxJEM}*1WC?M^3_j-~n+?`3?4ylr+UAqLb5RXcV<(gMNxH;sjz1;vMlmU7RzUMLE zhz+}59J3%GrziuT3_EqhljZsN`j2H~Zf89|co@z5O(Xw#+%!PD@Ff@hFe%VsUVn$^ zGygj;kLDitfC1m`d24x6%c^1#W{~l$oaC)bT`QMlWVuZ`Er_!#B_ihJU2^7tv}bAo zJ{H7{9YgifD`Yic>K**R(X;XoLl}40A{ouMASYozNS-SJ2APw-f#yQ?Ob=_Kfr%H` zx6l9#V1t(~db|MzA~|;vYa`h{7LOLjv8Em#`xIPuz^=PKFQ1`$aDUE|-A8Wp*eDlw z_3E>_+yzYi38EyQ`u-`-{-MRMWd{#&!hTJ^E~op0`u#NBJ(?h-5)cH%BMsh_Z&kK1 zQk{!gFOqQM4tKi`7SmzGGWvrLWS%>-%sD^Y3gB<+Ip;@?akPw@u||Gx(*0wA;#539&Qipf(#&!{nZ!#(6t)eUF@*9-1`BPucE_;?ice5&Z8$-;gVJP zZMw4cS0b==r(O4kYiZ?(SPBO3mSzA?jOuJ+Oyr1YJd`DUx9}?I?yIAqY68!X`L2!c zpRJFlyc{7!Xd3?>X|)KA55C(b>AFv|>@31wi_gGlB&y zG`kTuKJ=uVANju7PAb=ivS0LkP-tqIV`l*1Yc*vMqsbr=Fs7{QYON1eLv$esKQ+Zb zjtr3me}s0Gd(|E{K}@2dfDR9~qS2$;!ENat{ampriI(~%%3b66czt8A5bbUZ%OafJ zEuxwXS}Q5lnXD3B4zoy!=4!Fk8$QwbhYpF#$alGKgY(G_rJsV~J1Qs&`tM-nU{VS~ zSbX;0fX-vcT6gfo|Trx&XS(z9AgKWAr^bz8%M-xlj~G4` z>SN|zoCW+)6tIgylWHfkuF*WCeQl2vxjy@x<#{Pjr9rQ#|0BnVXy+ixVnf$zT7a6i z{&u}FbGMlr%tnm*A#nYu8xcO;vNEBWesw(UBz4k!B_wyXrNsHke4vrb!;eRXRL=fr zv&C-Q#G6d!&v+#(JgLc8nGpIjCjlVkZ4X?6n+Ra)5AMw3BG^w52mJp`+wOK}E(*^U 
zt@w#R`=ly3YO$ey)YK9AUf5c+pSe~2=du9+MEvwmWyf~W%}Dw-BE79D&-`215kS2+ zfmX5J?{$F8PfA_gcBz@YaRL=M~P*_h=01QL6=@TBZ7z1bZoK-Kl>ng2QWgb08`9zVja7(vS? z-8giB9gJTM_8`#&NwnNO%l9{?j*d$uD7j}CR1F2aPb=DMPt%dUfyA=9{fwul|KNXsp`_y|=!YpGB{W>_3(C@_LS^v~CK75$0gBf*>lO7B2Ya zJm+$}7(S{$VUR(Vox1hhrc#3Ii~;=puuMx&(W@N-OzeACn#`|D7RL;P1Rr~I^^n=l z1Iour&EdhMKi!x8Zspe6TTcx>*s$8m+T91FPAqRr#mvB(W~tPo(@H(FdgKX}yjk;; zNZDVq(=9s{T#k85XRsuT|6!2OCS1437X;y1DN;iO1=D3WEzI^;ct3Ij$@Bw|L#<%i zf7l{u1_g7U+qOlZg;-BnfP4vJUzIrMm&Xv*C(bnpi`SG}3413zd1#dc0wrzI%4f-$ z*tlWWUz)v78H2anUOzs~b0uaZ^I+74duMVl`!!8Jf9u-~H2|IYiWhJrlrMA)=x68v z-G9-bYCbs7?tp&ihfs9b`00P}oqP)0oe)U~GutTo1CVsi2Xs3^7KSpeS&xyOc;@YR zkVeiRr2jB9`zo_}OmE5>Y<)OgevR}s&DTDxqUDAL^AJf7xb|j|bY5E#t%yPxb0yg@itsUtO9Pbmg)8M&4&$ zr^xh0T1*nB6-i9j9rSQ-d7>v0CEEc4P3&GHzW{ncAp@Zc;ogtT0?0WsMQW87Y=d72 z;8R09?`8^g38LcJnY12K#_dV~fobW1D(STU5kDKYZZnIW`rA`x$9*Mdwtbjr4YcMA zOhO^`Z0HU3290%4HglGSNOwZN0_+a-{X-jL3aHQ5hYz}UzwveNP z1bY}l%#A-!&ivXez6_a~>xQYk$aqmX4xA-~iW|`ta8)E3ffV6yr;29)amSIbB2Jgq(jBw0dv%XBBJh>S<4S+8EJ`#^0o1rfnd-F zTY`um!}0oYt_RoM4fhXJcgF=wvo#XT#wo~nVvYDH8TD)?+j=U=O#E{#Q4ww&b~7j9 zBwh@jRHp_u@$)8k^iun~@#;Dy!_Z7xi&cV7w?TnQ?0>o#EJ^jJPDtjvz4AC?bG;zA z2=D46cbvJ0chm;z2mbTiIe*VTJIEj&2YxirFSq%wb&kg6ax+E2-mH900);OfkGq)| zqC&}lbnqoRE%3nO>rzfBnzcmt8(4CO0*oWh(f#9IMhTl-{=)+w6tEREtaoQNfBTeG zylsxmxUZxEWGLrXz#4zdQU}H3R!?sng}7cu3a?`iYGvig(Zf2`qOuft^MN;)t|Qmm zbPlF2C_g&({pyzvlI){`YbU*;gi8ctm^jgAMbeQ$V-rfN~Y`58)O)Ex4I{`I`~M%kl0rAP2ber{V< z)bK{5amx5##Po|Nk{(6GgPY0yc@|i3C}K`&s4URaxZKFZY(t>LzjfQW;De54PFLIZ z-3yn43YYz>mJ+N4@S*&4apHiMs~I)?ufJdPsYVB+4-zT+prtnWRzc%+AN6k; zqBYL9qybJiIwR(j7`cp+Hw|eHZv#d2bpVq9lb-bYz zS`<1M1eX~z=ltO(1_^~N144KvXPG5MlVNVk%;wB~C&|KPDK50Q&s}7x9tTv5O^$AX ziE;?}P7eOpM0-pW!jtyA%DU!V9&nDjsnW3VCFe!?_&#*}@GHU+SI7PAT;II?EmNkzNE|7O1X69mT&kUjJ&b+=*%sf3Y-i$NalDtPDb_C$0f={5zgZJ zUxH7zVw@_}-25XII`Wgl=5F{Q7(p~fZe*|FP>%0yN5jnsUo6_j<77ThQU!W1VQ_?y zd29xb^LC2JZF}w%6sC5>aJl~|P_{`Tx-Yo(^-+M0u{2@4%U5l5{%9P+cBqJvkOglf+b#j{Hqx3Y1(zv7-yOpKFS 
zu8XV*`7Ba07xX#V)un;88}R?|)*gOR%3GlT9DTb-#ID11)-$PLW(gz$Gnc+{09&krA!r@`;Lk1eZ zKE*$Tf`S1Bnw{3&lv{w20Xc7$^xNBM2m7+_#^IXl)75dzpMC=y+xZ<9FQj9grO-|K zb)(0Sqr3nOba&maZ`Y0=yMJBrS4izxdZr>TQHQr}`a2h0cJp%Be7Dp3KYsixTLO&c zPX2E;O#@C^G#%dwKl%b7pflpfvWBdPZ95m}Y_-F@STlNlvqLbzO~CNyv62F2ZT^#5 z-ADFC{-jnzwkQzBuVW>^(AN6rv2xoNVOzs#+#Q~zoCu-oaF)s-ED8s72>PAViC>Re zEN#O>sH3AY9{}zqX!;I;@>AB#zm)(urOkT_?efdDTcl#U}_77hI zpHVKq@m~fBf9ZAEJvCaB+?pVHlJ0LYNgNYV$WGJHk@c(*Z{WfuZ{+8e8o>=4zk=P4du+=KjAak+j z-B!L#6F=rAsuJk^^4*xAMoPKYqJ)|HyDSKY`C#n+o+Cp&*WA=0OS>UUZv5l#71}Wm zFX>rXjjucYt{we$jQBUTdiCC0wg=(H8my7DnVswIZ>gXH&o(q+#Q5y*H2!T>O6xf!{oQ005Cp zex9%=ZI!q0XqtuY8&xGikmM|PGjec-RVe1J~SCu-F>Mp{!ST4s@ zn5C^>d$>${<-=j%{kFVBZl4gvANa3obH6pRj`DgOJ!|CqLRRcqTYyx26HHcON<~!s z;iC*?|5qp7r^sIUrfO&RXM~aVN@<7zgchP-ElL5UU7I3uZp;vzp*<{jJ~3 z_1`@y0D0={gSOgFo`jsUT>AP8w-gk(Txl;YQ|w~hYAZg}W8Qf2mVN7?^rwzs=d{?`2yo`3{LuMIdp+w$NN&)y4oyqF0Q`o+zi z()QFE3qM5+{(eFEQXD7WCUAzOvn61c>jcT~K>IXL{-$u7z*nI{k29fU+0 z+wbTg>!ykz=#)G_(;wB*uHHtoCi{gN=U&OZsnAHhXK`=C&ga3N3%7L*9osl6bS;Z$ z>~iM%?Y*7@iGU7g8NE7DL))sC&4ml2%M8Vmj+^+p*ZRSPtX||()F*d@gl7qz=0x3) zrTg{UHD@sX_-S>pP%Q#s=5IKW1&kG-zOdRSlTB#2dO6VjDngfxobyVY{wjb^vom`gD+1WTCM|JvW<(#wEOOqk^-hNR8eki(O)=DKu#s6+} zRISf3?(#dQut&w9wqYWC_p#3E_Q@>2njP8q0ii3UbR~qFDTBDCaW`L>uPz2v=3 zh0y(ih|iI|N08fUwbFqN5|;O+eVew4=Gwr^fR6HVzegs~$HI|!^HtqzR-Bc+6sZTE zS`~UnY^i;IuBUe{)Ky1U>hqfF%b>S=Ye9itq{Se+IqhQuj;DHbmZSjiZP(+y2o>Sl zKr8yCJbqT?ouxG9Vd`UT-NAl@w(E}Q^aZE*?!!+}fKLA1-cV9B(e-(me3cnp;%o)@GC$sUyx^O(3^%rT7ur~{As8VIn96bl{5#nzJR^caJ&@Kk0S@9nrw4FB7#dMnOunJ73lm_AH zCTo@xY4nb{*ayCkxan*6QZ&UkD$IF9x^@6^^al!dSl(#ix+n>%EQSW|m0h;aEovAu zAq;;RNc(Z&`Fd*Qa{uJn$s4a4k&mZxzbi7HSXHa_nsDvk$sHv$Q zrIl02KI5`SKsk^Az(G(P!|WSis?o3mFQ?*8*_DEws(B$3rn% zA2ssNy4`c509=uHQz3s6nYFcXJt8+lGB$m-1-bOP>?zt8=o$54xLMg_A{=u3Z615F-D#j(nN$^DDeT<>k>gTwiv zY}DK-LYdJpW9BIJsD)d0Ggy)^{IJFAs{GcDv~5EG*ujj-4ZC-ZsJ5?a$94f!nu3$W z`wwrqFPG_#K`|-kZfS*+pHAr;QkM2S68I^~~YwZ9eclr9r+GF`ms~}m(AGWj`3JOs9#YLg*)8ITINCwr+VaXIj@$!H_VKQs9u 
zPs@p7+-S;=Q5$p${g{?p&@k!(?J3_)To0l2$$pbfPD#@`;>^0u*baI9zigi9X^E?Q zzvFl<%kSBJAc*Io+Z`va52}x0-{K%YBL-j`asn=!-aef<8&SM-J3&JQ#HE!tZvGWR zdT-RyYU91@fn4bQ-Cuz@U>3&6a4DR{^U&_}HUk7S@cFGQE7c6RBUq8Or)rLRamTZ` z;wwbY!K=sLOn;oHM99tAeFglm&5`whh@hyq8`ft*&zIgt5pgMv&D)uB6|~qP^Q*7insZw7q;c??el|BkEc; z(3IanpwGUjT#asDEH#-l9M;P1{zL65=MQu8ca*Ef_}WFH-sIH@#^n@h1gkpWtB4Es zd1|Z)UWeOgSVB~kr~zISuQYM}6gM3c<@eMK2z=?s9t!QUu6uVN3gb*`O(x;-+YSqV z3)b`VN{_rDRzA$ZRN5p{PI^LtlP;8VN&?@ZR>%iKiM0QR7r+hsLjGkQ!nXwLgGVa= zT+8R%zg*^@-#z+u0OD$E=aNF^>D7EyYii%&J}Re+4}b4nzIZA$?bjVQV%JtaiV#=j{RRa%F01aq`jJOh&!l^u{nozm$ zqmbYxDnZ2o&6%PE@N~pKCvkRC!s1lvkKl3FYccLj#p29Z^jsr;)Cq7(-T}Lp+i3f2 z<@K2p1rNLM%YQ)KCaJi%aG|T^<#UJBj)t2Tj4bw;zBHY+A(l9lKbhaZcDPjtG$E~Z7t4*2#(nwe5-D$WVh!wxXtHkI^Xj58LoPB zTr)~q<(<938N0wffH$6bBqeKW?ur%nMju2KPzR|sVpcgkHrCZ-D4Vq9n8WN%6nqy? zv@Vq6T>eM{G-y1y#V)8> z{eY7~h(#rzhwI>3mtB+v_*tYx6+Z&7MuD8-JEjGWE38aw^*R8XeTtg0Fq!1e54f)X z?%9s#(qy;j#kGXsu2X3RLFe(FD_$oTc5hn(1ltQ+5KW6Jy|OBKRP$6pC+E~;JTIzH zUwtSvOfpoC<{f@1%X6Q5E&Q9O)epF{yjm;D({Slstr5_u7&2(Gc?5852@PJ;Q_5iG zRQtz9PM+L_OaSa1qI<^!_df;NN2pHO`e*bF*O0#B_x=y|-UF)1>}~tCj#vf;5s*F- z+JK0FG$FwPWK<9tdRMv>X(EsWB6hkcRWVUeKte!D2p|Ea_lWcsq=p)52nhtvMxB3o z-|s!&Io~>KoprwRt~H`gFnQR|y`R1JeckuZic&ABQsR)|@qIFgK00d;O)%XE=Uamj6BG zxua^iS4`?`6$7!ui;z8G9r~jA4cb0=sSCYG7-;p{F`xH_*w2&6NER!O>4y#?+>)I}JVBNVCr08yH^S&lA5$aE>4gt+%O=5D|vzC+|Mt2z3#ad7^bQqPM zQ7&gZQ2`y0Tl+NYKW>w$d}+cSGU@vEvc<3x>i7-<0FE-GhG^FxVCAsVZ_4@ zqgRXK*27eFv_mlb>CZCT_fgb{r0{J4Ma2Veg(Wu2`X{QcOJZgUH(lGZ(`&PmrvhSr zQ3|n6-^jl6$W#c)nPZB@1gLAuN@bHMTNwJ}?r$euRbMsP6LTWMt;QgLpLQG^YQSAw z2u8N_`5*7Ld-VN^m$J5-Wc5&%%){bGOoTOhM3@((Tgb{0IMbA-@#?eBcR7KPeycI3 z(euaf{HvF8zoskIZpW>tmCFN$U;8LP<0_o;l2www)yI8aFVdDlGuR|Q2^VKHbA8ch z_g7TRec$T-kvOmbPGl~=-U`r>b9CLbY}k4s3YIO=Uq8mZLw=~7NvFs*2t5KN5$_c| zkNT(t1}Q6E^K9~#YqsF}07wf<4d>0alG`iZlB|1*3@4WRR}e^rPuu`2cfdOZRrHXv zV7Vvt{ax=d-60vKhmsmBW3SY+}r7#WSQGohHl2z_{9jKq4w-`6MD~r051=3 zj-M?HDpt{%%Y8fVo;~3+bt^mdXq50dsPDCq(o-q@Ax@MSpW7X~OAedgI=d?t@D@0H 
z_`xA*SD-_ked=y4o*&sAp+7mAfykQln%Da%OT|)VJ>7f%pCA6ROsQ~w8kE7342mx2 zTW}Zv&94{w60c{^rmu&Pe5Xi?y#5QDEBaseiPx5hF4=ggslHFciJnPos5Ju^)_n|RQ?#c38cuPYc`@0Tl=cw@auO9{= z!aDbi7Q9s86RXswrOc)iXHBJ+6q|r9TXVi|3jLT|tJ8Nj>Me1IK0J_qRy4fM zi@bg?JO@fO03H?1qJbd$rb^BKLFl!oc zZOua`u!%j2bF{U03L*c7*Ik99-%>Be1oE2W2U+M3mTD}6pH z*&^D!M$UHjgnblpEqNwa{bL3&b9Fi+YGjY&DhwJg#I#!Ol=}+nyj)`q5A`conR?lU zgUo1a*rfPEU&IZ;VM@h2QpbXT3bTp>QQXhXh@R^zs}J#NE}rRJcLRxU6^21rP;)I?%&{I02}2|JnUtldBW| zIDSiH&z}9&!r$bKU*~?2QuM3rA9B=#nnxLOd3i#F$Wg9C(ZF3Bb<=4u9#zzEuq}(Q zY0-ebeGW(S9}v>?ABA|tHAnaN?Vaxfee7baB76N!z*?}>$m<)I!|yY9v>YmQPS|d> zFm^D5UM1w_Hff78b3TIYQ29%)!%$ZVPQvEGt$BePcV3#0PnBNXNUVD!D5k~?p8b|b zDlBuYDgGkZymZ?sk1~*9_DZ?Vl?zf;Qs-D#NUHYT z!UsAXy34FVo5^u0zRz-!;yB;i9ZTmjU%$Wj{HT$ojP8;da!>2oWj%SgSG`23NyS?#D0;}<^v{knRgl%st!0CC2hJN13dXUvV14Z8%Zv&P_94$ z!-FUmQb&I^NF4iR-R-JF>gWsoP}?`4%(-7Fb|GP#xe}B6$H+umCr<-i@?Y9c{c>6qx4Aa0*%+-28#TGW z)GpBxSEZD0CtNOOyL2Hdm71C0?e!|eI%t@j^U)NjJvUHqKKTRNGMT5voWA|XP|vR^ zbzC_A<@rIcP2`e}uF%DY{l}a;ivcq&8=eODMlILfc}ot9wVnM9G8@I(+zlX(R0 zS!L~gQ&ilh!_*oo)bU>W{3dmaLnuUqA;L1LS&xA zwB$?c%J24$EV1vtk_Nr;{s9q~qd$GV4e|7y=3kKv?idZ@-Md?+6RG+IFAO? 
zAP~U8?&(TJlI~|N7Qm@%JI@I-;Sz{BW_E4d1F9Etp@*O# zXaDP3E+p38oEokCAREeFLCxyxmkI@{_joDF1q$CJ|49=G>YL-K2t_iPQHslFbL|2H zft|>%VGi+zX5VAIqvzhC=^wrKKyR=AMe?)Jj=y;$3j_CFcaP=F-7-k+Pt)Z-Pz$;i zcV)3JIKCt62Fk<#Ihc6^PzofnJ8XwQDyJ_+vcAlce~?q0uH z+Mt8?x0nJYU{{;Iz7o?t@^Ya2Q6UkH^O?vPI~Y4B1MxWTtJoy7)+#=lHERD_wWhG` z+v6tXgXAdn55Ql3QBYA4buL5QXP^Dy{sXaxDhj(h-JnfEe%QQ-hq0(`5kNLY#dCdp zou8nFrBCdHe4J`tOM=5_4%_dxe1rY+<~l{YAVw?n)y)+S3|TZf0m-70KbCpNj`vZ^ zxLxoM@oNp+W2`^zhCGrgpSCv}lMLV+Yd>!Kfv1iSo=^QY(PrIew&%^hZndPu>C?<7 z(e{mxBA@id-_2@WLdyNl46skVC6HjuHnk+${l3WL# ztwSkQ_h}82lODA@w22DEXiq<0Sg5s7jMyGM7V!``gA=LaA?Ln1p~`>(qA<5hpLeZF@iBVEaF z^ibfS4DR!$JhQ^dF>&*yVjPJ6jOGbQj!naO5W`+<2`^J`FfDm$-K{=F=O5 zBaJGTOSN9S)#z7FWA~{I2F4cjFc2+I_TthjK#qhrFeLA6nc;g;`@?vYQ>3NGj3=~0 zY65A#+~5j*3CO(gjSoozBW|KTJF5rit4+bx=xAR_4c`l^gjWO&%!-Ltb{=5URCyS9 zNC*$Iu72Z=%hm78taVG12)uvH{%p*gh&6149Dm9B33bMfF`|qvotoe2L|Jde&O$6LF9Iz6y}p{UVYocEu`o z6qqGJ20jVTg4$&WZhZ8=)6rgnyxXzPayhj<7rxidtmY*^yecJlw7&P*rX?67Ni?nP z!kIqtjakaZ-e`PJhmZ%fy1C*I*r(yGqYaXuJuELKKKm+~K0J;XG}1hi)Rpu*YltVA zZ)s3gbs`{epS>KkQV2XqEClEaQOQ%VoTcZn>%!BFbhnmtQDzq(m}90M#sRt_%kSla zf9%rQsVo30aCU+O&HeQeUJ9PCy+Q}f#&eRhNMG0%8)XZQFR*yh$A?s;6+gUpRt&7V z))}ee`Y{r>(VdwUA|eomQ8Y47lw1lNtFDS;RI~J;x$}(S z_&`D77^GZ8{$)PxJF0*-=vM_$1t}4J3C15UHor#%*}~P{aHBdp9o$9^-0Qvmeri%pl_NN7LGw+>~KnetMrl{ob;dbq4+g51VSCutfRiSo6j(a*94Vk-RZE_8E3^ zV=4zxym(etZ16rgtj>5#WIo}&p&(I)XH7PB==O5z$D6m}8-#0h9a{|)@&mBPEf14= z-v-C}E%!xjv~Z)q{~bmUAH-d2YmQ?*1wXngQ0`;>j|s(o=+V}C+*gpkQ9$z@KC65+TaL<; zZ5WbJn{;eTt52?W5;R#Mgd0W3MfLtq;#44F;mf48o3*S45&4Yks1)j+vAl+>rJZMW7!oFtFVWsEf% zI13KjIXrFNDZM!LZ8{J;oLK2e(9J-UpX$PM1));F#1i-voo_jeXdj8B`Yf1F-N z73sr*oC@#Q_eXGP+<4|a6-rR(zSsSXoZe(IAEC4yc?jLjUH+>T&NaNOs;wr>)Cda} zYe;xz-Rz$^OZ4{T%$6LLfUf>{?OL|^Mmq#c_iX!)T|!eh+>PUVY+`q&s*eO`PJ^YQ zBGt0*HoKvGobD?j#tq-RNUgOoBloy!*u##FmB$EFyjw%$eo?~WAoCS+!19Bx{!9}C zI;>a9pBloMiG~dfq}N$xSQ~KfY5Q*9UW$xKdo9!hKYEg1%vE@oas>xHM?THdgS#fc zl1n~UCi9%iJ?}HY#uR03NEo#Z#DtRI6=veb3;nRXs^;<6)8pf>B_H89p_p+ZU+}_(~wvYbdQ0~y8N43n#A=jEk z-9gXPigeB9YfH+ZH_ju5Uz?MOC 
z@fhJ7buW%8&-jX&ZRd{1krApOw;tiT?i2q!`da|QA;}+%+K0?mIe#<#7PsM^ASNcn z=}3sZ_e2N8W<;#XF};mMcNZRo;pZXtd?s$%Uy_5-&P|qQLfz9(!g?^vvS2UVQ&qpA zPb&l-ZQ9t6^;p!cJV$NBYm(yHrOx!)?w{)m?isRj2L-VeG*Jw__PsbJ7O zmO29QpUXc*onI!GWY8_}Vj+_rbhl9DCcwej^Y^@yeDgmVgdd;-p&5+t+2uJ+--!%} zR&e${UhHFhjr*hR83CjE6lTA*;niRADA#3C9niC(f#XdE@b!{;jX#+v>T%K>nS0GA zAz4OrqFOXkZ}birg`MfNn0=r-LatV41i6s)dNi%4?|4BERxi$e?q+(uZl773|Nc-| ziUuEf5^RqmaDMQ1kJydAC-!AxR12yHk*Y9*qi&}oySUeKKboEcW<%7R6wLFI!doJr z9!T#q-`7Tpyo2+!Nun?QuEL5Y#M-)$H`j$Oq@Sl>wINM=?k|JFN?q-&)<;%auqt!! zr7KOBj+`)Be0y=<1#MP6cZ4_Mu&U3Bd>oNNb{DZq9^ z1tdZhpL<2;H;lHnjBhu*$X2(C9aejWOC8yn44~pmyw2!O0vGaR|55ewt+>Gzux}n| z0z(BpAS70FsM4lJ{pCmI3cUM^Oof7XZRyEkPbS@Ty`hVb>kRgZI}UjEIcFtA;(uY1pv*#i0Ux$cY) z2|n{FSor=ZT+mv^%2fWE%I|1slN)roR+A%~wsLvYE?hGroFR$DNgg82 zQ9=d>&J?u8DwWa9b`ti`t9%o()S;D`j|bY?)tqC#z0~neU*7-49ch!~H8M=O75!e@ z`ank%IkqmF7bS5l7)^d+Odg#vaJ}-U5btZ(DEbp}^l2-y?Sq=Z81sIgnl6X5NOMSI zxwHuKp*3U4?kl7cRC8?&D2Hy#i1NEi564FB1*U_gm-TNiSr#W;vxl}JdrTtJ>2uKI z|G0^<%Qs?m%k1ezPszG{b`^3o@WbZD><67x=_b?2 z&JD!mxmA5|`FN zWBz8D!WkWao681Z^7!j+b_k&l^@sb+%K2zG$b<+ZPd4D?JkL6ugaX?_)tkO6C7)b(;-Q_WUc3671tquqtTEcVnFD2!q~@yc?(-PJB*nKXwj z6@_}>Vt(&2C1`iQX_}Y?)}7fq=JG;Cp|t!78I3oId)}xU z>vDAY?x_?fkQ)GF!A)w9|E)-rIPo3-YuB#yHMqtQKYdl-LnFO{rEGeNr>Ar6xSHjv zWRA)moIl8aEya;<#yHMptDM62ij#XNj&x&LI#1g^uNxm5-fylrYl-s% z?)1lN)(1a7){U{u#_>K5@RHygdIPnB+X0*5A7gKs^OUQeBH284N=U%4(vUu{%LGoA zEg_O9nUloE4N3b3-ym}TY5}VcJ5b;rwVa}cC8f2xN7g^fCb8HFquxG`{cU}u0#{vM zc^TT>&sLae>M;p?*o4qvCUqyHu1cZ1NOl?S#xd0Fo)q923i$220vqOZQe=8A+v#%V z$_rVpD}TAr7&&Ng_`GwcG@J6BgP{7b{HdJr0OGL^V4YZUp6QDbom%+RZctzE)?AFT zaXSKPHskk2OEo4;Z>0t~xpeVVkb0D?U&c%UV;lmtOZDZqsf_e!w9}l^a%9}wjxj^I z6MYV#*t6K7myd>ptnYSO@Q0(+)xn8DFJu6Wt-;3N>dCbsc@(D!L-czt%m0kFA8zy` zF*BjC1<{=iwCsh+>adyP;ni{kX{s7|LVAMSZT!Ch>;XG>S)fa9ySM+rnM_k_ zlb#0l=Nv>8k&z6Z0>x8h{nw&1VI3Cm($h>_-AV!{ju!wtT8Wb;z_>8*_GXpq>VoJn zImvF^HSGga6S)ro<4GC%lB?IcozSHZ?G^@3v}+u3{YsOU#MPErcU zkeNZVs~1G_FFy@0KUdK!m}L3=so>xvEyn5K-RjjZ^myT`C{%MT3w5`(#eKzH7}|3X 
zKOxKCk-R_`NQmArrE>cX4l1<w{Kp(O%=nfmVT-HP0H zVO$%(g4*%J|3)>asm-$eC51W4HA3Voy1`b;Xd?zlTS;qf(d*C+_qTa)9A#cv_ z$f)beu6G+3&uZmx0*rg!T~smvAS8^CQ~$}!7^zI*`0(3*l)V2%fBWwj!R-8B9t8E2 z@2V)*)DKm3ck+S1+fbHofLaKFzw07?)I>aQzuxqd=-x7pvu7tQgtB|KnH%U~@xSlT zf5k_*sGl0UujaUYpY^e0=82f(>stk}L{&pPmuVV+fy1rLZyD@N@=*k{(AI(g z9(AThmoQVkX*+bY9zDlS`W4v@Z16W;1BEo*-{-9;UPM9DUB0ryAv)IY_7MIFD@ad2 zXEb9IAQbxTtX6Varc~h8K!zo5>e0$G_H-o;+<64p1G;Sy;epE|zxSVMj(yk-r;TN8 z+qGjvmFs+VaNSnEm&Y;*6>8g_wSTdN18#A9HNgU%Y+Kz7C#rq7;{q^7=yZyFhrfNa zCg{o_0n%-?5)|&5(;mPZqFD$tt?#0ionylWD8QU2HA<7SMbcZH#l{1@&}f(Ob;c4- zUx@DJ_uA>~ftIFYHP%y>UC>+oQF=U#Yy%SY0MJMyq2bTxx;8g~dwuEZgJS3Y7WkKP zxl6e-J_8X{*O^8jmpWI5zU6X_$^DqX{8G<$NaI}5_?)W`=;tEm9c{bf@T0jNvc}PU z0xtNE-%Z6^A3ww>KZ}iG?PINf!-Ys8f?`U*eEXPto&i$ZlPNq z<)Or+^J!RiFD$=bgto>`+WVV~FeuIj&Y&SyKpT4lXuH7j8f@}QmNmQD-8(QEohu~D zx+RuaQUH~2iy6rV3;NAd4moGU6t$JGA4@89+gu5HtCmrfq20=7w(y&ZS31%H?KXp;6w~Y+ooaG%9mdiT&|x z^DSO7Ii$mXJWn7G5*tf|%(axZ_2l-Ch0e`NIUTVR8$NC|^wv00v`R1T2#zCdRo8ptW7b?`4MHuUhjN@}qs6i~F;Ht?1Tydc*r9dO$4rQ+Iz z`S;0+k8pl^`tf4KNkuBEiQC$(oaE`jMH>me;lb(u(#i|lF3R|$mVSttgWZpC>~y?s zOOgdbn{JA7|HF+}{aq0^G14YpUqdLgY;>vj@|zY2Iba5^fEkA_HM}SX`Pt_O;Tna% z@3sObK)q|Hg+T&r-TG1;pB-G`E_ENSN==HL6WuVIrJ$f@5N+3jy#Z%4;2W(5SAg86uGEccUiPWZk zi0P~40eo?bQ(f6dIe2Z2yy$dwX$)mRji&jcM;=A%IuPrf9`d3O6CvPOq6Bos3x6ik z*9rwzrUF(U{K7b2TNlwcKh5lnf})*lNlM^njgIfDqUKWrF>)s46+|0`iwMfEh0X^X zY2gTW%)yaEX+*}APvTr8C{o7n5(^s5)>-2aH9UjLZj!0~$7$%Fi7v%MLv zN6B%75(oK`%DR!7`bq8<;0w7UaA`F?X3sy9&+lozRjbWEJ^GEhgzw74^>>WMfy9vw z@WJmROBCq}gvY|K*@T}x=8Xulv~8%GT_LIW^)gdDD+I?)z8#mryb&lK9I_4;6-nf&8?-b&y!@X26(onfALR4CFEoo1YZp?#?dtHdEYHa4}uo<7as*`;qv|C)|Rz)+a8KI z47ZE$+1eaLO?vkbFe$C~sL^2>M>)!!Un9*oLFeNG3SNwv(*|fIM{ouLBc*G{E zL(g@f=*O2PeqZLC$Y@uM^~S?&yD)sQJ8I!&MPE6_G34aOT1t5tvDl>uzMgcEgg`}E zc{oJI1<;#7f>>Iw$#cZsb9J+KAik$-Ze@gTnOI_fN0LU(OEd%-fHZ@CwRz9W>YGAu z=45D#C&~$n`%{{;DJ81?8HeO8hEG~`vbFXy29ys>mfxZXeiu!dk;$Lx(GtGiI|HFb zoBie+AEy%fNbbj2FdfpU-2GhEIw{KeZV%wOf7FLuq93T>%XR4n!DSvE9=m!23t>D5 zdTEAMNfoY 
zsiMw169Whi;WGX%q7&Xmu{mcUO|j#KWw~homX%)3uY8PG?JIATXF)jyxwgyo5gcHc zoc@T=<6VoYMSRJO zM(q1#_ej2CbS}25BF;}fEh?v~a^(4YF~p-5{sK?k-2U5sqhBt&4{cjDcJa}WFOuZf zZk@V7rv=w>6sqo7h@A-!;9r}uAxsB76ZeRIx1py{+7F77th2*Gm z#1d&F;0)vvJ<t2{kX%#yFQxW`HBNYd9`o-ZWY~_aUIy{? z^u7z{Eh`z?(}Je=3CT5dv%8&Cqm0X(T1!uFj!?^1KWci>PihqOr$4lK0|O8Ne&1wo z)eH0>>ngve4HH&}TYo|XRLhT7^J;vY&f0!KJRKdk5kFb}V)j$>A+KK!eWJ-JCE)7p zSC76q#8B?itfIJjzVVG@Ik|Tjm21m)l!X#nrGOCT_UiF=)0wLp)AP}M2g1)t)H)gSkxWwegcD-dx84m~Pl<}TwgzKdQzorE z96c%)3Nr3321C`7T9IE&=WGMz{LJ)kV}ld9kbT_cp@b9Zsztf3+Ib+=%w62y;n;mV zx5aX&zQ^}ShdKg=%b%;o*=| z7@5xQ3k|(F4WNolys20}zzw4}X$7M)9#sb0;@4JDA1|<@muFT)&)forzL&R`72AVS zZIuH-zIfNL1)^>8JkihH@Gq-Ni2xyOx=h=BAw0PG1W1JNYYueZo-q)eCc)&~q z>f)eF$DkVuIsUe8Ax|G>F9|E0nI5T7dT}?^cFZ-I1fuXUOq7Rbg)*I?Qkl@nveOtv z7EmyONh*>k&+PJQ_J(W>u$|O^NKtAZvsoN8?b7HMuC2t}#!Tgkt5$5_!nGe{6e<(7?2--X%w2LjiCHhyJ6{OzqB+IAt!eaSmR44_&oyB;zTDT z;9)gr@jEy-l`(eM)9JjKXP08xlb?q8kK()s&Bo_IDowI3ONnW#@MU6o+5BLSpm!@4 z1k-37b(#BREyZH`cYQqdMe#h~UT^nB*-1fq}5qrR3zP(sRw@_IQi2G2slh;fi2|v~~+D zn=D5w$k2&zKmWE08^866>yDsml@8FOrUk|cfk>_(SR&~bp+u!^YW7FXRPuJNNZD;Mz@?WT@^A4+5)xy7_)Nh!&qIpJj?H< z??&+WS(yR98O*ndVcABete%>PH^c4QuVE1VNRs&uX5itE`FK}mJ3AoCHqG<2<3U|rqAPEMOkD^8^G z#y!`ZKNig8CYgn~*TJSe4D?UdDx=h4Nzvc7#$XxPv6V!w*mYN%r06g1!1BJ%#~B>< z1n_|ee-+JiVx1g=rLVvh%$%00`eygS7HGJ_ex7k9L?TL`+&DHiU$SR7vNI8rm$y>~ z3Ta3>Lo+h>s8~t4MD8RN3}R$m2&j(_H_E%)IZC6!zHV7?2H%(f{BA#q1k~iGdp5rx z%3rQpyWZD-O;7Dr`yCwA&b2xFtRDZ;Kg{xX)Y^><{YqDx4?|#KWW%-1u|%KB;|BK_ zuyk>6^>pzsCsAY2{>Pf2@yl3JNkH4Ak%%bCdg=Hz2Vqsh70Y7tK|D5(D0dIZf<#zA|ZA<-MIXJ4IgVm)O-2z+@Xlj}M$L zBD#Bgt7uV401sgUUdOf z%-9Ry-3?mNTic|SD!8JfI(8&&wFq+Hgnfj)udTmnYaDELf`>L>t!R){A){SjPeo0F z9zD?w(o%@zZ#-%~a1q}*nSsOOBMmHbA=GbJ#76mQ<>}NQJKdC$x5bMF$WK(e< zm`1iTH(c`xb!4AAL-?A2xOWDfx53_v1$cTc?ok#SNT>^-ixko z6~x}cJEFn4&5ncwemrOf`vob2e48WPI&%FxT)=FR9P|;lfAX{L?sM}EqhHKK(jAox zZETX9wf#gTR|l8;pgttv%>-uMm}#IDVy6kVqImO$2ro2{9%Kx z1XDfgY99x<>7k_MgP8=&C+jxdD}n7-Qmy80X8QA=FbdM|?RfkVIJ5hn5e=tcrODZu 
zRUx5w#78|G-^zn#K$_Y3MKLTy>QE)6nz4Z|kOv`ws#JGKM_!DT1Lb(Z8Obu6h*%64 zXhrMKQ^_>7WHdr1E0ofq$R``lP?wr8&1EoK^2}khA|16@tTk1?w$!c<;O9QZE(C+6 z3}(+vg%?JE1cYqQ7))z93KLsPEfAnq!!fgzw5KwlH_fMhYX4~sxA*}oeOj;8uB9U( z6vs1e8&z$G18%n?@0ohZq|<*;CA6bFNlm1gJ(Y-#zC$JHqhFM#C498`h`VC@*tuqSh$pf!0(GisdVtlEgAk6rpN&>EXRU+E3Q$lV z6rG%08{I_I8@{`&(bvy!t>tInHNqf|&w#qzA4eI?b2my+EKyxfFG$mhpB_|JC{LV8J9Ig5ogIJSW zhsl5ViGEYWFE3pm|9D;c`x;f z4=WC zd6>oy%PAz+IboRksob?fOJK%JFRWk7qdL3-Hy>QyB^_<={R$UFd`EOK8wXNb04{i% z8CGv<=|LkplmYT%LU~(u{SrZW^yB9i70SwpD6(g$3CtSUj%J)ck4LwL*OR(g<|0OV z6S#{ue7BcT!@gh?XVV{Vq}APsOb2Nw&{-F_a%055XBo;Mn<0yRurv4+%Xy<=M`6Mp zS+K9q7G_5(&nGQUm>v0Aky*Nuk8jFOjrAg&8Vxqeh)|$ezi}_uV&hli@Wc^0>ko_p==uY{pP8s+Dz2s8ZH|2xS!1q zeQ!|lNQDm`o}BAyHuh?uV|E+tpw#aQ*T2%y_So&Q)TlA`Vk`P}X+qBUL;gg5x4P%? zc8l~{i~V+a@JNH0Jmj$=|HZg}MG^}4alP&4fob@!_Tv?XdrLH#yXx@k$+A&ueX29yeoTpDvoxq(K`) z#IOYD?|-dW(oNxI7u5r$lxhM?sz2r6om^L-%iG~-U=lz$3JSsY^`VQ`_SP?G7S<6_ z>dE;uv&rh-IUXJrPDR*@SS>wd5Bk)Chqyh?VM?T@laDefO5q#o7;6C;K8Qowq;lOf zC+y_%CLLwiW3t5o^j@P)aNLIq9?r|GjZ(BpfKbnptDTl_v|fp_6b(#Srtp>ZG`qK0 z2TWV>+OJy1#F2vMYRLlgOX_mZhuw~xxuXQtPI_PW=SD?RkzV7L(;=jY$6qBo~q2c9%(^lFawqE*A8c(-) ztcNOnW9s~5-_DFc2Mb%7wFcCMq&r{UV|A(E+Mcnk!Gdl*?(t~n8YRel*2zM`^!VOD za%TqRgr_N}3g{1l*I$L@OL9cje2TMB#EFjNdo(X77j~~FavcCWj;1VaWUUUKic)vC z96tsfAH&oX=Ola>4ltlES`1YWv^PoL+sVFu5BrgGFqKiH_5AdRVsR2W5>Ol!i)(Wf zZQ_8nPJ61M{~5n2#__L8&_e!;6Ne#y^nR+nzz(^}#3;HGZeG#v>}An0jcDW;Q5Fnh zbPZ<;%3qFFdY9--3OX}`Cb1En=t7`bvk@wZp&^}}73RRK_`v0~@eA9-_RTP&Q61&xPJBPdlz`^D)@7xqY%Psj{0s9%Dyk`5Dp4 znCI;)xB@6{hC4s(Jy%zd`_Ox`dmyETzTsAB4&Vhmj+soqO3kx(vJJz}V-B>w>5j;a zJ&nUy>}4_jOnpHBk!%4nZlEnigzYWmo|o5MS%>uAJSHpV&zDgKpWMjx$kwfykw^a} zls0H~i`PSUThuaer#|8cZM-_H>HVf@MdsPGHoGm6RIk`NbyVTmPcAM|BkrjomvkJT z*Xuim*0^T-Kklx^9M7gsmnu*C>LE>Xt*GgaZ`bMo`I1*Q9gz+FlTlu<9RXyGV;cSr;KJ98tF*xvz z6Ku$KX&quNnr`pqy9)=tQTs~v{u_Rvop?|Xl@gD*EB~LH@pigvf3&!w#JA2e8nF7i z{&sBpA7QuGC02=BfPr&ihu5;He#=d_+ z2|NNW@PEH@_XMau`1>!{IcjwN{`0Qmd;a^M{5=@|cSr2oHb6cUx%W6vkV5x=JaF`g 
z;FTS}prq*K*ZStaxiqbZ|bdt27(zC_BzBm_>S33CuT_^0{GHbUHvU}J6 zNAKR<8=pv8@;}YhEJ43f4=!*&~H5a;_at(o<6%YKJsh!*i>ur z@rycr3-Eh4INT*Wdhwge*SorS7RSfRGL(+0$BudnuRYb)u_tW(e1~uOHs6T`hynK` zwV2NC-&7<7>t5bg;;$C%!w%JX?u)jMdUZr`X}f-S>v?P`x$(f*3!TzFwX*@(xe=t;+1Lyd#nc^=H?nTnRk=T&o`+Q3MB7hy>gJu<9l8vVk z7A-d-W%#{&SRtnVRUzJ<2|p+lD5xC%Xa-&$%+J1{uT_`z#2iBFsZ?>u zMTA1X#su-GV9Z&{vjVf>xqsi%x_2hh2M-+OcG^<4LP-JC!3Bq~t@!r+D(oC$hSi08 z+R8I#!9)Ds0n_&E+yd7p#dMt{h_Fau&)mXEM5Hq7=zLUY*W@DvH~)T;5e97q-(*;@D~(n<*zwt%vkoKqa4;dFqxQk?y$mg8fQz z(l{-0QFWGTDRKJYy17z#q~u8OV@2GF&sV%6^0gvZxQ&bIaw(e{7$acgKV}T6QB7&m zJh~?FDLTe$)6l_v`%>5r^56wP9^e4-K!^Mf-3kO_iH zo^OAc_M4r=t{~{v5z9+HGu)$(JSVmT)~1s2b9vg1o#pb9XH=I4WoEr-L-vrvn(cfk zEK%=2W`;M-UTSa8B~N3tq@W#8<*Z-ZF)xxS7s-rJ{D-3zFegMV&mMmR;g)r+geTOR z7e*w-q*AgVnvE+P$I7OYQKW=Pnf~96Ul~>Ev*Yr9t~+ye&i2tu_k*I<$D1W4;~T`E zaV`TFaSuFV)6#u_TVm#cg(;7k*=Z#gxs6$RloemNaI0z5^GJKjq!v28q}Ti?%c^Ri zNpVF(kdVG5HMpRXQHTx|`A$EOvE&Ft*nzNHl^$CW-FDGl+JPU0%ixL=fNJp-<>G5x zbt5#l?o2LKLy+4K&iZ*xJ}Sk0JY%h|zn&jnx`^iEv>AW;v^~3i;Ee3PDFZ3~%a<>7 z%L4d;Gk_lmt3WIDv=k2KQYO3EDyXflM}jBRo3N<~j#8f$V=s|!lexzoPrVi+4Y#kD zKHkD}kEK@-pR75d2Cd1FbjoaO<&YJR(O6lIDmMR?Gl3ZH;n5b7p>pSEfPc)#1826$ zSf0_#S7c_o#=J4nPpnQvHHgYfL{O5$;cJW|?vWT{L22(5ea%hz!fqi1%>LAA+klqK zB|-9<5Q&Jy68@U?mM7tY)O8y#Z>R$5EhDqd=v3s_t6GI!lX3KM*jUM3eDcZ^f$PU- zE?RXOHX3FGK4lPN^>khaZT!(gyAoGO*U--Ljy83dch7@qfEK znnb*)EhbqbnNpYKvNUNO+QwUd5@OUAVsme_#lrc*$&TLmI(@#1T-U_h$GQVb?tvT+ zmdd}XZ}4*vhzE6B96dJEMF}{)qEb_-e!1q7;l@rfum8_S*P{MRcE|IZ;apW*OOOMr z{cq4_GT8|U-$uM2(PwNfdAHQeY+-8aackY!ovWw?M1)NZbD_eh2P*t|k}ZUqVn`O3 z(SmHVTT2>gsl31U;WZ27*w~>VhVn5Cm1(4dHlmA!9=rU48XJ zUA9_v%oic${On5cT#@`ueeV?`Il}t~hhfSq1%53qEFALkx)5m2gKq{hI}gxHRk{^n zggd`vXR3uNniKqB{O^9k5%`SYH~>e$E4AD8eTO3y{{xN?;rRoO0Oh{uJhlrv?B?g2 z#>lu<^vB_6;UyQ=P6cbujXt(Ui>TY$S)cL*);NgYYs zmrqvOSkryVDz7CfS!NUz0RQ;x-st9t@X$X4f&H}T_CoFS@X7J_IX%WlZ40iwMHF0@ zjH|*vA4d3bHx8$IST3}_7tpKIC&wriq-<5FQYHtxjZmHvstK&Op;ulvlS@XEZ(dD@ z9zE5C!9-V(vI>kC5WK|KTCM3}LB(#%55LCw>U{AgoKS~IxgY7md`w$Gi^i2zgP>tyibhgK$nGDZv#ld4+r2 
zTCPlI@BQ)7C5Mvucpg&JpILBsizkV2-!VDG$qfC}Loy;ayu50jrsBe4-(tTy!8-fZ z)z-CTreWy6KmYx%a%yX2_>WNlUr+z?|1t;822@42xWYmi=37nMVK@3yJ6k0+Yn z%^okl-}tz4y%u{49DzmV{txQjGpfmM?f11Kq97n3U8O1l0@6va5(N|y=}l^ofCxxW zKoq4DQIHM_g7ioUy-KeUDWQiRByY4Fb&sytUd!P4=vB%k`d|;q3NpfHJocFxu zRsO$8wDO*LDgykJsu4eJTkBvovEN3uWFovKq!KIA$LNPP#ShC{AE_i7%L`u8V* z+yYfw3aB9ga{NiRLlTUxpqSnjl==VpJvV3r2z}b^I|AfZ6oJGpbx=9z1Nduz`K1~a z5JCZ<@gLlvSAaTbWZa16&j9c$Px(N!#_cTgI6W9Z9R#F3-uxLrtyyLSHN$|Z{(=T5 zdVoP@prEbx&w!ii$NGVgSwDF15&+hq4*CwFOHcm{0K7N5V{!EJ^yZg_4+DW^FsPp@ z+4g4uxDpwll106OML-{*4*KgVr~?85mj$=FmG`>cfZ9^^{$jDuPBCGxb7{A;TzP*| zdH+RfSnGbH!){~wAG}-X!KwfAOIu-~bP#qs+^B$p{l`5VjD=yV06w=Pj-xEPRqu>{ zT!JO{^Cgnf(M)5u_QETbODKoAD%Q+yN@>zg=~BUYn$W-*z7&tuRSrD-KW+a{`SX|GyaYYLw3>g&?Huh(;8Ef@8EK?hDOCeS zvsLF2Y!55Fmx}MGqtbjTb2yTOebOKzX8&;;|Gt$p5-S(}E5`uK@jvGnWD5(AcQ0uv z!W$GZYN!y;iv6?fC?!i%N~^936ld{!Vp8hAd6PpIA5Y8nOES$G1$@ZZ1(cZGnH0Zv z^^7wY`QIf_oJX(fgq?hSWzykrYiEF4rbhD99+tiJZ2`eK_Wc-TDbqL>8QuTJR2P_L zjEsVcw^c1KUTK9qR&Rf8RACl%8GCcHF=_kVv@8c~#Zv`n&IoAj;7+>l6Z<4|O?o23>uYTG(5Rw=ttR?vf;73sOmYw#3^ts6DN@O^XT!mmkT-azxhl5X%y47Kx zTF_fel50Zq9f%q3dCM9wQ$i~e)JUfbab<-jL=2j3Drbrc+GhGw#X zu*Cd|mU0^*EZ&TzV0tq(I@gGDPu;&5Xu94k6h`^BtlsKqdC&T#ouTpya}qpmz^>x? zabJ#8Co~`8oX)+kSO5utx#=*2_f1mOhch>LE;c?2%wBX^^SnL#L|%VbXr*T8>&i)q z{CADB*dz0xZbruQ0CsweF}wd|p&<61ns=59FVHnL?C zYxmN)X2?wEFOb2ilq|)y6D`$Ry2^G-;A8cslZ3k$P`I3SzS&_QUt_~<#+S!EN|mrR zpbvjgc0(M14AeHuLhE0;dtDHG_%D!wj0H?Kt|A6|?1s4EKR^a*0AwH~c&;DGb^A`o z>xuBX3WBT!M|pU?Ok5*b+Fg_iG61bm&lQ3ga;BbHit?c1#;|>|^L6YJdBwPpsY6$^ z%8IQGWDj+Q0P+0qEIw*FZTHcAZ_gf$8E`iCUdx1I{|PdPf#5>}e4c~C?!w$pornG? 
z&-0hvWts{)=sH$y31dpT+Ke4lJ?9;e3xT~(&m~tiJi4}`e01Hl=SXO4lk|bL;pX<* zYVqiHC7x$|vt7@7F?~T$kY`2WRV0v}SD`;tiH){MQzu=llG6;i0wH2Yq35q({r4Y3%(jP_$A<-t@$sb;m(a>a!#)K53Uw3cv{M-HUyF_SUBF7I$tq!zPg0SuuUb~@*NEXiHdhdS zL8n~V@EG1mCit8FQ}+|poeJJ?r(q!A@&LxF@-L79Q)#?64Yk9rm0?(81P2%l`c}J% z;Sk1}7)EBVh}B;od}d@{B37iMKK9b%mN(q%OCRn>q8{qBimvNpPTvRylM6i`Cvb3; z;(7nNcx2Xe=kf*!Ki%0s@wxbSJd1PTcb5w*@UWh^qp$rSShd$C`zPcKMPE!Q6wIc% zHV(bkzjrk-H!OGhHcBrGL#Y}rHiB1~#8zK5qLLT-6971zbqgI7D6i9^8u)S9o%_Vu z_MpH)y0yfpJv)AARtNz+3t=99(Rx8G`CR4B^g3 zYi=%RifLGgFK@9e6|R7oXsKUq&{M{@7*J?#1i}IW6?lnJma2nyHca);9MIi~nWF(} z<=m=t9il>e7l$L4r%VC*QUBQ5@mzSxWq0?1PZ z=I`qVc*n@qs6{+sAJIzY|8H0Z{R#hI89cafwU^uRDAfxiGMXo^3T~?D@4wcOkd1mq z>sNa9^_gSnv&?D^h?e}be5>q?1izH*bIj8ztAt;4u+;N$xOP$BYddA90erz#Sd^#u z>~R5w%cTx6bY2GEQzgx?#gnT4gc(@VyG^j*A;Mb{$qp zkv_F#cfaUpoPNF>$x2oJZCvL?1x1@P5r5GPeh0_c1+`$6E)W@Wh9~dspLU?Duc!Yf z&ET@U0wSj3{iMBuPtg%y2fr&R1Q$$eMn$kW_KcI??wP{GRN|CtLiQtR7hqOUrh zd&!^fjpzi=aSd?{6wQ-a@c;_7bwiG-B6(Z*8txRw?;gztDW1ieUhto0mP7Hq=H{2H zuCTRnh31sni8R&?nPa2%Rv+jusBL_};qBJBj{d>tbePsnIV&{vO&~h?3`(2AciE|0 z?aW)RgTAH}e}-rKjdE$dSborwlyl=VK_I@Do?&!9{I*)C6e_G{#`kLDlpWUZ* z@Q*Y%7i|)5XiuRs7p7kN}&KA0qd^%FgyiV!X^{oKCK+=1Wc8xNh6!U2ST?Zk`dddTa5 zypVIA!66uY_}@xWo@tMVUCu?R5!qu78klQhoQ-KIDM?<2RZ*y_=_S}I+}#jtFEiLb`^$;qhUHs_}iT6^um-IT3O2UH@cR7&2XnZ*t+*C{b>~cZ{@@HE%n_uv38NyG$#MIYO`W%E!}t zrET{sxcv5p8e+GWIh1U>8B>e(&uToC#+_qUn?^uQm@EJ^AvmyAgy*kMW`6%3R|{ed zf-Ny-vS?Pjpy!w?GF{b5(VgB-l|FJ-Y1K&A^L~AGiTtIrGf~OSLR2izSU|L}Z|hrO z(fQpo{R0o$7@AXphjx|&l;NReV39Q5#D(Og2L^7VT(3GA!>*#^n9P!juL&H-O@+rj zd`1NtP#5-9Zgs@Lz?%x5tpcAHxne*Av6pUhEIZnL#y&URw7059tYIs~^g2+D_sSKi ztaRB>|8zOw!J*gSP^k)l4^F?;)(mz#U4U!iPo$vJci~$|b5QGMMP`LSg6K*Tx~{LrLGR9-G?VNs^<{S5J@hdcafqd)fL@#R*`g zyJDunit^9H<$Rr8B)~-f(gYl|2;R_BftPsI5vDye6@KC_HS*5DObl6oXrOTP@G-O! 
zJ`#x|s9(+T{lKc(DwK%gc}IE`E$1twiG7pBidc%n|B^i-72N%%HoWe&kaHG2yRF*& zh}JZxA45Nx4~M-coD3x%7n>>i(5>WO@OtSA5B9fBN;9f==}?wOQFQddla#`Jm*{25 ze6HJo>f$1i$G@?4@yJsh_3X+H=_~v}kN4vUa!*hcI!dI+rFqNDzuRo(;k!w zdg6IcihF$ZR;~X+xI#bf%kFwtpII^N*Dc($1V3V+7u@ zl1Yfc<=lw+Pq9>{flCZ<9MJ;G2*5NzQ<(;V06_2$ra>#p(Dli@?+*Bw-7#OM{xA(b zd>@qpOam!m89SldgMeG{g^KLSzwEFj*9;v8XXU|ryEizKlc2a@&6aMPBTI2ZUG)bc z(a}dA(=&M+FF2Wf=l`2&(D-jm1J#AoBKGt0-#=}yXV(n?xxNk09mBp@npXwj6RYkI z7Cct(;oxOFhpvC&ZU`K(Dk8T8vDr+|iyeAez`DL({~$8*{EF4Ic)+zs<}*gJYi}P$ zC&AC@s#&QD^7h3Xy8y1?HXFwLaJQsU*UO;P?SKg;?=!k^lS<%F+!jy?$X_vYu zK6RXwG?*{jw3+ZxQ9oB|>iw!SFflj~6d~#iRgQS$PDw}vd%>R3(qxJ=X_IS{rq?u8 zlTk`0Z@}quKMOjS^SZ(;{a`L``-k9R^vo#-!4p9bDE3zDq{E)zQ%lk4Q!G?=K5RTU z?uJITA#jL=%0r;sUX)=GFb&@QJJUe5J)Wd`K%1hu<{b9k;fn5~_6`rfU0urYTXKZ} za`SO1vePA~c(o;EMXM^-`P5zr?`_}|9L}$KA|+}dumq?7EOoyde5hIIQn317i~l)? zB=5f3^d8e{IH2NvZ*}>-iQIZzuJW_;ly~YG=%(&;#M7xmbAfN>+FV+C7Gpf(%v=G} z;1qiJ9l=8FBObS>1e4t@blWmUb`p>bc#n;v@We8QJOt zfXUU`W2#Uw33kh~2a)MVeaz}EsB_gAY5E-569^sV#p_`l!M|jO&B}PvjXA_S%vjk9 z-k+Z>_!Zw)u?Pc&K>sD;_OGaqIZfbevGZ2p_b%YpCp??m+a)!)Fcat5V>D&52X865jNKn6-?0n>m9Fb#S~pTuLk zl!?Zd1y5C;iD}4BA#YcL({Fu$i@trxI$$b@cqp+FkucI3SVJn;)YY11`qx-PWX4M z@PAt<@DS&J+W-Hv5zmUt5-UKHgJ#Kl9d@wYHuJ_zaZux6oy~-GUYR0fDh8;$SwqU( zFy*xgfXn^ma(bwCsxIkZ7ak07ximXH-}d{0BXiCA={I6<$o5kDp+PykS@{qsFxhb) z!B_8Nl!c#@O5&rpw3Nx4qkBIY_Ijfn@~+vzc7W=9oH*SEU}+CEkj}fhfY;;nC`Igo-1})hIbwc9(Qr?=iYB; zU;InyBY~>)@z1F5AEl4~=VQYE-<}9hxT`r29fE$jf9K|t|J%ag|LrB8pGiS`&o}5( z!PTg(VkD712y7Lm1>_7 zmaMNQ<~em!MhJU-ZH8&QKD%!FPR}j8eHQSb4r#M$vl<$~H5o?$$}ze)%x_?Q zcEt>-S>vpi_gnZ&yKk=Lsh$+!FU_09yLS$2bKa>EihGR`2^1cYGSBIlWSpE#_YQrK z@le)2(M^3{*G^%TSKzQmdp>ulw!B$5_C15U(_<+CHi*)=?Z7?bfoE%xb<#Cm9(Nvr46pE?_~I0`H_@2-(=YJ@AGG(b~0odQINHw-q)0r#-nzL>_xR#R#zvpg6n@8^tGmaCi5u#OD;MTCG7Wnh$-Vgx-uSHQa3 zv5SmJEI5J{nd@=3#xHfIn}YMS(zk8nHIB7h`5rx6B&=nPNtv|9p{e59=zkO<(j+lp zGx~l>ekhSO)g6P?^%@i8ob!9}@^{^C@srR|b#Yu5qxgY8fTSUZ148^m# z>Kg$;N!@v`KFNeAkN6nBk^{#?!JVU=6H4;Zgh5VJ-q?@?lvWV;m 
z_a30H!xkDk_~eJIfWCc*>ogE3HjkWfYDmkz!@v7raw$-iZY0I>OV0_^3{7_|fp(KT z=zF*22}TUk-2@|VFTpB+>(X4`2nm=aUnlagcy{f$8ZfJuu#tc|?v*%S7Gp-dx-kSe*#9GQg*oN#GYb%{5# zKJ7N$UiuKqX~Ab0iFj7MrZ5TbXkfIS(~mUu6wAh%{IIBr>ED_sui7m-(gb!Rzk1tUP^TA(@+Xm zFpP3gz+Tu7Scz%)_y%5VB@Hyd=c(41pFy%{7Sw3ZoL)lh7SlGG!z~8ZNDs^;^_+=C87(#%^4CrzIk{%urziH5wI9&MmkaSi+$u!L=^j-L}^y92H<`|voiriR@X0&#vqiH7PfKLzWjcMIkQ4$l}=7}TP zehP3LTuOeJv^mN%XkPFCc2`arR^5ckLOv~oK*HMVj8Pss>J_mJm}M8`S@P-<3-tU> z*nIo;OVFL0+d5ym*o;I^I9}@H;5iE0BxsX+UL*-ONL7z|eJUn}=dCc3rmy@pPh5P< zdc{qJN=H+?*{!V5KbpoLdia5Mq8jcSbOJK-v1A!Rff1hbh%`U@UfDCEh0756_O$wX zh6?Q6F+3`~-0Df^@#NqP92)Otd? zT3p*{+o&B~dkDlF**S(H44A3``y_H@b1)~lX*{Ycwt_EzP2aEzTHgEIBTLC?n&oRL z+|*RAW)Kc7yOoMgX48F@8e3yw(Q`XeXeMG$)d%Vq?FuvKhQQ*Dx8%U zB0+X-u?^PvHwXiF%*e}O%+wqBibK8AXEYA*4r@@it7OYh~(QB^{rQGVy$PTf`czG8Z(((FWDuYi5>)--_?JvO;v;ONl1T`373KR{xDc9H#J zJ1ybo{_m}dVkD-4kyp&|`j;igB8cyt{9vxIcsU)=WMVh1OP`wREzR zz@m4qvkymtGVY?40b4lCRI zHNqyx_l31e2aS$#6qjJ8=uC7OD84h-)o~j|bMkPR`=+oQPn|HF{-RhtHTx_Qi~AKt z4@eAX4o1AHMf~2Ew~H>keY?|oQXk~fl@~FRSP3X+j?m;}k*B!F`X3WIo2d5~+3c=H zw_Ja|`en5L_h<>sMbV?O5bR;>y(}*2=oF<)NUeH#a>cw@JPR{sqRu4OIw&YjW+L_*cN3(ZD&*Y^*%>?%WlmW;ia^VR#@j4DF^>4a?<$(;ipyQ zv8OO=*@XBz^*Cun!;ywEVOEV)caduQL0r;j#L%!`$al}cLmzXbmr>UT_wCl5ex8fIs=AShNit6&Ryo(5a}o1FatW?zxQ)i zy5!aEPik>aHE9aku`ZbwMUtk2{+O-|y`8(U{UIr%x>BhoNu-VaW4#q2A}Z%L$at|W0VD9fBl+!lGD#njY$qBGzyzXCkMkV!I$S%Hu_Fna zX=+>C62;G_F{9|aky!^kdgOAP+OtHInC&FKOj(Qh;g6ogff^DCwr2;RB^_WYB9#h# zN5iRlh1?c=-9|PQ=hQ61rbM7iSG0oeYNp9F?q~DxfoR%8alum!LYsMZDP%d=#H1!q zsW&EU+wF<^dg)0mlg|>Tct0sync|5t=emD zD>+H!QDV&JEjt}Mg`681mcva7LPQQJRIViUjnS~8R8^){)nb*n)-~vB+*$ydQqUQK zV^@mlky?zt9tddZh=ewOrM?-7!<|sg4qiW@&mKM-TRL%CJ~|?4y>fGP0u>2TJH7?U)J45p>NXDC z6c=9CVK-HZni1eTc-7ksoh!!jdeDDKg0BVOdFrU*D^I-pTd__iGtK_yVhBe|%P^hZ zZgZLH9+a(Cq4*-+vrn2aNrVSuN%kzg;--xEIL$#G8Ma;!Sk;d~ibJEw8y7=ETP?>8 zQQ6%nWBTxbns1HUz-<^ zDF5w#c47Q}G5Xt`*IP;-0+=*s=C?f0E<&Ux_kZoosAiu}lA5X-71ejLmgQECo;m;C z(bPq`M}>!0Rc*}aK>qgSi><D~j(~LDua8=aR8wTYnAxv#gI1ikxf^m&tq{Vn}h8903 
z*e%?p!Qvg%uuxa&tHwp5zYt$bhu2s#PnR8Z%TK?QNgDq#+(T@N$ zBmbR?{aXy&G!XD-nlPWTk6~9rI*e}WCgHCx_NuTYVP!H{2QMU7?ONXstzZld5EE&H zvu@rWnAcnQ0ggG7qns&&3Y&ha%KkpqYy2I`Nwv(UC>D*V3UWyNBXq#jEtjyKgOk>M zOJWtgILg7{wtK6bR#v`32OQnY9d)BfsYy{PHWJzPn#t^_DI#y>lEqdGrIWI?D zURIN&Img}D?S8)2R_jN+6=5kf9q`cc-e?HEAv)xwmH^?!7o(kgbZK0pQfo%y7SaVG zjjU|dAx7~8_a?#|T`P@Z%9BXD-h0D!YfGPe+Gu(fg%wH6i-sKCEJ~gu#y}(QuVv?S z&uu$A)QI+0#FG7asgeTp)z2maC1Qwib6m5E7;+vwyo7|FmV?Q|wURpM-Uu&{c~fk4 zz1i!RK&_1*%cRup(bG#+;{o7FH8C(-f;W%z4#UCi1nIpDEn{@-+T3?Jj2T4s$in;c zT^ox(fa?kn2&1x+JeoSJThX2UAq>C-D)!uoRz;A`aAZ3}`=IIKkC5n+ECxDkv5r6S zP$jCLAt^=PE-?MJ>q`fZzVjxb1I{DiDEyw_%;l!fDXa*%HblULuX-J6NNM2>&uF?q*{W!MXXg2>w(v}L)S=m0cq<*wp~vv&sukDy8U zJSIh(GkW5VijvAAM{rWW)6xkEuV!)Sv1Wemu-V)=QFV#aSXQKwDD2=^#|U;pp%I8_ zdf;=Yo0JT*|C-sw?y^tvk|$~z2_~V+UWJwA;NgJ|1c-~w{NYGl!9 z3X8T!1J#ASN{N-DYY7wUprqdXs-sk z{U8*R2+lp{r%g-B!9tD#gdng<=mLIB*!E`l9zq(1UwQ~u!=(AdEa#(wysOJQIGze% zkjdHFrx9QYq)_yr*EaiRAA(Gg;`aBZ&R-K(l199BgB2fW-6Qb;>%-aQ3VvBAe!wN$+ zsfLh|&p(@N)N+|+ceu-zGn(~dl-60)xvKYtFPx905_+SA`Laai#Jl+%PGj#9Ijb7K z7W!`O-jNSAYur)rI^-b(?)>txk|{5bT=-E^^PX+6jIY71&qZ6v7FFlDsWSz-&;naz zvU-OpVf7s!C|Ij>7f>qwDHqkczU-0vr7mxGdD3Q6Ox~IYW~92+9|7nQ7*_Fw$c2^tC?{b`deiu?!8XL= zJ%Ss^B3FE1XyctS%-QZ`O(G2{c@AS1#Chc8j-Kxg`W#Oy zp3hdPW8axXPnjOt*1sP-m<(@{-s8NP)mNN;TjsS-xu_Ola$p<(=4*D5oc3tj+l&Nk znd^g0U^n?ve00Q&0qCak#oq1d8hn`?qQ16>xKMmI&2?iLtF3C_nSCD6eOzWoLokke zTzk)B23^^}AEvsqR5|louLsoM)TwKmEe(Q8n5Gu>Ab&B?N{$Gb_7Tl8aXTZKB787A!aQXZNXF?19dZJW%Zu)I+P4h?wx}1&@S#^6*bV)`Y zG9KXByxD7*9D)8V?`g7O0m*WGIvNRbjYbXrl_4`4xFFYAA|Ldlg(&&K5H8)b9<~$1b{3F%u6w+XCZ-@dhvCOZu`4<#>EqRZaAs8=Sww zOjpn+TMhU7C3{prYsi3Zgb1_v^zoHl!I3%^u)I%_A!wM4y-ERRSb(PiqK23auastU zPgYiJf;Yh}yH`jleH?8gKxA=!A~Ksz%JWY4$KFSmej;Ge=8s7il8eInQj2=$<{GL# zSg9bl7)frjqnq{*)q8fQZ$ED7oH_)BQ4XR4@th_1`b#{BsdsPZ_8(|Z% zA|S{rLiw(Yv9eWj;DMRv<>H+V(Q+pS7hZKDamXj)u1UUAbOsq~KKT2-#G%_3JRpQg z2O|Ya!m`it!X~~il4!jk5{}cJ1Cb!WTpz^sBxJtL3=6UTD#{UMV#L)FxVN27Zgdi& z9@;Haza}d(zM8HVJ$D0p$VoB{BnuEeYstiCTIA2UMO(f~tkyXWU2MIM$3&-g?P6GC 
z8_SCWz4Ffm8!c?)X^(g|BG4pTP%4~!9lvVWk%im53hR3{qq`Q*%!YEc(jQfk@ykUAzR%2rHzUG&~_L^OO%k%97BOgK|Nso3^_F5ar zzYe?lbx90I7MLgh+{VXmLDM`qLZwBOWP6R;B@Hx2gJJ3@Ts?;{fRjxodzwGU%;ga2 zYs&@kqHi+wYcO?kUpnQp9Y45HbL>e$GP9$y8S67#?MHO#y~&&CG8KP=Yl?46Kc+&KPUn@{?w910Z3egH?k8oV$H zeEoZiNfEWpgqK=^RzYw8IVaQqns*vLy+@-cZ3drzc+jQc3z|+Cs0}BhOR}I)SNz{w zOMseK%DRuVf~^}f^WH%big+*?^>;_W9~BW0Li|^M3#fdGp{q%yo`>Cee4HMEJ z&kWrx#Z~G3=6JX8RC%XB8T1AHm0UKAC++W5AI$vS3bhu4HdP4zSEm%{n(D7WO^Y1t zf=(v-QT6)o4UMvT{fmupf&JX-(01^xllt#JKUi!8FFP{Y6BoE!EdA7N%K=fdXfqkk zutre#OZdk(YV!D>ZAqO#=^NB>1pj9%%Ku;6mi~9#cj~hFU+vB^YKQUe^|!t9rnH%E zFVog2{EwO6zgOV}K{1s#mv^Q49fu!(P=O;zHf?rR>Jb*!ePvY|(yw?b*lB~`p(7$CeS`%9AXa$n2_m{cv zTKP=tdQ(Q4cn=bm5N`%|LLGMF+z30P2iuSG+grU`zOA<#5sAtN18&tDi0ZN)Vvjgh zNLg+2^GZpUPtw54>#|TCQBDsg6&Hp!#+@@l*zh&`=3AZ`7CrZ5&00%R zfFZ{+^`%CK?cQU9b{<&{j!`p&ib*MI2k>&?WLnu~JX zoUmE)$eP&i)dP9tT7LnZncLg~<$}>dWwk3r?vFpc! zkalgv0Y{6Fr*jU4eZ?a3S;sU;cr|03A*t~*CsQQ&=bwZ7A&us9%#zkkE$!%AWv6Up zI!DZLv|RRd$;tfY-)K1{FNJsxdJWl_>G{6B#tIA9pUChrj9WDZTaxX3;g`Z}tiJoN z)UtE0A}w1Yp6Q+kjw%9J&CdE8}3ss|@k!qDWIrElM{0EDjd3enHv%{y7VE_wGhierva?uL{i z`!E@akGZ-El0yb3Nf$t7z4FT}(65KtcR1$g;4?iv-B15?9!8VBjDksS!F8bRh87pm#~q7wbkQsbt%@0gWtk{*Pw&wY^A&c96;)`qhi2*E z&7Y+Ak#lFW!1ZJlHF?qVXJIqmZj)SkW@?5~pre7HN>qskyNh|d_Ay4arn_{BVXS z)+K_Jp70xKoLOaHw_c)?iO`1d%|aJ$OCJCQa`o&|Nvgd7@%a}mer&+pyys%paqDs4 zf(Ui5)kkVYZ-s2JuA8}-OT3q?m-pA{eQClS27|v^4#ZSij*(suP~* zq76>ii76X18I9qP&bC%bXcSWsV%@1(n{1hELe?UCKKGUp^fX+Yic104=@B*a$mNrvrs1VmJIyHLE;F1~PyhGS!uV8IF z-wjK$!Uux`og0CniEOyOf+x@UzuVl7e6zDBN!Cm%o?Gsac;bUfgTr>nnCAqR?WV1}?)Ea4p`x;DQKR5pKnOwkMev}EIh z#(8!7(=8HOHPA;OlOJ4B{(ymp&1=A%&CUMVSAYRF0L^< z6JphqAvN6u2Hiv{-7#orBopM{HF~(P8Efwe7 zU%qgy<{o&dw#D(=-TZcrcNkV-e?Piu0T7h}= zsWAofkQTesjElN_)bu%&ji=kNW#gns-wgPA9iX5xNSgc_PZ9%hF9}xNWl(l6t7~*6 zFCS9n&}^!@K$fgD`rH^UQ{CrF!BT@?{n%p!R@kciIIf-Y3Wr7(zFb_D%*?9K^gKd& zK0j^JXECAgv+BsD@8xE(VMLrvlTWAxC>T0WOmlNzI9JgU#i!>2HS*{*^4;9Ct8$SB zhhE37hx9_QpUJDMri{(^qu`>YUeT}~^57o+bC 
ze$1j);h7bK$u}mGNN|nM2Gh28*!5`-(p1vCXW1o9oAbG;yQjK&Zk@`Jblhh1#UCi` zO>3nu;Vlfw+*g*MT}JRg(q&Ju+eKRmuZVkc?So9h^f@t~=LT*Jw zjl5+Czy+R3aMWG=!d0_2V8CTMkhrVfa3Gw5NTkUczB>9YF?wuGW^PT9+R~c$vjR6E zKOUgrSs_aKvh#LuT4DfaSJoGl0W>)GPkf7V@yX1|&|HOnEvq}1wbVHeXi5k3a;3^4 z?J2mOF1ZOflwv|c7R@0`G80+03OrXlWr1OvYh}d^5b;g?CeP44*zEsWiZ%cOz}RrsEPgqpJ{ zr=qyxYdl8gnC%;%T;k;c*Nb<6v@)%Dp1J)Av#` z!ZVCSX}DMJHxZ4=pMEBOb~5wu0a1+ATRANyFjmrvJ~_U2=rcerk0(%TR@kgXqCJ+y zL?sDZi2HuPS4g!SVEf7x8GG9kQs6uKM*Z?xk4NE=gCZ4#lxbW04Nu7`J;Y71bnN&X zAS|q%(2_b{X7JP{>3s_k>q|)+zp0RoeO|aS_0=$KBaBN)cRkC0^FGK3;lB@l#d1sv zb)S!FoD>5kbxyB`^9MQb=e^`R{97BhNN@;4y$i0 zsF*284E&SGg*?ChJaV@mgC3KBL3yV96ufiwMc(D=#ntlRUoW4(HkUH`uKXIPi0kk_ zn$&GKsG+tgXX5Q=;`KWI;2rmXhAkjmpj6ks5Hle8XP>Moja(MP0Xfz>^Q+grJuu=i z6Nc73G)I7@7)94ORfapExPW};U`kouUvos0pl`Yn{{^1v(VqbNj7@e{oI6xwYtuDO zK$oPIS6#-_Hh2=M3y5`3wINbA?I`baDN85J&1vLg6phish6a{@I2v7%^q;Hl8OGs* zZV<98_|pR-rzgs;PST_d$_ADLuQuY~S%}(w-x<7YTI@k!`0bR%&F%FNCQp?tXm1x? zMb@@<9W(cckLzUDnp&KJ3;}8BHV!~`dEFH=@;}FKtVJ|s&$y+uWZ~WT!^$-9RdTH% zh4)>!$)@9b?l0|-9dHOUc_}?wm^Ubow!3Qj$gAC&O%MGIi)xu$q7KbCP2|U=%rjj) z*bZVL>uuef2P@6k3+wag-y%QC?a%*hSxN$4U$R$osJe3aWaY-mhU)pv=Q$f8$>eGI zhaDB%o{PrO^7v;}NIT2cmZ_mK!S3Pd;p@ahe9*D2!4{X}u^JfJxEaSOeK=|5ywPBj zi-Qq{Jbf}#!``(}W_^Ab&4Mdu0ufiU~EH6(ONa%>$xoyRrg2QL$>bau%nnn%+8O( z5(AelDce03k+z04OLmP<)(q^r#E|Iz+nmJ?3Zm6rMlDgOcB@!fN@ybig}u(ZD(jPo zG_ikAQ3(INIUo=Dg;9TQaXBMafZoWY+FQreeYr9-Ja#rcNo8aK520&!M+Td_a6 zj_-Sjh-D^JJ+x+8;zJbF2Vu{aB!v~#C3&#{&<5BKSqH19vpmqf#26}E0?)HlZf-XJ zdt0`cXlL?8{%3`I+(fCZ{~!^Hae2wsnMk^>k^>pME8P+4+#`fqh?ubV{?uyoT-&Wv z3zR9qNty%8T?WW?2|KJ63(rn(k{W=I_B7b}ndxcWg2-b*xSo*6tLu8jZiNVL=N1v?XZVDzxmM;8T4cDe zqJYrJw78z0eU%Mw71M_;U%n@yS@LQ)Hj7c|75v;7;9f)`^^>L_JpB~oJ^jFD<`C)x zZ%To)qX4k#NnY6jcBKJfiVdws=gWgrs?NsLISuY65TTzy$i`t4wQa z{2);pX)>keyDJP`q7>5~)pV8N$Geilo#c_Uh!de%*xUr zIEID1rC%uM?S2|1>a$v+EIC>Zc1|s>;FZ~N=ef0MsJtW^x+F!(S>nzAz~GhmSzf#Y z3jiL6v*uz1HpyKQ`!S&TeEF?Z{?BAP#G6x1z14}9^~BUWLgz;+JQq4!v~j9nt{Uta z>x<>KN#-Ej#=AgaaBYm^ya1w#b}mNx91`*qcVaSS@7K0_&k3m@hdX?Ea5f%M!5}~p 
zZ>#p?y5DzXGfoQES)x44&NpRwh1XVhH5ONPWR4g^&BCh(=1an_Gn}*~3+IA2{yMr+ zRJ^gmqv>zwifYe(8&aIolvzdXJYBJcDg_bw^W)XPA$i9dr+eL7)3ujPLwZ-gfV3C{ zy3_82+$B}8^dRddiTA>EvYJo@#coCBgdoqC8pvF!2t19l^l1uV(jSKA&>)Rj1PlC#@a zWRpDaXYM#c<#sr)RQ4<6E2Ga+!w56&2F>nOKNSNgW!`<>%O|PjLX>4jP)oT{slBk` z>AAn~BXmA|t9JV#Wd#VzkgEN6m0GvsJ$JbR>7X<`&SCXtqHDQfTtV^|c_!k6KT21K4w&jBFeEv4o?f>@ix*55GJ{^bFtI{F z5;DPP)!x`TVlPm{qi$GRC+k;nP02UB4$l;G;hZk0DNYSot*(L7tdzvF=32bU=3Mk80!Ta75Uf zYN7{!-)hzbq*pI@@7}nAf1HCp>9HoYd6vfEYV8t<3!fMis?t`mOjrp$ZJg#mksmh4 zuO06a#ygu3;+5P%hIGw7F}%RS_1X5EJfgeD_}af?PqKaX2hEa8W$Rz554)X;4S(N1jD|8C-f4Gg^DM z*&%s1h2cFl!uh!h2$papozLon_Hbw&+{z~=RUVBd%I4Jh_C1b>3#}9BOOq>ORoyaF z$|fes*?7bYngK~DrAgjE%V@MzbW$)4 z$w_3=ic%DV6vglwuzzaY_MAOuU+kMX^MZ38-aY5}KhJqCzt8vc9gH`;H#g~m>%(HP zFl*rd#2=6=z3#Bn<>_=Oc)qZisgvn;EXML0HTCPjost~HPVtKvN~O#D1`}}(9&ki0 z@#n6fG4v+MBfOEw7bV80S>fWvYkx32^KCP7ja+#QFr6kru{VdjTvEqC9Y{<}2z#e$ zS=M5|Z|sz**wv+r21O?|@sag)4xI_V!tIxQLCI(?myhxR1CiKkMIFYB;H7?1*SUPS z?VEpg%wzH>W~(WaPMevc7FT3)An$z;?vlG_mA^;jR+J)u-MC3LTE-R?0+wVenNFkY zm(6-+6;|EjI~Q%>t=Qqxx!2s3EG$h3vY8W;yzBQj&WXLT+q@$}Md^jLqP{hnIH2fB z3mHVI+rA}S<@adWS7|0>0oOy^*`t{73XbWDM6rbwTniIz7I#;ws9#H&NFW>!RW$<` zZ37_xMkZ0vn;LmOE`t^1kY;fQTg91e+YIYrOjfHcU<4+h!!4ckO zlunyJ5d)d5Ly>|Iw7Y$naaTJbt*>}{V5f1$$Qd!usZHjbBK<$U`${U)hu&z*@2D~`}p+>?UUeQxN z>?v#D*rSx-!@O=dqu_gHcCNh9LSU^zpU$7l?LX|(zQv&~>uNlJ2wW$t3KNkof;O+4 zXGxCvvkW=T8TMpyy`FrHp=GV{^_ktzh6a7Zsm5Hlms~^!XsBnTlycq4hx}&c)7qRW zr8R)k+X4wkA^}F=*5@CU+0k>3w`MetJVK{EYs~9!m&Mm}4qe~M^50AK4?WA94i

    ^(?pG08SAycS>hj5#F)W^svPP2t-@%Y-8w9c#Q=Yg8{Cer+Jrt`Th zaZ1iuHqKITB-TmG5TFJVw}C5~?hb=x(>}QI;euie)DI+}(h2)D)N!W=NKvVQyP*0d z!flC#t}sr8t;)1w+o1A=fD^ezIaXmv`JU)7ZQ0*d;(@NjCFScWa156flDzJJG8&_c%$1zMmx9a{@W@At-8{N zy7u{xNy-DPsdskg4T^%SxlBHKS#&nz1+@z8&5y`QMO-u^tzzb$(q>zkco-oqm7kXa z#EOQ`tUoUv2)i*cHk-r29SF{n89xZSE)B%thcM$wm_)&M{my6`TkPT8jqX>@r>@?qbb!Tk^VRPmhu-h+82Py;y+( zLltB-_gq@hwX$m}N$`0z5&;UKgyGQ2X!drSy;>GU+5z#IzFI{?=M1mD+Y7BNF0wk| z8#4{qi4NcM#*Pz{UAhzPRXTOKd-;m74_sMj%Wh4gXIWD@ z`ESR)4h2^1jvPp;Y*FGP$0j2xxSLW{trff)eKyAcH}V5QcMh zhMT=lRYyxmJw>oo!w#S zjTQ$ynobo5?$)l%J|AlkY)(M_125xxswRr`a)^GWHjP?>98=t~Jm zd@0vhv2lAm&Q|k_54%jqwQlW-H*%cFu**$1R$wnc?Om)doKUxe_7M<=WUetcDtf#e z7yt@FMnjgKS^dcudC*JI_!sQPNUIx5|JSV2cA(luNjZA~x{!~Tfx$)%Xqc|~cVzdk z=FBu*zp)ayvLK4J5P1>fOLMAkJE zcY~79@G}gufwXa7t3F{5Q9&5imo14k{qLOaLRJ^d!v8#J_y6DTX=oiOXU-+7Qb*Et zicY>ECr{STNP!WRse3Q|gfiSlXw2b=Z?O)cf*L>?eCYNjmL!1KdJSwgk2m#6xvCu@ z>*7G`Nbg7RU?60gTdmE6kow&mY&Yr#;JR9iUH;PJ;lJffk@XXzhWU>=*n|4FA`4J# zA|CC;>VD0Hbkck!j?qi=Aw>0Ki%&`Xd7^$2j(YG@xchGwP2&)d!Vg|2cZ-hC#J?oY zK=i8LX^92#?V{Ioa4;6kx6MF8wy?eEA7A{z7To{bu|=v^TH?E%{Ahxt=8|C-_w700 J#qm6P@n2L{8zcY# literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/moe_dynamic_padding_d.png b/model/train/yoco_moe/sources/images/moe_dynamic_padding_d.png new file mode 100644 index 0000000000000000000000000000000000000000..fae8d84f831e8482a90660376e3e723a5be89baa GIT binary patch literal 80959 zcmd>lcTiJbyDlnxOP5JE?4lp@lkBq27WgQzq?BJh)v0HH{g zqVyh7LTD1HK|&3Ll7w)B`kguV-Z|gQo%_d`J9iHm_S!ppulIe{yWaAwm1r{)!;{Aa zkF&6_oVw zW?sBnIC!i_GU{HBoJG<#xgcXPAwAai4>$6nWN#U$E4Aq#&A#;V+ynCm<3`uaif@=` zs`IMfZ)@heX`4NBHYz8ItiS9ZN)F$Gw)fz3+cUjND(5D!Z%I3_f3gFz8s^@5G57;| z6`P;{A`H*Q6!FjT*z}q#>wh%Zq9Xov&a%objf!W~rX9Auj70EVPQhw^-T3@= zp1bUA)@{Kf#>mi0>x*Aj5L&YR%j=& zIBb;2R-P?hXlN@bLLj}Jhx)uGtUa89plj2t$80Z_EU*h~rKgkRX){sQ1xmRW{Jsuo zZLc#p_ja|ZYu)*95Y+a9-;b4D1Y?jO-4g+VzH062*0vp5Ka0bt)@~Ov3LNZO>)Qja zSfSsSftmtD-urzG*+n2IduwHG8C#l<_H_rdl&#KPyxu=uB*7S&7uc!AgsIjG<1zzo 
zj2TvXxPQ$TtZ2xC-U1hUC!QhT6`&<*^W8DLhg^epsx@8f9h&eTMEQ-<;!+4V z0*mG@c=r&>?`fVdb$B8>TvbFgTnSp-@2KRU%Vb~S^T>+G8$U35_Jq~yS8&O>omwM$ z(9!3#Dc{G>E9tj^IchN*n~|{T<<2b%sckLK*vtOPh0qh#9PO)(tvbgp#e4xM&U$AC zTc+Z7z01tzmlPFp8>KyFsxm7*Gg0sg2#8iW0T4m$O6SABulU5zCMh-@%++zW#$o=Y z@WFkBZ}v~y3nv&9H2zwVoa6@N)N>;kyLB{i(P-VQxyD&h+!J!Y<-xnY!c;rOm}Eil z{In}{PumuUnlG|_e6C?2XXZNemIUL!23h~qv=^sd5+&o0(J z(kyqasEGTbgn^|y_>{Y$z7~aeA{X1mi=|*ChPTy;)kkE?Zr5!8qarbIV=lv@H$p(-!wrWrA?4V&bX2(VZMe}1KsJ)^8x!>e8;tc%Ek5p3k zP2lIFQ4vuQn5hS@NPNQc8m=S{1;e)HQxJNE?2)+!38!EQ-dQzl*vt-4iG$&RS?mN7 zr=zg;VK9ciUCtgs#D!;k1 z3{NlRZaf7+6&}X$b^F`Os1+Jt6t!`ZY zUf~wnuz=T^w&jgFxMw^oR9PGhkA>}-d&MV>PS0Lxgsy42WC>vi@UD(JidPl54Y3Oz zdJ9$%f30wJ#qZDLXuniu7X8SC<42&+6@gUuz=Cg2K5U&xFWKU-sjbfv4kCIv*9~1g zA#=T$`m(q!y^MVjC^>@LRiZezizyKZ%9yK8XbuiJ?XL&KM0Hx|t6ycLo_NFxtbf(r zn$uK~;^wkt162wK>s$mzB3588|1wR^w z&wZy%OneWkDgtd;^%l0%?}-|kJ2+O;?rbK0*0vCk!lz<iY=qhuDJVyIj%l5K;&YIOTn{l&G8G)X=Cp_w} z0r(Uau9{qER$eD5@*I>Yd^P*|r?9Isc0uQUWatrT&^abvxDc!L%Synvq$K~H_mjP8 zk#f?aRv8BL4$hhMh1-?g8=2;Q*LCa}MU_WvE!PDsvEDY6@d|j!s1;?T18$l@vFDTC zYR9~lwUv@`fjJXrL7hKUC03FP)8l4yV zH7!z(tks?i;@O6T0w9QWQdmlO|6Ex;881F0C=T%b$DT7$PUQ7RM|;cfga{Wy*agy( z1Ng&RzW8Ua@m7)-8#~GlF_o_ojAVK|m8`U`H(&MdV48?tJ3h$FxE>{qwZShcS?0nn zl*}0W2Z^A_(S=ns7FFf$5%hy!AowFOiE523DKzXd2gYS(oDBz` z$h(U%hx9vV}8sOu%N(hJF*OwgJlcm{jsx?gr zDl5ER2`2A`3WedHO3T;$s-yp-);=0+uy9PQx5GT3NwV1~Sz7**yr-l8#N2N11ZnsH zQ*Zm|A1=t*DaoH=^~5X$d&E23l2~Qj6Y>W*HykpF=#i5o8-@PyB6o)V!%P5UcpaB?+ZrX>(v`oC0_Ur zB0$+b*sF^2>(8=*oKT?yko1L0Jn*7{~jn@LgZIEQ8WO4}n!Cii6b4`%zSX#4{8y8e{@8aDCljGo94a zRrOW9un)3JItpzMzakQinxK;sLFp-bR(^#l`RM?-mQD$Q)bGl>4UwL9V@@%UlYcs_ zY57>slDYjVDZE(4z|o`C7I^*&zOLj^#4qCNlX_DcpP00`x$J036Sy(p42}pzGfIe- z*wwQ2+Jdi|m`3Y|7Q%MGu-QMe@9L*E9Z9dO)mdH0%h#dlqTK<2a`QLmP~npCS@YfD zwd(deLFvo)&7*IJnSA9Av^`d(?6pX)Y}g$!`f6vnbf?K~rhEaBM}FohgbufCZn<$5 z(${hnBpm*}%aW~VVjSXk-VE802^;LfyEii)@l-tOIU{*7FDp{UJbu8mBTNE_GgA~sq{ zS#PTw(!3ET)KUMgu3E=u%U}Fao`SE+?<{fU$^M*#U2^WR5;g%Sg41CQ5wAWu<}Bxh 
zDc2gCo*Agd*1}BUpi5>~L5BMBP`5>t?j4M_;3xaWplhQ!VUI{(&-vR>x2a!auPAuHx+!Z6A0bK^L4vHGZPw zIFNNsnvXruL#+jSF(|AZFjF5D=c3oxfz&4Vz8CC}QP$kV@qc9f{G016l+Hfp}A z5{Qll#DI_Ey}(4^_kja}dglU}3TCa9Q0r$~aHIZJSxn`AcB(f@=DIdPPCP9xE6DmW zJwKeVxpvVJGxY5Bn9w$4M0@?Wc5$sz6b-vdE6F!KC^BHODss6GJHnl!OyzDKlgYmA zCT5EL7BXx7&-naB#s|w8Y0pGftPx9a>?02Yd}7vXWHxeZHRX3^y6RX{WS#c<%Vo*X zq-~i4y)G_6^Q)wC=_e;Gi8n$e@gULh!EnA4Xr+B^-@ar6Az&h#rCTGl-YRN#;iu*` zYVC1}Nvd-ssQ1b+Xz3TOY^$&Ne^FQY9>9)rFYC$+P`c;sMS+zh&Gn$l!j7SJLz>uT z!n7T8IHB$5h6Q66MgS;<4g>zBX|9ivp$#K=GCwQBrco*}s8J;D4Ocgx|J7!li?7ZF z%`7#=bc4f|(A(}%cGD#b(dXFNp&DVHtMDRnT7j^qZZhIm<2^{;=qKAH^hE#oG{#rs zk99|Ucgf~ZGio;IeQj)N)smTM>@g|VNtt0a-wv`_Ir(_uEAHyug4-wz#i#z&=sBmW zN{^_}5{NItvV#i+K{I&&Djv7_FT1|JHADQhI+`Pxs6J>JbW?q6lhT|1i?4qkmXKnR z;a{5z7lC{HWV@42|K}}2LHzfi4-sn@Q#gGa(qGfguZL)od+)BOH>%{iI>kSM6&vze zZd6zRKJn@Lj|`}lFz31B$Gy;o(55cNseLCnQ`V1vmZaqAByOkmQn_PBz@D4kO@A|CTl%WH>*AYXsIVQya#!Q61o&8Z@=Lb=Wyt%gh%NV3G1&~LbL>7|go%JLJT17Yi70kjI=!;ExTdlw@G!`7hW z89CR{(<$w=g>11T3@Co92!KPzCszc-+@iJ9Uv<$EkzJp$S5X~+Qv~NQV*%ozsGzo@!lm-h$EP&yHEyetr z8J>%FjgEZ~uPjvgh#?BngIwo#sv&tr$)i6Lo_dTG0 zMsuKCVno&lzoK@qRb?^X3J(-TOkCz_+$+d}Au$RR$xx^y|I2=(iw1t{|Jua&EhNs<{JoeAgU-hoGtjw+7b6d*)$QPm70Q z+I5zqD&;*Wp&xy&%cCLf{J(5+6?F@oYV9|!2T=i3jnuxNZnYqUd_K_N-SVdgb=Z#E zaazj5R}(@R(q)V3i)$25AveK})xOm-`*v5j8Q^MnyVftPk=B4#e8TrE;IbI4cr<;D z)VOSmjMeJj2{TJBglP}G>U8CreoqUOBcQRFHBQ*?gdjxPlvShCX3xoN;A`5FpZw}@ zT1OjEcd3Ru=F)(wTD)`XS&CC5vghL*p?3XGCcBSplv`M$`@(emECcuku2!NAYGH&|*=z!{{h_ zbX9hf8Yb!Hn@)cpW>FZnS#UqxaAd5*oNNzP?84CM*0I$^wirQEg=tzD4ZR$+b~RC7 zge^I~(jgiNB^n24pI<=5VIol2^ILX4$qE;x$RW9p9OKiyIzl=BQW+f45jk6Pw(bIw z&;}(Vj_T)^Bw@8#($O0vB^)W*)m_>`m<6R9)N{pv2hw43@sh|d-Q;?thTel?XB>kY zt@`>>L|JhMN)`-Nw2Pm8i3=Uh@ifH|z2yj8;$AJq|FVe~N73I}n~t@13XV9NxKiU<>Ce}2*b;wC0J9m3&PcT>aU_In8 zpPkAnm&Kli02Px^S=;INShN?;y2@CwP-QE9(PC%G5z@1_F)poLd$3|CVcoI)w~I6q z;$@HEIBv%l;(+0%CLPM>u0v&V_ippNfdND{pF1Tg8L6V{+{4dx%{LYU(G$hbMwx~B z8#iCdq(Efn&~=l!XarN!+c-wdK=^!xy~$Qpnj$B_c4>f^9F^m@REamQMR8Q`vyOE_ 
z`F|!s%n&uD@COfsTA%OpgkBq#1&oEvcs1;J3A?GRJXz?&LxZF*+;6BJ`o|a#Wm^Pb z$FC*;uK(f@q68%;Da=A!T#7fJM-El2XP7vXW#aLU37chx39+7b6d^U_*g{wj9oROC zGEQs1%_#D_Bf7rbgQbL8`-TU~r4C^2pgM8-ma^y(wW@s&u;W5&&b^TP6#*{egubk_JqS46|A7)yzwlW+Z+S^*eA`mYl*aDOO<0< z`ykgAi3mQmlu!o61t6J2U0^0WvS*1ng?;mD#qcF3$+1tjw{4(q;@}p++R(1P4jbm! z`svAHsON)}Kt% z-jmJfP~Ed{j$dH!amifi3hfPCNeKxavL%`B#%si=uP?ymN6%&ONI6DVJ8c?iZxKt> zf9C_fU1!Qx7iOM45jAGXisra9vPZvml4a|I6tAOjqC?ng`PIgnn}Y#jeVAE*T8Dm9 zi6-6lC;-cSf|7Ec8B1AM=mw|Q4`dy65&KMYcP}%$al;~mGUxh!S9$BEj&rzcFJ^^{ zU0+MrS6P!9T6J$8zF+RMJT|>9%NlZ?$#Sox;@Wf?X198N5jc}C765BN+Z0)ecFf)C zl3Qm6F}qb}p8}EH_v5hq`_Hk!q?pf`>amS3B81EiOt9IKLU&~@ToYx$HTG{mu0C9L z>Sr{QB(Ha3inV{~uKs_>$Go?Hg~e*dAu2+%w00J8Mfj)9JYlGt5oyh`3jAt|B@y;I z?*9JLV;Qo1>;C$}pJVvQ|Hi`qx$*hXfBII)W9y^;dV+=XuLS!SejacU`;Qx1{|8k6 zpNIb6?BvDv7=q@D*rqt`Q4+O%*5iF))6;~_HFOgd%{k`sQRpusanG2jWzaWF_ckI| zs3j}ZRW`b?2jv;zr)4O|PcSQ*&2D?Ve_j=K7as{RC%+ zO;7lSuV;vA@0wWlkNq}G=Gy|UMTZ{bwwhYHD6sf!?X2N>=Qs=1=zGeD+u$IdV}aaj zfw$coH$Eu9>48EB*N3rA)DKS7B8=d_)^gOblF9th`eA7AO0^dfdGohXr!F@;enGj; z)z5AhN3ln6gH`sp$6zP0^tUB2qoV&_4_AF14sz*OhyRQliX6wt* zZWnGyP>qLlR&Q)|PYJxOUI?@PSZVksc9RN+7%%-*@v)+OVQbMv-UYnWu68m%&kc{V zI_vGB>1shmAV6$!k-vSHTA_cm!S3n!jjK}x-8dqs=?J`a~*LgidIHX!WrU7gC7217dQ-0or6v(6;K zy||qdewuU2LocbX_DuKgBsdD}Shl4!b^RUZ zl1_J%BE}IgFGsV)xVo5SXJ8@+MOysz-JcZ8r`hb+xj8oSIkp`WwRNqI8S!}fj<4@a z)61YL8}yBpM|2yW?SB=KaWB>w&S`!dF<1S~^1|}fs~&Y79j>i`rtA)}kZwZm5;s1l z$#dO&gI<700X?tb%Oh&8MO?c@@7dM@wFbEK6-=b{j1&1?aUc9Z0`NIf@!Q;c&y0f% zm;`+Omchk8vVOMn@50;B-~VWi;Eq<|v61+8{l@^-uNa)D?PUq`8<^yYi(^U_2FOj( ze=nlkfo-cd>iiDz1H^VKck~kP4OY=_V^5YTXN{-NiQUzXRc0a&8M$ zV%uzY(|22mkbktzt$ff?q;gMSMaU)R%i4ItVsF6Udj8Me`oFMgy~ba(x$0;BLBD>) zLU=}4dl2IMSwC)}SgB*|ivQQ9I_r?CrUioI_jN<_oe+iqi~)kM@4V>lL}sQlM-KZ3 z9e}QHTbpu4p#rpBE0>5C*&R9GZU|rD^6B2L*~;Q0HK>W``0lk;T-oCBa~OJyJtgnH zLr^$j>(vj>Q~MoCnc{$FRE&m9s-9z#)PgfZK9x$b`rxL06XK6v7K3#{%u=7b{d?xx!-U8FLO1iMktV)LlPU``+p3N<&`S@2Uc&R~ zL>5U!9aR{kQVjAXCA4B2TqYM8{T@Tx5g`)a`7327>|fWKIt2Aj#p@i4kW=jL`8e^O 
z#1-(qtgzj9?x}kaz}VeQpt$c8%XBDCJ&_^p-|Om2&lRae>wl~T=Mi(4gOw=XclX!l zna595y+POLO;h_*;WtDz9#?Zv3?tqbm*+@HXgw=dJdIsk@%D&ECiWCCyY%a&_f<|{ zW~mua!`55KXErGVX%X9eSg&}8qfd%~w3>~H^YKamvCH&4aC=97j-WPWay}#LPe&}6 zpMP*x(0=}@^m|TFfwnX&h4t;-TQ}1Bm>{R`;&!1vZY2kk6N$(#9g@}g?($dqgp}ba z$Vzt*fSWO;0Wdd?i*l6Pfc-e@+XmwG894g_M=8DMn;4SCC>o}M+%Y|t(@ zD&v*CWBzHEaP7@~;!QDE1A1Wz2F2Xy&A8iI6(|ThKMVNGBxh=0`>FR6n$Kw%@Pp_x(r4K= z=mxK1IqiI@dKu+Ehg~o)AEq!NSEivEOO=4BsBT{hg!R}Jb8J*;X1az)Sh&b*IrQk_ zo(FLV^RXSXTs6^t^;R5qNK@$gF35rDy^ChGh^lZ+7cJ-E(WSDb!h&c^$1a4w$GRkoj=LA{U2JYkA8-=YIxmsw?ADt#LKx{4)aHwo z%mn%VbGR-AT9x>i=CfSHyiNMeP|#%$d3Xfmo9DyRD zJn4dQBu#xj^4Elste>-9B(2~U);84yWJdATZU(7u?y`>th_;&H^c&N^@chUO)3*8HXFfu1@Yxf_|-SNKf(s#?Yq73+2V=e zzw>=~iYY^sTGZ4-IkX;Y^dXz0$G=~1kkZ=DcAS+!&5K}k^PVvrzkFcM``5+4W9NF@ zn#H?NShK`OkPZRIbUY=a;;+xCPz@%{9p!kf?wtrk&OLn2k95ok@ zus1fQ{YUS|V1FBCjz9mBpKC}wRha96r98)4_+xy7Ikq#y2%MwlLEU+<3((mJ`UhLr z*qjy$b;IajzYS~Y%38h?=XEJ$6qWQCOnvd5;R8zf4(#vZVIGeg>}_RXs$zj|V)G^S z)r|hfilo!i;@6jDZDiy8kj|b`f4;}!%}nyljjzc*Ki*!1Rc_P40eZ4_NyC5d1|KL} z6+U)-rMOA?ZZCBn>?L_-0RaKVw%fWE{mXEE`dpKR8S0#B#)tDO|9vjR^c-)axY*H2 zFYP#izt`%eCMvD@n{+PG+SMA@*1pro6V$7R+E4twJ#l))_CIacL2&+`XZQ9xvUIk+ zpDT#%1@Im@60woUxOKwr8)Jw-bqe3^1~a}z?oBmqkB~KAIQ*FTM0Vcs4-#&%y1cVW z*goqdgK-`{zWG{v=QJm}WdHQL_AXwetfuaTvkX5`(>o|O;D^HQHwC!mc!J-tvQ>7c zZF(eup0Ri6M9by>^kKKN&sTn5C~h@VlemIY!Vn}_eELXqa!HRFZY%ZG{hy0 z>oD$iql{>Z0vNC8efTx!UaKiRnJoCC`0m~sb01HLbOO-o4Heu;%-GnXj5|$lP#iD-MxVhB+HO4hGyWxT6&)Buu}>XxC@$TaiWc4 zVf15n0gP7UwyDUGQfW>6CEi0t$BCNO?`(*gtvZj7l@@+pILF&8{E}0gx4D~nw%=NK zSn>?VtSW0xc}tgSMnybin`w!PC{<#9n5|OHOw!#TWzMUoanKM;L(CbEWH;C+ulL$?R4ydAKyN_FY)qY7=PI*D`xS! 
z-?EAf5N4`7smGXO+Ja~!6^9AGU>kqb3c8cOw}Av^trz>kNz+nIT+xo$kdj=W(Os{~ z?5QyVV>6o;R$xNJ($lrq&&sx6cg@-9yq8s9-%crZws^wC{rS~%q6V_hlbL7xE!?C% z8xuKou7-iRsqYH(W(8Qi5@*9P;*=Hdggpyw9l^kAx3*o=ao`=^JbyrbA@x_N_!P4J zJR$g_T%AJuXoNY2K$AjpQHx3v;K#K!s_`*#K+u2(=mVry_Pll`_*GNQ^ljbQ)AEWL2_ zl#cUox>I$Z%S_bId?f}iaxqRlpDIDscNj3mIJsnJO(J)c*DG_IXTATO7U6o$8L@_I zR*w3-Z*#_#+nQx-+Rpl?qfXYTh4ZM%HYKUY;FWC+MHJFU5eDzqogx!z$<%da75Rjw zcLi#Cn1laT_PuUYFTsGFPB{4{gjdT#ws#7QlOOQZmE^BlI2zbwJe?EP@F1PFdfPtF z-!F06_rM$t`#$t##Qc2FE#o(j3xHsk_v{$E=~bhI2hw2`DoqU|%E^aAjxarO@wAZk zCf%+y=48z6n46A~Q%|Q%N$g@QJF6$k%*r%qgibym5b#^Yg4)TS@Bk}WOZpq9$|(pv zKtcQ>_6}g@mhUVCyS>#gDU!*n1{j2}QBoFBNY@3X;3+j}?-h?%YUr%bM0YfC8$*(=+0ZM!7~HDIE2S&Z_jEF=rH&BXdgYOL zw3li;NhT!Ox*+sG$PtJdPg|s;|CMsyt;Un9D9TCFIZ>cA?}H^}agE2_mRnWEq$8g- zq$fDL=K^UJlADdI$3nC78+?NjV$ zZ&=1kbnws`j=|ia+&Qu}g>nG;%ki)TR#S&DDzDC$X2fmm*uZfKX;J*g`v##uNZ55R zrQLd*$mEt#8GYOwnw)m|mhAYnD~qy5MPZ!8ww;$^hjIFlE=)0p9|VYWYGPpq9z9v1 zo85*}72VqaiIvjST$gOxeVDmCO5s}ziY`YhW!+eHZ5vxy@|*BHKrFE4X9NyKR#M>ptDNJoJ6&UT@L%h)4%GR z6mW{nI@~i5@UH*8{DM15!A^QlXn+f)a=rh+0hh^hwnbTkmTYDOOAiSx`LFRq8Tz@$ zlg;m%o$}AVjjUX~o2%M6ML67h)|*3`@}Vt@E#;9LFe`y3h?^Px#ufs3wA-uT3{*W;R_K4(yg?U@x$4CnG z?Me^4Cuoj@qq{p#$uToN9MgE*4j?erX|BmdMsd$ONS7a9+qauR$F!~bMTq#CjXPbW zRcE+Ixm9N}Tp)!o)vaMI(^TZ5)yVOMc`XTUV$~3xw*K6aalmZNsO)mnhDAf*pyq30 z=Cc3^))hzDL6`4OUtUTdz$!(1t)+96Qk7pJQ&{HMw-(QjiN7zDGb;HwP|08$Ncp9K zve>GekhN?u2WG7VwK|XYL`2-k*H*~`aTjb7ACZVw*DQqP8TWx?OnrfIiG>dyz7P<> z{tiN1xwxN;RH*VK?b_z4kL5^Xu+pdy9;Yyt&d5Xtn^gK=O|nu%4}A>zV4b$B?Bv_F zw17{xtG!sS?RbgTN^uAqRQ_8DrfVfm)47d5+;-bbu3ROYds7(B^2FL&R-!s{F!C~Z zva8`aD%UNkhp#kiGVkY4cW54&8xVA3B4Bm(@tK>McywK0r`0Z_wwnqOTX3IxEI(lG z9%!mBDINBX5xSwkdf?b|F1?`JKI~%Z51qcIH67j;-uDu3-A~aWZWeT%_yeZc`>{An zjMvH+)k%MeO3T1(v3|Day?*2=Bq~B2C^VydzVR3|{St4t;cOF*T^|(J2bs%aNrO8a zId_UZwr!F(2dO71xdI57G+{57K0OrnshIott{ED-UVRK;0f$VatgV^v9YnkkbvaXH zDgQKO^z5uezxzfyKKq*Wu^8$&=~b;+0V!N=;?WuI6YvxDnQUM_cY?H7nk27P-dd0A 
zfy)`yXV|wSu7ZAu^!$X2i#EQpPP-lYcr%zlV##DmsbQF`pCz;yUtGIrH|z8ez|A?d$HI$nJNFe7K56H#?3G*)_57Gbqp^2@^syj8nVw^RI+WAFSt{sPV(|jAJX2=`Pv+ zISbXcsE~TKs=0DI4WW8bkC+H3Onr5|IqH}30Jq2_+}}X zrUn0Gzo5z`_t~+0`e||zHF@kVx&-#)G$u*+MQZ9hF}Annv75AA!JbpP4MnQH@u?-k zVoljKl26^8Z_mOR6XdT!9v0?JQY-sh*K}qRJKdS3Gu0F2is;Bw8Ym5)(6!CO3*L0skPNa=m-(Y zXp};{*gEXu{6ryb;GYj+?Cs%Xcedw2!mfth9OWH77sS%VmSVN*_c=NooDq zKER6A#Q!q3+sPu`tvB{{u6Xc>s1{4?V`J+}fNA)QUk?H?us2WRXy**XXh7fP;6# ze?`L%9&BxX3UJA0y~y|WU_KDTQ!BHh^+_jwJ5_G1oPT zH0l~U9{80U%KklXFGVZBqEPkZEB~}UyO;>vmkJ7&dxr8EGy9A=vCFM5W`PvOpH;Xx zQ|6dEMKadWL#accI08*yX*{*UWl}&?K)( zb{0RoZ$+8DYkTa4-Cl}gtT=PKh#Z39ZQe;LX#~X*LsvCuFLK+*)IyN?u&>x%aP!M9 zm+Rp`6<_aJ~ya~Tk_tV+L7ec8HI7kTy!ar`Ev*X zpq5T@?nSl(N3()T&}3_ygo;pMOQ$$uNHNsb_gpjWP+95c*Q=Q|Hx_%k;)^I&G2O&+ z=;<&*+@_Ecxx!Az+Vt~ag~}0mDk$%Bj-r`^NP$6{PNk3OUH|LXutNdUjQY^(SBC)K zd%7&~i@w5J4+AZt*T6^^UFCvcMmOYU_oP65rM2~s+)6Hz4t~A5Ou6-J4Ig>3nwvdY z{Gta5TaUPpc%@C$ytynEpeOr=vn#b6K_R!sF!y-==){~!T<5#Gec3G`Dxw0W&T80j zE?a%E@zNeh{tiH@!#ji^q=AoEq4WnOuYhza=~q%M9&j=2acw)Ngj$;qI$4+9-GE5e zbXK)$%eBHxN#JQ|`s$}GGg73>;X|D2Pf9L*-Zm(>#VUG(H5Tm8-n^+bmqAp`o#;z)&>jj0UVlG(d82I=K2%YU zSt&jw`XYuo;^f-y4g*+;SNjwlNfG8iVZ`>4L9Io zDZ8O*cqqBiX8iz#eG}TuB)wVL?C(o{VV+J-O}BJ;JS#?*mo}juQ4u!vW4BEC&EQEZx3nmQmsU8SXg{UTbo_t`VP;yaqwmROuh;3L0NVoFkJ-&2n^i$Annx#Aj zfT`X(D8vlvh%EO(EyKV-v*f0^aXsyxjv(RpQ&HCH-y>%;x_Qn71c-?3fx@4BJqhT~ zB+Wo^wI&)*VE(@JH<-Ch9YjZBHk-?&l-q$qciT5c{aB&Pi4yvdHl1u*y@C=&N>ryF zPW{9I1R-{_nDBnNi2s0GpEOz$yuKw}13DYBjOo7gr1QYH(}!@Wq*sMM4Cj`P8mjR; zMql=_I1qAcrlo?>r}Mb_XO#8PhmuTVW&uBCGC2L8_}C=_-M<)X-ztq!JFxp!_RRKx zFGcc3VUaO``-YIkF84G*G(Nu((RU9 zRVt~9ng3*03smq|RXa9SOYz3=e_8;;+^P01<5qwtO(1oDBzxQfF={?vN$ko31N>i$ z%~rN5&iy>hH$y`@-$^RLNs`07P$@dr+#nAs!RRvB6V~EmcdvNanOC<}Z+>wQ;hVS+ zA}Lr6n3j@|7P+54x0C$e#`*m>(vANYT|bcI4WDF3A_9;6PfT;^^ky=K{vk1DOi^Bp zH~iNv?TyI*B<)9H^?}RY`m{+=23CZfEEPV5d4pG6T)04B{;DJuP2#V5+7*eIj|?xZ zXHwh4MD4wXzl?XALO4O37mUB`eR$eFviN~t3GFOm+DnwD+I7{dljZc!pdf4E@RI_| z*O)tntRkFrU*IP3FaPj`;|utg%(^A$-*IsV?T@PR?32PZhse&%idlg9VYaM=`-Aow 
zWfs1t|At}}`y6ri-|(VGABz3|*fnyk7Q0&#sSMlc(WbAR4pI4cRQ;s#%+YcM_^(J{ z_(9Bze}^^fh0ygE*4vMmIdnK#3Ncj?x=Y3H^>>(7?+jHlDASHRI^znoDZZwawc3)z z_x{W^1}ANY#3dF`9&v4fRXNnXF_5}{SzSWO;7 z{I>ZZUh7D(Tb^|R|Gc6zwlRoDlp}1-BD^tpC6VgiYx!w#h`DVE36~9J?q8_kX0pV} zKv@C`WYD|aHWrli^YsP9#^c*1gX8|9baw>T@ zM&=tYH}_ILaYse03QTU;vaYK3j#|E9ojq3~5yF00=de~2M(OaRqmmLj>zX@?_Tx;E z;j1Q3Z|SoFnBzXiAh=vcZIZ(in~WIPjk67jxO?8;#zVh-Rg#pl>vXDGcv~qllPW#d zavGHtzfo)mg7hSmB+IPdZ|aYY5X{hW6rcY*(|oPxMhFOp1tc+pRY;^i))n`#M3$2FH?4hyI{1z3XY}qY7ev58Ry0_OMD`yCuvGi!cLq zs?4jPg7mV&ivlGFM{lfxF2|8kx@;Ohqz^EUFF%>%+Chd8JjU z^`GK@Zb{EY5cFYWYFP9`UYi*;X#xt(vsDiOZY9TW{}@pTKU1lYHA~t`VAAHZSIJ5I z;%a>aUhujz5}LjcwOt7RJcfpNFZI=FJ!UMKqfFDZf@cGqOS-A+hj5Yz4lD zr?#io#0`;zs9abmj{IrdM7I69J|0}Tyi2|DP7Zd$-4MTmV$v6o=dZoEK94j4?@Nu&mkO}y#KSZ9WXK+7xYV)1h5QAw1d7sIv7P7d5n!8K zkrC4_+ip6Sv3k$@m3BdvwD|xee05Rsdzj-JG@bbiok}dcAz90v;4LY*Oe}77=mNhP z(Y&}3r8z+gf8Qv|!_Kn@EAc*dggQP&guW#j?hx2Qv^cn zJmPnZGDj?76`(tq_=%1HEAKmGp`dPQdacnJ+Zk4AsV|-Z_Fj@p&H8QK+6@3Z#@Sgn z!)O}eHyT{*0sevkBFHX}b4}}-v!dyX8FF8)*C4IBN}aS@&_f~mi%Ei~6^J)IN`Vy@;XH-E8vaxe>< zep`P%@AD#IQ_Iz2?Q>{5*nsscXMKb2)75ei_k5R;ud7CEBDP4;DOx$hXd5j$=dojV zVc`uZC7&hoXJk>Kh!N&Ub?-aok%?_}y!8?eo0tTKC7b`ddS~J+dKs z??Al)2kKJC*%j>iYPCrxF!s|4+Tt81DyV@VW~DB8dZ{q{=Lor4+#9FS6S?*A2WL6R zhOfTbWlbiez1}{83Cc1wg46ey9M`&=okljz3N$oE=y9#iDpZUn*&Xb(DXDUF>|?qe zWvnZNrt)p#GaQcgZp(V)Q%nF|m!-3sUUsoa5|?(4YRO;>VjF*Ln4JPQl%2~7mpg}> zW}o3H0QkH39G~FT2=G>4+p&6YMvG|Iai4D7*2;aKLkzJ=AI+KZ^=e6I%lA7Gn7B+S z!>vPomy13#A%tTOz)WABZNv8@eLfpfn(982>UX=bTHmzP&)J{!*^hSx{|CYt7o!9= zU>1blw^nWF#v>wA&7C{(HVW%*7ZTSy{qvZqFCc`_ABT_Av@-Y&;RG^z?M>hLO*flO zzRAep;!Ibs+cmyflOQjx)2_2@(>CxrX-KQi&aIgjPZGhizJ?*moGbTfRudl~z9EP0 z{#wdTDeV?dQx(0-JR>aVSLSIvl=NkNvVycA3$|0h%yAIw(us90*A07({oAu3)ngCBPx)xnIBk8NN-@{q+prxv z{SX%Z+}A%DV0R+y0<#p10bGA@#?(q;Mx?FjNuesT;X0o7wy9ym3u_Wr|D&QECh6fL z{Y6>TX{%u&b_4cvlqw?OrzJBoX4-CGNeE%@`@B@r4 z0za1><|gvq9dqi$?&|!~0CK>}H=yUsbX{TuLnLTcx7^p-+{#tJbz$ihD5Jtb2P}`*A!_ zxw}Vb4<>VoV2i0;^0XT))CO=QT(~EfBZB+Ikb!Py1sxvTLdQf1B9J62SfG=@muWXg 
zzMZ#YDNerGnmNk?UgPFtE<$ABG`vn6%**by)WWDGr-=X%8(zfr zS-VC|b-#wmn`F?o#trNvzm9be2d_2{g>~bSkB$>DS2IZ-S^M~;;uU?4bU?E>FFT`WR z{s(*S8P(MG?*G~wZV`~)6jaz$0i`GbMT!MQq!)>R5Ct(pL?8)?4e20C5y3=YZ(1k< z(g`TNM+8C#DIrMckc0pU|A~6e@BIGf-aW>>_szZIo-ur(jy1B@oNKOGp6~N~zU=qWA^>if2IY&MG;<(@U+AIWX^^+He^{T6B-exMmB@uDKT}P4kGhHgkhA zEap4KOGAJXDvy$vWZ0YK5d@31nT?$Ev*YiYn??|r|k;YG9$27SrU>7?Ew2|3QI z4O*e7>U2B*(0sQ630xk=YUkD`d4YmfG`jIQ%qwPjJ@uH%hrph{L9$iH?lDmlcgF_0 zL;Ppjz!j%QkY+dM=CJn21x~x>sj7E``*M{}OYUu)DTW2_Ow4+R$Sa;Dwl!(+Goxbf zu^GJC68&&r=$P9%8o=Y3_%B%rS^ca1fw5iv3{+?ghMm+>ttKG}aVO>Ib2ikgJ; zkU90BAvyTI{K?#qH3r$qkpU)U|Za{1H))&+}pP=_I;ZHDPpwQ-bNwXq?CJQ7Z)~X<3^=G zrA#L#w=_)E5%LJyE_$aArly3M;$%kLH(KTtTPs_QgUO3XiiR7q6tM>xIyEgVU3=>m zt<9Doy6!K3^gC>I9N_WrF*NT+6B0+>dmZof`;M-6Y%$=1xQEIP*!F^{=fuO+$#vO)>?W2$p>jF4Wgg{0BK5aekX!7$0ue*Qu(HUe5hHNi zfn4=()O(L32xVZ_%H~<$1=Ps%Ax8x;)e?Kd!AIM0rf;8-V7+Xmwb68Bes+L(j|(JX z*M4YY*+`$-bt9briJYuNqt;#Ut20mv^ zGI3uUfmDc=E?~x2a!c}vKOzmoLi*eX17@bb^Z8=u8!hUpzlZ23Xy;IfearZ4e>-+) zem?A!>PvZqLEb6qXCro2wewwmt|5&x^D$qkE_bwydLW+JNW*%z`~2Hm9$ULk&7N8R z)cGGQZSt5f`+)Wc_N3jn)tCZ0PoCS8+jzIf((x-MAL0wmY&u)a=vnji`Tdc2S1I#0wqF+`ZkXfTt-HKU0v zs?X@OQ&LGXH!7=*^av{6Kykb~!q-~D-?yS=yXIY$58^4xaNSySO`yVZ|#;mG`><9=rsJqo7MM-0!Pz+n!TLVq z`Bm0xH-tM~`uf=Zrl69NIAZ9z=As=)`#6tL7t{zQIC~eIv{5l_bqQZL1f)ql$HUB~ zBJw3TWL7xPiuDREM?$v~N<*!u7SZy3TCas%Dk7#mK9`&v7_)JXT|7P$Px9mHN&~fu zktn;o9;z2S@eo=$E?jj03VA7+2b*vP`!sM8TL0M7u~w}GI$3s8SP>B^i}PN;jn8ue$>iVbM1HutkfKrZFR z3&s$eMYNxXGo=;!Cch33kej>lX^U86`>s)m48CilG}=*EDgxf7lB~IoiU$=07NGNMNcp$gD@Rt&PO*B_#uvz7s|NmiMv!#)0Hw!nJ!9R>+p^i$Un`Ejks6nPx=$7o8+D^m zx(6xt)a72(9N90cPHl~{R;`8zjiQ%>WL^Bnh6^e7lt(t(?(V+58>+0aBrs-*PHK!o zB>#=~Uyd#mh>q)%l2Ok7qgq2D{Bq31>V|%)+Z$#jV^@;=G8u#lu3f$)-xpoAs@alR z+W54fP@rf*-@ua9arfE$-r!7#pW-L-h3Je83y%&2a(`6wYVRjpfgD^>KrX3cqiEy2 z?_iXL`$f)oO!rcJ_O1&qUdeM4MN`y}4tn%}Im?L@7=X6+H@(HbHppkx-6>ZtH{AnJ zhfCYrOqC-VtA=##+@&XNPgbQ*UgmTWWXR_3(8HBIjYyhufloLWcDO^VxnVM*;%{r; zGm-*Cb!yQ{?E(6q%|CE*l~l13WKnain*1VcQYfd{E;K-^YNpMAySuYrn%~NwMCRDV 
zc;nqzx4!FSuBjqj?c*%iH`zE1D^~b)u1x-5FW95`zKP*d()5{4?HCOzR*U3iH9 zbaq@2mmA6=Z4Oe7vD)){59038$oejm7FCn!5PvS(xCa-U*ejUB$@!2}G}Y&M4Qu!r zPFI|M75+W&a{QVEiHu*Y_((pPwd9~=JbS;paM}<70y_9J5)W`fb-n=>$@l<+Ifm6k47z;TZJw&z8Azg^zfA-rXk6v`t+A~!k zODILUOGd254{gb^j>;qBc@U_3o%hfB1g_g7cj_x?>oK=Nl#ahty z#ot|w#7jKCqqzBpmmjF~2^Bl<-|k|&UeG^EslfJWcxT)4X&K!VmCMhSuu&#>#Ub9O zZ%9oy_aK)wQ=q0g%*(Tl{%$|Gq~?_~rTKbRTGf6nUl;f!_-<(h^HC;_Dtay4w}D@1 z*N;dLj7Si8*ZiWhcx{py_95JWt9PDY>_Jn!gO?1qIdrKH>3!?HT0%3q#5{H}Z#aM0 zwrTsLF1ZOKX*yj!^?ar25H0miKL1RyI{%4jV&P<}&X8iBAUHmE^!|u_V|JoG1@Ylb zfwyeyes7nMB(Tt~{l&0sx?#vEZ>Y*t_h5azfvqrZ`DAGu_`KV66KIuwXrrtUX-xX8 z&CCeIaKo_2jvkh^y)Q_TWIbNJ-q~zZwBS#P04HIiabs)Ta{_B&sw_GeKD%Gf#c=G7 zGKr*R=gJVuX%x;|$l?yo90WZIHn2<&%8R9Qk#>+ywc_@R(y-GI`01*9l$~nimP|0K z$QZKWG;Q@1sI+(3M7ke5z5X_N?G~7mOumk)o$3d{p>{Zrpi94w86HWgfLF)J-w{dN(n~I=ENX8++!^60AD~W=_Mx zQC8!vsmY$piz0c`PmUlyXtjA#8^z33fC^yl(ZNnp`D)rnUK5)gh~?&*4Yj42sv9%I z7$}#n9*`MO-`U_^YwH&{`5{EwX!87Qrq^pz$*!}VoCNlRTuDM3XM+bs?}Pw}4)|%w3=?NQ_l&b2_NX;HJOjdBYUp0) zp>zxSi+RgR)qEJtwz5J)f~)&)brkf>V`+f+a!RBRDz<0ES%9c-dxy3JU05Xrnr**& z$^Ia|i`cC>RR98dV0S*cWbQP23*--Mvj@_^fzNhJKQL3T>y0+sq%J3%$ZQxm2r(AEV1U1NLb2P!tm+d&z_=d|@dK*BB8zn5#2XagON25Rno zzIU=gjRP4}`2W{Bhq|x2%^CZ(oV#^%w$_$^ytHw%vz@0r!8(0&ztY-c>L&f#|LDH| zUR+ZeW4*dlccDRdqkV*RP9lNpW%ga&)#thk)~~P(5~j2w!qG~Dm4mjfw7=Exk>6Nn z14y(`zpbjEc@}PS!SQENP~}1ZN!Q79t61n%yLa!j0JRLy`BB9%^*mR9qPALh$%wNM zII?DkZ5f}{J80K|nV)AvB6qWhBkSueG@-}dZ|0i=DJ|XJpulLN2ERTyuk^DPY5<@H zP(N#sP7${yTif5N`N$uJ5HBjLE1e@i*)9KF7k+|%3q;|VpQ7-i95aPI*8E%2Zt>cg zJ6pr>f5hMa7fIgdSc6ndkMrls4T>aZb^mAWf0EDV^q-$t{}zXt8kafs1{5?wv8#2^ z4QizBi1a@R&OXUsULO-^H`?JCmOBGE@UlQ72EWmSU%jtg#i3U%FLrvIW^75v*j2yZ zarLf(QEhGHeLoO1RI z4E1W$-2uc2E@J+2Pf5?3(=sC|jG!j_XOfT>*odYz8@xMiuGTp324WIO#@201qhbBg zG%*jWtiS>eC?`>jPB3S^z*t^S$t#imsTgOKCPtCoXB+hl^%TuqGJd0r5{HrRauv#x zbJh^BxyLyPrY4oeFlqm+v5%Y_l54$V(i$!gzjd;F|H8Sk!%@lhiArUD9~XKz2%q-{ zAykvUR|fF%?7pz28rMr-Fip6W@Lh69D9Nv@EIb5=Om+X={#Jvn*dpOcb>!qF@WAg)anBG9PHPk;IpS zZJb6;C2=vNwoDoPo1**kT31-`9A^dx^T@sj2FOPi5% 
zFtAsCC^L8Pl%QhHB?UTlNMK+ePlf_H z`GNw@?Kk>bz3puqP^O-mZI?rze&)9&{*GUJLUtr+aK%5P%4VLMTSgXfEq^>;ZTq4h<`py|ON;c*7U)Fd<>@D@`@ z+KwF~q{H+?8z1?UExC8uPQkA&@&l{Wf2q_M=Wb(h{VvCr9yD0gTwHr_#H`fD-SNoC z&h8w`%{j2Uyn%?Kc3~xWY|Sw*xvgr6OFS$yPKP{gLuSc}|Ifw|*}--!|*zL##PxH~&W_>Vv`-p5$`KC%2NcEO|LWD3e@ zR$tuC{obcRn}`*ftcpNB`DC#iOr75!B{H7;mxE|N{96I~D118mqV-9#^>J}GlP=Gf zDYH$o@yp2e!`|Acp6 zLzN9Z>?5pVs0Y^8wk6?BS6ZzuWT7AYB~hr=oEXfC7m8cG=`x8(R)UQ8p~m=2X{9+% zpKv9ZN5m5BgYHo$dK8rvr(!D!SrWVrD$>8+INW*c>l;C>mjceaN>;VcB{u&+C{7P+ zgzNPn^7&ru$e}%b_5vqJA{UsqKYzfb$m?+)Q)TSmMJ1jc{4xG8i?lLxw~Avfxs+-b z(@CnlXI0x9sc@-KmlT6ba^E^Jo4#uI$JVEgU9c?*=s)6e26h2m#9v|5^4hb}`}v>h&`S=)sNFkf6m7t)~s$1^STuD2Dl}?VI<K+7$Uzn5Z>@!EmP-pmH!wRb z%k6_Q$Z4I+lwZB*f3Usw8Nt&~0Ug7=-b>)5gmtB;8^zl!XL*L49lsg_h71MgYcS`V z1E9$!_PS*b8(Pxd`Zj$I_S?ES1$ANQX*UqC)(`CnNV|W<7&-mu_^JHt9(!-1g(bbd zutVKKR&!k=T#?otrNPC0Bc{&vJG&*7N*9O(VLws3mdx;byW8)}@soPZ(HNscj-#FL zP$q-P5!vT;ZRZqF>)v~y3DOCqDXO~tfi)#l_1D)0WnpmE~ zxO8eCZ7wr?9>#h8#yBkPzCcKZDYXMa@;9N2yZro^BsAs(oQ*)wSo&7bD4wX^@fw{p z9O=_oK|%4XspX_!3;2S1mla5%FbdM@dziKL;=!%;DEw!K2rnz(!_K7V?RV8plbVhj zia&>=*@lt%Tn4F(?;Ne0fn2_*zH`G8&U$Dlh%!c&$fkYHIIo!Y70+`ClF`)uBZ3CU znXeXjNzGiJBz2yAJs|B4X%Wn7o`Mycrv6nu@YVL~unab}nV`7#vKt_Dx+|l+6qCcU zs`)YTYe}$1bV^KikvU!0_>7l!0K_dJL!@NPaijfq0sNsx=--$P;)m@1MC0 z@$$}ln9CiQ>sqx?`k=6djd)=Rg>|bv8!0!s)a~!%2sAnWF=HK#n~|h~C4gxz62zL3 zd+@q2a^EpNddpnvP`fw8kQS_Jw0xCGiqal!Eo17053Yx?{M>;i^yQ7&Zub+XBE)*$ z8(4bOd-ay!X#2FaZoU`xxf?HVGv})Adg@KcRnhSNPOtm)djw*oP>t8OY-EL^quDfcUmE*Ecsc>+6AfXX4MpqX zq_H}kbMpB>ug!zI-_oDSoopVb3o||$d3=5Qq6r@zUB`BV$yd%Dn|ALD^Y(G+YLE{N z(4k7xs{IY2V-w1YE6W183q*)}QXmM2cUe+2r&B^xRM}ai`Kbmg!L2GJ>JL_*Uyl4( z+5O+0#k8R%Q&V*INH5;z2NCynFay&}6H@G3j&vH1eROgfd;@j~RAaF@o~<5x4|1t&ct+=i^^Ypl6w>7LoY;(F@b7jU8epsrW&Nl^;BwSAEot0DZI4P%gs-);lT%`^w zH9Q5`z7MtLy(b$sZvuaA7XwshiNMo>#5=Y4TWv87w3L~9kqh22*D49MclYF4`&b$J zmuFf}1#0(^SE6$1mmyGFq zU)ouk(||*IwYu5Bq3x`o(;{`XL6Npb!>J#MGz`4;EkxJqJIHmH@TFAP>G!#*No2?i z1PPqyuQhsqvnP4b6gHSylWWEP!!T1s#P>$7*gxpXG6o 
zUV-x0WL#@Ap`>gr-26Q*(x2AO5^$zVWq0G(^KgEGISN~&g+Z3aH+uBE`v!A}%c5{e zv=FQbWn*i71s6lpp^srehM?sYOWcZ!q_8dVvrWncK;~R0R#rSN>J?RUZ6{gyGM$q4 z*76uYfVlF|xE$nDr zgTWqO^AcT%J3MmD#b*+1ZUso1Mxgl=!Z!zd=~qp~h4_{c{~;O`G7~ObFiE`vwr~f7 zOpEw$Ll$5czVc1S)`Xv>+%4~x$SrWO5a)zhV(VJHJa5JRxK++&HE6zM_|g0AlD+|_ zN2+-R?RrG8(ug)NkgMiv|7sTUDz}y&fFAbU_g9$+zCsHXPh#l5NA(WiO!G~c{W{O3 zX%G0j(g?u;O8K%X!wD;vccA7(Bh6_P$(BpUGUONpO)7gM2{lz1q}7ayLaJ7@)4Fp? zltjO*Pp{Lnkgnr-t9NsJ402?*1^_Nza4l56PjW+{pH~JlkBrqdVCSDh1R`#Q+R)8g zTa}67s6J6OG{!Ze!S0!LiP}-aKHT_AvuP8P7h}r~-_)j~?PlBeuf|${*NkMpw^7Pz zQ6;+jjJhR+Y&1TfY21El<2v(4ykh9tLW!{gTuE<@|J9<~JN<#)b># zM=5X!UqkTc-SCv26X&ss_&!Tx!6S*Bh1SB4)NAr^W|R=ZBIme(#G)e!8u6A?UUjdq zkb8Yp6Q zdC#ftV;jias$U;u-y zynyQl7hakivS>6=uvb#KPfhrik9@MutqTe(DiZu#&3wZx?ie`_K`G7zi*AT435g^K zr9pJOx&r+jBHqrI;p2Q*XYtqOuYF2cH>;V1M-{ZH_1{OU1Q9=lAjB$(kMqiBtli3zqk z0a_;qTIb%jeqTi|9kLjv=+GfC>bca{)ZAER7h|%%Qsk5*(-IRG(S2|wn!J8ywsXU6Y3;TAHdIE9M9l(f>@GGPK*J-%ebaELt6PQoH5 z5n%F?;(BI;)??PPN&N#Uzt*k?mJsRIMrBA(F9OpTmLr25E+DtrJpoyOcEJP;;}63k z$|IKKWLOp4<4}sIjq)o3(eql z((c(TMM+%YoaJQaBI;b;!_kcwpV{(`%CyfuI zenP{!!mo98gi!QY7lX!-a}qz7ba3dlgcK}YWr0lP12Ie?1=_4GkKjsH=5mc{oTDkfIt-wUZoFAlpA$HrXuk+GNJ+w$uhDH31L>NV=6TG{8ZO!t`Hcih(b-e43Z=i1CmvitNS=0q6ZsuFJivWf8|~6dTOLU83U*6h`u; zMa$Q3huX|8!tRMh8ynbk3!}{5qH~>U1KDfw-o>iZpf>YsgM4d)JhUYk)_xhCgNmhf zz)SX%*AY+7})CID?uWK7AjB|<`A`UYd=#zWD$Nm|_B)4Q? 
zKuiwf--UGkC<1~8iM|omIG?u577tpGoX%=?W?dFT*0De2hhxa@?C0Y_+$38NF5-yu za4oNbD|d$916O)M&dN^iDt3t)H*Z}6af8l*&QQE`>DOkhi(4e0DRdx3>n){5x&-P` zC*8Ijr2C1icPa&!Sm0vl9DHce=g5J&|CB2s1f0TWrZH23|3ED*Xd$+`Bsj5qBxiLtw^BIGebi5nvcYt`o?U=)q_M>TxU~Xy`-p==9OktO2L(ICv z>(TaGe=_x&E^>-4`yWxj`;E<(n_6U(gFkb(=9 zXFx|N@|;PiSK{Lk8#0lGLW{yl-YI@Gg8H&7SMn;d`?l6ic`AW-6zpxI4A{fFdXv&~C6hS@qC$ z{UJA#5T%)O<{iSh&jG7x2PS}}0P=PV_e|SzWc@U2Zr{&Bc?DMrm=1$A?LiD%qHTN8 z0)4>l%~F*RjCH3j4C3hmHm(U4k#D1*ZiZf*G0EF}}by7+a$ASY~_&YT~Fy9LJ?6BYdtH94hf4X3g)yX>H+g$%71)0=B zpGva2_ig*fi}W^KU+l3=9dX$!^zq*^+S##L?Tt}Q@Dme+MQTCgl-mHU_130Qg>s3FB~yusQ#G<+l^psjQipgB3t zy$4Q8aze3-%F$cZqU)Ub&B{QFpMN#OPMhfbyBb-=WL9#1tC7m+(dEeY+d?lL|Mlnb zVC7Ow)ynTHHYclCBiPk-?B;x2rtB6Bk&b1zaYHxPdUQ9&lsQg*;x#!Be)0`V%OV`* z-vT7W_Kz{MlAIds>c{Dy_{453eL3>q70asidCvTnWC_@e|FVTG1`NO71j=pOep$i( z`se>Q`SAbIl>ceT@1FR-X$&y6RJ;k8oeQ)o4owo$At`8^PIW);S;Aq<~KR>*uiya-6d&BUDoyE z`kAfpwQE)PeAvGq@#7y(qkLZzta(r04uh^2LQz4+yrI>YjRx#S%}?N{!glFe&1To| z_tVj*G^B*4J#=FRcsJzC;e+dKplo$KZ6vta5B2-kZY#;!@W5CTAMcvcKdwU8R)1)6>c0!{>ZRb9B!B5bR`z1H{7zZrH)p+2yn~xu;xARnGi@4j5|B>%ufBwyTAas~zk`yPeU~suj)te_6 zKiWQ{{kissnQrcr6C~~U^_x%p?5`-;&t_`}%Pp!~ybZO_y-m>&K5_TnhON_Uu8&5W zong#p>rZFE-U_H{!sLrI+2+f>J@y1$$eWSIOn1i$@82d6A0=z6`7TX_Fs>Z$vb(PI zsp468eQSp=9aGo4DPAsJ)xYKt1ReEGLcX+?9r-pQ&vnd-l$8@`fI{k*`abWPKj-ZB z8@#)12Gm^a;^jKw#Il2@>}XQ36yP!n;vC!PTE(s&TRellfbor97h1A^myW-pXrxp>TF8AR^Z|P2(HCQEkDUo5=`&u~yP@iw zP?crY|IXaUw<+YE!W5o)@hAJ%cn7k;Ge#v8OH-~l!o~q)MF-K&`>ydusbVrjaV)zr zn;QNVb=4vrV-vKlg3o)|6}JIDnu!;2R#y9V9a1yjiC?qXWJv#dsLwHeh`dw!fjEo6 z@ID%!f@kmlDb$W4ewV|L4U0)&#sg`;Qz2 zu@{$1JoVCF{g4o!wlF;TdeQK6ka|G5?RTb<^p7!J2 z=7`ifbTwN9MLo@^zbD{LU zT0%IJy+Kp_)qyko_rv3Tfnmh>xR1H)5l*c2Rc&65w&1Irv#v@Z7CUV1&q-U>y!|mm{bT>rQ4ON1jt+nKY6f>U z)crT5yM#B_^wXX`-pY-`bJf>F$w{2_!(F`ciHgS@)$N>5x-zVo^M%9qtbt_SgD4l~ z_wk#B8LEQsTqELjT1fr*_L0EOY39sOd7!Ul1;3Te#=GJdEnD^?Nk-ba-R)cVFIovI zT6yt#A;CcyA1bhPqHw_Ap$#qk#;<;VLRi@9cg4|`x_>S2G*>?~#R@+zC|NYT z60Gj>C9lrm*r>v#SE}N& 
zdIO9IZG&9VzUr?Vx<)8lfrVPP_}OcpyiZqaruT~>`sDd$W5UM}owVQh^Urn&aQ__* zrG1!N{c?-E2EEF*x>LL><$KVphcBbHVDFJqjxxAHx?wMmy}~fB zw6`L(YV2+JVUIrfOBq<+MngpoTR+^>o9MFEJ!qU2?~vd;1RK@WAI*kd5Rj8GJ7+u9 zZyBHBu07XxPUF`L*p@qS(Rz)C*nM z%4}AOUbX2@+C3yB^uAkhe}M~XV8U5iC8G?=j(wZHCe9XPHs_pjmDWdfGa(ZkofbJB zrOAw*Ng}gEZzG(~7NybFvUw*Cb*Tnx^7`cm9ZmXE z|J^C^x$zH@ziCh*7;8 zw~CHPXu%8Rm45qdXMX{Mf_PpAM9+?Oo~5a<_9yCaM~#T=ocdf3Q>qBmK5o_>vZhMDOdj$LLu(~?AHEoT&AJJ+C7Z^AGH1o z#h`$ofE%CR{{Feos9kx7^9~>Q7kEFi;wso!1l(l&bcRY}{(6}IWbhje|NUR=v_|&- zUx)02^y1%b-uD0ah!l62YqJ`1Ekcyc8lU^jrPJyC-aq=ZspXze{os-Y`qPx2O`^*=8W7Q5zgR>R_IbX3a9Iy7z zvx!yI8aStI^Go17dw2x;-){plBz=Bn+m3Jsumd%j?tq=d{+X)#@4f(5Rcmp&at|6x zURcZ0&ED^@j==)+ftH468(YT+^v15+ia-4ZR6l1n239c&@vK$8&&Z8n-9^0i!P^%9 zB~`Yd-5X?8^ZgI$=zr(l?<@WeWrJ|UXc}fZO=n}MYTZY7V}`_;2?Rk1PF;h%hdBd= znS|+VY>+lL#yRqz|G{?Pp%iu*ym)RXd)i8uFWYbzfw<9$T{Dp+0lZNqeeummkbW*q zyT?@W#K*$^8;i4W?D|)Kh^L8SwI=d_7~!}uEcpQ1&HLBn?ui$4T0 zqk=jP@qxoH5T5c}aHS_~mhL81a?I&yk`mW_RMa*>ddo@$&!UNQ)cm=AfgwTX+3NJ3 zd3Nlj4AM$kvBfwah4cw_@}F{T`BI*}+m*V?Pd@y?ULkt*v?xSon!5$w`h!|>QpFhF zouPY73g^*d+F??iI+FHSf}dKUL|2(!T)U{hviQoMA`kUL;~Kuz)fK<^hj3aFkCZuP zr=s-s_JEV|&AN69t#G9)>8<7-`;t!0wB`=Ds(9%ymYK5-UXj@&1uY9f)*EQ3*cqd1J!LdI|jl1^@iB^S(#*SPPtD^~L zOGm~T&+1BlW#wLMu=iuL;za}-My_{>CR_beC@?7s>tZnpPub0x-*ZKq3-_mh>mtD| z?rvcc??F57U1iWzPg9~pvGFrIw31nANyB3Cwq-W#t9GN_qm{A~C$F40$IGjH4d}`C z^gRKPg2cXeBf~TC%{#Q812`|)GeQ!@QM9hxQ+KlNchacO(Q&zlCijCv(Bj3$Y%)`!8J6zxP5N@ZF%-ao&gLe*} z3=gdTJFtGrV*AQeYO*?V)9B!L;nrUBPbUjs%2#!lNnD_5H{N0NEec?U-QLE_qd z%Hh7hxbD}Pu>8Dp6L2wSxVHg7sQ;U2K^PjGjB=%meAJo}EKI&H=XWDEdc67Vky6&t z=bWTM-yD1G`sE?XacKUWlSp|~EeOKX`75p{bX9EAeh!dEV;7aRL~KG)BYS+i*Is(R z;s%MU#fsJug62!!@DZ;A^S}Q2D@SBK9>fSWzHsPZHph*gHX8`*_#ol}iz z$0~(P!!mI-xzH!lTV(r)_18YVEkK=OpRSY@>=yjtXu*l~x88wV%^UNeGc@6a zPY~t`2eD^qH8u+9Zkug$H64gswI(dWhLV>#>mH-!ly1RZ7FyN}$IEfZ-e2eTWLw_H z=K^oJ0{p&E{;C(u74QoX@G5+3cgK@p0srpNH70q1Z3aco-aHkIGX71&99a_|gy`_Dy zkc8|@U*SGW;zA|pe-Iri9yRNiU#^rE4JJ868mma$<~FL+t7|1EWX9;W*uUawCVIs; 
z2QG`2Oek%=9@y%~My-8*J7y?TrPpCM1nH{Xaf&h6h^c+u?~YPIOCBCpv@~(c+?fuS zQ{$BooHC@fC7T8`%bt?mSNb)TWb6CURP@f`{Eo>#)c1-H*(EI!Ug-F%^ZsSagoS>O zZ*lcJ&9x5>tV6TfZ9sB>y3-Ygd!J=KuBUX&-asi0zkLU50boU0PeG+^{QcuPgs>5{ ze3>1ZfZ!|9Tt28=<7M9t?=2d0SCVNdJyQV%!v|vMZ#`9zt-1{ofuj{QR@$&&6k3d| zLySA6eSvdcLF~Bvs$D{}O`ai_)!swc$xB4PFIP*<%8&O_;13H5cU;U-3w3jYRm{c( zR9`XoexxYbUxV~-V^S(9XP4=2ocvRZ0$Xl8VDRKWsj^aN&AVa=ydKBBh0X&<3UJU? zZbdmT3ZhoCR52hNIYSY4N_tpzum$c(5zC5!TB=Au1HXg+Yt)e;4C}Z0=_zk+OWy+F zL)S7K&bqyCLbQL;;(g|$EZ~-R??PjA>l7@$ws6&WuQgk^$>i#`>Yp z`loC+pWkg+5$nzX_F4j&!?Kbj=Gn1RbChbmzdG?EopSrl*AeA(g#3q$LxP*9C0SMu z<|lhHifB6Y(&~Wrf@Pm0pc3dkAlx-#6wMMnOk;Xr{1z1<%8+dnHAb5t8T5BcOj;th z+g99EXywG`7LPrq)0(sGj|@xZ9^oPg(Wqx_el`M3t;?R6{i)wh=5IFJuEGyDo9jL~ zub?3C+V1-F#kYio2?xDy)BHvXmo@33*~5i=h?!q7?(Fe-EnnaBA;;Tk7PrpcAz(J` zUDySO>8hURDo{<+s070juER9SD zB+J!Lsv%6awN@(aJRB5IH};UL!zB6jYbLJrVcKq+nEmd=AEpjs_I%R?Z(lpT?YPh-c9zhUEWm!@N)W31LXn$?2UT;KJ!7O%R*%oH}tYXYpVA7_40Y|ZSCMG6H}U{Ob#*X z$@YxwCFK$c&>ADOX1dn_jrSKHJ^pHw%qzMbR5W%AD)&WmeOh{wXtd%L31O*KD1$_h z^nkZ-gtkEV=1nAzLJ>il0p^og@R?sjw5ms->Fr^4@F3Hv_KdW zq=ib5X6G27{PvPACMLEBBALQ#|23Dc&grsQ39qzNq6Ah3untR8m%hW()FT$F8qGNk(&xD%51~? 
zk2YU0Ye3zQmMPs%(L#Gz)DF>?Ep}*_8x#pabi`Ku9BUt?-rXO$ptF_s9Ve$tFiv*V zDPI$|C;xs{FoG9$^Cbic+*a=M{Bp&T-R$wv!m4r(@bHfT2u}N}oo(p`>ugN8OHh)7 zWkPZN)t-n~BR2+lzvO>RUDCnA{ie^t@gr;{mbhV(G_Hgmw}XuXWN z(cOjwmL$MpU{s)6R9S60U>z9WtZc{{YnjVeWjndgw|uuR5*#1(R{?%Fw3-yzsz!P< zpyR)%Jxa|+lMoJ+U6^asIROjKLEwzRh5izDT+zbmh`O`d(wY(XPNkQSiyI+jF03cn z+XR9g51BjmmoO_cM>5^JHvF_2*NUGjdXDmIyPNTJ56A{d|1`ZIEGbIn)(UJnr2ubk zzq+3fbx|Q}X%FS9o!`yMH2z}wynu>b&xE(?qa;A}h2yp_Y9#OOt4BF?trFXTNs7MC zc3`0MMFK>gqPecwmasKCvRJ~0x^WsVA0JrMD3>Cy>*juUq7;N_w(cjPsC&*o`A zID3~>5pb^)dlgrjVVSBvzC%mmjGV7MLR}=kfOvh(MD@bNKQ&f(;W8ebtgox2?KWpMS)4^-|6J4LtGAq zvCjJ{;1e}~9Ro4hzkG4WXRypKs!+M|Au9RJ*k#e@%~zKM=hT+WV$noBT$-U z8MUhf6CDm9MP8#69H%TC1a1Raa~=Ib-bpmpK(FxFAA?-11wh7V<~dS8hYeSK(rv;c z-XCYa0@3#7z=^AURY*T41B=M92NPBy4XjXcw2-P%ZMBV#*s~OeS2x5Sk3?_E31;Bl zIICB{BNA!VOeD3enJJfKi+R`bXYoF4&h!|1-=QrNjx6g_YX!XackIUS0#21_bb=>8 zW_1XaPQRGnSqPOqV697vSfkvAfmO7dna zYRU;%vZvzJk3N9`Sm~m#ikpRfMs0K~k%A1QN&WP5l9bn_*$nqE$xpd2ZjCmUJ;hl% zcDY54Ios!;vUiTellL%|d%e%SYr(M*m<)(7Lf$jkVv-ImK=5{U+$z^c;vp9C@0MdWw1f1)f-u6ye*)9 zBoGh&bQc)q$vc%_3Em^|Afjk*?Wgj?cU8kDC*>u znsj4UOCw8kSTHC+vSY0AS(LJbhlr}8Lm$rBtPjQx@)scjyfqGSdnmQ(lA!u*~Fiy-4m8JU2#r5EN22k4pRH9p#r0Rx~K6^I3 z8H#w2jHis39lGJEuaNkoX6YK`KuD=pD*g9<9d!m2rKljC5kwe@h@q2!4KgZw0fe2s^ZfSH z_OEEVJZ`z;cu;*4uo^<;iQB{FH_1;^R;#oI@l{#(-@Zhcq&fr`+9cXck8t) z^I#n{zuD=(_DHeYBUxXLjyQfShmkoB;JtpsVE@xK9AkN47=a}XALi1FxxtXfpAI>x z>;X`ABstLv;?MdFp9?F-DZIjdJGuY$#|DH~zJ2Fv$~r`oda#F^u!q94Oj7C_WTkpw zhjwmf>~N!_!-_0_zim7kJSKaDN+^T-RUe8SM$---;*T#jrf*&Pr^@jE6E)pOE-7p+ zzYJZkTSF2THb(#N*&w7lsxdCsO7X;pM0$bTZ>(RfZNRkUL z;Qzk^q5ija@&9H4e^Bs^XXJ;nT7Fu?>Q0@SGG1jv9>Uow3<1`Qe4Z~CN^F@)woHpU zf(+1F07-TvW`KFeAj))Wgcw4m3CsgN1V`xkS3ysxTdQR|qfO76BAwRDsf$5R3Puyy zLgF{JpEg~guh{FZW=NoDM*%DMnQEod!k~Q;PwR9}QIdO2LP)7Ce>9N8!T1m(^CblB?cn5 z8uCz@`zJKL^78sJ`Bo0-B8v?izM5>qgy{(fE%#lIhFy$6sFH%I)P@crmUr>V<)1Q& zG77hZW<6AU2|$eKaa2MHwxV0ZyxBGffD2qfL6iF%ixNr`LPy#4%KSKJc@A)psS>5= zy~Bh8Y8}66o~zg?OdnR<%1~-E9NiuDTduc>Li%{!@&hWGPejirZ80gHaji;!>z1!4 
z5FWfv66)4NX+HChpE@J&^}**xSbZyU(X6_S65NmA30jS?y(3Ees4kFhDQ^zqiaJv1 zTL#znOoN?iK<%5@^Bht?TUaKYDtRUq^62oIdy33a(-2SxVc|zWRcmwYqb)=$Cw3W_XDL<`s7Sq*vV!qdIl|qHg>wN z&=k{*&%{u0;xBy(Ll#C2oil0F)(}@_eT-^JxOv`7wmp1pyP>nw$%oe#%L|k{t-HhJ zeQkTEb@GVv=x(eyP$XXPiId-8Uj4)Qs6%JYr8L5*A`IO=37=`7tPQK%q1T`XL%T46 zu`_gW{6L!tW;z#;AWSx`ltZHBerZBFk1QkIC}(onCey8~aVa31SsOssp02cX!+)Wrd?k!=%5<0NVlL_dI8%S_2)Ej0pp@@^(4M!N8~StHxVM1)}f`kv8!!| zU-UPg-osEw_h@$o1TPdbL2?SR-R>l*TW-08alepKkm_is1gKrI&NAf(y zuA}oX#8Y3ee4wOVaCVmJHjJ$|D!lvHQb%D#7mnZMz4Me5nCC)yWaD_borW*zkKSxg z3CV)nRMiv)>IIKI)P6N9+Tp&QGIh0{Me~X7_J+Ovq1m9UMKzb2rFZ^YTZtnkTOz*> z8f|?XzWgww9ml#;Y>WN~rFUg*$3dHTPWTTYL5#`;YTtg_iCYhCPF8A(_+R`@6{mL zmQp9wFw(s5oa{1;**+$@nwKJO)bv7Q@{hIxZ;+)<9y2ybWl^dg`5RAyQd|Vz0tT`9 z1e=nR7oC*hnILwH_%&bb4m*I-NFyB2j9)6@m?<&?+QpZyieo*PGJ7dQpNn2w5%lc~E|Is z!k;maqD&yj%RfyvzYC^KUV6@EHL}-;nGUfk2*bWi&{9z|+UERYz@Q#-+_XKlf2%UH zV@&LdgVkr#a2>W9|A}`%jb8&)(}$i2((e;z3Ar=W$^>kz*9yBx83olwmA@WxP<^4C zId;<06=(MEYW%=gmGWxDvyYFWqQ1Y_0}_IIXNzBuqj{(;e6T_cb?qUI_|2z84zZhh zSKn?jZQD$2L47dXsdM;u0&>1Ww}u}w(UJt_8K7>(TkYdb%Bc&vLHFD1x%m6_lW(gP z=sBu>C5&Y@65e-5TQY#~B^;8VG-WZXlQAuRtJpRpONZ#$H|3mG5LocW+n=1mc7_Rv z6ZNTGgEFmEp5>UbPNm>wPd*2Slyz(SB?Dv(ym_-uth}e<>bj?k116qW<{c*MzkoK zAY9Te7!DCj*MwjpOQkI5!z)GkwB^;796j~Zy65I;r=@fJa{kat$bsVhW=Y4nZ%<&WGA|!^_x{4Bbz~BdB^f)fIznEf!ZH zI`KRPV^c#`cVHtI@7qn>*h;~x%A>7?7_%TI)$}uSfDcKU+b5g)QdS0tDSjWD8(7vY z$&LDM)8rgnv%v0iNubZs6hNQj)Jb)%`4TP`>Jf(+bc#DQgoQaHaip9byk~RxVl%Fu zJp@68bLrCZvZ~BxU>HehmRUI<(^fydEXBA%J{D@KJCO>cw}J>Eg?hcxBA*8s-KqiX zc`OloVWivI;Vqh)dYEFB>EtZ7(*M2;;Z+va*kVsf@M@jSKHV^rpme}&FYtCi&pxG> z2t?<8?>o2D0l(T)H~R&GX_(n9r1`1--(>pDTKZRkZv}Y24;;*?BY%A%7OS4Y z^0t+rjYDQ#qJ?Oa)GSXNkY<3#q|ME2t#7TZGUgDpHME27>hVAokHC-=^Ss}@-ZPfX zpyw4#kju0=u`fZ@U&8m4z*fm}9)SgeuEz1*D6!8?wfWK~M;`!WGY-gA4mOL*qH^-k zuhs(NaW$21xl@pq(=wwXnZdY^3O-1Q>tcl`f->>qpJxR>Z>k5gs`pi z#NXo&q5erA)L+7jq#J1mP3}Pg743jQe!`s)puv;&pwZt|$EutVhwRCkmX$TXm@w56 zav_u?V!Z~~H(}XI%RD5D6vP-4d;iHW!c;EhGt 
zUfMtjWf>(?lEN&uoeKx8=*}WTL;)#5UzO{+=FH#cS%{RflP$;ONr9v&EMOKuo~NWr~0y| zsi_9;D_+kSLw&W25}yZX393NI$GgA><4IBS3XwOzb|aNk?Xi-wWe^fo83gD6-L1Za z0smg?^9(@IJQ?z0xUg@h?+hDW+U)v92aH<|Uj&MDJKC(}+N`<${hY5q;3Yn?7`IovMrXdwJVmVIA^60!zAIMNc zg>1#X8#z z>e~uTW6uN20rS!eNwMEtXPSkpB|&Xy!^q5FzJ@5HWDEHmHhrFP9);Dpz;u4JN>4Xl zS$zs|7T;p}(`+Arxl??l?4j@TT!C}eiLoE2E8t6KPF)Xu?yk68esXr4D0WZ~|CGLa z_zyXMM-REc^O`=K%gmI(xD%!&n7(4ltfZFidaAE zDgPq-ijkRs=%8`N*6$+nae3M82<%JwQ($eeeRd}!OM=tPU&&bO|}q}_vwt=%d%+Stj_d2^yg_HUeS%J2xZ{) z_?`OS%iKcPQzCO)$HKtQgqU`yAA+AoCEB-Jz8&kXj9+PE6}`-h9P|Q8#t0#X&l9k~ zWGX{_b`7OPR7%9OTRumq3=Z%h$R9ickqP+^RJA|f%3ur!s-OMW*PfOB!wVb;?H)GZ2x+)voK!dCcLN_h3>~#U@%~%tM z$t>wrt5;}L9$Q%oQ=2G8IFKvz!a3d~#Qwhi)nT>W@aleZ`^L<-uNO&s(Sz3>%P7M` ze=cr7cL3hoYF-I!8;Ik3PueUJA$Fe-fLiNC=+SVOhQOd+W;bw=l@17cK(TEw($#Zr zQCSmt~Q~LKf2iR)9p_5NaH55(flpvCd#5zMhE zG3i<;kaiRw?%W?%8{$NS*29X#Q)kUAieq*)*0DIdakm5tX#DhA z#5|1&V-3{~jQ)-StSSHTJPVECaKF%ARqul93iRp45~=M;nlZ89D;a^_GuL4ZlNqB% z$IrL>t@Hz&$~2MWDmDAAmNnjvpJVO}LfyDJu0UNEnKbZlkE%uQK|RkE^(W?j$8#2Q zV!}MH*ZjK3J?0Y3#wUpj6EAKV{yhvO?}%3-|?;mOF{gh0I}SnVDr} z6dA~dv~9#@u*iU=#4=5o`Ageo$yp-Um-?i+nTCz-krNWv;|oT+RZAM=2io}A83;|t ztsp$47O}a+iQ37M_0F_Kl-qjL+<$%Vf@R6{Nsv$BwD~c>EN=<=K8Y*-SKXzMD$|!u zD`R6Qwa_o3>#Q2sXT4Y5jI!N};K_!89xQvrr#n_lUl4Mqz7ee>cf3M^Lh-}MaW~D| zfYNhP`;x0s+k@(nW<&&>7It0i8UyEvD(Zi@d>S$7>5WJ$_OuXAlzX$rjP&l4T)~U8 zPLT68E3={Y=Z#*$_h1@r=`UD{UDK#tT|PNQkWEVD@22r+itC#a?zaXM?7~{*n6`%V zl?#KWtP%kKE8@q-ee6h1l67-P=G)W2jw~Lb1~9WfK8FGMH0@g6DjlRDcM4 zQMX5_M6A7x%|Q)e)nnb+_GtIF zFzXGm^eGE3Hx!^B$;7ZK^ZdT14PueWPl=CpH>@I@Ohuw~=vuy>C)N!8W(Suqrffl9 z-eH95#eoG8W#>_vmvlD*YFkqzgprEBak@cm?PyAYdHb9>KhZwas76ZJuRh$=+SK))>XmH)UaRV6afvWF?vtigDPd_N(FzW5~>%cRS0`$BBY{50@trr1STsuYa0 zXr7|oaFe~2NL@;voI9Q(JAmNS3@A0Yy1KuZce&OpF(cPNq1ueJh2|?RbpB~GBq`gy zCTxzLq1={cQJQpkJ0tB9|4n#axaYUvCKV1MP{}j(=4)SgxnY##Z=7b;(%PRXIk}cG zKeo)`2Sa0Bi)0Vmp{?WAKpfSIs1jbY*Zm%$bfO|}+FVL!D<+^wm`b!M>`H8fnRT4~ zrhK{ZJ7?5IDdj+U86nHVL0A2?GjU(r_N!%JT5Mt8@zex zZmF&2%lxN1IUY&_MK%jRMgUYzs-J#KY`bR8P5IMW+!*R3f7@+plk=v2r*jS#*f6lH 
z#G}|ue`0BL%bcxca;|q2aT(VcjNhHJnocN)prfE8WpDbT~xKhk_!`E z<9X5nxQiC*CeH_ZKXcI?7>REr3b|goVDoyjw2?9wWSIn#K8S5uceWAM9pQ+8yoCnR zH#dSW;Mi{A!Z-4#nU#s(r@KqaB!SHbb>{HLY|~;F#cEh*bZjVnR(qn_5VgODEf|qP z+9%DW7}>D#L>J?D=gr_~woBEWPp21g`CK7(nGB;DY)cZ1#0LH0vz%|mD$9o8D++z) zufBKY)tG|tMcd)<-GgT5O0>Wrzn=>>6+Z=B)*1DTV{yprGXIuk!FiV-WU}ruf8ZmQ z$8C4aS{y{${lNRQm%gJOS#ZuiwBXDI-*f9g@Es^7oCJBMpR|B0Q*7Or020gmz^jk_ z8n@FV`(U_KR&7 zFnp{z|4G+o*U^XnP|ovzGhoUSi6IOAuwUldY`4ebF}vCy{lmG&uc}!5MVE*Cz{H2! zZZ(8%uhU_*|5S@5Bdh=(JdnqE8`oRMGHwRkGwx0fnRg9kjbn6P*lta%g{-$=wj$ps zcdV_>&D!p0ODww3eRS7io6^c@W(7cO!RgQwcyc%Prhc&gIy**fkMvH{-C^pkk2j&_ z;GVN2C-o%3GALZx13vZp+KdhJa<5De@I9oxf&VEw;?pSM@T5 z)!H7nC-)5s=v$~_0t=)%MN?cGxEd^-km4(1w`9d>f;fkY?Q+E-?-4g{?CbO+wY0AmuO`9BI1a{Qiq}pQTkBl({MCIv z-MeFGNuz$`882Vs?h|?6C}i`L-9u)lau-}v)IJLA@6tS*YcTJjjI)IdL86cY>fJ9! zz3hQdQ!_@t@yfnWoXZmK-%Q0kq(EuBtFMG0!=Cw}m3_Ek&W2Tg^q$=1JA~EfutQL2 zq+80AZ$5`q;4AVjpy;Wa>G{_ z^J{MD!vLFzKg0gT{8bbZ1g9x=OzR?t8lSej>bv?d2#6XCo&GX~bDfI1Td;XGC)ldr zh1gCLa+nPCU%3Tz&C4O5tc%Mff9k>bBiy9GT787GPMu%S@6`4CNv8XZl{BMHr#{Nc zaL5l*@kfI+%Mzn~;RccAw&pkZaT}_+FsUqj7CiGF_3j%YNSSn7VX|Y;vioRW-Q^9| z<{~ToWwEI~hAt|E)ahiO6snmJ(-aUnzfP)E@|0t9#DYAE&$5;(%Ruo3R@`VOMdRaX z>Lwq+xLx;oUC}K_5tI^1!OU%LXii*QkEa@q!iw=DJfm@MQF>xyW#Y^HO1ZQsoY^Zb|c?07W5Q_MKUCbYA(3tVG@(;4F~+!RQu zh>Q&!g(@`*9>FfJ2S5)PbNfE2F5L>mW9v(_A6S$p-rcKca(@umvY4j-tzhX2Jo7bB zQB9|L@#N&c&OZ2St8ABfOLHEo!-dG{;@R7ytaQ!B9Z>N*=Eh ze}wRvasT3za~enOu8{+lBGwKh^>5w)fn4AhJbHXm7oL)I6!0v}2>MHcBQCgUQjyDO z{{HF8#pA8%cjA%d>N?E!yZVn;8DEB)mXXDF#2@1+{aehVJ;k7B#5_#|P){V3`jnv- z;%O`&`PJ^7t=Em!_SN+#Z}*(%G2Dm~eO=I+cjdjLXJft>J+Mnd;tCY@y&p*pv{`Ew zHcF8tY1apmtRlYx@jPB)AiMqBF+1Nb)SivW&*OJeDMQbQ$MRJwQLlxJKcc0~aeD1C zdF7J385liBP;_Lll4dDUAQb_=#LDHzI=0&vWXM!k@J#us=A!xZ2i6skv1|Q(KaLOt z%Dl(mda78mfsID4IABL*)cX`~2d!5cZ!YX+H@N_Fhsb$|(+S!Zf|I_wf9G5dzcGkk z+?5&MOT2eC00Y8c-N%%-%=;w^Bp6X}ircAH>L#R|m69(|<+{eQQqbym%$Iw2$Hwxl5wn3fzdKx^xWRZ5O zQGFs6Zn%`;F=&H4Sbysb!?SNXJi?|HIDop>*IU7~0RpsfqP33eA44N0F*D;1K5349 
zkk)2lOat1lL>OEb&Sq$J2gX3{-o@GctCgjDuaeI9;;TMYENBbm&ng5Djn)#j1~*Qh z2sQFO%FA_7`pf>0_?9;1Bfm?v#1U~l4P_cya}0dpkxKFK+B|4e<|2vLnLa?D7z}#R z%6QH?6Ky8)mZW4X!3e~1`)_FQ&Ol(=Ls;FuaM7>1-WwGgPue02B`lx1grenP=LgIv zDQQt0S1hB;Q7Ru74tBERxA~}=B3SUVikf+=FsG<6$wq@r<#yT321dR{QP_kjsu9Av z$j}?y8cAVW%TIXQqtTot$yocPmIA_&ouR#o=}=@zZb)wCZIKwi&p1`6O0Z+cVG&pHk`! zBu9WpoJ}& z-};Q(C}+GBAD0Mh#~Pl@4I&RYr^y|0W+50a7}VypNKca_z1>s)A_7Dc91tzvOxe+A z`WJ1eUP{ITzs1an2MkUGE65>u^|%juQbKH2ei?*}?LX#Sf`;JXYO6H>;w~#cT)UPa zDuWrt8bD{S;wv~9EK9pLtDgl){Wkmq!G#+Kj0kQ#;^YxHMFzoWV29OMqk1hyHZ?W{ zpsnYKN(i#Z0CD}}rfFOxKL7$etOGy*Axko3i6MJ-#RG(1MZpM>LTCaZ=D_j1p!yh8 z>_lL!U0?~ix*a!6cVSPJ!BnkBSXy3LOp(**K0u8mQPhP&6n^4ux?14RCpE*iW3G_Y zcjms~pOzj%gqrEhRVFg&lS>t_qJA9oepU?1OvK@mb@jpgUF48ur=04cxApaa1_1lE z)>;yAx+rR^wS-LrDAK5u1nD07hotx%R7%Gox1=uOHm`Wn_m52bE!zATaLqjAu`xjo z(3Os|hsAMy9S(TnBk={pK#8n~qM}P9zSZ*^RrEQO!-OTA{E3Rx;-ELw);uhm7=|pV z#k?(Tv_X^8nmVG!Ugj&lMdo&j|J8G*eUk*$+QM0yQpKx_03&g#sL|=Q#vCgMNaH>O zT@_FsN81_s zg~a6IZzEq(7yWku|K8Rvx$sucp_b1hmbXoda(_ND;3M`xhR<|2h07o_jGgw4Z!sl) zk)+*EH^P(-4CZj=kU)#`KtLqO+ zO0HF-GdgFuZzv*Tfs|B<+QG#&=L)m58gvXb*0pb48>d!oPS3mIVDga{FCGN=0sQwz z%Whj~2g&z7swn%-?KrP?1}n%k_%ZfYY(l>a6rNc+`R*d; zb3TG=9u3FFDP-}64@(@Z8{0FH$qG%)PHXZIDek6ucjA2@_)+|;CNHDrM)W!U~w*evLBiBU+xjFEU0MR&L&REdF*mR>had$B;fAdX9GvZQNWzv9$@L@(&qye9Ox2rq=c`lhp`?4B6)3SFoL)bk{4Bd(YC9c6y+d#Q4tHZWJsR z#_z?ud=KOW;|n6!x30Eqy8#Z=w-#FePwms#>9mTm?%Q@MV-Ce0N7j7aVljO(H_U^{ z^tm45!hp?c{R4hxu_3+woT0O-E^LJ7h*FSO^Q`Hh(S-P*z?e8QG;GtUA;Ir31>v_^ zJ`@zlF1!5#A8R3YJkd~zGDEg6KzjOKp(C-+&;KIR`y|(4fRco zBtf!`%sP$Vd^+k-WKTUKIAF!CPJNy+A9ql?we*LYH5L1ZnSf&dt*Ojs>d#Zcf9>OE ztWF{KdXhPbb1wmQAR|kHmX5Ozkamm;WB!WadB%@^zG|1XbUVP8Tc1422d!vv54}f- zI|bpkKFGMkj{jalwBV-s5P^}@sG=ex{YBclnwM^_+rbPmOLZd~BB_mkpe;A$H-SpS z*5XhG7+Cp4M7czeh*MvW-J-DWBkX%AS9|vLfdB6sE1`%d`?(Sonc;G znnTUI6NOeqGtKNk2wXEl`8t#yFjT8f&9A?ya!(V!Ak;v}?toU=!jt(q3L-WjU|tS1 zo)gyG%PfzpFPD`Uaaa-Gd1f4H9xCTUVh~>eu=YeA&+MSr4oJVqO_3Iu^3(DDmP)Bh zjbFF+^PtTI$&Czv*+s~4pRqg$@8BQy0SxDvL098pDI@KHv$yqxF 
z2Yx&gfkq=onKsvs*~S$Wv3JgbxFMhZaM(*hA^)8TUwNWs7?1!6I)_U-5+OV6qk)B( zxbcj@rj2Th3XWQY@KG*&-BX~}Y<9Xk_xw8JPYv;y_>Ncg{L^!p%TWjQ)*ZXj=!(#+ z)n{|&4A)|q6Ya6B>&jIiA_BEp;M?Ee4AcUpJF_GfvqO2trm5yf(W6+Y_NYDP7ech8 z1kn6^6E>DSq{XsUt&Fwxml$|EU5xoV67P3&jq%a6bRr5+&Oz!w1?P0(MY&;r#&jL3 zYj|iaq=QlfpnBCK396{Qwnq`H@;y!swKyGU6!EejTKJZkjRu4O9af5GMztUS2Y)IiME++DU7X70uRGtPTzHB&o$xqt<%(=<@blrJ zqp!;0YJ1iy1hx-`c+OAOEhGs3T2N|Z)4|qKX+B*S-WOM1O5p-x1L^l1`wq8Tkb{lV zLay4-+RLV5EHvuE;Y$#79t8qZDTA!`STZ7nSj%548PCFp#2pwTf86`6fX4OMf<22n zDQdO=AZPehE1l}Z6Ht_|_cU>0eY4+@;tTrraUSsONe z*}=KWd~%DdJNZQL2$SvBFku9ccS;ggC~IZm#oc|wbQzNTZCIh58KD#zU?yq-5(OhH zG%jBd%x)qqCC(4hx9~!b4H?wL_$(!(f~aXZvT{+<-C4S9kJe)y2zMU4t`|Q=_cghl zLaAFoO_@ilVB48=vR z{Ox>BuxV>nRw>kJ@jjy&F8U`7L8K)8IMrv^8-3TpQi-;%9oX*MR9>GL3Ai?(@>k$nMT+9C0e@wB}1*M{7 z6t1?**0so{_H?c-ZYbkmZCg&9fprf=Dz15X{d+}zhcEjC%&r;x0i0Z>nKEqZ218$z z$F-w^+Crpj9KINj@BsReMayNfAA-EKS1jX&SVQEx(@ghb%NHz*{wT0TM$H4goUL`_ zS{^^g*6XLUZ397gGI2jWm<|91Ev&b2fhG#Bu)mK(LYGmP1PRoHRoQ_%#twRIzSv&C zcNdu*0B##iN5+)VfYr8k20F#n8pvc8DQ;Ex$5ilxpqG%&oRdIo4Y6cPdK25)tSk36 z#?G`}+j7hhfjZir<>W~4hMJGYO_YJa{>p(QE@QF4`4V)QWcn;h%4WeJxZ1!YaT`7S zj+WQF`EY^Q9PfkwHhTh_QpL_hkx0PZZt_l)$}-R>+8#OAN%d%*NI9wZs1qTJ>jn(p;RWv3b&b?vBU}frKG(NIZ4A?PPRSpQ! 
z!WpGMWkeq(yTKp>0iBnFvPO-rdac#mHBpEu9I_t_$DJC%<)?eRL+5MN#{@a9%1^7C z>bx#Fj$wE!8dv12RtvMOg&V?6w|BMOne>!$fV0-)05RgtoBOiIT3?;LI)7LqS})Wi zAu%e!b2lt<;b#N?h5lp47gMS9uxxyGJl~dmT^4oe;QIkmx3`PWb7K*?xA`tkJhAy` ziU@kaG&ERbxDQ04AKNY{$KD9rG9f3c%XLJeA%{3A!OOq(jr$aT;8tdLR4F@aUr&nb zMxL-P^TQU(RqDno5beII14*R85MLh^zu=dn;Jw(c^1kPwCPC|^XqBMr$ z+6PhY%#X=@9A^3zC&Q= z9ZDkr`JrZ6=a+4T{FH7d_Ka{i!8nXwDsbw)kFW7h<@<4J{P)W)=pa{gHGV4J4NAA~ z=Yrd~w|xH}^ma@{uhYQ-gDA)+fqxQ7!$jCP^?hp%BFJ(4BdP#-XroMA<>P7AN$pRZ z|9Ha3+g=t2^YS*rV}FwSr)poF$o2RuE4j@Z@mF{0|5M2^=iOufbe{jtpw<6tm-dk_ zTmqWvZ+O@U^`PVHf~9X+OFTPH0JVNtmuBAjpHS<%E9Z4Bend(Ew|eZ)IPAnx!XjUE zE?a8StDw%TOk;o*OQ4m+M-3~VJSKn|=fZ^g{!(}9z`y=|B(El6x8DDG#J}V@he0I% zFRjj9JiJ_giiYmm^$qA>{r!jkhYIr=b?Ed8clEQ*-Skg?61aR5A-&EC@mKC@fSg7Tl;pYIgKf<2REp_^qS7R7d((nMe|R{_(V_Q8IOiP>RBGXxc# z{(Ns^GMW8u6K112p0)r-O~S`mY|+oBHw7s_6mp+_;>7H1P1`Q80FS;Q9qyiR;LJ% zBknqHb8tR9mYft*h+w)__Vyi~;iZ-93+-*sm=SpdTMD0<9l)=a!~@;_KO5)x^ULeS zyu$$qmooJyyv6~Ea(iPw4%{42cI6|q3Og1qg}(M&Jhs1a_+|3`z6cMT=WTBZ`vL*a zwuxz`u)sFaClGSVcJaX{K}P(Uihvh?f3|+0seHGrY?;?|!ae6yh}42=`EXjyhHK_6 zDZ3uTgTk9g7lMbV=gh5)TEvOy68XHJ0dC>glP?@wl@58@dA^G6o??H0$fiPzfZdPN zoRuVxd1_{NEep@7Il?9ms_qDT3)QRS>2ZFIh=M%kAtPx0c51Iad|`Y{t`jgiITqKq z*(o@${@0CfS5^=}@8XNL#q^d{>;L@BbmXGK0h;PqdNj&X0orIO5wpp}>9?C2J{7qG zuf8Zuln1DV7{&$Dzl_zmb;{5@nfDjoeWyaw6m;q|)?c1E@Xh?7=_#_)Sdh!UpEugG z(w0Y#dOFZ#LO;y$WkH6PW#i%or8cYfqgWanhvs8YeN#J}wA}n-1~dq9yh8e(AWpbg zv(e*8IY!sDehWNXnjZ0hc|IPp{-rlQtXzUFgo4{H6CV7o%RG0cEv7ipX(nW=g~TkA zi2c*n$?|mH!fSeOQj@r+BObpmaw!SL8DNAa9*n(ItPkVMQd$T;0&kj2VBOGbmwCJ2 zIgJ@8z_Jax{3O8Q&pC#3y_Va>Q!Qv_t)M)`IPBx z$ta8%OQO#ctms1X&Pks?Tw2*W2>Hvb1?@eC1#r@{N4#>U7RIj`3LW@-X7;yzPvrK7 zeXRY-77)#DN}tW%J0BY?-q5X#7_<#(%({cP^y~fxpNzcH3jfepg^>%0Qyb!*Ld#Al z`74;vd+p^i@fV5ZPQMKC&SytbU;+45*RUi`FQu4ixnsW$)DvPljUxlD<_S?_E80UG zx0s2QONOmWDFtO;0At}4HD*J3;}KTqulr^`bsbwVbGAxfbV9j4^^oQ9iPr^7Yvuhp zaE#ut{0ht05(;2*#!n!am6I~(w)@|NAgsf$jN>s3sJTNtVI@1eA z?2fpm{e|bR&ohu~~8=Z{qyadJdO(ihi>11S`xzgUE69wy0lBLN_`AEbNH%jMH1YFSG1Q+0= 
zs5>2H|KhB#u>|~7x=^mc`YC&3VTi)x0JJ6Ov+iwO3%t9)9r5Z{0yzVF&b8aSZe@rM znMw_f$Q_<AuF3k3S8G zByQ;2&MJP9?K!l&&eO`l7kTMj4FAnNJsxC@Ks2_=e(Zxt1fo!ZJYJuwvBcQekptn+YV^51i*`IO(0 z{Ie$CetR=f7jgF}!|SGbF*@}O-(%hfUbgXow;)}k{3wYP>k3!UH6BLx>QC+R6?Ysf zM`$`iSQPZ34gF&%Rz&F0c%824ev78{-XWoDhMrq?1uhidlb3uVBbrsG-stsqyZE%5 z<{A_q6?tIs{_QPS>%XTfCtQ+6#5LZ5V9B2y$oJ8t->=i&tYsP&|Hc)k8n=I+)Av}R zyg4P7wu`0c*>4lL7uma-h%{!_Quvv}Yw?_ILTLUqj2HxMEBx|B-%xvr^8jRLPplwj zOiT3GMm}51?s2 zt(+mAz0v(h+wAa-6mIE6!MnDd6y|9}P-v_LI;Y834mk`LtvF++s$T8lA3i>_nIz28W$=vi52(zLKGjkT_fHpvNk7 zT-GwbG;7h&!}mt5wRA`n)h%a?AZzJTov89el>OfuvF~78z&mm9t%kL|G~Z8NzjB?u zCEAOqeI!o(ttsuyNQU1{(G@tUeZR}OTB+}`VBy@9E$-Mt{<4wYBy-?a$?=sNe_VG{ z{Lkz5I>B#_mB(v}-nwm2-VfLs4M*$_0n&oYI;ng6?*)MP&D-wSQX{Y31&e6wdl$;z z0Mr6w7qP~O*qnCB{H?0v7R!V6>ezYc53}P^qj)GPChQ$3*E~Dg#(AY`L`sI zdxP~w{+h%x*IXs_Jv(`Z+-*suEX|A9zo5!+HS75Kx0(x+cAG16nIjAOHm+o zw>adl1*EZB@2|7|>!|4Ji)`$|q0Ju~*`Cei{*9{L@hN|Gd;2j-&+GqT7rb>!fa|Ye z`Co04|GuIi{b9qofBnzCo$J5x&38evFAl?MuOh4tY|Er|IN}D~)c*cZ){^iMq^y_uLMlboEgBD4SVF7zzEkIy;$dfrVWIHI|3<_oGuQKxi=jK4#%m?7 zuTt1BgzcyXjZ-Ju;ZFj;EXT~^Xqk54jBhITtbYU-fv_(EZd4W7=N0=FagE2S@ng1u zFJ+0Gy3)h-CD~5w~BXS&U_$ni}}1~Ogo)E8HIn<3>XTZ)1uf`pW^;j2W~_s`$mA_ zF&9VlbM#jb*v~XKQw|YP>}#!K_+WPf?r1!7E|v2>NiEVn&{F{LS|k&03u_ zax>xQ%Wl7_@g){>Pwb|BSPgrsyuP%*@adljh3h(BioSRXCzo|t7Cv2%wC$U4+VWA7 zdg^3kxcwz(wl+^NFbtgkO29CAiacMloF(dngG%R1S!%X^tNwiiX?N=07Y=_YwyLn>%hVjb-qYSTV z;HekRy*_E>b#?Z6h{pThX|d9)US$cL_ZRG<$j^$PiYXZ^uS%C-NL;@w^j`ple`=wj zQcq8UA1w%Rt#QvhZMx6bEhi;_)@CBi;ZMyiW~%ezk3e=EAXrg$;SG~Up?X_$%y%W? 
zYW{OjLxh*lL?fbE-4{O{wJT3Q%3?vEkG#AIEPU~P4>?qORvxNfIoij3wTeY9pb zz(y-|{H?gVP(>FP%g8xncfMGG``Qc6^B6wTW*xW9q9z;J5xsz1fm>f)x9$KbRA;jY z!drWO!c}(Vz9aLTOPFaZxv;@s2u;@$ZP9=7xEBaqwnhN%fZB-uq%E&Ng}8fiWSs;S!@f9VOa@r7DI3Ezg0 zJKd2f{b^#GWF48nbxcpVsu`0)v8dP6AccKn7U>U;HuNmN-9!^{x;-tvUla}c7SO7ch|OXAx#M-G%TZ(~WIeHo zmaE~9N~mCyQn~Wpo;%mLz{_-^5r8NA^Fo>}61MpP3;|M=OKL10D3%8|bqptc6ABb zo(1t|ialCdl(X#0@)H0^;G$S>Wg1>k$rp=4uDb(QY>M8MTrnv?ONj%|K(*L8cE4nL zK}f&mX@0D7Qh7*o!`S?!4DCkG9zwLd|Z7bH!vT3>yRZPaqUHmi~r6ToFK zCEMJv->dsO12>$YLgj-%6DK;ugT>@H=`^Jq$E5&r0{XOAfaaP3`}fzADdx|ibTY&0 zuFaapdxH0o1XR}NJqKsbC(h9rDw5xYi|lH3wRiQV4P5Kgi-4E%0R{9j^Lne8ouBn< z>Ir5yov4sqGxcL`f_(V#>YF3S@zXzo6@iRyTQ>=Na?DLY{|G_+CpX~{ckS2N7DB(L zPLbpuV1n>p8J5QRGwHKStBKZDw+Qo<7m+i9glNxqu%9~$^>hZu-~?;g4S7bXdHFM9 zoSdAW;K$$u(X>`o^#L)H>0bH{a6jWkc$sZ9bYuFX@AO34$y=Wk^5Uu#o*UHrKQ=iK zyI=c=ubY+sR->wrR(d)q8w)*BXSs6)eWguym{a{#YTfsEwZBgR=xY5oA!x*YLfZ{G zB6;TT9&DD?+UmCs$0O$KrV-y-p%b&koX;PvFc!UP3JLPnEOk!AkJZay>xKIb2?XvzP_NhGV+% zDfY=4iX`ooM)>t=*O9a8VgwwyV`Unii9S9P(?f2j#vOE|MoPoAube0#O0 zG$iXb3m;y^Pe4`XMbc|udk^0+El0stP_sI|S6+{h`~B}`Y9$8v^G^=6zEOBEUIi3z z(o3dnwm6T=d5kmXFEv3+QKM6nC^hQ@EHk+~NG?c=Vj1T#HK9#4a+)~nv^v||z*hC_ z+Kb=fgWRG&k}SpiM_+FYiM>9Ilr&Q2v)_YW6q4=QP&^fP<{Ou`oGr&}1I>|czNZe% zsm}mF6SDt86W;Dlffr2}3Wjv(Eqk^w@#7DqF#mCZKqL1-uXB^;w(hz+>=x%I_89-1 zj7t~{fTAdiYfLC_)GAsDWoGcU;F9XPBs^;{v+o2k6iTqB0 znXnl4%mjw3=^6(!nu3<+S?4H0KQl0;~AZbXq~MGPkx>Z z+vPI;!_=o#nNMz@nK3^RNzuoi8H?@Svo``11MB*KkqJOtaxrY2A9r$lnRShqX)YTRW_Lw+`f{a3NBo37RjI2~ zXuK~uF8NM@%b$GxHf+Sn@L>Aa$IOI`CSk{ByMHz@0%x9{_jyO-aI@KYqjMs>Ns{54 zq}(4I(DpRXBkN~zv3PgsnU0y~1p$2ao!?HE!a;Vjfc3OzkPr(tQ}kKj6$%fY=D=i; zM?yG&faH9|a(?FFP6~YFc9?jV_M<{|F+dNczfr?s_h{uO%KWzw%YpT5=!tksZ&!%b zDd2K|f#r;PA~Hp<*5zRcc_GyknA;Y>90vbUnR$Y}AmuyQM$EzbY5FfHP9faY zri!C=juKQ{4Q@H{JZ|fB0K!o0#T0x%?3DtBBY2(u-)maQ^6pes5ao;=Gvn?=p!^N@ zdaEu`F#b`F{{>A@O5$QySu-w){Z0c-H2YX3KRPisr5sdq{SE^XGe{9cZ;Y z-kT!@;{4?Jd)+u(9q#el6z`dG23z8+AU#&~V?kvm$p4Sp*?Tv_>G7@wtsr@hdJ}t# 
zAfOHZV>@~3air;EG&+!ZuM;>00n~&ii>lX(r-Kkn;e*L>G;LlSut_8W56!=%yVP^K zGJo1<*Usw2h3L}ICy>>}^}@=Fz~!JrgT)AA@?V8C0_;|QTdTQpbw76+xEfd%egCUt z^l7Ji%O3;Y?Q0@bB=m~PVti*6lQ~`+uV(3FY}^nEkO!U^C)0YtlW3A!2)%gF443to z9PK!ebQ&~le0j3r50Xf?my zRm3B%q)$YpTk@ERAz(!jAZg|@yTEhZX?Y)q}Jp+ox#+Y(zh_Z7*%r^lY# zHL3eDTDBbyw6SYP&tLtc>93*EWw|&QqGJbaxn~-$Dy#+4xxoC`kAyDYQoY&YxL%f^ zNDf}ccex3Ti=G{zy|@j1$$Dt^t(|%g2NM%ZWfJ8)9}r7-Irrva5zD$N-ZDZoEo2-Q z0$07neqLsR&J6T@PV)i4OzPc*jzj>t{-n-*hv$a)VggSHX z)%jH~3&+qWxU*F+EuidsU~8+33W|(#Zo?T3mJj)6O7@XI`}J*}iuEf&Pjid|$7-FX zW-VTii^&-f)rA#VQO;fqr+{|&`m+ubU_UDquw-)UMlk_O*9ejtsS#h9DmPOeAyvT8cUsV&lob5Eu4$}Ob}QQ~MYRCG%>x)V_RuRk?7t{L2KB zfEyYAO#e?}k?_MVBA+DU7?*JM6?Tgrr?$hAzC=M>gk&7ftKzJ${tzZNkFQUDaF6(2 zOIO}|CS^}sku8+(imX&>o!Mp0YQ(J5`yrj?_?$uq1F6}BiN+jYq`OVZ4>ZiaBL=W* zL?s)9oG|LOZhckVn6rbX9B5c`Hj7-SHvz35fPPFDBb3vH+Vo>mW!HwQKD+=fLmj5e zGP5h}ZHeTAJMRzV-sq5uL~H!!otMmKC=Fk=zF6WJAKdVUsD+ArF^XyUH;IC=8d&Ju z*FZhaD3&3Xt8$l{&Y#F(c~DG5Cf!`uK}=pYFI=1t53kX{-vK45$NU=VY#8CTBa zRK~vq45M(a7yGFju)BPz;FqJZyzgZ}LYy`k__yn)?0BME*{FDz8!$Y-emAS4bJFsjoZU%(^U=k~fD1z-Cw&L&-#fY!j(-V3 z@fQuoo3B=OEq3MkUl=-0JvxgVr_~ab-!Pks$teHLv>hMyj^A7$J=qcb@4b~^1|<9b z_ZR$sYyNCDqu*^JiG#fI!1yHg7j20a27Sx&v1{aSs*1|8l^_2n1 z_4)uYli40hUtrnq_4>WhW&aviHDFbvX`%Irc4Kc2aca8~YMw#8ecy{uoKOKhynpv8 z&t3=O@K1}VTfFlATk4FD$?1N@MgWCf{&jpdKac+KaZ+S%4rryrlQb{uDmzCx-g-1~ zZK8LU-(n{OX-Q)(2s2)xeTA00^BMWvYALIe(1KOYji^n1Q+bc@c3?fJOE1bt)Ve5B z8fm!@WPc>>;z6gA@)Q#*28moy9^WAfY6~FTRE8opB>*D&SK5(XoMbB|0s&)WLBth==Q0CFWp4!n^rXkua5?s`uIfSf@62l>G3bBnFOklKy&ntsM?yt=Id3RR zZu6P>Ok-1Y2dH;N)1B<-YVd91H-$K((L{>F{Y^E-nI_4U?6vPkSzm|%~?W-`qJjS&nZjCWtiE%#t1l#ivetad?3k};~QV=eBEwyO*bQ& zJvMhH2(;sdG*?Zx6t{cDXz zv62l)-7_;nEsL@`_(bJ6PH~*9vW1>Dxz3R|QM!M>S@q9kDkm|rsvo}8s(zLCX@3m! 
zn~mC7z847<^>(7CRF|^OD&rWU_)Ua3V`nv%=6>;vu)F05l7F6Ma0*Anu@&^lpVVsA zM2g?4WTstqvqnQ-l6#~??$*DqX|QkqveVsy0usxt5QE}a;v78G`LHrXt%P|snTMu% zJKXZ!xE{7kBEb2FD#Di*g{-*``DfNGXjQDO5IYjaVVy>(O&r)7;ed2lBx8-ioeY}P zCwt!(-VlAJJ(I|s3rX&J%VHba3q(I7-k~6On_!qHA*7OXbD~6vfya2t8~qxxw3+YmF}=rZhs4 zN$|!QLXAK|`=CVWONesYH}6Zn3goW~vHsr#Da#%W)@eH@s~9^_KyE)bMeS5Te_q(* z1;vqj6aV<0&0_$$2uGcMon2SsQgkvlbfI8!y!7E`kUy!uq;e$2(0Wqqfqw&Z1s_Fy zw>+tv5p@5SmBU?Cea^mj0sS{Oz3@#R{}F0SehBbnZ+ucQJ+d%Gcud|CB!&eeNQ#iR zn0_{oO~Kr0Tq?>)dt@~us>+DAf+KleSW;U^Bb6%`tBa|WbQ$lg2+y11)KQD=P0Jvp zF-5GXh!aCB7D-ubJ9i8KL`nD?^6mjyfc@JoLva5Q^*zJ&>XFPRzl^tXLUvz?pFxVH zT6%owJNB@aymclI^FX#L3st@{%#-US5Y(329};f6JE*=d)t_*YceGuLAekedCj_>cUw6A+eLqIZ@0V^t8S_a4@b@NhWL@N@nt5frVwbigrMm)t<-CpeBMxU{s(j|%r};d zzG?iNp-7LKwI#+;;!{*{M)6huUH>8Vc55(tBPYZ^ZPRD9<<4QmH zQ%CN*t&bCP@*1V|h>I;4A~Ia0DgHq&7nAv&Rj>Ct;Z%DQbXlR-23zWb4z89rJ{86- zvD9V%D;2$&>8naV=MVdGx+^pBQz_PYS;C%Piz z>~0(NzADVgA}nSnarY=x_X#xGIgi1-9O3%~re)q{&Ys35xa;XuBK)O3T+G_&4iWUw09c!Nvmyr~R(ZJM0wY z1G62G;lay2o$<}7pUr^*1)yxw6>~6KWYj;iVq({HUKU4*{k~gTmbFoJh7*5&W~1;) ziP1#Q&dE_fpBgU)-?Nu>;a`Fsc@ zn&;V-W&wk_uq(U1Qkfr5-P5|&RrFYvRIml+*Y&Rh0~RnZ((}V`ncrJafOn9D!bR3?d!emc{TUt+pIHc9iZ+kOor#f$G^8`9c{k;sNbO0m2zx}d$P71642nhz1D1yYda)DQ1FJ!z`DgydQd-FF%Hmtmdo{h4FFQ4EhhuwVD`L0% zK^?S}HrcUj#75KS!LZPPX)pK>R>7BdRe9Ke-YXE)b}a`xmjHtukMev%z6=ek3`4-CfTh(wUS9ilXE!l-@TtRlXX)5lmq~A$OY7zII`9wq zJ*e{-l*H(Ef5)gADW?P}>CW^t%C7gYIxcaLWEXs&gCZ-8KG5$bgY9bsSM@Wf1+vA@ zLVL(Gv>K|R^aBp*iU4*u$uJmr7AbQ^?>5?xnH5G2xPOUoBpdK!MR9YS+r^6zVw}Iq zsl;6R+09`0)2I6N9o*C**yJiF3GKog0iLWM%w;Bb^L=s~xYyn%Ahh@BECeGR-J+Wf zjQ>J1sX=_QEckVgS8-Sg2-2bQ1i`hQzWRB`x%s16Bz3+nVik;y>5@-{$VrdSEAN|4 zIo5mMR~=-8?p2%yx_nL+X;GD0#OEEFMa7wDx1=5)(Bt^r%2R7y1ZTi-<8Z@Gbh^@M zLP@qM7G+m!#&P`ezR@bo7w6=BuXO+Ygdln79o*hrWnF*n+Dm#1^AmUt4zj83sl0+X zMpxCg*iW^<%e%CUyV!vN3ef3(&L+<1-Dn zC){0{7`-%;h_7-T!C+0@2^eF-Xa3_-@UCR^pT?E9DU#}DVQVx?+Ou{ol$T5h-%tU? 
zyDZ8-BGoPORg22bIjT~3QRjAK)5XnSQSNwB%%Z6YUV9quJ!S|Kn_R5tKsOSEriwVv-*yaSCV3+Hm zGnJEIf(G>+RvVRt(&w$Ofbeq3?qu-G9w|sK-%-Q)W;+3^?)lPN^V9^7nU2hcc(B~i z%}@+Y=dqpxg=^-4k=C49#~q)3O9$surKw9UM?LuH16f|A2mYv`N4D}f0-A}@&B`tR zrpfrH*O7Ul5iPQ~I{0}*Pj}HDf_cz{)k#BS4O9R@Vq&9}uh3V(TAI%qx#hx;;+M}<9AHa1 zjJ=cGPT`$Sq}^WnL8~syF3o?ft8l#f5?MoU&Etl;^5b%>i&j)YCuKujg<3$gmdz*E zKIyweHJ9=5$<0gng(rp;AG>7{CtZ$kO9S`aAnl9p&{^nSjfg_c4M2h}K>H^W^+F89 zSSI(SBEZh~Z0Yd(*m1!G4+Z6I5C!#~%XBd^=^>CNy!BRhULkhY!ASkY*|lmuM^qHS+;Y;H%DeJd!l zPIdjq5@gR0;#;CzRM|5#4pR8RqbGg}^fIPfI45Rq6`CCO?v8>SmSj!>_ zElac&Tk}?wW>$U%SQdK9AzD$mvoXH z&N9!L@x5)j^EnXA@Euq50fFic_0d`gIYjQ!Dr4_=zWSFbA(~=z5y9ZqD|^;U;fkt^ zg*S@TRy;Ul?a5j8>sbe7cmlKsvPm%Bt{ZRx%2iLq=r;2bJSFEFh5ENwh6I4!FBtGl zUrlCoEu^nrI9&Bp34s3se(qff6Uq7t#9@H0!z@` z0mxnux}BD@-hmnw<{=mMa_g-V1$OD*=jRwwJD|V`ONo$6?1gtr!*a~4tpIcJ==mgR z{0|0`N%+4SOn7E0rRJ;v_tyl3Wf;_1Lins_6WPx7elbw#;Lv?Sy zFWOGLYQ%0HY)Pr_{$mxm*gLKkmDt486%-92r{le`E}#^W8z)x7H<=ZHoEmdLLf#Ph z*c?_yqU1*2GW+=^!?NmRlw-Xao7W~kRPd3#A(@A=B7uUNlc4*xgO6&SS}rZ=VisG# zUZt=s8x`f3s)G40_osV;gtqnTo>$#4VP35iI6q*lxfTx?jm=5fsg4Q5$;6$*RSrg4 zqCWJd@RH%yZmFp3BHdp~(k*!J$nFJW49+REc+lK)w(<{SE#Dn#@l8ImuiJp)dK7g4 z{YPJ=AZ8ocr7=`}>|67C`y**>f8@ z{_2jXoz{nwFOO&L5fQnXoD!OBjtcY8sviE)^MQs*FT-#T`|^RIW&KhqMWmXnfG-^9 z?)o`hhm>1CpVfEP;ty-CUIz{doJ?2ezZV|~v`VcY@{sRv5MMec?Sdtt4b#Ih8l+f| z2yA^>(l0tb&oB8t7~VYG#ccp!m7f1YgcCs~9J=|0+tyaj`oj6W(r9*8dscMo;rpVXVl#_Yw@-pvbaY6btSSxcWHoX?zxefKJGp3MLOZp z?{Fw9dg;tAdW^@UC&7v_!P4SO7=u1EI|U~NUErD2_b0(AIKCIZa7~+tb^9Vc%2a|Ui`z4h|2HCk>#f)R z2(fTJ@4RO83AOfbp02V2;>Sg?2z-WjWYbjQV5(GJEQ*2Wo{z2R#_m9n&(z@bz;4+m|%LpH>Y^*z(vQo8&j)%|C%Zw{|y$>+*) z7FY-xQ|<{0f;i{*3lW{WPdV?djb-^i9^x#$+SZ%N8VMAKMM(PYk&_GA}5NA0==X(?^dWEHxB9-S^gaD_2 z@~4$|r<=8|t`Q7=u`mY0^W&PZX&&7(%Zb7^c#RBO?`gWivevmXZSm1;8eg!&#={Rv z3lMb=eQuN_ojb5icHou5A_LpGNNro8O7I_XRX)5##zX}xA+>8<3*I6-gtpbN%_r+* zo~o`t(Tuis@*D&~wHFaDBI!#gS*V`$%WA~bx>30Hk2&u)WI?H&ogb_Pi(Nk6WU(_1 z5xIWFS{BPY)QCjb0Ao#)SI_5toxUehwYhKX=O<$U#FQAMS^I!%R@7#WP5*~IWoZy+ 
zF}B#U%oiigW21$tZwmn{ErVt-scz*YNxu)SrS-3`bSS>chD6Y3+?)I6SquhSPl^ae zddG*r&9210P{nhpR|*Hk40960r86p@3g!W=AjFZBfrAfn26c2cyqkKNPcek~frZ^6 zU}1x*HpLCswW}%}!o+8@0%F(b=6iQizOz6&wtaFvIPXA1!2~4j){KZPMR=AUpf@&8(fHnv`<1{(s5}^iS?BC@CUJZMe#yKCA zBsJN&XZVOOYw+7>chbCrkg>5HFm<`ErQ)3i%hMeGe9cfL2mezY52G8)OI2lUmZF7s z^hZ7y43Iv@DuN-7Z1BiE?TPl?^N;+V+MIH?gX9wyGt<{oi%!|3$VOV3VKuOZ_t*hx zSeHqsRx6K-LR=$5`b%GsC2^CbuT*uUF>!KTayX09q+qA%7r*P8^m=4X>092o`HrMq z3m}}iN(kBMaqA*_VMRKb?NvJv0?0po-oge-j1%|fnfK9{ETW|Ia6a5z)Yo%{hg{rm znjU@Bl&t5Gf`MNw76n^hg``oMYnu3^>~M8ZFLCffBR9~CoU^Ut1g3OfE4OU9W!Xs8 zn%)<}Eh)0=y8o--4`+VQOpVVV7scwj7Q|-oqX@3UnJCT2kv$9xb5uOkvsZB^T)O)25u--jo9G?--X%Zz$7FF)sU_I&c z`ksJ}-q5sZ;ZC6!wYh72rv|U6dMc&mMh5SBC;gDfrVLW~Tzk*;h%2$!h@us)Ws5-E z+uNK{OQfQVmno~ml&VCa1#QK~SCm19@LtiJqehc-ht|uA8{;emM14E34j+l45JL+4 zHp;Sdydsl0XMz;-pa5r+%uXHz+AU?rOU44tayYbBnM{X)-@4mSu4QX#q+L3v=G1QU zj+GrsY7AvIz>r2uNU%SQeu+EsOx(J&41(=0Z(P)hkJwA%8AcZhaZ*gz{AcANNcgr8 zVRk!g~_F^va$s@&L zJNG7_k5I)$_`Ac|0ds|tY%UbNk&_5PgI-G7r1B(@*RZTt!WWy$xDgJT`e<>NNpW4+CBW%O8r;|0{ZOPGPr z^3w{(fsgN5A1e_4&$_6m6b-77@Dc33B}i8bB%mXP7>=9udpQ^TP{kOcnrF+<_=KtSn?e$07 zLERLdL~PhdATw-xe>u1p3@JU8aIF57Hzu|L^vb!n9F+wR@gF7q6{sA_0}2(|__0;M0Xw3RE^-G1HL^Ml59Ycm)uSINbHTd4PBIm5I!{aM0qzbSY6 zZ`J#74s+Pvj&)}JfBakttU`dUfy;7H{M>uY;M2OzyntdqC08&T4U^61_SKw91FpXAAnz zf&;cGF>)gz^#+hX&GAPGML_fH8$w;a;eoLpHF+VdjBYYt=p*}8GYr>dUCI-1Kp}oanZLFu%5HO zMc%mbI%ARE=k`KQ+wTKG13j$M(u{l&4HJwrhh)(|Xlp+H>As`zzE_J5u4P__UM&Nn^BP{)9*;Dc zef#F|L?a7$!&{WQM@g7mJZzVmbe5=bCCn$?*#VP};N&Owtp17_u)b<@iIQPMR;Nb$ zWPyC+8<9=ixp;v$m3`O4cVP_rf|>`>KMJH3T_(MvTeQNImXI!Gs<`R4UPYyX6#MGAhEjDV{0jiu-F^&tagrS#jSsL>bHNJyCY z`+Tz`)eV!$YFV2{e+XJduk4s9@aRV1c(afe8DH`VKXsFA+>e83nW5c2lG)sEoC%85 z_m3?*jnnn8$$%*U{TB|=jq#XJ?0#$ff*VBQ2RQxwdL}Sw-pfp_;ZgmSa`W}6kxa1v zej>mh01)NIQ}#X+Q9Cn_e7x-e;(S~_O^1SI#H!x z>S?{rAtzm_F#wcEHn^i;7}?@;(+fu`ncxITG~CA>leRF99J3>#%hr;_*rbS2d4P z9sYU{v4V5P;Vi^{N?jc#b2e4QFH49;h&8i$Xba8a6iJOH>Ac(?_1(FLC4~mNtXRL~ z{>oL7IBxs4@+7YMDbV(PwW22@mbG#|=%#Sxk|$y+RUvsB 
zlTotZhHu_0DAEZ4qUH7`rY$0nnVdpz)`y58^>+bRg;I;qXM7JI)8<2$H36S6nVU7xPMzAtkw26LciP4pZa#m_ZuhxzFB7;pbkOtb60|31 z^9$mAS{1TUSwebB#P-9)iHw^lbgV5G|+c% z@7;jwKOBX#z@W!PG8iTVM2*7OyWI|H>DEq1f6~%T@^lgs{NEA?pJ42I&}e1$aMPMu zZ?7{RO5|H5OZr%N0uVqD?|R(ajQ`6E1~SusD6qw)EIUXp?v&pU#uww3O#{2(Dfm>14)DNmCDGG3EBrW@|pAO8O1Jn9)nJ*2F$E^K|=OJeB)E7T&S~C zq$HcfpAM1ZH^TIHBM52t4k-9Yg7!OHpXYkm#2mZ+yhN>HzeKPdb?@hTV1j?PLW`ex z@yBgp3sh~5-0}T^mriza8_&j-EOA>sHy`B=4?gua_D#WEU>cxQ#hFzv_nKGsY$GY_ zC{@8Lfe%V61wJOP*Lcbi)6Gh=>6zX;yKVZTA!}Ya7e}v86?P$1l!uAfjjIwp(iSOUMHM~Vtl|k)zt);OpkLC z`4QOfj6{;Yfg~wqZ zBKzIWY?eoLekFYKK1E1T{Au%QTL;mcnO>wx(V)p0f%41S=y=LeX1!iK53`7=K1`a6 z#2O=?4Mm{ug{t+rbTmy3n8d*kHK)ps8}$>Bwx>p!wO8-^CHr1udYfvi<<=yd8?t2@ z?62@d7wHr!ee6pnMJpZ0R)LZ_7<1;iuM#f!JcoSqVIbD<0=i-_B=_)%D0>zxjmRDB z?(o=LIgY+yK%yJZmp<)afvi;tQvR|KNOr*NI*8UMI}wFJ3-f@4dO!4os{(JB$i*L6 z)}|?4G36AcCyzgZcXNG(&xf>ZcrgZ4s=}5`M11w$*Y|0n@8mjVodpfw^t&_I2 zUXOZ+#N1B(*L=TA;LAH66S*+Yzi;C_Q)0Wds=fvg4VcZrgE(LDf~6rhS6kryM)<-G zY_H!$7Z2*oT*NdDfe(7k;?`7O^9fAa6yPmVn!j|py~GAYWB4$^Tx4We$Qh0Kjyd(_2XwCH`{vE%y(KV7Z{eTaUoCtpM(z{G4%U#gU>~-KNV5^Gm4a3MCapRmpNL>D!G z^=d>KKb-2=ZDFXJx}Z`(Z*GivP?OR*q$6Z4UKj6ok79EnVjzQySFb#_6ZK=9!h<3t ze@b1LJWpBn^(SE5#0Pw*0Amuf@f!L8FPmdWY28PPz(o8HeA;U&fOu;)f@J34hhL>0 zF5ZkbtZzhUb#ypKw?rEvQI{ev3xNn9Y2!<;c#icxf2>6PUUF`rA@guc`P*HlYzdoS<9+J z(t8q^x5lj}_S6FlKD;}giRf8mn$slJ#AtxMNy+Z~8V|`$w%Wa*15mfLWM29#10i)QN&dMi zu%bvH((pI*U@1(SVXT%Zn>G>QLIGzja#VRvYwjDgR9}wsn7Vg_2@meDq#50couf&+ z3zc3(?+|nwLhgzE^zY^k*p$VSREQk~q&$1`!Np-d4OCgmQX;q%5Y9`>qqT3{_^m>k zwaZ`z%fmBmJG>$cH%s-dG9-06UE*JMOlGg}H~wo(uJnuN0|+1!dF^>(hLSOw$hZ^yDHdOvWDY1U(v(iTEr&*n zD~>%d_w90%`|=5_@7&*WZmQ~yCZ}1L1%IsuPp;TyauNCr_|zzW!syff?FV*RO`9aX zA3skr9SsDXKqaN=_*!V$t;pBbmkGe_r-%nC&hHI3>EM>Fx^Da(`JtQU(r62R~WyQ@u&dqqTJ+tIRm!r^tZ@|&L{ju-d zkxP-})J+@94?;?HX!D3gUfI5#ml0}p8MY-Z&e^r#^_)#RBjCMW@f*hOH4pYistDxm z3<70vJ?h-w2qFT$yw7wcF3Y1(XRldr$ z7yY9tdd9}gxjBvF;E9^Es%UD8#TT%O1Hk02PF^r7{p*rcCF4jU<07xuqiZccDb-V6 
z##4Z_dOkFx{DibB)c!170dCeFVrv<1BMX8+IFVBJNFPLmbMwv6gm$7>ck4uis52NBn-k7AUb$oAJ*JV7owvzSnQe zi!f(^?$$5?xlbj$ zK*Y>#8p2neBN(@AJw9Xt^3^uQuFR%0jj9zA^Qi|a9`C`Mb4rbIf@dq4ic||MA@%G) zUBN-{jJ1&4%<2u@NKYQDV1kb||27rIgH0k>~Jbua{MfXTl~3RqgHO00;t~7dUb!Y{{=>^ye4rx}TTE7g zELFAtW}yHL|I+R{A2|0LG_5NOqu5hq+;=^!D4fV0t}I!SZO_(0RI+fOy^QW7B6@bE z`+3RU3(sZFoQVb+bw2!ImlYTin3<~`2C4`yvXBjgnOOgH%)6)};e;;LqAQRB{zGqj zl~yS_wJ>CDWxuz~tq>aDf(v0gnlx;tqr%W`Qvn8MSRQsRC63Momz^Lolfwi3&lDe%qQ* z!y}@*m39;XSO7Ye$oG5vHgMRPs{q+Gb8IbboFcL?BU{;6 z`FJ6v=$WBEH@0&a*RAqOF4imx(C`bmjj-PAR-*8FyU~ma2O`q(lM95X&)hn@X9@V} z97Be=O``zg*zg6ICeSHu=W7exBS&52a40dZdwR0Nt{};v-D72vTKZd zyYG1^0=wgM=h+{M(qSK{&H4kW5BYVM_6VC=E@vD_B$N~?1Ct$n2?C_`Nszr5&+0Z0 zZqxo3R^0c#vbWP%#9uTz%!ZK&6dK^Fae0ezvuw5CH(PneWi2s)JGcVw(6%)vyPp$W z_OKmdrn{$KAwL`5oa6uAA0_*9-7CCj+9Jr${Xs3s;j6&Q<0ewQ zIiZ-6kZ1(BBpc0k9FkchbAtq`9y`2sV@O+J%)|RI096H{m2$3X@bo|U8^gcHy0sii z&1GHgkuUgr-WNV^RQ5HvE{e3K&)-r~G~wVow^tNU@`{cCCZJ)GOs&2W9{Or?em~x; zX`0L%Zs&Clxi3cbD7=OJj4*Rdrfn#9Jea)Ep zi0)cSIWKYSDOWu_JxuU7{D%WC#6Ltg#+R99CV$?y=xHo5|HdT(Pev!c$RTZE;?bI$ zVTw4XG&%47QyVaettZON?ceKbZmDR(;1fq&$2WX71;(b%%|1Nzp$E?3E-&a`2CHZYSv@&rm~(s9 zazOggW2r_I~v&IoapIW7lWj zwcO^w@B{PX*dnyC64WCSEB z7c!X8y1gBYl%C~(3%PK0{)ezf^w{v|7esh4`W88CfacV3^N?>9D2TzMx8B1p)1KxR zl-^iz5@+|(COU-e0O_b*Nt>@TGNf5#t;-1O?xt9gq&3}o&gy4H(z{EPWqeH_v7}y) zVKao4+V3oEFsyv*vaRHwvzB&;n~Hcjjq%Fu#o>OclqPosM=6wS{z8qiJL;JUt7r?U zyxBGhVqGr$G_GeHX1F=cl?&kGre5A?={djMj8}<-2|(KxZ z8ywqba(u!0Lq)MS`8P!Q-AUdOzh?WJ&#zA3+@AHb?w)wXzD-4Y-<$%r0hC>A*M4*O zR(WgL+e_J7DyOYje05%&-)w`1xt&F9gQd~<+n+|Oy0KL#EEkiY(DhrMlKf(iT6 zO3*T!Z`^L*+pbk=9sjat`Pvx$-9^)j?{WmjueNolk&P5i|9pDI@~iXqgs0v=-BfwDMSom;siLgI78?AZ+W7wHs>B%ifCBbyv& z&2epk4QI=-pqwRk>t5R~zSi|W{T8!(jm0seX~wN9!y8wU%i)l8-OR zoPSfi)U@$qxt)CP+l%Y=Y;$`ZvDNJ3{~g<>MH}v$vN-SS{h80-o>+Y3&_&yvsr}1B2$+9yZ|equxs3Sph)zAKN3=F9hPrsftN`j)I;v$n1DB zy})qaLc?Y4c+c;vQTUi;c5L;HV_)!Zn&of0ADfu+jRj z^V(+{D}oI{C*gs1^aAIsfy0EzbJmC01^;j4+}~C6t$w>vK0Ds|s?lA&p!sLSCT#J8 
z7eM>Ge=Yz2?R3+hFU$Y?Vea$xuYHv-x*l|3sQ;fM_bWDD0?$JOZ-$?(zi~&&S>XAi zHS0FN%`05@``7jVSKW{1_IY=-?*DW8|MBjG6Xtb~|uKcg>H<{`*4qfDR)4|3JRx;ZZN}S*6|af7;RQ;Q#;d|3AnC z_XXJT(!kSRKi2@)Eg+rL8V6kC06UEqsI2l}(n9!|usw#)fNQwmXUG~Jn;||}G~fID c?(Tp7s~pDLY(MZBfUdXjboFyt=akR{0N1ay3;+NC literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/moe_dynamic_padding_e.png b/model/train/yoco_moe/sources/images/moe_dynamic_padding_e.png new file mode 100644 index 0000000000000000000000000000000000000000..a373276204e9af063d1e4595a1a87a79fde392aa GIT binary patch literal 348661 zcma&N1yoe+`Uh&DfPkcQH#!Kz&=OLE3P>uQ14FlTsWi;c3_}RgAUSj+F+)g5H%LoM zH(dO_bI$!d=iasci(y!Mul>HfPyL?Q&l9ThTAttm^@CftZV|kEA**)l*1hmsx9;A@ zxqow~JYt(00Cc>tWmP-`gGb zd8W5+U1z_PmC|rG*t~Rm_edMD9ezZ&bFqu5cT17*QJ!a`=~ZhsGFUWyVI3nw7z=7%+);du6Z2Mr5hBacSG^_ah3EF-y0(0kUs zA&=$^o*fx}Fs2Op=f`rlRI7&UYAaNfJ z;s!2wx6z-=qAd|7z|cf4p=l-V`JT$O?A#eN)N#Wk`=zDl+7YBs9uk7qpnx>NZ8h%y zK{+o>v79X2^0+{|A*bRDCl4d01%Ups_lk4&tO*b|^H2a{0G7~1om`bn`X1C_djTfb z3R9Eshok@GrT2q%ZgL?kU61GxE7;KRD5Z&`53JppDpN~w$2^A@6uTImh-LycEhe`9 z@qdDhO>Y*`xOI-AVIEYdgFchV+9$c04Lu7Z9n_1T(1$=>6JX^4iIW(g0_Azi{|5(l z0D^6t88JRq14CMAXO#baFn(+a8r1m7<9pW3>pX#a41Cwo`)`=4o+2nHb5?VtjGa<1 zs2}{HD#pIJzmIgUs}2d|(QTM>|7C8;$xzPltsruQ0wRIYZ#`@&^rN22!~b&`Wg_(S zt{Y#){!yRd^={?~kJoFBew3D;Bm{;0rz#p#vh-dFkYSFuVjKVlGha;{FJWktKlg(1 zTcd;>&c39fQZ?kUX>a&Q?`%bB>5$egk ziV^Zq^5C8tQ1bussmjE$Vn}@H(m6wf$c!ESD0_Y1#tQ4d~n^-$8yPG0- zs{;S$EaTkdv0ukEpaHF>j_Or)A?Sm;2cUrjS5n=n83kmvHK2U-K4hv09lsc@-8xQQ zE@A$Mh;z&!)mnN{|~EBT;#FiTzGI+ ziSEwT&Hy4yGVw0cpKplvL`aCY&W3#YXb!nppc2u{|YGDG`NCCXSG#Y(bTCxMPtq`4~b)3G?6d zfD6OWN?gdKL^k4)x3na3%fg7B*)anmr3u$^LFDC0TJ{xDr(+imsAa;!Wn!fFpN@uC z@!OAZqHxRHLfVd=?dvZ^^ivW^Wm`~N-YHxq<56A=$4_}I_6?( zx{(wm+;uZ1M27-qd(0qW_$Ye=68E<6o7%Jdh`ntS-Dl~`VqG}aK2je0#s7sQJ$d_t z(>@POTZUr3znX6~I7-nv?=zVEq;SVdAJ|kPas%hSIom2aB0LH^Q1-myjH@c%u)#Z4 zRHy|%WtCr+EUXMnO{gS#rBF=dX(KNWNcz?8mUa7(4mZ72EPZB4!*JiSO4j6M;0_fw@(-<03nlD0`>eF{21WH@*NVvuoO z{59Xk&`YrAarOSk{`-HRR*rV-4Xj6>M-0(8dAj~n=AER$Hh7n+WcjjfUJs& 
zC(*8t1%;OArVkR%Qq~pr-;k>}XPr)c7YyGAkWYbU8-YeUgTjMbr*slPh^UsFrNCwcxtk*vP=ClB4w zwyc6Q(`9l`s@u34ZP*h(?xej(9wEojK@XA=i)tDwpZM9)*D9wJ*K(I@@p$iS)~v?^ zv`^zAiB!1=*YBP_xj3*7~bNoWsW7@U3M7AHBnrft2)alR%y}@QgDvOC=I8bO^^)tmobFwRw7oMZOet_T|k+| zzHG<-n;>Y2+mmlT>@tVhPI+RFHJj+QrI$F?+4nvlX& zcEfwrX!#VFqlA^WK>mAd#4r24SvWNdkY_Y=kA#;BvbUNpom8EL_f3#I+LG&$BcbTs zBZDZda!YMs*ZP9X`6DI91j9uW;eI|BueK$1t$|E;lH<82&AI@~+A#B*mvF7da&{ys^QMvirBp~!DMAgC=O5ZT3MnruBM%=zk?$h0vJ7b=NjRVB3_C$z#soHYbT{9%|6XCDVgv0o{$W8z`!oUQP~gjM2rWk*?> zpD8Jk*`i&J?aee^5?zqz9-x2O;SJ01?m7DB%Hd>k!ULdNG1nYBb7LftDyS^6O3&EV}%t-&v9cBj{FG zQUIG#MF&Dbs{VsfQKt7R<+B7xHJ0}B;g~(H#@#xZLm)GrUIiG$+?P0$wj`<|fryej z!bnfsZ`ImQ)*r>~%tL)%sWeZLT&>?-ev-gLGlEtVDPzzgndQ`L2cC7Ujzgu}K{VHA zXYreOh?T++nZwxVz_%dDOs7D}B-|oqZYne=LR}{6ayZq)bj|Zsr;q81f|7XJQbnAf zx}Gi)^LMfC-|fweVgFET51eFy0W`zwN2=xhXri0pn?8YX!S{?wr>1^BO^KM-_A` zQg)5RCAqUFK-}*>7md~{7voe+5jx9F;Tb`bv|evG`%oMccDIG-HD%AZIGy)kJ#HPX zQ%nJkrWK{j6#vOQj zW}U^gN;;xoU`?~iH`HMCmy6CP-COuN$-JNtL>{td60AtBYCAA}uCi&Aey?)kqq>Cs z8q*JF8DB;r-G=VfUh?$?Js)~c9qJ>o1Ad{rVh#K9)7KKr^b)=F4U|DZ`Dr0r-1yP$ z#Ig`5yOs*&C(yo5H}QJ(9q+v)T2RMk)IQ&&4JD=2wm;mUaHE}ry~ams2MBpFx4R=;#vbF}!0 zhJ5sEaxb&!iKO>j{W6*TA;0PjI5t~StpkzrRmHghViHc0L`ni_dCjtPU4@6BA+i4u zE!{Qw@lpZ2eOHHb;LZz!&WViL^I%D7s|UWB|kO2wv(MCk(H`O{aU!PdPyt|dOviXeaADWX0+H$alhpV|F1#; zt{26oA>S}|#gDjqtR76};VZMj8Fr)O&b_r)hf)CdqDuL8pcfI*rLk+oDdmJ_QyljF ziQMKmL7<|#+SvK$%9^t8Eaz#oc4Tmz8&NMzDnkRXfC|jy*|Ggps9f*bw89b9cHjns zW1qWU*=Vuh9n~h30kred$hK)XTM0hm%M>u6r7Po|$BAd{RKFB@gkWoRZ;h zF9G1@I@RpnHJYH<5?B-?n}=>$ayni16yH5M3w|D9-%|oyB|WSKln#}X1_INt3F=?{ zcJz||c7vuSOwxp$6?r5=3TF!89A0(^vf#b+?8oW@HA2fm@qfJ=9g(&)e3u3Z2wM)IK+$l zw1z4!p>r&0u6qnXRVNr~O>&uS9S&ru#b-H+<2jD~OBpX&`l!xZ)are?WKR(odGI#2 zg7f=4h2$Gmd`2MEbK7G((O3}SL^%>X3H=5;F6b}ny+i!kCB`z(kht}D*;TDMR1CiH zSVZ#GljCZ(@@jGT?RbtoA%sHbrJ|!PQ-{#XnAZMFdU-x-`IY^Jp=qj{yrs#gT`Ah` zGplK*(5nf~6uT@$=}~?iuc#rz0EXIx;(cPMT(YpBZd8bK#^r(*Qc-5gLQgCdot~&V zLsMU3x>TnnZSY>3k4i*>+a|En0{X5Gk!QiP$2r8>AclAB5WBuaZ?^enPRZFX-O^z@_)Mo4kQm@5P-KU2T7 
zIi$-j=f9zkKbx>?e{|C2(+!XJ1m;1QjWpuZGleE-&@ZVv10ts@M6omG9L~115dv#4 zD0{~fQ_^w+aE5mE|6y+l%IP$1p^oirM2iQsnRquK8^tFxt$Txl)JMbNrThqH*owN( z$!{!ofap`NYX;~Qcgjp(_ZAB|dFwJr>q=+YTg)Xfoqs_w+*@cU+TdbZHQ!vsAeHw# z!xyxu<#RkBUiW{iyo>a-(aW#tG^{g#HK{{29)E{h#qtSdUNZTdG>ymXe|v;c5Y>fBAFt&FCXov zDZW;=gr!l;IRsF? zeORgODV=IQ9E|AB=4EkWP}ezAI8{BDGjItUjOp?S7wdu@=0b=g&ps}M2!0n4VK|*+ zX6YclwzFZ#K#_Q8eZju5e0jaaPQOB_CLZs&E8F`sX|l9+n9vREcw<}!OSiv zW_=CFr5VydsL00Ga`G7BjLcbrL&|%p?wb(bCmGp8ld=kqB_vO<%m2r?++ne@Kr=NS+TgFGWt?*<<%} zL2Ra-d&YG6o)!e4xD-gjE!?v%=R!>AW;#m$sxyxcK@NZqfIWcH;N9LRIgm9W6@gT7 zIQRPgcSwF(p5L-udA6LXe!BPao>^v{))RSHQTA@GTz02INZCha@f;{a7*6?-1KR?! zSDji2^6cKQBy-WOCsXyn;79%TbYOI}PeX2Zxb<*)c?N2{)Bu=2gPW_!77V}j&3$#sr(9ouFtIq{y0EI44Oau~l`J?o}4 zcVkF>Z8~&vwB=WS1q*dYp-tJ%qXFqT&kxdmdyr!+Yv{~Y_QXo7`WSoG&qS&ciPnA#N!_rSB&3pkndQp1YL>Y3(!#nIGU~@%gc65;Xvira zp38unjuv}4T_>KZL3%iV+oOY^-ZarrH0y&{VE|tA?=tPZPE2XTSKh{~wD4vqxHbEE zF=?UzCmn8XVycAaiy7d9va0S1igo{>;c(tj?wh*oq~FkqK!G&opJ6vT*yb4ITCj9dE|RwT1rBtTueh=<&las)KY3l>&6C6Y#rGpiL`Y&1+i+M zR+Uz&kapq;TJJ7h6mk=7+zB<pI*K zZ1M1cR9KI20#y*|p}t$dbre;J^HKuFx=cWle2m{wiLty&uxwRFwsVY_s_yL#W;Jq3 z)=7LjR{iZ(43q!k>NkBE=dkmNF@z02MddvvbqYqX2TGuad@{qa7|lXS*aneDVdu{} zKu7v1qhU1-q*vA>%rU2VIG698L23u4DeJ6jzrC=8i_j`u+t4S%mc3mBl6(V?cfRdDjuhgr})2qXgnEhej0k<-SF8+5Cs(K>aBQuBz zomF63(2Y3DevXrYrL(?nHVhP^s{FFD zDLM$cu5SvVmK+rqEdfQML(^mo%)=e)cMfxbjg9+1Z>pv42X7)H&mt$eB;KL!#~TAQ zDJz?wYHjdX^yZD!!Kn z>A6{lvvv2Az(>H34O7Z1Gb70X%y#OGV`%cKGZjXjahtvrxrO&!~rj^CI)@nC+9o@!N*!PJLy- zP0A9S7ts-G)PO$SdUKM6Nh#>Jgss2LHR#4md1#o|Zoh*^edE2s(><5Te_JfzFxZ=i zjTFLYxM*|emyNTi(`r{@!`axpBW@F0+4KU3+sXN=f>#9!s) z$@j{>$3FtDf)u_Msnkvps>)CIWn(ER;Z)YOO(+pyCWu#JAIq2U9aUNKjo)CC@iE`@ zTjzmXuFdiLLRIvLOCF|NG5ck9!NUo^ztE6yp+ArF7q&WB+sXB$j3W={&! 
zFFiswgeNhkEA$H~HBo3FO*|?hxI=V*{sNDuo(l309Jp3R= z(aX3y%t?}*C@?Tqs5h}mrjx9`f?MKtXma0a- z<-7;Q8Monv?7W%a2L`SZwseu||toG$hOGhAv}^&77$IjBHe&0gNs2W$S{b zA>2%7BRY+yd6LU0B6gNEj-^vZSn+PvimK0NuL?kM7ud)8LS+=^5T$+Inb#Lr?MV0B zyV#a|w+GYA^jomOoJHbV`}ZE?mgPSL%hHw~@!yB~n7kz)fv>!8=p$!eiPpZnNAO8` zPvDz@^%n^Ub&$)>Ad_R)$ysO)ueJl|nLWStE<8lP9?WvPR$^FQELN%b8B@ESaRuo( zpaIpR)kG|+>;oaI?49Ih=^I;ipcjMP@hjoEZput(IjI zDg_82v;Z&(3oFZU=fzn2EKG8d_?j7DS)HPAqyyjD@wXVOIU-;o_JGKUn+rl!S^b4k z>>ELjPt6TW9<696)Ql2^`|3pL$N=aqTR2(PRVZRl$smuz;ZQ=8JE!SwCqDqKy(<;j z!Suw266OKq2MZ};noACujLgU3jnI@}xIkJ@ses+TvmBJq3tvul)Qm9UO({=uNqMR5 z#$BODi6pEVgaicOs>dt0M)WH(Ypc*YhROOJYrb67`T0PE=9mP3#O*|ZrT{_Rj2kN| z*;)5&Mtoy0us+L?E+##!7g(w>J^D_&LOLW?TIv2&t)(HP8+D zgj3~e)M*=aZV;dM|bf0XD-032yq76aV^$eA7Xe*ut z#NYoWEx}b>eei*OXwRbCOVc@jfFBR6YvUjlvFl zGA2X*bIF14W5A1>+ydEHZ@@i3v%#)o(1Y^fG^phZ2Ly2H@U<}*=EKt*tb;5fblfCf z?LBL=nTFfulyQfy9Ubi^e5`31>>VW;Z6|V!t&*>a*|+S}OgFn-^OQL~F^Z*6+u9FO zIe_@d3fXEbg@4ea&>dfwk&!be#pEe*y+Qwt~VEWnoFs!YVLLRI?jBP zf!MA`=ISgpTc1lBpB*H#Z5c3dj1&)fF+xw;5Lz}on5v0no{>4llt*9}!8S!Pfe7(s+mr%my(9cm+7>w4 zr6!JrIW=8bggUp`C<5ZmMwP4953`xD4Mi zW^iwG^qi&uz9(XhC*@I1j0;ox&B)(q?$H9>YSHW~N#9X^#GJ*A8XMF+H&rc?Al$eC z^5s(t+iK36!V2i997m|n3m7zdrhXuj$kxfTCN}b>I5=_c+v*mvZg14;O*JPH#db~M zBMq^!Rm4%%471JxdglQz!;O(SABssAL@gU0g1Cco8r%dlbxe}Cq-;1#&FNaa->;Ig zl#1T%!OFHCR&g?k-p(D9CL50m!9R?TC@^OdA(r_HLmTcusJ+nctw@C8 z)1g$?WHfM!OCe}iw8zOW%S zi%G=&w2)FMD)5%ZY#@}Lgq!=m8L%|pm7>!H6$n^KJWPLXP^wKTYla#~VuTVN;hgTt z)b6(3^RSyDaqL|`-2A%431bROGC z+-^STB4e(-%@QFspcgm*=A*V&(5gp}z~^NaCN0>(ApHF`#_QY-A$|1#!q_yy1Bp)W zCjRBdg4#inDVzJJ;`(?$lNd|J*3StF5D)%V((e)SOh@0Jf)|T>FnoXWx8R1qUnEBo z+r>e*3z=?I6&?sMqDJeAvd_n0iGVVlp_VwACl*1f6_QaI)Lkiq;gvQJH{6`)K~Z3aLCgS4s5AwCwco;|XCISe2*yNa@eba-d@ znj6>AKRdaI%?6jYj0k*6$>7b|2(TL`@!$N=O>`Pfbq@kbG=;iQtQJ7^m-(; zqBd*`K};C5qW8H53 zjODKSrquTjx%JhzDwToc8pTLFmt|**G%Ae*#3&T>bN0!Z6y?l*GG0X!?}oa^go4#8l!!ItP10lxXCH)B2ZSBcQu2m^$koeBqc=Y2%k=r+3JUwi$q9e}- z!`Hho!KeF77k-Ce8e?oxmhEB~pJD4!RW0w}M)S3-|>#T*wR`o&p> 
zx1hSHi37$Astk?VXUe>QBXMgHllHXce2~qpDh< z+kxNHzkMAOk3~-awRDqqY&4~`u{0;iC=ySoTcIyGfU;Im6ybJ}IHI3YKI$taGsP*% z*=<4}DvD{y-&U&q6|-SN=;YIt*Q6m&RZHOH{U9NbpHEbf^xOK~!&svEPT2l)WVW2} zNE@XlEEF{hmKZ^*eS*6bC`f*@G&SA*)Fa%%CF!*q0kBd_t-|OYy@?L$W{=%4UFjf` zPqBZnbjmcQ5OmfzC>FD!d|ky)xa2xK7Q7^UMBT=#6bX12?dSK{F*}=au3!^b?xmp4 zg6H(NNVrkG(5UNh-_M6?&Qq$2hkQ2zIm(xCx9jvaX#&_3Ltm<}NQEhqzkl&L2I!^9 zjRu~d)dEwgz`HP6Q4)MJ7UNf&W$}wKREv{*AAo5&f*|A9m~p~)iih!SrlhvfZJcn& ziym2pMLVN%O=@Il5Q1@FlG8Lsd-C)#M3E&@RUb&Iu{HC?X46{LO$64XC|s!-!UkJZ zg!T-n&C$piW!?}c@!3zkd0bb%+}k(t+XnR7slWT^HtN7$aE_qT9b+6*; zeC=}V%4<|YxpSnyCLc4++%2ob$vx73r3{8xR}o#^q~)ior_kkzb#vl=o7o{+!Q{mZ zf}=jVb!U;Az<1A_?c)ysBPW6)RR>EbLxWIa3e3r>Jq0DWJ$(Y%R8w+3FYeIbdIV@oMj5^I>+MnO z$#Td<^6XKl(4LAVCVsYIr%qzp6WZp_J~aYN;foMx%iN&5=u~;ZIPx{Q#Igfln00%h z-nu!c=WK2qs^DXf7(HRS5kjDrojqOef_8a=ILG8j2ka6j`YlqxF$)#-jdi{>KVO-~ zEE+zDv!2wS+pZ&-jO#i~U+bd;M>W*--Gg}YSwjZ(Fd9!IF?m3XJms!u*Bd$ICK{ucd(TQul$htH?<%meN@>mEFz>PYO-QnvQ7znB+gx{sH4wdcXaKJ{B` zs$lD;M<4FC@v|cW&^&=TvojO4r-L08Od+SwOB9SYGuh6F{?*yUk5%x*9CSM`UkT?8 z*fA^u4tAu`1>}hNy#$5jmi?4pc#MwTc-gTqLIY)=)qJwbf_GvHzTM9WOF|Q?G}A10 zQyPj(F|$=hubY*qD0ZI4s!px@Yx@MMSm8ow@_67b3HvOWPB3J=(xQ2gV@g-vj%ndx zBhgZz>ZRPLwUXVGud|{_=<>1kXRi1VLIAm+N2!+0!!0cGOOrLa*`9vm|1{>TgUcU< z!xs_~qu;X`bQy`s8=FEG3KB;ik*J$RR{^nrlj7XfAI{#dvj-0VjlqR-#= zaC)mXcdl|Zu@&MXz|n6gv-6)|#MY;pF;%+OG*Uh+x#>qCnqHlq9x;iP1BE4kUV%m7 zSgQSs(PK=l3_F(J*^}UKLNXk$4GVJRP49%>n7M8?7p*r(;^ zKuxLdKTAZPcMTSswCdFN2AJG*Dft7_&O*@O=s4KAJwc{}9eYSjmm_R9pvGoY^W7|d zxftA~a`^ZTWCq=5x~Lb0w*P&=el5ul+Pa-E1~pzq$k)G5NPX}cGAX4fslpLp!lx?< z==8?_Prr=1(58V1P2h8uGA&L6aibvB7iFw>bV#0=#maFnihS68gonT?t}|gzRZU*@ z%@EWgHGr!$cb!IuSObF3w6|&2@75~I`RPo~bKwPoIT>`HJ-^AWA{%YU-Wq=Rj!ek* zFd=~W#Y~))ex+=dU|+XlfBTKWp_5XQlM&SPw5@Q*tY`1@RYD~ESwZh!+~q4H^2kY9A`kP~&NU?SHi68!A@tRjP-;2<9Nam%htnn*o)6Pb zhVOT9{z_VYxoFi6NComxc|<$_eM_*UHg)WxKTnB9x73BHLQPTxGWmQhO{I#9J2MKz z!Mn+K38;qpD|?}x4AAd=3oBKb$aS;PT*RjO7KwcbA@FbfQB-Ju8vkCFUi$z@x@|#` zm1to>)&Q$#D8hXWUVlPPGbDgHSMO~HmCwYLX_La?&prp{S&+@ZjEAxJPC`(?75P!5 z2_0@M^qE^1^?E9s 
zk86tACwNrc(+>K$Cono>0nI?EekU(sjFOXOV(dPRJ$Z`vOFA#hGlW+pWnJkbb9O!< zE0!8PlmS0VAV@`#a{tj`H3f)iDixS3e1rtrQ%gF&wrh6O>w?`N;;q1*9v(oi2o)$= z4>{3iRW6oK0Pa~x9En~Dp18>MX!+x)_M0T(y#Y6w2Ppa#hEuw$a~|Uypju&_muIO^ zlUaofg_d<~351of)=|mXF(Gxnjj7QxNqjpj#?zpfOtgN<%FY5T0|uD1BC9PD8j&OA zK;VikdkP6Z22;Z<@hP(zQ#S~NHxuhRzc#Xj^++)CejDl`>|Gc@st8l{5{Oo52%`Vh zO$|`j*Dumhg46%PQLshcagj68O2$y;cvypVq9XWVLVtAU%hN$akZN($LQbMP z^nSc8nO#?&vYCZ^-nbmV^*yy)XxJDha+yOB#~tIqMArSx&Q{7t#)Im7riarI^r5_o zazEE~0`Jg6W^l?M=x1$Wp?}$7BQIgRlTxmc+IXj;(FD~9t;)RV)cW1RHSogE_mS)n!ucm(K90wiZ-H-J>Q#}LV{W;$;>aBXk zw#Gb@a7Rf=K1T-=O<@D~5(eFLJ{-LI>kyfe?!#N(;+n=kjFLWNWaqw@KM`n{+WDJo zZ^v;qa$QmmnxVp*_aN`67Qa#UNB>}7=azk2UUD951aO@hz2_D9>~=g;yV2vJn9^4+ z{G_g30U?}`*tq^06vhZl_sIjiFaE?N!TC0i$2bdh`NBBXr;wE2(mkibCxmIxr$Ht? zX=k$}{G&%W+oq$l1vWb=nU4IJOrSs;pdUjPo-YnRWxDg1Ywbw@UwnF`<=FFR1 zd`5OW%W8=&@LchSEmqEV#IRybnq+8OaO98C0vrLQH&WCY()9B!Z!~EoobU;~)l}n% z1Evg&IY&S*+_&zRxJ|4_M~7vwiU~W?taUd#Z$Cxb*W1W`bnSjF&}vuI{@Iu&s*sk= zFwwt?PLm}_a16ovV`aDPRpY}p^oIE3K*l7Rz^Q7xg+`A7(d-bPM-ksO-Qo}A9oHOh zg~cCai*(`m9Nek4OKp!P?Q>6-bHpQBrw=)JwVXC0g5z*3Z-QE$X=Jlxs(GhTFecvp znU}ym^YYiv1(#`$g-H2Vv-I{Ll{W?<6haX{hHJxrh4T!mC#$DE3VlWqu>ji_kTta? 
zD9Y~x|GXS?zdB%k$RPfKL-pRPd8%IxjwDa$e7`my8NP>f6f-h18N$;@PiZ;gMd?I5 zZby%X|Hf{lA?=Mqlcc?PZk0zh=c5R2O%>*mWjj+?h)e0asr)UZCknGC(~b=mD@=Om zQEWcihN*~pwZB<>!r7VkDFA?@evM;ucsHI~)@XpEEYCnQv&rT$(|0X|SWp!&4aCX| zqLy~Cd?iF3+O?;ZB((-#128pjBo#ibkMCVKChw0K(29Pcrso_rbN4}^NlP8`o2H}*0!3E5v#T&fSuR8;)V@XE~XTrq2s*Fa@*tzE-iwdBS>7{$W;0|q3JD5ewhuM+sdkpoE4Lp7CD(Ts__!FxWDWp}sK(@oo z!uwE)#i-*wUhv`8$A>bW?`J28=EWXln%kiJ=$<)DvK;tp(UuZk9~)k$jqgcI3bNsG z1Rr)(`>-2t1l{}j-BtKCmroSEJ3igjoFKBw?8&IomOm5dDGS|z)^V?x*kg^D_;cgQ zF;WM%r^??jFWyjXZ~gSg4R_-@oQ$x(P4#@!2ev(Rmoz65O=#%S76;VlwP<^@M832* z5XkLdaeRz?cTtr3LBwp$*g>S>zEAU6+W3jal(wGw6@KPbN7{fr8f^1vj>7P8)_*)h zqeg!`{8Xm7P0~euvg(jBReqU~>IvXeFv$d_4lI4jsB*RKHuTTkgY9zTT8@4e?DB{bU#Sk0Wki9)+{QhF25XuQ$uwJ0w=ra->fboQ>Bd_hHDaS87EauanQFsq25l(*YB#ISg6T3tvsL=%+^3)Mza|Rw`4x|2i@iUgVY;@3%Vt zHE(U)lLsHyz7MkUReb#^)YQYo;QGovCIO~Thgh&gk-U8r5V!@M_f*4GIiOF55_~0; zym)`)pSvKXaZg1(F!@_k$Cy8&bZ?jHMRq{j^UUpGIkMy1=f zV1|?$OMfbUb~TEBm6CSBT_rh0@-{)dW7;0N$hAe%=NIz9u(w4)pEt5r?5^bYuhEBO z^qce%P0tLhu(KV<1uH>zN%qUXhp&*z(wSlC07v;uf!KS7ELE=c-SrIc)-SwFHeqzN zcSv5yQCl0HSA%bA@EVek*{XHR&AS8Dajzdzus3_Q6>@`FK77J@&Gh=;!z4ovB-_QU z>#;%yEK+RV#hWb%|EJ@UZcm@e$KUoS;mx_`O!3GpXcY~dKQ8rXv!VPLP3lTofUw6@ zW$QW~5#yo|UB1o)>SxNBwK0YruHw#{d9G&0r7tuV)<4`Fu&$a$$O67Q7Cyl|ba+p= zW$?BuXrn6nq{CH#_)XrEA@mxK`&J~`B(6>LyAIB0fXYk{PqCMzY>>Z+wosV$! z5~*IUGf~VT4P5gScYNyANsjv!^YK9Ok@`j+@TV5_;dNdrg>d6&gw?Aw zh;}l8yFB*VG4EM__>Y3I`K`n*i~0eXXDVmE2AT$b)~_YHYLP`$Lh2W)wv^_6hl%^) z13yhrY>tB#toLA7){thZ>~?hN%Qdbj~_M=J_O5vAH`bm)&cv)YPxssYHU@^g)L zq3tZe2Pb<^%|0$5x1Q!) 
zl}zt_XP%WXBsikhAR$3<_xyEWW+n@*Pzt6#q8(%9YNsOuFWth zlk!o#EypBy>KSWrN=Iplr%B%e8aYrc38jDdoNCX(W&Qam=gIlfL{1a z`tSsLcz5eLljI%~IX>n4-B+U$O8&>1 z4&qkZ7f=0@Yu48vXpYsplg27FTjr_+g^Vfxpir-O(M-oFVVG50;Bwq)c>c;!;EO^T zF%4bB$F#K;vzj_r)-(GCZ2dZc>@SQDm1nkFwj$mJZ@AGj8c@7)hFtD0EnAx^ltg!i z9d)>{&GrVD#7`1E3h{6;EEpQuaR0=aUl{n({c0L-cXFpr`!j79lj@7!TsCBNt2ZQC9XcP_EP%%G@YXLGBWTFgq)C9X zaEg&koKl7SihS<+`a$dBkcwK|Z3ZFD>6)#eXF?^5RUP}2n(CBXeIf0cG@6bZ+dk88 zuOtp|WsK#BGcGeKMG@u;Mao!vhhr*aIRRXd6I4tZ)v~|!@K|-D@i@_6v2|Jsm?B*A z5kl7V-&0*kgzU?=_zes;7-Slz4J4<@EeXm~tJ*0*I|8qw%JhzUOfa!J%!=*c1WTZ2KP|8As z8gLQUBfliu(Aqd9G(WE(d69w5_jiu?`+LadcRdBEtSE3%VuvA-mj~!nP$s+1$>;d4 zB&3rN_wzuOQ#`a9d#P~WzWL6t=QbJOQJ%1{*~U0N{8p0ml{Dc(fAi&S&5VOv;bbjl z9vm0EyI4*(mv3JiGeRG3`wyt+n)D9Pym&I&_K&GI%c|dFFg>~1Wt8$cK5&oV>3QFs zr}UC|@=RY;Ro?^>dYazreo|237mvtblw?4&nKAT@X{2YsTJcog7Sn2KW;A@!c_y!s z@MJ9;61uQdEnI7Lw{NBvez@pqwD}=Ht(MQsz0R5h{CYODzWS-r?yo?HBL&rd4b8aB zJVu6lfpU3yS5b9!Ur6Ge#1->YUypI_hJ`&)DST^?34W>h_HDpLtBD17F7(ZYOMz7U zAcMflqc{2aU!FS}OYGr2P<@*%tMTAVaj~?AqsW)IE6z7((N^Hy2ZOV1QyY)?UR72g z@*aPaw<#66kdC!-BY&d(_-#&nW~6HNAT|~K)UTPWnNEZ&*%)VZcz#+~7?TU>zu@z) zTn+2@SjcIG2KnOfM|;#!2E&DS12LuU`|hlBN}z?HSnR7|&ZcJ^kO4Uc4fCe9mRiz* za%&EJ>Sv^cvxwVfRKnZi)NNp8`BnQSnd+7eVoG1SyV%0arSH{N7l*S9w4B8CdcWb| z*0Fvj+38c1J0CFb@pE}5bW5GR_x;8cAPzj;8Zvr)x3Sve@u6}@?Fi)ia`3~S!VUnt z04o>poBOVjrvy!(yoeD(BX3#0FZ1TOGQ8jRnOfKoY;@Q(qqhn(G@2KZ;Wd3_Uvaf^ zjI0<_4owcYyc}x82?0(8p)=deq<{HJESevYniy!$yz!C{b)KHP!JviiE? 
z`;M2%k6%nbcNd!>xsii_=2*&!L&@E($i`hpDP{ZH$20!Bt8-T@K}F6|O^=VD^9%g) zije^2)M)Fay+-B(&D`UTGMa(s9=BFtdbj7|0Pb`; zuvvZDQsX&t#m#e;pl6~i)c``8j>~&qV&(BYuYS1gaJGhbJKDVyvhhSt0ohr za2&1bF!JEwfg{nbbNu38pKr3@8`jXSzTRmc*TyR~AF+T<`rfWpoz)Vq6Ua#=ESR}n zsHPv1OHpmy-z)T1c7VD;r22vQeOnnF7m;mSWGIS;p1FCNl*LyFE5Rf#PMaP%ZpoWU zo^9e6$>z|{h&mxysZ0w^=3;wp$^4XV=3xgq95w%)t$pdq#ELv*zovwD^F85%gHMgM zsk#GScATc`YVNlMZ@r(&KSh=_yJggI($WOEeSMQ%Z%e#gcc=U(ZM7eLlPEnpw|c|Xqy=)KeQWq|P=?>i%1VWMHX({8 zWbJN~Onp8LaTIogi0C7A*I5n2XT(&$$ci}x(>s3Q3hx14hTBubySwb&IVM5N_*a@Z{>l&iS*mT#0Y9zH zrgND8FP-f_n*t4M7`;^w{9=Pq(gCbN%SJP*k_#$auERCXb| zvtdW!4;Plv1W18%{OSIl_QG$`dEKG@ksU8#XY3E{;>m>$%4kr$nObu3{ZK<<&3u#j1bQTY6ce7Y2_tfmPqMXm@{gI45jT zg50U@02<`{|5cRxE0MYT+e3SseA(SRkXB3)#wEolw=W2cJs)I=-rM6xY>UEK-}taO z?fy7{D$;}QLvl)5i=MMVU2dn&Nqj7&6@SJ3=GVA5t)$`vp{GeS|D&S=rO@sD zw{|U4dfbm2gBErt4#!v4?Ux0}Mnd*I!sNGiMlw4)+#D8-u^uR-8f{8eV18J}0&Ou+ ztR*X+nohi?kJ_Ykr4DljDKm}~#HW>X`^540RE3*t?qHOV0c-mSKg{l{&@X=wcWWfR zp1dnme8#eHNHG2In{-Syh+LU2oNLJ3WY8U(w&r+yam;c!hOkF=_Fyh#}F>{QnPQZyDBB(?*S!wiJg_BzRh+xO=ezMT)x>rKp<4WruDtW2gdV1Qs1-B7|^t1erh$b|3F4J zfkL|@RzIQ1MqUNWCk2)?PkTK*{RjLuzw|?#8>S_yXr#c}dkUj0bw=|os+FeYK)mKnGb2y%pV?F96 zZ6ebzl4iwl>H0465I1SaP}tecK%vhskK?G*gfAMVX$32~yQ@17KlFH=7yJ%N4z`F= z?O$B1U07Nj-o1Xu4iNJd0O zBkyHW-}*+9yVYEZU@3y9LL&0lj=PS0BMUTi#tS@{XL`gu5Jb(WAe2ADZCi_T78HB(zO0YP19(oD}!dRz9}Aw z9EENHrTmjQn-d`DJZjVovHKA%bEKkCMJY3Jqm`Np+W(8<+I+P$5(Ar-ki5N@kxf& z8CT9m3f^IKwN6iSP6aanAcheIKT+~>5drCD&Mc?a6;!GpD0w&!*P&RX^#b9vIT=Ox zI;kKly`dG~ie$pb9L zj@N3#KSyi<|0+lSbK_oT8D!FsCJ)FF*gE>H#Bc*4dpiGj5as*Th{_MKs+8`4_&%#g z&f{$I)8f{Fc#u!w5tuu}8V4n^v!|wK0psc(LnY7(dan;O+Ph>)^=8AT~Cm*k%Wyyue*QmVxP0@|15(gjq z_`O+oc)MF^iyP|miam(H0E$fe1Vap4XiCl<{Gfd{R%%Hz)xT6pzOG4qd%fZa+zi>S z-vyhP)#na>G)HPGpr(>v>b&D(xMdX9>_~ z^a+zU=RKNhH1ftk#1-w+Uey9SKMvcic2fbj?M%<_X7?aX4g`jD>ekk;LH3o%ie17B zVuy56uYtdeyoJOu)q-qSAE;&D!fv2C$=l4HXu1ihq zm+7-Vc~g~t)UzofzPcc%-Ly2<+@$MLR(|4RlP9=g0>6G>Q^7gI!j*9e)6!DaOF%wS zD37;=h9%$fo;hk@f1b{8!<$`@hwdYa?9k+=`MvuMJag!`@6^(o_@><#H9xy!G|Lh- 
z)2YgTYae)*yeH0PA%ExZp?eHh$xlus1r|Ux2ju=t50o&<`gdd{i3+?oAmA(s$G;Ml zMgA{?;_Jzy0%I9ZM(Er)O&|%!UA}G zN0KPR2b{zbKDD3Mp~kFs#^_w(bv1I_X7wD4+D-TY&y*Q&N@7pQ#Pfs86*|Z zi!&Mh2CXD6bo3%tW+Eah-2wfGsX>o3;@u!UUGsU6UW(~)98c`)O?Px(^6cPt+(-$C zp_W~oVN#`-X)O<@BI(OE_wr%gw8hQ6bXG~VsV}T{A2}v4d8_k;=?#^S!DGGpwj){_ z>)xSSNxrhQr&Bi8G0FFa(fc7O3W^Gn)o@_FIoPLjZZvOQ|I>Ij&(ZPJl`Mqbk4A)P3GdjBdb6;9CRsQkKDB_Fx|b zl;SVItk(G{#*34KN$M1N1)-6`q<6LcDU?IqC?dg2^)&j!wP^_g4+}z~x(~)LfRM1> znU?!{A)tf=Mr8?JoV4N{+KdnqOE?&{n9u8^{RkZHn{s11EdyaV(tKq!^^nIg+zXI5 zH;Rx6OS46t2x7nWML?~P<5e4!7b@MvKIA_QD5003k68dbmA6=OMTlN*jR1KyA;0JgO%EN(Vuqnl z`c8zoxn?n`Mg<{h_+ZJN2DsGG=z79N!8^TTZ3(^13F=%-)Ko@2h@6{EUkH$DKt58t zXX1zac}$X`qBALGO|43fbGig?I3{L{ky;LrKZ2+7aA(P!rLFed?|t5L(i6@B;czLJ z9K)*twS4;Tj&DT;Wh_&j!)xNEn?94GddT)Ljwq>|MTCF;Wa4g1{}gfhGkNaA#xv9N zb-by!^nyx%X~=fF#YzTZGUEJ52Kbp?71#Tyht=s|f+aH(Ye<4E72-hISJe7m@wq^L zoPYkuN$etVb6vp!X0m-wzkdSVl+m!XJ-Gy;J2 zq_@?Af~up{Ch*ti2h&899!gzHZf|-AK-PoL{^1s6Ji%U_WxP-=k$NIEzsV60Xa+90 z?guT4^sfba{!|3bQ>nG#n!&OSccy<+dPFQfdt#BNKJv7&sU^t_2BwcHJM`C}h#=x8 zg8c&kYc49mT=oRj4Uo8}mKGWItK73Iovt#3j(+u>%*%xVnOtc~*(Zw(Bvb76=W1d@v zpNKWGKQL2xZhd!L?LWR-9V_K|=n2et%9l8?{T=U-@-bJNAXK zLuL`k;q}1#RrfV$@RZ7I9FH8o6lksSZu15O+g91Jfw@mX$S-Edln}bo^+)W!<2t8w zq5&dV-NmXG_9fiGLgxP50PlQn5{p}F>64Z2?<5=~6IB*73fC}-S-kibH2Lq;X4uaZ z_N5UgguUIe887T#NcQJn<+f-d?{s79$~^$sz!XH~HPW(vw0W5F!S;hzW>bi;`C0-2 zQQ#e#vPn;$U<#i-9TjvULGc zQJTTdvl)(VvMjK}6}7Z>A$uK<7p`C4}Y>x!?UrCNBApL++g0Wj zTpfj#y}T=5d4=a^iJ~SgM@Bj=?OE+|XYq%A@#2w3H@(eqF-@dd?L$$}-s&tfC&w@H zaTG$O^0Zq$4XO`kAMQkIvw)wL0xfhJO=t|QpgP%a@T0wBWN{dL!59W;X8mWM4iiRd z+XKeT*VP@Xo0!N8lcKr~UJRtGHGL=)`JgNkNr8nYQS39)4LA7?pmpYCtlpSezR75X9CKA1(C#DG?TVNiQv{u^mjla2b_YOg#LZy>MAsrT){ z>5!`FhOupQ<(k7E0I4efnt(IrvgK!9E)mY_{;nqZEYd`Gv(=6#T=Fd58yCC(IF=P` zs{Hh{c7Zo>|0l;V`R1=OqO&AZ2?%y)#c1t?^Bf${qjUhXp$8JshcN%z9T4i|DX8yTBkmxXzW6;fz7Ms;6<$3t72u|+)a zy0WV&L5#g#+ooG!@9ScDnS<6#?8fMFDE#6}jEYX=S*fQPtU%qt)wQ3X(!oGMzWXoe zVZ6ebUSZiAjTD*$nJ13CSb6fR7-V<=MH~oF0J2`0uAhFMJE28ok5ZDJr4r#VzSOw> 
zoo^y@?AvMIVH?u0a@8L;+g$Ydq<3sy9%yd}d1w?r& z(}>Um3n>=6PR4A7ec(ZVux))kOw?Y(*lc=Zj+WLc_82tLn;US}Whw(pxzN~b(q4w_ z7RKaW_Ye8rVv5XUh1@|Cuh;o&PxRa5x?S-ekB*;ELJpU)+O9gwhN&(VLn` zMG|k8?1o;oDd1NcamJ7b-(VH2ZItrBR`&huL_XxL{Div6kd`uAsbc68Y#k!to z1CQ?Au*)%3Dkqes#E@X%VnY`#y{j!i#^r62B5CN1+^r}#AH4i_&GjAZa#XtXcrjbD zvP)pU(pxR-;r{gpmuq}6CUCjCh1WDkzHonCY&$}7UyG#a`WgKDgLa1h*+BPM>&!~t zS@ua2X@Pel=Zr1$LWBB*>yF>_>`JLgXmiNgLu>9KUOA`f66)9SGbKxwOIFJAx?ocxbY%iz~ z|M&qlsdF76T=$s6`r%xKO2?}%(E>6Gayptr5o@pp>-HOc7owc-=2*;$UMP6Px_lcB zsnL$dNj_9hO_S6dSY%T7k&3(M*QxOoW{*~eG?~b= zM0z-4p!!MzNwT$6*Y_jByf(MStFN zCHL!gc)ozW?aaaK3cJo~=@sQ21D>aSU8R44=ZE6Mn`&=Q76?KbM{M5+A)0L7vgy{t^Sdse<-r8|b^>p3%6g)$@Xx6Y(niPl}kGXL1^s%>%_f|CyNO%ZB z45ew{CzfR?E~-QlrE$k*<9~A8k_e*954Ac&oW?MHhi-jvk2V?5+`Dvm%f%L$AD$+6 zP!ZHw6^#84Ob@F5x}^$+F=x$2I?C!^ zC~kEV531|Tu-ewG)YqK=Jp;|)=Wj}1DCK@BMMdx~S$*;UB_s{M!zbYInbr!F3O3mv z!firmWq`zJ4(CN#=359Dox;?LN?2sv*9eAgCv}AH=4Upo5{Nfw__?tcrIkD7lHY3f zJ__Q`!F*6)6TH4HjTJq`i~ZJ`^Tzf$-uC=3}y)pfOg{=CJPLtJbvhY$NWelx$tMC?GZu!e;ry%2-k3;Ph-WECUMV>7tM|Pi;>0A)e?1eCHil*ETW5f8}`RiGk zxDCw)=fZRx24q@^)pT?@PZMq^9bD*cam8L<{IH_sr;sO(=u-(1PjOhqRr69ZS@_xP zCQp8%h%U-W0Uo*AmC-CJDw7zR3`h|*-XHCV6bGU+Zf+xSyPf0y^&c>qyGU!s!L#06 zK5P7f8FnnCypA4|Su_$(+cyOY2La6^_OT9*@1)BGBVgfH3CV=-#MlAKt`X9|9z$NA zY@t7L{dl#d01UN_p2V-~5bC4!Dc&09P*DFD$Y%B13+GDEqFWz`xOxr9d_=lZsl8Vf_L`6R(nu3m{^5Y_oq(%ZUlZhM*CY-S2&ztyrPlA=79}Si6 zR$aEf;g__$QG^_?RbQ;_{smX1O{8{&1qCy}^z>%*}Tn<+F@*elqcv zoOY_xP5>C3kOLzQ=`R*=ZR~ohsR)fEP6+sZR}WfvFQ>fDvY=5i8`h_5 z6~7bTZQ8EL+ptL6r)#NxC;;XwMjp+NsQvyZV_Sl=@t^m3)Btm%*;@HmNn#5CKrOm~ zN$j-rvylkOfbTsWSM`O(Fxc88p8;xiw>mGdM-XroJz_HXbI~e7pBxgy8(uDvGymCBt;Qe2g9QuqNqjoUU>`~Mp{(rb94+c3A;OysJj|T<%mX7 zu^k6$Hrfam5%($2*qvulMz|ET;%41P*`dJk&!>02XVdGu$~o z=|ope#S=?%!)7wYb6Nis4eLqpZvuT=;GB5jj;lD8b!92IT`ZG;I;L3+=kD8>AZ_bWG~&2#azaLJ=K0RiVR z(Vwptg2d0m-&{;+2h9PJK?hq{8|rr@XAX8-%HM_7-(&XT! 
zmPfl`=V*}>6Aq7pw%8e&{Xw~Ps399c=8|T{En+JF*egeiw?2Btq@G{~_&$6oJ1KZo zV5*swRh40A(!-%Z@Gk?0HJBP^Sc{@G@(5)8`ELfvl5~R()|Hznlym57=Re0t4BYD= ztZ30`vt4DOa3>9M%j(&AjP{jA&Ns*E58ocR$K{q4hC7Uco;|{v0k;wRL0LY$sL`wk z>1B+{z2o!VW2LTQ*t^rO&Ro#m@xwoO4Cw_GiY9Ybp2@j+P;}R|^T&`KgD=cHR)6{4 zlHZ@rQgVD7=WA0()u;LG=4?auxcGF!e_bF0JlEXgb3Io4N-Se?*9qCf^ShwPpL&wA zDSu1slFk1(xZ)eIcuUJL9NnGjPB7N)%ID~D^ zJ)S8uw`0=63}5xX0EoQ-m&%(~`KG<8R2IYDnV{tUOw1XB!nBBSw9%#Kz=O z?}Vs&^WI(x5@YWMkH}@Vw?=lyhNv=#qiZ$ly#i{oTy}5Re_QiM;AI_N+dn-#`K$Z6 ze3x_PQc`+EdFuDQA^Lzvn*Ep!i$YWV6WSCaIM7l2`0(Dg`$5*ATWbrB5z^7~G&zhv zzNf8W6+?MKVl9oUHQ^%Dm|5#^*nSyFxDjO1cl89k_c_u$Tx7xQ>JKs9g+@c+CAnYq z=RRO8`5u%c%mT0)-53nBFt&Bqr%pXDK%G{}+J9z6+aT ztfuZ=^a|>pDyyUnMAdZ~~%=+DA=Y6wGk>#LR>uHW}%kRP67mq;CJhZOm=* z%wl6wilV-FKpHk%1+>X41?ZHKgjcQx)P<<|v>zK5n|TV?Azc03TwK@}Tx$NXa~^Xa zdMn4z)lTMK%!8+~ML_y7p>Au{Edx6;t{+%pX?a1ZfA?UrBXJFA+S+fGQ(fm#Z~4Ut zndE<{8ec|&OH66|xoB zxROWciIriQG~$^-H^4D;a{uPUA)n%qv>6EjwEUg_E7cvB=G4}5bhXxA>im{}jq*NA zrXFT6|1Sh)${;=0qa8YOJ73wz$Mi3T$x>8YtQFA#`4@|3jVIIi;-UB-VGWYU{IBmh z5=v{bpAR6Y5TCf)Nu!^pOAW=2mhk^;-2)-q$?&)Q+`O7qbd6oaC~0*rL~DNL!v&rq zq~YZ%zm!2{%q|7^9{I%b(({R_Lw^|A(Dnmvq5hTU0v!N|~wzTLz> z{UOzMILz0gGHcsuZ?mapp5OwmDmzaT_Z`kpxl4}+${3@&ZYasCtTx%) z0{KUxIX9#{;GGSdI@Le8^{GN+#fD6?2LSR56`x{!uKhJ&0(F$k$@YE2+OZ{KN%=vm z6A&Do)^EI82LjlY-w$Q@Yu7W)R?;+e8h2^C%b(y#!1IDutP4lp^GLkS=f4Znc#-{+ zuqz-{!EWUJ;xNWm-9$!2L{xOYvt41WqN(YB8txG;z&l;U!YiH(tsm$F;IPzYdUXEI8A&fG?CJ&Uv12U5-$M9?9#g6usP*5$UflO$PZaQs47< z;kR)QHPUMn-I_WdS2)1Di2yDveRaBQf5t|2bHn#4>z*Vlr?P9*cH?V)XcTp%;nH?A z2gqwzzayU0gjgAr7uPI&3LSw8n=8|aroJG;OY8>>Za%}l!~kYa&=BVs^98Wt zARsfI4-Wq=FYwffcm8FBaT$%Xc*X5N$eDC;dkC7OUO8=4Yghdh&17u@MPRG)(6$`i zxvS{&Btlr1+z-;xiZ_>L-s!V1R~Tj{|6U_EIlm!Z%^H}5s{ zEgXvA3K6}v(Tw0#H5Ja1)3&O*!|&KxFoG4IGx{MR(t=*a&PRXp6Jevj{yKTIo^1VHL1{)|s`C(-8 zaP7Y{b}QtRRfF4gjwK8IJjZ|L>2>kTw~%u0`8{p|5l>HNNe74kIbYr2;S7${R8 zFoNLMzQ-271NuZCzhV!zFA=?pgN<*Fc66V5_IYn`@iP2no(Np@VvjiDd0rT>oA=VZ zPCf2q`IuCTJu6CbdDm)gG_)fqD}if;IDuwqn`GhP)}G_(TaC}D8B=W@=&L(@1eqYI 
zt?v`-ZRBFk5f_UxR;OHuSl-2zShjkjGrDT2Q9yc=K%iTT>FETK8->~5ej~FBqdwD- zz0cQ2(H&g~R%Id!pCHvdi#0<9Q8{Wm)77j;p}urXiSEe<5^hs6ySqMWM%@dtuO2p? zc=1tr6u$@D6#F_Jx2*NYDbh?*r1PO*D0*MG{Vcp4nIDam!C;Ft0?Suk=Z*9gut+uA zIg9C30t=9uvSQ$kfS?1a-KP}$1WT6xbbiLsI{G*(>rp$x-^puA!64gtLIv2(OZT8F zPJ|fRI%aWfSmWxLC1IONz~_#i772- zOSmx1L_QGoWuboxtH?4J)sH^DW$CZ4mFIA}R)T%_Kn||3w6Zlfe5-}`gwbfz7A*1m zJwaizQeV?q<+&eix{<((ynco5?#N_()$fqnxu*>cr$x6HY(o8=CgX->X=ftohGWYc zS@E}SLbnmAV29tV(AcA<&E24L0V9Q@SzrY|@OZh51-rWfgi9h~M?6FIqp=B#y31Ou z<%xsbXW)T;cn||OZwB7AiL+C+_XdWm2t~NJR=JNDRrQIWhDAr4jO25YjF$>|MCe3k zC-58?^5YLix0?3A*L2!!hzeLTvBegGVL$)6?FEj58>u;&$wRcUd$!Oli)10e?>Y{H zHqkE42KTD3t1sNHwVx~VH}q#KMo%sEa2~Zfh_$?$S8gy_Em{*m=o46qsk~?;T3Xvt z0LDGrG_e@3U(|2Qsh5-@oH^f*0*B2vaas_1uy+)HitgVNH>*DJe_RMpS2@tBeWo37 zc!R{*UK*)xEylf&RdJn3?z)1pDNucQ9=~ro}U^pBNw7$-gNK{gm|VrrC3{$%DXj@b1WML2Te zh*2}FvdTU>>2WIHV}hD%qDANW9*Eawl+z+G-+Bdiz{Ar>zA~k>^6~PpO;#Xlt}>Xi z>HHa^Rl_E_r-v@M)j~{|rk>Z(xdwQ7JUx-s1jzO-GKxhV-KU?fT9O%kC!op9%zvJf zWXK5sE?J-Efp!zLW*)&;Q#BJWDwzBCIZsiD9s)f5-_s`Zi&eqJ52m%iLM6qUQj8q>y@Ff zSE2V#_&L8on{OkTl4Gl1no1%V;5zahgqP91Pis4@M{=Trr9L#zEN6g~!$37VLaEl1 zKC<+w%mb@JqHL5`o#XT^DcoHYwS62r`LTLT-`?~b}dChsNP4V%^X!4;uHGhUqYPg=IHMgLnX5h3it zKz2j#pDgtlP;`e`@9|v@!(J<3T5^ATCzZ%Us!WcGjOq#0vm2n?@>W>=3d#@kI+6B6 zKpoNrnY>Ooxtxr!J)Bg*rPsO1N8m&Z(YY`C;I4rIl^hmIJOk$^0Uvx5(lqvOvJkiO z832vLV-rHkHNSX1iTcwil)f7f@3oeH2mgC#?_@O8n`ccwRkhU5wrZQU1)P5u#}1j` z*VvF*gMVHFUs-#fEU5!nkErN{XP}K~guZ_PMtLbMrExRATiB@tL$Wo7#eVd_>d(WQ z+on3qwOGFuU50;w}Ab}*SRVeP<^+hHxM^6 z`?&fV%x<71vid#n^MZ3Ya4Rf$jhsIClAl;LB!*r>O8S=q&`i1kFBH8v@s{f$Zf_o2 z*omb)_I7!VG%OTUpI2nfdVyA&$y?a9XhuRRxCvePkAiku4Y&h93Rh4XexvVKC40}? 
zS_)T6{{RShj99{aN(!~TPX2l>0pcA7b+6&PpLVZGK6T(-l?dH#5q%9>?R{5X)-#(W z{_s5Shtat&q6=WO2hkzETQhXNtUdz&W^D5Od<9jn2>mm2PqTSp%i3<*xV-brv}V`v zq6<*(smZzAw136)7r+c}7lV}wOIUfIK)_Af1{LkvzuTKjk$pg)wYRpOoxljyr1!}H zO?}xXV2f*6SB#iba8EaHi5S?^+U#FEZupsJFC0@?tPbzaRZRQQ;KkMOV3kup!PgBB zQ^irq>_JyWLk=yxW0b}^!h^#GLS^S_H?3c~O)|(Xk%>xQELsu`4?d)*oL_mjoLx?N!UOVTrawyrZP_S}J63n&^!aVVM> zN9BUc?NJd3ZJe8F``LpR;6Nw-1)C(y%ZutuK`YasgvZCGs37%k@ZEBn>u7tQ&fIm^ z7&ZNHf_Oc%#zvP(&0_Q}`0`TKU)I;jvzJ{R&W@cyek@*TEoT%yYY>6)Ti8-hGua9hlwx$GOCti7wijDb8rMU}O)np?Bk@6Co*~(VX zj-Xk4%_=_J4(F)yAj+I=;pJb4FJT=l5S4wIBk~Wc#f+WOh?d?-!OQ69K#TPeZXzHK4DLIxNCB6X&<$l{q>~F5J}9c?trN5BP;JLt}{! ztsPNM0k4CgOA9j8@isO{j~iJ15eBqlCIYj-2E3 z=vC;XV;wKjuhrS(w>+x5vVeYc?|S+$XtKF~t~~DiEa>uMAsQ{`_Jx}k!w7{?QE*n< znHoV)LI`~JD7mglM!82B&?9GNw6mMx@K+xBRn6fzTrKmB%TBkV0d}eQkItoqrmH}X za$&+Op`1i#_>AKNN*jrPG@CUjI=z<}xgq>Whl{*CD|jDo)zZ!FB7w{W0dxMehujjs zLQ*xCa3+AAjw|ckeL4Zj|Co3ginZYk7g%~+$c)G&|O56 zj~NUx_I)-A?)^O(GF@dhD;!$^hMJ7(*EfYkt+_|AsnFf6 zBg9!Bnq}-&rI$2gF-Ct`OjW%oG_ZnJ9jLe>Ur~Oe{gv#$(ZPfBG*)xfrO{TinQNn! 
zmLNQ;u382I7A2fc{PwciBmWki^hk@w(PEQ3WrGr;0>>4Ri3w(xVP&HP;$HaPO`g@< zI5XaK4cBvf=Ke1n1ECZJW_Zu+jG1;LcfUcj&x9g1~Fb#Uv`TdMY1F`Z%bzAzfJDTlPh5)!RM zw5&U1RIt8{crW!Oo>)p`x|^^SPMJ5PfT_@1F5f?0)suX*XR1R4f3VvH;dL6IPo};T zaMobnpISoVsHjXGA%Oa({*hJZI5rii@a&|}+UZ%5p8wv_CV|!5yY0o$X(;^DJYa>9 zh#|Dcc3X0;#e=Km-i~92d0SsrwvtWSF9ZQq4`;V}R3+Yye6aGM*Y_#_o6wT&LfjDd z!+M-7n~}c^lL%meBuNBOjxE0FO*i^?tJ3a1m>vhYP|Zx#Kn{c0)qoc*z5-(Ig;*Mv z4ju2kPROX8K#U?g5F~S0$iEvvv+5;x7QN1&iF_1Bi9mKLH7`!BFCkj*Z{VvatPk!d zclZO`V|nDrL0i}nb*+UYiS+azihTLAHLz-vlk_#*YzZ)kP`ZY9Fv&xaur(1w|UV^C_N1w;}qGx`&FJJy#AWpc47)8v`$B_f0`lgXDU!7a^;R zE8Cgt+xN7 z#;)UIg-0cmnwKi9RH;k*>2*De;VHE6_`*|@iK+?Hs4eF@cMDkTU&FqKM=y8`g3JKg z&++W!XpMxCN07zE9BF1E*EpPL-%CS|V&%wD3{C#3NyT8#} z`_kA?>e>oAEuSweK>y{j7mylf>D(WT@!8Bj=-jyC!1!1?VU*D^smEccwt#h?uL5nj zEWOG!xYRhL0%MLO4GegyuDFU__K%+YFx6u+vBH)b^uL_=p+5ToG|c^`=LeplmkWNE zgHc^~LsmluI}vDw1tv$D<|sNbKRYe9IT|FH584QV6KwLn7pP}|)#H78eO*$b?{ae7 z0K`$SaHfr3nYRcgLyS2s_6!;v8QY= zQT*-kPZ12S6z3yD-#BALrK6Q1Dpy?kMA~DD3i~;%5gx+PxIKx9@f>b{oeAG!4+>%T ziH|aT(qD-HwzBkpp|kt!!6LC7yn0hFvNLHR0E@@O2YI5sk6M1YQl&!iYuyiHaj8Dz z6>A!qTLmzXjdCI93KP#6Un9u#T;Jg3P&j&n-6konI;~|YYu9}<0ggxj^ch=;gP}K&D22< z9OLknT+h+?WYvRDQyP1m$XMQVhw)t!235jaklh?wx-|0QrqLh!rADI%es?l-Ldxci+bZU$R zM<5fxp?Io1`+eWQbz5&$?FY=WCoYWSWk}DLWW}8^XQE{qARBj|>9Qdn?e$7;(~%iS z!Ckhb_>bhq;+m&YmFo~GrPN3fqepScJ1JJ&We^#Awv%j$~{@QaoP1Z zobyud9v*6FlYkEn8wBo3%+F{3*G^}kGRK%?3qXfQ|EjDr@=CFl{QpDtG()}akxs~Q zvRVN#LKP0)ul?`6TqEvRe5H>$Y1TMQSnsElDgB4E3s*U8-hMlni!!sWc$Yh-&%!o} z(E|~bXFuwMLh($NlO0%T$xo0d!);L)32z??O)zfv zvgdJPT1w1%G){a!CP)qawHGXUT+|(}wH_@#_(Jlj8Vu?&5)MTAEd9kHS+;n+RVD&{IT)~z%i!)x zjya8}w1xrJ&KdF|hCvoh z9n)gfBe;D9zquV4$GXvgpPb1m4t2uE!=odpxG{SY+(CWpIDacEO&|TlNjP9A z&J#blULb0&=J@U3yt;JkTqa+L=Gc;PH(kmX-w|`dp2@r@wN*deA57u=$`Kr`8@#={ zaD*FdLI!yBZ@m)Wei9z)aO~y4c?XQXO7CBPeKsq_c~STlidHxag9Dq)kd2p?{E9Eb zUpu0HCEZfsKkRyjz9rsY4xKmkMAdWYZ7=y*j&}ozALaD_$$#CH<=i!!jV{%%_ipvc&)0`(KIZ|Jr{H z(f@vrv&<$s<}2!FLKgd`?F`Y?x>4}JnV%$UX~<&;!~A(EPldd?DSmgPAV 
zP5q{j$ez?_82RyGEWyTCVfTvueBoX~mZOJZb&f!G_{7XAt}P_e}PObyB|J)Cd6Stz|)KT$m$) z);g?~1=11j=n6@S3Ug9J&w|uK;BdEowyhuJ?57W4L!5rz74lW(Ig!`lsAcrcfIPV z2+Z2;5kGljzLLoR2jfAfYjm+;tiIEwyAu}3^WU7&96vUm>EryFROZ7eM%s#){W?Yz zKHV$dAC*>)mq_AKT#ihS@+B~}3G<-6a9NV@XL>pNmVN*W9L%)I;JpkPWN|ZR?3$)) zYR;kesB~6WtNW;yWqdHiIv_mOHe!7&PzLhJd%UZCe@0g@vJ@(Vv9Q;5wc}LwVRO1} zHfh31fK~JM_#67b{%wG~b503KvX)C%c6l^T$9}@kyny6&IhB(C>T?vhvq>6f%R=I7 zeKUMlqfCa17`(!RC}*hk#lDd#>Fp4)Q2qF*n~7}TJfiLxu2ogrxV`)RthmVed>uUr zM+u$>j$yDXUFVtY3y!Bi`k%*H{b{7h$TjS4&t8y-jN!k7B#(Q{5%-pq$a1nN+`KAX zlI65SPjmyBzImK&lyulS@!Tj7?t{lZtfr#mztZnHS?>qk_cFRkZ8P!1)q4BxdiIY* zV5jK4%vZHdXI#(oHxn#g!Q;rTc%GB%(e>dT;Da4OChOQkPCk`XNir?QYx#fO6v<4- zJ)0)nD8*2-^0_X&RMnnUWad>7G;QrhQ}b%{cwTV~EkEf{?(e04hdIxOb*|_6tnxfk)rUBv6tXQz3|ZU;W8d|{jmsYGFbrcnG)cqimp+(oV=auR_;ZZ@ z6396}+(cX6dlm9h%(ZRw9>J3U1VmWaf8HF1)-rETKSCb%+TmdXF+>*c_zp0rmLR`x zmmUBZRstmhs}0`IiS4dt`rh(BD+OTpIaf1`ob)GXk~^@cOP7<`N0oIqO((||zPFQ; zLpZDxABdJr%hTX3^Rty4w%pv8Ga)wL-@=0$0d`9W@lNny6a12JKG8hXhd9_+?05Nq zaNFUN3A^jbojS>~H<84r2l<~I?Y5p}X9`mdMkU1$hL(X(P(um zyJ{WVJ|x`r%Wu&3!wnp2n6d%1kbo?s^bexEsp0~TYA5sY77n|)+=idxNxuB#$E##Q zl1j8;-cz%cQwip)86jfAK*sf=-V4QyZ&SI7L%S{r^z4gea>-A(Fl0>9> zicDS7qKWm84qP2by`;6Ct@tEX{^<0idaU_LQM3(csOoj{awM}OTYvJoO3~MkS0!K% zShfu-6TZ9kJL#wnbbO73#Gi;lcF^{SZn}Uc>hbxgdAyO4_QS9HGt2y@d7{e2b9v^ESGdArxEkl2wV|@O@JWc#vlTJ|1=Ok3S zd-C)g>}Zam7EkcHFke}2R6K~R9@BjQhni38b5=s`v_VcTgFLGcQjb>q@yVt(7yWOT zoQIYAH_;LXD@D{FT<3grg*iG#B8hUg18fs^mAsobvQvu*aRx<=#n`EqpFMtYsg_8W zi>p?ASEv3VUng43HvBMFpzj^4q4~2e;Zwb0*zWvoVV>rnQKjjHgp`P%T;yh|>T1$dQ;~0Y6aNh7i;gd7XjEK$))g;EGAx$Vid`N%{fTEgz%kv0zM{MA*kXd5ZHd;vh^*At6y|RYE`b(SZ@6j_p^`X}XCZu> z`|J9;>3K9zrthH7XKfn-Z8qaH){9(w`pVBY1G(uV>hdGb8x9pgRU*I1wKHP^had0oF*tXm~NhEAnmn~2UugZ1fT9YZK`WL#LkrJU5W zQZb{cPvSYg@WX8IvJEg$r>!ro@i^z)*PNFTFsDpDNhrsnzWbZChT1 zdA$wGoHgt934YWSUXNe58Zw4hf{f0(LTMYYJiJWdBqj_(_e(Cwe(rrq;YA!wMEn6B zqB-4YgW_wR7z{V^MoV}^J^_9N3n%_8*wx71-ku?Vg4l^jc~LM<=?66NCAF@b8!rWm z!tUdAbF#RF4^j8uxdvTjy1lVD?ubPlgNU<3r-s)1@U?ci|qZ;$x6um?N7*_U-; 
zhmE@L^WIGZ;Z;n6|r|Z(VTCz{wzhJNj6HzVYzikKWcH z?^?%w!YE<^X^e-Px7tDtp^e{5=TIYlVO=`d;6J8cHy$=Laynk?+@}5b0rMA$TCpsh zps)PWcE=a4Nxi-|oTmc!$~XW3*k8hOxgJTTquID`aMi&r_V42zzLv-|Tv$|0J+u-*x!i*5cY^vRJ?{%?1xF(+)9yAw#mtw0Yh~l=iJe73J#X0L%QlKrcapJS zZ?qy8JBb;FQ;_^}IHdz_J?6RN>IiH{$Wzd;;nBfl+UmonjkH%jTxm)FA*Bj34)0O$jq$7KNGQ?ix<$ zHDG1B^MXAqo2$b;-IO~tY;yv@qHbV4aTc-*!ogOw`C7{xUPOxr$;MAjB5MveE3@tE z$%6MH;zW)ItkH27?RW2I*Z%J1_T%cfLtj*)cXu7R%|@KZKHe{X1c2Va-nWDt-?_GE6w2UGn7yP(T;9qsexA3+ z@M{NPOPX9o^e;|8Xg6e4fPXfRAdgpsD=Q5vM znw;%q5JYh_Nam{mk#Sl5`9v1Yi^=%;>v4(r-hy+2BzBGeHaC$%_Tg?%XNfjDT!#`6 zo(aCq>l>d3_dS-Wz(i?#V#2w^mQD>)?&ojinV8=aa}c=?G!;p13~U~*$xDL@U^us8 zR0mKYF^&T-dDf5^+@7KDtGCSh__3lDRAsiuT%&0ESGTGf(Z81O4mXMj+8@qQJ}VfL z`C`Wj?nVzzyZcep(F=(MAnTRcMw=O*D47!}rvWa&>P8mq=|scpb{9p+$~&wqB&oaoFaKy*H8IHA`=H$Rn8OGC6BY*m?g3wB zqt_O?_?Gm&OF~I!F%l*3G_Y-nSlkA_7ph36Qh^7b%VeH^AM*KqAqHB=_SX_*USTwq zX^@;cEeoF2M6^bOp}Y$+=Za~(&;Nzfs^JC@#hA=09VG^^E%zIAp``2%tzsQ>KOAy> zqtds^-B0OslmKnXWVXBYb*ji@!mBvgeA4@}wZY3DZ$dNG-Z&H7+rr1+&WUyZBC}74 z0>Lu1_of+OS4~@mhx5(aeo9Rw(R!Re)F?eduE*gl)(YeyyzQ(=kY+iJ^#!02C$X4S z%gIQ``fO3fTyaiU?OD$I+t{JE!NkVqXM_1(2xGWeDQxjDu>5!jdLt%Ee%C)LkMh?- zpWc+n=x~nprio>O*ZQI<=-bLW^*L1Nd9b>@9(S1M`p+EMraup9O!T45Jlw|*GyCIQ=+ZZn(_;A# z_iq^Uf2_Bd@2ti!JELYwfwFAfL&qIRIDlNjG(h}I4tVrPp4;gLGld5+z`U4M$47zI zxhZUC!1fBoU*OwA!Ph#|45%!9FGH0VH13P@fao#N8-O`v25kSKhX)}(ob6)heqR3K zF*9Su!dYu>?kK3s86#-`-1T_ArLoJ_gburMM#H>9^!<3SpNSc+0LDH}f?NY-jghS8 zeCsUc3@IxSy5E87nhyOB=EikyEu{5z{w{P`u}dgeBGqSo6A@I4M59-X1k5%QWovp~ zxfu5mLl?ED6+&Q?&+F0Cy|>)z11!&K{O4>J{(_@K<5y8uN zC8nem%r344=<1#NLUa|@oSpoB9x3#3L$d9`LpUVORA==L%fIN4#m2?))l;S<)pTPZ z-%UsHCG>cQ`}7lvL^XXj6nTW;xOZ=2v(-f8*Tk!We(y#Q0X1FKb=ktg96rQxwZhVf z&~3FTY^lNZy1(25d^KU(ZlC;8pa2RxQH81GJ3_)KY2&`*#C`G02u!?S4>(_|yw`Mz zSYqViF4D!A=6|Z4=U(!p_2av)eU?XH%inx3`Ls?jT4kO?`=X5#%318Kz-o>T{8ur0reMm-vJjL`3Mw^gEM)>7IHy7fL}X=Z?5S(kG`7__CBG1%1DW=0DYprluF=Lb5EDCZf!*VmjBgE1{zHwNta9qc7qnDG=ebeI! 
zWRSi1dt2n5%4M<>>}*i0`HF3=!xSi~P7IGA%dXV*Lj(hGl!zUIIPI%h>&3Ul&l?bv zn7!?2$uV3l0N9-8+g&>OF@etlFKN+HB<6UFVWY!`=NB47C*wTO@0OY8Z(7d(htlw& zS}l#korv*qb0F_~Mcw%f@wbIPso?^8lHhHCfhIj8Y+vVWLG8L|ifx61$qThy=RrgT zMzLYdxNO!s2Z6arU|r9;_G5XrMx+S_ui+5Gu`47eCY4izpv$Tgy~Cg zde67wAVCvHM#VOF)gr@_N)}BomeTtmQZlAKBswo5wQ6|yq4%`aDui>hLh!HLyp=m{ zgvIan%ojjX?!C;wN_V@ojS8ga+cQ9XVl!zHBYqvuZ1Igt=bWI)a@@Coi4yObay5n1 zR-&pD9X0*@N2NTgqFdL7EC{ZkA+Oz_Rg+iipKk|5p6cELA`^PI=WECVT;2uwKXDOy zh@-~Gwo@s$djEl<`+{CsJKdf8XY7b(HOiK6)o^~H+gy#GSuDB}?fARZ!1s^Y)CKs4^mrw;`VR1Jtq=V2_BIxtO@G~>U? zrKMHQdk8>x+HkV=l5ic(Pz^xhKmUUZPLX0Ew%dx{ttJ8a;1+kQVs;I->+=cGC#6Ft zUY&C;#R^0h`z0CT7MmsH?!~x@BJV=%(``ss$gxY>3n1HVt*P2E`G-`vSBw6_ALLA^ zJ8sZ)KlygaXad~h#*{M2{`%7^J>ih{clrCL`LBlrI(mNCdUIBiTJhbx z)$^<0u>i8{(v;-1Q1eivNjG(k%)p)AI7|Je(C!0Kn9<$T0IUj?8Bz`rVK5^Ugxb?) zAhDJ3n?6tYqH|3-XbD!J2f9EayC2fpw;@=^4LknPh&p|@j9{4rt{O*1#OKEzPx8wa z^n$+!c>w|&tq)&ovKyMVy6cbI*s1*r4$b-;6;l{!D1U0ICFGjY2FzF-+=eKQkAVwdl z!|?_RIX3OpL6q<`czrup)_Ym@%3;xBLhxR04hO-(_C&Gqf>rpeW|83cj^0@qk(z@L zGXe&uM>&aVBQ^R({`}wX9$EQKuYKi=p9W=|vYHwrr-$yeCLd%ke-J!5;S_lbE%tf| zpW$dI(n_r(6sK)h6@0Q1io2l=2@HMVm39Y-0=ctNhA+wP`##l#YJUpiSuEI^U$Ioz zc}ng)l(7KYcxY|(@rtBD?!oC8w2(~1$>w3Tz!Kv90vcFL{6;p>;u?hE2~7acoRC( zVGn`rbp5ZPcye)kExJQcdaN;OI3l8p*(FkaDaDyd@kKc;gLPwr2a3(DJFdIxf1@u9 z;TgwP3k#Rs#>I7|!mi1WV`%f#)bCnV-IvLqJ2Pl3z{|LwVro78w{IcjqMNg-fy_b* zk3$Ff)NFY1d1dYY-2~Jx{MV}A9S%sfBc<G~R{@f~we zP;I<@|179wi)DmiOw_{z7X7*&_3Q@Qay30XJVqk#y3rsRCxTD427u!@PbcE_-=7R< z?=3Ceh@?`*U>VIdRMQ|S5`O&ErKpi#avN6E$t`1|X%L%9c_dzmB z8Wgof&&w`nG>z)b!Dwp@hr1ZQghu<&0$L$r6g?(7&yD|~Jj2gppkd`y+o99QM2I*z zTu&cdK>qp~JANTE=$5o=x_VX*{%h{O;ODzt^+_-&XT3bQQW?ad-AX}=1S$+HYPc%u ze!OrkwBpnV35y6*TFE%05?x_?4#!k50)zBanC)-a)S^n3>CHZU6Y2h#w8@aFRxXsQ z5hnDt;Srg~GlNh#YrQ7JWmla4w>6a)zu{Lj_fJ$orU`j0NY~{$fRCkA`9W##*+T9!VtD({t z-0R(6Hf>&58a3_4S|1m+ucVXL@SXlwzX)pR9?(*>gLk65)9%AqR!V~PobJRz-_xZm zj|n!qo!~SCh<7I|a838Hz;_WGhXPJ$jI>YsJoQ*660!Xl&%v!^--%^(@oUCWwo0A5 zf3V=XFP=9tgEc>Pt~RB$TDUD&D+ 
z(HA3a;U~~gpXiNy(n)I8A4->RU~$^Anz=YtI}l6TsxaP2F&<27XaY4yJqSjO68+gJ zMZY-S)QALH?*aK$#@nBEiku9hM1FfR+Er$U^fhpmn`LkEl92yl5*K6CQz`iD5>JL3 z9-2u0sP1_f`L$fN=6yK)gZ_WUUH_X@JB*b@`}cpxT`C1BJ4xneIyUJNWOO^U*e+{} zr(6Cy*{}FFIC29a8l&pnOE z`Cnr><$k>=d81bDy$e<>NG~l zyA#ow6A#9Vseg#8*}IGM{{_6!l*I&`v|BS=NT#M4{QcU=?TEy*byI`P&MAKM#bI@S ziyqXHb#uVV=-DX}A#T?A)jn5$uuLO&pi^pKbCMC3q4D3g8aDW-DB9!h$~s%<=OV!g zqi5L;u?fJq^&&k2Jn+dHGp^93_lFcw@WC>!UkqGwVyZ-{_|*A6!*Wa;VL-O0ysaZo znIyfok?8UGbgPyRzh{E&fqV~R39p~jZ^}aTud8U&j%UH{XEDasrlA{s*H_wF1qanY zI%}bYxJN{5#vb;kUMifN7d>!hT*?2BJQH!#{hBb3@RX=utW;VyCTgP?_-;p2%u-4{ zC1#^Z>#z6O$o$MJrzfbhPt@M>rue@k_zb-NZ>HOS#&J?Ny$TxBD8e1~qlM2kRtj_V z_@DQyU3S-gy*XeSY%4({@pFAJS0(S8hG8-o){dBel}70L3M7R&d(2p6k~KT<_7G?b z!lbLo9gn*2gPP`kJ9I9>zaSwBt!Y|L#FE^3cJ)uX3`r`Z;t)T&ymOFH#qcBNp*qJp z_k4ZbEc>5XSSjSU&v}lveR*J=d74VsxsDo^88rS zrC66OHobO4 zYSB415uBH^mp~sXF_PZqJEwHddW+DC8UIlMNpZVg%DiyBY%%WJ<}NVUCgUSTnCMSx z;V$k>RLJa2j-fT)DuO&)yDe#}dn(3vU62w)NDSALa+!UaPQo5k7x%W0i;p}MU-Ui= zzygg(neN^`OMC}&qBTZCaRurm;pLHUb)C*slO!E31C~8Mx7PJkVywOY7``7#mR45} z2Ng7cN&U=<`H^rKbNYkAja7#Q`VMCY9Oxzv)(4Ljw6L?5V?{FWiDs^VF^4nVND@`< z66)kgUTeEShZn7F_rSpNo83BlH}6JU5Ly>BuSnP6;$EMnoBISmNujuvuxfCt@o~!f zn#O>CaDI=^$jEM8Qb92cLAEk=xlo`jaQ5|t6N<876E3S`S>bOt(h+?0;f zEOrY&bm86~-@=~Y9%?Hg`UFClci&a+;AOk@0=S;@IIjCQ9rwquhR%ObRH1@Vej9Dt z;OrejgcQHm=)GO(xl4*&r<`HS{e1s8eREe-*J;0qRo^Us0amrU+GjiaLtdu@Urrld zPB*^ug ziz<+AR5%zQ10JBO7#<*7zkAFR;=_PfE0xCBw85mIB#$)s0T%sO-9rR_^I{L~ff=jp zDmZw9oS!vmyH~6{7m%o2OnuJ=USXL=VNDuXKcw#g99WI>1jnZ57w{*hMHxdEEvs?Y zjF6Hs2VWjKhy?bJGTNsi9FSvDe3$j*XFo=>y|_%S?v8vpJkB9)AZjPhtiKWLKT!cmG-<2iF|q&iOT zp{!1y7QWuX8{`c_S}^uBq7(eykL%f(NM)(DfFgB3Lau6Bq&&yot7uM%q7Mcz@N^Su zv`d?TK$njSxFrYJN%I`EFE-|CjAoNvh^`um^wV_o=cL&QozVE6kIbpRkJy6r;gu81 z&b?f7$ZfGqQ<_H(E)#=PvH1TCzXx4}fhFt=wql$mV4@xpQsc$dT1?diC2ZfxBdyx; z&MP2B!IioX#t9awLLY~82)mDPCh#D6s0a6av@nLXM@R$bX~CnTB1qLP)u-860((fqzfg;aqjCnfu4lbJ(R|(|au4C5VDNZo|&O%QsS!_qvRk5TL0Bu1zPo$VDqJ3WSh@^(-alzs3gccBV-R*bDb z_pZ6Yk7^Cw`UE3z9*|+9G!EytFZ`{{6V|TZ4|1 
zC5O0O*!Z?XCRi&dJaSiGf8=&{B7ku8Rotk&7{i89kCrNNluZr8?Q5081%smC0zj&ymu^)O3FtSWsqthjmCoz8mI z=&W$Ri~n{DL8FJ_PbF!pGHDcW>)-|1S)91wv3piTS9z?>(j;cI>PB}7F`JkiD~ztev4vxjZMdT+_}+W$bv$=z zM~UWhxk$FW&}+naguj1NYp;BINCtS$&!LkIs%r8E&aVa~!&{1ZY44wJeq$^l-JSVn zuK)C?A~?qd+rjeWV)Z;tSU4Onv)k_+Aj{)VWpDr>&t9>%7oEPpntqskmP)Z#BQKM_ zAJq&}%X0S@GYY26{HTZHAY}@1l~r`2$tScygcm-oqPbFItQ3 zFR)Md;Y->7c+1o(kozjZwHugv^CfYfy;0C67=NvpuTx zQ=(Uu$jkJqtxi25gusE)71(S!1vbAH)!CVa?b|-R4OpwfpUT-GnX$F*`?TuzD--uf z2(^=EMXbHw_0$+#7_)Y6>ty*oexB8fZ9#ojxtr{#Y5V1WE?}}}FWSnt1NRL^;)0(G z004v|1 zcLzUAs&3BcBfz;dYh|!GNJH8KcgQm&h~EOWTK*TKZ_})eK&7;k60&|qd+EvXzhbBV zL>pj~XX%dn-Rb!XgJL4lGrM2VYGfiZIKn&nX`^=b6&9(&;CfsME25xtwtU6B$JuPo z52#Ri3A-j2JCj3d6bTi75e!JrP}l7)2>D?>O44k%=2P1 zt&eTp8T`vtrIg!9((H8Nlc5dM>iB~)1y`@f1pm1r&vIKNwQ~;XfVb8uowO!r`gX_G z2c7}aR;lpnf;(Ni*8-}PyvrBFahO=PZmaPLh{yJ zY-+M3Wq0W%@3V)Q3J9KW+S=V=_@+voT!u&3l< zl|`Q^;)P-%4z!_ZlXU!$4%6mi2u}{X%}Eu{rb3dxu-*; zWwjiCMekWDiND{b12p-Cn_C!)p;_jx#VJCsfr4XR$<%v71lJ zrOvX$X+2xCs0x2{TVP5zF^n6ogPuLPPHaUY1Ex(mONvT`I_$C`94#_R;r=0AMBk=T zqBGw^t;;C>tsZNc*J-cgfcN!#aoDdn@)bd!JO0`c*`Q=ECDyuDx_Dkl4y zxqLt5y)~C3BmGw859#1`_E^MODLeAv5dZKxi$M5Ot2fCjC}I^0e5qTh680g3O@LWt z10U0>!Qt_T5*$1v$oIOFMnRr>3Ey+}!CJpq=sGn$L}X-i%UWXj3(37r=OHmVU2Q67 z!l{O-%P;aM(JSSDMr>PN_C^BLT4VW(TcLo=#bR8X@ zZw%6c@y|OCMH=D^%d5gqO3aoWcF)N-YEtwDs>9sqJ^heym>}A^Y$__QtrjGy#hV4j za(OHybDE)Q?6Cvc+0v|L(`yA=3S+4^yzt%Vqxmz;X!yCG*z!#HftF^ECx)m=>oxT=F zY%&z+|BR4{d~UKEd7)J+7maRYgX``2g;%&<69DBJTbjg(%{U3~$l={b82LGoxt4D0dpNYx!qxr<~VrJX- z_8aJWj(H|F@#a)Gh??M(nzlckNi6--K%`1*XfW!j{d7DQXh&l`2Dk;-@TE-`)G+Fl z#_Oqr|Z&a^cX? 
zQ}Vk_zaT8x`~kg3`&TFX7U^LDQML2`8Q zgKF@&sNRBUeQzc$THClv0afT}wyI%8Itk z@+fKABe#c!=`RAG{WuUcaWCw!-o52Zn><4ysJl1bL_ha(Y=@@&;^<%QkH!X zmW>S`wexD}ailQ_Mnx#Vl9JvNJ`PwBvQm;SAZNLg^cx=p6_+{kX3|~Gs%%$AveBfs zHU0+v$k>ig6f|ED`HX?2svLOZ?{0rJMtaDnxJ{qs?DxD@=~rD)_v6R}Z#Eut8XlIb*wK%e=9|_#fAGZ1sezIfdC%Df;zd630`o)wexOA^gZx)k=*4sWlB=aW4x?mJjMnM=g z5%%Fl{HL*#sz(O5Y`9Do7Q0F`93Bd_P4=+HMd>(_?4Qv0Z3F8i$;83SPgiYBjC_$j zGh97!p%rb-yNLIrLe9jiNxj1%{yPewDf58KF8^1^#~96p$k;A})@y8!d5)n@=iU!L zaodG3Ya`sa$)JibZg&I=-6@U-L*-K>#V)_hng6U+v-+u}9Sw(sUSU88 z``V2TIMsO3^Tu_izYYE6;!jA$ubWRyME~zygy6Gb$=&VWG;cIpt93Y|T|i>+)J~6_ zk|EEzb(z^(BDcP@zNoVa!fu-@Y3<@B){U~9V?z87lnGo_vHG^Ze8W7LrZ(OU&Ywxg z5z}_I%Kfe;LWOJVkrq@A-UI5HhKR$`aJjHBNB^7?Pu3BR&sz0{g*0kQgofb};~Du} z@1EueLLX3ILwK*=gmv0b=YtCR$@mj)6oq{L1FV?Od9(A=UkvA_yM>}2g ze?hJQWnu#mHq?|F(rB<-9Bz8~v*7n3{g;f3Tkv#%u>OydR{MNFie*0)4K_;ia9-UW zI^O14W7F8qkEs2Q`pdX4ch_oIN~ueZH+H;B&BeNN?NMss7-M0W%+6Oo^jfFJV>Fe7 z`1?k-fPzgw=2?XD3`~x{DEMcvEpbbfire? z&bfuTvA@g}Sqb4oIyK`|VokWVy1^ZLtI5If#0lBcSDBQNrY$Ind*%~IiBs$`NNE4M zV|Q^1LUNiy=sJ3gj@prIHXK5!M{!&!8*GD^@i2cb4Ty?fcS~J|M6X;d{A{gNb0RA$ zAPXGN%r5#m<={+M(sKHvsUegO;J%~GJ!lxgr1S2QlmwQBe~n&kH72G@u@~7SlcQza z^DL`>u4H2+Gzm{B__LT)jd+9MK^$CnWEkLGfsMA7!1j0v`Q&eV4xVZXpeZ;z)Jmi+9 z-_ArNyA0HFz7FR}PE=^?+)k*}N9NyqL%?~s^lax z!R_pU>Up^Uw{q^k`!YB*cBz!*2r9zM3^DiDh!cnk33s}lNi(R!37y)lU9tuiyXLVwyzu=z{#IDt?!<;Sp5S%!Z;gO7%n{OgU9AB&~o9i`HNhr%$S<8zHLSiBb%aMjuyILI+o@1OMI^Qh2I5 z#F1X0dSWJ5)T8h?u_Xmf02w42SB!n;Y9&PWV7V-tLoAD8&X~%p&kS=-e{V8X1)>jz)d4 zGVBL#u`{So^Mzc%Ggz^4sZZ{Xn6XW$qSRd}b3^TN=<>B?leCS9n@-%xq_ z0P!0t9Op@c%0*pUw+|IO?O2ILIyMqM!@eiXIgh-K@rSgB=?f$5vf>FYL8FfaiFKc}Gs?}3YE z%5>`c@XpM-t`bW2ef;n@Hb(6=rF(WLjShD0$cx(Ljo4Eulqk%g!sT|Z^`PDLM(Ql} z4H&p({&De>1w67>U@$c{AUxcMb3&ZnZb0efoAwuU(xT)Fy7!2loQAwcR=hJLu0Qtl zETruJBsm+7Dv5vcMiQzx{8POadrF;299SiXT+$w6OU$GbzFmKeLszKM?qsdjd*E69 zT;}d{{1V2|KN7uU91=ky&9yk&j{duJ6Am%)*wafgU9V$-nYDewEECmgFgp}P;0(~R z8F%NK+!v7}Na^SSa{4E++B4jrD(=+hk*3nWl@a5Xu+9a_%WC9#E(SwcLkj-6rhYL} 
zPCB+6t8)ciLPFMCO&!BZuG4=fOmZ%rsSmA;q-0eSD-+o}Op+-*j$_UaTb26c(*a&| zj1>*cc=`DIO%a5xBv+}gB=yWNkqHtvXXW>zcVBCV<06YoESPd)6P#J0pxV+-*9N7V zDsc7NLLY+Qk2J&%Kf*oN#*H0Ore2l}Gp=>f1L&7`Ut^h2Zff(xaHEUp1xh6DZ&uBh zMPtrYDZ8z-m;4n>3mOz9o8e^*^Qi#V-keUauWC`%x z?-#gMFJz<+xu+*cpG7$y=)5600_jtvUl$~4xpth3{#}1p3ct42@$^ixv~938HvdgS zbt-E}EX`0K&4*pEBBX58T$6aESE;B1m%sq*mL!&JyL}L8OQ^iOTYjyybkQYzly;fT z3DIgW;(A`bUAcWj(RJ|`k(?K0$BZ$wunUc-!|To~l7n?+*Y(Efh|x#4No8@BO%OD- znu_k9+e6ntTI6mQP5*Z!^NjBMSsN`WJ2;|P)cJ;1Z~ErqFkLN$)9?tK&g#ivT2A6N zm?s((RXFJl0845n_tja`WwJ`QF__LuVARy=Oh>iDf-HfCV7Lp|vIF2sn?@#GzddU` zN{KWrW{jhT84Pd0yh7Vy71I9Y#ZsQz8|_+0i}S>5JhPJZpy@7uGY!I9TAzORN2)*} zGRkG|89r_Ku`XN3&jo3X`V(keOXI(34L8BGVXPC-!#)aH!Ycd3Gnm2h2C0eVb>JE5ism{_i!Kh>xxjIUfP^>Sf9 zNXW+yzriqmzic^8rnqKd;i}^vABV>1k~(@UB-=>2iL`6{jh+9tjKPl?W!8uPy6=BV zY38L?d0AF2gD1@&|Q2ay+5*pq}=Ds$-sTi9WfAu)8pb)h=%fp(8{g- z&lDoN1;sUG88QYeNt{Eqf?)cBM)gr>mi-5r?}t39PPfe5)z+-m0l-E1li0Cs4kTT{lY=E-&%xZ{@ zTDRnfA}NkSPTzGi#_uWNBx!u`DCa4bd)|pYh5SpPBQdBguv$HinKw=eqddC!Eb2Se zcXv&PRRD17UUxP+ZVON@#W$7foNu;vc3ijNi9gQ0it9PWF4o}If{n6gUp$Cs_iKPI zd!vI9w+xh6{imkmA~>TS2hHwf2F2&wdHD)t*pDGDETPdB{2mgeWY8J+w$)0#mT~TF2MBy_Z zLYsM5NuSr)nY^?nGPXuB&EPGKfG{|)i4@yG(}>t*smX9Ho0h1#p)xL-hddf8C-OEX zpQp{Mazw?(`%M8X*aky2`(xG2rZI@>O3=gOC62yrT0tk66olHw((Uo|9c}LdU?9ak z%@sp@RQ7XBvechAFUbvQR!>L1h~Dk1tr;ahgZ;-LG?gdtsh5_S z7`@W4?D))u#EzAalqH|KZD&gi(8tnB`m*5Fv87pcDK(9k$ z`oo+kN>upliQqZ`WniSd_`?8?dm|%wYgjCu9UQnFpmT(FLb# z8usSl2eV)RVV>PR(k;5bN@7)s%T;vb;8Q*P%hZVq<-`c|dal#*01+1o6B!!?#xbv5 zHFtP(GI>spJB>|(G#*Bm8byfbHWp!jL8VLvfp=MKgZWvg33o^s7-IU_>#m2#2WREj z{9IZ_KmiLr4eiOilutzhgFQOn-nW0BCidsv8n7Kz{T=|$6QR_=mcy8;Jf`emzIK`B zD$th3Ehgq+6N4Ux2bN=#He`;+_>SmcO$V;KG`EmQHKqKWZj||DC_uB~PT{$inHyJ# zvL&M3d^?8m0^Xb%Qlgephh=d4dm3S_SckH|q^@tvdvYShUio!fGoQl)7ic9U*ByBL zv!J||;Uc33Wz3jh_ul=ROcwKj6l%QdVe?<+6SrvOM~wK#rc521TxeI^!&3m%z-BjWeh zO@8%Up8g2&VN6&{Ix_p({V-2@&DJ=1NtY3exI$knqHkp+2QM|<+wMJYR1h*#wuT-CUIEx00t9&9-9q6yS_3j8n24j)B@7_p8gK^Ig1>Gr7|`7q9W$GZGm&RzaJh*B 
zSaDUxU<;yt4%4aSzGV=iH<-HKr_-$0Om-Q~G@X<#wbz_-PjF&T=@rsg`f%lU++VTe zh}?UqoKZj!Sa;ZC0`;jFG;hKe>~$|TGBFys=LEO+9lbEC;Z2@nXcxl!-8P{5%?Ggk zj^u1?Z|=NluJH!@V&5(D6ix~>&y1-+OeK%chCfDL^ zCXl8GD(JKCkdgKLt$T+lWoTpXG72AIDktkfl;Nd0t84`<{W;+h{E@(_A2eHiM%dIj z-thcY*VWB&iF&7r>`~DcKMbpR=f%U#qco@8VPPr#YE|MUw?OkXYgF2ZmyysO-Kn@Z zxMv(gPI`V!9b1Q2$XJXU=Sep$^YamnMLMmw^Z>;=Q`bv*4#nZ5xO;H{%y*`j8qCor z;Djz$t2b7{$@S~=Cws9{_Nx5kh3riS&Bx&RDcxW#H|W0IWTPyuWvYQK%hdpXzNXf; zigbTop$bIBH1@nSCq2#)X3d;59TF5)CzEW8`N~PL?0>L4c0O5glThe8wI|BH}?+VXBaFiZ}{_K>yd-UMP%;Y9h>J9 zzRdBmZZ)YV@+x8M_JWY7{oIvc&Hcu)DTZ(1G$!czyS-0goMXuM#3XAL z{)w7IaTImP7!56cI#el=%tYEx`2oO7gBA~wi&91PqLX-UFBs|gS3{G^3<+!4IrN2% zYQ{vzI8t~cz}8$4YzuRKej!qPbCMY8!5J9n9A0h=O56+0NO&%Od%~Y@^J97u*VT>J znA1t`=O}_Y1@+`6wjY>wHCwr$+WW>Pbhp%WW*ycH%PRyx_3zz|&|sYlzNORoZNXq< zZ*+NuiiX8F93H~yns-&aTK#;%mu`pjHVFP7vfeT(u5F1LO+tW>AVEUo1a}A?Ji*;v zgS)$Sf;+(-g1bZGO>lRoao5JZf!F7rd*6HG`}QAXjNM&(?X}jdHEUK?XCv>2cdoKo z(`6fSso(cAmdAhjJ}S0ArLGBak2rP%H_y(Bu49gB zY?4=0*mk^yzZ9 zv0n8SOX;Do=%uQVZY5|mt zyj&xJ4u1GiH-xBcE%eiIX-$^GEuG_h*tbK*L$pRy^#8lIPX}jSUH761p=@2+rRF!* zge8Ie&NpD@yo~>M*TbZ_3r@6LSbe|-;NYjYqsRBMJE3xQ^v$HMBtHpmoNjokox!3FHp&>W@lR8(q%5CR% zMc)_PVqy6hs6Gsn8!2xU3Mj!`g0$ch+>35$2&Q=PkmxbYu^{xUOPM7J<1&AyMU$TrINBW&7X6F(km{ zqH+LBF!OSYV*nv;QvN(RAnGH2?&oDd8DV3!YxQQK=c{qE_AWuut50aEa$AC>YP49| z5do7c2WK~T1@TY(;pd$~?aI*cClf@^A9T9LI^Hsjj8J7x#(2y}ncV8exVt*0$wgNo z9bF&dylV_4c%Pg+w1=$aAM;b3HoRt%UOJ?XONSP30Vs+2I2GMAXLPurEn> zKB+Tm+1}YH+eY_D;XszY5+)x}s}e@8W5q{ywkgu-CqV$YEZRq-qJf6bQbY$RPT0nh z%(47NDY=J^9Ow9ns8?#(R_48Eowwzg0>uf9lPkSBv>8o*o_k#dIrFZ;@yw$UO;om;OHe{E;JTtXdRg%)&Cgaz*Z`GZhe1!@C5w zBwYz8-n*I+%V~$sNnG+a{`Vyg6ovmwU%f)bay=k$6XdbSiD(Va{px4J9lVRXpv{9B zuO=fQLgaN+;i-lAD4B4=(XCs|9^(UO zc{5q!k}#K&Ik4>TQS0GEUN(cG#7l}XnsxH~G{*)7PK3w=}(y;U{> zlk$$;Pk&k==1fR$xY#4Bb9)SBhK`Qg7i5f+jkF*tF7>gV9nLn2nx@yS-82&laZ>X5 zNf1^Hh0eZ7RNH_pJi>lic#jmUyzVT4q*H3pwFAQ3<4ZgXj_&~>37N1t>soeA?(>sX zguUT!Z)zIF>R4Nk`M8Z9OGM81Yg>>4waI8ArTUH3d{$E0eiO*)oj6V0WDW^Tb+=A& zIDXRxi0<2n>}>B|NuF5cq?UOo-KX_4 
z$IT&z3C_2VRkjM$0Peh?x-PCl= zjEGf!HS!}Ji*V47Fn@&1D+h&X{|TZnpM|ayXCx27BpZ?m|8Y~qBwy}q(nb<07*o2k zB$or5;QH8=n{vCDX2wDv%)Vo{@#Tpnqe0fwbUy(jwK=Ree!Jl9`XPQq2#9=@ibZvB zNEBF^iSpb2_k7U&{dDm{!5A(gO=~E)WF!Xo6pbkK94bLvGa36x;7ri?5-iP?BI*tMwuV>A0(~fOF(CQob@7<4J0 zEqPvZy(gMI`@D5P;a^^rXKN-8Ez82239d7$dc7zRXTUA~`!Gf~{A1;%BH6CFTg_do zy;jK|36RM~y`meIDFQ6em#x4%-q$z>g>7=k@4ZV3N}pI(ao<_Tx`qZT-quZ z+j2ZnM>W6~+Uhu{OtwPV@Ct}Xe4OjF<WNdiGL%0f5MxEoKInG(gmmH zmeok|`;yRs);tx9j*)jmALmaxtXs;AzwkJOWZNy$!8x4^utn$5omyAKn!Urfe;xc+ zDFK@NnjzpW@vGCsVKg?Rbm2ukBO|E|+?6!^@ zX0P^GjQS>EFz<8;ES4ROTQY=q4=o`P>hM7U3NqasyKf!Ne@5_wPv1>)D>$gqe53!^ zulE$Ffg8ht?IZJvWuptDDBWT1p$3Mb7`xxO!|qp`n7u-kA=LEg@@}zKQRxp?dbG5t z_x|wh-U29o8&xC$+IZX&OE0J5arz$RdbF$Omhw?ScO6tjC6V+J8W%u$+=-) z7xzw%uhT4q$93Q%vODm7AvXw*YET-ZL1v?)i7CZ~p|4jwCS~H4%XsmIuTW$Jx9#A_ zyY;`xkmgOVkn~^)t!CZKd=F$l-2&Nfzh6VR4xQ(@xUz62D2ECD7$W@%{>T z<@X&$%z!k!o!8v}pwD1p{!s8VV3?mP?~-j<)5Z-_sUNIzH4>Er-3IJ07 z0VAIx=-fLVXuH|0`geH})(+F3HKWW7k{MB zCLThwLwkd3+OP7pYWCLcY|K%vwp6#pjIE6$&YM@tS2iV)clKYZL%icm8lD-K%_J)( z`ra0gR6y0=(mI{!bFm^JD>_M0O?}6lzQXdCKi=pk)sPu>5k03)jbhP`2RjL&GjV#z zyvxGCb~JTlP2M%Yw2F!ht1PNw9P9!WQDTZWEo=Y z@7q#z_V0*k_d~Y1&=nskgCXow!TC7T^IB5Bw?05@V_ucEAPvNzrGTWO?{aZlth3fx zyLTN~VsBQ_<6yc5`K}I>t%g4zG9K$L-g_y2Gtl59Ya}kTm4|{=-+6xOLzaV3O4#^e z3%D+Nj8!*>^h_>6q?~>}M%JU7%w12X0Aem9>tBa8U%AdZjW0Sb?d~7SdG3Ag4I}$H zB1xDD`DQOdqR(z)_nnpF)+Lr;o7Fa+2%poJ13>L$9y}@}n7*3j)dd|H819WXKM6?S zZ{lO98aUQ|NPB;lYUqp>nqN|se)X}+GJRXJ>M*C^uCu%iX2gDQU`g3EYq_+$ek?Eb z_;FlW*fArP3wD0#ph)pHt4;XF+$C38Kz-}#^Z})Wl^C}zyy?YA^}VQ2Zfj$z#Vur- zD+E%B(}=E=g)7KVRUP^ML0jf5-LQ3wP}*04MWM^L&^C^uxjf>gmP6Nkp^ajC|In6f zuKQk0yNC}5gsOZ`>!hxKJ2kv-4HW;2`r>edo&#!y$00lZv6J4msBRO}j@?FogU;m! zgth0oC#Ef7evyV;G8WlqDZbQNLUrY;`tj?q5G=?~(~YWM8=BWKDP^%}>Y`CNe$zXF z><@(3viUkKlra!{XQY12`0`Gk84H#d{-X#0f^?;b9Y)+D6v0ibtWJeju~`IraR1`! 
z1d!e}k4hd%s<*F4G&|P$veFq0lm)l4`%cQ8^^R%W8&L>c^VGq*1@6Ez02Te4-X%eT zOKh`8BmeaX0oTB>3o(`1|wKLHO*%<;Dv>0k%yHjmHH6u$S#y)AhLrpF|trq4@*((KAmXFos*-8u-A+M1LpZ2L5d*V9i)T z2tb4s1-zin3jz2-m1yqz;~m!i8DhUVDT(`Ga0>mM5kpNDJ51!P_f4lzDI2-@eSdqs zxj#$N=#5y_k5m?MxXnFSod5=Qv-=PYkJ|dG@Y}2vjU7%jWw`EzC79ob{z~mr-6Q?@ ztJ2no!6?s?DtfzuGTuun1KGF-quf&)gM=$J*Tt%v-j7A8zKNuXaMRkhBW^#W9hCT% zRJ9PY+=Z-1JlFZ2OZf_+`Lm2&#rzfR1hkqz=H`{ftVaakP`y=vFCo>j%SOn9(#Qcb;RPyh&uw;CUF8>=1I za^$c<^gxEiW1q25){d|MImVxqUCHx~{IXZ!AEf z>zn=s()SrE6(V0J^9HI>&s#;_w%BfA?>}x%q(hA+hWM0k^g!vXRzv#AzaEmmOr>;3 zMTAI}ol*$Y{?2)LD{oLkzH~yqR>>PykVmYZqO za_(cr3|QLa-tM_{H&}oSe~ZeI?k>q+G%P{tV;36Vr|D8fw`7(hljpCX<{rF|A9ik; zkQ~d%NOWvFzvy`RTDLmRX~e{7EB;SQJzP1^C5Z0car1PSFD=e9uVgM5>v2Zbv3AZB z9?c|^#QVlg$y;qacze9io}G!z5n4==_t>ch*7b_inPOYq1jLfr%1E@vPw1JAIj6e+ zA&<3A9*>=!5V2AXbAV8Mb?N<8-}6cq-AhU%)jc`L$*zb1ar)2?L$E*EarUJI=3^I` zzh<<`%DtFQqjI82su^f6M@Zt$f~m*Y+$!QhPxs>(Gbrkr8OsW3Aaum|4{PRS-@Jz| zx2(0@SF>I2CQC~>pCHAxz=t-cnAe`lH69u3ml4f))E<8o*=v5ef8m@nDspINBM7Xw zEzw=}dKIOEuFKOJajiqN|KrUm#fN)Gh+nkLLZH8EtwU_-o~}E)E$9c4mav=~Ga{Ak z_S^kjoFw7DH4K4!k|bhL>t*x_bfQi#Ca4W75}ut}Y=T`hIoTDO{0p*w<*3k!b^8G? zC*wB!E}@rPIUL-$o0gstI*leFqr{}8ErEl6DSL3Z>%j$0X~p)P;Dh+SpV!~kV{Mte zkXIB$tw$hqnFtZ3<6Ayqeb_&u?Vdjj8r=mYe^EN2;p_5VqUOoHOgkv`;$WzP{Hm$H zsdrxMJ{QxLVdyM+C>f2+NK97nVQ$Y%yj#?ihm>b7(;eY`W?3?a-$x!K<>)R)?QXVU z_b4_wHT~T`c{;Kl&VO7fWfMo8MAom?XkEFfVOuC6cH{3*yy-}U!6HecW;pFV(A?o|d{*N;x!aEVo5Z7?~g>vvl2 z@JE3=b3|IT?(BhP8;7<;Joew5#3%-znc=wCA7HLmk3%yyG|Y5NrY8^;$g?O5VYG!) 
z*7)06PVdy&r;rMgCg+s{`%%*~y+1eT3GlynOozJ(fpO?d(s7O{hR*HDvgQixm}4N8 zDQ$?1Sl&g5yZO$i6lGxp9Ta_bSod6Sj8&%)+iCMZJDjc~;fX1RW168eiWCkkFNii7 z|Jl$~#%=3W;(?*&xa85xA4z&y@9`@!a|LN>r)JNMuihZ$9fg-8KF|k0??IvTn~M-Q zJn_&*!?DbGg{aWBF;H+<>gm;N3>GSw()!fV_V6$-l1>k(>q9a8|`fAErA=d5>$rp{+_kR{Hn z6M)@r{s7;-;o!F0GjbtTwLs&~GWQOA*CkX=q)Tf_nt%2z1HQ{o%6drlWsMXMV|n0( zKWTsWkj9oKOB!wFSXg2}hol(6=G6Q^S96&5B67RqZ z(|-l};iGXxdEFbA|3}L+C|?6EX!`kxn%*w)#nm5}kJu~^AHu0RdF^cK(Tf;MgSIE~ zDjm38GCr(3R0ckGvh=d!7h;XCtXjMV-=&_Ao>QIFCsS{I?cQ~|eNF7oBEDMV9Xq!a ztsBNtxSGC0`g%~o;2@AN;g1oedAkUv^<2=5=Ya$9)@6QIefBl# zOVi1BPYI!BmU|-;I|{wo7&ztu>t_I40_L}B_zk`@redSeGN}27XC<+}s0Q^;E?cSV z5%;Wc&xE5c8rG^2$mL6O3A{v%;D(BRzod^LTvfPZ{of%^XJoT>plrkd7`Z@GpqivH zu`D2~;Y)dVG+Gcp{ELwWRQV+~yf<$}E+3ho#=c*P{6ZVw_$QtSMWlV8vQB)^d1rvd z{q9+f-CN2Fvt4Rj^St{feBq(8lCk&eS4%QD(-H%VD*G$Zdr~ka+~+j|<5;q=PyuNP z8TJZOOT6r*VXf}KWc5lxbN^=$5=rp#dJi?lR5~SjqT;RWxZ}6c)V87;qWjGlqB_aA zO19?|xGE*%z~eU+oI(&{`tz9q!Ftp;g$nd#74j{k6{YAl^*p~>=5R0QwUNoiTSM(Fk_0%uVVeMBGyO!)Cz<0NU>#z1AyDcqF{xnyl zD;*#b?|^Fv0E}CnSzh&gqzs(4UF1)S?M|7W71+lJ`vy9?5f8wWzTUP}FSB4kg!0<$W~Ou&?LCpB3wuitm4a2*eHfb%S4<#nNmQYT7!+ zq`bMJv|=MGZY>bwGpfXYLTwPllJjOu&)iY|(KBx2f5Ncme}rMr2w7^jO~)!d^cSpX zY8b4`c|jV=@3}cZn3q0|;u^T-SCNv8-o1yBow};(cD)>>zk}oOX6ERym}TjyB_1pQ zO98?D4r8_}{S7~{(wvDF>hEwYyLy;3-B}|*n$z;1sHF+MK0IuaeiW~Cp_VLrlu)fT*Q@ECvOR}Q{h{>wIS7vsYk7S zux)k={?2IPGC%lIY>D&eeDLHm1m1MK4aa^ZuKMMj@u>%OmhuxB+N-lZ8L#JtBJX9{?xE$f8gVW zn`^TuBI^n7bwyB~8LJLCA+wEAkWz>d26ii5o%l-)8mwLPTyvnm<)mP|#&4u4-J=g41vnd?voX_$J z!cV*0#tbF$j%K&0knhCSrRmv@*ApQ4YqO;;E{>OgBH5(G-n8kVR;UeT=sk9Uq|Ra} zH@uR!O+R8=40R%wE`Nf7{uIV9g*2MqZ~Do7=mZS2sw}YD>a?37Jl_-ZpL3<1M5trd z&*kTGOapGSC>uZlzZvR$Cn@Vw9{N}7e3h5tWvs?Y- z!-_Dg1up*P)UX~=2)~84>1Q9DMIWm^QRj+%$h}S#V4te&)I00-F@$kV3(2P=PAs3j zjbb_~v^{boBCjcpC?=$bBlce*IION~ftxOuP2r&Epa?g~orF_`$SuBt-G)Z44bfmk z%shz&N+EyNt0~i<<$2&KRM<@G0{_?70*UV;2#soR?AYo?lFnm-{}M}xCWqLy;IUP2 zJG#3fn(P|;ql|MH;JQ3*K^L~YfC>R^X>W6JY!t%L$d+_Q3DI49m#ch)BnvyVTq-^SM@#h2wry%KOqFKf5FrD;*`OE46v_ 
z@iVcUs`vEAl-*kwj0UPTQMeDKCSw)e@YVN%df;mNmLMw)K6j==l@CcgtKEZbJNo44 z(-^*F??fD(k7D~w&U+-W&}#)xYNrQZ1OuLH498JO-uuq##n-{7yP;E6N-&M!ake3( z%xZ|e6JVd2wy06j%g`FN{I{&`p|pM+!F@D0S&p4?Y1VRM23a#OmhSnzNOJF6`2v#XBW!xRD zO653AOB9BaXjaq*8YNt8u_e?2oowo>abpAycIFC=!Sz%p7FvN)LKGL4Y}_?XNqj}< zHI~!jT()1Os=rA6{-R1ScHm36=&O%19U*9)8R+rPYpn~pey4z| zFnVvPG2)89^4ECja}|awWWI4_CXPDZbtjAN+ibHJUn+Md=c~h$+7i}+Up`T}y^GK3 z*|SnL$A6HBaiB2(eP3WPtfu;ap~H|RfHA$S5}pWri{{(Sk4?S)I9F+YvYV-UHB?l^ zM&O&RNzc}G-t_cy(}hKs6eY2ghSYZlp`t%CNDsPx8KcvW{FCzGA3Xm)t#&qSh$>5brM zmfn*moR#*@V71G|T6fYTK;=e{t3eJ`@X%Xt^zxOzEpo!%UVM7gAcdPPKSKd(!PL-q@z){&oe4*z$cCBfJ=o?HZWW&{4C+3pbo391 z5Pguv%5(lrp-*Z)qr-|ShN}$(fBOZCEO87D3C8pCLyh^u%GrCjfA%5WAu?Kljfa(s z;pA+9HeMw@61}5ZD*Akoj=E3Wd{MlwFEuUWhe@!lg+~es*-6<&+5?YgR7b*m@$dPW zqYHx6MD?WkW9u{Ws>Kd#8>8^S2nHGL8L0qcud�&o&ZQYn!AKPyMP&387Kz${m8t-?NLUppDp? z`T0DJT~7EXg8FA{Q}Go?)w36&4w?q_`dj7O<@%h>N@7EzsSd`b&RtySGjpm{|G~bzyta(dp|D&a&m;Pg$~ls23&ka~#F*}`WB=Mj zT1=y1PRlPD_u8G{m<)0N_+?tFHows#6w(ToxLhrBoAMr8K7y9*4>cHGKU^j}ZFhF{ ze^=1MSvs;iU$pDwT>J(*>52Ih_38c&kMdNX+>=x2k}hLF2mg?fY>WZnzRngCt_3pq zC0GmFqO-c+iO^g9Z+gfpR7~K?`}uYMTUy)~0?$w%+DJ4L2h&TY#Xy|Q?5_r=@udU9<+?YVNUODY$EWdeXX)^|BQZW5 z>)yLnkSO~li1j!1YQg!Zc)y#&`(P7fbgf`^=kfNKv_Wb+V%;u{OF*?lr@=z*fqZ|2 z>E9|dX4hLaR8^?-@myM1ffzaBBz;}n7aiyeo;eE%bEf;y3GopWs4#qGJ&t-Wzv&7_jJ z1XY#m*hyMj0~Gvzqm`r;Q%e^}$r^NpGjBSJ;{x{+DhiQ*J z8Wv)!C@R=12|^{CXpatIFj?_O9lG(V7h3KBY`DDd44>4lh_NWCG064VB-JCZ&%k9M zHXEEp*OGozTvAeC?VxM-=!y@u{&Z251NOYxPBGWAH*O>aDHI?b7p;@}fiK4!J8{6MmGPY4sE@+YA&YibIIC@bHUDuU) z-gg>@LWNciimY8idvcJ*=t=tP0C6P2b`O)&U*b007VGJcezzt6>e?`VzV2edH$I7{ zvJ<4akjFbx_--6VR&e1QYalP<_)(khkYZ--IyRZx|HDZi1Tw~qK6LPIQhn<_3l&Flg4zALX2{JZ0Ny;80@~4 zN_2KmkmaJ%qF<%+)p!E^{e2m}m0Z8Wt+pQybo63pxH_`jI8gTrJYS@E=WU%Pz?W>- z)p2%u`Fm3&zm6Oy^LnnWc_>;D%OR7v%T@3C>6WMoQ!d-2i+xHuZB0!b1>9!V?YKN` zvNokj^y?u^uN-rVlnosK+nPnKPsFyW?2Q9Hp3*)5*O~T2KRw96E7`Xxo_Xi!J`ztE z)}tdMARgSOHCW$0H+C4d8eZ)^{yl4Fm-j8FBiu!K>Tgk_E%sJgxqVLld1d0|tPja3 zT5;-Dz@uDK3!+-9G(1UjEl%s3=A@&4V_%3;L=1R00*rt4@?VFmg8`c&;3{Tjv#2B{ 
z)%Zy@IOniNjoS*ZdX;{Fj2A_aA@?F!7Ecicq&A%V#|$7Rvx6{JX~57> zq2KJkVt!3$Ro?%7=Z^z8&Gjg-!*)Sd@jKrEN%CU7OVIHpLsIVPQ)r>TVsxcG)gvt2 zlxV8y z3yYDxu1dLbTxtpHpwrd17mkAl+qd}HpN1%3V286`3ulzDY)7N@+2pGF9*d{OgVk6D>d+RwSEteZkBktwXkPtZ{B29+6JqXHp2Z!vf|jDsd5{psPl^P45%D{6cEQ& z0ne}}ScJR$`y|FuZL(!Sul>!?o}dN`?!Sbz=oT%}n zJ~7)gVR$5(VwzD`6y&Fo@JdEo2ya)vj?l&99dt#l_S+GyR!ORByzY~!jGayievXxE zDv}~W59K$vGdln5xq)Ho3g-F^7Z;rUFi9!>tl~ygm#da0-gox{6X~eYIQ#a-eXGTq zEv!8`__Vh{_lzZ8M@Y6%Jln8#54p<>*A16g^N7xvQ2PeO&gJpfM0$6LYq}#-T!yN{ z&7)8r)}8D_(QV)M=bRhBM2$S~N#tjeBp2hdh*Q zG@Iew>xlacgXbuC4*8mCQXR zNt0_@a)HSqGrJp5@oye)Dtra_!`}DlJ$quXkcR6Zl?=;uJ>KuF=t>S$6;%}kwnCv! z=?tB>(+7t~+Z%eo&je z-dz=}H5M+c3JWO$Aj%g9(Z4GeA55eYsg zxS}d0;)l#%?deJREeR`#y#78uOzE@N-oxnipe==ZmC79N{#U@N1({ANEI~hh&vdBx zFgK448<>+ZHU7agT#_NTqbSRQl%+&YnR?BemE-y*9^if|mdejRXuV%la<@_bNR+N7 z1(ZNE>^e>I)4$97rG#N|dPGe@uBcC~z#sH@RiBGRv!Vy9K4W;^>3Al*OkS3^BP z>kX7$GY1H7l1{J8%R5z9)%!`E)&ak$>51^mMEE&o2d$;)uYT8X<$BEyb-wHNdGJba zI;BaFx?3UXa(@gKo%+1I=O0+DPzZfkjVjEgj2JHBW;=zj+$F!9XTz$U5WSw8>yw3W zkD!-qPg5;NCo)oykDQArbwdfNGm`f4n(i-2EexPmQi+voT&Rkt7N!}!PKwYe3oQHP zh7Y$~GkhphB73cD0G^dbBLma&yP*%7P1LkHHo8m`syXo*sKNsoMsBB@Yvy5!nj4T} zRAAKpD8cP@axG%&M&GGB#F25Sd^Wcz$?iEso+D{up7$ac%AIjRm;9o( zJ^Qa_*}VJXWU}4naXbTIpgah3%Lf5X&9%P1eqv&RvbH$0bmrC7cv%kF!i-vCP5W)i zSRX%QJ!b(InyVVKR+HlFs(8xvCyp78{OtJ|@ZW$Tl5nGy2{k1Vl?TsH(4J2pu#Z(` zXJ<{bHTYt}L_S9mqur$tyt}vhQ&JLkBSw+MaTSYi69f9)adl}`dRI6? zhPg*tLy|(SDEL=1_NS8*(Io?HzdTr)#ldgIIXTNV8h}~CARTY^SQ_d`;M#MWY#&H) z#+?K6ho4qjRnl2&J+tnnk{%0Bjy(I2i++lv>Xdfe-%n)Od!{W=0(ol*&Ps8wDeoLQ zpT^1!*Nb#-scVq1%CqO7J=&mMt{_8#j4)4j@V zXOahv8EYHEF6HX= zN7H>xZDn?rrlBwyHo&(pYY>0gL?w{de*9Wv=T_??6ByIt?bzpw3(>2Af*~!|vf_F5 zN;!?pBu}n$0}4Vy|5V&fcw2k1Z;7K|_f6mo|I;(gg83#p1N8LKthUTwXXxn8aDzPh z(8|H5;5F3+jBwgKymx26g!IRJY$g%-lW?~&%UCI0tWwfYkl2HXo6Cd}ZePicL|Ec! 
zE7#JsCQiiK2WRYvsBxwZw`koK`{b}*aPy$W(@i4`QY*LYcjp%r789FIfqTmzu{n~Y zqFE>{EPl1+ZU`uoUs73H{NB=7b= z!x55;r&&c=MGHn5SK{AW_*PSdph~-Ko6O}ZOHv)+(0G#Gi;nO%g8wBN&uVT z<=8Gh;1f!U01N0yA13aNb59NgP;Ypq$%O#$W1DC4)m*=mW8G+m#=fu92py^z>)!5D zY`1<~8HfL6NrlCo$;*m9M4pT9r@!5@i|p(wKY(NGNSR6Cs9VC5>Qcc&di+9fmgf&t z)YhK*Vc2eHYfVVZea8{&F`q_wPcL-x*KCjLz_{2drZbAjWI)-o-`Jw5l)__Hle5K| zR+bCSkJ@8;%bnPaHSnjkk=xeC=l(w3-c4wiqP!iy|8)1>(CI6sIbPjMA4Y*P4DOu_ zC{vaib)M6e|8fYymnHB!`+M?XvJHFrRriwkK+n|g%kZ}y;>#bi7+g1+@-j;{wWC(D zxvGV0%UbY!I{Vv~`D(}MxawuF6zF}wMyR^pi-#z=i3+;DdFy{c@x#}eIv zCE>X;?@%ecbQAXrjD=CF5q~^`jF*Ab;r_23*$L8IIZpu4kp_DUo?dDGGh?nWBZVe`Cte!W5bMlvw5sizNPydnE%=#cqV;$pp6_csR;^|p{qhOW4su{ z%b{M1r8wfQvpm7#%B(JEx_#<>Dt;#jSN*?R1rwVIc`}RJ7%;#pcOn+^0Md~BuMiLR z|M+!qMe)Da#Q{4_jb_)^irJk6L58A=7hmuG&0RXto`fh?V^Ho~7_DqB%*%^RkbjsAa)(fPXumiob@?C{Vwz9omnvM3qlAeip9 z-lP)l$bA9#$8MW{Cb1WXQJp+mc1C0G-JR+Ga*in5<(TT-rQAf}%)^eX9QuUjTx3(p zMXN}ye?d*@Q@dRt8C>3d&&IkWV=PwYgScfrpvr(RYj0Wq>eb7Yl<0TW?=hCi|M})% zIyoO%%dHTCy!9>9rlN2OOvGu&R(M%;(N&=UoPHC<$#cI3?EeY}@IG(Gv8oGd8AWQv zx_4;;+0tZC>0+}ZU{2cM2$M^eG;0Zx@H;QGZMCG?Fq{v2!AHasDu7>oE$N%@1^!{u z;8$$Nzw_nd^%u#R0_%nQ?KV%i20XH(hMWOVv3@0{K;U=Mq#I>POT5yxDaO|fO&4f? 
zC}Et-5=mFz%zD~9&>zOeKrh!FJiMj}qm38cKB5_ujh7qNsI}@<<~TOqkMG+{`^<7i z?+q^^xysJ^W4_g>{DrJVM%IH`2uLgt^qNSkJy@+w!w8=y?2$t9ow8v-zbI5S7(ZpF^sQCTGQCTF*}{UQZ)uGFU^R@9fU5b zgH929Vg~gJ!%KOlsb157)2PK0s0^gH0|UvrdX^{3cyFr!>IBBs4~Q zRn%UOqveQkjp*@G#cUJh;RD*>zb|p=T+lG|w%h8yd7w47j*X~&6f|&~fqg%ba=fEu zhA*)B?fG_n&WDof0_Z)e_cJPqUT9~D;UP;XwFdez+yAjVnq?jIbOI$XKZJ1}MKq1Aw9cJFv7O8r)% zUVym0&r7t$tNCrl36)U0VDTVaTdO#5rFzRFaud*%P^P}SSFQN;S2n2CY8$+V9DdDX zm$7Vr^N?7rB4$yg8F~Mt{*d)hw*}dvbn&fjg|}72?A?@Q_7Ms}gHjWoad$*kRtoyj z?R4!+if-GExCXQ;UF0oKOwaD5+2W0p21GRobSf(VRzB?VmjjvaTrl1Df)z1X*6}Vh z_m52Lam5{X{D0rXVHem>28$n&82Rq_pAok#+!v7(+BIMr`Jse_G9s-`X}R%7?UYio z_#%gA3z23N_uoEHZSbMJldSMC2J+2j#*B(dHOopB!yv*glH5%?w@GNww^_ZbaY>h5 zy+@S%@wR7Nn(4o~Ii;Sx7QBiEhF=cYG?RzWECpJ6<8KW1244nLXxyMJV%LQ9!p3fR z;=%$`>`pnG@mb+=%30f^pL_)@^=4c@`sg0!J>>^3IDM}^z+a_oISyh`rD~Nqd2nNO z`LOqtNVlLHJFC_IQ=+cBsvh9bFMd`Ko@}<)uF&M5o4NjGP&i{=CHau*5sgbp_=x;& zuFm8*5-p!=#X6e5G9I?T4CTYmV=0mFe^M{EDV$Fu{`)D6jDFwU6S&q;4hoFU!#o)U{&c&lAS*NQ1$Llgn%MqIp z1SxDVu)XzAo_gl14)K6S{V?pCE0=jp6SpK!gC`1r(JH^zL&krm>WFu`=CaI8Z|1Rp zvD~*m2=uKkO^;a`xM4o;N8}hQ{VamI`tlYYkPY)nM#LgQgzfbrW?r#3NpT2&`dniOEjj%v7cQc%9d!hJ)amks`zpI>g>GJ^=UUXQZ0Y~J}^{V-F^KvEn^3x z;~tnpNa2k~ZX9mzZl9aGBRovFT(-UX>82z_`tp7BIDjAjVnw0O5JI>7CIBT|GC$Wi z;rtiT7|Xqes%?-<=l33*0qvBmQq(Ttx9I_|t=%)oO0`o|QD=X+YvDJfR|+qA=i@f| z2-}DMn?G&dP1Ljbtj(+Ri5MrB==hP zY9Q8~SL6;}2{59}e!uw@9A&254L#2$72!yStvkN0`aD)7puhSC(HAF_QL7YqBL&Rws}}@MQX;1)7#-U zVG6R7zmr3=8)?uViZnsW@ypNX{>-FiCw@%2PZ^VZt{F#TsVwSm+WeRD3296G7T~O^ z3?1t)lBT7V^}n@fx={%y{A+sga%Z;YHj&p#+)=^_AJznfY~4HdoJJngFXYtU)>>vy z5^f#2QR|v!)grn*@O`z#59{b8hZmQ+19!t?@VMjO8SUhM9Fb6A1(gcu;&zTzJu~Pq zP6_7BDwCgvz?s!}0S`jrtw@Nf^Y9BcS$_qX&3AXcS@~_GgcgSVDOB2lwxx= zmf5p0_;aFjC%5GgSR*+bAPYoDvJ6C_LzZe>9JhbI)bVF9ZK#{s`S^0R^*T$^H6Ig; zX@)14gfjAvIh(YCHNf=Ttk^;~$4eY2bPvy_dKwqnQexz*0+)>n@BG*RcAM~$Y;Xfp zBo+dYk5`L?!zTvO6IYEP$uB`Cb1?>VALX|BE>5D$lntv{pOFlnb~_x%pnFv(nPr-c zbUG@Qbj`mC%Ukzli`s0SMqLwj3mv}ip++B^R*gS+Pp8{-afjG{aQBOgdnE_Y<9Z>4 
zWZXXL_lWB8hcwvzo`*f$tAUH&mt7eJywdyi&n^+R}BKO7rSnmti)d z=||3(YI5Fo8TYXaHSC)eD7Ilou4p4$gxkRyUzGm#_98pV(k1N9^3#?9Jnf1Wyq8iU zi^eVD_(nVwa0#qmMk<<=v(z}o&rYOx%ZBLW&USy^n6d?V>t`R1G-oDDF)|g_x0!jp z?l1cF?@^I!gbG!R1xhb;VGm(eVaP}n*WGAJ!;k71(~))RJ2-$A){&tF5O;m%*|`7X z?f+w=QBN+Ry_!48bWrFeyQng`6Wy39^vi4y_aGPoc?Zb9U=5x?6{kxFN*`B|v z?>a$(PTq7?Z~e#M3+wJ7@fi;~!c~ zlGat#d517MO-26%Wd#8^t|=i&7g;Ueyy88K@8hHcEd$1i;VRhXs4_<)|EzI zDRrk# zo{5>spd>V-_nnT@Mj{2x5z&JEUjG4O19*;3xNG*71|pV4Vu{AC@e;O8`tA}umem>s zF@;kXPuE6IJB!(B;_^-8DI#tXyyW6m+nb%$XOoeY^h9%$;_B&dM0zk%#9Gh15=dk% z^i>{Eb#u?0qlLNUD=NuTPN~KS!#m{*Aa6WYqPqLtFdj7=^UAr%+NHpAw&h?R(9VO= z?!cPxiD7eyJ91V0x?rEU5l;5JQl0&=re;=~w5&I)8>Gf^QAE8_3H8Pm&Gv4kAWuux zvOSW07TZpcuj=rtk2ftG;iSwtk!$=9U?y-w_Y%msmW}nNbh>D!vo!7~!u-`m#?Uh4 z%wRa(z(U<4ZaNVsWN)+cWU)4AcNiT@d!=v2i!!?nd9mG+_qa>v55clqH;`&(JJ&05 zbs6I~;6%!~|4V2tjh$X#DyJ;+Vw>~3Ba70@hMc$m-HcgeYeqXgBJ;}gS4ZHix6NGf z!JjT^h8X%bbM3D8FD^yfri|sfPNx*9nAojm0^=Y!9syT@+J0X#;?C9Eq%xdDoiSq= zo3!apEw|JD@d*iP+ddU0R^|S4^dO;l%1hqq{;R6&}9~!s$`J*OGpJuX0>vJBHJpb+a--`G+=V zgsTi*_PJR}QMS7nXy(BBfEhPSzhzUq$kaUDI_k|GU!_-Qv13f@f@DNG1p|_%wk<=A zMGm()_y}Pv4)wq0rGYmb#w}W7IQgSsiGFAm-T6e4k20t1FV$u2zbflV;C^W1(_`85 z0&NPrAy&*oE2#IK?uFwztK{TgP}GaeUoKpy*}FFaQYjpcXZ+uRzK8N`TMcW>m|2PU z;oiqBJ6YoER;ajAUH#OrIGaCc9?ylq!CCX99d)7=k?m0lVILYDz4-Ju{nKAxpoG`L z+Xtk_y?Z-xEvqqR%MYP;>RltjZS>jvXWOX~(S9m_?7}#iU{Fs~esiDjEz*eTv~Y`o z@%18yQ$}Ep+DIvD(}pg`WA~<*D>XX9qyKlnoSi^E>p6xItJtbYYS^Yq$M{oHImX($ z?s#M@MU^dCZ}Gt>hLvk3`laZeaF~O+n;EjT(ZGCzJUj*R%w+I;B07{Y5-Z4oHoTco9F84lP`>k_M_iLS55Tm<$MgW zNKN>=*W1@^V4;)~FnRP3X=p^~E9d(NET1 zQPxq=w7cp@Z@%G$;G3~deayb4*4(fY0Vva>yb^8-#@Y7fN%eEYf1^3a!~Tut)EY%e z5UcGpmvizvm8-7m4AhNEaH{VNwN(yLr%Rq#&s6%+$gJp)%_q39L6oIftsIeCMd1gv zt8qsuvQg$Zz%OJFCe+d+s;D%8LsB|pr!{Wy$u_T=SA|>c9Tk*SNrUqeu7PPFZ6>yf z%urA#v&U_K5Hw!s16lrq^BZnx!V26*NEgK%38gfifUEqa?H|| zD}DR7R0g$sat>2(-0Dam8!iPM=p99_IhJXX~wRL^pVjo)A$td$KVPWJiH$er(${MMjprAhGrRbuD7};BdEY9WC zJDh5-L~SfI^~%3{GsrO*TAxqM!eg>nL!tGwx)k;B>NIQ5nXZdQ#Ii#x_$s^K@+xU-9_?0ruM_947vO%P)r_Q_Bn`=g{ 
zWlRN|C3JF9HwqT-pXoV{ePs3?d7q5BsVOVHJw?0afn$89O)s<>GjP=>8YIAOVmD}8j>!7nmFJU$EuzTS^qvt7n0k|6>Hzha`Je=C4&YPx|%hg?6Xz( zPvHH=;XrEIQXGSnJC;NxP#186`*(P0&|}Wxj2u;#kZ9m~3a~$Cs~+nH2y`Lwz#uHG zj^0}tV)nwuV4s?&F0bbf)jlpa#s4KHH_91xS?7I86*`;|FsuR;x6wlijdbOr#gQ)w zSiZ@$`lC58Ebwih{77V|oM$s(eY|C2tbyyjcaquf4u27!u26aTn90?tFE@pB^_5KU z*u0`h86-jT-Lk;s=p`M64i`i)%e91@O4dx2&t-7+d;1I#NY02{$NvVHulLlH6@GQ% zDvCQiu)A3QhHO!c!q5poCY`IcK_E%Q)+Sg5^Tl8^6C`cPYsKa{V^d#sT0g z>=_G}rr{R0>r=AiR-O?&0NpbnqFbopPK72QZOS$dwtaZ;B_*Q>kQwst9V<7mE0zo;-+ zSBwY;OiBZ*{v1@<3H}ud=+h7bl+H#=XqJ%T&4^i#F{Ih?+hz6WA2wVL30Be1EV#B65%NVSB`_@r(2NcP7 z&d#wYU!nK0aCIv{=nVNk7y7SP8U7b`o#yoyLdF7O6~+1N?b)#H{={L3@voa^T8E%7 zRH5m=FC^nsFVq0<1H*qq{FJiENwozwNHm$kazA;~Mq`L{`{f94H5{x?s*nyVNQf@& z>{F1Tn7VPhcR4pZ#@|7(#e_TE?$CjbkPd{0j0ASky<39twlrfi;~h{Sj=|1ZH@0aH zWwb&>{)y-4%nl!AaQ@a4+@ZdsC2lnx4%g*fKF5muFEsPlfE>-jgBjSohFjLzhAMXM zHiT*?%K1Uua9BGKWl1iYJ}Czr8y_V5J(Hxy6Q!Ww&fBb=RQ1fq)N=c~s!5FBmWZOc zqR<$R$9<@1`dVchI3GWd)|dEfCEsUflwDrRM(x7AzGrMMw1sK$F1BIz6Lfo|r>?n^ ziC0=tpY3)|QM@SSi*KoIR8gJVioLz_mE3Ow8($Ltf^ZnX(m(vZqYP|~kTkbt?B*db z!rUOG^sd(A8+OxxDcWty*5Oe!D43FTkb2;*9EM0x#K!*-*%tX!nC@PKXEU?x*5p-_ zbI54ftE=raAU*n=i}Nj!p0D3_)Na6(jOvf}4NnIlKbmD5--_~dt3W|JsDmpni_$sE zho7QJqJJjW-h?kZ^d`H@RmwMgf*wX^W#K6u8KM80h)U^Vuz2Gb6)Pg+?<{nifQBxX zBYkyLHAK_fBXf@IdlI|_=c1U|l%(w#^EwFf8y_Eu=@v3=$SsOF5|fa$FgLI#`LpsE z5!wv?U7SQm&fzIfwT<-)*zrHD0>@@1=?dWBm86`QK(m2QghBPYit%@wy*= zFr$SPiQ2xRdSOet+)h&Tji|V1|&mpvlTQ41I^uQ0!Q5t!qk1gY=d?# zHn2xzJ#vgp8A(BLFInMe`y@}p*Ruwc4!Y z;&rk!P213iE?2aA#;}vc29o0$&D=@4ZkKT#F6K%$ZjtUHg(5DF?_0Z_CqjUb!bo&l z3?0>p$E~BAB*<@H^^9nd)=eBPk&aP%9D``bwLeR=8Vtswbpk_EH&QJY0U%q4O|Fmd z?5?$bEH>#ng?V{eQmPKm$+nIl@P2-MJ1IG4C?4xR+3mY0QuzL}68fUZml@SG)8xqSLncCr zUE9E{l)*r!4C=?xLLw{mP@9Y8^#GY4LZ=dGaTG?5qDt}V;)GOBb(sAz6qc1?Irz}r z!rCONays*F0)d@92@g{Jy(jCcWVIBmzT7>ullzxMLSEujA# zB7hL5W1eo|JM!MMr>+R0bY#M@dWMYGmaZmwuQVpQWCI}4H=MJ$i2`#zZSwPN~l)wobN zOYKN&pu~IihL~pL>AIrrvDhB3#HaS_4amfXiU?YWd@*4CyS<&r`g8l#Lh~_|$_;`% 
z52XH?^))O1RH^Wg7AgF|k!kxVlu}>G>gVKlma}y!V-rF9G?o)B4CCL_v1l1M-a>4H z6ZQWjzdQztajG`$gF}SCsB{Y**2QNiRaJ{Xb5(*Y1A~uo!yPO)bKhbKAEp=hMM<1OMJ(GvcW{y|6@$lH^nt>5LVB%J9@;XDSW#jK!S$goS058So?*_2~xkhUO$wq{Q9#VvQT zH6-1-7#^~Pvlu}J;kn9465g{d_h)k=(a~2^mmJ_sCmng+mK-{>uEfxmw%@d~%)SM(gYb z{R|I$)^7G7J;ovaq#r!Q=NoIM0~wM4>eTDy(0;FHU%Ex?>9N)Bv|tAA+xO9TYCPZ| z#K>4x>Wr2_Eo(odykEt^&*6)yn{sZldx$Sw=8~y?;-b(DyainJn!16fX|)0vC@xQ} z`nMxQNC&qR(a+MyqI|8Y8!^`-KEWp|IM#oiGg#ihwvasGVG}t*@UEqQsYbh!W!c!z?wr zwfBGeo$vy+;Rv|LLbsjp+#;83Z6&Oi=Wq?H1(@3OEO2}9-GX@+Kw-fP6&Gb$r3*yK zJF=GxAyp@_h=Qavux_?&%v8}Y= zbQa(qMC~8s!T1Wm^EXX_WRmG@TDqgWdT=RVeas#rqoTE8?yWy_-f86@Sx;7$MaNGl z#bNI|I?`VCpYu-3vIP}xhjTqeeI(OLE{vy#T&d)^=SWX59H`jX<2&_|Ya~=sTH5Gc zDsG*YL?j+Q5SUW&mAFVx=j?^?d9VtJi?^G7s}m_t&L*d&jNWC$t2f$bupU(_z^~lv zXl4>|_TTt$n@-#5jbml`_ED3BqD4Nz=50F4@H|5CD(lg*t@EswZ~Scq=H0LlU9^+9wWO}v zI<^3jYYR;S+PJ;!lH?ALJJy+-`M=l+p_07TFK?j)JWwKsna25TXPs?{CC>c_2I!vk z2vo=0;6}C}pFihV*0RVxN;hS|;fVZjY!(inaFV6NdmzbG9I3H30E@ZnQNnZ2!nNk) z9W5YBXBN6~XZ^*WZpYM$9oxVY`en$zI{! 
zXf_+Jf%kn!qE6N78W1*mpvYxksnlBNU^vw=V!3Nhq#-WlP4U2Kccf)U z?`TVY{}2kV$@?MudWEJ4$-;$}mmaF4Em^OA?81hApaki*Z>#Y1vQFPHz4r;H4Y zC3$Z?pqDg=^mg>RXj%sSwF1{>%3meJgtG=Cx3Kc9AoH&EG6A)_s*R5~rWHRUE6`WR zZLYxCI}F;Y z5*qWn-)+u(|oJmf>ykL`luA6;GG2@wovl6ff*qV@ve%%R zbtJruM)L()e*cxw*V=nIsows7?n`M_`sEA2yc^)ulDZZPfW zjL2oqE*X*sITvb|m1qy15^7hf{;HhjuUR{zk8ntvKp3ap{>>=8!{L{|HchNPsP^BV zL0`En7OpE05REGOWbU4T4N?0t*naGH*-)leHjI(q^)KvwoR19Mc&MRW3`-s!M!VUX zQl?|8Ixo(f$rxyuMjd?9cu5hAXSrWi0yGWhpH)M<;AJx$>XU<8fse70|1uHMRTW$V z?kl=v_lv?Ov}QM75?Ccs{WY8$rkRX7_MX2U0gIyG7ZGzoG?I7Yfx{rs_j^%vfq8Ig zd>U{DuQQs85&YU_H)j!W#ih zhdQ%Xq<1cm;+0n+)LR=>ra1EluHJ|<-YoLQf}rFg3xUV5v`oUn>f*@Xs6_l`_*C*V z5BTv_w^>v@FYo0P4T}|ZyTcIN)w;kMOUreVgVBM-J#j-~hT zxR85BDCdt>Ou7}S9VxGR<*fEtUr|Zwx=yY&r7+ey#_l~az|p+T8rYzuctxK+f48|8 z+HgnVg2q$weLVYqn#QoMWb83k!*GqG;(OUY_%gV%C!+hZdRoHhmKZL5x6FVo*54wl z@mF}l-qU|IKS*?^SkA7lw!P^f?>&`@Ql1RY0hHCComH?bp}tko_K(;(<;lEt_`WKf zbMauNSg`~YQ{+_JMrh#TLJ7V?5mMlu@E9#>p2@Jk8vnI0BPCW8eEzB?L}0b^kj{-C zHqWL>IyOdjho#KOw=4c3C8^Q-i$VMoO?s_pJwbbcWEX*5_ z5Lc%jSz=Y{w_|)+%n7%-=);_-{BAd0T#1s7a#%5=P%aS{i*0h$^bkTESLpkcmFa_o z_a@<<0KYSPJ?{Wwzb1flATzpdhEBt|+{@kTeh$oLlA`IjeR@F^^8EYD&z=6&0k8B2 zBzlE*QFtGareSc+gN-TDgPWGAF#Z)UZ8Dd23MpK9iAP+`S%92#ie^upV`2I0_j6P| z>0We+*Sv zM@?u*zcTBN#tUBnd>A54YMfqI{l4>2&kKN*h0tl%A383pM|B-+KThiE8OF?VMS$Qx zD%3(GWRHptEUL0h9UAF{cm2|g_U6BD2vj)jOfC-HuJ00v$k-X}O{V{W?kctL_ISKH ze?WLx02Hj$Z88S3XA2Iq66ibV?_XpBtXzzeyR1@ zm*d6f_*6R?7W=~1Yn*P`+K2_M1`n=FA$9)hua`t^rP6T(AT2jt`mE7H)0@vPsWviX z{Os|R?28HNEAn#Q5BUBeqH^)alp;b-T+q&c4Cg)7(g=7p$x0z^O^H|1GGuQOL}XpI zY9_S@8qQw+tUvDuT0|8rXPu5)RpDo7X}cv9#93gTGFneMY6E8%O&jr(Dkk#kMAmX4 z_dk(jfbjp^Qw7n;0A)mH019}Gtrw=(7j*PWQve*LwLHkr6cu<~E`_QUrvRtg%#vuf z?7Q~5BXajaxUSUxPhWxGHWOuanPi`GacvJuh984)x2c$$E*NYoaHkmyZ#}y)8t`+N zcQuCVf2;VfpnfEo16*n|_nB$Q=rZBwLw+N(&onlJUJ(2T8nqLJB+25(shK^Vw1E=v zyHd-EEv+BtU_7v6+UZhdV!NboOc3G7qm%^HQ8&s@0c4K!8WzqGn2}T#78Jaf;I%6T zO8`GBm00^`;Obe?%Ms;6&Qnd8yWXo!^%F5$^umi%nv&ho^((7D37?$ZTp4q;#(_e5K9x@Tzm-I$67z5>GXf(oC;E<7Uy#1GX?4 
zQYlx<%L1LMqhwir**c$eGN@KTp6+Y!%I=?#mdfk<4=Aq(dq&ghTT`xhn zSU9>L@nZ&0HFhgkv8keG0d|Va7g}hAwhwnt!-3Ce;MWJ=Jx88B=gA$vdioJS(AV=1 zY^3$tfSR!O#~2+xQ;{VeEqWLe*c?wDaXVuJU6~XV)kGz2~nD$ zD?w~ZK8}SxYP_Yrj({smhG*6nsY$(tOPG5Ti~gTHY5_#NXb*Dc53Bt?AcUPbK5u(* z(t5ydruRrrdz62(;nZAOEO1B_mpLi<-PK~tu;S#lYlJ#0BM-lR!O`eKT_?5If)!_A z-}mU%Q}(AIAB@Omj2hjC(OH{Vst)f~p^4a>W1l9nHaJKBoBN)V-@L`%n06Et7T&<{ zS2WaF$%(c&?!XmyMS<*8Lg(t5sXT?%V)BWf&x=|Cr zsVCb8-9CFbGG&`!dkgA*=UPB>A)vR%3fSC|Xc1NG_+=$y()^&iH>w+XT!{#4NicWj zoyZo<;Rk8~yy+e83%8Xq;>MtWwVx0;vm52C!Q=vMw%52;F%;%H=PYEmkf_k^5Elj# zjM?S`8-#WrFzF2a4Rv)!B_@rc5xrzOVyI7TbVDuv8~zyYq0#lneE97illITO7h{Nf z@vmvUxq7cjKU_yqX&>Nse)~byv^mbe+mF$TNRvW3Hz$-=kaUPxu$yPY)YLux@R?nN zy>)J5efU@&n}@8s_hjCtIYbj-YuKh0TEU44Vh-=QqKoN_N>{Z5&VE2CzvL@ub>M;~ z?5@bzZWQj5CNmati;&3LUvPd*iRnvroT6@OAO&~&!0puK3tdLteTJQCls$*2e1HbI zl&2lOLc141v+9w|N)hD%54mJr`1rE)hqxSO_AS^{Tr;d6M5Th~(M`6^>nK zL(68`OBf+M_$TD$b&0M>8SoN!hk!O52yu0~^i5Ykcr|bawX1b_F!ky@P%%)`Blt}E zFbf-Er`$^fDF0C?T<{T=nqnMoHLG){qS0gN(@Y@o`N=I?OH##KQX$_H&^aV+H^{(E zahc)e)#oB_$TJVHf+tKbEc^vL;?e)8lwa7GwRZY;MWH#S0K~kXH|$N~9p)}u3uPfq z)H2`daE3_RT}0i4bUAEAwxh`8$uK^p#L@@TpjvO<+MU8 z%9^iDO*d(DZM0l+x~zhntvwPS!#A0}kQ;m-vGgQ$z;I|f%VUZ;WP9N;K^i%jY#RR# zTevg2OjUmMmPo5E*yXcAETrXdpE%xJ3;x}1QE)o~j;nKxPb$wm&qu1XNY ztTi2Pd$HDK0IzVJC>`)J#n4U@{>%k?kIN`xkLj$PCsjKVj%8Zw1EX`~OfGFk`L}`7|ke#l&1kqP2p@y!JYtk3a3i?S+ z0VtBo3vR0@n%uGkR3sEaNVo~|CH9vG)jKbV$|d~qIH;N1i0t9_j~ln2fE(Orqhbk8 zDS%+x&B5De)Phu6H(JPAM3M~MkhOH|BVj(oy$!uULt2+hgACqgsO&g9AvtE1s`9>P z+ze$?P75`DR83xm44=IfgamB4HG%V{rGw}aa`a6W;oS5IB2_xp(Ymlaqe@%viP+`X-skA+z_Si6o5)Vwstmsk19!P+q#?^ZdtTsI4bu$ zZ;28C%O~S%s!74e$+P*^8k$(YX2nl%`q>}Yz~{e~e$rhhHnga&Mx;!24)t4~bRJoW zi$KPO7D}*}ZCqumAP9uEQqwFt-K)`(hAVCapp?NyCmEp9q>%tT7r0K`exdvQ=_CpK z_JVU;6Gg!6n!seKLSGFLsRnqt+V65^d@wujVkg5Z>^1ZVfwfUnow8E^c^Tn1k2g|) zI`hDa$(uYc$RX+6pq3Jrc*s*=!mIhdYmJV-s4&;$GBWq`vxR+t7W(&yy) z@EL~lrA%&lyFpvgzKcyGH7~$udTQ?5I#nY=s1DPo_ablltZ`8hx5X`Rmt%UEaHGl6 
zbWVQzIA>ba8mn55Qy$oS%vrU<1f#SKc%7%>S@%@iA!3D(Fg2<1!*0D~`|-qYkTj8)fc-IfpfGhkWNe*JQfctAA; zYEoPEyBKxe;? za33w`QOh4j{;4C!aL8y^b?toYQ2AXTwLi(M8JOu|@apS(#JAl>O1#vO{OV1Lr#~|P zGU2{Q%s~`GK=_Expmag`>4_ML;DC&n(>ig(l7lL?^y`;Y@m_98cs&u{25o@8rb6BZ zef{sZ|NroBt^f0b|K|X&JG}iqAXzh%|8DCN^9x`|{S=0zOFznlXF8zp#Xk!zv8yUq ztCj_RBG%wnB`oSi4f)geemQKx1et5_5C=%vS2ky^&s%=6zjja{ZN_<4^u8l92#)Nq z>aXXDVF&hS1?`SsYgz7LGtTo*To{J)jB~WeCh*7gQ|Sl#caO8}XwQVy4MjHc>qgNY zn^VJ!3E`j_Gnh%fPc{Rj4PI4wDrWOs?NZ`=_1YH*`#I*aBe9G|MOR#1FS1aqQt>XH zw=b`_xgznmprDn&g#Lr=K+(m?&E5jMY2m3-)JOLu*Z6vS2mMU8(%P z&wB&Uyaue~(^0aU{YpOw5Xhpvk0RvRkIXWYqQZn3ZAhBR`}MGOSGw zQBP*-efaBdgOUU2@F^5((l@8qi@y7%qcdiX%ACxqH^xBU1sL^t;v|f?Tk3C6+q2AS5S{#N4Ijq&+g$5~){_{uYJBpMFi;7-y~dJfRbMXsb(=y= zP52WidghtHA2(|7Er1P5VwuOiV5hT*C znY1UyJ@FQ97qs7@>}u$=*x2E-fUcKqXKy0tt-styELx?hCp*75v%^$U7APQViG$a@ z!ezslHET8LIE?1zx_y!cwyHzr7cMiP97~zMZmVlRJ zKIQCDTyVsV+`xmLvk#Yf)$3%?O2mj~HUMS^ybXs+kzv@v?O|=IY0gq{ z(+VDFvn)02IWdQ9TI=`i-S2j)>rZV?wjxhHSp5d(h3IKl3o8T_XovX>q6jh{orcUB zaR00)itKF4Xq;jvV3`Kot}5hm%#zvTsCb4CQ&qL+7_6=q3_kxP4|`}Y9l7Xtfg=hF zj^&Kbe2EjrlY$V5i`vd*Yacw8ybs-REJ<)I#tcV#;$ChYq}y`BOgc4C#ZXbNc@JFa z+O(s1G2QpDi-;`Q0zuULBt50W+pP7j5J~SP1414zQRdv@*AMA|oH0<LCDra0kat;35F--p6rG~L_z^dB36_8PWnvDJG%c5_+BI7BRYS{XbvolaE@R5@Xf zQ$7j6jTzY)IRlx&-<4Bh>a(Yn`k?o;+?o!*j_T6T+oL<_UND#c5shSEv3J^VSlS7g z{_>yI5>OS(pD+nB4T}rBmwMUtG-9``M0zt7dggA2xMtXVesR&gyqqx;#baaC?>aw? 
z>PYV~d_{wifMVsyK)%(~ECkQP3iSOvIn&sfKukba$=C#j%6Zt+?jDY}Tv_6_{6~U{fWELJH>>U zEB=S6(aH3+EopsuW~Vajuv^tM3PZE$HQhmk_2tXdla3WMZ)puZ!NWMQ#IbseLk$x3 zeIYWnA}M-_2439HG|;>ATf=*)KIQeb@889LRSZAe#>hNz&;74lu*?n>>TUpNI$>Py?}uGCXOKR{!uU9h7&_%$B2x^e`G^m5Dy zQ9uQ4f^Y%T>NA1+-IxK~W1^ByC|o2zURQ2aK1MEmC^*gz@sRgR z!{z&=sk3D2!t=I`cU5|LpP63sjkoZ_byk+6`wh8ASZIf}9_tV$Cub;%7;u_lQeH0t zh*w$5UcYF4p6NM7>RVFkxFp6OmW+#DWP(ew(Wk0TP1@s;xn)Tj^|sJCO;^d&3j;kN zr<-un3ejn-7>3SW#bVq3o2Wak9AX6FYNyolp`?$6YZaA|2M7F^h4>`9O`|L)EWH?H zwB5aGWWh@)Xg|&GV#ctW{BN4=z}fsy&4YlNq~&5e=hh_ZW1CBls8crIT=>ExBVdrE z=oPxYiF;Q%x1;p#KT{UgFcu0Ac$|sn(K#D70(VH}YIx8ZOLTNJ8yg!HTV?E%l@gQZ z>EEwWhsQ@wBfPO;msIHZL_?-X?bkK@8;alT(E_B13*F%o)381Zp&?ru<{gS6!5g+j z`|4)U)#l;MuCPgS*UP!1EPJtzT!% zN-pNKOpAO$?h$5@qtA-0rkByLL!|bxz#a|ln%#9_yu6Ht)KKI{ZWX|nUJ1No69w?E zNdP!DiJp)sh?z5lD->Qzvm^KmSKMapx!W@b(dtVMnHih?ea@uepcG;wq6eW{{!}Yr zjW(|i>OQ}n3J@S}SF|>GXZUEhYtE5yJ3O!wmWp|p`+90!D(~1dzdN)oWWzz}lz2%X zLL+d}>J)d>_LgNKV|Y9?_j-)0L= zL4;_THtKYrXx{wR?Be;_t;VY2Nn3WBu6tQf6PozLHI*)FABa&`I_e!!U(z1M5qW%S zspVNo9KoDoq;ACwnkvuS1d?dD>swr~bv7#1EZ1G)u>l`tI*dmFl|8*3euK;@jq(|e za>;r5)}z3yRDA7(S^CO`J!9)mAS%Xm>rq3pdqz_oq%XQ)OS|E9FXw)^!CL;&hGnel zuz|6O%ueBtjC~LrkTw$|10u#ueX`6AvbBx^P5z3F4<*f(iQ~U)WH=^~=f@Ik-8Ow8 zB*@VblJy?=W|3z2yRkT4Fg&Z!HxiuzSabo+Q&>^3qg|Fu@s=_j{7_@rn(oFr`+#vP zVx?~PjP3)N%qf)EaYBHC3aWckMuFG0JSkNK(=Q`5^R#Ymdm9bjg2}uT(q053Ro-e7 zA|#IvYAWaT2Xd_pvz=HSvhRDW`e{7MluWlT{>TWg*}qd+I1&V1Waowwo(f*YtQtgR z8yHV(l0?tE{)pY8&fhS2U$6b;lOMYqX6^o`QxST=2%tM;VxBa(rvsrwvjR0hq*3Q( zO6-Z6JrW3C2x03EQ#ffm`>YfVTlOol0R_uRU%y+_iaR2_DUMcIZ$k_3@tx4CixZOA zuDonb&{@$KF`VWe(a?Zw8dBI>^Rjcn)!*m*V^R_eUtkdVrEkNkvP-?Guo&r@CBEvm;; zBTgiuxUAgRPJ1S!hv|$d7!K^cU${^L@&XQg618riUKvlJ!IM3v1mG;5yjz>vqiasc zL$Qp}ngXukvaDD92|%U)kX!CNL8Cu`&d#&6esvBTbc1vP#-{bbRl-Ez?;6nqle(`$ z^%bp(&-*mF;zAY5bTW6k;ww)N&z@CFo)=95n$>X(7*W4pK6-Tp(nzksqQEF?v24J1 zZ@ck&KKUE0UPoL+B4R<5MGfo?WU&A`X|fIR+p4`B0NJeq zq%3@qs$9_%=2AxtXCcIBKI4vlLDBO}Ea;+PLqY~`>(;I@PidBVVpmV&FPm(Z(2%cB 
zSXHi<;vlbW3E_2iJ&>olaem#W>W)qDptV;RqzHOnIEb|INghSGRa+_Jr+LH)RpM5M2U-;d3XS7H z@h#VhXL`a@u1UOJ{^I@k`Zh~OQU{d!h*NVz=Zm3SC)vMa#{U-L-TUJ$BU=_fvphJq z+eNRI1D}vzuA%CgPc9-y_hV4=@fux`q*66-_u3V;f=gUkG*D0L0;&bxH+q3J`w5#1 zoL(4QH&{3W`UQ#6P@e+D&T?M=@TwKA-&+vSQZgRogOHJ<(}Z_fpN>Ue zgy9Kn&7nz5B2qbQeZ1WUjtU&Hgg}_0057U~w3qTi-xq6QZJ!9c!?V@UZ+1p4sbcY9 z@swL7kaHRYN}oFZK=KNgUv@_?a;6H{Bc)(I9-K*cYxvWv6>JOp-gumXPZMK3$>@0v zwAmD<`g3qyc1(7q`@^qM%buXk3__%73w1H){*osGgd&%J_omH%ocK14JOI_J9AAGN z?tW}TD&&BjT12Y9=nQLIC{*U&0f{PJOgW@|J-!I;jhY=Lo=q(H$E9}-`Uo!%1E+}= z{Z48D&;HkHH#=|3!8v_1)t1Hq)NdetD|6l)x-%E78<;BFGf1al}xTqet@^iYo z<4&ri2{5E#w|9(z?VME5;UCy!i7!mgokmO5@vDT1Q?kcVjV58!*K&vJY+QUz`Uqt0 z`Z?U&oWO8`gq-RiPm-71Z-uUm_(tOZ8L~@L9zOlE{sYr{uh;Wb^r4%JO$6rY;pz`* zP5nm!_YYsLUoL(b+4k#$X@i0QRZH2s*1O?tIB}OfZRB2L|MBP;eGGQv;+k4{I0WP2 zJqf@KEU&9V<%Pb`7=V9ZU))tEaea;X9uf4{I1u>;M*(hm7GFq@t50N9Do_xyq)6sx zpsoMst3reOs#t`+{m)k=J67GW=W#S(i3Ixj9}B1bik#bi=Q<|{Ll#y-{0WZjy0u&w zahKqEN-_qN3|{tV&SnkTC%;Do(>&1o5R4B;GdQ26|D!wfw?>oyWjCEgj6{7;Gx9?( zBIv7V47CPV6Ht)=OCeB+69?v==lT=w4~w`~G18V%o8M7H{=eOw37PxT;LJNk*dK(u zPltEt`GvWI!2pQ^Q`M9ojV~6-Oly7C;D-1$C5cI`#YS)D=4nZ?i=!#ONrT3O*<*zW zrsSB8nQzwwmOPc(kg_BOJ|kcG@GW3@#RisDxg&MQSHGX!D%Xwu#-j5v^HDdRlMe#; z&)l}!jXvCXre@Q%o}XPWSZZN)i(rqT12GX=|RHCW7o~BFH#9I#>DqsCCQ!)R6R@{ ztLzlnC+mE~k@Z{@^IkQ$u`Ou_vsIdZUMSCe*brCAVKtb+osFy|dC};RbL^$QH!=BQv=8cY zNuMK7Vp_7v;pr>^Z8q%93F)QfEctCZ+TTFkanI4Z%z5a(?pVWU-O|O;~(2IkC&$# zvX%rr2(!Dr2S4%N&^OD)!xG1rXxqo|DcrEXV3(dNt`3Mp zvvm?xWrkhW-S`N*XYr;O&H1tINUH5O-MeBh!?u<{3$n2$+E%}9%=x!@CCNk6MtH$% zBcgfDuW)1^WucTux7qpj@hMpG(myYyWSeF|s;fO$X;H>>?O3BFmtOB z5EA{;G*3LO8+^FPCYe5X_M2`fi@C02PUn+BrPog@C6n={o2j#^{fVsJJ9LamHR9Ny z4*m2MFW9_4jJ@l_>6gQzppJ@iOk0u^NbX#1u^|MQbjsZv*H@)Q0E z1s_=LbC&*+A+bs4nug8#qTLBC=RG1(;j?c2U{K1X8+kqc3ygm7rOOGdT~Ww1%f9=#?~)5B}RB+`}rA zM|@|fsd^`BQ2^F8v=zn++SN;uB$4vZVrt6v=iSCHHKU)Q{&!ZIeMPwL?^mD7lAT>^xmku|$2$pF9Vz5nlUL!oZELaTGVNtzxvn>B+kH1C zP!0$X2tl2j=D!xg%CTQMSUTB%qURm)SooX?>*KQ8N|^J6zpG#2?<#G3d7d+mY%NkP 
zo@dRW4^?ha`qOUCYtU%3$eHA$(k^WpB!)iKeqk)$EHs`+TVaRVbBuH~u6B`{L)OT| z+A`#r+_Fh$VSD@ZuLp9%RQ$QJplJi{gNAP%-m8DhKLVo3-@Wmek!hgv zm@s22-||o5q$XeWiL~x(|Iy3OaRkPV-`nY0z@etO@g|0ljX^;hG`m^RdAYfO+fFk2 zRlsJDUf0*V*94R}{2g0=Os6LSSZMv4R}{wSEytb!ns>`Y=;4uoq?L@DEcK0Fi zVfw#$P{lzcQR1y}=PF-x;5+OT-~V^|k!K0lYASmG;pzHV*Qkr(sU&VC;0Sb@1d#5K zwGwza|06g#PaHzXN#&_Wx?}frY853q=!vGh)@*<)V4||L)S4WuQQeN`bB%wLK)7O8 zHiQP~RJW|-QrwMm@uM}gNsj)oZC2^}eDc`c}lGOVLp;czx zn#MwUv=-S#9(s`aV~X}rl1&#iOE6pWHY}iz2?}hH zlZ{E>>y1zE{aoo>eR;wFXxUCTnctnA`i+UiY%1D_h0KP8OzC>GKNE_y(2!^X@;I+r-65jEc z+_C1LJGpLUr?h<-6_li1v0(QXU8w`Y$DL5emmy*E<8UmG{$X3NW_z=yK|t%gCwtQh z7G8VB1;6&E1PI?xJ*=CB4L7}3xagNEm@W^zcsk|dj?poiyqP~qmIG?Kd2Cmjqttel zECn|NC{Qt?ysZfT=&KQ2p;^MAf{zU18&p>$Y(5wKqJ>@>=}LJqjRB|Z}$i7hL>hd7vF$?3Lsrq-BoV%#N z+_c!VfX=Cq{*lqlBm_0rQNYLdN5jYL-}vn0m2OAWj{VqHY}r89hLg& zmqQ4?Zd6~V=;h6t3T|9pNN4bv2TgZcB}m(395u_lWXv{Pto1XDJ^FG4lFA1cMo1I# z+4H+#p{K*>Col6QEsOM&7yTM%ph9#zBXj(|PsQYJx%|-(GRK&27YMi@BVcxYDosLu>j;(D!G%fYWsW zdqn#U*?r3Ze!_a&n_%S0Zd=793pSd-ihFsN$f@^^P0=T&q!7ACKq!$OGN%XOxT-F zF9INUuF|c5$1oI?1)1{ZqaM-e>XM76gO;V3P%n?7TZ>se*;McicU|vtoWpFmojkd; zWNjK(Vz~n#enA@l@19@<41kHSA|#B(33t7 z`XLT(&m*KHT&uUkfO6;a1Zo~x3PhNrSo^9~G|CV=qA^^P-nqS#rZ7@H<76j)nOIvs z!!&W&z&4ZBFz>>l6}{w_YoGXI>OFcfs@u9&xU{=8;l$iQKPu4qX9@rA=H4B@QiFd? 
z=MM=N*ODurf#E^_!I9@t^Zswe;gA6FW#+kxsC2KFWCoTqdOx-8UKPyiLc3K^+q!+V ze?PuC$U%bWQNJjLZF%)YfpkyLbW4uO?>9MBkK1~`+(pi1w}+?CJCC$oXL#n&vPqo6 z|B)jR_?{m4COkRqy~?trye#Hjd4rMRMxQE)O{-yory=^Z{iGuv;cPZbj&ru;d7W{o z=s&DaE=7fyqX9v?X?!`QEs4Tzm|{^LTa(DvERJd@N^rD&3WfA4lz1O;dnY!tyf9QI zIxllt@6HRXV4mO5v^u9*`!k0Z0k35FFL?XcUfHHp>ii*uxAYZIC+lpGX7N$&4H78T zO!70@eqtL-6-Az&r+WyB59AfY^;P!cWK#WC%+ikEYlb^(D(=(|drv)*wbNs;XR!B$ zR)_2~S1-{syCzth#|zH56(;w`CQ)*U+dgE&Y{coT??uIHkx6=-+!_Ib=x#(F=Xig|uSsY`TPy~2!k!tSm;SAOpcN7@<5*qn={ zAWkHGgGDDe%{C!vn8Fo&1?%n>jC(V0dpTqOG=PCd%KNj$ucSyiUP7z<%*Cy`+5F;k zHwTA``W{^KU~tRpbjt4=@+uf4QG(kRwi?VF4jr#)+@`TId;CEurzq7kCeQ3CbPdC7?g)mY5B}y8>PBtUkQsTg7e$AFN(vPIBWU^&Aba zo|p~4+HCkD`Wxgqf){dC`?ur%j-0W#U*?bDWsw||?~1{#?H8lJcGAhy(Db%%)zk_t zYOlIdz)m+@MCoRe*|a{`eYNz~QIO(svSWB-iA}?++5bUndmZm0A+p9VI&9FUw$a1OPO|E zDC`jPh_v?4-u_8KQs!y$@9IS^*WJj`kV|T>8@s17k~`rVFnoNZsa((LY%5r|@$uqZ zmD=f#E8ms$YPR?chwS61Aj6KQVYzenNUy<$_EDRicd;Hrbr_i6?CU$#8ob}fk5-tK zymcB&S@; zr@Ql-S~u7-+AMe3W@9UUMXT!Wb>m)2>dhiK2vyZK@_qO}x&Qaoi;0sfl3fxQGH-og z@>KwB1>O4-Wp&01sJ=Ra+cc&aSvY~9G4 z{Q8sTrM3GD!W-G-@HMh_Iv@B_Di(vR%o$;kqrdekx&u!nUS91n>uv1!;gQZC9o(!w zRVU#g)f9YQcvUm(dM)LVIlbDYIxDefkrP=4ML)4uF!~insP^($X^j2em>J(K zmiv_m6TLrsE?V;&G>HezY)*Ld@Z5(&zgrey!wy&~RQMSfh_USv5ovmaeU7Sde!?6S zbh#A2E{?)wt!75>6%YOyFb=|2Y8RXnQYb}W+UVSL4VB2za$DakN7t6c6 zDn1uWQ@2S^k9Pao=%_pG&fZAI6p|=)Yi0uJ^Il(r{Sck)Oqytw^)E*(4ddl4tz>lV`0_CT#x&4>sMzANiKzd<3VyOk=322s7x- zLcb>!IDWvsPoKq##CDQ(O59@~ncVpH46dIXNQ8kS&FSWBlg9Sr z%)Wg0UqR_S#`}&GV-yKoP$F~!C)ge79UQki6A*rSX+aadGZTwwbW8ax#S{5>M-H{p z`zH?TJBya?toR3`#59MKx|dFemBc9>Z>d-wWuO0Xl(vIg)k z)A5b0o0Fl{S$`ZY-wCwg85v>8c|xO5vcIKn24~i`g-+A3CN{sApoX`TJHZwx|D?6v>f#}XIR!IP{6r~L=f!pkTe zE5FS_;|}T6zB=Jx!*uEZqvm%9Yidm}@!Nhv*cSJ$JEC6v3Ij|w!r=(4Yllu zcP`6sC1#A9zbw-Mm!Vrm+SoE4R7DYFB za)cB3uR6te5ew>f`X98#b7I(pOiu{t^Q+U;Nhq6z#wi+`3&>mv&Oe zjW0Rt>bcYr1^khNKL3Hs$$4YiD@gY7;^Xss?EjHIUf&bAhr$}7@@Z8O{6q7yh|;2# z+;8hc%@72Gd++}#OV0Rek)YPeH1tiJK$1oD><8ZKj`zY0#=Ry=vlO~!a%hR#>Q;l9 
zl8@CA|Elh=a?(@aYK>S~6ab?|=Iui!O~%aGwWFX})@wp!QitA&<3ZTPRXJDGmS+$br8oJay9)KHhbyEg~8#maj?P`$QHyJoythxzr$s=t}f zdP>uZ14O9s0gRsFEDBF|WevNoqgVm<3G{Yz8&ywtQE;X!5om6B47*u8u$31^kPq;Yq`w*a?6PbOS%DrjbIHdcZ^nP?E25KI%i58> z|C;5e?;e7tNQ&SDza?3(9(XDlO|q)K|E=QU_w;I9pKN~g=&#EGZu<-QVxn*A<^dcc z%NMaxPRtt94Bb_uak{$H{Od{1S!C^fTY26Jn%jf*9KjwBBjG!X2S+=SOgDScO%C1h zIVt>sKUSo?Q^>gNZex{|OPO8nXrv;0cbcg&Mp?zF)DCS*sHis4n8L-T*{3u_zAL-6 zwY8qg)92eo&}m1Hl1ihk_A8ckM1Cnb+c;_63q9^;Z@SbPtO-Ghc_^UHTC19JMPr%L z86CX4Iyr5J$gX~flL{6@XOMgQcZPw<+nem$3eWAlt(!1agPq4Z4z{ohRG!QXu9Od^ zN3lxW$PLc=Z<=*7Mf|VoLcP|4?zqCY)n@82jEsJ+YS-B{L!iW9#ZMM0F4eiXXBFM) zTR~Kbb6{7P#8+VTZF(WHwC;tNX|-;Ru(8KNPV>lnJVZKI8$k(cQ7+icV@l=n_t(?F0}6*S>_vv-ap!KyuF1tS{(P)-*lKP7VjLF-%(yg z=T>PV^Lcqi)bn8^Oi1X#oi2+iM4&@BUOunTv}HV@$O&H@ZTWKRUGYZHICt$<<&{_D zv%i8l2g$^zrH_viMHV?YS%y(^F=~B_ zo70(GuCw#4WV`;*Evo6E!*3z^1MYs=2X?Th;crtcQ%=SdD=kNuZ}E^o?_lSoQSGE(nC-22ch}Ku&1qGL2YOw-U^<&pg*nm+J z3yGgtxJEeV`rlTfV0e3itB_SaljY>2e|;U%2x1gg5)VJ+Q0d>i#>3^{rhUbkW8~QM z2Oo&SVSQE4)I7${hTCWJ8~)eRnz%pO28z)m9%%ljs7|r|PfGm)$c1t_Kr|4ME0ZTW z__#}Nj=d|rT#4DrklIChO>lR#&9-T^j3?V_doni#AGgq#~fGn*y$;y^uk)n15m9egM^0Q9<6KTBXtt@ z9ma7>y2_018EieOaxL4;hC+iy4T9fX%{)7??kH9Fsg53H`;p;5j^C@2=YbQr_Cb7N zNx^EoyF7)^IwCsS8mMOr+LLpl^_6!&FgO4f>Q5O!`jUlZzbz!!GuRFuz# zE|vT&V`E!^GWr2h%gI0w;RF#~?86?J#YM%O057U$^$fqS(spF89t2hTvl;&ly3Ymu zYxej5c>tcZZMy#94{~y^`L}+;-qbp~<=UEwjc(Y5&NA1kGU2*5`z;?)iV`wCrBV5TgrF!L-U!_`KnD)&~fqM|> z55$<_)>CV)e&<8-_SOy>)DSJc+$I1QyIccMSBo>@dr4n)|*zqGLWs7_Z88&&M=k+r2qO zRHGiZdO)pi`xs=GRP4~FXHk@YDPuj2>T%gFcRMUUraIoapc!xjZ#M{BQMs8#^jM?* zro|dRuT+qi+n!y^xm)ZT6|hflI+{6JR(#MD%kRx#QR)8krj=(%w+;|koq;7@+&M=w zb4{D>tskehbY}I8=9fO=*#WTOxTosx4Q+6&x4xu4$|ktqrq4~sNIFHvXEnrirBd+; z<$>CYi05bd!NLL?N3x>-69^1&uVx$<9C=pD6?UdJ4EGH|V-3!^e=`|F|4N(kn>v^c zDfn_(y?bL@C|L^Y9JM4BZ|xda((5`S3A#3Qi}&)u3lg{6s8fp+EVf=3I-w(+5Uyfo zqg>w>@Z~b&*`}^k__jJ=^01fTbAvm%{mO-F`|xpLod9^SgHb)WW|m@KYSMmOM^wGakjP{obH2&toivBrTKu@=BLkKz@j2LPXs(pZ zbO{uWR_~d7*LC>B4!Gq174G%C7d&P)9NHd`fm-VYE58|0|u9?7Ms+;}=2z~03S{K?RsHIP 
zFKzsVn5(9oY)VOHbCGcl{6lkDR>{H<49i>q_bN;A<~o4+o5(8zjX}S5J)=gl@ti30l79_p(P#aVUKawXaj53`31BQ# z|JG;;HDb9jzCJcs4tkZ~0wg?s3lP~HjmrLw8^51`zy4u5RhpWsy9tOI+8wuLQM%TiAl6N7oe zo<#)9`FVw*2d0F(S-0O)ui1(u8iN8ia0HCo3YT4pmE2Uy?R@Oh^~eHHT)SNE6v=dVkftsp7Z!!JkiWD!!UcD^H_#w zJWe{g5|ya%r5zL|N4snT$(Wx1L;93mu_GVLG(%5Hm|s~KK2mR{E?-66UK3xDCD0t! zJ+4)s0G19;o?eV_*HDFKF8^y|Whc*`Xta8^G}iEl8n^()Ij)ET6%EMp`5Rr&rfMmg za-g;icp;f}1qik3Oz<4Q7uUy=&xXQ)k30!&=Z4{ccXmtRtQ482-D$&o!DR4M&gR2S z)M832V9Hfgx_nuqGAL$9$GLY>mj5FsKWo3t#b@8FqBbRuL7{pj_F~f$RbryW#Dn{mg!O#d@Y1N=_BfA~a>B%dPBb{LGI(T! zm7o3a)ICPjOpWUS6sXNUODsvPwUnI{fAyCb@HsAcReaM;>_3t%6*nAIEylcTPTdW6 zrB{=(a)J@mRUX$U(bU@pxn3g6)`!xwoL z$(3umrW8!Jho)dfO{c57DO?cbn+bcpXDR8p$*Xf`7saf#@U01>)+Fb}Wj-N|GAKSn z$zq1Gxq{25dgpnmqkN8-U65)xRiQ8?L$6`AA$z@LIB69a^}2=N*$4XLul0c5ieV}6 zxFHy4C1Kpfr3DGDJkNILQ_vqvaIF~i%TElwMgn5te4Cmm0>=EnbE$aTQ!ZE{$b-mI z**Y?0O|Ob zy?ViCBUxUw-h8foLPe!DBAOigbaIdHV}5*@Q$`yON*AwdRGu(2w`G%Onvb4dPb{3; z2E{CV$*ZbcNP4!tnsgXFKO1ozrRz;hqC~^AsG|hsooc_uh4EUt8O+a<>l|jPpTTvl z=OGJS(B0_q+)(GrlCAohBfua`+@dEWIW*0pxn$~=mb=f7W~(-l>s)y@X#Q!%X%wF! zEi*i){+d8+;ELA8M_1ws(o)g#Jbmre8BLgMqngx;wa5x{n!g+ICDb+gFIEytoXuao zVhH-CMHsv{px{%sR3=@PA1%by{p?>w9?B&jCvpML;lhPmlWN_if8WGHqV= z5dtXD&o1Ur$o_b1lHJs{g(SsCaT(y>AflFc7w0b0v*%w&D(ZRdz@`7DEa|Y+3oH#kU^V4L9;A3>UD!NgK#g^BJ=o2Yut0# z2zDUIscdeX(ym#gjAnix3!P_AmfhGiTAf+o+f0842D1e^S1SEi9Bn1gAvNupp+R6o zNc5_6npvny{XSX-c(@B!nwxFCfo*As(Q-iI^n+-)2f6VEb?=cc>bT*;lkv+!JUkxa1rssdsLAtW3 z#+Sywo^^i%*S^@Ca#2XXi~9hsjaEY1wJs&+wP(l0zx*CHV~d1I`W!e1?>!f+97Df_ z1&UISO^r+K=ly1}sIlTL9+bw9S5pRbEHDm!R3!XRgr8!HfCeMs+!$Dzx~&y?d5qC_ zQdMKnQ+C(d(|w2Ql-g7k5}*e)Mj5tOLJZxOinL6KqV7kGgnDPCj5BB8TkDRNyXK!i zDAM2(;)wf*^X-je7)=zYxsItPd<{`NTpP6J<=qcOMJ1LcA{KLSLI+o7VdNxmQkBLj zlOY$|%C0p4qUHNb!FGcgO`EUPR+8cEg3WEUt+aN9#hcS9t)#7eIc<4Ab_~AcSpJWqjLvA&cvB0DgaP(KoQa! 
zWLr|56+JF~UTVmzH4v&eAs&+-J(0m(HnqdDb>O#Q_XHO_E6-mf*~uQ)2CM@iX8qlo z-Z=@$Prq@kR!SgJS)UIm?$>_f>P1bjOG}2srg;R%40H3N8_cUzC)N+h%vpH+)Wd=J z$fZet53p(g*vwQrX(@!7iP8rk6%dv}=q1B1ZbFSNVy`tTAZDEdw~g|t`O&%gyTGLK zcx+akFnmO>{g<3$A>V7sB3uCJ+GOznd8blrsZd)uD3iVTxE6=&_4y_ix0I3rH)`Cl zj{>+5nDM<3*=m~}&(T$4Y&tos>9iJyg zV#B!h?JEX>(U|k`KjAEzCoT1H7i}i2yrhQ%RDHWV0t~T3o6)A98liqS?x1 zQKd4mKKRyz;SIVK3!AM>rcatK?OvYsApjW2J=9ISOY%~&f}kI%uoTA$d}07XG)-;F z!#Y$GAm7k_CJ&>PB=b0p2BIouRZ(btN!1l?DsDO@1J#32c+*5%2^E)bgg{n4w$v?v z|LF!eKduTV!VI|V5?3V|EVplyS4%Y|2%IVjXV#;SRahBRrLRiH=hD4Y4A$qorr}Gx zy4vH(6??0vqffJ|r!AH3Mp0=P3Zfh4F^@R^6|r7MOaeas-hTIC!_MoQ;_h#6sbcGi zk21V^(yw0mM@v!{f5hOI&03OwY8UBw?N-b!zc_e8hp<~7(yO6GWCN)lv}CF*A$-RN zV+ZzmQhDhjnfBmR#i6qk)Pa4eMr$+nG}mBKjHcmygiKL+8koSSIUz0Pe2Wu%f!Ugj zqX|~*j?VKAW~ls8rSfIfw=fOj^PK#Oh|(x3DGN8uxMdO3l)IGHOI<$^9~IiW3o#?U zVPc6g>+Nw!+cP`lvD30Qns)<{P>w;6_7`Cg?<_xiw`s?A6Wu7Ge%FEwunvc6TtI}5|ahQ#}%Z!=9qp2<(hHRLQoFTV^x&I|0=Wq ztE1^AH?A5_0G|$GEqD7(L#>Yo`C+`aIf$Ru{NvuyJ6BpT$bAO^6+8;v6<#$Z+nYxH znPvYRKdh=u5~yztOR#GMwk17uF!0Jz#V7X<*L`)7i(eFut4JNV9uS~zy*r#|AzY-~ z+W_bKeeu%Zj-p#uXwAy!?Y~0QY^9pb4bD83knsv;G>F5U3>b}^ULU4rSEkx1DHC;~ zZo(yNJbs3>AS>J`7a{;_@W&wbY@(A*1g^{55leEq`!0N+!=_82iFLZA^MrE)OZl&& zYzoP}mZ#dr6;p3ex}|7L>qj%cN`_|{)fUMLdA@2wXUwa_b~=4q*iIE7e9KG0LSkg4 z3j~q&YQN`qu3!^M%?RRB2*~UcgX-D_uWGoP(m{W8Gp|^<9G*WVgQ=5q1TF)xh%mj3 z%5ddh7XlwAEEV0s+MJpCdteFzCU8T%x7_#8mB-DOhE;ZKl#&I`|z`gPUk<8?dn9Gb# z1S;>f<2%fF0E58@qRFM@_{RyHvh}MpVJ6WLqAM9b(MHQ&7#cAKH>%zBqyVrgKFxV7 zI&9)`pBMz3SD!&hN6}}+lITE0VRdWyv|KvSv}RqB;EMPqHPTpI8yH%;SVsoZZ|!QS zbCR0YYo{i&!1eLqB$b~?5jHlDr+z=|&uo?aUkNHcq)oiholgZ|n7sQ{2|_DG2<}t* z*BLkeqtW^j-{S+|BY6gq@M9LLn6>|93D+~cE5|AhQ~9C4UbGWbfW?&+1~frQ>%A8$ zOU}YHdggOPOW@$mEI95=Z@RAiYp%I|ji1}mkh~&r({S;!bJWcId)5e`#W8qEKzLYK zSbAKOA|VZ=!aPw>`zu)V&RklOxA(UtiFUM4DA^>$h1p0z@$llaft(@3+TLNN_OZzG-TdD|-Ty(A{J*xIXS~Vt2mUX%q_i0POG7}uP3WWsxQLwQ z-$|s9=D$0WtIJ(8_Fk+ zaTHY4vDrnYpj}{)$S~&LXT|%;;{BK3-TmNvB{1Sn9UA=U*>m`aa?s?}ukt*y92&lJ 
zC0T`1|DjPNr8V$pXrUcTp}BA3>{DS)3UmR%ExH53*sm9{CCY28Yjl09jb*oUkq7kQXA^KGYnfgFrQSMfS3ohs>> zWg2xs7uAjXR4>_x-1mImb3iVwR2j`yGhdg~bdBlNn0_ia@Q%ve|0;)XZ8V%#SEkjF zgsN2+N^4EN?~g^458Jw{wWOpz-cckx4ydoq?!L?SKV8#ORyCdR_rL=|SX#uXI&isan-=PvH3gtu5~n zA;YAn%KgN?WV=^_tX)*U6prg^SRU2Y3o$Dk6U{n*l1u8(MT~1@KwFuzq(lpn11>Af z)YYCzx)ucznn|RnAY;YY!{8BG(bW7^cKa4F4;M7sg>jmzp;A*t*Vha zP3Ikuwo3K>mTvrbec0i)ooINGd7kXBGbfnMf=l?B;$-)Qt29e8=+0f0cOm&caaA%(@A8-X zxi|vWS5TJbh`T_1I1F?ii?K&3$YpC*sX+hqw=yk7t7>ZCNmsfLJlT^F@~(JwO#S`O z@&N+T#oc9;y8Jc9N;ouh#Eum0a{9gfL!&mCm)z2aI#YMg!8GM31E0pjHN(STy=>G3 zfsKq7zRG>-_(bMWkG zpj<05qN&R!#QD)MmevmeRrC5q?2`Wr(kW@IUCC;(m=3$>|MK4g1aJ4#B+*}I3Epq} zoym7RP_U^LI~}WV+>BB*Z8#C>>FFIUH3@5)@VR)ryY{-X7Y|#$Y!X$L`baVi7FlVd zh+b}j3F~jN?Jq7$c;J&iqc*&vmzo)ASq)E#=2z0qG!koPw=k%{URcN;aY9aBSWI0n z8yU6^=(O$wXe{>&KJT%#+W{<`FedXHFnPcS3&GVxB@u028(1Ab>UsR7R{e3qDRR5K zs1(8=qD*U%S`_rt z5w}vo?W8a{qvGGjDTiJ3yk3saMxI~C!){tK~v)$Z8uhS`4&$!3IfCLuT|cWL_$O)?m(?==wHw`%+2jS z(y#&`AH5q7PaX9?JcHBT4=}%z^rv&do56D$b5i8I_A?HHi6(cYg0aH>+FCA5ymcm~ z=tKTh)VJXLrf~n{=!Ahy;jao1|$Xc54vhfG&tgsbDU{ za>3H^oeJ-0?ok+^N>Fxt@TZ^|k~Vv)FuC69TQ2$^m%e3gggn<%vzA+dh_V z%TfOpsc!>zbEwkdR)DF4S-iB;?1PT^L4158&!+U6t-YdxDk?OmM~)U>l?@_RrtrVm-70iPJ#PwQATP5 zCKm|ie9K*Tx4yNrBvf@9gCsGQ-pURZ0RjI?E0Tq%F*&0) zv9V!quyWu62h9Q}i{CCpi{O-?Thpc1++wCks7Y`Vf4z<{iBU4v=Ctw}%<@M;P=U8s zIf>`es0}&fiM?5?{ukw)>;*%{TPZN-)kn^>v@;k-gAL?tOl81Qy3yDwq+r{%wzj@T zcr{`!&eYJB*f=<$!~4!=U)xt(NVHnebdf!%A=!6w?GR*X>BwzbHt5R0P+M=6Yjz|e zWkP{rrZW~d@M%?dV3PV*nEx&jzAed$t}4~+L39xl&{R;am|xMlrI!2neke8w069LCXO)G79}?0` znxZX7<+Xpt6$gIQuo`aNd_|8?hqL6&5`VTFRez{{u#0i&+2}AnOB%ef6tLMpoAZ(F zt#q*f3RyhE95FmUw?4)x>GtfFl5dn(Ottm1)pls`)He?&nfwhumcxZBbzZlqW;~Y2QASpR8W6r+(+zJq=HVv9H=d8{OwiShE5RWUu}ZgY4?vrrk69{ z@DJEgjbCx|t!4nQEwe7xqW-WyQd2q*7L3{D(TXF&G`RA(Lq#1urg$Qo-qFs|bA@Zy z+LGu*M6!|x;64c zfR?%b<}1VW9N_+as&_qDPk}u}X~;JnCXRrBy|f5D)ZsEu(DZ}3T(HkjRhtcd^$*lz z?*v#9M-?0S%e)ycgU}!K9kAcKIN0*T{frNwI)uFPx=zbc zP50FnJ(i~BeNPl*4x?jYGXr;MxX`XmCIn-k1~6G|&p7#UxK)wXQqHyxNmDRy2*0`i 
zbSKf_Y)UUgehzTuMnk*{a94_(u(7cx>FJc5F!?wx8Xr7kel6_mj@%y8+au*lqx=J+ zK~Y&UyLp4Pu9+`=b|9qsboykx5hz)~?sK6nXgC~;xOMFPf!E)rfTw5GQ_&nx-yoWDUHA}e7&JmswuKb5OjjP{kZ6EB0-aX4J z0jX|O`9MFD`_TGn3M5Z?MQ{*lQrLLfXNXk&)#f#+!PXm)TuaM_X?>_kWNHG_hR+BS$!P4MMX<*B!E=U5E8t;XT$#o+4`$^X z8o%(;09T%yj59nqO?s}wikE8wo4V#Bi7@gjTPm@tr_~*16|LZd%AOA|1dY8-Pl-G| zJUXMmci!glXeMedgl2~-8ug9@2R8EYx)el{b@6z1d}h`xD^eB8V3Zjy?8M@zF0>>Jy>1@ z14#y?WO^#@O1F0r!#DUZ1v^!Bj4o4|L}4kb`Es<2G5f)f6#)tDJ1;sIyF!nx?J#z1 z20cM|X!w4EQACF4k5?4`bL)t>X%Ug=tjA+@_As+g30wIP9pA4@p2QOz9_Q9o88wQh z11BgMX5m-MwVd9avnkp7xOn1OLN;}%FZMcmK7f}$t#))(89^J}J5iUn#6PPxYD}LV zr4VYyGQeAJ2Y8Hs#|T!AuX`FIWEPX+r_9GGVvZgGk{c#crz)M_Q6C`0pa+IfZaYnh zzUvIG&PD?9(e)@PGE6v9%hgol@fm1?;*;DF^~uc`2YTl`)-+$$`v z{v=dFs6<&T>YL@xhqj=yh7Jm@fMXs8)9^s_L`rmp2VZ0i)CtP93j*8+69ja+O+n_Q zPQs|t2cjotCko9pqMJ*eMxtCXySFpm1i+xPmOpA|% zgi@PbJ!mqM{7wGT!oh(A>O%1L3<*C?Q9*e_I*_9O#?>t6qlB7;OK48oDz9Wmysf8& zrKKb5a;xLY=u8gw3ptt*8*u!}qRAO(^=_KoB}BIj3kulqe%X>{v6_}vAtFpPc{mKm z+AKgn9}fPx@ltcV9cU`5dt0M4&hCcHb&LuGIYciH2UA49`*5^Q0toxm?8M6A&&p{u zXSdb7g8HPt4}4?n@B(f)+Clr|mVzj1-F#M&FIXz23%%r>(`LHKVo2gJC-;=Fs2nX# z=qbM+>Cd%{r_h@VHD#^=Gs^o-{KG!uj%~}UQ`)qUN z_3!vP*iRGpj9I=7jBnA-t$4^f%_}+M*{x57OA_}MabkjCKG;4*J92icrmKwGLxXP! 
z*^Y8pU5Glv`biOvzy7~@f|DCIpmg**yc@JX>mn}UF=Gb?oO-9!PhmtRS zI`3_+O$K|*b~jHQtdfSaE9ZP)jJWBBRt8W-oXXmZtI^;Q*Y}SO-@*|4c4H9qoFIP* z5vu*_DK}bWBjXj~t-jr-Ju}v_dPzBOOh`!u9b~d_$}ObGJVHw;^PoT51{|Zk<9+H>({b zsOT*>mfCMn?s8`Jvp@DdVQ`**f9@#)r&Y4lj{ZBj*(^4TO5C7)UW*#c<%83(KX-dW z$Rbw1%52NFWTvQfV+G&+n?Y&EHwH;9m_PNk8NJSCsy6tnX$fJ*#_HGC!(-4pqh5c; z{tyuF7J%DzP=tS}C$Oh$TaB z=-(15E@Myq{yxTcKvBsfQH$b82kU#vU&a?uix!|-nDTo6bfJof={X!nB6QAGa?|}A zzA%c{t4k<+|5G;$X8t@D=<_&EKC=ut3T70_1=F4le;L2cs8mrwh6rCMKCtlLb!H3$p5$ z)NxW3Df8=|Y;hUa?`2R)8q<~JWi^?hhv-I4k9-;3*a0CIpY(a`1jGa2eNo&aK@eMP zB`72^C;2Nzstci*C3+u+{F!isPh)!Bl#3*DI({;?Uj!i247qsw#wU3PN4nL3Ig`GGt(9f2HGB4$|JTi- z9t)y)CHt9S#cz)a%F#v0ydNGw3E@|kSCk0DuTf%t=dAQT{_7XnbVD6xh4b3PxbqL` zT*pBOz!Evz`*h`i35nT2oc%rxYkan9-M6`n zM?IirMzgGfisrHbQEwx)Q4HmZ&?+yXLT>*Tb>1-02)i<_YX*%UJ0(%2{ckyMKOAPF<8O_QIoOV5^1PIft*=je&1P~)!GD7-n z5(|^1dML|}e`@gm0LEI*RZSFrNC6@2(H=9HypZ@RTX+5F_S8dl?B=KWNXZIZ!)C3i zevJp;ovu_dpRN(i_=~XzdTLg03TcbDo6)-@SFy%Sd!v>nN;P@joW0$zd-hzL4T`s$ z_`TcN)f!8pCy{N3mtJ3-JW$XyOL{mYgUL@(kPGepZ1p&3A99Q@y2KNXYxr~l!|jIT zMNVxu7yPT*eu|SX?ut~fnPvYUrQuNr_!wt>i>Jf1!JFO&h{XQ$W(hRYsA%|vlG*ex z#Y;@Q%K|(553)cXz-l2nWLQhv!fnswO{65saMz$dm|~WIH)L7-Ya&@*k=6V@> z>G*vi1-UF~rsK(z^sdWs6|Oxd+r>d=+JBbTp2}xobJpq^9x$E;(y2g?H;QHN0N|Wc zkWAq%rfNrqwH&`AJ&vYr`P9D^9aI>T(;edQd#9Ur&can;v%rL>D<$=&hYx$OY&c%f zt4)Mv)+eGpZ&{sTh}rap=k{IG)KBiJe#)3jH|3sRot&iEEQYrU#WLA>qJ0Nh{~u9j z9TsJ`HT*|WKvF=uq@{akQ0bQL?(Q5y0qKzL?(Qy$0qO3cyN2%iZl80``~Am#&9&M0 z?Ad$OUhB8q<`(n>ohR%VkvD52n(z2*m); z-wQs7;LLdjZ^dO}ej~IT<7|tC!$fd_Bny?z0erqPh$EO~AIJkO&D|u7%G|ylmDW6%5S!gMKQF_cK6^hkp68HIc zRX({t&BAx<R{vhr;*vg6!#=gZHbsuPr6ENRyu`1yP;}y)tu9)eih0Pz zIn({a13q4yz$=ckwK&|${Hsyi!n+FwteBR-%1z-BQGQP%*=9q^h=e2hvgGE7TWU$( zf&Sz(8VW~EI(x0!f98G>(5ipGddxpUWhPo#3z9hQJzi!0Eq(o5!Rj~j(%`p5CDi_d z)fTXU6AnRq$*p^QC}S()KI&q&3L`}0TA^phM3DPkabMYwB}%Fedb4}-^KfL~m`3fW}W-ICHg3Ki;6V~>zLA~LqO|6DQGwE}uXBQ$WjWp%llz#Czs z)Nq9!JgGmWV@OhL)?A*?_uz-MuZJ4!uZZz|!6BS?^*M6mthC)Ts#*oB)3Z^wUvko{ 
zH#q=z-M#Vn_^pjhfmei}&Ddo*`E4bX8RyfH2Sg`zFX#W7us}JZnrP;FD?kJWwKi`a z9*+12%w5W4;GR+toMXL1ZtA(3YDhY9C=_YIxCZT%6jPVxI(~ z%0rB%++Gk4msi8Nm*jSO-pi?%vVvDU<=g{f=5F^xL_jj5Sw}zI^a0)ZzF6ikr0m!8 zct+oZPN_W);creZn`t-QJdto)wlkD1N5kB6mpSiqnf?}ua1X(q-V0B4Vj1B0&NGlOAnFf{3gpeyJH4dQN*BHmmqv+vJQ1OK-xlit6dYcq zxh+Q8xHG!2;?9?`Mer=PHu+wBoU(r0w{V9@(xdJS--EBUJEMx5Y2BQ7CC{8pXP~}D z#mD&FV<|a~bn(xR?-@8%S&b&tv?Yl<Fx! zrd|^2`$^eOalD=R1$yWDbe%bNH*6~*X~i$ZvEZ82Yk8wKQ-@igbu!V(*;)=$0nJxz zcy&GpwMfIFQV8fFmQv;UHL-yprpgah83*^zzB5kHa1gsviTDGle1(?w%?6HGknOi@ z_w`ed;pnmc)g8k<0Hbd+;bBjZ-B#V%U6eSa_)s9NaHy^NpF_6<`O)s!d1p*OTpQxe6KY|B%sGq3N7%jR;icAH9X4$sGyU2YWMJ zr=5l5S$wt(kc!Bjl0)M@ZvX6ATb2zK7Mo{8a#QHKZW~L1QdaV>bfVGJn90i5HMjVYep)W)`Y$*>Ajq1kqR5~$|U$ir`c6{jL z?9zFn=SL&n_`W2lokqlCzk}otT6{OM7TidWfOcedBFL4I=(}i^S6LolhCvoL#K(@J5REfnfB<$SP7;HU-!-5FVxcGY8nWH@>H zUX`lMds4rz@dfpX+v)3DMTR}Og1DQpoXmvyh}@hm#^rF5NV_MkkzrjSX|=bJ0K=s+ zZuv?yZyRTV%qbc^g*(49fLLu+cPS|EaA&1_aX!#w&^EirdHPpIvZ#NzytjiwSTc1` z6=sm8tr!c0jIKxsXMt61W_x(Bu zIsI@yv@gnpVPl5xPC#ld5f`UX|p+vDr9}BS~Vx`N#dKE+#sVp zkhghnP_<^ zdvFD*;`&iCYWLfpp?$sJ-M3_*VC4;cAZr}gs>4gwqP;|SQAj!u7mvn(Rw5#^>ymHgZ=oKy&js6v3YZUkrdSs=ZnRGSJ){#PIIA}$;cTAXlU z^}Tx|6xDHEs6MhhwEOU7wZD-1kO@?>o4*tC`IKr(T*m>Q(rUo zP9HmpsM2e#&IdEqbWd8n2z%YAiTtE!UG3LhOZa0fAC?2S8Fx-_mlVl4`&yNla`SD3 z#U5>*Pg7T&ZhsD6+HC!9N!Iqx7|M?D-LcBmI%j0BOwDS&p~eSpPDbn!G3ipzvzg!K znjj<6#nWUL7NXPl8jkZLAptq&se7gzqs9XJ6FGkZ%mK1oZ|j)F{QT`0eDjF&4~-EW5%!}jmQ1WXTq0F(wEBM4Gjf;@!ckdf*6&4dC`k075gCgF!4Zof`)hkPG; z2y%=m-Zk{+G-{i;7ln_ZY1k-Lta>Dkq>p6>ReJcW;K*Y6dem*-`KZdwtg9qauHUO2 zEwtd%oKy?jUTwKo8K;28KBSWpWMmrLq0FuC`gb4{J#T*S3NxWkhgNC!)eobm?G_Xs zLG>kDy8ocEBnMher=n5x+-jD69s$`ON#B<#$vm|4j`cf#JK4xdfPe7Ti*|jXEMU3` z?1>2BwydPld^YLX{V#D&nCl&v9oD1!trOmlwcW|p_%0P>(%ug)SmI8SD0A99dE%l# zo>P)L?;&Q>#{1t}veLI9>=o}|iyjW6*l@x$*F1z(o)N?bqB;F7Yr^4neyt_bAGJA9 z9H5Qp{+yQc(7~)I0$2*3{$5k2w^*u1fy+-e&jB?1ey(kbIK-O-m_H|Y?S@S_=x=MU}mVe_^};iaYYYOgUZP8f_Fm5yr}4ZG@t^jQ&KJEG<~rL>{G+;*&-6wp*7=Uf#kC?&49)FA?Mv%`-zO*5oh*CA 
zi$nI}CaTCylV8&cbLpGk`Nu6*Xm#$)|49@&KGicX%Dd81QpR$+UWk6r>XRc-SMB`r zyX`gRas9ir@o9+Ti{KGgrKtBy?AUBo;uXVa*!fnL^6t?Uhq_r)R9b@VAW2JZK@FM~ zt!;5V(gj77IgEDe_r}Y>%e|Yl{V%JtkJU9&g33cbZP?X!1^KJf!bdm6@fb9NVI*qT z$rUDt6;>ZE2dVQ`gG!3WqI40T0IDX@()W^vytlZFvlUL%9VLl@Y5h&DlB{54)lv)0 zI928oRuT`3%*T}MW}kPaY8jJ^HZOE*@V0D>JPSF8jv>7%wk=kM3!P4=@NyONN9vK! zFhon5lc!$se@j08mmLR0y_o<~A=cOVy{-^ zO{{10A|W=p#+S}L4On>?6}iR`ctn+|RXPZD*f-r&h-i6W3Qk_n*$QN-79&V#IlRu6 z*PZ0xM~hD@fF?qlePr(=N(1;u3QAi8_r8N~SFSf&KD2s1QCo2K2(e8kiQhgU#MKR| zZ96L$*lQ(>UI@-m2-;ChbZ~9o2ZIH8cY8lOy^HF3k}OqR{E>m_S2=WkMp62O_8IH8 z*wn$ufV}cN2=#C=ZMWyacP6$!nRM^<1OE7GTtsFW$ueS&JLWDCVUbT?#odzoIE&y# zXYr>Dtx1n27#3vL8wZ|NI()4_dDg12`;VP6QD&E%uIR&n%gHwBmk0)^R#z;kg+eGg zXY}PMPky;kl#Pkz8?LS0Yn*1b2($$IuP(^_hJwx`3gaqbd#DxGZi@%rp>iV{ZP7&z{o}mQjEIg-o9I#Oq?PIc#`d{0)4- zMM;sW0u4g(pgbAF$6x)-HTfTUoa+-Xh%rU%)rso8Pfog6zNHRqKKytC9~gVvBDNB2 zQ$Di&MgJp4RE&j(oqtIpa3$pTDD0lB0y0DasTyO^+W5XWAf03?wWJ>HD!jQ5idOvg zq}Co+41mb5g^h5~uath_iG8zY!9d5d@L?D3`v!M*Bb>kanB;eVCWCx!m7lp$xBKP+ z0qdqjf;D=TQVIY2Y)a2o$3lzD@uXLkL_N{vcm1#{NadXEZl*i_oO84@LoDLJf6Dt_HtnLE>|=M+ z%Pn|MZbkn2->t|^iaM$!7pr8gsA@1lf&UY>_v4v!mQR! 
zJmymHJ{=OobeXsAih&hs+ZX?BVbcpc z;%eeKQsarKrQbTZ?BQi)-}Ku4QOb0SDpx(bSru`UMkAzKvP-ID>`w5V}A;8~Z>7SgB zx|GOdJg2fi$u69?%Kszz=i9&%dIi=_35Xf*vfJ`sluB{7op~RVECQ@O=}0ifpI=Bn zAb+m)fxKb&ZB%h9Nw%o(%#fcR9F@EP3kwMHxy0(hu2<3%k8(4G>QME-(4;I1Hd>ok zk2aa&K^V(fax~re`}0(aRunyc)|*vBs)W5BHXM!YTuDoRN*EU)&rv2`m} zJU8JBBiSsog4EtLy@@F9NJ>q~vNMS6Nk-haML097PZk7IQSaa+5w%My5Y}x&O*Z3R z^kaVzn?_dL4)yKCO0={hI9U-ot1%f)tv@){c6Rb7_@nLYB_$hnobWcwX7-m(&DJ0u zsmMho1_yK`@B3vl`h0;lh~SMEq$u`7ck<%Dh~OYQ9ysj z;QKfy<3BL@^Gh9ib5w-wBKhPkR}%XsP^-n%suI zOD7(xcHLCpBHx#LHySgJ=X8yNIcm3@xLJ`)SvRcon+l(8fq+bo&ljiW%TE`5%W;u* zc%6E<-uT>%-!=zJxu$3(>!_-{dt70Ix{(}xwH||ZES<5v%fj$@+V133cw2$%b21AK zp(Po%p={?KUJAHjQ#V0i!bF5x^(Vym^XzlSD(#l|)Z*OEOpGeR2>*%D`bLGpv-lFC zNCGS0zVyYwE(cK(qyt3()>+WUD`M;d-tM0#K;v~^7Yy?btWEX_qlOovf6*a-v)j80 zfqp$e`xTnsK#cNQQTxc%k?xWTmx!xMMJblCVRnn|?)B>~!v9c2*m`)r53_L1+OT(h zwWD*T;h#5cO)M&0Pdc>e`wV`!1Z8rBiX!y4hCl_g0!#Xs>a0(<}{lEA&S^Z6isa{cl6K#U+)6mWTHw56|1z=~|7o0^rJte8)rWIjis z`&S@%%cDs}gUug=&d(w}_CSzDB@}(BGjnjb9tSE7H1gc{tHNU7;FFz6ZKe^V(LArN zUx55P#Z7C_8Ux9?fR^|aI6`b4a>m{fjlt(|td6PQr|>jt!HKUX7peCt2sDLZrXo)t>$W{UmFNI zn5E96-}&O{;7Ukc_(9C}*r`sL!yeiFcWf3G!*gSCnrE^5_EdaWZQ!e@6*qKCkM2n9 z)~ec-!C`z~)A#|#JA2mVF9;d}yL<&cPXeWNQNO0%F2|2msoycKjwOu@@257!^6+a* zgCWApRQq7FBFAoD;`sr=65A^qxPXFrc_yunX@xvz2ve$Fz1Qc<6z2LmKBeA8cbpD& zpONb*6ub{=Z*F$O(yN>g7+Ox`Fb*)Vu>Cq;-~Q;+MIeKgNmE!5UkX3P^R&ST_vKR6 zN!f}RXKw1lD=JT`U~cnK_@2_Z&eHS7o%JfKw-+-F4)+?^Dj2p$jy%F^?{s`R-QgNY z$fo1+YQ^2Kv+Yt_3$Cg78X3)+chD**8^3KCh0gxqbAIZrfC$x_If%nt5=L1zSo$C!WPY z4qXOttCcIm&ub_In6?JSrlJYm)1*KNv$N@2eT%%Y737DB^oj`i(JBpXLgQY|x!{`{ zm`%9h#bbnf{?H16i-XY;8#FeaPDBsmVV1e)a(3Q8L^or`v}<7U8UA$R00D~JBOQN}ch8zFDd+AR zsh6y zv1^qk+CAT#`Hhw`aqLw<$hzwS39@q=qVkNmGb-b)N$^ji+&14fPh+(wVZ2pRSMTG6 z-#bm>sYo*)Xp&T)$De_ZI8*^ijcN1=lcE?e=!(!~FWY&1bWnyVr z)~@qXSoj!Pt!h+#uTaBx50uB4e1_ek#2i?Jq&$hzZFkQ7$ULS7@dUDou;X6Nu(m#@ z#q?g3C0PyAOTH2HVPaTtmN2MszSs+AB0zvmGTppXeDSrdN#4#TqYKl<<%m&=x0v~jf;?91T=}> z4JR-;b80WvzuIj*N!34@^WW1*3%<%cl2j{W<&f~zBlWgRc=jn4L}TS17b1vFcvYN( 
zhKd#*a&;q}@@o?9GOEvbDC@(6!s3rFw8v2wG5OwBxo;jbvLOHXqmU`NR>#11S-!CjgmB>YU-P*tbmXIpZcv=KuaoO z0TXD|!sumXLDiR=Hw&>3Y_g~HrU2GpfSR(kt1p>731mWLmV>>zGe@&g){u76nG({q zK5oEnTb1#dOW4#~;3syfwuEw%jcBx)I=eS7dx6cG*x;T!+@0HSa`i&WQFI0M71U+E z3u;%u;xb5{X4jTgyd_B3d5_+aAsEzLTUh&bDox}rP1V1*)rwXA1kuDUt?yb}?qu=L zbY($~fp>)BoqAt-uXc^udgE%Xelb}T-3Qd*A0XS5F%if$e1u(Kin&ae%b`@U4Ey*J z>E~te&udpeX`?$+8wmL_v75@7b?hkAnDlA-lJ#>Wo(2hd^*@RtCvk$&5_P=_yzN^; z&sbLb>MA?LM^1@6Vy{4@ogwUeJN9b>+@2QbZGcA;87IKgb$((B@Bu!c1%A5J#tz#) zfOx$m@+4(~`K^&|R5p%_dcr$p49aH>*o?biltehgCI>gGXCqvI88(yDYwGUfid$Rw zm>jVa^heQoaOi4zM74Ely4_oBfVIvPYTQ$&A(MH}IXkDcT|H;ezrb=s_{`=+)#{D8 z)$Z3yw^60T7jcqTXH{nVjV94aP<*b0#i>u()YhJ|L34t>`I**~K9S{WNvL)N5|%RH zNpTi^2T%=u*wP_QO3Gb|JVf!GNy<^@7NyK~v$!&8_L4(t48vZXnbbpFXYqmB)t+^a zxL#e8TmW`5^A2bz&7Ifm%5jW|4M0-fpEM(2=hyWW3K!G*1a?hMBn=j%_a;nlHmcbH z^_Nh;8rrbV0qXpD>WiK&HEc&^aJ%ASK2)lhW}rWQTgn1865*TDJ&{rT#5|h{V@3jp z#2l?ZP)+G@C0|HBk;*7l9nojIrHbt&rKUoAY-me4dwu|WrAn7S!*?X@;_-SXMs=k( zp<=0TGe_(z8X>?#2pdZOP@;H}Q}JO*zbdF~5OWmYnjrRiFwBzebySrUZNt<^YDuVZ z*&G9L2%|>r&!D%mB1Bw9)ry)yTJHcTPFD5@t-{^Uo+doF5siOQFOYeu8e#b=G1`J- zPyfL|wfS--{18Un=IG!@YLbj<{SZaX!&27ixt-WFqMwHc=}n#izha_=g{1{y1xsR+ z0|`zO+ie}bf%Q_=$4rT|m4m-ETp;F)+lPn)HX9+WsM7J1nmQo`)LgG^^5+kZaVQhj z29{uUK%wk(E+$eARHN|2($1QkqzVWODkqXHtqJ^nL~Y2l6g4pPxItlg{_=A4QqAs(H~r47QpbZoO%@TH)MXncB?|T%K80nGxlY(J1N$zn8%Kfr82QM_S@_z>Ti@ zdOFHS1mIC?tNmsLY$p^8wvt{*GqZeIx-jkFm4oP zP#Ltc9~7Q6WGIl{iCVxZwxKE$ncTooVT6Ix#8hHRLvd!p(PJnq&f1=N&{pBU_Ufm6}kZ>5RfiA#~~& zD%b#P1}Z42!Lv__VFqe$C3`*RDR~fnKoMFGH6}%WQ|M_=FVWulxi54j2W&}VsPB{!xxN~-gUl~BCjYl zcksXBkqq-7S}Dteyp`V43~%4eT2$i^jJ2BhTwJM{lPxc!A%K2kLs&b|kSp&*8`j+8 zNtGH1ku0vHTMMYc@zo7*l7f3T`k-=8ms`6MgOjU#UjjLF-qO6}E-HceXCy243VjwH zXjr)no>s$lGEH^VtN#uTxuO)}FUyHKrXf+%dVXD-zp1I+WEz$Sq0S&;CjjPJW*Xp- z+@+_b{jPIS#{(mfa5NPh5y+L>8W83F>!X2xze$I@?s}74RIAS7;9~=H4*M{f(3OrQ zxS{!|vq#>w9QzwLbcW$=Y#+iQ1FWR)u1WCV(051vWiC@At0Hn8AKkK~*1p|2L^^b2 zA4VaS$|rH|f)C7`RpyxzrfV-f;5tVi*Vo|b2V2D(1K-gNsHDgBJvgY)o@}1VET$rP 
zIGaFSuNvq@6*Y>VZH+(+I7tf9P8R%)@z#w3{zZ0Hl)ZNaiYxS#s?Lf`?c;QbSD!f2 zuXiw^m61yoP{)F*;@HdI;=s6_OjS`U4@@^;92BnD%8GpWI6-QqsRoTsDPuFw*a=xG zyF4F7zHrE>fu0K%3-sDH9p7Y4wFW0>f6+2Qx{0&qBE3_XT9nz!U83_al3py5L*Ge> z8R2A!ZkTm!tJK4<)#>m_ga$Iz2n+OC*No6qhc_HJ!{`@mo9+O@?bo^oo%ps1(ei22 zKu4W{og-3lD=TbyyYNJpfqk5XE1j)Cu!9C|KE>C-9z-Ufs$r|d`85^VH0=dpIZ1`u3WJCE>EdAFj1obZ zKtWl4x(Mkm@YRtAEV8Ou#8gP-yQ#dVG2Bn?-B*U>@|4{aFn3E86Trrx(Uca|4C_>kfdh5w zmoYdxl#D=V{uF>aSeetxu0_TiHvn`PbjdKLLCrRquO0~413&2Q-SJ}nq98(XNWXoY z+Hg`~ywC6W8Wsgt6kmt;jLg`e6i)#Buh?zPOU22JZNi&Bv2{tGA|J1`l=Gv z!5SlI>3&VeDAt(=!Ar(*vRA0lX+1e3U<6GjkwRptmf8!WIV3eNJHa;mC-GK!19f9hj0vE|mz+wZQQNFy7FCt76$1**w#qU{DTQXZZ$SwpxJ=Ld?0}dn zG{1umGUftEzYMe432n^6Nwo;`>K|)~_3E@1tP^_ml}JOsE){x>r~qY3WuY)BuKBy? z?+^M5SpG9AM!`fY)@cj3CR9qxLPLs z;gh0FY{oKPEj+>L6spWl@J=A`xS;r*pR)Bat2J6?HA+@Rea3G-wYc*P z8aYo|ridFA+MzoSBpX@LCf>-UQNCre&g$4Wv(dlDK2}>&+Jb*B-de7Ltx$oY>n0u8 z2XPHb(Odm*sg|C;i!7!{ySuMLla3$bQ@23QF(hlez}At+9JFAR-ztbXF1b)D(Mmyd z=_rWdDk+Mf+c$a1i8)@W<<`_1bobhSOge^cA37E)>^L<7OMDSZAI(=_2fQZqCZ{Gp z8F7w~&Z7ret4I=kBPQlJQXYko5!+qYfoKdoAqnFcMDyt7$!~*Aybaww&?VU?-cZ=s zi`$0pUbc-qVYbQk7@0QxX|FNh`XEU;Qm*38r-L_k=>GF^k?Fj9S!K|ZC|+F4hT1L8 zI?+}rr>r?%{buR!GN(;Rc*}9C0Wzz-+rqMZeZM|z(}S3KAYJTjbVi!m-~$VPP{X}7 zwzqcMzLcaBp+kn-Um?L#ni2RYQ9-w0>;7hHTyv;4HL>p^qxGZP8V3-x4+&Ub}s8k&?14ZJd@-0 z6~7!?l1{VjeC-{ehI_P?>+xu(3@x)l1!00W?`H`IebR{Bs{v&}vpMKX*hJ4&Scwgr%r`~}*R z;+IA$jDcI6l4H>963t7Ji481lM%@HH`-^UY|ICX6=o?%+fjs3&@iIWN_}35?jd=sW z83PW$q-h2aENRtj;V|u5f zbOnUNH}UUojwJQ|YPk!$_rCoQ`nUc_f012APmN2UeacAG4KNSvg>)E~Wk%d)(POi_ zbXyiZdl86%U=I}v`qjk3;?w$y5qvBU$IGWq!JM|wryv54rp-_2MpD=oUB(mI8~v}Y ztUkRa{aX&nmzBV$g}$E7Q!l-N^jEz64}|i6@RPsQ9RGXxUw`Rt4TkrKE69QSmX_J& z-YE?GOt-D<{L{ipChSk4b!(06^K+6)vZ;v2WbY9R0`sc;sibHzr4q&Y2`xnm`*nwddiW8VrSeC<)( z6qLS6Ha7W6 zg_(h)90zzjdNugfM|vn!iz;=3`T|kQ)YST^vIB`;O3f=3gmr4lwQ57#8~uU$?Jn2c zKxjdSEu$x2RIHW~WO-SvOigPHKG}N{$Ezk4AhRKj%ve%%HoJ26L$QiSuKTT@Jnx6*68xQ|3ubJ(g{Y~ggXWYo7Ytf?MlqzF%FhOKxYOGRMr84PiiMnbyxLhs 
z4dJB;o}s)JRR%*KKP0W-7N=lYBNxq{X0rw5e0Re+y=GG#gHMg}>Om<+4W2 zbIf-l=bKAy+(>6XC!b!$2w6rC8i*~-A{!w#%I8fcYL*nWAv24p-#(TWUIgvKK9-av zXAPlQx)rJ{ z&9G9cSMR85HIurAjj^`O-XQd^AzKT^))h%0y@?eUXyjUQ;wzJg#*x8^cuS)G^!Unm z3lnig3W2s`Jx`^9cG7QE4m&Y%X)T4VUf4b0?}>WUDMl3-dTiFam=JoBf8N%6=}y}w zbdWF4W}*Iv3s;Rq&?`7KmC!drmqMZN>X8&8H~WJpIgWc3m@AVSZXz!*Sj{J+F<`0?991L zZj?VvCcixQhlHKm9Z$4PRC}hv%A%~#krROZ5YjB{om5ScjyY50rTO8|?A)M&btI@s zqsE`{Rtad>WIE5DfB5(-{6Ox&QLjjQ_t&%{Z>rR3VkFqu7q=}_d}@9|O7S-qEo14F z@SV#bGu@Wowxi&~BlF|U%j(NkZL(gD$6!$30l!zi6{{vq<|(BnUrMmY0a<0#5wFns zWp-VDYIoDeeBS2wdQcf@4T2RZm-DERoZR+=Z&Bjsn+ZB*ZkfBv(k_FMr>P*DxlIj3 zDjtVKZ~(w85;_vkN!yQ{-}2VXE{Ap<$bkvhAp^RA@eU#s)7qLu-JE|G)9+3g@) z()>e)#fqjAxx$8rTgiV!@AionJAMJ_6BsW~t1ppbW@E+=GoJ#n$KH{m_@B*>Ie%1% z_98au%Rra11nW{&fHG{@%1@&x`xv!f1#xCBmPLxyk1=1%Xn?yR55?Y} zn;5+qYTcfb?)e=U0H0ir*r$#7cJc~)7Yv{BP;?BpFx zudVidHWKHuSsr6L&i|3wvJnmfpHcKEzOl<8D6GHcmr7l9qWX_QSlf<cZ)7$wQw#?j4K-Vzz~_w|w#X-L-_R1|0y5F4LvD2nKj`lT!a zQlBf>x8@RFLZcy3zN|G(Z4KYqEAi6r9?vt>!biQb*m$Qj1$o&c;5zZrHKGa)opyq;3EG`H^|qYkdmag0P9DT~Y5jgd5u3dYNersDheI z_Vi9IDc|V=9rzp}@YCtd#weT}TWwDDta{+cCec2du|exC7EuqSciaah#TQS6rPI&o z_1kaHAG&mgelqZXIA5utXB|)fU18UF!6IFv6vS?xduwD{PjD zD8X%lqT=Lo!5=;*s`%@Hs8+NE8lHKu=IBo;$Q@8zPA^;8YJ*~0vs=h>1dfyba_$Wp z29Xzi2^jWZ|JRrj#N$1~U=4d4`$uAU1CHGEj8ZZ6DkX4(dX28Dc?(JS*Sd6tRY~9d%6LtcN4jSx+xx|v zL_VO_Jtp~kZBuEA5zY`d!Q>}-G?hMm%eH759%gRN6PAiD+hQi zo~iyWRor^K?KXfZo-ADe+Mpg(VAPgZMO4=DLW-B>`Te5jqvh(}6LA5VP)7gc32ecl zH>B$3Rw-5aB`7Fj-t8HU$&I+$(JOKomBiKk-38a8!`wo|9s_HRGaWwN`9|SLt7M{_ zSL7Hij@R{x`X(M#P-DXk0>53JVKJX!vw=;zvd&e3D&lNW19iiHBcOeQ5@+!A2`DNs>;&(~O~V z!fD*mH?jKDbx}CMzA!4fpLFF)fTf1_NK1;xm?ey_80jr2&&b%|&vLIm(eoAaqt61ob`9iLWFim`bgwG>$EFj?p z61jO`KJ0m@1m&xPzru`UJ&!q z{^E%Wvf?S_O7+#T5Y-kII%Xctwc7yhFZjW}f|d3pakXyG1R3G?K`n~&jq7HQz5nz` zn!cv^TlHX29|QM_0@tXBfyn3y^%k45%J&)+sl6ET<4Q7AmlS*5-#BM4@A4C*?fv65DnqCUy8Ooq#O&&<{W~PAv_M(+B!p=job4thF3hfqe2Oswj=S zeNc``|J-KOBB|i6?&zOYvAs3u{ZDJBa2$Wkg5dSge{hB$gSQ{Fi;jLa#g?)(5%J__ 
zOM6|9Zhrn_u6?h@{_CK(^Ld>aZ#6Ve#c~POr&l|4bCoIahV5Uo@PCe0fe@A>&W%zs zPXER4&#&8UbL?D$%{(eISNO0|oB=iymY91J-^10c6YbZX>*Jh{)M*HXm+H3{iWTl} z702g=C;fBr_A@ED=QF)iLHdpq8KXb!8(w^{xQt!g?NN^FBezh|zs1G}uWquJ-5spX zd(-@ii`hZ9i|(N{B1eW^#k$*(9uOX%#Z`KH|2nWd^62|=D)1^)g$leh=t1@%YtagJq*-DXk=g$=vHI0FnN-WPd~GZC zb4Dwv{xOs|U6V4@0N=ebDd>Y}?!a*#F*qw`#@IamSJ;OiGCH}M(S&tuB!;42k zSs>>M0ZX{slPC#O$P5p&fH>9$FT5s#2_EqlR2TL*9da$e

    UIQ^3VRS->t!&^CtGIuQ;Wv5D~R}G|r>zK@I3}nU6l^vh3mTF)|`z$*08jiKIh_vDdlx_H)WxWm&JvgGAJPW2Ch5ecdozZ}1vl zBSVzcLqd4N=rKtRnpKd!W3loYBI#Qg4nT{VS`({onU(`kjSqbagt8o*jZPb;EH&)U zyn$e==1%b-iW^}I@MHd1Z71^*zHyY3hQz;E^rEXp5ltxBv_x~{D zb1IPE$r4Nqj{`*vCTF7cADk5&nEykJ;LT9UPbEl|d^9P>isV9f3-P|;z6%wS#db+X zU#K<^(x_|~{orJI6{W5dLcZ{VnYwdAo-M8YGU^{+^8Ly+M>fqa~? zIOBI}_D_D2BQc9&LVrXvm;9}tiuwb;W+wEoDW?@DHB~Yc!*7|N?Jfb%hk{%m7~KEB-{}VLl8e7kG^#yZEJ4}p)FC{^ZFjzi{}N?>o;$NdluDkJY>elpVJ}n1Z*Jhlj_>0pe$sB zs`}<&wcqTojTE=Njq1jc8mm5L+`}t=QjN*P^zm4I5>QhraFqM~F&cM-^~T*B zWqiHvaCp%z^wUZM9PoG_5kbsg?u(fxbg_tqfGG;M!yYYC}S zNAH#SnFCI36O%sx%w$lxi0&16v7u!q?a#lV6kve?V_&$WAN5*O)6mp4Jm_rugo*#R zJ-s}-R+alE4#Q1SjpsGL^+~~}!IQojcQk}Yu>!vj0uB0+$HXv_g+G96%WcIIw#k~@ zWNvG?tZL-CkYrHQnVH2}slYC$spkk1YrqTFXG;pM<=(F$&|-Q+tN7kS6!&WB)os$j z`AEO7{xaOzV8w7{s0K87h-8HED6-6Ng)%&$!mu?R4oq~Z-U zebyofIBy*f*bi_8ww^2YGfp7O`RJ#>5CqvS@gxh6pFw8M^KB0VYHp*3K~@zTm41mB z2A>S8AA1vwMbBlOIc5};l%&Cxh?W#dh*wak{?N$ySJC?`6z4_-EhS;^T5J2>PjDP_ zvl9Q0r?U==s_WXg^@vJ|bSqs0(hVxoCEXoEH$#J>fOL0BcMS|3(%msM(m6B?4D}tK z_kF*AxPa@-oZ4rdz1O|&-?A;o*;L#SwhfOd3rtYR+lIGOkeGVxYcG>HSymSK3s3%ED-BFjLJ3v?@ETqn_tXrzYN{9kEyJ?!cee!X#h~Yp^)0QGe3>PJ~ zCZ3D3N#8B_2Sxhui;&vc*`BZ29m46%{4Y7oPD=? 
zj{|~FwOD=r%nt`e9y4r$>|eLohOAp9?t2urCC>DqaCG#j^-kLv#9ZpQRoL1nr^m)b zoFqwh?6MRRDry1H_;ktaGRsZ4c8XyBq!$FAFi~?Yh6DE4;lGdc?fgH7sFw~L;F54= z#H&F;qhHZAU7BmS8I|B+2$_$B0qVxX<`fk4T|$DuO_F>anDWVDc@~Uwk98DbI@hbH z_P1c(yo{?jo)4;y#_GM3K-zETkHFI+33D{*LxcH3nVUh$CU+mA{euMCdg*DJ=G_!xsJm!anI;mnM$sZ+Tr@o;rlZh$tAPcOJXM4 z1qT&&NiM>5Hh)qDfwtx&-{I-F;0p#n^j$bl)w)74)ddOk@OWF)Zzv*lwFU#*!D~Sd zE8yn|xI&CCbiFEb_jjt!^jlXYLQUZ_udTatY8}v#Qm(re!3%xGr^p)edcqS_@@tMp z(ltJ)<4d4rj!D0hN=7cP^qo6)zk=Y^<)Ftk>d|q(Bah3lo{za z)`G-j<=smP=~^p8>jji>$MHJ3?uJr1MU^-7+=(12OxEMdCPFHv?^00xF8o~~9%olZ zJ@6fV@$s{ig{!d-h7e7U6^1#g#5pasf$w-!K z-@sBV^hyEJkE!0ZD-}6KW26hT5%#V2@n|_PyJ*!2)tTF)tDTj*6|%5{>>R2`p=P3I z-%SmQsuF+3%((Hs*h-(2*dW&18j6F3n~~M0zJuiynMXD6oA2np(6~doPm3p(Uel)ib7Q0pg~((YW$+&V$Hi5j}l`71o{ zg9rjukQz2k1Y$@-&hp+1$;gqO~?*J*F6P0OWH+Xmjivev%gQ_2{z^$=|IA=46Kb8=|&bv9yk$Y@!s$G2u z**tuG<0kPLnbF0Qex7Y>bs~eeZ6+T)cMfUU_OM82SmI7u&Eb08Ui~${{+nMLd!5ln zyb1s~pB+8M1=hjp<~SRx-JJHkmx%f9fIn_c#m>Ma9;f}je#PN;*F8pn?SzU#~u6Yh!csQ3%**|avEwR%{Ghz~SB`KfE{d5SLjN-e96norK_*TmbiKhD)jz&DfLwBx(^9?3 ze=Qe&8LfO%8*ON&J3iL6D)FdYp(&}Z@7qQlR{V_v&dAzJzEHvH_XbsI=vB~v$e63{ z@14Vrz!A6dZ{0z_f&ux)W&CUSjNK{tWTV z=vG*LW|!bSYOT|1gwPk2dBa*|QPd(7H{fvKD{^1w`k7h1!dA*@yX&G?bpTF*pzi1= za-H-##mP@joVxZLe>{Nllyboq=Kd_MP27(-rCho69x+kS^A zYffowZA*j=)<J+VEf7x%7PTuF;&Y%*-YcwT;XMB= zGG2_r8T4D0AKBqeS!K=8tpXQJjTlsvd)nD*e@kvE4pBZLG4`BLi6+Tvqs0lrJbUS} z#JwkHdPgTuu?dl>pi!Kw9Y`938{GRD{`(&BelItqr5k_HX=H*M20}CtfB99}Dmai$ zxn^khJ7ZD-kHLHh7WdgDZJg#;}thboMED?dpqld9_DrRkJd$PM>_ zygd6{6*{~yIxo?R^Omo%6NA*!&)V5!q%mOfvev`Pzm5aAvY%x$*whKklTssg83O>pLXK&YsN_CVZGlL~j9+t1@kbsNzw zp{!((&v1WQ6?^a0?fZA)lc(Wk6R$_s{$|RYC{ANnjW`Ae8*EBOCNf2hryUiS1s3#= zzi~bWI~oXs5!`dQU&LxlXweB+3>%_ZBiW7s%N(b`D{uDY?Dhe(w%$vqU|<4)yBYWK zMhzYMw%>4w9=(URmEd57>BJjn3YeM|i@})&Yj=N3r>UNH_rrbvD`QjZkJ#MK{YIRE zJ|+ErHp3kOp<4Ml5@T06kMyd*$yttZAHYiZSl;shOh%# z_cqR8o8|D`K_xM2yCVz0oZL@sOL$WoY1CF3-n+4|F96jn1I?6AUDQ#6y4d)8uG*D6xEjexkjy|sQ0RO$o55a3t3x4?dx+w9cR=mk1xuS!jlka3uY=h 
z&q%3td)Mq?foPTvm>R`&0~@m6h#v2zWhU_|hSH1YjC$tjF4y~KwrYi)On=bMQ(&Kj zcm|#J*3T2?Hvi0GYpiEO>PJJD&jktAaE`<(;FSzfnPVKX-n`O4`nt zK#KKZC={RhV<(FgJEIpBW}sfyP?D5wwraBZ${vjF4n0fj{yv9&t=0SD3?cj8BkE)8 zAP*o{{K7L)e(R18$PLJu^?LBrL0E025l{S)*o-gPr+scG+H3Rp`1SYiV$_q9 zH>nMcZcjsDHM8|d7JkdsS%l`dinU{HAn~KqnjU&0V=dW-q>L-eVW!`0#LBee`nWPd z)D|XVcXXRzl2~3f^!pk59c@yBc{K6^dN?(<0r5>eql2j5vctP2J-P*uoK$B13Z5)Q%5ecEK~j?K zs_!TA2=E z%P)48iN^;W6meObG)^u`=WJXm4JUg&KQm=tZ3?|N?r!}B4}2W0a?~3#+O|CwOIBR0 z!q;d6a#Nls-aE?5%Zk{8a$%L<6C)ej>(COW?8-A^ABqDyZPV+hp1xb*?K8lK+3TkyZZj7fUlqae6!kQc~oeKb=|)Q zerrhhmh%n7iF6^t^9zw@nCR^S)5iU3&!Z_nUGYj|9(ij!2JDPLWwy)=kq=kW-1kri zN+`o&84g&pu07$FVpog06SoC_e(@*J+TfkG>Gpgi<;|^Pl)M;zpUyr!8W4wkKHA)8 zJC{~(=owC1$sg>e=im6D-!rg!FXbKveLKX>`$pI{A^hK{0^I3;C5xncIg4*iA|XEM zV|p+wnA(|EzDLZDK#k@q^k+N z)pU3)s^gE&*`}1n#b2xojQmL8oJtPq2R}qVApLHiz~o5^i|jj?!8C!tzifQGNBTWv z)~C@aasW?hQ`xs$O}O_Br(k*BxR_|SBG7F`S$?8-lx4v)a3eoD>&KlI@_O>&`r*TZ z?bOR9iCbESB9oa*q}-YXwJEfr zwd-l@KI^KO*u1B=QT*03GTS%34K~6!->8(^H#miU!mBoCMZSbspOSbkgx1@)$}gL1 zcsE6*k9eGpVNAxpi=j9$$w?s@^wSTyT=kkd#~6z{{|Q2GHEI}Yb)X2MG@qPK543PTHqTV6MNSKK>0;D7HAS;_6OLBB z=6T-dUYVZRn&@kC;a0i`iO-&ST&!PjnE!4rfejp7{_xAjjf{P|~J_VB~<1#zf=j*pxxe-#3St|?8h(G^9W3))tvC#w3f2D#Q+WT^5 zMX^_1ea7v%@5j>nYK zbXYIrG$2q!3J?zx#udfwo);AntHQ#{sc2p|2Ll3%nafmsOhSr`g5#2V8^@xAuQpTB z?zPOm6^4>Q3^S~gOUA0#V2;{fWGA8d59*?FKdi)(bD4#wID4ycpV<^ z;WTWvg&%1lo!{-B2xFKF@+xF3%aUcpIzRprIYlq+k?ZwQJaw&{U9+zUGkT%NW|s*U)XDs>#0rNQ|@T>Z_vQ88(@Qev+;ixLo>0N>BTda`A* zkFdzKDMID+j($e_FH^g4rPPZTslh#f6`NkxbEm{gBMq3=4ya^HGZy9|hp8UWz)@Da z@6;jbDo)Ij`K|KY+=}BmK51$247f(!(r*8w={(D4(yrmdB*G8E8{Lx9wG6MUB{Z-| zvTGS)E4SNe%o7Ia!2?7G7Id-$sRn`~1&;2&5|aA9_>09B+WTG~aIZVM6BaCT;O%O# z%c;ooylZd&oQpR&xMx@TmY*kWEXvS-z1kugL^8P|;p>E9l=beJcvp;p=&80!Dxp}R zDSD_}mURE%1YAkNz0l<-cv!XSfqGK=@cZ&#(=K|}@5>8p9!sj>Ou@WYv>&o&{SBwx|!C4 z;qfnjGMaM(&_+Zx{isw|E$pbSet`yEUD-9OiOX;^rkGuog=ujPmh7ZwNgK0kOX&}| zIO8Iu2`ASTH>#M{S@+&MZz)>r+lhHSaKD1{gnMgGb5WlC07$uQ3XvnjWHq~asnP2X9N2ya$Ec0O(A^ykIU7*B$4 
zhR|KQNw$!o5XRK2hxUm{z3SGS+d0g+euRb%fBdq1*|#(VG3fVL z%0ueEMQ1!!Pc?VuP`yM=iR+tE6|w2gS{h32UD-@qO5noXEKMBg)lqJQ8m=OnUZH|= zmac-a?2A=Ff70Omk5DfhxRfoMGK(mG84E-lQuLW0-MaG8GF`k%m)@dGbtbuI+z_xmbk3a8h&^kTq*M|-Da1nwSquBl z+O#xzECTx$zo%`XP3nluB@R{i5q0OnHpm?|TYkacD<+u+xgMeO&NKg*U}Jg`{#Cxt; zZ1=Uc-B<`}ko9ZAkZb7RWmtr1WEF`vshd}j&BJ)qw+5}76682J$DCU?&*s~5yh@Md^7$2}nFNW)u}w$Fadgh;F6 znk9Mj`L2QIJ&K&qnOkOSem}C7bjX(T zdS7O$reT+X!+YqsO6B^aFfS9MEJrueI-+$*+XDl1_-}*)uYSw2-0oP9bTf`X?BAi) z+4U+`tuR|+nk*<&LlBF0>b{ru-e|r*Q)#_Aw^k?RQ)bw0ZW4O7p83!b zR*sFAushPHHvm}kED>Mg|c#{>agt*tv{mYCz3%TMaIjKK9V*Lx&n=qy=ue;5|X zM^+srA|nYaI&as+tYkQ6+kA)>4e%o>FaK%O(SUw(<|H&BVeqjCoWUV%^xYg=1JoLm zh>{cgv)Wbn`}g~;DEr%AUNJl8(I*p=%UX+S?Mi3yx0UMYPZg^zf7XY!4*SaFTLo%@ z83_+~*NCnI+04b#>kkYD69;=2%Db_I*9rir+sEg!hgCZ|@)`JpJ$o9?=6BjcVox+H zs$woY1s0`u-)=Y3^%PJ#dcXy?O0`E1C0}yU)*IT`Nhfrmm9DWQSjakx`<2k~fXQch zWYM=vqtBLhtb3rsiYv`lo+|J6+YE*grgILfSax=rL(`^tJ8M1x2YL;!owxAm=9h`Wxqe^G~5?xp5MFX&XQ`Ur&;XM(r5${nDZuPV1rlg8=7m7Uu7I$shUje>wk` zV&jT`g3DMId}T!t%iuQF6tu<72SEBU|7654|4HkiAGsK1R(xzUmp1&_b;iN5b96_b zpNw6NOjTpO@60AfuS~1vFv;7o?rPAyl)FjZ*5nbhs9u<4yy=j~?^}CU_pD>G_?t}A z!4Xcpe$pMbg~NpJ8K>^WA~&^Ai^-Qx^JZA(Mgg7Rr1K!G9g2GSaPQ4Bti@7zB?QU2 zyRx*%G54eY;zfwbXw@G{6J@VFY9G%p0~W)kM=jM>t7puo-%uyU||p)*@q=in;zbzi+4g76fD$) z)2TA*d%~VacTTpKiWS2KccBHB4juPo=Pu$7KKb$(+eVL3iRK^w{+#gR(DEPP(4Vuh@{;mGNNnMCP|M#TAxAa* zEW`VQ+^+RDfx@f*1^L?F_Yx8aIn6CXixKbC`hZO0ar~q^94{qv) zle@KVF7a+uFw6H5+2*jpmd+tWEGMSdZFg+<=iOwjHxN2mF8ScZHd_oFABFS^E^K9i zJxiOBKXD^gt*8yzDfx0`hPpc3HH-epOuv34@F?qK|1=bs?ZM0;zBTkr?-~D<4|6=w zs?#ioOe!FD~z;R6v%b)8Gb0u@GtB720;I^im2`jMW?;fvR zdb5&RAH!VIHTE;;&bUv&ew-8Bt;WZ~XixU>19``g#kGRMj97dfx5U-S+-DUBKDsP! 
zeOL*BhK;5a20j94ohsC!0N$^Md>=G~I!8~i6_ScM&}!nvX=+$G=?;s$9;Ej(TYYY_ zM(b6RBeLRs!d?=N9hH6j9M6;7@cXdVUfg%Aw!keK`UF4zAT0x*u>N>HkF-^xOqJ66 zLr#%&*o%dQXeS*vGdv#Vwym>@?MnAve5Oqj(4Vf5fT4^F{A!K)*Sh%@U#7niv8MIH zIq$l!`aE=faF(BmOoiuGcEV8eo7cu{q4I z5QCN8PB3|BO^eHznQf1a@qYlD&^E|;gqEb{LIoI<7)zpfib}Tz5(ljIvMZ{;J)za{ zm8M;?F4-tM+9@b0l7W>LOQ3E97>0+5t2Z|o2a1{g+TcTa6pBhErlBFs8)nn3 zs_N;brCV0<&=4llo&Eh>7R83rC)!@mEs9vlfsIcCu~1DZ464vg{KUy=xTK!+H<{)6 zOOeJr=xLmdc>IO3mR=lle0+MUVyLRRwS3V43{*mi_|fRFy!Z7ibUgkn&$C5WU58Q_ z#LZ-euRawX7oRDZ!OI(IUQ}8;&oK?%o8LL|C>kb6m|%p>p`aaquYe^YuK=bFD?Qcr zylE=>!pbbkRO0pZve79XDr2pa2h}#1kPOzP#AMse(aw*oZA?5@qK=;yGy4V=ak7lr z&C!RI#>^8G4c8X!P-{#`rmpc97n~B;qO(Tuhu+K^FqNF+1J3XhI#BVRSXOu9#XGoh zCmW0bHn&H$$#X*h+KO1{snN{SRu9k(W!qinwN{!4QdO6Q6%!1a0UylLRbMV7ko7GR zPZn7&@4n@=sM1mk6}KNV9+cHCb&n-5mm-N|{MOnn|uG4}N25YWj zL`?tg8%iL%@~b;Au|^X#X`EV2fR3kK%JO9>R{Cct=Y~=O--c&AtcN!O{1~zW*x^@MA#!aMzOt$u_GQ{}1Nz+%2lnaHE zT-Vbi35jR|iW2k5W9)j!bVm0B6fejc?(kA3zlsL$29#o96=Ab5L@rfi*R@@`lq6rv z9}xc~{^}>W49{6k9YY1%YB8+ceYs5FYO6MC&y|8auleTahYZ{N$-y*5-EPt$RaNmz zzuqOlJ`CY6jM~Ffl#92?4@ze!W7}>l8+Ts@&AWrSV^7Yh8f84l?VeA3i68P!2bu67 z$OVHdY*w$$f`HM|^g2q`wnt?s-bV1!AiJ>Kf1wG&u(=c>=}A53t1+7&=T2gG8;nhDebaaobiZ%g=G(#X_RHk3E@-xXp4^|mjm3n2qhU|kI{;(3e)Pg z+aHbdauK$0ifY>>I|WrO=2c#7C_sv^JmT6rds3wgB|a39x3k4$y&3-N>!qzq=or(n z(SXkBAOEW535pFAFsUu`6nw#EOTBl96Az6LR`|uTlxYng`6UcyoS%`bKU|ZxlV=}6 z1NENv*WNgC<4G=df9N-N9tlrr6dF041gmzY`L3bC( z=&JK6BG-BTUO&Oc7$^}KBi=^6Aib!Vt!rG>UZ`ROl0jG#^9Sk^qyJScCu9jm?Ep0P<#%eX+5WW zmp0^ewwy3}o{F%qKm#(b*`MN+CC<7NWg5DS@h{7nw7CSGzC%&UX*L+W0963BQ4_U= zC{&(lU1;Z0r-LeLh0;M{2vSel0CtG%D0-pvh4S=TdkG#q-nnx`ld*z8KUv-srLR05 z)EWLrI+T%YN^K&j3ez!0B*S1%5OBvw+{X}b`l!O{kFs?`-5|`?j_8M$;Tu;5;s8aL zmEJ%)beC*=lBC*_So^igvdj)Q03f>PkE_|K$QGv#a^8^D_O4V^3$^RN*g(_M7|&j! 
zwFm6jWWaAlx!pWecI3+Ay^Key(xIbju=@Qt-OzG0&|jtoe^x^FdjKBe;w@*Z$6659 z1FJVnWjCU={k$SrtUTk0{7mvnY_7<3!OknJG(~`dvg$A8q=~JVA-J(18#}eyP8Ggc zVxgomVeFtp>c~p3TI1d&pWprBAkGFg6VwObWn2M}LtQpAL!@huRKK~$sYv{VxbAy6 zu_qO(^L(qIFa zwaXfX02sxZqXMj=k(BO2&5_R{e8FWH5(_Y5Mn{hR9vt&KaaCF`!)#MdK_|Vv5}Ahl zQt_e+&y*!O6s}}^tJO-`TauDD0QV)G90Is;aort9frncUuW&=e;CA(O!(SJ*O5$cj`aJ%>o2h)VzUG9mipVBY z-L;QRe%|RkBNH+Gjr{-=YIO!R{J|0be*VOFXb4YbGJ4o^g^^=FI8Xsj9<*2p!M?-z)hEe;HIM)I0wETGtpbL>wYWHDh?o`bi zN8}W6&P%VDb49lKz2a@WMi#S5vr>Y3-IzDI*R>Ql|H(pDstj3^Bg3-@uQ;{=3xQZUwDC~Ij%U)$SlKL#1dw1mPlf# zATTbsYQ2C6xE-+19}i7?Qfy@g?S8EKqU^{?>>P`7 z>Gc{Y0g{YXenN5sNPgQ}&!4 zYKY<44NJanO6G$d)u-2BzjR4neNwBj@H=hH?ba4j0FF~y0=GMQ1c*Gvx;D2elvL4OnLuJ671VWE-`4MnrMe*cH z93W3Ub;kXUSksijb-rTLYJT^Ov_`sHwi2sxP*zHDZeoplw^(_%j#qq*g^`%MqoAc? zIsj@R2Jkh}>jOU1_mig5+TMq`09gtEWhJ5FmSk#u5-A35ikr}Ede3Bh)PCl6%eCzT zaELQLiMTuM1_GJz%#a^DIFCX3l^ZKY@)xaI)R?|hp|ilreO@{Ipo}|NfCOdkIJ63 z+nc-BClbtYv50FQFZ^xdoR=jgVy6GE{+-osr@gmuO`5$Shs!a0*WFr0%-*TTO(XI7 zb=GY5T8!o1UY-!=UpSbPwZM)Q%W6Ojh!vDMl7Z#4ZqjomA)z!rFR7wMpd{m=@~U_j zVe^*XX_4T9Dp;bZbl@Nt82p1pR=}KHHLsMgsKMpjB_@%mB#t)*njn%gSgkf7Gg6UV zP!b(i=%{0A^8|C^%s@@y6bLtF8nqKkx(aDA2yq!KgtFE@(gSAVU*=gZuJQSq%xE{& z$0nCPTd8g&fjH=32PIWoli_bH*_omWacA23iGL zdnFTI#PoHsnOWk*(vpnCkh>PnI9p-#Kru*)TA9(_v){tu2Zb&SxhhcGXh|<#bh3UU zt7GKDOAh;|i(&2!G-UVClA4ppO2}RnV;bQzMVMC-7q70YXy0U&8m=If)6f(&asC6i zQp|<>**WD>d8JoFRE?hV{hm0&8N3`R#kC?NA)KlMGN9s|x||3aS{)Z}psQU=oLON) zyvD*uuo!!lOl|%76KP7sMi*lu?XDP8Nfnn$UTIthi8SQ}+b(sqr#Mk|tCimYp`N)5 zf1q7aX(DX)-K4R93<0fAT$@6u9%gQ{YJwbP+mtcslDt{Pl=1zbcHC}`t{P>(M|=!{ z8H%W`zlLG5cTjuCGl0NE1&Xh&IxT6b!d4dFz+}tASf^VaSpzIr=;PI53yQmkPR)T) z4-jTTthBnrD<+N6cV$pNz&-m0uNi*0T}OUg6gbd3c-F(NV(sxZwjz{;rh3#@d=Senu{MuhHHx3P36IjmSa zOflZN>VOus1$Uj$GRV!2ybAl&Ku`^Qn|z*JC~YxH3RHX-K#;RW18U8#7>Gp`Ygku( zPt&+i%hM{>W^oT7SeHN*hp?#mY^clnsO4B0;3p=~!;P1L%W$D)1^{D1rZg@FP)vDH zs@|F_WrMu2Y#D=ifeDQkJYSbj0rswaiQJX7gMYxe?HVc5k@Qt@##QgNSJF2K znN%>TwVVE=ql2y|=A12~6Wr{Ku{3_i&Fc)Ri5j3$9OABHh_Iw?Vr^8>NgZUw^{J_W 
zjnB-7w02X(l%yOtRTA13DV*Nrap=7%X2fU;J;@)x<8?Szc#Rn^O*R(0M?F_lYhm2H z|BN5zDkNa_Jz+~Ky~d?>HFoJUYI4wqQjngMe>dZUj}ae4xgp0D%D47eTJIlTx8#BS zTx#;f5eRNah)%GpNReuRye`&Io&bU|LB;vvX z&X#X1qVyPXdoDwuz`z&_h%Ee*d8{6|`Y9n}Y0285zH`xHw1B1kLWKQt-_H4cgazcz zA?ZS% zRHkXm(P{drX;a5Vi`2hl7gNKQ%$hxkjEr;%TliuG;j1GDy#!vbsd6P|Ma&pVC)t{% zeyb+Wz-RciuOE&v7UK3cu2^E{S^P@H7u3>l0g?d37C!5L>~t+3P<#3a0)pfk(_F8z zFMfd956phj`a8B|m0Vtl!^lHw452%=Z1ju_K??u1tJ$4?ies{@QdsxJc_xWah55T9Sl9rd3=c+-zWKss*JO%60riPyR_{W6o z*sxOn$tqdg+FJ`N@Z-ZoAZSGWsPgb{M7`zg^myZUqlHKo9v-PI|2K#tKqr|LaUB-;9osA6A1HO&9F#7DaHt z>8r3v5u>*t?yAle89ySesm-5`DJ~Qn;nj8d=;)VbiePTyHCUh8)hV{NHv^C7@=FTY zfLNaMn^+x59ajRyp1t!&D?jn(1xS9L`?9nVKoQZ2Y@20E8@yqcl;sU@{CK)Aq)ZQ93r(?Pl9^Ko5*nprN@m*T;fO;BJ}E@g7QT1;ojiNdxQleEt^PP zh~E1>Mvw9Ji9*c37PP|c{2m3nKmw)-px-_F3i%nOqEfLGK-^2T+(=#*_CF)Rvuw97 z2tk)+aa72d7nl9Fp72(Wm?EYHtM=Fw`Ht+UXPlZ7OLeW;4~;pI6+FE+U8>m-)8sg9 zdMyAoI^j^4soj(%F!lmPHLghw!AoNn)L9Ke@~=NaOPCvs$_7Ksm^DGw6?%~x=G2ZO zl9~mxe%%w|WG1C*9luY3#FIA^6N%Fe_SX9t&LEDY>ytz0&B* zM9N7hZQ8|P-a!DT_Jm^u*(4Q&tW8nP_4y4KT)MX&?vstHq*jHQ^}Bqm|BsaCnshbyjG;(3yxZZA z-m5|Sa={uN6P2k>Ned{DFL2_`#Qmu^L1#_?Fi;`WdnwQGxNv=!zQJT%1+KYbL%Jk) zSX8E@QSzN)b%llft5FFksr1|x^#v;_7cE?og0frz#!G|Nw35|4yg+?Pl_CX#ZQ-Ia zso&~8n9Lf|y{*Q?q!>2RMaA776l z-V4t?ZnSA_t$Fux^e28iT&I^(2-3?oyjGQx-XO1K&;w6SViy(37$JLL%IUe6^m<@S zVc9p#`yMV1BrrW&>JC6Xe_kZz^AnLG8^XADXL_Ue+SYAi{_wfCbL!7l=SQ${0crSo zq2wzyOZ^?`UiuY-e;F%wwr;Ul-@2h*1Ftc~6|1ydNE!sKkDp@5a1%w|$)PFZ_w!B| z1m(2LB>-jAOM+M%dIr9 zz;$IY+j@Ck326$9_I_krnLZ;LF}5>_nN_1Dx_d@}J625F2`7r))h@U=RLkQulfQ*+ z(GJD?x-Yc@(U|H@T9kZkTlJa~3L1PSR8;A}@btu>3z9;t_Ro!pi?TS@VPO-hJK=~o zl6bf@`kk-CU%F{Br^AyCZhz}XViXNyPa5EaUguvArCe~6${(tXvn?S58y&0e)S<&` zCB%ab(NkCRuYJhP+KGnmlGxf3YR(Z)Ai^GPqR%m0;+<@f5*|wgslrjq_cVcCMc%%F zTAXZsNcffwPD{=JhR!DSK#{-z=Lk zfg%-5D{`&~BJC-J11O&T1+YcJ{q6$M#iVXr2munuYvKnR;eA;7u#pU2#GG5_+hF8n z)$k>wt&b0$5Pz2ywT3`oiDQ*ek8on_!K|G^pwRqB-qzkaX!m?PE90g~K}29YA!bX} ztgo}phCo5uHqu~L=`-3p`ca48&iM5Gtu@*}O9hU<7@P|n&&e0^-(^)VE~Ae;k}ye; 
z(F?Kd8Cqz42Mk?G`}CK~gNA>;lU_b}0&oC3>YW2U4w_*XTE3G_Hwl_Lbtf=fA)-Ea z91b5@$4y8w2#c8lQJ%dmm|6Pix8yE8e>2UMyVR`;Cbi5#ejO)Y5ep0=b(pVx6)KtY zc+C{!%Ryt%Kd3}YRZY_fv39(1jo-^=&ip$%^ml@!XGet28`r8UHhDJ#;li2+XL))? z*Z-um?+m34rpxWl^P;NvayIJ|E}8Bb+`0dquZj=_T9g#!8HKDLz7Qb`3=$|}Efg2B z0^okaWvYo>R6V#a0mP|cWn1`Owr4~2#&=VIJk;}Jea9k`W@DFehy`2uRw2Fn?pfvx zc=|-sqxknyOr~GWJ6U#&(!*|AISf#Lj7Lxt`kYBo@AWKCNM@!nLNW6=XVnhMWMFvA zwgT;*!kcup9&d{0gA4~P;Lf)A{s(^J(NyhTpA*xWkIqjh_y+D?DDaq*Q+{zWhwrYF zg4La@7gbDHcH~ZTb2HMF)-AIkYPgl1z5&N~a1Rnedm=pv=$XqGMSc_&@$(w=ssAoq>xj@x z<98*K<>b7SLX=#lFOQ0_Ja=BYQbN9N7)D$Pq;UAn_!$j1R5FqmPOqdyuCrQG;P(pz zXiVaJ_!CRG?bn)-CNt!FZ_R!l?-cGSU|5xl6XS!ow2dHSru zz^Dm^Q+17_>029LqL_Jle#l;HzbKjJEh2Ev-TM-J689dkyKDfX3J2hz43EtlcAP74tN$Xw;NShh&o&|jriD?>I3EmVP-RLz7)oPOLJg3l;YJ0#7KuOKh2?W4MYfYiRU&Lq$?*vkT+H{Et?XYMA{#RAyS@pE z2>EzYEM2S9@WeT4Tfbz6q_uAs5u&-c@_UG0citKDb)VC(YR zET_lJrAc?(E50|N%!ggSQ@;oJLpuwCS+(tqNdNo!dCeQ2r8mQ6NMgO(LeK@Bx;Z4S zPeopLI<>#8C^JUzckvx|maS)6GwRvpy^B7oB+TiOZS2q74%zuV29}NX@3RyBU4Ln# zCq@jOqIE>c06O?R05wE%Cgeea8Dbe&(nvZ)*5_&qhk_BPZ%J6P={;6!~lOp8G%^oQ-?&S37MSzpH0`J)M)WXB48l;Q~eKM()g=QpTb%GZi$ z{?f+oKU)udah`g;A*LY7ttc?~9Q${v*>QY~<{NpXGW`*^;pajDLE?lQ|#V{|N9YYrB1+kT;@V~z}CHf#U3F8{}WY)pAV<*kuclR*Y9~)Z}W|)Fb?#! 
zmwg<`CM6JaT3n&oH%_m0badxj`7H<%I%i5pe#DA7W0Cf*30o!uVRS~PQiO$Nqg9RX%|%I!20y*ZWk)4-oA&tm=`jWCg(HKlR7RD6HzvF+$tk z=v$8#)JvS%{eSSW#$!3;YmqQ6>-mEoo@>2+aN!@l(=DHiX2#G#T7$E>DPh`f zomh&zr(1Mu1zuUF$qbNFprP9E|i<{Cl*}&I#UCbO}KYk@S z?Rnh)33O?-$Bkzu?H3fsNN|FrKc}Y(!3(S#Qwb|@S#fLJlJQJ`p?e4Uug}xv#O6qe zm7iX5r~4Pr7EchH@UH_zCKG+)rOgO=mD$bZlF%zwGH0wQ8kB=eBIYQ152d5FOeddB z?NgOd+%Dd}17R9bNCzb7r6At8U^NX^Edj>=}`=pb%qM@C6Skc_OPp>5C z*xDkwHYFW=BZ>3r1m7?EeXDnxBVBq+{Em0lh{bZnlhdrG$ug?u*iQ-rLZ?@_Re`M_zT&j6V zy)tAVeLS^HX^@=}RC}qJ*(5o(p1MPd=&q}YM`2JZXbb$RJI00FFp)j}zI$MBOAXNw zd(rClb=WHL|LA(lsJ5CmZ2NAhQ%YNCpm@HMq9L2@u?+xLdH|#oZ+& zI23}r>$`hD&-1SJertVy_>;`uWcJMLnd`dF<1C0jYi*DZnnwr(pYjf6?K|6-n`cwb z#jVzQB6-i1<;CUJ-eQn~Sj{TuwbQyTiL^QJ1|WBj8g@+PKVwE;5!C;s);Q8qy`dS^ zDO32=BK@cy8Fzc`~h{SG$La`5Z#MZxPW~b05=##Fm_OGLp3A6))3=dy-GP`A=YE zWUT2$XTl!E9mwXnFzw|ix5cQtmJKqhm1Xy%OMqa*uPiWJ^kJ5`D*RPWtIJ1~_juDI zT^E1Y+UFIr>reinZC;s}*pB3Z?t(I|?bEtM9`jc?D0W`Idqy3@+0OXubv%`znmi{l z$Kf2`1|O(S8wfMzrJFuf;3sPkt>oRo{SWg=;482}%d}?P*sxz2_I9x<^{E?PiGTF+ z#N}=52f|^U5!L3?nFZO;oPCb=1%48Fiw?ib<mxw(V^I;*cL66lPFtD(29e&I77`r zh(9ETZ>XN+<8=*56SjGkcLsK7Qe5lTU#;Og1^7+nd&0Z1K{?x|71O71D`w9I1KH@T!H-oc*D zthXnbeI@8w3V@^NQ;JQg@d>pJT~}yT_X~lFYNjx@MwhQS#a%1z1i;IDh|_#^OcS%( ziL+8#8#_UAGxZsDn*M@otFiCzN$0m690t}?6(3fy>UQ&fQVhZy0uf!i))fw&W0ur> zI*TA8bca|N!|+Pts7 zWtFZ=;(NkFe*Lfq#3t>-E7SbLwzHv^QyY~*$IEA*A7%(pu&aD5R_+rznEgZT^^$c1=QX@hJBn5%(<&wV`<_xJp_? 
zrwGH12Zn>;hf67R!ufoXj(l>18x^vSeXKRTK(aY7O#5anW`w!nC=7LHkcwsMuylD% z2)W)L`ip4t`abm1Qwg^ZF(KmO^z6X{VPn*BBDwYfJ3%jy3I$)*NFLz>qo}@%ccUuhLTeI3O@eNkwka^NPib z6@M>&A=?;1&m50({nJcbcJy^%oWi)ZO#wOoZ2~etM%^_t>LOvrm@gwTi!&(GE^GqCf0ghe?;3Yt4=5-~VW=avSL`SJX1;KhOj)~Y&4?<<@H{t2$G<;#>Lh<1 z6|XN?N{YMq4hKM8;fEaFI1?QPQ=$fk!(k z=-RNSCHD@Pb)Q!cT5bETI3)VsEe23$w5Xgheree!$^LYhFO|>n7L(%XTT4AIkWZ}P zaG4VNr_9R`#-POW3Hg;CA}|2W8eVbRG2@Lal59!VW8IsraLlBBBCfSz%O`O&OJw-) z`^ifWh1u_(Z|oxeEX3}IPZh;kvon94%_G)12mj2V_S%`$fOr2RA|NMgo$+Oa%%sh1 zrpcW6N#K>a9TKWgMql&rM_3G)m;wn=OVBXhAE6J8wsXXtcbtjhN@ zTlgcMbYb7VmpLjbKDk3Sddi!1HvqnWOLR_9%36x&{krHtc2 z44K8mg~=CqKX$fA*GR(e_3gL_Zgisg7WS0d>$=O;+$O;Xu}Ar49Q);Io)Pb{(7|zc z6#SK&N&k6yM03EJmFu5B(zMUwKw6b=WN|gNLEa_OjN( z;g9X>(uot{LtWO&$9p;33CnFR-nu6zXRmeW8Vdv{1zwWXtDJdEh;{nKqhm$bg zxd6wXz4DN?g2Lx~vSBQdsMf9pG)l|gUT<5Jj+~%>_Nmowq(FvR?K`>Wk^=i#5WY~0#DPzX5HB3BdW<3D-e zv}^JM5gwU`TJ&5is{kFaj*Z9(uK6&%OleMDOHS!DY3XY0%P{dJtExPDG4Qkd}+QSaw-=zZHX;pWjz z>(9<`()IU|A3T2-yLIl|pN$Z~JpH(~IsS465k%n~4ZDenMF#H6YZcle+!^eK_qSn{ zipdk<>DACdO=B4UYiX3G;8m;8`rYi)hI>veoE-G`dH+An3%4ozHX~j(G?(t>$1}tG zcNM9@g=xrp*BnypD&&qw7H@kU_r^mH-fN)7>dV>AGlF%%!|&-wsX;;u*jm}mDU+7J zGhHi|Gi0@bg@*qHz9p=~aF3dV7Skm*(`Amx6^^!T*fKYIC(TpkMiul~1x9}w@gOir zr`wEM@ka7I>ci=DA9VR!f|0&&p5IA7Vf(cMn z9RE^P)`Kv@o?N}Yaj~o-sZ!pBpSY2~_Ub)RN>3M4L{@uUUo?(u?LkjhCU8=R1+V~S zxEu3CQayE#q;$mB`~`yAf)RsV2QH7Uj+muzAmTWuJRo&F#LYj{wqN}OI&CXFp#=3b zv$;6?M!NPUqGV#!aZwxddd@mOYbWqzv~`J|pw32F2SK7#Qj_dmoovUZp3(&UeIOKSDu zc&gszr@IpeW^j@vLv#doSw7j=9$G0_rHa?cpyuGgEw-^j7751G(9G-e7?ro#ab?Wb zE0aL{!U^lL>Xu;}0{B==Tn>PipJc0jw-bOgOh!T!urYH_S&iA(=0vRe7{~Jt$Eh9< z2M!7nNlPs$s+A!`e2+;hPm}s3M?@$aQ@ih0a4w$)AeLP{D7MS;qt>kle{jr>O(peB zU8r*l3ak>IbqaUa0rMV zoE!h7Pde5vr-*->!k_%Y&%WA?q1o}@(v=BCoGu=}r#RZ85MI^?fU@X29_A#XQcB&g zYB@Z%eiMGVLE+IkYS>2jmqXwBJYUx;vBra#8sTLDeqoxQ1R~+WK1Q1mGFMb~wiYBr z-Y$OiJ$=3|zO7MffaTc?F%RXANWWEV8X-Iiq9SDGcZz7b@+c$l>FZMGT>#twc)Hp- z;*BTI4^9rertuel3NJH&UtQ!@U6`=i*{te=EWWxe;U=99&9UEudS-`9W^#s2=dn^@ 
z)_Ifem?<#NmT}EF5)Tz#HLP0go!@MA35g;QW|zCuHYl)ImmRq*wff}uO%y;$h;+ih3IsY8TDlVU@Rr_O2xUOe#$xiox#}m(+MWU&zI_5dW&cZZp z`!Js>5#Fm6Q+GaQewTpgh8M#GgDc|wPA8~=xg2Sd*=cU4*3^-Z4M*wpjD%`)c`aS+ z770)VYJ(T+MUTDklGt?lT!?GSxCf^9RO*CoK#gxMKQh%mV!w4>+@C+GCo`AcElWd& z?Xu{}Rzx;tduyPFklK(=|LG%SwI9gVP9w~yL}_l>0Qvbq=8Nlr>ieigBA4LzzH{QK zN29CXm0p$3^?q1n80X))uox0H9>_U~fBd!4hwu3Wb$9UpQ_gf^Mh=9mF)Oa`=wFid zFLZA6v>0%lr6;Y3G%kTRZ_<3K3W`FFB>F85p-Q? z7Q3nG_So!RmY_(f-#>M?ZhU-O^+1JEe>tVqnoMuqYM^=G$G%f{-ZzA(PRCBKYW=D> zC$Zp8>~k$@*xJOLl}JS8k%TfUu3$sxQua=T;OAT=joRg`PbJo_Z%XYy*HH7YqA57C zOkS;#tAl)sWwg9$8(S`;iAmr*TQx*_tMiH&0I=lw5dljN~!Ti8%itbnzrA^d4|8eZkYYGvfSbSu#*5Em3IXTdYOB zN{Fn*>aOQcy4T^PzUwZAk6G{-##ntm+QFO+dNePapC>70P443kpb3xI)tx^~88V3o z>btvHN3};N=`&P=d*g4tApChUc+21k3w7$o)36ZVS>BR)qt?ps5`czSqf-mJEH%MA4FJC6|6Ak=w@lYMaoW<(Gf-4B^a7dIGlNrnIw|f(Xpt zf{j-8Ut#`J5hkfJk_#E^%9Q{K=JzkpjE}$8T}xWCx8;m#-91Fde3zz=z+4~<3=|z4 zZ^i5A7{BjCX1r`5h_3e?MR%k%GiN^D3p5`%F`ZRdWb<6dE2(RcuoyW>(x6YIqptm} ztMB;%A;-xIIkRp_$q#bP9JHEN$V5wlMv^;rMq1jA+O+BZWMoX7JGfuONE=}gg@@;L13bOsA5rL+JCQ z)ZUt@f&V}id`9-~*3P{ShjjLG!(TcOc{n8dULjz=1mNj$le^BZ-P@u`bK$1m4d0a8 zYLqmi1;+)(lFo^%TJIj7rVXo{L=Xx4#?x2bpWgcwA~uNgGFXKq;zwe)PA{#)%A0;} z+ighEwEUibmgE{<(MrM+jZe2cdBw5g&NteP2=cpXxGKL|tq8cB#>Fpw50&$>&? 
z*)L!JY3dr3EMHi@4PG8Jm8u%A^kJCKzIkDiJu!PQZ}!_2Udv%dTkC3MzMYUhbVXNh z*BZgp62B(_S-1aCV6cUn2!M=?Px*jro6qI+eDdif#*PJiZ_=ZFd{bWZCv?otMRw~rk92CQnAz3H(2E&@Wd`yg<>Q7_r$JSl?3zwIpg;ocwUqd zu_d_x&-Y*cL|Q0`kp(B?;6S+7TRqB1b)|FLJ;l>I{89noVZ!9aV$|M=ab#e!S7BOD zj2^qk-Sx)-={Rdz70!rZA)>rOxa2~DG2V1oR|VmYWxhtg_5wSvtV!Zo11qc2TPew4 z8kW}N2k#E6m%Oc+%;s4aeij;_lBI(aB4~N)q^PNVG9bfhnhSe@Fl=^rCc4Py;M@x*Uj| z99iY&-_}CX;OUWHsVu@=51=a<%*?-~@7Z4IdE{+wZ^oliEwNeIMuf`T9D0XKhd|OYrHW!Ur1^92ExBBYSzl%>kEc$*q z^QAi(*gmamjPwD*h#I`utEBir@4Z&5R-KI8a)?jmcVl|L%Hdzv>1T^Z`S2<6MWYN9 z7EgSuh+;RT)es3~Z3kali$J+9s70aQ-*Oz1nmuKqBIOoUp;mlFxow82g9F8O zBsTWrc7@7jm)joHx0uu((z7wF#hifordg0Oyj%OKMEN_}ac`9b{+tK5Y@VIa|9=b( zPTTgVc;N!f+V!bZ#T+wDeIyyixffCozrVdDj+(i($4Bn8BPPTTqQHY0t#5{$w=_tx zqL_XtZufoOc->Q%~Edn(bx95jCU@F4v&zGUKr&E7?d0+(nlaUzBJB#^8 zdMJl>%tJr)R{fu0jvt-{?T$0Lr#|Uu7_1s^)zy(2zj>hcqCNaKfI~d{{c%**r0w zqWKW9ed(Q3I)~lZg{IzdN^*JG@_S2JJNn&U6N&=#w79lyfc*WNlKtdvFIR?j@85qt zD1NkJsOsog2FUbJGV=Y`a`G$r@Cs+@`Sw>Q7SStLp>tQSE?2%Xw$n2FHX(Ar-6cWr zPku&S3_0Y^SjV7SLVr0eWq@%AOD8OjcX7d5*7pwM$@$7NNBAemqEWrb-sPRUSLG$; z<4wlF1m3OBeba>Ak+8cPftFBbcDnQoG1t0ma!%45Uee#c*PjrNGhOJxJ8pEib^V+v z1E^Nf1p!lQis;xd1J?O-Bl{Dx_ieX_GI7IcEfjAr`Xd@C*YcEBBuWph37-K zo~z)syU$qgP`{09hXGxFc9jnq>=ruti>Mg)V=mw0TMmM`AxpnAw_2ZLyPXsHuI<Ni>`lJ z2lj*k&%2a_KD?IYyqz@0Q=T~5HadE-AnWUj5m5N<2o?iWe;n9u!B~;gag2mH1m0ONWPYgXy>LEQ3fDXIU1;DHO_kcB0_`{H%b@Dyh;z z$b{9*L77e*MEy(5#2#r2-kt(wx13XQVQ*$ZXwo{b#ThiShfHs&+>Bd43!8i}PekGn zW&AdnsIk^M$FPDeWM9j5TwAbY!kt|hO1kcfxG*4ME>^ude zWrrzZm-PIV3*p5T6s8!n#|7`Xa#Hv7Nj)?(^}6ENT9=QN(%PZJGM;t4@6OjBT)%do zJxG9(qUawoUu?;k-jsBd;=wW1doy>Zxo6Ce=XVs6^1Z{F0ymdA@BV)nM}^DPrv&Ro zr5)285qWs7%oWvCb}xuoe)#GDCq`Rrp7sp9QnPD$!uHU{gLyWXA7T!fm)+e^N{|XI z$)B9Uzol@kueYP9QZMbf2^5k`y3^3aU)`(@QiwPIx?%q4MM0T-ez)|9 zHukDdTLNxUv|0E!yTv@6TlA}5ZC@+0Q#BY^+qRjqN30PsfZhsJ4Noz6hQoi5?;ExXC*cBLyN-7xFscq%qW>uiFchpdurU%Q)5 zdm_WPWm;@QlKVJ{tTHMuCIWs9JiQh$KCDX?rO#qg3DoJ_`29|LZBw4#)vjlwz#7FT zs?Q?oJ!>LPQ!A0G(jy84xMXY36$j<(0W(OQnPH59cO`SHG`Eeg>-ZGpGOo1nB(2ZQ 
zSa7V~rsOTG(s-9dJnPNbTTfqXPW2n}7|+Np?)*!f6vke115O8VE8<#gUvHH`V2N*} z_p2P|q!yzd@Bb2}R=1Q5cDu zWPX;vZn2t!VC}aGXu}|){)hFF?~DZnCu%a%_E1Z9uR?Fb-4!p@cq%K~p@hSz;n{;k#dOMhLu{U2qn6Z9*a12!N|;C0)FW-3xGA~zhQzKPBXOJZS%i!TN{u-W5xO#)Ef%w``o-sZqzA=FC{=|_hIna znEGDv2!YuE*T@s+$dVuljgwQ^mh{SCNam{T_*eIZd-5*x9JIlYlV zBSC_wj{6pq=Owf%({ihabFLHMX;pzmGY`A)xHcicrC-2`moR8FjjFy+u&%C`xnv5I zYe@?M+anZih#@VvP9Qe3p{3wDhVy?6%uX)Wk6SC0e7Ts4@(wL0hLEWU{T}sk0n4QI zH_`c)|D4*>a1Hr%wwXN?@o%Kpvutd2l)Myr?XEy{aadlJbUD>CvoH|HbTN~HhAn5+ zE6rwv3}@2xdi6Ph9OkWY0HfY&NgJ+8)(ktlw=P0H17@W6guWLWObnyUG0Bo9m0X-qeLy2$w)*++ZKC`9g!OUXOSIu&`@N==73P9FZJiU4}@aLE^2&l*khKlMhkupmMiET2r_ME#`{<@O+)uuXlf|zjI!x zXDjm$B%zi`{|$Kp2)hl2cozP_(b3WBL`V8WUZC`v#K6rh_(P#?;m?XfEeX`ZfQ^%e zsi*tBIt$9B2QJ+z6d&&+a&=DleqKL{$lwnb^`E;K#HJnMZ#gk5VB(#SD=FPOanx{E zntKn}3vILaRZ}|npuMHv_0=73)$$Q1Riyhmr0JDPXr`#zG^U9zv~;YFwm;WjjXEn& z)piiTH3?%aigCcbinlRZgRvf^;0OfzKI-gUVK^J+@7>@&&YXVeMasYQ-4JqolnpP9 zDPhh4w53p5^Q;>Fcdk?^W?zVgh~-3u+}YC+$heXQ+oEv_d)5AJQvFY6GVgsDS2@+O z_4~COdqdp*>X;SXv8q+`Rx^!{-AF`~CD=BMYXWjLCoJSrnktgrwr+l z=G-3zJf;bP4^-#|);_pn3B>06FJ;A^yDL3U8|o^$IylRh9+xoPm;f4iDoz5P)9_Hi zrXQ;oo92}L+skfxW?b;?#zILdwzX!U5roR&gV4Enu8E6mFeL_eW=;;`=!o6}`#h!Iz_L5nbS zUdMdr%^x-tU#;=MLQ#seHsSw*6Prt0uHi^(xSCs(N={D zk0nJ57O7NkY_2LeK)5bhVB&y~Tc@mR2T^^I$}2_s{DTjjd(41uN!M?@O-CS#p*Y?{ zEcAj34C-zchC0{NX`MWe2E@hTTGo-(1tqYjZ)mG)5Jx+EyJJl+z@VO>B1@KQjbp(K-MhrF!ES1dbma(~$fby(|CQs7Py8%2O9AHpCv+l-E`9 zND7K2dPTb_GHw_Fd|l<_kh@ZH-0;xKdx{=hbf(Jkm=#Rjn%BiG1r2ZdRgO|Ok@8j* z+d4*67i<*AR=dqIB_2)7!^waA{ZVw~-=~jM;#&ImrNVLmN{P~R)}*@0WG?_es!6=2 z0DD}%vSLAVfW_A*$Ico+q+-pL<DJC@d)}m^O=v0=@kwGH{??-{H*7wlfmLaL zeMHLZoc%1QOnNplK*5PTV{T4Q4x>p`g6p36eD;PsZb7GtAM>V@wPg9FR?KpLTyw^j z%g?1IR;Tjj)@>3NHo{a~7zP@GVzoqcUGU9ft?`;Z7g&|G-?xX1HJ$Z@(A&}M*TbY! 
znlHHn;!zA`C>Satqw%x2Ja0n8p238cbITRlVneFI`9vX?5nUJx8kv8QC7#zckS}iC zvqKaDO!0#i4zQqXPOhMYD%7kTFoao|0y^n9rg|okv=AU({8N-{`CMswg~gK`iuxFF zs65tF^D`kDbnKu!wqgRO^roC^6$Bnfi$gB^JF#N(!$DyG4ieEBp<9cq))@V4q4Otcs*d^mR$O{$p%7RjYyPNa68e_Qt=a+^eP{$D^nEFxC#%kyy zrWCZxEnQ`NB?RIK&J%RprqQugoSPB70IPxp`>lWS?nZ?QGG6DAR&^|Eg31+GnZ!Ms%er_i*+h6sjT_yrD|5P}_tMnIv;1(FOtie# zZ<9e2lwi{W^TUe(LM44Fi^GTkV1_)->#e?!6@#J$NTt2wa=M@ZfxtQmfPc)XPOW4c z7nb+0Bm-CmZ`JfkrtT|X@i~v7v*`6KD-wf!l7e#l<&9t{QrX~&7oMpiNU0^Y2G;_k z1&Kx)dTTDm_c_4b2@@IzpT(eN^Lu}>Mq>5lz_J;)qT%}vtcMjwIKARP1x|0FL>L-= zD+29{;>Emil5WVi`?`LAnS6;NoN+#24>_`^ScUxy3c&s*%)3QsVe8F-)+4)ce~IOy zsE#7bI0EC7KJh>mmRtK=fA;ztS7F27+5TD9v^mk(bK)AAauF7}R8?{}LBJU3|2kbq z5@PTy5Q zyxlKyo2V0V5>BH(h9!if)JnL>^GY80Hl_hXejPVfHDLX^G`!q!etQ%fFc8nq*$QME{92-I(l#9&i)DUQ zK+=>JsOF?gD27I!%jGulDa>c*kw3s3%B83^X-IL`8}IB;rUvfmGRYONHR)-s1TRHG z!gh*SQ_oA(`#jmo0tcVwYvvlWIUVISuqJt<2E?sp)VFceCOcOl>kNwho}+x`=`r%4{dzAvpy zvd)3CY?-YTe>x;-c}!9z(pPJlLe&>HH3vuX0)2p&9;bx67g`o=b1bahUj(VpF6T&g zt>?ssrvx_`46x=lfz&e;webU^qUEfro}yl_?h6^lMm)YPS0};bYIF6M0+!)89Iz%IUS%GLJ4)si%B$u6F)t%wle~@Po~|yj0;RdosWX{!1>|#1gfZGO znv)y=6|91{crLZBEXLa=A7w6m+ilEU%WGV0A6K_MJzd=s@*Gg;gi@ED=Uk zVeq-qRMw@`Q8ARq>Ssw;t{W9sq$5I6jI?oNA;D_QdM*0R>`~2ZFoOlZrhZU#mP+NM&je$JGT%qX4I?Xg%xHv6j1RtG`$fn-dBs94jA9k**R4YU)OC3{YMHq#)Vz9(J{vnhRU*c zp+mBB?NYS>NUNZif8VwztDmK_Yzxj*uNlASq`@Xk*IF#8&W*L2oK;kz38cXGCMOi6 zZaX7ottoIG@&50vLW1gJyXHSD^wqWFOG&k3oIPs!NmBH}U!)IWF;% zT3Sn|W%%32&3V?}2IoL$@H{eBOj;UxLY0!9gyS9!8X+uG?Web3DNK*`>`|?8LuH_j z5)!QV3MP#tD+n3&Gfy#vclrZmOovq!3ipL3WNM?{}LqrE9Cp3 zA1BHnxF``w-t`F^93IY2SDUx4cJ<`RrmfX`V7Nx4`#kYzG*^Rj)HyRV&nQU9#3(IL zH(Wp6+_ub@qP4S@DH`(*Z@{q&(r4{g}Nf}yXb2hK#RZ4vO2wZ!M3zo)P zR!3yXlG=@~z)n6`9QIt9D_0}A;&O{%@pT)kW>$)s*lX~Gf-d!|izv`y4(xCY_+sWq zE7>bI<<^gzMoVTVszP#5p}KMc#%(zpm&7Nj2^#iXRf*_mK3;^a@dE;~v+{7{*9=mi+FovMA9_E2M=)RNADNT!7_2<#7MtXO%am zq%VYo)KOeKdcT!lTuL~W)eX!1F4$n4t@p1W311=bNv@}Cm$M-k`kr ziILD0-6@mxRcFJ>_xmXWpW{1=wvl|!32Z`}gs8%JpaFAwb$r0rQipiffAJ=NuW_*b zA4odJPiRoByw0I4i3*J+^G!-tRB)_tvxReSC`S 
zu{2*>2PAFT<|>IXuVrv2r|l7a;?jh@=UgZgS#v$MLTsIxDrY`%_-&otnvy-isrxb! zJ2U>BTwm?|Ah>_K|Chrk5e9}xSXkJMiKU#ZCOwN@K9VYqX;GhU+f?+8D(!a-nV!q7 z#E4&l0NxXZ3F^j|z9x82@U>|lzfe`qU$3&qJjKwrPT8@YGT_98G0VgA{>EHU?;YQ*M)BG#dwqI)8o6YQoq-Y=5@QA*R>Z4I|FaGx~x|4I>eyp>AS-~~%=_?%jL z-d`RNbu#P8fE}fR{Cwdo0l;^DUz1-Bx2 z#_uZGFdWJ&<%`*GPDXP=O)ZVM9Bk$$kT&G8WY?l8UM<5#s5;Cpc>I#*%Uwk^C-hu) zRHj~`jAn!;zCQ$*I#nca{X+R#yr#k;`Znj*O?v=mv0&r#?IILJKCe)5teTdy?M2@Z z7W&ASA+D@y{xk^S=|H-j&JUd90er?ciuY6D{yJB|sIpUo6|2Az0~^zSYPHHH`<4B0 z#peLVNfT<1M7bSPTk@!}(>8Z|=ZG7M+WGP-3^H4KkBch%wSUKnDnknD1t{8pJ==Xw zAu2Cv{_95msP*l?Cp{lk%l471GR}b3yJOOeIw7Cour-g1T(y|%l@IA8bq7aB?GqC* ze?mDeAx4J|=f9QV^Ny~o4xbTJJo7!&GR^L@M+mrQxI3`%pyh9f>G< zYC%h=m<3I;zfT4uIw4Hwr5hT3Au=uzx*Cb(^Qo&Wq^}!<&20L(SE$Bx6y|MD|6k zc5XdlF~*!=HYEhBflG&m$)a&p#>WV+E5u+%EA5Y-Aj^3S!5vj={Rn5PvLY=16())>)+Zo?@;p`$Z#OBQea_^Vu=FbS5c_d0H6| z&K^e%QP@m_IVapN^I=^)SKN#-6icsmA+w>`vZO7L(-dL z-+R}V3?5?4asbwflT*v*ktQ~K``!jI8#H&tC>_(Gx`i91DyHqsc@B)te|b49HM(~9 zUtb)JSF%kmrIg?I3*FZFt~I__W@&oc)mgH8`+!a3^Ec&4`*=+1Z!UTZFIG&x2W#># zI8Em}+$(X*g;xJ%!Sk0k*<+Cve(wBh3rx!5=iPjw=vTfbEUydAF)@GbG2pP+>L+H2 zIi}Hs?+id+VTF{pZ5Gw*;IC1}+6A0XCk8%D;tKYdhXl^$wR{z95U({>iC;Br5|%Em z(ttB*HT>}0#OnM-YI-?$*>OFR0WCFSP~CiBt)@=8;CsivLcLJIpaM};JKo&O%nn^4j**nlF1*yK2N}DfT%Z%PG}k z25ncny*5zn%Q-rq4~x~iB859gWyxc%XPRw^`l!CiuiMUQ`JzUH1uCCh zHn61_NjbEw(?X_ZP#tt8Lf*t}A*iru0!%SdX~( zgjZG5K#WU<8Z4IzX=72=^{HuP6FK`!rmUJpgpDF3x0lmD`Ec{Dmn0@#5H=J|DxilAG3^O;%-TNj zUQ(6gQq*i4VdjmiQ!WYRe{f# zAHtiOKcS%v`m*bM(Ht`(b_%U4w97b#WKUvRH6wE*Tdq{OQjR`ss9C@*%d;fSV zc4c$o!rsTBy*MhHp>(}xYZt$BT3wVnDU3=<+1zly?pX&E6vgGSy0veenOhp;lXk(?`3)tn;9!41*aaXwZy2oiuG}$ zoJuKVHciRa@B-YR*btJOX)d{fvHUW=K(ZF5tEU3X*rlE9@Mx68|Cd2?N}b!5e6 zGfl)OR2o$#%lb5rgi_?nQ@Pz)ZA7Vu?G)!G@fbvaxo1?rG3RMi+M}n*gxEQ;Ye_)e z;24)(OvCYC*=f)r^I0LAB<)s>?+L8Q_Rid^TP|qgiJxc5RNkj-SdFZPE<3hl2*%V~ z5*~&YuZlFYieL=+lW-dM&uI>(vV}-(GmbNlN;rEEnQ@N%sVdKZb07$7Try+*WYhUR z-it3hsmctF>`yrw*6UiNjyR*VJ4@t?T)5ywJ7M$GffFygWKD;ZI!XMsXLCBUj!!*8Xe^jM~$TE*TU=3aK=0LSW5vP-ky=7eRFKe$ujr8%u}enJ7<&Q_pgbSyikeW 
z`tC)wMCDOFNt4eHX)js9=SxjS@3)7$M-d4lw@{zCR(Q<(tA*+o=L0mEMN2$`zV8#H z=m}lMt``q7aR>XP&GqLtl_#~up{R=;{xhRXT#C0Ijgs{ciSb+Ih>hVD+hu#bwnt;> zp-~M_7^UEEwgd2Q53usikAm(w6<<0ZF2On0^T`+PgcXAR`d9OtoD%{li+S@;4ru%d8Q>U}jH|Ua)NzHX* zI@19}m!5IL_vcA)zpdV`c4PVkBf^mS2EWVWyv)j;)JeEG=Y6>li>e$}%|8083zHc; zu9zo3CkoQu3RtMtgi!=m)u$F*Q0gSK+ZuT{MQ1=Pw@6CLXB-b~+n@3|o+1oO zhF4n75@sZAeD?T>BLrmCu}CM>I}QbS6R-b7_}fdhG(5&=c>9Wf!WE5nMDobbRdStXcz7SADezf6h!k;#t*+fGW2in+mRG4Q&ob z^iUqVOuz0qUy);#-VNu(e;TZm?dPoI&2MM-iNFf5gWM1<8;COd41WPqtz(hbncoQa(CIj55QAb%5k*J#ezq;=#-#b}z<2FXFr zV{#8TRmACeptHvnthys8aPgS)FEN8y!Y7{L!9$+kr*LJ`6XK+te|1b>YCkU}TxhMM zmFI@i__J_BwWWCsxrq_R0oyHt;c1>+t4Tx=m!x+AzUh^T+m%O$OAo2Kw-`tWcJkz> z*Om7(+2+4}v+U~ayAyAZAj~#!@}r|F*Zr}5o!TXE!Chidd*^b(U?U|dx1Km=&Cez% z>w5Ram2)~7v$82gHf;74Xal8mu!UQl`^@Z+>632}Asgpjw^ILZd2Osk1{dfqkJrw+%bz<+c7sH6C|R20Y1+&;3ct!}vH z?K2;8k(YM_#dltVFQw2oMC0T+8mbI;ZXPXkxG0J&-R&K&aX?VYjmn9cceJU_2kKO?^BH1 z_e7d}8DEf2#%07^0Z{P)U z7IV(|*53QNKKm6?!K3fP)+a2(a?mypY}-Xhg|Zc)Hlt@{wMtD$c3ok-VmKYmK+*anMEa&>epcqJjj-%qMGX0^=(^4zeeqy7?TQzg)R(II?>Y;+e#pFJVkW^+Q`2!8909W2?4s> z2Tyn4r$}RE(5#02%@)fB$dHio1{cYOsIJKX+n1BDgYEiTDlUv!Y7s5xe*!N{>9kLP zp~<)6<;HC^&477bdHHr+{q(0_Qv8kKEC(T^$~CgO33c~ix7>bRe}=gJtN0=lZ*+MT z&x<@_p50w~y=y((XbVHA%BGcd-vqbLS>Wn39TK}!L+XNv)s+Eiu@*UW`kO(!@=~+~ z{w|uyviCC~IGbmeUORlgz%*%(YJY%vrZizN{b|v7 zg(q32L?2?09`c?K+E_#+<7*~XuNnk$E2<)^+#84P@6vA{b-X^CG%-hEceX`)Ilp+G zJ#Hlvvx6Mw%kEURe^cJyFSp#5N4?vT-TgS8-l=QxqSwjflS7UpYMiuQi9acHxut8_ z(XeG3SOeRhkOWP4*Vf|C+Gha1IH%k7NJg7JI}h+Qn%A-VrI$n6!US`Cn#QYJ1p^M| zC6@dji?uVr;XQoHNh=u=$TDtd|5g$4dEV64Z2Fo^@TPW`cArGP63Yec9S2>R*WYKEh2zmg(xaS2i=CIx?rK*YfN;g+6%5pN-&dJGDSy zL1)32mJ>>krA%@(N<&S(L4D=*EIvL<;sYJZ+WI(QlT}0(jD%oWUd#PEtJ%1JLef|a zOD(pNX`zMUwf5!;SbqIYbpG%~xB6hB3&&(T$p?Nk{hHALzur>FEOYU02H>bB=Km3U zX_sbL_yl0-d3UVg6z*q~O5DZY+OC&7rd~-d2s&Rw1MlSu+LAo1^D5Zte%}@I($r*K z|I8TR(qv5CG>SEq#`NC@xol|v_=cCi(Ksw4bK3?oG|{nuA3?Ylw|b^P&yhwfT9dAh zgEJ`bO48xT??C8ENa+@m=N%=dA@ljUMMU83Km8Aads4q_?0~FTwz04=zoKDobV7=R 
zyL;oq4Xt|M@f1o9xBSZMN=d9v|3Ds^Q5nKgA^mWDVdy_$lnoS=R8etJ;u88V;ij!J zkBK>Z09+EYu+Z(vrNn57JQtu5U@D1!qBHmr(kvz69Tsry!@^-=$BB(!aCV*)sVp4d ztcNzCsrA)4ZfXUB0reiFEy!~D<@dGVkvbdTK>J-!$zrF&frd zwn5i$bpO%=lj3g^GW}hczFpw?&>D}NRxLERPxfcfjCIk77y;S z?FXywPk7jIonBq15ll<4*Ep+_uhcY4fHeWrqeM+`he;KIM*Amxm8~jAAV2lPsl!Ud zQ=`R{e6P{bWkO2pm!L&GoS}sQklFJDu8S>O-B*)49d44#48iS3=Ut14F7s_>d+a?< zg)aP~20W6c!4|q)Q*+fQj>2ZS|DcvB$ zwi9Tn_dj`c7QgInqO$j1WXkZ`4J;$oWd#grdISxjcKhjSzXwmXn(Rkkb~QUK*kSI} z2#8*fS&`h_u92#o__T^4M*~V&;DB_)*9snDE2Nf{Zr~a zxxxE@mUOkqe1IdqDM~!+NMC<-IE$oqRxuH|Gnl4Ejg_GSV8lNOAtRPOzn1TJyKHf# z>i-%dMf9I8pl6$Y&Oxr}Zvhh$pr;5XLU&1Nm!qJ!WIl&i_* zN#9Y}9+4Xtk!8`QKj!M5)-OM6jOD8no98PNK1mC^+2!H>ru8lU^Up4WWF2(8`1^a= zMH=owt9-$euFNz5~Q5$IJMI2%MxRoa1h zg8{M%+oH^~-^nH;EbZ>M(khYHh=0u5et|??LS3(ZrAGmoJmApto`xG_a2IZyR$J&PwR~lk2sshIlfi94-Wda*f`661f0=A$5ez!uwd9nWMXR(rFm~J-(T9>;Uz2o} zi2yNr_%=`RgjTot`R0U>CaINw#JtlMo?Ggxr3H@Q^S2vWW5Oq!_pmx5cQ!ja_Q3tj zxk7`l1H4rAYyk_We)CEsjj*pCqjoZQ-lHRn$7#nWH0S-i|2u>Vy!)4{_D|@r8lMEh zwI!RH8A!Ar{Ss+Rtz+}v<-Ye~}j|b3^kpd!6F`gQcSB&?=B3AKIy8P^P zyU1ff{dUZHsYj4#VwilmwKcx=Z?at_;bmn_POs5E9FmilH7l;0)>|Vgj{dqq3p?;)RG1?7(3#z3pIFBi_G2&mr}M z+nUx8lViSmUe

      Z`PbrBcADi zctSm!sg36g2HB&#`dI#XfpPWq+f8>C({YwmL$Nd)M-hCuEc&@p1EiSiGz|yqW)`(* zXP^xM0q91|NsUlsAlxf5rJ+qs5fph`18 z#R(D-E}t6Nv|tDIO6_2g<&fRB_O$4lUtaR8I>vOHcN@G??@Wr@<6vf9lEaLQkzyE+ z0Wd?ohA+E>(#1ua!SKU#3*r@YqmE6IpI3~yM!OQsxSW?YMxIua z7wrKIxVk4zm_)wn(t7%OL?b7a-?J%&{_6>RWk1a| zECu?>?pv7%&eOsitp~xFYyyStcB|33)`cl~DCb0R*G6px#yGLW$#eT5bw=B_Z{O7g zUnjq#W@EJzpgdMO(Q0-7^7%Sd*2TCoN;0U*-1=9J`uwUaj^P8}#NF)(d?Nc4A{p|Q zA+n5juQNz=B|ai$v9Eonz!#^vwVi0zu>TcZGodeG$eh$`KieZS=;~8?_e)*oJkt19d&1Wx6yGVJa>-%h^f~$KW<(gQp4SF+G$5(&LfmPcb6oFxN zc+O0EOYoG>q3GHs;hs^2d#ss}K;U#!Pu@EQ?gwtvM0kk?tTZMHWFd6PDeqoY{Z!G{ z0<#`yg@8^Y9l9Yp(u?6-;NLrBAqZ_|=CC|kD??xlL043LO7K6CLTu4W ztA}!tFVN!@mAGn`Ss}Sg-cEy!lgORi6$BP+(4DH>+7uQ%HbY)YY>OSB+D~rkWOZMq zPCF0CpsaUUCNQo0fxLZQmL$^Vrayq_%rVn4w1u(H?O!wJ-vBRS#JxrIM=#tb=kQ12 z2{&2aXtA+HV2ZZ?+Y~95qm8jskEyhWnHoRlyZs6~J5*(Gq_=1t4(AJD19kypIwS6e z?NA$H&EH&O7Y35^seumf=j|e(NgOYSX!Y-ZLU?itn1dxE-dNPIiuxd*wK8)>46}vn zA)n~0Z1K!Uav^iz>Rj5tt2|s zD@N7Lx|Pq&-*nYx!<-QF425RNw5rd1xm zqRuT~z8JP)!D*QM<7FD`E~gjMO{sV!;ZEi&CuG;@HzdcS9-3_I6Ary*n#%c6+3!?` zFV_%r`|6J9hLabAE#yl2KbG4so`T~p-nB+9XgA*O^ShmpdDwv|n$c=aPETtoxWJd+{OZ7ktfxhyrJS_ zso?jxmhD=9P_=92fy4P@c9=0FPJMz}Y5KRDC3eaJn`Br5zueW8IcbFl8e869?V@v| z@P(!RBNrpZjc3w@FxfzRQ=c`8_23M!s-s?K`qFvm+=a92b|agosk?aJFP!U+ml`Qo zKiC=?G{SB#g|(YzSD$cr*FU00`PmogRvi6;FesI81rVR?SYRN)#V&p}Dn;D@YDFv5 z69r!niCGH>U!>WEUxT-NwZi07b2B5OvvZ#_)wI>Sxd-bMx!s57c!we}NhZe@eIVl9 zjw5+bHFw|Ib@o=ab4KHm-CvyQ3+(kY+8WxnA$AJ0-KUwO3P3y}aP2e6I`wkC(DGAV zDhpUx?GsVw7yuSSxbBjc@Y3~)cg#=6-bV7*Sc{iM~^6>7Z+>>L9)qq;GN4Odeb zVCa&~_blYtbO`zVxQrp9OnloGa@~hn9~qp|V`I(1m*eq^g{XvhkdGqlG=K7YUCYT( z3McJlfjnF4{NqnZ7r0_}#P5|KQW93jLfeHEj2yjS&<%{m4SGg%1Ok}=Rdn%^f1H@- z{i`V!WoURNl$Pc2F2^@bzmkhN)vH)bSjvJkYU~ZxVVb0>hB6vSRbACn0+UeVyLDaCrEI z(dR<>qvN4rXNvoxqoL^x2*<~fziko320<*ouJ(gh4_WsFvRQTv zB=5XNRw5=kcwfjd2wo%?lp@f(=dNxcmD=w$0cgSE@~a%xVj}G0ivAGZ;ZE%fg6X zxPNd$Z=P|uMf~zdCWLrYGyKn|54$UN+VfBU(TD)#&qJW4cvw;gK5P*7aKNKEimn58 z{!K&b`UFN&328U3NI2fCrY4imy#}+5f*zBbn0-u}aVIQ)Yy_LCTitrO`Z8WEOiKT7 
zx;~sB6*WR+tacA!)2utXH~kwW!x;5%C%wb;Da)hc7DFXM!f-|;)Of9t6FbNBmEEFL z&_(R*?ul9HdJih*&*V?0?Wc#i-WWlG`p(Ajo2C8Ghs^u9)VN&xo|j>@RSO|w9a06} z8j#E2UveR$hKX*;+K%K@OQgRU@>=B@Q+ND8xVSc*LtD~Rq9X?DujENa%!s!Ak-@CC zvHG5(TC>8V?p^0#_>KAc&L-oWoZs?kl6)xQchtVIy*V;)43)XvAMbFvpOWj#CO$|W z`9^1H`c0hetCnVHQzY3AIWK01WRnVSCC|X^2-BF~F(FZ|n9${BpSj)1)i=7V`%H-B zKaZTb(liIhZ8W*ri&n^fmr3)2=Yf!1ien;KXXM&&hjt+**>p#Z1CkKzbkVwcl-oM7 zVKWw0kpI|)kw!v^Gw(qzK-M!@2anhSWH|M(MLHpCiIoIbWPyuFAz@M)F-4WtifG%H zZtl~Y#Ahy#s+m>B;=vZ`wFn$bKii^h4xU?bvuY&SHDC*Y9HgsnPr5WZ(C=KWg7qLu z0NL-5ZmhY%)sxy0IoGn@GIhE#(0N4=-27u8LgQHF=4a~Jcg zsJ)QBRw{X=@b%3?8@u|vn}uT0^7Wc6!NxfRvtaZSbJFFOQ_^LWw=gDPCeePT&^4dN zPA&w|C!cuNJk&oUwh#Mo9bRWZiq{Oy?xd9sX-T;ZT&F@?!-}n#U|GhG#s=5k-VrLrkvvOWr*lex`!}ka>m*7IHC|Z z^Ezwzv4*=V=ibXPSXJDw?tLi@1Y;n`=`t)F=Y27E+Un;EafKI-M`S(tl6gaCF}_jq z!FGUQSs5>O5Up`x#MQN!;=g*xOTp&~7n?$d$fK9t6?RtzG6ob8RYj2W^pri8Ei-8l z%>&yse#uJAeLMOwuZ-G`kq#3{xfr*fP01%xU@;rbJ>Q}v9W#y(fB5byf5kX309UpG z(>Rehn4H#FXHD3Mz>Qv-Rm8eDTU1cnXr&Cz1Zg=r%ILC%F;v50=+b^~OG6W7x7$wD zibGToEq)CTa%()!+)s|gimkG{0^Tas)yOu8gFtXpv_K+ z(Vbd|Mu%RL!f{$z)@z9}u*0Xu8tE-KEW;FM_jIV`kIic`bM+dcSiEPWU_uX&vx%S5 zE!spmd-C@)Jj6%x39E6H_lT`;CwBMOpgMn%$zAepwZlj&5OkA+vn~kHpGbieT>MsdPyECs^m<$(#u^l#zE^}N<*!%*Er(~MfiKs9M}0- zSkX~;ejp<7^O$P*(7s*YlxnwSzLF3rgO7S7d%xD6UEH@PMQ?exg)VaYoElPny|R}x=28sb1dW866pSV4R(UwDYc^L35F6VmMhPf_hgZ01i!CO zc^9Gs3JL%@SFw$c7@!$BIYT#ZJ>|FXZHeV(QI61xap^(`1dt{j3}B?uJ@-`$L61nU yjln=ad!G-@;_xyhp7XPF0`o9ri6S4U=iP-f;A@^${(9YV)ih4ZY=HCJ6Ei~=` literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/pipedream1F1B.png b/model/train/yoco_moe/sources/images/pipedream1F1B.png new file mode 100644 index 0000000000000000000000000000000000000000..dd027b930d084e3a273014e74b204c7e96aa95ef GIT binary patch literal 86908 zcmd43V{|56w5Z!1+v?c1)v;~cw%M_5v(vHdI`we+=_`?ZFf(foWvZ)}cTc*s7&nk(dSCy-w+oFFTtWi}|Pbp7rR{1F_ zg!G6`v>z)Gz8;3_{pz0U>-M;^9ABT{d6V~8ATfPj|qv^d;N;bMrsXOn@oZfBEJYH^zi-|#yqCmgCzCNirCZwi@W@qDSXlWH%&X@R9 
zJhi%C;xn_b5G9RnpPr7lM8;KC(yn)Tr_m&NN3Lrd1xG8JbP~s#XY-yK71!1xjgF4$ zv?&&e)z;PRj2RUb7lU12U+ZLaf{u15L{p|nt#^7R(Rh1!^p1{3&_pHRaM*p5c|2Vx z165a7@4oMGzF6n#9x!0UhXl%JVWv>e*RbaDi$ z13L4^$HrRm3f<0E`!+Um_^rRa(yi|Mz1^!{Py}drg;x={IQ5@Goy|iiWpI8Sk;7{R zYm(wgsdKagqv@d7DyxvYxzr1*Z;S`N{!hWZL-^MEWr=9X!JJ@k9Zf4k_^|eO9Zeg` z@yPf*{K+NOX?Y3lZc;wEi;j`B+>5W2K8Q?MY%!^H+Qih+tYAol`); zRPbztm7jga<<%;1d>jReP9jq0hi5@nI`C8N z_Ny0eZ^GLhWKqf*4@LKt$Ywn?^mcmQGWx&!benx6=Q5h>u-o9C(#cLwemGk}-Pzgc z1mkoTQ=TDT>hgBa=5z$1r>Do_^{7355u|v$8E4pwjFFThV57yMqj#wIw3qH|XdbA&jRg`oC zO}Z>?PSZqqOuudwbu^wkAa(wBaBvV}yVgv^$QWO7nlEd2IFaE+#zRgn=JJ-GWDIN$ zXdmd)6J-g~T9UX@utD{_5sJ*=f3io$$BFw!Jy|d_IL=N;WBk#h= zDvzU%iX&U$*|tCzmf7`(%gg&yB)pbFtpG?pf%8O0VBNF^a^`cEhu8 z|0`cM7R-XQZ#rCmI}jX}lWWIyZWEE4%L%Wpt}c7M*8N3@8vRGx(|N0l2!7{8iZ#bC z1X!H&rl^?0O0X_Zr>k+@Z5{FK?o7+kFPztx;MHemE0VYLF2Hdh{GApks?XKyBQ<>7 z4_1RIgCnssZ|m|r?)=MbksP0nH;%uq%jYfXK^>D%eaa68_-6~bg+3jTJsKehRndlM z`(lE15pF(TwAE6QZc2n?W#N#^vu_!Fmfjfzx&mS$3m=hWs(LvoH}X*gGR6GxIm|C# zM(H$u<%`BXaioQ%o5*K#CajT>Bh4S?@VGm5j0gb*pD-BmS8++75dQHfw}8Kn0twlK zqKS9l&i&=?mk)flB_oq1JtCdc0g>5iL_`Geis!>f0?BjcXRUn+YG!3%8&la_fV6+i zR61&j2@4inXgJ~FVhQ2r>fqVJ$Mdz}wCFOqHodOK07aQ|3)EQruaWGws}80{t1@m0 zvVuW6qTvMu!A}4BC&XKztpdFrErC5ulEviCvjJiHAoFV`y-G#cb(VHYN6|Mlx{(2i zo+TFtanNmZ3aLQ-a^o!`2Wd%S*)YNxpPW2^$2yk7?HZ{~Nys-rA{LFL%YVFYx8C~s zyV7VAOWxT@*t5wzIIsuAKRrxP{GP4Nu>rb5BxKU7mE}6)oxQ!=nb?Yo3L3wcUlekq ztF4YCu1@WQMqtEyJ3D>PFt8b4z1dr`WZYt)S;v}eD<;#lH8d#4{ANbsv020I4gb1R zL%Hv8*54J31PsisZIPLM2}G`^T{> zvi;+MMS?|$1r-#K);Bh|k_=GJGt&)WL7kC1ySmC!NMON5LPgri%jzU9e%018zE@%x z&B>q%YaKwD)Co+y_5N+D$O@?bK zoD|j5!;g%L;xdhIM72nyp`$xciXSy$p2*=zWk(2YtX?zTli`5agv7x0HE{sy=|I=0N@$Ow;VbnZ$6#cu!vfSUdhHBE9 zOl#z+Fv(fi+RD2nAcsLyd>OH;U>rfGb1JzpP8wN4rH~6wOT$#B=mwB3V@1T%1(UZ; zOvqGM3flm!9nkIVlKrnEo2Tp`B0RkJ?$`Wt=HjCp&^-D~egY~8G#nf&%t({dfmAdu zS7ObVd{KHjx)_EpB+Y)D&WCXNM*nqG1g2|e1&Rz{ylh^qwZt)eQOhvAtkkofPEmB!#zlchD30POd=Lu z4fNoo{>@5d`>Pr9ZubD6`d@*-E+GHW!ngnYpeRo^XUlb0*VmpF!#{uiOp%@yNv@2+ 
z;ZUOZ0ZS}K$c&GVpIuWMztUjt+%amhlrB)Dv({|e=45DCK>1scg2Q5lush^$bQ;QK z_|MRU+OZ>9i6A}fF@bF6?Pry398CxT!6RK>-W`krS{Z(RemTVj!K`egx(DGVg_}A`_1eQ?oGlQ;`mJpKQOYS{zORttOv6{qaK+kK>`U44b0u$eTp<)q=VdSxxZHj z%--JK>R?3Za{^I9wWJkoVjMRxGUBfYNoPeJj923V(WFIk=I51;R5q&hUxDyhwr64& zNq-pq0ZYZ*!vy#7e#QOp^~M8UTdx8U_PyorpdkELPazbagoK2iGTAd(nLg+cC!Ivd z2?@W^po*$tqE+#-j&pHxhIbQ`qp31oh~N&(U;4&S2`?D{4k<~GEPQ)^i6dZ&pJH=J zDq)}#bKHKG8kmCznnEHap!s<6kN93irjJ6vOhd!TqtUO`C|?7{?zULll6-Za889%m zI?}kXAz0luS8lvgZz`r(s9vdIXhK{WMBcX1;eN|TXA}??DgzZANF1 z%1(zODejm@Ew2%S488#w%J2P~S1xG0*`Bd>#q18Q@hZPqUMDC=2SVU`c5aEBnc7cv zWIX&3HqY*$OS-#}v)svvrd|3$5(sBIr)?E}L=LZ&3r6f_gP`#uxk55xMZ*}(4t`ls zxSt(7u$XD*8tR!v&FfKv%yOf+IzRj_RT!ncT=aXJ{>jBie!{Z|bQ60H!5-c=r0;xv zyLwDqMu{lWQ_E6EAm9}b>edzJ_Om1;RB(5d}VTW1fsd;-N zQy3*}!({OSRhc(~Zx3e^v&1$$A_l(f$P0?-k?=zlQ#U)E(*NbxkJ_fD17h(ci&;_w zbvrxyo0}U29i0iPpyC{XBpqPfoE9iUo@DqpI?!cgt=E<@k-&HOH#ndtA_|C_vNbji z0*A$L0EWB8T|k}L=Qc7n=HhA)#Z)&n-s;dd{`nwMd_;LQqfA%#&Acb~cwJ8-M2HG!2jV?MENCwiK*4c#U@KTrc2}932hma znD%+~d*z$>b)nBTJBZ%uP7TsSw-L(*>$bbpB!2bSF1&oZq^cuN9b`NY;o#2r0n^@A z7`4W;(%-iV(wxG*Uz=F2pkvnU{<|TZzS>J6d_CKUmy%kr?vPSZ%D|G0<&;=XRU9)- zORIlMD27?|=R}1|I!}9ReeEaDw$RJ&!Xg#2pr(}P+@$hIdU{3c?eUKUVv`cHLItW< z*U6aU?bvoX^h*>B(-Pta1;FoCtz{fe%T!P8TNkR+rT%?}ca<&k>TE7P&ZD?br7I5lw zI!{vPCbE=E$3(o0sf(ip5Mog(hp{W%XeH?k_H1jv^(`I4@9ir>$9DIB2~*u&K$_=+ z8^YOCi_gG5_z6Rr!x`${(-VQlLLW{cS}iO$<|$#4YVkHF)qM{guTcQFLZbC8qtE_M z&hTlh)1^|sj-a9_GS90}D>oe!Rx#Tf2cvn)g6WE;`)8+yhx<@X9%Qe!U2Ate&G3x9 zA#44Fi=g8UjiSRP1T7oGbRx=L8R<$!bY$c%8YRd|8yo_{VW9+UfoSzBTl~gO0*Sa& zSp$^GW4g#S`TMMpMFNT9@ci6d>w-o`MurZKpv~J%nKDH-mvbnM{ns8e=C&_-Mq@H( ze%8p%I+X^DiOYq*Ic^!7TAn-8<9Izh%<& zV8ruks)c_~s=|M1iQS(1ey-y=Tf;I_EGk_6IzV}8_>uf!xK~y<99^Mi@w@})^U|KB zIf27~8#k2bzT|i?wDH4YGvr-o1y^IiHuCX@+wk}%`+A3txawV8VI$I#+vNV(*D#lX z;%RZm0}8S4&)x+%wLZ2WH#A!vig>{e2)!w6+{k?EUodc>S}My z$Q#se$K?s0E(9+%aza#FH;p=*a)&Bba31Ge(Yb6YYZh`Uo}7HnRBCwi3;ZngQ-CXB z3l7eVI$=Kid%E4C(ar%;76#3c7Ct;?Ohh>UIy$|xS_8CK@Dz#H;R 
z=bMhFuJg?4@IKQi-%)HnJOrktUGH#QxSQM(BNjvtwGhSZXkNHy^0s{XF?kQh2dnZ} z>w3dDp>vCgfV&*gM9>u-k#%uJAu1$@i|~H+SDxJ1O zWvg-FC|t!_S)yjY8LH(&agc}@R3Bk4<5$GJYa?lF@e*;8GE-1YnhlmJF-O6=l}e>t zbZ~LR-D!z{KwCwea|*cOg=xrqY2%%{MzEz1&LGaQz1h*zXT$5xBc{1wjXn%WYLm+k z)UB^VedMJLs8>tbJq{<`U(_eQq?DYkI=T^O7+~10@V8#T><|0Ob zr9Q}6EER#K`aOe`G}W=6+I2ot3EM61VSo*WWNi6Zp=Cn@%3#jaMlUYwtc{!}m;;FM z)oYGmCaQ0ceaT{LW};!9nY^zz?l5P{6`k+0Zs^?fMnx=T+lxF&tKXl@B>jD`c0;##!71YVYoJ0Uld;1YZ|VE?vA%vH~OWFRgf2m zv5@N!`fCS^u*qW4fq z#LFA^L#Xb+dec$@EmqvV>&DE>FWsk!$YM8N7aPiSw3H1%yFKFGFiA_T^RS^$I|Zzl zt2A+lyfdPL8+4qQQPlgi1Ew})1X9)Q_n(5V4iOPo$FoKEmRS}_75Y7V>g3ZT36k>i zM)6~?wt~wlmkj=IJEH7S1m-YofweljZ zP^4fQ7C@2}Y2k7^##HR7{M;`d=4HBd|!C#`lC~kCxSl59xt@Oh((0k)C z^elXymiK{I*1JV&rKV1BA;Jgui6LL4-HLlmQ+4n`4e*}_Un(8xo1K3YEiy?H#RFUz z6R!_G*ar~Bzy*9PMEV5V^G9;w6d}`vy9%>2i@DTQC?MCkseotyE=RBoQuFX!jKaEA zd^V=k`=#HaRNeJ6=6MTgOam8Dp7mNGW!uK1c%(?&y>gFF_4e*b&oX;GI}nbmj;>z$ZOy~#6l&(lRY^- z>4h)XRIt-xkBtodwE%+&)jP_>bp1b->z3VxywASk#$2&(n5*rmo%sg*!ezo~35g0n zJjCFuVr)NeSQ{jd?=?3bjtz;6L;P_EO~lvAec%Ah{w|FE3JgaMhr70=C-6>s5=q)H zskfHNSF8r6xpdlUYZa1#b3GhZ3#6@n%D9vwcsM{AoL`Waao;%IOp7fjShICxTIkm> zHvOJ(O*|W_oW^p)qhH`#>@khv-#9WCF{4$KxM-tsu}y0ivWAsI*YQYRyuP`|QDC#j zzPo%HpE^{o(!eRG4|CsYKE^Km+71sZ;w`W1KeDsZ-o5j}T3UY-FT4|blHv@_=`?hQ z>^BS(qvMKMAg1XAf3smgw}l+^)tjW2mKO0v zI&oxHhs3(u0?SNH)U6)=F=}BR@lEeE;Jt>==k8?xRpv-nKJuBrNGpJ5ICh7qf+WPu z$(LA`*Nh|%u!tGNuMv2uPB$0^^b;6cG6YtX<7kWnKavE9%ZMx`@dL`Y5!mV1V*M^J z28Mg|hJV5B8F(8JF`U!W4;J5wQx{%B3PZBa@px3b-AjUJ z3P;rmRH2{O4tC(B6b;bemQsQRWu5jjcKV#@$nc_e$dYjdO(L+>>KMfGL8kaTE*z-y zCmTnLcD&r?Oo73mH?aiw^!)4?S%|Q@9~|C9fQ1f>#gkkI9T_=E;}^?=5|nd?N4(Iw z73q8#iSNGpT(kJwV8vcsgw_eCq{i}ihW4o$DMQ|$%yQYL{AL%eX>Xq{QQSl+LiVE7 zS2WYjrP^PGN)r(;bIsc5xF2mze1kSAIItJGMeMni}}h zb!k~bd&2q7M=$1Y0G*8+4EtuaEW26bzlj^p-TJ@p~HG zgilIY{b2WSC+~XF{L=X52|AtE7$MbWIm)7dWP(J&TpzHtH4V6vOuE{nkghv0DUT=( zC*!Ph;%$-6|GGJpDBSK~|D6+)8+^~@X!8Y1+nAN0l@KFr(LW@vL~g;S#PiATk!_#v zhJ-4?(-m=%{srq&)(2;gLUuU2hQq+#oC*ehSi-OG1*NjEX)Y5Gy{K-b|b9<9iB 
ztG5C)==&1Sj36!a?aE(GULDJ~GdlA}i8U1z;x|b>=1XKbU5?`w4d??~S~!}VObWh@ zaB_1y0drh@C;#NQPqr}HaTgkX zpD+Wl)hw{5CN%H+Lgi(UO|3}nkb>4Oi}F@kSv+BrusIpS%|h_(lfnDwHq}%vB5asK~NN6f&oU4T}#YSMjjl_^$H6D3dd9oU`s?_bk zreT9Q3dUUR>)61uX?<}Bk4ns1yRuW)e|r29lc7&LeBx?d=QXIF{5RUm5oLJJZE(CWUB}Fdwndj2!jWeklj|Ityq3W*SOv)9a^^X&`|`WySEo6e1aGsd z!-e_TMh7;@8f5Tw-i?Oph=4*ZFSEEKgEoNUA zL#hYV3A#c8Y<-Y8J#ma^-jYYQ*KbK)J*mDxQ#9{ZK=)ag+=gG~5}_1d{o0Aac;N`Z z2A>_}G@l|ZQ2i$AD-Ug-gX>j!$WRiauNo{LFKV3euR+Pnomg^C2^9Ow`Z8A5p zg9gpaFEIvj7KWf3vA_xqaNe-VhK`*W{1~L;Eu^kMkgHh=Kch|l-W)hZ?1CK}Owh@S>I(J@AQl1XcAHt^r; zBk{X^aI;vDgby$JtAtYgW@d}jh>ymm)1T&kFVl>mmyA6@Uc%$Y5Xfx-0CvMx$)?$T z8SOOA1HWd+Q=#l$nPHn@TTyUZ+*-*h90-@hXcko7k);Y zHY{Ro3?9-dG&64yRoVg8MEz zLR+~)56x63Ukpm0D@?0eDR_}Ov0gid2)C_Nh*-reGVsN%b!c0leb&Sg8)G_~A zvTz-4SRDJ56Ligpb!7Og77-G2;cZG+Nw~!+-M8>h8Ymjv-u-LyE0hP}S_$9IaUO}% zYCb3yq>@w02;HqdU5YgsqN$6d1a$ShN6B67?Qv?Zp zr|*klbG{Oml?WSpy0jq23mS3}aes>vh*=-Utb4Dw5bDJ$@)VmhvfmriP4MxXOlP$y zCFpF`wPCwq&<9MdwL?Cv2nMPryiVR8PVP-)N~)ZdmQvCyq|K0~lebmU2J{3$7-fES zIaZ*bViGZ4T-RA}H-*EV*02dPGU`vk4)n zf0$j_YM?8AR3JnIVL~)E&C;NqB)Z`88n+Hxl-%7gy*lp(JTz!6h)?gx4jj2Dq{hyu z{&1v@Qg8gu4s|^Lg#gYvP^@#VfVw?SUznseZZ!rD-RT08SItxHXlv+QzQxGDkaNN2 zmTCnN?dozqh=W6^XkUmk_Q>%^z6_kW2yPA1SXP0%dK!!~3uYc)z^&J?`{dUxH*z=p z7d-8EsNrTRK9HmW$W|;*xejj?A2@#d4$I}fRL`Az{h|aUypYyoTMvNp;{##dDZwkM zxzu=Szwh>k10NZ5OJbsgJl?IPR{Gv1ZI$o_3rlMm4)y{qQyfYa;${C%dpu#@?*@#;+fOFqiLN znHqu&T$NakD_?%KQ>w+Nl99FM>4rxGQ;;)Nq>}qrXK3Mm3WoV*lZ9Mn>m3Q-9`k5@Sb5fWce>hd4{`UCL~=h(#2*6r>3x^ zD7wnK$5;^exU-=`=njX2SwZj8{-z{E5cob-1vI?4A|osf@g( z+3b0PqY+Q1O7t@3l4?8eF<579eGKFZ5!j~K@WWysv?JSd-7KN91i@+g51|!=r0v}N z7#$p2#fxXU;m=}=yO8mB$5GXt5M@*RA+-`W=oZ`@uUOS5E;iFgv%?iCNZWnsT19Sz z@}f@I)^T&Ux1RbGuum^u(0sKpo?KknlI_IgAm(%Mkqwo7pQ7<~;*^DoT_JaWA_?=c zHQiuTi)O7dCH4I{`owzFAG}uZMiVrvbTy~4(Pg}x@gYC5jmeCQ*Sg=+Y;iyLX|Koo zgZ|H@2$&CC?8v zjP30qhqIXBT~k0*|MysQ?JpP5yJSWQo43*=lL z%&>&1nwjxdfQgBtehaoc3B%qNPvB_A1hikwv3l|uM0c!e7^cDGEJK~&v`6R6s+NDw z_d1HJTY0MW*Ho|KhvT)XT*F%l-N=Rn=zR8r_HT*O!?nNB_>agPp+h0VsY6QfCp(L` 
ziUps_)1nNWhW}$^z;9Rdk(J~AT6?cz62?|)0N=w~UFhvGq4Z!LxD4A18pwMrx zw>hsnQfU)2HmD0b@*f-uJCjRc#mDn$F0a89m_kr6I?0)DSP-A?WSF?EioAHUA2qBf-zsZJvojt`q;Aa_h{o`+kfx}ukj*yaHN{)nB4ZAP3icBzhuLQQX7kxIh!R_#1#g%3~ zgLB3f-Rc*RzrMJIxf&ATcO7d-?gPe|n2OnWakCY+)(AeIbMHP+%coBWSB#7c7i@Q= zYAm5p7Abk~@ItlK~p& z525jhta!HP=h7pVmQc{L?j_lisI$)F^0@}LQL_n$SK)QywDpdUzIITP3b_DF0QBdD zWCDYNg19~mbyvw(_fM8;rc*{eq)cQo%#w0U3P(_q4zI2ps1IyxZQHygB_sYUM4OwE zO-Y)j6pak5;D`nuudyG0twi;Tzg}7eXNbEBYVpG^5SyJ`q1L z0Cn;qH*CDGemckNJ{GXz906Cz#Iz!+Za6oAi-JOky+HcL(0lkcq{ioKyjAD ztv<@Z@fj8h?)j7TgTCksC@S*McmZus14#m}P|A1ACZX%CIn1*6Bv#={iOSix$<6d* zbA??M@G_0JJ=68#_1uXPPnwAgtYFVqCi8D$z5C)phb!oOJ1AwzR$D`X{Yt1Su(@=k z94w=KmwS5vDqkUqc$yriR>wWDll{snc;x7a=&&%zBN|#0V2Rzj9rFqs`3&V!v#oZu z3`kf+9dL;?2m*dr8*7LXsf_`9)(cV`+6KF&|D&5AEa_Bl$VN~Gu8Eo8_j`n`!>J$0w3Nlu+6_-W zoQqQ*@CC1RZqm4s&(63gf?%$JQlG)kc~+d<;ZAmmN1T+RQGY0j)LzvH%~r9lqfX={ zuRJFB+#)2hic4v=(F%ufNz69>*!bblPv#z~FWmQ5L)$yKkMefJAUh^fD4-g@Y$(B~t?zA{R$BDmSkko&kWmH1OM;puiwcm|>`wYS`u0IWl*z)PVn zu_|0EK++k3Fa|M_G~uV%kCvXH5`MRxB}+Vm#&4q}Nu%LY$*_KC@9eIwN`#}kvThIdY%PDcR4)up^71l$L2JWodwaIph8vfp#D z%gnN89QTIcr(V^u_NBrYWr5`;CIteJ(Cp0~*GAoDz0=ci%4n`Lr~`r|&oQKNtp zG~9_%D*-zg*!(P)ymq7AawT}#9{QL-&G7}2$`>O#p?|0^^WupiMbrpdyQon#6H@dE zDG5v6CxI|Qt2b4Qtb-N7Uq(0N*|=v3fA6DLvVg0#90`1tcSeg;t+t9%7F{%5ag<)r zvF;GI_AI>n>qcWmXZdf=+TY3St2rXE?0vop`nL?J7-_vCFc$DIBKBJWuwL*wFh_k` z21}WU*N4&!`R=!?2)7pF% zO6JUw|DOsPM*I+zmWPYswU3TyRLg@SS}RJ}28ZpmuJq1l_IlYV^YSbuopmR;I#Hh# z@v;oWxAz0L*C)^CJ$IYsT$?$v8ZSkXYZ;JgoJJduhm+vh%~b4=K8y`97Xbna0GL8T zLUgUi%b^Db9q$g=GMN%tUSH}AmWgGzgXoLvYxfOpFn5S+1^E?IQ-ugT2GrRm6a=1j zNOY8AIO}DmQ;Olv21*};t5^*t0bd|DHY8|rD~DhlCQpGeY*iMLYH3sgjfzL{gN2QQ zS!icBDmE0oSQVw9UK>*Y3@I}jnRV_h?Dgv2nQ3;9kw&c|c`+f8xsVY1;N(A<^;Tci8s$B5VE$#3JYKvYQK-vf$5zDUZaCXNeKhnR&4;$WNV&xfont za5munI6GE3U>KR7{l|g{eIBL&!)JVUDhD1arUSb*QG*Z%%2pxl3ZHbT#xR%nzzmiin6zI)mJD5;%gPs5}6Nh2Kwc=wo^Hy=jmZ4`xW|5#$WCe zFFY}EPO=nMs*_kUs%1;p^P@s<>k3xt1ebo#Yaw7gnecNY6ud(jecpBD}pgmM5J3=h0Tov$$oQWnEFAJPb~%5`I#Vr!Mn8hd2&Vx 
zj7q;^AG!H*1#@G}c5l4PtH7QNBvw-s7PPL^;rEcM!%mk&?_@T2p9fei$i>y&LwEcO z8QF@0faM2YL?iU-j6xD3Y7sK`-7YsLmm1^oc7Y7THJhyhy^*5|P@DtHh$Z`2aRR!L z`^R8sd=B7Q?`@&F=5qJaEB{iy z!weYoD~~f}d+M66a!PW(N-HH{UFdYHI%7G`2J)P>`<)o3V6d;L7%_qf&g9IO>0l*X ze(W&TuCf>~c3Casd&E5lpA83}xjr=l{y>vrE`M4KNfkcAREw|Wu$fRYE7Eqj!%Lui zed)Pd2EH1(2FPPyboFg%B3-OiZ;-!N_l2tH;WD>E-V6U2wyV=5&KI`EV!ue1SmPF( zUxdfYdM^AZKNaJPJ1G>}Ps$psXBmpNvw!+(itQ$61FlYC-pcBax|VoP&2G~X-OH%I zCm4%eoa^CwLSs?-sHjTxIN{W}UdWD{lVfOp8St&3q3+%a!>%eT)KlEa6IgetH)8j& z{`|Z4PzM19e^pbl=*f@TPqz#9@->zGMB>*pHcDsYzI`r-J-+#(OrepusHkH=doXz) z>!_)z>9u(i()?M0aF|mDY99Kx#>BL7%PM>TP7eGafPbgXT9JMeL@|}>Z{s-Kv)&W*Jt~r zqE~zoeDvZvyytuVy_DkwH2S`dbsekRhcu5r!nsZpQS@H*`v6IU_h5cu78qQO?$5T*)2~m)@a=RAn03i0$)pqyOFac2ce)<_u3+8I zWGLh8xPueVJq26d9d)F<($3;hS%I}($E3$`vHD`_5maT2IfN^kWMhdvJw18>#APs} z|BXsunx5l6gPCY*9(3z8tIaP2uR(E(A_mV4KWdME`!n5aRAGHM<0gtA*=mS68GT%m zRIc9BZ{~dv>*cXS*-!X^vn=`t|NM~YYsEZKHLBL>a%ii23r4PTcDzd_4E(ycQb<2^ z4c~EnvBJ`L2nBl^Qi} z#{tfNcmZZ$eZ#xf#>AQvc2WTvg)3V-rvIj^<64pe1{4!2p`B=dEoiyrRo2A;la~EB zUZcNigXYQU(b2}Q2zhJ7ApA4i4I9ivLXqNf=P63_SyW%aG46gn$F08*F#iJk|0Dlm z5Dq@+P0EJ;8&TYFzIy!WT!YytcxPTT0Ii8PEB`j?E2GEyEf#Az3kXjuK##NlM#Y`p zaKhRe4)(Rk4-lzsbQ%;{hxfwnhpgdY^Hztm@qP_<51PH^ibO*G5y{cR7Z{R7T<7Er z#;0G0!yvyB*}}dq)JfTSO{>te1^Z;Oq~p`!$4c&h1ETkPn@r~QP`7gZgUBrUzNCi% zjQ3f*$rsN*0e8`==|O>XCu|c!n8fopgJYYDw_HjGtntYe1uJJ)6Xam~);z9hOKMSA zZ}NKkAXX(mLDcxid-P=*eczmF5AoRv`0Gs{=5n6-6Nl>eBS!h{Bf6>$cox5|@O^EH zTIW4?jdlyt9iCS^31MO0dgneg`y-0z%x4H28}f?EF6vB_9`MeQCG zn=_|J4_LSEbvxPZVB#fg`_o@v-Q_3k)JA*8NveYE=Gk{_lu+pLpU|zO5AHFQKwdVW#;hN02 zTv-m2cVZ!yJz&4oZoULwot&#;l)j$IBEABO=`Khi_D@we8>p)zUv+Lr#JM-U;ob%1 zt0yzMV;ix~r-apfy)bqtukRFk9t}KR;2Z(objX%Vy!Fomq0wT+%?`JAL2|ks1=iyJ z9$e~b-;;ai%7|V;xC^WwFOC=6@U%SIdbNCfK}XINTFUrWA>D-IE_7$=Y^7PNNx}~` z02)nJ=;pf?duPzYUb~Z@{Vs?6N>yPrRyqJH4S%>eCwqMXM?Gv(r7!DQk3eot+-%*E zQtF1Vp;i!YYx56gB*z^yCsYN^$n??F&=3?A1*=ZM$;IW!Hvn8s+LOtZ{F7RcfGML< zuXb=H7WRu9`9)~U#l^MGDe#&%Dp<5$ru?Hd1SB!iW*{87@Dwr!cQmg`#mYDp&ihA= 
zB7z%}BM&`Z0wja;!K!RKpnnKR0R91o_J4>wA8e;}^tQ#4sAJ&#H`yUCXHzC84ll&V z`5#S~AnnhNrc6Rk^XBH^{(4RfhlC4bO07TL1!rJ)b1rjXql9*2R9f5G3~%;Ffkm>) z%J*zQW}`GAetqJfl9_2?o@&w&bjklL={L8vy<#W*-_`UpL*tVFDS-Mv5ifE7hpFLz z1y!ITL;~~k30pfloXPu0SJ{{V7!ZfV9^T%{G~yUA;cyWyzRx$N^aM?1cbXzh#j3Fa z7_bBDi2tBN61)8xVU|?VlriCLXfrTl+SQio!a`qAQ zzhX~BES=(mqDW}yi3?nGmNLv+Lt*PCQqoLkjsW*U|^`0^9hrz zWkMm+hLUyqQ%S5DwhUw#(^66frT>M1DE@_j1l^RzQIU}Ye$D^5WvjY8pU7bO%YKca zVqsaJ#akbYrC@P6k?OMu{I3c1zk&s1|I0{#F}1LRM}*Veb#1iR6X-LSa38qz0si1% z60s-$eHsi%UINinA3Mnxs$CkH|1bUg-|=7)?>7H!94xIcf>IL*?iS0*VTwcxnDFxQ za_SorHJDEyx+RTPGX;pXHQTN^9rX7LDLw?r+0En&`oR7(%$$KR=3m6De?zPuRdT|A z6v4(b*fP^135arWOVri@@ylB_P(MGv?7~tYAx30$4*3sLr3(5N*cKL%AVmQK2meFh zvizBmM|^2!WW?+P!pvGgo=m_*qS!hSJ8HiO0Ty;|+KPyL^8W(;&~R~tU<0CAE`QLh z5c+mbCdK!=d3i%?>v7Ro^4UVk-fAGExqom_(At`=3?a!AK;AYtr$i5)Awv!Tt`fA} zwi{TOw4{&3^KS3Ek8I5eX6XnCfnesW4GlrSV$g2y2QNu_0~c^dTnC1RoSbOeJ?;Vj zP+)Qd|6*Zrq38cFi3$}zeIz>?q!uOL-rj)e;z9xB?{R0%FhM%A#OBoe>=8a+9d|`M zJUty_rM||cN>uZ)(iZXUP`baw+a1%6Qmduk6s)d4OeY* zzVHo*R!2J9-IBTCu_~a}2&V~2NoZ+hCIm`2k({Cl6E%)xw1-AvG5|qcwJNQv`}+gy zp&7y-MX#=ZNKB6xk8n2%9=2=xF)1`1O}jm~HdVT6T6QTI=6tne2UBrOp4kf%^Q~!7 zJG0N~sbf_Jmg-?NQSeQLGV>JfZS^9z79P(FM`K2$OynsHUxCq`W*!%i7;o*&Ib$wo z+HN{x^jcJ~ErNRXSZE24>jtvWn2lJi@j#|l(cd4+RPwWy+s%-<^<)d^Yo+E_dz^l^ zTtT2wL`Fv+Y~lNdAWum_HQ3_mG9B>xt!E4k4f~$ilO#5TlBLLwIAn5ZKxCa->~Sh#bk+yj z)+9#S)5g;pEq~&(4gGTKu}nsNfE29sB|bwGlJ%S#TqOv3MpU@zt-aD$-XxHpCpfrN zgeW2xWdA+#drJpsoCM+Nv>JQwPnWL8Xlfi3eaD4wmJ)qxz)*JgHqHV|Jr}KiUZZXVLey zmw&buaI%A~N0IBDn?uoUcbSFUwE{*@nD?1a{App0K!&x@IS)vZH{sgU?L4LFWMMrY z2IV;%&Y*Xfd@p2*)%gvi>+pFn)WX&;dp@SRIH01M<;0UxbAiD8>u+gJcHgcjn%`j4 zp;fnl!*?UXM>#YU`O4if0^bkz)q9J+ltD2;J><=kYbtd&y9MxEysiB)qy3u)$@OW) zOZb7AmR#PMMRViw#|>0xrT~V|O*&aOWY>xw{ILw5Cs)}IrFV-;?jbb?CPzIzShgkq zFnic8Qta1`oeqLH87d>`OvbJG!rX_GS;OGBR6tHxX0!yf#AF`0*8uqkrJI6^5}R=*Fz` zHM`DgOQ&1aC;IaF%S}D3!Ov%!DkLwL_jgihe2|!6}G8sjdeF(!Wf3q4X@G=nBqpRgzfwP)97$xVMG#X{GmZE26O};%f@vQ%q zUve;-DpBlU&-Be69n;gjdRtHq=15r^^~OY7OVePa^{DOEXn!F4J{a~z 
z=yq8vLC4C9+U;z)ZxjL%Hn>5;_nW=0Z2ti&laA)ZbGP)(B@x8|t@;IDJ|1u9iGyjK zqLxx<4+s9v7`q(&jz$l>ItEGm>s%H0@BSKH)M{EWw@R*0rEJDR=R*#qYkvk;#x=46 z+EM@FCIqNH;x3SmzsQR%EN(?hZPMA9qx_?}?QPKGNt315@dCmoC}g3$4JG>$@uUzz zid^B6&C9nYOJDd%&%j7`;6Fj{>R*{K=f)LOZIf9Xw}0YB1IFWTf0_asF}9nYHK0px$pStL z2Mn$tf($e3>zTVvCT5Go+axx@j4*U&okfFmWQP{8%we?l!>RcH`b5@0JCnFBudR`A za$*&U$Axxu@T9eeh&;u_#2k<{BHapR9spT#ONr!*JinLGVoUZFN%BZqCP?yW^cbfB z@m6wUt7wufanqsxlXw!}p}%65{iD)k)z#H6k{3fUIBo0%1O#@8G{J6ve^;IHa6bSL zblZT=?EPS4$B}V{jcSohVhLf5-Q|q~2~NvDlyF(ZY;5Q{BAZz(D-Ya{!_^PZ2@>@W!0)oX;~U0H8A?90YQ2&FKbi?PRT=Lr{|TtHJ>Y*bpQn zV$Y^L?qCpf-)ntSY>grfts?y7i4h_4(strkyl9Ja2%;D~YaoDiM z!r_-`S1X;4I?xK$L?Ts+EJ`GPrG9W4k;9CH(|+v*6u{lqeYaSsO!Mcxo;<|^WKz45B09Xh zyrr^WXic?DWWPt|8ENbA?Fa+FM+_fNgKkgCCGZhR6bo zJ9Wm6a7;!~86%rreJVhPLcOF3x2D-Gx{^xk92ohw09OG0O?vTx#6wbCUU98<`h!+Rd;ghiPa^+ zv2pQ8>#6zdSHSUtZ;tSX*>Hk<6Eog^?TdxP$Q>N5yP>?MuKmZXFK}B_JmhCnQ8V}% z(yP_8!2o|Fz{+Z4XUD%khVr6B4sj7MAq4pOL2w3Xlz)0vXHEuZFHE6q@Uia&XOGWO z80qQp2MjTxy+T=Hfa3vKTU%Qz^>#V>lRlYQHB!^RU!Q}-7^ZD?Cf(?p)B+%HJn3%X#9QM7gn2ucn z&iOl>Un|c3$IIruy}!q~TRj2*_Qm%!h(UkIRN(DP zIcDS3m7G>fYcddiuWamry{fL>7l#cTPFuFzI$)R<7H1pX?!#Zw)6<_CNX95`hNxY> zbfC0qLGW3HP6}n?{bc>ecilHSx+|>#cpMt~<3`*gMQ(uB#7G4q5-@sO^t)shEI4|_ zLNp;EA+1=G4LRv^1z-sPugFs+n(=O>CUI`6X!%{Ao-5&R%UAU*0t*@|{Bp?LrvBQ_ z#Nzj%zhcy+m$-|^YNfe9J`@@ndR=-Uu(C^e42`XipO(&&{x149^23Mr`F4~U3$!dS z^8k&41G8Sjnl+2=^a7H=37p^Mpv7V}#!@YUh3z8n|A4Pk-NmR#i|XUu3DB@&sK(=U zg4;?@m6o{W(F!c3((ATA!(=B#4tzQ>(cIo-@NIW?t5fk!PdO1(Xr zoJYlqPA%3J6B=8M$9G>>+9Ajne(dY@vg1x1V;b4>iwDcQbHc z-Rucbz7obq^z^^ix3*vAN?BmRc&0lH!RB9Hqg&q=`8u=!Gg7marCkxrb*HypSEj#; z*tn@e2U97e;siBMoW@~^qcaIvP2v(UDBP6NAiurbeMj%5cP6d#X(zU=oRbe90_Mp8 zy~=P9qgCYjB|uQgqN5S9+IbxtxcBk%`YME`8g#n(WN%vt*!cKbd%Lrk1pfY~AA|g7 zowH@@x8Le)+>gv}D|sg?v7gb&2@JN0c{iDRAKlTIg$Ip;Ej;fE7~*pGIGbmP-k`?Q z?`k_Y>My_7X}*v$pF7nPqsp$xF}7=FI>60dy|Yq-_M!CuA>A{`k2-+#*jqP8pfWUT zczXE2&|^PMwc;-AS8%)@&n%SCybY$eX|>j|zFfb32PsxlvkPI8gLU@o>5E7i-n$rM ztoO8$tKMvatoqapW&C+?tlkoLs!XK?X9SSu_>MnLU(atOd+3J<#V)!CSzE4Xpf_9!uHDhR 
zwbJ@!qnS-2hozqLWVB9z8ztXALBJSE0H!F!9|0p$y80$rgeE}P(iq(SREQBM+i@ia z@v%D%(EZzWahan&W%?zGv`KG-SLoJ6nKu--h#Qha{A4G*d}Ofaswr3gta69sPk9hY zR7XqqAFb1P66?97%Y!S89hg^YUPw8%{bcsA8s1CFdE#4l}DW7T1par@ywsz zwi0w}^JSDgoXo2s=f8r<|Kk=qS5k@&f)zaJS-5M{eXM(^FrP#Q)d=AAhEtL%?fdk! z%9gWinqSOJlW7Sccm8E0x)P+W9Zu4JM@c!XKFF#3s#!W|GWOS_fcX*m~6R+=KmwOVEr8`@C>*4eCnFyp?<{~uYr7Kf0xZOib{p-y*Lz%+{iL0#@exe$% zo}Nxt7!-Awe)B0l#J`WYWqP(YT6rzVKyzP z#Ybr*3fX_Z;DM}OUhvp8{(Qlk|6ZVi1F^Cs%^D~E=Y7w?S@bYlv0d`{>}p`L{+s%n z$7T(MP&c)*jdT~mPsfzTQL;y|Vqln*2f@9X3Ryo|lzPk8TMQGhZjr^HJpAjxjCEyJ zKbP+fT5Cz7cY+jy#YUm@mz$nUwuy>u%`=TdV7Q;3e15iHFe^A+E`B{d+{c|I2ufwd zD04*(@ca&{(6<+s-_21|%)mx07Q6c{YrsH&bvN?|2;&}O#EKwbGr4E;@;+*gsphro zEM~yZu=(A}fhJ01&;IH&r>8Tf6xza9oIZxqmxtqT+gK>0rbTbGpB7*osF15%*;a0M z*ec`DJ#NhS>aS??WN!(W(GohMBx4aayg2Q4WLk}gNJzjY^W}EUFZ&Z=gNg-PKTPOk zkGW(9r8?{MUE?oo9;wdUzY_oWEu7^S?|V-+R`^j!)?P(KyWHx>|!&-%Ag=LkKa<`L}) z)%~EHlx$2N7~HQG%s`HfUnZW$^yaJaF~Tk3oE~RNaf+b0xllc!SD(?mbxK3#((2eC z+`Jn7K}q>13Sl&?h%S1zA4qPETqo~7Lbk!k#DA06Asbi($P;`&y5HjZ3 zH7wcK7j5lv6qCLMJR;KrS#QQ{A8wW$xsz3x0g4fYs$GHF5Nya;O##3LZ?fxZ<# zMofZtdB<+?>X|=b?anqCaVt*acFL#Qyr=tkTQP7)`Q1)#&;NS_nZg|@ksE1eWS}u+ zRU1VNB|*Y33}Jr69a7eh5v+mPii{If^Whw0yqE>&NG@wQwSds6Kgv&>MVpp$0k34jd;uj+0gI;^G+9=OQ~=oCHJf_PPTiJ^&p=vFDP|LfTN9 z6p&fsN5P!ma%z@NcXCS6)AAuMycW$MO7VD&B=L8_p!-Q2s_`bd&u|7s!A)NmW z%ul(bYg!O-Z4;lfe7DcV{rph{i;saU(#<8K1Yai?g<1pN#P@g_Da4x&BkIj~o(y2a zVNoy$;%0Iw1yXo{j-B4$5Pt7|I(Q<^X5WoMVnsR6mPC4D#5=7eJ9us%N%KcFoz6?@ zk7FJZOcbL>V2IwD%{1KDbp%PL@?W~7vJD2WHcN|gu((wcj9a7>a@N=6;)$zyYRz!I ztY2t%^TsS>qWPPKt}J{~H#b_u245}-emvXd50-y-|;latbh&9_JHfsnXqE?J~D zqs7I|w@%50Bp?U4WdIRj1sWT4=d+dF*+b0Ff}8r*qZUlYDWZ z4Z@#|CG?{i0EE^NcBh1m2QsiMeF9t()?U|>@y{aY?u$5nYZhE2!M8LK`;J`Uy&z66 z?CuunxfUzvouPzKXjBUQd&-cC>vY;a_bL3+f$rf>!Oi5+mENs0R3?`mJi7UE%<;HO zXodfP5BKf-T3`JZVvn_{p|&EjQE&bg1h2enbipK4|L@SE4i%nB;SB zd%&?Jyo-wMYi@)x$Mgujp=b>|i^Bv^RDvv zi`CH)IKY73oT#)#zqfX9n3l5zGONCtyW3mq7%k%JL4NuoAKtlo3*P&dCRD;_SNwd| zSpQ{oHvIrHvXVjEX``o{&QXyzwdVM#pE{+^RFhq#cyTPc&;}b2r%lN 
ze?KiH=eX!3hfT081tXheS1y=LR(Y==PF9NQvvDMRF(p#yrN)aR=O~~L>@K1iqng)q zcQKKFu{3nxE5b51*B?4GgA}3o1kc!IT!uT#^~+qm$UX*&ztYKuqF%7K425h&EOI&N z#^Vd%$0j*KBUSEOaKZ{Gyy2z_$ofNVgnX?Jds6%spO(#by4aOUTK0#x$ils}U&j#G z9={Fww{kl)yRWX$d>#isM9UIP><4HuPw&F94(D;b4Ru|Z4h{PX9q6ZYImk8jOmjIc zl?+ow=3F*E*p8g(*Q*z`zy^Njh}MIknzVLam#;RNAQ0{u?NOsu)jwL8RxecGqHG>R zSUKwX)H!Xd(d_?!W5TT8+~+-)Wg&rr6~yjkr@=;jav} ztkv`y9k%7+PN4sv*s)(fQ8Yw+g=6Rr&muz4S2>K$Cia6027B_>`#pL%i%s7j{fkJN zlH?*Ki4stbN5Q2_K5JUM_^#S^RK`_Btf@Qd zA`fZ1NVtJcz#Vhn2;_@EN@u@nLxb>9QEQCMy!!Ho{ib8ZbYBBf*rN*4U@^&-Z6~Iv zPZ+&+b3z&R>BYsQpc?qXmKjb2?xy`1mH7RTd%?e|BxK@pY}K*I<7W77cHo$;7=YFY z2PSuhtJmoJjwfFlSRp?R=Ra0lriNq9OlafXRj6E_9xq~i6s{Jp)Acqfi-@EPFh00P z!rA&ImC%G^D@5htC^l4wlQ$v;WubV#Zv3?VUFgVYDG`_R*FWEW6D8s2~09111+I+GZHlHp|NO zaddBZN}_W4%bMq7EebVp!KZeipbTMT{kySLapgmmbfZA8sk{vju217tF4=ZA=Tf|} zSe!7e(9adh{R-mx8qQfMS}z5z_cFmb2f*IQcPd0kZ(51W)F z^F!`4Lf2M$8BwoD21^YI>h8PVHNOu5QJr-vq3Evka}j~4l-54H9Z2_P9>V#wG06Ba zhPOAlj;GfbDBs_xA>Q5ftkwV~0?vksM8xR5RzQl(0wW@lAzDh}rQ}cjC(fQM8|JfT zkb;`|Av@JexVmDzWr`vn5lx@N0XsNi1KTJ%5!DD1BR4UrFxAg2<~AUA*ww z5id#z$+I;IZMAwM=1OT`7EGL!GOqSFfOtYpak#0)kPF6`E?d{MLU8ifjt%8|V6U!1ZRdbrtDu;(5Q z+L$be3L`5vQQezqr%yYGJXPL5j(1E-uEA>f;F|BO1?0;FsJa@$)~2$8;0uM3-uV=D z%$~N{T}FRET*C(+ zqOf3P5brK0=xocy2F{Z&)4pNk{^_4ljSelXmavYQL24tZ#@Lg|~%dN5Yx zWTfX83}VPFAz46BYc(qy7kqT^#36Dw1*e|6>rYC2O;Z{bbUH`juTt|T*z5+TEIG&b znY|RNm5t5Wg-?3|k!D(4SvfW;F(dp}<96)F3O$Hv+t6O3n$8Sn{PB;UCkn)lw?ovu zK9GI;V}<~W`x>wzG(j1Kn}pXUeZX%(smb~yzZSnMOUVb5P*lR~K`+G$Q&S~a3F`Bg z)vwMwe>Z-=rH#I+=c^u^2Cuc4?}*_3onB_* z2^!6v1sb4!SZUYcC8=?q)$*M@wk>5B2g+@QB&(X3{#jqBA_aB@Q4mJ~J}sl)B@6 z+necxb+8|;(eNJ3M~#%q4lf!Km%ZM^=x1;2xq9T zXhCmX)AFo`c2r*#(Ra{~h7-A;?Kix!+Cn1o(Sau1(XSi6>zs20f9BFE?=!5fM1%yxkad!kMB zOXV!?#?Cn5-CxA~kD<0O6~T}B7ow`BZ2B{gZh_5-P^_*y$;jMbkhQ?kk)mqUt?$Op z0Exu9`qGB3ko9*PIwR1AZoFbLV?WklXT0A#)7!5?JwS)2(};WLxeXDh3z<>j3oL=We>EHO)ENTep6xVI3cM!eNf5i*|>oP9*dK3cKubHc#e~@;FWrr0Qq|(fTmHRk;O64LKye-MZrDitc_EM&ZUm z&c+26zAbtky+aLIq=(L5d 
zwi5U_fxec#rUnHd^wf5HFlA|X>wjw?YKrZ^@BdekLXpduKy6a=VzUdYPB9LQ>`A-R z>M~1La1=N3X*TlSWS_fIfw44p^a%#Gqb&Oc3MQrIIF$s7xh7Y`LT7(BhUz`Ds&|vP zqyG5UkM-w!pn_>%Df-)1cB-i)Ig7VZDcV!}_uPsI{mpGZ2KxnqlLfzXp=0R2PFJ6*mvf(X-I(KGG} zl5^)HCZLm|5wedy8n0MSFZO3MoHX}{GPw~3Lzn10hw_%VU5pXVrU zZCUcnf=zk;Ii=Tl@puh-^JdaJ3yHEWy?f=Mu|fhYB_xOIuoa%?ckX$;=9 zpE<#fz|_z@*meI?Xdwne2VVdq9m?VRiFkTfdsm+N^oNOh>|5EhqTWaT02}39B-)#| zV}V8)lQtV|#J?>FPo9eCc^hloo8N`?I(mvZQowJbl5*-EB8obzdNP$L-N%N#(~Aof zl3ektj4j^AOFn-(3PbT^6$8uvhF|DZ$h6u^$~&KgEb>B9?{3}{hmf4@)&*q1`_Ox< z^a{LjjC*&Lp+~NXTmu%fgZCEa87#8TEPHZMM}LyA>GYr)fx?bwz#s|g9habTaaRp# zo-hsjK2o7CqYor}&suw4giy=#AdM{Yh}R=z-jj_cVT0EG9KIc=r+3=O0uTM~=B0!S z->FKyj9JS{A!|CGe@S4a_g7dSUn%b^Wux|6R&C z(&uA3C}{G&pc!SZ`p8%E#I-a0b0#YF0{lUQ!-dWd*JG^E`Hz-Y!LJ(-#NDLZ-G#%l zNQ50Rm~|EExc%D}36cuQBVY;x7)jGulGV2#`EsBqDhKe)+f@ogG&MRUGjfaiol7`V zGa~o}d*9G0+S!~}Z6~n1V;hBWf5GjzX<)(TrF0LOXC;l4YOobggH1nvzfL*HPi?mK zF{rZwtd&NL?LI0hD_>c$?hnIIbe}YHvNt?x!7)v;`p>B>XvVpEN7w2(2Lbx9aHnWA zUYEL;%Yx|pC}^1GX#LUJS&lpJZQ1R|{E*P0@eswb4SNH!-6nLGdpIkji9(;j z!_-fd^>2edA`UH_aXvjQURKyG^EPj7$`fy{COUfvfnVmDltU6$XHI$rfj5r7 z|F_pkW_yXT3^k-32wrTTwgQ5+t{MF~3S}A7o`u(CEhP>9L7+27+G2z2sY3x>w^W26{}6hw9GG&eoo_?~F#% zhL0#>KI(&o*#d&nZFBa3l@_-Pa{BGwzj|I8Bx5MfN{mEQzQC?N`QT&1H>snzeGa|- zPKgslR3FpP^Xe7-ekQ-yrRgq$M>SF*ss=koPJ|+@&4cWdS5;2-8^BOez$mY-VtNxP zmE2@B&ad^Jna)xarT9__-^I63qulVL^bKlV`BQ~;v%I3;j8mc)y{qZgIf#9OoEMl~+(Zm3u zbYOf>*r%~;28V;~AlKITmOU0113V6*K2a;n%StMSC5j5G0ECmc;F=4?I`T)2P@e?& z(Xj`7VY4!gr+gHrxV6sblJk$5UR7W=Hj+Nx+WvX?JlLviG(X;5fLW2@efmsEqR^(S zuC!h3D@cVb_4Gulfq9BRb z$qA`wYNY5JkGck;bE1|=WqKabYoK{Br7(kuC>%j!3kLRQDDB78-`u@{gQAnyf8y@B zxe}q`;5D2sIzvV|X9-#~v*O1nCp*wRuOLckiWCiuCaY7kdP>TbZb{hiUS69%~74SoxB&TiW1rvNiMfLaD7`GnJxKRfR+wxb7G z4HDCl;a7PSqRa6D>kADYxCdyIdV8}7r^RU>Es_AI^l)c|HPSo=+1^6RL4|u6Dgnaq znhP&sDzER$C{WqXVJN8&$Ps|;!jN|!#}Z0>WC=4T$)PJU%5qIdTFKrjP_i=_=cs43 zAvVw?xDLVPu0iSw|;Q&sJ=h>f08W!K_L(@G53Zh-UdyKa3sWK2}EEQwk ztCFlG#MM|EtN1N)mioe;_$@mSPcdN-^g8Jt*H|qWTTJtsFWe(;+86O^^I-&XfeI(rP 
z7T`dsMtvdF6((N^ynl?tn^!oNoK;v&B6s0>^pB>2+&3MBfq5ZTwz;f?WaA#c+y{Yy z0O2C~l=tivmEHS8^^{XXAnr#;3v`~$n-1d>KKG&HO{2pyvDd+HTJKR3G-D(u4)Bro zzGI7R*Nk1ojbPEKBP(OBcHYgIc_vVa(!`NaQ3Zyeya$;|3YaPVtm?B9Ee3wiz)p|A z`)BPcW`h>$YQ36dmk+8m6qYh#Zv-+FE@P~`TfOtuRVJep2eNBAvEr`zfRdYR`X zj;sZaujS^~odg`e<1UrM!JPv%l7ACq05!)jWl|-BpqAGu1y#Fhr0ad;hqDl=)6K8( zA9-~rlWYRj+5afxU^!eV44ybWc(O0bep~0ojntxj2s%CU;oN^So0%hPB2d8UDzG|6 zRfXW8PE_tjie+zs&ce_`T$xn$nGd0%j1y#r+p6v`qXIg6WM5)b%Dh!2t9_>KJ3yqp zbv)xxuO16*#AXO?r%;{0CLglJ6b0(Q^Q4%uD#g4h9`BjfB3NbxYx7;sScQf0`Ymjv*A3pzE~Y6D2)Z4m$KHUIEGa# zsV&H|zfn^LY>|PKX()ao&ZLl1JM|t0Xa7-1xsSgJkFl-*UfkGtZr+XP)1HMzOOl`F zL7whPRPRG3`ePFq?ZEbpQhfL3=qQC=$5&`XLM6b!L%5dfTJLf}p~FH!M)sA_lym?^ zNl0mo2)OeV#=xVp26~@JCBc;Fpz+)nbzj(bz0G3vE1#zJWnosE7JM7@0Gb+<>u1!g zYRKPM+V%c)2yJ|TV*`!wPI$cM2me(DuVQ`%kD$VqVo&e-|9adTagW{D_@0 zwsFH)x7j;7NZ6*|d2MiyMj}eV;g{CakJsLjAp%2d&Yk|sKSNA+LdCs-9)hhYRipa! zYy8@b+TYF$`#+r-Ea1g>GbKqs0yFU9J^yfsgkcuU-IdGLZx`$rz+(t|pWLgrRaJ3v4?1~bg6(SDC6QPX5!JyCmwkJ@} z&%jQa!QJhKZNOx)_;1ZOA;6BqNd|dG-kt)G_akjWDy}H zf9O@T|2%f=1Exa5dMk{4HW@3rX6s_ue1Kh82?w@{>{0m29!Y{TEh~x*kp^4~k$Cnv>8Tl~ zJ|?eQ`eDFfZX~Q}3J3VD| zcwrXj8+b9h6(3E#-u&YtxNwj1%QAMX#UAK;_>b_>8m-O_^Z9$`jLvURUik91UbfH)sX}8BW zMd)APhTZlgK`_@*1am-hk+H#&d0R=eF1TFm*chddWzmJ%s;a7B%E-pKzWUsZ6Z{2v zAsd+N0>VyGSo?7fZ!+k$(@SVRX6eG6&Z{B6t$%XU#N;7Pg7RKuUp0Pb^RPAB`Czb$ zi;-p)qna`nZXYECs>b2Ws7?0(I`m2jR>w}Ez%~)23g`}=Sc)_I|dqz0Q zy;awC{X_sT=KCCs|I;f&9Lo`3xL!pQ{P?1q@;sy)7p>oUfXkPQ^HziWKjM z2L3cWTtX5inMV%LfBH-w*pi`2d@z9hX7gk`P{gwD_&w%;j27=LV@KA+>5m!I)O*Ap zyD;-JNq@}7pc@zsHh}+xAk?o@KZyF^kBIf*xy!- zjBYky)QBRNAGS^W9#V>rfq~)LW>o1Wq*xOv`3G~>_&^iAFiDljLA>hAh{&BE@6dKv{QXK;HQ+$OpFLc2Y7tB}PrQw+ILiIO z=5JVSxc3=QV2g~VB4BlbeB3yRq6WrY=bYS&>m|7BAU;4-elj%;LKz1C05y*fjKd2= zktS;DB@5WG;gSF z{|_OJ@6SN7m}mR2S3}>*8i>l?e7?GkrRC+^2OAClYzwg3E=?R@&jmM-dV88ZiQq$0 zPo{J~uW%XmK$Keo&Idnu_cRyz_Ll)7%K95cslaHq=MU@FtDliGus{5BQ;{=ZX-kKC zT2k?)ImnmPB4V=&&0A(+E)Hn;U68+-vbK6n=8@zCQ-k!L`=#P)eanU8BejQsPBB^rS 
zEr%?_TFu|Y*3G_tvs}{sY$Rv_AY|eyRP)n9D%A6%Ni<58E@U&;r#ClW(wpcHb6~cN zj5g0}*t)ipoL=P=#ab|{BQQOsuLEOxk7W1e>OQCUgC@zqn1+FwlAUbmqHC1gMT^y@ zopF*Q`@w0nI9p~f0D%&)KENiKpW28-=|?H{KzfL91UGz;Vf&#!5O&ncvEjt7vKdBt zr|mc^JVwbamhk-q+SkaiUt5O-db>X zcdwT$MOyepLA^BP+;N~z>JWEPCLR8nA9m&D=8ZSiRY05G{Z#-IwwfR-p{Dx5Qgl*Vj7~YCIS1z_gn6u}opaTq|H+6;Kj~^vwj~TH9AW5Y zxP@e?bkQ_gjcqd=K%(Hb^*5oQCV<2D>+?;+1I)(MKkB7mVqrMli7-h%kN}EuX7b-D z1T!4;us<=4kG}_o{^CBSggSO%hru1_G;4>bvJQjMX$!vnBA*VCVwR?OB!FsIOyj>* z!+t|q7zP)E;tsk@ML{aXsy0uKG;uJ%*k~#Sh8>C;Sy|a77nJeYk&*D?m_I~+-9CUi zTAn7ZywWM4%2@n31*`vmn@`)FOvv`H9p)-pTgweTP~6nOyvqb$Cx)A5bxlpX-99U3 zms&I!h#Q$GV`(T$TY%R{ajD6TgYXY^OXFn}14b%;ow{w(a_NsIx3;m#bOo?024KQ9 zujApGfo}jr$NX>#ZfXbjZq~`55&;|M!N0H-=jCD`Tnct_SYP3At=CVARkaxRu=WV-0JaAznpU( z`Q`^3z_`IpD*yM6(Bl3CoJ|>_5Ip8>6qNqu-uV3#JuntJ@LzewDKmCV z;)U4%-*83q#aeouV(!ahOFXo4(BHd?({zHYHSQPs!}EgvxJndZq>{M3I2x7Zv5>@} z=xzP{{In8ZxGIMD3v%oBp1)_S{7k0>v$SOo>kYHv0h%fQ$x`Cq-QBrMhWt-`Fuynb zKa$zslRCaNIvm)JqLHqq5!KxLLOfEG|4ropj2ggoRud)b2o%BkD!l)$Q4@Sor&OriH#jJ4W5di@ z=wNHRF%lEl>gedGl?qNpf^29S3kwhLr&#zku7x*+pVxM`OXBzlmTE-WSA@pb?c(Xh zArs1rASy{~c?@a&G!@%;~j)>daV8n!hu1`JA?PfR1$NhqdwP^Wj_F858b|@3bxZV z|Lp5s26a}lp?@PMuAry|GpJE&ZHm@c7mT>P)HOT|A44g(qnS<+l(eO&%S82KauSuA zn%e#SAmAfi1NQg(%C8p7jp*3e*i^u<-+i`z`j3sVj6&H{SL@Onq?e8oJ!>QQ#5EPHS4uZ?P)~>pStEs8wbqF!xaElg4CnL(2AkRiqh@|=a@<>LV9i&D z&J+y?w;~~?ieAh~F_AkxTwBWCQP{&+!Qb510DNa4 zj~y!XtWrZ|i#VF1f}MZgs}rkCs_M`Sy%*?N z?!aRY?WR)H#%s6p!#8DZf(eW&h@^|287=_@>-!gc`%rLzTc~$fIOLRw7rT^TM7`!S z17IKOxdFks%tMshnfvDnP2Q=VMg3QRmBL=q@Jo>J{rmSzRLWnJAaLdKS09b10RKB? 
zggNHtH`gR9{UdU1J@G})ydF4a8eUVyY7teTEC59hjLHV?(BJ+WEV!O3|2^U^`2OEw z0|5a6;Aj#B0Lz)ukArnS7d4ZK-%ylfQ9}D(NX*su=aIE$(=cAokDRlzNPsCzHeH|u zn>@#4<=L*3L2RQVKD(MJhL)4?l>A01lq~H&ij|Qx@QO!q#8Rle1vkkJ=GJO7A6WV7 zEDrcqy}5vnXjO^Y*h@^vpCh?*V3|;uCdS6FLdbovuJks5_W5d7=f2>Sr;F`+KcT{c zEvee1Z?>y@T8a7gEaXV5!E-Un`t%eKSO_a<1ZoCg*lb@5(KGL4V>VP`B@RR=F#zVI z<&%EaOs!ST;k;q6I8l=6W(g4(W|vt~ z?sBB=hB{r{gHVjuZ2*HWu5~LVD-P3TT?Qe$gFGn!czd zw%En4dN8O$(}vssn?z+c8p5)Qw!Jz2Xt7WY4J5)FXm0^u2{J^oys~oFcYK5gF-UwjZP;cvv5eP zO*&qWae#^71?OZC5NH+Cf&xzNK}`yaGlGPcs1^G(2x$yEGCOuF-j zn$?yXA--D;@T)fv-}5}qPi-PR)M^2?Lj!J0z{AhU1=v^1ARuJp2K7=K{5S0vK5)|5 zFwnsSaVoC=V#L@`28;Ho;KTW~?%6&3z#X+-1BU55GXz#LI0iO(t@n zbx>{`fKbU_UA@hde_Edg*9|zFHlP0yc;PUoAIw)#0>&s3*GanBa^22=Eg9YYlfd&}z2i zOGwZKexz?EYD)(B>ku`AeEG*g#l^IoKPBYkdjFI`qz5>Lc2jTyt%!iuSpl5f_3Re(|p-!lJh7T-Qc;>M47aN{XbItGTnyNA7eNg-KT z1RWNlykDOe@ZY_pNxnHc$QwsT!|!G7jb0|XZ%jy&Q&60vXqZIN($a1&gailIr}G4A z$;->}47kSYb9%6@$|EWLo29T^Cq(WU}*zsTtC|F5>v zZdsmUVPt%Kz{Z9i159yD460L_$N|NUeU9hP3G%I-9YtzP92}TjMe6$lBBL;}g_7T) z&=NOuG`167a5P^ZorQdF9Vc)F{!i|PLYZJ))O+9|Xmf95vQK7oTq^)#33Lxc8l;rl zd{XC=4M5ZV{W}`;-NuI_fEprb9*Q%cF6wT;3$i(EaX!luNL4!>{J5so`;z}uZ^5P9 z-hin&ULS(fx^V&#kMaPEK^NZIKp1F0TfaqF?_o8{5RYP%cX)M8FRz~YpBV=d^R z;YOU1ObGlj;F-(s5bkVL`nVhKtHuYIx_gE zYg@S{A>=Ac?X|)tnph~oe4lENle4`x-ll?%rrTRQ5-CL%X&pfBN*59kE%?Bt?9^G| zz9GFgdm<{M?g(xg_b7@Zk4D5wiYrQTH$*lKo%j_AkUq30E+oRB0i#`Xb&1s&5Tho~ z>f7?OdA5PLW_UL36~?#ny0t+PVIxBbV<>6$5a)hHBcNihVOzT|1&u;$UJn}!MJATWN!^}7MC>yhjv36NCTbYhG&2N#)2gf=K@_P;CsCOjJsK1fN3zC$g zmh5VeQ#yOMcwb&+^A?{Ib68H?xfb_}J)7orM9aL4$Xmn}sTlI#GB!mb4biW?S0@edIzn|S=3IzJ;VDV57 zrTy6wF{;!BOX>U%KeRB*xOaLX-by;{+TQ1k9ornXI=o0lvQEbaynqx(t1{amo3qIw26OjCX|! 
z))a90!FOideJ?|sVOlBl(9Y^z`DahF6t_w5rSNnz5mi*Nb|9QYp0QW;Z*qC5lndDT zbAOgp^5tkPg^VHIXDgj*;q_;1PP_fH@!o(u&+(=4J~BGGBOdOr*h7sRpzzVxA`p$k zeynW2KI_luIjxBaHp-XX00zPDwuQ!(afs*NbCgG$2{wpW`9TW2XL6l@y)`Xj#8GU{ z+!)Jx?Wx7~2cbWkPkIQ%7K$xAiu&9_dzq!Q`)R-Q?OB{FiP9n5zX&>zW~Jw|Old<+ zjp?O|eI^#i=h&a2hW755d-ymJQC)|64wl=xQf{7m#)~0FMH-S>jS;5x_p`T>I6sff z4vBv-gM0c|jZo#o?lEr2&=Vu@^PXEEsTto*)=&seE?{aLV+6#dIVq2hzUHY>yWU5G zyi5|PpPlFYkoq)tzl@bKe~%Gfg3yyu^pUX$EY>+Pu6MiCbIV4MP;`oqjb)B^zN2O$ z06xW1^wLNHokVw4=zHQhectsgQkiK_S%1}NyEAUM)zFhA?LP-Omwqj9BI&IpcfB^r zNK7y*M)~m3{zTMNR#yebGyR+gLu@IaGOJ9xL`1BMr1@;+L?Zu_eZ>E869qIx&hcdS zezCeePM)pU+&p$J@r6yXM~qVcYuI@&Ol7b@qX!XW?+IIvp2y!G>`dN}Pa7Sx-p zH6b&^ax9Mnrhf3*zzE7|hcmXe`SA_Z@MIbA{v?D(k0u>mD|YBDF9%gTu|2&<9>-IP z3)r;#wzeF}$mA!uGD(G8eG@>?!0#GC;3L=u)6>+_GS;z-^G)v~9Cb@jy3U6$LG2fW z9+!6*$#P1SA53;bx{Fu3G6`wD4zZTy(NrOW7vn2T4^f8hOc=vOmU>$Fa-UL3C|;uA z?PsxhbkOycPQAjDw$kpq?MIp(`?8N#J|BDn2?aXts~QkJ6cpp%hNhu_l^L7SYxwU8 zwsv1rE!Q@|rFpD&CzOuFC17c`=imHM?NI9%;)K}@I;%_Ij2*#MvoXZ(8JhFuZ4}bf z3;0$x)UH;gBMx=(^bctE>8FQpG;2_MUUiF>Q>$e7ygk z;X^Z}sN+WXgX{Z_KPOO((2!oZ5)B{C!ir7t5=hY0SksPN-0oKseLtR#c7RTzx5JQ= z4*M7>eJ`hl4s**peITW(RnEOBx7bgrG){tTBf0b~B~sybMlyxe()U7n4OhZX!Dwgn zA1}&Z(P2Tlt^}+rbJO=eEnM)>FMQ$QgY0loLAWW0mECW3>Lu-%=hM{4ZjH=p>Spp?hYbVJ4a+4HOs7Tun=njM;yx+|Q(m05KHBl0qjHAS zz)+h`%a^)%fN|I;YVH2RA^hLTM3L+P`jZ1D*Mj}-h^#No4v7%ud0+mR*iCQ?LUN)F zNv%Au(^WOgim!%-!$+AX44TJjdAzej`@L++waf8v6XJBe+v}aS?L3fap<|#9{SKPb_rTk=onJhxpYgz9ykXJp zzj=-EA@ha8(XKeJxUaN529gBxnEaXszqkB8?-TXZR1s}@& zqX?jGpR?M}(6dB{ylqjj<(9x{7Mv*Hff!rxF{^`pL!2sXa4Vg>-E4h|x{-Ba+eCklzQ)t+nl&+Bu?AYf68X95FU*LV5z=Rz=N4;**2gaB{Ph{Na|k zMVvY*B^9b1Cv>^!*-GO9YI$K^grvU|fE+#-@j|@HRmQ~wezjY_m?9wW=T7bLA|bZ9 z!T(Ph*~dsz2wI%TuVJQDd!Yrf{m9v89q+ww zt;8I*a8;(tu~LPI^wcI^<|mDxTr=s5C9CPguH{D(5)vlQ2d56vOuTGhmJ{@SbwOW7 zssBS0nmv>*=$3K5U->LK?<2sQU`bKvjxL@Y6nnsOvm^7q05UJYdSYykt9HtA1B> z2V4#6jqn@8f9`buT!~;AnPD5*v(kGEQ}I3Erig4;Nk0*o{?9mOzD*8mPjQo*#oj6i zL=dozoEjs4JGGPVmDF;<&4!lOqOcw&CdHtejRb4K3s*3AY!Dtpk7wHdU47R6$+PV+ 
zJDrD_=!a#*k>A>TctY}D>n?8?Cl|xHQzv!~-VKYREp7YI&=Ll&NWk5%KQjS!K0*PL zfGkZ#3DKi%v@-P$!gI#bO)$mVSGag(5u}KW4p~4Et7lE{rTdJ^Z=Sr`@@JGW;r7aH zrQuDU&f#NPYmskF%Bf7Wnnrhw_vXLkwbj_ixxElDCF@<70D)+DmS7gdAWS5YP>NG# zqbDm+y#YH=8V!%f32^LzKiyojLEzA$FYLFZYHmh6cfJkq&YVhu^gER4G-8yhf&!Lo z5x;$@Wk_n_e~B*c39&8fuX4s`aymXx!^R0Y#~ANPovas~d&aSpsP5imBOrKglM`)gn^)p6s*YFpL6ooZweL5&KLb5>-{#xSCf<1h&)mCcLG6WRU5j6Wrnb_YSFof(A{StU-d4B=p&abP33U-Y%$KMLGilwqcY6J55o z1v~et0RszwbVzwQE0f0~|1-mKLof3dY8Vuk~WJ~zvk&f3mHtko&4aY z)EZ-LYa6+_sTW4TuaWsc#>aZbZD;c&0<)Q^P9Ygv8$KA zeo3P`)4T1_#$t6Tyi4-N^Z2liuApkl_9-t}9{qs$vZJYDVWN|G8HPgExN*j=qbDs4 zji^>+)ZXy&_C@xxKgI_J$!Uu5E(BJxCKNwwtzTu2awk&p!@xOwiu_7X?hjIM!xX_d z4~KthmzbOZF=qymx`vV);gmoDz+-Emm-Hc-wx#oxr-0r~*n`dsw*7{}3P%Z!DvUxx zrK-O0wmZ7FUp~udRQqyZR>zgHiMplH;^)EJRKp%{_u`!~=X>sJmN-S{TdbD|lmiBi z-Lu1msjr>txhkuJ5gWER+b_R9p~JDlXGM4xhJiMV>aFX zQVW&6Jwl$=ZVmofW1`Ij%95-?&*>uIni<1bH`&RZpaYtl(rWZVC?Yr9i0on7cmW{>Us*9N<)oaDbY*jfIJNbE+K zEw+*4aN~~g&XF^#@w%{AC^zZR`sCMAw(+NjQ$SgWylt>u4Wg_mP@t7pDezq_hHAq- zdh2pg#8P#?;2k}zI_#J6uN8K5xaR=1Y-dI9x(laI;EuS>ffR1=mSCYUZTBs|puz3& zoLW5yY5*B8QkwcHkvKYg{UVaNnjL?%8)%uV=8^LA5PHHdecN}I{ zTJWwsAz*eX=W1Q62+>s|@c>@RTstLGEb_N>JOO$W1@{;IlgoxLt9N!&EW2wXo#`YlDxl?FBVdWb})`NN33YkSzgR)ZYrJzeNC zN+B0BGK{_*f8^m*G>b5V_o&{loF%Rdh4PW{?TXbz`JK*_ekNuMG3@t8r>({RxbRjp^ zo3rD$>kd!EA+niW5a_^I3~N(^gY1p!y+}K&)NMniA1Z=f{X>o#*XP@YV7C?NUZjRG z_{vwDaQ{Q9J`vkq+=t&DyN=I(-rG{V$wEkISAJAS?%t{9R$iR(wH9QyC%;DJJ z`uXZGNbokKiF12hQy#CVslK6NUb(zfQuEbWO|?|HRC$HxBu_(ZZMo(C{rwk_1R^-9 z5j3M!?U(NigL`k#Z-2kfv=uD2bzme;+=;n&+u%I+pxouLS%vM}&qfc|O{$YzY=IOs zC8)~US!`kU47p``vZK?=h@JSk4SdR9mK_ufefYxAu9pWcmH`XFFfuX89aMdH?s`%|Bm*H#YoGCWK+u4MQ<_UuYOQO`1_}Mw#7M}j$>jY+nJA=<1ISDz0xoFtItf}68b1|XCGDVQ_-$u zi6d%@epEG?)eQz)&IFKRR_U#MutE|~h$CXm|32M9_CPrEy)X)obQ`DQg zLut2$Vwh+hdbh`t!Hpj5`ARQ7eI!P(pBNUZm~E?7?;j+_Jq&dc)wd=ES+lUo>}kdt z7E^3t{pdnVJ&RNl;?n&lb6cG~po{~H(`H`jM-pJNtk66vdfwy`9E{&pEQ&K~s!sno4 z!{bRK90v-t4D(Aa&Utj=detW1MP^Asg}h7$3`; zw;;fAO06({5zHbJP_w_dSABG({_czU*{W#LE~CWWHp2b{xmJ_iSG9^h))<>RI<|X~ 
zJ|~@GJ=G>Ii8v}BgMiaojkU!(6syeN*z3B4-_j6XEc|?Z`{JkpDtP96!sz&Oi$q-R z-5n88(GcofK>TRW+}mMTOy}E&dzo_Pj%-gAkqib10Y-T<`8jS1l*_fWl=xRG5Y4BV zy6xL|pC6YYSohkTAng^9<3UWCRmto?*7Adj)uDpjp&pgNwYzN`xAv?En4!fOuR5_> zI9-3eD0T8$TPBT5etu#0V6RlAJf-Q*L@j|Tboqx?cBYfUgc_e4_>CBnPESeg!vMMu z15~T*w_dK|cbH8H^u9SP?PkVW+`((6eSv7B3TDq=`$4C3(S&%XIrj|3=2%H>NYcv$C_Ygu>Uq~3*6E^xe%`?x z1+>jYqQJPlxa8^??l%HU&+FPlK~8RZ>K}}-%Fjob3$n=lMr<1AP#N5Ei<^BKSs1Ok z5=B7`OC9r;-q4T{R#cg4%9)KqHkD%RJGb>Q@?m1)*|#N8LC%e|j>(SA!|ffS^yP23 z^{}L=>_WMs5Bc9KH|(Sx-a1hmct7zub_aO}kF-H=TI!$t#?bE%$+GTEp_v`9U z1T=0CQV+|TxdDvwX1va;L0zsh8oM>Lt}XAE8usWa??g^(#Mr0IaxwQ+hAb2-X1=# ziY{j{6dFeLC%xt~X3fVJ!_D7g(0(46%!b)JOS@juo+=##{O-S~Sryg0yXR0RDv+ug z*}dK-M=w;L&5(mn%JNFfdh)(elBj6BIM(=Cs<@b7PxbHY(g=Qo*R}uXJf4~>P0mel zc&)BpOrEcXk&?8MbaAQz;&5U7_+p0_L_V{}f+4aw_Zt!pI?#n5^`uhaP)FYbAxB!Y z^XPRoAzj$~F#t)$BiAwF^?G|$SP3@KVG<9Ns>TW5b@S)@ z-7fPZg?$7p)i3*Q#6(+daqNrL2I#U}5P8U@asS;ZAfcAxRJsElV!m8QT_BbG7MR^2 zqo5dJ$*iyUQj$QtccX!L0#hZzt3nRB4&`CrGv|ejd#Qr0C~RI`-p|PL%bWNp$NI9S zP8wEb(^9*j*V8FOozjkU{K^yOP$n;5Z0m_0P z_d#dUVf(xwlAnJ`{6#z1<7j;m`yrl`r;f_}$*UHd>)Zu8QWs5p_~&=c0ug4mb4`4P z9+z(cIq4d?#_GeEqSq)}jeFixEE}q6w!<=0-8s(Cb?9Qh^%S40tk;PTp}S4=){6Ms zw3UDJdpbvz!>aw9`ixAZfv1xykNI_HtFz1HM8*nSqhEn3Tdy%bI45e=5L%&7q(njZ zt0W4#t;Jcm(WaSEdu0y8v4aM8wp3GEA!!CEa7EFQv}-3MBnT1vpSx`y9GJ4mAqd{W z>M|9p*P5v4i`YP{tZ&oL>FDUto&~^syVzw~%)5@Vu9#EEXam|#QFyMXv7Dk?=#-DK zL1SV~p1Kb9D%@GVHG#p(2thRg@+>D?q8}3i(zfK+(UqI>9ps;#bUxDXLN_HchB-_) z*g~d>?>5HO>I9b6VCv}u7+BZEMmgC^O!6h-jXb2sB|AW-d%i8Xyxq0_)xY!F9q~zlIk(1@cWdFb$>8pCL#YpJ#6gE-}=MYz2 zpFe$Tm8I(0XaqWGf7;~LdrFM3fs3u)eq%O_G^TW1(foZc{rP6Y>``&=6Z)m;%?NKV zYhZh3J7o&+-@j|@_R?MuS9DqxG{q4wLNeTej~RZ;`bx@%M0p+#gpx09Aw+Hois4og zo4s2)Nh}gtfeS*OxR>luv|6dP44DY;;73<0T6TZ%e;;O=_$B`(Z-dV_ptgc|;`eaK zV}%m_6tX97d$WF^YK0c{iDe00Mf-fdT!-(jM|rr|EWTROV9OCB9BKcx{!yOp6Nj>%E8Y2m)Gf6K-ye;(u0sI$M zk1UZ|6SD!vabAOCW;X#@efi;c@nP42Umt%XQ&#Lclnpcg8lFiM4S@E-tq!4ey7&+Y zFtz9nN6@OjD~B>~0zne&Wo#Ha6)!(gkyW(Q6A%zkOjZIC9*hfasSwomy|hZHWESqL 
zJ&MNXy?yIFQUwHw#w}k(Thkq_hn7OF_Luh@5}H5vBuqn>=}U4{S1ca#wGr#683BdP zmK+P}??GR*KN1Ym))N-D8Lx+T&G$A|J&R8mn|_JO8zpyPbVUpBj$9pxu41zfeLCE> zF*tR3L^Z=ZLbwYDTIg8{wr!6UtChtNuTyEne(xDh{Uas&WG_5U>6W5maD?M0I@PRr zQy=Be1OcPL164A6N>HQ^@gmId3}9+ae8XjQju+4Bn(N<=$@DRptiPlG&+({nS=2$R#Cvf zNV6Hr1c@OsNBtdX%}`kQ0DGL!r1RWG#;^kVn#KOPlL({q=g7<-0UnZrwYVYWR& zP)(KCb%*qCX@%0^0{*^!6m^v)+Kjt1>!*hUdnwHn$7Dw#JUXSkP`K_kW6G*OzEZ7& z<^)@db<$jDHcy)RS|vHJWU_Hym)0A9{spH1Ss3O)OM@8Hwn;*<#?a9QRxUy+SorDl zEg2vw2&?vSl#gYgzlyd;#zpSKA!Ih9ldpHAd}=hoiWtB1PkhOiMjF*#7pi&*Tiw4? z?QVz)AI5^0xFg#o7OHK@VWHZUe>+#Ro`#MyPX%{U#zGldG{m(8?Usy*tampE&-|mv z3vzidO0m}c1Gbo=7M|I8I^;d1s#pj6Gz9m2qouEv%T+leJcN5G$ANywX;d$h1U$`e zXe{bLWpkX#Kd*=>itUoDQ%iQ0|5WI3%OSHzE|;66D#qree`6BRf@mrE;#&VpN6>#| z{%v@PYt~GTCPlrNl|@KL;8+Q4AcBqF6Sp;+b6Cj-RZ~o~3Jw_DbCZC@D$9B3cK*Cu z8s)p0v@uA)X4i%OrO|LcsxAGESpK&>KAuBUsX^rwb-YA&}5D%#H;uB zlJZr)Bv)uMP~2;df~WbB;pZ5b?ceNZC~yBpz@xK?&iAcX7FNe^)X$&&=1NuJS7=l# z(sKLN%prkD_$5Oie^Z|XZHw%J;k9@lRN?@c#m z%(3C{cBH-g?HjsdoiXqbTxoB+;)4BQf4$podO}3j-pQti6pu(4oXAPyyNSo0i}~7u z(S~c7CQ9n+{mG--Vhdp#!Nj1~>crXjZ&`QkZaO5Eg?yab)BO=Ico|oMw(POM zED)6KAX*xc;cB1$&L!s2J3Kp{{QAN^pr;jca#KiLE8<|mDOw-qb4NQy(e2(_(9G;G zI8Y5SgMxxc`AV4PU?1!}VWt#)((b=A{evRGQ)R`82w9sVgN%%bT#;w`L9uT&5_4v< zfCfUmPk$H`GO%y;-27dJoja4}fCB8iH+gtSW)p?#Rr0w5r3W4X0j*_uk3%j@>ncBQ zcYV3&sY&)JmWq6h!rp%;s7A67|utC58ni+qrT&JgD!- z*<+gfDU%6Us!wkcJL$+nl?7Y|*U#gTQ=G@54L9`x!y%uF_w<;Cun>n zhrHdad~0e<+skp3H%|EAV+IXRXGGD6bh zJ07faoZo?&U0W;LHyWRJ#OmtRsz{m3+gksoIYxBCo5M zK@ooLr8>vx*49=zE@r3F>)YFYfGbj}KwxW_oKWxsHY597okaTN!QNhc;qx%D?%l$7 zsBFoB^#ERGXGyM}T1+)=D^Zq-bKVgBnaQ1;C+KrBTl~>M?izc5)Nn~hQ8#W9r07~K z*CdIIa7k_T$7^`TgoIxDL7%y*bPg%EqX=$Ea4jkM=ev`;tb-(Jtd|qir>6z=F>NOO9~VE!KS64f8qTBh;5*^w8xpGG_(b!A;Rm14Iqg(mm^t)Ld?Epo z2z;QBg{(bqZ?w4!4pHYMQ!bvgCtGt3vsy+%7;Ay&HW;d4+p)Cbi2$jKg6I#tCcQMt zBOlR=Lvs2PJGcl@eMX@<^p<>=z;5$=bUL@lNYLN5Ui!nq9fnJZhk+J8Ea?RX2jMX9 z2{uCFy~p~?mO8g1<5TVbn2`BKzW8+4Uz>YPsG^{ukrhY*H2uG*pMRBsR9jbvhzS{4 zSQMnN@d0@?pqL7zRJoE}A?Jy?HktVk`5QP=e3bXNz8|6YD~0%bnLF*&12L|J_tay{ 
zB)gBsA1~IE!ojX*@?#I9$nY;b0o2#dISWOIU?XJ|-*}}DI$eC+4VdBN-jd-38{hCm zP~2DmeO`54#mvM?U;=781-l5o*YTkcyitTh6>S}c=TpyGa(@z3?CmUc+sMCsB&nzP z*?vk8=Jm+~{p4NKH z($vx*HGrKvnQW=}b2g2VRUVSslcE3Gt`f-xLBRb)_gf#_x`0BPkzanG`m!=BOg-xSgF_T-`U3w0bXc)q&LN55TO^!G%IJAaQ|`O z$m2q;WJv$1qyDd^*Y`(Z=y35=P?4g0@*~{d5+#W;>wSMFRbYC>3o?_#4Y?rg1|P8-CU%ryf1bLnOeBuH^(&Dsv$8 zBN`7=wlc-H@ml5j$r&NMjpyBrd~=KS3u6^F1ijChA_vHt`WOBg58?YUFKDoy;?ShrFPdJQr(QLy<&OiWBvLIOot&+(r>bZrxn^W!ds zI}4Rs&Axqo-_-XtTb!utG|IO(pmnKs#afM(EYM2lzq?8Y69i@WQ}DxDNNS?pno-&) zXMkb6jq>>3_7=&NxtrRT){u#4_s;l>>wCSdlkxx`_}~!9n=j(~=6VqMYKb#iBaN)k zu1Fr`Ak5J^Deo6!p2}E*Rrqk9jqXyRWuRE)x+@;Mus)i|XexcM_w2>mgvLHeL>*So zAEH8sc;A99UoXS0RHTFzV0Cp~8_|IqI^)3rYo1sWu77$+kryvoIR5R=={uno(42zV zY(YX42jytV?UaX;7>J0Wy){0-h(fh?sm7`EqEfiJ#Ln&q=JbA-Nx_!aF6sN#i0--H zgH5S=Jdq8}{7sO|3){WrJ-J2^Wnya%3?!bAg0`UBu0dTk z;c9j&JdrA zTheIjAK|y0W7$bzrqBsvMXf6qwgJM!gVn%zd%X^|o5ZCDFXoHc%#cD&hSM?jJQ=86Zajs3Yj>Ay6x7hrAhZmKBu}0DO$x&h zODGg8kONroItDQCf18TA-JoD6hu4#<{rKT*u_6|)b6FEbC#Vfz@Cczc?nDMmGX${K zk%ttC1sx$6H%_@IX`{j3-rijZd&i@e%{9k0srx9mZZeCp!|tfG&Ex3!%7_^7TO{|E zG}<1p#Ce;!fc;10=8^(>tIVz!n`Ona5rl#fQBf$&K?EDmG7}K+!9~r%j#&Ko=*w>0 zzkLrdjGe(Qz=0K*{{G_d+?+C%Q(s@79xx@-(*V#+9GEeyXmkJ=nTF*bm-|%N&T2A- zqD%hQlKy(oP9e`0{Qn;wG$y6*VPR2nQ3>_mJ?zgyzgE+EU1a|0RO}Q&0E_8^~8Gn(}P$%I#Tgt%C}_ zXZV_L0<=`v>`Q@)oC)^XYTKkIyOmt~wd`NGbFb?c2hHh!GcBx`)gT9No`1;;l$TAd z&==l#cs%0ODqU>v?AY&*VS}D;7kXM&ub%H#Y}IOvf}>t+hGCS-R5ZkC$Kz*w)Y$CX zqZs55Jke-u(h?NEvtd)i@bid?b6V@-_EK~(=EWbz&OdnQZo$#Z&E~d8d7*Ozgaj8i z?i}wkufD1~y;PxplszooDEjg*tZ}bru8K9+O65Wq_*47UkK-BEg3WnwckjtGS4y(L zs;?UVKrUAl@%{Vvzch6Tft#PBth^k!xrqP1Tw;HeW-9f{$|QH*90uD@FV@Q)NIZyU z)W9JkgHJA~XnaC@l&IA2-vRFgOCT}^eU-_7H2Y1orMsTQY$$a_uhk?cF1gnr$h{left)cNvGMa&lSOGg&YVvTtQwqCVdcXi zEa>oDI0FsyN;6(#o|-j03ujL5jd9Ot?JJYpiAsB0yJ@Y_1RZH`lWU)7&K2c7ARyJtlQ^&X!+is2YyXfGZ&k5*rnhjEx|GL8FI_1&~wk1?*hv>Qd43a;Cn` z%-dmTp;A{6SbqQRD%v3)O%lTEYy(~prp$|{V5WJqRfq|DWQ&0$_M9*m zP&duD<@-mU-Wz3_VJ|h?igYH#l)Smct?t71m9Zu3ph^|FM)>VnNO7a#b`ZS3_i2dI 
zH|h^AwMYVGgsS}m6L7I;PjuID#NIII2Q=`ON>@0vz{!T-Eu#~#M80xRHG;Jf@BeC>QD_OV8Yex1pxm%QoX0WkPo%oYY0KA@k1K#n*q1l(Qo;RJW>Q} z#s?9~F;c(4;;oiY@eT=eV%Tq)%9SWlueQ3#0x-8Ev) zA9Q)%dskr49|EiG>*?uffB2giAnf}0KoG0qVn#{|&68!ViU(-N){3x7t@Bf zuGij_8Kebj$gLIqFyy=ui#(Np69S2V2o`MUJh_C6V4o?!R)6&W4$ zd7(--{`uuEDFtjt!^qJnZaU%~&J~fL_Cnyy^M}PJ`zq$d{OigcvAN<)NP&y3d;Hbk z+mikA%iIHcK;0*6b>}yF&5xjHI4Y&KK;VZ7g3^5{=uw>`8V;|b6M_G5uH+86sY3vi zqjT&a1wbtL9?}w62petZ==ix{ogLF+n4-qe+?@Ko+)RPAAE3QHyr~Rg>R*7LD^vdr zkayLLnOa)X=0S=pS^ofJ-AR0kt+pEibvA1nTCCgu zD~zA~Qaq^JWT($8^3V4bfB6p*dAL1+PB0FjP6i((0azr%kj=b9^5SH7M=U3SWjB?@ zIHYRgVCP-)9|Q>4^D+ZS&N;X|I#xydCak@eBEts{b`HEblGrU1l34l=b~52PUDrT& zcX#Rs;rdB@>AC{CIuo`!lY=L_wKX-L6#o?%b_Ib`mtFd3*x6M@5%4%sg zhvIJ@kt8e|d`|2EBp!ba{|fhSS_JTm^u?hP=KZZNSw8qwIGo@-u|U8oDg=ZuSLGE; z?Z@X*($eUP1-o`ujIrmykll7l^z^#04dkDPzzoa&JsT_x%+JEY!X^_omou$cp9+BF z;z9!g#+Ya9d{WSw5!@M-{Xl+O(iuA#X_cOi7eQYMaVY(>Nqxi*%#vyk1Njedk+^-H zRfIs3!AAQ40R?t)>S~gW65s@-rBAmz#%vYUVC*Z`g%Orw!n_Y?PnRx5(GG$BW;QVU-ans!N zDB0j`(x>R?D;S!3xOOoolt!A-enU<>$_oK3(Ex)v-oK&eYy#UzAUr6(p4=U@ssiK; zrpQ{|&sESuDX)CsNs-{`n4DJG&L3$&$Oo0i4hc&lxVI?%N{wa?YsOL$_wU*?mf`xa{c9_;y;jL|Ymlvg8$6 zx~8DO{;$ab3Lao-(q@o2B)xuUB~UwxU?Rus*!ju+PT@Mh)`A8AYs#|g$*Z%)>UYe{ z%so8k#@>Y*%!iZ z{+lWz5w3pEEL{VT!*7xld_n10?(55Up4eBH+8q$os#+jQi04DV2}{5Qu?)mRQeEfC zWnE{Gi>a`{%Ocnf;M3l@Cm$a?KlyR>otD@2$qmR#4mdfd@6EY7m-cQ!ZBiW;b0xu+ zwmRp3Zd>f?7J5?0&Ncmu^)iNS<^jI(Ff5-SWJ0*L5D0ZLuxEBL zB}SNSx7A}{12egH8g^1tS}gkYZ?=IJ~AfA`ji7y^*0jeHSGYtIDot_ z%WNv=F9ZUTO!&&Z1JL=XnSvq=^q-{n>X+!(Zl5QO3Psijw)R@9tggIa zN;iWk&~GCJk|e$OVy*X7F#J9?^u)>2U*1CBGS{i{gI1g2G@4h0F&|;R+H)7qdT868 zC0o%?e^QK!>9*^XL=;`MV1T?zalF%n#$9wcCl#i(8G(kpNO7o&OUrDA6saUs4GrR? 
zao$OB7(ykIINLQe`ugEFE|z3{pXt)th~vDLYCtl#hZPqW7e4;{oFlKA6CGd3`v7b%?Bxfcb6KR&lmMcfyr5LXQqoDXRUrp{wYLT>!l??h z-&Kv%<(IABZ}37nWYt46hAGa`R8>{=AMydb8KQZ!=3T4!ue^}~7;CG~i4@#mN}y70 zq|cX;}Ea5#6NX(fOA@>zVf-9xqJ8%bd0sb|FO;61AO z6{`y>cKQWI3v-^q=o`!mxd9k$U{4*6{veRJQbcmiZzoCcnBVEoB0T9n!Ph#Lk-Sel zd3_Rso#K53L+5vWlb&O;-F5D_7Q{cdH5njh!_z($t_~xKF)<_^7DhvZqoXgbw!E30 zot;$^irbe536pqOyry-3R^NI*J)ky#4uX+!F#>A?t|X*e@}QfVY;aB5?>`TCx8Qq3 z4sHfRa6PZXON_Iu-%~R^7f%8TC7Ixy59qqC6 z=BL3v%@F8U?gm58IWUAP|MmV#HAk{{=RM>Vwd28<;|FeFYvN~7!~_Oq!58RDg9N-f zPUgwNw<9Kx$sy$h-MZiFwOfMJ1PeF^7*`?yDil2k=Q*In6bH^>Im}tm*So!nne4`Y z_Yx8>LT?99#3W5Sy0~Dz{Rg_Qc|i_r{|Yg%Bw@%hK)_Q?Zqw9NuK%Nmxa+RPn94}n z8-|HF0QEf38MzTOD>OPi&ty2+$0>;!`K=h_2D8N+^wwNcbi+ zEXnSv`40+(EzDQ7mgXKuwmG8A~wBa1Ozi1b|5A@s*Msi3`mq_%l6?_f1PE@R+;c_sD20>6TCOY1Jh`W|T zvzA9bkLP&Jg|>}3+cN&M`tZePf&QaN6A|yps^w>UyPfZ2YW5_9ERLQ`LdKBBeBmC_ z=KBISHw+teCNB-$vd0?c^-^85eEeC1NviyzZ;KLKk$(DS_yMri&8+i+lY;d`S@@Lr#Z3dZKB7;#m zyUc?bE!xwE8vWpv@7GE2KqqDBH2cN+E;94Obqf`_X95E`LXgY1lv8YtS~>J?XnvY5 z`V(ROXdG7_ldOnB^Mu{x!}s5~n}}A1=fU6*23Rj>8@+3v zRPF?N%ACtD3Emn!WCAftUy4JsE{ig-uB0o|MhkD{G@#nc=_Qo9%Zk^*QK3Td8UX3r zK~SgXxkJ7Os-Yj;GqmAZgzP26KjLt{kw*fXQKu%??dHwkNa5r(FZN}aj)Vz0WQBCRR>#c2>~|*OfpW5{+be>d3wp--@e*S#p>h2z5Df-XYd%ydCjlGLGOYpw@9iK85XsM%Yayb6v_27oAGF014 zjEu$9FmZ8V9wdw5?W@Bcr+C#scM@3AFfbjp|CKBv0K25QzR8Ujqj)+pKdSu6?@wPU zmm7(?f-8NXj3Bbm#ya9}cUNzOb8RJu-g#mQA|aLXBB^KvGo^t=p%D{-z7V&d%Y$gW zKZ_1nf=`&xo`i%*xk7suT^w3XwH$@ zP&s7FGLGmkG&OD-;U8*n>L1MX$ia8XQeR-urZ`)J>1pwvFqr5t8+yg9_?6p<4fdfu&oCWK);#7OyP)$$;h1BVXk!kg>Fk~(58H$J{Z&diAG!RM&F z*{dR0F{hMH!j*lgu0q2B!@GocYKy%hAnE^uytj;sqg@+x--G}O?k*v?yIUYgg1ft< zA-G%9gkZtlJ!o)u8g1O&2^JtY1h-S<-TV9Yw`b)$>%*GTT&a~Dq0mf1D4*WZ(3O0B$@-P8N-WblFfcygcR z*j9%?Lx#bFbNB1ZkdFApbw^4(<(Sk67F8>h9rt9Sc^`#jF*i&W!Vxj`CL|&aUb%JC zV3~87E~y(r7%io;f><2`?(Arw(W>Qr)#b4~$tcmadeDz3>GvZ72GGR@Ql!BO1LE;z~rA`bwt%SE*pIl>JtB2;!nVG);M{QaKOYt@OtKHdg&8Xq>8qlYnI)g6&*b%(Y_#}{ zSuJ1=Ej!|-vQj6l$ZpbcJ_khz@m!SE(gk1S5Vy-4o7M@3nokcLvsgCRSN5TxcKOno 
zgB^x^8qK*IT-RSX{+5`r=Uti{A7~)l3P?w6@)jmW#vkr@^wjWEDPwOcL`|x_OR(0h z^mvLz{|lH8a9v$rC!-j^6vw*})$e|51-_Miibb~x;1WHLk6A^`4C>YNq&vAny;fsz zGWf}1zc-=pFQjZi!5YfUP*)UDg>=B1;<53(M{dQJBKG&-@fL-9(s?u+oxy@OmRyB9 zQS4_rLipT9lLMu1t{(1FVmd*t)C^O`;ysGrrsS+#uh9P2j<9ct&}}uYs_k zwj}6M=Css>X`++t^?P5%!(B4H?g$$b#yS_v22G8*b6Io5S@E7Wyo>K>FkxboiG~ra zi+8j`g!3<3t@sAKAlCXESXA6_VX>`_jJ8ZmW3vTR`{9^85oP?J`L|=X$hBlGy589f z_PAH7H_yHmE`GhAT#eQirWHnLSk~KwDu$Gf+3`GFSK}>xmt@lPdXrDme16@a-SzSG z4!*WVW4lY5d9K_}6~=+Q}sn|eim<*@TX z`ymlCfgOaW6PjS?_O@GueI8hty$zuINJ=WtPh z`ygA67THNA^F;-L$9!2yjhz`+!$Pe-sgi9FH%{D6axHrQ%Rc^lw2o=U7!gc7m{O@& z_8r~((K{$9)mNbn0i~j zdpYc+ZH{9QRu}F^@W4=34e*lJcq@NPR8>l9QE)uZ!JvLsuD0uYb(VvBtnc;;|Nfo* z!5sX4MlH&!cNIEsY`2_@u_i(~@f=9qxeCBx`$RvmXGYH-qK3u0Xp}aw9HK<%bD_#( zD`d29q1@xgj#}Nf7~j^6^9kk97HL|4Cd&keX5uq-oNmb;6XV-Q>==+Ao(CCCpQkB) z$VIg21`P`j;`ff@9rHtex}O2);@|SfibhHMm9g0`{8P5M0KMuO#lO z> z0Yvo6_igM}l3Bo=nB)1u^6RvO_5Apl%_HA+73yuV+*I4}u{&~yc^hm#5Z52g%G-Fr z%4^4%>8ldi_B0C*X+f5$`Q(p2K$KZN*%M%#>XVXtuI$T^WAA8~Tlppa4{c24XWt)b z{=I5vIa@$vM~j$J+O^Y+6|pj_Tb!6YP#uwsF$Jh0)4VJ8sGvOBmtbeVX{I;KbAXws9?AH`{0k)wUA!%m59-Fkk zNXpbg9&Imjg_}|n-NVWZ!^UpT>dj4msW86Pf?1(MrFV;*vvP~%MeP)@Yw`%gUq&#k zeC*O|60hzH6AqGuYc{&;0Lil@1S2zoj5Ql9jl-sUI*t-Clbq+Ab-oLknpRqNYTaSS z*$80ppmyx_t}PCu%UhY%xK(^q!VkONBf_fTAH^*82t$?)%DVOMdXG`RB~NsRH?%cE z__(M>H{tQ(CN5!{bYOfOb7f_PK*-&G##yd6l3e=z`>#pwB9E|`w!y&4J^$$mF003U zX4>h=@=84TPD2CUcoRJapFtBn>_|6(u`h(!d8rk2OPv<&#d+bhi_z>wjwTbRVjeOr zw&)sBWh+)&tb@IZViyqHm164Lj77&Fhc>hvx^ZZ{))LEi?8sYDPeoeKYuh!DS#L7E zbkFAq8ym>`B3nH|aOthTv6`Z%jn^BrEYlS&s!(=^!WG zF8*|c!`7l8@r>zsp!JrncIDgn(~%xB#ibw<{d(LP$l`N2-2~~$6!Wp3Visuf9I?*@ z{j>g@-yAvbWK>3EI9D_n_Gb!>sTlx7NAZ?E7zUBX_2Nyp9l45@ny@^644!*kc@qnZwudw zP59Bn`3NP@YK#l;(#DSkEo%*vS{t;Y2e04AuGxM{P8QJOOAPe7ep8VEwbN(1u$}Yk zT`y8fS~wMkE_#rsl<=B_zls0!%~Kn+G48qG>;28hlpL``Q|X(M8nV;gRIbT@VAny7*HpYsaah-HCq2JPr#P_6B>hDsJhEOrjWqLrWaA48>vP?AM@=w+PjW+=cVd zJif;_p}%E&9aQ#j%TYxmRI^RqS}4OGq~Hh6ISB=EJx(@UOZdYvvfX9l2+Cmukfvfdp&1 
ziwKRzd>PRYij7SyBjq?vu@)n0+aiu{18EyE5M4KOr}}cV6tEgOY?N4Qi1+DUjw4*w~-s9q6Gb$zAFyNWYwK!pl|q=w#if>}{Y_E2Gk8WSHh zw?Z>7`S|z%Z~8(GX+FvItF}_YFk@4JjWbbclc}V-9g!P2IlT$^ zX=+@>{jRq_7O2afEM6J&aPb(+*YG8H=v{~F$9X~SuXnKq3lGLCK`v@ByX5ImkiRcd z9#|(!O!>Z}NM`2UEKB~Rs1*E9o|(8MHoDnPQkU@3(DzWXkPRfS{3Wmthb6GgxBvO4 z$%}rPOc=YUi09PW_z8A{?bph|I>}o{LU)a=gg<7B$|dq}NPhz#Zf!u{1Qr?DmfV9< z3>KC@%WleIfjvtP0bjAKTh9+$>Hy+zMeg`0$Mm*f3j^XXVqT|WNzEL`y^%s1z1JjT zS$4crQB!fh$Rr4Le;S4mse6mOeP=ALBsk?+UcCz;7jA#om0gh5Iew;C%~BN>P8%3u znR}G~jUiCMuj!lK%;+cky)^cho@Dx)Izu}28sdd%W5!dxg)ICC0I0!pkaBc*$nrui zkfj3(h4MM?MD^Fm3pE0Jh#Noi@>3#++qch7PX{I+Hlq~Fde5t_uju>Ui}hql$g;{E zsqqz?#L0VwxgaYVMXpQD#K`=Ud>(Es(jrAkULv8UMwN1m2mfLdh|~+cpq$B?-u9|s zpi6JIIwP1%`0jhMyc`z)v&uXup_;~ce$DdgY}5G;TFA^GNQC5_{Phk*>)I+2S;brG zUhKJ}thX96{As;n&uEi#(7-V-6#6w?kUY5bh#~G^oWy0{KmIulwi|h_j3*%*-M%%b zWG)&TSr?~V6OmNPGe;YKqh3ot8&iin>bJuz_4=VrBbrKHU~P?{Nb$SPaX*6NWiqg^ zm0%!Kv)^0^9LPw!Oh_k@3w*gneT%|?hd0qigdA6@L2?spe$GKgJLn@|u5Rr7>F1}k zXz|2|Y7nJCsq~4iS>5YYbJgfi%V-D0+Q|6M7Ub(g!B%>7Ngrhn~^rQJ*XVED}`4}Wz*U120@sHPov zA>++7icegaD#d$*#22X<8Qea_KSyl*KQtkK*GubIYV=3OXL&go?=CeloDuNTz&{B> zc6c}Xh>O@AjW(6H(pxA(J*?Gi0s*YgCIfMik&#Igku<|@*dyU2rzdYHDPeJ#tLrI1 zEGV@hu=WEHG1SvbYmh>ElN#c}zOfNPMTFujEjg01(%H14@>_~>4tOJ^s%(F_|9M#k>3BW1n;~BP+cS)9w3XN}QA$b; z>SJZi{E7|y%H71v%H7_;LPqAG;w3igiCZnvGK7J_U!#n0zA+lsB6Gou^_Z*P zZ1~tBT3?wH^8z?X#Wh$u>hHmoj}c3hUzaSaGk_gfUAlv2+wk=~dFe$(Fx6^w7PX|q z;byPts_#pO9yq*lP}#buqOL|;7pLzQ=zP|0B#M=3xuUq0V_WG*ZK_b_nyR!P^hBf)0N(=ZH zEg*WW!OLAl>Uv5lSbT(T%K+PDk5_z=T#*==1SQ$B*^=eo#!tX{#dgM-C!}mT`#y$`6-GlWC-!*tV$=T89|*mb z-#IMnDw*y893nkf+(BXDh{Dr0une(@)rub#!XaVt@*TK3XfeNQnNn$z7sp2|MnkskSWC z*H;pAFvZgOCO&NvGb2e#w(Jq5uFOIkrf;m~%892!h^!mOxE;c<)tFb3vs+!jz=*@T z7cxM-FQ?P8BA`$7#&3hFv{{F0HC#$U38!9SBf9_P08@s@S1}2DCaj4q00OKX9V?c zsFyDZxse1HLl_+akLUzTj1^XBchHBJBNVp-`S)Teo9Mv~HGY01d&pH=UCC*&GW{9XU#x{Be_?Yo0I4x?DFV%-J!sdsG~7?rEs+Z9S;#Y zrJO{%x}1VuQ=kA)_D}16MV6EKNgU=#X=!OuNlA2r;JdfRLjS3;(AsX+Wqev?a-J_o z&oxrQ6_1QI*kM+X@1CSyh8+#UWsLZ>2=8%#`T06{$^iOYvwT 
zPCIcNlII#<$c5xCT`GC9 zVrlzSHZshdUE?sePObXA0#|x=Z;x0$T;T3Rv>DddLSsuuU-J_#d5(2K=~5okJ~JuT z+JS7jnc6tXfm*hueufoM*f4T*j0^}jM1uiMTXS|HPHI~!ke-~s2`5d!OVJ^+>3HZ8 zzpLJ?V8=+g8EzqA+GoMKh`erC@Wnl)EJ_J+ZBTtNQd7YwS{3&*WH!(BXA+^jh*J75 z!h;^4ua*i{t_=n@;&DuUDvP4N$E&^@*7I!;5Bd$KjC=Z7bJliehWRqjs(Ha=1T;py zesgZW@!8%Q!Njk84G8aBawNr^_~>*PMlFf(<5%tL?)SwlE6-&QWqJ{nz@u=YqN1@K z#aEW8S6M|Lhf}nD6P8Zr7k=5L-c4x&3qoKrGQ-{M?hW3}!G{(s`j<>f#cqOk<=Mh0 zt=-q+mQ$sOZvmpWT%UU~uN0A}vA?PiBy;w%H{Yf++un3M#o>F%L#&XwXbQ)k8Vc#6 z+Pm_)^@;2jt+g_cdIv6UwFuK;L~KfkbQL2v`}-DGLh1Tj5Hn)rQX-BLrvvS=FV(TJ z(Y8)s4Tl&D$A zpNKkd))hhW7w_9tvRb8a?TA$u(l55%KO8hFL&es5P>;2}y|mcy?Cn-xwpJnRZb20- z+4*iiMrLbE= z=zG$m3p&_+PgsrfCI>v1v|nm!{SOt57iC{$>kTWN<{#O1S`RHWnJ+Qbng6Q?BfuW| z`tZqv(NmmOQb?M<_S(ZQ?47LKTorP&>lSohi(O^t8l_0w-l2-afC}dedKk(T=o2u* zXDuTJN>H3u^lDwj#4 zhovpx=QuJ6k5A}(K2yrhRdfqWTEYwuNiDaqQm~wbtDt7$+&c=Ybbx)f6X~{GrI@>N z@c0TR?qrBg`c1*GSA;O)g<+X8yaOHuS!g#UV&#du5Lc7)(sC2J^cQNMx30KK&Cw-f zZ=?F!&oK@gP{{)ah-!#%{YNccKZQXJ#6dxOJu(lBgBTV+3)pc3g>NJ1fmlM8uKSm zSeTNKM&w8(YrwHvTd;D`ow{~8qNAh7f<9dyEC$V#>V{fL-F;ng9MJ@fMkLpcv5OCm zXLYB9bmv#VIFBj+y_m^_*Z0@%p2%J^%aSP#MFm}5UAGnMy~9_Ww;VRaXaj|=hWNx^ z4iYCDDDfNuW&~QBjE+(w9Zn-h2-a=ED{<2_#z#;#A&7?Zk5c9ALPhK;C-w)2%6`l_ z_FuFpmOPk-U{9QE`W`=x__D}|19?QxH=D^^k!#IUL_?@5<|+Pvoyx4&XU@)VCuSMfR7a<9$6XJ@G0#H@cUxqYYBY3M&+?09pxM7qQw zHBYx#$By7MOD|;}gy&84tuxDMbXkH zqaWRPRT=4D=v7{&Tgeb1HhRIvIoU%%Cof&(M)pWY0G8GOA7 z^7Vz0`Kt;^RCoEg%phBRyh7nk&AUbuc#TahJR>9dU0(TchWnpCM$J{na)uV~5d;JT zIu{p-fn;$bDG|F@CmSzcy~1?bInn>Jl{Pvu!XhmKk=Sq9S5&yZvOSU!fiN<{!pkI9 zJ}|9*&eOAIxVXybfg(<96A)_IuZrrLzUWvUoOJN+Wxs={XT&cv)FTjZWs@z*g)j<~ zwWnCBJQ$G-W^dQ96fzD~et5rPwuIW=@IB~cHvR?G9u+1=&=!=K0(5@@3#;a1oW1_c zEStR;$5N;&K_p_o8d;FsgTxadkE@9C*b-8bc|X0}7QhWsse;Zj1gTtE-@H!KIiq;k z*-YW4rt%qXs<}ic>uc<-8o+ev_4#s=IrH{p|IEj!^6gq+>GAccuLbQO<^(E1oXuf` z=SJT;iy(My0$lhm!t=ugF zI9VLao#63escgHh=lsT;{UAc+?4-vtDQBX3WJ6Yg;KscXqo3>d1UE1NFxw*%%Rq|- zXlop4Llfd(#;j+YDjcFTpXGd8cJ=Xt+tl!{HYQbTqKGo#i14)-mOBRIxy8b8Y?Mj? 
zp$5PWdW$X3fNA#Oz<+@<(D=dk)s^uKA^}GkvdB%DD0Kb`4q`ZuJv=ygmBC_!S!snx zbjiKaObpqg?dlCVW$}SND^jsI((j$Sm)r`|AQd*IEE7pZqW)Vlp%3re-{5tE!s6de zh(`=$$nCJl@{b>sL9YWd3y{H_E+5gR)S@J7TtIe`U1Tt$5iZ+i3gUQ-O zn&gB&ynic`?`%C&@U@Y(3(<8b?0>wzMJ)-Q{_n1DQB;}hqHm0Z#FKdFd^Aamyz{iQ zxaP0)nfF&Pbz3i{E;!y>g(NlqqU%vy_Kz^!@7dpQKq8>lK-E+vmzxtIMAks>D70%f zZl60O5O873-%lFC)mrsS4ARTw9`>7Byjp+vfqLq*Po=hl%SD~VSuaWdr7;?0XD>-H zVBzkBN`NRRa<#olf3{+Gj#SShiJpF=KUF=TiJ;+1muW==VFTic4%h3_+dCO(v~bv; z#97Z=lv?9Fh@XlRYzR8+T0hfkiE0Ij^e6_u42ZL-i5!DV;T4<(6zR#R|-XcnG zNZDMyB7rh%ILz_Fa+e>CZQB;?qg~}U*zxSYPm`Zr9`;;@n;V>!{HqMYe2k+9$@cu# z3si?}+o^PB1iLjs4 zn0>Uf8=mvvI~+6rDBfU6M>&)T64dsEbj=FeRU49~)T$`%+6djPf>HPnh^(6#`|?ri%a=~e#Jl33mR<}Zz>wnD?iTut9T*i86-XEK8UMp z)Is*+F}Nm9t#46{{7XFkJA0Q=LoGfK&rha>&!)4iYe(fioexwSM^ol{?Ku5TD@AXZ z*=)PamrzB*NsAxb(~>z)2JHqbujiQk^$j9fQ<~ndRQwn50vnfOv5a>FEs_Np0G#@U2a}(4l57LkOPX%{o}(1q=DyXLIC_8{67v74Flb zd?XkFxA?s4w-M$io;Cyu1Wk*-g-J;CNPZmJCp`~WNa3QFc%TwsU1`vzCo*s{1SpI+Yc^c|H%2L41w`d*F%{|8Me z`2R-;%V#_1K>hzEgry4r?w+&)$!=;YS=u@}NYAGJ2Gl_u?zrS~C+FhaRx|#wv9U~N z^9u_|mXKeJ)NSY@=#f4>dcnPReFZAx<`I8GfBb4UR8?L7wd7K| zKRAa(@)IJxv9@M>Dt7>6Lmz8$v2qyxNppIljl4A<8K1sx&i_I~Ss{=9+r&rid$Vb$dP~Zfz zfK^_+U9I+$x*&(2k5kk@%f3AR;TSEup&}hKhBP?Ktu8FcY$CAT>YPa=%K#U zcX^5cLSh6UB&z>FNHX&+_vb3tQ||ta1cQE)jGqtr`@x9&L!SImC{7(N)}xo|*5d(z zL;tNskyGi{36QQpOYU#>H|FurtGBcB($3Bfg-nRgN+2HyI-;bak{HVFb$gZIB=Ru- zWB>XJv+@Z^+}dbvWxT#BEh*{#XH)z13jY7&4eG7kTk0%+kgC;3E1O@;Tw z7Fc=!N|bUNsvGi|CPw`VrjEY;KKns` zE*KLN7t-B8yH2EE?cQda`Z|9l;u-2AqDF7$bSt;a%>7#c$La|BxMnGyYhS9z zN*=DXh60Js*-+pV6%_%8%(34jth@W=3+Ae@Hspf|@oNtwCRf@}DsvHAF`5Vjg$bmfGyRo0U z9>ji8GsECL`C0l#tZFq*jDCip(j#+Fy13y0@tzw3MTbn40Av6rYu|mq9`h%kg zAQC$_H`k2$@^BgZ>G&8!AdmTTfYHx(tOmlv-UB_6p@;C(IYJ(D5(fJEFrN6$zUEMK z?I(@=Qt|5A{EX{wX{aNJ(Cdhr)kT)N;Rr4Q1+6ZNr5pP!)FZ zK--rzA^ZOQKh8x7fQ)jxc?AWb^uRS;OA?u#h5LZBY|x(0RRq31HIZQj8lpc<)e#yr z^_!ZCO3afYCC2dTgbn3tyJ{E!yiYYwe1-s-0M6{^r-AEJ_thdO{_Y(t&Ol8q?(f6D z`1!$begON_`367_x>;JR*9brzkJ*z>7C@<1xaS#@kCqzY!lfvmoCxgoK*uQrm?iW$ 
z!#^0{cfoVvj7Qaf>i&wU1qC-YAe3lVBLGb;V4c~Nr_-%tl8I*skd7ovp8AC)e!3b< zs|d{f5gmpt`eUdIRSO~-r1};C5SWu)@_k3UBE=Grd)Was32>FrC}sfGU^1kte%*TK zU6rTYM*yrYH!`$8yfO`M1Y*}1r-1kTn}8J$1K4318NW=Hlxq|dR;95{Fh?$7rpo?wkVlA zU?^ML*`c##M@Oo9MX^abTWKmy!e{?hZ?ZoBWR&`;7h~nHoS>4+6w%k4T!eZHdS2|) zmtmeBW`q9xp|{>l7D#~j_307Yymm}=olZML#r`G%w0Yuvf-GK2!*1z7EsLMNssG%h zk|$#l66J;s5444+5rbrAiwg@O|M(|z)puzek6B5mh-r;R1uv?$7^x z>-!j$i>FOOFiZgd5KyBVRrpbb?GixcVx0a0WCB28do62q0GUs)6~@orx|$lCS>VSg zfFU)a3Bcdf2S#asPhtEqOlYR`)BbV)Ub16975^U%G-vixeLkHOcvk;E{KF>o4o3{; z?GK7|)!KEum^SCW_Yp9^ppz_Bg_rd2cOZa!2arAh)%2O>hATmF$1g#B$;fW+Mg2NV zDL0C82_Qh)>51*R4QPK-{md5oy}Ok(RzO|UFyA%Pj|vOL6{C>mRkIPsQ038sPj?gt z@?~w_1|N1{)c|x2+q%i*t3mQmR;ckp&5LfBs2IXBW1nv4Qy&i8KYu)BFWT(Ku|~e@ z*Vv7QOoVf4`@x3wrLZJ{i*=q1v=iTgHJ|O^D<>2Z1}@3aq7l1%S5^GT_Q{8aGYO$j zJfWk@l4nalMkL83Nb9D7|*WW8xCW5Lm6d0gtXI}!0nU_V4-9#rsMEhkyiVvd3yUD|mEGQ1wsnq#9jf%6{7Eu)Mn>*#N@bhSXIi?o1Bvt`lw;gpP1u;*xq zP@+Euzenk9uy~o+LK3W`K7O-R>$bv21j}$palyiM72ReIDA*e}KQniuxE*hy@qozp#;ARc5KD8I*oWqooNf7u^?NWt^Sg$Tk5_Z}ulJv}Bn z!-nYcp~oWd`liBDLOb7ZPT|Kw+K|E<0$PNYyN0hUM+gZN`QcJeR|w7~?X>vpun>41 z{7&LPZ>HR^AZ;?BG>kV+&Z7O&gbf1qtT*>yobl0L_uZzG7)=MrzsYV{$z+?OtZaXb zt}Tx>I)UydZ1@sZ*}9TP|6J2k1v!t+A_nhc?v&LOUr|(5Cf`ZY;6HSK!MbqVr)4R;%6I@BM-_FHB}3qDwy@H1MwW z%(`&Hf^9(xwV|8WP3Jrr{R+!B6a&AtBl%9Ij^nF2N6EW*r(sSp?gW`Uryc7??~H7M zbyhPw@(5ASFMouoUKME&520fj)*rWP_LgnT;%VHRYsMSiQV&o#6VU=K^{)<a5DQC$-Bafi_7PrRDOYMc|(K)0~reu3OIdiN;cYctIA091{)gDbbKsDrrhRR;BkO)}tc^GU@p^JQj~ za`qDM#Nhp~2x@KFT`wcG5~uX&ebKc*Z|Iy~9;ud9zsM9dVs(Cj-k=DyB3|E^Z*{5a zw2gS_`a}Ps7hcN{zfbbLV@!F44_eu%C7JHlknl7EYr!8Ii2arU+R z6osp2_~V0A5GxmmfnPegJe&JdZ7aOgc`3ADfcnA%Y$K-M7=4!Pi*S5&wk zi69gSBju56LwN+#dl@(~CG$R*JYEKGo<~>D`_cH>Zb}VG4PA0HYHH7<4;hT$7~|;&W|Ga=Y87ybm%|{OEHo+=np1 zxfCMxXD~~%9C@GLWR^vE*kir55~=UNtUYvW?SWbq8{yM6Q$4C(5zP{oCn)HsmgdD# z?KVCnsz2IGYeWC5V9_A*yg|IRJS0LH|6^*{-B%kk`fKeEQRyDuM2l~4Qp~+wfz*`W zjk|uGYV-%>*r7q~5R=px`q8I&7a}67_Ug2Q1DHJpl#*%_L@(?=-rtCDk90MNKFD(=nq;as4n<+FO6!Q;C*~5%D-1SjC!} 
z>@!vvkc0c6v$=5}+6U*{jPQwV#nV`ev_3W~h?+)6uw+02Nev}c#@pwr^)(3%_qzTS+JZQrQZa2^ zt}=g^Jdf9tk05whVOfs&?a6~F-d8Qu+C5^cuZ2Y?YYR@0oYN6*Gxi=8T@T@H^yt7b zP@Ji5CCw8ntJDxZ*yNjer(&UmOWJ*iAxq5fXC0{CtOgQ9zfL2)+yd<@3LfopLM;aH zo$AaMdCl7RZ34@n(o#d^O1}xC*`mTL6I`LYrzV$J;g?bI|5+9ekyPo z3vNUZHlDq7jngYWUkqS0(WzT4U>HDKXZb3gHhj-A`8n3O6gkn9F`ftYX8_C$sk~s{ zH`<(Pxo+BeemrQ7>mjeEYrkM?x+hb$vFe@|S^Ci=`Pu#uxFl`be*~nv_~DRvsj>9O z`Lo+KFZnqYjr>K!B1etesO7oxhW=GJQ+5ARwQ2#~8Eu{5jtU-mpO1C~wxbg4e;(fL zv7U7E?Q=(;kC%2mRWJ6qjcJK9avV;lxBF*Av^wi}J<3W2o25$PN5}P-$}k15^;q)0 z@)DToHbj~$njPp229DY>??QWVF$9>*O*1g@w{y)mMn+7%8byn#KO@)4x>sVm2JRvI zsjzyEj-IqQJ#^mFl!8{>I5K2=>L{DlP6H<+y*o4p1PoEE7Izg-aDlgc6L`z*ZisTd z+gU8_;%i0QQw~P-F|I?T5BzXrtg1tp7k1rv-tN$*cN?TEX1K)H<%3db1wHnHtd;xs z5{5rk);BIj{Ooa#wf!lVeYpQCXdQh^45Be2qP?2x5(hR&I4`KUX_~LGzI30KRUi3u z#A4D)-6811`nXmCh3ODi``tHu+XTrs&0yy#Igpb9QxuR=BNwc}Chyj#=U- zpU8e7>Ab;PK)l3E-*(FRB0K3Y=h{zB3_S||HG;<`yv4+k6KSfDd2wYXo9iHxrg{2l zTS#y?eeq@NUBv*5a4222!G7&agTHP($`LQ?(t^{%k_9jLE_D++bia zZ1T03M1HeM7xV%(%Hxco9x=L!$`f+(>eJi_V?^PZGHbsz20y>M%Ufi`jntW?rq8n9 z1-h@V*FMga=m1fEo;l01Vef5K?S$&?dGTA94jfll$qaiV=HHFAXy}5g8Q1d)z%@s0 z@E-)Sl~uGjSQI2})`WFByK>UZSj}Vabn@$>WMOqXrW& zR!XeuQC{Y*w;vHT(4&w6^l-ooD2npaMY`|7?9q>fL-?x_Q**u|s(MEr(heW*h?7*Z z7U!`CKRBIB_Fqn_l7+R2kVnejcyt?tG>$g1h$wJKGiHS^_(u;DWh+zOPvp+pHPFo0 zvrvBkR$Eqtf3{_|%y83fu%L~S!)Go~EA=(vELqJ+-b>k>n6ltYK-ZO>SXJ~k3YV30 z0^=KE5Ltkn<$W}a7#}nBx~{G0`Fwt2O;oMRBTt>!M#iw>X18<#!n^o7@M_@ z8h(QHT~xTuODBSyFWYjb`;O45vwGISu!1DNnLy{Pd?RZ`ftN zV_6`J;e)UDv)Rg9FV>`t(`7B_$)PN6(&FP@xAE9u~;?!vh5f(AWqlq`4Cf{>m zGfD!&sV@d?`my_|f=3_vO;Y|*Wxb^FdCQL(++%67OUw5;-h6b=gev(>J6;q{IRCROaXbC<(PQ=8z7Xf|m-fAsv+bKvYCtyqnQS5=(? 
z-QDFN(019;u&J(?`6rb>4%qGo-S~kY^Qim^wR3-+XC{)TX2&Ix;gXRaT96_;0Dnbp}&7hx5u7U$*$_(^_RqTD=* z+g{|?6cp$ZJFWirh|CW!>90;7|5V7Zy!DFXEA2@5V!g40*>tUoHZk{dFF`f09{BMI z6{5eat_7wOO_ZC3brRWIlu6go9zv^kwGp53mS0#{GnYa8hzrxEUof@r1S4XUmIwcp z!}6u4%g6U?(zBBj1AWPPXa7~`z^<7IWlNr`kYMm%~zul>4OVw#Ao zBB6%COiv_Ob>7p;yt_yIhJ?h_647fBWT@eyvoUOlO%(F-gZ5kf?-WF`#f!rVi2dc; zXft?qbrY`zU!6Ggyw$xNxG$`2aAN)|vU&s=n8e~LUr3^2C#WlTb?K;J476{>78rRB zaAFWM9+>pDPmGX_ifrW{&4ag>oZIl?g_Qbx+*xYT@9o!R0vl+eN&J#%O;Uihw7$sO zV8Sx76ea%@1B~$shjIKD2n=(A0$(lBQvF}-ifsp|Q`THM4&DfZHQm@DTdsyLnHTE) z*&=idHodk={-l$mHkE&&*K{9}KKKwMcH|=6{7KTl9-1MEDh=xAoLy=is+{JXi;)fZ z3YquCrTmTei9!(RIBx{$d3m6MDnYDZUF>aBzLDMa_*rZ}ReOppHXzb{~TO?nZ4+4plmy zpeP@}yND6TuNiMx$N!?1j&N5V)Pyn+7q#@9%XhB!kPn~4>F8jFMo#xIaT_#Ta~Wwe zxniGf{+jtW`H-GVvY_;vp6Am9o4uVcc{z`rJuO=dnDB$yBGVhh>4w0*I8 z?MoU>s#ACAugZe?(sU7pbO8rz_Vo2CBZ&JYYIv0Mfo6_MR}and=$S&3A+p+O;foFF zBG|jO*oFkF*7qodwi?-29xcGCq*-ETM!J^hHaH)ptB9FNOFiNs!g|lcx5PN#a%E$B zSu4o;nm`J+;H=#*0&Vj@uwYFtAYz!Qw1%tmc;{AEk>@N}uT^XQ*ifFi!Fek*pYNn5 zEBsN@&8d%PmaB_{fw*pP!2R&NU_X0&OS|^Bi`1~r;(TR7k#$?9ZU(PnT?HFnNog#$ zm6h>Lb(jm~MZum&+A8N3lVBxeTsy&8$qg?}wS=UW9htG%CHKI0~6<&D`wbe(#m3KQX>ymdVgtW$pJ9 zRr|6tVIGo*yLb_KyI(~0p!5*al5e8FaMzsy=_a*Y5WZ-?9M<069*VWe3V(<_Fmq)7 zi62;Nd(KEq)0*2BmC)K;g1tw@?-N!lc-zt9XKgR!wmMEzSzjM%N{kybl5NvJlj#^f z+{9>ZVB}M1V+l^L1hsx#HBzlfw)Xs~deLS~eRUlHo1L}}j;yP4xEr-3uM83?rb(tv z)h%iMHHvxEc)JA=GNIUG7Di+M)$PtM&M)?_w?1^JyY`kmvo7r<+u!lN8BL-?{SA@p@1JFcCC#NDW9MF6w1db>rb91= zS@GiM9ca&^omc#(RHu z`Mn+bkIXjOUc@dVdl#HvtTnKE9u_79bRn{J9cmwrQ#C&prs+=)*Hm?A`JN=Tw&hY< zu?iP3NY??k*ZSRMC(Y%1<9-eP+x`1fF(;DdvyG~f1v7G#;wopO^z;Mj_+=C;?JeYl zUmxSQ;{}9(Z8E`*A|!-z!SJq7=^S}@#Mtj|e-mgmsJ)8}*0uM%1r;XSRN$!xqqARL z`W6l>XUo5&>=4MlPdm`!sXF19ma~}k#c#kJ4F$;Dq_*c9tgB#p3*we<@@c#J)3Tg2ZZ5F5Xx5lE1{pt*ZV;!r8 zuXdltUq`9q*TD$ci_% zZPBU7pqYW`IMv|tXYao1!tAy{O(_;$3`|x~Dx!-F85}97S)3;&@levQYnzN#~ z!1W64w*%ykQj<7k7Qa$2?e{_6uF8mm1NV>%P76yqK9w z3i|crSqZX&A+oemtThm*wK)qd)g9Z2wmk!M^?GXT>}{h)Gw^eb>fykPBjUFda!yn%oe1*eDUNX1rT5>8{*1 
zDven>|GlhB?AAS1fjuU{B)@AMEdhIkC$6(i`&5eb73o~*_l3xO+%Z=3W}#3Y7~%FL&;1@RovI2i`tPcen=Z(RjFJgB zaOk+Ek8UV@o{af<6YJ0#(DL1U24ns>$1X?NAfgX1&o&y^NE_|lz!OKGAsw;H`X}&J zE8U$u5p*|#;rGV2_}kMT!OwW$h2s3h?%}92W^yG1yL1T0k1@fmGQXLzMQC_IkSb<# z*LX4{4yQ$CnU40_`j+>vUg?J*%zw^ufUk7NN;uCxGhP<_jybe9)>q2YZn!fmnmi{% z4D-JtKpDTz3HiXQBOsRjHwU4FV`TW4uThWgUO{rr%iygQfNXo7EE6X4}MhGv% zyyRZO27HrU`d?|4(M1!jJGKj4HaSJ$knrvz{v;7M7KiF!g&d_5Ei9fLX0}>Ieu!C! z$|lwM?*EPZGsaa?NeRvD*ihsW`DI0W@s+lSs+4I20>0%FFIRQH9kUK% zz|Tz48*S9razJCmiQ3sQ16pAeFZco)Y9%G5nDlhO`JWB|U-S;Ai_PZB32}l@8_-1Z zerFh00w~@nm#E17=5UVyO0vCB^qu>+%a?RJAE!C?N+1)7SWtFl05%q23;x?R((f~P zAps;8={x|VtZz(+aRn%?|I|N5b+$;6+zYhg3dkO!G@xAC{PxPf6!sE`W&7Fw{NxY7 zM0j0KWyeg=$p9IHzkOwjp@n|w#oi2L9;4OU+pB-FSob2h6XUlD2;TsI!HtQoi1?fs z;uwV;sWE&1Bpc&8;0FoMYy*&_ob*yRQf}`kxc&vrNFsUJJ)X5cZ!R}E?oZ~!L(T_m zqyAe^`j-q}03&}2u3#q@lJXCXXMiul&r)O2ev_4bL(QXuJ$M<*!z{c?;dRuSh0hRvuA z*h;ta-PfObR-UD6!9Vi(5}~d7`zeaIUar!(f=_v0%0h#}u0Ll_H-VFP6lc4s5hDJ( zvsV%RxcZey)IMJMnbN@TQ@t_x%6J+&%TX4<0aSZi?r&Y(f7NEse|O>oQ=;faThV^t za%G7UVy%6znX~(e21i*HkGo`jaa$C@`lFVb4B}!W1s#Z7Ru!lRk$>jQao`D1hJiGZ zKLdK3W(%UNbQP&R7_Zq&Gxz8l^t`~dl-5hjYy+5*m)0Jb8iHClFg@>||JayHQ?98j z_MEacF&Zdi*cTPEXH zr`Yk6Dc6HPF==-d0iM%oupq+^JBbT=jh;3B5W?q-zMBv=eq(}lm26H8mxHMKkJrE6 z017hazwZD7P~1HhNch|lo3RPQRt`3sKjPEF@B}ao*=<<&;XYKLR_q&DqAs z$7ZXaTcr6!SQUL(UoO_hSrhSBGyyU|7Xgy!r&hm zq%xrgOjC;2oY6CV-DgP?$@Y|T%Vd+QovFeZEEXd>1i&vN5rNNL!uuB|sBglSfVRZY z`a617x(~k^&e+j^gxv{O3nz~|GB*@OiR%zKQ2yVo617scQ(^YLVj^-h z;l$S(Y-t&3+2dRsmR>2Iz}&qoysV{q^HBXP&Y<(@#&0;hh=+dF4`(Q50KZ6kGBDef z6#n#bj4J^+w1j6o;}-8=W2J?s#Jr6Eac!YN7<}57zFNGc?;iGv{q=gk+3BQSmq?ndCekgDit+dbyjwym>*2<$e9zSlgIY zc!)X{)NF)^UH~czlJK8Ir1M#6WQ*b4J4!6MoQ@KIP1b2!vrxTQzrW28qnhX9l*sRh z$Q)I6K9Z`(~>};KueQ7<4~c(bAP>X**HyI;O6{kJxuCfI!rL%00LKN01jE zDK4~L{U1qQ=q>t1c~nOsotK?4P0?Q~>`q9_b@%UqfX;Bv9{hhs^27qj6ye@K;2OYW z=ty`VlxK27J*T-|aF3vk+T<(dOKH}A(}BP;z+LSarWQit9)Kz`u))VB#=KAcGKoU{GT)P*RFhcf`%9 z=1HRdUDlI_PFKE9w0gQK7i;P)i|F*8&~2E_SCWoq2{G9KdUg}J6459)IMn5_bBaX@ 
zq|@cvnp)%zfVozz6?%R^CzpuYR*LWecNe1v5N}dt)&bk#f&)a%mBcEU$&5Cn6`O1A zQKWa*qUi#jD$9V1Rt|s(-+TnEL3cBEk>yVv^nA(OaHiPvR3Tag6hA~8ft|=YpgPjU z8w)7sgyvib-zGh$1P3T4F6HG7t!Ag(HMwg zz$h#%Jm2gM?}1=V2UM4A--3IS>;tpcogGlZccC^b&4D9nTmZ%tQA$dxs~r#!2nHnO z&zmzc2>;Q{c#+RTy$4iKOzXXOD2THpawqhb8|_7i$d~}^9W|aaHW%_C<9o>`V8PuL zUg-tudw06|_zQ#$CLyS{eNk4sy1HuT?2I{!c&D-g1XqY{dusOI-iq=NqSs%9 zNaWD^9qL3Xj*162rcv25l6_o04O(19RFEF2!0`GP;dj}0{&@@7OZ9TV2!nvI-d+xJ z9|flTm*h-Tim|JFcIEF5sC-hQ4%)3Sb=`;%!BivPHjHLWr06!wciaX)ytPhgY&N29 z#;XS>ak%*d%%w7qz-L zSVqj9gAgWf%X3S8TZ%U!=VhBQ;oao|gQ(pk9q_CxWa=$Ah5Zn&Pw!Y1OzXU{0?U*{NfeF|6sP}zA$&R`$Fh`iQ@j-{BM8lsJq@bey7vnb0KiD;tK~^ zY#&7(&1rzVE2C*1V2nojcuv^o%R=DYa`e^ez5u7~fvirpjU%zqcU@JM!%J|Dp>G1> zg2D{7&61Jw{bW1mL7EtM+>Nk(#YsxE)TU*8VOh^YkE!_OJIXtoEw6RFKVtTV`{UU~ zT1^g7vzip!Y}*?<#=0$YgQm8&(SXujh~+z*7hAZEe4)lVHKa~O*X7gehtwHb5Fr+Rn&(%v*$m+kN)u3ZWO3>A+;fpW#ZpLmq|3 zdOMXLdEMiusa0$6v=AtiPXr)>!@Tb}NE=<~2HjdG#mUxx8a_6kqKb$Z)_~bL-yas) zFqNY$NXg4f{Kg^oyZYV&++>H8Cr3x&fCMQGEpb*ba==wJljE*uyi+Jw2RNYyhfg@B zqWIoW{on{|c$bvwv!$@nrIwm_UH+&0o=g0Q0AqzzKE3M-e2WKSiWe^%j_^iMI1zXn zEr((Pqg8c6Obm%r=qR9CV|We-EzVt&ThX!T>Rg@=XE3#!lXL33#rN&s?~JKcn=s7M zr-qT1@w1%T`T3-lC&lYuE&sR{bRi944CYiRFw>2AbwzVs=V z$c_?x#?M=H@oskYpSN$j4bBGgLQM)7Rncc6Mg$tM3E;dvqoGLWY)A*^m;DDm9l4d- zo_6NODD>c^c`9V2dcwReU7o0t!&D0?(L+C{W;Cs0;n;+j9lb}&^5I7zMk0tB^Huva zf=wj&D;D0GzT8STgvL-pZ=gdM=jr~X0f z_}^pLoqRg~`&Pd*en48l;3bF{M}@9LO-3ikSZPOUSuD}8gOzTkR&%R4%fwCy(out>+ zw9&mF(w-KEcXsM+g@PqiX1iRpA8;QQOE|wZ5Cy>2U>V*t->5Y>keWmsg%`|dZZ16{ zN36VcmPd6kDDfd)K3i@yLGy6G=|uJ|?c`m1Y^2cAZT#dmWyfy5jIR6C0%18@-kDP@ zI{w3v0Ew#Q9Ur!@qT;ux({^hYB7r>xVpUI?kUS)gEXqI%AB3v?t-I&5I7?CRwLEKU6DCYgG>ER^v0OnKKUFgcWOl_#gxCa8h8h3y0R^ z2j} zWy07rt5KQ9#^VeNOszSh&>E2lO0k64d&=s0E}gi5ZT9SqZ*-luQiV+jcK5xq+2B!; zLIN9S=A-<&A!X4YJ8w^*FJGuOt-WG6|YjoCX<7_7H%Epxs{ zf>#mim23=&RDe{;DPzRpX5JMmOLuOzxL?@;r+|3a)_IC=^KUA69hk&=l@V2!HP*wo zRiJ;-zA9>~?||^wYV_dL)5tJe{c=UEC`=bn!56=JoSFX};E+P=z5g(4NVy z)6$IqVGxRNs)(1~$N}wqCGjQ%m&h-c_zC@p%erGVyey4>>cjVA336i(ZW9I1+jUQG 
z+-AO}fz2ra$%@R=PIbW*|8QB$w6%DISpj7`miHib;dS+Z8%A8invEgOnN`k~5c25#7A1l(ELiN!hX$CTJzdNRm^4SGRLOj0Om%r_H` zirW>vJZsK~oZ*Dgp~fNqmDcJHObb)d-U6N~0!L0=VG2+SLYZXDUlHo81;8Y z5xF@z0Sx}YLAgHJVdQ2tH3&9V80`c`O^90ho2C!WiBhBDO;dNqTOQT>*H-1Wq5K##Hu-PVK zV?$3-cW>-XXIob9o+f-9k3~W|-B}cC^!ZZ=cTa^@Xu|NxRh@kZ-Xi|ziuXNsb)-w) zXNSh5C`|nqzGq2(4k;@r!`j&4Qd^W;@iS~F)Fu``NvUQFs*vogn^gLOO{G`sOcO=SM+4c+ zx+I+ZEJv^!?uLEE*=MNLpt0A4Gko!l|Gbu6Hn@0cG^;B`w#{F`WFvBd{G-=nd3!Lt z=~Qg2vlzEHC6uoUByzDqNylK%Nn?jZ`?A8d`4J}xr2&7As;#hWWdaI;SAjyzSNZBuW?3CG4?OWX_xR}F+y_D3Ju)4ReX zeU(_}XQ525Ql=i#@VIoIkf0OpcT%P1v5oAAC~Id5CBw$HyJr}8z%uB=Zc-3zT5uT< zPP*cseoTgupCu~T01ug-{vEcWMA$_G?{wFMbS~!fbIGFmeu$p*i}}GUR^xYMs1AUt zST094sR)HiN+-{g8q|AIrm}%CnJd>GuZrX$wmJxlt#-r7%&P5X{-SvrvRWabH<>53 zY%P+UdW11)F`N=asvGb@JqqqtK8**PjkUMgzxYFQQMS#iM;LgKui&3qCfpty(4@ zaD$C~_c}>&0*81!3LoIM&jwfYw8!Us_6Qbo*|jwu=$c8kdN7W~tJ>2sGouFBI`V-f zOdYsMstH7ENmP1I?+BdYHCk0fhpL7=tBg~njUl-%{NBC}bc&jZ6$dZVDWxHj?fzn2 z{PYzQ2n3CkNoz;WY866fGsRoWsj~Y;(Wan#@3HX%PQUD`Zc?!`_d+-Ir6wB8JmMk9!q; z2-2V!lk9+_?UR8YBVsnE#@pDLz2A~D?vCJYb_O(v{dcJ3@ZC}${6n&y;J{XRa}bwO zjCTA|BqH%M!B!ZSn;Ft&N>%DBF%4y}bV8OJ3He-4jcnGowz84B(EF~(j{=5yMGLGV zIVa&^hJoD+1wzW~ny8BgSyVc$sgDE{9pzhReRA^?jy$4;@GG7!sfv9lT_MlhzAUJF z=q;AVgtv{(R^84D8coCRg z&F;PI_E7v8%G`LC&VYx{h$gK%{mTJ}yS|D?ob?YN5uz|c&QYlH<2O1-vt`|l_FLPx0YItMy;~Yzm(##ezIARS;NM z_fwqMLhUI0*x09gW#VBm;KS}v5NvpgG)837B_v1)$3#iGqlLPr|J5mIZJopoPG-Z* zVRXH)CMhp_-GPs*D9>puckK49C+ZUCGn^m~Rq$3!pqju!S??1#g*=XoYDC7lX$T$M zAsw6D`6vC|)oQlq4w={BEYA0wZ`hm%PaGmiE5C`umtD|wu~XxtIhn(wGyAGrB#eq# z-ra}pMPntoct>t)9=}gGClWw>1@i--FnCJY_eU}vR;A4SNFqimzarEZ(W&w zEiq!aYl(Ue2kcjKNq~7yxVa>}>rwAAxCPe*uRMdpSl$MwD~@#I&%2Jr&#<+DKef|b z@k|>HO$P|q3l6PnZFj$;&~YXznP0XICtHW=giA+WV5=f8io6-t4Vq4cqDBdQulnf7 zsnbkLPVSG7X3c+*v~SPYpNI;+72K*~nCN+JRO$OI(k=_=@SGfvOgd@kT5su6)SGc` zf#`u`oF3)xkAzb})A}NH{Rl629+Zk#ybBeEMM6S+^|#`vHuiH@)!TM<7Pc^LU*7MJ z&njpi*si|4F#@0ZT7oyvVOdZZbSzcQ=D9)tH~Sg7P!dw!(QlceV2Q-iAfT{B(N z*jwGqC~YAfR)|SPTDOo^NV%93eR8x6(iL#yBhH{>BGTyB#(WZzGGlC}-Dg~ZeWkao 
zv*Nvay4lPusTo3)6+#i$!{j0EQDu_fV${1P8>!ys>Whr?8XU3%`st_3fo))Wf?2^g zNs+l?kj=N(SiF!<)PkOHJXXj#)%Z`-XT|{#l?%$8L!NlJU89&pKIaXaXO>JAV#>pU z7!J9r+m%;Y5jVk8vF>l6I0z?22O0FtZJb0@Rc0TPjRKqUo(H(?1-|p)hPTQZT55BS3^1H_{_V*7B%xcW4 z?d;m;GaI9i#3&x_)$w-)&p(6`nt3eqoMUoovrGH#`$!xf-m7`m8-tW*3Mm197jKDs z#_>o9JZK$XQC76SQ8-6AbsKMf+y7lf*}IcCM0q!0E+HY;;rG00$EDHlAi za?t}jVSMnZb?5fpO>|nx@lS)>G?Tr(GM}_Uu4(TIxK|TDAiBh9a^=CE%fEUHm8Mev zuo9?kJVH%U4ZYXHhPp9CM2p-jKkpM%oNe%}^AeY!&0VdsLhPG?4@tV)HmMz_pzPMFzkp^TYmHWsSpPw=s_ z2S~caoFVm}8ydKq@;`S?AkvoGU^B0Y1>XjxMAt#O55zBC;cf5WCbEASOsSrfjjN+- zfbTgLfDN%jZFAISx%F;-g`A%J=&NOK&%*Vl`5Qn%y4j1Z3v9!2>opCU7Qg6oxHL%& zvQlGox^&%VGxb*S_>Z66G%%akp>%PD#iz{rwSWgj5|IqwboEXDV$L0Ngz;npL){Yp z+WrGlIDS{J@4LNZiMwSA470*pan(Sc={DN7>>mPDKHrX=!)a~Q_$z3t1%@kVOZ9+>_qBpMcx_TF=;Tzs3#?3CC_}ij|XYy!#d@ zPIQhTpv){~l#2-lrW~}>5q3*%zsBH`dklf*iqA-@)C@t=VK%L7O_O;L-uW4v#acRq z{d4~9rTB29e7sJYGB!50DJz6#t+2nBN0Pa01FG4TKP;)*Rw_&#v=T?Y5Vk z=WF-CPHDcxz()(E6A-SceVYFq)rFSlp^TN$RgP#`Y2r-=D3ydt7nC`GdR!&h%g;2D zo96)B#w85(qVro%8uDB*VV%ru`38J(uHdCEovAQK9yvtyZj2D+Y=d$tmnqonR*pOgQd_M%npw@%5)Nq6cuIX z)A#IM#aSGbacS316g&9ibLj>OE1Uwt5 zx16u8-FJ)uT^*)XfltWLh@JBnh8G6&{gS92l}2U$ER{^{(ReiJEG~VL{N(9HKPQ#x zqgO1F<3$djgqZkp`_j>fggf=N-sPL3c;G@uNMqmKFTvtF)Sg_jBLsg5l_PG67L(c2 z>q^dN7RXyHh@(fvz(WwtIl|dRI1HX!!}ISH-k4WY0Ijm`)X1)w7&;*ggA5Kf!0X0I z%}!6qWql}>Izo$)2_t<2PZS7v@H&;QJu&gSM46qI`EVv&aT3!pRck9hfh=>^H9urY zlu@tqyFiKKwbAD>M)%HIT&bcY%2IX%Y%Z~xnNdxSd#MP%CR9&{E8n?xhf~1$_`YL( zC@%&LzK<}`+MTEo&zi451$~3J%A0x*70$@S=G?F5#L9C<)U^!$0nMkcdC+@YYTqqs zV0e7?7l&7^A++Fi1qgQG`0&ymKoI3l(Y|_3YwwniB=!qkUy}2U0SK~crZ7O}?W?(v z(gZ60sqQKz;g13<;7qnpycYT4Ow%({0w2pYy{;FCs56DvIzQuZTf>PU91vNh<40pH ztIK6_vr-d)T|A|_!=Pf@HsF5Z(shL*W;@CrdBz>e$wm2P%vvs!0CBbFB|r?>wN^dp ziYEwn#`qA0)W8NZ*lOoA@J(bs)|!QTniA^Ka6%4aw}PTd-Pb+%lw>Y90en z96}F|hmXXa|`E8)+_ zheI|fWaJ)jb0#%6`$%3bZ(uhY#1Otn=VQ>1rg0$ytnXYYOnqX6UoKNrn9R$QE2IA23I^6liZCL_8!O$wG%jz9+P2cmAv5y!VG&>FPP5 z53e`oePMeqG&nEEv6q#}jTykgGeG{;u5)0bGbr0dm5YSmh`O(p4a_^3p;KMooF*lZ 
zO9y{P^H&%McI?{j%b3WCg`raR5WT>3Q1&KwaJeZ&P8v*-&nrOJxhjG<1(+3aytEckU}2vquhpV}tUl#9T53^?_f`^XEhN zNdyUNg?!4mc?fT^yhq$6bU6pb?yAHrL4A#G=N=6rEX&0@l*au+D|CljAz_Oj2C2)` z$82ivE(qo53jXQmHAn>Pl9at+?aGLyh)9Yfl2(Gl?f*+tJ3^t1zS{D7+O6-6p_H9!4Hl(PfHIh z+14x578ZT9zUb3vA}g@2N!W6o)$>~u8=aL6grc~qDi3tNYK zJsFBAe8zF|yk7Mh6U2h&HU37VFjOMa8$~-x)J|GWujfZl7kT8@O&F7+pX7{b(c&Zg z&rx~t#MyiPw5z|0@7&hk4hyPiId=522YKbVY^>+_R2oWS^dY~SqY=95W*eDKc0R@s zge__YCoHEoe1d&ALGg`bnKDajVFJH+TnO4nEX6;JXmGtsp>Y2W=u13c28EWb^8I4z zwS`eJfq!ihxosw7!&mOjgVEgOhH%UaDRgBEbEw{LcNqU_3hDDN2^36}u(45mUB}Uw zluoT;G2{Fw&t-li+-jtEIgVu-`5NiZMOpX1 z9VzQ1PLK|u_vNg|Pl8H9L?NGRqJ@*8%i(aXymBg!*2&6QG}5+OtA2{{v)MDXU&l;Al!NOG5rDlt@JImh^5JWO3G@DO_rQyjgbOZaZKitG&) z6?`;%FzfbnGx9I_C+HoQav0o6hCeY2Gz0NzWev-%v8IGi_X4Tq6*yBU?R>Tf0Cy z{;^6lM^l=oJ^~=a;}TA-O4TXJ-T{MEF4Um)W}~##u2Z3p z?-rQfu%?~jpv8Y8#;SbnqRqdNLKFq2J##9KXvlM{EGRCz!LYtl7#GA`J?845+L^sL zno7IWO}W=c&RUvb*W#gU;`O=7lF43EKIePG*_L=GLo>&=${inj>TX+k2Ud4SgCGr3~uw^f;Fu&6}&$@l_F2!mxN0IWoJ%606i~qhI6%{UY93x5l zyI_?Q&-EssPP-c0f`aj9wSr)=h0MNglWKs)Er4MXrPbqRvzk93CQ)YC2pc0==WGnB z5@0slI;}ZUR2^9jBieJ2YZ?RS{WklFeYgGWF7E(}uUa#!7(!Ik*ZIbl8>mXesnuu* z_?!q!_15SBh?qUGx|;Rj>DO5omN^U?8{4VIdlAW@UZ@3f@rX|VSMNDYG+{B>`)i7% zA$=R0D1a)^5fmvf3S4+%Vq#L$(75kga?}#=ISEixhkO@LfwbRBt!hc(*dBm>Hh-v zNs|8;SDE>nznH}r>|Nx4F(u~3{)6uZUS~)957k+W_+NPP3s!GZ^1ri${fn6d3i!Wd b0(R#+VFGGvrEM>L1$?B$-oGms(ewL16F^bZ literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/pipeline_experts.png b/model/train/yoco_moe/sources/images/pipeline_experts.png new file mode 100644 index 0000000000000000000000000000000000000000..687688e8a5df81c0aef4948652febe4f8963fed2 GIT binary patch literal 32611 zcmeFYbyQqmmn}*Hf#4S0A-Dy13lQAhgS)#YAy{w^?(XgoTnl%1cPL!$A;0f?xBGSX zxcB}4YQU&5s*dbad#}0Hnse@8d08<;I9xag2na+8aS=rbh__}C5N~?lzXd*#b6g_; z-eB#-H5?%zkh)+0-^9@(;Q=3_I*F<~DcPDhx#~L@Lnzsp+vz(x8S7w;EJHvLLr91S zDZ8Z{Eqe-rZeC!{62^_G@R#?hNFQ2_u(r@y` zoSoU;KtKo%wTzD=0^{0~L`ebOm<8>CC+LUhm~Y+6B{zfj(vZ 
zHE@g_CB>ThbllG&^QVK)h#!71azAYdc>A~h$pgB(wLFKD_vev;V}l|+xyVU zd;ZJ~6%1G?G}qq86qvg?DXBn$&#vFstau&^bkL=TO<$trC-%O#RE$ao$j*NNsa#?D zM?~8e&@DG~{>a=*%G9D7l!ANkXsJBDZcm0CEo4B~J-=JmQJ-jknM3r&;-DGj1xF1m z+IU724XsX`yyy9Ok`2QkW=_^p5NCK4u6Y+ohqF?gmqYrXS{6LZY`>AHymaRU}m?mG6EM(aKNFiw6cjWJ~ss^Sm?##J8BnriZ zDYccafUNSsk9%xnc_JhH>xY~fWd%9P(>~WhT13dt4V~rdkb0Mq(I?t@jXy1PIWA66 zpmJ|wY>Wj+RF2m%P42^1)Q(UVG|JT&-&Ubxih8ww65J%S-Au+?x$!w@aoDjlCZ$mu z_Zs9nXw4iRsGfyoG>t!as6|GJOGZX7|B=ve6Z5XwKnbDFRNM9fXRu|beW%*r)U`LD z%;J`bI+V{hV{Y)!9Y16`M+jry>xqkS^S-VC+~a%4Nu;6Nt}&uuYHFZ1>nrCL?71te z;b?l4(u3|i$c4qr0GmhD>SRxMyjP^Vg5bl}y1n(U-K8IUjtSu}UP`{>yQgO8s8QQL8mnxw zAEImniH932e(a-Waa(^c4%XCPdPH{(ZEJO~Z*}fU2G%Q&upLd>r`=A)*ZzX z0J}>4y8ThV-GDOOT; z6BGgT!A+L|i`{a4;f>Cu%sD$`gAoWTqQm|#%LdhW^r`j(r&qr#D0Si7UEC@8N@tai zBL?s<4^zIW`9CG&ga4f;RF70jg zRCJPaET&a4ppf?Gd41Y8jDQ{*mw+Q{3f&`2n1sobbyrswoR8YP(Od_y_Io$F!h{5A zHjS~@lzL~(3Rz7sNH89?ZmaX91C7Ecy|(pEgJJl%kym9Bt#u=Pg$`+j5!XCi5#I zD^^i`xVAeIb8jb>ysC4jL#A(j?auL4wg$J{9N|bXkTjxU6*QfPp#zc_-fR;G<`w-W-Q zESqZHJ3aXjxiS&vVhaRR(cB7+Ok2AIk4$n~=^RN7(G@3#^hu>0VV_h1z&p*5kMXiV0Odu#^I}+Wpn4~ ztVm@G$?MCVu(Fmw&i`o>a?IAYqa-OJveb=se}fq^{mH4|V;HDfy>VDIxh7E$`mxe% z2?3p4vh6AL(w59C4+oDYG5%xKyhzTMU@%|u#I10?SUN&n{rR$A3(rqUKF@ZY1H+#7 zcJF&jdHXvh((!r>o}br#Ys$PT)xnstJv1XhQdk1`s<^&X72S0^Ir82@?cV9O%0o#*KD*ztl- z?Q9S}?JHzNj#wSP3c9kCb-h0)Ak2jS3*QYk|LbNLh}AD;{9*aYRH`%ilW? 
z*3Lb|PUgBti>0#4_1`(JYJkWs=?>sd9+@AK(|9I`ymc>IwW{s5v76@wK>d~}$mM*_ zf$maw3>{IHB|WlEJEDYqha1(ap-^z|N46H%jj9K~1<|~7_B;ud^2tA*r{x_spNJ-l z>Bh9ZK#gN)wr^vnlGz+LYfN5grE&=T6T<0{F=u>ZWDXAizz$Fh_~W$-1Z7itV3+LM zf3h)vjT6{JA7J?=-M?;>{2RNla2lhk=dr-()p3p9B$9YB!?*?IZrks`b6_Bn<#|%D>+`aQWaTrhCLE!}{%e8c zeF%eX%1!miNREc0A|lUEmv3uUv~paO>?LZtRdYqp1{|<`9aBqQD~dY<=qW7)Y)I7) zDIe~@y}L_6`L_=mTQtXpg*?47k8p3C{S@+?>MGP{0^Te7CbGM@*Ks_wSa#`Sfiu}1 zdywgIkgBt;c(pNBW23=Kh-CFoKJ>P_Iu3A#5w_cIgtgY&lp8`%HVq$-mxvyIFe-zP zLlWnmkc-FLHHAGm)0dQ?x%5ifBd#%UiE?#mX+~QZQM_n z86U3k<7q78sz)82IJENRm+oeUkYigkb^f@qyxhgm*)E0u&f>k7l z2&YBLv@3Nc{QYPhk>7m*84h8m zbY`{J%Q-)OpAqe`t-^dtxghg%#b|~qUzj^R{~F1%tfbIu40yXoqSSyFqqL#Gokfh?vI%G+YP&2g7!L@BnVUaGfnMoxlelK&Bf-eUgR z9Tm^?v_x|-{)(K0RCZ~8H;qn?B?|IqVq$$zZ~fVRtB*y%`zO?=7zusBhLF(?_7NEx z1}||VDG`w$Q+jR9EY1&JU6`fn8u7T4O)v|%a@q6Qc?pr?aJ{HexljJ185CR7o41Ty%mH6)qsci7 z>DH&>p40T1!bU%yWzD!*VS07k4@w3HIhn!)>f#@*fBk?#{cL?Ouaro*&5wyT?W6EB zCA&REH9x-gI}!M)CEeMDLZTI}I6oRL7YhS*dejI9*jH}hpAZK5)gKs9xZTUx zf-oRT6F&VI@2pUHzc)JvpEi<;#EqJwvdkXI*hNUt}Q&h|#j2?V%vTDTt{?iIEX)L!OF z31_5fZOo?90#=Ixh^($wjCQStUIzN6zg3mLdr4uQv~3+IjEv`HvANZKF(`HxhRas> z4JRT@FrAjA39|skUx>!gG1miVmfciZ;$fZZ%vZ%UgC|^ zOW?xDkq`HUHOf{`UiN7pGA$%U5O!=vy0}b7UU2$G}4&8MH4`d9iW)jC_oMdF)e4V{8xrCZ93w5mtb!wjm;$9~53*@hCP;Ls@g zxp{|iTx+8Ez6%O+%D3g;VmSac8lX3^x|`Qnd3h-t6{uvD%#A@w<}XSK6CFh{4~rE2 zS`=VTEZ!KJrYPfyQ)aCl5aWr6Ay)D(f5+we-g_A9fi4rDohScX66^^DbZ&ZLa`B4< zD#n!E_O~iBgOiXYNr~CbY!1(Edj3J;)*E$ZHj{`mY?M}oQIf|k=VuF^!@7JN#JP$a zhCsEOl#c#90IIN)NI%AnYx(guZ7WI7bdJP{ry8WaRXC%(X~$c+IbDU zGt-`}1p&9=T?)MEguOJ=>#3#**A|&venzWlOJwyfNj`WQxJcK^>ujzP=3gjpFpWI1#Hw4TXW~<(LJ4yw<=8l>8*c|(-oi&DL z2#7KLEm&Dn#R-QcR3e^gy9Ryl@rs_=U%wODlX)!D)Eay07JW;JiYdIk^&K+g&MtGU zu)w>0oCC1$&@idLR*OE9J>%{;+^%k+I9U3YV9R+hx5cB4U$%W@^Z0q?+ydiYo zv#t3aQ39P5^i-@$0bZjFE1DMD>2Mz6P0&%W+0hS3!);r5V9hKl3fj#K@UWQrbawWr zvTqx7SLU+ijR${h$qT48}TummQnPt!8>tI4#+i=o$cG`pW(X>*| zTiq$#K)Gx>$hTh@br$4l`hOYjN-k1#*(lO#0VB&GE=3%X6cQC;ZvOUK4}|mI$yl39 
zY#?IN#smd51e0H#(qk68VEQ#92Mr(2mWf~Zj`j%jQ4mi|vI>6WsI^YGlWaD72V4l< z%-U{7$2%IwiSJWxTWo?$YN@Jc0ehGj`T0?k6c0<4=@}+yxlA7@>_{k-DuUXU_^BpO`;8gZzY61k_*I$Su@q|TjdHR^Z z?Y_xMRCdfy2-i;J{B8SI1ScvNfZv3%r2X8@`4%AgYBIyQm1#8;h6Civn;xWDY%aus zAA0iI>NU)rKD~q99m492&L2H^_E1$N^~7k(Qx!oprzJgTTs3dhgaMxDJzNWssji2Y z$I50?JK=8b>?7sV*_D3R75|wUPG0d{5sk-nG?yp^lU znN`dW@=wR)znd*j#|$i*o!+z$>n~hY3MVJl)OGj;po)^7@Dt_Y~)V?b(& z$93=*ZH^^3=i}o3n@cE6y>~~thh;1o=RdwowrME51PT(%tEj}O0we!CXoIcrk(3Ot z13;Wq!D7Fdgsa6e%lJ3c3H%&N19Oj!l9!XM5QowC=a9FgAEjCE4tGOb__oWi=nZZUm&HaEZrhfVQqkxS!{2QZj z$fl6+XDT6%*`;l&@{FM+!ep@i*aWf_;%Bv2dbFT?$|>`^_f&G}Pom?89;WJY**R5; zDs?td^4y~9YO$q-D!S=qMkBX%YgPTH33ACM$HbjdTV}z_7C9bQCv-vG=My#VCfG61 z0BVYe2m*Y0jBDTV#qF75ucp|O+7*5BhNyK?yV@q%Y1+7e$J6k^=?M$ycMx`+-w$5i zGMV!fEfnvQiSu;Z1IGJD0&7yzD(z2dZP1VaZ!S8r_L0;;y~Mz@#(?kto@`4L-y3_4 zNXBf&OUV^#EM_4N=J7+Qv1DFOhX z_FuF;pizyrFL4(G(m1#Lwmf2A{*5Pe*Tvi{tMObgs)hINWfO4+6~0~6;O+0gzQ0e+|@+nj=m>+ zs5#C2gSMDNjS>`8z}i;#2P9U_ogiLLh6fBxP~-xJ@3gb+ z%;<&QczC{9M~E^eZCnsd(Y;o)ruoo3pznN}iRR!8fhR(nM?;Mw1}B`JNH6oRR*2H7 z#^?41r_F5ryQxu1jV?E-#M`7XeNB&=azcR?{#;n9pZJhKMzy#6UlBJQf`i-?Xd@n) zk9n>St(NbZamq}9L(M3E0p;014;Vebqax)qkyUk^vyQ&I>_qxuDWZV576u+&bP^DL zsnIEXBmbeiw2yPrsx(nvdX5^<#?UKcw_h&I$to09#^X!tlZY(02Tp;bG_}K(z0}~J zwN^OWH3>5;S5-w#5Wo1EwzA2;?7HOj=L1kHqoL1Q=%~H{CzN{w=lmi+<&uSV>^q&1 z{d_&;3a%uY8Xi1QEmO)BDB>0r6HChj#!RKFrQ2t|*D^EPh4%8`snGNY7ma4d121S#i#8zYjMC~aE?2^FNf2`N?!Rz#l?DU zUR*O1RoQrN#}J)=F>(pkGmT7KgmG{`BZ!-u??2VlgxkCmyqP**wF9P`0qVb^?tg=# z|7kG)^BMCp-L_0)xa{Alvu^cQ;uu7|-qHJGh>*ut} z=jNhPT9JGM@d362U^Y9q{utv`$05R)Q%}dmCcp>XDm1J1%4#1yhuhdj`M>+|MFH($ znV8;ro3Q+hqHFLpts=i0`m7B9nVNy2xa!1KsIcZIxsbWAID812&r55YSmBaX@a?)@ z=x?l8`3J~v`mNN=F2x9{Cxdt;=7c2mmWbT7(4Bn5Pp&;E5gFYoa z&GuZGET$hzeU3BjbR{{b-@AMZQG*VZUqNLM=2w5waaDA1&}U`q4PsP}C^ zDK4f0Wo2dVf~(CtFVoYrt)bnA@`};efg&Q^Kt;4X$9KHQ^CB)Kv!QdQBdqnbi-#KZ zX;UhX)hV#)o0b3_!U+RT* z!%wc-U;R@bO6H%xKJ5UZGQYCnt44Xunj6IvbKpa$EIPtrb}d@^LQXPseSduuJ9msp z0dxOT`a-K$*_KMlRil1&Es$tri>;Xp;+xA=5={Q9Tqczd$W58>h`MmW9ZP6cdxMlb 
zWqjl(Pw)xT*J3AMfrbHln4ZK@*4LLos%{|!?T1N3CBK0;6WMTa8_T>=;jOoxwiZ^6 zK`a{TIaz%uKK}+f4tuxVa?Q(fR!R#qsn6qT0_fV-!C3x?`y46#!i+LMz~(YJ^wfZS z>Pnkc&Ki?R)Q8|TF4l4SWEkG58Hx=JK|2W zb-6O9XZTQkxld!lRb0>g3|Mpl__PM$BYUoVCEZ;stu7{}pP~o9~ zquGNaCP>o2XBp!7xX0+vwz;u> z^B^>bA1Pw)w5Kn|nOk3#>z6Sb)8oS&p5LkUK?wGFwtA+7b$c!3B#;m8e2pR1cR$}W zWhAFx67yt@E2QOPpo=~GI?4~pHXR!z-Ze2f`@&ZpY$(>6`?T7MXvkapc~+~oMUBl} zOs6lJ=p>g-hQ1|_lIp|zpy=xe@7;-)T%DwRt@c|m+aS@St&rW3EzyBgQK>15sf8$8 z$~fE2St*Y#E|`)Ud?=7%Pb@I9`TGFr10#x-p!(I#bz!)`)vE9Al@(;+q8cvqgS=VR z@l_>R^h?mKd2eL=a}@-HJTFv!&<@h4FQrG7mFDKx1310uz2c>Iy%~8Lv~46hJf|2y zEQe_B<6Ik?x}bl{0f z{&T=$em}fk()b{Eq-#cPoPz-&h$n3KE3&oEyy8>Qg46~QFN;}~{ooB7-ye#Dh{Tz7 zQSbJ%gA4e2QA4E`HZUc2q4`hPO}`m`bcJDV%fMZQsN@nro61gUkyg`d=}AA<}_~KM>*!(;_4TEbdLX3hofUOGf#b%EW6G)5K?3Z&*?W4PfI;-P~r25NJ zi8!qDxBaA*>7V}kA8MRxfAee@f_E5#+^g%GvJ_*q)PIBbP*M4)X%A=H+j&(mwsvU? z6yO{WcR*>;fuDw~hUY42XlURH;WCQ|*Okx`3%t2KpAHI2IXdE4aoY^z*y281Emotv{|r8uKHRvw zEY)poYPxTKc`hz1Bed4n-(CAQ1ywK`+_pB1t0iu#gXK$U!hWFb|wrJ(_f+IrDn{k)T$m|UVcmY4O@QsX{cHFQx;&RkZ@WMX1h zvHGH`8yjQjI3Vm7n=wuzdGh+sP8SHYxw%pHMI`p}4p@yAmQr4cP0?>4p+oB}Iy}jP z1&OKpM;Ynp@Gq_xkz{3iDXLoiAeL7S`HEDxIuVxd)IK6pSyMIC8@O%Oeh_>)cD+i$ zWO%liUilL;@UCU>-~ck5{-pOJYlf3<+Y7a2rjA{8%9`V6fd8@UEE%aHN1{=j&%O&L zD(Z5(_xuz{(!uxMUbm_6_Xn z)l*d4X3?fx>Hf6+<-)-c{dCFw?qWN|{gPj=4xtcsZ0c}jGgRj$FV#0@f`VX_0@8R+ zaYT1ecG*}iK0d>4+vKq|1_PtZMpt^Y(ll zmz%A|(nJzp0sB-81bjHm!!o&!_L-3ZG&qRW%YxD-_4SY<6Ww<``{UVxt=ZYrKVj9# zNDT}P1_uXOYipnpu*VqiPTk%Cue&=wk1My+ryJ{4@A~Xy$T9cUla6^iAsC>_aGz}$ zH%$mh-4E5AH_Ud=c1)LnzM+aH7HBuH3yGgf@mN!+Je6W&E0n_iy*Sn}EVgyKv0$rl zdCj{2h>EJPQhIsL&Jq}66c-(=X681CjTQKe`n~IVRJ-)rQCbl6K~hmUr@Mk$=We}8 z2{}6tNr4=V;xwtf$qK9&GOT$eZ2`;GQCvS>gok9%C^7#+5tV1Hr(qH8m(`(% z9RJB;$JGy+oX%^z=b;oIeF_1nQB5qmuX{*ZSb*3>9S6%(<|%}! 
zS@xz{H7xri(!M^|=)Fo`U*wWH9!ZWKBn++kCbdF7Gs=ydKf~U*0Q7XN`fT0~E~?N0 z<7A0yf_wGsuN(YAli&1-Qbj+45*l_LZAY;5ck!GR?@}6|Kup35H#M7Ib(Zf!FmqSi z_3+Z0nz*^S2bXQWA*7G)Vq(U}#;S(`y4+qtVea%e{oB2_x;JYdH&&WaT==hi-H?wJx5fpk4dv$J=l&K%@7@r%FrEA;ca0?51js>#Ssml_Q@je_dxTmUAXJBuDp zRYOA~GJSO0AuIsNIi2spnoMKIjO31_V{^+`mZ(sd7^BU^%LxigYaKf{}gse(;>m8?1c6$Ty)?_3|zM@*|6Pr8x6)qLZ&mOiZ*4qnP>DJ+3jV zsdX^bm;AnzdjSjC3Du<)KGS)`gzUcG2N=kk>PN+>2_xwtp^OTtRYtD;T`w~DC6XfO z<=ds6D2@)R$jJha>yr+oisN3UTDRLTc(cvp(1fdJdRONQ@QFs1T=y6Et|EHFCA4kr zB5?}qdICh-qi`6>5vso(pY}$k^-e>ItLE@A^gI!1P|ZVGsVZL{V*?6n{fG`97>$y! zKX=fTmH>ZcI;|dTq(6qXEi5e$_x7B$9k;+2+e43!k4q`|)&!}IM83Qp(lJr$5Mw2T z)6>%g%K$+qLw4QTOIjG|2S~V0#gdP{k z#U7IvS*yiJoY7_x)$gF*elIGbiHf`ox!gjBUM*3-%N*M4h-RGm5NndV+H8Lu9v)bq z`ZKDsGb_l}re~;p>UN|2F0gpXYTb z0jfP8w+Dn@W^UmC_FONS_aa2X5C}A|pPRF_wr6H&xi}7bvlk7t*;UkZbVgz#AIT|@ zaCzO<{(QFpA&eYIOa2Ohhl5X#PhdGdKE@}Ai;LrF5{1yg_<^3m#>|;g92JGk!VHZ} zSW=ozb{D{s+7&}7_p4zkIvP!imX=4X@4HS#Ho{mTz)NT{o?lzLh=?dCL}g}X)`f?s zuv}AXi;9Z+`1sf>LwhW3|TBp{lNO|4%%GlN6w$`BO8{7>J21HF|4_iJ4{u1T=YU!V)iJz5#$40|Ub) zJp4L5{8v*xHbf9G7^=ChVQCkUZb&54in6l!_?hHmW?Xk%NvDe2ti=<*Eezw%skA@j2vpKe*oqxU#(ni zHD_;sGGCjaK4^%5$s)Ej8$epvm?KER2Y}>p3Z46n4UFH|csrxR&p8)HfBJIw?hbg` zchCU=Km`L}RIMr5n+$4T^ntIre>&*Bi^xwP`NZet<;BPEWdk7xJnsDbJR~-Pr z>MbMFYM{nZQiQfJYXR&L5*qVHT3W!u<@dgKa+-t?L;zMgx`dQOBQ9zOHs{z#hfu!b z5}-Qa3jMWnXE-jqVgbZj3^46~-QE8allDK6SArJ=P8_x?OKl*4kK5DE5zU;^8YTOy z7fegP-;k#V$TEcgNP}fKLgi<)XwMY{sVPr2ya(>xB|(5c_4dc( zml+sSy=DS?lBL2)P(D#8&!Mr&UQO?8a8Qt2@GRlMg1QAYDl8Y}F3)Wt*XiBit%JFa znEugu>5aBaAkQzpk&2ZMIE#H<9<_bUfuW9$S4Ot0^UU+J<}5Z(JPMv;*yc<3u1@fR zLM`RaPVfh}?IFG5GljUqBG0FFUK;L5(?v-jc!{xllZmTJo1z=WQH~PoR$rIxhcx>l zrM7E5`Rn6^z^R04#X2s@Qze zW=*uz{+4x+nfU9~L@r|Pj}lIovKnjVBZ}IpSBe>KVqvZ1gYt)u)$j$Z&+=<~F{@MY z@gG;dRx$%?0I=Y^tQb^Um@+jyeCu!VIsWzW`IKp8B=_p(bT<({5)_yAw}CtF@gmd$8G%W|J17vYjmGG)y1goD9%bF6 zh%oxA!F&ObKoCaht6iHH37z|W$w|4XF?p3*rlp1GRAe1#R@y^qe6cfdi(eUk9_)8= z`|@}B${r5wbv;176UcHz?9!)@zH$0ZN##XZri>b=egLqHm2Dl`!x1hA?~E;9_SqiD 
z5LrH13%!|M)&0@fe2qk(S0{yaHiVxiL&Q@9^wD^{W`IPyaRQl@=}?OSjP;EsQA$g9 zt7Cpi!{??vzpnOF!cTHv&sF}M)Shrd;xd}C9w%dfecwa9RZ&qfcLqkDbM&ecGgqO$ z%0jR;+??(b?Z-_fIFSzfjw~vqXuM62lNZLP2OdRuGKo9-nPhcVT$BeE2k0J2>1n%| z*IOKFg&m0rgmTwyoP7W&rm6x$amUiF8ZIJ^BHoW8;k+{Bq1&b_pNZ0k8=D^E*bs(d zlTN2(=0TeV=&1O{CO1A!c7FaG@_l3`3ixtul9(CxSM5}xh+ux|7eerG&YM8{lIU74jbITiGO*;7?EfVCX8&Wid6mn)A^aEBH|aYpN)wFv zX?6`t_p6uSKf8K_w4f<(bfpzl^$Rl)0U-xstZnPPEXnTHb;eK9%&B*2D_1y%VDHa8 z#WZN|Kbp`A>|X~~za?LX-2dN3Cq9nWy1$vA_9urYNhPs10t&)0K9Ys2r$_q5o1LKawk%X`IE z__BSk?h~h1mrawHP?@Y?-nqMJCvR}o6W=f8ZVIeU>0RCNEQp90HGVDgLIgZfks&F7 zhSFW3-ZNs>QdUNcwHGr3!ciDmZSgvi&I7`GC86WA2 zIchBP9mF2z@0$$INU*oEpj{nYLT%^HlOEzV<>^;U5dt|Bi%3B2HO3lEXTr|Svugah zEbdoAr3%m;fH+)q^&=7x1;=oxK5;62uJ5_VwH<42`}Qx`!VN%*%c*q7q)nK&h+)4R z`X+!GmaLy8GEm>{_-y?XaLF1{PDYEcCkIUx?C6nZ{mbL8q79^X2+i`ij)5?K{NBk@ z*nw!)=RFLJInS zy<_J9oxjeSzU{b-`Oep7hV$CO7{fmJBl6BZ-^rwR9|%UelBkG+$-Z>P7(?FoZsGMD z^$Wrx<4>-S496dhX5TXG` zQY4k@T@q=5Q^Kub`?C{o6bwE{A_5*nGs3dXBSIr@qSq_{!Tg8oKl&0A{eVRbyUW=O zqp>RX>JNbU@4SltQlu4XA$}=nXv~kxn3brN>q<&W5-h`gJAv+i&A`?`CL)AIIspXc z`GehAVCZ`R09yZIwYojySlPQ}A>Q8J=9-`Xdjc$Qf;iCi93phwt@^$=JE7~s|0}qU zZf!Y%r^Wi$)4t+%4cbo8G2tgQpokX413IVw>k9o9#|3F$r~i%PtLdkP#H`xEo(WZa1y#G#pRc7h*Ci02)T~`e4j<-f{gkLsB8x002^?0kVGcZmrxzUL zK%Q4Y0SNl6tmL~c$_tw&s@R+KxPKGn|K=U0tWQv#ne?QaSGOY7z*sgYUxcWv21dlzRF1k%fDWs$B2e(M5zFlj4% zeKqUL?zp>_Hz%^K1c;1{A8bts()X@zhd0q#a0eSGP4|Y~9rjQk(1j&eS$h3CFMJ(E z?cNF&JFTw`Bb={99-izL-|aykqN1Xz7`0;`Y~M25`M0kWp`U^!8%HDywHmUD+G~sB z>iQ2xX@`bbHs){?56dNasvo@S#t+VJU6Y}7S6zLZeEY2SVGR4ae`@n{k4neY)FKRY zdxe!J0AwqsWgp{_P<5Nmr#cQ)g}=5N-Hj(Ao2%#L1l{l|ycH^m6iTaBGAA?h2aSx(V9pB&z3s+VVxUr^{R@J`m?S?yI|!b>YA$&OYr9 zgLQ>&l&Ps3M2uMNh)qkCY>gf0)sLNfyPz3hWN2e4E^^=v9i4k;=YN&^X03Kg#ZRdp z_TI_7_rqx8I}%Ip=l)dX$!S`SQ>YJq*mElu$75E+6x%$)uyF_#4Krsf7RyuGU6Z*Jv#0V*03IbbDS52BI(TYcbjjdb>+fKaX-L-es z(Flh!QRYOGk@r*55!`9iQ^Xq-@hFUJ;knYvnKoN_)W+U1jm|pCS-=LW@za<4KGnk} zdJ|kd0fN?RUl{G_Wb-$`4#R|z0G=7iSR7aOnAK#9I-O`Yzn;Moc@BqUv#AS8*;i)* 
z;MEMxJz7YJ+*4`)gsXIrOLtvk6z9=R*==l^7l($L8&DE4^)N-!)#qLw4s<3-??O{q zS+eGFlpP_wt_BWFe#GE;bB5+yjwSD%K$2eGUnC_27F-keusRm9K|rvMgMU<$g4c$F zr4;$*U0d(Tbvc1vG1&?W_aRllQNoPaYteoGg$oY-0LgKIVJgUHJS)b-*)3Q|&%5F1 z)V{<1WthcsK;lzxBuSPpzpM!*ih?r$+lRX5h<_FeyD_`1shHZoG13yo{x)CKFyiL&Zx=nrknt}PZOk_tN3 zuYKcB5fSC+-Vws3aY~TqNZ-Crw7DY(}73F{Nk|p zq=GP?8in7jXub*Eh)&g<#kV-6(IlIoBskGoBGuuBn&O*_QLVN`IoO)IXSz}HEu{R! zv658*^thaJv2v6U7A53Wb3)~2Ior8;)0HSyEn{*@uFP*iRAk9+7~ls-G(eFNH&Jd^)iAkfM@db~{4w*7&x7i(**yLw z@56DDR-L@0LNowxo=jM}r$M2CzNOsmylr7y0yEFf=K<`!np&_e~n(o)_bI4X0 zWnHlV16@uD8SUG=fE{0+M%Du31-v>25N^lL zt!p0*)JN?;W+mAZBGeAJxS$0Lqo6W9pYt$E^)TM$Zm)>Rjj&kSWClE4{`E|suYKTC z)|zcE?)S;0tF2>#M{1aTWISlp%WT|xZc(^p?|I$kU7UrV#1lKTl;-K{DPtWIc4#@o zjZgHuWcpS`X`(8HsdlHmCFoeszZoC2bXA5=iE@~o^2)kfEX$$3ZuPVLZmK?vBzE3r zb$v(w%q3w_jUNv7?id=}lH-ou7$l}4&w>Ga8>0fdd(qsD5mRSwxltwD$di>csP~8M zi+Rh76qQweDQalkZS=tVE9YiqUG(DpW_{XwxTM@(|l zUD@I5@d9l$;Z!lf6it)wvzTkf?dP~grh3JxAaM}L6e>c81ugIF7&S)gN*hqP`rYF3NNxNC!3Rw8B?hKsGgThJ{cZS6lNbKl2prt-JfnTKO9nSNZZ7Cpx>GD+BF%tgxlHd*a=;+~(B?`rpHrDZEjk*Kg2KQmZm0stmN9!qdkA7r0*i@#dF4x{}vZ< zH#fIMVw8^HU?QTj*SBx!)*&S9OVc96A0aM_^(zHL=4SJedOP z8S(Pd6lF1aPFB?UG&iE`$l2I(lip0~SrW=Th~)H{{q3m?w&(2=4apI{c_<`Z?5^{M z$g$E~vM2%t!H9jlqtoy`IC*`Y$*1$@WWMb7?sj2ev89CtyHiAS3E(s68g2pmrQLe@ zNZ3~j5`SW6O1wGmYxU&tZDjd#m^%cMf5bO(&tQmN&da&*_MaUVh|P&`&Sdq$6PP|) z`95=z0U*;hcKTHWfyaC{e;MRSxQortZD1MVwt1fR75-fdSrNI-xA|H`gd9WPV#UB- zDz&eD@Gy~NjYaabU4@D^3uEk=C`Qy=;+kd`|NZ`4Y_VtC%DGs7<2b;PmXJPRy55?m z&ri`)Z{a^Jxn4Ei&V$?E6YkpY8HlgFU-{G2Fw(GvRa$9OIsFaTuk1g>(ygZ_Y?db*ujj?yr5zF!bT7Mv=4-mP zL`~c8A|XejuuN0W%o0qEM%bD4Gg}Yzc$IQKGPm}u7sf~WASEaw0;t%i8muX>mD^+>OYjUVnR=G^u3 zQD&L$0F2a#Cp+yQ9MJg9_hqN&VIlq3*4B3N^LyLd*~}(b2{Fo_*1Rwf zap}SH{V=mZF1+~J&Nu=~R3WIiTEg(}!rf7DaH6PQAFcR|q8L+A5{9IS^V@qRGW-9R zEJ5rBvkb|uzvS%HA-(@kD*rP5Mb3MuOtY0KwPQzn4=tl8 zG8LrZO#xV3S6UT{z@Uo@){$&iwoQ^3f7@5a^V8AgT}wy=j~B1MP`=M~B(S|n+9Env z^R1@g)Ej@4$?HGHKF59?EqupaNuQRWeU&xoTjH%InYiNW}S(USIUnysT?LT^m 
zCD>BOQ7Ysx?x;?<8*l8z{inio6dy<#pUn<>A>k6n;DBx73~vW(l@j0BUiQ{_Nb|=B z$IwlJVFG^<$mn-47t6tE6wI;=oQ>E;qX>S*7>*@dDb&9J+6*LpUUW5+(E8fa2oC^|GeR1PseT*0M66 zmJ$M-5ps><75|Ch@63_y*m6K2Kk3NIo>}YE(LVh+A2aSYa6?G0q~3eXc-Z}&mVZ5k z&00yB`L9L~S9K;mE?>8kU1V{wsl5w-hCuDb?}e8ZjWcqWi9e*MNkz`HMC@!};HZ5H zJMKF9U6z(alWAV;nNm>(vxRzcd{wP;OaJ*{RI?!*$E{s=uQHMkFJ!Vu`0wwU8(aimGddSk ztyMC&=2xAXDS1lKmDj74b#)~vXy_t_Ym4R^Ne}2whaBq?I$Dgn;fXrYXry#%*^CN;>Gk(25yRFh64m zYA7>?hf`PN|NZFh^o+5p-yi~gTvPjFb1VIUw5Q&IO8Oq|2FLkXTogyzvJgh;#yBn7 zC#^V}jl1l-%L=oC3kDRetNcNB5opRb+68 z8@(hD>!gXY4u244D4SZ&oGYzdGqY+X|DLoeMZS8Z%?XP4b#~ReFOEzBiKIez0yOH& z>o)9B&{OcP9@V6FlX5mrh_R-@v;N+io2w#=9x*(s?(VIoyul=NzKNCfm1j4=&Q^^f z(|{6{@Z<2{bG04Usyn@>(LU>@R`q0c!Vu4r)@QX(nmM&aeIx#vgYA7=KOw+!S(u`a zn{W&rSN>LGU-Gsvl9W(;Nu8~hh!*yy4@ouWJ>SgG#+)8CcRJ&J<;|K~WzJ~plxiN5 zVd;$}{_UGVf8#OiCPlu`vDa?;YH&(*)xTo&ihCj(p2uXf`=?c%N-vG$G)b%BDTVRc z_qiy)er9^wey&ux)0tJ*^W)9EP3&f?7Xm)$W-y(DO+etku^I)&itP4s^W$o=zeulu zqj{7YGRy18Zn}(+7AVJtL&QVBr49y4t>Il?=2iM8@ni`i=54#ualM?MJL0~wjhtQU zR0FA3C?p7-=OhXEmEj@V_^g9vufL>h&giC8q>rC_ab84pCY4ef@XogMZ!SC4Zn-#& z=JhkV60Yk)oyHS+?II_n=7?D#mc2c(4mBNCRaOq3EeR*-cuY+_R`6o#8;R2V!M3ui zen>-{sAQw!)e>Q^;BOkYKX(Vw!N4F4QrQeHB^G5#c^_QIMB&DJJdd<^Ew$uI=omj< z|8aSwjj36YqTzCci+Z+5iYZCU?+~2vbZq1T_0ZV`?^iygVZ3eWwaphtjN;s#er<8L zmG-&W%6=}#xOcx-i?^7%J@FTh=s1`Z@O?Oo8kfu8{ByOP<^p^Q$DPX&#ST+>#|*XtoAsjz*J`G*U+IsBl@z!&t6CaA zC>l&#hZ3({aAWAEcoH<(?28u3GooKjP$(9$f+cjsJ>sTBcD~PHUsT3U2{qu1Ruw&nA|&g%`Wg;s`ahJy;xpO;WyWP zy~T^ZDJaXV-G8aJO%N!KR1Pa36-Z7n`PE|;{b+p;qxSDo##g+18{?GX2*=uZ^Ckc- zvLru$XQ$1v&GkZw8Ux7p=uV?)!T-7a%XLUC%+0UvZnROcmz1->PI&Q$EO6Ujcnx>N zN+rYL;lIWwP@sinbpl?jwXW~=Y0>B|LLbm*RB8Viqzx%^tCv_BLWd(dc zriA`F;jct>8xj_vQGjr4^e+gRIRy*^K*@-$o;2@mgm-w~5aNyo)gnarN(@R$co zGriOH{h>%Tqs1!wXklWcIQ(wR-rFLBzk|G8Hao}PX%p3#yy!afLA42y#*mkE2k0n7KRiG*ap z_bu+`b_z&_$ z85XL;1`Yel%78##UVbBxpsj0SN{P146YvbgygZ<2B3x`LEC#%Ljp0ujC?cZV*^z)T zW8pkd(n>^3DVCh8YtF@m4+q&UV37tMz0uLp1q2@K1K%AT9oxi6lVl{9ma7mDvv|GR 
zt3||NouMfe=153s(0nN<_9m*njTB;khSI4%AfUl;$T+h`f#N{Bp^Qq|mPkaj|Teq?F50u{O%kul8x4WW?wH^oF-aZ(37 z((FLU&`@F`@Hq9MXeinwxy~0;Q`2Tr<{w?a{5c-hH`Isvi1zWmp+e$E4-;7C(We(z&9lQ);w25ntb(?5g7{*5L; z0PTGN5tBoDc@6JBB-NF#HkJ)T`NgBbns>4;)qWQ)ps5H_Yx|Xeu)J*ScHR;e3CW2E z0?wNz7gIL3u)yiI-u}F?K{GPq?(W_v8TrG>sj;!~%pe80l^nwMMiQuOXNx-tG4A4% z)L`()0zL33Cw&6UBgq}`QYzHX%fhMqzF1)vDh+o?y`e74#yzCU)t4@U84hLJ^%;Ui z6m>`&d+g|o$6w^$NfbJQLc@LrLf#az#)jq34(>E(-}-U)6%B|z zj{CRg;9%$-)|~!Pz|Q3mR+yh}xL2y|Q`5;gP<6XFA_{xYD3?hoNgv!VwsoD@R0EFz zKeqr58aCcKlS{r0CDkw+^^%aEwxcEm4-_%!)mHn8rsYTJqjvZG`yH?pbi~HTGoqws zWB`$4ZfhAYMJ^hGq+4q~?IkYGB{Ixxu)_{ViW)jFnq9=niQ7y9pC~PD{K341RO&6( z6sAE92@e$>F>=_ytW}kiI08vT1dDinZgy5D=d!)lw%THgh|7MY#=CB}T(2iUAX4hd zY>&&3@2aScd*w!qX7RC6qU5IlgNq0bKCH(tg_jpguM;k|wVmgV$E*81RE`mfdS)HX646VMb ziMLjh7)4kX!FpY$T24;YR=98q3}V<;5|%_IghD}-0^@fYTWoA@WemmFgM}($3=v__ zvRl|oeEu#=2BM%~Y-*|c%b~;1F}2A?KNMEwRBpsbIcOFZV_zaQ5t8K|i9A2y+>$LN zy8QRl0a7b_bhJeIyzptVes%=SporL!-hKx6QRDIm=0mdx-yBw$%*#|_vk2y zFy;-698=Vod>MSj2U<5U7IlU{nvIpUev0hal^z$b%#cNZ!6s346af}x+8iW|9%jD0 z>`JA8F>M}&iTM(Z>YSrtN-4lFkzc3myuU7;Aa-%#N+my|p(@ofb2?lny0KvL5(Rl5 z*u1Fxjdo63EuyW#6)p$`HP3w5*wlB%Y-_zgv|5;1vThisE9s>raL!Ooc|Yg%yP$hNRO z3+5L&{4AvFYbBje_((j(;`G6xus>XaYnkP8U~#a{)K)`Oe+_)4?lFDr?bsgnqoyPW z_e;gDe^e3yCnpC*Kv{Ek;Y1=$s#*PeP~(b)sp#o%F1e_gm_+FrPIDM)`scsLr;=u- zUW5$%W*YsVBNwak$sU6;Gjlki5*e<&&+VtF@lvv4TgmP)^)|Y`zw;kjX(8n7^=xi9 z(96Gx^j(5_`@I8KfE7u+FjsV$e(Y#yf0HEoVKm-Ol(#|S0iqy}z5P%s8?{Q!{fLwZ zcye26|7>VD>?JLSPlOI3ElufTqZ$~g^N4Qt#mnzBnFOXf>*G%!Cg_H>Yx_{UX|r>hR0DLCkf@%`(yV(rU%k$)Q zrCeA0;-b%ByYCdBfIq)S(wVXyPhx_U92qQ8TkPM8jlV=LaK5 zj+w^8%VCAJ(LnQemrxaK4~|~~=GZ7N!G43*87d?wv6yO3^n=v9zW^ttI)zG-4FbMc zX%mcC;OL85RoZ*SSw#y$s$PT{QoDoerVeSFn*Il`6Hdtth0~P6Vxp_d0fT_Uxsn@< z$zTW3V@^)<>#hCb&aNeX<{aAImKqGHq_mm9q?A0xB0t^x`E#c37u$8@c30(WLDQU2 zQwpN5m8c3?M9uqj*MrBw@f>k^;aAJ}yQt*NgyL(FiczNEV#Zkq9+1ae%z;XH0%vMg zEEN3Dlt8_Q>R~rBOs_b7+)V5`)7x)w$-N~F=;|(WGFpzDF9n;216`a&c}LdY4+P|e zHfElY)oi7QP5v9O!`iP?i}$ajYQKv*`P=hOYLr98(8y!;4A-KJg 
zU*eJu4rnVJpf*#+B>g0&WO5W?^-W`&8Y}lifsUbJLP7}l2P-yYm7tEnI>#(|Bo2ME z$7jqs(wC^99p@z^Y<7dOd-D>I3`RNe*%=q;d|b)ZQb<Gq^Kw2{8_C5b~H!x}75OE$X*g=2Us=sA>I94geuR9Pky#sUZd= zdCE@O=-f5;ysXbDddaGn;?FZs^RBpgh$v+XMhiVL-oEhc94zdKiUA3_Ml0bL5lQW@ zH@22p7$A`wMg!oFmq1(JBy(WXAO7mLHQzz!C@MUuXu@+$u|BDD0QpevYBv4WqQNzx3 zKz=iKfm(@i%uq}MCBBhhz*66`6;J>?%#}}1beh`bGqbA7=OtJo{7I}5I0_cdLi_6} zPrOAWasLf#e~AQJ1nXJ_Jee2gDIay<3)wp5N+yhU9Xcj*~Mf`tmz_Nr))iTXtde5q{W$1*20ig~(g8_hXO(a&>VMCd*` z-FBS}hbciohdeNV=*=%s0w&u#BtYr!OaWR+$IZe0=2jQ?@lK^d4oyV@A++Wvs!Lp= z>}ip^pIIYo?Y3*?e@z503iuqyPw2&CNI!qpniqg;912DH(%ZN#H z#{m+qN}!~;9E_udy-%~HQgB~hu8PbRMLWHoJ84C%v8`x+{405eb;;Cd^((K*Y>82E zKrbrF;Cr6Ug6s3GY3Yc@9rah`4E@=4VP}7D(scDVKG1M5QeQnKE2QD6^3SnHlmU4_ zXst@vF>Z#vkGh~#aK9d4isrDBVR#>3-1sD{=YBhv{ixA)0S+@I4)#gozAkl!Ue6Sc zzXla+`C_E&b=%1XhJj%OdQmCBYPWE6L`mnC+;RdblMogMcH`+8PUWrCd8T%NkJ$>q z$UqoO=!l}-rbIe4Q^~}bKCo@_y4lQc)OS?AtFCe=5RJ;3`iMw4UrjQ7g?<%duZyiEPW|PT;NveWh2F7~>a(rWRGc?vPCWVDU9xbYo+4$WHO!=?$!Wv-wmHNxTm*sn4#X@r>MOLrZy z@k(Q}J+L|G;X8J4=x21%xDnRQjzB@df9V|(Oy2o7w*(M~aPHqb03e(o4rZ;43=Q$pYY?;n7Nvb54~ zK-*wiGF=VSC`IB@$11_K6SCJBj06hvP2)(<}s zFnB0)Ze19WPAK#e%y3Fle6*0rP(vknktl)q-5fGvFrY|l6WGW(C8(UtT=6cn}Wlw)Ejl- zkIX0X-1r0RPWR4pncT0>1{=y>kNE%>M3PMR#ik+@D{ovll-KD4R3@mG-Q0(usk1VD zm79!=?DhT?1QSn2Mu8d7zy<;(!4C|)L=WD551VB-_afjM4&Vm!r_|gGio|GUC7yd_ zwzvG*hK@sawiv8*+1FndwBrdwH6|eeiB2WbS;OTyum0vEZppD>F-MI@*3l>>1JWTyvjX|Wd7=}XO`90xv$5wr`$@XgR#U=faiYuq{NH{7gDqVds=`;TyH_04> zI=_iQoQ=4+?{&FFQ1S7^#KN0-#K!d=vD|cH4GBW2`#YVN4lpCLcE{GM(a0)}E@VNh zMvwdDoTm{AoyWY}$~4+WW- z88H8MhseNW1y~CR?qRtYb*Eb?M+@%EMuV7U!9X^cH0bqlwN-|;l1Du}_OX$#b@`+fxH3Mn@@g&w<&%et zZerFCgm7dymjo{)cS*t;Y{PB$QxuzW>s{jmCfEHZ^nyg7O1~!}Q0~KE9nrdC_JVGv zZm%g2Mj%iM0tv~IN8l%nn34`^_>bSBr76yA_UeU0REyjw()GOElipUbc>L8g)NciP zMKjHu-O%NBSI->fo+}FfOtx$qo5F?@N~5qpZggseWtn=yJvO#4pvoZf@=(wrNKv(p z`r*GBrve}+=#SEwVuX_2sp*cjc_r<5hXyRL8}ZdwXLu8L589E766j+oHRJA&3(x84 zNGzh7MjIcs@`)O-#|!Z?bbKUc;A-LEdQ6S-&~fnRfz+wW0|~lm)c5I5UHVP_jj!_PBA+X*zT)$Bq=-A4Q9+)wAn 
zB%fM=^WY=F1_4mhi=9e@mogzv_h)UZ1qg#iW%FllbRxecf96Ln)v=0xr={B4W!qY; zY~~*i3(>Q(&`&5y@ZexPw(y(zF9j?n$^4ctj4I_eSaXjuR*6ipmM>=~_MeFD1#x&C zU*~W?Vy1_XG(7F}4`$lC9-!O&+t#ba!M94Mh(GH~i;x5ceNo5sktPu!8?-`mZlF)j z4mf@Ux?n+XZg@PKt2*BTz?X78y?3#Xl%D|~Z=1K1L?tC*4Hp(%RJUcV_7CaSwG^P9 zVLvFcuy~U&%U%`df!mI`Iew7af?V3}B~N|G7uRp97${hf$e$p83hOHqL$cyPLi@`> zOG^uvobus9`l#3dT9jU^N`ZDiKHr+xqLu19RM5(c;!406)ZX20asUikP5S=U&$>-# z5-X7@bhD&8WRM-;vL1hZ$!U3is48-?9D>%;E4FMf9oXM}JTt{8V|TVBoT&FZ*zAk- z>=_EtL4hYB1?R?p5AuIuN&EEs_prM^I7A)6mzb5vl*LrRxIAU~wEpz!N&UQna_#@g zc#LXFT-voomYJ|Xxe_SX0x8e?Eq}Gnb2S%(rs=(d6ksqTtQB`0>}z>uAV{sy);oaQ zjrG*lUmy*Ud|Phiu3O zTo^&~;mRYe)~ta&H!)gV-$AJ5xACmfsm76XdQ9dt4@6iC@j^v!$)9samYO!m=p|1p z#~DA6ZEbjUQFd#N>kSFqTu${r4Tf-?RCNdnCg~m@T2CK*+`MvmV(QqT!us|0tcAU3 z1I9j_FgyOvK7ck8C7M!`)ou2cWoElO;!{~E7Km@H2N!lUBOKG%__*=p!pjLluVgpX znTi=e?iHrXg9L~DMBe?oRRnT085&7_7)!vL}o3=VEj9v2bDiqx2!+5QAJ3R+8R zH2qs1inVp3z89=>{67+{#~>CR0u11Tq@~?DnFBw;P%5nFDG3u3OS+(imU9O3N|S>_ zL$vDDxBH9yd~)0RQQI08`voVrj!UfYq($f?AIKik6R2odwF-g;7JXkcK6T$K9cr?3 zUMzLzXkx0AVI|YYMbAu#QNiXRY0fWB{c_m%&(Y7gU-_DM&>!YR4A78>WHpZfH>Zgm z{UnwS7bIL1pH!!;+_Mj!5v2johP%B@9M=cmxF z6$3U1=0>k(?Q2B%=Qn0b(+hv zp+gX6W}%@ycgveJUnyB8$w|KOY(c();b0*k$Yn}gF0)~?7T#tU?VNe5(#|c!-`=9O z>d581rzpV!4JE>XFyMc1r!qw)6_6z+9sg?p4UU`F#m7-D)7$@uFfD1G)0_3vkzqk8 zlbARS;KK(FsMR!Hr*|P``x+fhob)?^>R*3^wQwjEM8yl@j#XHwHW#Y){G|em6cZ>C zXw3Ss`P6yl0!JW!g2vJ%E|W9>nu_h70Oq7x=x?CaqQSxVIAd>9t0UHR-G@ zvjqYi5AO)XYIvOCFV+KY9G-4unP**aq{z5zaOKkHo+nuBaY1pghKB6pQ&_m=F3YUO zPsXhEj9=*0Gb&Tf_iSC4@dG1!dIM^0@aT2RturnkYolgCC^k6^3^p0R(9nn}gLMXJ zcB4eZAc5sos?e+R$Z6|GZYy>JB`dJ4UBuDxu!7|kLM}5naKL}ZFf>M^*(WG47J-F@ z+kXJ%AMFaJ!|2XN?5C3p@bDp(^eC{kL-ee+Vy?4!qW1(LV7=wq0EaE2jSKMGK8GG| z)FJQF$HFC|!k`DnY2>s|Vx&^C5QH&a%G4O8RTP(q2oSWN^$Co53Hd=VFiTAc4`r?6(kie{msPVr28(6@{aY6cvCPF1M(#!(9{M2XeUb zTHm5+(9@cOp=%ohb9{sM$L@jFu?sIjN{av>GpFt!f`}=pB;c4EN)pfn&fo>yyq+ecxd>EhoeG@*-we?K4oC-y7ZiqKz9#Ad8v$ z!X{q#^;)&C=;vU>LOCY21-o1tETBge`yW~25g`L?|Dnos$iPs@T5#%^#7`Ltw%(;W 
z;gSr8P0X}U{e~!E#C#d!DdDUX3DT4buCDtJ{j)o}0cmI?bA-+I>FoJ-2w4i48JSs4 zND%VCZC(>jNBjB&3NrMv2pEp*Cz?LR2D$OWewyp(G$e;(siBXfuUkeVQBx`TJtfrF zYX1`5rvwn^if;VFlayn6HZPmB5-O!g*lM}celk#;8RG@vy%8QXg|v*E`Kd1)wBht( z%{fO3aTqq%b4VhPiUmJ^@zrX2f94e##sJPUgwd2DLIMl9H;N=v6u!4&ibt!KCbqjf zvV%MQs;cZnh8gPnd7`3U3R%4Tzn@3wN&UxX#ky3DCy?UvX$Mv_dWjMKh)Ji1;*bQk zBcW4$B}o8_fuW!nT~U~aEOV<^S~Ab#(}>gITPki&meVG%F5q7D1Jf>;cfFGaYKzF9h7Ot)gtQWJ_-_k1U@e7+6dnllYa?e9vDaye|CV4zwP zEJ>ge)|Sryd;%;S@OwK5h(1`7IjB_b*Pmv-#(Ld!RplwCe6d*Zi;G;%x@v$}ouY}t zYH~9u(ZmFam?Kd5nNk5=i7a&4HBa;P*GBzb;o@E{5mDC>w~dfY)o26)uUz}hC`{7m zD_#VUr0lgnNbTeNt!>C@574jDR7vTwI*`(STCOJ~f4_ONwvX}UNW^lzf+RC`lIi}& zzV!Q&&;IoM?e93QAF7`;Mc3*zD81bU#-#3zeexsay8LSGLf);D4|1r+RuW*iYeRYRkmlSYcVNs+vM3YK{P#$RE2xXsPpH^ zCc$-S)@O23-{VGfk-L#t<&RE!`$Ql9U?s1^tMaSoGkvOSVeKv%2BA^vYUN(`uhLkj zy|t|5gENTPp4~olta)+@{gm>|4pM=czBRA6=DBdw`P2(t%(rP_dZBjQe76>o$ik}u%)Lh;(bD80f+9>J&rFPFM zo~MMR5@tp;HS!YrrMJ}?c0Evaf`YsnJ(hevayiu5S@y?F^s4XD#rp3TR@Yq*MLa)` zmhb*CA}+`t2^`E3IkBVf+avLzi&V(apdUJ%lKXoPHT5+cE(RV9**r(1>)wtGKtkHIA z&(*&R++=hU+G)FvueLkF)IY4bOB3t&r`B03O%7ZwlvlRj9l0u2@xE_m(O|YmEDHlm zErF@g+b;b3udbZ8kCCPbPEqU00CJIvN#p?uoO z{gQ0#6haX++ap{S%NVW$B@X@mOgFLt=wu7lK6JgTPfjksM_CB=Zqpm4iQWa79-3$n z#mx}dEPa*n>A8BJ^z`Vji~iTWQWhEbKdEVbG{yRrF6e{f8dB|;Y$vSQ~EPt+~7$G zaYWhYc5S=7$5-BiGO3(nm|cC1sikltBlkPjD$kkhp-E#AUiUp}bq)yReC)YESeWCp zRrB6b2iM!bJS8y3chkN&*n#M!{8U@nz`txDdxd1b{P$Lo#)0a&l_KL~O2en2srGeTA~E*p2-sQ8;Sam=Sb3X1eyI`i3;y3uu{EblRU??d?ujQZ)a6)Tt&X6*u)4FOjGE zh46(TsO~GrDo}1MhGn+NRh6|h)}J@9y`+@K=g#-7o}$OKuu0$?4@YQ+e$RhTE2vh2q793Rmv^&lWlR2i6ucYO}pTag~g6<#8w`eGguM9HRR>I(xfCo*8_Rziia`(mkwR$Yx6c7s;xKJsNhe4M8q_kD z{>ATKTVnD@kiRSI3~sM|pJ=Nvk0=y1hB~kb1hFnaE{Y`T7{i zd1&Dki}R??jitG{Vc@xhIUukfCQ0ds6IilFh_=HO@Y46H@AYEaL$`ZgIp+h#pIoRL zC#afSYISRs`x2bL03huRuSp#nWfJ4SlbOfBA3q0>vxrjyaISxr(sx2!7y%pF_h!yh$zRVqwFxa(_UVdzx5BS@!?$I z$P}DJ%Z+T=PjaStu)7F*AChQqC@WC}5SD`S7H#+#cc$QIbx24<*4jLpnH30_C0~td 
z2DRk5xrkp)&m3FTYm?Vaq85i5QUcyKR=cV(E6?RCoO^#AH%O^E7TynPP@;*E?|M%ZVvld5qWC;J;gh`t^OP32)RihQ%(E`jPJGezrwJe#7SP0T$e= zsOgGplWX^NPrro>J8-kaSEq1ue0$Q9RBI~#rp^5d#S0-^wele!PbW53L^)&su#J1! z`geY1B%?E$-61oWT2zUF8}mPH))LV)_aj16*;d17U`?_5#XvU`8*F5Lt<&_AAB4|b+@KviQ0Onr?y3Vz2t)i{4d`RJDo>74o?5Nk~=-b znwF`SKO2ylzMpR?zcfK(xz3KK`-P#?B(+4YPgaiX6A9J|c|)>5f#)#?X|g!~I{ZUL zK=7rkL9I!<@e;dIHOqI?e!{fgs^ZPZu;*@~sOxWNr^Ctw$BNM)qn(S4{YvIG0)$v;6e;?a4@=@jA{U{sFm#-P_PbVZu3YP0 zlaWN!FHDp*dYHXT#gpn~S#ICYM&6 zJ^iUNY~|5J&X-<%lFus60fFeB8{ZKb-(Kjc`ERkb2GX-DyQpb*NZ4%R+rz!(O4+T7 zp=}OQfs6Cz;L5MGO&_$b*O#4H4HplWYc{f)4)=GiRwjjZZn`D2(UZauFwCmY9J_sN z+M$BAY-!&XqwvnV$A`R~%%Zijig*{5E5a)$=cFnoR~K!$i*-=ycK!&o==LV?p+k#M z*05Zjny_(xw%v&kG}f&0J3Ag*Cue!H<4n8pi_ZoOZMfOG@B5N|^|vubwk?+wAEan{ zrHkqP4%kHap83pq>mlOwlhA)QDyf+GekGwgNf`=cYZJ6=&)dvDeEbc4-+2!@4^Gtf z3_ib6J)2*G#l&0@Oa>xcTVinTz^aYjEJW>h_lM_D^a7=l#fnDQ!|F_O0>OtiA5@vk zN7Zn7HMB$9gh&2@28%43h(vWA_c6pK7cvJ-X6qdeV5`#UV>B&{I4tvKeLd^zTcj4c#)}PE-{-j9-M- z6YP8rtn8qPf!VN}dd=t>hy4Fl%I8sb4t|jbVw%QAh0$`&k=ghvK3|(eTTMauJqIU_ zwicZS*Dvf*d1ttFIwu~C&0@Jtq!*ldd%mtdDp(M(lqE^;&3OYOZvRL|W#C&$iMJz;*-&epeKasZ83V8&!Oj4`D#yM{*xdwJvn6HqYC zgs%1$xI90s+NxV*q;BdBkEzX?Fj3vTwe-||Y%zsO-YdZw^%^qizn38QXvpUq!a;g&I6yiz!FRnh;jlB kSkM0<_y3F7xBh|=b=k-pBZLF$4+tbJt{_$|V&MOO0NODx_y7O^ literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/reuse_fp32_param_a.png b/model/train/yoco_moe/sources/images/reuse_fp32_param_a.png new file mode 100644 index 0000000000000000000000000000000000000000..edc051816744ae38c1f5ab1fce71f55c2aaae911 GIT binary patch literal 48348 zcmdqJbyStz_V0^$&DI`FtLiD+2{L#yg7=h zYmC!0_qCZaF%MI7=AUsd&Ba8@L;Oza zlTc%)kwd+p!i$8S_H(kqHs9N8Ei;(JPM_Yksn|LkFQcC1L5i9fs!$U_+N~iMH~&5# zTp{_dnT-5-&{)*djQ)BW12l~gbz$Q>)P2Jrios89W~IGs^zj$Uiaf=r3599){XSes z^3L0~+yfKXLJ4NsPeV!yjau~eCju&Ay7|D9%j4M|v(}~{J;9p<99b$G`Tdg6ynzzx z%C*VS`*-52u)R(4gBiEClZ%cZ*OI|k6UEmNHq%q$8oo3Jp4+!Hx|Rd630LP@0=XjY zbCp1xNEjwRaR|puDqQ5PjydzHgOlIow&Ta%|c}2X<$N@i1lyiN0 
zyc{d)N^w@N#Cb1&dAM53J?lYtTCxzwJI4%NUXMwn3i-f0+w|$AE36JE8-H11)L{7cW>R#+Ua+J)PXPv;q!^^3(?$iE6&hwN;2s|;t%rD97VDnQzUF+C(yWZ7aAA(B= z6l;H6U3G~&PW&DrO#Ke_+J55g&9L;G>;zHRj26RWj+aqAe9xL8{*k?@_~KwA^H9jw zz?KAswe@=1=G+UT!9vGUnjz>dtL}`Wy*ptKYVzYNO2&+_uZ(RIX^|LIW{{%xeG!*k zkt2=_v^4A&j%K5FTyOgi-nXJB3{moDE*p;A1Cf>^ALV}-HFCK5D zKGs$GZ65!Pb+#rm$^_w|ZTxXbNbN+OQ*z{~#BTqK$z2^+Y66_~$yc-eHl{oAO0z|m0b(;VJi%6ks~zX(j6IvzcjJD>=xt4< z-4C>#EzY#_-`-4-VV6JD)yBhZsKJN`Of8BQp({=kntUsjdm#o_rF*nv`RbgYoQaeduu zUg|W_0AO^Amqx1WWPK`!k{o$KrDhuutSt={tKD9AZBJPcT9r>FuG|@KZ{{DkFC@$6 zk7>=#=W=Apq;Z%P8q$~|gfVGM@@mLb1zjy)bUu-0_h0SY@D1*|+o5DaF_O{1QFPN| zg7EfARwf7Zy{lyotM8=d*NVHt_3D^i>73%#=XJMoHPRA4Hl-WIX$qDZrA@k0P@82w zL|%R>w4L3|bhB;K)jOLgx3fLV@-kYV$q^Kv+Y&0&50@x448Ynyj$+oKnB;MPJ4v>2 zUfD_sH!A2K(tbFf^wvQg$^>T>CZx&?m>Uam@S-*AWI(>6?S%)z+!jPE#UXR*rwOxj?xYl6sp(B*{EpNWMvQSNLj=zvb{?q$9 z1MdZ5SR!+s=KD*B-(i+{Y;(j)+Iy`}7l`y+AZ~Uqwla4+r%tECbJXQ)-~7z$A!+u` zb6`>-kKigxZ-v)I_m(9W)O=Z3dFSTcCw5k7&CZtcfF#w7FKRGfQIIciEa z^fTl+vJFSCF6m_jv-QDFQpZ#%pgR#@fX=kD!g1eq_nsCU$%`s5LPxr_zs_66a;q2{ zKdw)(t364~UEg35DD*7ZcY|UUI|eH2R;ohSp3pH{Dnd6-$mk?XkrqCUjBN7APjTTuYTGsa<^c} zdL;Id=`S(0IKLO-sgma*lLAtqE6SV^3h{+`n7F?iWjIeNUtF?$p&ls2?Oq*xHk+Po zX2GZYz9Vl=_hrzn0J8`1HD4-j=!O6Ndo2Sd(6#8E?Ct{niB{!$zqu-H4u7J4^V|Lz z9JVn-)tr2*8u4neIBCWr;UOyu?aA%UrCB!|CG~g}tw)Cz%-L7Z1=BUxd?q|kbKTrp zY`VI-XQw4LCg6`@!EDrWdgA-)+>{Dy(ad?sB4(n!Gd(5t_9)K>te#mXFc>W(bm**F zM{hB5NgcmPr!sjd)Xz*~#Xv1XwJ^HB0yA5;({UskGeo^q3z_-RZzRz)HZ}J?W0sr4 z7h}mU1=0#34)P@+SxC7Rt_j(-mlbS^h_T@F-IuNMUM6BpoE0p~SH88jXqUX_i>(~P ze+PfNKT*Wb`17r$4n=?GizL78q4+^26v(tkBsiCK0%9oQL}ANU|l*9kq)Z{E_*kp@(V(uri^m;e?sKL}lEiE?O_G zG!EN12d?`AY9(awLW#DS(_wgTur5NA>t5H9vekeUQiA(ftMEd%(1wO$u!3i)FFAbQ zQmqWuo)EsO-bwr;x5Z2#Ph{z%=JdET`(yE%m;AY^DN=D>ewUtVdS2eHD^zFYFP-#U zL_RkMIwz0mrN-p+SPg^RprvpQ=PpLGyv5MDFhp>G@^U$R0(N|4#*s>Bgg4-6-FAY2 z)l=}+TTKfMe`a)8HApWqohJ`i?d|=-zt!-17#TJR!*H1>pt0L^JD^gC*x6RI8~8XN zu1EOF0lVp2$@358gl}H=S7&DIR(KF2@1>eg5t4;bKphpm-;*0EJQm*+3vvUanXd0Fk`_Mw&cwkNeX 
zbX31-tinHGOigRQ!FgG8{YZ{;eF}CX)l@{i%(_i?BoN`+w$2YnlEJ9i5QG~ImC_b` zcK>eFDRfZ%j5EU~3KyYnlG|CniL|=3*#?i%W$x4CZ?y2T6zZOGJRPmC5L>P;dtgy# zOtUr~oN-MiaZ1~2d+Gc=#=YQ>Lnk*=Bfib+@i1aMzU_JxG4H?Oo;WH|6m5XylQ4?- zf^U!&N;L7w7-XTscmL%_nLG1jzsda9fPll1|8FDhuo}jR>M6JjVwX-*a}EdiM6-|F2rVp&GZ%&wIttQ0DI zj%9(^kCpmHWAhZ5Hi;~C#f5#m%PtQRNdwQ)I!`3i;l@~m^Wb|HjWjV`m@X@Ypac1@ zChxJ72QQvU=4qs>Prxk~D4)JfqTng+TU463INxbzmHS9a%55r!ONiZ@8udUSdDdsK zWp7@6!e{qu#yw6vHqEbdu0t7<)%eCuA}rTkcLQO%1E-j`DeRy!A66T`N(B;7^A;Ej z1Y+Ug7-QWSc+xyGht%iRDNTVWL|~<3zjMFX3x+sOlE;eHLOhWax`ok@O6!frNMtZG zUFp`;f|yGxZQ9?SXqppRY1NZ8USU2IfO%V&buLRTOqX>^^7=w%2XS5sbCNaAmjBG6 zUu@`hvirRHX})@@&E|MTmHUSN*};$otJ8Fy4@qOzw^HBpqxIj-E=$S?+nS6wGoHcf zsUnt60ysYLK)>kZsV3PB<|tL!4lyP3THf;AZNaW`oDy^`4SDWcb3E%m%WXk2RrhKE z%cR-Qr1kdRtp9ZqYG#~Q)F&D}hcftWt717#9W=*EvC?I*wkC6t5wt=lPx)=W;RMEW z@SI%muXfzwj4}T~9?O>x>5RUA(8&zhkfjL{fS?D;O=aA~#*>i57x9;{p~0}&|9L-9 zJAah7{$*y%N-jhun8L@D!fo%x?r9rkWRH3(9_-1OyPt=&h0aShVQI{kvRAbZy=mf5 z@FLcE7<*%8{#seGp*FJ!U%Ruth4xrBJ+_3Ai1u*m>_i@OwmX=KW518KrtJJzdZ5;M zHgs=qViV*%RElO%!pDUTSNLy?l^JZL^O$4d-{%e5o^39ZJ%BnRG9`S^`e&NFdl(N^ z9(x_I9Bxe+#MwzdbzSM*(w`_cX{b>SEPi(Rq{cyONcvzoLh@?PmF~lo1)r-`U_95z z5X;)PgA9n***Dz4K1$B_M@w{OEWTpyT^W6nLIr-88DhoRx~zd-M|cR;j<U{Omx9i~9qay*H4{K@)}IoG~9 z$JGn#&b+%s^09W4mL!bPN--2wuS>WJznST_PbE|;3tS&d2KH6Xc^=hy8+1D#ZDPf- znroJs8lTZs{;I$4(@TN+!XLMQW>Th_^;p0^ zTg{FrFs`;!-lhuyuc0$_U%-yE#Iz|nncp^R5uZ^QO>c`)B|c}kP=`G!2;&1r^jN*S zy`G*E_ZCK^QONxC7uS&kU`cwS8k;hyBJQie#Pqty3bX=UmVXvX2z|Bw;dir`WMg}# zsX@7|@o;@a$J4MzuSg&Et3cEeIbvX@=U?KnQBwP~=5}m|={qK7FjWChERds|oX`Sg z1@F$<*-^&NwayzCX8r;m-o^(ogE1HDgtkaHZm5_CQ9BIcxlgWJkp+?aNPZDnI8?Iq zXf~X>zU!)X_JDsh>?m_wz0%5y_Uc@tJIBPT#8-xnc0-<(>8dZ9u|SPgw%$T1P~uA1 z<|+hGkbh)u3r-U$eAWbvijh*6sgnK(yN1x3K$ePcf|~0_RW@3TP$H?@ZgkS3KgGR| z3hXw%g^~<`P1EzV$w}8YJ9!$Bf;mb!k@DzyK=Kvv4wR% zZODxXDC-Q^;%@8==7uQ;8)n%}B@;4iZ3{wyr_P>l3ufPw?_?4Yy1Y8yf%T-x9(?YB z3h8_zFRN1%)N%=_i7OyhxXUkcbEF^>Ne|sy*kIelyp1Q}9GKuzAKi0#?!LxgL2O%~ zU#1CpBLRky<{_MtB 
z*2h7$*j8ihuv5g9@ub5oEy_RP*Q0@gI{Hvdvo!>^CFnR=U|akKgos#vTWy4vlddiO zyW%V|?-7_@4=KKc+5TeZ$bP)T(dLBC((@)FZQYd!N+ET3me$vzuQvV&d*ux$w_$cqK7%M#UB^#6f)r&y&bU!bn zAodB_b7ePz096M3Ev^9sTsCyB+q5B1lnlaE!p{*wv#r?d#;Pb-a@khzboN;I(02_g8C#{D3HCo!o}F#5lygN}!Tn@|0e2_|6e#2~N1 zMi=%vuo3wDJW6xW*8P^Wb$aB;>bRgRr~D^!ew+Rb<}LBQRMC7GV*-cKeyqcb`d14- zbaR#O3ViOBUPU_qmR)%G+>{?1Z8b6IT5B@2vEx~Sn#Q3#*##LDEz2)AYiBpz?Ccz( zp@J-@3GLe~-*y(2IA5;D(YP34SP+d2AE^u5oa5dPB7Jdo=5CTpFkiI{k(%pAUQ~~{ zX}k$}XhG&ErezGWV5=UAhtO53p!F++@8Q80S7TMJ{m`x7l|JVsy)y0e9QHe`v;+SZ zHpyvlwzy0>nP6jYHpidmVZ38=)-B17!NcHoUH%CR+8-@>qRipC*pc~=IQJv@3a|ZW z@o>o#A=fp!a^C|Ag*g5ovfNk}%kP?stM3TDeDf-y#9daH(u}QpQJ1S6$5!Z=O+lTZ zOgpk}NH`e-;;)wbQy1XaI`W8!-*huA5;F!z@>g|#yMFAl8ZO^jrBxnYN=o_c~%q4;+^w=XuU-lp9+be z5nEaLV#~IJ-!kBq_{vez;1T3aBj)X+Q!?Ia#XFUKX){h9fPwxqB<4Gyf671Y=59{Z zfz0(t5vj!z)uXrK?m1w~* z2{MEFFZaW^T4ZS+I`5bUmfe6LvkyqlgEA8y(TN3(uWyCBLq4ti97x|-*GqNgr2O|a zZbjaiAeCJc}$pbHSAOcCjblXd3Io% z(EcfCozqj0j`)0i5+joRCH=!|7MO7Dx*&xsnC!<$)#TU7R9y!tQZC?Oj;> zDxs11sO5qYgFd9lpiFyXw8Vr)NIfD?4GK$+-e2lIU-AT=SH&1a6x5S;z-wU8?w{m z3s?*FOOj#Xek-YOR!{a8titJV?x1kGPY593!ohFfqD_`RZ;Ppk;BGn~7O?66`tvRZ zeHJj607BH0SKhK6kT9$F7aLXTK3Oj{X&3~;WZ07pNBY~JfiZx}MDN1Y<#{Zt4pZwR zvj93-K(Qga^k4$##6WnH?24ohq^LC@3hDsUgCijeo}Cg5o?S=w_jRa|H~sxgRBdAW z??Ix7{hxZmoDAwh6vhqk{`={u>c?~#xZ?>C>OvaSlTH831?oc7DqKXt>&_fC#^7;I)G2M9g?w#BEFl(}yC$D91N)q7jawsK2Ddd#s3aTjVe-HPq1C69# zimCUO054u3%0RlHLfpTa{by5Th%n6leh*ntL8!JLQ}0|>;ck*u0lL5ywK{(f_2O{M z>|ncLN4rov_a|yN_;%#HmR+*ILd+`fk+27Y9qB9XJ$8KV=$ zAXlK1ug$L3x900Bl~mC;uRW)Q#Wfd*L|;3gY**xmNe?i=YFSyM zm&B-Bf*hXuBMEajEIfYKDWindsUzSSwqvwLy2RFgV$`lr2^IG`c;t@{*%e1U`sRdc;-k!4EuvCPsKN@IweA}iRPN)P;DhX3${ z-cTQU_s)2Q4CnRPM%fVXK)NFFWty4j0uJMvY7Mue)adzT1${}tGW^9()GRDWgydpv zeml~W2Th`v6*c&r!lMzO*i#s-)-io{xDl5!`Q`N%1*aL}>C${^*X!MTahBcj1C!PE zdNhk=UWe-%P8bn1;@TT>tSC1z?2-BYHd&sC`+9+o{S}j1a)IsJtrEiu>9}XcBYP)Q zQ?*To@(5gKJvj82;f*gYxTGfhk$6Lls^5Ewf~5~@&n%e z0>g?Iy5-LxXD160BWmO$t02eMy*6(EdQNi?dqS3V9k}daw8Q3${jf>N28XPmyBlp+ z8Q(~9E1-go+MZK2jyl~70`5DFNP?}2N(IgxWkJALl 
zlwWGXPk&H4YhGVn#??XS=v};zxAk&A5CKON4D*aKt9P?qAIguYpNMBS*j73eP?n*O zFvM~hEi&M)?tS{jWgu52VOWf1Ubb_w%tqm6X26)WWy^>i9zWcwlh!^qT z#@1cS00!w|XEtA*siUzq# zxo}CyX}X}sx^o%CBbGPY)3E~f`XeBZXmj0A;b2y(^?lZ1TKw z8Q9w}KP;7VqS5OR7A<;!Ml0zjb_v#*-+f(I?=W0U_m#}i<}+^_cHDPAqZx1eo^RE% z{o;Ze-qW7UQ=?=vt}0;rQUyDuk1D892wfwTyvTqJ%5*VJhQ2rj8FLZao2u(73lc|J z`c@Enb*B^x*_7W@n9W1}+ejw}q(RWk5t{XhIog?Z&WmR26!SURjmeCV?pp4OW7lg? zi`A5VB!iyW9rd7{BngA%d(dX4F_rj?*89&}SvT*LcQZ+ytKzULYXGWJIQW5(wp5w% zMiB9u4g0N4IBvNQAXX#$L>jk!tbDk1v!6`e0`+!jo0Av^EDZ9-=2GMZCkRr1!Nuf` z4koEi8Vn*w9+2o4itOj8LF45kXi8v}6;AdkY8^}%y~N)Dw=i<7b)2dhE;EzVyA`Uf z!A`g#TgWhK1T>FcfMK~aZCTYV*1KZ-Ur%Pa; z`Pk_^h(!OWEBv5_vj%~L=Fht~Upr}L$zn+suW5Hy+v9sJ;0M0m#F3b7@)ns*v%pa>q8|x5iea0vj!{8ixaF8qwIV_T0y{s@VZi`G8#ScDTE){O!!|56j-VR!mY=0 zvxf5R=PSFHi;b}Kif#HkW3co}U}M2v`$G8UNXg*7x}9lOq`iYxp8=irgdc(Fm)5uTYcJA+p z!g8D`9&7Tbh;Q{dJ$POID^~^Iz4*;eY5tp?(UDVwQt~RxFo=2kx?`!~^Zte%iYniR znfzn{d@rP0p#0~-+%oSYa#Eqv8HG)-PAC3+e{o9BJd zk2DgWt!N%S;(ppcvI;ziF804AP|lr9i??z=-RE7k4WXLUW}s%c6&iGsIeh1L2!Y(u zAw*D1`msewDMP!L)6B3Z3%;DFYOLKocB}r9Cs=x=4w}IP_56pMMx2q=_LRE$>ivRR zmjv^b7RZgV!?Q=+HP7SXrERo`*%+bHZfJiVJQJ%nEqjL^cZbr0;Wk!kd!+o)^YF${ zl%!3IRjX=~rBtlyOW{|lP2C()2-A;atFP>JZoYr85K-T>nDSVJP&<)1!iha}P+jpM ztVYW&G+dTQW0PH3o=0+D#GRK#9xqVhT-DkC`%rja=+*E$ZJ>g25orm)buGfB%)r8T6IGAp>P;!cEO&hJ>jyD1Bb6Dp_-1=}Xw@-1ZDEx^XlB*MeBudMmXF3;nh%E9jB&k%$YFB}zIQQbiH&wi>Htc6@(-IkEZD|#ziuT01( zJ!RGLmNcvDm!7G`-<5~GJcr+UF&>vWsUXx9Y+WBsIoo;7x4vH&Uj_X{c|Zn4AJ)*l zeUsgn@;FB1^`@fa<-t(QnB4E>MxRrcbpS@a)bbAfTthfaABWbG^dt*}?P5JJfQNzt z&S+t@L8Bz~$Y$D~G~BG)%I$x+P*!;8ae;Xn)nk`}0)w(NFJ>=L_8Hk=dUf;1`4A8^ zY7Z2`bvIhziaS%aWw3kK>qTY5g7t59)KStLh&e5wx%K?khYQoEr8eQAOTl>erHv}B zav_0Og^n*M(>M-E|0^#SRuinW?3Q<&tkUva71phL`5crM66$YcQ_OT#Ns zyjs;rRksz$S#1Wg zn1dU~ruPw~YdM1;0dn8Cw>4E8G2^wt$q1+QJ&2lAaJZS9k_J}qKl14XwF!sO;`@3q z-A>g3p%I`bilRGJ61c>pdVmgs`E-C9h(KVRRo1Zx2Cr$ON7d^sV?Bn@%l7@(Aj~TO zE|w3neKsDsDK*ubDuT{4jY_C8CLj$%Ad!ys*#6a!5Kb+chal4c@iPcOc<29NLh%m2 zv2KO>EPNKS0f{vm{**jrCo!Wk%YZC)TFPVBLGVXc!lF|CGsVGDf*H)#jS5{+L5a}` 
z2kbNbbII46hH=4cZpF0wIP~8I%bltgWE<4bCca&dm6_#(@~BoBxQ}RsmD{#gkemy)8a1aRN<>kL#`- z#h4-#&zr`}_^f*ieD_FEIbw*z8PnfU&aA`A?f+COgI1sa(@WzW7%+C;?Lh!uGZ?q5 zRE)~cRcz(EL-eeF`U?=9e>I!oTaY-xuxgT6WQr>#R;4KqdP7OkifshNk2gA=P@=T#2r`08r z4%oB4JWMpLwu|$^bWGqf8NN;W6opKV!^eK#3eZ8(BwBPWE`w4kP$T`OdIzH=VJ;;_ z!ndA{9w^Y9BtXV2{#+D5`6P+g(gvi*e*wF)x8IAN(L;G&eDl~}H-Zo`g#Qa!5qNyx z03~h~u)hwv*vm9igkR}e3s*q^i9;dgzet?*yP75DCBRb zh8@Hw^8v5QZB3g`4$w9LRQ`#)7@>e%{|h7iU#4haIp@UwCSoA!VXXWOzWz|g|2@FF zLrz{`QRy%N=YKi(mDx6c?&RLTVBbx;IMFxTIe^h=feB$Tsdv*YGZn7~;_h)UAfUjF z`~&-iptd@wXLdZo^O}E8wz?1@DfsVrO(Db06%?(M1+R~ruLn9~n0u%IVgm+-B60t~ zYtpeGo@2XRU->qN0eqtn=ai+39LQDxT+IuJxBd&T8aQIAUqnn(XzJpp8$DwN^EEU8 z<<)7gEjk51OT+nN9v5jq&uf7m4pz_o#n^{R#LXXblwt<$%bMgXUVL{++FAUH$X88@ zMq%4Or0x{C!V2O_EQ8#w=G#wIOg)$Gex#K|bo=CE(>x0%ptb{7{nd#JnlVLVA3c}W zqyF402+C21MT(Jh@9GP`fK8LJPV1;y`kDgFHZ5# zb|eEMC~-B~4>zzb4`SkbXvDn|n?L-cZMnh%*>pUk;~0n0cQfe(cxzx1fUH-(d_HaU zu<=d=EhWD>sstltC}ShBeXzY2>R%7tF*iuxK0g~s32=}_+!W9e_i*#hIaoi z9MS(bFJ0(^doKLIpRBYd#KgczWbMNi+8@f-82T$Mqx~x`q=$o+n%;Dw)m5#r(`}r; zB&0iOdjxI7y;s}QkC7y|qrMNpLn&84DpIJGEq~C0P210$?jsGneGZ7iP*+*J;x`LF zKF5L*D4>z?NfardE;|T1yp`^<>1Ke?J^+H$hzsYF8r}07ey2-hXS;2LYoNnLZ)ipe z6)l}Rm)nA|e6O1z>3$TXGQss}V`F=(QIIBYN>G#E@OcCH>aoYc>YYoF{&DT>s1uNC zi+SvRy&#PIbcyVfM1^9z@$$@MvfLC@ z0}BASFk5Hf?}9$mS|Hc;0*M)UffK-|c=R>H?-DOrbnLJZLC~=*G)oSe!v8VoI+vwe z-^0ivOnvs`5{^BXv5pQwJ4hbLWfRj|z2|V;N$WhD_9lyTd7TB>;`pSeHR)4GghS7ua5_s|g>%HgqD_>qs)bZoMYfK*!DuX2Rm zyx|fTWB8E=>78Deb&!kQ)!=&hVY#NU0P*i8w$yB2+rMbUD3;2yCpujfX{#EwB$W5N;)aA|ievL!Np?6h2Pmv5zZDN97A`;yEDQ zf_4^~aEp3=;&;SD(Tvp|qcF?eFZP-}$ykhPcKx8O=}TL%JF9)6wwDP2hL{4NlR-K5 zQGp>(QandHyBxO@;cOZG<&Hd zcPPju!eW9EDWS;tmR$NrF|jg71e-@y5(G-C_`u>(jzg9j3(TgxG_o#QP51i~coLwh zFsEWCtvX7M6kf#AW-Cupv|FOV2zW&$py~|)dA^!{=zidfopvXteOP2F#xo)$Rx= zv-?f1CpE>!G|#+%)Kj+E8d+6T@Yd{Lf=$cpi%tBDdVtMo)T&X5H!9$F;i1QP4XUeg z%LL`7QEKx>b0@o2oy*qgYlBK|_a>`4Gn|_R3Z8z+IHmj4lYC#dEUi?&OjoJTd`Sim zxgvz>@>5UcWigZBEH60vAxH;{RQl+ zor8c{>dk7KfdIdgd2GO|S4n`@xja8MKt87!*(kL@etmb7l+Q|a5*%zvqL4KNuv+@Y 
zsDS6bh2A79oYe!+ON3n|ZpCIly+@gIPPI*DQ90*?>_}Z4;hoKq?u=V^@%)7OR*`+x zn(?gW{f(Z+5IjZrlxEqpo_}4H;*URMYOm=V@;qQ~} zhI$gBoWqrL{H{c;6(hKv(-SwwcQU6t?y{cAJ%u> z)(CN^M+HRgv}m=rP$uO^3+)kQ7{tHz;TB4W`9>9X{bJ2in+V*{s%5iUr&Jg^s`0C< zZC4IeDm(=NO23Uougug$ugN@>Xe<&QdYBPIBtn)+de@OlC6*;~(mQL*2H5Hd0?kcP zIZf;otgIMV_9dvX0OJ?Xy`a6}!aOed5BrRFU{cERj-48Ep6;i1cUkOrWU3$}633yP zw8(NJZX=pVfH=EUEquDKlc1c%U!`~l1-p)U+r@~Ka9Bjh-PbKNk2WFHVf_#m2>LGx z(jlT|9&@~p)KeLGYVO#KIAv86pC~+J1f6$M+PL@)E+fJvB96Fy?9$4?wA=>FV)wVw z1V-Jfw8yUUpWQ=`peNp&<2-TU;~+4XXGTSSu>INKvG+pnZa95-u^66OkeC~KE2cew zL(0T50iFX#BuG?$vftIIwSeRx=uygiNb(fnlLQiyToJ108J-6(+ZzicXdgX8qZaql z?O9PTCuD%OR({=^#|QmkO0m+$0oDl^pFLLZ9Up%y0c@ymH4Lt!*T zPGP%m!TCy226xrDd2O&O-EXjslG8W`sX6)Bx4va8V{YSVz3$FU*qKJUpiNJ5hR>T| z)QaagusycGw$pCnC^^Yw7PlIpnyqA{P8JP7C*)do+!GIM8-bTfTMS{&N2w)BAUxbQ zY7rg+J!^{Ijs8)}-p_iAK4`;x&MiC`F;7@Av8$B;B9HkGDSQGtd4P^;QjoPI$&HD3{rXB%jo?jrzU1QR_7!h}1WRI! z6~W0z8d_8NrsR>#EgfvluX}g-PobmyO=jL%F-2(`N+3xj3z8G0j{qkOGL@R147Jjm z6g(dpbbHzR@=&JaB9qM5sEpY6R~=TWsbDxwkdOaLGDOjMIN~v!! 
zNbX_5>Ij`qANv(rFH|TOIO%(D{RT694YZ{MN`YRqULl9^XL%l2e?yO%2Mq8~&)(9L zP^M$sw%&oyPCJQeLNz^EzlN}fcG!Om>8j6)RiwMTZ)jN2e$TKoKdU_rQU;Rn*yF0ucS?J-s?(M!%z`Yw|8BPTV8#1vR##XK|7Ot- z{Q%`~6HU|JWgofAAcYfKht60uc-H-hlzB(LqYq>+Q_N3GNWw89g z5oKBBoW5>%!=xGL6%h~YuzE?CvP!?Le?8OL%hag6YZz`PIRjoVL=LCWz^7YnWQ< zlvX;rWd{X^vg+x6!oT?DoxP(AU{THk=aGyH(P6sCg!?i!J$243My(!IZF#2JJV@$I_*6qTP@3Sf<+hyP+Mj8Yhn?a>nf3=z&(AlHpBL2U1fq#3}#X%RPwp(XMRtSYCY%j9FnNV}c4U(biT$-3mWBuZ7m3AMRq zz17y_&{%GJ5o3vu8e&&1FQxkT?jF4QPCQ&|>C2d=n9f%a6a542-j8+5TZ0|yYO7QC z4s}Br{rbBY_qCLI+eMy?l_)x{gO{>r9>_StFLd&}r(hafE2R2Vz-)%^9PScr>-qMU({Mz->Iq(oE?9Mh#=x??#s zZWsZvJUc@scUeAlWVb-AnHW6jy(J00)!t3W5k}>?k0ney`>Zw3*Ng}={hom_)p)fmdw$oaybgvdV%F6g@zbf3k%JNHjQ(duWtB zdGm9B04pgHxjS5bqxSXByw3~GI(pkIJsfe|ls%NwNmnzE^r{;!3S|9Y*Ux#_lG*uE zpYq1jAnW^agtRV%VNJ{atFQc?@lcxyf84iL(MZ?hDg=Uob?xlPh(BY?TDQo7K1z+2 z;oq8hydpw$v)c^n{k5R?$~Ci?+y{ls22n>TdqtfqRF|^`#En^Y$oKUgcmW?3*PEVd zb#SfRX|*?P6x)DtQUCFSn~iLeHgoPopvb@o=aA>rpcUUV@S}IG_aYhs{pYy2C7=75)i+S0=Da!{9L$j3r`pDfXk~+!Kl|{8uvr`p)_1OVqKi-Q%-l`W?kzqEB|!Q-_k`Vx@TArwKSI9Q8mGV4|LS!B>X2lW-zu@{LgMmqh*_7~>$C$&^e0w*|A zmE3$DHw4r8lOWq3egQ`Yh} z)Xzc1iVT$p%l(HJ*^X4h5M!1a8LaPp1CA^Hm5KewjJ-^7@OmICXz~_nAb+*DyOd$_ zaFSg%?&~9!X3qQUKu2xDPv=^0^6Bk9j0^=D3Q%OOif*g<)_+nb6rB<(w;)06hsg2G zWw`oc0*&@nnoWo9FO6m!!<2}p~=zM`~?H9a38g|n^TX_!Ic)4u9&bQo`K8wN*?;M!G@de*9 zK#b}|k}g=UP+${u(QNVdb>0CI00lw|Kv&i>H$4tT$8r*%_3+-_emBn zJb0<29>*4UHap^@9=&wMeDix?X=@SOb{CEYwPeeq- ziLP-KaQ^a;`dkJO*hu9BE;XQbj;H$Q+u^$$PpP@Dh?*~tD8bQ!Ft6k7t=e>%U9nQc z8-Eyna~!lAgtBR*)|VR7lf;>-=Yb zS~&f>l@{#2hkuECeMKpDz0>^a{eb7m`IuP?lBXGS)D@iG@^R=xKYw-Qv+J{2hA9y& zPJ8~L&;HexG59{L=_B#E-4gxPY5UJ-n*22T-VLwXep)bNwibI$*~X74er z1V7*X;_fv6Q}|3C=F#^w=UE}24=M=(`$R{+ZLf>n9L5k1q@cj@lE3b1BABG@kZV!k zCs4xf_?2JX(Z(KmBbtVZHd@X$PeEs05?*PUe*N8}jg3^%mW1!s-~4=={KP#ZN}nIu zOPakw?@i6d*U2ATq4kkso$(4|(=Ct!P*7a_hrq3|N9X%#jffXdMh{1(YoCEmi&e3= zr@P`)$URZRXQJb2;}*e3=H?OjTmp?rJr9Kw6luNk+>E(-P=z43wH!F(KOsTabKK!;&B6iu56# zdkZ9S>|}0eb91au1U>vVMnwqco}uQ!{6}Ka&)!ro|IJfMq2#2Wv8)i!km15mziX3~ 
z`MLG!$0u;PFsxKV;X0TuZrW^lX75d5CAc+Vnxsu+QYV$LR4j{@ro@r!1aE4vV_&KY zsgRSV-~OU(^A$zxb&n9l)9?+#2SSFqI95m^*8JKr75int3qNnaGE*z2z^!NA9o{R{ zS&g1Op9(SvNW4a0Nktob%~no^#IH`vXH-(EFd)xGVAp2Ry<&LLH{ zq3Y?Qr{lp+zXEnBKcrU%5dr)A2+Zcxi+|;C%oadb5n5^BuKE$?00_F%9Bp;n$@&%= zSo7KUNx9U@!eo=p9j(oA$SJEf5w$oBS>b$nhIwrY{-=^?>`h9MSmX2X9kg@1cN;G= zrCW|m>xN>Tll=d(3V_{R65_*nt4Mt%O(i(TLL zy%2Zw%_##jFcOt`y!zRN8RJ52@CvgM*H2e&P(@6@F&$~ot$yiLIC~#sNzzm}v+J+= zP(=tkzxaNuOC#pP>Y`F=gzs1J{;Q_nLRH}ko0Qv`N4`a7W-B+QcaV#?2oIA>o{`v@t!AVIj6V8;^?H%+rU}y>$M|v$)<6njD%)X28#rHG*Dt+rs2bL@gsZkQpl0LVaEph72(#|N8ifQvhmj) z_KBBgADdYYd66{bq@)&)4dE7{y>F_WS(4y5p1$9&kD?zwwJ#CFD1Bpy^=!&80c=5E zIX`d{ghE*4n1RE&(U<|dWR*ADa`krMxS-!nZwUnUxiigvn%%6>;(~Y0e!1hh_}%7+ z%e0R^;2(pQOGc;qH}FQc*8QBXdrYV!6l?dl+8>NMa*N}2@7aLg((e?qh*j?IH5ni4 zI*3xWXMgMmx30Mq@*mv`aXTQC%1lX!5(Ks5a`if!w|{nw`J#CYp*kP^*)HvE{0~RQ zjUE+A3^-b7Azr`g>-i_Ug*619&mZMKhlI3(v?3f69tvCDLGY1e5k{(gVq}N2%Cgcy zZK9t$)2uS4Fhgz5$&3ZqWceA&jHL(8t}a``k}+4_u~B=wL;iSHjskfq5&i-b3x1H! zJkOXUKlb&$>uV^*IWba18sgnN!a*+*c+RdYxaFAJ6oj-uW=Dj8rX%L3ZwVx9zB)Kl z-nrLL^Sh)rWVwfr_X%Jd$;{b!Qpcxf}meG%^GL%1JyJeE~|7d0jk9sZ-lyY}d%zsZ;A z)o-}I5tZ8!bqU7DRvR=p#cnx}V$fD|UT{g{@XqI_^E>b2qEn8S>D4RdM!@;ca^EhV zos?Xh_rIvV)yu)$vLYiQZwuje`SiowodZ3wy}tdX!_Z`n0Ik>$<1!iG>R))!;u-uu zWW9x3mBHG^YtXp}0jWiINDER67A++pp@4KtcQ;5%OLrp;f;1=~-2&3m-3@2h?r)!S zUFQ!_-Zk&cyfe>p-@p5Srh|@`XK}zBg01z_rQpdRn|9xWSjf1ULnGX zo5!`)atZRt*)E*lek%*@MH9qJRXFMCcjp383=&^Tw6pyJQT*9NhO9O8t~#5{S=cFV zP-oi#G7w{BK`xN7yC6tJV(l~R78leQfj(J62LVC)mL5SPIj6!OzCq(=#OiE)VBeH} z@mUazFqdys1mU!UD@GI%+v85^%)fXUq3062^q+e9EQd2Vl&R3VoY$}y5jxj@IOEF$ zQ2;6WpcJTVE;1kSVr5le_KQ5}l7HJLze5Di+Fk|@h(UmK*JXM3*vb`Z~-$`(E< zvVpaqQB&up_{)o@IgBb>?pKMr=9S`6wivDcJ0C9647wR0A_7=8307#-#ds3-)TSFN zJBfo4(h+*P5{hY1Oqqt=FB*qtV9B2<_T`oTvR)=PcFgbA)|Lgajz$N{WD(uDdJba;3c*L4MAo>iBn$LvdRTjm5bhBAq4K@! 
zx$k(*XUQLtKlwq!hZ-yh5wq8h6LrTg5`Bf-3TS7mk@ce9Mn8qA;>fS_fCd$EYowV2Nk-}R`(1A=q}!%1R{Wz*|=JXOz*cr+$T zF=^sUWr`7i4(n2Rf>5O+#1-34u?dk3VQ!;n_>Co3HY1$=;t|r_k1-sqfJEI%zfY%2 zGU&)qEG7*7A%tZgKbj*!U!o9#DpZOQmnTnd8lF@NV!8D^M{+9HP?lRIv4*7a=Zqcl zD_?~=Y8d>ue0q^aF+T!?;O&*7yc~mc9}h+V-Y)p@G@KGac9%i}A{r-Mg^aK)46KqB zuo@-eCqgep8eubf*^Xg39`O>LxJY6#D$vSL!CK^iyK0i=CD&Z^^`x* z`-6L-?~ir3jAiOeEuWE8tlCmmt&L0N!XMX!Axus$anPn7(cp@O4Fz{_;M+~ZidgGV zDbG0X*i#t1kS;0@ozOg{FL0D1BD0-Ch-Jk%a&gnyG@71_+=6%D}*z>wIE7AGx? z<`I0HMT3v*981@TEGYT6e*WUo3uu@*MLPTf83T?=kHuEfRc)rCU87q@jM#GD;-$6x zlntV&rU6YCRjS3-tTV*B_9+045rzt7>gI(<64&20a(X-lNpIo)d~X*Kx&&RSrt9SU z@=qtBJ0gFdaeks_^w9NN`Ohw7h&N!Dy;R9qwfW+GFWsJ_N0eG1JA)BihCCO``mp^L zdt&HYD;4&+XdJ#izp6;}9{`7nGd^nOfmjQ5+&-g?WGL%$JD6S3iQs#xMdrYPEoth5 zuzdIF()NCDO0kb_Z=q13n_}y?Fxqztr|8#By{OlLTGHI|z8EQLC%%Qxe>5Ocmn zPAUpr93EaVuml&-Y@fy?MNtpyq~SwEeaQqiaHV&E*w7~W=xoAE@A{=_>28h{N|-Li zr{hZD5Kb(Xnu0EvuSZ-0)q@kXUmJI`;=e2mqK=0*~XvAPL7u{hM^0l z@-j|3`!D2N`Nrru+&8*WQ5?&5A8ti!PNM7=;?A}=3sID2PHxo`u8FAGyFI`xfgoqr zKs;Gq>ROSkque~HO=NEp^(`cp()k&V?{Y|mcz2dkoUhJPx;d&$vA4ny(&47;@)IE& z3IF<$bGvS;J>)fahPc`0#UXyk6Q)1MmE;~R>@CCV3~2el1GVitTuiLB!?$;D%KHJ? 
zi)O_#Ia9e@t!4)eKP_j3`7f%jF!VJVDTa$3jGgs{=q1C*9Ud}XTa@3Z$TlkCn=l0) z$h+g)249z$HWkz(C3(zY1-l6*PoJFnBr;WXgB!NL?|w|UJQ0t7HJ?RXb<4QcjfXr` zE~~~7v3kJI^1RD8)U(a$d{;%r_xIP^>P3n!Jj}Juvqzu)D93+{|FEp4-qY=M9cwCf zC2mno5_pjqRS)j?55%0y$LGu~3aM}S_+d5(-##uHuVyMB%|9H}7B#w>@o4=aJb$ol z%ep*?&n@Ca#m*?A8TjFhIXraq3+PX6%ew_^GT>08Kj$N7V_Q6t*Bk;jGYff9r!!-g z2>Q@rN&Nl6ZXv>JU+1M9C;W4JOX&Fr9uobZo>&?LYY-WCQKPd=aslQMq8Q%aOouuG1V2&WEnzP$BG^c+t>o?D^ZG zHdsiEO!zKs0CsAeDV+~}3xfiM_vD;Md&m#PvIlqWj}RPxpoaS~)v_^ltPV*91*?q5 zy<(^QvaB5aS@ph_Y}&HE44AcD!=ahOm;-3TlH(6Y;0(UP>tmma#iBNKJ}ZmsXa!1- z#qZ+cmJaiukOY?tri;}|V)Tekmc!Ub50*OrDdT$bp?-dJ;~rYIG4n{B(5&21^;zea zgRuizwGJe*lvj_ZyM|Rp;JocXGRJ7#pLFw?>ySs5?Tw#*+hxd>q;4GkCDDI`e4uc0 z^1EtAuQ7C@L|tXB(UH~z0(zo3B4Svr2P~yA^4vRE$mjbD1z@!_{%IM@ArN`re7Tkq z$7v*6tlQes=bB1-;VC8AX-kxYSZLR2W4G~`bO7vP*)-p}vy-8n;;zrvd-IWr1RM&B zi{lQwGTn&sob{0M7$!*)J9>ZvmH}{p*y`DrrrY+z@*j!eBUFVZ%JsZV@q_Rzu4p-8 zp_Sp)1%r=PXbj;WqS-ng(`zSG>#jO0q-QLJMt>`K4w{2~EP;VBZn>M+wDZOM9X4qW zjo|aypTs8y$hX&%KP@H#3W17&b1L-#gG1H5qQ(#qoTa4m|{RI3V%%T@V}wzH2-dC z=3gRbnI;t`g{pUu;1HQA1)||Z%b)zkUPqjd8OS}vXsi#aZw)kv4WOsCNrAn(T#`5B zLi0bJ$&e{k6Y|thWA8PZ$9}vu-?5pizMXsSAd-ghjO}W z0Moz+K!oT5fY5#)bUPsN;FzCHLv)Dnbj0Amg}u`mEBLU94@JTUG_GnbkaP_(l6m?v zIk@SXuzeVV&OIffom=U~c>#{kgEZC)-BVj;GI?v5RGMKcyVr=m`z>273J)U%fccU{0ZyB#IOIeg_>L zFVBD#y?O3%{x$vMcu%HQt=|#O#r-0TRb%Dn^SGCluL8d~B>l7y}Au@W@c zd4mlljuUPM0I{w6avehF37tWJ&>10a%Z5&`|KQEYlzQBjq0XCR(fMX-}!4mrs zhNo-VAF$yMW(IBhQ<}Z*?x+;hdH;P<!jU|yJOY)^z~EC-pN z1)%!((G^!Oc{=^ij@O}w;c3wcI@{}UQ=vU9MlpjP5EP6~&n&uH9aTlZG zVCaK77I2+73*DXl-{ih9U52M2hhE*-vs$L`Z37+aVw#25HS0G^{paC_sG9y=ded7I zm^|cpwc*bo@Q~d&Mz=;UPk$urNww_^&-^hZwG7%Yf_F98fgxhX%2q9a3S$ zDe;k5s!}sXsQLVRbt`f5&%wM&XM>_&4e_>N-+MO4YWp8_KNI4=NY(;Xy>2RDQPAxz z4-^VnX61Z5D{?OmZl_h8A z-#36K$BlXVL8b~I6*l})u?CB6z7Np0<3tDo-e-9dEFu%UiH&IK8*DSNP3X*rY;K4d zxZz_7N5evh?`9I6X;T6_5?b4cjRV_Vo$zW)pTxXyW)1EJr2CVLcJL)PDjizk$q|`iSTJv4nF?ayZSAw z960`%UMNZ*od7iknYqG$cSB?DM^=SH8T4Mic=0+EDA?Tr{KIk16ZBYaI4kz=zTN*` 
zf50Y}FYJ+tDTO={bUJht+_J}!p*`r7Uqh-nzX%k^}zG$xVjG65mxzfILZxM?e5Wk61rED}DG2N?vMDecgR2 z*mQrrO!2~b=*(+WU2c_dsL6+ySOf%IKb<;mum=v5b)c`)%v7~MG5z-=M)GmhMhefq zs&r?*E%S5pqLz%{1U`+$K8nG#F{B+yu8qjUChsx3&Ix=amuIuKXSyQ zJWB_2q#Tpg0zV?kDwQe7t^P90YIatXqZzFvm8Gqm1BfP<7&SorKw`SW*;R{ULHZl}bXP1EkX3jT{jk%md_SHwS+MF-m7= zf-Cx=lWmPo0n%Zj5Bt|a-Dgni;Qh#r*Ac1B%OM-|?-%~RVtIiusVE2KauQU~Ii`?1 zv*C#HWZQ>1uQ&uIf$gkb(S)mH3A8M^v`a7jFwDP`Q1zt#F);$rZp=^3TY{`P@tTk$ zhZz*%K!=Q2JfVE6lj%e5gqoMRfq*}m6%I~DG4cnCzT_5Mq~BC{DV4W_iSYNy{5cC= zFv!XJSJ{=4ei-&uT38S5{7)-IVnJ0qmb)eLjBdeE^JF|wZ_+f6W zWzF&E>WN071aF{HXxGaWoX_&_xrTMf46d`MM?~%shPuVCrmF=O(5?kF$k~U!+0kCu zy(QO?t&E6z*Tm2{|C7vBBkFV%gIVd7p|x{VThni3hnbp!*{8=hXDQ@NgK90$Af3;- zFt+Aq9lhWWKE7uTkm7fVg|7Bo?GJKwg2G1FH*nTGG|r5sH*r9FkD#0v`9=ZbAks|z zxPiK252e=_0U!&jq6fD0l#URPwDlXV-9JYnZ)^4w661^1!I-bAFIag@^`-oE!pw$= z!{1bNW-_K-BK2guWV9}fSUJq^SDy-WSH+3xm$Qs@5OM{<+`A9$QqK*qSz-uXQhGI2 zMc7P+H#L#wbb9%^H<3=wEQed51DCsxFT2*~%c(kd(*AMkoU)E zFszlu5z?uy%dvrgwBCJ`KzelVe1Q}b5NId7KpIb!Jd3q>v!;getD8?bE>7#X2!3;5XRHBv6M5aKVL#WBZ5N5H4Xty-MoRl$->!WU$mP553YbEsb`&a4gx{~05 zF*1K)cIMuk#D0M|QBkEZe1=Q?pp~nk5UZJYlWUbEyfjsbOti`z$19)tA^2@jbmm1V z$xP4s`U+_pw9Ii?3n8o10kkyVvq5!03>a#Hoi6qq}%Ew_GK39 z1WWR^f%VLf8KGNej6~&ew8!lN{#Xj!rkEn*aRUQpfPHh&UmJV2j0hCzM@_a>bag-|J<)c zJ=gPnnHNop=q;;`J3OWW>jv)(W=OcdHNa3WNqsD3Q{*XMA2j@!G*Adg3;9JCB-0T> zm=t80_qu2mb^k0%VViPWT zVlGO{Z|``OmoI+FH8#^c+2x8-e?W46e6=qc@;Xfau59Ygx2GL9YI_0`B= ze(K{3g>pFHe@vac(%d6I)MXiqvwm6pcl6haXG^~wiAVxlEqs$9mlr!s@2f+TLQC%+ zaVOuTE=F;CmFBa~-c0;wv^f_|L^3}0j=_kvmgN2dQ>#NAW_hmf(k{Gby8Eiyu+~Xz zzt-Heg3Jzbn(QjnhMlRQ%=nwdK%7@-1mhhcqf$L|Itjk55YB++0Ng6n&1yruzRWoq z0ZIK6%%)mQ>p=0)qKBh-x6|&4DP@u*Bhyc=pEfoOy)k{ZWQVlCqM9css0ns;JW7l$S%0oc!0JfI-}1w!Dy}XuDk!RPFw^3Ad-1 z%-7o1Av7R^vzkqK&@a!IhkLMk1ro5m@5#O+)Nb+6?7;AV0{`*)K3lSnJB!Zf!G5c; z-+3F(7K`8oQr1UZZ~6863x9FuG8;&d&up(+$9}p8z0QF)tNdSS#7&%-rXa1YxZ~M(IrOl z`=t8o$axVIO1P^eUwYIJ{0u8LZ(g%%MwjFs;H%go8byZn4PeV{pagi-#Cq)Vu;#in zv;7@(!$G9V1^F1f#~kEb`2;WH*p_$ADldGoI$JgSG>^r^IM_H42-sT-wcp9nouO{S 
zHW_Cu?k)@19DlTAo#0Y?P+)R~9mZA)H+c@kNTTHN@Nxz`%fMdYFi0^q`GhO~7lv;# zIPMo3BX9YXI!J>yUk_D|*S6Iico!#h5$0JQ#*JXk1($N!A6s~FU*Uio%zVW8&R6aa zS8JAOu#sWUHF&0cJs1lIBfSws+GM2@Dc&HGWBU-=-{iS^*{)r0nq)WfDXMgp+Ec(we!$?NVsSGQ3A(&k?f(+n`l@|w5hdbCqiTA6);b z(u}1Tr#M@fkF?mIQY^7(VGQj6@~z1i@WRHQvy&`1HNmtc8x4_L?v~iIw@b)#hbX&@ zWqO~6ugPdjTrQFFIiPjH_sUGb>^RIzQD&Nm`dEVf)S%mMt@En%jbg~MA7Z-^-nH*8 zwFrw|;s7u;*%QydcKHOPb8QRlFdkM@^5f2Nz+42dnfJ!B+>h2z{`NK^jr2W?JunYY zdfn?Z7bBqCo@RJc(fUWXZBKm_vL`b|vst=vO?1_Z5MkHJwQs%O;~XNx(VIqCA{}2S ztLnwAEzN)hRv!h-}E0mld}wDj3CG#|9U9a9d2-l&(x9BMm1_LCQpVCL-={ zh(oQh>kGg5*wdjxzX9ksFPZ1lbM8#W_39-3i+t>(19`#Eupd<<#Zv;ZlHk0&zPxJ7 zt6YWh1^$dz2Q%C$#S|~p*^?T&%WydJp-FhEg_H)HHqb?zy)LjWrqC@IA~vHW9}y|8 z@sTH^Km%Ea+EN$!Nj6fY`zEb$;*lB0kH$*2lTX1l%e!_-gs!G?p>Nbx%X7p>6ojFb z3mVuur|)ut(h1AigOZjJi6M$Lv+z?v!)j!a-&*2Oy$09g8XJRF%GRC|wJ@l{{cxYg z^;Ky_Uw$*6fERWE1i{cAc=1N$c#Tr!NlZ)Kw-dl-BgmKn5er#pay>TfxHx2*EY&Fs z;Gm)n)>wmx_%0WnOySX|Ya!H7jQ7s*Is9>0aZP zwgr|{!zHA&@swkfruS@1DP30u#}prZjW^e)pI=$QWToY^zIXFAykm9{SVjt-nTp5cG~>x_<#AMCZ4`YNfyEV}6#gLo>ZJdYvcvbgk&4Wp>Pg>|VOfOFQM~DtiS?6aQNxq|ZGvAXSIvQ! 
zQRk+X$}qh)J$n1!wfw~(og`AID9Kew58acF;ELiroHN02T{Fe%^*{`F;mKeHK?dKN zg70XSPe+Xv{ck9D%up;Qd9XKlV=x2TzH@m&5Dk{{9FEo$>6^w`V9}E%@*sK=nB0erOO7tbL0jIxPsPRB* zQ7(E1%80*1+SP9Thl!mFXmkDf?wlMU`+KF$#Jd|6nao;DmZ&BJsVFApQjjI#Ig$%L zuyMsRC7sGbk)yJuPq#a+qR9EPyPj>gHC99-9}K6u?x%x1IS^!$?tfiF_6Pzg4$(4@(AwLdRJ z%wrz6bs7;PC;b=XVlePoBR@gMk%)H(h`Mo3e!IV!juum+Whs1V%qbfY>1=6VAiThO z1^wslKte=-04)Y9Q;@FP{Hrl)z*f_v*ZM{0av;k0`H=I!2ka9N4u-_hgfJP!$0Ldo zM!+ampqTas7bxju%qOa7wHsYN*L|E8dms-3Rr9Ddz2cjW093OjKg1Kjx~oZ$fGi`s zMgY(Q(`Wc5&ELwmWm@KL{Ye}TfqF%!;H>uGc{OFPd)l5dgd6H8=0X`mEg7Eq^!mKy zRNdfncMHsij5$S^+(d=S64YGfMQh(7e0=RR4S&@a{SZ?nYX2Bxgr@4!BfHG<3I&Y5 z{=A^jO$5ndkfqq85o0dEBrR68KhBpgMzCFvBm?Tz_dMx@L7GXgOtA9gQW9qB%yQtI z76~j5KJ5nw^$cH}f*3Dl_y)yE9|%NPoMvSyD&!4j$R4089-&Nm5C|JBIRcn60ALmW zBD)Uq6b;TfZ5kvPNKt9=xK#Zo_}WU+QV#!r?zB@{mF5E zp*7=9FDroIZL#X+mnuM8q5_3wU@sh?^Qac>WvJ5xkf({mp2{jzjmFIs>;BOHHlRSI z3SJFI&Gx$O52!Hc1>H}MiCXoJ@4m1<61?IM6Qy`~L?xlE*y{%3+Ekwj%ko$5QnR>6L4~s00mW)5l9V+C zM-nC(j)QvpykU?f+M(ZjXTzvL)f| zwF9aID5vL$6MqMGg5oRUyf4v$HF>^D*`J!lug{9luS8oia~zIe z=rZtH{V8;-&xxRu6_Bg0f#E76E|i+7)RyJn5FUkY*g+!!goHTFY5RV{Ze zJx_CvO@d<943aHKkBX1u*xp;rRwcO35 z+F%13oT~2rLVXnRczHQcd=Pi=+X5hO07Nm4v(92F@*xf9$0q_cwONRa14PXF5I78q ziqH93Vj(aL{{SXuizJ%3$jSAB&mJtP(Z6sygU5PssK=csayGbC1FX?_%*Qi}RPvR! 
z-bPt?WG;()}%ju7ttuzT&YAzY1GmU3j*i-nm4+BE6I|su))+uOuGcgN^V}!)rjdMfHO?f{q%S$GQ#CD`#9X zetm^+$a1{bp66*>l@Y~;s6&Xw+xveSC5(Q>p|)Bc^(q#rq@e0`#tax9UfKki?ydc% zT^jjqG3=WGT3Xy26am5aIMn@sZ!G|qo9+@y*iQrv&@YQZ7hiEs+uu1*0LOW*Qku~V zhv)#4rwd&3#rSN~2=t1$w>Q=NIJ_*=9_Ot7KkN8R#Y`d1(N9$RMcHM6pz>w7>^V0jhXjZ&XTIoL->cY3~(`jI0XHirSh=wB>y^TZcpTpo0ts zbhqImm4X{iieE&~V?g9$JHDlS@vWNPwTDlFJN}{PwIGop?8)G`HLm*gJ^yLCyZZ4< z{)!=V4nH5rsgETAE7J@L_k=40(V&S`9guB9Bj&%LS#~{K$e{^4?0gvg3~8$!%}w`^CwTI zyFzhRVJoZC)uB~}9C+En7uqAA&3#nWK!U7G1o(GR0q=R!|R&(fhZ9=^NC-t56FT3L^bEDrhx(qRf;mngVa!?im^K%f(Sk*V0tI3 zmP(HUkp(6E)wG~J(u&X*JZ0dKY_H7poVvS(uMd8CnDQZYRhs4Gfania5zr%pc2heM zr(sCUXVb?YKXMsE^5iY(-~cWTvUQsj!LEAOU#!>#;wMn&J6)?n`A{5pOchWbzK`+j zy7Q@@mNlA3xJ-Eee}n0Dh=|db3dsh6q^UA-l>YybT6+kO=>KL%J{Ug_pHf?vr#@;Pm`laY2FVmz{xJjPEmxm(fQa|1NJ_4;%1A(4gG;%CtkMZrd;WwA z)ASrr$H%h~o`O{z_wT)zS^gT9qVq?_gFaq^pCHnAh`0Q9-kpgzpZaQ*kEV>-quOA% zs;X9bz^!O_oJVN3m#F}!e><9Ui%$Co|FKl!$T#wV4Azv53V!{7e~SgIw8HPGpcvF% z5|{cDpm_>`%Po!q_T&>GJ9#{tb~3;g8m~#}z4)rj&9EwN5)O!pDuCX@=(O`YPQcN; z_-z^S1C3~n^hrhE+RkHL95QW%^Yi{h8x;gnEir>e9(&dlQ}NsN4EWU7|C>hB6_m;; zy#JdRe(!g7Tl|jA6)oQcW6%}=hdI%iuR6_JcGn8nD8_*F zQwUD84!biviN`O6KI$Vp#s=VqfMFcDDSxG|z4<*?2ZPzyiLUA4DYGU;R^NYBFSqMV zZR&+88O2)v>FeyJ#EuGjT|@l=jVjJ@Qh{B>`~NE>~|iK}H5Q=wAOyU`<(= z7Xj)ny*sqQZm`hMNpm|RGIyzEBX2kWV|~V=yrKU%m;sPrE|~)!-|Y1z;(1_5z9uNV{A?{jG1O|-;T*>& zr&Wt3P_+|!gGj0)MlIl}0Frqn06oC1%}iQKl;|lok_=T{aNE6fFio-NQUyB8*|XVi zgV(WKo=h`*7AQlla4VJFSbmYSXV-t~yo?%~Eh1&Rk9w+wsohfFZ96_2iXT9UtPY(C z0vy>6+Lvh7W@5_4u*sfQ9V=7PjYGC+n9Zv4+mRj7U%<`LKk}`C;`&W~I#wF{mrj2PPag}R5GyK=O zHP`HRX6(zV$cWP*dkixkneK_d76EMe7&y~K~K8x*#LR* z8G7H%cr`66(P-eu-K2qSZBdakQ<+2k+o&K%%hHyoJ7YX|f<$!YTCGkFdZ%zSZi!hn zL3Li>WLE0Ur%fquA&x~eTAl58n(F=>Kw=OJi@IpKxwQDdB#$AW10`zbxX`*^i5fyloa&=QneL6ug|i2q<;=5RxD>0Nzfxm&Hi&zWFK6K>Z7Nw43~C!0S4Y6(%SI2 zwYKTD?B7WD993*1oAl7p;7%+)`}CJipw7?fPB2=>q<5P0&>d}Y^QT&$Qn|6?CS{mf zPI2DG3fhmNZo*nd-2{i?Pgfg$$|qhw#~lo=Utd{mYB0r7KRva48!WpvAPE3l`M35# z)$g(LFZNr+LeP~Z?t3jr6P-rALxygy%5 
zO5SvL2PeLny^1fKUFT8Lp-kw>Zs1fIBVKiNe|~Ar58~I&=At|q#D8^-*Q_C{QYjMD z%A@SvX?AYp#9WTAGrXfHNQT_^q`)1rMg?&&UZk!0!DNgqI8puo_6eXKh=;Ckf{%Rk@wIKuMo1aIH}4H~KWijs7F( z=4epS1AM{!UgnuVtpn+*{&Z7kHt*#uuj;x#6`Zhsbh7tWld8JV>SM}{P@Zgz=)m>G z)WnHdUd;Gym=?Z^8=2>uqL;S^jw;y;lY&K(!Af6!EsfD)2G}W6(n z_x{zFssgzpVdu8Pc!0w}X6m5|I|;L(S1xLrF7hVPSvY*w-onK!7B6qJpU$>~L9f_L zCl_6^BHht^m;`-NaQV$prK9@sbhv6S9g>5TlqI`=voirXk%@}oSRan^G#r^Sy!s1o z*?;Rq;NywxPB1*DZLh|29JFg?~M6aWs1`j%s(=v9B`o7j+YWRAiN^t3-8m zL?oIq3l|chD!;n+4^3DkPA|FMG9?5f)(Bafm{siW^m+*^@=^nkDb0C>(h`gRSnZfFAul3G$B*8&~5A>+_XG&P$Zhahi*n5 zghgwqb$d@0`^KnD^aii{T4r}a-k?wKfLL_L#Zxg}k*^x5NcGlm2Xj`c=Ya($uMLaEgY zX`-ZH%BYO+iPL2jEKT6KYJNCuwU30`(@jZu889q~dATBzp9kz`y9dScqAyXNE`b>8=mY3od&kR^=ZzRw~VL6yum z1ga%eDLRoo36}XX2LG)Nj2Yq`gR0^YK0_n#(pm=3!SSQ0q|}6SB$`O z%+22#@xqgrnT5qv(79U1JjKreUWQ9>HP0sJ`vO^=5gZixN5<7J#4}ZBJ`GjzYpdnu zG@GSd#hWpE-`TTd@N?znmW!0PFQ`jR#4SGRb9iJuwt7yGY?(b^oX7^ZUrYKYC|hy> zNyegmbJzPMH7=FyeOo#^p4VF*A?ht2T14~;s>v$lq`A0G5;@&>S6HQa^z*&xJ=H|@4M{@hTJ5irQRy118~2qV?n-BiT5<;r zb4_5OZP<~T>G%6WX@r|B@~t`4ipTc~Tc$})0uGL<)czr?M;pj08Qk*aLWWWp6nGFrx{d6nK@({hWKrJT&fb~Wu1LEd`x#rJPNG6*xw`f|O-8+B;4c=8j8_fW{vhtu&h zYz^1xWZg#A#_l{d!Rd8E}q6B~g1T2Q^C2i680duj3k-2&*ZSu=phMvVCS1hs|K z?Su9D5YplMdWF8V+iQU#rqBTCnMBm*zj%_9wjVOpNCkjke+i8Ny|V zL@^;|JJfZr3`5%M3o=jVTC9ezPGNapHSR$U8766=gzGPS0BanL`NHyr4K;1x{0AzY z17itocCE$skpEDHyYui>#D_1)7tNw}FT8~O!&P@8s)!COgK6@?NPzSqOE%r&@%L$W3vmL%Q z37(h5M-#&OWN!tA4AiC#M4~_LMimQpdrX?tc-}=HzMl^N`x0E?<}tlL4-oUzaBDYY zm-Nlnj$I~LYUM>yKBe`wxFEJ>pYGz+fKJ-3g(r;Iuczw}bbP{9R5cSntMItJ0@`7H zrLXS=u&)878OZE6jQ0L;6n@n9mrdXgfZ>BunO5U?%?DVE`rKGv#TU-!C$0CJ#)#3X*TUaFN%!W5SDURYcmo9y zsCojh>1s%+Wf0JN?+4=U#>gMv#!v&#gG=Xf}^~47ABpTHy zW)3HgsQTrGhk}YsZ~j8Sm=WHav(-eT=DS9M`oF!HpGefzO{k&EAu-)GX0@W=dQRhv zB1a_t3qXF{v9kBU^bW5g`@nUzi>-bhN58)Cn4UGyRNg++F9r96Ob+jV0<{anck zyIp)|afN8M!D%28p8hx!m5CJCBgKsYm8@|1=@*j56N{Rah7$adP)44yup@DCSePSaagi=YCNTlWCP)LlTYL54{)xm11%UckG{M%tWzkt%9aQUZz z_!)k%M&$FHlJCLTe1*pHX;_EwQ2HQ-UkUQ))MtoRf*kBKVcDK*KO@}cdi+@oM3kyf 
zrvqqmib>V>;+eO%>`P~3mfP+3$1^jd594>=fsd4ad_r+a&=)ngL)Ef@oQgN!JGu zY&WwNeEYzLypMB`d6&%rUuX7pvsnYwR- zY<>-HW>U)>Ark4Sqn(&G#r{CCKqy{#ndT~ZsKRw1!IduKsw-O{$(5ZXQ?9fS_( zjAi!I0gLiYrvwJnfLzDj;$uojqAL`k@!dWc%ZLuHI6e$T*F*V!p(Jro>KTt)Tuw7x zabu-|YT)dY9%2x$Sa4<%ha-txE=+dhG0HZXh_I>7iPHTBwf0PCUFyc-@wABmwqNL| z3J6oapzdLI>mp?TIu&K^EWK0Sx^=k&lA0s-8Y0cYuS13l_Lr#iHZ;cjm(}`xuSb%J#Jixq*MYOxv#s^rpL_ zF^b**{duQBRp>&j_exvDP0F%!C+66ly9j#)l(JrPFD8hF%xwvV8!*rZ~ z?M%lSOg|?`tL7QFo}AUKE2b5@#|FQjC;yEkOHzgUeGd?l1S4}lq z)jOs7Ob5vwx{8`;X?JHPtRb>o@b$=*|AG;{((ciHPwLdyd>GWmO+w+?FQfGAwyST^ zugmrO<%gSG7Lm%sG0X%_u9pn^zw!Nr%ReYl4l*YOD^~3f2+lg)r&=q`z46FAdR%rE z%g)Vr(N+BJjrDzK=8@g4FyBT&xuqGU|5$w#Cxf$_KikSK%1~p{VPnNY=wI|#8nI=8b+h7B;0fW}_}A{WVcK!%`q z=R3xo*4b*mR%Qo1Uw|@0={uyzd9%E-VfWZ#3 z7juzxe%8?l;(?S8rBMimYJEHd4BvO_6v#vVS}3r$IIT@h>AW|wz~|b?_+--#6c5bEe()sy#!`J_(xbuu^s{7VGN|!1iRTB`YQl$z3q$5%lM7m1v zT|fdz7wLk4^deoV^xh*PO+Y%Kha$a(dRKhkbH@LkkLSa^4#qGeY^JJ{CJ(3TZX&EZwY(CeJ}CIX`JWR0P$rcGgtCWP{q=77KI-v=L8DbL<%O5 zdwuVrq&v}A96Ifxiv)h#*uH(+zv?+~&f8n)pirqje!26J4`@(Jx*fiyH=BM={errM zF21IDihS4)wyo#mI3X0AS8S)(0=;haqW|)2@`z=;dS^-xp>(fZzk(2B=HFZkN!WP_ zP$vGDU1ctB8e#pc_elKX@83yh<4UdF>R;ISGgEc$*gv*ZV?0JSZgNtez#IVz<-V2e zI{WT=;&P<)KCQFOkpBFPR_k*R6++C~j&uJZY~e}&wvC37&v?fr&Rh!L0NMLxP9xIe z_fm{maGysw`nztYIW#1ClTpJb@NRf=$iC2(?+P}oUv?u!CK1K-#dC~|3&L(&RwDl! 
z{`+C?mDIvAQ;v37Y3uZ8DFvs1K_)cy|^EI^0yjx1%3L->Y*+|M^Jrbe|u` z_UA@CD-pK94YYW6m0J(&d0~T&p%qz{;A`-b~oO;RUPJ+OOU}SL|1C@n$5+> zW$OFYmlPlWnlG8#sPj&$u$tIkaa3wje9}+0q3leHYhGNjE?)a>vp=RGJpPRi9yg4o z0&6UuZ)7LIg^fG$%gNQ}>=xU-BU@qv7D3fwVo(x`^;3%`UZB7lbD|RdLBEBT=>0m1w3Eg4alc1{L!nRg8-yoVVONH#gjoRp0nua}TnQk*>sDe|v$ zrh`h&beBKW5u(qZ@9l~-x;!sD( zd%hj_1D$|Et?4%_mS#=g*5T;;DejH8`w`3MWeLHhn)zv@)4gL%)C>m3#~Bd}>6D@< zzANT_hBnmQlHnI@{ofSxB~dUKmJF6ji`gI&E-CpC|K$8MAxF>(PA%>uim1DQ67g6I zQ0!q_B(JA*_ew5o9@txI6d1tz&VpR9P?zIJ@JN^Z4gRB`u@DzG=n@f%7k^KodT1;( z6yC39;nz4Yr9fzRnAHerHX{9&S`+nfWk;4rQn7de-em3vf7<@R+eZPgWZ2%`X`nOk z;x%ei0^lCa;pjd;Y<$j%6v5DAra)SNfgT)|_*=D*H1hTFNL3LCzkx7@1MvEAqlO({ zRhYV98x6LE1n`PZJYyJ%O@BQCZ-m*O9G@)w4&l8^PHE6Nf_q)P=!KL6bX11$^*7U< z;Yz)lDmx=+Va&%eL1`{0{xiN7>5JsmkBHRsN)qFLH_RA_Zm#N*CqVzHkzUCi*9_r2;H@)c{pYHbUM zIg>+j-XDA2lmbgWw{Da9;`XUWpR~%n-`qN<;|4iAsg{q@2662aQ4Z4?tx}hcpDns% zn+ZuFutW?kNYX(v6M$Pa!EWy~Gf2M9Hz*SB56~)OlQEbZF0kxIk~+1 zlCYGlj@>N1a4lyj?*$PA1LN@KL zq{s$$P+3u`4me3!00J}E9l@{$xaeVtwpqehV|1iiF8UP(kQDSJ(Pys>p=O`?i2Eyt z1llXUdv)3$N}XOr)w3?v+ZcHwXU1J>uN25~-6$c(g$Fg|MrPqm}+_I0z8``1n+3d-3#x8awyN zG`#gVbQ?VxNl#_qun|8xrVHcBn!7f(<~}=erWuvcA#~_T7W!u5PcZ*eI-M46xkX@& zvHmt}vKm17K=!Ce8&Lr~lkV6Ke!H#RGbA+l3^LC-VCd)GobAmfrW&j-%pWWSQu$v$ zW6EC$+-RZRaw7$?U#et37VK7?xU2V!-XG(Nu+I(_bV>-CRsH+KViKucu|e2blpZg5 zuQ-e@qn`Tjp6;uK80?<}X| z@_uxwR7#9i>yF%ffP{d`)sp%-UB9p*ebeFTW6|wIlHuHCtFC5#R$HuZi>rWR=>Vv0{nU1PnTN`&1my77YdQio$uzooTu05 z$n0v57U+pW4SL8{JTyAoobIG2bUCl$3#Qz^2Di_A!zd;74uLX(IOk5C>a_lxP*?{5loL7 zw}5di&`*XAREe=1UU}5s4|AeIi|s(Sc_9yI4wo-hU{uO4Q!N!g*x*544#{C12VHqw z?K+-(;fTC2+6@P0#$3*s=*5mlj(QZsS^iYUfVQF9slytjDpK z`ai||j(4`F&MB℞i9-6+yA^8|k&r8rH38DkoIhL|m0fqt``ax=cyi0OYTr2 zPacwtA|IdJt+J{$%F`1uo^#X;%=jaBCVqeBMss3@7=c~Psqd>n^XB}l05YDWixa|H zIko1O6)N*qZ^e6Y<;v-dQ#9An{8G2Z?%Qd|H)FAy6a>DU>tU$yjK#&-OwuTBNaCFw z^SEOfHeQWa8j#)y%2tJ*L~{l$4_tpGU4;&2g7wins+8pp2s8VB;r)Ea-L?nASwDH~ zAR!e~>}CHICAxPw<|=OW)JJA^&Kc4Ps3Qdc__rSDg-UhLf7KC;DSNHyVGV?~NEOxv 
z^srt@WmWOyv>pdC$m%MIHq*?qHjBWch+0M;ya$!$ktZ|S_?cjHFeK2*d~MCdsF@x4 zqIpy@B&!tJjv zNYW{R+-9LGEacSd@ri@&WaZ;e8UQmw$G*%7$`! zWEO?4e}5e-j$mQFLDU#Jq)9m@>L!7L8#KI$Shf){{^n@dT^F!XRZs0GIjQ*OYj)|> z@ktx-0lq#q6MH_Gh(#84UN#DVEjy@5MfPb*VYePV877=;nGlNaDM-_b z>N>oTYL1*i4Lnb55x3QsSe-gL)_v8(sz752AW4b74P|&GO_;K~7Jji$qxqTuBVCl& z$>xv-E6gnb^TffK$_j^Q_HpMW zJEMpFXBvyG(x7*27#T={M1c!J6A!h}NL6+RR&q&rt3`z={@dF?x$G+eNu#f9p3 z@_6c7?wWo58;=z4X%%d}jFEmk{k-{o=9gO$BQ@pGdKIBzP7rDx1JNfHm08jk#+FO{ zQ{Ti03T9W&=@?bnQ>#oV+bK0CxU}-4K`Dt}>!U36e!6%+>m&)7<^t$N?7xZ$XM7+N z5pmbf&ejrTdBRj!tK~u{aA$O)3XKO$cUyV}sq0UlTr{75-ff!qk_++2G9t~PJK2(n zRh`y|iDKXc7W(|7f>)%L0(e7Y9LelGR&63?asCU(H|CJ!XCJw%d|EXS*RGB%QK4^g z^A!+26GgX~82~_)HGn9C9JdYavHSt(--$Dls@%6{3vXP2G8si{h>7fI-l4+U!ErQ~*X@|Y17Y_9(J{TMQ|>-R@h;`M>1wF$Wb=Tf7lpT^zZbCX^NtD1!`%J16E)N31h0mflr zl3)a_WXg4q)9%9!3JCDou^&vGsu>;@8J!i`lheag&@>Zgjh<@{OB^A7Q$tKIg$Tv==^R$yDr3t<(^XX>V0yn?B zxC~6&Wc9YTcEF~~`Cxa33Ji(;oZqbjwc zE--cBAv~%k(b4mPvrBSV6if0v#Dda+TsgK|#Y5OqJdgAK;VS6t2ZN#E{7wL#+lk^LaxD}!L~5c z@?0uX3PzlY*gYpg?K{1D)48e?toA5yUif4s@Z+1faf8G~-H8KN2iG>mj333$%Z?_l}^LOd-rQWr(((J`9L}S-SEc0E`2kOok4mwQr8@I@{r{PAg z&uWieO8|9&zD-=U{W88ZjJVK-s+2}zHk~awe|dd4ao;Qe^ff@I3b5D>=Uqv72brz385gS3S3{vA?;yy<~Sn5OE5;MVwyAX4?V& z^K59*kjbFW?h+AAFt<#Ht9DcW>vx>DDr@R~s!!CuSO`7JNjQk*T){zh0TX!AgW)6S)Wl5ktWvr}#v zpnj2s?(U79oE2(3g5u|(U1ax$-W2ijoUd&AL6$6;gBhO5U(QnQkrdD%VUEO^2RG5t z1BLRghjgZ7sV!U+oesgXOYyccJVT@-A6387jP6ADC3T(Tc8T!CC1w|rYOjnwB+2VsQwLrm!QA~5~)HU4m>8l(^y-N@Yyh#y68&H z^pFUN9qlt9m`Yq$VL0fHx?ueKT7E!Is~%yrA?i-F??yIeG(yX@Xwz69oU~D19grE} z7*Q3h)?Uq+Dq1tNHDPTX`0Ke=knFpmdxR!CSx5Ly6GMTy*iYD#Djo@IdiV+Nt{Md5 ztGJ4u?%q#}%DEME)AGzJLG_Q6Ddn#ZdORr`ZYSbx7M`={4U&mUz)3&f*jr|s=aho& zHXfdP9TrlE>3F^(;qr|bTjb=e09mEZhM}qI(QU=!2k<}z?{=1CxKo=)EbU#{rik|V z5UV)OOyd210boYTv7Nv6I^%?92sa8m3cag28F+Af+b zdxiWuKnvhE;dqaPn?(CrBhekP2ZQ!U8-l-_?79v$nsC2YhyOr5@>*Jhd;41!Y;eY- z_6bn8OZ-HQzuNH1jNTZp&TNP2#5+rhJ=R0h#!{kKzx?*O%&;Q9fz9Vn(0>)tcqguL z;cY>%{Nnx|(|~bt{uTjFW+P(E$n}9r=U&)T4cEDjD#%oYBwH{mQ73B23IwGvgm7B; 
zfX!xU*!vF^=woBwf_K1oqQtm?nuD(K0F~}Ke~z|T=}Y04)qZy6vizafGmU0k{rg<_ z+VH4PLlPS%qu4p9NJ39uWoyOiK_)44r#n~mT(EWwlQa6Liet=`=IJCSwr9Ak+dCgh z>$^#V*Zii$oRi{BX;$;f)>L602eA^mA{MY`ES zbXk*I?8)c*eS7@!VluDX?(FJC)UDq>ubPf7s#eJA3`GLJ123owx=1JCO*uJX?YS4# zkrrMr#(qbyJ%35JQHyx^GDjcb^WEdOPWsj7bdFZ&1HK67%R zKrcmBLB9REypx@cokaWW-RAfh!=8)YS-@e){CR|R1!4JQr4#cfc1@Y8D_+BT!77Kb z$jIB0Dmqta#PWlI8%m5g&H##Pt<)Sw!-EqmeeSz;jKmZR3r0%~r|Q@Eb3^0H0%<$c zmVd^`e)<^v`~!W=zZ;X5%m<@7D}i1FHMR;>b754$b~4Op<)DM05+~0Yzs}_#iHbI) zQ=nNanE&dAQwSI!v3(lX!G5>ts1^PcQAY$LXAq_jw$+Ww{ogzMnP&kHN7wb76m}MQ z%7hzeS4HOV-mgYU^L`szTCu9%=K~2TN66o1>r&hVGPqpWZT8ekWf+qi`>xzLF2(EM zNFiW;#J3%}Ic4=@sYiW9ntpExy)ppMAucOF1xN8&QQ~SC*I=Xep-PNw0kdWvEkwUr&CaKfX7UkJ&{l0#xD3m)Qh(2SH>)DFyVtT5 zoie-1v>Rlrmnjw~4>);LGC-|E4VUOVd~WEDJ`K~mscx8^l?Chs1s_Lj65ecdKjtZ3 z!e{)>2Acdax(-xGYt)psonisS$lJ=o+j|ze6O87ru`K6xsftsj~T zG@=-$dp!ZKfme3nUnnd#pA8v4v5iO(Aebt~MmXCH95qX|K>1by}ifm#w8xGmIxz5LDo}R2u z5u?hh==!nG5>)>WVWmXTeu&KBZ^VG;vy#B(HK1W<89_9^DBb)4lnOcO=|+$51Uf7R zlPFV>-GSvtjsdx9A%8*wuf76?v*?f{?s6(9@%BWcn5#->m^5SL z)l5U4G8+>xRt(Q*fEyR|WrgaLfdK!w)#7`!w081UKjIjQ>K_U?J6?kf=+`-$ogT!h zkGz!=PRI1M(PB3*37Xgj(6w2djS@?MlvRIB6pbeJm%LWewJgjt{QvMhkDT zs3b-UJ50*e*w20`H|ZV(l{a+?P2Zd8YEoV}V5ZMM>T>Gs?{} zv{yzL4aQGS(Q*Xn05CO)IPhP?C5B)N>%Tk>xh70M6YzqLdQH}!tb0R$sZ=(|S+Oe1YaU(#MV>O_?gz#1&{(zaf_#>s)dOg$)NLXaFLuyPoWJe{ID0*M7vK%~DEU*qa+Qi_cb%dPBrxv(cJ>0;e zVU>SdY2y6EB8JLLiIWNg_Hq$;G-U8e+QHZ`}&@stPIEjnz{$>?B;4f8-QagL$h!Z0`|$2njR!D+(C@2%5h5CT{QQftAwsPrr}o*(#fOl0Yl010E4@OMpUmqG>XA5WD7XDQF3ne zi)O&tQE(obauV8>`fvS_hat;s6om92k@650wSrJL0I>q!ioXpMnh4ClBjGIfNkxXZ zSWxRzHYK?-6@SE)=3#`jQ=CT@3G1X~7W2&`hqiOHwda@Y1TkgB^4qzZrw;?RAFldE z+GTzt4_5IXm-)kIO0lP*?F;x%dKDh!dZd0Aj~E8co+K19eHwg4corx^&P381a;_2+ zq10=BaVqzB$0q^oLrJ6Q`!Ie3;iST4H^XFQ&mm~$Q;-IU>7hm`bUe| z&|hVLm|3q45d3~mnOgIP)nkrd<&t0Yd+*-aP<*t+S87O6F8Os#xrWzfnW;~YdW{L? 
zV)1h3W_%wa$}Ro9_u0R9F8^uJKbrV|d+8U$3@^rs3V7He<+S2dL+3!8@i&(cZr*oy zqQuiTRXa$Qx984(#a!h_cj@x?d z!K3lH9 z0__DE|9Mssx;alGc?OKLB|2GPG~}v{4+MZF)Y`Q%f4=n^aEQZ5N3eo<_=qL18Gqa{ zpA*-Sl?}v)*}i;v<%l)WxzW2Ma%A6lkd-<6#}AT$aY@$ARlx{XyiY5|*o>q1e--WC zO-~??0eShvmSEI7Tc6_@_jPw4KzJW{!vcePd{>++R-V6(5q&XQPy7PfyCE52{XLp6 zHY~zz1hB`ny1bEwy_h{(@C>nh!~$cWHUHv(YzBZb429*KLD4it)e*gCcBnw3gNTso z1TeGW;Gb>$Tb#%q&1zfSB$#IfPyt0EqM*Y7A2v!GTXH1Z?xGnC#nDW<&11UC%uri~ z(S^=KcH+3_8+PzYs_)OIl4r34Q8DQQAp0Ws`EO)c)8TLZ^-aFZ(}#_rMVn7N*xGST zpW(TQ6ljHFye?(lzsV4gNtav+WF(Np7O7~tyj$Cu~QQjC2ZiT7ju_`nzO??g^FDNfmM&=^Y3=$7GXN4TWmao!9rFan}j`6V7|;4lL(RJDd!0LKiG0#ks#G{k3*hc~ z9o;+KRRk@2{z!PY@JFRpAC5FR91q84oG(kr1~~I#i`_8?l0P84GRr0KC#S>6DGzWf z>R#h&F8=^hOae(}3h<1INB046+aSn2_EAZ{zYcpVS7}N(v_1+nX&@d-!Bq=xaPK1o zr&hQJ%B0*ef=WW8#AkVtKF_lMq9r^sc%8x7{I^@(YCW7d0B0@Vf0V%xc+n2BKsN(H zt8kEhdcnX_ei5>bEbaQH#c^l3moil}6D3Qg6Mk1SE3p5*po z{7q{2p=Z@Dd4P|VshWS`Rs57?mBf0yhTp`0TYs@z?s$6x4kHenuGyM#gS19#A8z0m zGXUgNR)0rxs$}so;s@f2L1Z2NXDU|}z^-1)Hxaist0qywk$gUj#rgnG^}9$1;H~;3 zvCygFawH6Ro8m`yBkX{Q)#3hbQ+KMnncKJoFtkochdMicT!GSXL+nZ+5c*gm%N;}^ zR~I@c6J+z1+X1?=Z-gnpc3jDF}^v5W3`Xkfc;^Jl(3m0_`?upMWKuXn zA<~leWzQuGmznaYYujWDzMA2aj!>9ivuY<9Pit{V@9!W18o>YuhSx7iHvR1u6&waa zMy4C-;#!=|cD$3tK5c!o%?cQN{eC^=A(iETVzLO)_glX$K;z7N?j3C|$Tx+5iURmB zPWG#vd2l6?c`WkdHHRAk+fygI(hIL1f096cguL4ZUs4%vCdg`Aeu2*|SHI6pmKU!U z5ddCBLPiOgc-UC+*TzM@_WnRr50%qo;gva@Z2dV4j#1!pG*C?>ueAR&1yqKt*jDYb z?=YX|p#d;s26v)PNp@eYW7)r5s$o^3W=HbD+1v|b0@>G~41 zIQTdM`ztmgt%~dVS+EJ!OXQBDDY7PpIiR}&xSYwRU3hghzkK)(2_X|W^8S|4yy!NN zt%w;;)!q$YI9;IgW}zFjhj)QYna)Ds)Iavsipp994}rgWjJ2z@BQJQLDz(O;^a>G6 zkoKs!EG-F31*-n(2u{VXKS$V_7E+c*aBZau^~&(~F!m!^Juk_luD5&FJgh5GCO$+z zL)eJDnzawF?Ho`zBXW{@U8Y5|P|&QDhXH$K z&?8|b%i)-Nk9Xi&%P)SWGQ@=_efj{XYyzhGp z7;<(6TbqsJ8Qwrmp?R5oW%A=_Ms_Jr=PcO}|2<7PhXLQKiwtX~rR)#WKRW8APL=Yo zm7Gfqt zgiS8v1^I2pOBduZxjz)&WmHdq{)W=Gdw`{^F+@IwVKIL00l=VktAUypU)oLWKc0zW zMulP397pHbiUA2cS*p$+ z+bJjP4Git)wEjx>acY39o&mhf`=>W+Z-FQhj1^JvKNvP`#bCh-=;dt%|GSTl1%_`& 
zKs$Y(qrvy8`SUvcPmGKB`gl3BVT;d=wlmgDfRXWCwf&6p?m|~~Zvt;}K}j1* z6n?^oS$C{0 z@)~+i4>yKsjNzGkOmTo30H!gQ!ZpK&H$z_AO?i{fuP69Ty7SEtk$0ZplR>opM^CJ= zBwGxm<$!h;kV?g8^nDgkT$W|8v7L~S4JOtc_C4PXp%8H;xK2x5;DYMc0!aauEVcjZ z-d5PwW5Xo_hySy4vg&QczzF@{4$J??PegPdF!}-aAQCf&feGmTn~9$Zyy?nB1O(AL zju^^su39Q5c<8`h$KQwUkudP2cmnEaGr+7A0oMOYbPr6UcVzB?%!|?6H5PE?n*TnK zU4M&C3ZDuSZg&IOI2Sm}jlXBXoXF&#gTN#P`RAiC@j39p!^~>n2l3C40J9AY7>WGz z-W>lw^8)zuD8NzqdmN(&K$L+u{=c6RXXxr0vj7|0gWIZcSm57dc{RBbnV0_m2jx&` AcmMzZ literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/reuse_fp32_param_b.png b/model/train/yoco_moe/sources/images/reuse_fp32_param_b.png new file mode 100644 index 0000000000000000000000000000000000000000..de069fb4ef711d410e869ed93698791df16d1574 GIT binary patch literal 46566 zcmd42bx>SQ^eq~aV8PuT0>KFyG&lqc?m7h5;K72sB_R+rSa5e42r@t*xD6g4_~64J zgSVRQ>b`$py{V#@Idjf*_wL=h*Is*d%v*IuoM)8J9zA-5qpT#S_2|)K#G^+j zu^4E;7291@bl@L~hnAwuqpDHrUEsr0TWK}vM~`Zgux>0-fzOz3N`@Yf9^v*r{GklE zmRLV})ab7)C#~aa23tlo(O#xQJC3RJoL>fBT;6dsY_XNl>~!dIC^Xx`2ZW9m%BD1T zCgnqQLvS(lQ;kA$bqtB@SOnWf4ZAyyfyLVO(HI#SuN`HiOl8t?dQYA!wSJ6d3n7Qa zL`UCc@MamVQ1$owSxQ(^c#E`Jj3rK8Qo}LZZPdC8m8aY;^MpIw&HKJCj~a*tqcr7y zcyx5(Qv+S0t=B*L85cEI>pe%CwFE{Z`cd73B{flb@q{yrk9MP9V_W3ZGuLhN#7ENfuj!7H zQ7`_9o^LvM+q~hMwJc8c>Xbwwq{4=6`aP$R`ay`5Zo9=IgK&pLE5)Z#Mx#-0lR3YR-8g#GoVqlGd+PX4zmPzs z6!}HY($7%$FwJ&l4cMv>x?k*<$YnU{j#2reH;r@&qnp1qS3fXrsqTfz$d|2M*3Us- zc@Qq>aKvDL(=L#~a#SUkBE>RwVG|0C{`B;}9Bf9p^jlVVCE>KKLv|A2*JSxT|_pNPa{D&5*0kLB= z2oHRG`j?P+*d9%r9}8|A^o-7>{b!pko`CBlMSa?+Vl%GZ&U`(qx9Q1Sr>6ugnE$3B zMpI5iMz%NACy1DUHZmC>a&yU3Y(uXv$RU27wyMkZ(xd@T+Shv8boXREDEvLj|2!wk z3Eww1X+Oh2DZ4<4-l6NU&;TY??#okNC=mhhVnO-$^P+nr+594YXDlMxInm7jO}r7% zqD&Ey@$)_(Cle7=Tl0W{7{kg5x8Fj5Nj`yUSOK4%TFL5fP(x4kVLaJ6wS;*{#@enh ztOxN{<%&fIBD37qCRpiV995=mv5Ugo8-2gZea~U|Wj13GW>`t<-3e17f!ka?PsH8# zJ`!=8w_-K#`<$%xa+tKiF;KeAT0BQdR-}IpL~XA%xYFU8wj}(ic`M%HN#WXgy|(GB zzZ7&!L3d={KYC#7ju>@3WW(QOL-WsP{&V#!mHd^DL}y>8Xn-|yPfst=@pGD(<73|4 
zHMv5|G4*P>Bt5UU4OTdwnJvWV!eerIs`kk}ygXmqgL=-nng*S+2w!OJbT|0QwwQ{> zJ>TT`?#7@}*u;t1z9wP${B!qrUt1_%S;TdL(AwpY<+w&^6rsFI!Rw+!XqPLh!wf~( z#VYsrpl=HX&0hR_q%RdtiC-iS2~cevh=(wEQG%wH$qZ(yN!!#7!wAlYzV9a6iv-*j zy;q4*ngeEq1w}Z`=XH~j#0j@v&Aq2H)1KG2aP#aHqg0HeVY``pjzixUWcVrlPlGS| z>$S@*CHyc~0^t46+ZRfv{T??Rz4~aIds|x)$o<7h;@M(zwOqE9AD_pVrv4Yq479LG(Hxugy?DL#~ zR0%tLJhGG_~%?Wq-apA7M_h4^+kaM!jr-&*3?3 z1Y*J!J#PkPpe@^FH(D2Ra3o{k>e}^&{^??F)sf7V>dQebYsmUp?(wWM*BfhPly9n$ z)HvvC5Lw}did7P_u8tmPBu>rdme_CC41!K?eEZv{H~YXtV?zODWTAS=bPvv zSeIvgi(6tWN%vC6F5=`iL)v`x@j=u`gj##Xpm`H}f0a1ILZgu7PLl5AWXkq3ijA-_ zH;Gj;=q9>4WKy@d=g-+Dl?-DKk8#VM+YGY%&`tor9N$yeiB?3X)RBal^cR-bA z6NZ_Li*o7j*E_dLw;IqXbIr>meq7$6F;=}=M8|L#JU~|MdVFyaR(<3XaOxXF#ZDKcicwnbo*D>WQjCPf z`s$IlP35Ql3@bdmAU6-s{duA(Vd@a%d7zzcosbg@4fOr?LrX-`m4i zKk&2~vTN7aLjOu&5HN`F7Nf_0bqOy;^iBkJJQ+PSjYX>z{jRHVqWhwB1YQGpU{kQei%d`47vr5YMynyQUJILS_Y*uEgZ;8tK!W0vYgcF|UXe zEYBM8fbIiZhyk$1chd(4mEOG|V^-3k%fHDjFg^%K|FC4-niw4&?MV2!Ix!o@U>a+cbj!;rdTB86ih#xbznxVndQO`& z_a*+h!x#DT9H!dsE!M0Y*N9NTMa$oRub>^iBBz^!ah(MN-&K|w?ZgLBI1~k5CpysZ z^^a1v){*^Os`Q_zH8ka1YM|y~7w&GoP_`{w{&q&jYkRHp`~ps+%8MM%&1GI)+Gk{(eM=NmI#tN*b@KZzYW-?W$pS$Y+t6FB0g{r% z&KH#4_C-W9B_13CyVFLSZGA>+VteBV@6`U5M)d@Du=z%PR^9LN- z$Y*Mc`7)n{GB_s8ixjxz8mFCB^Q(szcJvK(DTTCKZ<@&%_w-8a6Ty3Y=g$8`#J zjTljGJg?;Q9unC6Y!ryz%+v3+Ig`s%N%7IC^(;^3Kq$S-j*@+AEP9=U{kz+t>W+|? 
z*ld!0WJfCHcfBhs-XT5eMVDYmqnE73k6c_F)NYAXDc_P-mj);}ejR;^RKN9q-BcxN z?cew4Nd507b528j8RNU8_2gCo-R0-CsUAdy_GXXjaLzq#3y#r@WS}!?P`p~(xm-J?yGkDQIs;kmws9$_MA0##798}ES zs0F_9ao86dWRDZ>0~?Wpc1LRN2}hFLlPHoC@bf_TKEN1_+QG!2jw$W)>+o4pyf3aR zW>Ar~^#&#=ayb z-L%zd^}nn-Tch=#Ck8fm&#pr>!`0Kiu0zCA=JNEE8o%Shkza{|ot$-=m7Tm=1mhP- zqgI3nK6zQkYjc-jJ=w1l73k+m0fSi3keB$~lK!7ZV|knJGq>J+Afj%1R<~A8()@4o zcy4Ar=re0nIvCK(UCzXG>%;aoyR`k~qAOcRkpI0G2WEEv~qMg_5`zO|~k7v_cD8(<@1`D|+Lm94GAZ`z18C0Cbl_NBwz z_ll%$a74rz>d4PK?ujDm^8KrLvq4Sd3ZFVTFN(V!O?d5Q}?Pi<PT4G^ z>JrW03z=$GF@p=-87r(trn`SvFpYya4Vqcnx_pYfOsrk#mZSwSBs~1j0dd~CoM0mM z5>dmz3Z-N)NY_XnJ!Ew)#Jo{kFCY{ghcOW)-xVeQJ$}EHB-euHoFjLtYD>LoI=8L+ z?mHB>^eD_BUOpi77KP7c&&3sZV!AqjbDA3D6&5&6P8T~sKEMi9*$8->kj+`T+D8Rr zBA_I`rlUKM|9O;&_gIGM%f&qp<6kjJ|3K-M)1aTtW5=g=q=8aG3F!W5dRE8AjPom7 zWpnrH_DoQfu8YpC@x{rfzYq=MOCoZqLO^R{IX3xYHnAtbBF-z?w8(YJB?plV2bX`c zsKLX-k@fY3bp@Sd;FCVj9^blWe0;K5UhOOY7US8~y`zG)(xVQ^;QkAGlHv_*VQ8H2 zp>_L;j4KlT#6){LL1o#3C*sM84ETuu)2mVfL?M}@S134u3Jh62Q<k9ewIG}OPo-vf56W3yO zj3i*j`hO~qY(u1oVG>m^$hu&$RdxjHQ+@92?q2=-;DfV3H{xDc|DRYzwi`3^-U|8s z$LA8~4|Rqok*UIF!yVMrVq>r^SyUy@@-H*jWCUcvz=NP03yq8YlVb zz%kx4P^Guf-Lf!uQBHh9w?W0uo&!qffCGLvW3kc_?%PdzA;?FnnK~UBXpQe3zyG)$ zXD$`%OV5hcuME@U8`8#1+VIaF&J*uWpLJZpAET2(^TU=uJdGA42%@X;cF<@zTEa2r z?szMZq?`?yt>Sz(E6g%8hu3iu&we{jxS+*MXXY7VJ>liTqF%SFPNX1-K$cQlLP;lPJf2m27mXY2*A>N zJ_mlA*akK4V}(a{^3P8ZBh_%iuzSbz{%en%H_vAQlwwO5Vg8_MIk+_Y!qbF7V_V7Sp4 zMd?>FnbXF(JXZ<2(%E3*4MZFb8GuiwF%ed#b(PJQI$Z>+=+vY~BwbLm(!wAj$C}^< z*KFuMZ8QmbE=L-heB|0r*1W;Cr=q*Q&iI8Z#uqLBzhvS;aN3+hMPC0(j?gEPe^hD= z?siGH)XW~c?xNJ%i9?U<;{`y&aZ;DDN^8W9w?|)r=;guByvitZ@vBLRzeHeCOn_8F z3Fk1`ZY?^nPC_lYrxx=QPbu0h-}>rd4O+84Rkzt)sSK#_=q)v=-n*25s7NV_6m;Ye zBi??0gq0ciL&V9(z(#i%`VfoMv#V3Iaz!uc=3inby~aUMs^8{~L`N&Z-52yHTeK(7 zJQma-2OoK#2KvRy?%P-n?l_+^eYi-h>WAwh*Y3@W5&9g^VA--0tiI6=4rcQv9EB&s zjNgWN62GdDBw6IXzcVBE{>FJthN4c?ZFX(iloerlgrnqrNj2$1_tCSwYcLou^#Jsh zQW5s~z?ZRyFWKpwj~vGU3k(E{2&3*hDlK(`Xt;+N{S^>FR=xHTfCzH;kyT4nlGHU; 
z-;&o33tl+oQsz|7X&=Aw3~|awD#kc#RlJ+bGkJjzg6+^;JcL>KOeQPw{_0TW&$JHc z$XG71p1C>w?LBBMXz&|9t5Jg;immOoxp`FY8T*e)DCV|q+@JSI@cTSweDo|K%Qa?W zPr8?=E*I55e;BCBooopqX9B}uNJVBEC`gX*ECmYurX}Xj2 zUhXsye^C-Wl8A@BNm&-v$^^h~qd)6I1%7|%4$QkkvPru9o6Jp(G z@%~Zd@>lvzZaQ~!`ONepy&C;l>67dCJN%OioC&)&2iJJ_pb97M{UD_`&xp-KB;WNX~)x=MW4#UMSNs*@2~ zD&=`!qb(k+&ClzYa_LQiYjkOg!40UCOvNLeAGo-9|7FQqJJE?RRIvW<6 zO~}RjG2$WewHEbXFYGp2h&0q~B|+)zBYs;&b=wcnQj2=BP0VGz@<@K6{rvA>%9Ez1 zrpwEdg41fvwO_Rn>A|O%>LRM$cg1}9fqnkJb6;YD&^%wC(K5LmU!n;NX%jBnI``=B znqDIX<1=e??|va8yMVRze3BAxO#WxYF=3?vX1HnQ!SilGh-8`j+(Gg+&LiP z`Pn7TSfpN)_NJqh=3I6!B~HF1@cTClo)x=uJy=Kx*tcGaJW>Gkoc>mvKx_INvJ4pR zb=s>g64z*4E8D+cFO6eBnKK!&8wKPWI93%xIY3sR_0&zU-ypkG1pRSJz^*!ZGxgJ3 zzT)o}<@5}x0{uW#yCOdd>nCeLo9mC`BpehB`@}&s&(PeVMBoIx?*C zdvn|K-$UCGn#1`uY`J)71^e{hE?==PvKIDr8TJdktFfBUsxYj7J~6j5QLx#)^7}XT za+_E0a2ii)aKfk86}!e%=L_f3XGTR6rXB9`QKuMqzbW}C=Oj_+G)8-18{4(D_g3P1 zox_U0W_<>JHA#IFB-%Aw8NSPx?@0@2gc#_(>-1Tn?U(e+(U?m0&+&iF(te!4S7i1e zUDMNDEYT8!#*uA>nOULhiW*h^vgo2xU_!*?(!6-wf-@-=aCx5jS_eV`D@q#hkyLEU z`<&uZhw)-yU8IaVRkV9$lVdWLPrY6A(XUyWujoi?b%zJcw_^B=f>9~!IJ|R)t#FyG zX>C>oQEE<4!sAIh;P`G;Dh+i!|8C(4Rg* zSip>5qA!kiK#CprYmSfkU^LB-xP`wcp7)wr?(JOp!nqzx;^A2w-e@1+T|ZU%`unFg zN%3-$KYJq7(4>Y`0qnyFiL74_3?le6aE|$zcwzcmZ2jt!UH_Oa)F^Z%5I@nihs$tS z_8uZ1fCZBgvspQ^UFE(s9s%(Ux6k9Y%b}LjeLB>9q#uTxdRz8C6hc3cL}uGCM!=hq z!`Y`mo|TzD7LkF4IPGHMDQwJUTnU4yPu*1y9j%nuQRzPH7#GkB(o$+}8P^N3!u^-H zyJ?ani-jQ;!Q<#JNU2|oW|E8_P19^89HhMWpq>f)y{TLGRG4pzjq8ik#>f#1_QHopSpZiuU9dtnHxbIEY+D~tHT zo{pNdCjCs;KKs%@1l#W^0Y&uD!HS%hmoXF$VqdQ|iE52iMG#XJ@f4QXWsXmJ)<}&% z`=vMY@h+WGNvwerJ8D|)-?hHw7GNk|DOg%a4CQ{hytjmS6YeC>M1&us2P zXOEr`x;E&cL^>yKN|OVkBPVCewWUkr%}s;0ISY2O9hplt5+D!dXk73HHa(1an?U*v zhhnrzft~B8*QRd%`7q%dqNa-ZF#Vvl=y;6qvl{}tsU|GFT)+HNq8pJgF#Ko-POy88 z8(}{ssnibp?29{a;`3dvhK%J-9j-?kp?QYjJp_~fx`^!xGew9!oa^c2M z_j{Ug57MMK4mOqBhX6o0HZLHn(A%IXT=-cMG7e3%B?-3Ojf$fwk;njYnC(}A`1jsn z7LzT_B`)K+jCZCwkq+-lz52{{>q z0~ntD$ZLgz5L6TFe#T}df1nl|n7HAy!hXc&yVM#sP~G_$rP;z(imKdZq=&Rw-%1Tb 
z-PJJK@J#5B8t?Juggs}v6kfgUSZHtJ#<4D6gvLgp?J*`z9DFbGw5r;if94Fg>9AabQsBvC=R z$h(GYb3N?uFb~5f8eT1pdk4cCo>})GKfN!W#LhE(CzO0o?DaaX&okfujs-}AAT1GC z)ZJ@>pU4rJ4@hlB=s@j=15IsOKlDlI0C#BTGWYp_K{gYD(XA-GOXcoe({#Dn-Svnz zJAj;TPAk~#<7tvO`h8n5LiYaU>FlLEwb>sWxtTUU2ufmJMvcavg$=r0Nr#w^4r_b_ zoT;O)D~9;8P-}aNch|4@0To?eq+M{)63wlK0Qo<={hOp|$*Qgp0L5yvLWgB5FOgr9 z%EK;v!iGDKM06L51PRAiJ%z{(^JmV;S*5=B8GX*@97~NTTS2+2X!lv*6 zYQE_lq09;;(p`9l&@4*WmM0Wn-n^Ipb0sKzwR%U?2WI%S4Y}B z(1H5G@gFWVNgB!wTFVx6tAL$2fQ#!SunkRsY&$7^;NiYbn*(fF8_Rh)R`<&G<~z=& z<|uHp5StryfYB{*I-~kKr#1onAiFX;(p97`Amqk1crSm(cdv4N$6uy| z3btVk#Jjz(aH~pyeo`tW0xkeuyaedGz<@WD4@_JDM`yR$^0j^v)DbfbTvvEU4x@m9 zChThFo}>A5k%fT~9LnhaT$cZL=24Ug){aXWJh-?&{3JVo*3+u0pfcr6xcY4nBQ)2p zyiLuj9OP_a%@TS$Y9q?u{~~QVOqz}W=4BIa%s>?uc=NyUqQ#Ae?597snv-d836J#G z{mNrL^;8hA7~zJ%zF;W+BUZQfLp_qcsLSVriauR`mf6lgkDa4>;AUBw7#zom>ME-N zc6(A?$)onHaF@@_QyM0oFgZ#tOocJO*B*PI50LpR=f=HnuHt7ovoR5-Xwj_m4U@@S zRnPBGi2PpZkh$6wrF7_`)cP37dS&+MsgyJ69+)JLR8)w8#PhMW8h%%cfnaY*zGbA! z{1Fb`tj}0y4h?L2ZrVqc;9Dz7Th%JcdR@QxXud=(cJZOdqj8vl|(80C(C>i2@%3@TL3-H_vXe{`z*yftts7j6|_^!>X-po|JE zUC(92qWT4w*u_Vuo1nq17+&Fv4jYG%a$k*k48x+?WUkg9ZE7+O-7JG>++t3p# zZ{M(->vy*L(h!NYp7E3cH5H=(`FL5ZL0BVSzcGC|RO<{R?#p65Bj#Wm&k`nTZtqw$Y2)56nMHlV3mVTe^@u zq)pD~lrN_UFwnbC5_*;Ha=tB*Tey z{eJmqHvNQ%*Ws^H>V8mhnNDSF_Y?FIlM68x=kn2nTBw^|MOxYJKA<$(PNZ`)ee_aJAdi$F1@qspu`|NWKg%qy<*@&i6^Fnw2L zn_7>G7|8t@k6FiC$$}Wdu3FFlBOBY6yrQ!Hf2teJh7=~nGB3-QZvy_6H>24o!@!3? 
z{Th&g<=bH++Tyx%fC6fCJ|A?S`$2!i$QaviQQiLkubUTEF%fGmLB3pwHc8!;;&6-q z)&JoA=-Up2p{57iWfOeg-s*@3o6Xi<(O!*mT%7RP`>+WPe)3T0B_NPT0!58O^f^$J z+bP`8q~nSd<2>Z7U*4d_iEJh4q`OE~@%V2&-eK-?Qa#@4`V~ahpu-I%i>K>RXh*Hj ztqiA0C)LqS&lei@C9y_QTos@0Do{{EdM6zntx4Z0Dprl%fU<8;P>lOI7Nh>Tbj#fm zjiMe8Bs2n7BJlJT+k{CnF-jJaQLh1&s!h5n)xQ~-={)-c^+x*upPcEac@c= zvuifGWC5Vq4fI&E;T{{&?aEtXN%HrF32_tdfV*!D4y9*(NVbYfI+YWaGjF5UMJ2_vH{ zZ>{ok;))ppCTC|}cL*uAvjKKxWV|er{c6I0)81qiOc*FCB-4DN!q*T)N?g7w-#`91erY7?q+IGD`b06l!!|bHz zI13g^M?csM!Av;;`reFwd-3b?&n*7L&!j`aly%MIVcPox^okQ-MZA2)_GZf>z)7nIQD{YJyZS^l6QRQH$PfvJb5_#dFFTjsK}Cq4yha@hgeFBSizgdbvvy}N-c_QNC(l_S|Erj z_8s5r?L5TJxcj3eotrnxIRE3%M?7N+jGO|*YXtXFFdEnq79Abk)9~=P83L<75ys7X zyojz<{ebgZ+*$?h9i*_|;?K96p=@gm7?_{0u@&unpECB!t|1RNZaUHFIb|I1Lz0s% zT#+angt%QLp=7F=8PY z-GAix-i>6V-CJH?lZhJ9V@|uWF-*r@Kky5=v#?4^b|~5V&0~9w!eixDD*Ar~9D6aS zVmN9ya*8#8aP#~?jPT1oq~Ip7V>2+|8W!=hBnkD-{hw+=j#zE6JEtM)|1D4{A8rgJ z4H*ME5jYR1X3i8F5w4agl&AXbWlQjV=X*?GE!~!r!@S7CZlzqdN3$PDD(jaNb?=5F z$#~4z=I{v!Cc2`#&3yQv_q~ZUVvz#&lY_E>IwYJ1_|@iJdQJ2|s0_yW7dGz|vqin0 z?M@ZnGfXS$o~%RG4h|}vl!3PI3DNwQfodsUTfo&zRW&u;$mRqySuv~MZ$;nKmK=vEUjcfG(gIfVV~UVp6|`bzJC3&4jl-} zZ}wm?*zx8t*`~xK3pU~&V;8IT)%(A1DONVhgq#=X1q5{FAEE|}d`e2nX_%jokkIK8 zJ}qsPiELYF_Z{8w)(90bw~4_!JN@Q`X4f$(H(*fAoNR0urfr_mDU9-O3lwlnx`Iqw zBRx^W0tvS_Ho`u!sFY#vpHu=2-{8d-pqgRZO~dGf^1+ZO@*_8w>V85)vV z^?F^U=uN6lBs_ffPj^@<80dw6Y#9GqYyvih%F#y@Ui0B;LcXm(k4 zgF*SOzN9$LRoNH1taQYfwtMG+1`;2=<(F#~_;X#?9r>E!k{{1J{ zt}-Fa(srMt03~_eGbzMz;0iC&bzU?ZJ1P6Tw?X|PF>dRl&Rz6TXzrb%6oUhi& zmony{SUDhVh9JtF9&m|w71Lcc>cVd7-Lu^deEx z`Fh*U{X()JmR4U#sMMvZS)zD7syXe4(o?XItf#*y(1;mO(OFD)(Ayc;IH3gNSl zlGX*R;#uUg8>qElqlVx}ozC?X-OCp98EQ5MzM7sXpPQ&NboMeX=lCBSx4)ts3^&PJSub#n8k_dr>caJQfj@aUUqb@tr~v7^;4{U&Bc#z_ED$peqa zb>E>s7%KyK^v*Nf^g6VnprD`*k_2#hpA?g5^GsUbOEnBy^t&b-{okIgcvTh$aj-7 zexNWe*Vf?~N6Pirx$uPuK;i3EF#x)2exQ#oRaGK&uKc90Dt_J(ZLfK2dbO8mNkQX| z1J{^^M7Von)!Nskm-SZFt-AgZj&d_eCx>K3-nEm}v(F4ZbfB zv=SWs>vL;n2Npc$Y1kUg#jnTb4fb_Xfm9w&<*kFKPd)k5OZHiKsRL2{IuT4GCMd1A 
zf5iD0!BzPxR~c|cJ{Sf4(?;v(mFfXCb~OK39($JnAXq}^8p_|tH~M_xtS`dBfuku^Om4 zjd8%B-$a-2uim@~c$l@zRL^;PAf8KcjanReW1`crT0lD_TD(_Kna~kEO$C!*vT_#{ zQjNOJZaRi-@htaz#hk>wX^>=i12u+@f3<^jn#hXGYJ4D@+Un%OHrU>|tudzcNYLL< z=X;vjRG`r3(;iPmRcv_-S8EBnV(!O-g*Kw|tW5%y!DS{yitXpM+a`hZg<*Wx#JG0= zCzl_p5j!5J63NPcUh8N$WUi47ICLETy>sxhCBsb0#Ltr(w+tuzfFJ`X)v!>|a=l6N z%T~4If8n*H9Y_vSHYOzecOvC0z=D)x)VGu3_BJH2x*foz;~V#ryhgX>({GQM{_LUp z>zjxlhEse#sM(*U$ue+F>O>5?O`LZZ(~Rd{2|1EGB~ zNg~2S6%muj-QvAQ{3CJqNmRD2+Z9!gJh-2JiH``a6ikt0fI^O!m42P@?%?`#-=UrL;Z@7>{ZH zA*6|1)DgWZXBDqKB(UZz91fXyn2NUUl><0RJdM!XpuQFRJT8S%R#O|fR=^&%JD10Huu$k^qHuk`%fG~$~@Cw zu}gFmy^u@;RVJANYzzOdKRzr zYWdM;e+o-aeOd8OKi2Kmqro~__f}$^Rmw?uAm6^6JI05Mb&|y$&0n*Mah%qBA$G=5 z=+BO!eoL7hQT>C+jHe(+EU|K5A9UtImeqB+pIl)EaL}m7^@_Fj*-TA`V$6s4*LGkX zV7Y2evOXR#_5SI8ZbU5_c#>wGmHGwjulmJ(th^_{aCDQFd$6^>`Dj-Lv$vIzX8=X z2!(YKQ2SD02|6rNNps&NQ`{?fXebrqVh`Bxfc4G}F}S&Q644~72Cj;t7?$}y_4kV= z4KX!yKd%OKRB!fFvLy@tk%#sj(@T~>I$ZfLB{t1}Ao(Bww^#7M?HT^#_I%O(y*M7W z06;EBw;JnOqu>0n#Qt+r-v51XE9C$0qUX^H%|H^}08sq;yQW73Kq4;K3YZE&J3V3O zcsfnWO3SSt=u|>Z9~&DR75>Q<0qg18p~dbckd$JZw}cvJScl( z$#@c7mfL>)hZ&C7nBG<>(f&T<>YePStjK$*3S*S5Twg~V8vyvy$-1WgQ=X-vyYU06 zyDopxyc#RSUqJ~JEHi&7AYJO>#$Lp;+mGEt;>d;?0oW&3$njs&75(Yae`HNQ^T+@4 zA~wwbND;<2kN>-+fn^eJ+k~h4%(3J>${)W#JOx$bP@r6c5#WcZB1{vR`#u|4+PJgY zOgBs8|50i4Wh@tQvx`~Jl+WD|^7YR%k91x?c9#GQ#hvW|gH{(-@y8@gmnThFPLTA` z#rG)Er!2~353Ws+(j@o6>erBQ%cN-ZTBtz3d8d7_5-#}U>eCVDu|Ec9G1%=`1nG!J3kiUzM^ z9nBJL&`;akqv*GijHUna3$f@@xpQuYwNK;pT7PaF2&&RZ5N^!Ub)Ojz@J@I?-EtLY zSZ~)VB-5*2jc%-0v>Jpb_}={G6eX*Ex^K9DVkjC-7l21JPzflN&{=85n~y}|$z788 zH99u&?wbQ6hC_GBU%hAkN*DayESv;jRe{6G2w(kpgBWU%jx z6CDsGxtw7+&gQS0ErdnpBGJDhSkGr&_QWhWAp^daP()I(&m}(IB6g~Xve^6EZKnmM%eP&j zCr46mS&qGPy_AB+`k@0FE{s!j*Fc>LT>l7r5b2+jW(?m|kDjHXG<<3V=`i(=jbC$x zwAgbOEOWAc>H^K0JUzY0$Cj0Z zN&uL%qp*&e;60-ygx-(oqof+^6EnXWymSAsh~Bom2we>r_*(SW7AY2G z!bvL^de@Gf!&y#Tnfd>ZbB}VAaMRpqwAYvHnbN5o?kZj!a35W zlKxw`0&Fi|K3-c}JB^K2UH6c>Bj!sa&;(I-^WE}L;vp<8Ey?+9us>WJCUgcO>kK%# 
zzBV=rUc-08iZycC?;it^X~(?i#Nr4@;2C5i>6w|sAy&5`M)fv0iPW$1Z?J&0BtJM+ zFBDu^S$RrJV800D=Wg&F%}oh_X1LaWIB6sD&&Hns2KoQ;_NLK-?rM|ICLuePX*themF#;UJ;F6uGqQFsEWQIJ{ zVf4h(CX8EN3ptUmP_twtM79s`!wl*$ut>Q=z@}1gauMofDiZ0YI@7i5;}?;_V-d5m ztRS5W(Lv1?p&w(JiIi7o7Qa^pj@uehaE#9f4t1MKY$b5028{DARORWPVc|UC@p;|$ zALGUx%mVFt1*HD(6v*4p*$=Z#;>#k23*KL`r`K$YE}70TC>l}JHKa#dOnCdRwkhR^ zOT_BzhEDNx{Xv98kiT>yzxK3ybOyIiVQ*v@(s7JdU;fZLYRMldFcS75`#R5f>U8i+ zJ2lz@w%krc84Z486ApuXVISjk(tVO z>NQdgk9CKLA${NrcmE@{FD0u~Pcj?(>dGZC+UjxD%F^UcNI~wKzj3F3JH`;|X&=MN z1uke?g8NWp(eUIh5=ShXt?{zvMr510X?X39@#9T9Ny0M!%790oOqBbfS`X=xK9Q^8 z$z~$!hqZ(ui)Sqj2HFO*ni>0lo5VzT>yeAtleDFBLI%&<~W_MkI<+rBr;K&QzS@R~WRev>OX6_Ji52^ps8Zk@>GYzV8q1zy9;Cfo%?r zqj^Mu<#RlncAmHq^mp&5=o9 zxR#3@-^e8kg%=DP2l5YWPvE=S;_ch9yF$(rP0q#!Y6cN$c&-*hKM^~UqZDIRqCE~j ztvhaEF~71aYmV;yAIXVE{6j^|!7rdhDL?+}V0d?q(!hUV;+inFIFJNn1n^aW&!+=q1pD7HBDmSV-rLx-Kvfp~ly2@8FzNu60=JcMk+bA? zt7X@(dVo?PhwNcN!_k2vrzDzxqsr6fFs_sUQ`wyZ=e!gA;u8xV)W6`7dH zeq{+#+cXjY8nl%ajMA-Xh0_DzWEdA3@-G$S%&IxE`P+kY-O&St{{>knSZefODZ!W) zWeCX^9yo9(s9m%6TYEF~9?Lg~mPv8J@}em16e*0yNqnJbb&=|GimbI4g2vJN)=*4h z^E^3QkWaBu(}oZjr8*Po0UJEJ5hJ87gSUp8>zXSP&QIweG!C36iE|dC@Ap-GNAAh} zY~xPwd+{#ctU`?x{q=$tnT_rl!-&>N$M_EU1LT;z-uc$xT?BmOJ7Dj%8J6Ar zbdNlRQ~$tm|A@2=-u%R7?!E;@UGi8?u-p(@A8xi4@mL_y=V`al%Kha_+}`^A1d#8r zv6`Ebp)C(HCKTX4_1FA`5D0^XlYAO0e-?Ja_>pdUx|l4#;eB5xs#b-=N8&h#*TJ67 z5Q6`QthbD+atqgnMG=q`=@!^1-JQ}1NVjxJcc*lhbazN2(%ncmi;(W_j&GuSzvrCs z@ryCmde$@Z?(4eU6!+h#fO(8v5EyyeHTAdV=zYww{^Oj0{K%hywbYTIiR^Wjj1Hd6?;VyK0NUyU@ z==an_pl{ViqQm`*OyXO|oXWB0;!R9m=A_FZ81GZQEfGX}x;bMY%v*nm0(<+~40{nK zBz2zNTIP#c{c?#qZBXlPABoYHe#yp*yRR>+H5g0>iOmvNo`p#93U9^7vwKdxJQbno zh?aS~bV@KlU>;GTyzpW2~IJ&hz1cP&uDOJcWEQi#;?0PdYULT|Y75r{m15xOQ5 zCraS;hO_zU6nztGlAs=6O0ge=aUBh?CBG(@~vo-XxA|Lmr_}U->xQ9~ z<6Fyrr@0zOi)E=vGcC;cG3GkB(_i$;q&oeAbX>8D6^TlDhs`m200(vfO=gn@ZG z0uO1FOxb9Wt@#Qb&NCvvT|x#&6m(|7z4giGE_k?tmJzfm82M!1%Dx9oRGJWI@#AudgT#0-AKn>ve)-$dSx7+DA1^o=skueQ^HA zWc86?rM~}U?9seY;J>-g9XooDs0IqMgv~2iyHx=2l4=;?&l&nkr`4QcJe~(KH@P4n 
z{p!`lQzGVcV{mz&z}AK3p2qX8zmh>1ny0JyAa%6a-SsT37M*Z0mWo91XlRwxTi}7| zrstz+o2Eo=tajPpm{R$)t}zS5H2X;~Zm|gq31ujkX`Xyd11X4s6b|`j$yk80q?pK; zh?iQ(1GpQwSJ#hFDmPpYCgq~~?9W>~1Jcupnqi^8bDyNiAYQ0}w8Z`&=lveWIInZ1 zDU(G(SIp-v@^H?b7D%t0!b0fzNt>JSj+=@%@bE#gLh#yXWv^gP5ZFG{u4G z{*}co%bh`J^eim*#BQRB`BJH`ac@=~(I=5EPB(`|fnTCn9c!pVKmW+{q&GehLEH-i1EgCnEaxi+ClS1m-?g9(HqrwCH}XP$rRcexD4{46m5lX*hedTtr3v zej_0z`)5yE#6)L6JgtDyS5$E{OZa%H?FAwculo889-D>se62O*v~Vo5)z5$jB{rQZ{S68@`2;;rg6a5p?HAhGK%%^X zigHc8!+zmj*qbNW`tPUqj}PuDDk=qt`__s*;RK%bwHRc`U|%`I+a6HhQAEpIT26-2 zQE^Kfqp*d8iH}cx%F^lX+yq}oCDg7a<-#Du&^*2v}K_|9}-Y`#ZM# zbX3eWs2*z#*<3pgh3|4W1hbAk8-mKYW(et*WlQ%OsciLdPOff3k=(EOx~?u*WHJSu ze_}Vor9bc5IeOOUz8OR>tT%ecH5FPdFmns`tl?3!PJn{(xz}QIdw)N|!oq?{ z?8Fos`^|fDXcz7KP$;A6Oj4KgbnhPG9kMg+bW?1ZDw4h!D!;LF*p=DaP8*zSF?{>2 z2&Mb(m5IrQt2eK+)E}}h9m{mD&l0gGG8z0!_%hYS&n)Y#mY*XcA}aV_K@`h0d9EIp zeelrv{w|H??enxc9dCO@>}2c?NbfV_!OHC0UL>b;j@O$SaoG;X_ysdR_>kwFO5A+m zALuGfXJiy86;KhvgByh}tndoaa%6hlccy~nhM(s+EFaZcG#aaQLHLdWN?3^x$?aJ%4WG3oI$sc^I}2Q zDOpY|m6hPf+QHXkua;(VwV1bMwnj-G&C-LQ5D}Pr>ZTWCO;;|~uMLV*_=rL<>AF`I z3;k4Tt?<%3kmN3?b(_+;T$F!3clSA??;n5Nljb2`dOaHZi8P8Kl@}4|T(_WL*@vEEjq7&4=q#z(vK@$wu*~GM~tyubuS~67IFbcL5~d(Mno!TjK@h1|~fNtMCQMI>XE{J@3mxb_t`{Q*M^LcZI3Id&eQzTSHEMutELWH`z80* zeDrPGb0&EoJr zM{9F+_e-taHDjq}6J^;ho8>CR{~Clhgzg#uy20y2#eRp^m%!9X`X1CwZ8vGB+*LnsSw_lSJQ z!vS;GmyKjygbz)iFM+pmf|T9qgfVY;*L1?)(R z4YG4N-EU|3p;~Hz#CBa+;?ES+Tn_?2k-Hv(%%j>Sus8DJ3g$e>9rhPdzI^%O%)znm z*B>PaX`Y8ddq1mp6N;0NDe|d0J@Cb5nU46T94OOa*?|-bIem@q(|&wda4!B_k58ps z(oTs{W!6TH8p_nc=<|~LRH40K2AOj1W0l*s3BJ9nf5AuI1JTXQuiq&c_51!I;<9C8 zO6a0AJ}Ar0&CQ~wUc1z*VPInNjwTzH-#xk3ZlRGO<7{d$|F6fyN>mEK7(trbpS zvNZ~A@-JG+?-{Av+505VdBzk`(W=+Opipdzcz{^=+4lKp>y)f;xiY>pUyCj|<94nP zXRK(SFiv8toLTZu_e4gu_D(TjJZGG@xoNLv!)AO6W)5WA7UV*ehu}YrT8_vd5;DX2 z=uWgk-H%p^qMSt**09|4sFf8!ZSiSS9yn9J8=H4b3igf@p=V|l-lP~hIsEXfS%tfn z7V#5xXSJd5hS^Lci>XqVO7NpZP4@QqJP93F;SW;WNPVdqfu%72Vq2{w3&s zFkg3kvHy7;ckJbV3*OvP7syItm(N1)T~WcD+kq`2XN1u<%_s;1Dxcu4PuF8xwtrDY 
zz+I87#nZ~ed8m~;%)5J6s0pL!fRp4$rrODWUL z@I``}ejx7;f=cQ&(Z)fYBpM>j^m{}hCjb+sE5;2s`#u)@!HnebnxTJU;O=pbNJF<_ z;FG6`sjhD5&+pVD*7FPL-c5<$8BCBrzfgQXbn5I*c#q2_GHw_)>eB1h7{0sbl9^6l zZw!TvgG(yiMQi1{8xf#PyWBMFT3r4;#BODAoKX)aRP85f01Cm^%4oC{-Pp=g59-u0 zBo4+rQBHR>=Biax*PBarcJ>UQES>JIDr{bXdaggJx(sQ#)}55HG8JDB zBubKKxLq73%ns}x_bc*mCv&G%vb@P~_i;E;JdT9uZ@92tB4DB`ip|poa&!D3&dz;F zZG+zW%@*Xn#oTIL-Uu*rXRK|&1vu+V&Tx+a0hj`{&9-pBQ za(4n=$D^Ym#ZWU@c*8L}ypZ`V7E2;R-k7P!mK=m;G5h;j4)t0mSVzmn&Oum9V!j6b zHAlDO_Ozzzj^&Mw+_;+YTI`{FxZ!THFqcaa5#io+T zU%hY}0N`HR!tJ_X{p=)x+#{fX0Npm)%*eDw`M7I^y#QnH@KA{5nCqj~(4dk*Yne~e zqAj9@5**As%#-h|7`QAVe)AkU#)i+ML%S-@=J6UEYoWV3-AS`0$>n~+*{t)UhV}V)+x3c*<=aX;MCOsozn{>!b7a4|KjEWz$kN(`w}kt)N5}6eQKRWW*)P zwOvGzZPQE&md~-FQK>Qws5YPLE7G`SEzr!OXH-k*Sq;1|%dXq3+AmOos6!tR<)eLU z3Hdm*qH=g6J9E;~g?v#s#C&)=@QNkN+kLXl>9lAt_^6|Pc~B?i9ZhKHCv94X)*QO- zq(tm&^m3e6vp2*vt`7==^B;@nM>;#twA6m)e(Xt# zCf~*FAiJ@VN6$Q>%*|UxIHrX6wT4Jjue;o&d_WsnUQaN=kPdFHt7xP_&cPXbIJE`t zWTQL=*0f(zHHxef;w2+B$$-^XbG+t$GKg?(2m6~c=~qjnnqIGs5J9VlwF!l|N{&}^ z)T_|W;c)k033p@8kSJnA#op&T}El{W@XKBQ16L!l2u zW5nsYBE;;5o?3dkD_teDHCOMOxSd$_Fuea#i_iY`Mc-_r8KQXN=iVL~(_b4ZRj!_A zoX&N%tuyQ72Q9KGpXXNhT+R+A2%B8&1-AE8*S=8v7*It0Z0Xq+FR?2_*-!^}VOTJ( z$Fbn%;%%NhrMKEWn824y2fxnflX#`tWqKL)_{n^}78P%*q-*0v&vB6Q(0dXB;D$Z< zOf2No&m8Fpd`=%;u-oimBXe4Qh@Z{X)}a+>@}rQnRqPt5YOub$dWFwr1Lw+S)?R(z zz)&h^o`5HwFEM=u#I<-2^6uxmrWn|_d` z>^DF^Ass+MQ8|-AVRRT-+E1HdAj)s9JFwwAlNW>x~2N*~G4JpG-2B z0qdY<0rS?m07$tLA!cFYS9t}RcJJKq+6#W}xARn9fcrvej#=d0aYK@tN(O(+gHZxi zJVc2}pYk`1yoiIqyX};V?zjSo4(G!6B(3Z<4>MkPN+0+83R7!?L;SUm$E(i!VWAlL z9WkCHk!=FawkMBF`+in#lv%HX=&MXjQcmJ+i?FX zCxT|ec!uw;g8K18+Il)LA&rXVXSEF~XfT(q(-ro|3KNtXFi0>o9m&(v^aYv}3Al;G zwo~}zniVe%_c#zP9Uu87cI^$2)ExE#c$6EDZdN!I`soN{t3L$ogq3UmmhFlcZ)tA5 z5w5hhdc>S4l|e?@RTin1%SiMDreB@m$J6;{ukr*ap79!5qJJP@^hrgi4}0GdA+h#?}vW#1tHf}_T~4fpznUQ zMLE!x4DqICLCu_Hez9}@**oHqf$kx*-);^4UMyX#*XO>ut~dUshv_IGshj;&3PvJ9 zxCAXCI=+H<412RXp`=fnPUwpJtNrii^z^JEag1S0H^@#NPMGk4OSJp{B=q^*Bm1@z 
z+)H4IRHxKMGP$yjw&+qyh2xK@hM&B3EKTW6tbw`U*?3tPDCYQY)^{)n4#j7&=L>{> zPQ>?O{=}GN%rAu622&t_*tXnh3h``j7XIarWMcayyOY!TFP0F((sZHuM>jmylD_`@ z*@Vx5+NMT9pgJd8I{ld4>JZ~@QM1dhpY7;ulxKht0=?0lN6*wB#A|!8dd=iG-{OdJ zNyOc{`o8Sw7v1Zc!!)1daeUx%!LJUyWawXq$FwgYaUew0R&8WeC#0bLig7i7vrZir zrI}=sola;xmcu`5cXJ|dD=u0)o-aYA6{orQx*KN2;ewOhZWYz=k{T~Un<&)h)Fo3} ziS+u=Ff9{0j?g{m^<%#ghW+{siFqeGbld5al)`ZAN8XYy2sa)sN^-ApOrp{aSR;U8rhdmKN4Nj>jt{~oa`~J?o1|Jk zDTFT7MB!KiYCHOkY}`h9uL2<#(Suu)7D-lT@W0}GXmwmdhDFptznmVmO`#sQ5H7YG zCz3)!X>E|yyV))?H|Bd3u`B9?<@8IYnl%0oLhOwT++9%l_qE4sP#c$)q3Vg$LeHvw za}(hps<D?}a!W+FeIMJwK*fAH-M#i@MzWr({s`hB4diA48U!nU1qGtaV? zWrllZD9PI|!m&GpvOl$<5`t5Mehls%sa>0ow_?G{@Z`gdVJ6DtIAJiG&%6mMj28)~ z9#d4b%@Qy1H(Dru)fYQJ>Om);fNA8NT|-zzv?u7k{4;~^#DLyA(EtYfv%c@-TIva1 zgTIT`hyE??McmEBt9~LI!!B+8N@j1nAx}Lt+X*8+%o^TDC$a@Jgoa+aAp;BDuhsGn zqS92szfPAe6v7@(_MUO0@iAYWnhs#<%{!jRUEP;IqIyMaPG#vIga%8a_c6CNexijm zxlrQ9_L6$U{fv$~4$xlAmZ81oxIkdp3*67AU0%69wQ91%WP(*A4yj7`GJma$Vgp(DFNe7GWk&JH->1pheo>` zb{{(-wL7gOm~X^B9EfA!u=$GS)t;P56hG7|`997NX7Tj;YzI4a?#pfP5G_&Y=tiv!X>s{gQ8|xOj zKJLh<(mp}XbH=17vgr^unqw`GPM7w%IUAJ;f`^Bb(Krd$nZV+QW_wH8;6+RgtUR%v zf2GkU_@3|U)de!&c`*-~(A~R6qXVQ@CputjJysblw74P&5Z-0k=Q5I~(6J4ijg#1` zF|Ej1YT{4}BI*UN5FbI1=>r75!y5x$Y&esfOFK*ydQSpk>KGClTg=l%hs^(T%HIxqV)7??ia(<%Eju9T0$-sq-Of}r~g?7;n@gqb&WeKj2mA#BUmhtNAQ{WK1<10EoA7VB2I(Hpix zCh@E6Cxv3BTU{iF4Fz$^xVSE=>BbNEU3(#xG3pPQD14ocRocv}8OEg65eT9WW@nE z(iEXb5PP)^eaC)(S^gZ_F&C_zj)R%V*w;~Mm-Oic={Ek=SjsUXJ*z^B%XsI2oUdZ# zmNs9u>8l*cM0k5Mr3d`IE*R)W_JFw99~~;(Qt+34;?+G{9!)a!R+?*cG9Nq)=G?BC z`uV(M20qmYJv2J6_ag+J*8xWCmc(6#1_`o+;0i_tW>LRt`J*v5#LTjYEQ`@SdNw9M zodpw!%X#!k&Go1>R>3!-G{A-<%oEwKen#QG_}23izB7fp2Z>Wdn@1a{M;cmixnMx(~ z*o?ZoLv<}ZuyoRm?)T4cS-CR!C&hbH$zGg|=kU)U>m5&MIbUh6+`^3Cun8@wb95nl z!4(Q_l>!rOiaFqp`ImKW2t?J9N2iYYnQS&`0uA?Fbnas6R9ZY>X@5WZ z2KHJtHn}3=DGzk@W@^ZiD~o`XxPdArg+HNu-0H*hVc){T;X-aD_F_#H-04Pnc{#cP zak`~t6bGA{%5Y~_0O5%PuQZ)*j-mnTx52g3Lz)3$*KIQzOvBz7E~0m(98lYDWQgf7 zD(XwaghueW`CN+%l-El6#MRzV#`&$_lGVjf!0#O}3>K>QE6$M11 
zlt3fI9Yw3(a&eXmDa=GkxMbE|CSOWpqEr@yHNK!PYbyA27pUdF+nI`;niPpmvS+Xk zS&?XMOGB|f{hoj4uvOE8J)iL`v@0Bh8of9*zO2mTvpJpbuxPeCcs4oTVopmH3#c+N zsi-6oo5q&1Utf_ei0v*mc10)mR5)K!G&&y=&$f75I=vQX_UK#{owm}SMlRN5!DBKU zYL9I5q1QdJxN=PqG{ubiIYYgE)qVQF{XjBJiRiygu4*W}^Fd-_xu9e$_Fop(co zT*23*B3VjZZ+aY{Gtj_cv#CcYIELA49}6j|lO1CdTaF0%1kvmMDLT^Wm4GF4#_ zcHTx$iCeLb!=$h34WGK0JG$wzo*2qy*G_6X7W8>Q!nTb9_TZd4Sjr^Dv z8_;riHoW+qU8munC7yRRbSNEmW7m@ySjvnn3?duc!Xxfi%#uXYSY(x|?Y;BQ4NN9P zpd?--h$%N<=qz@QJY%UxYAbrSlDOQl{#UMf^&8D1+wui$%%o+X6?G`g}G zi&tV|9fX)Z`p{0SUy_FLtvn1G>z?WC zRCpj$wPm0kV1^W!3j4Q>20 zd-qsay|O44>YgW=q9-j>{{&>-g5`%W}N>a7-M%xK3d?36T5Qg*z2?&d4NY=S* zal0`)q;iGaMrf{n4*h=ixIQ@Q+-r~|c2S&a&1-Kep!`0LPU#jt02N)N_v~xOaZT4 z(f+yt6-_Of~GiOi}R#ekvc_o;*H-@sx?I!Hbk(vH$h48zx zq+RdV=QcdvjYFBLh7!kIL8(E{X{-*;hvg#_uI-!�`dnbnr@vPi~n)_tepH`(i}9 zf-%r!)@Ad|^!A*O2zaM!GE|!kxA?56;|>-=ve_(_Mzck))>hxA>uHs!u;yTLF(H{% zhiE^-rP_QE;C$hp@P#(Gdyp$juLn6xEmJ&kk3O#Xt+^8X+##rUiaL0zEec-muhcV= zWqdEL{N0~5m9x!a4Nfxg2VycCL66|&FrQSVZ2Y4{aT*H_0s-slOb-#=)EV01`vgeC z3z$K^XTw``p%07qdJR@bH+a|kOT)_AUw$L)&z-m4nNMf8DB#UqpX-fe`oNV=MO)pv zKWii;?1r9$!shOTAr%R2?fe+MRDMf+Hv#z=^k&T#&D}(S-!FJwrgZy*`b|dd2gUj4 z*3O59jdzc#TvdnTFYSy~(a0p?Mawu~Ux!YsaE_>je&h;GsYBM%y~|#(xA}1Bus>SY zL$B^sc5ny^*DjyR7Q5cy5Qf&25iL)0p|~LglKPr{G?Hoc^gl}=5iV+7dJFyXB`G`g z3EvO`T2SD~hf$HA2#A<1=}7@=s{rEe9dWp@ygygtblZjowOj}~55S{BU0p2`EHpZY zL};~;3WR=6aGUpj{f1i3{VXz&0TE4)B23XL!zeeg0u+X$0dpK+r2l`w6FVs*BXR&C z{d>t=Hj?L>(s;V^?*><#<4z-j(#HAV7#yv_4W5(C00!twDp-AEv~66JSPSbJ`W0 zs1`dv33bqcB#qZS7+`BC4^g-MkB+P$5D599F`*C7FO4P_4CPXF*#%MngP%|1afNYp zbp_SxzmM*39TD+ZC7q>DvG-m9j8LprrfmJ#YRTuC7>Or@4jqi^m@wi+j^dai4gl65 zCrZ96Gn@Ib{KQE61g8l?^WoVB7_u?k?GVvAoAqGibJA0x&sA~<;x<5RDFRB<^J|kS&1&D zU+!r zc;A)U`(a=|2FtiIzGo?@zd~2dRDVzbTH_YEbIGq^=~l}X<4hXdtFYU66|fm#!>~>({N_mSBe<@E@g5#-wJ0vr^+&kw>}d1 zhqhQfaOU&;8sF|}R~LCg!wb!jIjv!8;nr(^KblxUlpCXwxUnA@A z;osx~h6Wzvi5kmAP}A`;mS0mbmQIV1mzTFs6p#^+AzMpzz48q5VhPT$s2Rmx%qKpX z-tq*lK~@07`_)O={|j*1+~&Jj@LYfiSEo)Y662b 
zwMfL9{>{>k8Ki94EN2um)dn~Ry|~u>3)KM}PW0cOmAoo+H0=l&(Rn!S1SjEVMjb|= z2a-6@Dh&s_2NG9hO58ZuQyw$s*`JUW(EvPgoeAn|DPP=`6)Kge9i5*mUOWMoFS~H| zIy7~yN^}e6v)FEamV~$4&t+(wieNLlz1?Bi3Bk&2-SMy=acfoiS*U?55wXzaBatji zBFKt}!vjxd%I5n!!=XUI_BAcvY}gNHQ{7+Gcrm6tuP+ZYMCZPNVCjlr?P8seV39wu z=!Of#bsd?oj<0>>9fwSfyCuNu3#m?#D`qj{4>c)U;6Jfvd?XbR5Ev=f5sYoqNG;TA z;q~^x(BgeUjLc2IUsQ!;iZl>oPgT(d2Wu~aI&?$=1~~g%Z6X+IGp#o~m=@hBQiCIn z9qI#kbI(frOItVJ2gv-eAqXX$!tp~;Xd4;*9Z6$pUYx*%XrPAF{a#Be)CdBNci`Ob zIkC%hD*INMi#u*O{j%f8ux@1D1^V5mq?}^Z0)=XP&`0ZAO}-~6X1iylm~h-0;WyBZ zkOp+L4D)E9zsu_{g)Og(P{5+T$#}|cn0b8ko zv0hU1Yo&Da%I`Yo-tBY)s5P$y}@s|!-iT; z(SHOSCHad}xe{2h{5-^y-{s#Ml!0sSDLvozfO9ih^0lG}Mb{x7zIRUdd1)=Do{PVdo#aZ8N{1=xOcsU~3~K7QWf@ZK!c?gYsMcz(%S^EG&B09rQB` z6~e)Hdp@46pY6(GiS|WHeDZs@=^}yaz)FG`#-~6;OXA07#`#V6P(FlDh{rJetxwJ~E-spfdO;o#D#OMp_ z+|=%8yJGfg#^J@MkoAvwdx+A_2^CQ``)3gV3 z#ETg1jsO&8oB_SKw6rv}e%<)6u&|SSpxN%70sc2j2%ucv=mWo(@`dw2AD|r>%lmXY zZeyJIllI~S!s2|Qzb*|F$PHiv`G~~;k#8JKl@ig`X_LW^XVbcv>=L7_!}JdJpjdl zcnmod9Ua{|DbTJzBmTats~?bKL{cf!#zHj$eu$Hz;cj&0@^~@nc2M1U|3orzJex%SOwdE zpYfh=a>dHa%LBzWv2F|Zo`YVX4+IJdO3%r9|5Gt#LPEmQ#r|9$%hM6X{Hra+%Rx!L zHlP*hb1m$84-vJtv1tcoqOq4?{14p!sa#O>`t%ImzZw=W7J%aFCuJN0UJV2az{q25 z>z1RykD9#G8@(1~;kt5N@E9Yyu zdqATz`8GtP=u;DQff6rlF>aw|`jSb^%kx4*(d?a>wHn1I>~8ap!ch1-7U34$g^lX- zN^QteS|6%(?f+LMX#@7(G!_Zq5t5mK-!c2mG_t-GrDr-gOy zOZ@q*D4ke9OrE3d$pP@s6sQ|tE~A&v1gt-I%;7u8`Inkgk&AL4gIWGN)6SGvIT`xM9e$~I5@8w~(MFSq#q{K&{w+=)POL3xt)4*`c3YW}yj-SV`Z^9lgU;17kl z?*}L5QoejR`hA2A4PgISiCOb*f;;!1{a#)j|H|cJL^Luvn3X}|46sSGkauj-7}M9( zpx)Flv@PUPpnutvon$QpFDw0JY-hIn+8SkapC)f? zq?`EPEuKWBrKNl^KSP0$#vozSrdf=@`3--1b5~EdBrPxvL_xL^+4q!#|T z&fvrZT!g83_$;9RUmhGnzbYqX!SBu$9KrdwhsfQ*0@qk;W(iI`vUL8G9q5ksk^vVS zV4-@lUs7GxAAOAP$4TFo9uY)n)A6milapa0F?Qyrx>yfaDFbQ?kH+czu0V@!6)p~> z=I--baPNePbVTK}cb(k1RZ=k>^n}VIO#g7f1dakeDUvaokBpFz^UT{Z?ZS6HOk>pmP%epcN5Qh&i*Wa1C? 
zX+Y*j7S2UUuEG>rf_Ynf z{MM$a(D{m|)>0z;sCM>gi5o6I%jrB0{DiR4&W(rVi7<$k4ARM+Z@1uQ9aiVBM+ zZfz`@fDOUEN8sM;150y<~9M~Ox}w40D1HTPGf@f zQ2)<%3Qv2xUt-pInUXjJSrtXSaraA)%aoS4zI7B_I5=ae2KK^m%U&=&sObz%8)Tj@0dF%8Uev;V+zlHqeuA}unP)+Wlsh1cO zC`+MyqIw^_s065^$ZiT{q#MlkG?r~-2Y;I4Ojw|WEn;|M(Umu*So(x{p09`}2! zlGwb^1YDP`rMs3#>lI8po7CY$H&wB#mdLq(OSfA*m|Tgvc?;GU*Y2QE zd*3Rv4<(f$UPQ6i+dVfVaPccUpvA7RN;N9l@>%(dz(Ysyac^&AuYCRcR;n|lVrpx` zT*t1w<%cUp9!E-d2h|*d={+xW2#W-#RL3W0lgqXr-z%+dqH1ahf?P#e^}1k`=Ud8A zE7|rpV^W;{pq`#Kfmi4GLapxIS1)XK?dMbVDIys**K?a+=ohCjd0;=au1}A}>$_W< zoD(;`ij#AFHiXaZ-9r?km4lR z+aGuT-{w4OL;=5&=EiP6xHL;C-6Kz3YPR#KEMExB?2otm?`}NV3{Q-4;K!u@g$Vz5 zh#35YcclD6_Awsi>u4+DuB6vbI@RjgK`X`_)W3%)Hg703NMMjMOJxq1QyL0hE%%`FlXy@}Nlk95*#OQ}9M1;-&$LP}- zsO$-!NObMrazXHaya;Lx|M}s^%c{scY2)7T8adELZ*nwFY_T3re-3YV0J|S^Qe<`^ z7xd9fCbhjifxElWxGsK2lI07oghr=!*@SsHi~i>w{*AFWKkkDXk*fp>U)pCZ)}+AM z{-5S?)p<|Ez*v)hUiPX4xT|r!3l(p`HHm`xdv}xpjLGIdC?d1}pG(yA=6}Inau}r3 zdAlaQ%LB8=PEcR}A2Tzvm`mmepv_`8ZYK-@G_U$H1G8E+xlMCc)3`yg1ax@ff|#y4 zKfyL~7y!J&Xt9}RV<6F|&UW*M3a6}mo}||kBp*aXa9>?ydU;%(bZI#sie^5fAFXwL z_7%&27fL*`NpQgD6ndx=e2dDjwfwL|IM36Fj-Wd#Ud}gx3wrcq)@0ZD-%iSvW9i6S z>IvqkFFbQ`&AF|Z0u9S(AT=LOCXpg_vnR@6I+|EPC#)fT*=+LiKHBm}z^~TAy6jlK)IfD0wD=7A_~w>Bl7bFzsPyNoEJC;qC-gr@qqJLU zD9$dbG4I*@_%9j7>XgkY^ST8=)1$L!H(Szww8o!p?-jmXpEW!opo}$N{S0uYy7+R` z#mLzkx|_eMe%4(XMeeFY6cb@J&yN=bS5c=I#f_1rN9L^)S-MrQlXiJ;d}0GNwuJ!bdR*D?AY(@B%RwdrSKh@ zB(2*500dcP89w2Fp-rEO{OzOdv|^Qn7-&uxvj3?%7wXJ4)>r3{QmjIQGri%@5<J$q zSr_ywJO1ql>bgdAL{^OpK-5tiaO|GH`hw(mIgaTat%_IAVXhM2xOh2M8~e34Wj)$n zYBK3>PzlDDN&Ah!yyQ3)A1v`Err5?m51tF~-veU2+J4?7A@l(Vq^KIndHFZ`GC8r?scu|A{MA53d-1zdE%a^29cWITz7&MwX9?tR;zO@pAVVq1RG*56}-Ros=Hjsje?|b{i*fL?h9isQMun;C8U!3qJk)Fqhj}R%K`tOUA zhGlPkmM2yP=b+GC_lv$X!q#9Z-0H(*lw`ElmBAm)OmkDBO@w-dF`UjLWR#o!WXi>TnhlP)%)E3!zYz1zAMME*^oLtuIeDx z(g?nf2%Z^$dj3Ydx`aWl*gUI_f<*4A_29TrHLK9hugORUGsT=#H3XN2ob`%XvY$U$ zFuvFd=HPsO`JDjw*I^MjfdD@kFAjgog-g25i4^Xq@HgNp-14|SqoS$78IFX+{B+^Hk3Dk|8QT}QsM^Z+S 
zb%QS2*X>N4tn~4TXkDICpv6HmCrKUY$1e=UnvarXVb(2g#3se!yKC`(uuz13;2ZoT z3xWnD!C8QseCOO$rN#z{m9tdhu1b7j776mxI=Z$=$v#;~DH(rS? zk=|mS{SSW&AVWjEK;H+kVQe0kpMc&4{A~kfTEOZ3wAML5=TiK^-jBBI1J+!Sx>dAm zaz2I0%*^bo0zLsmp83gK%kqr3TM2c$KH+~CEv(!ZLt%Y;Vg47P|KH;hoQ#$h77v1; z%NFqb`Y`{`!{~c7G&I23d++_=FYzpV0!i>E-{74m1cj0AV!1Uk)bq2-*p-o#IFV#^6@E48;^Iib~upq8Rmp~A(*MJ$!`9Hn0&)-I3W=Kt-y?ASTT@-PEBUIY3FN{Zf|M!Hl& zrVV$+$vWFF(6F#np78%`GmWF0Wj=%Q?pP=gTFnwhxXy=)VWQP zXek6YqyuD5W)R5UfkpVVuT8Y)V`>p^b=pnP=(HOaU_lG{VL(UddD7ExJ8g9t%hss* zyZw4s4ld&Bt=}r7-kHgQ_tEzjD@FM8d+y`V)SslC^-b*Mhx9J2y}rbsEmnV*oI-v$ zBV5$-__tiXIHQfnioK=o>AxJdcI!7Zo7wix*Aj@0P=>qS3m=a!Tm3bN0CC+ylrqQl zLBtW)9jutEb-#zVn6LM7jo|xQ2?cz_ubac^q~QKizGy6@RIjBMbSD8aWOM@gLUVeg z*o_07QVn;?$3Mfl;Q(9DG|YnZi=NbBHFotfC2dKiNY@ZD+?^8|O$esf`2-oH&JG^8 zX4`*M&^4>0m+T5NZ%^9E-UR0)p)bQS0hZ$jp7vU)tCQ_D9OgiBAWT@2?&!A}@+2h~ zo@jB1_4PXkx+Iha%}*m96EJQOQDZGBi2-KU2pJ~Z0%CDv&p4QJyr~ljDL;pOwcmB= z)hd7MGRRHt@DxrJJJ}i~)gMTZHHeH*y*}OSDN-(riOA{%vEnSilBitvl|{Lw)mTz7 z0iz%Y%I7w_-wAf0sx)M_X`i7=%Uxl=2_F@8vBI{HfV0ZPtB-8F?S~N$BK`yx!TVxg z=};y;g5+7mmxY&Iai6G0YZWjd?1bIkIvOKQEi9Cq{rx;)*7Z0hSQbRF9NXW_F3^O|KMe7y*7 zFb%8KZY2HQNPdHicuK%F=yY@T{Raz~($i78*Y?CP->vGZh-lr!yB#V0h;&B8h%S+; zZmlz?qW=viTOd}hfW-*OyK$bu^9K?qOYzGh;_9uDLSC4_>i&y5;*EcB4{u#%XBA*N zxF=;QbqPbQAU;s$g_-VUX$9)!SaDj{-r1|W0){frJ7BB$DUMGYY6?SfHS*iui$kN~$ScpHF^L7B-F#e{_DDd0vf zGZPL19C^a>PZaPgbJLb^$jbBQr!FD!bec;|muy150V_rMD9?TFjtA*pM`d~)Ih{T) zTX6dj2)E>+X?Y}IvU{#^yoBBCdEANB0utYD9hVt0&~B&gMe%gU2lt}T{!q(p%|~Z9 z-fu@QZ)?hqch0$-ZXei{A9ufR_bQfPSTxy9tQt)^P*D^91~)gQCh z?-8rC0vjAHwdWpIukz-%pE%>MIGm0$?t&`;U3@@ta>;-DG>FZHjS zCK&^o?yo=vRr~+xMiBl3_VQm<7-$F5V*mHcyxS+Pj(Dz8no!b*;59LJf1bPjhmsNB zq6qe_V;KL{(ry4?yvnaA!~4JwcXS;ZvCvlp69Bui8h~2jv%mfy@8?Gc>KEND}9T!)59G{GqjZ> zIM7=lwfv-{J}UG|oKYt|XH-?Q!{vr50dW3U_(_IQYuJrBNCu$nEnjqz&$)WPwU9wv zs7w?1^Mt>%dvE-gihyF)1y zid%7O(cjPUP^FYbzY@`Clxfn=1yVmrO;;P!qq5c4Hj?our|q7K&G3Y@h3P*E z;+d?mHGjU#yxFkPpf` 
zK1-6q36;|`r|D2b@Rzv&e+YnIG}`)Zq}EQg>`dEF2-A612hufrRyWanj+PP+#Cz74D?1TFz`ZH6S(;vbr%!QTKW1z(^+-Rb+PI_~A6Iu5 zdq>Og@WVtBx5fUfi-;#GF<|^Q&WRh|<-&o=Z;L<5u?4uX*!y3Gz8m?;q+-#GDELQy zeXj8T#pZ0v0QR!xO`yY~G0;db96^1$^&Okjv;#AUX!x;QDzki$f9zu&7mq)g6#^jn z*g5L0F~4Mb7|$<~asc=GJNbz`3}$OT&v~~xFZ_MpNoC=g<6SI;jeQJhIo5BL0rf$G z{5am_2jZ>=xUncNNSVu=$tYy_n)zF{+nZagjy!Ry^JxS_D?WTJNBX~E zeDaKloEp8u8ITmv!N_8kUgozs3qPZi20+ zcQ}%1SrH=Fh0=Ju1{TOn+z(b{t8&GO`xVD~tCM=M?)^wcXi>c!Yh<(B?mY=}Jdl~7 zmywGJ{Sny&5XV_8T!zL6zS375U>&Rz|BwjKmDSBLvdol^Rp0D7+{Yo`7PH~o~ION*TBt?{YyDO|i4P0k)_yRYl30s3cSMH=EZRl+g z3F2$bz;+{z`_0!VAO!!vd)qR_`cBB8kBe5L^J~Bdgjj$;qAwfNpv4=Erdy@WkV4Ka zm+~o7v(8yQ^~FarJPaNn8EDwY20tazamSe0Wtf%b7~+kcK54`Ds9c2O=ep}3TDm+r zk4iXZWvaEGPrA&ys_svd>JY5Jl8yB3EhH&6t792WdbP_8zdrb507P@bPtk%o(o1#B z#|>|pSXTE_lJaO0jQBDP4swAeU5`C^XGxypClmO7d9xx@HeZ1E-rF(SI5*NtHJ`;^ z%vPXlc_pUe^Y!}7`J%mE7d3h9+t7;3VXLbOmkjATHph4L813|UKLVB%Ztfqoyg|^f zqGx)M$s|zEBnyJct2z%878QL!zjKJn%sT1GxiUf}exD^*hS0f6w?-t4q!(&5cSB3*pOk)hF=2UwT>Pn-jy|%3pL+H@Q34_Kf{o_J?Yn_g|33xh-G|3}X|X zp34N#M?MWY#Qb4&MPmhMY~k|@y*f^eB>J#%L5?ry9k&^)rTnq8ju9+mCxyKq=6Bx; zlCU?^MNbORX_Z;iRrzFucK9n7=tM;hX|+A(mIQ}z39uH~+&+0>(#1HN^Yh|{{+V(? 
zGAi^rE_#~Ja8#dnyXDm>mAi=hqq}pTBI`U;`^A|j{(n;Y#>J2B=2kLa$$L~^ zcWHXaO?G(A1s*M*&9({eZ0DS-d!F#;W~U4M8epbQDdSV7C3I44c09{15UIa*K9pKIwdkhdL+@97aY4@K zOTAv1ajLspTjiZ%A-qf6{5Gg#qW_3 zen;m=8=ca@Vc&|*wYvPD=LjFdGW*_$aBDylil{CQ;|SKj^)YPEZo`9-QZfgMn?(fO z{bdtU;YONL>7 zIR_A4!Q2QSi#x?pQsdxfj4V?PLdl5I9bc=aF=bQ#C%Oz{k&Q@ZpN_0&@6V0CH7`Z% zXcX~zKxs#LGvj`p>WUaFhD2Si!&8;8}IG+Sx_@k^wh4Mp- z=_0C*z2^4->oNm71Ok9{FdZpxF{9G#M80YKV zj-B6q`e4iabo?KWHZJ$9GpNa@M-b z+BI`IV7^54*3zggET!@hetgQJmJiyf+oM+M#xwMJljyf0evqD{Irxj~M?W z68aR&Wj&v3wfyj@0d1%s4I@ZHA}U_;H6i+I!hvj@e_O@Oc{K17AlumsmJQ5^fEsk9 zIojVUAndxkmzPf0vuKJ``LV&>OG|t0E*30+XyFbLN_ia?HB3ZD9KE~tz@Lmf6rMqe zY&v`)LCVKBx#6;`>+H-~C>tif*j9A5f6M1HEnMib{0ZNyDy2oWZP>%nm^x2tpY22; zf8EN1UM4h@DA~&pc{Q4+8jgYaW$hQ4k*HXz1k*I!K( zwC|5JM30nXdiuufag+i_lTlT*_!DeU5QoBDi)jG((kHIbTTw}AF%|r+qk|6`6bfNO z-`B)SG|3PeDUwGLs_MVGxDdzUR@w;sHfcrfJFJbBuklfsJAFh5>z-29T9-;q(~vb^RT;KZoAE%3|PnHT4Kt64s# zT#FF}4|w9KFYz{f$@@+x7%Z-)o~D|$V6FZR9$d8*VQRP_^|@9XEVb<#O7eyU1C|no zzzepZ`*n>Wo-vqjen9N`>OI2`;hP7)cUBJxO2B^S6afw&^W)z(&w5#Fg`c284i3SU{za_~E(Lgg)Pfik1%u`;uN| zykwAJREacXaMA6tx;y|y1Y~yneL6&+P$)O5Zok}*!R1@j;BDfT$nU06(I5=Q!Y6$b z_dw_&ZHc`DaWNu3d*uJ}W{;mMk7%2pOE5I0&%dXGNG(Le^rp*k z{?N-x%%-Uj27MHViB;&BO04jx6Uq!5Eq@Y)wUz0#FTn2)p2UxFeFl3^fM%bhWfXii??5^QGum)pH0BiW1m4ILHhFT3gcvxG9o^0k5{=k`lHibASUX zD#Su!qyyui61GGm8TiA+o@jHQ&_&cNNAzmHc`K4VOdNc2{PKTuLd%NNk~f+*kBUQ0 zDsa{yn-Qh>Kh$*Tj#D61;bR6*t!HJ2%2E{P69_|<8C5f2qnsSJR)OENTuH+9+MlYL zJaaSUc5MsJc*M9T5?%!Z7K)acK)99wdA=q#zcL!L;iQwsH{sa( zJh`e~;bzwVfngrm8g?LSb^SqT!<+r%92=MpvCp|O!O5L8ai0jV*lCA)?#FY=4TzQc zYf2EeSE9FWX^US+%n9QK1*GjcaIaNbPY=0Yq#DfumHge8Zejy%ZCPStW8K$!(2R7o zwL@E4L;%23o^R&w8Wi(8{_EC4y2xo~%1!S!RjdSWa>=&39=x(I0>b2h`dVtWa(g*9 zg1!dwm%r(06+t^mfdBbou+UaSjJ8GeaLKmbMs(8?;)kDtp0hgq-|QToy1_N*3bM<@ zKE!uwTO-8fpl0L?Su|R=YiN%93`LENWZr@V$7`0k8=@beS5YnZYm8Zo3rK_mPL}b) z@h*yOAS%i-=C{TL?2VnNvw!S1>zLod9WIwfUO_cXLi%{|71WH4AH_4UnG8uM<44gw z1J#mJU8nzLh<~eqlKX5;-%Sx>kQHG&-{YN%-zqES5&v|@JQ~IX;^FwWUw_Gb0ebqX 
z9x`Dq&z3~i(ZT9$RNPtlQV=yis%oq9q9BVJOrK7mJ@33YxX`Xl0?HrRQB{twjDgI( zfKlUSWFl^lTUv=Rq}38SzlzRU_l(;5f2=n@eF)MhQ>cf`eN$HwL`@_yHg(4}cC*uK zR6yn`)RlNqqMVr8n~si%OQD@NqLG4SI!0AT__K;Bv?_wAT+#7Q?jG_6-W3#7*Po{N z2PJN%tj<^;P*YtOS^+6N75(R5H{m@ycw3wd;bFJ@+D02StgOX6dl&nX4>eeVN!BW2 zSSUq7O$A;Vk8iYJtehHvGgxeRO{!#0yM(bo^I#huC9Z&RIPaV-$R;;q1q*baC4Ss} zr@X5#;h-Nhrgq(s9(>?{LrqORd38G1%S?5xs_#ysVHG|9kkVML(2aP%vZ9*>Ds{_v z1$G-)6Re7^pm_m2yy^$)Z6_L2P*bnP>Q6*tRX=7kzpK4 zPq_lJBf8@AMAo)}b|U80s>_}yr$^7PNai~%T-yZ@{KooJ2_&j(;|I``fH?_S4xl@w zfoz`?|1#YB@Le_LE(?zmp~KAxvql7~xa$3~khNl~d{z4+;L$udv9NkX(VKpy3nNZe z;P{sV2Y0%&qMvOHZC+){z)E6r?17<3)%U&ueXacWvgE9+KHk)fVr4RL1 z57@ya*q~ibxyF@Bx!NE^X2`(~T_h374n)jbDbO?_K9K9w0A^X|OLaOu<6Q2|UP%LjSatzSP|)oNWBmG{;@O2OH;`9tff0dbEX*X`i~ zX3&I}X#yI~Y@fwEH*URGzh*PBplBMuh0nkY0k=%|M`FrRqcF5&;dwE#M;~>8*0cs? zqe%R;&F4QfK79nSJz^2iejmfii)cUMwQ{Au?ru0|pRLtV2Cd!|qy6*t(ivNSIvm%pI`|Sl=eByTEqRU8_o_ya%m#T zCcn3U&$;P^eaV|B%7jtIvuusfJUR{|U)~`orfdMpD!P>fHy{zRDK{qdeY0EuRp_m( zCYfx7$W)q*8;e*!)jTCogSj*DoT}J(ztJ;Qg63_B>=0d5$H9(~UnyO4A4z%(nmFbT zx8U6V2>g*G71HaK)?-0dNp#~Yf6XztSuTj`n0?#R^BHqe*qsA+ETy1SsD{ORujN#% zz84t~J@03!$KjIbpr^*`TwoL79y|;`<83~EbqW=%%W5RdydKU#yZ2sIR}w(U zzu@5hkpDBF1yjMYX5NjIO_Yt4IzO*~+0o`Sjc{ zyqpeU}96QABR7t5%K9pOzu?BaM; zdbem!)UGsJ9Hj|jDT@z*V+v>CCnk$*bG&}Y_q(hQ0v{Gyw!BEGwqsGRl1^s^ju(Ob zz*`{@JoMpZd?1RXDVK(Kx;>O@&N4ce2}5DFZfmYXk(&)_#+a7Wp}*_G4%N)IKV1)N zu$L|=rUZ9Morz(H*UH{s_?%)dJ&-MZ_Ybr3BVPxNFj8iSH_+M@Xgo1FUm(F{DTkUR zZ>mx5>D3JF6ShZCQX(}26KC)<5a$g}y!MlhxS7rL{iXzU_w8A<0!;kx;<2q{!gy^* zDFd#4m$m@GeK)@7X`fF|O@%fIZRNh=qo)ngaHrh}o3(_M8%Vi+Lo#;+6cPOvq&S?s z6fq%=3BEz9__^`uPgh6AwYr;p9uc=P8TAw~7PZQABIKU#(mtYeOz3R#R?da&_;!36 zw;U?8I90*8y1Hrz{4~QJL}p$UTT>GO5S1D;TMg-5 zlBcW-VK89dQ07M|BMHVlsV3$ssh9lh7C4K|McUF1yN#aa*+x4QX>i`{f@WzvGwpV_)PZ^W}Zn zYLz4tcwvO!Kl65Y&gxd|Ik0(qkOXQHJa)x^Lte9hCR78QvZMm4@z(eGu#!raY<~2> zk4&UU7s(02;^saGFT$loQxZ!&z*@+tu^Fb?34z0Ulp{|Dgs438y9qJjw~+a}L5k)ty;WNBR*bbei=&J~c^?v}9S|Ej_e~ z7xH$*sMPsRFn_K2c~kL6TzBmK*-`Mo(TYFj-}h`O%mWzTFWu^IP{?2Gz$^Jti?j3n 
zOhN=(;D)sg>zD*8Olv-reESq+DUgvk6S7LwNjf_sYr}`a^dx~yFO&TSSAL}0i*A*o z`NfASv3Y}IhCWxPo67K_$J5R$ocSna0=^?q_I-3*qamsk4G&h<-ho9#{yD%&fs)9T0 zCV`w7aaKMYL7f~x-b3r9UJDgrTzrWrw3>ascm5txXru=d#pMulx-d)pQPp7^L_<~q zMIL&8CS*6;6QlntSJ<5os(m>xwjU!J@0vGY)^pcw`%zsMhqS47>4x>npaq97kj*yg zPURu*^yY^wS`|v(%T<;3h#*Uw-4w03*W0eVh9fKa6%OdYpPuj-@WB;MR9KyE;DjA= z!Vvd!)#W12;06YnV7+I!ocf^Mf$tZklPU$>G5N2`hrG>jA_Lt*M}Nr;rWYLd{<#ax za;x=Pa3n%(JurdU92-Mu(;8gdJM`zF4k8!^QG`o&++Fq!92*?$gd!Y-!l`3|rBmx0 zxr1q0$wbug%cb;1O%k+H-@Ci|UpTOf_qVlJMa^e z5FaC93?Lx?_poG#U5~~uJ@r9Ls1(x_-8TMya&`kY4k!7u%?}bf9{7i=Xuc*YRVqUY zsqK#>WQjGkjoGJ~Wr2A}CAYOoj(FZT^@ni7I_xvV!8zMHBevfY;x(e2VRZe}*OxfQ zcF_vF-g~GqxW3^s3a{m7h~)630@H)K>3uwOx&uCZ!_+FF1k*Pc5R5y|DE3Q978~O# zE<|?yp#(Q2?8sm+`mJbIW!&L+-AoedIXHg3Jk0LYFJ0;{M0|0(e%|!lhNm-Y@mN{ssvcQM;wc z3KrAX+LVtV>_;pHLcxiQ51ACyLcZA03amT3YYFxM8eS~zoJBz=#|Vsv_gIXL3+CwN zA%N64vDhhipKQcy7-V?M<+??CZKswrIXxW?c!99Ha*(u@G+xmb3P%>6Xw4NO+>3R+ z{f`^B_-_K!df$H{8Vw+sgukcDUfP)SU#+{-W()cI#pl1j(nzLo0b@BPx|GQkLD7}`;M zW=L;Wx}hmqWQqzC=ssghlm=rQ{H65J=n)_Ux|rUIa^s@HHflB^W+G=6rHxxw*mEQhkVKzzqnT zZVb*%R3Q!^#UAGytnjPfl|pOUwQ!2x>n-_cRb9bRmlLncHVGzT&-U5bpm}lmA85&s zIUSxpoJ9qDm2P~SS(WkdKog58P4C)-QIIp6I6;zPb$e%#YidRBQ-WkV`9F$}vZLvh zZjM3%i=-DlP5=y|2 z0h#fP98~)SSln4}0J7!9G~lsf2qc|dEgpF40NjZkfNgX;MWJ1fqlo+K+}C9|O~0XE z+O?@$AJAJPK0gDkGV-J3gx6MUk*32ypo{+ldtK>cNRA4cEdn_>!oGN$_04bLh77|M*-D5P)k3RboCDZ!lvuw#jZYYl7F^)-%UrDt% zJBd59at}XA(_ajdJSjigRM}$0`5K_gMD7b+bVUT`hymV40-j$KGpTA*6)jiqQnJ9zgN9{vG$z5@V4y5r5RxB`lX{TU8Bz65OFRYhWhgPVaixRxFd zD^2)!3@N;0VSkoF9BjQ(0jRtm4yOdXmY*4Rn zpVrpbLp(O2->K8%U^GBV*V$^5(tk$rg{bJ``-?@^7X(D^jEN3$s#afv<{Z(W<_%lF z{-UvRXl(QktAZ^4wO`oAlh%RpOl%i<78Ge9Eui}oE@2e5vtl;LlChn$s>;&Fj%Q(( z&d6}FXUZqf-o$sXd9H6*`CGNP_CRB*bPczcbIDIylei{tMfw$vAG0iX&y@$VHFUfL zOqV6)?$4K#YZ4i`-yZDKd3bAT7$!_9NlrQ>?)>J3{ef*Mj}7Y|&PXOrPwP(d)a=z% z!S5XH>{$Ok&%|V(nR~9o|AcgrYj&1yesQsP_!G}WvfivdXA|A38Pdna1~lgRC9K61 zbyD?jBQ7s5{lm#Wf1s3Lz*zXtWkx{t-}l|eA0e#&mPVUB`2Q)+oeXsnb}dAHkJFHI 
z>I$Y7y~Nb~*=P1mav+(6;>Xft_?461o8`QCS-YbbbA;X7Y=ZT>qFe!nV+!NdcR|pvzPI-B1JI-R|VYSZV}zN zqipoML5mRa#8#V`BOSTs81Lr&cyLYxVMB`$_xflM@WfltZjJvvc_E5c9;Cu_ zn791cZijNZzQzux2r|S(41O}=(GA&jJXusDQ9tB)<)hAxZ(godv-y&dNfM39!GCDk zByq28(NK7qR4UuIsH^eud2PC7rq_e+hlXi@Jd0Rn&Wf*AvnoDy|nMi z75H6u;YwYY;=?EkCBVs%TtECB{WuTD!g(tee#$SQg?hY4xNeEVYM?QiFOK;o6T9yE zI?4TsMz5?CPch_)Yt7^nka_Z2jCobZ`T;+IdN0y`_uxcfdg_BDU+*!>6g^x1u4P8l z9NT3m(e$AS*g!e4z(P%*9YE&npv9z~h+|_M@>vaw z00oV~?7UXT`j5%+T0FQJCM}iTRX)#s$-Rc^`9 z;qac}S?WB#MXlA5qtO4B<&O_dvkKEQ>~C#VxYTv&UT;n6v;>?{dTsCfW|@SS8d6C(VMfs2ukhW$LA9UZ+qkq+I#z?TbM7yHhLFu0t&jg( zEc~83yZC-Ui1i4E5h*FW@Llet8ivq({?gvxnDcCejDWYpzuOFzf56cb{LiTc;lD@6 z(raImEKHgP(@#oszT1!??Ue8AQD;6LK%lQ{oZi`^u8?lI-A%Vj9as3jf% OA0>Hpxf&UZ(EkUT9M{AE literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/ripipe_a.png b/model/train/yoco_moe/sources/images/ripipe_a.png new file mode 100644 index 0000000000000000000000000000000000000000..9c93017cb1db230e7624adcfbe7bd57e9d3c0f13 GIT binary patch literal 46665 zcmcG#WmH_jxAvI?0wF+S!5c`B;O_1o+}+)waSQH^ySux)LxQ`zLvU-nhyR^<=iZs` z_rs~GUF)nZe z6vc%;R80Vn-wjwZK^eghA8Ml!UkpCK+X(g&8crWR0Q&!PeH^wcG5+wOaYIr>P}xoQ zH1o55`eNrjQG|)9M8*<;*=bfEpQ3220do=DcDBLYw#weJdY;nea@NM=lrYA$^vR@|dLOO{`Gky6EKxdgOD#;vm5V^y)MG-_-*w7PS2Twca~} zl^6i^|Fc6!A87jD_cP$M^>B>+j|VV-{2Wq{|3}Bsr(Fj8|1reW=OF{O|1oARVr0Md z|1?e*E{dhhSM5od;o!LoBCvn33pr2oqetcC>P$;2ULUJ{v7a=#! 
z@9doSrIjZIM0{2mZtlTN4^G(wHh2a^kc=1x6n7!rnS{iwx#qnP+&LJ)N+k@2Ck0cn zB%BTR#LntBuK0udiMu&F%yNud_b2(-)S&6pSOjBMzwwJ z*g%`b4lm31%^g}CEFm1y+PU~y;%_FLopb zcksOKhp!b{(rZ0BM%82HNjt&W*;~0E>gR~{uyJ}X6q3p6;grt-TVVox)&OeVr@vCa zvR<5j>AA6{;aR?_eHuOv?~V6;O8S$D+;V^&zYTVk*hDU(@c`T?c(tGty|NOydPB2A zqoe~DR}Z@JaH@_tIxeRmwPydAkWw=IG@4P6Jy+{iDoc12B3yl&iJGB@{4ptUl-JFJ zzN`!tZ19%l(wE85F5FUgz^^vL@0z^5i9rCMEq+sHJ5kjFLd(^^ZV@M=OsJH;d_SXK^3V)t5Z8;*v2U{)iU+gp*JRC64uOVQDSzQ-p>NR4l^v}qN<8Eh% zS~Xkv`7sOZ0GrC*WiBrJp{3e`WnqeB$Ic`E;oL^bm#5IC=H@~I`d2rSqD_V`oTudD zXfZm|E~ihIuua`czcus-^KLclc?%9}qBJY8Xnd{tuM2>V_}p&`qVF7i)fxD8IGP9phxTU4idFE%f?(=wr8SplsWI4zL%Cm+X2BqTgm zWcD=({@lFsLp7|dwHLaNJM^=}=uyBb^xwAaAX;*aKmXe$-rd_jes8`b4xl2D=rCl5 z9Fi6Cx0yGjWzJ{h5gCrFeY}xO{c5T(#cMWyhb20&xpDb@ssPVPPkkW8DXa}J(LC3o zP$l7l-2PKMA8e)&8i90j4ie43NaZj@ua8t?k8#)-bVT6?ZI{$8W(~;CYWoqfO5w|T z-onPgyb`I~@mkVQ4}u~cl_)1$oPeUXEnt2Yu_$HTWH@~HE!G4-bg^YC_*q_0?YJ9{ zV|vH@?SP-I|D2X+r+hLqI>os7cK#AC%Y|_(`hse3bd&ox6arzDkU$rv{^Q37P=4DV zUNo2uUu3v`v8H+2eyC}tT@e4fO~rFFUiom*T_t8|O4QH;6_-ggkuh>x}?WZ8(~)+Dc%^g*Nz~FE4T@9C^+>hEGT;OyKr z%I-pU2L3%48J1PT60GjaSyHt9_G>=l!y1hnUjGG7>&~e~l+V0cVDw=EkNHh>rImNO156*drY$f;TLnegtX#z5JMAaNq%B zC!J4~UNw4p2N80fyLHKCc5n3VP>T=IO63~KpM6qOHX?cT>uKwFfz%RP+iVlxM93yXE;Gj8jw$n^%%$(XuI!hP zvzbkj)M(k_n8FnbriOA*7M;K+g1JA9^m=?AaqvUCcg>X&Bea+=-dT?)*B&XKR)79$ zD7V^1s!(5$Q(NrAoXe@^Cx}I|&NB-f3w6_&QazS$<8(CHuIH8`8w>GygiVT8+JqOD z+dVq>mfN4TII=w|D7W$pmThapGXJk;pwi>*rXOlJ$AACnbInqe>i&0k0{^uYK}G90 zBYA^NP4D;9k8^2bPQpWkpAGlJWnj0N!cO z%O|Lqt;iw&_zORK5wDe-(C>$8sbkOe<_z@>k{`LA3@}0cTld}Tjpy0Y>`P(gm3s}q zi%IxN0QRT@TKYrruJ8GT4zUuz{46b8k8QI75D|UkM~W-LqCde~Rp#lv=ii8f99-aP zuB^-dVi52xj6s8&)q=IT`UVPyZFE-a^0MP@p1R5f{f!!v+$Z~Q9x&!hQPz~}_ znEc)JUO9$J*1E*k-BQT#^h$sSYX)wO4e20fOxhMAGq`ULX#!+;$lR}T;A_cp-fpP>aFGS)8T^a)Qe$y&=F_J5dHZPW0 z(fGm@I<>NpcW%+lKzAJR^o$Us zrVRQ-uXabNNUe zzLDIze{b!ts~z=m3n-T{$sL)Lx%TUGUW_*?4KEmx8HuyCSdKr^Dh3e;$RefkvSDga z&e*T|+tGmj{6vx(P^tGY+6ZL1(_k0^AkPDdo{fZNvk`0H z`6|nE1zMQm#1I60bAVfEGpMP3@hEIz@UbTryNkd!Vr!|}awT9x972+icQ7Wcm2FeQ 
z%3yU`sl=J$hI7%{G7-$D_%Q^QE;idPOYJPw_1NVpRU1P46M?a-aAX-KC;wid?MWE@#Mb6@y@KX=s1d{ z?2l)#0#S)Uk{?(P^Wy;x5ngt&ZI3?>09|6niQ*BtBB}Vhb%tX-u#t)PEN#A@iogAgATYiC5w0L_a)>Q(JPKE0SxqAtl4(b=s2wXuFx=zCvW_aCT z3wP`-EXri3LhUrIQoXyT%t`+Vht0j-mS3Pha~(k~pt(#%ahQIt-&=Q0VOf#GYS^#* z+FWO*G#Q|%+vW0Dob-y~DEKdC3IuG}RcQ|JbliukYh9Q84Bi7guS~m6`uJnAqy0hg z)%~LA2T|R?X8H$k!9KUt@Ki1UendB1dRXSr7x_liXT$4lk6PJ*pxinevpoif+;mK$6hk8GSrY&cT6hj~ z;u1w7O)~PTAgCXjnr?tCh)-2c?hjwyotdL1h&j$zocr)8Dp}v)6^x(o+dZCR#Rg$X z{idP8NJN)s0-HS5SyO2@LQ#5~QF6^VXe^O z?n89SqZzT;?e?X*il%SA-jgKrOetWtVupIqq4!ctUOPSdu<31D5jDcQ=_DxTq#!JW$CzLZa97V zK;XQ8nM4#n>H@{~1SEeSZjVz?PL57)dc~|hcSDs1bbP-a!iYqOQ$g?7NA3r}Ej^Xun}3s14*75}o=Hw{zo>)rY- z#Z`6jPZQX@EtF8(J3z1uNI#sy%LgFXIZocR9mBkcA&&G#KS}(2s#G_Ysg; zf-TOh!s*pJwPn$tQENdZ6C%gbgZE3fz@~_dOMZ4LT$mg`K@jy9_cp{rV$Zmx_Eoloyug^0b82oa*x8e+_ECTpjKx~!7YPP$;n5PLcT|{@hR*7e z0%U5UvSj%#y46o^JEIEKsgDhwK1BsX4kJYpaV~s)_P@Mc-81q)rV&_2(Tx?6%y6vn zfoplf{VHGVswfp-#ggMq>$wu*AG0WKPl}k(q+g0rq%f;GBoex6t8-*4MgG!TC`%>0 zszmtyL^x2aAOB2bFh1S9?_>LI0VpbLl}XVr((QkQo4_gC{yB=6{v^HA5FR~Ty6qJRJY6n2N3h}Z(+vdrbqXQPU*A3-O zMHpaXvjNCpsP1gd{gR;yHkTI8Xm)@&O=872_NP#~wT|u)SDDxDL-m24!dl46$#8A4 zzVy`fE2u_R9loHWP4qFb@LFoU%!whGOeo;2Q-b;^Y*Ff=#bN2!+r6NDyZDDqO(+r@fjT8z4L*0YjcWGn^4!5UMtViu`wR#3 zHrme9V2l0Oj!8xjMJiB^ zI2<}!nv8x99mfjVEtFYus+dSV*SDp+BxfN#BAtCGs_lK5Qm|Dq*@o^M_FP~mu}5sn zm)g>+5}nLKS-foT(*H3QApoP$8%7Idp#&4HE$6bzoI+6=$VpX&KSJ9y81-CM zYwrkGSLmHh?U-NE7l2{%9vAasSF^B{dbwa$#(S|)_3{&2g}1p{==ibi#%sNC%}#zS z6ZGDh>rqAVUXttKqNt|ti@ny#+rnUAaq0*RTkkWCuuX zYWX05@)#~CF>RTmoc`M(8;jV-e+D&sg$*OkQsTw=fBE_}K1_t#Gl;SOqBa}k+~Twp z=)jS8DNQ#g>z>CYNvrGgn!4I_(+@hb&eEHyKx%p_;!2qRX2pV+uOB8IWOdCNs%y`` z=G2|{cAj%n@e$8$Y`97IpK?~+tfRwrfmIE=Z#&b-9cTIs(W$IRnBYGgCJZpz%@!9# zwZ&|c)_vO&h4;1a?`&vlK3xLWRe01x?jP4zn@CWocEe$$%5$>TeYHTs!LvPT!mE+j zy)_MkqiX=-$TNu3T05#)e}pA3};bX@v7{ zjcuk*y-XJA+fHK@3T(=3NhKO*7O!+-iXbjANEY~I#JdEd95gOzo$SlzFeL`b_D`R9 z#xOmsN8WWo_8J12+s8Y5paAg+W&2GsZ8{51W}I23FHU!G_HqBRubqpFguBPs;ba$z(f@`dPAmZ!q`3G7Yc)yKKsuU&=_Y 
zZMr}sHZ8yyH-Ya(t4iPl6Ge;c7@b8;d=OxwNAsE?iwl3!a^Y$&3)F0@4ce{0q*N?V z%fv}wQ$2IvjoW$|dDXlY1Gx)E5GMIt>|Y^*_zTWpHRzK^f>y zFu9DQ#_v+tZoS5E+rLDx^AxY7u;J<_deCuwuv=zqq`_w+&!c}p5n0wz)bL2+VWpg)TPy2yS9!#XfW1H}^VZH+Oq2uDK_yGR@@X%i_G?qj5d( zW7egwcxeAK0B5o`8@k8`Lvh6!JfYw`Gp$S#rCIG~iveGm*M&oF1>TZh1|-Z8U>85q z84n%?Pg*1q?^e*MHCl7mqSMbPDd;Q2Z zp!&-$NWbt4OT(+Go|bX*D)Ff&Z?By zPel+l3%7?ez3niRHwv~J;IucVCDfk7!sae?lTkh60mnf*xt#Xrw*TEk+NL{*WCoq7 zR$%Y2LIFE`vxewSrg6(9CM!qg`6Oo7%i|NlLbY~M@qIbpo2{TcnA}&6a||ODf;m^d zynV`&Y*$43L36+$n~@3=;Q4JsLys7cCt4UHTWb-rQ7)b;6AB zZ>NX}N{iXaQP@dY14Md!{_wioWf=#4%Z!@vpfpW<1{x>yD!gabj_wPKQwnWbu{c7Q zAf~3WDoncyI@MJP4u(NYvNb@yD%9M+EVAC0Sf=)D?<-H&o3FgKC(=!@Hhp#tJ*0!5 zxfL|kGBQke7AB{&b!sZ*uk+WDz>Rxl4q@{qcq1orUy}1M*Z1U$@zDNvIbH9G-$kBb zDSN+v?7N!SM3j}+Bb=a}R113VUiJZExR@QCT(a}MwmaScW3OMB{KRj>aVFsm_98u@R{UAwMS@jAx-RmO;PSaM7Zc6jNJ` z)->yvYkFuaI-@rtCmtoHmEWpxY}Xy4abno#ou+6O5gw6d1D4<^wB93QZoZW#I6}=A z{)gusy)w)X{VlHrA#(T}Ml{MROh6gv3FhmZphmaPc?{Pr>6A&caz}>7*55qPrLMZi zCviml`M)C>q@0jIr+Ov4^Nj3tdzyRmgk=m$*HbM z5f?jY}!-cC$JC`V@Q|(vsR0y@N z%P^7XV1*LYXyVGP#9ZeZ;dWlG){x#8eTYFJd)4;&i))RX*stUoZ#Zi| zUco(2!)yiyWUY~T&y;1z*i0HHxp8ZzP3gKQ6BrJWjVt0>N;-vvFdOlXMZRRn{)8Af zB~g-;vhS>2DMA~jcS!jab<@hS;1e`vpGt>~8W+C_LpDrC1Odt!{CixDL4kD<%o#wM zn{o4?JheA&?M$Twd5XFUT*e7E)_$qfeAQ+K+Hb!N{M%R-QUf{E;v6>`FQoaAK_hWS zObJncHGH#B$M=^j%3)p8;*7R`*Z(;LXG8~KR4YD3IS&Y*Q!Mt))3vO%Hr0MLVHK4K zn?y}-m*lGtJ$33QIYw0D`f@W~Z`48+41>_+83K8mp5P!IbV9kIyF_i+Dcdcnpv6q5 z5pPV-${9B}h|ImfZS``gQR8K1m)H4I?cf4Ybg-R8&wQxvzECLMcMM~Vnh^+3hhNZU z;Dlix|CcYjR-~INRMe04rOQNK!ENu%=wVkn_KQP-Y$gJ;7xD$R+86nKg}+RDm3Rb7ju@0;~f%He3KEFZjnZTc9~y&@S;g}LU$w(QCy4~6GX zoYo_`eaC}@1}pI62z^*(L1EsB%;8#+bMjVncbrtSv`)hFJKU(=O97djJ`~7K<^{a4!oi%$4r|)e$pv$qZCypnEe1x%PhV~jRol9RE zutB(wz;C-2OYE7hL<{FS#Io)mpG=SyWPc({(Y}hztZbKJn{gtZYq<7MRZm)H=!Wi` zwtFNbY=HZgyqJ>+Ml23?3xmyM#5}Z#PHERkozb&;JvVVtgD6V5b7eygKI(gA%+!v& zme^83kc9#f2-`5JW!pyyqu}IZ)GuT>%5~MfMea^gU+I;|QzlQlB~C-JzC{3=0;m1# z%&XL-eTPvv)Ql(O8_irsdsRH{dJcQ)%zRgTj5|FFlw^kl#t6M@JrY)P<30Np__(*X 
zBfJ1k&H)XQ<60!L@S}M}wdmxd;GH-NcIwO_@AWmjzGX;Ol4!IRCq7M70%d=k!|tOu z0}DCUAy`VqQ(1vd!fMnlKq_@K!mt!=)xx>+pB>9Ka zJ>_p}3i0@0j#*ifF!;q^M5&RI!We`0U#&HMAd8&e70P*T1cH9yXH_6=5r|tV<9(t< zj&<*|OPC*D1I!+3>c=@&JB|DwcUJ+fKOt0?YMO?y6G4 zk zKzjRViHP1Rj!z}buhVEwDgkEr-`tjZ%^{W-Va8_;`8E|^May3o$0@8^j*Zy%cd^Y0 zj3xaoTby`n0iDn0nH~i?^bTo@r)A%Ed#SJG^>jIocAAEXSI~-AR7{L0zfwU;CqaUY zqc(z~`{NYl_a*m5_*|ckcdWH}_Tla{Rn)+UKXuBHkO(}PQ%$Q{w)`?G5UaL!%&c71 zO8Lqvr3xy{OB_^t&mvVPiFtir_EQ*7J~5u?Xg;;4i6*hIcfEF-+f@bKWTYFQg*06S z@Q2skN|<`u0v(IM!RLCL50pD=Vd^Y*rmWo0eb?d*L|GX7p{mj;y!d)p!%)SgCmEqx zabICinJ`k3=-Bbs2%f@>;(f(FJMIS6ai-br>EQ8?b~==rfC@2j31Bk?t5(hIfeI9>OBKk>o&>X3j^&-Bm}V8VjVv0Cq{>7@)6w(>xGeN zZOUg`tbRrRU@MNKdgdj^8xFosuX|Z8M0qev?>}sC)Q^*;oXsVy#2oTN>=XQY@1Lyt zCVW=dC33rB9&1Hk=>;2v`B(-OZneIjcD=3zS*1GGCB)+j5EcCp-rYi> zC&1b*5{lx%R`W=hl`5u1W$byi)CN30W1|>#<9KS{-}P9D$IxegcEZ6;euBu^m5>sKzKdDmwjnQ&*~(>ERaLUUQbU|&eq;_NhP`3R%0g0_OVR& z(2W)}#}uX&B$bSA@VxhZb5oZ;JY<6-n7`9eJ@Vee<}r5%5}yBZa^GFrsabvT!mYdd z_I+dN?u%s_)l0DAq=Cn>_~vSl8FsRPuV4UM2vqsuAsVJo>+&Uuzs%!`cQhe{=~(F` zVgC90-{o`?6V0Y-mFiF+F@2`J`%7-!*-{L&Yi*UzMr*Q@VgsisV(fsUb=<9JNe!`nP>+o9`p2qX!28BJbw5!X~EXZTeAjg9gwL&#J|8YvITONLc z0cEX!>@e05F0@Vn5kxwOYUKfyGxvh9o4;JrAC*)mOx~TPn`A1_C6yee@?C8ckr8#P z4P{k5aU@ct6A&^s;BP#%+I{~$Nv|5r7s7HU=ss$aUtR5uu02#{?M+oHBA;20pqcbI`;GmPgXxSllQ?DJ^?q z``{Wwl4V_U98mEjUU+wpk-TV>b7YAj+)ftk7vr1P^4VEHoc>;v1L~=dNJ5FDopWS~ z<7Iqo6012o2t$`fzzX3B(CpHVHQGOt1h|?XJoYh>V7%u_-J4TAdH0!IhgGAwH$^;- zHej0U64v7bdzIT(>&LcC-##>_dLWG$ESpQPa~1tm5}#>jE7u{OIlZIHR;*3@}SI|7W2b z%!0)opam?{-#0uIR3RGv@ncCt#H8cNS57#e{BnWiAi(B{#n${=2A)4{glzxL9BM^W zZ{5F_O7N^oYLWhppgKzC-E!t8Mv{f=YT$pXXh9)=kYsTGTPo9QA8ho$t*Ql$8i2!h zoPyf1U(jAw4|`#C96)#O41Ve~Nw#o&bLsA(iWlu~6UY7oxg)t|4wsPi=zwlB4UyjS z@DLI%G#6L<-d?pw+MB-1V@CkXAE%F{bB5Ty(i0Q=E>-ww1m;Y!)+MDbRRe$d!b&pvSVlyv=!V zxxGuS1m+*f(9eC`pp=$^s#*88?~gUh1XM(-?PN^O#1PQniV9zz5k|YL*@quL=@j<+ z4Wr^5Ys>2pd~2lJL*usOO;%O76k{pQpyfUcgrQj07bSS zMx;xm^s3^Scdz_1fB`ZVo>)&`}~qjOXo~KI!Hnb$O|iVo=5rE=Z~!zf{DK 
z_WrzD(#RDW*h}KSe5*h$zV#mYUDbtgBH(To-z1re#^}9=a(X>i9V-QYF?uh$JnQ_7 zs23V1HsKLgGKRXms6npdOAhR%>k9fKH$5IpjhMdVK@64sQ6_xZ%}@JZQqAWXcE+=X z;*XCS;^OU4sSgs!>NGADUJp!rzgh@e<84K(oGPJ)Ir1M{YY&JPzdQYT#9`PaY+##z zsQw&0jvKMVdto#Q-`&&d+VxYa#}DjM4Ykx?0!~x#c3{D8zVi7z^n7i}-`&1zdaQ;E z=KlGy`{lq335W0V;@Tba+@k7JH7o$RaMK&|wV(jfTM2=dYB@~FyKfaYmq7F$w|51H z4P0B~9j{S+uA)9m5O~Bo0#J?JrTTv_&i>9Lpf|dffrU4;`0?vmxIZ3Hef@Vvm|8hY*wm#SGVI@wdGL!&bq+)x%ixDiK6g#_>s{ApVQO{dyL+o% z@vcEuGgwZC3TUTcxbM!0y1>n{-RV22+k>or(5-R&1 zM#^IHZh<9qmo}y4SaQzfF}t`Z0fxt+zW_Y+3JL@}YognJ$j+AsWW?om)*{!vZ@O9w zkuR0|Cl}8}1rf}4qFSiq8J$#uPY*ZgqqS;pI6QPJPm*poeSRt7aX;tIOZ!|8@BM|J zWIVS_%PoUsAICQrlJtE|5P#|>VWYK*9-%g}G#UQe`4q`Pk6-(jZaM<(sCwrc!G?VJ z)3s@&=9Jos*~1;$KHE*#3oQmEXY4UKNM252m1;>B+?xl_)v;}d9cZ)LlV(^D z-XSp_7LhVmLTM|s-DQcyMJ`+@iQvc34+3hcg+GAUpCW&OBiGFub8?3QyhkGvWMusv zQ}jH~ShXGISwm&V?!8e1rxb;;gpB4L)KBRw3K)A|yD-9rXXWXHly^`M*^fAuBnNTx z>(3))_TjrG1Ns z>+GeLibL+b6)G76f~CGKaF*mRrR9xuJ1(0Js|uy4r?C2#_K&B8&Q@6a>BIbig$O*D zp2*om94&rNRJ%?KDd@|da01b@>D7-|Z%LSf>}4|S-Vr23YemASea}S0=1?az&^O*d zvSEeGkv_|Yh4z?rUy2T!%C3lyLXG0?YK}V;W#x!5F<7*YW4?^q5_m@}t2#S51XaSC zs#$3!HYi2i(gsTevUAFb5qzBC17qrXz9`Hc`Qx^t%e9)Uzwwh3e%kOzw34-jwq0JH zN7W2B^kM#se4nIpY5#c9PbJRyHy{QzP2j-meyy!=+hA(hJ=KDY`XaWuFeR4ngJBIg z+-a*T+@ekbOQhhsCskmjg=qG+x}=xreS3qaNwBvqX;_MIjBTd(yURTlFM;0!WJA8wH51VUU6SimqQUD!f(KJ zLX|enZ}$EA`uZhX3_KWtUA~wDJeZkLv|g?ogsMRvhS9{h$Hyi3Mo$AjG7JjEHGyPDtG6bU|mC(DX^ z^LJV$jFw(Ef}1Ecs70B5g_r?-!UQzw1raU~(ed;2efn}@N%ih+h~v$)o;M6Pg#R~U z7auY$@_%3tj<|EQkiAAd>%LchUliftjC@ng0KZ4K1A6{}73c0Avdye2q;C6u!9uoZ z?CLi!nDf^SD;#E<_gM~rv%Sh?z3!)%5ruLRB4s6E#CC8LbYJK=e;ZC3-YJf*!kVUp z`q!WxJ!sjmy+F~B_rL=B&phirBRDpr(& zIO&P{JEJH4v_^*TKMHTq0tTd^ooMW1o!FbN=dQ2WaN<5be8G{x(00Pis1yUz z)l;mw@cbtTchYw7=KE=TZU!&R&% za*Jj%R7<_HT4eNB*z;=L)5qWQqH_^yv4=2Xu+V#ZJUboZg75@x zhocpwD=yl$aB8dFvBM^@dJsa&dPPUGMpw|>+w34JvMp-B=njPTk(*~8Wow9k>;~5g%`t_j^U(Vi7beVt4}h{M{kHz@bY0sf2RE)L2kk>htIAavT;*Djq?uv9aL@l-F8RyUlmdBzL}&sQ@o`@0J6%UvSq64b)}bh#dWmpr=H1|@?Ukk3q9zps6+ 
zr?wC{+|~9~4eG^I2lq2{7RQG?_WCBE`&4M!&vNbI*=a6ErzQ8)3M1r^$d!IL1@SrW zqU0OvE`jabhvhn<_n*8!Vi)VmAw$TjF1k|~;AUsjc6iH)iP23=)fjGB#PpkWwfyKF z{mZ?1<0mR~>hCGwedt>BbN7q4*FUYzYhOHfe?93n;gyw@fu3vHu%eFy*5)=<@k47> zNg}S~fVZL5m4Zn3%}pKOiJmX$Qh zPKoL$E^Zr!xRG!aj#x1l=u~?hS-{4o=6R+22&5wG2g_gF5k-?NtH;ngNom(VLVoUP z+2)VzBkPW@%IC1DG>d7?0mH$HJJNP6_N7_n8)~+c(f93$xajEqR1vjfM3xM9P~wPy z(7<#|D8nRj=5mJSw<2|<;NM@`|3Bzr|MKtuZT8V#jBAY7Tbt0=_ve5hi^w#I8#Jp( zP#jp$SV>^BA(8xN8F|&%TM7RJ&)e@})tP0E&iu|}%fc$oRiO*zJ=rlZ{P<-W<~lWG zD9MARnN8~PxD(#U%KF|%n<%rF+I?Eb5;`ItaPI}`<2tg|%UW=O7UzEdx&g40I0>xL zVQqe)uHh3Bb&`&M2Pbon$!Q1|f=0w8_RJCz)FS3QX;0FvM8%rlk5wHl31yMn%DaT$ zSl#iI`5y4fd}em+^l0OXr`B>Y^n6x2DiukdR@LA~B~Ch-zr#Ae;}p(PE{txgcbg>I z4H09OmL_~BQwtkZ*@4VN3XV`0=qtBy<_oPe&W`HCg-Gd(>w*>ONy2qg;rLDMn&D*w z+jDhrGN3eBY=l5kS{iny=Sd9$Ne-aGKsY{HhCTuQDoY8Tx03-g=^s0u6HUjEQR^#>#rkjaY`&a0xtd7)dHgo ze#Y0`rxrs~dKxbF|F9d36z}Rj3)?G?0=l1uO(VnZCr&z#NAK@lc>d$+oQfiOzvIpT zyE)7 zBvj79+Gfz*w3^3Ir`NGZf+p$UlKpETw73zWQ_<1>xFOyCZvuV{PC5-&idZ(GaLCSv zZT-Vd&y&Q!k{<%jJiqQry6Ai|U@7?MR1TL-?b;#V<1W0nWY)q|nFjjSX{gNiF6mae z3h84s%dj@ug_p(dOr^aRDF!CpO1*%8FmH1miXpI<>Pw!JAZw{{o3=Jk6EInAVZP6Y z_Y_%@2SSuA0vQN3U(_^n6{`4mI8g#Dn>?xcq9%+oO0$sq8}}94TRVvvj;605<3%9X3(PVu)`F#JT85sD}0Fp3&lkDM-jm}rD?I3#OrQH)dr3w_QX zH6M^8iuJG7n9*cp)zBY*Yo8Ay(_pcK*Ih-ZV?55RJRt@(!~5QdX85pT@rB=e3o7dQ}uJ;h}`V=4Gc- zKlo)Stb}iguce@}nYVI3e)B{HA9Xjcgd?2J@jCmg6DjF^m|Ir+jijzfzqbZI(XXIf zcYrQr)k49!r$KGb%|4amEk1;-ylp1J?t}jMR=LC3!}E4S>zs8xfFezueix36c*R1= zrhIuf+hAc{5*ClrC7r-&SSR@3!5a0@%x80(c`H)Jzb>)Ovm)n1gGnc?sN#M-_S#ItMvico~$iUFwr|NT|FXOUKMtxhP^XH(xq7~BMgP1O( z%g%q$-yIdbZ-PiN5+Tub+}mkB%=*rGon4;@qFCbuz3Yn_Fe(iHOyYttfiuHa=~UB8 ziseB56)`gmpj(*Bz;<4&p8qN#Ovqr^sMuzJn^_#&vNMV zGR}ziEwv;7l3aZDTlJDaNEpki5%`B2A%dOaCAUE^iCU7v`Ng`4f~?PVFYO)&nUsBUk*RpnY7J(I~isuH8E~ zD;vfQ9X`3?4aw3|_a#qcon~^;P(q6^W8Dbp`3$2;Yd_$ht4h z#PH5L?p$B42<&5-zXYWjYX4uPy;V?~Z5XcGmI9?%iv_1GrMP?W;!@n*gBA@QDDKkY z?(XjH?(P!Y-LvwqJ$tsCthHw!W+pjchU9zS=eh3d4&bYrSN`5K9i`W(H>4n!>ipBS 
zyK`=}-=b3b-BDIinM#-xAH=Ul`f0O-Y+JkTh2DgC(sm|T%@_<@W0qR;{C;9tI@2G} zs&bgtWdD;Kbo|GB!O4Y0SN+1drQ!KAb+ecZK9|mtA1QKp@7OFJvfP|e`AlzFcCE#n zNB>_<<+-{7{%Iwy;b{DLpOGD_mYOXCy2VE%UG>+BA+4WbT0{ z(!Y|mG#%RUzIKw$mM|cbb-%sAIIFjV+0(68;*(h!G}jd01o2uOO^}_T;X1U)ZV2t}}#E zs)Q*IV}(X@55424&-k+RS{%(bBn*6$GC3SEmyA*hL7R6{c3FL-SuQCMSTy0lr zMWyYF)a-~G;o)#3BZ$T#s<)hKxb&J;%TrlUN~H)kC;d$Yej~X0ksIoHD5w!mN*Uci z{F}K#x7R&c_ot#&QH_yZR!%Fs%VCc7!7#ScQH4g#>40pgs`Tz1ap`m%=1^(hE)wtK zsZFj&$n3E|JQ4@Td?*H%(Jx^lBea;>(kbqhZ}lEp#val`vw}5gB_FgyPI`Nle#>KY zOm4jly_u5|=b;5E9Z4u(VK*)x!yteA)Fg1|qr)P8#PaBV>yFm%mM zqrT<}*TQilnFB!Wf3duV-zzT#)@33ZFDUBW`_D>b4abv*V3}JqI9Oyb9Bg8%-o1_U z5TrJBUmA z`2DZjHx@YQ0nhM!A3j6GS>B7vTL2C&jd&(w`l40}R1w;MY>9LJqynZd__uEVonQU_ z=_7N|E$$5DIK$r6$u(S@&`zIXn2YPtYu~uF=QBmuK*UOk*-j|J7}YyT-0+~C_sEZV z;-T9*rH5bdSMY%cCC#qbQ+C}CkeF!K`QAoPWbyQ@jA5Wz_*j>%O-9qOJg)d8z=%&Tw z$op>719}p0TG}5>?4nj)X$dnw#SK;5q@&%fxNpW(g*09Vw(*Tvptz4`u2{r-y(ozX zAGad1B5C_r_IV*3*sVlDZb!H(3-&+cg>>`MSz>*(jL67&uJ1$2Ped z&#@iTXzr15p{G;JiG!h=oFZf<8;5p!YS4)<7}pWe#k03b=YkLO-q(I~+I-t^g$YCd zDgdHq-EXEa>~%BPKkmyy$;^Scjr>ck3e-P|-tfG#%+$1=6GZ&l+x?ajGci277g=7} zReM&Y!4{}(5tob*r>A!fvwcZ%pIyX1vv%VyIru=ad1R9BJ(}FdBJ8}+)c5kU)}2uT(949rwB`2g)N>b+6Kv|@Li*f#|Q zH$hSP5xe`YE`EyubcXSDE&0f^S4jPm{rNHg<11}%?h?%%(xJ%HWxHHw-A09Ip-`@M zpvA>VV1KV*4J~C-F|XsKE7M%8Q4dv3)5Tg_elE#!&2FT9H$Axfgcwd_Q5ob7I;lViKVRJP8RyemKWgTR2I_?i-)i_oDV!*Fw!JoeWR7Vq{U{s?_EB`3~s zdLyfP{4uk$kd`b)O?&ooEda}-p%9JCnn8`$_y>lhGJ6>{t*6H&vvS3Xj%=u=iKF1o z>1j5Ys?b6Vwl*nr6g}<>@$OqYo{TTh_(#eAjBPWALO|If`T=9YMQZSIPit7dS z9;K_ee_OOjq#OfUc3TA2XTG}`haiXByu1r9hoPO+Qdr121m^KY9 zT?~fPb{`Jk3;bJE!>JI+MU zZS0pKL$HA~GLcvqtt@xJuyac|`cs_YcAH58i(}b--FaC~#O(bjrUQCXx89M%Hn?N?oT&+7rDUQ zRCPS1016`sJqC0}VI8GUCSSdwgR%pOushC}km==bu2RbLa`D)Nj%?pqzOQTZkF*}V zxc7czWF>3sVEkCe>3%0MsHLa(HpiKCpf)3fhYS&>2vWR%R3 z<=R0U59<>}uGfQI?K{SmlRmMt6TXhbt!EYH>QZZ_R;1gJk%yc9=Glloeh%{)?pAaY)Bb4`O#{2=K+~Sk+s#3$yeU zN9!^YdlE4aUr=1j?&r%+ggZdHsJc0EVOWW==zUTaz{dU(T)_z(*NhCEYZeO`vn{PJy?WSD!EwqCB 
zZ&DP6J@k8=ja~_{>{y*~{;rh3!2{iw=~SFO`A5s`HH%ue@I8hc%BGmK>B^z}(o)ea zGVhX|HZD*P?~40DBvZ|qI-D0}M!F**uM$glz@rE#t-t=VlzjLm5WDm#t{W@Wk19~> zqjGmft}}ep}Co9!j83{!J>CFI%^T^+OYYC z+(e*Wc2?rUysk!rG^j~{P_+)(n{3>NPshokGv zaberG3GoM46HwD!ahHWd+G4WqfAlu`-LJU`JiCg2Z|Fj+r5>CH37|~XyeeE!NXB`w zuRYqFQNIEMH>I~!_>y6CcmaIaS8cls?kfX*;gQSUv0%!+BaQ07Qw*Hw39ru z;pXK3{w5wPQhI>~Eh`}RGf#+0cz$BMnCRPYDbB2~xtq0bNvRz5_xCaONs5PJO}Zfy z#BbzTZ}TN1RZc}g`!}(o7TmYH2<#SeE(-q0{liHf)$^}sgzvHodqbu3Og3*r?PpOI zO8}=vyNbWEKx}`%yxnB5MLoW$^zM)X|^Yar2@yG*oE} zk6`2S$i?p(ahlEW*6bo5o)fSq7cR2+=jD?(mrt8Kg90ZO@$(l~-!XBm|J7#Pu3V?W5dmb9k&HaEz%?`Vh{>XQjdk`_7 z?ni*%4@Z6R8+oq7{$Op89+SB2>AD(@`xBPH_WGOz|c6j4X(`!%yHw>Co zX^^H4kHi|@>hX}UZLt2+9x~8vE7j)yzM@7FM&5-q_I#dmG9yB1r3mo2#w4-1Nl1K? zWk_}*(RlG$0_=$#iJVtahSn*Jys;XdJ0RnKO^JUnA>O>!=u7n6WC zLga?fcb8Zy3*z@009*W!CyC2_%Ry3SI;L-r*aNp`Y}6q_D(%G)KZYddv?8y`3ZN3c zqztC~YZe?GNsD&=jEB)m0JpO+TvEuQIOxdZo_}J+Fxy}IE!eBHtetyXWJk8)>RYbh z-WQ=pA}EB5i7-`792s$Wn2Iaa2YE^oYB10xqf1IFL|VS5bJg*EL)^1rrKF;iSOg%< zV#{e9=lzmtPsoMeO|d{MvUPkPvnh$6LQ*XInD3iS3E1+J6d05}3M@8V@ecL{8Q7lv zZLse<8ITy8<3Jr~A*TPWY#rjd`SA_#l@}yDzZvX-GreLL8nek$ac8^2wa0%$^@+ca z3Mca2rYq{oj~t>g4qP=-0HhhQexzCN(l*qsWw=h*as~~j{0^tqUdZu)xiEUsXnJPj zF~zRXB3H&dw#;{d>(g{Vz%CH_K4u<|mORGm$~CVkY=f#9wKQ-pt2IwOWjDPDGE(P> zJ=t=B*eFFXclBf&aHM~r3O4OQ$8PiW@3v8;l~E;eRXWYsBi-}~q=(BPN>h4@`#nkG z!(T%*b=!(vAPb<&2Cqb9fg<6$F71F8GFT~h6@Iqcul`>0_OM}b2%7g{)mRMh+D=|7kaZH|atxO;`OL#76r6m6jI@rB$2!Ilkf|CpHDD zI$r+3{G}L(Ptmh9TCFd|g5oL82LLE~}Sxs5jaEO~Gr!i8NQ^m%Mr^x*iCG{5_# z#$Ygx#pJu^fsghISTZe%+0F7-rDNDM0QXF&KQ1yMqdN7w7+Wkg=)D=$`>6Mb=*?^9 zr;+K6gO?fGV^trF>jF(aliwLFJ*Z<5YEJiDkW6u^a(8*hAf~bsBJrM=IDGMO&E6h& zA~T!L&m0x(KmCL6wi4L?_KonlZAY%i(jA#DFs$_f%;iy~aAIQmwDoFFB41#4-QKC> zySkOr+IaLCx?uZ5D&IcOSFRg2UQVnE@q?VE-cVRXi6P@X^o!H;wnH%&w5k*c04l$7 z3(RQ%CR?*f0E|=X6)pexct97!!F(Y=os2Cij>JQfzW29<^`~xQ6#fNRibt6ZBS$y{ z!|DY(+g&tX{fqC>$=l61Zj7RX_yOeUK^ezca63!onk7_(5Qo&eytJya0L~GkXV)Wl z<E-NIYb!zk7i-(cahO4xBW|=$r+DA88F4leA38iBwJ1-E2T(JIH 
z)ghs}xeeGmpORYN&N1_|Pc(8sO)oF1{E9k5y+SrFPIBm}`3^a5->vo)bWejFS|Dhdp=eRO1Xq-Wm|@g4z-B&5w_-gM3(7&Wk$(4 zWMWV4nQ!YR>+smx?vQJapWmE-P0ZZRBG2(og<|shCo0I~k*=6sv?3TqbsTWlRvs{M zluz$?R6&^9v)$2|dTX};;?jk1VUy*HwY7Yacw0Q?mRq!R^VY42Vkv>ZWerNWb*TvA z;`*dF`!O?`U3}*Sp`ZZ3qY+Jv`F`w&R|8b#2A_Lqv!0W;oRl7lBd(I~3FGwullkw( zz5e1pB2IC48`P6m(JNb$;o78LB*HA|;8Db&BU-(Da+D8bUB_&Hp{+cr%dVu= z3Cv#;h!Dry+a|NwE7rgUnt9ixj-9noV_cr;a^-Ao$S{KJ~Ye&?%H@Ro$c#ucdso_ClbYrFRC^U~0_T3(OXn70V2bc6=px8xB8SD$rO#wCQq z#|?jfG?cq9Jg3yo(wXjn+Pz|;3kiw;i5%KAVAbaCxE%w9RktCKgmIHNnjZ4194g>0 zqpBG=U%1`0{lv+J!yDMQ!CN7wbjzA$rfgis4877aB~q6m`1UR z%VuUK1DGXN8QuXCa;0tsBY37p;?GoCY%1@M*sd=f>YN#Mx0x(>=;ylWsi-l$h^&*k z4ryZGU#-03?@y5SY9{v*Ks~MIQE*b8YX4a5+zzv&D&x&;ZLhYhU~mCCeP{|k#4L%| z5$BZDX$j;*)KXrbT%1q*eJDzLI9S))&w&-La*2{aNw~?59($>KRYCw-i5TA7Y6*Bb92) zbCf!vkpD|CwdrQZ{)X5LIMs9j9DD$o%d4u-8U$)%lr3|7cpvAzx-0*n*Ssg3D;`4gu}6cS=*fLvB(ENV zl){^9H(~CG#8T}+!}cABj#hgo`|}NURr;U1yzF5O_1833F+OJb{bT37I~k5UkHR)a zp;k;E+1{~d=YTKS^7W>V53FnHzDS*PN3U$DH%9yXC8%>K@e5Z>dGaxoh>UlmX@oJG z*{5Xy;L`}|Q;KJSyiWQ6GG;PL?*Q%5_ZeDBvz+_OjgN_$J-0nhy}(KKWthgq#-`vO zSx_UkD)W)A>xb}>3qH4S)<5|HvfGlBGW=JW4uS`I_hBh=d%o^hKPT6POA!|8AFRxG zy`N}=C+7)_K+9W}AAVn;s0;A-%XmR-^SGskLcA$Kfm@_8DlyG5I!(w1UBB{wUmyG( zNE~;Rt;NiEk8)I{R4!(*@n=HP)GIQ_NDMiawM9mJ;PKFWGx4Jn?zh20x4%_Kp&~nO z9sv0>`3E>O51np56T=Bfd(}m5xhwpt?)La3>-OmWe|noc^|^@tJFWd+ubJ@2aQ?sQ zqW^b%)k{HD>iP$!8#d_ae&epkzoTFeTNNUrZ1>m~HV3Fb3aGvpm2F{a?T<(S;5f)Y zIh1|s;k-VSVG1B4Z|8{Xg^!R9?tfONoAZjM^hPXxeCc=nd-wMv4Z334>hLgtf~;*L zl-$Qv;Z9|6p9SKL)a5ef=TV^ckX}n9ly%YDdH)UHk#D)GxS^Y5EXXm1`eul&!tJ5H z1;t$+{*n7TO9PBHwuGvED*%X)zmU|l#EJ8`%mu}6^}~f?Mpl@$>`mLznTKwW+FiVa zzJn8(FY1dOSJsPNkCPo=)W&e&>ZXXJ)wdO96d~l-F^k$xQ|D_sjAR}E)1J>H$#2vg zQ_2l_JR&X6WY1s51>w=2%Eb$RY8<6T{xLuMeOE&M-|-+pr-b);{da^`8?cvx`t%mU zwT*0d*UkUii-&W>EM0+Dy#(JgfnMr#B$6PvmwPV*43CHRZJH>z>wpLeP~p|8=FNc`QIc?bnDXa zS2+XhW}4i)4qQrJl`uc;oca|YI3k~ zf+-`^TVO*7hc7L|?O2Y)jy0Av(&tOt0^;_Xb@pq%){4Xm7I$@-CHquPCrkdB!hvvn zzTX=G*>->8D@+f>cye;mupPl8uRHBSRUDmu&+tABQCF_@I?k=)87SX)HwCj2RrC$@d> 
z*To$>JZfrPCgoR=)U@3=FE`JMtxL_!iv+ccQ1L>0J_iFU-=M3ER)~p`f29_El(D^p zKbdRlny}L1fDPLzpDndWPDsp3tc5%bfyYjH`pVhm(h3vo#zroBIM zIyW;`P}=*tnd|*24di zi%apRd`|5&^eQhm6Lb8B%|V3R*Nq8vQPK$=xO7q}7jjlXuaI42JRC~WcV6xhi>eAz z_OiLn`sLWd)0x@i))1YkncW_~tfTsuU@jWUyOrvbQRL)cGxRQ z+gWgWLS;7`1RfY456mCkN`|7><%i}%t*ynh9OzRj3tusPMKo31 zwNN#aE#Q#%Ujc3fGGpc;8%;u*iO(kS9z8?lzU4@SE!Ly6w%m~FZpRuQf7ZmC+So%# z3D;U@pHSx!E~vgSlMrsJ{-CI+_^j5{Ka$A$8Jj$*mB{01dA$z-lRqqHMt7ACl{uqW zYVx>H#4v9rn5wQ~8z8UZe-jic=R1wtl0E3bHZz-1Sx9I{{oD%s`JM@b^}&aGMAt5X z=sRPj(~a}@4QsyeQnNGL?p>)UIiIt$DWwfuKK)>en3f9JXR^T0yWLzPsrO4$fI$Pi zSq6yij;eDK2s2aTXZO6)ytiK%aZM3(uKrWAsIeG!QjuIi=ez`WQVI4p(@}SRWKRHY z>n;{IX|U&7B&3PyE(EyHWdz z;ftE*y~u~aVE@A+AyV_aU3NDC>MEZAUH?>&=!E)CEgORM7ZK)-=IC|9wAhrX=WVyc>NYzL zJz5^xO%hzkrN6P7y+I~t&C&-qR6chmGebiNWm=tT2v;#VG3wF#>u|Q9u=q#<=hp8m zR6oobduQ2Z{oN}c{F5uriO4r1oI?;uJaKWz%F|>uiweypF@Hx$d z($35ph2i-Yzoy=``?O?Ap;o=r<%n~c$;ItQz{o!CXNa-$qOH9RMTja`ANKn4DuCH6gvQE}BkIe$O!yREFl1C6@qHs_PA9{2AQ z%E@4G-|1cYR*al41_()=r{BQxLbp8URk+!qDQyTz&C@AjA&srH7MrrcK4 z`tVVrPehgcMU8{%S9_scp~L!Russq~c&(ttk+Xnm0ooiSV3o;AtCPbx@ufX(USh9% zouNqrPRarT8QX&14~E*zzK5Lx!@b{oeJ6|mmE$AjngD{5yy~ojj!Uau1Tx<@TSOkvoZ1Gmej|Ckyl#!umN(bG5u+i= zkL%4Z_vgN!vNetI-(2T*2mL>KLq2@{|JxJtzdHDJ0;UXpUpZwo_TC+LoKCDwdjE(7 zz$FMZbzn#SVIC??&E?s%WUYH*VOZcQ+>~4^mDUs;;ne7CsdYE(W4?EmNLHS+Y2{CS zHiEn?PGxRs6Z?315Bo@>Tevc@Ljk8|&6};mX=;YuS1agQPmak`y}{TY)d3^oq4gTC zAAhY@+c06q5XF#X`aN9K)jziPrZ!)s{FV!bKBd5f}6_HFuS}3+d0l=#mq-aCq9iZnS%g#^rW>M zM24g7n>CEn4cig>WrJ#fSlaX?9(%I>woRv zl*sgq+ic;4{fr+zPXM&1u-(_1h0TLrK_P*Kj-a|f?;HK(*FJe$srUfR->Nl_F9~mh zw*KQ+`shC`9y$)8S>i>bgl)c=wO&bNj7#7U0>Y6Wn6*YJ!lD!#GX;Y);Fbg- zdG?3h5k2iK`p~kZaI`Xi)$~dh;QCgSP>OylilXm_(bD-9v+IDBda0@j?}1ORVXfHL zenu~X$D0su9pk35Z$|t{y$y!^V+8z|(E%g>?nyMSCQ)t_N|y63g;IaAskM?7!fu^* zOwoRNIA+y(cGYOoDGUsUbbj%IpdDk=-USDAmzB#d@hpAXlj<5MF_-!sKAJ{dWts3# z3upp(LOJnyoytpy=lW|Gn>yWz1>xH#F3+UV;--EboGyB&?E!}y00{M2s&zc#vp9%OMrhs2V@@R_geQmIfZ zV*B7_0~*L@)(56kP7Rg>Wwg&7FHCsh&IfZp3V_0e~*X#@-Mo?Y~@tQ 
zI=1rWYdwzIy!5Cjl}WVV?fJ1K#U2<2im!t&YvOOmhq2r5anLY7h=Iu?lILn;nc3I$mdAF)95iGkg3 zWa-wUBTBjl{CHfu64dx#=#Qxa)H;{OYWC{L#Ez*G&J1~X?fA@N@4q)r$2?D=JOz&u z=3ak;;w{hCd>$-1I~qdX+*oMfuV!}WFo8o;TX5gGe#JmpT=j^#X>bLGIsgUBFjtp> z2!a`4Ec2nbdni_K@3tKW`*Fm3nKZP~5QH|KL_8<-Va{At3gdUmlS(BsYYt{kp?)xy z^UwOJC*p(%ZM#o43GR&lwx=s?w`Lp6F2O}~ju>n zDRvA0&~m6B=}?2DFK81k%q?OYAq0Qjm1|Hu(As7@+D6@GQo2xtUoKPp;42|G5nYtB zu5V_3(oyhwAt7zM#W_yaBe2HM(dOf(%|_3f0UdB;(!oGA4ViqNOETnr)qVT=y2}im zxt0Y>i(_hSM?ImW!jMgI7!pR}D_z8L@yp+uqaINLs zX;IH+mzar*6Q~ZT$V7G^Df_tqqhvwXdzOcSjFO0nWjr187eS`JRQTXer>!?|6JaUZ zWWgsegAh!m&odjq+wAeS3fLGA!>sgMd+*qfqv`MeRm5y@|`^ z%RAnuO_Y+q9DPnj3kAMvZpZ%WSDsFJ=`!=0KH=9= z*LV1hohl<+R-IHebYNeIRix~z;Sk6yf-kre zX)u=_o99C{vKiq_HN<6S71o=5_KFl-NsZ5Go0L>J>yr~t>lZ?kRQcP6V(zT8>sFY3 z=wg0$ukqRKG3bFyu?8T=@PJL~akTd@Qu*&y^bUU#@mp#KJRb1TW^}kzdP{pH8v3Yp zg@ftb9X-9RY03?gQP0m5DgnHTF|_w_+-io$T#BLp33*MODc}-mF2^iVlUY*pVYkyh z4X@j1`u_OMmsYt=-qFD9ODJq>vr*4pN0!x-sasjOTgDN>D_O&GL}#KI-{E}wuZbmpenGQWui23 z-+W!G->sL=)fo&1sUt*Q+^T1=m2m;_$HleCsl> z(tfYGz^174DOt~d)HL2^KSoeTco}}Zi-Ze;yzA!$0^@UK)3jbc{lHB14a-@R`ayVo z?r)cHKs+7gB8GO>?T=m&%uGs$(JcCM3*Xo%T!`|}Z&}raR7)X1uhG+`8y{N#Bcvm` z*)6^&_2t=^ay9S`jHWrIaPsS*{XkVcp;D7>dzZ^vn2f2sZLHxifq?8c)LvGJF3k}e zoN@YR+CX7=6;fT**$<%RvX$3arx6oMB>Yq?t*5RJjW06eL^W{VM zRz~JD&ulJ|zEkLC-k{hMKG0-Yu1qkRQ`T%NA6mUNc;RHiv~Fj|uq!vx5tu9$yE0)S zh&^T~bL9LrN%N?(+!*^9FCP{{3Ow?w_R_v=ww21${90|L)tKZu_tu2Gd+TQ0aK7@F zy+AT7L^lC2_OCKh>4$Xu1*AaXH}?sHN5u_0vwWIp6<317)Y@T>dUj^s2?JKVwBx56 zLbVSH>HAhnJzpPOysm~_Y^H~^&J1%_MD$zYpSQ|U40F?mY(e}?!}@k3=-@ZoA&M6y3r^ zu7*!_ko80&NrN-h9ON~r0k9@19qKH`ZA~DE_oWr{5`|hX58@u)SanlJR4#umzTRs2 zI&TD;1S)gJ2FlDk2hZSwnUUoWQtbOQAO$~N^ohd5Wkr;veUnCaoG_M9HvMD`?X{b( zh4Tp)nyp{!nTpoGd(@nuPhOQ~9x&<%x}HU`O&EX|QX32(t*Huug-CuU1~k~W8gnSi zh?MWRK8$wKtY1390BD*YgYtzmIpskDF_Twr`=|y=DbaV`9NO|9GgeO`gf zSmT7Ux!CB`>Zb$j5U~I1ZSkz^S{?YxP-6bJG`s%BH<%fh;6S!9C(K?H=7`UoYAb;1 zuUZiHHZLAQC>!~QemvJO`lxb}o2Ac5SoJWkyq`1za(<+6`^@JBr54@dcltI`;m#X% z@JrJrBKLpMBIe&wV$onvVGl|W>f-kph9 
z5hH=`fN{-{b6Xq1ZY$2N0(3YJaDg)xUOBBom4=$Gdb!*GT}+ltV?KBK&2lzVCqg$yu_p zzZ|K=6A>yK6MxH){H1H^fl;8dxf3!2ZLnDWRbPw+0BUmEX&da#1<&x|0fnrrzHB5@ zu+TQ=T4ES{4c{r+ZPN|jKI`8IyC_=U_o*BBn}x*2=9+7rtDjR-S%L#_A}*ZFQ{uYs zG8sXb79-~||MRaGHDE1Bp*64Ad${^@8uV{8i!!>|G4)bL%D$~ZJ}WIWd@H!G`+~wwnVOX$)h$|Y&-{2I9pP--Ni39@z|Rh0 zOvv?D<&rpLHjHOBjOIz6D>Hk@WbwrvU4?bcF-tfvN5EgyD)BK-(*wQ(rAu-`9V4oj zINUb1;;tam?yneu*mRDHzfgpm1 z%z29Sx$ogWH^jVe3Yrm6(vic*9SY3; zECpwShfFnjP3Nw?f`bk{z?l_QU$+fOH+g8J!9$(>3QK$W(%wSe#RkC~3pzUCZ*+RF z#kIFM{mO-0J>&dlyA7At zM;W!_uzpj!g4{Zr(`RIO*$QSYTXKYH7SFFCsr5^#C4w1hI!KwWd_>f~c=&7?@o-!n zUJ2KA5`56D53ie;O*6gyoO(j5Z3{0_oWmhzSoa3X#l{%mgW_}ZfNHXKwK#WvN<|8c zPd}8ql+sdW`4r*2h|8Xui|2!|%VBrB{z}Hd;f8SJb-isTM-Rbi?(%xfIbNE^t4zk)#?H@7@V;9ArJBcXPGBxJgV<@vu^6a44Wkf_L@9;Z z9<8G%Q__O$I>tcK!qu@MXmfhwI8k<{-dRwhFxJ__)OU64N zZ~8z$VeWf?CL%^^Nxxrb@R0d5J1V|K!k8Jeg-Zd6oOe1a|1Wyvosu_qmRLF(7d7mL zf`<~?0-IVx6B>+t$->(bz+}vRZLZ;V*%X#Bvy#~PkR?^{n{c)1Xnii1%_`CCmdsDm zw*rxvw|t=&klg|kpC(gF_1)a985}h>a{j(N(*aAuC3p{-vhOcGfA=9*AT222F+ZZK zcDtP#%=*#DeZ1q(^`s`cS=a1$auHRM_%_Am0jLg04YZf`3a=IV3l}1{x7fcQXW;gB zo{Qc1=fI)@GkA6EOoR>!n-&UJrRh9!S^+EG@zO~H9L*NuZg=JjRajYR1`BM;a2uz>L|nfoeLfPrzLj(e8IFz5x9`5^-4qtkn^|~4;vtf^j;*N*$5HB)T0Va zM52i=oHE-QFyCY4La=VG$>qxKimv25F78V2*~s!eT&{O_E08##b8t0Rtx}?2HYB_s z>ehNEL~^Nk&?%R;kdINY)X*-)?Z(@3FAEi|c{|dY@vl6}(X0pYs_0hYI|UJ6xd^Tc-^K;H>2+SQLm5x#H6UkNmDcuO@+1 z6v~`LGDh9XYp!O+aoVH)d5n1U8rOGwP%jA;_aHbshP)fbXf4@-+B@h}v$c8Sqk6hr zeL_3Lru|qhdP78-(Eq$Y!9F71LW?pl_Z+e0AVx8_($|`}LO!6;pT=nnj8@s&xk`D1 zVSH}OcX}fiR!b|KWL=WKK;LHK5?Kl(xDDAHc^Sq<@1-i@%hLsq&#?@8peeO+nYvj# zMOQp?(+-0~i}=5tLGptS8;LAqt%r}Go(yRkpK_6&)B(VH#);06VPYTJ@m3NqucvtR zI!I+q>X~a_(<6$>{7$AMz5tiP$xS}rNOYoWoFDwkJp!-wg6Qyl^soN@oLnuk5}BVJ z`6H}!oy+F$)f|Wqi+hvS$QBiA@2%Hloq}s9W zpaQu@tbh}#(#NRMywIe>?6zdP3EyAEkfJH8ZF!2M!laZ5PvCYMQuWhWQ!dOm%|wMJXbSg zYhm2~T*%8(_aC@1)K%xzq|(vSExTW_Pr{Ta{mB^lvXF4Pg3EQ_S>qfA6RnLmxHq&< z`YxKiF!ZkZqMvB7`03K_Yxit7>iH`pW+Sn82=4tMpFgQbXJ(Iz)7th$h%tdw0kTs4 z11&DNu!D@L%@Q<{8E756chPx2}{s7|DXxS;#%cX46>5+GqoipGLx0p2XQv>>KK 
zi4K9eD;Fk(9MY9>tPYKmZ@j8iE0XrJ);H<&9phS_Z+8v@P6U~-z}?gKEZ3tMeh5Hz zlP&coZB-e-o|oBgx*uqgU7gtIW~u=-uuwh`Y9(jFAA4i{QP&sysSA21%u*49%44Q- z1}qxRyd5PXOKEspAVTj1l=EM={HGB&TCrQ=q^{>3lh1 ze4}kX_OBXpu%Os=uR+P1vHNTnZ}56gYd1Tx{5Nq0+0x02>O^h4)Bc!}xPF2a+LuoC^HG^PP8m}EhzLIr&YM}<@zj3Bo@zaaPh9LIyd@#kF* z>Ga})W@Pu&_%~o!ufXLW_&<;`M#oE3h9s>(remq3QM%k5S7dI$vfL&jSM8J?F~8cCLkKZc9sg{hnB1>Kj4TRX>mnaJ)TkIteB#{;5GDt44wCD~@9-28e zX_=wK99hzmrxo*UYnfQg@D|KZtoDNEy2D+ZtFTPp0^HtI=$aijI{Pp=1mo>-hoO5sQ6t69koWVx9_la3MJGN1uGV~xhF z2;glvmVdyoo5^}vp9s`hnk%M76`kbp%PD7NykItT96CC(Ov4GxCaEmrvgY$!23Z_x zozKrlWVH18UVjIy>FW9PT1v;r%p#^rj?t7aTwt*Ls}hy_{%tHZgOs9+0GXsB9T3M^ z`0`>UOk?bdc?a?Aq8$*`6br%81p0?-F%?n^VU7+~mg@ zwISR2H-zt7eVSrG14AQp`pkMrcGK^j8iN(LJe=<$L5TA+H__!^xI??evymSTh$nR+ z2<++HHi{bKfKNQ=7`e||?9;kW@Gm^I<6a)e0%rt8q-64db{>nJ@f6k^*4EpOw(fXj z)HnO??VlUzVL5c9?yx)0#LxVDBlH-Dj}WrWS@uYHyjz9);`oX<23=0_eq)3IIHzqJ3ES&;4>eoGnLA3ovue zu-|nceK_6_1mT)#E1=k`x>r*P5&rPV{fkwQnM?djp}^yGZ_fAf>17s6BvVRrj02uJ z{k66`-anazL}9Eh+a&+wc8D4)RPj7ftnRuGzFBitkMpF+Qe~BJ-?N9LK4d5e_S?^H za*BR#&${gRPamK&jW{iXY3J>O{ObU^C+Y(5bjCB*46rKF@%ulqiXV zmKXzm$kljxDJ4GA$-920tD{OzGG{2|VUSWhqk>PSP|fRz3sikgfJZ)5zU8$9@St!6 z=RAL@EQ_bZ>hyw}OE}BDqmb)(;;@?Py)oAv2+09M^b1q#a%4$A+@h@P>SDC2M3X|r zQ-sE$BTsq3{hTcFTLHrXHzr~cs+2k10#_%uRNhjz)V`YIsFBPk13e9>#ZBxV5Mb_+1#trQ5A!FudOYojLxZk%Gj_?cDhQmS9 zqT8;w1Lu=(>)~zkKn%A_W^eYJ*jKk%_G@9lP z`9jy7z}j32WtNkxbNWG>{*Y7~TE6SfxcLop>nVn(*YYd)$P)#fmmzlR*cOz*Otk+; zpk2KGc4nQ`7LOW#L9c-_x*a+~hwghC@;NU!0#TR=8z6&lHKt*ZmtOAJ=Iaezd{`d) z6rz!D+tFg{Raj1Zot_{qDrACp5)}Js53};HT2PJaJ?d4CB}WE!UTmKx=#j_QhB55b z!|@h-4Cw!JT;A30N8JCY?X05O>Kb+3eiZnMgyIfuad(H%;!@m-ySqD-;_mKN+*+VG z#XYz?A;k*8J#bd{-e;Wu;#{6HZiaB9CJSJ^Ljfio2I-ZL|a-^55!Ud3mTI< z;lDs-6PvGX;iORqi5>;hWuLdE<_0}L0Gl(%^eKD}c|87PjlDCcRo&YO!Q|xeaS_9n zZX6v(082ml7^Aq)&Bex%6t2v=T?|t7k+_$)I#522n!E=|iW=wcd$5XP{v4ohHIKd) zvbr}Gh0h(gxy#aliOHagRF<3L=iR3Vw=zPHWc<#n{yqXJ_RtHX{GEaUZu_65fO@?S9t?GTW<1xdhNmmaaQI!9HL_>(6 z?4~K17aV+oYsG;6 zY}+qD;~x~pCI<$C{!=`5>N>hwe;)aemsM-jmno%XjlrE`?Up(?8^R@d67Ur;Czm_X 
zPHO;V^8a}iKhjh}4A&r70GD0TnElm+_7S_To#Ijs2ToBi21osheo9`wKwBGyj1k!h z#Sk{WdFX(R*8k|8jT?F`e)N%(qiVU?O$BvB#rKGTd!3y?9YktqXgK6i<msDMAmS1W1My%&9- zNf^x{cW`@xgDb9&bLV&Z^a*uUn|ihfOkf@%*E_0Kfu|xl>Yh4(w&%s85XGqb ziA3D_Fk+hk8~JW^RnW5a!UB@@I(-b*jRP#>_`mFYmn*UOT_iH>_Xb3wo<>mA(E6&v zAGgF_vcNXtmsvcskWTPi%u_^MYR^L0g@mQjt z+R{@#Ku3a*TOD8So(Vgj0=M;v74f631-tVBB-Y5^-E-|Zdis3_usgGenMhUFb%&F7 zuT`?ze9*DD*-fi6d>MN8&|mHt&NayDd|Mle8@&`gfyrWJ+gPXtF$%l@ef#8(=saBt zXh{})lZ+?OIx@^au; z5Yk^8r@z05W{pAQTuw{G2#cbMx9+M+o?+e0_8U|h9NLxvn~$->Ip5j{5~0)BX~eM+ z3Lcd~Au=U>i%L2JY*vzDv~+>gnA?hBDag;#cf{$3IJv*0B4UjapVpaSU!>?pM@M%Z zp~Cs2+6V02PxnXk^z^6C_`;}I6#cw(++s0D4%{hF0a43rq$ zJ&06{$qpda5$-7shG_dM&}aspzM|FJAmrGY^IGlQEaB&DwjIH4T)Z6Ey&x!@$Mw^8 zlJdOy2BWX|96x=58exLI&~qffZ9`Ab*X>MXN~<3<4n|Bbtn2_BpvTD2lfA9e&aPZc zXTn{?m-&%GRcw~*4gL@gD4S&dllb()&Y2;`o5*04)uhGz4~JWtW>^Zz{Jn;BWQbb& z@}8i1%;8M0CX#YAMT>}+_D#h`7gaK&l^=Bs$5@@mU?QaLl2tL!@twwEKfyC;JUE63xYk2vhuIt{Qdk~%v-oyd3_YQL( z$u=s1^Ax8POR&SF{?$u!uYpAOeXHfD0Nfu`bDQs6s>JVQ-VG|X;}9<+zNYDd!$F^~ z2Tk|6Hp0eV{-N83!+FE;Ls1!>T(Sv&u(l0Y3Za_PGPIiaeCNymFv#w#87`sdkV;-9 z^(2L5q3>5Er&j+vZ{p;GwK15t&if^l6_VUd1>I^;Z0=@L^qo=vl;h7KJb?*(ZN|9T zS}K13jBWVKxS7Q9TV460p8Ru`=hhn@Lg2-9)B57q_5+QWmYdj88-M0^f8&$Gh`85t3z zjvXo^W_{iKOys}wzMGE`jPt`eb9^Ut>!aS0{*h@}Ck~zwXKJ`Btuwy%hmUQNDPTE- z^_@iZjfNAvqp^H1rFTl@MN$Q5cZaqW`&Wi)!-X371`V#W0TXeIV(%zjSIjoh2AG=P z@ue+mYo}hQ-+EFsB7cn!+0e%dP4&J%+WAeuHYrQg-atNcXpJ9h7oweTN}1G8yPt-7 zyJGYCh3*Z4?Zj{2WK%{rV|$n{wo26SCy{5sk$X(O_q!}o8-x}TmDKs!xp;SfWs~SH zk>?JHmC(is_8XVYPZFq4ZiwubgE&hkNU!abx*-&poOlb|OI@B90z}^CRFGDO5R%s* zzFH#EYgW;)^gz|Gt(#t2WHmg4TqHX1ghUYeL9B$pK3+(|d1LH%_a|dr3T%iMR^B*! 
zn|ulw%fpwRko)fIi&!Gh`3spNQP~;;G?d?_tJ&QlWjl4BPbUtntkD9&OPj001_x9} zXHAq{ksPyxC@eqyK;)pA(UMfMKwqNT+0ul+!%RyE@|41l4x!` zusFc7v@;_U_Us^kZh@0y40oV=`oV8Lt}-ZUVSrB7A`AmPX-|CoKs!mr+xF<)xng_O z&5f_IHmERVp0+%3u%|uTf z-XQ~Mnx-AhAB3ois^3S2_FFq*+4y}zy_6R?>c0KCdbS(PX0+WK`ot8*_4+6t`T60Q z>(A;>tV!W#G@sZO<&Z$8jv56U58kP%cXDN2j}ckM5DIsIUB7DP^|)J~L?b5$avI0x z{Lq<%HA`Wg?8^4XjXmV+!(<@?LaK$>xsHnUXmS1qSB@l;8X~YW_|TSVp4qjaXLTW} z^89`E)0Qkob}oZdi$ZL{j2D{a+Xqd87nsX{4Vr@Fu(ItFd=FWWS7*hY9OpVRl#Wgj^W1BuW3<%wX4L0Hl6R)S$PBAimTos=ZIbcE) zyEYKz0{-^+{^@TP8Ms`wiAlNTC>);`w=u@0?PW#c;8r-T?vY6!fk`?A9tr8rjsLJ}#!@MDZ}Z#y{2%i)&ur=i*emY6tFW^cOMy#?97>NeSlX zw_$ckDVFQqrB@gKFbK4e*vINld%9@?tpOU6H50(3lqBLgGodcE0OsV<=3U$4#83iH z@7LXp*Vu_Y+-XWDEQITa19KO3=cRS6XSmjgj)GmyD(T$?Tp9AEu*@C|V2$C; zxfts`!oag3x3u39(f=3D-r9f7aTYO#qpuX75H@f@ZZ znS{fx#_|UuF22uieUewZJ;oC}h}B?Oy7qW^A0wmr5UHJ~e_ing#l^)FpTt%tfs2;M zg;RVqgljxA80Ss$Zi-;!C+!r9S`Dso1Z~N~q1>PJ+Ah-35Bg4C;tY`N))!P#G0F#= zIocV>hyS_YcMJOe?cM#qu*CU6WliLm69#U#&!DjDI1smYhn2%w<>CVGX5V!1wxM%l zvC7PJF29UA-kc+8*6Li}bGvx6ic-GApJIuWm)>ic##3WH4)NDH&gLx%J zMD-u50`Kk_AB2x+@Vr(#-=iPG0$5xO{k{3|uagy9`?N;itv7zJ^Cc;PWW&G;D-Xh} z*hf8t5&?4H&!TO{+2@UNhUvc^8ygScbj%2d$H09720G0IODs;-Ztl~yr1ljX@mk>u z&<9kv+?=ILn)+yieK)_b0iak|-`X_JXBdLr_k!Jqy9pB8$HiTBi$y7Yjzav~f3Rjy zF2p)4&u@J6-me0JjTgNIq@|lQ$GL4BuS~|Y=$<;1*_xYv-qwX+H9ONd>iC@}!#g*4 zpXX1D?z0RH*jcs*=>q-Drz+%0vW^;$l0R5kUr97FV{G^EkLQ2=nie+FxINPHkfgU{ zukw-j{CGyodfb4Ej`XivwViE3Tr-M^UR-+MU7gnWpHSc38^iV@%I~x5a_?Tro4N|2 zm=A5&Mqg~XeE#YoSMCSUx*1%&HM-qcuNd>!Qnp)M#({$d+KBVnsuvw57}Whs9KLpp z7ICsYx4;SJ^mL87^;XP8UZm}yKay4~f&`^BRwe%+L~3uViz7pX6?0EXMx4@`(B7aE z?JMYfe`8P1e~roejuxdXelagz(W+6KMJ0nn5Se(SHi0Ogdj)WmvJ!ivx+H}cH0c`} z&@)M0Dg6r6<5M1q_GLW|4^8G$U!8&3-N%$z^tVEtSHC(Mt9Q%FaU6vqs8_59F!!!{ z08@k$v;*w4bb0M9nCh?kupNi~g#HuqKuhqcLoQXVot7&6yz=KT(*l4m?{wVYoa^m~FD`NRi*3tZ9DZ1l*i*2M zuQ9tW1`TN4T}pRrBw;Fq_B+Es%WE(oH!}?zHX7ajxZF@_f6x{yP4<0TJK1^t_QCmJ zSg673QXe3#<*x#By^XU_Zz5BDWc$bx`$n8W^PX;Ej?Cd@bj76i%NR9Vb^4!>m+xky^FyU~_tqMsoo+%mhgkLk#m)8+L=5*| 
zzROsWijt>5{by0O0Kzqz`B~``C0NOLp`{Hq*@shEVUVXaSsTYP}rD-f_kGYI3Ca6 zz?+d_%BSs$&MpTXYQ!tm+(ZaMC8X}b&G-aPqBTB(&KW&M)?Cg^^NGX@S%C}o!b!W` zQjl~bmEuhKB1mo@cloRP39SUwZ^}7WYe+AQAH!&Ev?Zz#K?^cIevP?F6w(OI2HRfcj z8j=Z{d|;rheS?iDSwe$V7V_^;&%ZXPoJQud`;{p}4HyJw2*fyC7lLdhJ+y0nT~Ovq zgb~dF4^{g4tQsBjO-c*5Gdp~!AqnxfkPSE`mpgYauB0MgswPmMYb^IM@-jY<Dg;yS_8h#$i8Ik1YI{*LBKBKhW& z?Mm&=QcM|NTWnAo6^}dB%USj5(2uh?76pL zwF5ZJHvtt?Dx0doFjT{VEGkDRu#bNbPGHZ($sa6G;K_U+dC`5W+|?MwL7B=ro+y8li7tqCDaFPIXyvE z(Hq`GU){6qG%cXq>YqzE4~G2?cQ_L0%dCoi1JvHH9)_QIHCX^u1_BIuL6e4?tXGuS z0n5VB9h6|ZxW=*kK11N}pBPg9^>OaNmmHvrG62G-$3eX+H4iPfx58A-^!x>Dq!-^C zxru|CS>u9nsKTSvC*Gh^|DpjopO%mBGyko8Y>+TsqNCCgAPVYTt^>?YMU`F10H_%G zjw}bSFn9tMlYOIAT+6{N7bMrzPzs_X_j^4bTl%DQ)w6tWm99Z-xeA&ukUC5;PfXiY zzneARm55aN-8^J+(39@m;ztKcaTpj`p&C{o4%1XXo43@HVhB!F>;VXE^@XOW#FM$- zkGjkR6}Z_%$aMwvzyEEfFpMi&F&$3!YjoeYN8-NsEp16)FgEe{2Kp~Su7W}GaTD3iMFeQVZ9^0umc-j=&w?~kTXnw>vpdz?d(l3-&xKC0>YcasDh z_^*o{vxSK9+p#L?dZ1@9Nm_6}0g^$K9mNv9uq8esuVVgE^VmlVqo%NOV*2eL24(_Q zco~K$>v=i{2oH5H(i2^sa$3dVQ`!39%PC&iC86dn(9-P~mBS+4U0t|>l4eEr7fi1~ zt07$1G3aBt7VGlf|c9j?aAIpDpuo=o1cw*r%loFrEV;* zkrFf}-Ld0@!GXg@`f8D=w%?4cMP+r$T1%3ayXKNi`l{OU;XgpO9vbC69OU{*W3tDZ zsA1849rwe!^c5_>?oHv)3H|4UTb31+Z6eIvXOadkMo9yKKLjx5x>dE(`-r$zc8bx> zsId1bbNJ=FX*kKDo;M<}Zs|$KUa$gU@z8!K2wH?i!zLX?0tm#8pQR>^11*=aAVlH% zlD3j;6!_uZkcbEL-}y&XG!A&uPCGR;saE+11k zvk;5UfSL|WB`_Z}_>l^p3IpP)jwpeg9!CFU4LGx;@gpgy{U{?hMWHSy9vQHWPA$Ts z<$Gks4C$k5)fyyq(|#pp8RZd$v-32g4Dbjzb8|n9`!*P1PDvQfX#y__P$;?0#~j>E zOrIW>bUR+M`eK-nIm^UBl*7KV$SHMwMg?#_Co!^@XSN#PHN`6W*tGbI1vNW@GX6A5 zLD4oLNWF8>Q>2z5?1xSEABss;HI z>&tc&F2p*Dd^35NRnf4TGt4GHFEx?uw6ex6o*JyJl zn?J|SQP4=RhM?W$9xHFDbOkTnRERo|ZZuwoa>kUKlM*#<(fET$wTlG~^p8yBPv~do z;$5ZuLk9q{1vdIf1O<-%jdv$@cRFakSz( zvG^1pv8HJnxa-xz}{0^u< z&wM^8v!gHN+AqT!CBnbw&cK@Vz^48O9|SDP4an*QBi42c<;tj~%Azo$KUVB}j(&Ce z^|(|G>K~#u^*ZqyCw65B`yHdfBpz9oiJinFhl**s7?$q!!Wv0?!<{qQzl3Qh zNxA5Qsz^Fxu_b*bi}dV7^@=To_Zv~r(j{lUbXuy2t~nX-f|vB7c3waW^6FQM`#+EW 
z%HCm~H7bEFkyGV`mDQ>oPkyC36eQ17>f2ql&$+vKPsgPb6U_eytW$-7>@RS^z{QBm z+S6s#pI*|j>QrD>HJZsT{6ws>pf*4f{pDivKx+BjJGW7T~2t?`SaaE?iX9j z%~-aHZc1IO8jZo^^>1Z)^glMV4ZreNq(65!yzO$Gc+sR9b}x~BG1=)M0QmAmZ6mUB z3uhU$)qe2#7~e@(WQsZ@ZYM3v_9dye&(4##dX$Je%!-GBd=yg`aAi&nj$q?P-bM4uc)egiFI%&Iw-Gt z^9K8n#P&3HEHKQM4UyRB$+iI3uk_!Ve&gM{`28vV1NH<>5YR;Oxb?e-UuW)%rKoUW zOvs1AC0py`HWiAA6v0=Z{$naj*KhHi>!{C_m@Qn(8pr5f8r({)vQ zLL_VRjO!I!;4!|Lf4ji@eh~9c3kR$4-CON+%Gkq6g0oRRp~tpOS%o^b)@x-2SXme8@!h zs$hCVi(Ftk?*~{mc<$-8lO~%Qt8;!lu$t>*4kpCm`jM48D|17dfU?i?sTrM(jZHm{ z)-oGaW~*R%`4xGiINi~FfYieDUqFS^eiS>`cZ@Xah{JUEA3c#HMd7C>xKs7+7fq_H z)3?H)VYh4ic7^9JWDh`=LBH)-LAd?$BgP?@>M_q^6Lq$~&&jP@iiYp$zR;bQ#mMUt zT7C`(qavNFSv9L$!9T2eTo|+;cbowA-eBe4^3i_+tKrWS)95l;f*{&K!!khezJ&L&Xz4wz(jI{IhUtXd zWIFlhl3d9pB(4MkQvos*Jk^}2s0jE4ON)RAhIPvtKBFS z%GTcOyFoe@yaG+KlPZgA%`qI`(l&PS9Le=T`0|Tb3&B(_n&+b%&B%t?x8@~34`!{- zeRQ2ihF4SH`}@o~rUVt(dea)OZea*m{&PgK>eSJQJ@3x4zsRIR3=*^=L=BV~^ zQ<+x}3pQU>(eWjt@=?G>Qi-1)@oVXy-VJaOOveqQW(eleIv@N9Y`P}jM0oU9B3ywk zB521)ttwpI@RE4xx?5ou6mED=CgLnOIR!c{K1Pz&VUy2*b!UrJygj96Mepj!K-n#! 
zEp9D|`o%PSQn4n=UcYDySxSbz3*D3in&?s>*q3yi+~%tAH1SSp28} zn*?YTO^*tB$}8&nGoI9@XI5OpGwq|@UE=nGfP|cLQ6n)BEUBPb0d6#^IuU-`y{d0z zG}K~JN~u{%lcoh}5ib>`^DofZk;(jKx)7zJOy|6nW*>=Uib+u?NEr<0SrY@wLN^c9 z>F!;Bi>_d^o&O|p&ttsWMP@-V!)fvCsI5>qxP;ISWVlaQaB7~!*4Qv>-JPzd-keSd zABE`)QH_+Mn2VVcuq*i|qgHTn(0e{>xQK*bYdToO45xbF;qg@li`JwGrTrj8_zsg}vsfB8A4JnuV4B zBN|k9BFCt752tHlh>c)U-JkL_z#=63u+^NmGRUYtpUNZ+&he6bqeLPI00TEWR?`M} zDP%ag&6>Gc6A=77L5>h0RT_xa>_{QiKkZM#?AH8GZYR|7pkT<-N9 z2G(=BA`(6ov?aiyISUikL+{VEZsYwfoxDOk42-&&S4iK{QmwrH?C3}~fD0!q&-|U9 zo{I-9XDRRN&LBV`5zK|V_hDP1u#SBbo97|@u4$7wG8z{n}U}MqjX#ig^_3Ef=S#MQ3re;qy`Lo2nfPsIhV$O_fgIz`>kvI&QXzkOt>);P4wK&)Yta zw0%W2r1}A1({5tcvZwhMY?BXyU<`xB;QOnEJ0neEYftovdQWM1&Gg{AW@FX%mUoQU zcr3#H>KRPypIe?K?%4$=$f}3s4x$gkM(Z*4a?oJVk@I4r70<>c(}pb%GLRLX7Df8K z3DAtKO(dq|<+G&yPj1335vfD>Tf#-ih^DWvgMJ3U;i{#aijZ?-@S8wz8!BRM|BTle zY+!jB~NxGCtc5t=Hwf9L2t3q0^+NQzztxEb%T6 zlHuI$Q$n(|n;3{TDb#+7NKFU})HrZA&PCzQ6dj*({tn_I!|IR=B@Tb6qxd2}c^sOa zMp!v}?EVMUjIuynzu0wUa0(-WY-0Cr4V-bpMe1+P@U>|)PjU+$T zoXX@~jr5ylg_!EBRRb_JGA#e$YBLhu-gf?U^Xt#&rpc6e#@!dR4NUYbz02 z&7$Rs8NZj&aUUQ9;Sc$0ToYZ(D|vm*jZo#-l`eKiu{Ix8Uac2Xi3>2N?|&A_{B?JJ zR9MR8&%)pPxosiC>JLW4#qM9j*d9d0WnwQx}Ok!`@_Je%lS2xhcve`ZT zve1Y}6zx_fXQ!{G4qmV(3JxnCvI(9Ywb=a8N;GVu^o?4yJ!Z;B&E)`VZ!MQ%l}lr6 zxKH1(7c2t*ygS&XRJ~1}=|7H96;FBzoU^(+wpLpndf~>NPZm5c4&C+>{ewOTDCzg6 z`j={i%F%|CmaSLh8Eji$ZRJG=d}%LXUNYI(eB>Ob_{~PK?PzStPM|`OL*wHq{yOh| zi@)pU)PaLYsa$&S)lP!4@l599fZD^^{_35VtNm*OZIuWuEdpTEi)V|Nj?wg_{Q(gj ze)jv&M3@?Kv3exm))bS0)}WJ9+1=%#jv-TbFZ4#xG9?oPhU_+_N`CGxlgphN^*dz8 zq~GGt?kF{?AbQ0H_~HV~bDBB_NK;>yn7YL&qNiuM1Rh)m^Bg+A_4|Z=Q!ou)bV5hA ze35Z&$%{Y%xag`pq_)Q?TYhaRo8OF_QpKN(c`d8YzckS6fZagP2Pa-OfFiK& zLygVUwpjoPYwV_nSMD65cn0futA&H^7C4-aI?&=0s!9rZ=q2IB&@#VxW$EaqqNz0r3R=_WPz+SO%mCaS& z^+b@r1Kiu;0^T5YDM)$IWcG~C=(sIx7PaT-L^_sl_?$RAm43Q}8u)7@$`95bhTcq? 
z2SQvA?wtXs^nmLaWv?%(VbZnfEbYTTK?hsgCCuS4Ap-g3kV$QLNEO|!@IGA$g@N8# z{r;Ds`G>Yaoz7t0Fs`qsSC>Ctc(@C8s8i_u7T?Q+w^DNutp;)R`^C8c%Kd8gw#Nh)X^W7sX7!K}IK+W3@MVYW2)Q4R-qWfi5Sr=0d$#Gl!ePtylWl ziB_2|=jRUE%iqDE7f6(*C8UDSu{G|0b6Qh3O zIx4AMzM=o79KYc*S@A@2M}4d+dYlFC1@{IqgIF8tesu3=Apl*^^i~C-fGXpaL7C!i zbv2XLkMC`mc%!+$3F=1CFTTwdzSH}*KpzKAf1@e0Wvj0i`&{P@l0M88M>+(Hr>pS<%zmm1h7f+f|? z%dYn{k4nR-nQh=m6Emm*MmD>al8z=A9oAp$;*p+K7q)b zT0Yua?ptu@qa)W$G8|#tXTvJI|J4AOOMo-6fGV>5FqMEDtY^zM<2LckaHT%okbBnZ zeC1T4qpye*e_p(LrBghpnt1tsicsM&+UiW%1vaBNlRDy2U9l!z`6?OGTzdH-;sSbf zQ^}Cs#Y$brrr&iQyRXGVCQ3a!V#T&|q`?maacjwpz4g5AzT&`l5g)VMb z)c@X5bdT|z?;HM)ix%SZC)NwyMTL2~z&$)6P-gzSp?8yn#(PT3(Pj=QVvq!XhK>L| zU&18O&ck4)8B|97MC}2Duw#iTgK%RhvTYWqJk-R9H{a71jVgRXG~CyTTL9m@{lITDz!9)GN?J1#8D2$ z6M`jX9q^?E_ZD8saCXbF3|;$0qmo`1H9vtrDaE#@A}39(0Ya^-vWCYEZq3bZvdodK zq5c?RBEe^LMLOyF=GF;mNW=%VqmqXo>n+8<(JBwq%Q-LuK4GX0%@<`0vNq}_FN6U0(`I?+y zb0#8;E}!i_$3)8X*1Mikdd0KwY+kV6G47ox?xTRD8ev~#qBYs(W~jS@e8W~VzstLv zbTcyFZj!-$$K=nOs`xsP;l1-)L2qoHY(mrND7jtEfn#aDuYZ88(Uz}cB7s|Z$4tjy zWRX5tUiAS9#Gmua&8B=&YwL1FK@z7U+jyizFCsSL@1PeZCnxVxuR8yQ&h_7KoPK^0h@>J#Qm4ew R^XmmLWF-|PYQ>Dc{x2%Ojy(VX literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/ripipe_b.png b/model/train/yoco_moe/sources/images/ripipe_b.png new file mode 100644 index 0000000000000000000000000000000000000000..ac35cf1882bfda6506f671e7f3b0d069e1c331ab GIT binary patch literal 52574 zcmbTdWl$Z_v*?=u!QFl1kl^m_?iOU@5ZrZRK{xIW8<*e&cXyZI4#D01=3npLSNEKH zA5Oihk?yIrdTRQ^tnOaF2CFDZqaYF@e){wYMOH>a?b9dd)_<}$JnX+~JPfnMzuRXQ zHEFR=Rg=U={}dQYQAN>DpK4=~UX8!}(-9nHbX`7uLhbv{`FY5p)a=uz@f%qQQ4LSS zQ{OKj{l(;WZUvK(I;ydhbaD&+!G5Q-^wlKt^__F~wzl>6rW(7Zvy|0!lZ^Tvu2aXI zG+qk}Ju_D1p!b6|b~ytNj*AJR5ydwjhzsS9zdgJMIX+t(H|M`^;f;EO5wK_!(pZfD zA3>k2rr0mz|7idFF&@mg#NvM&r^13|@PAKCV{sDgQE*lKe=Yq-=>PJCeTMUYoBlt> zu>U`d4L^G4ynUo_}ICE_cjrIypHtRo#!SXYp2?Af_Wpk{EJs+a~L>aElD@c*d)`v zOE02K5&ujBy=OQhBf|+ajAwfKI_wpA?%sWj?bYlyCKw-H{_gCFxUl)%k~;QCG;k=i 
zFAOcOriSGy1OfDy9nz!h_fG$IZ3zr}yIjE<9=oY!SKBlT$P%xwkKFL7s9COD$+p0D zE|+MB>zRZ-w8Gu(hPp=JBs;vk2jR7_M*OAw znEXe?NQYd%vQCJh;FfWZ43q&Imx1#}z=%P&kEEEBzV9L{7x<3;3IGBt zJb#A9{EB9>rxsG)2;_es3lE-foU=f#sYDI0bVo3dG;CwK_WZ6f=&MJCT9UwN1V`r4S$ZBQis`7$b>Q zBipXZ@GimZTUaKDFRn0%;Cmu6J7g2~ri$ZV7Hf8S>%~vg+SXcPe7Pi#5$ReabVj7) zf#F{L=#Tk|ZG1YbEKl)mBo&rHi)bgFtWoHStXThnleJl#3O%~G^9-7TwVcXLkMxC3 z%J7vu>CbpF8FSimox{>8DF^X#KzC=@)sce$H6kEIX_UbZws!)B_G%Ii$VH<3W#q6FB2 zR4baAYq8QWtG!<4@Ep4ES4?}3{M2`!D9v|qO0tkxC7m(%xZDaK4)TJ1Zws`fX|)Ul zpA&^es0iLk$r|yiL+(++qLLMveF1>%KLOQ#N9^!TKOlV)Bi}y$c6|U?j{{ARiTGun zzw6;ecc0hi-MMo6-Ha@xdA~KO@EW%;5p~O!{3%wytbW1d`nVU~c5FqDwfsY$IT7JV z7x4smmL7i$vra<$iY-Wl?=TKi{4T=p_YrdEc_~af5S$*l5S)Zdti&BdycXSn=8tOQ zf8&PZa7Q@PxmB||{bzfqH}(w+_x;8SJ2~PE{eT7CF}#nrwgW-=eboZ=o;hXG|Cmmp z?ti@6x$W#z>$1vs>hn(TdBVLZK}Eb!4`vF3zN@sucX^U#{V=6ueF;^Cmv>}GSLtb@ z4jlS0r$2w{E+{477PXphyxV9O1HEi+h8+)TUp^06=Bs07j_ZH6cj}g25legO>qv7u zu!_nsqybwo+}VBfQke4MMw?7HDDRklJVqV9vLIyil{vfw4ELVhZwr4MagE2=_0FN` z2Xm3!dQq1yEu(i22ye!IxgZM5!;0wVMvGb}1Afl?vFL!d70oY|+ujWhhpGKem$G-C zYBRO9sRZZv!0(PYI9t+Po|urg4dyFwSZWV- zpg%?NL?Q-{1S-E4Ntjy^67&1DO}LD6<$+RFFKF#$i`&t}l>~R4M~V6d`m=Tpw8MuJ zU@-evPqqK0WbkUkTkY6MPpF+w&E?xi)M^(iTwietIiJY=PgxFHOaJ>GZq__+{6^s? 
z-{d97UR5L}*@%y`@~&=|^%KYSAxko13_edZ!^eg@lQrzvpzqgbH#2#4FW9h6%KO{)R3D5aK8e8@ZDd60g7kM}D7WRmV zwEgOM!=fM9D3C@CCgmS|A<+kEGH{fp156WtUAv2Z^A?o_Ya>@H68DR6ZBW{DE=g4L zsAt5yD=04URfC)s*fX65cfb1Fi}F2B*`dBC6~@Il=RWNq|F8#*xJ~ADRS)iFp|-WA zE~K=3@MS#zW(}{S@0`84T%0I66`9Fu*m)6>s5K7nyFGt#?=%EjoQh(0=;nx~7n z8PRqYuwoZJ7m$8ee*>L2z!p-=as<2HJ}eAl`)8qb!Z-BN zdMDVg<*a(@Uql$SvMz2a&dBya#HEV|O?}+B);V8(o67@YBTM!?gbq(C$#y>Op2@go zM|grNiaP9txrrMP1H-`x!;AlqaRd-_xz8nWEVFY zuC86W^{RzNF8-r>GEDJ8gi7%;z*YYxOiW|BdVu+#g?9hLI8hvI&+!LZcvJo{l}PUn z!Je2jni@T#A0vheJ$^GW6a0YU2Z($qbrEw`;u(9ePT=xqDR!rrYsC}QlEJ$uJMjE@ z5XYR;2l3t)eKPk!ouS9dDf!Yl8$1Nt>*UtH;d4dm#9OU>juGEx=#};;ud>lmS3>-o zfL*3Pp%Q`~6C%+iKAYO|K;z+9=;zpeSz@2#E#ZIzyfPa@z@51W-}#D(MxL_ug5$Cq z-3FCFyAriQg6x);3b>Xcb&=3`j2P^jwGHZEtRTe02#33ZJ-!Wa7Lw0QlfD!n-}h>4zlAM ztwNRSi8SWs5;RTWEwSRAZwsGH_-<4#{5TBH%Gd|BfIMc`at+7yg(5C7gvP;2x zZvHASJaky#{Bsy+CAU&iNOFeeh%DXzCthikgEmfRR4F5Y`p0;WkYkogsBOnPeUqP% zeCX)`v3V&d?$BB1^~+HKLo~&SL@H`?ac>Zsp+S&9tfICfMv6dD!kdeVDOp zva)Xq0){Ls9A2B_#dY4}7AI#K*Dn>1x-pTgw<#aZq-Jw6v*OxCVI_ciIO~Ft0dupq z^zcV9qJawyw(0dSl$&xTJ3)UGAf;q7owo{}D|3JS#fJQ=G;{H=AiJ^$j<+Q*|(qN@yh`5w||Qp!|l# z1yom^qQKl2$JXR-GTd4FecRAcILj1|z9fqtpH6{L2&;+VIoElzg-X931+@HOdxdsd zowrYFFv1jsIF$C~lIaCTAX4~~ykD9AxaXy`Aj2sAz?r2E5N9_g|0-TRC{pDaxFL$w;iB8-m3Wa#x3tY0#v0INcD?iMprye7lr9v z*I~OO9MAOBwZ}ph3mVuC?eJ!^4H_EeA*J;9an6mdMgZ^Z#4qk#UwN1wrRlL)OiSkf z=B3dRi*2ZvdhPYpfs2;$wL%_wzvx-p)}|voK-RMt=*fM}5~6M@d>#C@nf%Olr!H)D zVg;~3(F>IyT8kJcu@cc=@#K&$y-MLO#fw&|pE668BFx=)~gVOJX5G-HeOcdm+ zNYdE0mE%!D*f)ug`d0$bvuC?f>SZD>%)lk*iDwW&lw7c6=riH!iMxTe`0BIOaVpvz ze30Wv<|g94MP(7#1iv`hMzlE9g||4-2KT`D!I4D)-M1@g8#Itj%Yj%ODGWcf6cS-y z5FYV*6C$@WH04%?F6eIdw{iFmuoyA;#V z_T6!nT3li{MhMz32C4HU?SNR|Y=}rNld7gl1zb&90ze*M^;bivO4rUy;D@bE?|zBK z@Jqu^#g&n^Ay#JDwp7zkxaj8km&i^9B5*#1TgVJ1Cc+qMq_kX4=D9Z$a}iX0Id)-W zslAIJPAAqz%szfACqPf^B;;D6kBIMfu{pW=%dZF@u5Vk0=koA{50Qp-hzQElSR7hP z>nT`?(l=D!emnu_pW68E_je^G(z7xBzv+-Ih?#6SEKt|;4e+smRp%?m!K_$ zgZ@!od{s7#g^+?mfJ>&Vx#Zx23tZn>{Z6dIqDj)>(15xeI%el{FyS;84$G}8C;5zF 
z2^1WU{>Vig3`*@{dMtGU!sqdh`H^G@s*^M$@z4t|P_@ zfy~ONG}(+dq2PMuH~gPG`jnu7{^gX%QP%3ea<+U|gvWfbWdXza#LcXNWN5^FPbLoB z7Y<>4_M(B+4y!MC=$2L!^#8oT2=Wf04MJu40^4sdopZZ7fv?HOOKY%KxKqY1dMfxQ zq_4pb8_~bsiy%HB&t=xU#>GSoO1$40*&^RKDmm9~CAwRYA73TD`(8TLD^P;k==c+!vwkN@dC!E&858XlKFr>dzf{Rkf_}*4EWSC#yL< zMR*3Wc@)evce+8|rXIb%R~_NmE8yWO?X4W2lBE_=xuK|l->-Pe-(y4*^gC;=&>|Y2 zuu{pXAP^5aFY2ufFRUyq=H%@uiN!QjmfA`NlH#9J#rhWS(dx@Gv$Ue;uX7evO{AyDJ`uP`;h8UE=|wgW@qTclIIQ% zG0$%l>Wv=wfuGmWwL+@s1S_MV@*ED*?c3-T=TOw@1*VbZcX+YQQZq02+a;>%0;E{q z(#-I)T``2e`dSllh(*6$@=rwwpwZF>BmV@il#X7N+VBQyH@#!%p;f7NLON&Ro7Ijp zoPy+t`%d0J*&M0pw1*%gt4W)|t&Cvc>^jbrK6zTKi(8%84~CD{M%atjfTya211|6J zlA{myyq(P_(z0vi1pcDA8Bd&ROZfSVxmZ$0+{i7!h}~x>hFgp^J4UX{$wc98+`?q- zruJ$H>JF;hkNj|G?}KUAy1RnZ7|rhE8ww4^p^eJXpK|5)0s7c&j%TI`wVeMz$s^m& z=6IfPG?LhBBVa>g!j7gt4;we_=(bFS3RdzMUPy~1z|xF=7lna#qRB!&-`Loo;;>Yp z$4=7##QPBQtyfl6mlTjVuMak|aLHaS;e_W^(r4^_z4PUByla$%N>lV%o!KDK?BWkm z7BtH0OJ18(6m0=&jBrx!*`!X-4f@FBuP}0i7AVv`!_s1mA{tmj^?fsQ!pTlV&f1zD zj#VuDeAm_okSvKxZ^4SV>TUIg5!+c}cWZ&62A|{hXDZ{nmsgkDA@|$6@Mof zZ@`Wbz}e?l-NaNI;dg6R!7PcwVs0;5OI6~T@q!iXQQ8;L5GiHNnIq@TXY_pQ-sPEX zC%PjUN=kHD(YJmj{9+dZd=A@9O2p1cv=)Pf*5@OmX#tD)L};@n!5U%FP7sV~-+z#n ztf1yK?v_UJXJ#01Q_%i=6D%DrBi-FHBRzvlq^m?=Zphfa?p4#sgAy!w5GK+``A+tE zzU)58>U#0(hVy6IE4~3-)bh&+#(!pb3!iJ0RURZ$;sqVZoQ)cGRDDyRiQU5x8x5DA zla}^lvW8M6pF{3ji`XOn#)L;Lliqn0Z&S~3V`A3BY`&%=4x6IDEl{aBg9rj@wvui( z*WQW$oO@|L@`KM+;6UdBd41ueRQseWmv9o)WYOGmar6kma#qO4$#dI-fph#*@Y85MwowF&Mss{1=Q-NyP#}HP z>@)dp@_q%vRb^6<a4 z)MyqflD5Xkx$Ha2s!O~2Ocp{C_t?M_@k-vDpJYjnrX^{Uk=)mmsc%DJ)kHb^wWH|m z?7GHvLbWu&O48qe^A=mHYrmBy_IOx*@Iim1rdjieRqC*|v}3i!dKuR9T`lt^BC?du zp8~=C0u&{Yj`p(8k!De~*qQrRHtvm9m(&k9mW!EN&YaqUeLa?hDEOK&f|G09uGw*8 z12wj%XPD#j1%gWzQ3d4TelDOG(u9&*|CxC;PFOZ=rMT)n7q36F)=ObZlKGYI74KuH zt}Mtuhmqtjs3okvc5@ zvs%BqGXh*G?D>o6h6dO4NYXr90@GUUE2RTwRD6l&jZ?$V1KnG{{ul?v$_;{P@u#7} zzpitvu48E(wu8}_3aw7f_SI4T(9V=7;8H|CVoHlXpIv`4n#BkRuL;lBJu2A_sx4IB z4`^)=X&+qd!0|eBpE*YyM>r`0a^--yQF&It^s{sb%gkq*B-ri)=az52`B!B3av#gK 
zAdYdPOF!I-kmQ;aWSkeOU1cb5(?Cm11^3aLFq6O2^0lB>gEV$f%lFS5$TKMA(`NAj zX$Tp=NLdMqBL#CI<=^mQuG`5EksE%=LDYbf_Kl&YZ8a26`dy+BH!kluky;NHSTOMe zhb?+GXG%po62S)o!AoQQHy-QaGP58}k}Q2TsFm92A)u-+Xl_0hsd9hGpi5NI89&H1 z>af;eFx+Kj;gaFy-(tkfT0gsppBoq!?2zKta@$n z83~hMZppW?ePul&rQ&0^7&fbS&F&e%V(yD#MRG3ZQEgnyCv5?felNLA@IUye!|+p< z^S4HnQ@tPLFuz9#cg5S=1pdNPR1kNs;~6kR36YGo)6#TU{^`fb@E%D&*~IjWF623Y zH*d{O%N?zNYbIzyfLv#e1grG`6hao4TYv03DbeiCI%4a%GFO+zuXf-qkP9l`TT>`! zV1Tq+pBrJzs~kA%uvDy-=Qx?P*|?M8lZE#gl%!S;^| z#2owpLcd};bUf)PS*d$XB)jSyiPT|MazQ+6%rd|rL)#M5eN(#*=EHa4m<3u|PF~qO zUF%7siKD=aXMaul%k48}D#+0I=L#krt~aT1NDXpqDCrMV24Ys!~Tt=;W@o@~6>7 z7KvpmB&_V{Yae@ACeiOw$%<0tU7zht(Cm1%8dcC3z07C3DrM@qhf#ix%xV>QQ!Wyz zav?}(co;kE(mYaTmwqKJ(Rx7^O&FHQMKOr=L+wfK67jqNY-c%#)M*b|)pt`L2-P0r zJ$B)2YBZMdbRlDzzlKHAOtES4fH9Wu*m^Gn!btN{vBa%!XoHLvM(+DgYW*&Vez93x zU#9mmcg@u7`;zyteaLC*q6;=sf9)a`U1gKqYkYas+I0`jMU@lg@3)sktI|>&@WC;i zF*Q16-7ps8uXBdt0_cDlO?J|9P(+{jz% zr!3dVOht=}3b{uDXkqX3&wmTeG+(1A>Fn0rl2jj4Q9ae#udB>f)wIt((kg_=+Ls9b z8cp3FJkHTKAC|}i-x@urIM7R6=w^2#;2Gp59w2j|C=0{b>%W=pR=7Ji=>>&yg_Kbu z)kxp^j=ggc;|S)kC}3(=ieyQ7Ps1m(lwAwi*CmKgqbHMPWty{$y!EAAkf9%`1*WMw zp1RonjoffKeGIC&92Q{Hbl#p+&kUW3A4)$Q-5enpKQ$jWD5OV{GI|d$cWie9xXa}0 zsj5Ij%0D4Ir83I!pJCIsSg~7apXCQ-_)_18mIpc^WN7R#z0&2Rkr8T;RAHX}0*>v_ zAWs5*VFDq)$qG>BpbUC)DN?$M_d0vL{c{9hgaooIP(@_e4q^?qrU8(f`5Jo={5;{63LcJh+w@7EP6z+2KXYbj?i|c3zIga z^GduVGRs-Exh_odCu`MrDyqOH?HFy)cWPcmu`YXdQPWRN#+yWR6U@?H#$OPrbMKmU zm~!5hw9$1Psb_b-YS2%8`#dU2Te>Nk!=_MenEwhEUMu^?nGmamAAC7j_p*VA6j{eQ z_(<)bZ(x61C7x5R;!2)JBiwS7y0L9Hu1DFXsque=8zy)frI!QuUM-7kqsp2sw=IhX>JcJ3Cf- zbI6?Os&NV}FY5l}nxb!veKEA+~{Y%zRhB;=E z-fr>z&ghe_+Mx!KxYHbZ)i>K3WnQGptht?>6*`sJa3n`pe>fB-P(0|*veGZrVW@+JAWPk1GWSt;T$B;~4 z^_~5qFktFkk=QWcM*$M+4~fq)ax95omv%v)fH!^7vP-Ct^TG5+mdX0i1>YAaT032> z2{otlW`Wj~NVhhsGgVUc+ z45q^)w~bo}O9{F=!rTm`gPSp z(gzw57BXSA0mZNQcGM4-jT*C8W3rJ)l`i}F4Dc)sl-@km>C4r%QJ($k_gG$9)%ZaM znLC5$AYdB#%Zaw=ZaS_4Svgv#1q;wFv!(RTUdann_dikh>y{tH@=q1g(7GO8@*1%W z_5XOQO|3a!inMMB0g*VYGk=Q{k7L+++zBpcsw*XO7PiOVD^1VKsxuuQkbToA;Pg?! 
z3)7C$EYA@il`2(h1*M=8%4{7LG{;k%y$mGE7j{L=Po9-b!BC|iW{L{6nFG8rhztHw zYb)0ofx+CQqPK@McP+$m z&t)iK3wd0@UO#TIJk7_{Jr6Fui3K{48YYG`(z{euxNY|-i=uxv7_ng5xrGk0^f--M z0?QGlwnh;ic4ljva+L<1?8_7MCn~Z0RZucv8XQ|O9ICZ<=i#kBJQga4PBZpRgfBFx zA6|$W<;0T2(J$g$eo*op*($jDf7a6RA{KHD8^HV9no&-+D{p3j^8EnN>*fnLXt0Uo9q@c!4-%E1lVk3hm)=%Uh*11bIg{6 zn4jLX{N+#a!{#3E0?srZ^mCa`b z8;CyU<6Ua}E>6Blu_Dii>KV#jKUwVmc7`5`NKO7uq1;6^nWFC+&xY(q>Api!dj(MA zw{@XlclQh$W_g)4SHix0I_Ychae7IJ-6;;aNPV?txhGyVDGe5~VKk+{i=(=IsLt+{ zmHip6;GQ^wCC}ymJkHwibEH3&J__3zcmnBIe3_7^tvS&tSxMVPtIqxdib?UyX5Z0| z-gEiEY-g*n-}73*d}%EZ8$I>;f^RncRSa68xrLI$LthJ8*;7N-9@dzp*=%PRrO{Kk z3d%)Nf%<}=TRs7<%K3(1bDaV!<+}xxHl`QA9%^tATJT9LyS3@hUv1Ez>tsdC8m$DA zDlzBY0E(&kA)47BHq()JQ->U_MATL}`hD~m6d@P9T^i{uNgM;#KcAgrS`Ec2(L3D^ zzfy50<>t`D4&%<#E2UK|P3PL4sXHuw6P)B2Okz(AYT5lJ|N7{IX1R=VJ&-Nj!uD2@ zEvznz%k2k0z++4WB;1RfdjGATw409QpwgZI<$^q z+r!TPftQ`fMQpPiv>=_MNF6j-C7bC|%$V=4j~asY>$#90Kh?%sKQ<8^*_JrROgFDf z^PMY4Vp__Z*lXAYM?PRNutC~T<@efPv)oTE$JnLbJsOSlvz0cLL7~9m4eo_=h%D5j zuxZkCm{W01X0$-dG2SRI?cMa!-4gREn%kCyxQL=-T~rQ^M*4GP55C7dO8c!r^3tt@ z+QqDIY7@i1;zsM^lAiF8A%cE*0M##ja`t|_p{^_<(3>=ymU|hBrdYY(mzwc7Z;Ma% zCDLDFrtCZ=?vBPXA^q=({2BpXoyJa2^S{1Qr6X2;bWBrnamjb`%nXvHO_8KvC+*6- z8CGDugL0tmvdJBHVu^_UoM~Pku+Jd)XFjjNH_r~?%;DX?e#czfHG$km_<9>2a^-~6 z6Y8R<`>WXgfLW`VJ`5)8Axm?2+0PTs&BCCkn|_W)PlQj>g4K{Hu8Xioo08yDeNHo-t0+nn61@ zxJ{4Ceka|h4j*mF?C)&1~qSJ+4 z6k>QDmiuIS557eYHz$#wvL6i&oakm(2OBoPpSx4C3@P2Rnm6?23Y

      ~>+X_-@a%Nq-sHkv>caV@1NRqBjDyX1OT47smeE!@Y7R|nTVOT} z17+n^-23={IQ};an0oNoXc=uk8pm6BVXXLW4W{IDa&vR|Vf8vTZQ07Xe?A?v(HyoN z!BGExEp?AnBb^fImf(>{m&CK#1r`M*S&;cq9<6&(3dO(XC}={04DSg3@RKiDzUq7a zaME!Ynq`}{SmUH@oYNJduxQSFCX5+JacKz#gG%pSh1jeO!_3HGZIuxP*|>q)zGD|p zJ@*pcP)8RCiPx(!Z`yd?dFMlpI&v|E1qF?~2Tyi~Uimrn%Fk*1JZ;K&KK*P3n>O!c z&WwXYw%6}cMgZ$KY~-02UdCt$V}$_nb2HhnZXH=^X$&4bpm8S_t_Y-|PoSG5s9>FD zL(Njyw|E=bkM}~&HTZrDGFAeXt#dBCl)Rj*7QZQhx1$xi63{#v<3Wi0> zk(Ls&O&!Iu_dn;=|9*lgg8MQx#m+_7-UC2xb|ydk^a~m3sf-*tAgl#N7{IjHiY(Xg z{Nlp3!i!cbF%C75@k9zjv%Rc1OufD@<4z*xtB-@j)n2kLD%Den_ z$=s+E2(7G~x>sK%<%~bI=DzX2OoIj!LW>39sh8en`SA|Xyk0L8#th@eyPx1M z|Mz=RQ<5X{R*;|1zS3e$CKFqCZRfmeE@jNn5hU4@5kf>|djE0#FFf+p>sZ3n4D+}> z%$ho$dmnp&r6>QEB)gqfd&Cyk3n`Js9_rt$#c)O%#=%B%-Y8_x+-;PdwU_kUbE29U zH*MR=BTv1G#~W&H^UWvICynRjWgm0m(F-Xm2%e=UtX52;NBcgcrD7<^$M-_|+9B;W zXmGJ>(qvNqb~#FFY6N>4oORi4SQ|HJ!P}FUo5jl2Ka-h}%CJHGB3cw1er%xZk9%>i zb)nc5gkAVzJOv1Yif6MMtwxKWTu4ql_Zo7uTgEwhyj}$G+Pj~$;8seIQe*hw0X+ZO zdkCR0Y5d5rMF=|#0Z0Gw60$NvTg4?3uSY`&1tA0;uZNkF$MN_R&vEKW$F}2KO&?6d z%+z`$4PVw_7-m47V8%SuLdviV%mXdde^`Um+A2PQwWHoq&y!F8hvMRW2%(02j@#{G z?9e_ed+R+8KkSfZo1B(?wQ}|MTz%)wRMk|snhks-Zp!$H+ zG^e$EW0bIOPvqYtA7EZq$4O zmI=P^1NQ_Ht0bUFjfrE2bMo&Nh2%j>iK;5B_+cGyzWp9&op~xsV>V03yJ$UxetYCR zMhqJep~X$@)uhI(DdRcm=+L|cP+VHhRkuCF(m$O-K|wy?*J}a|4uPhjAu$axll??5 zYM-n`BrBLkTd>S*`%)`@jmzux^589(wrOkVmPrh%$`{{!$I~yq&C*kk#u8F7=oma6 zFPHrJWCn%VkvHea2SG_$IoJIC5iUIQ1o{-^x45c#%al|D=@({Uo@7PVYdBXlPqede_}ghHT8G%5Mx?~i24gwY{*^F3M9G;X=)2|oOM1&ilTi?~<`A*gx& zS*#1@W9-{E+?8$a3&3nPao!onGiqq)Ki(oD{)Vi{{A|Kr}2onK7(`TJ0_+b%MJ>9vuzlpH*E1SkFOurQo~s;Deq zwT2g7f1gXvJ|#+*X4Bg$faIf&LOL5rKmXjucRZK`o6~M^x_IL5Ya%|WxU`I`ZheSL z{(KU>3-bu~ep^Oc$$PJe>PIVZZ+DY?Y8s|tW-tg}RnlzquW}Nr};D;PrPt<)cr(;E2VCG_D!Z zz*uA?`{e@amsO)xc`*(*W0@4Nre%-|IA%kc+!h??Xrr)H`0HdU;WIdcm{ik)*JW+{Zs2M|^ ziXqR4TF~r$6F9;+sPxK zB<F^6?idkbymQi%mcfcbtHO$Bf|PFITa6{m?EUGRShJO92xalU$ zvu2Tc;z{lHV7lENR{gl1eI;f7kb|~z6hbg*{Aj-T<_8wcnuyV8Agsei!n|oSeeP@o 
zSDt?fiV*&6%IFmA0DjuA8BKbF^M!A$&6#>I>o#s<$-JqwePshRd7Ah&4YT;_@VGRj z+ov)qsVbTG=2CXS-cU_yMHhDOE8)X0z6pB0Kmh>*`WCTq>kej3ozS9BL&euLtXYGS znue)&A%QKUu}=pMd6t5YzWf#%<5AkmFF7fRx;h7w4;n>oPL{7_elZ2(PYQ{WX&~*A zEIj)>R6kik!{_y+UYLn6Xn=x!o;~8qPeiO52O>dWC1iram7zIhcO~qT~q2a4K zici@^<^y>sDFU^?NTU|ijHkx61E{L5;mdEn$6_)2uL{Du5KNsghMGDjGy3-lKKdl| zE$Yp=e^^RcWjQNXf6w2}yNH6^UPvS(2M_0&*I&ftc1Q0T2R{AZDt~5n@VP}w$;_-qLi%1 z^HB>8QHLw|_=^78V2DyVievzee7~W;k)*yRzg4KYsZFA$D5PDUMePd}t?p~75octi zF>vr8_Em%kwnVUP=T2UH^CM=?n98`ZBctMO`lPX#E;IzzVL-C+*X_LX!T;E}X?uij z_8~7fhXDfzvA5FEM2n)wQ)D!!6eTvX!^q|B~a5 zn9taezH_~V^tr5qLI~y^cO)ehWtfe|Muj=B5EkY4;f4ikcPLfWwLS@i=%L^D+@VCZEe<8RqC{I8z!8;VIf zA{pagGtwy$Dt?znaQxx8!>IQ27I;T~`gsc`vxyOd`nSATKuK8zufOvdLk9HW+&>>LI>;$_;KAYB&TMOnUTg1KmNqKpM1;Viw|MofN<8^P-xoBE2#!- zi&8L+w^4OpIkxFGJUhG?2bnNWY@H$I`(!j489HktKd&4bjy#Cg^j2|_h>B~9v5c`GtiEg2RF#yI)1eAv^jTrL zp&b>n+pmU6r^H+8!O+VHia@LNQvY!+wnZr@=D;xV(KX~6urEoWrOQ=m@!ZsuWM)jA z82Whb^X3&7%NMP^Y5*~iyWnOvnBZds@!{09XBMatE z33^|16MbezI&)`5K38bhp1mxXH#1D^N)wN2eyGikkY^zM+H9)tufVz7i4+>?7iWch_k<7(8Q6y* z1HsuN1A{C~&HK{@&M)dwk_FbeNn|~ePvs3I$Oa8zQK)>Z95!hvSt^!=RxDHOxVJbl z_4B!n0`vm>rKct{e@5u}1YqTl>litB0JEnZ)HX&YgzzL#_x}4-+<7O)ygZEk2cY&Y z!v341Fl1-p`te6Bvm+PLTrew4b$&l~?k;7?g6Wt{#0pPBYmpSrE*UE=aQ5Ic(mvHot7$v4>uH*({th1yzlo zVg&G;!$Qk804X)*r#e~y03ZNKL_t(?diADnpCXd%cCxb48Pva@@5gp7bpmLWoP{CR zK>Ed5G<;G={hKwUT;Nk?T6;r4U^E&yXk6rZwywUONe7Q7FE_i@G#8xVWP1{Grq1Mo zYp9Ls2$jkO-2}lxOwAzen^xtN9`vp0IgP|;@o1=ZqLCu zz=Ug)i>jN;@NDvsdR``?*+xg$1m;ne*z8jRMvWNiPoii)(;t2E4JZEb3hEtB-g@~S zrc4@((Wv^<;e$P}{XCv};?YD~n%z3VhU2|j_Ac0l<&+d0FIJQPdJ!OT{@|eG=so1T z-V5_63np7f!p}nAF?#r*_&ar@#Qd2P@pwEa!2g|kbi2*otswwIwvqI!vMIl;n7X%X zNWVCgi!Rz{Ne-JAImW0pv^odQb*r&Xn1mEy=+lRk6HmmmcQ0j^U55Sm<4IaHU1MBl zU9_Gw*-f_HWY=WdwryLJn{3;dYVu^)gnzbe^PczK-|bUA`|Q2=thLwkyB?9~+;3{> zSBUR{Lm|^qC9Y~X0`|awlN~F>ef2MPpOYsTzGq$OKI-}jnbHeEH7*U_2BJLS^KrFe zgFFZr7RE901UN;WDL4zov5`qj;RR$1oS(MOmiMGVg zp=`gknFcS=W;Qc5=8{Ns6o_%VnZ$Lo>7l>e>pojtaS9#Rv6ObF6aK5 
zDLvzw`%G=mECI&!du35_AEJbuC@IRUF*exYRpn8heS@DdCd|C@Q!U8VGJQx3_3QWe zR<`pSo-Y)#*7nrZoAntkOx>|W3vYd>(dNzzIKzp#seKRUK3}B#YXzEZ0?k8seb>V8 z|Bzb~)e=8SHjZ|0P}A*h^Zbu|>=J}R5B;#@uRa0=$g`5CNA=o14>YMba8Ep6YP@iR-?Big9tyz)T z-yHu{Nq%09Ft6WT@_86boYiQ*uOKxCJT zhavAqoKrq5MZbe)5&=JSgRZKVD#je{mC2IpcWbIizqh zAT)xchlX-)sY44pIFIO8@xLoAf<|?+{Z52V3PQ{M<>+i!d;%sCubA@(nD4=@6Jnr{ z{o;3u=tkB@MHe3moasX&New@V2rf2j{c_<3SbFHyg6H0_U($vx=yqoNO(fY~D>K=S zXQZ2l;vw>LqzSh1lvPqpz+fDQA;;4|qt`wR>AK~J@3gAx8V7<^jM%|lU;=`z>x6j- zY+c(Ao`nHgtFOv zzT&;IvJ$7=^?*a*?Zs?<(NoY&Tn<7{_h8-$bnStLCdn<`FA4)OMf4L~O=)utboV;< z;zupNl@VWSY2Q`Wp;zV{%oJ7p+5UQ>QT><44fjHAQ_1Uby_lXKeX-O)iN0WySI=y5 zW!eahJTF0d(2asT=U1FvJK``eJN1M54UCqrK=7GsZrv{Q=qpixl)FDkS2P@tcpo*; z$&ug{K&a-SR&VnUGuc+?Bh3$Q(`1es{u@?`p&%XNqdsytSgE`k20tOEdQ%v;ig`kA zYB;34;01Z@pYyNQr>9bl;Tpk@REhC&MR(IalJ7d06fz!3{$>)@2RW!N^Flk)QNjx* zU4RZ7qwIC+2G0AUXNv}sHpn)pJle2Z1x)ufFGPA7%RHv!`eBh5!+!}8+Anj<((HP` z>deFf1~33+&1ajV(1RbL@9a~G#INDH25jNHt}SX;eQAlaH*gPU<;HL2Bu-{3Oau4T zL^!mnLVOi~Tkw+HW{;et5d zr4_rf`qTJt>#@Uqy=NV8$rYA{T|eN{&Ri~6-pdS1q3ewg%IkTFep^rVRmVb^HD<3{ z5i;<7WJ^H5s}rUGrFma6nskNMvLy$GTI4CN(Q?i6sg`XITk<`csc)Sp2eShcmiVoN z!tIEGVW9R5XrzAHU0(c^lrz=4D{=MUV{p|setA1#!l|s8fR|c|(d%)4F1CwfcmAi0 z`Bp%ZJYef_;f-dNYP>N@y2W-S`TLlXhw%HV!|q0X&;;+r5A0YjfoI4=WWhw|sbx*^ zI7pghj~yvb>q=C1c2oYl@2Md=m`X7%!fp~IfuH_2B(1vM-y$ZW@}2JTVLup|<8j4+ zPN)|ah*qOGAR)lD&XhjTYx5lMEYE=JD~zYG z8%X#lXp-z8E(Z8D;o+c?_bA&U=Tk3q*u^OBV`P7jo|2PRRJ=ufBCRhVQ4Fvpn6|pC zq4w-&@;YpPP!W~%?vsw1?=HW7;4FY+C+V(?nplcxWb9)@g98dbWwoGO zVU-nl%!1?ktP7zGqM|dK{J8u`*j((oI!upY!YaACC*-H=1W}Nhpa-%>iZmxw+vrETLu#_SDgl zWy_J(Ud$iF2|7^5FF%yhqaV@zhhYb_%HAAOlbswOHi;hyF#eh6^rA{8>Nr{D= z)L>*R7QoRlJ&<&^W@z)`u?~Y@Y_QQ$Z_=&_4L=&=MMqAFs{e-AYxQ22yb21cf&LftiM9 z{F|8)R#ta9-C7kLnHFOPg1&U7rO2q?YQ#~EV7b4)-YyhkAO7T_LIv*_72T$?nTf+w z_(xY~DBzDj*8@F+Kqx$VvA#5wEp#($D8~iMjxAAJ+Va?WQ);<*Z{Xwq3h&_HP`1O` z46C~qu;`pb`zk`mW6F8ib?(l)`fy?Tc-f;4^G5wM_IN-}LpQoGZrx$hfKjeY0O!;( z)e#Ph$F#$Hqd!O{4cVGzqPwfz`IY~3Th^VBEaLHSj;Y7uw8bF9?R~zz$>sW%V^j9W z{-JwJZq~8-CpM$ 
zVYQy_+fqCsh3Puhm#4{WSPr~GO@*091{HgTnvDr{JVL~n?zPO{?pQ~(>*`q>)bxy| z#{udrwp)fn1M&2EP)G}ImboGUr}=8;A3D}rHENlO{l_C=b6f13Kbe8P_{$x<=eMew zvz1p)+LYjZ8LaQNSI@CiP1k=I1xZdSI~Fc*vxgUlJ9Vqx3J)39 zR=#EO3>x9QTNlAXSPx4 zwLGav2!hA%%UQo>oQ@CPmwNlLaDeO`r?}9#rYfwh$Q|tp7BHn$2ZP%*m~USyG;28c zRB738-D0hBCY!F8P8D{@m238?W`qvoOMP?^H=FtO0|E$I*S@b;{H|>w(7k|GPOU=M zl2-dH|5MckC1GscY(X%*t~%W8&ssZPNiULy*h0(rs~qHsEa+n!{<+r1u3yx3Gsfe- z)wa+xxnI*lm`B!%V=v_>)RT*WYquZa*_w!r2s5fJA<%4EaQ{F_$&y*u>N{+Zi`3{`l7^G8ovCzffkppp~ban|NM;P8`)#8y>4wF3sI6c0^XWB7tv>tXq@)gq>XX z-146CyDiH0c@{ZLjp48M3n1q*;B0i23Hb zVo&@X2M5QaFgCt%4Au78iE~?N#`Q$=vv+Y)%*CSv%>U!bj<-DY#mqL+tXZ9&RxCt5q%vzISELO7M;R}kK#xM5{n^GXRR96sP$0a|;;%fL z@ltN-HjZcIITlIooC!1Z%z#DN=pFgo1D5+OZ3BslzDpMf4MCfActjECyZ-TUEsxLz z(LU=Y?kQMXMVvNI2zl04u`uBv#=m*VcdiISXkR|K-n`2rxbNH7+D8N3Q@gQz0sp zCDdRaQ&v+{6WFGfNm>>aGHQsv0KIXYN0W?=)zf_7SwJ*PB^JsN^Z^CD4T>+P2bD4=(~q#tD_=l$_ISn-b-+lofJ>+2AFXod4`Gfu z7v*8V2G(kEMGPeG&1R6oCM}0{i%zq*PHo4_ty-@)FW@(}6`JuPkd4lCpZ;p7nA{kx zd6Tz&XG~ch7Ky%ymX}Ma#E2a`J*${w5%t#;EJt!T}dhPU~eow7Ks6MdPBG9S4@8S4)TKKbhlACppB zd37e$gqVsIQV)Dua+>4Y4d`jBgB7E0S1$g#m>WOvQ1crc6~T@U{NiuUpw?C_05Bq# znTsWhcXj9_osVk_!5wE}=0;>{ZogZic+4j;JGXhHaMT-~6>>*Ekqk^rYyVB>4(d=Q zC|Zu0v_G6vyF^7j;WG8TBC zT(2jg#gxs!#+xGTXIRdg#&{%T7wt%eKc3Vg@UQI$Lm@&~TZg0&efQ#$HoVCT;dJ|% ze6+dIaHq%>+ZezPj}A3U9kSF;Mi#{OJ^U~2t7APkD7?U)o~@_bx#o$1j%IM|Kg9~9(* zmz~JipjK{201iiojcb;3expB*jxk*iIEuV{6BP8K-Vx$(7;1SH?pTS6Yx|D?*PxGN zEgAlf;hX2Q&D+J^-Mdqjac)U2^Yr3Ikr`c4mo$UrvBK#1DJyk-ZLjzu6=!gVSdEUD za=%*AMT_0*yE{kwz(dkdl$v(-4Q_nD?uZDu5@qXuH%gL{tMhis4C29M5zr7XYJ{3c z;NUpEy@9xM8e>g3sUYq4w@jYr{kv-tdlb1&{Yt@*qC59<-oJoBN%N=teiZxWy@j%< z$J=o6hBE0dAn!ubR`ibr_>k+HIW$b}9m#QEwTu2HG^5zK`|?uaPs#ElBo5grz}hDc)?_#1 zJ4g51=b$%)O0E7xYK>32@9|IZ{pC{Bt)ly(;i_)x3cHZE!oCH}0s^wD`|BY~mR(RF z^%A>Li`I-MI|DM#h9v z{!IS7?uAlbl7C0khqNI=Rfny6Cu$}K7w?Sw{;2`bU3K|(|2*LaM9278aSQ63Xl zC;gZM!4B`qpQ0}v2YvRAFL&FqX#&SY25Veu%~vlrpII52QLhUig_RxezwJiCAv?Xc 
zJ7WqOFeI@y)%!OzjCx7A^Expr9m*~@L+KLg{!T#wZ}*7qH*UJ#C;H5H4y|nbmyk7~-D0=uYIgPq?C=-}hhfZTNHmx^4q_^*`VE@OMl%wOD z1`w#S#f;rV$06*u(=iU(GS6Y-sL@go(yEnQeVpg~OF&xVHOU;mxm9d?{75(W;QE=S zE%$lr-O@rWzccmzsD%LsD*8_@&Rr+0#`VRf}1_fJfWY!z8hPac!LwMAThf|Ew3cIy<5gyiptKosrR@cC*q zS#vsEx48v1&T}5=c>OpWM9$@|ll(JF^41fKvC^J(DRUg0jFw2lr~KRcrM2qIR4Z(` zY8|R+bsHiE>T(fcy^j0sE~Zb@y+F(PTttI|`sK4T!R(;ko#A|VUDt38cxCr|*Tc9O z*Em=kJE!cO6E9?d+B47Aj@i`I^rV+RoC%GP_8W(h15>43DUPH4!N@zo%Qz0V?H36V z9!Gi#sH(vRzZR0q2w`P30v~}SN;k0z=jq^IUzlQXnUEj3n#Qb~+LmS0VkX)m$Wxw; zTbv-zK3~5J2?>RU{~4wMV?itvn^JetDa*uFq)E9u*+|(~?_hglXUuZ1z^-4H8>gh= z%rRp$h!uz+!y*^~q|{ z0Wg;_vNQ~6Bs!)1v&p4J^|V)$4+B-@&t)^0T8<)3n+^2jWa-dbTb+P6)QGPW_7pcyaLqe84WE zZynB++ob8S!WXTd{bp)4jks<1C@GlQG^;2g6~4X)aY(CYxIip(0f}HaC>QeSh26V& zvBT$3+2Z2hP}7(N$RXANHd3672U(+Exl=iXxNheqMd34O9cyFGmjU_;6Y5?g6e_%p z(=*&ZLq*HdVTSbiAU&@5y@AH9k*O(%|C5RQa!nx7P%$^rE@W~}7g4Vw=)eb4E=4lk zJ;E3LB3*)3M@d*Pt1KEGk)WZuwqguy_WaNO5y?p|5l>4UH z|H#k`xA^iDs5sfxf9=`k6mFM1hgU2qHv_0F{O_NJZYTy18dLn8Zfy>$AR0w=+q6%! zh%>d8;E0=I9s~D2jYcdsH*c-|E{}$XE@kenZxc;IMy3LL$3>4zid~Yd|`xFrAjM4j65(mCn5kZ zC0-h9#G>`&bjTFbZTbBpHpXRswp>-Z8q=nq$+pAdgoAGCC`Ei<#*JW+xN9-yeunFu z8j^c?8DG#nMg$r;(esAfyK7^xu$iA6Y_v??>1F^{K3A5Q<``p8OpLK7zSq$3C0UAI z=Y{wS6W=3(3S-U>dwct?^Q{uBT#KEjhP}PL`AqRIlub}Xu|sacoMdQdxw(YcF{KNb zC6mT%xKgsRdjpZ^AbzR1ai2l4ykfI?Q_8*E~7Y%?JRM61i ze>@r>5%I{1i}Uo^JAmQnqy@kE%2Seax#|gn(^Sx;PnxG~Qn8XH8HIdxH2l2OoWQK@ zc%cQ_d#LX9Et;19e5}|!?Ip!$O{&|?Ps|jEMEp6d9VDlsK*dKBo~=+>>)8_m-uMBy z&^sY3>2ffq7z$Y=p%>HBLzD}kDf|1(hFd?>`K~c?j7{_?i`LD_$XJu=uUNnA9qc7$ zqy&MCD>$du*6HxNRYF$(SydlZQ!q(Qb6yju(!4ne*aPV*^UGm^8K=%qgwIzp6 zz7ng%rtdvP&qrf?9nklE>cCll{91#6&sMISsg2Cb_}Zyl#pi=!-F#|2QdU}B)f}O% z{Zn<-qtKa=U5`Nu4GPMO-sg_Fd;L#F*~cvcFwnwe|7IW5R7GkUJNq+gGv&S3(MZq1 z{+pN0NDC(Z;@&q5G)(Q!k&2r98BZl!>g~&b_|_I`yF@~MZ%@AO7ysW2a5ac!aMh12 z==k~m2;BDZx*o)O-i{#B?D6Lh3JQ|npGzE1ZEI^26%%W)nxk~TSd-G!#75@7LwWEP zKlo;b;Y{4KAollB@Kd-aS5HSL@hkE7Oj%QNP?wGdOmCM5L+NK=kni=R_kzR9L=;t? 
za1{e960W0dWO=T3r4n*vCht`y0@)8$SP4a%aczA>iw*3MZtSGdfyGUG!|{bY99DOH zCV@}<&$o9EM9#`;?l9^usW6LLT7Ny3pS0W@4>+09A+cl~>+((wWW;$9(qREVzVLfL znEw3{;iRf;ytUrCVSF=i9n-OAtj-t^Q%x@sgPXKGQ5uVv?tBa>jzBb2#ndyA*W_@d2)xeN^*OhUrv zy;$$vQ#L+(7-&0Ui|Aa-SLM6l;>J14np{FrgKg@GlPL>)#CYX3?NW0oa z7Bpjl#pg^tt!;}6egkj$sL#OBFpzqyF90ZnQyscL;C%FA3-79` zCuDOqpSgA!aYl=;O}t)X7iX||vB;H#4V#vkapj&stl*Y($)749D zKc!0wMIw+Op`f-8521e;8WIDM!;veG`}xYC+oGO?uP-0xnj^&P!#M>0<$Ot+{@Crb z;Pl(`ZQE8byk@7T)3a>!vh6`tUAOT>I`hEraFby#c#JsNm(kHtzPGD>;5+CVV0lKXJ9)kHcn(^0?vS&_>U$WNxTM zkJisKHZ<q%J9|3sJ2T<}!;?$p z_V=-JO?IA~F!p#!-vyhUq3AUD!beTP!FWAv4;yKcMy98sVP}<9v_6)KVu<*oB6o+b zHaZ-N7Ee*#d8;=4A|wd4#{h90NLM0DXK@1OZFOKGQq7i^Hy`zTWHEk1p^POOUo`Lq z`{C~|P_^k5F`5=ex9-J))UclMr^aNh5#Y8!(uGh2K*;)gH4E+5kB1aqs92A+jevEX9@d4su!$Frv zgZ6mnx6a#ztZMCh+@vyjlZCacBhrGsecdJ4@Xryjofr^zHBJ z;1F(WUW@CNY`FHhtkHYL#oE$a)nxA4+O*zXxku&LaBy(X7po39oc`b*KrxpmCoUq5 zfnxv8%GMq4HQ+O+1BlC0Ln;zHjSdN3@k8Oz@) z#P6ko>gjlxsLiamMmCc#%p&g@xvuV|PUW7;k(#e#GOBfGyuwC1tMTSyjV7x6C&~wR z^1m^(HfXaPeQ9a9)(vku!1=BDJ+4!yNZSL2-^8RO=B&xHmF8$HeNQk01A~Lftl`#8 z|M3%06&RQ26>&5sgYnf??;4<`fc6@(WKh%7n-0a`*e*AQW!bk!R@JsgHQOvUx*jWk z0RW4oHWwO)u8ks`a}vkpiAOKw}GU)OD zZ$UtWx??g z^9!HM;xNVvPJ)H)>^@;{v0a{EXRR%&uTLnExu9YnM7@6fWhS%SVZrI*kTG38W1@Nc zVST2KXS3mxwm(i}vT`?`!ES+c+Vh@k?rbn}cX(t%(qbXIZO!>q_~CqYJn{8% zPVfxXT%7q#9~3^f?@|V>Jx?Djg1Js}US7*N#;;#W!inUl z1n6W)Z*Gc5a_ZaXHQydLdPIRw3plj~RX)AsJ#9L}x(7(@wdxhiLHeXrJ(9`kcBB;i zGNR&Qk@14hs@M}8*E1`hur=y*NQ6Av{(10>e-t~PZmJ3u9WYeN^eF8Oe*byvP!QCi zA|)dmoZ^2SxK5JlTU=DAem%icGH!|7rAiXb>wX`|@mMEAFwgRQPSFq(6)j$Bw#eJ{ zPEx~`Rlk0UN-_TXS7=AGkH+Zx>cA!R2d0wJ%ue&-X7zPjS6B1S3eIA*4%P-^yZbpM z23**a*(aFM_vO$H({Ep!^nfKgUHwx+B_w2YzgVW4+WQt21OXkX)pVF#>a?eJeM!*q z5H&Jl+2FHTM@mO!knQmSWqdH9)o0ie*>TyXaCf|_kC9}`qlFtfC_<>jZL)^@J6J(ygHW6+6|7| z!bT06p@YV#fWD%kQ^{kvcj&!6?Zyk2%|)ge__xjS3w*x06ev;;w7Z>#uG?dAWXbW} zPOu7Bs77UH<2SgUYj5xGQvl(pN>z4$1TvP9kX&Uvxw*AGJw1P_RjH?xRitm&w?@;bl>U6aJ&aCFM7usN%Zt+Q z@&=|5P)9{a4=y#B4F0y7`)>(k5^)$I(8!pW(PF?pT)$rSz~{(Q-Q3;|0^V0_IF4Wt 
z804qZCBM27Umx8R&_M%C;t5-7x9Oo~25l%{;>sEy&zkj(%aHN$@eLI6USPC^d0+3g zHLd0SDA#ukT+jX^%7Z<`GZ>W?-(5cIn0i0}j_g zg$kSgJ6qc(pj@A2vUu1MWr~Xc!-{nOXEou98|jGEIVwIrY0F2a9>$T(z`uh2f$_Qz z4M3sz7h^Y6D#6Oqqj0 zXIB?$AP~+32!6gyaDF)xPRPxj>J9kV9@!Rw!6zFjKe7ZYI?UD0P2co1G8iDS^VSMc zQTVLPu&}VeSABhC_{*tN;`jot>7nSuZC($ueEj^(I`p14U4Z9kmQGGj59~!5qA(e% ze+fZ8UTr19Y{~#EY{SK^!szt$uqg{5m($_pY2@Ju1nQ#(#={SGzIVN^rWO_|2?jsp z<=wkhWs(vTzhfitwK+pMBu`=HaJz&9>yo83;WyKwh0A|XZQE~o+RFbC%nPZgYx@K1 zXmMrd4^#v)(E$+s=?1*t143@dk#+s}=+Vhp0tpRW1Ze2$Ipo)`sZ2S@_*@A#bzP}x z`reUDJ#UFR_HAKwz#yi%GKC^V>Z9c*s;%_(=*UPQ>2!R#JHGLSR=XCfmX?x&1>{_$ zY_7o?L~A~g4)*tYci8K+ru7M!q>3V(kewYfXiVeK)ZN`ZFgTd&ZHycaVzC67mqR$6h3-B;jr$Up5ep?Q7TwG}-)-z!{1x;IZIJjuoStzbItF<~h zqpRmGt=4OcaD<}>;Yu%5u@dF$e?{?5_c1tppJ%MqTJMBF)bsU69n(R(O|5u=;}INF zESCQVSaDeyu*Z>+$C@8;$FA)s?{*AkslPHJC(REls5EYdDi^;1k)F}gl~LR{B!EtD zAD4V3<|75K0!1ww?+irl?%NaXxBeoJp+pBj65p56VSuQZK#4X$Q`0imjVh&Gjflq; zrCED25ju_KGWGch`m3^{;$L2eZlLRdw_)`RpW8XT{HR=dF)zUA^QOlIxCnk0@M$tP zbg4*BUgn=Yj zz&NTSe1%9PXSRQXw9n=IJd;zU^?iR+q*dG7KTvo6(-_?ZRjf!$@J&luIk(XETtyf# zHT_4-CT(upcKZTCgt>&G62l2O`%BAe7kl(lad3>2f5&A)K;{ev=#2e~i)r!=PXFcB zJg!`;`J_d(8t!c|aFMTso(VGI@j^A~WGUUMuaB39ajW+G?RfTkEn*&us+t3nF6Oy* zpK7UYGSbqTj^mTEe_Tm#516)8jY*LO9Et9s?62hJ5^ zmhikUtJl3}dMzr5l{oi&*T^L#l>KAMMO*SZx-qNDMh^JBv77yII&O0b2?^fP`zl?P z8OxjN>+NNWJhOv|jQ<=8y=MKF9QlZ}G%Vqt&SY@-A>XP%EBSg(4Gj&xKmb<`I+2D* zEX(uH1Z1trc9}($_8X8?)v_a3fyTyC=$0GJ;2k=zziw}D^F3{a+~a-%I1XTD|9f;a zQ@3XKe5ooeEId5jV@h(xRQRO_+ScKC`4gDE6*k!2@p8uG>cg*JDRQEk=K?B;wPR* z+kLR|-&Qh_78TlfT1IT>-);{B8o#x@t;G20bJ;maWg}>O+!zsgycDTc*Ch<(X#1z* z7_BJ2twsWWC*-cEhdKS8nQ%NR@-E!NI{j58AM3L4#WBYn_O7 zy_D&_hFe9*8&u3BK>(t5)YR$q^^S1_{L-SLvP;zno ziR18JfE8e6O$rVM_kK7VoycGX%rHRjFjHqBNG`S0G%Y~J#1wyb=gMKfCK*r2|1&10 zni4{>C>sE^Yt*Bq)x0Qg0h5kEB#;dR8h{lih^t~JmO}d)5|l)xFk7X;Bp!n;1Qevp zJHy|=5ij;>Z zGvS}&Z*2xJFtCB4p$4Bt`PsQyNiL^srbWLvkTt_)2JhftVsf&ivND=gMTPIja#`Fc z6gd#eg^L$$$G`WW5~2#Z%1Q_ei(KE|(}74Vpx@1s&z<9rmtYl;kvaTYgWiI{f&b}w 
zP*FBx-3&aW(d(8nSVTB7E|P?gv{3bM;kWe8$n@%JN3g$}@5?Du&%Xu}s&J%uDth{u zwd^(NMAdm8cM?gS6iG!#7?WW+Vp^We;yv;)G}R4Pzm`}UK*wD#WcBR{X=utKqv9xO zd7=^$Q0A@O{M1g-a7#$y3Ay|Fk@+PZ9jV2{!k(UAU8cK`QE<`vZ0dS?*F6}`U*@Gk z!z2>2lKQtuM7*w3G}>M=ZI(Z7rzg6Pz5mmJ-BTGr*3G^-ZHHm8_`gBsYF={@`PF}o z9m-mgZ;*t8Es-9r=62!sKBFcJM=mfalLw-cxe68fx}JMXII(4T-QN6!;v1T287ldm zD7w0X$o=@+DIEtzT~z=Dpf}vMi(YEq_rYZ7OgTEj1~{u_` zi&#?52m)hciUo@Kseh5=gl{7W2On@6+;BfaAAdssNfd6EwXt6IT{9jDSodH1&gN*n)IL>`lLm`s{yfn zz40I%kir8urUBfoj8AURg6_3?0$ZoW7Dr1&}#+&Y0)BoQ;%*|=FMdHVZ`fs zjRQbTxSDuO1!RR`=}_T9k30c-y$6`f?sv?dw;On{pdeEVvj*2?GTB_7Zx~4Ys^P)~ z^dOwLmUh!)mRz3$D-OGr&>72oht1oo+eed+;4pfhBOyi{90gHTjJD-v1U$m&G3DKg zn_Zda_S7}cIr@u^Lq_{{`>*EF8NtE6eZnFHjgoP_UdvHY!Ucv;SJ6*T{m2R!(YJPWMbjs;4Y+Y9L^1M2OXPPbZxHJ#oq)<|+Qwu|r z*)LU5)szJ406;DC$+8(ptI^PK)i(;v&CC*06Q%9-8GA`cL?tB9v5$UMB>5d6NJ*8| zYtxI0ia9OsNzwc4DU}*HQEdI3$Vm4@s~VJc9gf4k^L~4KJ3_$-rH-A*$xRh^m*)ry z;^`wJg@J|bHE(jd8$sn694Z7^1I9I*%O9VJh=^2Lq7eYJR3alyO^eY1lT#L#bG2X% zuvbtrP^8SyV{|re0OlQ?0A0nT_tWQ!n1I)PV7S%!ix>%{Rz?OnW$r+Kf0BGd4lqi9 zgog*WISmUImI?+AuJ(8JudJ+b0IjHp7cw>duRsP!&`yQA3jlj|HfsZd3)B$vi3Rh3 zgsTt;yR~b8a5O15HLBN;n2kQByxh{)&u?^MgcvaVwZN~?=Md0l$*jDOgWWja{IOoK z%=D3Nq!^hRwU{hBZ9A@v425j1`4Ukh1=NIq6y)w~8C~vJ*+xkTm4=3{_ZCGSv zDoV<)B#;0NJ2DW@gr>SX0^5&)g*8_qgC#93Eh#Mx_yZ_GLBXA!ow0YEYIQm(DXDA$ zKU@QYP3a~^z#WrQQqBT=n|%B>>V;JD*?;X@=W29Xtv7KHzQz#=$^-m5PEJmM3Iq8Q z;Bfn_o1KsTkrC~$BL6tDdg?}-YqmDq6j#KWn18cdBxvxa9l4p2ppe?DphHD z+)f~W=<5>#Fu19?IT{Me4_R4+f`S6P)czvMs+|<;jb=!0AdO7>60AkH& zVI;_;O&W$PB~?sQQnADVsPzG-1H84ersfY#OYC1tN>4A(1qtK;W({ly0DKx^xdVdn z^XE^|q8R|@2bvFMcSc2ng@d22BZxeo3QF*KGem&`jxda}ii#AUSK?MaF}N$wl&RTF zlLnJ2t=jQDO>v5NDN-~H6(!Ht*j7%URGEB)IN)}S5IAUiaMVx)oOc1p8kpi;Tw0uv zogA9iBj}XFi;jY_<(XSe54bL1D4PfthA4pI^XNrbhl~Sth^g#RK()I;(vyI^vXBJ* zp2q-|c@1Ql=1pjLiy3d7e2MZog%)9t8`cd#+GQo5CofzEPs+uW-ucGY?&>r!#N)m9 zy`jO$va?G{k}F8QOr*@^?hugiIbcK0%yzc7rBqchjJCHu%T>wA$x*Pef7Yg&`IeAx zS6FxE+9W&e(E^h2$MVRK(ECZLw1bPbR@MyYkQ8{{;jPW~m1>s%o5St=hH>e4!_Yd} 
z78R#t^c%WJLA;dV=KO%b^l+c`HglBW0AuwH=I$$*jC%Kw9p`kuiK}SS+I|hE{^3T) zB|T@HX=Yw2u0+jBACUeRG2+OXs+zt>nq>~;8X*5`vY`?VUZbLQ?}8DEug&>$d?Mw0 zzphYRJc{(2g=JE#b&7c+U7(u)NBj$#wgV!(g*sIYLO&h?dI%c}Fo;T3G-)E6}+qv3JNvY|BF{|AKMPjKwvtiXGTMj@MtQ8aS?l`#;31uh{ap#b0%@HndE@ik&qRbBLGv?*)e!mX<`&Z*JZ`eM_l5!PQc_t zlr8>fvWlkagb$J5AjS1Ncby04y!Et;cRg8XPWBr3!?~a&KsU$A*n<(aqsxejlW{^0 z;AxypdGIgy0AgLBI1n|(dBRVJp|{PWh5x$d;T?Ez?pltDETHc?Rq*G((6WrW-mqs-%4X#x!6Iy6CLmDl3_tP}WZwrErAiPmW{ za%YaAsvC|~?!qa1s21oyzpm99&#vSSXNYT=5C7EOqf1lb@nM4fu0s!O*zUiXq&+N* zLM0#Pb2FgnGucC9UIN%x-UkBWo_t{Jy13@gxTA8%_TfQQp8q&B+~dY%Fq-{0&wl40 z)VNpSUg%xtxF5hg9$@rXe=6^GouKC5l?zw+5(}>%+NL%#K zz4Ng1N1ywA{BWQbDpH@T(!uZY8OT9LT9oUxZ%s;0>}3kTFV@kedbv9hj+{YA1lr#E z3!KYm7vZI+`Kk3!VjsC#~^jN1Ahr){WJGh!&dUr_m=8#|PN)Ck9}_Pc2jMB{)+pxi87$rRsyQ=2a2 z$Mh5BCoObDrW__9HyazcQTJarKKJnA@?;Ao4za_L_~HrmcnUz?Mand^t_dt|z-3TV zmLvns0i2T|=`UZPPNYPIR7dyNK6pH1=I`iJ1?#^67jb&YhOh4wcMq$?PHTit&(+%T zHR=wWNn=&ecrp}@;g#5}nsBh(EY6bGRctbwn4)C{JjVW6j>Sr+=pTc1vHK&IyqA=J zSFKJg{>>+9rN_Yz88>P>WcXdFm@^?KOF`I}MXCZPhJZQr*4u-EsxGk5n4bXr{d&Pt zk+%5j%$m+u)y)q<~levcr3```>WN7Tqa6gGW;MR(&HQ=O<< z{}A`P@_2rcb-0_}!2rBAsN6|94!66KlZfih-oz9DtEhS1!p9AXUtjGiGYAADBN>aMSS|&O+(scW*3e58*1+=%fuhAYa43;z$V>s{+ zev~h1T*kpK24r2MSP%DGK!2j!9hX_j=JatGJ1^x$5_;2G0CMPp%@-ogGC zRket6hP zs%x2lBD}l699}zR>Q^-0%lP#FVWi~r4TnP9fGt6)Xm@mMa`#w6eesE$Q;$9J*2m-U zDpGXg8>Bk-rU z)ta;QPlbKm(1E~PA7{`!^U*y;^}98z#nj$*5oqi3Qbtu1X69FcYgFWqQ$KtADNFfk zb-c(?t87{6ZC7o|<+f*R1PVUyP4F)Q&(5p?Z!V#gDiqz?i1BN@M1hCuem@Q}vfC-nge{x~}&Q%axiU6^_86!iKMUVr~{GP823*&bs?3}O7Jp`>SK^WqyH z)?ma)+<4U_`t|C{p<^dm{mHkbpPzkhU#*q}v#uZ{NXNHZwzG5JAJy{c)h(85r%ynV z;QdcGa3V3eN(z`b<{~a0K7i!Z3_kp91NntT<=-n53bYz87Tr994uSqnviqHyIdUE+ zhGI^R=xQ1eSI(m;_o`%0S5H*+qk3=UDp_i|w6@WyO!HAk8?3yHQS-G;ZZ9sQzL&{` z@;LF_j=^hv?Y*wqr4}l;dPdomzph!XvKb@Cx3zsQmHtKbROQ*$v2-UuNM~oJD(2a8 zZA_+io#YCg=pwyZ=f!2z)K1Ihb1SyQ+NY^%>mMu6pw|7)G&LVr&~nC6r5C+%^WBv{ zZTh@llPNmByJo&Zc$fXvz6YWSW77)O%yvQt2nnqF)-9rrrT#17tj#Q|mTf7aK=SM}C)O 
zWZVz=y_J!XX-~AdU>0%*DJ_dkq!R8&Gxly0QE8uQMw!{X=z~y6$I&S299(O4FdxxARRJDmjc7k z-TChEoUidb-}in0{ax3ObDc|WX6}9Oz4qEGK5KpMfJdqd_*coV;^5%mD=EsV&D!wNq4NIN_nnzOnC$o-A&~WaxAP7E2)uz zr8n=zURS!Jla^CpkdJn__OWaIr*cdeiZZll`$s?n4t>MfTn%q$7h*8~&;4<<)saUZ;9nF1` z-K9{&VNmmRSEM$p)g9$HLWO}ZSnXu2eO?vueh80Yu8VbPJ+L~g*vv3eshwF)^a^{0 zTBEC-Ij(SC8cYatI@s!!wh7xBs7f&w5x1zVA;+BSf=M>VQ@rn>iL#3kV3P zY~8oA8Z6+YS#VhYb{G7$qB}`ADNMHby5G5fH-I4Xd*|{<*%M!#ou#3;tsZgiZ zUSPflo9Hsho+6DR{SlAoPS+_~t58BCSN$_q5K{~( z+t4;3fI|D379iNU9+(w);BMnJ?)We#x8sdg?20`u+0Z8+Qy_vFGLGMddzW z7Jd2cwHxE{uW_uisMebN@cG56R>bN&FEGJa9i}|@+I|kGAJ4s*t8!% zv`H^GiyPlv%bx!_XiA(UXl2F$pA)@*QD!o$?CYfa5+^uSs)17!qXH7r?%O$T0^`4K zrP#mL$fjAq!md>$5Oy91T4QYn_8sZvy*=O8G=5FZ7xM2z!toEc7iIgadsrc`AgM14j6>iW9i@5ng$!5 zsC8w=y>R`hc~3I0S*q00a@kxgGK2^3-$$0V?XHYK5<f(-ig#gD1pr>yU;l0<42>b8IKx!hfdb6rA7Ny%X$GfuY?ZrOh(4c#qplbw_$ z#kv6OkIuIM=9FTw?4aA+TtT-u`Sx&R);)km6B7}ACwbo;&)YPfBuwvYWK#h>9H>Ba}X-1@A5XF;_x;@5tc@A)u0od8;d$7` z_-8LGO=9UQJo=p5H57wB%#03@5Fg(G8Ge=tV@PlcVocy)HfRN=}`J@hRXiZ}On z0)ux;l2uuc@WE-9zbio74>@a1TaC;ci)=<59~e>!s!jv*Ik^T5wVmM=oZMV}MCp!W z`bwOU7w_II=TDWpey`4bXLB)_n)kUcg4wuyP-cDGkaWrUlAIhC zti|tUKmReccCCKw(BQ*LVbvtxp92BIx3?HILUc1O%xdR6rGlW%hBJ_Y{3c(q;c*U^)W zjEpS!rmPo1FV`hWmJJ(qFS06X=o8|(x;Hq}emb~we0sFjoqzr#je)KpoZ;vBx_$um zEjR%Z5fLH(>PgNcI!CpZWKqYKt8Z5qj>B!VZ?3Kk3}x#KJp<}A7jq8h?6*gro*vn; zSNCaFTI+zjuzN2ybYcJ<_LKWQdFMfrje$?5N?fAWC7k2mp0|PD)DgpOzS>UO$kWI} zE9n%_?0K}Sv-0J`eV|tFeyNo&563<@zPRHN3GekI2n+3* z;6bzraGR#5qFmll4qC0-guXwop1FXdv#;>>;PtYLrdRx=UX8@CfuU}tT4YzD4!sXV z`xv^C5#Drqc&CcgLSiBO;60Dy?&XBZ+8~MH6DFNXZbu{DMxNV^Jpb4$oHi!hCWm{Z zW|t%|a!M7NCtV537c%X7FK22@lt%XA+EWrg*?}BG!!~r9U)@VnVe&tM6f^tnb#2=yP=92Wj)O$N+>WUQnHZy8g7pYoGzY zHa3nHuX6zs-#^kj8<6+7=Y*hr$fJgmUl^(qet{yct^Bc1ruUlRrb)vmzULkEIt{(v z>k%(K8kQ$r#}5>xi9-rUUE4!frrK465sk%H343A+$ErGA%*RSFIi5VX z8Wifiywi}|*3t_T(AN^e?y&`jCnk%X>~~<%(we=ITK;&^^(U7*+3ny_1{*yqI=B3o z+BQyVP4`Z3c)@!|jE_wAQW%KWHsrSxS0z10-R(AN2iuoqMpa3Jy5emT{)DetqD0V0DUeNkZ1uKS_i@+>#gFX)RkBBa$j 
zq)QvEb~R8R2+@5i`j9Awq5}P~`fNP+4JJ3Qk4+9onR1NxF3`3fs9Sv#Tf@0?N`=_+ z05S|ex;5$#DI1#=ZY+Kdy9Kna*?{jT!cYC|&}w!%?a-Z)SgX?SYwf8!Vsd10YEss=8zkUFdsy2MvEZKF^J-Iu#x%$VoWjY029xIMOGp*@kbU@;UK`HRc-M zueL)fmk8Z-YpSaC17*b+(-a@iG<--Ub5S1LsT;?grNj(HL8RTg2*qn3r@-@vS`K#R zk&g=aY|Z0ThDswG47W2;Gb2Z<&Uba~? z{F1@tcE<~~?FO(Gu?CYebcEjy`p}9iHmyZH$zHuuUUFv1`DAAYJt!6Q`7rY0{A_y^ zs@mQ--+6U>WIU?sKkPVqo@aUcR>IClCgN;};`qR3S5?a6VOE{K^gvnBJ-M6ps+T-n zzCss1@$1co2$?de(#i9>?9ZTDv{Ck>(zS_}H_KJm&S5PbBhDq!3s)Om>I@KC!U~absQRb_}I;G9E zoF^|sjvcy~NyJWbCEV@H?ePNz$eG82J6%=1$ehb=I;60dI*GfPr?wi8L*nbD6bhbN zocUQx*{nJ$kMk?67G7VFNWH%~wHR6(~rvkD|zsx!=QDgD9s^lAfa>@r3s}Ue#6VtnF zF*bb%QT{5}O6c*aB?>ayJNe;jiEHM>MIwh32HheL_c|M!{ASop=HqjcuI2iYucE5i zR7Bs2;ED4#_}mN=QKJv6h!R!k4oT_pV4Gl<6*Gd~wK$2by*jF-^U~-F!*)QCsJ4&} z_aNtEmwLyrr|(fiuunW)$70zpLbWZn56`8FnJGEE+VD~DdsQEaGHan$+BZ%U7_g## zd{0+(>q<%i563-a0c!T7CrG61h)`O6v!w{xkc{EjidW?gzJ~kum?Ia%B^Sr$QwQg` ztK{w^)|`WA2M2$M{7L1}?RH42L@!7He{&VG*ciY;l{EI)`JpY5`s^sP=_x$B%O?tU zQk@jtPs@17=4NIbINQc#Q59S#cR|p=Hd7}XYr_R2U&7;ru?kHlv(A0G+VngVjW?F}%)z#)-$AZ?%UL#BwP?V_rDD4v zBP=LFk9m3T%B+6mZv7p6fv&*%d#6=W2Ua%kAbr9e?BdIFz96Q#Nb}2J;X<_6^?I%} zrJ)zOW@bZ#v)X*MQeuii@|DjOZryyWy-1+XsZggL$tWsPQqnA9RF?h`wi)^`bjCqb zYEiu<=M#zNK-s2`xWW|+QWyO!;h{rqf&i9B>THHi5&F?-8sWrq*42TMnus2vvy^i7 zjIrmLBTvH?)Kd5;{4VPAbLdPLSvV+MDU6sqq_)3_d@O%#azpCUb-(rqRtLR;6lTpt zHDRN+jRllM&p}Fgi*|dU&``$+TQpA-G7x2~u}M?z}*W(s4(lv}4}VdRcKJN7;>@(qTKUXj-e(9QwyS zGih8a+*8%rdhht~R7RO+f_01t0@{qm_o;V=I<$(I2)ifIy0+`HcorY-M$C91wqUPj zp0b+UC%vQOZ>pJ;CjWQN8f(q&^@BcJdP7c+TjT)l(8h zHdLQDJ5+3{BL?sDLA5sVikESlLMcV3Jx<&?^$XQAmH#`Y<;3_wRLvV=-)PBFloj+e z9B71P9z>c5J1Q#4$b08a&v0^nRY4cry_dvwg(jxLjtt?^NB=yIh>c;*1OM z&O-V17$ez~8pM4I(s=<6+Y**KNpJT5iQ z1WJ=+opBxI`Q7>aSskmB`!oR!o$r^@VVKbHX{y_Sf>Uf>^-e5l=eiD1Eka7-oOb4E zX2Y!$9puiHX-i02>1@T2W5xXk?i$Vmc=?lionOA`KvF5=2aTok_KxdC-g3{a^+o_t`98k!U=0_qMHXDx*;mp0V#!$Gl5@;%A);VnV53 zDc+BaKvW#hQLIE!EN-xvQN3flK~Xh6DQfp&^qoTyaF9=y<;T|N$HTZ4sbJ1GAIpw* ziCJ@Wnk#Cvt3B=e#;RNN%H3yQ-hx?*hS51#M^^|EFS@0&7qhS#@sRY6!z(i@)`Cv) 
zmVvm4_WlvnOIfo@Hj^%vAglq*Y}-?97v}!zdeKLR*bf%Vec6!7p1@yL!(CfSVh%ZX zG!?NsH5ZoZ9RigvjBp57DMUU<~z)sk7QpWDHTJSo=M8q!pPP8c36E&h=eS5OtZ3Dr0ff;!|&~$%@BTip7dXvb1t0V#~-UfwEgguC4Sn}&A z%%l(T+=UF2>5Px;Aa_5+^!Hsviowl2PANgUR}ZFItNxlo2xiJ;4xQ!b=9 z-&Y`MUB%%`tkctaO0T5okin&lqx&>|Fdb=a%cUWv((-a|^9C};I~_D|VL`)mhDG{? z(pg1^k1yUqdNB5~Du`R@XuTywwS68J!a$v!%~;(%?p;#k=+KZugUGu>;(b8&<9dE>=p5~vkJ z3-zkH2cruF1H`770vGj3TsUPU+{1HWy_uiSX&=|kq77%k-tI}RhERiSvrr7HLYIF| z*^=o?*p>lYJTFIovOL{s$)%w@M@K!4ih%V|dR;k!O>71d&!C=W{G73+=<}@zKVvUf zNm@u0UcT1H12OuH7g0lK!ro|JNEz~}no;`wwEUgunAKc&P10Mene5RZC0h#Ne4?N$ z`Q>3vc_BARFude`T1RD!POWE_H`8jg&Ps>)RC;}jhkCu&Ww+^s4}2cYDtsm~gor8& zPSzypd(V?piQt_iFGypZWU?y0jF|TDPUWPF(OtQ{k~*1V9cr)?17IXf75z|^=`0mEEZuvS9UG?|Pn z^FLe|?|Fw>sv(}lgtD*EFnJsVe`Q-vS((cg-E7BC zuOw@wN%c7Lv}Xcs1AQoE9)(9KQF30n4Q)cHbi$oQO9LyREl%`cW8s_E@|wA#^S$4*e`n>YtJD!5`)Qk)mt98 z>&1H2wpy`Dd8@AN>(8lCv8T(cMUSM~M$EkNV^C_HjSDmHZ0<{+joIKwr*|tLV=c;) z#n~E)3~2f9AlVocf+I_!%sSr2Otlx?$bF!sEXFma*&ET`;~178p%;5Liy0RUFef#8 z(61ahqo>s&9@Ww|Z~w$}HPOa_{o@(C|teNmB&*RtS2B0@&ft4c+^R{xE% z*lv)A1T-y}dM7YA^&rApD)Kan_&*kPiwiPJh5*ZJf(pG8be zY_h-pT{B6gmV7EIFCswE{|ul#KmZ>jtMvfCl1mF-_M zhDE_zR8uSre9)b7N&kU@U2!4JN=r)%cAOYYtH7dcHeaqW6f7O=3>hV9UHk(uyCO%b znJ$k%qkpi8iF-IkIC@F``*=~FPV1{PQ0mwmGV)H)y7(HlzyHc@vz@tccRGkW5xRNn zC)9%*@yHhfZ#b=SmVz;v-X?u#rMKTMwp2HT1#(Hwee3;!uaW&P_}T~DrgB?tY~^N) zghN|VO8X$w=H*RWnRcW4t-i1^^RINpCA`g=rvxtJ7akC^#~M!DHKLa7S; z@WxEfdi(sR4ciV&lERi>+mOUK>i0?Ke#1nG^LQRe?hYFpVv6+EF1gus=#_3R(V#nN zynAYNV6XxiwS7sJsJzB1WEh^RD$7usN?7yHOZbSiY zlWK|tD=4nUsc<2hmk^DaF&sMTSmXTz7k9OX8U`iC{uh3sWE@Lz#{u1Qz*#) zHTU+Q^=Xx2I=D6qY5`}bd(vCVIEvjrRGZ>|S8V{8X8=JCiXP!ZyM$z~RtdR%Vqw4D z&ve6eZPLb&2{sLzhcqAf`arN z6#8ug3QWR;>lF5YQ~jwW-6U-;vs{BK{pPM@s`i@e5F{n1j;@+%J()c&u*uG4#;om6QlI3XO+plER) z81_pUS{@$A0T4DA+jjq6-uRIL8k4%LW?ykos~c6X+Hfr0EkN*EGqqv3g`%u1hoUNt zg*!wY*~4MKS>zwL$&h)F&VKoRvf1j|IonOQ*NKn5s$BRf2^o8t5l=<$7>m+373Wx@@=_ZTAPgpb zmFswpkpvGT0ieG}qoYG@bOTu-5JoBb`~>*|)Dk2AJZ?WC=s_M1`y@R0ZHCY-O7;E9 
z_R?0{^8n~}SdKohy8+Q21@O(TOM=Vzc)b60zjwEOdE6i1M5Q0ZJg&XTlmXUD#i=D( z)@QY=WvT89QDR2#k=S*-zpJLSc6xud0x{Kg`qxlNWu$6*5BKekuUdxTcRf?1nUACx zxF05S8?6Nh6RyY5w(hM(_%Iv5dHzMmQB`M-o1B&YSD2iE6jCJ!f=dy zZ=^;;YV_CJODd63-UYEbaDs?8o=2VEv?gPnYqxhyG)S}7?5mb1y&Dg>q2^&LQ4`Qm zMPw_5l1RPLF13_RUDdw9lF4p7##AP?ezWU6S?Q*0Dr3KDO_7$aQP6**@g1PT472fH z7EMGv`=jb{4_nLP)B^}dZ-&+Y3&v4Fv2TzcEwgA5@tGelsnRs}7=BkLV51-m{XX*f z&`zA`Wlyzu7V+DJ(PjfzRWcq*aw^n!pv;ctH$s|wiWcce7^sLHCptH`;AOLuc_q_4 zG&caey~X0J>f*^GLmnGewyTE;<>C~Us?c^BDO8Sk($>VcU_?KsoD5@qWH1_L#Qda` z7Ek5)L<1q(uysInkU97_{E5ELKKB{37RFIzO2OG}@ftsuc&)Pg)8Ly5`o;;>u2mc~ z<4@U|50U=W$|%b&_mb3)ai>c$gG2P+$KE)&MeiXZlkTT2g+)vAmGCkCVcdSG!tNlb zpj{J>i^at_iqmsP-{JPAQ#g;?3>2s|_-P&~#>1KjF&(O4`en-p9Uvr_p_p-rFGBrr zRdhPWpUF1jgLC?4A`RI*!DWsUKecGGvdPHY^vfNRfA{A#+^Y{TB5IBK5S9S0mFU zfkus?2dI;dw{ueKoC?9KVlee&y?8GRxnX2^EO z&CZ;p%i%EqO8)Ky>%99B)-rx8r~XreIuKU3BR{8RR#;mDC=ee7!tveiQ;1O zF@d_ov_t^Qpz0i@ps>5xR<0BL`hthu#mpVO)~*nG=G)`UOacH8XY~=zEJ%#4-l9Te z--{-CWXgi)X!K-i?tP3roMt_@GltRswloV#I;Df5tda%4aTwz<@8d+n>)(&hh*1uS zYU?Hd9#T)+&>l&i{>;=-NwmYH>C;|)X>)XS*1U6m=jyB#2+r^QYFhbGSH13+pnA-) zb-5~r{Pv4=V?TVNceACTPfYq*w@9N%-Zxi9lev7(nZ|TV$~h&}ZJ~=?pClR2Eff@Z zl)se~OBJp}S4yY2{#F`ghYP;9uZO=tOnWbN zCzxN^)REJnO=~TIsX-I`y)6vOv+ajB&}e4(hOvjNMpl!R_y8t~ye7*RE|P zRC1)MbwuV&YOMd5P|RUPQMd|M5l0N00b}O1CE_=^zL3)twb0J(IkB>CI--lp%AbR{ zB7@Y+ud3f$dIr1AN&+n)){?lvy;i|K>)vDbgvsqRD*J&SCEJcTZ*K26tdcEO59VlEujIMUJzMHtR9O`ZRjTQ>J0irBAv9&Dp=WwTK3*; zkq=PQOOKjB&b@m`RvLOV&P%Y$j2=(CMw^<0#^Ns(xkU-K{`z5tjltkDoLHr8ijS00 zx!uoU0680{vQz3nyf|&{Os>3Wa8AF!-C=lqk#ni3cqVl>+_Ddtn^q#GlbT}&!jxa+~L@ZX`kPr$LnPMr3bSAWI``sqLE}^a^iSV`=Y4I*F9{3 zvH6$n_=L?Q@We&4VK?oQnM127&@s~LH$QgBlDcS0t!fUwvUY;qo;5$qEbw4#Gq205 zvGmheV>j|kGt+i&Dvq+s+%x5!e%aiHR`X<8lh#*COU4ASjOb2|uRIxN2k_N$2etZR zIIL&&X#C_7|36#o%Z7(-BGh*t(9pD+L?Zr|IBJq-IXPK{30HKlsmvj!2hZ_lv?hO8 zqTU5uS!=NABU`!`0_^L1-=hp>({JXYSP#CvLA9V>FAz4#67*Vgz6<%gus(SF)~gyv z6Ek*JP%ZjlX7q#BzY23Jp3ol}mV*7@G$KGMPg4>E37IDAdLV2hMHAF9dQSpQxfX7Zu60&7_W@S=>LWF`!<1+g#~NL@7~ 
z{jc)kGG3Ev!;oReQ!7@VIqOIUDK|w_AGhfXpXc%-B#%pBNGOZwyLN5TIgk2_drBp^ z{I3o_Ig)E4kNHk)b1BoyzVO}hYySgN7U$Z?Io=!ybNIsWNLh^MiSBLZcLq#}55tWL z#_uSFdWq`WvQyY_q{ov0%y-8p)5QxT_@0AvWol$CZ;dHnvvuiVW{zQ1PEbZ?B*UBw zGdrH6R2YM9M+GV8lx{ED<0KV7TCAn3UNLug#aPs4mDgn5{URkmr#WCM^Ufi$i9W&s z2JQSNl>ZUCy7)mhu8Ave{D@a(k2fpOm9@2P>5@>zI4F5L`m+IDI77|ba;TNRR$X4O zAH?B#f>Ys5be@LBg3!XrqCkS^4#|S67CtMvbNy4ebHGif`V>5XcxzQVYKpVc6n+o8 zzZnDPrb>Aw??*sG$Bj=$E4gaJR?#sR_kK8w5B>h&dXtWhbaiMl{8J2ra-iYlQ!$sN zJ9||*tV|WQT5rfv_1zl8us)tHBGx`$USi{+=Rt~1ybZO~;l)aR1MJT@TD8mDm!>NM zO&ZPIawiX3oP&x6pJ=A=ROvyqGop(gsXhm)@;fRlpzSDiC(l|hp<>K+SF5E+>mxXe z=Mo0fikF})-li7A2Sf`lb~kdzydszJJm|@ff3<2}|0i28CGK5`Mc-eG7`@#>{^8w` zGif&wryRExL$7D`6OY>nm3<^!pLixbhZtsZH!$-cxu+4>D=B~3n>lxE$d95cy)TUkm*RfJ&#!aQfFHlM8$KjfG@Ni zTPGr^tweh2MXNkILPx4k#Hya$+O1JlCjAudhGe*RRkJ}wzDS(qvUz1J{STm+gth3M z^c5p1hKj2SS;(RkY@$Y5(JgYie|$9Cn1dU@I-+PekzD#p=UA&(sV=Q=`aK7ImDUXSMktq9>uDZ>e<1 zI7`PJChn>_SY%*TVb^dn_}^^}rgq&t{fP8Z=Lvc@t7%d)!A%V^Y3ERmsbU3HbNX_{ zff3Km2~^eWPG1kkee1%(s!Yc^^ONK?%uvNOlNMhs)7kfr`CL#Bbc#2{N1W*^SV7CG z$CzaTzgu}QIlqn>_ZjPFRD@{g56sq~-oKC?4O22U*{;szbL|!qC2AYuxj)3tqtxmZ z5oWFy-Is0{vB57L*`0vQQ3$0h@iHve$a~uU-}6Sl=fOif0YL5X?^u`2zb$uk0E+!- zw{bqnk={B6_$?j-`0s!C2!9PN^k>(JrFBNLf~LkFeSs#8BVn0(2u0CeB4*bv2~%4< zJAVSIQtTWGbqD^O1LK`L?#{BeId#AO@KnB#w&1@;&SrFVO5A;xrEF}kI$}Gy!L0+J z#qN(=aV>CMCY*i{)(&eHl$VgJ%Hwam@ET(P%judh`TO}G8;{+V7UhnoO*jEDBG&G)58avMpd8`o+Fr170?SyX;{TwS5jP|vM3@Njkb z3}B7e-q@ZJPYj;p zu$x@EZSL?meTM)_k(VC<=zQLJ@hj_=ksIMJQ$Gy)p-nj+&k;D4djbS~ZZdcSzga@> zfwfbS9f3c?=OcRD3+JQa<6oR!3;c;{mVUfIDDjPCc^(zOtd=6MXBK?vd4IN=a4i0| zgllyL02bRpM{9_?1&(@^&6p5kJ}tBtZpet%LK@a7ntvM)M6m?^6#^*VW_$u5%gk=Dr556V8Kcq$i?cUdPp- zm-{E5=QRhP^)L^d=fT+9i`Tm9aKc4>E)%M5h|kT0Nek`G8T^d#!sVO>TLg#nX9gY@ zmVJ)p(0NXM4u_22WB(<5igUEaY2o>5_t-D_Ut4epu(!82q#7iD@44NlFh_(FF6DAp z8}!M~0_a;PtnIfi2XX3GKT1naFH~K+#*KVbX**%LdhzwgWYG0qXCbC(MrQ~=T1w$&sbRumw_(ONjg9lhyso4ouDiL z=s4J(Y9Q|7*JFu-R^WR6Yzz>LPQZPDR7li*Z>g6@T0Ks#9h=N3HC5V|4=I3}aRCC1$f2ug96E-ZT|j(I0T+7Zi1SsH>1 
z(=2R$_oo>JGgt`bQj5CtuvBt^^IIn_g*T*#V5F7PI=*XdgQku6}rsKj#cRxYjK z;rI2z#LZXVaQ`e7w9x@a#;%dCxpVcSx)o@M2Lq?k`A-0*(#JO!x)o~hNO?Yjr>~@k zN$~+v6rjvtR}GVJLwRH}|$jl4eq<&yMnjv=5uO)gC4Z zRRHqMSe0EWAi5kMY>fe7uxS-p17QGrQw2U-8b_1?iVc4Ja_H7jmMY9r3YBRyR*98U zeOa7x44WySi?KgW^953$@;d4U(fN?@W_u8&i1ftE?$a7@8LJrX`9VfXq9U!1C$2QZ&~LenNPfHfzGUqRRMyS8yNcs8_CLV zglYy}cQBVUm#)9NJYtOHbS-430!G-uVm=(JS6(MOT42EdU&hLItB`CLd@zdDGPM*S zSUKzv7`O$DU5)$;R(UO8oN9p1f@ML0FAI4czrw0lwf#(&0%HCFE0}~}q1~Lt)?D`w zp+jOXHDdLwUa%?aK|Fz48L@N0K^#N(f*9R~+j@BiWp+TNqXCAjHa zSAynPiLpQ8@;^2UL50M6St8LjXb>RKliUFlbnUcVNh~`E4c2y*kN(YwmXsCfR z@&INV#aJGq+b>9T7>oT@?NiWS(gA!U?U%CR; zNoN34U76Y`pw)j=1zH!CK{RO!D#CLlf4%+!g4}Q}=-z4!U>41UfwIjY+{kYAuKWW2 z-$#VwSc=j3Ra$nikD^n+0zY@+ayu;`69*#BDP$VXbul0>6gt5?rNmZy((A~^k)1CeR8Lz3LqbDiPmcDI^v;=( zfMLv`Ga1=G9P@nqf#RmI7*4m>>FW0|)n$yJ50R)gXgoh80fu@3aK#8p5Gf;eH2MQ9cTVbg)6fT3;Mg=H4 z$(_QuwX=~bCezIU4%jdaHv%y9?0q@@z7csmcef7iEf|72Ri6EbGXPvI5komSFF_9sTN;AHYd|3t(B9k4}G;9L<@aGL+^9gRw_E zn3VS(;99cZHgJk?wsq|ntOD+s8*tM2-ewmd5nq!^umSBk4ah` zDy1w8s7aqQ;lyqs3i_89mruSz81RY%ya z0J7)8ldsYdB>&C`P$K!4{Okbpa&B<@2=oGEx^2xFKKXvZ0^JeApVdVptRw?S!S<>mVs%q!r3)iE36=TC+nagH57;ZZcX^Y@p=(D zz;p>)0zXNa-cRKYB}K_0Z!kM{VfF0Ee9(GNZ#Vm4gQrth5 zvv7H6lRM;6VEsU=CxSM$VfaB&u7TQU=mg6`kfK9qmoH?rL_b53XlX+%C=bM&Et~#W zDr#71HeK}K0-GVSdgql55KbhPkf~rgez>z#cdu%ASlbPR(;>Q$H(!VbK9bZxC#F`Gwx(*N*8?exE7q8uF!O9tzzfLv>`#;z_ z3yAj?w)mk9r-z-@x4(R<25vnHFq297Gf?7RgyPHf-w|9I+{@cDt0C{r3JEkXy?(}kcP;nT2*N!N&#Ss(T)!;wDT{%+ax@!z3|Aq0 z$fz_IcSF@(0`%Zh#w5m7L#8M{NoMX)BQ9}ucAA)SqrrzzB|jvOW~{9Wk8onB7fan@ zbey^=Jp5>Cl5;_4MAXZLqJ0g^OK^^q%f$#w{ZXo4aDPD%*~epjyuUFxQ@#*+e)LL= zp?PnLpxAqX0*BNkK*|)bCFNK4;n`EaJ8C&p1p6>FC*-=S5C2l63KKhb`8nVe?qW>> zHK#mWI;^I^!OY@|ph?^H^jo=uj626}d)9#07z<22d09h0dBF{irNP1|tSyh%vE57b zNOB>TT>p4s6Wh39uGj!l+7_`Xz}5vkVh3{UM(UKmh}hxJJ?p{4PZZck)ffYA`Su|g z_He`-w4IUr`1+8DWy)!Nb~`=5WLl4uR>IqhER0$x?2vR=XRd=%vnpLeI6bO%+y)S}dDGPjL{rYzbGtZ;T=X?_ zsqiBeOUXP^3!WZm^cn)+Cqus63!gson1MYR&hpY10?_A{5DIfid2u)S5b~IX=~BH) z>j3lpCV2hYtXViP`Je+!rYF7Z!#0MOj}J8- 
z=|v%A9bUUy)0rn3o1$5u&1+olXm)8LL%xZF%GY>r{Lo!6zCW}CY#dU2J}4 zjyGO4-t1Jb_i%%!Zpb7~-k1+Az#4N2 zy_eX0Lz}gAiWUYsQDoSj<~;lA#Qo^+*DJg=Y+pdq#Q$|~hF}WhJuhs}z5WL9%QqS6 z?3s4m=ciwuZo0^l#Ers?qN}uQB3CXGbbdb>Gwsi-GJSy=RhQ`aetdB)d_HJ3Mily% zRELQwQt9FIWK3vuM;Kg5Yxsskf``o~+$P3T@r17x72{s5Cz~BLALELj0Fhp6k~-Zt zUV2p*fg)1-lqUQ_tY*y93zn6da(-Q`6Hu|a%BX3UcXFHo0h%_eLKFqM5N?1R-py}5 z3A{3Uuhfxs;A_>?qlzKJD0uy0$2{X!h6=+XUa6L$+vl9J#RCy4&Z4pY{_j|qLV(@8 zs1cs!PVREkihpj-ac!OrRMnWo*tCWlDqWu0V${Chdr)@ zgm>sA&>JWCma7-jhZ%sf8I(R=%7&t(;z4%S4$g^Hmu@eJL57SgdgZMOGC@#@_3PJa z(o4$$6&L-|i|6yxn9#Db>#1^w56n%+x^~rhbuW}CvU>JH9ayy8NFmyb5bZmF2}A8? zK{{I>&__}G>=MKlbR6-Cg^t55mWNB`Qpy(;?e=EM%Q$atlj}S6$+bAnhgj-*BZ>Xa zjeyWqRPD}U$Q=(ZZlPLv%ONXv<9@AK0fqJ#q?MrVg#b|q(ksgjxgpPRjG|@Z1aJI^ zjWcv-l{0??aLC|K0tp|eGQwJy%qqf3UOBlfR#*)sDr5j&v}Z#tNN z-{jfUDc^ofmFWc6Oa!(B39XOaW3FfH-Cg3WyDY(+ZeAEUrFN*IbYph^RMKHR31Jd*-r;$>{!)^G8uN{?$}3*C!b-AP^Zp(E17hgjl-GK26@{#9>e*e z(cBuEMb&3EBe0@3Y6+x@H>CAOXI;H^Wp75QE>P4(rJFF~^=C#q>)hSzE%oZ*22k>N zk`y_I1&p`X^sT__ju)J~unA{?EbC#|K$4SX71EU~+OHkh=Gg;YK5_Bpdk|?M?!WSd zd{rkDvdlGygCdif)Ir8j?^Y9`GzYeQ4k7Fb+E{Jykpr6GjUFRNw=#tAiIX4VNPko- z4L;x+>+XIGx|$t8Ag6=rN|kQdNW1_t7+UbgkGG4-)?W3Mvf_4EDG$iohGDrAG1ZJ7 z{4!iY4qhT6&(X0ZwOxg(8(Xs}^rHdxA0)OvOmN;qcG(!URR^$4pc#f9$~IQXF}fiUJIvkw)$fig4sJ(- zI;y4Ha+)IMh@IB8-f?t%K96b?xR^f{q+QwwvDB+GLL9UtezT&lc{!fFQ6uX?cJK>k5<01;# zRix^KR$DC})+`-%mijC#x~XXV4Z2d3SBn<_O5VH8vRM8gNvPZ1pr$}Zx5jnNmJ0sl zKho(+{hcX+mwtB)d3IS42mPjY$y!dp9vgWCi ze5}{DLR_Xxs@`Sp%X~Ld{7#sdxunmug%uKbE(1=5+(|$HQ8mr&ph23YWpm=lS~l6u6KXYz`IU=&3eafg&`=c z+ODjpW3$MVX%qPZQEuUg5a|JJ7Hld_mk-;&VGcVl2*<^V)FB*XRnSE=Zmu{#G$WbE z8sWs5zr%o~O0H#a+mFd23gee{C{}~ZpGb>P*m`}$(naoQM;fLZF9pe z>iHhZHlWiYAb4z=O&q|#8NjD?99vb==JK;3} zJm_L>cEL<3y|ucQzWuum53%Wa0Km=-MwdT%kiD=}GD;Lfa=Jf5$WHl{Kmc^gzg~`l z{^;>2g@5JvyJ3rpSOZz<<{!q=&`1@Roffu|1?1a$d^*Z-S2LPvp-tSG~3^0F-Di zT2aPIWvH8RSu00wvm_s8@~Zs$Oq+KUakl!e$?}sme4ZI#S`I&$fJeVQ9Iu($%UYWyc54$c_lNUVrk;7#&$< z(fO`U08B}gv;|gLVkBJ)mir75{~WARVoKtUSrK+p#CB-FMa=*_3=EUz6v4eG)YIOY 
z!3mdzBV1})V;l*MdCy|@o7Kxa`_14*p?+;)=`(Es+&n84&X+9MQxDW?9I6%c<>!_% z{^ql5PfpS>iBK?F`0;iKiC+Q=by zmpj`!s81~!W4cGPGGgzBq`G4<@%U)I3wNH!hMt^dW*YYyP#xg59 zxk;PxsUH|?pR;mX*TnfM9>Y0e*=jnuGZg$yFKy3i- z2ufFNPqK7sIQ&YFaJ+Zixk+N>F2BvBr|r5L??-i|n^$d@Qm{>+?>i%@KCl)%pm|M# z%QLU>K`PhFl8RPDER$JWJGC4BkT_|rK`Pm= z;i2V@vwKk(yg1h8ToHZ0B~uoz)S7G~ueCIS2zCJ1rCPUmnoFx?AdHAKu+F9w%M^r> z1ni8?cq=1FZZD^{pYeof^I~yILJ|+S)I7*ZHFz(4t%~t+?7ZvP%gS4Ivr60D{z=Xu zCp+6q53wHKuupbK^E|&b&BtRPZO;<=Yetr!FC!+Et7q?^fZKYFEaCsML+RKhb}q#3 zuNz9nN6rIl*v00p-h^2G>6{#|>3=4+qHY|7}if;9(Bcb zGFn)o0ohxpBg$OsiJx&I0GD@s*?Dw-4Q+XB(|r6$;pX?;a+?kk8?vyWPXK7tgweuN zn&m=jg1QsjdIRU3{UD8Xej8Orc0ZJ&I|YA3!fE;g4YeDuCzjbP*1w}Y(P9 zw_O!Qqjixs7W^>;>2~wXYCR08*ij`=Lg37w087`a6`Lcm1UviZ_;!D(JOrv)!aq2 zNc@%GN0uE_gVm$Q>s}J}U%p-N4yhV1>pef7yK+|Nj$4jW)k8}VGekA)Nn2w2(!?Uv z6~l8h8918=^z$u`MW)R3q1IGaM#%Cy&NP9~Aii(#d|I9m7lpX5OgK!sw7Sd~L6#bW zfKj#WPy9B?{W1#cXQS9D{$Qd--XOKePxRQ=%IjgP&a)AV%ZoElH@@ZZ$X0(|(Emd_I=+w_mmNIW zL|kT{JF*&DEn5Bbgx^~AaMR7o&VSuwjP(nTYm^)CF-neFQiguorENxGTZj-ltf&NRQe*K9UK-AZ*mdsIswz(?QFwOq zG~xQJ3ocYrX7zxjX6-%Ad(q!3+}p18f0klArofQie1s59Q!yTgZNa~VT9%jU^%ERF zMN9h@^{WK%-?{H!RE;y0%BW6DIe_dLI7;0bVrIXgU}*9>c8h1ZdmodZ<@XZ}cArNX zAE%Y*7qp#qs1`MND8L0YASO@!^*3d23Ug;okmeLpA#2v0A!ft{FW5D#;fKObjE~>t z@ZhcFBh_EEr9txw!(MsG&uutlGs3jY+-31_A?QYopD`zyW<=0jMaDrGtjX| zF!BfsE_uY{%Isr-JGGyYNKkeaZ=96xh#VWH5P{eF$9|)t>fo5o*B{5k)i$Pm7;uq z`$)v`Qu=7OvS+zo(p6ECyM@`GgzL#V5^~xNC+vdnXA!sQd#CTc#|01QU6YVM8}=t% z`%C#<%a=y+MzifUGc8nhhZ(DgIO`vwbs85c0WO&eO4MHnrJiGZ_e_`>GA6Gp*PL4B zc(qS=2eS^&xvQ!*PbJ>tX!;TQF?(I9Nh7T*EpkXk{9rRkncbf0HqPCcb3SO;YcM|d zE9RiLNAXfww}|rm=Ed*}3TaAv4%SLW@9XC9FMKsYe4%@yg*CxHw|-;36m`4)z&NBV>VtvUEd~EN6ArU1;$QO{w2?1+?vsiV2|_I!=QHEPVdkt7 z+%sdwJTW(o(*xQylSf~H--L#=@V|CD1%!*7dKb}CgKYoLMsl;q3Saro>C zd2m>&6Om4uAT%JGRwiJl2q%j1@~R<%QwJ~%3=Rk4h|o58cs50-sDKEBvp3_8up5wgel7A+ zbrlXTFg03{*=*{QRHoT@+>F|=UguU99D^O>XkXs*Ml6)2#ZBGXyni2K#@Gzo;L;j> zZCkGJl>4OWqvD|?&fzb92&~Wqi9n&DMo%WAmcjbEqTo#JhrMe#+V`|u`K!-oCNclm 
zF%T*Ki00@LGQ67MXFMCH-+OGbB|(Dh=E>@{`j#Nli}1l_yrHZEZ@q~yWnO_qJxn{Y za|=Rv+247;A;h14+eZ00T-m(ezlda3owtW-6&5a(n#SnYG-nTiK?)w9>;r6E)k0Ui2Y6ogl*T` zBb635B=Ke7%8B3oLATYyD)U4_!NTEH)lYB82ZU!f{Wt;K{-O9zbrTM zX4NCKTOjIFJ|REMM6ie9-l_9uWgZdZszzV-(VKp8zHEE&Csb)y+)kXlqlD&J`k1{R z@*6%75}%E|j4wh}S0B5bQT6)F)ER*(ht*&ov|wT69ov@S{h6A>6>Q5vV+TC)8jWLn z%RVfccbvCdmTxM-r5cNi&~>$48q90nWmfEef)j!=wD}+?LyIu{+kG;t4)m3gZwtv? zy33@Uf%@_{)TGwO?ptYuTRcf!wXyrcj7ao^qnGrMpi?QKXKgKRPYu7g;4K@Wr)8k} zkXpMT4X5^V9%GchTgv>LqC};UoneOgcEFoH!|;)=c;;gDv&Z(On%;^>r?qLleMXg% z*P1@xIILyqd^%BQ!6$-J>iSt`k$6TeG0^GQ_uYH57y z>qzPKNp>m$(;8=Hqx!dY7lhiq>@qn0-Oh=*@%K0`gY_<2j<&-z4Z4IxH=9cBg<`PD z#{NNgTCl(Um~HqI%K^_HKMiJ@sO$dx+>>gJaYNCJ&6|@A;mxcuyz#*s!C1wx6y}S{ z^~&vHHYfjiHNc%Bx^XyO9I=oQ_$?nb$2K?Pm~lbT-gNLbp*Vun({Y3^o|$Xn56^5) z^TeA8%-Y#VkFJbnHo0i*`IKtassJVQdMsUz=BMS&Q{|2I=bFu5Y{T&-@^e#sS{@m6 zpK^+{xOl=#vFbCtFpaJnpMxbO&8{4PFy0bDlG~+k!0i6?f=u^X@wWZP0WB@61zgH0 zhtP_Qm97ChUNIjkcd1LRBW^Ly*Vf-K`GKK#D|-Bwu?$t!?VxQ5{J^w^;X zW@)V6o1Jdf&^CQT=mC=lpCyIWcOiGXu5q<(1Zmms6AkD2u{dO(FVOL|yzPE4V$K;Q zy7s31$L6$+u)vxSpJeTaKr%Bhv>{DUv3^!KBcd4guSe8c+DB_R@R51k#pmL98ne&S z9QTuo88iNduwJIgzx%M~M9d3by8FKcYDJ7*g%otONK}OK0p*4V5a&n^>T$x2195>n z^%tg<-zuwow2ePdcgph61v>Bo$LAYGbGKLl=cLduX^3lW9avvEThRYScFoEhaI~!c zO^2Re-@M33u3>p|^+K722qFI-Cxx_VN9o0f7nS{Zc=?^<**Db#><7tVoKe$@2gL`} zxaW8ygGSCC*Za*xC{z}6{T|$BZ(MZaAHGNbc%IAQ=yVd(YAa-kAzeR6U6fc*9`j2W zdu_9#6kaxCMpRo-y|JKbWJ)=x!CQAe_?w_cM$aB1URf{8=yT87$jd8S)|eT!d=DcF zfOyW+*Mbm$mD@2e*7LCc6*q$L%s17t!QtyxQGuslr1*4O49n%dQe*U^EbD_HqEp8? 
z@&6G-B#^({N%kwUb%@*&Q5?8j*ca*SS$%auac!Mt)F0#5^y?U3W9Tla(V_I>)JNO< z{jJfrBu5fpW2h#ed(d+792gGg7!VM0I4CQHNUvuuuNV2<*P5^3byB{11wbvzYuqHg zW%`qP*E<@ObJ4rFZ$w-uuN(ULzWJ0xVc<}#$I1`JtG&^C=sm$8=er}JXL5fdA0ETu zZG)hir{H3ONIx>6pl3qkl_R`tG}`?;?reM?l3ct<&AV&aMpuilK~1Vlat!@$Qx>Gx z2hx-x1xsC+1x;TruRBeD14cAcqA?koqBj41;FaG+l_~Fp^TZMe;m$K{Ik}B#z`uJ~ zDGg_ROs9Ps%2a_;^910RFtp>9Mf(1|b>G_Cgyb*`-IuA)GZ@J5%m z&7e`oY~Q-Ybldlj3_J94`Ldp?4Ax6rC}@ZzS}w@Rq@-M5>)VtlObv(#@{gF*U@`Nu zik_Wjq8RX~{gXgm%PsY=sRbY2s^~8u1^I~c7s zZn!TkMv3}Q+eF?{OwKP)lL@D$8bh+8zePLt`1yU=!yN2nmIz-VJ>_E~aXB9d7xm0+ zlJwFZZoiBe<~#i2OH_p}T4iXu7-v#5;W-w@@Zx6VZg1H=d1yx`?^LpKVC19p-Sz5C^>SAtg2NRAN7 z>tEfZOfHkod&kTCJww#@#TUF_K~?Z0Cxw<`K7Z#d;$id34AF;1hnHqTUl?Pwe}}&6 zi#>+eno+}7FrUzlm%r-W{ z^?#jbIbw65f3yIK@n^=sD38vE*wZdnYMKDl6TLk3xk!&K;P;WdSU!gL0Gw*NQmgm^f4Bscivu}px2`x!zExDk?ONBCn|6JBG zSmLC_Y%a_{ioNOBi>YHIH>mkq2cTzVXMe!qKJ6Yva$?HEtS(%!!Cg zf+CI-Tz)l6#$FFSTWN33Fs`%s4k%JkMy3dLE)!t%gr&{=Zy-48<=Fyg0zVf3ax;}c z#_PdjKyttqk(qWbkUVarpxyE|$}Mj9f!dum*N83uqfE)=ZcUN{wK6OZXhBLiTv7fX z;GGIX%s9@W1TkZId&? 
zX0p8hY?G*k%secQX73pHEitVlOwK_y{Oh4;g85sCwrc%zF;$nxK1HOa6d2pf6S}7( zuGlYLf-eRaRn*zpIsroZ!*h!E;vMV-Hj{XbD;g`}-XXpF)G()&Cyw_ue$VK-sl&qC z=5Y00ntnhhk`04KQ1tfH$;VROUu{?e;EWvwq~kiXpIOC_McCPxUfowr#@gWI`XhQ& zO?ZO5-u$CfQD=pF`6PKXlY8=RT@OXOAV0A0firE)C3){PC9k4a?=s(jm(a?@pB+G} zk*P1=Oy30K4&9Dc{E!nk#ISS$?G-X*u*dg>OnGKWx)BJrJ{|9((14DXM!5kF0E``D zmO}8Zhu3|iq{jyW9pZvFaE`3crfhNQSdw!MNRIQ)w!pVQ8nO%AHTC!QF z)884Ik$iiqGv&o{nIAo~xy`b=kvd3j>^#8d{kpEiquBuh37f5%5lrYg2X7wua#&bR z_oKv_vz;16eL=+cnqrhS7j+6LD{|nm-7$YbG|xY&kMW$QjnczymmIH0Ipc4LM*Ae+rhEb=Adx5w?q(Ix^d0syqC4tb5Xi3PsUs^H>XOv z+>t3s+|5&w&GAkVHiNU#r$v;}dHy#Jc3bdS=N437^q0UWmM`Saosq9v(5S9GvCz`- zL-1$XdBNoRN@9_gP9y6vF4YW|dskZXjbqAfqIqvbkRHT5AO`cSOq7$xf5}Iqh$gue z3rY^klOW$YKrD!g+8|3&EdAa4L z8opm*c1`v%@O(Qy+Ii>w|q@CnY>yeQ0j>2+u1C^*}lSrlKaWdLV8sXp@Z8?b{* ztmuA?dC}b6T9CZ5tU;Vc3B|E%ywdMgylA`RAb-4xJWgiRA*SVgxhbcd6}%XJXuKaL zz?9w%2*4Rml#k+T6FJpF+cymV5eU9?d1&5KF@IG9L5lhbx7`f1Yo<+cBJww{WqFYm zbew-BLt_5LQB~ulxMNiz-XX0We~QEVX4Bl67wlR}hJ)9@!HzB}Y*^l3?T|**k&HrY zVuY1BPuE|d!TrbcGdW~w5aGa$Iq1+XMOxejk}C8`&$)cxMg7QU{p#C`ns>JXawew9 zc}ag)i~Aanqg596P~pxEg_f14rJ)J{?bxni{f37tZ|5dlR}jE;5nm?#xuMEd{9A3% z@r;NUdyghBdiDHg;o6$Oid5Wo{p@k)`j)#AE~ruB)Bh-s}8RUjf{NAanKl?#( zPkwh4+frmi#0A7+44_>6^aGlQOwR~de+!UVGQn18K|AB(J@QRUR9t6HF$z442#md| zc{+DZaW~&RH(e(WX`}RD7LF9owkY+|v(RR+=A?Y%TbQ5r7j*0967(9EFX%0_Fn`rq z5Bf~1k}HPDz$j(&zQv$WmB@RK=^Z(0M(~cUFy-STJt_IW_gs6_m`qv*4OLuxovN$D zZ}PMPZT~EgNkY+__x|PK8T9L5hi6(4Z-9w+;ffNr|>OJku+n>=OLKryDsrH6{#r#e1(G|KH zMEJ+89g5i38FNLZxuCq^rn(DSEqwa`!hO=aA&=Gnm^26EN|Dm_j3=?@vVU}gX`hV^ zJ$*aGOrsG**>_2+<=Q$R#nyvmJ-T1zuAl}nYwgu3uE3FEn9g)dE$MKpY*FotBYGlDxRL>cSGLo>MZrWRB3}o; zilw*D#y7H|dnN(@ z?{y*{ZjRhavkX7m4T zN~892rx?c&<4(B=jd)dvCJd&DS-b5aIL{d_Hkj^EfU$MKEL>nn_RHEUBj6EDLh%r&x3K2cO>NH-2Wo$36C4vQx~Xj|yk(FTrX zn&UQ{&^Y(qDTSYrxiRyfbM8$sKLdgqSF6Jbzg<3%I}rKcdjS8qILC^%@4){}YL)st z->s>xGtu5a=h{A&ByzNCjK%j>UdTj~I8d~OR=~Aw=Lo{n3&GQ?U7mT|xf?&b=u(0Z zY4=xJL2}qqU>|aB!5Ru0)nbN6=93!A(B&={L0^$l#Z*ulmWt%i$Vmp4>4)3Op<6P! 
zk*Bi7_NOTw%Y-H9aBNz3$K-d`6<0@^ zIIpUV=r%3e;i^iK=><-9fliJ5+!>F z;=IskH(*H_vM5`MB;5M_tM*9qj%49kl+%66elGfnq7!YiB9;9Y`7YVuFt6SwU zf$mv#Xve7-J=BgsW9KTsg|_EVdWDU8)sToth6kB9rGze ztfVxHnIZ5VB^2{1$pd)E%`xbFTIcmP^mhaaPQ zx!7S3%BpNg4TSWh?=giEIn76=77|@9;I=I@eE<(dOQ=e1Ti6BFHZB;t{MeV*t6%mo zILhz-$H&I+^X@c1v@mMy(KfP{(u!|;8ZYBr<$CwQf4&?`&OlNvkvTkox2)k~?2X5h zt!W#6cAtE9{g=$hRglT=GRw^rp(ZKs++swA&ak!#od1S;R_{l2PMv1S%MRZR4s{z$ z4h=5-@zu%bR>kyg`04iHN}`?Iu%FWM?)}MDz9fI^R@zmqx#l6y6@$e<7#p<5o|b+= z%6pugqhkVMAOmuQ>nf^p!aC^22l_ohd{h1asHiq~!GK zgIUH9PFSq(2z-hq-KV{UmM-K9R|f(RZUSRXf|x|Y9)5k*doKHJ0!d#Lk=X);iiXVq zq1+4(OscXyy@B~k5$Wlqg5FHdMF&lk9Oq;vkjvp+d9fC7IDw()PN9ed;7Px*Y|JmNc zf3Blg>%RX11XJPm(;nzwt`uTg9oyTbZq1N@fN)w?4}Q(U`-8?YL)E)vKR`D@AsRSl z3{QfF-b8Tg5bvXp#R|FndO#h&^XL1%&}Bqmk^xfyN$sX?SgW;lL}sU*XJ`+u2<|*Q zqx}QLvh{(CCb+w1r)F@(?>10Q5z8F0CZokwrw!gKU_ubn2?k>+5T>s|w%00e_C`8- zLyBtOgR;Mfs3|&Q$7YindkZn~dO~vA!>*9h1u<&P794O=-IE1|ugu>4~3ZBDzKX5`}2l?nDicU`=5)Bv<8NM>)-kxa^ zryb=(>S*<}mI0*Rfy?KOA3(USMWR_7m<14ti$^c_NTW4>1qeoe6S7WuWsW z;6=I%i6p!p9^0R$e8zzbwHG1H;JSkUF(~Zw41b3Qp@rPcp~^d5KWBwE(y3lldm6O; zdxh8kke0jHRPtDSrV*K{||ToWDh7I$Ul-*X2?BMPXCNzT5Y$ z_)}#r<4hG}>ONA@c75iZlu_hl^z02aKz52HQqZ=W&Hwyr26x-|In!r`>dvjg_+90| z6>vD>^9CfI4Y5VJnGzp6MU89rIvf#v2<;n<{D-%wZdHJ#gVJ?BFTB+?yUC98Iu$PY zcQzg2ZV-P5QLycPN%F0L(tZjpkwBLlcuUkha>d3_2@= zvcSSi7v63Rl2VZxzLjG3{d?>Ybt-0&{!39$j*jMR#iCk&yIEW|WlIn6{z`MQCA{XS z73A?XL^uP8T8O0D0~h~;*wVx9%ndVQ1@f-=yaAKm zhBeSJT|_dXb)B@dyi-^1Du6)n#!(u!QRH)TtmN=Jn>aD}bOQiHsD-O2kRz7blg$Y^ zp|JVOTeohFy&N}u+F@hlRP3>~pa>UGDyRwzJl5=Ab?zY`kLkWMxh`-TEHM%GI9%U^MX^kvo{v zgQLLeRz7$MIvhfoB9&c>ZnanC{Kg2}=3=EN*&)bB*`_BO{%MzC8k~U~_kj}w3K5wL zj)47j7z`L>sFp#n7n^-PEeKMpgXBQ-8#ge1TS&pLg=&aLp7~V5sr2UKF58`xy)i5_ z;L;24^tkb2!H#zw#}atmr

      HhnVh;+N*(y)&4Y~3{cLFALCc>LE(&5)PYA&ELzHI(!r zy34^|$lOKJZy2*0AA&+56Mq8;w$xOk&7SS*MeNpJncZvMnOYowdWdc1Y3V9Lg0@D88c~q+(HP4B8Ml#u3ka{8k z2)6zI$KQZ4p+MH(xiT97=tC(Wuq^V$qkM7@Xy||cWz>!|`RF6$S>dZkg(1(hW1GO1 zjeI#Jhz&WysVIiT9&rTchgbgRGyi?Re+BXXzxwIXOb8@OAlJ(I4e3DqzaJdMmH`LI z2f7ug8e|n@r4}04Nppv+k!B0%s&_-Og4&T#{`Xh6vu9giflxcb!2hu*$o@n20BYyf z{qL{t?`A@FX#WdVuS-bNv=7;kQmCVd6Z(G^!rw*)>yk>UJU5g;teR%p7-m>s_w{Sz2jkknSTocDa%KMS z?H$U6>}bQ!S`fF>pr+X=zU5!ioxR%8R}G=XB%VaC2V&L%FvlWETSIY^oX!yX0zQAk zOCV9KK_7Y)vYRNl85pZ&cq^AQbl}zmbzZIHL6jgQL$Y#PP~jFpOeetIjUPS_vxP<0 zH4vK7;w4mr9z5G98WD}cKw>>H9Ngh0Gmi@n)k*>ZOBrwzbCxcEI8rnZ$qpen)NN!E zZjL@dqp#G~cHe_-UC1NGkVI>@5J~~bswwb4-VMN|PX#E!uv)@HPemnAAl#r*F!|e10K!1o<22#~f3fAUSV_GSFl;|BMAgy7cPzJ=FV^0((PC zA?o~^fbq+98cm7S>}cm%?&{5j0M};$GWr1~?p44aoY%1SNEP_W*3!cnucG>)lAHnicx=;;FEzWT` z_Hvg3GQPtbu~6Ew={RMBRLmw9%lW*GWBMVBPXmoTNdWx71ZZ~elD&#t3|w!d+_fMj z3yLp<(+yzMT7J7N^=Puj%Z^4Fg=IQfG_;HJlOn7ckf2bOI-8Kg{jdo?$gM& zK|m^9B7%$|ju|i?+Xb$r@5&f&11P*nbI%bm!+;52D8C96X)0j(VZpM2m;yn}^?=1} z&;o;yXn9u8yTH4>fN9?&Kf zU~UbkF+%^d33JNDo|b?>ToKF~#sH2L4MBzKK^*wilWQPBb*_Q)+lt=?L^cbVXN_Lj z=^XAPc0PH4rQNSSfp(gIXc$YzY%gZFV$CSCv;XLZrN` zUeNIlgK6_kkl9@U@OBZ+%hiYArF(7LB^Lt+ocgUbDqQAI)=$2)p>ITu+Q62PoB`== zX7$_ZBU9u&TN_maw;FzyO-(=@MqF5)&*)5^Lm{)wSBY_k>l?DPzW@m(4@?=stHA%o zS*P9`oR%O-+kQYBp`=sTdCSc%q z!NLzeu?p)B@_*zK_Tgm78QJzc+K|H^ImF*mc=P#TB^sZ{$Z_unh`p_>w*ZS4>|Z?c z889j%@E}PrUzDtuS{X2L540^`XyU@x3tM!2qMmh9#RO5}22u71e|R$ubFtLj|0YY| z30BsfXRH0tldY{+?VzD|WghN}DCl){+}C}rc9{*Cr}4ef!T;u#GaHfFkFpiEk$AB< z?RYWkO-wL7tMtGnvH2Z8Kor{qawx{YRwZ!#!Jm!cKdG?9q@Y-XfS@1|In(wenc+TB zfSndqvbxF)XX{~#`S3toSzj@hxjq_>B@)MBF4o4`ZCyZH)6ANq1Z@VcnkdSn2vE)* z-Ci-W(SyU$NLo>waWd#019lL?}2R%|&2ESl1>S3hXQ0JY8Tr=`jRzBqZ0}me4_o z2C(Edf*^@|tbVXNhZ2W-I#lcb9lW#(>W@MfW09{sb7!P8WN!T?`Ft9E1pknmB z*2e094!qEauwdapRvLeUBq3)c0~kd%j;?@Ye*N+I{zfq+D03)z2tY|MiQFXt5iBVJ zY7^$qLy+0E2<>iAgwnI(2jE+a>pClBNsNivt*%|NPW_(!KrC)?-p$4b7|Vhm9AUSu zoQab3w=Aep8aP-_<;Y1SGf&(}O}H^1*@hKaUQI17L1x5fd%c)o1wMQyP_0cLtq3D> zRt6q)QWPN6Xs#S!Zq?tN 
zggh222dxj8&0jalrlL{%VYUjJ?Js?&UZbuwZ^R{HQ!iSvJ9g%YaJ$2|nH6iqN~l=n zCzvjpN#;$6Pxn1PV|WR%asim2F;hH!g%wqUARuBE{xmF?rq>p2$Vk_XI(*`y%7@D8 zU)G6*gTSty*!07%7MI4+=0k9fpucSyzX<>0d@?{;@veoUXtsE*I3bf?BZACoc zM=5wYYXTX8D=?LeJm#MZV0kWHyx8+}eO(&U5_2BFDckpA#bxT47)t(jF8z*8RZX}P zFQsJ~NTM3NRjUv4k{rs@i_44HDmJQQFJz{EUi4q6R=g}eD^Ed&MI)*J&q71ca25Mq48tUXHi&nvAuNh+ZBWXcTh>q6VT#o8e%Mr^JVC(oAZkIUsinLRHmI~xbcV{X? z!si`>6?e_(*gNF)iuJhS;22&;3g7MS&Pg{_BY@^pymTKveCJAo`i$WGhU>A&P0uV; z4mAXI#o=DGvjX_$Vmakswx(>O14#yKFVT%WUZ`aBRJJvk*3`d%HP6J-uXbW7m zwDqv(NCOt* zW~A}PgFBC*Cx^CyV`RQTG{23SI73SynwZSM3&6{+gCOqGYnpn0BsfZOgG?IfK>4$Z8`g@*!~{ zbW#R#4U2wi5|zT4U9Z0NTNj@lEc{a zclPPenjk}OA>4x07xGPPf*$5=Z>Kv~!kyfD3yhA{uBf3sp1(fR);%hO%-yYrHXrK- zmIjsXE26J{LqO=4JmK@^89&{AR%oV&0@&DmN?ZTOvIAaqgB|y&jVrI>XR!fo-Rf@# zi`F|UUIrJ%c_2A~9;C!4;<~95?r7MM9)XsX2PRq%gop74Wh!5os*LipK%*MAbf@DI z7eNN`n)aN_r^%HLG?nfk%~2Hy)^zAG!#^L1I@)fF#?B`1RHFsY@;~r7cGhe~)!)w` zF{QfLBpd3Q0+^rHOK`~0W+*YKKWuWLXh`4Csn?9;WZmbV&fetk3V8QSVvXzdwpIx` z=;Ia8EkNvUOanhqD-@5+)C%uKG7XLwePvpxM*t|D5dU_Vkrqh}*FApCt#OqbE4kglWQ4 zZ^>v6wx_TYG^sv7oa{0Skvi0YmDgZt+>xifUJX{!3qZ4= zxtnng;S7YSk?S0a=yA%7m ze^PY{2u6V-AElyQm#oolN&(YW0z-kM$9qt0*%YD=Vv1&E?CK|%s7gCEx$p$7tP<^b zm}4fn^G@^Z1ZnJ5(ViwXsN$;lwmh7Yl%tKi*BSZgkJ?>ZJq63{idM@ut>Vq%HO=9=CFAHTYT zqU(LQ2ccfuF9dyvk+#C$A*xOUp4zoJc%m%6${UX-6=Kv)9DTqnZ&32YPKj9aCbu)E zt6yYuUOPnTMn2qNT1YR}KS65DYXs zg>y|klNLmy>wl4DBG5?ascd^{PIWNPPAu>8tlG}^`#P<-zIyWO>ECt zmUT?8;DkEgPghs>y6=UR?IJ5T*AlR|9F)|Yv~0*Q9sm#{sK*oGMzjVll5w$(5S5v? z^mT;{F%5gb^!KPAB0g3TARqBc5VUnRNiIw&^2y?L4<8aqd@W?@d6dT``|BfSVT|+J3otbAILz))L*$EDhQ=*}Z}7~N2bI{QODoBxBv=6r z_si1p0L|ENmu4o__RWn9<*sm*xdpbVtmsd9Cz&+0{_lRic2pidFt4&nh`UY4t=TvV6sM^*t~OsHkjSrwa;zUQ5%x6$4{ z;hSzdzG(i_kafq+%;vSm3e`)ZF`d-pUCD8hTQ{^nR#?svAVJBXAdJCx26P-xy723ZPYvudwryG7R=rvZ~C z_<>!!{PGa0!5x>}20M@kpbI4diKar!58c2ZHd$lxzBTB;WI%IULZwJFquPvRP!;QE z@gS94)8@G{h}pPm4e&$4S;U=F5YzQ~Ze651?Iq4^xA#i=Y{E@;b$WOTZVvi!NhwrTS0SCf-;5N3NbyyRH@?H{p z2opo=GADrm^vx@Max1{8YEt6~m`-M|H4G@rzr=Q=?VG1Pa_&(}2RSV>$#t9&Jgpq! 
zmJ5Y%D&cvfovu>s11wMVxBC|bXOphQap^l;Xr)*$tSs z0cj0iIwUytc+Jb4GIa%=Y&KZtINm89a9D}B!A)O|OMXRSAfs}i$f~28k)7o#BE`;a z{C;mms!zUdB~>u4@lXoBl#NU%#d0$ahQ9c`k5b+N+S)h%LgU2z~gn@9QuIG z&5f19$V;56Uj2D!WFrmotQ`x)mC631E;ZUwi;wS%AB))xG&q zpXa=MCluID@)n)=x`>{9IU+lyK4~8|TYvZcT}7m-*((cr*0kZ zsaSMfQw2wM0w7qo5cm0hA#Gv0d{&2^Iv zx@=q&4f(#y`eagJvvtZC3ibOi3)=K#0D0^&?&!0@Iq#XhoyQ8w z^pj8XwP%AV`0JBoxjlHW-H(<3))jgOV@{n%a+cCEv|f6kFS#^C6m^caR!Tmo>z zciuC#*~`ejk;7{)Xb`VYuqT#S>zQ0e&^>)%)v+!<0#|S~TAVcKKQ5a~T6Xog0$=Dg zcFH+jj#hB3N{0S1?!c|BY1N=}DfPoJDS7<#8J?8Ky=Kv_37VKLipZh*_J{cX%(Fvovj%vjvF%u%ctlf+>vbA_dumXT$uEDyu2<<|Bzj7?uSIp5LGwgnLi>~vt zOcIPgz8{&~k+vy0oPp{3B!{uuNLk@^t2WlA-ka0Gxn8T}T$04VsO6x@K;pqZdm)yF zBCjnsMlqW=cVu{ri5F+ng*N916XzHuoN@%HykdOb^rKqvot*CcCw~12830g|xJuw! zZ`*kT=E&5w=opJ;6YE&ckA}G%CnI`}4LA+=_u(v4fY-=`&G`dK-+0O{l=zc2S>mxO zfObFR{v#Vje|%v#4)4Lu(qG2j_a>7`lc&|LC%IfXj>mC>V~@{?_*UGI zn8JzU;Yct%H=C)NE2iW$0HwSMq!*D>3c_q$kAU9IAV9Hs(!ILNUSiAF9Xx;QWcLj- z%Fk+n6LX51A1UkMT7MDc)6sJ3q)lJ%=|Hvf^i{&BMIbktGDVDiHM`&N z{DQ|^m1^`R73NgD7y7ox4!rwP4o^WoHs|PNV96#%)^EPFRYi0!0f3u%`5U%7)0mET zu|l*84|t_LLcsKG$315;&^fr{jejjQ;U-$C7^*V4iPwX)@lK+H(8&-4q7za-uPLxe zQi#Wa#V1UnC%?Xvh0n%+{uQn+ZP_i`$#_s!YXQHQB7}SLI3;0Am%9=gwwJ9WT}lul z_vF#enjf(|^_E(XXPkv6xgaZYG=go>C*D5DG$zVxdugoDU zU^3keIfv6(oW+Jc3I!BYqlGZZhf$-3edKq=anns22=Ebexx#i>dU5hs#)@|p`C5Ei z4#_?Gf%iCtP9mX_g@t8i*#nJI-k@C&q%Rd_=>DKgD{8qq<{IP??}uS}XBh)nop4H^ z(p8=x2TVl(h7HFN#1hnu@(iV_*(Hni+?4rPWN|TJv!NL`T9R9x%aRXQW$`dtxGW;m zNr4_L-K&6O@_{w$#k4$w-{Z7qUU1*<&s0yo)hbop4kg!=uqE+JrfahD{_`Wa+b;%1 zNQd9vCO)}n)g@>%Gpr;h zCY7$+4yJE>fA<5=8=krH8cF1aPcaX`#Zj?L(hJgYS(m~$3++Z2BcW=2v3abbeRl7~ z4m+``xTT~~x2t~D9CG#QJ`@@UO#;TZzPy?dePMFC>UqGWt;=*Hb;?~j>UaLlCJ1R3 zpzuDAvn%K21ur0m@pp6edsn>^@R7Rs4-Yz?u$R3_ zVzOfcE{!G)02ZaD+spZrfq-FG22OT>K6W|~u2JaFiCk>B&Im6%$EPt`a-7#bbSzZ!a0F&wbHMxWB$`v;A>k(NwV9Kcj+t$G*Ok7TR$7yR@+a_fz! 
zm;9&Z4}@jn+&(6a^{E8=FCX@jVBHp`#$7CW7OcYX^y7WC!|Iwr%7FweRFIUta{tgPlMZ!eC2)(i#`i3`392)Mj2uI=PMu_nn0kn zql(-oLQPu3j4+zj^9~sKUO8Mj1gS`8)RM`j0Uw5pbtKpA87M#$E6{hvP z#78>XAunj!hT^|L#NFxooN>`tS} z1_IDT_uB(}SGNo9VmYi2_~aavek*l%&;aTKX5CxD$;zf;vy-L*rkD4g=lW!Z{yaW% zLlaeq|6zUhpOURwn^5@|udf4TvU|-v+vhHz+4#ZA?miA_j8~F{Y`6c^7ic>uyXYyp zopd@TYMnMtbS*#7a0zJLPhQX_c$vT|ZwfULF07H08|{%Od|>MDVfxC};{;IZwQ^rh6)3@RTfI?H(s&LI7vc0f*~pSHVQHFK$$@;w512psumQyI`1l zWyNT^F^p`>(|408(z-*qRpy`LgpV5rkrg+b_~{XJ%2Gb@xDJ`(EE{4`4r=YjPlV9A!Z$@JHdWn8>xJ2RK?;bfuh62&eg_(OU+K&h<$o+o*_ssy#@9OFU0Q-d4p8Xg4 zlt#oEdOp-1#*inBb6*c4xbsKiLdLPPQeb~WWQQ^P$~PFhpT;GnOh2+dQekg=#cj%_ zI3xHEj0Ol=?6oWCu`)b+U`(AO=KnqZ&u1Wf~?1s4i5E0j0bzLZJpZcnZyN`;WnF2wpgqJvL|A?-RF6b@8;(>%CZUzpQTz~ z3n^6QylC~IRK7#7@7P@MS!J`c9pkXt-d>saiM`9RHUU&Zk<~}KFTBiGK?X?p1mnfC zFP*L0@tn|>WH?y9SR9Nsry&O3?up8pe1k};(HnL9lS^0diU=Pptwta5x2h| z&A|2P4naBQpT_<>GX^}U0~5&E{QNI4{9ePSlq;BT;oD@nVV%b}<<*xB9SWSj_XaV{ z#@|{@$KC5>+>{wdE0m<4>BCZ|iIQ@<_6~@&Ujw%IC2Su^M%iI_`sqifUtn{%3D?07 zwWQe{3OtQgzoD@0I^a(8ruy5J}l>2C9+cJdtr!gCEss3}`6l9+7A~(l`-!X%d_pA+^sE7Cmv>k-I#X zJ;juhd}$GV((&`n`7?#Q<;y(pukGGnTlS@Y1}!iDx|MABS`c0p5z4TcSECecY}Lh6 zAVw;x^SW9w+t?9Xark58F2IR;#z}=8SL$)HQj8FA{lrVY8d*m=+?&ABwVu$X2&do*nNs?hH%odK~+GtZ-~S;gxHrcb^^u64Vv<`fqJa*6vrv42TQ%QHKohT7T-+3-!O?mQ5~xDZN_eAR z5@JWLtnVjJ4XNSGQ6ZIL)9<01Od;7f8;s|(@kHF}Z{uP+!DyEX*&E=Mw=Ksg3K?)P zV{duraHH=q$!qI`48;ITxpS|zfp3#2j-%+5!SL|VZk#I$a2Qqpr5nf$3$7e9sk4h^ zymu`!gaY?9hhi>~y!yF34Bns*Ap_rOuUoq<|L_XvHgSqEK+>47Tm`7l5}yYDx@>ENyHYV{oCp^lrLoh-q62&J3q{5d(3R+epweH` z)_9iG_DUB7Ogq9}KE$r;UP#d0jUx4SO!<*bzk4c zU3vkg)bTi?n?Pdb!&k=NLFt^=diVYH$n!B^Qc$X6mms8iV>zUXc`P{~iWTVl;38wm z_weJhp=&Y(xD4T-_))uX!zV?sH!+e_3U+%Biz9KxB`C$ZyV$^&onP_(P0xV`lmL@C!_i6KuvBfBC-kbR?8FC3v5Ye@Uv52PP0{u2njrb9wc&Io^=Jovvh#{W5xU&T{WyK)y90df_ zKcs&Y#?r|qY}?5JkOK3U%cgFr&6imJ1|zY>PHzO${xRz6gM3vXo zC^+`G(S9TxIV@-M%<9&zu(Qh^ORBi>6@~xl1QDQ&_NX&RW7EZ+B^)RszX43(IGiOs z+2CZz{p)3$0j)uq^*Ia!4H{obTSSIBzm%H!e1lCpXJ-GnKqaahl>`UC|2dd 
z)hhOh2x(8mOQ@IQ7Y~IQv&sz1lz2=RTE#@j7(_nNq~)d(zP8p1SZTf8ipW}lK1g3@ z0Rs$d{KC=~(2Wn>VS^Nt?I~^q;equbN8-qUMmM%&4vd6AQ%8p`P*@G`fmn(q@g|k^ zK^xCSD2+r z;EIQ*gvAjl!2dQ)*=H@^L3Xx+geV6cMZ<~EKPM8n4P44YOQkM6rU=3Zq5vWj zKf-G`$yQvsQ*GIvu1ML(Y_)y{mKG;~kcO@+6u%t*R}!F{lL-?EzraID*qV#q*@O|m zex|QP+z!ophQN&Fp!p(xDuEHq2`+jz?tNPMOKE>^K%L%=rfi&q{#x{a_~%KWF4f?l zXaj}q*&#BR|8;n$ZPpgyNB9A#A?Uj>P$saghl`p3K2y&|mpy`-Dy5&byf@%zLn&GA zFlH~2Bw!_!pbW$)4d7a0H4!GYbw&y&5qVYel*5MBIyR%u9j4v{;JN4CUnG1$__HJR znFFwtZm3D9JUv`108+=I+`CInV)Z66-ELmlLR0h*002EOL%l&!Pcn6BZV22*#k3}Rc14fW4B*VrjhC=rytKSmPh+ga{&p7N zhL(S9)E+4x?=*>PA+sqbCsc_AD+8u2W#NmzFCzD!_wd5Xsmi-qfKoLy@XSD9C`NgT z>b`cp@V}0W6SLkA-x$As?18@<*Mn@8HH7v6eL~0)QU)t)=(=H_j8$*V-^8)|5(0!n z&mmg$4O%cv3dwVa#j0-sVt>b>3$n~*1~s(5Bb2XCTw$CSbn;+py6W}eM|i5Sx;C6d z5cR9H>oUmDH^-I zEO+0UXgbFal~-4Qq)o11;a?DC8vPcf-}L(ZlT9`@x>Bqq~xnP`gu z@bqYSp!|`1_z05c4SS?}g>h9n6&>1-@Y_<7Px+g;;cP44C$O&4ml*{j^wT~MLzm0L z!M;Mb9+Tq}VBtLM=}1Dcs$lox7q}<9pO9f~!clUuXyFUvmL5=!>9uUexf~X^Y~5x_ z<0&XcqVLi3`R~g72`9iB7AS7&jNB)SNl{Df0lbSpbpri$>KzQunq=6jB%VHeJ;($c ztuMAB{PHTpM$M(LyBcrq2#1D_Ah*N9hRT9MXkYGmjo9Ly12d1y;M^gfsxUJtz42{Vp-!H|Q{E zl4hb>(&!8vFg%m&HP{saFbH{;HW66gCOf;k^o2?Ihuzvh>Xv)*&(^sv^J~s}8GQQP zqCBjzQ^B;tNKA;8v>F=sgYk5mhAzW`K-2XHSe&>9iRXHM)1h@9PqI1N%A8gz4*C% z|3T5aBW!}^DrkH-dCC+M-+~sf;xtXM8CkxDze~Z5c^NSv|m~I>~=nM``8;0zNs`UCCB8FXl^|t&lQt&je0;s zdMmYb@%T#Lq5Ycv?KomBs&C!>sxF}98Q(dFBno~O&ILXsVso+W{r99cGn=>#VyT}b z^g?To-jenpH$!5d-2a8gBDf~hUl-h7CZ_7dnPUu|=o~NWqQ?Y0iHJ)p(cy*S;$pt? 
z_e;-yX)oTpMPXcSMa7z$2VyQt7^-bUXhrI(II~7Hd9ZkxJ=~nD@5r^Kpu`}6c&AtO ziaKTD$s!0CA2p)dIyLzh-q#jllKM1-_*pGz=<@T=dOy5f+?x&)L@~gR;Kqg`xVu7@y83Zo4E=1;uH33 z80h@$Pszo(#*aJtFjsEe}v432#MP*$C=L-}DATTVdq`p2zk-(W{2np; zI#!HAEb61I!O-SZ#0Q8JqUpg3dRF{bQr1K(q_t`z%`ure9$(LaR3KA0N~!%WkC#-y z5g(=yU9(f<$485a=i5XVf52{%o-OH)ItqN6Q6LOU#>Q%#NW60`GDb;^bK?W{)sMid z&TWQeh6zV)uxn8rQaz2bM(ksBpc6r~sg55f^|~SoT%v;j`uJ|tXEsYpLW=ZFHM@X8 zS9{`?MP&Men7$*Q47y?QO1{pOZi__uzuJ|q`^z02GZ$=8M6~Lk!;anuxXtExwCA7X zFl;|yH)17DbGWCXNi6P!Jc!5`*Rl(3wKch&rQga~r8keWD*%(lv$#gM6%o;>lSQ`% zm(f?d8B(B%zzZYWgXP?ZX0NgQN~5VeU=sH6PpxhLr_wBf^pH(Bgizmp>JTn<`F@(w zunS#2FUh+F1ui^yrM#B z^f3g^W;|a1HxE)ZP-QV-s?mk@8qYcT*aUiqt*E}q+T5i{WY^krTaVZy>fDfq+v>U;%N2 zMQbeC3g;_!EhT^r{pRFpCvG@)NZ7R3Jg8|oPwYOx>9?$e)^Tg+B6O>V>!7A4r78pT zD}{i8EJF#)SlmW$moYN-76Ga4WXT4UCK^8fWHd0Aqy*Z+SDtT4sh?Jwz5l`=h{JOz zT%I@9iI^cr5xAkfDr?R_O4G~QhfA$mxABq2VA#s&O`FDj)r@2&NgZK@hJBSGA=C|f z3zT_Fq1?@e4(ah_P6}%3hP@PlRAKA#pqpX51ns1s24^1k|@EiQiy)h*Sgy7az#lR zQPhm`x|7fPh1%8vDKs@*q^2lp@Sj6^JJy92V%W5YcJ~ondTg%pyn*kSl1|Vh-_i;p zXN48Q)VL6ZEoQ)1LcmkrKv{-quA>@@ZHVz=;(-bPSkjn4(tw+8(7`&ZsVP>nK%PeW zdH-0kpLsv+yiime(AQ&ehN=kcn|I2eUVUx-G1PnuHdv#tOq{=J>$s(*AMAW%mDV?e zBl7qnKtP?n;fd-JV+ z+Bm_*-30urh8}ENaR>Dy0qmo7a3R^+Crv|OQ!j;yjm7+UnHTP|1NOD=M&7V3u9BJP zFT>fhIMt;7K3+!8Udy2$m*D6|4Y6hu0buRW#R5Um#H zU||wpLt^3lVd3W-SxE@t*LpgF={w}#)Li=*^Is;v2E_pf*MqT85wCOt7}4~*K9TMf zA^18KN5j&o&_^VErd4>Si*PJG3}T_aW%8f|k2?DB7-B`@t2RA;T_5{%!(;qY)Iih- za$m{X*Nu@7VVrezN=K@7yv1AaFf5pqbE8tEd{C9OdD+%$u+gn(f0t}vX(b?M<*7|W zxXP9s+1P^`Pxo#t(tT|aK zr!#!pDHw|T0Z`bY(LV(qUtm6U2)m_p{RHA8Mu8s5m@!YS2u!uqp_jWW#BxnDSCeKU zm#?Y9KNQZfz?2t;fnV6&dH)<6WGEt^N#9_!^+PH}1enxWxDfPg1O}R(lT-{e%rWV$ z-u@?PK*;#EgpOyr!D*f{hWCvYoR-aT{zIr0YS$;P zNK~c9F(&1P2wpAw$+sMRHYIX{Ibjxd^EmmQt98|m9i@NwkNT!jD=m|?ws(%(y`Ej| z;`@f~oyH0sK!*t0Nu^vS4in_!7ZzA@(WMWZI@cGl*T-cT$h;<#=t&rUtD;I^I_q1; zJ66u!R3YMKcl7&Ub&6yQ#!MQsoH4b}-wLixyvt9D_6yk*>bN=nVju{x2c9NCSdh6P 
z<`ajkag3UKSrGFyVem{<+{IAZDvY;%qYSKC&wAo`&dNtvseKuu5sZtri*R{IMan7byH)&;sbYztZ%iM-XeAmOp1So_3ZX_53#PD&HYW@`if3Si&w5Eh%MDJl7rxas>! zslkXvMVRg`$@HQ#Ba`TD`dwaH| zeaHw>jpT=O3J*&t0lJck;-3XXQeuq1Qm?P32@&d~o|^qt$ek&(s3IFyb=yAbjB|Hu zRm2qslv{-;_vO4^{4k-FxgZ%bp^>ur6hOqXW$NljKJLBZ$1ttE+gD;Ezo6Wm#S3Ll z_%oLIUU6FNh-*!bB*FzhDD$YZF@GdI#3&2dkuBy__;^)B^&8wMxx9~?e-(4nxV5#g zzP1WH{1lfn;G|L47B*=G_^fjS!M{te>KSG_44wJ`N#cgf+VpwKHf9F^Zm6vVPtI1m zL{U=Hwp|L1@fEv!w)ViEZ~ZfqxK!~879ATaX!?2QbHi6^KI;g7UJ#_Qa!(c0%ouuH z#Siy-M9jlRc?_)}T8DWuzHyjzN#EyMahwFQXH0x2LhqUQ3)8JX;2l31OQMX4iscEB zSOWCEF|p|8o=_H;-Z-atA+i^O$2<<%+Ns;VRjxHiBE_}z$Lz~+Tzkn5z@i)x5`H*yu{ zg>{2*y8nQ{WL;aaKFOwCQ|khQ>Sm3qP_fd^}}jZO2p{H)C(VTS@S49%qd{^ zxOhv~QnNScwvWB|RE62ymXsA4hfkouev^5qX-yQQHXwHMbiq4$wi`*>(X*qSF*DTA=ukm3^ZZnZ4j?`9-!dGx!m?zS_tGMJDOOW>rQQ*uDS&Wq*3z91b z^=vVxfbM#Vje77)xM19OZNyQjfa}iIHsWv3@9{b)nYeCH9MgJrE1{(%d2Cu@Os7@& z2To(+f3k(UZ6%*>%ad2qWOj%f8#!;Q6ae($lHj7dWLU_)^J*?}(NBk>t{gF~y4ONd`# zQ|pKCKV28P-w@=U^@z{608e1D(Wx7Au=nXrXL5AQ1A78T3}vyh7D9(+-s1V;_-$SZ z@uG3=L;glwIqXfNwMgDbqRNC696iD3_WxTzM;l{X7ZODqQk6+#KhiS2a!AK*KgCcE zeQfc*Jyv@eHJ3~I*f>HXr@{9ld5}}!l1~#8a6NN~_j zVZ}9n13!cCQ)=gAMtLoJv95H`S%d!+wsG~UFR4gMRHXHA)XB$Wbr*Sx#Htb4z{#=r zYhV#kv{cUApP44x8-E)=%gEZGKNC=HYfeV0#iN0PGd7i#l2%>NDbHDD5&DhywCfYK ztqzj^+JuoccMU5lr~otHkDVZm8-3-D5{xv!YLp&Cv0FWpJACD}bt*wvl->_zGZ!`D zAMVMl@^Zv4Y!UJ`HfBw{T9ezMoeiLIEkFMj9B0&OQc$M-JmKCZvj2$hKI%=~)jEv` zYB{d8(aINIJ`x5u38YBCMfW-yJ@GS6^^EAnX zAFX+@6_;WZO?504OOM#eBeF{Z*zsBy2u{6uj$rYqB4-mf)BVN`bYoXHa^COo&>Cpk zMKt+k8LPXcj~#0Lq3EP?gc-EYJH=0T$z51!i+sk>5m(yLTe}FKWc+hW5x}ooABQww ze~#X*h&j$>8{MRJdaq%-^Wxtcm94&26bG&M-}kD#Kj+apAKDcP!{59q*DDB{eL>f* z5nNoTx+puRCQP)&n0S4RTqGgIGSd3P?Gm!v=eqkJ6J;@7z!ezSSFGXwIMjacoHXP= zW+g>Y;~%fv@mZ;ZQ;7ANZrIU!_5&lCGwxM*wVKRasiUV{t&s$sowP?f?f}uexg`>}M4_rGD)DM_QdFTxoTXB>gpj#6+I$Y+j zp0V#Ko~M*kiB_ftt-c7-A-G60dt)H12fqyOng&m}(U)+^f0gXLzIV(@TDhs7VhVt92h3U#nzo%5?+Bmx zikS0;Q#^VJy62_!VXkDk!X2D?WxSjc!}uMw=sprlh&PaIBxk9%~ zk4?^i5et{98}REAyL=ve=@UNn7>#P&c8Eps%o9wi_p(}Hlp^ba#SSlU68Iaa?M$5} 
zP^~!qu|atOTl}?=!c6FUqkIN=tR~V4w4c15^A~S)|KgLc={DG_)(Tl_Nc6thPnmNO zz0th8Tvi@X^ZJ+f)u^nl-ylz>nJ^g=uexUY_(fV;IppkXV!dffR6&NM>U>_@x)XJP zhg&u%k@+}3glxgxPV;tE%^82uFxf`O=ZnQmB0Uj9^O|Gbr1`zCcXlq>tn_qsgOd}s z+}lt)sR28+{UXnPfI>oPk!?oYR>#iNz4FR1ph1U35QslW zIjtts{6hc_6|STDaA-X@W>}@m=ntdUx2@YwF272G0r0gE=s^NYC1QkYfYYKj#_={? zLQ?k7(8asHO$VQ2F~Y^c(#(N~QroFHTiM61=9lPtT;IJOtWP<@Q4TY-r>PzZx1}UF zcHOJDsLjq9RC&p@-bUY@$8j2u_fN~ujC6fz0OD%PsEV;v&bxbgV?&52VNNgPu2TT} zgV*)a=1?oPB^`k?US zExbG8(yXuG8RH2){Am7we_JM%n8Qfyhwt#4r8Hq7Ky~!z(SWaiGCtinULVu_8UtXG zf&Ki)W#4km(kI7_iCp+-(M;wY4iW1{&A_X-I*dNZdax%p{9Vee)SD<7OjBJYOuxYD4 z=-jV#GR9!w@r$vzDDX^L%t>B%v5#AxymJ{aN}Sd}O08N`hwGGo?2J>SZR;y&Fd?jk zWB`cs-585|L^7IqA;X|)?pxF5R>}C=^;+5BwT>qN zuNp$y9CW}aBC4g2h8EPbg8sD0GE4rqdu*@*Js9Z{iwSU~ z{+QPoV0;Ff?MM#>(TD2;9C@Yb_=utR`(VURmck9z|T6&!=#TH@F%@cE-M# z^5MVYxpmVrMqdZJ0_XcFUUy3*LsP!13hnl~cootKFu|OH*bjZh{j1VeFSJXj@Kz&b z!RFK#EOi0i?K(w-lsl-i20c1IIYyZ)IOskvwQZFSCEQG3cKzDWYlCB}fy#Sdnjn+X zr8qSmEuGe^T z#Us5jgxm}M!)1t4&==8eS%vM`_(Y}V^Ex%t@qU{rp3{d|w9cXIO_P;iU$Q>dx+Lm| zKl*MDUICUZy*Qb2{P5|kFRp(=lB(R4XR0kJ%oyrJ532r-EWXO#_!$fw!&dM{a{gt~ zWRsT}j8_XwGs2}@)m31Orp}qSYr`LZ&>kF9UysKq(Q_qH;=3iWuPIgkoSKL1;C9#( zb?f!6K%_|uy#-|T(&B7GB7^b!f?zGRmJ;1uxy^=8)GeXHCQ6}!EpCv-W%IvAmZEioKS*It@S5L0lxPs zlq#x9L(h64+N|^tjk)$Dcq8lsI)9r;?QyT0Jl=iJ^7ZYJ%X4HOURv4js>Qc$<{$ip zF^&o6*HAkUX~Wv4W&>$uhUH`>(aa>PXanN~75^VCmh=W?rTs|kE5~7}NhnB6bq>r> zeyu9IRPT^H``TKW;L2S)?PV}M7`poQHSX<8F3*aZaioy#r_Ni`72Nzzyw`%*wqa|` z#a;!&bkys_w{;^Z3$F6jo}9s+819jh#0DrL&-ee zZbM>6j?^f6TelT;b+<{<^`9q0{bfnU{&7{%Yd4 zU`b~kjiJ^etq>T4z;~ms9`6O07jbPyH4>YG$~#LNt-?ryCEf0r4SuUmt7mRp6YHqf z>PS$kv%I{5V?W1Jn=Fyi=lg;!Eh8Z9FGg}_*^m2F4X+iR?Fu@DTEp?D>1YY1wO}<) z`wp3ZhaVNKc%!+C#Rs64;1XyeV6_nHGIuN!CA8PLPmQaWY!XaAGj$80Am%cy6~wYi zGWmJziY+Prs|A9-S2k9T8xgx#a?0nJ*Iz)(tN){O!22U?M>DL&WYtGKw6Q5WPTuN5dpHj-k z#D&SFkhgCDNbo&lmXlj}>kp%?wcJmzt{p+@aQW$d+?#(vz7CI1k(X7Rpdt0h#F`>A zB=W9slaA@#d39?F`dmq_!HHOMKQ5Vi^mKG>V_s1YGmHCVT-(M%7XhHEdh;yKEKXg; 
zZ=aFj+|_b!EtDM0d+}X;G3wdHp?GnY6{0?@;c?xKYSTu|Is9Mr*!Ets!`7;-+o88U zRPtk4i>OVvNr6%}+g@^Hi_tFA!{Xp+^ZN_&td(YW0)&>tV3z_<-&*IgcpUR)*m2S=N1p)@hgUj0*k5NAe)J)D#q1yyw6oeIU&Q|{?f185AMR> zX$`j82ELAJzxD6)lXsriAE!A8BP%9XyFluGW<*k6-Kyg170}ci@Z*i#Ap;db`Meh` zDK$BS4xyU#-LUSeh2m4;Ry3ndqBw9z*;4G8v?9X^#4DOJXm^&Sm!B6@Xq<@s8)%@# zZD@Spx$^=D{)#>VG4OUC#EBw1kd~?EEkaU`2bLoZ{Sm^nwwfuedjF;ey77_G;UV}f zEWW?qpqud1nY-iETW%UW)|_An3YLy8GF!T0>u&clZI^J>XT~b0;)}iDD!>B|5`aUN zy7Bqb^PA?)dLOnJ;9lX`>M<4yTYIBFH0MSg&==qLhbWGzbHO!@5iFl>imIR4UU=dT z$o~jQJRi!+xR@CE1lRkA3gbyuql@_oZy2g`OraYmY>Gs)-egv^Aw3gAvCwSeCW8{O z_qf!hzmsbnx@cP$`}f&N;?H2ny7zZ+Lam1h$4ds91PMkqwM#aD{un?xAWTA^acb@N~Wia z-R!s*uj3Z5>?vnZo8v0^cBR+}dE`(P_SL~R%--W9%&f25*VxEE~9sfvy-0z88zcnL=I&V|OuM4S= z{AzBTm>NblCI7O6YaP5_u!k6mquZ=_aBZ}hjy|mVHxPH%BD)A4@viMB;PGyBC_ujK zmIUstd@=n1cJ=R|%QaXIH;M(Hf%kRoVtTQ6DXq)7GBj?4p%qj|`bpYwd6MzC{Go;T66TH4|-sn&iInmW@LkKTvh%QVnnXYPJ^}3>RT}z4^NtiCTrmw2*fwz zB8uu2Ve3c8=9(e=R9E%)VLI^g6DUW>@9kRSMGdBQePMa-Gp_tW2K^ADPJ=Vnkh_S9 zE9sY6Ow!i|-=>L&kEDPiX=EuhylKd*Y_hwdu`6@m5Rvv*U7Pcg3@5~TIH`$3(phfk za{6xlDtYnbJ7@+wvvfPVF}dWe&0FRbP=1k{pb>;v=I@Z+o46B=F1m|DnCpTLksPytI`;+V>)sScAPe5zclgQzWRBO? z-NDPg%S$lk+T4`P|Ac+zsYei&Ikox>_q?&IgN|>nROm;N@jfSBwYL=PlMvOq*cuB@ zI8*RJ;g<_1GK&V=McT&4o$u6lnG4OGB0b-kkL2QOt$n85p?@;Rd-=%Y3eQ=1iW{!2 z{2!La3u-xUpYS*z4{h&6TR~H)n-+7&sjDpoQw6zSl%f-yes2^dPcA<&(Lg=;!srTs|0zhj1&b_s0Cy za!p;gvz)xlxwvP<_s5T-sCOg8*G5#|6tq0;Jvle0zx)sDDsR8=9~29z<{~($HM3Q8 zJ#*GxcE9r}4z)$phfC~b%aByGA`7<2AgX~h$jFBAi^5;z?#LOr9Vd=5pMhMz4Sv`>_A-xxRma%VvR9+LiZ-6ol0ngh^A&cv`LNRL! 
zFKd(;3R0-bKq`bfwfE_xDE@CkSApS4P2w42^hX~#fz*#%FB}B0yA!!bkj3Qj=Y7-7 zVvTxN2n~|hr()zBHe8zNMo-|%+hC84ZhP;0gyQ_>c)h5rE`eA=mkxX zt424m!Z(<*9r_y6PRd-?Z!NyyI}Y*Z@MuO&D*b1nw#jZ0%D^*0Y4o%?WwqtIPMO(Y z)(ePBsBptmMoa4!p|A}oohFWy)lb*`dW(7a%47PZ(KB7Fsxx2X=wGR0U$|99%#^&( z2;;~19;ROnu}-)`S_te{#V%vl5wN{R#tr_&VHTFpFMLb4ah%-cl>;C42AytPO*(%( zwEsP_GgQ#jgp|F*0nDCY;(LS{6jG?aP!4wpz7o}67<$68{6JUzNxyHSHqkhjGfsU< zBfCo43|>NFquZDH&M~nS84c!b$A@KI^j51}8rKgz+YF#Zahqw6D&UkZx?iN}Gbe+NmZcRqG6wZtSg9oc=b@F~ujT2AtY zL)2pPhWXc#esnVIQ2_V27jf|My^dR&K3*MH&$o|nYZx~DZw{t>UisfE z;alhJ+R430dk%j7LHSE>+T4#)M0-iE4<9F`B9M{B=gnoP;5ot zfMiepD$;QwNQ=7OLxeR6D*bvt-?^LNF47;X(@fF*_5c47uP;qnd88=W?Z{lcM2q!1;mx|bugX6#ED{#6* zD=C$=ME)}YjwL6FUP%@xGAJl}(s|T_xWm+N=A0PFvOHxZyt@-hgGkOfxn+_fjQ@Ml zVxHt*qGxTJAP*r{&)>GDTZjl5Zvt{$c0#fX>Icg--+jJW0ud@pq4Wwl{>MJ5JBcoq z5cHPWUc!ZzWB{ev%2D`Ulhxs)aKBDb8{v5C&tFyI+u^qSqQelYxP`2w`V|J~Mvmr7 zq1}7^w{^>^!FXpK0MSuznrw{-w_>dGYrvQPs3yd57)JVJjCVtImy6NxoV7ADbh8g> z+-p&#PS|*274Xee8AwA^%N|ZW7Ig#P_dQ8;px6~eSSPH5jPw*#f9Mf1%fCAat5F!t zf)&LcGqOLx)1v2f#2hH!6Cb$4Z<|uL*T*fvBZ%*;-OcXjhC}#4aTM2C3qm?9=I-Fz zIz~Fs8SCNG6f;cQ$4Ku!Wyw(#hT)C7_XQ0~fakwXpW7^!GlO>tfBc2a2}LG}EG}7! 
zp5NV|Va|AQ9kt75i=EZke95WJY+Syb@jz)*{_Ib1IPEr}Uza?+z+5;*4UXtl{QOFH zo=C+6-dum>2W|=fG{EGOen4t`+QusSWeWY4dIvk2-jB*cM%_nz@L$l=w@GiCpZyUD z0ywO7^*aV?a1yP%Od2Lbo#*ESK?`>%Sz+_zbHQTI%nNF2Av6&Xj+>!ZeEHOPnT&i~ z3PlU{f8&#=+St~F#8XL!>gIgsZi5A-_8oKvI!>#j*!}JEK5rgqr^{*o?U^;{BQ;E@ z2~N62x`kssK6@AGv~T)TT^GHf`}e(HMpkL}3YCBoCKC5ntaarGXeAanziAg}%+yTh*o?6Jp2`iKxquz5a=IEu7JE@0f z>d@m?R?eY{SJ%!;a=Y!b1j^6d$Tq3_6X{c@IQIU-XKJ~w6?Z=x)LDpl2!NW}yhV0U zY~@iK!4PG%>-kXn44JiY(nXlYp7Uynn2tZ|5sK#qXGQG`N*vqvGX1I=5mJ4>J%dle z2~$e#a+BmLyFktDRo}T>!g*3%_>M4kEKwQ8uA?z>40iHJR8_vu6%=*x z8mXs@4gfV*$6dcKy?gIPoAwgI%Js)RM6`;-6zPh~FTjhk0kD)of;H}cE4LCD15Eg1 z#lAwj6V*N;l5ke~C=xO8jOs}G!KB~rOf}n7mD&ArlDF-=?8jWWN>V>SxxZ18XvayC z)t6Nbm=6i297Z@<*j|m%yE0$(2M*#B2hq`|IA7@#1sPRWJCU#vgtfm-EVw z_aik9wvYh%t%jWYU~CF|6BLD8IEYq-nEvTv@E<7)~Y#)0*i%O*=zmlg-Vb zlp(UT<(PM{rr%RN3-{558X0~39MT@C@O%PuGlXh%LUHU#D_tpFq|ZY5PX>b*-~8_+ zAPTfTJjLB}i3yI3eJP^iYR8GKx?j}3(n3Y|rLu13Hkq(%6dt+iG1maraZ1Ba8s+6c z5EsZnTE7glLa@08eOz`I3`Eps;lq);C&DAOc`>L+i9p;w{<6S0nL*1}4~cr*mRo4Y zy1stG=b?fIp6Em7a){aW|KXBE+El7Vs+=fa0k0KnOl4o?$I?ZTwgQb>O_i)-c&V}#@(63U-@ zApZvzjg3_>Mx!WgNmFzx)w?+2k`!nmQk|7VLq$b3DQ1kk5td0c7E;6TgUwNZP^|OX z+Y>qE<{6s|jy%ICjXYNQfU=nph(Z4}P2s_<^!JYApRJ1O$yE!;KbYOZz7sun&Rd&< z`HK`OUc25@Cu8+O|V`_PU70S6D77R}$wXdF&;jLWx8V zkWu)Sb)yY#|E)V5t--pvjR~j@5K0jt6pvr)Nd1R&Z28K-qx=fIh5?C6d`piX(cLgz z^Z5X}b=qn>tlZFX(!1Hv@?=~JM3c@gUPU4ooOB0=9;R4~lr|NliV!3DRml-CFe{F$@!Yj2iqBt3LW@|hhw$;WLZpNB5k0_)k>i$ zR(EJrmbM4+>X&C3`J7js(K$2?ddGaLKEO<|9=$e-|SRgGl zsF#%?gwes@?^*&C6GEUK8Y>r1sG88s98n`R39+%U1u)-SFKIn7S1R+2$cP*q9K=At za(JO$P-&{`ZNU+aYpnNx46y}b65Z_8-rw^@Kgoe^3#46P* z``O6L7@IaK3vzv-V4Re(cSwFqmOdmYH7kIecYykM3yz-&0OZTT*}T=>0#$FC#8ns) zrJN}$GG9 zzdwXg?n?s(iYqWyAsJ7JC<9;#{bNve9QPGRX`AR)%^5-v^fN0Mb_eR~;Z$BbJC&@^(_yVxJGU3fv zoWh9j>b6Mjt~v9`(KzfOp-40Nzb~p~3%^xwd24AA-&Q55*HpudI75+ge@t7_O-@ zEKk79af4xV5UGYq>ClfmoGRa@L45#-blmY1R+@A#PmWtK9>0qBq_kZNJUNEy9APE z;;k@ReuHZxzC6%LeW6>X{j<}_2`mZHin|)^O`KC8 z!K!u0QdC&aJib4Au59#mK`dPRmO50etn6dvn+4)_)g; 
z5a6=GT4Ze7mp~cV_U*S;zc==BMy()1x6~n5+27I)(qunyMRa1xCR^p)QgtwnPfW-h z5biNgtP!pFM3PGM`hVw!J)9fPcZhpG-XP)3qq@ww4Ty+s+3Kr+vAx|$X$K5&)o){aC@g0Xu1OCiL<>Upmx+PLD^ zg7??WCdx{6*w2WksvqWonu_+LgUN#A_W&miA8x6NRL(ilco2FHNnv9BH<=Z)`ZpHa zBr!!qJ0nP~{7z!N$_AhIJ@a+XfK|ni?1?eA|HIZ>2SoXGYs308Fc4I_EgGqz5fJH? z?igZdkd{{ImTr*l?xBRCyJ5fqm5u>{0fZsG&GS6pdCz;!`yVlL-#gY`d)0LYgaOt}}(F;F~$b%y;`>5>gq{Bxun*{8fGA7SiQ+KwrS{wFP04GprrSrIT=%f10B( z+AoN`^aoD_nJeQA&)C_Z>qvR%ql`nKlo<(v4@fyaJ(9yohmr<>R+G$@1NhW$3Uit4 z0U5i8cQ)Qlt4DfpXBox~YR(TL3EWNZP53|E+5lZk#u^=#W#_dabd~A0xlqz7`AXlIpz}J{=oemS`Ao zM;-jGyZRODHn5uY$8>Mb?eBpcwUc#T)POYNE0_4{v$!^2iwf;~B{^bv$` zfc&x0G&P=vx2)$&hDBWZT@q_}L-qX46#(7T^fG#}Rj^FEMv_CMQbhzckD^}}Y%_d3 zxz70uC)pEFG3V-0d}M9MOvuT4_Gbt+&%GR?yh4lwedAy@tHGNBwt>MvuIr1tqgw*Kq z;PC`#TT(TBqYK|zk1cVzyXX58Mi{9^IpVwpfEv>xN#NV%&4oaL>n){0JG%hLX=?k) zrZu8x^<^a1_BTTn2e4e3qh?FKcT9O7tA#M!dhD+@8SWtJ&5nfOMmyZG6v)wCs36KE zS)Ye0YAxU*s@1fO$lkr;&H)J5>uHjS__WMtz2njm|MU){)e%t$f9y6V4-HJe=p8TM z=TWqgr@=L-BajQ$z=8}&Q~6y4>0|U;fl`Uhip)BcxwuBev-}tSS)heg#P>p z4GmS99|YBy%=P4-rfx-=J9S1GoBXV-CJS7023^_)INpjmD*b^Xwl5 zSnEn=*Zl7jbj`3ktav(%f{bLv-GaSJz~OiF5mHLf!_IS!H?JE$2etIZ)=4tSzcw;& zb2JWzgIUR?to;2I+ezAL^cKKsK}`vdG%c;5nlu#=hn@oQ)f1ggvp^ul;Q-$#Cg`>+ zui5NsmuHO?Wb#f)#p;f?rEj~O#GZi%Zvw=hlR!SGGM)gz*Yf6e-uY%XHYL4ON^zEv zOp(lM2w1746-aDa`dbsx4FJ%##mO#8Ha+OQpR<58jlZ*x$82!Lgh9={FJlY6n!og* z+=adCa)ObaroN9iKuNwwix!dbs}J^)9NtsRXCNo&vXHGDuop(C#QvmHS!okoTeUVw zPP3yM|9JjW^e=~&t*|%i1TNAkWVSYxEW4#rvBxYG#8;juhXds0F)IYOMAgqB)9(W= z(4EQ}UY1dh0Onf68}KxR%H*FcV*K|)YwZf%x{?S;a{)=7W{)UX3$=_>j}!iv#Q@7O z5Ne+55a1qXXIu2_<}ztf;w^8~Zy(l*aR9bgaicK|*bBw7PQ3H%3ZRO_zNW+a-RMz1 z_~?7i@kbQz^3&2saE)>Swxg}FEWe3e%zVz1fW3Lpa)=!8br|!#7=|&3&O78T_wdo= zmm3&<*UVZhvw{58#~kmg|1JMJB0ar1&EM=3$Z)2Lt#jOXXh4!KL6 zxz^k(MtqBM*U>s0+zybIkrsFJo|Ge+{c?^1M z<-U*z6<14*5YHN31d-lQ_iJ3*_by8pdT>77|1^u{f*2eG=1QPx*5kFAAc4ws)NE+TT3<&r(-|15ogMhmJ!d$4EW&xFuN|w0_A2m$WJ+^uybfK|mez`MUfKwP(cB;VdT7 zwY;$_SAWc0K{8c)|5+9n;{9v|z(`{N8a?GqILO@YI(bqX#$NCD=**Qpb3gnes6>l9 
z_7g6%7WrP-=_a9oybi~3Fha!x1;%cdWK~uF28?Xe7Yf6L;9Ak=0GZ!MbobtGFSUyZ zovy?V*cYAX4?PR_=D52vRYV{mRiKKGOK>KQ4aB}y+3`%`S)?$U=-&%~zn8qx4%xNz z5WNi}`a;9b>$wtKHcIx&s4@qScI|?rAp1mUC>O*?7%$+U+}Kk?5OPW)b#+_&Bk7&( zSzJWKL|)%HII0_<)MCa~tF%uA1otd~VUTS`N`gBy&Oa@9SHtu^RTuEQ)N66Kt1Tz# z@({*({R|2AojTA~ z3!fpt-|yyK-{5+Lf5hSvPss;g&tCx~7N7;kZ)raq4J$6YA6h4#S`t8mv>PAaAs1)9 zrcs6aPVT(>)DPGaSB38&qdSamP^GDf-*b&#y2JdwQ^wf%1qdn3Zk}r?fSBon0e=Dw zwdY?Y_Uv^)q%y?QBJf#WAt(O=-M849EOrFDc2qw%dZ{Xu-Qiv=lm6k76np;zg1N-D z^zndiIq&lj2)&?qy1)AIFQxFVv3=$Qvj(5y&7~{CM`y9kFyga50H}d`La-RwagnM` z=byba8?k+RU)p3FI~oalK!i$L<{l{}zoY4Qg^#yC0AA7HtcddP&TB@|xYT*Wa`01BPanjmMc#im(E1kd zI65D<1YYNR>m;ZeKfYkz#bDZ2Op<7K^P}rJ9L-a(Ec%K?qt8g?HR&_`Nb6jec={6M ztB#kJmje;ChkqRA-7w*nwe**SWmjy0J&LF4w=S zRII>iki7GN@Ph#TXzQOB-Xg#0b*k+s(2;^g3&asjUQ1?IJKs8T=-v;LYzy2WhIT)g z*tz`=26^qn6dWc9x>0-NKnb?6sP`$HPB({>ny0u@F4v!uUrwr7jpq{xyGD%_KnT4w=+=9+k)wgO=)~~XI?O(w_gtyn^&Tmlr*;bWNo~;ETu5we z&z-D-PCI<#G%%WG`e(d0)7P_2FwQ}3TM-p+-UjZH!mYnZ619TMRJE0IW5wRvx1`<7 z4u_e=Ind~nA7YsVcjm-9tXxj1#-#8`*n`;XCo*Ec1nlN&pqnQ~+edSTJ(pb;vZCvb z-hMe~>9#khP0A_f_78t)cdNF=6y)M8HNIrw|2RDq=DJKo!Y^*Te0U-r-cd5^JYw+4 zVFgsPbSpRinp29rJfHrF8XXqzN`iuLb+&9gWh_sUf2Y`$=7I!itXDK%dgD;&W2!ek zA!}uGZyzmLOCq_Ixq>&|MLOhslKxn#7rTTX>w9Ei|INAFSXIR(D#A=5Yj{MzErXoP zV`=lV0cg{7(a#*~czR%F_Vrm5e&=`G^!SUAb?4DXgYfxU`bc5yfiepuiHTC06OJ~{!W21R}Z2zl%w!tFZ#-(1LKw`3C0fG%f=>mh-wAP_x<3w|Y3JP=zrce{rIcYe>+VLRMec(HD2Y@BqFXn#a&+)L2yRh)t>m0Ht=xF2M2Ry65p%VF$FCf9%1s<-FjcKo*GCdM+;>%=*_ecGm$ud5R#I`n*_WBIt6$@#S%$gE_Rf z>v8ByECLXNWcd*L93Ja?syI*FLABe$A~N0}`oF#{OL%)z>;TXYXo^rUTc6OTO!a0)-1cL#6SluFM@b5BMA^a7_mefbUzB`Dod8en$engm`dfZq9Cgn#oQ zl&`#jvzGsV9V7$WgVBilj+29X zs;eOdmb1SI;7jR5$(`RRE$vMyME#SIn+pvU9&gxj*)7HdT{Tu1S!n}Q@E@rTeG|4t zOR^t?;&{;7vk$R*1)jgT{redzY>@*#y)WlXTc7E;qiKxY+B13fJcU$}Cz8bPF|MT7 z&0F4N`ww331aZVVUuCHKbJ^T^AfO@&r|SE7|H1g+XqRa^(XhB?1gVIE&>CQ8VJgvV zxwHAr6pL6HY_x>9TqUUIVi>Jvl}9PEzlsI89<2jPnl24|J}kcSfpa$Dl~tkZ&YbcE zHn=hXn(G!>nj5H)QrsK9p27CBA%;Lf`hJ5e_9m*`j3`V0Sx)d9`@(io*)Pz*=hJZc 
zM-N`wd~Y;>OU8h1!QB*F{223=in7;#2Po`y1qL5$kXzoOd++$_@%LahjUnOv2+#g0 z?T0Q`ZSnpbZ-2cwRu8?ryI6F3ce_CffcYvraOteYH0dRJU)*Uh0~?&n(yhc6bmJe^t|owvs)M+HK2 zUfRkvzV#+N6&iS>6&b;vP|ogtCr7QGXhocdxzC;TrOp8fL!Tt+po{}!!G=dIn#}jx z9evbf%r28@h4uarWHgwVkn|thKR9(^nDxV7Dcd)S~whpNvg9?nJ=Nt~M=PmnG>T4Q+vMYUT?w4N#w z)LlF)p3%L| z3_gc<$u=EJlA(;uk~7=)xlc@~C@`&FMaiXQvHogbJbZht-nHwda&p9R?}9z8v-lSJ5YNAxG1c%i2{g|CkV#-7;Gv|^tD+m zj@q$P=K~;kk1rjO@!=ZXx=Q*iw`oF9L&J3YW2dZ(5yyIg+tl&z?SYprg-Ce+W_I4gXEiAmWF1<%?CtG5<_oktQLz~Jr>VxeXn_OMVM7^sTijy;%*X4 z6Kp;T&Tm2UH2;b`vx=U&tc`X4EI*q}ouuY{U1ccXSBP5W=iM$brY}7W09r z$2RKzB59zS+lPj_SL4I{)ve?^LxRIgHNL<)guxHD+f?4ZGUw;c)E z`xLa0Fb}=h1Y8q;?>s~<;7V!f)HRzoBM zHJ59b02|(V{5{~BtgiOe)%{ws-UR`ugMzk` zUI$fYk)`5y52!3}U)N!zJz4tH@oBVU{i6A*aCfPudTcKdeUJImn>!_?tJtq8QhPOJ zr7`$%%8^vuF1S=iv0VKemxmnF$VsII%~_Mq+eP0oBSGpeKj+-JxwkwEk|waa z;b_D_44KSWg|v*!6Z3A*4a=A$4x(yh($jKr1ijGn#6>&EAI;Jz0rauY5st!Sqyrxwvz_80&KSkouoe`R!0 zqxIE~y_lm!zTQM=bpL=Q&A3F-7_mJl_yrifG%3Vhxz1=WJ|s{M#=J}2=iTZj0RiRQ zdZcvU!=VF1>jt#uHjh6%Q1satUSF93N$_fB3;}QP=y{$hy5v5TOTWocqL%LJUM^W+ z;p~Ea1BI(}M~T}Tjl?uHHo#`f$DE}tE}t#uT7U$+$uC8Odf#_?QwX-#Q`S>xfd z(}0kQ^2wf-{sm5yS|rUoo##rHGegQG7M?>K*CU^IbqG&?mwb1`I>yH}{H8aoy@eQ7d#$8JP{ie zzO3UyG#bPjsDb4xU`cdo_b2^AP|@kUntJX$4UEpIWhs;eo%fJ+AWE7x6wPGERTf^#i0zvuOkHroMM9T^e5g17C{ho`_9zW>w7( zw~$%88IZJ($|}Qo>Kql-9RsJtMjZTTuWP>}q%o4gY67wh+1#g6&p>MBYyYU_bX&=8 zm6m=*J4%b+C+Ua*w!>IGq7EGMO`r1V>8wVsT1`6MAShv!0kwkut zd64PQN=pte(i%5VT3f{K&R*!1Jd|i$bw7l~@U6U3a*^jjVT6g~_4Lv_(xN@0c7bcT z#Xko~1^FOks+Hd^kl3!t(JkG*kWg94lx&>`r_ zs#7S+GqGIT)a9aSJesZ+6sG+yH4U%NFntxNU0ZT_-7X4jPJt$yl*sztu&47?!nAtr zoY26&NkDah7hri2IlfK$)U_B;n$(!UXS)~b4zUE43&iT;3SXHH^ zLnT-*=Y7nUZZVp=r%rw1oWu_OqS7E~1$timCfXa%xBP4MlO1JJ1+Oh4kE9l28j(LU ziqG^?LeMF=h|F$HDAuxS{am=c1>k?vqkjw41ySuLE@g9#azZ$Zgu3nP)Y#iZRvsJP z<2Dh73wO8)<;TP7xeb1H-xUW} z^ilk7Qzg9Z@4T|952$eaJ!C2m82#f;#C2NPiwxITG)5P$!G$BCcv@8q+bO;HgcRN{ z$6#95gpQ-`J4FxRFy&Va)*5W|xVC*B3a7h&Gwl{C;gvdu^`f^SXR_-fk%U*e$5i7N zx*D3vID?GW0gR~7nivlBg5cJG(X}kbw-x0Di>T%O#-NVw}hpx 
zR(3>;bKER6_qNi5q~CW4K4k8V3V)t4Q5fy5h+Cw0Bn)}#ua4!z*(C3|_d*b$NO(_Y zTw6b82qCPJCUND2el~Vco=w<_8|AvqXt&9vzPKBmll6~f-O7@8&hy!aChq0Lwv?OP zv9YldVVpS9S%G`QN1a+3Oh;q*Ry5z$Z#!9%7xURt4JrIJ739&o&4)BbszC>T0jk}9#ykIa^XB0 zFnzSjyHv7%!=+wGV#eKjHKO5KGTsd^xi8WwrZJ3!QaqImaXkwuOFyG7CANT+et=?M znM|A0Omm065Jfu8E$jn#@%4yoPaoGvQtq@hMf5Y|-PSUk5(!=67f-EI-kH%bRZxm( zU$%Q=BVRF1L-4y;BFGG0>-&PaZm#G#JookB)%X+nr0T)R$-*4!PJ|dx#zoosd?Wh zA()_$@L_ht)p)YNK5L}nsHab)J}CJ4bI^?&OUrlt3;@c`;A36{sZYT0#iS3xJ?yGV zN1rVC33c_be_#Z}$Gwp6IRAcyyJ_HOlVvJ2=FGi{qnbUF%+)7#S?an@X$yRRkfe&N zhcgv?R3b%j=Pc!{);)Z8D09*gHrK=_^*Zfk|2`a>Bx)0QN(YMbUV>^S)j$}j)kDf1 zyLPDnr|L~m5%BTPnReB<(|=xP7N{?{YEdAwnX5K*1AK=Kd?(h%4?KbX`rB|Asr2cv z!)Pld8&4pqSwPBPJaeyR%^I>ESW?Bo9IjyV;<)1Io%~BUl6`%k(upDj@1)+1eG*W| zC9nZvJTo3J;83pD`tAC*hZ+z>c{fH)AYY3r{{*hx1xvCwhOn?3Lxy<4%HFgM?PP{- zIi;mZ5l$}VM}i?RTn|PQM*2PNPI^_fQG^ zl#e^ij@~*V@C}K0ewQ&-*7HA1j4dAlyS-;`J%Qw-nv_p2+0*==P< zKP30Ef+v9miLuQbX1}_g*T5JqqVxX4sdr!vu7#tD>@J?KWxK}L{pbGxdFgh1k_u(1 z2^nVB$n7tN&4uI)PRrD+bVY#O0|WZ6-HVli&6QCct7{LKPVyS(Gy%rD>!;O`xhk7b zXX>!<1d{L|09r@A8eK!SmI;1RAWMP%oV(pC#Oo(-ED0(Yu4k?ZOX8VCOq*75ID#kq z&zEN@vhWADIoCWoNgvK@y)gnoV>hUSNF5dayW1t4r zG`6Dkj)x-EVyGyB6#J5f1GMT)_-k-kb$Ss)<1y?Dn3? z3-Ea$^BJH$yCW+-ssT%uk-f#@8@G{+Whp^HG|uLZ2P~DL6sb`Ga;G{Bv^yU z$BP4@*gN|4Rs(TdQmqnDOF&wdx?oyH*YX99-Yu!X$5dxOW`4rL9{v9dqXbLp`1jyw z7lc!xm)BoesUmY-v-h-}>~hF2rRe#W5*n}u8tzFf-2z1T8{LQ7P%j9J3!5@vgAkuC z+cwTI{~rgVjI7?rj(1u-doW01@Bv#}5;W1IT{-fXq_ZcjS@xnZ>?u%eA;Xp5gRkEQ z6yY?K?%$$ElnR}B4E`xo@6Cfa2MgWSZvxp_Q&&~m`Hv2n#|<+Iw-0>omY22yk73~P z0QA2shFw6{*#HqCvYbePs`ef^Um=<4Cv=s#jH7%TSPxZm8f1ehBUlhCk*ng=Gx~SpG4`0-rG@sgB z#J&`tRSb(*1bYL98#>HB%27||3i?!jPB^|_{?e~oaul54)iAK3$>3#GFBB{%UQzEr z_n0Yn=yVcJt{X{jnZIj4KM4ES?Fn|V6lfPOF!j&i%(wM(!RC-Ik_A-NTYpdMvR5A>4j_^{SBL;<2@p|Y<6ocLQ@3pRdfc?LUUqDgAw#tlm;XkYwg_ps?tH@G zPe$CH2|J$)Tl6ksA{tE`cFlmOCa@%TpA3iwFmZiTY?n~2UGxkvc2oFvS8=`aY3my! 
z!R}n-$&1(6<3~&tyXLB=1)H(j=K9BShX2nM)GA}e()UkxOYVIz4@4vDUcC6J)iVa>Vlz|Il+K!(yTM0pTzdK+P+35h+f^-JP- zt_@cUazMCFfo=-Vh13U)X<40ImS8>xa5}q1zfk+T7_?E9D3ZUc>Nci`IuXy9!-cfJ z6G}F5&Eo(X)*1JWV#VXKLBUc}FnadXGv})`f#a*NnGNSgvFA=n&>qj8q-orR25(01 zh1FZd+N+`q?k%wZSD={3)Gq%>iGQ-Aoa}5?{}fF5hZ1Yqwx`u~A~VV41k< z(-CVqROysTd2$}+*(EtT&Wm6j>)efJY_1lC5w@s2OqZuu^jF4B3QZA?yh*h}1Uu!Wzuf`*IJSZ!%jhDaVmLb!e zSrCeil5{JVqjd~xPpxCA(=&b#VX=JZQXGf4q;ibQCGi~2?Z&ZL;$FR7c6#XPk*CIr zwdSYEh1J9yiQJNv&0U2E=M1=aaV!w3@5||w9)ecbCx+xlpp)tnpS6QW)zyQub&L6n z32-!htd_LMG3o?XGexwIv;{SNGavWi!bmepmRv5rT8XHK$P%;32870an@~5*>S?|D zW(NIO{;nFGI)E$a@d<`|k+xWVaa+hpFzU`jqCDd~nga5@qb%j^P7|i&aojLeLqgy& zHfG*+Qv!8&Bs&jk;kU+)DCa+;?^gwKHU>dtYx>F%InFhhwYLkqq}RvS)6M%NBCwWc zy!Mk9O*<8<43mNR)bgIYx!E~x6SNnA@PT32b8U zp^vzlIlh6lVOLlzcDh+jSVabVOly}_VaO=c)Wt88GvvP6u?De>EmE1eEd>tF+vgcW z9L*CTRZ=l`_ONWnJJA1Ewu^fZ?NDW$c*=#o36p1YaOvnF-J_^kB-?Gy7BbkFdHCs2 z=vuQLzOOOPg0MnaRTnv)sc&rYg)qjRDAJLYFM@Ds04rYeAH`IHGZpBJrUVA=dd{) zEA<&yVaR$8{TW)i=FIvkbhJn>?gG2&b&R8vxY)|)@TiW(mf~c+&vK8VDb=RhatVW9 z_XQ=ss5X20vFZjqka^e#D<7sPA|-f7wP}b(4gtH)}D6asG%; zRy0t#PPKoqvqfFD$F2z)$&5;;8eDd68G6n{^N z1pZ5`Y@Tgc(8MdKFQO`F2Dl;qh>6+XWQ7j77Xe=I*tlN^FeNoKkS5pSr&7jNm>fz4goIu+Uu z|9%mw01S-=&MiofGYGv`_zv zWK65i9(gw3UL88TF}=%~PTXtoZQ46C-{xPUF$*Eaj zx`7O#$oK8D{ED`-;xpc?_NzkTiq|7 z+0iAh`hKj3atclu&3ya11Vu~NQZtrruUJs_9B(eVJ(GW^G7RIkM{m2n%>&0Gql$W! 
zm@SYaALnb$9!2aJM1?XlvvVpZcSc`8Hii)x`ZCUAO919-*AI+vsEQ;93VPsOH)lkn{7 zbRNdIQ~Ab<=}J6^YZZT2`NtC1w^3)E z{7HTkN6nr1$I2D%+lskp)(WYVIoz7ycYNRq-AsXk+UC17iN-q%E!TlPCfl>TTk1nm z!b-3K{VndZ6_o5Toi44Sx(YDX_adlj zlA+D(F5hsf&9YhC+9Xl)!dZIl`5cJyOj-esRE9Q@araRLqV`WW>Gn?8ds@1@hbu90 zvJ~y!BX*o-15!tHxsU|ed3iK-NOqxYtL1SLFCz^dK1!YkeO1DZO67itn)C{Z+6+?J z%uFV*{6W`yf`sI^c~6%_16WL)WANDkd4#D(QTN1xf+S;KEt{$C8l1oKP}V-OG6mpovO1BCATn2vWFf8fv2|m= zBaVDB*VD?JoTP*1xecLcyTI<<425AD=4S$#d7F8zd&=&4hy5@u58x8smerDu*sCvi z8e04(Ak_>N4Pq>%S??9slJw!QxjeDRHbh9%v8{oT0b~a^!t!e-r+Ir&h8=HuLT|yu znkYA#${ASCs-&45urjjziW;s}KhGUY7aQCC-pBP4s>#rh)WXS}CI;0t!NPerK})mD zf}!gW4Z87!PT>iKUwFOZD-}AA5PR6%9i6efFWv`STTrVS&h!^YSjlmxrt0?{Zm@|A z@8ykvmcP0lu?i%X@#vG0giTiu2WJ77s+Kr#Kj$h;9YdRwsnyGIhLsAWFA|=!sf@6= zO{7jLjwk#&VyJN30ajVRXHoBR{a$8+@FSUmd|8i!L0rKGzl_z z%4&Z%8#cCyAeQL%Ociyr&II5BIw$1wyteZ#pP&CI#Y^_;3oY`lf;wa<;&|9bQKE|J zu9L~~J3(F;HDrhK^@c%!BWysvL4-Fh!G23Aei#xXsAR`(J*%xM7k44Q7oK@y8!W#YpoajxT-$@AjpohYBwQ zNZRp9RC)U5TFI`*0=Wa;_WiiVFTzZg?QJ>5UJA@eT-61& zU_r%}$%$3MFIXf*vNOHInT{mZfDJz}a5>6s8RV$GEB5dy+7AlE;>L-7_`*9PWe6es_SR-b^m%_fzX}gY`=)c@Zd7F$NX+mc1Fk5;~zaz5BT^ z@m}@j7t2$&sz2%K%8UZLZ8oH$m@J2?m7B^bP>M-N>y#f7+SNjGhP%>vbEupR_q;$T zq0l)}WBr6GO`dJZ4ok&!a(LRdn@HwLJ3emLdT*YihO8cDOS5&k%&z0ycWDp&`)U5O zR&_?&IeyhC0*v;JJ&FwJdQrI~sA}r_o23(dj=Zf{DGR@c!^U6$A_ds_W|O zJ8Qwnp~~fyrqw~IuAXNEsGiG{r@g_C7JmWIYIK6{zP}K+dwu8>tB{jqkz9O0wa|r7 znVd|Xf|x<}kB^zg<$~?@y-Bl-6=-dp%`nvGL2;8ikfHDY+H#0B}at>jN&po?yjv3pEti-eC`Sk9{cvY+cc; zbYSZ`53_&RhX2LiyAKxO4K#lK^o^@c(5~yhh-lQkU z8GoWc5+(J>S4O@d;af22h)zt(ML>V2+B{z)4=S*IT>u%^=)c&``_PQ>GGvr77-Q-K zVF{sYHGG%+CyHr2ya!Z-rx}z!8}#s{Ncp@?yZYE$71D4(rB9!Fe1#0%?T-oAz3e+7 z6!%Gn-kT4xv$b!y|FWwmoWpyVJpzcP=6#I5ttWOw@7t4z`wletz9gLOWC_wHOHI1k z(8Q*v+0Uwa-~IWR5nJK7pdQ>liZ0bCzIm>1jJQe^Y7ie+D&1XX+L|+|!TH7llpb+W>ayP#-FVZ<$!)Lz{JwG5tHuy9292{6=#D&8xB z%Tj|AG14XYV5^rNr}HDADq;&bB{R{0*eq0_iq;_0n;``y%H2Vsss{22rp#~ith{9X zF7cpQZqFe}(B`%JyqNmKxV#Uy&Kw%_PK{L{J3SJ^&(7$pID&sPz6GpeDF0?x6&nm? 
z#Zcu+D_5a`a(69@o;lZRC3>pJ?3|h+(O*L&v9heO3SnKxR5{zK8S=_%RkOaIy^K&Bqa3ulPaxCY-;d9mcpu*KohyeDsX7!_#0o<_5Se^F2 zltQPN8nxA`1GODj%zpLwmF#RbexQAJy2sxAYFzuk{wjp@F5f2cxEC)=F=DsTbM3Rv&72Y$8EY3~V z{gCXW^|HQKF_+|U?u;eMTVn85a!Ah*xY*A8K$Q3=0HeA*T|aH?9|FA9qG0PF!1*^# z|F$$QY%>}}sUAu`%9mfGv;y?pA@dY)%@X551js?8guo;y3g^WmkPT^LJ|87@N`cmQ zd|WC(uPcF0|De@w>s?7zI;y1Dxc-uiD2g}5D&xDQ)~ECSIfv&jQ%-3tc1LcKV9!f}px%E`jSB>b6V_b3!yV@`JWmZsMN7CP zgDrCHPa>!fXt}-Me5>!-C!Bd?36&9T z#rpo$b0hjlVkb){0d6I=Jj(ov%u$(SJI|22JCki|oXFbaf4c3kiX2eKS+c#vke>}? zuCTmifhOno091LM$OO(;RkKgt-4mo{*}6yW!{I+hFL+2hmx6}k>GoM{Pu(b9R|zc< zmupE((nVz}?g`0BIL)_<&Aj8@I3&tSr+`$G5!XN;3nlZ`pph6Z@3olU^R*zcRCmzk z>6kHRq4x${C1n(n+z-sFgs!Ob^C7<{i*#4t@9h43CYL)Q5^U$n1H`{Kn&oKhecq(|OSs?e_F@>w z5}L-DhpSr!GRn$)GZZe9=8xZtNi9A`k&UT+*zE6vY3=NWU>rqtbqDwF)fw!NN81@g zc7EISK0Cuas2Wc)1-|@5zM*h_3_qu`+I}Dgp`oN{oSE&g_d_N*he-uH%}N)C)~-2o2Thlu%DdZu_r_MJ@_k? zLd$!3n!P(Gf0OIYbqJU&Jn<5?;oaT-)kp4uef&WKQ|=e{$tS;kD`Qg)fS>0qT_O38 zOqld{?A9+s6x@h>0|i*BV#r{hF29igx5;TMbMA8C_kUBaZ_wKOUyF$bj0JhM8GP$1 zEESWoe}yygAmq#0<-zqh!-)uFNFSjXPj@jqF#s7F=9fYyEM|eCIKEk$2ME*JM~a!J zpLsr3&%eCESxr`N{)57}^A``i^{wc@Ubdj)LeACd_MKf8DVmW7HsnHXU=U)_9=U)F zNC4_Tmz1zww9a@0{~HvTs>wPQz|7m-&ZVc6cj#gN$1fZhu}5N?$S=u^hg75Q8&*Nj z-Iy8GZJg*Yg^UXFH69eP3T#MUtc{X#gJq`bX(5YREwb}_{KrWKiq*RO*2<0} z6Te1jW3HyHtc+y->X=9J%ML&;)4+K{>|A{@}d-)!DWM+An+_21dE?tcf&9>T$*z>DFlL~EmLpH6X0k5Zj&65OAZ31s%gU@5!O5QhWynGsCB9AWbsl}Bc~cA~1c(nESoVM)j{KVVhQ6UCXIZ<{CGG)ace zjyAP(oZd)sVhGY#ndrps924>0yl@-2Cfo5gof@+92*O5;UN{AQF zc1jIVWTU@!f`^iTHJga%Q{gO7A>}shjq(Ima76fVcvTu0ZlExu$zGI{`jC@@nZ zDa2-JG-AUy8ND?8(riTOh&~{H3HWVyV|?gjPa`&d8&ru7J_;o>NIftgmRG*BJwAts z^teruSeG-w4^dsy!&gq6*6h+)5X3u|t~)T#8&VF~n9ls&rEw)QiyLMrhGt&Pm)8zc zy~s3nQN@?&r71|4nv}%p-6fZub~MdSj3v$*xv_`{pV%UbSC;Wmq7%?)4JJH?46-ai?f; z4pC1X_VYKVm8Jka==c{#y;YquMLiI43u97t9>?uONaS$@j7;`MUWDTKPI>%hJ@6Jb zQdKu;T5r_ks@5N8sp7D$K0YeT@Z(%9R=7BtIJaD*bW|&oh*}?(Z!#n&`76%_<3z~M z(O?E~n|%@(9MXie=(%-lYFT?Y5itb|=^lheZSD#-7%D(|(*18knlBW|6S2sWjB-`v 
zRLyumWiik;CSj8-^O*zD36bfBX?>(~8Y&0gsq;@$waJKw#wI=fwq|o39i6z#@z5*~ zfBzfK0SbwtjVCx7jopxUJ>7SfX!n?7ErUjcJHA#~2d3xfkKeB~TY5s(J=ro^JU{s= z!A3%4Pxzf~j#6&wQ?{?*h{`1nVl-VIZm`FBN-d*R9j*Y9+d`h8K5hc0-^ zo$20{lk&DB!Nw29fiRAlY--O|R4HpY-gCrmb78)X2#dx=(xi1=u=G+EhoNkR z*)eogap(;1qapO{L2{#k3tRw>@d~^QN52{UW_jHIF<8c$1Rq%g_}^-Anb&fM^_xt}yP zf&AdE9|bNK2cf5tu(#GF&#Ko7%R*XD0K3a)Bq8awMp3f~3^3>H-I1Ne6|C!n214VL zg8YH}efUVJAA*)!$G^(O{>wHbAUh&6;Q{(%N2$=c2tMU8ba(!@hoQhZo44v7wda(} zS|?`f@9p(jKL(Rj=w^QxH-fWkCX*CPPBi>Pq9NWweDWz}$*}6xt}M}kS&q#qO~E1s z<&yMtz0L%8;TSJSs2EdwEWuwccRN4m7O2l_hc$z>Hh!N`s%Q%zHVy? zDG%LUN+XBvl2W8Qq`SKtluiLD>Fy4ZZV>5`mQWgLNxyac-TU76z4!ai=fQK%-g~V* z*PLUHImS-r+PUWYu)m#~TjAGUw%N(Wvbr~8+i&8CMBgD{KD^iW;mcNW$YdfJC{Wgd+kEg8vkkaeu4BxLgnfT3RhfBlMlgd!~)AHBfnwkASww%Mj|XHEQi@dN*L} zoEZ>WI#ts%D!PC5V;`X?4Zp-GTxE8d^kXFmt2iWDf^Z&j4%mW*UFYINyDfQ(bLo^AxUb>o%y8?LE#lUHPxdjdq+ ziN34IrI26_HJ-95kIv~&OIPDi?~X@PucegJn#wJke{feCUqu&@7&mU&vBl=w3EX;C zCb=~1cTwxq`=iT~%%x>98B-!`wzl0*63iT@a5wl`&>Si_pL6!uhmt=elB}|3s}nQm za^LutH^sSr_<-&kaZ0;l33|exb_&3^L%*HZ8Ml~?BxP7Xq?Xjjd4NqKSgC&lC;X9q zWR5Ulq)gvLJCAJ1W%-Kf-c#%kXD~Hw2IxD4O(cHnKhAu{WY;OF?mlw=%fep63sGjo zqiz__TnijA?%DVHsr;JCOa_~*-k+^rTTgA-d0&2~v8IIXWg&5y+N^8t6v5bRs?e`B zfx$_WC-)J5G0|MWO1Wty##axH=hSGT?{pJz2*Yuo&}>9wx;V+BIFPH~;HtJt#*sm= zj+%Y{X@9EAGSjb3g9(H+G16xH;%}S-Dqk!j7elseM?O}gaT9Z8m^*|Q+Bp$^l*|8> zZuH~7TDu?{7F?pQj)h>8-q8ZAbf?i^(!{IIMnFzEeXKYiH@V)_=H}VDa>Nfn@{(3! 
zvLn71xF63)_QzZKvx~;l+jsq|*<7n6!j_Z8_rH>wemxA%gJrr}c9T)tP}UudSL8FF zH8q$Pxm=~oCci@SMNH&U=-Bx`6rFpAG2sncvAsCXD-oo~Bm6v8#7)iP21X(Zr$pE0 zi-fc}RL5j|rh}ih`vudVr5ag~U0vhW1M<=&cP|cs3a;YUG;l86V9Dro?%)e2&q&42 z*MB=|wx`Kb-PqAJt7D9$IO<@fVhqpITzcrhvC0gx4|wgT*z&c;uU^Mfs5XVs$8N8> z=xwXmSu2fO!jB8v{?ghEe8*a!fd%@EJc>ev{k!a6=70CXOzOx>6MAwpR*kUGI^rS* z_ccjx{!WXlyb=}i9S_ih-J!YXJYz01C}r55D4!<25M|P9rjJ=8@m3wI0Cm{dcQs)} zf{~Bq!11IoJD4{W24kNojbBC2spl*EWE%F)g#xL}xp{b(8!FPY)n{wEPnZ20_8I!d~(U zkcn?(`ZA2ly_LNb1+tp}1f=Sk3ixDyiLj$3zhLbw@XDuP3f|xrYes^8k3Q#UDu)E{ z*&OlZ?tS|{ap(A5z2(sQajDGl!+C#~6PNQC!J@00#TwQSXqtMOf!0pNrbvMA2hgZE zvI&)ng(!25z6I|N35V}jXozf+c>N5dexK%W;g0v7%Nx<}+ZE~F$pw^z&xxIN*-zSH zZDI&Kn-=gzqdYuPf3}p^Km%xb783RJT_-pRL2C4q2;7f*4|~a95)0)=x`cn{3nvcE$;55C1w#&RgrD=Yh5;HSIMTtD zQ2iHy6Hkfr;6uKMcUJ_Z<#9GJ#^Uir{)cJ`>wEJ7!NS8>@vqmF#`yvN$=fUfOpx+d zu@8NvQXnS;<);>m%=+KFS~$*Lg*z$N7B=NKqP5gr%Ps3I$W1>$j8Ab6`Z z_!o~0LyYr%z=zskPp%Ff^nU;HjN#aOm2_{04D)t)C?3=P?n@BAyuIx{0*L=VHwIG1 z&+m6o=EMVmAzUo6`|2EON@mZKKM^qxfLZzPM0Ozq;wDMkQKH^dqd^IScmX2VTKzZm z7T!FRoRt2kmo>s(pMVr$C?+cQ)^`N*F9Q4HKEOeroAhVv505#56^^hE5Ay($6YMzW z1%I9%6Z|U_mP)&}B>HFF;yf5n4xLay}P-ZX34La2D3^NWaXYyQvW4nTB|EjvqZ z`=zhIf_TCf#FW&Ob5s=eFL>j>t=>O$w<2?|2`hwpe^MUM{cE*5zt+5NV*lJG(v~X2 zPCEc-1Z03lFkkddQd70>|Fih8wz7x$l?4=&1xJJ`09BDEg|zWn9QBK`f@z(km=v97&@9e^e`MD+AnQmOvcHA@_#lcloHh^y>ByRN4COF$$Z~; zwT_Y9D*M5o4n~&!Z%Q%jk&2_13{?gUbs2w&-RfeaHD?**hlCe4 z9D)D$?SXQ#un(1fjKgRQ@VB)9QRE*c_O)H8p6Jh=Y_e=du=;=o&hl@w6)aYv-;z9S zn-}4Lq(`H1{O+Lz(*JsQ0$Hdir?YnESZmqEmRs%P+C*+b6s_L;^W~2RAWM6xwlDl* zAwX{afA7gr$aImj!U{bP5|*i;#2>_>49tJMGcIMrw;2nT^5A=F-~0~?x~_~ENsmuR z_V6u*qE`OrIO6|UzZr<2$P#-_2cS=rAp9?i^ZzVWno+mH_5qOS26s|^$J={XK5Rqx zzkjEW*!?Mgf<47=wcWS+aYrwl80lTFI9p-wK3IKeSfUn~Zj`!}g0NR_(=TlTSoC0_ zBB!6~{KKgJ_n(tx`Z;&$;|NvZzeW8gV1X^1p-Fy^OF0#vf%?CmIT$>L$ro1Xx0mgK zAxbdvb}|SF0Tg&O!90L{$b`ij%&d$L{YP-H25Wdsq(m=IKpF$U(*FuuKY947VrJU^ z|6z->w<{qCT$Nl{SF!UYSa@)2>i!uaM%mQ>l+;HD&5P39pr6MSVAVHTcg8BaoK;VH!Qp=`^yIxY}krMuEeMcNPA%=~nuDbff1S(b8^=t7MSJVRaI)8Br 
ztqyQP+sX$7}6yiuq)>MP_VeB+bhL01Fby8qZkfRY7f1S~jh zVM1S{rr^kg#QqJ9z{HeDLNXA)JkJE$ zS`6s93irHjq-arv#XOll42MXOadCu|jK6_X&FlNmu&LIo&$U|K)(=+5jfVvV1>po{ z7{D8#()TzrPJLFGhv2p16Ae3!rMva+5Gk8FC+7}muWc({t09sm%n+neoa3;0+D69<0x*P0!u9ea{ysJ?ZaxKl@cQ}KyV==%#efU4gXK@EV4^l* zbHBB^z3}#v#ypz~LHSR+)2)dDtVJ+5i(^XbcC^fY66x@Ax)X>rv`T{kOD5eUXN*<9 zD?KoOU*ry2$>8MvQXn^XE8A?pGBf_IfnEW2RfV!C>am?b9t(`M%7Jmt*|WWN{*Wdgp z!|~yDlFP3VV0=~I)6(l{g|jI|-MgcBuV1CpV5wY4tyXr|VlQD-dw%{yP(H~$k>AB! zAmE|RQa<;4z@K`-Yek{^iobJvYR4ByvY)%H7l2Mn_?#(5mgffQ_GiN9Dr#2?S}ely zsUk8~yc0kcQWC7rmd|9E<-YP>u3=Dw)b+i<#ZILUM1ydY#S23rQ{Wkxj(zjlUEWWz zpemk1z`V{IgW&lahOuZ5!@^H*+D}y4=UxBA&T^amfWHs|cBC5MSu=u>>C-GAV=xD% zmu*_jXTQMY%eC1VfWedIqv9h%QX%AfpYt=Pj|n7!mq)j;!#ovq7fE;5hq!79)yg1+3@Zy2j9y+YFDb&8*c(J<-jcl)$ozyE3d(uC~+CyTutR z`&kKmY&I%9#{=OIX2#P#`?epg)zx88bV>pNw<7W*65@pLq7%>hc^87I>blqMMel0BSa~jxI2`c^<2e+m@bLwIY+(HDet^|zX|LaiJeBh& z{#U-AZfcA7McMm2xV38cOij1A?!7Xl(mLn6aIq^S;#!JQFjDGqCug?=1P@^H2Ec&3 zz*l_JZl3G2+ z73Kw)R`Tvuvai_H8!bId;Wy&^4?qDSigSY)=$>k>hP`^#4klD}8k`C|1s&G|`Ivuf z#PF^vbp*_nRGeQ0@wbL1BwuWvJIt&({{HnHyb+_t+5Tl64x3dHFb#$qn@_a+;+_8Y zecC5k{ZZuQ%(U!2J&`e=XZ|(5zdK!AejLJBef3R)03zmf2MmS?Ec$cENZEcgh+>nr znE+|jy;_Vrr;kV1HFf?Eep&w#Z-AM}c3_sbT!<`;Kc;BH27)}j(nlF6)($pDM=lQMtBt&6eeH%VX^rgc7aH@N8j7;tF*;#alK_P z7D-c4`TS=DPC~se_85;Ps8kCwd3T1bFC%^cv5<+c8NB`4Clq!$a7Z#Rxr6t(_xyH; zC_fsCa}+)ZIf%+<6yAcH*SK9@z}EKOoqFb^0f{7Iqq}JeM)#w;)|FZ0&XW(@l~3oJ zD11#g=%w4H1Czth-lsR{@Rg19;mp%Z<8p$ zAgZEDTHBV$gAXJ|zev93z$~##1P;O5w<%sVACK7Uv~<3{`Q5zuBh#ATV+}fk+1mG0 z>{@nT-`m6Hy(_X`y_PE`Co&-8+HpSMg{7ZI``^{V?AvEdvN_Bm`Dn9(NE}bw@57>} z?tRPA;P~s-_T*!Ig@OKdPL@o;)ab4YwKx04JN>UmnD6( zSs6i25RXB41vh?|0T{dSxL zTdNMY4e$h#(lb1y5np_?vuwK8GG=B8fmSa*>yR(iahvT~n?Q7OR{ZJjgvhwkL+}UI zJp6Crz!8kM&-E#XSi>cdSV%@i@KwaqT?4uwK5`v9zfN5KYIn8qVm1^N^s#~gJqeqp zqv9p!pBk~Zz+COTHI|wtv{2mNU|0n^xeOzrqt5Qn#k|2r^lyb^CiR)|s3Z~S;+Olv zgWcntO5Ntj`tSTeBDn76!AbSc#J!*fE?qj7A4{Ci?i5vJ75Y{?Kj>U%Wv_dok!w-} z+&NvO$^bF5t7R?u%>btY1MjT_nZC4N7+wxR?7x=Uwt=*Ob&q5s7eWqD(~7A7{tRV~ 
z-@)hj!o^s^Q>^Qq&<8jZpFI*84Yd&S>asy$$8|9$9-*H=>{o4#@)hok79(ri(h?P} z@COzr#7Z^K2HCzOuIm;znTwXs9>>qeLVv;>Sodz%EEgoN9JG|%Zw2AEMXvjrXF`Z^ zh8`l^oxZ?D#3*A?;ZulGMshO}Q^x5=drrZ>~HFZtX#421F~Q8V^rD zc`vtnU*K4Tyut)r-Sr*lAo`X{G@xgP>nBQWp%F65lDd=6*a=k(%^kHQS}(F%0=>7q z$M3Sh!BnbjA$9z`svu+PcC2EEATrqaS+Z?CFiQxt6#uaalDi~bxhxYLzmvtv*1=I@ zt9?dlV})8MJwzNsK}r0;cR-ci>x+SCGz2}2FW4TwJ+=4t%jFmpLY+& zdMksp63e0oS;f3$+`bi1;UK~&c=1=C1W(t%peaSU4TbhA=>3+1Cy*wX0u~wl-+qzV zEwNpT5ijBf$TqiYz0wMLs!6uq@m+qg?%Z=bS#!qaTg&d?hw7Av%@)QzGDV>4;j5z; z_NX42vSGeCtb_6-5u`2e4F!JUi93UkSlH6S=fnfww_7Obw1!0vKr6(+g5lWPJJz+^ zv*Y%kDDTU!jrA6@Jr%CVw8dKfOKGmJ*}}+ zkvqAFxO$eSjvZ}v`(?T%|Mz@7L77%*7E}v~=)&39kYTlx+A?cWpknPI`GuV=l`CaX zHOG*er%%hO;A^cxju@P*f&xM1;J~g~tVr*-fIjJCAjhN}`Tg}Z$EyuA6+Eg_eg9(} zw$3~vY45?GQ3~YR>wNYZV*~duK)pJ#!n@F8`f9eAj5)(*u44D>t-w_FvR|`s4CMS< zioyPad^t?blVRSjnQ<_(8#2}W7)_Gm%UQS&vkJFpY;T`qmN=dcN6^W&=B*64W0Up_5*qNy z{R)2fWFo1b)>Zx~bII{9`y}oW5p9%fRmJVUI!VZGB9*q`)a60$QUz?~Y0E*aFL%bj zjQgKu(!m|ll1h1w<*lYe6oT(cKQ#dvAh!+D#Cr_|#@L5i$a)}9pQ0ao@#s}rRc!u_ zci>YEM(H)g`v)Jm|(`8Rxq5e`RG z5rtwt^66b20~O^W$xJ)1w!cap-7+|K^Qlwry4Pk~>2P3oJPIMb!$PHvu)oPV!@X^)a@_$@V@jnk{ zJs~brD}BG_wG{J+?rUSnYdJ^&w`1ydKB6|U&DpAFnciT+98xLr^vvMvEc0VFD!UJH z_m2x!;sX$FfF^k#^Ft5`E=K^r>v;P!m>w^j&C($5Ly@kt`bYVf*`80Y!;d<;u{1WR z7V%4|w%K8-fb4|#aJ%BbEhN{UF7I(IjmEKHs=6gY`whA)N#FVYyyC~8u0VYAl1h3} zO7E-tILrP4z1Fp1+ZV!*X0{TQ_mzNCBEG~DWtBmvfB$zm?4h)el(2NlY4r4x0@|liYFGRI=sQ|m*`O!LWtf@m{gCc)<)FDM zH(LA8<8wMjfT~O-g@IZV!%fSXPlM~#ack&%Vs4vzN$(Tx!Bi5Mnmr#vM^GU7;oe8& zFQP9%9X@9#VLnIsM1v`4>y$6sb0Z>fKDQgK|Iht#z}{L{+b9sqLr8x`{qaR+cX5`J zeZWh%9X6N{pJtjk+gxS!jLy_8hB+SmLHve-Pjrx{{jRe+I4~i43yeHBuTIqlEmi?l z_h#UGZ&-42#i1I z zBYxXVRRQ(y^kTl**cYF<`kY&YO}AW|j>>2Pt5K)fbJ+;V$ysBUMGeEb?UxsJ&#-fs zYhI~7!@0e>W_GQ{%tyPL0kV-Q#T4zC)*ND~TbSwCND8BFau6fm6pYlItMG-f%W()Bw{vRLE8+WgK~M)yYcONal3piI!JRBV1f5RqH*lqTIKq?6nt}zUwMuYVQbCp3Ogg zREWfKbG>Nt;#D9(9K!Q2vw@(+x6-)iwQ}#Ti!%WINf)rmJsq9@JWj0M;_mpc*LjU$ zHcv)4(`cjVG$a1VjcnWRdds&~nG*5XdqyTfJUyN=O5my1Gxc_Fw_+D~YI$~eX@h{z 
zwe~>7+Z#yMPQRg?$d40|y}dldl{pjz76sO=BpSPSsHi+8Rf*BGa8#F%qNGP?hhxwNioS26=|5B?@T7q$DJW! zyv2XN&8#*htjV&Ef&`~w)bj9<%&b`?>~k5ef#Vr$6&eJBlM28FGwBH7OW{4Xayq3u zcZhty&1|V#AUsd)aY1|RbFz%~w8ToaNG^>lfDHyi>12D`4VCn8lHSYDn*?FQth$~0 zx)H2$DLknT*Qe2X%IGSNP+eyPQh|-A>Q?LQ+oYnClame05X4~8?e^V|w_G_>f#(eU zOjD!Z&tfs~N$)IxADkMH$}UEAWkwm#BsB0%a0g1Ov0RSp&A{R_N#}Mp(i=E=A>eK~ zIp=N)h{0M(0-KP|qZ3_mt=d@imqA;=w?ct=zhHvWJ7+Wm$es6=shG8Ps#&f0 zP^#o1`D7tx$BERf3T5J-_dH=^2yzNSx^g8#@eY_qdtuj@gS7XkfL%@VAQp%8;u`2O zCi|F$K3y7!Kh+O7i(J^D^qh+&V((#$X;XhYYq;in7Bhtm;UbdBBdA|pYYfJ+{R}MA zoErR{H{a*4&^-=#Lt;-e0!cLdW(F`JSNbKb1X>6LAbS$!8kIP#y)9J6Wx!$M!y#3j z5H1n}0}ojZa#oPIH9w@ILg-V^sT}mh#5JZgvi9qmuTWH)Rp@%>CdN;E)*zKI%#rC? zJ<7={F9+o+vRKO&L(~gZr3`Kcw@^f_mADy9kCpA0?<^i0;g85r`fX|I`#N+p)q)I^ zIq~zmCxR-v>B`~ZRiM&SIYUi+-;HQh>;L;Z@)t-6JCZj{I@Ps+`g{7}RRY36(puj@ zLo_j`^tC^;Mg^Acl=vLAAaH_2A*-#em6c~DdZ~qN%HslABW(1HAHWhvdniJ`KrBc* zUhC{s^~2?Sa%DkOA5`&u=Oy6{<7DOfrIjxnb3n}tmCqZ)qdiQIOo7ygOVLke1wTW8 z-i*}01ZS`|khp6zXq@$>W@lIabVc(e@Y_#eU}yIV2z1wS#Noh0jyRYR)YQP!aLfsb68i34t*Z>} zU{Wg)gAJS;`%`Ik=i|e}QK^}yA`ZS&N9laHDR=h!^LsN6Zn`?LGvA5!hAGnw!gYRm zBBRxQ4TRwkpN((}*X%)*)1YhCGbOnNeG(ayHm{}eV1^L+mGY_3AegsnEQ{~cr%xGh zxx+s$w>af9zTG$h0(a=YnIsjv(Gng%A(#LLm{tI%YXwd~!L?zpJQ2lq+mGTVt}=zZ zIPO7xFRm?~Ld{N-yq|E~k7V(yO~w^^)ai+2I@G9odEPP~lGyqMf4Tl^o@2<#2=K+i zcj26vM;UOkq}`$20bdmV0u$Fo(ER|#_YsB$<&aI+mvae_k5<(MYgr#IHf|)Z2Tj2} zA$$%Q@x$;2EFzdX_D00FX%<;EVbeizPyX&(JOL)lC+mZeCQ={6+R>=lpf?-1(I9_K-Ttvae zEMPo-LQ(Lf%a5eAGj$!;nv-RrtER_^>JZyn7q~6^#*>iFvuez1RR#E)hWiDQPwRU) zqi~CF-4iB!H;3q$1V70#<4C5Y3zO^b*Nc5s1lgNvfp^i;Csi8Ka7Bs$N}Z*Q`9@T* zuc~oYTMfYOvtS%sI4V7T_}Hpx>l&Cm4m@U5kGqfC&SE{^8Ie)FL_S3LW)R%I$jZQ1 zsgJp8ZJiIBthTQEjX6Lla(lpeVIGUB2~64s_;PqY$ZbcV&=zzV21m_~)>zM$l@10c zd0Mp?eN7L6m}u1CNa(WRHjc3LpQObO7nKk8OAA6f-l!V8?M#TbjX*C;JPOcK4iOlG z<)o&b%HLi+7wfkMc44i93$OKO1$UfuFKhqtk|UFRPF6T z?kJ2}aX^$)-;5~laH(0akWp$+Wg2^F*dM(wfSE^&^eoM&*?tj8wcbut<#is62NBIn zW#JAo_1P!*yEqaFuVIgWm}oh_woQLEpSoVr7xCS`MSlPy5_6hS7$&7gepR9QXezaS 
zqZ3dai>~bTaj!f=Lk~52p$L32K2~WL^fakaAsz`j*X{OT)J|edMOwv?zhI)1%;SWJ zMJmqo;Ed`d!_iyS2`}C_8q{{X0EM9bzCH%r;l)kuPg6IIuZRXl4bAYfh8OoQe`W^| zRO6$%ksN4|H zH!M8j)&;t-uaV1*p1Fc--ZhRK^hWTCQDYq^9zc3kxg;{-P6Jn-Y6KD&tEpUBR;?Pb zgMMgq!b>?hHI7%idd;vNl#h1eyC;Nu6wP?!*&M($JZZl@^@W>k7$h69?Xtu6G% zKM+Q(LXDJC`3Be{@`;S;`=jK%KMMQI9ri-rJ%Fwl-K8n8`+R{)=bIJq;I(qgxs~+e z`Fk*XcycGoBW`<639bUP4_yQ>G=XIAmyXRwRY}x*vY$AS_OD$x6mR#~!KdM;Lap{8 zE!M!r*;sKOmXfWQ{NuRnK!*c-o}=wQ$1ipUbS_M{YV|{Gri)^xoh%=lB-YPBqfabn z%aV_{p9tUVByc=XPPqkU0*2I7Q zCRHJ81`&&T`{|8Ro#^BKhUV&?PB*=SKB!!c<#J2yQ6m#9zpgEA{%celVqo>Wxd z>YQ<3ICksce%bbylqe5qg0!@WeSzrD#pZ$jHgh5+NG*2kaE3&7J@p4TMUm@*qjzPq zzzYFpcM6qb5EF*X_w-fu7)q92;|+h*+xPrI+wAkGHkdfn)!1XjEoItihfy9~AXVQ& z1E=5J?V|zS^-oKgP*f)1znwRl#<{OwtOGD+g|=P+sy&j%DW#A8n*%X372w431bD6j zkQ~kj#T29R?vQN)(lcmYp|IU^lUK`ym-A@K-!$Szo54@nX!}6Cy?1iM_*j{GiNZk> z#o$V3b}!8$3)@FGYPwjFdHYF|!SiN~t;Fy4k_)KjonWBT z{{%kJM2b#GoUiM^K@SOW%YhX|9#HAtWgz^CsLVzhLyDJExm5||W z#*IyNyEM3kxz1cp@Jz&EQr0uYij@5YVt8$z)6qm9e&>72AUfZZDa#?>)>B+m!3(w2 z1QRy_`@ikaw1-Z>*)GZAvk--xBy}}^Uj2#!*=n0P={+lL0><5_UuZI?)-3nI)$rmi5OHYa}JQJ2LAm$~% zqPHu1Uhw0^4b!J7%fVQ3V+R{!cTPwnTn3j_o@tKwp^B+M{NU+=NgQJ*s|tfVIE1t# zdZ2Q@!RAKuyZmM`;ugW*1FS&H<`01duQ)(-m40A42@m%7M|fz%dMzb`55Su(vU1mP zgKhBcyb%|;opNKf1#w8AXfXrx>M4=atzij8(fbCHk2k-w4c38J_;wfjaeC zSyj{b6o51zoy&FsS~&5H2HMEKK(B~l`&Jm3-fof-DGm9!Oc%*pfg+5{VvKD2A?Ah& z`}Ky_VAqD)y3LMOH$(z2pNAa5x^!r>`^0PXYk3<}v{6rBg`#qyEPm}X=dLZd*mldw zJmc3$LrI^Y{X}0>fp=`7AC=Zik)KfPC9Lg?& zvYIeosT08WLhG}8rS}2F<02;J-oDfe>$ete5s#pnv`aEzFZXG{{9et((M;qjj6&gv zjJ2rU{i7DTOcC>X&8!Kk@PTo&3FNP}rNq1S#{&cJ{*Z;B74)^@|K<|VC?)A(4;U#UaSB3wg)F{lqD>T91NY}}x7R>UoIX(hM& zT$Pz# zH2T9?3AWBve7^qdn>0pH_VsP^&fZ)8RKJi8hZ3zc3SBx2s^wb5OMm@=1zQ(=47j7Y zOckB|iHzwm;^u^0z;D<3o=?gcgCkqo{OT0z9h}w)u?h`SR2I zld@Z|Dd#`suV7xgHC<0-V##Ee#Lz6JoqP!ql zK~3_NsoVn;UQ(;qm7k(5*;xaOYEgwf>C5@-+`DM;(C8d@wUP7uaR|&H0};PAr>Bp?Y6^# zs!?RwF59WVmM>wo-KTtBsw-GQqCXi?Mo>PoO@$0hVe>IEGA41`Xl!pZy5cbb!&lS^ z5)SA95n*sVg|e|ac-4&cDCV<-;k5eSt$pvo$jg@?%d{*_#kdUrZA0Hb2I=*LkDUFo 
z-vAmW#^sO2l|tGK!f*RvL=clJ&-bkUf%lvk7ck57JUdd8!B~8-P^JB`JpSHk3p;`u zT)p<4_wL(Td-K(7O<<#&f-RT78Up&h{TfkDyE#Ygq~CNLmrL|^u@^16k|k>sYDspQ z37@~wvLc1bR_k+Xi&>tkf!kSOy6eMZo=lp$Q2EaypR~Yrnap#B6Ofo;D7RmI0+($-EWtZ0*;0;_TkO zDdOamQU^2Z9C5N3e&2s~-gUCJC<@%Kf$=rTe6>L_aI)34A_Yc#C~sE27&>nTo2U~7 zOW6PZlUZpAE(dFH<7A7Y7iv@pdfI!!I|tfPW2GDZthe;9E-SQQNpaj~)nbK_CJ29H z-wD5#U$55tSBZ3`cKN_H-njf#OTr$}XBo@Z6X0>JFxvEIqE;{BIXMm~G1RCB@q$Kr zQ4-V+bmvsuNo|Ay0J%11#$~Fo4P9wvk=o|MHw(UrEsz?tt$ngZp2BmOzqWmvrJ!r#h+-2>?ri;St5|*c*_Ii zOTw0$kFir=@oIM)kc_n^A&GwPi?h6CZ9xGuG?asnIH8*!vT|_cfVjLMwkPOyd=GqA zlI)0s7fVk09k0W|Rz$6UX9m6*Q_lh6&m9%-q*28G)VV?aGFqy1FS_G7oUsVpR3l+KF!f* zt795+_dkpDq^=9k;fiP)vm9|9lt=BE(u52VnGILBrGi3;sfh#R1G$FaaqkJY0H_nJ zsceOPD0Q$rNfn4L#UaR;k(xb-0;@7exS7{WSsIZ>G#_CmrtN=NKqhV5$bc+bAJ@Nu zJn{Qvb9NJWus1DqBaBrv6?Y9rstq{9z!W>e5F~UQU37(>mxDsR*LKGFf52+`>=vdZ znTX%Kh82sj{KDq)dl)5wE{p7lL+tJE3r?72?RHBo+K|cf{)HR*`h#!7w%OfUPYT9H zlbKVdo}=M2Ndk8`g6h<_PXx*Bgv>%0h`c0YiRro{p%susQIV#JiBBt%$43~ zdl^Z_0FrQ?Dk*e}Nope3#{v8WFK(7WCwakIIToYd;FkSx2RJpW#sV|>QPj)REy#2>tpxCiw4EF*!xenBJ~<6{7*p=zG~TmUZgkW&zRTD^hnVQ;G}~9|XMlD&t}ZK@3R$Gh-KR1S z#zEWG^e1vk6G$lSfNWq~9?{~je1pV{oN_8dH06UpbmBu&4%Ql}9IeD>&j zUO=6#x64IK92q~2yCF?7$g7x?^WIKTeJj^$S~cP9onPqSr)b2E+_Y5^rTH*%J^{QZ~{hVj4kRyg*JBs0&Z6M^EU6UPoOA;+4NHe+7!nl|qrNx&olp6Z z=dpgstU|ljSN&yPFSN-|5NmpBa(tC^LpSQ))Kt(ZV3>b;vzE}4s3=lO#(5+IjIywG z(_SK8Y(x^WRf&WIJX~9xpqWT<+JZv)PM0(Kl~_)2n|>I2R5Vp1!1c-pm&-NTYLg?> z{O?X=8ikXd>*;8-4yTgx^rmvHo3pWi*7Jxt-RRH#RkpVf0x|}k zF38?#06wZ#Jr8J+QI&H=AaU7acUWmvw>6_lxY(ap1xZ9wQ#=4w7TbNAJ>RhdK^rew zYp%)4{SC^wAD|yU!u0sfK?!w}$sooF8mL8ZQQ{r++-m^Rz}zHF1g4UrYb=-e1B0-o zX}&@b9NGYBu?>KW_`33lDz-h6$%ikQ8yE|;gc=t*yrcJH*^!E0CeVyDYSo*|*Y+<) zlMC|X9W_V$fI#Ka$f{PUGuzfpEZ!4ouI21p=Pd+dZ~ncDM0!Py>Ulu_im05E9t$(> z=fM56uB{F@&5idijMV#JB%2-tSy}m73E_w_8ELggr=Cm zEp6^CITOV{p+ZDh2~qCi#`#(Ul9p0(--cm)W{q%7Irtko%hy^)o~AXG)pLtRp`RjM z@;Q-^7~!{Fa;>I|jLRFnuD4T?H>Da5!3ox{!Nymg&frC&JmO%CU?L49@yna;gC9Sc zN}!bifeeW`j_nBN4~hsZL0oRht%BH16+|qIK%=pDVWyOSzICqMY@0POYx<$-NMY&M 
z*^#9QCGS|jUl>Y_*`Kdgz1W+bZJGck^I$tZ2Auu31;b;wOeYAv6x|eP`aXx;7deRI4x%+H)yq_saFbpq9N2oEWiy=2491)<^><={XWHWZWM) zS!?qBwfZh^3HJx@z@06jJTR1nkY>7u&Wt_1Y0WW-KK5tqZ5Y@`C4NUDemCKjbI(Al zWmqI=)9<985Zw7IGPBO6TC^ngXQ_^(DQwwO7!wp9*bJWQv-1sFEG_A8@GZG)<}DBj zfe0QXc-*GSPf;*rN;J^I1=<{+G}!gU2pG$!b0v`f95w&s5g4FELiug#D?!SjTdBj{ z>i(5y%#;No!ze3o7`w15fO!z0C;(_|Of;t74;m8EHWUT*&G~WXKxz#@#@K0h#Byn$ z394gL>LG4!^aN~B%JJ>alqN1dXI3vW&CKhH(E>gn%LqO@6e2&fEsP<`e(Lg)hy`3MzPL4X1;iW$B;wDrt<72 ziFzLh!@6wH%%^bBGEh=we|l+Rzi2o)(|$R7AV&h07fBMGKAb_4QIec8Tgr9w{lc)0 zTn3MPJ1SM?JTM#UV+J85nzsRVGG~KaZ2Apu2s;F7Z3CJ(P2&KdU}B7>L4vr}wn{E_ zPVX7=`jAK$2pJ)ypu%g#Fjb3|%ej8u1Oq*4z-M_|>j^&7)@?o1sp9^oPb-=o9~qOv zwXA!x&CA85V5`%4#=e7TJXGvYs(?Sq$2?_<$0a|9H6s5Z_*+x7*4s; z$qwNfvvZH8evzt@f(jX$pjvvQQJvwgj56g$^F(o3v*}MPAt^DKJL(9;6LK;Tw0Fd! z0rL2Gf?82=A~!GFi2)cTW@`qHD17aAevXqVSSUo{I*?a0cx?N2|mRAVo;_W)O~#UMgql%U!Z?uP4B+h~&C zxSRuUH7~R?Ksbf5;ywFK4Y`Trpx-dQCCP)0+!4If=t63nxiKz@VDrkgzt4?l>ibmT zYqbtvuZtq6^$f$`2%Crv;E4iQY5b4|7|V;TZsni>6DNhOpWjZfcg>gJ6Ga?SVzZ!sSb@fkR2j%4b#}5j*bnljXhe9TEw@EL`Kz5xmz0qyA8d8H zM83TBbG5l$uI9Cz-ql~A=HZb3i#r@=b6P0q)Y7<$v%&q`f|nsZFN`CH14Bh@yw!`O zivavUvZ?bp*6$}CjDfp=DY!G}g8*){9OP3xDu{7^vtJF{2-cA+k}Y`5JW%sTrBqvI z6z}T%s(u30gJazs>0R$;hZ96(3itALoK5k;0C80|GwLNtBXolA`13RIy*F=M04zwT zTY;(}^#$b&oV$V=i+#88ktiWP+uqiUJWy#Q^Dv&4Q@|PHQEc4-7S~rtzu_(eQ3o9U z0$9Qai~Y*`AaR?k(ks_5*1(KBo+^y;t_b5ncU``!&Lr^h9J;QIz0zNJ4sIKU*^IOh zcKU(lwN-cUW01Z%a}Q^|AUk9$O7y!MDF9YF115BeSrsB=@tc$hxUiR8q(x@cZC-EB zuU;mO0^IN;Wk>cyDf$Y8$z;hBcoQU;w_$~n_~-tzfp89|d#P8=d|&lV4wTrIV!Gs6 z>)s}8pxg7EH9y=^K^%d2fJkSt@MCH35Hdu&XYx(Ofqhr9Y4+I*Qdop4ogY(eF!y>Q zc{>z(rC}a%`-d>xShr#r27Xx3>^V85x|BFbslLacWTQ1v>$urtZj3Rx6SSFQIf+E=W*VR+p(XrwK%@ueo$*sW0gm`a z2Y0%|&*wrqNL*f4HWIuPM4<^ee0M7zYn7{P0L#T8zQBr?-T7@RgAC~@_!INm7czWRQ}5$RT)EnF zv4Z_wv&)|Kl58LDG|jTdfiNn~JW*96xOhd|Mv7)zgmaKgFb$@O2KcuEkXZSYE%x%K z-JViGS>o{p&pT{fj+(MZoy`ZB!nX(Yt|xh~ruy_ni%~5t`z%Cv3Txj`;H)ID5BLcY?A20L*`M*Zno6o zdALj%=vz_%JsJ2y+7%tHkq?p)`4P_tBM84frgI_Xvdme9QurLOw>k!;=z%)?<;Lb_ 
zayW-qHf+SH$tzhbo_62>D}H_3=r*ofKtQ%`1VQuGTFSC zU`f*vsOchWJd15acMM&ej;A=vc;F6IH`(J(3sRb95ylBE_QTXD?Z2oGsGep%pd-#$ z)5j(yEf-+}ZJuXMI&rX>oWXCoAzG;J-lh4hxeD35IbVmbXs|-q$x`1)diz~}=Cxgz zun$g;n6bPak;=K^oxs7O@HAa?yHq3iStWWKxAKH?GBq! zA_QfvI(`=&^x;JFH|gmCIAkf9wXVbT)+%@inlk_@q9|ClSne^`VrcR0 z%Nmeg#VS+^CnUE?l$tfh3Mmos+VljN0gJ%_Xb%nGPI!GtIGz9m5(?rg8Ohg_2R}SJ zsNa!lbVG|Q78|UrMlw`Z92$fO*I4KGF9>)8b_7)^7hp9oEgEJP$!9F`<{d}lckg6BjjeS! zON#Q5{8NzMp504?Hah?E4EIIZ>0g5;YnAvO;xo27#hBR$xmGaoE~|v1m(i)p)D$JW zxW4C9n|}gO1_)@GDzpcI>Wq&=Lj+*oE#D_{CC0)sa1{PSe!u)qB@>`i8ISQG!y!1b#VxrT6PQI0?GhbXuLjf3&-(L>d zjIOAR64M38jSL8)$u@XJsrZvf`=;YrnHw8i#BCUUM#tHX@hN73yM?S~CquMx3!3oh z;NmRouRLu0nndzeU=izqr>cX9Apb=3F=U_)Fdo9XMKh-yPW zWpL)7aS3XjT+OH|mX1dN77X(^MyQhvNc@wVR0>{V!=UkeQ1%t9jxRM>(KhKknR)qP zZ~>GAwdz#UA^Gl4O}s&FF9Uj2DgnoUwLb|<6w3)7xj-!*Vi!)5n!!>l#P{OjKH_Lp z+h8~e_FDMtQB5-Wc(znE?72Y)3*bqF){40N94lP1|195*Iah69kaibE6nk$P5S!-9 z{{R|&yAkvcOEbQaQvqjNdOkyRmhk)Am)?f)tjk2eZX!FvI*jDg@=)AS3^~mC3 zB02sIXxt|8#`VFL>H3D^!0GC3@8eZ@!54_A3`z$QsXhGj{T1ANjk!B^JBtVGyZ0^< zNNGaM6?sGb-|1-3nwMyao-IiiWl5_JetgWnKCVb77%SKv^QU>1{X=)0za;&cGd%2y z;EMBWb~ZHT7Qf6ePmWfCF2sZDP19FvQr5n*o1*QG7pgc1oB&zkX&dCh@Ti&8FqP#U zA`fznlJD+*7JET7b|o5kf7|<^08CE-l(Q-jI4`ASKnn&oHuZEnnD}3~y9ddy!SX5f z;mlj0L4Fp?uopoWpvm7&E70CN3r2xwMK&Iaik8wg2K`S{zGJlGP9{zp zEa-keS@hrxOjWY5UulA+r;vr0CVK7IHoYaN=FFeq(Z7?g#Zg}aK>}y~Pk}Ip??z5- z)&d@nOu4^29QV=LB|=wmqiMzGge8AC%oi%$Vc+mg`d^=E<$D_4=&w5)#;>(<3N=&6 zQ1W5|fa9AM7714NJl%&k30|z>sW^#A!T)k>!)qumT3wBOc9{0DdB7uMuti&HbhBAY z!9Cyi~?U;iM|XmDp~d+ElFcj=hKff$8Gzv9SuF2$D0tNUV$#xNChEKM`FkL_~0}#2~*rY-Gn`|EqhEnAznYl9B*gcag*^ZiDYdw}5=J z`-1VDv};J_Rl^4PDa$jPC&L5W+$U^ictk;?5;tWo)g2GV43SWo+epw|_k(-Z!D#n) zp&EM>nSMpzeSfIo6y{r!^IFxXq+P_=K4JBaf9A}x&-Z3u=H<=s$1s4`U48U*65-~Q9uMX~G?oCI)oV9V7AHP+XrbHm*JFQg(ZQxAdX8Pd@hiyU5;!XG?PbMi2d^dU4E40wb@pMQk29!!nb0 z<6&YMMB^;5>uMR&eBm^FHrwXkoOaHTcc{;`&0dZl>XMBUgopppOnD95ZePa6G%eb5 zm#79C%7l3C171$dCN&g+>I&O8~4?1Q2H-(A1(2P)&-jTh{@&&5H7>lSw zPWr#%u72W}0WA5Q9|QhKRk2mDNJQTq+EE-d-phgOs%})LP?b_p2BQ2HI#X!k=5w~4 
zt1wJ>V0E$KpUiDiVZET_{&iR~vZ)*^lBFXp+JA!kcxHs0bR^%)~;vy24D}eO;;LrSuIoM3j-hQ2{0l~o=quUu4T5@m5 z%GGZ%c{8esbv!pWL)ov`Y(PDT{Zodar)oV|`vwe%hb^WD)b;&M(*h#?klP zJU6a&7ZqG^-Jg3mjB;!?){e6ENG0k9ce@!W!P<9|=J*GI*vaU0=|-sSuc|E&0pO>F+PeXsU+QYWbKd*2CGO|2f9ryN*Fp7b-=g+UX3OELbg6zQ)& zChv`XV(-V5`1=$BWjnjU3s5A93qH98;;}@K-ay^=*uN8tkao|+UQq8r@*!s5*TXz# zqbs6RIf0Zl{2#h7ujbs!ZyNjk(<`aO5OUvk-uA{VSf&wjOzY4eg)Q`Qob(HaGSFiW@DnY6=X1zWf&!BN-waVAy?C z8|Hk9=sokZ}^W#l>%T1?0@;xTSTEcFRQVVL2G6UjOeu?zr0SDsMzO%oRgTmZ*1=0Ld zaIIFfY#_N&bv?+4s0mp~{(G6m^U!2`LyXidC9-ODUS)L}`=9GP3}B%{RN9U*_eDP} zTajR#T!Gq$7Gv0MsGc)L|4o9tv1Y+FSHe!DQS8QI6DXx8?2iAE7XjH>KWH!}IY42j zz2d#^z@`%x)&-zy5`b4rIB&xcr_cHxg||`a@jUvOKtS5H&=-u3O>-U52|Dl*d9GFc zBa7n;6Z$P?eR&?E|1`x>t>pba0X&>~tFJqfWU{DHztOeeIQ0=ZS6L(*?b-nGd}~w` znxhSPpeLZeq3k2U(!vt&QdM4Ove$>t48O4(qq%CP05iVuTJ1k8I%64Ch#Y3dXT4_r zH%Fl)pQK;q%3!V;gl6^8qHlP4Jgi@12`VjM259637W&^j^3%}DTY6=c{sw;C)~`SK zUgtsL$O=kt9p~==L)&b(G9H(s74L8+Ne98O1jb$V+IV1xWT@*H{mi_|q;32{|p6SsN|id)OyMiAEwM z+G{2*46))^98t~XfVwv3kQvX4dmMY@Z21OLYseBjiSVa**ETozH;4Z(n26};M8K!m zZCN6jwW7uyfIhlp2D5I~*b@6|tS>6Z8wMKqy&!+#l&_m|kZojAiKXn?=zCd#)Flu8 z{V$HG3@FD88UxzVmsk#MhE3Tjgl14mbiYa%Lg#f zOedQJuz{KV8J7|Gg`Z=n^%aw$2`3rPJ)XW^o7~FsRrlQI)5WMptqG`ZK}8OyvA4V^9dy|4Lg}@f(XQ5d6)HMb5hb~_ zCRRWw&3+k{(eeP_yi@Yc^aM=#DOk_``}*5M1ZsCspxSFBUM~bA>+<^&Gl- z2O)-qedPe$1OLb`pkyf4It@8j5RZbORzenCD9>(x`^PjZPMD%Ch&lj0oKAuzD%~l# z>XSgnO|kT12GBihKmx95_>Z#ax0`FxzOQ@BkDh6(50Br=@N#H&(6gy98rK<#sr#FW zbQsL53s8{S5?0%c(s~1ndUO;;UT?Ge4*mNBugYXfc-S`V+F_zdO99`7f~u1Kq~Cqk zv_D*G{j_Hm+&Bi6TjVyy%EWY}U1HcH`M}vGnqkp!`NM8frNa1JRpB|D3zd!n)tr@Z zw@2IeLTehl;g|WF1t#qQy^S#{(8!VdmZR2vA&5$61KL3AZrlo*bV8%oaUS>{6iC|6 zEFh)&wT1t#9j`#}cOZFJ)mSn1Abn>xG@%9-^rn6B-E56~Od3hz#D6z|*qE5VN_0vm zqN5h>`!?+t7K*V*V5`ylywicNLmAm}ZtL&ncsW4!2VcOg*F*Un`d%qr21Wm318tBw zMRmwfkYS)+I!-=(sZP+2o?z_rO38+Tt4;PsHCG|-yvyOD_|8~vsHg02Zo6&vO>!n- z2)qy_{t6J*@qh?i$vTl*j)S7TaVIR3b({aO0id z`>K&`3{*V~AiYQ1@E`Ay{YdN&tbO=QYQvL{X5Avf|5n#-h3av|Y6jHLeq1m#SNAj_ 
zmdzWJvjc#~adUXrIo=o8W`@Wkl_0#2S(7!J+}}pn6RPOsX2-uB&=E0;$fQtNG}bpS zcT*=0Gf%nH1JjTg!1xJnkW+^NPBg&hi^?I#Jgjku6F~@Cl=q!ntsUXBqI)3Yj6Q5j z&94CLIwvKc<(LFEMm3VAOqFxBD0NthIMfZr3Ca<><7M7MPtwj|6hI?BUn``p`1Rt8 zo*x!}uoT@Hrr6@>#T#L9ZMAE{U|9H0Hqp|H+T<9f?xE#d**KGoI7loHKT!S%R-}Uj zR%HWbq6Dn7n?ZTf~l zHGfx;X3HAxO>PMyU^jvA!*k}a=s^PbzPpq485oOUD0l^+F>!4vq`jVLIy(M&M^tp} z-#?@^4b^eo8ga;IW>9374Qr@fs$X(yNJ(-(SDd#n8fL~dy*T*89z!MgQHOvRkj4hE zPsM0N&Lc8X@`U(zqA0S~dNa&rMaB4cp;FMUDzcnFi8Cle*jGGc(0~gU=x|@gq{VfF zVpC@`fUp(Udq2LK@HqFXMT;ZDKaZiR1xv~Q|6h*Mp{Vh^=uBq)+TGXq@Plx33T3J* zqL6o?yVPY-jeWkeTvr#K0VO_CG^W9*!>F7z!p>4pbu0Fqk7cK3fW7UmnU(|Iy|x`C z<#TF8G4Q<&`~cV6E0;PQzVSga`gxdyI-YYtYxG-a?+pMAKye^`3BXTc@oFbxwv=7L(O$2qr<(d{~(`~Jc3Gq9vz^SuXcR;iYo%ju?? z_d(-U^yy>qZwZ*)9R7hOZ$On<{%bdHA(ql(bqFNc`GUa_A~ zJ)hhRdG47-%=@S|TwtO}M46F0b5w$-Ffm{0`yXrH@boF7DbwyH*rJ8 zx+{Lg;r|v2kC67G$>0K6&nGr0wc7PQ1NjR7R%hFrw#t?}D~Yng&*)`u>xaHCdE!#JnLx z+V~PKZsqpFhu^i`_L_Jv^tp^%ijodw3Zw5_q#izj*{13n#W7byEW`X1IxZ!^l?B_D z5Ju0ap`FN~dyW59v9^3(UIH7Lg5ikMUEB8GRzkFy@;_5RiGrDnL9sn7=C-NIrdyHW zlHty-U-OmB*0gDF9>M?mq5mkG!}E$H{>7=>fr&2lKNfI|p8$C4$`A1II{9Hxr~J#e zD(TP4Uw8a^$6@fjzexCls_l0xGIlhE+dny^dG)&I`@-Fbo=h_Y}F=;T;kZcH4~p4#w|XcRsv=7A!o9oJi|1FHFnO4-WOlm46`DwA)Me9FUaAsw$p{ zfOMn}@P;at0YPv%7FLs|oN)`3;J+*0H0$B2R)Ias(Z9@|FbV#_ay7BC}(aw!zF$yird z?SspBEF*S=QYq@)wA3PStyelfSONTaPF@GSboZbT1? 
zsV{%HxFXI2d|I-pU=>0AcR)+uEB$o>Um6pLLoTy-nF9DbN41$Q&_75KbuE{!o#rO9 zzF;I-^&iWUYaf9%&!`S%iO)k%G|9YHyCdy-@nLyVZF~Z=*1nNwTC2J05Kbc3KV{qL_zUyH;~xPnF%=p$Y_WU8v;*S=ANeaxhN+elVc_3_wo zga)=4VV>d?0yZCqZ49OUw{rE<*n8t~`9gBB&m!~ztu1S+w7(872|O3-rKNzoA%^P= zVVBNaMT)TG-RVC@X$O099)S*(SV~4eFOitb7b}ohr?A$r9iEee4E%^XxqN%n3~B#X zHRfdkJ-EdeY5K!4}u&e1-h=}AgL8t1^4~h2lH0%%GC(wgu zT(}D6J%fL7DN&2V_T=$yhUqURg5#cW^Nipz4Xk#y*+|QHI>h*B(imNh{9DtUTeB6i zXBc^Dg2>1&q?LlWBmr^ETio?+7TZA1paPBTMgH@`u4j6+?V8VU3Gk2E!36$nn=+hL zC-Hfmq&klrwYioTuqi|{vn*obULEagJ|kyxC&>YQL+|AUu-0qkw+#a#J^J0VnJDhI z?NoWbSI!N-6RtJbN~sxgA@U4HYsgRwpurZ5jc~kjn5Z(7E!Qp42+5JcfaJwJ@=?jA zbhuNqe4xgmykaB^rVr2UZoH?o8BQxuz=1EKpoJToL+|Q`Q39xQK86MS34%QX_HV&m z1zJ77tRwWPva0PHyH7Kr%hV84=&St~I)A|*~CI|10VeQ_Nx_jfCmdkyC30|pjCg8pN*QqK{^qW}tyGc$I5R(6hKq4r}rQ-gi5D88M8I(%4Invz@A+ zflJ;yQV7az|A<32GRz^Ayz+f=LD1-Lw$Yj~O4EvYX*v7-qSse}XAj9>avWnRy`lgB_|(?;tP>x`jvqk73}6~kA%9J`L+uZD zP4~%zO&EJXF;vATBLiS$p*#%6^n4Su0bpBvmizpE;rFx(tV?0{|QV#Qi(4o_Gb zkql*?%zUoXR;$MYBhKC+6W zmyLmvkp!4>j+P&3|4kgzG*zoPNTC5VU=s^J4LK>tiC?62xlHW;V#%KH3#+rAuIZz2 zk!0PEkVv?=B)N6aJt17>Xp7Rb1tx|Rs&jFmkzdm!BO0Fq2C|p6-G7#S!n6|sgE5}B zkeNgtg7o?MTpsMVNfhD4r`S>IEl0+sx9h(^%T*Q|!2BaBf6PvVkw6iC^6o0S;XqFQ zSQI?HAg3}Dld*_-UFpcyl;G!&pS~JH^Fr7N`(J;5?L$my>~hyDgQW*r`tPn6dV_FJ zqckVA08>o*>`-%xV^K|QOJJt_4a|7npijs!u7ambmg&D_XGbbjP7xMYD-d4x+Z@5& zy}v5)xcp`DNqsl^n`@CX^33V?`R7WCr9VAg>Xl{`?B%RwQXK)3D6pYuf&w8zauWYR z7$`=p&N}s~Or=j$1Xhd`^|Ki=X~}aXCx@9!Upt+W`LGFW^2H>>`xhb5`}} zqNg5c&E0!iAsyC$xnS~%D139mW=xT1v2u2UNb+!l>)bOG5~vo#pyi2ECzZBD`1~IaB(M!^!w}9cX&JQIHj~< zYyA<)72U=Gd*uL-xYk*ubDKy+tl2xUii9fxuWH}GNT%rLEEg4s)EM!A+3scPVANO8 zn9se(7k^Bw(=?B3x`MkDcoRVQ_V?F#FwWgvGzQ%_UP~xwCKHC3iqaf{LuzM*4}^!L zhlfNM_W+ZB0f6P@#zUL|+5F^_9U`2|>n2}S#*@5_P##$pCLBiIAqi&`@n{G<3)7$N z+>Kz@tPR5tnmF&f8p#rU36bsj_Wx#XBQ&Sy<3Tozzh5yJQ3tZTkfvP$y9uTH%l3yJ z%9Lmq3?&aRFuyK0D7fo=WUFZfk=g1Iu?tE8=aSJBAc_-?7a+~=;}5(@$E)g(tf@!^ z-XKi2p3WVi^qACLwCRAgZp=`@*6BJ-wg3vL!Fc zs7$vq%hN*o6=s_8TJJb8wQ|;C3f{wuIE@-KL8nd+fRLmkXFArt@S-HpwiIC{ii?PJ 
zyGsAKJyI&NT%DYpGPZ6QC;1Oe@dVl6Rc0bVrqWY9>|*dQ4V*H8Pg(9oywiBz2c$)& z^l{AdKYC)ak=1-w1GbcpU6SadMxEPo10VT>c65Y%+SGz!?5giHDwBQd`9Y0NawyBo zQ6<%=bM~aWm5RxL3-}pfFYlpZ$AkO|0u|>0|D)l@5KHO!cJHP}M4TivnyB}XRB!J*?SVJb? z>y}pP*_$DMs{fAO1X09kI|GOOZ?s6UM+2t|b5^`rfW{LD@ zh^l+px1T?x;wnM0vP0E_91H^0N*VZ%Vc{TCDe8R0(W3Fg4t$vGWFTV9UYCL1(v{POeMUB8pFBq*NTC7u`cA zuo-dng}(S4foS_I<1d65MZSg(!ef(wfAUen@=ISqu)44x-73G42AmP5gkqKIBH4GU znkA->8!}sE7Fr(^OFVQ14+N3FjUetB7;$8q6_%Rc#}!cC;n8SVD$ggQD_Egv1pOql z6kkol%q5d%hI$iad{|BbDYD)3$$x>~aVl-=2BXL*=mX~2tPXz64rZp!G?Xq6)OIGQY1lor*OSU~*52K2 zrjSe;MpyqWf13u6rirMsyD&V#wMs9OK+A!x>j1Oz0u(Tj3k)H;+aialvQ*$s(1|kgzCG@% zL%7I=F+XeO;0isBgScC+`GGjD8js!AOUl@aU9L7l7=NmQf)m`gWtYq8?&Oe(1bjf( zxG(xqUO(}P`?qDxKAa^rj*{F;%(N2oV2I@Jm5Egk#b^pR$b@Kp{hCz0u21D2x@yfD z9-l%Z8DKDVq~E|=Ps`=Ax4-2ah(hNB4gCtbmsqNWLwp^9)SaXbC65}ThP&;m(;~`58Oy(H z`33#;6ahvVyp-Uchb(MN>7lRDAAiYB>hk(z&c5=O+o(7TvZMPF{6Sveiv7rNVIy0V zKT$`q&=>HTg1p3(-`Hri{H+>?xu?rw`g>BzmKF$o>9J9QnZt8Ua_{xEBod(selU); z_s;{m=1nxf1dxLn)_{wVo}Ju zs5%uLKo|YzR}!?+j{Umh>A*=kj`SUrkAYjC{ZY3rK5pB;cnya>8qd7!x((GZLQB>J zO8^`~389Z47yM}<@}MFkV+K~n-!&u7FBQLffg!A}=*?||G!7y~J(9mmjpQyFsd(ia zD`1T$FN+!E_G59~I;LRt7V!A{YS_$OFp3CQP7gy?pN?b117P0lF$m&F z#7BiGGMeXpPUq2>kZR*7Yi7m|piwtMy?k%vt7T2ON;&{%xIx!nY)JNHSbHthLUzpt zdrCTMi;NX}{gG1Nru!xb!)yo>pN2th@j1S2Q{DVuc7ST!1!tl3KkdG20@xctB~kK9 zUG-RqYcJGoypJUKjRZ>&+p^QbjHJ;#ix#%TZ!$+fDo+aErH3Diy}e3zO02b7Zk(?& z{cx&kq6#SPl=_(uJ+j&2(_`&uXenGF)!844aRhkrQcJFFdJe5+lum-6;VA4kyATcl ziq9QBtcQ4@z0#F}{V3fh8EdEOU@R&Nl5$I=Rj5kVz%lg<%ef`w{MOK$GWwiPeuaDjyIq^|S1`%VA>wv9YIk>uPn_)YEa@ z$4MW)KhiU^`*ej1YV$D>ECoL2vHUaGK=})jb!y$&3I8B8;jIuDK^k`;MRh#9BsZc_ zC$E_!{a#>DqLD<7ih+u%AnsHA_MUGpwVgeK8<9F2+2}H9rihz@_-8t_V6u(ppP^v*P(W>_VB2ZnUH-?R>&j4jSvUt4u*oR+6wf1lrVQ>d|EtP{uwpnV*@0&Fp6_vOe8 zOPqCVJGO49od(5x+shrCVQv{6OfkA{bPl}SK02Sw)fO3?mmCe!W1S)q(+a6YfPA$b z|J`3^QfHSmg$v!{9^gy$f#XC8k#^i7t22ymH~VoC3k3cPU7|2R9qx=PJ=<{DXn~J2 zY3h79nQbBj5rj4!A?Jp=O_Hwc4*O2s3!P~NBOCNig#W5hV+c|?uUhjW8lz_`+$kA_lnMYI=Q 
zvkQz*(|lv<8NzVDpyQR9S|w(Mc|ERI#_yJ1m$z7Au{EQf3{`^3fOATRC1jCk+dWJn z{sbz$wik}Of-ZSRH2ZngY6qqeMpu8~iXc2AmrZlIg74-*T^^Cuk2TY`DLHe%I&gYc zOWG*NG&gXZ-c^6Bbhgu^LI8ajBr!w!C*FX(**I!y3xKQ|zx;{%fMppg+AP#9fUv*n zh@qRjxjfVzRZqYrd#@R;ur$glyH1SYlOIK0i}0q7pxf*p3D+dyAE4@^&Li+~PFXRI zi>?QnH@j6>P3dYB)#2o|w?*LXVI+sa#Pht_Y8ZjZOdH`+aVr^}_R{-3Y?a#-@G4Nq zVvqHSXjCA|&!Men_Sk3|ny9@`a#%zLNOnidsizn}ZG7-;uM7<$znX&XLU}@6xAk-* z|D~GBKeFQWickE?QYd|V#uE$Sx5#OCiji&! z`>YOE>65n?kiz+5hzj+ridBDQfHPJ}ZIaheR&NI*v^g|$;7zr@2*+*1pzT*22AVW4 zp7+oFzI>ZkOL1gDH$1t2Xz_*aFp7LQ%Hwv-9!-X|TgwDVz2AL{;$T*zJ}Nlw)Eb_{3M`+M3{QsF`AN0?~b0T?1h+T~35pQO{rAK0+&udqR4 z>Hmb|j=1OGi~!KBwCAKcJ0%Gx9&d<(QCVrnDOT) zyfEimq{c@k#Due_o9ryi@)ASuD;61_mI7`8e&&-GXV)R08%;aVI3I~GM>a9I&dnLh z)plhwW<1k?z)`UVcJnky2RcFe?gLn@@i-izmuNX#ZGn9w*95t?2f<`o^MTOPv1A72 zoUfT#EAOhs2kn$QTBh)xS}NvZhgE*rlNFIhxU-&zYD--#6{m#X!?vrZL1It&kPz)I zN?y?U&GYVxtDolmsg-5c(?g4u(&w00{r&WEl0f6c9H0UFXtv!^6liS8Pa;E1$X9ZH z3YpdTg*lxC?r`7}k=4*PdE9Y^^2BmB87L56o9Oz61{wh^vqLMU#54$KB$_|`eWMBA zWtD*a-M;^IZZZLjOm0>zLp^CCv~}w9L1g?`z^1KJoZ&zp^k>u||&xHa%7ehn;+yHY1OzcFZYeyYVkr;nqWC?liH&OyQC9Xw3e z7}#@?xnpyam7{EjakB!IQIdf@CG#KpB#orb?X)U+55;-T@~#2yP#5qSbv9^TUb8}b z$wp61nIQ1+T1(yWcC6vYo25xPYqf*;6Mr*{AY(dYTY*syTp+z{Rh^a6nq+)pfMxX^ z)=<;5?4gecaB3fEiy_D<+uR|lzeBfpyB&nmPBO-?DU>*N`TmoC9)Rtf2;qz_Iqz+O z(yY!6lDhe8hv}mS;^0rSC_)(|24)~d91bg@Fk1?7src6LN093qFzJG{kk%$r6cGvGw`^*qgQDcW?$s9rm*>INkCS}ALx-2*TSzwoW0k_ak4PNM- z!^0C1LDlsRH9=(voRK$Yofd-|VgmhXzWCX+e>3>73Lu}^jy?wtRl;W=MCt7f1efa< zn~AXJp5CzH;EA)HzQ{k$dGUj(Y`dsL%m7@nEm;)@cN%0A_B#et#C1Fq*BM^Jf8jsjEIa5M-XNO4N?o5`F`!?uF z)qid?xxUyhRUpB>@CNSvCl@ZeQ*_GfADHNqIlqL!7@(;;pdn}*cVjF>>dVqJKNcR7 zb2sI-2>MCg(FU%F3R3*cA%#-4Qpc=uhVNBj3r%2!U2!*%?@Xt)KinI}5X%swwaJ6{ z!a6q=kT`0f6ZAExE(#6v5ycifr0|pc25yFgzrk*GES})q-7K{nujMx|A{hdW^WQ8} zt}^ju0Is!C!MH;4DwXQ5VQuK($)KTFM&(}kfJ`QHkw=KL*@kh&7eoq6wO($76Lc;? 
zw)$UnZg`4BVgzne;#KsFfLQr);Bw=m0hW$`VI)3pV?E-*Wr|Ro0bz1@PoBRzB{5%Y zKY9-UfjU^M6z5tg_s<+u-?BwruAn=tT;-*2dQ}Du&beu7BARGKpav$9ce?alq7uzO z)z=(g*{`^UNg8WjtzVef!K5Oa9QK9sqD-sUUu@re3R(dxg7_-L!KC*(KkQT36_CQm zxM5$Oenm0)*+#ex!pXi^Y7M{1@q6k6d0nIl|6=k;cJui_3B4W;rGw*E@9X$MGRgh9 zQB4}4CLqT+G`d<`?fPDi)l9O=0B$?RpFu}DAP-89Kn|e9>qjeHcI=-~s`r$zJokUawuG=JB8v+oogHWG;m2(C#ZYf$ z?QtjYKu(@d{b3Gy@^2#TiG~si5iM=Sxl)&45mHwT<}0XA4*ryHLQPvOU@ADJ6faSa z{Ma5}g_VdvNITO$28dS)C7w6}VLbqGvkF@Lab6utTQ~db(4Aq(KD?CiLjgqtgB8TT zO{&b#M48)ujpGYodvdP|Gx zMHS~<^4@xagM;JJ(AS|E;te9r?Cc*f@u$y6qcKyugD@8XP(m3OfdY~~;exy3K$8p{ z5}!bl^SBrHMYwkAHa}oLh|VFaI-j0kx08vh9He)1vvd!`&@rYN&Rzu^!Gi68-g{q6 zI*=nnsDE9ZgEcOiwy=pCEHg?b)o+#(aZHS#l8kDVCJhx1A5eV9yJE52>}t_$Aw{^? zje>O!%?>fAfTNB8h<5`NMo(v+W%pZ81PzQ04Qcsc(8CE2%fEB9&HLxB3UaF8(f`vW-{VXT%r1HQ<*wM~veZkv&e-_;h1|A1Iwa*9#DBSc$fqZ8-Y z0)*3A8f9zN$3TfCkx9NTBg?nSczTsmJ2qlu(ybP~(OUeA=%_kHdZcdRn1|S_!UK$X zM)b$h2ejk>B6LwZuqBVLQsX#Q^&^W&bc36#0+8GDO^N zOWg6VLwBMC`qAD$KAY~mxNehny8)((qFFg-|JKQNk#R|_4Ql|tL~#;|{Th4)2~)ZgKGdUU|)J zI78%R4D%4k&$blf0xCV%O;zISjMYW2zqsjvZhQPur=H4cy;!Sc1CUk_xYKLD`$gQ> z6Zj$HGDP32J2>z;?$_@}%mqVBB_k>a>jhL6_ zDUSOCinw_z>TM1%i`x0WJfEH_2k*q-kPm}6mEg**DbRJrS8#hPBi;OFXzLrFQsk8w z@)1-^{q+VSh$lYA;>X=2o4<(BeQ93cQipM*?+f`ecAm=hefMWegYW z99;6+s)ZBYg}i9#10wi6XNmk|0yyK_oOe7_X$-sl_bmWB&nA;Ix3&W@sQzS-fQkz} z`JK1^bilyRpp3tdpT|%pEpeTPt}?dZ7M&6fdFC)D@QsN>EA1ErjqwL&13n@Q0|TfO zrz_rUlUSxLtIBAjV4Un+`d}2;l6nv8wuP2dI6*Bzb{(gS_ z@(eWaV`=N_rWcqDg~8xVGpZw{KAb-K?=@AEnKn^B^*+qnWWSZ{Kf4P_kVRDV(kNrS zztGQ6h(fOrjeCDliX$UN2(G#X!hyM;PDm5t$aT_B9u zbPb9hB|(5ch8ggWa7JUtrM7ZT;=Uo``D%-Zxz2UDC2|oVEk-f36->#WARKpjxux~E zmm0bMs}56{kM-1j{bl+ZMZK09boB|cZx%s1{D1(=1Hu?=GG1lY^)(4s6+T9-7Wkmo zpbSZb0XNULV~R5y4{E)4jl%ymI4SJi$d$Be4R~=<1!m6FgvJ3S$3%?*E(#k>yO#D`vfKa3~Xh64TVT}3-l!BdAj8ttxHL;KXBP_ zimlhjt3BdmGrqvsw=+|n|5Yu0%1a!)RshvkCdmroCzF3PN^#s@q9P|SPq8wTL5xa+ z`F@p{x3;?~M-U{{3fh}jRMv{He^W$+^^w&>aTdY6hb?_n-ADUhhR`Zj9!KnDWI+9z zvsGgntlJkksGib@da8ofWW%`vZjWRw3;UldS~uLcRZCIRT^=q0Li%jK*<-iRdSNg! 
zGn@Kylw_A$>0ob1kggm&GuM?)%(^6y0FiJ&*v&%x=enytbGEyxO0E7fpdqvWPrhBq zry1ZNeFc^fI8dNHM#r(N%xU4M$Y)YX)C7V%-zwu^J3(QMlGo)W3Ts#$;y!`Xz=L6X zpW58$xu1qip#vDG>6J_s$e|qou+?|Bd3P8I-Q__o$wUyzQRA6!OFP2sN)@=gBq6+# z$kWUfN@sF`zid`?`R;EU33*_Z2zyf|EfcZh{#Nj!DZ=2lBzHzH2pW9_l>Y_8pDjcB zi)+^$ZtfQtOKas3$$(--T+RU3ZOc(7F2NQ}TIoV#ur!&eDL3|U3)(REX6=gB4-0mdas~4px5jsL|AoGKw6`NA;wLG zQKR;7Lj?HJlmNQQ1Z1AQz9G3yW8lLSH~2WdMsqXd^B@6c9N~_oZl1G}NfkFkKQgRC zsO0|-{rFIt;ENaS(@&`7c2Y|)G*szF^j&{AYS0AU7FgP92weH2^$O&-S>33^4^1s|(9ud|vzGrjnt1WbU@TM;XV zzzl?~wMZtuh{jFyl{@3F4~2zu2){=;b_VGc2CviZC5~>vJT^s?`(6FYQCL`5Kj0=O%0!TjRDoTAjpbzG z4+o)unc*jPxT|DiEUJ}_F|2P{TIV3gNFYX8U-L_YYAPSW7Se>Z0oZ>qlJ*3kz6BN~ zJAr4m#Fb9dfQ}VR6Z1S+D7ISJ9mSQcz1g2%+bOoPKHDokT`EFl0maXEA}8)-xv;?E*!X|@|j;ELZe!2$6)7K2uB4E$17;wMMc@~|x zZEKzc1}e(Q^#gD96SOp#QNG600jBFGK+CQiDD(2q4d8 zDqr7;m?1aJl6kEk&{|6LjB`!(B=egTqq3yP)~Ms>z?>7 zQ2hmD-bv9+O`8J^)mH@zi$G3F39+m!U9J=0jv54Gwv`ymUXreHeBi_RnsExb&HTaE zaozi|AH4Y#;qQDA{?|4QEr3tRjZlIBV|aNn1CtChfqs&CF=x)lZht zSmV4~dkfr11+9cL7K0MG4`ZZL)C+pP0b+YX%o4s+r- zDc+$+*GzwdQNb9f4Q5P0$o4cyMccC1&>lhQuMsktXcmGU9T-V3=w(qjo zUpUZ@-+39>Zr>}vItj^Z87W`l%vL*eu&XXDoy36OV}iGWn3poaYi$(U18dxl{wWlD z1%gC9fkplROW^Voi|-GEwugZ}pbhh2G!zRz0ccPtx0vWZUGC3+t3RTJHfg5e9_9l7 zn%xAEoc``%El5eS6F3h0@S(@R@~KgS^T|!kVtd8C1saUXjA*k#gdJaaV~T4q`T0h- zk-0Hz_^cNIhwA);RA{$Hl^{fvfa`Jbm(t%zS!ngu1*VnKVTMx)k$?%{#SmP15hz}k zxhzD=H;W!(FF7BU=lg9kC)@e5Xv3|p-oI`4*O@5JF*pW9G7AHsV`l<`ZFuM_kkj7y z{xB=;oU>{fq{UmUwITt^*A+&^)^9L~+P1#wQ|FV&{GW04`wS+Y7a;pdB*OO`T!|B9 z?|~X8;`o|%jp8$qoT-3PRre40IqmM{wP(O?kM+UiQ7=x*1wvXe;eW$h5>Iq(td|_Crz;*sO}r zInS{`5}%+k`3A!LH;A;ckYJnLVAY+dG?4_OGjA3)T8`nhC4zlvdD{J6eeu0 z5n_=!we@bP-Bmm|DgdvjukH#DBS6zwq9uct4lXeJEjdudCK33F0&-BI&G>OpuQ>(R zp6ktf1Fc_-8{w?!Hs#^&;~=rl{dO<_)nC2zztzA)-^SmJ%Z~a4lI|J8yfAL{s-23i z!B&L9yPM0&cLT21XK&eF6@5Bhgia=|)6$UJ%#o#@xG-AY0#_A&66l=O;4UW_4db2d!ixsJYamguwZb>0A4kTlL33 zwyk!p&@d=JIA%nxfebY+pEH3R`j@nIQ-FglQmj*V3<4b}Pr=H`SNRqERs*Ev9L4uE z5SW5Jo(zPSAK4bst^udQ$q@f2#mGxAG_JRk#vXzFRb^uT)<5_9f(Fds#C~LHmc_NK 
z*&!FF0MUtd6H$V`{W660kag;Ur432=522EUfX%m#8Kq#i&4db@PXf}t=*uZ*(kI8Q zf$`$qYz2sy0U_P`(S7+t?shhgyj-=r7; zG75^d*!#Jr`A4*fu=TeqLTF8a4)R$Rb$C|sB1DQl461>on^(0!|Fx#cR}Fv{yWSgQ z8mYZZWji^<%rS~J8ZZEk3E*dYdWQ6eb|=#xiH-y~{Go&59h!g#cSQys054(x(NrzE zDJ$xCvo0*b-2h}vYAeBj#C)j2`L%D5>E=_`!x^yed9C{Yrijy*VhY_N@m>36mup=P zoPpU|B;5`<7)*n>L_#3+g2;01?+rMCWOj+4ej>B1d;e8=?i&c&6@Vw%yMS~P+IHq} z(4)ni^#O}BrFj^1mHYqSPX^d0@QWc%%v|`DZNQ#`CU-MXUG(}&y=4*z@es8wDdB$B z5$pN5aVE^$dW@lwpqSjEY8UZ_gp72e9BnK`esaM<0jfgds zBEo^Y~lO3(aMge}a5%_SEy)ps6o-1z^HYfJhT3^?k4tzrX>EU%AB(GoT;D_$lMONk zge|}4F65Wer_BpX4ttvPJ`4}+x4+9?o1(Mh!IV}=xeowkpj?0n?iggQ_YVAIn>|7e zYfO^W-R)Tupg|n38Swzgxz-sB6<L9 z%+W<=tKFwhKat~Qw>>g!qsS1L2B5Y;A%)CofQ%mnXvI>J!QL6lEk|ePD%JGOww?$O

      D7Vz-Li47tR`6)Jeq`Ub!Tz2Jr4=sJmen_~F7Z0$2D|GIW~hR|xisR>@#va@U_ z;8zC;z<9rIng@ zvKi3{RR+^TQmfi!#_4-Dj65wNG|$&cr36#P%jY%KhkB~mHJps-KqpvLx;VI;Ax)gg zHB02eDbZ>Me(E2yeOMj_f0-5)JeQTNA13ELd;=HF#TswuUgWd#MHb}>!Cg>AgWuql z8cTWO_J#=2hrS_TLEw(JQ3tXWzp-#5!tQ{+z{tm;=6Jz{Ww`%JNbaObzYR#FW+*z4 zv$s=Maha729I501UJrZWx`VpChE(E1(5vy%k3;)?uKp_b`d_Nd{YX|8x?C0q+r=@BGuuCw^2D zZ8~H#S9;c15RWOZ2EoPezW_hl8WqzPL#5s?0;?BASn- zw^OmwTW*h9wv?vpgIZR)JwyrE?2Uw7*m$K0)GSJ<>~oPb-|hwz^}m4co}^%A`i28F zO0!&S-=@fJpzIL!ZQ)a`k74X~fDuz+L1pBF9tOtQiqp^;bifXg34P=JY_2w(*xx zF++4piRFHSrx%|=O5`2ad!?J+^;B~2hRb6c>8j_w*D_UbhtXFTAobY6&mMrb*77av z&%&|?Vy+I3`h%4(_r&AE0b;E@^@&+@eIp`erBN9~DyNs)TY$he=M$ zUXlE^%~Cu^V`(WejOSh_Q@&_id(8j5H+}D5tIJs1COg_M2f!i+ts3Sy zqf)3JHt>ehRHm)n7Lb!Ew|s&4!#<6sa0tow6F4n68-6~;$aV^^yp_561!+-Ihyy$` zy}ePBriqF&E89xRJx+ec8leJ?q|gIp2?ZlM-)n4OpaHY@+{(^Ad56Pp6;#?DiBl3p zNSGf`r`+b5&Zk}2rEndcg&&v=hJ-H%-uhpuqj{CF>EIRLsM-vt-&x}>v~LGd-dqi6 ztdopCmaJz?^O%2RMpf_SAkoB*a@{2g=B;bomjTU&ootm?N{ACLsp!OyHs9-zpq_Wu zXjmr$^{W@V{_?4c#C2}`$NBjX>5Tsl!Zd}XAO2Tj8jOT!k#F-~$!aFLwExb~H0K)8 zwEkc0^*=y*=jYFVGd;YwFR^>KnWpWXvlh;i{?7Wx@Ox#2v>~@%|87kku*Q1^>1LZI z?mnq`0sUA(UXqvA9Ha+VIHd;b?Oq|*9?!dzb^=q|u6^f@Wt(#(F7~@lOik049*06< z(L>k7nUrmvb#2#4YyZK`ik^RUc_6ia+H!A`e9x)an<4=UGQp{6e9|`Vv~ZF6E&F@b z*3tXS@qiz;z6CUr4P~dh@+rnNbNcY>c+eZe^NY2>pmrSXfzEyo^jNBNhP;_{v!Iu5 zv|I_LR&*M+}j-HzITYb~R76_V)L` zrXNZg=k=rY;vF&ag#E+S_6!ZT$G$xfsdRcW2Ttw&IaBt`n#x|wUiBqCP3-G4cn+DQ z@O2MWyxMW>7V35nU2JSCjNLPQ-cOC~ZQpxQSccnUU2k})lm%dbHp>hF{Zf7cOeK#_ zCjqP0qXDK%BTb-ih1qDZ3(=W+;1cypZ`bUJpS|^IB=KQegaqJbZaT5b%QI(yV)yZN ziFQ*85;N>W~bu2_NQKgvz{Ka?Bl6p0nVw@vRqu;(eH zou##CYp!#OONO*TE!J$9AaE0G8a4!U@U>y!cB?j(V)B96IzU#P0=H+xF4lB7>0|&O zH!qA?e+oWQZd%FS3)`HMIkei92lLA6xUJK;fp>b@6?tZGqWSeN8YO9DQRfkn7J_XX z|4PkqvUKg^@q_&Jv0Nt)5Spn8Zkz< zrl7Qx?rxxa=kaQHsoBZ&xcY&H)ccU!R_IXT$Aqncw}YFflgR~rv$sB+eXBJ_{<@#F z+wtnH5uQ^!Xg_tTn{zdE#~E^16Jz(c2$-$Jf4xo`{QA7nGu~a%MjafjOe*@cw5rNs z_Uka23z*PBiTOkN^|9d5AJ8lZCCBgmh`EiN8x$$I`xpxp4%(44+{0;+oVvZ*cjn!~ 
zdP6$_HPtI-N$X_;^dN#U)Y>3oaVZ4}j{Mz@WfT6Dl@lk=H;1czxGL4m7aJ73&e#<| z_?9rhk5s&xuGs3fgHMO))SaJN#2o>sx-Fl{$`@O_o4r)KLzQCiK!99D2)5E#MO`{m z;@}{aJmZhu23&M$-F~fl+JOpN>TjI1f1QgZm7bXWrlCkL&QM|gdqfLOfZ{_SFZr$L ztBBZFU$j3T?{v5Z&p(nhQ7{a^g`KBFO0`|guUD3ICo3V$3=9%m93vUqCwg5}8Ma5yz{cNFhYRfV zWI?1p+-mD@=*~SqD_HleZ$R3$*pEb65DHV_+oul@wh*V9dojgN^|2pMkB-Do|YXJAAjPppN=3X!`yfP33C}%voA#xh^_kG_Ygrq2%Ar z{I{r4w}(#6u$NTkK58UOGQ#P`1yl$eb4Meoa)n{g7XWFH6~(uES%6)=tmKnWKNJEu z?9t=6Tu9`_)dYye;B)qv8oLhGp{ux+M{=(w681ghtw*+z+ zApOR9_A8I`2fhIa3KD}`$6;vs^O^w;{^xHp;V=y#dgBN0%iD-#9U|Uv1bJ%-?RvY0 zW8ysUh`QKb$$^l-#(B+lLl~-29r4GLeEBrVOlPXPC;trLpa-g}(QrPZ&e8y%{gC*z zz825Eq3X&HMsh@AwtMAMJMo)9_tyt=O_NRAaGT4)a=w08zq5aJr=(_9&^_6@U*G|b z9Ol$jnF%2Y$E?tqzGf`{n0~XXKKU!(IrS4a-%lryuO3naQx&=}Ip8pf+_z2|^?+wa z5<={MtV0Vc`_h{rXCM@Q83A~n8DzI2Gzd4QL_(kINyV5Zy$@$zy6;8cw3xR{$6S{t z^@7oBw3U4;LQ{~r&ps40T$VY~ISY^otwGMm{~qHYaW#GPP4|2z3ijz-f<7a*Qrhnc zzZn#?mqjI>ZWJ%We(E4*xl4|Xnn~YLAwKO4(cuYDnL|~`=+v1gwnSfU+mbsU>EqW> z;d1V^eJUeyb=GVwJIDW?sbu|fVr9L$|H z;Z@cXPD@(&!@XN{rpID0T7-;ClCs0lPt|j#%2#d2z*D~D+8^5*gz+fFfR&M*;XSoQ zLEeg2r5CIBS5ZP$G!HSQ!N_IF=~M!GW37o~m+2q!q6rDi>)9IDV<1HDyybNyJeu?Q zJ_DK!X`e}TEVPcnyT(!O$EXIz>ftGf=V)UhlO)P=l|aLxW%nKzw6Xg6CBqD@9K>N) zp(ZdN`wK83)J?wgH5A|&D0&S8IwTBa=1{}W(2WaT4$IT{?Xj6M1W2!H?$a7&KQ-7q zVA%fP33qb&=Ee(?V1jipm_DwOD?g%+2Si1RvHt;0QH0O^!dAOHUosB5GGuCkqeZnJ zNShW_D5I0mA&$gd+aYVpzBhG|#}}umLkwvIiidu6OyMFv*SbBDef|0zE*xm2$163V z!?d1jhb0Rh+aH`Ep=TX%hQb;}3TC!@UZd!{8)~aH_}O4QCzp8p;|P{~$f-S8+`#%; zgBh3?(+sn-fX|zqII?oDa<%WeeaT;K+E`9XDSt!#b$)1Zv@d&Ukw9`n()=2nRS?8@ zZJzk4Uj9}q79YOK$d%ibOxQ5UNgeMbqmbOzkQ-vnIdjubmb3e-N08(&#t(aBf#46&K7@Z zH5o2H8FVX<@OxEx(Ha%%@T$S04Q?FG5Li$2xaHTH8BnVI%x7O9LYR49Z`{5ZTGV$V z8M#3>5R6b57uV!|hUmb$6GY*^fn4n%XI-JfcQ-;Ic{CIy@n2G{dVZ1*0 ze{hp}|3)gn^F@vifKeed>cxqCtljo_FdeKE4yg7_gMko=g5m$rfO}du*FtLLg+oLO za7Q`oEw@gUVyW{Vk&uC|w@dS80k77C$o5_nf%`ds zQ4eW$p;zWE5$&)>hW1Xe3h%4MrTXjG$Dxa=fUa`EJ4K7jb_d^cIfnKfLx$#8QW)s$ z{{%Q=zldyNx0HpS;JGS*sn53^;4pw6KJe*CFU@;>(e)^4Jv;0fn!ffa{$ck~zDs0c 
z9>GM`twQR#eW}5jsOtT1>eV}h0k?!Xs1qnX1leYEHQBFr---UP8^i&E3@}k4JPG3? zJhUFoe06u*{IxIIX!#~Po@AvYY2mbk$CldL=Rgn=;B{LKfw(@=uYe55{Naf7N04wJ z83@v}Y8*iy6EM`t#z2Luy{PqZ+?rt~kCkGn$WF^8Lt_@Y#uU050_=)!>O~l(#<;vA zKJzW<9rpeP0wYPP_jj+E+sS;XmDFrZdaYAGl@nEiY{=gqfEPp3LJ#D*4U~6wu-G_T z55j(!K*Jk7flHt~`r-7OGsjnQCs{xo@A z>{^TVXY~8x!9+s8D=)1^3G&@t#(G?Ah;Dujd!kq9lskQZBKh-aGeH?!m z;CYq;U2h8IP6;jBvTgLGIluhtL<35jT%!AauETI7zF77oJX4mqe1`Zjc$bs>{+fin zGY5G3#)Y;LSx%Z3YE*);nTrDZlwi2^0Qnx~w)w>Lt0WpXGp_)%ngzhU`KsH#-iifz zU)YCtL(5XWmjMXBuah)4H4*{a4F)?&KOD^eCUqx4F8bnLhKw(bl>hjS8_p&|)tbE? zlGjd6k@o#kR#KRxIU+DMfe5{{iF;P{hEXrh%jhRc0xXRoJjChvALll!m$lZ+e)oP? zNZw|2t!+;#aDQK20acIV8X7W~)g{*>@P{tn^(c96%YB#bwyzu`Ak)c55y<|py|SPJ z{+RM}K&0bCcNEr7uJ;MxX`lK>6Vz{u2{~VHKO@DrQ|_Afu%b)_nfxC230zfSevK`B z6u08{KlTDKU{QuO#s@k7@s{YJE6<$N^B1Aa>16b0d<|ER#@_0J+BIGZQBw{~t*#RO zziWTimw&2ES8JKI@qeEsO{}^SNa=72iRem{5@eiayq>O|eKP!{?OdjQ>M=nxR0}~! z(aEob-o96w7~EZHd$Yb5pc|2_aoHkFOf^GLjXl&E0-orH4%W{8Iihv>@x;>xA0o=O4~?$K+=eb|!qJvkm7lTJd2^r{Tj?NS#n+WHk>P{*uZ-Mm>VUN0SC+ zEg`X(=rP`G0dAzupZag{Ok84?D^oAln1zFqe=9&HfWm+6Uh199q>^EmlG$?qGHphx zN&{PFd3&7fWiDQzhdRi~ybX?##Ncqocr}043cAI5RCeTcx6MC;@IwqDT(~JTP~>)H zDUb)X`!AWD8uTk-pi3Oqbv3izqsGR_WPGz-3q5v-B3Nmyi{zxh^l=ii?LJcXWpLio z!S$8nrkS)Onf+c^Tft3OND3)fs2&YEgHnKSm; zX6i#8jt-|;)Bj8zKK%E7_q0d7-v8g-?f_FiX(sqGd}hzQpf?~G0?6Cr|O&cocBpXldmWj_kmw}UGlx0w304G)fD5nc<* zt}ZwgC^>Ek$Pyh^wgC-oBt!kiLe`hZF=7Wt9>hrD4E};sXxW6&3I* z?KM6-rcj&;SBwrZQ6_@t@9T(gsxjnw$5Q6qZ(N91R@!t;!vyC8drUG>6URKX(LOmQvs zeB0Pde5cz2e(0{SWS`Yos?L@q(-HVWuNIsQB}>9~zBfZM$NpW1Ah@PCBeO9ox2@bZoO@+qRwj^{xMC@39Zo*vD1x**mJ9`+lxDujr4p z2uZ$o#;Dd;6Lx`oCxRJs)3yAad90zIYxt;m`8Mgx)~o)XKVz0Z3ccY91x67JchVli z8V;dm5Kfi~KML)0@e^tq|3QWe>?Bq3F4MU3B?l754=v|ds_c=cI1a}x6_`l8I;HHq z^m@p+;Hdt~Q4KG`N}>rVg5+V#SqolELG0ZauK;+4DlB<)OeLZ~ww%>ltv8gS%7>`A zG54S81KRP)Dmqvs8>bA3cU-jq&Ne6=!WG$Tu{AqMg#XeMBS(d)_Ba^42G4lTF?HCA zk)5e93+m^q)^_e%^7)`CW9X+lrnY{JgiKiVPDIc-W*iUO%9Jzb&sKmr%Y_=&x7-=h z5mmu^R~!5^Y}BlCpGPRA#m>t#Wi-%VGf~y!c4NN8=rr#JI4nTC&|*VqAGj|=A*3L2 
zHAK0&tKy#qg|BD|T+06|Yt`Wf6iAT_=gip+vft6F`ffCo!XfFf!w*8rx5f;yQ*hN| zs>A6U8#0jT4|$MccgPade@lt#1pgb%kT6aKe`uMAH|n32FZ+Z2VD9J8_)0T4M~o9-p_c2&hM8`Oq* z3V1UZrAwNeV)$mtjFn)`m=2^Uj>%?vR z7}p(>|JzZJjPmzD$Jm%14CDWxka41KkF)s)nyhj zMXuoKO@!v=HHh6(tWSl-A}_*}S~`QM$Ni>$Q&vR#jU9gV>^-rw8XLh{>i22HxlmiC zD^_w;Cjtme%GXM(kv<+5l90-2#%?Nym3lKlc@w2zg?W`K*@zM&v+q#@L1bz>4x(sK z7icH7+=h63(k7hZKGEtJdlx5MVDi2Ca$>0SpbvRT% z);qchnx*}8NMQ9ShbP`53FAX5ju3`cbs4A=i4TYVB++6|Kos-p2;u2wg&^s&;){&! z3*btzgHz@VM~C&_Q2-vx3U%m^$+y&;E;xt75IKam{g6erEIpzYOl%lKbzMD4}BbR>ONkmW)| z5R>sJ+OU4t@lKUYu)$UsBTmNmb=Og3E`rSa**uNDoT>)#SF1>AE+>M_{Q}Z%s3o~i zVp=#Wr&+OzY1Yd1z~V>d=!KV=RT{*(asXf;%^Y@97fToVsHLd(Fi9%Ff)B^wTi;L1 z&mhxOs?mexQyN-D0GxD6+X$@4YD94pv$(29g1*@|G|HuUx*co|gdN?Q9unAQ5&VuB zjNsIf!b%5byVxF!ryB;V#m~b(M6s7x(FmvC7LFpXMF{vjC`&0tE;yk2yaVYds=1r{ zXd6TyyS^_qSQ`-WOk9s4z7-b(*^;L^a54}(5Nc$Ab>!`%0^`|C;Q>(`%=E4|?%v;2 zBruL|y~ilEE@=JTV;;5RaOGK4{A!JUT7CKrF>;eK?#HsXwBym?(Q zaEPa%#pPj6#E#_{hLiuML8|UcOrsqrTzHYrn@mdhQ{z zf(HUl)x?Z~OxYbtSoqvMv6=|=B0#~Hpv4H~E!=Ovmo5{@pS&d{b3_%o$VQ*}Yo#kK zTy|sVT7C>>BhnH5VfWHQc`Y_vTpql7N!qxJKjtC-&h1i;S7Rqopsur^IR^2vdC^d2 z@_TSd{G4a@yDxKaJTOGs=@m@l1uPGn(EEKeV4npG!U8_^T`BnYgwVXS^;x(R7(460u@eWK>!Bu# zlv=|FF^ak-J z^Gd^r*izCi=5GBxvui$O1;j|@x3(kN;{(BlwH~)k*1jit(uw9oLK2-b$rra0sM~BV z1v41g--`_x;iGg*YD)|YwEl2F#B~lHV02U5m>l7- z@00*pB45+Fpbij(Bx+%yGn)29&wLUO)M#;V1CApHNggRtg*$Fyrh>u4A&X^Zac8x=;Yl{)JrYXg($)?-7OUPiKTbt`FenzRBD+%zr%}zIPdz)yDO_AV z@3TXgnazxr4q|p^aRRrKzTG-h>4W8TNi{{N$Wq3Vtk<`oE-{t%RG=zWycI`Fh15d0 zq<~nZ#lQ<%E(75)XBK~85E(24`S%jmMeqvzB}P~r4YU?8K2sBar)@135}J}fCA)mk z$y2z6*@9-Ii8N9MKNXd?MJ@A0z{(KC3NRIXnYNKB=$wbJ_6`Tto|h5Gm|ZQh(?v&N z;I!ZqoI=F|{r>(u8d3Y()Ppm_zcSDDX@RfLQG8DQPZ%qT@_$?wVm=KqK z@AYO#?0(V(8(xJ?n*0``%;@nk6pSN+vPhJT4ou*yLw{?5_*udmEvYY6Uqx;QPaa~L zuk)e9)rf9f?wy{*HFhK8>nI%HnR9MHc!w@W1ULmX^JmP91^N@r$b99~ zXzW&HG4&9gE9V@*MT{5NG|JgzXqB;W?F>1JziL+1yWGpQIG#$VnTFhiBerIfn^uk( zI9b!-RjcPj$@|qdtlB;qXx?xo9;)T#8(=3#w*C^B%Cku{=)S&z$&iz~#FZQH62fT< 
zk{77|TjC>P_bKll-5lCkiv2_4!NiByRS8%xgv$*(1p9z`Ww6e5K$a?CQuqGBVFStz z!O=8Fs!W+8m4}fu;#$J=<=jJSNY$i!LtnPt3(;pgHAd<)g5jJPXB#V3w8*+L^mUu> zeAFf{e1o#scQ^G_hwC;j6r`TQ!&;9K+6{E3?JSCmza!Ygj6md&nWg(&x7f`DW6(~U zX0LbR1I2SG0C@l}_`2atlQ6;+n5(3mK)06?WP5>XNTWF#>({@QnTn^pPu9k()s3wmMm?t<1ym*{&6BYENwj`uO8Vt-!@Vz}=&s+{7iMs{g;6$t;$vW%fN z7i?IcbJL+W4jZM-5y9i6SAeXd*ES0{cCwffP9UPJSqYdz(C2ZEg@8nv*#EGpwaEzM z(|F<<-$=Ql+}QV6d?syh5#5Q~4w)*t;Wi?pJnDo>epqaVxM>V|+6!y8x|!^Cor&u$ zvuE#-A6G|V^&4db@Aw28hJ8yzV^gTERPpAnrqoY5u@E9>y=|903RUU%GObKLhJ3;NFeHtUWPY*m>kSI_6!G43x4N_#s0#)! zr7eq_8ce~({>#@-$v3^^_@}Lr&XV=E1Lsxda)Nw{luBh^w zA`o-IK+}3)M%F^B_KQo{=d3p@`NK#sO3s9WL@Ir)gfa&$PaecZY+7qVx8{#y==ipv z{U#Wm#T|mu(B(Oa0tM%bKIQEupEmz=!B-IVq-kf z^3#vL=V)vvMO)0!=&uFK#+xwvEtlz*Aw~J~Il8{J(GV%p<Q!SP2aK&u8D98)+j;hzS3GA@nQuC; zM25RIXxmWB)PLPN`XIax$ZK{EUhBR55^WNQs)dk}gu)n)9P%Kl1U zg-oniZ1wvX60lDAoV^trGYGW0o~-7?+nAoQAF}w|p_& z^wQ0>JPBp`N);w;+7C8lbdIR75}dG!jJ zRF3|#8bPNPuzo}rwa=O|7p}W2+G_M~K}f)P9Mw!9BiV8=%*0(sJ+jm&tPE`-kk1Nw zO7UnT${Q5kJfCpHS1p zbr5DogZS_$(7#kz2-R*`XN+y8RW^9QmCkuL|Ew|L1mIz*h2P0-_k`Is_A2GXngoCn z-fE{e&60|o2&hZyw+MW zA+Y))GySOS#x^u$6D+wh!<)q%tLcDQUnQshT*l6hB6WVsxLi zO-C)2e$)^ZDv1};&Owr3o=|Vsw*aLMPtAep<(m#`wFX6l60CvKud+kzcHkj{CeMYI z4&Y54zj_U{7H3t48WY&b#>=!YT)=pUMQeK4eS2*nGGgLd!P0uL4q6n51esh$v3V=A zg*db_mo2;&AT%kE59o<={v1r% z2t|cGiPpLNn@DB9S8hCe%`0{rKmwG_ss?ojk1)^7t|^WsHB53J$s{{UY?b(o`} zi~*gY8Hn7lFhNL++T8W8-OTd5)Y@yHJSKMGh0XLPx)*fWQHHa$pSvA>IpLncNxkhf z%!B(+#-edx^nL8Ii@eDso(~0PWCzr+_7e#sY(yTcMP3|+@?&~LJTLB4rwGMfx$B|o~1jk91wnQA)ETygl*x=_GxnsJkwhI zXgonvWoSels5pgA-3YY!OaQ=yJNNcG&hEElyet0Au&qufWr`eVh2MsQ!QxyjxAmC)K|k^rD`LxRIu(u z)t^p(l51FZUvzz7C)qq>?yu7rK&L!}=CO|hW#1r4dK;0kYA+^Sq}7Jo>|HL0b4`WvCQv0s}5uN?fF8a^M@*PS8edb-PNh zfUB4DJq}qmo))HAOuwiU=f}{~oEj=xIXKMvT(?@)up&X`N33ICSlki1pRe-5m4J^D zBUQLHm5*{$1~hPcj{JooppSyeWzw^krm=R@(W_`e1(pAL{?@b$vXRBa@S~oeD8g zW9zMT%#zL};~O?>%RgNlud8lWcf~XKWO~lZP4z2ZqcA?|deI!cCU`i-1&(S2=rjQ) zC9iSm{=G_9j{8PzF#4|b>L+xXD&8f|hHRK=udJuu#*^26Ys9=UC9l;IuJ3%%XgfK7 
zI3?8hMdR@aTNe&S?`R9VM79J7T2kwK2ML}qDuUzLAh~O(@_>P>N^1sN*$4tWwJQ6r zrIsNhY|XQm$jSzxm>)d=x>!8)9vre-cg}g0Er% zW8Q=U38fR8RgPv8aQmLIqwOJPV!>NYUMuHDQ*IRpC-8m+2%YVebk-4H0h5D7Jm>g!yytwt1?*1kn zuA(ghAc1B^Al9V=m@Ap`qWBkIQTu%X;gGaLtN-6lMi`{?i;);E@JeQNtV~4Do z&FyNs60FCv{$bQPfA{EpizgUi)XZ?h%a+2gU-&-vSw+*1_uS~|c!|UAeX`v;WUBs| zX~_|L9`xRxb~+(u2_!)AZ@1QI#-y9HDE4T^!U}TkdwQTH^tl3Hcyx;4fs{e80J~1P zPU-$!$6Xr%4Z)Z4bOx%e9y7~z1VAfU381#S6bXoc+MF!cxXQ`i{s!X* z+DVDjyb52LsqR~!jdIq#44hddkXbVVr3w&<&BgV3QeHmsv(sm{JFb zY*#AWdDc5rCG_R2r1hT-!dZCDX@ddDS+P@a!Qt*R4*gWvUOyuEcvq z18Q7Wyenl^vI+?naurNQc>ibdElh>W<=3fXLZBf`OUe!V>NB> zAu$Tp1ttpG8N1sT;nKcG8oM9?;!60nkZQi_+{=B<85N6zifY;U+ndMmOq7*Mxpzs^ zk)8D<_-lOh>Unkd1onvZ7XI;1&3Yqu$6!U=eq3B=)^9Sn z#`T&DSnfD&T{Ug(PyTQ!@TRr2FnK@+4L#Q`Ea|h3SkpkR&&?f#Ui$VcBC~@!+et>+ zt9JqNHtFn83xcArZo#_1z#{vh3bjhcj93*9rdA zoj;?(*w?gs&VOcU<8eW3nU%`s>#!4q@V$CW`MxF5UaD6x8)Lwdb9Of`1;bQcFXBRz zAt3WZ;PODViKkgEaZrI3S{dLBb7Dp><%=PwZ-BJDPF`|okrReIm z?6fd5eqGN2=x{DGuA3~?UOXhAZ#}2rE!$pbQ{{86wbl|%12BAwO^exJ|?_y=Es7wsz#k6KOJT5i76r~Bc|wcGg{KBfxi7O%nuxMofTt*+|B8;qVyhdw*E z(*L2R?j)VLHfmSSZ`-?^m)*0~CLP;AkGg;!bt3)v2r;lf00wGeXB+$&3$! 
zLTw&8@^uAAAxvX2?WU)V1U3zp9CfyKV*|D#{xa=1T+RF^PWZRYBA@Dl(D{zgHX+wV zHkO`*v+TEG+IuKnmA|eJq&vYX|0f0J+la2e@&V|TnPfvK4-XRiGG{bfhylHv;vi0V~{b9KZK!nXP|W1@VLNK(IWsC#7N z!sWC{fy44epk~!$bqB}bwQxl&`m}S}N&o17@?L|MSfIQYUo1yMidz>C3tbNV{fd~q zTBW0aO~K=n}(gh5TU zMjcI0BD=nDPqWqMEbG7AvJ&v0;CkdCncHyX-q(#j;8?#i7@PoJ4?hI12bp|OVmf_L zlMDRMf8|;}|q9pp4zlPitS>NnhyiFy^cmFf-yHIYOcpP646&Up$j1ci5hG~*N zm~PdsW*uAc`2~B|82yWfJ`ey{bdm;qH>j<^h~g#ewM7Cdp)XP#-DIVt2|;%pE=cTL zT@Fwz`C{k!HPNS*UT$^4Nj%y1vAcbd_2vCM&G@(N!Sw#QmdbME6p1g0CghtEuSB^l zhG{#|xcqo%f0MEGPWf=+g;vvX3+Hn`ZJkf_bUaH_(H~`S2EhOIKn6T=z8X%A>qkZ~ zUqmYRmjH_^2syhh+WNpu@wTZyPP}*Sj*cdGXVQD>%A7q{niHCDZ3oqq)Nh zcmfK7slYIGDcB_+RTFxx#)7Z@x&QS1$`zee~3@Q<9O+{Pd{gz-4PB!@4z2eXKc^Y4P)J6LtM{r-m36 zvAFBx2e76caxgG>ySe)r)6T)Z_qR$&E?vIu*^fbg!B6=ggexoIFK$svlO5)M&rY7_ zT%BKRU{oQy#YpdT!a}xQBR3kFI?*9MFAVG2ZCELh6@G%{y(zK!)KdRyI5T_aLU`(~ zFK5b;*j4+Pb@ZU4FS&xV7sFaUyQavnk%@}GpW6Ef;rQI`G3}s<#27z@3c*-ii`{nS zyZt8U!BXcmctz;{{0f{=1oCs@4@ zsVUD>mBvU5C^gLNIQHa@413y)#fbA|&)q!qxeH;zaFlaa4_|>$4m51EFN8ono~&-!P7wqyvgY9 z>wTu7|2S5id1|Sz(8|T!mzp)f$|{nN)nj0u(S5jA(wRl=gT_+NRSEez>JX-14OHmX zi+CR|SL<6ZD2XWo;QRyS*98~Jgyq-S7a^zr=AnRwBI?pp&q+Gu_P5UF(^|_~EOr%{ z1E1H|)Rf0?_^ewQdeS3{ky!n?;u@cG=H#T-o1*dH<9L#*y3WNIU!Mv><7(XCcc++r z9$Z&WNXm~FP~f5J)qPh~#?1}Au_n6@bQb}oe>C1YIT zKgz2o@F&FQ7Q4FRc5J@n2>K)YSm0$p!F@N3O4;`WT+01nVQHJ1km`H4NKSG@-9_md zr2UN|7oV6@bhKWQGbpkQu_~WeYZX4}Odo$8G_z7m9prCyk@vl~rbaTT=7K%*q z7b^;YAdJiWWG(${W@Rxn9WaLyV=iyx+577nEyOjqq7#;EXu^-FTXS;3)z#IJYZ&Qb z;8_}gE)qgq9jAlL5hp>6Jv2tpt^}x4oZz97llbbVUAnYF%(wI99g@@imLju79yoE> zlh>I6lGFB5GcuwY7hkAy^KQ^cjjVRQF+05ylMjp>y%NrL_27Iqts=TKCFa^Z@2lQU zdRb!YR3OMnaOSoB3$#oa9`{fp0Ip20E`~*gAQ^ye?-GK&{eE_7$&Wps(o2li!nVEa zw%7=?5t5aM102^ayn#}ROY5uAa_vZYp2}g;-|mltQJ7PSsR?;x{y`0gA$CDb z409ODhGC!$j6w>tWV6dVhKf|3g&Ihxz#ovc!cqCp_mt+vJF=k*NfclV_M1r=pvikD zz!p;ZgZAHmu7Th>3`m&cX&NX&s#VfuXG)NF*Qn-T6FdptDT#a^iEMcny&XUl7pfjF z^GnhIIbyeGl0B7W#DB-sBc9Fjj-pvu6|CoX$pW~ z(dxBwq6SOvwDA$c^_z%hr!`Wr0tP>N7nFrNjn}f)QkZ7QHM?mtM7~dJ%HzgyH{#`{ 
zU+W%}vBL@Rz#k@tcA>9@nUmr4PA?_?B26$}N&_CQ1QAVtw6n1BXROZY7NlN`h9(}t z_eK;QfIW{6A~Y(GOTSg~-hP?*IVc-%h$ds&aq@TRoE4T2`Ou^h@fBvQYsUeaGI?Io zMuh~B>1rdwON8>PyU~^kN^T^i-%zEljDxbxR#?7rb2ThfwDuSoqsxr?NixY^O)+t>WIcP)SVU;F7k5yN){VY4 zZXh-rpMDj~X!fsTf0@RZBItz>JU6~NJM5a{=*Nen=eASb=s*v-& z)OF&%y@wIS=%RJr!frDx9;Ni7>4jm}IFSm$@CS2YS|~-H(m=qfK*TzeDCuu0VNphJ zd>aJg)W`&>1j-@-0!`q{xc*QgO}uZG#JyL(HF`qvr0_w})*m|kc=##^1G63gKjWe(tKgsfOIvcQcJ#4X0G8PhArG0!Ccy%jP)RwVjx2*{^Je>u=U-(WQ;I z;#_=#%U!#8shUYW-Z>vQZM<5_TlWYGdL-&K?l2i!pJ)PK7e~m)Rtlg3rpT^}f_zvhI&P5DMGQY$ zbh#RzxEMd*u^g73j`}EJ^Rsbe$s8gQR`2IsZM-ch4>#WUMZv1d>{>@rt*eCmIiLF5Odm3Qqrt-W}!M8osuX58xj zhC(Pz5PSGupRi{cDM}&^f~_q;0o-8P9Yoc$*%F2^eNMpz32&r-MqFbg@9^rsd?R!b zh(Kb%;-9bEHBTX~E#nXP)y*4;LzF{pHc(Axzz&w&K(OwP?jOi{{paeP=0~?i#rMWwO;Jaj#3Cprao(cR*e=_5@3 z^*40_yHrs9$%%|U2qW4d!HN2a>P^BpX@o0m1}vKdWL&m);l>e=B_zqIAS}byMkYdG6 zOZ255EjPRjG4^Tn*p$Cz2PpmpC)16g?=zd#3c24R$`^fUAW6|`n5O~ya`m_0A|}+p zGRk1@pXt|_Xn{)!#J*XUE+sWXpmHx#dhfINPw9Tp*auWX3};%=^jTGN2-Yh(5G(4@4_N8!nvpdr}8RDd<%SfIv(8)RJ`+E;U|_0s_RPUYS-!@^lMmx)Q&5 zB5GWk=yg>-)on80t4nT{H~q5eKwW!QoU_<4Aqx;r>W@5w3Orh`R(#n~v{xk>!LU0b zgB3{qV;)7(M6kR8>Hj4@0%ohauqW%qD3(;nhUMQk_vmqGM?J1@3dEF zhXYyf8+iqTMGpd)C}^H@GqGhw1!TqvUIRtlvoo_AWK8>I68C{V`~(CiT32#hA2s%IJ|v*ssxE4pU2bUSMU2~) zw}5fM`-tHm-)})>ULPOLqRME_C50&^o5>jSHy>08gGx%Ngq*YG=*XxI1CT zf?c8H`X9eL^;Ta)h6?3mMmvd2xAgBH^0!Q>m%N%Ez$Ni6qUyOn2AlJ11Wi*5>-2Li zcq?@`;lJ#%k3{HJA33=%Tx%177&)9*%m$KIg;&YTiep zL%X3~jED}r&dXP9f2Gc8k}oKwPvN#SK$96la5cERl zZ@s|^3*K>Q8NBrP6Yb*;8Q{{YVw0zRUMlh)$gz*z)h1_?H>gW2STr?XvF>z02T?N) z9kNm6{tu(!;@%DMM*mm$lFpD&3@%bxw=+aw6?RPundb+=Tex@e!SEMcLkLI7vR9`o z8^6UHy_0hc8Iqp40KR&wS#+QBhiRk9#Z=Nn?iH!RhXI=9T9#{b{X?DTrFSuq&U7-_ zGo?jI@iMVd!bH9`VRBtu7cP3!EUkKL{?Zr#}zY;1;1tDc0P&L%g zU#*}hTs!attz`K;vp0whQ*PG$@Ay^gVD#AG*u(9R@wCA0Q(RdBHNV)& zZARp1V_`Xg&zngA?ez8stN&IC2KRwaAfRPAN%k9CPtTtt`b@2GkOr99*2NICuF3re zHR=J@b30CaT^5M+pgC zExs!Ft8Oc7#x%WPj|Ay~XKl8J>{>)Z$p7kxG!)bkXSwuI-(ipn#)`LZ9o>vUf>J>9 z;h2e$1#%qr!}K4oWQ4&977qL5y3w1e5T?P94cOsQ?pn$Egdh5yFybFAPP-qeg1c5h 
z#=Y|;EB^hNd)_;V%IwKZX#y}`PE**b$!#bL^#m;0Hh~iTZgU!i-Nm_7Mv=jK*Zli- zaT60i1c@&`xT%5~QvMEO?L5kqJHE}5q|)ut<+FEj(NKQu|GFhW#wM3gO;MK17F@nVdrHvVb0rm@w0~T(Nd=n$%9+w;n#*9FHi7h7(c#iggWLh|3OXBwe#exMy(IK@?PYhim8}Z|v*` zT?PB5063b7TmaR56$+czrjb?I<*1%yxmUZQ{=~Od72-yt{1tr^3$`qr z!qH)dcMjGK28B2r27|mDS6X-x)@&fh6wl+Ugm)Xm>uNC*RV-EX&5-1ebyPcHWF)pC zP510Hd{`;>)~G#6zsIfFxK}^pXR1^OfjWQ2s&SJ@hRG5d-co#_QgmDxl0!GjsBviP z*{i$xXJB+x7(={zKdu?S5=R^}VftD&ryteACf7+n&w^i`cIt6BXW8KD=;_I@7^hJNoBr_ygPh`}>BkA0D(1Xyq07Ku?P#d9)c*V~F_BVBR zOkdw*3!3Xy+YH#<^LD4Pu$w#zM9#xxzF!vwGQ`>E^u~)pbv5TZF}+brO#h zSsT(O4IHjqR}>kAeCa2*=ZmeW@31M)hfm)%JO7GJy5(ENLde+RP;#xz)XUewbeY&9$T0^FvmU2V1qSee-1-Q2Rrsl+6h-b@fW&x>Pj`CgsT;VK(v;%}Fcmc&MC! z;kog6cN`B-jW}EBVMPLU|F}-A$xTDk?i96#d&yo8B?7u_x1kgy+kIlr+G}&n0jx3;=h;~)W}G>obM?c zWDPh!oJ>mT)^6hOXRh@4!$3U*GqX5i#@3n?J>J3(8vYO!X!G>d>51F4hKQk3)BJYJ zY!B^ASt)a1-_RoKg{c^`05CCKj&L!ZzbfLNXTeJ9L8?jC_vkq1g)9G*u8^Qerlm^l zeP`n^MDPYM1we6BW{COX2jWz<7j^{sH63cZ3EQY5odqIY6VYkU>qgZWqZ26&+OkA6 z;79Cr^MC5_8M59eP#HwR@(Pn}*x^tV*d6Ho6b{CIwl2WBn)e;9CPJHG z&z-V4A`xvjRk;?6(`GT7h{15texZT0N0OP};sUI=rw;7HWKRvd58yF7N!~Hrn&X6d zF+KpVTKiI`%t`_GHoOYAM(N$1G4-@hm0D+B+F0Ko`69ezBve$~+hJp0!hvL1%lL|7!~x)@(y_bs= z_E^c%H7Yl3l;^}qaUCbriuW8HyTJx6#P?KI5z%tc%aCjTb4&fYnpz zwX*+hd+<+liA=~0>{d+QP6*iU1c+?dT(N-KSeEq_eFm{Vp+iSad2Q=~FA3pF+l>`L z0%R7;fmue=PZ96~u?bW7>i5(VO-ql0;jfcuAWtwU@=1JM@5<0Qw=s=t*R@jS)z1fF zB*K<*C)qR?<)Mjsr_g%&SwmmL5loUE1Cx^(iY-65W#9dK7|RwDz9{ETx6S6(s_eR3$clz6H2tI77P}L14KmhUEFY{E3mVlnE)JL|)_^pTc)>^(c)6-Ur)~Z6wD^ zyoV=JuM>SxLYI=FpI1*6Em_=_qu0qmo|-A!sc8R)+_IZf`#|mxWjng}E*_1Hdf6{* zw4k;mT>uzJn;v?sTV5YwOo^O)R@B8w;!6H-#}CWnNLk}4zhP2h7Q5nn$FDEy8Hg#1_d&|dn8w-U{)msIrqNR zK_pWAk$peY7N&(+01H%2=4VRNqx6^5dGdhc#t!})2MzcW)GL{)kPn=1udZ|D4J%be(_5MvLcy|<~whjg-O?ex2(d+9pr~+ zNKS*r;*(Nc;=CxTvfpm&(H8^{6+Z;<3vQlN{RwMUFpE2{8G`dcqo=zDS zE2=AfY99h$MS23_P^xKd{ynQzhpV?GH;+yoM^?6@zFJwbf!)q7>Pk9f^H^rhaC^zF z$D)x19!COEA~I+qo37W0gL5cqnp`BgKS+BAcjqbEUpWAG{C8PO>kp*O%U;q3H_2&& 
zFtb0s2BlZj|5_7}f~uTQ5TuXCPcah2C1>TJJwfB=U({M0&+t2D0it*Z={&_P=8XWxM*AxDZ8F7W1Pj6&8XUo@*5i;HMWN?gGmBK)JzC>VyVjoA}IX zOeX>|XsJ7UgJync@4`hKd`5u<(c6@iZZehgrh?#4FyPqtn!FpT^fv23 zB|q_9ccK`M3snuV5Pw6>3&*n8kWwZJ(xm=SKIkNxfVOdsr`jGR&!9s!HjM#~I z6Xr0%G*OA3{Zx2WdG$u~D|yZA~9Ki^w4Irb4AF-NvNuSQ!AQF;F2h?IVkP*`}rn*5)L%Rgp-7#Ge zdHp^-rMyiPl^%Z;62x~*0>hI`UZ+=W5O@_APh5T+*!9(SP~)QmOne`*md~J-8(-^j zA!pw^p*&N0-wxi<-i4$xKE6YE-zWOH$>X@u?w5LbDIW504JMiu;YyvC=Y_4>R zpUyJ@ncH|L5V)Wrs&oL&Q;F#S3Z8t;-v2s(-!`wK0K2u~G^qg=Rr%oSxC^muLs3?aq zh3ef!tu9>#3p?bJgs%zbDnu-7k>po!v$BF6-9aX`I<*LYuvC)|5k2}FQZ<#YuFjHn9D#Q?jhNAJHceIhtw?YHy*Wca z#8#+0UOMM;Kq|6fNK7&eP4{|SodPZC_3*GB#b`kxu0(!UztGZk*GNenbFv)8J=^Oy z0ICIhdga1$hT)$hn`OprlfC>s`LCY{AxC-Yu+A#Xh3F#yspZp=|1&*el{YNX)lrPf zjmEY)Y-PFSnca|k_H+Ky6rk4RW+#=+rzsvx^D+%k^`r4}Nf;oM!VsuIzTdV=>L8dE z`*>#Tdm?kn(}nt?bS@0ag;_A8bkgxOf9(w~SuVvQTECL!=fBH8N|!@?Qzqw+a)q(ZYc8jK z0aunDKYF(YG~^)-POq^2juVap;s7OLe(ppsI|Ttv8%y_N6)VdGNw9ENdu&;TAj^n1wo(k&;LaoSUIQ*C{KN3O?3aB>9 z;@#Jo&Fr6HtH+v*kQ3b6>8AD-v|~z>(CMau4%2bdek|Cl!hsbQE+0^cqXI=f?$z44 ze_o~zh!v+5gSdUJN(&Yh#5byQDQU#L8WKz6lZDw0;$t*mKl*3`Z2Dv!+xZoK#bdT| zfPq2TodA|q%ZyV@*o6-40|;5vVbQXM^oj@V<9GD(Xd&w19xHDqUf`Zj@S*CZ2G()K!Qq~V& zTaH1;GYA^8052Fta{_#HrdHDNr$+`(kTZb~oVeSur&Z8SY z^|G{z!o3Q@cP&&!iW-JSe*bTfj8~CbeND3)9@gSVHpF|S>?l@$78(&>`MdVvH1hSY z1LO{M5?7RU`!{W;Ms`%@us)Ilx)lWB^_nfTiAKbCaqziK)3N(!6?biz*@93 zA*Bglr6=_$eo4KGhF~d46HgQBA?iyKRuNTMW4(;Y8~exuE@00>9)lUUYZnfW$Y5=X z^e4%AOhsk8Pa(2=AZ|t7X$h|-eO!Sjf?9=<(Pax}alUjmSDxON0grbEKQRE^=P-S% z#ln-5>(o{)07#)Jnxi7A1gZd^w^J66< z;)5U2840U25qMU92fIK28jRR5tu{j#OrbjAlgMd#d60sTK;onHrT8@RX8;3Q_m8?s z#?9zym2QN4lEQT1Y(`OOw{Qh(r5F4$dpqZSsA@uilgjkatqm?h_L~jwQ@-^2>T)3+ znV{PnV&iiwvTPQNb>^pg68uYg7G-w+MhH59S$))HR!lZl3!W7U}YpJ<1hW7S=e6T5k$j=Jb60aSJtG{S@?t!h_IgTB7jOdf@C9K zlfQGEEZBAlCAe8A2PGNx$6kypGjDp~ai|63!nYp#VrEvEz0v!gn)58{L>VYO-OZUYwVJe~VZSYIn9Z;_w zFVxEYGs#EJ`zYZgPNzC%Vh~nZTc8|Ga32gv;Z&@obeIS8tWlXS-8^23tVWj@stL}+ z6khIwql64KsnI7fvi9N0Z0^2oE!*u4d@saW8bjj+0!Z%vJFAuX=JUZF%zLz5(I;TCVHH8k#F 
zAC5yFzdYltOn*N{g)2ARFrtuYgZ_KC|CWkLuUfsfUmRM#Bj~_b>}pE-L_G38`R|Uv z14|4ME^u;G6591R8-dYDV{Au)RN=JTv>o2r> z^*f-EjbQoNMb-}<0^o3ncUxOubX+*qb*x?*5EyRWR-aR8B(z18KNmM(_^1TS8_WTI zf3x|ec>legs%Qx7l8OPX`u51mET|LYgPZ;HLdOc9^*XON?EYGG5x&6a8@t6ZZtH3m+sY}b`w(xt%IqYjD6$lm>=B+jJ z&seSut6%dC5K;VwIV{GUqnttZFt5nU7w8eg5xRL0$ z5GpSlc1)8(p2#6KmI5ORk!-Emp~CM_rQg$w@K_E0PW1DDOme7!VA1W)k%p^Dy*JxK ziq9}kZz^mOg9Q|eA}>f}Hr>LWx;rrN45q(?HiK$+Zmk+ms7Y#?)(a6ff;@$X$f4mI zux`R~S!!FEC(oFw<<<#Kik5S#ElS$Ly_XX=7tg7jh^=0*A6WzrgyA*>h`xssjak9zV<&pm|SL+{0 zLB@i!Gy@5HdFUyhp#kwa5-^lP}x zIaY;V0B5`5V1;h>K)F7%OpQqgRl?|IK*HMb%ny=KK8@MJsz|o{BKya0p`6_)#z*ak zwTxCE9dPbNeZ_%GYg>)gB68dXlZO_r)t6h000;TkWGa9teoOTr4@ov z|Gl!EEYa)p`|x~7L`4jLo{gP?Hvm=IjkgIE2{p1n!Iz0FVLrPoc^@(UJXi5fXAc)+RY#zZl<}FNcM(cGg)I6NVy;f7hpHxU3|SU- zB1yn_+*ak3nI8VVL*JA`fifm*G6@|$RIzsHLPjuFIUG8vcAzLR+sCUkck*j6$`_v-i!f<21gOF) z{9KpqYxG^X{t$sL^^3>L)}596p*vbNR!;nhN2`O8&21)v1Aim5^&Vkr4Y~$kjmmfZ z*r>%$oho#CFyl&=w;~{+iMMfeLbq#+pMcluG~%hll|}#=kAm#~#%Hsc2C^^nXYaM~ z=lC4+#408|Z1SMs-qNHp1ZWi1Fv!JiIDfR*U-=YmiOx}Wt#}6b_ zWp63p>%qek?ILTTpAXqT=Ur&YlY(p>GjiMJGpPdDAzXJ9d$2f4JRK36Q=5u?OoK}G z-3C|ba-;a(p)&lSY+%eN=+9_`PTrrtQ@lN0J50Y@1L=w^PP;!}lf=@vb6(iQf9()a zpd&yvh3>j^LG0;Nx%MeJ7Rx*$`Oa_*s7u^Fy9FZg?p(`XCeF7%#`icD=g=$c#!g}G z_u&PNK#UM38y1jBO0W&BsQbPd&bAz{Q}0yh+LXDmN!R#H7umo)sqS^sA+kdvM=e~= zBC3-i?4k%Drl_m=&)1^aujHLRSuhY3LWUp(1kH}!x-@y_y1I)CQD|IJ4Tt>A2>tuZ zk_F>Cyi4c`>P5c;*OHW<%(iyC!eQM_@N}L_4cX&1FrTyu8T!6TN^$hXFqiBZiau$h zOi{h+{&1#VZKoU)@3Q;p7#^Vf1RruQ>?{;zwwy6&@4S?f z_uWX|idzBtYp)mjhpuBdLjjR4CpTVN{2ltAe!G!I8+RaW^kGf6Upr6yiuNZ6yz19f zi>eCG3?SvzPA>K4!_nctK7063eY(vZrJ;B=>IAYp9k$XN`w&mDmuEiHJp#6&)Dzt-Zwmj`41n zGv1FaTG7s;YvYeOzUseOq9ctlf8TkL33;nzpYMII`;J^#O=c#Yk#S4=wg|vOe*WtP z6)OD?36sC>|D%KH{~FGDA%!LLYUb0M&E(Da>@yhlKRr+$%+zhTpNX?YfdUfG>CMvd ze0--6{=0TD!8t4bnBg0K1Ijhl11{IhcHzm|(G8jBSrT_UXRk=(#`Z0Twm6VhR7W7@ znWh$UZ1sm3OMJ#?fMfCbxAfzf?@dF?l!$JC4cC)9oUumPt%Q^BoaGWqm@NS|bl3e(pCpc+43P%3POW%R!NXGkAH|F?s5XT(K*ZTR+@b41JM2aE9y!Pe} 
z-@NO!#;@m5lEEo-Lrt?4>`q>yH*|p54Kc2k%XQLlPCn5pPAV6WQ6PkVX_%}vi|nCTDA!9H!j*Hiy| zk}du(GYIDR(%7%M2sSkI*|;&DrKF=;M4 zm#dQPyYYfe;f#ANPZf?SWml^%IS&iF&zf<0q17vi&T}))c6=M}>go)TUU^;xr8fM~6JkSNzAY!*y*NKe%c;CqMhAn9D(b@guh+MV#=$ zue_R+Dcqf1$=KH{3=?BU%u8-k4;as){;e^3DW%wZ?>`K_UiMaGAG6?X3}VT*a{jv(scHpiTY{+F%hMbyys-ywU*Oo-Vi*xEb;lPqrDvuTkh<*Zd`3s(*83NY{!Q zyT+u-=Y@tG8{8jjO;(stG7QoS?SDwZKUr1VOPv`rk4a97DDlTX7u!3eE@#;7a~fVfr$b47nhF*~D&=K_3GrT}7>2{&df!@5NV9K*JBLEg6* z+Y`yFX}ElP7ZjZbSvCWI2TmGn2^MS1kA1xB2*-)uS;t|UXJGwO!l{8-CSNX=2`hn5r8@Y%}n(HtQ#qo5kT z;2wfJ`#(iv=GIQsZj6Lz#&gGbEY@KNSYw@SkK6LB8{gl@W|($kf8%A)wXq5_C4Efj z7Wm;{O-dtg$iS8Drto4Q@APo-H&{lXgrzLgt5iTSwFRNi@%2g>Fh(Pt|Jw)zFhn_A zLhv$97%1ti#mY+-jq0`}-)23#Qep{MZTCgpM`7+ANhhqm;XTb;dWxC+@ZRFZC^<=1 zc-m#ar_@#F$%u32vr~)fvWRQ4+VB)6(Rju$@~_eTg^nRhJ$kIJ*JQE5w{^l!%-Im(BS}&DBO9u4{hM0ZtRHuI2^WVG^Og5zY4( zJxhO+w#nd!46xj`jvmixenW^OX%ncMkgUhExxQjYY+iAL3U2d7sVom%5BX!ExdDer zI*I=9M3M=V3gQh~K_r%R4?SuWf=W|-#d}|7)xV8m znUUfNl}YiHb~kQ^4j~#x1`jE#zZfXyT{t)++Gk!Dh;K7ztg0#f&bw6}%2%3p8_nZ-=^tS22~{k3P;isv^TNRSW=-sCFg2EQ6;#tU&aT z@@o_6S~U>a+CxY8^Md3SQHnoVuy7z$Tv5@{6ZTHa+^BCVo7SY^E6)!v_tKd^& zJJq8kzTTP9ILr@1?PC3<>2CGFgil*)y5l5@=x7iq0o1^U`uycLy2hspxaP%B7`{JS z!-lP&+3iWA0(+VARN8e3K6rJ&A&PG!&S4zZOlO)QyF~=}QHG2|o&r&@kp?&!2(So9 zZ+5HyVd3m}vD{r_guypvadRfX#36xl+x4Hdc1waJ+kx9vQ<=gHET2-=-jQ&6tOAgX zrEUmp3?KYcJ;fSVrTjl6at8lY@aK=A9v6&>7b$9^p~kQLlfxgjnDfGEzRpEFX;J~J z=!KXFs~wmt8jDBXy()EHMJ)pFYn(~8FBq^y?V}r~!th2{PZeiiLT!8jI2WjU>kIPz(7ievg(O!of7%T(8v3v_)eHF$V}H_H zllD-SH~2&Svs(%JFgj$AqH>Tkf!_6M52K4q?(*jH9#>vRvueo2Yc1EAT`*rw4eAR6f&P z;L|(A{Y>A`0jM9(an_%_=pXdt9uv|meX4_tB+0e;i0&%kXk*6Rb(3{b4$DQve>bmI zW8;qlie?Np3`HGrUcQ{o=Hg!?>7W0J6E@~C0|T*X??7u9zhbTZ30(vzw3TZ(VW(ou zU+Y)tDGt8N%#iQYDZD-jb!)_yk5ZQss)eOy*HVD9vEq%%e=`rCdAxiL_@GGtm}IoVSZEmd!J)$7PbF&lO+A=C!nHl_u_UhypQ7 z?9(uxxk=vwlHN@#528!hDFUKh4p#T`uG~l@b6<+zbw1VTXx+KI(X`9)rr+xYI;6)} zD3kGxn}zw`9onyA3frNNIwf zPhVG*dc-w<9L{h`;ehx>?;q0NFOQ2$vIw;s++BZwsu%k8Id4OA zR;p^~T2}9C{%xola&F*z*944fbQD8kJmMfFA3r{{3*I!1?c4hm`yIo7CwPwo-9nYt 
zv+0{)Uc-L_3y8o!+d)E|PCd_&m$+@0$Fc4^{VBdk4;%jeP>CY{%W3`N=!L-`OwK=~ zHeMaooO{$*ZnE~~jsScAEi0c*i%4aGGE-qNw!a*{z3&c3ll@xOv${~%U6p;FW&hOW zj9j_PG8doU)A9FLU1!)ueikmCJbvNLv6n4hTZc{`OnW~kimzT}W}g`1ft=w&WcPPx zsa}hg@;d6_4n{_nyN?#tU3WqHTFz>ylIRm+u`z||@2}Ce6D?plc8I3~n+Z%IQSgez zFugx8P-!K=zoS;NIjaOU^}hRs<@UYy8~p>?(f}`>m5!n-WkVH8b@#j1u^LwE*Y+~V z1AdDa_GfoW!c$?6`Lea$7OjEmK=rkQn$cQ4R&(0o_Zttr6dlg}sojmE7`~FxyFDsa z0|BS079EVaIj`}~?>&L~Dx3v8%8U#l0S~6a+ZR?KA@;^~FuwTkz#$wT<@f3p!{~sI zK@%;vY26iP%$7&c;*L?ST2{(=`rF#P?cha-4GK*k8#CY-+)@9)tisv|KCLtcrV|a`hchrXtMeCH` z3ufnXnq>U%2Ko@qdqJWxCyH?VmHX0Ca{f)0(l({ra{h+2UE9X6TD0W&Yt5J3|!oxn(#fFr-)&SS$9XXo4J`; zdxcYSkn76wvMTn87zmU?r#+d@e5HSj0dr2j_ni6l_T`N>;x1speMVY~+mN3LTUzrp zi}@c;BrDgLAORA5kO|B#e;)f0IImd?xPI4$W$Dv4H+Gid-@>|g>T$RTCl zfgwzbqTKQ{mfQGlc1V!2vy}o9{j)EIgoR$G6OB7hyuz`BP)$al#0(yePUE^e@Jp_r zntgwq_tpUU#kezqh~KP4372{`@B))IfCifx6S z`Ehizk%h?>37-z16kk~TcZf{%gD-t{AH+Jr=e3OBe_DqAj;d8$yR3;@@Cufo<};Q z_MA$8s5SzUACjYjy!qV)h*qN?+qSx#r;gF(j?=Qk0aT$weHKp;1f2;3+$L#)ZlO`K1s*74T-{u_ z`t2ln^QycxsMb^`4Y{R3xMa&k%kgR#Q$4XQNo*9=MVeKftPML-YNIs1Q4fWnxt*ET z!DK5D^LGvv$F)Ej&-BQT0joyi(bSCpMEP0VANO9>-`J7ID$Q#UZfJ18T_da;;KkDe zd7~PRRwq&>GJkYPrUUb}zJ!wi>pIVm-K9!ghA%w(p0;mnWf&39tR?$S^3?{{+(_qv zTfrk8uw*U8@XZWmeMRhJ^9Si*iQBpD>t5%hVPhg*&NS@(566lOr2VhqDO*rG4J|@i zq@YBReFqbtoxaXb1PF16CzhQ0bUBiv$K-cVZ~bk*19OYgRcYXdAjC+@DkhSEcIGG8 zbFFKYC<3;>4s~+A1zD|besFf(g~;5@kUpVRHr42}D6Y^f+6m4bcxc-H!+GQd)`iId zoEm~@Yz-Av0UcmFrig5CEyeE>6m3ehANkeZ?Z0)LK9Uqou=d!b^)Wg5LnAI)$U&Xb z*augn>Q?Hjc|Q0yZap8NA@r0|qDcj${8k zdgNOt(v6fJYUo7Sr}3Ncb-y$Y3NmLJT=3Y)YhgOI&?Mn4txq@SQ>#y(Kq|XU>*c^A z1iv%iaat&*^w!v;2oIXDxooL?zGXqh6kx^n9Al|9iD_uX*A*m5`q_`ueSkIJucBe_ zo76BWZ6|^XjU5sM3yxPcq=tO?2j$qlKpO+w8#I4$a_p{fd%d4ZlJuRKTgd+8N0{cD zf6468<$8l(ZF=l)v0E2N6}0N|vQ0G}_?|oI!s!__Ux`Qe{;A9xSHFJT?*-sQ&xJ53PB56G)2tO?R>l$~ zhyJI#Fheqk;y6^ehWInl(Dh17#^nIKuw^lWbFM66tQUv|2FVAuu@ou4u>w>+4^cwSwZ|o{%>H~_}7W85a(H^r2*}w zh|pbZtuJwwuyc}=y+E1tP;}CS=Xv}k#`!YqeRN5%HNL-|ekgemfBwyVR4a)^qK_aTa_3-+&z~x-<_FGG 
z-Zt&PbZ=An7F2*=Y??OT`iefWQ|3nFQ`|Q(PFLEj+%Ic5 zb(ZQ~yoP+<4e;G{T&|I^fd|kc`?z6rvKC0|0>Ou>;TXgoZ$02k-p|U1Oju6@7jl$e z?=r(SgE0Y|JV;tJ;HX!^fC%qOG!J4yf&yI5r#6RO*NBbXHF#2$+Mf&YHRwdAgM9fx zy$&dI!`KAbr~CNsOM3sQv=Lh^$Dj@y4s~pSat8q5J^mM#uGkeFEu_e$>}eNv) zQ-9*a*^iGb7(2n^VG)iV2bVff0;1BAzZHTFe09mQmdi4A6P1^TK;H#Fy6&KdR%O$! zJA6-gGtUmQNZitu5ahk_u;TNcgvJf<^S^@+gt+9Nfn_lzg&`!AH1s|_Vd=w(U=wW9Ilg>+F zY4w?M3OtTK?$G4)5{LGBb;g_d9lZcTohg6Haf%Ew_v{|GNC7WQ;^Jgl&Wu!h{uLN* z-xVDFvDBmd{&}fNk=Xukq{6VF23lMQH=gpjE_%bxeA?ILHj3?rgbKjWZw4%jW?&16 zmgiyYAxcy3;KtX0a3zSvX#~n9Pj8K%5|IucrR9<7X7W59WlDqY*&zz&&YmxNK^b(P zz#Cg()KB*>>iRcq(5R|$wphGq_jK}e-&g!hzTo0k&J?i48X;^!hdCrYfy4_Sf=DAr zOsW6A`+1L@Lc-!P%L0S%3Qfr4fxW8^WSFZW zmkrM$zEb{Er#2YR7|NVh7uw5@R3}lqyjH=dQIbDohjdy0XjmGsd!!_Z=l8^jlHRVQ z({Re2`CwVk!y`k+V`oXKNWVehw(`TcFe(k0bf7``y`-WdY=wr8y=KQ0Yg@4*Q~KNmTAC| ztg1{kx~C4ZnNGzgiI0m3bWg5-`JJk7_*i>|r|GzOF%Ey3E*5D%dG&q7sA7HpMxUY= ziet=R;VO&y7w1%&*9y@j#b$6}tQp!bHLlL43SX8hV~)P{kDVof2)jM5)HuxUEzt|M zG)b5c_m8&dabkYSS^RL2`ZFwCAvSB7HtST@H-jYgx8%=@`*Tffk4R+|Hf(w>fjrai z_$FjP9^o8fd*1v$)cfWej4ww1x-?roqDp-DpZC&e;evr>aPW1bew{%KiAsjZ%n*j?+@+h@1(k0>uiv9ql*KfE^W@29o4 zgt8>N1LN+~4HL;H@|rb(u(=E9{*)Dv%!=~K&xSLN&JS;sY(jLtkc^Og~=;W(TH;I-{^OolZB13#}Es# z#S>z{E8r$9B5r2X3mv|ALyG~OW2HH1m&;DE=CMCYIW7}~*B{MsliSA=At*LS(;tw# zW~8fq@a`3bMSI2Uf3-bI(cC;f%#u4F?ycvWW_bJIYR<`!ddJ!R74@^ZIkY02kpIyC zZQ4^uwFaI(^1K&-C%~jApT5iLN&lVLkT}o9g@AKm0Z&y$G*c@=!tfo=NlY<&*J7$~ zx8b|xh4o;YXU;H{efBDID}8Wqg1-wa9)T#v7hdYNZqJbbGVNo*PG%(3R6>( z9MN&K013XM=9;*L0GxG-Qg8mYJPv9;z1d&Q`6`R_*v;O-jRA{f!7`fmj5e!zD}y=$ z+|HxaPUOi!qRq|+GNY*+1Qe~mlB{8?m7i9b6nBFvy2HFdKAM>Z0i1@ zL0*|p+C2YuN^STl+4A*uxt0ALw>@+3@jK98>#cT_)!^V)jvF-3ZD*y3R;*MOJa9KU z{QsD`l9P>&Iw{FW*^(S#dmlv8O?%!y+lMVbBWE#gkVJ8O^J6 zQ|X99=h!06+8+IURVHCL91~$!(AYc^f2EkFM83XCDy-pKUGAM>8h5ydjcrhQ_N<%u zK(0`}2x^o2cWXu+gcnx8YVLjYS+zQw(@=wasjPLd#tg3~nUCH{YhLf*HF~pvm+}P2N#;!!C#;WO`9y;pxaPg300tNmd$X z5S3X&qn#5hY-N)OJqnpRPT0Qust$@cPw9?+Ut^A7SO0V{i=ZAHie>|lycc*`@$Y4J 
ze~m>(HQ=`{0(Q-r{ytV{Q}&;x1&`SN%IjFK9C0J-Y|HoUPdb1xyV&aa`=GL*93sIE z3bUCZqv!uRZqS0*|E9ksO_R|Wj=w)ek0wZRJ$r+&fLzWc3s4fVQY8OW)D7AiN2xX+ zX|p@LPW=7d>hlaFp?g?mCS1>Ta`MXy{pj1`S1{A{Lr6dq5-K~P6-Qy~Kf3hFy+#gY zPvyya`%EbriO}>sVkPODF7(PVF=Ge}Q>V4?W81d$A?4 zTA)hL8Fx|%zkKH~sthiU7ViI9;g>mcANo8{I^7M(dX(TcD<^mmBDD(-cCfPI2?*s? zkja6dc|E_S8Rn{ElYj_ZNSyzGy1p4GGI2aVtE|mOmZ|=`s(dw+F@pNV(}j57c1o|4 z@?U3Fevf1$0Df{4(JQtr&o7gz$HDPy4V~4Lu^4LnB%J8Ov7npSx#Jsbolpr)Dh%J+ ztOIc0MvjFgAmu!2Wz$hAU78I@$Qz$s>$`5(eUo-Y&R-rb-za(1KNq-0cu`-2(&_h= z5SV&qM~mtclV~K1Rzr*nT-b&<&$S-V@c19zNdirT9|s*&$sO8o^LkiL+fxKevETi> zo&*})n~$e#K38J!Uj1yfn;!w@F+{=l&+5rtV2PO{!4G(nVfTm8qiofF@h5R!Nbv4! z*?2UOOTyQ^D8u9~ws6XH61U2y3c<`-@C3g$=&S zrwevP34QbnS0K%Zk;J@vN!J>z=17!r>#h=F$LN^B+P?7X;_*hhoJ0gWD3!alW?o6+ zV7YigZAHgr%#-u?&itvUhNR&EwOr)aE#IX%qwx_efID6wMGisJ@6kZx=uw1ooNO9J z_WttN%$4Z=12>AbgtAGn^u2iW7QC~Ilp>8mHwB0%D{}BkN03<#Y3m9x-0^w(4DRq4 z$#_V;n)kcQvMH;>kapUZ{Y;*prNT#2T!g3O4CnbVrs${I+^eN!jGmR3j0F*9sVUb@ z8`~7N7Rh_o7g&fa1CM#Kh^ ziJ-?b!^z4b!9WMW5nXlVXA){D5B!B=?FafI;r`%IT7`+!yxFJVgAtuSyXWKo?BXenMOtEgNn^nG%uAYQ&GF`*4AM>7ne=;qU z42wS-M|=Y<)+zOJfmw;`?Nq`6M#rKsN0jU-6=GZw#gjwn5=8R`npsI4O?dsAa^<|2 zKifZF?V(D{nv1AIt&r`!V&DPRe~L@RztvwlR+|OjrLK@?qP+IDA9LBv4t4d%Ju3}b zNZtK!Ma%}Qt5?TU&4A1+CH0`F1g*AuAB&c17zhq}Z>P>!gNGQ9U%_?#k&k6qIc2rl z@>tK6jaJFP4%b4Bn<|6HD0ri{j*GgUthM69_`YfRv>C5*L0^X)a`Kt*=2FJ-*CY|g zHiUBYe@8dKEz%vG?_9iDY6W^-&0_N~y@~c)#I+3L~d%{6>!<*lm{~o!ibKl2Q zL=J_-mJ=vrQr?Y+1!hFD)mRzFc(I8pBmiN-jf*YH zc&J;ZJAyuy9xz-CkNHu?d}F$@;tw>7wCd6sj;cUnMhwrXPyDcsb=-!wSzFpDE}HR3 zXDNh7&aHnHEA@UAG%+uP3+`-1@M>>~ukB^i%z*A5ck`>ay5J1sA5>LV+%%tEomL`U zM>~s{9OVTnx>1HF+#}?dAe`?=&0ko5b8*v?5nZeYPgTB09KI7aMvGXZSNeojw2DGbB`aUc;RRql~ zawqNYJQOc|u?zK|9=Oq))WM~TC5nspmEN*P$I5D+rYGt?%9D#6Ibz+bJ8~yo(NY)7 z!S|$O$3Y%;GXS4J|=c0DPrUFdruRXaV+5MbdMc& z*qy}#a~GmY#&uz(Y+Ta)cdhi^C&v8)jIUsb*o|-^ZT^?ZvAcFfJ1?A%4=?d(YqkIs&*V5E?Iip zb>5DLeVV6Le!6=VfDzvcks=otqt2Vlw6&rwm>wpMhb?)95*^$5-t1#hH4x*v;9Unk 
zt^i%b@!{ZE{^eb?2;t{bA-ydN4>R|TAH}t@Er1F5YDyCA^U;Ne6bmQu&Bv)D2C%>i@~IYfbeZ3til>?=d5oYQoPa^~lw%aT0oV7pxYY50qLLK71uV`(<+u zK7M%EEV306>8vI^Aeu56?*+|2E+&0M$vZIJB}3z^u*_L>=lUjPwTgsCKDZ|l0g$UY ze}3p@cw?c$skSJs?dBPnHZHy1t>ouinF=!Kup?b9;%1`d#%Iz|{adzszIBb}a(65> z545$T9>3SH7g5T^S{XAGbI%py2^kIEYnvzSiMw{Y0}5tm^oGkQvK?(Uw5y!)6`%ok z|DyBJ`B&YN4lCE?t+&qOsv}Sy+cdTj+NI?M>Hc-n;|XrN7RO5tKu~rZR|WdCBADT| zaRR9Ou0s>_`RKUiGO{#7Lu>`;-NuA-8bI+AnvhrSd9CI7Q0TUe*OfIYVg4!i4 z=>9$+Wsu1(f&ZBA`#eSf*j~H5o!%H*z7zOxA>I1quiu%w=`LY$fALNrifV?B9opc( zc=O}XGAqgZULW$KyY|4MWy#CnFL9-uqM$CcY86Q(7Zv{VEfk)ke>Mi>TI$lJf~}1$ zk$*qha7%v>D4P_HJVLtX?ILT%d{-t##?f#;j00bx3Hu^C_A9EmMWc0KbPt&fxq5N_%dyddC*&`wXrEu&4lq+8@z_s5*RB?%IttT~Ai|bXbPq|N7A{ZnPK#v0p^6}{{U<}5}|J;auyvV{P7GFWb zBrL0(hl0(%`+2-mU2_bFbjlCs50Nfj;>m!=7P&4A&dV1o9_DB}+r6JJQ3QQHdQr`4 zoS{V<)1LMFiOQ1B{_n;|p%W=@Dp{*SV#lVX1+=G!G)9?fL$3D*ngECn=Q>Ze4~Yo% z$|1_J(yyOe!TP>ZN%6KW@ck-ZM>D<>1?+9&eoUtSTDMZJCY|P6c9Ktr2dYXSTyuhB zKtL~)Pc6te7I%Jt%d}jgG;4#WpZ~N{-YitO9MSY+!O2nH!K73))BX6LaH-OH+UjUB zOQJ0)io1booo#8=dR>8JNpeCQV3JHc4Oq-jK~2zeAxrq>uhLpRnsS{b|0V3s{ylGE zSj_R2dn9Csstb(dq;dvmMS1W}&f3F_^plY1n!{=>(4#vl{-@UA?Ap&!6j+-IT} ztN&BgwTCmIu<^bwx|xG1w;?2nFvMuAOm20C2m$ zVVPJZ3Ui%dV&a(FTxL7YIp6udzW1N^c`v``dH;HU&+|U-^ZXpqa@@+^Jh*)f5%ZD+ zI5?v;APa)!lfR{CJ69w@7tK7oowJB1pQGe^jq)SzS5QMW1Xy=~v}e2IS)qipT9!Hi z>PIFA;j7nO3yeOs;HGeqwfE|GzGUR%I-v#$g#0t8RQ#b2Ds!X%1|%lPFI&^}!QtYx zwVxmAuJ2Ol8=%`BpcU(bvQZk-N94)rVq)-IVp}@CcFoULimwO;qfMo%%kvknldm@s zrjS{qwnU+eZonIzWL?<~yOH`T(>9L%IpP5<^$ro*FW#zM6k4ubO~W01>sjxuR?0c? ze*Nhs6!E$;(y7SVcJSV5S7>GYTm1~q^_jU8Ofv~$sidxJdwQ~T46ILg@! 
z{MGq)(nN;dbLK~Qd(v1>ZBs9M(WgECJIUzz^C6pUbj8$)k9yy9{6}{7z+en-Be%0a zi|XqfW3wHd_72YiNyzpZhW?hFp#<-W9e(_jOC#J!()@BTJP~D?X(Jt@-gg6IVlEhd z_HDLHra7@xd?-?FZSOQLneR~hqUA>DvD2ZwS0d%LIW73?vSyPZb0E<@NuY!?ao%2- z*&v&F^w7tWgHYj(m?Q{S6`4lu9v+`ClzoB5LwoRW660>dj+$%(r)g=(G@}!R_tu)c zjM8bEJel(|iD7%091$e6cDKeBih86u-YKI12N1`eFxTt*9au_1{7Sa&6uH|G7}IeB zQc+|B4UX+)%{>fO&l3LSyO2STGH%sHZ?e?m??uX0sTRQv_+wETvxUde^&RS!8lAPq zP0qEe@eH_Kv?oOZ>SS_dD9y~U&c}_#ZY6&O0t@as2*MzhNX7O{p>*%bavuuTc)Ain z(fyc_;u7`h)UZFw#7RqMj}^%dIVIAHvSxfCXzfg0BFqdKEU^1b$UM2{Eku_idd8XM z`sBgq1*~FBOMo#<6!acx6yH9V zD)k+AS@vj=8k5hx*^oFrA+(`ly^3H^jM)}dQjlGQ21d` zIurLhe`|Y3WWkc%RY{v(iSJ-Wyy<4Ro308)6QpY@%~h`?&0}Zo)70&`=!-DneK*6S zByFpH856Pnnk5$g2gJ4nkop1K1kOC!rQasYLM70Dd?iYhu$8j8$V|BjCZu-p-Qju< z8map(8h?-+oR+BqgVplrwn#s{R}vDaey%3%mvkp(*TQI7K4gu}#vyQJ*Ni~?{*`gE z`fK;KmP-xoOgV+Tq7XaLJTL57<2$Vo;Xy6@lpa0gN)`p{;HTWNCv^#RN*3_sVHz!F zbU2N2W@9l0Y~*yR?*yG#*$fz!4dEqsi%wgrW+;LM*$jNT5_NlD{=Or*Q2|n7f*!^T z3>TKU)_g4%pVVF4&;Z%_UqrK-m5@$XWUw65wxfvXi`NaAeZc8W(lsz zk({+oNI*mUu6vOTpbRi20bzM-u{h= zF7v;+fZ4vJ*c|0%5m&rioV;sCsmSB|>)(Y>5i3hBG9Nxl4|~VaQ;yf&+SmfDm-^zt z_cIT-IRN*)Jc@%FebPJLArV&(Ab(a?4-7evPFV$cZN(*VaDF+U&2NFpv@3 zzn?b}+WoY=Tze_K)oXo63Ab z8?|~hJk?Jp{mmwSwFI6_dlv9NK%(NI$r*(o;E8Sj50Bj?R59|nbFKf5YX7bsv%7$> JB3XLH{Q)^4eU$(J literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/sequence-parallel.png b/model/train/yoco_moe/sources/images/sequence-parallel.png new file mode 100644 index 0000000000000000000000000000000000000000..fdf5c6e9b0e815a3fea953a904ef190a7143cad7 GIT binary patch literal 50795 zcma%iQ*b72v~DKmgcIAD7!%tQ+r}5$wv#Ut+qP}nwrxAv`Om3c=l0xmb$4}F_q*QZ z=UHKLGGYj@*svfVAP9fOg%vf_u$u<2v!^Jag;o)oR z$*n$#c4V!l4l0F|^am3m>3IiLyaHvs;7QX(m1AGiR^zmwq!|?&<@68QGGjDY<0*br zP*PGNA`iqNU{gcEQA-ye=~^tA8>zh|l-F(ep7;~T6Sqr-!xYDp7EwX`evBBt+y6cM z@MW5Y|9{u;l`#>31SkQDq}u|vsVB$cjwWvSoE_8SM@^W}=c+b~)8(4B!jCRIN?VR( zc5AfM$~x~kR*f~!3`8KX;k3e%+FR@9T17Zx*Py9BG5g-v|PnDrZQ>2zol>zHw5;*tSM-*JVy~@Gh~ud 
z>KUgDMAdSEB&LRFrJx#;vYS>oQ%h^(jY2Ck7E2YlxWTAiJ(g;xH*51VoViig4Twkr zo^(?w!->}kaG{-gCgooRN~(!Fh~Vli&W5W`Am1F`Po56fLOZz82Dj6~380Cc1v~veBJGhw~vjC&MG&oqn6)MtD&s0|= zJE$kO($JKc;qf?=;i=xH&rECe$?~GGUCV6tjtRo6Rs%^}STM3tI?7b~TOw@G!1I1& zwSI{O6Fw40Sz6yOuJu)v)n+Y0W$>ihp?^6BnrOM*3IjoU!pvAAoI4UrU4$KFXU-`t z%qZE0q>P%1o;J5(Vkq@NbG9=?W~JRl7jMmFA>Ye}F)f9~n}xj2&z3TLz+hCoi0xR1 zLjpR|SW|Pk(TYBe%}PQuuw4hyN))jsd2m#D&D7jn!W+|fXF>ne5yv|#t6+>R+hkhs zDF{JH={L(FTeYX#>%MBB|J3o|fqkv0;GVM;Zs=`In^eUO1Q!iG(&po8G*l!q+@(`QX#z*ImemwtN! ziJjnjl3$vZP@S=4OetP#_yt3w+m>KUny4TOEl{#FP+V%yUu(Hgc;Bh|OB8{+V0kZh z0Yh)1Pl6&;WW}-mWB%ub7!job4Gc^t?@_g1}wI`38%K0?!pU?5}VtC znd-EO^NkuT-vf(AB;erjYUi&3d{KC6(IqEdjSj4evDSi8Si_K{Ju2JT`CY?}#SJsh zWm-U^4Sw)UcCC{tZ36M_ePNl!;#XVBlGZNTp&eb0$4Y;K>`kDYQiP$LWDa^ONez z`x=JJ<*IM*fywRE2*~@W=BRx;Qd+^%ly5x^QB7}|TdOcFx7A%##!L;XAwWi+7gLeu zQs*|t+6vqBUi#`i2F2HDik zEiEpf7{1_Zu?! zA@~YQH*a3&tDONK4Gqii494%a=(rxF&Sh@dD>zJ{qK2Ne0G_f%OFbKD+8(`s3N}A7 z1)(lMp-N*_S?@4cO}7}#Qbh?db4nIM=#lh4XXYrWX^n?A-P<<>2cxioGp6FMiN@Y} z3kdn~!nxRfnC(sHiis8Pk-4)>zsrJm9$n?pj49osetNiGh!(1HAwIw8P>Io3S+j4$ z-X7D7`}+}Uum~ELr2US@QihY$*%b*DK#^b#B`nAkhV)?th>KN7efZf?_5u=c z7?a1f93MxdQW>xVN`K;J1;F>nB@Ymk@2{wCnH<&(Fs#M z#q{bahgA>Fcl?gHHt^7E1LQG$62Oy50YPD#LXm_?I#C&ws^0|29T! 
zW_@~ikDi7c4c=jVoC<7W(N}NmQTzSzQw)o%JrWBw#E^AB_MD&BmnWLLHMd`rzh-FQ zQhUwJ%*;Sl8^9Pur^?==w#bNjZ{WUF%-{Zq*OdtD|2S!TtZ>F8!H!ZmtYK{F1x}=E zy3DS9eS|J#SzXRCLk9FBsA|V5^2W{_B3^IK{z7Y!s;{r#PsrxJW#>O`oow82IviC&|n29w7R-#a8J$!Nd(k15E@jpvX>QWOWSB>?Q~m;_v_g z7D+HuqN*|pc^e*Qu9pvp?3eKlaz>x7gGYF72geMeo+&YE89JAq*IDn|uaa0g%9%K; zg1Kg}`GECUJr*MTfSe?)eLMC&hf-XpOBitC zi_xIS4itI3hMoId=#8@=oHmKcg$BEFe9h(|EZ)7D-a1Ag{tbO>+QGtx{z&k%%qRwG9V0A%PzNoyB!f zT+k90l~vhY(rBQPd%x+5y_CxmSubn_kHeEN8y@bx&*BnlGW07Fn=cGwORM*uY7a3& z*v5*N4_3YqkCB)P^?JJI{2*pXPxLn3JQ}RDtn8pAY3neRKGQ{QeWvh?v+XoBrS5v) z4?q?z5ImJ&j}|4W{x$-Y&eQfUEKf|Y?w(P*)7O@Lurrv|`t8m~&2~@(4GsN~ppPSX zZBL*9qt@loua3%DH<#~?Ycw+A^>id)Pi?idQy_j>+#ohlNV8g5lA;{zWb`FBH>;{qBn~YHv zqf-++MBiWpDaTVCaM7=(C-1R=!p&bhMJe31Xbai_gb-bGlT&^>^3==NP z9}QU~qOpf$#*!B7Z+uk}qTK5<{6uI1;JZ0S5TUKnfCQA<-@aqDe@>!^swC~%b3&$8 zfC&mI!nu6j*iANHM^KI;o%nM9#Nt&A)8n)*ocuvU4wi~OhEO8}o6#B!_`Dl@6Jp{m zDwr4wU$98ww(iePqPW>O);nT?!8^x?o)S9dd4qtu`4lQY0S9dw^XoIv)7_YsMta{u zRF&uOyw^`blzw!R3#0owylJV$J@}EW$E3bIv)@Mmy<3e)dp0vF%ciFQ;hdHtOdR~} z!l}VQ14*&xpo8)_a$!^=upWFXvQ~1;?06q|e@}!XB}^v_~R3P?bxp}d$ZjWGGayr8r%VOg& zzO2s=<7;-|1nnub@^G8N5M)b3OwC+9-MdFNR!YD)@NJVhudp(@7{pVH0?Y%sSvvv> zPr!4LMO6}*++AOe21RD(clUI&tSCDXdPWb^_I7M6 z4M;N(mHs&n+2lCjpwbaFW*;W=mTQq?=7J{|t;v+=VJE_Yqx~n$H)wdgX4pR=-438# z_iKFaCa$&witf6+QX(|TFW)wzUshG=aM>Xp-~-GFb74UJrupk;DQI47$$p6hH3zGe zE0ysLk2=UwFty)io2?{PZU;`J*>@{bEo?n3C+(IGxAkXvDI)gLxcO~_*z~gg}x#sOV!=Th3 zmS58HE9vdJSx%LBrPB(IHf`y;!$C$;ZO+3)F?wo;J;Cbj^m*@Y%69QURe!Qa)VkQP zIP%ath|a0U`HJdPC{g_$4p%K`06bz#9GM`rBdI%3 zY5|tG$m)8wjlf~X&Y|Sm{S_T|9XX4%CUrdB>s%`H?V9BG*iXcqIn+QTyf?~`9-}FY zunceOw+Fdyb4JGU@v-oQGY!GUb3ar~Cg?aZ<1N|K``2`*+T+iWQD0#N_49(x9OGe_XE4!OkMR5~C`ha> zM3XJq#?dV=Bt3@X8Q>w-R3pf1F>AYA9bg>u|M|7V{TXLx8&egs8{Jj<7ALyDjj%7OQ_+cS(luj%ft5 zofAz2fH*%r2JtldX&b9p3(M8^gu=r_KLAOzKqM};*hh`tebF}X3OmueIaSl=1Igw6 zQs4V2WGK`On{D%6lUJjrk>Ko-AT<7Q^w0|({GHeNe7#SOy%sCNjD>|ozfMg7{h$Z< zxvTh(t1Jk~;9uEbi*O|^4~Hmlqo8G{+5=k<+4k|Qb`NM(~5sa8FJ|w{9w_;u}eeqX~wpGUDuc8nSf#ud%KI1}w 
z=ePuRnr4`AuiE&ELuo%tMsT4@Zq2&dTR8w#rzt{f{~dFDf@?WpoYf~ZkyUr{_*aXz z+Bm?Tc-Ma2nRrKKOvHY%5S`#t&sruATj^KTS`ecocMOvM2&yQP*?MUDJliuNbPy;D z8!3totQ%4v1uq~SB4`=!$f88r+rS##*fPD#(Fhsh~dO~rqWk1TvKd2^4aPYLo%YMb< zbCrKhjW4c_ePfoHt(VyrN}!N{inM_wAO{(8gu}n6cnc!VVOo=xJ?YySlQXx?8`zl9 z0GE|zRkxPbcTaL)1vv&-SC^%Ai)q)=qSx?SUm}u&$j?-jh0+;MhYD;&1#sa8Ie^kuZKW5U+q_KA^{U0A>b2OZnFu|V3^(2`ASj2?U7 ztg%#3uRb{tXG*B6wN2MeJh-YNfv>7HqtVO>N?P*RU*yWi4|R$==@Z3(LltO1EI3Bh z(&A8l95%y|6NhJ4)}5L9!F~G-$Nh3oEu71k`MQJ?Nx+=WAx5w=_243S%DJP!H&KM| zxrT*3(Vuy?0-8`Z-uV$Ft?8_ATSBda6J(ar6m9y_L!ChOP;k@X#@2q)x=-Q4-T%?m zi8Y5ba=U@iUys=XBh3+G1uP&trhfZMPcJlUhJ$VSg5t|rPD^EZ)U9eC-kaiCxZ)-i z`fetTOXI*wXZy@{-JV_J40s8)+f}un)5z%9!b69}>(wMv(cT!aB23Ibhu{D)*TDxXK4y2A54_Uv=-ALBG>RqO48oI6|7e8O|L>orSC zHm0siZ_^hthWvz(VZ(C`7%*$pEAX-;cU8#Jj7`brc;=tevs5Db_UQD!wM>fBfM!6% zdeij{tkd&iqWx{_0Bk}7zdw$z6?cWg{-`%YNS{ntCO-93K@YObAYL*Q_dx*4$fF7W z)5?W%1sA#S}yc!*|)zT3KVRecF?|xTumy_w^4;@uxKR zh1|f=s{Q??2Z5r-X@R>Mho;N@lFgR6RM>OP7xh@Ox4%(*G&Vx&cM357;2R0%iNxi;YZ^H_x;%x zB1=R7wvZ6#BNS=5MqSvJ&z?!Lv}mK{T3=0Fk^NIu*Hq9^dgxtsiz(|b1qpE2gT7!! zlkxuVSg87Z0VaFhQKcQ2TWrKbK8wgiW?dP}Scm#@s(5Q0IunWnK| zD2wV2{`=)B$N(lIlq2t2`#*1gFfwyv{Qw07L^T>TTJD!Y={}=a^{-x(;r?SMy+-3h zbB}>jPUA5HmVonP^GOcFAM|`4^kbXu@hwBRBRoEyu`6(NM*~S1-+S^AJQ(br=+*8n zZp31t>Xjdl^#vNH7wcWhC)vUC5mmR)Q5{!<>28=9r-oug+5d@SBjx#zQyVe9YQJma zrm@PKPxp1v>HYL`tIlD{+WBImuj;Zb0t z!&SRtO8Ye?kaG|GP}i2TZc=Obfv*!;8h=xzPD{@;`RC6cEZP((5jP9eQ%7e@TK%gJ zYnaTX=GiN!Y~UZ>jaO@64n>G8)E&M2=Y@y^bUHScxffT_a?cq!ifBJ7DuLZ;HYo0A zR`siNz4u7!82Ik*$TR?E*^e9K!6;k=Q0I%4zKf@=0o|_`%qGvLA*`%1GMknr=tHJM z$}x=a@VCOW&~gotzOPU4B?Pu@_C5IYqjbqRdY`bbge0m?E(hqmExX?I#(DF34xrhGERsYxR&Eo? 
zl^2YGMpxubG-l{FVzM91jpO7K&o6mdo0CO>hO}Kh7wWNN&|^2U{=*S!!}mE=c2#FS zA?p1yXJfmbe&V@ThH+Qa^%3!uSdMAB_=+VY#CVH_&cNV#`1?qTEmV{dh!D5!Ug+J4-Q9c%1dYY;^zAct_8@e<|yi-I?>lMSzIV?O>zH_27c-pTcfE7c_ot z_uBm;4h;z_H9>3t*ZtfuKge1bg~IJq*Ph3-zQ$!I6cS~Damc?h$9u<1#V1K4xvN{V zsb|**@=f=vynt=XVj)2Nu<3&av6ii#s5K>SVL`<&-RY_1oxJAPSbBq|)qerX$Wy+4 z+xP#}fJSM+uK`xvBy`;H)7QY~4RfxR&(sL^t8 zarL#4pVT{#6v=Sq+bjna%sW$#4^a6i)S8VYaKZn{rE7r60zD>!3#Q|fGv?v!=5tp? zm>5yAN44@)u=0x*9%ZC>hs5nTOw83~1x2M?eE?=?5-hv05hmWk*in=%L1RLla`%(38>=Bg7%mFRV^aq}Q zZ1))Nd<~?fL)_)(=|D1$U@hxWXw@_*PdNz~3Qs;!019aAqEG$yXD7x$%8PaDXMe41 z;@veLL0(A#L_kmX{fJ`A&e)P;dN?kMHo3r{uMn<~w`n{H>1S7yLDz85u=~c6l>65jgcylVL_{JTl>hkB8^An zhn7MmmwPYbt9hDNWRy;cP1z(mR;~C;@(l1jenr}uATq!qIoAARB&6Wm5}3QBP^a}Y znZVV&_;|?xxTkDnJqq6PO1dC!B@gUW4ylAxh8ZN87``HSjhnLa=HvUgxNwDqYq`8F zTW+?3bK!NRPp!QBIQujizE5oA3;sG#Nl$7;E)*Ja`k9b-DYMV-=zzABN6Su64xTl$CuOK{;|1Z})4kPr0nh49TlcqWW~SK`zHGkP>NpZ4Qah+l)ZM&)+E)4*-Lf=3fnq79x1ON7(O9zTg4R`{bppO!m+kEvIyLfV%SH zqJ2_$kqM|5L|6mm8fEwfUt3)#$ul_TTcL!W1$zJv-e{k%|1$zEaaa2tun4&fr(j5hMN0? zA0iL)%*`7$f8Q>}0FOlgBD?W&s_|4tn1yWHF=<_jLJKA3#DWc@bQ#&lN~?cnaYCV! 
zB&T@o#+?4tKTuh8S`g>#(9e%JX97^ZvNS_0B9PWFgs2p%>4vj$ovxR3en30&Z{MKX zjX&qu)$Ru^ZJ9jIZ(gUaBD$x^I7(vP#jrBT5{6yQnROKXxZ7&eAtU@bJ^D|8lEC** zH@7it_=M;jZBsTIo8sfT1Jxf~Bwh7j^TxcbhKIM86B`?owMjfeJ*5Y6Z|6Nk%11O~ znl1UcAL@LyH1^u!1G)lwm^Q+QrsrLH^qFsx)wmX5FrOH!&|P@_FML62N5;#R4=bE= z6YnywYGoxvt!Aj^%F07SeoMN2-a8_gAqah6w`0%vUhs40>dUe`zp$XQLj);hD2V@x zKSZ&`WDL+4DOH8P6^Sg<(-Ph{5>Pp5={l2~XYjh6Jwg|2d){~0ml?l#+fs=t_-|d2 zU?Y$O{>T>wBrw*Fb)8gaoOe6MCoCs2^gMwjA89bi);dQX``dURH$FZ}DOh??a~(^; z|0e9Qz|g=V1n)PH_s=1`Sc+UauM(l;c_=vdNczijk^*m$9Sf^-mq;HsY zJoDCKa}B)+pY_xP+erc|&Wkj1Pmu{KbD>{dmWwW{W-#;@5;&Z6JKiyAc{%bqw(S{W zK}cKZ?Qr=Fy_?inmbPN5{P)nW>0jB^fKVct4k5GZbRn~81jyg?!&@Q`0GS^Ln)A?!95kiQKy+(=*)N73rch7$LBVSlcZia zQrUDGm=wq$TjRU>(h(_f56HL2(~J&)Pg8B z%ZCkbqT(`}UhU)2WoxQEg}G~I{3>PLs@>$K42i zO6cg2*bssq?n0byGk=&laMYLy{ZpIr0XHk6!x8jA7H5sy^r&=MDC}HT=kLLA}p=;w&K5hM` zRZJdF(jr|u#v*#oly4qK^?6+MOP7rgfuReGuZXjH3QUe%s2m}`-&&k$k!4!?c;sr= zK5;v7=&Ah8do^eugJQ2Ml8>BCII`BMY*rU>gs0@|r`|Nz!7(9*2Ud)5Qy?Rd(Gc6L z=W9vjabn9)r_7LJ%gc+EP~^HmwvUClAprfks(PkN5a87{@#+}eD9LiMwc$|O_B?SKI# zuNr4_mtm@5_tggKp_mhHX4Hy85;z;9%!wXFCT%OAeZjlA z{D5_o&P^{A(Jg61Be3LD`QEFJ83mShLzFqg^@*?Pe6FwLB(iFdQ1d_>+nuE`?b6Q> z$)=yGPt?jGgd5+>C|iAnIj<+;tnVJ^^Q60bwQ|AD)v9u zyd{+)YG3Ob)?Yga9NtAtW>25??2WQ;N*q*j%~pcG5#)o1ClxuBbN$bAs$z3@E~8b( z(QILx8OnDR@z}@S%Vwwk`qMk#swbc73oNMBZ*r7rNk(&%m5wXj-IxwM{qs3X5@hnnEID z=N_%C<6U{Py3dr;P;B|FR>C)01nL>#JjKa4L2D{$R=!%ThhdYVwTOo%tC zN^94-lhw{XYDp}%+Hi4Fo|fHjLXYdmhOw5%i<^@2qfPFx)I*VAwi$Nn^f$Oc<6*;S zU6qtFG%2E7rzJl>f%fxrDP%D_hs}H%cM6F>2fgsorj2o`G5aD+KSf_4c|oQ@)Unab z6KHzFdr}=$NQ?%5eHt;vY_=LBmF{j-ijpbZPOR4BzSR*d?ZXCe5C zpd#5svu3h^WS$*ipu&KUM>%mQ>wFW`x!SI{BhDSlEmH027x~b}k9n zKKlV{b7O$3-|KAs3KVpH56#o}p^eybO)zDDW86(lM3>QZrIFlp57?98{AF?b2MTLT zh{1SPtx)RJaZNZ6JFV&5=ncrj)0G|x=bO6Grv5%?;hVWgcCjEzj6BLC!>f7ogY}IV z7M8~@&Kgl!4P%9iyr^b2^bC^C@2h_1x=(|g1voA6qg01Dz{T3I9^}Q^@}gFJ2lAg1*FPXd(gVcW|EQH zSj(75dIl5$V>ha)jz`|33?_Pg^$YL)lG5-Q({D--3Q$z7_MIeB6qVKDsVRkme*N2c z4_=m%43rik8DU!Ei%Y{707nl`Ne;K~DVQk6ZE$jn&cZN0NWDr{y)O_L<_&&>)Q>p} 
zqPE&cL}@IjFlWK>v8x{w?{86Lfg~$c?WX1rcBj1@?XQi7S9FgCLLOsg3>NzRq}TlLPI*9HO?1P+W=EUtWChPLWwf26aX{Eu?BD{zMOM@nO$hg4z4H7Mf$^HF#oQ(PB;Uf&Gt#=Mc z1#j?GiORcd<8Dg>+CZM8`b_OjipTw=Jvfw!D4U?82>f2m@5d|j-PX+r1MrKl0n-;R zu*s^{81FERb&=~(rJY<0PG*YeowB5i1uG{72^~)>M#)W&5D1Dabc}Q(45idUXo3-TOueHzxis<1Qp5Cez*_v6H?T);mG%>g&H@Z8saNi$yx!$KF zwY>Y!Iamv5yxy4eKD#1_8a`Fx%jQI2Y-vD3m$Twzm2dbDSQ@CgG4d}`qXtUnY^#3f zllCYZzXqQ8nf**wDcx&P?vx~luyq`DUj^}+oQ8pWm414G7Ib`B9Cd|GV-|)E)LUOK zR|4JpHzXdNqXPVE%(D6%X2`)bDo`=6BqPO{$9zQ3iw#MPPP@yIS262-KA!t#n>wwr zsE1NR|NcUg?#!dg7;SxWO0t)>1^7+U`(w2*#IeKqGC!;kQW38oGQFiEeM;TbrEvW;nY&XVKqQSaeR?(6Q!p^K`v`b3NP6 zawyb3Muw!a61~j+YFu;?0C6eARY|5Sy^q>l zE!+2XEoUZ8Ilouf+(&9>uWa^mbiL^H?IzceP?mMKhBJIT{r!6d!^fERZ%)cEv27~)yc<#Hl;CEo3^4&xWu2O1GXn0PZ`&$dp{cogZT)^nl_+RpCLzTI zkmuD4i9Y6vR$Maa(WE+m17w1_+~bt9!=@>SKAZb;pvBq4Y!mJlCS#$~%}z8{Sbe&F0sMN32d|QETN#UQN*3Y{#0L$K z=1f24vKa8(NFU>51Tmv{N-L<+AYo`L{(C%jssC+GziyY#HfJceqt^W6*{2hy$xr`{CT9rW~gqap2(Xa z5pbQmx%+W%v^M{v#MpT)F+8h(R*anN7sW}1Ab+d;(`wP=H-2yO#TS^)*`HKjq}~0p+RtxthT{Ev-9nVA|BlX zmvO@W!S>jOis8SpUD2ucU=@u|72}e@XA*XsGSxa>sHl{0q}$mc^>3JOf^tD@8XTs( zGC8O5Fx#N`M>pxWjjr_M!JmEqtRdG#7y~&hjGu8~!ZI1%eWKK8#HPT~1?{d_6AuV+ zC5{M*{B#~#Yx<2qVt+Mw?+)~U4q_Zdry3=>w#{P{zl^;3^Bu2`3;A1K!EUFL) zQYtCOJA1~MF~J7#qZN8vYaa{Kvd!8{@?5q_GN5Cj9zD%w%B-NVTe4<)-AHHpyj+ZP zOZds{f^OeK)KT6xa~cM;y0HZ;Nv`l^3KN@o-Tpo8d{qwjdDBFbeF_8RifxARD~102 zoWel)QBV@Lu^S#K92fhY#!0Np91`%f4B;#Wp{>6S2YjbfbOOwq(--q&k`gTsZEP-* zV7p0Nd|#&qI^K65;~v4n?aJzZ85fN4fvKF8eHUb_4JqLF=y(O$4!9E1xhk0QJQrCo za)H>In6>Khw&l%8*0t|{_m#+Vq?zuMeL zffs5(HZlGl>oIFCK);j1nT*&ITxai1R*>~SnbbvYhJ$Nq9ZoX3x~(E)V9rne6*JyS6_f&yqbZ znj0mDbcNb@iGrDE6Sh>l$2$=f>t8+Zn#A>hiq_We-mhrDc@kRo>RN)|qaSJ#o}l0A zpo+KM)D|-~4}{%S@IbzTih}UTaTq+opVGaT%7b4bJ?4$usvp7z$c3_*;Tr~hfExQv zGn3o0H=k%e%wzJHGO1X}ZO4}8mnB5wKLg)%8eDWRGBPrFzFqNAYHBVwNdXG!FVqV5 zPPJrvxxx$jiDi0ceM&}RGD;Cq+@w|pAlsftXF(rXYOe{`gw*Ru4)pNl_8xzZZ*_QO zU4_@dAp3ggL2st47Fq(D!)Af*_-hA}e?bIBooAoSB=?md{DpP)qg)itR207Qz_Glt 
zIE>R9j+@0z$oj_+1w)$Dy30{?^YMFJE%%Cys+_v=K!K`G!nZQ`d;Rfndl)!MX|M%o zF-mxDQbBWKJ#<}4YHk~96004q%5|6@twC330w@{_B#{hii6pvJE4y-pz8J53noHO0zh^<}>8UMPtxZ19OSGIX zdu5vB2%-ULKnD5eIbv+<;(~2#HVyGUZ}MAswT~`kQz( zC%Xjap{!iO@*BghM^to_k9$Sz%Vw}O#vy}D2b$pA-Ey#|4nCQ5wgA+l32_5%C$!Vn z{RJ1kUL%%jJ((x#9^eylY>|#DaP$^-RB{%~G%EPKGH2h8skSxJM%PPcu{K6L-lwZG za5PpBAe;3sM)Xb3`b`qf1fHnPsf_GtJTerXQMu9Qbrqe9MJ=}9czQCjd|`(5nrG8so6KoHpK4E zGAN%Xr$rYyBkp9^4=ReA~trmn6B(_;{KTRNcZlgN&ktX-q2tvC%_IN z(v+~{QJFBPyP%(T7OJ@`ip3y%q14!_0qpeI5HZ`|e7jZ*Myxu#7jaPR`gc83I`#ZS zeP#b9z13}f)`7C*0*&9iC&rVw+SrsS{lI9z+5YxgrY0$o%(+L&DZ`N;cL7;@SA+!J zCfJh%8&(cCbAQK4EE%v&?^sY8@i&p6Ze{$tL$eTd4w5xBB{{_>G;@6uiOpOYAnGE~ zIAAG&{JX39O$H#HCPRt4+cSi})9*H9D7#*k_|OnLJjtg|bQ>6?ZV;SZ(2{cW^+GBJ zOc6GAauQTAqW#HlsmYx$s>a0;Y8jPFhoHyDRfBHPk3eZCy)5hONbh4`h4V05;S3_k zSj!t}CP=JD-(%!z1$~E{2(+NP_XWxcB>&=O`}I_LiLaKzV8+A@m>M)4t#>u?VMmF& znJtTW+31K?)LiOD>~i?^4UnelCy;IMCiUtfs`v{wg)mQc_~ync#Ck9*zF`j~SJO## zV9L>;;b93nWdW~70Js1Ak8qs8P86^NX94?NFJru#rL0C$gf(%w%6_ zn!=I0BdJi68I1*_4?4f<+MaKNDEtDqxStULvN+KjfWHSRotW|vgLZ)MORBsB41~bX zGg(!jxOtZj5_-2f#Y9eiF+;##UM?ecDa#|7NW&pe(7SmvcKVjUZmz3Y`>l>;blP4{ z6Ki5Xj-ugNC!a!>A{x8D1}BJV@qios4Ry;qQkL-g4Lv)!Y+Zpe(OUQdnKE|sHk z<0siiCEH0o??U>i^`em{@*m$jIO}=@Wa2XtmaPq6v&fX=Rt6DSRfBQ{XSs8$EKsQ| zmc%_^W;n;3d#tpX6T7YU4H;S+*cVO}YRleFcoC8Bp8B96P{6`6_cCTfc@$m^UyC|}#8ySX*YI}^eBUn{YB+{6oWQXc% zv-Y~E<_X~+mKce7R%WXbU?@N z-dc)Gx~i*O`^|Yd6r2m&6M6wj=ji#S;@E+1M_9p_ie29yjJvqB%057ye`J=HqH=*4 zo3M>uje4~jEe{4b?iOXYoPoqj#;N0>6Eyc)xDIOHmpx9*FCy43)kuwb%_UXtYBx2J zMm<{r^^qJ|mn?)cHk;@#;_Wuww&cjX&MLSj@P>Zhy;DKrv^iNm{NeUe%85n5N{Jb5 zMg7OeRdZz<@HlazpVhMa>-`Q~)$?%PkNupX|N1hFHpt|l9t-F&`Yk^RXdspO z30P$Fz^Q9&L?3QkM#=`mLf z-?7P#e>;7pZ)zr6)19UO;^NqFYH=T||UFOWez+ zf;{*NQJQLmDJlq%>p}8efClpZM0sTobjn2NQ0`+|3uYjPLVM70&L94hLdRF@J>#zFVhx5a-jWI?hlQ!s)BB9WFBic=I>l3{8}PT zXiXK(fBnGvZSRkD8KyrF7Gf`nMkZ%_s1!OBfM_waeFDzVZP~{;wMwY2?>PLq=vdI5 zGoth6{hs~^cieC_snF(TMJvjW_6pANS1DzYS(z@2g1}T>M(tuUhFs1&@Y6E`N&3;J 
z#8l(Xj6s`4;tx)DHKFD@wZ_`{B{|)xS*y3;Kzs1~=VN9nFDOprK>6+V(To?arR3g`!A**I`1e?oUMF?g zDCTO_CE$&XSvI_J3TN`L| z>b)2RZC+|J7R*-U;h0UpuELs1P(>NNd!Kdg__mMpJpYp6R}lR#7a-US2(e~1DNKMS_d)MM50**E<^jvxN z@k*fOycv`%Rd_CV$GaX&uJlY zJXl1Jb@q--_Y+w4dMNnzw=fQ?Ogl9vV^M@^NOzHl?p+vtK{f?M0`GkA9@qWoJCqfd zrDntyP*$8h%W*E_(|WJ;O3YKun5SA2|BFA}bzPba?~HOeeEzVv`}($S&G$3&+-l_ki8poR;zi)SdQA!ds|&9Y^P_s z2Q)q=BogCTGrs3q&>Ldd7dVhws?Dfxs=|Rm2!YXLAR3LLOC3$u(R7I}WhMhum-omu z`e(jLxx%!5mVYOHoF}$1=tJX7R;u!Aw)%PeX?dFor-Yfq-qVN9S_T|6bnv!0kJGm@G z?tISu4w-6+IW%>WlR7g!AKkeVV^wv0{HO*D!zu}H*^+XC$@lmZgjcLUtr|wbwbvrL zWTo!!wh+Lt@4uVtueh48UvV`iWAE?S%QbI(iIPhFt&961aXL0v{AJB;FTnfrpJP7i zC}dk3wsX!U^!|G^-+C*;<)rxccV~V~^k2V9z;#|>ic`NEhhZ1S62SmH@V0sT9(2Xb zr17H;hSfucgD@OrMf;er9BD=F*RkGXCsq@~l$3W%B0*%iOEZVCS#6XTmvQrNe!-~f z5kw*pUU}yYzJA3um`&Z!=l_ zu$oQy{6Vg|{wFN`aCHwC&-OoWdBBxlIHTtsb*>w*7uKo+Md086#(DKuP^>oMVn-pk zcrnJxVF-&Et+|=V)-AZMzmBGR@1yay+wlJEr=2dIe&VYucGv7>&QY^58OkVFh z>lGXhCx5;3UM7wmLaxsN#OF#m$^&kuxFG9nT7%sRSLkhaGHna#K^y9^l!V4U>-**ohvxvH~|f6u++ zv+kSSjk_m=xCI|Yhcu@)~5f#Ob)2$F<^5aPb>vt#$Xf6UD4 z*zVlf>}>Ye&!)`Gz0ZBlbIv{IIggzY7zPX=`}V(6w`>{iox3m=73srJlo&E#5Z`>a zilGAr`{EOtK~Y4ztVtb36i%P^(TkHM+Ou^l&YyllZ5H5L|JGZioOve3f&yyaejDd^ z-(wj)68H8U*cL9tG$ekDla=P&s z2N{R6Fer+GAx>4V^~SC}`>>h~gq?olVFuP~C*PWe!9ea?%TQXm$`rdVgTs)Ui)Y<> zv^jHdu3SmO=bvLa=%6N2I$MrHQM&MqKXc{ZZ{*wWS75bRn=X`)m66G4IU;SrhOc}SoS&JJAii$XRM(B#qT?yCz;|XrN{LIKN zY#ib>Vx1&{#c8CTeYVfF5(=tH#$}fx6$RVexmah-0{>y9Qg+@#HEqHq-hJj3*8H-b zjlcft`_iA@o}QW-@;+UI#dYk`K+KVD%26{i!e%4s_~SA5?HlwWVKQMII}XN;$6NCR zT1`z5dY$7gZ7(EICMR@3-b2A+HlrFmcs=@`8QE1(SMT8GO~3HkW6$^!DEvlhA=;Pi zsvkOge8l#=9_0w|cr^ZS(vke(q$2@e@Rx$`FhIReq7Mb#hfTzJOrR*DxmTylLesQ} zFS8;P-4_<2#7;BlqT)Z*J{)Cf@TZ{*(W+3IzKdw2AX~G^#DtM!m@u;G*pW!b7U-vZ z=pS(@^c*M6=oktMBVO*yE0Cx;xfpr}uOFqKRZBnkT+ETjTo<24THx2sTc~U>VM~hc z5Zw5eL)9Nfqk-&6M$`_%5f41}VvFxA7893V=sQ9r z40B!9ttKMu_R301_^Na(^QU!R>sVJt#NO_t#JrA--%iaM)Dd09u#vPl{CRGPik!@J 
z3i5&<1J%{AZQD+q)*S4~p)H^-p!3-23@f1kw(i)@Ie)u^tjwk=;3J0&rN-^|JqrAweMmMhxl4z<~pGeQpmr4nYm)p4#wCt1}=A1Yd{x=5I$#sVV_l>eyuP;rf;l-tZv;+Alxd( z-8O_P_BURwiLM@y!{Ma1&JokIIup@IC2)bkwrC&7X2((WRkC83^bLT6lfio_2=rJY4V3;~-EVo{FKHzIcNl7^mKlT!tnURC_ zLeXq5Qj(K7H1#NEd~&ER^fK}@8tWJZ<54fZI|Bv1a;T`N2>H7JQj%;Om3A11d=grj zZ4a{Y(oj{UF;l0bh?kqB%er6Cm$xwk2a&veDPm9T%8XP6Mfo{?ONb8c(k|>)n^ic0 zA!`Q^`_@fhNimU;mJ&rOe~gdnEJ*0ruQ$a`6KWLlmPmRR_2#&Z{|BGy!$O}y{ZOMd z$#oElsxo8Z6t21OQZmvr0Qhdrk39Ue~Mi z7gj&$lsZRIRf6y{GI83pj&sw|XGe)pT{2?W07eXJI@e0mG=AN*oh@7UFl}<^fMqF! zZv2^TRz@GBzd5?~`GSGPUQtv`#>A))yZnds4DOpB_8Pn0%G86pX5FTqn4^wkVBbOp z_Kln-zdxXR59pXBG-~JoMh(@4R6Edh$7Bl#=-;mo&OLRwywNPwg3R0;GIMiuWh34d zmeP__xb^a@u~@8)VH9JAkKpcWZ;BXJp*wE^Oco21I#EA}RRxRMt=1h7>ebz_aT`Cb z*#P(i>Toph@fRyO?9dqz=_RCWq@<{K5#y&1-`|`>g$MujGWT3}Zaiu{b`IiXVbCL` zzAGzPT^?~LzmdpD815xBI2zErUX;X~!J~o@0-M#=;yZ(?VzAjG-rF6d5^EcJ(h7jh zTX*vMTOa%Vmx49xekCU>jTzGtB(Si%LjnkVv4aUS1;?!(P(Y#4qsnbOc-%kLcRL^P zP%IN!b_w%l3ixr&I_eE+Waa2<4eCMnL6^@~(-WcQvPBD~v1mb)xlc;TlzFG{?TR%l zSvWHc#rS6�%TDZezpBot%FBlKss}Wcd3naQs6L{LunBDK@IA`m2zubV$Au5E4y- zswfFtB(mLzLpu{$c7q4>rl_cExeiWENJlsomMgnv|^Byp7M+|H9&9kBbwZiG`>wM~Q6i4GmwLPCou{ zPCj0r@&i54A5oj0J8aZ$voiM92XyhmC_-R4W|6M)NKZ}X*rVq0pJgAjYgs9p z);h;AJcJOW*zFv5_#FE8>7^TAT^%N)kzsEvLwo(b7%BNGFC|YImyqK;fUlOXVbk_v z4x6WMMpE3+ZI(32prJ!RQMl^4S8+BpptWP7%m% zA~PZNpSTlNHVMSH)oQ}+cIsMHwlmL3uHUVf7ErI&4GjKq~90{SINj(-= ziQ5y%?nz8oZB|lpv-dX}QQ_%V-sQ*z(-L%N6OS3kCkMUcAwz!XAiad_r~QZK8?1zu zT>`Ogvsy43B`#0AOEK<0v`g9PhKflRdr41E zM<`0djVmc7d3kw^Djc}KS&0fCe72n595N%}toA+V!GVNCS|j$Alu}=>V6s^EH!D4e z6FMzhEunWbex#+PvAjZM`<@E=7iD&s&fd}*9(e9s&b{ygkcoSQW#aUy&pnke?Dn7s zJ?PR{v-W35&ZSpD!Tx5Y2XR8I>qT_Oha_xZO-m?7m8sKb@Xuu*QIxfv{=Je=mG)_u zrBw~A+vecJQ%}KWw?#aormG@;zvE?!Y&I00qp4^aFG3;S9c5JK1g4wjQ)atc5J2bE}cSfyd1tf%J@Y zetXPsDK9Idx~jTeY0FXyta&CDkIBR2ujP?watQ=~#>D_GL3%ah-a61ENMr&4$nmKX#RIzA_Mug2L0v$Mf(1F&^-}r-$w6N>n7`x zz<0z!c6K&7**Wb!)WLa^4y$f0EChhS> zF+ZS3G3r4Nx;84RYOqyQk&&Jn8zGa5+A9euK|0*v@PJuGm<{MTb4ihy^LG!TL#%xp 
zeG$mmp3^1aYg2nA*dp;cVIg%@HUzTVLG6uSVH}@}<@jOX*3dRrA}!9CYu_xKJq6v1 z`}Cj(J;N0K%qK)X!W4$ zp|b+AdxlW=f}Z^;rH13>^>~ZxvCr;u_>*13*9@uZ^{Qd?V_aCMG) z(1QaCI!-F?FL&X1dL6b4CSV$qgSNAVs%t*SG)oV~$X6pwBvdKf9<&#olFHwM?gu@q zdutU9cs7)y=Gjn-l6}fhof~a;4W@&}fmC7&&Z?-a!D#3S$nHT8x-#@qG_;Ba9ItIa zElS2THU|lKT^{gAjKe#gW!wb8g1Ix;TvCtQ+cT&1AUL9QtMv?^?uN{)bOz+=bB2y| zc#$p*(?s8i+ya4nYbC;NL`gTt9JPMi15a_~`KS75_n5(Z(1UIb@0KdO`|7crIjGT* z?parcQkaY|=~sAHh@Q2rJ0sR$pdQQ70g9BA6o%v{=|W4`3t!X%3+<@9Q zRhP8eU3*JVlpdE_4|;GQK&y3u!-H{L9^l(sxxe3sp&_MN*iq~-P~N8K5n zm95r;?gJrV-`-+=-n*Ut$HiU*Qb;f_=!aoY23n~D^UNY#U++RKO3^P6LG%Pq_Mit{ z8RJI|CND29W;*KN42;9FsC)1`lr#(8O%*6sBWhvnHI$l9NP5Oy_Mj`G6SCF1m0PxD zxuT};BDFEteF+Rbrv`;bVCx(6!Jh zZnbVSf>y5ljvX6+;HbHSI%HCn62AKJd*0aaGe;hIM8Zl&Prj4R8IMpEuo_SYrC}SN zi&~(E3EYP(&ijLZ9!ped_n-&eAG`OJ;PhxQDM#+jmDD}_9m!WsMytE!+P_Ugc zQb$hx=$TWxHDc}C11cc9y1ItSeVe)TkJGT2jm<}KJ8i!H)R<^`M=#Uk`*UVapUAc{M^6#R z?u^cGt0~>&*al5X42q1=+~&JCBh7}Rp)?_7Bc-IMu#l-E`|3k5&I-$8&%MC|H(%VN z81${OH<&z7@uZ;39n3)OV zyO?iQt);Hs5p@P9Kys3eQNso?xL;w*AG$fpE35hMM_=Ricw#3e1Y?H}WZcNXvD4a5 z7&f>cS(zF8n~f+SaJgJ4UT;FrDa|49&5s*5ZsF&jf5zoq*s1`M-wqq>)`Tn~<_{05sji`P*S3UEjsi$HSvSMSUwy~t zH(uiV5X(y|Am{EUIT`)rAvMXX|NV?#Hf-jUL*|l^Y!5stEHr#B>e9+;-u-wv9~zc( z`teJ!TFtuf)zwj1Rm(HI@~D|VJ?1`Dt7HlZ*z7k%4?nMTV8^g?*)4Rn6r&HW&0=h-nCSdfnQy%6Y>qkVkmzZ4aU|lnPyC5YqFF6SWK3g50#XsWoi!(s ze7AN3)x{NDa_mtURK@oWL?df|I|_5M_|xL~eDeJozFM)G`LibK!dsVz0BJO!Sg`MWJ7E52FI{GkJxGGZ`|BQa|22!{9W z#gG$@;-NR*VM9(1Lx(1=1v7xoDFWHep-GM3AG44%Pd_^7??nZ%m&ET%kd6N&782bA z9w=fNbA?}_rz`~jdE)~v{hd!S@_i#ZG(N@;>d#~EeaO6-2kXRJ7l-sKt3U@u)){KcN~8_y^AFyDqoHAcv(e?CmzANkqMFRq zSc`TFQj_fLE3HUq>qmPKk7g#a?CR zUDyk^F^TRW^hW!oj_3|@iLDJ$1uppJVbM+g+G4@n$?V-%!j;$GPgQj-;5)TGI_&TE zbnMSyFkm&CNd*7eUH^Pxf3wi#(DjrfKf3sivx`a%fUbU&MReSC?KT83eCQxDGcwrU z^ROO&q2_|vdO~L&_ZzK_BkGu={bFX*^9W`_x8cqIe#Qg;{0~;Eg{taW?!5UgEIj%g z?z;67X3q#8KcTx^gorKkB}(+|TSWf}&4Ty3FqZ$Yma|U%O*`%o#IE!S-`Epf*PYx| z6{N1pmv1kq={Mrz(9W(aJKsAMjyoUkU0XR0psv0ik4HnOYCG=I$%7&#C=wx+xJoNP 
zTeK?3C#8(o61~?oy+^f(rY}0jtrk6&z4eZM?}vcHmn_nWSa3*S_ns22z2Yp6SUihM zuDpw?>RJ}gpT?#wJ0mJbdSltLcl1+^1RTEPFcM|L(gUm4{mQyuw)#KC@=E$l)l(*n zqEBJ>3dm0+{P@#G)^FT~KGUnl)PqOUyC7EwT3sL8cJAftm1~I7K@b6w;e-1|I8-*g z^3;{^<<~2*XXR5=R2cO=%iemoNk59MasxtDS$tUV3~0Sj-{9b}=l)G?eO*u8Ru4%0mDWJ#_MhqLs zvoF5QF-I*RCCQG<<>s3eYZx9OKwI$}`1Djg-udi3w%`42?1Uur`OD;x^4qp;2dh`? z;@HFI#r~`=gr7EV=8^u7P(QLfW?HH57gDtQ;x4Hi=~bAu>sl&&SiR*}9vk>5j^PzC z(@J}{h(3G&dcY`0J9h8mam!Ow9kMfKTDG+roVj^vSmkKe!MyzS4ofmk{7hap}^ZUaJQo^y5!=`Rm9V_-JW1k3OC4 z1wEhLnwC#^M=qYtX6~$nt9M^Wz6qQIS%k!O?0HEMe=8?PRx7m^za;eGrMq*xC?f zwwglZsBk4(a#5oxY)sF;wn$A)X2_tv3?0;uAp`sH(MMlXIOI^Cd+D9<3<|w@6G*AI z(h}$weIxlh4S{0ih{h_};r?_Fl8xe-QSqg-y!GHIURB3+<*keMXGGa`fbD36qv&x|`_TRUQUa#ZVTOj=4R4?p*>7T<5&x|wAkzRT|x9}$i_ zUH?My7#rf-XHi;?G@^@MkH-QKc&;At70oNr!Y7}7OL18R{dyO$b9XVH|8FI)E&GJA zqlR$O@k^L7Wn2V`5}{i*lkn5iTZS5K&WlT=B9H=}f9NJ$F8$20J?KH)kq8@ay&)5< z*+S3xyCaBpU5$j`leyVs;*ZCl%&iZLAfS{003ZNKL_t*CMZc+I7&>4u^^SV>mK1Z$ zlA~BOJ9NCM{&I@qhcs!wHrl;#F2Dt_2O-KGhaOeE?TXxNxhqt7xDNe_$A)QVlDN9{i zb`km0`GIjnc2)*9i?v;Iv))ipbXhY(1s-dfMrm0oQqvecbU0&2jAY!1kqj9y2&>f^ zmN$LZ-OxP7f{4$Jk$zN`Bk6l9VtiElA%p-utp-8bpePCxCye6jkDsErxSVHSe4GD% z@FkaDcOQ4&bRmZyHak2`ovj+Wi@6nl0o^M*H;bZ-=xcoJPdFO7uS-pbv4v}+R@+3Z|@(PfSG(CQox?tbhcw(Z=3*Xu2)SEt(ZE4}WDC zy80E24vLD(YEE5x6}kQAl3qB2!oi2|!N*@?wRB$14iS#QsP1mx&A)Ei!n3bN$j0kJ zh}uP@x9LqU*HtLCOlB&sxPo0vmQerUhctZi4SP>Hg{_&{)ZBkRC<39hHA=_dtp*!5 zZDjoDF&GU-o_^&;9)12P6ePn24WY8CDvZeV8{8$SFK?mW1rg-G+I|YkVA$s!I(ssQ&Yl{HN|%67U(jb^ zi6l4C;;PQu;t0TGG+{QHF`LX}q^2=*;$&=AD;}>Wfuc`4CbLd)tNA9W2%-!%Q3QT) zYG_W0lG46~T-UWO1g*E<{etp}YVN<|GG2Q917=Md$IEYg#NdH_m^XV;7;>Q}NZpwX z3yEl^hPI%lG+xr`2dPVi2zXSv;ABT0;EVBRgB$YSaf@j+{ zl7IhuT>IJeB-?yyxrKGEZ2*J7G_-x2LseB#RYkWW$l?vX;-s1mO=+w<;)@^ke|B$k zg9JlgBh96+-`WwHkR~Ooe*BK_*Zo9xW+sQsn9Zyy(=i&1Vae&guEmPBI!HtwV{Ah^ z4xWw8$mHJTq2a4ql7FAxE_;=p^$|pn2TLj0uyHFVpKurp=TGIcuU2yS;yF}Q)v{^J z&aldnZxHF7@zDOMUwT2hG=x=YbAMx-TMd+8vgkqQ!;q0d)_wP(=I4{*cGIw81yv6} zOzJsjgYZqT*12={3vZLhAH+FV{uQ(NDjW?BT)gyL5R&iLt)W-W{yS1dul&$>!M11- 
zN=^>DfAbrXjyRIErAx`W|2|5VE~R1Bci0!k_t=BT@R#!lEZ-75r5>C=HQ-w7py7pD zQm@Wzw{rCDcWX&WPG-_UA>!w{9E48uxAar<&Bdd-d`?%VfejZ5kpo zlj8HwC++OBF%2FZw6EgIlOH!+d?icfEksI*q9}|QJQRV##IXl4e|qRd1wDBSMoE*B z^7Hm#o@FEH=#<8}!PnCfYM&6^mDoP!n&?P}1L<<1q^6=+tr)Vh$b05l3~6a#FktA_ zi=4+Er)23;L}R8<+qom&vU6{6QMz<5?p01-T_UqYu?v)B6(z-hk|dBN1PykL^e&WF z*5LMbWwo^>^wLf0m(@~naWUR9FKVF?HB~`a6@&m)QIKf_j}3|QD_m1s%e$X`%zqww zlJO%)<8-rGd4%LD)5(cA7p38!3i+WOG6#<$t@XyC2%Q2SafmN8b$6D)0SjU%FV ztHtupN=!4LeN+=e;V6& z>_IgssEW!{&n)ADb53N+qzGrL388BZ9-rRQ_9b$>QHy7i8T-QIMu~ZAyj0y=hG(Ul zhOZo?T^H=YM@j3LyGGrpOPWrD2g}f*7%sbv z>VN$U+rmR}|NINdC!F95+V4a@(o#|x*snhyeenqZL;4Q@DH+l~{DI8zh7ba+MpAuS z1zNF}lt26W&b`A$-5WI|pPGuAY=}W6Za+M`cT;uO-K5`i6KYB_bzgjrlAeKm)+`L! z*&roqZVoxmK8;ozI3>OFUqI7f6PXX>QulHV4PVxidQk>Sxk$qKbERDr7_ zvP&Gr2z((2;S#o@h3&8;)I62CkLtiBNm-hXnx^`?R{__0CnA`#snZ-B%ERr|H1cwC z7}%#DMw1Dn(a6b1AI~lK{{xrXg~e#iKW0LFpL?ob#^`iF5XWyUx zM9x1SA^nUq@a)=!=clz8X3t^!*l{R1*<@aQ4VDQ9`D-3YM2H0{ZV+G^VZt;b3-?wx zB}eTb?a@5U1I-B4x0)pVAr-xkg6e8mv|uKswJyAR%-<@Q54K`H*h<6Kb(CMekF5Li zQ7pb>3iAkG9|v;>>#=@>n#YX>1N(}LS@X+!CXbtdSJU{%qYsdgo`J<|3G1Hjte8Rp zY|Gw=tke)DzbQ|ENEH|tnX#XkhHJS4sfAw0LL$Z{M5P=lQI~~}FntS9je@lEGpKp4 z5_6#$$4UqF&s3un8L=*GTa`LsNNEar3-=7k-hIV*JRS-QdbJLd3u4*yUGcAH-{hK` zA8PS^lHJN(w_MDkL#BsrPN2XdAcgRi9f%N(xMJ1!Oc*~JMNwL6)8R&{Q!SH2c}FI6*>*I*+ikaB(CKRcn?WJ%``Mhz_rPR^D75+Z&u@c+kw(g zC1js9&zh1lxQol*c^fuurywsIyFGk?wP+v!j7IXGdx4U(&I-a-`&6Z!yNj_|&19sf z5$)T2^SyU+{l!<%uc$ZweCA)A_}gQ-D9F#oX7w#1 z0qdrpSc_`|RHO$i6RhMd?@jsn#Zl{0&H)rAes?}keP0aZ$;rQp05Pj<7 zTCj6ZFv6T!C#O7S^xinp`xmqva(7B_U|2nr_P0M3@8XBTeof}Hzym5 z&En%f2F1WSB?;4L8`Y1NW5`x9j<(?X*@>yQ3Clzq&1I$pB1&2WaEE@cSFc^ih+%^< zsA|gtp_=n6n)pvnO6D(1&*82|A7smptr%1zV~3CAlC#eBJvU4{ny;0O?@AafU-=yq z#*aZ!RD_U7*>Ze@x?2KypZ2Eui3*H;OxTZ2!H}gQ48AoLz&XH-YFFEgxJX&-v2G@# zECLzjE+Pas67cT!;NIrOGTBD`XLUIKRZGpb3XGF1Wd1uJwZ9S14p&Az<{zrHOyHAFiWEttlTL4EOSH?03CLj8xThc@JbxtIH-TS218#nijc@u74w*u zFP@JH;Y$UV0!{kjF?<6T>`69yrDcT3ERn4LWh-xe@Hw|#eXf5YjE~)-#X=YY4rjnD 
zCVBX&*YJ2e%$j;oWZG^?-D6caKXsty88Hl1&^CE-ZFJ+^;s#A(`%4<`)eX@x8*+U6 z0aMWAM6ka6P;Q81>3W)N7naSNY)wTf&0at0<-m`s&s zC@n8z^0*09R9B*^Dr1L_qOQS#%k2!S9JOf%p%oDD1@dm*wu{Tp{{u#YA&M*%fmY+i zTjs?y!c5jfd6b^9my!)T$$X+0)&+K)U)4uu%TFs*XGtVmckbc&*WTrVGmd9qzt9W2 zp{R(I(@sN4O(pI83vvJQGuAnCF&6pmsr>uxh|CN!uDP0;S6?A%(P2n;Bv)~WSd3

    E~V`qw^Vzba=h>l4yfVX@fq4_qvQEWBzoihV-D|E!E3bqEE8Bq zY%1$*;OKU_x@2nGBkXyQ)YzKr{XQgYfJg7)rjRpOC4$wP{qMWk9Fx zF7C+WSbl=Ni#UsBPQd-(-JL86dccP$p{j`J+`qH`RUJyWJ+Fl4Yf_t}5a06Y<_Ew` za+|?AC2nSnZ}s8jRGFBdlRtE-Hn<&QQk=b54=f?tA#zMQKYhJ9CVWga@f4(F&TxRTdOQ{Lr4o^d4;^QF%+rDW%^34j30iyey zRF!($f|XC|we*zC@A(aLqhg}NpA(OUw5^2N5!}QZ-*D^qHEQ9yCf;J0Y&sqC5eQ35 zB263R#KckB*`AstEYoB+X?Xs`b7|bH_}*|#vY~`T;Id%A%vnuVx(`aHJFn2G$GE@W z+k#Kihe;V^VuJ+`y#Rh_Ti5>mF7~GR;emdwFRP_EK#8Jw5j#v9bqbj?ph(ib|W1V0A zzC50Qyn7+0ZJZX9hl+f%cVcTf(=~xQoy_-}@C|d;G+TA^D%89dn#cV32>fh{o^%Hp z)U#UCovVa(s5eirhwCd3puN<9|H+0P12S9A2CYR^Z>pOb+2Gt4%e9(y5alv&8dvZr zm?-Hi%Qn(XJ8t%=-tqYROqH+)21oLhJvDThbg*di7=^U+GOy3~9}e7}b>|*&>nh%3 z8Tp(;JoTUxJ}{)1Sp14^-Tw@9F!_>;e6q4roTLkf4reC@FQ5GPQE|+yIM2*RM1EUw zhg%M|3m^gpG~k;imLnT?LxDtdm59Ua$=vI_8SNie#o85AkZ0&kl4D=thkUO^aU(mN zoY7nEk>1@a?Rcohb(-+|Cb{zJs&xE|ODf#x7O^F@T~?yv;h#*bCqjCe+*c-H5Pk+6ZgFx$8%#%f$Y-HUtjAe-JQ&F+7OEvxgl!k=^VGha zK+wxnk>Wm>U%8*^k?qw`u2F%iW?VhizF@$;|j@7;8ItDL# zs5R~Q%|q8=9>?MB%aK8+ag?4{^KJI5lExo949ZYEI2jE4<2qxSH#==*71(MC&gUGK zVvC$T=JgyS;P=$-_sEEqtsEmS)`Ql~HNKv$Z2zL&xr1lF7|L*xBiLVA(ilzE#EM zLQPRUyzuxGtFT!$lkx#Wz2hZ;jPhOas=d&J6=ww6aqmdC|9Q{C5Q}YYW*sEPR!5T} zc@Wz~ihbvo+1fWHGmKJ}tC+~rCW}s*s+U1{52FHZ-JTd0|^-l*+b93nN94-|+E5E1%`)nzS zsKYcW$+TG68dP@tcE7D+LuyF$;w60oUe6jmiDmb}E>*2|6x`Rn-=H1N2f5DB>K)F+ z=7BG;&qYWMTV>A|Xqw2xyAGEtPqh8AwY5eCpYACh zz=0YjY4^%cT;8m~m8O;~@W~A}RzU@m?3S}qokFnDgWS^Wq{HvjR|zB;s(Xtkx(^}( z_wE>Jh;2G%Kd=>CT*a-tgQV&h9wtetF^Mi;5?cv%?B_h>M2z2Uv}HPxJ%!!j4cS7Q zj1Z!3O_H-l&o2d^IIjX9FO2qqvqBvAuzF$P$2UCfnpNPb{{0!1foYUGRUP|w`^U40 z1+}H84j&sQCNUe&?>O}l-jBWB+xGiit(Pdu{#7eoK3ySo-cDlIY~O|teJ;wpHb%_O zWxW$c#JYRkMUOgNc3do*u%wloh|A!gRJs9Ml&*}UHMU$GF*6@J?W0xXjC%LUS_rS_ z={!{$-DoI5VRyeLA#R#d;cn4J8s$EM@2;ECq0L$@1pli2I023|p}#$mM>BLNtFkbO zqx)Au4B?_`-TmuC?CEM+fzk`W(OY`T&d98!I6I&@kmWct{TZId?V<9AMpbjP~C(gJ*J*=?P+d-Fd5+t^2myj?{liFBh4<_Kn+K5Aay%0@Ynhxc+up` z!}B*#-%CUeJT(K|1|JJ*sRXaFbUaywIGg#=_2t!ygD#X>wWLkAXA-J_1-#>hptK`0 z>p3g^+r&@4BYPG4_sqq}ZR>_xk0-m?S5RfDnnLm5Qx)Tabnn7w 
zIBjR+zPgrEh_cel8CMtFR231M%)o7UgPpuQiS13yYv6=)UDN9zCgj&W$Z3D$P9fcr zN+Dp|v0?sRi$R=dN=Ll9@c1C>!_Ew9E)nX7jYhJ;I+Bi=HJgOhGQ|g{mw5Z@dr)#B zThNCVqt?`43KnBLxB;u;T5K*volCn2j6Gh(r=Q`2U8=q~oa(a~OMf%iq^VrI$3OhI zIZ|+BN}n-qrxVxW6h^Z}u71P+{?2K^_|Ep*!G(Flow2QD;x1LMs~A6`oLBg0a)c2_#?uI-r| zK?@~W0>0Jy_>scH{Zs=UmnKz~+mH7^Q`J+~knUVJDXkW`(Jps_bYi3XRtR51Gh|g- zEA(>uJwcN>chjDSCd8ZX+B3f?=uz^yOJ}>xq}Q3a^|A9=H!h7>)**Szo5{pE#z}VHP1IU!=SKw$^|!YCpHh!|T#rg@=Qf zc}srJxUM_0j;qHX!N;;_zN}YK-G~A5^H)&XQh$Uhy%7^Bjf%vh1NNH-oo9TfPNbND zKq7BXw%2NBZEY$Bqh_3liyjSD7nGvuk086u-j2@-IK2!r>v4B5wa;%y4sKQ_<0@Al z-&zspa9OALdTjUNF-mcStIjJWb;A13xvr`si#@JPcZqW3F(jG~%-NKQvlLg5J%Vj( zb#qq4G0bC$cCGML88k*tU&sFL;&WlsPh1AM#|UVDOPkUS-0|2(n&ua_s3mz3QcvNv zxMRUS;{?_XW4sXTby43=F5@sL3QAqUPkUm2C`k99jgZ_%nEc zB`Z9cF=)4(|I6}f%Rjz!BOu2yM-N4N#our<2Nx9ic2mqkX@~ETXrNyGbJ0Y|^*vFkE94+U@ZwflQVKa4v zuFA6_%LX2~BNY@1%(E@KrQDN_(&82Y89u0p5bbxD5BM2moT`Gi&$`<PD9H z`;z8_-!Gc|*KwKoU6Ze_mxwGjnX2puoHGfV^51avRr}X&fnqG)4F+6YP0UVH*#sI@ zaH{dC_<77bb@3)=G@WAPFi2;wF84}=z1q448lG5V>d10z$ys9`l`tSOpuY3AgVV8g zRK~AZPQ&~?kwb8kOB*Ipiq@%{#c+EJQSzqA<1||%pfk&+LY#!#ucxSC9$ORvI~Io- z6(pE}n3l<-_ecqTRVhtG8G3tpVMExCJ|2^1<{8v$OGBBMSM`R%;-?k!Ty z@r=vcz!OXKELC8$V-I#w^T@`Dmn^If1!e?~0n00@(lW+W{zgvZMTb|{)e0#Pab+1A ztyjn1TF1x5ImL}W+9I38ht_$O^*WNO6>O8jTy#ah9tY$oGo? 
zlc}2zf8;vhj7R?oiAhbLH8XH<42Nv2n&p9`QkZL#RZw5b|k8AQvg3 zT7$b9Up2IHrs56dHOILo;*qZ$<&{!otrm+olgS#BENF%Xsd(wYhn4V(Mk`;(`VCUf z@wLk@hLOj|l||wlpByCix0uiGc#>%7hN8S!Aw!uGJonix2#p1?Y)KG?)^GONk5y-MySw*Xuczg4f`S_{DgOuu5^(Av>WV`2ID^v( z0wDOF9iiJjIc2zXl0V^%x#rCxJ`1SFk#6rqZ)T0MES5|Alf2T9!RY0RX zSBsK@dElg~&EB_$2aYM(J1t%uiF?`f2A?09b8Y_LZ#zZM zO{&=vIGhk6)UtXDhqp@-gim4ZY%Wu5advH=(_--6-?x$zR95DeZE8QAeA>#m`zRGZ z0G}^8(H zEb@FE=BcERb26l8L3?JEZ(LPhy;@c5`QoZ@rC67#d zQW$>5JoA^V)ph(cKGKv(UDIh+HM@o(u(WN2B*{xq$SPeqs#O~oi6K5j%*3q0=6CFs zFmgE0nkb-wa?M6_t^2~H78d7NK)>ZR;VGkQwbR?DL~Kqy=KeNWGYdPwWik!gzfZ%$ zAltsgkoct_e_}^Pl~=SkMTH({-mUZnr#(1uWEtu?#rPTycg5!SK>`K*M z@UbZpcI%~f=MQTP&@vdnx(B}HvSAuF1JT$m#sr|`|G^xOF*KG-c_d@#?s z(4}lQi6}n=KPT7h=&#;JPb3}o&-jB?t8L#}aPqvx*3&@;IEd$L};f63B#+Y%@JSq;q3&99kZiiLVW{ z9u4U<;tem4#CT@E;NY!%oNqBiN5ZB5;Q)|U# zNSmi8n+d$W@vp#zjT}oC&yS>+^5}=ZEAa{Ffo{*n2Am-jQCG17?e;nF=hJbO>b)Ur zSZX@*gPTZK#SeGSU+#tL^~qwat&bDBbf|_!9d0q$BJO^jtPkwbML>hrtx4#*7d%s8 z6K}#3EcKtFDT^!4qIs{hfs`$0x_4(ZrR4aWgD?HwQ6c#;i8|1`U~;-)KC+?S{K`j0xmdj8V$KNIKI5d~%;B6T?4p;ym z9D8=@aUKl4?C}uWf3nU+b`4L;W-Bw@Oh>NRy=1(Jnz^wHP`bVRuTb*|NY(S!MOb#M z4H@jMJ}#}W`l~gSfoV>CIa#Po>>8Mmwk?6j@=2l=Y4ynn(Fe|rNVP4AH41wo!m!Gv z-2~6A2h(}_rxSW^D}{^AP<+=B%gIZy@@6tNyACPslBFc7krN zhyNtNE9#gR^nEg}T)6xPQ*x|aZKm~hE~nO!Nf{XGiN{Rh9T|*ddI;em2_`V^3b5Hl zZq?%N=wH=<#IQ|)3u*7~*>)`imMw;^j=I_5Ny6L#DztRMKktGPAQ?X;n!RNu4 zV|!#eM3UNUrxB{kT-k-+y>V67 z;fcyUw^leOSGP?RJWWSc)dM3ivEr-}e4qX%UOJy?`s<=0b)8IN`I_r%=%{R!{WSa@ zXKUBq?MY1Cp~b=N zbBI%0%G?8;4tjyBadfR}!{|nl>MhC!Cvx_S%ell(Uzb{^-T%fPoOD?caL;Y?m#lT4 z`&gq@qLmy>tUw>Nit{S=g{5ja7=%-FbG2AD=f>`rp(!}!G7pI+>GgVL<*4G-8?XuE zI$R8y+<2#18R~j)zt}zWd9oj| zTSfuANtV!I1wCT2oD`0^r9@nUk>2o8l zdcs{iJ>R`~(gs{GN54EMmg01q@rZUaS{f9dz9|2(SK!0%nVaQl(D2O*|L0rK1qM$k zZOTwhLRm`)KPS0akb^<5Hj;h!AD}B9xT+qy$1I>x8BUzrVK?^8~Oxbj*- zlc5%Kur(<&Ie1ZIwv$nX!z?Kyth;2DD)$yDQIc5sZHtaOauP?GA5~nZ5AIKuT`{~a zWtC3b$7=d}BHHS20+6P-F+LKGPlY5uNsREtqm(@L?S6#rt|;Y~&2(%j(_hLlTC($g z9v4PofH7`v2=J$TUA1FGSg;e!9(EiaueGSOz^uUJeYdWn8`Q3kHq|=Y#aSdIv2D=y 
zHU39lyXoFurMCIA$%ljfrV5{<@U)c0yul@Ux>d--+SWiaKr^l^Ui&?7m4(0EejNcYG2*$j0?bB1;G!ijxaD>YU6l!(35>h+6A zsTj~t5=gl0XyUq!gizlK96T7L_Ao3H&5OuYsqUAmqCId9ufFea<2)dnMsq^;;g7cI zuMUh9{(lu>jDXC*_q!z8TcB8rhnNk(T?o*&FR&TCXFH{UTDpnI8c&B+c^7VD80jFXzB z{JM@;>~2Rq2;nSjOFqMT2krTJnf#;gmwXDUki}p=jpdm1+GgG5W%cnlP2ohXw~7^r zj_lO4Bf>kHnHR^#BUWnu4JsW!e`bgw0Ub87QWaG+*xGK1CplBMj20wEj;D6Qhk`{h zaW8i^@vg?*=qMXcs2$_ z0s<-^Qa^?$)m1g~*%9lmdR)OrjqMHtPQGwdiDqUxD&e)8Bf5=VLG zW=(BDZ{7FIKz_C4?wo#KQB}{ogR>8bU(@+Wh_l*fBuNg-9rTT8^J~>awHn*ZIQ)R3 zq}yhW8V`v{=^myzRO116((1UuzvZfrq~x_JxU!jkL9eANB4fi!qO4)-#i#*a8o|3= zRDLEwFmg`#m%sV)~hKIj$4!GQXc-8Ou*PBO`gLUK^&NR4p+A#p{*Xwt~e^Qp(v$+5T5 zxVymy^z=3S+d7TkZH!gfV(Tgd`9Yg~3q}*nph0<1-uelS5DFicao>;)HM3PAW1T#< zb-xb|W9joN??u;ep6=$pG~Z7{aZFnt1u{j>4e}>(_4&v8he%8PyS<~HWIG0dflqZ* z+*KD%ds4Sof~r~Kl#I1IAbvFvF$%_7|TAzjpmj9rm^KpvNX!aZ1hOYC-Dm} z>?D@Ex2IKTy=NsnO7Fv69z@~kA4t?WzrA)~<~slU^d+N+1@RmTxpAJrGPWrvFH)Kz z#96DhBvYW0)gRcp}qC@7VGRLv9t|=^%6G zZQ4s+qZv&9Amj&fbh&80e-r_h`<~b@xBM+Oy80lp|1AOl{s!+bo@AiW2(7t8j^35m zzyU`R8D*ThCJ&b8H7_WNw|kUP)D-BIBjcLimQJF!c%WdOBjh*nq}6#(C)J58-gdps z!J{94brkDWmS^+hsg=at-5sB39@=}4c6BLX)g}sU*EQAs%#+LL?yeNe%EooGaEd%D zUfX7$>V+r>{t(A>UIp;+xDT!i%oO>N(~ z8@j_{A02+`@n5t2QhX%6rYyUdW7?~55svVCWLXv`fE;!<)twK``)kPBduLa0vI@63 zN}bh~apeQPzXofgUhWT~^DV;8hn2Ogndl-Qie&nke0#vE#xHc0{AwWfE{q(f-lM;@ zzgFLexBd0^h+ZlUz7O3mT{fi+zWk#M9tGp014TyM>=3%5D?1f>TWSEp;q?c?(V;f_ z`qW7Kk5pf<*K(Vz@~>NvY7zmE>Q8I}O8s}Cf(%AJ35DsxIJ%pY6vT`DqTdAUuco`~ zJEpJ{x{aMK*wkui90Cv^I3g}cef`eGU^VXSGA;I8j<*H!P>`=6ce~BS~YjBSI zio>^frI^?Bwl)g}N)u*&Z-ETZ_cXCtoIXO4N4Z+9-^x`d`~=5}mFbWz#r(Dc%Ac3nyVw@g*)K&7GVE^2dZ6HT+B~kl|x=dbG zr`OLxR1~lze{tu}VRosIt0Z>u5C(x>sGzEb&kYr$p{tC`bhJp)8gUCsDg3P8-x`@A zUvevW^Oa4t`|;eF{MInpTY?zMbX=hW6sO698UsF*l2#K8TR8nlsC14#ZqhA{xwf}O zls2*>QmH(0s29`LB9G7{0&LL3oD~5B+4n@>E9bRZWTg#gIMFmBwri@_MZ^iszLe4$ zX?aG9DTri-2oxWUL}|?m=NJ94cQ3oCi94CUM~JoT)r83{s|NHkCyzI8&+svsQ?hO~ zh6s$@_l_&ZcF9r@TSOHCpB5c8rOF4SpY4JtHAtJTpP(vkEBq& z8Ix>w!>vdTq)O!YUSy&dX_WO3Rp7iKySw>QIPE-Yno}OY$7q2BDxL)xa>~71&nXls 
z`y>wL)~qC%4!rd-pirtoKPV~eKG1Vx!{P%78k`?X%b-vAN(}Jg>h$4&;~vtBDGt7^ zpHmH;8?r>j)R`X{Y${ZD1VsZp3;#kK+4_MRJlCf=;Hki6i>rq!O=oTn2TpNfZJ6^G ziYOfTb;Z^}c3sqR=;I?DlOzmm6)We7T&UVgeFEbZYj)!`CEc3SLj;IQ5#$%Y4_yis zs)xo^C!Bo-Oz6H83~_A$zNL2b>19JS4k^IjX$qcJq(smyGOTjA1U&yoeXZloIQKy@ z)d?h?*ho3exGynG3#B%)qlfS2)C$VXH};gKTL*YczfR~!hTuf-+z;FrYB)=%vsHc2 z%>0Xz?W~s^E&SWZpN)AMlShwmz}{~5~KuI7h7X$-mU(}e3Qf`IeP}$Yb6$jT;P^HrNOYM?kS@{fYq_XpVL|x(nPIzU)b9)ycOt?zq=W8 z?H~?T)BHY>*-+A=2_Dx5UfdO%j$NMFdTuSl3pY%{NQJ-Q9K!GS3`)~e>`%lFn{d9I zpV;PPM6B5>BinsXGXNHLNC(gDlozng1Nc263-c)&(gg{ zuN}63IrRArBzgL_@SR=|t-zMhGAe5zLpxu9p@E|?CMM~I=q17NoM*>Y$BFT{1~b)W zve|=c1rJ=jX^uS;2pP@G$_b5J%;K6ZkN@{xdk!MYl?u}D*y!r z532*6z5Gy82uV6Lg?)sA@F0}2O5G4%38P#1?-PJ+a)`V+=Ioo{^q4ke7(}_)68UC- zQC;lKDIu^v#)P2m+Z&w9RlrRVTd3|CMK`J6SSXMtJo(X#nNduQv`)MXmd$hE zA0qHo-(lrfHv#|w^O6S7N8=Q+DqWuL`fGwu(PA&xMa-2DYPbv4k>kuNNr)@5m4SFp?)D}dJ45r zmnwUR!a0T?te*RJcA!pgk(IHQ6C&`}UtC71DFVL!7<9Te3)AW=hQC>$W_LPbt?p#cq)Mi#c z6~KKpH;->BB9}Y+zBay~&%&=Nnvs zdoKc{iz^Immd3MT*Z&u|0rA_31wgLQFhEg)wjbUNBv30UO>xvr%3<~8ayw+lcdScm zneZ|j)wmul{+X$$U)*AZwM7*AYu%5<5ev`CHR>GMj+^0@WUSuL9Q}h7b4HNn_F4MW z!8t!{0jGFD%21qG0HN*=^-J!Ven8*4QUZX*)V*?3f+L#MDUDG`P)uWGz z45H*$7*JOf7tgnuh6WZ|k? 
zP!(V)E38c$8F|SF(`1d7txfe-{$|~WA4fZXanKSm5$vQey}`(9 zPlseTp_2i;>$;yj7&fF|v}lewsC61}To_Y$^&w`=niHAN6~w?z8XrI;jC^ru7 zm83d7)YqpFM+;zM-H@78L|bj5yNH9u6BrT&^^@xiu_F+Z-f-7wh=Ai@6^eQHMW%N1 z#**@M19mI7NjT)ReW*M+ncXP0v-$Z_-SXmK2b>ZBHrU_LLaWZ!L?Z4q^9o=>#qB0X zDr)ynw^;l+Sz4;nxJ8UqNcHE|$>w$=Te8L0)QA2!q)O9rmj*MA=J%t>0gRBw<*s<8 zP2*ZG;0w#}(`3Fsj1Iw>RZYpOr%m=NV*hqNGW(k|D@sg-ZkrCe4BBI6hs!8bpJIQn zoGW7S2+RTp z3S@=t_*(=2qJP#s@MC)2ew7$bF1UpfiG!&*X|9#dx74N11Tr%ttilE4b+9`bFXb2t z8FYH9Hon0Y@qtO6Tw{VkY=ttx&!4zS&}92oI-{`E2Nrya4dj{sa|{B%9tx4W_3k(& z|AO=%OxGGqDQ7!HhBi)f3)Vy@RZI`^;SWbfK^dF9zx(R-M^G=|d3K$U0Wbem-b|hX&i6DcFf&0h#C#D5 zoLt8jm1WMVh=kws1FPQPxQLf9V$QHu$VJ!x=qiv%av3glDF&#Nb3v zZ-+q;ACo27`M7lPb%HMAsyEtfn&HIvo3E`bC7#}x28Su2e9QdbgX7Ayv&aq(4)oWg zWUZNDuB(Hp641eG&iTQkT?}&;5<6j@k#&QG&bQa^ZfU1W5lWaLSL~duMyulx!zs4& zr6aa&kHBkAR(A6DHE|R59&@zQdnq_;uOG>3+P3 zzOUw0>Go2zJHY4Yl-FA9y1sca4CSoW3V5bW?c->>rgdvpv{_E;v}Mx{V8R@}7w#1C zq@JSYRd9g89B~ue_@$Fxz2MHGGi`1G_tcy={t;`6=LTF32`770sq6|q9|#V>I*9Mw zF)aEtyXJD&XPZi&y;)hgt~P#hW&t{?I85LBoWTb-+#p-708n3wNT*SZ>KBhlOCb&T z%C*iwsDk0K^Ad-Ar|WfkJho!aYdh6Fp7b;1oD4>^oma}_v-3SL`uWOf?=o*;tLq@l z{+iC6at_u=KY+>74=`Co?}KW2wXJ6F{$Y|(7e{IgnC^4p*xs(v+&@nnRN41Zq8@h! 
z`fg{LHz9;$sL-?(jH%-c092Nb(fB`d1H?Nkgm%5ghQav*ggd+OPAj7`hJa}Rr3Zrr3YBy4p!8FA6clT>taYDO$Oos-k1ah);t zXdNo_W%Fcz9!7q@8d>JGpcxk$p*T`x&`6plWubwRu~KIXxpH!73f@`%XF%Pss6)c3 zu{0l|XzVEt_QYOYlyE`}@6T;%i20>4VJFFSTUQw zSD`>gHd@lOY_L;@)@9B)wTUc>t(qMj^gLplx|B0KncN(}7@{rEZeaL)zz4v6b3TMM zmF-hJ=6b8XWU#9FdFvWJwKqfTEs@0AAO0pf9irkC!eE}6aYDHw&kqHtEQm2B7%TE4 zLb7NL(#q6K&+q_Jos+2f@jB zwH8xbAeJGut!=M973dzn?K$1sc%ZrAun9u1lc80;Y-iq7*Stk17*8RfR4`c;DxqE^ zy!hzxAKnMXu*U=4F|zf%X7Nh6;C?=Peu(OKFsZm#4}IfC{;yc>gzn!)6e>o)@*uS^ zjAkh-+vbOuq*816ra{}qy4c&QoeJIcuTxXo4h)Ua7x&bv%odF2l#9*ttaBcAuKxP3 zE^qCV7nZn-7sxc2>p};cW+cqmaTqm!KZr;+BrD4fNo(MYrm!|YrzHL%&&4{Ef@<6V z_)20o59@qk@h#V`#lWCexvs$epp$FLYF}1WZocFn+6v5)h}2LH!wGYlyYqf9+-z{2 z@GRD@qy{I8!}|pifHlc%3frCFfsp`~r1_A?=_%XrkL0{J^hpv<3r^bt)7?ySHLe*; z9NsHLY;~$)Oy}5|>}pKJn|uW_m2GBgv5Jz-vvP5*g_DPgs#+1hc<2&_&!@M5AvuFL zs7%YWwH2q7KMa{Rdw~>hj-E0pHb;qsBYr;^AwJ-7s7-6c#IMGCt(0n9dHMs%NC|&# zpB?~=Tr7fXI5FgweuF&7ahiFk&$sQL%+TF3;#Dc90j3jjs$o%cPlZbX&pB@2K3hEY zT@OW^%BFEc)GYu=8P*9}95jzU<|((#mSO$m@HU6%`0Zl01z8!y5vFn+Q_`W?!TpN( zU)YNJuZmFKg4B!Zy63idT*avtTEuop$LB5n!E`zagLPS0r5`k{mRE!7WW z5|XBlPAq0??`E@-ptb1Wh&SVu@0$I5v@1BqZzg`UT^eMc9t8 zegQZ^p-7-3^n?*U;qVU(H@8!UE8Ut0AC&&$ghHwRO@96Y2*g`>LER5@7SY)KE)&`v zRpp;Lw6o<_r^`M)*-zo-^$kXZFewnc5D=5=K#D%6+aq>MAk3uV6i>T0GUk58GO0^= z%arvdp7{A3SCOV*UH#jGp@SwN>{fU-qcF~N!KT@Zx?}lw}PxBiB z_Izvz+>$$uRCrdk8ed6UCfO?5q+8yq$Ex|qP)ezl?ah2kkDPh3LI(`!)lG4T9thb< zA&Ep(xIRXuvKOa$5noYUapMP; z$eGoJvarK|F>Xu|2-`s*F6>EB@S*$Hcl|NA1h_A)|uWJVz1u0-5@e8Ii&9_=C=9k5lXmG~G^5RsgH&x$++@T^oaQ!`I{_hlgV(~{C!+PyfYI4wEDsmT}8*xIH zYsr&NC0moH`PHI=+{n)=TOjlUSqHWPIp&0sj8D6JscNao=(L{6td*ZlLiH1d5yR_ENPNI?3TT55e-?Gcxkj;!+j!2s~;s?epqpzI-PXnuMf(x<4G4FoO`cO?x$;`=(c3Z+N5cTeib?;9 zR!?>f?U3(j9&-{q*zq(Lq_8U~`ds8~T;(xyvPy+mU{mif@-msQfR?;p>0`^_q1vKY z6v(ge%wS*efuy%S*hT-j_W#GV0D1wYSA-x1R5k0hK=D;((4!oS#p)fRz>5n^k9NG8 zk{$8>fsK`@Sg9l{ncR*qN`xL!TJThc1G$&g#s=n%C_5$%uEg5?8?i~I$@mb}2jBlA z>Mf(-7+jsy%K zY1+=bL@2UdsJ*X-E6NN_9;jkfF}2s#ME5M<_*i4JPAA--T#M}^oFUKxWCCBh66n=i 
z{FL_#jg%4$I&Y)A-p9bOj1jGIzN&bo2TEu+cYCHAjw~|u>D&!@nT{-T(S8`7Lr~Rh zFuMhB4V(ISzJzY|_!ak?KOp}}v+l*}8#~T=@&y~kMN(S^z~Gib*L1m3(I{V_#|en( zQPv@HV)RFIdEs$V2fs@I!ZLQ9<#XZI)aQ=I%be{!nfVI!36nFy+CD?SjKxm(e{;J3 zV^xClPT>BV^NBxROTWU`W(tIvDx?HJHA`{5A11lNIrW-zi?3y z3U&Us;Yo~MEGaYRM-1kx*Ri8t^~$bj1u4Tf*dVjAioC)O?cNn4m}S~52c-chb;wn@ zSa8)5jDpmY6BKIY`8?y{B;e9`-X>&Q zJs=?YRlRM032Oc@HFi05JSGK+e4$q_ujQW`lC1Jy8-sE#m`wxOMK--R#~SA;^v3Uu zx-HzR<}j8IYq{zYqZmIuA8noqzhgX`N!|C~4yxB`2F(of)oYKd1Sg{~c$?jFcKmTV z;q%RFCs5%{yy|HZtiD+lYV{C_;vsP|u043ET0|p05iI4- zZ9rcMc1L@7osQ}AwTAAc1$=TRlaW)uf3a<*)=X&Q4UCwm+$dJ{sC`_(JP z)-={;@6}Ah`vEwnM1H_S>;rw~E3SHZxqyO@E!u0X7MCBn1uLLtx|N<^rz5TP6_yd7 zd-$qu9epH-xFN~zWm!FPX=&%sECiKrpYDiUANJKb}trS0r38^n`{ z7J1LL$ZdCJeh&j?j5N^0O~6Mkt`O{8Qpio08rKlO3`ITc6g$bv>LTnRUby+$|GMjV zfv4}|QtBCd195G=+@ml92;7^qst=0SjJIj~=6tXijJv>HyZdq4D^}q4>4Jv~d{-5@ zl;L=qjogl<%-d&Om!+x5Yw^(}>6j#2mpvTzYu9H7t$BtW5Yzi$=j+YI({pf%R^&-; z9SPI`;z&VTTuNPUcy%db&@<`MZIsV;8W_;##Y!ahLF$oz@P`Tsl)`=_^*$l^+Ua$F zhE8uo@m5P#C}`0S*6USuM=&Wn_%6}P;fQ5*@Y5vD2~mNI?}R#KdSeAF>cy)Vu@e=@ z`GEm07f5nYcP+_>C>fiWisO~=ET9@0PlY_4XD~jFCrWM_jkQZjhL+?8>9_J-nAUMP z2o~Aw;;@}{e}7-mJ-8G`LCnwFhQgEzZZJDWEu7pAmJ}?>1f^UjIJIrOrJR}S=He$b zlC8kwvEqdZOvX$R!9cK4t?Bnzl>lU(6X!5>;vb}E;V!6#+!2?N6jr|3>ikE->Y@^&>T0~#x|Q~5LMxW_II3O;-r<;P@xO}GzESA5rNMC< z0DUXa7A6+!?28B{EO^XD-A8HA;GK%zVgNtC9^B~>7O9LGf-3NAsOT44pD-h){afqq zH4u=pdS2SNEgW6GDb4+KFDH^UF)@3;G)>R#wVZMTgKz$?;5vN7a&FPcIRcN$wc25c zq&5iGe#L00qW60#s`g^DcoMIKK?5RAL(0Qq(cZNzI5wpUr8BRR-t6r!{58*m z)!5uwQIv`HSA-KM1lVSsxdD9;3%oyfIq#?9UcRB7tu3y#jUTOvVz>4Is>jba*aTZI z*3&OhO2@c;laJJ6h|?TCL@n%Aa>qUD=-ko;|Cp@R?S{C8+ZHsOlBgV;_6z*FFfBFt}+;wc=r0aI~~;tA5Wqx=E<*KPcODkPVX z_w_A4f{ZHswOE)BC;x2A92(EQq7J1k^!EwV!Fa?c*&<9rT zb`G+0enLZwo=_%pJ@1~CIU1~t{y-TAMc(8)onHLr{|*obgqT4X{&%yX>QPJYgXmr3 z>L1U*3SpwpDQ`{Gb)uJ%`oi0&i9)Hn4f=dQaI)yiD&g^TR9I0F!`siwb#3QnW@kD| z)3LxW0#r()Ct`g+L-yHnaVI!$^=!e86_@Gecj8>W%y095k5tiEv1o4HgBch8sLk^i zF!|d~>)n=R&dU@fy>!Ez88lXBbbeOd^J32>R?gOfp<4A=rbiEMU+fZC#tYK!V91&G 
z;$Hdbk&aWqdBs8J$}PWIdP}{i5Z<2hdBI@ar83QqQwP|-AyumQy4d#erKL(m==slJ zo8eHZHB9JuWnqW@qQ9Emf?t8k#O{a9u)T<55Zuu=XtMbRjQAjGVd*-_BX3r(zB}gC z^5iCzR6oi~9Rk-J*Ccz}NW_xTdjaH*HEZrODRR~D4++BK=&J~72=iYT;u)X(5kJ~SMyhv?vIYl~0@MLL=tkmKjE zwp0`ijkW-%)Juj6Gm6BEJ=j~b;jrFpxd|8&5pn(!L1f#C1K0oJQ`+qQhfm=!7z(#l z+q!>8;mSe<|2I+0zx!I&@R%=0eq$TWZW^>h3}}{)H=GWLEmq&r+9I2C2){cbHy)0f zoYV5Bd+B;tlbKn}e5qM458R)o7)@(J>0Fi7(DkVPWk}GN#JHtbG8S$Q5=Yqx?9U+D zaP8ch%-iz37G|Es4)(2NfxAe)U%|E*K;Gwsbz~J9xny<$br=>sfDU-QhP|%A%dGMh zEzC^y-_Ur}@hNF(n&2T9RHwvq>oJWu!-p|UHCWP2E?tLq2EN2POKrpVe>xpC+&~D~ zQzOuup?mp8*f&%9@(k(^3bDh6y)B;+j0X^-nmMlB@J9k!Io3snB6q2sbbTPfepW}S zZ;R!p1nZ1utJL9ya)niAWYkz1c;%@;(3Zz3gf2IthW*+jfvF)FO4#rOxVt zM&X;A5g`a-r`gshC81w$^Js)lsb9kv7E?l9EZNJ+{;>0d)~xgKy}R8ltMl%;%B z(fYXMPZdAtJlA52i>-wN{9c~IX-w?r4Cf~_dKwPBz8bbK78lB?Suzl6-tXY)3QrTD z-8vy()+)4p8N`mOYi>%NDWkeMyB2Drrp+%d4scllnaeRWT6P#Uncmw8Ky3{qUA~Q{ zm0k^{#x6BpMp-#4YPBa)Hv&4-;bYw6^Vdy!jAYt^la@a=53Q?;s5#TZljIgV4506k zjo`Bor+@26iWpO>-ZPqZHrZjwK?-HCoT{{3XvDF|KTKDWm%D19DPQeVujCmS@|wcQ zMgfbzc8<0kLG#(7J;XJUbpP(vzq%9nFL_kFQ@Ac*6NE)O?RS;SvVpAFAm^q(j-1h9 zLF$zk(_Rbb&A>D0hALw+_mY_cX;=gfkuY4!qUckD+sefzu(6u+2tVR5>2<548Rlho zU%LUa>v=#cEde6_9shjV`P#AxxL%{7?oz z*xIWDkxEbZeobUt^HbeU_G|X41#>XLb9FMloxcU`8>J( zSx!V+VbG#c;fr#?6zEl}Jvuz?)qO7~XHoJEK9hCVNrR6<&&dy>M(XURRA0Y?35nyE zY>#;NIM=R@zNiA(bDm%I)G_h^e+7+9vv+get(}O*w>NiTs;@i0gm@=3(E0E1-sW%2 z?S%Ub^NJlmNIaObF1_IA^f?C63mcjbPBl=MX^>pKBKg=Ot zA~?4)o;8}&TCu=t(qzmp+Dzv{`7gXkAC(C(wPeMzmwjG6saw}B_{40wF4gRArlvdMOlp2yPEE_41F;nx&~*`@c0{GrnK6&EE@9jM z_O{^KZFEF4h<%ZeaSvFUKGTX|RDG2vcEkJ2kclC13TYULfRm6IxF|+hVSq3v8r1(d zEy2H*AYs`4zC2SQesr;0vI9Mjt3^t|o{5&${XjF_ z!B7`efac9*&J^(9`Am}^l=;(f{<8v=mz^~K0_VxjW3^h$J4-}Mrd4V~Y#NGrulr(l zg?q|1ix=#>^>pyXxUKpH`Y#m~$|EQYrw1twFDJyfh=Dx~FNZk zW?6Z9JFI8dsh9!FbA-X`t?0vVjBKw;*q_{!z_6*0 z0e5|?+UKYZUWSfSVeh#()(29ir6yu-1qmf1MnqJW1d-tFwu!=^b(2+=e>nCy`lsw% zwQFZtj<8Nq`DB?Hy>67P4vbJmY`?4ILm?=}$k;?tVWyBL?&|+K>lrh7GIoM=Q^7{P zKd>W`E0RIbU+uy3wRi%t-y6u=@V|Jlb%H*=reoj~H&hn(U-okXzp!BjR87Qh-1E`x 
zs&_gm*#OqdDIQM9ogOf00YXNrZf(#eOChKfjXV2>JInl>^!Xi%(i$ZCpa+kl8vgVu z_^uoIBC6jw6Pfh?baq2y3)%xYfF=m;3b=#|@hwY4i%L^U%ahzfTtR?Xf{AHft) zD58@UGa}cB4*jc4;Nu!HWu1c?_grCm@S^7Vepr5=Wzxi^8>g}|kRsFTDdTfZhU7s3 zI}o7Rw3*51h=Gi1zXUr%*3{Bw2`%)kwBs_7g;lb5ZHQ|o)$6uT#N4tw@b_(+fGP^BX?T-`om6^uIK5qhHLL9&1!PS2cajY)!z?d z>&Wn2&iDQN;htfT9D2N2fl=%Gxo5O4%WK`<53cDU*qI*d(Wetfc-~61CdIDGR69oP zF%_bqsb!0QF2LyGv^zIVoNP~u@XsPEgry;Sr+p*&w7DX}+_pc?qOrzd8d$jYL*t&n zcW9mwRN&5|7KhNvBdjX2dCu9P$OWE2;VksZWyO)Qmz=GzYZkmJy^mg}5{uf`0spO|SNjfe+tl$!##hky2OR#pjD4 zB2uIM#89fgjr;||9Ga5{!AQZq}Xs;=e7Yy|)7 zhELkS(j;F=O`@ap0n92m(q@?*#f3%3#;N<0qp^N)BU;3Qa(B?nNhrci!`i5#0NMEh zR`IvTK=j9T(o(ut{1`DiS7ln)Su``7DIlnIPWkIH$=)h$V}|GOW%D!%H1xBW=+*;; z_BQgz6=!&5fC5v(PSc5JIfP}<$0HgTfA|PziHURl%ihtm-jydf%Ls|eY+~b&SjK z&OhT$Ue|*CRkpP^*=?fYEQK@sc8~ImVEX#h@-AuAjV5X}+MT=SP@-iG9FwI45K~~5 z7J0l9-f#y&X4Pn2f4Rl*k>TS`PEkIM)nD-A;)Cep`iXRWT_(vCQGhKNA}V170C5*J zsa+q3-3KpvnQ4xXsEByCXItx+;0ef|jvsX03KlHiZ;_ zxl|+R8%GedM+pN{L4CO>VUv`8K|y0!Z0y(66;Z6sLmSK7w)}wO<@p#Fp2-E72&<24 z9uJs}IZ_xioAMl`<<7Q7dn0r;U01w>*oxd%iQy}oWjxD*w-GZl!VIyw6`C_$+B4m| zj<;a5in7FGC0WIjjizES(w#uMVO%PT>O#*fy0O(kQc9}8&_XE{m;Z;5GB-tma=q)v zZSp^z6g;WNdk@8}{gJx+gmbs}BhIFS-UDMdGS&|}Mj`i&9wiw`3hEn$#V6m!*fAXV zn_R*>l3g0vhN~1K;xFc$abs^+)f7b>7fuvaU=GbJv(?Q%^p|0|J=1|5M;C4LYuXX9 zE>bjA<}oKu*+u@(#}32f8SyHdlVtGQ+>2sT0QNmFGBRFudY0P@AtNHefj>cccBTy7 zQZKvH+U;Z5<7oX3SYHoURB-y)a}J!ct@TL_H}?fW+u&IlT^tb`WVO`Ubcv>;t)s>% z6JzUJu>c+Y#`UWAiRBCo?3NlZGlZfF4ifkxOU!rBv88U$gru{qWtR1*wg$$SnL9cVaY$!ejciQy6sFP1kO~p4Mef-fIh4X$07Ifej%FM4jhVwi)cH}2b z3~>Y{cWi%|6B=`~UHj*#<(ol|O>SOeXxUH?(q`8-jP%EhkI4alAG2Q$KK`7lzAIBA zwyvki*s7H9$x07KTLcR?&{&o;4oaw28uB20QRNrW8x48|V? 
zTXT+SxmuGcIsI)*H&Yt`#HVc#;?-i6_Gq}d1^oismakXKuFJ184(6TUz~VCVZ8y4; ziab#3bOBZ|J6$r!jBsOIr`z6kDJ0g#?=)gEx$_@$-H9%i*E6P9f|L$ydaXaFDHew| zJU@RGkU8Dq=UZvO)h+s&5#*@jsaN13mg33M-{)jj$~tmULrYRc9a&Ss{2O;K!vbI4GGoH3T2EV>P>A8dZU3 z92QvsVnZ*Bcs{gcBjb9Qd(EXiUrduY`<<0nDJNz^kRalzN4|91L?F}xD)F{QWF06& z?@Y~ES06f4^xS>9VCIz$vl8$$CtgI4#kyLvHP+VNayrmhJ;tg1OvdY1@EDmtvs%xe*us-DbKe;B-C$m^7}S##Ue-ud4?> z9O*OIaPkU++eEZ+(plFE^82hI_a!EH{P)~=b;xsfWpa3)B`yVItHG~mab%iil7YuX zJAZ0=$MK*(gQSNY9FdBmvLDmDrcon7I{a|b%3(#X-|jZN)cq0al`To*hRZ{y$v zl*%JTyMx?A^r_EJn~k=Wux8i(?#GJ%O7PG=JQ$X0rzGAgKPL6+ znJqX12bIM*)SrZ3mq%RS&i>OGpv;F7#zNgmkX$88qN2RAt-nfPim~`^n4Jjp_ z7g7f-pWB~OAJHXX7A^z!bl(sMu;-49iC`++7LN^`&P#4DY*r%*YbUQ8oeolefHhA* zJ?Sq)wrQ#h6Rb(vwAQY*nKQ{%Tee}o2Kb3pySuCfO}-rnE)`m#j%~B+t*A>pH;z_9 z-(h(S2MhDwuj!G?513cLXrD}6Qm-C2J9|9OiJ(C$D=m zC$gONieT)847a&a77d>k@DRVN_iaG8k?3 z#M-o}YET{1OC_Q7W>%P}=T+S89!TM;@Lv!8E!YWOmG9RFj-?|r?OkyLX0gUhQ%cyM zdf*GcLehr!scU^)ep0-L`kiz<7Q}9;-hNDnq;;v+9?HO?&YjLNXVMpYp=Rjne*?o& zCs>`F#EGa@T!SvNqrEC%J?yuGGTX~jCuzx0rUDoW4cQ`rOJU@BtupuM(LJDm=CQnT zo;G~G9V)lK&8t;*J`^7AdBzYNa+cOa&iz-YY8lh-%3dUtZTf zV7H^m?KSl~%US_&zBK{cvtRM)mP^wr{gS66K|Tat=(>Yj!tlkX%^onoAs{ZtlqyAzeA%pD^|Enm!mUoo>Zh zm6Zam4sBD9-b^JKf%xhbvkb*XiAtkNKzFjQ@kH!2Un30vN861|z2O z-SLI}NwRoi9Jgk@U)xw))prxa@T-tKR_|-4_k{71+3LLb{HWGYT&e=T%G)1Y)0CUS zP}Z~tF7#;O+q*Mqb>rYshNSa`sJS34YKAMr{f+%ivKt6xs9J|0jtO+cT8?pGUed*H z8swz;asITkQ-LS7_ohQj(eh<4!W3KDtyFe8r>JPHD!v#%NOyG^*Q%Fz-fG_J=$*C& zeLY<4JAcT8Hz1smkM!k8o^otiHdaaW4OmC7X)YS6^#hO(slBq zZn|@&*(y8YsDi5`KC532QD`dDGp-LvP<%YBce2Y=jgSz}8$Y7s?95314ygGoXvNcF zSr@1X@9v<5f7jl8+NDQX&HudTu$<^j4t=B#yD0+PK>T5^f2sA8h z@^@Q;yBIReD&+Bn3Ov=lW!JPF5uH)CS2E?M>aK(3Z%;hJW0D+>y*;!V)nHX^AvEsI zSTLRF`{q8nT{;UpPFSnA@fnZ9!mICKCedM4lY_P+qHG>|)qCT%bDbWkeBnT%LE5?@ zeKKXN53+#Er@}m$F>UOp?jFr&e?Gp#u5E&zs|c}TWTPiO$J$oB7IPZBE=Hg`_aMKe zH9bF?Pi>~4@c&igZT9^5U*Gm3W;0&7?EbEabs^vd0zNVAs$Qx0Cu&vnse;pI==dwVI3}Btr`G``w9z*?F6vS43^2mzL`Z%NuBDRCy&xH*%CDNb4ESwgWH)PG>jQt2F_KjP7ihCjXAVMY5^sBO3o)#(DCENXWP~1 z$+yMPv|4&20Mh+%>1QTEN{^stb|u2Z5y 
z7Duqs`wn_#fiLEG)tlIRxoeO{AXyD7Ye4%5=FmyK!GK8ZN90uH-yDP!r2(rg(u}FB zX{1%cP4^bsZ#-{*c1Q9TRmDX+wvhg)uDUzuXSyBx)oie6Emo#F#-pvQO9H>7`#n1zd-fdU1b!mA78Q6T@y& zK1qh-it{cYba|Vfk2W!WC28~>%JJz~@IBP$y{R)drzuUXUBiC&BN%3V^@6@E06T(cOWDbbUc3aYcMF`2h2lzB#o7(+5@V=%h2{R_kXuN?D{ z`M>rd0v9NhwuR ztPBEcg*2POhIW7NKBU^;NTn}Awx+@LKJeQ7c|;P>&mQJ+7iB!(TRN+P*WW)Uym{t3 zq^4;qAKxls&kSzoc=Kp#ynmd&LD^|bWgZ}(_xFpQ+MJ&DpT2@j3v=H2XS9{I*Oc#$ zYf_97Q&NmmjH3HSN9onUt#RNlRl5SZqp&AXu#)D2M6I@qH|h7&SDKssA;)Xa z>+}tYKakGgw8O26mSwqt3fIq!85(+qxAb9fdGK1^KF5{eyc!&WF~MaaMd?Ow+Vc4U zY@R7efvF@&%D^u@4gGs#VBcM16C3Ab2PW}ck*(=klimPaAX7k>=bk#72-DJ#Qpxs88+$~ zeHt)m!HN+c+vcc``mVAe<(nys?~g~wWYP3UfzvM|ocGr<#IFcIMeg6LWx;XTY{ANs>13rU)6FvDbA~?66;t-0XLp%|fbK8I`(1WoCM?@gdR$10X2l5A^tz+d` z$Nc8fL#%Zs1`n~j+;sW{*QK2Hq9U5!2l-_SO%KQKT<0>Vh+Om+Dy$Df*$CQU`MK}n zadv?2(Do=S4<7X3A9QVzTt}I#B4xSMT#YT|Ih8D1gm3QgMpd0mCaqdP; zu}#8rKCO`A5qL=jPh#5wr3^T@fxi*1OS9dd$_HJZhqaAdB1y)6isaI_M}I|N@BOJV z>T=dXcntHrRpKOPl;>|c^(i%uz+Qdyev$1H1cdWsi;Juguk#h=G*5rbQEudnfjRrz zR~AsK-`*a{9np z5IxHispZO$jv`gIm4@tXiAxAvViZ&o!mx$))j$^E;NdcU$Yxz3Sm?E?^*BEyum~E; z0G^mnXp4H?V1ETT*{jf3l$vrp5~#b;7DfRdFyyBu#EQ=iVYok8B=@q z;|RwB5U7Ky(UxZuHJ*c{CfG(=A=K1g0Tz+g)7Z;f)_c?=c?oeux-ar=B; zNeg+-d<3BgxL6IZQlJMRvd(a<#x3=34n-e|YOht4%xf23dGPt&4R~pQoYBhykyf0D za10hu==#mvI8nV;Nko>pL7cFH&g$U#t!x39P1wav4hTWm-pXsRwKY`s#ie83@HJ4> z7oW$$cx&=xhAc7i|Nd$G@y)NEqTw)kE}~KkSqrOfZ2=INnS+_U-d>-1e@fBIA+K>e z7dM$3CPw*~vE)WsDS`u4SoL+rhZMXGI4U6cp#+*~Oh83P!gtEn)zR&>nTf|`xw!qm zam~m}L>AC5K{q5mCTlM5k0uW%oay6#Z16b8VA?tAV43X2Omnj1+kOigjiTbLN24N3>@5O5OH;8 zOk=3vf#8Dqk}gVeFoWcVmRW$`cQ{#{E!i>84JifB>M@2oiRGUwRn;Tmcyi+`&#j^~g!j>u+a zQ85CB%KdtHYEwy&`I6XIJk&V^`DGJ^m=@@t7X8%BPpl~eI~ zMz63g0cxLBiHLJ@I-!t+!>h;gBBT@SewQV6JT+ZaB^E1MJL+?Y+*sG-EP_VZH273f zx|jYjSe7&?X7Q6fAYNctq3;eQxN(Xbfdf?p5@g@~!@yz4>t zgAly*A6OkZPM!lSFgsNV4TXJSY%G8_Wnf0U0CY4%Wz;T$B+yZC%0Rv7lcLaAti~uq zQ9ZTroF$l3JK?vXL0J+t>>>(mF?h~ySd=@;y8(k<>Z9~hH=a!xOhPSLa3h3RvCWy1 zX2nG43i|YS`#LN8+?|v=B!Fk&XVH3>VM1kqzoE7?saK08E{qB=5(VOV5ZdP(Qs!H?a{>On2EzY7TwzN 
z)OU&Shk5ZsvWn)+>D)~!jZ=KjG)zzOhUCOxeMzD*sMzP?bBuQ-U|>Tyrj%lD1NMZO zeRhs`?pUqHY20aT&G!HYDvdc(Q?*l0d{t zu+lDMg17ginw>%sIkSp${5buv9Yw(Bo-9trAroRZ1j~vcfYS-twfpL+{jOfshPcrl zAz>@AnIuuAhm3%%z3(@u(FSz&8?NB(3%ME{(8{S=&K#ijtve8CasvP1I`gy@Moz7;bPG2EnMG$nnLri4? zsxk>p?DBRi;|Cj@Z%QoT`$nc-$>0y$F4Uxe-QC zTI{9tC}o~iSYg-aM@s>erNd2Co{)JbPOyP%uSe?J)Po2&t^``C+VKA{Ie>KFzat<1 z4$HVDdJO-~P7dbt(<&*G)Y|2o2Y7$NTy%eSGVCGouuHHN-gTpqn&w0L?y&<;^Ex_un|Z)wYkNF`k^vPX}asXzU8tw zw5BfSd55{3HXu)E5P4!?-q6O%GOY-kKzGuZ7KJW9XuZk2jsw%M6M1Nwx0&!LWg$P2 zFt-hF&!8hF-Gg09T29_@2-a%vKrNOslSiSb$Fx=gTY*K&CcW(MU%)mJxq)DcWfnpe z3T$^?+`pD92->;e&1HF1C<)4X#I1Wtf%f#SlrSDgM!TDyjxsR7u}&A-{Y~Wrj&<^C_{n zm_gCtKMs;W8b;LT-}MpM^=dFZAOnrI8(8i&v_bCzvK-{N5J{8zs%Fd!nw0FA`P{_6 zKJ7;A&wAswZf%wosj_bfNbKFa?}#yH*9_{SKzE&IRAZc=c)yKQBzN#yBtaMBVgIUL zX9cfCX5ic5=vC}&uKOpqdQ75n%d6=KPw|3=LsRk`k@i2o!kvtSevZ1lv>;hfGX&vD zSX?RFFtx&a+3Swp&o1HtdaSFxy*&BP8dk(Da_X^-XiG#kw8EVZz!NPCk)WiDVO&ZK zw))(4@a=uRs2W}dn~|AYNQ;6X&|jm!o!T#tVnUK5A~S(dT1D%3Jm34Sr}3T9fm7iE=#zkf(% zeUfObGG{5lfl!V?1 zXQjW6J*GB{OBs-)S$Fb>aK@tSXxb-xOf=X4Ilnn&kFQYb0e+VqYJJ0B=L(y#w*Wt~ z6dgq2AiGVj0yhl7SDe=hp3-J&-^=&So|hn=wxQI%i;Ss8YHGq-63f3&L54*hFN#Oe zl9{Kc{05uF&nnJd6XfZJxU?C0siT(8=(lb!Rc~+KT@}264)eCMj=jAN`Nj8A0Oh{u z=S433bMqtCz%hLg?^E(lF8JPNh=ax_Kxr~q32~9O7bt=709!rS2M75@|3k7x&+DP~h`^biN@})E`L~^21^3?{677MFkK50O0WWn513(pUhb7WD5(SsM;el z^FOR5Uv~WO?ARv_ty+1OtL+elCk|m_*Q|@18RK*({$K~L{Q^q~^n{F5l~siPNymh3 zmC~y!gV!E6*+B#T?D|A{HkRUWcYCX+XAxNHOQY21DWn^(3m;<)z_uCoOK3!j%Mze0 znv@QYbm`hyg>6;R;nya>RtIH=G70M!m#Jkq#jG;>b%a4Cn^!Ch@mGRt_+;!j^CQz_ z#pZ@ahp@EszT|tZ$nQBk-66)Tu=qBvGGhZX&^p7R!|zo7$jhlZrO#)^Y{CV7+cE0O zL_2Q2jTn<(7Ytk`7sL*kqbV=OBdchs#+;KtL;6{QjdDvmIGkL8cDE9gh-8MHDJ;=T zS15a0b4;9L0^4?MllA*&S2Syq~$ZBpQS5WQgV9gn=vhy{U| zsXn=M@9}PihQGZId-`1q&yiWzG(S4vivX%tMe~U?K~fIOHE2oJ(Y_Q?!&9b)BgiC| zABJQ0ssaoz`w&Kee?~De9M(3~#!gZ1%{ zAq`@LzDh?sZ(dQ}ulg?}%Zwk2W+0P`sVK=il08?xdREwURLM zF^oZz!2b=2gLrl=qppR7oCW#I7IkgU>(=@CXSq@w#Q%D8DQFf81{g-~)C$b6=ROhhqlD=Ixrfsh_Y&>nM&T6+?Iq6(d8hk=a=j%&h 
zkqvUUhNVl4GR0pKc$81pA-6_VtblZ1794EM$Q)wY5bh)=Z@G;hvrW$3iU-j7_qKhu zA1g%FFNY+2&ayq?wi%1gE+S?QB>e$iMgfoZ&bgI!KCsJBtY*UzXrS?`=!NfJeu~k2 z={rVGP!`cX@Fstjcg~Gm@qf17h{nE+l*{2^->w6zh2+?qS(XM&-P|m;?`XcB6Lt8_ zh8j*r6o2k4h^}9In04WAdz)lt^BA*6ur^V4<^dy;T~L5B zFPT*B`tb00=JP>zPr&ceeA2LQm7RKxTUd{wWLXukeRid{ywY*B<%` z&M`B$03vA~F1LiZJf6QR6enF7X+q(z(e)9cDJPn1k+og}?9{`?Q^|V4bq3BJ7HAP&m0p7cg58Tq1kv5x zIL1m3H=FO*l+NKW#XM6!yg}K+`ti`Xo4pzt#jmA0+}LK~y=y`+IC$DcW!IEud(M9p z_DPWlxy0v=m?P8qq5@1le@|B;mf16n43)IRR`*|z{h_tisZzw>oVORIqr+d%m}PV# z{C0l@bd6qN52H5Z#xIP_8uHN-JZ0sXO5G2?QQ}{SJgO1FhW3!W+}@SP=aF787aU)W z5fj-U_EI_zJ9fI9M0rd^%KbiyUetS>f0!8AmB_YQR0*X>b-bz@J0nI`h368*$m-1V zYwqZ>MiI6ovw!xGSOc&7(x?peyE_0oBe_e@{ZnPtTxjck(=7gEpJmYd9qXyHfD`QW zfemq;&_8a#1Fl+?qBn(VNgENdMqb9Sxg%fuPi_E=-+YeYrD${g#V20J{b=#GNF={+ zz-OAOQS?$v$k3EPd{OJK^JErDF=&$X6j4AryE*8h?2J&*|1d~?w{7|lOQkG$YL9~A z^EycpIFX_oN9IFUT?IQEJuyNn;Ai;@i5dde-P+_}0(ro*E9O?9{?@I#7&pJTeB4g)80#RWvqT~Yxamezk)BeUe4KqR~ z+Yf`h8D)vT>6pUht?`9Rk2!)(y(#Q^(=Cd2XAAGp|01kv{dz*IJRSo>^@`n|`o`vv zcS~c*yW0V}6K&N*cOn@^Hx&OyJjao889NbKwO)aSE$9}47CEWFju>^q?JpVn`uabi zq!xL&h#NAsCEj~#LW0uu_Oa93zV=owAk+d*H?uS%hm6Mq+?6Pxna$gCC7ET?65Yk^ z9sj}5hOy+0M+!mBW9cHW`B`&lh>E&SM0VMp2epwd%lOWbI2DTrC-whPb(T?aEnBq4 z-Q5WxxCd+8AvgqgcMtBajfCLAEogwo-QC^Y-CbXwbMJj`yuXa@(VMDWt5(gm=2yqd zi{7ER*-Pkgan*-Ej`1yFzMifs&myT|CYVx2Cywdd{P-#8l(htpH-LpwOv zxOA0g6?p@)Eg&imW-+vT?4GNn|9|O5E?G9WUF~IUe7i}cK+>ycXb4eZJh|~|joaf; z^&Zl~>~9`7H^xN8NKX_mTn~I)E`elRD4A$T7+E4&Mkk1YWwNvNq%x0c2&O?{BRr`F znZwMWn<5XAU3RJQA>Cxs$u+cWo6fdlGf+g1a$*Xt8R@~%biqwL0kY~M4fAnrF3jM( zCA06Xu=11lEB!5LX3oteN)}OpuyEbCy-HQ(I9QyewSmwRUfP3r&j1^QWs@AQ(RYi? 
zzBLIE$K2)C%x%uoc#s6SD-tnO_SRs$Y?Y&1DA3`HHM}_ppa$JwVeR2!kIZZtZgfnV zZwd1^{-=;1TxmUX)ptKQwH-NxrVd|g#tH7OtygTz>1KE3u6WW3B|$iZm7C3rEhr6V z+zfy_Ii`GKe3djp{2y;Z{0pL84L=^#7g0v)PE9@PO>!qoov+;-`jVzrV6Fep)-Cvu^?c z`Q_Dkw!1~_YMEIK8}rHSB+fma#3Ao)A%>{PY;i6`*?*IX0FX|~ItIA88K-!(;p#|< zX!mcr6u_aVQ zRy3-zLFf}yyqx_4)7`)e#E(C@COAN;F8*MlSUW^`-y1-t^%&S3iyJx?MV70~4Ar4P zr@ZTG^w@yZ%Mfxj5{2;d|-yg!}js z)?Y%%OEij3I(vk}yw{&@;yxk;q~f*@*G(oJC!n*P6Pe@_MQ^74wi*2{UR!HCTw96Z z&>MP+!dg=E>pZTnF?~k98onL`FWy{2^3!*W#?Wl^fjd=zDrS>^R$hnrSTE@y{Hn2-@tv*$nX`me;sSmv zx>AID&K5Rq&X@Quf-!1PH!V%gXq_KJuE!)9y_9W+9B2tt2wcEEB=pnd21&m>2K(XF z@cxA0MpHsiiL8J0T%YiHE(Py8C<{qQ8hfJ*E|*^v{@|Qm>|m;)@jz$fE$Pxcq9zrr z;j)c>9$7~8W4kjA<^xAtt3N2_DI`-}eTW@dQ}d><J6=V2q^jql;GG=pq0HyT*Z( zO>7a60IkK^au$hR8?{|Dz~YEEQA@S6-sBx|`i?N(l4OM=t|_J?OUxA;=HWPtlbHbl z7nPx9!RD)p+{T*B7j&Vx5n!SZZ(b^JwHx}CzBwwlZ6m5}yzEXXkvCk^9yHhIG_OVS z(0Bf$QOnlQ^a=;P|9ZM=Jlimj0lLuR>J=`J7%BJajzTxN1(hj9JUU*&jOvk!5=f|C zN^x_*erb=Hjad~`g>T`cKtW*b@;xg1)LDG^zg&UbkxBKmr$Wu-2fK;!ll9Sf z4hX&WNFzWiEQWNTs3KS!nCw??q*bl4`YC9`prM%FYS$nEZ^u%QCHu;m~1y z=qhZpjqz7g2GI;s!-K=Al~2%Qp4c5qUAVdpa1#jwmPHY$Uc2bQR6!&b2Z z(VU5|hq{JFRjZtkPfRDcXNC5qUa4&lX}^eiD$y7%Z6!{r7u(q06R$+_!AW|Yoc$&N zL8T?;kNxoGoP zL*Le`C+;pPlcAXvnM$o)72$D8{br~XAs(IYgi<|Yx=rKXtw_HuoOuOqp*aL^7JhNI zhq-pjB?1eyk{qKSWPc&)7yYM5+Omsn@wA~FuUWnh5~cOd3iDUI9np&N(ydL5RUL$G zK;);e7$!~1z@gdU$!4<;0-Ldb%)HOcwl7Sx*f6%!a|!@Gp&-5}G{?!50qiD6IVGuM zuOp=MVuG%*M?^HTUt-6Tj2>p7s_%&dL*(zI`>?xxhrS>{5`0~oq~+GOgE@r$LxIy< z6Y1%l{mv__O05XQhIc7K@gZzSKmJ1@OJ`-OZM_KlDh6T>6oe4z}A<(sCZC`)`5mNX4d4WCctgMQbZ^-CT4+Jm=J zes!3+k?X<7Z!`RVUaoU~fT4WntB^GX_UAqBhMUXSMuIzqQ^C~hr z>~VX8GtKzWa*k(KvnNZknCk!i&0T~7XCgaHVi_VKt4i6z30=1` z+kE^)!HTE%M|$JkuIFYm&fmf}57YEO&tsR$@gy;m4g1r@dRU!$`{3CUhHv|DT3pXU z{joC-N*ts88X84EkBURuTHK5`rYr`D-b2>P%YcK`9l>_d=M7x|r-1j~5Jnv;l03T> z#ja1Sc(SH0{eP;zCI%l&e^*UkfrdROcel@ZEX!Fo&)cV4^;PR6Tpu&_MW{Gm4$WMN z>*mN!LYaQj`tP=6+pERc&ri<0jBZH9<8S4hR@|8{bv#&JxG;^?xi@5I`J`@SZ;zFq 
z--8gpfgJqh{>ZL=7Hx$&Er^VE6}KpMA9;h5n{PZY{%PX*f0VEN6JJr{UHAx)~F1ZPer?-e{wOS5otF5 zz9r$x#VO?QjN`zo3b5YA)=dbKjnKNy3?$&&7Z;syut{ruDY^3uWJu=GjlL9oI`VxG z+I%>#l<4?9EbO}KOn+ePki&%0qEM2P`F!y$9=iQ4#XrCbQ|so&4#VP#Wpy)Fc#)OZ z84uf!r^AB~32IB$u#kMpZL64!YJxu<`d{UiCCp%-aX)E!YuU`ctp&aBGy3?C>c(6% zG$DT*-Sv9X>vgr&lnE`Ek+T3lEWXhqN;$F?*ZlZBy6_rw)SM>|b~04Py@eI;|mW!g;?IK)ev)e=_ZW`F6v&>tvt8fGCu(>;dcbg(a=+vbMy zeW0R8=;o}W6`%$A#!lmO+R7O1i`-dJ{8&OE9ukUWZ_p8bvY;p|^Zd2-`*Tx=vYh$m z_?8syrhhl3|9jX!1Ic$-XuG|daD~L*agWk>u?UpZ8m6}Vzg;;s2l(LJ zT07VM9px6eiHcnCi^<)kGbkvbl2mnED9lxr^dX!lgntb(7-wFKQEp-2t47$YFn?i7 zk=dO!b>ExtiwxlZQ(6d!;RBZO+wZKbs6P@UXychvvr%LSYvj_94b$d^NRgghqe#O2 zHuT-&u@Cwr{<@fXI2u-TH4!3zr6FjTtg%@m4+SCVTU)VG8K^(?=Bbm z=~azU4n3zm=PkcA1p^_3C5gucZz12`I2P!D_TSnF7x+p_@t+lfK_f!rjuZPZI=TYY z$oOQXc|j|hVH*U{O~K(#yN^yJJhY+ka9ISpQ2nLBjxU2oB!XV#Q%tv%yByPuiB8X* zkR+@Ac>P^_a(dp#Z=Y&De$sw8uh1eyuxx46fcL6^En6CrKNT}mJq_R*&;>77Fk_4G zF2@XI=K!Y3Jx)}`lW8x)Kk*pZ_sE0M7e-1kqZ{$4r;)ZSdKfWh_Khm`Y$No=t8%j8 zUb1c#nLGv_P-}s1U$&IvYnM>9MSsW-fSW1cOuxNb_3_-hVv!3?^DsdTf9bWct9J$4$8>c2x@U`Z*g73gWU*8?n!g=_gI6Vn+et0RmvhYC zD|v!CaMh^Bez`s`0Y!W zm6DA`-+@5Sg+rN!fu%E{ul1xwWS>g2co&nHQ}tfabc##?5klga_St)TA3vo(Tg*m%w_k>S`gW#Em+5#HEl~B(+qE~{24e$ZnsTsj*fC*Ip z6LM>z#~~{C-Od5U$qB4@cw{5I%;dOF!TSCqcv6mU7wR8NJVDF`Emv`@Al3C08Kw$* z@q(ux)8gu@%-gQhQ!=aX1w-!Bqen*qADN#tAObr%t-mg7(63c8Q;B&G6@HH=0CI14}}gQoRNs@_*7T2HDii zwEo*$6mzNTE5{35@jAtPb^*dGE!@=KN5My#*dCk8?;<3{;FKo-c=_crz^2>gi^BZ!)BHMhc?g?%7fS)ZB*SbB>T5I{(_ZpUg= zF7|WpBc}c#mC+B+c#yo}gmG<8%JiN}qT*KxCC1MKD#X{zAdED>Zbqy*mYpb9$d&gJ zle~|=_kI!Zyn;CgzSHEy>gwtsfHK;ha|=eFubRHLdFBg)EU+9smj&8lTu(%42L}%x z`Repo;Bl_{g=FgP&F5cj{{QJ;O93?=b(C;YrnSnPU&Cz`%}AD>(E(=hL4^}@J>njU zww+q797yKZoV|Hgccxz7_!IxoUcUYTJRGx6@u^N9~&LP*fc-AHU<_gz6Z^%+a3x6f|=m` z0yMq>6a;3aG7a(4Dia+&PCj}GPolZ)iFgt$!W_t7482OR1-%|XH!mGMH^l~mh2CQ0 z#+W0P*OT2mYf7WvNM>PlahZ1lz4t6yy7rXHzxpZ3cVB;_vc;(|I+@?-1b92A;9@7j z?tnlvUPT+{Uq;=NyQIT;%WG*wp8jz7@i*d%#9S;l-#es9hzq`8>L~CBlr}Sej%p?> zI_0Q@TK_YVlT!|d>#Pxi*D?P)z4el*u#Z;($l=_|KWN|?vPgEt>0I>l@s<7@mh*>G 
zskjjz2}csf>V2$@fKI>x{N5IsB|N8^YomEJxvrCizzSPe;M7X%-eWnrxrN&%jVkx^~t8~ha59j8J|B6X? zlNnhtdDiO|FA|xTMq2uZqER|{cV6Ni26CtilvGu_q?FKLf5KL!g?%33&^qiYZ63={ zuppdb1MRq3!hGslvH8mnVmk9;^H)4e@A?*E2zM2v#nmbXW1WMTx(afGKEH&f2STEn zefNwE;)=m?kIWs8e&gP!X+!QTZI7M!QYaKxvdz{gs_$>Njv6ZPf`1;IzAeF7v#@l1 z1)$%v_y?FVzs$1s*a+);3WVGik0lWnyjX6<+aUFuFB`cSA-5hUS5?=8d$}1#?KgUE z4(YvwDSAq)@L?tT2sV8bLMz;>q`KgKv#@k?6hl-Tx8yq5#bHOmIZn1m%fg9#b8iil zr4H2e=ox_(QpVm8tP9IrrsMxHEL`5DuzFLm_1^5{7hmI13pYU~ApQ-sJ7Z;#fs3Zc zDSr28!B>FG6sUOF!WoiArvsgYk12sD>8zvafxNgIBHLP0phY#e;pt(h#luAB=Vre* z-;C+Jr%Z1TJT1EE&+@S^(7JOY0gv5S$nqipA1!RVofAawUviH3q2xPuU>;|KNqM8Bh=if*J%{`csHHxNEQvwF7lmPrZp}r}ZO?Ta) z#Zhsh#o5Ta7e}A-j1caWOBd@d@N`8fH|<`q;x3xDZ?w!p{yo6p_iz}y-Wn1=-q;{A zjmjB?OoTiBaCZOr@@iKDNQz1EcD;NSPFlz<`hp1Sb16k#)%({$f}Mc`I$J0zZRrmF zp>K&W`n#QjkFoFi>hU!ewPeX_zUVRJKB@y{C8(MoIF!ai&qQpXpBfzSJT`TeO|Hi# zL{$)L1(Psy5@+u64#E_KF;NkE$u(04jL%wWmn(FHO1C;DROllfBs|s2lUB{ zR;PFI5TS4lGkSu_Z=O69?+?wA_v|cuvy?oJs8m1Z*hQmfd&!w!R=Kgg{V(f-L~3da zi8EVsQQ;++)Jy|%wd(m4IfMlRB#jh2HUYNSC5dy>3PxjN+r+80!9m*o5YbWK8qJ|H z#}TueheVrZTiJ!SZmM~#m5cFj>4xgsfFUPH6+wSnEFiG`duq^72uT6|5*@#L`DHxcb>6Yjwloo?jfpD8{B8Ei3d+9Ghmre#SSm z$)DA@tK_Z;wWaq@B<2MY!~{PcKqFcHX|4>_b{Je$5q9;1cz-i^hYnl1{b@*1&U_Z2 zqW~Sv{0?g%bPKUIQwK9Brapi?w^jQ1vOz8?&gc4}UC}eL$@YxuceC-1?AP+fELt{D^~E-H=&VfXn%3evQZp6C!4P9c*MvOU zJM!bc@&#j6lvq2*<@pmt%k5at|;Jt=h|T65!KdE!S%9< z{&8@TlO@i_c(D1ymjR`wKegixi|d?GU$TD9SJ&eh`hA%xy*X|H-g>833wpM_4EqTs zmEmij(CbZuBWi56e z#q_xek!*NjA8l{&Cwm(?KQQk4miA^{9bgvwATX;5?d0N`Zk_YM&N{|ff+iY7go5>W zjbji(6B=`H=#E_M-8$TE@GJDs-trGmP(Fo(!qRRpmd~%aV4UpM7t9>&CXjUM2T~>_Ls6 zY}~951nY^)>86nm{vYnMBUHZE$y0=uuqBl6PKT3jKCZgBF}K~8*%Yz@gh~S5IDGZz zeW5!2wC8I+_zEozU(c7!TiecJ)Y|6VxZRfz229&&(3=%24M)z~8vhW>(OHczM$viwE!bPZieC%uuZH3?*UJla-4% z7^sncuErAv#Ng)`eAc>};O3MD=d0DQyumQJ_3R{k!{tOaGT1f6&0>ApGh%&+=nb%RJVLT2t#!K2 z!IEZby-OZukqHXefq7@L0}%QTDu=&$#p!8U zR~a{MKVI8AcF+~A@;nyZbujKp3p2f}Q#5%FhqL-CEl$Pevg|E~_JP3F;R!Uv!TIVq z639~_x)<8;0Oa0g{If;5G=f-PAN%dExez@D!iBlXxUnJP->0dnnz~R6 
zOR{;de8R8fBa#u{tq6aZ}DH$%Z)h-L9BnhZC$&K~QBXHWLp* z)I|T_3HJgng?07Llmb52MhuLBLu1De^VqKWb>&?~{k;XsFZ@_`###JE8j$)^M;V_d zDVuZAD5qxPd6{Vn7Aht}waM(nbc?xVyn4|L|KN(aw4Jspu{|#Mx~x>ycr>EXfz0xp zW%z)6GQYO=u8_AnnxV{WVa*^oDE8!nxkFDXCN6qOgv3(0@_%=-R0ddRV$sGfIG zxI=i_qdVS4;3&{r=Dk%CP)I+%E=AOMaA?7=%$cyx;{f5gE4ELUDsxXrB6N_nY3+}K z%AVCSc%PXnem{0AOTqm{Y%4}i9+=9X2E4uUh@K@m^)lj{sI@oxT_KGtJ^j5|ginM^ zL4G9Xv(E`ip8`x_A$lIWmz4r@{2x%h8W4-?9=#)Y&mEyta4-x`r*u2S(&cCR9TQdd zIeYSisG)u8-^Pn2UkY?yuD|5=3~2FVI+W+^jKsEZI=)BjZ7=EeU21%n!{w0|xSVbe zOOWd5(MrH5w_|Nr<^s<=NqG7=m4}5JRy$DYtTH2<@$2enZw)j7A`KuWQcCXor6^|BtjnL|V`E*;D6VRr|fc5T&S zB(D2fiT_-hLD=Ad8eDLMy^CND@QM_owg`E?H`i=y4VsxY*s7PnVB|0}iXRdKEtr(D z@I)lABu);th2!w7cCxQ5WcJAp)+l#D?>xF5e9)7hPnd44y~j@N%y@eEe8)7G{JPlY z%tyw%&%Xb|-1N%daHU?%!yIoc+W5CMuz%0t1Jl4kC}2jTR;laGF5&ISqum0(o}>vL z-Afgf*YH(|^iT5-K%)ckc$rr~Ly5r`L6TlAU6jJgIi(>9#E=Jga{Yz5&7?JO@5u#l zs{R`-N!9wSa&?G7|8Y0LGw*=VgvA(G7PMen?4vn3avm2N>3mj4bX^d|ANnHYV*t48-otH;!;#|(2y+$P-7S|T(z;p?@QvbP01y5ii+^n(P#xSm8QYOtsc|7 zXuPW<(E8=}_qvKo>M31!&0BabS>HK{AU9R-E)02m>4D>87B|7i-7tTMi6}xTwBrw& zHeyeB6lT)EXnAV$_~JIT%5 zTn~8Ck2+>1BTBuZ$8kcPC4tW$a+vnAnUX&Y@T12+0%XVQ5|t4Kr0{dF)na|z312rAkcQnrVr#+_Nh3idYF0wzLQ=!X%=*^z4PuN(b2eW2QTR zG;Y$C-{$%nyvcHmlf6u7o%)xYuPqIyS@I~>h$}!;tGADS1O`h{HC(sT*zAt}euPNv z5*L%p>EICB_+=7Kh9zq&^p%q2d1qs%tjJX`CUVr(Dr{1C2vCGC*`qqaaqRt@=rXi+Ba`-FQt`1VN_p<2WD4^L3Q-W1B< zhEJ4?7xk4fjSDlGh^;K~Y^+~Q3uLXBFTo#fg&zJkGP>jx&|5v_j6aSy{dRL+nWf&? 
z_kk9VBS!GyQ9N-~am;8*0o z4TB+yFx&t7jv$CvHtj)`IOgswBGv=;Bs_F+;^r!!p|*-_ZG&4pXCDc+6K5$%9*xkm z03aI!`9z7Wp^}}S%gxXhWPl(-IlwwX3JrIa{{a~ZzXy63=WXi+Ahq`Z5&HZf!nJQK zVj%nvA2~w13@=yonavi#%N@qz@BQ43`C6ijGXhogIeLZ2gP_)M2X~pR$t(xQ z_a_DyyX|Qd@`R^zQG&H*h?27w$J^+y-&@v?hTd=)DTjrLh667M)J8#HpS@Pzv)n%r(MNAmsVF6Hf7I`J#`sd>Mez@y`ULhP*LHqA!oUF-(M4w z06v967m@e0{cKyG;mm53+i-XD4K``GRennI@&3Q9q9KaV|C@PdyqQtN0k%Qn&eY4B z#kb5V9&jrcN;xx&C@UZ!kODF}c_BljER|?4n42~FXvx)L<24aOOm&5r)PS6@u^JGk zO#iWY2}ssh$dqP4X2(#N_|7$6)qjk&Z*EZ~zNC?2&c;j-Y#0*P6$ptEhfvkYR8`L* z6a$vW5Cuk~ky%+;BO>WQY;iR+1Z0~=9;&N0bx}`8h;n1h5iwi^Cmh|gR@RcWZ@nzr zDLg=MqZ)UpXLFvM3mRol@az@3k@b5e!-qrYQY0J7B^(>`YHuMIOy6X?RctdV8oIyA z${UGR?j67Qk>}62rlh)r8Yl{X0{&qx_J($fk=0$8nG^)-#&%OzGLzuoL^LTj-mlR{ z@3i|?0?ZReGTYh;6m!d1<7}M7+UQQMu7o=-E94~Iqn~x&x1%v$Maba7q}axWj9eq> z{Zo0}WOjl|>J0Ql96chMgivA?jI>U@6cHcgv?*M(A9eq4nd1MA{Y=EnRJfku(*!RN zF(d|D@q`X-Yqs3}r{bw3Mv%x1*k;!~PX_m-Ldo6si-{~l2?^=HvzxgR^lee)Q7I#Y zJPeThW|y5DT`3QZZ`a+Y=2gZq7-5W;0z0+H!w5OP;OG0gIKv12&qe^(QAfHqD|UJU z&4w7#5jJNA`5U-A*z@9TYGjQuhAV(Y?2m~6whOKBAKur?8V&GJDW`o*# zqX>B5rtUGHhc9!%-(8C! 
zx=n%ItWG^^&4B#vp;wj6Lz8?D(JB+JjB5>J1*(0J7h_%22|AdDm#7yjs-&n<3DBIO zS!798XK2J~9ahrCt;zH)j`7mxCLz-YyRJ9&0TIo#>?B|S20IEwWd{io4O*lvyrRCO zbqhpf@`R%>lKN`^ZL<0lCBBKto24C7;8PO~EBdIgjRiV2^~&b$?QbxFg{0bR>X$80 z#kf9Q+x~EGPGl#ecCh~A(5ja!j$%eyIj=nBe2KtXp_i0xB{H2wX`LIwp)VZ_c_^Q0 zp3FKAnK7=nu~s+)t5M^9CMQ;L8N#Z;5KZ>n1`_sg&||5vB6H$%&>6 z(9^@kW6f>1TTcc^iLj7>yL6aCt{1m%Y#_}f$S#TO$^vS1R%bjDc}%`h#w=jTjf3CE zjK<3*6Zlk3D(IdtWn4Q&34X_TjW{=x;H53ER!w{*f%Oj)z``~3%3l7<2n}xU=%h-7(r_erearoQQk>VIPlJ zswe^E*yJEKr^Hnj3HXp;-a+j)m2Fjlu|ne|1lEcvlX@2_kttB1|QNX#s{fDvqdcU}(R)mS=`sx}#SfPsBOF4w4@+!Sdabt4N`S~$V%3?D*f>$dr#Sg)pi9xU1 zq{p3Ksb}3PlpHGBwaql^8X)a*6tf4ZpzBCNU9kJ<@8nKmrU08MGtp^Kj=0>b0eihQ$C^v1#}X1^%X>TfAZ zE`>LrG)RCo0M&w?P*aI6L|Wrg2KTXX0xPZvIm`#5z&~kA5K`3vM`2u_^Jp6_2X=Gv zeq;QLPybTHrj){@tnOk@N=@_pbF)Q1+1-$Ux8Hm!gBYF#RC`nhok~(HR-DSj$_3ri z?SwM`2PRI?US#$vnUKBq+A1&2rQ%%w+`V9`_IwJ@k!J-azrsXTe=jS+f60Xl0WLx1 ztFX~#2wV9SgK+}W_7sm}u$TBX66Wfnv?Ss(IdOygd-x&AQ(CpS)G~=lx>GUDw0ao; zCxVUF{f9^M7r%_=8>kYdVA@D@?81C8rvn&CZDh<(=E#aHJ-xda(?LEGTr3;H9C4$0 zHD3tVjja-f`Pew8gTAq)`-^|}=$2Nc4*%zYEy=@k+F$~W>;Lh|DqhE9ZZ~Aj50J<7 z)EqL)CebqF){?5^pJ6>;A_K)XdT~lsD8clF-8v-Jc{S?XxXE47X!+Pre6#)ne;Zhi z!_22t;=zA@+{Tk3&y2uaF#d+KoGz35uuFm_`xdbA{vfmkQMi(_E`i~a(*CE<2MkxK zv;u|A8{gSzGg{`|PQ_?uyAqf65zbolMSv&AnxtRcASpyJ2>S=?j66r#U5tsTpnv`C2?#gBfGOzkTY6OwXY-IsV32YC~Wkj3s$+ z&W>6Ph!IEbC)sWAXxN zpZ=9gnKi~Nk^r?2bAKTcP&H6Xp;%(s=ZG_Gm8z~pk{-=}rk4R?f^QNyFDqrIdMOQ( zlmt3q`Ko=%?+aU~TVzEsGN>!STX-&|mG)I_%dLzWDjS#;zp(LD0~GdYaqNAwX8UUn zu@^``EAaNI{W*MNt3@E(aX*mg+Ka0i*v}IFPZjrnzNfAH@91l1VU~Nwvad*uhbJP6 zaj5_Vi{`k+?GL{2UiTO=Cf}>+ zBKAvZZ(0?wR1<3RvP7i8;H`MMZ?@yK!!GBZ7hS{N7K}KI1wId7Xq|{$vnxj4& zeNLX|Y{uBhx4YzG!&pLLI#m|p552ZTKLi${(jE3{YEYc^+9p?uPSI>qJ|)$GpCmfj z={C8)Q+0G7D=nN&1pajtMS=^OU1t_WK9_!6Hm^Kql`Hxou~UE4e2XZAtc!M<-wwPx z7jPj?8epQcv!$zO;=u&j!)Ovj+CEdO7ZYVj%IBe@^iDsUas4268mEw($1vJkP28KK zDDYSUPES!Z`Qbp%MY7`8@PrhYxRm2}srP?QZwJ8+d(IRqUZv2EaOXo}x6%V5m*hx& zC)s$zZ2jTEV~tqngECjxDLQISo(T1>DI{tqH$Ov^mRsw96mt~FiX(fHsSS+m(nj=6 
zUG&bebAj>Q6LTw)9N@&tU@|5moFuiZ+HpdFynwrm(VyJ?u?U(^3Mf zDD!sxT+GGfCW?FAhV-=x%_5zWY|E+Cxe~-102yF#1l~ekJLWYVQ~e%zsfYZKyG+-g z$zw0E|gLIpA#!od1X;65FOK~{dLJ&fANq(BnW1OKbyNjI$1 zzo@1(mL7Y*`_6qbCh4oQjA*Tpu005qKI{=_kDkg4*EBFQ-HVo#0aAgVvb6vVeFO(& zwqHkO+j{Fcecj+*H;M*Dmc*7&&pKRFdmnpgc2 zq1=?aQHM~>uaTQ^Y}Cli3>Fz#cDFt*zw#>~+MoTZ0^9vR)Cj}neZ7wCK7W}A(?gpG zGkhqggqVfZ1Bh`O46k*xSMtgokj}s*6;4i*ncr!js*6c?e3LG#sy1!^I3mk;T|Q!9 z@sp(e_{+#-WOottyk#|87~1)pA)A>2^gDd29;#7kpy@tcALL0ud?68%z{FVGnmp^U z()Xih6foH7+*Z7OO>ZA?QkQf{-Ff?=myG_i|Cz>nayg%eY= z;O#7F7rcQkg>nU>C#KZRxz3qiWeH7my1`hi9OF_^4cIEodNd4tG_xoQ`E2abr8C-o zD!ppAqLW%E+&6c;o|0x_tm9&dKvEr=;|9A7fvK-wLx(;5H6zC(d8Ub_SZ9AC(tvoM zs8WoKYoSHl=A*}^yGicJL=K74g+)?n)Zp9|82>6w@&sm8iLyC=T(=?Q~jrWu}|VoyB4!< zdi+y^s40Z@-vZ8I-7nJ?k)F#`zODE=*e#GHwbIq@ig!zvD@+yY5u7mhFig8in8m-G zTgMHlFtA08i3#hLZ@UXo_HpP_%j+o{fbW>MFB^8>Lm;aoaB| zY-Hx@e8K;pPIG!niL6G~Hyk(cjck{9=r4}?>SRNx(8UT!feCHU8JU?|hr1!4kIme2 zxUkRN&>ZvZ?&~8)?K*eA5sT_&Xr>1X@QY!wva-d#IvUWQ5AM4Ea59-cIcAg&Dx>Wn z#Tsrd#(&j@0r38KI@`q0jEqMVoQC4xJUoQ4D7D3Rci_0Pm`o%^O{sfT)jDzw9_3YB zM>`q7Nz{!-Jcbn-pNZkI2T$pmPtX)ys;JucL$6F)Z@(9UyP#}n^x^tsA^sLTQSb*> zlfe9x{oO-|xEzApIK>>=VgELT@ru!ALxDXyN9-~cTPMDdS!BPQ8STu|E$lS7_g>ns z!L8!1KiqD_doEG5Z;Yvq6TkQ5>tBv+$tKm@rw1a!5~LG|=RGjgE6h5yc){lQx)9Zs z7&+q24MFlS=xqE(SYA8)ByRwoV zwX(Ou*+m37q-PAlO}}G&eQSu|H^wKL*>pY{)l7ZQ+~zuDKd?#i^wIux>*6Cs>_9W8Hf(C|tWhyr z%D?!OOx-nQK0wTU_v77(oZd{qu)evxs1S%ZzFzjcC0k|1^$tzcM3$X|(_!l{_w2Ff z-?^W%3_OuhKP0dW9y)WDYYImv@1B}T ziJztxuKo)Zd!A$*7fCZo3>plFxJwwQ$8v?E2Zk_-JEK$9PR>_@h0uwD>*1))as z>@a|#$`d^;44t7__J7Fw%BZ@QW$O@v1ShyBcyM>u;O_43?(XjH!QI{2xWmSSySvM` zbI!f*jW@oZj6wIVt~I;Ys@YYuP?B_jlu+RCVJ#kq-$96QR9gR1*@2)ssyWaL4pqv5 zHyD?25I0T57n}g|yYzLOUd{}j$YNUKTWI;q;&B6%OT!YLYvcc@r`$%@6ofJ)_v;-$ zroBSl_wy8>RGLk)$p9VBQt>qc^>T~9&`RS2^7p?iK*hV><+?!$Y}P<7mYX^ix8*~ zr?|N}u=hZuJG+J7XI<4z9A+A<4wj8m4EXw2RC+whrGT&;qFyr-G+>=9V$O?P#-fJ_gY3UM0r#Js@L!w$OGW-; z*?<2aNZR9A(zg3PMuyDdM?YZB@x7@d=C3LXN@rS)H!5H9At8uDMdInxULq_s3jF_Q z%_MhIn;B6Y3?fj4+zoWnjHoGl&fnn?f5&Oc8g`ni5rcIbfU<{`b!1 
z?>Zkb$FJiy^I##KES9bjS8WCP!63ViQnfXOMU%-;OIrN>2|tSmFWrKZ(9O#mpSER( zVWt&ZQlbpQ)#=ZfV}9?9($exSqR$?y{#2f7PBk-lB>lJ|whHj zB7UO5##j6$cvP>Vpl``>ZHJCalq^fgx&%H$SqtMQHKXKgYcesp{S!6eSnV$y6L+M| zlBuOcsVq6}hVkELBR57j3cZv$IA^2PfvwpT26N6)gP(5d=g1>#4CczriE7Ql#oDHm zVT^=Xy46&_9`^TH5(s3%V`kLFfm1*()jh+4FTE<-P8}IrVwU0ETUn^+B1~)On?0J?7?#D`hVC~Oi zjl7Z+_Ik{ph<{hY9`!Onw*W1w06$oyhcTQ8%sG(;fU=Eqnzjx}u*k8Oasx$`{8JFA zl~}s-e9!}LkJNXG*2f~>w6q*{UFM=9o|i|V7epY25{;dG;=+tuQZ49NoN#y>qUHvL z=gHdZ%+QpO)ak`>N(0D!quU4#idXO1yxNjQ;&x3Yi-0V`(qs;3^l~VGl@i?)*(qyJ zYRjPZBo+gO`fGuDUly7I_NXeHEak8KMwvvJDqp;$NCvAel0Nr-&T@n^15374mm)H3 zs((%`i`l!T>>;g6eijZNL2z9@q+?NnbzzE}Zq(w}*iT1CLy{H2>rqAFqEfutUH|#| zSS=r0c-r(eN>MFg7@^5{&KxA;ngkrORPZP7HgY5>2GBe%pU{J_qcV?BuYNjug| zb93gW-^R{2s#(1JP)FZOn*BPS8emoGVN5Ziv#li+; ziima3hZ7G5AYU`ONJS{sOdm8HeuKa-2ILPrQIj$;z+*+6naLuX)usZsHF*WJ;IKeY z;U0CX;Bgf`QTI%mFtclbTnNrY&+3~V`sImkG=pHNE*?>viJrpAm^WxPL)`L-F%lgL z=_wpY_@_zYMb!%~1+xCBbTfUXcZp6Jk<&mr0-^WN*xMODiIBSNM_IDjuAYk-W)|4 zVMqAOz;p%_HM257uyhh8=V%HB zZYz>?Sb_cpxQ2J zLkxMLIu5{+)<3JIYjg=Y2{ePs<`igSidxee^#5X}f?H_74C60@!83F8@bV4l-tb3Z zB@kFP?6mRP_`f(N5&8Z_4pn&LI#MbN*%!Ax7KNgZlQl zD}1I&Uv4E{dVxJ)1phbbAYIcyABW^DZ#zfG{U{ZSmFBnfV6~8uSzDs~)>kZG=jqsc zALgii0+xoPI{tIeII?aep6r4Mj zb-w|&riocAkL0SpQ6_AOx#fnhW~MksW9d6U+oyuqKXg4D{rivPk>V&3H^i48U?^I@!Vhlc)Dmd_qV~|lz zV5+QdD1Y*1J14+7R}W9jp@`gkdl zS#32hLR175ak!x=7WB{#|KYu7)S3YTANdHBmI(gS7Z=*9fz^g zPL{`1CY$wd)xFTC=7h3P)3r4+(*N>Ep^Nl>4!Z0wByoI?NmSBkd`8t^YA;vZ4D(3v zjj)rOgY=+J%03UHd{!)qS>;Fl=1uh@fd;YD6b+v~QidS?997h2>vQ8@I)rnt=0Ynb z@gR}FzxDl}TdQVD4p-NH{5Rxh^im3qy73!rrEDQHiQF7OZHTV=)Q2!26ii4?_vtHu z`t&vjV3%kN=-=2N*b-YlG${(L(9Pw+0Q2LAMt~P3VJ!^F&d0>Z_TTK``>p6)uOPYq zI4l(=@+pazp%a>DJTX_;QQ5eM_d8NAGQBCLesRh8oeB{f8)h#*|8T{@f?)c&sJZ8t z&!1nB6BGc_oN8IZf^slUl3>tbVgj73PA2&UsD6s;-;Y^gR$4@d!)OMCgMP%066h?2 zs;w|~5eo@Re^w~236!*6*`)~1xNhYL%8SfUZ)TYFAFe^#&m>NAsZmhd7nO}g@86C2 zMZHmth1U2+l9iKDY!}t?cSll>!*d)aeo5CAAQ%X^t zQ@Ave6mt48jVKZ*#Ov5?vYn$0;{Fx-)#Mm8MQ2L-K_gqpNt;lk_|efZratGBh&l}N 
zuf?Cls3aD1!C$OGVN9wW^v!TMy{!0sw|etoQKW>^k1A2M4o*d^ zHHQsp;|-QM0QCWZFcf`P{0?kVp50quTi- za4jqNlJc@1dcW0qqcRk4X;N8v`M^AjrDsy2uhqFF6`$d~7!4cirDIA5iA_yQ1hu5l z@Y7CkLfABp1cOFxwiAYFvbza|oLh)clM?fc%tPXN?u}eq2)~t?D|$P@e?hCZl9u}_ zL4;RALD|bd43%&T``$;l?Nyn^air>z8?7PRO~mS3nUIX2Z@%QzsqB8Ems>6d6vMi& z-J7>y14ttJYf9z)E`jzs_&x&(!#sj7E3X_BoJ~KgvFuc4tUi|u0FF>wSp~$Schu1b zGANN5OsP7@c}xD_9Zf4uD5_W^iIa97nQJkH^&= zn-^Q1RcZ(mkm<0U4CSTC<~bzm->njk`lpYRnK4 zuc}feJ@3TQz(>>U*6*str9j~HH@U_>VaVld%zJV?X{z^Ga}0or9bAyr4rk7-u`zs~3K40S`3dQakNXDkQjp znvIqBKZ1l?MEw6rL`EQ4_X`~{H4-I9hVG6pTvl)4Mf}8y5Oax-c`+IkV$Id^Rt~z* z+v;TC*8J>}6#BP#FiD%3{kOjFKQz6WVqj5ze!}hMMMl~TB(eLxK_-Yo@PSA10{%jv zf@uGQ?sw?`Eq9s{m=5MVxM=*(@BYUMze{>G8vni=CD~r#jd2R}gM*r3FEt%C?Sa{A z7-f|ddR9h=JlW6@GiNI^Sy0a{qnXIcy+R18O3%nAjfm9z=m?&sv{vD98Ks`1yeS;s zeQDy(mbet271z66@sWKUIfu`C)`4;r;r*U82 zE|r>u6wmfqivCJtT+|D(V)!tYtET4whzkP5sAA0$0~Ze|uL8EsFl^hIJ0PCld5O$L zPxJKyb4Cj$GN=O zw3P(&OUih0OZz`Jqy}}GFn5(JCHwg{hQBdY5Z-Z|%YHM?aGjLP-cj>;P|*cpqbwGdZ$~q{ak!jN_xjW9$_}*p4S@BrdrHW{pqjwwoFv{k|MtSmLjB=5wmWc zgyQUG|J1B}EQ11Yb-tuDHNGu4O=4*J*up8xS@FvXq508a?Qcgu<#e#UkfZ=j@jP#A z1lM!c!O>^F{^qu|DDW4){|lqPl^5hV2iBSc01KFs)GshplUs!*EanN|x|dwx%f5DX zvnb7zT%uBTARQKgjgJ1%!ud!DDIGh zq2(qzeWEAZx<-aD#ksDxwJ(AKZ1aBc(_dWO(gdcKAcNj`oxXl!=>sLbkC8vP1eQuu zpx}(x^;V9Jnx;cNi-X6eVnV@kbEm2n+e!c;iN+Se+!H5$QsM1b@RXPG2(=wDI(x)$ zIlBe#S$|Q(lwvizDqK~CyTv(H0q>7ztive>gUN{OY|8NGrK1D9AY{6H zOUUGC(@o>w)i4^HMg!S`2LtAlTf9;6&@zW}W@lcqyss=KcW~*mqjH)1GJCAPTCV8r zVaK=xVfd#u*z$|zdf^lyDt`0fl3&!^e-P56<4YF}n-@!D2%9k#+E7#$dXPVlAT8w> z34;3Eff~O6`MFV!0vG)7Y|GRJ6;;5(!%?=Z_2Ik(O?H{;9~Ai&C<@fLi5#Y1l&WXq zjE6LFxVdE%8QLLf3+Rbr&2R>t@l6jv4GYvAcNz|}*Tl1A9TWdRtHtVtqAXVP4^>0{Nn*EE;`qEg%wU+I(a&6yCWcbzje{{!S6VMnA4zJ% ztqFwd86X2jhy5#eN|n#ujD&U9Ad=v7}i zlY<7gX>2cQiOZ?c~*1^TGM>pB-lX7Z#HtVHpfjJ@O_jV5K3w`2#j{G;}IDSGJr3sl|23U{s@#P5h>qN|oZ+5_}i4)$p3x?E@t z-_iNQ$DGVFBP7nKI`~Bm?ima%E$6U~Yh-Rb^7yT>;0J)@ko~ELmkM3V za{(p6zzfv{*tvOf5R!Nd>b8kNHB~@5fm3$y;a^@PrmHm}Ky0QnV5HXODUtbc|4mbZ 
z2hF4w8wRAXIZ8WN`A0)ftJQyo3gwj7^Ao8xzwLYrB0CY8V>2q>-mBGNt9$>d0Hnzj zc0qL68ecNA^vGHOl#_O6r-3XlS3q8{W9mB4zaRwzcRZW#oxZ(8;;x*rrF8c*fe!CL ziW9@+Q?0rVIaiWjQakNugV|~BtERBK&#s_)VqO9t7#miDUl(1UUOpe=$}ct%kA$=Z zYuCtihJ~189a7i#-DhE3UIUa)m+Zk{TpsvYS|F)Me$csEe?g?E44d^jOeHpmd~xc_ zSZ=9TML|pCQF&pswO)00m2F=1V-r4Qv&U}I_O4NCiN+b-93Vr>0fQJ z1)^8Zch+-`d?GvN)xSMD%ko{gV!uTl>V5R1(LA&$e?S@)&l|VL404`rDL5Z7rEe{Z znIY_60<*W5+1lKT*bKD1T)&pDQ9Xz$@QB_?8|6NItY^|5#_wkiadb>B?A`4`$Ske1 zgeTw^Ci&(h=KiQR#oXa(kI(6%<=2<>CpN}ZC@+P_uw>`@{@#DI5_Z~_df@b$IFj~F zCR5olfWXMd{+)cg4Li^7mMindH0BX=?Lz*br{ELiJa=V}I=)O45ncbEUtf-bWp5|> ziuNQ9g%1dUS6JF2D!EHyU)#G@!cc}{wwHv3iSI%yA-LkAPDFEv40MATJz@S$zSd6_ zsfebr{5-baN;*Ay{S7^38TIzrbBvNwaR^Yg!OF}WRC=U#o@XDV)gSZ3n{m-kJJ&3L zx5z|FR!qk&#rp|1HuoZ|X+ zsLmF$)ngaNS}}9;Z&?l5IR+Kn{(jC%B72hRX&<(dCx;6O}uLAo40I?WV1Wh zvOMEd`n;_v4`J(U!x9-E<10BJJ#R3mL@o1gUBDw?{nQd zqGfnzDc->i?^vU4{<1&}VYRaNDq@uK!Gu`@t(_r}+9Tpnc+i(I=mk5kY6@S^n}k~DQLCGsyfkNd+~EhbF< zWUo(736xsaTcuKjz`md4UYQ(ZLyd9BOIq|S-p#P$JS3@k z)-wS|q^{P6h{{P4iW{tcFfv${bNZq#a~I!urkZ;|4WNF^d-zKS?xxR0ZH+$m^L{|c zzZBNE96Ydj&OqZOc1|47bn&=iEnG2gM%5!!0TzVTIN!h#Oha;QY`@gs3RKz{e&Jz7 z)0^V5l80FE8j zr$)scKS3d3*tsPdya*yPs12)s?}$bSA-Z?x7Aw^_wECt#a$7~_^5WwK=d^fb=Go87blO0!jVbEQwmODEDGieR15>OHejuo; zZ*0POWbq{Z@Pm90!TX*?(Zu=kJD&7tOCHq-+^SAnKG!I&%*9JCxjO8WWx(b2la*^? 
zt@P^EXOj}_v|I~E^eUKkEDbQkisU^!U)QWOeBQ?M8@m?0p^PCoO z;JuNTfuBYVUA2>%8oE!g3z6dmnyrN3%`Ir42Zfk)58mUo+nwgq+@%hOtdbZ7GIwmz z>((N0u+kFNBzqu{y_%yJU*|S0La)^iXqft5FM{gWv^;7&#t5#aDG(KMYJAL{xG=6& ztbgpmUvP7?-?zB;9+}`X_~8AkaB)95!HP~=N6!6D=Dj9*WVgyXjbKTJV~bS7rSs+P z&vWATx-cV?&yv55OPzZn{eH|-)Y4I4a98&Rk2tu@W zf*RDMZC@^I@H4j{-Hi`#(~TeoTfHxJ>4d$Qa)7Ne%Gc1WkIRHt*D?g_2A#0wV^LgL zigHFMJ*4{DdE5zajk=hhIB8df^xpOVI^dGfa(kclLF#Q-O&a{J$!>=NFbv(+>7~U~ zt4R7}jfNDB7HDA^l;PTsTkgNssc}h_uA5)kYwl`%8Z}wPwDx%QZB~sjBKMO@j26G^ zi-z?fxEY(9E3I|fu5CepSs*lHcksq3C|z~MI@%_0@Qg*fZMP<_)-p4}+gIpW&$JG@ zZX-L`;HsFhuEmtUj;4djpk%exK5oFoNgs`2 zZ=JvV#sS+Km9NIajuU$9F;#%in;bs;_RRqfxSj6MkCK;#E)oI)9@a)=Mezq}LEEl7 zI2JLf!{PegN)+@+gZ8-#890q<+?K*t6+Y$4_7Vf4XLI29ovO`gdB$o(;VJ zs4@lEUoEBywE&!6jYA;Y04o=ktvJp$+>rH$O{`t*v_Qzs9tUt#G|0`Mh9B3nXFm=w z!-$JbT1v2g#F;Ru;b=5=$YEJR29bdc$H)eBNT51quLnwDdx*I2L|rM)L3W}rUMeOC z%%3#Zm11LZDTM?iK)_jEv%g6aX)8b_S1&9w*<7}Q0dw``o*W>QmrU@2aA9#;9_wK} z9fg+ol~#35!9_77AGC(b!x0~A?@az>kpC&AultlO(tMRQ4?oZpl}9;&Dw9MlyO9lb zwsSv@F)ggkU>L0_C@1zUS`#Sz#&q_SAsD-ca7g0aQ|;F$KWxbh6JmaqQpew-Gb_xO zNYK#0yoIU)^Ne#w*p2+&bWB(0BuMQN8d3j7ek&pJtA=F0Au@KB;%Q0x(d}<=6nHO< zH$VHKWgU3^-g#M=ol~SS7n_g+4W&hGYVJ|s9*bXH=X>1n>5!tdXMPzA1}Na;g-Cy9QO%T|iVA>lvZT6x&GSnNJIwifKMC&m?pJ z@~W9q?C@?lQwmrDmHnw9BEB-Wo(jDv9oyAY4x5Zdjuv-Kha~`{rdu2$pXWTB?q;UG zWx}U?Oe|j-fL$HMXD&im!3@kZz>KCbhMxXX7oeG&_Z#jd*Y&b>>V2%=3A#Zcpe~$7 zJ1CKnh$K0X*>+(jI5?P8|D;9FsJ!5pb8}Ep8N+zT4*jqvuDXDOG#i|fQl6E0NPhO6 zij0`?bgW|dD>1F9+|Vwj)^G=71u?Fz6-j-0AiLC<;Im$?k%4JL%FykOGq z9MjX+e8RkQgt9qkaNI^{n4GeU0eI4aot>9)$Ay5ynKhaqV81w`e`N`VJ(|*^v9jG{ zC{`lina+K_{zia%@i91qL9a9q3|`A?0N7=OJAaBLO`RXs-Y*}0KT|#*d+)mmkI@_e75{p!-=lY#KSaNwsz?IjS z(KrgJ@BR~1?sC34`kpW^;a!j_7*}HlLOsn*V^Ej*xW$-M<#(Yy95y9exnOptpbWw1 z0kz|{^`1t`YiP=o{_SmO|8XPR<41^5Zzu{#JH-(P(@T}Uu(uSfRXhkXrmrdVf{i+i z>=ZONpIm6NgG+sglI&rAvP_jaK3vM~qgL@=3Qt)eEHEqYyKfcPL5X(1zoVr4FBNJMOsG?v`TE@Dq1RI&&aM zMgv#D}YI|QEY9jUOb9)^-GEV@ojSZDGvry-f9Qa3En!V^OI4Ayf}MIqF&6dgXn)Wq9fVkO0mxEL}UO)&3>&Zjb+A_G@O*>O&Tpt>n=c 
zUO}9VvbF-Mo=_~cOcdf6Q9q|cal76$6q6FiX1lX^+}CT4S~}j(7(rRk3Alz+)BEo5 zAKtr!ugf#~s1hYt@`K!XGLp7Y+#B1(*&e82(QYh@aGccna;C-5r35i`Mp~|6)7#!N zrn|s()yxjQtOw-#sPj3HTn*@g4Wl2#iV@iYmHg|15E46KXRPSQF7W>Yv znvrJsos8$ehb0^b8Y@4*>OGstG6*DkW;Q&*u{3DEsB`tyGK!eKBt92+gZ(|^{TC3N zQ0FX$8v)yo_ixZUU}xTSk6VFMp8W=Tq#iKtGXPUDJ3v6j=p{5v@Y4;YYgx`X8%F;) z%}#PKd!L{HQi7^z=KxIHZ)0fgCg5c72OJF{Q3z!*~E5u*k;**qCJDy$&Q|o zloc>8R(O6xMJ97s3;e?z2G%wIU0H?9)qCy^_Lx;e7dY)-Ieu{(sJKZsJEw&z;{lt- zwLeU*Y_Sy)QvKHn1yna9J3VOgeBDo|nAT$ms!fR^%Zl&OezGMw7Yqm@*2P33VQznI z)KJMiP>9$A(5sea0bo%?10jQcB)QXR=8NB^$c5BqV0e1n*?t^K>_&esU>}Ti%ZFyo zK_cE&@^gO}IJRY%+>_xSV}YCb`qPxaD+AUAwc1*I=R{&Ov?yB8{Ke@Dla&0TA!e(c z_T{y`U(vYKp&L^!ixB#T81KN`n5gY#BwY4bW}pN-BGxhFbhBMSRXrAdj%#S>3axV? zFvqB1Qa>xpd^o4j-EWdObG;VP?+>5Ku!1GKGnBSoZH=oV`ifqw>DcW+{|SsOEKCq^ ztlYH%ZHP_NprUH4E&Xf3P{||q@i}i8Q<=>Sql3RrELwI}oUrT@)BdVK-t#S_q*fnN zLVAEV$IUF;EA#zMsU00*KA~J=Q~$z=B$MFYE03m%dl-?=dPGPJ?N)j;Wx9rho}D1t zy`Q&jU2K2-5Pykueb0TTO77$BH|L91$ikPGJp9E_Dho6&PqU~RT@$WuM|hSnuha`# zK$gMAR*(|;tL5F5qMtjxnbaVV`axERVAPPt`os3?47g*)x!=r|!KL*c7hHXRsJ)6` zrvmZk&bv({Bt!)X6x4=2zI|4ofytdATf-MUQ?hW*@Zimg?@&9k&cqYNpq8~Ze`Rlj zs5osl>`5fk4_0{Yh$_Ic09!NINHF%q!|p03NAvU zs~FQS@r-kLVUSJFaSXreEG-6ZzX~KKtK^n^M~ve7(H8M`bz1_teRD0VI3t1!@L>jp zi~7!%sQm$bIb0Dka7}u*WHICprj^urRTlWioB)wEYSGLKS4rW;dgT0={> zLaEf;m5bS1I`fD_=eUsVQFm+OMZ#yCrH%1|6@lhUD=#*uhOs%*@?k+eF|1MXa9LIL zAh|X_**8~VIi69R4EkwKgst0s9zL6KHLj4nCjRjXacLcmOzgqU&`%Qa##GTR`LXO! 
z%-`xE0=0mrts~_^-LaL6Ox*!g zE6}`C^|ZhE0jNz(GB#Al+`uaZX)Ui6;_pA ziUy{^z|`2q(Wcf8eWp7`BElY&IeiRP4zJ*iFott)?d91C_|T`Hm|%8 zsq`Il!BK3;TjoBnx;ig@ZZYYg?+d;!5e2%`7{Nku_Tp$ckSg%9PKZJ|Y1aE|E27mu zlpH66)jF_8I2M?=>Lhck8 z^8nHh{Ij@hW}Owc$t)Q9RV<#P_mG zdr-N6c9?V+KoFx&+>0av_TnE2yFHO(yee7O_as~_&|Dw}+Kajl|4LP5_ zZedz7E;*qc-BoD2+qQ4Y(-KL{gGUEELzD&nXM-@xh}uX^93u`fj$G z592L`T{xcy+}46GT_-Vdm6wL>zJDIpS%o!|(mD1k#u#UuzDDh9_cpjHAOP5Sc4=c+pwtBuyAW#dzM=C7<3iJ7$C+q@fTJF)&lNSh(@pQbR)53bf1-xA ztA_8dO!Yv~$Pfj{b06mIgt#EQhe$}VhG)iMWttz{y3!M#4}W12k+<89+0Y%z1GOL( zf?4%$@h2|JzhUNYaW&_)UL9aK!DB$2>O)ZFU5>Gmv6PivjoxZ)W*2?DY^w@XfpD~P z*L8iT*Bm3~j%Mw-+=4jes3F*RyZ2aQkrY&8a>}O8kIQ@ctj_W?)FCCrL7ro`^yIbQ z`sXi+xy2_oPm6UPhF1v_gUM|X^0xz>OW^(4(p4uDRazW1pTs56a64%|EpY>WDF&iT zC9La+R6(ZoM4;cGLLxk~fZGWgaRg$eW=V+uExu=a)a-g~z&xlqsFumGk$VXo1?$6h zY#CXZx(A(zgpCuzj62!onUj;y!D61zF||9cO&@Hl$K|9<$a~jI=vcJjiZy@Y&#vd< zYTL`-OuwDa2bNzZ$tCIm+QKG@kZC&r6oOq*bH14coI07D@2zoOZs(!0e%Iwfm#Yz_ z?rWky=ai`Ry{+TbM7b5uKHbWH!9)P|qAWI=bpof?OT%EQ+^V5pHp`q4j_#u9x^4X8 zlQj=hF^TGFz_C4;V_7-;*Jt^Lvt6S>_udVma0*5xuOG`V7bO%;UK<#1Cyk5R7R`+w z=WY*~F6RgbnVpN-l+|ey><3ijgui6re98Ueg%!uuzUHUWe-FG0lx z4Ibz>cZ6#}+t2r0+?by5yj=cVQubbcd{?>d!#h;m4Qk~k4_(Xlx!#5vlf~B|F#*RO zLSn)qwEcuv9PRvs$`%nlYa~WWDjB1Q@g;!?=F5j#c)qo6SKnTMmu2tEQ3dU;Qx8@{ z5jDVJ6HhE|bn}L4dGFWO26s_G$ZUxQ73-3K?RmA2_7g**y>~4ZzMV~DpiBT#1jJ`Q znA{3g%fP#S$>Tb%G)6Z#jfWf(>VrluI6zIPp^-OLc74<|$MX6pdzbrHY5E-6VJ({~ zR)?XgT@k)MTFEvgZ zYFa?X>_$)>QG}5m`8TMA^>}wd0-PAzAY}DQSKR5r%Rb5{b-PrY!PT?j5*d@1N4J(j zicoPYJ4@}Z`$rD9 zWto}GqM&Yl5+xZm7SW6pd!Kg_lDLB>vp)5zkdcFj2L-&~=#|ebY+M9%9TFtx>dvl% zV~MQ-2-Pc-QGuR>)f%;c*EUB$ug9YO?P)-$o8h27nNIfVh5kqDcl?&qo;|ZqBIOHq zv=VGZ46lFm$Wdd%01Jycbh}1)u6Q$gb0Zknjn}CsmK#kW4THFnWaIk+`TiJA#CjD? 
z=npLqF~FnY2ff}*VbwOoqQ&?%?udS4%GBRSG&WG(xinc>#u}~YXukcDMA?C^dUmr( zn=c`%RUVd0CAn=FqN(~WVTY@t^c!3J!`u>7wA{K=#@tmxx6O+w2Te6&d+d#F$hb~R zB5RyKj3iKU?A*H{ATwfa%;_MdKC>jaT#DnHi;<#ZY*}_fcQ;2brbZt15^+I4>}|fz z*D=9ng(o6hH1T3%v z=#M*}ceJ)&G50%m=@i;Yr25uS@tW^uJ{kXu;LxBHOMeeojK#M&0{E%8tOx8>k~i9Z z!G`N`Z#>=!&EdLM*k$trMoh%Jn`w#NGcC!t9m8a5o$&o414D(0y~YJDOw@>|mNa!K zuLZ_?)5-0rk87W+;@bMsQYVa(^pQCcL69DBjuA#U zTXkaKBq2u?96f$usiHBQ=oX>YI_`^8ZKPxG5!%w7gT_c!?>fS8+q{toD{Lf#P;Yrn zm`tNK5I<2oY<*Ecx(qefc%xQl`PJOKZ7m`m|J+NM4Tsuf<{DT+n-!L2FMuieVeUPQ=dj8^FyJYuW z44`qWy}@U7jMMv~;F_apY z8S+@N_@euXTAk5RD#V5G6CEncKnErI#(MA`vv95I^~GAssd{~^*iCYcei)C6pkbWr zOQ@IQuk#oQVxReCcZjqU2wKie0nb!+EE=9SI1uGH_- zb%HZSM!|eCqk&I>4?Z1nVSu8HR&=bMuC!n-g!{hJR7&9_2u(({iSZ)|wHC=NuZP)E zZ?DW!7HgqDJpO56P_$g*bUvwqZw@Bt<2h5Wt+3P6ynL!+v}aF0aMzooT3i}4qs6Oi z-lBQ2b&ScYj2Kf`q=&AAKj@CDR$T9GGzaFI!NPD7=(ugaDS)UT1`kXH9w3MSyehI zE;i53&56~Ye%PNNdHsn(-8+cQQV>#jsg@*rPjqyV(~2bn<_LQ0L7aJYHn1|yty6RM z;j15Km|h$ev=_w1ziPrrjFOL!6*Uy46^(u$FH%RAR~FLa2hi%Lef7rK8`!2vx5x+y zlVxaI#VK^YuWC#Qezv!-b0DWF$1yLKe<`PqtExAorK-YPQlSL`Aa;K(@)VcEBb3^@ zcmz0R)$i?vQ0?Tm;fb+=w%V-wWa&1J^b(u1SZ`$%>dGv~2Ig6c7>pG`CAqFw);)A3 zu|+kucIdfXPhI!_hnPq+jiHuiP z$0jCOa$rJMhNxPW;fZ-+Rcln$J-S(jGe@C;S^V3lBXY`7n8N3qlK5;LhUecNViC8I z39Mqm>M*uz`9W_x?=`qKrJi13R#6JqkDGNH>xwEe2O2{s3El=0uvvfb=94fI$Sew; z;(5s%X4ZQ856!X&b%N3>|2aqh*5N6F|M&|$v|I=R52#7k0l3cF82xn0>^h6}EuF|o zRB{;=2k3U=r39WDn#ZBiU!5VX&u&kqQhu8kdBr=-#&ecp_^KG>qE2;)P*lnVHm=$6 zI-cF;dwz2WmQ@g-S0C}Sc)`p@5#TOW2CHTNqBw6jMJpmYY$@kI*;YE3|1lt=ZkMSJ z_SoBK^zksiz0-dn7Eb7V^MkAN50DtL5Mk_{@^Q%2hMe^0sUTJZg#W|bq6X2E!6>H~ zFGRPnQyH&{jce>RrJjg~Mg1>nvEN4*kqYw)X?+Q(07>66eiaL>Fj_`Y8_$(>u8whpPu5mW6;$pb}qT@aDnXwFkk$uQ``k;T#*%S%?b)(YgIvh`cHCL z7x|UyyXlZF!$XzvXhb+UwraQprRm9WUTvG_G^vJp2IrBvMXqA6vbvgtiVGRD5ebY@ zgxTp~US(doeo!5n(}bf-qkNt&B@K1TBJfsPi@94~#l!`pvHzGNzqT{velUH@3CJSm z)F>y%Wtb48VxDwtUR8=&Cnn*f5W+<@U(-fmzUu@`LaZ+5FDoxFbJg0bYinCA_0-X7 zyqE_r(IC1NsH%twGvn2@QHX~mS(%;`6|_3LYB5K>IVHg@hl`7w#T}cMR^o!DOsc4g 
zCLKk!B-vAv*ZMQxt)SAWs8Xz`66Rl7(FU46pe%knuMQxul_js`)PGWD>H2b1n5Kbq z+QOW=RVqb zNL}0^meP#y2EXXDSAy&Ls7Dago1C}yW+MNQ*BX7+_t<&yl)FZRSk0wf=s1ap_i57&?n5}|c*2cSu#?K&a2xB)zS>)@UfTs#0Cor~#iSr^-Oy!MhX=VxfF^ z0sS;ow$jCO{;3-9y~`v?anU13>Ebc}^l3&0&w7mdO5BHx@>*0R6!1UUh$8Qk}EU@c|iZ_>`N4Zi>nLgPPc8O4sFin*& znmMCyKWy_1h7a@YI# z8EBRnXOjska=vrYZ=jJ(m*Q0zoXRhhU3h&ZqLA3Bom1yDottLechbWH zJE&AGki9=FFr&w#S30}wUE-xf<-%GR85>y`5=`pG7$=)Yaku}2v|B1^U5*SK*HmedwMP)(Qjav@=b6l)_0+w@rMYP~rkF!`Qi!mB) zljLv5J~?A16Kd}TG}7@Kkfh17xNZh5v@7kgI&ni!XYK(kcg*QriJtWD@0?w(S82Nu za;jB0b!OHqNuIUILu4+9D~4l7#c7FMSB4S(R`qA>_LMMo^nXfZmBFV5u;V*q5N~3* zCuN9rA_2yo`LyXsG&g|yey^(*8;ZR!#N{I0RYNE7^N5sjc+}CVEoo1*(wM2dO4)_e z4%Q^2t0fD>r;y<6AJs4?Umff4_X$MtN#*0VIma>X9&kwtIs(g+H6TF_GnOR5dhhp| zxxMuo!AMxHZnvIpQZ|A8@dW=xrvXPfW#^4Go|Xr1Kf>KO`gd}YUH_mh2&YxT$#HM5 z+jJjGcCQFVXr*iv&-n|a{`NS~P8+P_7!Mm^`4p6~crYwkSqQ`u?{=%}uAosgAxW_~ z+tJznRxfcGiHe4{^SZ+rz{Qg*DvfKv2~_Y?$lC;6nSB7gQjOJ;wWxp@Nj8+^Un952 z1YopvaP}iaqE*PthWB?D;028qtv16zPQ?#VEtXw4Uy&HW(1E`ZrWT|;Qd0s1^!Mi| zN%KFU3a?@BN37z~h`LtC#ezz6j4;=KTYDR7|Ic3}A6`*nHkVQi1!0X+4 zTOF>w6n?di#TpZ!aYp^_udc&3i7T~6<@1PIcqNP2_JSN`TzYk1#hvjT05@WE!tgnyvBm%MfdG2(ZAwgQM}07hQc*jmK29af zG&k&CR$?fk2|FyTV4wH_-VY%d%8uRKcE&-+&sxmSLI!4EL71k36asl|jqrmJa6u!h z`)}!sLL$@D_Y{dv-l$Su!VJ<)f=!(vQ}OD{Jr*$ze^<_07g`qi9f5KQL8n8;%!*yR z7!h<>k+2oVoyM*%Q6_d?(q*=lIcnY`ET1A|5w_Nag|=gWo)CbDDkr@HDVkylp>-{< zrRnp9LkQ~V$|YBM_L|DWQnBp)+MDcg8Dne9l<~B&$0b`Bnv~EV7yO2icBPUcL*R5P#P7l`9m2^v*i)Orr*Wq{c(z zXiBKR6HMLP{S+LIO$MNd3BpxqtGsr{7knnl4oGpfYm;5na2&ikQeAYOH zSoDi^MI!t`WfzTKr0o}x(Tg}IpO(ZlWlKUtvN=5e3Ew;_j%2p} zML zVgGXsD?EElI<{m@L%^R@b;Lewk{W;Y*+kgt>)P!Yi1#OtkBoMEU(&Z@%Ezm}AtZPc z$UjJk%U@=)cnq-Jj zH=v-MCw%KDdw@iT@M&w>Tx9F}xr1b%MDUk025UyNjdb{oYPKZT+jf`7L1LXF2iEPK zEvPV=PA+n1VO?unb~*_0Rmm#zU)+3GvOsVzX}Ekbj&DESi)(G|K+Qq@rJ#y@VFspQ(X~-7 zzMvg--@v2&p;SlPQpD$r`$^%ulMYMc@ zI)(+Ptqn~Y*zl{t{86^`wr!V_I99rJ9ilxfX9{Vd~=3 znF#=Zc2TJQf*LnA|9N{&no=r_S;<7%8Qb(gDUN3k@7~a8Zb&8~ 
z5=J~smPU~~tr#cY#1A`1&dpAwcKkVF=;0n{JonYdclsAENv@W-zS$uA%URHQWRlTXv*yaDVEd+#pISA z#?7_wx$Ftr&v+aB-R{rYaskxv3O23IqZrw@$!uwtzUe{ecbi#W{v2KLX+u>T;Ex0^ zWXAsDY1=U~0xqX_rbbtXs%@2eDp-8^Qw~@wpl+AS^+`V*m$RpiUM> z+;|dVy!wlZA?FKy(o+#Tg+0O_J_K-_k{ZV%emOicelRusicrXOnp?<#RW$tE;3Nhj zyU`oKQF%cKzB{(y<~cowOusVLxJpQzM;{ckjnq1WK$RZ@YN>ZfjYpV4PeslxLEj|@ zo~fud4UO&7{5cT+R{%a0osb&)&z{EEoIaQ`IAB1!&LE8C)m69mrUydckExWm34GCT zihEOpsue{%n%dSb6H4IQD~B2lBQk{tQ}Lt`!l|`$2;53@8BOZtj>q_uw-&}FM1`@q z&DAVIjBv|hydrw!_Rf?FH5yFsr#0x$A>^4JG!RgGZ)l8OVDN7?6gNUxVwGCRhAaox zVVg<4a=oi(l)p$gTt+Nu=I2Yu5aREL<(H~xa(GqrSYTFqOH`$;=bbqpR#g!3cO7N~ za`2C5?qXig3RSgOx1p2B1mY->!|fhO455pQ2MuRea9zrWY6ItfO-nbygxe$cOk3BM z(_O~qRzV>b7+igb_z>048;x)e4l`U~1**)72gZtlkej>yF}8mdXEC_!>2lC4V7#8@ z^o}QP=FJK?IwppRu%vCJdiisao^N`@gY!A;M$PbRIsWtx)LEip4<>qwEIIW@We8{o za!zgX@bK8UP*g6jUs4eN9dMAI{g@zI!NuI18{ME%E!PBxjbDD&IQ%`kd%m80)lQRD z8N;Q0DpJ}|5Ig`NlYqKU#I+<_MsL1ut#VdKC~~u@lG&=(I9*!vnG%Yu=qDCGF(el* zjRDmpX)$;Z_mFLsIx7UrTBupk$7j8Vy6@S%xsKbpTXO~(qb+b4wgm!w6PpvCo?a26 zBn)O~>o#gwgu5tiq<4hiTBiugtb$gyc@fvOUPf*{NSl(aB~>pi9r-7v7t+e%ggR@t zjEQ3Fa%)<+A_@#1Y0qOvSg(IrS8dHalU%qlh|z&qagAqREE+2XGCR5A(W4=wklJN6 ze%b`Onja2T)a$dU!Dh+Tmzr30wtBJ{H)-VRL55@Bi?vmOxPu;HW*7eOaOrZYT|X&7 z+5@k4xvuV&tyf{!ihUx_(TgW>?dDkDdn*(ILP8&C9@-f?Q_8|4n}nPdUc8q&#p|vn zfg}ff?l~oPhvdcz60)z!oNDT=+c&|FGmR@*#WMp=jNAmBCZ~m$McT|KiW?nGTp37} zue+f|xzL+2;XuR$Al5q_ZoCNR**jh*3m-%e-rbePmPKIh@2cj_^Rj;(#(InzZUB%4 znRw*kgsxxF=${)*rVvg!1T35bgvPraPBSQF^AA>L`^s(RxoqsaD9JT#LF1kGq;TR= ziI({T)EtFeo<5H8k*TQ}%ST3JH3W)-p9iO`qF5Yh>5j3j63C9&m>gc%DI*>>`InWm zKmHGx`c_=Jb+89 ztKYHh)n_L*i7>z(k0D3f)7;ocZW0^mN*|JS<#i|BvRBEbCaApM^iQeegh7c8yyTmJ zY9dN=!VDFadowG;2$?%XR}E9#20XYbZKR)cjTIMgCo$2X#2?(LvaX&N6@sI!v~zZ@ zhvE;W1*~v}^<{!wg7(vkw}PHZ(-%8rmgBKVr|?fd*dEEBdy32jDNKULTMQ`b+cH|{ zn)!LWmEv(zv=pUm*~gIfBt3H+AE19!fWtX%Wjjqkx_GzMXUw-A7~j{rD06^tGeMUN zybT`=y#70vz%xG5jn!dp;vw}`PuqRL_T`I$DscE7p8?94>BVt@Bd6UxrejgMkj&`OcCq?6U2RI8zj^MTotV;d*h}tj+ZBxL z3wdmZTdJlNpw$?z@uGs|KGGO^F0e>Anl-=rbGW=(YcubJCnVt3&bGbIOm1kT2lks+ 
zizRPx4Q|2AE*`F($tG7q^=mSKbkk!^_;D1KkpX3TxTDDSUG%T z*y8*uM%xA_(QvrFzFn7jhV4bs&B|?rFD~Tl$a?Hx&FChJm%K$z)`oJ{E{!9oTbw}d z(Fh#KgrF2Hi)yRM@22~~TnRY~!eFb<-;7G)n*yml>S%Wm-nj!}z7IO{4N$Zf@z z3Jp2l1|A}{99jJt4p-G0^Yv5Unuifc`e=RDFSeTqQmEIpa{cFO^7R;zt=zTToUUML zn7pfKi)M&?5TUe{sJM zW59nyWyl=-^PY$)iYdYE#MFwSw)WimbSku%z^xH6rXEy@G)LekBP9prsco==3#1 ze7W6v`fB@g5IlX=PYFI|mOL@@6urXr3AyaDVwofER})haZzRT3p}yk8|D|QHM4H{L-5E=|pZ~WUaR$2>?1N zr%Cz>8zLP-e`Z1bAsN*1Z=ct4zMSP1{sgB_@4psAo_!mFHKi1&!AOGvLpsVs{26_zV| z?3rJC>~5MCc5gGN>zcU-AA52H9S^IFVmx;)zZ)gd2@(@gY|UQ#IXGRep3dooPnWkd z+}o=OFfxbqF(e1jRI|y`$deoXycQPt0jYY+itqYcEY$~Fp>Yu5-n$LcK0Pp$xu**jz$=!#d@VoK9UQb}v=bJ>A*vJUQe z%VvYdjouT>a^H`14Ql*M8f}@i)lfVsCR9K5$Nxkg$-3)uMo&{rs_yeSKvwEv4IT6U zq*c9V;7mrOoXx)A{&i8z{GLC*AP_kNU1U;4iP^D;xi$T%%K7OTDT>K4<*73+dxTpq zvu&HEEmKZ*e5j=Gb6C4otS)B|hZ5Eks&B@Z0Am_$?pv2ri2PID+Yh-~GIRfs^Q0Dk z*gsYNJ^apg$JURjgaLwQE+u1;hpVl_#73v$kb)C~MwSmY5j&5I<1Uc}w=CvoVj-uiwa zFW*P2VHE!%QEOu-K2irmO3!<3X(uaAC1lXuSvqgGby*2zESiwxOaDMc%}1KPO|URI zf&H?)zXwW9wO*}B&r9#c)+_hQO~rZ|LcjXaY)q%(?Nqd*U3hA_zWo3}(JJ^qLvhTG zjIqB*U4#gDJ$3S0BCXs7SPJ5w%w0k8{miUO2V5>AFIbgj9?v9KJ8tHq{M9AbHIpkN z&!I;G+him7TMVCkS=;A3&EFXHk4=7Pf5x0|5vHxI&1$Oq$V$%H-Q!!M0b=s%1z091 z3e@t4tpsIuKRM`gEH@?F$irk}N2= zZWCzm=;vN(6YpV`r%8(si&^Qxda9~&zzy2Xq5YS+KOaOIb&vVUq>&cp{e`l8^qWn7 zv?q3rHPe*08YdF`U>%Bt1SHYjUlN1hck`O?s^{TPj=RI)Zs9&jGdl3s+h5aj1g7Ud zzWh~elj%P_W{;uSx89c|rY?N8H_n^@;l2D^CNE0FEC3R%$&(G*10BT zJ`Lp(Jq~`tCnA{C zioAHG6ew!|ET1zKJu>ghV@-a1q*gQ6-dvndKKQj5+C4y$BTY52x$(}-r3>;k!{nt!96&&Ff{zG#k*!IC zrdX$j1$bA}l#FyUW>Z^UVglEbO;EsB?1F)U;FfyDbj%ZlmA}er6nrGyoxgPm|)I!8`i=zVYNd z#W9zD+qbl1Y-h_tWW7rZ&m`_U&N^S2Sy5Ym+dj&tG&H%oO^qK1X(>Ay^G)!Ps&K9L zmTaX}@zqg*t0U4nK6^T@{0^=s~;jMY!A(uYixiWaS6OE_I{+{9%c!gfclEK5EhoQ zuuI10jn)J4cb>F$ejAbMgSzJ8H&;Q61JB8-S-kFj`O^cN;r`jpELJ@|Vb)m(wRc#R zXNsecgF7-Mk?~@Nr7S~1y^abz(vlkTzwRVNRe~w-Cc@<+@-bh>fHWZ0VR+88!wgHh z&F}0T1JNC!W>E@B_^|3!ZO@`NSG)aladpnR+vLpC`%0pCb4-MVbv+@0NzM%slQ0=t zjy8T)Kh!6@GlVYs6P7+?GEXNN*%pg>dW_I$DSQmgEhpW5RQ`~-KyA1?cidQOGET;H 
zb3ALlOkTFGJ|X+0$X9hu$ZnQ|nS?|LW<7Wd9Z_WG;V@s|WO1aPy?yC@SIBw&iuaIE ztT1s2N;M^*Bl^*o^x@9qoknnO{JW|$7~os!1>3>EUS<+!)de`@R5 zBQ>W2p~VAC%C&m#yG8x*Ppb5|;f1szJX>AgCXzmzuTe^Tnqhax{7M7G z@YLQiXAfShBu@@9uX-=o(+_(b#1(ydtAg+i>yU|@{dBS9npBZ`^TF(uV#$?Fte3~} zPw$dNGO`5fJ(;0YdOH^8ArRTosO3~BvtxI!HF410H%fKdxGk@j=a=;$xC0U0iT5wJ zHeT(Wga_WSnoq-H^0ggpCSf<8${9(SAA|^$7sNMzqueHacQtk4!~}jq&JF(AVw=!Q zA>-TQL0j)M_o^e$u-9NIEIF9yb?O9&ebrW>Ki>@GrfbHAm24p)JIDUIvx)B-(e)3!QMW5E* z<)K)8^QGRMwuRjyj0G~e*4T}5-^(ZFtUb~NucD_TAavDm5CGv6E4kmR&)NzmuPy)r z0#Pv>)Dk*b$co+`X2(?LQ!a)%=(ET?rXJK^7XWlEedTR4R#K=-ff-M4j^ZTLyE7S? z%B!tY;I``zm^|+BU7uVE2vV?Jg^0IY7EOl7Rw^^NEs!H}H77y0-+x+K4cI1;y6}B1 zixuz}dXs=e`~=T~E5l(LxDRdmyuHcf)mwviFmj8jl+CEw5nY8W z&R0uIb%aH%;yKlBi=HHIR?6nJ6RMMaghZ)Q47L0-IShJ)k5V;Y=PNqL-^ygaISC0_ z0z5x8bdQW=Ry7lctyfA!LWZbY?5OV|@t3^ve&09XGl9}KQ_Cyd;{68B+9NZ-G)iN8 z>;~mw&!KpOjCW7;R0Li?&zE<$!|wcn8?SGcrr8Ddm02v7=dJ&_cOlHS=jF7532^%Q zGQ?u8PcC#a-%SNc^gg`C>aD{Q)oXCpJsO`E0|gM#Zx*gHPLIaBbPwiYFrl{)8q3`P zr+%=_Z2r)g3#8(SJq|bs+Du?h!HrR`eYT;o++%b8wx+dPEi7pu|FXb%E*P1)sBv|Q z2`i`)LhLJ6{=0_+#?2@)WNy}IsN`B7y%vOtzVgq zd0+SI#f6Td&dLHnjlAZt{dKU$-RLc2C_}D+qedUDfv|aNb>qs2qQzF3>uLVJPnzl5TYmIe2!I1NE#=xD?W2ttPi^ z6tW6>d(j~qY&P>%(>sxn1#l*bgIsP9*qGAE?4~xY79Jp zn9dk~GTlFpXNnJdMXR^9Bt`(2rn9Oc;#TFJ{U7pIeX)PWWmZtvox-Sj!z))>CGG&M5a@u8}zl69nXCR`w%u1wFMOjngJ#U zcSng_4;qNLWN*=}$o@!<%I@5b5Zir<0%o_bk7TGrTS=0uKaf?&RQ-wQ!#d6{6Aq8r zQ<8zLQyXCmJO@1IBKP>S4jl>Z1x6HN`#NdsF~ePm`XdtL_Rc7iq~22nsIS~v*6}7G zeanA5pk>=cwNjGX+bv`(3CJ<;ilk_krUf@AUr=V7xBh2Y{-M$@J0(Y=8F~fqE~gr! 
z%XD3s4$WokArDiX7o^@o)4)YA`m_o=v#G_BpI8ur#-D?6F9}936;IPvJ4>bd|77Om z$9C0%T7S*-w$2w`&2OLu%hJp!wZq(nOY*6-U(W+?)tK~K9EFpBHIRGdz>!2Wstk5!BDOO;+lR-U---XbwgMr7|;CIKDoEW=e!DrM{j z6&z4Px6Tby7vn*uPYrygJ}lBaW#})+h!~9pKX`sbh=p8J*^VG$c62)sJY90mC-tK9 z*~W|2WbaV<#ZHZIwcW~8h#4IsaXig7Xoap3CRkLz%gBHYO>cP=%t4Z5^1(bEvfYe1LH%W6>pHDnNzLJsi8t!kbq0SlG90`JJy z9y!$1n=x@UiI>?pA5WY)-uyN}>#D3B=bb36rol|zrtQF=ORY1PAA^DaEG(;cVcUsd zFXgSB!&X^|E7V*jaw5(lYrcZ-!Jg4YPPJBzmG63KRg%@R(_p(wPtaj%#L>wA!sh?6 z=uvUU_P1rKZ56Za0VX8Kx|)EX&_tzgxHC{~05|u%X!_F!dX~ z9wX8M*HRMRAh0rYjNLC+fT0cvR@H6Rj9lqOx^A@przPqaQ2vj|Wp-K`ub|%}GH`!T z8mbYNZ3bi1mLqws!>12|3jB1}hM0hz1#)Rpy|ZAwqhj)mx}lfSX7Pxe$3j(!InBSU zY>i1x*t6Dg4`b+PIMU}N<`f7u5NwIMztq}1X6uWuN&QPGw{%{(W2{p)K>!0K!N0e( zQ5A61rlFv*no>XS7}q~py9#Mdfm=(pmY+7-?VxTQS4)P89Ipx z>xU=CM&VVj3{Y#HXLRF#8RP~bLW1%w-x`w!#S~3oO>X8DXa4<&n6fIDGi(qz-9o@b z_MAtu)1z{Q?FRXd3M}LFnDoXJP7W*hvA1eO8hCd(K9b27VFRRf9bf}JbeJfB6itqc ziK@K3Kc^nj-DYs@&dw1%RO=_4t~!hm^TkKu(z4qex*HgS5}n6`2ZJ7R%iW3tls-Y5 zfLP@(CT7|qDeomz$s_xNUcx!*OS#-0kjaKPkr6X}pN<*9x(@QyrekpxCg_AC<~{Ro zKV3wZ)zN>_u8g=CbA2XJJ{}R?37SLBO&i-WFI6xYvI$z?Ui37|Zi`iJu}619!otel z-j&DxXrPH$e#gyV7%^?!V^kc9lgf>fW7?sl(^zinm!yG(iy6*C2*Q{Ao7m(3w^*y9 zh9D%TNG_y9UL(#PKo7g_xK)52vv$jR(aa`n6Aiy9LbvEuc_)qVZ7#^5xV{0??y~&g z-)415kUYAfbI}S7k+pV|osdBv79BggJ#3c*kEq0nsJb64i0NBH}9H{fi=G-0x7x zQ?J}5O}U=`V}o?`y8F?Z`2y-YsEb|iZE`(#Kq_nDzx z{OTP1aj2j9q(E3qH?Wg(jaq$`*BRnVEE5&|nUXWuHf(`(6k67kQ>@;8rD$wvnmSKH?F+bZg1M;QtXDI%iR?WFqtGQNuRkCtLj4D*o*U3RQn0+bFJP*F@)^;Z!Y2RJBbE~Rq$xS)gGu>rk!|7GR*T7K! 
znJY;%k#B6MNR_8$KxQ_fv~e{Fzh1qoX2B$tg*;9T{Bni1xQS#7Yip-3`cpWk^J+dF zLEU?$nIsQ>t=JC*&WZsVG-6OaeUhITi&c6}?}|cSaZgJAfnSdp{um9vw&Pg2jTJ^0RTNtH;a- z|2=wQlG&o4o%nPwN%uZo|AV9w3`PgFIaZp0$K6Zc1VFPoZgjQ2CWdE?syKcqSsoyw zp~G{s#psqUJG4t0@)@J5;E5ST8>a&mhD~}koFpixD|cU6XkH^9C&fi|lz6;PnIyvK zzi?yeXU4Sc5k?ByccSk*4J{;vBS~XfnD3xAm7_F0VsG&=@mW<=AAEzPgV>pjnRoq1_ja1|X>WV6XQi-@Z{a=mj`5Vc2WIMCxhaPi1_)EE+5; zsH72PZe;u)grkiGKsn}pUoU)ITLz%$IKd-#Cs9Un8{@bawAXv|W1q{qoCWH0LL%q- zN+2%cc9ZP2E$nxk`pxro>SpKwV~@dP%wP7;Upcp_k}C!30;!a0q8U)TqF0H9lBK<0 zTO*!;<9}lW9krmdwY5ENB|`d%zw9g+<)s$aA?Xl}Rc7djeLtU}=@z(~wHDC3ykom> zFC=T}gu&Nv+S9x-CObmZh$S9h^HH^=nMq94@WY3{FYWLwttkzULcG2Heq={)1XC(JyMJTJUZ z#*q~51Lcdhv@+Cgq;w%s_!pdFTaaj+$95pFv!Ig-ey_p=<=BCl4_vJKtJ}oi&+J#OyEYd)Mg^&qQiuK;Wws?3j|9 z8NK+fOD#(U;$h+8O7mHF)oX7ou_ciL%0te~Blkasml(9SYsiKJduN$0p{6cQ6088` zxhx{%>|>HEa{uv z-#mgHp5UbGKA+Y1-OpOlXteWK&i~c{(l?!ElOL6@9t zBx7exIycK^{#1%a-S1t*W5!Zuvt!*RCCGH}vC0LyjR(x*p_6TBOht4)4slxmf1+{n0KGM5VS z9GNq${{@Pi-G+w30QWSG-S+dUNg+A$k_twY6>2{nj;%*jh1`bMWmZyMA4K2e5$xbB zdn6r$K`v9J>^?&}r4H@P39!;KA>AmAYB*VV#`jboq({yW6X2)HZTX%y`9V$S1 z)yBk~!6#ky3{DG1^|JU4=&98PRVPaW9(d!=B6&t(Q%)bUj}+Ap7+esM8$SA>3burs zy^@$zk*c8co6Zp}|IC8O13~N6G1Ha2Py9X|SFV*~3+!P7b1OTare30m^b=HtdjJKf zfjO#q>q@A#Eqs@+E@u%lW?XpUU+v(na(y}aW4Ne!o-~;~Y&6BgY|u=+RoOjhc19~L z6es&Z9UXy}MnXi)AIYrX#9TP?hpi4=G1pC^+bEwlm51Hgmn{Z*ue3srq2GMFT7rPp zpWfECgs%58-CK@|I{Fw`495lmMlWcwb$O-%twl}N*mk!cYi%mX2=lBy1VX+q5CaCB z4hjU}yneho$(*lGPluu)gU4CcvMv7Z9^_cDLH#GXObCKoi35$OF2mBMHQA+3(LsQkzR+&7d={H1HXBv%6QAb#auzt@o zZ}@Irveqy2vvsn=ukk#DEr!hXKOCwz;m~$AnBTumR#|;>&9TpB5NcY06XVJEy6Om!E(WSw|mqDI*jx{y}e9 z!dUbD?K9O6k^vj_kJpiV9J-QX9+oTHYaHr!SOCntEgNwTi;TcedLHQ{qDI`{*hJ~$>AsaB?Q0rLOm4usyd?mOpj!jrZy z(}WqgEaaxEvh&?JBYI`sO@>Y8QT+7AEaJ>sl;~1^w7cqoro$_0HYK@JBwF8m#14=G z%GhL#xo$^6oPJ9V_qKiau&T8!rMPzz=YQcbN4m2_4rGFa9XpF)6kUP2q^`32aw1*{h=t_G}iQjyQh@>lp` zrc)KL4o17rqc3<`D2|j<5+sGywVFXFQB-H}>INO*K?0vIX*U|NR&2iobZ613woU^5 z#@cj>>W7sBSERw47wR4$>XnD_>IcD*?*atEhsW44Za;$IlQg3u>^`G2pnTg-s} 
z!(xlv*M#rlUO`i1E_4*x?HxiPiuisX1a`ZohU=fGNn9q&Cck-7j>JEIYSCOkj0=7w z=nTCrwYy{$WKSuiW1Fh5m**72pkSz&afqvk#7=P z@|b0J>NXZ2W88fcpoB1(euKU~IXR2 z&BL43o+zY{ZofTNdYyj1jb0_6_Ag!$`2(mW2vAyj{(z8QQjb7pc%E2+-_`5tN8Vo= zTf+B^s`j~cX^#SR6g)ojC^)4tr91FTfRhB8L=-WcFZfMDLtM(r6Nm~8Ck;hp5aDz8~OsJ|r$P%5A z>2goNK*tZ3EdsO`;VeIzNqM(po2Cl9l-uHdkil_Pc^nn5i7ZKIoN4;Xtj{z3s@}==xSwRmk7ogfHb{AjPRMJiGpoS7) z2!2R4vhbZ~Qk1ZI2Fe6}mk~QNl|1&gs3bOrGmc@(%9-MqL50mWqB{=JUAY#x9Z^2% zL&}HUY4(*Hy-)KnP7r!4_7oP1Em4I1ASl;j4$dZr%I_(x@w@2S+UzZ7G=Ldw$-fEZ-UY z?3~}|&GUCcEUd*@{TpOhX!2n2`R+s0=8| zg;Dyvcjc+!8(+Jpt;@BCs24L`o!FO=_NQ#bhTNvQx=zV88T))4xXxjoS^!krFXqNJ z3A4iHhj!7}9ss`X&8oU703$6ND|f=W*_(5d!sc*#uU(Q)g)KHRwq&#fUa{Ftj}(Z| zL7xd}UZ6La`d&`ZPkU_r8){#U(O6ogjk}}~bYMeJ6KU^$Ft_#CkC{A~x{MJfDg?@v zyb;uYet2y+&6aet%MIksD|1-iJ)5^A*aHrE+dKBw`f=XZ5|oWD32gsVkTmE9ePO6= zsR90R)4%^VqBDEE5Zl(afJJ{*mSg+CeStHQ{;XQ(L9S>iTN zO*p(Zx!nw^T^t)QH<9vf7qm_svkQ=r?`m0HkV+6vk|tl7Tw6f`uPx0mibY({nlX(VN%rCRM7@1Sk}7053dJuDg4ZSyAa!qM>f zt}8N*l~n{*L%PmzTksdOcgCwgc=r^YW!?Ctv@ivO!tOR2+=-^M!s=#xr(r2XZa61a zE8~SkT&e9s(Gpo&D1-U2%JhC{W@L(;q9tD}Tr&8@CanUtJ!&EoB^{!)R3K_4y!KS{ zG8$`bF?m*|O zLo=m1lS6y0pMN0ty2LaYV4WJ5uUf^7+rq?5tTHt%4sRBm%$F%WW@Y!BvU1AI?RtjY z$w5!`#mPJ^_RC|cr+D|GZ!01Y(uW48i&Zu5{Aeh_)d39e5(agGzh{)w=sKY&qk;vc zJ9U<|mHUrI;8;noy8$J@YhIUwz%-CLJ5MQ!)?38n*c1{x^*?*e7mr z5gbJ~@pLNrodqeeErqw}YS`~q0VrX$L@7Lhiv<71+4-*#<(}a;Wo?uGI9s@bOx2{k zD~dDwC)S^t7=rs!g!LfoE#vX795E$F(|Ss_YLrp3<oJ z21`oHen>txsC{&_Kx3XOlN0Nz(z*)W0v1no{X|7qwf7miy2Ub&tPY8=W~aV}6=R8V zc0A4CbR|wG8&h0EgSa3vRs-5gsNfHBRAQYVdA^bb8|uC?6@w{pFE0|0RK)?cy{iU3 zRm}9L@{DVT{(pk{1x@q@Gy6Oe8%q|^PEaZE-H_{8mi%u! 
zfkyFaufcPNa9>}|gu!`=;Uy+e>=$cM2|pPY=x3F#slu^LOee)1{gr!1zkMbPWktif z1UKG&o!tb-d*oRFsfemhd@W6&iRR}!Zw)s^Xqp{cgzKAkqL_^ftH-*|yL>$=Pelzt z!X(LWIRj@8jcPbs!863oYmvIlA393VL8)oWiXS)-b62{xmH8m&vc-F|u>;tk*E2=U z?IC>1xHcdEzGcH4;TbLeWvDt!`;uJbxqLFzGEiRq>l^LtcsDE+T%+nJ$a(iUV)wwfV+>*S@+PI8yWf#gn#hCE$JDx^EZv9?|=OC|X4$ zQ0YSb?{**jnIC))Y=g2e^v*aCP?gVpaQZuk?TqCmz`2Mt!uZyt2`v#8 zS^4D+L;})(a_{P;RJ@lF=`&b^_>OkIk&@o9wT&FT*Nz+yH-2r=w`nx#zs$bopZ#ZF zhb()c8+m0|<7&s*WEIc8IszBpIo_lGHhFqUuH{k|Q$Vm#=uv+eUmcmd)XijEI`FAC z=$o-~*);kje`zbQX!UEyMOVlT0ZNEGF@t|V1XVlf0{BW<=YD8%FR2DkSc2^ZCuS5=f2 zIM5$!8C#*vq^4@j@DB8P4&E#=%*-%UR#t@XDq;JYQzPuF0rp6|RC~+CA%2Y$xy3mz z-G$)8eia)n18e5|2ezweEmz87Zxf{MxtPBoclVF4e-YbY*8x)keManhoqXZCbja*s zbso368$g|*T=iaD;ch4q!rYYEl-CgK5xgGo_fl3t5nnR`CIm?&RoJY2*0IN~KzQ^0 zaL_}Sj^}2H?M==int*vzlmwgYK4Z*p`-9UvS!nR}|M48Au9xTd})eXL0P8uIQ9tpD2TN+oasL>a-m%psmEL)cY+P*{-?TWH&Z1 zqhe3NNAXoela0&o%C54^>)+Tvf5vxQ3HMjI1Z^9?+#3@KdCno z*u6UUZ~vVRg23FTE!XV}2d6W*G6XF?#T(VVv030((pxj0+djQlJwlfv-Gs@_{3W=> z2zDF0J6+Yd18NW2A>QW0h09*5Pqf)qM%o;8P2FsrtzEI}O#ilSBkKHBItI}C7?cU! z%D9yU0Ha7wE$>uf+2NvTcLeh~k!Ov)PKLe)jR) zubVTBMNL(-3VGS|t^?u1nn6&vA?ZV0B74hg0`=U| zY4rzltSx58v$HC)%rw?)KJw6soMSjfn=5~Z<#IlPTXZaC%k>ZYaoSYO&Qv~=Wx8Oo-O08fF(h;5{Q-0e{Z7O8&5%JsacG8Oew$ zvcfqRh3eGrCw|;`$Ux*ZW83>3qx=yxNn~4N6&w%W`EHZ_pWmNg<>8#bMlSG4&Kc~> zIETV84t<5+jZ;zxgeChPDE=sk9f)M_=_&Mi-sFzyG9De?wagVb-upv=Hd`}GVgCgM z=2%V3MUz$>xo1D86Z;HYizN9G@)y~?_L!QOM-W?x5a(Yt$Y3HS_Q^T8F?Axj4Yw~H z+Ss4u|7`p~*coLHKFdr_c0F8b`yq2_xkgOLDR@NB^0PR=Bn!N%R1qCR&&*g*F28#n z;NIjpxu6C}GOw|xVECVm216}eog<0qC6<{oFzz3PL}DJVye<< z4E8Ih#JQh~Mik;d>KYLD)n$lrt{pQocqAUSk%g)&A80`4Nl7iT=Q&fwSovu4^P|#q zteP@(k|~fo1O5WELQ&3jUmT7sTJO>iYm-VUKgnXbY^-&Wh92zvr=s1j)$We#@%5^- z6iZ41GniqHKdaT|4C&>XjZ3G)1T*Lv0p>gPSM6?_9gyo^pn7kQZh^?sM^{3Yj+zhF z+s?Rif5W?<5!!H}Rg;Zhk9=F^0mh-{&VO|2>T}Y(NkF1fv+wabTlr=Y2>;v;aqM{72oZs4*Q+ z1SF4|JUqN}TS5eCw?LK3-iSV6cqYwd55^ojBgIKJN*>SU!I71CtAsnS%2$D%>W6vda2wx-#^;;kCd7`Rv3xQ91;Oh#@R! 
z+ap@Wbhql!eO%1O!|A5%6f`ojs}q}6v!`ajqgm@%W%w%_ckcNes`B27CCIt|O`}`E zMb~3-9EoeLNkN%Zi+=>CFMT{3^zng4q3Wl1dO`YYNAtflUZcklmLP+H3KrXuj3=bU zh#qU!t8Wm0DkJT|RRdz@DZ>y*oQZJ4!Py+b65l7}m2w5e6Vn9%8OHrW&OdrIc@$fmWKtqww>p4j!tyAV_7G&B(v@p!Lq zb*1t_?~->vy9XJCTY-c62(6E;&gsFj;5$3vtO{hOT$Rc#f2EE z71$e}l`+lZ^+zR5-qx0}8)2NxJ+rg>jRc8@WugKg;F2zhJ@fy_>V`GM{XT0Oif3LG za^yNut?d&P9qu@g7OqSafjuB5twaA_Z2+mu&2$^s65nxR)i;^EIX{~uZhu_{x z#012V=dZOzE6XhbZUkjF9%|h|`f=O`!!ZQu29Jgz8&>M#=7xpwGQ2@uQ7AN2HRA${ z2r~3ZDI=TViM$cuik<&K=a2%?2b>1zm_|0GdcENEt()k@KFN5y@_&@I?>fr{14c)q z^Em>&O*#4kLAGKfEQ+#6%F|V9+pB93^B5)rftGldc(QB`l89a|j?)XbzkJcJ1C7u7 z4qU%YDp~#5-ZJzJ!DvpxsfKS`1^|4XGsSL?u}FRn%(_Bu6p!TQm>zw$A%BeOL(h6B zYU?+_Dw-oxhTkt^2EbelvA~XVxI6ik1@5^N8MIS zukD{pSe-N2^m)lBN}ATD@^r>}S8rkN8V4mzTfBDtB-o3eI~T$H{UI}i4D>CKYzx1E z6yt2YG3)VNS1p=Azot6)Z>~YDK^Ib_8xggUUsNhIDI~htW5F1<%qtCHBA&=2@GYE< z?a=W>m9ainoF8+5+9|a>d%Y=W3PQgjQ|hl1o66HEtiPrX=k` zG4&?9U|sm7A83C7&+IUTWkGK*4maYUfwS_D+>58;-CqEel!I9-$qt=2$PIILbz1}Y zcx|dw$QEV<`aygdt#&-cZY`<$9PdTS-Ms>Dd+q%rVEJ)BgrGZnnn91cb>^sV4aq-f z(hJ>QziC^g{47P#Rus3VeN)Jg0}$!OEgl+oU;xM84koXBY`bjrIHNtk-o%ZoC_?OQ z&vp=HRSjH25T}|HV83^XWRoh`*Jv0|E;mmzlm89GELv+s_(Kw>E&phq2!Es z;BQpkQ^79jMpR3{$s!mrn#~;Z1 zoeirkud#zhGiMgdWNf{?(&e#g*z8TZX{`=sVCkma8xzflx`>kuJ^u%H=M z##lL)?w_>w%8=|8AdHm3d3NXJ*sYhResZmmNN7aZpQOX+0vNvkSkCr$K(R1p!?A(y zX>Qz1C`D}x=QYL6At-p5to_9Pwoc;X}$n|oJj)=AmS)?QZ(wDT$q%aPlcXpZ`PFik(kI2kUebLxl#taBjQIBo*ic<{a zb@!Sg&zuLW1@5{X_FK)v)~%yf$NPrwL|=AZ**njv8OOdX{-IkN>6x$oVpaBIJcN`o{= zDcwkSNH@|9Dcua+At5!uNH<7H=g>n)cXxMpH++Ndd%ko2=LgI@d++Do>t5@+RoHi; z@gj{!kxkAfcdpC92kN%U^` zFUUAs{35#yMMyo@+_$L+HHNp!ffCP5hCqnju1xw?^Wm23*Z0JIgX3ER8J|(%uc>o1 zs>d`_+s2{kZk|iVWpRuOt#B+`5--xa&7>dxV}3x)G8VrzXNBFl^M=u$r>C1O8#seY z>zu;I{J~{Q7cTz8!FqEZGw{rkMz)1n(T;kbLE~M9>1Che*i{mDsj68X1h07-oLM`T z8WOL??>f7-#td!a&>30w&vx}?7xvqGfwW26I=%OKLbtD(On6X^CqR^mA#-y&78E2Y z#ZMw#vxvtBJXDi)tZ}0a3Hc|If4aGua2Vb|xF%cbLc;Z8*gMOp^i3nAgxf!s^kCMs z`@S=j`3gaxdX{L4q}}aMbYseU=TP$V2O3$oko7HXy0k{!Uoxt8y2l^E%0h<&CNI5= 
z*EqsF75K?+hr0tLSdK$WUVe0WJeD*bofQIG2Q`ASUWY%qsc!gb@Pb4tRd39%3Uzh-N@HCte7V8L=g&$y9w2A2d{*@t{?370j-3Y&rZBb-` zk=oityK>rxR6QQ|>`|RUV10*)O^)%f{>jUn;!sKi)&cO_dG5BZnE@DC9m`1UYlDwA zC*KnPwr&1M%E2!s`HBnuxO2dXIo+V1eQNXU%%IWBQrBvgki@;K@A-%ZrEAI(9qLL- z$*o@#Y|HvuKjtU>Jv8Em+E2_B%@_+ip9oe?*aR-~ht=;Y=wyiXl|ki%6)vHEO$D@k zHejruAN8`EO=0R7EFms|;eFxc`qcA&ea9u4CtXREEZ$*96hzV;%sfb6 z{qwV!{zekk=Li32z%qe7hU|K>e?ackN^-Rm^NZ5n?jKFh<5x(yUPGo4o(w#A+h*Bp z%3yTAn~$jCiZe|+6xl8ZMnINVL&HLDU>t-e$12R@%T%}9Zgp6otb{YdY?@5f;~$Xl zr3HgD*@-Q~;EPcx*n(`1{QJh4BH_f>e?24S)NPMc=ow~66`HV!nyqYy^F1ny$RW9M z!ZT)Vlb{z(p0TGcN-6XR1&goH2$%)&;;%_>;TY0BsUDk8Y7<{C3U6GY_aBy)HK8bl zO4?&qr6UY9SFa5I*$dvW1?JY*|0u;6vDn)``juOs6w)2i|RIcf1Sw%&#NPd$R$vhMzmezFIs{~to@sHP!et5Iv-krmvXfNqA@g%TI zBi3$1S&9M-5Hu&xY{RHLJGF}F& z)Bh)*$SkeMK81DlB;gMg+*>cJG;O1xQcN0<*!PUBDzLRIuIZbeDHt?s+n{BbTu?Ly z^xOG~oFwIM1wHB62Kvyn>XI^2+H>!eeG8kY=~7Jp@#CJ5cZck;hH~5|aUiz$+`cYK zj9Oq~3kFkhL|pcMhIzick&npwpSg{mE`;p7Xp!5eq&Kn5Oo!9ys%NFWpmy+4w>HYo z#Moj>Vm7@_REm1v9~V2G1|{*UdBEj5UPswz3g+@H0f)APXYbN-iKf*&PeSkvPp4(1 z4~VU&be|%?xpAq0N6)8=apPVyDz@R((GmExbS<55sQ=`-|uR}0p)%@$xrgoU-36WjiX6nps-?v6}KGS$tjOyy0d)2g0)RX99S zPMl`Gw|?=xEzIb|#3fw%rRQ|<*SgRIW4QKlisBh(kcMRt2^)>0S`zwe3fSX(sSNg= z6g7REXCR5KjdELt%h>)xG?m)H@qBTjBmVTVI?$09m4#R+C)q+}fqlK?8;KBD_o;!N~YF&<+s@B|r! 
zl6()*C5S$e+vWBAHLt^2o2blPVSUZgD{wFgHwkZefA;4O@XZ)F5&~=a+2oQ#4NU3G2CR-FP-_XlS@j^9_R^ddFAvDq@wnG01I?e*gY`TMV}kbD`J#6XOHr(^IQ4_O)#GVhKl%SkQa?lkmJ&gN+HPlCpxI zE5C$azs?n(6FiXT=cv3U3(dOunx*1sq1Je@s1_J3*nM1Ino<@}i7s*_=ewp{aP!N5V+1*7@p)<8Nll9vGk9o6wPQSzLEcn?pfzyGJ01(M@ z+kdpQpz#R5&5Fss&i2QIS(UcDw`>K39RQSEL{6IA=Y;A+=b5rNk6Ejo3(VhH4J#(73kSRIQ!y-y)PFgzyo-7}H~~L%7V`t$M*NfJhhn4F z1K-wz(fK?l!Qkm|LQK$nQN3}~2fSzYqeQt?J3C+fJTeXh$!z!Hk%f*YJ|$IWXH55F zY$%tOpehMMA z3v6QUM(KoaU+0>lQDXD?dt2)88{0M)J=zP%o5E0-V8bJc_sF)apx+vXlTsHmk}*@y zITSIdeT_xD51N!9Hca`rdnSRKD*Kz`iH>)#D6!?hGd25u{)<_!ii=ed2H8F9yasq) z&t`xmLFM7%pMKmSgXT!yFWH>#f297}XkppHu#888RQ@$V8r1cAt5cY0AIL3G{0vhw8Bp5W{a_0yh-*y?%t&n$rc@VA^B zE??~b$Y0^h+E^db;qLMLoS*|iJ(D(#V4=xYg&NI+ZF`Sw)zA3nX!_=zMN(qlb@d>To2{}5h+S_l;7-S19V zQb3pO3x1bRyv38cLWabz)qdrXzuvdFmnPQqWlrl{n6>CYA}6mYn=N#g^?V5dvF$ihIOc)=NJwuR8JT#D98?e|Lrlj{OeH zb|z%4SST5m5M`X=t&95Lh1#w=AA-fU+wxD}k4zeHU7zKkyjxSq#?Mvj0p@Py4Pdux z1E*$k@mifP$?z4p`)25mPlE$?#~MjA@X+u~K?9Rbg{OH7c+eOxXW~r^M&EB@Eq*Ly0NCy9^I`>FLj0oYbBtUaAfNBWkk%lidI-9_Y}|aYf8JZyh+@>sPtW zOmXCV>KHmIZBYdh{L%X%q1J@ZBzQ_Po8-T@Cs`vCmRSOej-?8NhhYgZJZhr~N32M_ zsGLSS3ExSF`tLV=;t8jOj5DT|-!+Um@mwQTJBprMMs!xVC~&W*)*FBV^fk8P;|@Zj zQOxBmfFNK^W4^G-f9DtM)xc%9=z~R-Ad=g{SKqiPNk!P;Ge&#*3}}d6OstuB4bW)V zyfk>a2xeKp(>E8Qhn?HE+)VmGo|bqMH`%{U6=ftzIg;EbYNP}%YPiF3^X-`${KL%g z!Xrw09&j=XY4<4lBg&*1>r)=`-IOuCDwcBOZ-n{FCp^1H=Dp%ytC65R6T?hnqfP&ZcX}P^w<>n>KAsr#JWW1i*Bc7+L+fx24EoLfoBg4FjGqZs6{pOHb-vO`_hZU^ z^_NuKx76o5#b{dH?f0PAAC=l>#0C+aL>ac?iPrBeBvGiaB2H*=3@ct1VzF>9&%BHdmt$Knm=+D`85dRuDRtloU?|GoHciCk&fu7xiXG99WJ9o}6QLuTZIkQB@Y zT7Z33aKloUi`@=`hxogXJx_mLO@2KAz=WyzhFzBH*~Saz2b)>bBe6aSYZRr7o!&l1 zbuqQ;vFOLZLC0pZfx2F~44d!z>1(mgazyGje*|3eSva?DzLgene8jC?@mwMi)rC`+ zVMbWTD_&Qqqx0Fi`3$>~rd5LhC%_n<3;uvJ8V&W~83}8vx7~P5t0Kh{=Lg}2_pe18 z9rt$k7}~w(jpY{FXqn4$U{K~eYxJ!LAC{xhTZ)FH&wxidjzQ+pjJ)=yI;1^^3@9mbcH2s`T1tQ+=b;9yWfv!HH?1(&1hJl42=P0ji!bHtDRNNrE;`O-7=qWf5kNt? 
z092cki7n--T&%^X{3~doA_YpzK$!HcjSbo`Yr~1HBcGxQAQck=1izWPCv5r+M$luic ztR3!7R%x8;+u|9|OheNbs*Ff>Sh+P`e(S^A&q?sD+?1ssr7TM5eOBqG6?};4Q((dK z*OqdqU|M$2IiPCl`yp=W#+ACG`z48OXWvF6rb403<_3ge&oN-8-h;p@cRx4pcdyU2 z)UKtp_gIB#V=ST{v6A(p1>}MOr!V903E2T*`yEY6Aay;So7nKSXC0*_7dnIINX)oq z;!Zj5SZ`bTwq#IwKCuNM08$an%!TQ&`q9H``)vF9k(ogU`2F6h0GLMFYVYk`mHSwl zmN)b&g8>`P4^b84f31g-Z}SEd*i7Qy-FqwNBf6TUByj-fW+26<%l*e@WgRzv>@oY2 z>rGgr)-%bOB@U$|V74npj2HrB9^82;**1bLMS%RtAGTVKUSx6((V@4S7liM2w`TpE z8CV-xxnEOk1_^)|%f4ohKt3?x)~s)NB02rPVFNYmAg9!OUtX3PJQ~1^qOq+ax`>%O6zSRI@`c zz)))Rxb?3~MSAO35S;Aq)R*#ZW4(qoOAr2hO&o!?Q*`|M9@4lQ?cudrm0T=I60B>O zSMFL&vYjE2Fl0bMSvhM33>E0lz|(3x|5ODIT9XqI)qDRTolM}|BY%)Sw^Yp~2%0}Y zpH*4m8<5;aA(FFyNyiG8BtZieiZzKHVQ{QzRu5whGvC2X&)b9jc30)Z8?Cc!yg)It z^P}4H^ed0B^KG^e|>7y8x3ko>59O>Kui)0TA;p#$>VtSUYPRcAq@kQ zM*52Kuf6}U9kGa_qn+nicMS6$;!Ez{#w{3WaOj(BDTSljhuww1ZF@;v53+O4bLxEz zsx@mLbyzyJ-%IIUIsvbeR%jOs5^Qqft>f-uQWRfdCnkvrd*E??nnOHca&0z9js`~y#1wr?p}cf;&DtPmD7r)5b54)Dvo#B0n*R9s zIK7bLzlaG5T!|984WbW4Ov=jZ!E*=lZ{no*EM>YZM=@>j)K+Y2(7AkP&DTEYaDH$U z3+bc-IT)V)e4T1P>|ZT$gsp*xdDjC6hSu^JWe*kuKs=|P)jV);y(0dba^%KVPWRr6 zztB$K%+K=#O8V#RGYtnoFL8+flrWSBtNd;$wa1Txz&lMoCveeVJG!38o!!ec<=cf4 zku%&m4ilMKu&}fYPs7*_1qHyTr66qdJ)KMb%Eh*-44;i|QlD=JfxW5morj0L z-R|95u*@FZ`806*62Xi4D;THE~=s*1HuVcIuY>(b@Jo z*Om+)nBSaO=s&*6j=^2yhYRT^bpKSYnl_cJny!a@Aw39Sk)!`@#`xscf;eJw_q+1^f){8o2HLLewHu6+$;uZokA|aB#@%IgE zCgIiM>+B8&Oa05>L5ec*mDUHsM{o=x5sYR+tK9xN_N)HLexVuaS$EyL6qG%(cO`grhj@9>?o<-+lRDGYn|J-o zGIrB_DZf0=_BS_tf@jX2>Xf|Y0SnH}A7_Els%)`Og)|SZrjPhP>c{{5l2uz2Ukgc}OkV8cr#RT`OluCk^LTfH(#8s=F) zXB7?PAi#-TZRm^kM9kpq`!7j5A}=k%O5k2iikl0P$bWqm|XU+STs zAxcN?#C3dHMZD1$vr;Geb!*w^q_T+4*9QL3!~7iT5BrFo()gO*LX3^Pncz8ID~0q* zvy`b6nDfFDdjwSs0nT!MRj+WkCLz% zB4@zMxX(AVcMhiyy3>{>d1z2>H9;jjqI`&trNJ+V?O19xUED+mb)tr*jxF?EVNr7% zm=0)l!>e9ijOx5^b>5Cib*M7l3`5lck%p#I+}zWwXC-r_hyE>`^#vGjX87#7$R)!0 zvHot48IJb!3qR*pAqDY0fq}_k-?0tZ2uc>WbWhmHY50abA{{oP3r2Wy|EBxVjTBY9 z`9RC*Ac19A5*b$Us+DOLnf}3={XUp%SLOS3oSz0#9z;J(Gf7=639i;?t{NM7dnk08 
zJ!LP`Szi01^w^WWWlm*bos-pa$BL>Yo^>IMy(*@c(>(f!SCLGvcz!7K%tmcAU%*WF ztC21MD;cXkeySz7<_zyZizVisK)$L9{bHv-17h8&?r&0m+2{ZD{_0uc+LSjJV|u1} zGC60=bjgjx6tb>@eYPBl4=XDj?EpsIBvu?$3Pk;dk6;TR!&=THnvUKT7?%mLsYhXd z{LL~?$HuX5x6mW_Irz9+-grr7cXn30U*1MGk|$Ya9&FV0TQq0ta%@_+d)-JvE}c6{ znr+*jtyHuj%)PDz(mf_oX!U>}Kj%I+Lmsr2&kOrzyn#b!0Z%1<)nYZ_x&azK&)c=l zk(|k4>nvRJ9i1nZ_3|ei|KqxVCErx&@=(=Tw&v~g2~x>&ZU2-yTtwgg@tYM2H(Se5 z)A6{q0B-o}XX*GuNm%*OKKt}hP|1_@GmI0SDM569d^*tLNyvEOMQegf?@0!D?|D~I zKc;8oMBXbRPp#roHeDLKvjVrUsT9-OVtbA7xqJk{Cf-lh1qTzf;rH%TTic>{lD%=A z22hK)LSVx7T1O5$bP{*k)W7yFOfOZ?_!m(YYP-8mI4{q;c61w^n_RVFZLZon0+S`S|4(s$I0S4#crCC zN>-2+`M}SG-y0G zkNG);1q>RKmFobog&!P`&5rl4lT&G?f_@d{f!3xK@CorTR3;T9B@-^xmHKf8DFTiCD_%PjGRLKs&Z(?|<|sJQ`}IP`(vt3)-enFxKEJdQVdb(RhXv!ci2$VL z;{^4GlZN_WC!kPP|A$E(cX)`x-$_RXEG0gCg2z^JlYAhga0; z(=uK7oQi4fw;jq$Iy~P)Cy8(K37#FHGnVUYYi&F)>W!;v)EcSy3WPA#xj|0Vo!Ofs ziQk<-vRzm&ek|}GJ?bqCQZLjdf)=wSD)`hPC|0Vn1fUCxNQ1PS?Ltr?!8TfFk@T?+ zDuBnVK|Q2vx24u>?WT<0c?!r066);SNm&b8(TY6dOAj5zJ^ z`h*3AC3%61NcZu~FgSQ^Wc!8YkW>0;M^>7c_&|F6!x{0g5XtkuL@KJ&EkRrh9%U+L zb-h$As%nU=F?`xuiUsm)%`-)XngD+_J}7ti``U6Gn;e6B>ftuE%c@hFwNmj7eZv zO9Fiy+S8cRs=h6ZDFCj}G$IvO)-7W$HcXmAl#y5&G2HA}GbW;%#h~}?-CgzT#Xm@| z8Bx0VKFCKLDj&I$lgU_CbWdDQc1OlnbM(>T>v_zN%$fcGV?a+5>yXvqpIzzK8eq%v z*&<&i2}N(FAs6~wSFoYNKslw%)yTnr6sk_ykizU(O$H?b=Kz7NEutoS{|Mjtj&+&N zB_%v`7huQA_va9zT;?0kzoi#G2hAtk+BmxzL#(`jq8_)p1QW+DR$#Z_nd-^9#g8%^ zmEuQ>e&5!k2&*KHYvxopO0p)Om%JTR8r7-YX&sYY?!XSYC$`}PN%co|%hEq(1E!X+ z&fi7wGMj50+V3k3S|auGw`DeS2PbiQIF+Q8-}XOMhRM{`_>!OJJreRJtnU=|Z{8We zu}@olbLxxP7>mM-Q-OK9vxmSY&sHdN@R9aj!&>n+by7<5_;)JPt7+6Jo;p2GKj^tX z{1i)dI;zflgm-i*=CNN0H4DvE-S(*113S*TZ@qR&h9m;Fn4P_m>&=b7M-5sG5zln& z#a&T2bDsBzq-5aj*lqRLZprHh$aVmoE8glO!5+|uq2dQPp#j}?kHNH?oNGm5cW*6F zMz@?nT(ThbQ6ReF&&l-GW|L%{zMs;qoioEv`r*1&BsPZvjWkiS4%|7w&ZHO@bBla~ zy$#8xuMujvpK$3hg0i4L+mTHn=L+c)&NAH&KX~sSA*yUBZx3&dr^N?n0!o|FNM?5- zOG_XsHKMnVli4tPN-M~y%X-EqrXkWrS-?iK`ru`U$C27 z3yrzz!oFgBCYi3|`9M9E|wzoBlV8M0bR?_h}y+C*&@9LumRGvZ!7Zv@L#B}E00%9 zr+6Dl4gxrH1q|=i<p&Q|(vt+fe7w 
z<(nt@k%nM7ax=;QN$`mim5+%oyJ02Yr-pjEgfc>1qkj8m(u9#5ZJBv%UmZ;R;rWb( z-XF^SCg1XPa@h;#NQMc&%pfHOy_rd(c*!9!t5!r=gMiDGFw1qW#rML)jO|BKFGUmL zcF*0d`Jpc-H)|c`(Yy1@Yn2%$=XGUUHXw+nhEM&Q7e_RW87gN(^RJsr=Lx5XQ#7|e zrbqEq5B{?(T6$Q=Rb8;%IGNGDa>?3?j?RRB7xRoF*y%h`D=emQ;1h&Irc^4+JHpD$=c2)KkI>L{rEz;&BqL z?=X_B71F`Jt)_{Dt?1Nfk$mXem@Xi-#98-IkWA%RbSQd5=N)Z7Hv#X3O!}TdpBMeH zUp0v(5=2Tyfof%dlWHj1tCEj!Ud?gDzERQAg+{W<*&YeM8pB?V1hzC|Rus-cVSx_3 zVdJrmP;0)cz&X9i*f5ien(bA~%1G%Ls!F+Q{9Yiek{J$z znbmc0FJwJnrKeUj9ILw}@>QoKMKyZ#EX#Yb``5=BQ695Qlg$p3(WH;2>Ol@5b1ra7Pl14pXCF;-t;n z^N3ivX7knu$9X8B?!9=oTg*asv}&t6p8IZ2PHgMB=e%Zd45s$vf}TTX?DCwD!f0(z zG<)*K#O5QJQq3}>#siN*E{bv~7c@uvN!~$1Ku7KnHd=|0lwB@@Xy%I$G;QDS6t&R8 zIisIhr+ej|8IwsUQyCGYxfzj`8 z8n124x|4+6qeAwz0VR(9XBblnDzG+0fm`HaqvIK?z}RvFay&Vx=ZiD`{h*D#6L;69 zr0BWYqxH6d2T+P3e$t@mh^lEyWKx!zGhg=;^*QVK6z^9mQit)d`7MIF-*GSLLn}p- z?Rwl=c|M+_M*~{rlTDbip#E__ZFae24e-N1#wo`T>cu(b3V)?cc6*HC?B(doW0g&Q z#I549AZF-Dn^#Ndz?}Ph*}wb+6~xy<*@@T;<7cTWz@RoyKCsuvTVEj=Fhjbz=z!;) zH%|ge66E|9S>M5q1WY=3{YB`%h5-y3!IAT(Tu~DJ(0O08-WYk>$e1pcPB~z?S=Zds zR?KWwtnA#?S=S^+V3sYipQz7{L`60DB))t^ zp*H^{(1;eWXY-ew3~Nq8DThK&)+hBBELPaLs`S#jSwVm7B(Jqj(a@Fv7fo)|{qL)~ z+y1VP(<_amA>@X}6@4#$*}n7ht)2bd3b#=QhSDEBc^g|zb^g0ljOt~|qDI%W4jZD6 zP`64~IN3v6dasd!?NJPJr^^dKxgAd=`f`tVF~iPBOA!0$IBfWFb$lS2F&eD2dhr|@ zhNHq|kiaO~QXJDx)^%47JL|t9nIe=eR6e~vA@_NFl4azQsB2Exfp9W8zR4w^Nj zVw{sPuR&1bnxl<8{iD1n5A#S-(sr8sc$jfhV8avXm_jFvppMxL_UJpQS3If}W60 zPkCeIPd0GLwvOUx6&Pi1#A#xH$9cR%ra~l8zU9}89EC8I?9F+}+;~=}`9mUeqg7+I ziTn#kqbisLXIP-iHO&dqjPX|#WKryv+vvD;InTf5i0)2_n;% z!y`7Dkcd07`bbZvJs<|7_3+Zl5B;uE2>Yb9nFyy-gP zn-+0+@l!yo@0uU$-`z_mjCUKwLSd>J*|UNBl5H%CEx#Yl|Mt|5A!}uWw(8USi1_UD zqjX9{&$F{{J**{{g)0`h8#dMIDwFZ8KAX3|W}v%7W}#oeo5C+!HOVr~IV`_f z*Z2<9Vx%{uV7dhkMtgLaPY%yZ&a$%cbDHzn-|QtjhIfBPV03GunSWXMT7Jrb-tJlJ ztmo_2_b_U^Vb`G>Mv%yh0O{Y7(kytoyJ|!}RInlhGUt13fBzOK@mvXmndrd+9w?ax#mkXW)U#Is<|1a3herd}qm7yw8(EG#qR*@rbxv=1R z8Py$Y5tVGsZjrqwB^LD^%;vYTn~6H0+n8*2EC+8cSJ$3QIbM%XPIyTbC*M3A_Jb{n 
z(}q_CRpG1Zw1j_fZWYs^iq>z%l(4xga^r|Uz&@CueSdF0G!KU&qk?&I^D2Z>G}He6yfkN$02o z^cmY9ECNHRS(}nR4ZiT(3go;)(t_rxi7!WHm8BI7Llc9p<3`6ZLJtQ5a!_Fm{^RV5 z8Pb$oQVk~prI#yUf}OGv8ni5TY%VcuvTc&+RAV#;W~g|RJ{NN3;!3AI?`B>QkzPFv zZLQWVvcKIq}O zlv}qd>v;RS5yZ=h!l>?3hwW@P)n4~I1BMOnhSVGZyq~^&c@y0EmDxAcGMm3CJZk~p zUoY^d>O#g(z^^%27Z*>^`0zEnAw^4pVskqt2RvqqF={3_3_chiYKORAHH}r8{|UKS z`M)BzkLm?o+eM3VS!+LYNL%>8P&|y>)@n`|L=_y8kIk|s$OZe6B3bXi& zv>D(5)5_f|*&6(VwLS=idu3)#rJK%m@;77So-F}*x9BN~%L9=aNPpnrd@W?rjZZz2 zTB4^90;KQlXnp~U+Q-+(4&<>)v&p(-x{L5dhj;cL`huk9r= zX*h@h-yH@?LQ7^Pm3^wi5zS`nh@+R0(WP)B-s&0r_(9C)cUk;@d?e=K;2(sCAw|0s z4?&E`UC-$8r0Bo2al~ulbq)qO9eInRDo@XkbxI@w|io%=YwboyP~?`~Kyxe0DIft)DS(fl8!kX7+a zV0`_amf|34b1%!4MpdXIv84YD` zx2AG(uKkIdS?&>h__3p4eC(ez)6hb75lOzUkuD+@djArbG_!X&Rabc1NOjivRy}-P z%vhG0mWyfKYhQ5KHvQXfALC7&lQO|(-`!@i&NX<|#0|TiLs_E)2BrUF#_x&S0qG6~ zoGh~H!K{vUEC0YBxR$%*4haX#dnlbjyOapwFHE0%lJcW?P&HmM)MW|jS4>*kdj+K> z*(#>{M>)CKR-Z}Sfv)s6hl@_vst%pskb-Q1`9;8ACD7n7Rp3>rJtf#>Z~w!lNK>pkn55C$j>C#RjEcjG(bBO6N&co1t`P3<|O(8d95fiavzm4W` zt(arpTXfr0qyD3kMvPE}mVaczIsvZ^7Ha$+sfQsZ<61iYVKk=@Ztvl3plt?AciDgxSWIYnohpv97%PEXC>z<6K2Tvxd z5cunDm;_oE>4~Pz?Y8+U6U+>GT=AyM?Vp&RU1a93X>2v$<<(hMRU%jq+8X1~xjN!^5Pc|k$^YTCT>&-9 z=f3f%ba@9q72s=~lU5k^qLEn{cP|d81(;~+Ag|SAQ~BO(Kex5`@%PQfv~Id^LtP|Y0D*5F+d=@yXcqs9iA zh8nDwj-`{BwMqBbS2E!TiE{S{Eesv$tHzjPmLAjkrdf$kxMHHd$j3fbexA4oE2iYHPn!4*DVuor zL8e#9gfF`6*3l=*Gpd&7+Lo2~Ul{<}+_knS-EB)tA^qG1JbrvpvH6*!hhS!Kz!9L6a<`IBh_ItVI z9Pr)V)vVSS5TSjMSH7l-YWfZDhHV!hFgN5N6s1JqjksRYWsK6i9{V4sKCO>+$O5Jj zT9DO(%S4Z5Jba@XjL~fL#qDfZsDIC6Qr^!k5%fn&cir@~=^?0o*fhlLH_y!~4sQ(H zH&?7#0i<#fQawjxbSwTxs-ORlRG;n7+~W@p`41m}SW#F1o^PL#o4eky(viFy?a(AU zm)3$VQBUaliRriI;>r|@&BaxNW1ABL+Hm2!C$$rvmuw-$!;sPir$tu15?(i98Ma>C zy;Os1_~zDjZ$fxloOB&2yV-Q)VR346BDo^Ap%`bQnBrh!!mTKsx=|$hR)Z*vRx}1W z5310w)zo>6bzZh~X!*)(;oNGS-sJyrm-xi5Rph{g+?4KdCTm^FUxz!0I^J=1pUsJB zpVby(d0?tHaiv7;7Fb`ztgPvja0_ycA_L2O?L~~JuQSs)H3q-3bOBRpzm6!=~Q ze>L!W$@4h#WGQ9a6el-JW)TX#vMwMDb-Rk33`^(<4;#2K5o;tMs4&I4N(Y?1@n;E9 
zA6g4SLHn?iRom?^Dp||Ael2^-XtCBC0jGn?oXL6hDjt^;JH+)hDPtR;c<`O8&6|vJ z!-Px9BR#&JK5?jfX+`g-rs2;84K9{tr|*2Ow&jkw7%sL+M(3b%Fo{iSC!8@f-FUJx zLZ`ty#fZ;|g>Oxke3DTTb?yJvQ|zhaqUTEz< zw792ece{Qzo3)STl_*uuooBgl%ByJ2)xk&@*NFLkxOICVXb*xP`qC4$U)MAc-5u(n zVTfafrmqR_TkL<@B;DR$cEy7c@&(aNkw}jXXQVB!#H8ct9y4pYPg2@Xi#} zin!io$aP5bd?DYS#c)C)mMZkTAAIo7JEhJTPzz{S*2%9Z!XMERQY5lI`32gC01wOOj6geh1C zP6h1E#0#}E(9GAr{Wy)`KU*R^^q1{-&0n4BF!FK9{8{3OjBcuJ6f{W1KfF{JYy zy5mi$=Qm0kIV*T#^Bkaituq891PZbT5<)8N6|D0>@r-9`tlz_CbAij}LeHD}NzI!~ zk8AdKN@S^>bKIcKi8c3iel)agw>V9~u7s6cveEs_3vJ@MZ4zqE+WXECTLPrJ6a;5z z7wxW(ZsUC{(xxpWU~qRqVDo$X?tIQfZ(6EoteCsQ=qmd*v+nAG1$=s-agmL}yrSNY zV>De{iMKP+G;?b7UaN&pHc~7z3a=FM2{{uGW$5mPZ)6VeaBEmY%;(o$urHQS!~=vJ zpQQl9LniA`9uFpIUW!+#IdQ^EnN_C@No&!Qk$E&^Yf!+r|hNu zce@v`w0!`l6)t+rrb|3A6e_V@A=TFiNw zRBzcYpf&iUC75$)?I=jt#iyE}J0GD@!Pac3fsf|uobbcOrQfX_yFgFTfKsi;Rm>#= z8=tJOFwLwQ^>Ibt!UF$WU0(1ge=qtr*^Xw_jjOW7qFoK185Z2W<6nJ#MX>YBHAjsqHs110sT zLNK!@x)%}>4r8Tz-^jP;UbpoRdb=3#LU&wqC$cjYIeZBD>C+g`O>V^Rn&5Zlz`pIH zE_dseB#tiqJA@^!I7G$rvD;I@8sX0}99;27-ISM)E@3|)FEg)RRn4!UX(76MrP~nJ ztl4#gEBNs;b14$r=W8o%rlr^RI$nDVam_J`zEWdBpRuS(JLDU%8;+gMcWFyyssK)t zTpcXIZkXEcMB4I_@^l)-wcjdj@U}C|!igA^V%9+8J@vpS0xd*jV_T1rDRU=9cKe9>Zix)WWHAib6qtyE;o2R z$0|ZeW&Y9w2^l4zK)fX}V~}o47v0MuMQrg5DWOWKJ0UysWP3l~XXw63EF6&d@p6>) zD6Lbpeqq*wbaTq!bt_*^+;^30L@7Mr{1S)Fq|!o-M=SVRSjMgHkQ4BjP2AGDMK+Wq zXEyr@+L_?|x>a#Jq;`>51hiDfCC2?i0mA&5LCsp>+hSMN8NEftoFx%^bnikx6cTuZw>hQ zcbQFbk*jhLmyOUvXxv*kBwjxHe-wMal?3C@k=_|C)up@Wtis|hbfKZNv0R0~5?)S= z(jIDBmZ;Qd50+0Wc-iT}U7T9Q0DdW5@zOOP%Dipm>1-Wo*ZhbD7~L$U(p*`aH52mt zdvPP#Tr4RnD*C0eN}_lnzl|YgJx4aki32+~Mq5jMc%CrUrMHKkK`CurRJ6kM^dLhE za&v6-D=&^0K-@&vI2@8bRbpW_;>WKHPl#4}ZJ#JN2=jK4z9X)~<`m<9mP zK)um_6zU#Z))$!t$a(~@GTqGN%Q&=7ounHEY`4Hh5FEv8C)&{LS~Uyuk#^G%bqx|6 z9V|19Bh+WIos)qthN7aMENN}b zp7fRjg{H(P>gkAzbuBA2vOg>L`OpH}|61Nxk~@AJ{Ie+hnencnC=?v}iHa%`CCQ7P zgGT^6j}ikXO$x9m6>L?W7q>P*VQ7>UDkGJfRjprGQ%%v)jY&xVBhhWl;oM?ZjY2X$ zK}6jiM*yg{(;SakiPXP)_fJtE!#&TvbJ_Dwa;Gn8dsV+ofl7+P#Lw+^m{&lkLMePj 
z)To>**<0n%$6;{oDgA>q_-xyhgm`g}j3BFo6TEQdan~7tO3GM*Q&z6yX8XWIniBzm zg}Vm(FWeF-@kD5$&9}saM~`)$w|GXsqTu1DD)&vJ$?goNjhl6S1Ky2N-n|_e0;F!c zzZ);v#vu%kcOz-$+SUudHRC_zM zBS;P&q2QF84fL(PGp~z;Gi~5*#8oj7^!axg5H_4tyRI}i_JMgp^N@v^CTkv#J1YRt z>z}d?Qf-0Dgq&B3R^GS_bAKd6$*qUQS?@ljuWbKnA;(PFrS8$_@=cWRDq+y9u^><3 zWHRX^^X$9JEHxxGS`SBM^`&y{N!9vgoC6Fo_VpESFYK8v*JXU(?w=kAg) zxBTa@4_{ION5DplN7&%r@r`LKFXjYS!69wylJN)M)XLa$(lp-BoCWnx@c~8%!^LiO;>YI`QUVs%}&2vshB47{7g^j8ztu5VSYwR8z0^! z1dTgupO;rHw3YO*@+(MtXXU~qie>qrM50^dUl9<^Lj5$3o8_6l3n2C9;VlgJh)4FD zgKb$p{k>=sL+iTu>LQg{}J|bS(9YZ(LFm%78&+~lm`|tb3HP?0a zVV`~WS$nN}-RoWfY%)o4$ADojR?iL}*#vMjK!Cm{? zwqpYYvDq9&1Y6LmNpdIJ?|fP|>_MqZL0{VQKW|0cl+`v%gRl3ThVqiC1&-fp z=&TeNh#}c=q#OG0TNeHD&N@E{o=kKk zElB1+?Xp7Yxr8zV1bh=`S?RlMHSmsGV!W`VtLQCv+l+mr!nCgZwq}Zga?9w&y8~or z?N?XvcVPG7U1v#Vm*Ujz>?U(hZM}O9f#`6io*Q@>q7~#lcKZFYM?|%oCl>a3cogcG z8*Lkk2QQlb^fmMfo#EgnZo4l{Wa@NN6O?k^ko(mi)5=coB)X2SJ{#$W19f_}@{J5n zwq3dQ z9dG}!A~Zz{F2p@F_UqY0fTze}WB4J#?*ikh)s25XOhTTMm(IE;lJHlD+lxp z<2HZXoDSmd6uCM~x=D)vO4IrqE*o-vr?ePpsDnO zFpj#D{IO%+4(+F@Gs4VmD2}|17E4FZN;}KAS^UvQc{zaJ#_3YfD4gziHhaQR@_hgU zz&Fabsbz$|tnGWRb@j4PFc;B4X7p{l@YPSKu#nTm4eTpMEY=7TL zCQ`3E6j`o{H-5d}%rOs$k-6B`IN{Dk9F$?R^urvraJ4SG;OJ_zXkMSKj-k8hm*fKg zmwgqhCBT_0TR?TP9PbVW{pmTXioro$h4qH1_C{Tru1Mn%Wf`}UmD!AmJw`LcswmZKIbVd2s9qu~hP zD%;_5Bzv@f&PpF=loQbO)ovrH?Ox|#dg-@bdp*E#xA!WR99Snjkyo?XOlsZHCd^aZ z+7qVRbvb0RW?B+j9M<1EPU_7=Y&~*{c zOn$OzDFT5uYdnp%jIT?b-Zx#m9kM?V8>qTt>Dk5U=o}A#Z?Z_mqC0wF6A%fx6RRTx zwF+h9*lTl{-||?oE(WWDbrbhfB$|1F2HdT44XgHT8Dd$s+&gZPTDvF$|8>xBq? 
zeY$|((8sNuoBM^`Ot{8~lJ=*;?%GKg9sYn1f5ErzZFZBB5Dz1eYUYqo*lZ0(6pux) ztIqZPnYb&D@451yPpZiEtjQYn%JWQcQ$wc)@7GM17JGtC>=3XT+e zM;245X7_Sbe6EO>1 zFzFBrwJP&pd8u~yEz5Ey1S8bk_eQt$W4AeSm5D_TjO>*n{Uv?cBVWxnFE+Y|DPsAh zK6K~6Txk<8Au-c>B2^Mv&JzLKqsuZ?EvZXdi}i-1O%L=w#3Dje7{ax0CWX-kQbl42 zboZA(ijkIJHxJll32V;yRr`-<=WUsfw7tM$)%&86hM@|iUc6wBjf`5HuSuBvttVnN zKBIRZx=w&PzTlssuJ`u2N_|7X6&C%_Me6MmX|$;P+mZFZzQIh7o`|HiA~hzavIll# z0Oz@9t$cg!*BMCV-V`~uO}$lR=JE)B_k*3?;ir8h%rPbyF|mPGhT@fH?d05;oT5F+ zaMZ-Z;en-%cWks3V^(5aDSIhkKzS_m&*72f2cC3ua>Oo(PuAZWeMFhpBqr9jQ-d_qiB;r= zthPt~7hUrrSsXX+D>o`lyY5e6;S*DVry9Xbe^5csSh5Ae!%bLL*TWh<9HK^h4SqVu z@iisO34Fj@@qa)|j@caP(azT2JrRG}<@Xo%Ls?O}PB(`}*fL_7&gMa{@AsYctO8@~ zEUnA>uHJ#HbQrX>7V$+K`MI~M+u=Ln4{_{tft}s4;}8?)044$!Wtq%A3POEWsquF| zI-V{l{P`|~+cGM?-Gw`gF_O_e{#FmHU5p^DrA0$!Zso}43Wtj247=9j+9^s3wl5Lg z?2#$E#=somx~Ipl>pT7VOsNvu(>v8tJo+c1p2nFQLi=a;?ALZ5sTTuT0i@_XREU^n z!N>JO7@A4ox3~)}_{yzafeNu)mWXS*I;ZJ&zO(?)yz;usZc_sxubibm`Y^4%h|Q|r zgM1v(^NrvjOAsD^@0<`lT!jA-eE3mtN-yl#$X)M{~D?&ebohDG%7SF0lC&^AJh=+vWjuy)69ODimpAhR`r;}+VK z@x!CN_pE^ez>VI6y2P4fL7y028;YzyA!X~&r&WFopvL~s9KNXdAH=v%3+f^8mgDb_ z(XTF5#d{W*vCtp)r!1QplYd-S0_L)+o?VWL2H%E3l!>xaoin>i9A}I1OekX%QT)s+ z(tQalKmAzkwJAR_efo{wcQsC&gE28Vo`3Ie_})9EPh9Ib_gKnk-xPyeWJ)#09}UYA zArcSe;9;1~hlt68Uwq}m7@vjZ>ql<~Q2w(-7#vE@vZzK&X1TN-!e$LImpxe1^St2@%Fz z%%aL`$sm6N$Rb7WW3LlFPTJ$6KZ+p`in;dHfS|YIinfw`Nvz^xL3$@^>PgqQBoEEv zm53K|5=!h;Uem*?T=q9PA?Aw`w}tmfeVS>x`1LZH6jGFh>{d)a<{NQM-v#?Vd6un* zz!RElLLzcIDtfLMufI=7dE0Dh(fc^Hvq-@?@8mM8)1}PJt)x`%I!Ui#UR_oxi~gKH z#>~#_+Y&ANCIYb!M5|d`b^7^ybRi7y%Kq#qxMj#rJJpX#Qm_H`vBZ7m8ZTO`U&BbH zVs@KD9&Thyi_*;OB32d53R2fkV9h*NV?&uq=*2s8oH!M3AV@6orPL(b`4$DkYH<@^ z%nm_(;|~EInq6>sqcCp?FPdFr?t6u{pTVk|u6;VqjWeZ*)30=s&Q<`^nkNGSb_U0J{&x_ibCEvt8>>pjV8b?*au4pD_o!PvX9zz)8kB}>)OGhzfIS&@d4xq}Z zGr$7Z-R%*#>{#~AmLFQapll!M9@X|XvI?p2z6*Wy%=3Hb8|Un z=dWwNnRP?f2@i_kk){g!U2$@5Jf!W&Ch|rJVa7)F8Avc;{uV#a;Fn}7bnEeJ3 zoag*b>iu?!bSCq}#YMJ4*o~ZVDb5QOScJZ{K8Sd=Yy_WV6`V7!h8Wdr5 zQ_ghU`PYz4uO~9anTqKsVKe 
zsv+*V9@SmSBfUwPutHku#U=XBrJ?mEO@6Gy>eWUQ234n=ovt$nUv)P5tHb0ejbln! ze?E(4w1@*k#02fS6AmNHR% zUVlX>|1fIn&s>{@MXq(O{-YCMFAGnBymOKL$~2xB}EX$zG$?}LI44D7)lklc*WnIzKJGM>i<}<3$Ku#XNpLoa;9;ty~N|%vVWW3ur-<$5)h?H^q^eNxnvuB+)#FXZIJxXc$P)QjAWZ&f(Bn8;;njD_}By zg_Ypd9nr6tTxALT9ncCX>H3b{HdVVOB*{kV~un5SSg^yvl zQCHV_=X}ZU09G4qE{knyR-1*p6_5Na2M6IpiB@^AEjWHY42>$Avy=az!aOZ@m-hMM zCA2hyV=*Xd5Z|P-v(RPp%Oe%(IbCsPcd)2dLpGOn3JmMKNXc~Y;v<5u#}zOt*g`&M zR<^i2NS%;@GoKcnv%L!zkaX_7YeC`1v4jdT4#X#*5vc-`BuEC3CK)W`*Tfb@B5R*-L@z zf=L~+zUPye1>v0kcdH>V3{~EuGGze@L)q|SH0`0sZ?NX*PeLdJh2>|0eSjIUb4?jn zn~dig*7tGfzYXrI5g0~8&biKP#hlZZ{x)huzl)jq7|hC4!t~Yj1r?1wq~yRG>*R9S z*F#%^5?=h-qaMFm{krB9$x76-`poyxDwNqtgS4|H=v?4rjPS%4*f!W-Ei!PiEIHr$ zF>xj?Se)=*+V8G%eTBYa{)ppJIJwe)lDcG%T32|Q@R;jnx_o_ey)g4cnR4cis+8^_ z0({#n=>G@9I2?}$w>keR%qKNVtkXD)k);OB>=QAeXw_WA?y`Aig>h8$vb-Cr&`FK{ zXxB{-Y5l@Tr0wmZ^0fX~%nwuLcvm%%&S_>-S3*&s>rO;yfS)8{m6U+XL9 z%%#41C;m;qR`PKJgLS#i(6=*8-j~01b0RVI?I74n=`#l|+l@G-4NxQ>{;F?C8Y#$d zSg;J4DtH~}_4lFP`gOj}Vg=xi^ePo$vgmDnaGFzo+(>}J+!qM+p5BZC{KFsYZxjs# zEhEypRj3opc?spjMhn_g&XqnZ#%`MrAWoUIW@Hm*!H7d+qY*#LlP;@wVMtwdY~u8Yqnf1A-nMHS zeMF?S0WVlI1?{%Q~fC@14pEsz)=yp-%z5!QueQ)nE2C(g`h7|Mp?)Uo+{Jscd zh!eVy(i<6qSq*an@f;e7WX?TB`3$jh8OM<}Xje`S^KQ!cIIN12&?)A}4|jieuN|ALe}pX6J4`_ctq)h{vu97| z@*gEM9WsJm+|U$;7BiPkwMrS^^3p+t6-(jEBK4^wwL`?lhYP*Avf%i_aAuIOz!9Z( z7g?#z7R!-y=h+`(u`~gq0CXJfz^3#sLmp+9jvKymSb}Xub2a0U#SxW%@Nint5g8F6 zFEDK1yQbq6ki>M8uH9nezSb~`tQ4K(;;sAHGtdGwAt`I1a9G{XPf)2$?;iNGyxN=M z)?W2p2BNAU#F5akMOiCIEDTh>>1M;HCuWW4sKP#OBvmt;9GCW|dj|Z}Pg&0sVW&?> zcu+{ik7zT+<76;omD`Bk8ho+HJJq`hGO1oueNn&C`S3JvLjfuk_|ck?;@&GB z6`9E;6lS@oUx{5r)5u&I;m%y|Lis+$aAoq(J`Aqkw7iCmRcSS>7ZDaAc$*|>3MVLS zJ#}~-dU?fk8fsORpH=Psr6)r2_3PIsBGqqkP}zma^&fda;QUXqih|CGxv5-@0KVcM(nY_s%cFp(UBDZci9{+*M1^?#TmlqKd78Ka^sv9z1yvz9X-kUl!u=ED=sxWtQ@mea#YKkO+mb5aK>4w^g zEE_&>ZV4U1*k&^vij)spUwPblGMl+XJrDYVh1{WvSf+Ug7y9w6VOY8S*fj6v!C3+4 zk-y`wjRUcB4^Noqko4)+m(`RH&6W%{>}w-&$`N<9PDQN!1877>(qab_lTtd)BMKQwCC 
zI?3F3y#sDeZ+%m<8We9(ueW~G+2d1OTX1oEYRk`#z3dgJRc3TQKFi4&`BiF2q%$Uc zkf0H<-{f?M3=b0uZ+b+uB=LdPq*O#XLtlVIrZsdpRxr-bl3*vQuv`Yem+W9A=p?ijlqSdsEEyhZtSQ zfl`~dvo$6%%$n1vKG>+omqpKAAPaF z11{e|sN9p&THy_`RXJ<~g4vUL(56V_*l;M?L}1u|&wSuX^X0)@ zTR54Ol}N-)CaWHqPsUQSLmz?pat!_&i^?ynZ%YU#3>i_*gF?zbw?_DgGh&7i)7OZb za@KRXq0crTnp3@{bi8`ws+*%F+gZ4r|5-#M={yk$1rX#$^!n@P@5D~KbE`~$p@P*7 zSz+tS`?HLJMGNCQl-O7;$Req^!^DD~@{oPdi*Go#2RM_fflf6Bp^{Cin2q0Gw$Z)k zNR8*axi2$ja+~-O9ua%+wFT?d`_hu&HY_4})5q&pEUH4AsmTRO=E9%FSi{1L622C% z0w33K`(-9Bd6M1|9LRpwrK!U9`1o_03m5b2!Mz=N^jpgBp@kEC=T?vB&rGdntIuP1 z44Mw9gy}GX+CNtfWb7U~7FJcLRC=9%$cW5)k%l;Zmy*6CBfmKjN+0lQtateVf-~(I zY(0_xY%7RNx2jA%)Rjh@)#7-zaq31V)Qt^rzfnLV%HRM~@3xHbH^4U+e0q0>5&15A z_KxPFIBP3e?`HL zW{Kp(h>)XciigMssqL_vMU7%?S-(BtD}SFK)9$l4pv$eo&-Wa2Bq80J25T>I?itGQ zmJf`)Wj1a&mkG)7_my5RW5b(h?Pl*jG(q(LT>9A-cn6uVBdvp8;o9iLy!^(0Pde)H zUvMh$b!VR3P#GSXUWc%01@zmhY+U~S7TTk!(7rpGtKxR=<Db3 zxr~kU$LEk%N^g1$Y|VFYju(}~MXSAk9GPk61dmW>A_)EoS-pRApZV{r{g1BIf90(Z zfBta_{(H%*K()~3IJ{ZPOBdW0KNMSVNq(WCL;fX%=)hi9d!~5KL;^7)kQ~JZ5<5O~ z76@>8H)8fb%F>7T*R({D=1&Bua{qy!M6qW{S50**x!AmQc<|f0+(x7al3Wp&dp~Kq zo5KPPCjuSl(}GC<`{d;BNR3WjGvbmgqAjB#Eu}5;2PqUHs58B`jKIwfHH!4ae`8ZT zrs-Xde+`~Clia(!t4lcO|0Ltb{H#;DmpD5wmBm=W6n5jb||mv??lca zwVO1qG8(RfUA~j9dVO@TP?kLNY_AhTCeL(;CN&v~dx2qmhwkr->e74b7PR<$T$G9n-=Fa9PdjWA7=p`(x4Y=5lZHPp zSxGABqqPGFrD|(>4cB+Ira*L=e6S7N!bLH+pOLa&RwW+rllP$X^)WvO&FVw3v+s6# zf-+1y&2Y zCqD+7UW_;z@u$z&hz26Fc8Lr2+tt=wM#u3a%Gmrd&q`wj$BQkJU6we&UASu!;kxun z(+I zK1ghu(J5WOrdgq~EYpBkpQnp8KOAFJkyyW`X*Yi?Z;2QwX}C{w!c3a`QO7}(&qv@& zVZtml41|wuSnz3LT@;gb?~JAPt3ysH@$DR7IH4(>2MbCvowp{#O-0L!vNSfeZX1Ik z4}|j6+IWVx%rlYC+t{6k8v>v6^PHQy?uHa}D@8jJPKT}CmdE28ptZWoX#m;PiwDnU z$JS*O8}aR3supsq`h(5=SwlJib3d&X{6<3b^{}w$dh%^&)8Db!a>6^_Dzl0y{%m_6 zD{5DFu`SzQ7PsbRQ8Yk@eU)FzfEM%<+&!uCnl*bIaZ6)G{YY042)LV`lLt4<&c<~K zB2}O@yXNH|gSZ|*HNXh;>iRfr3HW6YVRc6ghfI008=K+}gxxyt0t2x5o?2tX546x=3<%82Wy*P3Z`= zdbyIkPtv)^Oad-lHy)y$SfpxUVROcM;@mh_NT}M(qLhYSvP{~5NlC$Y22m@BSeYdZHkhHPQ6FYhRh&MR&xP3Y&*iC 
z_Kx(Uof@m?VJ%JZk^37p`Lr5OKzt5ixsaG<)96+?HI(F5c)cPkFrMGt#y4GocK|md zS=AaqqR07`diJ9+sE^QstW|S|&VF&HCEZJ?Ktg&dNTLcXA~ zG_2}827J^*xlh~}>IqvWooH2VrQ%bv*Mbx`I}e;=&n~?xnfC>=E9fNdD$ftW>6$VI z!?ly3A=(@QobNzGPu+AmF;Pu@*akDp4(3eFA@aM`Y#T&Fx8x5H=U6Ljs) zzA)K-B3WPi8ph>~`dvQ#59mFcE@Jm>R^iXvSDk89BY#BH6T>9kevaDct2)0X_Kn@@ z-Y8Gz6KPF^)Y<)Rk#Sspq@1b&Xe7QVsp^+fm**y{ZM-gp-BTwNw&=^FU1}-rO9uvE zdm^I|`-BwYPts18x*V%zq?p7O0Ldc~ej^Nx9D`j!HCxUz-V76Os^Kihf{<%);RQ`S zBqQ`UO;n3y0xhQeXo;M&YSQo^f9yaVxk5rG!rcF0E>6j24r$S@z6t4`oPw#yq_)e< zAI!5612w!0t4#Wsh9EW9sNYilZRxa5z!UzWi9rOa&)4&F11 zd_X+$P`XOGqQIG-G1u8DB%-L#y-+?d(%-5U8@IsbBS%s2(Yd;zLJ+TkDs9+9q( z5m}%zA!}R%5}RO}Z!7Aa z)bdD|1j;+tYQ1LNwzK0D3HS&!-P6Z-Rj6%O`g%j^?}lY^Oum&)chvfzIq(BNc-gL$ zhIMAEeOjBbmJkAxW^Z4Ot7AH4WyK$?Erw~|$kIgFoR-%ysoIt9q4U-fGR>Ox8t5m^ zGlk$M@YXWS${u8~PH;b;lh8{vhQx)r4n}-_U>U$!ZMQ(vPA-}EK ztog%6RtgJpu|^ep3mO{Ny@3HU-|_3?x1OGrX8BdbQndTg15B*t$sXlYj+TVoMwXfd zyf%oYqmo1feLfA$lA@!dUy=zZNotXxInBc8x1a*X1A~H~$a$rpXt{Rx&Fiepz)i*0 zss2XyzSr)%y69)Ex)g$npt6vPu$=;~C{ky(t=%|dS-ngW)ae0_T^}0dS|Y=$_z8xI z!?o443|u~*Rrka1nsi&dm5ZvMB+wfrqIFA?c9Ttrqbw^k;{)&dcwyafrK-g?W!tb_ zsfn8cd#VREMG|4NLM+LuUe?QYL1Tq|LbZ=ZPVZ$4{g)H_*V+g?qY4+r9r7Cp@FyC} z56I+79cd?asqmd4;pYY>G_#-5HajS5xr|)U3M<*dO8M>z?77tG48ls)cWU208DOQh zH;zuM+`DHmyz=|~m49c^5s` zfaT8WYlRXG6I1r3=D3Yvfqh&#sQ@v4hpW<~>fux1<`gfE$MKM?2EZ{oi=GksvR;^e zu2njQF-U)C*<98ooDsxwNj7V=a%uozEprHGNkB1z9z`e1Hx*$^!fhB{;d$z4fr9QE z_ix0qoAZtyXZMeCak{p&Rxeg@w}o}imYjF0?}|mQg5fg8P4f0$=4Bw0)`LdFoZdY!EXT#RUmPd8Txj*HQ;Q&=*VqX89Je_Z=Q}$c5 zH2h;qLYcZ^^D#*LVrx7Vig04*D-=sgqr9`{|Hcf0IsPT@a;hYT>kuU}-Bs#r$GZ8+ zUbK+isDh>}86DR08!hKOrVUuV-4v+WNqBrqxkbntf*8mMWwBGwu9hFJb6U}RZ8z9h zX`iJ(za>yKm9XYYE?QgD%-;4BHWgDS#m;vDTxS+#U&4%hYE z(=_tpEpo2>jGN(G-x#z&zSz^&?3>>)%Wx?^lsd%|VuwuFVPL3H*(3P|Dz zRi|R!Ue~5ha)iOPTKXKQZqkBfjFb8AX;^#Aa=GmcYL^{80*kJS5n;{@z0O1)5QJaO z_DR=ZK#NSEP^R2i={lnbkfkk{ok)UbLPeYeco+1&^`uh8Rlv63+ZEe&>r_{%_Ma4^C z+D2F`Adz{s>hh{)%37_E%!RUL?OixmbK1)toOH+O^4X~r=}MjQ@`c27j|<8Ws+h#ix&3NZawDyO7FgkbL 
zMS}20y>~uk9Sq3uF8G$7y+(wWjoPe-uMO2naJL1a;gMf2SsC!s+Z!8=3Y&k{KH93fD?MzLy9qX2L|BL+rwTL-;4D4c`oD3 zs*fhJ1V~a+=jRu$DrK~@Pjn>O5J{7&U5MdGE^3E*{@bW&JERb06CKpahoZjn^6j`o zuXF8PQ-*q|F%S^V^TADYkl41OK%i*kwMwg8ybxO8!OWxvDYV`WSvYz*UMQBT>_3d%F~`Yy!Z`V2O^?G5}UT zjEr*;5ub8Zf7QG~FG~tcSie%dp#a1QQ6oznB*DoTCl%_*ny7fAEf;Gzw37Rub=K0o z#N)-eIU$rO;VR$EbLN`{A32!ts?*(g&b!}oYQa@=>L3Y-ZJ8Fil@q1Svj%|4FeSjf zjf{?@lQl>m-{62Vod+xer1oMJkm{uNwH0SS-X8jv&Q;vK*@dg(Y~@JF&#D^JiS9Z> zz@8SqCZ?@bN^pDMF2-+7>n<#Xeu#6-04uZTgEk|kSDnI*!ru^um*hktg?PT(( z=)elrrEKW8rr=@Bd-JtkdMQBGbM}`vQsB*Smze7>-9xdQ3#B5uPJ@!>e9L#9IwAmZ zP?NobnT(4HQg{b@gJ*wyVNUs4d zlrjUiq=fGATbBH+DhXu+ zE0INUMQtE3wmpX#7L9}!rKKrw61WN!P41Jn+%^j5s^zy!1Vainm^6xd0wBq}a+Z5V zjoN|AiC(*6^%v0%1h>L@mFaVkwR-J>8Kh$NT*ka%Gbmn%V^fQ!kkdt{Pba|yo{7lL$S3d zz{%#=^aE4EZ2XXsW4SBsX^|k-?T`f*f2m6Jp`-1 zRjw`}(o0Ix*^y!h9@?8%%i<4Hc$#B07f;Ld{}CT}7GQDLLPrHQyC0@`*Fu2e2Rv;e z|U}Sb#-L zvF=}icDf6H%3qcwHrli|x(*o24k*hRn-+H6jWCMvwW!g@k(2I0f}_EzYPvb@8zpaSzMG=64^L zyBXH(c9nbsTfF;4p4EoO_t9J^5gz^N00h&#{T@hDALU#NE{nRH8oAJCxQsLxLO z$d_3S?>MA7gTV4op_61j;`G#oRtn_d)HEDC%Xp57rIE1B-d*TEaMpD9 zCB+o2#{Wxm7cZVqMNo$&Q{pK=LREv4QJ%4;_!1KDDm#m^Q{dYCup_IgwXWfCe$2!l z_h@h)PjdSW)s0o5$>&Xc{vu)E_iYyO#TOg4+A|rK5g*A{k|&n0K-%*)WD^Q#0DZeP&_#gR60}g(7`xMf}ktWfRGue9A;$ZAwy- za${NCW3H*bokG7Fd5+1M6?4z?5_C5?MUv_2&khs%r>}t_){GiGy=4El?%0<4;1=&- z=scA-01LWu?9mexYZ?*EUSYZ`gq0&S+@_4b7?{LN`nLTQrpU7QkKVW9+braHA_HXm zn0|t;w>;Vl7%+z;8EQ`vSj#h{YK=^>rZ84eOmRV2%}$Jfsnb9vd(ddW#qLP}uRzvH z|MAn#6MFM|pZ1cp&kIrxwZbF2lb20ZVvzhkPELlWdgTxvB zS9>f3%!qfY4w&St(qs>|SmdtQ(wmQIeuqrKrX|ldN4$K8X6?#LkGMN3D{Q0|Ah&n+3}u7E9^FI_pxuhJxc8aofRAR}4=-;9uCXU2W3!OYFwV#M zQwQq@V>ASxd8&=hRcq&1R$EoM?CZTQh;uZD2PFI7+FP-6f4=Dv6;X5!Q5T;f+0QSs z{?fM0@_3se-=K(eMLbH)lGR4Kw8dQbH5mslV2pTw#0rTFpVcV*aem|FpjgX|mM83J zZf|+p$V<`xa&#+Jr;>|qbORY-idSsOznNtJr$qG@Cuf%5&34QA#v&98rAf%4=`E;2 z#>B9bz{Ec|IvQ*6%A~NMF(&?tB8y4m>&|Humh?yKHUlSR2>>(w_9J^+D)X+MYRXcK zEOEFS6o)0JGEys^Tp1w#vqHJ0g`~7b>)N#XWJ1`p$^X+|_KR$?j+~D?Vb}}{*}};B 
zqdw$d;mff54R;%mWdE^mr)Q`5=9zU&;Pqd^A&BRX&;npiV{AuLlyXvb^CVC)1CMktjjsI9{xT}N?D-1axF7B#N(dE%280-2{@IE zS-0vJNUza$6Y6Zux@zm2bI;~BdQDj8F5S}EMXHHFZ{TEYSN(_bB!;CsUGE&zo zRlkQGr%|s2A&9B8Mf)#eFRh=a9hOZQbOsk-F<@`>AzL%yaa)AS1@8SKJN6cVrgN^< zKXv64vB)@2(>b+`sZI??mVPsVI}DQU{WDb z>G3&V3f6R;fPwNyJ0rFK$XRRv+N}{CwYMmc%`yL5D+Fc1-Je$MsjF@fL_K>jXxt|2 zM;_cN(KM2bOv@5c_4t#^)^Qhw;6n7!o^;Uy|ddrql`bzfmXBOh2ha&P<0$mZc z+qxN#$ko;ZsE408v3zkf5et$|3=gY=iajNklp!fQ`aI6fUWv}}9Ukg)V|h}PX}u6aUS;QJ`p=%5`} z*(8X<64BMgsVeJwo~^0LuP|E!2U~~TEIow8h;{cMt3F+$9Z`w6=5k}^U~xPYi_bJ_-Pse?vSI}%mftp>L^n!=joJ;Vnd z@1VQIuh&+$qbx3@CdzAB+Hj@2jBxOw!oS*#Gz5H3-=SZcP>IcjE0fHcw)xQ6vx`yR z?@8c0i$sQ{@b3(7bDP7^0_{48JJ|mtR^l4$#SN6TS-qWyHu8mLb#Ji`rTMb_i1?vc zHnGyzG@au{WwV)J*D)IGHDU&KPhKCHDGb_#GJBnfs7>atXsix0;kf@ULA4nT_&Q!- za@L=ckXAkQtyt({x|Uf0YCW{|9jnjPbmYk0qA<*~5%$d72+R#wN}xxrtG-MWrcKhz z)Dw=v!)I%`k$aSqKv4Q8nYSOH(278E>D$r-mW=9{$dPvk)~s7jzV$p6O5Oe=D4AUL zw&2nA-%U_X!K;Y#bBB_2KN~jRflQOpt@VAHZz|5K2GsQWemHa0-kAI**0BWpeLb-K zS2hma3koUX&X|HyZTq1)ca}XMN6N zIJ;Qd&^(fgoSP=+82P}ZUIv;&e6JlX*<(_>{{<$Fc*f|(oK-0LqF%$`*Qs+Ghq_(! 
zmOXn6Xp1%}Goc^CpS>Jehor|=9CUXYDwqU>Ky~8rMnlL@!!<8iEz#;>t)BcuC0c`mznt-~JiN?0Y@=aInx64}F4L&yf0s9@l!_-$ml-8oGj-^1qT;O*WiOMRhY1 zWWWl`4n34(aHrVx$0+`mkA_LP9L9ao;b=ukq`?IC@|OgHPoj`+`I82s)--1Zh9 z^79B{kVh}*onsMaY_VVWztUu>$#G{X*a}%&P?Te`G zIhb!FC=DJJSFm<1VtKRQxWl0J@wK2S2NJ(6sm6NK*QgZp;y_PeK#GWs7FKG@<>-2B z(Z71#vDOfO!udBo!MwiMeG#Ia7C_W=O z9wCBZe9Ex2dzA*K7k!~WJvEyhezWK^=iTPYbiVFqgV|L7F6mS)oqCk(RFV6g+x<8v z*XO8yJ4iQ=x=Roy2^(@-p>2n*gaEMC9UmL>d{TWo=BC7_h3U}zvrE~`A18losAVBc zW7l>irv8C06033iagt25Ls%9;&Smc?8rH2JOw$HOZ?y^Z4baLm+Npd#q_Bb)>y}t= zEwkL~Qyhmov(^rrxc$>hVYFg|L@!GiM#q=}?>SL-BRgy_Js8Xmy~z-7x5%T<+Se90|k zuT9KCtzPf{Q1zB!ab)4v_9PGp4gng6;10nZg1fuByE{xsaCe6wjk`NR8h3YsTX6TU zn3;3V_x{X9c31VT>bnFwf5|2y-7}tbS1OZkCn%_m z*>5gm+6ffGPlx(^CZ+I&W;v4WcgJq-7r3!rA}gDrjsJb+*$V3H*Iennu19Z5Vs!%y zt=LXR9*<-)hQShGtlQ6fc@cHIL6n6H{Y!3r)C`_XW@l)^!zqd-jn6HU=Pv)y<8ZY;YwB^u zg`p3Z8~3o|M-Y*Elq$f70k;{g9xTV@YH#J;Ri*z%X=r@4^YG)h1}mE{3%b2XEWQ-- zS-qR}$ocLOs~yYzkpO*(r~)xI=1mNZr)05><*&T(2Dh+9&|R<(lP_;q?l}xKMhG7G zJKp|D;jnz4{d_I&!(_<3T+v2&fJKeneOtwF8@F@oodTbYF7s@btAavdU~?*C+u7E* zKH$8pyCCiRTCL?@F9_`U4D9~*37s-&WV$^#_&z{F*rf5Ut>0szd~+t)4KpasboH%^ehENqf~}(~*BFiOzMTFLxSDH{kd`PoltL zA22^Z`Fw%!a+dxIA$ocEpgq=1y1s7shv{q75j9BeAxJWNbYnYx{1{pQ4^U$Sx59B+ z4(L+?x;OpT#MQ=axnVVkm%;1&>&>YTk9iX7=j>mO*+yDYLW}ix)|Owd=n+OMl$KbW z0QWA2D!WU~u$jKjOhDN>0%WDgQ`DZ{(H=x7&GYZz;O+Tz<>S;g5vl}PeLq6^Se}$) z0oVKc^+BFLn?3NfL#w-ffFpeYG2X)u?X|;)Xvj1K^D4O|hZ5;?=Zh0^zka@wGX9V6 zp?R!ji9xYUw~Gxo-!|7F(YE=Itf}^$X^^hq(5Wx=;p%RCj>xenKIxuq^GQd3o)^mS zeV+kc1E5Scz+9Kh`c*hRe@14x9Vm4p4Vxr;A0Q6|ot&0`Js2Cd>|b@GZM74PKyCu3 zIj!69ughfhnmfy=elI$)A!-=QCO>febo6Jn7=7Bbj}CAqeP+gZ-?bhLJQsDXN8hl7 z+e+Sjb0-I*jHEOV%UaDsZ>4s6K|=*&jkw;OH>k48s#599vrO35`F}vCp+Wb`>Y#Y%jX4aIV^$4k^6PPh6x*8-zJZq zNF!DXyNKG>0;(f{la};`B6T(V>NVGC=wYE?c$EACXgSDNsa;AUXkv&2auu()cBrnX zW;3&ki~{trH|V1J52dx05*5GwUR4~=t8d_iN4I|Lv`&N9udzQ7g7TTge|1~#i1Vyhy+L2eBB3XR;#Ak$D&=OxlP|`jPaFMhdW6(FgEZM}jn$Vamx}om;dRBp*uRABb7mWO!LsI0y8v!O2JD^NcLDwqJ?|Mo!hf4u8|DD4}S{ojB2 
zdNxe59&M;#YikEgJ`*dIexvS8g?p4wboe*o9PMvk&sdntdQyA6LA3&66Bh$zF^Oj~!&d?~#fIQ;2xJpgVo6wXA z8&|WB80-ybTVC3x@vxOMTDTWN&G}}n7rn_XNm{)28AP()7Z}w$1Q=p1j#Bi;!mk)w zN6yWsU#xgs?~y%`j(a7%bImOSK>CmCwe z-s_aHb3yM3vvBLVbXp|sgkeJoZ{L_!d8)!<&NN!925A(1rB$8i^-O6iOZiWFD`J`% zu;w#UYu905aCJz(xm<}2GFC;r?z&*jyfDzr%<4~?(gF3eu`NR$?Nrz~P#s=t#Fi!k zKs2A)xk}Aix6nkLmEq*axQc8j$90K?=OojjNhxQi_`_*(!6dOqRt*U5*@|l`5)(b8 zK!Xo@hfO(gbC`>5nf}%xEq&P2Ya?OE?P7#{%8F<3%zbL$NK&K1si%xE%btJiGM`&C zVuSlVk*+d0KKSnpF{`O(u1d4kbKW0)yYM=UnS`?55AO%rGEHE)w|*wwNLnD5vstbv z)n>Ner^FS2NHXk!=JD_3#lPujV-Tzwj>g|xS`=?Wc3AQ({^`t!&7Lc9Hmk0F|yTpoT>&0g-6Sk9&ByO zI+6L3wSys8=R2&)y$-6QZUvQN0(ei|TZ!3%QD0M`7SqNt)&TMecy|u}# zYusjevFLotC&}=0T&*^T(`KRYN+Js@CXp#4((n| z*uXeYJZqiDOBBM6tSq#^)QpjV9r|psz2&(mwrm_occ#u`b3{~$oZbwEcXD8ui#K|% z1jDXmactE~$WocbP;MT$-#!l5{bVQosvKU?6$hgAtjK{}lBv7Lq30Ki7NRzTrkl&n zc)WXJH~T;&nf3t3{5xG!UbmqDRN34H;98N7iM4r_1Y>aui;{Z#ERk*aP%LEhX292n zRnnf7x?&k^7~+wwfS@Az>iBL_)~m94=%`O`4o;J5MR-&!s(r|pvQ5kvW%JGGa7um( zVmDendAko%b!3GFn}YZ&INRgusCpg?(m>?CBFXj2_}4#9Ak0umnwTBeHhEzXJdUOTU22W=KrKoB@Ev}A64 zegZ#HYnS2+$>}lD=N=EN_H`uY|15A@&T9qwbb`NQS z;mwf3g_L8XXp?2zhRorUYF0_4Q_b*p_~k5J9xjg?WXQE^TbqYSSTug;ypceT(jNKr zE#afvAEU~ryG))Yf*4U@Md*I_Q$nsiIhH6L=bVz9Pla2s`98N*?x%WmHh)4ZZAjOy zX+TOZVO!v~jC@!(+aSi9sZ)F|KUicGpQ1{}?YS=BKe17(t>zq$DA^RX4S_MR=8L7g ze$feq!7~;~5Z=ANwNe>c#3oeYy8?Zca79~m@i|?;cR#?iRpKirT1GdMf4d*uZu1-P zt7l%llFX}S4T_(#Cu*iJThMpQ&CS70R)z&%9(UvLa1fkp=DU*)xX>B~t;OMM-m{ z%;8ZL}uuEvHuzM%0xn4vZ(oU?iE!f^zQf=2q;K(Vq{Ov_j4u03if%bsK@E z8)x*44Wp2wJEWYo!Yh1sCxI1f959)YD z&3@W@mPwQ&Gz!zU9EjenPCNEAcwcVItS8`ERI}>6FJkA*0de$OQI|f7*ANo-<+rn9 z@87E|va+RIJZ%QH3B|xsRuE((-^?3?DlY9!ASh21S>XFy!cCV04EKWT*&arlpI43Q z&gRSOwJQqFkDmF91DI`+3p`5($={)AqY%cl-5W2VEUu`6_SYChO{^WsO15jXczJPU zCdS$NyQ>d@&7t_q;C>(=>i*dOmDZVjRf|6?ubB14Tg%4Ap6t)PwpQ6eF0-m~c$Okp z97y`y=Zls1@27yNBnIa1`oQXn8b?2G6js3B)?;B|O`@b5?tU7tG%M-q6;c2q6|37X zFNp30Lw?FghN~Vl6)k3YL|HH`>FN^QO2k<+vB(G$ofsa#(?9Uy&h|g|k5gG4zGQ*) z3)Lsp;_s6{P_dF)5!XI%*mi(UI2Ri3g9jSPyb+sUl+{B_oyO;RqnTx`vY)zEl)#s{ 
ziv+c=Pvy>x&F+a>S}{>c4wWYT5g?^$`D`E3ALL~w5KR~7 zuro5LOaUx2R+=q<1LFYagp(yHZXFl|I^}7mSt`l+A`e_}y+YOarx`J?g7Ljt|9b-c z@s#lr+XfNUw(QE30pNmrc%T@uut`SwdvwIt%8+zy= zo_h&?rzOhGn7__ts8mJYZrtsmvEsKJ*EcHucJ0ZW3-cZib`7xG7*i-5~*1dT|3RWTPktbKWdlQLPrvD+e&0sW{0+M(qu|! zEI;~g4l2=!eEfr!@fk@fAGv4PJnRA7G(N5`mHD?FywMZp zt!KVWR4<5aQYR^3Io+Bh@jx|!^!5w-@Xf1z;(~RM@!I_B2b(mL2YhXJ;)n%2O(9M)lg@*8|M-^w);* z>+6zkNi3{NK67vcDbqGOA3A<+4g|*`BQ$qRO90zPMUYy|seh`eRYSd7V{y?~uiI?* zWF3Oq@636bfotCpRcUKK4n)l?M!Xpt>A7R#;+mk~$2nkWb|9`w$o+X#rcjHstWzlC zWwvm{NGQwWPt91H9@1o69y_0f_iX?kib=7So_$HsCoqC??&xjuj`T$&?`&OWY)Wr+ zbMq;pjZ2fG3Nl+m_EKem5OvcIt))zL-&oAx8q={N+ng5)p)j5zxtDwM zX23cYEK;_6|!p^AzWxD8ph|(V{Q3dZ-XNCs$Gw zHHE4djsg;iD&I6R%k$zmu+61jdaNWf%W zxC@3BVw(~0hT@Bo8?)n4D>vHU^8EvK;_M0w$D1XurKP1pfL2fV>tKlLHU9V76v_c7^H9Hb~om z70E>f(&fz5Nyj;^rXp&ARfA}0BRb{$=DX2`!#aCn@P<2}aJ2CJwwdM=g;#2+XjQ5M zy!Y_eO;6-5ni83i8`>z^mtM(+GF0v@62Fnga$b8%gqNtgZ0Rp+EEsbbPF|U5AoI~J zNh>HgdE`#8D2j=x;-3@&tMpDQQZz798)u6sfUxJCXsYz_0A#TH9^3^Peb#zD0L@1U zg;S450g^y%IRSAswBd@tJgtmYBcvR#tVh-gz|4OycZM%VGM!@K3n0C0aF`?wXxnsJ z@>-XVmi(tz#WPjxxd765NJq~sYFnM zVoClGXS?fxUFnR?_0vvMscv z(F_)+r;OAEu71V%gTcQ0OYx{=?f~m@2C2xTnHZZXy4nh~U8&b!5a)4N;I7zD7|&!!{#RWX5=d(|r*cuY#dH=7H{pfNQ<#vM;u zrNdv3y0N%5lg!5*K#fI{wKbVuX#q5n{`~}GJylZD+-d*)3U5!+H15@wT3-1`(#Iq-gZK$K_j%jM8^28%HhNfkX$x@OPJcgTr+(qO@1%qxK}*1(7p@X)G7% z`4`z9se+>2%5x*W$_1%5HJ!quvh2Z;Y7uX;4kcl9k|Er4-g@anADUrSdj@r3DXNbx zp&cq?9Hz`Xrv=zmnq?Xz0f86HqoU^V9|l$Hc^jAvfkR(n(QP{BM$BLpYEnNJaP*&c z;>p+we%1ziGbt4;maX|_tl(OrHugviske+PrGglAsa+XbaQ06UaCP->{Jdj+jJbJv z!xl-qnPmc>2KMgkx=CZD8r~yNDx()E-8Db=-i`6PE6!t?d+n&s-#dc-cl!qr0slCpCEVs4A`ivGoA#3?K z!L+5ueN2+si|62k@LtNqC-(0hO7?tO$#WM-m_r z_9*HGj5E_?nP@A_ISv4M&*b(`Lw-3upj{+5b_qn$Ek{1L%oi*6 z0Y%H-x^HybaNr)5IzYGeFdJn-CVxi2L!U<3IYDg=l-nu4`Fm!~8RfQ5S*ZUr;(1!K zKprs=Nq6Y(5ok`n<+2#W^u;lkm>h5I)hc&+n|f3Q<|Zhu*`srJ>>rgCa@MB(2b+>Z zwMSxr)hF^I5dK3y{c!#tqk;bcLjNB+^nc2z|CUBS+&`cjA;EX?n0RQY6uCz>6$HG} z^a$skeDq6me2%~w>YrQ*{bZ{0*~7gK&1o<+hVGwYXx)78`NUef*PHud<%9-y)Yid# 
z9o5$RuIT9>uae@qQfZ0jxBqrtn7(3tD`aqb6>VAgE%!Ee5jzWU2pBY5fbV8dn>zmb zS;%ek>B(1nyqXIN7+^~KyA_Zdc-U72 z4aL$-#`84Gzn#Qk#;dDHiFZz&1$}cHk`A*t^m6vWYfe1~yEIc}rFwi2W}U-`+D=sc z>N_5+$%BiqTAkv%k!d%=MRgsBdx*EMj1+e0S#wXuI1K^y5YYm$Mhl%6pn3qVW07!n zWzvMFFNi&#V{xql2jqrvdnI0*RkeQ0aG|ypgIgt~_%I4y#7G!cIoNh#Q;T7kHy4cD z|L%|aA0WPd%JSd6@DpACFRXLg0-;7q^LXR@P>3Yc<{6`KDAQTnFw_jl#3yE=-c4zk0OZgc4Sy}CeuF;Xv|)q+-tt(Rlv0v+;VD>xN_Xyf9HMD zBzjJKBCePG901iKFdO^qjh##OKwd4Hr!M?YW{_*I!~T|06$dE2SbRVYNWsvX(qG-1 zfomp=c39()HB2;xcHD|E;jxK+)|I#7fhm%8>VG*?j3OeMT!%W9(0YKC@NvwH?~eu! z$7Bb+LmQQRbiZ@Wg3s*{q&^51nC$!LC%Hu9Q?2K5p2@AtPX&X{`Q-qWlFeSV ztc`Yg_*(7(-HKaO#$0vlv4wFI9uF3yQD7EfP!M~YR6HEg3kmC)#}+Z6_|rn$wc&wZ zpiyfds;(~|h%qI2vDl!va#irI*mWcW9K1$DKqm(?Ek$I?)TP6!-L>cc!WYFgq&$FQ zN7;SwSLbW?D-rUCVSO`~IOj)1>Fa#%(#C*L7Iw6lWg%rp*5J8vv&4k?xIpJcuMvIj z(_nGsSjxHZ3Wc1AP6X~+Lc8wW{yg}x6&c>hAATm5@4^G;S>oo9?3O!(4R^^o3P=k1s&yd+OQ!yd8*s>Gv({x4u0%oXIs&8{d@1=)K29__#eO=8_H$F`t;Xz zwgYIs_rdX?2@0qL8jea4Fa)X7zB2b=5ACg#ET#~+ZycE7$*}YORJy0^8W;8#Cu5(M z|LKDDj3m_zHV?|oL#U8PuH8>R&UX>47}g!rX&!rKVW*o}7PVOYe0|xSQ|mjg$GeMT2Y-B(KFjC+WOyWZ z^Kush{1Ve@BZ6RHF!pAiq@pd`PYFvw3%7Vg{mbqAdg<3`~Vnv{?jiFst_PHb9j6Z0mTb|bz);aYMAa$|zn3z}}zLRWX z5Wj2FN_&{RTRe%Va|O}-X=~iRJ62C1`hAbR|2PRBDf?-M(1rqObR^2Cdq-;Z43luhQO z2dm zA(V7+L0Jq*Gc6+Yd`zzPWer{huTHdLs9nr$(1{5t~zwS;W1ZG(Gav>AJ}LA zlkl#zoG2zq8?ZIf&P(3VTZyviU6KW2#C{KKGIH*w%1b)V4%nmzIk+IXe*y}B<|hbA zJ{O&}H-14+j6hT=kb?lgd8S1Ju$htGAF2K*X!T*2d;X~fqJWCv+s1L3{m)6%(oQLa z??0)YIp;>!V#9D-pLJG8;;hS>bVQQ=7X>u@y@NHM*Q|U4H^S9#)b9VHfOuEb`ZQ^w z^h{WPSMO=diARli*_TUfZ1*vyuu2CTpA^?guYSw1!};d!#(dN&Ia1vsFSoj&ue1~V zmQfYB=>L*<-=Bp>bQIFn=+%TZcviJMVh8IFS)klr(PJm_5EQnBt0Y?b*{KvwNqkNQ z22C-ojF4^x%B*;V-br8~792QIw?qW|jWET>6(aA$`@(T+VBiuvwZUBWaz|Z@+=_bY z|1C5-^|wAFum3+W2-iB3k5K-1KFskQ8Kah+B==VcLp@{F#}l?g7y8E5Em^DwZiOp) z{kcatDi1`4lNa^aIKC9KIQPSN`{h^0>uIV#2Imv;tae*6nI7AvCEGevn=euawAD9X zm1*Zb`WgP)XvYJf%+8$BUqoB3JdTVwmREhaknvk)$O#w!ye!j#cb0K$$EL*J3lE!n z0HS;T8K;9|p~;KE8EM`bLMxm1g886pyqHzHEuC4hcIYwohrMs&YqsY){g&8|B_TLI 
zM;ra-53d1}@4U5Rv67A=LlYULCv~GTN6qjFo1F|Q(dO8b${N`z-A$MWVZQ?)^_rNz z%S%;6|A#ns)0eXJ3ULo}0r?0l2a-CpG^oWAdvTriC#Xi^wlXurN2faRrHs>&P5FrP zN`e@eBJtGf3wG6dMongWShx#oE{&ndol8eL%U|ZN+`H)l@e#8bUo?UnDSBZ!T&EupZn)154{#zHwH=n6h{clIz+H)eJsIFP$P?~el3S(S4!2xL zotI4<&9)R?#lm3_8jGxL*IB>>2_^7$i=oJXbpG{E+|Ui*xKBGDSt9#y3%<;HaUW`L z-wc6@FR>??)D@`&Kk&1DW$j5N-e47Tg+GRH7WSV;%Kd)1z~M1}#qH@~l%%EGos3G{ z*Fgr8FNe<}>~1DZNto`tg7#C;)TerBR6%oW8=e0hixoh*CAV-#aG z!hSC4F~iY_ypo36uSj3-a~fRFgVhu#l*?l!0mU7GWXJJeFok6I8o61eVm!^8@j+Wa z>`)f??tCv1Z_&nD-bj?d-OYCpyb$Z?ES_2k>^g~$ z!PhAz0HazPT|^zx=hn~c^zrTwE46Q76+^obg0wMbWOOTjnuw<^~uQ$%S(KfZEnJj_M*!wZ%UJQWgwET_Wtq{x=!s z`v2Np#Ox90?+( zvy45zcXmNkZ!j&VebamC*He_q;LB6yog>%YU2%gi{v^^jStQkPn_~yn&uVqdi3Vjr zQ~~fY-qJ#gtuonpgw0aJ*rrWT>X)K2%ZFtl6l*Jyu{*lBz2cw$xh1_le7hq?=ScM> z2Lt4*j2bYTO$A4uZ!0}DC~&-$7r&S?$dTLI8P#9mXZ;AD85_=oFh$E7n;Szqd|Nfn z=3mF$gT&869L)P0tsUtTIW~1y<}yPe-$l9^G;0P|g#A9t#+qyJyTTq2{oXdR&4dJZ zRtryhUGLPIkLgr^hY6Y@If?j3W_jFf7LMt4Gtttfvy2*6f3PM-tDn#{K*&W;R!es; z4s~_BGi84tmt!wHP|YP1P@Ng2uTgwzrQu|mO_T5xFXf2MjCcP{o&hl?1P)@qKh`vK z724-vj&Xjs{jTix)OfD|>~p;*VCd2txczsY&pjpI=1%-$>53-%Xp97;gGR}{gy9Di z-c_hFqE1j~drK5tcocQgk75)D1IVE;hHnb`b3fX@@A62%^zziKq`an&FStPB|8(Pr z(c)oae^>4eYTaVW&K=GyV!qJFH(t`=InE>l91+#VHXl&gWhOSvT=+j!yK6p2GQB7< zmJjVw`9JMZXWT@SGBSn_6%fLBX;F{Kk&=>wN@v#%7?51HUG^Spd=B2PFt0U*!CDm4 zk@sDaz+8f;-3aZ8{t(~C^5Mxc#-}ECi7Fert%M{f`Dp;~`$-9k$tefu4upy7KcAM$ zZLSzW47UDlTc2{UjIxo+d-%6}yY(-cWmuE^J%!mRS=%VQN0&-sBIMaXJ)_yMqk)G{ z3w-j~(v>WO(Y}3iAMnoXZGHzf6Er@(CH@}aH9AiHZ{qLyXC%{RlUQQNCEk@!8L7S+;k(pb*9uy#|!^!RDWAMeVMeOk<@ri z8>gT@yLL-XzD$~&->5Iz%tp4QzJe2`g8@)+Q^)4z;)A}FX(rb_-W>jIp=9%7_t!tB z9e11Z_))l?kqqWVtr_XN;rAEmsdP*UOJg|{gm?tonuj5^cVXvt70{T|(~SFHl78!V%#-2)ugs;#;N?ioN|9`4!Bvr@aW5=4pMhYwWK37xA&x2 z)|xRtx6ZD}+uaQgY6s}!{!_>KCu$JN>!ogGdf@2TNh`?MI)@OgMPOr)nNG_AQ^+@UvgW_lrb9oZx?bSo%?h}D7M$-s859*02 z%@xUCo-9}Dku~76YpzX8PcJ(&JEue&m{d`%Tk6e| zhS^!${4DJ9!H@2rWTulFF_Wq15_>D|M~B+^)(o9ld{?yWF9f-jrQ$6Or0O#Kse*n;N4V=UTG;uG zwc^doD+@H^ITWaW@2DUKpFi>rqg?mjkLlq1KyS4r 
zSpm7MH?Pp*VZU#YC!^UKL7Qx_yLvyRaMUN0A=Q}STv!Di8o>r_fKTi9>lxBb1ca{W zDXoq0+3xU&RndYqmhaUXPqSer|I`y_oc-%DZ-v8wl3#0X_3sbf<{82rQvK@`1nKhh zA%{e+OUSNsXlG}ixi6Epu*pFegr_Qf0)0;R(YMtf0u)P{8wTF*snqkitU>)(q+R`y z(HWE{UI1th0dvz#xC|{<%<9zB);!|%=xOb23E{C$S#@D(Oj~^RP0#!Rw z`4hLCrbUdY)7;5aKoKT@D7T$ELD~9LQuwFikOS$#I~*(hl8?}*VR}+q%v9xA!)R^R z58NdZ2*c{-98W!UarfMjF!eYc!mE)#n%A~1>lP$sK1cpYACib>5TnDSn~1UtBbhF? z`2WXGw}}Vgfb_q@|4)hbeHZMvH_7DxRA>K}b$u%d|L;HkMP$GJ3!KV)388EJJ8p=h zwH_Y&75ks;pj}e6J#Y7`hWQwDeVRFCbl|K5U+i!enIJB-TU${%Hg+~Gq;oeQAT|4p z#}Br1#<7*;&O5K1Cg5ndV&Grz*H_lxhR)xAci*;hz0D+a08qhQyAf~R%pha`^{Ea0 zfv3AV#V>`|ROc+3^k4aS+q$niP`UO1*KcBi5XO}%BBBMDs~;aifGW+3rl^hrs^AbR zE~dP&I*O@fHPA#hQLmrY)ED`YMo+_)9ChMWW7I7^4HK*|w<0Yp$xUo;qa-m>np>C` z_2ajqwuA}(b!K~ld2g}JMU3X6l)5letX0E6zRglA&p@c^FfA3paO$N=u>fv#N&kle^O&{y)x-d9yCUmkyi?o)S{xG++ z;!46JsVxTd?OqOXqbq^|cNA-aenxeG#ozXDi$&=msm>@UUQa^Z z9l;VUC(ly2yV&YTI4L_fR^2H!BCEm5Xx7G%Ns`soRXO8R9&pNbwv@;9_qd*-g}c*> zt5Rb5UV4TqbwGDNr6g8eN$|}|GVLKmE+4SAvn(m#cd}YKD59O25tjl&qis#Z0wW8M zkil)DQ{s+k1^gGKeJ#y`6#Iuqn`TZ0zoNA%>uDZg4s=r%tUL&is+#-XrL|0 zJd}=!S7o_RYA6!!iK~V9U`THkX~Co^9IiOyb_E#gSql7woN;K$Ow1Soo~d&~__Dqx zNmO+yp9%`1t0V$J)=uklMgR~rbZS}MTpHfpSg%2Ou;r~q7k34C(|!hntbxBf^6{0c z_7*L68^vQjP)W_r7-h*!^)GA501kkOSB-TV)bmDN+FEo8*41Ld;W=?2JaC&7;NFMK zZSny)K09;n%q2X*q>*BYSf(sZ(R~Lf?-Pct>Hz=%U87hkuBicBliIvdh*G$~jGCpD zV}5zu62xV+fu^)1a;$Ng1`Mj5t;fa?D%J@HT>eP4%KS^qaVIx{;bwqGZC_(u3TG~) zak(U}AnVkcsWl_3BB*hxglNu4>mZ#{mIuY<&l!zR=XCBrrGTegL@6YFVQc;0C}vBbE!?5T(qMt`;tUAj;+< z*z(<@uAuHUY>y&Y_q^P#F=&4GE>QMPzs8{JDwVWTa_A_*9T}_`{UwFVsqKj)72o%( zswy{oDTu}Vgm+hRPp`&{WzzJTSD;3fW}414b=QHVEdW1zxJ-q1dMuo9CP~b&!K0&_ z-Tg+!Mdl`6O?6cFvVO`wgFaF6rBReuHkLbKtY71zKxXQE4yC(z@Wym&g$q8tO{Vmg zTR}m0gbST-cO;86(UR~oxuTo0GW7S68eKw^`V$59vE!`^&1R-1-;r!;L3P&&2y<3* z0f&x&I^N|3;)NGNWxKs2$+9cisqgsoM^gumA?NcMOliJT$Gz~M{40~5EgyQ_aBowYtdVts7|E)(jWMLCE$s-;RO?0glv9Tj@jZ$#?A3|woTkhnW=%upgEK=n;sirMYU^&PVNmfq2Ov|%s$eY(K zvMSkKOcl1WyhT@8kn%e-wwamI^C6UU)@4^O6Sq|hmXBw!COsS!1qR}ihFK0E?53n0 
z4`5Q#;97e+zElA=;CFlV3K%axpsXy%TYpTdWDj3MJ*qT32g}82EsK>6n*t*VgFsUn z*nMzp>CX`H1h#>*CJzel~)!Iz&Pa?*`?>JCS)E93Ec*T@ldCtcxtL5pB z+kfh$Ns%}z)Dm#}M{fps0cbN|d)-A_`|V<34_~D=)A6b>Cntm_xJ;$N7d8r2e^xN= z5h=3G?%q@Z4i`Dtg~ro^0!zDN1#%j2r|iPmivm)Y1z#v+UN_)V+(X@ga1((M`^ zS&Z;U9O#cf#5LZ<#h|XipSItwd7s+e)0F%?J86zp6^#X}xz`qg?PkK(wW{+yLW>3l zLMTZ$M@J-BDIPZoY0KTk(EG$oHo9w4dAxJQsRSD;YnS?d+XDYUEup;_g)ojjfG^8VB2PUF*q25>-)MbsYz z%>Z6W7^=vG#l$dIR(Bh-Oe!odm{I^5WDA9b+y8&bR`i*!0|le8G5 z)6&Y}p^@(+<|qK0*e^)$&p!v)ENaz>e3&#+4l4k)PlbZSty$~$ukdw6!nqhNnJThi z<&1&9JOhFSA4Btwq`6*vut<10&O3m;%~A?>l!GFywlPDNAJkxrdio2iJ`!$n)>tI` zAWiEk6gD|Rbt8(79(P?FEf4a?AhITlTrIY`1WsPVc-FU7+=6t=NHHWGxGh44&^|8J z>P;E3;*|EGLgsQ~;VuR>W)p7#vBZhv4m>@-spVwv+y{D*a4>LIbuOsgW4xo)L!=oI zsye+bmQku)t4dd(K+F9tM;G8iz-HCfd5jN9=0GkaWa(jcZ*yCzLUg8)s$Ws`;MPaB+-5Xoauod3et_3F-8}I4@2>67`vv^NR?RDk>l4Fop2(mdK76L4`uOhK zw<5f(zdGA74Eqj7$=6( zyB&N9g?v=+pM7JeIU^+Xc#NRltFR07nj{G^Q;R9;aK3;49!+Ej897|2(gT-o2j`;} zeRe+m35J1B5usv!#;mavRZ&kbG}a;-@kcUGC@82i5*oCgDeAhKBaxvp!UUlO)hGwG zOr>F-&qPE*QXjeOjEoO4vSQ#)dZDeRjnSI43V#}~Iv0FdQu!OJ)RpAE_7IkddS8f_ z+Wnd{Zz}wATFg&YVjk~--WF-hMiwiZfu4ZWwt9sEyh@V@ z6DBtQSS^~mxCkx2hRGx-mOFL}nXHolH$FfSC$(;e?8)fC&cX)QiwDoZ|MFwU!WF;Y zv%t%bQE&Gi?h3f>6R9P4ef4!DI8yW@OT@YDs-;DgdwS!P-*qHGH@dD`UaQZh*Q+r_ z&@Q)iTd^CCKGzV~yIcP|sE)OEs32n1)3FAXOZ1}gXhDe>wwu>SLEevWf;x&UWZyr0 z;qrztAEWwyL+{M$&@18CTG+tU)oSpUasA|k@uJ<*qp`-9zvREuT3e3mezn@;#N8s= zdQV%fs=zOjH(SNoY1Zn{16^2 z%G#+cVVzp+>Q7P-gOc>%uxQ3;@nTAXC)1O8nVZ8RV8Kz+0ge4e3Z*IFzziHV#%UTV8nu_AI1gBa+?&e<<@6erO)UO|Ul zWuq>uJ2ALNvT~RN{Mf6s_`NQl9aBwN+@hhf*Wc}xV$C!-b9+(YC*-gW$mP)mr{G4^ z#alasz*hKqqN<;^ZhmM+zB+?NwQ|+Fsgq)77@+0ooZC z52i&QU3)#%v>uCSLLLwdM{0DLeZKa+d$);y3QXQdDm;jSCsjp_jk}Hz6U9CR-8Zwo z4vT@bRXiw7!aTCZrf29gF3s#j@R%(vtD~rdmqk8su^K%-lB%Ljm@e9JeJ~W6K#5*X zo}bE^7xPNSxUROteW*gF{$bJd$!u@VX6b=~>XcB_&k37X*UtFVd+#h|pS|4<+L#Ru zS8Jy6vM0s>RnNV$#5Mw?%-uJca3_*SH~cqmE(BM@qI{8Ie^q#Nfs;9FO?^4#4Dc#g z>4V;{wc)VgW%^pTZ(cSmw@~17T_PVo1gw2-r}vOk4ui z(*m^VmR*Mn;pt&g538rLL~Y_f65wP_7=qcZr2=E-5qUBJxmu?e-cAoQ#+`-LBt1CM 
zf=O67e13#5Cw*M7D6j*#N&{ZREzUk~%0Nw$Nh3V`N{eFKJ%*#a9~RY|ztjK?8#-Pn z$QcOExIoS1B_y!{7H=dJ-w(jP@WtyBRMt;)l*)3;^Rfm8cvurEgz9;!VWtL9_nHrO zjZJbS!Gzcqnz6e$ux~nifQ2F_#(iiy0=-m*a-Wg1N?e;I)4@YL^)y1LnpDKKP3kLEsVHGJWu05RDzjU_Lcn6^m-(lh|DiOr5WD&Z=LG zBUQ1kWfjHyAa(r|z!=`khhN1&n?JlmBwYB6-30hTpt$A!$&Bp^CN)jynEEY^@;(h$ z!EtOh5h6|b(HV9M-qAytN@_*na4+`pN%^{c`|i0-uHMFw$)w$!V$p0wFlfLC)pX9PH!_0^-z6HO+e&# z1l%Sk)_mx0vBdSM6~uEkQdF1i9>~i7$JSd%#T7IS-oXj(5?q20!GcS02<{B-?oP1a zGPo1mHMqOGySux)e3R#W_U!I=_Ak!dq3`Lr-Cb3;>Q_}lF_~OcQANBVeg1A+ub&br zD-!ZyPPH;~l{kH?X}+nZGsG-W^_pqZ5su9t@1`o|H?|xaN=-8zF^q2QqK89NL32UK zNi>zb&ezjW#oc9Wn=Mw2OG@Yv^ot)~Dr-}-Ha-vdQfyHS?POo4MekJCJ5nKbNwS#+ zn9b4_IH2X68#GoO-?H%{k`FO;YoDq(SNHk{%S}-wi>Q>L7RS+b1j0=JdD>(%=}eh* z{NCYefZrducG*xv_9W>M&CS*Lk}Jm+=@(sS=}hiJe~2&c10*~JIwK6*bU&(7@RkAQ z29CA~0u@3(73(mTxKM#R5{u)QfWnCDNGpQ&OiXX-4SttZz();U(M}9`@w&&r`Whik zMG-2WS$)mREFAHZOL>r;e;Y7d zc2zkD&~f##_ga|3cINF9g~6nFs$9N7x;QT{k!k#B$L3aH?hoZ%T>}?8;qqsXmO5G$!M)w^K7(I_liKRQ3i$|Y$d?N5L(Oa z{AxE0#d#!7C`;qJ*lH>XU|ctI#LXCbMSBi!UghfXD_u9lv=xk32M@%84 zN-JS4W?D;}Ro{2;4~L45soo`z6ayOOmGw0xJ)d6U6-c&GrX>ykNgG(rJIt>ZNv1Zw zf?YDc6K0nh#^_K^G>eV@UJRU{q@e{J`sWs-Lgb=0sn;+>M#uLZ3fC|kG3272qaNpG z9txMI*%+8G5K-~)Dmzk`ywifcQRT-wni@v8WDB!!O7rx!gA^KCZa|O-iDTYGdzq*h zNjT2;Te9eP8URXoy9PBWO+GbH67iXE2n)wV zcY2#iJYRH-pPu|V`Rnr}188bsM1#<+4o4CQs@a%_Q%ZC@H3qOeKi1>md)a-z6|5aR zH00AD)R@aD;MXY@9WSk;Q880cA}Q4%)L+5_v|E(@O2C2BE$-u&z&4yiwXBl+&iK>) zTLh?ruryvnYeGGLc3bNCksr`*Q1;9S@RkCPJk_mXewMemS}{z(pnXMM6BkO7#`a;A zH)ReKTKXhSF|dXHYM6SM(HFna-K`8`<{M6WjSUjkf$N=GdEJ_>0-^<@<{KyZui8?d?C#AYKOr^ z_F``IC9&UCdE9zb)7?ou!;P7g)z%gGj(DxOx^ep3M&-3-bJtz(r2g+>wKbw_qY>Rp zW-Ubh0qPb=uAyg^TKCH7+DI1#3B@b2g2Tsu0##eY?u>IXDa7BS-f*Sfn$oIkJr`G= zNettg1G<|duI#G4P*n5LfjqWVr0~ldx233sgCy50sMEa5Swd_NlFg zfzHdu0Rm?UCGTH}`*iyEb5OOV+Z84lpA)I8o>Bpe&Bm;e%|^6kXOiQb&1a5QORw1N z&HLfRSG>Ps&#vYcG%LMcT&$j+9krOeBDa8|4x=bIJM$^lZi;yt$`5%vvs-76uQQl# zvGcx3NviP?J-rfC=NI+^A$0HKRmxB`a(^fD!n|?4XmJ*w_Nc6$FNM>XQF|6w`_JYa 
zV=WdlmII?Wth?W9P>QfeTjwIX8gS8toXgNOShB)8!mvj#|LFTXp|N|_eT%%2$STwNgLc%6%XmVC;AE zqAKjOdcMz5FWz3Mu3pgwdAh^YDDpe-g^?mWmv|DuwMw}y*Y+KmXZF-=l#K-+b6SV5?9M!2uviK+?0Af zyF_LiiNE4)pnu%$k$F6=!89rdkXfdQ!h4)-x@~`Zbjw7spd2m(&G21L350ccHGkH8 zSQWGBNYg3g;MIC0W)-%&Inhe+BKV)}_WO&2Ho%V$L3( zL84!#wesbcYJgn6@6Z_&etO;pm>DV^>8@uBI}AEH@%{_U0g>}8Mh6Lc^0K!~L&A;| zH2cbP8F-fB6Cv=qe4mZ~k*wd20EV8*&|N?1uWuX`=C}fY+%;>JWfMS#-Wes5?!Qh!M#){fwbwbo+e=C( zl05?peHbOy-RF*0p*(RmS~MZLxGc_B;_*x$SqIEKD7)|NVc!JazmLXB%4r`5Ea4jl z%pvQ0*JAxz?w2?vB`>g&D5`Pu8aMb}jX=n6kHc&oZmI`W%^>cVj>EW=N1uaV0B8S0 zfRN?a-*R1?7vv5z} z$1*F)2jt%TFGhNzuBoNn70tj(eVFRk#JXw^unCh&EoZ#vt#H6o9Ev`E;5L(DN@iI`+5#PUgE2D7k#fTjFZ9V9bV|2eW-TYMkQ*g!AB=0 z0hG%iD}R1jPODZ>&#ZAnijwpBjr&$o^vAuHuO)cQw9fvl8mA<`)2IgS-lol0R&2CR z_VMr`X_6*WC%;j_`$W8ckE^UQ6a72&*Z(T3Ul<2>Ek0`^dome%j!sA?o_;jTQ)=3+x_x zBolKRlqIz(r?`swc`@DJpz}}=yk&xrGD%2Jrk9_Tn9C*nc#wL6$PhWgxo&q~w3rHE z@}X4VRi9(gqy&v$;rxJ`&Wj8GN+Yr9RT?D$^~V_^AxCRqDRoEJ6whZg2SiOnv6T#%ddLRWZA5#D=WvW)sKHq@-&9c4c^BsNwhXVDcm@#-mp%arl^VRDx1xDPl zDmx}b?Iks4^x^WzZmoM!_hK>2)m9~w^yJon`+v$lDQmcI8YgoWk`EhvV~FG0r3Tr* z?()Smd)YQY?ejEru}3hbanQ{ITXjsY;Qj6xw|C!n8n@CRu5MxP+GX!MM=$AkOO4G! 
zzxS>GCDCtr=X092cJzUr-(i3?5!DWH?C@nRloeWehxz$JO*&H7y>b54Fi{Dm^u)B% z>qelPCh&@o6{-8EcffLdo9naRXw;Uw1uMC&7epb3!`Ys(-bn@zjZua{{P(SU+{_*} zeP^C(*dL4zJ0z z>prt7zjZ@VMS;gHet;@!iVxW%tllj+hDhp_e* zHW2q*tt+Sp#gh)+DlHmKsSSAloIha5_)+H1=v20P2`1Ib7Xu8&);Rq=)eH$6Ldm5V z_918)>1r_02=M(hU+Q;))Do7>>v*FB{7q%1sI(jnWp4934->|>qr9iaxAFCH%DsOP zh=ym{HWLT6G>}C|iy_Lmc9H7i9ftPJ@et!Fx7q*EZ-%NrvW$`l;mOUe>4S_g@}i}4 z9`H46KRmkZ!7V)NR7@P)_OADcuw%{H`~9|{g>T$*C& z&=ODF1>Jr;XEz0%f2A3Jpdz!U3-zJU=4lgiHAFFD1<{Ma^ooVs;M}k#hJ-uldDHv(8&lY6kUEv<2N=Lrp5~m|3tXteZzjxvkwvy4q{Fch;I00##-^A47 z^{LqQDAD}_?_|P94E--J zO$zI5Ck{9_({`u1BQz>b&2(lym&BfbQa(naO)5Y&VpA1%;fy)ASLUZ}r{V=CNW71S zBm*)RKd+O%Q@uFod!G@Ou&twH1ayp3U)sFyaf9|=dDYJ@<)vMcs7tase0ZiMgA!t} zR$C4b1gO>A`ZY$IkzNz9jWd==2oW72D=Q#i=x)o7RBdvoK0)+B@ z*TQpaqL39OxRdDN=gT==b_J(aTAwUD?tDCdd#vdAfJLQacMq>IMR znV&A!!L_(r_hb$_KXQ?Hju?MV>tk@6GRknm z($BwR;%Pzz$5H#|zkP)HybbvDQXgo?kQo2=c^cE%Wlpy!fJ~~t5P}s?!Hy+YKTH$* zYp@b)jKxjTTT!1JogvyR?N2hE-X5wt%|}as|Gzc-jmFL8==2v7q8bZ;z`1wp^j2H~0J=L&kBhKLwBd@FKb_m8qT33G**g9QjiRjE}RA z*YD-r++AJ$Uxs8Lr4f11o(I3ECgN~9?BWTPB(O5-RDE+HnH8$~<-!rsAAqKQoZcDi zLEr81Rj610@+5KL%`8?d^m1nhZIu z6QK5ld!=aV)`JAnSOwQN{|t-Q$W*>WvzPtVidN)6l2wBYm*Z8FPJ{_O=lpeu4-C6T zb9!QnTy_6Mt>ks60J*S@x^O;gQ&H|+Cq!0-wS%sB4Vn6|F8x0JY)1j}NWhyBmO!5`*GT0~`f z`kvVAzD?i~;>YsrSYU+@-iKIp{=_@uW2ueqZ`|cvVp-a?3Pjc;cz-2-r_5uowiX9t zdemLD8o3#A*ZW+R4FFpMws=X>7V8MqBtM$ntWe7!kC{U4jeuV^x9c<;-Jd_9tcCS2 z4r6e$`s+LF5b{4KefZgqulCO5{hD9#9)VwYLL~edpEwT=*~oY+$*B=AGK(gwlZwwK zKn15ZFFvn)5L{d|K@(Kq*{us@UV(QQ5tENCX!~d^^#E0W1Ka1em|vPJV=tU&g}?Xj zZCyh5wi{1c`9*IRB8R>eQ9ze5*=y9QusRVd&lmrdDNDEbH7H3rb{k?YB!GiWMJ7KS z{e_~$(CET`PYaC+p|9gyKvahhybB(5EoFf8qP zuI>5LkvyWsF|2xA3D61Gm^3^O6d56^F2Q6WUMSHFJMHR^j!qC268$Mp&85wm#7J|2 z9cx=l>tn>cQ7n**oDSD7f9@w^_HHq|3w~f6>!^V_VScCrJQBP(kQ$X$3DRjE#n+wG zM29!z`52<_12>N-1PNJ8-VT!ct27V5cV#(3Uo>^hu&4^VjrSR^_n(cr?e2u#PeDoZ z>04|mX{y3Kk%AhGfzT@&T8v+P%$#6Dt9|9Fg=wRbb5jt3Vv*KqCXK4bvgGdWoW z(#{4)r@Xfd`0WXN#N5N#lu23nh>n2yc7jLZ 
zQM~yTHavGJRtHdhrFfA&L|d&)KV4*)3bo$nu3VjuOp@v7k;sOX}aL;xHv|-q(9PJfa8EnYatF@d>{v6kMj3`)K2voKb9NXH>KTi)&J1@Z>iyAO?WG{qb^C%ouAuxMgPa)^0}uYIZF^bUtWhq z2s(pVP;cv4rz?ITu#uV=R&&*Rvw04w<{>7Ek@mkQ`~P&A{{P&lSpQLD|M9;5 zAOG(AU*{(A*k|*>3c?%@93>q@shYJAXNhc&ymm3F{$sQQg`(#i7O>a0RmX&KEW~+a ze(~lt-M}5sJc{L?XPKiD%YuSMUh09t`%D_75p%2rSqyf0%G20pd{%}onBx2AKW*tm znl2;hrp_S_lM^Td$0$~VBkNt9oSd@S+MxV~+@+;B4>juQ>hzFnniQX=y=lmH4FN4Z0quUZVC~Ue zyWag^Z-K{m4RMyX-VC46vr!Dmn0wt#einyG&dlt#b1m$i7kr;znu!`}f(a3S0Y0sC zWhVEGa^kM_*8lN|rtj)rrG({?)JiCCu??98_crR%5!H&xoRws&`_yEnypx86NL9(i z7ug%H_&J5i%FY5ek=ZGauxYx}qsLmbj=c6hjF;K^F8HesTV? zJyOX;Au7-!3zeHpb~buqTIXm=-8(b9f{b|mGufuT=h(FacULHa`wr!Xi9^7@Dr3&Y z_;wR~Qo8z;_DS|^y4%5_$61w=NOa0UZm9@3- zP0HOr>AOa|S@un-$n^6QWX25K&52ot{#e(QElH*<_d|0{?P8bH&o_i)toZ` zTaZl)rc`PfVMX+>W25;U9-k*8Iq_;A%wNQ?*X5*gvQkSzUccboRtR3#>NW|0} zLX!y~4_ro?Yh6wsoS<=EJanY4gJIaLbX5KhOdF1h?VNh{+HX3Y-=PrM>?j9sEB)Au8IQHZG+OWg#TktS+|<%f z<@Vpv?s2yGX)gXVrc zbyB-&(S{vaFnpMBwO0WWPR!b+m`|DpHSlLC{FC7LGh15jFxeLxe=c*x{{Gq~WGALIyKE=&ywujl!4gC2q(#l-_j9BSQj1u3$ehEnwIQ(<}G^{5-~d6A0Xoy z_M6U66{u0=89GuPmo(d&UyCA~Iu%qkgew@}gNOC)z#QtWAAeIpNX)L2l(nIsdb!V3;^fbg@ z#PpqHJL+xX_c->m6ZH4tFCv7dHLCSWom9{$-^#dq3kD*9eHH8TcEe;JJGL z!de9;rbxml4^A*ZMxZTT8xuy)u#%!#Z?+RjHYxFs)A{igR~tyo*;jqU73e==9XOXl zVX@MMsWEsIm(%&u4i$|=uFS~3W;v3ydC~VvM3YXQIkmaM@@BWA~<|(!e6GK5o$5{YB0XG~J z!>mCPQ}ufK`ggo7=dP>m(7IZ2S+Y7u!o99}&e5NiU*zHSeX@i2WwqEv){@bf*%#*i zaX18>@<6olDyt-hHm2Sa-;sPJOLJ5raf27mlAcq$fQ0!)4!G_v#Z~@B6CM`@l0H76 z*#r{`s!0TD{`o${oRu&}ez9SHo#}1XKpYZXp4|E~0XJxS5s=?dnf zps+QLYRbuWWaZ#c;F2) zc(z{Hlrh`Pju3b*_C=!>WC>mIRZf(!o9&}uhRU0NK#dIClT5ztsA zxGbUQE~-S#!>Wid3A;j_2*~^om_rk!5B4YBkpjaZki%SuOqcOAX zozsVu(>MJ|3XI!g0`B+C&8~V?FxkSd(9MfW!Dq*P3k^05$}Z8k#p(>Cufvg63)JBC zgrpS#VUyK=(O~;b^V=>9o&$W*dnkHTB?QO%4!rn{2e*brywC?gb|OSMr0ZW>zXJ0` zSyY)d%0ep_CUat3+3`z%+8<2*IV-vcC`W`vZ)R11i9J9KT<+Y~7sG^mFW1|f=PQw< zf1~=Mt4jF$!H>)}vrw2*@pBJ_rQsYpfG-{u6mxx94?Y~h#YCI4qL$$*xZ!Jq>fhl} zz3OpL$*Tne_Mvejs*dYDVue5tkyW*B_jFU+oGgRCT;}=%;Hr$uj{lM`II$eOY*UW% 
zQbcQOA!odfjVAxcntv^mzQyVu8G%T0J?)BNZP>y=i=2-^H~Pi@`#hm^a%RTA{M@-Z z89S$e2@5ucEXOCGsG(o_eC$2 zMIqxtiy4YPIV0FjV{&drfTCA(x)hagZqWIs$t+Uww3`GQg(Jg|;fTZ6{8}htrd5JMv`tXU!%_^(H%2ctC zXJ6KAPcr)HHEG7xHgzet*%BH*hPaZc^|$o}QE);qkK#%2q%MZ+%em!o%YyBftV-RW z=YWGUZf!qT7dYtjrrL{l>SxBb^L8Syhk_WGpFCWH5mx~bOJA+t-pG;#yaJM|+g%5M zyc9pGAG{7vhz8(3-YC+h7lK89;6NK9UpiL?x3#hG-olGixItnww}Vf-;um)`XYB5D zV-K7}2;cZ%qi+p$wUOgRgo(Kglqln6wOuL5(VuzH4{J>vy^n(#rzlHy-oT=TaxOw} zRnxDKVPyuFc2g2Y;>Nu-alY%bv40#Mja;X&>T>6;#-Iovyo-zcN?L|nt}uK2 z$Ln!cPWP*#7^K}k95AfxbaIAAf})8YmBEULtf@R=wxBz~_(q1Vr_t!sR=KQ2YVHba z?PcL)JaLYKD%;V@bc}{uwN=z>E;BEqm`-IdcowbY_Tk@+%;NVjb;GZ&$VB)ZR96VK zR;PMXg``wz2!ZB3AnvL3i_czfQj7O5QI%(d?yI$KAz{7>yFt8ag@!^dEqJT@42@M* zI#n=eQBkD(L&9}qTU!*fj)jvM1lbCz>iTd-t5lDbR^>X`c+nCHgj@Ob%w)ML@ZOq? z;3SERsU|eZ0knr%W5C}UdW(UANgx^wwdyji``_p*&8>my;E@}>+fZW~(6#DHa`gOY za3uwaoH~0{h%|h@7A3!{w$#a?UkN(buP{TlTlCm9=#(rXh)B!&U^%a;+=e=b!Rs%rhJ535ox3-Rxr{WWaO2@(;QraT+O5WK>433{zl`vJ9YK83_%bYH7ht zs+!%GMk)iqK?6k2=YwmqSH+vzqoLa{0Gjz#%&NF{0>yv|`E48XIl5RuUom!r!R_MC$ zeA^}X%9_ZZ2FCWe1IxS3WB~j1Xn^{*ygpJE)ggJ}vt00M)`gl?)qs1AY>le-iN;cU zl>I82HYnRyPneUqodV~Jn!X?6$YimM%Yiql10P&YhJg$qr~n;s9aEAmN*#|XlheVV zBTOQQ?<_*14OUP!EvVdqaYd5w^57-^QA=|~KSNk56pN)XdTAV`5=n!f+^J`r6T}ax zSW1TZU}14pm1Iy>B88#|#-Nq>L+Wp-cmI?}*uK9(opR?#tQfT~cWNuj(CB>oA9U># zt3-o~udPU%gMDLnYvgKV^D_7B!a{C&Ik90i*C33r8D%|pzpAExIXe2j3FV>Qc9>J0 z+4*6hrwtB8I)Ji#R9ANczTJ~pQI_P3g=IpAg@wZ z??aUGIBM{dEu2dAeOw=+I1VcvnX;U&u(%B_&e0L?#OABH^3w@HXKGq-_`*bq67nz3 zo#a%E>xMc0rM2O8g_Tfm#>YXPCI;1pBF+ab5rVb_GT1>P{;g zN^tvV3!9$DQAlX0bg)+=aa2!(H%Fl`l8k@f#Q4yH(q5&xzn)Qgbli7lMr^ulzL+>I zluP;&PJ(Bl_&*n4_B{Vkc?`OCT9TyZu!Z_{<#4-YY_R^A>sPh&8+87NZU3A25x77ruUHp`5w2QizL9mXB6>uIDJ_s)&LL)5gfMZ^2 zUW;2;YAs!K8ux>JIY1;4K-=*}u?ZTx*xu5dB6o@`8KSTbdaA5;sje+KHiDGiVhkre z*;iqSja!NpEd1jiLYTVDr)cPz8!&bpXiKKI7pBF;vLghbbUaeTfWW!6?zb zo+|K)7B6-@tAT|ermXfWk>}u3x`fFRM)gn9Kk?a&^a|k9G_!~mu%;4Xyu>oRLYlct zCcysF8Wb+o{i$jFR_(wdUS+nwFTRq1mO<0g1RXjK(`--@EJi1pxIZh{)`y*gFGR_PAVaCt=&k_up9zjg^t`BoQVxTGlc 
zg_#UDhm%YUJT6lFTM8c194={C0(*x!o>HuLUX@iaM%!gRxdSy-OxfA=dF((;LcqymVu`Eoy`Mhz6uY~HSH(iY^VWBZ% zU@4`7=@p)P{S{D;32@K^%W(CX+)0D_Ae?2~j%5Dw;~_${+~(nlLy@2A!Cr?;gHv2#Ve#hdokRpumX2Upolqf~wZ?v;(eM~%itckNOvG8$7= z;!H_9WF76l64|jRO7@59(Qk(d1B5;CYPJ<$yOBrN#iPsTmtyon)lZr7rh zntnlD{*OBi>^T2F9PZ#x;|Bi_=>PlL{}YFX|L<&X4o4Y(i1sYD+}t@16BwJ@e8SIo zDv7F6(1COCrf1PrHc-Sal%F3N-bQ1|JTSPcA;cOkO-^tj@9Dr&{Ks?~LwbFGi!RN< z8a^?EXpQu~%qyvoRMa6rWEHw)kNj~5&;#SoeXGzUUVjOz$qVG?dkFhAo=KxSmWYM_ z(Lu&gOo3RInvLSKcXm8U%rtMDa7c@rp@bts!D0?&3_0m`Rbb0n?DJH zhR&a$-{Q-mBLk1C(Ck6k!AFcHj8~qwR5mO;-|&=-Gi-X;UZ|9Z6#tvQQX)v03%%T$TOgb z9lp*g>i?g(O#nQ%Wmr}>KbVtk{s&x_-!f+={1*@6As1fFd0bj-u&1G*k7QQb;7 z1FzrgE@Op!#Z*5@8p5NrKTJFt-N<@6i@=dN@BGjuN4Z!DCE{^0IEa+HD)f0l3eEkU z#gD2wu--G^QKujm%RK6MyyaA(=VR#^{9RtuS%O97UtN{7mrouoUjf@hPlpDC3?2voQ;udXmS9m&8WzwiU3qk~%4K`i#UZ$6=X3AIn znYGMVh3#bZf2q;6^3y!O0i?+m?pW@Lx%^v`M0n=D!2QkTL07J5U%8|kbG~0MgSR%8MyWs+~|ui zvA!N@5w86r_&y_rM^`MA=?0rz7JPn=d1-~_r@Mm;mHTq|hEnmNrHdAs=`BL#&I%Tx zLi&=t=eqn+2y+zGev$08wagRg(xiv|=!d-YYVTWse@sf?5!tSLZ`Z~KLi2Nw#A#X| zLg5$UZ2V37`EdHf!CPTzLCov!vdWz{pI&hf#vi&9(CGPIo3}1Z% z@p8fAAj_-Ealql)q`1);UKK+QUk#x3&2! 
zMnfDE0=T05J6)mzrfCqtd`dDkrs+aWD%#L}AcITi<{`S93MSb&2A{1rNkLgEW_$#U zGfu?neV@6!f`Fn;C2;0BU;uG>*n{r}L{uecc4YD+}-x<`s8lr^~QDc>r z!0CbEbXFh%3Oh#E1%ki$z`yW$MnLohgAv1>C{FLQFd5ME zIbpC;?D#m!h#35p)_1dGzIZ2hu@huVUJZ#<<+B0hBvadUSxNzaqDNJi&a~w6rG3sH z?~F0vX{uv0UKFjG1#yy{8$!~D&E@Tdoi}7$q;ZJZ)14MpCJ@ zP+(#9#frmx?9qYREr8yfO*C{%zr%qUc~k3n7)T>cs;_jKcAwthKXa((_M@_ zZp!&|7G_%Y$t++=V5xo`VV!Gic_YWb=0F+Ptb9;=?4eG*y2GVLo05Ijw8gkzcM`F* zg(Low)*Zv=({COHDXKHv@6Qcd0AsnQx8MY%c?n6oJ|bZrBr9dptJ_aj$Q9cPO-S9% z@@8j`MEorVzWmL!Hxr%D84QVcxIH^v0A4$nz%@hLfW!!c=GD&^8zG&mw`e(p<=eR( z?@*&PM#D(-{&xIV%tGAp3JhbUOGl8zZl*oDA5xqw;|8*H?ff>Os(oYk`irNlpI>en zXQ@3l*Nx%a;Q0Jf4-jjM*L#1hfC2nQWGbJBT~MPw`IGRVj8c5?zD9p3N6ewy)bdM0d$591`l=rxf# zDL)ROtPckw^y=x68=}0zQKQmCpHlAx6}{R1i@MBjF=T9Y>9~^A61V;^2}g|ddP67f ztrz6(xT5;eily}Wbf~h*G%GAt2+vHFx*1*ZM_tJq1B?pl5dNNOwpz zK2jp5s}45+Qqv>UDfP^;H^z5lRZ335BEQ`iBy1(heV-)YIG}Yo6oAOQGdiJ9@2jDZ zsBI3#G`aeM@{N78);8Bhz6wm##goGIucS@ohTLv;UD=`uAUVNibbq~o4L^9 z$EJ41FMg=+;x;~$l+*X%Jtw-NoYAkY1w8bo>_C{6y7U!jdh6APnhe&g_!Y_!=HV}O zvL={s*}LhsAs^&5++feU`ht-m$>t|6>9J9NwO6dnw{t=Syu#^gauX91WMr&~f;CF{$p;2^U@iGdgpccaT5Jh2gKHoXWUF0m=C zPHBM^FBrNa8s7#dGBvP?2sI$Rb_X|_n?;f3cj)afuYabPIjMB2?S5Jb3PJMpv+Qp! 
z+NjhFd5ir=dRKRR>VMX#w6p8VA2CIj=Y^FGqUC41^lP|NIOLZVZtSC;1Sx9=<34Zp zf|+4@NW0~EI73Iokmh@ILWH4Svy3RpQu;^F34OD*_TNw!7u~txjLJb@^hacgcu{VB z4cM=#J+2D7ydOPdE4HbqX2G3uk3H2Ms;HGeOrk2 zLz}+q1!vh?&DHL?xE-x3qr|>*0}M|;3cHETPm11$<^;nN zNdNe(c-Mb6={H$P^fL$4{{9C$%?D`y_p^_F{W!bZA@+4Tb9YV*-PbFr0F>AePE_g+ z{&0rpzVgg?XD$$zBP5GGuKKj) zXC%DWB{fWO%%R3*B&`1qWubqjs)qBHW`q>k0SGhFz1r(k(I z7DMk`MPI&JE7q~CA^T(w1Sedfh}yt%+M-p2L=+GhY{)9rsswBnXAT)bH;OVXv=zCLUmJt7N_6$tnZO!<6=7w(Q%L|=WEhI$GO5?C)f6mU!`Um~o0fC#W z^Mr6Pj}~8Apx_7TyO!j9f*%Cl z2{T{s1TF2z_hI;CH*O0W=PVT=~f zdp{nZ>4ss9Nbw~)?;LRzME{Q@ES~|mbNHH8jYVy~ux(_X=&`K%351vS^)OrhN7#q% z8r;3X%7X5e{{AN; z|2`(}XKelt3&eWuTG1L2F8>QT$DS%jRWmj3oo8&QlBSk&&N4gXyxlk zLYoku=Z`nsSLYR3tB+^I!xn1cdUV`}jh^QE?CAaNTb~yI|5_cCrT|z~&gv%=sL9{h zQNQ0+k|o^cEkOiWFz-oJbAhZi*?0Ua!1=5h)qFfk zt9Awh2ak_8M2*)>um+;Re}uZ*DkjEo#g)^+Ef>>sAA7flR*%K=P5xiV@}Fz`aVMrHb% z8_l?$3dp+G{3EqP=Knv&-ZH3;XlvI^aCZ$ZL4uRu?(XjH?hq`vy9f86!QElu?yztt zxDzb5lfAz>=Ul0}Rs2Y$s#)F5?Ade8@r>t{7R|Ot z_K6i^Bld~e^ARHOBdgA@$gtz$8%KBqP+>c@)7=}>?~xsMO+QD7RI+?HxArJIZ#zZ4 zvMdP{5N}V$+@>I%?zWQ#^HABqYel=$g%V8xhBT=zde1{1nXpa9=Pxiz{Tf@T3n2-k zq?SY-!9zuz9rU+(HdrW(UyWYhh_F~V%nr&O_nFe-neTn$XcgHZ{?5$>d;&1}MV;C9 z#Gv<(3YC6L-XYQOJNJ@D*_@FWGYAKu{Td!;eAIq&G*chq_G+<&%^WcgBkq?qdy}!v zw{Y(%VWka@fSGd3^emQO4^K`)#+=|LorLQEyiu6Yf0QOq|K=h6;3&qyq4@%d$8tD5 z6NBI<3}Koa%A^BipS*6x#deFU>uV%RS)K^WbRFZbo2=%gd+Axp=WLP=2!l(Dviwc(3$epj9Nf+m)ho6VGYn^W(ex~>o;qPLVx%2} zdU)u8x!NBCt)7;h7a0}>oHC@S8$?ajh7|;ygM-?!bF6b&C){% zKG#gaJK+c<8)corwOb?{+-1A2Xsa@11LHb4gZ9&ELH^0xTBmUg24!|epku(#f+fhd z=EHBXlz2PKixVJ<%*W8o40>vK_QJP5^qNX*lN~D^myBl(d$Fi9i#L$v$Ev=Kk}52! 
z4V~r+uNZb%t5|dNMxQ39)=JOD2);YJI^3we(RxRwgtt3hd1!{j$JCh=Oeliu%sBlb zp@J^kq%W=L2<@+P-V(1_?nP=b}X4>Fo+QO`1cPIV?PsgIj-JFm!nHg9Fs^Vg=p!5MwWxNngIzR zQU0gB+k}*V{VXaph`v8sa#mZYg~`NlTz2_rMn~UC6zdpy;}9n{ETRYj%SoEFLaIJk zc6QXQts^L%g5zMAl3j*E3IW~x=P>=UNLY3n0;2ymu&78j`h6=!|M~SSSNQj{7GN-N ztViZna~h2c+ayNZ-JKe_-D^*{sW)ut$3NXa5;b5U)*>3LO-el~$O-gFEN*qi^EE5` z3Y{kG#a5T_1@GJm^dAg6#M}R~Na24s$M_vWw+b5V@@Rg-IysO>yYu!w_!~Flr;>CP zouzynmqj7FUA5P}vs7QhEA42HUJUKuTYeCT7s{{f4Or^+4!3{mQd@}&`!AOaU(oeF z`jYC{Gy-rgXeAajyMOBvzx$AphByu89sTNj1g4*eno;3^T5Nt}?st$dcSiaEb-Tao z0ZbJ5Uu~Xh+r~bITz|r`5IeHMQ$JODlCN7sVInfvhZP0{7m--_?p?!{ru-x_UfHtI zIJ>Pvom};Ik5GA`DjK#dD(cBMu#>n@11is1TTapVG zl;K``Z=U94{rd{={|mYPyUzN*W7r}8z)N78HK-#@0Uc{oSin)ji-BHNt@mvU^R{d! znI!H99T7^JzF=M?aP;VjX<`y;yGj?H3TTr{ozimBE=2Vo2JeDbX%EVaKPUXtmXQ{h z-`W!9M-c@0HiF0VepcdB%yK-Z!2TN!1mCnwwFQI)MT4$^sT?C&^tpdz5lZO;ymU43d!UqCz)glG?_S~NEUI6M zmo-SwOV+f&9b&pD^yySQ9FfajjZ5K$oP`o!!YG}COWZ7*Fk4TNpEKa?ni5bIVX3Xb zA8TUzWCJ!N^zRTGKw;fa0ehd-&Dq^Wf+z1`+=HokBZ%Wym$0% zh(8|>O-2dhSY}i)8g2af4lUl^E_zk+tgB{hUMW!lzL~O8+YMF>+B@5AAU_?jd*RXY41J=5Z%-;qgx~8NgA%~!MnPYU-{J=KC6_5De%rv#TT+L zU`_twUZE(=fEAJ;%SqhTtx^$m<{AVAg?pYBaN3v1+w1opVJhU2->1!c_|8;6xd-O) z5wQ=wlBo`)vS{ZD0j$eOdz=ynm4t;5lCGv(pak`(^w|zPzxlHT!4H4mPj$fG+laZ(Q;qBZ$IYD$FAyGvh?Xja1 zo_1k+WM)@DrA%=%Rt6fC1xP|BhQ=_-oHDjXi)a*(!kW`qyWw2Y5H>VZ@zfN z*+{98cNi}tPMDc-&A5RkvpoqTiYbJ(>eL1TU*e&kBt;k@G~J{tj)$PSA!mx#7Iu+j zM#CJ1qYKh;e{uhY3C)u&T8Qd4*3KW(qxy8)Xg}Hl0)GhN07}B|acZUmi zn<7+s!C$H^jUX2DVZgPIq4$-m_!wbQ74=IZT&gUmb3==LG*RUaSZ$!0wXZlkjap5R zie@{ch+mkbP2o$&a;F&7SeYDm>XICO?{7?4?35qrGP3u8>=SU1BNWU#JvgbCyDRW^ zLQ=E8#lLp-@s7k^e88F~iCe5FO_-HTP~J2CuiY_-VwxgH+}kJ-81i);iu1I=Ek6;M zfRQp$F%)czRVGv;NlHX{gB^5jvi3AiIeiEi39_fl2%!r*HA6K%c{(rRQa* zJ+xCy9nvHEVqXolQWCI-?@Q41Xs)=w+MqU168nTTVTf&I#J!$6&)ms502)7qi5J$l zV@{pt_TN|KZV9|S8`QjBzF$MFP}<=2!`0LZ)cpjJn;8No!7KPS6(-3ZQWo$gSX^Sz2ctR5?x0|W^7me!%ntF;k-xE3{JkrminBtFbs@}dq!bn)qEzZ83gBs z-n85=U2&57?#&VU*26&s>`vSsz9YnfapndT>6Zta4J+9pN;R8ypYD(Lkak8=Oe!0| 
zXu1@FCY6x)b+h_Tu`murA3W~uHGm`e!vf#KD`z_x63|tdsX}tbuyFR*5zRCT6Ij3z zw^0Ly=|_$0>F;@{M-Sy($3nOWJ>yDz5d=Uk-?Md%RN5E>oT@ zx1)P>{xtjrL|IX|l}WbWjg{CDAfc4PQ-yhy*@5qgB*dpUopU~*5mgnlA<*;=d}%#M zIGMYd3-!#=g$$4k%sA%v+~BT|AA*VlT5T-&KtcfbJc~+^rJha#E^M*`yWo}S?b2L+ z8AJhC>p$d|?W}ndb!Xjlj{!yXw8s|)DKm~>u&EN z&M=Z6GmNR}&WJ8}FTGuyGK%ATEgbT`2FG1K|7B-e%!tM1rS;`7~=7*OgV;|&9Odw)V2)*U9mCPue-p}X7AG6b^#LOcE6A< z4-26%GqC>~JN}86r_;q>WPPX=@`&lO4?Xp)^4JB*na9Z6`VSB}|C}&QB~ zp8O`Ti&xjk%IPA5;0L*OH1nJi@PW*QxIrL4&cV8D_OHT&<@U?DCUI)sXmjL3W;Mm3 zICr@vK>~_U>qIt9e5KWSKAPc{6=MJ)NvOiZqKp35If;Ag?0t0!p%cY}K0_lc*Af%T ze@jdqB%m5f(7t$Ed(wg;4I?J$=_qGs(iarW9<3e*n24JRr^Ol`k7kwBSI<0!8kW)Q zezK_RdE+6z4a;crIunNkVNmk?0){GcoMq4aJZEc3bC27m-6L*;Fe&j+<+WDX63V{~ zvD_4~R=mPB^T%U?z5^xlHcy}N(J*vlcW9ojmC)>VccbTfBDf!0c^^lR!`PGHVQTH3 z59siCr{4YiiPqNcL1PPU_6yo=!?7eH2w0k^r|cqXcRw45<%30AA{Q2`VY~P36wB}* zTVh77fGXvJ$8B;@IoPbjKa9?RR%gm$BvwmqcR55k<#$C`>BKN?B`|(CU>G$aH{&XG zH>{zDDs7^<`8?$MTL^7%aCl&QncsG^7L#Yh;X=R#dw;xOOg(Kr0_mOX+ZlLUk2f&( zwv@B4d~SkTY7044$bXYD#BQjGf%EHc;_ri;m)#X&u}%bKm^aOx;kIabRDCGV8YiOH znK0Nv;GFl_L||<85_t=xtm%GPVy4`ldpodHw!86CfQjTqBI)KFl((cGh$W%n9)1F^ zES5B>VS(Eh*-nN81O>XDe{YsO>~O)8F9#!A*WxliB=7tZ`C?jdN$>xTuT>vdZMJ@u z{aRhSg^~mf4p?8VO)A6G!m}dC^Ggmx!z6OcZn{_w!DZw9GQhn{S|Kmq<+bH@xPxGY zD3(3kDp2<49g!*e!|fBTShjlt_t}~^+KimDAhXcZ z6;1bw=eDiG2g>gu&UlQ2WXJzcnmeW#QWs2J1eWVv7JaCKLNe@zp^kt^r@Ra0>uH(f8DLe zs~nT~LT6`mL-XB4w$^M6_p$%ai^j{gLuSp{9-C?W6Z>9q^*(jJpC?9Q05@fgtyQsH zdNQI?B364UJNx_2L@gu^EP!B}<_)Bk5@6MAYPt2f9QMg?WCkV{;j?(8*?gFoI^`$= zW}!tWvk?H%RnWywBdkQSlo+XhMXki%H}CNR$=fR z!0z1xxSLZrjyJ%9X(9De&KT#E;1GyYN8s47HKfZoa7s03W)068+ZG5%iQGYd*S5t6 zCAXj+18|`D`$tid3E;mf337TK1v^V{WWI;G3*#ey=>i#4k{Izx$#bpn8wotXi5 zwCs!w=nk{0B-f^VI`Y$sS{IF4b4O;Ns-lvO~HxlKD&HrtN{uu)81s}~0 z&Xf6T;(R=FYVmmC+Bij20Bpm0V(f`1g?!b2i6TC2!`BR=Hnm!4{g1>S@OlKX&sX?` z+YJeSzo7z4B~jJbz|BMyy3~oq&oG2nG<`(li?bVS;y|XtYJHI5xb!n+HN$zQcVRgVNBPN0iJ)ui8?KBQA5>nq2wfJx+-+%uck+%h`oI3s}eIG<1fKm z_8sP*>zqX~9UzKs4x6U69wi5O#(bF&h=T#n5~c 
zorXtEYPZfzXyyCRf=LtSSpeYLHF>Z*Il{}ewm5Nt%rrX`+X;>0s}1b%Fwzu8qjh{p z6Cs-O$Wzba)?hs#!6a6$;L{WBdEM;OzN_%80fcwz#M%PtEBX7xbvxjyDVxKYRndc< z=`&RvsYDC-AJYjc>^$Tu71_8WR$C4~n0Xa*h#U*2i;t`E49x!!zcfQh%vmj)V;WJRQ9T#RC3Rwh3Et-lEznkWaTW+uINqZCuHtJnAbpac8+KL8-BM#q<*qO-b(?T< z0&tjQnxLY*9OQd$8k3=g;4?wM8(oP-OEM`VD|Y1$Z^QL-xM2^Y3Rl6Cz*2 zlS`dblc2{S`4z>;EahjW3np$XqiGu~tD9J~$EmeX$AM?Mc3(7#fywHMY^DH7j0{M| zP~w(sNJl0}op`e16d%wP<*9G93_rD}ZM!kBw47lm*USf%K4?DGxQbE~f@+3@3xse8!ps;OD7A#*==I%@p}X(2M&z%WTiS|pc323wmB*0fHNoyRV7FKZ)5_utfAS7eUrnW>35>qAw=LCb7tucKDVnh*|P^;-# zKIe$+Nz&gQ=pZBO3W%He4u$N?`AFA@cUgx6>@uaFR^@{Z%c5~Aw#Vx-xOqj=1Osgy zB!EYn#ZA}oL!x1C-O6vOKx$&~nyO=MZ3>lSpnIU`H_!;DrOYs6{My_w9Ie*sHwoMc z@`}M88)<;_?=Vi$ZUCUs2gPNDhIH&7WuQmtVjScp+Xq|S8R^0DK+?a78bOSr`xjB`2O;My0&CfXOafJy(zl5&thc+!O{hskXdf(59e|Hn?JEVKJA6LFHzgU8AjWd4R5wiZ{lu@;NswEL-yajPpcvwo-HrY*9Vluxj~8}B6$>SwhvWJWviGJ#uGJ*OWG($b)}Pqpo$rW0ACgpGyQBIl^X2h#jdn23IJ|_@WoY zdS6`MJ^lxD-S&fDKgYqnfBp@fkhg!DxlO1$ql*>F z;Ea#ce+=0khr?-(6a``-7oZXUV-S8<+B2Pmz|nT)Xnl&Xjl~7Y~Yn zfM9vO6NrYn$d^Wdvw3mc8)I@Bx`!vs{axs4h_JP^U0d-vD@%84&k;NK)$jAHX#J0S zUzpG9{mWg45T8J!iRQo0!tWcyk{Y>UNqFINe{uZwInNZD&UtsU+x2rMp*?tjN5_Z| z{44;-S=(_HVZ_~%Y6%b+I22^dlYPOT!mm0ae{BV;SV1>iMoxJ z1=Hn&{_*6wX)q`4>Pcz@-)x&H!VO_5(tWd}f4fys$NSd4!RZedFrUF; zmdbKgsTM%%+cca9mX*Vp|GY#Kfo{n zf?S1;O3wzjP^{Qy!?F{3D0Wl}e7B0&d;yc7Zxp@|?3V(LN@I_z!b^GrkdFbzq`qHZ z7YKW}RakZ^Nnq;B)?oFnr+PnZ_aLl>+j*X~Eqs0b=6P{oCG4`php>;cL1K^;c-B)r zuQG><`OY|>>8HkxBj=!XRP1GL!rzdXbdIG2lD!La1CaD>JG_$b3^K{u83 zg9=JQ;29#EZwy%!7M%m*(GpfT(VXHNA57&!9@Srr-w`Mu=`+VqW_c zaJUmj?X*G*4Y9&|fTw5892Y&*#*Ab|@rNYjbvy2=!x*D}&SjK3e2C8RU_h-XPTnLW zEw1q571CATmDxZ4uixvhsp?}9o?j@1_}lQ z74f6m_L&=E*TmlveROp+>00~Fw}v&|V9w+1onk{cX1FC^gYEMh`aMdZic3uGS}9vN zyF_%qn96x`M}j`A8wg+jBD}VIZLutNK>K8&M&jZv7;O34*<$!1AnVj{jn_1K-VZ(W zlgYNNWlRN+AM%`P!ZqxMFyOG#?`6#}U}TyUWX$dB5U!)s)YLSr@=mR7Y54{U(YoCi z2%OZiOLp3e|GZ1~2*+s_vD_vk<`<(em4?gwk@~nqBbVDejV&hRLPt}H{jYx4Wvk)A zApfvzUc)cy`b;7ebU@?N`CG)t<&m;{aD-bjJ%&feqnCx1V8!pC#gQ@nr3tC*V6mpL 
zE5gM4p-@q6S@BrK@?uV0P#h&Ab90dBL{^sOkLXIxzt^yr1u*J0r<23ML(N9uRKRTh zilFSxevkxut%Z3UWrr?ivfsTlqg(5Kdv9C6xe;**9tM1IbVm&E{l)zx+q?bTaG$2v zl-w#Q;lk{f^Havt%|jy{|BSoaZgxRtc5_BtikE%DHfn_zr}Xy$ug+WlLswTOcWANr z+4=(<+;`UW9Ok1tedlTbC<=7E{O9EC(jLvkiWH@^n>#KNpX(*|JivyF%i`MjC?Ui+ zV5c@rYXILaR7w>$H5HG8hx817XmXQ8#P|;-r_;gXbvTJmSm+ho(DJDz*5oOxSXf^Ky-@H7{OLwJYUbuMl84AD=|$v_J){Lz5%kth zN(J*AIJEx!_!kJ<%mjd^PCRgO8^Q(Mr!$V6=41;tQOsqhvZ$K)Nrqrf-#k69hs>bTf6MAMD=-i9JuEpNVuTq95r zGqnB2+*ui7&ALt;+aC4v!Yq-@gs`=`^s++W4Vi@xm_g%Bd}v@xw1*UnJkQi(HHTzc z2qi^%`H-e;NVc1K8k8gqAXm2j*QoYH=Gu{s-D(koSaj!E=N!Jay{b^~=;`6ZdQG%~ z(%%fG#=h11o|^~^yM4Bnw&Qoa?Krh-hjG3nXQFM^^|DGCBn`1^VydrWL6_vxYm+7q zYyvc|JyAaZA#%UEQhXivg{Z-Y=*Fr1Yn`MjE>qr_A?O><*{DrM0H4pw&3z=;VjNeWR!JgvHMHb4>{R`Y(Fv7 zK8=e1MXQ^U#VkT`?E!0p47Iz-q0;B%EA}1PaOREoBA9$FFk!t%i&~nR-?uR$IHrAD z{b2qok}(_m$HWOrr?PqjP9Bt&Kf~L9XW{Q6iIBap;k3HMi$mh?iJ@6=^Qc9Yr{-7Z ziApmUN3(Y!(qJ^`%a=0{-0Vi*trA-HIYejew{`li8%2_N-cQ>MMZ=+xAyl+MUGoOw zi2@tlrr9x%7;U6}2wJb6@Lc5#Xfkv;(uOkmYEKL=&tZgH&YMtIVz}06^iBS5Rwy{{ zvxL(?Yxs*l7CS6sA6Gk(*n1AgJ{o-|6SpQl6pc7Nk^XVKN`ckT_6(B&&Dh-wJyAl7 zAbkoNYv7VnDK1*!t1*1D!B^O^c;Z5=OhN~UHp``740AGJ>npe|_1;;(G8)FIu=Zss z>;^{Q+B*0`Eg6Ht)R@_l-|yahvbd^)nyPGc9O5CKq#AMp_hEY=p^)D_=QjOMEXcpf z*3(KKrS==_h9G9Nf*;IWP0Vln^pps4-6!z-;ARDrf_cbHj!kjM*N)!`qitEB7y2x( zKUh0oi{p_gw_jjk69mnlXJ-!|Uj*EHs`9YO%i!vcp+P4;81HdkYWX=W{X=aOWUv&d z8Bwse=eLRAsMhgic4JDmZIYCbcA*Hg4fZt9>ZTq9?Dbf3EA zR*yB6+M(u}BU*XFD+mpJq_e6+w3*?shVFOG9yx*-3gJ*9bdTmN&+XycZc?c7QlotK zv;=X$1eC_Y#`sN;UPP|tJ^!W$uAXUr#}|eWMVdiHJqY=;m5^_r_LZ$E`#+Envh>t^ zMeYceVnOvuz!KBPz1)YVuIs?*y?OLoL$(54YtmMq@G^9VOp zU?akMp=R|Af}p5#^Ox-{HcHn>mh~;jPMwG8vRe&BKjpujE`p6yzJy&n#g6>!Az4EK zNsLFIZU=q>uLGT&2)t}P@s}!8!xXma@jBMmVF{Bc&Um;ybRkh>BV- zGK?A9XIKBx8sFi?hsfT)Bo1p5eDB%UKD7B7z+=UI|8jsPjDYBL)W2yw7u(NS^J?<) zEbs@jtL+wwc8Y1M+qVG`#Yrt)Yo}lC#008|2Dh-jaD{n>J60J4$?!S6c_YYw#reY< zHl*gkUjNZ7Emo%%3FW@Tzpuml*+$u5#V>;*f2NlnjZggWo;BjF&IZL4F+*LzO~xlp zh~X!BNe#K*@9ac`N-kK;i(;Bgo);NBvxbmblqornwlk-?dUIaAvE7$Wg@$2d44rof 
zd|J&os$0$IaFmgPeh1Vzg{z$^REdTnwS$-Ie-i*@A4qW1#W#bPcR?VbmdgcmpT^gB zvJmCN*GFe2z-rLTigEJgVRQS2kEhmR%FQ&ca^s_xQ0tJ>%XTnw=n98?ky?v~D=NFc z8(Ov01X}C@`2c@V0|ZwR1Fd6|ywZz-{9=Cr3+ zSeF8xb4h|NK<}Pza<@;W?vz*?qiI=SJMC6leor_UrY(k`Z&RkG9`sV!t&2S!BF>#H z=aWRPhylH_L?Kp|kT5!lPJUpoNA}AaW`;pS4GLo7S5)1*PZEDJpsjqw;m0g58~ccYb~%r`N?wjP2-jNOp_n4urst-5ksdGkY@=(q z2U9p0t6@E zP>4Ex&+plw_p|z?)gSW3*-XdUlk&YUNo!9?>mpjYYJ)>1HL~~|(ZWzj2J?ep4RMr1 z;eXTQoFsHfU6Zi+)Vfpn))BKG#$r6P@J8FHc4UAYB18D4JdV(k{*YF$dYUM{Ql5eP zft5KC^Yab$qw}VpGsufwJXe)hW{G|d{B8yAZ;=Xcn56`B~5R-kvC+AIss@D zNa&V|f1`Y7H4&}jgm9b_8hPV;-3PglG*TG;8Bv1zMzv3v1mgU;P+izz#@6-ko06>e zprfD@)siP)sOb06NLC(9eh%@G%JC1TDMX71c2q(Z(OTnY5?a=?i%++MNB5+^!AWZ@ z#$%{kxvn}qwSz;U1VE#&LF?Q_-kL;=>49jaB5BaWRIxg z4{Lu0t}m8D1A+7`@{Z}IfhZDZ9F;eFJd4_qoKk@;di#Kh5)CWw_bDhAj>~QD<^>+? z`#GZ~|`?JNbLxp~!*SwxcTU7~R|Bg(2&y7bk)rPlA+6y`Q zp|tncj|bGXJ?pT3j>R9}Rs0s?jHG6x!f*w8C8#T>X^TkMy_UAW4D{X`hwo$(VU42p zi)?8TP&Rd~fD!)>CR2cp?zRMBKc=>$CD`oi-2Gkunk4n%{qxi*U4z{9g!~AL$m;-B z;7&j^$n5KDX=PGM?%sF#zuoWuue+Od=$fIf$&g*1(m?CIA0eGg5}p-Fp@E)%4E?wy z^DISA;6EPjXHu~jZD_ma^ouF#e}B(@>3?wUe~Du5o;XSp|A9S+WegiuaP-PK_lNo& z;`%#xbGFL=v$2~HQ`Bo2NAP!FE{-c);^VD|1|Z$Vpn}Gboty@D{sginX~SDtr`IU~ zG*n@ToXXR9_;Bf7$RG3HD*rdIcdJXOqDh8v4&ISc^j}Fes1{<-6>SKB@5yTUaTUQT~mEdWWCnA8(liN~X5gt2j3 zttZeA2|Aj10O1?!RLz}HUv0(A_dPBPor0(4tvKZM*~I(tNQ@4aumG2)71M7WPa+aI zczaG@VfX-v*mE%wMhqBjvfXz&0w~&A`Y$_*#YR7#$^L5ibICH$IB9HVM6B$`7~Rz% zw7Wx?nwCmt86MbFF7GXs&8YL$D18Z8;tX@teWMvQO#UR#QZR6wTAVY_{!q{J`&?8& zB59V%g4E_mgtc1nH36@5fM5`qog4yo#@toES!Nn2ZZ6D}w${if?TsSR7GD zf<}2)=W=Jep4*%hYg3{&ASD+Q$z%K-7#qoapHSI;oelx9-8ZmqcCn_dJ z#>$EzXOe_=uS&*9Pna-(gH9azmv4wcC8Oh3Hj0XdlMps%3I1oEp>k4t1FSCyB`k}| zfp~6j%vjB_LX4;4pQM_tWk!C6sxk_RY4ehklcS4`#>UF@^*kay+1#KNPm;ur>Z6+$ zGDwmS0a(g$@x)(g4Gmc+A&1K;dbr$ZPrB`g>|z_YNK!L)Bn81Wji|?(#uqNOb->y~ z&J()#5#VI5@S)LUeeik9(+1CWH zEFbX&VYuwg-W(~-(}d{L^4aO6)JkbY6Wo8DpM;fO8Ts5^8C){`dH8wRi<_DE%1J~n zpC-;zinAz0gvhwcO`_Lm=gQGbl2X`_p|7{Oi=@7|CY zU2JBVimfy|Cni8bOg@5c*IOERkz5yhUS{QA&N?@D4c@i|F0YmhcJxL01|Z!4>KOc6 
z;(k4y(>2PRc~-SZsdN^FmjH0?~9jNi|Y_Zj4q2Z$-mfcT0*E&Mwlmh zu-%X#5+dxjT~T}LeIUv#PqC%6YZwY$m7L>y(L%PGwiffbRs-KYm4I%#iD?p~QA@x15uVMc!CN_1oq{<(4!9;n_Q@b1h zAyXHi{bUe;)b1ztoFm+5@?qKB6m`Ta24h!K&uvz^s#XQ=;5GAQgRZdVaL+f@baYm)VgGLR;tZYw4?7e~=CRwuw+`gS*g#xMW{w zs~NiU4fcas>qicenY3Y$Z!P2Q%T@(uqA8QK#VR-(Akt&Y%wp9%{V}&bP@Mz!z7{|tr($C6pOO{Bg~v7Uw0vGK`+6cL4x-mrVGBmTp+~ zHMTIQK`fsLzm@hj0{P%f6oopGp)Er|(l;_gsAUlX4(_6Dn*F*2wt?3t>aba(CjYZ4 zI(xf48Eqs*%bB6awpDk18Q3yFz$&G{Hd===8fICEv$$%*o}F3pV?UZ=OhhGW%Ei0K zPI`8J2hsrvP8`i}bav-=l%#b^9>W>H)V$VXY5O=v6oUu`e^5OQ-_-%Wx63c}m@kW} zYep-#ReS(oif3D025K=!=dSNyyEQsUf*}K5BN~ghPFHR3KZ%>t+|JGK?9QvhakBz> zv6vw^{gr^;Pp)A(=`gHj*{sN?P_kAqJ!OXFYRbv7qO=`$8i<)l7O7@7KVYebYRajR6yqJDgY$q*Yu0qz$DU{?uw zRL_Z#4<-u}u=+rv`bKxJwD-3$4L6x<#&e&rm#|4jf($&NUqgNG0#qmeITyTo|X z!}gtCSHfo7aks6n#QP2*`ZTvtV_2fH9zViBWv|2m5It|qqZ1gzCpyZ#!{&%c_kt2fh=?Iw4SrtR3Co(ulrp*54 zu83_*KX=>C+16MaDS{3-oqbQ8Uh6&qHuI*Ouz*=OJPfe4-{GZjI9JU7&Hu{U)2e&{`#m4<*|2iTDB*1ll+US& zswY7UnjuANx;ev1AXFYpQcHyBVhNJ|+|6K@GpKOY&|GdT?gZBDJ&%slufDTDXWXk%p zdhVkpZxne{qR?XoKFx`!pU&nqA)ARQEB+1ypBs;XkR_Lv<^0Dj;)?7uh|i>Z9$U2m zU3J+~+2`pz31aA4x&nWo@ANQj=6~Rb2Hs<7l01Y7qU#77IMJN7{{-CTUj)fAE|;iz zEf$V3I_h~ykH$l|&f3fWahlZ^8*F07KXwWNBNB8q&%Huf%x>3(_+@cMpKFfi1a|#j zbN>W)+pY!&nG4}zB9RNQphuL)C#u11>>;($-2jj7JhC(kAmD=;OI2HiTS>-pw25a_$n4@37IzEaE59 zoW#OLn!hh8IuC@zUU5Lm=e0nT{v9Kqmru)1MtY(r-W8=^uK0slN~6m(p3yhCO#XqN zf4~CUOa3ne`~sfnWoHl#SQZs?EcsrtD94UT;##e0cJT@kN*M`tsq7z#Aa`Bx>}cm` zDjIx|=5|Gf0WYA73~9VVej&XE@UgqYP^J_Nk`^n@pR}2|J_@;+|I%LF`*JH|-fLAc zO;b_i1~pfQj_g}xjvK&~E3Oe@u7%vsDjVHsbwttL0gtJ&`(wiVgaW$LxF9Z88rc_% z0~OsjI{#a799BWxpP!u%(UBBdOKkoVQyJ~&-@TW(UB>xbCR>MW!QRat){SlZ36PX~ zo%#EGtlyx7ao_`jB|Oo@h2bu;FE_kV#jw?q`@KD>>&j_Z6WTcV7#bB%ycZ)sf@Dm^ z#LYYKGYrhd-RI4B*J3|D-u45P<+lg_UWxJM=WGK(hU)LTAmu9DB(FC^eOp8QzY&bJ2B*fhJ1>I%=-LQXwVe^)u=N3K_`@K)SjhxI|6hIehnCy67F z^?{gM*mRUs7H*hg+6@9zWU#*Mc-8b{A!)z`$nuMee&q=E?aKIQY+45%sAG9uAUITa zTT`uX(*J2dn2E|Xt+s8R1|wSSvwqfRbu~v5wKT)ZNNLHrXtkK%WnW`t#Hv2At1lqD 
zB*d%rbqo)F>cY+a%v4t!c|*QQf^Wt!PLjxFf_+1NQqSa-Ku}S12jM@^?RlqJe7BQT zTV8bhk;&Nv`w-M>qNXo`dd-VkZ}K$s*!<_289T5_E9?B01gY1D9mp4qo$U`O51X&)&zcK^6%mA+25n*i%- za(F#Y5L-tbyU~7pvOqt7PSe?*+q^XQmb-H)a4rr3SZqr7%foZA%b|kgj3Be5yRUXt zBg92bKjI%Rc^9uo^^s{q8@MrGJy#b^G?YG~_uiZmcMg7>(uM!EOWYS+`s(=k9fs6L z`eyrS8yk*uLF4)lWsJDcZhH4)gVTGQ;u4_C4HJ}$`LEHjF-Fdn*u;nT$74<-3@X8c zrnt5TR&Hp#LH}gcaGFX9;B2bz<$mXWSoFR zrXhFtA&oFmXF7F1=8V_wXUON(HUcM$+!5Oqw?Vj{O5_4P9|%0nrrJIVFQ!Id+E}m^ z>PcJ*e+jv&Rg$#sH*8uZp2%{bE+HMg$MAG|0Kr-w+lQ|AGJKsdJcHpaQ!U_p4DE?3 z%{C@G8Sb?Tz(u7f6$vIbJirmr?Xr*v6w+Rr$NA zFNav#OXJ9a6{lF-AVjjN2pmtRF{J}BC)|$7vjrR7ugMJJzJY~3 zQ6(HNWT~p#ztlxKn+833Fuu=Ax^z_yoqgg=zy6X$!dTcL1JNDz4BTK{1J?pt z5Vnrzs*p_;?$5LIyMlljoB`lOj;+Wpcf=efqeeK#=h%sP<1iAi;uuhl*%-M>n)2Cp z6MCYat6OY3JY7(yqE0x->(oOCH!C2Ra{QAKv}CwruR1I{F1{eNmQ7R!o22*}e_pNk zh0eZ4tkP~;fu6p%Jr+Y|kEYXEpq~V0Dx8yyoeD0% z@4TWT4;^c}fuk+tih>U$h5Ps8*Dm`|*jR)N*+wta!9Pe=M|0 z`3@AS|BJ7)42r8;!gX+WcZc8*+zA>iI1Dyea1S=P>tG3l;F=)8A-Kx`!QI{6UGL;O z-???FZq@xeyJpMUy?S-O`{`bNh&g*F2r7iqPxb}!DOBIK`m}!WHQjn_I{@7)h>G8Vx3@Qz%w>*8 zi4}9YbG-*xYSfBQ*Xfm3NQ1OkW9mAF94;pZXx^ol8t@LP101b52CJm=n{F1w&iQST zf9Cr#1r%+qrjRdhG(9PIif#*%-nktcE5uu&`NX$g$#pJS&+$44?FQY?9!fCi=EXt% z48NQKl5xr(*{9ZIPOTGlN3(#u7Ix!D9@(# zAiU`^zXl`het#SH2X%?3TE2`!3A+M|Aj`P2@3QN5#>ffX-RGVC?PKfuWim5*2@==4 zkJ%0{WfMju7|^dg35OT(=ac=|zE_mcQY^Q70XPJxSpnV*aRNm3iQ=g+Gdd?X7$cjS za!CzyIQg)4*SG4G){|)E3?AT!IjSW2WZ(L0?+~$-cgLRIbXO>bX7w_0-GivA5gFC@ zPvggai%zwJ&^ND}&wpEBSHlV%t-9meT(ivF%BZ1#d_$)=H9MVGSBEk!dop*nj=6<# zOSf_$d?HZ^?FQBSC4WpCLOd7{V{wRXQn`Z6d}_k$Cf>QRU#x!R&PuayD{0{_hCh@y z+1-l$M=NE_%S47fNQ?VTZ40T#q6B268KtP$Ch&(R@x;Pg!^NZE&+bgncwxN6hsw|U zGqwZYbW!QA_oG|_M#E5sC4@M^GdFFYy>!IILEQ}IUAu`37P+!`*SCKD{+?s&#A~}L z^hNeLbBgF04ix>SPsEuiW;2^NJ{Y9wWZ1_pxCV7gAv-hcIm8^gSeug)uRI(w)mg&J z!HdnK-9p90c^e*r;Fn+*e2O4OX5T(0T>+$V?7#yu#i0zH;+9a{op!@E;^l_xu(1U>_Jn4fa zT=KF-6mqY8G>)lq@w3v0aSSJZLc=G%rs!a2uS>eR!hmRX z-tAtR&`a7&6=#q9HAIlHw6Gb^;8+P6E+PW~@@5NhMyTTof`{}1o{R#hJJHlnn?K;68!s^*4H})~l 
z>1k&Ey9*E(BceXGeGO5-{hq|Sp^PfrQySDVm*Qokg1IyTaNcbg}~ux$2sB4?iD;mN5`oK6=o4?xa~)z7e>k2-TGl zWVyLm&GEfA?k?Ck4bgk zi}qJ0|JPa=e$wMGB`Jy@W#e4WW7s|;RK34OB|0!4Y3r0F-equfIu8@t>)j&oQ(T6i zcSp8>Q0-<&%@zxO-dX=~44oZWX6=zO_3tzjI>LBG&UgKIcHtY1Sj%5}wQ=IXw&+HV zWeA%99OodX>)m$dUXC-+)6f3~pWV~Q+Idz32r2nuc9XczHSZW_hU59<5IVdtT^Z## zoy0&$iO&Y@o=K#uUOxCj+#P#o*YmQ_r0{##dg)} zj-G!aW6B9Z2=>NUR&qy`;uRYAfP1!EnViF}%QTVrHzUwS=Zo0o_p03I+{RF6cH~{Q ztg(BD-%npsFNxPst@>RVpN&&2$@{!BeYlq}JtVOM7#N3GS(0fYK`ScW$fb{T4m_YM z->%5#mJ)rv00EA~Fwt~oNhXH{L5%QDHzbV$A=N^--)98dW;TV44?zBb(U$r1`;3%1?a8ofX$@;Pc3~XcKcMww#@q;~RNXZJ25L(yh;?S4TU=2QIyU%602n%bRJr3pZWVdEA~!zE#O z2ZTKjLhC`x6h}sFxZf#trKny`DMj-ugiv>HPh`r!4r0*to|3XjRgW>T z2SUm*KKRv`Ka!aOJI%acZ~X0}K__BOxxVDaRir5PWIfsXVPI~v96DGD^2T_?+8e#d z1|?P#3ZBOwiPhrpi$s~SE7&4l!wpy=DwNhXH{G7Sgw~Uy%-nD4;a0ozf#HA}b3xGe z^j|qhx3{TX!e1?WjEep)0pSZt*bRarn@#(2Y^%0d978!pgHnObbSK6_v9(IK>k66r z1{pNNjc_4i9BJvIn#1RFUgL-=`IDu_>|wGX5ltj)3s>@064H&e#t^{mIO<%lYr7(w z%v*qHwn-IiXT;6gP`)9$>hm4RWUz@L>Moj(3(b!&7L03+ukm-*gdzK{q?hoo%d;OlglyBdhq`V?b|rHBNsz=@?CU*m4}_dSSYV z1>kG=WqLAQUE-{O#Xc7>;EjSIJ>Xl&S_$XCn>Xt73Nr7t%BY?wFyUvVla9mk3v|bN zMv!c?R=e6*)lWYK+WL2H=-eGOV;tHoR^FvoRBZy^^*%PQS^L3=FWe7~W$s2O&1Rf! 
z{GsrDJP%l-dU>Hb8`lM!ntUdh)>MQ)sqP5zA)31YS3ZP_k^0PgO=(Gd%^WfNvjMsD znD!2xcD5=wJ`*uPBTGL9wCvg0H$p}ni0*QOQ=BRr&^*##&`7J;hyT_;dDHy^XM`U`ERZc?P{8iL>Fn`TeXwaHe9QB^Xi-Z~yX%^J_y%;O zmP7mn3v0dEsjcI-FjJkCDuVYxRJ;{8Y+;U!YrzY5_vRI+PuhL+MvK+a=|!1}Eo-j= z604HEc__QOmMFm%w2a0a&gyqIY=e~aNjzx(B~!4qaG#1vav!{xfG_6Ce5DYYWn7^Zp_;TIM|hSQ}Ze5!RSH!0*1(O7J8(WPBedNfy}WbqlzP zo-na#qMANhXqfX8m=WHgS-s^#_efb!+Lx~!oRSNSmfp{UA22 zSE)3eRUj9(i3FTbQdI@!{`K3Z7?aYDRwVfSaIYU15=if&~?JvxzYd9S*N@!foOhb0|r(yeOloCVd(diyKPu4v2p z=6+BNB7>Y(StOcUM0yFH2XRGjx7IPjchOO*k{JiN&wa|H6z~1K(KAW|IyU0lI=q_S z;Ma|=WKkU{LTom#Kde!Xd-bKvCPJb=k-tn@E}^0aUPfikISZqwp}-a}eQ|6=r#fnm z3GJO15iP@Es{P`m_bap*vI7JcayIRTJ|RR zNyieJSsQ8qTOUE-yb?exK*#&V|DW*n{H^U*>9y_6h!S~$&aJOR4OWrcwW-KA=b_!# zXmgd9$Urxn?6pxpXKZ5%7s-kfoY(n6dpR7B&nP2^E&{l72mn3QV7Qhw(6itWIUv9= zvLYXw6zaFzP0qt&|Z#u~VY)#1Vs_zQX*@cyzi z#-#S=gD#|5lJ1chIu7-6eI59?_867t4H9XZM`u(@0&8dA4*Ln?>BgTl4I`(*e)py$ zbef3|OxfKX<)amq#&=y{99CZUn~IhbzfTKI8e*7E=SRMC5w18o8(|-gYg-Sq#_{+ABl}gyes`hTPYJ)R zg4~8+t8Ysr-{3!rsCEj~k+;AE}$Gm)9 z%-Qbju3CUyWzbE-ya}``IHdvEdPr@$3o{o%2(8Dn8;I`f>SEBHQPAJyxkUgvMTc5h zG;yh77H7*l*?AGG5k$|8+J6&M);n#?h#5~^tGjK?TKZ3_RVF?i5HB^mlGUFs$;y%4 zzI&vO-jrV7GQkotFj#7H$E*4#M*MY_uIC)l-f>yldEvBK)e1RwQBJ#~r zvjrFli)!N})8IpSb|#9K*@u|mDR{Tm5G=1Dr;&|M&@ozp;RNp->D~tVFc`ta155Bf>u7_Gfdk8WhroU>G*Q>FJG%pVCPm_!wl+|)Tnpr zcH#A171d_{h-nK)FT{{1)fdmbMuqi5*CD#MUMJy;0u`Gh`Jn!XT0eP1LZm{GSrhZ! 
zs3mX509W)}76z1i_X=-C5URty`r9p9?3lU?tIB3mNN>y>f|w;XK3kSnk~2%p(A&h&Jj z=?GaQcBj9DOW!<}QI@jqb+`U(_oE*@3gC5~QD&fkOH3@bcwBCY>jimbdO@`BY6|}_ zUGuHmY7A%0sWjgl;-IYYrc_J@#!f7kfgSqek=!Sd%b>sfI8y$R*dHguDR!r^YjV@8bpG?tM!i_zv z2pomZ4TMqO5Xj`Den6B)>DhKY(s`ay_))*wusnlNYJjfqH3A!zReoa!3hQgNCQsSo zr(%&d17G}~N3R*GkNWv89_ch13>i*^Y6m{3CHzY>LnJ^dXB@kmIq4@%@AK#2|5z$I z?ZN;Q7Xaa1+D1ytkhkklU`Zwx%MqG-*X5Pm-u0D z9n3TO16o{EC!!$5O3%!-Yw3z*$Of><{L&+3l^5TfLP)Uv+Rpa|LUd>OlRIk`z2@?Q zIdamlGS0@>_lm-vHjO(-VarF~46>WOruuq)dN;^qycX$-I`w?qP#gBm+(Lr8S|&Bx z=c@}T-LMOnQ4_MU?~Rc1Y0rBxzAzUT8V(2JP*uHo*}>UOo9!_XFAOfIK0v|ox)1ks zemQy$>04cslH5NM&}CVIew%pW$zTPQWhZSG4?Fty)E#O_qiLyJ(>`s9$>_SO5-P3x zOrWmlQKXRKbsv{SwpWh;3PxAKHnet@t&*w$@M4Oi4>h$U+(EhY}ODKGq8BTk4 z#S+^t{%}IwRTB1-scF*0=MQ>q8WTWHZOtL>wCxEtJHQ^=eC&iU0e5ud6!JaWKT_i9 zr2TBX(0EPX4T-T25YYVjO}6W^HH4cUyiZdYhV@tJ0XY%Nx;PVW0DnW%aS*$5+!S@k z6LXWB*T(nB#ncGNMH!D@51TPXgX;l&QqI*SZ_yy@4uj52ZPJC`} zibG%;FBI5vS-UJpZ_ZRJXm0N>4$X(~B(m=%^z7qW&gW4QEwK?Yy1ts{reW3Mo^nd~ zkfLsgMf^3Te`UyaKgLU!bvoCigKGHmhrOUv4%a)&A%C@Gkl5R_E*JksIR6I^5CfY8 zzx2)Ck--~Hzs$2L-p=}xTe`;Nf+)}`4uN8Zz^1@vT^Z&|hiu zU$GRFu8bwUz_vXcq>5buMk$ijC_>u5zL*`}0#G$A;y>Jn8xJFY8eS%z7(N{0788%$ zsWwcqYipHkIgP=(m0Bp)PoZLEAf(1q^Y}0>a$)I%{&yVz6=MIr$UovQHnFtdXW@&1 z9*`G5XYk#YUW_3rWc!m#^^k@M@Hb;;wsA%eM9KO6GMSO`R-wF*Qy1{*=HvoyL#x ztI^?I{;i3%J*v61pX&nezydF=b)wXAp^awySBFHB%(CnP6}T?`>neVgIE*U(VUCI; zim^Hc8vDt&2L6CCMiN9hMlSAmIjf9@=~~ay_V?UH+fA!ewKhlJ9MXJ?RTiE_-&sl6 zE%JIwVC{=jz7tP~MN_gLEBXVYSz(X2INHMR@~Agi(D;~YH>vb^PvD?Y1ij0L*z4s~ z=9?dUqb`4XWM9u?zc0x6-4b0J4f|8>Wg0QvIIUtYj+Uwn{Qcci-0yo_^QqjbMM*_& zJr4{K`ib<9;kNI}mq1OX$XV3uG66&f{a|W;cmFWB|4f(?bZ|*R5^gZ7fya6Tj|V6# z3qxI1M5-b?c)SUNYo!4Psw5-7O~_%KK@piLg)*`HrAD!XSqcfYmg`51=CPK)O(||N z8iVu|h*~(wgV(*K?YG<#%OXp4a;W$C>-Lfb4C@gEq|nBV@W6E8;oien(a^6~1||V+ zMCcf?CieEBDk3*@WwK#f0!sE~gLX4E&%&Y~isY^Qxo&E%r^*vyJ}!ud%V@;f`P6lc z%@Oy8lT}`KFyp(iV4M0N_S(5Sn7MoJMvf(H#TnUIExL8`5mv|frZ`w04)f+?99A7} zX=_C>oYf18E*8WxB9|oOZ|#fOXU;Q1Mc8LSq=^T5a)F6jn`eFU7W?>}6A!`6CA4k~ 
z!}ORcmqki?zT&g4K~%;E{<1J5H@+stE@$V~a+M>2_~onjx}Fq{C#$_T2zGdn$Lv1w zH1*O^DUX(=Doxu0_JMt158gr_Q>+S=U2;0;H_gq9Iu)*WmWPfFlp=9Jxkf;=Nr`#d z^HA-dglG02GU3t4NcOX*R&a*r!-1vD6ioANDK?YhJO;dOo?XvJ{yLnCj7xPZ)o`PO z1Vx-5dJ}Wf=dKDfo#UcpbZU{8EljyA4h!|Nj(59fMaRaMBby6Ec%KJT7!sEY`t3Do z#?Nv2FK%jF6?qn_pR2ytXREFT1UtG2l^^k%I+#7jAMYMZWXQYdVs$xYJPlthJ0Pj9 z%Z9*i&sUv!fp8d&slrsv$Q2EK7AZOZ{0HmZp@=NgrR_Tv<{$kXbv!AuZjqoRvxaog z{J=l%a$7&`DNxgvZdf9v?I;Qh-Gr?m9&1D0m-n-6^J=?5ETfW(AH+dx|F2tAqrU2K z2;HeWyy!WUnkJ2~GY}nv%CykP(d)Av53T+WRfpeP~jBEMQHn?Gb zE8asE%WV0i>*;Hwr5BiWrr-8Z+Ch=J^lH#JFhHLo$)T~q$zQMe=_gvlApvHTMhw%f zll4JuWn~9Jy2x9c+kE;9oN}mPx4Yq}q6t%%sPtwkEo?Wsdt)b|hvgBp36z zJ2)Ih>g#b<2WRj)Ot3jQ#?AerbR(4AKztld-8Q1T$oV?cF&+JFeZZ(Klnkq7JWq}K z{0l{iZoLXy1#P}wY;}<^~Wa@Fb#m|M{pZs`S$W_nRZS5soe+3V!wX)^`p6)I!v$!+`r2k zXdc%l3`ti`*IT*QBTN#zKC5i&O163osr}Qt0;lqsLBsdRYO&`fc_K&3bq3FHv-S^9 z8~U$u4bk5jHj}c@R#sAl8qseHMHcw=K07o8l`e4f+%I)RZ5o_IULO#VI5N7sBhC6r zjn^m*B#woR0yCZPe<saL{Fv-|A z=Js}5<*dnHyFI&N+4a3&JSoWwmOqc*?Shz9(zL*ORX&6At$hd4Yb~t&5N8)>P1M7Q z-BB~44uAjQp0su_Rrv_p=)zTsgO#i+A=G#NV~?|*PRsJ@ej;6lWzJh;a=tH4tKC<) zi%clBr|ZA9_$ps65YyZm6=WHnj!!pV{2V@C`Nj}3TzZ9OLs=84pZL)Q_6?W6Id!hA|l@6V&w60qGEYjglMvk7h3mhBGoKNV+HUsBdXz2v9BDGZ0aV z4%6|%RIgCdKQuz^A|UW74(2yeFYVoLXKk@_QTk_lbEIN&QSzqO2XfR0Qn`Vw{sW@8OqR zR+b;qMWe`gQ`fvDFn$v(cjvrT*-U$C*R%pd@)a)3VX^FD?j+9JHo#GyoV{<<3*9z# zM{pjPv?Ma-I)apE1L18&ve~6NYgkMbCcldx)XtW4p?@oGLb?c7ZaC*1wJJ{F3Fz?_ z8clY0mZ<_^vkTz_H%U#Dj4?S$=5&|t^gpRrJaQ~I@S^C>_(=X7mYPlx>H{l4p@g2t zvli&l{+799D)uvZHARF=GrZtCpxf=FH#egi^eT1A=-PH!V_=%|gqwjcTf4oihaGX_ zaC>iK2=3^(6b>QSZA8nyz+(t5&J;!1*_O@8du~$Bf{}AGOtM>9hI7~e_#PtLmb)IC z889XXu*e)(s*LZ=od>0&qB_9kL0^Wzia(HAyTIE~QUN_X?OkBXA+U7ON6gGkh|X2? 
zAUx3RJOC5t(#$e)3``Ph>mkl?mFMJ!In_!l>giWBdqlB3Cny%W@ViiobB8xqRRhI1 zYv+il;9UMjGnZ)c;9J7X50qBoFDML`X7r=rTdJm)1;;|i@)r@%a0@x-9)nT}VLQO1H(5|WG z$sJT#rp%}2N2*Eo|{#_l5^JG=I2|- z5#cHQ2D)ucZy%JMn|b*r05@@WvDlx_R?T8lc3_NNhW`yT=^|^YL#)(_Z#JC0v9A$0 zvGE9E=jH%CJv})eg^?``5O7lnyMON$ec00SG>OE>NUh=~WoHydRY(m}!^F%H7WuYq z{(G~^}j?aeTM#%m6f)HUKe-y>WkZp>el8Y=T{XaaMZ1K@`tsloiy9}sg6 zilk^OROY`q5Feb%>5#ML7c-GYabDySlgl_brZHCNiSx%9U13Sj0D?hA=?&YLhYZ9T z?$nvH1xZWqijW@Fvc3#mEb*H%i=!MLsJm(J+AZ*E*8R{9g3-g%aC6wG)AUwnM?}?tB4&*C}5EWYZEt2ai`Tr!~jg~=Uuypz#yLncDM$kS% zqjgaZ6^I^HbWY58N4rZ49OzYO`RFVDA4fRCZ7Zq1r7}11LumOqU{n;3Gf+0+5GoBn zld-3fb1h9qY5~(HP=Ad{6v?^RZrG=P)fk@^h`zZXU-6HVleOY}=y4C`Z++{~vld9+ z80Avqpg0(8Z$ca!*8a0CJ&0r3E3%XkQHPOo3~2jNdLH=R&$B?K{r71vQ#E)A=YuVp z*P{@s?64SdQa!W+sXi*MxxKM1Jta6!eE2;=Yi4mJF9idiyI? zKu4LhG4+u;aK)yw#yz`P11=J;02 z!6vVq7^91y+n@={IDF+RYVO%lZY@B`^jq7hbLjpYjO_~X**BH?e#oW zwG<=9z7o+D9eRC)d)UT6-oD>dU*mBrdQ`{7D}~-IkSU^Ch)7(No%#VG$?qLK0%#=J z-6!<(AM6FxjkOV0vMg<|zbsxB@)J&QcoXiDP(5C_9pUuHjH`ibjxxZ}(T~?Up;4JxFkzeoXf9|Ct5 z4Ptw97c@X!u;LzMj5tz=Gko;i4DHg)A-sdC69r-K7>}+uVmDr1oRpLQlPqd0U@Y3- z1|t}hX65{&m+JnD6sIMB@t9^d19UYr^0={ zz?9TVrYA3%7#Oc`>~qb6F0zz0b_+CGKapFBb!nyMHCa8BjpJ2c_HXZOx4n-9-7&mq zmotJYNd5z?5x*}ll*nS>+sg{1jx_G=aTZ#NdobO3VLcyIueI!0riDfQSF?CK7;@Oi z^x!qVO2#Np<5(x3bHcSrVaErm;Hpe$~{ijJZJr{p*P6vCs zg__pN)ItXlr+7jII>%G)H1_6+H@O6;TS$P;K;4_WZsHG%?_AubOD!w6v8xxMP~S}m zG`#vkNC*lk~x7s*B~Z zVr$(1h-gCpTV$PB+;&T~uCx*jvrlxUsKX=<>r~|8y6-12^f7dS0>5Za%*0ehK$qO~ zMht@Orr&@DHVqhmhFN-6CBg_=s(0d{Is7$wvsc)Kn291<-FkK92A*&>w`m0vw#3v~G+ty1we$Dcu8HhdQzQPJO@c=s7wIRF7dLQ)x zwjIp}>g>oh7RECUP|dUI2A2qqzv=^=j06lkt`TLXekEM*z0)}#$Z}B%`prxd0eS({ zUkakiYKZc$7S1+&`aTvjLlj>bRe0QRhh6p+O5iq{)*TJhg7kmNlvgn5Tn=Qp#YH4# zzLEolO3IeKv2BK1t&3UZ<&wm1*qZ`DJB!{foWj+jp+Hfq2ulULle5+aGd`&Sg16|u z3BAW@NGERC$PL6wNbH3o*>)gS{8F4NkwrmoCEgf#EhKB!k5k)(EeEMUUl@)Wt-L}% zMvz3@oR~RqsJobt^=jcj#pla-Zwu(K2s*m%0E8X}nz@lljtHk* z{OwUkbBc=6{Y>l~2yKRuz#-(DoXD4jUs6ZQc(9!M;YpGjl#FK^y>z?eVPRb|lS 
zCjN7?I)xW>=7tEsrV=T+bUM#au?pDB;$E5sciZF=!Cj^j(ZK_Gg+IkbbKr=6JRY+% z{5*IdWmF1>?zJFpN9Cr#V$wD)S;21HjN%f@@&k^2m zsbsccD`6=*nODGm%SMD#goksc({k})zPec$ z&L`-XJ}4gRi?{Gj%(QE=E?tO=lYec8qkF!Zb=JBiW!F<%Z5E{u5K9z)*3w|Ph}FNp z*NImm4IPX6{KN4thpfaUb(yVwy}uZjmIp64E~b%L67%=Y`8omaLo{E#xXJo-FMEFt zBk=;!+x-F1Ydu`rMxs)o_5NZnbgZiSMR0?(3#w{B`l$-F244a6i4orneUU?mHrXu~ zCku}I<6mIj!kIZ~IrbRO^QfMens$<&g^nkZGSix!0$R!&5BdHi zKkK;JAG5d@hW_fW{|8<(P|`7*IMg7Y`*$pZM4*uX>7FQ z-Rcm)*3-mm5g=#eI{5O`v~#kUtx2SNV>QKBe-wX{Pj!oAyosB*0)QTFi)!+>_pv+l z_FA?5(&=7ClBG4Q%zkD050>;Fsk~?LlYTJKi{24#0yR*eyC`l@%DoWE&H{<(4&*+G zvWGkJQv^fZcMwdHL_m=Dd=AI!|Ni~Idiy^bGwi1IFuSBz=8@RbgU~r+3hAkoj&;z> z;ZzT&)raxOqdnA?d7rr+r!V^oDAm#z)@^nhOeBX+vN|A?hyO>f4vDO;4wqINSM^_W z{A`9!M@N^&E6iV>YV6S2HRIr$RCc)6U^zmwF>B#V4QG{XD4WUgYVPc+9Bcb9^_CGN zVr%A>WMiJP<=$T$<)Y(3+-;~g{6iD(;iX8=KRP|zw%{nGE+)_>rfPMX^EHm9%q!TH z@nZrMtc7!o<9JC{^W5rq)e%Lw4=`7bgI4vJp=s@3NFeUY54$9a^u~|+&wH6=Ty9qG zfClueY(E>GwvMasXx5P<37oE^_}}YYevGy86A-#Txj*sCtLGuLLO;nf`b$p#Rrgmg z|KX!yxIzU>7M)R8o}*08vA-I9+(;a4@2X$*1FQPqasmPlaJD1}jQs6la=TX1S6{YS z*&Y_KKe|vzkiatmB{t#o8d|m(N!{*sd7`7`bRUq$CJG2{f3iWz6ApSw zhBkzJ()D%l#oumCizYV5wg7dRKxloho-sjI!@i$(mf!2(VWQz-xgB0qgy5&gR;t)e zZZ`y8X&Q35c$LA}Y#j3lr6FBH_>0E3jblmUZ+s(H1MUdg?jMv&TL}u)!J_dSH4@J- z#(wt*;)|XX7#Hfip$6+UoO6K6Ed$?|@2?%V24!RK-n~KE{LA>5`U8I>FJ{*84r1wu z_TE5Y6|m~}wYg#9ekR^`Rd4nWWBM$hy<{TBZxf3d;f*jMM>t=ofke-sb=+v8T3))F zL(80IQ#18eZTq~}9`j$G3D!g$3JrurdWeT8 zdF%rsXP*-{obB%{Mcy@TdY19DJ%MR7Y_49lQ;jPdFn7TG>nKF2c}$sQc@F zut(1h=PSXgqX_tL_JpqB2i``w;L^J$gjE2iVZFOl8=!gYPLjRFX|;ndo$N!=CWi;p z_*c!Kye+k*CU=aH(RVp_U;Lz7{Gb3ra<|uQp>s)^UtS8`q$AqM{imn?Dt}VQda|JX zO-wB9azp*55`6u^%a2@~?*io^M*3J>I^pK4oWXpjJMdMNpwkdY;=SPOccr(}7RG5xBJFv4Y$6D_ z+%W7%U6KFg(A8mYH?t`It*kYk0vE4qU>0SB#!_4XqQk4bF8cmU#}m8b`v|S5DrmT9 z&Jx+PT+@CYmurpQH~;f?{(kk){Ifl_?KjdI&i;O!&sQ9Izks{P-exp3Ea;)BRgbG6 zYID?QF(d04P!(k?ZtdD}OECUM|A*2_)j2c8F@BPzytv3l% zNTi#QP~5DcR3v__THq0c@gJ=Hsm0`6O_%heqhK`r$(K=tNcs!hw#KxWpl|^BaxFp$ z26RhMPV0hM<3uR0R5+GI5U43aXg(dJbT(>_m*JKxg4FCmA@^w#TZ_OSaN8-dS+X5I 
z7tl*G9Bg+9gr}H;so9%$2(RzoSI79Xh`ekeOx!_gN+xiBGMrW?^cl)o3rz~IhneV!C>G{Awx}PIk8Yrkr zg+t%OrvvIg;Za&f)v~^46NK81&c=GdYX?<3FxK7frUuXCI5RmTqWNsySiI|kAl+p9 zTO3U6FAHyIu|J{+&e(uZJ=)CRD-IQ#UIfM=7F~IwSIoWh@iB4JTp#nfdqr!H-8u*itJ$0%)Y!y z_N}2Ln{f!7Z&kiBC(o2k1%d3vUM?b5X(R=9XH#ggBz&JF&hJrYBxcDvj}-VcBh=E# zQCt(_&(5Lw!}PMuEEkRRSS6P2a$`{ zoJ2S3WprJT+3LLB{~ByhbDSf6qI+pcxt#U3uT{;!IKr&m7`VYYi}=>8UTWz`dDyYZ z(|ecfHST<^vxH{TLoBB@+G5?9>UI98(m!J)kEQ79V*W_#Tkj|qGk5K-<}n-WOAKE7 z7d!^l2qgT&XTq-S!h@M`Jgo#ggo*Yq(6H2d5PTah$vHjXk5 z@q(IZv$NAGV#d{|)W$;3a(A~C`Ns`lV;|^M^Gmc$8`nP!?adey$f;$?!Fp_p{RdraS4BC(@p}L<>s!Ri|WdONyx&9!Vg|Zrek6BS%!YXh%0UkKF!Swl;t- z0fD&iz-Kkr);b;(mc2T+>WsnVqH8x%yck zPH4xb8po;)QvWEHlhZe|0t=-TX2Tb|uKMAIy;S9|-0dYu!o~G?b=X%TG)@>Bw@}(1 z`r_ACg@X%z({CjauC0tztoLC+VoVi;IZkRGi&8`F%1A`de8d@bxr;j{b}VlUy(~9p{_9`Sn`rD;@oe-6-NT#-B4( z-?*>z#z%aHp%O1A@mvdjZok2)YncaOrRwO+~jou!5!a$2?z3N_v*yi;_;IOK599L zT8*BS_93;FEUm>ytas0^{4#ZB$UmXNbcb05$~)d~u{7Ps#C2(5w!L3i3S;jMKfChU zJrGqn{};8yBE(9T==pkznePh|z~T4n4;GYwR=VS_9#Z4D;oy_RSLu2vc~@alB?#M_ zO&NYuCoQBbaZh|=*iuzb=z-+9UaCnb5|%bdykS4ny?19m{M1YA5PaQM+dw!VCdQ)_ zp!AMS*^}1za^{$LqNNnN%Mfs~oow@D{mw!pa&>x**0U{` zi)g#8tLy5m@Z-CV9ZYReHj1Lc&ZM`r{=)bQEn-lgnPrhTL$Mn0o1|E*1$$17fuqbg zy2Ny$Z?s@Rs_LfM;ls&VyJ4{f^($lB%MzXt5MZbF4Iz*m$d2-`5rIluUszpK|yp%;w4o69)9r3^@XolLx{S)yfnKAEcfS z&SJq9;Px(3@SCZdnD;oII8+9`QrZO3&mtZsgW&;?Vk_EZT!)6%dmg_^kg|3tp7D#A z_?X3_r`_2cCw+w1 zOuI9PxX35^d$Y}rVdtSk8Kr2t8^_kr${2JRQNeJB=FD;K<#4q2N3^z@D`~kN2j7Qv z8u~xX7xiP%a14Nmv>DKMDz4u%;-b?{^9@X@n$J226TvBxU(5lsgLIo`kNno>HxVL- zc0$2QuebEhVp#yB+wj;ZgBw~fvBCJs8JMBbxjpdFF!DsktAo3gi>p^o)13b0DmNH( z_)m$^ZTLMR_}kfBJK@u%p*K-Q;Tc9l6U;#2O9SMl4T-$$10%iaf^I z{4bTc-w!-T@8Ts-r>L`x4P#$#0-huQjdmjZ4gjpaDaSi#P75F2TmIYM7Y~|b}_dh z8exoctseD$jA%5e!mD#|mcx=uFm=VIs^B$yv|*0}gh>bai;B2nWT6;V0$^_8m`uoM z(C6^bBB1dM5>CJ^7e6W4`r^;MYXS?u|+hPkNngdC>~3Q*A68g|oS(8T&6cmcsT|J)BsZ(U*$sxlus&$}UI zTo9fCCRX7GT^CJ%+u>mf_DSlKN9GsC=f(s>oPwZpwQwxCzjtexDPfj6{QEraQS-OZ zn;(aF-38r(F1DPoNP#jn`OS-_Z!`tJ^+)#y7%D7m> 
zBfB*$d4#aRNHlgAV}Xa!B2{37QUvV%+^WXhQU?qUpz-vekC=?Z0}&fw1}w4*a^YE= zI&_7A!#^DFcP&G9zl^lL@EgV9WmCPEAqs$HGNXZda&9ul-aangIV`;qY2WL|Xo3Nf6880$aYBBvM58U4dQaE!d^e@^Z=;z4{g;J$D+Rc zX}@maJ#R`~&4AcTU<{4{jKK>{h(*1PMAwWtU!%vzXP}C{@YKwV)PzX%pK)B(9UqdWCv?1oI=sN{S2*zQ5TiH|Oda*+8(v$9-omWj_t>STsCl1Hy+YqQxGUXoF$~ma;9V~9A z2n}JC&|Cj1ky9}jNebbiu!U_EL%qT}zrBawqQDrc;j`Y;GQFu3!BkM9`(>AwvzK33 zLii4f{{~bxbhwfwpI-L_w;c;E$0tmdRQ6w0F&aGVp?hSw%44 z7DgteW|VFVBDLAH(9_p@b7&ztcbx9}SpDGIs%Q-!Lqq$sQfS>_wWy4G-#O$f*m?{> zwD41LhO|Vz9R{o;_MY=ZNkt^d8}NGS3gM(fPGMc21gT9Z#MURl-JaQw@-ljKE#+CA z`-x1>*1NX8cg?R|jNmu;5WRKI_SI0)k${P}#GLanL`@_(h1>f>U<37>edo!3uLeA- zvO1Vs+8o+#4t~e_l+5IKK+eDJ^1Y0nQFlmAUN$BnUXLgVRe#4kgAPc(u)`n$%Jr_b zL?v2AM^TwelE&AtI@;}!XPH*kz`u+jnO48xD_S3%+zd%gG4ondz0b)2M95?&#|nr$DZ=`p%~;Hl5^9 z)7yd3Dz|ACHiJ6yxN{0(2$v$25#)iOr0U?%sq*v$?aB-n5>#(mZN~kwdHO>3(qc_8 z)WS0^FcyV`q+9jeikb~=$1tXda)`hpV=NX<0G8-#e|lbBuQ}C$Za%A|%M=0cvDhv* zC*u9x8oCGyr@Y?d`L=y^NOvMMU7WV=clYG&tEM>HFAV7UHZ+fAs)|wDdH)+m{LzCT z#&cEXI0@E&5!AWiz;s64NYX9#v!U9LfUxGc)q9^%u#X1=qv4#76Yzw90A5A-n|5s$ zLkMHr+g^IHJ@b?hCbw0hHeK$={I#56HWWyj>%^nA5lbqcCM_DCet+}QizPn91DqfC zd2_BP;hU-fk!I;cs@$$w*q$v}mo9(0iM6f$8;0TSu~bO;m3+MoWqkkiYGo@GJ`t*T zwSNIs@MM;QCSt;~Gc+vBWP-o`AU)$$LKH^DSv!AYQ}%Gn9CaJB|A#WES67lJOyK{kzK{W|x_5C;$JYo$jvyA4Pw5*Om z?>FX}Ztk^z0}T+a-@7lp#+l2loJKsVs&QtO?NpmhZMfBAt-zDr%8TaC3kv!l1GS+CSQP^0p?I39Lc)CQ`~M%8+>6G91rM2;2z}_WGc_wBzG+@M zueaQ#+ljAcM!_u14g@CCBX6j1jb)-dQauG+(53yWJE#9cYqPzP!*+6~0>1ybpj%Y{ zMLO_djGd_N?(aMMW{DUm{1Rp?Hmkl1_zDdV;up=vfe|+kp5z zqG9olUdFGIT1pXP>S@t$RF^MSvOX}M%C!F`9QE~eOC!o++>)HH=m1-Fo}HhEQI#yd zp5yT{hmWU_VYR0|=*^ctP!@?O;tU+5>J>&_;a*+4(=iLaX|w-7&7J30Q%&3UMT&w* z5ip2A1f)n6>Cz++Kr{kULJdmqg46^72~s2iDhdin3knhly%>Us(xmqidZ^L~y@>DT zxvzIU-s}1Yo_yFJX06%TGnv^r&zawGjJb^sm`3fnRuqcGKb%mC5cZ^ z)?nh()0bXlxF~}|{^atXmJ^?d7dhD5Y0u7;U%QE8_+7(aPj}!Zi-&1Ud9&ewo zBMf3?{#XYr&dZ)OeXS7I`I?fsnk4Z1mmTa6nKTRLj=b28v@2sdn5s?t#-riv z!$VKU5BxT7u(kRxo?RIbsu=&8zY_l=i*J$V7PRG2?pmap4sI1^Uy^KQmZE~TN=QNH 
ztIa!YjTH^c8%_j&S)-%LyeI#xf^5IHv|9@55p7sVFc!F5oBnyN$9q>{ARs&~LqlN= zV&lYx=Ty88hyYI3&dC!0iDg7%$PZFgiZ6?)AFj1pl>O|E?iJ&FAv^iOgF>}Uve#Qx1LF&Z9GjvMGva~_iX*cZ2`(H*t28SS$q z_GEMB@+$6lcc+mViXzvT>m@QVRdyR7K;71&OIE4+vY(ODLiSUl#aEwY34-vIP9s|j z=3MNqtZ7OkZX@*}BOSk37iWKq#O(~0-8M>;NHFdVUtMHZq>yGV!%vlGT?*UTl}%px zFya-s#p$6ZR{{YS-}D`+-;pe_Ky@QH&mX3c{Kz{#UMM+KHEz-mKKN-wVQL0URcsNb zW#cp!GEnik=quYp=1#l$IJv#0iO`tXGrLn32J-e2`(947vaO}^U(|0$T1jX;O9^*W z_wgX?cAb|~r53@QV_%-%Yw>Je?3v0Q=^VX@)R~8Tbk|&`R&Gc6c$5Te>Xep%zC1(# z$IiK}t;R1BxT>tx! zbGpiKqJI$DG&W&6F-tN=Owddy=M=Tu(;gqxeyzXQOI^i=I6iKlRx&x$DWI02|B|&Z z9dUISzkv=-4iG z2VxuI2tCi9=TC!BWelIi6*;b0)D~A^MXE(mqqSr?Il9PTANb9H zZV?yYkTVanni#iUu6()im?}EKo>3{g-z6L*KiidB4#VdiSr3*Sx7&{9_}uh;^;{p+ z^qP$b?Tkc@F!SN8!*oZC%w&2$nTSoL4n#JxE4Vw5?xx%uW!4OEqUyL*7+%MPhlv&c znnb>dCa^vTiSEaF^ty(ZpA~tXwLA=va~=tpw9hZ)g{H5MfJpKZO>w;n2Er~;y&E6$ z5f$m?DFd9I-+OP!Y56ti!h9R?%rMqB>EVkNyt;bmVqi@7yHBiD`f5~X^D6AR_pCZm zQ|C{VfPNL|)sxa2EPjU|sG{nH%82XV{Shf~Bs$F=6EsF>$vnjjY;b1w9R zmb8a!qKnm}|AD3l0u}4+6kf+e zLE$q>gg0<2s{VBm!eR@s`_J`}n)AMYMsyAbT*1!E{#2|WZK;FLW>>!gorx0xZIuv@K=%u}EeVLUzF407w{OiX$*mE7ez`i`VxC%Rh3-f@ z5bjPy-jP*JqNTxUB^<80_fz$48ih~~!TDY`0 zFBxJj4B*lzx+Xq1yq094dx365w;H`(poIof;y^pH+teJu(fU4dYt)vR8^B$H&){$% zIa(x2LZSBT4O6)OowphIMWzV6!g&GvS~&>AN5T;c4yOh1as`x!jLjAX-+J{HNzy7k zQH1ZqgvS(yF4gNyNXytFyuXt68GDggm;H|>2F&pprP^k2pH3FKAks6x{)&gr+2C#H zeyPLeP++$@6STSrFY1p_xH%-St`l?hH z?w)G(ZQzzTrDNx}(xJ_UYE58@;YuQRLj8{?<+$S?Z+^tcXR6C0xStFIIF@L)Rj6-m z4<=O|LCKooK;*mk9boS+#NXTIL{)Rq_`Ox|Mi@&=Q4Yj6`RAE$PjgGfC`tLZZ$u4X zBxWI@z!8E(5@t~eVSmiY6;bK!T1e#LK50Jh4*A6$YK%?V$6_rO;xER6EN!M5 zLi#w`CoqsuF9f#(g8e{q8j4K$1{Rbn;0o>QJb9FI;Lb|=t%I>9Sr$`f+j>$rLY;|< zP2g0JjjCz;wyn~eLM~ACnd>S}R^oZCJ+UT4f@5PI9A&6z+8$`wvo0zXGxrNR$2YOuR7J78Zv zBNleo7KOTiRq^n3XEf?HDzplDY~E%Cvj3xFuW92*7Rn)BkW#~{UwBXb(gz{phH!VtEsY`_c@L+*KFk2fSVPi-95bQ zCeuZg_V0YBKEz0>eDwx20nd$eofn|iuN68dE#v1qp5C{hua*(sBDriXiM-t+N32-` zo`Hp1j$3^~WCnei%1vms75e4K9I-^0)e;A>%L8|8w2U+9hCxiJqkM$7NuMzTA{Tf0 
zl)o_v2ReX1%HA4($sm^I{|hf{5kz41AA|r9W1e1EA}CC;v)6N92r_QOtW#=)TA7Iu z6Ioc)^ej@nM`A5p9AqEp8DIhcLr5qysU-YiW3OjmLsm`>t0YK& zsXSr6?bXO|JQ~Pb{FI+-)dRGaELBKmP zb%NI0g+_9-4A+s#B6f4q%|YIBr%x%nVk`4TBTo*}VJ&JFa!s=CLqaLZywy@9?~(Sq z-C#58p;*CM%lOu|%HvD1FW=~#B|7h4*PU)Sm4ZR7?EUQ3bzO$S&|R2aNXa`;tM}Sg zzJDWGsHHmS)m$*3{`ff$6|?W(D4KD(xO2Sl62n@*OWrVwE`(#%hIJ;PBTc|sk4cfb z@=|iqw{30TnaC-+PNuK#6X||8rw-@|+A!sTUY-Ns97&7J zXr#2T!2)zQ8`XTd_Zgxr_*o+FwY0I)J~ykFe0xOO6Y+I2ES+YqflkqSa{ou$Yh*Ea zpO5KuXqAw$W;X@0&|ryowne-s0->ad#4{^Oe5XULc3)`36Y23u9Sh}b^O9~vv%NmU zg@$(wA|oZ1H(meQt9WGA4()?8jf$kN{ zR7UmvI78N7G)9QwC8|pXMVQ}iXPo4EW#9(0aN z1tpSu)=CXx)!A9Ii0}dk3q8bJzS%zD1$;CMSmtGo|@*ZnrpeOe1r>> zNPDYE7h(L(42^-ARaes$vj=y;YO!rQYBMyWbySGq z>du&ML)LQ&l|e&?{ankc0X(Z=oj{ZdU5Cj8D@m$KE@)eto{HNjyA!A1E#X!lZlu0Y z{`-^g%W+Z^dNK%2aR0%O>D>NINnaWDOWrkGHM%O`HZYT=;4k8c4^k&xK-_4)RhO{d z;UA?A%sfy%AE}u1@g@z#@Ce@*1K$}kLnk<@9gi)s408_hIk9=oXNrzVo4l&894d7x zE6ud(Jo3P)W^Vs?^vHPIDUC7f1Ns`TP*C<$%Z zR@q4zxdI%;2zaRRfPK9XL|?1Im66c1#ci0kh7c*z-bW2S$;kFh^Y*PWS|D5@m;ny1 zUp~raR`XwQLXzI6#CHj?)isf?7A{k14dfcI%}0U*9SUj^PTp7vYYhoW215n1+e^Y{k_DJLsrD+Tft<3Fi@dNOt_UrgfcMG6+I&X#Gg?r69ldw zxMi9o0=%Z&bmpM+X!njqIZ0KC1njkb%Sq;Ql_GNu7Ng4YOYzh?o&42UX7i9r8o%LE z9}A>i%r{I^;Sd%f3SDU51f~{W!+jw7XWRQRdX4-39Q2Uw6UyVyR6<}K@!e%-F<|^s4BBGAxR(4=wEpyH#POkTxwNDHBPnz!v`?R zNQ-Zc@#1FdWdfi_nR%9R%}GQNh*L=PH;m(MxGcg~DZsvBIa5H_(-y(#!?HJRs>`%# zJI*ad%7A3udJzr~{w>S5;N3>4WS9BX18J#>y5DH8e6pu}qK+cNB7t`gjVxvvP?1Vb ztBJWUFWIPU4GeIK?xTq8<$Z1`l#G=BH=j6=-H@3H$BIf5=5rfHuqq^ap7{eP!8!sk z#`Q#(-2h%Py$WN)W`gL#S#Bv)F~iGiLIAD;&lAQd;L%Zl-MzKe9pP%mQm;zR=pLE_j=jB1(O7{0DU_*VmRaam@ zKlArr*FMb7op#wSu;#pUIWjlXzrx*5ZYG>ItFm-J2{^uz1eEvBZ)7leT+Nj;JenpA}E;C&H$KBGL*2CmwdFKGgl?4*U5CP56~ z9!AP^=GJ2p-wA|6GwT+~K1x~Ry-PSZ77rpJ&vh|_EA6SpsOV6oM~|BIBQD4~LX2iU zV2L8|vP`y3vh#Iz0Q(Qo;tYIboaYPR48R&ZKn%kq-sIyEw^K6krY=&AQ6|naF6FTW zoSZ!2`7pC2Gu0T@axF7>wU!ZFN3{^YXdx0Q)HSG@gZGqMDs|0N=d$oUb_V{67w^YD z$gZ?ph?g&J1b;Xk+IOpCaWSdl+;2X^VgicxIQQZ40?(kwZEtSL0^fn)BM{&jzleGZ 
zQVJ!0$F!${bN~+f-Vzi#&n+d2m}*Fb2ff;CdU~WoG?T%(5StJ3Ay>C-qmDQb?jhx% zkH6b6F>vIoeY-f|1|l|;rlXpncBFTlda~>>C(EAZ8gwI-U=K(zH+RWap|bwEatQar zpa_tlRFJp3?!I2A?n(ZUbdHKH6Qr>~kf6aK!JXh9G)Rz!Ai*Ux?!n#Np^*gF;MSM# z%$?udnY-RvZ>=|fyz|f5yK3)KpQ>GUPIt7Xx)Kg11ttOl0*>+Rwf`~!*vNoo5D=;pupZ4(UgkrZYB~yVIGkHMVRCZP*VlJ# zZLPk(UP(#m=;)}pxY*9lE;>58r>Ey-Ek8d$BO@aL0YQ3tx}KiiAT4bt0zxky-tW}Z z2}#KXBcscysdXQpWe115-@nNN`VpKvvAueNnqZ)~F*V3c_u833{`^r*&5WAbSyz|p zKqvxKv+~bCz(NsUhbwuLdDqeG>gBHfNGx`pe)iz*>fyCi^5oCyL+0?I5l&8~Ry(WG zrOn95*cGVdL~0hS)fKub+2iLtlfC!jM||h@3qt?Iwyc+%oV&LA+ZUz&KVAqj9{{H> zkHc_&W8j8>fcs(t+#bXMrxHs91g1R=RLmjiU(SFa=W%{@!UpDYlk`{{VR$z^q^zg~-lbgQxZNI{uK6~ke zsii+EC8-r@8i^sYJGC;a&1SQq{Gs?`;uWL8{h4h?%m60>RpF-FOg?Kda<}kh^cTLS zNU0lcC_*ml0b)Q|Db=gK9ybjMv(1>TXAcspK*d%I=c%_6uULmU!@|YhdKI7K$kD4% zsmU)ftrxLc8Qf2%eO4Tu*%;*evX;bTEu9amUv{}Hy-ekDp4*T8Yg?K;zo!2V|2w;> z1`_=IsMO{+vuYB;YZRSlj%acyD?Ld`!~s7A8f-ekcN^yJ%&vUVRq{jZHjz|57WTts z<}JbHkUk%zL9=_qtUj1fe_JtCkd48tf3^`wCHlVMnPrzf$`B%i7f}bnjNMyj@U{+_N z9kN_BE(fNjE>|jaO=oy%Rtl+gtYN6m!e2;GESsHv;EDYKmzB*n3yTl$FgDjxmb@iY z?<@|vTwWUsLdU0>mvNIx?q{^}B5uY&BF^tM|IYtO0tep4s>6u>by|SJuo#QIPSR@# zEfs)RIS`|r*_>{J;6YNF#bameA@*6U=<0&gsI0Nkm)f4|r+uV+@E7HhUr5@{V;K0{ zUzU@9GmIt|RTk6xit}%DsPf^;*|3rb(n*wjXgO+wg=qZ#-WjP4mGm?&>5p4;fz1|~ z30aSC=INn>g;`IJA zsrlQ(avSO^7$j8C{SOsHl^7Aua^{uKrNUlDgqA6+j^^Zd+egc|4AE5+ zX(v~{;JXo_vVNOsn0`=UY7M@f%2z8wwHwF>iQT{(m9LG*w9hM5k2i5MrIR+CBRKWA zF)+b=uO8szJW2c1G}_*iqd{}=Y)g1}ASK4JQ{3F`_v@Yy&*$ifIf|X75+9Ks8cPu- zyf=;|W{z*z7IS_QceocQz)z+z)Qmh_uR z?v?*m+mv~Gb50&!_*wz_e)*{lvZB33a0JY@!_q4A4T8JuY#yOK*g4qh)g=A2A`ewH z?$OdK$Qo4{3ZFE{Mm3^5OV&j??mz4gmcZFHi<&qqc^-B`l-+R#!yqA(D+&mwv>Po5 zh0l|}uOb7q3SoX>&0ZMTc!JxWO`Ru`noz>U*8Y~|`&7b6{-UA>uof#b>Y-#{FcJ!8 z!H3Ft+e#DR(4Fazv5{0nq1ZMiG>Sf`9IG9YLs;4JT6;`iD(96Aj?+}cRE|e~9cmARqDq6VJ?ZRdr|Yb$%Xbfg2`QAi zW3Md|>hEv?3#;h;PmPzocCYH_f2ilKtq#!8(~CbOSkBVy%lGZ=yrFhg!neJe`S2&h zkmAJXXfRa5EB^iME48%-Yj>n#B$S@Jvk?vAN2J`R$#K2M3#<+afvcaX_mwvrEjCud z#ts$Z>W^ry?)MdX30?L0k1OZ;ml@nh$R{UMH%ow2Iz%fVoQWk#?kQu&CQvX)m 
zmPXM}SvKa+pXh6i?~kaKib3k^8`s+dG2DP1qGf0GGvY7aXbg-TQz1&3Lf=et+h~(J_!v z7RL3*$`@k^pPGc_BIFJw;JzOzLQdk>2M@xAC^M-~ILE`bLLXVM`Z2kBM>KWMN#`&m z-|Li3<%Z4pkcmd}!NJ0+jmUcK-P~X1hj>j4V4Cq=IoNvxWmt!nJH~4;?lPh?=Ernf zOwz@j<+nn&{6aspV<)xZ?nNFML}jXmMYZPS-U#1DX$d^7Njl4$+v=Djq@hwLkro+n zoJBi6#Q$kRKMc!J{cvK19CD*kba|=Pg@hXfA(UD(meOQ7qx9sNkbDDdNInR4HPIAt z?os4yzimbNrOpZm*~Xc6@tC!$X45INeAdZQ2bv6yOb0Y1$3*-3hJ1;xN{(Y?#@$pK zptJH~pFg!&2$E&dA{J%S%$0r#;dwM4b13l+@rcZLHODA(+D;NrJ`2j@d!EE4j@JI! zq9jTTv)+xU+60_hq5mlop4)Yf%P?Ev@Wq$NS7O!Y#0aQ-5UYjsWgS;W#MoO(m+NZ6 zYg#X_8?W0nTdQ-*>7b}}S8CyUIYU+XNrerLR(kvIm-)vZ++!5b+d)Y7{zwBZ0cJLl zKPy=1_#1-W$RU|c;9jj9&S0ZQH(fg~0vLPY^B(WL4#xZJ$X?MWO7{Bra0C*9o0TLu zOKSU`(^9^RWJa0%wbS*?iugfsCgznTxBO!~Qb1~mXT(w3e??NP9~~nk_@TIRwN{W4 z_oHX3SU(1c@E8zV5P#}G5IK%VJQo}`qa}AQs(T`|jWu2njBp8jx6&DYGxB!kYw=HU zr!wE$?CX1)rNn?^vaN*VmXN6FzLMkhuHou(b?lfgj^0sVYoJN>K!{2_nROepxW@goRnYzc7BMEA(-IoNp@UuDbgb*lJuVv$+1 z1)iPZ4#cGnD*}jh*<-}tGKg&w6x6*6#-B>8IKW!>vMrdwdLYsXI7Yt~oKlZS)R|*H zQA)Scd)vB*fs(Sf;bq8WOOon)`W-gUXL~;@AqS_qECjq@#(&EDv*CrStOHH{k%=r) zI(NuDy#Ij@KTBCag8gh?Q6m2V!N^DBBg>A4P6wJNAH`KwwRCsO_5M)KYL|bcIOYS< zn?xz{&>c+IN#Onz3j@CmQCM%_QL+@Ggb6g8broReDL^#W+B}owFpPKGGZmCtWlOho zWJ#X>UtEwNkkBc_U(N=<2ESkX2**-z5d{2*}2fxkwwq{9I1!zfhtj5=>3;K z;Ro#iNC>%)q;yU z(d$*uZ6CFfc9%9vF%=@nq-S%R>50%`@p&r2$#`}sI;JXtfk+oA|MFh<)c>sPCf$Us z{%RGI!o|~dTf)~K(6RB|D|})^!o#g6o1ptt51@7b-Rqd`bm^^z_;<2j&Wf2;(j7;0 zQAxY~Tz&UxHzFigByh6%ID z?6AYz-c`V-H${C28h#$fy=KGr+e`~CmzUS})r(Q^({U43nTybz>@mkV0$u)`SE|d! 
z8Jb%}dE3Rn9C(4oh?g)R^wZbEec7S0>(1rVx@Yxqgj(I6M!sh5Mm=I8{eUTohdmT` z^+o=uh4zx38sWBofIzmi6-4p*TrsW4~);k5)sHkl2Tj1+W-fP-;CcTz< zq8DcPk^N|=bSfMx>zP0yc!E!#crrE(LBY2I~Me^d&$y1)h z`P*+sBh{T>u?bUjJYj`jQ5;o9)m{z}HthXOJ3rcB_`rZR1*Q~Vl+B=oLgib@3TFsLDser z_3~o$+X!M`nc-Wx`~G6hx!!9FXA6EvJ)LjzZ^N(SO;Ft^fzZGho@9PYmNENPV{2Z* zBPKEaCvSW)A4AM{`cfIOj!7Tw5Y?8PaIKRlUR@p>S&Fnp`W{7?{p%Qn(Duimh0yQ* zY~e4xEHUf9SHzbAl%v4E9(xB{RaU)*X;j^8e4|1k_7Tp>!Ch0DG3d%hSDm+Gy+tWu z2C^8#4}Fj6`bAvd#Ik3U;w$67aaM$g47h@=-lW;sq#H20G6|-vsy%kA)pYaJD(<}b z$l9E78=heqZf+en_;pd!6?BdT8p8a~?WER8BoP?`3`!s5hQ@liN10w@gB;}Jdn1w{ z(WdFj@5rp@qo|^Bn1zTsc#iFh1Or|6S4UQ+~btY8WnhM)cuU!2^F@7sIHs6K1Fz{ytjMNQ%mBpP>pl zvLzH0o$xhpNyHYId$5_fuB@bXviXgbZ!NSV5&EO5PMH3<6FnH*Kw=6D z#Cu|mDCq(#Xgnb$YPxbC9xN4*V_f{a9fWL&Up9WHDKeH<_QNj^F;l&#w44@?-wx;H z7!gJAU{{f*G=GVixlogZ3ndO3)arPpL2BF;KQ1teW!>H-|XZ&4pd z*0;H-%3s4UlxQTJ`FxQWGq%}>)W(+s3Uc4x0Dkv%4V-X}w<4K(No#?u3x&cZ(b8{- za2cN3CrKmZ&e7S@hkkZLjrpl%6rGR)R%GJvhRSCT%W$oywdT;~Dq|WZbgy#P^Uj#i zdq(YDza+|D!yJf4!=5))1D)xUio0l$qo0V}snAYXlyqB+C3QAWkgh8;J+2Ti8l2{i zBVGgP{Qa~ zPTEc&fSSZ z!&WS?tfQV6T-~4?`!n3GOgXzMtD2{rpkKOOy_fm?=&V@#J}}@@G-2vCVyshC$xM2twP6Tnpy@`AxkrC5Uqh+qUa#8XOPt1!hOx-Cp7T7;kr>4|YhWHk*IM&A%E(0+iWZ+pKrrUO zDJzOjt_ebOBKzRHiD2MooFcV%v9-!I8LKkBTN?FSrOt^!;M3z>t<(aGq#p|CE@Z+| zgy|FDoS{R9@@uq#){QxtwP5sWJ1t;=no*REk#OKUtYrS|buGo;m)Il>%(B$b5Mk^j zOjj6Gvz}~mnoy;6Wej1O_sN|=><;<*Ms+v-0B8nWNP|)9!U@}z@#4^=BazeH(E{Be zOFd{TW{Qq5wZyZL!I=Sv#%q|pRIGO zGZA9!%4(&|f%XG}+oc~A2#5$-X67EXe9J>T%jIJZgoNbfdJbWMmWZ)Y>QznBhlXdJ5D;_ocgVzHIBfISy zn;3sUBW3JHU+};h+SOJ<(~Q1Ivwb~J{uxZZewFA-w6n_ZRGZjbk}lSk?Wv1iunL*t z+3)3>%GqH99k~*%1JECj5i`M&Cz!F2AzijAfZ>E^cutne( zHLQf4YT7SzO3gYF9zSsVTi||-VO))=?2N?ZDJ^;bO;=ikF5A^?ngx*KA7%bNQv+e!WK`(A{NqTe0W$Oavi!a8z#BtudzV*l#uD1WJR|V0w{ngO|v*;ItGu^BKuJh*Ycnr#6u=~02uv2xS?x{k5QFg<;5I!1t%Ww8*LY-yiy zI6wrN@%jEaudcDR!`GCMaSyQ-IoyqG^46FNupYef#{1uN@rZT301gBi%@x`12y02# zr6-T>%+WqS+^{J14{$$O6F2}no~qMH%i!pac}i1*?T5Q1+|fj4{c6tE0ErrqJu~mo z-|e39@$q#@glXmmQVOLuZFv-&xQ*lcZ7;m)R6>f5@@iMt3W#P`nP7ShMcpKXO3wAD 
zrOEu&ly?4Rhb>V2@n%;;d_`i>$osY`;kHFNrz%h&1nJ!mN#A584hQLiQX&%A@I#+9 zk_VCP(%I5?=raYyi-r;YTt|BB@Xb1hfHiqw^I&Fw#u3DJ zZ8K)+CX{Ui{m0qcGCm#^CFA`Q05Zska^|2r&E>Vq6qUk>!s$T1{h_i%tx0RyCx(be z1zN*CMHc|Kx~}%p+-&@4TzAxK<{MqYf%96g1{Z4$MNbepvsyJ7OHcq1PUf!}f-Z46 zF)T^`V3j4hcmsSSLpc@U%Q-7c1W!_^TN6i;x%%58FLxzj_h@nZYEQ)4yCzs*iT_He z%aFD()Hcsro2eC*wFaFB$jvb0VvPYPW|A-&A19?4A5d;(g~)i5pSV->{pXluG>dn{ z#bMjH2Ec}QNEe2cQ#xqs*)%DW=Y{4tHiND$xJO}DJt7p-cG_dT)U95^D@h%(8FtYG zDbwV2nR(fvoEuV)QM4;lP>ZtzT=lNtGeX*X8y+I3E#rh-|1Iv)aLP#OAj&M`6q8e* zw*j75{ow^al$>6^W5pFUW%=*rbV>^Ss=B2il4;AerMLPl#5m#(a-q?wL{t*UTvp3k z7AeN168IeS`|FQPV_T5eG9$1AV`H#{sOiW`IYD`J;$+g}gc*qUVI-%u03@?L7~ZdS6(>A;q{ z)G&iz<8!l;uzh}FhJVN%%0|vgn$apG_~kr<<(G6S-Q?%xyTSVnVvM7bB%UMbvZF7pf^`XX>#ZDahIkh zc)bKzpF!HC1EzO!omM#$#!9pWW)NHeg`UDFXOPlsHpu=d4K(b9g-{ic0cO#W5bA%F z1~N?hv^Jh(@XKRbcfvD1V*W>UNgU#9AfB(~F8tR^(Er20A*?NTl|QT%Cko(c_7-Ui zCs9`nI*)a4cu)nN z!IWR{kX`Ceus^}ZKhfGzkQx)D(*Mg)1)U_+sJ)>I2g>)%@aH~{#Vj`Gj^Ul@_BVVWJL2pMa@1r43y;VQpCGjAUc4<1)NW`A*fUSbm-9NP1X= zre6_+CTMH{*Xlv{SD~JxXwt{TQzv?+96utjc=d_H4Qjc-TwlLfp4Bk!TupT{;w-Bi zNngQ87$nrC&_ga;qYn9|*w){pWPCHCExZUdWj}!UyV2 zymbJnMI+A8W8R$g*W8CiwH0X{{0YjdNk0u<;p&A$KdpAF{@z2l@0WG-Kj|l5g=8AL z#zO@WBvUN^4j6Y>f6y1&m+dQ%GvDv)m+SPitre9IMH`8%6Lrq=MLsEu+zUU$e;hDN zFm96SR*rB_&hC(Zflu|<&zXLJ3$==51Q)RZdpx%R$r|?>*G_o)Qv#YJmSgT`{^uU^ zlP7~1bp{9n{)_GPXL40bh(h?v@p9Gs2`U7t1ESDy9OgvdQJ3Q^B1FzBH(%mWR7pzL z|6!HcHZO9)yu~3&7Mlni?&T-^d$HV;iMl9uLgkYt;ekThi9>emgzG;9m)?_DGx80v zor~U8II-^)?auMQHAIoZ6^Q9FHRc46ZirG!g9-2`nnD{y`=*#MXe#YTo)ZF1!K;uWP> z)^AciL?zAxk{DyBTxoUNO+cj_s87}e*)}FY((?fStV_diVSS z6mc5n#_(p0bf3v-dcAYZ_1)ORIUC1Az#2Y{pslj%uDdu*>xuW##z%oL4|$}Yo&Ro%76;AUU?N7={cWw1xq( zCBmLQhr^i~4y%P?^xwNA~Kh}Qy| zW~or4td$XJP4DDxNE^34*ZA>oT-G+_P4GG?2kGv$D;n{7!5Q!r>sySQUo$eMleTyA z4a!JgbNeb{xDZ?0&_l@YtyPngpl%59`i_5>tBWJIm-HqW)9KK%Aw*-d}$^aTiu;B;qjBK@;h_)85gTLXf_l(hEfIQo3@gZS+w zNvP57V%2Cr`FiQ+G#uL3Ij#Vl`bhVU15 zaK=CTbx#F~$dW(Fd06s9JW)|u7On$ME;TyDzeos#D4dcvI3HDsHCyfnhRF1OXZ?kZ 
z7@snWWI5Sczk3xrIr8)?%p?M5g3uIiC-03QaqSG=M>3*XPzXn}DQH+MmAf_Rd%~|P zgOEL)=Le=}6=n@=Y^*m6dx;FDD!po7NaPUtMC5^{6J$al!gFDP6b+z7%?LG@IVPT4 z_d0-kz2k_j|D;_Q2Qw+UJSgWgdDS>%wD*un%l&dgY75LNeekhuT(Eyb6`5}$7ZD>x zS&u_$B&ErV$aZ+to9Ii@P}lj1FUGmbZlVk_xah*c*9 zX5+*3bKu5nu`|=MqjIMsIZxr5J&0n9qR6vjMwLyAJmUfgbs|umF=o3o|7@7VQ~wp`1=&$uSI6(J2^Gq z=H9k)Qw25}Y&R&@e{o3+jT$J$Dc z`?+dxX0M#zu78Rf*q;lW3-bAOQjqX7R7PK3aU07dbkY}fx3%S3mu;ddhi2^7}%-*wo#h`k!5WVKqTDbO2|H<`k2OxpLp6Pi9L!!;-EzLO*6V+B z_;v%po%iqAW&<)341XQAlE|{Q-9Z#k<6Iw&?M*m3TYlQKuFg^G2(RXB*L`_dZX0^7 z00&p@R!l*Rq`Ii#uCAQr%K8u?pHX7pJJU;GRNuNK>S>8%%`3}a0+lG9&Bz~ZHeC0S zj~qG$uStEAZ)4_;-ZWPFn>bC#>2iUiVueNTtQl~1o;3;rIZJ`kwE z#NTHPJj8vDUHev12*cfYeB2RQG^hJJXAhhDdt0e$Z8;}h@OpWc5}ecitD=yuv>0yh z)UXjW7;=~6_#~s=q8?0rLx~Ud=-rztzXC3a%{o}# z0Npgq2Tpi@(R`~XcGhdKB_+1}rH!M;5|EHMI(P#9v(5@`vvDV^`4&>}{rUCSQ_}6F z$R1eg5d13;PFDBnT1sW>6Jw;J*f{k5$B8hWz)83Ip|N4r{T0B%X}W-m$>WIY0yawl zF7^ydjcMK#id_6kQx>_q>R0(5twym@#664g=xkmdqqXM*?;tRNbs@if>B1+u!Eb}2 zarL3=3B$v|sg(tE7)0Wotf};D(I^n{Hm62ta~;@?C?35MaX(C3Q5$5YbAt{bl36k2 zStTRD$=IEip71-+fW4CP^}T?;e|Qu%Ls@5<{YuMDn@oA26XQW7+34JKASbXVTZ(mw zPdCUsFQF*S8glN7r~c>0B4&R-=k*N=A6=y4etyJZGZtL8n8-jW-Iqv`<;w0i1%{_< zq0S(LO1S3-a`6{O)Yf)wCBYNy822*u_)w~t%POfVL=>QP;sOLFX4cYDiu?kga}^RM zdf`U6%x~@_n-#y-wO_u%E{7C}!ileT-@jhJ_O32RjBlb4@g!BnU%ixe>EE}sth>5? zkpM#-2`_*N>w5=NhiUY|Yj-m!F{l5+=zkilCvYv5ZK?zX4Sp%%RsNyarW}cX4b-tK zoqm&rT&y?_@J4bvah64!;^n~j#3}b1B>^(vx(;}@<}wVVhVnUuP~X9uPB??xF>)Zj z=kS6eEXkrg-Uuv!MZRFXW-QW+MG+Fxzp9G6Pu(BpS-mUpNoGULxb+}9=$i-)&OPX! 
zko|)ajs9(NMoRAl%TgLQE}|VRp$=j!^^u=q5@uU~n>ZEK{CyL9@*{nua}bMLryD+_ z?W&FyT7}pZ#iJR9UoXV0*q1OO@;XW=k9R;yx<+vue}Yz4(y5!#I;max8uyBV7wN@< zp{<>g7nA9EyTHX5ILp-*TH1EQ`?FAHsK#HQGtB$5pd^G8`yU5bRuIcp`=0xWPkEo8 zQZW`4I63XJ^=+Kd^J_60^pf;)3r%X~)cpMbask%2*?)%}q#Fk`p7>26Xfhx7dEFA{ z4Iy1SBb+^GbfW@QTkN6=slg5YY^M8zw_oSvT^`}|qP#|S;~tpLbH@}V@s4t~^NL7E zl#2y2Egh`eU8Z8`H#c y&w$}VKBhqz9MD2m6;rN|fBiPj8KDhFo8=#)I{@45z5Lr9L0Lgv{->;2$o~S^MOIbD9Yu8W_L|7Oe@Sea?Q6F*b8c7T8-}QEf zZ)Vr7MGPp(%f51hZZ*6v;2m{ezVP|H?a)uv8MyXiK*xU4)vm+%XDwM1l~s_#r@_{_ zTXIrkw?3d8gJjX7F9^x6b7_WBurGXRB`;%V@G>X^b#|f5cqDclwou1!-MjTGS3E4X zjI)<51;#ePdPTWtVVg@j5u2imtBb1HKL$eXf?lP!K^8*pf9i)^5-xZoItpYxC@6&! zH&;=2@3u5^#85q@J#KU$x$;_V4i}QE-7zaP_v4n*=@*-EBiD8KIHmINWa5|PUhJMRfzK)% zuRb<-@b$xcw6b{eq=syd%bMyHKfR3|ich;tY4D)b#F>d)>~XmaY)n%UYhR)+;nN^b z&Q@pXbIQxJvuXq-(o?n#FK$KY^ZuN@8l3gMjN7?fcD?%5)yJZ|o|R+f$bq+9CM_Pp zdBi!3-5lfxt7|Q~H$6DoJveyn-2ny1uhr0IDF~+NF%v{jyI#)o=*}p`Lc}COXccV& zR*4RYl|K%W`FIIbM5`n76EYcQl@94Ae+i&B5-YFYEE_nRNp0U8Mg%k3q*zAreSx)8 zZ__%yBu@zs1THoWQ<|nlhp1STs!_gB{P}qi5sxy9JL~7a^2;pp>Z%d1Ke_O|8rI=F z7n$+;LxngtVeXyW@%Hk%dJtJ*d#5N%sZBm2jtXb%k_xHs?3brs4RqzEgtU129(DRc z*$-{7r!(G^EG}8%RDo@u+s@kB(?K9H&wbO%+yqd-W;P%#pRH~0&TlY<&j_dW4HD?- z=*gBq)WddG^e+m(X~`ebH%UsXNZW63Q@_6Y&9D&2DMu!Vh$GI^GS_^Xn^5N}98I;d z)Dm%FbhWeU5Dt3s3lLSSD}%<%ukIymDOBZu40jq_h`qz~wi`?WkM6U}CtyOFzHU>5 z$`JVlv_x1d%rlw!YlJ3$c(wnF18LOw4u+SmlRxKZFr$ReK&%{3aa;wxQqn&nD zAG;R6rSrPl<|mm_&mO*D>}1ph@rFd252K40C;mom8ow@0x&|F)f%`+lV{>73cm9x} z_8-0{eOD_A5ZNaUcv{o_stx$9KooQE}rU63J`HEYx z`8W6>J>us2x(jcnmA?5ehr|uBvHIVoRxvi;B#g7IU~RKV32gMr{jtq$g=X%M-4;kP zU2y@Kf7N_$Q}tIe*Os}PdT3N?l(inFT5)DS7>_a)=Utq{tjw%fj?v!mO*Un>KSofQM&D}5g!{KiC8cD}Qlo2_ zAEk`OK9*6xZr=>)-GW@6&NyYug&$KY(kBo1W*f0rOTf>jFgxoMv?*uWi`;$(MywTc zsb+Bx1N~yKy)s^S!e^(#uR$9DyG7?w$|K_;j6T-Cl`4VNl<(7#%cSnKQ8oy_5un%i z2xUK@5kRXR7M1BqD>jb84h*K3lQFV$&r?ZN$G`-$xkC$xdkajK2DBRpUNC{4k3%1tE-LYq?WmUHh*loid;Z;(o39;+T`2$0)_4CS&oj1Mkt@p31kkzj4UYv zNYG=6Z$AO6N26e4@1suS6m0OF+{=JZbWt&2yphv)m&Cs?AlKg?<1S12tKK+>MrBg$ 
z4z-GVhg6BL0BRFf-Q}{}ppC>MlF+WvK!1V0{80ma{&a3cd4zyMkL;6*JjP4V; z!%ys;lL?kFc&49HD31|YKGs5*hXKLwmY#5s={6}^_b8R?UtIv9Rj-e!I!W$jCr0;4 zM=4vt+e3j4jdrG~lKMdou?cI@=V@)FjK=(Q>6rJFJ{*+bdhZcPSy$}E&1KXJDT^)( z`i6p;&L~dXi5d%ft|m>$na=48FY);JM+nRS&7j<;_o&vp*6i&XMdZCA$kP`M(_LSs zVS~NHP7rNpfl~Z=%7vDidig{{$`Un^qOWbyhDx0?Sx^D(*o;VPx5_rMlgq zk{;_+5M5mNT5X^=0a+1;um?Z9G%})p1 zX0k69VdgoiVuHxp1L`_Y5rxB6Ht*lR`KA^>&DTWY`e32Zb3uYaZ7} zojjLP1q&^kl#8rf#JXc+8XOXR+a)DB4*rdL>%;^U5<(R60S4Z$I69Tqb4ZoH=a4Rf zG>;*j)D=D1OyEj(5}@w!{|!U}X&((GY|*mUphKaze3f+$7LyR}!DU_1?flo_`t}m# zroV2i8Noa0^r4TL_6AtJQqj@kg+g|d_t(ilNON7j-eo+3jygybTpy0ss_fF>qnDZA zkKg_&8NU5w%w<`KpxF@JubQ8eF`*AHT(#N_zMFkMR?@JT6I@@s8Be>B9 zC9kY*(_A*b=#K3hkVxbJ!w~C3Z|Qn6J?fa&Qa@_x=(q?(*5^>0=|LbgupY{HO>h7d zeF~)h&=%OEA~u7!={le+eD?V-yzD!KX(Q&OEE$lrdD zzu&MeO0n*SXr>*ugfY|&wU-(1%=Nx$v7WE%ZAbb}_0AABL<18X(8 zMssE=N?pv9i8kZr6y>1GSj|t;`>AMc@IX~YPYzm}OatZq(oSnBQ$p_iYp|s%vNa*g z>cs5ZyoMx~Uth<}7jdoU+SY7l5XQq97`W#`Yk2bX)axAMIaiA=lTS7*Hwcbe=0`p& zfLd($_ptDgq1fxGRKAD>ge}J!G#8tL9&G8QV=WP9b@YK%yDmkYKc_5X=8WjK5J>Zv z-4JLh_Ig}*+X8pXes*S09YJY0tE;59LsLpTYsX%Y;Nuvf{N>2ZI1MFwx@vg6CY&H7 z%nOL7_PH-?zNYb>-ofk#z23~mxm@;FgV{_Mr!yCg4d*Wk_D^fQpQ0S4OK|Qv&Xa%b zN3{$2=6m&wPj}uyY3WacvtRMyd3ZF3Q?oc}v|8Wm%H10q4@Mx(Yjo{S&Y zhGieeB$aLMi!@H=0NdTM-E9y}p3kZ(tHb}6kBE_@fT`z%_1?>`fCR9|!;S70Mbfn2 zkw=9)xyuV~&|0k<_px_rRA?TYuORpvRpN7Bav+)tAXs(lc`f3rA{v#uQbZ?h@(W{y zlyhoyDjnu!b=QoVB>L)=wOBAzVAtBt_-Cql%9M~FUK3&2)|H^f??TAe9_NU$tn+a_ zaGm;U<-U3bCx=pI`t7@D1p4kbJ5|zSB(Tt0aH5dA=-8mYa@@IG`v@r9hU+g zg-$Nf^LU*4zCX*PzY6S|3+N6!d#7}yEy*#lRz%U&YsD#yc6^}53UPEe(iVDiAppF6F`Ou{!YjMNtZ@@2R2ddF(&24bns+N4o z{>A;oF)qu*DXuX4c34W>t#AGeG9fKXu2MNMYnq>AKOGEYI8K``21be%P?6Qx~1j9Hm)sViGUp(WJ8xuoBD zaQZtJH;WZx4E@A?&!MYG>$%@MA#=z*9p+@?&=c3uOvFlSKE9~&PPLGFKYA!wBhuU9)T znbsC)As~=D6IbWR4yf9V(v146ubF5q`}EL4J>`>L=$oGqDA3W}pnlSL#-J z-qc@N91r2}`PbR$2_h@q#^YANc~7B63>o7&?OW6;0T06y>sUqE0pm=x`y$M|CIrS& z6Fd3`S-^I5R`P^RuTu`zs?)4-zZGRLbohh}_+x`8oqZ zrKH>>sqFoCf-S;OnW3AO_Wg;N*b^#aKOt|5e<{M<>+pAB6& 
z(W__H_J_kxZ>^G6)cA)-LmtLPz$NkmORn}7n@8^i1A&AT8gO{E)7moWp?&pTWqUhb z=z{^U?fXAv@i%ZqKlAmWqku7kQ5Ddi+X;_7f3s;e({0qR1;mP}#Y2-eHYu;zFzxT& zyz{pXtk06!;svK*Y`XkQ_=L&ZEh?`$3_;dOUL8Qcrdby6oQHAxpY<25MbY*77rfe! zT{g6aC*NM{pTsA_s1yi}bUjXx6Jdr9q#q+=LJ=D;A@$=5e&cY+vCq}Ja0sA zl&wqMc)lc)9D$iC+s#FD5ceps!5x&*onLC?SGr?mc zkaZXIpODv+tmL@lAGmd>`P3x`FK@_3{Otp7jm^S+6LMflRV4Akm@aflRJ5y{sWC*e ziYz@+t&;tp(OR=b{zqlb)x{|3wI_IqYfRe~U4Ipo~a9BCHDOt>NE& ztOU5cnQofQmu)dPc`X7}m>H~X9EA^U*z#UaNNH6H8&L6m8%^^8HFR7k&qWX_O$ruN zI`2)(@tqujgm1==$oGA99(^ia>3=??b4RaIq*q;NvmWq+JtY}5^d|7=rSK*bpb8lP6NaN|NXgf%^XZ z*6<`^3s=L6P$A#?_lURYlxazyePBU_XXfT7+4L;uF6Q`V$fsWkDl?LuHsqBSuL6aD zF(v$?n|(ydkqDCb!ycWupi7|amazNg}uoy;*F`Plg7GO_fB1V!{ zaNB+$jh&IxfVKUg1P3-7VM4p`~>Wd^|5x7-sNCf-Oil!;@&jGefDQE*_A2k}#yiB#h40Tq&? z+=gW~T!AEJB00aRXf0Wf>vlR;UPCoHP260%!ugx55cW-RTuPe|Ny$DvNIv35IBUwp zj0=dyQQWHGS&&Nq;CZM9421#^{s|RC;o|PhiX|87BgGA$L}Uoq zy~$r{+tj!ZG5Xx~x?A2A25I(?H=K5OYWfzGRzX{JKBcJWzalUF?V_=;YjJ(U<#;=6 zwJ4DHQ;qt+FHEtW&nQ6mS;06pn>~}=MbaHO`3Yqa znfzW2p{RE8)B@**SX-T*9n=xqL*DFb&~4G0O3`6a*>deuwJc?i@4br5<5jpP5zkr*83h zyBJ}RL@A`3S;V))>EZV0E-l7X08+ki*IM*MAlaj9v~%H_{;y1YSIv);A0eBaL@du> zMfE^bv^N?`gE7S_ez+s@=Ppb?=Zy{7x&%+$Sd*y+RKNuERhrEWh+6INFxl^G!Zs}E z?F?=#tkuSgtG!krZIBGgNswb0M3j8`*IktzYD=?Ce^$=1&)Y2kSze77;_Wc=FmlM5 z3bn_m^z1Tn06N^f!0hXDrFX2;NtD{ma#C8j1d)hpeqFLRyY#YoGT9yWTp9WnYXwF4 zfRK7>#blO1;+JOt~OEYuMyl4p2h<_8J{1h}d+?q*w$(rbIlgVIW~_C+Kd8ct{% zS@XPhnZFwow3y0=8LXl91^P2X(Sp%Qx_CzCKz;Hl_!;;Xk>WVXHX9Iz9c#ZJ`Ngod z8S&Z|?@-Y)5T*oCutN&1yur#QD8FD_qt0XbhB95cN(DsI)o_hZ8~h|t>{b7>Pjruw z6Z!^E2YY*d!x&0ZGq_4=)PSI2|05l(3U*0{ygb3s1rpprt{sB8acTR46!=6CP9PXr z#)F5ISmg%n^+cNSiNE+Q3C70(5`vK~-{6JyO8;65mDAn$onLsdde?_%-o^<=0NVyToc8)3B)BY z*ybJ0?1`2gKUiHIYZIO2?^$U5L2AE*&uH*Rb%(`4N|IwpS|L@G-s(>KPII8c&Ug>*35J*pk~eA_sfG4mX*=M zGG~6SvC}Ep6ma*i=^#pKsMnYFq~+N4v`F-nmsW-2t_O@YaY*S?I!uKtLw@TQg?g~v zkJNg?cX;@tK^S+g%;mR)fAx0uEU-kNN4U-4OkciJPAf9B>wTlNczd)d*HcPNMb%s9 z`e=RqCl>>Ycji{-xbJ7IT1PUrxdAl?;;+p5f2-9?M#bX$PY**?&Vk0k;(Wn^3Ld1Qrirbx-~v@K?eNiv 
z0RHNQ(#42l89%kDayX>NPmq-Rvg5l0d#Tx!Bj!T-`&Srb%xwRL zV$0>#(2x7U@+?t`Y~;9?Am%Xh|M^CdEg%vt^j{wV-*~17PyF|bxEJOulj$nBK7h{kJ0vetL}jZ|9+wqs9DhXH_Krf5T}DEkIX5?~^x?ejdgC9M&J* zS)QQwC{dNeRZ(rvuQZ|4%-NXHbD|c#?<6=3naB&F-7Kd z#h1x)y#}v1AvP(pZT9mc6bU)2)7;L*&Q&v{q)FPeF8yL*>=r5f z|8SBoJum2-+v-94-@7*|yBpZer|k^aGYsbgreGIuJ~OqF9F!qzc(wbKwo5;EPvcc9 zlez^V!X6_zt4-;XZ2%HRyd6n_M^@Mi2U3x1f;nl1bDx#+2=uW~>dlp{&P7PY-2QVr(-t6N@k?wnq=Kk8fzsalwDqx-B@GGwq z{h$O~(*Zc7L7!YtP|xv{z6>J8dWFjK$cRr*$T*oI76YGcv?)I6xSd?6)v5d`KYUq; z-J{Sv?fKDO(tNStH-(h&g2fpBxVL3*5*7n=vgV0|l&>z+FN^>3Guy>Yji({_{69|T z`V|_mNt_>d(Ya}o^6xdI_vWG$uU&W{74kB_P7{Si(H6aNi8xu`kw;UlrZ3pogh zkU0Y&9aIY;0G~8`OCZW4Q^+OqyQ90xa=GOu?=N$e%rxEvB&*kl%x*7l;b0O2;1WC> zrAnq7_^;ztq}0&SBRqcX?r}0P%=+SRlJ^BD);5ECznNx5{VY7U^U@_2*whXJnHpuv zh?5G;6mxTIm^2W0w<9I)Y0#5UE7>U*f9!p;d;t{WNTbreb%@w^d<#T`zUMOdoz?Nu~2?i_-ybD9>`rM7ttJwWFA;Q@voCPTA3>#B9oE&at*(k|lD86Jb(1Xg)9 zR`66J{fC0!h$2lKu);1}JEIcW=sj=eH66{_0jXY&E8`O zpU+B|URHH9ijX;YJ^-^G_3G;@#kW}zX^_;mHnRw=T%7MX8gTx+dFo%^p{^H}zOkZX zPvIO^tz}dq4VreoDRgZe9@)t#*i?rRdB(HzvS&2KG2P|-@t>Zl(+B06t)KU5x#8#*M>F21 zQU-|MI@70YBaD{DJB}`-=kg;WeMfFJ}Z^1o$aR>AIj9<0t~VQ zZ)+NZS0M6D!U8%DXplJ|;yaUqT49^qd|_n5(Nu)SpeZ zIsA`YAJiWm;^88ix0H#;k68)lbwb9vL#}`BAU1V5QO-0K9(U9-Fx*8L1nopune7Sk z?yOE7<>&214y_O)Kq6Yd%#)yBqDO%;GL*2-RG42xjw8hC>;5@_zo`-twT#**7n9_Y zHtP}omF3=p6tFZ~KY7jh`Gk+X^P5a5w~FUFzU~L86PPDj)L>y+@OR zd8I?vxPJ7Wwcf|qfH)STupy1&KcZgeEJKu^F0ChyJxj5fsz$Ybx9acK;N7I0qOwKT zNIrv)^JS01R-20HaYc(V<6@kooEYvE0gSQB~Z1Yl@nnFgPnRj zaPGXL{?R(dkVfU5U0tS_A`qBwW{GlcNCi(wPfbCPbat#F0gNo6tSRwRo(&QbdS|JB zch@@X({8U}3=G^&5!Cd3SmVVn4Vdvv3r?&~tkUG4VM>y9cIx>vKPz@|Hgx(!#FO*6 znZ2`L_zv|>*kvR5*+NL5>z)fw;C_M5&woS|o7n1#>aek=rcHJ#{rLUtAzuka(XFyC z?I9D++A}x1!K{|-+SWWe`={(CAa|-XwUGRnZK4sU-4D31rl+r}yJ*Zrm}Dc(!}mSj zEm=zY=Y~qm*WIbcQ7dCz5Nnx4OqNQY`A1A$d@G9JX>6j`MXqZCNMsOb(n|2}G?(k2 z3E6<}m`q;+(Q~xKF6R`<4+u{}0zyLG^ZGyXDjr6=nS~{~uOx zC6`;U3+5`~&G&!zm!j&{X8-JOHOKAwXHnw6EHc#>m?ft1PiBYTy2bqpqgD}O5z71& z$1jjVQS%ra96GK>hl!~oX&DU>A4RiDg7?w=i}WfelSWI;XM 
zv-pCKrRoMQa6yN$=M+%al%i5Hj|o=O?8>7#`oGtYSCcAiyaIor6Gdu2qZURI4N2zw zmXKO3HTuQTnUe1>KG=Q?H0*2nM5?u1e81wNkDx-ft{W^CpxnCw)DQACWZ~-x2!Nqc?lqPt{YKE)P*;4>DKmx=D6w_JTmMY&jumK5ONhLfnXm z$%_|q>P>CdPj2yq82i=g8<<9SC&b`O7fl;74RHKJ z1Ye#h>zR^2;^>HUKgE)Rq$E6xBd{>89pF2T-aPfq@`W`(u~jE;XB8$BT%>W)!lM_d z9`E2>Ro$oWeWvg5Jgls->#mF@$H>_k-xOtM%@?&ut~w*stAT>}^xM~95{)|UM4S_7llQM|Xm`&#^Nw1r zKBN_*7Nuwa*vZ9JA;6UktZYrn>i&RENz#0OL1W@Q!??yF1hbiCMEEoDmlSp7+T!}o z1R6lYX+|>nf{DRQf^9+ZL>q#X+Lp`L!xf{$%AvSZ?(nI3E;EQ=2|kc~X++36!qc7i~6 zi}P})yC4JV>%6kXmA1D>y$$#Ps{DOYQa?cO@doDXj~$(m7qIiYF@{WKSv(P_`e8NF z7b`T6V8}1k?z@3GCl(O9=YvHrJ59bj{(@5vd|CnJKwqFBjoOyvhnkI*Z92@5JCWTaLguK z>J3i)6B}>xG(LxApw*-qAFFLjVIKZ|U-RK6Z;MBc+0?JyC(gb_j`HO9!}7mfgDoE9 z)zsbg*9kR0ON0D*k|zG$uB-9cp+Q{WTg&BvcX zuuAri)VjoBW!lL(3KVg!HY)XoD)F7zR6QvcAEZoxSrA)(`S;IsV=)y!}LYFG|q^z1TkvLD(&>49Entjzvia?Geo4QT4?eL&+4 z#?(e#=C3$LyKN*J9nW$GTc<+rtLNJWKsq_l&L7T+<+`obfSl%@$ldF7#~6UPkssVw zFV>P#lzTchrT#BUIh4DSw=k3F_^xg1k6RxLkgbxCx46f>*;Vmw_84OSkSGRX_aCg2 zvvi6(myLc5aSw$lMPL8CXK`zANZ9S!AP}Q8SEE2zHcNTA*zCV6J7@{gXYJA*5;oSM9Rb@DGqy^FE>3Pdab>-#j>zg$dmn!k`r!ABX;T58gyvv4AL3dh)z3;Pe^W$L`2WRU0FO92LjM?9XmHq1Wm4|bgM)KI zqT6udVAad8TDTz?M@X5wvJd?9@}XLj9hlcfMBV3`L_C90CemOldsyiHxg z{epvmFwDwLQ0Q5`JXN_fwPJoD`YF1y75Iom>{yJ5eWm7K}_T{icxjZjLC@LbE@ z{bJ3&s-^L3uDwwD#UZI3J*xhPOT&d>ovW?zGSfY53CJdKF}6Jf>`gn886(;=*wq904m9m z_CXSP>8}2gn$vs#S`5fEM97g#XfdMUEduR~J&-}K9B~iXRmO<@0mAl+&S4qGH_+6k z%2Y0+1fmK-DY1IPr5HA<3|_yXO=GZp=({R)A+HAK9siYwIZp4tc6WJx!!3L)%Idtz zE-ZVyaBr{+E`P5dO@^_V--{^|v;hb;K)=-o;%S2F|w|D}HT&H+Rlp!P| zvC=HNp4!Y#;wHC%1o*1^$oElO|G0|v=boNtIBgdOj7wTAUjD_QZ{kY;S>*mPa$>$+ znI>0xWnni2sCyF8NvVP^b5E8%qCL9Hav27?$GOpWu6v>c)G_Y7;>>4q%GLP&_9%zR z3Fs3Du2U4yseHrHK4igqMfBlp?0UdMVLR=|bxF1>?8rdz^#W~SuZGxH2s^RU$4OlE zl?)i%!)@89;h}Onv#y=_RnMZjcUrUkIyG{k#h|lh zs64Yk8lR{i5x4xv)P2jb-FbUY-J>;b0KW>G_@X8=$9HfJfr(h5w0>o4pN*!DIkcKa zfr|HM8d?TRkFDE)jL1teG&}8mk<8=Dp8z;wK1NB2@%DaGUeneeLnWPvOHqqxM3Dx3 z!s91}#;uYhJ9a+EGLYe!xdGqt5}roR3;I-cVxT$m%-E@puY$#RZ^97(qyDbwS7NiG 
zK)GL20=Db@3LcP!!(?O*qf^t!2yQ*ix6ISCf#=MGm`>9OV(^z^Hs$Bw9`{!|L8MURJ17yI_k7ml_C;=Znpt>HeJh@) z5wN0NNdx->J!iP&uWf&hQf7xvR;)Z)m7vAr3vt$w^4;(A zcU#E73qeJ8LL9%BEI}<*;y_KOSa#kB9vp++Eu9hfsvUqP%`^9kJi$YkQOj)JC;JET z`JDOG?{fSmvhvodyo)rB@qtT-;EV=N1NT_*W(><`Nv-eRs;Ip4?!E!LOX*k)6~JyF zdDNHJJ|nVUeM~O%d;WKQZ9%BV&+l!yLx3C!q&3RnOjo)7zNUO~LDXK!G*?8P)xu;{& zonIrw;(~DD#y<;Qy7X@K=pKk)N-LDTZ9?U~1dqVTQxOFYH*HAAmv)-pJGwIfE6Oc} zVrw=xmB{v$xi-?HLUWzNf3^6BbV>n-UZ=PNe#m^5wf!5=*HHRj0)K!ITaajXwLRVI zQ)>A?<+s$-h3vb zDvr!uL@6m)+h#Ne$c4Q2+7{_{rX;Y?au1z5g^3x-(ylmLT%SI9q_>ob4$hF#6x90WV~%)i-7hKsjVXKoQts{~cWZY3^& zz!Swr*Y-Dg9o6r}mbZE9yT^c!gcVlcydH$7<$2!$f%K>T3-18YHc;X2DkDYh|4s3O zJYVT0eT>d2_|*DAl&mx~Ac-=N}d4TmPf z6d{8~N{XWojLG-bLcdZ8!i4?--a|g`2~IJ3W*8m?-}> z7s)m@rt1)2?AC(b&XCh#F`np6zBQQBF76S;2CPt(E{><-)Mo7kn#8iV3J$pXY8P-# zg`_4DSWKZ;mpOQBx|FdOGfovG?MY zX`mq_<$7?3TtTg4p}oWU8G(VjjHW4rS(k&%pZmJUZy)*a`Q{}34NUaATAE5m4SP4} zG#bX!fhA^EVKNtNPaZ>U(hL)|mrzj!fns~-jIUKkj=oKL*nVCYpF^)|t!yq9O^a(3 zs7Sied`kA)(jR(W)r<%(k7N84R?Q&(D96$ns2uiF$OP>v>spf@NIUb$;#?yl*tNO% z7sIK<4}P&?{3n;)R3fLlm>~0yejd~W*{xj);*Wv}xsF=IMK6sj*92Wh=emJ4l~No} zOj-1C`G*NA4g;7VdP6N-Q8xmh(XY2Ac-Vj7;mGDN+pA&$BCl3OppMrcU?j*NQqOzf z!HaNE(s!Li0{y9KW><(z(gHrJrM}SI9y5f9mMUaHxLyak1wj783d~<9rfy%E00uDp zmmH{pV&l`$yj99-Q!Q&bN?#tp@hOJll+&)`!9iKaq)^GI&t_Dw34c{G$&M1A; zxuoCC5v$4(Bb5cTZz0`gLN$C5V7ur4JDg`${r_J)&;M0-JC=&V0&#xjk^^T1??b{6 zBp`=Qvwfs{O0<-T1Mnst%IJCYBy9Q0?exqE>X>qM5XKaa2n1+@hlDKHCb6qO;(Fa3 zEbWT)Ha7*KS!oa2urpVeC4c6*-M0j|HdqBMpjA&>3lrksP2P3SQ_}AYkneU<-u}78 zvl==wRJGs}8I0|kH^rG0Eazf?V(j zbRTSyZCbzd50NAl3KupCl?p}JUI&PzoJi{b<@(1!WxRTwHi1g|Ou>Q&wRkKRZS{qd z_8lr}UCN?trs3Mo`1Y?QEGu$;JUGdeR|RJ3MCj*H^Gp0=cu5w=h^ZDy0MGFh@Elxh z=m)}O9fIWT6Hh~_@+_YViiX%?boM7;S|bXk-Sze#g7gLXH@?&DYsHjB-wjS!jGg*0 zsn<(Wl;_ZP@rXV&BTf$(cwos6BXe&yTbn*GR zzGi3-%cOH$y-PyuQ53asiQz+2-EK_nRzUqwR-xH5mKAY!*SX_3#0#5sLnj&KRveI* z5chfSKg<4q*Rn4TT7zEKd0qkK{aM0Xqo4zTUzh&FFXIV7CQlcn0r2|CLHm_iJB16n z=HNUKH=3plm@3n)a=r1$^`sE@{6fnx7Om}Yxw+ctwKqFczRyLqfZZZ;xB^C-u>7ps 
z!J5X2V?T0BnJ6#(tCCrrRWqJjz=O4r`B<6M5iRi)SrYf6kJ3kJ4w{ zR|02&bFqb2UXOZ_@m+5)oI>ho7lUi=FbFKCFe=Jqi1p{PEt>U}b*MQvygb59qlpUv z=Gcg^j$DL@i-vr{jkc)&GpKj9l$>6 zR~IrD#!@Rlczhmox@GB`p_#$KKi=fnR*!pd_6lH!-r%QTYU=0o(%dd^Lw7D_+LZ%m zxlnLI`~z+_S#!%}U`zy%x8lax0QF(}NhObl+u2Eoo6SmmMxc(Awpcx9{e<#nrQlE$ z4tQE_CasdjOEFXJmOJ{4xxY+Lh5#&{WAVPsdGD3irGkbsv~Wf|Q{;%zXO-zk#uJ^v z@J&nA&e(6Jw_-x$p`HQZ4)`f;zVYiOd*hj> z74S>df&l@%>&ZOQz*HOkv(ps!e}Lc-MpN%bmvt3BlDx?BQT=qH|)sKYw{M+{o|B0e(Z3;&efl<^J=n8U#G$; zRu5G>CsdqDe;h}TQ9L{8uUmO}!oM&EB4_p4&wR~jaQVeJvA|54jqkZWMYqnrqLylB z``Z0T+VA7xo17kn5kaA^+V8(I`Rah*(p3NULyn1)MPO=MO+B@+@7^9~JHp7%2@V5Q zb_kYz9;hkAf>?5TLoi=E5O4mGZs)*-P50e9oD!}m6U26MG9Xyl7oz}0wdVHGMgVUv zX|qEe-TE@H9nojvo{_?88osmhQ@<;brF|nz*g$&+_FG$Iv%?1+8|0Ri!-f`I)E`_X316p#oe7ZEM?g;)FO2ak|yK++wp!jwhF=b8DL&JbZ2Q z#pNRPL4H%=c5c|%34C1qxC$`Kj`xDZ2x9pIsoQsD5Z^=@^7MS=kV~-n?(j+_R zhY!q`vX`(`URZlC5M-j=5!(%J>kp&DlvT@&Lb}=nr6^a?o4rGBcivrXIgiBxe&Jz$ zLw}Z)=L;z<+jZH1-|m2BYux~P;3(i@HE;#=_gAqzrnt1{1!;`>V?)C1m>?cz{1kJL zM5Rv9x34$B>*d;jl0{t;jA^R?nk7Tix00BC?^@=War?GqIY#IyNxK0fqt^L$a9bL; znb+k{T29*f;okDE98Bv?D+EPPPMY1nV!4Z6uj=rPtCWC+!TTTp|2s+RP^Wv1eU($) z^E=8P7^vcPp51XJ}2>9SvR0%hemj^1{l-!tLR7@7_f zvW&8-nU(!G$lB(p6{H8**P@JlLue9^#5mrUK-~*nM|26{wSM;}%d9&2@>;@rncM1t zN6ziM{Hk(d0l;_9JPRWy4h^M#0Z3kuJ;R87M(GB15B1=q>#U3Aac`Xs==_J`xWmXg z0fqQ{B%Ip#cppEAv0STBwlE&Y7O6~S0z13ixJ!WvY-<3_4FKB1KTM@a6bRXSM;m<( zKM5y?aqg36-!e)=nemEDlLK{&o{dVoLFb~HSpZpikk1fN?^k$s!NMbm#AA_xqoc=- zRcL@3SLdsIu?cAdUBw>HDm8GehC{neS{lN%RU*an8W2!qMbfObM9j)CirIoHKWn7| zvPz^a>D>{{Q%&sgmRo?(y?=dL&_XJ@I-Ue{<@4Km3&K9u7+9}sMtr%eL4;|DURk`R z2KFB!nCJqi8B}vl0*4OV}T$42OP+`7p3z&XHK9@YqNk(T$-X2DTR>Mwze~`iB@$^fr(+D zrpmON$Lsp?-BU+G$Si8=E%qq!5j&~8?MAHGWetvTY8TJB$?!Q(xll6ILA4O*)y` zZ@DcI3^v>FEwev4nVN5)s)6Ul@%+5CegY6bw+xZ8V5UicFk=0atLDd5(k#{maF7|d zBI7~!Ko7yP7-**XY-&QVY&+I7YNOZ*Vn{oVr9xS!MD=PAtJRq~&85cU2KBne*ZBmq5Xi;b+x#5h;Qvhdd>$@@6 zK5Iz4Z3EWMm#g?ncstA-RgD1SeCuJk^kr9H+3UCWff+=_>o5Qi0E2FA3^gkmoBygl zD8jpYJb)Y8vcXC!qtv}pD%|b}tyMkBmN1iY>`Zd+sWHDz`a-TdA+E&{%2Lqn`ajxx 
z&!8x~uif(z6%-JVoO2Lpa+a);qcli^TG#$v3!kN3L%rLL9%+hHW;UDpZP(Iq zagTnX=TnhPn=lAFnGd}zuF5cmaNlVfen-8GQEo~?Mmp-Xbld;BMtc|jen9Rw@AR(` zMASr)ZQ!IQs4%%oJl%I+on&6)cq{YtVuF`VJY?qA4s2?Iq#H)Qz!+*N+iVCMg*Kl& z{UP{^9WkWNAoczI#LhKq?g{B6gj_vE6@$uAcP;(qcz{)FhIU)wC6Vw0Y}bf_RCj_Dpq zlPbo;Dywh2)BclV-u|X~BBHvgLxVm`>9k)IML1CNP^9@rxM|ePkj5dQOXye^5ihOA zlj~X+#{6Z@kipp;A|9y@OH2)WvJ1SoC7V{= z;;{hes&3KBwCZ_&lNk@r*ynSMP9zp(cKSB=7ODQ6$K@h_ZBUGG6hd6MjEIXx@lIfR znL$?OQMK*q>6wJJhLeIHHIURdHxxg68JNoQIzBy9jwwV?vH@Jh7_rqaWx3&dxXBUW zQFPRm80snON9k@$cb+_k>CpUB9%a)vEs^TO=TAix|HIsW&to~L+J7~nXiIvV@`z3& zg5qQ1zJD2G?1k0{>TX?%`VjQrx1%+rIE%&7T1w?x6Q-99$c->bXZZdoJ@@35JAj4L zI`T+0KB(HIMh0K{_)ltzXk@9$Eu6@%Qnr7eqgXI-oh{VwK_P9E(2r*@y|j_muK#=( zo_N#hYku8(v%`Qt;#D2v_q4Gi?qL|+PGinYL#J08$v<_$t?JHwuuaq*#fhFwAhBn? zusO=qwjlKMa>%90-_d+Z9(0fl+PpjrFXw$_R8!^2ac0a)(q#DjL0i|f8CSqG6 zo4o8Gk)`QuMBop@R*wK7xk*cw$j~XQOy<&+c5tNFx@_*K>ZH&rC?Vh1y#h3$OBfQh zn-oMpqa#OIk98$To~I?{2@h4~om%aJN2sLa`)AC>Z~*IT=A3oI3g1@^np^6`^+VsU zyC#A{MPBXBU9o#io-eL(c$}JSV=->m(^~lhxLXamQNJgWiKk(rop)uMZN|0iT%}ih zRJ@-JV;>|tLb^dIQv8o{jNj<+KJ&n-{{Tb|_hHbs673*2kR%yXh9GQlbx@Yq%8P%t zXP1V;J(D#&6Tz~en^Ahs-|nE4Ymy^04(mYj9Z+^LP_iQ3PD(9zN>~xgHqH*11ZXZU9)*^HGYS@2|PQajn|Tv60AiSd8wVCU>7C zxt+(}t-j*$z5M3Hd4;L^s%H~-hWHM576O#raZ!6*x9A4LE17ijw$cFu^DnPNOp%EBq(iR_9;%41?07X0coi; z^66XFB^pl@g5eSmt<}_IVjU0%vF57@opF7yoqg1O>pOm_N2EXeez*id58>3Y*yFT0 zTHoHu#QkN{zj+vfxl?&LE(iu>3J2z4bsx*12IQz$&gMJLkV}q}MC-wl! 
zG>M5q)tn#S{aM)dtY_S_$mjU`Q9(W2ldg^MwzecK;X~*k)yiG~5vdcNse>YR0!d|rPF*-cnDvEQp;q4%Yk=%1KG@_}RF}8ZyXQBnz z@`+%gEwLzWvxMocYqQ_?))2ETZqxpOd?ddX;2&;qbJcN78!R?fPjtP7kFt?tPc(uY ze>jna%E)Hn{#Y-W;*`on{fxM-AHKQ~pk^ZIu2^rab2r+viri0KDxTeJjj9I3k2SvO z*+@R7#;*^nmH}AkySEFc6?RMCwPHy&oe9a1d4D%|qu&h?aCS;JvW4eurPdD<=VJao z5`B>rw?{k0H?k|Yn;apfZ^U$(;tXx5VA3kM7`O`T^WK;R06N45wQMjwQ21*)k~LY-8CCj~!`zRZXp_7KC1;Yf1$W}5AX zQ(vW`lTT@&C8A7kMtkjQTZ^PNyKCjVY1QS-Nc&yfc9Va-LpsG&@!&DQP`FyWa(Wd4 z-kez?nG^B3Sgf?x&>PF#w7c0kMA2Sq(4owC1&TM0@PUe)So4kfc5DLs-J%c=Dxg3V`oi z$Hw>#ww5v9;oo$&Tl-d6yc@|C3veMcDwnl4sMJ}*P8&G|0QY9ly{XzJvZPHCGCw~Z zrD@db`)BY4$rl-Ef|gQtZMs3PRUAy}3TV-pq;nFiEibZ1$JNX=Xia(kJql|OiXV3E zM=MRTtX;c$wSwzFe}Kvf48SGThL_VhWWUyGf?D$rxH(wh`UP1|e?%wMmyCSn;-uC%@t#}9*jNgQ#^SGjpHBu7RC!5$VipM_)iHZQ1ge8~a1Q_5O9uKmhAdRt z9nx#Jw(7b1nfn*JB#q+Y*Jy8BiExyLumJWC#tf6ie)3xfIvSR@S>75BIzc6_e!x5_BL^;(mt- zHs|?SV%b=wK%Rw&Dz$MPlZqw-P5LHYp^Ik~jk~M%cV0iPepc)|qf%!+sBa zXir3y`w@v&(z|c?(($8$O9g`;sQ#}zp0u0gquMbqVgR7i)8i#KYf(npdwUfs<XEqC*W?ev^n&Or*|f^HtsXsAvFZBzm5z-4=iE%g z2f-+>k?kpL?QW^Q3zCUJjjWU6_pg@RmAzdadZ&8$MQa;e$0+QvSJxS#jNGVdzn}Zy zi$A%1cG{~Nephbx0j$yahaJ7>xqb1`dK~vH8X<6nCmp9bm6B^o>xJ= zxy)5}LYRlTexotJ!^S3_@ZMbwyu1h=@Lp_`{2QHeYa z&y7Tc@TLg^LvdxNRG2lq?+_uWpTP9}C8@$YakG!u;B0GY;=zyUXVyi47f?$OP% zUCh*%DlUP>{A$Qnt`t`GG*(<3yVHsjI7R%Lf^^Nt1Un;q$60$u@5sL3=y?x%a8+2@PQI#oVc`jwYx(G%vEh*nA*6$u(w%^Mh zx7L3jR1MJ~f%zr@f*iYM&hOVqj*sfNgpc5Uos{+A#!QfZ+#TIF0KF+L!bi~c4#oNM ziVo!`3cFRSCB8vcO^XhpKa-E(zKY7Xzgv;H@mF%Ycfff5cKXP1fCAE+)V-v<6-Q=f zk!lFS-cz`Q!!Y{n=Idijt`s5H&8=2yudWJGKs@?dC5XvW>b_?|JfdedO^2$Ha^#Q4 zw%l286Fql)rg(SPB1%JrIKr@3O*1&I4R(KV?al5{l>LyWcn3oM=rP@-jK!u zdO~90UJ_y&$M~B4;UXU>$b!ssE_vQmv@Z65L(6CXaD$ajTH2|9g-*E>1;@Gb6dU~n zZ!M*Ow(@mpze!*65C7$lRj~yx_&m1nKH<$sFGK z-Ve>3^q$lJuoHIHcXm~Imp3wTSZx|{g3Z?B)9+c4fZD(Dg1XnPk@^Mo+6>NHY}ZMs zB0WJV7BsGLty)!Od84Y0Y@SvmP7@{7$dOuOo)bJ8it;sT_#U5@nVjOH+D%`)N;Ln; z3*?%YyxS^~>4?-wFgXx1^U8L9S1r@Qfcj6^5C8uZc0bY7{vXA_5Nl1Q2c$Ml>?i}^twq>sAv=`~j{n1~fHpL@q=X5h-r~g8EkNOdm{>fR^;u-Z1g# 
ziM&U6+R(f{l8PprIb1TOg}`H25@h53;FXzBa=@qOkMyLFZ6Uob0E-hV2LD~HDo~wz z`y}|XpqkadZ|4TGW0Ef)980|8rG!$i1ndU`d4E0MSS_!bUnx^#mxi4i4=Nh@F#cio z001%BWdmIjz(~0~hyrKNwC=|L^8$c61T_JdZtZI&vHs2u(z67RXZ5$HHensiD5ANs zWIaWIcbQ9F|CaKuZIXC?i9uLTZ5lNA_PEE9ALp5-4Cz70E%s`9-joyTV4^+rj~!Ekx8h1 zc3`unr3zLOlSpj{qA0W*dxm@_p!&Qz{p(MBpiPV&IlYH}Z#3OJy{1`+tr+umSZpE9BOnV!RTQ0ybB zg$0I~BYx3DezFZvHuz%)zcu2ns6Y(RnF;6zD*!0^ zS(Yb-b;OECB-sV9+>~B4;hzwxGsYEi$`TqTO7{}1-tvXo;o5NdoH*NJ>2 z@I#6R8|L)NqYGhzr7`(Mq=&UK7dZ6CjzMJJ=dyRvhRgx`r=n-6c+E-KgQX^!QnPyi z!%L>pB&vm5@o#<#(-hJaRbTyeEXSCaK-E|FajFOVJbF_O?%JB<5>ZCvDyoU!^#>Xy zE36)gTDet^PAgVFAthU>NQqO$k-Cy8f)NhABX_zU;&;p_bC{f_(@4IyWgh6UHo}7A ze>nMkd6(eB&!F2a`@w+g5OG8mxP&%j?_(KpH*OI(4>;)%8TDBT)$n9QtarTl?Dw8V zHM^H7a98^CO0UTQ$y&oatmdEPK1^bT2R~7%L5@RIT)+tv7BGr6IkOb=_CL5W`1*^w zQ_Tk`J?OscgPj?V)U=cTe90T!dO8(;^cCn-n!5Xje%GG4lda|L$T@?S@B~zpI^878 z+f7?@%)$cKRXZD=(xsL$8|VHBRVjBeioIhXt!MJ3>*GuROEI~>s|8I;{n?DxV|M5; zI0C7Zb+rSMV6_{KEZ!#-% zNu$}lrI_JX<|G0rGMGw`PuyMsAi~A%`xBzE8T}W8QArh>DtD)S)-GBI zBxurTD7zwF06LQLhd%Yw$nT~N0nCV^Cm%Xg7=3rQTf#-!{=EGxy5vGXq<05r#7~M> zij>R9dZ8e9gdi~?$hI!6LiH+pOI_|&MclV>LX(Z4PCj2At9Q6B|GmI7KYz>Q3$CyI zFt+ZGf?gqxwjb}?>px&Y~)qu#M_VEJ5Ub6=jVn@ zhzl+OXeayZJg&=T#*{M4_v~$*dmlz(!v_y*6{IXzDY#BRCSdaN*|iuUJoexhP~s0^ zDac67qzmly)#_(eUb_j5jEugJolr3aKCqV%cW1xb+pH!J2v%YvJ-7OUEL;{&?yGeV zp?iErny70AP=+-6Sx+sOT6HUgQh$n;?Ljk^#fw#r;QRtD3d_Jkr`^dMdVa7pr7%wdfh!8c6# zsU3ZP)dgAk%QbzzRSwF)Iei#572s#Q7R$J8X&tFF*7*RCOKKpCruV9!)((~7!~B|x z(t!yKGGyyp!UojSNnpB+Ub}eZSL-a7%ak6JxEM+=U5Pog;}bj7EK%}kayoLdgpMXd zvV8ooAA{zWRHT#NR4x_xK}z@;NiaQCTp->0UY8%;|BGCGm7pANF$*CoR+C=k#r}A~ zL~m$PM=NrvI3=3kuvXI`=a-ICpu0z`M3^r3%$wo?d-6<5%Gbbr>h8l>^tNPq579cn zm}NyYLq!_m43n_wzJ$?`_>m=O;u_L)57F@&(GPyq1;fxRTTC5wXkB=Jl?>m`s)?DF zQzi!dl%-auO*?wuupOM;BRjKZ{?C8@Bsnh4D*@25<0{_Nt>@LM9>z0R{@qrwjNVMG zSY&P$`<)N(GbvN&p4Mcpc>$L(C;c+4Wf;4uzzt0T&ve5{5u0bjS{Iq=60Ig3O{o0@ zl~{MtIpwC#dLVd0$hFQE)vVP0@Yz{#CpUw03q;^rrT(-+?Gs<_PFK*~K%$?zGb}`u 
ze)MuZurew?Zx+)wNY*m0=+=coaI+hJ&|*W>C|TROn;}3+tHWh`1RRa^Z(k~qUfJgaIuycRy8xT7KPW`4fnRP-+90_op!^{g?-|E_E zmac}o8D#}n5>E}4773T{Xn<`XOsVl?lr87cZFhG}VSiz6u$b7i2s5xgn=3o~+4WoN zWPiR*0lRee8!K3UxNdewU9o9);Xk3hd058MnuD?%LuVEH_r|iF_RW!>sucZ?q|oU0 z9BGxaXBE}m<=2*BQpui9H(k29pGH5a`1AaP!v27iLxC7gy*&AeY|4jN4W5cxT9-hb zQN}aTyx6jr)!l!qZCZ4i|7Q<{_+izTk_L|add&S84x4TFZCrd}?_}KZDNg*XoJJCJ%STU}90*VChsMe=&kuQyi zplr1XspPU}E?TmNG&1Q2E?VM;>-lWFq0NebK6Go``exlBUv_C8HPxn zZq&7AtX7%6j;ev%=ojVwuc})`Ut?v>z-zCm6-hXe$ws+N(7pemehjD_i}BWE;rCbFJMLU`d4ML$ueLKipF*S(0O72Hb~hC<8fhptN@^s zi67qQn*TQGKShzY?xDs!ze|^8u*G%CojRP=CO^u(Em>L|%l6b~imh&$Oox6U0ylsP z7?@2TPd&xM>)ZJuYmymJeV+3}%Eix^#s7RWa!WKdY~0pu1~yXNgdzpHsDTMDkzE=M z*%P*2UXE+vCbKph%F8Lc6IIi1mACnH3~rT~ouzJBOnt*)**?>qc2QgQ)*G*gkTF-< z#hbDSi*(+e9X3==ncIo19b^zM&Uwg}3mlnPwTn^d)ydVqmp=k~cAoh*v&-UqWy4W! zP-FLu!srzk$t$z(9VLatDG`wwmfQ|F9EEK*i#VnD+4$JSC=Dl&#I7pMG2eY}KPpK8 z&<9`ZD4LWHXz;N|>zs)@IkZSz^QUJTZZ@jaWvaU)hg31k*@hBBh)qTB=)F*L>Zcyu z5;o~E&C^AXJ!7j|k?_1)$>@75sv-gv`oH3z^VWK20e?0~J!dhuGKL zeYG9TgqDTCuf+iDie$ZNp6=KAn>=QqmcDdaV&vbLk~dhz9y3&!`p8w9cwso(xWpWO znp0|N{msUgy#8>nZ$Ywg;Dp(OTVUZe@c%>y(IrLP@o^T9;+u@DlG7OFhVK2&f<7zv*4zzSC^VHVkjp zEV5o?>O^&nlz9Dv$c&5cNJ7BU-h+=ZQweay`#Y%&@#cTPM6%@06JlFma*I$=mZe@^sDbtN4N@^2)g0$;orBZ1f4gu?Hr()&5c8 zom~xh>Q5j5)e-}4IQ}b6Ej1?*Kl>$T9(4EV?bjZ1Yk7*tdU0#(0iYvRx;9&J;{+|M zRp9^`JgqzV73(1tJJoR$a8TtND!(EN8g%k<+f=RlXSGL*g?a|mY5cks>~z29*iI#< z2Ko7ZUU9S5ZnO=E326Y-$Rbuug3s07q~;Gkn;O^rVJ&{>JxE`#cRp)MiC! 
z!q2qp)O$sM2{>39-zv(ScRp1JGlM#rIm;b*QG#FtKa38IAcA)qsEL9RrA#e%n!)H2 z-RYa|B}S3F2K-h{Yd-wgE?hQkuN5agFkcc*y;6G#?R-?fAaoLZ&5@Fkpi#h$D~kwC zSK}(X0}5|ilR+?GrpOBYhvK|B+UNB7`>`jljqE;q<`fc0VHDJGG< zapUmfZ|(q~c`Z-W`jo41)!npehi$&AhqqKzhdPbc#j#?ejTdA4%Wlvfr<^`_f=$T6 z*QnM+zL|KE+yY=O0))S9p^o}4oPOIS>F&Vc^ z1{$BtMn8C4+Q+LvhA9j>j2{tvo3=S7mTI`i{)Ff-kh{JQujH`TI`(L2`6Oi5*j)k3=QZ9?VS0Z^)uK)I)iGf~n@MMJhkX&WE6PNe?z2B-dtBVH6XR0`XXimW5~=&3Nu6&yrfY_U^!O zCpq6`N(-e@iw{>es<(`}nNWsUj%yj2p!d5pWK%@Y##z+DZO@e^5g)6*6uaSbTwfo} z6?;2HMy*ltIN#=<3!}>H2U0O(@fB;OS0XJwsh-&2`h8AOvV&4 z28)ffkq>I1634m*mO3@Kq|yxYUuR;%En=!$@zIUcJmoS9$s`{}oR^frm#bUJ%TFe@ zz`y%9uFSy0)n!4)#Ak!uu!KpfIl%cX!`&4}PBc&G+Aof%#et+Cmk2 zaB4~*+>kKpwR|mQ)bEpTj2stsK4emhrUaN4ZUV#awe#waYTc&A( zJcLwKG_NYN^nDRy@|*k<_8AipW}tWaU3T|5P(5kjFT>!$qP19w=I~C><7~tMxDOdU zfuzxRpEUJS!~)2l&{AJ>M~?tpd9!GAp6Zs?H>kW=MxT+NSObS25BwFlG02wU{dx_T zDcV0#?-O!&cBJ`scgFeo-^$Ftt7RhriRLY*xw&yq^Twr57Ta;IGWHgO;CpzvErjD! z)A;iOaQ}iii-}SUB$ElgrVXxJosVwkZ?;m-TYs~x2c~ZtW0~8Jk7?Y$!G!&VR>kqE z71^kg-t84P{i-jop1`{ilq<(zulY6D@6pV>hU}xbrV?D+b}JMqS7grh^oA*sQj~Ib zW#EMc^V7M+-ce1~(Sa?=`(88#!B<`29Kgd`=J|ox+C4fuW;(CDL{~nM=~lDuPZ%!L zu@nK4*7Kj9DOIsh#(uWUworDn%sI!8iJTO#Zd{pPu}lI&thT(Rm4KKOS5f1D?VT_! zq1lo*+GW{N0d8XB69rryo6jkDHC*2x$>7hec0PENUCOl5Pr(Py){0_Hr?t@pIsw=z z1z;gwN9oapJg3sFtd#e=ja5~x+Q*fKY5@*iHxO=NxkRTDaXyQPl6)~;c+*tZeF4+? 
z$ORK#g8xfBbu*6Rb(wHg8vj?fPWJTc{eA%%7*3VE{9ouG2M zrM`oit1`00@-pOX$M^(D>EnMtBkEDcz9m0ThVTSBbpbnctRfA4>j!TcO_F|v#)XkO zQ1X6C2D~k>EM2IY#sP_ibr8?HDwp1glFx74-tr~)j_rIB*ZE&ksjEzF(++5?K5T;} zvt%RHG%8FFYwFVUUo{C-fXEB9!-TU&UUu5VY6%TR7~l9UV4Il&EjrmK;;7JTpgvvu zBW$2!?`dM|%Tq^ADSkLHKLs1G4%!%Dms6*Ewlq2$7GwJ@!)3FiK?mGQ5l60MP&wy` z_ZBfGAed+O)1fm^d9BTrh@+3`PNYcxxL|IZfAn9@GEcb>f;)Cjn~+kv!LQ$n3z%|D zb+X3MSSTy74%EXO$ziP0xJZpEFl7>kcz>NZJse7O=;%LUSENstn`bd zU!fU)^x++KCO$Kah3atOy!<5ebF0$||L|IjBi|jbs(?m?@zW0XL7#AnxAsnjvxIaS zve(W~bY^m+-t;eJNY}*qAX)L{01b4=HEmEkKqW=DgMGm z?R>N&nrh3RPjVftr*QE~wE9XH<^Lh3@DuxW=7Rnat?T7;{|IN6rMbaVpx(Mt(v$mO z>adq|-Y;q%{R;fulbGxrCtqy)!qVjc0S>k+^(a!OB>Y_hSNGFm2JWlfZVBv+gQm9^ z;x@Rq=*nnmoy1>UVty;wKyC>hP3R=q-d7Q)xK+n|y_%RcYPj|H>P-t0>vgt{{r&@t z>a8x{$%#l%7*=inG#;Mg`HY7c&-W=&Q<~*^;&oa-OOsoJNJ;ThMftgcT4C+k;qL{6+1?Kl30FkZh}~T zkfS!_ED}33Kq1jk9fpOQzi*>5%XeP8ZgETJNov|C7(Ed@xLIrJt7u7Peewk){JyZ` zsc!Gtr(aHLCRCE@MZ5!ywz?v>O1SLZ+d#^tldSLb=;E0QMj==4nKwnjyh*U1-^}%LF z6~4+oE4Oxcs~uYCZKwm>*VDA|et2^WF3O9gx1+U)ir@>x+Ha8c%FFGABHO-Vc2P#aHSw7hIF6gADnCQj@ecH(^YN8q)}8vp6qsjiYVqsq`sE6GFQt;ho#v@X|I3p6Szm|LY=gmtKb-I#L$EDVr+~ku#6r&eItvoTAj%g19c3dBlXfElI}gu) zOjUZ~FqF*Z8<|;|pMr-SzF$jKoB?jnqmAz<@FqQloVmG8(1~i35orFayesq^Ryjph z92O%9q+r+MssN~SudP_F`dbQ>2~zprNd^;`PBHym+7D-^!>YlqT4Er0jQ^SK{@>1a z|08MmkEG%Mg`@%g4H&rnU*tGt|BKt&FXsQRoZ5o$i2nwL^SVzCx=#B31OBKfX+NrX IU=jR30HB>IqyPW_ literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/nanopipe_v2.png b/model/train/yoco_moe/sources/images/nanopipe_v2.png new file mode 100644 index 0000000000000000000000000000000000000000..b60515ece5d14da9c2381df25cc26e038f3f59fc GIT binary patch literal 100512 zcmeEubx>SQ^yT1g2?PilA$YLh9xMq2clRN<`=CLCLk0;>aCf)h%wWNFaCc|09lqbz z)_%KH``2!*)V?Z;8hQQt_1x}v?m72#n4-K49`;LY004mZ`IDqF0Dy@F03gd_q9fjc z#Xt)X|B#%NWj+GRhADRtFHnKvU&R4{%4nQBBQ(ToEc;JdP5=OY_tPKJPiA}y0N^qA zv!uAHyZ-(nMiQ~qlIZcOYD_9A_-88GTTILV%v7X{ZV9P7FEV_nKPjJrlEQEo7d1vI 
zQbej}OPjF}hu;c-2o(t{&Pf-X%H{d6HLp^uwSyk0(X|5q(4##GxB0}5HcTFargm+96VZg$RSJq`#|cXMSA_O zBiqj~20-+$LrIMNli$B@n}L?>t=PYtA|}LrM*FX(USngTV-fzVDIzq~K-7OX6^a}n z{=dIll3}BJq{Q!E3OpB8&X(R#UQHLli#YWfKkJFRz=(9rZlj)@yw*wu?Jeb{SZK8e z^kW^wSJ}=JactaD8TYk4KTYe-HHN`;W0>c7%HGzuR2#%@yI^p9!$3^6=bShn}WKno1?mi;b~5g9$R_3vsSVJ()x@Y5`ivg?5;BiM$I0iXK* zb6b3a>b}meUJ|La)W41p!CR6r{e1RsQ?6Q$@&`KETjIs&B$BX04@XF&WSLW;fQ*C9 zp0g{7(k65~;vg++ulKKFn!G{H{EU3?T)lx0hb>OJQZ}ls+Hmx2DAJG_qX%C)n|F=A ztyF&0>@vZ7M9_|z0by?~)vReRsTLVUbcNL0_R2V>y^ZczhCK{LF`}p+wV+yUymVWB ziM!PYTBPA{HJ#8wGsktokJf?}`p{Xvo&ps%#f#+ad%zWRSNj%PC}oA;uk5|+vz#kn zqga~DS8Obi`G-GicVg&PaBi;DQ4qqq1tx~N=6)?ZJkqy*zH>lZAEofYma5*g-N03c zpyc_U8{1Evb?exXWE~ntEquFCrj8fVFLTbh9ax71;_WX zd%6xIO1h%j6UBrVbKv&dq-%F`aph^fT00+tXV<$&dZvg#i>eo@6?+P_WDSfGb}|o{ zM0&}A@h@R|A!j}D2KWgX4YI;X@0_S{o3p~UM@t+hoR*hkGrDVnnkmsdW;_Z-%56dQ z78=dX3_4W--uVSK7(Ain8m((^+UlYa$s>%*y;ha51=5N8jspRyKHA+HpULVU6FUPL z3ZVgtdh*)v6ncS}xKaf)f21^RRia7epAj=WU!>-{il=q2^=>gFZv})m)GabZKM2|^ z#8`mv4=L~s8Kdq)6vBAl*@1P&3gPFwhA#yc(;E+%mD-%mlq%d>$<9}APM^CK;>Vc| zE1{y-*ywUN`+%8Fy^qI7+C%kj6^L)|&Od^(O)4Ixx}+9UtECFL!k2)Tlq`f0o}p() za$a*>fjRnowkTf0i~=88GaWsbY`PqIN+Up?IP zWayS_sO8%(!H6_LpQ|a>!A>-+)LUP~GMn1Ha=(6ewe7Mjl`IXv*bvNnLr)NnrjzIO z+HkRSEm(X_r1U6GXFx}rU8Y6kmZR|}`EC-&Ct4m|3GGeSC{KLrnd${Tj0OA@rHh?Q zkfrc_`9#@}Lw)$zIJAefyn0cHx&_ z!+rJab0XI_9V94D!q04@uW?;gCpaptR>=7CFY#wbiC$DW>`tp3baYW(0Dl#FL z8~y-w_w{neFv(<7_XB+BXJw{ttSsbUPr|Z<3Dg%n;}!#?OtZ2{FOD;r+>%7kyS&7U zGT{uwDGVS#s(;8Uvsf%Jh60cZ)GL2Cw5Z^{$)tTg(Z*e%#$)LRSXUQTIX{R^th^8J zI}j*U>1y>Aq`rp}?ahHTN^2T!>+yt~?rGf?XN;1FXk+l~@msuoDNgpBOfUeeTs27P z6wXoJeFp4FcUzEmp*SxuqsVCi+kOp&YZB7=sWdyuZdK$};&)kFIN4^l`Jpc1PBpea z%~#uO-}LC|Wq^<1&*z?lnt}pFvuh(;rY=9xOS#zb6wbF!`0$l<4X+tB6XKCA$Mq=H=<^?)y z)+e{JkCu1TJ{fmdE^gye$yA2tIHvkJ-7B*jE3*2$>3Xm6qLAm>uPI0Pu0h$>0Y~S{ zhb9rUIhtF5bhHq?yP}g8v2A(8e)-mKq8-HMeDzus-sbd?K!k2X-CcTR-^?pAxy{`unALoD zk3A_S45=1)Xl*ph%e|&?F#m`9wkIt9@n=Tx6%NX|E$83zX5IW)!xzxTuZwuP4Q0S| z28}t&Wj>!%V5QGI(VHA3{U6iR?BTbSrUiNr%oTE zicYQgHZRK=ye4@`m 
z&<*nyB+hq5yjFQkg=C{|pqlU%}b(Wt(+>MVk}{k3`=3dkT@9%uq0qgZjQgtVh9 z&IS`ZTl9kh?dciYxV0l+x~z+Cird-si_d%sL!u7iGcey^t{*b@p^7LjpXi8i2qchD z=Fm~JaYvE+?DpZdhrocDexk3g8Rs%Tj!rTd=z#kfYe2{Q%B`uhz))b~q#&u3_0}pm zM*E1E|GLpE|0!5U@UaK;TKzb!&&@#iXk}KR(+N2pkDP(fRzcBtR9~uDjlfvim1S~R zYKopJ-HePOMhKb;s{n;94BBE6iYZ2^p-99@c?nX{Q&J_$PNm=+D~25W!rdjMk{K+;gETPV6UOW5Hr;Yg1fP*>)i zN&N@5RJt&vXscjBg_YQCpPsW`iGzsb0aw!Ih6Mn6HAj+l22%XM2Kqjg4JLXLAGd+6 z)~n2wIDGz~+aH}b^?BAWFsNKJ-!<74#p{RF*^wo*SIG|jN%p~Xbl+Kvl%5Tfykbt* zd0+h8p@ShFg17x`m*e=sydYhfHQ8yRZ2=eSk#@SjEz_{# zW~W{QQaUriai+X$<=nI^K1tnkxP|D5%lD2b3ix6O!Ms@vXpGFHCNEwctVbo z5%j#%5rR#Hvu&t|61N@U4lkb9)qBnzdy8}!nKV|^!pkbES{{~PA0@L$uu!A|E%_@b zPqc2NQ2TNx)rHxG^>?-i5h1(BaMWv;@QcIdx+3j;p4h~)M(ipv+WxL#I|$xe?1Rsy z(TXlFi@#bUppMnI#y2i;*B*^X6GT-g3>Ie!dKJz6oEC+i<_^UoY)9_J99Cb%^LIo= znz)Pwu)R4{lN=iTvk42L{Xl4-_iN@Ns_V%@e~%g`d2@Ehhi-|{ zn=j1lg4;cQGVeg1uef^QbtYSqf9L>Q&Z}fFI6n5q={$~a(aDNs>Rfr$M{8xD)3Muo z_xW(n{B*hI0xcY;iMs;OMx@HWQmd~&>Y)`Du%PfsOZ4i+ROZgNEIf>$GY-7<zT&a~alxqZPgGvplAbn~ z8Ij2vgB}+PRGRV$OJ(AI9nD#FGQS0+*n#inC6fc>7ZA5Qix1J!n<|txYSu*yxh4^S zvCsEX(=NNESs&aYKK8q6O9i@G-X7fTFWyjVVY}H1!+U=!38rQ|UbPzc#$9cw^W}&U z0=t$4VMRW?jU?$D#E2UpB+MLn{r54dg@CF07*=cbmW73ol?qr%}>M!BNvMl25;*Bbgt3Wg&MJiSVS|J0m{@gB%MqD-7TOnJ#T}&kZ^8#Uf9sG-F0oUZc%qZ4-k|7BXH# zaAe!?VbPG3;Ozb=V_E)esn#SV&Q*z)B?m%_tnc`V)8pW|r_GPCwh+(`d48m_5) zjlx}yYF&k+Z6I8j*q^ep>S4M1PI%Kr(m=W!GWt0)|F;aGm{8KUvV67@o!mke4}+iB zoA>BQHnRnK2h>%>habWx9EvizzI9PM9rq~si!?;<`TKCaI11xG<5RK8mh0>Y*zNA& zcANLdljg^Ke;A|E&_Xg7f3mv-VaK7WcB=XohLr0~enuT`hV~i_li^yf{oU=xT{oaW z#KdyyZ}jT0rFf=ax^%lontT;8Wvd{}M$75^f{|cA*;IwJl~r~nN<5W!oghiAVE5(h z>;9r|@`75}Fi2WWS72E&5A#c#40n_nIER!NdH^ni00@g&=%|%H@mpH!CxdtPJ(o8` zD5%7@)tL_^WFI^xUqa#t=daMWW*N0EGl!2fd|n_%(Pnv4XA6O?cc=JtzzolXWX>(l z9@o_y($3)tAsgzK#p+|JGXZ|fnlM)HU|g>+YeLock`G|#7=U^5n~U=FzoaMwaF%Pi zzZlmY-%^fC1)~R*;pf)CG^8EVo1j4Lt#5g+^r}UwU$}VcQaD3DKsjYHgW7sU(Bm$& zj+1|n_Siy+if%MfVg{S;-t;y`TrOOW!mei{se(ydS09o@W%q@5VaexVV)ma7D9@;y zT=$G*T#mT6ohdwSRo{GVZEM>~@K|G|qMiLSaN&9+&K0`EHvG*nEt2;%R&<6Fnlg-a9c 
zO8PM)>#f~*uAt`Fgdo|m5Cs9L!}rUWWptvBk_Zmj2gTRdy`ivHX)wdDw^VNx^Mhv@ zzt1L;MR+dLT`4<)s@>}=2{dIEb~0JFtINjCuCGb59}TuBZu9ezqtOG;J&$|F){?2Z zsW>V;R-oFHvh?D3JHJPGy8U>Ri+qoVcy8o3vrvd{kMB=G#`kq^P8Y8^D<%K@)|&6R z9^oOst+$rAOFkz1Jsrp7Np&Xa}HJ`LWZ_l^m=T%tC zO@U6NFf=A?2<|o#Q&QUDW$k+{pqjNsvVWWB*6Wk2!YlIw+@zOkt3dS85n#J!f@qg! z$OGi39R(Fo{-?lVM&L_$E3jIY-1ILVM4Yzt?cc!nGn`-BDD+G+^7Kn}m!PjJJVL6Q zBuD2d;&~|!8X2AL=XVO-RAe_zUdo$4w$5AC| z!pcpmcR(1n*`gjY3wcP?A-8~7qV@IEHla%s&t_QsP6qq38O<;}gorS3t^;T@6})fymA>-kX>N8_t=B-#9-s0k z{_GUo;7;p5PiLzZ8{!KUZE&-H^0nLcmfR$6vG-YSmD_QyYfQiymZ1s=vprPHw}(|^ zkX^q6Rzw71cZdZil8T%^T8raKiioZu_`c1Gl3(8{9OL-~Nv;Ysw_b^iA05D9p5y}4 z72W{z*~~R)q2-31fg6aAg%GT7@FC;fj9t^I8lPcih3B?3GdSoSjar|&A zTy4nzOZ)E<8LAzm`dDOQHBh!~HP+wtB6(d?JC7$6;YgTyUod@pr}mJ67uv~%3tqa3 zuH#T)fsehi!3y2|`Gs7ZdTf~sbVGP=*NV%vFoq;x{LvssVxmx6jbcEVeWf|dkBpL+ z-k(EH`!Y87*s}r3vTT#Y?JhAQv##zQsfGVcQoXWxoX)xw{IBnjWw(A6_EzAa2s*s>-KUs2J%e3~ zr6R_vx7|0aWt;)?$A6BLl0-sVE%$`9ICuIYQzZm2OYPN5!^Q)N&dcXP9Z-8^ z-mU#4E&5}vAtv))xqG_-SRDQ{#ZDx7^ww7Fy~DjdN;a*n!omGF<7YAwbZeX~P>pskH;I_E%gpbtj^_*fkg>I9D%(x5N{x44x$E$v ziQpR{OHioi1g5~qQO%GpFB-+SLa}@;+297c%}?8LNGZk?c_$KCsI$P2F1rVQk9Y?IW!ob?(bX%;qZKYR-@x`E^=*sM>Q5!}t&-rD zQu#>EDq?(Op>{J82Z)(V_i`Iv&qkUh*+r-ZjM`&$xOkTwg5UFU;#AN%{o!PKjm72W zUb?!v8&YH?d$H_OHkBjOoc~By8!4c65!TdtwTqr@Fd_uQ@XH)7v)~uA#X8vtf5ek} zsX3A6gKrXsg_;kae0b9wvFwNR<`>ZB(vM#oER1ULDqiEa71PpqwYUF6{VaOg2osuM z$$$Z^Eb&0#{SxulQz%jl9ff)SGmpufdl)NasCZtaHICmGl7;aL9?hSe+>x%cTpA#j z!#y75^KYQUlO_bW)9V%39L<5=8CW9s`AiGGd~ysz?js@xKfr&vvG&=xm)Cb^u>GPf zLlBZDUc^+$sOM&sZLn*!MDpVXy7{*F+)*c`fc#iLy7yI;sAT$YY#%fu+I@q@9xy$Nyg{F9SeySqv@@x05ll?? 
zcOwdxKD<66*+LFOS`kn(64})Np%ymy$~+>0&WJJ^k{amh-vZP}xTyqd>|Nsa2phW*zuM*Te>O5LjAX ziTzIP6Q&jiU}7uI1~`*>LrXpLw#Q4s+eQ%cb^3bO2iX-lS{8!UMEIAWvMtqG+}9&_ z4$&+(@hP9vhDL{_rFqb{Ht|R^%uFwolSLGJz2F`{+8vrB)8)6xCP1yr233;VsFGWb z?S}+pd$T$5ZJ>DBjr>2FuouY`{mtj+jY;Hv@bEG~*!ywd9g}-Gf+*~z zb&tVly-3*6452#Et^vjxeI{s7toSgFhUfJogw12oJvghs>BDs-OAtn^C_;;|jn&AC ziWyVtt|4X3d-YTt`VFGX!f1hUNnuf)EbV(^*V-oLBm%Ny zV1Wkz)FK`NA*3^>2@A~^MZ&InGM;QwMtUq^9%r062V4e5lBU%fqmF1%07xuFQ zX1k*`iDQl42WS$J_^RB2Us)mk7d8XU?3sSzq#ad+;%Vets%xorRi;6%&8(S?C(a))Q+-jen(NDD{#;j8RlJN;>d6)(jwzB?TxtN)X62%oeG#b z-#Ko~2Tfyok2ru7XFnivX+|MYd-)cjJt7&|F|*Jjg{)tt4}~$0eq`1t zVKHxHMkWRytkP)IEyUq{hyBJ1TOTu3e0g}3G?5Gm6&w9s$f9A{K;nB#0P!Qwu+l!x zuz_}hUN6EX~kanFYE~?^NdP)5OFZv`JVNbtS_bo zON-XF723iJ;sjEn7_L^-L)b%kjR(K34zOXo^!Ia*{KYSKw==#MVXXkqlF z%y7@MZHj5<0h|)CSX!v(bTt8~Qeqm-o+=Kfs&`lbd7qFwdV6tX1jB>dfaePeS{N*; zy^)ECK(fr^9K2~B`os@)ZADPV1)ZpceiuWCC=q&G5g0D|O627SFdzMzvNOy{)%?*V zT$~Jw{RhX4J05ei(D!qQ{ot5$Cv8XL*P+hVLLArTIHgx{2;`;Xj^FlhBYpBQ!rz3Y zi(~zUhBfentY#4P?w7(>O%fN^56I{hs-XdXIWPlt4O^lzH@LMEVjKuip^;R7do|-^ z%*H$JIp>6l+XFLy*N3?EH;Iuy^<=Mw=XS&qLKCBe5%_3XXIZn%zwm~=A;JqBu*_rn z`=q*y9-oZs?*#P2L{@+p5|ppg+NJ&DqA^y^rz>~U8?npxXcymT+%K7+o_mYCJdGs! z^gQ#4o@1~0@;&L!JO^2jAN-s4d_UVbEY1GJNqjCw>zCe&W=2Bk2kQh~A%tG?`B~J! 
z`Bnkk7@d`5D?8AA(u|8?v3JPw_DP_0NMTBHo;e8;GMYVZ3UI_BGx5Kq&Uze-_v=yS{$Sh~@z+Aw7eS^p-rFDI<`cCa08zKGB zOha>SFf5xLHHG~LLUi!E9!dPl9uP_dK*2l}tB4&e$ZET_Y<$KdOt`-BrOn_*%V-|C zI`)YSJblhwGsJ!zNsEj|j%J^MBS6`VNXuv(3@IQ)LI}o(^ef{%rdfzBa^-D@a%aCoXbY;Pv1MGP1zN>ek!$X6r%OA zn4_Ukp|F)F)`>@jzK@g zc>iNzqe`xw8y77$0%tXv@f5ftkTa}GlJGFT!JxM}#F#t{c=%zu<%|ng@d*ouqhbnO z4JpD{xH6ZizjyjBgxD*DW3-p&7foX@-D6gd9V>H-Ve-reuh;Vb^QH>>)=8ecnV7wB+)q#gf##WT@d2pGuj506 zWzm+pfY{*bUk4L3>5^LG4^^`;YtNuM!cyP>tgSd2DD>U7x#h@2123foAC*ohv>V zktCP}Bmd1pN2m{VUJXNFrljONH(a=b`J6dx(i^ZD#hnwLw7|mIZqT&Pcar(7AL@;1 zkrNfM`Wv#z(=4N+XI7SaEabWMb{Cmcr*+_qp<5bG5G^Kc`hb(GW;$L<@Z70S(0THc zTyM5GCee4vAx8VT7h2P^3H{Jeuozh-Y1_1u-525iTx`oGw`wv{e*RN=aP1sfd%mF# zs5d!yICDta2+oOovMe*WmSgVG>1Cg8p8ZpP$T z0A2u~Q?M|Tt@@cNpQwwjWrt{y8hfDbIScto$-Mc4t-hMCz975cN?sHJITa?h0BsCD zH$qSnuPQODV+oopiGj;07k=+(H=UW6)^BUiP^-+^pzKyA1Zl%aE^-%&pzDhaGZj^^y%%aGs5n5&26`Vr+V4jF=~wD^uANzUrazfZ1#ee z>Q;2JD{F%QVqdZ4@(1q<%PnNBaJ<>I^Rs;UjdxX&#IwN%vZjm<^uC-Jj%P*Oh>0_v zSUgkX)X0LSK~j3(;Bc#qI01rU<=P8b zB09`##ZVYfP8tUlD^ATN?|ncK=MiMik6;O^9=Lv`81d&^tX-0Fy6WWZEzJHoGHQi- z7Cs|xWm{Ipt7cqe+*qnSjXuNQ8r`_3rjte62N-3bkkYHdzn|+~rJB>yx@~La5o)F! 
z*=u1rW-OJ7Uc`GCL~PB3Wta_QjG$u@Hrx(!fSt3G^dg;aV*dissiH`$-oMS!mcl=e z^)<>QIH-A2ru60R51?c5n{hvZ7AU!4pWY(+IGz{}fzgPXP;e$jN5nOAL&b6!ls`f5t3DTRHljEh9?b6V#L(i3e@G zIHv>hSUW8pzq;Cit(bD9T^|TlU|<@i!Rt3gHY{2d$|WksJ%AcKRb3xsX(6A#&Q~!u z)Vq@lq{<$O6Qs@9IhzWCW`WZ?88_T0 zq3IipYFbgDf2Ghcx6}N%z%4W`prOoBza<_J!O zbme%W@I8CoBv8C~B#-|f!c`lxAm57v!QwYEz-O``Z*!%bdo7#A^Kr*ADv;SSp`29~ z^-|2(OPV9w+DU2tVRa5C7*PxMo;7ar-HTgJ{+?vLBbf$K=Gr2jxZG>Lu6gXet-XM@ z2|(=oy#UmGBxKEY_w{b>vf#d!!T1744J@L^LTR` zx4Sv7(G+a8N2mx{EH;d_7QZ_K@z`hSnL-p-B6P6Rdwi`gRJ!%&E|V9vvnJn^MsNu` zOKvi&F@UpyZ2j}6S4meH!L_O7KNhxG&fG3Gyjq5-X)7LQY)=r= zx36oBqPcCd9+GKLK>J8kQ1~*N=dn#xAW%_iz%8Fi+vd^*r(JEB{<=kl;%aY;!RiiUrze?liOm{sYa7s~?xL@5rl& zaSd;p} zsFIOqa|6oT{I@f0udDZ}7gM~?^n*5nQl|M8Y^8v9<)G$s2}B#90W{&`*Zt>fitU^H zRm27t;pU?>(PpmwMYKrJ%sLEAT#pBtX%!m{SU$J=ahb~sy03~f z2hrMZZ|b|OtDAP&S{Q4kcl+F9z4kIkd*8myCOeh91!$YUj_M0MlN_Pu@XZzFTryNg zV3y@ZyI}LlxdhRYswt_(l#}rdqZnlZ^ZCs{)vW?9khI5H%E8)j)t{}eHNzh#9^s3_ zQ$bED5AU5u$-WJ&vA*#K^a^l8J*THd=#R!w1Ueu{RH_q~!kZUWx;x`kW+xp^_yLh5 zUiOI^M&>QTVut(lfjxUF)I4>C3?SjLChlRL0G8FANh3d&psy(hk1R68x)#8!*4%aV zk_>w)cBI7bi0B4VheBccIn-0_SD!G( z6YErwiecsYJ|WkzqMlrvcmKWYm{EmJ22r~uL{|$ts+{x=G3Mx(o?pP(-`cK(75R^j>N!2Tp8UXNjX2lgJW<9Pol9Z;RZLF!));J3@YWZ({J zx3yS33e|$)2(rCzd#@4-S_Kp!qG4a!>G*EvbX8`Lbruz_pG3;72snC#>vCK)FwBn?_Q$SAwpBAW#Kr!oS>h1^|J4KxlF(E%vVxjb5tG4GX!(bLRBKm;|-ys z?7sqA-^zo(kKs#rvCJ77{Jxx9->@4{u7GMk7P2HCELij%R8`w?f}Iq34)n+yI>J8% z2;~S-c(*$s7OHkv063A493|KD|Owr1HY9IX~Sb#&=Q3S)kV|AFNbULBRee7wscA|jZ7;$ z&H@73@mBehBdgwh0-&J7_q94A?s=Gxf#9z+~U&AuXj`;PXI``*kq`ft!bkUr^5 zoD$@-gu(4?A8lhv4M(ZCHk^|xeGy8~GtXRz#+j?O07~o$;F0>0BJI-s;!s&-!9z_r zwm8>L8sqVIawnx?uNZva%p4o8Oy^7YRPakw5r`y{n!%@03A(bQAj@Dr0>x)c8NR)% z&v>-gTe(1abbb6`rBg;TBePGjq;Y)Pa2P|lA?w6y);>Oh)FFI+utF74g1k!|&N+;e zim^-C16#Jc2UJ^)cIGv=EfSs_)0NYopQ4~n8C4i`wqjlt4zofx^w~p$ye1LCl_q+^ z77J`EEK`U+Mq59FM7914S5DNZQqhNkslNrch=Phv&2csHt-=g}G$6~8%FLx0=+!8G zCk@>2?e106sF$aD=*4^#W;q)bku%WJCf$S{dIr8jnueHBKxfnWMA@)}>va**9^|I5 
ztP#{S$;pY&aIGV%>vT{J9Sw_ckwKsv^N;13;`wKHlLq~z$GF)*+uOlUkHrCK$JI|| zcTQK{^oYV>s}>B+KA#18iS-w;zFDn#Bbd@&xCfjyBNKiZmgRWO*J`p}Hs3l^O9roh zV%DgP>8p(nM>;Q{yZXrK{@q76p&-P(-_!*%_EwWe#Zp_x+4!%B`yu(oJ?0=?EkmLQv%Hc4;*-qv`H)FXZUcZarDTolM( z&kd1W*h&D16I~Oj5 z?%Hgbjw(3X;MI$=%DhhL4t#V<2cK2lNfdYXYrd6Y*3}`#UmJoufpV`Ke6|CB!!J;7 zEnvKNKBM5MQ0ewE>glLgH!{DS01zzNV!x)4>=Ibtq{niioKUG{8k?-qwpfAwd$yHC znq_@1mrh$*F^WSKL}j2B?&18X`zt=c6%$%}v-<%OELVVr_w1GIOPvWKfiM4nxy5Ol z-<93psc8d-^Ek#aAgsP(FJ9LOp9@GGV<5<4#$62>Z%OH}&((Dh)>Ed|VY0W&3jLS< zd-(xCQd5fY5e1i_$-t^(UPLTy&-POK1)SzG|3ILxuh%4!h&qZ{&SHW-d`8PY{1mn& zFX!fyGXy%*4qzYse7{YhQKb@7)oWG#X#K4`OE36x_E_IeYt9OudAOVQqkFG~I9);iIMgJ6IQdZ~LBiSvGdq|I)?tB@A@Y60!_H9b?!P!nE`w zn!`ubU79DiMm)ufh1+t_GN4s27?#Fq*YXeVleXOHATcjXKpg74aQ@-=da;jQm%cur zR6p_sB7qds!j>S!8fHWDN5=fnc{Si8zbh_yO@W?i8eM|*Y;>CXk_lIJLD(~v!$gZ_ zyF!AtvyTg>n^l3;(9aKJCoNY$Xc%7KK4%FX9T(g`ApGQq-+{1j;5+;Iop#7Vx@UaL z>sbh%4Ls$%66Nn9pZxp+_WGVwqDR+~Jq(e)qZ>2B593oo!&s;EF8JNeA*)=X=@{^m z-?SZ|-O^ERRRd4jm#9@nfH}56H38=U1cmNvl1DD^-MDkE%%IGB-VMQE?4IY4&#&hF zmT5LM+onw8`>Ei9@Ey8i<; z?Yv*eeoJ(YQ_QdG3+F+t16Rm^TL9j{)XlH_USZq*vWwFa8Dv5Bmm^as5B)weQY)0# zu8VKi<_HN!0@+zGxDjMLn6}4){kabUd+?KyXCtGf{vPZ*V$bBNaz9w4{d03M!uDXA zYIm}Hz2fE}Z%!&l!Y)n7_GEwyFI5Y~&bD)NdN+?Sz{RHz!7mERdKKtzgH`Vf{bsx6 z-&Wa;(F!}x_x@a0=fk`Fi-+-pyC63FKD@iFTs}Hp{$PIFO(h+!Xyt&UMy!b|-Jw|>Ur&ss6yp7I+m&N$phEy_FmzL6#m91pXv0qc< zt(NT+{Zq#%xKdO>@4Q4i?kTq{Yro`T11pVbZ1wGKdbK~zX)rcc=wT_K>9`?%S2PJ z?YW38B0)HVW)^5fQGjWzyrzO2A@`TYEdRJl_kNxMF0_k{14lg!-=le+ufM9?JLci= zTH7+dKg>PVyL##k2-_;vVB%0(iFeCd=``Xvu7O^GilCOaF4Qj3XB&S1`9#|bQER4$ zD5x#a%5959!?s`GInaIIrhm+)08P01%OO#8hfp9Rs}j@qUOU`2WTm-_Ly9F}m2>F= z^sP@-+YP_+K76A6smzfCey_jCi!j@&5-{Zl>`tt<@o$Bg^jgUnPqdk-#E3N&nrv<$ zF|#QX!*8M`k$%a0-tM6C)eQdr7=)^(}9>JE+TT6>Y4IR7|HYXzqGZ)~sG zGyjj7WB+G*b`vmLeXvNw{_O#7$ugYh(PAnxw=H8)%1-+H-?@#y5L?gpBHYUfKU>Je z>sm8Au6H&-Io&7CU^1WSxV|H8<7+)x>UOKJ6*pFoyE3}o;@e&mFy*1nKR*>nmNHt) zvB55EjYAgy>OU;eZFIu~eNf1?p3^V{#@)_<`)rw%jYEj>SszFiAJNcS8(j#jw%Y*~ 
z*L9@5+F&<;XxU+*e*L%^%y+2ch`1c(6)QjDQc<^HN}uWf(P>@|jsM?djj)0LpX&Yo zpBdi&CphI9lqziI*%&6EL)=TpaR+E1SaS9{>+KmO1n zu??20$%6l@c^s!vVe^Z{1LL*>F}qiPxDo(k#7_UU5^6>NSk;N3!WM!LOu>UxJxqRh(lZvCI7eX~cloTjWXP_$v< zdWXcM?B>t)fC-q4x_!2z-6 zXYa-zM0FR`VMPBk97=sdkb902l`U1!smM<F4|vJF$|+fSUg zymX5g9rXkztq<}h)Bf?ReYF_fqV#sp1TWKKjs!Fic6+fe;x8@=DaN7aulRq+h z8-ihs!#+LP(~P=zAzD@IsdM%J?D~=S@u`SgYw=Xt1D?HTJeGojaP$8Y#d78_LbB0| z)S_(9T;&9NrCuJKs&b2%5O!753&11mSO07~kt(~ctG2cM&pr+>NYUFKN6*ld z>P*op_w3u;Pa*!N@!b)Pn>YwiIn9l$`{Zt0O_e5*y1?A}_;6^lx|WRgd9gU$32N{C zRM}c!aACbhHcQ3tp}2lzz}BY8FJ1y`Rc+hTecfl%?YEij^n=Nf*+c&5rtqPRj#-^5 z_3q|717C%>oHkrrxleR5+>6@TPUWlaQeiM3LVem05^FZu8_roZFt8<(ouWxNde_T* zL5>q33a@FCwGBFaPIDPY=T-oh+oTp~u#?bov_*(3PxV!f%sejtF}3qah22KnUL!op z|HXW-=hiCf9dg1o>q;~i@9tBmbs?uzxIlioXzz8PSa8pG)+d~GJLzy!=A>Ud;@_H1 zJhbQhOpR$h9FZnN+=6DDD#WbktJS?*?E|iDXZsL9_N{Wp(=G^jmL#Au*@ST*IossU z29+HNLN(lls>12?829Q?Mm;;Qpq%m>`4 z8?Hre$^DGJ-~!dA6mQ-M9z0>*L%td*?*{jh88_#A?`f+4ypaT=B$3{{1hNlOqvB<| z>T_Qa3U$Tb7p>FT7pmdj@$pktd2;c8seOFDvRNI_q~WpguvC7FvYLuc`r=#lfv_s= zi}GKGVLZQn^|2%$X}o>fnBrVdu)RlZHM{O!sOE!69MHy3y0fs@fIXVgZqaO9yM2vh1$R!HK^9F zA?bdNp|xCV>n^IX_v$jh+W*8lCyD&dDj)Vme3ybvv}=4-wy{2S2YAhpPIByi1l&o{ z4Qvq~_n#*^s|Mp48P84c|MByli~VAQH0JbvmjMkErd2lBW_J!^+Z_&rcql{7Z0;j# zXa4qqd+y;0ym#_iGK75AS2=RjB8eRD@0pPVb~(=oPwgm}b~Y|bUB{C9PaK!pGNe+% zu7w+TXJ6LbLqrGRV_UyEocqn{q31(Iw{y9DpJ}~ep8%Wdhlu}48Z|AFaY1ULf8Rj) zA&aeSFi?I2V?X@P)h`3Of-p<<`hH7jjy4H{Me@UH-b6+*I`-$+DM=ib(~oxF%eX}e z{wP&!|MhuTU}Lt=a+at%TGKqbox)yZ#ACg*XbAo%0qCD zNy#4v|L;7tw=QiZlu7GHNDzfSXP{UgKdZ}CF3ZM8oCi&J;Ok?N@x1n<7NN($g#G^h z{2As4p$D}b`hKqA(*!Xf3p83==WzWCB@=0X9h1m7gw<0lQaZL}6u4sm2sp)?5-0Y(=xrf`p*}mWYd1y9L>M;Z@@fq+AglXO!0MB|r z={HO26tkZdLP&_{-#W3fRb_|T@A1@MN5+Y24Kfh-Ta5s`gEfm#ipZzrufg^YCnKDN z2>Q#LcQ0{@F;esfsmb6I9~`L!%jN3{eIP6C7Uz#FJ&|9AXl_pBD6s#c84hPta!cz~ zJUzob8(@tdVLGVI?3KTE!n$&HZ#&dMR0Jf{5WOAXEqu982h#yWgMYXnx*gQ;=-Hc- zF7D7VD#0S{s16qD`X#%?b0uFR#RL>*F{HC|foGBYj)|0%6*UEvdgO(YCrBfPLw~eF z=9Y`(sDsTE=m=4~kZeIiy8u0~=OT}NCTnisU-QB&vbePmx=MzOYFfG-*VCK 
zROe`QRyGtBl%-Lk(0n6aC2o${VvQn=I90gKXcB$F3S9WKL1Kp9%xhQfCQaFX>o(i# zhY$EIdn{Mi<&=snUL@TXbtqu~UKa6VG0cq>$^VmGM<3S_q>mQ?tj|4P>BRPQHrR2V+*2 zjSr6WL?2%Pf=t%netj4ux$`n?=Y#)r1%Zg+8d-BL88Prz7XkxzNDMN;`aprPKm&p7 z-c{fwRVJnEN+siSgxLI9a6)BUC9Y6d_U^mleok+V*PRuckI9+d*!^O`X4_1RG5@)# z@;yubl8Yd0QZlR;0!4&%6u&edGlGWYznmXiM(05Ks<8~eJV`6-ofN5gzWpXMtFSKa z;+>tfZre96ISes&J{@^wgph5poMvH{lA7^Y8D81gk^Sz1LY>eDw$AMfQ>|5Dy*59u z-Vz|aE0BJzd^HOazZ&RCGr4mT8-#t2M+mX!KfSAY|ECJu9mKY7281ALtGBdmu9h#? zs7qAq3FYKNCRQ)z5BB&9tXb=fnS1-B`&`13WC*il_BMD(L#r5-bVxvlVkCTwD)1^_ z+UY+GCN!^LoDnuZ;pO+T+3v!keL7$39R2tjJL0uDHMeVfZ+6T$f%3M=X(8&yOXs%V zsCLtp(=f#ogGcy8dj(GNn)P$PE-8etOZ3SsbJ)a3GUHE~P$@o^3#G80gJfKwweV%g zmbDo$dhHS56$Z0omqV{R>R2v-Xz`?g-mz~cxkxu+ z()Ft#*tL4)^=va;IKnrT_yPjf3`>{&Mbu(i=Ee{Ca6iB>-f`o|#y@Tm+U*ER1MlQQ z-|aC38!l$2`Hdce96MU!D=>xXeM9yogg5l$Xey`G!+o5wqKW%-cRRd(SW^8tO!3kX z46>O@2475pqH! zM!($_uP*3w?S;@7iS#za+f} zZ#=P?!!FY&zT;a#(Ui5y+KP#s(FIj>O1Tqir?K$&yc)-1`s4;C=Zz5}-1~-{dcQ9Z z5-p`52tznCuB#fon{Qg(UP2n4qX(DKdmV&D-iAYXx+T0J@zz-L5eL~+NWE1$V-74j z%1!@wW?@UikUS|4zI4TGT_PYw#PK}8+P3%uMS0yGTnvkiDicSJ$?}i=5<|CZFX-I; zWyqT-gLRtN3SY|?260>We2X&_9zb%Lk%<*jVKL7Bq0c^cH@96BKznrdx|5=WhC&0V zxqBXP`~g?nZfuBSxv@?;Uiw@Q^4O#*-@gnDM28zZ z;P@7tyD`jxU_W=MjjhK*DNvuA*=9sXI(_VBN#vnp@= z8I*LSHaK?ME8sb8`i*O~esjggg`>6W*Av)~8E4wt($?E2GLm#pC2uJqO!XgTkx`CA zL<|#ClbL$TWNFB*LP%&+wk8Ox|8z%E(5a^(brCDH85rDg5fC?UyF|_I=ka?ZBvp|b z+i@|Mkli;9Cn-L*H?Om}2bERTY>P7%ZMb66cB3gmoz-#Jbl_&~v*Qrb^)DJLd|&G= zE@W+Nf4L#1tKC0ktd+c;_f_p3b)0foi-7uo0cWoUk@@!PXTw7el`A)>mV0XXQI9CTaEKjDN}6EW%u=%5b^YZuX_-*{nXzhkE_!?)q!gRf;5mjQKb-dg%ca_ zG4`O2$1la&iF>{~1o+AYROkjyw51yUri}x^$*9!6~k2+kh(WT}gp#9F>O@gJ4Mc?hI{3Q{u~sr3GApHd?L} zwy^qH`Hc}%CYjVjtIaEVa@4K2W{FQ^97pBkLv0_Yn2~iAhntvKmL{+t1q9xfs9a>k>aG82|j)bDtk zNu&L<;K}YkL=$D`z1Z5ZJvZ&O zXK;MJolNMw;wjtwu#K4og$t9PTu0`~Pt#$HkueLQk3U_6qdcp>q298cK(fRKUQw`r zgL?%x*uI!FMwS0{ZS%ZOIP{bHrgBe!1UkgU5CtpeZ6hU_y)(}52%zj{Ede;BKY3O1 zRdJIF$#rGs@dAfjB>E34fs5p0zl~VHpG7tPdf!LLRGR7zgzTsarec+^+zF;{^SvhMN53kT59lR54!MZ9%Z|Sb2pEW-L+Dy 
zmeZ+(-AkfB^;_^O;&EMhA@ZJllnH@(9~h0un$uzMkJQz-9;|4^K<`!bsoh$yJeC>> z|BYO_VDXX#=9a&tHdfp2g`G!=A1cbP7ouB?H%IGscm6TKi+I|mt)s^RW=kYqdZZ+;wB=`s*!RAAa_Ytq7sOX| zJ`@(09Lb)Pj{2frYhDd)_3_-P4;g!=)Vdh^jlTKf@`4K`+z|0p2DL+q_*)WjOSF0Q z@3;*QqhDqR(>Bj@pOw(0&#d(PN%@ke`67tfm+pQWsaO$? z#S&py|^9 zpP&rl_Lw5Hn>FbxK~(DvR=o=bee_|UV*Yq;LDZGmwXm*jhcJqw&7g7AZ^g`Gzw|!L zt??%ntWKbQQm5SWhD?8FvQsuB2wv-0GE|58+5V%yC(v~WWqSj9!XjcDE>JVF2~LpF z_n!L%sk51y)zOUK&ayvL`OH+!SKszj@A1e_#sb4Pnbv^BQ~b`C68)(miaws>!uDp+ zvzUprw>;e6;7OmDr*vk9?qC+SylF!NOl~t1ri1IIHlw}+CwAwtkIB_5PQczL0GQ>4 z;vz@+``%Z~$ZNptCy;4J28{>Cqe%;I>$G)fS32b!$PG}9>W+`~tV$$N6^kbty`Y)3 z0>&vuuYN4l{5;7bs7 z0{zpm_||R|hE3v?Eleou&ItYXOyKyis^E8?&Lm*QLBAdYsWDX2{$^ry8EwmBmqPkb z%q$K+MD(H7saSYhFb*PgU_Q}dDc;Iox)V@hf_~uf_G;AkSxNb%3`8pdujnp&4hsMyB;MglTNwCwi9rO)bN`{8 zJj;Hg-(jFE?YeX%O;0M}WzalQ_nsxU3mkVdizU9ChR1!u`F1+RrX^F}uAGvOvORj& zB77h2DY2x=h0;Dv`ExXy9FCAb^0+ur?}r4YKRc3}-!)^Rslztoc3YcL6J-dS@!Z z4|jkx&hXZCY+FSC=E3+qZf+9fr;3>lTB-$wqKXI*BfsXm^O za?uns=MP4FzC|)p6oN1Q6w7g~TJSLNuf+^UKVl^B%W?QLyelxd)O4&ktUhtOlVGIl}3~j7ne+M_#CN@^qB?Q zeL8h&-3^s0@z{t+ab#)nU7&zNL!lOg-Vgm`!pnkwGD|%xeuSvB|3llIkMg9|8XPnGo< zo{GJ>wn0pH>DbLEA8_{2*}EKDeOvL@osa5Kdjk{FD|daYgVf8?2+_H{l?o+RT4L}^ zbDBAb5;sp5H`id$0gT`tU-<%*@O=v zJRM|VZ;qBdk}Z3d0kfDN zf#yA#)@bd>|F-ix4tLDRb7!LqZmE9Md7-bn59@81)se(T92B(W@I6+A`o5e1z$T=> z#ON1i#s~HySx8mdnjdOaZ+$-3=()XK!;2on)`p}`6F9Cj_8^|qO?zO^Qu}@bjGUIg zaeJAe_?{u|I@KjOx!N$UL8$s1=~@vF6<{w((AVb@-W@43sC#l5mK zA;Q6I;Q8p*_+U}clN8-_1V*b)CoClTVXMoT7U{kxae(`@!l*7jya-z|i zrAlFWwI+nT;<~My3#GulS~02^V^AcGkTfxJtzR{fYt8b~c%NS$t(pHTZLF`>2Sa^y zr1unnLc{X-DyuF`5=E>i4;gjOh2cs5Z*}w^D$n7*g4+FICzD%Z6w9W%@&Cd?<;$@W`3>|=C0WcY^e!zsB4oPYYdS{%+ZGud1!iM;dL2%`t=_+Jp4H{ zd{kc0oqB6*6tuJkPL(}33bEw}Rg#Wc%?I#}yZIA5HZFb`w2E>yjPO3I12HMXwrts7 z?^ONfuB6Pr(0*(1rRl`!e1?9{5!vjZELkEB_9XxU8V+OAQL^l<0>#! zn-J%Y!Edh$Wv*6ICl-Rkz6I(~*A{Md-AN@;<^-WM-~B)PMbjbO=Q#6cv_4w&IUu}! zXE2RIq)*XM%x?Yy-f+B%8>#uHmpeCa2VMU`gOy@2=QkvY%{|FX^D&<~w|Hg}J07z? 
z=3S@41gBOCm?c4dKqkZqZKcAqHOc${Z1N_1@)d+nqj;lxQ1scIKkq0nG=>xq$h!;lJL)p=qQ+)>!WvTeqa?AacY+Ey z-51W$W2^5vD~dt6smbm#S+QMiE6$OgX2nK}wrk*_G;Yr4#d=BW0dOQj$Q+TPi!Ibq>!WU$)0 zidZ?P&S@$QfwM()R7@Rve=poi@^7b=`Rwi5w1A&$0@> zswv|YQi(EaaGto)lA>keaG^7ByQY_t^#Z^;TuV?*CUf9}1j3g3nGC%Fx-S>~8NV+-QHxhX< ziC|HX+{^VR#$vVm3PO5J43T-5`1tU{=XjJ2s~x6C04?N7`rh{QDuO79{a20Y)*}(> zYN@Q)Wk+3m7B8_aGb~ev7kGvx8=lCkym=8}IzITqS_t`CR2**dZ$z-jUoDaqIXQB5BY{PdjRfRu|}H* zQIJLgvx{OjW|wc&{P4a9j&rx)C+mgyfa3q{pDJHZWa#PhA}ap!Ov^o*?KZ^qYQEpL zZRtyny}D1kug~Qi&D^0OMEb@hbVE%;+A}1a%A`@>nz}zxMN<_%M}Ky$5q3ABCL{{4 zFpKqn84M!zB~|9H^`!vtcM1UVRyHkTdP!So>V#g#`t!XsW%#J^14FxRd=R%0scLSL zChO4Sq~XTeR8lMp(<@mp`cF7=6v%Q6U`wm@#K_I z7SFtXaXrM>;R})Av4ne^ehtZ8_jxK1^Tz-W-;(?K(ap>~ZF5e+dbr95^e*_vxM@jbUJ0dmNc<`N=lyZlZXYkcZKJi)KQHkYI)? zF-PDy992U~Mu4j@^t_nJ{FVz~I&@NQ@6y?GMrd<~50uThl0>XNG|HQ9&UpxLVpm(L(4wH> zW2a1#xX~1l^M&Gg&sw;d)v;}&)QT_Zlcxgzp@nX?qw%|%AJ7Nr#qmptEEUiDCNx(m z=D4w%_nnq}9H6lzd)ROzPxU@@V-HdHt|!<;(!Om$3?RgQ7&uK^!6Ds%QrKHs-vS4$ zh5?;ItgPm3Pvj1GsL7c{HqjtP$d&%jnT&q2nK;NT0&;En( zKdOYsTD~`=t#cnX%%Y2jCEuS;B;S499c9n;>w4CfoA@a}ug4kYm;sLl_U0pG zug4L)QSYKlJM7min`Ix1D5S`sn2E6Cy}+mK^+1!kM?2qlFDOkOxoOu zwAdUBw!+K5M4k+e%kwz38Okq=7O^|DmIU5i&b*$czy9h;fPoeOppgu{3{apY>3uNm zXbarOMu2+vF@t79SzJVW?pnNR*$mTslpChw6fGSqs&Yc&nxE3kzB7OANr`F-Y%_U^ zV9^9L?xCm38@e3A9teRmF9Qs(1kt(ml#^1PmF~A|?{}>=uy|b^j>y+!%F%3Q?8K@8 zSX(mb_<^xLUQ8qJ-6?A0>cW1-`U?@&V9X|W^yUhx5JwkiV8L$4fGOck1G{J9y1g6T z=A$ov!Eqp^K;#zH%niR5B3++A(m>m(wl1Aci!h$k@)P$aqn%g8Ca`>9*hDvyU!TN< z-cmja zA&FI5*!cYu7+cCS>wzkX>K1Af>Xs_cI#^gHpXHP7t5y$xI^b(Q>Km-0e9o&;){4!A zRox>L{9DPC>}gPip^MfusGErajumWKYNd-Uq~6DH4NY|$_1N;Y+v1N-cQIW(i1I-{}7?UNII6M7mXYUKJF zELX@4^qIy5M`~oFnzkB=mfm!HQ3UR?D&k&R{9O&2+UHqrUgD9RRADqMwtkwEZ(bNSy>>NE|v}$qQefbB&5$ zESN;7jdC^PYmgL@Sd~VLjw4+dog8O-V{(5ruRMdwaG(jg75U^wqo6j3`dMb2)JcEc zh?LO~EDke(tQwX^&oW_L7|BM;?&q?$LF>XZyUFA1O!Qa83GmL&ON*K(u~u zrA$ycquV{i{%+>}-r_?}oqNyQ;-xosCh8dUiRcc{QdY)Pj8`siKpmh!4KFNYdti!k 
z8IE?Z_qriRcK7=zVB6%3n|LK~^8Hoyk0(7yqs#`iQ*1pyYhN2i4S{Eo+`wS47EX=V7z23^?jd#w=9Tv;!y8~zH=BD94POCIFw zNuP>-gxPRz`8+=O5YSp*3M&?8m*d7K!T<|mc}o3bxQV0jm*!q)ec$=G>TuB3Q1su?mVE6^LKEl*zI z$uMy(F4_lK&iMkaO%MGDCC+-+#fvxX*@5L&@bd7-5VrBjVf$uZ>N?zWvP)=o<6Krk ztH^wcUV}L)@zOh5a&RZjp087BeC+-WqYUb?Noi<0N2UhJR-fC+EiU6)BgR)_L&NL` zAo;*)m4?4t%P>bZYNvogx^V#NZuYi2_%KPdHf zS|vp(^$CWXGs9ro@&s9^$(M$|jFd7MW@_0eq8`G0^W#KjGmk$=O}6Ws3gHXci77JE zt>K;8PZI`q3!3$8N(nossL*qdGSwZO1iHN{P-l+d@ffo>&H}sU3I2F{C@IMM8jJ#k zoA>mR;SU$`(G0hpq}@PL!_X)34{c8&MiXX!wZ@jGoF4(yG|+wt1Y5rEwyC|jkA;4@ zbz$lYGftN}p9Fq>$!!g$Fl^L`jjgy*aV0h$*o67jHA{7^v%m6fB;)6e74~1eYO=>k}tcxE7BGF`)_?5JFzFCoF)7Q27-iTJRVy8^H^*w#~8xifD z0F2Y{LPtLR+i>*zeiZhr?peC2s@dgYBF#G*BT@Ms{pBFTZYtMP+?YZ@S4?f|mWn;L zXh)B8saY#nTm4iy)8pjA)PkK4u916jUH2Nzs6_d}`;Ub-a512Gh@e;(APhbIJs$HVQ5)qL2e5@JQ5A*i!jPC zwuQ%C3Sm*t`(Vp9R2WY;%tE$Z_&St0Qp|!NX6f!dHu_BED`k1&(kD9JwAmW1wJYX8 zRvRTfQK=UT{W^j2CauT_t?1UFgU$us(5nzXZ{b1svFG7Ax)Y8!Dif;}a3x;it|w0q07iTQXRH?w4mEb0W?JkHtzB}53HKOW2t z=RUD(nc^$y-3mtap1BBgbb-s`yYd>#BKB=Ze;q4v|QMw;Rc$ez3V0*K2@UKAjD zRMgl-O(BYlw;N(JCJ!I2$OHyAto9PCbdEKKhe+QHp^n z8_TD4CoidrOgzGV&@mr{17a3^ZJ%cfYe0kP1~eK33;jiINqv+>rryWXCR3GZ&{lhM z<If~rmo&s5`>4UFJ^-V-6f2~ujy|e=>#Fw-!#Ux3s8>=x z;&V9J1SgcSt2TpKf8S;4m74`H1?xo0Uu_X5joi1Aqg0{FMJ_lEIbK>-3 z(Y2D874r+FsM;>~ZDXwp*J;^DzijHZJrS!Z#SXc}Wt?oAD$SVcf#|2F)7gf!jZul} z^Z}oo>m+DkEpY4H207HPrN1l;FM!fd%h*AX6QnVskUYs{W=RX={oQAJ(0{RC3p6GP zH1$Q9#sEgIhV$w530f8L1R;uOiS2N~B$rxL?R^aztdkf!pU+JBSw-_sG9DRCbBA&9 ziTF>eHw(!-f*NntQD%*0X2%<`(c5%Rf2M=%+B_^J-+dZXaSIR#zW^YKV$`>}vp}ri z)s>{7y9gF533qrdp1PKTzr!|C0t3Gq(N*1;_lUCD;8sx-QtMrsq)!7N6M|d zLWsf1XmTiO(H^LXTe_RzC)KlB@zouIXA}}A-B>3OD=ectm_2{`tJx~4P&p|7%PGDp z6+qnRvLg3IJ>FuH0FVv_RC3fLCn@vLXi#DfWvj%9li%k!;w83m5}-=lRnj?N2!)X2 z?d`5-x$WP=Auu;7mSVbEE+)sY6|GTV7U|le#qJ#Y&BW^LkQEEAS%J|ykAkUg7`!TeFC!$@b?*?lR)HdX-yI7a{fs>kIrfcDVRLqgJ z>Fb4Po(p$?zO!lIGX84dm4APx-AZhX&T<|=IyH87jWb<&Le`Dz41u{p$}(dRYU{BJ zu%0q5I124<)f(Q)2+fTUxZKb0!(-yD&;z?E67rH*0;lsg$NhGy6{{%@L%mN-t|DFr 
zI;&KdRlgM;-|eBN>icyWRw2(Lc%RsI_>z(5x|`-yz1VF&&cq`L`Mu5l4Pab=-sn>~ zbLrGFzW=dMyDCN3;zy8qpWN4U%q_FuG>1lL=ArUVMvcZ@RyBZv{vc#(cT$X&?UTJX z5DJoTu%oLQO==KRfKAtlcf7v;6*A;`SU%Oss!IZ)Bh9KG!7vg7_H#K4P21>c6%hE8UdKow>M@k+vI`y0bM+8`cB~SCQY=#FKT`j`-K_%D3|IY@VeIsU&X) zEPjzY(#NjK54&q66$OYcjW!|eTq|{UU^_s6nW0nuO%#S7@{WxzOykq*;&#*NoKfg)+Kl!uE731Ycr!=t$X-V4!P_gSekAUmN-xEvHs`n{F7UO8z7y_M4-rYy525(E z5Jw}E4a}?0+wXIst|gZ5)53J_YLBrTe$VPN9>aAVlTW(Ph!9>ZrC)t%HA!tglKIc; z!#*#JhE}~8F`_?Jz?P!p05Cd1R0t*B2nBlBb~5qmjr%sHO@Eh6Qq5{jlbzK4lYCNd zE#bMRHQU#J{uqcVDQ03)!)6l61s#qV(eEl=++(#^HgroHQlxn#M=~nXSE|3E{7O=j zm!zUku^Xw_p$Vn;nFV``S;g<5kLtDR zennv@9QSYCc5#g^Nx1Ln!`*`` z(6)46$8bSYnfLOsnHl~cLHMR;lBn>*_u3o{(fJd5?p}@V*cU>tB17h0J%iCI+U3M> zmt9Nr>4&j~?(74FDMgpaT8RLC<5oJmTK%?3rg^dDcEfSQ><&x^shAzmaPfshjp0ZJ zxJo(*aF$dTg!zi=>+=`my-mewxU8wj5}e61sD}(QZm58*`uyT)*C`_r?RBR~uLh(96tK}x z!q5!~L^P=vzfLX+guEI#>8D#907rk!B`y?ue)L@rq0RxT7w8bN@-ePIOH6EZ#P^eM zIKWPm7rf7UBhUgWUL`s0{AVhmPULgN6fe{hDnSEVsT|UBZt+{bZ-=k>Y-{HZhLIW6 zkv=0fZ+X#?ZV#K;IDrQM2*K8=a$gVk_SdgnZlJvkxiz(&{Po|iQ>X(Ptq0ITP`t7W z5!3*WOx*?ijDyyetaGU+WondI+u!O7h4YYnzl?F35l;>S#TJ1>lq1oX{F^W*MjFPjb@iHYSuN| zy8}w#JWjgV;Qg^kQc`oY7>I<7ig8E!b}eavJ<6W(?3O#b+3d1JD&N2PQc&(kA=Rx1 zhqwQ6zi#B43cTd=LOZfOhMP?+naDdt&2-K_jp z$t$lkTPE(xX07B2ySh?;=!f)<~ z*G`0JMuI4xE3FVXVVH`OEU}+yF?7)*2kl{-;KkiVIt7{z8;*c{l^B%s(?PvIj=?a1 z+eph-jDbxyk>DK?=RcIp6HF0|6C6*?0ble1xI&v##9%lC`JsNB5-12wFGGaWD{wu5 zT)H4CRihTXeiU`^kYJzK#fGq^HL!_A;B&_a}(d%!z`A5dvI8 z2!J{OhJ!FbHUO~00pK`bBR>DrtxG;}RaM1HI8*nr^nce_cEA2Yy(B{Mzty?F=Z#9x zP78<*2gC)lTkeb*5U{mNUKlb@w z{rbc1PUnq%UB|x{S19}X{+p5ij!yD_i@XRFNA}lEetAc<*x#uCmji&HEZO_u`QNd!zm@e?s>9Yv892pQ3*`Y@+c(C13ef{R?}8>7``W~Z%zi^MNsm3e z|Gl&$R%!Y5LpM&x)V@a6@sR9emTwPxa!7aZv;^K{lV4MVuFnEA0eX6Ryhfc<^7KCZ z%w#7)j9LbmPJ@P-wPYE(s>FFhahh#9H)!3a>)X54;b&gZY1fn|;F`NHkF0kE)|Kp! 
zj|8`mx=-6_`H?&!JaMX|202j=(mSJ|(=@fcxeA?Yr@vNFX`|pHAjmeVOT52^w`~Bt_Ye|!b$3+2u`EBKR#W{Rmu=LQ6ADCW1;a~_F zHqpPZm^9;li}{r=II;mS5)|wkmbE#UcKgctdE4jq&JR)ujEm<)s37(PNGMM={=Qmk za%$;qyf6CCABq6>DPET*DY4&&&!=CgzvEF9xW%7-qhhi5pV>SO(7w6+cJ+p?QeWX}6h+V&r&T1C%o^vRiY`3g%yj_Efv|e9-@n0l3qe8=Ui^ zcml=kE7rZQ{$L|t0=I=&;?+b9WonN4H#acQ5tM&>s|pEajHj=+NH{b}(EfFlbR!ow zLU6`3pK`)MHY)8!X&tr3uKpXbf7|VJAE8j?d2E{CzH-M6xX|ldjVLjEFs^He=ZCI= z#^L!~_e6r8{x!+p$;ySje#o*CRe|lU=XUL7AOwn7q<3okxWHSA*FgR#kS@YO_%FU; zxxC{+I!mGkKS#(i79J5Lfv*DC1Jl%DZDO>`BafGSOE9y*iR@P=gXuT z|Mo)~?F-9>;K-VmVB(rUo|`(POV7LJ#x8{2Z_W|LV6sM#{q0_Z@%kPhQH4uTBmEZ0 zUC(4+26bUx{Z}(7lU2?YKuvq^p8Hou=!o?_%d7c&caOft{^zZBvxIF-tQ~#N^baWO z?8@7_@bWFE`L6k&yFtZ$H-t$w|L0TXs?uqK1%%l3*vK-0pI^t^Kd29lA12MrQBdTU zr&M|h=>K;1`NPMODxVcvze~?==fkegxA2_ttyE|Ime8b7Uh2(^e(~fN#z|7r5=f3;OHT<)4onHwe>!^=boO3hps}+k^xxYj4AA~j3x6(v30e7*9^5Cye;Lv|DRU}@p(HD3gRjBAqjykp_qhR9EryCOXS?#Z z#4e-Tk#}=pz4h&F}rV{V^)Q zBbj*O^!L&4jsIMl%N5xhk;b7nJ z_bY8nw=Y2&jPk46`*#m^)c0UuX=7A?1PS_B(RI7>!N31VAFwojqZf5UW@_YhV?nYi z39iBZ+jUMIsfoV#wZT#w0j|j}b5Z(l*JOr^XV1Xe5w=apn3Sw4A$ zg8k=-z)vhc0<@{}Yp>@$)9)8?*eA3UbO0Xbe2S6 zj$^R@(hk`$`G?$+8GW^RgI+B0+tFx6QRBdeZj6V^J=onLZ+}Q8fA<|I(@NggeNzW@ zYL59_&*AR8U!CzEJP@Onh&ezWDYkD zY4;kaf!tp_kaN*YIL_-YxX=(&0+khw|cj|3#JeEPZ24JE;BjVC_cm{5d+wiLvrZ=Y#+HfHooUIJ5 z80ORqh1n|{|8{ZtIDDP>&lzH`l2ML|n-4u=k%`h0JYh?Z8~88h>(@7r{@aWJzotR| zx$pklU(?_^f9L$a3(nmSMcDrr@KyiszAQ)S9CTud^?Xq-gg{f`J4uEL$z)yRD^Wd67_KoF8H)Okn+H!6C$gU+nbDRP$Ev+wlnZbHr zxiY&PW>SL*waoge73$-ic$lM^Yk40XG|d@23=S7G#K|s;*md@uYHw${u8B7P?T*-D zXP>DE*t{Z9^9M<6<~wS7{foah$K5ybp~@nB5f>lrr{SBguL$V7BH#p!ugSy!hjZvm z7EqVeFRmEr344y! 
zX}l-aEn$mijxa1QL zQ93MKQ+$A_B;+1-gU;E>QI$v(Q2oV1XusDq7vKZJQYvkakurwZMIRmbEUN`^RqC$l z@1b0j-IsFIM4+ZTDGeJn%L&yC6G-(Jx4&tu%T{g1N&^;)^WgRQu;jqY;rlds(YyR= z33?=5cLDIpv3or3>H57x62au?XS+9MC{>ZdnWwCF)A>}oNt;?;NS<_R2-Aqwf}7{U zz?=PJE92C6m&5(B3ob2uF+h$-vZQ)h0w87%X7)~LwmbBvOy<)sa<%rMR&F7>X;xZ7 zis`QTPwjlpZ{HO<)TxIfV6LF7MzTdnFZJy&!_dz|3QWd_8eKg*fR&D>EC%vRaR`j^j2Eg zK8$E@d_AO{6o4B|&iDdM1YW2~w&+ZK#pda*#F1NV2Gk&`8sU=u=F4jDx5*PyP*#FK zm1U{Zq1i0;YUXpx8B>Vg2aNJghnYVsoH76zb_!Ux5&K5y>~3Qm`*d1DZTI_B{)=OD zC#G+IemeD?@$28{tllz)Khu6EOIzR1QylLpDnU%eD(ka!O29D*?!P?^Abu_z_h?R5 zw&-;GVzU`#S3uy;BB}{urHAn2#_*oYtEVURkd3{tmtUT;T-s2L>izjYczesZsG_#* zdxjQk(LJO5=2^hXoMl88-@k}l{V<^2I&q#K)SoTyYt<6J@0*AAD+*TpWMFz zX3yShuUYFnkK=zd_oeMt7`_BYJR*Xm(3-iPP|^VWbS2#}9zFnj^n>Y&d8on0-FR_n zN>HcEbp-b&sP(*`HUEu}^v7m^kHnO5LliONOx_1i`bbE2+YjIb766p`P~H1KpTS7J z;iI$pX5`L3eSwv$^;^MSk|3!7Z8$9zZKZ*i$Lb^as1;NL00hXLB3&&h>#piixE3FP z9NRxY#^_rJ9th$)Q8e7U`Rp-B^my*nlJq*sP=^@sXQv9yjw20&vAw zT)CdKYT62o_Sa}iVd8;wU5~D2+5}0P4|qLs2mbCnuVxrw@A4dxilc6kp-1^E`%Cz7L4KiOWp3q)kG0%T~VwJ+}O!lAp=69;bNAh01d~o$vNY&M9tE{@#_pN}Mp(d$a zTv%F>4R**^0MQp@J45pUP*T_wFQ9o(eB?z9mE@fHy`n6w@by~2p}xtpYc7rG zdXZ}K5g0RzEAD3vGfzT%f0+A0w}Olv?`kE(f_+fSQGt8{i{8@~%ZVk=t9M!eAL|1k zA97%B+#W5(adH0xH(M}4QQvF@&)`@w2$?c4UsB%MWFw)nPx4(hA!SmThm*_R^s(sYi2~;lC0?hPIz30c+7_G|bc@FCZw9 z)emgGlkOGbl7$0)J`p(mZHkb=x4%n=ZN-;1Q3zi3uA$gvExYW;TwRsEB?{Bl1u@SK z4jW~$RK&vZ7h7c1L-~MIeabTQ#VhoIH+AwN$nb)dobYY}cT|6q-GR!>{gr2>7sycG zN4yd#1D=1c-Vf3WFS}+jyuw#1f+QEzv%#bc%8xvnZ&`-ohMwtrv4xm~r0h_k%ROR9 z;ew~5TTqkDf(VVvKICDP#uwC_@&)W(X^R;A{rCIQ9gt^Hml@BYv&8mH5>w;WZpOxfPc+VnNw=K&G=FdI<$ z=HNSK%IAz!CUwKFHg>seKidffPL(aev~XDZM7UDQwys;4 z%&I^?;;}i(U|huy#j{`kCM?aLoYCn+3-@^|$W7Rw=fv1N;|9Pi%U3=N%+z9}^_Zsc<>%jir1?JQRPM{W^ek_NDbbFxiSXQL}gtsc5=I z?+#_!4iAXsWp4v1de{S^MGk^JtY&y>8Emvc(9_2#f}-B{ouNJ0gm43Cq=AAIR!Uo# z2FTgRu`?K}#A{JQ&p%&@?_f{HC>)~K{lk4F1R^B8qVdtu*~j&y#;Bu8B!gmr_8g9S z=sA+n_fQVm$MHE~(u|gOJc(Tr)Mz$ku3eJ{fHW)@a8EXal;hIb?q}7_mTu=^qv1mX zjWS+P^UnY1reObvvGmF~sNvN}4|XUyJP91(bgj$hfjiCEP{^U 
zbRmQ$&kJY+0!cqNgO%`Gw^|ySu5@di9Rd`|;GiBHBFkDZiyynW@{l~9u1(9+*mo^- zo9%x>T|IoovW=8m-uZ$I8y{!}9?W~SR1p;l3-=jZvrV>9Xe=(ff;2$n$?r0Q#(u{V zzt3Wr9=^T_gF}I22S5QuzAg3FcYs-lcmSe|_)PT~6?QUht0)DW_il(8p-YQHDeakY zsL^*HVGD_Q$&6YH&u|;{lafMl>ZHZR{ALr3WWMp- z5C(j-XFb1Cv`UEX*vMbWwfd!gmRU$KXETB1CKy+-#sD4epCB;X=h5(1j^QPbU|*1j zAle;m)A?e66f%I~Lf%FAwp3Uw`B~qzh*lkmh-k7igtz!PiGu=x^Y)pcf`Z%*3QAU` zG46d1tK&HB42MAAW1w{tVQVoSgkCdT$}56#xu?IL=s7`!U7BNzHjWZPlpc4>liA0-_8+B zmMInbgn205NyjOUf?5`uw`-QaeYub52CBPRc^t%@}kJ_03E@HKi?Gg3{GkqzLJdPYZ*j;nyfLC>h;a$+t5ZP5S*w7 zGP`pAWt!NOzxRw?D{iM~xd$k-(XBdOhY4RL7Wtw}RR)6Gq)9o_O5Wo<_Yi!kL5wL{ zZOjshL(R3?7@6Gk?kNmI{QBN?K8oV~qT_AyFhimR6YI43_wUPlGnwzk0ccje=+htw z2Qu0C$^EhvgrEy*f-6>MlKFLPVzM(-=0>0_PNnD@zopY0uUYxGCYTlFV7~5rqF<6M z!mI}1#9)tSxjZf7toX^Lfj_#q4#g@%KW)ye!f#0{`4vHUea`d1C3S$YP`LnmTjQ0j z8^_i65Tlk~oP-W2rnnPaP47LWrn`WJbVYAur`W6UFBjh|UlpoPzW)kyDA0}F%N+>n zUN|T``o-V_M6Ca0``I+Lfo&NF&@RxGy1vy9(tHjVhqn^5Zu4OFa`&J>5;tLR{Y!T5 zq$YlNxxImu7&w|@(v_inLYT!Ut&HiRrUzB_|amO`w+W!v1Ic@g#jsJqU9`hJe{KdPHk#f@7 zp(+`^UhN@d7gN?708LVD9T6weuu66xr?;Rir_}7;`JUETjOFqfSAT0!m3w~b(|=5^ zhV$pN?MuEWPoS#tRP48H`FHj^^Olj(I%X->31N|#0>mbSgUHd{KOB4O3UxcNQ9!i7k1XBENabUksTQ{~WMGn<@!&0>$4Mfi+OCd-fe{c;j(8{%b z*X(=7(!J0K6g$aL_-Idv7GWkp^i!wQ@3xlg{vO_v;&alJCr0H zw_I(ECv(v0B2SaK+nIW7T}`Brc0lYU8qNAW@HMab_rOyYl)O*k-d+pqecuB~UCi&k z$Hh)9)K}y6ncNV{J!>GI-R08nfcilMn&Ct}E!FWw8>X@gl{&Z-lzpT!Qw?pfjn#e# zHkDzL9t(#r*_Enwx%Kn+$fq`9pwGzw=L&bCR-nt@nfnZf*-64%#4LD)=WYbNkN_lP zks^00n)OhAt3F z63*^062A7Ks|G8SjAANRV(JncpQU(Vz|}lT1`mys+k7o&5{aK=LGQB4ldldFp5Oh=2l`qQRR#&|Yto}v{X?(EPCEb>?d&UEgKxI0PZ;z@N=ko* ziZ$-^Skg?=>VslP)JKpa&-}`)G@3%E&cAdhiOb5;v;Gk%%j35gSRpg-3T|uhgWxIu(|U(A6k6qdEhC&G%YnZV%#8W@9l5O3KDbT16l96eKnEaT z9{ajBklu~}UX_JT%SR$m2*3?G*tGS>*g2@q_wk#Rhz=s-FzZKGsH!r?q~U2MS3`ra z81FsEFrIRO$*|ca4d7^lTqx~j_8BahWaFouB1uAw?S-)8KGN8i?<)oziB3-M#mz2a0);nG$6LQHZm?`mia zdlX{Ba*z;%{5OyA3V2D~TA2V4J9xc(@U)Mt!8@?E%g6hO-X3skloyYn?!A>Vc*R7J zNP-y8mq&FYCQ*?|Z27aGoU{!rO6fchBUagLC8!2{CuXT{!;VdL7R89spKQt* 
zPEO2K&3HJOFYQVl_EhHT6LAq8?Y9Af7nP;;a9k48CjqX+q0vBup-?d6waTZc#afMl z+?cP=E$i=q71xEWkP?dC^F2}%#}i}0Yzzp( z7CoX$_+WYXh}RG@pT(F; zSO;yi2J~5>cA@wgze{chrV@!dV-$2IUiyXDB6OV_OJrD=kKiNs1LfY&4UAsfv;J48hnpt^#h#V?# z|0hCli!}|j3Q98w$!KSBWZ(UmclR|6b?QrRJ(F$LSkvGisqyx@Rx{(+PS)sf;C2%R z)ZinyoKrz^Ogik_D=z}@H(=p2pF}H3Y;g~u z`+MK7z>>Y}b?IxMTDtZa=KG0}BzdjpZw)$(04ze2uBlk76$vmP*@xeKMTic^=zr9& zZonsf+DCmIm!x4{(8xEJLy3%I7^$)m6Q?xB99KSm80uR{rB#OBD)=9=({(~!$IW>! z@)TOqNI;2uEjwJefzLRT#XUhkrc-hw!CydfE)7TBYyPnS2r#gtya1{k*d0#KReD%d zLXFiM`Wl6OU4v)fU4_>j`7@PTd1u(~-A(J|?Qz$T*168lq6WO;p-l+crJYBD6F_G2 z9hyJNP}f=SR>{3T?k6H*zE!m$3fv{dEK8M--(%UFv_|Mr2^g7ji{mkGpO!ScS3hE< zT%En9NugPjhr1a-8D+!a71yFQP+E_nGa60uf)UM~?e!8KxfQF+*`WYN%k|7B5NVB9 z2C9HiUEyd}c;U;FHr~${RC6{B9ShA8LL7@+#@VieTd-Zc++xV?SthCbjarX=n|%Y| zdAD;1quV8(g7<->LA_}IzWMMk&N$bVl&YILp_2*=*Z0PG-JOyPPjmOAMa2q; zpyoMk9Eu^mJXM9s1k@k!A`?VzwG_$YtzrPC6pZqoyJycpn~~C;72meVZc zo0o_V0>SFYNF1C*HK{otrRi@2*xWOW$&o9(ad1FR$=gY`_2*{ZhnPwAlMz54gp9I| z?=0UK)&ce-jlQX(v7+Q#5DCl+Dkg=FScTt%Gl4r9G+ac zYZ5*V1#eh6dWRH*_P0FdPi`KnJc5oBD$=v$;#xC4zGEFF+J-piumSR9)lt4=8a%hJ zAhl)*^i0g{*k7x6g?jUJ1dYPMV(VKX-uuH`l9eSRs{hzbuQaTMQ30k@u-HBtS<*nTAo&N5|@&3Y|5^ujfL_PC0ZuFpH3N05XZOo_4o z$n(rlJewNMT>yxL?Ot=zCelS1+9xEphUfB1w_6fzBxD}qu6XYsyWyW0$6t! zO@9Zr6))u_f-Q2&`w|!h7EJGBuR6fvWK#4tJJL(Os647v$V(X4M;=gVCc~HNTx735 zA&zzO)t!>{hHqURm}if;EG(|6B&K=86Q4huU@J5ZAsG*x82O4^>`Dr8LKaXS{N*3M zq)pVvJ(zG_OvpBXc*8OqKd<%)0YVL@E`Hb+hQ#KtRDBqsS!v2NjC4Bn+Aw*c3<48w zPo_#@iVN!pr9KsI0m4i4%HHS)@peRR_;45FWBSAKWKPEx>PbuPNhWxa;Gl^p0Ni)v z1(IReBpot4GHjG>9z+j!meGuGLDR1(*=AoXWP7@7wyD+8%j6`HF{0K-`g^zXqM4FT zA1E9B+CxV~f}oC|ZCQTLUHS}>Bh}!RksZYP6{j+2BYE5R4f_7cL(>EUIvDBmnPD=( z7r1t*#Y6Fz4mgL5>vhTn`^umYTv{8FA>TcgJ~l;;iag(vA{zn;2Q};?os}dI)`py1 z%c1VDU#i!LFH|VXMY_rOgK43uRSgEV}kCUXiquvPgreUt8$ODeKhzGfk-oCZeAA1^94;W_?PN?)kbjjuc2 zXvvf;fS-CD!dvulV7n6=j_QY8GXb3e5%Hu>JdIw&M8Q@iMZ3=;O|rZe6IipyOVf{~m($bzr0Q)r#?h3SM8k=qo8Z{tgHIbauv9B3 zxurbBe-xO!W$*HDC6<$Ys1LABt_WM7DGDJGhati?DF-%&taQdJ?tpEd$ z)M<%9E|}aDHp2Oc37v)~%T2+Mi5CW+Ilwd+%=y#t! 
zk7))ESVWbnz_eA@bG#QAgaE%Mye2NuZcps+EpN5LU~N!1J}MXUZJ9nDC()K=9P;qb zWG8b_jdl2b*|U282vsF#v!azF04hR@*#o8L$S9bP`7@IhZSqo7PnssmqrG2L*+e9# z&+haWxaQHd5p64vZKQ}aDtcD9CDLhu_7EtI-_!wu7M#)3n?1zW2dZq?-|_c@P9lq6 z?gXk%cF5nJg>MHaszj`t#CA!~?v^OiA4bl?)@BdM0p$I!1`4z$BFNVVfbqDhVVAcS zWB1U~QU!3}q&w(@wLnv7k?y|oUC#&ez4>bYupmW2m7!5Cp$440a3Vmtm#zp zLL(K}ih_gk&I7i~6nQWM_?4Aj)89z|mi#<54la>7O3?*Y>;Xh4&}8(UghQ;}_K5jg z?sk0{A5}u|#>n&ASyd0!ekKWzYLq+z=r`@U8ysWom+lvvQur}HTYoheuRf>c3h>`W zMhKj%{l3TrI#R0|*8%b>V855W&li=E1%-neRSa?0Y3vIgYa z3b{vFRp8h5rtz5fT4V@~|3K`&0;37)D<%$5-ACyK>`L~|(Sd~~y<-O%Wwf{-D8=gU zb5N%u<*P(kI0@&QG*+t=#xQG>cfSc#5vgJq9TTM1G@@F1gG#q`cf7{g^uuDzHTSue zQU0*d8XTi7KLf=RGxT~)JU4(|h96VZR;J~Xf+)$-^HJ0%Sr|2Y3B`*CI8sj*Rse^z zHOu!Lv)^qObICMP@VBkQ$iK=8OzIoau`o*lPBeXJ_IoEeWGvOcix2KoS@2KyJpqGNyD#tM;r#&}P%}Lt40z8e z1Kh(NNDaSCxZibBnhTab21MCG!n%=4_>b(1-&x z_zW*0g>NwNT3VuY*n5cCcvgtH-f?88rPzl!H~o5LEARNk4T9H2ZehI-?6Fe6-@Ps4`*2O7(YR4Y5B7XSgy*bQ`_iO zd|P6-#)GSzS_<`b>)9jkPG^oLGJjtvD6xjO&dK9m8g<|ckR1Ky_`RC}_*uG@+9;Vd zE1pIE3Pn;J*Q`dj(HqLkX2}}wqojUyO9g-4^@8Y>1*s;O19)VG9L=zLzR_Xul3y|((Z zpE^zX+X1!?tO;X?Z0=NY4bGg`8tDQWCbWfbyv&gF@ZRCiLpQ6>nAuC+>B44~z{gKG zJ9fr6^dct=aF6o(ZJ5p4>Tak>htW`>5Q^4mgnM@tuHj2ddvMaov2H_sPRuWPoO1DK-(vn!ua`D5K*xa`craDivb~9UwZs{% z<@VqOY+Ps|1gW`$X}6=+tU^RkJZ#e+A3BPt7zS@{M|1YuS(;`%CfIawJ%E;92b78k zcf9goq$oNE2_+q1D8@YI+`H)T$quls0LJYqHS}mDh;qI8$d;(Mt#Co%%3&OkWtIZU5PQoqtU* zPl=Ac&68iAs_QY3^i^m8ei= zdXZHoPjmGdkB@b{1EIK1$}?u!M%F6Lu*$1a7O7yDk>WekmIUhUlct3x<=2i+lB)kj z4U4BEnSckFy!)R=77oNT)5sZN7r-Xq=joisG-E4VJhyeS+9iC;ag3wOl_f9L(t*2cz+Neq+FCggWRvLt zv0sv)Ss%cS`AY(P72&6Gk#h}<8ooB*`D%yzjvHJ1~qp$mxW{^ywBKgd!8`R7lHKkqtwy?Vw}__ITj6@`5{LxGCSC-A^5n{{T91=OQ-_N9Nursy|U zsQrH9ne(9lMHIh$Nlu?&V)eVSCup)~YIA0Zriu)Q`L9@Sx>Po&;0K7KE7x(CIK3|^ z)OaJrcPX=&=qyRnIvk6lVSBi({! 
z;T_r;wCGS(_Bha)($QA=a+(V4f}X1eK0{?DH~2wW({>}85sr3m&*=(YNxlt^HZ3<~O*_hv2IFTv% zFo(*`PGgEaHw8%dxViz;gEGF>0rAe{HW*1pFJ$yy-^}1?qfH-Y*$;C5X6PvCduJ%Xk$Pn7e4Dp?hs^2Ygw;&JvE>K)5O)SsPffYZ^%kZQUlK2k2w z_02VJYS2!Wxzlm;(^PS8E?ftzESrDNgwa#?7dN^)b+S}XGB!4x5v2Y6`^6?g>VXgwASOl6~>Y8TWo>n zwD!h^rnu%eIh5Txz8V?j2dj<=O{NtL#t_7${m|5{h_{nviHEQWJtu<$zLTzHAbC$~ zyk2U9bNt5Dv2S^~x6W3J8I?ppHslfUu=mW;i~_ANySAo^-n5#&072JZcbE^9v$V!v zyg_ey8`25))cdP}%|tp;j*lEq!3Mc?#}?4|T8=|&I+8P;X1orS{mZ5JJ*)l2H$P|s zPZ!U@mz}CVO2G>AhPb)1t2YfdsZ&dNK+?e@&MG`T*_(j!EwB^JIuMLF&QG$a5Ip=| zxJy~2Ppu~6)-EuaQn;rb&2&uV^$x=;52IPK9ycy*7=LpMl`{VPTNZn7i{rubMz4)8Eo0}q2a z9yGON>kE&IX#B4}tpm}KL2*$acXK6hYVWVh_`$Sou}|{eIkm{zWmZWFWKtuAvb#l~ z7z1z6%%1jay{DXtV|<~v!bO3JY=SGZ;7^amOH@xbX9QwdjcoFYgX4Oe@ewN3{z%|E zcxm&l)e0s{DV)T{S@RmZc+l)Ni`-O~#w%+irfHmcGX?(9OVy)=(oF2z1h8@F)z*j{ zC){DX1|%FneB+nLveeGaAIe1B-k-1FXtGmgB_8z`ULF zrjDel-h7i6*Ik%ri(I3bi6(Dxcx!$$H+n=vTaF*S=8!M{u6$r|bfDE8?YPeCZC_l` zb^Pv;8l$RYG(nG8WD(x$wk$LTnPk$#I8s6Lrpvo5A0AFr{wyh2LVR6e?&b5d}czhwcq zijMh4iQU<)+pM->^dXUqP@_l?*)9+w*G>gs-ah%ZVKubK+Y5J2hf_}dTq?c2|J%&4wMnDb3?Pjqfx=U z2N;Sg_rSiEvU$CvEI9zUe_0-|CmukOm&qe$OsjcD-Z=5y`A$?*hZ~FV49w>s;ZNQ` zmA$UVYpJi&Ia4SffAEd07(s<7k>^!Ov;--3ceda9i}`;f>g{j*nV=$QNoA?|NUP{9 z!S;ymX@-o5_NYl?-P{d@fy89{UFC|vlnR!D2D|_l7ckYGHv9|%WjK8ygtEiu-Hv+O z6V{@eDMd`HgiAju1#+%C5d2bmWjjtkt)w4J#=I#e{Su?$Q7-RIpc7bTYWHl|u-2}IxkJu z*RdS|<}xJVqzm4NfwJU(bTXboxG70UP9kr){RMb~>)TwJeMx2GmXqGu*Q|o>Z#;w0 z-2P&VM$Nt{^E;d<*D2(2OHCUlxudOCC2>LAs)I`X&LmU2kIFrXQX%KsgVErbs!m-5 z{+^zR@n>{vqq{596`TXModg?a$3nSo`#f;L<0DK1>?aoPU$);~0Ruxa{vk2Z?yFA& zxgbP~spO222J3e`j|1(yjJH@vn;HEEb(LwB78L^w#=sIrdfHN0%TF+SgaE1xY@#`|cGr9J#&mE(R_OcsuMxHI zWtZ`l69JyyehH`Dy2l?jGHsxOmI91nV=-RU@+#WaGmHs3)VzhKjm0#+%#;(AQxu{x zS*0QI`fPGTG(1Hja*RGci0G_e3%Gc?_y;v}BRPHy>cdvc|5#h7G08BR%_%EDj>&O1 z?Z+GSw2SIJ0EH7h>FL#UO5Oq^Z8|+xB0B26PCfUDfsw=}c>;rbmwaZp;8R2-n<854b?!!y>jqaksJC7Xa!Jd%8@VTO-4D+21OS$KcKj&Z0*L|OW zvlV2s^7y*=RGD}Uj^t)21cQ9t5Y-hpR8upN`XfLI8D3>JBi$UGHOZuLnL~GHf4~31 
zCD{}--Zz0Pnuv1uJm3@6DTmpL2@JDXds+p#OJZaFgq9A?IZ!|qpn^Ym*_88i9FH^5? z!s{`LsL%r5g!0fKun&x$Yb~8pz^h~@&g>n3ofYzg%_)J5!*psh&NX zBPc)myhBv1GlbSat2cqC_8liyAxI1pHOctbaDV#9(bs6?)o6Bs7tLB0UNVYV?-t^4 zrMA8C_G;#43&Pi$f?D{{LZ8T)23pB{f5v@XiM#>g6#L_UlJW4zc~!&hZw85otP)fy z{|=}%v2a;+cyqqqF@1Sg`mq^a>q~sbOuE&Z(Q0-{i5l=@gg9HIKt4Zh{*-KOFF}{w zu}G)4MdEJ9l=V&~y1_b+xBQuP`@CL}T*Ks+wSp(fE}3MP4v@Bt?6gaEaBI~XZBa(( zVjOIg?hv;rmzrvwi@sDmU)xm)(fblx-TmxRI{0dBqqCp4rY<$5rfP;7FjoS$iKonb z0zGAGRbzsES$cSvNA>5pT}A15gbw@y?f5m~q(i1$v{yWpZRX`q2A`R{o zs&OmN!MFA1?>bAnZgJhG>qK3Xnbi~Qml1Rs>A!&gObRdU;5#tCCa~T(-?{gtEB){{ zh^&lI7XWA#RP}$S8=jis(0cxYI5FegMV=RRwm#j}mL@*V+R)hWF3xNP$J{V3yVTjp^`iz_v?nO@>Qy)_PS0t}#h$a>2z2K8_gi^Y(UwvE!=Hm> z#6Mno+;HPP#wm~IYp^q%uKzHUdO+wUNROYvfZEzr7fWPU&lY*f7m2s^$yFp=^)|J= z0)wB)3}XMdw*4_9%kv{LkzAafpC7I?%00^FM;u2+!JttjNZ7;RiOMt7x2weD2XtXO zB*n%b2X*u&^nFs3*6M-|$V|s_dJ-H{Dtblgpia+*^3n$+B*;29t{sE5$G302S4b@O zUUZy2n{YFs*y{iTdgm>Wd)UtYFsv_Q0#CTJbfsP z6+8r;?*_{;HvM?@gK*wuW@+;7X}x0lS!C1u$?p2=82<)QY+zvyGoW^%qjU&$3DN^% z*gMP+cFUoEUiE9p3<{1ECDC^g;ODK7IbU3|rf6;Z{0*}8`X28MM6`bhiN;RV-n{qP zvE3%Ns81LEa&cC*I$ac8Zfc6C8ZLKxc62$6_azp9I?6?wXAO_Fr((S&vOqV++bb?s zqOg`kMYcF#A*HKsz(&5kW)Xr6pKiHG+&X9vktOzRd(lcSoE5>(=SWT-80F?Ku6H|G zxbkKuwixd(ckHFg-lT>fY#x8g5{{5ZA*9mMz@gf3YyuQb2BPSAlb;-+DyG;dBG`cc zLM~}Q`yr176>Hj(bXOBVV?n8QFS0o%b0t0Y;%1u4Dkho?uHrUgf@EWWkgs3QssUfp zv!j~1iy4x3po;;hM#X22RH>qLTGavdEevy}EX0d1%FuUVKAg<`X7uwwqg(&6qhLpQ zoofuL#nA^E#JT)#Ao$wpa*{U<*q8;@h)B3-?Y2=rkm^T^!N?FefGEI5Ir8Tv-J57e zFrDlt!R=T7@vf2H_7@O`C%3h@m06Da{0F<`oo38GzU!I8ZpY(!3pCO69)Veg$XphH z^u>)m@`pL^CzJEOFTkP|Fd9+Q3s%}@EGQ^U(?Ps7zgeb!Fcg}K`zu*&X>#jSW)KT@ zbq}4oQY>0yj{IOgJ7?EQwgn=i+P2vA&H;XOj{fdlSbzOJ`RGq$MwTIQN zW38CwH(C1j+k4?*1BD7|^!3Bi2;+OIM;bhZ|)&oaKVQaSeyd+F)am^QmDX0?wO zs7{2xX|jCneGYuJ0F`{kr77aS&-hw)Exgl&282O8YO$T;yM!wYa_7==6485^G~HAmL+&xX$TxXV#Tnq5V`a@is@DoGz;1uJPD z1W3ZWd%epQbd89#kooYkRSm$`pU*zn6&Dyf_h z>x;f{@aXFUpkCTudE9i<_L3BneexFiuT3$Lm?Hhew{F}3X(&~71O<}PH|x%tpHgC! 
zMD9LVfl@# z{${s7nNe^fae<%S9b>Lg8MoN-;aD5*vtckOJfnNQ^P%=N*9aZd_9iUfZbifRr|v{I zHeT)zCJ(ln154D#?r-KY!wVuu-|I9oH?Qw~l$zB!7YMCf*Gsa;oK^!(&yRO+&56ya z>Rb|!LwpEB^J&OA#9G0C4mpXw&YQc38#&)_$HrHze8{UEi}Ge<-Zhs7_V4vMk6C_q zeN}lbysWQz4|N-fsRTW$@I0oniZ{%P)Smn2K^m`JgU|E!=~&MsChkMgFgBfM<#Kox;vLknkvTW6ysN1-a)l>Oz z%lFq49K|u1<@wdb!ID9%#K3zj`gKwdbT~cfg7~9wN=QEBzn7G23<6iBOeJMe%@j~w zcIuwW4^k-pTsc1(8_bqHl$uy^A$HNJPHsu#K3)_fq+NZQ7cJ(kB_iQU|J*u*Tx8`iI?;1&3z0 zL+*>w!kYk0=@d3u%O9wy=!rfD9N2l#rp1n!+2-Uctw>vPBbs>Iczv;qBcTgfZ-xVD z%TxF1oQY}Fs~=endFW@$hdusTeRI$pF)(URBI3$1sk1q&<+*}I`ibO3jA=aL-3Vqy zMtw>w^{gv{mRR%mXe4A}7Cp+@@0;^0SrZF5DHbB}yo&`SwnStft5jdvH$H&t)tJ$l%c@Wg`N@Bw%B5&I>jyR(~g9xl%|(o z&WDs94%!!!^=@9(DLISg-*}>WviFJ2r^>9@m!q~p2cj*U^+KXsx$}LytH_0caZr+Y z!$VGeGP>#W+s9kT{D*qelo-}p!h`jYB?eyC&ImL0=gqgIlANfR4D9rKUcIb7djdrS zOn7^KDeB}|i=(PZ9*f|Oc{f@T$ReX~^UA$mD|q3d$+7v~`SMMGxz26b=e(pc-z_vI zwY@F0{6Dz(36*;URYjcMGx_`|o;;UngymIw!;qQr%{0OlHBKipVQ$G}_|>#{Q`XyL z^^Ogl=-{S)a?w+&n{ww%ulTVA<#RLlvO3W~FYPAN$rM<_MOS`dH0;wIjI3GdZ}q8X zGRK^l<>Afuh4bUf1=nL%;p0NiRh=u(WOe5{Kc>9t(kq^psz1hce$yXbfI3@$v6;J$8{_D{EDaw=oIvjk7LGWLPsS0>6{_9XI0V?xfhrYbxk^glFvi67mzk1`L zl?)ssa`@&@#@oY(97V6!m!iz~7sJ8<55QIbU8UY(I|Hw7dUl&al=H00;?QTjN-o}L zrs?eFOX~ydxLk7o$^F3gujn1d>X+s%PsrD|Nol$<`$xAV2C3X6?D$>JCeTppG(gj zHdZbQQJwnL-!%&vfX(y%U<*lF^)#zsv8B+Kqm8J{yx(ss=NCu%Y4H_cZtTypP`ib; zDD#I`9@`bFzv{W+>IsZ3T%&N;AUiJdT!q0sn|@huMVb9Hn&}3{YLGQEHSki^zn3Pf zj{oz*1jIdhjgxaGTAyo`OdM(L?{*cw){rpDlZxbiDA*>lyRGUca9JTU8X~|z^`0^R zB>#AL_3yCHuCH(Vmvr*a;Q#C2{?D5laIM0kd%CQ25Pgwmx`N$p&PAS9I`Fav>prM! 
zAoBlwd=Q8=;(rI;U=_lqG@OFN+q|?;)Ufez=z7EjGMm{UDFrgFeP_MIrsjLW zfR}Ihx%fI_n`CuJdM091sEK#h#!cB*!-!~WN*0gx-#Jxe_+{MGSB~p3YmMs`+=tCG zr4;BJZ$<~`mH6${CpV67%~G-MoNsMi;~Y?q2IltX&a1g*4eX2Nm*|u%)Fp*fS+sWs zq0S^%A_~EjH={Pppj15I8TO4=9_8U%{5v{W6l6`5{X?m0vHBcgiH<5k1R1lFD-{b5 z&x@i&9hjFypt_G5hfytYQSxzH7U2`7s%xny^KfoA$4&mvNmB6m@|DRK3P8dLQxRb* zY9s`sWY#>dJu|CDDGZ!B^6Sga*xygPj2Y)OF=LG~zV#yl`I`(MXwjtaAE##PG$;PY6vwQDDuHMk28?4DA^@Ac25Dgmhsa54cy zRpvJp4duIY{sLrUt~>~B6z8fft{+h^!Ks9g^O<_q@37~=Biz2#v^>4jU3Nr=YBef; z10?rqT`GQqs^afaJ6x#Vr#=LEVPW<%6Bj#`h;nuI#R;%!{TMfHGR&VK$o(ci!pT02 z{&)*hlk&j2VbfumG)LB6Z~~foo%xEFJI)1UfQr1%uSW5H`5fq7~5Jr0z^L zQ)&HbrK(`1KxU##A)+z|)Zau)w6)R)-V|rp6VoA>S?94TO)Q?y_BGKtozZ#08*vtUgs7t-w+>*W5(4VG3w@jZPU>}r8FztO|U#4>I0DIojmk)lkK zdA>0(FsRsRG0jnscn+%)&J-eCiDW~FVq^OCHAvML>b>3?$Z)!q`z6tOP11n>O;YC0 z`OU)^NL4gfjp_%1dDp`XKByTOvEjJyMxiBj_$3-=Z?-W^-ae_Mge~W`g%lZUnKf3n zsohP!f67zlmn{JTy`TlYU4 z)0W9zQlm5PQ8ZXy^+voti2LmjL4{JJh0M2gFph(@ov)VK0(rbZ<|J*M*iW`gJlh(` zD`Wcg_UFAV;idYvJEu%mG6uD|PD#O&hzRX19>Y8KpFQ+~cj5$sYhsOzTImZxS3_9b zWnfBLQ0nW)F*Y~sqqgAR^?^s=qxT)|f~9UNw7|;v;^yFG1k^B#@l$1;y4?H|@(T4D zogLtl^%lw5CQi^M0H&=$TOA; z9IiyH2SZl1dfzvlE|l8Vtk0IY+vbq}{r80WYNRiMoNb0~uVHyygDyWta;O_!g&!_J4glK->V;Pw}&VtAbz7qC7Cwb>CS z7Y<9^{t#p(l50q=Vy)VE9Q*z$IMv~C@~I14^qqb$-h=kUv-BDn;nS(?ryRIyq=RcO z>{0YoBtMT_BhrBKI(&B|j!htl_`M<8;{~rsGo`jJ-9X!^IlI|-n|OP8hf6X1#PBd1 zIRM^%nPvk_x3$hUr;wDtVWX*rp$Nm#H=s?b$9vwD3kK$Lp7HJnwG!Q9#aRDbc7czb zQ8t6IVDZtKfr8XXAy@bOr9Tr&i~)z;QKNI&3?>>Fg!WdhNtGN|y@=Fb(tbr=uP<52^Az<7gPTMM=BMQ6Q$KxZMR{2^ z-z^k#pQeImf5T9(x=A2-1bzDN*@%xotm4ifWMV*m2%IhIEdGbF|Bt%&jB0X=x<=_B zRn7s_AVslI1XP-II5Y(mkN{Goi1gkeAp}7|MLC!@v^iDtyf*`$@5RejD5?Tl$ zK)wghec$noamTp-Z~i9Z+1Y!qXYaY@nrp3dnt2ypO{|D_`>;qByAS+kIhYujySY$j z(Hk0Lfj^^hYsm}o@C$1=+!WviV;`jp@uaHq=JSRQKLWE@ILuk^{zH18<*GfszN+mhq__Lx2L=oZoixiQi0 z`kc`^q>z(4&Hav>B@|uz-C}hL>#it3M!yc9%7pFYYt4UK|3)J%}q8;-+M9$kRjy>zqM#j$9U3Wy3VrBH8|OJufY?&azy>5Q-cO@2-oSL_Sa)@1WIaKD!7amEzdW4-NUH zJ)?Owmj1MLMs6nR^V;^|gNY}~@VGn^#u=yQF=htljli8;XgmMZBvtyL(aA@OOX$CW 
zYyaXnZ*2KA&rrVV@1GRHiaHcWoOmQRle6Ioj$JZ?Qq1c20K+zwCg?;lYz6dTf#4)7 z4KD5sO>T&Sgmaj-Z1Ox$ViP==B>2kNO%-7=L{8J6VA0xFo8zVPY^LQ#jJ~{qqf$a` zB!|Nk#RJ8{mj7q{WTTz|7L4WR7Z#Sdz?Wcu+dhzbJK3}S#|dzM&9a%l6r<7L%Gsre$WmpyRuW}+QIkVV~4@9l^r?9#-~DU z)UVd}98DS~o|KeU5$^zS@)CpWwP9BlkY790XB^6rE>27Hp7ZQb8GFNO3c8E2ZVcsR zEadt8%aX6qU-mC8n~s_@06Zkgd||nAHC2S=KScdJenWl>{K=!NHimzv(!qNyL8Q;h zT}QxrtfOmFH9zGKvPJ`l>~84j3-3-`%!PL`+Z_PlupYe-<3STycJf36*xjU3XqdB)*I(ZS!A z21=g`7#U4h5UdC*&w24)Sl9RDj>{2$){e&VIsas}zJS8~F*dy3{iiZx9;#uYv%44Ow$>Ovp}O!U~`pw960|RHgm{RthD0#@!64u~OTP2y2XhW5Xo|Cbj( z?1;}$6cW4RdAPms;#ODyer^F?SFU1-mIpjHPit~db?>2%75HKvvUBZ`Jnj?lGYN%0vwbnTb4QyM>+>5^ zS<`Iu=Y%8&#bUic$JBC+5&J3fb8B~m{2{fmF|n~r0?WMaf3o!+8bxhR_Yq+(-((8O zV4If^=$@@gCd7Iq7n`@i!aI!`=2a=LJO)(nEe$%9+#T<&R@+3b{CKy;TcN1nbTo3? ztkHdCI#qKC9RurTT3;bZ`N82!KAKI+CCNS}2UDf{D&^yf?uo7X1E$$7$u>S*^B$q8 z`p68-Ie5S7Zmbrt22Q^Rx|yizC%@6&9U}@+fsNiOA-3Caxd=qbiLzCB_-xRe%TtVP zbf+8^ANG6Zw%Hf|y|Rx>67D;@q&YjpEtbiT;~P&lrQl`yAqkUK#S>xngD+|*WvONO z!6Hdr+;3g3*&Fu= zX$Wo7553x)XwkO!0=iDDj}9!T`li05P4WvNU`vbVP8!k9uukyG2!2x+Kj?a?>~Z|m z}gcZF4?Uw;bXy`>G7%Oers%Nh+(M-N%*u)B#z`!clpyRsBGy6viZ*)-rB$} zYwz8fqb!uA>V&7IMw9h?g?zS;kU)sdHr$N;^4eV8?CJ;!lTAU5Ixb)Yt9WTH4+T1AxJAyhg z!&6!JQ?n(ylUuzG_Es9P8R;sDHV+Vc)o#5Nz)^2yC>g~7whTK!d2#0prPuXjQZ}*C zfsNtUovzOownrj`RDQ(sqq~XgwdRMe^Hscr=1T$d!9Ww2Ox{Rl zpSLBpK^;#^gxB&uM!vu=r_+fBUuw{{_s!0=e7;!TcHCdi_;R{^+qmcP_cyIGsi$V~v)j1cr(f=aXF zMlC*>(X>;q&$PT(-rxnF9=`djbx5Yoigf|jO>)4*?44lk-j_uyCv5RxwLh}{D86Yi zBZ1EBOE39hxsDd*O*W-qHXvOqdf$hji1;1VAbs27NV$M%8>Xgq?;vC!JucMbQYc+~ z7*eB5E_|D9H8bKK|tAfwh$3mvi@8(NVo(C&cTs&_4m9{KC(Vgxbe3%aSh-%2M(u80tW zZ$Y9bca+iUPvENs(7EgN;n(jgyv1a%oW=<5zTx)HyR!MpWBp{S?vg`E%`I6g9Exay zAGH>|R8cUQ30=dl;Z;B2J3<5$a+UYI5 zH|ytfwuf8l=d*BvS$=w*{MY)>*_fk$+V0doHkgkXd7iz^VvnU(P!gWjP- z-XeRr5KL)V+Xy6JSOPzrLJ+JN*ZZ6xmx<_xt=_HQ`L>g}LSwLkLp^G|^EWNM8m45X z=(v@z0y0`TTneCC%dw+(d2udyHt3ip zSXDj>2U-5`mY&I-@0vZS7*tzpTbRahPGvhXz&bWaTuVZo)W`alv04VaEkK!Y8O3!$ zI@o&yjFoD;XVy-mNmttyNH40-A=E9vgITkX#@q{i*NorCp}m)w{$*l$El53dKAAIW 
zla~sGwXRj~f3fzOYc?pTaBZ$Z&zwTME$&!OA^gS;8%TBZowc4OOH19l!KfF7%HZ|m zS1{eCMf%Bjj`Qc`qI{9;fDc;B1PaP$~CM>HFv!U&u@##>ztw(cvQQN~m zX5o`xE0dSv3mrFdyxRCG#WEN%rksXI^DM~XalejYb79ULui(w|iiEe|;-2v4~OFjM3?fuWN zky*|Ao3qbXZL4P5!g|Ild87M_LIeocL2lZZ2^GF2(W(7~)_@M72PBOq^+{Nh?QGd` zYjITW5*>p!$50cNI&l-ur$#3`5uwa=PU+IMU+Ucxu+dc<=-*UK1%+vQourW*I~kQDeG^zFm12H)Rarb!)L79Fh*Lk@ z$lw0Jm&>%BA47&1Cz6CtdKx>qq}v1_-q|pJC&Bjr9WDNN$bAAWpnsm?HZIB$Nd5=l`joRx(^? z!kK-$$^a=#bT*OpmfUFFAK2jf?qXy7g1nv?Gp1s!5f-p}Q%Z8JGFNbWqw97Me%ao{ zDlrUFep)E_f-W{_*&4;+uO%P1yGA6JMyZTah(ckk~Z zB?-r{NmrGu2t6a^@ai;XR#BhRr8TNwC(|+#x$3GwO*RM;EK927vpA2w` zf3<$qN^o`3g8J|jTBYM*Jf80SLUOFhDCS8~+t0SM?>IiNo)-=FDWQZJyePv>oNsJN zA8nFj!;zf7($F&YSRWjNXWUo!Oyj9_YPnCQD+LAz?=AJiu8o*FdXA_b)>sH_9HOl+ zt0l4VxlTa0%bqj@2B*49D>;uH`fQ4T+xDDP1p?|?%B%h{U}SZD0w>P<@W_?GvC0F} z^9!ZQ5i;Y-)M=N1|CuV$^?joE?AwI9J8aU+bp})qbSXBNcXSPwwp=YRi52M+Iw`jv zTY&$%Gf?ib!H@mrhyNOz(TvTSqgs)G{mIgJ5%a9+1)pj8I@8$f9Lc>3>|lvpXR6`Y zP+a3*91rzHXUaInc1;?0bzi>Sl+$rK)|V{-t-#_vXS?bR33q+UKr&+<7=_-=pfeqS zrPhsp?@p17c*NkH-zf7)0!)Jb`gU^&^zV=+C1gW8P`AHlnFy%5j4 z0b{)1WTu01p6ss*$gmD{4GhUL4_F(>zmmAAh;AQ8cwJ!sTgrV|jf|zTjEsZJuXM^y zViY*r#MfSVjYFGWxGoj#;##N?m3;)q{UEp4U!>mtdLN`R@v+G+(G-++n9Q(>ujBNc8AmIA@WbU(B01;UMFb}pL_}As(=zz>n}}(CFU>o z!=a?IF;lo41iq1FD#CIIceCtQv%$$;+b?vMoHf5|BGW?oB<&P94GtA_IJN|pnI<|Xvq zTK=L7C}){PFD@WWy*KW@%HW!>C-R|-tJkEmeb9ngww)BA@NWT|+De4+fTkBokHj;U zsL2)?&2=Jn7o^yQSc(@M=^nmsvV|_bTGaRB3Kkz;30p^}fsWfgin_S+&GRwyi5U zZXlj6vtj`W3c)tiE39EPiks1W-1;A0Zg~w2)puiSLKemO-TN81SNV~&X7UqJbVj-y zPt|Suh@uZlrcCMUaZb;u6?}mHvO=Z#ppxn&`9w zp797*t7>KU^q!<)EuV7odpd@X_zr}j-G*R+Tw_OYhb0 zx7cFIwwblJy#w2F%-v`AGazaZYj2zE#@~6K(J%TDx+cgF;!w{F554>G#O3YbD<>e$ zs9PGRPzVPno;*bt+qJQYC&y;&9)R{4c-MRdf#o}0VaV#s5s zH;@|g9yrfbHVX8dvfrDHNTPo685&?U(?4vyUOgo2LXR7RHi796R^Z{+4Oa}O+I&L~ z+!cN6lK{S$7?ORd@S~2Mi~|MEW*VO-Y@i(VA&R%}Qb*ATp63n-NNC`OkfX#+RkO!d z(0A1E)ZVr$0+XCZqpY%>t*ropS5xEs&>!N59UsVA51`i-jJBTB`;5+yi8wPDAfJbNTp(q%_1<9qF zs`%IhHq*wI*PpSsR)!6b>f9B@rbd}qGbEo}i3k4pDG2km9oOuW%HpPW_&66itg>61 
z<5f+|Q&5Ndp!Z#y`6N@sQl#kRS%>D;o;ei*2C zi8^>)S9W9%l35>=l(i%oDNM^SCPDZS5W*U>Pvj+)Jn;q(R=ei)jY>o%;t3}1*9&mHv)D0l5aIZ}g`?4N{V-j}kbD$5LMX(eBG z*`k!%bA}pqO3hQg$1hsJbfC5BC(oMp?00siwAEyH58XsCf|EdI7o%2y1zsJ;PB<=__>0JGO8L&((h1dBPHn; zhprNtqXJuXpd!2;YaR?b8`S-OV_8=}g9l%!s|C6>n8W3V9-;Jr^(P)<5Yp#B59CVP z-zmP@tFh?o(ImePT^Ww67UY`zxGq;0(s`uHugscMXm@nzOy+hSYnJoqetu_IIIh9= zx(Voa&2+ytSm`y8r1Eh*fAb^iMnUmwZ@#)=_vyD29DjX_H#bnm(^?Pn39RrM(SGa# z3AbLVwqPJ-A_whO=6^jgcKVC)7S$@(CQU9eMbWDQG9t`{Tv0!;Qo;Tn8D0W z@VfBR;k{8?m}mpnWDy;*-=p8;Rf%REdAh0rG7w<6ddtcdGBaB&&~0ft`t1;rX@0M} zEoNwP`!N3Y+&l4NCfxd~yt~~a9_Fb^sX-n*o-TT#LqofOZJkm&0VN>RYp4rXuS~L~ z?~6p8cvTFeA8~zUI)v_upvFEGg1(7}A4@uIuYVGA$kl4qQ`5K^#ch>S>@znm7s9tk8%`=)p<>ph|2E9`j$06z$(B5o7w~;|8i# zkZEn+c-UF)4rVgy40SHJnOx|#-Jw!4HZjH?U)7~c)^(U2RgehgCpAV5(}Uma^;3@< ztlO&YhzPoF@bkNd%-W6i??8{tM(2jr!+xCcYiJ*(2pM;WgQ#`Q(;|z#xmW90McWfw zc~CTuJ$VJTM{ASjtT8Gln?%Vb6NDpmD4M*9Zc`ta(~@zO8PC@ued5J#nRF1sPjBF@g#?%`oK8*=xsKXxJpAtwd1k6+3!t z`|ZKh{!VbW&%(z*T8j;@h||_?2GciBNmHbB=g_0aftcpa3G&;A@$AF5D3vzd_=D(y zyGRHEafw^1Z(_62IaPlGVX=;xF0+Pbj=U)1Ct|J|*4Ez<+09h5x{_KmA^R~!Ak1OJ zaXl(#0JX?=Mzi3h8doGbWbN1VNv+u!$}A9rKi~RfkR0ro*nI6E+*?1`l{#Lk{OH?* zV(dQhO=Ca+Eh{TH)k62Cuotu(9c0nXL8KpO-Hr+veas*z_iUMFvnX#@LiDm1XM894&KClffkBDD%zkP2ta(L0o zyKT0NU9n8p2RF%&hp7f=-7BFbUEs>%1Bc6fgfw^zgqz8ijNFm5uC$cc@X9?lVO&h+ z%H~$H%7By~W?^dcBmXuV6UXes#p>!)>2A3zRz6RNn6-QTh6XF- zlVaHHFD9?sR}B>v0!${kD{6#J)VzYSj}LRAPENtS+;eI12L%E#rv+rVk=#|AiCYRp z=4c9$Jxb-{nj-%@7pnZ`mR#MW5==iUj-RWC`J8JFDp<}_{b147t_b|%^(0- z!LaxSk??PtvM*7qdlot0ol;(LeZKu|X&xmQfG_>!sn9Pam==dKnb&o^P(vb>N&v8xI-#MGA0p?Kv{?CUpS_%CzhY zD1%Cw59jhkm!52=9xmA=t}80Xe?FOY-o@ka!?L*c4QHBy%F6XR5INB=_oEK!6~JDb7WM5Ki;y zc%d{&sn=n!nW{n-JEJ>8q)?Rkbo|Y3H0FSoKKf`ikzYAJEy@qRLx4;7)CZt;vBJ_p zMu-L4Yo;1qhYt@pC;c@kBo#lehKV+w^WKRv@ip&x!k_hV?vBNUyehu&JDPs%Hpg+jKO;y^gfrnIwcr5*uOTjnKuEp%xB7xNtjb{WmlqL>ee!TXwuSMz}#*1RwdVR zL@?0Dpa1%St4gDT)v`R$gp1(J!MS~aF7~xGj_>`zSOT7R3XE72F$*4R_OnRVE?F2? 
zp~Qwi(>8vf?yQa6p0;_FqwDR;fT7?4?~VujE31-#WQCAbWS0``d7>c3$a~q+C6}q4 zm{o@LTZbbFh!FD#H?_H2V9%Lih-_`2IX_A2P_k;w`bj*7^;*#dx$jO_?zLBI6?zB; z%Xv%OTR3A%dkl=iiUSVbn%1#JI96-(I@W!#N)`Z&_H+i7AmyrkI&SA2Q#`Q& zpZ7B}A;{e&F_0)u{>c%`xbFNFgH>10K#?Yhn%#|f%&{v-fAfwicHO6Hch)xHX*X=b z3l)Y}vnN7J)4buycN^?Zj|%2{tb>pFu00g_iX|#T$0G95DsCRhcZL8O!n1JK9CB*C zNkz48CsHzcffkLCjdQqhUqfqP^VCF>=7+BTyyNZ@C{|CXJb}oHvc^y>17u zsooN1ts3fqKSX?qrq|A6*{alQ7;)%&d&E_W^fK-|)?`);=@n~q*vH9of@a!mIfyf# z;U|;61wd+MYUatTX9w->EQT-P@y?W^107NKJ&jeAxYyY01vi!@yVE=BvevCd-S0t> zr%tgQjJ!=7!+$cu%5S1}wPuA`A2lq0-Vcz-3oE|*ulaqCN2g`rsHsNjl@AuDe-&=H z{OYmoXsqm;I6>=bO>AC%3|2h_I|TNftgq@K8RM&R^e0}V9bXpZZmqgJ=8QMjdXPuArm#jBO<~Y9v8wSFd%tibv)8#= zo0R2&^-%?I9S1218NbQS`K0dm$w2rhK;81(@|{^sXT zl^|wBy4*kY+U6~xBfs~mokMYNfs1WZ_ZitJyV7#dWT8qx*|A3{>im!4QgTGpuRjsN z`!pNS%29o&;d#IFe_F_|oQgYZWLLy@v({J-`JKR>2Hx76+|Cu;8xt+gT66#9&X>+X z4owx$r|`z(^HvJTrmT>*w|JkizS>J=ju8m$QE9RXqK0~Q#haaffjDWz6~fYQ80Ulx z9xz3w?>*kF6JCH)Jc(JU1(frLo#wubNzD%G}U`!E0Aihtte(4E1*l2 zPp`+b=0uLTA=P;16x`o%T32Y$$?4bS1x*at(L*a*BZllZ3AMZoxV?weS-}POIn`4c zbrECX7&^&7!!Y-SfyWCGd1BvAl`ZhR%W*}9<&lDRfQ`Nt($wglYc)Dmx_F73BS(-D zF%Khq(9^N=2AlfO(7b;6KwA^cd*1hOaX?_okKMDeAfWa80UzTnm2C}J?6^h5;oxKj zg_!X>XFR-asowFK+WDUTFH3Wl4==KeU1j-8W7L$^n1eYB)P-!!YYPa0Mjr*@{H~O= zSV*HFzmR1PZYVOjaxpES{=_TTK%Ot>z4$dV?%%~DyKOhC?`XG(b9*5}&3s+g{)JSl zcHS}*^t$679b>-1`2_9?Y#3{{YPTS`{_9h@%Lo7yynh4osMZiMxm<;w?5s`j|GAHG zc{Y|PDc_qNkZT`lDh(eHAa~wGuUkB1)Ab$B4U)r8+tbGH=W_ebe9^TvCWM@LQ!%4H zjX>sKiwtHTHG#WRG$|Njd}=jvf5R(3OD{|Oo^0SVk+swB?p-DfAH{{tzUDiq$YETn zIm4QEz7f&Q1lYxj>jryiWsf!D2^|R5RozU9+UE$!#|yUB_y}^W#T*BzP4M00e4UQh zs8*=?P=hC~vA=!Fp{iOGg35(93C+VkDO2YiqlRHMHstL~_zcu#K3On%9+_)f(Jv2f zjq-ufRN&+Ov&Ys0X=EdiJlY_9=s0%+mgc`fi(#{*6-tZ1A*dnGuxe&U^+w0}_6Y%u zwRj)`zG$^KB1&g2k$BY}k>a{)!cg`CHX(M8`Y}K3;pce4_YyKw9_|?=HBoj$Z#_p{ z&kKeAJV4pv$u@GfPzV;Y%^31i((W8zIrShbfHjH{c!Ri0XTE^kgW%;FbFtaYXaV?+ zc_z7#T3ZoGYlJ)*L8JxDe6#|_-;yZt@)FGTVncKQTEf#nW}o@NrMMbfYuOVH~~>}f9SUW&2;0JO=hr4 
z2i1=U{4~AZ6Fn&POPkA=k)`ObVbhi0K3VtF0a9D;+tj>W+vJ-f2b-{>MrZy-Tu2I& zDj}Rp#rj;S1}*Wg;q%{W^Ot(Ky1x_%|4jx0enIA*^5Q+$2A1KgNRkx~`+J^G1!AyA z7|fZgyYudmqG0Z42p9jeduJw#`D%}EPs&-Lc9cq+=m zF(a&bEUuX1xkB#W;ba{rdu2%6h}u03qzU>%VTOl1Xl1D=b8zeKOoq_9mXnA&B(5-N zsD~~sLPK)`2iznCHastHd#O$b%>&g2IrSFZysFlvAfstRQc|t|-U*g*bfU|oJ(7cG zsEED^qm@9HqQ@b|e#1zKX*M~^uZ>JDjlNsk>%Y62YoXK!>6L-oZXAyE(1F(TETg}t zf23pB%AmUQpblny3kJ+9eld!wP&~cA`1A_0@=Uo1VcSTp+b=-VetK9n!GxkZNNqqp zCd(i%TusBi^|&Yl|ARl165v>f*9IiHu2$}z03q(0HKEJd3E4NwTHgX{Z46Sm_oKjs z`$n^nU$nLSCK?!mT;VDyh=LPSd zFNV;yvE``&7< z%iPWdqxF^)uod9)7>W4G8UV`P$iwAeJN6M)AT~^mK&=#B+UDU|AV?M(4h|+|Y9s4O zLUmv7_VMInp`WSv;t`R5iX1FE60_(|a|0G#G7Aev+9Y);HXOfftG5P=9-Atz3!d(L zA;)}mp39!#wBBcnQffgV?uDce12R43*t`@hrfkpVyIjujV7PDO+VSXpq2PVKGX$t0 zw_+m$qygWbOWeI8&gPcS1z28Gtt=#tu3iO4NK!Ysm_IHRI9LSL;dq$k9O{ow(MGVR ze>*|FDn7`kcrY6Cq@(RG4<;V$;QB%c zw>H=3#*27C{ojG%Sy}bs-3Cq?XkKKW*7yYwfTob%`Y$9ZdL&s{8V-NY1&3`c!u8{$ zhLdl`3Z`WTFMiO=fR>O61)$#^cAJ1Tpo0u$-qSLi%MmLEYLFLK@d&@yZWXEZ%BoXT zlJ%7ny&Hkb+-rR=yAlP;A8I<&I@rH`aSJ%b9$z(}rF$@Dy*5E%qh|z%eR}2aOekXq zo5O5d4)rX)IWRxVxm9+}%%?|JNGnS`E96q9w8s3-9P(}md$K`Z^I=v;)yfb4m&FJB z?U=n6fnC2+lhki`b-K5TYG@??WyyN}J-YrU$=uA>K2P_tJHtw|`1{-!bp1m~4_;IN zbp{wgUFiDtJE|8rc#4QUV01x%Zs*{U{>V++R37l02qB(KUNj@}x1gQLIfLeR!&aX< zh`UhY56+qPJ3Q*mTB%-@Bb|Z=mt{>ARd2U`!-A`EMnLK3EukPYjlAlX;(|eS!Szdgl-OdMun!cBN-|gQ{9U1zrM}Sz zB9gAManS7JpoY&9o3)fr!g2*awh7@kt${@@G#d0u;ozC_H^9pG8RlU2d$;(Jr z8f@Lsm2#zBQKp6i(Jn_ihP!WtX=Me7Sw~x0g`^b{zym21B{^s%BqEFl4_0%pJ zTBBsWQQLlnw)^HWkrrGnU)Zv_Cf!FJJusOufdd6)tRo5+Q2gMG>yi@?)k(wf3J~Ya zsz+w0ySr#VAcAGwyVNat#&L#bB7&$k6OcwXchK@DkWPELeD(I_@|(mMwKF#-`LC=P zEziKn$O_Z6hz?43U->kB)u%Nu7qIDWsHeM{nV{<;{r9hd1bUe+;`Vkr9K_QTD8HOC zvG2rb?7ql!J)9#?{oA!M>S6plpj;bOkJBLC?f3kAZdKHIrnFymyzdXK`#Vq_`J>kv z5or&nO31q+#@}tFAh#yLSNa8gmDMhv7=oG1&ufTXLD_&sW=Ca>A;l*cOg_8!qgJ-7;KZ->mja zElHT)#G*>Y4R}Sto|QaBANOtKrtJTYH}nIzBOrw8USU2en-d;96X}1;ddD`k z*TmRbwaDUVC!1{9-S|QJRw=d#*zM<$k=lroRN2Nu->GtaN{9L7X;`vZ^wBbb7AJbC z%T@b@#cBo1YFBt4)$`!GRNU{^4Gt@O 
z9$+zDfeya&m#ZP^I+QeO(??}P3mT(UK8hMoeC`Lttx|?^NRA0>8AC)j(IBLk0JQ#s?2fuE3aWpHzP zh8E>rku`=R^Txf8g?F}P(d{pol|l{|f5vfC8PBNI-NRP^=eorpq+ z`tgRj+_{ggItKGno?A#+dnp_X1Ntk?i!(YRYMwLg!kzx(LZUsQWJuLSLe977H)5n$ zXcl_2^k~1A`lH2BmszwcOZ`A~=t&c1%4?1$WJs`c?o1IVs*-cN_^^ZK14X(!X7%6N4U(XSs}7=GtzdDu!C zrN&f1NYS^=4h1c1PnF2SLSFWd#Mpk_r>ke6eu^_lZb z6pIT(K#-yR?(z=5KLqNWiCOBU{>uYh&MwX7OpQ_GTMhk6^-9q``ilq_0Nl@jxHby&rvS0$9zF<2d3n zJ%dXBS8BHll6_UDOo5Rxd}+_|e+XOS`c6Lb2fbbsH|eTp;L3<;EP<1c=#8wg=*}n@ z7f{r}m~j}nT`d#^2p8Aijo@nQ zdXI~yr0jHtaD4WPA@Hf0+64AymnBNcIYAc3Bt@7CJXDZfH3qeX&}%;T3H$N?L@V?p z@{E*i6?vB1oYsn{F5};%LrZ|P{CJHTtbA+}P9b~k;3Ad0Jv_p+KfwIOe5Sh1@*#}) z^>*^!&qSe1g>U&$50Ybv-n4+Qk-yWq%@U$?C+91$O^C&PpT@36JE~H6P67|lcAVY4 z5aRd%4CgTbMEei5zwnvWmtPN6f7tv!*hCPbKa2fbyIN#c86MgG`nc#mejSlX`Q>sZ z`wyqF{O3K*T}deo4G#9SX0y%;`R~0UyWDFVUCDK%wW6=&-f%fqARjRC^ruX5%B-O1 z<*Sy!Zj2(+_|)E(y#}zlT9=ER?FK#~cj-o62O!#!FqaNcP=^&meRUmk>GEj*7GXy4 zZ^&6yHoICjeU<2|JVt;2G5kk3@^;paAKl&Xo zo>MG7BE{7bXnkm*2w2-wz>{Ch8_)A-@LCX3X!4z!xdwu1m=1)-XWhu3m!*r$w1CC- zkIJ3SVvLoSA(Gnk3}BN`A~jsXwAC?pR8l)yI&FM|w5zJ9$MHZRu>ztz{T8j$hkaSR z_1L(uem!O#t0Sl+j4ATOh{T0vn6Pf1?%XSK83}#9=7-_{VE746;J6<<324;}r~dQ!f-~b;>)nep#HM zo7~5*X=*&pbXpUT;;~Pw>^~(DkOc${G@ULpYC~t=iLv7i?mioGb|_g=QX*tKl)!Wl z)~}iDG>CemonWb)tGFWG|7eJdwYjW+w&)%y9dj>)da_6##Fsf?Uwuq05bY=W^q2d` zzDV~OI5Ql}XB$qR>IPMb;6v>=Kp+)bv;+xlryXb=o)T03@J^Wa`OK@E?<82xAMSMq z@Xx;%$lN2vWsAd1^LoQ)m!xsW)sc`Ndh`rFfD!=sJ%v4YM zGsf%idoP6t4<37rJ&BmLV~b_*Vy^oDFO7$$u-C<%#;7Ue1#?f8%vA!(*~t+H$b+CH zgi~phoKn?4s)cq%u@{_I@_NlIOBbIRkb5Jryx=W0lD;E6dL(qQ_PADy)aX_rpk(xe zeQ@w^MGxCed3U+=HPy}^ofBN9KgaI7LGMbeNo@vF9rSRQ=0lRI2##Y(jhFC_&ZuN{ zl8KHx8BRXoS}4(EIXa*uAZ26nBIK*SrtjnMo&OE^>Hf zIcFWOtvMtKHea>z5UO6q@{<(=8nu2FgFYeaCR>HR>B~MpJf`cM;S_`F2su zL;u5<9~gOwfab-lTI}+l&_r*e?dHCp-88w(w_IXn=Vx}qSb4Q`TkNmDk!b$H_l(0H zjTOW$PzyX_x0^L$GEhyy9h{+i1Vn21-rew>(@baR4C)?UI#2-TDU5B{YVe-dGrXs4 z(`WYADL{S$9fPV0zd$~vECB%4Y>3_yE6H`s>OrxIkZeX7A}3*%3e=YL?j_oVw3_>J 
zergI{QyB{Tz3wAD*sglVKXCB&q^#9?);}b^2V+lavGU|FdAYakb~5LWzx?onN+y(^WGz@L#%FzhA-y?MepVyP+l<}6?DFy zBB(;3ae!!!M@8>vz*4vd8$=9i6EW(ceyP2~t*}byPoeOj$C{3$zoiX(ms{cG=tGqH zt&0yA46={fW}l5oEm~bs_3krl3Yk<5Z{{CUP;X`9nBL(D_1V*1xtEOnSQLKRuT{ZG z7tsRE_G~vkEZ6Ql5(@Wi7}3=FP2ER4h3+bR7h;XR9~I-|<@1pcG1+*f@bd0=w&_Db z+*ZASQ|q0=_^m8xhH@Mx)>#9vr3a(zoKuOyRju(uM~Tqn z3HglRLuW=U0}Q}N3J=kUUZ3eROzznr`vc&s*UDX!m#1zdWW9v3!?Ano;AG4oZT@`n zkZ&vU*H04IoGS_N0d8MBon&TPgZGQT&%(vNGIL1rmjq~4@C#0O=)v2l-mG|L&-H<$ z{UC2@c_OROdP6oyR;?%9$LP@M_I@b|2(G6U`nNt^vpTC2$Xn64V1@K(J2@F9Oj{iA zuK+2Dh@8n5L=5^5Bk3XCt8l^x*?4er7MuC#!`eOy7~Xyir9{2_)dk##Hdpf8Lgkq{ ze9WLyKf!&YHR80s;}LY2^>lmTx`VhvTKky`@xHz;_i<|qxR@VDCN@(~fD#5?tejQY zNL26{FF6H$kjWhUT=K+Andzd_^wT6PBgribFBzEl70O{0|D;J;*?#gXBS&u%z-2pk zycsW~q$(QNaZLJajcrEgkR;Wfv}3w6&H|`-$HIZkIdJQM5Fl-I(aJwa8PH>dPyM)$ z!*pKlC~RWxKN{nn=pVGPy8X|;33)dH8x>9tW3qiQ-^3Dr5$nx0`I~Tkd^>dQrcns$kc47xT~(E{wIw5b zUV7kk$On^R3P27j$C}LCS+MZw?n5VmGXp-DZ_S*J@#Nos_fP!^@q1JxPcjx@A8ZBs%Pui9@bQt3Deu_#{q+G=k3#4=#_v%k zEEvBR{w?xUeN7k}v7h(-hiiOH4wJiVf-ZatO7QI(iV$;|4w~HNGBEdS( z>uYg`-ly^cUxYr_rh)H30k@Tnj&bZT0LZ0W#5X@r1kz+??NPCX20h(cM>e_v8KtTnuQZ!U=EObRt(xnFoq4y#nq97%-009Cb5NeQ;2qAE0xS##(bM_hI ze0awh?}sz?`rsI#CTq<(*PQ>W{Vrvc!u(WCD$&~-`rM9$6no2jfP1=fK%dyacn;)- zk4vH?8a8LenFQ?;eKV&wANW!wb&~r!+(Vc43G8Lz`$hX;ig8_pu-?5^E=yHRpe0!h zq-eCcw=plS;;C+Mdgef2YPp3K^f^jU*p{WyQ9aW1Gxvk}GdJ0cJU<#&jG$+O14S!SHbArI#4>a`O43y3q^?h*CN1 z`FtwG%QYm#N+-uP2AYwCbjVoIY(_NYzer_f(XH2B%oSUquDNH&#Jsc*yHICOV|2@r zHH;Up?0mubU^kOI=GqlYfk*TCI;+9x+K8UdU;^D@CSh{KWbr_;Z0=4O-${NYUZI>T zRh8m$-3%-9!8~*qZzQ$4RBHX{Dg=p=aq1v_JK)T3Rq{##{rZQf3;_pDjckT1(F#g= zqTM!&T0buIx;qL5RW2uy?Hj4Ftt@+MPNRb&45m8RA1T_bVR)Gw*4VMRf%^_`(RfQMAw?FD%5J}dL+F2K!Rd~)n~5lRKLzlD}j&h0amEudH2GAd@LjC z+3i;P*lR}m2S3kt6+ns%SXFWLFHrf6=Qj3b;+}Q?9gubc&n+o+`jaX^j^#>;gmOF{B2-XNO}P8YpeJ~fv-h2{_)%V zgi>Rs3+F5qjc&wWe{}o9Xx?0PuV1fG*5{GfG%kt#b*42x$D z>eJ%HOpD4)57ys5exTLr(9G=;SGFNs5=C#hpMAwR3StH&Ajf zJTCu9*{;OM>3Xb&p?@x;IXzS1=IBDDV1l9Pb_Xs{s_>@Mf`%m*wn1l@lHK<*7xo>@ 
zbo3LFySPb-;FvCRXD#%moP&f_nV0vXd;TD!Mw92Jyf4MFG8D}JXbI`o{NOO{g!M$h zM6ge$AnzC4!)c0&r*uj&+PaNXv58!m^$xvmpmpn0SJ3Yn$2R8_86~ByAu^>P?_+)h zKL*BAvOXDE$yU3-Vgh-6f3cvnn3~7zX~gYPqi0@X!lua#Iq*KC+OH#WkV2kP3?8Md zBcj?{+t-4dzfohWU|d1%onReipdp}5Xkh!jdCP?ZdKJE7{5Y25=bBHLP)9_b@8mB> zzra)-v6chBKd0Y2ykSb5Gt4lk*PfS4jg%#Nl~7ulq#S!4mDogHxCM1l60DbP#w*(U zOT5rEm--(q zeuS6a^r!WJz~L*JIrQ83cN`jP|F>WrlatH;_h22B27*siV#{b-&tpe-$0eR(j`RJ$ zN9Oo1T-ev#Jl-dV9B82d+b0hWhY@E_9NK?Bv}&;5P{bN%l(?nhN_*VkG5}(tE#opY zbLdV9vfT(y+CQRc06;CE#KgJJZviSOE|iZ-QdN9-gtSoHP2%<+{ZC#BZea8?RL|Ka zxF(}y-GFHSbpPPjG#@k=_pVdE@z+>uq=4a1EG9@^^XaiJvlR%DLWo4T;5~XJ+YbNX zL?k`&$4>tk3v#*M5pnjc0l!IXz30OJ!l`wD96x0k^Phy7fzFNdeC>ulRI2Kq;GNa8 z02BKp{oRiZ3xqJCA5qZ625>QRV%R&HpAw(!reMFhMsctl_SSob9iF;SXkj0x#3p{L zNA6BBR`uUF)K*v1mZg6|Oap~7`zw8|f)&9tk4`xt0X~Mu*;MqV{mvu4{_eT0Xjdv> z_FpK}e}Fn=uJZso^A98SEv`lwAJ3*eT3Tjvs~XMJDa2Aij*rXz&foB& zl#n}I<6Y606eS)(w7hRpfr$+6vSps%?>1*H^Cs$;VoR_3WS$uh)cxZy7Y%jct0ni> zFeRxPH22h@+?Q)8eQfDW-nW#( zV;lci*@RNFx>loTUz~M!){t+8)o~hV;fT zCaDfc6}H@`vOOG%kT^ZUn=CUr32RGW-_|`CYvlFgw3zm zD?Aq2xD$!w5RT4r)x8Mwv65Q>|H|syoJ1_G*;92s2xr zt7Q_P8z8*HnwA0Wok%6YE=|t2`78w^VL^=$sO8g}MasuWj-W6|+G(2xUHHm8^JWe2 z9VPEmo;+qVR!u?A%J^MZ*tg9n%rEJboU=zEG%30_rA>cb=kh<+b>_Kqip`hdIw&E$ zc9O$jp+UxAk-n#-;8`OTF&nF}&mWk$S#1(myY}sD$AW-0TT{ii-gGHk^Uh{Fwaotr z?`kA^((rqCoU5t0!>Uqk-`?BpcS#FPQEbNVYI|GV7Xr?FGpTBVI_JTb$wQO47GeTy z+uR^@J~;Nns#WWv7OAa~Cjzzn4fDF=gU&hXg#Kli{0SitwG)Smq~#z972*sm6PJ-} zEe&;i6rVDFBq(t$XkC)duHj=Z3KD5>-O32=5lL?r9uuXYyy;K-wfVRX*9qvc-ua}* zT%n?Tm?C-;vTf~d9^@S#D$72?Qz&XPF)!ALAq0t2$+iJ~L%6rlw0Ddzja5X1X(T?r zOx^ZPAw{}T!QlhE0K422-s3Q3$XSoQ}H#C18B4PlF)-@GrEmY&m zc&QAV*Xpbic>%saBmOfFNpNOK?DF0HF2&5HtORfd=@Y+^1?(qwPN;9ES4l6d#D~f3}R%EJWwQM4s}%f zZE>69*|d63Y!%)_HBsr9AwK!4Pqf6c`lN%Llt#Gj7vz;BGc&OJ7z(iQ&fK@o(PpfE zT&v}8=!7*EHoKhvVsypiFy>;IG{;!IHEY#lwf2(Q$K9WKqR+~zluXI7zs=qZ^eXxs zaCukkUWcJ}ISsAYUWyG&-}hJ*g!=NHau5~iEqCWj8;Nty@p@;+muP|t5HmXFTE$!0 zx3@NeM;3a@xgT_gf2XCJhA|5&Nr$!Bo$%p~BnZR0kWyP)SKP4#msy|vgSt?MfStJ7 
zcgx@O=0gh{8KzC+&bnx9E_5)p8)_5KYcd*))Wz!8!8gQIc;1vxK&mLFV{}VE_lg!* zl9fU1SNBvMLaTWI*0}5KjHwR9e>di5T=T9SF(8b8=JFJNCXzDQBj zR=z>SgkHK{no!Llf7RM}2V3U8Sa@1cNz5;ifp9g<7Y}N8(A%g3n;O_A4DgE1<=t)C zblj;RwX3}uayZ(7LgW>HW-h-qJaL=M<1SUD?XcTa8@6|;cc-=c?F&;4Xg+VcxOYY> zsoh+e>lfCY)u;6JkkDT4GZ$5BS399$oEt_qf?*`#XRpXdUtq>k;4LMXcD+Q$d`|ncuNr z$c=%FT6nE#MvpU|)(ryNjbfn0M@U;vj@jpWZy3o79yuT3h1o6o^oPYY_DM_G{#b3M zx40V|8pxs-8|17}Cclw*W3o~aek-u*PA}{vb(X$WdOJt2JBl*u>cZbHpB>bchU=}l*GgQnffyESEL&pyZ{*l)z_zJvTi7F-&k&WaMuIlQ4- z!DjI|VXfBq72>PzK>Odn?=i{73!~CkzbRRTrwADH7Z+W<_BqkO7(+jd_qN}p%ry^oGE*wAkr#%_4G<|t?=O^QO8R*d~ z6#?@wx3<#}0b=1)dT>=Ru6L7F&As1D-HOV*bb$3*1%Hx-VA1!7Tt^!*P0Is75e+bp z2gY&i0smV z>zUS)>^GAur&lPpM|oScM(?q`Z6AmkU9L}B!M!^>{S%W6Gyz=E;@Gl#0Z+8=%Gn;0 zu&IQ%<4poD7%!SE836!gAi8q(z6lrN81(2MO~#tpAtgh#uPa-c-?h}29M@J%$xqZ4 z9swx`=|Ok;b~xiugjDhXoiyQ=>W|}Q=K+^Aq$c-n6r`c~ta0$U?YC-I=A#(R%_IUs z{_23vXGSCsc^zNV7&`su?h?3PkDi5F+YpEvEgf9CCo`5HYPOcK`+I8^a&$Z6UqDh@ z(6ia%D1a#L_AWC$9|I|qC=`1$%E}B#!dl1ZC8k~ywsQp7CA5SZ#6*=IFKyRORrOc{ zhWgRgO5=J1{1V{j!=1|j4V0awrs@amK!o%)p!xUFb^BQ>WA7TsrR8&X&U@F07T@9G96i>KO+R_w_Djp*b$`n}e4Hd&)#eDx(?EYBU7!B=$$U`0U@*4;# zREbvnSCAYYoV~SepLfEl81tB(ovV)ds+)UgW#Yoa!Mo8H>f514y#tzF6_5v{uL#on zIvpCP&3CK-NF(E5@61SU0My&?cTz7bb+1WKH{|o^jknj7XdLcQfs+$=Ew&|T=@Jcw z>o)t%?VBxP_k7BX`>R7k8*2BFtn2CX5?`|qe*Q%{btGXz7Dj^&6{Q`1g!C;jQ!}7I zqMEsE?Q=#|@3O+{^HKd5y~pq)!BA=2{edr?qqKsqnZ-Ud_bMlT9!C3M_sP%PdNX$O z+iNqdG(%|qVO&-+Pmhwnf7LKLE0rmcKyhmf8rD5=d$UP`z(Svq~>Aguf=)O;7 z#zUMjrMm)!kpL%~4RUt=ZVBqGp2#^##FtZl_$_$ zN7$IArw@DfcZ=I`$xYuI?*p2HRBVpzWIZ_4Qc!&VAej1<4p$h@l^Hx?7~*KD;J?&c zK5eKt4Ex65Hgla~+0VR*HH?wOceti{2!RUIoGqzOsN zqoL)1Bij=r6r3aQD*=$EB_(MpXq;-b&~o`3Ba9w0@_nT>%%nT+lH1C-364FfcL&sO zANT@d-6=mk)`$Hq0~Rr1vhN%k!hlKE0N}g69H&J&$9a*YX@5mcqpk|+WNQ0@e5+N7 zk04!QqW+8!fE+tA@sNp?Zqs(d$#iQG)Q#fd8>VxQ?s%4)%fd|moNPL)_~)jk_vEFL zYSm@*@lh*Z8w*^g86p{UxS$wU>iHU>*36oR*#& z#r((Gybj^rm@Xns0s^?xBWRR@$&UPVLo+M_@zapq9=ZO9dt`eZTRgS7>t0}G$c`6q z&2+{rspZhpft`9OL*5rM#75@|vLtDgdY0rWbd*Ds_;OIGZCH0Ci2kNzd}3jiDUg|L 
z8WAcs-#Di0uxi7>di>J_`xhdX|GAO930!EeJd- zdCBX-+XZ#Cbj{189WB6O{trQW{3MMl$N~Ukf|hrLM=ZGevF=WO!>>Hc3e9BvG(E9` zoVvD$9ei+%BK3p)Ojsz6lQBpMxs9!;-eN7XixG;2o>{&1;Q z&l*E*?O|3*8`vDQLJ!3ai3;Xcr-qBOv9cUUTnLM3tfW`UJ3&3bfX&4li?b@L;BDtr z@^&TWMNg$bw`$iOTA8gg5XNw>Rk*U@D8YZ63FDt+g&%s0@O?ci;(WQnzh$;IWqAA< zkOv*rlQql`ao$3q?&z6%<&9Q_V81ROhbmefEo<3WLh&zXsy}^&%W-C%p8nK|{fS3E72ybert;?q4;`XvKp;DCB?bI+cIv8N8@gld znj)&SqkwMsOUnSUCr)Vv#CG}}6`*;g#dEIk+D|D%8;6BTreGqq2W8$pX4?uhW=z!M zC#k$`#>$2<@lm})Vk+pw&^CNbeL8p%UOSU`AI!R>)TVVrgaCEol!^xmlY7CJfCF2% zqV&Oz3e|$OigH0ectvwA608mf+hqze^?7R*Xch3w9@*%nDex0UZD+#DWYRz~ZpQcr z3N!;_TKV~BDhNM|E^9)f|DanMw`4IfWwwGTH93-UpO4_>kUEO~*;?q=?krtUxZ5b3=ZX3bO!z~|o&Al(3W52(K+KK94-F;eJi|vCi0XNvn z+aHo;|2VFWkjUe5D0{J)XH(Imj0gRjdtU59ow2ITcb|f0LQiG-Q|_-b9<^Pejy1Toe ztb#7|ecXC?;fJRl_reT657ziHXk@hOdhaQ?ou~d*w+*iscTLElT6w5X<5p$-;qJAg zztO&D7(uSyuCqP@olDR?{Wbc&lQJ$W8}`o|@XuE|F#OARC=$gJxp@xx=wEG;V(gIe zoX&}JkA%*4nau;1HTBvVIQ`VO`&(hbk?-`M4>Z3;NINzczgP$FWZnB31T$_O35^t+w$5x8G+G(~7HpCDyG$Zs0e*1(`ts zWumC2O^uBPGNDy5@JszIyRUW4q-m^c^@#C$BaY(sr_j?E2lxHp1h>zG*TD9c3c|L} z$gR%w$q|}(r@HrVdG7!2ii~BMaBrowCwQ`3`F+CrU|l<^7d+AT*;&(0jP^?7R!dtC z_IRg6$=uW%cKV)F9;>kD;QQsg+VLf0(rRTt+iGqE{3H`j^oU{?JHsRJEBJ!pP4Z9n)=RlL*R|8|eKL!jwHPZ{chKQg3uPMZ~W7lRSd9ex&fU(~40_wQ#y|V?zDN>_Tq5He<|E zE*vsNlIW+QfrKwH&}2n(zW8ruC*LEwcbeJRtPgL2zlpo(|NSy=0ANeiV&9FlRe2|?z`G-G$ErP_m!sV zwHgmxLUZ0>OsaRM;8TAU=kw3BZF3}CaPUSef1J{JTtHu|s9T{#l{Al1_S0ez#|Nq3 zk=y|ei=^S3AE;wMlO;Qm_X8Pl@eo=KWG6p$DpDbLobsI_jLqLG%QvDa>|LEhuiu|XIA5EOzS;hV5H!VZb8I{hDzA2do z>nzNRQ;YREWe0O&DcL$`y!XQ`z6U*DUSI4m{B$G3FWd7>^#O(4nC?3*^PmT<#>1tl zm$f$TjB0LIOMkd!kiEOMGksR!&*L@=(#7kV&*!>g32EzDoW9fk1*x*$MX-gB_LJ|g zJ#1GSn%7DTXJT)_y0=b2$*Yy=*t(80a&;6Eu2sAN)=D9Ik|sTM;*7bSCL){=MACV9 zEP1`LAG-1RSUSOUnX=85H&*_*wKllf;eis*Zxmr`EAvqPK@Y2m#k&CV8Z0Al^ZV&HiNrkLU&%3l3iEly!fEeMv~r$BZgZ4c?NL#0@@$3BdiE{0hzTJuH z*-w?5F$(0>i>DE&wW!fzhp}C!9YNB3lWFBM)?e=o%|^G#6QfHB-I-2_^*jDIKr&HW zlCAe~y&w)19CYc=vUbMqnlU$YXV_pir(QswQtr^RMVyS6*wNTa{T_dVJz=T`s^AKZ 
zqG*mxjctABDV`rz?&RV*dLZ2sjNqxB4CSiFD?sFYO3Y@WM8O>YRAXWjK6xmJ$|++d z?|~i}nO-bbOx$9s*u1q$RIK?nbO-(rO?H&qBN2GW3g~(8TEzCI7U~nm_3(RzIl3JC z?mR1{X7EgW&23}##-oEogp13T^+QCU#ytWk zY%REteOGfP-|(_wl!CNbjaH=y5$4>w+qF}hw24zJ2zV!v?R{Yar#JFVym3lB3VJ<< z-10%cE8coLMZc>6-s~W!!F4!6>Rc8c>v{eDdMf^R4X{XLeU8d%de|u8Td&qgAUCn0q@)w@f-HFCBPk-Y{ z>VmC#dHpqnWFLVV_ian?8q{4&Q5+HW6e?>!uq=nKjJKj|?i&g9uln2&A&%87W+Yvm zcgt+CE2lVGoA<%kix2rpO~d{tXuh2RzBk^mZ2Hqv_~PewFhijL8MB$emG_JqXp*Ka zP8`7-m#>twHb5N1AlfjhmMdh3#%xq&_zfYmzcJ+dOoIVBtUlTFlFf zf`Z5Atd{Dw_(W%v1~WK_-s-#O;i&bDMj~Z3C>mR^7j)n+g6@{@52~3q@OP^1=kY|( z6%MyhwdsI-_I!Yd;ITiOW!*Of+5dQpP4E|jg0$$vMu*E%dkPsMnO8^XReE$L7~m0N z_pZV74@6_EoUKiwU*vT^MBh$mi4{}Zb$z>#X`TIa=>=YV71o)!rIL9b89CrQ=Ky=S z=G+`?k*;A_m6rCUMYMrX1tmhaDH`Qi6v1`mum0>s!$F1U7E$eONwWkDIv@mJmC9=Xry6Ep*cAw8Sjzs3`a||%(rWDnP%Wz!>1bQ{)Uf4 z=@kzzPggJ|fBYPop4QN$8D?o4WWAPp#3pnc4@R3A9co%d!;UF*U4pvOD^L2q4vNif zig!?OR`1S~aQY4ZvaTqi9)9mN+w~lOH(c?UZ#CgO$-m~g=`w60Fhu;J(w#irN^Qp&;g93W9EXiT_ilCW-fN?}j(w0$6 zln~GOi-KuIjrntjO-Z(y0_fX%7M3a^-HAjPzVZ6}Qwi&R{Sp`BH-X2fg8hJFvpDyDRez`y;(!jcRXytLtIO?xR752czQ9627T1y>wZr^@ezldk4rK|FG{{R ziF(*m#RnMv2o5*?0Jmjo%n~;pxYavrR?#EYDqr4rm*BYaWF3g*n~C@tctS!LNhFAFiwxKQ100DX}>fMbD+dJR3nP#G7OETn?E6{SS@4 zQcVU2D;S~K_rsflh=-EfF#8ODfH!0l@w9@iRoe)1Nq)sfeB;@lIblL8E3Ta zZ}!aQ0L@6%!ndzNqFW9RK>77gFFW^~8ba&g!Of9N#I0(}JVtJ-ZSvQhy-^rzR2G}$ z-C`4xYwZ-9!eFA69g~dXrff?8h%S=Ar=v*m0&;&S^T6waV4oCSCJ=+5&?=8$&r~e+ zp{;^`zwSYBvEls~x--Kx2R+sYw?Q-9W$*PQS>xQXTSbADiGtSqKiQ|$Z=|yr(jn}1 z69fkakIPY4vx6IlTb<2^H$W=(18}QuKuX2yv6-+JuyblK=K@FJMEuJ80Jur91>$qC zm@C(ViQ9HERXT6rHQj-NiFnefdg2^C#BW)m92Z2UUwk-u@~q=>3vf+P zUD!{)RXGlUn6AwEHK2k|f))Pz%SD@_TdRxMHNk6p@SNTAc@d_P8P;Ct`^tpltPcXV z@v}8pH#y(-Q#V+UAs?VHpAX&RPQ}P}Ly!5u%opB^@D%5DYe zK8%8jKk2-v!&gFR&*i@nD){k8RC2J$#vG?MF9*-mlAe;_-$JbV9_%hgqTbqXIfDgx#-MYg`21K* zX|lw8$l8ajh*b%1?y25NCv>?(qElgQHY&O$S<-2^5HxhcOK>u34(w)csrs9g+@qu6 z?sW5DONv#O$o_=!OAblZzk9@!TY!Ey92?;MKh%u9Q3+{{~D!|^SipSo+ja|P6HP2 zsq(bW_yJidkw=y-u~k;;)FtZ3m|4p_wM|?yWO!JqQgA`Ii)ykMlD;(SNcJSw2xDvA 
zaW{>E8NGDt^`fhs#zskcyV|%ehT94H$Uqc5{a<767*UPu4PaY}t?!4gdx9;Xdb7kV zNz9r%zX3G{ON7W=Wu|YK^cvEl&tVPdmcw3A=CVn^(1f^9Rpfw#VV15<*`sS=8JG=E z!NRMOub$m6hG*Iu&OV9ECWBW5R8SA9?g@Q(oNVcHig8^%QA%)f1zOs$T*QkTNX$GRoec7`E3iAq~O z)}ni%xPrYtr?i+MH-BTHZfpAQg)JJ3nitk?%vaOheiQv1G`kZ6n<|Z#a`;JBeizzl z_R--n#NG5v^3fT>DA@VeACAjYVj~9%y)#Fm}t%;7B9$2pr0e|Pa%C*uDY=M@HXnMbjSFviHRF1yB_e%Q?4vYjUu?o6E$z%c4J ziYR=dE;nM09L~1*$l_-yW_`Ga3A>$Ium&cejk((s&XY)9Zy~Qgep3G|lS4sP|C6g% z4c7&Lk;$FfEJ+&vevj+CS08>~?{RecddJ|w1aPU~U(wK9ruwp;eEaiz4rG8#PCBVK zh?`hfk4Jp{C~hBt7|ce0@lmMG-2ZjMMo=wW%KDLkoqhe*ax90msOJ2z-23_ooSMt9 zPjy5qD*xD-J6V;RDsAV3x@np12|^CrfrEy}YH>j@<@+&$9)>54GM1V-OdZ^bo}en3 zafeaEwCu66u1vljgRoJSO2KRD!wv*Va5i_JOB9~hbbh=@G8sp4>d60A`S8N6{naU( zBB^-|Irml(+(_8+ZI^*&x;yzgncNA4;RmJiV#&zZZ?<-BUb*e&9q&$6BWv!pzf$`+6#uMmUzTB+awgZzei9q^Z_?wB)H=~^a-in;JcFNcXXZx23arWtrZ zbu~n|)Xyi`2zIeKJuLT}F&g}Ap(VFgWdL^cX&8aoAP;)DzXdoUU;aLCQ&d(^4-p$; z@p(hfufk-7?$>^CNFhZc)~WV8N7_3noY5WQD~~*HSpmg0n^SGakpX{o7%e@0X3Zb?hwZ9(P4x^2MZXe z5FcBP309@Fn%|FJey#aos-3eXg>|`@{{k8}#9+7L?K9k>m!levm`D(Ey%{EgW6gQI z>r~b}dw**wnehvMVyu+zUEGZ|g`o|~M8BC6m&Ic>!R;L(pe3vqgD@>xMfQmM8Aeex z9X>Sv8*L{#3>|B>qTsP+x>MY=*lq zzF^=-0Y|4HzUJM=(TVSLn!GzvS{06aC|ZA2a~}ApnDcYgVnK7$?DN0$1pUTUcO?9g zcXg|7nlg|4QOLf6m1q4m#A16xCxervP?CM_qkYC@g%mp>*`oQfjx$UKy8HKaGz4Ra}TF>e$nwSO3vVENkqF>N>+>leD4)v-7&(G z=5MIN6&XI{GVNVdi z&)`(G%`zD`e9{8>A`epHI-O8-3@tUmFy$mTVycu0Ka$zMs{l;$Z=yFsmP;-#d$T=8 zRTp{NH-QVWF>3BEa;aQzhtJW7uRk}9=7X(-MQg#{dlUfu0;P+09`yNKXjrxW7v#gm zt_*iaI&m7B>LZcymho6kWMcru=}+k$!9s^S(%6fp^p#GmnhkS6GM_Z4FLZM)sF99o z$!sYopqszd#d)!GeoS7^W`d|tYgjjApC|CVSZ|Uou$tL}xl!x6&NdFRe5*UfPiN$z1Et{ zNQ?gJDw!QL%Vd3Wa9J7k75G{LXI`FQ%}0qVQ)s>qgDl`TvEqE8DoKx#gwfGt44kEebqKQ(l& zN1wKz!5t6Z-+10io+8L{RKd3K=l8`xfv};CA^jKSFotkve|21!P&Mk;0hu6%pP#iR zFy(mQ3>K+D0`}7RKkcQ0s{6nRsyo<%BG#uai07=zlrWa#IaAfUGGfkW#yZBHr@jci z04{d$I&^h@iG$d+Qn!?iNBGl1vax*8Xtw%7dTebkH_V~{?x!qog1N7OlU#h? 
zHA|*Ig?y+gXtq7*;cDA8yVaqf;5e8&Q(jo*kHn;TkD8{hfax#> z(=oOtT`ElZ&#u^ZALbci zrcqKw+@QqkC|p)i@?}wIS7Q>mTbpz4oC+IOrl&%snTEv`^v3#4{UK*0yx<^=}%!>t|G6)Z}@?{YXBmKQeC|=uwG8KI?#;F|z ztZq-XOb@mUD4<)^-;|mBs#o>?yC+^FvUe*+cDe9oVvY>Q&_In3%UuJw*!;`bF;cc3 zF+)`q&tDbk?EmJ6n2C5=V|SPSVjk|RZP!UKbRoo?&6nM`1;xH3vUeug3$0yocbpDx zPM#KD5;r=~uRj3dQf%*IwV=tHWu@)uJ@TDB&BT#n)35rM4gC#D0v8oAn8vY96!Gcg z@X4Pf?-Y{gs9aOZQ@0gh%QG9uOib5VZR=Hci?0Qzv+Es+Fby5tz2=Rf;WfPE!6W32 z%-GJphmFy1d1+|I+r5s_45&y9CrjIY`8%6=Sx>MDL?3Qgc)q58#(bDzS7U);+DcF*n>$_r96frGo zDQhtXOE^=<7L#p15uzy?GXbAWhg&bzn}4@Au0Z9GYX>RR<&-7kRbatmr2SE-Xt2V` zS3$gJVqT*si9W~QBKo#RTGjkFjc&x00Q*Cc?#U@6Az5i1()vGAB1_a)jBzc ze82YgKr4@Yj7FD-nydEjYvB3^w}1ZtMqlc`gTVjCAn<<&`v1%Fq2^ou`^wV#U+TyH zw+`w5y}16rEH1hur|?~IKjOH37(D;!Y%@(-UoO1U{lP38_^F+>DVo!qJJ zE%grbMS?3N5Q}{*17fiqYD`9rFGhn$L74Zl!i9dRt9UZ-9RTNYdgy$1l&Ii}S%I@> zEH^aNx#*zgJhNj%5*c^SvT{Xhw;E&s)Ptk@c&q<&Wrf^bikNC^qijtTeyk`T_@PC| z?9HbA=At+Bc&BG0dDa=2A-}X`mvSR4=8hJ{_|c3DOafozdXl-D$C2Ytv~l%QMaSkO2t+mR{w1<~>KmdG$PvzmZi%~7I7D71SxZ}tVDc#@E zqxz!%{V4wmveR7EMHyO`2aFh*)X&8#Dm+j_=$S`v-1hjX;tR~o6OMYvS|FRyorMPA zcx^ONFU~qffAZT{x!n}iIg&e9_|lSD=LK-mGz{q|LLXpKAW#>(e6iI4q0*Jb^ z2K8U-C8*KZ)mWN`|uFdlrIi?D|6&=Fz`VhU&^&>=a!b9@-isneoA0|9~8_Qm&L0BB5%!# zx;dyIGV~1**~Tq4bsjpp`r9IQi-S6Uajw+2UH5L)1uND+>d`};p~t^SJ$?;oB-G8t zyx|BPF1CUyncE?7+>0>V zILm6r3lBWyk8IwF6CkN=;FY2O9oGqoS|GSL&M&G)L*}I>pLaw^I917F%TUws&IlG! 
z)~mb_y&2$JdtpDr`GM&>Mp^3h+T!4;#hS zNhNZNk*x2GH`N};+8+g7ScJu$iH>{L0 zEl_IP^pg%ngnZUsgMA=gmSbR$;jaG<_cKNHM*33;DQ+_<&ZSGMyCt}KkT!H;Nx$1s zQ;SjK^}(|VoM6u(l9~T)%<;!4F0`FExrgeF@X_IX4V*(Blif z61oMW7~(SKLo#bzRUAGFUTtpdBYO^kZr2aa?C7sg5xIKd zyf1$ija*9cTD$rni_F4X-LF8t%5XyEK8HC{gXFs%z|owMBvg9KnjmA}m-bww*+5$+ zkM~hqY?^)yzNYXuNcGA*gZa5s+bjLGi{9602qfI3H+T%K&%Fq}-n}albJ4d~+ZJ_< z=xY#7+_Rv#O^_;E?n8AT2&!lXe3^AgF+}J3OuN=7h006h1G6_= zY5}M!0=9J3!zNih{(3m-LDxbx8%sr3UFmBxv1+`T)7G#Ub+7qT>W`-Uy$O<;a(dho zi7!i$-R2^o+X)#$=>_IoyYKg?7SXlr*2-*5;|q3IU&Cz+7X7-`KiFGjkr-be*zEjkg^7^Zb+9xrLuzO!^uD%?6ACQ+1EtOZj zz~9F`H=h?Bmyj%bC;z)@zBT$?)Irj<1#%v_e5-Hq`4qUTTV(z_&b% zpvOe!zB}%vg)Y3koM`RTopnblSX_D*x!(JOGPYZ_7pBH*sjDL7v;pvs^QO9p6&{>|3e4fX1DYx33GP3?Y{HBurbGA0 zW@XS5zya6=aFc_9JUgmeD0H@6!7%@c0F*ufyT0mi-m+Up+r`RR`3mf<$%}*gYHsS{ z0VfCb`X8s(I+n7c=b(EW4y4+?a@+T%y+rYDnP>Yrhu7p)JlP4_dtuF#o0L-5c^IAa zVjRbkSDBjzce%Dk&c9Y}EmAwbfK;E%+JmZ^VzdQJ; zbZ|P~YP-SwJB-)L>(zx|EfLjbdkW{|h31xop&t2Cd;h8%UIf7t zKWzSr*I{t%i5a{AJ(HbtlBH3>FreiqnKk7iI56Y%&~Gp=gjTXyvui4_#?NAT6k8lN zLZ-XUpe&bnvcH}ufY`XY%Srt((&~6+n2o=PQH(ptSG~p&w@SLRJ#bGhQf*X95d=Nc z`=cR}-D^!-eNUM)U4%3`4=to*AMiF<&Kr7RTccZo^JwYxQSt)US8B!4Tc)hUqE%9K ze78GuxYhC{q{IWI%tH{bV3DvzTzNiO=Yc2{BBs@JMMF$^g7<&-c|Ic4gM5j6x3RZ_ zk~W*azPo{Vds~1EmVe6i!3AdWT4Y|V)T8^TNRLGaeXIMesik?1s}&DB?^&%-hT~R9 zj=oz8-B#8+(~cYm+5W#!y){?L(%^O^we1EY?J#4DoN{PpH`Gc)vKvbhEhw)a{Cc2q z*tanI`bNg3)BZfoRz?r6VV1zuu-3LdELRq22p+5!S=)82t%MA6i~F<|oN7YQV^1Oj zEAP!FUtk_a57U6~s8w7I*ogZpY-wq%27hxm*T&l}e(GIR<%)P`i!_MI~UmzWqOdJ>ms+4W@X^Q(MjFoIFjIJMT{qd-o8;lQO?128u@vm zEEluA+}Ij}mStc;CV~N$4;rJ1b)!LB5=J4LIt25+oz5T(o>Z}GwmjiRN(C~-cfHj%3YTxY=y&a5 zie0{3*F(ws!9CeSp)B!4S-Y*;G>YSWlAd_kZVv1xKUZ*2IYtAyLb||J-?B$~l zztW1`Ra%;FWx$5~>BUsiHDK=JF612%>`mAQMmhv(K>PA#>ea-*;R^(i41| zP;1h-eP$maLG|!2=tK_mw2sVtLZ6jwCpE&W++T8Mc_e?jx%;BfTK`iE0$Kxh*GwHN zS^k#W+edD{rgP`EP?Dm@sxLQ4e;Ft)n-*L$dZdHSs6X(lAggFFkGO1^y^rasqnK%w zJHK5+7F`(_%IH#Wz6-Uyct++|oM@N#1=l%Xd8Lk-PcPWZEJi$qr7ru`G~31#bDnN# z;2vTv1W|7+7v(LFQRAtsoMN3`B}Q3oA!W|Gws 
zKb?NjV$e0dQ)9S=|54Yu$1~l(e_VB^gGvX{O;k5>K2#1l3?<~8v#`0-?QG;6Yq;~R z?vkw>%lRzCa%OBxLMhhfFwCroIn9J^m~Fq$+`q@~_xtDf`PXM2k6nAeulM`9uIKBz zB3i4`1V_w@#fca50mn1jfJhF#BU4FHuOU+LR^}&Hn;2RpFF~aAP3W^we|ey`II1Ua zk#n~W|QN}QT9Oyi;eU|cM3U*KS=Ro$-TlP-=rZ0-~K zDVcH6kAqVCo+V^WIrSkCaUdFA1SB47*ISROr&U52 zRNV!yp~&zODmZ2EZsJOigMK_nG0TE=MuRrFx`AaimN1H;4fwu#Jb5W?>WepH14nUH z>%nH%6|yi*V@qB{*kTxZ9d?IZIaLE3FmqOQ3a%UXk#k5cwm;ct8N1GLK`y|y*lJ9f ztV9j9aOHOOiKPd`_E#T)J)pJBI&HvGJ^@U0byWe0_mB6vPV&^Bw74bu8+&j4Jhz_0 zhp?hE8_0Qcuj)~5GD~4W?P3Pz!(S+2DQikcF@=07wVo9-C{Gz&H}CJVCc_lx(p6#! zzlsea#1znlDcZ|7P?zH2XZfAp z8@iT#DU#=xgVP5sJMuj;L@kMPFX4EftygmKl&7Ii_1yV!8aHiN@zg*~85ALW=yb3N z0_#s?>K_GH(qWls^D(;3Dd$_yjlLwrcukFQ!HW|?Z15y(3u>S1&BHW(ton%#!1qM3 zbNL2pStJ_n%a1d*+eI>P8qiJHx-_2Bp>9EE1Wt~OLGN;m)-|P0K50vE^)v*Do z{@%P4I8J|;!t$?w{L^unCFf$)+A&`*ahjqV-q$hKIeOApYH!nK?_(dL)^B85I^^zn za2A64X5PO-FSLXTTJdVwavy9jURRy?r)f^+XydLOoV>_JIkjpY0aHeJnTbtB1;RdeF+vAi^<>;>yvz=9!M!|XGz%?NzmfR z#e=Guj*I9pJYTCXRF5Q{i|b*TI#&h5FzcEFKDffcV#lq}1=x6GFYj>=2IsQ!Ygg|U zkO11Sx43p(=tX!AjFpYsDesug@&vk97yuU*3II#IzRC!QIyU~qXHF3W#H5p%Zjj&0 z6a>VITZ5|myP_{%q!_FlhL^PO*g;~69)kAu^{KDz9Skf7Ey%y%@T8m3aXlc^+WS{5#h1#>O|<3@X*O&ASjjEdaQ!!1L~&F9J|JBiKr1tH=j+uv+%y@hnqGBQ71a4ceJE-%}6zT7d! zjbR(s7py1f&m9v2j3FJVs7@;uVebax}|-~z|D42kRq##b_rTpi9n^W z^4>)q3G}p{^{t}+Dxo@|BPpXi^ajyd1mPg9fa7x+DRbVFF4%n*9Koa%ip1#RgbW3tdNb17ZTSHSUYZv(5x75Y zas3v@54igND20Mh54R{^b5Sr`@>tHboLyacnc!1(z=wvHAhGU)j$M-6B3EUg*x)IK&02}UQ#sL<5 z*hxhVD)1m!w~Dn7#uaRJGt9SX#A$v4#kW-<)VMe}tmi7>u2Cp;qH~l8*7qxA{2u#! zf;O}~Gavhuxb>q@Wh8l+7|f;ifc=--NAu;YBtmky0fqK3#CSoSzMC2+pP0uekRB+` z!=*HkwACyz9`x9oFc9JyZ!o}eLc(!@XV)*vNzljcFig*K62kl2)y*90%i$OdjDT=JK+9A2Dt+Nj`jxPr0UIGYWhgvjN|JUe8_Ik;J64@2kv5~ z$MU7(VEf77>8yFKE#SveK7t^N@Ujk4!MwoMYJ}iuNtUjvlDlI3R|%1etAZ?z4y&WQ zoWbBtShxiu7?=vhj&Q5(fOOQYu6LN2n`83{t6CfE&kBWkulmBbfWr?&U^mBs zZAT_~@i(v7U~W7qV4Qm$^M0;b%s`#_I;Z3YEe9i>aWeM##r!Ll&*0Sg$l1J8<-E~? 
z;QoK5V-k%5u@^vQwS!|R&GW1CX@17yQo{O{_3YYFy^I*o>9Jtknzp`kbQR5-Aoe>@ zl(6p#j|Z|(XqCFQKfF@>*{u*`Ac8(G?LYVRbv}t!wp4MW(smV>m__k6xM-u~u#BGX zv8~nYv57p+-myK@og`tfQo<;Omvu5ZU&9IutIN*noC8V=4d=mDj46zDLGYB&6{vYR z>6s75pgL{cb_T*5RGTH*5Ai>MySr5iK!6f zt$afCd;2Tm7@(CJHME2NFe)clLl8|AnMH{y?-5c{tRww-mh41e%{5?d!5K9^nEvIN zMR{@Bhnr#JWoDq>5DgO`U{}Np8uM?Bs1MpbK0& zZOe=^zOvM|^VYeZK|6b1BfENbGcwkH;h3}&3sVA$p#RF!BAT2u z*qh=_ETHz(fgE&@*3wcew#Eu(s52YnHiJbb*hhzQNI-(cp|%r=RaZyS*Ig&6!TfQ? z_g4Izzgl+0pe-lXk}V$IKTgf!b~dGqb=Ejf8U>bFt$&*m>0!FJ#+65^9p&iGhFD)2 zVw&__o19+1ZaoPITpCD0;ABJCx#O|YyWXX#LR_V+}laHFi~Cff;bZO@P($*NB5^B7LI z1ZAlMdX1tdMe(;{c69cGcmk=ys>{H&xIbZum2~p2FvfdKR5?U!h2ZyS%BWO>Idje;zsGY z(>tSqS`u)45P|V1fxmokPd~No#NG_{w(al|MsR)j)X@}$g`tJzQa940%j0w1lIxI! zo*^SemSMzl^EALo1MC92u$iqm5QB2FCwV8r?@`aTx`*RA@{7BiTSV6zNCEEgQb9Ns z&OS;Mtkxu*7b&L!4)(KY1v@J%05#B50^&2V4y?xEA+KfXR$u2SThT8f-4H8QeVfcp z0{s@Mw+;HVA#uj|O9SZ3bbL+E(>Z88{T;*m*}%SXHI7jrHdd)$gr5)}_LOBrDxu$+ zg2fn2uQfWx{O933dmS0Ppw7Y6v~!uqK`T}c$lGKorxf#ycX37XRm>vE=I8z za)k@47|=lF1=mnw0})3#0Crz6hgmjaCZcAv^&_5S&4-!iwVl|rOdanl-HEM7wNksn z3R&gESydjWi7``v^qgKfH9dcSl@W$)*XpNGd&=U@!Bkz9Lks_bGvZYXK&@Xsch752 z6_BJ!9Ev);YQ)pAOCa~PL(^6 zF9HlplVN%^%bL6Ehjs=bioOjF-(?9{K~luORNpdyOgg>lXCCYXijmeb`@UqUi8wKf z>Ay&4<9@!h0G28q#0D5iI4*uebp!b$Ir^2|o*P1RlgKpql?f#qc zrGy&`l2>$=!6*l(9p?8mYxP6CY{mnc0ibIE+b?T7Q61LP`~#=%!M#(lzX06E#rLm$ z^ppV|vg(=r?;Y_xt@XsdRK+4gn=2(r*e`(8zRXrQ(Nz+-9E41LU*MHj(1I(E_?^_IY&vvMpJbEKY|{%7d-Evgy9DCq@#zA zdpe-lq?oNctdX|UqyRxtQ8P2W;)0%}`z@=e`%F1W>WN8{;b{mFr;Xt>j2P*KB6QpQzvO2Nb4SZ zyhaj}8$pkbvk9?ItA9&hHXhI%0meUKD(i0FKC*e0(YanNzF9_<;d?5fA&c=~yHYQ& zB9jeWeanf*TdMVYZp>;q>gFUUsNA*#(pam~4V7V3ETBu068et%h4x;(_DG-i`zZD* zrEg*T)W0re5P(aE;&2u48TQkaAB|#fb_MZoKmxY6v;_1PhaR#pWEyh zB-gkFh`ab#ZAJgZ-&N9YXO6d3B+5QZAX#WwFGYlO(&RSJ|$hYQ`rNJkGl z0QOsf06Lo=9+xq_3H+cuTyxgN_e((WQDqZCc-%ODmQUqOu|GIp28+PEkf^!6>4tW2I#BURPb| z%=tP(A1lOmC?#t64e%u^-DR^|yr~48d*$Uan=KY~!fbD7&VS0tBe z2VP~g{YY)jw@j5M^xuRQwz#DV1)U+4^j>yR4CcVU{|!~NCzslGonT=`j#mq7?|0v1 
zR6yHV6T!(*vGW*cd91d@iCd*Hu45)Ie$Dg&BT843xV)gBpZ?Ico9L|wz%Rc|u`{@3 z)AYlPmW)W0uHkt(As>JZqNE9W#p_)Kg}dIhxm?(X(;j3p8nfW>CrTv#tB$%GU|WgV z`nup4&ajjHQPSbS8;3ZaMZs_CPE2GQp>|;A`#vV{In*ari|O5a^$kQT)Prh4=nfO}~piRR9pr}F(&lJfo50V?HD?nye-CcrQs)ktm zq+2r1b~Sx-H0K+vh7F6QfnCfNfi%qQJvEZa6a|tWb8x8%;}kq#r+r@50&&D?P@qKx z)C3EKBkdvH_kTRn%%sy*%~L<>1YCS72`M3oNb~hYciJlomW)LI5$7td){e7~d?w1GtUl*05uD~WDgM*4hx8*bOC zP3i(4IWv4U3P29pm7FSm!z_e6f}C{ehX7x+E3a?yQJ%S1w>|OfdeCHwj(e8mVRv%z zovak?sKt(PdJATB#KigSlk~@yF0wgmih%4H=2l9uNj=j-^dtk*E+G@UC$=&wHb|a` z_<9^F_b!eNXfd?_b#W5n?Z2mbsQRj3+0uJ~)VS_+<no#2tGX2E zOx)QC#C&v%v+Rl>byE>!@%PCDtCwwNcsFYh{$)%F_5S|6dG?gkH>a$@xYV;n4u)bG zrLZJddR%R<9}MXfi08K@C7FtXv}`V@8TdTkE2Iib9V!7Vs*m);7f553*)oT<8u#qo zh7&PKl{7J}v%1#>U4n&*m>ZjKOkW8DJz<$^4n5vYeJIJ1k)aOa;pn4us1y>w$Tl=b zGL@C-onLz(Qss2F(wBUg_G--~wYrlNu8*sW2NHCd_t#Pn1zlYy7Zo@LO|lrM2f&U2 zTj_i-8)vr~XG`i#vYxplht!*2=EQ3|sxV426slh_GuIBkxmlgzR)M3dF$n|L5^`C;ueRee#A#z^^ptg%MEF+CvtU%m&tCdQ5}D1@aBZjMOYRrcR^lK?pr|Tn5CmqyqBsb>xmORlfp}Bx)eOmn5+Xy2 z0s%|KP@+l0B&}TI?K+AyMOJx$-O$$wnu;>AEii0f@1O%JIf}k`DanBV3*u+NYE`n0 z$T~l>CUArYHO^6m6Twk>{q3RV3ul`59FVNNpnj$sQu(DF;&T0HykzQwURx#72G^V&5Iakkn*8U$KMxSOa9PYC#G3i<&?G}(eF?TCxW*J zw~Z3K}w#7dci>jD>CmZDafm%4hF@8F=Z*g60(?`j~!s+|) zHhY)J_;yu5nB}-SzA$P2=&}t=03LDt4= z|GlW(8<+v-ODLMw*9K7;#VmG9S5poUlO{+Jbqe-SUW&;jEjE(o%Yt9~qXI2SwqR-t zmB(+5Wg9w#gr7~yCYv_NM>)5+1z88ETHbsUqGTKLR(1Fc_V4{5%u^!a#fxu*+vD!; zw>Q`Jbn@m_ejJO;9={zUF2RaRJN1O}dOS`q(l2k_n6?A9RcH(G^)1g)M<5W@Yq$)! 
zl2hH`7mHVJ7X5Zx?qHb`5L@5>1r&g^nGP2yZe|ZwUg8Ulal;WH+Lfh_(EYyNMlfh|!}}5^GH9P+>=hSZ zH$7BMO^wUc*XY%spVO_ogxgy#ce-5})4(^x_0|@0HaylBM^tElLKINS0-(pFxu>UR z+$ryJE|>eGl3Rr`X0cf6JiCdpu}vm(iL}_E^O^><0m{yTT0eR6q+8zN3pOjRF*QA4 z#gvo*SBr^>Q8&UTG)pIM>u=kD@$Ao^KlfZ7kA2k~Z)D568?)%U(RLTCAyTKT>ok}d zhc0W0cmuJshb3+U%vd=UYE4p-B4_Ct8}pm7Scr6ESqA^L@oXR$87n4*n3(F>x`z6ChiEY9i@4{4cwAy4Zo5|fviBS+b}R3~!DqT*Q&FJ{Tb%a0 z)fx0x!wU-w_kuLe7C6KXK8^XzJ`V+=*(6 z!sVNkmPO$}R?2AW@fer>s}vfIrVDuq3hlVe+SH<~w_0Y;xP6YTdXN|8o*LrcqU#&^ z*EvTAqcBPi&4|B3=~yhF_O;04Ta@q;@PfX)+%&eel~L$ivGWmHhapYwzARp~@ss`6 zsjjZBp61L3+3u1kxNyH7z8xQ(zhuy5q2Y}^n*b1y@4*clDnPs7dm)XtwAC;O0*4EbDRbAhI{`_mY-)ZG>ydJpsK%s(Cw{gObyRCf# zTL2d<16Yr->A;;UwD$IaLqax!oe4aej3Yo6_}@q5y8T|ENe|`F4?zJ*K|T zTI>DRTHikQ&pi))9%9b>9%I~Bp4WLAswgjo`GDjB1OmZ)B`vNDfuP=lKoBj_5W!Cj z624=A|J}Ef)^dbE9tXq!B1Eqqnt&hTI!S0cso0u2xf(c_KvZnZ?F<~9Omqnc=OBXxx+6Z*?~5pOvx-W*Pl9K54QHN7{|x;m9a!x%8OkKevWZqAZWxPm z!IgY*ZEu%07|ojOxSnyfeCl-){ky-vE_c2^on!w>XkR5(CL3($&&y3$1op2lA+1>V z#1Q^|bB@@i_^%%~i?BZW`=uWX4hHhyFBj4O|M^x?Mfh&3!^Ekyn2L*wU8=hb%Zdmq ztE%o(PS4C(TP896*Ia3KNo%f-Cr;Q-Ql4;4Xb<9``5Y4~Re zZWw(l2(3~p%#tJ=V%W|$xZ>3Ilv`On^#z+-H%|N8_`T1PH^~%~`QhuOd&2PutY-|v zS!k3q5Dl63BxzQxoX+^{)1jLVni9{~Hzus&tnhb6Qxa{VlQT4%zG^twQex6>_VF-?B$QsoQI=-GG%fd|z z(GoGCtpJs{*l!<)43Z*2Jz+7WcUQO&`kHr~TMhf@W-|^4Zz(%wTjQu^l3O8mxLSks zvX+*X^SQmlU>-hnkq(2H z5W=z8*>AsWLQwBLB!bw`n%PW|A}6*yh6qIH=fRS5o{2kV(Q7IRo^XXXAODroS5h?gc z86U0i^0%#aZ^N)j)*MT*O>`(?XmVgVSFts8fE3x!;R(@$)~oZ=PXwXjqHkQ|qk)NN zMvN;QV3!C$!c5eN3$qBglqDsewmQ76*HhlWYrDaFC(in)`pbC==9r^@wwnMsA!eX@ z2NJUc*~A4#`-ad(oD8=A{TpYs(ebOY2QVW6zxx=BW?ICBU&0CTtxB~L?-Eow-|v3P z#av3)dhPP+gXnJHUecqK$GpZ9_rP-m#w${nXC{lw0%`0TVON)_Dfz04DXKV(W|!6) z;3GS|ZK_&W=?biiKy5&MAN4y9hz!iFu9QVomP)a%i^e(T4{)Dz=CVk_jVrD6k=DBs zJ!)nlh&~eg1|^$9{3rB_fUYn~%@O9E%XpOsL4zq=eFgSzof(3jPS|bBd9RGIjryb9 zh-G9DAVFb7{yePy2zD=lb2`|%>XML>YHMqAVL`h4gdMY(Q1|gg`@x}?H|Za~n=IdD zj3?gm-tr75HF60_F&Vu0lXU zJ+H&JDD)ALAR;}V&=VA-9O4ZdD9LfeSh!-wg}QBbvk1f#$wJc^(SqX8pT*KyV!t;$ 
zV})B!2+m0``?joOv$W4aik(^_G#>H?tG-;J$%1Pop$cz*Av~1-r z+=w_Rt2v<&=Sy|yr zxI!ekPkor01E!1;grATBl z;^#VT^G%OG2|%WqhkV9a6SaHtJ`1Abw=b-j-PEKTev&0Na_Nxxu%`rpgy6ct{giNQ zdioLsVNR^iWo2bmRcgx+zeWLGLUs(JzBW%aj}2*?H>!7j21M$ZuVOFnmmfcRh;b^! zfL_V}p+U$krbki}M<~k4&0T(S-w&J3-@k@!vRs@R@BZV3LVHM=rW~^>s-n#w z|5w^Hu+a}SPf3(xEvolpi0T;Z!E008=XkBc_TA)+>zLRPx1DtiemM|8-jpW@(4m9f zee|Qa@4hhTN(E_+{Dj`7q)Ch2dZm+8sV4-_9|4^!Lz-|?lEw=3wOd7q>FZ@zx3~c= zDN<)|VkbmFAO_-y_Qe5*sW6D`@6;8rBDyG!Q3T}rTF8R$_pn(4zeXfFu(=RLeNo>YL26G?)947?kb2_r=O2DAAwjsGSYqHE zbB5O8=+J@V+@2?DQ>=~H7BXTsiI~DAjQWK}4)KcmFHWwo=OeZZ`7drB??~CsT20od zksTbhk|S}7O%!A>9`h(a^9jEK8#vjDa*+m(+HH6_-8Qdwwjf|ekW+r8K+@igN5kd>aEZaGx(~Ff{0# z$(F?4xf6meWpxi5A|w^jO8Ti4Qv3P876WkaMun`b_SD4~5NTu07mkQYIgEa}M8#cogxB7ueBqEpEOW3#o;YO2}W)6C3F9Wmci zOY8F-@$4z_F>k>Uk{#D`35$;C0EZI@q&0?>P?Svv_CfITYThROO1<-Twe?IQ@qm?u zg@vW1Y_@V;S$7TwI8E&un{*s1+h3K^4X*o%M$#08`T4J08eDd#;~FHiB)%%)F<9xJ zacPezRQBO+X8*SPB{8{LZeeXbUJK6?Wo5D13J|Xlcsi;}q@jwH-Z+07i4^kF&>@S@ z&w=B$BZyEk*p5iDa-)^T?+wP0b?m7@7CN~nW|CJ14seyu-jv}6-{q1^xO8<=g1b*^5r zvf*lv3j!FAvBdViReHi~-e)Bq|2FIQ0hwpGD1(e7x@-AMTil9bD{n|UU#0=k9G#x> zCM9cDONm9jhp;-OcvyTnIO2kr>vx;^rHdT@p|W71^IFs^7KKezKO}4$vQ>zoq~ls%=_$i zRNhtA@SL-)oVMw^l3Bs<^02VAwVylFRpALcXQUfG)}ODc zjA52-otKLduxT%+N$*abUZRSp%w%P_jWaZOp{squgrV0?;|1g4ge(JITcLEfy9|1D zwrl;#3m1d1_b*dRI)Qr$&|n zs*ly7X83Xm^b_==@wqP_-tz#>3ezeas>j%KjTcH&N7u(t7QNfLNK%S&CUTOVq{3%( z&2($lhoBHe?vWZnnflEfv{5d2P$Gr-sn%_V5Ea( z+2G}M|3b}Iy0YG9<#@M?`5 zv}@BA6zSACob61Vu{bE?_JFpz7_V>yzh50uV24* zs9Tt>vMk-8S8QChikLDW zl6*m?J|*)O4L74&??&C9F?4&L^cSQztS;vG;sCM4?A4k#Y_7)p3S`n6C~!~L$@6r6 z*Tjk9{>zVZrX}<{J3DC{W>MYwb+m*#`?GZ%9Kq7faeS8keFX)3spGJ0-8zNH>+@@a z*OQ48OvqAJilWSRZx7uBm`NU){SxAF+4AOhQ6JJrfiNT3nOU)6*_{v(G-{ zf@0yJXm@1ln>TL+DkR_j%=;N>=5usa(c;EbQD(RjQcyZqY$@%x9Uz*u?|avC!Y!j^ zI@ox7*p-PEDC&asYM(X>xV;DJV&yBBSf_!XS@JjX{4HdVeWey>>ocx!AuVLl%?anq z!k1{7>YOs$dwV5?g$&JeC^ds6xS}h2?pk{;pUFTS?O$mWKTQ6o)qvr6ON^RNm7#itWSu}Jan zVUbtwS^0l(wg)mbVr&`sFr6Gm8D*2Lz;H 
zb?ersj+C`gMM+;gABlZ-Utg+*zqqw>RC-h6l4Y{P_EmzkM!ny%cB!DeTHStyHsEC8 z$auI!ss{63T$$GEO>|6p%YK=`e;U>MPi+&9GWYf1{q5~-wk)Bm%Ub>m$^f__@b5Ra zudOK71;&<~^h;CYh5rm_-YnIL`ME;M{G*p09ztIXtqnw%XjQyHEAzToXJ5`@$4il> zUa4^D6nlCx!nIn-1j~1*>~nuZkg74jZzXaHd~QKw=NoFV8!cJDueyEduGLG zFQ9y;Ed0_X!*HNYF`taGJL_!oH|sld&+ts|J-a&|U*|*UZNbqIo0&@o^ya7~5NCYB zs*s5H3x*LHiK};~7qftMOP*HFnAi(OovWM4g#}@^HpN4Efs^0C6#4nd`ZKMIeEmP2 zvy{I{UQ5$UVM9hnVihStkuKdOHX5sV@$u$R@rBgv^rEJ^9q%PZTHt@`Y7bGq&F(f zN6WcCE`XDqv1vFB7QV(M?fr_gw&XVhZ~yA{Y#SOHu0WJEt?qW)$Q7#EDC?^J_B3lG zg{#nPAVtu6nsX*7$rU*ovv6(n)mL{%4r@)W%v8ylXEPwRE zMn7Z4zbav~sB!#T$8mLL97)9XB`1fr(Ee~KTli{fw5|sk6BE;l9^38H_K$n=PkgEK zPQZUOj|6|*Lpk_VUtbTh)dQ3}Y99JnX{-6`ck0xtXk`}T7W_}M{OH&S-+#rykf>I< zyL7PY{iPI6%0UWcx;#jJwm-~8EfSlk7u}}MT1a%a^ix)&_zhElW|hTh7`|)vjM-qL z&&?%jMb0v$k&+CH;|euK;%1_X%{1762!Ni46V7haKBrz(T6mMrv6sMgRy)NUi{DhG za2@$QSQ8ywuSc)td!KdwFrOo|hT{75M)+h>zAuGO=MK-9&Z6kjp<89R;Kz^P0bugX zJ_$HqB5a`sk3bMs+2cEy=6Gx|>Fq$_RUKaJ0iC?ykzRXADv=dgspk_v zse2#VY<;iF%rY)6-Xo(|WV~r~Gk-l0kZMyuHCVh7xMyx-X<0ar@WLVpQ`qiLJ8h7X zgER2FC(Bf?Vk16#i9|I{434R3DlQE)z0npv$yZ>SZoC+t&9!Md<<^rB^=CI=zK2Rh z6y};e&o_Fv7*2Ti&VSDzGNp5KL--)j;c1p~mP;#CZuAk=w>X?wcsI4)*TX<7UV908 zZksIo;4+ZqqXnyJ_Em&0hka*3*_TWcZS@(JVNZ=`Ze=z00wr0cr`QreHIKDeBw+MG ze){zDr)MpVoJ%ZQWElio|KKmkmMcxK_8t~krjUk;8C2pWyVDWyEr>f@r$(D;5X4R_c!^=&}B9JybWwX)_giWu`}w@xE;{ZDz zJ5{JyF-Gt!bU!7{En0~V0O&@XXN)kO4xj&KUf%bW5DBegt*fZj=Z-4R=EOwmG@_Q-|9EpCj%eB(aVZfl*_l6VgB(qfU>?gC$&MYM&JOtt^*%Ll z=+LL+T_p$tlmQXJIZw0T5IVN??71QzdA-}A@`X(6?sMOxLfp*riN#XhnziaNiLb?i zB@HDQw=3<2XpHf};2xJQR%1(&q*&sKnV9fqGuBgp+_js^71C@-6x4f`FB$?M<#yH| zzrQb?<*OTbSEAaX_?M4V_4WL zdOWOm(=X?_4uo^4g?Ob_YXeyc-;c%+KVUnsSg7-s>Jl=19VrdL%5_bd0T_=fr2o4C z$GWOL;v$(uVi`$MuxpMwUrw`MIqX1rLv03-n*{L+{aOLXIDLD&%M91_BVr7P?MZ1`v;pIRJ_gF!E3 zV#9i}bi5#~C>|P9gkf$Lp#fV>W@Mq_0N&z?$^y`+-FAJNW7f(ySv*S{QWxlQo$V{H z>kmE2r**ra8;JM|h1?UnVRojqzYI-cxjmF}!v3aNaureckRt<$(r7&C6u}`|JMxuK5L%G z&ET|xUk{Tzsx-~xP+#!dKM?BrdcieBpfz}PCH(`%eTbLz{%cE0@jo#4EcaQp@$Pxi 
zoDtsLc6-mOq!n*AN6JU-&ceT#(BLS)Y}3OHhqbP?kZ7HJQ2#8#Lrk*wfN00~Rj3Qs zi{Cnjr~pL||BIrxG6e~iKRrklTc4Qawi?GamMw6(uEY1ua#a;F{sX4}@t04enuaOs z;gs}m1B&Z|7=i|O(-_NA8)2B`HEvsSOZ`#85$~ErIs>Kvs-8nb+NyO(Q`;_7PkY;s=`0K%d@{3voOt+@H}R{CJ%9ts44p8$11ZaEuErn|eHy}Ru5CoPGK zT;ID3AmVMD4K@XkyHYR4T!mS*+Oi*kS>|o~_ib7fX5xI$CnD~#(MTeMB?5LSoyFRP zgp4no7hBOabQ5gSQf2N_V$C~G#$uADv64ec8za<|1i$rZ=AVjzA&)TP+VF2#3ZbhF zGv%t~K%8}M=9240VQRCt$>-gtCW6fWf&(prE~htrg;|e`RCWcv{Ext($l-q<2K^^0 z82rxKI)PMoOSD)ypV2XE)GT=6GO~k^0OR_Cb1J5BtGuGnR<`*VS`JY8TrdvQ%fEFr zb8X>1%Sv^^zUNoWnYfZ9HOUX5o%){aNbnBH4(n8V@?Y8Qqm8aq14+R5&mE+YV00TF zZWB3ynjM7)8#K4oKZC?L2;U+jF_D+cfQ-+ zuJrrPs)%ZMu`2zi`)v0|58zVdPYQ-nI?kE(XGgH{FMW9lqgNz|l@{MfwUDC1t-F?) zb1G$@xFk{=W~M zQ0&r;p%>!@JsFAAegXMZNEJ+JgLKBu{Cz0;_J1Rd{T0IlK53%S!=j4JNOP8p!R**pKo@egfTK*-OB_UwLhmrKcTM- zATz)G4@dyHHF&Ea2c5E?d^GON23O5W^5x);Aj7~LKwo3SG$9hsDS#~k*R7_TQE4v9)J zp4_n&DnJfWduuK~+#Q`Hrx!Y72h`!D1;S5$j+g?r@4k18l(DjVz_ZAv6`q1#Y~QT! zIf4E_7h%V$ra<9yOi_^>F&`P1v&Hg6vDg510$bMy*SUl}rpKE782oFrnY~HlrFtw2 zZpQ6FPf5AIjdGv|nwUU0t%jV297m2;eep1Kn{}?uqt?ISGte^WWx(!?>J=`QUNNX+ zY_^DK{YN4OGrUIY*=jwkes|lEPkHtj|C;9^)}ul&QLwI+?lFMcrs{6U_+qLR-a2SC zWr7L8t|2xJ1TxF!@BX`>&=8%_Gzcr&>Q~HQ2U(ocENewg|GCknmitgYjC`uLEZ}LF z>zBk$9E*Jwh0q8mRZ4NNDR8(0Z+3l*+fm#*8v8Cd^wL~^3X)NgVO{OI)^cpl^}R(K zo|urVQ>F|sAbPIdgHDO~@Dj5fEx4Ua;m%>vkx3o1$awhO82-Yy18jD&n;$#;Ez0vd zmgKjYcS>xz&mObL)|fTX*m_<@z3d5u7Cmpy5yG=&*uBn~867zs0z=#6J2}$E8!x(v zbVm6azNXRHi3#;4B>f=FT3L_mYcNtR$_*OZtG_Q7c#vN7ZTQ!Z!1C4&aGb_ujwf7` zXq5A}yS5ec@F#F={%yapTrqPnwkBeH$iuBBLQQP#n`PdYzr*c=oQfGAN=Yoz+Ly-( ziZ{>O2Jkea^z$;iX;{5YR~=h;VaYAM)+{F;o|3^1#9J4S8wG1Z<`rjTf50dp=M!LF z&~dRCtK6W_ts8ATt9Wg0YI1lKM0lJNF5@8e6;FyWpgWI-5AEVkLLIHDK;gx`!ky8R zc@B`a0U?2;YiNF+xz^eHmVxnG99asypwBt)3ch?}Q!8!F`d?#N6HDQ{>5uo5$Y&^8 zPL?T3hO;Vj2P7x}heVVl?7-DQ>-qs6@ zx*IPo7mG}~MbOff_D~*I;X<|EA+L&UN+-F?p`o8$T}uJ8I6_TB%zQOsWLK5b0jr6p zI1r}{KKZnt`GpkZ4cn|qF{75szdeBcp5UYR-$tCcC+5Ab(^VQ&(?1~yXJ={V*l(*} zw8lM8CR}ier>EMXN6L8{CU3DA>LB?;P1p~iM*1vG?^))~eG@_F-)9_V!QQ@I6fIRc zXv0mVH2~G|iaEWcnkgFe 
zq12eBmD^@6r7C~|GAn!I(-S&4&@Oa?+Xu5%v|aZJ<+%7x1bAJ(yrgqH0s>+%ddQ3S zj4}zY2=bo0dZx_w(Z@3?s_nTz4l>|=CGc({Q7TzNSa##Jv2kjzZxmR~<8T4DN!`=! z$-YndUQ>DbD#zq(O@iJxm#0TBK_J28a(Skg>D+PBDyioP<}@NopfjG`P$tZ=NR&<& za$KW>s(Bxy*Q35imo5F6NvaulIu``uXi9Lq3wRPP~WypaaVM%?#-@Fw+KPJtr&tky;6>e(ZP1O-|X35OQ#_}VzYe1U& z*wz(pTc7XIhIrwjCCs^B@)}sWY_l?&{Z^}g(a%$fUcN6@0?vyfHqFn0KTl-}@K{Y; zAue}qlB0!{gINM)fVnB5Adv-s&^M}tvUB8&NzVbPuA2=~#&}f7#<-GFmM0pHWAZ(| zUok>Y!SnFl{T-|l+Obhsn&C95izV~^bZCw@!I)n5C;rTTGzZ7FLayvU(2$&p%%-$L7k1LVaA87Or3KS-#i1{vu6%spm6QU^I38 z!fhpFZxk6~@{#Z2Y-*z!m*T<^bT4F){fZR4H%-=NHIiLEuuM?pK^9$&gZL1%Y~{JX zL8T*c;#pb*&LeH9g)Vx&nnC4NeFQEu@3dxpJRN4LM_p%Qr-Y8S^{~AzN56bs-fg+H zEQ~TaUj8*zu^x7d-Dh_1Y&12MI8@wtKvB0Y**K_x|E5C$dVR1?c(aiT?E3{tn&~U0D&*d(*H+_lxCZ977`_1f9-wiP};j3h|uJSwk!}Ew%pL=3lMRH8wTBHP66@6!@-=^{`S8=fety;cI!^rr09gSnK;M0=D%hpgu$VW>MvWf$kkB1q%u*hYpn8}W*zs>BOrFM{tJ)!Zj%@a zu&*`2Y68vdRNp{W889$ zQ&MHPY~~97T+PJ3WLe{1Cqe>pft=) zY&l!cWw~%LO90$RG_I5aEWfbPBMjWC;dLy3n|W$T*&*S$*8e(ti^7X_&H;pD z9eCa9+gl`e(bAuU&o{JV)MdO}1r^SOZEU{D-fV-u&NijsPxSFTeUH3(yA3{8Bhh>` zy`Bl(RcV%MX0QY!u2#DLjE8m)L93p$ap`R$mJVl0eJ%iSYgAYm_yg+A?(b`$S~rme zak%NOZr%8k!$=rD2uupXYArtAu{S(6b8njT+roL~=@(v@UpO4fUvuvJm-wzGbfnt$ zy1465ir;*|mVeWHy{PNjb5=%iy$JRr_oi$qtkg@Qx|^`Cb#?D>*)!8a290tC+voPU zj_F9Gy`a#f4_s5TN&ls(fDWST#g*F zhyX7-ZhpP96?R_qi@<+C35wMRl*}#bv!*5!}2HSEdI1(?R7AB?ipo?Wb)gjBe(%)1%!ncxGu<`;c%t;8Nr z9YZ}rbKn~LypuDejVp1D3N-Ux9FDjiYd%u!886DKxna7286+zrBW+OM=C5DC!R~|e z6>awU6>wjxl$hJ#2iV_Hkc^|FQpFqNM9cB2Ni22%g)Azp^Z9aIuW7+u8KX@xTK2jm zb&C?*Y4c*wHCRoK7X^%**lWOi3j-2W9|$qwfP!t5e&+ww^l;huCpPh9D?3 zzkSGcFu#6+DpQ)?@@_EAV0_%6?lM&G+Hj{xYm;ebH5__+5&@2^xe&Nk*I@7Ma9rK0 zw|XA{Oa@QSOl^?HYn?6EP-K7|Wh&DxkiYP%EWdRCXQvaSpVx)Cvyb)GH4^R&iuGZO z4Qk$qPo;WMfsLG(rrMJ8TTaZX%G~Z_-(p zG+agXZ-h?IY<)cqq2z~ySh6j3zk?pKG#-Gb7!Pee^SzB07y>r_$^G-WM$2<-C*x(w zsUz@9{xUOYU(Et@KnRo0FHl&QCvMEDZgsHu4A;2YTK)b3y6)}_IdI(=GEU@%U~*coI|*kX!LCeumL-wl^V#Qql5neZBQKFBb&J zPY(Bk9SGLg@1Hc?j$@-!iWUlg1*ywV`9%JvFFbSZ(I){VkPMRm{ACjA%>tXdsS0}fa3XNf$mUpzhKn5xb 
z@&gyiZ_!;JK;nn^*x4-fwVO66fQnm#3S1ruRj;_wp&V|ukk;7u3HddbPS}VL#juX% zl0|1L8tjknF+Vn1>HES$0u8`ULA-hx*|;nQuM3U#>)b)dbpS1SAeTY(NuaEJkDXFn zfA&+Dy^#|(4n%RN4xEs~p%lpEb~Nv^-g4U=jQ3u?`PXICCUjkTY@0LP?0xT2f)#ws z+ZS1|iZ3z7B>>5PaM#nl|7#&Az5n$|*0)-|b$^!`ic+t)tzEx1n~@a6rAu0GYpy>M z@RzjJn>N&%J&ylWYJ>bK|idKGq^{?-P-x2=Oq;=nM z1F_mg7DWbB6yVYnG(c7Xf3dvU*`{K8qzFofin!ot6k3&k{bFDGx}*W##DUle*sr#r zR(u+eN5m@%j0^tqGeJ3XMJ;WR1gjx@O0@O5r(FrpP*sx3zsHA z7A28Tm;aE~kQWW_i8Vc&x=HC{7h7->N23&P$C%iUeQA#Er)cNcox>lNuf1D}A3hYKtr&osS0 zjOgnhYP{PFyW1y=lnfWH3!G{H(3t(przj)~WMX~5?heFJ>0tUgDatOT&_MbRilC8|k-VtI%z2e-qmU9l+MHjnLhzN)IK z0@RAF8}Y9)dQ>X`h>$T%GOwzm-X6FW0jf5zBCm#{5sSI6^z_r?pEJlFcy<6fzrbF* z-Xf$D9Wd?7?PFXh#pfaf^g`T8kE`K=4Ruv?rTu$2Sh9@BkdHl23XSJ4)fnxM`^7ua zgH*X@Zs($lw}SVDH1kgMH;S|>7|DWp0RK}8hBFA*e39>aArFSDM1;JZ008ITcuX9W z^(i+Q4pq7PNPLvEKeB#x(wj-iDw_y!z@@|UNA}@M?4XiQdvXvw8L!bOwG0WW@6J(d zvVt*B_@l-UZ@mT$K!X*J`)9P@cIWAWOTaZDt$j!HSQyP7Ct(Mebv+7oEZnbEyrI8E zvwD^Yr!a0+V{gXPUbBo9dul1uWL__fITK7(|2weI8PO1MxEy zsJp^s+UbBGwF=1on>PLdDRm*|YCx*ba)QiQRNWTQBqD za|K!?GpyU@Xh znmyZGX`b-cs861w?7W$Gp5pjUN1NOCvbS9!`hv2`NsK*rYK+A~s|QTO?Za^kCCF`x z*d(C1Qk-tQl)RVNzgXP6qoU)m5v=!g%d8PDMX_|MlzsM9C8%XQ7y-cOjmUK}tl;B6 z5QK%B1Z@vfDH8(vW|+xTm<9`6e(wuAGzCUN3YY zF+qCgMi#qgTC5`6cwq>4XmT5+g(-V6KZg?^DIKj>^%WSh{Noau4uk|m{TJn;0}|DQ z*AZ#H-tvJrqPtPQp_Y$FayXglon1fy_~OlxH1s=mtt|s(VQ;NxgvS;hJYs(UPZn{R zWkN8ljh^=Y5BV)Ep#N)q?xQ)mMA7-_aqn)Q_-_dfiRwPxSNMgEn72urtrqrvvNYHf zn6vYMwdJ=BlEP;Y+ZaT@!VxJ)a~&U==n_|mmC}}1kO6%T_8$oZSNPY!BdD|N7mZsC zf2{;I+~_X+a07%yIDdZ)sDQqBZ2VPMXe_)PDX>K6D*C*kxSn_>pJVZX7+%tDV2UzM zIcnO=sPlZSFR|b=nZ0S%)vyH(!^bUQe5<@sP$+q9HH^Ts#NXw~F;|=BnWxNazGr$` z0HTD#{S9(RQIgkZ!G#-y)%Noea0fE@{B}bGn!R9 zg@6e7wC+_Cg{(_23Uxc{qi{4Je*z-5tsBii5_LtMZ1dsb3QMQxA9bMUz?6IL0;85v zT1c3WU%7c>%jn?xm|d)io(+dcSRVESBMM+eGTgh?z1PSbCS0Rz!_-Pky7r zxv2M3NY^q*!eRW`h!gB^B?{2#jd=c>C5O{YjrueUQI2K}GVP|zjX#hf7b2-`%08de zjw>yGK~tEjqze>CKJ`FF$Pk4pzgAMn10szK`92`+d}bVT%|-H4q)S_hFin{?dg=k{;@p)--H(q6myKYa#ZZ#L%Xwk0MYSNY 
zp3<*aN(|l`WA@i_o!N=1`ShaH4x~^-L#hv@Q2TTGz?igVC2MHzYYmbD+0O7>jPr* zmyAKkx1J)@1ajlOpoSw_2;>czg`=C9$^BQapZdqo>ZbgJo_aCV{{VPb9*Hi@tg87%&_ZvBDNU zn1{7IEc)@>^Tk8o#Ka-(ZfP{!@Sj-DHp4F<7*rRRWv%dFf$%YW;E;HrZv})2S3v0v z_|@#)fb%6g+ho4c9Wc9WEG%SNOmD9*l5LZDY%_4Ewt<2Gkg5Seco4{O_U9dsv?x1( zy5&Kn@oH~EzN*aWU7StNvbW(18c0=Xpk#TX`4-_^42Z^b$DM*1qY``kw$e3OAJvpu zOlbggAE+=rgcEl@VoHOcN`MPQJUCo^V?fXfBBhPY=of9X$qn*Av26JahkX+FQ{7F$ zSM#N(om8%q!vq)!!>@>uUeACvTTNA2YVb`YUy)qs10Hy?J#?d ziZQz2wxoZS6>F|UWq5gX2?*57hc6seK-nT9se17?X!IcI{>Pct-(SV11@E^$0U8;8 ze*X3TduMBa@`2$-Q&MTQ?`X}RpvTid5kepIk8^!K`ibBP zScsp5{)nF@7^4V!h4-04anCu1^wm1W1;{h0Z2WS}`TpSoD9t{@|DI@`BFYKGtRLsa z0CkXK`LOo#EKXflo3SV3Y$gyZ89X@iedB^ey)Cx|#nBALo=NqzxbcC!lEM^b;W=lH zw2W$G{JnY)%RJ(gsDKh6ECTenWR3cqoE#8uRG%kQ-FqvP03KAe^Y+Aj%A=sy;Wc0{ zfT~_oS!pc!MtA>&cv7M&-@| z#;!mgleqFkQ+X@aBR1w2!hNg`2puhwM42vehhx4;;n|P>HQ(&MrCk6+ z)TC^F%6ppuY`^5ZvyvTi_Q&3_1c`;Q1-Bdg9J&2%ekCzb{*JSt83zNh$Pi@k!kB>x zLX z#Hzb!q4`dI=i2*QTh-$eaP!FlYapQ_6vZfC_l#Y6Acg*+Kq%Jp6-ZvD2kItg`Mztq zu+YcPPdVOdYg@j8uz@Dv+y$0V1ZZG@%uT=q$dm)orh=ywcZcnO3%=}o3ss=OyU%2V z1c6o*7^W=k-z)syOFr0X){VOS=XuFZ)B`M~R)s827@zjhum*SE4taxqn5 zW>y84F8omz>1BO{4i3uKr8atLxd+{E{Qyqj$Jr`AJ03~sHJ}Xx$;m@H`B`t+LSHVo zt&h_{%D8CAdZw?>#Ss)vA7p(8<03vNXbrdpV^AzDGNyQhasD!WdK;th@>0-aMy=4E z`Y=vCBaHzX3q~sAh$%{o#C(_$zJ=z?xsN##fa5fbn*&ORnHt-L zCQny&vi}ehX_V+B!y-4kfbKzMDs+A(F4z;`46W|}Xj*0gxA*R{^v-vGp0Ioxw!h%J z)DbGnT-OogzsUq$_(M+g`tSmX(*A4Wy*{3gBk~tU!c;er1HIj|C4!&n$_SS5f198J zaQ`;p|2BaT2YGa%EsObh)z7#c^A_EWn9YBckr$X`LVRO2n*UqYt(j{Zaz9>MrS%Vs z@Q!iWpHGKHO6*<3RiDi1ej54vGPV_EMqM^@m&+32LJ9bn{gDB0-A!(=eGa|}#wyn8 zdU|@!Hv|y^Z3QC*JTmOW5TQj-@O*Q1GtIOoRvH(4;1)JVO5bCV#h_6FL@t#VL+>$@ z1dPuP&ARrkPxF!*bai==eOUkMR}6vJ2GxC_KY7v?+%YBPes!eGKPjFW);22ozZ9y} zE+X%d?EMkbXk9Z8!N`rFKoUFuq3jtkphE#AC#?Bx39CuAP${6KgcX`$z4m#N*zG!8 zSIy5+2{e@@AN3kstYdpJB!B=@yUt-1h;%?iceD&G(QQ!IEDLX%$zLFJ=ssyWDlh1% z@sXlNiMrXXd0rr?F`=uj<*N00ue_H5a4z&1?%dm&&d84LvWv(Z=oAINatOfn3cU(J-7QPjyH-0&b3W@1s#R#^_MuvPC)s~V*8~*j{m#|IF 
zJlp4(l|!L9v7sDKJ=>aH(%XR=-PbTq(2lzF<2fJ+j$Li_q5_xGNV+wNFtO-bETW)h>CByyUf6z0&#%K zEMN6`6SBA|MrC$(_R7GLNew$TCqv0rrt~}gqkEZF^umf<1?#@m&FiTOYu?oW1SPG6zbfuedgZDuhoLH|6zue6FC{GY=@>S`X_2Qe!=wAfg@r(qHE1>UpX!;|0n*-eo3GUIxV>{3 zi4C!_jl}AJSz`B0xX994#Cxm{(sYoBbHZ#HPZbn>J=Qd!E9 zUU$GupvejvJ8TY%Z6Civc(U1&lJv}Al+OLLdp6$vIkE4r3IJd;z(Qs_Sc1q4q=vx* z9+`=h2+5KWL`o|0-i}uj+P2y?dzP?;7MJ5SDUKOZ5|YlS$Cl96lf-qvXMGU_#8@uz zt}j6QHHr%7pmwGF92UJ;Z=zvCOhO@f^nY~n|4aWDBG~3)lK_x`iUj%VZa5iKUM9JuPXHbsPiYG9F571kzZ@&d0%{u|hUB5rdLgM9C3 z$BR7&ZtZ*po{3LS!Am>XSJwH=wE{-GUJrCW@|`zF0h78tDfUd7{PN;S!(sO&pnk=P zQU|EtZiExRH<~ixR%X?2m7dQNBLQ8~n|UkfthrMOBj~J4hnp+t%O4OT5#NX8s80}r zNl`^;U#j>(x{t2{ggz!!Hv%M&{g8=Nod=_IxQ<#(E_q$=gk?RX*N`6&wsIW|$a)+u z_w(IdgZ`me+x|L6cC~18kU=n;Xy2VL%Z6a-S?IxN!LJPO09n`;^eIhvzonzS*vRyK zD-7BvxwjwciPV%+BpAoLzRN5;xW@kG?dIy8mm9yIwC85Mf4DoGCGC2BKu)n83bLAP zN3$w#+f!^z4(z0^d^$;*MzaZGE*E!7X#2Z!5l$PT3lOj?}{zhzS~0s z_@MgNJc>UKvM!bpfI^rjl6khbWy8d|1x;?p^Rox1Bgsdp}Og20_2Zw%|8BWvb5Uu6 z*M{e;I^(T=jjtQ%$!FBKJ!maFLxAr$EUiZsN*~;3ne}xdPs>{!NRX^wk2) zTCP)q9cP{n!-+W&c4YC5zW{vDnCtN6|sI z`7z>QOAE7zo7-(y|K0G!-t91fHKz6*Ynn$p7%oLyBO+QC&BSSRuHlXq4*Cak-qsI2 z{-rW3m1tl|Kt+XAN9g!(f+|~hL}Mrl#kyZ(K^wHI$rNglM~E55I`*Tyzdk;lwyvb* zPclD+1dUN(NKfoYVYx1j;Nb6$Ny65-2KpK2HC&AY`LUfC_&Vm@8P>@NX*8I81(O+yJI><=FDHdUqX2IB zAT{PmvGaKvzI*vh#~}mo?|>dd|LwC6=*#6u?4~Nr)LwM&&0c-~?(ccgf`8lj<>@N( zk>Jk!0npav*nU73_!-#U5#K#aXUugRYVYcL)g41E>103^Q|88i$T)D_=>79ddH1~7 zPSR$GG-g`6V4WvEuV{#!VZrBK;QRqPPb6CggT`Q+EuHtM)Yes7qJF*X#sLzo;p$qK z-xJ{S1?0^dUF5C;uWA1(szGBU{!jD-FsH>i1YlN(^J#NPx`e|ly^YOPm-TOv+&~kdSH;4L9_w8OiE#oHVg<0eXNH zeYqo3CK;r}*?{-+S&^d$z#W)B1VI)6y@dC^ng$E>R1GQTzlJA#U8s%=_i?U7ufZ}I zv~r(p;Uhfmxt->yRyR$(Of@p^caJZ^rH{xz3?{Vhvufd8zV}?$J30s&cZ!5UOV)Qp z9Ob+xb6v#6cMAJ_I$~}sF8QGfzfj~_<%S17x;f;eI(%8Kutu0)s+u}o7w%WJdi+hQ z4F%+D5WpM!(4qwW?V+i1&fX{`SJ0+LiNYbhRvkj1s^kS*~a@ z(Y2<>HLTU-^IRMazHT;stnb5S8Lq!{^jThLNFovv=XsppXg_>$=((eJbU_Ljo38N& zoe%@DX`e9l!@*ltBqY10mrfa2tEeAET88bAmXd<<0Ht0c%LtI`2*WX7kZ{i}71cla 
zh9rC|025-@7XfiKey<@|PkO;rGr~qxl2KFxz(V2Bj5PCOAS<=o zjPUGtuUlhFm*ZOo?k4tHSCM81f5&|Px~oj_zmu*)y67OTs+BRZf>pXUXn~ zsaSJAf2b>!+|Q2s0|O@Jg$%gwasg?#!83_#nWvQa^b{}&t}xjClBfC%XeSG`_q zX}pMvfC>9tlx(Ps?HKk><+iv3@QcFD-7BTbwJ?lWZ6wVok6vt1Z_xt~qkqax6A7RI z*G^E7Z-mz+MbK8QI$3O{^c!4f zZ*Rz-mI8u3tvNTvJ_`zC< zqc79TkUq8xORJZd@W};J8X_cbH%ETn(R4TgRVbr*-oj9}^eIZWv(#CO{tAFtU7ofZX8%1Ab>$?rq`hF^#%z0*kX#Z2K;?d7n-c$*btMcr$2 zYyA&z#Ov&#j!*4l#tPqzPQBc4kl^2H777Y4ijxr%VA*`Y1y3C4t?5>Ukjk?$ z2Z5D9@|LtTGBtqEL52KXf3y0_e$FO0g>8vfLJYVcX9#TQxPzNgAXz@pdHk;`HoY@p z)az1jWy43sN=7;;5r%F{?eP5-z%k5PTc%C$9e~jo!YDx@l2_z$w*jh}bSkF< zdZ7=#&RT8jJc4Y>ZR#r!YcS}=@|dFDYCPnp!R+Q^QTv=ceSkUV?Uj}LIazO({|QT{ zy26Sl)KO4+PyK2;a~SWJtMuZ5_(NSPeMi>jV>kjoT|E!f6133RRhpsB6;pMD&bjvw zQ`bK#3^z(hN}j>oeQ3a!H@qbFsdGW@UVO_yN3_y8D+>3r>?|GabU2zr)(&z|@IPCacAd1dAXVoY*dt3H< z=~KU9#GoF0el97EuB-*k&IG+w#! zfY}?6>RvuTE}M#|qrbotc^&mEY<$aTURxp8_5;!Aurb%*gN#t3lc^BFvNCm7wLT(w zH2874#G{d(<~Pau2v1lfbxf>D1fO;)C}bRT%QZ4tH7;Ib;zyCu?IS)yX>X(kLx#L^ zL#%EN)`|JZ{8WGbgpc2dt+@aTWFf9M)g5NcS3k(v^x4r1Ok!=UjHips&&mogK=`V~ z^VIAu#bC9mdGbLyzK+%W@l9@F^)rp{sL^I3tmyLIW`y?(bj{teN4BTw#IjLy8hc$| zF-1*whC|@&Qzj2)@ey(3ArVf-ewT0enCQW#XjdnP=6V1nchU6fbnL3EdM^9tvsYnn zeI5=Zl#pbRl1p;(#hQ|97msh9l#N@&jgmF#n*qBC3-Sxkt%-Y_|5I%`>`{@4MG!r8Wq@(LyLXu_xOb@^d)i;F#XznO(T*rx?FjN&_Xoaguao~{U8+=sV? 
zNcMHWre9RXs}rxRiK9lmyJ6w$S99K`{w<4ghI)4G@BAr(fqARCQ0(cX*Ofkv zOl?QA5WMJAG*3grZl+eAjI$#FH@0WyTE+8RarUO2EcgbMo|aab#QW26+2h0Mr+0d} zU8^=mNVq?-yry@uECe1vGM+EbzrtR&_faqviE%`OEbh;v5M&k@9XSQ2c(FJfQA&f_b?4tS5iM*jnUj&PcS`pLAa4Q zZssFTeEm9kZ?H(w15S6YHV;1b3hH(|$Tz3+G2>dzB2cS`GA=lr*jbJ(EQGU080Ne@ z!$xw`kFQ*PD{#O4rM1L#1;hfLgKclR*(MmVqnG_%)ui2Qs^v>?%#}(L&wIS~J#OYn z2Zqh^fYftx8AcCnP2Sy{9Y~5aURYqbrQ{-o0pIp20-8Z#luwcu(Mu(_56DEPA3}Dz znmwTg2tKDkD##A0eyA%%|J^f8MeU{*_l!Hf)05b5ryj=hAlpDB3?Vvcs&j4nG1PJp z+38vTux4jdV%-i+Omy#lu1C1+3;+Y8^S&5TAN zDIQU2IcL~mvC*eKR|+D}*9HEUgnnazLmjDE*4aea=)*$+jYa!6s@c}$n8k*1Gqqh3 z_V-%T0L&Wms&LQuD@gkv#KgVrKNPxZhw)*9`KBa{=(%@Bn(7Vpo*xxZO_h8Na~Pk1 zd2APqBiwxyH|_b^t#l-U&6Xg5za&g|mUM#lN`nf**CL{_VapJj7EIpxpepK=aM~#X zfxs@?ah4X)dMyd4^kUc;Cvi`0(KoOhLmw0*^fpe}>%`I5Om{`>RFH0!^?dM{89F-~ zCm)2uis>%jjzD0gQO7a&qPmQh&`{%=1I;585dJrvE#{gl?uR7CBCj~oEOpq5 z84slT7T<;s^XY98I^t7Kbk2n-ps7{McmvTvcckZgh99+`U4|mRCgDuc-8l@$9tejb z?0N0{D7hP&e1B?55^>U$@*IS=9`*Yhh}ku;k;!EdXnhyS^6pjVj|E(KuwW1`x($nY zgrLmE#A$vGCeT9_@T${^9U`e0?_fS&hZ68~y<@mvO0LlJ-M!>Iu z@{s+uclgc%K|QOW*=-REszK%B>q- zLw!IQ$-=`fFc5h7pD__K0NdYbF1pKmNwA&kH`TUlzVga=BObDD^)SED)U7xI&K5gihY2*e00-(P!5+C{L}@N7d%6pk22vMxjH zggU#JCb9@N`lc4An@FTJk(rpCIv!^E8iG~)gm_qTf6x^Au}C5_Q=!hDRooyZzk1EB z>$2RwCfzLV9HjdrMRntGO3f7`Wrhi6)+TAz>ruGlNF+J@`sn7?)~pp<2ZVt;5pPy* z&OF)>T-U7H)}9+`mz-R(zax0px8cGbFqA?&#e?hqGmzH)$ha2_0|AGA;ozF0uI_$t z!7&iOiX@q1qZEoQ66FOCGk5dk8P z=`}RoRwYdo)EhjoZDs-D+FviP5rvehjqG_PBIJ+aimZ(Ohhzkw$dJlnA}bUkgI~N@ zjZKJ+1^+mnVEov6uNVX%-o0mwH91^zs3TTUhk-C){dGUUe||#?zn>QT=cUNcgyxiH zyJ(@V4kG(se{BzZ8vW<@3R0T`x!QhLiD{4@T^p?|rP?K*DOE#(e1huFO$P7$hW1E2 z(y0UTWP%=6ygolGlhmTwM=d7iSW?GgYZ6n3yVO8*-Qu+b3Soxhk#S8!R%YAW0K?p{ zZBCwf!dzN_%FKjQSDx}|z~jqRC)xgq2;$w7trkQ!X{6Kx8XgB4mh%A*!w6CCQVW%d zGptM8!H(k?z4?V>ePOUu?qA4~HgQKOG@64@VC7nCd_QCY19EE>lS*2#{6Xs93VZlrSl$~mR0pr9$=Fiu1x<0d$i z%kS?8BRro-MX@y=e8B@(!!qP_3 zg!s()HgEuNscC$_^m6xSFhC4EMA9C``$&4aXIVx4$GSJ1EKPAKFnE}86n zoocxhCkkv6p5X3nq;fs?-*u1XJoNs4yT}XDo%@%U!$?I%L3GAIY#R&`9X`$qI?|`7 
zzz56BebkEhOwgMz)3ji`?s2d!3S98-mmD0GpfsW7YVSbdL=?fE8q3E{&ViiaiOC+} zRTAqFt>xd|404>4m1%Ubpxf4$L``azNz$8}pT2Y4Qz9Y|bln{jiRLoNn_Xm;Nus7RX)|C=wwVkeZ?4EqJ-PvI&bk-lIn_9AX z)3CW32h_7)OG}-G$u4rws}FL_%u3l<>OE%dk3X`wGW2@K;&J`tvkj<{(hjceOW958INIq{xf^hNIK~ zhDR=8Rz^d+X_ph&);hGuL6$TWn4#(M%4sLT0_`Tkmy!V6{H>j{T0pUnt1agWdO&P;M#O+rG}uk804`%=P{034FPjkJ!+3Ow#`E|>+NF;MP_64K+g@daKX*?O*vbVp2UX)WFUDvSg zRV!Trzj?f(0JA4HEWnJFJ-4-!TFwN{j2+;T&)~mrtR-g`r#c$b9iIg@1~fV@S$jr> zQ8%5*DWmTUH+bB<$K@L0J0`qj-)=~^Zm>UQPEO6-W2sU-=mbNGi$Zt%(DIW9mfL~; zso#?qI{D8PdPTMa-}!#+IGL!9w8Jw6oZ2>~A8MV$B-qUuX@KM{2(T)g|a=P}-Yb@a#Ed}@#G$FvB}zWWhBAikb@g*!Mk^kZnK)3MFP zJON6+;cR~1_3j4PD!g^m4REuHygKT7dJ`5yaPYw8r^$?k6bL6h$AaY{DzxcUt7orZ zz~H9@*k3^xJmy1p=1082McAF{eoeoB)wGTqWA7qC7_Yhe z^3%-F%)@2dtFxoquYcAhi+R6CCu?#i^(vblA1A}PpZU!j;5%0v7Fx(Yg`FLD!dnH< zCa}#+npvB|&gOYsLUq}$t&NOkPVU&%UhY#D|In_IDniMm&$^JlEDBEIGNISNINkA6 zjsah4gT>PPymmxeS`ywt>~SWnLIo=UfBBLQbUd;?X@&^0TjWUbz_N2kXz%4fU}p!P z%vdr0-9$&{glc>H$5>hSV7J?&W$XHPdI$Q@Vc^Wqg|%M(NiB4rKhf%oIy-YlavlF% zTRR|RvL1_fd~$rSHLg{?U)_Z3v5M}D(%?qAW${LunntR8dIQeJFC6&3?)YwS+Xp|s z*T!yZ`dlW}G~8%XczdEfnhoHE(^JVwP>$F-1qRLetR*ZE?BfY!?WrgqEpm8t36 z%VSkN>R7TzD#!fc2%||t{a+puMy7><xd#c?$DD@$SCZdynXmIMeE+Wy>cy4(H`8r z4VZ1(-KxCgiwBw)!od=mj zu-AB-OZIVpQvZbm@g9GLmFY=)0d2B)(}DS%_wSE+Jxi$)hX+}rKoH1xQ}S3$cA_WR zxZrS0qS2tQWC5!6=b|Ceeug)D8Nl9l>t(6U&4d6pOE9wH{ckRc(B94kk@M|XesaS!d=YWmkxc1s zYOtc{haA}QEME()t+OvlhMbV(FxPAf)#csDscIxFnK>UQN|+calkib(%9gk*9+Ms~ zio79bxJl%Hg^7$xn+Cny({8jv`W>IJR<>h6LY-ZZ*j)Umq{RLkn6am18Dp_RHrl{$8 zRz>dmmBI+Xx@wovm^|Zgj~Sg( z65Ts&Z%DAS}t?-;Y1%Q$a>#gA`9(Tz_N!(4?N73r1b;%LOm;M+~ zSMXbMBYvf4M&WYA_eG>_+N|)^Mex%I`k<*Cr>ld|S@~q>0h$E)wheg5HzlW!V=HNc zIhFCHa4*pLaRY~@`DfL|fT8J#T73=rZsndtUVT`QOIj4$tt4Df|4 zjMz9hWh-k3p~8#+!heuXE{Kl3h%B%!5uo=K!3NDz1qP(6-GE7ojN=-;FrF%-cJ4@% z7mxwl;r^>rN)FLqfco?s7aU}u0l?>Ee$i2^2i2l4f#EVpC;>| z8|o`?@{z3=g2N31renm=G))lAR)PsT??zqBe!W)aCAbdqyMdy4L(@F#=PB$u7IN@V z5YAPAF1_OZki+`D6fTf>ng>s|po!pJ)o{1}GN~)E4t6D@xvTSCf*E3hsUHMFsi32S 
zZx2N}s=JjTl&R$!bJuT;O^Gc5TN;h__)1o7E$#@Bq!LS%ohgy=2>wy7f**Pb z3*_?%RZ%l>F>O9X+*YKCLL}?^^5BO-JE~7+J;RawcIKR0qsMut?M$kps&W1k? z?)%N}oN8@6fW1ZZNtkuunbZr1;PNq2@D7Dwdy}-pQ6g%5t26= znvQ5li6htLS2BboAB^8y#JN$TG_*<>#2BdLLrS4hdvgv$eUNr6GnMC>9=D~Yd=&!I z-z$f^l2?NhHL*m}zR}a@x9~2b&USV$NGv<3?s)!@ zgzJL>rQhuK#1q*DAF1#>WQP@h8&Hx28MoEwy`(u8Assy~zPxmA>xkFZm55}jNOVywgr4zmq7#ioiW6yNv#uIp5?xqdkH^Ww7K2LH(S%W_Y zPH0I7;qkt+eOsgHXVK<8y+hWj)u<0u4B^LQVd6+=NGk_;v1n_?JR;b7;WIlUtf!#V`T915iylbJyPQgBviwutQ44lR%U(vW0NTNx`w-Z!}QKiEXWv@bRSDkfVsqH&mI8EJ?WCW(QTT|za@YfUhy`Z zn`OB5H8FOJ$Dkuo!m!9hfeaKc>e@>U3L15pNk^r`Mk3qhG`t;~(!w2lAf-&vb@*p6+h4?i22Bb%D zLY)T7a1NmgM<8cuZM!;bR~QFu<{<%pwp*_vOK2ApV* z4My9%ERCI5%aEY{xZTTq-EwhMOurupvA#aqIeeY3j3|)x%lj^8$U71}8E^8Vr+Nec z8_$u{hwVA+_ruQ2PBZ2$!RpdQ>w|A%2<6afT zAx0B4-c34|H_3E!?E3q3V0t^LZbL6|6-aO^zW=;Jpa^4zAoZ{kS4yJv7lX_Dhi{;*i>SJ!~}B)mLt z?ftfD^Q;IR#;Q6I*%5@$`iBrLxeVRz`{=BF8igb&Z1SJ*j1Z#zxcu`j&;g)xXavu~ zg&a?`K;kh(EPm6j3kOcHT~%nI?MF)sdBS6{Ew^<^n5>9~u8-GQlO75_XD#0FyXH1~ z8x-9uc^L#5<_Bvg5ELGlc(D;s%KjRW6+s>?LZ2DkZ3&X1=&r^4xe=k|a~mdv8Ko}& zT^f`w+WEOC?T%k#tj>wRdjQ>VxxiE*v>!2$R z0#TqUaVS0Uyk#q~V%+0LBUmvMe-@KUklvFq0F7M1@7VH!L~s#G?al){530KNa?O+E zCH0=Fan^VG#PE1LwRoiZ_5a!;H(nS<)lU2EV_nPbx*=Z?plAUMcjyD%LIn)(Fwc>y z;Wn<|jn=#BANu|9h=`d?7;*?#JK7+I0B{*!43q;F;)4@KI>PR+;W1ID%7666y}gBI zi2eU%llb(LJrf`!0;=o&k(hgx;O01O@h*jV>INj z;V`?Lw?7Da-qoljTOl9XDB8jP6;ox%n>EYhf#9b7>L_%^9mH8zzVlXA|K+|6C28qi z(>atzMlbP_WV#7z&K~m}BaROfpskqToXeE`c`mbV@p=X^k38BFy(!Rc>n3XPm(Jlh zi*-|wGO#6LU5@1Kuqfc$kmzz#bku(tC+VOp7H5Xu2H~IuXU~k=-5_r)u}|o4!_;hb zP`0lvci84j4t0~rZ2(RVI%;_1^8s5gur4rbPvluYIU+u)nRF_50eR zV!410S@$7=R{X88NOn`D;=HPLa~G1j+yUgkG1I42JC*-`k+CZCEz527qvHC{=TKvJ zJb33~9E2q7;XBwV<92WeW+U}k6875La zjiTL_6N8DlBru7w_2NAg#8g@*4k??yg~8+i`ddiruKDM6NHogV^uEh&Y9P|UC}esv zZkMauho3tYlZuOr3tC`-4jlfCrG-U0Ef{D+{kkChMT@o|zU6!)&oSAkHxvwz9$I}} z3{VwC*vd{%PcJG8<<+ippmnb*ETlaiXpdS?S}UM^(MNNu1X8K0eBMt(R_>Kov$>4c zxiuJ2W#GzT0t-0Q>Y*M$7@nQ_P(lhhw96a;=?5ZJz<2FCL`cBO-AH9T8@E8!4QlCz 
zla>%6Gfz+%Nzz-+q}raGyvw>*s1cwK8a!)o7W6S6H=dr}KR`;m-85)?6(0f*&?OO1o9)I{Ah=ietnJP))h})x7Gjw#3A79HoevoFA3Q~X zc*z4d{OJ^0mugB)2jPSUuwf5#S@_l|DjHXQ;=-bW?G1dWBidA3BYs0!1@laBluJyi zjkvL@89AY0TQE9UuOJuu=jw$uCjDPPCo!!CIGHli4avzDR$mFX8(dGf)vB$uz;k*P z6;X@AjX=in#mRW@qWFejZTh2Mc&X@J&tY|xnkTXCseOch?vyDn;pL??l*4)J1ODuL zTgAW&6NOiSpO}Y)d<0>2An5Yar8?0BlIEH=uO_WtHw zhg>m}9k{c8;b3{nKF_Y4ku`LijHJsl_sspU@jQ-spM0m6chviBxr2B1+kJAaK#R30HM2V zmD0-lJ@smkD@kStLH?NaMmV#306cG}mU9isMyG1zc@%=Z>FK0p7+->^C>5t4yLVKqJgp^yf64@{0Goo0O15}V%!qQ zQNo3x8D4yp3N`NVxJcERnv}=~LIwvadK9Lu;iuSaIBr+Jc-A?7t9#48MJ5) zQ?h=4CS3Zt1W_L0NKrZ{udb`SUnINwJ(^S=iU4@}qUaUNzSW`lT_`k6Alz{4AYgfYs=T6cEon3j+H&CQp#NpLn4 zj2ZG534I{s0B9fp(7DlRLkX#Np>Bp7tF<4b%TY{4MLU2KHhr?D(sZeu`7m9BoN_ul z;`7~i$IR4-PKPGfx7$zL_n^ITwR%P1lfTaIc^Ll1hH@xKw%hVr`ho3&YgJhQ&)EG;VuV?tpeq z-Yd0aya~#y^n%ab{b%({4VgHSKi2T-I=Ncjui$G2eyZ!s=9g@2K<_$ zoE%{tjX(dk<*CHzrlXzR$mrUuQEMuPD(i-%xY7&ji(!XFBIw8MZPoFj=i+0&A-9thO)}y>Y7iLQE^JPY!ctc=LFn!DYj256j@vDG`X-+C4zt*UYu|It z7s{a7n~`w1o17PNgN28O(H8gpc`{f}wt2VZ{Y62&9nmm6Ts*wS;~~k>;6A%h$8D?4 zZ|pDl@j*3-K`4~10j9NjbwTrKqI}JxvhGhUe|kCndhRc$17y0ZrD1Q^(-dT~Z5{XG zVuHjTguF1ym&!C;2f=Jz>H>_vpyZ;xX@HjY|@n_t?i9%Cqsew ziAP&THDf(CCLTsCK*1^AEE2msti$kwHSx&OWQPGvbb9^Vq!>k+=5@^ zbUL}Fknm*nD>EfPdwzpGm*_6m^d1p)m;2O@U`0kBwDX&DIusoaF1(bn@M7$cmpy4Htk%Zt79WIA=RuaE9-`ZgSXB zHx!8C-{USp{RIzoRQgTs-?V~|e>0-U*R)l{MU0wKMk*P*J9kqKnknR0>3ZP4`239->58RMaAF=J3cPn+12T-uk186H!IpfJYj>JV)uXI!3@Qf_AKwbe z(~uVkqP1Krp{8jw*?~j@W0B0w4*~*qAWb^;!0O4iua?Ee{6DJF^Hy;Ohg0T$ZgT5%G~=$wOfXXT2uvtY z3S6|QfgKNqs-Bh(s&i|5to^`?&eYI`#q}RG>W#$LiVCHO^2~|CZ(?CQ2)oxFcegi7 zdCF`e_pY+h=|EHoD_dS2tFUI)YQ)&w8z9ys(+cbfc1XX};U){ATawk+LSM^e5~5-> z`Rv4eEjSqvJ|w_sl*Bn~P^JcAcEx6+BX_ALQ*_x4kg?QxX2wpY8=*YEbzs%gHT z^yPx27+lhJF)k{4n}Le*gvU1}|19XQS(4DUw_?iD0SZ7Sn?fSKLr5Y%Rc<0XQAjQx ziIO)w1AaHHKHkxbmvdP+qd3Af2V-*N4I}Huw*ZYWdG&xHb#QR7Znu`%T#bTo1=y_P zto_=$>7k?hgAk!yJ`Kp!yLv-UYSp$uDGJ;Sp2NLAoK#=ex$+%hW z(1Z85gYQ=S)hoTcWuOnU+eJ?bM5h|v838E0UDh@DsVAE|;IVb+lOu1YQ^2a71s=xg 
z@WT#@dAJXP%SG_ObT~cC3cf+>X2M_|9}my@e6diL`{0~7%ra(LtBB7M1fe>In*rlx zhi6a2E4r4hza`n5h28Y8IPR|C!c%zBMJ=7UIKv#`0gwF_52j)0mwVkK$D>A7fBkxF zd>nJi=6pZ({xOmlYo3WiPghxej7btX&nuvE-h=E`H#MTLqWfx`;WTBw$yfI=sKt+% zxVdS9>HS%G4_cS48!tpw7IxML(?PH?4;>z9vewek*>k8{r6KKgu0OSi^~{6UcCMK*HRBTq{`n~XXD%w#uUr|_?{@|S*l-5`gqo@WT03HF zY;0)g#V21itQ+23o$m2ou2+?0=H}(+E7qpEP3T{jj{TI?a#fWqIIzl_egHt!d#C#d z_*|^fUoCweO?oY}db$0bQtN?yng`qE{qc*TtbP(!I;d|VgAcPB&+alkPY>ST%oDtX zC-jq&%CdG5sO7#B*ot!!HKE%k#;Hw<9^ROly)q&tQhsoD;TvoVZrv?x4j7)C6<&aK z3=w=Lxo8SHGHUSedL?Q5n-WfYb!2&LUJ#P()D>d%Q!a5B-PoHf#2O9%^34(JHwlOe z5+F*71qjsNLwVapW8ZV>`*fJi^c70f6mj5ly!Ui86ObB~LuGNf!I;K(M^H#^4BK^U zUtFIMhH;nK|8ZL(AqXCNK4aLqFJD1FkH_f^y*U(YTAvVvPc{HBD_Wor;rq08%}y>@ z(%mHH?!G>cpYdw~)>?sSz7^Zsmi9i)?pp&Gax){IR-Io@sl5|<1%4V%Hr}~TeY(xe zCV#C%+1s`BVmbsNN?7^7fc?+x1i%tYzxt=k9i3rgcCrD~WyaJZ+6utx)Ort{o&V}% zsqd%1r|A-9x$P}qPDjb!Znt6Tn3}5MpN@dsPD4)atNG~1yr*U2Ap#KeJfxLo$B_p` zaHB90zSaW-_tir2I%tpYF^{l~<$S4lN`gB0zP?F;I@I zC4R)2ZJ+gFe+&(sbKT>?#odul=5ae8&~lv3HGW4y zXvktM{`n)NsUxb#S)q7Et$gUCS7J+Az?YBP7+L7f>Q6Jn6g2H3ys5Y~Og^cyfWoI- zLs#amcq?tfeW|{sNQp7`!0S0cFH7Ssy5I#!HmPN|JK*t^fg1`ZX?AT;Nl5=3ve_DWD{t3 z!)lRZ%q!gD@E{7WxS;Gt$|0u6y?!EANlO3gqCPxbReYLWsU&$d)bDR=5U~SeSU60xInDlza`wlWd3HT~+f03#osy&^S2`E5Z#RAB` zUs6Ow_Adb5-tFj`;3&)2OoCcvIS1T7`W9hbg!KWQOQ;YP_+)!D4N z*^E)`&*HhZT=M9c)8rozO(E2;Cytfz}#6Sltt{gI_A(QyT>oIJ|b2JM_Vtc?; z|7uehP|lHi2T6ek+3N>npTd;~A>GF1J5whu1MVw(HEVN$_;Vn62X3B}v<;Y2OclLa zSw)R=c5eXndi3y!>6`v9JoSQdBdvy0YmTx<^>3|ccBq>^T^}^%U<|TkyccgaUd0ub zC;>TT@ZWOEOLbSYY#X`6-}QYiOm$ub}wkqQonQ{qqxa{?F*_=iAeB8cS-$I3f zwCM6_m8jUoj!uEX`<@=*-*+5%TOn5>6`k{6kn#)hzx_bC@BjJ%1lA&WpiJJF+pXVk zuzs3~g9x)M*Q&s-9$PN9D~gr$dB#4hjf>**b0QNFe?`GqRFlqJ+wZxA&P(+>8>io9 z`@XR%r``+@K9wX9*Bl0YYU&cfs)%qdsepXxk_(Dca8(1Po;p^0$B{+WycC7d_tEqp z_t5@HDrj`vaj;mi;=}s%pWKi0bm04%>MzFd@7Z6-|84u`R-{PbM3FqAc^b$em!J74 z>RsDymJkj;irsp3d$)BljNAP)Nd%$~N(y^)`QJi;IV6LN3iB)DO_$L6ze@!ER;Tmc z?^4+RS6|Ys<>268NERJD`h|!tjyf@Xu%6T$j|FX4!y@p19802^m_fN%=7smrb&l*1 
z;nSjxZw&Wb;DR8U@aIrzXu$fdEB!A#sVnj)=K@_6&|w|vog|LM@zwXcefzX4jwEat3OZ~K8K%&VUJG7!Qnwr-96l4T73bh=>^Od|eI_K<=&^ZudW z($_FKp)(a_Yy#Dr=Ia`cvtTLSJS)5(Gz3MjMQ|EtWxfqeb9`9Rv!{~f{!^8eG*_b;mm zYQX;s&{rah4+bb#4uZcEWo4ZIgjS)y-D|fs9m^C$oX)gvO*+g)b^J{SK#s8Oo(krf zfs%mhvaqkEfn;OoMQw&%m|_g?(?trm0OzQfPC`SE82*Dz8I9IDx(@m5pa=9 z_6Sf&a+_3X6%`{Oi)p}bKQtRX?TscQ^G9f~O8LLyWtc1ughhZjDt(gr@A&-ozriA4 zibkkRKt!Md`D$H=`=_JIbcp9~jCI1eU5=DbblE;4#0yCt&4e2lMmCjcp8cLQgs|ik z_XG~d>3u3cf2b=0$U7N32(m=NCp4xO>ZrHUad1DcBZvSm2$0zb@!fkOy2yA>ls+T% z86iFBsDePU(nkOly~F-bD*Ds+R%rd1r_l%^VZ=C6Q7}PIVlE)T;HEg%&Ei0P`%Dih zAG}Y)A3TJ#LV;Z|!EZ+PuK)wrWa%BnOWoI$kXkoP4S}=)7@}mL^R+ryc&9)}xBy6) zp}epc68gd?tk3f9@If38ifdUM=z9ZWFD6KQPjgd1(AxjrJR=B@=~rzJykze6AOf&p zW<5misfY!C2(NUMyPu#+v<+txXdo*P5`xp6q0lfA3-Fox3?CNh^HGY2JxM60#`xY! zBhyAsk`+J+esEh4WD39fnnHn-VL&+@h6Y8WT44haL;<6MPy#I&EFn<#CQp?5Rsr!j z&=dnTE26Tp86h6j@ZZ5&KQk4V@c*`>%~c1;{Z-`C_NcYaA>j^{9EStOlZBDe(9b&M zkx1rk8}CawpI>`d1dL_iOd7t+y7%CRMA^0>Cq&=HW?sG!XH0*efWQr+MM@wD%bzWE=1^k)npw133vW&2mcBv2C=mm;31X* zWwie3;%CS>ou>!`7M#!lEauw7;)2Arjd(C01&t^^MgKpF$yiq`Nzm?(SOOT)Ou@=iGb0`1h?Jx>)tTbIuXZ9Ai9V zQU#5shQZ(F6xJn^qDLCt0Aa4o`$w2b`Xt2FsKo*K0&F550a_}^^m@s@n?!B@SCs{o z;>X=T$=v%RKUImLSg^{&_(%r|QFy9VK+ZJ~e60%dWM7m^AvzC$ESj~kIuB6*c*Q8a z@MZ-?k}Hkkh|BiMA>j*p)?-tW(TvfW=E?4BmQ~Wu51*A6O0; z83RH@h*xwkZFI%992A#n+I^G!D_97MF(2$(VA7XYQ<9H2o44HE0~L1?=xJD2`Y%4^ zmPr&Rk9JDiIl0`2{P;SXBT{^aZ(Q$oB~gc{^Xt&5vecsHN}cgfsX1dod6dVdnFa6l zQn2{`2;qc!?yZl&3&25v@kJWbygfJktg*zhAH57q_qT` zk%ChlK55psHgzU`2Ir~$0Cb(Se44BHrXyKJMU6$ywg6bI1mnkxl1)Hyrc{e*~I3sojCi*5@vK|Rv;!w9kJ;b%~6YEjk zRZWy7!wG`jTuoYII+o2WVvm!`yG{%XN&Vf!j~uZbHhiPfXyre&w>5c3kyLf;+qc$v zW0^6Ze0=-!wgs#6Q4EMcQfMGb;uUpoeDbk|CqMGIC0c$U;d>eW{)QJp1}#O^PV(-3 zWY%N-XlUTh-Yv5vznxopS1Q(-$Cxjh(7kk&v((mPSOM%%#($>^nScF47OE{R*$e0c z;N9Aqh5n>Xc^LmF8ygUSfE5X7G*>drYEZ_qh|`v84taT*nIQYL`S~UI!cvFGC0l!F zsLhr-n1(>5JV;hvCQPRIaEPw(`rQH z06=mYffqq`R_u+bFYvhCzF2}U{W8!3e*E=$!zzakp19FXw|x!FP7??{vTeF=}=}I3L|0Y%c8d|`LM#iqCk5l@H9Ul 
zgC|RbBb}-o{awf*sWkQk>is~p*fcP|YF|NcWhM9#U~wPK=wF@5e(KP9)ig0tYCxXM z!La^^&b0Q2u_RPc$y`K-)x#&<1Pk!Z$T_F>WiA%~e0nH=1Y;g**rjG~~w zlvhcvqE(s1FTJXO&|!Ve*Ll+^8?Mn9b+s6z=U1nIo}^^BDk1)G7N6RV!egUYI{(ut zuZYq6N8P`RbAFJ4LVbhZc7%AJ%Fch)h>o=G=}T9BQ$c2Iq96e%wb?Weu#|YRf2Rc2 z-0pe(0(>ka;-(2DPVEG{@+c+ku=A54-oTCJA-bQ`Gnc>=05M#d-hQ}dkTJu6l(($` zzzSt;B2g``Ir`#?sq;$0QDhPk`3xY`LqyDUec<=|V{+7g1W4pImh_4m_ly?(5^A3w zXj%&~zf$(tsA`sSb6S{=;F6tRoyo>^Mr0p$ZLH)yQsrGZSp{u_x@EnneBAdzPa&AL zkvg-Vb%oAE3sfY4gVgZrw0W*Ey~BM#Qkts3vt#n^E~}7blMqx1gQ5{6;uP@|l!Mj!~`mIRPS{HPgmk-y?you<*eOQB<0a!s+Sh^YFdDYrNRYMceN|@U*6S zlG4=6DAT@`S$7ajkxs6Ls6CMgmQwXeV>t&pMn@M!=2nYDyDpwiLA50xa>}u@%2P*$ z1Q+_lM7qNxJ@5*S?j~CX+QYKUPjh?y%tBdl@WRDw1qAOfgnzbq71jMN67JCTnl5}G za4f2W1sGu6o4vpSsL>}@2Cc0Psh)udDBxD2_D2ll7ozqji0oNn)7Y6^;ExtYnuv@q z3}Ku()P0QhV9A8|Mt~ykb+j*_3~%vz0{jWezOX4@OQ<>_LcR)fT>5y|!azhj69k0E ztuEjp0yHO<3sOjF>V$N>4(7o0@Tc0rv-7-0jQ{x9j@x_+ZmP>o3~kuhT#J%9&f97}}_q)JJ@# z;HSTA#r=)CUgYPy*!Cy>P}V5Z#sHS^jrI#?&&ABX@^bLfFW>;qRgZr2FnJDwwcniW zYc=2PpfH4-pN*VVFf=gg^L#EW>^c)tRIGgA^2T{E7Z_w+?-gt<#s+e*=}0F{`Yo0)s=`;=O>i6}bW`Sj((cYe<9kw4cp3g% z8@0sxG-3+_gF6{dmx~};GzOK@gX_hdlI}#%2Gh!u``~1B|L!0_^>}^sY!4iUc;IvY zZc;0wgxd3DSV|Dic+ekEtduwQDLwd!$xa&g{2$C1pHuQ#JM3#+$10C>>h4QF!^d|% z9giCw0Y5IFAB+=zbLE|r%IDUOGkWsaWzK0q{dU*tH=*eFOw-bL5Vw~aQGsR!KQV|A z3@>3f&(I>D!~DW!BS`y}M&C~1YK4+zeZbJ|3%eCz?X5#vF4Ym+gzG-JvL?>OaQGVk zNd$~TbvV`$+Q3f_fdrCFfkS@^KPo&Xdzpp)p$wE=ita%NvI0DrjwSEe1=8b5Otyz> z@F@754G{>EQtIZ7qHX+<3rF=Iwy4#}`Ev6EibX0)ofk zd|a0k>Nc(}A|gh{l$#p|`!j3~2VcR(p@#-@{E9bwUWqHeMnO#&2oAFRg#pRWfr$4r!AsDT|K6iDH+#l8n*$yjlOEje@Qi8(DVRy80Xh>h(>gPW2n3*XM z5kVrJh|`j=G9j{E4+AIc-P%Jv+QYK0^dX`3_RjZ+$!unO0Rj2|hl5F{fPR_aKHhV{ zK4}g0*hTb>*s<~mb0h++74IHdIsyBnx7N_py!Y?BfmhsgV+kNcue0BLm(?lz}1 zi6`@^Lo_1N`u?dJE$lYHLV|}^ErQEpnGU&C!`8elDivf+ z89Bg)?VH$#4}ll(psWN|%mgk>7K@@3W_im*Oy;`nPQ3+qC`82fq(%`GzcOS0B;mOH zi}{nwFmxSOlpf=Vt>Y%<52rx}{=}iwhCq_l`8d8^{Cxdqo(16GM6YGPAkYAk`A?F& zz2<-2NjX@Li7jT3&_4vW_CqA7+!^Jo^MdU)byRS;9gAZ!IqmI7D`7*5k9g5L;Y|ZD 
z@VCIldC;n101zAgJZzDH2GS*U0>V8X7%m@C@(YxcMl)bmAYSZhFIDHJKsSU9_?afR z4-wVApkvC4GWcacgA=O%vu`+x3q~i@;N!q^Ai82WF)Txa@#ts1olKSGipEXSB+DyP zviH<`Gw?bT0+Os?7oby0h2u>NYO4Z5;xy%>kS5xf4`BDA35qB&J-;)vrvlmwY*O5a z4jfX8vV8!bA2zpwNO>lf3k*nrX$c5~z7c@%d(%U^U$rx%2&kZ=F}7PI_7bCTCO zYb8@LyS(MOKCmR61f!E%eR1EdvMY8egs0K8j0N(&) za=`r4JU!~IG;AhEcKI!r-8zOB3U`Ki^+SMEn>0tmc4R;i8VswRA6fQRp} z()F9so^=xb91O)P9J1E+Q4P%Z#dif|_8!nYs4@f4_AjjgY>gQdwe|Cj036@cqf0=f zO~$bK&8c0M5ApBKFR3f3=f>0Qri)uI(!A@kL1`-l(ScxIYizYC*9HpZh1T(?@pBekp4qTZOn2 z4Sqx)>D~df-V#mQNA5jf7T<-xr4@+|q%l6!6BGl^tgxmtVGE{5LztfD+VAimZovc> z-otUfP*E8)s(?V!la4OYRtm~O6bj*oU1SnhHK@fY9d@Ho1b+-6UmT&Bs;TWfdW76Q zRPS7&Q4W(*9VhRaa6oGt1R{DLsN97Y^fUwTx?mb2fGrOXU#0|?e014LkrvQo)3Xk$ z@k35Kx^l8PG3@m^(dtk)vS0;)H^u{Rn)oUz?RwUO6KO(Ul0Gs^J1qr7(40>&z5T9p zv!_M-xd>hn;D>`0(8Om2U8Q&J5DgfMbf~i#w;`cA&Gdl=P#dff>-yhl;jz#nKriYF$iwKbG{-ORk z83ghuLw?LyQK>kF>}p?1{aLb@hhAscAEK|x^8yH32>kwgKuVP#QBPkZ>?n9uv z4W0XYRpVMMFgrtJ2+oi=3?Kw}_D$52+r#jQFV6+LzTnWpo)gs80);-Ut7CCJd+xbG zT$|jKQl08{)D7T73WLWx53BQj_SET$98&i4Dzjqe_4-8uPB)c3c`=!|YwVHHRfj8= z8NPyapXyZTA~be3++&T!2AY%VEMgfKem%oBIpoA;W9z!#3SAediz za8e^m_zBu>>*~sX0=-)ttF>J{J)Q7`p5IWW!sx02Deh;jV+vpRr)Md0F*+WfT`59Za zXASvPQWl4sRT-6t6M-mmjojE^w!Y@W#AB8yR9>jf;87W&TV1g%_URK^H)Xz62}&!#pnoD&K^Q z@s#0`Tb+px^%13)ZwoX7EkMlDD;OwT$^Te$JxmI+zDz1R z&)eTMvbF4UU~B6O?((qSmCkk-kXHXD?Ly*v71onfs?o+^Z}T40gHA+$z6cNjXiT`? 
zS*XH-{pY*TOQa`%_kfYbpD2Vs1wL4SqP@5h#6tge@upkj%{pN`xqX063mE{RoR7CT zjM9VW0wl|vd6|WY7o-pDn`1?#t2Wh^ieexKlM}MvjS@U^+^cA?~mYwV~ zD5v|XDBwr@c}s`h!?DD$c>WI>!G(PU+ySdG-9OynKEwQD=|WY+E#S_j9-;hwXCHn+ zk1=P*r~2{6{0*q$pZ+tHpL(FJraE{n_Yp|;vc1In3nd&tYaF4!-f5nDL&F8|D5t{o zrM|GKe8#TH)LwC)fos7LG)bn%$a*_!1*eqzTz$KQ9XTz`P;P^3^W*JHhsmQ>bF3sQ zEC$^}A&vX>(j(@S{&w9&z9D**GVqgjwLFYGOaAkSu}t4_T>A;A@pdpAjNgWQ=GB(I8FS8k$dYpe%!|kIuRc+m+VYa zf58zJA*nvpM$qA>?(w;&DmB~kos7RG21uVZ+<`Q#7Sw9t4i`$i<3kkMg(`ht{?Z+E z_7*`~Wg*;hN39ATb@hC8`*HU!{ITjC=}#L3DCU6%ea@H$|LeZFU$qje`3o@OFbKco z*aj3&-3d%(2}po3OWdx_E;B$^Lxgp4;k_dRX>9$$#sxdDNle;hFE{8TUII_@EIx#2 zVjvXigDivz8VTnCvU7u6zAP2?LqCME=E1NgB}bQ~{@O`DaJOFYq{NKMC2g&6UX zAIW|``C_v=@bfHgPhNs8&yW!GQ^GLIN#Xn;-U&y6Dy)L!PZIiepYDs(;=ZruLx`^4 zz)+i+rKJO^|5GN1o6Df;XPjcDq-kM|wX1bbk~P}`wabXWtI*ARcx;(^B4TFPo?T2Y z*!~a-V}OSc(ZAjIIE&)nL#oFXWAI4}EC>q=%a2>-Je?n&RC4=n?ksEOF~A|=_suTK z(3ZD}{h=dhuChwd|UsVyW&$eL%kx|9MTGANRy&EvRV z&6_$nt3{;QHIn?i&w;BbI^XGNY||J`BjwV3>i%mNY2|C$jSWyeM}R~; z06$}g=NfTz$@5EWY#`5SH~CH!K?swASxBB(_Y;msYo4*FHut&bLPMte+i}rAlux7= zoZ2=Bsvd3Umq)R~3EVu&c^c#O@DRNoXrmMmCBZa-I6O$({f_}yJ3Es6v?)BA%|D^Y z^+dmHcdp}1Wj5tks|!#p9G>-hsmaU|C|XB^g`w@2I4)|*1FJ$$nR<7a^`hyuW?;!M zy#)FIhk33+j(6|TqF6YX`%V#{=|~G;t)#TeICkX7L^HQIS54<302ROg?>Zr+)Ff?8vhwsU%UI*q4Njly(?44}b`u|XU@&nRho|oHvh%E5FjoMP z7pAy7#8Kq0YFgbYMS&x7Qz(y?HVwkuq_omt9EJL(g`ya}(sIa%Jjx8s6CWhivl~bO zeMSQB%iHR5ax`D9&ONLiyz+%EV<6wJd~+5j<+{q=Ke+1Y)SZ`3e)4(d4huT@p(6Hq zYHAUmwzR2g?KfjPh3BY^o7qXG@4{5Loep%SgvBywCa64*pD}1%MK0#Q1F8X&A;rM$ zuHet$OvPRZ1V>zv^xnN(*2A2+GYMM<^VvaTcuwhkEz5!Pk=^xm(xaQJqgieV?FV1E zs9jx^6obL1dZ|6JA%xv9Q`tpVW@nHz?qvlHr??bmL1k zj^TpfX3MEPefl)yIgMf{0z@E?WL#U1hU)Uz>c#uJ_5x$*e8ZMQjy~RlzqDWVl~y)U zo*#Sa#qjK%Q1G2`X9KB*h`b*QT3Z)zW}$O-TwGisHwU+a7w@--AhIa6KgexeNED{c zm9C0@L=CPRI@o{z>gcvVG#!5TdMZooE6Cg{#gjMTQKUXZmMqL&a`bSqO0zVbCl4+u zx{}tX%CZ#oCEnIQyMv5_l7jyFHjD(d$RJ)eK) z_em4;V{iWKIlGH%xXeQNZstRWt`Ir*Bb(2D5|L`wsVC1NUI?x zK9LY~VCJoF(3}Hfj#5RG!YIXuBNfX268oWbUlZST#iR`Si~7poU;My1vJ&8y#_&W1 
zsLtyufp!SXCy&<%>OBv(UJRhuV+Ym^bGrI;>l3kHcb~6?z0kZGUd&HoqTBI-1Y+dr zM!!f@h}`-=s6xs!!iV+Pj6_^!nu&U*{ZjI)_b>>gn@_7Hn;bWCxbH>{b>qn7ZnVGm zE7`b-5ZkY5M5{fy_0R?mfj0cKXsICQ?cU8Wck^{5B*-`MNc)HXf0O#$uFK27d=1j4 zUYe~}lG%H6&@&th+s|y5oJa&e-Vd=hy<{hB+i^e;`lTq9Gv~5gFsNO=bCoBp>rApI@^XG#Zdm&W$voNd6P!i4Tg_2KuSZPb>x3y}-4`w#2! z-szjBiOI)Fr_yS60^Eeq4=s@l`)L?@1a5a>?#zvAR4`*S&cw7Y2CBHJ8{2QBn5(!8 zBy?~cKMU=Hd*U)n=sZ zG0)Q%cc-5xN+lx=KsY^&Rx=>DZ%9E*MbEEu@v;_r-0^Johy~qF@m8elNpjO0D1I`t zQU2EaJqVMb>4geRI3L=5x+kE)=tL!VV_-Ld=1Gez*~BMNw8kQ{ zy%@*F`9sw;GKpt@8Fagp!yhuW@o!zwAOdh>RHH>SV0sk5 z72P}wpeh4xFO?0tvcpED=#G{%-Q9ss~;c>kOn zFX-FIX_5RQ0WU9Oag59E-u~d6V?llichjcM?S7Nl72Q5^eF$T>=c8vz#KoixYHDhN z02L{a0lW2;1Q_f@TnA3COfdmZao7kw?FaK3&wKf~xwtyQ`1jYn>`-%Fk6x6|Ibl@H zGD#6?3FpjC=R27$U96*bEm!bgPCibE97P1cD~Un#PR#Y&eR#<7Rz6neJEQ*oem*O% z?6m+)MTVzDWH_zR*Xf+{kKiU?&_ zfL2DA2Yln6Os>^thqY^F*yHicQsc^oScK3{@o`QP7VOu@P}*41uBWRgj{MGN#wFW1 zZw=6-FRxG^bQMsYk|#c8S@l$OQ8BDg#WD3Tfoa6?SjD|>jN_QILqj?%vbE1q4(~yH zh?to9*%>%}r0`dc*A#^VEaw?KV^mNPs*?HXqQ?sn1z1D=3zhk*!XBP z?Di_O587lh`lihCDrWOV{7uU6zf=XQ%EN}VLW4>1iC5QA!Z7l3`E=oldH{LqV5hYK za=QMEw)?jEdDIe{vcG180wePqA`1ZA{d~!lHWn}0!l8ppdC0RB04BLE7x24XCoUFU zU36por7kFn_q(}ADAmwPvTEU*hdH9lUY-a-1+_4zaQ1<&xU5ek`57BP`EM@6~R2LrmZiS50G_-Y^|LWEj z#Y$FUJeh;@Hd>qac+n+@g#o4Bq?_`8>sTlwO^AX*z@ zxpG_HhT=!Dq;{iX(7@U^M!lWzX`~cFX*?IpZdH9O1l4CsB?san9 zs>EOMi)MR3Kg_q)O7dNPOXSFj$zS}oy|d3Q<96Y$i~FdImCIO$(HZp@*$a>j-JP#j zrmk{5x6B$lTI~f4hBX-JsQT4yB47`zIM!aSCB@2BPf}$)KtyD>n6W$VXXjqK=&IOS zGEZ!2X-P{f7d3UT+-a__2@ZmQiB4 zT?W++XEWPirkgEhm(h7XG%uv|-`&nsnGw3%(O#ypk+bb^tOc|Ni=9TZ)5mXmwhndN zXL-g@KP=`v(lM5~jtNkv%-|z*Kh1}D7|B)i(o9uG*?C8h?32?|@sqD|?Rvvby6LMc zo06|~VglqnZjRr(FY?u|G@i=vt8)n~h-p$e|2L<(_*H!dD>())w;D{tbh#(ggJAom zdnk|7#jk)Oi%BpN1Zbf7k!#Pcw!jm-$tDmTW&wDZI62e}dvkEmyj@>n4lkYKw5_ zDF5w2?Yf7~tj9`MBzR{xzo0l@(^*hMRI9SG(pT#NBv&l|DYAg@z@R8A!;TF^95eVX z>b>@jrRJ}=tJKqQH?ytB%AcPREe;QZEu}NRm1yUOr>vn2y`hLq^_pK>WC;Re6l4iT zE3-{!b{3np`9D+TH}uKShtqwj(`yfc9ko4E`RPX1aX7)IU~gx-)l;+LfV4)6+Jkhb 
z9HOOZ-Hrp!Tyk`Av8?gqlO#>scOHCH@4%}#Dn_N3UtC-q)M)P5r~)8vZEyNLEioEj z=*JLYoh5x=`qKXy7I5U!-q_5LdO8k&{$$&}IZAP7@_I8k>*;a@eGE_blc2xxEdnm; zc;wKCk}BZ(sjDaH%l9W1UG}jIiuqjmKYEsLIoa*1TElbP#{%eQ)d+H9VNh-5S?D#f zpr;Hndzlo-N){hYWkF)o zVcZvg7;43n75A=O_1l5F3{RPDM+klkE^1&*?rHROVf`~6B*>W0V90cySsao z3;t@bm=9?h0M&tAL1&n_{{~gO+j8k?)Bduq@ehM>e{WDp?|sjNnVS|wcHXOviGhJ& zxmg4E-a+@hW5)$&Wfen7oTbu8z5StN4)$FJhtKY(-J_!umfThA4DeKVvQ^;1?yUFhsRMlQi(jiU6Fm4c=BAJ&0_ZK0v>&?Bu1^?A(V}fCUWT0 zCuHZ(0AUs1(Jm$(Wmw1IN?i^+-PU+89{xJNuar6VUl9|Wra-r0zhihN+crxs%{E)N zKEBPA5s|=Gy?-XE_<~T*tRvjaZn)`UQTRSUO^yO5XH^Iz9V;$ zPMRi>2QE#3J{~FS9OPuenE5MpN5n;H{C4Z2viV9qE2i!w%676zzQ1WppPpZztSKJR9|&+Ln0Xn@+egkX4d(kmp0U?z}A!} zHZ=4ZI~USUGH5q_A@+rs7HN=GmX%qXn}Z}buo$JL<2}unNmdhfubpRKDy&ju<Ma z`!L}=d!5Bxt``jKP&*~@AuCIz!sgSxY9=q3e&Pbtv$&3T368G4y*{VS^7r>Y+ncX< zJqQ0d!bbQj;BcwA$sz%O3y3; zrzyZeaZiYTzB@(`zvmQ}F9IhUd3=o7k5d19+|=CM?PR!U0#!C^tgX$r$Ui$KAJmtF zm);1zGmG41pw_Zooi-~j1o^74u(IXBYiBbq|Ix4S-Bc}k0;hN39{SU3H@Sg36A}`t z=Ez1MkCg+@!jkDGFkx)xiaezP?2Mu!i`%QyKTct~(X}R}4lOL+2IW8~e_+CXWxwIL zIP}@RvV_H)Q=K)j2iP-0#NWJDfPUeuIPEI*Ls>lcV{prB;#(QWFXdr7k)WEzr2f6F zdf1RDc){PLfdl8hFJH6$S8{ge~hY2M%Hdagv!2^y=o0x!pI zKJ{~*=wRUYe$4YfET^3J-6;RA3qT#=Lya*V50q(*}6K}^p5Q*%YODf z*EyMWW^6+K3a&HMs3siE($VdqfL(l#@%!H)It8Fa*f!w-51t?kk5yJzKh;2)$pE{! 
z_=kEk%2_3Dj$Hn_5&|u<2{E=m{2u|c`vBMHoa%yBnIxg7{F9gvNgL-UjG(|ThoJna z#NDD3dZi6LNJGCZ+s)(NZvdw$I|6a_#G8^k5-<=hN}V&Fpa1z9^7V0{vOb; zvkIo--lB4Ga8QlLmS?-=`*Kra`gG!lCk_m}?YtiYH##;^Q9j<3iG$_uVM9g1i}+I& z0A6}q|DLO(*I_X=YXJ``zGa2JD5;0D!b7%zYym$DmDkb8-7U{;8D; zEP@VKBmn}A672<%5QR(-%S|Qq7F;Jp#%bvkx<{n=FC#cK9yB`xI-}@mYp1roC}NWR z^j2x|k~AQeM^;^9{0Xum?ueL0$xC;92$|NmU|*w(X%NMGtW1wu0Ll0`d;LJ1ORVs3 zhyu0+u zREBj8CG_2=Y+9sedo{E^1s-cJ3O0P`xCi+Ki(LE;yO`n;78W@luT4)#kC=5bXnY%b zLfLqGmXatU=(qgslqjH&`>iqdLBu7isMzCaZDtsz6aA{s@hGt)ZnfiM{5-v{PHyfy zq>*J^WG^Le>KZTKPpyJk601owF0C{W5?a&S6+UI}I0dAC3pHM*c1VN~o!X{qBg;#6 zUu(qj&EYSwTQ+D@lOM_wAS+7PJ9r!ni`Hy8Z6rG`ke5j1(`(c#t9h$(KR{{a^w`GkCX3FcBh?A{~fu(HhvBn>G*d5l^OAd({X3YG6_ea zQwXRa`85k`NvsN#2RkCSi;CH1}2=g=iDc>`~m48+=vMa*q57= zj6@(=za{bY0r5q#v8!(PiTOu_2oTd2V-e#h^EoG|D1TkK9kv_LaWh^SDu{yU+OJQ7 z@#G>%{@H@9ShOcG>K`!w4fw!fG&C=@ev2jucWT81J7FOUn+~T1uMpz}#XNL;OM(#2 zLdXWT?_rOR5wr7(j19L7F;Ki9Lo#UXnd^`x>NeH+)p>4iZlNzN(JoJa`q|-$W~O8m z%zC&FUZKOOgk?XIwHjqQ5k^YR@L zA#E{vzw|-MN0w@c`~gG&2e=_P%r}hK)a_s1y#MjXIICu{zLd7$rByffQ-L4|a1CDk z%6T500w0)xVQe4gxWBXGX1Wnu`cQ!P(SaaW2&vrQu;Sh>!#u9(YaZQrH<~{dwyOt} z2IUHN-fv+bM<07yQ3nsXd6ry38PD^88CcjXunJv96?AlTl*Jl&O7}z%YtA3#pMui~ z6d_{@nOEJxkR7TpMPL#27;v`2dq`6zH*SQ z_sII0VB2=b@Y9HvlP@EJ%eGjYF4&_PoKW5t3%z=>1QG)fB<26y!A5r7MZs_vC)~eB zn^B_!l!t3W;224g`XxqQs~lb40CfPW*JWyHdFDLI#Ks?T01G<Oz`c5TFcpo<%7&K$PV>=hTr@JRi+U=XcloU7tUcA9*d_Q;DCTM zRM^z-_;SyxN+3D}1L0HTWvz2Jra=!;Dm%j;i|Mg5RMMe>iR&MM|FA?VYwAsAO&V}$ z^ci`r_=tLB!0L(^)#~NHghZ~gWITpokdO)?jASqM4ShsJpGbe6n+GCe2i)f*f2A{? 
zJG?Xf(2;4#|8D|+}uk7R|p2FZV_q8Vq-{C<=%&dM(Sn}xUmyOHzqkjv3muq#EJX;PkVc)poHpVr`;^drv3S!d*N`|P_?y)v zVxA?)hh;@S7q{rUK11MkIWe)%nXR6OUiJ{AsBbz`*Cex8O8h_Au{%ms3k}AK!}4dL zF_ack(k_Is^!CZzf$UV4*S%rY?GbEwGowTrnK@XjQ_F%bslNv)CA&IB7U%$-ZpfHl zROdfY_gFn#@rb8w;^S^*Puu_`g2>C9KMox%W;`vO4tp@*@`2ZpL zH3rZi&QO+r$G@gU~1ulv?1S7Ikj{J+xRyXb&X1#@zQNVlcwJto#8(lh zy%%oXDEp2Nm0{j1E6>d)S(GK@-{cJ$gGZxGvBwqjU9aWRx5lC7A%zW{QC?obpi~6U z3>|Xt-fnUptp6buTqsoCQ+ z6B{CnY4)?8_1&0zJyrikOoVmT?5VuszU3%uQ(v|48rEr3qXW|Kaw7pG*qvy?PgQF& zxV^KFRtf(Qmus@=<#@hp6}uv+Q8hwZipPtg%E!VLDOipqXUVf}>$(fOT<%D>km2b_ zqGg<-gz)A{TDi-6>6#MrMh zh+g93IKl0upjhC~gRS$+|K*y0)(3oA?Ee4l_Wpl-6t&l&nw7P0Z%YkSCU^z^7QkEq zp&vW`{?qXA*RAwOQc)9$cMQ?fa&?U-nY|Tm_z(!Pek0{8);{`q`on&2Z};kFCd!lf zBeP3GL*4?C=u-YVR10#&_!tD$wB;HG^>s?a689kAj#YAMp5jeSj#uSt(EsS_5_=RJ z{QdN7VRoi&>PlwzESWpi+)6uNn~|}!Bu&?WqJs0M4IBu!f!D{vU0o-qt81$1_x2ar z!29*C_7oM)>jRW1esQaLX=(8D^92^76$`rFR77||H792a1q`;0joWrCM2sWFrPc2& z%#{t^N1vSUj|`50o8dT;%c*HX5m05tG&38wbv1Hu@VZfx>*HkMW#vNpv$fq_N&6vrW2NqHgFgK7o~C92 zSy`d1lj@{1b3*^8PXSn{?PPY!UMJ`qX(to9AFIRsMkZAdf5y$Gdzw$cfBr0LY|(|| ztfP~fxvI`kQVFVkvM7p1X(fy1)#NLq)I;b(u|MhFMs7Wvu(XuGcrQ886&Fsk;P`oPWr6l*DtCK7LWPo!G*xGNsii@WFb*_3#*vh z=FV1ckLDP6l%BeMlNm$0V0xeIF`~v@6W=zf9aUVcXtQk~fq;-bx6vL*rAZ;I>BcV8 z{j(=Nzs|*oJLyZ0!mxdclu^YBaqY^pO{>VF$C&Sy$@#xFdn(s@Nik!9{UeyuiNZ^} zw1aYeb{~z$S)^iv$%QX9g|cw_^G9D^-4P4 zE?wk%c+l2~7^N8sarCrQFxu<#Y968YeiD5J8kN0eSjEvHBP`#Tv-1m5#7|v^)06BT z1ErA)x6fCRkBl+fR@VYT6>qt`QNX&ctWAO?Z%cs}^iWLy`|U`!nQ)dZydQ_sT3^KrA{JeTEzvr}Yp+I(7|lL-LK@0Knc(ito|Crwg->=aOwLQ6*_D zL9`f=U;3mKNK8cH9(`ByCIpLCfWB}1U2CsS?@=AAmpN|F4FX*P?4fi_pu zd~;hp>E7LKjC&laBc+yu=oMJ{;DxA&NFhu2#%q@7iMbg0#c&Hr*K4`?p4hh=Ho+Y#1z13_-ijZ)o#UETDpW!=^1 zO;3gTwq6(4{VKFN-BP!#c!VcxS^F_ZjXeg{mDTjsiY9-#q zWU|$aG`-+&d>uC0RFADrDBFpWJh+uWh$y|!#>(3|^V#-CTX`Addyntmzw>Q6Nve=3 zfWlK1yY#A9I-M=2d$FNd52^+@x^*sTz!I2BEbSagz98iLW-=^Y&7EtH@dVNqR@^O% 
z{F?DZdX(IcGCnbBxpF@Yi}M@vM1FBf`dG^|6d!`$ETJuAUvxgVelN?P|b01G--XRV@N9-Myux&6F42Gw@FG84UEX*J}gc_P;!(tW=>ikOqFTm+&jUK@wId=EciKreC9${C^`fj|N&aJmxPYSmtw-@iAFggEhPsA-ue zm-Qr?r0%rDS}VUhm1-rg#wS*kOD7gctX;XG$?93Xp`h3e(V;>u9pDhcp-coqoHn$0 z&#YZda3M^8#n7qz&KP)fU_dn#!FfF^HrKX!ld%Q0)V)E+^g;v{CDh4T3R^mLtwy`u z6P;2wKYP9v5E9#9wHim3uroar}OkXwx2DPTh-8KCwGd_0s&J{2s)styIw$I0cLGTHw;3+RSa1oBRR5!(x1Gb?)l>~S!x zVJ{to>B6|Vu*Ywx$4%i4wg`bJrj`(h0@&L^cEB6}lue&o3+5E_-+a444$Es?2wE8#W zW>(sDw(HT^VHC#BcQ=<=vlge(KuiIG!N+(6>4rg$;w=CwmOYFWM6C9zxtD?YQ18`L zj`+sX%I%Ye{5m83`BNM(QZoXQ=;?HLUcL-EfCBQ-aKuH!28)p{h32&NuV-%?2sBi- zPmCCL(ipEPfzcXsG>zkGl#}55v9MsDz3>!sa=x(+Kn-9!XU~e%U!Y|1K;VqF;WJzw zsO*OT@vEy>Mn``U0|CdW$)G(V0G5QH1#Nl7@Iick9SSdtmkt$Hn8^K~OM+ikwMe}Q zRr{6Bd4bZO(2e#M^@@oO0HLpKi#OoreSxG*QDJDZ;dP?jk^43r60_YBYV`wI00+p= z=0e+w;$Tm#_ZI$h&INoJsA4^sosE+SI0Vp;s@*oOv=36D>NwE{p{?aWh`(ai)C23`QLCg|%YCfXYt&uJvxawaGRoAi&e1v}$cDEWEZO0(}n8cM5?^G0Vk zlD=Aq838aHB$qJfILuDbS(ZA{Nao&sUDaE+or0AhkqI*QaW zX&ufDe4imG1DE7wcQqqjnIj=Gtx_-}RjJHorYUOmrSg-WWE+6XiuQ3In=+^9aic%C zT%8hiV5{;m?5v`#I_1zhQq5WvjBvTv^0%lbFDy{l#SCz{gB1hP=~4%|z2{(G^z

ZNOgHSVOYV4rq^-%|J{H^m;oiP8LPdc|M zTqU4iWHA1RnD*?AZZtv*VNsM*p>0PK(S%bmad#$-PKSN~cUNSAnG{;pF{XGI%a7wD zUmh|(h4S#dH^=x>Nv6z)6S(r6Ix=_{ASQ(e0TiOFLpnn>)f0|j)03=Ut4A1f=EiJR zt95~v%daR|2QZJb*+#69$a+OsG+UbJ>!v6tsiv%dRZery$5^^VmYVs^5pv-UJrmiB zZLqfU(m6qgQ&xN%R$Nr%UEq2p$aZ38rQ`*c@s+ zVIkiK{Sx0Y0~v~Vj5M^~kiD0o(iPQnynp8L;8WSMXt~5X;ibIBM$$etC()}%Wm~CB zg!sg%7xI0*%JUd>!Lz)vkpyHU10p5My^HfAN68W6k`vRfJhGWzcBZ?qSXprP4JEQ& z$+JrZ9^dHeHzzP{H|lybQzXgGB^?E_eFj9_;8ExqDZsThwKE&t!lVKhoEk} zxngRvV6$oh|2`|UL{E}|S$eEa8-15SI@GeQNBc8SfS1H`%FM2+xzZXEUH!7bG8Btt zOj6@S3XXw$@a0}+Qf|aR0rMn8e4Fv6-Q`a5F`GBVkN5K%xn7YOV7B2zid2zbUd){* zg)VVtP$bCRZzW_rx&;|Y{O$c-12s$B7ZuKpaJHCKAlQw&gZTipOmK~7jj`K9j9A)x zAEEn5iyRLqyER_Z#$c^RhNIk{>gwiZ(D8y$ZUvM7`W&df=?amUl2HLHnF)6&XDt$e z3AoXIBJD8Ztdj*N)B!q1Xq;5k3RruY={Z|gIFumS*NFyr_7%Qprd*^t+d~Q*$ z5_Px1!+6Y1r$6Bjf!cRcO`l^N2)pneQh>b)0Fk4NDZ|Ps$(m_cUslJd(?R;YQRU+F z^Ob|GHPi4pEt~o5T%?~57i&Gqq#wEa99QZ%QJ@jUVKhsf9q4|neUZ4B~DVL!n zGcq5DJr`bAsMswyFy)rR6lBG1{4vV#k6#ST+h|Q*oJ}tFd}B2sKNaql@<9j+wIgJ z(CYyj7g0E8mxm8e=I>pcPUp3bmw*x z^V*t=?rA2eObywby&E;w;exr{5+Y1&hX+N*HQ$!jh>WVC>_%bjgh`+SJwehJcAnTe zEGf9{EIA+Wba>+B!)J>ot1K>I2b9rwEvP>nTya}aT->O$Mh(VS>Z=kj8jKyPOA?AR zDyycfziZ=0??(BemI&DiTILv;x8K{*-ZrPtsQR56qpTK^QWN<4Q9+kO;c+;iw`q<=GMo*B)k&(J@FqQ^ zWNSk*TXVFv8yo7rj}jS}sS-qrb~?>0vp-`X#o?zeT|H@BJhR}fGYx5=b7uP-7fyJ7 z3AUu-yZ_NZn{#MCL0tbAAe%7F5vmB!RD;(5QcP&3Fnk}t5x^ei-r(CmpuUFTjwPlO zk)jl9-fvWKhexvTkyxcxqf-!)qpfF73lR;H;rY>6nPzw0WL1Q-WgfS}Zq}SYVY($< zaLRgMjf?EoQ=V4Z144dSBx4=Y^w|}W8ps~qZ3jM@IUBjFi7Jo?1iHZh3`}r?o6J?K z?gV2f7MiuQZCw~`8Pf0lHv9C-zry+z<$}ak4hUQQHYlv9NSH-5J@eX9Qw2wM)=7uMRj#oV3!eNfT!)3+yVOO3pHGn zP}@Q$LJl4X#$I02#}BUVb;C$@NK|ibx*%d)A}o?!#GCmUv%~Jj+2%3k1f}i)MLf@` zTxd$(2DEO0ycS{roCSbCc2VA`XGFdtJr4sS5+RL9deTZz2cRWA7lj zUn2Qe;`}>(%|OShEq3`nhWbTK4B=OBO;h$#{PWUjaP2>Wulomi8*>kxBCOd=-&g1j5P#H<6axZ>0#Xc+!CoTP@#vSExzu72loVVgKS5nsGoGrO7@sEg}jduSH0h5{a4b#|P>G)9+T zd|{IgE4g`1v=aZ{6wANv`*6I^v@h()No(0oJUd^!WLIq);}y-6tA zoZ>W%a%+#etAMY-Pp%57N|Uc}Er-Dab~9TKemfW0?{^ibu@?L*0M9fV$%v;%BJEsh 
zg;|GG^PUlzEYSVs|2UqPPHwldzZ6#$`(Z?f=WhpNUyM}t(N6bYXKDdLRxwA4GJC}w zSyO;eJiEnOG0=PIp2pw0x`nS^u=LWfivbUEf&M7U)mxS_L8P&{*8xb^!0tmX$Yu&r zfN8(rFqij6e4x=i$*T*yXi71@Oh~Ww{M0iScR8jd zl>D$3mCs2L)acI9!Fc~^8Rwom4#(`n%M|;|r-=ez4L8EzE+lEzHAUycYi13Cd--DQ zmF~&Ed3|ZLLuP&JtQ$PHM86ja;Q^&xa22s}16QYELEov?+vjIaNi`BEdYPvqD^au3 z3_mw3t-Uled%0!UoJY_C0(9RriVZ-&{^$VR#`sM&+9fKBx{BPj_+2(k9Ua9-JG8onoQrt1^I+nNYrQjId4Y zUg*Bi*r_hGbtK5SLjTziSEoJLCzV}d1~VRpeN5^~R7A}+EhdbA(7-^wy9%0qfQYMH zusLd%HHXM+^NIff$wW;d(3djzI#w7RvxQ@Frpgd?S@OYg!7tu=7Dv);Sj1s!Xn zjXv-**8FAv*B?*YpD6btC7umJ6zfZ?Yd;xFS50!PSq5vic_b_5b1JBd#}o0RD|o|F zYk#kf95e{PFi6yEyx+lOC#-EVsEOu8$akgYFco#gF+p>ZddV2eVa_^K(*+g5)`vVu zuVgb{%y;59#0ZCb=+megj$9UlN1O#C* z7{(vFJ~mW74y6j3r<;UPVy&u#OW2=-(%M3H?5e8RI$Ec0EvMCNAX2*yl(ZrOXZr-+x6%l&pAN+O~e#dr_peMig-;-P)I*Y zkhGuP!xPJ6hs~Z=cEB#cVYA<&; z`IT?Ji#IA7f+`er{;Ndh(y~!%s#`iqp)TroRXDD_aCELj-cxz;b;I%LZ~dcQ`ssUu zjFV1#Q4TrZHm7Dna#vZwVN~p{p6JtLYuaf0qDU_ZTnOf9yKbL{rAApJy07=;l{Q^4 zZb&-wYqz+No@q#B7i;QRoM~AI4kpyd&9WbI{3`#RMm36Wa9WG=r0?uI@;VFVb59$B zrta)dZZPpCOXF|aGLp`WUu|f|4%5TcpSS38rlSfVJ4nCVRqrpu(*bXM18}Sg@BE$y zk%ApHkIKM!R&qLj$k<9%DhB_|2U-&clB2Zp=llXf374h4T?oZGH~xZ$4?9G$kJ@nw z*GYfX8L1Yj4;??jjXuDZ2gv*LG=7^=f8zVUq{{l~@a14MsD*;j8@B0}IV*#ICbvs6!NAI8WFpbh3OX(>p6PbL${F$M*h zbSZTADko6<-K#Cz*q;TT+!0-x!ifBlpa3X{?~fG3LYjvC;!BXfBLnMVB-HmWA42)a zhYYOW;}R^ByIquhddM3^OI_Fa1-`|1#c#jnU7K6(SQ2ojllcuF^~a!jGJK2QEDVQ+ zeLMkodZWnI-93#J7A(qwZX_!+ccdOMh@TVjgjVy0cmM3CeoG?Dk|&kUI$#I7;tBcX zmM=G5iI%TTtyZV0f7c&8awAk8_Ujh+yqKZsgdwOh;k(;br_^>#>=^e6WG#g zdCzmHD6rYXXXo2BJSf(}=xdDcXWxQc&ee!00`DPqp>=7YhhjEUymsvo9r+YfNfhn# zvU;2rb<@GlbRutVzvp4Ce8C#?73}Z76T-*vbhfq0{*@>O*mQ??DPA%gT%e8$8eZJT0oC^{2MnUsXj>G6<(gHZ6r=kf0uAHuAg9w+75I0ALFp5g_QZh7 zlakuhPd}^TT$5WyyYOE!Z0d2U!-tL0T0#^^S-p$N~F9d=1=>W z;i>vY$#`E3(B1d;oWUhUMS9t$h3k1V_^c6pxlV?J_0fhbF0g zQ;c*8IJ}Incq!X*$PoQT&@s)B{N*6{(6;Uu1mhWXPt)_NDW&J8M9Mim)1DyFhL}Di=rnzYI9eJ8(H9lp# zrS%Wlp+u@R4zX*iZwTPx5d#BT*BwW;yFL=iMSr72rnLs5=Zl zz2~2ZmJ8E?eX1^vt$dy_b-4bS2r*hAufKG@saI 
zRK5MBEt|SriMoxbowAFfO7Kyuk~gn61tjUMH|5-$M8h`g!3N2GaU#MwM3`B37SmOo zaqNX}SwmI0r(!jB&+E?(xn!-#h|kusqXt3l6rCK`K+e@Fx9znoxQq`bf7-OXvDJSk zc~cL>L}4yB0@*8RE1I%69}p=pcw^CHK51#QEgm#fH)%Zo{KfyOhg#wp|9d3APLGYu zu*xiALcpj^^C@(wbKO{L<#m;MY0N}wR#_zlikqWy>XNp!G4HZd5&A$oLrK&kjm#xS^#904%7r!JQcf%LN2T=v#&{B%hpA33*@*U z=b8W?6vTkfqU%La+Uul);1k>;f^)6xTC%JzJEdr#rxM{PNG16BV&qm{G_Ws!K#DLY zMfhZb<4rvGc7<**5ZWu1PtpJ#mZOENvy1{HypZj+#*057;7LO@bCp?qqcDrx_0tG&GSd%*9gBoZWd|cFr){F$v zcej1AVD0UW6(xugVLC^e(&t7)-^kSfTxBK3-c0_&a3{o#0y4*7j@H8Pir9HD8NyKfGFC z7VlcciHF3KAx{4?7sVN`SQK08#L4_EW^H9<_#p(RbdkonUnm);yM%mwW6=$9sBL*4 zj#qgMkD#h{W2p6J8BV~Cw;>RxCWQ$s)}&_T4VV(aFmSXD+vXIS$JmaXdd|1U1lBZ- z(}s)`^3EXCgyZ(Qag?&%tfD)E-Wqo;F1Z=fzs={YhkoI#X-mvQs^}>>sa)i~%O)|P zK9heww0nUe*V=45#%7hKHF;n~8P2#?S*2+uBqgp&w`S7I{CtblDXVwot~{?Oogw7n zs9GA~vuuZp+oxXSpA@D5h6#qrw>VHdi{8YlZqHmo8^+x7o|uMM@{BK7sbCkxjn~Dt z3=txxqdq5NcXL1+#<|9Qe|(V4_d;{fA{Xkjt7!No=Ev&rk_jmR1!M1eDofS4ljfcP zEDqK|TzJ)|Dx~@2Km|l|M8S?PK%qNSv_tG9Nahn3U7W>M%>DC1tAe<+jm7+ z>ozN5RRki@bjGod$W%2ED{6pLe7Mz(5M;XDK5|hFyS{M0@J1vIRWMgvo_7yk;jDFG zFxz#>i^QU#g_ZPfI~SujmB6*TigW2Q>Sf2*UIhPO8d4865qo}afADQ$vLcx0ZDn=T zhDv3vTy@klaZ%Ls1uv=wFhvUN6KVV%+GnciuZ5QKgQW6-OYa}J8TtsPLwCe z9#UZDfmvjGejrk0YfTq-qyQg_UfWbV9cIl!Cfo<<;n|@StF3t>!Ep#XxOVHpnvz?U z_j%V|#P~YM5ro#DWTS&o$0rVaMl%;@3X;g(G??hR4wAlyVkyfHV6pS1GK3FJaz_zg3CYAYG>c}$bsA)5zqr}X$G{pAYrJ2Dj zAfjhhOMSlA_VOz$xztxOmxBnaoS#o7@gV7*?BPNmmG7oR@sG(owK;SwqlSiD?CZhrW)nX+CQ7!gUKq# zTk7O|R*=z$@3JBUgc12{dXY&D3G*~u{Y^{0ZCE#G7fU0THH>4EMV-+Z#&ucWnjT;N zZqf5%s;Nxs5T!{aIw+$4Sk+oJxjr$ue3g7N1_G2NriA#Ot#wbDN>}HtB^4%kdO6t9 zqh-1F)S~jTF6RT(#gGVF5G!KZUPdnI>PW9yYkYWji-5$_q1vekqU6P)Z3lG|(!>mT zsL5uSU)I^gTbo<)nqNj0m|>~Sy?PGN%OedfE!D>a0k*W$7S1`LowBoqIziw4d_R44 zkmtvD@y6VodXpqkQ&l~wK^G{OA|YV=POP?Vy{|AiIa;=N$Xn{{wXHgl<7<07Qy@nU za12fc61Xw;fPgIST%lOhS{{sG>6D?@P*u+7*+z=fJjU(`&~f zhKPGhzXk1+g{49i5FQmmxR zU4m)zaZVZH3iJPfTK^XY^xt1c=yrpPTNk)7Z`o{W8+D-ET~?#r_Y}pTXGZd$K?YfY z$X2W!t@zF?vqE7`y9|z~;@NUeB2=P;=U}mmDSh7Lv~wc?+EXAmTSII}uO9Gk)P%VO 
zimu;2m-Earv_Aku7V(K|GwgvV#p?ry&(<-z^ z*a*3bxLHefB;w3BQN78GdCckf886)nTIk^?i?R^TRSx~(@8H?D^xcL81(|)a&lG6c zhI22oVI<@33+{=E_W_Ixh2f$j?J;w786+zFbO!RSh>eVrq#INv060h5=>5ZEOk%^P!Dz^f%-1I}pzWZu7&-R6TOuxg= zh_t>T#02jE3;5S;-nI~5pP}q$=>=hL3tk?&>jttmwXfOYt1n9DxJ4+{zv$fh@Jwd* zJ(R1$7dh;u%NxBogfb-6skF@Iz>5U-%9^7K_h9VKyu2iTbL^HE!BMVEJ7;+*GySnG z_)A_r-H|q0MvqB?%D84?`_F?Zq9E!dQ?WsgiB)Cr-lnD#@`_@6Ps&p^s>;vmgN-#{ z#*ZqjpSBz$1z{!la;*ye&^0iDE{SK(K1XJNGXjGSOXG=YBcvrs~CP>HI})ekGVwj6jsH9(XGiF;f2WU|+Z+ zi<`0!kjB18HL!k0s|Rc%aNt?*F>3VEG>!MG=gVO3(M;dw zd+7q8PWM}`qB1=_emCcM-S5=6F3`IFe5d$wz6ox zvLbhsXnyj_DEgE&GB*o@Q&H59hcT>+GvDu&6X)_~}kr>5v zB%n1QEgZsEM5J2*+%gw#4eE50m6LI4-np!oWwL0R1F2eyd1dLoeUel(ly)eDJljxC z13rWI#`Aan_vW?exc$2E88VNt9cD5dj}WWmw<)WOMpL7;FfuN;Du$oFrT9u$B3o%3Nabc@v;G=_^OY`a?U{|rZ_}2WZ-D}&$!9v% zpM5(?0P5639byY_e5LoSFS`$uDq;f2862y;Alr8zidk*608{LqRy7xO*GdKKu{cF6hlH6kn=C(DMLu5eGO_?hSI)>Bo&l z-tHWFZ)-g2X>wAVALV!Mi_L-D%lK|dYfl2pK?~EonL4k7f_X9|H$)=(Z&ii7(7v@` z?ThHW$YvqWG$k90un{Y2Z?%x_NF$G)Z?b$b2%nCWeL$Ud{nR)()t#_?NzQS+VEoi? 
zb@1Jy`(ToHoGDl$6B_FzC>=5dQwWl89;poDI+XO=oVN`TO*O!2{@Us6q(t~HR*8RK zE&ivUYADAw1*w=g%>-Kt&0dBN`g}2lw`%S>p2tqr_bi3frZ?-cMYLbROQ}+v{gV?tCCC^Bn?E(=H z&k86UvtZjI0q)-CE+=m79ha_klLvv(alG+nHisQ2Z3nEidhBn4eGha%=UtdA5b=0x zIPJ}CYB)RhpweJ5+eWk-g=~GaoR#%_HTL}iZuHOaWLQk;nk}BV@k;%m(kKsqDPFVD zVwSw_AgYY3R0Q+uA+0Ax_VSQs7vI{MxzgilCKP zoM5|Y8ZFW=Ytl-<=Ic^N=(5YMCF){s4!nO`xVuDM9V91J@r;=;;1B;TwXelfpi z=Os4DFs{~Nup~!f9Ounu^|}0MJY`ZUj0ORVXsc4sP0MoUp>jQf80F5Zg%vz=&pSB; zDB(KCg`}3>X#^iw9XY4HaC@(%E_|s`-D_{_oaFeV1%)4x$(UU6u4XaSjiJz4!~Oft zD8&0&pptI|^E@zBNK=yEUloJPj~^w#y+hct1f5| zSbb2F9XoUm!mTz9&8)Eeiws3qs<%`!l&o?NSf zQ$oZrf(pCWT=SOvaDo9O8ea}2rO&V=$?Q8*zmKM};u#GYilDIrnbNXZZ8@|LmQM|7@JK=9w#xtlQ)n*rTVCP?gGuIhnqya`KVkPVi2Be8XBK z2GN^KEqaF}iV|7@qYXj|=Nu=*Z*!Y(x0{8UNMk0fGW9Ks(GrjoqPA-CFDFGL$cwJr zBvo4Pc$mc4!q~_M*@RI%MOw_)KY-uL&{w?G?q~O!?&e<)&-Ry_K=Zf?Y4~96na5Mv z6VXHJ}+ zZ{^ZwHZ3=Pw0hwfj!TRssp=O=0h7gg)n`0hk<1bbkx^R(#w81;o5NXiJ2r*OI*p0L z>SL9~FP3Il-)Xe|tf(qC5$hXeSzx*O(Yn+bEHEe5^UH6+jaOS&X;i=F1W_FSe%AaA zc7?t&nAw`OgO{^D#=p~A_}i+CR$DeUz~0A&!4YfIKnsJ;t0Sj6OA9kWM#|pk<_`6; zs$p|9r9Rdxpxi{DAauT7-i+oHFhHRY*6=jEnc@iX zqV;+WGrK4xSlKDHfRt!4J}mH|{O%DXT+S{nTz;Mx5xT_m=~R&o=G|Ruwt{?jM&?>~ z7;jx@&L`--dl=}%?EUeQW`W$2$cAQ!bzfMdcsH=U!$W37_>*rGTSnm1q{+1?`vwZH zsHHjevWDGC02hn3GeS56aAGTgzW$6O>4xcsp9y<;dkHjcg#ZWziR^M0E;UF zN$kiv_C@Z0>7Yakzi!OvFSr+)Vmk;a9I!7Oc>DAd;S9L{P$KGj5@@C@IrrdX<2+`u z;4y=z{CMk9uppF&DP&KBI_6WBdNQ@utYS~+3^AK~2Dc?cAmX;naN2x3e;d*}PRCih z%RZ36l0N>~e-p(v(vdrD3A_lT;MgY68r?*^=-fGk4&(RlqzWA`{lcSNmNsi}=e>f~ znrl8pFS9&+EICTtNw?+Ol*Lb(W}{Bd`oK*;5->2L?*n|62?DJk~17Xn#6lITLI6HqO0qbaw z^(D+crKj-JzfK*?QO03E>E#l)Nb{V9sM6N}Q5pU)p`NAexs-}Sw=1vx*W7?O9~BIo z!0F4!8ra}PJ<(r{2f&E{+dcuTW*pNm}uXxTbA zvDQQ@yjoKvvA9F~IgH5vBZld*)Y-&*W7bn(pSIMykS2a~pQE7Ph39_w>MM|x#+#SE z&rd2z;qb_^L=R;G0Yg9(-PK!)$3y>3BsKiVq{{lZ;TiNO4U#Ebeu%Di^^c_mI$TNb zSbFJ|9&E4^vuoDcU-So*R512f%sDAO7O;PJDA{D_FKso@NBQeWpxBxV6&etFCyi7g z2j_MPi{;-z*S+J+)&rdc-AE5|H#IG9{rpd4jX7s*%i7%Q2v^%Z`J4MzU!Xb!Ib*ZY 
z`@r_MGB};?AvbHGdV)t+sXA%C0HUFQs4rXwJof3oZsr@pcI3|DQILjgYZ@ax5+#Sa zX1iq!cCHhZ_??%7#RtgzvI7exFGXfqPYAjXd{pV z*hlEW{MqV6bDuz>uJXgE*fW>?Lp|kT$JbB|ErH15`)d0$>@grl!?>Eo`T>S6Nd!mO z1uhaYsK9SHu3?aBw|mT&gi4ncnW9G77rd1Y4LZyAov-r0yCkCh0|H&#ju{22G73)- zeiQ8z`9P1y8d3uUDRHMP+yZ-off#yAU2-{@9_Fs18hyV10i`?#TrNJ^ca-U@1tex? zGY0&3)OU~fgqaF+Ytq$Y0@L+yHlT%`6hlOwAuD48Hn0E;B5=H-!Da?>Pt2!oVbJcS zKvr$-_^a&T-;dk44CV(dbaY`I>DB{EMGMfU%K)5LbO^u^U4W~FDwps!g9{=Y9BsvZ zxP^v;5Dh88|GI`n>;`R5@HYHd5q@@<`CIfx$^(ITqxs$XA)7O>o^i1?c4jUGt@2t= zvw;r}i(7;#bxHvS=+jy9a|b5rUn5?0i=OGb9{0h0JdF}^HLgX|r+BQ2Hdjo9w$}hJ zM^=HYsoD9Y!IG4d^37X9)Sr3?2j8O0c>dd#=T#q^7Mg48)tptkVfr+@Xc@3C*WsMV z4LG-Gw_$qapsjnAB?ZSou|jBe-=wEqhCpi8Z|Cz<&x8EH71#GYJ@sl=sP@EIH)hbb znwYO0njkrnQ&+LP<`J^b{6|MDP;=YVMeO=&%L8 zp;4F1T$h6^80f-+&I-ASdUuIAf}MqM$@6pFOxC)8R;P(azk$C0Ro5yS`_WBP35ub$ z^I~yZ(7VEK3{q;2fv(uq5~w0BiWKcwPwUcJG$wI{%hOO`~%7X&}e^w?5TosPxf+J#F-^TYTjrgN>i zW0620zepcTc;WaOm|}&e?>`7sabaIs8v>C-K!Df%PKEDA!99HyIeycaH4jlh#z})} z-(}o(>FdvEtajZm1Q>)L996wuuX=J|&X;L6RKNTmq`hTaRBhPy3xa@jHw+4r(k(F{ z($XMZ(%l_{lG3d-NK1E@ba!`m*8l_eUUeeo)I?>E zKY>Eu4Wvp}cP~-;@XGHNt7Z4)1YAqUf2wY9J)5n8DZ3aLVNA|huY(aDHbNFJJNo92er4FXwrnzuVkwbgOmTiI0 zFtOb#+fXIe8s)cSnRpxJhdedY7UV9#dfEnvr-m}FxtSM2)3VY#OyiR30$ zR)D?V^yw1U$4O-{o2@?fRHpd>Q`@J{_>!ugf2-2i;KJ90QM%{29bRjlBrU#fct#zF z*}&g1F_BpO84u{2&olIaC$lt4;Sjimz9Ewp!UV=^_)ksiZB&lcM`=Rv6qC6F_~1jt z4gAP4QB$w@i_q2uihPWe8zx~2g>${m<8s0gi^L*Xc)uYh(g*}V) z%)-6f|1{uX0QU)QUGmxer-2T6wKtk+o;ZWFeM-t3^UDMKwd07k7kvA}a>W$|lSu`! 
zdFMZHHG-{xUDH+TjfCn_AIFdK6MB3&>W$1*_YWKn%+2$Nl6YaHhlc{;Vte&B7(G!1q|&y7@sc(R}wX1??8bbrbo>7E+#*;`jbqHGiNf9k;MR z$~bXTq8hh=mHAfu-u_zx=r!@!1;78^N~lRd{e){rw3Zt4SPXkg*i%)q7d(S8)i5_b z;g%8mhnqd9wLz03TJaVLfM)$GvvAk+><0xZK#iNQccq9Nz1NC3QBxE)Rdt>b5pd%q zh+AYS8&fO!DJuF*ix=T>vfBa_U~jV^XtpWRFaoYt0o}7iea${5)hjD>ZET2$Th_u) z`3LSp8W4(eHNZkBIQm5J&Z`cK1lm{29#_B$+$fE6R+d%Xeg8W<@fv}Kb$NB7q1?| z8E!V31ALbpH!ebHy!wjD^s^n_;dcBMh?@g^zUm|kEUb}J1;|}b zZ`yZXXh4cx_Tv?ild%FmYuADB+~ZS?9&OtGfg{Ps3eYlzvvKP}N9N-cdwf$S4ZP=` zH_iBE;xu=y@MY$cv@GZkE0#o)?_?1{Ki`8MKb??Y*~edgd<0O1^-Rarg>WDFEc&O9 zDzod70e}vZ8MgfdkH0>=8(d-%B0s(-5AD^2W2B2#6z$uE=qyUu8ul0Y?~hejLq5>o6VR{? zxRw61Gr^K}!%`8RDA8{6KFm|o9s3jTesx;gGJg0lwS<~IWINp08MMN^JG=o2(SHN^ zQ~3ga*Y4&BnQrP_4v-S$ZxE}4&&qfP9`zrzsK4&<<&Ok-vMbg6lK8{QDVyF3Ol|T` zXtG}QO1%Mg))(BnlHJ0BRCF+= z2g#ncC@vBtntYh`HtJSffVqLp9JjnnDLv%b69l+2}YQsVaI?;`mc|@-rg19!u+8#pQ(>O z2SuR0@FibjlmNSCEZo$OmaXLKW4liRTA)xQ2b$nr$g!98=9p7Ai!rjP45A6TcLKQC z_W}soy1U8$);DmJx%8Y0Ec)a*OV|j6HFbOt2C<>QN&N#?%iIp+^~u>d^+2hgusO68 zS^-2G8z#7sZwdK4NB(Gf_sDWQxt2a#e@m^1c$G#uhBRr>YJ zxVY(m$6F!4-ONCi+#m?O`Qtiyy{Ml&b0drbUba&cPSEd2XH7oXuLk3Nc;_t!*CBSx zf_%Fv)C+)$N1&5`64U9+Rb!G@?h#sNPJ}6mjzq6+GBHC#*P3xQin^z?JN4eF`Rn-k z9_W2{O-u556nFcd9+m$}OZ>A!*vW8kz;#N;3>rf&oZIVH1SLB8-3$|0FAjt)x}3Op zhOc5(0qpvoD!j_twO{$!Cn?TLT^*xTSP_#%26Thjb$ZlWu+~c9g*lFF2JexQU=p}z zfTZnw!z$}P-Gg8J7h!GxuU=QtYzF9z4cuilrX`kd5-Vj=8i`O{qq^2U58JO2sT?n` z#QlQPup+G~DgNqtXSre>ykAQ#LaBXL!O}|M-nucd-#<~BTV39Q7)u4n$hSfj42DdU zAn(6$T)Vm}Kq&lAP+$l@&vyggobx2NsywEs)(V^CPMnR`KLr)QgwKr^ zvgNX^D#Glr=OU`Q+;P9ctPU^$iZx97Y`J?jZi50oTWZYRic<<&xXu|p$%Ig1!KOJma6VTZ^36!Vx(wqN(@UH(a z@UJcIA`%y@?<=J79pL%uCV{4GaH4k`j{?hExn*7(j#QU!7vQjkMY|`=)7~8-TZYwW zRAGLC9|a^=^=A%SZRXFuGl>+iT2!D`4A|w63h>}yrTFkeC2rC#*U-IH#p>LuodXkh zgZab!=wp?H{O$Bbrh7x~(>)_^Hu{y~5Dm&15(L5&2)^*)VNC45kbt z_qw)%Ccb>0a*mL=psCJMYBF(-g)|#YjW?=4(}m( z8)L8@xIR}wXb<7nS!oz@u%JNxK4DJv=jf2z7=LOPGvTAIid*iY|3ysg|8jLxS{lVD z#^N5T=tM=Yj%8C*I#H0>ViBfu7iKD}5-ZL;&kN_tGO&|EljiN9|2DZ)Fhh{0FWpayJU*nC$y{KB9h1@Uxq9PZ$^sJ%%@` 
z`Bm?KPB_Jm(3#Fmg&y}9!Gk;-3SR%R0Bbv!cdy$4jx4wqZGZeK7RPk}*t52N$`ivV zA*=}`t zxp!0D*^bg(TQkX^eAZfPJH@;%QGxc0529VrBRxXif3lAK$5>Q^5#M*2`^IAxoS2yd~p`Uw>;N%ic7bK?*Qb46l<5NM3s#`eG=J4kKoBNJwSda?5 zP6^E@-OFdPDxhkqqo{*mg9^F#U3OOSk#7{f!K5n$MQ!o1(Z@-bf=};D(pnY@mnLNV;=xChzkPaTScJt) z8E^q+MVcPZziwo%gG-t-p2GD8j8Q+A{HpIMOgRT+`L%JS9wS$y%AItf)00nB;#O{e zMnQl?P^dt_TAJ42CNATK4nz+=s-5h^@3Y9R2ksW^|K+3lA0BOWhK*6YrFs3%EU4o+ zHx`Z#-OmcCZo}FrzRP^WKAkIsL~4YYI+ta63?lKisguhaFFwkRPcDqVr4d#`cUmd; zs}{`VAcaN`cIs~)lz&Dl{q(LtzaZrP1HXuPn`xn&RPHSVSPDNs^vq5|LyyT^=byAS z79T1^gms+4Yo$vkCT3Rt;0KMYpqU_Sh0>tJu#P#XynvHfJjMm>SWt^Rgn^ zp2#MC>|o7}bbbX=)3z46eXYOn0VYBQ_S+8I_W*-wOcDO#=)gMx;COJe4z~$<4Dp@d zn2#WflsMy68+P_-tPCPiLqJ9+yjfgXCtycAzVG;#WG z`L~}^t1)X!PG0}?9WB<-Ts2Gk!B$*Ee1nd~?M1`(rLEbPS^;SKOZ_ee0(lOPI-o(nk01~>{4&303PbuRG3=Z!(1DNuwkgp+>$-|kDD(yDXhPEzsPIa?Qv zowk1c$_47B0jw;kPHf-$-&g?kJt^y~EzBfBo;LzO9B*!cCj11FnZ5xxMPzq^&Tht5aqytGuc@!Uv zrXXd*xkDBU0orEHM>7|-*AvFpC`20i%+9aV9>_p&ucpjI!&1L#%J!8nIQK97L0ga5=wWQBVwUXFAviPo#2kwxg9TnMtFI>Gh;kA36N& z*{>lM&uxO8)w61|)w(BaJK|ju>IeR+J0)QAQ-ZbZwm)$#rW#wc`~!FT`yCOm*j_rE z%iY=lRWDvISw$+wJ2QZUmas6 z6|mWEPH6^?FOTc6LBBVI<_8Gs0YOPmqe@Z3-Q2?T=8FBKR-YGo0!(ZQ7SaM@s6n^w zOwe8YAU9`K*U3&7JMx=}UM^ggtymuPVj)p-9OZvgpLEQ+9}s*1^NY&dH))%Aou1a> z;5SY0i;A?vT3Ak&!oPnp?wwM6sgI@yO#GpCpPEKCnHxDlHx;9Y$y23xYdH@vcnfQt zCXKC;!jKTgKXKh((@ezgxkwsD%~r;IKe|IKW-iQ96$-+2E*vf@Ft_ zgAvrHrnBp)AI2p+2Ujx^e1;KZc1?;75=prOel*r>5xT}a?wi?%3RQYNs8g=c&I-zc zV9JCb!%6L^?h=-2 zpis31ew|sB)C6maq^jrQXSSx;crot0Zooa;854AF;@ihJoSvg+B~o>S2OQ=8xadmKmnxQ0Ff!(NhW4 z0-7%`*VP|({O}9mHePQ31X@Vh?YZPQQB&y< zn$43eAVdG_>wi`VlACqU`OdB-y?aHazcQ~`3vPNlCDVD17Mqd5<5$jZ@rg{ur6pEBo+yX`%>H^K#cKj674-vKPkH%Ce|$m43d|BKVhXrmcPkX`T)7$ zZxB~z8_&k87oS0#uHsjD+phjm%M5>5-M9>0V=|T*2J2k8PDFu0evToKq4Rgw7P<4s zi8gh(r0io5*8SRWp?(s1;$x1=HlWbQAM+1fvkwEug%lebBmiO$#D&6j3e;};N2Rp^ zSX&l}X~(uEatVJReGv3oYyt7`X+HOEML`Z36dq=RJF4qK_-odSp ze&EW|*3?jwh%)B#HacRp?7BpdW%T|_e%#|tu0Yt;<0U4tVweaq1l^YOaD6n0)=_g& zu5L-ekyo)4)~CK+lia>3&vC)!;vNstI<=++5Ca54NpOc5BW8 
zaBFH0&DaNj_vK?EL@HxDzS7f1j%L!|M9O!{^A?^O$%ylD#vOped}X0Shg{J?FO44Iz_6N!nM^^G?=CAu37#(Bs1Ifozefmu;Zm?z3ACmz2 zmjNDL>BhU|zhi-sI#8_fUSz}rj0cOZ#@Y=8ojx{XV|+M+AoFQJ_1nqO|2V$K(DW6h zuZ?Ph82C5=%vxa^%LG4gP748$)>xKtyWv~MyewdpSr|Ae6A{dE#TrEI{4D11L{;)D z*k)fKh7`a^8u`f3>wB5%lhrF6!wg5uMuB!Y^9&|dN=5WZ9}}NE z2T}~h=gZlV`&2GiJ;-L#5$*y2U|;W$XUObC)U%}8|5;Ma-sDKZ`!LF?&<+rdm(#-W z7Kj@m?Gdyp1`C`o0G<+)O8Ju^pmlKgVGFP)|ARV&XV=s2nHxM>4qTYBZNJ2;V{{+z znnZFpYM|mLRC{C((o_UHlO@3&l9*?B(*Jx*HY&qe$g_;&6m$~GOVRfr?9{~$arR99 zRN8Hm->(w#jM9|1>2rv-r2!rx(Z2vqJ<-1t-f*~OVq4)W;7=pN;YN-4tY6Mp2WSq{ z3k33L5?FnIb?|~G5{$$iCqNK2fMgN+SI+aixL@LY^V@IRA|Iv$!u(9?3Ks+?*vZXu zte$lu+*0r;`thKp-sPy<2*Gc;G5WXRys|(_*4=-6`=5%XDN*tHoKl|uGFVNzq#f`i zozNY>$M@7;&>JtV+V5Y(3fBrgMi=6+;z%w>mEGM>tlx5+yf7t+>6Q9R%YMF;3#{i) zV3fa6>WRpbU2lJ}*S|C$dv!7K?j5aD*ZKWxL5|2Rqlp%24UQmX)R<_|KJKp@LM+Tq zowv!({?@$M)r$0o%~6R8?Q2naR&;A4*}_?8mvTg*$~#jnIU({gch+j6LVb!2x5atI z!v-gj3!(Id^v|H-%h7-)8&-fgkq@zTZ6I*w$6pgNMbTcLZRdM(r^C#Ak)VWB6J zl#J^*9^|Wo6Ap*m$|%bkd^1brpbu@l+3ae}6!@jxPPl;O?B^W@JrTa150uKjhzpqw zrZN)#lQ5{rH!RRzt^lDd+BlqVmM-X-56~&lc29Xq5)oF4Iweo%Og{Gg(q&9!*Ss8D zev@g;`$?D$?g@TRXg~j@)Ce`mJ#u$UnM>LkKUx(s%4T1*JdAM4vw@b;?1g2QOCE$6 z&I7z02RIe1|8Ur0vxj@)1SDbTdQBSWTd>gQ;?Dg+#jl?}e??lzypxa-g`;joDBe>= zBmYM=q)^2_YEwAA3%&h#p^XaA6Eyqy#vftJ0Gk2Jly^sqsbM5I0C;VnvfsMos@=W_ z0s&LxCy^%>3lWAhxy1 zC9cU=udnG;JIbc35q63gU5w=&|foln3hAX#p>bcc)X7ssoqhG{y{gTbvvu!Pj|s| zmCPzr#nvuNMAdCIK_}g*N0#GyURVJx5DXcz_WN2OSa1BA*Q12M zhuT+O8KI@0dpkq@7Y6-ci0 zLQdu4zMGR~=gs%m5$W_%u)1hWuQbp;%QIFA8?_})5mLtdj>-&EThu0wt+t4n0$RJr-IdGWUpJ9n!7=1%K zsCJ>2bdS-?!bfw}?W9L^-m*l=Jbv|EP-caud`eHszdwgNK87R9eqat204^hW;J+L- zbj7GXzLP{ruLf>cq{sCgNkFzjurs0SUZs8B7INjoq!}jjtxvmJ#V3Z&yz}*2IBRsX zSa}7*&;_VNslFAd&=Ki)U|IFzymhHbfQ@#LSSS1<)i-f?-(c-}>>SpnLz}NXHw8_LVT(|JUf%fxCD>4J#0trUc8zs!@@FMf_a?ANq##&F z0jOKG>TsNl4Z(AfmEhWK*P@u`K=@OTmJZJbukUq`Zd<(&li&Gh2sh}ZJ6M4?xu*KQ zW@z7>o2(Dfp^G@y#8{pWA@jgNL-^Hs@ud5o#Si*~n(n1D7YuJNvud!df=rFe>SVmJGlbr6kaX)z7-I*GA)msJ|6MCq=#=m 
z{>GFRtbHC-vHqyiLuy)MVPRs%-BCM{Q>m^wLCE+i7O$Rt!P9x;tIo|!N@f4L+WS2QID&pMZcLZZxEY3bs#s2YZWrGB(#DaXqj_fo(U84k zio9F_c>iq?^O`hq(QpfVT))#-|{c!h~3G?bCXJ%1> zzQmxbt%Uh$;(zN7b)gMi&MqQ#b{%)GF0}kRi<%PKcN6X={DN^8jp<7~eB_>f&Rpsl z(q+&~3BKuOX0xJp4y{eeMn;k2heD(c-=LDcRXlJ00^O2?tyxl@U+tiZgRv9~{^Pid zxFbw+_zXgVjX?M+iPdlBC^JK0U+(_*k|Ofz0SS&-nq4R?|agy z3>SGFSRm&It1o$&;2#3PP5O6xD>XUjyY&o<6}5&@IDpvrz+6o^s-X&-KVkGdL-hDV zkzgd6;rT-^fXV_;^aBLAY>T!T2a^vg*MB9)F@MFoP3PZ@^?#n@`=(zFc~96{J0uxj zftD$++Bo}DH zd~Be4_|Pu;g0G+2i_$;qz!O19*D{fC24{Imx3D}}YVsHb?R#3EDuS)Avea&_$8|C^ z$j7S7q5m<1v@ljzR;ey^c!ypBXMwTWs{Hp_ElIi8)+}AArR0aRk?$^V17UhseZJ$@KU;MgLfq_-#W>>B+D3Ke9a$%MH=TX{cQdY9o`8 z`3N+C`~r}0PXYfPJ;+u-=chr}lknv;T$?_E5$J?AE<``sq(<7iF2kdgLKpl$PoA)p z9WmRziQVY;YeAg{;@sgWfeBD5>7@-wakiRxZIWwPUAj3@PVJcD?IcY^r1nt4bayA* z@8T}pR=-)U&3p)MjM(^v-0dYYa9CrXDtiU<^vagBo;Ue_BU zo>_KKUAShj00*0NyEU%;tNnBagXwp4)h)>aUvHQgC|Gh0Bu6sbon|S2HJ^&1I@<*e zHX$zYdtliu$@Xq5S$??oW-ne6Np#$|x8mhk(OMcgt3D6M4pjr4lD}Z@+iW31qXfd2 ztXQHb%gMt$s4h2H!c*S#II8+dlBb5cBn%PFuGt1D%kCA2u%q|tCl8j=LEZ}8646jMr5RneF^BuZoG>QV#MeSf8z=WI;J2jE7In-fEjvqT?|5byZfY7 zfBf`kDpatgQmP>uRv{svGN&CDBr%HNLbW-L3{}B@9iC`(7ddw!Bh!f5aejEE^e(f;xxLTLOI%ET!Gj5`oUugZ;} z>m?9Z-8Y0VvCaG0Om)<-s!gs4ppfB81^p4#o&FyJFAAp7GMH7t;Sc3GZ*XPk8THTY z8J5a-n}(?&=R-s6Ffe^zf4Atl=e_F#W*kkdXpYRUT_y9MMZ>mLA7gt^c5VAfGRTj~ zUAPzL#9y`)o6rPbiPN6 zehz)@_E;mCwTsEB=}}K0j9CLsJLAp#mvL6jL@={93!)iDtUHxMq`=RunWHN^io;XJ zL9U4o^TfpH(tTUwu2jaSoOHOYX#Rq$#Mx#t?%}7VEnN-EII^bQufjgeiwLQZ;t)7^ z!*4le348t4`&s8SM9FlE)lAG{s$s+CxfpKMTUpNdaqGMrOTK5~Z>Q}OPX0?8 z159lA3}63PCV@^u*B`!EV~eV1)X>NaE46yB+Moe(3!1we>X8Hu+@*|o)EB#^t*@uy z=LA7z?}AU8edt~O(lRv3mfhsn-o?VnV}FKPBCp*|l6VvR)sk<`b~?$#o0fB< z!`^m|4wF1xK|;a>V;<}bcKI2Pj8Wy|)OE=|@WP>9TCo)`Z(ZzOI)%&oPI@9YJ`Bp_ z0JEK5q)$FmBMK^CMRbNHn`Jj5#MGv_4s{VEjJ^!i@j@0kJS0AF7^&P?pCFKIB2cH! 
zTxkUr+2!f29BR1)*4KVdC$?eLbD*83`KCp)I5{?p1~kFt+9spIxE^0TCQ=A!3_0jk z$;1T(@tBxC3}_~$I9?e6mdEswLDgXWnkj2m!wiMyNIZ(=>I_eO1Bor$$oy2czD;CA zY&hL{S=niC&BFaNz22<74D8dXm*NO8m@bq{<96sIiG?Tsty} ziL_SpOi18d7c1B$1%kad^S(5_l&UdYkiD--Q-Gqg0{Kmqw~=rORry3UzPsFIwWGd= zc6YI%qc#O^`iqlIIy%0NnX&2=8#Jk!iK}gN7jtVLOfelITwQIs?AHUnv zc`jl^N!_lX1mFqJ^OG3c0x`?u08u|#Kl9vhcFG5Njq#Cx01H|t!n&wVEYL}%(6~p{ zQ}p*UH4N12W57n4){WoIu~-G-n`ERved`=HOaux{ghAlcupVH|7{3fe1A&@AGGzsG zil# z^69%cMJ~ahH|vyLja~4<+miy#QJq17V@a6Y;unXU&q%=t$c% z;xeOfQd2Vrafk1P&`l6HxH{ISh7j~1UmT&{o#nVfQcgE!3CQ*%6p_db*Q?q3{eKS*LEY;jZ} zq}H9H%fPjt#wB02u-qz-!8lC|-8NN>Z1=xV&OYF5YVzJ0!Yv)Kt+Jru3M9iF3%=bf zJ3htXNdTl93>DV1!me>ej}7YRL{?AxJaMU!EsQ%=n|xQy%Gy3=vQq{Xh#sB+4jyCW z3$K{#-u}=t9HVY6#gwv41ojON+?$oH#E@8YtYjWK?t2eyj+w=K->@HFCSm)qAd++AQEkvXu zWLwOQA08Kzc7R`Wb%ODsjiA|V-Mz~fp#GO?04KOtY@r+SkVYh|wU=<7SltnM3P{L& zDjQ7JssF%D^2@oX?+ea2mgKaVHpHN63%5Th^pI#v>ffJY#Kdx+0cwoYI)1kztKiqo1d?> z^mbAVGlihS0(;wWtpbL`q$F#_3vAuRp%n%xCw4?*Z2Zn^yZJ(5lQMNyKjESS z?v84ms3}>co?Q{Z)V^DeS7z)*FgZJZeI}p1zSyEAy>hU5%<3H-U}B(dHXw@-5jJ&s ztfUVGbrJ2#v^1R?*r?0bUs^9&gitH0C43DBzx8+QN;UFh8dGIi?tMc5ra#$92t%TY z*=5_hi7-cFj$*8vxb`{?@#Q4tX}*`+c#M|itJNIujd!2xa!L=tfzH~jjbe1KN04ht z3fBGL7`Pl1oLlB9b{r8|!?LrSioG9TnzJ@;2(-=p>|Qe89;+yPjyAO^m_BWXP6qdM z{ruNau$wuoOUS^a{Dl2JYJ*it~E`p&HP7}gujy-XY4ZdT-Rek zz5Xkhhbyw%vG+R)496TzXS+rpZLkQc$IPiWo!dCI5o1Q0RxOK=^V=~zlmqF#QT?I( zSx?x84Xfg6F*Mi3U5n$pt@GZR2|tMQeY(OLXYy)OQ|;utoYpDYNEB-2?rnqL*$-Vn zLd6~grsbz8Wfk0_TuDG|BM>Aq zD;T!&DEety8nox0C)V0Xud+mNekW-bCmcb8_; z+RVnt%*>rN?GPl+j=5KGn~^mTsw14TKJauIATE!p;e$2BXs1_7iyYelMRzq$?J*z*HxKmCc$1zHjTbwu*rB;!?#1k%bP{jlzq6RVII+ z!Z|<`(-@Ek#NfZ*ZE`?J$rE(GQ8DFWRlvJ$>J^4^!9Hs@FHL^FQrn7Gx*Ft5MA_(! 
zahJ(GJxxq$$#wc2S@ zGLA+F$O01bo{?*m+MCQO`e$3M;-}s2txLYg5qV$Ht11qVmV0gOJ*=G%HDaJfK#Ifh z0*03R`ULs5wniNqtd*nx%e{4Ui3=DpQcb{FmqEGGwIp{l7C*9b?Vg~UsJi?T8GP~%oYzE2#-lX1yE75~4w${V*{>`>0AJj05ikH28^cuKG<^%-rD9!#p)DmgQ| z=x7pX(svOu368wV2_@tKdhFrplcx$BneWafklL~ITn?0ly0$3crG93YU3^G zk~4?#;kb0XttuyhMR$=Fhd%XXw5u|vXQ4N_);Uks?>%3RnQJ3_o_i}z?e@vRw2J}3 z;qnhH^I`eJ*Kl}~|hp|7wxN-@Q-y|!Kmv45ONyrn`52vrU2il8pw zGn};b#5tfZ*I6%d_DUhc3ZA*p?mM58eTwZ1^s}q#d-df#MlXqw^l!y|4y(lX3C*sv zM0lkQ4)1^I*QA=OhLg~U7?Y<)_@xbSe%1S_ktoS@jIKX5#dDK*R($wE65sB1;>JhC zR-^P5u!{J%-4v_$7&**&iwDV)!IN^|q*+U^nZgjw4WpPE&|~9TrO5zM5(5<>75QiV zpsN-ZvZ7OLsME&@mSe-|6e}%_-q(cNN^Xmdkp+uUgB_jf6^W)UU+Qh6dpv~zB`lW@WVzKxz z!A=TVqs(84c~QykI>ZPl?6$KZf$T4AaEcOywKA6T-R!jNF-HdLJ%+&Uo4;5yvssK0 zzwVab5mj3q`0TTNC4m?Ikg>uGNadnC9jztM9uoMz{~K^?6%G9~GU>jYOI30HwvUh8 zft>nY-RSZZtWrBzkEp^^6UGB-fIz!hY!#kf9EZJ&)Qp zP)2(QZYC6-!U+~ccBUYzIB>Mz^qLlp(HqJpHpfelK&l(2dtUGOVIY$NE1WiYPpD@i7F*%L2Xi zv)z-3_f>#iZs$cyK<8jO@aWVA$6RlD;%P66qLZ|W{3}QKZ1!F7^)G0U)b14Xi&e29 z-r5tx`hXjTH_@GxNxe0Ik1)KMmo$vu;9M_-&~TU#9bpL66#w;cEeBBNgsGgk z8dtooh3$rn0g|X|E&~Dsg_GGNvWMP@1|j6$1(qV8$27qzi}#a|;>xM)2f@KTB0(N| zhwAjC((;%ZDgEWhzYTiv{6(OISPPLA@Q?cW*!}E*$Ig<8x|Z>ES{}`1r3RFZeLw>f z*aUSLivyphP=}{~^Sr=pW9IWw=Pa%<6Y z^yU{DTTK&B#!CqmZax&p#Eb}Jd{Vo3^^O-~%k+7Qf&W`X%~cu4JwhM@Qu5-t2BEf$Mfi&_$A^WFIDL>4CWU&Z+OP5BO_4-Zug3hDEm zO9tz5CG1rZNj|}{A9iQ=MW|EHN&{VTtthD*k~WN|PM>CN3%cSJ81Q!ch^-6v(=p85 z)Bl{RPLj!0G{|bu+We?P7U=ruJ@E61dXg@a=EA+iSM^Bcp>weAR-Biq#x*J$6a#7X zQK*4$=&*c(OS-C^fC&^qO!pn%_tg>+u9`<;6qo!w+v|^Q{-)t*|I)goEQ}~rP9TE$ld6Yz5=8K-st_Mv{jn6eCvY?jAP*hWctk}dVtsn zmqpmxh5THL@1+zS4(p2DDM&_#Qjwgp5}bLJP4&_dkW0{<^v?LT zX@p8N9G&L3W$f5H3!@jmBuW0jgQ?jBGCX2fytRpmzn26#or-v%#g??RbClny3(^-% zVKgnimEHupfcm+NrN^VxFZ@sIZd-!ObL#6O7YG|0O(O7ddCL|38vsqKPyxnw{t&0j zPr#ETei1UrKV)$IR}-|6(u+~7BUJazdA{aO>?rw)H^zu$1m!a1uA-yMu#(Y35C^hB z4vwsGZT3eX{hUg=Fzmj?-XE(E#e;tuCVbofrnKh#%`{8yWHX%wZ^-o!kjYGz6ES^AT z4@>=^f!QC9AvPV1Sj2<8`l6$r2Y|(y-rcQh;dfk_d$O#?T+jng9JU{8!6l~76XNGm 
z|FvZ_vFJe~Ly`vTllq@YCf4^&#>sp1nV47`Ban><>TiP5X8zZ>Sckfi_}L6}11djk z|N0vnu)>sz=ApVNUqK<(WWq^tc2C2)?$m$B)=Kve6Q|nzawZw#GJnxW4%%e~sVxbz9@3BH(+p2Jz3?LZ+jy5|?>~FO}yi${ZdKn3h-}Zf# zXU65k55$utQKQ?G@|%0*^FYT}B##SjMhE5y-OXyh++!8O7f2V~e`|Z3ygS*dF*HQ; ziKS9!Ukm4Px{VT4*WMUb*EqBDd-4%?@%bVfUn4tzJoDD#ZJg!F$M!$|i=|AGJ(Rfh zGuLtgDibc>ozq;qbqS9RHRA@oZ6zdnBoXTKR@{|0HP5UYS--ouwIyVDyOnE~R0SjA znOqn$HYNYeZ| zX95+a)M96H&~uF1s*N(7;Zj+H@Wjl8+(rovgIN5u;YiNjiKddS$|lw!cVKe8M=E%_ zvBA6zBrh#)oTAcu0VXpxnX@kCxx|1<^uiuAEP1|-%4W6o z6jp+L4vX1SMk<#CTSJ>lNydxf;0l76tUXGGnkj>)@$=N=^bDLkn?<=$+Grz?px@Tl zBP6LzJxPppJ92Z@;H!e{Vfw{t~vzW^33mTR$_?$6zR$n z)tOAOq>3zm`5Y>tx>=8iu3q3S+4&FL9M63?*y`aUFgw+&PS+bE;$NKFrEJUP>`5o5 zq^_;klQZs!)<^+=Fz8+*VdS^8Azkze%gCF&-?=G zQMk7Rpsw7%lPg5CSJ*f318*WvT+=OA@QYX=uFZY_z^w=;sAwpFWt$~LJ|$i-N=U-X zL`!F){{t6LGh-CYtl-PX}5gfDnXCL9H+P$uZZvAZTj@>ygV&A-^!!ky=u*>mRsLO=M+A zqtN~Ux;rZJ08 zvcCUi&I*B{7fT^-<41*+3qQX0U9u{m`-Be@-OvSh>k;`&kEdOY7r>*j{CY#`?_vCN z*%ell?&yg5{<9WTU9)AH*=*dKFdeHta+~s0DNJ33ER)~m;){SjHFeBNQ^xoA!ezgO z{R_<%#T_6yD`EA;xzQir%H=8yxvTW`xED?*{9@5|k5j4{bCJ{TUih$=H!NEz?pmi2 z0_V<%JfuKeTm&MwWbyz&*>+InQ>pQ?TNlRSfvMUD|I*g^Q^zOH1hXqC+Ec2 z`F0D z|AU2@luW3eu3WXp1doz?d2whTIgh62g@{sm5{YiH`LK9C?fV)~!vk@wp&O zBmSwK+I!(YI^a(MyxY=H#Q=;noj*^NUlLr?N_W z9`SlK=S?v6X~U?_40DHk$^V17w+f5nYxe~k2x&r)KyYg;xVvl6;0f;T?%GKR?j9Th z1b26WTY|g0yIW_9|K4ZLxA)oe-OR>&^(8G{Dr|G#buG6W-*PxhvWpRPMTT zmdVmnj6o^kd064~LkA-qHnp!0viFPf=B84YHTlu6CHURMgjV7mQS|aBr*xS-?P`iG zPmWNd`b*zc#+Z;R95i~^kZgRlnq%HJ2Lu2%8bY8WJ91(MU-z1K1b$OzU1B9e=xy#q z7f_5FuX2r2c}8OMGc^g)FOoVs(u1g0p~Ag2(lPp0&>vva>~|S ziPX40qw8K>2^sIk;3*w_5Ri&%dz@iB%}Xxvkt|~wDqs03$WCj2pez_c>UOCtVP^3R zM|R{BkW>uQBTaLv+`B&WqpXA_EU06ze!aNu>P1!0ps28;W0R<>8!HeN9*EM)(wdYI&;>bL9Zo?_k$m z4VODhjas1{Gp>LE;f9X``zBT@It@He7wGYBqWjj-^=HF4^~!U%RlWLX`zUTA^>PH$ zKr$(J4DR>nL#=J>r7v48MqOckCVOFpDXjZ6dw>4~W8)NND2*FF8Ff>ZiA8=66(WUY z#e2`1VNEhmhzJC3NAzhw9sW_5QUqlaA+Wm;$Ez?8)rR`?1#ettof6^PBr5LGz!BPY zvdMY=!&-}UayQ^Tfc0NynbQY!86&kXK{iB?KP3t`=|2Ofh2jHWM`X|Xw0E{I3)QSO 
zaPa|)^zH6d4&|Fewxdq!JSrYn#`CL0MSVj#(STP2Lb4f(0C&wWFDZi%!V2jS1h>*I z9yUu7LI5cv2LMQ!Q)*3M7G!WkPRLoM)4TV=P9d;hUp zB&!ymdDjqvBk@frQ~nuMZjjS}A2{Z%J8OrK#C^14E07g1hOV3@t#|}&o-#$xOm}^o zEU(v|Ll_xm;~s>=>tqt1$7T>lpHOX#yy@dU_xQS{?WI+9r^b@8U;su9k4b(_aQwNW z<43{ox?XWChxajJAw<&fMWh`{{BZM`5?Dz8Izs<-d$Wy7qW%4ZPWYYHbjoe7Gb|Ys z0NU-seE)(jmx5Nqbdq|t+YkSO;C>g7;QYDn-D>@+`(E+9tl~>;yztPpjG%7-l_khF zLvIYACJuWyUgq9~Z307000aP8mE+Fq7D-}$@_DD|AT;1~MqNc69Ma-Y52{l?sbsA5 zLOatFQAgM-R^3D;J=N$hT>haPt}tyajcA%Y`%Hy{#IiCOAnDD(e&@%9g=zbSp$&pU zHK5@^EL)yC+y{R^S}by@S+tw}CbgMB99|SI_-Xt!>}f+p&7DHHMc6m!G)h!zSjR!2 zQ%L#g*MWEp0p3`!@=!bUFG!cw>qQ9?M*e+;MWyzeCr%F6!q75?)( ziZB1jqXBS+JUHMR7J$me=W{X4JoczX+Rmtx6t#C6u|OY)G9#uGpBFg({~-6s5z1CN zfB!YaeFHER;mbygHZdCk`99-)%UPo>(c#jYSt!!8J|Vow#djaTi=0cWC=V;Wu`32( zv%ock4+VU}4=17n6cO&9Xjh4Q;QkYAiDgiX0=HAqaVaWq+l&nFQ@gbQG+PlyRR+b@ zpl*O7|LsWmcRb?VokiKgs*Nk7#X?k7XM7O71ZpO@=p5+tyw%woEkHP0ve%BGU)FiJ9*512q_VqNmpGl%I_Zc*()SvC_C~Q zp0Pn%cINd!H@gF*s)|QTo4J_pCcMnCn$Jj3e}euWR181gr?&BpeF`nhxveAo2fhsN zWbN_K@S>k)8^ay3peumpd_1k03u>(A^TVm@?mOOtc%<|m!M%#9n=SD9G$PlSRzwg5 z*J*=J?=#pm^W%}iFTx4v?`cb4!JxbKq#gB+zIh@)4CcGXy-eyv!Z^G9En>M+pnipA z>yk<+ueGCEUpMFJQHX!{{R9a~MU)5c#U*yuTmBhn%Q&El^BJ(JW+LjBl$yr_ObdX8 zZZSWpL)c6_fOyM(UpW1ry8Ayu|Dx^3Tq{YrtN|B5sHx6 z5iqGExM{Z{iU3AN*z&V_;4}N+;ve>bzoSv=M=(sIR|*967_UZ(`5`6X%+PuPqrS{{ z3h#JhM`XHEg?=&qJr(19IC^3}YvKOU)5giaV4pU-$#mnv6c~L^V=$pwf&ym>X{cNK z0c7S0KrzxOl~VROc5Up~-)#d=U8XI%lD9W>j#$RPc%1;S1d9LAPyXp6(3+X40W`aH znvv6}69TSq4#P@<3yz@Ms63__6eaGH)MOLb>Lp}nIl2mOGT^B*P13(40tTqa0h#)f zXVmkK$TUM{A9w}Capj;j&=IVe4t4qE3z>tXzo6-S%zf#8ZWOkodCr0vIP9;n$w1m3 z(<=WO_`fm$o(EizLmnJLB*kSCa`8ushu(M^{+yBHq~u)c6Nutj9`Zscz28-1qUa;V z=tqrW$!4iztTim?kmS*=@;{`Mf6f7MIF?rRuIc?Z-?66+o$^jlwTHXPq>(eFRw5w6 zO9=H8*p?6u7O{ea@Lk^P#d5U1`<6jDtuZ&3#X~mh^ZW1(aZeMPpmoV_Zwd0V+F+xWucy~#(W7ZHM1{L3N>o_q4sg|r_1@Cv~(2<%_eRep`c&4 z+S@}pR@~K5DHkdY-yy1~HPBtoIY7Az*1flkJXZQWc70Aa1a{%mcV(=L_aV)sebmMhgjjRaf=$Hu{)A_4pu+O5kZ!;kJ=? 
zFb!H|-SXpeW2e-Hu>w&!X!+Po9=1xz7E_*gFpVb z5ZsRm*l_g=BAVX%>y5Vv8q}x!-sDQmTE~uQ19X!U_WNext;3?!&XU3>rPdbEZzV6? zHunj_y^kqL3UTEw4-y0;Twr*oObOWlPuoInhTvz7Bt&JB^5W+`tpMq@j$bStkD~k($G)dz+Fhw0o0CcH1g}LiDN$(XWvD~6{O4DV_Wae+$ zl^e^OH|9S_(Ocav1Vk;KtAqbbWa@wV{Sr6w^5={Iaf#&^jpxrgds^-6Xw-qs z+c(%Dr+wPG`kii;>^Kco@4v(}c={4Pwbka~ZPdhk zFCKL3!ns~Nm4gOyIq6cuz-HhrE4hMZ_Oa82{-uXbwv#zyOOj1!VZuYHassk!HR$avv*YSJL{(BFx&4r zL=I!CBb_o}S-g!Z%z0h28TaN{;DCPzHB#UP@|6)p_QFnNF~cm#9K*j?)5slh~ zr8h2ba+R{h)4u<@lLES0j*Enq=|MulH0WgLFAD6xUr^jDHH(EHk>LigXBLIb}+Yu_2dBj#rX!t9d+-6<@%t+7YN>Z*_Ep*89HVyVJ= znDTbx6y6jxLuhlaqhuQ)Jy67|5Ooa&A&uyTVfcs4*jAj-Ha979fd?2)(AP*~KuI8W zOdRk=UyHA==+7_jUO}vz!|nDmTRZR0aDW3Bmfa^)vJSwiIG{K21iZ6O6ruXTerUWA zKJq`0ggL4_S8=Yf3eV6xsWF*@U0R-IbGVV=-3H1kFNXlzD6Z100L9A!LeX);6H7Jl zflbi8@}I64qTREdZ!#uA@5bwV_jDEun0!cU(d!6a|jKduWx+@(jCe!lU_fR4y6 z3lWY+>Eg->YXg<3&NvuG1Q_-?MIQz)1;cUb?h> zouSU1H#nwW-ZxLrYD*wpVbHR%3a$Co?qaFz7;j2Ztf3=H<>(I;@7GsZEt94T)VCyv zmB;|8#JWOodop#bb=i;5*ncKL!%8S7$Gu=;4Az%-Q}*Pwo9495%=gHL#bI%LeLYCg zc3ks}*)a`+$$6`OWTIJUfFNgp68x9XU@I+}|3=LGZ+|yL^y%Mz$JR(Ceo0gTrki0y zgS$~3c$9FxkLc2x)lno;WbY$n>mx`sCv^Z30~(U9c{-LE#F+bZLxn3q(?+zB?xbJ; z7OCTr1|ctnW;#v!f$QgU$rxN{SIO<1X89X)z@I@v^Gw*^InjCkLP20JQ_2MKDM@&f09LK(SnYFpS^Nj>KRKo z)aus2PM8e=CRRA5QBKn&?25Fyqn0UGDKL@aK zD2yc@h`Wqq^b(6D3E^DUeilZDNPk`avTh@hDGBU8gZH1GJTkn^Z}%Qp4J0)HC5fO< z=W6)9`!>Jv>vvot*|Dho=uDQfgnx1xO4u3A1(pz+I>M_a>)7I38_1smnYEMPrg%NI zoONQVUwB)+nS@pKGfqK|xe%?(d727m_HUzY1eR_eQ`5C9U9EqTrH(v=i}o|TVx&T+ z7&h;-QFs406^H-nZvMO1zTWTC6Ve`+;W4me#|#{Temeo0b4RAIo=o*-vHFzmBab^g zGIWf>dm2c(_!*|;6xDax+8LMBUxp0fuBB0-w+r7u{p9U zZ80qi@A0FYDJi9FnQjrxThfyu z*PdV8ok2dSO?KNV`{e0w$q=ehUPv^PXF1?WoWhhR*xEDt=ZT!?eJ>$Bz@UCJ!JCr}Hah<})Xy2H= zH`j#f;(~>wMCK+7mG0l2_WvK=Gr4%zf+^4bB9fC~*c)y-Q^!6^Xre@S_SR^a@7*ce z@-X<7U9KJem2to$K+{8Ck5g-z6i|1`<;wCY<`n*AlfMN#(xZeE+-inAjO)Mh7%Ig` zW@AW6fDDj*{~r^b=VZsB9k=$?VGSWxZH6>u$~`tSVFJH6d>6%}YQ#rzmfIiew;p9T zGI5{IvqR^X`R$k4XipgIG&Uw65hgyCq6{P?;YD=^r@Tk!8S(NZnjba<^}7kuvN_7p 
zC9b74+e6*(o!W9ym^VZS^vF;1wyvXG^Orm}G$mTY7rtf2mX3T?EE_@o8?7beLCye6}LBOM)&h)EEtToT@>0FrZQue=8p8wq}`v2lR z`ac3H_cT*>HbDMR8->AdTEf6N)IW|XDf+#nnN+b1(B|+`&zV7DC^I?ByN<}UN+KXh zwKc~c|CO(;O=C$JD}U(IpBYy}!urPoA?`FW)te;A-Ep1r3@7VMUlFm(GUe&=>ntta z9OQf1E?9+I$cq*nrFdGkC!g2H72S>y^iy3kfsjGn7oLA7s>7v|-o33x6Dx?ZB_c02vJIM-N-`Cjq#4c^Dq*?J& z0R$021ljyo$uBm~ocfTwjQ63O_~vfN;K>w_FSb8*cnd2&xgy4Hab$D(56AV|cl?&)R=oT6=c$ZA`4J-lXUyXaIyh z_!$r?2El#iL#9sxqNy)yfC~RCddc@Z$QuDl=(f=Wc-nymA^%4s5T!m+00Lf2(L(jo z!llfQ)XNyc7wo(3w}g<}QLwy(v;JwOhcGO#^@Xo1QV{I7tbpel!bFc2++R>bxbUMY z$R%D~K)`X0eWHf|19 z^r%GP-I`pUkpL0jm$B#1G)n*q(w+Ki<@b0l&_`52^ei+_ALrI__Ql!D=voQRfe+T}HjwO|jW?XUwtE8Zvll{^;!L*eBlDwk`OE=K9BmvQ60v zwhs>8hPITFgGuDFy5hqjnf`Q>AbK|+S1Z2nTh+>TWH>{6zJ7J#WzeP{r~B^#YBvhS zmf2@QqHx*m2Zpzi?VDg!eGbIkdJ7>0xo%u=j{1E8d%!sP8*i2**%IS z#cO(aHOe=Ld}r3EPP%OEovgD@XF^>vF>-U{8_4$&!oqk*dmQbSVhs~gEQlWSXS<9Z&&E(>Ig57-S8qoF_V;aK$vEv+^TW?x8IsQ z#i)-Ej;_<5lg4>s%nd-5$vU?F*m;jy9^_J6Q_DzE$tEw5)-p~~);9@@hf8PfT6sA@ znB(zL6uG02KEFFZQw*2%A#}Io=tose;Pf1qC;+f3lj;2Ona#7X;D3kVRV^RNH9g$r zxSRZ?#rb+rSh79s-q|-3S@4<*22G}cRx=LF3Z&e(Mw zHyZ)VObNr~6m_?^ZtA2D+~JSP%=f;xr@a|}LFYS$b=s|&m*YmrIo_m#epia%t`X#t z_5BMu*i^hvn_nTtvJ5+PL8ZeFpY%F|_k#DFoBt$a)+F*~sKI8Gi=G*)BBrVakr`HySIeRc! 
zBIbLxw-kP=c|fPAece?Ml>2;2;$>9B^vv6^pr&j|xdJ(W}cL&xKocDh+O zSuTw`I%kTT==+aVFasVEZ}b>`$+n+ih{KAv?*#-j@~>Y;?63^g&;IB(9`$M8DW!fS z_rs$DnCD2>FipPVs!pZq*woP=FZ-S#N|>zMy1Wx@wYxuFl@2pa>-P13Oojk9U9zuh z{6AAsY=Lt>99;eQ%!4r ze5`Gf_4I}P$^SArxH6r_UDJSfG3ztTC)wt|3|vd>Q}w`C7JWf3@=K=C8WI855Ju)l zauQrb6B5F)I^Wr7jqcZ(AJ6}SGHRQaHKQ!n=g_Hv;{mMMWT5Nu_3e* zvR2tBT)|=Uv}f{wr6Ia2q@{YA85sCZh-e%nAOV@p8S>5q)}bkFD~j{_I*F#VaKIy1 z9$`4vUgo=#S8&qAfkJ0vXxp39y(PTSn(!UAeukXR2lT|47V5QpxYVu`en~%#94_TY z*v?jI9qXO96wX*0M3Zo0Io_Vl?Lj=MAOzZ*@O1~<^R%n$t%B*q5k#s@Ik9y-&sr^P zPVBM$J5Lc~>1|wxi2S-ts@`cM5AGh$LbEFDvVg}00m8CVC>Y@N+Gg%coLx}Y0)trA@H#_>WFu1n@uzuA16D`$3=huDx@tMxO8JEy&6h$;wBCRcLIa8MZoJB3a5(o!(m%CAzx`mlInCh{ z%Nd3zlKAeb;fojC$e(091iDvcD^gQ7oW)2dE~)*@jQYnzuLJRqd3L27Jh2{mjo_XR z(36`?>uWgMx)tzp=KN-NqPC26v{DFGDy3>yJTs@cPPi}A%d>MMt3?;S_ zBKfZ6BQ_z_oddGECd;exIpqf6ZtpXG_xXI$y-J_Nt*cy|8^P{rDQ@C5a@gIN#Q25P zPxT}8-?~u#t1$QfxLx+KXPGMT&$b|;Xox-ML(I9LidnZZ zmT%ALzxRE?SwzSn`)>l=xIssz zmhY@v74}+&n)@Ec+b@-8)t>%a&qB}lbOa2;tjJ^t(r>*mf$g8*J+bQf>p7p+DRZ`m zp#;6j4`J@|YP@&}J}(N~95HTC3BVC+2=EAHor)?YFNu{MR6HSkGDZspd>`#uH;Ihg zATdbCgqaEByL3rRx5?^9mV&=plRt>Fe6!`ihB5W|iQ;{fw6sjRHy6#2fuK7?nt zauVsBdbUBg`MwM1-gEv;D{sIG@r7*=F5X<*xuAMR;P$E~qwd1ZdjO(W^UB=Z|xy%$N_ zm9=v0k6n1w0VBkUdB*$8oVa{=2?A1UE9mT6$qJbTU*1h%zWNv=U}9oRv2}2uuwJ>R z-KH9yKgbiv{Bhrs#c8N{hk|WjR}j&2q^lgO_Npxcdk5g76zDE${Roo|Wa_dd!r?|u zT;16m(Mu>T(7pAS8-0|%shS_Mw@0Zb@ZEe>yUAmOvB3+W0{>29+2Tl^m%Wv~FV*e~dkRq549%MHT-R|S`sv4rrz43+-9k_(h)_MBD zc`jbMz3r5vKlAe}l{A?grtjn9n%j+QIzER#MD->dcTP(#D<~BiTm}k@K5e|WC@F2M zM&gYbd8`r2l*J&Ba%CKOtbJDb6mRhT0w|^iPdsb_DxZEIfH-6|?Dun!^awJye5V2i zwi%wOJFv6-G*Sy=BhX!5T*F_`LFRbUONuHsf^`L5pb@Wsij6U=WbhWb9-*aFa${EK zJiPD-1^U36=R@9PFW}743iDsNQ;8jm>wgdruXc+7(0lT7bUC)~oF58CfLw#)xi7x` z4^{!WIU)WCUWFCQH%I>?%(oDv$oLB&i49kgR6!K;0YZ2@EyF>3M)ijY$YCfq0^n=v z5BQok+^76+v_hmL5KEmlRdoSk!RVMDM_tqWqr)kgpDGUP^G%-E^_tpqDa&#lh{jLDr6r?~?ovf1bU&CoZwY zV=R#>KVd@e=Ra+Z0*4m)5EtHlwO!_$3OKY4B#;QUu=xSB5_Wu+;0P<&Gn24TsfOw^ 
z__Pl~7+SE6=AY)d2JDm{o9Uf<-ABISh3{Csv_qFI2m5bNE%UDstq@z;{>q+EmQ!Qp zR`Mx{BO`ho$JwYTtD9b)9v|nomNx#{3&J1|fiJHBxfROzcE`RbdDb9JO`&ne3y$vD zqQc<-0al9$RiS>;rk(3zEFl2ENk_lC{ms4>PO#~?%fSx+`uVZZ1Fr;?#0!>~E|ae! zoiY8JZsp}3*MCMH_+GZxkJp+{l7}Am?(fg#OQ)7?Njbm`#`tHX)6h0Q-?i)&TcYkD z&@*F9*3#zCpq*x&^6I8cv8!ow#+_Tsd%%-J_O-UIj>V1I)`&1eYL3RNf23-XdseT) zi)D&4+nV{O;iP%?y?fcB0cV^r{^|z&7-gY}ei3zsS349$O7t1uSE#m@E;3Bw9-h6k z!UN@tgD8-K02$_+GPN0&ixP8b3ZE*Vf;fD;j~nEh))ab8R2Lj-1hbYfEG|- zj^$}}m)3CowDLYdaE*5J!R%j<>zinwKdPFbJ{=Qe`0MA1DL(GN(vN;T3G`FXE_2Oz z(InBINgXq(&`x6@_#BWtMSA~Gd~Merv_LQQONcI2Q~%A7w{QNw+PuwRn0xL*GehB4 zASroEK%@+wikC~4_3%+Vd~V~@EU)p?h2mYkf7CL-GO9w?S`%PipL1RMs9#;@-IGC| zLy~@7O63}gjY0Obt;)Noab$+--)A-ZfE$jTjH@^Lnwtq=CrtRX-*JkRKf_4Gb2@&B z{4ZpGd}{LdH!n9}r2t-D-rzv%Vh=1^V=HvXU88`b1|#J;W1(_kOr<;5uoA8(QGQcy&&9=H~_V6Oq9=q4e8l}wh3k>q`$ zOjyyz&r_a>vW%NwNmUlO)m+y#5k5n-UgpD}Ptjho1t@ZsX5WTeHKk95&#S`Hoh^x+ zN^m9Lw(2{mz2qf=zB{1?Uz_v$1t zlq9K?4^asF)RIY5G9^nszlGEUq7D1c2UhLE%Rsx9i$NrPo)V5+8GNumDfEhj8g&7V zWDCS|P_|y@1)EvISHJj8ko{`si(e`YimI?7@xDxkANv>6h<{(n`OnIZhSyE#r63}E zk(JtMy~T~`!fsBSPSPXQ{YeO3&uS^MK3M~7Y!WUM;~2Nt7bxB>@6YEY#+bfa&UeMG zZ82szXHlXLf{jlA-KK1aUU;Psl)hO4y zSp5ZYYvx^ELv}*LD-2R`k$6G`q4OLiO1;>kEC`?Z0c4$Gm*ngV!X$j^D&)z2F|OkX zebn$F>Km#2>C{*`Z!9!nXZ%Cq7R#4QVKq2W!sHoM^@Z1k@a*i8rOAZJCd91)|EvZ7 z7+V`L7@mjr4T9Mk_jks>ADcV8F@(DVU;Qvg4K{={>G8lQ6yGz*SvdkJG7%L0fnqsh zzS8~&hN`B9`J3|lUfA6w$>fKNZY_<3p|3F_OK4KigCm7Owjc`I8Xxj6HzR=CsjH=C z{HE*%ifCVA+viBwWuV7ZUm$afAkh287PzMlDv4N>f7kOT*`dP#qlz=EUjz&NI)s`! 
zZOR_c5?3oTxaLC?`iM^@=V6Q}@t;BuOwSi8>{T&LM)-uHqEE+2Ac)wcz}u|ly^94Y z3pLAZpm{j|;%+7!{fR5i`3*n3^ds%KhwQaX*2;RT*@s}o?^S}W{+_%qOl9i&DKUAD zWU_xc!6?QQN)6ZgXMWuzT}L;j+Y zxZId;VTlRm;^wYksDx)zv8Ycl=pxj_!{*t~EB;}NEOSI&-!$VI7RJ*>k9sCD^CIo5 z9^P1zeG&GE&lK;ayyPWUvZGsF6OnagC!hep-nVvC`YCHya`~^Ag2695gD)+0^lN`}X9`$1eqO*GW>XQS;)%FYa(0=(679 zR3^^nAQU@CZa3Qsi^hpflKO)B&=TkkYZ_(d&>;t8eT$SEv^R>Bw!GBLYNvuNH#3Y@ zV^TOdPE4G|g)jGxF&tE02x7ApAzhN>b7%&P_D@lq7uQK&4tUwP zDvV}%w6=1jotjOZ?p7(@G>#aZ_?q&Y&7uPjZ|^!Bd8?i+733&YSNHLkCu&P#g-EUq z@|Mg9{>UKjNU4!U+nXwXvC4`W#v}V zmd<(GKxV{Jv`WHb_Y&=;qpt3C0KL^o(H=L3c(Dw!cBkDMH-;z@WqLu`H%tYYr2&M| zPg?fhN=sapnC7(`^$z6IkM*ue$r+9{n&3tT0d{@dmq_Q|1dAM%DNu@Rv(@i^pLCY0mV}pgfr%OQe3F)bo4(-R1o_@$jJ77g|WO_&DbkM??h$sqq%= za5+HW2EE7*2MLc&u6=Ti0a_P)+-6PD2429|qVI@2&4=jvIdS#80YIGXrR?}CtsTT> zaR7M(3+}hYeP^0e7^;vr2yl-f~I-gQyqid#=dP&a7OtNppu zenZwMX?UgNL~jN>aDU2CvNQxFyrUCiFWUlFQczGVP#lP$|7ftk{ZOljh5(6lsf(Ob z5Uz8Xv@F%|>Q5U$H|#h4<7cC8oWG*A&*iwZcf+!z)z20uwl)@2Nkjmul)&DR_bj-* zx18=gw&m`09KMFRR@*jCCci=hsvlcvWM2q_hG%(5l{m_5tTVW4>n3x@Ip{TOmt1+g z`NiOWop9uDh5LG5zN~6rdpjv0L6h0n6so(xgfwpBE@2kV&-!OC3gN`i>#=sy!iy_2 z%W^2+V!C2(p>GH&2?SM9EF3ttx?iQM)KAK_;GbL^lOU((x3IBY3mjjj!N4`aIU{`^ z;rn?NIqBynDq#2{_*Ce*{E29A#rJ{c5^I|7lkI;%*L~YNzdje5b-Spy=vpo#1i?2E z)I$|kNZ~TiUGfM6S_+~6acTT0^loQ-lJf+v(Ob)D2m+a%Dkx6xk%}&8fF$D zr}J3PRCOX($tSD&Zh29T5wx}hoE-O3>*@0zzfVQ@b$z5h1DjbI6UdV5Iin_Lx`3*I z@Keiak88oQgk4FMye(4LV4U?!xG*xueu!%lXQ8>dxm9dyYYmIoIliWGICIKQaUEaW zz6Exqx%j7=hOm#*C%>1=OL|qt=jUccVv3G2y7|nBl58*)4Fibi=d}0k>r!(UZWVh* zXBT_;W$c>H5c}yn5!>eKnVd5eJWlldP1krBnB}Juwz~b^JC&BajiJQRGaRxZ;vG2D z2RSrLNH7Q3mvg=*{-|<>x~~s}n<#2Hr(I~%y(fthF}-5p>6?+=N;tenZ{>9fwJR#? 
zYFewMFF)jk&YypZIhy#Rg6qT%58AtU8*WyWm7IOcz50s0K5ZlLiZlP-w zYR+5ka8+5EDo7-OpDXQQn8a$58^4-vDlSSqAh&NdGEm!-ub7GSxspAxLfi=%!vnbz z7Dc}6$0U1R+g&w3SFUr#nIEaVZW7ylTT)4UgYG$H))di(vX-ps-T4xF^SE;O1_Rf5 z#%7S-f`^cSdNEDMRTesTbycR6e~)_-LBLt-12pbkdTVz~>F~HXhN_yRy7+d*mUcqF zfJ=3uhEP)JvJ^wg)&BhMneK?CeYsxQT}x*b7L|%3Xw_TY6Fq>@c#7HHE>}A>nHgF` z?xv<%2ccr^$KYofb0g=;%ra1(?dxuL^$Cw3gx@MBJA<>v z^9)V}8+gx6O-D}ft3H&8rNK48Yyb4`yf@8Fv8ck-?)q6%7FwP#{P~z@^JWzuRH{9j z+fZkbm7%Sf?$&vR$cX!(VK9_gzya5Lt8lOTp|9!pa9tA4$F_7U%D^?QmKOpY<=J~( zXy{6gx_v_xtS`j!H}#Hvt#L(954~)Ut}gE*u5%onFw7d8&wrW99Vt<~*UhRFWRp42 zHvL}V1h^_#czdV7J?C=mBQi>WO%_QX{Az_m3&0rU)f3$?LwU%Uh>E4NH(L!ENH2?QXvarj+iY-M!QE&$R zpIm~d&gAi(k=Tn)Hog!eAG<0Q5ikNA6oq#86BmB^aGH(R6PB`E^#87BzCtJm0(s?{ zG3MnM@Kk_W!&J?K>F=Ja8sn*4_|HsZI#V-I0`yi3<};UD&|}YinMZoMoAt~a0w=++(CRJvomq^h?Hw=pvMhGy zmmX0aGOl!OlKs+y2)i-tfvVp2Xss?>$`-|;myf{;%TMgHzz$#!Pi)mkAQh-GNJS*? z(T#ndWBc?f2j~jn9O&G}^)TDNMNe5#K$Kct#JHcy8xtW4^grF{(&ET)aeM9RBF!i` zccB@4;@|6grt1L$fjWo)sLKDDr2MbfF3JN-J8GUwa0lG;y!(2TOz?b7dxk9%LFHsJ5282WpKhm5VZ+Osw ze8dE@gvC5mS!eJxYjk)MKam%(MR=BPsZja z5IG>M(anQ8z9rt-j#U7nrg9A-j>^`lM|xzwVr5 zo=|RE1}osM@*($`>R$pVx?^Z*-lp_*1yEZT&%SH8_=Ngh&Q#`)^99=HnPd-h*>NB0 z^-^+T_g{c9VAIlYAfc}oszlkiCKu5TB?~3OO?;%+xJOP$20`k^%OLFID6HuH#Cqr7 zfZ<3w3;kCGuX8VzHvhW(TJjog5zwEFE#i2u6!CiN_qm5)HeolRgl&DyQnFL2F4qYk z_7`=D9kr=LCZRGy234$gYzl0O_$4FYTR)&0bmo`job4g|AUd9_iFsu@McxqU zpuT1FLy{k`(jPF?r?&|A7r z?YUkv^^*>95y2SAq$vGYijNPueGI;SZSizv+&&H?J7dRTNjb*3dzs2(v#D5F`*jgtBMn%kM;`*9+>pH|@JQP`Y@VNB)V_Z$9-@50EUjm9f~>ScdT=- zWbOO$$Nk5RBQ#;8=}cgf>IVbU_5Sh2mOCJeuuNi>R23zv5TU9hyI~^(E9HMuY~`Yq z)a`=_j_y#yv>+dk#Q9~&EAia`jk=&LfHgC=1?(dl@W<7k*@p1<%kad9$ISybmOx0i zU_mi9AMoTE0kRRH%P&g3A21ET3`<+j-Sx+gZUl!7q}CjLEzm|4bj@9hzHSd3ly8QR z^?O*rFJVPJQyZu{2CV)Ns8B>}Fc1`n~{(@n>M>l|1e zd81XJi`&;UOo}G))PkFQx%s0`nCM~3(zcCy7UNo1yo;~I;IVgmS%Z?|DLTl`?`JDo zwd>{7X2rtVU5Q}}6P$N{%KKLub;oV&GajUCCR^}}`rW0+YB02ip?jsZT|Le1blHsuda=u>)3r#U2W?!_`rC_d z<)llLyiHZp05rEtA{6F7c?9PMlqc;%E|9a 
zkYn<0D$=wr36$X&hu^W62su(cFl)LsGplc`i`@YR)quS>d{UeoTF=}&5w@|d??L!| zaDgn=ZR3{_+%oFA^Z@ZgleI-#q%yEi?g&B9>Iy-j*DTZ1ykNG+)7X)?H)F%asry%V z-z3I#$z#u7&|WoqCuf-gVArCBh65u@d1`y|vDTJ+e0+UF zK+4r=l$`k{L}MjWG_>h=yWP|sbSuT{A=9esEb2OER~_g6EyKq_eACvr7b0A-`gWqm zy&&TL5&SHHKR7FW^Ho+i)hk15{ESRETou}S)@{-m8nbt?IjO#X*xXz|*n(cuy_p*e z>EgbUS*8ix8N_sTp zyW>-4xD#}ow$_KZmOAZpYbB=Vu8i2bbCwqUF6xh&E2@;l$f_r;{5AaTm~>-nPn2e_ z7J@BmU)tzvtuRqDBm?VnMf*8`lqmn@ri8(|Hl01iSu>iXa8`qajZK0N$9ztQupn;oKh{r30w zD1K3%l}Ntnn=qyps6Eta8MpfMU{;*uhI@Qy=a0z1T>X~fam3uF$(askz*Y&2>-5LG z6@lvH_FW{SkFIl3RTPy%rF0e?Y})y5ChxXsNO?+c=tFg^wl9Gh^oYMfhoL0P5tC@s zo*MCUKj=$j4dSuRX+|w1xP|?v)!9M|hVJ{Scg<-PcB>h;yfD{_BF$uX=!_KP9;xUW zM`?CJ)Z7?Zy9dRHXCvIuARGk|o8Okc#u46k<7s10ROAmDUeppo@F)}`k3T*JP&R%R z8prHHkvh>Z#H~WMSg1(k4JN9F`T&C;gaD^nA&c;jl}S+gLhu0K+X$|u$JA!*GR7NO z1W;b2eP~_uX>>Qn(ne*XAM3Y;_W>79N4!CL=V?va>cP6PmN@C)g%2_mX$)4plyRfT z8>VN;Q}3)55%$Kdhf7pX5bo5ZyYQEI!G3ILvt5upHoJPhh2jE6+m!u%>D9Vm6MPNw z&wSNyJAf5;=i?-k(7XC;?w~d3tt5ddY+`ClEX<*x<9il3aR85T)cGjA7z+2hR7c8p z10?dsm5VT>q0VE?UJ$x5xoz?>15{F8S&)?EM-}G zWjYM{tqrL@&c}<$dqjnx(Y?SAN?(7_S>(ACYPe$UceG|RXc7O)rPv4dB(pV zzAj|SI`mi~I?V=o+sjC;TIDpd%^viQFuT3_fLs>u~w$zz}^{kO1%3A zd2k;kL)QMJM$0Di30Kq7_i;zQg^o5|BJe}4vQ*m_k}?~5D)|b>7k^%djO@S zkq+tZ&QXz$p*uuML`v!Ip`@j|8-~vDEPVIA&i?jy_W90to%dXOpFg}17}m_JwdQ&5 z`~KBya-TA+^}9SEXYj1}c=VWaO5Ih(0<_G8Y<|C1W(i3a1fsIR!x~Cv6~dv{!#s-j zOD6Y>z8^>AF~3{#(e>aSku}zSzWk=QnPOF9Q8oSBoc%=LZ~{=2!p66m!Z!Scj*mK8KrM#KoeG+DQKmwc$?{7K&=fy@ zo&&u==Hr&Q)kyDUINB$#_@D;XpVUF~+_ADPxO`QEu0MXn!@_{W0`ZO}upyEwAX9bB zH|H=yi(*hopi`JVMkG)rbMvY`Yj_p`iTCSl`0a5dyl9Nx*F5ZoOs%H&B*oCa{LOlB0%+x_$|k< zp^YyNS6nNoRrN$xuU{P#0T2)j%k9<;(=qjS){f`9C<|O0gR5^vj z7k()}M_a=dptOv2CV;rBXRRvtY)ZE6(i)#$r%O-w%K*O0tX(IY?}Z?E-nbnIx@JRJ zoiQzWwb$}=>V4Sp$RqesPbl9{$=cad#fVrx9%knPsdq58tw@R~Gpifst3Q5ea$7A% zYpii3*DXb0aLw!toTM|*y3Giv)NY+Zf_dw2dd&CQ+B#Vqf00HH!xF9>{r3D4cL^9O_%nKm5B zw5T5)Bd8=@+L$ZH4Jq&5E(RUcAr_2Dsp~ARg@O(`=?g%=l}Sn?k$FMX>l^`oFwG^i z4hqmA>XXOxd-X!43*fymf|=ZxPO~jj07{+u-RdjAKSA7V_GO`kCp$rvPd|f?{w-|w 
zFF?@w;J!c`SG~1nBxSF1CF>kCcOYihQbzV#(g*HjT+gnj_>_ID?`l~T%9@Y+gnS~|@pX?H zxQZ(txFJQ#>59Y1yFa%DaOT#lhSO}g){bP{Y74n}K8(DT3#76&0@W!RhY5@9oK)h` z7g$HLWju!#tIIF@!(v=C%%tgj1<~EuodMHX;5wkmI){rj%1A!@ktj`}FSbjDwa2E= z&F+4iT4qlc$Er-O5cs9kza6F`6iOLaibh5 z5B^a|n;PvGuEw@MZ1J0Cmkct}Oc&uD1U+BP;!qrt0*zuYwu*cs4UWab{Z{;%X<+hW ztOx8QI{I=(d^vSGm!w#)#m9jBoQv0IS&M5ewm$y*9-e{!^R7*wm;Py8! zG^kPP2vUNd`B%#Ym- zQ=_g*iJQ%!7frs$lR3D}Ks12Q2#ESLYC)&JfMmzFzxi06vZ(kiC~RUWiG!|fcs^-~ zum3?xFvt(2n`}}{Iu6X((5qdV@j%93^``Bi#QeF=HvwRc|x@`>zt(8tC6Da*?D`}4_S!zMdQGzN{Cyb#>A33hzQ zi`^HLRGFNyFhRUoYScB0hP(XbbJwlK#xTV6I`jYYXRD%Kc5eB*lYt!LZ01dNg3&d3HqQozZS|eaDX@BG-id^`WGBsyN)YR@ zmspffOG65bI=!5P4+Ze}-@dqQflW}#`nF&O>te>?!?DduKzbVR6PCvf-`OJx#e`wJXUqes$#gMkPhs(D$-UR;sBf3)bg}QDXWdR9=+aDy9QlZft z+HH?C53>iGm|wF1X%(Esi>j$jm72K8cSH;Ety?(c{Lt6{qyHmvfAaN)_ zE~a910WDF-bL;Xb`U4xzplw>w@lB}>E92_?xFAp}bdgh(3b9?p!@^t-1Tlw=c(?&9 zrO+({$VEyF2@%)wfet9|O5fEOSx)%Nbm`G1 z(I1>`3trUIVs#J45i!jF35--yPkFP0ZokB4UOPVD*uf5LJs@4tQwjgXOrNrn6s@eo zdy)mrPI}VKufFPL>UzI5Y*AJ0_k=p7$C#6UKa#pX!#xsh9fABKD>^0?29Q-qPdAMt z<_L+FI*?yv5@sF(r$&s*IPzuMj6WTnNz}JN{##ezaZ|F~I9k3qZC<&MSU28m0JaVX z(*<(_#mo-W4dGob^6XA)w)khqhi6;FDcAvkqfvrv3f#&~T$HW3hv#-F(a-Jg5r_?r zln|!_Q_Vf=D*WQp#o4+nu zDnoE>D_WYxOW7K7wZWjI?9lfp^hXOEKn8RrAOQg@JTdK`lSm4oc{TcC;tx`e?db-X zP^Q~XEJvxMnG!*;)w=5FJJ_ewW-%mvUC+MRRg{@7CNt2yz?xu$~niYvQJrF;Na$4&{Hol{qO%F#) z3E`cZ;A5b->T=fqyg*mxttq`bsDgwG=j)eugEMGakIlUr=M|iaQgQv6bcvH6b#KO3 zykZ=>WA>fCc~$YcdiWU?4NEwn)tC#KkjX+mr$rfIm&@M_`TUC`G@WfTDNn`a`vGEh z;*j#@ip3G*;C*G#b{L+RxVo&~F5{PcPH=QNJAMDVo1Sb^ zJ3#84odR1Kz)Qk^~{@Ugm;#Q_g-pBqdMeLC7Z0oM)r{O1y0ctE%dmAEb!`I zOa~2Ga*q++>>CrJY#!i{b9o^fdx;YZ@Yy8F33v**c~9Jl&~}mJTGT1uOa*$l+n`rf z1Izx=;|e#6X$*1ur4&sNH;=EI7<}Cd%zJP(B(SOu?YoNDVOMi^%rzMhn{TtFLm573 z^Z3y{t4~l0#gGZD9=xt4yX3%OHzV2)Fo)^-k+w;i{vi_rZ`Y=@)($)VJpv9m=@G2r zY?8IO@&KexAinqo^4tEqys(6+#lMj*gGL(I5^oCz6J};Hm69HzoWopN#UWncG zfZb}VW$DJI7(l~$DQh_4e1LV)et9F^vYW}Ia*uR2U z|EIqSpBE~zI(Ln|*(0|V;NfaRnMhw|Z?c1|D%SHJh1QHN_}CK#7_H;k;NE6!IGMnj 
zku=~_p?3l+Z8zO9#R9?SLUWkz$0UwRKdNh4nELjS&-wHvClN1M=9cV?C_e5fGj>{h zl`9AM)mXeL?7ta`RlBxggthfoa?o^lUPKq~J|8IA8^f0Wy6|4dPo|mH*y8S&hvrj$ zsA$IO%O(ue#TRhj4}DQUEY94nxW@HwAyM5mF-N6rvg1!RkEKN%&udOz(=Jkd&^@hb6=5)V_Rr zEsgn242c{wJ;D?)8Ts&f(-j3T(Rab`LzFM+0p9I){d6thOz7GA$IUTfy)jE7l;JDrEeO!MAKFR@ha#ENg< zAV|t&SE>t|D8(k!Cnx)V>nj>Ws^I?0f8;;jCoj(qnFT2vG=Y|XF?3W^R)!Zgr=%S@ zlKGEiuUGDzfnrp6kq76l6!qYVl}2|GnflYk_|mzvaef(d2x|mQy`xE70m|Z=9KP5m*Q`Qk;YI)C?iy6D(J5U zCwSbz_T?%7I&W~8hypWsFZ9U?J*xBMG^=%ZH#3-Hxj0&~4%DVER(~+Vn#xxnIxI)-J4p zBDOo)euvy2&YUXCzS&vSp!hdTz}LDW5z8 zN|&X^Cov^jC>3r}l8?C?W@t2Rv-#CRB_5mRPoELdY4WM?jI$=?! zXF!%&m+qvQu0mI0T#ET2dy`HGn)kTjMqIVWR-v;eO7Vuh{*B(cmCaDMec_yeV!{n0 zg2vy`2NMkjJciymYRHF2`I0SkVuXY(I^#t{_s;w!?^(#YCTka&;GMR5p^;}-_REn2 zRY|N2o}pfJ`O=(ZHLG3680|d^3oM=DPaa1LH%)=0@Q)?GZe=~W3<{JI7 zs)Qzln9lt9FzsnM&zQY9zD>lJMK-z*y|-FBvK#c$9?3>r$4XH&WMWRQs>0k~rwb+q zlB=TYOS=WZ0`XY25%^gcoXf=%G^l}O-^}6A5aUDdNExSsGub~#mY<)_vb^+H$`wN9 zE9D9oByNn_IxkV%P4{ozEiW~`p8=G~dPw!se01`U)sjLhe+mD`WRU;U`{@75@rS3X zvqU7uHYYqH>ke2~LA>#xUL+y93lzHF1L0q4YUBOF@fl6kk7#QMHL>GW-l6$yh1zex zV@jM>OZE6q#1WAejMok(2hMvV{Xh)~@sK-8w=qzfP8KukUSu+Lx3g~I%U=WAPne#T zE%(HNzb>|E0Kj`eyYih4B#avtM6DF}*?H8C z(ip?@lF)g$Lqpu3_6JFgeo#r5B~n`n2&J%rtbj&5QxNTA6)-%G>9qmBvmHHI%b`C= zReo~A&+oWpv z{*^uUz#+0(-VmI00_N^2{UZH~Rj2hhYe9e`u&49`#aNaZ>nBr_z-LE{SxRUUSplRH z_TfeYyh3^mH-vRINV2=&lQEtI3=@8B^so4pfFY!|sC8ca_zx0iRFLyp!h}qf>*q~N z&f}a=EL3sODA~KgGJDgu3vx}!arl*aF#rp>ziJJOot92G4Ah%7qa4J(&%zOV0|?lE ziEJ(UfUI3kDZlVNoDR;p_C+Gg&MPNL5uxN)McvS%g*~;}k9Uk3F2~vL^ty*qucoR4O$5o}_5Ex>95TuT+>nZ; zl9=h_!~K|-+x?nKXNU#1=mlW)+}jd#cRSkuK-riUdZ2IUXMWGYX(AdP4&gN+H@k#h zSdiafmNRs6pfxOCS+*MvnMq^kBQI0{&WHQl1**aHF9jpS4+B(cfgT`}Fe)lyChZ?a zyO11Tp@(ipAD!oKha8%)q8)TMcG2LTEW^<%RysdQeuD%870?AfhZ9A-z2jRwkR0H$ zsGNM?6rD2C+P^LS=|vlolZOd_Bz3Zh{G9FuZ(tId~hcyhT@^IVvyZ4ah z<<~@e0}HqMFA>ZN|I=Rfe`TrqCqG88H3F@C-#rKC^|bK8iQW*3gVRr%s-J7SaljJ( zVM@N*B&_2T^Vh+$JmNn{=8$ZboGFLysjefeL%M(rl?g|J8~#1((`&ikqYTNghr6044E7I%4Wsj51cSD;KNpU2$%_XDR$Z1ukHOCyX(f(B!@hoRYm|D 
z6jORIC7B6Vwt!r32cTCY5m}+NI9=NTFW`RkNLfAjbqs$yX#S1Ig9}JG=ff@BYdAf} z8+l4e10Bo5Y8|v8yEIEPzFpdfn#fSv$eUHIZ35G0t(KsriSb&dx2~=A8ew@r>fvZD z@F~y>`Rnyz&AvHbrhQr3rIxcoSs2$QZT)-rvM{A}d{049Mulp=ms`agw4zoxk{d=( z6(|iDhv+@SL5+@jzC{bru10sQJFXo`!hsk4uTCne$Ur<+XIz>&s14k}?Xf3n8T+woKMPq`RM~wG3f>fO!bQ_g|#Dj}*&}0L6LGmF%T7S>jjM zx(|ZU=x|anuqgb)Xgqk{^#^w#yzar}U+u0yr*eTZ(DI1IH-NFv(v9}rSsY;2e~9V5 zW*^nM-~FuU-@ajvA!071dBMK|ORIs3GWk!Sw8s9$#G>e3V?ZvvgXk?Fk@~K&MQhIm zaraFJUiH1tUt@Hsm9DJlff$jnsXK&wbhX+SEgHH_?L*i&;Kt8o$!kCO2MO5PS14Oo z-WZ_CVE&z`Zr%3$cSwu6)7yetA}xd#@UsW@Xz_`8hGbXyT8(6}4O{*#-;~RAmHR~X z!=4#T$WLqlfIIs)S6ZXhTM%U9auV}}uMFJNxi7r+$)m)a?ZQFH{-yu@)uCkc&t24P zwVP6pu7Go^%WU9C+Dve{A6Js*k~MEzD{e(03p6L>&yVCwA7>YNNQ3lPo&#d$|BSZj zKYUJ<#ECThRmYeZ?$+trb*LF%(UsPo6MK>4OOvqJvnYu7!z^^Gnw|x&EyLUxKI-(I zq(>#o&JfE&Ao?mcdR!3*b_5pyi$mDl+c&oIQJ{w)&fC13@%QfU9*?`Ok-I+#%H>)c zr{~1oPC6ej(aZCKo`r4Drsyw@qKXnK8K%lud@7B<^T|7FvcdQ7MK3_i7vTXm^+O#c z2iO6vi@d9VN>P8&22ucexHJACJ)~$&7qL zS*fk3G%j<1`bgr#TSHH1@{EuvBx)+#JEw=gZ%xO6^sb}98k?0m_Q?@_tO=gkJl1=h zqc?HG=#(F5NibQ>*2!u9AoyY+-?aD~CAZ?a4`-6@@F0uCLO1vfg5|pI6z{ zzZDe~rjDtyt;Vxdk%Y#+a+@xoBCYS935 z`-#xJ(86%#7VCKcmIj=@wLryhmE<#NKxy?HeTK)&0S6zFWR>|t)mLaQ-_rrMIjEik zCAzyU)Z-ZvaHTYzSh6g7D=oj&u!T(@f4TDhSQTQ?h@tw)Y8EAq?kUyzeh#YNM2xz_ zscdWFs>~?n$fuNIzjWZ1KdlPAGdi>{NkSI#>J+W@__a;dq`tm!ocCkr(Ix$G+C;lX zLgO&1_CJMBWT3CXguqr=eIQd%1UJg4X3YOmgNetWX)%57(a*xkh?@&4jKk5SM7 zRjlTe<^3qw_f%kL-y2gnr@##(ED*HI8EyO~q(eZlu_QxL*J-UeWpJQ8Cw0;@?qhTq zLHWW~kcx@kW_jn_y_NQ;mZA!Yx8sQ$B8jiA%CDH(2FH`4_7zgV2&Zs>-FT}vjzRgX zZlEQ|^JO{x1Rw4*8*#S!i6aj-W)O#Bt)*F>f);VQHdz|lkhasy7&(+$!rc)xYk{VS zW8AcLikD7Kwl7k_)O*!x=I{Iqz>Ug>ON)FAzwj2P{n7-nW*|2cM01EZL6HWTECAy% z$`fLvzvwjMu0sG0Au2BRR9Mxu_y{inWku?@oo3H^d>Ik& zJlh{tCeXwp?p{dPjVkg90eV3d#2w;4nufK$th{y%_%Fq!CTqjQsG;Ncc~ajJ0}gIK zyRke(r35e{8D{I%Jo%u~+q~{%e0pA{z52RPt^xT)Gbi{1}C@5T`&y1G5mu$%SkVBRNQ$cas_~ zPCw=cxV)l3Lc`8s@H-dj!~TuZ`&LV9I%jVlNIkK83xw^SwWObrDSEG4JmDLOdP0x# z<);}PX7Z-B16Az2fh;!5seysQ3PB*lhkF+0scur7!UPHd-EknK$#96OA-V(kP9^B% 
z>t#XRg*3ylV+z~+SciwcS6xye*WB)<^JhyTtJ(Nq)Er5TS1SSh#G4UJ=S2}F8#VWq z2eYp-@ACUS&u#~Tjw0Gd0~xP8Z#5?O56=T=hOn3>c^e#LcrL1$an^?f`(UVNWs?_( zai!{8!-LEOR;#4UW}}wAgO$Q49!>Fbx&k!$v(R596cMh=?p&-td-sZI8m;jGu)tH zf*;;T5c{#R>dhbq-T45vGgs}&$VAnxn4F&2gt)m@!jsYzhBOH(aba@vHKDB>g%B|> z>GWfp5!)JWr3yI{#YFCYj3KF@c(GkD-X7<+uYE)G>YF|iX^UVkhI5| zcw?8A7L0daq-S+c&@8UnwX&jxnO`f=zBcfO(R~cqGgGRnsT@C8seMJ1WBpN(){-q4 z1tnfye5RnTp{ll;dqL%CO%5J%K~e_`zB4jIOc9(<7J)BDxw68wo9>rJ&wl~OO1xjs z`*yx7CLHmKcP48?ix$74KL}7ci=U0w+QvQ!11%K!=uZG;j;|o|+;nRDAQa$(QBIub z8c^gN;Bc1A@)bQaE~^|s%ifCA>16%;CyyH~bcOO`YB%Q7yn71V{?;Y%io%>!m4b2` z#QT2wMjdd!Mdl)#U-wgS&6tVqyl6ta&1ZIctxJ#)$9~H-(nrjft+wj8C^L~ zgP}WF&?Wly-WwpmMWzfOHH+M%u{~rHAuuk0?(qs83vMS@@%yZlpQY4Tc|3V+Uj=_x z(g>^mQO-gGkzprlHZEb(@sU<_{nWYAV+W3o%@qgKpGMokM)N(e#{z~;0=IPOXHPX` z$347hBelov%O~6j1e{sFSGKJs6}}X*3eM2uy8TCu*=Y~9B-Sg zIguf;5Z{&II#B4W-^@lE->9Z!Zq+_soi&`}NHNP@AjykRjUt&QL{@y-8dt`Ni}C6h zf81o;xEt4oOroo}Nk1z310>25X78t6-??%xAUEdZa0cvr9r|@Mp866MWpY$j)(6?u zk;}%9XRKQ&fayT>KJQ+%wo0uW(FQfne&=mjKN)3~n2p|A{n*YEUOk5l!!jqmm)E$e zb26(C(joZB%yR29ndp>T>|d;*wY}b@OIe_=0^Ha@Q11h2NV_fuE0y$#nQo~8Ay==! zlb=)0V$xdc``$g(xjQu=+&kQ_x-*pMf$m&v=IMk6h>BNcsj~`o-#N9F$SbB)0 zqPS39%bF8Dr})flk(v#VQh>oumQ6nWK6(6;FiQ3~gq}WAhJk|Ep`Drx>%zb+|6LA?G$Oo5v zn#F=;IFURzMOQTckKVQE}C)TVpg$DQ0+*u??FphWeE766P@Sh0YX#1mx z2(kgQmE<3|E1(6!Zhvj{8}>+Mq{{A6oz%RY#>;-5n~7Fn{cenQ0@m{1P$>fkVq)TD zGlHtftUONvvrPAb?@As2yQaqfusM08(5s^<;XZREfh& zp0M%8+TwgN%y@Y&lw`+~Y7skmTJr;Y|9xD%@=Mv);zDYqr_BZSzzA1w?dpLZEuU;4 zx09B!5dl7go=%KCYShwFr!k_5!qK@|yVel5@|ths2YcTq0=)1gU^6a7|2ty}<9zkP zz6Zw~a+D)^puWyn;R>}%YVb(zUXoHw|Ykzd738DY$ z*pLhHEo^GqPqRCfGcJX@7EczKuR%(wS*p!h!nDKkP>v{JcVOYcF^|TlmSo}X1Jh+q z^?Fa%qt;*d`$!^u@*u$Js(Sh`IiF}*M;fS$%j^XZPVu1GY-FenCiV~)>4n;!ZUbd? 
z!5)!{U!fsj4NDM>aWWL>AsU1TxY0}}sx~Rr&1fb)Av@xlzwgcni%#&N57T~A@Av2_3Qp3$_?lZri~BEZ7wxY)DeC1I<7C%624`!>@)Th}aS zFWIHt;23CCJh32bj-{xI3!?X?ux;IR7F^f5BwjUTDj?5+<4px>0C?BM`6AJAHB78t zA`fDpZL!uYd1B6$RLA;+V1tJdZ^pBe2^H@6Er9R{EQ|fgQrez6J^*|DKG7F5P|C2U zw_Q@TJH+*9;g{U!E&T;rZSb)bb&is&%^n7KtQvz|Jkh~po|*evaGGou@nz4giy}`*Z!^~ z@hZNti5D--VKzBy3v7bOK<{T(khO1hcWaDksj7-wGu|G8&t_c z(0TTuq@e#mX6ClS1>5WjT7G@=kn4^=msA7g3!#Mq>JSOlSA~D6k3gHQ{(deEe8lWO zkF4BH5C)qzzUDWO?8K;z8O(CfNjDet3J;Np^qek!D?i^E7pU6c1<`opWo!xYD)1cR zf+v*sqalDx<uyX~;{+IxncgW$8qX7U0>(On>&qH&UoR2I)G}&?G#h2eE z`L#S%`!4LfOAiLp$YqIDxQ4Z;Jm|kOA6gCg=cUmBo3%zQHtYAIcdNv=340RHbam%! z;BhuKXpM4%1K3=jRQ2QKD2CN-S@abR`4a9pH*;*`#y< zHd`4WVUzWp%-0QZvo5vEn;dxV!vud`6j)yn#I7Z>QOFk|#BUEs$Izsr8e5%1xrGos z^N^eZ=HmH(3{>Cv;huK$r4p(WV5Hou1D~;~fM&)A*kbTuj(_sl6ym++h~C>ePuZ$x z&y0C}>Vkj&`#e9|e|4C;i`lj7vRLoUL0sZiJ{Lp-;ayboaY+mNSEC#55@3#4WryJH zP4YZ)G(%^`)x|9z3s$`ys8IzbzJIkXEFyCKQXhYHFxdO%IBm(s`h*zCJA)_jGJypU zfdbF%w?7$qj``P-`_9JTyX|O6j&44%WFJBlq_wmRzo7!d0{|pdqK-ge0&vOAhttuL z$fy)a>t6%E?*>Y9@K~^>1ub}c+jSqZk>_>C=w;;*(8~1(=`G0tV)U<3JIcehAdyD; z=9u2(>UHayK~a}qaH!PyZy7TOGv(+PO{EugsfY->VnNKEx&vU+QG7<&cBkOxwW{*S z24$lzLrZZZj52O6x(hVyb%!!Nawbu2RfN54{%%y7@I&3d7rOi>e~;XE_h$UCtVx6d zkwT_aY<p99q)o(U*b%i){pt;x4DoiMzv*RafNKWCNm-cyLvy@;R~Ow zVHxIo%A#>>asK-8NIxbj`ywZAq0A6AtXou zQeh)65*HD83@0#5^vyXvP8oFatvNuP+0>gCIWU!Oz1I( zK*7V*CDQQzQLk)oGl2dbDR}x=NOu1SxH#Mzzq@A&BN>g;fu4B?zW$4}h5{IV0`>|2 zX6M>vjIa#2L2M_`GtRqfP+a-|A23Ch(y&3HS-@o|j%zKY4%;sPAXbTk2c!_fW$_AE zEX44yCmoZ_#ybUof`OTprw4sf9f>~@a-GpsiXcE@RQ=j6PjV0BbwHYIcJou90FOVR z=nfSSBU}%DG7rdwiFGL|IfRJ1U04WSCbgD>vygCBl5{u@L-Az5Q9!D;%(AW*u26`1 z!Vg>p<4pPK`MNtAy=f>E+ikEuAEDngT-gkXq~Uh@`kk0fXXK zzoD1WK1&}Iy&G5Q3s$ohEz?YMOe-NPag7ZK?7NsR(uaaS-Diip2>B=R46PT*5*@M; z?p*VRGLVlL(-vx>y5!hF9dT2wqh|w3pf|PSXWLq2iXJnhI4*}OuFCQZ@$!6`8>6IB zV!Eyoj9*#*+4JIm#__hAUM;ucUVYqPuND*LZ+T87?@k6Q@ffr=oI!DE*Y@Qmlo4GOf}sXw~~A1*kJ z3+3=+V1AroH-)$=+l&xObn7=wVEAGoo{*|N z>lOHNx6zhy)1*I!41otGc-rkB0I5g7g&oRi=hjy<0EJIVM2m!|pGUQjs(Ygm?)|CZ 
z0)ZRnhAcYptDHy#Ffn>^cROzay?EQY10#TRFs)%1iu&Ga976ZQI9ap}emmvDr$Tr$ z_|@z@C>DNg^*>=E{qpsKY(oPE<|uR>B=_hn3b9N_b5V=s0cX+ASG8otbkNtxSp@?Nuz!K30)Qh3FKd1WM|? z+P{xp#rd$N{t41yTd&#J zrG*YteY(Z2AszMny4OZFSk!2URE4si-#ObT!IA6g8IS}0a6MM(j@iy<%{1Ue;TEi} zt24{hfT$*L?J4w%(0JqyZKag-*l2bqoc%Kf*uK%TJ{50XG&t;z%TSuYfE6gJse=jr zZ((?ADd(2WO3s6z6Y5C#yvUJdn zRaq^;wW3$>9Y*gt0kY*kd0_D9KjV0HDiLW_2_5Mj(5vIg&-g9k`|2UVP#{NsVM+kb zAe&%xOkZ0xSHd@uJb1C&{HE^|2E8IJMe@mx3hIhCxN;n=)wk38eNp;N7#_)Zh^;Q$K^rlO6p?iLtkEBO2n9f+Cj@t=pa7n%|!pT zsi^-Ezl)h{pu~^GhMj{UVsVixC0QHeo0z;P<_e#2L<6h9qbZq>r@BohNs2l#80RF{ zSpjR>pK&F>G_~FuuZq`mDpGK$jFdvzUJe|27!5w@yCZ!GmekMd-T z^x?m@Xme_&jRB}y&76d@(5h>oe}ZX=1EfjK-1!u77@k1noqHJ(N66t|Cl!CaV?ILS zwi$di1~v%jueh<*mr(2+8dOW zNf0OmkFV3u3H{NdBB-S30~?W6$iY8noM_(q;QyKy$$U=ALg-Kk>%ef6_-AyR(55ar zz+{RLz@}yP&kHj}zyWAG4qZg_WNY>KqlMP7ab%uS1~kiLqi!oFFzE$9yIc2KVFlYN*A^zr8p~kLZ9V6`qQikomzIBpkC*CbcaAY?LJ`@ z6so7Pf3Kke=gTyx5*WDfNrE(#2{KI7i}}9Y;|Q;LrbJN-goYhqFVtfeUh_C^kz8AW zs42GyzKdj?1K~0es|4wbuag@L~t@VseTmfS1bu02ZH*vI_a z(=oqQ;DbOVkm(`n-dh5GcD~n4rchfms`hfKTE08$lT&{vRdQVGj5IcE8{O9!^U8I+ zQ%_%awsC+rdUMHGey~=9GCxOM{sz;=FuajArTQ3kws~wfQ2ck%@1j?UHMO1*vkYfZ z*w15RR%&TOhV>44OW6mcg9gU!9ZE@68nN*{sqHEYXRl}5PG9r)uidBS5lm>Xh+h!z z(O$rS5vXmvj8Y9&MNfA)CrXOrN$r)!Oe&{oYpcm8qh7{&&L2tFvd>_9M{nM8i+o%< zAH5d~1{?1IEy?=_c(SS8GsyvVi8TlXrW|B_mZOm$~X-T8qwYoqn=awxn;WjB~42HodA`b5D&=% zE*F(z=C)o#=KBnP+iT5oK7AIQLyL6pseKopUqoKKT+2CJ6Xs{= z3k`F!*%@gZX@^3zoi!z zJ6KQV*aUoBwWzI)qBcDtUwbbGttG!Kc_VcwH4hok6@$zWP(MjLIp#J&rvy%uW|6xU z!&5f~Bu_~MXy~Uqo6($R$m-#&5dSNY+3wg!^{+l^WXrhnz+Oq06+N9SDYp58)bw08 zY<5>2Nu3;@&}Z1uSFP@X!$P#hbtL4@0&-k*w=%@(Nbc$N8gJ2Er6R!MGXDV2=VXCw+P`*-48`$w0lxFf8sAEf-{;-w zg@-6kp%$X*4^k%B%#G^ifzN1Jd6q-4E!;holE0^Cct5RfT3y{3qx}{Ee@hoLBUYk` zeI#|~k*;EVA&ms0dUJZ={`VM7?Ox;=`(>$T7+Z;E!@XDHpQhBB*JBBh@!PHz zk#J##SWGUe7pZ;FbJh|7Zbmc(rWQLhy_xyabo#of;aH~9;`81Md;@Fkqm3Oex0W3? 
z^8-5;$TQkQuC0$nqNHcSze-4poPm6of0w}d?-M!y)4oUUP-#@VYU`I{%HQ5Kn+N~y z6D@6qwe)I2+M+#zEZe&99yv!YL^~tsVy!_$yrnV+gT7kzmb~?#FQxrL-XFjRAr>fe z)n3@TS>9by06WGdK&9F4X;e|&d(od1OM4jYXOaNizsaaQHp?@7&+DY(tJP5)PGN zj2%QO&j`lk&EK_ht?~l=pwF*g4Es_g?i{y^U4v*;xWWm1i7>k$OY}{fH~Mz1NU{NT zh3!Rrla(mHP`RCSE%ozd9OksC4iMYrZydgr)xSi(U&ASbG=NLJ5h*x7S%r;5&oSQB zq@Sw3@Vqdi_b$z=vq|^yqXN@>>X8)9%tY;pKEitf^ax#i*jN-aw6r&Yhel{tS23qX zRTEVtynAF==gEsAO58loyCZUDk98MBe-JamL_*1N^r}TwoOrptFC@t$ZExP#(pyj) zepHObmGU7eoWF7m&|2--wt){fbcnojIx^7z;xH8W3-p|cVN+KZ9fh(Aw|B#X4Q(B6 zcoy%i(J`AkYCm&SX36wow!q_Q{e*r6K2rXH5U2^2^pJYQVX9rjmJ)aDEC5Tb<)_B; z?<~&@+sJ=@S&x0v-rl}NIJnnf`Tpf(xxZqOw`a$gh*fo!-vFa&hHAa$rp|yxO$?#t z^YI-G>lR8yGT!m=4!FEciH%Ftv_x`z3G-L?Y-wia8P+F4E1?D!9-uUVkgCXNNM|H( zI%SfF6@i_TNukj^3EOL1m;l)2D(y~+9K@G;Fv)F#U1Wi(HxH@ z)g#sTMvz+D2??6&EYBc>bY_gXtJl0(QaZhdKcnbn!Riuz@XlJ3R;Daa%WI2avUdK+ zB7(3VX~lS7jCplUVVt+_Oi6jAX)cmbh0TKb(NQFbwvemeH@c0Ir6QUH zH;IJO4Z9IAZhTux*z4PfSrR|e1Ea7^nE)V$By#9x0}MYpvV+6yFyL!e$G3LJCnP<< z49i=Em3ZRg)={lW=Z@4NrPQ-WKUnTsedTd|4emRCZth^q*51mb^X~(mBJKCHUUAKV z*@<@osr_V^gFJsr+3U-Khy614f%9>5$|>rX$6mjDV5Tf+L;5&j2;9m1v=5qxd=+2| z>x?!s^lO{t&vWkHN}qp19_3{5QnwX9AHk;rYgpG~2;NHOTM}K`PWPy|ozmv$vPE~v zR;i$`@dxE`g!;1;*kHCJVD_;;V`AF;e_dtf^!&`;jGdkWG$~mB)if#p`g(Xo6?1j2 z5~Sv3*7e1Eljy*aB(GCF>29Y9I1;<9S%^c2MuC0+2Ax64EFkwGSUgUyNXSPQ@Dp%wTN&bpxyaB5TZHYA%t(0q9LI#n5)~`!-_<-G~KW zlRE+YG}r*n2ZX<4vdSS;@}5!ERMhMMyUAAu&q+=G!E3{u{r0(U@0Wc?k9`y@&f0#EPdC+zM1xb(<-+rmOUlk1WN^{wVNu_ zD(?r_gt;@0ti5*x9R}UePK+12IEFCxvHN%)$|I^a>9OCxGLRn-0G$1p1>Z|Q=cG#j z9Nhj=P$p`BLmi$~=Dx?_ab81$xNE%CEFl8d8MiDgiRY+nDPs)o7qplSmfw%wkLuoR zHs^0^+4+?k0jhZR*H|pU+!NaLcTlVNIC@V z=~2Eo#>vTBg4piz{QAB(h-7y=h@U8_a_^5|BJU1l zr~ft;t#I$lR^6_)vHNc;fBbr!YhJgj=*$HJJi|n=T?RVp?NP?^UX{*clCnh-RRWp7 zgWe=7KU_bYue15(w97B12l1Hvay@)Y$o49S^N*D)Qa;@8&QTG{_^0Q6;*syh=+|PL z>d#f&jDJk zf44v67r3z_eq!skAIe`(Pugkk7`NEfE=|v+a@+im$p^2u+;&}UqsRHVXWJ*{`745U z{>iR7eoRhgcgb(HolAW!e<`#}E$lkia>djRgFn4Vz7P00 z&YZhck?nrxZe*^Y|1(y*!Xl}S63^UHr^d-Vi*$T=e!;huk~8m0tUptK)dhIGeD;*- 
z56v5&6x{#NfBV?sJwcw5{~3-)giNf_|FA}7##(*gq;qk|?wGou%g@CXFAFg3dT-5m z=F<1%zZboXmP#!6%yA?Tv~2p>?1%H)A{{bHjTSGk+WS!cm)U~)mU}!CJ7;^%N~-#~ z{X74kCnm-1iWg&IqoUcXZ|_KIt(^FzVE6e2=d+IQ`S`wd&1Ti(9VU|tdpIV&lsRU5 zXj103{X90p+5c7_wgUEd)=jJgZjs!#Ioixl^uxD#jvFp)S)F7hk@ML9iKgY9EHR6R zAKed!`<+~}ZRJ{{IVX12&DZHwuHsoCAs;o*UiioMkO}*GAFS?RZW5}Ed#T&ve9!dG zkzdyrHZU;oFdks)^2^EMl;Nq90Pc9_=#T&C5>>mKNoCuGw@qT-KR>;5f706A7j~Q% xOuRQ&XD6s#>DR3-ds|p=T;lwUCmYT$P^b=fcU8MB<(V_IMvVv8=(ha-n*a{w?n3|o literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/adaptive_recompute_b.png b/model/train/yoco_moe/sources/images/adaptive_recompute_b.png new file mode 100644 index 0000000000000000000000000000000000000000..cf556e77728aec35d8846ac02e6ca84724865746 GIT binary patch literal 69240 zcmeFZ2T)YsmM(q}q?IT+(+CJCAUTHyl?(!svxsDgl2aonIS2>{D4>8Ch?2A9BpE?6 zP0n-^8)&-!$GP{t|J1E_>;2|U&D5Kz*VHbW!>)by+Gnr5*80}B4(>N@5xAkQtfma$ z;o$*~!5;uO1KbA)39b@eB_JfcN(g}vUL(3eM0EW+5j7b($qhPcdU`r)T3SYyTTn(O z4rW?fHa<2EF7DfRZZkmn1^IadZ}Hsb`7;O}1Og$tMnpwKM8(5M%gFO@{@{K96xXgy z6S(2yu>)5q@bD?{a2)^?0PwDYv;8ySzkl$q;DhspT)R$040fox0bIeu$G<{=fAuN> z0oXej{69cIah39>kRlWYqi0~e#l_8Y z`;MrXxP+vX^!*1)$||aA>U#PHhDOFFrZ!J)?d%;Kot}Gn`}q3#2ZXKS6#@c$0>~e^@UHlR4WEMG>P;a+N=03W zwFecu@QZ8I_mV!8w_fKEd4!~S;yFS@%PESuh594fzex7i1Pl8=lI(v9_W#Q@4UpjD zfe#O#0)PSMpJI4&ulz3?Zw5ND*nC5t<-#S2SYY~Q>@5jvi?(bz_lI(*qg=Agm_g*+ z&&h0-(GpkV#zRvj&Nw4i<6O?=8m>wQ;A0HW|7-uS1|`JHFPpX7w=+cI)5Uks)T`zv zv#`TI^$-t>`cG3oa%`{h1HJ*wArGSqyD$6wCJ%It9A7KnjF5Z{eEy)o5=wmz@O+H< zf9)UEfOkk<{s9--F_3|Q-@`Qzm7#K6dcOp^EX)u@RYtNm&X>+wR~M%P@lc}H!Taym zdhBAn_z9xfBY4yW*#spy5*EYAM?<0iPC*H^qpNBgVTCHe-c(KHbrJI_kANsO_D?56 z$gpVQh+Sc$li{5>UWP~hr$G|k&xnR~2%b@XIOWf%^zxeaQm213w zjXSNICv1ls`3tm4oIUPT zwQ)q{R+5elyg@QCzhr(brHv_w0z41_CJ?Oxn1|e;D7;5>!V?pmafcsQq=Ml z=#u_R=y~|PmflxDfD+9b4nV#_&tC3FmB9C?^Ul<77cau;zrS9%!6`-GQ|5swq{iFS zH_C#ykuKqYbLdkX@Z&c8j6DT=15OVgKF0xQd;uK5sRJp{wRCDGM1_5ZB6FmEbZN!q zV3JfYY*-;2@ZqclNvMrg%_Ud=*f%8{_af+GSUy@g?_7Y`2ztP*uWokC>3YDfsTN!73N@W 
znBf4F0Xtf6>Do5xHq&;lB6!~DKt7IG1~t>8(Ay|V!t)IwILezH{k%K(aQQ;-c-RI9 zEWg2Ox7$imN3}79?Pg z^MK84@kG96t`V-sl4_zx2Hdl;aWTO5=MqLY3cm`)*quy(2&M(X!k7+7EYOMqy?5K7YMLT?NJ?T_u3}zhI+z|KazXvDxgc zT}=AWt)F9g^}7cnv){XO%8&CuV;(}5O@FlqxvnEz8%07#x{9%z zsd$$n3UuJw@cN}vRyb_GK|v|jSyH~d#U z6pIF~@Eh{Dc2#+h9Ehz9xjvzvay36w_k#+W<70+Q$BHhgx9ns}cVAT-(LIWVv%RC2 zfbvztV_ijNMDafSRo}&2q8rqxwy!9%QM?140}&j+Puq}*2$2L9h>llz8$6YTj#GK? z>;K0p7A4a9g^jf{k`8_g;qSeMX*|B5 zzuiMxwtPXSU$6%DYxrhQdG)Lk`FxOkypsHU(_CyNVDeR?NUmS4U!l>so7-J}rPbn} zt;Mz`Z?Ak|6?{9_%K8RxciiYLdTkI(9+k}^?Njs3>~4u&bv8}Cyrz1&eUQg%eOMhN z+o?3VQtNqJHr2#hY)5_Z>N{WY8u1~yez6V()#fLa?`*H_Ze{||9r=5DwQy`rd~onZed@+_1~&Z=x?%52 zBMoGCGzUsPKRuC|a&}rne%mPW@oa-oKWjtupjs}uRtBcl0`>l@8Eb8c_)JG;bg0be9eS%4 zWgnH(A!y+k5M10HDUh|}F#=nH^C3H)mxStj z-(t~;&@A|3J`VVd(ZT^ituk z)Xx?HZq97+14q}Ot2TFQd_T`@nRaGc)~4+ZPH#V@BMlX9XCMg0n^tx%{TM1KhIVwO zi?ko%$J}3%31qMnz8bS7CiDd&B(R6%kVzzf7Q6Uyx0Rzcw49lOjSR=x&K}t4WIR1E zP`>i%m(Eo$eH{pcKk`sB$D(e0cmFtxys9O`*U{eZN!V(6{9vvJHXOhH94ZdjEzQYiq6Szm~Tl)r{#Im_PV-~{gNc$ zQJq{>Vvxnl_UAlQ2^BRNC_b|(QX0zbki*6Exbt|mBHfgw6t29Wqswr4>+G1c`t<(>cjVvVy8K7?HFS;qqV}bD zGp%cBXy?>(hUcx#fW9Q9zy<$wZB9>RYd2U=sI zbII0X7I6TG2ls9wFCiDxv#=3IID!K;g#$8(eg}L11|y_7z}L%&L<|S8(vNXKZ5iz1mdJ(u5)_6~ceGqU z3FClm9Z0Z}J8Z`4M$3lYC>Xri!P-(T+?H4uVTc=Js$I`7KS_d~XUoUhs}tCp%WKGl zw-2?oG|4wLF6&Sb?8{3T96$(~i;m$x-kvAIdf_5GO*9dxumr!M_7SwRI~*nYC(h!=aAL*wktf>iCN+hn(HLut6?sV$>YHiKWoD>M z3~!tj6*>>yi2ph9Q=S9-q(H7wYe_58E%f-_<%^aI{KG?d0pLTk9&%IIFMTO3x?no~ zL>@Yr_gP+;*3Mj!HBel5VK009Y^X!v#uJ)IEo5<)2WPYI%-wQq7ogY`m@L`p4LUe{ zL1Os@5$`y8>+;havw|vmW-F5=3~gpFAqrOk=l^I``Hvdk&?mOYbo&Z=Duev@UVcS- z!0zRejgOp)oU`(0N9p3UFDFAgXF)^X&6|pFz-^1~B@$#Raw;4#K+9is)&E&PV|i&5 zC0eg-GIMN(si0aY{95pC3 z6_F`EJyQ3G=EaGm*qY_d$;7B;CbalDLbbkrg70Rn*4ruRfoi4hXToKeCI|dUo)SH+ zUjkxQQJDozjYaj!iB4$;CG%YM>6EO2J8|60w@XU?TGakmBK!Ya`s5XuI-bM_US_`j zlDS$?cQ7OM_C;+?-eaP~d!Vz)+p?QwT)dL7WxOaP{-7-UqNeNNbr!`vQkeQ^QDt?r zIEBC6!D>n2S*&YnD8YdrLIwYidf9G#-x@RDvNx;&KW52k?BcWg*K`l-dKvg@Lxo<% 
z9h$!RH{lTfc!C%WK`E-3;8&f~Pb5?gd(dIs9k1^k48F{fCC^W?hwdg82a}sibigAg zn#IN&rF=c^bYG_X+)8DT;P2r4CaSWf3%4=_hSARX=9}_VHPUapii7q)TfJ}4&bdAR zBk{zOYU5*l=08I*|8Ks2`@3{ElkCeoJbWD|S3-H6GE>u1Y;Ua<`d%$4f(MwaT+2g= z^-W{6kgF)zl#0Q4>O}T85h$LJ0x|mTK$@v?o zH6iA50;Eu=hvhpkS1t%qRA7FW4I6pFkWK0XJ>UE=pLkhdz={5Xpl_M9vh2oU(|uY7 z@9b{mlHoNAqA5bK^;DM^YLjOLd~o_82=^?Cg#ReQ3d_~7_4IWo_jwO!Q|EOY@MEoF znu(veOboAoVoS#X(O=S+tX`V!Z4<$FU$$VhyuLe{2IXw?@iSd+=;HwO7mcty*utpj z+WE{)5Fn0ep$NJY;M9);LO{TF$(lSslBY?6$%sCx!l6ZxYUkJQ6nm92C zLw+8586+Nn4pf1e@wA{bfdbHgpLA%+R@(PQnKOG@L~(VS-56~ReG zne!I$x83{MBoSpM9toaOXHKssqk_2Y3!Dw>t)kY(?p9?*-?}hL;uY51qJDE#Me-rU zRm&?NCsg)kpxubMpS?N{QS;3z;kXS8-Hj6kOOZM(qKNmvc;7Pz~@g5dp1k(byZ7_06!|2{$bJ zN4J=HV$1qr=*g}G4xq4*-Y`|aoV)uYn12j&M`_e+WpF@#Y<722S4#3hGweKur zDR~ZeU0~on9$LlCU$J>L`TVG&7KFgPHxz6NsPTT3cH#hnn%Q3^OqX-g8zAy6of>in zt%P0Oy?{3qSmaz5SPW3(bqRE|kiuqD4BS4&`@v6A)mv_OsazyE$7A;*1Hiw%$8{jQ z;MSSQh%1han%T^q2OY$(A3g^?91%q{=u}MdgHJlUM%sLsdug0o@cSeHCBvW@PwUw3;_RgocynO^w| z59Sk4llz+R1sowKSI$Pbi-R17Zw2u`r3thMC?163#gP@J_yF8gULfs~u$0r|GJ(&;v zaKL?oey(bW=6fKSkw3&uBg=h zdQGJxhxFZ&mj2@6vU1}#sQF(OkmW&g@9>MLCZWI6TQE-bmRZK7a_os?v!gbV7V`(a zmsjb??q}eDSBVJBQ_`o2Ry$@xSTWTQP6xI{rQ-4sMu{y5{lxkU%>$j^;T#TX;ydss?lIr`Cbcp&RZth_HpA; zw4B@k0eOam$Al3mCZfkPe91CGANqZ}7Iu8C4%GkKaD=Tx%i(4zd@&o0g`!HnaejZ_ zBB45VkvBfN3l(-fQxR?{tpX>U45bo%Me>ffLu@YH(omTV_0LzPVgUjQ~$_Y@R;J0U(yRU5{DX4NtRRDF^XeZ zpHi3H;OL||=w+dWys>a{-T=q7_pcv42KrN0!!w`_Q1!dak>axO!W-8{F`9}S@UoIK z9MI#1Se_Gn+F$6G%xTrQ|H0Baq&A_R|EKYd7AXYX_e4Df;;M95ykEZ;-`{EVC%ATJbe0 zvJxo*W@azBJFp1T;*$@Evt6+a**P<{`jm$a>GV{he!N_;X=P=MgJwDtiwm``=Ig)V z=1UsY!M`Tn`csa?`{ z&w$0p0eBWH!1MdvSTfk5xE~yhGzGx}K)(fE`S-iPQw(3OugV; z;f_le@Qf5e>{1_(iY^~lYa?@sAl)GpEJ1p?IAF00ioKH=6ugMS0V0-RA(<~#aDd&n z^>%5B?T?O;+m2AQ9t=T)L;|G5YCjgq%e%4Ec-i0))X0_xLnb?F^+0d6)mVMV!_tz! zl5tLp$8PdtURlXgAvd>k9U<7+u!aQ-BCKzbnLH4ipb+QhVQw)#a# z(hCLV??GBfjU$wDvV08@?7evzYT@PnnsE_EEnkC|`3`aAZ1->b%#C07;&^~SqW(!) 
zsXXU$`r~-vK_o@!Xa}*tNPYMoDd03>=|&0?Q%D8)umk>bL*cG=8Yi5LJb8xqyk&x(?9R~30$IMXj z$TI$#&_*dD5`wYf?(s9{l(R`i6L|`HWOfWh(Uu$=YdZ0Fsva-Y9CfuM{`1C!k?!EZ zRt@1aK4aL|R^rv~%7oNM1Hdy~B^$qQ7V^-si48~7PhaXz=qooKlr&P+hW)_nHXkM& zph-kPZ|@Ap}6v{;&hwG%s~>srpu&y|}w z^U{-yX{vA&$&jtQ=2cM`A1p8E449ys_D_LroQ_1TYBGD~CY^&U6uXUjo6X|Su8o;% zGFhK$qd7NS3-tXRu|0RRf5kigYqE|ZEG9_N8Br|FJF~H#eETkY^%a_{HL3{e0~H3@ zVZu&oL*J-lKT%SM&aYBRqw4Q33X`4ybp{MVo?MXyd52Ht;J0F9({5?4{K4wO%9tw^G$cfe@8Vd|#b1GllWC?Isa;nc zH3C%SqnjIj?F1ppbynf0$(9%)gfi&CEj1Ghy1lArxT7mu zBTO}(BUDQ>6V%z-J*Z3rufJo`=tFa#_70gv^I<@sPN({7*m$Yc$xs2WJBgD!;B|o5iHa{*= zEb`~vYdvAXT7q0jgT3zg#{D1X;eKh$MiCYoosZ0>b1KYh(=t+cHJH|8YgRfdpRkf_gs1p|M44_9@#4*arT+=Y}bw& z+(@Ryx=LB{=Fc7#zwaDRdHeFw4dMryr9A`(+RP}0bRXa1)LCNE6+0E|;t@5;oo}g| z9UeG9Z{O19q!hwGJtK>iq7r7Hq&%NLQ(&X({-nA-Unx&xS=G57lNaIg(Aaw1kXL^^ z8<8IdW9MRv#sPkWYLfEJZ>|nrtT9+DG;#>TPE`sdWU6<~c`jlvwU}vstm5@@qlZdI zvvVV+Teh!lU$$-^=LTN+!uc^L3u_81EIa*FK{(A}^(MIU_*8yewH59SorMPeN-RW= z{SfkM%{ULh!%{=uzI@h4c$PT8nH>hY{#P5gJKR`icBhD$nBGoaZV*u=!g)drChyK= zNIA4(o;t0{%9i2)VzW-$sNS)ogC9OJC4#VvCVv)p9%J{k(ui_;KUP%pJ-mIysXClmrMpWwp!pd{28suKM2&7O?K3+zGuMY*cN2|F4~UKRuTpg>;ZWY#O1{A~ z9@gKjnaxYl%X8@#9L4Zvjs`uL2J=Ht zYlmG$WtvQGxP_4_m@6`6`&35NZ7#SiI_@uhF6MaU?IL1Q)#%sWDCjsgf5%fYTiINyb-=ty#i&HJU8 z$10Ctr&b%s`V3{v+{GIYYUUKk*-|EluV||SlOOAX-7H&%`9+-vOy_@_^z_pf^4)rC zCIis4!VZ2)5G4IzI}vO6q7jhBcdkiWVs4F`>&OU-m)7NpCLVLM8N9!ylw)SJ6dm5d$}z(Ksi|)GD4LHWRlhQzDBID{J$FYO zJ;b)`yR5(xOw;Mt(2-fy{JGxpMc%@!RmYQhJ94M;0m2#X8t*aN zd*M+{^?$yP79wmgJ={-tPJ`C|+S4;qAy%SWo;uKB&P)m8unhmtqsUtOp-8eJC`z*8 zLUVz2D#Kknzic6ih$@GyylKtCci#6EXo4UT)3W4!yARi(dmii1lL1VV5FPN9W1Rzo`@f^fjF_ySa$?@1|o2z2LPrnl99ia$xL2(Bj# zADn8j3*7A)TI0d*^WLXIw`){R+@-UOP<-;t_19%a6nWc(E->Fl7 zxc&ZVgl`D_={HMGq{Zr56Kfq}Ai#pk@ItaGThS?BBUDG>le(cM9v`CT(G$DeWb(&E_aiSOB;!gqs&0-x+ zcn(fg(pE^`<(Q9Rq<`&uw_G9KedH#;nT;UcG3YU2P$2>~t}1=@<2r6M@h;kEzF9lE zG)_Z_opMRGl6m}tXP}SS;~irSa=$HIo?^sqbp01$p8ZSqgsHM2RO7flld3lEM7uzA5xWr)w5+K6{^#F2C&m0vPWHnkSPaL|SR4~gYUTBm^ 
z&X6R+b@}#@B$LNNu}_FFimS=x6WX<{@0w^a+`Nvqqfe>N1fe0djMsyo@T=uHVhGl4 zXvuM5xKLr-Z+ik4kg2J!-kn-z@`&@@)Y-M6omp>s(qpDP^66HG0enPq}yTcIHZbR~>)LlufO8KY_c-I|Rog z74BrUwTd92#Y8Gc);qNmZnCyCTl2z9J%q_g{;kwsL+7n$Cd+b3zTp5$bg61x<@X8K zn~6F(_iNLi3FY0aSEHe6?RMWjI65g7fS?l%$7MBao#8J@KvsI( z4v(?3Y^Fn_TxUwA*^z!7e^Y^(x#h;)qxdJIfo==}(R?KWHZ_4NBXQR#Rc79nP!q^J zpMAH~AP{YYWgDq-xpJ!d%g1DPUd4v%-NeZEzM+P7VTxLt1VU*8jhsm7&V@*hkv)v5 zZFS|rK!WIs)$Kf6o>A)-Kjm5YwRKL$oX+fPD2XU0H5fAEzRL+dHJb6B{KdR`-n`aH9pX;2kzp|CW?KQ9#l_pQ?A9ZP<`Z(bN*ENh%iHtN=R zzr@T*$kNG>Qtqos5&L+?>(DqdW1q9Sb*Wv;Dz2F6#8*}-sH;8Wj7zhv-qIa64?P8q zc{l~eGM84-ZbfsOKIe7?%e-0r#8Qiy6B6a|>QV2FA+~IFZOgJ`&%L!lelM5xFP_^u=Og~kGBSvyTbGv;32K3db{TTM)=f6URb5c9P&y7AB zWrqy)=ppXLkXxK`;{Yz(T=M(>gW(GP#vjBIrGXHo^#}jAG7rAS$RvrxVQJ<(TiD2kF zJ5A!@b(v3`05x=xVGxhIiaWlNAxI9p&sb7<0|N_)US9Zvo+dvn;Eeg&#j zRTI-kzgQI^Sv$k5``I8qICPO{FsD6$WGil1k??XRpFvQI7CWM~&!BuM}kJ)K%Q@+w^p}dQ z_tQ#+AB#4(uas^7PET;D8jaE9}^8WJBwb)v4FV_K&rcW&G4lH#O7TySkY@%RaEy&7Doy z)qOC(k0_;ZW;`ml*dB8n``~c6GW9?QyTx}nNNBuR&Js!_N3eH4C0vHQ^{QTTMNLat z_QWnpb;Y*_sFm=$i13~j9bWU*qZaaRIHH*FWYAIc(o%*;sc9UrXHx8l@k^33X!5)! 
z$)1?#{cW3+Tdm^#C+tY(Dt9xsk8jXr>G7C!0+XG1m=sKWBxSOpqB3D{npeZRoIT!V zF=aKEp6L2|!F|^EHne%-pUQZ$9~d_dYDkyP;mZ*|beGQ7!n=f9O=h%i869q0-8hoj zC*BIMR`dE6PCbOz5T8B}=T7;cb*P1Y(;_W}tcp4X2Rz$i^qzHzW743jVLK(L97;OE zr!mt(Mn)Vn^_EmY$sHyoQdq{c#o`{ps@2R>^c*eek=R5}0nuWJ#`ikja@Ofl$#;ob zeR!St>RmtjmK}b^pQ;qreZ0Rakj`*6baxI1Tzai>vH25%I72%QDAtU=S8_Tyi`wc6 zJ!B4j@FA+7l_J=&wl{e90IX$GIR+B~fsZglH+$Qc*I;Vd1g><$iCx*Wim2d7on&C8wRVMEYnCkB~U zxCCsd)wveDe9)JrPq6rIG0c}LC36sYle;5C3W5#Wf*%+X?R<+v8q#%T%SB0W=+p!S zxK>8yte@%N2WfrzeLqlI(R)dbYt>AGj?pfhv+bDxQ@*n!O(;t%#+j(wqPnFrW~;l) zQo63{jcbBKkTeT(Km;v4dwmPD17l5;i%mt@uia;S#yB9qoj*m>6>>_<_xT>{qGzy2 zlKvh>q|}Ml{kPRN<2~c9K7C`y6f+Ilg?bwg&wEKTcT_%uq@_3(86)ugSWdCLc&g){ zyrAXyykJO3XGSf_CejC1FL_M!-lGT|(cdc1)G}5@Diwz^Q50SVw^E*rY zeKwHV-k$uva~h(?cOEHwM&&t$eiEl=J$hV;#8alcz_0{cZ|zFsr_N z#n%zMu+Qig#aAUK^C+j|HfdVX1gdA`*6NsP!5c--pt5qcx)}(cdjQ^xPQ@jDDzoyq zd55vNt?9VxN56YHOG!jny1gFRK5%|1wSm{#)5|>9jKmfenY9);rR#i(WOhg=iW?n* zhYEie0Vu9Bzo;$SYYf9vUhZJ!N^cu=O1ESjj*p#Y%D90#+ zck&}Sp=qzUI6+E=>U9R-%#UHyP}T&o_Dh!OZnX7@cb^hv@E48lDu##5eb3Vjgb!8TC}>sr8tP33^LXy+wfbZcu^r#<1Qef{h#NO< ztdNlR7_JpBHsq~NJL;Oe<*u#F+&bSv#Pz)vritgESS+E6t8IPazh8{lCn`x@BJRK3 zK6zYB6JP%>iee_{eT48OFbFW80L&IMPLo!|RV*N;Y}z=!?%$XnvC0-BcH6t^p-r?g zCH2#Hr%P30&MqpZsIF++rFaqTun7%~GTJeniAj9w%=t4Gyy8h?iI0ya%O~(duj2bLhbPz3z#_$@K8#{Cyz^#ofP=Z#H+6p9>%VvUvP|CMSIMLu zo&G2pGECG-n{-X_c65GT8*;EURjBF9WcWbpbWXpKKTjiXdXBiEvZ0}&F7Qi!0`#_o zxTHQ`2LNn!gIhHa;p|X~;eIl`AI8*?{#>?3f=+|xT~fxhGI^rBf%U1vW*i{%`0?g6 zOLOV6np<~!Vb$1?MsD>){WHcOVOhj|xA&M;Zl)L4>MUO-D?^Gmr9qSn@n+0xX4}J? 
zNk}6$_Q~dt4Z$qCB(Sy5<7PWuh505Nws^9MR8NtI-ljAfvh-CpQ4>QgxGB_bI!CCg z#+CCu>QupexsKCDvSd=LBJ$zb?z3V#+onnl5{-?o0R90r-jjb-LQF1$7GN(8eNq8= zR&8V?KFL34V&@5&bksvndbV&)5Fci9XzwNJnJW#xD)Q6oFik5(^c(#0dathg+`r&D z?QNSZ^rz^=zNsyPC@FkMm7F{mI)E~|NF(s0Wq)g;enlENAH31X8bg0R>M){a+U*?Y zs^sPmddsktNA`O+nnzSFwLErF0IZw-CE}P z$PQCPH+EkPvc1`54PVW@+furH{(JArN*RAQrV4IleZP9Q(2a#d+v>Tw6uNXVtd_Cw zZOUG5b=I0Z4!FgG{<1i_aKL!BurFAZwaOa4l6yDd_tiU{Gp9}=K3LSzxn3_@r0vi6 z4#!P%7=~Z_>N*TW^x55``0ZBCFZ;+lVGt^iY+w4m`QkFo?M(oAa%epkoP$l}&P>)S zb3?-VuwK`Ffj-#@dV`%N(#g!+PaYT99YWt97!3_U5%l#wHI?N zPk3B96Zc;9mt#e+G(CPo7?2tvFOw-4RC+}J`{a<8i%&5l+Ltj>m$K9{*%=crKtj_^xO z9R{B*e2lsM`cDFvrq%DBtyN4mtcIIp!8%nr0K4pnDQkIKa?)7S`90@4=0T*@YYIo7 zubkEx=St3g>_L&cx=d<8 z+i#zvVZ1M>pG5cd4E+grY57!6BkyRb%j2fX%!xg*wE3sE*#&!StteKoj zJ7ZJBkjs;Ki^kC|&F3>UcSPbSAp}ped9#{p#zSc5n)Wi7&m&*n3Lcvl7`T-L>&X7L zi&ceu8!-@luRdlpmfJkGrnYOR)3fDy4F@pR{HJ6fm_cxnDG&XH-9`O2M`JivZr>%R zKN}z0pSa>Fvn-ZqgMia-jy|hI_0(44UuXSLP_)-#LJ;#aq-qYV5c`)mBNWC}f^4+s zV2XhuUSXaY)eir8L5TwzJ}O?m5Uf^h`?!722T;9uGyvqQ_b~Znu!K&R}>O zz$D>~9U8$Ifxg6-D;^|&;)0dv0@Bif*iWpBvj!H)p(e7(+KEZdgUF?OKp zZyYvi5V50X0lQzF$icWafOrm=;(ck<2|dyggJI~V;V6(Hr_>1M0`6nMY*5$*3@c$# zOpVu>L+qF1*qwN+;L(lI*a3@%ehFaMv0UKG6D|yJKx=Rgxcnu9F~BkmL5GrXgKp)Z z1;Curvi=0JCB{=6qcE-_yRKCM;>(`=*8V?NA+eDUG=V4i> zQGW1yB3il8ex`c}y@53H0GA#tT8c-HVIxP-8^bst9>2X=9C|3P98%9`g)#7@)`5^U zv!cKZb;j20rI#wW601^PF%i?Q1+yS8#Hy6^#Q-aWDa+O%3Cu1zq@D?Wpk!2!tcmevw;6AAlNB!#9N9Fy z?zBFuI>3Q{)ai4i1A!r6kd1j-wS?4XA!9I+xu0ulpJ2OE>&VSri>rU}1w@(-dGO|# z1>MYGYCMx7zPWI@f(Ai)g0AaW{jpERtkvR$+h951D>JZ!5Ckk73rlkBLAoX#=C+aA zSo+>?Q}TRXkgpK~gvrP_;DBEuQXk%eq2semeYqu|eh!N`R zktqmqHn*c>uwmJ(U<%&~+yWDH@m1PUSSlpud|{Yfb+?6 zBk)aIv1>v6;~!a``c2{hR>-*y!RslEW-=#y7fw$4gW3Ns{KWLNagh3c5!w$v(~z@i z`2yT*u&EY`-|!x|;zPxr?>3iUq4`2opf*Ndn#`liYaQIde=0d8tHxygF_yPRAeU|6 zPTIA8RFb+J@_eZnTp5D+Zmgz}l@j2wu8Jz>z}}hT2_z)}WlN#XQ$y8Ul4h!bm3n`+ zmN&S4I4)C{tXpN0ELA&zZ^n2Q^Tnpyfx1fy@wH#acdfTbLh6s88V-TNxVN?70cqQ6@vpbYE5?YnT1c}T3Mowim%`i 
zP`vhIn|zB7Z=X&K7n2H7TI`9dcJ*~)RK564wokv)7Ahwl(}Qe@@&c9EQD}?NymiTy zL7SAx*N`RNc^L1n79;&f^fR6p(-vv1KY$0cd1IEHri%6{y4TQw`iU1O-%Hmb{-oAk z4ss%&)N)3&G(XEZzesCfM1)+2WgzEUe{^kUwNJthPQ`rn|6PSP|5$kQSN=A7ALOGb z5Qd0aKTRM0+{yt9JS*$djkP1|C@I{v3f6A5j!!`e&ddo zy+Pw@w$?aS9<|r~NMUDTeR~uw(&@v>JEql{8*R!^(!`~!$%1yczyfi{GYGN*5+*Br1n2I@}B3X z@qD6l3Y$KH7S4>n@e*vHwiqv4!ZExe{~vYl9oJ-( ztqTVcP-&vlOArtc1QZ0M21J^Op-7RcqErFtp@t&82?!`Cy;qSE>Ag$u9Rf&)1St_h zyxW;`=FU0a%r|q-cmMhR;78c(yvcswz4lt`SvD6bbum2o_+aZS z0!cGijZHN;piFUk$FKuwT<>qlxg~J%yQ!Gc66CrEHjzLIjGPXeKSm22r-ySb5X@P_ z^WTK6dtnC%6yS?Rg1dGJd@2~2`0Kz#`j8wOxaF6?J^9NhCH;R)`!53%8JhoK+Hh$r zJ;g=rLR83rLV*zM7gaqKr~@#7(-r!&pr}TSWh34$`oI49|C1>b|MBhr@^zar?Vb1m z6L2CngXj3i)YP>HE1ZZzV$8t`>o0*-V=+Tuj;uJ$SQUwzQ*g(OD_GnEyLX|Z^t9@i zq2_&RR_fgoy-cm!6j^LFEcdF&OgHmZ_%wd)=Uwb9F; zqe(lkB_m++Dhxy-G0%a{WX}5yu>iu*l7>0jtu-Gw0|rL@CEt5coF3qm5B^*>#WN^_ zEw!|&t3-t z0k+8?c3B8d8UAP6flq4$Mq~Oy2MotZ{pVUgz_ozTL;gAn#nJy6%?2waT||E0Tc*a7 z2ZJ~2*#p(C(3Ge^ut3ry7asv3{UlB83j5)TwE*AyFOob3B-tK+UHrpuNX$$-^7shA zj}h99OBD6s183S{$NNBXBJ`dsLLAkW$PXph#IP{5Ej3|g^eV!s{YwuNj%^zTUUaX3 z!Xtl;`L+&D(PIRFRlow2)@` z;VW|~tQXT!{R*v4I-onkwP53zoHzzo(%!wmei}CVPin^g4--1;_LlE0Q2Q@0UqPg_ zKlT=0A^6wY^y&5wXpL+Onoqg-7~95~(Og%YZDoIUxmr5r6N__+IC|I}<#S#hJZXV) zkT`VnW*W6zM&Z1v!BqX%xmiVWqaOCRQfA-Z`QMM$4wUxP`PY?G(Ri1|@*+50S^B`) zXS9^bvl^500jIH4n;zPA^vkdvwVb&@y?gz(XM_#MG4u{SJZM;LG#TIx@-Awg1mzt~VL| z^Uc|L8M%%?hAHFY@rKJ2f>-wxo=XR8a+=Ap1UDNxA+xPb_^Y*#>OD49M{Aa1NnX&% z27L0gJt$Q!@mw1FR*4}CTGEV6a%u1$NT`N|_Br4E_pKJAy)bFBOoaLAqsno?43y{b zE2RWW^aCA6ix=COmWvK?vA6kB^0CksS2~B;lG>cPj$@h`^XYy13;iBtg6DjRUZ}29 z2_w>|YQma=I~wRxZe|6v)o{f$)vh`>hJ;Mq>%BZn@lN6}E4rhBHeuds87mfoQK(Sk zSZLD`cb;{7W>IBHoR(4Dg?C$GCtd$)sufDYM>-C%U6a zYSlv>i3nc!$%R72ydajDd&C9ROQTj%r4I4oXAO2uT}pl-EG%x)f7?5%>#IyePHn-e z597M_y2-Ubm#ek9^)aQTvxinI60grl=?TQfI{y}UW`-zj^{@}STU9ppeE#DcOV}H% zxViS^1amGTS{tL3O&Y$;V-qQ;y*g!cZ}Eg{J{XXV3axeNYCgZON_tB(YxuC%M1oZQ z8qsro$sjF8l`i%b8Z{H6!a^_MBqi=;9n5oy&V^Js^i&whcVNq-RX$dMv#*#5*|jw{ 
z?&h*!4}+lo-dEB0k!HleLi{$)J_9=`BMHS1C>Z;kxL`u_1oY%)9+B52r^Qt2xQSEA zh2IdxV%u@3A96(nb9@$1{2OwhHNFa=ZO3JHBzho2IWYvkbk(BG9s8r)h&g3j7KasI zH8Dp=Nr+8F={;P0u90=kdks)#$V>HgOaSZg(OcbNr6~!`TB@`+j?_!-5bI60TVoZ` z7k)!*wtUzO)v{~C=qn6Y-}6a_rC5BRXR+(QGgGWF+Q4}herid1Bg=;M{(J|~QG|DH zA5sW8_v_6}kVOB2M{hYMH>9)0+x}sLf?%dp-8h{(!a{8G!prb}ub2LkaZw&)PD?T# zHUs3U+@CACKHF6q4KSWc-5THv_~|v^)srq6+NToE+`` z7g*Npk@PRsNqA+}FcL_9#J=q7s@3zK#viBWTA%PV_31qrb8da{`j!694-u}yH$jV+ zmvllkyB0qUlG&0)?WIo?35YIG8f_%mWedfac}?6DJm%!%lK<}MyUcaMqUUa6kvECU zZ8sPXwaQ;lI5CgQF}bmg@;@{`Rh4rM8HDu4I=boQXpo64n_g2}>UP$*Jy=GH<+~r< zmA@Ts^V*z@>$1C%>sHES5tQ0XCH2YpT{ZstbB4T!A`Vx++a<`w-B)pGV&w0n>>RF? zcF#23tmzC``x4L>)~Z7N%68{OkF*Uh*ZOw3&!32iZ$14X$m{`fjW8@QV(+UuU@ zRT2N8!{=C{YvYq!JH#HB*nV;xyeGs{jfoh+c})E9zCTY8i|Zx6MUaOqIyG@&b?Xe5 z^(&Z>)C0D!xHH+x9ILPwQf(-Q%JU&nPQDVHjNQ_DpBB7eW1{yClZ|a!Fq@Lat^I>r zKdp&;mHL}vR)pW(u6;v4Un8H13(y|%fhO0}Zd(kCUbFd@^y8-3gLhxuKk7DjbNa=} zF%xL9-bcA;=6Y(A!B;~rywB)0OQ*G#ZiH0L25H=u--H~_qql^CVA?4+$OPQQyTbc;d4(&|>*Qm3xf^yuWnlBY$KODHwk3`BTGl|bXm!GAU=pdpE z$e%={m*7Rw$*e?%Prg(o^lzyo9EqAo9^rd=dn|aVc|cA&wS#++mp0aDIzCG`7HMuU z(?=l^`c|Muq3jccd~wBZMCN3Wq}u1k!da}X`AmyjIYNuh_T)oNq*2DFyWwQ>yb^2d zix#xSAPqm1DWt3E@BV zG*+tDo~GBjm`0i`@DoJto|HUkEi`mIg(F4a>SN4WU<#q913kajM~~?sOF)i2pJZ8v zir*y~MA)=@q@<%64?f$sx=BiBP**f_8gXs$S+K+}8J>MgEp{1$D}Sr49T8X?ef>Dq zjpK)!Heb&R+2q6koK6p9e*u~}Qdu~Ws(fl-9nM-o5BGkhmMrAQ1a{$qB7#;9Jgnm*?QtFXD6Fu6w*pisRoipEWL?)7zt?44S&yORY|vr$2zuE$ z68^LTGhe#2PBJ$ihd|}qIIEXGuDuj5_5QV;r=J{Cl`Q9|fR{wx=KSrU5d}5&VTNI> z9ZHE_>MQLVrr$ z{)hEEZA|<)AGN#)nxNXKl}}O3H=CL}X)JtX;-QKY_BwrT??-d2M^`>Y-0x zIvnqOb5!DowN5`S%tV;Bl?`Ml=gk^h>xr~J=fW1uDsA6MkkGg=K(?Po?!$}~3Ujg9 z(so%~EgQCACXBo2XS-+#UyRgiWM!@wF~L@)Q-|19*i#_vY+ZkJ+>ON&MiVxuc6H>R zxk(V;Ti!B?4#e{pD=$+DpFYIo;;&cT?iuh3F9?m@5R3UD&&Vsmz7`c9Z6SzE zh{CBX-)toqOvH0`9<`%9-D6x|?k&CZm2{G0ieCO$>O}_9d3b`I-A37jTRbIp>E@QE zQg4e~Zsk*!AJ3lxaiZ!(#cDQ&8bb5vcg>uPMBY>53C`nZcDz(dyOI_R1!5l5AzmVw zjy3g);l!uoqBrfhs>8_N2VS>AUD3&EdY0w4X_JOCJgc-{GE|o`9h^~3YGK$JaT#@T 
zZ`LCTYH>ZIgt-2F?&{yarfq$;4x@7^YL3^o5|1ywxxlBGC4NO!f~<_4 z&k4B1ZJx1tikH&({Cpuz=jybN9K+Og=l6_1(AZmY4R5=%3*X(5E{)bQ5gj-aDSiDf z?O!Trd*i-#9m#KqJ%E$gi`~H1EGImS_5~E=QEI~dhr9nb6TSNZnAdu({Kn9MChUebBc4oLe6Fmc?b#$yftB zTN9g#Q}hxq+T5xTHj=*IKi7QV9tL8wzJ=N=by$2)seJ~{dXgrNnVj}rKRi4XGiuXiX+43Ze zyiR(hu$oVldj4!srF&iY=Cobrs{(`K@sIAd?ynbIZQOMq!0unXMr0kvpFD9m_jsgD zOj0R2XKEhqWG~PwGqz<|z$Z*WS*D^YbylN6rmE=P8R`OKP$)sz8mbRJds>ne+#76= z{5q>;g$!oSK+MP@&h9W@ffsJzHMCWLf2$ilKX`B7+!Hm*6>yJ3;)ybmwDXIZFG_AM z?5x?(aeIUZ$jiSWlac^ORupxbXP%JObb2nE>CT#xOQEMd%`79z*&gp*y0vVe_fH5`3-rt zg*>}fS*`DL9)f_jHobwB7h^$GFahbytbjd^q20tG^$*b4cyM>J0>~#;0pfAtdVMp*NA9~ zE9hdr8Z=+y?H{I%Jm>%2u`_BVRyF@m=!*BB{48-}52h2cWfJ`lq#_YB4c(;_z4n%o zvlmsQx^`*dnz}v$#zu;ZM`dLnn%xVHnzP$^H&dMb7zh^(YHaBrRSZ=W8cRJ6QLwUQ zh`P(_yX&pG33pUYqe2c~dkt#=?uf9KK=G({>(F^>Be7&@jmqx`5y)uBsNx&BA1oo)Z*|fUUcgL61Qz1i^r%y^!`VaP@hIYqU~p2H%8 zoDIEs6tUis^??n0f}B~wGu7d22KV8&Jd(zl=%YLF{Yma8>sz20%7i zp|OKM<+HpLES^%; zXN!IMav3GW(qZ~Fu1SQGL!|)Oa4pw+@<;9F8s2!RZM=9KJ=$qsWLQXUlVB9VR@Maz z<6iIa$$WPKvQE_1wW4Wc-y(A#V9n>I$U|1bY*cT2wOp|sUoH6K%nL0k>@&10*@eAes;!Ra@}?u^*HF8(|M1Ee;-QZ37e= z+T;j(s7|BZl>s$)MbFxx?XqlJfDY%Vmm6B>7uiOY2}7A3Nq>gVh_mvGFZq{przZb& z3HX-8Z^@N}eh|hmTHduy`-CbhZcy)rP@4cZ+7*S_rl04i)}3s982ml-;zo_*0|mlK z77FK?Js@m^NJ1aCC0e@$6sR^_dm%A(&JQpqFUlTHv|f92KjwZoi%PoAVG&pdfyWRsRqnVybdzCSG3zCT^LTiD@3d0)Zljmj?T zPt^K1wy}}_DEFZ|T5&4#c&ylkU(K`E{B+3Z0hhwzc^X~IG%D5xw$_}4xaR4O?4=DV z6@U7lJHl|TX6`4*riUZ_`OE6`u<0a$lzK}LC(E+B@=GL;INx8}AD+?W6aObpmJ)|0 zZ%sT_vk+ajN{S&CGx(%#b(`l46~v6NAv&(ZYN|3)nQULaIZlWAf^M}drC)B~!A#8TD9<+z>VGGb zHlHhFUyiG&be3hongpX2%`!G!kF|If-m-q}ocR&?r5EQX$L#!1N|b^O=7Q!NgrvH? 
zHmU&wT0Ok1PS~_`p;K(2(2u*)MNZ1T0(s)0yB1w?#aO$QDT0O>urfU(Wv)LXQV@Et zgQQ40cncC_hH>+`!4)ob0(JejGOpMi&Y>iVnRmoG2x?YBMa6b6tB}H^NjIFX0zSSf z^ycw3q7?$!Z3DzGmX4!$sh-c?{AYDvK>A`n8}fD zM^dXnUo+bwhd9r02R^d)9lp|g9beb=8fm?FWXomV*JxWMsyA3@6Mx3;%))z=yb=9U z=py_*LO=AXmmy$Cp{9^a&{LVJ-;lE9u2a3fTDyfrDThzGhim@m#0~?QZMqsqOI-t( z_P8ncXT(2*L7G~vD?V37Hh#>i#w9A=x@!8>=gQAt_c>6ZX7V`RbG`z4DTu0t)(L_{ zl>N1w_a}CVxlqx{8xgH6aVqJ;roJi>%UgRU!8*2gPYtEQt9Q^-qO<-_7aVR(@@h`x zLMW(siQ8THPob2!8={|?=D+hH{MG7(hdDT+(AVGF3t7Ur`})37n5D$eAso>oEn7!6 zKV6VT)+*DV=lY7vA90zx$U)=`EoTerp7E{37&w~X%ojS~l)cJqjZPOL)qC#!|eRaf8S#;FgCe%D8i7S#*klOS^x!~+oe_;h5cB@f~n>-4=auLyzq$miBPq=G zLPb-5J462w9R7bYMg>K``<_B&YD!~32)|B;6fIH`@z3sgR6VQZc5|DQ)<01Rij&Z{y13bs!d)TtE&j46cE3-a~iP| zK?i4!%PI|{3FN1UNBdmwx@a{7(QZ4_2u@!oIaX2H@Fn|e>+^h-Nz3vLYTuTSf{!>k zIX!5fUBDMw|HXp5XCJt7qcdWs^`#=3If3Cf#Fi8JqoXznEz1~l1KhwINaQ^GM-c8d zXVCi4_lUp5_0n!)5;F z^yjGP=K{NhZX}Jra3ACW<6DfZweqv?hn9liG&J>RSxkY8Di@^%R8VWS#zF{^D);clpo_>{~lA|{68 zjK858A3O#q59`vfrF#0-!4C)Og2P?6Tkqb1>=AytnC$|)R~LR2(Q5R|OK8a{S8h(o z<;Omf_gdnK1#1B*&fcx$%Dg02Fuky2YR=5gr`KA3TgUyiGvzyviOeB6Mi1)XgUE9W z-9ye>hFqMKt4hS;r+s6MS8TMxy0LT>NiucM*<*K4mg;H^Re}sGI!hHbN6Qv3K?Y-Y z=Y^!cr*Q3Se@}GsTP=l&cu-OE433u7bcJERynG)2O5(gn*jk*4DUUPZYK!QJ57DxAlZ#(E$As| zyiIXrlahH)0>s*d=l7>qr)+nx$H(h*DO244(vu>^)>#iXQBTvCUqkTyu&3E_vPLEN z&J^h$!MHeX_p`nw!k5<6!*6&lM%wIA&vauikqe==3Tp)?^LUG8WKY_=_B#7ZkBQAngsEl61l$87bHhLxP92VMXFqAlS6>8A9(_-y zh+Q<>k(g`lo^?tIXy}i*id$|u$@z>9UtY9 z9g;Gr3vZUSfOFIi6fxpW8Z28UX>->E}N5ajE2+=x?ZZ zcB(JlthbKRH0Kva*Wx@g&hmE;wB43Yf4nkJRY~rmj^0!JZYSe*y-)9_v{PC{PXgP` za+BiAbb0|3t6Xh3HqcIKm$$^MwLxOYrFMK|_-T>LD@JcAjgXmHp&`RPY8NiO{}`IityRbq5$mPKSv^oiL^=%{}*{*TB46PwU z#XLLVsR((AaBA}~4cTGx=H7TU>Dqhs+}8&SbI#KFCu61|!BA>E960~ArDxa1=tJ7EK9jPvHJ>v*I-5`B)Jrx43zv zf2i3OBUhBWse>Gw-{MmAh4lc!r)Qu@i;cly;^~mQ&pwm2Kg&{07Nz7jvAMh+uqeGI+|Jne$1Ku@J0k~ZJ zX}E=!2~!rAr*Yb$ePi*J@n!o9YagV7H10GrvZNZUd%1f9(^ID;{#B+rIM2d+KN|eh zqRVkXaoL^LH~ZtBGtO?7i!5q_Map2S#TV{2x zN9A#v^LT@yt78#4JA2;qj@`*lkE2Vx#GPK;wxX2eolJANp~aPJbrm;746Sx7L%Od% 
zTP&MZk?}5QsI{el5_>VdYYWSF$`SUJyvq0T>@j@JWDKzG^xByhsH}W$ejOF8GlT`< z6Lb z;Gl;zK`vAO;{OeyhHXXVf+&ARsoej_&ZWMGi_F@(iSA==OY%u(tF_2gda_Y6X*r7$ zQKBYkni=4-ZDLumL)dgqQ#HQ1wOsAV`sA5xa^7`R^?mn_j?>PwVMQ00#8Mnm1xr~v zbyzPpd~*GB1|ooVz(wZCX@mx8Hf=kP|6WBb^;^7Jp8kVuhgaT&)YsRjYlu_tD4%}4 z^kBm4c~%->9`jxDlXZ5=+Wl`7%4z%D(|{-E0=lVK^S>YQVaHh?tsAK2MS}AqVro;f zGHXi4zl$Ew8{9<40%Bsi(#{oF@9Vec7|WK7_vVHSOjCAi()FLB9HNU23yu@S5_7J) z+mJN-%imJcZ~bK_DZG}>20*G(-g;9oJa@YNS9}m!Qcfe^Z6v80e$R93_0OOLYCc23e3|-sIGl z&&?>p&(+dat*V3j?#t8lVdMzg)^J2-h}1_0j=F=lsY!823dZF@pU``=+<_#4bvRpNMFvH5-XM4Ah--cg@t4M6< zmdeFV*SvGZgJfX{#a%B}i#JI<0C+OIRW@ankSNIL(`D?MCFRg)X;xRtUMfveQrK2? z@;N7`fn!Y6)0X3|-?r9t$5g2x*ZgAX`##^7QhMa8w^^b*qU8bg(fA6Xvvt6&0N45C z%W$R4tF}Utol(uZnjC>DKi@Ih0G%;3K!~db^~`8i`aNd|*OE(QUyI{~c05)+nUMd_Y!e+(na7=MkGQIUslPne zvZ}?~;D}sha>=W+*=aqq5c)jMsG>?%avTtWx3ovRB&;W8jo+H@E|V2}d`Wcev;b{I8LOPC8)g4CUOHp9_1VD5aR(Li&c{UdJy9p3(G;+$ zME#%8^_uJrWVa+1e&%1&c%Y`IY_!nU_=|@P5IMi7FAzelA#2V^QuXJV8z<2L_Wg05 zH!8H$UoUoBy=f;^>%?;b?BNT|3 z*@T4wbCdwv7#BK&=tSP{n9!<-9Vg85CtH!S-P92Na!l%bix^02#y2I*57g4@-o=^5 zN`0!wk;v=3WLd(gA7z924~pT`xPixGLa_3ZabqC^)B4Vv`I>h~I8gHPQ zl0Dz^wj=mFg4h;)mA{ohTWeBk7i;yMO|NX|oy12r?(VqQ3e#0nGOT}XtB!dXLE+nU z5-Thx!xpwO5M7bLi2919Y+8S0K)cjAap#gfo~H_WZ~?1KxKILk=5r92QN8OxBOOMc z5#fQ|=+9qD5C>^Ee;M%7Lhh2dfH0*7*>gpQOf<t@NC+3 zP+A|kdl4Llpb?gZqJuj2(j-kZ72?~<(U8UR0jT>;+h|XT`}P|$%VdxFQ*siGTzcL* z)5<0UJ0^=w28B_tNP|?nLO>(F2Ez@4?(}Z}UHQuhArQ`hT+D9lh$GN(SU@qMxK5A@ zC_wnr;0lKAdikEIVa0EV(_f2yKG3(90 zA?`o-fJfi`3|4Lx_SC)P2m*ab&*~DNhq`S{M={S%37b5&`#7nKbF~qi<-UbRymVUTS!j5yqwkgh1_cOtYfof`7 zCJ3_4ORVr@ql&y<-k^fGQCWBu;gvP(`B}uQ)~sh=5@!@SwP4xXQ+}& z+`#%vvW3yXc~o!8oyf6S9RWiCjfek%#>G%y4lg=f=^{F0WPy4>s+o&IN|{MPtI1sU zXaj>q>Q;Vr(P>k%EvC}oS6rxgd-2b9`p1?pyI|RH)`4he`-J^YHLSq5>kq0Ny~9?b zw$0vs=su#VAez-Q^3H8jzc72FpAy5FY8Lu4SXmB}SG(~kpe*R~8Ivm=yuKdxS^oh0s-ZK)v#lR0YSc}Ba*U>!l-bQX z1gzB{rzn@iZkK*rH;Zj)g*lmUF2^q&H)3dtOF=|Yvd#HX+N8Iheuq9??IxRhqUQ!U z{T_O)3zSyJFq=3yQ&4(`y(o`Obtvnx%Ht8jE`QO#*-rFN@eBSW95^|+7TS?V7i)n{ 
zDr79+WO}u6B^gmpXckAF+*l_zQ(zc!#%|a{3I2RLPlh_~CWC|S5r_lG)7tr7`aW^M zD%3`YG#|f}Q4=mVAeBZbby~M+fabgSovSQ^ zsZSib(kN^A-~I$8e(s+S=Lx&(LJ*x!94!5|^y-Xx%a}e0ubzdAqW9+8c@--X`s|sO zK0Gsh6+Ws#+V_9xM*erV@_+g_oBJ~S*DWX&Y<3ioav$oYPoyA52oHZlT9L;tu(QkY z>;ESt^lK1Jqtv#~3YsGaHk?^%Zmey`n3>eCx46x9d9F(@tp@Cl`Td5_!q1*iRC{)C&tQh}1sJ4+%u$N*L}7*go&*je%E56h~k3k{YS-Ox>Y;tTc9*AeFMaV%H3kidhKfSE432jN>aVPtSCc zRA{zRm{}_`1?sT*QpMl+8%+A2T3_DV!Uk!5G;uUIJM*oz{qFOjhBS;#;5XcJ3k!bu z;*OpsW^`MPVds_5>+hZwx8C>!oU11VxkO5}xa4#S_^WJ0ED13sv3H93p+M|4Z(2j_ z#$)?XkH9KAKq;E%{^=PmCBb8Qx}FhW5(m6ayzD(JT^ju} zsI`n#r(`*@ZRF{Qvy|=Be#HlhpTaR+B9FuZ4x+Io(;%PwU?G?Qt*BA!t+f*MR#YPS zQOIxHBQe0df0b~K#2Bj^>6xcYIUkzWkXkss(zKHpBDrB*@6@pUbgU z()RW(7qeY3d73tn8G5a}c3r%QmsM33elFQ|d5&&#USj9P1h>B;RjhKlsc=ooTN($$RV$x3RMP7;Lw6P71RT70SWwvA+c-4!Kq zX|<0p8TSadM$7MJ2a;1pV}o+YS^s^)ES5{xHK!;}HBQ+n~Tg0MXT-XuvD z6f0g;t`>gSE4tGX{Jv~8WcgxT_a#y|4^I*#*9x@isV6FW?Q)tIt!jG^9(s+y7gU0Z zzKa-fJ~}yo9gf@wO^%dm3V4abkfDTYkyYL9-pnqo8*Vmubcqeop1oWk2Q2BK+HPLU@A9Y zrvgw+C7yofDbI!#FrB9OzX3nd1y{T{&Zw?szQ&?&1bQ05q1U{)eRd9)z3=t{|F&YQ z5=8WOL1V% zS`MS4?hXyNw%HyG3L%ShMgch_B1Lv2Q)?g__nvpNQ=t~(<1lh}AT;Ijs6Azt$9LUh zIp62?kvqveX=jTjKJk-de~svC+ZXE3m^2~%(d>(!5eKpdM2O3B>?>^Y*Yr~QGE8yj zxEnBKHI0KD%q4dvSh|Lk=XRb($+LQ8yqR4L2-`V`d^n;%yyLPPX?L(??@_xNq|4U!#`2s8pHyfDD)7g*7>-cV zA8xNn*s>;Q0sB?xipuyrX_YdH>~&J#bc|g}gTdkZv)c^^M5gHBoe>Ui-7O{C8zs+F ziuRm4tF0^q0@rqE@);$X86a0|-i=ov2;5c#dlZ1)h*^Sv*T1V4-bn4%M%(eY0SaZi?(K?J0CSV z{AcuP1I_L`mC@cH(uTD_h`)diNEAD-%KdFT#VdjzDqEc1@`2WlsKoTEn#yX49WLn+ z&t$vlL}mf9f?Ca~V#P^$cSVU{rD5TFptD}6GCJ)=jQ@L;xoAx_k_>GCsjwnp-*WV; z{8f&M;6eK9%4kJ>08!a=(Ce~sjT}muGT>0HN7)=P9D}bbUq(IvWg_;=IaL(QR;R-* ztcZyiN}4oUhCT=Q@o_|j;f|7und)87&kt{ES)KEJV3(aD#^pNy$@_ckm`w{)P;u5% z(`nS(!`_&)OgD}`W`mBBVv*bIYQkc-J$9ORV)|6kG736lIYZZFX~%bHMDN{j&#|$1 zpu@uzlH*Fez;MXn3tFdVeJc;L+R3Vn9~5ck+%>{d$k6D|C)=mjq!L!`@tV~H-n_G)qo zeR7w*7>C(9o4;&|2Y(-}43Z4|o^{y}$=|%uJga>CvEHUGZznBkdY<>XaoGbu8s%rC zko^D6bo{dz>BAB!eT+c)f^7a$?Z>+U&#SG9gLiX|5`=1gLrfYDCx_%1<{45A68e_0 
zv_+TaAIx~on19!f`kwZ=brw4Qm--q1Ot6>g=r0e}S)lEyIxBCb_Kt05kewNPfBH&8 ziZ@oci7I)kx`goqvIP_QgtfJ`NPlq|YGqspV67+r*W0uc{q-P0VHURXksJ3)@0ZGA zQTX`iAW(k58g}xSfAHT4Q~$J&Fp&*!Il8@(pm(XJ{SJA^qNNSk0xY--3pvh5wG1!>$~6g#t4!qb z?3c<85zkm^SRE%7nNVZad#h8H=rtPzdZkzhd$4dl)g@5*03o7O=?!Z>(wp`({Wzjt zmS7%JTP}OMb2bSwUWJK2sui9`Y;FiP*@9X-E8!}&k!P#VH)lQdY?y926etyhr12F* zKtTI#t-(XGvAYK~u`T<}A7WOF^PZAbLy{W*hUfp=PJuZhz~oQ)a@O&3M*-Wc_XhO_veu$7$5E6Lk^-^FSS54EA+l(9?~*eM6q-qfK50 zh8E-Sj+B_1@U30;I>2b+GbipETHUI%O50WIW7Dou``{=&=@k8eg;c4z)ppUmrf@G< z@_gbIV=Gmcbd&Dm++iLYt%WD1F^#mMU5JsTfv`OT=01MYiu%zVW|zvbS*UqU1X0Vc zGOrSuk5Va0>JX2F;DTwJB$=Gt+=n+RY9iE_J=r=3qu#o*ht->iU@SnRi3Pe%0v+~o zykP3xO&4AWbNal>PgmjCTK#vEWJKXnB7WPW|3D9MMU|ynG4MsHn)&h)Sr&{>9Jy&- ztE@$RoG9pIe*xpv{jXcB#1x&1kAu@9Ck5C391t$Fy?B61#9E7^JJ;}l>u26f-rIb21ydt!!vI077Yj>nl_>-^gO=MiZBPGOBfOgrV zBt0vd+Dm4lPUd_YD8VDlD~(q&X-XHK}v2<_DkxiVM!Kr zMhJ}ct0PUv9ueG6Gi`&{P~MX=<+1k;(Y`W{Vf&wjlmQ8$eUASjrls5ddA`;0RgiXG!_wJKQ_m z_ny{3gVV+Y@W{m-NwnTjamt-%2HHGRJU72(-0A+Mf3pLTM*T9Q&h{_?+ZXItVdyJ6 zA~2My6E2-zak_Z+a^}~>R}CV?7r2qfqoT-5o32cvdy_8YAqMyCgq0II8FCkEh(WV%^Bl9ws>;5A;0b0NTNCJxkJTNR zx><&QJ%7Qo_RL#u2Y;g5=g!lyaA`e*S%}8l4+iR0Z=0R!{+L7cYerbG$`saOR}+Kx zyhNP9q2TqVA)1Q&{lnEKv>Su|@U%24|FC^{Ad>oE#@4;g&_0urjwhlscQVL!i)M?f zHuTpAXOq_XYWG_?Skurtjg!1f>VlwgMx| zu{|!GbC?wim|-IDEoLwj26b%I!x>lxhl0)vs`yU5*8zUj@83PX6HDn-^%~ykt?JQNu_NIGL(<8iiiDu4J9*%5bYstyW&2Bw}7OtB@d zra>tb@1Ei8ADhUZ%^FIw9JSxY3u6Eu>jO#N2_1CYR}#KsD^BpILmvM^c0}*kN)eDd zu)Tf2@N%NWpqe8j2Gh4vZoETokB(qUn}Z3@3S<+nQlF*xN~Zq(WTB&+6c1Aapr}^? zn7)#{1oNLMiCPF;Sq2>TKXAMHV(AGnia-iZsP;iJ)3fXs9wsx$g)uN&GhaV}Pe>q? 
zn9IG8APT&kmTV0Cj0jF3832=qku6!k1B7%ENWj9tdo7N(6oxmcPjS4bwk4okvdC|U zF^NMeCSRKRH$;vLz#gHExEf7NT+=e)84a{(KlKa*X5p1Hzaax{u;a<>(%{jaC~tx2g8^h)@jF@85Zmm%?4KahG^GqI?tV29 zxGBVhSH&>wePr=HuR9nOkO5$(hJQ}#E4e2r^lLr!crdd+n&826cEJB0-2O}2F-zW@ z*eY!;q1IGVDj-HMX5InNlqX)PpJk2#6p$eMq4Xu-)XU(%pBT5hnL zv0Sc(20pW<8}Sm4rBr~|d`oO$d!gw0%7I_)h~&jJb@Dp`U#^IAt3YD!fR-l)e`lt~= &Onmh|4XSU&u{1%am-rbf+g#>wXDr$|Zgv-x7`iT(&Z7 zFsS_T^sXt1yO^MGqU2|cZ|bS}WfWSN`h|^Zi7Y$KzEujX_FmpB3vz|%Pm!oLkV!Hy zhr;-KTU6Vhz0-EU@}|}=9zu6#QHQjDG9^@^(GpuuOCY5U~buZU$rfC)1Yz6aoG#2M>cBkHcqje6q>Qd9=)aqs(KM?c<8BlfDwmrC)p zV4HW-jstqVdjedYY0jsFcMQBOt z8`x2$d+}a#>eU5Y(w6b4OC%E@0iwhX{ugoY84%U7Zi_aOl$)QVS?3AVH!cQF6{va*_-U5(T=6Ei}DuvF_gIth4Vr_w66=z4MEV@qAwWVsgQGpaO|`xZ76}&#;d4DpC*x`!@Pg&~dI8NxC{lCVZRy}J zeHH%e?NKU{|7_BN2BzH~42{cFbXMhT6E*#9<)&ohc=?)hq5C0XyDg zfX!$2Z@60ctev>4Rj95PO1VA>+NEFJZ&CFR39@lJ$tP&yf(v7+QSjFd55Tsq6SAjk zD4%?1^vQk#&#z+ap3HXZ>N96cqsTL7WJJ8J_)TMnDwi+PUe2$XIw>^1d>PbeQWUJr zq!1rl&X07_*zNiXtuHR|c?s49alBLgJd{+D;gvunKVVNzmB7DNj)UPh%b+;?&0`F! z3esc0S=L-te8udjRmiajwU2)iq=Wo<>MWkIej%#a4sWYN;3AqqFluQYEP-#Zvfxv@ z=@!$Ba$Q_DpP<8Qm2Nkhg`y2Hy3h#N;p;ZBo-{fV8_C1L4*g0qAjg&G*W*JNHv}hFOkpQ< zX>eZpdGHB6wWN@^4Cajq?1ZHO+IL@zuN``M;9A1L9p~xGU1zhkyPBuW>}423dk{}{ zd!;}NDtA#EBvn;Z~u}|r~e)Vw5Ndg{7>V!2u9WUb-?cR@>wCPDR7__lHW-5(Gq_u6qW7f zUMZnpZKkVerZe)BH_7OLQn@fp{94m5HpBc>e#G+7q!f84^#pM1_jEhJLDimPYOG=J23Y;&QXwS`%6 z^*Lr+Q6QND%6&k^tBCv>|AgZ(2oM!aHPN@3D2uZRT+CQ;~dBRw!Y_= zqO>eGrB$bw1VC>p@t}4SkuI=bCOa{PNz2t=nr^8d84%DbEk)T!1c_z)S$FP82wpC_ z7GK>PW}Kt+^}Dv%X`&W#k9r1UsimWGD)94lF=sXDWl-9<_10>fxIZX9yu!K+e_`y_ zeav=Uw0SB#Ne${JHXHF<7J{uQs?Q+XII&>UA=>Sd|H#8-^pf_Q)_#fZhu2RLX6p-8 z{D868mQFE@>TT(kon@DyR7iIBF2Rzv?kd?V^phiH? 
zm5n!>3xNR>;l-{5AUbYy@J_n82p#F%4HXu)bF_ILkny1`*(U56WGQee*laOc0fd|` z<6`p`&k;M&rwvsV(`&z`*Ke3lw+`MMYp(Qqi>F~a`m}ODWaZw(wS!;ltWOYuBFkw< z4=CMCT2LqL86(v42Yz1AR9amp&o=Is^*z3aZ;qj7Dve4qV3+9w><4I22|T zfW>-C=n=X|buHc<+xhi1d%wtKScAq@xKg==S(Xo7OnQ$S>o-1RmA!*a=K_X-VYXrF zrZJef+lBBSlFi3c0OK+GcV!X;he8NuT~K6(`*_DiDoDpQw+C!pFwNG}M2m1U(N>~) z)Y#pttNv4up}F5bh01^Vi!Jq^@QE)o;pge8u6*MnnUyc=y0|~I3E0Wt^VKTgDKj|y zOB!2{w7X10`gwyY8~A2Z@70hW?$1;^r(xdly+dtxQqFtTe+>@G>xAeo8QW&Ga&RQA zCZ+4h&FAoawxLNpo&0I*ckd=+xR*l@B=@agcR9(#e;_R>D7#+ZM10$`|A6R}fHdvf zDStEILt)LDBO`ICi||cBLtkaWXK)g0>fRMq#K#5=A0ymljSc69V#?*1$TOR6y)^gY z-mBeNQDOiYJxuKXA3{#_T?6P#9ozeka<0imTji=Zs%vV>Di#Ur(+5|d11A-*gx>wD zw?zM@?&upP-Gz`Vb*F5GbYK;a`9Y`hpl2H54x+8@`)N_*cazv`6DNZQzZu0s@7k7L zcKCt#z(l2y_skzuD^~L~FgXI*W~w7a!j+fCGn**$>~JM3Uw^tV&loErnWH?v?T%zQ z_8fPIV|%?xUL|Xj0)khM5K=0n{4HiHLgF1{2g>Yf^eq;CyQkajW%C;WgiG~J`Ycwp zNfRrzKI_lfTNU{2WUt?#&@6Hz!Bpu|RdEh`Yx|AN2;_gQ@m`?It~}t> z==|gH$m1C_r`c7xE`6m`5sb!M-vxlBAjLJdccGs$^bH?3XVTAT&T`q|Hh6)P@xt*Z zRv6WCR2GXs-aL1}FaLK8oL8uNTC{fewO(%3e8OW(5eu03;zxHSSlvX$x&E$qthyp> z=jNR%6mKq1&HkIvueZjR9%x?0e(nxp*;cs8yBO1S^X_9U$0s56bif1!u@0KXrhMeJ z{LRzw?<}{q-4zlr^?KE!wWP~jdVO*M&z0F+I#MDML}u@!xr>$Ng{b$&cdxbRyJ-@z-qb>>PP+ZOnUU#Akj!xpv9^@^qz zbsre}J&2~6x$T%O>%)ng9Pm?I_&V@ASt>K8*QVX_J7pqqw2InKZBuSFD19%J57`9t zLw~RsM(T!CagfwaS~aF^6{NZXNlLz^cOS6Xyqm&5E~9+XijlH=W^2j3p}4C6(JD$A zBsZr2$b>ik^FnTRhhBoMSYzH^8r7|>NASlvvE? 
zn$e42HPth|%*R?vN#Fwt#qY$LYGOVh2UPYn!yH^Hq8+)T`Zm`-WE-aCb?is1*`$5G zDr!D3OR{2|Am}ft$ZCQ%#B$3mYX#K@lg~^mLu~I>#q7`sunEWxDzm;W4&B&q?iivE zPLlB^zq`qh$1HiJ>T-X>mQQ*1Q#`-662;%#lb|RIlKo}}2egdEHTqc{dscaE4MIP$ zCr;zhBbw@I9H^ORCcVJS#dVbK7M;h->;N=N}Z`TUULdiE%9a5p+9cru|;hsdkoU zzApE#SfMqwr4F&m_hd_9ME9;Yhhpsm1CiUpf?8-wEKiC;S&x0KoU>`;`}_NA%9o^c znOY?9YFG9cHrfr!hT+Tn^&^sx8beDX`iu#txu_XQD~Njbx&yX@bdidp?JNNc+rE@; zPY1ckN*i*@28pP(egzsPW1Z0J5x1pMv?e~W#KwJd6z*V!iha4m{92|PKV9$(ny-_c zSO+DP&O262l;t#fo^mOwuS7bF`x&uk6r_2%@ z>Py!}mCXF_S4kf3`YWWYq~lK7>PlXF&BHs6X9#?&5ANmd-Ew|gAE(+%6Ny)t+~Zz=|aD#Ou~(TAG~{>xenvnE%l>V)gn z8w_QYiN?iT-GcDrjwbwg1w|>aild`C4`dI|wAI|I?=-0}q2F|f{Z^N!?ez3D3CTu& z)Cw}SVvCnb(Kwb6|E6*Lc4*Id{E$Ypua*tM+6eKk;RgCTzD)^*j9IVl*szt!%yQFE z6l_lz1+XAPSf0ZW4Z(2iL$h{LsHH$&4B!y_b8r22^wC~-#AVXu-j=+#O(orGL+ttZ z{Un=TcoKM7#xo1mc^f`$!3Mj6Pi$V2d5l7D-#-vLhNLX*DL5Bl!nU-b#EwH8UDG@r zS;jzC)Lwx4*kuo(w6-S0QIq&y-job;Rb?Sqae0Pl<|U~RN8&T#{eY{PBoF+CI?_cv zi9-)ZTmPPn;O29JAyj9b%?o&MKFI;>pTSi==$tWAfZ$9BNRXtC*z;nO>}aSv_! zRqoyR!6ke3e9o)naV&$(1egA8J$ik02!7_J8br(~x7<+sj$`q9osrNlSJ&`{dknHU zM(?u{W8Qqlhg@dHzlsdK?x(F=iw@jexQZqqXGnJXa;lzsbwe4sb$>w>ONxc8@3F;Z zM2mh~Mnr*7L#AyDb~tKbm{CN&9E(}GX*aTR#80zB#g z$fW<8dc^N(c0iCjoGfruPwYKS(B3HCalszjv06EXT1_#@DoojAV^md8b{U`N{T|+l z8bunBLWQRr+A`mH$nYqOv88lC2 z)VH}_8-6v5oSlv9Q=v!NVPc1=!t}vS{g1CTdG*n?0`Lb*d18e)VaQn?bq2mCC?b3w|YQu^yVTi&O*OaDs zh#QbNnI;cVeCDoByHeDClGy&reOJU3HnyKhvHv%ksh1kgmY9W6+3GeE>6^{+A9Mo9 zpTRCM+PB4CejLXuqjb=#%zotF#5h?=y2plT2hqVr`~1q9Dugvc)XU#hxwrRdz(>Dh zi{nb88-uyCoaif8iJD5n+s##`Pm%|J+#tE>5y@sx6e=57!nBRXazvX&>{uGQXkDd| zaKIO++dq!4hS0Cb424vdx%eYvNxM(gGrn#coEB$%PxDkrk>=kYuqarK4t# z>f3@6^e=y|L`!Bi#qCbxI$w}{ZF>;8-oF~-${j3NG^qeP&qfB58lCATpC_+}toIM4 zu8Azvu0U0l*$Atn6>c5(oBB&a23(5G)coIbvF_Y1c$G#Wwtj6+Rz7&M`gAd>7)QI! 
z!C5ht(zZXic8uh#degghi&ZYi$hPGya~A}p+#-v;VRYtjXDe@SUV<}esW_j6>G=a} zPd<~yJC4F7@;fWbKdZyNfDmnNT8MVL|FfqA&5nkHzXaUBCFl|i;~!R_D^8PM7lNYq zCUcIA&rP;(@YS;8K(7ovnes+_WJEWb`8xJ z9SNe5#WVJ2&>v&pA70~mo-8YvI0iW$fN)5nrOMdz!)%AS;}*>Aj%9MS9?AXkGcWZU zBEavu(sgryH~O<*^eyyJa%6*AVQ0~-Q&Rby=^UwAwV#D}{hRQm&*ZJ!MiwH&N9Gi}U8D^z7pRY5u;^)#w$oRTpH zIE!JeuGYcebHN_ivhzYLdysmNTlNPOuM#VR&AA19;&<)hnoaZF;}UCBS9Hmb)VFB9 z*R*EU2{2(U+>mx~Q_ee^dak&M*ZCM$%MlhqOA5a}XGI#m?>^Fi3P&V4=p*zpjIp#w zCX>i7-)L%A!8dbbh9ZkSyeT_x$epYAQvc8La;p|aO@bJEw$*id(J*fWl^~{2Yhu@G zQj(l~+;lF76aN{D;uf>vPhtgk)01?zGu_z3A*%C2w;x5km1nTzk2tG`^*(-G3-1-3 zs4L!-p+;EYv2e!M?R z_ft{Lp#SS9Z(W}r_EmbP^G`_bn09L|yOD3jJ0Mx9gS`V>l8~ykeV;h1`+p?p-0}vo z8q3L)87PC_K&7J8m6e(^36>`YozoPCB3ER3*u_jagiyhg`9-tN0YudzufMimsd5aj zo6``wz3$xAVGrbGJ0W%$ZBTn|{$@uV`$9f_Reg?m@EfN$rUDcsk%s%Jf@y5jqor7B zUo?J)5_S2+;f}}bj?{jIjjvSDx#Zfc#040ScScb7CS0?2N#Rlc22}pgyWboX%v$*)vGxmK@M3QUZHlVqAq@x zgf8gqUDgPw*Oy4Fzf;cDa2bvsG1&Mggas|e^^qIL?sq&r=@cUOSk!MnzB z1zjY9c{{~2QnJTOUYM~jcJ+*9PO)RK9_5QFz+;y!_!V@k#4mZ-Wi8veATPeiabK6d z17(`?nM>J7Y9c6<6@QApsKk#?FSSG3WGF&GODfzmm_@@c!&ducsvAj*7^@k6%)wz! 
za!WEp2$3#@pPQ+ROqr*rui}@v#xCsd@aKYIT7w#QAm4|1(1V}tmj^<~4JNe(bHj@( zsw;1I+|Bk%clB&mSbFRk<8C|&g4o2f!Y-@h_>xt%?(}PqW85lfK5;&i=}wD$wz_jt zJlnznolubcRQ&PFwqv0NNsdqF)5*m?HKv(o1NB?Z*P1wL$FcXl&x^t*#^2NanwycG z(YE&;I^ayR3eY36&42Ayti)lyWm5fm>UMhY9ls(srCzr0WkQ_~BoR>A>rF2YbTL*d ztn-E)CJ~YTUy94$m@uImOe%Ub7h@Z#qa+<>qQCocagTppq{o_5x80UE6m!Z$B`>+%$gCX;8QAmoZScPu{rXOxUqmfBkdB`48fAWMB%UeQdES zxrrAIjpRZb8GPG!O;tXgvcI;xfrCiL1boKNu{wEvb-}LNUH8q4Mq8#G;RcPiw#n>+;WGCJ#liv;oYJE+F)#h{O={D373I}4^cc11 zt}1p8j;rCNzSA81zI3U3B7lmFU0QXV6}ejtK^s(*7biNcEvebB$%t!*bM?$VkBQ+| zNK5Pz#Tsf=h~gLZIT?}%xfU%a-}lw_PlrkGe(M^BG^9&st`}^ZPO_*STGB}dTWE^q z13}|Sbbn)rBaId*nLQU?B)3h9Z*0YY3dU3_!~KgGMg8L{Q_<_v%z}ccujV5tWOX`x z9x8LNG8DR8j#2xn5+;6!a58i@9mHy0uQQjZV(5cTlGFJHR?KgIQ|SwtSTEp_Z4dsm zl1_}RP?Q65A<|2YrrN_rUl+SrIy^WNHr?ny#N4Fm5D4_>c#__nf5*N(gLrYn#5=7z zr7AO72If#9X8bFWuqwhU(!WI(3ft_2`2I7=A$7lwkK>D)`yUXZAJT1$WYm8^l*Dj; z#eFqy&#^%QsQrWew%*<$EhhLKKwtZpFyi0EG7n<$xmN67om%ZY^1^MI!qNIuPzoc( z$x4ZZr-g;;WoBJN;WJ+hNn@zr_^ zm*q;4T}^3Yb?EFZZ&&?v{@w@5G(@T&qkA7X3A$07N48JS-23oli$MiHqoU)QzgdFT ze3R5YE$(HFDneI~8EZ%<91-P-bMX*3>P=4DN~}OLa1p*FKDoZQ%X#)_M)c)VFTwYV zw`<0QiWr|c)7Lt>yr;Q$NvpmelK7lsC@AjwnDb*ynU~Dt17;WU@%_j@AP-1g)C3X8 z_7kYrQ88oNDM-_DS;#f0)7}z&zk=D@|FR8yO8POxC=g$tYlx1%1x)T z4LSYv&z`onUk8s*Xs788IA{rC!|e>Zp2@PEO-UD+nL9)m`PrF2*f$q=T=m>_HG&D( zN)~-J{u`ZBCZQhVRcwcMCZ+bsVW2RFEazBu0uxi%wS$k@F?gY6Sv)qD$B*!K47UeG ziYlQb+Z^O&WoU+M&%F5sR_UsAQswjqty~2U*{*BwH*P-|JnNWg$}cL4$o@1vW3<5f zNb7Lg-s6(slhT@7Sj+iLiovM;#YjPeWtQ@$6BkVUgg*DR0t8RPPg7Tx)TsQn zyN|OmZZ2z3t*Zt&e{2DM{TG(5viuFeiGHvv=)MzEsq9X%RK$;wE3U`su|E@N@NnbbDbI*mVMDDCQU2lS1*R~T0U<8NIl?5d!p7mEsseqNNvLA?4HfkT_GHPneg!oi1F=8 zh@+q1?yyArwli2fx(Pi%g&=Vyrfm^CPElS1=&ziSvU9xRBk?%-( z6tEsyDSX|gQq(1!sULYR7cSC>k%nBYjJkrc+c8yFTrXa z-%NkG&Mx@vzxuI%=brwxttOV}_IQYp->093v2tBQ_b7jSW9EhT2as%FbwNtZSEYVJ z^!??`yWxXmF@BFYR+lJ?JToqP^{0aJyjh=LoaYvq^?sgvLj8hQDuzROhHhskfgiv6 z@1p3%d`shSI}~AVbo}~{IdhY{nd{EE9ifQTke7xoX87ivOK_G2)?+FRyhHpedX9su zs*!|#WXOv1ODXq?$Hk02)XNbM%c1fo@clQ*4BhCDAI5{A#nYk-wWaA`04~Kw9u-9$ 
zEWQd6Qx98jS7PYOM+SB{dq)t>RXvX&ODOHp$y_H+TZsYr1lV)e*uAhInAc)Ju-wAK zi<5Pait4hK&n3NFh;_!eTsCc8=*!D?sa0N_>i=g=9HavH4tSs-k9O|c_y=Uvu6Usa z&IIq(LJ^;PH^F`jT@*&Cu?&CA+~9Z>2cw=3iDm(|@owQj6LgLoiX@H+lK1e@^7Qcz zbm=~K>LJ=!jOJnH%_J2@`cZVY0%B#@V=cShm=)>uH?+EQ<;mU>;Jhj5(RPFa0I4y z<#lr{VHn=-NEz&uWGdLg+dIGu8nZg|vUEyf!&>wYh%R-{vgb|&X~v!$#nDCuL@&S{ zNa+q@RPWSP;D*P&y>ETly7_@e-=&Rz>Sx@I;7hoiN$EC>b^Wj;CQm=i99=WI6x_I(Vvqr()(}xbF2$d|jqyx8Vf4JlyFAs0IB`e6?AiLw z?FE6p_YSZO_bQUsGu<#a4y;gR(GG+wj>PKfH91$!E!i^-IJdeG*54UcrPI0jJAqC) zcNXF*v-OOaZ!}a^Fyl(zYxiRS^1Li@#Lv}GE^8Y2s-+7H@#)Kawo;-#MBsj!{zft* z)PUDHpMlDu%O{M}aLAqGp(p;yt#vog7}Skmd1KUZbg_g^kt2)Gr;$rs(muUncaOT3 z+|wxgihCVhgj0?C-AHG|{nd$-_4353-R|1N=n>1FM-~j&ed|OvI!#KwHd3WCu;pS6 zxHQbrJ)USb)#gU3?Xs*Rha2lK`D*FnR%O;zW&)|_p=tv^9j=FlKF0) zTIkED3N5ro>y@dF8OJ{$KIV&k^>UzAvs<(hkwp_{C2Ti4W3`et$O^@IH1+me*bde! z*aV<#=%$-~Wbp5S|%fyJ`WFpc*0!Rja@=69_WfPvNJB(3o^ud?oLx^Hb=GMJ#|R4PixK zU~?OCE;&1dbtU5%p)c#_%qh?2Cz{avQD*MLF z_sn>J2ntXV_k*R)&y&H&z5&Ql+bYT}a6A4Fh-(6L)8aL_{vzlP!O8e8G3YWp;Q=CM zfdF>E^d3H^1aSPZe-=;-Q4+&Y0j+A6$jQDX_>Nov))FNEaJc)FMHTHJ7+8D^c;v~* zq35YMJHh)$8qfhJeN66B#3s^Q2Hvm=7cw~G3mi9?Dgc0=DxK3 zhe&qVOCNbZ*6IM;|E9XKzPFCjn0{$W|NZK?wMpAq?gwNM(3Lrkt|9;vW5_^_l5_tW zZqPxxh<_(9bR-{lBcW8IQU7teF6rLD7%(5?f-Gs%z7J4@f@NUB}BgIJY^px$rLzZlqp^NI6(6c*WCXj4d;AGJGI5IAvS@Wlw zHxw%fz}i`}pl+db`@Oa@qi-)q9e&9E$g0Cg_9}Ry0 zY$)CWP`P>4)*Fshv*19ZD3+NL$61Nv%7RBW z@#=}gzlmsa;2v6k!Z`D#E|dDKM@Q!}A2QP50sB%ON52gR=TKsq5I9Z*#t=M(Dk27k zdfbdvyC|(q2`+mMC>$Q>NE~qqu*0%(^wNkcu$?&4s8er_cy2>}c^7%v( z@U#EwG%jP!fgvH<0^3`JVv~!JHExV-LHAIQ^+fnA?mfw_1Ekowi-RuO(pkIT^s~gg zM`!i)7B# zPZm)^jh_-$SkiswlD`+}rHOqrby4w|2CF%iy`AlsfgZ`XT>Dhm+^HxWu(7~}^;)xB zQv;?L@ki;-kKEp)k|Re|YB*PcNaSvu+-5+{X|y-aV=4#8*8znLQn=b){ubA^WG2pY zRJu#}iiVW$i!^xm0GLA1k~?7WnZhwLMxM3GoGuuHI?_IVt9HVGN{qGD6qaFP1(Zvy9nzkWP!^*4os1oq~Cs%32;yapF4l;wD zpt_5OmWDHa@&o`gW1V;f8s4()=l|;!Jn><77bPil1=wfJ%sBN%8ZaM6(1C6FI1_*j zUgFAF71&05w#9Vbo{3QIz0_udO{zZyZ_+X>2ozkAv`@gGrCl$^ra@~Kk>^fvpN~S8K4P)nNyP)$%}qe`#)q3{wa6x*T0@ha;RxE;Y%IYQr20I1(Tt8 
zwt{v_TIwG1P*n>3cSc^F9zHmQMpU|RgOyE4mDYaIY7bd_0qzVj-;dj;T94_X&qVnVhsj2YPl)p8iZu@)8S3|kAS@bz`UEn`7$VOu5}|$!k;CA zTC8-4P#e*M=(QB|CjUg|M3NTAsuW z2ipPe29FXYM$0(b)LiMFe~nd~SeF=1=Z+FJ2G$rmed@$WicQlaHuEmx_a6TW-#dlJ zidCMoha4B>$IZ>K-1PzTz0 zj6m1;o*SW`F~I(rT;n*VhMp%CJ92Q0csa5k26Npj^<}cFtkR68RIMO^{M@a~pvHWW z6;_bUH+M)9ew%1G!6~N6SR00aKpS@1euh|q*4!}SP+OE@BPqq;1GBcN?M!`~@oQP3 z?s^(6zVXR-fbsoL88z@%U&ngbiA87U!=^ia8+_@G#!;b{9`nmkwZ(F}AQ5e9= z!45|Io&eZDD@IZDBkHv4?z-2BJ?!{E*gJW||5zH>9YdPaFtMm*C5X;3;$g=W5BRc?MVY>0K}i5!p}*tXL~+MZaCE zY?t*zWpJY)4c}~%W8f7z#Fo!AWIMe= zSf{-9XdI4D^AH}%44%dM$pwnzaUH(&`DRj|m;8TK8do9+oJg~sXvG4f4H4hJ)w>yU z5zxCGzkM+^K1}SX<}QfLpmvNeMH9c+v*lMg&sO^&vI2iV9i>TJHFi`e=C|LvGIy%` z8tJwx74D;plV^q&LA)yrd4PxYGxGdWknV+0Tl4=GYTE$~7ZBh-83hKb6$oMm z%5XaPQWRJmVgcnU*yf)N+n@muw$wbJs0<@u7O{51Fr0OOD+AQrl*&LvSTqWbQUeD1 z|5kkic1p$%a2VrX4ub*MZzf+<^FI%>QFCubl?nkv>ne=?9A0{%js~RR02+6gvJ;Ns z0xk}cAMiN*YcSo*4#c;^)Vb!U7SDtO%>7NxX8S6eCVI;Q93wK zi=SQzGIMRM;HsIF(J+*T=LKJi*j5ct!%Ck5!wVldgr%MXh(`%{TU!M=Tv63%gEB8UsqVcUA#A*0~d0hdUK}5yQIBn*(i!!hX$vKnT9U!=b1$aL$G?Fd(nk zU2GmZ8^vrv>OjWsd&vqk|5&I3T;&BW906qTk}QFA9x@6** zCZS*Vf|n-&b|23_mG{cP+yS!cFbrP;=y53+#3Eoh(*lr5Cp|c^tuq=IZV5KKrSv&n_j_MvAG|OMR@tmYd4D?17%sYs$gdsUF;}~G4 z^a@}`hJpUFfQxj4i{yig=mD=lnSJ2T+jJl$+fuj19kn1GuvT)Ya^5pY)u(Np>c@Ni*~nG_2RCre}@8f_mrInvFlwGg@A zD{VtKyNn~ToatgryIzs4r=Di{ZHp`#xfr#lLy<%eio4bLx!a&c%>&;TYIpheCgj`u>Ht*&YeTl=06O zcp)>$_t*dbv!9fw41Z2SM+f|>PcL|p4z3zAE&eq{xto*5l-7950f~smj{S}+dvqbY$IiM}Hoq$Q z?eLuGY_fNM2z`BI7H_Wm5-XbPXQ=C}!?D;;BmF_5*jMXj`qy9ne2KqqcJW`bByo^x zVnLm!zk6bwU42&q(SK+4=SM^#jn?U8<=<~c7n>9@7uqV!kKrg4a%D$r=sZZn#DWmf zi_(b9#;tvHZGNk5l)+-zhjA{{N^^1c8Oq~}tAkT?uVUbLqtBAcL|4N?CI0F*{m=a_ zm$amHkW*-5H#HsoWv589wkz`Dh(6l;4R9GD*h`%n`3D%vDZJ1<7sh)i%72>LrlY z+`qRsRLpBwCoii;r zR!nH6dl@4}Mp+zSSN|o~rqFRF{CQJpb46Xbo>onmtzQd!$Yb};XHe9;?fp$9&T&(f zNuDuYEmoa$qKa6XP>tt&1LD0&vaTPcmh_18?wg5*D`mOdvXUnFy@lF164oj(uRNjG-2X}ylPM;UaONvZd#L2q$E z>T@2(r#&QF!erw%HQ%n{>?%((rPvuGB z%6$sw_LNoGX$SOIWqb&+9gd_X5!f> zPk+6epVjzj+6jen60ZC(BtfDC;;L 
z_W{7>Gl<~i5xGeF11fU$uE}4fJ-m@a;ZOGGyli`xBMa7&$fdd#aL7|qL5C7oh;cCu zk1-E^rX4;X$06Q9^XrnhlI4C;n%VZHdcFjtV8Mf~o99`O13YWn2eSBk*qZG`^KJys zh-e5O=6bM2pq5v1S%dX{%?+5?iu+lv=XRI7#VZA|One{p$XrAfc{OY!9)}HpL8VuW z=LDrzu(DrvbXgwe z8_U#=92@0(X_hcOlWQVAEB59Q!ej*9^s!)ebNiDKrrD%+!dpV>crunSUhkz<&aU(Xkvg-rF7(Fkv(l?$B&z4 zLtNSt9bSuEG~MhG&*UJ45DyFbkxHVmv3qVz&Q&KV0YbEE&;+)#5uK zDpTS%ns@v*XK7wJFp-mF-~)5yFA*vy%^WzZY2n`K5BiL_HPgstG$M3l5NF&`X5Mx= zVrxN_ZY1p1Rk?$TH&-6MlVK+MqOrW5s9#<`esrahuuS*xNs@$_<}Gr;9mvh4rN{h` zba78*HhFG2c*JgyDa$P(DyN+hkEH(k09_Ezu2EyC675|!`gc~i*(ulfJ@nVX!r#!JsN3&?cXHlQNALN6wdSzVoB)Au-!8a}EpS7zEF2F1^GKLxZIELDUip57aeAa>y};I_Ts( zDckMTPN#nO4~EyxRft*@uKjMsFX$eKJ5w%^Lh!Gw z#3{f7zWPhDxoDe#g*|27De*T|#pg>OzyAFA<9dNUL4wvV#ZwYU^MB*R{&&9Z|EKMb zDr*VkBo8AokAY_smr`$b(jk~phJl*Z@Nt;u#AdXGiYr~|>P2}0?M`hi9yTK-`Pf)NhhM z`&MXFHiWV*RMxv5y)6PS!-{{vF--7r2fMrJI3r%c+VlMMg1uQ^{4;5gl!}xcoTa>s zxVtb1f|q&~i?q!VL#^4kg^9GD!AK_RniQtRdz?dzMip|B8%}^2|F!a5+ zPmL@fv}%`#Ty$t)fGtTA12vMo-x&<``7hy10@6nThjsk;1JYQ7-E?LU+4-6=Q816z zuG@g2=wTCuqZ^r&67y(S-6r@q6synK+<`Rp3h;w0QE{!>lmK^ArsOUNA`f(Te4tLa zUG~H(;z1deM!v}sjuiv#wxFD-Tq~;#R~qj5%thz^GweGZgJQKO!s(O~bm}fv^YF!f zB-UXb&2N}9FNE5*coRp!a!5feP9Jj808SuwlJ;L3zHg_O;x12=xWtyrwWG8 zvqdkK5qJ}@g;8FOYBpmps`Jx8avM}R{@lRv&}eer4-Y$)oa+q4Vk9f zZbk6QzhizD_$D;t1jw_wVYHP*9cQeeH}9d4q}wr68Z)|c#B)J8P)EXN=`yv^ttY2> zAAIf@mAn@6?y2d$p;enhaDK*s&X zqA9=X7L*C+J?$hFzxcz<stdj{Pt;K7s^B^zj5gx>-}w;aL# zDQgmx^Zbz~ChCyozoex8Z!c*%$SmEy6AFnFV;bk_QoIJj>C@YF#nK(5^~mHG2UsbS z*U;Z0up*yF|FBFwO}ZMrZCBuw8zUN5D#tl%{Egr4J!B$qk?jrv;%8nJ_szNy#aJz? 
z9L4qU!8q~;51nU{bn(^Ki$&tSC??QJqjAdTK2TJbxzBh@`^h3OdU{P{1~D#2uoFOsx`X2X}UdZd-DY|(5M zkK|3g&AWJA?t|w85O2EsX3Z~yZh3#*P8TSzizA4>(`4#E=AWwb*Nm_ zpH55h!>$l6z`raZnV!+W@BXp{eUjFf^G+vb+Kl=|{GtTj#1t+1hHL{$FFXt^2Uo}l zpjKp^&60C4mYu>Vbbkist^y&W-FP0n)5Z1=2(2=E(f=jH3PR+TKp}@u4#aRv2;OM8 z*^8MT1?{GjMUaRsUX9D%O95;0Umo=Tx8H+uV7wj$-)mFYYpgW${{BVmbV5r(JL|S) zvdI`#TvqW4kjs)RC=SLtFFOoEJL)(y0x^-e0ljJjmZSMV(m`X|4GHTlz|Z?stB#3Y zIs(E_iFe&j?8&iTcrhQAer}h{Ku=VmtNi$-pK6jYpO-4Pi*JX#>4PVX3!^<-NvBM) zMSQR`5Ey%Fm?7N-yHsNC2*OZ*ZibtYD;lBQ*e>vXTZ2I?hBcG&_NRtz9HYi_*oK%W z4g!Ldbnv!Kl;}C(PvGeYfH>lXveWN%+WJ4G3IqX_Kdoo1>A4)~h=NaWentLE>+pa_De z^UBa=>3Ot4D14WT5=5dN^2K05MAow-MKDWAnfxD%(29bJJ6gMSnt34olTrqH4@a-M619i& z{*=%nRb(xm69x-uyu4}9px@modWpk7`u;socLvngxU(fWz|wB^P22ndr4m7k?1XRm z^2iH;zBhhX-2r`2&V_U6&XmOOr!<--Qy@xc`+`=Mx}%5mD!~dtf^L!9EC#NPZjNnm zS`ay~3%>3Z*2m?6F8XR=g(^3tRTSA@nVHxrVVm*)x|Qz~+wRV7-z2j#*~oI% zeyWCh>1Em|H=`r?IL72H-le!uyfOAN<2uSvtYqe#scz0@w3}j?CtLc{xb|m|)o(O& z5ZOU_>rk1+%R}@{FVJH7@is?;3&KVe*kKXPqKv_wJR%hu_tZ#7jXoT7}4sHZTVCCBCZ+m_bN zAq%xYk>^X4WPX8$!Mf_BlcVTr@?ME>_1${CGX0@adgv=9HjHiuOSI)yih`TpVu(X- z0#V}m3aiLow*+3vGWBN!>87fsc}i(jjJe3tk3@(09&V0ca>oH&F3^gSm^Jm=aj~v` z?$b_ldBo!$9T{HYgHi&j01kfq_4)tR-FHSc)i!Gfl%fa$1eA_|0#cM-LjaW`A|g^G z6al4oL^@HB4k92RD7{HX=`~6*Ae{h_UP7;-LmFrMyx)&!eLv25Pg(1{>->YY_PzIH z_PuBBnYpeRfwnYB0()XdTFfzn!`S;NE&Bj5b4e#)0`3mi*$WO@** z+Xm#6-#a=NmS^p1`#AH3@CaF9(mReon2AOhcc}TrZ>_NUmvR#?;No{t0H@zpo_g~M z2i|ZXf^aR{=bgXrp+A{)XFab_f>G2xs^V=mI!F}&%TZ>TT~AH}$84qv@j3G@4fl^} z=f1KecDah&_jO~J8yGGQfM9RDSwK+Pn_p|KZ_VtG)fD)oA9ZvuL#S&1=6)z>x9)1i z9f0eVWryo=NPF?Rw%P7pgLog0&T9}=N|UW+zvNTzOqu|8?;;2xjrV#F;^WT?OQBC& z?(#}|HE&PW$!*NC77v}k)T1!Q zX^N1Fk{AmzzO7G7lj#lL<+edmWDBJ0NiJnxC9%Cd3wJ1TE-lKYNQt{aj%&Qk6)I89 zx(ga8=Vl)1$t~XVT4FCo(z>17_ljd0u7B-!KSA4MP-54guCbI9k}z5+{Z;x(;XMH> z{M$XA1AS>`p5IBFXld0i>wXT}{?+QkA{E>pb93qq$&ghxKy#)rw>TDpa6)^>>TxAa zbhM3DfmkQshfRtf6qTUUriIE@ZUPk5tc3-d#I|Jke+qm&8d230Wkfm7msDBYRJ%tu zz_GGsc92QeEQWP>pRRB(`mtBS8sqj0x?kSpAb4WtHb&aIO_>sV{FXnR0I@m>^SXH2 
zko-tReom2I;RxvCqcbSA!y0gB_j$$*ATg6Sb&uk$+Ja_ivklLTRj6gRRgkO0=n1{d z3GbY5fs;c@ZZAJ00A_LVIV=M@viMwXSH3pN3$(Ah!5;nlyPnvx+Ajsl2^IIvaptn@ z)yAMuo1-+H`r0~sbB{OCwA-B@_Z~`UVY0SV*qAYFMsKAunNB_}2pnyCOtsVHDSdo9 z!1j}*#)+|Kb@|xu{!St7$gjV0NQ$Vu@;GC7wkPFrN`IsCW&m>Hic*Fg$UO?Qi;um3 zgqa%%dDb){mO!g^L?b+jkSfj4dUf^Ih%K4caapXS=2xt3XCJ#JwlZN-cjP5L(pBVb zA1|d^6My^V>IBUJza3N??&gV;9hGjB4u_C#)%^$2!Jea$5Z`_WL}&o>{LV$CfvmN! z6Fs+S_(Nwjo+;4wYawghRk$py=Yf%owIpe@lAg$)k&NY*!MZigwz`_g?IVziX^d>E z1FfU+n5&WiHqk%^F`>|#5-E_#pTiAlPFY{1myqulMtRMKn_otIxo$y>?gjgg{XCvp z|&)`6|J&Hep{E+gJMV%9g^2VS;V;J3E_5Byn%c z%Uyqy>I z{C(!PYg1YqinvTyRXy*)wIyvMBHSW+PhH8J2|J}H+C3=MwUv7{lH#-f5+o8M0101k zup>b7lYR8HP>;P&^Iz2JRNi_fho2=i4oP%-dccCvorbV8 zcb})YQp2|7&P;T4O6wU2eYi}B5?{qh6nCYWrA?^4>R_RmRy;UZgezAc4vVxasr~A9 zdCs11d2#EFKN->jKor4jJVji-F4(X;&qH`Ec4UE~b;(#F!^h88pw=}IMWa(y;q@dB$_~>{w1H^D3Rrhx9%;%q3d>>1KB;e#TcSw?`ow9uf|On zPi@R*u(WEU%^}WE&{rRN#RVa#RMs3+I7>KS`aQ<-w(2X4`r)$Zn9ZSb zGwCHo$|2Dahv4T0rV5!}C5L5=yHcAAu>-sU{!4A=vuv!npk?H{Tve$Aaz^?_A zI@N}|N&o|mTq|50pjCJ9zf-M9#9ln*2ynU#gbw&69)IiY)qMK<+2>tyKQLVXL_ej6 zo@B}v(G3H6RsqL>W*IdC;OVM zw=(1ByFFz;WZm$++3ZeKIFxq1d?6~Bme9WJQbO(r&nWnKUfIah9{V}LSF#nZm)VQg zzX7PWb$iDs+hX&T94rhq_d7`2PANMTI-vwt4oS0GV-ewhJuz`P{aBLv0|g+l4<@e5 zKdn}2P0T~C>Am|D47owtJp(~*3h}GcIJ<<-(T1AtD;{@~R@ZfeKjcp0NQf)tU8NH) z=}KQV^2b|uPL4yfOu2D(gqH_Eq0%FP^AsG{T5~>w5sMN2juT5zBOJ?8_A5ij&E-fTGqqoNHC3LWQQ&m^-ReX}`)leRD`nAhfA1<-yS1Lq0cPLFQeky374HFJp1JBea z;*pb3%$kZ|c_#W0)w;P@PM|5AteWxB@5j!R$1%NTP#Zbcooa)7A4h|0(1*mA>&HM$C|Dxj_BVn99S!m_ zjcX(~cqK8Sd86xT3rzOncdNh5}O&@@Rwvaf5_oeQr^i_3D{_&`MlM7-P0Sg zz$*V_q2`~cdIn~B@EUT}+ZMEek_fD8zuf}Jyb z14l|Ob$aBx!+-BsP`=?)0fzWvYeUORK6OY>l3vj&#=RLBskgDJTwQmIt@Fr7qIQTw zkxpAS>yV#Ry<}AT<02c*vR119t~SXpW4W+vvX6kMSnqmVcEOJK8Tv6Ts&9~ol+l(w z70l$NHn!@1K5*F;FmJkm^`^f3E76msMnCoWjv@~1;1OwC&mIH|<8<%10CW!L8UZ_> zfhsnz=1#^#c?nPKZ~IO4f~UEss7`VEF;NTxc1LZWP1;cc_zw83U8&4^J0BpQk^x_< zid&XEO;a0ITJNyg*84bk5E-aLJ_}f`z~oPI}{e+I#gF-0z`@Z`d(2%=Ua? 
zSum~ZFVUF~6G~XHmD`V4m*3hjx0r({z;s{+z(^gc8Wcf`A)!rIe!O%mYt1eg_H|dIRY`;q4?Jo)L&1G*qH&$fY4!4zkGe4T{LK^s@&E)cW zyMdhbm7<%nm3YQ@pYB4$p$-??bQd(@Y&nubhac>|P^Y2>bCV_SjQq9V5I%E`sk0oA zh{&5!xA3Z^GS0~_EJ4((#4+&R_VZZnl}qg}A4`0g&|ob`8@DDaN;OnfW)NQk(M(mH z-Dm_7*WQxkri{41uef#7^+c`Ob*o3yL`T~+czvM{?a|Z)l-EN>1l#u4%nCB+OZu3_ zs)=9&nX#%H%MoqYWoY^hS*$}jZ|56daue?)D+b=W1)3ls52f4it1SZZ+{i73*2E*^ zdkY6T=cR;2>eR3?SII#y&}d0d?9&=yVI6MMU4zj!a}746SDprEea!9st35{s77Yp1 zuGxK_xS$ZrVjutSW3pe44eQh`ZkskFY5R(uyUCs8K554)1&lej%itXKioQhye{PrD z*qS7m_^2iLszIKnrtp#kq4W0~;#i*QOLv3plF47PovYlm@HI1*4UvXU9VEi{OR09RSaZbf8&aq4`75%)YIpSlJ__jHZ5k? zcVh=To^XEsNo0+Le7h}Jxg=JBBQ+8`iT=nw$|kS*0UA4WE6>O?_MN4$Ct=qiL-{UD zXCPd-e3|YgpP$?e7TT6#neGJ}GM1Xd+bR^QYl4=0-~1R%;{YhX9h+>JOZBwX5&9vb zvLUG=yD`ZsVd3#&_-Witb@`lGSh(SCAUbw5gO*AIl^4FX8G08VA<#pWkbBkQu~zn5 zSj{8}QL(3?eI>5CeZd*t9?}Pk6%&W_;@`+}3yPGis;eB3*91^4_}{wh7WrE`)@A*UJA>lW(&agxjeTcUf*(4I zz+7W?a z&BW(Uq6ghKbbD@$KUJ-_j2NV&e}y>(lUjJ48S+ZsSnEYtZN=$t0ZVV*GtAJEy!Hr+ z)S#>|%&wj&^}^X&(Ruz{D*IRD9%1c`x&6`%9>$IM$i-)|Ekv?;JquXhjbolRrVm|9 zS^c&jyiyVMh|PDIg=LDzGd?`GCiyMmseer(*8|sNU2ZqtLuzY+xv?@wfVJ1rs^fCH zAVhmyzU{|=RNaZ|dPMtYZypX^a=-8f!%qy8OnttNYW4HqNAHzOwAs&M@TGG=hW4$k z@KqA{ARD=;v0;{F8S&h>Rc~(sb})dqC14?a?g|{heP}QS30il*Y2*|ulEttj(Lr}7 zzHD2=<^dfT?ZLRx?=cyC&i;F`-L$`I4obU@dq3h*JsS6Dkyt$TR5V{;q}eHtB#i>- zXC*!~>TXKohvS;77G^E{@I@2oJo%8yf|*;O^yCf+@>hTUOwbxt{%iyl$_!xIV5(7b zw8Aw2AuJqa3iu#Q5*oOV?+Lu%)kyPw=(fD~KSoqr*dq>@cV$UnF<^|uCeYIupmrT) z0zNB_35mV)(hC&?uIaz7A+rY7R4~Km zjy3>X<)7a87{om2E3gm^K%xIyI0WEADB_5e1OuRiI$1#yYhnWkJb+Hgdq^Qcz_jxp zTj(>;6$AJ@-7vRaAS6I*!L?3y0l;-4n~#-dsO|2-k@M8)Zkn3;cx>Gc~>K{$a&26}LHSAzt00e&+0 zwUZ`56h%f+A&&lfyjdU<8DMhTC@kYnX-;y_KvZ>bf=3F89s(r=x84AxJe)lPag^5r zGeXhd(|pb;6h_81pey8oHtHV`*x@G-u8H_5mcr z@Sj`f0Stwe2tc3yz)Nr}0UDI6&OroPW+TApf4n;c_RZ=UXn(f?&?o-SR09KGNJHUw z6QK>U`k=E zK8RjWyrF^wWo9%c6DPDUc>w0cZ+Q{$Id$m4SIj*D7M`?-w<}gXuG|r#m-f!ZiBfw_ z=~hXg@3-nER0NAyLz`42P>2cy1mZWMxtd)Ce1f{4ej5jO#P<(z1Wp=5Bjb+a8i*9U 
z!rlD8_Q0^zA9HY&bm3N-cCZA+WJP~_yh}K+`|W@*1a~n?BQi&G{;{(`t$Il54vDk7 zNPz`#`@k0Q&blJ0MIQ;muQ?^ z<+ci&@9PEWO{xR#SoH@w;aS%W+}d&FEAW!`5+U=|*lfYh zQN#gBE&dJVU5PgYBX3S|Qdc3j?0{tw-ns%Xob4_dA+x6_Pwpi4n=EyP#E!rfIB-Y= z3|js#@LY*g}}t{+S&`>pt2lI9^S2{a}>*%n<*EX#<>`DFY z+E9<2;tH7^43cJ}ExOEqXZaLJ+5qJ^<>`SlQ1P4Vffhh;h98fXeS1*Sq$B%~WmsJ# zY{UK1Ugx5i%wp0r7$2YG<evhtgO+&BV!nY-7kh%=K41C} z(of;|tBoRo>HRkT*ogC|KiG4St}P;^`|iiiRAZ;wI?nLZ#WeCBz-tkb^M#3wZ6xZg zLx6MqxWWCyyS?k_mU>g$1G*z5FDaI>WJ9>+SIwCPpy4r9BTu^wL%xy9oqo_dDed??wyf;t$Xu2@Dc+ej;x+pe`B8leMB zeOrp^jJIVksWPmBNPNKTtVd8?Kus)7U*W!<$|F3QDpvmLEpaE{j-iha?oo9Yss-8X z2>Su%)5QH^j`qt$2BB#$w`pUXE&B9jfxJBzXZ*7>P){|#TQ12@_8<#-YXi4u5*08R zoduedHzk{x29tX2-XoWaa}sUYEuVPRC!(fo_i-0Ji}^j&%I{KpdO^LR(_AD(iiz7T zcK+C_c>UO9?5oH?-9p5YO2wa43q|>+#Y|p4Q`m?AtM^Ys;Ft)3=%_7UW6Pm*d=glk zo7|9yi#deOYv1_2kbI4g;q&xEg(~hj3s2PV#}%oU3;6VXHfjaKr{4%t0O1wl`f@k; z6MN-US)gDK0{cC44yxZ>en-t1h2hFx6sFD_%zk0BaqV&!O^}k2VLTu58esAV-)|>B z_Du~Z{eqe-O8HlP32)PEZaJ!iVnqxzJZ~bvheUWv3554g214y` z83^)!@9!#7dm)&fRwWNU@pI%myYG}GxIVVwrz}Ii;#jC{YrM?_!K6DDQO#H5FCB>F zf+;jyT^pr>nt%rX#ZmzOQ5xYKw1W+jjL0=x^UE{$N*{KLmke2_hD7kt`_l_Y31Og@ zD(~232!D$VX!8G%WZ4`FF_&r6oTglQA;2kLaW5wu81z8N9=U;q{W$990Pj+jbV1>iDOlQPLF9jK6Wg~i7XT^QK+kcv6#Q>lD9iS!ji!@^bZHe!ujJol>rG;f( znO`iEN7m8spQE|;uTPz={NklrbC5fmLkayA@|1?h$`*X5C|D;+V*h!((go zpy!zW%J5~CY8yecX|akj{Sq|xV9(;@zLL!+&pFxK9o|lO>qNdYkWZ#?n#GBBfLqo5 zS!_t6r(jz{lLw!LD!$?j^t9q>P9`9zSRwGWv>9NSWA#b&td|%P%#y}>aXFt$79DT;r*cwC6WK~AVNZ`<&F$Hm+^^8jdG~~CLL(`TV_oi^GR5wd z@th@V*ek`!(Qk;AGQnL^Uc@+7H#%rYh`3l}onErqz=tJljQq6+;+J}MDTeRT&grs? 
zHiLZiVx{O|qpk3s{ldbI3$<0;)F*vnHVax06EtH3nJA$vX$R|ngv|3TRaOY)cE?sk zC(Ehx#8&%0G(NPEQ(7+6y*l}AGf{K*fwTCuyP}=98MLupO@*Y^LKh|fh*%A5oRxK7 z!+cCh%fnUU6S(SF(&dPi%m*KM;nnnSk~u>@n7_>!9VS4qBNGzl77iDx9<|(LWm6T- z5wc;QavPRZtYY|SOx6~)DOivcvm_we7A07q`M2WMze}qyG@FTb^;+OmeCm4xR&v!Q zSlW=C^@SkJ?Wc!Q%}?qbbd9qWM5f1L_%Is3CR*5T|6bjKp%*1VpJ%;eBmOS^p~EyO zp)PGK<;$l}5fklw9fH5L0y=5--<|x_XaU((6(?Q#&La0nS3hPh2t;~PGvKSB+_Lbz zVQSxjiz|NN96!H@Hd{g8;l$3UhmSzUh rxyic8Ph6?1a~Lb=V6{@^b9DpR{FDf^|JPvZ{Igs9U+g-vv+;ie>Mlyi literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/algo_tp_comm_optimize_a.png b/model/train/yoco_moe/sources/images/algo_tp_comm_optimize_a.png new file mode 100644 index 0000000000000000000000000000000000000000..4832eef45e468055ab97eb48a4c35b4229b98143 GIT binary patch literal 55803 zcmcG$1yq#n_wOwTNOyNh2ndKY3`lo~lr%^w-7!e0bhii$ElNp9*N_6r&>cfb!_Y&Y z8-LI9dw&0O{^woq`>u1=8g;FEuKT{P*w?=H_p|pHp{1dShfR%r@7_H;WhJ@i_wJ#B zfj=9V7{HZfYL5HBKPc|c6=m*~57KVlyT^D>S?<{jAJd(z2kE-w4WUx$AT{S>Q;dW> z#z#5f5s^gYY+Hf`e8l49c~7}qwD})(l|;Y*QC?DjTNqhdQ700@!{Q}2nseRjwXGy zI_w?`<*AEp#U7*dtxH&zh>9~&4*V!pQk3T3y&)F)(;xKzra%8N5_E>izsCd&&gbtQ z|IaPbrGpH^*`%$LvYDiB5z(BYL=?aa-4iNBUYrraC9U=<$rN8pqFjWX{G4XN(n7f6 z#Rp-bQ9}7p9(_8-1+Lqw^+nW&1M?9T-JP5G?^g!VqmqLCiYxK|>E-`{R___xx#b=c z5o4+a-9X2mp-zzGz^k)G{iE-pHrsNeM*rm8Vw1idiisstmKFx4bbiAi0q888nh`}3 zjdE#JHpWwlQ3B}M(t(xZKNIRA0krM?6n>xe-)kO>z|=x~=#BfX9E7IyuTT2{8V8cr znHfL+?<+xQ9W2sN3VNvtwtwG<(hKy-y*WZeTA0|JJddWVhu=s(=*EJ~{!|uKiZBPh z@|qEa1QU_XPm)um3?s}5^yx{?437d{*YLxD_NzZ8U&s=@k!t}kwm<_|req>ueE+YO z@*f-i-!1n4+G1K3SpNUs-ooydADg|@l0B+?_|etjck2)v98k&CyAW9F8c9Od z%t7Wbc1DOM7k{cZ9f~;Z@K;O>bjWsUJ{N{VExfv4iUVm}IVZudB%Uqs5~_``f7o+a z&H~3!9K%To(c~4Kz)@&-fZDPL@CMRP^$Ct#dUZDD!K03KsZox+-?3~7-baU+x|J$I zdj+LYdx%^*-9tlQBL3A6!-!pgujLh?pVMf!wg2oxSF|}qW-eS{aKWK37i|t6?l-KB z*nHUQM+!E+&uUJKA}nFcfsC#eHge-Y5f;fa^#HbHUnTt?d!kfq$AeK}gI0EarP8Xj z<15xY!bu2$%Fn6awcX0*#|C*< zsDF-K07jD%sD%*q{NU?s>?L7a^Sl5)suI0sqWI^bA2WJ_x 
zv^f9ky(Kj@HKdcK{ycgWrgYzqi?#C+lWjL-4Gj&CULGqXG&f6JU9C&?);i824hMKu zqcf3ifGw@r)WVJ0JZT1R*J7T9d5@B%t$Y!!P9UWC>`hH%vLEMsb6@KEATB;3V~Y}> z6#P4VIYE&g8iI}!BGl@0U@38h==_Zfztr2_+H!$Op25%8A-!dF3z5KSNR5SsRdi~v zxAJ7PFNu96Mc?$9jZKlE3A*FtAEP5v1@@B^D==kIy{pU`gkZ}^iddR3DPG?(?Y!<+ z#}>k7dWI2Z5{US**wHC%#0N*yHoRgYa9-XtGcdD0m(LOQueM&8lY&XX)M-_F%goQk zJhxsH6c&DY-N_lEiF;b8$N(KryOJBPH^ekJ45O$-2EN@_s@!3Nejc~o0S^|Ffjgu? zw^#0J>3mR9eP9>Smt<|L>^e{`={!>bmdL@R+@y(oJ=0yopWvo8AjL6CtJSBE>6WXa znb^7cKq0K{G&y!_HXGB{%zNHbE3`JC7rtMiX#KHn!4K#5Na~iw(Tr>MjI3pucWBnB z2~T*=<$j%qZYk4u`L2t`l~{4UHxt}*{`)OVA0R0q`zum6yggY$UprrTF-W}M-fLo* z_1+KNpKvmfz5n~MNhpPW>$GXF8O^e~=*$W9;$+wQK9;+0eN!ZpQt-kJfA9r?W1wlTpoO<{3mxV(aN%+e|Na zz~i~MH1zf7Y~NKmOWmAEz%Ri)gAd$`cNN!cEBC7vZovA~LPMt&ZHynPTBJiXr`zl< zzm}B0ma5Gs&1qd*<*^HdsRUXb1bCgt+8%EVkGN|#C%Ye8Gxi)0D>5j<)SNa$y?n7- zhoW;75$Kny8k;`Bd0`z)?|nXrdpl_@JrvKCdhqt>mvEubRf|l7Od($>5S00bYDW?3 z1SM=L1bhVaj>)<+(Vuj|sT{uv6HjT~$%BuuX=mhR2<8VO~eb$kNFVr%#7yRs#0b3mHD?j>}cD?CI}6I>T~eVxF2#v8>R2#e|rBdCiKZ%|*e4P;y@+%cQ3^AWPP zw*FqX3G=$iHUlQ%$qiaX6*s!shUWddk&0oqN_UT6ov?^Ie*5vUif)Zc^!%)KkXt;r zf}`c?aA1~T?ZW-V8fCKMg*)|HSd0X|S*{9n($)ICJ8a7A=8)oQnWD|knJ*ntZG_y(w}H>OmAM--blW1*Bu++zh_uLNB%(J@d^g z%_mJ)AyR@;yw*h_1?N)PlY@hAB9G=<$t+yf_Q4dTBUq~h_ZHjSR=eALk8JEOMYL+a zRmeuzUy>xW8lm0vaI@glUgw~eSj+Dqg9z9|hUey@nE2D>zpLd@+w#^zs(ixwQS9bW-=8M zGC3NY|H%!a_e%b+@o#HI!KMy;;WSG)zg2VJN9{Ar2H*Ic;mL%rCuVkk1=Fj25>lrg z+^%TbJ3D6z92~d~FV5O8Bg^Bh#N)ylkT&|&3gZnVQVfLgHJE-YgKN#n1ibxl*?Q+Y zrZ|x=;-4qgEb$($HEmpf7*1hw5Bz9&D3i8m)adcW!}OWMN$c&+<;FzWr`4`Qv*-On zR}m6x9@ki{=7EOxzMANI1Cf}D#x-^c*4I?S!>K$*2w33FsI!jDPQ+1wWs~=HOj~=w zzJO%R#$+9y;AB;}n2Kv&QLOO-#N|+!ATUqk@j7o_x|vVuwSz@Ynd#cTEEZ_7IO92P zf8GQpbbX&YpO=8^NuG1#o)~W9mN*kUHSHyY=w@_c!e*f6n9UK*ALBgg+lNLr?xAWZ z#P0cN*<2ABEc@;Cg|dEAxNT=Qh1uL=v=mE$DZNrDD1}3s^0vPo^~VrR7RS%1qi&(R z26Ad}M9fXZVqYN7`DKaH`Q*4eOp<+^!ou6y?E~RkF8%UkS{USh1Zb5YC^0edRE{_> z1STxNJvGkd)R-#2^4vrIAY0OZ`9S;g@>j*QbfE>U*T|UA&5Dn_Z{)Hh*cRX8CuBLKz;U|!!@C*L76-(=sfLsDX9z($ 
zT3Y5!a2(Q@!#dAR|InA^T(=3%1#ps{!|Tok@6Yz_K`0|aWtfkD^pP#!R@5(J8+#1P zzpe9GlZSKRzRTi#v4BIlvtw24o}kAg{dtRG?TTtir;<9F7@W%tzCnXjIWujf@|rEs zkCWV1M4v|B#LLR>f(gbWYXSq?k}7@V5e+;Yb)MUL%43)Q^78UaJv7emMILXUOT)!Do^gWt*4L(kW1cXu9iiHtdXy2vNxc*GV}V7vEe@hw^Of&e(U zr$`=wE~ZJr@kMUBmK;jKvD7_9M&2k`4MTjO53VJ?J7tvAitUoyFd3>@zIzoO(*rj$je+Z68?2e7{~Xb+bnzfJth7WgZ@alTopE(_9mxti zT8D*&YjBu(VS)HDte>iIf|K(e#xo|$r&r;vO+Q^b;kp_8Vc$0*D1(SN5A7epOR40Z z(TWw<^3eIHyc3s1ktpQRsL=lUk2lr)b#?u4s}FMB4?B~t0|7{*RhWl^ggbe-PC8`l z?59~yt!&Ige*Q|yO4;+D*knsqspT1DB#pS}W_Ajx^PLn*2z27tP&xhL_jrXqIJJdb z{a=({E`8^wqexufmRrlRJUBS0zdoIHy}dqT5f`7`SNX1kh(O_F@p!%e?TCyUI(T)S zI_^#Y(GGz@P(tJ-uTuz8=a_L8%+C;pu5UG?XLm3mrniXR7bURzX9!Mo-C~&8;t#WL z{>UqnGGe=?@iPGvPU>;f7~-uxiSI-pi!&`pepTU`^jtX3FQqg%$*&7+LBYkErG3@v zlx3bFI_2CM@)aVDiz%zPCFlc&op?Br3gVn3;HoGFj-$9QX|*h?7cOz|3+3qdFIfcz zg`wCTa|r5iSg;;|A1?(`omhm>{fT@S`l8)OPq{-cR`)mW+< zkFche!W9+-6rk%@g1^5LW&9=QD$3N>2Yc`29QEIF4rr(+&qulBKJVty9rTMFtattZ z9m19Ri5xeP0k;62#f(ksa*us$!AatL1pV)yQ=j^9c}y>q$D<|9EF%KHi#x$fE3&_{fS33 zIS7~oUNLM?%c?fa|G^D*nT}Rf~%KBh2&RD*z^1!K_~rm4(`7woE6= z-VIsLn&~-0)MK~-Y+%FgXt2g`^V1xdo`JOcVHF|&G5~DZs_>>iZRhW%XYc#w*f3x} z0A9cbeJ5 z61WSm4%gb3N*Ljxp7PMo9r4`DPN;Wn&!V*v4gQW|p2vTy-7j1}{a300wVRT+f(Dtu z`X+%@%rrDHQB;&}VO<(&jtT9hz)F8+ezZ33y)PYIGORf<0MniEsq}b7loKeI z+9GOX3t&L^P&yIpe-I>J4A4V+yJ^0EIxX@JxFoRCuKjOp7awE~AV@XYfzbb;l2pK@ zOp6DxA^%WGHGoRmQoj_U?L$ldK-o93J|_#j`aN^knR2@iFgZN})={kY76f|arBQ>i zhNSAnl-m7(>k|UDaYWKVMA4XhPd*oax_2!rET+ebG7r4LBi~B?pWeDW0D!6YR7Czi z!1TXw)fS=&(&8%mW;e{tZCtOFffuNWZ%h!yYR34$4ZhAKMQ|?K4nq+Rz(*qmSN(7g zb6Vs0XJ|oaS410QUr4U7K?e#(Ml@dlc)h_25a2Ah0S`0gkQ8&r&&+~%z&wloprnng zyMwCE(@E9*srRls4YD0#&}4`RLA^ZlTFFQd)&^rcW+=X0DKS6V1_P1RYmKpIXz7L9 zj8-ljtkS4|eata{fe92>ssG1VF;Sd0#kvS-G5%Sabon4<8k+>`rho2)6QX9IF9LKR zWV^hnJOBv^yb%RmY_O1$lEODXLlk6WP$$YwqR6=PYHlz81}Pf82g}_)$fdB^fwA%N zrELE_=<((#zeNYy=9#arZ~f(7)4@(Pj9FN?PS)QI5+`{|Ne@OmleoP)Vn1Q;c~rMX zc=KQSr6GNmT25#tb$bpwm~)4D5`tCJ9y?NaSG5y|XktELA{P)45Lf&9FI-f{+Laul 
znJfiBj)JPZtl@k`Ra7lZQ8U+K(qvN4q6#XnoWy(8gt{S?mEo+?x8X|F?ju4ZLn$>& znux-J0`tq$U3M&S9wJb~yyI-`hc0GC3jK29h_;)P3U>Kx8KHJmR&!<)U}rEJ3+?&= zsgB7{@gy!xDC!|F{rL0QkrV42_KtPlLw3b$)3@i=oR5mZ5v~#YPn&^Uifb>OcRR$q z`DDUqw(g^e9XtNCH9+!RK0-x|RRNRb?*28XbT6aMBLFtcS!WvH8(F4*fbIEKv-~sZ ztCwgHW3Q8~aRk{rXcybA=HII;jhY}W?LHDp=))XRG$%urr^4zu=QZnCzZ6PS;2zMw zig_N8FveuM%$@DSc{^Wi3&XcXj$wnWyN1nH<3f0eQ~B4G{(cx?bkJicJG)MJXo^vz zTj);1diJcvfbK3;tEl_$8g`pv{!ihUzgJfoB_-*Y8`+=u1=RU*BFExiQT^Mtcu9tu z3%NSA6fSN~>)O7X4IKXd&{!VrkvdViLx{=36{XdYM~A}C2h zGldzLC=PUCcPZa1lS3!rLwKhd7E7M5;-sLvoafn?ArIFETMfUzM2BRwGEXBin;HO5 zV)xjrcFze|NAKCVd~XJnUq+4n&NGCD^#Zno4vTInb*|RA4+kgdq~6Z$*O?+^AEGTJ zK2$|4`0p&Q#02)b zEMFDTU+eJ=B7Jl4dWWmH>n2~9@Yc+5G`zG%uAq>Wu~1bak$5FOnK0}=i2$ zi{S86M(dRCBBpxpR;+bJ{F<@Zw`dY(+3&Mpyt3?SSL^ELL$#v@XW?zD%nVIal>9r% z_g>k2c7Ygt;z-oGpda!q;K^{4k=TWoW*W1PSBo4PLd!z*5NDuLxQ(>)fz8rSokq*C z9PXyjuD1bzQ*~AT>);9M#CkC)WcjOBru6oLVk{$fobjFUbYwVESld-Y#DQ~vd6 z{sa{HrC-TcLeJW}wC58tDYsqv7wG8gJX+ou4MzVto7(TAJ$g-zfQEug=S_nWWE-uf zf}DP+Z7*khOwr41AH44U7U0dq7QGXJ(|fA$GBF@QX);K_Cr53DkCNjP3(7~p88whA zG4i2YC~k}o&rF1RPtbL=3Db8_b^2Mna?)h&>V99oF+}W%r#_Xf9~2`V2k*5#?oK`k z7&a`xII_k97F?CDI!TGO-M@!DMJy#*}FFg)|rvdJlum?-Gj_j3|IqFGq1@Sa`#*2VKg0 z9W7oa<>#$FI`jTb|2l=P`Sry8jZr}=eKS6T#@jevcPu{pPUBXGtiMic8DbHSQfw~0 zb!F|AgU=@fh6`Ni6ONLM@|Q)LgYt_`oTeXBS=N{Ap=Up4>%@q~9$3=`hkXV)8(Yyk zMYO^#R^CbpUWE8NHgoQa=#?&fz=6?6GX=QMi4$e9ECy_dfRL61MVj=H{0#}hfmu^e zb&sx6%YA;B@Si%VYZ**QqUkyLkb{4Gsip{cM#o;4n#Wr|?$*9Oi*r!Hi*I@OK6fLV ztbwS!Bk@tGX%mi=XcNOA)%1J|hUdl)Nk|PeEK^oL`&s9*tcIl0MTnoC6SuONOa6F} zr#SY(LRuJ~_41dZw_Kq{8{y9TKDNJv)4_UXYLw50Z42gs%v3>NEjN36Sf<}9DR|4U zp^`NK8lXpF7IxadwG-kFR0{m77Qh6#8K+RcWhz;%9OZwxaAEhL{F4M=v9B`bO}c7e zJoDlF--X$?R>Lw!t2{W7&71cROEY(HZhnaWk}5T&dzfd+SK#jN8aEw zaHZ9HubcQ|R6SpzlQONt9Xe8`k?34cEi4Ym)OhY<26}6Q$#pV>(5PO?~Mg*3&EaM8>{7FdW;(KX>}J>;i}#Ji|M9aCR=tF zY#`SEv?v?^1@|Z%4~@)(hTVRnPsew&oD9{ zn?QDZu|RI@zGRNOk?{IeHPIw_k`VXP^I5LNo-6UaBtP(SgPGM9na-^L?X~mUxL-82 
z@V+%or4ZQJqFeEhtGDzvQd50qLL$V}lL~bEs_gj2)+ZEN2|(`G(;UGt3WFRzfen44w4csnUPTewKE%HR-(aJ+eK682;1 zYf8m(_nPX#Pn?R}+x9tpPx+>P%zTdR3aPSt_@v1eXTdUazUUQp!~GhMVLT=+s~%%X zAI5oY2jgGgx}K}j)q1{@M}q4trNHa&PA~F!D%B-k|Kw2a;#?##>go3inC0Z=NZyL% zkFd)p3_q-w`Cf8Acu6<&D&BoY(n=!|SRY((^vNt+&x}Ay=Y%99W5hB={qYr%lKO~O z!Z$)==az#i)eANzz!{h6XBd5_zb^!4PlU^k1-b}1SXg;wX8wL(9Q91138T(b z6kl6A%aiAxb+TqSzhKCI(MNYyCN@(Q{?ycL&g-U)DQ#o-PRDfbNxb-hi|i#FHe|o5 zYURd__-uYae+2rLX6&Qa!9yb>k?V^Fai90Iiwmg-9sy^(VGZpn5BG3Zct=E<^PwCc z)Y}|}qTF@f+9leqqe-*y3>N^!%*t}UPD2oQlKdCvCh}aInV4C6H?Jq|k??%p^lI!4 zoZl{jRpU+BH)!5w?&ZJP)*Q8&4$HG#FO?HShjW}qLV+MRO60tGaXZjASN7J@AGz0n zbAXte^~7qWGZOZkT1cTGn1F;cV?)23oa~f9mg>&bMqb9OwXwJJm_((9aHn2*vO~H@ ziI;S0^XKSEW~+b*KLe8i9U>pqwd$0~j0E~XnGph`Emx@h3TTQ?>Dp%cC8~%BU!=m^ zYD3@pS(i}dl+!r%5>7G|ZLyT38Wm6sw7V(>gchMzLwfJi{cp1f#4@LoK4dhL(d+?a zyMud90HAZ^+=MudE`LU9<$w)xucw~=3gB&(rJCRQ zCztIa-U3JCb6g1cL)4R8S%DouKbvl^HL0!^`w*v~ z#2|ULmU)Za^iZfA>3s{merL0ZL2T0>-p{azG;6MV{e7&qjoU<&#emiSeI)JIjOM-% zfUhQ)!U~CN%~>#gsX9VYLtNe3yufW@jj=nVqsYwF#N~Ig88=NJ5oxX3+QiN~mRc?` zD{WZq_F}&}Zc&O;RX-L#lG|dqE%SVU+hluEOGMI5RrsF+q#Y|n^V-hvITy-Bh@Ry{ zTA=Oxv*av0=roIOTBhEGnS)e#H0wg==af%X5=lZGxxJ0GBrb=tv4)Mcym8^wK}i8n z2vg>720d=_{5Jc}NPh7`g14gz$kZG-4ur|*TS8D0gPTT^Z8Q;xhTBCs{ZBN_(@yaT+B1cL>DMk%e@L5KeH_m{rFYuAimhH^&}pnpBXs*bI5zcavZ5QXCZH6_#39vMZSiqvmi9; zevX{P^02tE0r8fwI6CC7a4Bv4?b&ZHj{%kL;yKej>iQ4)^R?TUu_=1Xg9C~$r%Jq= zEJs4U9chRJR%A6=J^~u@Uk7wksorzLICit?@II2zJk2@}Z@h(&VS12W?D!jRi-qsD zj9h{XA@nVa=;)9$$DK{8L#EFQ&HoW;%~rONRsb<0)m8qI3-GIbV+`;8{s$?1=|aASA4$n(E=BS0}{xoR9`3b@!2{Wq>8p= z)rh*oglTnHBI7`;h10Spv_{t(Io{;5H2sdhz*%Dho1Uxka>x~o=%!bLNRV2&u{hu8 zrH>vNHR?%wJZD(evH?oZl0v&GRTxYbZAo8m=V?9aeC&ot2>3O_+hP()U6Xp=b4m0d+C)3c}NaQ4F?p>$+c;d$P zydY2JZ(0-KwqjJ7os-Rs=n`Ja=#ApRSo(_D2a3>jFPw|$MJ}_5b^dA>F(CjBR-=eD zJ&kVvJ>QL7x4j$!L|YH;c1%H+Nasv#Q*Oo#hQrLqxGSr*`klF8N=eq)Jtld%+qsb# zOab>qRzhEZ*D=F>n|^YAV??uH=#6>1;Dc!udeTumm#q{Y<+}THn>yngPsP1YUi8p7 z*hUoR%4MEA`&*NY6pRy(kw8=2Dt79@74yz@z 
zoS#%oNJtD14yfX<*Ce;h%dZi(af>PXO}-;)91;)~rLCtM<=W|QzFfSgq!BIHU@OEB zey`3$vQ`Kg?;iWi7na^qo5MRbAoXJZI8N(*NL!nuGgj+GEvDH%7c>X-am28#b$--Q za+`hdH{E0Iv?oqy^pCp{!Go%S99+$PRkciL=`|LZ3hPfh!ZO_*pqQcm<(XqG zi~o}5Osz#c`z}7xunOqc z6Wrj79&%bP@!*v;nS$}$n*SB4V7uNzl+wx0PNOanmU-eVkA%f zDQq=2J_RP@l_4-0KhiT*3;<*8=;-YyQh)<_#4a68$qx&unB^ikiY6?;4Y&%-Z% z#I(-;&P}WlYls2>(cPFVjCuak)6;f#cGAYiw2@I!RBK0YYV(oM-spZ{4RQOq<@_C= znSDzOEMC}T$i*}~YYpGyF=-@?iHQl1iRsQ1b*Egb#&5Hh)J7PF&Sb>a7rw!>6>I?j zvS9L0E+GFyZ^ZJlD3J0Q|CLbkuw;Xv!gArYp4NS$)Nq$h z;^?P9Jx~*J@Pj2zDRzJYOO6@^yH%-LWW6LMD16V_#zsX(GrT!3k9l=oAKZuiZ!2-xb0t4af@gpKwqa#s%d4#=A08eafzI|a1kT+TSS3}G76(Ad?oR`j zDy$*fX2(D_N1ygq36E_Q7B}or5_so;OM%1C3fhLif)F1%L~(~J56mpVJ80U~)um+N zqi`l+X{;ejfdXpwu&^q`WRr#*)E)rqNOXC)thP>-Xc#^*!#d^5%Eo8-21l@ESZYkG zsun0s_};v`5u;4wr6ruJdCtuM+xvJ=UoolKR_#Fz-&ek1PFx?M7lnaS_ax=gNWp{E z5yLEbaDj|s_1%Mk3P)510PXRW)`dL@RtBRqBd~y{J3kPV9Qo(zWqUDz%O{zfGV^p~ ziR9YS0*=oo2RidXUr7LB_eKVQiCZ+EhNs3t0GL=)Q=6;9o>PVgocdUIOp%7<19Hla zwMUVKlL>>Mo`Ui2kOXe1WhjEiR-)a=L9!fq$`C>vGpG+C{IHD53mGUxR++`~()@KwX zbPpq%2D8M!$9?_&U7a`>plpeZrOpmACM|>!F(TTN;Tb@uCA7m1zlwd)IQz+ZJg6+w zw%4~Y$%9VCdBFBFUOh4B94)>570Buot`2ydg_;rxph%Tw*JyDn#F-$~VmnGOu`=2@;{ncdsU*kUdzZlyzl%i^Uflf%6Wn2B@PS z9sKDi2sr29?o;v?mh-VP4)gA`vKOy%11q?3H&X1M!FoI@ZLoexDEUGRJ2 zHe#Pi>crS*mc;{F3zz^tEg=dbz{@l{+hBu89GiW;`9afeXm~MYlwvttp{M(gvg+Ko zRWotsJaaIcPAQlk*9r_uwNPj|k=$tRZ^L91qds+8)x;N+2NV;g0ihkDi0e9BPB0!HrAer+Noe%2SE$z_-7X*qRRhjJ`O00ZjlW@B}0xUJa z+yJ8LtcI%N$ny2gwzg+pt7PyOMLl9aTlQRDlJsL5EqQfeWEde>Kf1-J+rgkK<`38d zAO$dch(|nMiaVu1vvE~lF{0@wW|L%K6dwuSe)9g!!txMbO5FQi)kvm0`LOxX>(_&L zM7ZiCPbyvfQAeRqc0`xk(TYs%DJSBv)D#}02qS?^ege6-sSzo7arV9c`3%K+hNEKT z)=>bZ)K>-t+`Zful(fmI%5{zjNAYk~7*%*gKC|=9V`0cz=UH0&7z;aIOQR@n@aB(7 zY+A`dG`0bXh+lNi5z`L$lA7Y4LVI~k-jR<*j{9N z%f)E#7PpV(h7;C)5_Yhs@Hsib^Ra}IaQxuKG&-h|1b#tQHWBmfaCyh{&1bvliG-BP zIcNHA0nNvW;aUDS@&>hFbO4CnR{&6x0RMJsiR+**#nO@HVz0f`U}|2_e%lej&xVzz zHnh(sfljUgsu#aAb5 zGF<+ogQNdqTeD=dTeia8$h|9TZM45HW-ait$Lp2tlVHyD-LAS}THdk_6MevDdEU%% zKtFh2nkEr4j&pUsVo 
z|Ja@6b?y?)e~|merm84I#--(?XCP68i(7;ntGZ9;ac_)(Z5U8KFhr84!8wP4JLtGr zn=(tptclro?SIG`Ec|AH*Q`$PvuQnn| zrR3oJcT4?#qrlTjD+^l^pOvJRVi71Bo1h*smYRKpcKwqM;`Fg^u9j3l%?a&^AAI<- zay7SC95Ypm2(+eG%DsxlRY;gz7m+e7sIAa&z1dVqQ(F2wCvw+zvDk#s=nE%%oGiIt zRlD&?&#kiiBLHsJX7gsaF*&7+=F55@+OzNPc1(L48N)6jYA_j+gr1fsPPbj+wq0!+ zhQ~yV7#uY%_j5o9tzM%ostc?u$*7g{Mpf1vt9@z7vr=^Zg;!^>U?MF2c-udeHyzH! z%hR2Ax$h8n%E?|t8rXlcTAW9Y8KcgFTro1UoX%Q*{iUZ0pA9&^9eX0ifWMP7b!LDX zVXh6Zyfz1veX#QyV6+CO9Z!xKrU;f7WoRP=T&wO8YfBOZ=%lK{lyEqZgXhDd>{%al zCdxQ?(%z@Ed393)+499|F!Nzr>PLD6ckYvA4!NDxfyaAVsYZ=U=fF>;6yADEUSsXG zDm0lV)X9JN(d5;$%P#-Biw~S6w8~>**wwdwZF)O^L#Mz~8@8X_L`VQ@F-e2ZR(k4$ z?V2g-K_<%Xk zSUY&MzN3lg>(B~swihV+9;J;9vXTrjYNYnvia?yR*^Et&ZxMrUn8AmLwIhGaPX-Qk zuVWDC{kW$w2>Y(_Tpz#@&&7YBjFtS@UI3vdNxvtIRICTm4AI$c-9Ofkwzjv# z=8QxSXAa5YUZX>1Fd=$SiVa*dJ}A2Y@ez?xqY8Z$oMg7i5+{uabPW-8Q@7cCZC-qa z3wbV5c5e4Q87j+fqfjN(I8h zK@S2P{-{uXY@s}=4o;qDz^GaQJ70uzPK3-WcD@FvR0kRO&{xp#+YfryJTJfl1(OyY z&HJ5@3BDdg*vB9ucFhEBku}&LeyO!Nxu;qmJ&1rG7l@|9q%MpbN95%$eL{INHSKOa zIgwBK#xLV_Y=-b9Jz1-&n)37^H|f{y`3AMCP5y^6j+_0RxXFTCiYK5;(p6YEup5HCf0+j@aKYoNG`ixRq4NKg*R6#84&a?eg^%ZT$ zjmeddZjC@C?3qJmsZNX*MGj|!4c_L%bGO*QY1+)dXRyjQm0EtqwB+xld1abi(sp?- zB~Qyi-`2(!=#(iP^V?+=8k^0~qljx0!j1|!i8D~7#Me9_X)wFt$W6eEE%MOG%6DMK zFIJf2E-J-R(>M!vVF7DE%nIV-3>z0K?j`(UdeV;+`xun)V4URxFTq!T*a&c@E`ik9 z_5G~t7xeEF8Swhg<>dFQlvuERm1sc5d5m3Ysq#UUs~@azznl& z>E0+D1EFO?RzlwPn~74Y-0K!4EH!K0`t8VsQ-V+yEqNe!OU5=y9{Ci?j@!*`LXx^2 z8&K%8KOh?(8{2_^N=k5Ey&t;tgK0k%u7fXEU))uIkSJ9t3lqkZq<1lhe1>$CdYXNb z_d`vhnfTVm{~XaU?zYvV4*eWUqWXpPn>C<+7XDNJL;@)C(Hrp!19w#}FFKBn@@x6< zsNLuz#j}phchyFl~=w~j5H*U*O}x_<1A$7SbEKkeE%`Fj2#6e z{VP3TuDF{?A8uNz_;S+xnJq8ll9$^Ph<8B7A)^@U=2zi+lw$(p%zisP>VP&o&ibC- zzFMR^P^qV{Z`Bh`<+E9s7N41^#z+s;`I$t;%mC=^mnrU;+~2hTzTHz*pD8b6U*rVB zJdS@T%T_qqD|u^qIs1Cj#nP6v53i7kXlca;w~Q1hEo{s=bal%as{P8EZ(zb1=B|2E zOV|As_Oab#Q}jN+D=A6(Rav#tDM9ts<4iAxyTZDYF`aaA@5l81JC!aOmL_4u0E->w zJBi^zo|;p6(Q|iZvyj<2Imy@UkiPoN&ZK%(;Q7oF9KFd>TIMXH+>Mo-JegD9jL{oe zLP$4H_0GT0!k`T|bA4cFmYp4k 
zr%#_UK7Nef-Q691_z`bD^t7Sy&HZC(lT_XD6?o+%+DD}dZ(Z?XQ^%C#M|FtfM;_Am zMI3A-Qbu(l4gQcqA&g3X?eK`$#6fhgy}2(fn-dd4;YVHoI~bw(J}E4e3sfh4KLO~$ zZ1~oU-;1m}WSI5h^ItmB`Rvv4c4|m;vd@DA*L5vQD00xrY@fHgtu;Iot>`a>x?Ue@ zvLJu^E$XW8I&x!Sq)>HuYASqc%HZcmfiP{*dV23r0-x%nR%VZo9>92)BgK+mOyt*8 z(XWl;6^jiAWr#5?WN0r=abt-w?Y(?C`_1A6!sm5(M_H7KgJlSNdKFX`y%LfM-z7hG zRnm-=wRtT86lo-6r3S5b8CX5|HKEi_fKtKEr!Lk)p+Eqf4-{=pm>l{3hd-#++|%&B zN2i8p;_!r}t$mor7bXmQz{clFbjPwK5bgI3Y8^h3&a0}Z$f&Ewkgk1_^cPCyHeAjO zc4M_5fu}l6VP=g3HMi9}&|x$-IB)P5+sUWl44)tJk&)_3UjYXI*NiHfoCmLffDW;r z%qkff852AE19f%vLd{cXmEcn3~LfChM^kYSQNkQg)Jz zd*0zP0&ebDPMeL>ezZ;bSbR9c21gtu>HUwkwQZzPbrjrap8$NMH1|cV!mSooe}^TI z@9e1k!o}t|g|-mw1ehO&Zh+%qHh;ie^w;jNG;P*j_F*>`_>(R#3t{hNVRXP@KWD+N zn_tlRwd0jC6A_e93iv-T65k@LQtt&oUoO1#&kwYFX!F+hBKkafcPH4W(Q8CF*IBF- zOKZA-3!u5i_CokG3aH0aG)qQ9ftz5GvjOlnbN891T|Ix9<#Az_L{W z2tZhNr6(1XFZJ`p1QIc#@^j|!nBa|1)czNco)>EGz_ymLb{GId#RA zMj;C%Y^XvKVt}$ek-Kv8QSOcdgJ+DlK7yxYFD!3|ABEpml-kSby<{GF{>ObNN4P4f zgv;JR$CW%H3X&Lr@&OyMA;5>HtnALs$?5<^G(%f|@qI=kM)-bKIY^r~VBbSop67Xz z<*+zA)qTGLb92~>&D(Rg$9HAYlR~sqSn<-7cThj5_iH)R|8rSmO+&Ko#IlbeUTm7~ z(#FA~%wlIbCkSpdWqbtDeE}khPRty8gs0F?AdrD)8UX93QG?z4>aO@)ytrDk1k+*G zD5Rp5r(^O`g|AqX&Cgp^adB_+IU1}yE&Lw+BW}gF7NV!W(=3tg+?l>A`q9+M)|2s5 z6$HQo2zanSs}ir&G#+xJ9)I|TsI0pG|EtPTx+;O6^)9h92>#*wuk6k-`uv8$u*H97 z&Hkg<@tZaVy@q`^*3MiY?yNV3Wyg=~dKU-hf``)-5gvFV?;i9Q;Jpi;nWwGu*sX0R zxE8<}HZ{^hO#hwZ3Yw@DYOdZRylFWg9HgAUVR83oW_rx#As?1n{Kl^1v>DZ5&KFGz zl+Rlfy&pB0B_`@ovp_?R4jFK4beMj^ZSd`$YAQFh)UbNS7VtPQ_|4^hFH4+62#~C3 z0;lfLg7&GZC!AVa8F7F@lcRKq}I4ds^bVkgkzN=E*ET zwa$C{U8fuF^ZO~)&V1#jzyK^E6a|K*TT)Vz@bO~=kPhkU>9M>*9vIX)(NWXTXzS>_ z8yR`d$}XV0fNvCm8H)V))IRaJGpzv)9FSXFjA_#3RmhtjW#YYkOmMr=72s15Q2!t- z>MQ?V&CSNVpt`WtXZ@eNU)wLr9t?ec^Q64h{F}!ugV}FlAO+L_R(Cd$J4}6S0_4eT z3W|#Is;Y>A1V_4nEk1qVmFwuG?E_XgDXNMYVc-TPk&>B9q^C%%78*WXNe93kfm)p_ z`)gUUv!9pSwe<0M0i*2edCqmDu%81FB)M?+Jl&_!$`WH$)z!m8Lqn!qe;wF+_V6Eh zd+gF?E^T-$|AV}@49l`x-vF(|jX$gskZfWU~5TqNV1d;9zNoh&x?vfTHq(d6% 
zcxe8McfWhjyMHrt{J+dG^I^Uz&mC*6Yu#6#*STB~(@vsHdv^^Qj{bBKP4F?Uar}X} zkWqS9KK;pv*_*QPJ*^!daVx1DKY?{yLXx+KxS*hb;jwzMbwRXP4I|X-Kn8FZzh8C1 z22veeU5MZAg?~yBGG=PBkg^LX`Id)LGIP+SJPL~9(r#N#oGCEzyl9A4TF!&t5p%uV zbUxk*d<6OzS#yPG5=Xl}7wgc+14*4B(`vWgq=-Cvaigc+{joIv3R1$(5RtvjJwbuz zEj>}w$a-~)Gj=d*gXCr}2nMjvumbZM3+pdH=6^t#`0A)7Ar1A1WzlXcO#>2~-lFE( zwv{KC02sUXWr~W1^zUL{Pqi1uHx{s$#*oesZhn>Qznr+s&8wnNZmUwA>)ZdyYQS0- z!t8#j-B{BqcsR!$lSo3zdCKdRlc_ZKetlnWWV8Rmmf`4Va?6(D+_AfLT!k%#f0H?Pf&hK--FapVBF&bLm?96wiE=iMkUrIG!>qfeZ%!kU@Q3Mx_-jdX*wD`9kUge=f*1L2lMk|H@h=dd%4gU?g zyjbn?{v)jWi870)hy00K2h+pdh8@~&Yyj%$_P*bEgF6yPRo9ZsVEAFaJRkZe&NKYs z44)yAoNOGrZ&{nrkaZZ3?)7aYh`Uzvtp@`5B%9v6yuIgZ@3+soH3*JX;`fEi)W3%x z9D2ralQjGNhM}VSMp+1Ioqn3@%wfOjy#vj$0r<^lgDiQ=krZA6cFj;;!ePiI)>VwU zy*N7(0iX<1cuh$FBChTc9f{|C3d-N_j!PbA@Wtf2F`>@? zyWmK-Uv43jhyV}(H*_OdPiH?q2%1H>y-%(noIF=8nU@dHZM56$Z?ca|YrVDwKhS#D1=EoN4xS^BA;6<3!ZB&b5D`zDdv z(#%I=15(~I_iF2U7MvaaZ}Ol7MItYYix9*W6j(BSO@%YtymFGPo|gbf#FqaCmR*1{ z-MjCx!fyjU$&s>gw0)E~3tK_mLZkHQ>_F%(c!Arnx1;O;<=L_-XS5GTiFMzPm{$%P z4n_3&ADPQ*!uf9ZF@uoct)+5>qy1k^U%M~L7v6unm&8*=QYx#kFuFr)Z|%Pq$*hf< zXWjsnaXbZh;$Uol<&frjt+vQ3_XWmtmzfs9)cifTdD54XC~cHgKX*{2%b(ZfC;4R} zW!alZ`xun5b#AeDF-Ge_+8OIYd)-Q=k&Lp3-z)=;0Hymu&MrcXR?1y2%Wk5mf?AV= z6?OA{wWCyXwtn7MozPDJy2go9g^bx@coJ}RfR z)B{fFU)>KnUx^auhlD@Xz0aF>Bk!!7kYDh)k`4E|Fn7;Xo;xa-% z0PGO?WABj|e?~{tUH<#G=WMrUGPE(N)4e<0wKm@Q_>4!>GJ+2K>tBdCMQ_{>ba_>h zUDvNz4U%U+b)RFw7QWr_L@F!(`q+GHrbm6<@4h|r-{3@EFaG@B{{x)J+(hRFcw^##ish0^UbglYk zbs-kd%RIYah5J*nqOIq~y=2-E@myKRn~*HIcm_EUZyf~?%l|7HcA=~L?@pbXH5Z?& z#o(ylpV%+nT!UG~lX{%iclm)J*uN^!=ZM;Z+Qx0;9o!j$Oj_`#Q;C%ymCs7GGZ^U| zL8p;#4~K61IgDgn-`POp_B^W-ZMr!rNI{t6);0J*)}JGe9*`;cwXJCXba*3%^rF6I ztriqN9aL-Su0fB|>GO))92BI=nSZ&^967lHUT)<|DMScaxTSAahyW;x>J{&X6)29E z^BVPFemFfaxjn7+*`) zcH5%=GuRmz^gvTT(rSK2em(hO%(zlYJ<7-n?CWP1bfhB#N(ny#T66M3B}>?TA=xyoGn0a&A_0hIg&n zHc%USu4QH{f zd~2`*r}rCzQK5VmY4kx9`>Ik~Bv=MpBLDH1kw?^9WO^L_#{YaKwd)}tfY|i6sfAQn z4hiZ9u^m@#$7cAa596(y(WPGRldtJXOxoIF>VCPc0Z31`IUs73-jdK&NDR%4oR*|-90w*>z&TpOV 
zSPhBxDd}|J3BJJPCT4$=))~cxR%@Ua{+C^0lxP@*k}!5iR`vYg#JOL zmG_NUmaa@?R-$hepA_$rsOl|81ofG};Hej1cwodapP!>z3?}^F^>}jmZ`B2M<)PQ> zMEs_Q#X*wu^Jk}{A)xx`1~)Y|mBML?Xy~~^4`NWYt*LxKz(76Zq%b4VxBj||20$bU z0F0Ug4ij=omFc<5Je(m&2%4|814bZR1w%mCv#FRbqlpuKjw|}LWs{P#NE5+gFAj~4 zN=ryQSzB9E(bpel%6d8j;xZxg*MIE`bLu&3)NB9NTZwa}2PCQQO;9Np`<;pWcQ!VX zHa5>Uhf}+mvRp4&_d@;0xF)AvdRPst`Ac>JL7wT0nIRx*JpDy7&UM#~6t-IbCE{S9 zjB}6>qQzJTwjQHypy>%b?0;q79?^8Q7t#+u>dN?nJ%B#~Y zT2@wc=(4ZKhw~Dn)%J&*_T$Un(YZM}aLPi*0cXjiMszE#Kt)#QrJQm?-8>x|pbEfEF#LrpUm=-u*z z)HJ*AtRaKx+>5i@^NO;k*5c7;n7G+NvvdeXDKG{q8IOLbS`qbNEP5kZuGg24)CZ89 z;m)dTr$}E%ty&4+Uu-rgLjLG}6h!*tbbwKn!xzF;-iRpGex=yFg4@1#)q~4_8Ez!o zzQ^Te^K_gkT-0luWQOIgn?FgqRfO;12PKa(?@~QmmnQDOtp$86J^8EM2I0ZeGUg(cObI2)w>~8-qD<`Vt3D;!dD)yMwAC;4?M}uY}aQXAhQ^9g0eK#3fuCp2Ir-&cj z36!71%89~;aF=%T`T*>&hp!@NGN$GwfU}?Z;djQNwWt%I ze)Ks3OEW-85{bonR|V+lszF?)GX9Uh-|`gM+dM|%-}mFAzWDrPh9R$F;75bv#MC>J zUhIDgnG6jGXS2u%ES@UKm!nrq!BRUq2RaguRWGQ#+Ct$0U$1EmpU*7B#Q@ca?hPHt zwbEJ&RI@%pPvd-B5YyZVP5aw$m`P>|y#zeX3Ug$Xo;A_+=<-(Kx<-QDb{v^-7RQEf z`e7tdL@9ye5>@$3R7P{4c%drD%|6Z4a0q$rdm?wE!Qzv|E;GwgLA5%meun}94Odvf z;!FhXNB%Ct;or9v$%IK*6BDs$)Xes!E#5W~5Z`rs6y&ZrXDUXJVb{Av=6$UyE7;U5 zeQ3fm`tv-lPWS2gsQ81S&i4U_sBbu`?N1QsL0`M%oB+g2TPuQeiJWtMEr80%>XBuH zC!xhan!t-jy#ArMgbIf!5121c& z3wwL-*@HHM8|Cw^%v&|!)I1R(SwyvzbG#-A&(@{!r~bg8_?hnR2tO5m{nTB2h3C&w zq^YtHwe9|3GE4ZW&k3LX3vv-Qsgrd3m=QF-pK6 zV}VO61M~si2#jq;k$o3v<>}j+M{&6RK|us*cp8Vh$e2=WO?q^mnft41k7<;63KxJL zpBk*0KBxN5_Ah?yVL)|m!E5X_5AvD=w<2>c@P{89v1k!pyLSrD#KqHZ5Y!%>JZ~>} z!3&CRdJAZGbl(|QDoq;WU)lpzDF?OW?9R4!HdhQQXP_;I%P6y;^Cl-*&Bg`g>PfxDkmb zi*G((0pup&gIGyY2v#uKCm3K~;@C@3@0=_ zYQGP>R9AAOe|H^xIt+NE{@NZWkSFf|o_eK`ub!5X7QrI&kA-OXf!*2=`BTT18WeYZ z2#zDC>Otl!qTDTEQ{xV2ISmxZT~LniwuObJB0zm2Fa1rF z`>v*G2Yk<-uje;$ldWL~!{ zN_?vUO$PxVZ`cp_#nr7DUyb>^7NCZZQ`NTN!^sa0r0QqL+q7@Y zYI3fBDyyD!p~ms!5>kP_XinSDXSRf2;2*nwzvJwG*K1oVnKDz|nvbejiQfyg-^IEM zgIG_OC)T;-R#yvpI|twHQ=Dv!++W8_g#7#%K#qBk7TIV(gABpFEvXG$KW_P+(g5y0{A*vq6 
zc`V$pwj6nNwS70t{XL6EVOKgt-l%g+PuU|#*;_F9QX0c|>k96i@!CN=Va(FH&s_+_ zpYz9%EbmvlaE>!cffh?iac{|?UEimfXelLum#G-( z1VsO@(K0T{1czJ6!+xij}|iwK$F95vQfZmgk4#hc+=Qc^oCuLHkssK!?3@+M?3pPQ?dDtD+q5 zEO<29Hwi1Jio+_>%ab-ch=9m49(_9a*9UwPmGry#-1lwRZKA22ZP<|R6=_+C$4~7A z!ZP%NXSaJIeCVx_82uw@g>_;2YVS@eSE1I@Y{*W0un8zBs?S;E(yb_Td>`Y;cPQ4b zm6!MbQJqh^jh6L%w^-y#QXcW^%_iwiY1XzxOW`J3R{9FIEZVa%v=g)}0pBkx>iq*{ z`Ct-`3R?a0cgyweG{UUJw206js)Af4e||aRZQxHeIPQK6fn5oDF3C;`aL9$j{`fjo zoCeJ_c0aP?N0$)hClYm@4u00SpssQUmTxa6pJfIxH@cQ2`Hu-qyPe@#`YUsoCzS^! zy5+PXW43*xcx==jSB>?l{Dx@r&d##;)V{Rev_3*2`I3lL!$2w6`=k2F zp^&_0a$1(Qc00npP4l&3_kLMYEXI5L0x0E~Lhq6RHbP&v!xkz*SBD!vSPq8T} zgP+`B$hY5Y3=^k#_D;rZ&iyO#cXr{_Sh#62i#qhf_su1Aqz%BbZvl8TsFn~E@lhoI zg0S@^(u^Ls?!*OPe`>Za-D!VM2b#aVP#o7JBVCfB*>V2X|E<@xjSjWgR;fndNeTZPF~K9i)*vuwtb_a|1SV zF10eK9)8(R?C!Y2w=I}4^M(hYW*drG4S2@+bdGH2Ysw62kiRtCe%(4+`Hd-$)$8Um zwj`X-x?x{`m{1qo%g>hL^o5Qf(RE+fJ(l6JA-}{ifAMZPYh|=#Oo}n`eEolbn_Ci0 zp0f(8U%QW2<~D2w&2t=VNn!dxOJKQp~c4z@t1ViGM02umZD21}lhNn9}KDVqHV;CYz%w09!K^@}TMH_0Qdu+rm zMw$=T@3JG{q<`Ah2zwtRWkEvexLFkR>ZWiZK3z{~z3n_T&*~cUtg+pjLF3%o+1yY` zEgh1eLX#WxXdnOso3&-Ts>1RHQAGQpt}cQF{E=4w44K@dQ7f)f5_&|hHzAERN?iT! 
z{^K*HuEM$|{}oDOYVYgP#rFZq+L8K6iK>u0Y|il{mvg%Ex?<`1YQ|)j#$aW1BeRJh zDdAtjNm5y2g7&br^?DG*QyGO{nlmE-Z~PM1dPwzh486tc@(7Zo$) z@cXx$70EhH18&b1uxN+~MnqJ4S{k0Ih z2y)h#gmTzHJgem=g(iZL&srrq@PDe;2Hw|x7S7WiVuqK!Oc&?!m`A2vjUR6m5xLm< zjkmXMOCt8|vl*?oI3pk`d6(Mmbzq7|ux{Ly@{z!r!Txw;eh5Uzexz(7oE~B;--W^U z7*lVMnAIUdHe$km>#RcO&HGWIz)BE0Blc&bd6R32$XN5x*ZW?0U%poL)5(aY@k^Q* zEI2xEt8QAlYELw?0bB@ukzN3 zU&!nVg4^0ksE*)bjt~q3IiJSZ1Vqb`^6oe06c9rpbr@Pk0@zYMRkn3F_f}KqBidJ8 zhzlNzW(foggi2!H#nGw};*P$SEmB=RMb4VzO(=4q^Uk9F&OUTmP-EWxp>$a@wBl77 zR=Y!7(&gUgk(3v8@IM(a5+_r?Wy`-ZU+h{t6P-jw?`rYD4v#Q2?h+dnc)qBL=Sa=jz zoHo&~3UbB>tG{vW;18N^V;wMma0uYE!A~ttj|T=nxZ}!)U$2y^%U&IHX06eP=IId4lPU3I_Vxm7QG+{6|w4Nd*Ra|4G z5)-F5NuRJCwp}k_)YR1)Z{jo33fI<4Y*!J}ydj6hf$XJJN}td;Y-PBweSAvM0eA-; z0o6YPI~Fy?%W4;L%>zoqxv)x*P5DKgUM6{&omiVoyAyayr7MI?#6?8vHZqn zzJ#7CB?HQCu0cm3(Js=Ml)0 z>%Ezc`kI+-rzx+zDhyGm)|_!dP2F)bz?f+ebD7v>%5~(u`zeKsi`$w}VPsc~+QH3_ zG>F`fxTb%#5AuZMEHp&J2=Hh{aj8S0~p#?^pL1Q#hJL zlthd=MSxqloW>`ZAEyPa^9bbcWq%Fn0w{> zrbrDgI=L@6J_X>?94V-ivopZR7X2&d3IeRA`ImZ7GO*~$)b>{U9pQ(6dPzy4zt4`J zIG&_@{5~}5(5SVQX8+Ev8_0J80x;5aNG6p>Fb27}r6sd~fWV72Ee(y{R`2_g0Z!zGygj0XW71+< zo9-df3*-F)rwC%Th|2C8W7!A)OMD+TRkv>h^KO(~7gf*F-3LYW`5ie>oIc5D`vk=< zf-|;|v-(nfXuJTc_fHy+YV#5Qv($WrWO2!Rko3)#y1oo}0xB1m+5J(YwD0%2 zpS-{ME%DB7zhj%)httHtD1l((no4nF)&L}J2|tJ_dPR03!|JJy>0A8BU*F1b(%dPey<`Z3FqnEe;=IwGkH{!# zHd##Y)Yjy-MxE`cp|&3~ z(QvMjtVLh##as<=i>=bAay-iRGl*VtQ2q`gFuDCEe#VYsq*>`Rsm&dNldm}haMfIou?lr;smN^w8i-exlh^+Edjw3TO zvn=*271K{L^0qlM_KreY|*pchSOJjZ>FA0J0(XxA8_ z7kgzrzrmj=(+RvVK|S<0T+erbxx&l9 zgrJV7sd-v!rM)%JqY|PO{ zYdCG^Ua$&q`rdml_&^swznT#1eYm>!M#r6Uz>3Y|v7S~jLix2HLCk%$og4kklCgNU zjPxXgo$sn)9MX=7n?=|IO-wQBY47276tdj18XYR$mU0ka+wqjEG&l%v|DlhVGV&A= zln>s#mJ-7k^MmAu(CFiR3K)}eXtn!{vB$IaM_B_fyA)dmIeiU-@sZXT;b$a7&T`hi zUcw=tcE?wNk;2MmXUahe!zKcN;*Y9{wqkOZHWGz~QUax(0$(KWsYH#Ap1NO<=!2g- zr!sT~(s%+SaVKudtXv3I_E>;ys%nsU7a6)iC>yDrNe*5ZvcQrf3)+F%NuIQ5sQr@CgKrnRYDxiHc88+pgND zq82qmimPK{!KQ*U1Uh^RVUXgyQ*M+tdp7a}klG|@VI$DNNp_e5CqoEvVc-C0Hh+)P 
zP2<(Bz!V;9f_1ujmmRf*4>$V_TbW)QUKciu&1QO%qH1^E&9;Hhb+Eg3H6slbHq|O>11plhmYSM>U0vNlteKfvc2!l}xG<&bkaJ(l)2y}? z;Y~NMJl|hZQRg9kK47|kObcHrV$WiEr2usrlay4jKX2+XW^{g(&gpwp$bR{$c?#4$ zG_g%RXkz%m6hlq7y-=?oahPo-pWrS-z{Zd{n&kltD^DxhRaoOWjLHaQcRbAW7}n-W z2UG0&sudN6g!Qopi)vK?P2{ar_;+B-xxbn8q8u!$OS9nhV1iZrkUa_sg~3*NG}co* zl2;)x;E9rcNLa8mF;w8S$Z+3x|Mg!{c<|eQu7e8&q^VE7@}lfx1LEFjKXX1w(O{rn zM?NAo_`!@3JHx)_B_;s`ykAu5pW;w~3B%<*4(RcjeRUq5HUimnjfN0+T{2NRu2F#ey3BmdOKSoBinu`tw{qM`D)`4UL z^*hNjMgA?k;)^v2_>hh*@aA{uk{w=POphaAfQ1vXc#fPfOg$$JIMzu4GdWxxkZ3hJ zISbCmZ6qKN-KT)&{0Cx)pFDWz*F5w=@yE6Jdz}cu`aaH9?jL_k1bp2r9F5*l&{9Z2gcVW&G~N_Nd^Nz@)c? zH~(-VW39Y2mCH8680Q5`KmVhW2K!s2Xa%TdQf7$wwoUt8LJyh`r|;Rx;|?F{gHRo* zFf?yX1{0Xy1AS5H1+^2p-p;MbdMLQluFU%8i#e9U`RPNaN0N89)DnHW#MvJeTJKoM zH$`fl*nbRFvV#yFoX--ND-%sIc4-o$I1S$pKutwK{4!dJo-a`421_gIA{%VRPeBc zV&eWd+P9~^{ciua^tU&ia0{+826Hz#!;hRhj9K0|;Bb$hOY!@LUi-C+wswn6`1K1; zs1%;tUk;qAR%@+i){grr`@z{&;oxEF@%@*Qabv>oX1V^cB$J{YCvoMOE4`%tbgosS z;Lw{UIYXe1${ROHki;1&XjgIJ4{lG#W+1qdwrf41i#=+}1if3(1!v67k+d0uX<32a z0|R6KSmu}rBuT}&5Xq%ASUMW#Reg4?{ zBb@Wek)`UNVhVwIh20iQD7J_N{8~e`F4WElUD# z!@e`Pq0{KDgu^U%uwGRHiDS(1%4wl8!xXvwR)s9(8vjbH##_OL*8_2kD$hZ*LpbrDn4K~k^W&#Nfi}q{dL-GAn z47`tf`{`|O@1gB8NglLse@Vj`oN4IlhIMs`hiqq3MLR`jKL@cggERWQ_WM!oBiHZP z4Z+|X0AbMcfc>o{k0n{TmqJ*qs)uDRcWlDe&aSWO?NC#)mQ;qePE#z7aL)cJHyi2R z2K8!gY)Jq(vEiBJr>deLa0UWTq)MilD3#Sj5S@t$X6F4R?wyV&993ma2|I1B-}vY* zH^mT5a@hHX++|$n89#_x{?;%ZEbuuv5V0N_@=rO8-hcr;%ZpB4wu;a)I)fjwhcL(n6pNg2G_{J$c$##C>%1uPX z3jc;3z1~=EzujQRy19zrvr3`&zLrQsSZOhq;Hl0P`u8(J{F0HhmFAAr_}Ooowi*vT zX31|(U_UmqiQG2{Q}(D7E6la3?K(&i&9$<8o3l!|H%F}#)li#Vmc4{<)31hTDYen7 z1W6f>&>jsp1!q^$ex?kqrniKSu6@h8zE-LzlxU5N?QT;}U%-;~68o0r#VI(hy&Z13(FRHHuEV87z&N&Xx~ zM43yK4DQtsw1{w{G%JZ>Z+_-RX*6L<|N0cO+9{1y#+N(%HOCOb2pFQjrA2}O23#?J zj@4F#BOFToUJ-bKs^*Bu@StrS+1^rzf z+!&+pUO(CQmq^n`F2w(y#nf_2KItp3+(}g|q+;5pG8H9JJK8_?H#J_q$9xw`{~8$X zYhT(`=Q*6`bDW{HU2F9Df)X&xANNWt~-r>6G9(w z_?E9FydV0P52^i+a%Uc{NXl=PT<323+D8VhpBLK8T<^NurGFf%Xj!m}FW41^4m&sO 
z7d-5;c`enPIy_X})LgO07SAEsWdb`PZfL#QGWIJ_;oE8o z40$9>cYH$Sct~M?-k=aJx+#M#lk8=l7~eGCyNQj9fxW1z zozRtr_`cPl z{Jp;mZ};!qm;3808Z>Y22~;6hT|A`F0k|3Ho^q|)f-c?+z%AEzp;8ab!fbS^Nj=XDbf#t ziVSI7lSIdpW>quku;qP#KzMo$tXO$3;(Fo8QRAIVoMG z-IllgF040QRVFy6dVWmf?I{v3snk)y=+A|forNn&e0NglD95L%BP@nLG-jT=Yd$FV za=nECd9%3ip&(Lj)PT_7f?nVEcLc?Fr*ggToML+9(3;yTwR4Xkg(ei7Hd(X5OO|JI zadwKXwwNXi8LQ<|B4T2WUoKKPyp0$7jFc;pvSP`0+g}&>Zmgy3&pDD6?iC$`C`HfM zcqu80n%{(LM7bTphY8Q~-*)(}`fFi$-vb<;-}_VmEqMy4CN%fb_8H{{U#{9qi(Al1 zfn@d4SP!PpaXpociE=ufs1s>yTdDrbxN#2QRl$d@)988B!U{b-ja=*!+5KH;A?Zj5 zKksit2=-O{scv3tz;#G2Ri#sT+}n6tIlj+npv$F@or*XvxxJyMy>C5K8ym9oP5L-6 zH^XIaK=^w$eivicZ_ha5&?7lUF5Dm}i6Ktg_Ygr&*q2{`-o|C3SVlz75^R{WokI5I z{?JbH5G8YYMkgC3VfQqy=M~$R&-C7XjZ!tFp+{0yd<3i9aq7$!ABJO*vgm^R*F)E( ziBsuT!)+SryxoKaIz1<1XnANe=)rZ;muLG#5=WXyS*b{r$VUEy&@WB6K<*|#f!t#Q z#>nw9ta(&@xevyFDlkB|YU^swLh6W--@L$~pW_-uHb>5yMat6XB4mA!6@EnQE3|ve zQ7%od7Z9z`l;!a%Cu)0A%b&`bUHBoI!s~)gHC~}I^k6kydKhQ1Tc&F(0ss9B#{T_T z0JOZ=2D;F_mBF66!wga&+i%}(FA?TT#y<0%>Rj^LuNRqOQV5o4f9`7<`= zzLhVZY8zik-}W%$4^IdOIk3Y3-(J{iI_ilLo2m?&5DYb?-!t4SUjjCpafkW8eA0#hdWp}tTO{Xl)F*k0ph@4!2c`VZ3&iccw-Z&j@{$W&{eODc&HpfkVe)u{BU z{`Pf~(tLwgFY4Z$;Omg!CRT07@FyevdP@K;_`&>j2nrtmioa85G^w|0x*wl*?w+i*Jae{KUM8pPL? 
z7i6b=$$h=_i(`HpJzH^s%oC-%QQL&CYP{NGpan+#f0k&ww$Pv5pNspRVtTQAx(zSu z;{fhzfvzo_K6&0|^9|2e&2v}u#Pm;ZQdQgy?n4IGu4vLy{Ni@LEY45zB4;VCcxo!} z`Z~kc`0?bN2uXAia+o^FFT4wLFZQXEfPXFN=Vq_YLb1PE%}idx93l!(%jUx=AHm^N zRP5}^V#x8zbZk2eS);Ys&T9&4y^L4iETM9CFl22ONDCfY8Shs?n-~=tDDKPiIUes%O&p9_gADfvO1&&qX7%cQFZpbU~ z!(kFXp~AXw?t48$2Z}!=Ile_@l-v_Lxn5O5x!&e6&h2RNybJx>0~8<*jn?h@Fq$G# z^!YF`U`$B!LqeYK<;SLKpCxd}ly;R_D8b^;$cU7wX)eKEln?}L`r9@ZHX}G-21zR@ z%En6GtK;-CCKRx9tsbS?aY))yptu1@XL)iMWtYB41gn5BBq|Cc@=`-X!;w8pLxueL z7`?&MPBb<3^h^Id*1=soxEim*7&Af1mG|(HX7)=h9=de)fd6%|Q0zHirKLHk2Qe+{ zM86Cx2#&ubLCBJUg^Q9HM10~0vMewOlGqZTVD4Cnis@ESgEE=uLEK7!wE}Mj`<#Ft z4w**OR{={jxWrj*##nTg4E(rD(}65&u&7dXL@f%O$ii21DhiaDa^3U>W7!Ae*wt8f zody?!$lw2+H&posDQy3m&yE8D-fQX$7sJG|Lj?is3gKHUoPqk8kE#g|d zE_#FiTxK3HMp;CAA>Um8HDY53*#EId1N!ooqic&J8^R~!d#VG=&}=agB$w^48==r7 zHbu6f1P`ku$~%4i)DZHE>AZ^AVV<- zx&DyB{_RYtfGK>p-f-lAyc3*BRb0I%2WZvr20;--#;g6|{=A`~lquq z5kk{pMo-ykF(VfZYB{P33*Tl!oX;55n&uPs?%`lMp z0csbX=WtOJ2x^{CaVvTIyuy4cZeje{)UV%1KAh{BfB- z0SU4rR9FU~ z&-0)d64X>cFgaQO&J_`F%S6_aEeP{__tCwN$|U2@gyH9s@VAnF1M(ko#@gcXL811| zyG%J!l%21&t*BXl%KQ4giYr@72{JNdezfOd%A+`sYqg*_#bl&zHua2@k@M@%2Xym9 z7L{7|bPjsv6Y|5gie|CWQ(KiYGt>goK^vThSLLU65VePDrFbfF23l}=xiN3 zrt%z)`p8@pviA{TCImUY1w&@^58wFax&P;G_-5)$zO>*CLupgd@$1W9CAkPP{nC_# z@!r|m3Y4q^!3bL~%-=Pf`TXu(FcB09ltfpH1PNOajgby#7pbKQFc%by`3euRqB1C& z^k7Rc1I;2yd1znK^BD^LQg(;b7yT#rnD*N8X5arh#OI?PL^y!88H1X(m9HQZLWJ9& z1P{m-21byAOjWGVcj6O^cXd^Ni&m)_8zP3KmQJdxf`e7p=(AF}VUL|IzxeW_6`_uV zvPwTK`Kw&kUCUZjY%gJ8N=l+TR}0@+ccJ9sLNR_3RH1jr#Gu@6m^L(_=0*Fvx~HS1 z)CE-5vWt|1lCZROQv&a$UM}FQ`?s9|vYtjXhn7u|1&zS=_SUCgzcTZC42z;sYU5SN zuRxX!3?nh?lK~nOH91`vjWHTcsfWwqO|bTx)Y2LmIu;GyoZF+V0eK%MuGr*^&~zJ% zxI~iAdaBEla~Y*CzS*+{C9IohS23`7NM4HDw0$Jk)scGQc2Ln>e&QMOFvC-qYcE!p zM3<5rAG+`dZQcAWnF7|Ztdz8Fab3M&K6DEE?#Hx@cl@+0(@ zNSlWRKJQ_1*L3UD_kmaa9TN~HR-6ibybX(H26I;f?q|iMs;UOjCt;p1YU@lFe$c!k z(uqLM4QpgypkD`dPMmK!vopL&rc8 zG-8zy<0mM!NSQ{_AutCoRX$aH;`voskHQ$e8_un~9jq<`Jy*z6xlriZiqoYuYQ;}< zEo*qQ#U|g;p|m%-4OEOUag?}~#dXM(hXNsr^B5~F>WHff 
z@xj?tGwrfwDt;iGdHUJPa<7Lyb2e}Pr{G-xHN_cGDcjOR9-4{o@`zTZe`$n6Jj9%u zqFS+1X1mutj!|c4tvblkE9SBXlu%a71Ib=`;*GU8K7D{LQKUPry-DY?!>Zz15UE)% zQ?0FWee-kr?)~9!pUY(KAb`vW{!-t-m!K>OG_w!~n%ISW*!7!_b|ou~t!W*b{RfIH$k9wIo#7&wOzm`cm@G`OP4q z^fA&mCK%wlSPpU5Kqo(w7g-;4C2&hOFQ^}-`JT6ue;-roY*#{M*Bl`0e82z6sZ@8! zET3hRG!RwEv`Q_{zSQgjv%amf*W`};PI(3Hr19|9Ae=IfCBZ514^?i2eLR6k*{X!xW_QYc`(vJ4MS<{Xn)s<}! z4}bxtg*kpeo|Dyz>vza(X$eamE-7GnC*(c z;2uUQK0SigyPIBn{N~KIKap<0AS>_Rt<`fFnC*7FXMQ4HXi@S65OZntyuxYo|H!yv zJu}kPgBz4xTZ@3YH8O35qL!GN8mKW=IvJAqzP!oK9C{N+W_$H6{^I_=YH<-8f=%DN z8;066kn-+#{Sbla&WVU|Pm$wdkNhNlbY_jzp7P*D=XHJa{5YoShx;DW{)6#JnTIpm zD3Kk!vJb|pMy$`6a|4%~uQa`HSD!BW4qeBtWACG;UmOVf8S;nq6>|w)t;ZAbMQxqE zv?<7Z0SzzY`;wP3AYa)*z&G~#_0;c8`v5_m0o+ffj3jW@e3Tu#?iVXns7g-fni1I@ ztagX*?`xx}xHiTzVixkE1ICO?60zH88@oB}JI8)h@EOoYxl;1*i9snNy6P7u+F%duyk+(7#xj(SNSybfDJS?p4x>76TFv;}QOS>Q82 zynSkwmQrelXdhepIwr&4K7e0m01e}LJU`r;-GxZ$l7EFEKefu%E`LXa-$P5}`Cr8}g%4*f2?@B4Xv&->i(b$y?I;05QL zefFMv?X~6_bBr;Kg{}8H@U7g=$1T1KEL=+;w^P570KRpKRg@lIS{^SSih1Y8G~}NT z_bmsJjFH>)2B+?bQaa84FNW-H@0MJaus#FLYKHA}9&rpeA7!jZsL?vff)_fmreo5X zT{F^xNRp>anEn8j{tXNV3M1gXK6d|7_eA!R+skR&F-={*PF?kW)*EF>t4=Gav67>z z-Gv5|cq)t-Qbp~Jq859*9!8uD-pvJg^;~dTUF$M6q8O48)a`V26mef4Y|jdQR3Qa> z9fw3L9ic5F`1oYz)Zcl-2(Og+Qn9ID6n}WxzFVQ`)_z zAX_^jfcfkF-Y;|(Gfm`|oNO20J6B`Nx&n#K(6cnp!fSAb2Z*7bwZ-A_{e~!jBul)M zBe#Ju*b@5`|NHgoWIJsp=tE)RqU7d&7>=Lzpn`fP? 
zgou3|8-8?8q-s>9JR?72b=WW;gX%s(nyngUuA2dSg}uumjrtQ58guvFi*C_Hd8LQi zt~Um=$8!$RqdX1Y`;0~dyXGMl%UFZSuU?P?4(_cqFOXtjD7}P)l}sKNXa;a-d%pO{ zpf&P|QWx{djaQecTH|}J4-B4Cmu5jyVMh9Rhgd7m4WiwTR~IflB{Ykpi3x@Rk333FvW_s?EZ?y z5f>JxqvSlO&2Z88?hB9dl9@=ii7c}(=GlW_^%FV0xgI9b-8s0$ zerCnxDjpEQx4O?`ckN3{eewXSn zTIUSPFpxZ`C&PpWJy?gU`Rywm_|UW~^}|=gd{{tKzvN*XO6L~Ip_p}fHxrO#S5iFI z?an42L=|p)Au&7oEaIS__REHN<`RCXLc@fn$IJZ~@7=Xub9GW7o(t!9`j3>Mmq_?q z+cn0S-e>)LRY%`1R5@Ptzvrp*#yq^9a`o|OY{Q>{^7)Wxr|^o^*h+9@Dsdigl_!Uy z>sM*AJRf%ewiuGV^p8?X92wkRXjhFcrHX21t@camokqbHe9k1lUG2`)>rW%BuB-Ei zs|s)B%*7ivzm))!77=hge108yk{TCda8MQI{cCSN)hw-_qTP`#~-0t`ih^-T8BTxpHRmr^xiN*>Mz#I7ZZ!ybQ=a$IjBS^ zJa9BQ`Ig^Y_a3wByuW3d9xWwx>+QNS*jJk=Wn zum+(DP}8e*h2`^$wOa=;VIPKy!ayZ{3=8V^yC}SMQzE^dZ-8NmH8H{cQn)Dw&9Tk^ zs~>kMED$S>eMdFApS#>sK2piTWqDR1$e=TMwQ1hy_{_e!!H%Ph)8^SrPR5H~n^9?a zt)0(oJn|E>A6}S*qn&UzV`x*-ubC7q>J&ienGVntNPxCLq%lO|Wxpte$MQ?pl_6<( zsRl>-?Jxrs_7Zmmo&X(c^KSLg2vM{Z5QEbl<8$g2VkjmwtNG5>3g!0b@+Ckh^p-<} zm_4#j8R)Zw>*jX%j?9Bs!*if`uxT989Gf>sqMTO#xY?m?uEtm(bvB(SISdJJzl<7` zSJR40i=$PDu3aRAcdm9cn2WEmxEkLp<{ZlGqG7cYp_Gm7WF)cjYyMz%@B^Zb8AKTe z@7tGoi>R%78vi&D_6-;H*?!-^s7(9;i`2OT&kmdS?R74Y&DS|3_9v2)SAZRzwucEi z0JLlY#rPwom*0kGeG6emhE_eSL=RA9lgFbGz&7YbZR`TrK6G**Obre!-u|MrMeYkK z%;=h!{7@~oq-L+&;pskR{cJ67vC1J1k%!t!^$TXw0N6!iTCnd3XL&a_v$K0i}vn98_zkOV_D}rzC~R~Uh%p2zGc^jZv$)@Z&rsPewab) zgJ$+WfKpR&rKb)fH&0Zyw**Q2v)?)8uFqFVd02;>LeeyNZ0l>0mqmTRVntX|B4%!G z6OO0pgB66c6tE=nRhp*XK??%62kj>GMw_dhqrGHGb;MTeTU# zE~nDMA`20jztKqYuNpM!Gdyg9G+#4dBK6j%xyigY?{{$`Nqzk=j6W5a*J-U9=YN!w zef;G3m3(?uNb5xk?TfF1+Rlwngz}$~2`=7PC8!>IxyB{@fvvitOIAmSTQO2<*M%Rob8 ze0RK5tG;mMx>z5RIV!O;t zO`kGhdL4Tb1;g#3CyPAA47$*9?vS7z7>-ooJ;_p;U|YGEfR3Ww;PeF?auEZH{|~xg zAC!B}J zQ!3dZslergG==u)^3y}t!`->*qdlKiPwAgXCJ?*%Ox4~N*v=BH^92AFYNR`HWUtl> zJwyT5+2nb)oN`f_g~*qO{gNVXc(efXt`2=Qg@4mUDNh|*x}fgcr()BQ^qtthvP)9t zCz48xl{?)*R9)T5^yUi4aE~JRW7!+>2woC!@ZrDQObrZuBoy%Ayev>Wg-}ux;f$~( zsD4e9(k|U$I=FtV>)j#%4nByF);nKvKHbi0hREhZL4gn(rH&I#)&m>P;831apb7m0 zJ{J-SQgCkl?aA=&qs)_a1ZNtD^{1*s 
zBAWLtXAt)OCEdCHPub(^)UZh6l?4Gh2wF8e7U9t_B#gf7RHJ*wzbjF)P>HH}*jzwmF`0Tn#z+dmj|#b*-4BCA~`mW)xb1`vrBT|fXGfy#f`7~ZX17Qq>hvTK#9>wai2vVI8?ui zzMp!-6%F(9$=SYefTGG?V$t?jh@&&{8x1iq*Kv=GPW$#IvIiq+3n>&k=MF7R8?UZ^ zG7vW-7De0f2U5K+cgJ>UaA5ZyWR$JG9BZBZQ$&wQ5OA2Oqwht2C^~F*eWx8;)QUfm zV#oc8F4=#w)qeb#PXzKzQ;U6Jv2{rB@tttz&PeW$?_E+-yAk0hiiRei$eHMgE*kJ? z_u_n8AF0tYeiOXQAvFBN>bD-$16*j(e@@YWf@B$xa+zRhN1gpu@)OSf_z~{jCph#u zeRNWvc!nR{7IsL0oYAg4`#n7w(5+2S*pWav+!YH#&x*dV`^g~_WnKhUuInJMHEX40?1y$ z?IFYHKjAW2f1~eFdfYBs*ScH=spb7-EQs2Zc!9U3AfyAx6+-BlQXQbaN;bg_43tP_ zC1BS*bC&xj=+)w%HTEKT_S$2qq-r*_f$RY|V1J=q*qN%Xv|SQ(dL*ac4|>+UT0=oh z=dj8AZ3|RYTf}pqvx42#Zrw_z``!p)EII@U3tqG-DaLBrGr8tlWuYHv4*DDi$3j>x zx>+a)?PE>NGa@1){KMj+qGfHUQha`#e}LSZd`j}z5y&v#xn}dl@3iT1W^R4*uT^CO zNW!T;m#5oeO)6?;@r)Hn-Iu;p_)}{xlk6I*DeD~U zeJe@ci>X2LZ>0dqZ>p*1zH8JoXvWNuefKg@z;&s~)~aL$Uq zXYc1b|0(@Vi8{ACCW_bJEBZcbG;h=>6`!yF~>`>3HwtH;G^n9zUtAh~=V^Vyx+&G(Nh zecsS1DCA>89TG7Q-bKoI!1Z;|LV*_JnK>@nE_C)%$rA`+i!3&hJtUH9IzpIm1I<(< zWj$hIa_45g6zcxWokJ9o(@TgAV8TtMx{tN-398{ z`nX6WyU<4WBEW@g6Zo59nq(>Q&O*7YucnwL7iE#bR6#L*S4Lu-q2-iZVb^pbfRg-NkZFwiqz2GFZBc zupR>f=hG;YOCl2r#PSrir_ILOk~jU>&$@{g=xa5-!b|@0+GdqVVE}`g_>MC zCaqor67nRj;5RzG1C9a~WCeFh54aIyTtb$Sko}F3Tp)+<1dt!H%20i10266#T5xjB z)r>%sT(_U<#1nY%A^H`W+NvTf=*!JOhE9{&E}A6Mwb(K*^sLzh8q(;i)9yR4Zzid? 
z7?C?D&bmu=hY8+BJP$;s9VLcN->`HkdBQ(wa3|tuW`=20ZGIbY_204qYjBQ{$N)7E z2O#6Wsw#+j&uk_HdDKKIic3v8?djNsw1u$h_nCgg5cgUJne_Q{@d z+@&uA@ZypU$FC}kE@n6`5m-Qx**O|PbI)&Zv%{AA821X=HobprJWAl-8cH{D+sae0 z`-_I`t19Lx0xx)(cr9(*8->fp`_lUGNwCuAJGELtP?rcfTRz3vb@)XAxT<929?lW3 z@&1sOGgyDGBg8*-UO&ers&)r>s?rdD`gm5>E%v^p5-WeY@aO`GGGFA_Fz+1ZQkd{p zVQ=y`KagWZJ59d7Ysizj{|{b!pfZH%4WVL~E{qF~3W=&XO`!eX2wIB;ZQzQ+>o`QX zzv)D!rHJcVSP<-`xsc#*6Vw#()a{Q3FN%?POtxgr;zHsf5{I{L3g3ICRTdlrc&T$7 z`mPxvWR^bxT8a8YRYVKHZ!6fEVhixY&n72Z{rjq1g<+j#to%1a$ekSaCrH;fR*xx)$;yYm8({wBtPv_*e{1N?TI`)Vr4LV*+h|JaM zI+gUgs<3=JiRh72CXf#f>Q9!_BO&~PXu%0h6lP&X&lnh7fmvY62ZRZLxlnWC-@Jp~ zO}BI&%@t|V_&1I~B^SQS+7RUZpZVwV~wILS)`|Y&w6YA|a6-r#U zolKUXJ?Xb~{4~jtzZ#yOXaq$_`V6Y*BHVT&)O3#GW^?KEZI5rd>knmZ83Yirgf&?A zs!B!kUCG?HXEqWln7j?=ayjKXTM2Ku`2c)^(4XdX#i-i2K~_qKoaK3CGzy5Up4-Us zGJVvo4G5}o^_Du~OF!CV9KRn+r^nRAcR2vwSOD42E!Nucj<|aWXjge|tgnn@E~L|26c z1F6Y7031Xj9j}vSfZ(3Cxr*oW%1lV~pqtD4{X5z-{7@>xrM7dXCiF)i9?en4sx^jq z=TmBpiY`Z2J|f2;@vX>W#U_FCmWn>O2=i{9@7g2r!fbV&JU;G*AZ=j-hI6;mFm`ae z9Z~?F-^a#*`}IzK@`fTjKu&c1_IffaiuEOp&6xa*eUwB2_tHwNHAYGBD@-`@MI?~L zN49?zwIs&?j{#Pwys|QQ zc|GLemi6#eB6=kE0B+h7ezDTL6D-G6Pd|>L7Rmx+hvJ%xdKy9+=y`?F;1;<+lZpyQ zKPeuvPW_h>Zh*@{7@fFn)Ygj!0TFylfrAOu0^w#j` zd2x(Wr-*=Z7w1}bU5VMFPqIDPSvS(|a+eHWc)@pUwx z^JLM#Ph@#C2vw+!pGh z^)KuNnpasJs7bZa_+dEDE>{HhnE&8G5wYhpe;lSj7$lcQQ=4HkRSh0{MSfQ-Rwk??!(~1Mk=XTt z!}|9gAndcK!y9+NxZmFV$GKUQAZ3v{j3DWBJXgtf{zdoH`{Zo|VW^len>zGTJk94+ z%6L9JAYj-o{W*n4uXqY91~cSf(+a;E|7j5aZ_=cwFu^={~8gQPpOLideJw=8C_ zYCVlYfZYFbw^*-N;GQ6@cFgE{*PjR;lRR~g=K}!JHuy*7fz%z+M%d-#^ zb08v_80^RpmXL>PwnBu6?o9I{Bu1FrV^mJq*fvl6W6VKFTMw6KmF4Sz6XbW_b2ntd z{@)42?a@Heknn>G0}Z_mwh;!c z7+%U(13Ywe3>jSo{H{~Urq4nSLMT+aiCf>w@ka>p-?{yyPzhf@kRTEn#6)XE6LBoB zuUDmb@3%_C?TD10pTDuO@grCV2tdz|E(#4Xp(GvW)fS@M*Kc%wIlxvCD9<=U#52U} zG2)bNwtnHuH>_viBprk>d=-*P%m#bA>9*wq^+UsOqM4O;b?<2Yar)hPxs#Q1qJ6?v z@fiI#t=+A>hhGCiKRInTvO2GEdBj9w#_lSWs`{oDWQleps1TbJ1Eq#)+4HE$ZJZY~ zDkMwQ@6oa(C1elT#;XH@fgcHnT-fSRDqHc)0_b)01ahHKq3esixw3)|56UozE(`8} 
zTpsmAG4f!O=7rnOfgeVL@;80_n@HjTca*bM#{hJ&D`)Cy}VkW)z%_Ra7sM9IZOAw|%(Y*^SD>13*=qog(er*>irxT94=r0p5L2YRX z8>SA>wdN;W4+!qr$Z)CfN|bN0`cgL}-hqstcSDs@DA?hc&ik~Om;y7%FAS-Mo zp8is8u+OKi(LTI9_h%!QltNgh5NL{udW#{6WKUCzdXfSq%CDQv2_XbscxlsU>7!&q z98XhuJE_2=-xyThb}Nkut;A6her}YuJc;wRPcmMXpiO)tGvsUfS7w*kgC2G2aT3B({uAD}(l;dl z7IhL`q7kE23d0Y`V_r9P4_WgS7jB$GLxwi=A9|3x$E*gt7~W^SwNipkwz&5sSM8I@(hkZP;vMK%jufzZ>(*iVTpxH7Xv}#X_fn z-FNEyaN$w%xfZ-l$u z0u+sJz$=q%QO05-=Z?rlcxW_XSD4#n822-lkw`3PXVdzLacH1Wx5Pj#uY6*#`*%fWMG>{lc?&jqG@u7% zbY9UyI@jdImvSc^BV)>_UI`JBB`gjTVeTS9foG-sZj+@fuV`s_xy&Rn@sRh#V;WsS ze#E;xTU|9=H57q(IJ)=zwKEENzqI;|nk!g(@w|Pyw*}?Ce*^mS;E%zh{UsVN(k4yl zDLis?E>^_kVB|zFhTJ)Ox$N7*WY?qOYhdN2H2!_P3hV@M--ptzjpPda-{wF$z%s0{ zF>(>fXZ`^&2Ldi(9Isg4fo{d2vpaxLotMkZgw^u?5n+H&s12nV;6TSo)ga4Av-^{H zU}R}~egVB0VdEOXua~-k@!*|de5b1H*EILGK z9!Jh?k!btHG=U#w#6mp3BU}}i^cc$~vtczI0=v@4vd=tx_k#VBe5o2PRYwU;nIjj# z>%c&Y)3zS@IU6J3NWby21~sZzfU+M5aP0evCKc7A7cGzCwjUAlg}yx8+7bjVQ`6Ht zBeT=KpA!Fo;e~S8k|@>86*g#(lYRru0P!V`(G}Y_`4bB&DHYk*_U~A&pCl2qoem22 zZs(N%73E;d`}PRP7uA6ugQVn<_6=&@REaF+v}eRmFljYk#KJ5(m zu1Ed=?WD1c9bQMW%|vcx1CxOd_p``4BRSOCz0wXq`!W1fkW)j8)ANW6ROa63WibGT zJJqv~SZ%Xg_!{yG3w~RT)QZ2@KIRlb{--k)ra;)4!EcaxxWf!7Nk_GL=>dOls_fM} z>hq?4Pl;Gl{3LfOyw`g`BPwkmTAIn=tgP$|I9-mfa%dCdM6O=F!;CuUnd$07$`a{P z%&W({lNbK^Bdo)X+Jnw`R8+u`F2@DXeD#mNcd5U&-tZB8%Fqvp;~uMq`R?qYpAwzp zRepYKu0nH1|7rTR3pw{ve|ZuU6a-JDBeR_isr?}IQYu+DIE)s`ZHPaT^XVxp^zULO zQ4+j2^9$XVKJ4wN^QblE$gKQ&dzt_FZ)TU-&1DGBU5&%IVYghkO4};^9rmEf5e#Rf zjWTrk?oymA_jSk7GuB&33FMKK`ghij(Ih1^30MjFZ@r8Ea3`C7x4m(%gpk)^Of48t zcsmy?cTUtBS=BW?dN@UM*2P7GE}&jCN%l49Z(9aYpX2a>X**?hu-6DHpeX&tUc0o0 zdR5`9z}5ToMepR&4RfBiFNIapl=xJKy5)=~(#&9y?fi>}c1ztIv|op8tEFmX{&I+5 z$5`YwcvusytT!5n9%}V(W^ic4dglM;1o!E}hqmbiAMOw(R!$%3-c~Dwt$lh<<7sy# ztyzBlTh^XZ0jgT+V7>x!NPn{7v%J!W=_Vznd|U&u2W}=^)($J)4=nZvaz&a@>{?}8dK#6)d_)A*rpq!ZuILn$B`ypez|L%^j+H6U~`qy z(Y+w-C~;=|)1wiIB*$oP6U82ea4$dY7l%OI@?w58?g-81LG~xJfrA8%F<*>@ zL#ExsU%Isto$GP)w+ExjgeB%3@%_1Pqm$O+p3S>x%>3|Enb;M$TW0XM%CVbAS#*ff 
z<2PZLLYB(;ozLI}3KSL~D;F8f0mnZM1AD4Sfmfmzs}P}kp>||W1xZFC4Uerj(O7~ru1;WrcznyC#jq#EC2;s8?_X|??s}m8D8^`C|b`PCY4(N$if3!TH|rxxyl=ZpH=9U#~JtvWejbFtG{b@odl` zGG7V!R#^xyx7Wi7_SaUejoK2cU@ydBA7xuj=SY%RyK=s!h%|Nap~(mS7KJ<5L7=N^uiQPrFV z>xp~ud;MNtOKwn7d~uI9G^KHka(Pr%Ii6*VhaqFCuoA#^QMQ zTE3ZtUTT7|v(JsX@sV97ghS;l)OAMzm^cj|XP0=uiO@Iwx3TG>u){~om2wMqgVD*@ zyrFs2uy=Em493Vh^YJet+h63clcUs}{7#Eef0tu5u+QW0srFn78e ziYBK}4)%Rg3W-o_4d`G>JoFy!HCw-uLtaiwGOtjVhx;_dTu%1WCN^U6gm!A$5e+;h1bKS9^BrH8BTnRvCSltS^5N zByHZr&{b-44WDly@;-QgXC)#0MlrdIX=)u0kUgrgX7auM%aL}S5$jOkT6Ylo^B2c5 zf2+s4<{aOfSxIbt3`D%BgOH*9Sl#}$T33_L$oAC3a;%x|pZF;liI>7^?0vr7-tLuG+hp9Y|}%RgeaB$gP*Rs-Jl{S!_iW z(9=~#7z8geuC@N+)8-GCj}tdBu>%L+>1&!z1N5+)1$uI;<)JATT<8 z$iUrRL)2T<=pbZ+$J{^E?O#{AxpSGD6WDgG(FEGJ>qY6RQh9j3Y!IH&WFptq@Bd;s z&LFd|zn?Z1AQ#JB-Tij&(d)goIQJ7_MP`GYN$aDDw{Jg2h`e0MQ7N~sDvEkG-UicG zTu#~vVA{D|>P^C5$J;Ip^vnziL`rfjoeBMnFH=fX?{J|3o9FlrdJY^BGFU!I$Nj+F z6uB5vu+Z7UI{Ze~SUqQ`vb7HhoQfbXUJom=6B$xg@Tbl59T11$5|OZPA~Rh$5eF#| z2gqth1o}35XL%AEoVEfpIE4#cIGZJG2CK}e>aiIIZkA=f`;o`te9x)3dAssZ-On(; zDF1EvRifR}D3zRl1}4s`cbI zb~;NLu1N;&?bV0UEy>fPN~EmWK5FrFO10%+EQw{qfN!pesS55eatPG7V79BEjjW@- zo9AfQyZ0lN)N9xpW;M(^kIy3}u>?SPb+fty=X-sKmDU!|Uvf`0P?UDNoGPhk`p_u1 z1OgSvx6}(8SQNp!qjwU)I*RWsP(Ezm@?yu9mNLTTs;KILb9i)0Ld1t-V~d`B)e3}^ zlzDBI)bQh{$XQ)pFOhYu+wAHun;d{(@4+OMwez+b(P$U??Xw`XL<`yT(PQt9^t7_E zbkF6Kb#*IA`n%2VrF^ZF_i`Y|Si5JQ)BH|F-JbvHN8*LmYM5>UAi($}kMT%K4>9{7gL%KKk2-e5-R1Zz zOey~<7-Ayal-Gbx=e4x8S-Q9c$n6LFg@>?vHCeJGzP!T{4ac95sS(yX+>p?yxy`dA zDe-P&cAW1F&;%3Wva8N z=muj(Q{jMT3_4hilhdEAw?nfC8EW|8lR~9ccUe1DHXB7Vtj9V(Z|iz-v{!v%c=bM( zZhnk}g#szt3r~HVY^gezXgnmSefai(_Y&2#=8em+M>XUn$ycyp9{1`C)LjU8uj;IDYEz9lm>d-A^>lYp@3K{b0JV(7W%}kjJxB zBhkmWFS@1R(Z+vuLh}CGqkGXztqkqE0J+Bm!Wx%rE3U`FHKZTZ2mxj40Oqj2bW1SI zp+Qx%29$e6?9)K)Vkm8MvYZr7sidNF=Q;cS_;l`y3K$oFQ7xnN{P)6bS8$snkTspl zY~+UOPrmvOVH4&ijw){_j!fp?mhm&is;);9m6m&dNrWb=F%oDPwo(JBQ;uLamlN$a z-d9|q3s=MkbNi`cY_d1?w|bP)%2()WH#o8xks~NL0-6y<>mesMR*46A=SQ0=MF9+# zv{~tQV79;@Kl)W!0F%7uLY)W(-V?CbG+#T+o 
zFLsEym}&KbjxI)U4Z;?#gB`ZYqKuBTX+QVq2IcjB9Hu1EzYFXFiWC%7W!cIqF_3GLRcpkDA zhxcQTT!y{IZFwqc@+&Fo%f+~Pz_7S%Q)8s=!D{irVu^Q1n9GkR-4VJ7RsxURKk z`0dh1WlgrxoGta6j8Y0Te}jgM^V>e4Wx8vwgL~-Xi;Y%$-qfm=E}dANRs%8fie6Y4 zLCTs0&oEO!vi+v=I8I;LS82_ZLWqR~2mVqRhuqiq&QT5$y?Ll04{~Iv?ja`{%+Yz7 zAORVvGCsW`4+m1Ibxbwcb-4y4o94@Rg7-OQQ=#uQG z@Xk?2A*VN`a!0K0Xj!-EYB!m-TpUk{xdmtTDEjjr98?zE`Qj+^qFnDomWR3NoPL*YR6)Z5eLlhb-G=k{}oYn317?R~(BY${gPVOlS# zu9r^`K6bX#qsXXj zZuR67@rT&ccNW{?pVhi#I&=7&IZJC4n=oYvlQkWGDd8_!Xf`QRSS-h6o!)$RA>HbK z7(a8q!?j1prpELoPq;~q0+a`LE7m|2sKN?(O5JL%loD8f+^6=k?*Ha|BLbiXr|USU zOn?tKX|QM+tlx46^I=bD_Qo|2`JMHxFN~Eb60p%OSSAQ0gr$AJEgT_aDoG0v@lHr> zrj`*WO>*pMX(qR)A%62u-Md6X+%sMdw6ov;{`?U_2UZNvjSPWO+Z`Xiwp;$yp61)O zmXe+NG>&w|6tFFLQJ`Vu_NQCJA%ijAeJMTawOo&nd;hI=)m)M;$x0iwp) z$Ff$)-QilCAkTIEdiW!cRclOaiJc;qvK>(gjJEXtxG!K$vCc`)1XGIIo1@F2l(8E% zhE9$xChv}qJz*dBrD~Xx{MWP^Ws3)8uG15LMVtZQl(jDP^eV3b@swe*NzO+=u{?Ix zFE$jBVcqkzBr0olBoDo;uRHHcw0)vfRA$pVfWyA{eVaPBhPj#Z1E1g0bG;Z!S&WK% zlu;`RI|Qf}Gcup5M;y2?@`@3fjHk)_=lu$r<;7sMxY$zH9(6PmIB^vbowj< z?T+jUCI^kdYVljilQm;c>J;>kpNtmSKPi8uDe;$_iqTgar6axBM73L(%pkJm;m zcZ-zNF$29MRD=&tu1@NMnEJ!nDZAFd&MGF|gxo;56KBDhA;H!16x-49wDVD%s76uJ z(+@ftZ4b>_qLv%O`e?drh4@4{kh31hwxP=z|J>@&nhY}mbK=b^7UNqXO3UCXfPs-2 zoS1He=u3J*?`u&jiIyq(1m+H?bBmnA!YVJ;@c=GO+<5)p%_wHOw14ew{dY5pqG`W( z+zLqbGB9amy{hQY_47x_{Bg|H$$bEYEE{LY%S@}_gW(xldfxZ_BPgq)S)p~83Z`L#Kfg_-+) z(BGPu4f6`l&Qwu$zB3~D0ZC{lX^eA-4*5o9nXu3CHx^3H;gb_&KzKMIIt)sxUDj1X z`6Nvo;n&QNm3PoOGaZQC+-Qw>iDR-fgA;MA{UFQzsXI0~vXuVg#2pwIyO`Bz+h4eb zDc5-oegPSV-g6d$HJ$ki5@36PqCg$djZAShV@jr~1f6oiC@)+1FgQ$Y#I`iAB%RJKd{O*U%{M1$l}mi~7HGo0t@1#btY>m%{Ju#T=j{ zFX=lsk}LK+KAijm9GIQ-e&pP+0M0ekIda1C-@wH0w#_7U1fVn7zjl^rjL^WH#%UwH zpbTuEb6LnxeOMxcpikQFM(s9qPbQw5_*bOUFZS%&=N(-}AbAehHB#c-kN2&&UDkcJ z`Z%hB4*m7LZ@do@W$=Ub6a=`Zx=t@Q=}Kz;1AqO~9pWzl7IcT8qi_;&qJYl1;PMQh zQSh*$nq*C)ucz}qFyl2B!6SN$ac{0G@ET0_KiGj&Rx)b5(rf*$T1m@!H(VeP1N-)!^xJB4qc^9b6Mphwo_0~un<(z z!lEm}ADNyHiFtU;Vr@#+i%u_mmvdnBd`67XOU9V2w=b9~76vS_^*HvrktIjZCFV@isrJzkc~wX##8KpVGu=PN3Cp 
z#`LJ2f)v3P{tWOeLdpN2hC9iD=r85)Wkpf^08`=hFdlG|A4lT=-s4WEgKnZ)bIA#y zXOwZw6ICwA$6_;65iJEC{Ir>o+3}F^TG?QFmp^r%2X-ITBqeQj&8zR_1}1sr96Ur9 zT${At2Bm2QahdCRv6B93JMgwD%VC}A^3WalTJ?$NW8Ss3T$#j>Bdk?r*lN{2fo8EI$VR!3?w;G|V$UzI7W4jo6UqkZ$>Tx91}Mwtf?|yP2|ARJX+y z%~R8~UAnHa);z(=x9#*P@3m~1NS8}h3-)Tu{hnhmk@t)6L(-feEj)b4ywR2ddO zMhvI~;j|7sWaqQlRDX!@3fa*Gw9cxJY4yBHWar=*8%$!FTN{iQ$n}JSsOMmBxviLeU99|tx7Wqd+R5HrlOqrz zI66AUgQf;?P|wSF(51jQYx7tR3I}qls}c(LrNOR!_uiv=3)oTPInmth2EEd6yWuQ; zIhXx;nf2i;xu&M3&GAx=&8cdWO)xoZ9?c|25dldAKLzGD|b)yZ6( z>-f4-wlugs@8WosTdvs9XJunk;P3CR=G&)Z5_#v<@T*QOC5WAFSgT9Lt^L!v3f%U5 zCn1qb?CkF5Bcc<3E6ViFuD!BpYvW*DYVT+bK0X&Z z$I-4cY0^2{=O=6~i@3a;->qDjL!VvaEpk|Z=524Hp`+hz2)2N)F=#zAzs{PNme6AM z@o^#nC!6~6Q2i+d9Ky7>HL(C zQ@ULtxN9~r(`XJoVbCG-CZ^A`ee zt!E?XcyW2SB6fLsYHKPl0WV9m5u#mjA>}ecJv)+ntKaxq%JT1Gc!Q{Z`_IaR&VO4L6%s zn1DAJFW$xJV&*XWttd`sm%7|5%`q@c#x}b{o|k~SnfWWZduKt%M8UldhNk7I(E?WRU)ASf%gM0 z1M`|Jovyd^0N0zLw~;5g0S6s7;Xh=idkDbQ8-EXf}kt|$mvdR;@$6&W-m-_1zv-Q9kb}glG|8+e- z0(}C@BhrWL^}!j*KDDlw2LUOpf2I)v{vJyzlgj|Nz^bfFT=YV!{g##<{Z|cH_L2pK zNp-qvJ5hK@2P%$a`z_M*Bz=0wj{_ylJhQ(q6agFA6jp-zPgvH#IowZ#fB)_iIE?>y z0Fe}5W-k@<>W)$T6;N;~C2;T*NUl%p5EPS2MB8r*sU@+HDYKVQo+Z9pL4svga;klH zd=GbJ@ZNy}L|^S8f+z+U&j0f6f1)k~qAv6^UFh$dg7o*n{(ru_hyX&QPL6;L?u{~{ z?$2J~W1gr~bNW4}6!wQF_Gk1pbcL|Lf!7w2LpggFk(j>_*VKZ`H5Ch-Ncjkm}F3ih@`J^2uM~d zOjbh#2U~O{7ps&poT+J*{y5YQ4+9t!7nzf~5*nXX5%e^no&cVSULM$wVh6VumgT+Xt!K4QJx^RpL2rvPD{(flw2mz#| zkRRyLerUo$NPeI6{?&vggNcLw&qF{$=+m%`VWy7WnIdKTv!;E6mxs%PiC;a4%4&Lg zF-?y9TMJc&4_&A}el=q}NL5u;&_Cyji)mHV)POuZ_~z#3{l2p`p7%EN>XlG}kALMS z@@3N#mP*RY`)z%nwMx~iV7R%t-L}H0W8>mBtMnvdi2G{Irz?N-z9q>GF3$18> z8cGcn4updx%D=jDXni^=dDukt1)rQ)&CJaObX?y{NlFSyOAjIT;V^2O&9GR^(BzF3 z7Se2=v$L~5tiQj7_C?_2{rUwxT_Ar?(@|W#JDjqeATI#DoU@|1F7R?8y5hDSB`Ga^ zwIEjiI?B0@>)<=eK2Itj@b=qbMDcOH<1JWOjvKZo1le+G@#V#|FB>2Q6_ucL+xA7Y$dQ^UFg5Op8HBv=(a_&p0__U z+O*xOZLg0DJeU5$lActd#NuP580WtyWNg~OFeKE_(4el{bZBBW9V2`>`rg6%=Cba6 z|La={V}D1H*Swx@+ppUDHP4D)3ws?UL*mhdVUAT_bY~HZPq)XHc_gocv4~?x`xD+a+zP5O&pF)ly>{I}i0TbCncrpLPzb@C 
z>t55T+sn2ctLj$V4J*v08Z~hH*}m=TZq4HayFsVQdigRx4Q|+}sQL?IkhpPt-^w9_ z7or`5%hGMBMdE#f&e;B7P}$-8III^bi(~w=`L1RC73Js8pVV!~NQUh~8B+8!T6eei z9q(xhyugCfJmi_($FZgyDGg)uFOx1gLwrWY!x7iaaFA_&lX*`U5bGkgg5ay;?*`#&6 z+R|{z=z4I!?klM4^Jtvmaa0t-ij2#=5#;eo26OlRCqf4=elSmwb>(=}qyZo9kvcaI@fPBcRJG9@vXkC{3x zE_ei30x3E!i&7xFr^5mQ0w#gsWCk`#DRdm;WQw8ngimh|1AzON-ejCFh-cSCd^ zfG{<0XoeWW7n?o8bm{2=bKaZ5IQXFcsuscA;r3l^I-uQoq&ug>q!Y?NVmfYJWm*~Nx_b~d z&M_rD-BKOV>phZfjM?SV$q*OJ_;GaB*tm$5 z4gogJgf%SEM+jtZABin4T@=D(KQH35w{N?kO!o^TOG;OqTy zPMA&=D96gw2u@e0PyKBVTM_*+vjg5N=<~6z0EnSt0AH_PpYJ&h7-?C$rpwj;7El8> z7?XKdIoI5J)LWajcVDg*Z6uo+%?};?oqZ2bSX)YFc#AVyhNYW~M$(ZOXNqlUH;!Yx zzBHf(wox!VZQ~7JV7Kbln#Gazw1fT9g^tKS+>}N@8ytmDZ~ZBcKakAz8o5M>{^qoJ zFQd6S6C8<<>fMu>LY++l8yLaE7Bs?$+zcvj-1_KrzP8GIDENHB*25qIyW4$CSJQ1} zl3wGs^5g3OLZQ)F40jL67*xnAZO_N>byPMbjizQdXXR6S{!g{fqx*Z{PQ8fEmE=Xv zlM+<9*Or#$03#XhEh>Zew^wniusLYg4yVTpZigLk9Bhx(S4~*$)^sFnM`9A+7kk4{ zd6*5p$v)A=O*W?kg_lElU)I5X?;qQQu@i^e)MOd%dq2g!{LaW38%;1297>GBesX>C zOK^Lj(WwTe7c=h8U40!taqaMVjkIT0j=ANO0Icjz-%2~|h+>c;vdV3p{@NWz&IEpd zD>e78(Sf+^gbr`5{k*E0Z#P?I9bAvcol^;v)Ns8C`$OtXeL6{}A?+S}J zsZUg|gtcR|^k0lM$mT zQqVoT+XNO9+qmEo0MRctZ898+N-^ZC3ys43ZTk~(C0H^2!($%U1c#1{J!{SIN^<`6 zHcMGKDLN2RFl)-yTBS#zGj|0x4~k_5eY4`$Z9hK#agpkrfTl{CiRS_FsmlQpOz5c-m_2;AVp+qk}^*QoBjux`PG9{48q zhqA8^jLPrW)ier^}pP%77$j^9ZG z-i{(0MkOt^MT^I?Fwopk{2j!O!O6&mexn)y3c(?cVSCK&;dst%F-~Sf%8?<%aCXF6 zZaPh;M=lRFXKo{q&~&=6T>e^t4!1L8&vw-;3p)iM74mF$C7!7hV2TgLqg-cXy5v`xI|snc-52<_>{HlG?rsRceE zIGPTJKCn!T>lZk*#g-=Vh8QP4i!~Xg2IqlAgNa=?B6g5pF26I<+H3B&s6SW530{U%g_OHd=q)K)0#|vxsbq7-<372 zv5v@6wyFrh*$BSBIQBU0qom#1T^1;?;+|MGSn76usEe@EYi)xyKB0n%r^I1!k0#&v zo0JDmL@K)~n~oaPXVM`aB^t3{RJWpnaSc{s1l3rjxvrW3nFnT7_AsV(eV?YY@zpA& z9hkKMk+QKPg`6fj2Kh09F`goTRpvp5T!+aRDxT`&JNrh zn*M+$(s2F9K%M%)KnoR)`^>@)gCAo;m-j@}iHOT?*W23q zS4x;$0fw)}ZH?JWrg$~R#}Npils3z3h^26$r^Xzx)kRVR+&3by{dxHEz0PUN;#&xw zR?OqC6_R=G#_tbscHE(AV5Zi`R?)&br*Dz7P-+3R)18>;m4Z%|RqICjZicDV2uPnYu50?GU-qzBLG0h#Yb{|vL% 
zh!{r{cBCI)|0Y~9W`RxlN&I#Bux8hwKNNdr{q2sEoIISDqx)tZ=ALq!R|>Dm*~pg`?Gd~ZB}8Oi zmlI|=_~|~;lcrb?N8~ET`%*F!So@l;_M#E4nr_c{Off(r5@Y4rJ>5^8nzIOaPQw{_ zxVp!}MyTh}9;21UOF7bL`M=Q>bX1b%k=_#y&K*c$)`g$Gye%h`MlizbrhJeF?Dc`m z3FUyFkE40d%$mdfTbN9yXn^`2c&;;8D2xgwtoq&*=+2vIng(W9%&(E*#}#7w#@~0$ zzg*{A?NyH?o$I}Kzj|Al+ z^?J1wuMN8od(N)_`+~y&Ly3mB6BfJT{t@+$SKQ7Ffb)gT(W*~d)i#>#iJtj>4!?e{ zT;4l5I;4y>*>y;SmJ>RU!lGic?j~OEgdvzoUC>8MbF;6{ZZo=`bs5;Ekfeo4{^bs= z89~YY)B9Ew%Ih|S%_*)u;?B{{PrQq4cvKTF(Hz^R<53`|S0meeq(WPE6nJoTN#_hx zGz?7%<4DYQKJjbA9)17!_4-PZL(0%9?C5vOR2Aa<>r0`ZJobYt<{zQizBYmtq0Eh$ zD|?9$D*mW(x_D?|uLSn3RY#U;0!mG{dc#6QE`C(T85-DW>D+Iiq8HM|X@m}GZg#AW zvt79EMf9({=2D2ai^6Oeh&xceMdsOUlD!>Cqnyd%)#g3QM)>jmIMbIh$S4ug*<8K) z!{cU)A*z{RGLPmuFrul|>{PTFu+x3^hDLHvM|5yUvDW&gu_`khiJO0?#Mt<#nDV#C z^Vh*t&Xv;GY>_KtZU&w6GsCh$Y&Ng!*cS$rd+yEdQCxgP|DRt81FO|P{6M!e><jpp6CnIhb7%JD;@7V4l6e81gIKi@9Oi9TsjoHx?snct(T(D+fk7I(v1(Ju z;TA=-7oTwlUVr`^`O35`lBE&9m?p$}_h{t$d-vfmTsP9IMMag^viq zI=ilP4y5Q4D)|OQ!}Doc9Eqe#Wc*(mJ?7{C8mP*_>J~g6}Z7PpH0mcdVikW^Fh0 zaWhD^;phh(NGn&1cJG_)wO)_0VRMh+l+h;d8pt*f;z$_Bu`sBt!VD!0eb@7LR(6T! 
zxgDcpNbQ!!+cya%$lNOj-G?OdAg&UkKLl1+Bc$#Pu_$pgRq0Q`m*HQN$;}n?5BZjS zRi2HW63kl2hxhyY>1b0tyfJRDHPhhEsVdj~ddHkEcX&Xts`%gf`n>YTHqnF`$Jv=g z5Y?xM2$OdRHYW3r(8RiAOX`c@(sl=6P3l}@_y{Cm6 zM!I7UY_yLhZCe3m9Ydc?%KeCsDdT}t*M(I$w}Q+)Dvk@I4XtS=wE7TcSQ-H|Bj`l{ zAQdEFfBku9<*$QcL7y~T0A$;111oqf4s;FQe>x1aTzGygUN=dJdj38QM!MXxF3P&$ z#&skv64zZuZcfD;ayzm)_0Ac$fR3N%^3 z#194rL65cq|uLS0MY zLFVxzS-Ip&zaWwTRO!3)m5_PWu$*Uxg$Xq?UzWGXSLuz5A|NI+w?uC_7bh3Tq^On_ z)kV5i9YA$s9O_H>iTc#&M1{C2u`dBla@;Son$X$l(x8WZ#~Qu1#&N3tKX_+=&;>e} zc5vA8x7J{KI{AZ6EH>c35YH2k5V%wBhG?PQfglX_u=8&Mqh+9{3thM5zqmIN@cW8xx^Yn_!!W0(rYklc+?;5I)xF0gL5Uw zpceb7G72S<$I!u0%QQMQ3Y^p1o|5xpT)CY_`*`%`Z8paBuMeru{8{41Q04mvrF#DrbY?QOSQ z4gUif|KU>D0us3yacKA4Z-Wf8Vgl4Cw?gAgPFk!h;-PDVg|AI(8jEwWWQ=D^&xswqZ+4gN(U(9~8uoYQP zr@8E!be+xnLY2Qvj=Fh0iHdW-36_OMj<}?LeH&13c8j}w?qGVr3LFzawQzO1b#BRBo>7Rk)Ea1Ku&gQI< z+va)49MW~S6z^PTku8PSD^=T#zDbWLNXxgH7CT4&nl?Cp>$@-+Go2nvz;Rr*`_rxL@7iEF#X zq4Q4+tne%mi^qTV^9V{6bq zw;?y4IeGh))G5&v`0&o}ac|9GIhdaF(^Zk_{hIv6vr90td6pZ-aV~a9!60@qL$}sk#D(uScpw`8$E*x)#TY^fKv%jz(IN?SeLN94*h5?h9vIt%NDT8| zHt{!b+rs6aO;rCsHX#D&ii4GyI|4z^!J5$}$D{c()?jD&a6#Jp$&QtAn4k%<_~wx` z$bGgSzW=czJ_-GIl~7aEjDH0||F=K{a5o&q!G9s)SN70dho_Ff0|cSL#1c&fVctrD zJIT5~hub`ncm6sE#au=M+HFDXb^QOwUIY4?M>Or8Hvi)TqUpJ+)u=yKV)|qMd*X53 z6f*Hf$d7^@KlG-;U`~3a(EsoK4Dt(KpPu&L5VsJP00eFHYFu~xd&4kjLy%BuDhz?| zHbQaKWjUN_C2Sj2EL#e26~qZ6H%(?vfNS1k&os2OA{G|KjLN3p1f3FazG;ZolTt-l1Vc7}QY5Z`;V`zY%=eSU8VExZ zo17d{UCr9t-*0fW^EZ{p@e?spz6E?#HNJl_CP=b)ED|rt$L#@5%2q^s!D`UA z1?A-^_V)JT+CruN+iZDSGE(5q-z*qAQ$HT`e`Khq~Cx1 zAgivap&b0v@X2zLch$vB(+>t$LnQLEq?J$#-sK7^84RcPh%loGM=T))q!!w@^BXb0 zCpMcEu90j(J%bNVa+nmBgVqj^!m3*7`C1|U`*xy*N^hboB-h-=EoKXRx=FNc5f$wZ zxm$h*>ZCkyLZ46*yyuO#txWhQI!SOQ)CaQ7K=T7YGFrT!Uryr^RzJLYB7h8DTexGPP22 zq6XTm7zPPwB`BE*S=p~&IdXn=!G0GdIy$mcJT4ZGD)~6;oW!N3 z5mFs)Kv+gEuIdeoe{{UbgB|7eoxsUr8F?RtFl^cb`C?Y*(Gv+0BV9mc0Ge9AB|yx( z&>Y46RPrwwz4{CG)o?xZ7j|$|jIg)^2T$E`NMs_453}aOZ1tog~ 
z_~@Oy#w$|ox?3SJ7>6kj!;|3oUpnHUIrrr1$o~mDbWJn^w6pxsrtf)M6rolw+5xn5w58V&hY7^afTm%L^IBerO2X5iSI5r3Dr;MF_iixvo3} z+7C(&PvvL7Ie|DK+wldfCB_cqBJoQV4>HMbH?OaK@4ZL--_SEPqEP;FAIus<@fhD+ z?5i>ioiHtioj%WPD;NvlOJlHb?Zar>Z^qOfq*urxpg)MvfPFdPo1~+O9ytb8;d?nq zc~HPtZl#o(6imxPG9#Fkn7*PT$Mc)Y* zR`0mCsA;nR+w?D-t6fEGY`yD3 zTF`9AKeT3JuDrL)h)*s-@`64d=&D{-Z}7&G9+sY_fl?&oB(al z7#j)0^rLOz^50fYRw;oaGbgd|P2s~x_Bk{plu1n!V*LMG3lJWPP1*^dsYw751=4cBA7{*ErpXpU!va6Y4{MRy?j?TusP)QHJRdv!q{UpqJw#!RGKqL0+@Pv z^+9q>rW5u=J!nZ1)WFsc;y5; z%`=6adco00;qo4WO7nY&mOzD^^Ux`>Mf03ACD4fXx6Z72sO!ar&x`w{ZQODlXUHuP zkrJVkja6TAOr_^J$BO5rt)29<9FHsej<+KKy|w>+vFT>y%5{cd4phv>@oKi=^jICPuRSr3kuiLu z%f4EH^IR*k+ieM z^X5=Zoop4fH-wkFB>2g8W$ZD**Q*N>D>{jXMb7hWJ7E?d$79U0(l30v^YIA8-j6tv zf2DgLO(kP359iOf)fL(E*Ph@RDCDU18nOM%rotL<+CoN?hV5s$?@5Qeb_=Lpr^^VU zqN3dHhxaF$%yG+`9`<5gYJP4V&uJ73ip3WkaXwY64(zfkZjMwR&12k zj9b{h{-ONiX7?hB`}Kx?<+-zsp`2QQ>5O41e0<&~=#X<9NhQf=J};fvTomxhfxpHK z|LLAEa=In+m@_9%=^~w9ProO6Hr4L(t5%?PGh!v>Ww4;F!gQ;aC6L!_MsVdUaX<+c;HEAxYB)qN2X=5{hzg!4pS_K z4M*Zo|5`tMo$NSf9=79q-l=gr3M+`Z57FZylppaV43m=5WeB(_{Up=ui4hXRhdM0q zNcaIB^=oNJ431AwC>5b#rSodpJYL5)8JW&4q>Uqpw;`0{ZMFF#Swn2pV4pYswkg}3r_D0t&ohQb*+?fiCoR7PSKe%596 zZ=pLPtsB|r1QB=s0U!DUyf0HH8@*q+vLS@7UqqX`@cCMcXTr0@Y)@Y{pvH2U(Qvcl zz3chYyn-VrHtwg*4BMT=_l+w{MtDTa(V;r(8Bqh~hItHE<;yA&dd)W~;=BP) zy((p0J`7>iGY`6 zIuo`25>r6~7=gp`Az0yCenx)lp*mdkQr+7Z;0xHvR$?*vhs(IB`x|8_ohRl8bh1&}+r3 zPpT0#r}y<+E$Pttc;2CJNbKO`P95rZtHeBFEw|f0FI1jc?yQofm}y{Vk+$mZqu3r1 z+;46DnAeC#PsN!Tf4l#Q?qwZjS@?$qMhvT#krAJ53H+^8t8L5bil%NvQ6O#9>4eVy_niFz0=>r zfb93D{fF%lL45wZU3{gUtK`1CQa|H}l*~l--=&Bq3{~DqVOT?@WQz(!*}_^iA({qL zX}3}V+P_d8VD0%6inYK(IXa~bCn4l|m^jA`AcfeJc{lwhC#R8N(E{^n5ssf2J3J_L zL#r;*v2;c-Kx7RO(pV{+WPBI9AG`^SPJyFyAB-8Q5GV$nwS!JRIxHR{&C9ww9mV|q zp_Pu@GF|CBP5$pY;D1mI-daZrp>(THzcDj0z_Yz(>vN~N1J&v=gulh~e~U$m&AWV~ z^QMtLs_scB(exUx*PeA?6|cwe2Q+yhl{H{EXs8WWA@!%bWRWw+NHE%3`MB&}iH|=l zq_Ouh!d^YC=+iVtI}hD9J&lGlf`p8AcwEP?$mW|o=LxjN(JQqetG>Q)@l_Z(6PwR+ z5VoN|8$o87O 
zPN;w+9@rrLM(|GFvcPxx$Afx#uoYbn9XeX9bGq7gW)c&Sn;cs7Nf)d<b+DwcJClx&aL?@_RlyLom3S!e5qm-r%Lr zz24^+J+RZJi`02}(;dIt66rm>OyWCPnKj(l2|ph_qCenbw%(YAS2J9|8h(3^MbNy< zN_>rO3C;hb!|VUpXWeTyjeF_py@FOf8#dP>PTx(cWWuk}Q{Ttx6RZPoJq4mKKWJOdaC7-ntD$4@5^VXR+^?Pq69wK>huR*r?z6 zSyD=}<^vAH($@g(<8=vRdt~A!RU5on1EualOqq4vgbpl4yjipCjKDP=xEy71)PYDl z-@kHJ%MHY#bk~#W@j=4e`=Kw(J<-s^5)#(@5)<0%kd4-rMs6hvS|>D5eqVSqpkaM` zL8%g7@BEeU=WJ8s`P24#K#~L+0vrsbv6$Pdpl{zVNB|}Ep8#sd-c*I=E^GizdUQjYwP{)$&$W>|9tH6 zUvA_iH{93sL1L>`_d~ZyGNii_X#Q!hIP2-5Is@&(O42~NVZ4B|#n()d9oiMYsK*io zRmG^YzXjWOB{wJz5U{kl^i%WcML^i_nKy6ck7u48oHNkhD)K_}Qjn+&5!$lf0T(GP zHaHp@?E^^6dp<$jhp3I%tPmbwM@W>YVlqnH$(s-(H$HAMpU9!wGsm~WfADmQEmgF# z&1UsxP!W`45+htb>zNGXslg|TcbpjZ7cT#Qlb8X(zr0|X{b?BmfpN|BpS~3t8M#>| z3HxNx%P} zw2%zdAxVX>+n@ZArku|V(4-LOmr=VG4GGJrI?LjI3boTg8gqQQIUF1wMxt1WuK333 zjz|EsOZoJ9XGyOf-r0Tt%@EL(8;6w;KC>h9yTB+{)((ri4a-c&C_2Le%?% z_CPNBxkaFybxW{LEM2_UrkWbTD%P^}`X8$U^a9pp8iPcwG;1(p+$7F`8uTFs(tH zmTm)4mK+;dIa#KOD7f>=3UZs(=qiBJ4g)4v;t44@uO=h_x#GkqAgXeP1Q9|y90(3a z^GW0gmQM9ZiVHP`t*}TEv44EA_w8bKh9i5ymx5U$`*G9NrL(y7^hvdY; zq;6B6x!I8*9i4Z`k$LByDh|aUpRnK|oWxsHI24qHfZ*eCYW#_%WM%oHJmd)fQCwq2 zI_`~hL1fk}j(ajP8C>Y(;*p&N**@VAk+s24;+IqwLpUngG%U1@KN!NK+UR!kv93nEM;+K|_ z6E`-dG=MP1=MWwE(0zw*fI0{$kR_M8Ttw$E9}AIPjMmz&6GIf#vc3>~btgnD+lODN zti*1+ynLwd!iyn4k}sF$6i?hY(7Uny+nBg--6)nW8zNdAmGcEU033x>h2Nbg+0AiCKtIuF| zJD^ZJ`;LY;W!asPHYmnP^CL6U zti>LpqRxRJgJknORf&9}j2=6%DU{!GrlU}j!5eRa^oUIXQh~q3Hj4Bzy!6_e;G5B* zpOaHkLMuAn@F9<8Tsj_?HQ)!?-7T7iV)F@VVe(&L(!k?5#uKBPK!8B3jqFt?|G)x? 
z<~2eXUk~pu85Eth2w3Qa_DP9}U(rRf1-w6r{L`}+&h|zSo3j%^pGQvL43Y-)_xFqY z#-Tyq;e0Voc1(lv1j4XgLA3D2!XNwk`rybWoD)-1&7OC_#ESy3!l!=IHk#GiDhQ#?Cfi!8z;)hwtTr5Q<6v znor<*$1wG|H!NZr7DqRd`&w66>JnY1^;z6%t@15Mcg$zcIPzhQ@*fS-f3Cs7Pmc75 zQmp?R4w)Ynlh|!xFRrfL-50PRP4{-TP&R`HuAZHpl@01_`6G$Yu;TEoq`2PTOYjGD zFtIxhj_=K2cx$aR^Q#y4mH_~3147u?YL1L@oi0`Nkp@8Mkp2Zr8y;wpx&tPiRs;&T zHyv<}`pW<=XzS5J6?qCE^3|~YTMDwH38So;~ zYm5vVfbeun;@33Z;SV8j4A8%l=g5)5%)Wl6Q^Qs3hMb;Q0U?-X*oUkiQG+gsRNia~ zK`}4b6*)MyUeTS0hk0d`FjN{u+}hx8{V!;oG0#P4!l`Kf`2BCM&*>}TkTV2qlABZC zm~M-}B7ivvuQ6C48C)v_!tnQu6l(IAk?SWUSGfEz79I|Nu};GHf|Eb$fj|DTSp^P9 zSHHC8`3t=G?aY^Y2T^zsz=tGJ>aWd$ zq$?MO0`?)bB04*w%q$s>CFjZC6;O1Fz_kdPm^(^%B1mgKL)(NMU{zWg3^u{34P&Ej4dauflxa}_P?b)FZzkh_&@5( zca&&=NwH{ne)T!6=JFwx;@qy6v+KrWm>97zFCK@#5zj`APQ)*Qx0u3RDXcSx| z2_USOoG3(P-RhkPfE?MF0YN!D7s|2WW_e#q!~`L9JD&9zsQjz+;db8>&?<3|W?7Et z(NLx?y@^JT!jpqy(@DZ#4P^S|Cqf^fkaH5t0ii=PLky^3S~?U=j6?Mx7s z0*Gd>A?lxfcTqHH4pTbK84--oErF`ZiveMHg#l{v*bUee?s~QWFYjWdg0=TO&W^4K zrgk&0>y7f3v+6oawXn{)v5>~g&m`^O8m1=x-m<0TjD(v(R`dlV>jSNr@bG|OCijr` zK#tnNblYh-1U?S)U<9W_$HFHv4=mc>T5Hji5jne~T-UPb8wvZe*nH#F^o?%w_LJnV z^MYgnCVOB3Ghe~L!gm$;{LtzR_kKls%JVBq4<}ZR=D!hW!uoRq;36@|8KZnnID8NQ z66QhNnQ*UIu?qqW1hWyn6?8sQ8Oh+r#H5xBn#%hhJE9~?+zSGv`du&r;a80* zRco~Wmviav7BMn4{cp~te^nlGZer)Iyyqb!7=TH?Zux^uH7Z{ zH`iSW)c)^1{v)Ep;g?pPTWRpd&LXoGtO4=?MH%(I7`QVi>|d^0V$&|W&W|FCNCq>` zRif_S3nxMz%3uvCN9J~njnF_Hafl_IcP0HBu&*%-z!`6}b7cQN1Pg=CAJK~}G=G9Y zj2#QzBcrbC>J0`B7|m}*dE$72n{P9X#rD6qpu%ZCDk{0-{z;Ost8KbzBt}D$g-@;z zpEM?=bb}E1!2h_8<6mgP3l)mQ@NPNuB#(W|l?`t2D2b=SVxx+`sp~{740z%(mVjT% zkukQFV{uc09#TQQ{S>4eR1nUEMfR5c>s*iOc|b`!T;1(#VR?y&vo&n-QS`6kJ({7C zZ;G1EfG>#FlPqNf%_essITKqanPEiU2?$8=7ypXX;2&v$(2_K;7M%Y+~d3rfnqq3xy1%^0ztzs{I&hw@l;nS7bGIyk2eA>_dOUu%4zToX^W z0gIt3hkFCBI#)a;OtuM@JZi|NiOV=;oO(N+3wetY-C^7I_{S5`R4{h^)Cn~-I}0{T z*eIo=gKX5;Ww}%2>aU{?Co~W~$1qf3+U-+R6(gvR9Y~7;j)py$iwu7&&x=A9GUT7w zfWbe0zznAO{-4ZfG5>>y%UMKML3Uv2jSEMNTpAAGMZ!)5$`BG#pPQE3Cs2ngRYYfQuK0Wcnj#tqskwgGKdasBSA9mr|l-|GH>6Y5qw 
zm7#-hkPJITh!Pcrtlnx2-M&zT@XCWC9CHE$2f$(yYiwhR+uGuJ*7E$@cV4?1rQ+F&K0 zL9*A6$P(L=+<$P&iM)|mr!s1mKeir(x<)}V&emUTKi+A*S`V#_8cxI43O?1(-jkSx z>C^vM?wY0HYsSIJ^gw=c#m6|H>aK6E{EP2#*k)Q)b3@Q;GDpVWPVV6%Ae%rsPkchZ z0FGb!VE1|8m&DUysxA_{hR z@=F^5BZ*yvy^Cc>pPZKgzQ9?4^V7tgEdl{jJ9BkZIgceFPeDW$ zduSSYnEvwF=)-e7j$f&Rlhkpj4|gCD&dyRs$H(WPdML4?#ve182bs&`f`|1@i4cdB zzG-Hm`yKdT@-9vPa3uEW%m*E^PA6f@V{akrT&;BNmZd+-omcMs>sO;wo-#5CbpK)1 z(8Y@VUyOMOt8C*!?<6_=i0n1$Oc4cE zo6$r)wF62Mqg_Oew4TL2!&f@&hvH7|JA69in5y?VSgAAwZ`hvm1-?90OmdgN!`B44}I*{Cg{CkzROh4 z)C(3cWTx4N@zq7Nz=J5VJb{IUrJV`yUVzKPPVNF-j631-T>?RnHQq&Mj0q{_V*7N) zTCETyggzD(qTR1UJNLcC2)SR=`_%qZ+U-n#G1(QFQ!)Dly33|~@d@IU%4$Lc zH0#lZKdwpKM;92FWk*hNuV6K3gC)O4Hea?pnv}`wQQ7U8tT8voRJgxLt>LB zQ4M=*VE+Y3bNj4ZWaIIh5}ogLZ<|JTaA1(FJoxodd1!k0dpUSPcYu@ z+EBPVPX{HOajGd1*}A-o#AR`6brOEAukV|Ap>}c=!?BZ8TF-Awt6lMi!Rqa=>koYg z1M&|hB85+spovdY6-+M#E{$qix&EBevBN=_f!ZV$o*(!PLN_4bC1O&kG%q$<<+9R~ z*Fflkvk~3c&}&w(u7t~EUCVpu+3C|pYgS4SAwtV;pLgd{W5}GXZlEitU?}=)MLcN= ze3en`@ig-JR0Tgbz*{SV zvpuZJbSo_vQp^^-sErG4G}-aJ@AtNl#;f|q_Yl3{Qd#ps zP8b)At}TZ1Uk3~5+pJAOd~qN^Yeua=kFHk9}k1ql_v?~EOr+{Jr)mcn-Xqj-It?(of~vbUl>_f zpj%u{IEHK&EBhY2yVe4zBJ@wg>!{W7Nm^f_-#Baq_V@(hS3UQ#E*Ov5wJ7M1i}*{} z+;W_7?ozC3*pp@~vI7;@T!5+9M_+5XDT&F)8d-6-k%oB$U!V91dDSDN8f1_jA5OF{ zL+zXeJ=!h*k=gQjg0aMZX;hOKR3Rl4GVW`);OFZ-p1mN zoYwWOI+f;s%C^<<3&VLG!*JPDTW|pmdLHX)e|?aEa0a9rpB%hfpR!YZR%@`=n}TDY zpinse{a=8!k1~q9IIW$Lq+E3i=3ho%?g85NbNt}2X!FU95jAKWb6@aA^yPLWY+8(a zoZPr1=Ud0c2HA?;GjgiKJ62)!8B4$(IH$G{z?RQ9XGD|=zgNa@9IOugtNj=8d4)Gh zM>~D3Jtu@-#1yE<=<*ft#yOT2>{|HYs`-<*!kGMTL(|4E4l}bi1tXEYs!-*10|pt4 zZ`?J$cY9BM-N;B znLI&wHAv0@;bvsK-TA~(T4VyW!w{2=e+rjV>M{04YQ0-MuFkdQq(e2B&~IMZd@}^> z!QR%qdz%3Cw6^dq%p--+Vap;S`Xf2EFpkwK*j9vT#dDNoNBxbOqp+m7*u^24%c3;E z{k;)B1+_&sK$~H($RzeSkV&um#!}RB>ImWsQ~Fxjx6RDwV!87QY~R}lEwY0dxpgkC z_1+16Gae=KzO_S6l&!B-yCr_Ew7RTLs18YF?<%FNuV9S>l37AzbQG5{jE(T$Q8P(} z%WaiUOX=ahny+g-e#(zVWe`UH&4>$UpPFLl?4?V4sYtd=CCs8&@6;_1ecRqek$pV4kkl1JmRuAjW#5f`8FTH4WGe=R(Ac+-uJ;-5a?d^I{qOfY=ggVk 
z@B93g@AKJ?lo$tQ$Wj1po8?j9nG1+YNNub+*%m^zKW%4OLD#xe>)E+i*mc8j^0q5j zoyrH}5lV8=a1#>DA;#fzio$KXXeY?Q9pN-Yj zd$eJHNL<6dRm>^XcE4c~q?QHg>Aq|5wnnZJYe?lYo*Txi!8^Z03%g?JlgQHHv8fbF z64bR(okQbZ{g*-!WQ6X3qUY_USaVV{LE-hdj?63Mhu&(z0jk=E=X6v{aTlU{KP77V zmZX-Dp})fBL5`xlhYTFD=jtoa`re4JOcsn~PT(rHtQD$}P>vhWg3S02% zpR(XCIo15Jxpn8o8fh7kb;b4N>Zx>5@<*EnzJkDeF=g!Dw1tcT76C3q_#l6H%V0b3>p$X4gF7JN;b6P-uqQY&C%f_W|f0GX?8ll zqCLwWrZX*gHIs?Zf8ToefW|#?8K=Bz#Oasn<49sm6TCCs7BS<>Ut=A*-we_XAOY2h zfQ{O5pcjnZqIA@x7aNWh%bm{qvqV_44$*M6*vd`~r?bir^n4y89-zGw8X^6j7wr90 zq-`xkaqlfdqLU*`zc{YxE-$vKc4vX`tcurtp&y7N0b`oRbVPh6n|TYN`sWg|zUko3;J3^|aYHtklN5%a5KBQ_bTeSAQQmWy2RJVi(s7 z{y6;wd9AU@MqFC@)5JB6Lc&-dJc!0%w!FmWTLPap9A_#SbBeb1lXPukRi~j?^gK?N z=xVH#ttG>LXxv>w)JKzwT3L@v2|BB;=Ee7TL}V#OKj<(OgF&s3pB+o05KUT;tu(HA__L&z02)FOTOFSm_lf8WQI_=95X-W+nvjW_x zUkk^JJ8V7dAtDs@nu;XHWpnIoxG4O^wB%L-=qN9qm`->vpTj%wDL~KF?@{JCKSllQ zzhejZ`up=K+C2BlkBaN4iuHd$tnUG(ZwC*nTlZ`}ZROU>V>U)gb#7dG2YA}yM(D?t zMd_PyqX}2{*IygMg$i!vr(v%|LJgW+-QQ1zmxnIOO~mehSyIPPj`FD($G;flmIC<8 zH50TY?%4kYPkzp+@#41RYrc76>~B5&RtFf`Uo!KH*_h}KkbQa#!C#%NM0Zr;JG&NL zY$b$0_*J%?A!Pt|k4a~8>`>@wYhTYRWGYph&KTQn%cDmD;b4_VpY+MobYkuoYn<-- zwas?~BR!lN>$Zf<`uC|bnCblX_$Ta=`I4x$GDFECX|{+yTz|ngX&{$9(kIWApM%BbphCd&^6d({HxwL zASQNp6ms!aF_f;<`H>~rfZoNTJ0wr4ftm?gPf0ZCx0amUw;@yUYk!JfQ`g9aR_=~^+g)36`=EvK=Kk#_>yJq zP7m6)VhffyBx+>sC+x^e<`Eq_vT&!I^x_kDp6P-dw_lP?Qr67;ZJ;eSFq>AY$rpNW zEBL7L`L@8|91f?x=s4D)APnVVbCe41z(!9wdoeYrzFfNn%jg{A(!I3VQHxTJHoixg zr%vuM1ToEGxP`A?fNTru=mzIj`;p8t5qBTfrvyF28z-}Gltmkt!>7J|tW`Mj@;-O@ zz|xP+jOOVSKdQc-nw73P*$Z1_0Fk4~C3K^q%qW$6QQw{@&*2?~!!PdV=r z1~rHnb~&w7403sVI}#cRO_yl|;ZheX0MOm+ZH1WS6$ef+G{5}-kcNw~UGYQ0Ubxjj z5a961E9~vVIdLiG921mP&eNBdO|JSgxr$k_92a2DI)Sqk2;-r4If7=)beIHY+b#pG z*4cXP7=HcMcxbz|GBvl^cLi6vhMeTh7y$%d}Me70k6p^_+fy z7~A4t23)!^iv-oL?g{ADs&5#-)DQOD`? 
zoVGOj`LT$KE&*tj(vd!C3jYJXKiv8Y_=ZN~*%n8WF1*!}XVx$XK8KI`8=D(z&V;c$ zVD7BQstVEx` zU+qR2y+qUf*zwQP{Is@&Sz#qRruj^`V%Omhlf*(5}7Y#3N+m;{2 zwR&q8_|561X9?Z5ZC^F}|6l4n-{0M~t>sUBt&2DO?WU0&iHA&ky+Y&noIWZXzEk9s z{+Wl{w?Bw^`2Kaw4Xq0oULv#}a9+J};lP=f2Ve3Z65-H)@Mp~PM5slVrysr5TGq7x z8x#__yF6>akr0qtC}Haf&G7tFA4A5`cXf4fb*}@ z&M9eWZvDfbd-nZ%$(n;K2o@yY&a?boXt0!S%}wisA-+V@(5G_*_WVp)qbr zQQ0Zosr?NRby!ePaB5+d(Q3S!!v9Cf0Zwr7>~mr6sfT(`pFaIPl$-M+*J;69{pB#E z#_zQfX0mJJo^`*g_)$+X`uzq@3JQw4vKD8Mc(lPQ$)wHO>c@%_x-xy@Ka4X>25#+H zn{s8P34kYeah&k~3|{$ax+Zw#9W_S|M}IFNc^E9!>5RFyJS|SUxyKehT2X@ntL4w* z%pLBNU*GX~{vuW6*7J3__Xf)Q#d6oi(dX$Kt75Ye+qZ3dxyMIJb2*3=V<4@M*~NbK zb}_cyRb3SB&1yl zPM7}=_8D)9zT`>49taQHw(a#n!F0P}dG3%627SC|wl&4E#X9-pX;WXf_JZPoq1uM= ztbz3cT86FQ$!*)h9_szt`v>>w!rQrFOwDOoNGwb=`LE?sb4S>w!0lZ_f()LyEY>Ng z{VW+q_b8%$wEX9_vaKSuNTwi!7VNl`9F;5E9;f%~wrxXya8HSl1&69?JyW=(>qpN+ zX1~THB(YTP`AFJf!u*1WIX%tE3v^5RrH}tuwKwi=pw+*t?@7qE>rmlwXMfOvee4es z=T2RriNz{SYpkzip>2&Wh76`Z!Omz$9AUV3N&jQb9uC;(Km`eext74| z|I3}!ds%1cP~#ENmK@p8HHL->DrE4w5Wr zv4;+nS~}LxJtQb7n=)q~sq%A3l4I}Mzfkahjc1-*BAk@5fY`94x;<8!$rDT6eNx0E z+33GUBULaRVKp5>>nRHgjhV@#yZ4T_n0~OSRmr&a*LXf{(p&ocRy*^?w_}+7&nxY0 zK0TeBTixDq=Zh7+`_^=~>GWw=DLhP5 zq;S{{su+~;i-yjA74?iZz><8%8a(-6$A;Ie)=9qhy^>eo6fuGe$Lk$zi{8>Pxj3aj z%Xq5sK#5dCU(c39UEr39_M$yNV_IbmlNHc}oz_g^3VF34-Ze+ISD1VIwhKy6!(P8A zeHO-ik>X6y*w_v+8@^ec_ptdjni)i@FbW|Zk}~h(R$0eb?@?oOgNvPJv@mOn4%YS?rZ}e5O?`rf_1~Q4;+zf4plIdQtT?r5nMty)k z`Rj2FANF#qcSb~Av38~%SFIjxMs`&gMoROS&k0d+(SI*sJHcBmR%rh;Z?aW+p9;@( z3Y6EMu`o5@h-?@Ad!c9x=RiZN1o-m<2E4(Dk6AsUVRlxx&FnV{Y?AZ$`m-?k6TkB* zu18O?WPh0kHy4;d>oWdGP_95@~i*-J+v%2k}y4EZ4 z&3#yMd>AKfIoYG&8T;2lvJ)JVDy9ae!H!6Wkic6}3wI;}sJX0eN%(@HKJ60Q3Bbr(pWA7G(W)Zxk!f(FBsI@;n5adWN4n|&MqA>a55EK# zj(enf&Y@w-Eg@q@6HSUUdEUQ2rsTapL#@AAYUadQzohjfD3X)UzM5V<|Oq6BrpRYt2O#y zz|;5{af$=$xP8Q`3aYI`v+_r;otod}rv4@wBx0 zf`6{2baifDpz?weR4Pb2)~5>tocCnNyw> zUc0;2%5Br_@PQ1B?I)_geBkyrE71gs!MTM604V^c!uIrG$zQ<8T1 zRxvY`m-Qq}u$hW7T&dmTnlE6&l2&;IjHLpn@Sx7YK?i~4+u`U4uxeB9;=TU)pzjG9 
z>k3J-5bso~Hm~jUYOjJAJ5GvPmrRqV(>B&oFr&ZM68Nk}`Q<)swb2a@>Hd_vUltZ= z)kx(o-weBa!Xv&WJ*9i!PR5GQUh;`j$GN{FpB;BIA0&J{-3V@ssLy0S91<^t;PpJ>7=oF8%`&WX(-%{ z_DZ-=;m%Rc3Pal=wSg%#+OPmy_qK2iR=Z7wi#FP7cWtkpKp^-?1-3 z?^G9G+?k0Gy(00h1uk63Ef+IQ-xm(Tn7n|S#W)U8sRF`N?o0h;HUV=fb|}wJgk1(~ zOLe*cEcLv$=4_K^$Qk*(G9v4FETWj&H&t_q1b5neEs%~pP#6~Ee&#cS6UFKzX})U= z5cqw}Dx@GWyib}dq;I3sm0XfS59kimV1K12E-R&MwY)d-VS>OXLMgfZPMBhlNs9C> z^YfzY*tI#%WWn^MYMIc)`wyQabID-p189|v(P2WmFV5oEmp!StWRbtaTzFU>_m{h8 zc`Q4tZ8^J(*LOq(37iK0Zj|&YCjWWZkEH3x;5`q^!>=tw^6@YhFOhL%gN}I}2j>llfT;NibX6|iY%TW?PDx(pK z3iBknySxzgWdY<&N+-{X9P>SXx8J9!;N$N`bL zyU#ae6CP^&?JsnQ4GLY5057x&18o96MVi7t@t&=df1*cQ zclrqvZJqqnxozF)C%UwC^1qM$fxP`Y$eR%2za|Bi;{UjSt|p;mY&`N#*|*P@@n%^< z-iSw^xrsm6n)ucHh8JR%7)ZS$2cr6)W+1G9++^0~>1M7;otb5qXg?Qle9uV!eL~Pl zT$$6mO+;%4`TE8_x@*28<=Q{4Z}>I(dv~^da{(-+c>*GutV%Cyz46{Zz9CLLR+)}r z@asmxiRJcz$57GVkxYe@SGm2300mRQC!BL=bo*dd(K|1o)E&Gq6GemRje_jXn0=t*wJ*63=Ms#SBeY*qtOHHzsfIFuA%|HKH#T%ZN@rcgvNH+7=m zy>Sxul@)3RuW}qg1x94Go&I|W50^1AV#yqzcE3orfV`24K2xcWlA6UGu?;-$@t@Bk z+hnCquC@t{FN@1I2a%X5t)WXzeWbHqJNcu}%;Z^M2|*jerFk}mWb_)l{!%+JGt;6c z7bmp;Aw=#Uqoz$rJoc*zeD|JWf|j-5tq1)A0yMZ(YLW`5@!3e=>%#yDJU3M(qE%H` zc=?+PnZ;avn~*CTy3WF92EK5`lqFU1m5=vjCKX2jX$lJo3mpIVh^Wc{T}^x zSYfkxKqx`h1wBhF8Lx;(U;GSIZ8|nORh%+-7isb9&8(N)Zp0i*r2nZ~ZP;9R9Y+o` zqlH9s8dswrIgBkJ>gQ))lwqB#G`7_=ntn`a35v3SwAK`9Rn6#z%Jr}8lU0@}vcGWN zj~whsjM?ir3N^u!Ry1Z23%DXn%w2KHvK>MU)f2D^p*9y7ax|Noy85FGw`cD(Ax$T- zF}@#RRhT>T+j7KqNI@UWPnG(d%GB7PwH%SOs9#-Z-N=Emfj;&(0-P@*49nU-tB!bj zI4@^ZOP14389L_U(UdHe67aTbgxuUcD`QDFNU6PSGj)DodzQw%o)Q^rd~MvRfFx!) 
zjoSAu*k{U|I-Qi>`@bQz=2YDFG#lVhCpskU!HI3Kxwj64ZWGWSq* zJAK2b6!(HvthBMpXvw@}&l;CgfzdA0i~E^v88~?kPm zW1)il?t<^Yhqc;eibAVP+H;!wXds%~%E@Lt6M z7vhZi5V-X3R4Y8pS$?Vhvl>g|a)DYWO)V&ZQLU@)`~y(jbl)|G!ftKv!^z%>#7H*%aTvz5k*=tA^ufLsAL6_IXEr~baqUCG= z_@*K)Wl9WC?jAhZ7;F+xqPLsfPSYg!+ygNnSWH8yD=I@V@!V?@upt+^uoG- zq^0R#V98NsK4Y`pC(bYKl3h(oXi1S!Kz>_T>)RZnxBRoGSYyqw^q%0QV;$qj<~o;2cZ9{Fzc;tU zilLZfc<`z$c=*G|dl=(E3zM$@oG@+S={YZBdUbtg*SIz0!SQob;+`R+-Lp9Nrh$G( zk35rBF0aZ)=f(n*&5kqvX`R0Z+Lt87N!l*@K=6{51&M)scXS1o<=i`37&szLRZllc zmG26FGbUj-X0y?Xjq)p*K@&)OcsWsJlSk+wz`eGr@VLbi?`Not$&C%Vk7TvTnww?Q z-1nC_&BhRh|EOeS%g^K7WE;vRGfW844+zjx*S^M86g>0OFm>&_Y38nRyI;nF^I?1* zj!2T}HhA{x%pH4#L%+<8pP`vo@MQFv+{V+jv>?BwSROFt0s?31QxY~xTAbL(!H@7F z$lVBAx?3UH7oFVU&f4?)6x`pk$mO9Y=~H{F5}FtH_Pujgdc@I?m4|OFD(tGD2Lz7E z^`VVj?lED-Ey5Jtw2-vBq50!dFl4QT8>>6Vlx5~Q5Fu@35n%rOhH#v=YM*I@LA(?M z$+mrPn9f* zruLUT1rR@)*!AeEMmv1dAdC_wu%&ntS1w+0S~IVV<>>}r<-O)+yZ5Q>G7phr%CO>b zZ>lVjfe14Jfu!$27vh9C`ZuIQO%E5&4w@6;n(-zPSuOVNaNAt^%y8epyM%boAj3;r zGn}WdCw%3j>4)7%9ISCJxxjHAk+KeA@Ywq|{9{!EP*%jNOh9W&!71);(EEaNZ+6UG zKIee?L=dLrdxj`3a~-<37@z2r68_i&ame0(OkQ@SN1u#pvausfYxp*n+banj=d8Pu zJgJ7-x1U);*r^&+>*$Fi&$Q$M19hpZBjbjS=!U;Fq+_~*`T|@_QiwCEL3mug@7fIk z!$bsCU5VD8R&na`f0?MJhqhQIEgSL$jFOZ&?tvm6-evcLgaB>}l+kcun`QDo`a``n zofolg`%NRc?@>}pTwIa*`kWC3^diM0R%qR7Gxr@`Go()=e?qOhQtz86=V8+`p;`_T zAw$pa3G>fRAEGP60^g#*_&f>Z$g^bV;fIJxqjA4eXchNHLO?Qo4|Oq7-c_M+Z6YdC zp1RveJ3)xQb-Z3jIj~LbobS-e{v6r7t;|$QQ}C7*Qk?;#W2tS~5di<|(%68nK(AE2 zgN!z(XsoYC{XpP_%vo?Y1ufE`gs>*_+y$p zFLbcIebS{erH}*!O<$+;VcxsjQw}NB7I;E8b{|K=2$vdy5EK)rzk?=TlJ#M}Ca^ zLZmSwKD>XXIt@8ZO65btSFzbwm50hTj+Zaw6L$X0ya7l7k~q`w2er>cY%W;^yH5I0 z5#4W{8J}6JHBQv$e%37vq{Z11m{m$?)R>!8Iw~{h_O_12&iU@vHat zWL>D#bfoBtu|H@AcAvV~;QDJnjy_?Yc_XQS++3eO65A{cc`KBDip!(4^&M+O-}q!m zP{)(;@j!Ht?Co^H+L&gMEJUM(u6Sd};nSw_*84(Tkr9szUOrk zc%_-@aZi1EXJ1}3PV(jvOQFGc`)3t5Uh^wsjKbC{mfnfvG9654x{J}SAN7g~TeO-6 zwJ)azc_+hk%p?eYE7ggriVno{RGathPs6ag)h5;U&n6%9JiC_WPuRn!*4`H|@K6-` zN|G>Bd`xc9QJAlnCUJw)+0SokeA?t~_dCb2l4@d^(@~6GIKQb=O42#MM}C)m2pxF) 
zyWU4gHlOCoX+fukQt&%F98&gKP&Bs+Wp<$(t;c(#nCFK{ zto>Zl(j-JIk*VmZ2x`tVxmE*{K^hR8Iw?@Y>gO#$2LCkd_CTdjuz-=KdjLeWFo1rL z-j%K9U{Qr=ZW3>aFvGbb!!!K`x*CEAu8Q;$!c&7TkGVH#IpkI16f*}}sUyVQDE?*P znAqx$noOIHYeolOxgID-t9_r?MuqP>#`ksfsTRpbE}nXzal~NfmC3;S8KT0gsRzf3 zsAJaZ1%xtR8o4#a7IO;)`|`8a2l;QCbo3ca7LL_m6I%U($dnljaI9j;STVaKw`y6C zPafh$1q{e}Leb(@IbjXXanmm~-+B00tVg3W+{7^%h$C9V zof%|1@IfU$hem|3C;ZMYw$MQ?+T)o!=lDc@`CbgT`h^Zy5r>TYVfV}Oec>h(F>gdY z*SBY0FLu4c;s{LErce7ij^4P-be-*!fX4RezAvyUXjVBngM|Yvj|5jteew37y(irJ zPfYWS*n2zHUgOEKhG_Xcq69BFyVGMR7xqz5u|)wac-|rzU|= z)q_`$oQWRL&Fd*}P<-Lx>89XGa_Bd%_$%`_k3NHd@7~8yZ*E4QJ5@$hoO`K8f5c_X zk!^~;s_@WtB)Bub1fn_egpSgRZYh7gI!+LN>%2Hr_gDob!LAO;$uq>HxUFIpSkf80 z=XH9&Pn!>{JNeR8ozzz+?l4I98y1jZ?X6^3DNAt39jyqX>Gr`UOsLP7=9-lW&iR0O z-FJ*G5%n99FrG;ARQWC?#|inb}gGcS9yDAUcr~!NH@2j=ihj%oL|t&EyVbA0$J>qqFyQ0|TJq%07KN zsq4wdm3=V_#XCvkw4mJ0hd2KAnKlCRFGa(2;{N=7wJ!!b)fZ1(E4KRRafwR(b)mK^ z!Gh_xwQx7tw@rd-Gngnr1k-KIuL4oRZ1)A~bETOVk1{(h^;e(P?eAIantm`w%|(fH zQ?dvJG%8=X-YKroQu~N}83N!;!g3)h9njLW(KkApi3#0%_#ow09u4> zv{zLVDtYcO$|o2;oJkqon^NcRU{wwZg3Aj(Bg zredS5N_V!}ZDm}W|ITL4HW7gz#Mvgd2(sz_dV?Q)+t$f!pfCCdPxliE14POuJNL6d z11E0X>4Cx~@wPQ)?|;yETX*`&*KM8r?_`4f*$4+kU+On7a4R!p!Z>I|8*1XQqi6g}yId+j*p8 zAk7&3?f6XiBt6@H-DuTg*S75d6!#Fu6wG~d+@RoZ?THsy>WMs4GtaCTSkDh3R?Rc% z9osK#14Ywceuq8xgHOPlY7l2lscRI{=6S7sZDklkljBIY;|1#Hk{3Xf4}sPRO#n| ze~{~J&V2Qs)cnr_e-ngTPyUOa|9POQel~4mwQhOH&{p_IJ_|Fp!1U;|h)Xk=Xk@y>h8isdq%j4 zeorAJ>Zn>^k#qN_^s#mFT7=n|oY&lJM)`jjmMgEJ6audGPG#e@z{*v6=!UF>S8yERQ9G zn|x9Ja5eI`qLz2HfeTYf8tY3!vvxD9->dhFe`)GGjWVQ3*!_=wj*kJ}l%!lGv&2Yr zP1k&c5aQTXHf~SG1mr>?x2eQjENh|W5(`-w0?#NtLkJzPpZ{n)#t*V_=g)W;ipQK}+nD#P{!Q*~?w_G~k-$E8zmbh6m zLN|Dqz7*Z8d~i3)lh#bz1Lv`2r9GjK)mmLrc-umcy4 z?GJGPc!yTi&W#mHsA%YV7+3r&X%pfmSItRNna^fIhD%5(R&4V8hrgr*3QIx*mWAyL zM=^Ph?uC<|DMXh3{?-M(Qis-)RMrVez_y%UZQwNdaIaDK?Pwz`-LiP`IH)2)Ik==B zrFUx96O{Rx7a#7{$0h8v#svpZ814A=1yqe%MigQyMJ>pe%HUj>Ym!k6V$uxzWEc3= zgOzGco*KlTju#eg+%EpnEcY_zNvF`oeE->D9P2j)?cf`jH|fK-IX%%fAcKT}@ZNUeAP8 
zM2^oMG)@{aHa^$S+ABMT`Y`OyD{@vMPjW2tiaIa>Q0iK59I7_`(i3jx71in| z`_W_+wFeV950uLF?~XBp@ZNlK&R{M}z+Cczjslj|`B?xnUnvsd7G3Io{aLZcoFZAm z)+GE`yMzTj|MuE|D;>m{X2cjh1)jF%f(-Dfx-l@R-+CpaUxnA5J*CWg$oPE^wU!7J z#!-Q$;-n7pT*{(rUPXHl9gv`C^E4cp0C+MGzT(yP@j|HS;#bExVwIssC|Fe>lxewt zDb0cs!lDXLe3WP`%)RB)R0JD24taB!6TiMfmE9MX=ktsP&W-htYOjQ^HHf#~V$%A< zJ0Ot=P4|k8tB}@IWNBT`$&aEWntdEqz(v$UXsDot!;nH-W2+C8vc_MDo43n3caSlw z<0QhO;S-!f)ZSwhrI22XFw3>*4!RCApd^FM?^F@;P+xs9sk-Xif!+AEj}!eET=LTU(-1FJpOJ`qxzh1BPM+U&z^X$jjeH}H$QJ7(HL z99j485SkmAzKKJ3Qel0cMlQs#+JI^&LB;;b_FY_-=n7n+JO1dIY5i8iphyb}0W@rF z=DWbQ60H(&Xc8;gvFTO+=6nG5x!kGY93AWWwWthGCP@_0z(Sw-ba$PMO#^uv=F?Ik zYPfwPRqa^B5veK3`h!wn2{^!Qx_}Q;@t&KG&Jc2s8VaOinW9FiYUcLv;)SzXk-}GY zIfsXlqSl>yHQGAvULLB7W$Qv@$yW5W6~GhbFyFNt^T?*CQT!G|T#%OY1P|c7K4Qvr zT4>KFwu^y_2iE3OoS_t3GL&=S0oS>t050B?1X28OzXGgx4;AiQ=+u6jz>qS_DTP&> z>Uu1G{jJlmt7_|>6Dn28yJ{4IL^DOWJ;*^dHhy;u>F|9ERa`RXK9UEV*?Nx(_ns9g zBJ1nu`kgEc#hOLjO+4z&Mx>^deUzYpD^wh@Pxe5nn*5A4C%HvWalI?Qxli6i;;IVW7P863xnY^{jh|J=AQ=HA5YdjZu97?+i$I@ zTKRi=j7wS~^oYgGn~cvHI}jP1{qBB67!r#f-D+4&PE%lv$xNId5UiCh_7ITwE^kN` z%90gA!3QTe3Ae>cdSvB`*|+-sC>G>`cTV_E+>-U_{GDNeUpKOeB{_^0k4sqvKvn(d zXmXmUhQ@y>V$|v0FjI{^A=P;I_$mp5pk=& zdjX|c0m1b=Mu#vUjYX0S60m(Yuqs2Yq8m3?mOk3&FtZwZBFuwO)M)ZrteeK_a#UuE z8Z$t4j<3sujJ!*8V~3#4VmEORW10dSLZxQHrZ9{xKyf9O_XN?+a_k^T@3m!SsB)HY zQx4*dB)+7@8Sv$UVtmn`jF2j1W{h5xIwp(&4`-@%&x|hqs5+;kk(U(7=_q zY^Xq=u2&km5z>)sjecrXn7KCLy8MS*ipH_JD1o%4%Ww7riJdB7s^NwXyY7k!J& zoG6iTV4rVv4>rQ~x<`!!t}e~82iT~!MD6jZuJBHyKEP%rVClIx%+s%%iXnQ&efe1p z%G|AMf_K%{5_0+PJEeSkYopi>C-~fb8^n6BP!X)Fclko!C%WtaP}s;vdwp^B3~Z$k zO8mqs-t+=4VB)yDSff0bJG3`0<77WY@*{r*8fA&EJS5G(6#h8_D-hu_f5lLEx>wfg zcoXiSt_7qy@%J{+eqRSY3$SNHDe9BP3&3!0X?A9 z`W{P;_5#}jkeLtsZ{(q4?GAAA#$=u9Hg9d;=6Y=42glBHo zAK&d{KzkBf!tmJ`u)+ln_gzzTzS>~}MbtG%fv)LOa?pFQ)xu@Ovpp!>%7+E-8a{a! 
zVI%FwEh#%7vSC&zJ8X?OwnkexPG~18=X!rq+2H&W+%BrG6~)GyV{P8zh(dmgmKAf> z#$8r?`6lW<8OLL1JJ=_hr4MOJ^+L)ks}*$(wjb2MDs_5Ae`lqID(?DC7;!|ZaymsI zB%e^WJ58ZIyXhe@Nsp?Y3Zxq%)nvsF^TXUl=6l`AfojPlZU@pr)52$P6I-WS-Bcus4TAweat=+UL zcBk~GNxHV%hvlJs^vSs7$D6h!)C%iFWFs z6@jp1rlsshZ{ezz3b*<~rwN()NU!(x#fLSzPx)a~VzK(GQV?=#vg`kd?pRcMgMmbA z8O4^UMRsTCM*SE(go1{bZ9%3V*%irI!hHET`erW4&!_-iz24gLw(FG=6x0Wb?)1=l z~?yT$m8>wl2eQ+N3oZ zD{$iSH{+}BKe})mnl8utF2=gfbnTMt2melDrfVxq!i+nEv9U4E?mAoG`T_6qj zgC4er19f^%LC-;BU}{pG1{Xr`6c?e#Q8WJV%YJxfi+{_e%;nECJ>9CI>L)=1yn@*_ znIbXuzcu?@&Y#I2y^zR;1-#Fl>+>-fWL$h-;XA`#A_Z*`Gu}0T$rCl26pIgD$2 z$9|5wTvk}GYg;TKyW2;Aw*CfD;Ff|0Vz=^$`1)(GIUZx6qnn}awK4GSAR=+@70l!F zB|SMN=+xz5Djm*x`=yPQ4|G&r0~~n*e@O810@*1GM&ST6s-2#NnVXvQ^bf$UJ&5l> zaqK<%>ch6=>p)bcJG$d-A7sejS*HJ?bZUJ3$H3N0k@Z~U#VQF@0LhKqLtB@%nh>53 zVavz7YE{W0KK->hJq5}eQlTLndxFlHX7L&)9aefV;Mk-}N)obPck5TQ_&F`?p$1bz z=obq)hgR5QBePs^E+K`vh%6#xCTxmd;#x5@)+@E3ebg)O_a57|@0>q;@%#_fPKe-f zPTi;zGjRK-Z+$xLg`*FLxJL<@<=H5EpFUTAx{=p5?z<=DYT?}m?siv!-`McbLcUfR zp~M$7HXf0&Z0t!Df=Cwvacqukf(&zfYloS~BYCvrk0v#cxV1Bu?P z7Rq{f00+LHQ5!3R$eGx9)E0wP9(2ANi?^!AH?JVHHTvV zi5Un`h$_X~oBdfba|}oZ0I+!mG*P!+A>HYYyWnp<64oX4>Xc<~&cH%AOhN5OP?p?2 zf%BXzTW$^A=u+Wzn^8drvLxB++fBu-Ms#Et*AdC@?4S$)8Bul3yB0-G(IBow9#<6w zapg{3q@{pv4v0X*H}1atLp)-z=Y)hSF=sq2bX_0O838)Cvl`1iKZK#$W)waWODmHS zsJxjXiw(~8yc8p)(S-(zJ*dSlDKGTEU%>bj1+%q{V#kN+?iWVLqK= zHLYo#=EmUzDwyX50+$W~`KFH)K6rUfm0@mY+Mp7P2wDui7l;pgEvn1k>-Z)eF*S|* z5|;4?PwJ)iH?`NX&+>Ox8sk^Ll(%3vK#iSZzrI9L83D_PUsc_!7meU{OX2h-J<-K3 zXXf@=L1Ij{znK=hW@u(o(xO#8=(_*%udt1Zsc&6cqX9tg&x-XVau|iUBE2`^47%RZ zI*YCCsg{a}%Hu?*W-uhxdNmYJ3v5jKaVu6@nv_wH6n zA^RAzMPzZOCQ?NNPbdd|1rrc%U=YYiL4zj$zp|o<_Rn~h;3UAXxCKt0+XHq^T2UH) zs~^jqC0M<>%*c%rYJ~La&CS*cSPSSrpWiOtt_c@LphXX4x_om_1X4dL6ESZjhI@0{ zfqLaK5ufp>kS?_L*cvG2Xi*$_$G$gr_87_C<9%C?R7|sEYf|uDIx5_RK%q}sW&CT9 z6wXB?bNk1YBG=F~n5amTxt_(hfLW464k=^E){mpAbI8~z5wu|pu32ej<(!9*WQy09 zsA&Y_Cu8fI@=I?+aiIG3qA`-?W?<}@+pF#uGWhf?TYRT&+y`$RUkP(A4zB%(sB2D= zNPw!qNQ=iYlxz5FcPIfpiw3zqR^R#wKi@5bIk2>T!mS}j1dsx; 
z&ku1jLslZ3zpk91yPB8koh@{sUZlE73bRvBmwsz%5Z$)46m}wP0L-5^d%4BD@NqE$OqkIZ(PH;(YQi*%GN%RmHTeN!Q?~_Z)el#2E z$kXV3F`I*o6+Do9orm*@+Hs|-)AA9BFspPQdB{MP7#<*pd#U{=N+-V0NeL*U_rj;N z=?cpID#cwQdlbG#dx1SRu$0cw!JA+_HoHi&9HV1zhOW%W#H!A|Cp>%-1<ZwiX{@o_784K|zwNU@EgN6%0T zIoRO%4~ZSy`9Dgq59wj8AKNEQqqEoT?!CJ4J8XBv|?4{<_@%%J}ugcgS$ezV>&dF<@eiACsKWxsk4nup3VA7 zon`*B+_zBjvDbyC7nW{<+9{OeXp@DX#FMMnN9?UBn(^D1gp##Po#7|&vb;2RX+9iQ z`g`>+meJkVwZR7Zl}8d10i@mg1ZGGw@{QD9piwOYeW3*S`L9%Os?8@h_yM{%*&;uB zp}pb9pzXQ|6p5>Cl5kHnOyLzbCqUUur7cHrenV9QFK-E9dwzNN*a9NQE+%+h0C9|W z{jdOy`Lyt?ZX`cJeoe5!+O~_8NTjL25IVHCV$EICs^3{C=si6>H1cg~23un)+NB zvwUt$W^mdZXs{SHCOk-!BzG2DRmvv*n#xXaiX14x$_z2n62&~#8N?MNvBzSwLNi^) zAS7mcN8i}&KVPO?hAUPTOIXTpVfSsZfr6{uXwS5URJC*t#EW*N6?cGm6YIDEc0~r| z#rGaa8asAge|LE^b|z_rzP8;Z0cp@y(bM5th`#F9ciIb^bXq>o?}#iXSsgNV1lnSV z_LqI5g*OyH&b2(UqLhecdPNV5+TRb0zgH{q@rH57?UN8{XPE@H4u^L|36mU#;sMYw z-BN@@N6?+BlN5~KsB_Q7%3I^3}cdRif{*_sKb4PkhxeP z%L5K{F?%Z}=U>3RGTT*km8CF?%O{4M+82giucz8=UE7$M$eD;X`I9JNE*I zrg1o4?_>4jy#|=9`g6hlgI|A3H%dHn5@fVs8?f#brQ2`KudNS9$mD>43=V6>oi%Z~kO zZfrEzrQe2BVH;Tk-Kc=2ln_~$2|J#sdmBy0!y^t$c;HqRb*VOQvCQ>IGStZ9+QBmQ z+hh#q6N#qn!r7KLv^8D8K4J)XUfE*#CtcHWu+(Z#MC}C{YzqJaWmfO#zru>#@1Hf= z*ogI+QTH3qzgW%I2)`Kt9b?I^p6@58t=D>w5d!mHc3NdmeIqjxTo3j-vc;Jd9_c$< zVAhL3#~a^Oe7o8gdM7t*0Ocd>=o2->c4C|BjGuF$AKuI?j=}Hn8EuX~eJB65zRuGy z7*#Czv2nyl1-C-7x!j8^u(b*<&u)Z|z7`{k4AdaDV$M4xA5~j)rWB$pr0j94<0B(! 
zmULbAW^8QE;eq6K!L+VEz^=>!P*$!Z%h(w6q`u>=J`Z)|*)c_rE1(p-^mtD`JHVU9 z4TP}V*%4k{^IS3jN)FZGl$!irRNNGKtft}>XF0d7PA_@f(rZB%LvZssY&iOnA^j(u zkI8#d)hM;}nL@(TCItbxWv<;u62Aj%6eiv6q^0S-R4;~gWj?Zcec>Xd_YoH_$NHaP z72gXC0m=NrH4nBJ>buGSpow+1&tb}}PwIM|)@9w52ImIu7zZ=6SQ6Mj1!lCmHosA- zF_UXG<3foWX-gUfZA2p7#ZEj*jS8LD9U4`-7OApciGCzNY~?cJW1eViw5vKO_ZOMTE4g zib{Q&exbs3Oa#+y1#kI}F>be5trx!4l=61gr|=DytjV*TB+rcAsa237Ies6k%-YDM zr4~cXBULbfd_|bt%h1oiecwQLmjp*BAi+J^*%bH#t^SHV*d2^t8=Sa7N1qLv6*fr@ z4J*qw+CiDW&FK~u6->Ps5}`H`dUWxZtv*%06$MNhP^Zt+Zf|mxluYdwwTmWiqR#VPbX$JrI~y zrimSXCIRY-Cm2*o-Lo&J8x2!-ulFB!c^mcexv2MH&g!0YYXN%1nE-+h^@@_NVdD67 zsP?1F#F^Q=lf#AG%|MRKF{}vGVlMBk0$4~MIw#q{qE`5`N%+yFDm-X z4dtdUzCjqU9nkmK)aa`_e2_JRt+th9SeGr^u0ZNetUD-#eO7SwjaC)D!X2=6sBn`N zWgy`O4V+W(gQF@TB}wiAp`KXVzQu<9nP0zgW|fjbYbr6x|JthRp2N40n|%IZ*)G?gP}=T^WoAHQTJx!5w_dp(WNPoExS)SO#IHJ;ZBbrNw-pfc&TB(P29eHh^Ir%KLH2h!A_kzJ^*k@XNB)RQ0Nq@M|Ni#rqpCF4I;wB zB+$nfalgWj$pg2{&DpP%9t?F;Q)Nq!J-n(n@-X2)R#y?&i-8v+gI zH6?x`_=(4-xDxn5RSsVEp>eTMgIAZ!b1mVx-^bO-o6Vs$K8N$>)zDYRiu!hl;fuju zqJHtCqdBD4(LJDxh>BzP?)-OI*xe#Miw`%%2Pk5Z=W+cR?wn^l%)Sd4=|SV2`%c=eF8&6L1(c7znmzB^N1E4;tExr> z*E^SI)Tz6Jb`$}0mz`yPATnvL@#8JE^&WHLnyt`5&SQ$k7M7NMh`Pw)d=K# zVDKkr&^FWYRvWbArm~v<7klR!)>PWI>vzUc2N4)S!GbUfh^T;wlu*Y8NL5jgE?r6p zBp@XOMnMq~7@9~4hy|oZS|F4}y0oBz1PHwa2qg3Xq3maYGw;0L-ruqJuf2cl`8RVM zCakPyt*6}gb)DDyj8t#^=HeMX9wFcmmfdK*mhQcEBK;1vmg2>#bQmq=7~ilEHeuyM zFYf;&rHhF=nRf0u?|Q=7uKWJ!x{RX_zudiM!v82=^+hR=@_Zl3f-@d94sby$v$Yhj zJvGNdwN|RbVp=V{OHi?fk#O0={oseXMJ~&>nhuQpM2Z>MvfsgJ{VxXKEvQNH?F}5? 
zhX>_;Xx?BmNzmy&{7&qc8az_02^A_%&U~yhJ00FpE#|WfY!)_lJB@~ai|n@rvW;3% zH~3F+Y_=Sleko%M=FK3%N=LMy%-8JGTc99B{t}9BIAdAiGkkmff1_#xH(>|>sb|X&xZN5j^%5@FgF~Z+ z-=Q&3?lTqE^auLG|FbVqfcK2DD-O9XjJNWEGLW>f_J^^gLM$7K5F4!L)t>B9XYvs?e6Y< z2hVS@2ex>{NB;BorJ_#o<^err(fuzcLwQseZwz1c|BIirEz-1$)l&4Gx1-afzL$Ee zzh&J4iwKR3O;S8`WK`=CxP?J`dGwQ?N&n}?k{<&w-GBMAw~uvxRr}wH4gV2U|06Zu zmYe+}!2f@1OMnP({;lB-GigkjCjl#Ch;d5bhQOhf5#cRI;@T>Jy$Psq(Tf7pek83e zS%n6oeme(bBX!}yht3$wmg{dml35&_Y`ZN^+TuKPIxv1~nH6~RVPNtyX-oF~)u*aJ zX#ONi>u$k(u|sW-!TBdrUEWQ8qKDE+R$KE3y1_k}4b|lut4?+4mz!#=&CpsB?AI4S zMLw)ieKs%4xl?G3+je67jk;+hWbc?YCYf#j$1$~mEM6O)zoXxF1jsk`{*C_z$-*rz zPebUSzfW7x(gC!Be|Lhs*V(?(J%7_3Lmxi=U-ax=9Qh&J;HG~&Mxa$>*MHhQ(xgr- z{~e44eG`~xy#Up8C({7@4|KI+wL<^Jp#tsixWQWVr#%i@0Lp^9Kwt;_FXGtu5C8C= zdi@*mzo7q%wE+6C5z!GB_DwH zw<93-m(oV0Tc*Ox1=RuKYlm(pq`LaJyO?&RVY@@SEyqW<+>KvRbtZVBfs*XMoxg4* z=&_%B^mFE4$3e)3Sgpe!fS(xH7rquJ0mV1;{|~Tcd?bEaf^fiU&9>5?gWGB$ztO`( zh~^SiqQ$W-l!pF5JZLAzPb*oUlDzBqL{+_gF>uOWc7@ z&E7%E)Fs7tRZe#K@ub6W5zJvD&mgBT|NNV4)sgC3Rp(6e`zuzOTv8fHo8Lgow;m;` zGPf#GIGFnm0yMm=L*UL^1uO)4Nu~Tb+r|0n#XJDSKeffLyxWAoJr&DO63LVy5ObrT zNn8i;C}2|H*aTQI-4qpHEUpezug!MAc4~zRegmmzI-T!tgYBSM9GRzCZEWl&yOdNc z7k^NC5~IFP&TLt5ysU?-rF4=rmlb81LXUDEd$DVLNG%BHle3Z_YQS1?DPV}SBf9Pu zvTF@G*;>+&5}Y=h;jl8{s2kSlop2%E%6h|8P;oiVYhY1cNMvY@E2(8kq4z$<>X>|4 z&2+!_=Q1mmpTDmgX9|5DXOYn!o4XTQ{y4OEdxTyN0=&*e0@7l_UlPJ3ei!P4>fM6Z zFHBc-iPx!ZQ8&8Qj_Kmz(xA4j`R;%g0AN56ykK3%!km`tUmASl(5g8AYGOSp%~9?% zWRXsmG7P8<$gXR%vUJS55b&>)H|u}=?n_8k!lc8t);V*SnJ&O3)jEF%2(VlPzAvh` zM?c&N(Hqa{26RIA$@v|b`D`V2ZzXBPtiPMQbCG0=*%z$h!?KJcJL-BL9rj0wl^IPE z;J5&KA#~%TL41keP~g}%N}@bgNM=_`ifanmHUZT$CtLX9C3htE#dmq{ConkPy(+SG zpc}Dl9swdggy;JQIHk$(qa|fjmAPI!S=%b-h4BKe@k{~7M6)S{dCc)V;<5(xtBy$U zT6%t|8?QGl)}shj>H#-VB^7iEjU#@I)#)o`S{IH>wuT4;MAqt%Kp~110CZqw{qh`- z_wooA_eVPXxmI6Oy8wU363{EAngZX?HXpIwl11dN(NMSD@e;6k1g9~QMmRKO=hs_$ zSR7}%d_inJ`1dOVtP|t`|4-+7mP5Ug=vh1l34VIaMWU6(1K-Cl1@(NuW5_F}lk|np zN~Sy_0D(Oq#;@JsMdB~)Hw9D_JQ$+hQu-@j-?d{7%_&#L!-TiS`lPBpV<3mYLL_O+ 
z_RO8U;mSafl*3p%4XIE_>|T*TW!{gr(!fIfx= z(qnp3^h5>mYfn*=|1w#u!KX`58l2KfexG^!zq zbV^z$6 ziDT^@Ceu@2)2)Bpt)b2Z7{aijPT|4o8_V&&pm#wwPz9#JX%$u&^yv2WcU7=A`5hg) zo8-nfBe2Iy7+u&=wTlpWj_g7x8ny47$J3 zfScUv+}qg=qH4ZxO@9b7P)@lR2%uPrDRj>Gx19oxl8Z!cfa3v=1+c*YHkz8+y+2s} z;pjCO{kcn<<-;IOOCJxA3X#3@Q~}&0xX&{7&K>i_1JoRU7g^MnA3t!306pl(ENp#PqnR{h`=h27 zakj|?7y{k~x2pIqz}i>^U+tr@kfc)jdDBU8>9&_}tg2xMI$2l2)GSn(E+Jn7_}53k|29kH22LoO?THTPmvX&OaAqYcuq!1Q(UQPyZQ{v> z%bGsk-n-Y{5sh^}wKd^M;-E>Hw}jexhx>FF9an0+oz>pO;aD(JaE9WG^5(`=`2{A% zevX&Zl%lPUOIs>6E~jO4CnUm!=PlbG`*J2mw6*GA@W!Xh^SXg@vc&Dur!4=x9P@9_ z{nmUJrlw0;f*WNkMCL}7E>g?`V-!lP8-Ac~I3WQVwStD$EHt`~7gk)&b?Q}dpMt~~ zFivbR8Ji0l5%X1I=;dG$XW$-1$l@xLnnNE+6IH(5&NOW$s0IQCj{oD}H)lZmmtpQw z_W)cZWssO>EutaTz1bMr*6KwW7;NqBoPDtHtwjA)g0dOFVkW7|ccP`{E43oty2uT} zP;Kus(?Lpt@<5@TSw1pOFTKSidFut!3D5qTp5ri0lwVCbR<==f9RbtSFO#Ln!-eAx z_568)c{vzkIPNnL*<_rz4XdsGX>Y{7^$(>Cc=)z&4bnIxojqn9={V9>w-W7)x{tS` zj-^qP(wjTmCp!&bIe>zpc!eEv}8(Du2q zaU#mSF^?~O;N|sGw8sK4=9Tdlla^Z8+1x^4{#!}vM$J1KW=$U=Q!WcCPu`)M@0b>b zL3oQFGbtHNp&UkJXAkKmk$|}+Rwpcc=WsoIiWh%u=@~2BIPco&uphd{YoR!f!fvF+ z!7uF|Z{C?3f5woxgeI7PBdQEyPAcKZM5~JePS^Fodoiw-s_DlnAwPvSb#~1ueMxa5 z`+zPDBH}P%r2PAjQ2@QV4g@JcwZx$r*hu$d)Hz*W4u6{&8VqNaHK6lwZ7Gh}13`h+ z5^7z=NdPzp0;*Uk3mH}cBfY1DYaih@ZUGl!p~D6m`q&WIkW8ADdiv5_rfa{(Ru%AE0WBjpU`=(#{!472#)IbXu$=2YT#c+<{})QWN^qlFWZ zae4Yhi4rFG3fR`8VByZE1zB?|3h`zvJ)Y3u%{M=X9Xva9&c`Mx4D5!$BwMM8$o3*HrOR7XAig4d^5K8xN;M7| zhK~6?LxvX&Qlds)1IMh4Yz|xFt9z6?CCvc)7|5|LEb|_rZl}S?#ml)|6_n3&Rhl`n zQJve0VC~LK;nMV;egon~;gpqn2%ub^>)GAh+S6K|yZtpc^kpBU8ZPkX0?J=OlLu&9 z8(tDOz`9|>iWnn=9*0{6S3`BM8+LQNt4mR~sfjLmT;|%pKK+f>RX;+5O#-Naz3cPKh?iUVm^eH$em}KQ=2PdK-ZuSl5+fmTshHYvC>?}h_0xU*hq&oEt?p4D zAQk_@QNX(d+Qov``N~_k6c9Nc%4fA{?+$}oy)F0<5y_zM99=h zOHTF~$MC~j0522)u%#v)_MkaLe%3&tRCM=xqK>x{TzCqBQl0ElG{fgys@0XXgzaDG zA)ra1Yd$5HH#c4~(+jA^Mn*b9!2f_c$}Bp9GjXh*+%}uQASWk6ozOW5GJ|v(y&Pf7 zPYb5ZOX+7eXPbq|PK~?v@_6;*k`S4;&J@BoChK zT+)$JPaIHg=$i2IS47eId!&+n$x800Z~D5iMlXWScOR=2#C|0oPw}mx8U*-wP$!xB zG^;-vSBuTfZW=k`X9UO%b` 
zTRtu0UBx%2n`b@2pgSjB*ALy#cvgqBz)cJ6*}NS8DWNCMz&`C?p$8enyuJ(^ew$jW z6DL9M&B&QbO1#_oT5r&^3&^xS01W8R*vEZYNUkeXaBDUo#kz#1b{X3uSvj&Xd4iYp-pn-a7y`@C>TUte=v;kjRR zc2D#0ulNM_GIlw5ePmZMN`ae$J>a`Au7ku>kNTq+KO)qVc=hdf+f#{!-tIL|?Zi|| z;ONijeEf>M*&PULDF11D`*`7&Xwm92!=QDLmYz_td!H{0rBmZTc5cA0-cP&<{8!;< zz0F4tSz8Qlo`h}hw4QV*gjgK+kK>}NGA6KGe*Q` zg!Jpm(pP~No&kWYXg8#1GW26TZ9|_y0C=U5GY|EvlLf29*|jHm$;l-bfd>K*rgInM zsowq{VQV*R(gZ_Gw^k?+*s>KMEcGIiVOVy|n|*Nho}^o%<}9~CmTn0{6-*j?Qr*sF zAE!Dz>MQcfC_Ma>{ZO1gY8FU-#k#FxQyD$L`Vdl~>8CO!_}FK3;`+iy8%Y@`pO?qY znYYI~;XS6?)(0Q9$ip`-NsB<;&gVLtS$=0q4kRd^)!f}YUALPz!aW>B?%giA9=$Q9 zRb6OuFzegLIr2s+E==pN6?BIFB9cZw#++8&@OG185^eH*u4_CGJ}W*OJfB18U;#V6K!0aE;K; znT)97^m)9X-@qAf2eJtgaJO=wfa&Q;c#^W_v|fT-TQ^NtWT+eX?T7kZVhywVXwqAx zb!;x+_*#_M8x&mf{8~f-_x$c*hE@xZOBRmo;_TD)9lHbGN;bFP@;H;()eS}x5C!+1 zB>Zb3OxnO}8P=}lBd=Y7N?kB))4HM5wu%TvXTsK`lX)+XZHhi=%523Sp@1*@#zpmM zTX=Xq3}|JWQk}dlrNZwedBnAQj49=aow0;F<1wGR#rd;OlxG+=qX_T%M1V|B=fvA_ z&&eC?)&YjOWAVhHxC*uiZ#@)nWB1mD_etTMkdR8IaVu5*9>7E#nMnk?3c>S_!+y%U zLoM=BE@ZV^m_W`7P-=>R0!c<)X1$5y0kX_z?IvHMbgF z6xHWg*EWCL-I}hdnZth937vy4*lU-*pswYz!GbSwt-4`tiItL(e9HxF_3|Vq$Ob-} z*FX-ys}mfW^$?K0(57JB``k(Ego1eGNUVC`4>YY$P@)4sDsaTFCEz+p>Y?0C1QvXHz1rQS9f@rxT{$US7+eFu8Sib#< z)4rCxi@lT^X%0aLjgv}z$0SRXHI_PF+z6UhRdS+73bf5T)I7$_!ysXKM{xq z5@$kmzYKF|PK1>xklLqVtA$?j{M&kQxs{`!aV=a-F5Q!w;t${qok#xM9sJ5GwLqL~ zQm+RiaMWM*GC?C1t`RH3cotjv)vnnqs>BqTbJBX&i?Aly#ppf7xm!!{59%mPTXBD% zbn=17;7b%v|58UQX!(ao#^VHmT-nz-za~ou{(!o+`fNPLkW_z>mtRL^ckyp&)&1}{ zso2pGLLydgOWI2=wekuCd#VXxe+{NSaC8d1y}w{ zta%@W=b)s@tJS~PSiKyp5U>e1^#Uiai*K%0 z7b{1;w28MIucR!wAsim0t~4qgJG;FA_!*R=D_L&+)pdZhI>)zb%V>fU`0CG^0@qU1 z2g)&~?||pnqg4rum=6^CG2Iug92p_cByvklaE1e-vf$zX4tlJ|ysD&RdMk%zI(l%l zVccyF&I6FcLyHOmzHPK|r6us_R%M(DL&DXl$z3^17UY&;Ii@lx;*RKk;<(L>U&$)| z`a!RfJAmduyFE2Hlj&94MN4L%HXhQZFW&=eFsP@^sHR$C)=VAEgVakuWfrT0IASmT z%L%o$pqMjbUSi-Dw*76d1?49Dr83aH0h)ta6=)`NqPAub1J^zs7Z)*>2+$sl$*WPhOqn-S?DIE5kIdtAD?v;OoV`kO_=|a++EHW=jcDd2$DwN z8AuGP;=I&Ye2DXfu40d2#Z1~m;)d(8RZ;V3Q_R5V*#`;Ui5H^It8M%C56?ybU;MzE 
zqj+aWXhs4m58!DdEEWWl4C;E85DLPA6js^xK&(}VJ#4)@*`l0S(V=wqi?05NmjM=qhy*i<3yRQ2d7$Lg z$6Z-D6Cay-Zx0ltc?t%I*7iR{a7t<2-7n^XcXePzwdJCcSVA89tzM}m(+hLPq`Qt* zAcjS@edwWEKOBr};ehFwD(!Q7rhz{z!|Cfu_Zb2;3GeB~nZiM<=i!EN7Jk|^v`Ru? zIe>NWu>U1{n4|(hWUAj6QP`La3sW#7UcN?8Rs>81$MG?V_6n-aA&%4l`JvNFbK$TB z3{`a5!lLp?31;^50pg6jm|xjKo|yDm`%dBl(Mzo(_N4WQX6h`%Fk&Ab6wh{r4mQLa zy1`MycZ&I{ZD(2Kcdu1DmlV&Zpe9(ngo-P(&JEt&zEvfv9a*6n63tOx8StysE5V>i z0xG^z1FXkO%QqbJ`mKS^eX2rW`0c7oF6zsod1Nq?q2{3dLW*A5AVO<1vX6g% z&L<2=CK?A`_&I#w#zxyX$Q^(QyFt~XRxwu0XW-GkV4}Zeo7LG`jL{sg0~`6&Ve4|s zSAtwVPL0hK37PEr&gC`!0Kyuk->R53BH#Rv`O zVufU{ybu@M5O4bO86;^xz&Emvd*@lwh(xwWrAuuydD+DvaDbDjY83T(V<=4U9lU&b z@QSqLT*TlDCJCF86l161s_s{|w93aZ0@ym10NHYPaFO<8uduL_Lr#xdr~e{6dL|5y z-6X24`J!$zK5D-?lx>k@C#9+qFEUimqb_GJ#k20d`D~wY!n!0b`qkCyeuSH+QMD)OhcKk-B^d9G*J)_8puKBoYh65*kv&us zgq^_$zG?uc-k$tO*1G*;d+Kv@^_#=5l*Kid-+J~GyIedu^!m=RiZ>kBOggUUlPVeY ziRn;w1GT`u&F#r^zosN7j4_&yDFzj+ZmF=Kp9X}o!MpbsjEqxK6dlkfIQj_IZ&LHg zyU%9G7*}K!$Vx|^2#W`qy}*C>tRO3&tx_upLn>t`_xc#>0TbWTYbI$vpa1yytaT(M zK&o-$lSBo@(z~+oC>0QfR`6i#o2HuPOL>!Hl(BaMUek&Cs~k)yQye8fRy4A=`~hk? 
zj>o$Z`)CHZRKatLBuG<|Gqya=P^Qm(-q8(uPA5)t{LWytO$%5ikD?>Y!){7t|BBPoH{|Zu5Pfd2!!}E+ zkf-$eE+l1VSvfj)B+hT#p-ZX!;{lq+A;_WQcGi%R98??W5LJgoL)Stf)9;Pxh z3;(M641Bvxy1**Z5kG0z>$j<6PW#$%V~V%QBy1APXm|{hgDBjoXZbQ zxOP9TkFxnTxlj5>pazGMi1zuP)v!GeI;I z<<$uI2?LV*1H6R^Aq$m=_+X%_Ue|pp<6?Qqag|kzX!g@0xAhpqM+Tq?0+hBz><`SP zX0~+_-`K&K>?~3L()(Sd1Q-NPhb4h3+REt5jAX?LwfK8uHO&yrZgUr3b?IsKx&4)q zCWsafyd2_7Q;sP+q7C@;6se;b84P$PVb5UqN;@JDCR{!00bypq9zTP#Jmb-5OG2~j zKruzh(NOJET;=*cN2|~@u$_00x^bCUtIYDsuMItECXgF$+&V+)Z}BWGnyP_Z3^!mzp4cZ!M=q^JMURmTMuj5-7S8% z{$;E9+DqL=P$kOB8}y32NohM09mwg1q7?>ie(=50_r!tB20>ggLcAO?+M>@>u2xT+ zcAWZ@a2n5QTLL|uR$O2`g^v7Q<_BE*vD*rv59fI%>1GeyjV>@&dR=8*zgat?!x~cO zh$aJHY_Ebd-hF;=w6p6M=A@aXF}=Be($p=aI|GnqFT%8P0za|NqP-I|3KHuEBLs6l-I z((Wk#ZWSo9B2@hbgJ7s9o9Vxj;*5p3-dbk*#s$!i_C)-3j7&}$bO zG53uk{Bw*(lxSYPw@LyEI7{BzIfKt~9@4(6YG$+mCRtEY;A?c1cS)IzY<+z#%(vv4 zGH-f@*HkCUa$q9gz^tSkw0hF*27DjZIW+a%1r5!3!JjvXqo%KizmbUrzelZ?-7HB1 zIe+)dZ{kFKRrBI4<9_+d8Fz-ZzJ|{W%3%Rp*`Gpz=i+Tkk9CF{pO?9ZYK+M7reyC_ zNy(W&o%dCayz!|xf3(8}gjm%NCkB7HXcToL;#3P(eCW{-avvxriP?JTRclGyEUNcX zP{Iv?c6)1-bReGmSe7?xcE!;O)^^49OF=R4#_#;Y?4wjSfidxF4rZ$_QA8RJ1mtC$ z-cP+4oPmVjZ`c;QUoQgt<(*RnB#-;9-`ml#g>O4NKAG_0T8%!~fl8g?h$`;`pK&f) zk7FK#PMCV3TtThIUOo<(_yDUj@(e;VVXrrWQ~@}HBiXb&3$ENw9NA?w(2XA`Be{rz zg-@)xf_4FH7lHl19xa*!P;s>Tmwaa+&l5t@f^lvIN$HzOuG1eaE(TaB;Jg6BfBJ)o z8H^;ux0fti0_rPRw$XR|?h@B5h%9RC`Pu)NMZ1V0X;vos)cl{wvSJYM3=QT>$ zrqsn$EFzNc5`TrVJoY-E=+9b2D@ zYb0f)96@wemVa4arl@!NCV`9g-6x)-^zy7h0DakgIid2)s%$TPi3@x*>!i|+k_u>k zzTv5wR6Lc|xOJOYr*B zsJ~k0|I=OlJ9GH|nrsZpB>%Vn=O0z4_%`zS`>gSg6!x!o<{$m=Kd|4~38(0P-k9wdX;0U~^(Uah;!O<4}z zAp`gJ_qmUv3u*0EHy`dLt+e_-SSvhK-;WDeqpr^}x6*#xQpufR`siArmqmFWN!`9y zO-E$ehl@QuauXOM1J_5kTJrcvuYP0&EN3sd1W&p&kgs0O1^M)#nxCUNVzUPGqC0kq z?3dZVY>f!#Xdbp%V}FSk{FYf~sJ;c?P!?*ZX!-%^J}CNMefQUvB;>zX3ZPjZ@bG~o z|3DnrU+pM}kAr{ycOw+EojI}NPq=;N&)Eum>_064;IrV{p_uubnSTk>fvE29DE@6D z0`GRv_CFW(&z<_`nfm|a?Yofx5IqWfr`p`j3nzEc_oqkF@zVfdy%UoOK=r=^Sdg5o zCAxhMAV|Mw696`Ll6XDIMuT*|rc4#mjjsc@a{_?YJlNDdXGW;smd*c%?E~83{;?BB 
zkAj3K=o8w>1+DkieA+g!01bcc5zs;p0!90T|8W3ebq&~%-2iM!>qp8F8+OR?vd^og zvBcMpc0}qb$Z10++mKq$$cM5LK)k!3sB1q{!mx1WN=l!Vg)LTxM^;U%zpi zcGQY&dS6Xsg0eEpeUn7_G&kPr%`#zKM}U6>B;=omcR8`9P%DmiVQ)KcH>e$&HImDXi@3J zk$l&>IUE-77G$0q@C87&fqY?2uBbK)_|bXPGKx~rr0rPNH!SW7P?5KSG}xWH`Y3RH zLpVl6Q=jH?lb{Vc)Y~Na&T@1G(;z&tWLraP{b+uuYs2*U5!Ul<3&&D)l!$daP2eSD zBVZSvfCO@YHwXnI{K3(#mmmEO!z^iSmcre8qNZNsCpYTfOzEuJ0kbvTy|ZxPgpQ1bu$%|*?hL7 zelGS4cS*i`xGeQSC=@o6b^*%T_LYWeyb*!y5U)LNm|t*#vL*#~9z=c!{J0$i0I*=kf9cIbn=TCGI|2O0!zPp{0a4(CBMFmfZ?U<;DHRevk+r{56vSZ{w3g z2@;OHeF2($18Cp5I)Wa+6ei1t_fiWS+YZDue@G7BUL9{l0p0r2q5?sejJ^p-x!^!2 zY>em~+prhO0NM;lt$&Fyp2#kKIlCVA-K)^Qmrt$ssUa<|Xy^L5++0R6lElqT$89^w zl-L`M>LjLbmv5g5YV^Wx9ZSZ>>aXBuga$)~gj#i2mV15EjKkz!@z!S;t3()6`24Pv z1e-HfG46z4jA4@94?yyP2q-W=Tdq$+$X%(|y{Mz$e-AIT)&y(1p)cpwPK;sFZ?N$7 zfItL2^er(8sRV+)D(&EiOq&YeUtO)1U6}hJsDd^7I!sC)3NF=`R$Hy3q&pSz6K5LK z*=IlvDQ@S@L;`tf1;TAcS@k})YH=>%x*_oNOgM^vw<@GmuK8GTntZI4hk*YamT1G7 zj})6N+RnZB@Od)O?h|F`Yc$ZdYnnl?rH$zH+Wall=Bj`quKYUgX8P^oW64|mp}}7a zZC=eR-!m3oukXki=wkcbt|Y$jl7c)Xvtty2BqNsRc^7X%yqh;>?VU+K<&r;pLlv-^ z(O^&|(cB3$Y;}37^%ZF*9xE)g)b9=0wyGcj14Ii{AF&iL|D$ml5H-9Qn-LG`jU`Z> z1%lp#G*-=O2v;CK1G9NSD*TqySD}205+3$;hvNF?up+-6o zFwSQZG^)ASYO*L}Aa<6-wB@h|OTA86_WG_nqC8S6$duPUjkh`z7H7fNIa*;{02l&+ zXLP}d>`4W>E+82!0A6y2L#x?#7RU{004{+n7ZT=W6$^(LvYASk5$#cpEdtj{4 z5-|F(uvPCX75@DqV5S&m?Gp1(JjPj}VH#+9|*5zIr<`v@@!`!vvbU>Gcbl(Z_mY8;>cp--JTAVMCFvI}K z!T{wmcRmwnZfOUnAdWYY-+*-fo1c15j)z6{q>O;M`@;oxtGozN#TJh`etp&S6XhyV z9#4>!$&I=Z&~z%$V;mIlS;ePs4&PcBZZ^bTna==BVhAJK;Kg(+>W@4%25Vl@81;%h zYgMHrvboCKN)&HUvCqr8-96Xo$F=Q#DBMnB$PqiHJF_PoJD^@=6;?2ocahKWoTmeX z_yFRs9*=L!B9Qq35T+I_H7lt}W{$P3AtV_n6}-Y|4xgUeTFzE0CZ~itg$B>F9RZ|M zx7D;{g~*Mpjq1gZ1YyX@Io#(0PXSJtvXoJM3}uw~Vn`&li2M{H^IQZ?3pVOYwt>}7 z2x>dny+b41IrEw7TPa{icJ*tyOYhYX?RaM%a^B}s!Wf(QfqbnU1p}<|fSP(gtC0F! 
z#!kEl@Kto?p|V+<@Sd`E(CA1-_jUu=oQY!*fPlbjEkt`)s?tO~&UZjeN?o*V8jG$5 zCPt=C(j6SJ-j|qQ5coN+l)ua(f_w}JN(F?Fv_=;PvRR^?RHgX`|4PDXnL*JE^ETYp z=bQaFD__jRCE`Wj;c|8C;JG&AVW4NJPD|!x5T#72bn|TQC+)Z?Hg{VUQPR!Z3~ZjV zT|%D$0sdPRP1b6^m$x##MZhYMS*}LalXHP-nsz(1b#!Q4hV|st)NpLw)6-uK`8wL& zteb#5gtel5N@$)|T#i}VytvvD`?2^$xw=}fQlb0CUj4(Kw4-w6?XD^-agY%MF1kJz zNx}#pglqr4Ir}EE((rB`AR7YT2x?+hyn^xpiWg}C0ZZx|)`eOi*4qIz?aA6hAVdMw z;UF8La033T`5!CZ1G&f9J|%n{UNBlEPbE(Cq0X~B6(>p^ds_2Y@FkA^!pQbiMV zq~mh{^Z4VmS}H%BA;W20VT+$;cja_1KAikIb5q*kg#`MsLdndcRWzR6K3Y!QCG22U z43D=fU$;aVr|}~YJzoVCSK-lwLJ%Br-p85;R%;o---Aa*qO#`DNqCrt9^M{Ah#Zu* ztOmL_kvY(SnD%|xr4EX&F|JlXSuSx)bXIjJKVYMknW6CA*irRx7Tix=)}}~udw4uZ zTBtZrmox;4M_72BRAoytpzn|ajdMMMmWV|XnrdY@_}84e>v(mU%|kJ&nwQPnlEuP~ zDEd9()4M_=cXnL8y!xB3=16+~LS@cvC)$q`-k-+W>Rb?HxkI>$rtVXk@5@j;4`v)H z)#36^J-ujhYQSnZB1!O$vTN;WT2PT|TPo>y6A)B`I0cZjOVKw^4naP{d?ZJpfWVnM zWY|nsA9%Cu+tEK#I9#aj0I`1C`6ix~z)nc94&Lw>?V9VP_cvzdR%Kj#TF#A0w9`*Q>Rn^4Hc7NsQTcOw?nz19 zYN7T+3VF?=>G8a^`wrn{!aM4Wv<3M;Qch-NPfdnwGIgYFY(QsEKgt ziuuIGgtJ2#CI!4pPxITZ&B(n2Ose9&eJ+TDQvRXcmCRROj3G@wAKdj=`g-?xQ8#W2 z*cqSzg>s9_pFZ8Rt>F9y_$m$OK!kJ&LR3#r)SnSY=toI<+aD@)ZfLmffEV8Q!udGQ z+iL<`_Zx+8PJ^nnLHb!YrNv^W2Q*AS{SRfqU+G8bIX#R6n#CbO zy)klAR4ShEXTXB6&)fJCoZxYZlH;I-fns5>71XeUk_0LtF365Q z1m*50@j;DDR_`?vnJ#$&ryya&qSm)|ApUINSS@59BIxA-y(0VDH_0={i}Ov|2^QS-%0jEo0y^ zHJiKZU&OLKP`>#}58AU`7M+%{#Wu#SfbQOl&HJAPr z+gMb=JW=RF^8g`h)*6w)X!VV{-rMCwmYh>h8P-cmbsGl#+FYs;!(`AuH!AVvtfQ;2 zZGLO$C^*DB2b;00mpLvXH&hd%6j3V+!T>27xqNICgL-ell&AkSU|69kFkm1Gz9l7+ zbiNqDB}fpV)W@`&!`bN>1xO%V1u+Pyt~V2Is2pG`*x-x!b-n66Yr z)19Bq9rXeX8Yc(v7!jvZEk)2vVuDg#e>+f11EDE;0r{-pFAI!*=qqH_+iBzZf?H(x zCOv0Eec^LWq#`8+JSShDgaz{~?-QI;^qJ-aq&e6;8GDG9(*x|NMLPx*)KY{&#fEcV z&nE(Wr7#8?xW5TLAN1TZDCK$6WL+Bgyre09Ifc$nDgjbj``zx|MtPH~^wdle_eA%E zS~`gHfQ_B(Wd!Wn!{wo$`@)4$njz3ErZ9T@7?l_myCVqWHpA4K``zK9$&E|h?$~cy zCf&F{>iov>&C7%5p5Mr}v}%5yLWT&HH`m^=CkTz%r- zttaz6$DEbejZU)2wLG*E?sSH*>SAkbU8M`hY3&hv-(4(gc{@d`x;Qit)dxM4ZLMu0M=$Ahcw9<;P!t$F<(3h 
zOfL&+)MYQ#wIu|(8dh*WEJsZ;!$jcp-A|CIMRsbEX1G**5WG!;?fVQ-qS&1NDKb;p zy{lwqRKO}pnc9zijl5_xtCsm?C%mA^DVh<0J%9O2|KNcl0$we$XY7iAYc}r zL|CzCe|0_kAXkb(E(1LYSw2jny8D>@dxhn~&35yDl<-+Yf7=ctd2E$9NHp=vX>+BI z8^slSeD&}fk1S?8Z_()a&L*ZjR zRSQONJo#NbkXKeQZAFVzHIN|spcm3-_m_jk8x{nfqcPrVc$nVyN_l_-k$Fr}WfKC^ zIX>dD)bpOtAza5B=b83V-4;LwL-aHs`7`?iH0I*4xVs=qCAgIH=%>r4+K4O9yo8fV z`X7`tVWog;k6)Gj5*`oY>Z15+TxF}|T`=)16<4eB91}i$0d4(I&op+h?4lYJ!r{w2 zfFKRhPDLRYU*&)wpb`fTV(PGJ?L6@WfR+Y?_%j~Bn322f#fU8xJNu$(&{|bWO|l4mOy+(#fIp) z7o|mS8{?%(-bu+R1edmuPD~JZR`e6ftIJ6{88$`{coZ%PpzDi8si}UeUd+vCSc7;a5$Ab7hHM{uFWDFkVd%e>l}gIWEZr0tlc?^LMM}?B7sT-t>h`MO+5n zn?M{C;~For*gGRX#rwUVoW!IT*JMO_ErDK)K<|%mSS*au z543HW?lpJ|l42xTghuUVkmfPwa%O?|5FB^ya&mI;Ai@X)D&a8wR2RK3HXPZU3J8>U^v(J!Ou=Dq?*9ur_7l1oA z@7S{Jc-cssyHC8QES{@$@X<$>xXBQ;(!tT$QzR`X*bQxw33Gl4tqU;T7v)1nv}Hj7 znhX%Hfl)Xcbwg9s6I<+%0aKq_>lsdB0{{HAc90mWsm6ErxAX0jfqWH$3?{ho&t!b=I8kqA%Cm&Gdi=^yYl<{s8ri&3 z8W{&c zU5EZEbk;gb1 zR5#4QMa)u#1D8?|AQB^~Vee0Qq_bg%-^q`bN6m(cH<*@WXh1sPF&AF`(xT@JJ);Fb zb|Woc=Fr4aqE0xFU-Xq%R_)rzWgU~0{VK;%=p<6U5uE-U4^j>MO51#^=9eH%edT8fHP-M&dcY7+amU^tSIYos@*?t2Y$)q! 
zq2PQNT*FLszvVd%n}I|z*C*PBEMt7E88jJ^+jtAY>rAQ1zKk#A(mS8~eTW8i2KmtK_FVE{VBj zc|O<~E?1B6_&0qcG^vT3$H$V#kR8s-h6=x*l)U>kkc z+P*cxLqv{pgz@V=UCV@3UL|fK*9a}m%(H~gUK8NwmV%gA^1=aOI%U9@3(nkUE?m%x z^?oGLPRIt-bsgsyFBheASXE(3 zk8Yl`v(sJw*KIXQBU(I_8VK@C>B%m$4-Zxf)4!ig($sjh-+FAl${5$G-YYaY*4hkG zEqb+9)~d(IKk^wmw91|xpjxX=omb8A)v;j0$}=cUKZ-J*k&zt<$w!ZuSt$bVgE(M` zsJ4yvn;H8`kI~1-k%3NSUS!oipPBJKa+Dvgg80$32Lr-xguoGTY?Yr>^wyi26lUb7 zd9{f$>N;HT(><2z>fo#+@W}vY!a&1bAC@e<-UaMn3BTiVxTnd~h-#FG_1Kv^tst-z zJA0b)6T}sWX-N-BP2HD#MojZySxIE-;Zl6W`b8PZjJr>!QVK98nP2-eKQ#hjenqDK zujfel{%=5LX7_kdMqrz`xu$-A0UWjtk3Iq>LPYOZU2OUMV#r3q_-aB}3wtk7_d=2o zaou6?=VATGMK8A)`Wbl=J%IVZm*=v;z3XIRO$u`P$gExU&Df~t!Mw42Deb=2>R0nj zNGCkxS5tM%qd+RHH94z#o(6U-tm=O6B7e|+=|=_FlKpA&tFz}c50sT;ILI7Q;B9j1 zV9-q~8GRRX8M;u|gdm5@0JXiNbR_SC={>H@Z=G`c#Rw2a+@XlQSNi{bTd;ZwNJ8bX*ov_n`vK zZQ?l--T-8@tg)21-5vyFK}rkOW>sbd%y5B`GI6iN9-_dzZLu+0hgE2%?@YO5QT?~* zYF{p_bSwj7BpbgQ_d)Up8S_CMvZkMy;1BoNl|s(iKxgTSOf`t#TMo=e!ZXJeZ`}#t zb>tMR%IM#7Hp8>e6jyab(iADb$Lg$pp5Eb#Tt}Q*KfWWyMq=)YzIdD=zvwfq#VC60 zr;}3IokZI{@hDH6y}w%KSBf%%#8u5dO#V8a1WMZ_7M7_k zA!Jd=UIyf?%M3%cU{Tg4R4|$|FmlqDVpw{5xH&F4`?ig_d;z_ObgeJJ?YecNmk}5J z!PyBY@{g;WY+Tl4@79mYyuIX%xEbA12ATOY>ybZo16!Jz<0I&l@_Ii56?MqZUQC+W z+uEtr*|Q9je3K8^b@qma~P_3GJa`yyMb$ zy9k|WIcLsgEO){?T9kjx2eBy*b)8o_F>kDcPCaD#b*JwFfvcV)hYs->wekxGxV}fA zKk)wI((4X9S7Kp^su(R$4Zc9eNF7)5$|rw{Dh_u;Ot*$MV)f5OeV86IeyMLE0vN8tC@l&w5U38x#kBsNeXy#Hc&&Vszdaqq0 zEF{{aziwfmWcu1kRAr%MEeI+vAUkabUuGBll9nuXl9{@mBudcXUJ_R9LZ=?I8HoiSXd zY3T;MRz=QKa`$+imZVYTTB0ax#2GCK`PF;7rG{?T`>%4oG|=Bjv^#ZlnW0{bCKqVw zOg!+#GU$Bvj~#vU(tsmIOPjSg7d3L~?)HIH``4er;uZ0pN_4@MU??d(Sdo9(CzWUA zzcg2jdNb#me}!NBy%%bxwxJZ(xO~4GSV);L;jQUxQEK4kh859}ry6S`p>7guSTv^O zhI|`~?zr>t$KBZW2$1^&Z17Mxthhg+?cL2nl@5Z*f z1u+|zDtSofLF)9&(FcS!$Ot-3UF(;+j@N9~(Ggo$i+TLYRdZicRQTgZ{3}Q1rej5A zhhKf+!_HWsd6xTx8nsTJpaLJ)@0YH$#1E_wy2kvWf*Ggx7fS_gnSYX~O=F2QO-|zt zzYGji^l^9<11utu0jn`ipWo-V4eFe4BpF{R z85kgB1D653;_Br&aQLfa-HHnXlj(@q(^X?0ZVI#sRd zO?A{=n@-7#w+u*EJ} 
z?LzC0ZEnVy9%X~*P;0hcW!}VjNYaD(!X5tH_vmU+U-#Ar6-Yvj_$~LkVIM>&VXUbi z^`<9fQJTM2Y}0O#qEBze%DBQqP6{A$y{VcklBGX>lIz}b*^6{0n@#2#N{G^E+T>8w~j z(lCliYF%{)Ma9CMy2|wZAwcpnQnuu=`hg85i>cX&qK!c^n#%`imfNT-1_NJ@6(kfdz^MY+W~_A0Ec!_0^^NlsCmEskyPCI@VMHqLTmQJ1$w`v zHkt%>Cd^@dkXu_8a@cfi{KAmA8Obz@8|T09P7%%n-Q6Hx_3Sy*UMu)!R!>M(c(+-W zP!~I_mD0*Oq^%TC!f9x2Tmt(!RFHE>8j_di7QU5Wy`)$UjS~Mt?$ojG>(AeR?BvK zfGh~Rc1b46X2r1<&0RAaLvzZ%0RKy>rXnn91UDg_lFptqyu-Ttu7AN1D@j4qPIxt*P8Wt2CE@xeO@n{G7W+NH{CCz(`GoVPR)_-`uI9 zHne%lO@AC%;)@HL`9SKw5#t}DwMz7L8mjKc=YO{ zm!=FHjjchN z5Qgzf4`D;`G?*`7rZ_z%I5eW5k|Nq10@YEY`#?gCoZ!@%HNcOl{c39RA=#W+L9`d& zst!!)&g%K+(c^0RsUO{00L^fMJ8~t6W|5<26+yWfYV}5b$SwWm6v|}zoSv@tb6H&y zwFMPtL?-L|3sV~~B=6O=OUk6zLrm77bTx>k1@cI*$DZN)le1u0_8^pfy)+6tWwF3Sb&DU%m0Jn`_aVNF-@=H7G(83Wc_Gg<|>H8qs0eK8o zoY<}xEpVDW?C&fL)hQ~!i-YT>8`Ir}8{gkG19uqY&Wl}d5Mw#{)z5Fm8-(DXp#A=3vl;L{@t93@z)5h)^>V5;2Xd>{#m0z|R-qi)a##zW-&DroHd(>j(V zw_rjiel~)YkGF^nw|%AVc{;a0F!9b^oI^k_xUHZ4jB;CKNqYEkzTTQt z=chW}4~m9d9>SnuzZ1^D``5>)t4GkPh0G{EspqUBf>7vsKUNnoxzEVqX&1pZ20^p1 zCV6(xnL9?lC~t9RpATbcNLj}$#s%FRRn$uAqb=oeOk;yIRZ1gyi|T9=ICQFC)1*Ke z#qKoQnpa-9)>ouyL*mJ(dsN_iRp8pcI9Pa&jhGwdUb>63>>YAwtUIQdX#_w5rYyhLYNYP-x^xZjfp(=~z!qT|_4*>JqqZ#JGfUuOdx)o3-*Y{urrg&AX>g zU7>AWJ(SzW={VB9h)3(qA}}2$%4sx4z*y!5uib~$#^o=uJ9X_*@4fLb{IM|d9Gs zzcqH%6rQudnwcBD9ovm|Zp&RwAJDi*+t2i&y|CnPX);!^8`aFAjP+7+_YsAoY93xMbntwuCfbMBJ!fvreY;U? z2qL406gjF9V4~^Q!0`|8B6G8XYMaSS)w6xh;wN3i17oesQBaY_Yzuc)*c>Li%^|nf zKDjQhQUxvMaB`{{KULIq-IUTtzAN!f8F}=K@zrb}NWLT6w9mP|Z z2f`@LD2&ipe{$-C{KiJr!+1d(Cnr*TZemVdO4?||6LrTKz`vSRIKYy>Owe6^c*RA$ zk*MgjM5cXaEQ#KLW@aWaqAl5Im1Scei%;G;X@SkkKkH9hpWMdAYN^izKOp;I{-3S~2*1!6RAS^Rjlr zndZ7-p;#!Jyt;ma@OEW+2;hwQ{CEnWM6k})Eq3H9S&#bCW;g;GW_hBg>Vf4whsHU? 
zsH$(04iXb6TD1!!izfw(K-j^GzJ~dX@}^7K<2J*!#ij@%nwvo5w=}Pqn_w_$ia%Pw z1opD_62&$z;OG@LU?r;tJP}=R#*iFVs7(&@D~$ z!VD^O@F!NYvQPQ2wA*vnVh;NgIz1=$-EckzOPyT0iz6rFEENWDlMa5g_T?juxUoF# ze;B+E*X}65Y2({-VBK`mUyhgE#qa->ZfyUFY}1W3Hu+7-p5SEM_2>Hj_)nCm@2~jC z{mgGl4!zl<(&-9r)BZ#|KkX|%^P7^3#fQ`~9{)+jzfjuNz`#~r1^&%PGEv+WCjJxu zzmoJs2ZzMt|6}MBR_-tC-mdifJAc|l-1RR9K>yIe7n?Ko@!x>hY5on!=7dqK)sIW` zZ$SPHfY{l!Wz#mM6T z8#x`#%IYEp-SUi|Tj!u{f|$0dXz8$IrGAu%HB_!Yxi@{DcNx3>PAkE{u~(;P9%opA zHMHNZ4)Z3dJ7#o8=ZDrqAH1Fk@x2_d(UE1XgrmS`z(8zd{}MEXZA4u?}DF^_z#BXtdCeeqdWi9imH+G PQCUZ@iyiUe)!Y9Kr~p&` literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/ampipe.png b/model/train/yoco_moe/sources/images/ampipe.png new file mode 100644 index 0000000000000000000000000000000000000000..751148e3eb3ca8313bca8cecde9547d9c4e3a436 GIT binary patch literal 58092 zcmeGEhf`F`7c~rnWXY0YkfcNb$wSV9AUP=rNX{^1$p{i92$Hj)2o14x++#+#E?7F25DdQ9jpWe{N6*pNhaw(X$gSlol6hJO!0-X^Cbw{{aRzU~lQsK)Hi^QqV5 zQ51V2oQGvC@gKHZ+aJyr;noemsus2Hei!~TEO+W5s&u9Fm=Nw&5>CW|IH&L2OsZX& z>g#0@`xuK@oLz(-mQVmUQeLCvJOdoSu`|?MT+F^JN`j()j|e2yx!QIeb?0NJ**AM$ zPBJ!>{jK*4u~t%8g~&zkSi0WIgr*UNdFwS!(3`yLW0j>dEGi$>dFecJ?NN4v5t~h# z%FKzGDn8vVP+Q97I_;Gc%I+K)O5c5StnYMgvPl_fw{xMg;+P|H_0H$F!TAdI3b-pK zE)xDV*gwnwOSmprlqJE^qCO}bMxaPL2r*qwu zG%Wdq2&~d5smpFnPYGT;d`CaiBJskj-URWuBG4K(-09%0s^4_?WFo9IP*;0G=$Bb*EV7fpUh`jh`pl zlp#Fy-ip54_sQj*mi4aG&Ava_6!D4dyK{e4*(@f;?4!cO3XIy0u(|Ru(ulERj6YOh zEEa?~j8+}t7&Yv?qV^5E7u5cq+lWH0RKk4pr{`+I*Tw@0vwB>V(cf>wo`b9w_FD1~ zEvRbs>nw*U_d3N(5!u5QR0gTYi5%XD9>>a3uyb%!&0(X^Qv%Nqate_ru`K7^Ua$A= z&8G6|zThOr4r&idy_EC-wV5pV&3`=7@H%LJQ%{N_^@s>mmVKeXMmvAwa~%rvkB?qN zc}IUya>=@usV7xi)Vb-ZcVJdefg(i-j7%*qE_N4H+nl+0Uk3h|Wbl4*Kal>yE-e*l zwpPd2lo=Mr9cxqG{Qfgdp=0s5;DF|4KsUKx(}xeZ9%CqM@8H(oZZ2N&I`-TL>}NV6 z+2Q0UMTE{h|DtM(3JkIDN_Gk`tEcY@#>5Juzk4=0Bv@@KDia&Yh2Cr(nDBj3Yv&QN zlVbXFjW=E@zH#uqpd1tuP04dQl3$H_@RF$dnLTW1W$?;#wGbC4i>lTwc5W7yFhhPR zC3>(v16xJ4XoKo0&g-O%GyHZxRB57B!M#1icdvAnT?LreA*fk^AwHiVrZ{&#p{uJl z_UjDQmz?EGXx||$R=IM*fCN~6^feCm7=CZ(bRu&ri7o%B}% 
zja_~&I%To6a?vk$E25z*>@aZMQC$%xe5Fx?*E7-2$f)>S-!4o%$yHH8h6ijJPwa)= z$)vAr{sQlwhoGeKgO$ovcs_a>BDZ_+%!I=dZ7y!Tq5F3OS)%QSpc}YQnKQj(zd2@5 zULp~aRled!xJ9~uxNmQIq zM?VEAh}@BEXd!Mf$6J$ZTis~mzQAf8Xqmj$e{?S>PvGMz?~lTCu3kdgYe0??{5is}kFjkIDZ$ea1vgUIrs!yV5o(|Unur3nm$pUJ^S zw(PR!e$PA)$s#nS+)h6FSY$MQ)ms;y^Kd9oP1NMsw*S#(HfFqWG~qZc_LUL2?b7b3 z?`FfU^DMI5WaVV;jL^OLvf9qXae?MjKt65Sav@VMNak~q=fiK8M~Exq=tlEXFD_N6 z)dw)HOqgXx%;|RS(8nZGuFzupO@7O@Z(`;9)|=n@Fwqr0Y&hJplsqJ{l-s)z7@3m_ z&MOGT^XEIo-Of{X_y#2wB@z~jVs#K;03UMA`HqizJi3ad+9fYIvaW^~4lFJeO->!4 zUmGzcR_MmELU=6IyG9XuK+a#cHaJ)D@)ITGjU(qDXhs(A0j zHo2HixV~Pka>^rIwXpU4>uOipD^Jb4I8St(_(B8P;!j@!vr$O{57Cb;;)Uj#pc$7OcF z_?$+Q9t@U>MV5i02}sw%{U1u-=N9JyoVLZKy_SV;z>P=lIuIKZwQfGf%IfDys%U zX6@vRL!DeRiB&jX8Lxvo;R%jay9;5-;LDBt4cCVC=`K7hDq#{N-)l3agJBzmpU3Te zjr7SQ@m|4s+noyx#gQ<*FO#(4CYo>AMpG=QeM>B*Wm?H20^!zBZEfwC6wuy2?G_@)!^of=u!BCJtUM{cB=X09gdsiaSb*iU( zBGOgU-phiLZIk%R&Tuf;$;6*lhpx_NTTN`lA5|Bf5(BnMrR6fN7P8-T$GO4d*xv%x zICEB@3ynl9bu?Ytvu8c?_(Fx*U6sr=X%~l$9Ya1?jgK|g{KEH8jIjOdh5NrRbS<`0 z;)l$!IT5-+EC$Trf9&E{fh0&~SUnfY_PU+WA%8nxsfPqZt7k4odR2;kBZenIH|wx_VA;5*IN{Qi#<|a$B(JD({kG);NUy73aobN zFyr$%`rAfDubVS-?D%d@f6CnmE+;<-3T8qNOOb(K^Y3dU#bqwGndw!|(MQny}%|@*;%+f!pm~a<+=wEI%IFAGMB3-lK_ui4h&wp z*VwT&OXy^$l9ZBBY=2Wk2jXD0v%qN`)+_HM7PBaFV=DuJyoVEMVaJ5dJ70tvB@;*A zS#YQjgX9BZrt|0CzgAT0Iegokj0qFGlHU`c^L-FFQagS^;tmsPSrg6;lrF^n&Ygx9>*e&i`*d0tP&6*e0*J zuFWf951&V&PnS+j4 zh$JrG?Ma!I4@GDQR>%{Bx{W2X-FrBs|Lm8E%ldS=U%ni&jjmZd9;E|_j@xqoNEb(% zg~P4r?d|afdZyy%dt`K_ggo9(8<%Nh5)%UlAulO0Uc(zlS*1$>Wls^$HcCXO?cs;(E)qGsFmooxvJP;dW_~+tI&?itMx`=?N3`m z4Y6#%0hHSKP0c`SF(rX+xA&{lNZ4#i-Byi2O}_2>NE8X>9*K%gOGT_u%ZknP13RVs zZJ*;T=cMDbVAE1f7ZzVrVaLK79K7Db%MyI{bWdlyKU2dmipuT8#>cplL;4Gx6)gNiUp3Ej z^=Fz>o}wCf$%<>*qwigq*UZKP26QY1YyzGTIMFPdi24g+or|up{aJb&uUVdLKD%9PPE=QG%7348n1SS8$p4bD%=MF7SD)mFW&)${ zj_F-NaCl*PacN;;X}#dfLxM{d#L{J6n3dSl;?OwkUcurEtIDv)c>b_4I02>5T*_kbqx|ef5ZF>lq|hE+*KrHSjpfNWq zdcB9W>!Bn^5B3)T4s^`19fDIz-sUzMr!Tw58ef(rrVG{lp>ZxeEuS)~uV;XBQNO<)|n!-TO 
zecey4>I0@C&W-ptlr}PQs@a%%&uy+dhY>-G9d+Tfxp=GB(75aE?`c1YKj{5zT1QB_ z+FDixbuzFo`7iHWN2Jy-JK7sQ7?sZ~-f!D?8Cv7)!0JtoMk6cSPj_Z|r65t7$0ZBl z|NNH)n<$*xC%0EK<1%Am`Vsf2L7k!aG$L-BPjRHO_jh(a{9PxYa0t;kKjcjx`C9p{ zZGNvn>07pE@=ozbA?F&yGR`uQYJNM}WS;C-a~!4D5C}tFp_Am()fxO@_Zmj@-uk@- z+sXZ%{iRsQg{^usXKXOsTIR15^&|{KZX|r>Jxm`}>Q)N!F*86(38+@f4tDYDY?9a0 zwZ18U*uRgMUadE^^F#}QZDSx}Z_n4Ci`)&ya?@b;X~7PAIaBhpvK*y$DBa4|K{1^` zg`^snTv<RZn{n}hFpXk^rv(fWz@z$|_Cj;uB&Q7a+CmI5i`d29GghZe(>+Q(99Uk-bTz7?| z0}rD`hcjCav!^x(95+vRf>y4(iD-(q4~Kk{&Tj_?&nN3vghS%I1!uyiYet?+ z$JKTxejfL<+bin{|L1HA->C}@cqJ0c&4Hw7Ub~0l512BocVyhnAym@q5*4Z2o8m1{ z{pktJ{>J7ev9QQfsG&%c(@L^1bsPeDDw$W6!!G>9KT_B{KJG2#V^Y+9ie$Dcv!!w7 z8#%sA7xAx-FUO2Oc=y#G>1RKg*WruD`Co{437icrlEZp4TS2if7gQ8Uda&Y9X(1<5 zL0MP($mQkU^7z6w@BRGXM9JVi!8Ho+%~#bK2gknD_hoYiwtNVhWLs8Yo2%?)wqa{;<}S zI{iFp<1{>U=-L-C`;~UiqhikA{vdlK>+shRMM3**R?PKkNCnkag=%k)<4lX^#Den1 z0|{K~gh{Jdg09`g!`@#LvF-4U2Q2?P?mm7WyrEM?3-+WHi-q~4q7Yyc9P@hZHvv`q z;Q{2M!9V#imAHfupOPfz??LgJx_1?TNo1`KRu&iN>;HLo-U^uV^zP=_+T7wy870c$ zJ*c$izSvtwYti@v&(5HK@nHYpAabm6dUb}+`JaJFrLY5W8@9VveA9ox`U6pS=+yVo z#VP|H6gWmu@Rzr5!%AnosgXUHJpX{p2$OsyL~f<3)kr3kr_v>XziIqRWh%CiUF5-}>KEV|qrN-yAM! zO;`PITq(@oe)J;bqMkpT;(uU}KNR&djg`Cu)bR&OATBlmFE6w8<-CQ@k&_J(H#JD4 zIw9=gaZX~cb_qTKhrky*gIqZxY=VBn))h6+3gHtT zV?d2A{V{{t!dF`8HPx@n8BXfIhQw>td=v31XuG6ACSghwJu0JpeOu8b-s3oil0%M7 zFn+Z0(@yQWR(FwN+Do;H+z^*9dZ-< zCY=gB1QC17Qnle(3wly2vJ{ZZ?7AOhElh^?%7Ru-DU?yg;ng_l>_G9RS2SfTvN7$u zbB%xOOX6TyE~*qKkiD2(*7lET{Coe*`U}c1(lIpTF#hHFr~aY7I3|@BFIY(<)6&u! zEqrd&Ce1O!C?hj<>Jd)HSp=`20 zxPT}AS^fARP$GH`pLBhh#z>r@vM+Aic*K(5hQt!he9N9i7lpTY+kh_Gii3@J>vz6P z`DaxW_yqg{9i5#%XWNfCI7&W$UfS58AS3(s(f9rP_cOnKJ%9c@H7zZ7ZLHqOIGxMm zBD=63567T1$L6R1A@!MuIGg79G4*%h2v@N>*e$`R<8^8DYk#DIZDyn#!@OE=iHOB`7 z)T|+_S~}XKI%0xfjv9TNFN5;74}?Z`B#&iSTSLRjVeSM-OHApdH39-bKC9u{sw!!r zp`@gw&35cYD*^GixVYPEgu=ACj?T}yxpW`$qM+b1`F3lvTSsgejj?)pfKNUWYTd)C za5URiii_7Bt+k)#$neqS!|a9fA)aLRn$=sMX_0_v=Bp6?Wc_5~tE;P(zmX44P%zIU z?~vw5{R2*_2AIzYh63L*nBIC0+J5-QV7`#bR9=h7)KsbvnW}H!GWo0sJUl+ZS61@! 
z^Fzyx+Wl`C85y6tKBolP4_(W#+jz>bk_xQs>!RtwvG}8DL(3;BH)Z4Ud1q5nnnHlD z%pe&p$CKZuD4OLmSh3mgLr2UoB4!a7@yXj57nMYtC6));vAFXIP z?)M&8=*&>vBU%Au%d1+R1rkq*Tebp$;3(?ZAEG4qjA>O`aj684oGd%R`WB(nLGTK~hA znI~HTF@G6?)Xn&&ZNnao1JHXiaT@&FJkGb7YG>&8zlWkjp;yYBYOmHxOL3iG zrCnPl^{rD(*vQddH8$LzR{1atVvwJ%Ji6Sw!qPwz?!p0}q%C$ggm=ut?YR+Omb|c> zk37ClK#)*a$a)@!i5dKEf8|JlnmV~is~{~k6^L;6ohi;okI+TaapSSDiq8&dnagWd zweiQqRSkal1H0ZCYx7#v%W16cC@!C4lx&t7tSY02A{lB1c;uRBku^kH8@qYPt{DDN zS3^rgVf{n%vm(I_%Z-{Yy!-H!?|ZDbMYZ zWKlL7g1O`$XObL(VD@oOg2^O4>V{SA@q+Q+P505o6gtEfGmmIXg6$V2gKr5O?l#o#~)Ua`f=a(HMaT% zeXEmolVP)Z|FqUK6#^d=ze!I`@NaZ}qXP#ct7?RLnaMwEjb|_F^ErhsrW8qYaLo@` zM)&*HBD!ign)6sJI!>n90-j_D*mm434zHQ>gDDQ?YH!C}Z=R{8e>>CAG#ZYG zc6~kP^Wt@bCG2uOYA~!hR(ec>f>YyB+}Zpx3x&tvA%afW z<|wIQ1i#`XZaozKCfEaWUT z(<;GntCW+jcd>Q4@ycnqoOTWFd110J67}9)Toga6>g?zfMQon;Tyrdrfh#8qkN*Zr zI!=JO;8O}jq-3`Jado=?nTGf5c|S?6qkmL;lSSoTA(@qvq0&VVS8;5aT3<|;l~d%n zQAUc$RXvlM1dHS?$Drrc`j_R>-5qEfaagwB^Om-bA!lf6J!8Au56wRKn@+!PeJMO< zl{MWK3oTwTF)`!gu~i-Q+We+cI0~;l1H3)clwP zcl;RmQ}GJe?SPv+ySlX4ShrdeI-6#C-JGmC4Bu=%Gc6PzvY(>$_ld~XyLSc%WSIu@ ziCh|x9OSCl^4BMGKD{hi5hxVq_hSH(?R|IM{U)YZze2A;5|9p^=c!5ITRN^%FRrl2 zf;d{6P-*8DT1~prx9TV2ceH%1j9yh`3tyB4kg~@(F<050SFRT4`V=Q{gOo&I8(~@j z%U=G-^l613I5CKwu+!9lSg$usJ1|5->A~|kzvdBTMXQk^{Vf~u4QXoE?;B^qhU@cB9*6Q9_TM{CnVk7WQddLhQmOYuprseCQ<1VF@3lpquLy-=x=P~h+H1h)k$Dbt zb^iYTGr#WHyn;gOK8@(Y%Ja<46ASTlZXH)j{pyx_!@tCx@B`3ms_GL%?n6=O;FZJT z;h&x#W4FQ%C22p$3xdTw{e2tfjdbmHZFU7)nwsF7*zSp_JdORzj|gI_ceKtn&pF25 zGT(dskpn!66Pw#KuoElZefR!73wWShW$deHX_xU2O~XG7eAT7j-pVmu2!?SZ+lA49 z7dhWUYuI#44`=@BE~Ntkzfph?0%d>;mr5oXolXlNjH><7xZBls%J-`p_j6P_Yg6ET ze#BFqc826uN*b>de2_!Nwsih;664|*6$hLCuy4rnjnW9V^j7aD&&~Ya962;Qb-Iqh zkng;V{I2ky?X|;|*hO~EYRYQ-3_*bWyW5x$flrWpC;I87or~)cz>DRzDnywGOYjRy za_sb{_2-2~-v^1Cn0kI{Yj1rMLvbjcT%ZU7>13N- z{oue84!joPQOEril-G2XjkT1yhR<@>pV31C+I(rTKGXF1TaICo+CkluZ|f%gM*!Ac zNA`jWV@kl89ZC^*JQqlof#smVT|0^(LITtglUrgxoqt4vdnb~OQu;3-Ek6twrd>^B zcy>lGo>#Pq!#39*oaLTl_oiE9-BL(eJX|8GG^Y6Nw;;(ZTzBVa)gd@O!Eo*a!53L> 
zE$i4=Kb95R2|Z#IP0I}yO0bhri9kWA{5J6ba!W`^2o9FDwr2J45Wl#*w6V7C8yGm> zhlz@bfkX|!`i_vwJ+OVS<@GyFUbm@lkND8D;r(T=MxKV$kq_<|56(YsybGZMm83$C z7;dre3YfHcu@rRNmz>+{TmWsL0SSAV+B9!8{Prq3e*1nzaNZ-6s?3&w9Pw{ho2X)| zxIiYzMyz|r3#Gb!&M7`UwboIEBmhpA+g!TR8WSSEG2*=KHhtz~_fVH?t)N|LZCytd zbwf43G>_~iMmWJJrEo{LnaCG{{g&W5c7tWR$?5F}8b)4D7I5Tp624n1U3S!dB~pK; z710#6%G7Ddf;=LH8BmCeNc39Fls6B$g3o9AxY<|6l8E!}!ewPw-ItH6paxk10ks~LvYRWDp4~WBx(!rN;+W6~!0!ZX z6Y}$!GgT3Xhc7GNpn9NF+}+&)`IVQKS5;N@WPPY}m^>uuA;?}7@6xcdMMz#L)T91-mP1Yl&cd!ZS_OQxq z&6b0b-sf6gd$fA(V+~AIBYDZtMBmG3Ozqf?IpO#RQchtg^zhuMDHJ{;jM#EDuHUwR3P&JDogz1!-jGS!X9eno>^-XfV;e@wZMROak9^?bk`InUb@4*k)z+mC zqg@k~EN0nRLe~$OQaIiyXUpkNxZiS58oe1u8GM@qSi7mu=Ulz}gT?t(p4#n@K%@|u z+E?|ml?XR45LZl=y(X61*i~5J%eupJC~ExAp8EuXOG`_BSEtfKJKNjaZEbD4e0=MJ zsb+R|9FHIWEH`WhlAvie6IU4wxoOSl)jWpgaGPHkJb_E4B(@R!pByVCQ#o&;?KQ!K!(FN70Q4X$ei&Z3_eZEcL#Z^-WH%$@#9W7}=23WUGS$l$Z=FmK_m zZgU}=YrAIk9~@Y$H%k(<*?w|UY3G^c9D8{(Abyhq|A;-9o10YLO*lvAljZZmk0U$2 zTR1a`Z86cf%7bQ|?6Umk=Ul|R*+u!ycZNEM5wK;t;1}sv=*8l4lXY4c?v2TCWQNb_t;pRC(ny~mGmBQ=BH1I_@s=DjkT0% zPJCY8%z598G$0PM=gl4%tuFQENQ_)`iQ7-NkzDY!m;LIX`c6QAkE`Z35*_ym_jd`p z)LZ*)JC+Rv#^t~1W^wDNeJQxPpO`}G5tMENNsZLjkWz!q{#!ny7-jgK`0IwdaG(9DlxN_j;8P$woLzo@suQ|%HJ#{Q z@S(alf*5o*jIkXIDD8%~h@V?~g``wjqS3nle^jhA5Z%dT;cF`7l(; zF{{;kMEnV4)_xZoeM_{_!q7x-evy5s{)IN?@u$AKvzk*is_~W>^(WWNy@@ zxzFQ_(5z&7DdyLI*O%Y(eap3yiRjBgph=3JwzjE12{Y!4W%~s$Y|(qtv$bR2(Zi~$ zcsz)L-oAYciB07(!}LJM9rns2CME`m64Ltz&wo7`J%$^RMwXU70fxPK(A(RK1^l*9 zn|d7q-<>59#g-oBg+Fw?9^=@>CXdxxa3M@57b66QT%FPk{@ys7G%?_pl+EZTyLZiM z-7!*nMyiQEc*VK8KEPVybfz36tp%*2etsvLA%61IDmGUjnD!mLlE~D&S;|=6GrlPq zO1CT%9xOFi6AliD!2Fe`e`e;=)6@#tsPfdY<1G&OfN-`djNJKd{I)?#hf1_dT_LJ;1X2i>if#dDV61 zkJ9IQ3fWDi8IcTca(O`yHZu1Ucjh8+nl;FB6&|C-^B*+!{Pa{=)O|_NpEBQY)bD}+ zV76Xm0Pbcx@loe832$R>i|;MI@y-B3+% zo?!)srTB(f*Xx2=lfHuso(orjCs$m8&gm09T{=pksuqPd?x5VcUwXRo*^DYbD>s9= zx8v4dPq)L6R6PCZhm5g4`(Yl^Ka7nR1s!H{Xl^C^uEPDU@d?V9!J~Df^)#^_ znEEu8fMjw~n`OTtDyvArZaK4Cmivf&`MiyM65 
z>!SG0c(^5wHT&y585Af3-v5@LC>ke|DcKuuAW@y?<-irhH;gC4z$3&I*aYe>k8xreT{lF)U`1tR z8X6ibOiWX&>5(7|Trn{*YyLC8DuuLyRc{^x3S9KVME3Ed;pH3*u_l=GRF!+OL%=06l{p+ zCSs_pYncD-3$$W zw2|7d%MSH*ISBd(qKv^WT!YCtz+B4#A~GRx6m&E|SorO>08jNSit=-Ld5q&XadB}b zM#jpT$MjF1j!sS{_zl}ADq?#GO6u!EUY;7{mQIs>R@cVceR9p+P580vJXo(Jy)lnA zZ*;1obi?-sr7aig?8w9cuI5?+W+CGPx6CoPHb*!Gtm9k$;c9Qh;{f$Z-6aGs28V=s zb7u!==*W}|knr$e;;|g0?5)RA79}x5f>bo4}rN z+wU$@xEk+$8Oq@eK>#k%$$*cV(-&8Vlgs+*Sctl92L7c%|7ARU%-&j?iNT>EHd3Wz zE|4$YlunFLLXn-~%1qqDjpa+k(NHL+Q-~;FX&7L$4#vWcR3ngA?TH5U4Qs-qB zo6I`=O}0M6V->S3{U!F2)sU$J#u$0;OBQ6LBe9XE=aX$+tARUUZibk2s>KoHCvD81B-Y&m>P;P_O`u6 zZ%=npQW0MtC`s4&Vm>){)@4i(jM+=4yr-1auwf|){l6yofD;oQj=g`7yfND@M6>Hu zU!cxvX=ypX=%`i)f3x^dHndz(YxLvlIc0PSz=;mh_!n-jI9-edIo@F0*aAZFpZybt z0vTQ4A2cl;j4A)@+0@?2^Y5oy6TmKx?N(M*nFM?Sc?F?4;I89KQ59WLsqJ$cGSL8~5hf-kKtKVd z(Ppur52UaD8$TpNPS4ulnvBKZP}JwR?{1u&T{immE2k|p7jV-<_#K>{mI#!ynhSkL zj~ya2HZ`?BRhG`QVQYSe6l>u{3!B(aCVGcBjZ>tU$hS zp*%VAT`RvK>JY&Q>cT~K3C{cfgp9b4@WRw^f2-BvU2u#3$6HK-nz}0d3_%(si9qU{ zsp*`j`@xnlcDwg#y&4jlB*>+lRMCrktwN8E1@x8^jH5zPgK{2`Gmjt^Tg?cA z`1&sghWjx?xqs7}4e^yb1`2YF7KraC~&@Nlg~9#sRxF9R`Cb%LRlFGg{pcc-w0iC(iu{Bfnm*N8FP8v$r(dk(Cp#m?fWsJx|>4 zJ9m9{f$V{Xm0E${_FC~wr7+s?>mFg${@---aMq{~sViTYls^}5 zdsf!FZkxbVl~t9L7R>u@w1lbLJKKQnqVRwF`=qBGUtIL>x(%n4`eosz{YZN~ul&?_ z%{qtbC+{xEsrL(uA^wgNb6B67!9}&BunOZpCO%pz@&wKm2UXtxn9&qN!TDxb<4POD zH5RLt;e&&K@pzU z+2t#qEgtusAeBQZuXc-H?6hn?5M`3XiJ6qW^Va8j$HT^LoPDSp2DcXcZ9#X;-lzOv z7Z)SA6>)JPG%h2f#rtfVGrr1lD9s14Z$g3p@c|G}4Bik@Sd>ScMFXd|c2s(Ze#WWT zh6PO4A9}QJwY(0!?}pg4Dt#RO0A%Col=Z?qa}O0dRtof0+w9=QdQZ-g6*H)D!`kFgu%xyNj^`_4Ta`=X&2xuNVsA&{^*xqq4TC+gIS>( zEa(a~K#SRL*Y%g6?YJMK2Y5ZZEyVs;HE?#zD=7{1_F8@3jG~jwQu=CZZ9SMSMELtc z#&5@u3tk!edGt(s!bV8zsOF+i2pmaQzQxo_S$T!A!+@@U3v_3`FtCE)Sz;(q1N#_B z3~Fj>Lc_$9N;m5YC!Wb6>g$Ri8JmSTJKHR_iK8nhqXNfCI0*DA43k}BkNm5PVI-pz z&~oN(6^bugHC^Yk8v%4(s{b8@8Q^MOqsyVW7eHi_af3&_H|OfPXF*H~FI)Ak_m|z?pF-c0Qwrid{6P=~>FI@#^S|i8Qauh*>~mpMPQ=+j3J- zP&k)e3eH=;%pKOgFYh&)rKU6E_4BcUAL-_;=^>$V%v&B-#as2}C#viEF_qz!yaKWJ 
z--hxlu5C28U#I-gDg@e`*{k@{aeq6L3#3ItL_2X-m@&dm&G@aUks3!yOwyC+$t~Wb z6FaM_{Z42|RJ_Rs1F`6`@br)jq*n1yds^O6!XvOc;GZFay?}TE);fHVCH6{JcWQVz zhEIMI&e}|8%jI++@X~n9?8IQ6s{zoK%AatsT^1yDd@`Hd;XLvc#SM6;Yd`v8HaOiNE`(`zAe*7pr%qDQrJL*xGa`%_a>DYGwG%^});!kb_7dCeEC_(|CxL%wS?0Cq!D+Al3R0kbFrH4WwfaN?$b`0e=)E$!u3!<7 z@vYOqK$O#^C@MZuGBTj}3a^L`>A+CXxg${fTIbJlXDMFn`>n%BHQ@MXKtq@!V5luo zEZZMc_^(~qx3p|@+<%U=U}3F;?fOzNGN2h`?8gJZmDw%^Rp7NQkFWG8@To367EaC| zPx{L4&fSz$?I_IIXeQdDnJrGnkTQc)gyawcxnt2`oZ)^)83Mdad}oJFW@V&B-V5_W z>fHY4kB*dHcUVl?rHknl(FSuFnTod@(_^*2Wc>H2zw;U|xgvEuL)y35MX#>@~*gCM>#UKYy z30Hg)cCh-y!_1U@$BrEJLZwX!L)mvqahgwEv)yl|X!cEvrx3|j#(P21`C6p@sdCtV zqYx{|sX2y%7E(Q$5`_C|Xsk$!<$j2%xw*En!xO+d?C}(yi2|8k&5rfPXAJr6)oIh8 zx!qy(8h}I1kA;0+_G|cP#jeyn(HFztnFB6o%dG&QixkKd3lL*gSqKUCe<82e(L@Or z8_w^Yq+(A*%^%n_HmDh07-XDo4&SBR?$m+iy>6s^nX|2UlF##>k`;_{GZg)cKl({L zbs?)OfX}R8_U`_w(b3UKOh^D)XYy4uBQp(^gP|ICkgUfr-}N&!ECPgmm-*Y@-ib4iluNF+i`Cn!P0Y>Bdt+$HNJ)c&f+}Tc@M3ZS zd1JyshU?K|v$riUAlXL@;F;g>p#(3cS9}!yHWNYo+jk#>Wn^XN>KyE>tsCAz?+we< z3XfK*Yigze)Z_F^=6k>$k{uH#fmgkWTfQERlH8b^wUFcbix+N!AziIJtO z@{(`-FTKT5oXFDx5Lx+w`^>UZkpDpEqsyP`9jhNpZ)TkoY}p?Z{w6;FJ8$pogoT7S zt0_c;hga6t0#|Uegk6~tZqh)HoWGu=yl_3f)5X5C@=8!tA+x~rM@3SQ^~+KhQ)>dr zpvHT3r_a9#7)~Sa?c67=(!&fwqX&w;|6uYoWt?ENJxm7&2Y@K>b*Pk1&ogZHU}tBq zu^d`{K}rNl{Iz4aMtWsG(hCy^#y^qB)~>}$(JNFU2P6S`Az6{9{66xFzQ!lNp8Z6u zimyCb6Ts>=uQ^97S44$Fp=a-YoEQ$KGllfFmR`=UaUAG0s}TM_ki60Z(88h#kEw+ip`#z;a&9S`_tLjdW z9NZt`bPIDI8A?nXcQz-7loYbcO+=mbmSfWz|9TJ5Fk;7{sP2d65klMnxPS*x3I8oH zw07axxVRJ3)6?VQG869#xG&xbhDAg?PZx9mM3x$Lb%40l(bd+TY7Vy25JKl`Xxo?) zY7LF-i1MLO>*>h?2V8fv>|gr{oXRnul|>P$t%VA=WDw>)38w#l&eZ}31j2WpKbrtv zwK1IeA^L;2R5p-czI^@+Fi!0BV3ca0vlJk5eusjC9RYzn^#fDoguEvP#*mib>zPxb zrM?FoU^h3nsOaclTY*FO-@OkBp`xVBNi{Vw0Z_1J;Ga?%r~|*xkJV{?yz2g$!zWqi z;0GmsP)9lTQPzz#yqXhjEJ^K!Lnw^T`R*Xj@Qn5mNA^f`D2~n~{AIg;Yi(_9WaPkm z9FqJ9RVk@JfNM)s!Y3f;;i9I043t-BK)lp^Z-0X6(C5QVjAw3mmX(f_-wL<>Sgmwx`F-AU6+k61F(ngNUg{j01dLVsL! 
z)8=f^^5FlTbGaRX&#{tt^nXA61Bt@j3(EJ|sF20MsT<3CeDlUV<;4MTkFCs(fC z$AR{AVn2Y3#A!XEu@f;FP3*-K6_P#NO7~C^`I0zsL9lFKO0uHtS4=O8!nyhRFjjj@x`1gf7gdcds(Kg_j6f&}3 zdG60c{}UsBy4R&3m1W@y!Lk&<4$9-lEVTMC)%g#uZ;ISY^`l93RX}HXD7`vIubUN4 zdIl6_zuSn>e;0dHpkKc>&JUt8+TYlYw!p*BfW{t`w;!mqZcd@WHOc!~(ygEX39ie9 zl7pF$)wN#3=71G>Vh@oYt1LJhj%}*Y7rt-ZZ^yg&EaeA{{dRqM9p82RxtjHRF%~Ve z?T9nL1$8sv5J1kDf2Y3o(J@cnngD4Q9)xK+hMVe#8<&qm1=5jkWQ&Nfp;NBnQ~#&? z8c|t%sG_+%aaCmHh0i!O5H8id(N!a=#~Bl)Uwt0y`aGMX@+2L&Tl1%o@97?N9*_H~ zu5%_jGbemj=~(cXqhhY^<}@^I$ksiPtt}!nD!2itW$s15}{Z zir46D@fs6|I}S@o4(d`fSzMevsK=}`r~;LRNM5CD5I@!Z)1~?hyzp&XAzz}5#>3hT zEiw-y_y2Wh&v(W%8R6YWeBGcdG$d#FP$fj+WbytBFJchSK&LqJ72-}d>~!BUhc$0p z;KE+wE+h@~d#$Dr(%u8mS2dldh+hz~qL2SS(%w3tsF!1r-QDG)yF1Tht9!e@@BHpP=iYyvvfg*TF~&2V@r*H9_f)4p z`D=`%MZ^m+x#uLbqhVxkw2C|C`dZA*RYMHz_*uDByY_q!jA;cUen`||=~Io3>UCT_ zJ^nb|t)au9$L`AU3~wP(>g(S}w>9U|c@7IWN-dZe!|=EP8-e(Hg^r&r2VaT2dcwT* zsXHJihC&w-F@wn-U2#H`Td-PmW=3Uaesyu$S^W94yYX`#F0q8|VvYl91IbrL-jBJh zES1F`)3S0nX11&KRi4)CH!g9tQJ1D>ITK&fw8nA(^lO@M+W$H8g95cz^D%1Y$`9&V zB05;FtcX%y+0C%^ef_ig0blixZ4?>(8RtfZL;%Y-531p2D24>MncXW+wo(=vqXobBb9LU zQe-3fB@^SgBp6i7Htt;=K4xf{o!Zat+yNR?yc4I==0n1^B4rZ!C$hbpRKhmcxtqaq zv7f$lL1J|xW@|gK^y=l|wuz?BP65$RbMdnv(%AIlvmTB`{0$Mg zWKYVgUYsY@jb9cJYjpxF80^xGgDw`>uh&F=8L3o9Gp`>4#MMM$p9QB4_Fl(3VaTxx z9oA4Zx_@YUhz(NI!f?vULYp`tME4G8rZK5@Y_DTU#0C}>5mDTzbxopxaT3kw`E6&1 z->n&3?Ad^D{QDR$!qO7*7G1Nng)x9>e1YoE<#h+j?6uOktW(w7x=KJgc^IaHrgG(R zB)j@UfK@b5pYsIc*eNQ4SQC{kWc3+OFkpBn?<3zAKbQnJetPMEc#y8iD{Mj{UGp(B zj-r~QJ;-D{Oz59bcRYzkq5jUJ>rN42HrByj*xg3oB1fK~rXzc4m zUtpULisfJRL_oQ!<`V|6EHa}WhxpD}dugDb7`I3UF3SR~T~O#h{Thpk#_)9@w$aOj zaUUh;r>m%T`TdP@VYQVxxoh21wl!DTn9Ns^BefW=SsO^?CwrGbV6y<_L&Mu?s+6Js z-K1f}RzfE~=!HTp*UR4cWZw51QnUER&wibU^Ho_z;}yTO!)kwT>gA)-;&sE0L5T!L z9j*pD{0%;kdOHoF9SKemWuo;=E^vOVDw0rpYzO40v7uKwQ~jW(ZJ;XE6AJ`{x;fi} zT_h^=8MtPY$jC&5W z0P~+N4jH;p@rUxs&Zcf!7nPPu>6hXk?w;JM$9G|BY2Y|-j%KEO{rKh8DE|_#cj?(J zVg~9||L=G>!5%IUH4x9f?5qA%S7tbvE@)Nksyq*M!J_`FP7es473~cf_`v$=dTv1} 
zAABnN#ApWFKN@b{AqT32TG@n+w%rXazqVKBE~u9e^z+ptUH|W*WvM*JaWI{{m5Ud} zPF*`7A%&D3P=3(RBKGO4K(AHj$Bw2x<+}Nc2cB01(+$?&S zc>qKxAMl_&WfCc19c+CZ+-c&rO3XKSY@7Xb-SDPIM z#^y*6jkk@#_`#A)=OzOhqf75k&Q|03+p>lLHd2q@I4v^k?-<-;f3k{RQp=q9D}x2| zo(je_ZOrmt_y-5XkEP;x?{Z z*>borhbr3^>&|*KG>yL3i(C#p!2M}#kf(08C8BvZKLThk7#Cz{_IRAa*rz?h+7_2r zpS2*8xEP091p=@)yqkH?HY!<&lkScD5yqo$E||bcfiU=I7Ym4io>F>VVaCSm>h701 zsRPRCn%DJwIdvY5y9g7D*}=vr7&sJfA-Z){{9R&DeLgNEcdGTcG{X=Mg&f2(c`v{> z&h>R~3Mkt6S9+i2rk!3xv13_~+F8H;AKnG{FrlN3u3LK2<3_mo?L4 zb)ks2Xy)Djf5wo9Q&z^kFfXIprAF?eK#zqqosXAPw;;XWcCU(aTn~r#_f5H(Nsi#% zybVu2Beivdo5vr>jf&or(aG^J3(4wsPI4~5WdzffjMa2LKb+8w#;J!4t)~@ROjDh%wNk@E%&UdrCe`k%SxSc($%OI1YT) zS!X|%OsuuCZ%Z*n(Esmwnv$do+GlJsJ2;rkV-in@NXx1X_NgK8s?vzO<(q= zf^dpdTZWPf2;+b@1%NrDTTr|{87Bll8x<;4)zUBKVM zCGcB0+ft+RnG3NAdUuXe)_Q##swMZ)`SIQn5^{Cn^Z~pMov@f_xg07zlWViMMpTY# zPc^DeD%9M$`|atdT79BL#kWaDDYIov)mww`xUsPqeAeOV>3n=sfA$^P#F=0Bqhz74 z{VFjk5U4nAXn7T~y%{pmVBXc3&_>#9FnW`_x%se7o70rLc18OgsvG0p-%hXv6ROG? zX1DF9t@iW>xctTL%h&JSqObfI&;c!#BA~z3tK)XSw-tWjx#4W?I%JahuRas=^>fO? 
zGP)G|ox9##mGMk4CeIhQQ><*<3q<`HU!^(%;u8HiA$liDR3g6{Ia9S=+hen4b2Ony zVKH-p$Dlg^H*iZL{0_)~KcEIzBU323V*3!Wz)?8p@B4B1=8+2Nkh#?WO&YL%@f3d} zI3U(5(5%Cv6!%IF2;%p96M!Tdms)};q%5#F1*D`N0TcR33jwWmZvDO}I)+Y-X1Df> zVgh+#Zjl0D(_Al(&2Eb)Z_mw7g60;_AP-&c@FzOqrF#KIo&}kWmaF}6RqLkcR{}OR zhuu|hKoa*msQbzI0X<7lFowq*=dyRmZk!iCCW!)pDDrO3KfKRhFbSU(T+iaUP=xD- z{|B$|`~4{c2|-#ajRVB|fqxJK0Jry-wFqlLgZ2Tsg1h!ah_^i}{(QA|ZKO7Fpw}76 z$@e~rf4GKQ3KT2^5rBNvUvYhp==<>B!}{NLpUdIy@ECjqRJkJPRaT+UzOAWo_(4Eb zF4n5=7sUfa=7D5+^kB^uf=ADMUEInD|2;MJ3dfMVgzy9%Osp$jdW|aV8#-g6ip`h9 zH9KGVu8wGDd@5m!Xob5cweesgHJ1@(N5>UAIFB84Y5T|Si<`NXi$zM-k`mlE ziD&wDcI9d7%w_7=A0Yy(2(Vl=np747V6wxzaM{X_kUK2a3&*ZVo{ic?FYPv0DR02s z+;q>MCv$(cmyy=;bIVIl*YBqo7#ICzLcv9mqnd`H;-oBp_FYH%PBYuPPtOcqvN zE~*B$*Az^Y3M=;cG>s{0TLL9vfjQYGLp>G8I~ozi0XR=V7Ws(tiu*`j2tuS-7~T(2 zd>0=rZA)v!tA32UF(CIwm_V|E?(Xi|IHtSbT)%!cG&7UXqaSXcbfn<}g<_l!3=Od| zFa$dZmiW}x02J$ZsNw#0GCb@ZegdK7Yn?H{d{54H%xFQAvTojipaT6>8lF?#rSm)< zTVZ@(i*`hl4 z@)3n56t0f9NGj~_cQ(F}Ul0>-?v5{^<4-aSx4v|r>eEz<=3!>xa9!#UcO*FO1`!aj zJ04kZ6%ggMJg;S ze{#53E~+CI1bsE@RvB7Zo^91vya*3trVKA9tr|N0{_c4i&bLH{VQ# zZpUAe9$KG}Bz5|78ZQ89tDE3^JVFIGM{Kgns? 
zb||oV6>2{{yT6%JqwT|RRv!vl9s+IssJyx(=!xz;RZUSgpl$3`|aH4WrWf0K${)-aWL z^Jc3*86l)C5DFFJpND-yrIwD*Oik4>Hh$@PZqvE+=C!0`03gr>>c9Z`-atqtZ^ za%CLgFzao@c?{9{K5!zo{b-S3iC}-zpBv(qhJsv-rC>N{^jO$`BE9MT<$n55ejmLJ zfvvFkBCpTZ1t%HE+Y55oma~YCg6zdd211Qiq6AgAqwb$=r@jBM@W!$wTn!IX+3Q@I z_v~{nLv?kUL6V|}qpSH&@0z1`W?{BVe!*0;4%2<~?aSeUu8QChZTno>IMRM9b9Y5K zjaS!+4`vsxDWcxWD%VZp6g;f1ws#kG8D#pV8)u-_Dx)IEXodAiLvx%RQa4;-PUVM+ z+zB*X7?_@}cfA0Te@+MGxvwN8i;Ifnl$A-j?dVl0l)$o~#$@pR(NTPi^tD&7UzZja z4-OB5_}zGJAKu5c{$a|-oI^Oy?VbBQ!)&5(hdU%EJ0sg-y6lf{tXyhspE!-R#>cJ#hha`v~)Gh%?rIas=lUGvZ4L@6vkbM$3o$iFDk z9V6R$q^)^^sdDW{<5J9)X}_*G|4FnXzp&M0;LzRB80%~%xPQ$#G5K?CNyBTDqk~Nj zGCM_m3`OweJr)ui$_}Xqhr5^|s83#FF*{4m#;H+rw4f1j=$?Mb{J^JFr^M;DG5SCh zEIYrjP*z$>PfxGk{e_%9g5kk~2d%BG=jZ40>94WzIi7-OZu=KhRID7FF*7s6gkn6+ z1I*2Ib=k}&**<&_hQTFyoMcDYRvQn7LMl;}RB;e< z==HaIKjiGq>V8k@dQA0fsy{iYFpV5eQ-6_C%Ttk?67i1F{kKsr#fQ`MYAmeO9J=7` zZ4CNyO41S_pHXS#s=?beN2ZukiyRZ`@oQ2;VS)11rx_Y)Ggu92g}P z$E?$unUga$Ir)r=%0Frd1sV2Sw<^1ySXr^Jen;H}H5#-*BLBrSF)^<|>eOk#iU=++ zj@(P7Br^FDz5Dw`L;wdd5^KblbaS~BZ*Wg~)jXe7wv`Yh zv-tV_HeqVBE+Mz?55+?m{9VmTlSjsfO{~&84eB_d^Q&cBExML@wbZiNz>eiHYN>;> zZ=N^A*z#WVhKCHtIys&cl5IFC&CYNRqfUk<=ycG!941bda!@Gi>Mbos^EmhiZ!Gkf zWp(hYyGk)wGNgCsQ_ zx4%g!w>tz>q(RZ2@R+{Wootorlk3RCjnffs683!)Q)6&5?d|@`ePdKm7#1dNT47^r zE6U1QH6g!`M#A&>xu|fQ3>7xiCIuHi5UAUrD#wkzkBrP3^BSmWGDZv&^Y;8wGPX8x zj=H?L9lra{DM$krYWBr3s5wWS3JXKIQH_C)Jt$$KGuH0f$bo@3jiJbKUEAz|7sBBB zev9|QmW41|#N*djC~x|F8Tx?R4;DpUwfxw6*EvUrJUSHQG(0hKFVAn^Qoq3~%8O&) zsm94v5I8(HF*32##m3Ow8Hv0Z?VCCoFBaACqr1n!L+-o3r`IyVdmyx>pg1YRbzE!b zFme+y)6M8O`mu#KSU_Y z`*!(bbV8dbW+L=(Fp~Ksa4eNwXf>XE{kF-)FLSvY%Y8d%2?`JmP)7yAXlrUhn8OnU zOh`*d$171$#IyTl`EF@AP@8#MRa{(LV&Viq$NqL>{qvVEhGu4YSy^1>(_mwbD+wwP zO3rq8gbJvxyv|Tl#THdHS^s^X;z${$scufzB1}+wnfV@JNZ5gZ(?ZdK41nN_OD!!i z)V#+~jU>Rdo#w;wdbOQILBBBbBVq7)E*IMj)P#F!liQfRGmd|uTcO7FrY%>RrwFzO zKMJpjJW0Bu6GLuZLvU@@+S3-#OfQ~07Y{x@*gHz??DS=t<(%GKh-?wzrbQA=B!eD4Fj!kiJHGrXe&KmXSLq-sGOYSS_gP$q$xtf!0{NU@vJqL@2Y974 
zN*mTM7n%APhFu8%FzX_tm%2KqCA(pgxJ8tkF=An@*K3*$siiyTWiL%-PHI_%jK7Aa zi$-U9w`Gge3D@%Vi(caGgoLmzIWm%Qy4CVW4cB$0T`_H}L#d8dSx@GtF2n~fCX9+z z4e9CY{m0rzqLW5878hnx4=AUD9Xya;C)RI#-GTCBU8n$%J(RG+o3ifHfq?fx-Kr%4 zTbLkl&8VLN^-QZUa$z68s1RaqB|vA>+Um1^ZXygkRyWa78Bt_4q~?K1rKyL|M6Xu9 zAoOZdEc_VOsWgg3|K95@!HaGlaav|ov0j9|90(B(w6oa63nKF4SCAlz4W&-X9z`}Z zJf3oCxK_~8DC^Z~f{Z7+^G#6-Y3jaLF$=}yIR$-eRC9-Ev1%WJEU9`Jaghm zE_AyDn|LE3oF|HkPYRE?Cep6c*Oc+mpO`x2@80fsKvrY0}P+9v%kE z;zseOPoEY;uAk+;)>FAr7Anl=_EgGz4#pmlnEsd|(Qm+5 z7+1>&!&z!CGpMfqzN8N8fRTniJv zQC#RcGCz91(oXdcr7LRsKFl^%OS5MQ`Ejj?3D%pw-KrF^LBz^8M*RZK589?oN_Z5? z&TmdX(l>hx2X~AdXlcss#Pn{+hfl_c>`;yg^5N0%mp`ex*>U0)4v0EpBR=e!qY%{< z0Qag_ndzJRTbKCpaHK|Ad|vtTC05>#ggt67E}>562`(#Y&5gZKX-_(XbI{b6w~&)f z$+hg8?+{8ITvTg(dIB>~Bn!h~By1qsUmz9%iq;^9_MpmQ;%9UfdtkV+`h|g_-j!c#6jK#=&_?;iL`1tr> zv}q`-(8-rho<0~ED9HPd@#rI>t!nJH0vN?PmmuIpCfO&#!omX_G{?usohp}CSEAmO zg<5_%;Vp}qvWEPSm{lILVZJXNa`@ks^l6w0SKUn$vp*4sw@@A14R7cF$X-uIdPGwY z^OZxLQASY_y>a1r>RxKT1CiJx*8UfkqDzm^h&{cSk}JImhFDH?v4jPiu)K?n9=&Q^ zVH&1mmWzl~JV*5Pzdn#vjrI4^aV84S$PMZr7)iYprsBWFcyeU$Oh;#y*#Hy%g+b&G zOMr^UuQlQRn@d_^^p;(RorZ>^}hE0>wJ^#@K zy)c$GxL)dj6xFJM;%2ZOZ=ssVD)`>m;uyMAJY*GZ;*IO$LuP}hxV6=$dnacK)oC>| zX+sfP)FYUqh+l$Ve$2D{2sEzxl9Qk8FC0@sEiE?zrns-XtcsZKyY+ zM1w12ckTAN6deb{6+sw>mluJ}f*kV&GjnqH1kgLiSf%|$Ltb7!FE4MvyPapa#hU_D z=X8Xll>4AY<$>p(H)qSgO^4KJ6qvMUQdOUJTnK&z^*ufQu`yZADL&tQ zI)hK};P7iAq?Vs!)N7?IG&3`)!kMDfp+3i$wA)Xu;A_Z_fIez_n&+0Kxz!@wXQ?70 z51UbtD>mkFOzAC7#0f9x32HYkwq=9-HO4oCmX1nnT$Fj|6z#J-5#V=p#U{mRbuTY3 z``ziLrlwLGj(6sg-o1%S=Xv|8;NqA*x=Zv}M_>QAhjUI=tzT?eoK`3(9*OpOk%A39 z09)tF4%;s!ME!JjDfD_cR)sn+QLqR5DI@pyrz>K3Z+hcGe)~{KurXgG<4#sKFXUm> z!-a`nNTZ$H{wpzFxB!6>D&zU|8;w~~QiE3Oq-YuGo8kxJTc~ZR;vq?O*R_-TRSy_J zwe@jDEm5&2)YRinyE0^pC&$nqd*-@Z8lMcLqZpcw5? 
z2LNqWP!e9Je$oN**mM-+v-2DupH{%+;^5#AxoY6c$a{H$*{7pBmR0SOOgC|!f$_U_ z?#~AKJVW($ovZS|!@eq)aN3pm#wIBY+e5s2yGcb}sp;o?qgZBcAwi#Ik}3i}Z>>LQ zs}W`vtKIJEEK>B2wZD0i-?kAZHxPjb70bt>1gSGW#ADyVfU>N;3R;}`B=?N3%cO1C zPyl=L#cfZ~Y@KD%E?|cSA zufy=9zXfi#V4#L?VBd!#4g&V3S7tR7cM&b0&4MqI6E-VZ1Xso7Nfgtf!cVjgxOJVPm;QPvc*cen%XRE?T_Q7u52`fhURnh86XAtt_1ij5h zCkDRA?Zk#t39jqzzi`L{Zylg%BvA6gCLSaMlvK#b^o@^K4h(^Hv7oD~h5#%s@tfb- z>CK6 zdw#cU1daG97s~y=Ho^fW=)xr`DkYU;UPBXee!DIE3PR5HA9lDhw#TF=MwqPJK;{L; zII9G1T<9<))k*Z>gJmoDa62FB@E~mOi3%}V!E?t&^3JewG(}B-jKhH z{pjhFakL4S96SEangE0$-ZI;(vFXMn)K35!&hjk? z2sqHoS?ttu&GA% zjC7HZVG?8W?TJ-ZKp(j>xokKuKE$s{CqfGs;2bGm?VjH^b2a!;mVzbYvMFZ)Y%R>h zo9!tm@|*JYj%b%bh5j!lI*Ex9c=IGaw@Z-(2f5_R;*fEt?`{1=PtrNEb(H`aR_?;z%}>5@KEK4aA-`kG*~{PAnyg^L{%x%YmU$` z!Z!}LE`ex}Zs&nR-cy+1Hi9CjRoVo*5JC^e5HQA5a?mjAwc4GZ`H@&6z_wX__eSU$ z1qzseay^z@Mp+iCMG2t44>fg0M8u;H(<>7MBfEzAV0uZwhBp);G_j{@C~VE4i}rG)`%HmIr^5*c2%RJitkx| zf=q*cJ4yj?Z2U<<60b9ZTc6Ks;GK1SC_w)r5)(4?dQbNj!UO>J4YeU(h!O<6dVYRB zdfhdxB0$6e&Kzi~b$+x#O`nF1u`fkssux`D_WYEdkzrZ0$OE&D8y#!ro)8nL%)pTg z310U~YRgWUm2)84_6a_;Arx|H%}P@68DN^R&ObtE)Nh>>HXg>IGN8@}%F;VC#$vx1 z4W);LNJ#ht(Xx<`kP5w02np}H#`Gef08cirvE2~kn*big3q|FuSKzq>md)9pUNSrZ zn2#3|JQ{I{ut+2#xZwg3^uFliP^S@AeJt>O?gyHvqD;*>Y z*4EZ;x0e(pb!SsUU1es_KOGNa7(s^fZVcCi)Z!_um7K89-dje}I>F z4odyU=xEPDK(bX7r@zYD;1$MHD1%|Ky{+xio&$gxQ(zxVO{amv9LDG;goLl^($9qI z8BIQX_|z(Z4YhOCPxVh4PpiU7Dv5&v9M2->Y66n}511fN+uGAp5viR?nNl{URSU_c z@tC|^qm?8vRPi2FAC+Bqj(5LK;?GQ7C@eWGZ4nM?s8gJ3=S*yI6w5igcZ=9^C^QQzw8!f2aIAI~WT41T)Y2>b&Q`v%g(JKrhZu@!%`R zXPN%s{w)}ic$obToD<0H46tqKu0DQhakzzW_EHHrWJ5=iV?d-TT>YuSLK6*i zPF|)>!<_Kaap4BdMesK>4H&hLXx*2`&c0AyS106hv>8_f(95&yLRV796Ri_cdz+1= zE!>Y45x5u$F0C@sFn7{=vOf;xozR%U&TMCU48@2!*gyFY0xXO>cXXGp6ZnC|XX1*B z=f4u4ckZ-zUvb}1dg+Lny&CC)(HcMDy8A0#hv@Va@`BKpR*z|Szs9)2=>%8aeEoXfXYcntfj7H2>$A|`XLucKcs06rd#*9A%J z5D~LKbGvS27?OZHxcu{av>F>Mq7)B#J#9sHgU9i7 z?KVvLc{RxyNHk~gx#`?dhVBoKYrO@(tg_c{e#v}|5_ZoyVVMBR@M*ZFe^;IqpkAhX zJ|$#qRi5o`VumC2)hVgiQqN?#j*U}svyv{U^G7{?X)L##lQKQmD(Y52d0^Rl)bQD- 
zxuDZcQC+dhQv3CMA}pS}rDiZi+?1w9x%6le(mEWeQzE%da<%tFi!PAtDad0MiIJAV zT*1yJZOSj0$^!Uk$JU1(Ztd+NYtay$UUUV3sl8*E_2cA0dsxUFu5dtSV+%Xp9i`B$P2IG9Fv;aK-z15Vzl zr?W#kar#IX4)zb?Yd4#9gs?pbmd8f%HQ({Lw0g^zcxWI{HX(^)2YsnOFo%EJx#C9h z9gPz4?&EccasE)27y z`{sx6><674+^vtPRKK{t*FS{Kn{>4W&1%%Zk5K~4tBQwF0#-3f@? zcVC{joFUF!vGkI|SJQV`+~!1|-q$c1dblTn7}G{mfTiHPbJj7U6F)hU z53w1r4xG|_>*uw+AGHcMksj;x4f>rCybJ$lS^+to_T40`FKi5NcC)0QV|;I4P4hU9 z|5z>bJ1wo#YgVIWV%j`sbdpezQ0X2{I%wAmf^8tDWPbJEE6U(kI zGTslze$cQvmAH2mw&DRZxyZnH@C1ulf3O-i4*%LA(_@^C2yD^QU0Fyz051x(*X1;F zOnnT*0V+Q&?BG2Z`F*ccyobn$0~!STf@C09&e`Tpz2ABUsc?WxKMY}b@)p+s8uvoy zt)y6M!C1kFxgf#*`uR7h3(TY`U$E8n_m2376-8#+iuN)jH4YRN$8cuv_-+&Tc-kuO zNP;*DCNZD#1gtV929*?lS7Yalkw!-}!5fw5R@9Q13ry zSO!su>^oVaJ5h1NR-480b%3|e5t_8yN`c>Gda}Xx2(d5_fe-O|Eyy)udO=+N0hEsz z-#!$d7{HDK#qDLRCvc3DX?rh>EVW$IUb+Rgd!h(}-!YJZ$Uq^e9JsKe$A4hIG<@j% zmLL%*#Nt3yQ#4sH9`YzDVYO3vr|$TkDhh1C7{I6SOxn6#9)ro5`G#Md^R4zp)Cqpd zCEz`!=a%IMspUD?h#gj)%NPzTUl-B9BL`_LlXlQneNnW!-Y=VpBM_}zvp&44=T$v% zdWa&P19)Pug@*w8`%6Y6Y7D2DV3&;>BBxT*O?SVZ3ev>DkUqX2<-}9&s;z-%iQy|T zPJ~a4l$OiS~cbCTy^>PeFTnZv_|}VHmvMP z3E|Cm8`WcwmIkz`yBg<#*TQU|YxtD)a7{*hy35$sNXwBY`ouDJ0}qT5KpiNzKT0yE zsc0Sb={n$#3s#KMR>$es7Huo|@N17{EM)b?-a=Az z=brh}eH33TEDS7Y+Zo*hxJvM!V?RKiAD!6JaWNbDf>vH!ULTg6&Jx!+U_t=4-kNBm z=y{5RA<`H*H}9HzCMgL*M-ZlTWryO(_lz2JZV&0D^d#yYD&k#g@!*$R6U}DC-???J z)ItTv+@HcI_Ja?@;!#wyMH-1GT?_kq{lK_-yf4x5SSyw28r@HR$A5#(lry+U?qo5$8w1r=O^8&uG`-kQDjTY|0 zhiZ^BLisx1N;{D{d6pU-X36tpE&0uiIUc`it#&xt zBK}SA$*81bEVnNfbn6cj2;9j#AG-k`@dl-h>3{d{-d+oWaEyOV`c?&*xa=l{(kA`i z|2`v$iy}V2_+Jl5@Gf^bO4?>V40POsp9y+Gp1Yf7&LtbeWlojYW#H`DP#C~Y79>;GR@ zajBAlTHODosRUk-<)E+U6%cf_paC}hXMmJNUw2suQB&39`j21#=Y?HmDCw7f0{ z4d!~)o*}57o$StjWD*sf2nOQ1NG0AsT`3ku)_C~^%t*pqn zjG(q5@wbwae=bhdDnSu%jZH$VxXzDzbgK54h;TvUE>p2MF3b}D-*^4q1K)6@1K}^} zLXC1MsJ>;{F!Z;9sc8bR4{4HH@ri5O%Tc2a%HO*-RfNF3@PCP06+S}R(NNlyJGwR> z1CZS9(+%eD!v&Z;JTedq0&Gulf4`MR2D{amlrp5KC3Ls7s;-u7g;=->h) zNqCdr>a6{2{s~@$w5CRb!&=Beq_@h-z3oRNTuxis%-=5e*6R4bQB%oCeBs<_>r&1l 
z?gTDU3;-r#0W0*=mK8q~SKXdGO>70gCj*_Y^sHvWj49gL>-1{HAGO4sTCV0wwRr_Z zo*m05F#YGukLeYyD;gi#PdliNEx!`eQiS%czrMtmFg$)!x>%kBIc!;hea<=XFpa1> zH{(9pJKAHDH!!K`8>es*cLdgul?)_s@HgTRgJ)Kk7>81ck1*>t`giHUU>D+}Z#KWS zD4cw$Ye>tNB?C>HKkN$hBYkMTvNI97Y|*kZ5q$GJy7YdQ*TO{Nb4J>8w=8ke&s5%H zZh?jJ%#RC?tniNBIo|otk)FXFY;6~?5Xy~ij_f6ZGnJ1HP~;O;Pi1ULYcrWZPrV6P zs*QHlaajFVmtTtX}?{Q2Cg_T)>Gi&r=^yrrI5-S-mK2NGb*z!m+=It9|h zUBTNLrxa%`Vk^N#2Zeu3w+ z#Mv2}>=k!Pl9mD71pHvVWa1)|t1EA5Zpxpq5Xg?C#F;o{XWFRAQViT@(gY`(`5QN_ zz1ZmM(XK6txVS`j~um7icFes8F^9-^dzgD0G!reXKhRs4$#YZdJ}(GGys4v z?gVqLlm>F|f(mdvRRCOh(d*lHGUxDSH+2#AiLJ$J$g-FDQ@Y>2uAV!(^vM!KVKBn4 zX=)mvc0?(~=(RPiUC#fPh~%6d;(AERT#Gi&Q?_fjHM=f;X0 z)^0jYa!}yAk~MbBIg8q(2}9H}xiQ33av`ZJC7Tjk8w{TxwuR#f8iSG)Riw0+ny9t_ zk9!vd84zgov~O%AOMpAOtp8Ariz30>FUWz#YGB%6Z0Pq!EH3`?rOmwr}SASx#M_w-tqDz!Gh{99vxu` zf%pG>6(l$fINq?0nWXlLP?PHGrb7^3MLlFVAEI6@7p3aAbLmiAn?##IU2jfKc5~uj zoj>HIQ|1hI^DkNiXON_mFq}hPk@6Lcz0?->bNPEu$B=PgmQVgLdB7=|=jKLUE!=vR zkeaA1O(6}=j1$|~voXuWXuIMr+Q4<>7d5u7!(j@|d0x=ly}wL^Hh1V~>-BYhUDqIg zEtCJCs4EVT$^eGpl-5yUx9K3>=v$=i%694BJfJQ%)cIu8i>ROwH=; z5Sg=T0PEwndkJI5ldTaJ-0Y>g1HUv$Dvv0iA>qw(9QFEs(e>Hq#j`~n41X{W{ z8N}PB8p#$?iE zuQQDXUMEgB>&TD0rUDi(dq23z6x>2Ke!hl6)(#D+RN=uEe22uhXt06%|1t=qBfNFJxvMH=z=&ue=cCES(p0-L30C<0#AbwW}y6q&6#5Y$lY#S;{*~vt2F@XIpuo z)nR#&F^nOpbI;AkjMh2cI^O*coA&JiOi=ddbh@Z%W9+)nbf$p+YDX;ni~o({hn7Gg z0BNfPpjzD2Td(XZ^541p!DgXz>bEmZ4`Zg!NDew6M1zS)x5B+*Kbc%hUT&_3?@lIOKv)Q8#kXfSkRyh&TD_m3 z{xXS96WB#6`=npUCy9vyL~7wS^(i$iZQ`a)d}R2gX#13g=di=4xFoqQwP!!ax~ZD~ zmq$L;&gUXy1~61ts{knJW)b+NN}LdK8^?r}>x%BwI{A+~v#Tzeq07|L#M5M8MyS z&mq1zT9gX_79hHq)v%=WLO6y9JYr;E0x`LTu)IA{Q&YTx&)ab8MVID=L?DqeQcoUa zp^}vsGLp{m)L;BK<1Vr-%W^xpG;f({xM?}`Dc3RPt97%g*uO4u&l1j0LfWBH1^h4H zHQ3ImzaL(tLmfiG`Atut;=*mh$>k{<=yb!X3)2cdXuCMG!lap#Zt^YFATuXp?%wL^ zdd~Vg^k5Rt8@^JzS&cOj6y$g|RJqF1QiuZo1b%iZ`;V12Lywy;k;||WsmfhVm5c2@ z!kOupWg`nO%ILW(AIx8D#YE4ec9`(Z^1kGU&G@y=1B%dJ8qQOmo(4bw%4#aw8|Sg5 z!(AbV%_cX$Zu=O>l|GEEdGJzNf^ke0!ePUYi4$K(Ta`!4qVa)=v`x|(sP6T 
z{e~Ta)pii-oy5y6p~Px(pp>B7)hqJkewFrsV^DS?gPQ74gVvYTo$vA9bZ6B-Hi|)E zN*wlf&{akL zx%|k|m9zabrvonH1@~Fc;**eWWYT6l)YAEuj{5{9+e7y1xV2kvuc|M*tHT&uHLdeU z>Mr&H`8axb`>^HA<>!-S-V2q3wWQja@M$*i=w90#n**m{Tg02x|0ks>_4MK7cwL%= z2@Z5V{!K|bdLW4V!$47BWai>hq-R`uYcSL=hRZUA_x>WDov;Bh?^ zPh7MH_yWgilBm2x81MPk$hL#y!#e;gDAR@7mccDmd-g09bF8N^sZ;qhlSpg_S5->D z`UEHea2WOlIlwbKL}%?NPi`0{H{}P46VpB2&f+d#{XjRx+xX&2VK$o(HyWk$$Xeo5 zh;f!#6c9^ZvI(fwZFs095oq6cbOZMWn6R$dn2F=eNEHKR6`9$SSToM6)w#JTFj8^* zqlBcSI)hmIepo_Gh&k|0j2D^Cr|HG>&IP@F8)JMgxE36i$`%4$xsUw66t9KEJ5#g3 z;(fQwa`9^x>N=?eXDyw(N(#0E2+J4yVBGHXB!iaylUkViiA-V|i1wftCQC6FXOT=S z?$`Q;>q#ryJoSYZZYrC{U5Xa_CmCK;myTJdvyF#$Lb6uU8f?P_V99xTyns;L)zwu; zXTi(s{yN(9>$S-;y0{(^7G1xJiV8GX^0#m7fSim?0+5meDSSC+i*%cfQFL^x-5-rJ zHTDERmOtqNkW1K@IRS}!phO3vZ20onb;U~YXc%ph%R5(qGX+I_eN&Y7r4o^u`Vs># zhTy|U_Q|geT4r60!jw9@{DDfSw;qpy7DQ^TZ~L}9#%1{cP5tV$FZ=>=F>i>G$7Iph*CKz5SohV_ z65HE0vZU1mA5eEBJ*|$`HDZTZUSyCZ?MS2IbId+c=BZNLgNSDC42PGgjRfQ~u1y$3 zzSm5es$c-<>=RqZmR!eG~yU3_*|WBynfZzOVYC*}w5Hk0;q zMB9-rhP~>qInF1_&D2+KCCeSl^)9{e7AcSg2bR1w5XPijH_`=xazGPFL1dQfcVuLO@(NS2QJ%v_Lc~nAd~hDK-YfP(Y*s69no6pXo&J9*IdK zM0}yQE>LmFHb*scrY>8&e{C`V||@$p*tCc%{dA1rD{h5_ZY18l?gL}&FpSa zHslmhm*=V2=V+W&I4KDp#}_i~PAoN8;Q4XKL-Z~1CmZa5YzEF9Yws_1&Pag8L&)hZ0(i{tzlH!-k#0Fys^p6 zrVB(+nepOJo1Z$C*UGlMy?p2ajf(n%HD)^uQG^%Pq)_{jN4d_@dc~2pxpiT{HVA^r zZn8?hLTY)LXN7j_Xm}($XQCrfRx~XC7kh6R6<61E0TRKXfe;*mySqaO?he6&J2dVR z+&#FvyK8WV5Zu$ayX)LM&-;Gg%vv+QX07=({GnlW-+RyLI(4c})!rMy7prrX|Lf?l z{wl`IKjvMrmjcGLFe$f16kjh^cDCoO@A`N!6GF8^4%nfuNoE_|d_2=}e#2kDhMtgP z*SG#DQ_kw$f8w7t)(i)K+^syn)N$_&yK~y_#UuX_?*A0WFjoOKsyaAMnp$X=(+gi! zlz{Y8>pV>}bOBg}p2||p$n@F@=Rp7LQcwMFRecVAH5fYgon@~{9|3TT;MMBtDmT6y zz~xx2up^Tt&>@?~@joC$MXukXFx)_soPN$2F>Fau5hafV0Kfa{9IkF)ceQ*U@8F=! 
zbcT#lQP9RDSe?8d4f_r|%hOvy-z{D^wWH@r z{hx$#J!Vz9+O=GH_1Kz5v%F9{@(VHG#kVVL-#rx)xJ6JLy<_$C+TGS&<;z_j0yOFr zRPP&IyIw{S@zUn^Eo*qyv}KX98p%rINr1cO>+HY5)N>5+(dd&n&?)6+rly!V zIkT7zgUsBf14%Qm^8qy>+^wJA--(VEi~}+(zlg@i#;E-Bu|NUVRU(*4RmLZh0`Fsy z4R;l~-{DRwCR~UG_Nq1$>CcWE|I%zXEn%|%6cVJr!~CTl6}juDh}WE}o&0aSp}ddeO@yA7z zYwltjOc&eVZJ82ep6awaYJIY^?%(crxM9-_KR-BF6OGjuh7AxIsAy01my)Vtz(x~b zqH)`9Ng~l9-lSfOiZ;;o9kB;9zCux>rr@aC?;1cE$)`q{+Y#qUkOl zA98YXK|w)_K%>y`aAj51hNdQYIk`d=_Hb|vJP0H6pT>@9Gf-Ay>Z!2Elvm6eDVC0~HUuQKyAADhN~4hJIG&R$G&)lsYF^fM&z z$z4nJOYZrfZs0@q`^*3B1}g zFf8Z;CT0-KBA}{bA*utQ+5(h^>{Xu|wN?2aJ1+s4U{MT)1yVpwi5E^4t_Na36mh1h?gk&MbIpy>-VXh@$*4#)VAJqKo3qm7Odi<>Pwgy< zTHJQ1s&BN&ma(v<#G$@?wbl7#3;$gB7^J`)IExZpc%NEoq2Y9OabT7=x|wp7>9lbv zOBN5sOkjxRUCtMCWZJDh*W_@x(!ZJ$Nh7~*Ju^_QIZf=Iqe}wk%ozH%=loiT5ufV( z{5&-^RZUGTF1>mC-`?-rQ~3&1$vQ-F_f;Z5gN7W3%Wl@9ifZ^*b3ZopAbcw^yp&EdiDG z)4p-o1OkE$+i)xcg7A*B&C6WG;G3hB`{nPfEz#8-qy~&=4pjp{N#J^_7@RGW!87~w z=g*Lvz6yX(cOrwgZt*BbI2ZsxZIJB@mB4LE5OYk-u<%Ot?DzQm|5|C%iE>Ep2&GOBGQY*Ny0sG3)^+4X)qjJM=JiOt{63Fv^$Lt%S}1Z8F#pe`7a z+596W=9`t3)wgd?Es_~BRIuN=Y;XyFrft{ycC0YKGjtc})2g7jrwW8yMB zAdcoD9s7KRUf|)SmeoSrE9#cdR7`T#u=dG&>J$aR<V zVmd#@U`Ej}Gs^|AjW^+N6L5;NO%4beCoEzh!>Ng2>lAeb>Ps_xbP?Wy=rFyn=)JJ5gj-=<}W3#rVBXK!*jXX5kj z1fCZzcQo5NbK2|hDx5N(p0teiWZI>jrv{St@mm}`Djn-ZHz?c2-9P429QOs2B9^_R zVrTbH0I7x2+G$95cwXk%dQ%lx*bI<)ii`V@G-^FvD4QkVMKw;PMO7%1LZWGDXgEF5 zz5i%@SmM9#q%igeC7lKnEoy7aXsR+GXXrh*D;5?O%wK98BxK|%qVvdHfOBOg>HvoE zW7OsR$$Mlt@%Se5qM^xVapgi&Aon_8-}`9(2fI&|)1cNhQD-pT8h~Tx)Dzp`CKE6H z$nZ%m&9sK)ls_O;_JS}0lHqN&DCx9TM6RS_b;w#8{zW!JTW81w{uXJWI8Q3=v`&V* zIkBX#6I!1I2=8yxuWua&Je$B%wO5@Xst$gct5W0=LJ2DTXwP}=F1vp&)BAS0S#^yD zW0T@lex&C;>xK>JTMv@sTNqANRCkir;V4;<%O z$4Dw@F=6ATY9>@ZTp{Lg7%&a=QHC_OWV$3R`N3_I)&N+&+{SNAboBJYOG85v9BGm1 z{)3x}c+A-P`ue##fVzz$Z~=>h8Pj{O$6zJk|8|>2nDeiibPdpGVH+U??SX-T0E`dl zOb2dC3czS}M7SD{1!SzdzX1vCNO}dZ$6EJ&qpIPeX;te<`j+t}QCSN$Cp`<$>JtOr zl&tktF~Gm9P~JKqbyiP``4V5$`a8_zMp&aA2GUx_Qb_k7^LiYd9U{lT_9wb2P4*v? 
zxZkNN%2A*5L`sP@X}4Z=+2|RoZcin`|F`MVpR8(Yzg@Hpe1-vPk4(mhVi(*T|NDA8 zcN>pdhnTTD4MK~b+u%M{6*QqgS0v z#1a@GG%fz!#aN|3GO{ieJ~cIUt`59^71(j_*PHv8j0~OmN}U$%%Gg*%SJ!5KplO3a83JGc~Zb>0eBc)53#e`36efqLKV>5({J+Yh?> zNFaElLL{b#|DWUWs45imTQHyjPP_~U0#W!R9Q4t%&{qfh+rMDj!#TF7c*q*9^+FbW z(A*Z>0K$Qx4;v~BoLhv}9+Q2`;RX91Tti-YOPIP0{&TGXRh*7UB#m)>8}xWznuGJU zPU?zU97-okNX+lR!#99CyCX!E(-0jlE6>@Qh~}L48yP)>cO3mManpZYu(b5$#l;4g ztEWkDNG6R#HV(ZhAhrw)H%W-jD>1xY7%R&)RFQfR6eX8KD>|yvGr!+(ZCu-|wn_<6 zhp;Jf(X~dX8q^}=zZFH4UCj% zW?=N*{yj?9l>1*65cCKC&UJpeq1MbFE=hOgVSU8BgK?^Nd;AUN)*_XFTKX`f+t~-a z`N6bhrFfs_heS<=-G4USe~jdKxk+775!hXG=0ek($%hs3FVI135|osbsY03>8j`NM zAxHo)ty=eow#rY-DYj?|54PB}eRt6N4q{4^^f7*Q0Z=g1PU+$GGwR zk__eJ83%uRUR?tnkHy_Z4hKHF^Z@+~%G#gsf6wROga7cJNg)-!#ONLRfb(@r%jRH< zX_>#y#ZM}fnF%{b1#WV8`2sv9eHe5gbyQGVNq(PN)77URb~jT%K|SnQAnfUH{6ZWG zj!|askcX3B`M|pvE+*GQJbZ6>wM2$%9@snL;rHh#()s1;5O#>;{V38zBpoXN!z-5e z;uT~=8nrohrI*wa>~7h?rKki5bIvps`w)VC+o}jLty=U%R>R2h>n~p7+kuixf$Qu) z#geQ<3{YZXRASKj(nyd1w>YCz>XCXda~@RnkX4(uNIofmCo%dS*vRt~A;}!psQ^n1nY;{u&EezcKOe+N z7!u|p6ZDDjWSZII)C1x~)?0{_j9MDV|C|ONV6Hd-VcCLHgkT^<0&b@a!*Xbk-s|Tx zND8*cuyp+Z_KXU@0{tI5QR8y$>?M`?vAVPJ;q(nqW#FOW1%*2*CrWY!JP=9t>7181 zTKgu}?mqFI_gd!swF`g}q-9m_8bq6*>}>?Gw-yZZ2@D$N8K2t zxqOOLXAk((M#INW&)O6ukVR1ep7GAnmDRhV$G{D-oz}XXL5}tO(?AAbHD6UUzy2>t z2Hj1AY67RvKuU@8<4d$ghAYr-+IvXpLVGIss2%|dnTCA`>DuUzQjj(7y3P``TL&i6 zTSepktp70x&)f6z?!qN8;ozFAq;)e!jw?8SA&7P;7tW4#a!d zhRT*eLqI1LkX8fYxZfcAKR>^3jdmEoQip=!I#DgB{fE^WPq;Cv3I+whm48NSV2%M) z(q@Aad@z9g$qZt~%{lA9mIQ;e3iLNdc1oJAr3Wl^{vQ0))K`PwTe&84t}8E6J=|i= zJfxbo!5o@jFKef;rqSU4Z)t-YpCdlce2UxxQbVJ7idu!@i00G(xVb6gmWhZEGDvkA zP44u!ATrw8YADJ7FxGK*^AdaI?VM_biHjI+yTNXMy={8UbMJb)_XOucnwkA3zUY)^ zuWb`0-;M5{AU+0+gvSxl3;7!lW<=v5k8h7t;R7~>yeb+OlhoGMR#aH1REmoM+O3{V zr65@RCx5)NqK6045taHefLsj!kug_Ak^vu}&9TCeMK(0u{Uvr|^an0rxUuNZD$Z%B ztF<)HYuz)V{#F0-uN#yP2f7yIRS(~>u{~OM>iLvbRch{5>2q043KH7PVnsy<;`v3W z()pX5j(cBKwiZ6f&K6Wmx9*oM44gc+qY~>9?vdPt9So*DmX;zfJfvGG`AkziDC+RL 
z?nG2pLc&2iug`Cgk2PLiciyMsUFo`MnF%t=Q8XtzWp!>Y^|};y&2O8|MLIIyq&mp1 z#6{ixwcQX5&_&ma$Mu0IbrQ|i+faXhVSD>pGdBnLU~s6P5`(H(^ruJ|b6Z<^9I}po zaL2Q@qspw{Jx;=`Qu0A+qrlYf{J~Yi+*T@IAk`<#KZe!j8cyWu2m(Gq+F0+*ztNN8 zLk}u$7aNzWuJ5r$1Zl%m0%#9+4+B@&zRTBB8;lrSd!t<;fzG&*P9@R8&;#O>N{d)dJBLp!RH@_v)JSM-ctXC{}s~;4-!qem;#B= zYl$cP|4Z@+t1}H4X8iSndlcvvy;vXf^)Ndf^FUruRbexeXGE?CKXbDI}W40tv;A{4| zBZn2|Y7S`lMBZaX`IW&=Z-}ErN{(~ARNt*_zFP(XUDJ!^gfozaoMFulTFK2ZMW9Y`@aUF zCj#&qm=v^K$QYn>R#td8kWECaTieBcMgbu9kA{Y_uDXb!VGnL?lwZPk+YY>sfQn!kB9i(=uw>y%VEO zOMy=_T(`pfJ*FW37zMyk0c}EoEJ8dyEn5y7dn&*vM7f6i*|wM^F0fIMks|>`-y#6S z*{~a^<%|iS)zQ&B>tsXCcbI#AsBsCLsvI*zpXO69K<7KC)WanKYQkb%G)eE>29N0{ zJ6mZ@7UBd)PxBw^PjTCf0Q$xB!@&>2;`Ae)|2K#%+8*hXj+S#vrJ+8(=xxCc@11!& z@5aJ+jqqfKl|DZ&#QP#MPqB|NUM{CMXzZGPzAVQ4)3V$d0iFR&Voh1q;}51i)<9hb z1mGsvijQ#b`OfhBqqfTY@NT=Hz(mBZ0AtpZBnPBe2jY6QcV0K2rLHBw*&XOV@50)+ zpmh1N^E^jA0Pqe+U?abAOwEo{akkQa6`x389l0 z2<3q)f6woLa$re`xtW=n(belRFG?{*ZBt}?43r-|lLRgb<2VPar7LSwwr9E1_|z+%^Wrlh=&OCd8d_!7J8lDQcx!_wlm zyLKz!0LVriKrCGqoj2%UDjYK;;#i-YzU!^*RVj;EV`Bj=fZv?}#DOX3y z&3<#wXnrE9;xTH`{S}S*>6huX@_Au%NCWB4N@)1mp~3v%tk)fsl8u1SEp`>3D|+3K z!|?t52FieL&JLK02|uDLh5?1P8w|Dm@dspRT(*sAKxSQkadT{8RxYR^bU*haOAslg zw~pmAZszWEKi=g~sLpn=?SYe`Cv-sPrJo`M-5x2}rXo~*Pu_GmRBySkaMvxD%#X6G zp_r9HWOfnv;ydK-y59Rxmw*4a) z>gluypH9M;8-^e0UjumEx3ED`HuP19wRK|9 z%%cS3!5KLNrnR;nTC<-Vbax22&neX_JMN?=*zxXvf}h+UE|0b~8!R-mv^eSL0Ks(A zM>@kqkx;n|MB1k>6Bp+NozF`dMawRP%T9k(a?F=c40pef=95<6M+@&?Ef;{Uv2KMDC5Y$Mt#|N6oi-OjQrGgA$K4c zI;n<4(7?f`VTAi-&fV?A{W|@A?CN@wxvcWYb)tac8wHKH(RT^4Wr+*BprL@q}P?D>Q*CKJOw%w8V@ zG?l$DNGHoa3k&qO#nLnmw{!#tC(Pf5@s;U*%quz^f~*D0eOhXV*dsVe2Hs;F?EfA&CFwoi z3{LOQeltX`*&9pi*;eM^W^<>W-Rv02#{jMYm!HLq=kNO-VTosy&wcSDSEdmxUE^ECL{BU=!EHrNzmmE(PGHJxB zf5L~-(jO~&Lx2B5dT1!~_2+pW-nlO=$~r4z8UEl&JaAT+ z@^{v0bfi2R?6$6SUYY*A{R^=o`|-*?4l)#cpc$7GPyHK|im7g=oq-Ola<{eQnDJQO3n|#mRc`8!x5;LkvO$sk1Q*Nb<3vqO zKbz36Pg;$k=39^J)w<{JiZt?^5RR;1(DKVaIaBfoA@_GEz9u3_Y--k#R$pdw|L5wn z!%~kItOt&x?KRne4 zKXI5t^XI)GdJ~klP-8y7-pF_65|&(l6eMpFz#KyNhwr9p9Au 
zWuk#jH;`>+_B@qIhbLJ!je9S`w$!0g5Rsi}E*bK5R?Jk7Q?;QVIw-GM(y@@uV-e&o z_U`jrB=MR}m0t~*tCYZmk*Oy@T$k&#Z+sE@#rexFlY>Yrwa#hs({Q?6aiUgr_ITBE z^Z50-!qVK`=ssjHPO;)befY)|MsS;FIBvDNu~z&#HlRd~4QhbyePgaR&J)S&v5)!R zk(}GwmjdLe*N^98H%QFTy*Z8FxQLJsB)`~SQ@s1cA6fM8cl1%bd^v*86{ak$jO;&G zQoyq1!^og6Fs;_vM@=9B?nwG>fKH?S1iMaBSf(3XOpgtc@L#h36Q>+4d8G~G`;iI1 zKi2pV1sS(1ZZV&@@Eg|*CK7DEkgRUA;ZgbZ|GXvpSBTiLSeI+sfYmIjEEB#3V$i$a z+1fA@YffAtnWIoKZ-eryZvG8MRI=&m5lLRX*;L6E>VTVG7h34}~*w`xb z)B77Dk`1e~ai~~^H3S3?$43nWFUnBc-Y>mXeM5S^=Tpx^4(fUWChaeiuW>fI@FWlWXdfx2?t3<-w;6~ zi8C9|{TAdatT5!Tf_`we+*NS!))@VsCbtvK?D@!h^vmo~YUN_0j^TyYEtU!Y$vN$m zBF?AN+vWHv7H9P^iMjfd8=K;h^a4&4qx$WnU8Q0}O-iV45`eXrtg_QNJG z|G`(WNdu^%x4nuo+^^~Zs)w;O46oNuQ?UXR?YPe<>$z3|V};5#I}e4za#Fgn?Dz?5 zd^gtWLRE`#-$ul$nepYLUFXWzoR2aG<#jW06`+HJ>99d+eh{jCi)j}a3b60n?+~iO ze6LBS1_IX`d$u}j)&zGa4Xt;*H_|5~PL%7pgq`kjuSI;l=Uu0YT8{~{r`1Qc_9#|J z;z#3UMR`MupP2BK{2<@Rv7%r;8yC7Q!^YIG|h)i}r;+hjIU`Kg&gC*dD{C|AolB!p;MKN17rQ!E^^Ajp4 z@6LYfN)K2xw0AoT0Wq-+_tzA*$GS_l z1FrXc`ro0%l`Z>pan#s(eR@s&&sECLlWQ%W+O5aL7)WCUsq%K)S#(E$BgI#T!fR+(m0c+3+W58O1t~qTSsN<+g_TErYmJw$+c@$z<$35RnGu5&s=xC1#c2nq4+0 zzr!0M6znL2@q%g(CxT#p@B6^TiOcH?k*=^O6U5vC_lGTI2cX@-%uu$}8N(~{T6>tn zo&ZGcx=o*Yty8_Jw&qpGgpZ4g1Pg?g2?~ItHd#0BSu^au8iV%b1J(<>pf_Bqqi>yr%OrZgAh&b0vR2TZC&_vM-_rkbB)i~~b(B@ro>r;>b z6aL75W>jU+|0H!8MN0BnZc-a+=pEoL>rSEtm#;E%91r#ek1JJO=N#T9Q~1X4^$KON z(Uc_(KH?rOGJ4WY@;tRoS0JqGyDi`iP+)_oyTR2Tki_9!1{%ki_kF zfv#Cg4hLQw&#c9z%a0AFU<}=3V)1om&Jsf|-Ssq+-l~Pup>=x>v^;9n;PhC{1!0o^ zxtgTP!m1Bod0G5EvXdH6RDjjIXL1<6>CE#?K^D-1hfG_K?rB6VqmO@mq$PGwkv8@g zCLE@v!H#+@crRl)#yIcX&A#NY+8KBX=7h}R+Ovh8qhW(^{(V--g!v=-lg-4bIt-{V zz^LE!b$RP`4EttR4~}tU6;US5H&l$US}=W^_%3mdZlKqN%-w1C6%pOrEx2az@)KWK z7=#E@giu9GzDv$0Ym*se*hRqS{Av#&I%xZj9ae%d`{nrbQ8B59gfZ-6Yk{R`=lB+4 z_f}RCi7vBC8S;Oeq4ny^V?#>C^6?|{l@+wF0kBYD;&-eMc3Tn_j|#5u24d|Pu1D>i zTwyTE)HqAN{hWrV*BqSGJr9VNG?f@}iG+1{SSSUz2H}IcJlW$9R7WqkS8A_$Wdz;m zg&wT4IBu9oS`lE{DZ-NyneYrj#E&$`v%aiU)g(zjF2n$)F^)>4>?c 
zHn4>smd8=Lr}b9**Sjjo&z}s21KJzDmHrkQ{uL_gTOw36{23?w)?)>#y5}U5|I8C|Mk}_t1@D0LH_*`_2=3xLm4y5IcvjlT%O`RU_tgpK~cQyp&3h09?h<@i)+tRW#TZWv@( z{bRz>LzR61CMXa55hKlL+3tu$a3$rfHzx2kkNCVkbK0lsEqk*{-y4BOz}R3XnJeyo z@mvyS0f3uDWoC~yWhP$@h=L` z`C|K&p*~auKRm|y5mu)acNmHK{r0fO;@SWZoX9^UWqLt2RrJw=vHqthgT81#cm7W7Hm=j zvw!FHzG)(8m8(DGJGqF2y=sU0oC7S7gS&I#v%&74tx>|ft>fn?p#~HbaDLw8I<5ER zlSAa}X)P6N?sWX5@tJ5)RCIv1q0V}hO{udIk1Txm;K9J_bIr^uDTJ!>+i80u9HKn0 zX3O$z)MbvsnJy!FI z&adV`pQqmRzUlLPxIJ95ro?T!Ee9+S2u;2x5gPn_=O-1$u`(XuG8LSA^!*P)f~4)-0C)}Z}DYPdK*%3!TE)$-isy zAbz4^GpF6DWx*)jPplpe%2mTk>ki0*~sFBy+L8d~*Cq?O29qkvk*nt=joxPWWU_j<# z(UWE`b+6BN&EO4HC(GramGfgZDJ`;AVntdkU3$s58W*vuS+`B z>%6|&@#^M{*5L;4?Bw_^i%$d*!bkWLNu&TTK+RrfV--4sy&R`q%?5`lEwm_68FdX^ zyDYnF({%VS;kcSQMfx2r(Lgmk?6h#C`hmh=!zqcPqrf+U52kn z?XCKQC5xlZ5y`Esw;|BLDl=Pi@{{$DKja{-(2VpYD*habE4}HQN6;E&JME%53AqH^ zIyVroG%U;GUv}Yty5I8p-37+}g9R7{6!4=7`~Uy`|7aeB=>zs9LnZgWJd73u9L=Dd z!%N~^C4I*nlDgNt`;M9KEjyai^_AeB+#UCS9^eRTm zlzfUhF0p`ZK~#x^`$^?%_*-&C)!I;Ksaqv&5MH*j6ml@r_3ZO#QPbp!rgU_ka5r=D z)KHD;p{FKylcS@Vfw|gu`OZK?zAdCh$*uOKUy1VG0jJeZ=I7=cOJ8CfEz8s`>sYkC z>Tcqj0y`yCt0;4c@1CC48nSyW2Y6H=VbQ*z!66IZ)66=37lXB_bzOs@2W$^YI`At3 z3a{bt>Bioy*-|B-I6$z}=xC-g`Zb!iR33lTz5#y~6F>CfOZ3jI7M|GE$v0Eo#E0g} zMJ^|07Q}!o#wy&kydTEKrIm+!PAc=@Et5_k$623w19jTG_QV3c)k3E>4*E3C3cU`x zduV)bH$=)NHxJS{qf~iFQ@Nj?&Kc3-?DuKkr*jq># z+Z9x>?}t5=GTz}gxa2zl=Vd?9XEaKPOdNJg`J!yLoxM?5%xz!#haz|d7Hbfqd#RUQ zXV!wLEpCKHhTK{PQbv$@S~24*pIFX6@_jWEerAXgs(RMDmMB~zJDf-l;>#Tq)aU9m3Wz8xk$&;Ext`j+405nn%G6stUR~_* zeY$S9u=Zq-^io7?pd;^Q$o)VqQc;bGER|bogpQbSRM1SoL6nn#2U2YM6>xy*s^)NF zG|jmi>pV5OQoRmw9A4*M7i_;j>j+_R!1~a`IPbNrI=j6Grgi!HDYLxd>4)9#4nC*f z1GXIkLZrmwVT(<-sY`A$!k%lZvRO|W_<3&zapvV~4VrF?#WZ!Ia{O({>UNCRQ!Hh# zp2v(zW#evdh!CGIgGhH5!**0ge^;})DCKoTV@m)|7(__#dveM zpCwg(SQ&i(-SM%k7ltgszBd-V?YAOEZ|vjqQAldXtz<^=%YkQCTdu4qgB@ti)naH; zc3wfwCwD+I{f|I<23E!R6i(3nbcj+-R z;a3_b;6$yI|2p58vh!Y!tlAfBH;XITK?(*?SjPN@m1obV4DMUb1m4r4ilQDF9cF$N zAad&Rf4@eyIxL0cpWu&HTmjsINgmSp=K9L?!?7AR*AUaVeJ%C~3gp3C3Be?(z6mKm 
z$WSM9$0u(6FTv{;$I`7y^r=Rgu0j6I32ITT358mgO3CS*A4x>ft(>SN^;Q3|`0SJp z-=6nDc|^Uj?kgROY?{-Uo}n#g!IRZdHfdY(-!Z+tJTEw_KGB!cjnUyY9C4Vfw6h2I zW>OSLU(?{AfW|WR*azYbRtC~G?uq@9h3hVKZ?3LVeKrEE^b)R#FJ@BV`E}IAO?D9k z*VZ<+e>XR_7(bR2=}C4l(leM%r!!Y(q-v$6&6;nmm_8lU)s%u%w)$-CateDc={ju7 zW@4ZjU(X|7V-eu%+i!Kd*;L?ZCa28SopPMJ zr0_b_Z9I2qsZSTg(N{H&O0akv7M^-gaf{H1Fwmd$n4A*pq!ZvD;mwvAn(wk0O;nym zJF;effW`qq+Kw|yIQQo82}iJ>7^qTqJVPHGu2q^qH*-#77Bl7Z3pL9jZe4nwv7DnJ z!?#%7U5wbLYyDHDYV^`=9IL_Zwk;p!X_N7LV?Vi>g}QjlUgNWxtX!?XHTw}K`JLZz z4IhiZ`*K^JyiIwbHlT#D@<@jzH}9|&Td49(dJEG3iQrhIVR_|uPTPFx&s_xXW(#B? zjcSa<5Yg2Hv4#)SU&gCTwQJZH#S%nRhbcbH`sG_OBA}!d?~{ohW%4%XDTk|BYwLNu0<`P&9dz4|-YsqAI&Svy-nabsr0_iv zD6x-0v)gFv-Ih$3n5yMt>hm`b?oPH!KiXrHOt~L#9u;gK?N#*XX~K^`l+yJ~HqBjC zDt9dY>eoWafai1g){unGd>L2WrCIhGTD^S`Pi8FWZ*)NGwy(;CB zUGv)9o-ZHU!kTxMHz(&A!S~l(^=T7xvCg%4dAay~(m?aoF{O_2X#X)`COSGQf@GkY z%NWliBZ0Y8rdKEiLtr^<5Ta_%?EE-;r(*IWNmz*A@L&;bwuGnGcFTcL%WibU+s^w~ zMZHMVSdwwrw)SjpmHzoa!FJSNt6P7rfNXJnJePO~v2o)=O=a4LuL%-pbiGoYg;M=$ z#BIf`Wqezr4S!_m{K#w>!%))hSQ$~8HL#Pfm%z8Iin@?_*tQ^zACRrKx+`9-!K-Rc zdf}CJIgWrn&Lq#aS(aa{mB!Y_YEfgru*6t(`6n~-hHw7bVf=76+R%4mzrJu#l) zM{bFC`FDr{;Mr0n6~!r+er#{ETMP9ZXG`b2p>QvRUKwuLjE&eIYGsL~!5ADemiD{C#E7xwvUhm@R^Pd2LbtC^ zmB7Y0_y!c&%D{smTb}ge^8RKdO64={)gt5S_|~BELtGd_EIJ0Mrwys%$e-CXZ&E=K z4Ng-IR2*!b%k!M8iM-DraX|VHZI4CjNER|&vY@Wi%dxtueW#Gmx-8^qoxJS1psYO8 zO)q}YhwCe|uA4qjp{UqZz7+bN$sAVe$U98oC~2?QvvD=7TGy4ZXOx{_g-i&lcjVWp z?!Fo0ss27cn&^)s9s{Sho?6&(Q;^i=PnfFO{q)jjCy-f%*M^O}BFo>+B@MkX#^Sbo zUXSOi)3m&ljpZ_t7ItMfiHt9ME@YZ|7+0CUNfE&ppS zQlKYGVp!H+<{^qckwE&W6-7dFdftt7(D49#iGaX zMoL*p24kkzPGq7lE#G{_OegxJB^|Cg71W&E&wNy^!OFS(9QP9(0tkewcUeK3kKi1H z4({T}@;>K#KpUV>gMowb>$$>-$>JV&WA>cpJk}mVc6U zfYL~kdnURyvj;Now2njW6i7zyej5YxkRQ1_Pv9WuKJ@i&e=affosKx&xsT-Jcj(-o z;FvAGTT4m&PoS#q;_0gVFBupDqj7hlD^kMG1u)awcp!xBSaiON2GXu!*R%a6c?7}Z zBdMckw`@d;bubvYEIFLHOm{d0cm5ZrC9l2%xV)!8c8gpYtp`OMHms+=g6(X<^g$e2 zUY!?RZx}M4WiM)<9GLaYKjzr-LkDFP1O@D&7V?SmjvRQg>b=-N2zGp*LO(`8k!8ZG 
zR9ny$vKc(BRZalt3%*9Vs=iY>cD|E%lS7^Uwdr_#OeIW%4IvM4P}n4f%JkdrQwP zuLt+o5AO8@5kIYhN!^qjJV+Wn!;A&x_Hubxq9jWe$_=T_*)I0Y(yQo$6Hr_WUI6c@ zo$}ZoHhOxm*G^5k44_gf^MiW9MljLDgY+rZGAlX`5y6?UEi_qn`bZi+_Y;;zmELt6 z_=`;@aaD(@ZaPNVx0CHGQ(uyl8*kTcacVuexf3+kWv-MH7!`;^bXO7p7Q5m{hJp6( z^5D^op{)rBrv{bjIh>P%30FX9+A{iZgz~_3aryVV)+)-yqiY^3$+6c6mfjpT`&@5Y zE4k@f?T*xUC=ywjA<@w`$J8UefWmigjw!FfEN+)7Z6f{oc_!OPLwpCfo11ShtzOai zSwT^jl-4xg7Nv5xu;K(Il65qHR>2w_C@S_=~>@64u|MvhVbf{jnAo|Fkvs-d=~xW>!(i z{zJ9WM;#?JVwrGc?YW{#*A$S!C+N+GrYECRba7SnRZaA5hH%=a<|0^QxY1i)k#uAD zPmB$_=sN2r)LMlBp1tRf(|DsgGW6Omg2s=%)E6FNHh~l!;z)Q$0^scBKl-HY6q7ow z`s*zoww~Fv1{HfmVvpx9Cj`}W~L4Y!ABr0Db*2yXCZ7(rR0R|3f5Ck=&d@-)S zhH7KkKa3S9#~}w7hgDjdmXtzdg2jP=q2Zam=l#i5NY&L{t)`;;MLJDufFnb zBLs*Vfty^M46A`~6uy!Do=U)&KmraZE}e4_CUhS1C&BUX_1wm2R_oHM$k%B z0$|IU)^s*xTrOIT{~QnVa3I>OcgUnEtwtqlmt+&s|o&Z-Mmb+?E* zZIi(S8N2Brv79}{ec+Xpydf}Oe&R*EGXCGT5GbHmCK!0 z_S;RRj-o%^Bva;4O%HzrjoVaYaY5(gYBtx*STnrTxXo(2*~&1?7G zc;)eYr0tE3MXl{F>nKny!to$oGuva-_138I!m>B8LOvf-2!!i+j4XE5{zMDT3Wwx#o3KaLY1-d|A>w@1M5`aaLS zW6cPir#o3(ZA{IH9~Zqpm*Fg&5k$nf@DV|}uh6EJF_vTov5W>I?X$48NCaRy%Y6b8 z7dIqg5Pi~!@ojPj_C6PMw94N0OTJgE`lor_RWhC%Kk}=EKB5I5RAus- z!9265zjMj|Y9S6~wf%z=acowfH)ELjQVN*{^{u7{*<8PHrfpBjP9{Qk>& zF{pn4oG-}zXEC3zN-Efx^K!?KCBEd$;m?Y$PKqO8E48gW88jZKq@XM0u(Nh~i(>jz zT&D!51EDHbg63n_{svX(r0PZ5zQj00!n3i0*6lh%t712b>xTd!de5KNlUT3SkP87;0VAYju+wheG~fJ1$?xb}ABt1w)1;x&By` zjzU!SI+a@~)stlq#ef3{pD9hpbmOLB*+MtMKWew_M7CKQ`sM_cHOuE8Nj}ZY5QEl* zUZ7b%l&sUTnycL1^r1RjG{TYwLpN<9^U(@rWGYIID%o(c$e|Ex*H)&?7sxo_`mKCZ zrlLbjYaQ+s3HwH%riP(aXD#x7v46?fOJ;-JfVqtt5U6S`q3m8AMW3twJyP%va`2yq z*O^Z?w2WO359H_f@e55-YEp45__Z!d#yUk5wzW+7Zjvc^^L>mNcGPWt=)c(Ss2Hzbnu~n&wcRaiMl|$KL=u0A_v)t#*S47(-_P<g|EB zO2Ldk2qL`)`<(mRa|h`-581?L$*?jiDsK&k!$#-{@q8e`v!}UQl3p`Npzjn_7N0ke z`Gw`C!MDBCPAq-NkU3=&j^PcTVj#=F`?KoUE>(c9=;IteX((U{GhjG_MD8a%{%8wzirG2|{L-yIdX~j=ULWB!A=z4+ zjd9=3Yk=#yHVT+RE^yk_ZFn$epWEuTZc@(-ZKQjaf6@|HenK7iXFE1WZM@xfWWQ`X z#ybL+bhqg$-YfZPzV~^!MD{95)Lcq}S1{-xiYv&uuIl%^IjVOv+vwEq=j3)fMDqY! 
zms_F_6ip9cpPf{>>IO)Z@MK^g3Ark6@yhGNqII@6#^0@-!E|4?VD z373G?sR%BpLvDHh_KGH4`^AZ55mr~SPy}2S0cT{%)k%0zc4c0Zd(_g9w_ppVy;;0f zb1w5UN+oE>$Ki%5u>3!YJNI`c7&eYCJanR*iW)-5<1AYa;XzJwrc%#ll5A17Ow4iR zxR!H{lkH`j_&}Mb zp2k?c2+^EAA!^!%r6K6VNyo6xJ8BKQ!`{Z14H2c3+{NE}8`s|H0~>!}!=cHw(T(Z) zsCcJ+_Iyi02Kl|mrtOIsH5=k|0S<&`+(kvRLqCK0{-P;zyDl&w)t4Yf)2>kcuS_xC zg00rKO^d$0^xD(w`Jj>&s4n+JE|cfn*0b&Gqp_r^hY~_tf6&|`@m#2HT_4s6k@HaL z;?Y{S`IfIU&WWA13(f1Zn|KY;)Yf9e;WPzdlUbgc-UsYW>W~7V)&NwG5g!vMy) zjGcMJXln9ext1P;rnTh%cn!{Th+6CNdW8d+clRf6bo z<&-P5V{2=fRQ82E42_bg?)S}feOaqNwm|bU1oE~9Rim0BcOy34t8483SuWlF>-i0S zYa0Bp7_dIxnBM2=Z(woOO5xpIT_CenV7*!xNx+K)U;ibh@k5&{bEmyL)70`DhAU;U z;-ddJmJ8z77B`6r3Zv=4n=O%7pC{%^y?)XnY&**zvYP7F=5#y$#kU8+yRmaB*DF0d z=q>$Ex-3avC79XEP)4Hu;M@XH|Cc2A>uXXj(a*Pmimk^gxfokEghg#d&F2@+JI%FB zOr!usrWXv_pCap=L0SzbN|o{jIXc9C zo9wT0U*G?r>m!UnC8Qu<#9GA=CJkJEYQ0?gUK_Y6 zed^Xqr@-c2504K$%p~8uQ^l(n>y}pZnj9vrKP+bdEzjxNTAA$|e=AL?%MWGGY(g7Q zux8-WqaY4swb1w8*_2_QZx8x!Hu|>q=g{TDsFk96FWX1Z%}$7$S4LKWYP>qtt*Zpdpl?B!=_q_yz(#fmdBN`m-%OLYrP1FlGOCzT^7JNuoJmPK0) zwIBbhoDHoakEbC2etXW)hb$UkPFQG-%@3(IMV421%?UR62Pyg@r4r<1qGef6jbPA` z##nVm#qgHIm#IuUGe?p8T?)6|E`8xrp#PcdX6)`q76<*oi#-q0=GJ_gRuTrWe0u1u zZAv+m7Tm>49*0+<&6-qDUfZJD{I2XkOIMc7CNQHHcJ&lae+B{$PBg&}R!gd#N#@#P zDN>D}NY87%=31VUdir`VRZSMT9O#vbitBhz;G-Nk5N^dVKVLtZ0mk1Bd zQ|%eFzWm*w!iBUa;?GX^Z3?x8heiI@dHV~g^!*{lUA^rvkat7KC6JM_{g0)>owg%4 ze;_;un3B){DMSD1nP4UdROth2!w-{hgy2&*_O^$vxG)!>_Cij|eou#NGHeCFscBNaUKwDq`ORgaVjt}rf60thEy9hP*UpWRzCOAlNYrSK$gBM zvG6K4IPNC8Wj}?xcK7hps9f@!D|)OkyK9hQz-9-ez`08(@iK)5F48Uk0s&jMn#Y_| zo{D?cX%VK$95w+h8t9f}_S5BJMO{+?$r((1)%ztitTK z=yJrWK58hhnGwTSUCP!fit~zTxqkIs5q-^@5+#D+OF9vHV)c%<>X(knTXgzp=!WCxLY|+P2Nvl z<1BJy5B;W1`{kbxfBenuJ`58PY^C)BEdxI$q$~Pxnbz+h$r|UxV^dBiu1q!I=<~UF z9(i~#v^)7c^Hb&HQCTg_cZE~nNCz!fo}rv(k#Q=-k;lMyu3~%Y`ShL*P>_G#t6aDV;5$87w(Q(;%rx!GM0b->4A1(k2q5++STG2B5mRTDQ z2j`6EO(WNsRa0u=G9&K%fD8PwypP=h5<+&edv&D(TB*Ztx=~%cz>eZ?y}les-rjM5 z;}HoV0PYYslf}R74=@O+1{jk@;VnwNQ^mb4c|JNH#{4wa=e-+z{n&*=0L;ZGYEMkg 
z!m;T?0LV#AV79!{H2dzo-hDjKEebN90Ulgh&IbG3f|KLb$KMxi0OzHo0goe3UWU$H zslvWfo}5d%q`3r0v*qTq`S^)#9(4a9fSATD-MZLwh6*6jR1knIiK%HVeW7jL-+omH m06C+{Jw?oY!lD2Fjt3_Q6S&r?BGj)x^;nzRU#T*^75N`^!t=lY literal 0 HcmV?d00001 diff --git a/model/train/yoco_moe/sources/images/async_ddp.png b/model/train/yoco_moe/sources/images/async_ddp.png new file mode 100644 index 0000000000000000000000000000000000000000..3557069567d5e7ab6a7e07e6b9da2ab03e5ca295 GIT binary patch literal 19147 zcmbTd1yodD+crKR9fFidD-1}Zlz^ll-3$!_KTt}#YiI$LkW`TFmWH7O6hx#uhLLVY za*!duGd}P0t@r)byZ-<8`&f&0IEP(lpL<{XzVGYWu{v7H@Us_ z&SO~+NX<%BQQpAMavwq7Zg3im!rWwZh#$Qp5Pe`D`kc`wiZhP~H~$~CY^}t}wNI0M zbiLb}`ZkFT*^>r=;jyZk5qO#_5~A}Nex3|ZaXds?IUYR6%Lkt-RCA8e+2b{xzPC&4 zKvzm3Zz_>K``{}3m5 zrUdVwG4_O^@9U;ZsH0^eH+`Y!)%hpdM_hseMs(lngW6Q zfvU~oT-4X)y+;Lz#I#R_w}5u(IgYH2RUVwRZIbL8McJ-c-EaM^fOh-f)OvcRE?{nC z{$6wA?>ixrNW;70=@9rO@!s`XQd4oSoZ_ajBcuOm5NmwX^ET=%Nt>^I6w!E6cQ3@y zytMKk+{0y5aLxKVeXXvV-=ngVNB*vowTVwYt!OceO}Y{Sb#&@Y<2(lu)42@($pv$^<9sQTgI9q%_1tlS|6Pnw36)+-$zG9X_ z>{?g|cpV^3!++a^_zwKk9NRDeQxkzHvi@AYW^*ff=i)RJVn6<|HMZcnq8ScQS0wx{ z1?!>eSJLR)yN;UHlmzKP-ox+u^N0FXG}$Jd9eQRb#PM2xh3E0}AB-%)(invc`RB1V zpZnfx?bzkT{aC+^#tyjJL|0q&;=2XGE_q5Xe1w_g9kzWrk>2B5G#4;=dS>IPoWLK%6SX8HJ-@LND6$b@SMDAF^>E}c7)du~`AbQCh*00`Nly6xPCkH)Wk=CYYe{7$lpHe8c{!t4f z6*&J1CnwFHvh>BhvvDlbHp*Pws>*~d(Vp`3Cxaa%<{v@uDs_xf!c#GJqD3&nlSjJ^ z`d8S`02*@Y%hYyvm9&oWl>a^bZC5Nv#MDBA&7*q#DkD2^oL;=H3Q~_PnDOc8mLQp| z&Zq5z9s_gtvk$s3V(jx_7U6mAUbf%(gag4F%l%6S;lG~p&tgpz^y}UG-YdaadBY{Af4g=h4Uoxc-FdsoP4iLj4UT@Bd6^>4h6 zH=3*t;pACS>uDnz644whPZ*>#cr9_{=rCvdZ#MwoTPM}N3aJ)mg|neJ%SrX$8aClw z2x6N!p|QtPl(!SOTZ((fart%GX3KJw0b6|f0m;{L-Z%OnIc=lLBWfN>kkIeW^T~;7 z_8eae0r6Ny0182%@9*$%fs5Qr4ggp|pvUZC*h}I)Vm#my`cs7sdu#g~7i-6C^%-20 zgK7Mp6i2@7HQ-5-SG=wwblK@uYa`S3pxT}f z+PTO@Fkd{Q`ykI2T(y$7AJC~emGVmE>2^#rzgg4yzFdbM)nWZiwl`~*n|lMNR0Z3o ztKVFR3#05q)i550w0Da>2dtQ{@(UZ%hu|EQp#AnJ2p2EV{Wad(8D}C<&)!MT#oyD0 zU3Fb$X73?)Qiuw5r{rRv8Sc~-mPeF^KMFj1mGk85+%Av5PHIo-j@yid@2T+&(qR9$ 
zp@r@z-2T|`VEn6cCO+huPD2w{T!GF?Vk#9~IYntJ?;EZ>FAqYH(C7bTc@8?zD2MSv z)qVq_@r>c#dI>#*WFcFLIZNu8#x6hGN!%Ci~z`Z%HTm3SxSoOD-m(NvL5!5t=eC2Lx=fI=LbF>t8Vod9QU9tU zKQnL&SJSUkJ3IX1 z`E&b3FYrQ>vnMsDMoik9)VJp}rCcOitLZ*fim8!)sq*RGw3@5CQ!BJYPA1j!z>Dsf zb#YMc@9)FP)mt4y>v1uB-GNC|IQ!3H70L!XicM<&Q9}C>f++%Etrfid#-G1H4Gj|@ zO6=E`RlE>>6b{ZwiHQyJpro9kect^G;n_~oq#1m1*G9N&iy9xn2tb|O1vu<8$TG}V z6#eqEdUg4No@jTQrG6?An7JsMRoii8m47lc;Owqz)@F$s(=hBj8P+MD@}yKMi7L@C z88E}qmW%c09;F(@N&RtaAU0<4q+6l#1uiZeSO>$(?f(B4fC^Ln1^tij<$$sP_G0A7 z5)Uxgn_TY6<6!3&iodX1t^+hersld>8=~z_0y^D!LwyyAwF6LS@w0zH@&DPY;3&tY_k4fxWN3A>yW z?jx2mJf=I0`@EmT`-T`6j+cymHSTR#!d$m!aqT%pwb`(MJ6&zDo5pXpkxskE`=MrX zJSm}nLvD8;b_BhUSKI+#`J z{W=7|YnP!6Q7j;j&D)ge3BCA1Bb(!6Cxd@3j_^uu+sJUr7ft0}Hg5GO$}tLeKOPcq z1FNTM8y^2i(Hd7_8u%`!#abyM(=kxx(izOlMv;0UO5z|0E2pvnNM9&Ce z1^U9_Q#l!upM!Kpi-D@DOoO<8jslgC7D_|z!?gobWwSk8`38e~ZR^Q4+IA?=L z5l$M2;9>Sfc;6+)2a!Dh5Q-O!mtQDnDZv0w4l$4n)lcV?eom~?uBUr@iqKX8cPtQb zLzHqy`(dS8t~;4w_ZE4D%U5*So9_r;ZKZqkd=JLrp-M`&HRC$oqsgd~M&wl4g*Wx=J5Kcga^heC}; zWYiR;N*t9$s0;30DiE!BWAB(sRqweu%%Ts(C4OHCHMnY$3aiz3;tPw_!jf*88HyLT zIC14r4&js*LU|}j=3QIY(+=uyNXB_<9mAxJR9_oRM3tEa>;r(@rHuj1^f|hV*?=uE zj4C!E-gUS7;Yl&B^#~;CzL&MakZxG1)-=giz(x@}Qxye854wuWN-nE^)2_aPqy`ssGnQp zt?Y^rnfm5bE2f9?dq+C~L5pmFa15MAdN@1zmDo#Md$EXoQvDF~Wfj_Z0J`82OE z$S?$2$%jm1#J|pkX{xiiz_*K&Ab4>Wqiha=Pl15!y&M}UqE|lMx{*z0_lj{vBwK!* zDh74163S`2H!3iNj#fB zeA(o@VE8z;{A&`4)5X%+{=%kVwfS~}bqEzp(hvnt(0pj%?6>B{ZvoX*C;gVugm z_3Gv(`>_XIh~e@CM)6_oWEvu**Mug_vfJnARTC9y$F%5-x&GNJKZ&N1DzoN`?If@| z9JvWB{j1JK&0X-df4YFh&P;4oJ!gRH!09xN6}s9N z`_LSt{`QPWjgjUnU?1Z$UC_T2t(tSi2QiRi=eg$!jYKlLrB5s^9h5k9*HRJS=hG2E z-M`Uxk|dJ3GzKsCM0J{unfWwX*bVT543&D5-med)V@C3v)dEsf9?I3ER6gV`Gmv;> z*!|)0i#O2e>&D8FnzFQS!;@y}6*ODk-h6!WyZpgg!Ir@xXPx7wM;+xjpnwNqpiq(( zix)Ioyp+xBe)_mp7N+CjHWg}Hf5aEvB69BHH>*7~xTB&6X0s&!t~R23WYrDNFt)0! 
zKRg_<4)biaRSN5n>4Hno9k1};JJ-&UL2wBe;?$BvCxJ8@`7)Y|Iqnbea9?PUTqjRu zRGz>kIsa?tWL{^IEcTq4V;}wbc`2JVjdLp%<#9TixMSklcFHR*4Hle+F7}KZ9lG0e zJ*-wKR=T|}{ec)EE9Ye+5U<%+i!f7kgy3Hv#cBujXk*l3RZNT6HG@7yP(5TO9OA7N zX_8(Vr&0jf;{h>fqI_9-L?Z`&#$9u_upFb@SF5-Utg14|a82g~!b(RHp9wg^IF)?~ z(U$>{Dm3`UHpeHgGL9J&%gSo5)*|V2cB_@CpdtpVq;4I$r}n9rI$F`vt5>IJl#<*D zK^2*LdzhNa^F-5J0M&t>)toj_z59rNFwn^=SwnhZGs7Q&kAvV ztbgsNwU(^D0{BHKiR4M~4by~_2zoP85~TfL`lw%y-T1Wh0% z=J3P$)~NX&$Mb^kQF(!y!qHM~mn0xLj38GLODHjU?t9=ik-m)uNtweD< z8F7+-p@W$3Nu0F~HdbN?L%Yj?q(Xdn@&4^Gmd^*98PLx>t$OzND(`*zPF= znW32}_j#AHxc6HR(~8tJT~8Zv^8^&n=qqHlI!x2^&({r{E5nVaI8=A1L{(Ah{m(IV zHra)zF`;yNOy{E{+ff3k-C`aeK9%jCr zFG9^Gds=VRi2AY31KA#deX7bs0Syw$WO@HjxPADfbmw2rFLrCt32{bDNjSx(b#FEK zfwA1NLwHOYn2a5-M8wwXH%vkJe;(%+LuK`~AkSNYqdiR(xiz#^@noJk)@M!9$8@tT#SxzkCbd`R*g?n&drGFEMpVBe^k?` zNGntp19q#fg_&FrcaoRhCRO-R*~BW>1*R{x5q#~RIaP)bgW|b(MIv=aip;I2?Wh7R zxqwZj6}=RA^#Q*CcBO_l=_{*hkxXM=oE0?#R;K0P&8<|D^#cn{JC`QeT2*j57`+<* zcw(FS;Ez9_ReZCVOKVZTXWjVb=wSoSn7duJe{oJ)+Z|}t_J?`9&j)&kGgBYvz$OkT z@KT4mCepXAho(jbM@~9me`{<-L#AAAt=FpKWj3M>N)57iBypkDSTJ$LA7>cR;h~(A zbk?7t!_W2ESfnCgWVH(3s3<~@WnxG;Zkg>qC~^quFm1=R=2wT@mpPNKqRN_@nez+2 z-SKk%@RN{rxVC`T_JSX6X^qIJnSq7c8zYCwBnUl3g&>~<7ZmZWKJbmdLLw=w+`(n; zg)j?=g`b=D{rXw|--$>f(2$zZ{MbEP-F12SO5An7yDa!$7*;7ux_bjEFMjaq>hw@j z^w54VrOen{$$yzH8CP$s2)PA*co=;0I^1W!YtrOIEeX}Ar;p3&^Ydh0P3L6(f-iuz zNG96io@)Phm+BeDvw8Obiu~0QM|DvfvGGEP|2SNzY%Gpgu0wKkZ2S7^i@Ypy?w(9x zSr>&OCH$s<8v=|gLl#b|q0oZ)XLyHa)hqkzk3*-LBI!ktG&w# zadmp5<-Z{EZD+dsCMV~ZpW`WfDwiKj9vA&lLp{rZB)m=F<_0fasc0tMs-6OKUtfrQ zk#?$A6yP>l=bqW>!^^dlZfibAnb5;E2a(dd;u#btIlD8`*UDj}ozgDnLgM~l>Xu?@)k3VSEJU=r&H3VSe_&| zc7HbH^M|K2UK__07awtRIF3@ zTTi&&BrIX}ENHQziYZDTK2IHNs}Q{r6zrO}=mg4wMgx7cKXf0I8CiNJDBU$6>Amc! 
zv6Q^h?Mighz`fr*^6^ja=(=ptpT;&zHz%Ja%qg}K!^o;&`_F-dzx%GpO%t_p8-?c6UUCQ3Aqi})$@}DHIwS0YUV~|tVyhP`3h+AO zNIuKvR5>-XFrnC`hhj*Ex2L`^T_w%rQ>BycjN$hzg@GQg_*4z zeEHPCT-UM~Pl2vxA;@DZ{u=!yDukuVqs#Y)vG|L<*n@X>dJF!hxd{sH-ez{!?rJL8 zR7z4nXQc^U_84vay!37;&i_dq*?O?tU)+G#9Vah<&MAPn{7e+{J^L^gCH5ohkw**T z{5X{}4lss{g{`rIs}nzH?DlDOJF56(aVhvLPu zJe1&1SZ*R;dr`|@bGdJfo&cDH^X)?#`0sCWs?Vy|y9=T%1GnPQ9fF3hOSOj7Z{=Jp z*K?8G8-oQH8My_hk3JS)g-=){ZqC;L!?9OuhDz@GXx2cRJNMRgo$eEvEkp<*vq$jT zloJ^0Fvk<|9myg@}m=|D!H}9ci2}jnW*@LYJ=d9@Yn==GziLt7>EQ5pc?XyM<>xx$88l0Hwboue z#~@42yvyfmQX$_>^8@J;baud1siKX1VEw4zbwd=pnOSd|`6$C*1~TJEBPwI3#P>Sr zZO&Qx*|TUw%=C2iEoSCcc8fnYtCypVZJz>#wmgvA>W{~E@m4P482brpHeLCio+gj+ z%rM|ByXHKRd=dz-O4e&`;^kGE#EK2Hz%aiYPJSF|yJF6A@x5Q>=YoQv_W-lfu`_Cb zv_`hc{@m&1{iAP5+75gFR|x^oSLf zWdn)UIwNt=Cr*Kg5hzU5lzsV#+VpkEvijYw<05+{rH?Du-(>ywUd_yVK&{Vk`L$U@ z$A=hv-((tmz=CZlYq?sbYskQrhqg$cq}PJ^Tg>eEoUj<3QHs%{uof7i58Wbkv(HAP z#e$g5$O-J*mVmHeyPd27F!GCZ`!2q$kHV3^?`fTcaOeKr9}hF-_+YlZId@syOE$RT z8J1;40HQ8HCG$UsNbikj01r9RW@lONsCbS{w-h@*{ubp0rjFTVMfiDBzP>5_gXJwV z(P_jmX`rLfzN2Id!4`5snrQjRJj~vH`p~&qSKP7^?qAb!G5LCm%i@4 zy7w=CLS^9FmG*fbY41dAi<{==sZ7hf+tAxl=zULm%_EY9XDt()o4cc*-c|X!7ke*3{ApF}CPXX>DIXEw6MfF21-7H>1+7hPz&Q;^-+x>N7 z`Kv1UxF7L5Fd{fYi3>^|FveVE(5me)(r;MLUa_E5Sy#;?Rq4N zf2}6hCW~#dTBGe+=Qg_VKOeoONG|krS!0O}>7ZzC%}Yi$_SWY0kRTEs#hJz_Gri z5&!kBd_cJh^vAN&)oD6+!ajt^=&)NG0ize)Sr?bU$ z6d?Y@TEhh5et;sOAA%@%S9VhTnfE6RWTL1is_B@zcG+P|4S3AAh&5!DC%b+1Z~P81-C*+|o_T_4jtrQ?Onz&`bofZRcgB;dz@}Wm3s!K*$OqDJXvi zZ8ImQQ+q%|fA!65Q^~2W9;6-CgXMa=!*Fkr8oEfK6M}P#odcNhUy(dX>i((RKXMcn zi_22L_AE@j1J5acHCMQHs@+bV#f!|VQl#tXa^$($Te~FL>&YzGHNb4{i?v@2PTGE( z-fq+RWnM#*Vweoto;5qlMP`A0&djFH#2e#I`e$MFz;_?`fxOyv4EeOm$9BCH`Sz-T zWeZ){Rpq!;57zZz@~UijMN95H)0uj*?cHF}FJp2z8XJC&Y}z=@G6n!(^!$Q9tTYP9 zC>9wKCuy!Sue=0Uj^uZKz8ny-%3b}Z{)ue|@CsYnUV@;KxFY&W8hY;B9e#X*faL7^ERNodySsLFdpN;2lI&8uYm|>eDIld)+#TA6J zm-$2zEq>rsP$e=cezmS=qC>mQ_hhRbN!}z6yh(a1qW0tDG9bT8zvArl zHB9+Jn*07|o1&3wWbwu+W5cSW=0^6#ZCU=3>|`^L9BN7=OeJ1)l%9z6%c^Erduc%8 
zB3-)J^ZQk0Ja_CYGEag!I+xO=Irl|VpQ@Nr)nd7jiI?y9IQ7c6-@RD;r2mFCujI@H zJ<$?CHtl$SiM-PM*^5H~&!P;L4yweX5ybJ8jak@VpjCE+v0QJ&L;R(y)}>sli#(N)b&TauHi8h>^$<@0{ERSC5YXz;XO&XEX7 znrP>Jb#V^-g}5}i4{In6)f~+*$FlviHUZzDcknRo&jRUk#z#8bPpVC2Sq442s4;u% z@yS=Rxq9JXNv+LylB*wVQqpYEq`*E!o(6 zhm%Z$+=?}0q^d9%MUu~668|7Q5kzv2$En-}h*^=jnrtg*m&68)*cOb^ut>!RI^W`6 z&~-OzjLjYq3{TQa4|NusGGv2!p1+U{?*CwuEV_T3j>$<#%OIqUnJ;U?y~Fw}?;tSF z?L|eWnv~CgVL9&mlf^K^7!xS>9#cA=0#0k6v~3>F*Z@g{{iT9ON1qBu1&z_`EyF$+SBT9O89QG_EJXk$7=fzmLer4u z2ci3>O37a$Jx&68|?zdqt~w1Qj9vvCrJp5&`czirYciebf(29hy%)-{i~TupVN-o( zu>C1R;r5eq$K2IiP{aHnjK_Z3>T>Bid!_4JgP~w5vAWku<|IDx$x_9ig*vtHYu2%a z^00Ww(fQP+3?Mq-o{!JyEOlUrQ|VS)0u(uN>rTDl$7oP zY7!2Ax=0%n{P-8nZcI)kUu*G%+&V9&YTzRw*T=AItki@)1@>nU`rH#lFn@q(yE2GX zEMN-*5{2DN5*{2Gw}eLbQ0`l!d9ZmREOBWs#6kJxmpqpCB#@>eviglJ3yNSN#uM^m zk_npCjwWZ^D2WRNWGbNq^>!3S)wl$2I|9)c+uz@_Up3sBd>S^};E?<;?V$L4y1;6) zO@F+-<{04Z|0>qXV&G?1Pe65ELP_?U7Xh!n)UM5E|IG%7Oav3iBnxZL5eoGH1JH)#>LC&S;MZHHFW;EX-A(`@kkj`kAKTI9mq5LLMvvrc!3#l+uGUL>3rt| z>_b-`>=IeaIa0pWX7w-QGG&4T8qSQ%X*51Q{x(vAFn)$}ttpOJ10t3Td(9VWPp~BE zW~vp|L&ho=VHw{FRg)J1@l8IM%twu?gJIEL4X1Brm~rD^5jF{Upw_HpLBAWC?42BB z1FHr#L-ynv3<}v+(lKp|?OeB&oZis*nM*muU`_;Yx2-S4KX*z<4ccdIA9erPS?Gkx$xO_o(1{|XtP{76+2 zvwQ{nQ~Qzq(gQrOnvYyk?r&~Q4w*deaLnNyVLbo@J+d3VOeM$@6ZU{?&mRGMypp%O zRHb~*nr%bgrf#)t&GwrpWInYVxQR|lx(qBmxrwlhjb9I3I%Yw4pn^w6ZqYyK`ZMIZ zh`Jm`ov-CBp+=~>-kWzJVgvqvL^x7sFEm|rSRLxhT9@g)vt%e6ca{>;9Wrn_{nKe{JHBF};-or{ckl-JcRXR5ugFvl-|-QnZ23JQk{Bddz7oz4^WIs3cAsb%vLIO z0ao(S_-d)w-A1RsKI!t)-r5v7Tw{k3Ojy=)+65)(e}0(*X0hVDrHlWMgX#xW*xe56 zni|x0S%<$Yw1!yG^pwp~w-#w{DylKw`F3gaIC-Q#czW#vOUdfP3-UM2CFdx`kP*Wx zUWn2|$iOz~6>}j(vYd)p-5=0CnVXrNePxu}=k9D@e$BWzWdz+nXg?mdcMg_3AXY@n z;=TYhMT65K@!dT19oAEE=Q5Bxt(b#0#B`Ne!aaT6Y94D<cnIdR3C)VTQT! 
z;HKJf*%Kchy>8`c7b?0)lix`iG<XrCKzSg?+qD?~qgD6A8%LVCTK8 z@Q=cF&(%Q$J^ixR?#=&vBVirksjjlSN|Q7G%zyaH;)dvJ#Ic{Z{7E~OW5XQISBK3` zaZ>&RA;rIz@;&(Zl;FWf#jpLk9J~zlepmObj7LpWpXQZ%oz+ogxrgYRxoLp(YwQ9w zJv5ZL4_9AWB$P`#vCOCtM>40Y?X}F=)y&;HQ0chgENYHT$)YShMX8_)USi&XDN#Y2 zQiuo&8G8P_Yz{&6z1MkCe!s06A_MX;wU-B|>*|}31$}Mlb=Yn*jCWoy=b{7my#^8d z=FY=js@s9paCTKuix1^RGo3fr7)%2N6SR>;Xa%Pl-;|E++pXF2Ic0!4MWphvm5GHt z^yRFx96s9-Hf9Z!x;?O(QYjFVjYe27UmqR{w+&^;%?673FeM@2Uav%}yTcdPP}4l( zjaqE4xp6l5z=$USJ>mx+6k~l zr0lUi?BLo>qX}W!QLn-!qo(jm#xdun`(T1}%wlemYiYLlz;HiI=rEEkJ2%yaI6B+u zR!MT08#rz{iu)rapcOlee|hOejW{;)wRX~GgbKEaX)B6G$2yRVUKXr@YWVZwBvP*D7C0xcv&n^vYg7L(ha<%^WOxj-1lQJckpT2>Z{p zEv>tBoEnB$M+AtMQWGV&ztML!nd<3#nkXkyY)VM)060W0|AaYT?$t+T&gniUrsr)8S8^)hR|GUyh2 zww!@X9E6=@=`^4ZZRuU@u{)Lqfo~?pSb-; z>0>zXqmEW~<3hxA12l+ALubrWD=4YBPIj=3+Jik5)6o7; z;SsO9mVSA=dwY8adR6o!^cwMIxOpUz!!e}}mEi;`3w1m=CjVBvy#zLG>1hYH#se9P z*mCxw1Gl)1!unQ{Pm_~3CO4xC9z`w^Bt(0)>lwpdZ-!y>=V4F0mF{F|RvYd3*yPN> zgEdyX5t0BRdN3ZKO<&~`;{3x^HxP8FLF$}biIe_ z*7*<=`#_83Wa}cmu{bC31YZVgV5hvYS%7npgB$V9U7hWRt{c>;+|Ar;Old6X*hmi! 
zNL})a_9i>jrpPtNK26dp$|q;w26>BqelcWs2dP~5E!fxY@Kr6CW}|@m_4!Qo)ZAKF zZta7!k5kTyN&QV!(tRvBubM_)CSH>FCA^JypgRoa$Q+L8M_evEyE0f{h=YlrwVpCg z*)_j_nAXLh*yrjltF@)`&b8g4Z994HrKHJwh_)GWR++^t?`ad*J$gPs%B+)KxWCYh zaVJ_gZsx#8lhX0UOrK>T8ipVH{1x~0J$d!-xF&$TbvuEB@O0#QxrP=@MDovyos9RH zk)4vLOzwL~xf~l~h2hj29?1)rBvmEhC!WU6A<2@DC~Nj&%Be3f!vT+RhpaEp568?$ z{H;Eh`s3zVSs;?Vka*<(7z))0JU3pTleWeydep?G&O&w}-H3lG>#s@-fXs81+vGVVGBPU{3$(N@kmmA;J{ zcAM63zJsqsB3Y7Ud|j@}u;!Gb&rs&1Q5Cey{LOJvxi1|hP{PYThf-3tuNryT-*(1f z)}ixHHS$G~iwnbO%}CJ=>sCH-V?MI^I^>S5PXIY2&QbIsV_X;>ZiK~cw(8?mVwU2QNT~^qhArL0%b66qd(3NU{>3~F42#Mk$j;$A4 z++mi88m#kk;cvxuig{rkwpx@XUo$IvWKx52nH-rDApe*qmqO}D8!DoQn+Nz11J4jS z;(#LZ9ln_ZvEn7wCsl{^*ZaO#etf5YjIw3h`!}a*_ce_K0R;3yTk^(eDlZoC_dQsCQKQ3OlawHL%gjwnxf6ArehkZ45GK^ptPrN zeEMuoZcK$kn`uK-6HaG2&w247ADMw#>NA3V*LEyemDy# zm%i?fh5|CxFKjlkw}|lw*kP&Ia6yBTreA7~hF;@8YaxZcx9?h@b{^pqQ2$p#08r28 zR(nIaSUL=kGB8`B{|aWJF>;e#6d~ume^uh&-q2!C_>7ra{c>adtG$QhVS^N*?>b>$ zteyc>1TZSdf9);^vw-wL{u%b&>Wq|+#R}`K67NZRm12Go%F^@WM3DZsXQTcdi2arJ zr`*|Fyf-w}_56a5{>k4k?giPO4ROTx!&Ss*e9}vg-JL0y$*nP$bJR^I)KI7T&wc21 z9aL+g?u9y9Mno_`D@eA{t!Gol$Rj6U|IS_WU##rUIuFTZq18AJe@FYn-mG;TAA^=^ z_H}J2<^t36%VIuCEl-;T|H}4B>)Ou_yp%D8UD~9xN{N}8eGK;l<2h6@1W-8N{HBnJ zb+BmsLEgF=Uq`O;_dJ14csY3*rPt*lE5>1b7kZf=c-6;v6?Cv&VsX&AdD%X8Y{u&M zWyF`3>YZB;o@~zZu&Yt+5J`O__8dH#C20G)59(|oT%pbeg`kSBMq;G9hT`t6dYCoC zFC{ixs!0W(7CT7m+{R9v9PzWxYj0V|C{n^TJ7?tTC>_S&ds3g{9o=>1?7PN@bhAje zWT@iTi|R2FB8e9;T`O^**`v6cko9&(FYUg*>1I7qc|bGibmLfj8i@4ll{u?rsX7h0 zaqZGX=??PC-n2r~0GK+^xnK6=_fO&Kg3tTkXF4}-U!C{&i>SX2)%!btdRt_ja8A`3 z{JUN`14c&RF)_1}m-6i)8X_<`2(I@%s*u+qI|i~jg|_ml5n!%A&0QVJMGu&S`?!SC zz8dh(HrMnAYKaBE%%#E_WhhPBLzU~3iGtRpdq;mpe2rMu256I_?{wW@5ChNb?!SK$ zgzA8GGI?jsLsf`Y_P0u8)|o*6 zUxI4DuQ0;Jf2nx`p!2@1Xs&>tE8pH06lI_M*%--TsxxH9|jx!azlXg7HCOhtvWXJK^yAeP*RPl0R`+(ZGaT?Y__-<{eJx_ z%`r78$8qaw%acC%b;C@&c)jyNQh()te4vw z1QZS9bei+e+ntT{jf(CAHi|TSLm=7juqR*kBZBJPTI%~0gNFlSA7Yyn#>W{CBRR5D zKqgp0@x6c3X*1@YnhnJ&-KF8 z(2Jf4AqWq<-oe^6>)q)pSow%`O?|+rW0$M*M1V>C%b`c8XRoqE#{vP3axth)9GhPl 
zN-a^;LQTat#|O{v_GjAPT(n;Zl&gL}30lZW>J~YiX(+adyqG#&=(0F+ROP?99Dy;! zGi{d?ZSi%%Nx+yl?Nj5cj<`CtSa$Th@+hAzqFdy<&WeYXgS0t zn?C;C{N^o0uCgbOLvEtMrs98#nw?vlv6|*}9{B-u`Rk2B^;%BBex4m3+lc(OVOga1 zY6GI$*6U>C`lqkm7sxK5QNN6Zz}3>!QM*f3yp` zQu{V&aS8BqCDm&}#g|+|*otNgIK+?YDyx8++RU;7^vTFn08NbU+w!dVmrA0@Xq9B{Z1qm zC;x7sSenK+MjJMUgz9ozbkx>)vh3kR;jb?^vi#n2MpmshBCva?my2m+VPb7?L| zd19T*$JNs-HN+|Tcn>Buwr)s4rL$G z60XA;wGe)J?jXTn%YSAr%4lB2CaX2*Q{Td|`=uUx~y;`8XIcrWi(fNUGtq)2cb!5{uZ%8Wv7o$%u1egu!a+U4^u6Fvv%{6p`6 z&Ll$o{`Ec@Chg!Vxl+>i(tYI=B=4%!buT^2#SG;u9g_gj_m8dMIq8Qh7S$`S7z+YMdkpzZV2WSTXuTftyPWzrCvj=T!3Z`;8_ z4;lN8B89vJk%yi2(2*#CLcr4lmu70F)vvj$v3N~i0g>suX|l>rL$;Mlv-o-KkPH2r z_H(P1N`tIXj2=V*r#UHd%@1!M)5qv~GWBNZ_UP~rcp4;saET{&%vD}Byb~HcVjtu) z4MoIyC5w|AYXMY!x-mrE2M~vc>_3}$f`2`~8D`N2hbkYN+Np+7-PsqEd8sgXID>UX z5F^bqv4H>J=GC!lF_vM&z5Ka<4im$rX77SYpgWFa`@!fTPYE0-3Z**WzkDVWc(!NH z&4^!k_=xD%fWSvbd5?KNV*D+`qKDp1iv$||3jmq6p7U?P`36*fib!4U)vwn7EIDVe zbw067G5?lKul_U%(KZfmd~vUo{De#W$q#SUIz@2y2T&ggxwXz-YYJ)Ban|}g1@kl> zh_beR&B)wcso|xS!|z&e#&V8(v*odUR2(zA8Ub}`?<;wXzc9HrFM6maY}yt95G*Ig zCOKR#w!eSPG2^cK)k|Yv*x0x1eAn@JLsDe}nhLd4lr|kTK*bI!HQJAkKX;(S>lzc8 zS?M7ZJziY8s(!2f2uDsFf1wDse^nFbe+AS8EBk$K?kkWCY;7R3rt$S^@d|{sELDa0 z*ax*rbXH$UnLdYJGcB;5FkymEE_9Cw4REd%4fK{Lo_NAf3!Ihu#`+x z67PPbCdmQU`Yz+IzhJ?F9?z-NERjn}R(Z-bx-kK$SFr-agDXDrkuJ>y`p@-`K7MHs z29FGDX>p6rdz(9 z`DlFf`u3Ond=R=@|L3%6-8YTvP9Xkn_RQ6V$Nfn2>kCN)0001h=A?YU-~S;Df3ftk z8DEc9dp!m_1VPSMA^zp%SKZUE?e*&b0000qkKY_P=pNnuAAUXz0-qnm^++I=UON5p z(1l;q{L1am`^z=~0001J4yrnQf4jfWv@YHHz1uIH@xS~+OpgRYZohQqA{V;<(;0?7 zi8#{buhs+r005xDDS6m)^k^Y_aGOV;dg8~T)jb@OKuA|`^y)&!@yts{UN7wAdG~r( zKH35R005v7-TCyhZu|ZNDn0D!%5_}*`QQ1S)GI6b`#_{w^wW*+%yoqu3Xd52=3ztK z{E26{(}oX?wgLbE0B8jJj&`_}JDx8j4+s5n5V{Y1^|BfMM$>6ZCJ=J{!yC_cE#ZBI z2mN+iF6TZp=?r(yiN!m80ssI2sBiy%;GiG2u&0nj6k`5Y^FjEr+b@~sMEBoe3ic>n+a0QKQ#FTU(=5_hn((=UfE z|4#=;4f|O9W^v{8D*9ji=%#mv`K}v-Ah<-!quScsxhIWrXO9`>CXXKJ9|HgY0AO%= z{=gyk?B0FuS9|t3T_{NkiNMw{pZ~(rIkQ&y6&Y1kKDd6}hW`?T;f*c`&Wl#L;jJz1 
ztTChAgb~AhVxcZhnvN<=b5tn+004k~6&Biojt;lKqtoBdVgJ!i_rl@Bg~VaMA1K`| z40nbu_l4UpyXb#JYkH+mk^Q1a9-o(U!TSsE;sb?bVqCNu0002M;IcP#`3IfLJ$U=2 z7yVNq4N$&Om70|X7Crj-tRM_N5QO36!n--WukZ(=OW z^f`a!|Mkd`BbEAh0)i1cnx;f^qWvX@L zNKGO;)AbkD{mCZ=_aR4)oC)~;^V8&Gk>Ddo4vP(Sv}}T%m-dg>qwJhB5?gomwf4*T zZfn`t+vw=NWjp+8{)|2Ch;+o`qA2wU(Vse!k8oXdi~Q-1MCzpzM+*#(#00SM!iyfcy^ULVJUp_IBcK_%lS?2RM>3{T6dxhUw_wT*vpmo|2fA1u^E8Qdb z+&{V*dzH)QoWIpfnYj3m=gEk+evWEll~a?JD6XS0QKK<<;YNPQbJ(r*F0oz#cHwa5 z8YH7Vs?qZC0s4zZ>16G_Ti^aTs#2Gtl9{9B@Yk#$VHUTo#9wEU^Rsdc`cm(GrMiM0;G$dwn zQ2cAHuR$iHCH60^ET!b$zfa;^O(%p4{lZyYd!6g!w;r11YSU_qTNv42I?|3blLuc> zJA{Tf>bBEHcFO*4tGSbbkKH^~njP#9p=V*${8iNrI=nxV1gve+FwrhLen^^QF+$T`Q{m>dMa2rF?aPZdx-9_(9s1|1ydWI$-Yuvja646Ydqt6 zrhP5rA5i1dZm03jbyopHU%f0!x{OI{X)2#{@%mswDoM^x5HczjorMCk5mAF8*!-7VMPhwnXF$;N#AX1$Z zU7s#E6V#*YuywJ|Ie+grZD-j? zBc|?vj5dU8BuH(}+};EArh+QKoW)UE<@;d@ZJ@8?;C6J#i<&3MsU(=^EaB>n8Akzy zr56R;_4NMyf?d$hAoUTE(_w-smq@j10)?({XPqKD&hXozFjx4mAi;GcNBot!2?SW? z!aSIEjx*FaaEX`RqfqQDG8M4Iudat|`U|sy*Kd;NkpeiHuW%wL-TZUsGAK2QVDQdN z1F01`h3ib>E^;$djS~Zi*&>_;FnUXuIQi@dZf5t%8z)eI8^(hq?+Gh(c?;ecY}Nql zBr2O^+wrm!cR9;Vo3R*(mo)we$q_$$|F`MLGtdhRjPn7QY$jJeMW7^8bhX@VAh0ga&Mi^_L&ZMPKf;sdinX1UJ5!s z#J)0Kf3(AUKa#@@^1%Qv+|=OZxsL(y9H6J8&cgg)bmOs-U4pt>*-O95p-9gS7P{