From 3bec5de82ad50b843668210176cbffc481382f7f Mon Sep 17 00:00:00 2001 From: Jeff Rasley Date: Tue, 2 Aug 2022 13:19:46 -0700 Subject: [PATCH 1/8] add ds inject policies --- mii/policies/__init__.py | 13 ++++++ mii/policies/bert.py | 69 ++++++++++++++++++++++++++++++++ mii/policies/bloom.py | 46 ++++++++++++++++++++++ mii/policies/gpt2.py | 44 +++++++++++++++++++++ mii/policies/gpt_neo.py | 51 ++++++++++++++++++++++++ mii/policies/gpt_neox.py | 59 ++++++++++++++++++++++++++++ mii/policies/gptj.py | 51 ++++++++++++++++++++++++ mii/policies/megatron.py | 85 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 418 insertions(+) create mode 100644 mii/policies/__init__.py create mode 100644 mii/policies/bert.py create mode 100644 mii/policies/bloom.py create mode 100644 mii/policies/gpt2.py create mode 100644 mii/policies/gpt_neo.py create mode 100644 mii/policies/gpt_neox.py create mode 100644 mii/policies/gptj.py create mode 100644 mii/policies/megatron.py diff --git a/mii/policies/__init__.py b/mii/policies/__init__.py new file mode 100644 index 00000000..06fa54a0 --- /dev/null +++ b/mii/policies/__init__.py @@ -0,0 +1,13 @@ +''' +Copyright 2022 The Microsoft DeepSpeed Team +''' + +supported_models = [ + HFBertLayerPolicy, + HFGPTNEOLayerPolicy, + GPTNEOXLayerPolicy, + HFGPTJLayerPolicy, + MegatronLayerPolicy, + HFGPT2LayerPolicy, + BLOOMLayerPolicy +] diff --git a/mii/policies/bert.py b/mii/policies/bert.py new file mode 100644 index 00000000..aaf20471 --- /dev/null +++ b/mii/policies/bert.py @@ -0,0 +1,69 @@ +''' +Copyright 2022 The Microsoft DeepSpeed Team +''' +import torch +from torch.nn.parameter import Parameter +from deepspeed.module_inject.base_policy import InjectBasePolicy + + +class HFBertLayerPolicy(InjectBasePolicy): + _orig_layer_class = None + + def __init__(self, client_module, inference=False, preln=False): + super().__init__(inference) + self.client_module = client_module + self.preln = preln + if HFBertLayerPolicy._orig_layer_class is None: 
+ try: + import transformers + HFBertLayerPolicy._orig_layer_class = [ + transformers.models.bert.modeling_bert.BertLayer, + transformers.models.roberta.modeling_roberta.RobertaLayer + ] + except: + HFBertLayerPolicy._orig_layer_class = None + + def get_hidden_heads(self): + return self.client_module.attention.self.query.weight.shape[1], \ + self.client_module.attention.self.num_attention_heads + + def attention(self): + qw = self.client_module.attention.self.query.weight + qb = self.client_module.attention.self.query.bias + kw = self.client_module.attention.self.key.weight + kb = self.client_module.attention.self.key.bias + vw = self.client_module.attention.self.value.weight + vb = self.client_module.attention.self.value.bias + + qkvw = Parameter(torch.cat((qw, kw, vw), dim=0), requires_grad=False) + qkvb = Parameter(torch.cat((qb, kb, vb), dim=0), requires_grad=False) + + return self.linear_layer, \ + qkvw, \ + qkvb, \ + self.client_module.attention.output.dense.weight, \ + self.client_module.attention.output.dense.bias, \ + self.scale_attention, \ + self.is_megatron_v2 + + def mlp(self): + if self.preln: + intermediate_ff = self.client_module.intermediate.dense_act + else: + intermediate_ff = self.client_module.intermediate.dense + + return self.linear_layer, intermediate_ff.weight, intermediate_ff.bias, \ + self.client_module.output.dense.weight, \ + self.client_module.output.dense.bias + + def layerNorm(self): + if self.preln: + attention_layernorm = self.client_module.PostAttentionLayerNorm + transformer_layernorm = self.client_module.PreAttentionLayerNorm + else: + attention_layernorm = self.client_module.attention.output.LayerNorm + transformer_layernorm = self.client_module.output.LayerNorm + return attention_layernorm.weight, \ + attention_layernorm.bias, \ + transformer_layernorm.weight, \ + transformer_layernorm.bias diff --git a/mii/policies/bloom.py b/mii/policies/bloom.py new file mode 100644 index 00000000..441d542c --- /dev/null +++ 
b/mii/policies/bloom.py @@ -0,0 +1,46 @@ +''' +Copyright 2022 The Microsoft DeepSpeed Team +''' +from deepspeed.module_inject.base_policy import InjectBasePolicy + + +class BLOOMLayerPolicy(DSPolicy): + _orig_layer_class = None + + def __init__(self, client_module, inference=True): + super().__init__(inference, linear_layer=True) + self.client_module = client_module + try: + import transformers + BLOOMLayerPolicy._orig_layer_class = transformers.models.bloom.modeling_bloom.BloomBlock + global supported_models + supported_models.update( + {transformers.models.bloom.modeling_bloom.BloomModel}) + except: + BLOOMLayerPolicy._orig_layer_class = None + + def get_hidden_heads(self): + return self.client_module.self_attention.hidden_size, \ + self.client_module.self_attention.num_heads + + def attention(self): + return self.linear_layer, \ + self.client_module.self_attention.query_key_value.weight, \ + self.client_module.self_attention.query_key_value.bias, \ + self.client_module.self_attention.dense.weight, \ + self.client_module.self_attention.dense.bias, \ + self.scale_attention, \ + self.is_megatron_v2 + + def mlp(self): + return self.linear_layer, \ + self.client_module.mlp.dense_h_to_4h.weight, \ + self.client_module.mlp.dense_h_to_4h.bias, \ + self.client_module.mlp.dense_4h_to_h.weight, \ + self.client_module.mlp.dense_4h_to_h.bias + + def layerNorm(self): + return self.client_module.post_attention_layernorm.weight, \ + self.client_module.post_attention_layernorm.bias, \ + self.client_module.input_layernorm.weight, \ + self.client_module.input_layernorm.bias diff --git a/mii/policies/gpt2.py b/mii/policies/gpt2.py new file mode 100644 index 00000000..b605d1ee --- /dev/null +++ b/mii/policies/gpt2.py @@ -0,0 +1,44 @@ +''' +Copyright 2022 The Microsoft DeepSpeed Team +''' +from deepspeed.module_inject.base_policy import InjectBasePolicy + + +class HFGPT2LayerPolicy(InjectBasePolicy): + _orig_layer_class = None + + def __init__(self, client_module, inference=True): + 
# HuggingFace GPT2 uses convolutional layer instead of linear layer + super().__init__(inference, linear_layer=False) + self.client_module = client_module + try: + import transformers + HFGPT2LayerPolicy._orig_layer_class = transformers.models.gpt2.modeling_gpt2.GPT2Block + except: + HFGPT2LayerPolicy._orig_layer_class = None + + def get_hidden_heads(self): + return self.client_module.attn.embed_dim, \ + self.client_module.attn.num_heads + + def attention(self): + return self.linear_layer, \ + self.client_module.attn.c_attn.weight, \ + self.client_module.attn.c_attn.bias, \ + self.client_module.attn.c_proj.weight, \ + self.client_module.attn.c_proj.bias, \ + self.scale_attention, \ + self.is_megatron_v2 + + def mlp(self): + return self.linear_layer, \ + self.client_module.mlp.c_fc.weight, \ + self.client_module.mlp.c_fc.bias, \ + self.client_module.mlp.c_proj.weight, \ + self.client_module.mlp.c_proj.bias + + def layerNorm(self): + return self.client_module.ln_2.weight, \ + self.client_module.ln_2.bias, \ + self.client_module.ln_1.weight, \ + self.client_module.ln_1.bias diff --git a/mii/policies/gpt_neo.py b/mii/policies/gpt_neo.py new file mode 100644 index 00000000..fe11051c --- /dev/null +++ b/mii/policies/gpt_neo.py @@ -0,0 +1,51 @@ +''' +Copyright 2022 The Microsoft DeepSpeed Team +''' +import torch +from torch.nn.parameter import Parameter +from deepspeed.module_inject.base_policy import InjectBasePolicy + + +class HFGPTNEOLayerPolicy(InjectBasePolicy): + _orig_layer_class = None + + def __init__(self, client_module, inference=True): + super().__init__(inference, scale_attention=False) + self.client_module = client_module + try: + import transformers + HFGPTNEOLayerPolicy._orig_layer_class = transformers.models.gpt_neo.modeling_gpt_neo.GPTNeoBlock + except: + HFGPTNEOLayerPolicy._orig_layer_class = None + + def get_hidden_heads(self): + return self.client_module.attn.attention.q_proj.weight.shape[1], \ + self.client_module.attn.attention.num_heads + + def 
attention(self): + qw = self.client_module.attn.attention.q_proj.weight + kw = self.client_module.attn.attention.k_proj.weight + vw = self.client_module.attn.attention.v_proj.weight + + qkvw = Parameter(torch.cat((qw, kw, vw), dim=0), requires_grad=False) + + return self.linear_layer, \ + qkvw, \ + None, \ + self.client_module.attn.attention.out_proj.weight, \ + self.client_module.attn.attention.out_proj.bias, \ + self.scale_attention, \ + self.is_megatron_v2 + + def mlp(self): + return self.linear_layer, \ + self.client_module.mlp.c_fc.weight, \ + self.client_module.mlp.c_fc.bias, \ + self.client_module.mlp.c_proj.weight, \ + self.client_module.mlp.c_proj.bias + + def layerNorm(self): + return self.client_module.ln_2.weight, \ + self.client_module.ln_2.bias, \ + self.client_module.ln_1.weight, \ + self.client_module.ln_1.bias diff --git a/mii/policies/gpt_neox.py b/mii/policies/gpt_neox.py new file mode 100644 index 00000000..ae8d4b31 --- /dev/null +++ b/mii/policies/gpt_neox.py @@ -0,0 +1,59 @@ +''' +Copyright 2022 The Microsoft DeepSpeed Team +''' +import torch +from deepspeed.module_inject.base_policy import InjectBasePolicy + + +class GPTNEOXLayerPolicy(InjectBasePolicy): + _orig_layer_class = None + version = 0 + + def __init__(self, client_module, inference=True, megatron_v2=True): + super().__init__(inference, megatron_v2=megatron_v2) + self.client_module = client_module + if GPTNEOXLayerPolicy._orig_layer_class is None: + if pkg_version.parse(torch.__version__) <= pkg_version.parse("1.2"): + GPTNEOXLayerPolicy._orig_layer_class = None + else: + try: + from transformers import GPTNeoXLayer + GPTNEOXLayerPolicy._orig_layer_class = GPTNeoXLayer + except ImportError: + GPTNEOXLayerPolicy._orig_layer_class = None + + def get_hidden_heads(self): + if GPTNEOXLayerPolicy.version == 0: + attention = self.client_module.attention + else: + attention = self.client_module.self_attention + + return attention.query_key_value.weight.shape[1], \ + 
attention.num_attention_heads + + def attention(self): + if GPTNEOXLayerPolicy.version == 0: + attention = self.client_module.attention + else: + attention = self.client_module.self_attention + + return self.linear_layer, \ + attention.query_key_value.weight, \ + attention.query_key_value.bias, \ + attention.dense.weight, \ + attention.dense.bias, \ + self.scale_attention, \ + self.is_megatron_v2 + + def mlp(self): + return self.linear_layer, \ + self.client_module.mlp.dense_h_to_4h.weight, \ + self.client_module.mlp.dense_h_to_4h.bias, \ + self.client_module.mlp.dense_4h_to_h.weight, \ + self.client_module.mlp.dense_4h_to_h.bias + + def layerNorm(self): + return self.client_module.post_attention_layernorm.weight, \ + self.client_module.post_attention_layernorm.bias, \ + self.client_module.input_layernorm.weight, \ + self.client_module.input_layernorm.bias diff --git a/mii/policies/gptj.py b/mii/policies/gptj.py new file mode 100644 index 00000000..2baf1d39 --- /dev/null +++ b/mii/policies/gptj.py @@ -0,0 +1,51 @@ +''' +Copyright 2022 The Microsoft DeepSpeed Team +''' +import torch +from torch.nn.parameter import Parameter +from deepspeed.module_inject.base_policy import InjectBasePolicy + + +class HFGPTJLayerPolicy(InjectBasePolicy): + _orig_layer_class = None + + def __init__(self, client_module, inference=True): + super().__init__(inference, scale_attention=True) + self.client_module = client_module + try: + import transformers + HFGPTJLayerPolicy._orig_layer_class = transformers.models.gptj.modeling_gptj.GPTJBlock + except: + HFGPTJLayerPolicy._orig_layer_class = None + + def get_hidden_heads(self): + return self.client_module.attn.q_proj.weight.shape[1], \ + self.client_module.attn.num_attention_heads + + def attention(self): + qw = self.client_module.attn.q_proj.weight + kw = self.client_module.attn.k_proj.weight + vw = self.client_module.attn.v_proj.weight + + qkvw = Parameter(torch.cat((qw, kw, vw), dim=0), requires_grad=False) + + return 
self.linear_layer, \ + qkvw, \ + None, \ + self.client_module.attn.out_proj.weight, \ + None, \ + self.scale_attention, \ + self.is_megatron_v2 + + def mlp(self): + return self.linear_layer, \ + self.client_module.mlp.fc_in.weight, \ + self.client_module.mlp.fc_in.bias, \ + self.client_module.mlp.fc_out.weight, \ + self.client_module.mlp.fc_out.bias + + def layerNorm(self): + return None, \ + None, \ + self.client_module.ln_1.weight, \ + self.client_module.ln_1.bias diff --git a/mii/policies/megatron.py b/mii/policies/megatron.py new file mode 100644 index 00000000..15eb01cd --- /dev/null +++ b/mii/policies/megatron.py @@ -0,0 +1,85 @@ +''' +Copyright 2022 The Microsoft DeepSpeed Team +''' +from packaging import version as pkg_version +from deepspeed.module_inject.base_policy import InjectBasePolicy + + +class MegatronLayerPolicy(DSPolicy): + _orig_layer_class = None + version = 0 + moe_type = 'standard' + + def __init__(self, client_module, inference=True): + super().__init__(inference) + self.client_module = client_module + # we use megatron version to differentiate between the old and new + # megatron-lm source code + if MegatronLayerPolicy._orig_layer_class is None: + if pkg_version.parse(torch.__version__) <= pkg_version.parse("1.2"): + MegatronLayerPolicy._orig_layer_class = None + else: + try: + from megatron.model.transformer import ParallelTransformerLayer + MegatronLayerPolicy._orig_layer_class = ParallelTransformerLayer + except ImportError: + MegatronLayerPolicy._orig_layer_class = None + + def get_hidden_heads(self): + return self.client_module.attention.query_key_value.weight.shape[1], \ + self.client_module.attention.num_attention_heads + + def attention(self): + if self.inference: + if MegatronLayerPolicy.version == 0: + attention = self.client_module.attention + else: + attention = self.client_module.self_attention + + return self.linear_layer, \ + attention.query_key_value.weight, \ + attention.query_key_value.bias, \ + attention.dense.weight, \ + 
attention.dense.bias, \ + self.scale_attention, \ + self.is_megatron_v2 + + def mlp(self, moe_type='standard'): + from deepspeed.moe.utils import has_moe_layers + moe, _ = has_moe_layers(self.client_module) + + if moe: + moe_experts = self.client_module.mlp.deepspeed_moe.experts.deepspeed_experts if moe_type == 'standard' else \ + self.client_module.mlp.moe.deepspeed_moe.experts.deepspeed_experts + num_experts = len(moe_experts) + if moe_type == 'standard': + return self.linear_layer, \ + [moe_experts[i].dense_h_to_4h.weight for i in range(num_experts)], \ + [moe_experts[i].dense_h_to_4h.bias for i in range(num_experts)], \ + [moe_experts[i].dense_4h_to_h.weight for i in range(num_experts)], \ + [moe_experts[i].dense_4h_to_h.bias for i in range(num_experts)] + else: + + return self.linear_layer, \ + [moe_experts[i].dense_h_to_4h.weight for i in range(num_experts)], \ + [moe_experts[i].dense_h_to_4h.bias for i in range(num_experts)], \ + [moe_experts[i].dense_4h_to_h.weight for i in range(num_experts)], \ + [moe_experts[i].dense_4h_to_h.bias for i in range(num_experts)], \ + self.client_module.mlp.mlp.dense_h_to_4h.weight, \ + self.client_module.mlp.mlp.dense_h_to_4h.bias, \ + self.client_module.mlp.mlp.dense_4h_to_h.weight, \ + self.client_module.mlp.mlp.dense_4h_to_h.bias, \ + self.client_module.mlp.coefficient.weight + + else: + return self.linear_layer, \ + self.client_module.mlp.dense_h_to_4h.weight, \ + self.client_module.mlp.dense_h_to_4h.bias, \ + self.client_module.mlp.dense_4h_to_h.weight, \ + self.client_module.mlp.dense_4h_to_h.bias + + def layerNorm(self): + return self.client_module.post_attention_layernorm.weight, \ + self.client_module.post_attention_layernorm.bias, \ + self.client_module.input_layernorm.weight, \ + self.client_module.input_layernorm.bias From 2d28e52ffaab6388e2869ebf217775d6ac868499 Mon Sep 17 00:00:00 2001 From: Jeff Rasley Date: Fri, 5 Aug 2022 16:24:58 -0700 Subject: [PATCH 2/8] fix imports --- mii/policies/__init__.py | 15 
++++++++------- mii/policies/bloom.py | 2 +- mii/policies/gpt_neox.py | 1 + mii/policies/megatron.py | 3 ++- requirements/requirements.txt | 1 + 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/mii/policies/__init__.py b/mii/policies/__init__.py index 06fa54a0..82b17026 100644 --- a/mii/policies/__init__.py +++ b/mii/policies/__init__.py @@ -1,13 +1,14 @@ ''' Copyright 2022 The Microsoft DeepSpeed Team ''' +import mii.policies supported_models = [ - HFBertLayerPolicy, - HFGPTNEOLayerPolicy, - GPTNEOXLayerPolicy, - HFGPTJLayerPolicy, - MegatronLayerPolicy, - HFGPT2LayerPolicy, - BLOOMLayerPolicy + mii.policies.HFBertLayerPolicy, + mii.policies.HFGPTNEOLayerPolicy, + mii.policies.GPTNEOXLayerPolicy, + mii.policies.HFGPTJLayerPolicy, + mii.policies.MegatronLayerPolicy, + mii.policies.HFGPT2LayerPolicy, + mii.policies.BLOOMLayerPolicy ] diff --git a/mii/policies/bloom.py b/mii/policies/bloom.py index 441d542c..5aed42ee 100644 --- a/mii/policies/bloom.py +++ b/mii/policies/bloom.py @@ -4,7 +4,7 @@ from deepspeed.module_inject.base_policy import InjectBasePolicy -class BLOOMLayerPolicy(DSPolicy): +class BLOOMLayerPolicy(InjectBasePolicy): _orig_layer_class = None def __init__(self, client_module, inference=True): diff --git a/mii/policies/gpt_neox.py b/mii/policies/gpt_neox.py index ae8d4b31..7e519581 100644 --- a/mii/policies/gpt_neox.py +++ b/mii/policies/gpt_neox.py @@ -2,6 +2,7 @@ Copyright 2022 The Microsoft DeepSpeed Team ''' import torch +from packaging import version as pkg_version from deepspeed.module_inject.base_policy import InjectBasePolicy diff --git a/mii/policies/megatron.py b/mii/policies/megatron.py index 15eb01cd..33683ecd 100644 --- a/mii/policies/megatron.py +++ b/mii/policies/megatron.py @@ -1,11 +1,12 @@ ''' Copyright 2022 The Microsoft DeepSpeed Team ''' +import torch from packaging import version as pkg_version from deepspeed.module_inject.base_policy import InjectBasePolicy -class MegatronLayerPolicy(DSPolicy): +class 
MegatronLayerPolicy(InjectBasePolicy): _orig_layer_class = None version = 0 moe_type = 'standard' diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 5f1370ae..570715da 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -2,6 +2,7 @@ asyncio deepspeed>=0.6.7 grpcio grpcio-tools +packaging pydantic torch transformers From 42504d6fee479d10e501b2ebf16b2f800c4a962d Mon Sep 17 00:00:00 2001 From: Jeff Rasley Date: Fri, 5 Aug 2022 16:29:19 -0700 Subject: [PATCH 3/8] install latest ds --- .github/workflows/formatting.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml index 6aa26187..23b3adf9 100644 --- a/.github/workflows/formatting.yml +++ b/.github/workflows/formatting.yml @@ -29,6 +29,7 @@ jobs: - name: Install MII run: | + pip install git+https://github.com/microsoft/deepspeed.git pip install .[dev] - name: Formatting checks From 48ea217b7231593c59596372a840e9016f7416db Mon Sep 17 00:00:00 2001 From: Jeff Rasley Date: Fri, 5 Aug 2022 17:35:16 -0700 Subject: [PATCH 4/8] fix policy imports --- mii/policies/__init__.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/mii/policies/__init__.py b/mii/policies/__init__.py index 82b17026..89164ca9 100644 --- a/mii/policies/__init__.py +++ b/mii/policies/__init__.py @@ -1,14 +1,20 @@ ''' Copyright 2022 The Microsoft DeepSpeed Team ''' -import mii.policies +from .bert import HFBertLayerPolicy +from .gpt_neo import HFGPTNEOLayerPolicy +from .gpt_neox import GPTNEOXLayerPolicy +from .gptj import HFGPTJLayerPolicy +from .megatron import MegatronLayerPolicy +from .gpt2 import HFGPT2LayerPolicy +from .bloom import BLOOMLayerPolicy -supported_models = [ - mii.policies.HFBertLayerPolicy, - mii.policies.HFGPTNEOLayerPolicy, - mii.policies.GPTNEOXLayerPolicy, - mii.policies.HFGPTJLayerPolicy, - mii.policies.MegatronLayerPolicy, - mii.policies.HFGPT2LayerPolicy, - 
mii.policies.BLOOMLayerPolicy +replace_policies = [ + HFBertLayerPolicy, + HFGPTNEOLayerPolicy, + GPTNEOXLayerPolicy, + HFGPTJLayerPolicy, + MegatronLayerPolicy, + HFGPT2LayerPolicy, + BLOOMLayerPolicy ] From 1c92d07da09716ac237b93372d2742b3ccde1b9f Mon Sep 17 00:00:00 2001 From: Jeff Rasley Date: Fri, 5 Aug 2022 17:39:10 -0700 Subject: [PATCH 5/8] point to ds branch --- .github/workflows/nv-torch-latest-v100.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nv-torch-latest-v100.yaml b/.github/workflows/nv-torch-latest-v100.yaml index 617ed8fa..d2729880 100644 --- a/.github/workflows/nv-torch-latest-v100.yaml +++ b/.github/workflows/nv-torch-latest-v100.yaml @@ -34,7 +34,7 @@ jobs: python -c "import torch; print('CUDA available:', torch.cuda.is_available())" - name: Install MII run: | - pip install git+https://github.com/microsoft/DeepSpeed.git + pip install git+https://github.com/microsoft/DeepSpeed.git@staging-mii-update pip install git+https://github.com/huggingface/transformers.git pip install -U accelerate pip install .[dev,local] From 4d420db39f8663de5e12e8239425492f6b4b8a9f Mon Sep 17 00:00:00 2001 From: Jeff Rasley Date: Thu, 15 Sep 2022 10:52:05 -0700 Subject: [PATCH 6/8] align ds version --- .github/workflows/cpu.yml | 3 +-- .github/workflows/formatting.yml | 3 +-- .github/workflows/nv-torch-latest-v100.yaml | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cpu.yml b/.github/workflows/cpu.yml index 743e243d..ab13bc92 100644 --- a/.github/workflows/cpu.yml +++ b/.github/workflows/cpu.yml @@ -29,8 +29,7 @@ jobs: - name: Install MII run: | - pip install git+https://github.com/microsoft/DeepSpeed.git - pip install .[dev,local] + pip install .[dev,local] git+https://github.com/microsoft/deepspeed.git@staging-mii-update - name: Unit tests run: | diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml index 23b3adf9..ca38a4ff 100644 --- 
a/.github/workflows/formatting.yml +++ b/.github/workflows/formatting.yml @@ -29,8 +29,7 @@ jobs: - name: Install MII run: | - pip install git+https://github.com/microsoft/deepspeed.git - pip install .[dev] + pip install .[dev] git+https://github.com/microsoft/deepspeed.git@staging-mii-update - name: Formatting checks run: | diff --git a/.github/workflows/nv-torch-latest-v100.yaml b/.github/workflows/nv-torch-latest-v100.yaml index d2729880..fc61472d 100644 --- a/.github/workflows/nv-torch-latest-v100.yaml +++ b/.github/workflows/nv-torch-latest-v100.yaml @@ -34,10 +34,9 @@ jobs: python -c "import torch; print('CUDA available:', torch.cuda.is_available())" - name: Install MII run: | - pip install git+https://github.com/microsoft/DeepSpeed.git@staging-mii-update pip install git+https://github.com/huggingface/transformers.git pip install -U accelerate - pip install .[dev,local] + pip install .[dev,local] git+https://github.com/microsoft/deepspeed.git@staging-mii-update ds_report - name: Unit tests run: | From 9550973aa7e15a32ce1a5d924c8adff60041d274 Mon Sep 17 00:00:00 2001 From: Jeff Rasley Date: Thu, 15 Sep 2022 11:02:53 -0700 Subject: [PATCH 7/8] remove ds version min --- requirements/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 570715da..95f1ef4d 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,5 @@ asyncio -deepspeed>=0.6.7 +deepspeed grpcio grpcio-tools packaging From 0b1e37a558b509ec11c8700d136c3bac8ae9530a Mon Sep 17 00:00:00 2001 From: Jeff Rasley Date: Thu, 15 Sep 2022 14:56:21 -0700 Subject: [PATCH 8/8] add release scripts --- release/bump_patch_version.py | 9 +++++++ release/release.sh | 49 +++++++++++++++++++++++++++++++++++ version.txt | 2 +- 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 release/bump_patch_version.py create mode 100644 release/release.sh diff --git 
a/release/bump_patch_version.py b/release/bump_patch_version.py new file mode 100644 index 00000000..8f1150de --- /dev/null +++ b/release/bump_patch_version.py @@ -0,0 +1,9 @@ +from packaging import version as pkg_version + +with open('../version.txt') as fd: + version = pkg_version.parse(fd.read()) + +with open('../version.txt', 'w') as fd: + fd.write(f'{version.major}.{version.minor}.{version.micro + 1}\n') + +print(f'{version} -> {version.major}.{version.minor}.{version.micro + 1}') diff --git a/release/release.sh b/release/release.sh new file mode 100644 index 00000000..ab56ba3c --- /dev/null +++ b/release/release.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +cd .. + +if [ ! -f ~/.pypirc ]; then + echo 'create .pypirc in order to upload to PyPI' + exit 1 +fi + +version=$1 + +if [ -z $version ]; then + echo "please provide version number for release" + exit 1 +fi + +if [[ $version == *"v"* ]]; then + echo "please only include version number without 'v' prefix" + exit 1 +fi + +if [ "${version}" != `cat version.txt` ]; then + echo "version=${version} does not match version.txt" + cat version.txt + exit 1 +fi + +python -c "import twine" +if [ $? != 0 ]; then + echo 'please install twine via pip' + exit 1 +fi + +MII_BUILD_STRING="" python setup.py sdist + +if [ ! -f dist/mii-${version}.tar.gz ]; then + echo "prepared version does not match version given ($version), bump version first?" + ls dist + exit 1 +fi + +python -m twine upload dist/mii-${version}.tar.gz --repository mii + +git tag v${version} +git push origin v${version} + +echo "bumping up patch version" +cd - +python bump_patch_version.py diff --git a/version.txt b/version.txt index 6e8bf73a..8acdd82b 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.1.0 +0.0.1