remove fcplugin to accelerate pipeline run time
Signed-off-by: Rajeev Rao <rajeevrao@nvidia.com>
ttyio authored and rajeevsrao committed May 19, 2021
1 parent 19e3899 commit 8593e51
Showing 1 changed file with 6 additions and 21 deletions.
demo/BERT/builder.py: 27 changes (6 additions & 21 deletions)
@@ -54,7 +54,6 @@
 emln_plg_creator = plg_registry.get_plugin_creator("CustomEmbLayerNormPluginDynamic", "1", "")
 qkv2_plg_creator = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic", "1", "")
 skln_plg_creator = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic", "1", "")
-fc_plg_creator = plg_registry.get_plugin_creator("CustomFCPluginDynamic", "1", "")
 
 """
 Attentions Keys
@@ -214,16 +213,6 @@ def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None):
     layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
     return layer
 
-def custom_fc(config, network, input_tensor, out_dims, W):
-    pf_out_dims = trt.PluginField("out_dims", np.array([out_dims], dtype=np.int32), trt.PluginFieldType.INT32)
-    pf_W = trt.PluginField("W", W.numpy(), trt.PluginFieldType.FLOAT32)
-    pf_type = trt.PluginField("type_id", np.array([1 if config.use_fp16 else 0], np.int32), trt.PluginFieldType.INT32)
-    pfc = trt.PluginFieldCollection([pf_out_dims, pf_W, pf_type])
-    fc_plugin = fc_plg_creator.create_plugin("fcplugin", pfc)
-    plug_inputs = [input_tensor]
-    out_dense = network.add_plugin_v2(plug_inputs, fc_plugin)
-    return out_dense
-
 def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imask):
     """
     Add the transformer layer
@@ -243,10 +232,9 @@ def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imask):
 
     # FC0
     B_aout = init_dict[prefix + B_AOUT]
+    W_aout = init_dict[prefix + W_AOUT]
     if config.use_int8:
-        W_aout = init_dict[prefix + W_AOUT]
         attention_out_fc = network.add_convolution(attention_heads, hidden_size, (1, 1), W_aout, B_aout)
-        B_aout = None
 
         if not config.use_int8_skipln:
             attention_out_fc.set_output_type(0, trt.DataType.HALF if config.use_fp16 else trt.DataType.FLOAT)
@@ -255,10 +243,9 @@ def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imask):
             dr_fc_aout = init_dict[prefix + 'attention_output_add_local_input_quantizer_amax']
             set_output_range(attention_out_fc, dr_fc_aout)
     else:
-        W_aoutT = init_dict[prefix + W_AOUT + "_notrans"]
-        attention_out_fc = custom_fc(config, network, attention_heads, hidden_size, W_aoutT)
+        attention_out_fc = network.add_fully_connected(attention_heads, hidden_size, W_aout, B_aout)
 
-    skiplayer = skipln(prefix + "attention_output_layernorm_",config, init_dict, network, attention_out_fc.get_output(0), input_tensor, B_aout)
+    skiplayer = skipln(prefix + "attention_output_layernorm_",config, init_dict, network, attention_out_fc.get_output(0), input_tensor, None)
     attention_ln = skiplayer.get_output(0)
     if config.use_qat:
         dr_skln1 = init_dict[prefix + 'intermediate_dense_input_amax']
@@ -303,23 +290,21 @@ def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imask):
     # FC2
     # Dense to hidden size
     B_lout = init_dict[prefix + B_LOUT]
+    W_lout = init_dict[prefix + W_LOUT]
     if config.use_int8 and not config.use_fc2_gemm:
-        W_lout = init_dict[prefix + W_LOUT]
         out_dense = network.add_convolution(intermediate_act, hidden_size, (1, 1), W_lout, B_lout)
-        B_lout = None
 
         if not config.use_int8_skipln:
            out_dense.set_output_type(0, trt.DataType.HALF if config.use_fp16 else trt.DataType.FLOAT)
     else:
-        W_loutT = init_dict[prefix + W_LOUT + "_notrans"]
-        out_dense = custom_fc(config, network, intermediate_act, hidden_size, W_loutT)
+        out_dense = network.add_fully_connected(intermediate_act, hidden_size, W_lout, B_lout)
 
     if config.use_qat:
         dr_fc_out = init_dict[prefix + 'output_add_local_input_quantizer_amax']
         set_output_range(out_dense, dr_fc_out)
     set_output_name(out_dense, prefix + "output_", "dense")
 
-    out_layer = skipln(prefix + "output_layernorm_", config, init_dict, network, out_dense.get_output(0), attention_ln, B_lout)
+    out_layer = skipln(prefix + "output_layernorm_", config, init_dict, network, out_dense.get_output(0), attention_ln, None)
     set_output_name(out_layer, prefix + "output_", "reshape")
 
     return out_layer
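
For context on the change above: on the non-INT8 branch the custom_fc() plugin helper is replaced by TensorRT's built-in fully connected layer, so the bias is now applied inside the FC layer rather than passed into the skip-layernorm plugin. The sketch below is not part of the commit; it only illustrates the built-in path in isolation. It assumes a TensorRT release such as 7.x/8.x where INetworkDefinition.add_fully_connected is still available, and uses made-up tensor shapes standing in for the demo's (S, B, hidden, 1, 1) inputs.

# Illustrative sketch only (not from this commit); see assumptions in the paragraph above.
import numpy as np
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
hidden_size = 768

builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

# Two trailing unit dimensions let FC/convolution layers act per token, as in the BERT demo.
x = network.add_input("x", trt.float32, (128, 1, hidden_size, 1, 1))

# Hypothetical stand-in weights; the demo pulls these from init_dict instead.
W = np.random.randn(hidden_size, hidden_size).astype(np.float32)
B = np.zeros(hidden_size, dtype=np.float32)

# Built-in IFullyConnectedLayer in place of the plugin-based custom_fc() helper.
fc = network.add_fully_connected(x, hidden_size, trt.Weights(W), trt.Weights(B))
print(fc.get_output(0).shape)  # expected: (128, 1, 768, 1, 1)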

