From 3ca4c75748eed1d33089158e6092c6000b477c7a Mon Sep 17 00:00:00 2001 From: chunyuan-w Date: Fri, 11 Dec 2020 11:51:59 +0800 Subject: [PATCH 1/2] optimize nms in IPEX --- .../single_stage_detector/pytorch/infer.py | 4 ++-- .../single_stage_detector/pytorch/utils.py | 24 ++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/others/cloud/single_stage_detector/pytorch/infer.py b/others/cloud/single_stage_detector/pytorch/infer.py index 1bb622457a..ac1ca69c29 100644 --- a/others/cloud/single_stage_detector/pytorch/infer.py +++ b/others/cloud/single_stage_detector/pytorch/infer.py @@ -198,7 +198,7 @@ def coco_eval(model, val_dataloader, cocoGt, encoder, inv_map, args): inference_time.update(time.time() - start_time) end_time = time.time() try: - results = encoder.decode_batch(ploc.to('cpu'), plabel.to('cpu'), 0.50, 200,device=device) + results = encoder.decode_batch(ploc, plabel, 0.50, 200,device=device) except: print("No object detected in idx: {}".format(idx)) continue @@ -255,7 +255,7 @@ def coco_eval(model, val_dataloader, cocoGt, encoder, inv_map, args): inference_time.update(time.time() - start_time) end_time = time.time() try: - results = encoder.decode_batch(ploc.to('cpu'), plabel.to('cpu'), 0.50, 200,device=device) + results = encoder.decode_batch(ploc, plabel, 0.50, 200,device=device) except: print("No object detected in idx: {}".format(idx)) continue diff --git a/others/cloud/single_stage_detector/pytorch/utils.py b/others/cloud/single_stage_detector/pytorch/utils.py index 83b9d5bf55..5993e61f37 100644 --- a/others/cloud/single_stage_detector/pytorch/utils.py +++ b/others/cloud/single_stage_detector/pytorch/utils.py @@ -17,6 +17,8 @@ import pickle from math import sqrt, ceil +from intel_pytorch_extension import batch_score_nms + # This function is from https://github.com/kuangliu/pytorch-ssd. def calc_iou_tensor(box1, box2): """ Calculation of IoU based on two boxes tensor, @@ -122,6 +124,9 @@ def scale_back_batch(self, bboxes_in, scores_in,device): if bboxes_in.device == torch.device("cpu"): self.dboxes = self.dboxes.cpu() self.dboxes_xywh = self.dboxes_xywh.cpu() + elif bboxes_in.device == torch.device("dpcpp"): + self.dboxes = self.dboxes.to("dpcpp") + self.dboxes_xywh = self.dboxes_xywh.to("dpcpp") else: self.dboxes = self.dboxes.cuda(device) self.dboxes_xywh = self.dboxes_xywh.cuda(device) @@ -155,10 +160,27 @@ def decode_batch(self, bboxes_in, scores_in, criteria = 0.45, max_output=200,de for bbox, prob in zip(bboxes.split(1, 0), probs.split(1, 0)): bbox = bbox.squeeze(0) prob = prob.squeeze(0) - output.append(self.decode_single(bbox, prob, criteria, max_output)) + if bbox.device == torch.device("dpcpp"): + output.append(self.decode_single_dpcpp(bbox, prob, criteria, max_output)) + else: + output.append(self.decode_single(bbox, prob, criteria, max_output)) #print(output[-1]) return output + # perform non-maximum suppression for dpcpp tensor + def decode_single_dpcpp(self, bboxes_in, scores_in, criteria, max_output, max_num=200): + # Reference to https://github.com/amdegroot/ssd.pytorch + + bboxes_out = [] + scores_out = [] + labels_out = [] + + bboxes_out, labels_out, scores_out = batch_score_nms(bboxes_in, scores_in, criteria) + + _, max_ids = scores_out.sort(dim=0) + max_ids = max_ids[-max_output:] + return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids] + # perform non-maximum suppression def decode_single(self, bboxes_in, scores_in, criteria, max_output, max_num=200): # Reference to https://github.com/amdegroot/ssd.pytorch From 8fcccda9d1d0650e67d6620a4114ca1eff949808 Mon Sep 17 00:00:00 2001 From: chunyuan-w Date: Fri, 29 Jan 2021 15:45:04 +0800 Subject: [PATCH 2/2] update device name to ipex.DEVICE --- .../single_stage_detector/pytorch/utils.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/others/cloud/single_stage_detector/pytorch/utils.py b/others/cloud/single_stage_detector/pytorch/utils.py index 5993e61f37..5ac707a3c3 100644 --- a/others/cloud/single_stage_detector/pytorch/utils.py +++ b/others/cloud/single_stage_detector/pytorch/utils.py @@ -17,6 +17,9 @@ import pickle from math import sqrt, ceil +if os.environ.get('USE_IPEX') == "1": + import intel_pytorch_extension as ipex + from intel_pytorch_extension import batch_score_nms # This function is from https://github.com/kuangliu/pytorch-ssd. @@ -124,9 +127,9 @@ def scale_back_batch(self, bboxes_in, scores_in,device): if bboxes_in.device == torch.device("cpu"): self.dboxes = self.dboxes.cpu() self.dboxes_xywh = self.dboxes_xywh.cpu() - elif bboxes_in.device == torch.device("dpcpp"): - self.dboxes = self.dboxes.to("dpcpp") - self.dboxes_xywh = self.dboxes_xywh.to("dpcpp") + elif bboxes_in.device == torch.device(ipex.DEVICE): + self.dboxes = self.dboxes.to(ipex.DEVICE) + self.dboxes_xywh = self.dboxes_xywh.to(ipex.DEVICE) else: self.dboxes = self.dboxes.cuda(device) self.dboxes_xywh = self.dboxes_xywh.cuda(device) @@ -160,15 +163,15 @@ def decode_batch(self, bboxes_in, scores_in, criteria = 0.45, max_output=200,de for bbox, prob in zip(bboxes.split(1, 0), probs.split(1, 0)): bbox = bbox.squeeze(0) prob = prob.squeeze(0) - if bbox.device == torch.device("dpcpp"): - output.append(self.decode_single_dpcpp(bbox, prob, criteria, max_output)) + if bbox.device == torch.device(ipex.DEVICE): + output.append(self.decode_single_ipex(bbox, prob, criteria, max_output)) else: output.append(self.decode_single(bbox, prob, criteria, max_output)) #print(output[-1]) return output - # perform non-maximum suppression for dpcpp tensor - def decode_single_dpcpp(self, bboxes_in, scores_in, criteria, max_output, max_num=200): + # perform non-maximum suppression for IPEX tensor + def decode_single_ipex(self, bboxes_in, scores_in, criteria, max_output, max_num=200): # Reference to https://github.com/amdegroot/ssd.pytorch bboxes_out = []