Commit afa5c2d

Merge pull request #143 from IDEA-Research/dev
release: v0.11.0
2 parents: 802da5d + 042fffa

64 files changed (+755 -655 lines)


deepdataspace/io/importer.py (+20 -7)
@@ -17,8 +17,8 @@
 from typing import Type
 from typing import Union
 
-from tqdm import tqdm
 from pymongo import WriteConcern
+from tqdm import tqdm
 
 from deepdataspace import constants
 from deepdataspace.constants import AnnotationType
@@ -325,9 +325,22 @@ def pre_run(self):
 
     def post_run(self):
         """
-        A post-run hook for subclass importers to clean up data.
+        A post-run hook for subclass importers.
         """
+        logger.info(f"Add cover to dataset [{self.dataset.name}]@[{self.dataset.id}]")
         self.dataset.add_cover()
+
+        logger.info(f"Add indices to dataset [{self.dataset.name}]@[{self.dataset.id}]")
+        dataset_id = self.dataset.id
+        Image(dataset_id).get_collection().create_index([
+            ("objects.category_id", 1),
+        ])
+
+        Image(dataset_id).get_collection().create_index([
+            ("idx", 1)
+        ])
+
+        logger.info(f"Set status ready for dataset [{self.dataset.name}]@[{self.dataset.id}]")
         DataSet.update_one({"id": self.dataset.id}, {"status": DatasetStatus.Ready})
         self.dataset = DataSet.find_one({"id": self.dataset.id})
 
@@ -348,13 +361,13 @@ def load_existing_user_data(self):
         """
 
         pipeline = [
-            {"$project": {"flag": 1,
+            {"$project": {"flag"   : 1,
                           "flag_ts": 1,
                           "objects": {
                               "$filter": {
                                   "input": "$objects",
-                                  "as": "object",
-                                  "cond": {
+                                  "as"   : "object",
+                                  "cond" : {
                                       "$eq": ["$$object.label_type", LabelType.User]
                                   }
                               }
@@ -374,7 +387,7 @@ def load_existing_user_data(self):
 
         self._user_data[image_id] = {
             "objects": user_objects,
-            "flag": flag,
+            "flag"   : flag,
             "flag_ts": flag_ts,
         }
 
@@ -400,7 +413,7 @@ def run_import(self):
 
         desc = f"dataset[{self.dataset.name}@{self.dataset.id}] import progress"
         for (image, anno_list) in tqdm(self, desc=desc, unit=" images"):
-        # for (image, anno_list) in self:
+            # for (image, anno_list) in self:
            image = self.dataset_import_image(self.dataset, **image)
            self.image_add_user_data(image)
            for anno in anno_list:
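
The two indexes created in post_run line up with the query shapes the reworked images API now issues: an equality match on objects.category_id and a range scan on idx. Below is a minimal pymongo sketch of the queries these indexes serve, assuming a direct client connection (the real code resolves the collection via Image(dataset_id).get_collection(); the connection URI and collection name here are hypothetical):

from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")
images = client["deepdataspace"]["images_demo"]  # hypothetical collection name

# Served by the ("objects.category_id", 1) index: equality match against
# a key of the embedded objects array.
by_category = images.find({"objects.category_id": "cat_001"})

# Served by the ("idx", 1) index: offset pagination as an index range scan
# on idx rather than a costly skip().
page = images.find({"idx": {"$gte": 400}}).sort("idx", 1).limit(100)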

deepdataspace/plugins/coco2017/importer.py (+4 -3)
@@ -34,6 +34,8 @@ def __init__(self, meta_path: str, enforce: bool = False):
         info = self.parse_meta(meta_path)
         if info is None:
             raise RuntimeError(f"Cannot import coco dataset: {meta_path}")
+        else:
+            logger.info(f"Successfully parsed meta file {meta_path}: {info}")
 
         dataset_name = info["dataset_name"]
         self.ground_truth = info["ground_truth"]
@@ -100,9 +102,8 @@ def parse_meta(meta_path: str):
             logger.error(traceback.format_exc())
             logger.error(f"Failed to parse meta file {meta_path}: {err}")
             return None
-
-        logger.info(f"Successfully parsed meta file {meta_path}: {info}")
-        return info
+        else:
+            return info
 
     def load_ground_truth(self):
         with open(self.ground_truth, "r", encoding="utf8") as fp:
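
The parse_meta change leans on try/except/else semantics: the else branch runs only when the try body raised nothing, so reaching return info implies a successful parse, and the success message is now logged once, in __init__. A minimal sketch of the pattern, under the assumption that the meta file is plain JSON (the real meta format may differ):

import json
import logging

logger = logging.getLogger("django")

def parse_meta(meta_path: str):
    # Return the parsed meta dict, or None on any error.
    try:
        with open(meta_path, "r", encoding="utf8") as fp:
            info = json.load(fp)
    except Exception as err:
        logger.error(f"Failed to parse meta file {meta_path}: {err}")
        return None
    else:
        # Runs only if the try body completed without raising.
        return info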

deepdataspace/server/resources/api_v1/images.py (+81 -46)
@@ -6,11 +6,12 @@
 
 import json
 import logging
+from random import randint
 
 from deepdataspace.constants import DatasetFileType
 from deepdataspace.constants import DatasetStatus
+from deepdataspace.constants import DatasetType
 from deepdataspace.constants import ErrCode
-from deepdataspace.constants import LabelType
 from deepdataspace.model import DataSet
 from deepdataspace.model.image import Image
 from deepdataspace.plugins.coco2017 import COCO2017Importer
@@ -19,7 +20,6 @@
 from deepdataspace.utils.http import format_response
 from deepdataspace.utils.http import parse_arguments
 from deepdataspace.utils.http import raise_exception
-from deepdataspace.constants import DatasetType
 
 logger = logging.getLogger("django")
 
@@ -68,9 +68,9 @@ class ImagesView(BaseAPIView):
         Argument("dataset_id", str, Argument.QUERY, required=True),
         Argument("category_id", str, Argument.QUERY, required=False),
         Argument("flag", int, Argument.QUERY, required=False),
-        Argument("label_id", str, Argument.QUERY, required=False),
         Argument("page_num", Argument.PositiveInt, Argument.QUERY, default=1),
-        Argument("page_size", Argument.PositiveInt, Argument.QUERY, default=100)
+        Argument("page_size", Argument.PositiveInt, Argument.QUERY, default=100),
+        Argument("offset", int, Argument.QUERY, required=False, default=None),
     ]
 
     def get(self, request):
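
With the revised get_args, clients either page positionally via page_num or anchor on the new offset query argument; offset=-1 asks the server to pick a random starting idx. Hypothetical example requests (the dataset id is made up):

GET /api/v1/images?dataset_id=64a1b2c3&page_num=2&page_size=100   # classic paging
GET /api/v1/images?dataset_id=64a1b2c3&offset=400&page_size=100   # anchor at idx >= 400
GET /api/v1/images?dataset_id=64a1b2c3&offset=-1&page_size=100    # random page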
@@ -79,7 +79,7 @@ def get(self, request):
         - GET /api/v1/images
         """
 
-        dataset_id, category_id, flag, label_id, page_num, page_size = parse_arguments(request, self.get_args)
+        dataset_id, category_id, flag, page_num, page_size, offset = parse_arguments(request, self.get_args)
 
         dataset = DataSet.find_one({"_id": dataset_id})
         if dataset is None:
@@ -92,73 +92,108 @@ def get(self, request):
 
         filters = {}
         if category_id is not None:
-            filters = {"objects": {
-                "$elemMatch": {
-                    "category_id": category_id,
-                    "label_type" : {"$in": [LabelType.User, LabelType.GroundTruth]}}}
-            }
+            filters["objects.category_id"] = category_id
 
         if flag is not None:
             filters["flag"] = flag
 
         total = Image(dataset_id).count_num(filters)
 
-        image_list = []
-        offset = max(0, page_size * (page_num - 1))
+        if offset is None:
+            skip = max(0, page_size * (page_num - 1))
+        else:
+            skip = 0
+            page_num = None
+            if offset == -1:  # generate a random offset
+                includes = {"_id": 1, "idx": 1}
+                max_idx = Image(dataset_id).find_many(filters, includes,
+                                                      sort=[("idx", -1)],
+                                                      skip=0, size=1,
+                                                      to_dict=True)
+                max_idx = list(max_idx)[0]["idx"]
+
+                min_idx = Image(dataset_id).find_many(filters, includes,
+                                                      sort=[("idx", 1)],
+                                                      skip=0, size=1,
+                                                      to_dict=True)
+                min_idx = list(min_idx)[0]["idx"]
+
+                offset = randint(min_idx, max_idx)
+
+                # try the best to return at least page_size objects
+                if max_idx - offset + 1 < page_size:
+                    offset = max(min_idx, max_idx - page_size + 1)
+                filters["idx"] = {"$gte": offset}
+            elif offset >= 0:  # query by specified offset
+                filters["idx"] = {"$gte": offset}
+            else:
+                raise_exception(ErrCode.BadRequest, f"invalid offset value[{offset}]")
+
+        if skip > total:
+            data = {
+                "image_list": [],
+                "offset"    : offset,
+                "page_size" : page_size,
+                "page_num"  : page_num,
+                "total"     : total
+            }
+            return format_response(data, enable_cache=True)
 
-        includes = {"id", "idx", "flag", "objects", "metadata", "type", "width", "height", "url",
-                    "url_full_res"}
+        includes = {"id", "idx", "flag", "objects", "metadata",
+                    "type", "width", "height", "url", "url_full_res"}
         includes = {i: 1 for i in includes}
 
         req_scheme = request.scheme
         req_host = request.META["HTTP_HOST"]
         req_prefix = f"{req_scheme}://{req_host}"
 
-        if offset <= total:
-            for image in Image(dataset_id).find_many(filters, includes,
-                                                     sort=[("idx", 1)],
-                                                     skip=offset,
-                                                     size=page_size,
-                                                     to_dict=True):
-                for obj in image["objects"]:
-                    obj["source"] = obj["label_type"]  # TODO keep for compatibility, delete this in the future
+        image_list = []
+        for image in Image(dataset_id).find_many(filters,
+                                                 includes,
+                                                 sort=[("idx", 1)],
+                                                 skip=skip,
+                                                 size=page_size,
+                                                 to_dict=True):
+            for obj in image["objects"]:
+                obj["source"] = obj["label_type"]  # TODO keep for compatibility, delete this in the future
 
-                    alpha = obj.get("alpha", "")
-                    if alpha is None:
-                        obj["alpha"] = ""
-                    elif not alpha.startswith("http"):
-                        obj["alpha"] = f"{req_prefix}{alpha}"
+                alpha = obj.get("alpha", "")
+                if alpha is None:
+                    obj["alpha"] = ""
+                elif not alpha.startswith("http"):
+                    obj["alpha"] = f"{req_prefix}{alpha}"
 
-                    if obj["segmentation"] is None:
-                        obj["segmentation"] = ""
+                if obj["segmentation"] is None:
+                    obj["segmentation"] = ""
 
-                    obj["caption"] = obj["caption"] or ""
+                obj["caption"] = obj["caption"] or ""
 
-                    obj.pop("compare_result", None)
+                obj.pop("compare_result", None)
 
-                image_url = image["url"]
-                image_url = concat_url(req_prefix, image_url)
+            image_url = image["url"]
+            image_url = concat_url(req_prefix, image_url)
 
-                image_url_full_res = image["url_full_res"] or image_url
-                image_url_full_res = concat_url(req_prefix, image_url_full_res)
+            image_url_full_res = image["url_full_res"] or image_url
+            image_url_full_res = concat_url(req_prefix, image_url_full_res)
 
-                desc = image.pop("metadata") or "{}"
+            desc = image.pop("metadata") or "{}"
 
-                image.update({
-                    "desc"        : desc,
-                    "metadata"    : json.loads(desc),
-                    "url"         : image_url,
-                    "url_full_res": image_url_full_res
-                })
+            image.update({
+                "desc"        : desc,
+                "metadata"    : json.loads(desc),
+                "url"         : image_url,
+                "url_full_res": image_url_full_res
+            })
 
-                image["caption"] = ""
-                if caption_generator:
-                    image["caption"] = caption_generator(image)
+            image["caption"] = ""
+            if caption_generator:
+                image["caption"] = caption_generator(image)
 
-                image_list.append(image)
+            image_list.append(image)
 
         data = {
             "image_list": image_list,
+            "offset"    : offset,
             "page_size" : page_size,
             "page_num"  : page_num,
             "total"     : total

Renamed or deleted generated static assets (content not rendered):

deepdataspace/server/static/20.4f772983.async.js.map (deleted, -1)
deepdataspace/server/static/20.4f772983.async.js → deepdataspace/server/static/20.85fcbb81.async.js (-2)
deepdataspace/server/static/222.a6c6168c.async.js → deepdataspace/server/static/222.33ccd216.async.js (-2)
deepdataspace/server/static/222.a6c6168c.async.js.map (deleted, -1)
deepdataspace/server/static/233.9b953a00.async.js → deepdataspace/server/static/233.3d91ac9a.async.js (-2)
deepdataspace/server/static/233.9b953a00.async.js.map (deleted, -1)
deepdataspace/server/static/422.622bc3b6.async.js.map (deleted, -1)
deepdataspace/server/static/422.622bc3b6.async.js → deepdataspace/server/static/422.f952182a.async.js (-2)
deepdataspace/server/static/742.57cebfa0.async.js.map (deleted, -1)

0 commit comments