% my_bibliography.bib
@article{suRenderCNNViewpoint2015,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1505.05641},
primaryClass = {cs},
title = {Render for {{CNN}}: {{Viewpoint Estimation}} in {{Images Using CNNs Trained}} with {{Rendered 3D Model Views}}},
doi = {10.1109/ICCV.2015.308},
shorttitle = {Render for {{CNN}}},
abstract = {Object viewpoint estimation from 2D images is an essential task in computer vision. However, two issues hinder its progress: scarcity of training data with viewpoint annotations, and a lack of powerful features. Inspired by the growing availability of 3D models, we propose a framework to address both issues by combining render-based image synthesis and CNNs. We believe that 3D models have the potential in generating a large number of images of high variation, which can be well exploited by deep CNN with a high learning capacity. Towards this goal, we propose a scalable and overfit-resistant image synthesis pipeline, together with a novel CNN specifically tailored for the viewpoint estimation task. Experimentally, we show that the viewpoint estimation from our pipeline can significantly outperform state-of-the-art methods on PASCAL 3D+ benchmark.},
date = {2015-05-21},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
author = {Su, Hao and Qi, Charles R. and Li, Yangyan and Guibas, Leonidas},
file = {/Users/abdullah/Zotero/storage/9G6TZKZT/Su et al. - 2015 - Render for CNN Viewpoint Estimation in Images Usi.pdf;/Users/abdullah/Zotero/storage/EVKTX4X2/1505.html}
}
% == BibLateX quality report for suRenderCNNViewpoint2015:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
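% A possible cleanup for the recurring arXiv-preprint warnings (a sketch, not a
% verified house style): biblatex's own eprint fields are 'eprinttype', 'eprint'
% and 'eprintclass', so 'archivePrefix' duplicates 'eprinttype' and can be
% dropped, while 'primaryClass' can be renamed to 'eprintclass'. Switching the
% entry type from @article to @online (or @misc) also lifts the 'journaltitle'
% requirement, and a sentence-case title addresses the Zotero title-case note.
% The commented entry below reuses only data already present above:
%
% @online{suRenderCNNViewpoint2015,
%   eprinttype  = {arxiv},
%   eprint      = {1505.05641},
%   eprintclass = {cs},
%   title       = {Render for {{CNN}}: Viewpoint estimation in images using {{CNNs}} trained with rendered {{3D}} model views},
%   doi         = {10.1109/ICCV.2015.308},
%   date        = {2015-05-21},
%   author      = {Su, Hao and Qi, Charles R. and Li, Yangyan and Guibas, Leonidas}
% }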
@article{mccormacSceneNetRGBD5M2016,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1612.05079},
primaryClass = {cs},
title = {{{SceneNet RGB}}-{{D}}: {{5M Photorealistic Images}} of {{Synthetic Indoor Trajectories}} with {{Ground Truth}}},
url = {http://arxiv.org/abs/1612.05079},
shorttitle = {{{SceneNet RGB}}-{{D}}},
abstract = {We introduce SceneNet RGB-D, expanding the previous work of SceneNet to enable large scale photorealistic rendering of indoor scene trajectories. It provides pixel-perfect ground truth for scene understanding problems such as semantic segmentation, instance segmentation, and object detection, and also for geometric computer vision problems such as optical flow, depth estimation, camera pose estimation, and 3D reconstruction. Random sampling permits virtually unlimited scene configurations, and here we provide a set of 5M rendered RGB-D images from over 15K trajectories in synthetic layouts with random but physically simulated object poses. Each layout also has random lighting, camera trajectories, and textures. The scale of this dataset is well suited for pre-training data-driven computer vision techniques from scratch with RGB-D inputs, which previously has been limited by relatively small labelled datasets in NYUv2 and SUN RGB-D. It also provides a basis for investigating 3D scene labelling tasks by providing perfect camera poses and depth data as proxy for a SLAM system. We host the dataset at http://robotvault.bitbucket.io/scenenet-rgbd.html},
urldate = {2019-05-16},
date = {2016-12-15},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
author = {McCormac, John and Handa, Ankur and Leutenegger, Stefan and Davison, Andrew J.},
file = {/Users/abdullah/Zotero/storage/CDXZM66C/McCormac et al. - 2016 - SceneNet RGB-D 5M Photorealistic Images of Synthe.pdf;/Users/abdullah/Zotero/storage/JTV9IDWX/1612.html}
}
% == BibLateX quality report for mccormacSceneNetRGBD5M2016:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{shrivastavaLearningSimulatedUnsupervised2017,
title = {Learning {{From Simulated}} and {{Unsupervised Images Through Adversarial Training}}},
url = {http://openaccess.thecvf.com/content_cvpr_2017/html/Shrivastava_Learning_From_Simulated_CVPR_2017_paper.html},
eventtitle = {Proceedings of the {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}}},
urldate = {2019-05-16},
date = {2017},
pages = {2107-2116},
author = {Shrivastava, Ashish and Pfister, Tomas and Tuzel, Oncel and Susskind, Joshua and Wang, Wenda and Webb, Russell},
file = {/Users/abdullah/Zotero/storage/ZSWEWGII/Shrivastava et al. - 2017 - Learning From Simulated and Unsupervised Images Th.pdf;/Users/abdullah/Zotero/storage/A5T2Z8NV/Shrivastava_Learning_From_Simulated_CVPR_2017_paper.html}
}
% == BibLateX quality report for shrivastavaLearningSimulatedUnsupervised2017:
% Missing required field 'booktitle'
% ? Title looks like it was stored in title-case in Zotero
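% For the @inproceedings entries flagged as missing 'booktitle', one minimal
% option (an assumption about how the venue should be recorded, simply reusing
% the 'eventtitle' already stored in the entry above) would be:
%
% booktitle = {Proceedings of the {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}}},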
@article{tremblayTrainingDeepNetworks2018,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1804.06516},
primaryClass = {cs},
title = {Training {{Deep Networks}} with {{Synthetic Data}}: {{Bridging}} the {{Reality Gap}} by {{Domain Randomization}}},
url = {http://arxiv.org/abs/1804.06516},
shorttitle = {Training {{Deep Networks}} with {{Synthetic Data}}},
abstract = {We present a system for training deep neural networks for object detection using synthetic images. To handle the variability in real-world data, the system relies upon the technique of domain randomization, in which the parameters of the simulator--such as lighting, pose, object textures, etc.--are randomized in non-realistic ways to force the neural network to learn the essential features of the object of interest. We explore the importance of these parameters, showing that it is possible to produce a network with compelling performance using only non-artistically-generated synthetic data. With additional fine-tuning on real data, the network yields better performance than using real data alone. This result opens up the possibility of using inexpensive synthetic data for training neural networks while avoiding the need to collect large amounts of hand-annotated real-world data or to generate high-fidelity synthetic worlds--both of which remain bottlenecks for many applications. The approach is evaluated on bounding box detection of cars on the KITTI dataset.},
urldate = {2019-05-16},
date = {2018-04-17},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
author = {Tremblay, Jonathan and Prakash, Aayush and Acuna, David and Brophy, Mark and Jampani, Varun and Anil, Cem and To, Thang and Cameracci, Eric and Boochoon, Shaad and Birchfield, Stan},
file = {/Users/abdullah/Zotero/storage/5GA2RXS2/Tremblay et al. - 2018 - Training Deep Networks with Synthetic Data Bridgi.pdf;/Users/abdullah/Zotero/storage/DVTW9EQ9/1804.html}
}
% == BibLateX quality report for tremblayTrainingDeepNetworks2018:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@article{lowryVisualPlaceRecognition2016,
title = {Visual {{Place Recognition}}: {{A Survey}}},
volume = {32},
issn = {1552-3098},
doi = {10.1109/TRO.2015.2496823},
shorttitle = {Visual {{Place Recognition}}},
abstract = {Visual place recognition is a challenging problem due to the vast range of ways in which the appearance of real-world places can vary. In recent years, improvements in visual sensing capabilities, an ever-increasing focus on long-term mobile robot autonomy, and the ability to draw on state-of-the-art research in other disciplines--particularly recognition in computer vision and animal navigation in neuroscience--have all contributed to significant advances in visual place recognition systems. This paper presents a survey of the visual place recognition research landscape. We start by introducing the concepts behind place recognition--the role of place recognition in the animal kingdom, how a “place” is defined in a robotics context, and the major components of a place recognition system. Long-term robot operations have revealed that changing appearance can be a significant factor in visual place recognition failure; therefore, we discuss how place recognition solutions can implicitly or explicitly account for appearance change within the environment. Finally, we close with a discussion on the future of visual place recognition, in particular with respect to the rapid advances being made in the related fields of deep learning, semantic scene understanding, and video description.},
number = {1},
journaltitle = {IEEE Trans. Robot.},
date = {2016-02},
pages = {1-19},
keywords = {animal kingdom,animal navigation,Animals,computer vision,Computer vision,Conferences,deep learning,learning (artificial intelligence),long-term mobile robot autonomy,mobile robots,Navigation,object recognition,place recognition,Robot sensing systems,robot vision,robotics context,semantic scene understanding,video description,video signal processing,Visual place recognition,visual place recognition research landscape,visual place recognition system,visual sensing capabilities,Visualization},
author = {Lowry, S. and Sünderhauf, N. and Newman, P. and Leonard, J. J. and Cox, D. and Corke, P. and Milford, M. J.},
file = {/Users/abdullah/Zotero/storage/NVEGIQ4V/Lowry et al. - 2016 - Visual Place Recognition A Survey.pdf;/Users/abdullah/Zotero/storage/88BDTSHG/7339473.html}
}
% == BibLateX quality report for lowryVisualPlaceRecognition2016:
% ? Possibly abbreviated journal title IEEE Trans. Robot.
% ? Title looks like it was stored in title-case in Zotero
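% If unabbreviated journal names are preferred, the short form flagged above
% expands (assuming the standard IEEE title) to:
%
% journaltitle = {IEEE Transactions on Robotics},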
@inproceedings{orlandoImageBasedLocalization2019,
title = {Image {{Based Localization}} with {{Simulated Egocentric Navigations}}},
doi = {10.5220/0007356503050312},
date = {2019-01-01},
pages = {305-312},
author = {Orlando, Santi and Furnari, Antonino and Battiato, Sebastiano and Farinella, Giovanni},
file = {/Users/abdullah/Zotero/storage/HWNUWLMB/Orlando et al. - 2019 - Image Based Localization with Simulated Egocentric.pdf}
}
% == BibLateX quality report for orlandoImageBasedLocalization2019:
% Missing required field 'booktitle'
% ? Title looks like it was stored in title-case in Zotero
@article{hinterstoisserPreTrainedImageFeatures2017,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1710.10710},
primaryClass = {cs},
title = {On {{Pre}}-{{Trained Image Features}} and {{Synthetic Images}} for {{Deep Learning}}},
url = {http://arxiv.org/abs/1710.10710},
abstract = {Deep Learning methods usually require huge amounts of training data to perform at their full potential, and often require expensive manual labeling. Using synthetic images is therefore very attractive to train object detectors, as the labeling comes for free, and several approaches have been proposed to combine synthetic and real images for training. In this paper, we show that a simple trick is sufficient to train very effectively modern object detectors with synthetic images only: We freeze the layers responsible for feature extraction to generic layers pre-trained on real images, and train only the remaining layers with plain OpenGL rendering. Our experiments with very recent deep architectures for object recognition (Faster-RCNN, R-FCN, Mask-RCNN) and image feature extractors (InceptionResnet and Resnet) show this simple approach performs surprisingly well.},
urldate = {2019-05-17},
date = {2017-10-29},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
author = {Hinterstoisser, Stefan and Lepetit, Vincent and Wohlhart, Paul and Konolige, Kurt},
file = {/Users/abdullah/Zotero/storage/JA432ZR5/Hinterstoisser et al. - 2017 - On Pre-Trained Image Features and Synthetic Images.pdf;/Users/abdullah/Zotero/storage/VVR4E92M/1710.html}
}
% == BibLateX quality report for hinterstoisserPreTrainedImageFeatures2017:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{brachmannLearningLessMore2018,
title = {Learning {{Less Is More}} - {{6D Camera Localization}} via {{3D Surface Regression}}},
url = {http://openaccess.thecvf.com/content_cvpr_2018/html/Brachmann_Learning_Less_Is_CVPR_2018_paper.html},
eventtitle = {Proceedings of the {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}}},
urldate = {2019-05-17},
date = {2018},
pages = {4654-4662},
author = {Brachmann, Eric and Rother, Carsten},
file = {/Users/abdullah/Zotero/storage/8XGZ7LB6/Brachmann and Rother - 2018 - Learning Less Is More - 6D Camera Localization via.pdf;/Users/abdullah/Zotero/storage/M8C4KBZT/Brachmann_Learning_Less_Is_CVPR_2018_paper.html}
}
% == BibLateX quality report for brachmannLearningLessMore2018:
% Missing required field 'booktitle'
% ? Title looks like it was stored in title-case in Zotero
@article{rajpuraObjectDetectionUsing2017,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1706.06782},
primaryClass = {cs},
title = {Object {{Detection Using Deep CNNs Trained}} on {{Synthetic Images}}},
url = {http://arxiv.org/abs/1706.06782},
abstract = {The need for large annotated image datasets for training Convolutional Neural Networks (CNNs) has been a significant impediment for their adoption in computer vision applications. We show that with transfer learning an effective object detector can be trained almost entirely on synthetically rendered datasets. We apply this strategy for detecting packaged food products clustered in refrigerator scenes. Our CNN trained only with 4000 synthetic images achieves mean average precision (mAP) of 24 on a test set with 55 distinct products as objects of interest and 17 distractor objects. A further increase of 12\% in the mAP is obtained by adding only 400 real images to these 4000 synthetic images in the training set. A high degree of photorealism in the synthetic images was not essential in achieving this performance. We analyze factors like training data set size and 3D model dictionary size for their influence on detection performance. Additionally, training strategies like fine-tuning with selected layers and early stopping which affect transfer learning from synthetic scenes to real scenes are explored. Training CNNs with synthetic datasets is a novel application of high-performance computing and a promising approach for object detection applications in domains where there is a dearth of large annotated image data.},
urldate = {2019-05-17},
date = {2017-06-21},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
author = {Rajpura, Param S. and Bojinov, Hristo and Hegde, Ravi S.},
file = {/Users/abdullah/Zotero/storage/QGRSWXH7/Rajpura et al. - 2017 - Object Detection Using Deep CNNs Trained on Synthe.pdf;/Users/abdullah/Zotero/storage/U7KFWG4V/1706.html}
}
% == BibLateX quality report for rajpuraObjectDetectionUsing2017:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@article{wangDeLS3DDeepLocalization2018,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1805.04949},
primaryClass = {cs},
title = {{{DeLS}}-{{3D}}: {{Deep Localization}} and {{Segmentation}} with a {{3D Semantic Map}}},
url = {http://arxiv.org/abs/1805.04949},
shorttitle = {{{DeLS}}-{{3D}}},
abstract = {For applications such as autonomous driving, self-localization/camera pose estimation and scene parsing are crucial technologies. In this paper, we propose a unified framework to tackle these two problems simultaneously. The uniqueness of our design is a sensor fusion scheme which integrates camera videos, motion sensors (GPS/IMU), and a 3D semantic map in order to achieve robustness and efficiency of the system. Specifically, we first have an initial coarse camera pose obtained from consumer-grade GPS/IMU, based on which a label map can be rendered from the 3D semantic map. Then, the rendered label map and the RGB image are jointly fed into a pose CNN, yielding a corrected camera pose. In addition, to incorporate temporal information, a multi-layer recurrent neural network (RNN) is further deployed to improve the pose accuracy. Finally, based on the pose from RNN, we render a new label map, which is fed together with the RGB image into a segment CNN which produces per-pixel semantic label. In order to validate our approach, we build a dataset with registered 3D point clouds and video camera images. Both the point clouds and the images are semantically-labeled. Each video frame has ground truth pose from highly accurate motion sensors. We show that practically, pose estimation solely relying on images like PoseNet may fail due to street view confusion, and it is important to fuse multiple sensors. Finally, various ablation studies are performed, which demonstrate the effectiveness of the proposed system. In particular, we show that scene parsing and pose estimation are mutually beneficial to achieve a more robust and accurate system.},
urldate = {2019-05-17},
date = {2018-05-13},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
author = {Wang, Peng and Yang, Ruigang and Cao, Binbin and Xu, Wei and Lin, Yuanqing},
file = {/Users/abdullah/Zotero/storage/WIMDWX5B/Wang et al. - 2018 - DeLS-3D Deep Localization and Segmentation with a.pdf;/Users/abdullah/Zotero/storage/CZSXRLRD/1805.html}
}
% == BibLateX quality report for wangDeLS3DDeepLocalization2018:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@article{acharyaBIMPoseNetIndoorCamera2019,
title = {{{BIM}}-{{PoseNet}}: {{Indoor}} Camera Localisation Using a {{3D}} Indoor Model and Deep Learning from Synthetic Images},
volume = {150},
doi = {10.1016/j.isprsjprs.2019.02.020},
shorttitle = {{{BIM}}-{{PoseNet}}},
abstract = {The ubiquity of cameras built in mobile devices has resulted in a renewed interest in image-based localisation in indoor environments where the global navigation satellite system (GNSS) signals are not available. Existing approaches for indoor localisation using images either require an initial location or need first to perform a 3D reconstruction of the whole environment using structure-from-motion (SfM) methods, which is challenging and time-consuming for large indoor spaces. In this paper, a visual localisation approach is proposed to eliminate the requirement of image-based reconstruction of the indoor environment by using a 3D indoor model. A deep convolutional neural network (DCNN) is fine-tuned using synthetic images obtained from the 3D indoor model to regress the camera pose. Results of the experiments indicate that the proposed approach can be used for indoor localisation in real-time with an accuracy of approximately 2 m.},
journaltitle = {ISPRS Journal of Photogrammetry and Remote Sensing},
date = {2019-03-06},
pages = {245-258},
author = {Acharya, Debaditya and Khoshelham, Kourosh and Winter, Stephan},
file = {/Users/abdullah/Zotero/storage/L7ZF2XUJ/Acharya et al. - 2019 - BIM-PoseNet Indoor camera localisation using a 3D.pdf}
}
@inproceedings{lofflerEvaluationCriteriaInsideOut2018,
title = {Evaluation {{Criteria}} for {{Inside}}-{{Out Indoor Positioning Systems Based}} on {{Machine Learning}}},
doi = {10.1109/IPIN.2018.8533862},
abstract = {Real-time tracking allows to trace goods and enables the optimization of logistics processes in many application areas. Camera-based inside-out tracking that uses an infrastructure of fixed and known markers is costly as the markers need to be installed and maintained in the environment. Instead, systems that use natural markers suffer from changes in the physical environment. Recently a number of approaches based on machine learning (ML) aim to address such issues. This paper proposes evaluation criteria that consider algorithmic properties of ML-based positioning schemes and introduces a dataset from an indoor warehouse scenario to evaluate for them. Our dataset consists of images labeled with millimeter precise positions that allows for a better development and performance evaluation of learning algorithms. This allows an evaluation of machine learning algorithms for monocular optical positioning in a realistic indoor position application for the first time. We also show the feasibility of ML-based positioning schemes for an industrial deployment.},
date = {2018-09-24},
author = {Löffler, Christoffer and Riechel, Sascha and Fischer, Janina and Mutschler, Christopher}
}
% == BibLateX quality report for lofflerEvaluationCriteriaInsideOut2018:
% Missing required field 'booktitle'
% ? Title looks like it was stored in title-case in Zotero
@article{yassinRecentAdvancesIndoor2016,
title = {Recent {{Advances}} in {{Indoor Localization}}: {{A Survey}} on {{Theoretical Approaches}} and {{Applications}}},
volume = {PP},
doi = {10.1109/COMST.2016.2632427},
shorttitle = {Recent {{Advances}} in {{Indoor Localization}}},
abstract = {The availability of location information has become a key factor in today’s communications systems allowing location based services. In outdoor scenarios, the mobile terminal position is obtained with high accuracy thanks to the Global Positioning System (GPS) or to the standalone cellular systems. However, the main problem of GPS and cellular systems resides in the indoor environment and in scenarios with deep shadowing effects where the satellite or cellular signals are broken. In this paper, we survey different technologies and methodologies for indoor and outdoor localization with an emphasis on indoor methodologies and concepts. Additionally, we discuss in this review different localization-based applications, where the location information is critical to estimate. Finally, a comprehensive discussion of the challenges in terms of accuracy, cost, complexity, security, scalability, etc. is given. The aim of this survey is to provide a comprehensive overview of existing efforts as well as auspicious and anticipated dimensions for future work in indoor localization techniques and applications.},
journaltitle = {IEEE Communications Surveys \& Tutorials},
date = {2016-11-29},
pages = {1-1},
author = {Yassin, Ali and Nasser, Youssef and Awad, Mariette and Al-Dubai, Ahmed and Liu, Ran and Yuen, Chau and Raulefs, Ronald},
file = {/Users/abdullah/Zotero/storage/HK3TXFRK/Yassin et al. - 2016 - Recent Advances in Indoor Localization A Survey o.pdf}
}
% == BibLateX quality report for yassinRecentAdvancesIndoor2016:
% ? Title looks like it was stored in title-case in Zotero
@article{simonyanDeepConvolutionalNetworks2013,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1312.6034},
primaryClass = {cs},
title = {Deep {{Inside Convolutional Networks}}: {{Visualising Image Classification Models}} and {{Saliency Maps}}},
url = {http://arxiv.org/abs/1312.6034},
shorttitle = {Deep {{Inside Convolutional Networks}}},
abstract = {This paper addresses the visualisation of image classification models, learnt using deep Convolutional Networks (ConvNets). We consider two visualisation techniques, based on computing the gradient of the class score with respect to the input image. The first one generates an image, which maximises the class score [Erhan et al., 2009], thus visualising the notion of the class, captured by a ConvNet. The second technique computes a class saliency map, specific to a given image and class. We show that such maps can be employed for weakly supervised object segmentation using classification ConvNets. Finally, we establish the connection between the gradient-based ConvNet visualisation methods and deconvolutional networks [Zeiler et al., 2013].},
urldate = {2019-05-20},
date = {2013-12-20},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
author = {Simonyan, Karen and Vedaldi, Andrea and Zisserman, Andrew},
file = {/Users/abdullah/Zotero/storage/TQYCLX8I/Simonyan et al. - 2013 - Deep Inside Convolutional Networks Visualising Im.pdf;/Users/abdullah/Zotero/storage/M4K8NYGC/1312.html}
}
% == BibLateX quality report for simonyanDeepConvolutionalNetworks2013:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@article{sunderhaufPerformanceConvNetFeatures2015,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1501.04158},
primaryClass = {cs},
title = {On the {{Performance}} of {{ConvNet Features}} for {{Place Recognition}}},
url = {http://arxiv.org/abs/1501.04158},
abstract = {After the incredible success of deep learning in the computer vision domain, there has been much interest in applying Convolutional Network (ConvNet) features in robotic fields such as visual navigation and SLAM. Unfortunately, there are fundamental differences and challenges involved. Computer vision datasets are very different in character to robotic camera data, real-time performance is essential, and performance priorities can be different. This paper comprehensively evaluates and compares the utility of three state-of-the-art ConvNets on the problems of particular relevance to navigation for robots; viewpoint-invariance and condition-invariance, and for the first time enables real-time place recognition performance using ConvNets with large maps by integrating a variety of existing (locality-sensitive hashing) and novel (semantic search space partitioning) optimization techniques. We present extensive experiments on four real world datasets cultivated to evaluate each of the specific challenges in place recognition. The results demonstrate that speed-ups of two orders of magnitude can be achieved with minimal accuracy degradation, enabling real-time performance. We confirm that networks trained for semantic place categorization also perform better at (specific) place recognition when faced with severe appearance changes and provide a reference for which networks and layers are optimal for different aspects of the place recognition problem.},
urldate = {2019-05-20},
date = {2015-01-17},
keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Robotics},
author = {Sünderhauf, Niko and Dayoub, Feras and Shirazi, Sareh and Upcroft, Ben and Milford, Michael},
file = {/Users/abdullah/Zotero/storage/4ZE2WRQF/Sünderhauf et al. - 2015 - On the Performance of ConvNet Features for Place R.pdf;/Users/abdullah/Zotero/storage/GMT4F3LJ/1501.html}
}
% == BibLateX quality report for sunderhaufPerformanceConvNetFeatures2015:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{balntasRelocNetContinuousMetric2018a,
title = {{{RelocNet}}: {{Continuous Metric Learning Relocalisation}} Using {{Neural Nets}}},
url = {http://openaccess.thecvf.com/content_ECCV_2018/html/Vassileios_Balntas_RelocNet_Continous_Metric_ECCV_2018_paper.html},
shorttitle = {{{RelocNet}}},
eventtitle = {Proceedings of the {{European Conference}} on {{Computer Vision}} ({{ECCV}})},
urldate = {2019-06-01},
date = {2018},
pages = {751-767},
author = {Balntas, Vassileios and Li, Shuda and Prisacariu, Victor},
file = {/Users/abdullah/Zotero/storage/R2R4AVI8/Balntas et al. - 2018 - RelocNet Continuous Metric Learning Relocalisatio.pdf;/Users/abdullah/Zotero/storage/283RBUN6/Vassileios_Balntas_RelocNet_Continous_Metric_ECCV_2018_paper.html}
}
% == BibLateX quality report for balntasRelocNetContinuousMetric2018a:
% Missing required field 'booktitle'
@article{muellerCNNBASEDINITIALLOCALIZATION2018,
langid = {english},
title = {{{CNN}}-{{BASED INITIAL LOCALIZATION IMPROVED BY DATA AUGMENTATION}}},
volume = {IV-1},
issn = {2194-9050},
doi = {10.5194/isprs-annals-IV-1-117-2018},
abstract = {Image-based localization or camera re-localization is a fundamental task in computer vision and mandatory in the fields of navigation for robotics and autonomous driving or for virtual and augmented reality. Such image pose regression in 6 Degrees of Freedom (DoF) is recently solved by Convolutional Neural Networks (CNNs). However, already well-established methods based on feature matching still score higher accuracies so far. Therefore, we want to investigate how data augmentation could further improve CNN-based pose regression. Data augmentation is a valuable technique to boost performance on training based methods and wide spread in the computer vision community. Our aim in this paper is to show the benefit of data augmentation for pose regression by CNNs. For this purpose images are rendered from a 3D model of the actual test environment. This model again is generated by the original training data set, whereas no additional information nor data is required. Furthermore we introduce different training sets composed of rendered and real images. It is shown that the enhanced training of CNNs by utilizing 3D models of the environment improves the image localization accuracy. The accuracy of pose regression could be improved up to 69.37\% for the position component and 61.61\% for the rotation component on our investigated data set.},
journaltitle = {ISPRS Ann. Photogramm. Remote Sens. Spatial Inf. Sci.},
date = {2018-09-26},
pages = {117-124},
author = {Mueller, M. S. and Metzger, A. and Jutzi, B.},
file = {/Users/abdullah/Zotero/storage/G8YRPBL2/Mueller et al. - 2018 - CNN-BASED INITIAL LOCALIZATION IMPROVED BY DATA AU.pdf}
}
% == BibLateX quality report for muellerCNNBASEDINITIALLOCALIZATION2018:
% ? Possibly abbreviated journal title ISPRS Ann. Photogramm. Remote Sens. Spatial Inf. Sci.
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{caiHybridProbabilisticModel2018,
title = {A {{Hybrid Probabilistic Model}} for {{Camera Relocalization}}},
abstract = {We present a hybrid deep learning method for modelling the uncertainty of camera relocalization from a single RGB image. The proposed system leverages the discriminative deep image representation from a convolutional neural networks, and uses Gaussian Process regressors to generate the probability distribution of the six degree of freedom (6DoF) camera pose in an end-to-end fashion. This results in a network that can generate uncertainties over its inferences with no need to sample many times. Furthermore we show that our objective based on KL divergence reduces the dependence on the choice of hyperparameters. The results show that compared to the state-of-the-art Bayesian camera relocalization method, our model produces comparable localization uncertainty and improves the system efficiency significantly, without loss of accuracy.},
booktitle = {{{BMVC}}},
date = {2018},
keywords = {Approximation algorithm,Artificial neural network,Convolutional neural network,Deep learning,End-to-end principle,Gaussian process,KL-ONE,Kriging,The Australian},
author = {Cai, Ming and Shen, Chunhua and Reid, Ian D.},
file = {/Users/abdullah/Zotero/storage/MPZJ25A4/Cai et al. - 2018 - A Hybrid Probabilistic Model for Camera Relocaliza.pdf}
}
% == BibLateX quality report for caiHybridProbabilisticModel2018:
% ? Unsure about the formatting of the booktitle
% ? Title looks like it was stored in title-case in Zotero
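% The terse 'booktitle = {{{BMVC}}}' flagged above could be spelled out
% (assuming the usual expansion of the acronym) as:
%
% booktitle = {British Machine Vision Conference ({{BMVC}})},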
@inproceedings{kovalevDeepLearningTheano2016,
title = {Deep {{Learning}} with {{Theano}}, {{Torch}}, {{Caffe}}, {{TensorFlow}}, and {{Deeplearning4J}}: {{Which One Is}} the {{Best}} in {{Speed}} and {{Accuracy}}?},
shorttitle = {Deep {{Learning}} with {{Theano}}, {{Torch}}, {{Caffe}}, {{TensorFlow}}, and {{Deeplearning4J}}},
abstract = {This paper presents results of a comparative study of the leading Deep Learning frameworks, including Theano (with Keras wrapper), Torch, Caffe, TensorFlow, and Deeplearning4J. Detailed results of quantitative assessment of their training and predicting speed, as well as resultant classification accuracy, are provided. The research was conducted jointly by the United Institute of Informatics Problems (Belarus National Academy of Sciences) and Altoros, a global provider of big data and Platform-as-a-Service solutions.},
date = {2016-10-03},
author = {Kovalev, Vassili and Kalinovsky, Alexander and Kovalev, Sergey},
file = {/Users/abdullah/Zotero/storage/AEIR9UCS/Kovalev et al. - 2016 - Deep Learning with Theano, Torch, Caffe, TensorFlo.pdf}
}
% == BibLateX quality report for kovalevDeepLearningTheano2016:
% Missing required field 'booktitle'
% ? Title looks like it was stored in title-case in Zotero
@article{guIndoorLocalizationImproved2019,
title = {Indoor {{Localization Improved}} by {{Spatial Context}} - {{A Survey}}},
volume = {52},
abstract = {Indoor localization is essential for healthcare, security, augmented reality gaming, and many other location-based services. There is currently a wealth of relevant literature on indoor localization. This paper focuses on recent advances in indoor localization methods that use spatial context to improve the location estimation. Spatial context in the form of maps and spatial models have been used to improve the localization by constraining location estimates in the navigable parts of indoor environments. Landmarks such as doors and corners, which are also one form of spatial context, have proved useful in assisting indoor localization by correcting the localization error. This survey gives a comprehensive review of state-of-the-art indoor localization methods and localization improvement methods using maps, spatial models, and landmarks.},
journaltitle = {ACM Computing Surveys},
date = {2019-06-10},
pages = {64:1-35},
author = {Gu, Fuqiang and Hu, Xuke and Ramezani, Milad and Acharya, Debaditya and Khoshelham, Kourosh and Valaee, Shahrokh and Shang, Jianga},
file = {/Users/abdullah/Zotero/storage/ATALC3TA/Gu et al. - 2019 - Indoor Localization Improved by Spatial Context - .pdf}
}
% == BibLateX quality report for guIndoorLocalizationImproved2019:
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{haImagebasedIndoorLocalization2018,
langid = {english},
location = {{Taipei, Taiwan}},
title = {Image-Based {{Indoor Localization Using BIM}} and {{Features}} of {{CNN}}},
doi = {10.22260/ISARC2018/0107},
abstract = {This study suggests an indoor localization method to estimate the location of a user of a mobile device with imaging capability. The proposed method uses a matching approach between an actual photograph and a rendered BIM (building information modeling) image. A pre-trained VGG 16 network is used for feature extraction. Experimental results show that the best image matching performance can be obtained when using features from pooling layer 4 of VGG16. The proposed method allows for indoor localization only by image matching without additional sensing information.},
eventtitle = {34th {{International Symposium}} on {{Automation}} and {{Robotics}} in {{Construction}}},
date = {2018-07-22},
author = {Ha, Inhae and Kim, Hongjo and Park, Somin and Kim, Hyoungkwan},
file = {/Users/abdullah/Zotero/storage/QH9PMFU6/Ha et al. - 2018 - Image-based Indoor Localization Using BIM and Feat.pdf}
}
% == BibLateX quality report for haImagebasedIndoorLocalization2018:
% Missing required field 'booktitle'
@article{prueferTobiasHassenkloeverKlassifikation,
langid = {german},
title = {Tobias Hassenklöver Klassifikation hochvarianter Muster mit Faltungsnetzwerken},
abstract = {The classification of objects and especially of humans by software is for years a great challenge. These highly variant forms are optically captured and often detected with the use of neural networks in combination with statistical methods. A new method for the detection of these highly variant patterns, such as characters, objects or people are Convolutional neural networks. Convolutional neural networks are a variety of Neural networks, which make their decisions based on algorithms used in image processing. In this thesis the limits of the detection of Convolutional neural networks are tested. In the tests the impact on the classification accuracy is checked with various modified input data.},
pages = {59},
author = {Prüfer, Betreuender and Fohl, Dr Wolfgang},
file = {/Users/abdullah/Zotero/storage/2KIIQBFT/Prüfer and Fohl - Tobias Hassenklöver Klassifikation hochvarianter Mu.pdf}
}
% == BibLateX quality report for prueferTobiasHassenkloeverKlassifikation:
% Exactly one of 'date' / 'year' must be present
% Missing required field 'journaltitle'
@article{kendallGeometryUncertaintyinDeep,
langid = {english},
title = {Geometry and {{Uncertainty in Deep Learning}} for {{Computer Vision}}},
pages = {208},
author = {Kendall, Alex Guy},
file = {/Users/abdullah/Zotero/storage/CK2KUICS/Kendall - Geometry and Uncertaintyin Deep Learning for Compu.pdf}
}
% == BibLateX quality report for kendallGeometryUncertaintyinDeep:
% Exactly one of 'date' / 'year' must be present
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{furukawaInternetscaleMultiviewStereo2010,
langid = {english},
location = {{San Francisco, CA, USA}},
title = {Towards {{Internet}}-Scale Multi-View Stereo},
isbn = {978-1-4244-6984-0},
doi = {10.1109/CVPR.2010.5539802},
abstract = {This paper introduces an approach for enabling existing multi-view stereo methods to operate on extremely large unstructured photo collections. The main idea is to decompose the collection into a set of overlapping sets of photos that can be processed in parallel, and to merge the resulting reconstructions. This overlapping clustering problem is formulated as a constrained optimization and solved iteratively. The merging algorithm, designed to be parallel and out-of-core, incorporates robust filtering steps to eliminate low-quality reconstructions and enforce global visibility constraints. The approach has been tested on several large datasets downloaded from Flickr.com, including one with over ten thousand images, yielding a 3D reconstruction with nearly thirty million points.},
eventtitle = {2010 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
booktitle = {2010 {{IEEE Computer Society Conference}} on {{Computer Vision}} and {{Pattern Recognition}}},
publisher = {{IEEE}},
date = {2010-06},
pages = {1434-1441},
author = {Furukawa, Yasutaka and Curless, Brian and Seitz, Steven M. and Szeliski, Richard},
file = {/Users/abdullah/Zotero/storage/366VAF5B/Furukawa et al. - 2010 - Towards Internet-scale multi-view stereo.pdf}
}
% == BibLateX quality report for furukawaInternetscaleMultiviewStereo2010:
% ? Unsure about the formatting of the booktitle
@article{piascoSurveyVisualBasedLocalization2018,
title = {A Survey on {{Visual}}-{{Based Localization}}: {{On}} the Benefit of Heterogeneous Data},
volume = {74},
doi = {10.1016/j.patcog.2017.09.013},
shorttitle = {A Survey on {{Visual}}-{{Based Localization}}},
abstract = {We are surrounded by plenty of information about our environment. From these multiple sources, numerous data could be extracted: set of images, 3D model, coloured points cloud... When classical localization devices failed (e.g. GPS sensor in cluttered environments), aforementioned data could be used within a localization framework. This is called Visual Based Localization (VBL). Due to numerous data types that can be collected from a scene, VBL encompasses a large amount of different methods. This paper presents a survey about recent methods that localize a visual acquisition system according to a known environment. We start by categorizing VBL methods into two distinct families: indirect and direct localization systems. As the localization environment is almost always dynamic, we pay special attention to methods designed to handle appearances changes occurring in a scene. Thereafter, we highlight methods exploiting heterogeneous types of data. Finally, we conclude the paper with a discussion on promising trends that could permit to a localization system to reach high precision pose estimation within an area as large as possible.},
journaltitle = {Pattern Recognit.},
date = {2018-02},
pages = {90 - 109},
keywords = {Camera Relocalisation,Image-based localization,Pose estimation,Visual geo-localization},
author = {Piasco, Nathan and Sidibé, Désiré and Demonceaux, Cédric and Gouet-Brunet, Valérie},
file = {/Users/abdullah/Zotero/storage/97H79C5H/Piasco et al. - 2018 - A survey on Visual-Based Localization On the bene.pdf}
}
% == BibLateX quality report for piascoSurveyVisualBasedLocalization2018:
% ? Possibly abbreviated journal title Pattern Recognit.
@inproceedings{shottonSceneCoordinateRegression2013,
title = {Scene {{Coordinate Regression Forests}} for {{Camera Relocalization}} in {{RGB}}-{{D Images}}},
doi = {10.1109/CVPR.2013.377},
eventtitle = {Proceedings of the {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}}},
date = {2013},
pages = {2930-2937},
author = {Shotton, Jamie and Glocker, Ben and Zach, Christopher and Izadi, Shahram and Criminisi, Antonio and Fitzgibbon, Andrew},
file = {/Users/abdullah/Zotero/storage/K49T3LT8/Shotton et al. - 2013 - Scene Coordinate Regression Forests for Camera Rel.pdf;/Users/abdullah/Zotero/storage/N6LI2KWX/Shotton_Scene_Coordinate_Regression_2013_CVPR_paper.html}
}
% == BibLateX quality report for shottonSceneCoordinateRegression2013:
% Missing required field 'booktitle'
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{varolLearningSyntheticHumans2017,
langid = {english},
location = {{Honolulu, HI}},
title = {Learning from {{Synthetic Humans}}},
isbn = {978-1-5386-0457-1},
doi = {10.1109/CVPR.2017.492},
abstract = {Estimating human pose, shape, and motion from images and videos are fundamental challenges with many applications. Recent advances in 2D human pose estimation use large amounts of manually-labeled training data for learning convolutional neural networks (CNNs). Such data is time consuming to acquire and difficult to extend. Moreover, manual labeling of 3D pose, depth and motion is impractical. In this work we present SURREAL (Synthetic hUmans foR REAL tasks): a new large-scale dataset with synthetically-generated but realistic images of people rendered from 3D sequences of human motion capture data. We generate more than 6 million frames together with ground truth pose, depth maps, and segmentation masks. We show that CNNs trained on our synthetic dataset allow for accurate human depth estimation and human part segmentation in real RGB images. Our results and the new dataset open up new possibilities for advancing person analysis using cheap and large-scale synthetic data.},
eventtitle = {2017 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
booktitle = {2017 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
publisher = {{IEEE}},
date = {2017-07},
pages = {4627-4635},
author = {Varol, Gul and Romero, Javier and Martin, Xavier and Mahmood, Naureen and Black, Michael J. and Laptev, Ivan and Schmid, Cordelia},
file = {/Users/abdullah/Zotero/storage/QFNDJR64/Varol et al. - 2017 - Learning from Synthetic Humans.pdf}
}
% == BibLateX quality report for varolLearningSyntheticHumans2017:
% ? Unsure about the formatting of the booktitle
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{kendallGeometricLossFunctions2017,
langid = {english},
location = {{Honolulu, HI}},
title = {Geometric {{Loss Functions}} for {{Camera Pose Regression}} with {{Deep Learning}}},
isbn = {978-1-5386-0457-1},
doi = {10.1109/CVPR.2017.694},
abstract = {Deep learning has shown to be effective for robust and real-time monocular image relocalisation. In particular, PoseNet [22] is a deep convolutional neural network which learns to regress the 6-DOF camera pose from a single image. It learns to localize using high level features and is robust to difficult lighting, motion blur and unknown camera intrinsics, where point based SIFT registration fails. However, it is trained using a naive loss function, with hyperparameters which require expensive tuning. In this paper, we give the problem a more fundamental theoretical treatment. We explore a number of novel loss functions for learning camera pose which are based on geometry and scene reprojection error. Additionally we show how to automatically learn an optimal weighting to simultaneously regress position and orientation. By leveraging geometry, we demonstrate that our technique significantly improves PoseNet’s performance across datasets ranging from indoor rooms to a small city.},
eventtitle = {2017 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
booktitle = {2017 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
publisher = {{IEEE}},
date = {2017-07},
pages = {6555-6564},
author = {Kendall, Alex and Cipolla, Roberto},
file = {/Users/abdullah/Zotero/storage/T2W2SQNP/Kendall und Cipolla - 2017 - Geometric Loss Functions for Camera Pose Regressio.pdf}
}
% == BibLateX quality report for kendallGeometricLossFunctions2017:
% ? Unsure about the formatting of the booktitle
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{wangDeepVOEndtoendVisual2017,
langid = {english},
location = {{Singapore, Singapore}},
title = {{{DeepVO}}: {{Towards}} End-to-End Visual Odometry with Deep {{Recurrent Convolutional Neural Networks}}},
isbn = {978-1-5090-4633-1},
doi = {10.1109/ICRA.2017.7989236},
shorttitle = {{{DeepVO}}},
abstract = {This paper studies monocular visual odometry (VO) problem. Most of existing VO algorithms are developed under a standard pipeline including feature extraction, feature matching, motion estimation, local optimisation, etc. Although some of them have demonstrated superior performance, they usually need to be carefully designed and specifically fine-tuned to work well in different environments. Some prior knowledge is also required to recover an absolute scale for monocular VO. This paper presents a novel end-to-end framework for monocular VO by using deep Recurrent Convolutional Neural Networks (RCNNs). Since it is trained and deployed in an end-to-end manner, it infers poses directly from a sequence of raw RGB images (videos) without adopting any module in the conventional VO pipeline. Based on the RCNNs, it not only automatically learns effective feature representation for the VO problem through Convolutional Neural Networks, but also implicitly models sequential dynamics and relations using deep Recurrent Neural Networks. Extensive experiments on the KITTI VO dataset show competitive performance to state-of-the-art methods, verifying that the end-to-end Deep Learning technique can be a viable complement to the traditional VO systems.},
eventtitle = {2017 {{IEEE International Conference}} on {{Robotics}} and {{Automation}} ({{ICRA}})},
booktitle = {2017 {{IEEE International Conference}} on {{Robotics}} and {{Automation}} ({{ICRA}})},
publisher = {{IEEE}},
date = {2017-05},
pages = {2043-2050},
author = {Wang, Sen and Clark, Ronald and Wen, Hongkai and Trigoni, Niki},
file = {/Users/abdullah/Zotero/storage/2JTAL47J/Wang et al. - 2017 - DeepVO Towards end-to-end visual odometry with de.pdf}
}
% == BibLateX quality report for wangDeepVOEndtoendVisual2017:
% ? Unsure about the formatting of the booktitle
@article{costanteExploringRepresentationLearning2016,
langid = {english},
title = {Exploring {{Representation Learning With CNNs}} for {{Frame}}-to-{{Frame Ego}}-{{Motion Estimation}}},
volume = {1},
issn = {2377-3766, 2377-3774},
doi = {10.1109/LRA.2015.2505717},
abstract = {Visual Ego-Motion Estimation, or briefly Visual Odometry (VO), is one of the key building blocks of modern SLAM systems. In the last decade, impressive results have been demonstrated in the context of visual navigation, reaching very high localization performance. However, all ego-motion estimation systems require careful parameter tuning procedures for the specific environment they have to work in. Furthermore, even in ideal scenarios, most state-of-the-art approaches fail to handle image anomalies and imperfections, which results in less robust estimates. VO systems that rely on geometrical approaches extract sparse or dense features and match them to perform Frame to Frame (F2F) motion estimation. However, images contain much more information that can be used to further improve the F2F estimation. To learn new feature representation a very successful approach is to use deep Convolutional Neural Networks. Inspired by recent advances in Deep Networks and by previous work on learning methods applied to VO, we explore the use of Convolutional Neural Networks to learn both the best visual features and the best estimator for the task of visual Ego-Motion Estimation. With experiments on publicly available datasets we show that our approach is robust with respect to blur, luminance and contrast anomalies and outperforms most state-of-the-art approaches even in nominal conditions.},
number = {1},
journaltitle = {IEEE Robot. Autom. Lett.},
date = {2016-01},
pages = {18-25},
author = {Costante, Gabriele and Mancini, Michele and Valigi, Paolo and Ciarfuglia, Thomas A.},
file = {/Users/abdullah/Zotero/storage/YPIGDKIY/Costante et al. - 2016 - Exploring Representation Learning With CNNs for Fr.pdf}
}
% == BibLateX quality report for costanteExploringRepresentationLearning2016:
% 'issn': not a valid ISSN
% ? Possibly abbreviated journal title IEEE Robot. Autom. Lett.
% ? Title looks like it was stored in title-case in Zotero
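% The ISSN warning above comes from two values sharing one field
% ('2377-3766, 2377-3774'); keeping a single one of them, e.g.
%
% issn = {2377-3766},
%
% should satisfy the check (the entry does not say which of the two is the
% print ISSN, so the choice here is arbitrary).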
@incollection{melekhovRelativeCameraPose2017,
langid = {english},
location = {{Cham}},
title = {Relative {{Camera Pose Estimation Using Convolutional Neural Networks}}},
volume = {10617},
isbn = {978-3-319-70352-7 978-3-319-70353-4},
abstract = {This paper presents a convolutional neural network based approach for estimating the relative pose between two cameras. The proposed network takes RGB images from both cameras as input and directly produces the relative rotation and translation as output. The system is trained in an end-to-end manner utilising transfer learning from a large scale classification dataset. The introduced approach is compared with widely used local feature based methods (SURF, ORB) and the results indicate a clear improvement over the baseline. In addition, a variant of the proposed architecture containing a spatial pyramid pooling (SPP) layer is evaluated and shown to further improve the performance.},
booktitle = {Advanced {{Concepts}} for {{Intelligent Vision Systems}}},
publisher = {{Springer International Publishing}},
date = {2017},
pages = {675-687},
author = {Melekhov, Iaroslav and Ylioinas, Juha and Kannala, Juho and Rahtu, Esa},
editor = {Blanc-Talon, Jacques and Penne, Rudi and Philips, Wilfried and Popescu, Dan and Scheunders, Paul},
file = {/Users/abdullah/Zotero/storage/C82DPP7X/Melekhov et al. - 2017 - Relative Camera Pose Estimation Using Convolutiona.pdf},
doi = {10.1007/978-3-319-70353-4_57}
}
% == BibLateX quality report for melekhovRelativeCameraPose2017:
% 'isbn': not a valid ISBN
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{parisottoGlobalPoseEstimation2018,
langid = {english},
location = {{Salt Lake City, UT}},
title = {Global {{Pose Estimation}} with an {{Attention}}-{{Based Recurrent Network}}},
isbn = {978-1-5386-6100-0},
doi = {10.1109/CVPRW.2018.00061},
abstract = {The ability for an agent to localize itself within an environment is crucial for many real-world applications. For unknown environments, Simultaneous Localization and Mapping (SLAM) enables incremental and concurrent building of and localizing within a map. We present a new, differentiable architecture, Neural Graph Optimizer, progressing towards a complete neural network solution for SLAM by designing a system composed of a local pose estimation model, a novel pose selection module, and a novel graph optimization process. The entire architecture is trained in an end-to-end fashion, enabling the network to automatically learn domain-specific features relevant to the visual odometry and avoid the involved process of feature engineering. We demonstrate the effectiveness of our system on a simulated 2D maze and the 3D ViZ-Doom environment.},
eventtitle = {2018 {{IEEE}}/{{CVF Conference}} on {{Computer Vision}} and {{Pattern Recognition Workshops}} ({{CVPRW}})},
booktitle = {2018 {{IEEE}}/{{CVF Conference}} on {{Computer Vision}} and {{Pattern Recognition Workshops}} ({{CVPRW}})},
publisher = {{IEEE}},
date = {2018-06},
pages = {350-35009},
author = {Parisotto, Emilio and Chaplot, Devendra Singh and Zhang, Jian and Salakhutdinov, Ruslan},
file = {/Users/abdullah/Zotero/storage/W4E7VCDI/Parisotto et al. - 2018 - Global Pose Estimation with an Attention-Based Rec.pdf}
}
% == BibLateX quality report for parisottoGlobalPoseEstimation2018:
% ? Unsure about the formatting of the booktitle
% ? Title looks like it was stored in title-case in Zotero
@article{krizhevskyImageNetClassificationDeep2012,
title = {{{ImageNet Classification}} with {{Deep Convolutional Neural Networks}}},
doi = {10.1145/3065386},
journaltitle = {Adv. Neural Inf. Process. Syst.},
volume = {25},
date = {2012},
pages = {1097--1105},
author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
editor = {Pereira, F. and Burges, C. J. C. and Bottou, L. and Weinberger, K. Q.},
file = {/Users/abdullah/Zotero/storage/U8YR4LH7/Krizhevsky et al. - 2012 - ImageNet Classification with Deep Convolutional Ne.pdf;/Users/abdullah/Zotero/storage/E82FVXG7/4824-imagenet-classification-with-deep-convolutional-neural-networks.html}
}
% == BibLateX quality report for krizhevskyImageNetClassificationDeep2012:
% ? Possibly abbreviated journal title Adv. Neural Inf. Process. Syst. 25
% ? Title looks like it was stored in title-case in Zotero
@article{glockerRealTimeRGBDCamera2015,
langid = {english},
title = {Real-{{Time RGB}}-{{D Camera Relocalization}} via {{Randomized Ferns}} for {{Keyframe Encoding}}},
volume = {21},
issn = {1077-2626},
doi = {10.1109/TVCG.2014.2360403},
abstract = {Recovery from tracking failure is essential in any simultaneous localization and tracking system. In this context, we explore an efficient keyframe-based relocalization method based on frame encoding using randomized ferns. The method enables automatic discovery of keyframes through online harvesting in tracking mode, and fast retrieval of pose candidates in the case when tracking is lost. Frame encoding is achieved by applying simple binary feature tests which are stored in the nodes of an ensemble of randomized ferns. The concatenation of small block codes generated by each fern yields a global compact representation of camera frames. Based on those representations we define the frame dissimilarity as the block-wise hamming distance (BlockHD). Dissimilarities between an incoming query frame and a large set of keyframes can be efficiently evaluated by simply traversing the nodes of the ferns and counting image co-occurrences in corresponding code tables. In tracking mode, those dissimilarities decide whether a frame/pose pair is considered as a novel keyframe. For tracking recovery, poses of the most similar keyframes are retrieved and used for reinitialization of the tracking algorithm. The integration of our relocalization method into a hand-held KinectFusion system allows seamless continuation of mapping even when tracking is frequently lost.},
number = {5},
journaltitle = {IEEE Trans. Visual. Comput. Graphics},
date = {2015-05-01},
pages = {571-583},
author = {Glocker, Ben and Shotton, Jamie and Criminisi, Antonio and Izadi, Shahram},
file = {/Users/abdullah/Zotero/storage/DFK8VGJM/Glocker et al. - 2015 - Real-Time RGB-D Camera Relocalization via Randomiz.pdf}
}
% == BibLateX quality report for glockerRealTimeRGBDCamera2015:
% ? Possibly abbreviated journal title IEEE Trans. Visual. Comput. Graphics
% ? Title looks like it was stored in title-case in Zotero
@article{santosMappingIndoorSpaces2016,
title = {Mapping {{Indoor Spaces}} by {{Adaptive Coarse}}-to-{{Fine Registration}} of {{RGB}}-{{D Data}}},
volume = {13},
issn = {1545-598X},
doi = {10.1109/LGRS.2015.2508880},
abstract = {In this letter, we present an adaptive coarse-to-fine registration method for 3-D indoor mapping using RGB-D data. We weight the 3-D points based on the theoretical random error of depth measurements and introduce a novel disparity-based model for an accurate and robust coarse-to-fine registration. Some feature extraction methods required by the method are also presented. First, our method exploits both visual and depth information to compute the initial transformation parameters. We employ scale-invariant feature transformation for extracting, detecting, and matching 2-D visual features, and their associated depth values are used to perform coarse registration. Then, we use an image-based segmentation technique for detecting regions in the RGB images. Their associated 3-D centroid and the correspondent disparity values are used to refine the initial transformation parameters. Finally, the loop-closure detection and a global adjustment of the complete sequence data are used to recognize when the camera has returned to a previously visited location and minimize the registration errors. The effectiveness of the proposed method is demonstrated with the Kinect data set. The experimental results show that the proposed method can properly map the indoor environment with a relative and absolute accuracy value of around 3-5 cm, respectively.},
number = {2},
journaltitle = {IEEE Geosci. Remote Sens. Lett.},
date = {2016-02},
pages = {262-266},
keywords = {Cameras,Simultaneous localization and mapping,Visualization,Feature extraction,Three-dimensional displays,2D visual feature matching,3-D indoor mapping,3D centroid,3D indoor mapping,adaptive coarse-to-fine registration method,Coarse-to-fine registration,depth measurements,disparity-based model,disparity-to-plane model,feature detection,feature extraction,feature extraction methods,global optimization,image matching,image registration,image segmentation,Image segmentation,image-based segmentation technique,indoor environment,Kinect data set,loop-closure detection,optical sensors,RGB-D data,RGB-D sensor,scale-invariant feature transformation,Solid modeling,spatial variables measurement,transforms},
author = {dos Santos, D. R. and Basso, M. A. and Khoshelham, K. and de Oliveira, E. and Pavan, N. L. and Vosselman, G.},
file = {/Users/abdullah/Zotero/storage/QSHDBCQM/Santos et al. - 2016 - Mapping Indoor Spaces by Adaptive Coarse-to-Fine R.pdf;/Users/abdullah/Zotero/storage/BUYE5C89/7374652.html}
}
% == BibLateX quality report for santosMappingIndoorSpaces2016:
% ? Possibly abbreviated journal title IEEE Geosci. Remote Sens. Lett.
% ? Title looks like it was stored in title-case in Zotero
@article{svarmCityScaleLocalizationCameras2017,
langid = {english},
title = {City-{{Scale Localization}} for {{Cameras}} with {{Known Vertical Direction}}},
volume = {39},
issn = {0162-8828, 2160-9292},
doi = {10.1109/TPAMI.2016.2598331},
abstract = {We consider the problem of localizing a novel image in a large 3D model, given that the gravitational vector is known. In principle, this is just an instance of camera pose estimation, but the scale of the problem introduces some interesting challenges. Most importantly, it makes the correspondence problem very difficult so there will often be a significant number of outliers to handle. To tackle this problem, we use recent theoretical as well as technical advances. Many modern cameras and phones have gravitational sensors that allow us to reduce the search space. Further, there are new techniques to efficiently and reliably deal with extreme rates of outliers. We extend these methods to camera pose estimation by using accurate approximations and fast polynomial solvers. Experimental results are given demonstrating that it is possible to reliably estimate the camera pose despite cases with more than 99\% outlier correspondences in city-scale models with several millions of 3D points.},
number = {7},
journaltitle = {IEEE Trans. Pattern Anal. Mach. Intell.},
date = {2017-07-01},
pages = {1455-1461},
author = {Svarm, Linus and Enqvist, Olof and Kahl, Fredrik and Oskarsson, Magnus},
file = {/Users/abdullah/Zotero/storage/FEXJL9TH/Svarm et al. - 2017 - City-Scale Localization for Cameras with Known Ver.pdf}
}
% == BibLateX quality report for svarmCityScaleLocalizationCameras2017:
% 'issn': not a valid ISSN
% ? Possibly abbreviated journal title IEEE Trans. Pattern Anal. Mach. Intell.
% ? Title looks like it was stored in title-case in Zotero
@incollection{radenovicCNNImageRetrieval2016,
langid = {english},
location = {{Cham}},
title = {{{CNN Image Retrieval Learns}} from {{BoW}}: {{Unsupervised Fine}}-{{Tuning}} with {{Hard Examples}}},
volume = {9905},
isbn = {978-3-319-46447-3 978-3-319-46448-0},
shorttitle = {{{CNN Image Retrieval Learns}} from {{BoW}}},
abstract = {Convolutional Neural Networks (CNNs) achieve state-of-the-art performance in many computer vision tasks. However, this achievement is preceded by extreme manual annotation in order to perform either training from scratch or fine-tuning for the target task. In this work, we propose to fine-tune CNN for image retrieval from a large collection of unordered images in a fully automated manner. We employ state-of-the-art retrieval and Structure-from-Motion (SfM) methods to obtain 3D models, which are used to guide the selection of the training data for CNN fine-tuning. We show that both hard positive and hard negative examples enhance the final performance in particular object retrieval with compact codes.},
booktitle = {Computer {{Vision}} – {{ECCV}} 2016},
publisher = {{Springer International Publishing}},
date = {2016},
pages = {3-20},
author = {Radenović, Filip and Tolias, Giorgos and Chum, Ondřej},
editor = {Leibe, Bastian and Matas, Jiri and Sebe, Nicu and Welling, Max},
file = {/Users/abdullah/Zotero/storage/DPMRCD36/Radenović et al. - 2016 - CNN Image Retrieval Learns from BoW Unsupervised .pdf},
doi = {10.1007/978-3-319-46448-0_1}
}
% == BibLateX quality report for radenovicCNNImageRetrieval2016:
% 'isbn': not a valid ISBN
% ? Title looks like it was stored in title-case in Zotero
@article{arandjelovicThreeThingsEveryone2012,
title = {Three Things Everyone Should Know to Improve Object Retrieval},
doi = {10.1109/CVPR.2012.6248018},
abstract = {The objective of this work is object retrieval in large scale image datasets, where the object is specified by an image query and retrieval should be immediate at run time in the manner of Video Google [28]. We make the following three contributions: (i) a new method to compare SIFT descriptors (RootSIFT) which yields superior performance without increasing processing or storage requirements; (ii) a novel method for query expansion where a richer model for the query is learnt discriminatively in a form suited to immediate retrieval through efficient use of the inverted index; (iii) an improvement of the image augmentation method proposed by Turcot and Lowe [29], where only the augmenting features which are spatially consistent with the augmented image are kept. We evaluate these three methods over a number of standard benchmark datasets (Oxford Buildings 5k and 105k, and Paris 6k) and demonstrate substantial improvements in retrieval performance whilst maintaining immediate retrieval speeds. Combining these complementary methods achieves a new state-of-the-art performance on these datasets.},
journaltitle = {2012 IEEE Conf. Comput. Vis. Pattern Recognit.},
date = {2012},
pages = {2911-2918},
keywords = {Benchmark (computing),Google Videos,Inverted index,Query expansion,Requirement,Run time (program lifecycle phase),Scale-invariant feature transform,While},
author = {Arandjelovic, Relja and Zisserman, Andrew}
}
% == BibLateX quality report for arandjelovicThreeThingsEveryone2012:
% ? Possibly abbreviated journal title 2012 IEEE Conf. Comput. Vis. Pattern Recognit.
@inproceedings{zhuTenfoldImprovementVisual2007,
langid = {english},
location = {{Rio de Janeiro, Brazil}},
title = {Ten-Fold {{Improvement}} in {{Visual Odometry Using Landmark Matching}}},
isbn = {978-1-4244-1630-1},
doi = {10.1109/ICCV.2007.4409062},
abstract = {Our goal is to create a visual odometry system for robots and wearable systems such that localization accuracies of centimeters can be obtained for hundreds of meters of distance traveled. Existing systems have achieved approximately a 1\% to 5\% localization error rate whereas our proposed system achieves close to 0.1\% error rate, a ten-fold reduction. Traditional visual odometry systems drift over time as the frame-to-frame errors accumulate. In this paper, we propose to improve visual odometry using visual landmarks in the scene. First, a dynamic local landmark tracking technique is proposed to track a set of local landmarks across image frames and select an optimal set of tracked local landmarks for pose computation. As a result, the error associated with each pose computation is minimized to reduce the drift significantly. Second, a global landmark based drift correction technique is proposed to recognize previously visited locations and use them to correct drift accumulated during motion. At each visited location along the route, a set of distinctive visual landmarks is automatically extracted and inserted into a landmark database dynamically. We integrate the landmark based approach into a navigation system with 2 stereo pairs and a low-cost Inertial Measurement Unit (IMU) for increased robustness. We demonstrate that a real-time visual odometry system using local and global landmarks can precisely locate a user within 1 meter over 1000 meters in unknown indoor/outdoor environments with challenging situations such as climbing stairs, opening doors, moving foreground objects, etc.},
eventtitle = {2007 {{IEEE}} 11th {{International Conference}} on {{Computer Vision}}},
booktitle = {2007 {{IEEE}} 11th {{International Conference}} on {{Computer Vision}}},
publisher = {{IEEE}},
date = {2007},
pages = {1-8},
author = {Zhu, Zhiwei and Oskiper, Taragay and Samarasekera, Supun and Kumar, Rakesh and Sawhney, Harpreet S.},
file = {/Users/abdullah/Zotero/storage/KQ8LBEMX/Zhu et al. - 2007 - Ten-fold Improvement in Visual Odometry Using Land.pdf}
}
% == BibLateX quality report for zhuTenfoldImprovementVisual2007:
% ? Unsure about the formatting of the booktitle
@inproceedings{xiaoLightweightMapMatching2014,
langid = {english},
location = {{Berlin}},
title = {Lightweight Map Matching for Indoor Localisation Using Conditional Random Fields},
isbn = {978-1-4799-3146-0 978-1-4799-3147-7},
doi = {10.1109/IPSN.2014.6846747},
abstract = {Indoor tracking and navigation is a fundamental need for pervasive and context-aware smartphone applications. Although indoor maps are becoming increasingly available, there is no practical and reliable indoor map matching solution available at present. We present MapCraft, a novel, robust and responsive technique that is extremely computationally efficient (running in under 10 ms on an Android smartphone), does not require training in different sites, and tracks well even when presented with very noisy sensor data. Key to our approach is expressing the tracking problem as a conditional random field (CRF), a technique which has had great success in areas such as natural language processing, but has yet to be considered for indoor tracking. Unlike directed graphical models like Hidden Markov Models, CRFs capture arbitrary constraints that express how well observations support state transitions, given map constraints. Extensive experiments in multiple sites show how MapCraft outperforms state-of-the-art approaches, demonstrating excellent tracking error and accurate reconstruction of tortuous trajectories with zero training effort. As proof of its robustness, we also demonstrate how it is able to accurately track the position of a user from accelerometer and magnetometer measurements only (i.e. gyro- and WiFi-free). We believe that such an energy-efficient approach will enable always-on background localisation, enabling a new era of location-aware applications to be developed.},
eventtitle = {2014 13th {{ACM}}/{{IEEE International Conference}} on {{Information Processing}} in {{Sensor Networks}} ({{IPSN}})},
booktitle = {{{IPSN}}-14 {{Proceedings}} of the 13th {{International Symposium}} on {{Information Processing}} in {{Sensor Networks}}},
publisher = {{IEEE}},
date = {2014-04},
pages = {131-142},
author = {Xiao, Zhuoling and Wen, Hongkai and Markham, Andrew and Trigoni, Niki},
file = {/Users/abdullah/Zotero/storage/4XYGQGWM/Xiao et al. - 2014 - Lightweight map matching for indoor localisation u.pdf}
}
% == BibLateX quality report for xiaoLightweightMapMatching2014:
% 'isbn': not a valid ISBN
@article{zampellaIndoorPositioningUsing2015,
langid = {english},
title = {Indoor {{Positioning Using Efficient Map Matching}}, {{RSS Measurements}}, and an {{Improved Motion Model}}},
volume = {64},
issn = {0018-9545, 1939-9359},
doi = {10.1109/TVT.2015.2391296},
abstract = {Unlike outdoor positioning, there is not a unique solution to obtain the position of a person inside a building or in GNSS denied areas. Typical implementations indoor rely on dead reckoning or beacon based positioning, but a robust estimation must combine several techniques to overcome their own drawbacks. In this paper, we present an indoor positioning system based on foot mounted Pedestrian Dead Reckoning (PDR) with an efficient Map Matching, Received Signal Strength (RSS) measurements and an improved motion model that includes the estimation of the turn rate bias. The system was implemented using a two levels structure with a low level PDR-filter and a high level particle filter to include all the measurements.},
number = {4},
journaltitle = {IEEE Trans. Veh. Technol.},
date = {2015-04},
pages = {1304-1317},
author = {Zampella, Francisco and Jimenez Ruiz, Antonio Ramon and Seco Granja, Fernando},
file = {/Users/abdullah/Zotero/storage/KNLCHL6Z/Zampella et al. - 2015 - Indoor Positioning Using Efficient Map Matching, R.pdf}
}
% == BibLateX quality report for zampellaIndoorPositioningUsing2015:
% 'issn': not a valid ISSN
% ? Possibly abbreviated journal title IEEE Trans. Veh. Technol.
% ? Title looks like it was stored in title-case in Zotero
@article{acharyaBIMTrackerModelbasedVisual2019,
langid = {english},
title = {{{BIM}}-{{Tracker}}: {{A}} Model-Based Visual Tracking Approach for Indoor Localisation Using a {{3D}} Building Model},
volume = {150},
issn = {09242716},
doi = {10.1016/j.isprsjprs.2019.02.014},
shorttitle = {{{BIM}}-{{Tracker}}},
abstract = {This article presents an accurate and robust visual indoor localisation approach that not only is infrastructurefree, but also avoids accumulation error by taking advantage of 1) the widespread ubiquity of mobile devices with cameras and 2) the availability of 3D building models for most modern buildings. Localisation is performed by matching image sequences captured by a camera, with a 3D model of the building in a model-based visual tracking framework. Comprehensive evaluation of the approach with a photo-realistic synthetic dataset shows the robustness of the localisation approach under challenging conditions. Additionally, the approach is tested and evaluated on real data captured by a smartphone. The results of the experiments indicate that a localisation accuracy better than 10 centimetres can be achieved by using this approach. Since localisation errors do not accumulate the proposed approach is suitable for indoor localisation tasks for long periods of time and augmented reality applications, without requiring any local infrastructure. A MATLAB implementation can be found on https://github.com/debaditya-unimelb/BIM-Tracker. For a video demo visit: https://youtu.be/cq7mk4mfdRA.},
journaltitle = {ISPRS Journal of Photogrammetry and Remote Sensing},
date = {2019-04},
pages = {157-171},
author = {Acharya, Debaditya and Ramezani, Milad and Khoshelham, Kourosh and Winter, Stephan},
file = {/Users/abdullah/Zotero/storage/ZME9LRNK/Acharya et al. - 2019 - BIM-Tracker A model-based visual tracking approac.pdf}
}
@article{lepetitMonocularModelBased3D2005,
langid = {english},
title = {Monocular {{Model}}-{{Based 3D Tracking}} of {{Rigid Objects}}: {{A Survey}}},
volume = {1},
issn = {1572-2740, 1572-2759},
doi = {10.1561/0600000001},
shorttitle = {Monocular {{Model}}-{{Based 3D Tracking}} of {{Rigid Objects}}},
abstract = {Many applications require tracking of complex 3D objects. These include visual servoing of robotic arms on specific target objects, Augmented Reality systems that require real-time registration of the object to be augmented, and head tracking systems that sophisticated interfaces can use. Computer Vision offers solutions that are cheap, practical and non-invasive.},
number = {1},
journaltitle = {Foundations and Trends in Computer Graphics and Vision},
date = {2005},
pages = {1-89},
author = {Lepetit, Vincent and Fua, Pascal},
file = {/Users/abdullah/Zotero/storage/8QENBZ2L/Lepetit und Fua - 2005 - Monocular Model-Based 3D Tracking of Rigid Objects.pdf}
}
% == BibLateX quality report for lepetitMonocularModelBased3D2005:
% 'issn': not a valid ISSN
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{davisonRealtimeSimultaneousLocalisation2003,
langid = {english},
location = {{Nice, France}},
title = {Real-Time Simultaneous Localisation and Mapping with a Single Camera},
isbn = {978-0-7695-1950-0},
doi = {10.1109/ICCV.2003.1238654},
abstract = {Ego-motion estimation for an agile single camera moving through general, unknown scenes becomes a much more challenging problem when real-time performance is required rather than under the off-line processing conditions under which most successful structure from motion work has been achieved. This task of estimating camera motion from measurements of a continuously expanding set of selfmapped visual features is one of a class of problems known as Simultaneous Localisation and Mapping (SLAM) in the robotics community, and we argue that such real-time mapping research, despite rarely being camera-based, is more relevant here than off-line structure from motion methods due to the more fundamental emphasis placed on propagation of uncertainty.},
eventtitle = {{{ICCV}} 2003: 9th {{International Conference}} on {{Computer Vision}}},
booktitle = {Proceedings {{Ninth IEEE International Conference}} on {{Computer Vision}}},
publisher = {{IEEE}},
date = {2003},
pages = {1403-1410},
volume = {2},
author = {Davison, Andrew J.},
file = {/Users/abdullah/Zotero/storage/E8PCDP4J/Davison - 2003 - Real-time simultaneous localisation and mapping wi.pdf}
}
@article{harleSurveyIndoorInertial23,
langid = {english},
title = {A {{Survey}} of {{Indoor Inertial Positioning Systems}} for {{Pedestrians}}},
volume = {15},
issn = {1553-877X},
doi = {10.1109/SURV.2012.121912.00075},
abstract = {With the continual miniaturisation of sensors and processing nodes, Pedestrian Dead Reckoning (PDR) systems are becoming feasible options for indoor tracking. These use inertial and other sensors, often combined with domain-specific knowledge about walking, to track user movements. There is currently a wealth of relevant literature spread across different research communities. In this survey, a taxonomy of modern PDRs is developed and used to contextualise the contributions from different areas. Techniques for step detection, characterisation, inertial navigation and step-and-heading-based dead-reckoning are reviewed and compared. Techniques that incorporate building maps through particle filters are analysed, along with hybrid systems that use absolute position fixes to correct dead-reckoning output. In addition, consideration is given to the possibility of using smartphones as PDR sensing devices.},
number = {3},
journaltitle = {IEEE Commun. Surv. Tutorials},
date = {2013},
pages = {1281-1293},
author = {Harle, Robert},
file = {/Users/abdullah/Zotero/storage/MRA2AQD2/Harle - 2013 - A Survey of Indoor Inertial Positioning Systems fo.pdf}
}
% == BibLateX quality report for harleSurveyIndoorInertial23:
% ? Possibly abbreviated journal title IEEE Commun. Surv. Tutorials
% ? Title looks like it was stored in title-case in Zotero
@article{hassanIndoorPositioningUsing,
langid = {english},
title = {Indoor {{Positioning Using Visible LED Lights}}: {{A Survey}}},
pages = {31},
author = {Hassan, Naveed Ul and Naeem, Aqsa and Pasha, Muhammad Adeel and Jadoon, Tariq},
file = {/Users/abdullah/Zotero/storage/4DDEFGL5/Hassan et al. - Indoor Positioning Using Visible LED Lights A Sur.pdf}
}
% == BibLateX quality report for hassanIndoorPositioningUsing:
% Exactly one of 'date' / 'year' must be present
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@article{mautzIndoorPositioningTechnologies2012,
langid = {english},
title = {Indoor Positioning Technologies},
doi = {10.3929/ethz-a-007313554},
date = {2012},
pages = {1 volume},
keywords = {Architecture,ASTRO-GEODETIC DETERMINATION OF POSITION + GEOGRAPHICAL COORDINATES (GEODESY),ASTRONOMISCH-GEODÄTISCHE ORTSBESTIMMUNG + GEOGRAPHISCHE KOORDINATEN (GEODÄSIE),Earth sciences,GEODÄTISCHE MESSVERFAHREN (GEODÄSIE),GEODETIC MEASURING METHODS (GEODESY),GEOMATICS (GEOGRAPHY),GEOMATIK (GEOGRAFIE),GLOBAL NAVIGATION SATELLITE SYSTEM; GNSS (GEODÄSIE),GLOBAL NAVIGATION SATELLITE SYSTEM; GNSS (GEODESY),GLOBAL POSITIONING SYSTEM; GPS + INDOOR GPS (GEODÄSIE),GLOBAL POSITIONING SYSTEM; GPS + INDOOR GPS (GEODESY),info:eu-repo/classification/ddc/550,info:eu-repo/classification/ddc/720},
author = {Mautz, Rainer},
file = {/Users/abdullah/Zotero/storage/UQFAZ3F2/Mautz - 2012 - Indoor positioning technologies.pdf}
}
% == BibLateX quality report for mautzIndoorPositioningTechnologies2012:
% Missing required field 'journaltitle'
@inproceedings{wuDelvingDeeperConvolutional2017,
langid = {english},
location = {{Singapore, Singapore}},
title = {Delving Deeper into Convolutional Neural Networks for Camera Relocalization},
isbn = {978-1-5090-4633-1},
doi = {10.1109/ICRA.2017.7989663},
abstract = {Convolutional Neural Networks (CNNs) have been applied to camera relocalization, which is to infer the pose of the camera given a single monocular image. However, there are still many open problems for camera relocalization with CNNs. We delve into the CNNs for camera relocalization. First, a variant of Euler angles named Euler6 is proposed to represent orientation. Then a data augmentation method named pose synthesis is designed to reduce sparsity of poses in the whole pose space to cope with overfitting in training. Third, a multi-task CNN named BranchNet is proposed to deal with the complex coupling of orientation and translation. The network consists of several shared convolutional layers and splits into two branches which predict orientation and translation, respectively. Experiments on the 7Scenes dataset show that incorporating these techniques one by one into an existing model PoseNet always leads to better results. Together these techniques reduce the orientation error by 15.9\% and the translation error by 38.3\% compared to the state-of-the-art model Bayesian PoseNet. We implement BranchNet on an Intel NUC mobile platform and reach a speed of 43 fps, which meets the real-time requirement of many robotic applications.},
eventtitle = {2017 {{IEEE International Conference}} on {{Robotics}} and {{Automation}} ({{ICRA}})},
booktitle = {2017 {{IEEE International Conference}} on {{Robotics}} and {{Automation}} ({{ICRA}})},
publisher = {{IEEE}},
date = {2017-05},
pages = {5644-5651},
author = {Wu, Jian and Ma, Liwei and Hu, Xiaolin},
file = {/Users/abdullah/Zotero/storage/3BXUW6N4/Wu et al. - 2017 - Delving deeper into convolutional neural networks .pdf}
}
% == BibLateX quality report for wuDelvingDeeperConvolutional2017:
% ? Unsure about the formatting of the booktitle
@article{mullerSQUEEZEPOSENETIMAGEBASED2017,
langid = {english},
title = {{{SqueezePoseNet}}: {{Image Based Pose Regression}} with {{Small Convolutional Neural Networks}} for {{Real Time UAS Navigation}}},
volume = {IV-2/W3},
issn = {2194-9050},
doi = {10.5194/isprs-annals-IV-2-W3-49-2017},
shorttitle = {{{SqueezePoseNet}}},
abstract = {The number of unmanned aerial vehicles (UAVs) is increasing since low-cost airborne systems are available for a wide range of users. The outdoor navigation of such vehicles is mostly based on global navigation satellite system (GNSS) methods to gain the vehicles trajectory. The drawback of satellite-based navigation are failures caused by occlusions and multi-path interferences. Beside this, local image-based solutions like Simultaneous Localization and Mapping (SLAM) and Visual Odometry (VO) can e.g. be used to support the GNSS solution by closing trajectory gaps but are computationally expensive. However, if the trajectory estimation is interrupted or not available a re-localization is mandatory. In this paper we will provide a novel method for a GNSS-free and fast image-based pose regression in a known area by utilizing a small convolutional neural network (CNN). With on-board processing in mind, we employ a lightweight CNN called SqueezeNet and use transfer learning to adapt the network to pose regression. Our experiments show promising results for GNSS-free and fast localization.},
journaltitle = {ISPRS Ann. Photogramm. Remote Sens. Spatial Inf. Sci.},
date = {2017-08-18},
pages = {49-57},
author = {Müller, M. S. and Urban, S. and Jutzi, B.},
file = {/Users/abdullah/Zotero/storage/XENDKWHE/Müller et al. - 2017 - SQUEEZEPOSENET IMAGE BASED POSE REGRESSION WITH S.pdf}
}
% == BibLateX quality report for mullerSQUEEZEPOSENETIMAGEBASED2017:
% ? Possibly abbreviated journal title ISPRS Ann. Photogramm. Remote Sens. Spatial Inf. Sci.
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{valadaDeepAuxiliaryLearning2018,
langid = {english},
location = {{Brisbane, QLD}},
title = {Deep {{Auxiliary Learning}} for {{Visual Localization}} and {{Odometry}}},
isbn = {978-1-5386-3081-5},
doi = {10.1109/ICRA.2018.8462979},
abstract = {Localization is an indispensable component of a robot’s autonomy stack that enables it to determine where it is in the environment, essentially making it a precursor for any action execution or planning. Although convolutional neural networks have shown promising results for visual localization, they are still grossly outperformed by state-of-the-art local feature-based techniques. In this work, we propose VLocNet, a new convolutional neural network architecture for 6-DoF global pose regression and odometry estimation from consecutive monocular images. Our multitask model incorporates hard parameter sharing, thus being compact and enabling real-time inference, in addition to being end-to-end trainable. We propose a novel loss function that utilizes auxiliary learning to leverage relative pose information during training, thereby constraining the search space to obtain consistent pose estimates. We evaluate our proposed VLocNet on indoor as well as outdoor datasets and show that even our single task model exceeds the performance of state-of-the-art deep architectures for global localization, while achieving competitive performance for visual odometry estimation. Furthermore, we present extensive experimental evaluations utilizing our proposed Geometric Consistency Loss that show the effectiveness of multitask learning and demonstrate that our model is the first deep learning technique to be on par with, and in some cases outperforms state-of-the-art SIFT-based approaches.},
eventtitle = {2018 {{IEEE International Conference}} on {{Robotics}} and {{Automation}} ({{ICRA}})},
booktitle = {2018 {{IEEE International Conference}} on {{Robotics}} and {{Automation}} ({{ICRA}})},
publisher = {{IEEE}},
date = {2018-05},
pages = {6939-6946},
author = {Valada, Abhinav and Radwan, Noha and Burgard, Wolfram},
file = {/Users/abdullah/Zotero/storage/CMRCM9V2/Valada et al. - 2018 - Deep Auxiliary Learning for Visual Localization an.pdf}
}
% == BibLateX quality report for valadaDeepAuxiliaryLearning2018:
% ? Unsure about the formatting of the booktitle
% ? Title looks like it was stored in title-case in Zotero
@article{valadaIncorporatingSemanticGeometric2018,
langid = {english},
title = {Incorporating {{Semantic}} and {{Geometric Priors}} in {{Deep Pose Regression}}},
url = {http://ais.informatik.uni-freiburg.de/publications/papers/valada18rsslair.pdf},
abstract = {Deep learning has enabled recent breakthroughs across a wide spectrum of scene understanding tasks, however, its applicability to camera pose regression has been unfruitful due to the direct formulation that renders it incapable of encoding scene-specific constraints. In this work, we propose the VLocNet++ architecture that overcomes this limitation by simultaneously embedding geometric and semantic knowledge of the world into the pose regression network. We employ a multitask learning approach to exploit the inter-task relationship between learning semantics, regressing 6-DoF global pose and odometry for the mutual benefit of each of these tasks. Furthermore, in order to enforce global consistency during camera pose regression, we propose the novel Geometric Consistency Loss function that leverages the predicted relative motion estimated from odometry to constrict the search space while training. Extensive experiments on the challenging Microsoft 7-Scenes benchmark and our DeepLoc dataset demonstrate that our approach exceeds the state-of-the-art outperforming local feature-based methods while simultaneously performing multiple tasks and exhibiting substantial robustness in challenging scenarios.},
date = {2018},
pages = {4},
author = {Valada, Abhinav and Radwan, Noha and Burgard, Wolfram},
file = {/Users/abdullah/Zotero/storage/CL7CDTMD/Valada et al. - Incorporating Semantic and Geometric Priors in Dee.pdf}
}
% == BibLateX quality report for valadaIncorporatingSemanticGeometric2018:
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@article{heDeepResidualLearning2015,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1512.03385},
primaryClass = {cs},
langid = {english},
title = {Deep {{Residual Learning}} for {{Image Recognition}}},
url = {http://arxiv.org/abs/1512.03385},
abstract = {Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers—8× deeper than VGG nets [41] but still having lower complexity. An ensemble of these residual nets achieves 3.57\% error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers.},
urldate = {2019-06-18},
date = {2015-12-10},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}
}
% == BibLateX quality report for heDeepResidualLearning2015:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@incollection{liWorldwidePoseEstimation2012,
langid = {english},
location = {{Berlin, Heidelberg}},
title = {Worldwide {{Pose Estimation Using 3D Point Clouds}}},
volume = {7572},
isbn = {978-3-642-33717-8 978-3-642-33718-5},
abstract = {We address the problem of determining where a photo was taken by estimating a full 6-DOF-plus-intrinsics camera pose with respect to a large geo-registered 3D point cloud, bringing together research on image localization, landmark recognition, and 3D pose estimation. Our method scales to datasets with hundreds of thousands of images and tens of millions of 3D points through the use of two new techniques: a co-occurrence prior for RANSAC and bidirectional matching of image features with 3D points. We evaluate our method on several large data sets, and show state-of-the-art results on landmark recognition as well as the ability to locate cameras to within meters, requiring only seconds per query.},
booktitle = {Computer {{Vision}} – {{ECCV}} 2012},
publisher = {{Springer Berlin Heidelberg}},
date = {2012},
pages = {15-29},
author = {Li, Yunpeng and Snavely, Noah and Huttenlocher, Dan and Fua, Pascal},
editor = {Fitzgibbon, Andrew and Lazebnik, Svetlana and Perona, Pietro and Sato, Yoichi and Schmid, Cordelia},
editorb = {Hutchison, David and Kanade, Takeo and Kittler, Josef and Kleinberg, Jon M. and Mattern, Friedemann and Mitchell, John C. and Naor, Moni and Nierstrasz, Oscar and Pandu Rangan, C. and Steffen, Bernhard and Sudan, Madhu and Terzopoulos, Demetri and Tygar, Doug and Vardi, Moshe Y. and Weikum, Gerhard},
editorbtype = {redactor},
file = {/Users/abdullah/Zotero/storage/XV7SJYPL/Li et al. - 2012 - Worldwide Pose Estimation Using 3D Point Clouds.pdf},
doi = {10.1007/978-3-642-33718-5_2}
}
% == BibLateX quality report for liWorldwidePoseEstimation2012:
% 'isbn': not a valid ISBN
% ? Title looks like it was stored in title-case in Zotero
@article{shafaeiPlayLearnUsing2016,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1608.01745},
primaryClass = {cs},
title = {Play and {{Learn}}: {{Using Video Games}} to {{Train Computer Vision Models}}},
url = {http://arxiv.org/abs/1608.01745},
shorttitle = {Play and {{Learn}}},
abstract = {Video games are a compelling source of annotated data as they can readily provide fine-grained groundtruth for diverse tasks. However, it is not clear whether the synthetically generated data has enough resemblance to the real-world images to improve the performance of computer vision models in practice. We present experiments assessing the effectiveness on real-world data of systems trained on synthetic RGB images that are extracted from a video game. We collected over 60000 synthetic samples from a modern video game with similar conditions to the real-world CamVid and Cityscapes datasets. We provide several experiments to demonstrate that the synthetically generated RGB images can be used to improve the performance of deep neural networks on both image segmentation and depth estimation. These results show that a convolutional network trained on synthetic data achieves a similar test error to a network that is trained on real-world data for dense image classification. Furthermore, the synthetically generated RGB images can provide similar or better results compared to the real-world datasets if a simple domain adaptation technique is applied. Our results suggest that collaboration with game developers for an accessible interface to gather data is potentially a fruitful direction for future work in computer vision.},
urldate = {2019-06-19},
date = {2016-08-04},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
author = {Shafaei, Alireza and Little, James J. and Schmidt, Mark},
file = {/Users/abdullah/Zotero/storage/DZRG8XBF/Shafaei et al. - 2016 - Play and Learn Using Video Games to Train Compute.pdf;/Users/abdullah/Zotero/storage/28ZWJJDA/1608.html}
}
% == BibLateX quality report for shafaeiPlayLearnUsing2016:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@inproceedings{dosovitskiyFlowNetLearningOptical2015,
langid = {english},
location = {{Santiago}},
title = {{{FlowNet}}: {{Learning Optical Flow}} with {{Convolutional Networks}}},
isbn = {978-1-4673-8391-2},
doi = {10.1109/ICCV.2015.316},
shorttitle = {{{FlowNet}}},
abstract = {Convolutional neural networks (CNNs) have recently been very successful in a variety of computer vision tasks, especially on those linked to recognition. Optical flow estimation has not been among the tasks CNNs succeeded at. In this paper we construct CNNs which are capable of solving the optical flow estimation problem as a supervised learning task. We propose and compare two architectures: a generic architecture and another one including a layer that correlates feature vectors at different image locations. Since existing ground truth data sets are not sufficiently large to train a CNN, we generate a large synthetic Flying Chairs dataset. We show that networks trained on this unrealistic data still generalize very well to existing datasets such as Sintel and KITTI, achieving competitive accuracy at frame rates of 5 to 10 fps.},
eventtitle = {2015 {{IEEE International Conference}} on {{Computer Vision}} ({{ICCV}})},
booktitle = {2015 {{IEEE International Conference}} on {{Computer Vision}} ({{ICCV}})},
publisher = {{IEEE}},
date = {2015-12},
pages = {2758-2766},
author = {Dosovitskiy, Alexey and Fischer, Philipp and Ilg, Eddy and Hausser, Philip and Hazirbas, Caner and Golkov, Vladimir and van der Smagt, Patrick and Cremers, Daniel and Brox, Thomas},
file = {/Users/abdullah/Zotero/storage/686IXTAG/Dosovitskiy et al. - 2015 - FlowNet Learning Optical Flow with Convolutional .pdf}
}
% == BibLateX quality report for dosovitskiyFlowNetLearningOptical2015:
% ? Unsure about the formatting of the booktitle
% ? Title looks like it was stored in title-case in Zotero
@article{simonyanVeryDeepConvolutional2014,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1409.1556},
primaryClass = {cs},
title = {Very {{Deep Convolutional Networks}} for {{Large}}-{{Scale Image Recognition}}},
url = {http://arxiv.org/abs/1409.1556},
abstract = {In this work we investigate the effect of the convolutional network depth on its accuracy in the large-scale image recognition setting. Our main contribution is a thorough evaluation of networks of increasing depth using an architecture with very small (3x3) convolution filters, which shows that a significant improvement on the prior-art configurations can be achieved by pushing the depth to 16-19 weight layers. These findings were the basis of our ImageNet Challenge 2014 submission, where our team secured the first and the second places in the localisation and classification tracks respectively. We also show that our representations generalise well to other datasets, where they achieve state-of-the-art results. We have made our two best-performing ConvNet models publicly available to facilitate further research on the use of deep visual representations in computer vision.},
urldate = {2019-06-19},
date = {2014-09-04},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
author = {Simonyan, Karen and Zisserman, Andrew},
file = {/Users/abdullah/Zotero/storage/G8U732PD/Simonyan and Zisserman - 2014 - Very Deep Convolutional Networks for Large-Scale I.pdf;/Users/abdullah/Zotero/storage/CCMKBL6U/1409.html}
}
% == BibLateX quality report for simonyanVeryDeepConvolutional2014:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@article{kokkinosPushingBoundariesBoundary2015,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1511.07386},
primaryClass = {cs},
title = {Pushing the {{Boundaries}} of {{Boundary Detection}} Using {{Deep Learning}}},
url = {http://arxiv.org/abs/1511.07386},
abstract = {In this work we show that adapting Deep Convolutional Neural Network training to the task of boundary detection can result in substantial improvements over the current state-of-the-art in boundary detection. Our contributions consist firstly in combining a careful design of the loss for boundary detection training, a multi-resolution architecture and training with external data to improve the detection accuracy of the current state of the art. When measured on the standard Berkeley Segmentation Dataset, we improve the optimal dataset scale F-measure from 0.780 to 0.808 - while human performance is at 0.803. We further improve performance to 0.813 by combining deep learning with grouping, integrating the Normalized Cuts technique within a deep network. We also examine the potential of our boundary detector in conjunction with the task of semantic segmentation and demonstrate clear improvements over state-of-the-art systems. Our detector is fully integrated in the popular Caffe framework and processes a 320x420 image in less than a second.},
urldate = {2019-06-20},
date = {2015-11-23},
keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
author = {Kokkinos, Iasonas},
file = {/Users/abdullah/Zotero/storage/QYPZQT8E/Kokkinos - 2015 - Pushing the Boundaries of Boundary Detection using.pdf;/Users/abdullah/Zotero/storage/Y2CYWIME/1511.html}
}
% == BibLateX quality report for kokkinosPushingBoundariesBoundary2015:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
@article{maninisConvolutionalOrientedBoundaries2016,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1608.02755},
primaryClass = {cs},
title = {Convolutional {{Oriented Boundaries}}},
volume = {9905},
doi = {10.1007/978-3-319-46448-0_35},
abstract = {We present Convolutional Oriented Boundaries (COB), which produces multiscale oriented contours and region hierarchies starting from generic image classification Convolutional Neural Networks (CNNs). COB is computationally efficient, because it requires a single CNN forward pass for contour detection and it uses a novel sparse boundary representation for hierarchical segmentation; it gives a significant leap in performance over the state-of-the-art, and it generalizes very well to unseen categories and datasets. Particularly, we show that learning to estimate not only contour strength but also orientation provides more accurate results. We perform extensive experiments on BSDS, PASCAL Context, PASCAL Segmentation, and MS-COCO, showing that COB provides state-of-the-art contours, region hierarchies, and object proposals in all datasets.},
date = {2016},
pages = {580-596},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
author = {Maninis, Kevis-Kokitsi and Pont-Tuset, Jordi and Arbeláez, Pablo and Van Gool, Luc},
file = {/Users/abdullah/Zotero/storage/6B4F8CGV/Maninis et al. - 2016 - Convolutional Oriented Boundaries.pdf;/Users/abdullah/Zotero/storage/2KDHEAIW/1608.html}
}
% == BibLateX quality report for maninisConvolutionalOrientedBoundaries2016:
% Unexpected field 'archivePrefix'
% Unexpected field 'primaryClass'
% Missing required field 'journaltitle'
% ? Title looks like it was stored in title-case in Zotero
@incollection{hazirbasFuseNetIncorporatingDepth2017,
langid = {english},
location = {{Cham}},
title = {{{FuseNet}}: {{Incorporating Depth}} into {{Semantic Segmentation}} via {{Fusion}}-{{Based CNN Architecture}}},