training and test UCAS-AOD dataset #57


Open
liusiyuan1111 opened this issue Dec 28, 2019 · 4 comments
Labels: enhancement (New feature or request), good first issue (Good for newcomers)

Comments

@liusiyuan1111

WARNING:tensorflow:From convert_data_to_tfrecord.py:81: The name tf.python_io.TFRecordWriter is deprecated. Please use tf.io.TFRecordWriter instead.

Traceback (most recent call last):
  File "convert_data_to_tfrecord.py", line 125, in <module>
    convert_pascal_to_tfrecord()
  File "convert_data_to_tfrecord.py", line `93,` in convert_pascal_to_tfrecord
    img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml)
  File "convert_data_to_tfrecord.py", line 63, in read_xml_gtbox_and_label
    tmp_box.append(int(node.text))
ValueError: invalid literal for int() with base 10: '410.4547'
@yangxue0827
Member

The error message is quite clear: on line 63, change int() to float(), because your coordinates are floating-point.
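
A minimal sketch of that one-line change (tmp_box and node are the names from the traceback; the surrounding loop is omitted):

# convert_data_to_tfrecord.py, read_xml_gtbox_and_label(), around line 63
# before: tmp_box.append(int(node.text))
tmp_box.append(float(node.text))  # coordinates like '410.4547' are floats, so parse with float()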

@liusiyuan1111
Author

> The error message is quite clear: on line 63, change int() to float(), because your coordinates are floating-point.

Thanks.
One more question: if I use the UCAS-AOD dataset, can I directly use the DOTA test script? What would need to change?

@yangxue0827
Member

@liusiyuan1111
That definitely won't work as-is. Reference code is below (you may need to add a few parameters in cfgs.py: USE_07_METRIC = True, EVAL_THRESHOLD = 0.5, EVALUATE_R_DIR = ROOT_PATH + '/output/evaluate_result_pickle/'):
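
A sketch of those cfgs.py additions (assuming ROOT_PATH is already defined in cfgs.py, as in the repo's existing configs; the comments give the usual meaning of these flags):

# additions to libs/configs/cfgs.py
USE_07_METRIC = True    # use the VOC07 11-point AP computation
EVAL_THRESHOLD = 0.5    # IoU threshold when matching detections to ground truth
EVALUATE_R_DIR = ROOT_PATH + '/output/evaluate_result_pickle/'  # output dir for rotated-eval results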

# -*- coding:utf-8 -*-

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import os
import sys
import tensorflow as tf
import time
import cv2
import pickle
import numpy as np
import argparse
from tqdm import tqdm
sys.path.append("../")

from data.io.image_preprocess import short_side_resize_for_inference_data
from libs.configs import cfgs
from libs.networks import build_whole_network
from libs.val_libs import voc_eval, voc_eval_r
from libs.box_utils import draw_box_in_img
from libs.box_utils.coordinate_convert import forward_convert, backward_convert
from help_utils import tools


def eval_with_plac(img_dir, det_net, num_imgs, image_ext, draw_imgs=False):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)
    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        imgs = os.listdir(img_dir)
        pbar = tqdm(imgs)
        for a_img_name in pbar:
            a_img_name = a_img_name.split(image_ext)[0]

            raw_img = cv2.imread(os.path.join(img_dir,
                                              a_img_name + image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}
                )

            if draw_imgs:
                detected_indices = det_scores_r_ >= cfgs.VIS_SCORE
                detected_scores = det_scores_r_[detected_indices]
                detected_boxes = det_boxes_r_[detected_indices]
                detected_categories = det_category_r_[detected_indices]

                det_detections_r = draw_box_in_img.draw_boxes_with_label_and_scores(np.squeeze(resized_img, 0),
                                                                                    boxes=detected_boxes,
                                                                                    labels=detected_categories,
                                                                                    scores=detected_scores,
                                                                                    method=1,
                                                                                    in_graph=True)

                save_dir = os.path.join('test_ucas-aod', cfgs.VERSION, 'ucas-aod_img_vis')
                tools.mkdir(save_dir)

                cv2.imwrite(save_dir + '/{}.jpg'.format(a_img_name),
                            det_detections_r[:, :, ::-1])

            if det_boxes_r_.shape[0] != 0:
                # map boxes from the resized image back to the raw image's coordinates
                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                det_boxes_r_ = forward_convert(det_boxes_r_, False)
                det_boxes_r_[:, 0::2] *= (raw_w / resized_w)
                det_boxes_r_[:, 1::2] *= (raw_h / resized_h)
                det_boxes_r_ = backward_convert(det_boxes_r_, False)

            x_c, y_c, w, h, theta = det_boxes_r_[:, 0], det_boxes_r_[:, 1], det_boxes_r_[:, 2], \
                                    det_boxes_r_[:, 3], det_boxes_r_[:, 4]

            boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
            dets_r = np.hstack((det_category_r_.reshape(-1, 1),
                                det_scores_r_.reshape(-1, 1),
                                boxes_r))
            all_boxes_r.append(dets_r)

            pbar.set_description("Eval image %s" % a_img_name)

        # fw1 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')
        # pickle.dump(all_boxes_r, fw1)
        return all_boxes_r


def eval(num_imgs, img_dir, image_ext, test_annotation_path, draw_imgs):
    retinanet = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                     is_training=False)
    all_boxes_r = eval_with_plac(img_dir=img_dir, det_net=retinanet,
                                 num_imgs=num_imgs, image_ext=image_ext, draw_imgs=draw_imgs)

    # with open(cfgs.VERSION + '_detections_r.pkl', 'rb') as f2:
    #     all_boxes_r = pickle.load(f2)
    #
    #     print(len(all_boxes_r))

    imgs = os.listdir(img_dir)
    real_test_imgname_list = [i.split(image_ext)[0] for i in imgs]

    print(10 * "**")
    print('rotation eval:')
    voc_eval_r.voc_evaluate_detections(all_boxes=all_boxes_r,
                                       test_imgid_list=real_test_imgname_list,
                                       test_annotation_path=test_annotation_path)


def parse_args():
    """
    Parse input arguments
    """
    parser = argparse.ArgumentParser(description='Train a R2CNN network')
    parser.add_argument('--img_dir', dest='img_dir',
                        help='images path',
                        default='/data/dataset/UCAS-AOD/VOCdevkit_test/JPEGImages', type=str)
    parser.add_argument('--image_ext', dest='image_ext',
                        help='image format',
                        default='.png', type=str)
    parser.add_argument('--test_annotation_path', dest='test_annotation_path',
                        help='test annotate path',
                        default='/data/dataset/UCAS-AOD/VOCdevkit_test/Annotations', type=str)
    parser.add_argument('--gpu', dest='gpu',
                        help='gpu index',
                        default='0', type=str)
    parser.add_argument('--draw_imgs', '-s', default=False,
                        action='store_true')

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args

if __name__ == '__main__':
    args = parse_args()
    print('Called with args:')
    print(args)

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    eval(np.inf, args.img_dir, args.image_ext, args.test_annotation_path, args.draw_imgs)
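
If saved alongside the repo's other eval scripts (for example as eval_ucas_aod.py, a hypothetical name), it could be invoked as python eval_ucas_aod.py --img_dir /data/dataset/UCAS-AOD/VOCdevkit_test/JPEGImages --test_annotation_path /data/dataset/UCAS-AOD/VOCdevkit_test/Annotations --gpu 0 -s; with no arguments it just prints the help text and exits.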


@yangxue0827 changed the title from "ValueError when creating tfrecords from the UCAS-AOD dataset" to "training and test UCAS-AOD dataset" on Dec 30, 2019
@yangxue0827 reopened this on Dec 30, 2019
@yangxue0827 added the enhancement and good first issue labels on Dec 30, 2019
@liusiyuan1111
Author

liusiyuan1111 commented Dec 30, 2019

> @liusiyuan1111 That definitely won't work as-is. Reference code is below (…): [the cfgs.py note and full script quoted from above]

Running this eval_voc raises an error, because this dataset's xml files don't contain the pose, truncated, and difficult fields. I suggest writing these three fields in txt2xml.py in the data-processing step:

# in txt2xml.py, when building each <object> node
# (doc is the xml.dom.minidom Document; nodeobject is the <object> element)
pose = doc.createElement('pose')
pose.appendChild(doc.createTextNode('Unspecified'))
nodeobject.appendChild(pose)
truncated = doc.createElement('truncated')
truncated.appendChild(doc.createTextNode('0'))
nodeobject.appendChild(truncated)
difficult = doc.createElement('difficult')
difficult.appendChild(doc.createTextNode('0'))
nodeobject.appendChild(difficult)
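
For context, a minimal self-contained sketch of how those nodes fit into a txt2xml.py-style converter (everything besides the three pose/truncated/difficult nodes is assumed scaffolding, not the repo's exact code):

# build one VOC-style <object> node carrying the three extra fields
import xml.dom.minidom as minidom

doc = minidom.Document()
annotation = doc.createElement('annotation')
doc.appendChild(annotation)

nodeobject = doc.createElement('object')  # one <object> per ground-truth box
annotation.appendChild(nodeobject)

name = doc.createElement('name')
name.appendChild(doc.createTextNode('plane'))  # UCAS-AOD has two classes: plane and car
nodeobject.appendChild(name)

# the three fields voc_eval expects, with neutral defaults
for tag, value in [('pose', 'Unspecified'), ('truncated', '0'), ('difficult', '0')]:
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(value))
    nodeobject.appendChild(node)

print(doc.toprettyxml(indent='    '))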
