From 72506ec6afa646a9f1b2eb1b8dc3d245cb149796 Mon Sep 17 00:00:00 2001 From: Taku Fukada Date: Wed, 13 Mar 2024 17:38:01 +0900 Subject: [PATCH] =?UTF-8?q?Shapefile=20sink=E3=81=AE=E6=94=B9=E5=96=84=20(?= =?UTF-8?q?#467)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shapefile Sink に以下の改善を行いました: - 全カラムが .dbf に含まれるようにする。順序もスキーマ通りになるようにする。 - 属性を作る部分のコードが大分減った。 - 数値を Numeric 型で格納。長さやdecimal point ? は今後調整可能。 - 複数種類の地物がある場合に、マルチスレッド (rayon) で処理する。 ![Untitled](https://github.com/MIERUNE/PLATEAU-GIS-Converter/assets/5351911/9de6a1bf-0bd6-40ac-abc6-dc1443480304) Close: #427 ## Summary by CodeRabbit - **新機能** - Cesium JavaScriptライブラリと関連するCSSファイルをバージョン1.114から1.115に更新しました。 - **ドキュメント** - CesiumJSライブラリのバージョンを1.114から1.115に更新する手順をドキュメントに追加しました。 - **リファクタ** - Shapefileの属性を処理するコードにおいて、異なる属性タイプを扱うロジックを改善しました。 - ShapefileSinkの実行方法を更新し、並列処理とエラーハンドリングを強化しました。 --- nusamai/Cargo.toml | 2 +- nusamai/src/sink/shapefile/attributes.rs | 227 ++++++++--------------- nusamai/src/sink/shapefile/mod.rs | 191 ++++++++++--------- 3 files changed, 189 insertions(+), 231 deletions(-) diff --git a/nusamai/Cargo.toml b/nusamai/Cargo.toml index 71a5d5afb..4c292d03c 100644 --- a/nusamai/Cargo.toml +++ b/nusamai/Cargo.toml @@ -4,7 +4,7 @@ version.workspace = true edition = "2021" [dependencies] -indexmap = { version = "2.2.5", features = ["serde"] } +indexmap = { version = "2.2.5", features = ["serde", "rayon"] } rayon = "1.9.0" serde = { version = "1.0.197", features = ["derive"] } nusamai-plateau = { path = "../nusamai-plateau" } diff --git a/nusamai/src/sink/shapefile/attributes.rs b/nusamai/src/sink/shapefile/attributes.rs index aed6118b9..d35d92d31 100644 --- a/nusamai/src/sink/shapefile/attributes.rs +++ b/nusamai/src/sink/shapefile/attributes.rs @@ -1,205 +1,124 @@ use chrono::Datelike; use hashbrown::HashMap; +use nusamai_citygml::schema::DataTypeDef; +use nusamai_citygml::schema::FeatureTypeDef; +use nusamai_citygml::schema::TypeDef; use shapefile::dbase::{self, Date, FieldValue, Record}; use nusamai_citygml::object::Map; use nusamai_citygml::object::Value; use nusamai_citygml::schema::TypeRef; -use shapefile::Shape; -pub struct FieldInfo { - field_type: TypeRef, - size: u8, -} - -pub type FieldInfoMap = HashMap; -pub type Features = Vec<(Shape, Map)>; - -pub fn make_table_builder(fields: &FieldInfoMap) -> dbase::TableWriterBuilder { +pub fn make_table_builder( + typedef: &TypeDef, +) -> (dbase::TableWriterBuilder, HashMap) { let mut builder = dbase::TableWriterBuilder::new(); + let mut defaults = HashMap::new(); + + let attributes = match typedef { + TypeDef::Feature(FeatureTypeDef { attributes, .. }) => { + let key = "id"; + builder = builder.add_character_field(key.try_into().unwrap(), 255); + defaults.insert(key.into(), FieldValue::Character(None)); + attributes + } + TypeDef::Data(DataTypeDef { attributes, .. }) => attributes, + TypeDef::Property(_) => unreachable!(), + }; - for (field_name, field_info) in fields { + for (field_name, attr) in attributes { let name = field_name.as_str().try_into().unwrap(); // FIXME: handle errors + let key = field_name.to_string(); - match field_info.field_type { - TypeRef::String | TypeRef::Code | TypeRef::URI => { - builder = builder.add_character_field(name, field_info.size); + match attr.type_ref { + TypeRef::String | TypeRef::Code | TypeRef::URI | TypeRef::JsonString(_) => { + builder = builder.add_character_field(name, 255); + defaults.insert(key, FieldValue::Character(None)); } TypeRef::Integer | TypeRef::NonNegativeInteger => { - builder = builder.add_integer_field(name); + builder = builder.add_numeric_field(name, 11, 0); + defaults.insert(key, FieldValue::Numeric(None)); } TypeRef::Double | TypeRef::Measure => { - builder = builder.add_float_field(name, 50, 10); + builder = builder.add_numeric_field(name, 18, 6); + defaults.insert(key, FieldValue::Numeric(None)); } TypeRef::Boolean => { - builder = builder.add_logical_field(name); + builder = builder.add_character_field(name, 6); + defaults.insert(key, FieldValue::Character(None)); } TypeRef::Date => { builder = builder.add_date_field(name); + defaults.insert(key, FieldValue::Date(None)); + } + TypeRef::DateTime => { + // todo } TypeRef::Point => { // todo } TypeRef::Unknown => { - // todo + unreachable!(); } TypeRef::Named(_) => { - // todo - } - TypeRef::JsonString(_) => { - // todo - } - TypeRef::DateTime => { - // todo + unreachable!(); } } } - builder + (builder, defaults) } -pub fn fill_missing_fields(attributes: &mut Map, field_info: &FieldInfoMap) { - for (field_name, field_info) in field_info { - if !attributes.contains_key(field_name.as_str()) { - match field_info.field_type { - TypeRef::String | TypeRef::Code | TypeRef::URI => { - attributes.insert(field_name.clone(), Value::String("".to_string())); - } - TypeRef::Integer | TypeRef::NonNegativeInteger => { - attributes.insert(field_name.clone(), Value::Integer(0)); - } - TypeRef::Double | TypeRef::Measure => { - attributes.insert(field_name.clone(), Value::Double(0.0)); - } - TypeRef::Boolean => { - attributes.insert(field_name.clone(), Value::String("".to_string())); - } - TypeRef::Date => { - attributes.insert(field_name.clone(), Value::String("".to_string())); - } - TypeRef::Point => { - // todo - } - TypeRef::Unknown => { - // todo - } - TypeRef::Named(_) => { - // todo - } - TypeRef::JsonString(_) => { - // todo - } - TypeRef::DateTime => { - // todo - } - } - } - } -} - -pub fn make_field_list(features: &Features) -> FieldInfoMap { - let mut fields: FieldInfoMap = Default::default(); +pub fn attributes_to_record( + attributes: Map, + fields_default: &HashMap, +) -> Record { + let mut record = dbase::Record::default(); - for (_, attributes) in features { - for (field_name, field_value) in attributes { - match field_value { - Value::String(_) | Value::Code(_) | Value::Uri(_) => { - fields.insert( - field_name.clone(), - FieldInfo { - field_type: TypeRef::String, - size: 255, - }, - ); - } - Value::Integer(_) | Value::NonNegativeInteger(_) => { - fields.insert( - field_name.clone(), - FieldInfo { - field_type: TypeRef::Integer, - size: 4, - }, - ); - } - Value::Double(_) | Value::Measure(_) => { - fields.insert( - field_name.clone(), - FieldInfo { - field_type: TypeRef::Double, - size: 8, - }, - ); - } - Value::Boolean(_) => { - fields.insert( - field_name.clone(), - FieldInfo { - field_type: TypeRef::Boolean, - size: 1, - }, - ); - } - Value::Date(_) => { - fields.insert( - field_name.clone(), - FieldInfo { - field_type: TypeRef::Date, - size: 8, - }, - ); - } - Value::Point(_) => { - // todo - } - Value::Array(_) => { - // todo - } - Value::Object(_) => { - // todo - } - } + // Fill in with default values for attributes that are not present + for (name, default) in fields_default { + if !attributes.contains_key(name) { + record.insert(name.to_string(), default.clone()); } } - fields -} - -pub fn attributes_to_record(attributes: Map) -> Record { - let mut record = dbase::Record::default(); for (attr_name, attr_value) in attributes { match attr_value { Value::String(s) => { - // Shapefile string type can only store up to 255 characters. - if s.len() > 255 { - log::warn!("{} value too long, truncating to 255 characters", attr_name); - record.insert(attr_name, FieldValue::Character(Some(s[0..255].to_owned()))); - } else { - record.insert(attr_name, FieldValue::Character(Some(s.to_owned()))); - } + // Shapefile cannot store string longer than 254 bytes + let s = trim_string_bytes(s, 254); + record.insert(attr_name, FieldValue::Character(Some(s))); } Value::Code(c) => { // value of the code record.insert(attr_name, FieldValue::Character(Some(c.value().to_owned()))); } Value::Integer(i) => { - record.insert(attr_name, FieldValue::Integer(i.to_owned() as i32)); + record.insert(attr_name, FieldValue::Numeric(Some(i as f64))); } Value::NonNegativeInteger(i) => { - record.insert(attr_name, FieldValue::Integer(i.to_owned() as i32)); + record.insert(attr_name, FieldValue::Numeric(Some(i as f64))); } - // Handle as Float Value::Double(d) => { - record.insert(attr_name, FieldValue::Float(Some(d.to_owned() as f32))); - } - // Handle as Float - Value::Measure(m) => { record.insert( attr_name, - FieldValue::Float(Some(m.value().to_owned() as f32)), + FieldValue::Numeric(match d.is_nan() { + true => None, + false => Some(d), + }), ); } + Value::Measure(m) => { + record.insert(attr_name, FieldValue::Numeric(Some(m.value()))); + } Value::Boolean(b) => { - record.insert(attr_name, FieldValue::Logical(Some(b.to_owned()))); + record.insert( + attr_name, + FieldValue::Character(Some(match b { + true => "true".to_string(), + false => "false".to_string(), + })), + ); } Value::Uri(u) => { record.insert( @@ -228,3 +147,19 @@ pub fn attributes_to_record(attributes: Map) -> Record { record } + +fn trim_string_bytes(s: String, n: usize) -> String { + let bytes = s.as_bytes(); + if bytes.len() <= n { + return s; + } + log::warn!("string is too long, truncating to {} characters", n); + match std::str::from_utf8(&bytes[..n]) { + Ok(valid_str) => valid_str.to_string(), + Err(e) => { + let valid_up_to = e.valid_up_to(); + let valid_str = std::str::from_utf8(&bytes[..valid_up_to]).unwrap(); + valid_str.to_string() + } + } +} diff --git a/nusamai/src/sink/shapefile/mod.rs b/nusamai/src/sink/shapefile/mod.rs index 43c41bb26..f05d5e288 100644 --- a/nusamai/src/sink/shapefile/mod.rs +++ b/nusamai/src/sink/shapefile/mod.rs @@ -23,7 +23,7 @@ use crate::pipeline::{Feedback, PipelineError, Receiver, Result}; use crate::sink::{DataRequirements, DataSink, DataSinkProvider, SinkInfo}; use crate::transformer; -use attributes::{attributes_to_record, fill_missing_fields, make_field_list, make_table_builder}; +use attributes::{attributes_to_record, make_table_builder}; use self::crs::ProjectionRepository; @@ -124,99 +124,122 @@ impl DataSink for ShapefileSink { }); // Write a Shapefile file set for each typename - for (typename, features) in grouped_features { - let table_info = make_field_list(&features); - let table_builder = make_table_builder(&table_info); - - // Create all the files needed for the shapefile to be complete (.shp, .shx, .dbf) - std::fs::create_dir_all(&self.output_path)?; - let shp_path = self - .output_path - .join(format!("{}.shp", typename.replace(':', "_"))); - - let feature_count = features.len(); - let has_no_geometry = features - .iter() - .all(|(shape, _)| matches!(shape, shapefile::Shape::NullShape)); - - // NOTE: Need to be scoped to drop the writer before removing .shp/.shx - { - let mut writer = shapefile::Writer::from_path(&shp_path, table_builder)?; - - // Write each feature - for (shape, mut attributes) in features { - fill_missing_fields(&mut attributes, &table_info); - let record = attributes_to_record(attributes); - - match shape { - shapefile::Shape::PolygonZ(polygon) => { - writer.write_shape_and_record(&polygon, &record) - } - shapefile::Shape::NullShape if !has_no_geometry => { - // FIXME: feature may have no geometry. e.g. - // - Building (no geometry) - // - BuildingPart (has geometry) - // - BuildingPart (has geometry) - log::warn!("Feature without geometry is not supported yet."); - Ok(()) - } - shapefile::Shape::NullShape if has_no_geometry => { - // Write dummy data once because shapefile-rs cannot write NullShape file - let point = shapefile::Point::default(); - writer.write_shape_and_record(&point, &record) - } - _ => { - log::warn!("Unsupported shape type"); - Ok(()) + grouped_features + .into_iter() + .try_for_each(|(typename, features)| { + feedback.ensure_not_canceled()?; + + let typedef = schema.types.get(&typename).ok_or_else(|| { + PipelineError::Other(format!( + "Type {} not found in the schema", + typename + )) + })?; + + let (table_builder, fields_default) = make_table_builder(typedef); + + // Create all the files needed for the shapefile to be complete (.shp, .shx, .dbf) + std::fs::create_dir_all(&self.output_path)?; + let shp_path = self + .output_path + .join(format!("{}.shp", typename.replace(':', "_"))); + + let feature_count = features.len(); + let has_no_geometry = features + .iter() + .all(|(shape, _)| matches!(shape, shapefile::Shape::NullShape)); + + // NOTE: Need to be scoped to drop the writer before removing .shp/.shx + { + let mut writer = shapefile::Writer::from_path(&shp_path, table_builder) + .map_err(|err| match err { + shapefile::Error::IoError(io_err) => { + PipelineError::IoError(io_err) + } + _ => PipelineError::Other(err.to_string()), + })?; + + // Write each feature + for (shape, attributes) in features { + let record = attributes_to_record(attributes, &fields_default); + + match shape { + shapefile::Shape::PolygonZ(polygon) => { + writer.write_shape_and_record(&polygon, &record).map_err( + |err| match err { + shapefile::Error::IoError(io_err) => { + PipelineError::IoError(io_err) + } + _ => PipelineError::Other(err.to_string()), + }, + )?; + } + shapefile::Shape::NullShape if !has_no_geometry => { + // FIXME: feature may have no geometry. e.g. + // - Building (no geometry) + // - BuildingPart (has geometry) + // - BuildingPart (has geometry) + log::warn!( + "Feature without geometry is not supported yet." + ); + } + shapefile::Shape::NullShape if has_no_geometry => { + // Write dummy data once because shapefile-rs cannot write NullShape file + let point = shapefile::Point::default(); + writer.write_shape_and_record(&point, &record).map_err( + |err| match err { + shapefile::Error::IoError(io_err) => { + PipelineError::IoError(io_err) + } + _ => PipelineError::Other(err.to_string()), + }, + )?; + } + _ => { + log::warn!("Unsupported shape type"); + } } - }?; + } } - } - - // If geometry exists, also write the projection information - if !has_no_geometry { - let repo = ProjectionRepository::new(); - - // write .prj file - let prj_path = &shp_path.with_extension("prj"); - crs::write_prj( - BufWriter::new(File::create(prj_path)?), - &repo, - &schema.epsg.unwrap(), - )?; - } - - // If this type has no geometry (i.e. Data or Object stereotype) - if has_no_geometry { - // Remove dummy .shp and .shx and write a NullShape file. - remove_file(&shp_path)?; - let shx_path = &shp_path.with_extension("shx"); - remove_file(shx_path)?; - null_shape::write_shp( - BufWriter::new(File::create(&shp_path)?), - feature_count, - )?; - null_shape::write_shx( - BufWriter::new(File::create(shx_path)?), - feature_count, - )?; - } - } - - Ok::<(), shapefile::Error>(()) + + // If this type has no geometry (i.e. Data or Object stereotype) + if has_no_geometry { + // Remove dummy .shp and .shx and write a NullShape file. + remove_file(&shp_path)?; + let shx_path = shp_path.with_extension("shx"); + remove_file(&shx_path)?; + null_shape::write_shp( + BufWriter::new(File::create(shp_path)?), + feature_count, + )?; + null_shape::write_shx( + BufWriter::new(File::create(shx_path)?), + feature_count, + )?; + } else { + // write .prj file if this type has geometry + + let repo = ProjectionRepository::new(); + let prj_path = &shp_path.with_extension("prj"); + crs::write_prj( + BufWriter::new(File::create(prj_path)?), + &repo, + &schema.epsg.unwrap(), + )?; + } + + Ok::<(), PipelineError>(()) + }) }, ); match ra { Ok(_) | Err(PipelineError::Canceled) => {} - Err(error) => feedback.fatal_error(error), + Err(err) => feedback.fatal_error(err), } match rb { Ok(_) => {} - Err(shapefile::Error::IoError(error)) => { - feedback.fatal_error(PipelineError::IoError(error)) - } - Err(error) => feedback.fatal_error(PipelineError::Other(error.to_string())), + Err(err) => feedback.fatal_error(err), } Ok(())