From 4efa526ce2ad5f2fc13f0530612b66c523496cf9 Mon Sep 17 00:00:00 2001 From: Keita Kobayashi Date: Fri, 25 Apr 2025 10:08:50 +0900 Subject: [PATCH] point-on-surface, XML file outline calculations --- Cargo.lock | 209 +++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/error.rs | 2 + src/geo.rs | 28 +++++ src/main.rs | 22 +++- src/outline_feature.rs | 162 ++++++++++++++++++++++++++ src/parse.rs | 100 ++++++++++++---- src/processor.rs | 81 ++++++++++++- src/writer.rs | 258 +++++++++++++++++------------------------ 9 files changed, 686 insertions(+), 177 deletions(-) create mode 100644 src/geo.rs create mode 100644 src/outline_feature.rs diff --git a/Cargo.lock b/Cargo.lock index 080caee..bfaf3e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,12 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "anstream" version = "0.6.18" @@ -358,6 +364,25 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -422,6 +447,22 @@ dependencies = [ "syn", ] +[[package]] +name = "earcutr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01" +dependencies = [ + "itertools", + "num-traits", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "encode_unicode" version = "1.0.0" @@ -494,12 +535,24 @@ dependencies = [ "tempfile", ] +[[package]] +name = "float_next_after" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" + [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "foreign-types" version = "0.3.2" @@ -567,6 +620,24 @@ dependencies = [ "version_check", ] +[[package]] +name = "geo" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4416397671d8997e9a3e7ad99714f4f00a22e9eaa9b966a5985d2194fc9e02e1" +dependencies = [ + "earcutr", + "float_next_after", + "geo-types", + "geographiclib-rs", + "i_overlay", + "log", + "num-traits", + "robust", + "rstar", + "spade", +] + [[package]] name = "geo-traits" version = "0.2.0" @@ -584,9 +655,20 @@ checksum = "62ddb1950450d67efee2bbc5e429c68d052a822de3aad010d28b351fbb705224" dependencies = [ "approx", "num-traits", + "rayon", + "rstar", "serde", ] +[[package]] +name = "geographiclib-rs" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e5ed84f8089c70234b0a8e0aedb6dc733671612ddc0d37c6066052f9781960" +dependencies = [ + "libm", +] + [[package]] name = "geojson" version = "0.24.2" @@ -633,11 +715,35 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "hash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "heapless" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +dependencies = [ + "hash32", + "stable_deref_trait", +] [[package]] name = "heck" @@ -769,6 +875,50 @@ dependencies = [ "tracing", ] +[[package]] +name = "i_float" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85df3a416829bb955fdc2416c7b73680c8dcea8d731f2c7aa23e1042fe1b8343" +dependencies = [ + "serde", +] + +[[package]] +name = "i_key_sort" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "347c253b4748a1a28baf94c9ce133b6b166f08573157e05afe718812bc599fcd" + +[[package]] +name = "i_overlay" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0542dfef184afdd42174a03dcc0625b6147fb73e1b974b1a08a2a42ac35cee49" +dependencies = [ + "i_float", + "i_key_sort", + "i_shape", + "i_tree", + "rayon", +] + +[[package]] +name = "i_shape" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a38f5a42678726718ff924f6d4a0e79b129776aeed298f71de4ceedbd091bce" +dependencies = [ + "i_float", + "serde", +] + +[[package]] +name = "i_tree" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "155181bc97d770181cf9477da51218a19ee92a8e5be642e796661aee2b601139" + [[package]] name = "icu_collections" version = "1.5.0" @@ -952,6 +1102,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -1075,6 +1234,7 @@ dependencies = [ "clap", "crossbeam-channel", "flatgeobuf", + "geo", "geo-types", "geozero", "indicatif", @@ -1291,6 +1451,26 @@ version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "read-logger" version = "0.2.0" @@ -1340,12 +1520,29 @@ dependencies = [ "windows-registry", ] +[[package]] +name = "robust" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf4a6aa5f6d6888f39e980649f3ad6b666acdce1d78e95b8a2cb076e687ae30" + [[package]] name = "roxmltree" version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" +[[package]] +name = "rstar" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "421400d13ccfd26dfa5858199c30a5d76f9c54e0dba7575273025b43c5175dbb" +dependencies = [ + "heapless", + "num-traits", + "smallvec", +] + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -1533,6 +1730,18 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spade" +version = "2.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ece03ff43cd2a9b57ebf776ea5e78bd30b3b4185a619f041079f4109f385034" +dependencies = [ + "hashbrown", + "num-traits", + "robust", + "smallvec", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" diff --git a/Cargo.toml b/Cargo.toml index 05406f2..059d00b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ anyhow = "1.0.98" clap = { version = "4.5.37", features = ["derive"] } crossbeam-channel = "0.5.15" flatgeobuf = "4.6.0" +geo = "0.30.0" geo-types = "0.7.16" geozero = "0.14.0" indicatif = "0.17.11" diff --git a/src/error.rs b/src/error.rs index ab6c8cc..17a3e58 100644 --- a/src/error.rs +++ b/src/error.rs @@ -22,6 +22,8 @@ pub enum Error { Projection(#[from] proj4rs::errors::Error), #[error("IO error: {0}")] FS(#[from] std::io::Error), + #[error("Parsing integer failed: {0}")] + ParseInt(#[from] std::num::ParseIntError), } pub type Result = std::result::Result; diff --git a/src/geo.rs b/src/geo.rs new file mode 100644 index 0000000..947eda0 --- /dev/null +++ b/src/geo.rs @@ -0,0 +1,28 @@ +use geo_types::{MultiPolygon, Point}; + +pub fn point_on_surface(mp: &MultiPolygon) -> Point { + use geo::{ + Triangle, + algorithm::{Area, Centroid, TriangulateEarcut}, + }; + + // get the biggest polygon + let polygon = mp + .into_iter() + .max_by(|a, b| a.unsigned_area().partial_cmp(&b.unsigned_area()).unwrap()) + .expect("MultiPolygon must have at least one Polygon"); + + // (1) Triangulate into a Vec> + let triangles: Vec> = polygon.earcut_triangles(); + + // (2) Pick the triangle with the max area + let largest = triangles + .into_iter() + .max_by(|a, b| a.unsigned_area().partial_cmp(&b.unsigned_area()).unwrap()) + .expect("polygon must have at least one triangle"); + + // (3) Its centroid is interior + + + largest.centroid() +} diff --git a/src/main.rs b/src/main.rs index f0a3313..93311f0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,6 +7,8 @@ static GLOBAL: Jemalloc = Jemalloc; mod constants; mod error; +mod geo; +mod outline_feature; mod parse; mod processor; mod reader; @@ -30,6 +32,11 @@ struct Cli { #[arg(required = true, num_args = 1..)] src_files: Vec, + /// Output FlatGeobuf file path containing the calculated outline based on the XML files. + /// This is extremely experimental. Use at your own risk. + #[arg(short, long)] + dst_xml_outline: Option, + /// Include features from arbitrary coordinate systems (unmapped files) ("任意座標系"). #[arg(short, long, default_value_t = false)] arbitrary: bool, @@ -80,11 +87,22 @@ fn main() -> Result<(), Box> { println!("Starting processing files..."); - let file_count = - processor::process_files(&cli.dst_file, cli.src_files, parse_options, write_options)?; + let outline_path = cli.dst_xml_outline.as_deref(); + + let file_count = processor::process_files( + &cli.dst_file, + cli.src_files, + parse_options, + write_options, + outline_path, + )?; println!("Finished processing {} XML file(s).", file_count); println!("Destination: {}", cli.dst_file.display()); + if let Some(outline_path) = outline_path { + println!("Outline features written to: {}", outline_path.display()); + } + Ok(()) } diff --git a/src/outline_feature.rs b/src/outline_feature.rs new file mode 100644 index 0000000..c44eab5 --- /dev/null +++ b/src/outline_feature.rs @@ -0,0 +1,162 @@ +use crate::error::Result; +use crate::impl_fgb_columnar; +use crate::parse::ParsedXML; +use geo::algorithm::ConcaveHull; +use geo_types::MultiPolygon; + +/// Represents a feature that is the outline of multiple features +#[derive(Debug, Clone)] +pub struct OutlineFeature { + pub geometry: MultiPolygon, + pub props: OutlineFeatureProperties, +} + +/// Properties for a outline feature +#[derive(Debug, Clone)] +pub struct OutlineFeatureProperties { + // Common properties from CommonProperties + pub 地図名: String, + pub 市区町村コード: u32, + pub 市区町村名: String, + pub 座標系: String, + pub 測地系判別: Option, + + // Additional properties for outline + pub count: u32, +} + +impl_fgb_columnar! { + for OutlineFeature { + { name: "地図名", field: 地図名, ctype: String, nullable: false }, + { name: "市区町村コード", field: 市区町村コード, ctype: UInt, nullable: false }, + { name: "市区町村名", field: 市区町村名, ctype: String, nullable: false }, + { name: "座標系", field: 座標系, ctype: String, nullable: false }, + { name: "測地系判別", field: 測地系判別, ctype: String, nullable: true }, + { name: "count", field: count, ctype: UInt, nullable: false }, + } +} + +/// Calculate the outline of all features in a ParsedXML struct +/// +/// This function combines all geometries into a single MultiPolygon and creates +/// a feature that represents the outline, as determined by the concave hull. +/// Common properties are preserved, and a count property is added to indicate +/// the number of features in the outline. +/// +/// # Arguments +/// +/// * `parsed_xml` - The ParsedXML struct containing features to outline +/// +/// # Returns +/// +/// * `Result` - A feature representing the outline of all features +pub fn calculate_feature_outline(parsed_xml: &ParsedXML) -> Result { + if parsed_xml.features.is_empty() { + return Err(crate::error::Error::MissingElement( + "No features to calculate outline".to_string(), + )); + } + + // Get the first feature to extract common properties + let first_feature = &parsed_xml.features[0]; + + // Create a MultiPolygon of all the multipolygons from the features + let all_geometries = MultiPolygon( + parsed_xml + .features + .iter() + .flat_map(|mp| mp.geometry.0.clone()) + .collect(), + ); + // Calculate the concave hull of the combined geometry + let outline_geometry = all_geometries.concave_hull(1.0); + + // Create the outline feature with common properties and the count + let outline_feature = OutlineFeature { + geometry: outline_geometry.into(), + props: OutlineFeatureProperties { + 地図名: first_feature.props.地図名.clone(), + 市区町村コード: first_feature.props.市区町村コード, + 市区町村名: first_feature.props.市区町村名.clone(), + 座標系: first_feature.props.座標系.clone(), + 測地系判別: first_feature.props.測地系判別.clone(), + count: parsed_xml.features.len() as u32, + }, + }; + + Ok(outline_feature) +} + +#[cfg(test)] +mod tests { + use crate::parse::{Feature, FeatureProperties}; + + use super::*; + use geo_types::{Coord, LineString, Polygon}; + + #[test] + fn test_calculate_feature_outline() { + // Create test features with different geometries + let feature1 = create_test_feature(0.0, 0.0, 1.0, 1.0); + let feature2 = create_test_feature(0.5, 0.5, 1.5, 1.5); + + // Create a ParsedXML with these features + let parsed_xml = ParsedXML { + file_name: "test.xml".to_string(), + features: vec![feature1, feature2], + }; + + // Calculate the outline + let outline_feature = calculate_feature_outline(&parsed_xml).unwrap(); + + // Check the count property + assert_eq!(outline_feature.props.count, 2); + + // The outline should cover both original features + // For a proper test, we could check specific properties of the geometry + // but for simplicity, we'll just check that it contains at least one polygon + assert!(!outline_feature.geometry.0.is_empty()); + } + + // Helper function to create a test feature with a rectangular polygon + fn create_test_feature(min_x: f64, min_y: f64, max_x: f64, max_y: f64) -> Feature { + let polygon = Polygon::new( + LineString::from(vec![ + Coord { x: min_x, y: min_y }, + Coord { x: max_x, y: min_y }, + Coord { x: max_x, y: max_y }, + Coord { x: min_x, y: max_y }, + Coord { x: min_x, y: min_y }, + ]), + vec![], + ); + + let multi_polygon = MultiPolygon::new(vec![polygon]); + + Feature { + geometry: multi_polygon, + props: FeatureProperties { + 地図名: "Test Map".to_string(), + 市区町村コード: 12345, + 市区町村名: "Test City".to_string(), + 座標系: "WGS84".to_string(), + 測地系判別: Some("Test".to_string()), + 筆id: "test-id".to_string(), + 精度区分: None, + 大字コード: None, + 丁目コード: None, + 小字コード: None, + 予備コード: None, + 大字名: None, + 丁目名: None, + 小字名: None, + 予備名: None, + 地番: None, + 座標値種別: None, + 筆界未定構成筆: None, + 代表点緯度: 0.5, + 代表点経度: 0.5, + }, + } + } +} diff --git a/src/parse.rs b/src/parse.rs index b0e594a..4fa6788 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1,5 +1,7 @@ use crate::constants::{get_proj, get_xml_namespace}; use crate::error::{Error, Result}; +use crate::geo::point_on_surface; +use crate::impl_fgb_columnar; use crate::reader::FileData; use geo_types::{LineString, MultiPolygon, Point, Polygon}; use proj4rs::proj::Proj; @@ -17,14 +19,49 @@ pub struct Feature { pub props: FeatureProperties, } +impl_fgb_columnar! { + for Feature { + { name: "地図名", field: 地図名, ctype: String, nullable: false }, + { name: "市区町村コード", field: 市区町村コード, ctype: UInt, nullable: false }, + { name: "市区町村名", field: 市区町村名, ctype: String, nullable: false }, + { name: "座標系", field: 座標系, ctype: String, nullable: false }, + { name: "測地系判別", field: 測地系判別, ctype: String, nullable: true }, + + { name: "筆id", field: 筆id, ctype: String, nullable: true }, + { name: "精度区分", field: 精度区分, ctype: String, nullable: true }, + { name: "大字コード", field: 大字コード, ctype: UInt, nullable: true }, + { name: "丁目コード", field: 丁目コード, ctype: UInt, nullable: true }, + { name: "小字コード", field: 小字コード, ctype: UInt, nullable: true }, + { name: "予備コード", field: 予備コード, ctype: UInt, nullable: true }, + { name: "大字名", field: 大字名, ctype: String, nullable: true }, + { name: "丁目名", field: 丁目名, ctype: String, nullable: true }, + { name: "小字名", field: 小字名, ctype: String, nullable: true }, + { name: "予備名", field: 予備名, ctype: String, nullable: true }, + { name: "地番", field: 地番, ctype: String, nullable: true }, + { name: "座標値種別", field: 座標値種別, ctype: String, nullable: true }, + { name: "筆界未定構成筆", field: 筆界未定構成筆, ctype: String, nullable: true }, + + { name: "代表点緯度", field: 代表点緯度, ctype: Double, nullable: false }, + { name: "代表点経度", field: 代表点経度, ctype: Double, nullable: false }, + } +} + #[derive(Debug, Clone, Default)] pub struct FeatureProperties { + // common props + pub 地図名: String, + pub 市区町村コード: u32, + pub 市区町村名: String, + pub 座標系: String, + pub 測地系判別: Option, + + // props specific to each feature pub 筆id: String, pub 精度区分: Option, - pub 大字コード: Option, - pub 丁目コード: Option, - pub 小字コード: Option, - pub 予備コード: Option, + pub 大字コード: Option, + pub 丁目コード: Option, + pub 小字コード: Option, + pub 予備コード: Option, pub 大字名: Option, pub 丁目名: Option, pub 小字名: Option, @@ -32,11 +69,14 @@ pub struct FeatureProperties { pub 地番: Option, pub 座標値種別: Option, pub 筆界未定構成筆: Option, + + pub 代表点緯度: f64, + pub 代表点経度: f64, } pub struct CommonProperties { pub 地図名: String, - pub 市区町村コード: String, + pub 市区町村コード: u32, pub 市区町村名: String, pub 座標系: String, pub 測地系判別: Option, @@ -293,6 +333,7 @@ fn parse_surfaces( fn parse_features( subject_elem: &Node, surfaces: &HashMap, + common_props: &CommonProperties, options: &ParseOptions, ) -> Result> { let mut features: Vec = Vec::new(); @@ -333,15 +374,32 @@ fn parse_features( } } + let geometry = geometry.ok_or_else(|| Error::MissingElement("geometry".to_string()))?; + let point = point_on_surface(&geometry); + features.push(Feature { - geometry: geometry.ok_or_else(|| Error::MissingElement("geometry".to_string()))?, + geometry, props: FeatureProperties { + 地図名: common_props.地図名.clone(), + 市区町村コード: common_props.市区町村コード, + 市区町村名: common_props.市区町村名.clone(), + 座標系: common_props.座標系.clone(), + 測地系判別: common_props.測地系判別.clone(), + 筆id: fude_id.to_string(), 精度区分: prop_map.remove("精度区分"), - 大字コード: prop_map.remove("大字コード"), - 丁目コード: prop_map.remove("丁目コード"), - 小字コード: prop_map.remove("小字コード"), - 予備コード: prop_map.remove("予備コード"), + 大字コード: prop_map + .remove("大字コード") + .and_then(|s| s.parse::().ok()), + 丁目コード: prop_map + .remove("丁目コード") + .and_then(|s| s.parse::().ok()), + 小字コード: prop_map + .remove("小字コード") + .and_then(|s| s.parse::().ok()), + 予備コード: prop_map + .remove("予備コード") + .and_then(|s| s.parse::().ok()), 大字名: prop_map.remove("大字名"), 丁目名: prop_map.remove("丁目名"), 小字名: prop_map.remove("小字名"), @@ -349,13 +407,16 @@ fn parse_features( 地番: prop_map.remove("地番"), 座標値種別: prop_map.remove("座標値種別"), 筆界未定構成筆: prop_map.remove("筆界未定構成筆"), + + 代表点緯度: point.y(), + 代表点経度: point.x(), }, }); } Ok(features) } -fn parse_base_properties(root: &Node) -> Result { +fn parse_common_properties(root: &Node) -> Result { let map_name = get_child_element(root, "地図名")? .text() .ok_or_else(|| Error::MissingElement("地図名".to_string()))?; @@ -373,7 +434,7 @@ fn parse_base_properties(root: &Node) -> Result { Ok(CommonProperties { 地図名: map_name.to_string(), - 市区町村コード: city_code.to_string(), + 市区町村コード: city_code.parse()?, 市区町村名: city_name.to_string(), 座標系: crs.to_string(), 測地系判別: crs_det, @@ -383,7 +444,6 @@ fn parse_base_properties(root: &Node) -> Result { pub struct ParsedXML { pub file_name: String, pub features: Vec, - pub common_props: CommonProperties, } // --- Main Parsing Function --- @@ -392,7 +452,7 @@ pub fn parse_xml_content(file: &FileData, options: &ParseOptions) -> Result Result Result, parse_options: ParseOptions, write_options: WriterOptions, + outline_output_path: Option<&Path>, ) -> Result { let concurrency = num_cpus::get(); let m = MultiProgress::with_draw_target(indicatif::ProgressDrawTarget::stdout_with_hz(2)); @@ -43,13 +45,27 @@ pub fn process_files( .with_message("XML parse"), ); // Writer channels - let (writer_tx, writer_rx) = bounded::(1); + let (writer_tx, writer_rx) = bounded::>(1); let writer_pb = m.add( indicatif::ProgressBar::new(0) .with_style(sty.clone()) .with_message("FGB write"), ); + // We'll collect all parsed XML data if a outline is requested + let calculate_xml_outline = outline_output_path.is_some(); + let (outline_writer_tx, outline_writer_rx) = bounded::>(1); + let mut outline_writer_pb: Option<_> = None; + if calculate_xml_outline { + outline_writer_pb = Some( + m.add( + indicatif::ProgressBar::new(0) + .with_style(sty.clone()) + .with_message("outline out"), + ), + ); + } + let start = Instant::now(); let mut handles: Vec> = Vec::new(); { @@ -106,8 +122,12 @@ pub fn process_files( for i in 0..std::cmp::max(2, concurrency - 1) { let parser_rx = parser_rx.clone(); let writer_tx = writer_tx.clone(); + let outline_writer_tx = outline_writer_tx.clone(); + let parser_pb = parser_pb.clone(); let writer_pb = writer_pb.clone(); + let outline_writer_pb = outline_writer_pb.clone(); + let options = parse_options.clone(); handles.push(thread::spawn(move || { while let Ok(file_data) = parser_rx.recv() { @@ -115,6 +135,11 @@ pub fn process_files( let parsed_xml = crate::parse::parse_xml_content(&file_data, &options); match parsed_xml { Ok(parsed) => { + let parsed = Arc::new(parsed); + if calculate_xml_outline { + outline_writer_pb.as_ref().unwrap().inc_length(1); + outline_writer_tx.send(parsed.clone()).unwrap(); + } info!("[XML {:>2}] Parsed file: {}", i, file_data.file_name); writer_pb.inc_length(1); parser_pb.inc(1); @@ -133,15 +158,18 @@ pub fn process_files( })); } drop(writer_tx); + drop(outline_writer_tx); { let output_path = output_path.to_path_buf(); let writer_pb = writer_pb.clone(); + let write_options = write_options.clone(); + handles.push(thread::spawn(move || { let mut fgb = crate::writer::FGBWriter::new(&output_path, &write_options).unwrap(); while let Ok(parsed_xml) = writer_rx.recv() { info!("[FGB] Adding features from file: {}", parsed_xml.file_name); - let write_result = fgb.add_xml_features(parsed_xml); + let write_result = fgb.add_features(&parsed_xml.features); match write_result { Ok(_) => { writer_pb.inc(1); @@ -156,10 +184,59 @@ pub fn process_files( info!("[FGB] Finished writing file: {}", output_path.display()); })); } + + if calculate_xml_outline { + let outline_writer_pb = outline_writer_pb.unwrap().clone(); + let outline_output_path = outline_output_path.unwrap().to_path_buf(); + + handles.push(thread::spawn(move || { + let mut fgb = + crate::writer::FGBWriter::new(&outline_output_path, &write_options).unwrap(); + while let Ok(parsed_xml) = outline_writer_rx.recv() { + info!( + "[outline] Adding features from file: {}", + parsed_xml.file_name + ); + let outline_feature = calculate_feature_outline(&parsed_xml); + if outline_feature.is_err() { + error!( + "[outline] Error calculating outline for file {}: {}", + parsed_xml.file_name, + outline_feature.err().unwrap() + ); + continue; + } + let write_result = fgb.add_features(&[outline_feature.unwrap()]); + match write_result { + Ok(_) => { + outline_writer_pb.inc(1); + } + Err(e) => { + eprintln!( + "Error writing file {}: {}", + outline_output_path.display(), + e + ); + } + } + } + info!( + "[outline] Starting output file: {}", + outline_output_path.display() + ); + fgb.flush().unwrap(); + info!( + "[outline] Finished writing file: {}", + outline_output_path.display() + ); + })); + } + let _ = handles .into_iter() .map(|h| h.join().expect("Thread panicked")) .collect::>(); + let elapsed = start.elapsed(); xml_pb.finish(); diff --git a/src/writer.rs b/src/writer.rs index 9bebdcf..686241f 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -1,22 +1,107 @@ -use crate::parse::ParsedXML; use anyhow::Result; -use flatgeobuf::{ - ColumnType, FgbCrs, FgbWriter, FgbWriterOptions, GeometryType, - geozero::{ColumnValue, PropertyProcessor}, -}; -use geo_types::Geometry; +use flatgeobuf::{FgbCrs, FgbWriter, FgbWriterOptions, GeometryType}; use std::io::{BufWriter, Write}; +use std::marker::PhantomData; use std::{fs::File, path::Path}; +pub trait AsOption<'a, T> { + fn as_option(&'a self) -> Option; +} + +impl<'a> AsOption<'a, &'a str> for String { + fn as_option(&'a self) -> Option<&'a str> { + Some(self.as_str()) + } +} + +impl<'a> AsOption<'a, &'a str> for Option { + fn as_option(&'a self) -> Option<&'a str> { + self.as_deref() + } +} + +impl<'a> AsOption<'a, u32> for u32 { + fn as_option(&'a self) -> Option { + Some(*self) + } +} + +impl<'a> AsOption<'a, u32> for Option { + fn as_option(&'a self) -> Option { + *self + } +} + +impl<'a> AsOption<'a, f64> for f64 { + fn as_option(&'a self) -> Option { + Some(*self) + } +} + +impl<'a> AsOption<'a, f64> for Option { + fn as_option(&'a self) -> Option { + *self + } +} + +pub trait FgbColumnar { + /// Call once before writing any rows: + fn register_columns(fgb: &mut FgbWriter); + /// Call per-record to append its properties: + fn write_feature(&self, fgb: &mut FgbWriter); +} + +#[macro_export] +macro_rules! impl_fgb_columnar { + ( + for $ty:ident { + $( + { name: $col_name:expr, field: $field:ident, ctype: $ctype:ident, nullable: $nullable:literal } + ),* $(,)? + } + ) => { + impl $crate::writer::FgbColumnar for $ty { + fn register_columns(fgb: &mut flatgeobuf::FgbWriter) { + $( + fgb.add_column($col_name, flatgeobuf::ColumnType::$ctype, |_, c| { + c.nullable = $nullable; + }); + )* + } + + fn write_feature(&self, fgb: &mut flatgeobuf::FgbWriter) { + use flatgeobuf::geozero::PropertyProcessor; + use $crate::writer::AsOption; + + let geometry: geo_types::Geometry = self.geometry.clone().into(); + let _ = fgb.add_feature_geom(geometry, |feat| { + let mut _idx = 0; + $( + if let Some(val) = self.props.$field.as_option() { + feat.property(_idx, $col_name, &flatgeobuf::geozero::ColumnValue::$ctype(val)).unwrap(); + } + _idx += 1; + )* + }); + } + } + } +} + +#[derive(Debug, Clone)] pub struct WriterOptions { pub write_index: bool, } -pub struct FGBWriter<'a> { +pub struct FGBWriter<'a, T: FgbColumnar> { fgb: FgbWriter<'a>, writer: BufWriter, + phantom: PhantomData, } -impl FGBWriter<'_> { +impl FGBWriter<'_, T> +where + T: FgbColumnar, +{ pub fn new(output_path: &Path, options: &WriterOptions) -> Result { let file = File::create(output_path)?; let writer = BufWriter::new(file); @@ -33,141 +118,19 @@ impl FGBWriter<'_> { ..Default::default() }, )?; - fgb.add_column("地図名", ColumnType::String, |_, _| {}); - fgb.add_column("市区町村コード", ColumnType::String, |_, _| {}); - fgb.add_column("市区町村名", ColumnType::String, |_, _| {}); - fgb.add_column("座標系", ColumnType::String, |_, _| {}); - fgb.add_column("測地系判別", ColumnType::String, |_, col| { - col.nullable = true; - }); - fgb.add_column("筆id", ColumnType::String, |_, _| {}); - fgb.add_column("精度区分", ColumnType::String, |_, col| { - col.nullable = true; - }); - fgb.add_column("大字コード", ColumnType::String, |_, col| { - col.nullable = true; - }); - fgb.add_column("丁目コード", ColumnType::String, |_, col| { - col.nullable = true; - }); - fgb.add_column("小字コード", ColumnType::String, |_, col| { - col.nullable = true; - }); - fgb.add_column("予備コード", ColumnType::String, |_, col| { - col.nullable = true; - }); - fgb.add_column("大字名", ColumnType::String, |_, col| { - col.nullable = true; - }); - fgb.add_column("丁目名", ColumnType::String, |_, col| { - col.nullable = true; - }); - fgb.add_column("小字名", ColumnType::String, |_, col| { - col.nullable = true; - }); - fgb.add_column("予備名", ColumnType::String, |_, col| { - col.nullable = true; - }); - fgb.add_column("地番", ColumnType::String, |_, col| { - col.nullable = true; - }); - fgb.add_column("座標値種別", ColumnType::String, |_, col| { - col.nullable = true; - }); - fgb.add_column("筆界未定構成筆", ColumnType::String, |_, col| { - col.nullable = true; - }); - - Ok(FGBWriter { fgb, writer }) + T::register_columns(&mut fgb); + + Ok(FGBWriter:: { + fgb, + writer, + phantom: PhantomData, + }) } - pub fn add_xml_features(&mut self, parsed: ParsedXML) -> Result<()> { - // Write each feature, consuming the parsed data - for feature in parsed.features { - let geometry: Geometry = feature.geometry.into(); - self.fgb.add_feature_geom(geometry, |feat| { - feat.property( - 0, - "地図名", - &ColumnValue::String(&parsed.common_props.地図名), - ) - .unwrap(); - feat.property( - 1, - "市区町村コード", - &ColumnValue::String(&parsed.common_props.市区町村コード), - ) - .unwrap(); - feat.property( - 2, - "市区町村名", - &ColumnValue::String(&parsed.common_props.市区町村名), - ) - .unwrap(); - feat.property( - 3, - "座標系", - &ColumnValue::String(&parsed.common_props.座標系), - ) - .unwrap(); - if let Some(ref conversion) = parsed.common_props.測地系判別 { - feat.property(4, "測地系判別", &ColumnValue::String(conversion)) - .unwrap(); - } - feat.property(5, "筆id", &ColumnValue::String(&feature.props.筆id)) - .unwrap(); - - // only set optional properties if present, leave others null - if let Some(v) = feature.props.精度区分.as_ref() { - feat.property(6, "精度区分", &ColumnValue::String(v)) - .unwrap(); - } - if let Some(v) = feature.props.大字コード.as_ref() { - feat.property(7, "大字コード", &ColumnValue::String(v)) - .unwrap(); - } - if let Some(v) = feature.props.丁目コード.as_ref() { - feat.property(8, "丁目コード", &ColumnValue::String(v)) - .unwrap(); - } - if let Some(v) = feature.props.小字コード.as_ref() { - feat.property(9, "小字コード", &ColumnValue::String(v)) - .unwrap(); - } - if let Some(v) = feature.props.予備コード.as_ref() { - feat.property(10, "予備コード", &ColumnValue::String(v)) - .unwrap(); - } - if let Some(v) = feature.props.大字名.as_ref() { - feat.property(11, "大字名", &ColumnValue::String(v)) - .unwrap(); - } - if let Some(v) = feature.props.丁目名.as_ref() { - feat.property(12, "丁目名", &ColumnValue::String(v)) - .unwrap(); - } - if let Some(v) = feature.props.小字名.as_ref() { - feat.property(13, "小字名", &ColumnValue::String(v)) - .unwrap(); - } - if let Some(v) = feature.props.予備名.as_ref() { - feat.property(14, "予備名", &ColumnValue::String(v)) - .unwrap(); - } - if let Some(v) = feature.props.地番.as_ref() { - feat.property(15, "地番", &ColumnValue::String(v)).unwrap(); - } - if let Some(v) = feature.props.座標値種別.as_ref() { - feat.property(16, "座標値種別", &ColumnValue::String(v)) - .unwrap(); - } - if let Some(v) = feature.props.筆界未定構成筆.as_ref() { - feat.property(17, "筆界未定構成筆", &ColumnValue::String(v)) - .unwrap(); - } - })?; + pub fn add_features(&mut self, features: &[T]) -> Result<()> { + for feature in features { + feature.write_feature(&mut self.fgb); } - Ok(()) } @@ -183,11 +146,9 @@ impl FGBWriter<'_> { #[cfg(test)] mod tests { - use geo_types::{MultiPolygon, polygon}; - - use crate::parse::{CommonProperties, Feature, FeatureProperties}; - use super::*; + use crate::parse::{Feature, FeatureProperties, ParsedXML}; + use geo_types::{MultiPolygon, polygon}; use std::path::PathBuf; fn testdata_path() -> PathBuf { @@ -208,17 +169,10 @@ mod tests { ]]), props: FeatureProperties::default(), }], - common_props: CommonProperties { - 地図名: "テスト地図".to_string(), - 市区町村コード: "00000".to_string(), - 市区町村名: "テスト市".to_string(), - 座標系: "公共座標1系".to_string(), - 測地系判別: Some("変換".to_string()), - }, }; let output_path = testdata_path().join("output.fgb"); let mut fgb = FGBWriter::new(&output_path, &WriterOptions { write_index: true })?; - fgb.add_xml_features(parsed)?; + fgb.add_features(&parsed.features)?; fgb.flush()?; Ok(()) }