diff --git a/Cargo.lock b/Cargo.lock index 42b1220ea..96b2313b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -297,8 +297,7 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jiter" version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8243cf2d026264056bfacf305e54f5bee8866fd46b4c1873adcaebf614a0d306" +source = "git+https://github.com/pydantic/jiter?branch=dh/simpler-value#a3c9ea312b2c37ca664bf06bbb600284d30c91b8" dependencies = [ "ahash", "bitvec", @@ -312,9 +311,9 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "0.8.5" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" dependencies = [ "lexical-parse-integer", "lexical-util", @@ -323,9 +322,9 @@ dependencies = [ [[package]] name = "lexical-parse-integer" -version = "0.8.6" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" dependencies = [ "lexical-util", "static_assertions", @@ -333,9 +332,9 @@ dependencies = [ [[package]] name = "lexical-util" -version = "0.8.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" dependencies = [ "static_assertions", ] diff --git a/Cargo.toml b/Cargo.toml index 6f8842bea..44d65569e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,7 @@ idna = "1.0.3" base64 = "0.22.1" num-bigint = "0.4.6" uuid = "1.12.1" -jiter = { version = "0.8.2", features = ["python"] } +jiter = { git = 
"https://github.com/pydantic/jiter", branch = "dh/simpler-value", features = ["python"] } hex = "0.4.3" [lib] diff --git a/src/input/input_abstract.rs b/src/input/input_abstract.rs index aa9c6eb16..5fc897edf 100644 --- a/src/input/input_abstract.rs +++ b/src/input/input_abstract.rs @@ -1,6 +1,7 @@ use std::convert::Infallible; use std::fmt; +use jiter::JsonValue; use pyo3::exceptions::PyValueError; use pyo3::types::{PyDict, PyList, PyString}; use pyo3::{intern, prelude::*, IntoPyObjectExt}; @@ -73,6 +74,10 @@ pub trait Input<'py>: fmt::Debug { None } + fn as_json(&self) -> Option<&JsonValue<'_>> { + None + } + fn as_kwargs(&self, py: Python<'py>) -> Option>; type Arguments<'a>: Arguments<'py> @@ -240,6 +245,15 @@ pub trait ValidatedDict<'py> { type Item<'a>: BorrowInput<'py> where Self: 'a; + + /// Whether this dict requires consuming the input by `get_item` rather than iterating + /// + /// (This is true for Python dicts in v2 to preserve semantics in the case of overridden classes, + /// maybe in v3 we change this for performance?) 
+ fn should_consume_model_input_by_get_item(&self) -> bool { + false + } + fn get_item<'k>(&self, key: &'k LookupKey) -> ValResult)>>; // FIXME this is a bit of a leaky abstraction fn is_py_get_attr(&self) -> bool { diff --git a/src/input/input_json.rs b/src/input/input_json.rs index 139c71a25..ae0316177 100644 --- a/src/input/input_json.rs +++ b/src/input/input_json.rs @@ -1,9 +1,8 @@ use std::borrow::Cow; -use jiter::{JsonArray, JsonObject, JsonValue, LazyIndexMap}; +use jiter::{JsonArray, JsonObject, JsonValue}; use pyo3::prelude::*; use pyo3::types::{PyDict, PyList, PyString}; -use smallvec::SmallVec; use speedate::MicrosecondsPrecisionOverflowBehavior; use strum::EnumMessage; @@ -44,6 +43,10 @@ impl From> for LocItem { } impl<'py, 'data> Input<'py> for JsonValue<'data> { + fn as_json(&self) -> Option<&JsonValue<'_>> { + Some(self) + } + #[inline] fn py_converter(&self) -> impl IntoPyObject<'py> + '_ { self @@ -62,7 +65,9 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> { match self { JsonValue::Object(object) => { let dict = PyDict::new(py); - for (k, v) in LazyIndexMap::iter(object) { + for (k, v) in object.as_slice() { + // TODO: jiter doesn't deduplicate keys, so we should probably do that here to + // avoid potential wasted work creating Python objects. dict.set_item(k, v).unwrap(); } Some(dict) @@ -253,7 +258,14 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> { JsonValue::Str(s) => Ok(string_to_vec(s).into()), JsonValue::Object(object) => { // return keys iterator to match python's behavior - let keys: JsonArray = JsonArray::new(object.keys().map(|k| JsonValue::Str(k.clone())).collect()); + // FIXME jiter doesn't deduplicate keys, should probably do that here before iteration. 
+ let keys: JsonArray = JsonArray::new( + object + .as_slice() + .iter() + .map(|(k, _)| JsonValue::Str(k.clone())) + .collect(), + ); Ok(GenericIterator::from(keys).into_static()) } _ => Err(ValError::new(ErrorTypeDefaults::IterableType, self)), @@ -543,11 +555,11 @@ impl<'data> ValidatedDict<'_> for &'_ JsonObject<'data> { &'a self, consumer: impl ConsumeIterator, Self::Item<'a>)>, Output = R>, ) -> ValResult { - Ok(consumer.consume_iterator(LazyIndexMap::iter(self).map(|(k, v)| Ok((k.as_ref(), v))))) + Ok(consumer.consume_iterator(self.as_slice().iter().map(|(k, v)| Ok((k.as_ref(), v))))) } fn last_key(&self) -> Option> { - self.keys().last().map(AsRef::as_ref) + self.last().map(|(k, _)| k.as_ref()) } } @@ -555,7 +567,7 @@ impl<'a, 'py, 'data> ValidatedList<'py> for &'a JsonArray<'data> { type Item = &'a JsonValue<'data>; fn len(&self) -> Option { - Some(SmallVec::len(self)) + Some(Vec::len(self)) } fn iterate(self, consumer: impl ConsumeIterator, Output = R>) -> ValResult { Ok(consumer.consume_iterator(self.iter().map(Ok))) @@ -569,7 +581,7 @@ impl<'a, 'data> ValidatedTuple<'_> for &'a JsonArray<'data> { type Item = &'a JsonValue<'data>; fn len(&self) -> Option { - Some(SmallVec::len(self)) + Some(Vec::len(self)) } fn iterate(self, consumer: impl ConsumeIterator, Output = R>) -> ValResult { Ok(consumer.consume_iterator(self.iter().map(Ok))) @@ -637,12 +649,12 @@ impl<'data> KeywordArgs<'_> for JsonObject<'data> { Self: 'a; fn len(&self) -> usize { - LazyIndexMap::len(self) + Vec::len(self) } fn get_item<'k>(&self, key: &'k LookupKey) -> ValResult)>> { key.json_get(self) } fn iter(&self) -> impl Iterator, Self::Item<'_>)>> { - LazyIndexMap::iter(self).map(|(k, v)| Ok((k.as_ref(), v))) + self.as_slice().iter().map(|(k, v)| Ok((k.as_ref(), v))) } } diff --git a/src/input/input_python.rs b/src/input/input_python.rs index ea6eab054..136c6ea0b 100644 --- a/src/input/input_python.rs +++ b/src/input/input_python.rs @@ -823,6 +823,12 @@ impl<'py> ValidatedDict<'py> for 
GenericPyMapping<'_, 'py> { where Self: 'a; + fn should_consume_model_input_by_get_item(&self) -> bool { + // Backwards compatibility; in v2 we used get_item rather than iteration and changing this + // might have implications for dict / mapping behaviour + true + } + fn get_item<'k>( &self, key: &'k crate::lookup_key::LookupKey, diff --git a/src/input/input_string.rs b/src/input/input_string.rs index a50b3cff2..323a2be4a 100644 --- a/src/input/input_string.rs +++ b/src/input/input_string.rs @@ -305,6 +305,11 @@ impl<'py> ValidatedDict<'py> for StringMappingDict<'py> { = StringMapping<'py> where Self: 'a; + + fn should_consume_model_input_by_get_item(&self) -> bool { + true + } + fn get_item<'k>(&self, key: &'k LookupKey) -> ValResult)>> { key.py_get_string_mapping_item(&self.0) } diff --git a/src/lookup_key.rs b/src/lookup_key.rs index c83a00583..9c8c7c351 100644 --- a/src/lookup_key.rs +++ b/src/lookup_key.rs @@ -262,20 +262,33 @@ impl LookupKey { &'s self, dict: &'a JsonObject<'data>, ) -> ValResult)>> { + // FIXME: use of find_map in here probably leads to quadratic complexity match self { - Self::Simple(path) => match dict.get(path.first_key()) { + Self::Simple(path) => match dict + .iter() + .rev() + .find_map(|(k, v)| (k == path.first_key()).then_some(v)) + { Some(value) => { debug_assert!(path.rest.is_empty()); Ok(Some((path, value))) } None => Ok(None), }, - Self::Choice { path1, path2 } => match dict.get(path1.first_key()) { + Self::Choice { path1, path2 } => match dict + .iter() + .rev() + .find_map(|(k, v)| (k == path1.first_key()).then_some(v)) + { Some(value) => { debug_assert!(path1.rest.is_empty()); Ok(Some((path1, value))) } - None => match dict.get(path2.first_key()) { + None => match dict + .iter() + .rev() + .find_map(|(k, v)| (k == path2.first_key()).then_some(v)) + { Some(value) => { debug_assert!(path2.rest.is_empty()); Ok(Some((path2, value))) @@ -287,7 +300,11 @@ impl LookupKey { for path in path_choices { // first step is different from 
the rest as we already know dict is JsonObject // because of above checks, we know that path should have at least one element, hence unwrap - let v: &JsonValue = match dict.get(path.first_item.key.as_str()) { + let v: &JsonValue = match dict + .iter() + .rev() + .find_map(|(k, v)| (k == path.first_key()).then_some(v)) + { Some(v) => v, None => continue, }; @@ -406,6 +423,10 @@ impl LookupPath { pub fn first_key(&self) -> &str { &self.first_item.key } + + pub fn rest(&self) -> &[PathItem] { + &self.rest + } } #[derive(Debug, Clone)] @@ -420,8 +441,8 @@ pub(crate) enum PathItem { /// we store both the string and pystring to save creating the pystring for python #[derive(Debug, Clone)] pub(crate) struct PathItemString { - key: String, - py_key: Py, + pub key: String, + pub py_key: Py, } impl fmt::Display for PathItem { @@ -527,7 +548,7 @@ impl PathItem { pub fn json_obj_get<'a, 'data>(&self, json_obj: &'a JsonObject<'data>) -> Option<&'a JsonValue<'data>> { match self { - Self::S(PathItemString { key, .. }) => json_obj.get(key.as_str()), + Self::S(PathItemString { key, .. 
}) => json_obj.iter().rev().find_map(|(k, v)| (k == key).then_some(v)), _ => None, } } diff --git a/src/validators/model_fields.rs b/src/validators/model_fields.rs index 392760964..98c7c2f1d 100644 --- a/src/validators/model_fields.rs +++ b/src/validators/model_fields.rs @@ -1,3 +1,11 @@ +use std::collections::hash_map::Entry; +use std::hash::Hash; +use std::sync::Arc; + +use ahash::AHashMap; +use jiter::JsonArray; +use jiter::JsonObject; +use jiter::JsonValue; use pyo3::exceptions::PyKeyError; use pyo3::intern; use pyo3::prelude::*; @@ -13,6 +21,9 @@ use crate::errors::{ErrorType, ErrorTypeDefaults, ValError, ValLineError, ValRes use crate::input::ConsumeIterator; use crate::input::{BorrowInput, Input, ValidatedDict, ValidationMatch}; use crate::lookup_key::LookupKey; +use crate::lookup_key::LookupPath; +use crate::lookup_key::PathItem; +use crate::tools::new_py_string; use crate::tools::SchemaDict; use super::{build_validator, BuildValidator, CombinedValidator, DefinitionsBuilder, ValidationState, Validator}; @@ -37,6 +48,7 @@ pub struct ModelFieldsValidator { strict: bool, from_attributes: bool, loc_by_alias: bool, + lookup: LookupMap, } impl BuildValidator for ModelFieldsValidator { @@ -96,6 +108,265 @@ impl BuildValidator for ModelFieldsValidator { }); } + let mut map = AHashMap::new(); + + fn add_field_to_map(map: &mut AHashMap, key: K, field_index: usize) { + match map.entry(key) { + Entry::Occupied(mut entry) => match entry.get_mut() { + &mut LookupValue::Field(i) => { + entry.insert(LookupValue::Complex { + fields: vec![i, field_index], + lookup_map: LookupMap { + map: AHashMap::new(), + list: AHashMap::new(), + }, + }); + } + LookupValue::Complex { fields, .. 
} => { + fields.push(field_index); + } + }, + Entry::Vacant(entry) => { + entry.insert(LookupValue::Field(field_index)); + } + } + } + + fn add_path_to_map(map: &mut AHashMap, path: &LookupPath, field_index: usize) { + if path.rest().is_empty() { + // terminal value + add_field_to_map(map, path.first_key().to_owned(), field_index); + return; + } + + let mut nested_map = match map.entry(path.first_key().to_owned()) { + Entry::Occupied(mut entry) => { + let entry = entry.into_mut(); + match entry { + &mut LookupValue::Field(i) => { + *entry = LookupValue::Complex { + fields: vec![i], + lookup_map: LookupMap { + map: AHashMap::new(), + list: AHashMap::new(), + }, + }; + match entry { + LookupValue::Complex { + lookup_map: ref mut nested_map, + .. + } => nested_map, + _ => unreachable!(), + } + } + LookupValue::Complex { + lookup_map: ref mut nested_map, + .. + } => nested_map, + } + } + Entry::Vacant(entry) => { + let LookupValue::Complex { + lookup_map: ref mut nested_map, + .. + } = entry.insert(LookupValue::Complex { + fields: Vec::new(), + lookup_map: LookupMap { + map: AHashMap::new(), + list: AHashMap::new(), + }, + }) + else { + unreachable!() + }; + nested_map + } + }; + + let mut nested_map = nested_map; + let mut path_iter = path.rest().iter(); + + let mut current = path_iter.next().expect("rest is non-empty"); + + while let Some(next) = path_iter.next() { + nested_map = match current { + PathItem::S(s) => { + let str_key = s.key.to_owned(); + match nested_map.map.entry(str_key) { + Entry::Occupied(entry) => { + let entry = entry.into_mut(); + match entry { + &mut LookupValue::Field(i) => { + *entry = LookupValue::Complex { + fields: vec![i], + lookup_map: LookupMap { + map: AHashMap::new(), + list: AHashMap::new(), + }, + }; + let LookupValue::Complex { + lookup_map: ref mut nested_map, + .. + } = entry + else { + unreachable!() + }; + nested_map + } + LookupValue::Complex { + lookup_map: ref mut nested_map, + .. 
+ } => nested_map, + } + } + Entry::Vacant(entry) => { + let LookupValue::Complex { + lookup_map: ref mut nested_map, + .. + } = entry.insert(LookupValue::Complex { + fields: vec![], + lookup_map: LookupMap { + map: AHashMap::new(), + list: AHashMap::new(), + }, + }) + else { + unreachable!() + }; + nested_map + } + } + } + PathItem::Pos(i) => match nested_map.list.entry(*i as i64) { + Entry::Occupied(entry) => { + let entry = entry.into_mut(); + match entry { + &mut LookupValue::Field(i) => { + *entry = LookupValue::Complex { + fields: vec![i], + lookup_map: LookupMap { + map: AHashMap::new(), + list: AHashMap::new(), + }, + }; + let LookupValue::Complex { + lookup_map: ref mut nested_map, + .. + } = entry + else { + unreachable!() + }; + nested_map + } + LookupValue::Complex { + lookup_map: ref mut nested_map, + .. + } => nested_map, + } + } + Entry::Vacant(entry) => { + let LookupValue::Complex { + lookup_map: ref mut nested_map, + .. + } = entry.insert(LookupValue::Complex { + fields: vec![], + lookup_map: LookupMap { + map: AHashMap::new(), + list: AHashMap::new(), + }, + }) + else { + unreachable!() + }; + nested_map + } + }, + // FIXME: handle integer cases + PathItem::Neg(i) => match nested_map.list.entry(-(*i as i64)) { + Entry::Occupied(entry) => { + let entry = entry.into_mut(); + match entry { + &mut LookupValue::Field(i) => { + *entry = LookupValue::Complex { + fields: vec![i], + lookup_map: LookupMap { + map: AHashMap::new(), + list: AHashMap::new(), + }, + }; + let LookupValue::Complex { + lookup_map: ref mut nested_map, + .. + } = entry + else { + unreachable!() + }; + nested_map + } + LookupValue::Complex { + lookup_map: ref mut nested_map, + .. + } => nested_map, + } + } + Entry::Vacant(entry) => { + let LookupValue::Complex { + lookup_map: ref mut nested_map, + .. 
+ } = entry.insert(LookupValue::Complex { + fields: vec![], + lookup_map: LookupMap { + map: AHashMap::new(), + list: AHashMap::new(), + }, + }) + else { + unreachable!() + }; + nested_map + } + }, + }; + + current = next; + } + + // now have a terminal value + match current { + PathItem::S(s) => { + add_field_to_map(&mut nested_map.map, s.key.to_owned(), field_index); + } + PathItem::Pos(i) => { + add_field_to_map(&mut nested_map.list, *i as i64, field_index); + } + PathItem::Neg(i) => { + add_field_to_map(&mut nested_map.list, -(*i as i64), field_index); + } + } + } + + for (i, field) in fields.iter().enumerate() { + match &field.lookup_key { + LookupKey::Simple(path) => { + // should be a single string key + debug_assert!(path.rest().is_empty()); + add_field_to_map(&mut map, path.first_key().to_owned(), i); + } + LookupKey::Choice { path1, path2 } => { + // two choices of single string keys + debug_assert!(path1.rest().is_empty()); + debug_assert!(path2.rest().is_empty()); + add_field_to_map(&mut map, path1.first_key().to_owned(), i); + add_field_to_map(&mut map, path2.first_key().to_owned(), i); + } + LookupKey::PathChoices(paths) => { + for path in paths { + add_path_to_map(&mut map, path, i); + } + } + } + } + Ok(Self { fields, model_name, @@ -104,6 +375,10 @@ impl BuildValidator for ModelFieldsValidator { strict, from_attributes, loc_by_alias: config.get_as(intern!(py, "loc_by_alias"))?.unwrap_or(true), + lookup: LookupMap { + map, + list: AHashMap::new(), + }, } .into()) } @@ -127,34 +402,171 @@ impl Validator for ModelFieldsValidator { let strict = state.strict_or(self.strict); let from_attributes = state.extra().from_attributes.unwrap_or(self.from_attributes); - // we convert the DictType error to a ModelType error - let dict = match input.validate_model_fields(strict, from_attributes) { - Ok(d) => d, - Err(ValError::LineErrors(errors)) => { - let errors: Vec = errors + let (model_dict, mut model_extra_dict_op, fields_set) = if let Some(json_input) = 
input.as_json() { + let JsonValue::Object(json_object) = json_input else { + return Err(ValError::new( + ErrorType::ModelType { + context: None, + class_name: self.model_name.clone(), + }, + input, + )); + }; + self.validate_json_by_iteration(py, json_input, json_object, state)? + } else { + // we convert the DictType error to a ModelType error + let dict = match input.validate_model_fields(strict, from_attributes) { + Ok(d) => d, + Err(ValError::LineErrors(errors)) => { + let errors: Vec = errors + .into_iter() + .map(|e| match e.error_type { + ErrorType::DictType { .. } => { + let mut e = e; + e.error_type = ErrorType::ModelType { + class_name: self.model_name.clone(), + context: None, + }; + e + } + _ => e, + }) + .collect(); + return Err(ValError::LineErrors(errors)); + } + Err(err) => return Err(err), + }; + self.validate_by_get_item(py, input, dict, state)? + }; + state.add_fields_set(fields_set.len()); + + // if we have extra=allow, but we didn't create a dict because we were validating + // from attributes, set it now so __pydantic_extra__ is always a dict if extra=allow + if matches!(self.extra_behavior, ExtraBehavior::Allow) && model_extra_dict_op.is_none() { + model_extra_dict_op = Some(PyDict::new(py)); + }; + + Ok((model_dict, model_extra_dict_op, fields_set).into_py_any(py)?) + } + + fn validate_assignment<'py>( + &self, + py: Python<'py>, + obj: &Bound<'py, PyAny>, + field_name: &str, + field_value: &Bound<'py, PyAny>, + state: &mut ValidationState<'_, 'py>, + ) -> ValResult { + let dict = obj.downcast::()?; + + let get_updated_dict = |output: &Bound<'py, PyAny>| { + dict.set_item(field_name, output)?; + Ok(dict) + }; + + let prepare_result = |result: ValResult| match result { + Ok(output) => get_updated_dict(&output.into_bound(py)), + Err(ValError::LineErrors(line_errors)) => { + let errors = line_errors .into_iter() - .map(|e| match e.error_type { - ErrorType::DictType { .. 
} => { - let mut e = e; - e.error_type = ErrorType::ModelType { - class_name: self.model_name.clone(), - context: None, - }; - e - } - _ => e, - }) + .map(|e| e.with_outer_location(field_name)) .collect(); - return Err(ValError::LineErrors(errors)); + Err(ValError::LineErrors(errors)) + } + Err(err) => Err(err), + }; + + // by using dict but removing the field in question, we match V1 behaviour + let data_dict = dict.copy()?; + if let Err(err) = data_dict.del_item(field_name) { + // KeyError is fine here as the field might not be in the dict + if !err.get_type(py).is(&PyType::new::(py)) { + return Err(err.into()); + } + } + + let new_data = { + let state = &mut state.rebind_extra(move |extra| extra.data = Some(data_dict)); + + if let Some(field) = self.fields.iter().find(|f| f.name == field_name) { + if field.frozen { + return Err(ValError::new_with_loc( + ErrorTypeDefaults::FrozenField, + field_value, + field.name.to_string(), + )); + } + + prepare_result(field.validator.validate(py, field_value, state))? 
+ } else { + // Handle extra (unknown) field + // We partially use the extra_behavior for initialization / validation + // to determine how to handle assignment + // For models / typed dicts we forbid assigning extra attributes + // unless the user explicitly set extra_behavior to 'allow' + match self.extra_behavior { + ExtraBehavior::Allow => match self.extras_validator { + Some(ref validator) => prepare_result(validator.validate(py, field_value, state))?, + None => get_updated_dict(field_value)?, + }, + ExtraBehavior::Forbid | ExtraBehavior::Ignore => { + return Err(ValError::new_with_loc( + ErrorType::NoSuchAttribute { + attribute: field_name.to_string(), + context: None, + }, + field_value, + field_name.to_string(), + )) + } + } + } + }; + + let new_extra = match &self.extra_behavior { + ExtraBehavior::Allow => { + let non_extra_data = PyDict::new(py); + self.fields.iter().try_for_each(|f| -> PyResult<()> { + let Some(popped_value) = new_data.get_item(&f.name)? else { + // field not present in __dict__ for some reason; let the rest of the + // validation pipeline handle it later + return Ok(()); + }; + new_data.del_item(&f.name)?; + non_extra_data.set_item(&f.name, popped_value)?; + Ok(()) + })?; + let new_extra = new_data.copy()?; + new_data.clear(); + new_data.update(non_extra_data.as_mapping())?; + new_extra.into() } - Err(err) => return Err(err), + _ => py.None(), }; + let fields_set = PySet::new(py, &[field_name.to_string()])?; + Ok((new_data, new_extra, fields_set).into_py_any(py)?) 
+ } + + fn get_name(&self) -> &str { + Self::EXPECTED_TYPE + } +} + +type ValidatedModelFields<'py> = (Bound<'py, PyDict>, Option>, Bound<'py, PySet>); + +impl ModelFieldsValidator { + fn validate_by_get_item<'py>( + &self, + py: Python<'py>, + input: &(impl Input<'py> + ?Sized), + dict: impl ValidatedDict<'py>, + state: &mut ValidationState<'_, 'py>, + ) -> ValResult> { let model_dict = PyDict::new(py); let mut model_extra_dict_op: Option> = None; let mut errors: Vec = Vec::with_capacity(self.fields.len()); - let mut fields_set_vec: Vec> = Vec::with_capacity(self.fields.len()); - let mut fields_set_count: usize = 0; + let fields_set = PySet::empty(py)?; // we only care about which keys have been used if we're iterating over the object for extra after // the first pass @@ -188,8 +600,7 @@ impl Validator for ModelFieldsValidator { match field.validator.validate(py, value.borrow_input(), state) { Ok(value) => { model_dict.set_item(&field.name_py, value)?; - fields_set_vec.push(field.name_py.clone_ref(py)); - fields_set_count += 1; + fields_set.add(&field.name_py)?; } Err(ValError::Omit) => continue, Err(ValError::LineErrors(line_errors)) => { @@ -236,7 +647,7 @@ impl Validator for ModelFieldsValidator { py: Python<'py>, used_keys: AHashSet<&'a str>, errors: &'a mut Vec, - fields_set_vec: &'a mut Vec>, + fields_set: &'a Bound<'py, PySet>, extra_behavior: ExtraBehavior, extras_validator: Option<&'a CombinedValidator>, state: &'a mut ValidationState<'s, 'py>, @@ -294,7 +705,7 @@ impl Validator for ModelFieldsValidator { match validator.validate(self.py, value, self.state) { Ok(value) => { model_extra_dict.set_item(&py_key, value)?; - self.fields_set_vec.push(py_key.into()); + self.fields_set.add(py_key)?; } Err(ValError::LineErrors(line_errors)) => { for err in line_errors { @@ -305,7 +716,7 @@ impl Validator for ModelFieldsValidator { } } else { model_extra_dict.set_item(&py_key, value.to_object(self.py)?)?; - self.fields_set_vec.push(py_key.into()); + 
self.fields_set.add(py_key)?; }; } } @@ -318,7 +729,7 @@ impl Validator for ModelFieldsValidator { py, used_keys, errors: &mut errors, - fields_set_vec: &mut fields_set_vec, + fields_set: &fields_set, extra_behavior: self.extra_behavior, extras_validator: self.extras_validator.as_deref(), state, @@ -330,121 +741,239 @@ impl Validator for ModelFieldsValidator { } if !errors.is_empty() { - Err(ValError::LineErrors(errors)) - } else { - let fields_set = PySet::new(py, &fields_set_vec)?; - state.add_fields_set(fields_set_count); - - // if we have extra=allow, but we didn't create a dict because we were validating - // from attributes, set it now so __pydantic_extra__ is always a dict if extra=allow - if matches!(self.extra_behavior, ExtraBehavior::Allow) && model_extra_dict_op.is_none() { - model_extra_dict_op = Some(PyDict::new(py)); - }; - - Ok((model_dict, model_extra_dict_op, fields_set).into_py_any(py)?) + return Err(ValError::LineErrors(errors)); } + + Ok((model_dict, model_extra_dict_op, fields_set)) } - fn validate_assignment<'py>( + fn validate_json_by_iteration<'py>( &self, py: Python<'py>, - obj: &Bound<'py, PyAny>, - field_name: &str, - field_value: &Bound<'py, PyAny>, + json_input: &JsonValue<'_>, + json_object: &JsonObject<'_>, state: &mut ValidationState<'_, 'py>, - ) -> ValResult { - let dict = obj.downcast::()?; - - let get_updated_dict = |output: &Bound<'py, PyAny>| { - dict.set_item(field_name, output)?; - Ok(dict) - }; + ) -> ValResult> { + // expect json_input and json_object to be the same thing, just projected + debug_assert!(matches!(&json_input, JsonValue::Object(j) if Arc::ptr_eq(j, json_object))); - let prepare_result = |result: ValResult| match result { - Ok(output) => get_updated_dict(&output.into_bound(py)), - Err(ValError::LineErrors(line_errors)) => { - let errors = line_errors - .into_iter() - .map(|e| e.with_outer_location(field_name)) - .collect(); - Err(ValError::LineErrors(errors)) + let model_dict = PyDict::new(py); + let mut 
model_extra_dict_op: Option> = None; + let mut field_results: Vec>> = (0..self.fields.len()).map(|_| None).collect(); + let mut errors: Vec = Vec::new(); + let fields_set = PySet::empty(py)?; + + fn consume_json_array<'py>( + py: Python<'py>, + fields: &[Field], + field_results: &mut [Option>], + array_lookup: &AHashMap, + json_array: &JsonArray<'_>, + state: &mut ValidationState<'_, 'py>, + ) -> ValResult<()> { + for (list_item, value) in array_lookup { + let index = if *list_item < 0 { + list_item + json_array.len() as i64 + } else { + *list_item + }; + if let Some(json_value) = json_array.get(index as usize) { + match value { + &LookupValue::Field(i) => { + field_results[i] = Some(fields[i].validator.validate(py, json_value, state)); + } + LookupValue::Complex { + fields: complex_lookup_fields, + lookup_map, + } => perform_complex_lookup( + py, + fields, + field_results, + complex_lookup_fields, + lookup_map, + json_value, + state, + )?, + } + } } - Err(err) => Err(err), - }; + Ok(()) + } - // by using dict but removing the field in question, we match V1 behaviour - let data_dict = dict.copy()?; - if let Err(err) = data_dict.del_item(field_name) { - // KeyError is fine here as the field might not be in the dict - if !err.get_type(py).is(&PyType::new::(py)) { - return Err(err.into()); + fn perform_complex_lookup<'py>( + py: Python<'py>, + fields: &[Field], + field_results: &mut [Option>], + complex_lookup_fields: &[usize], + complex_lookup_map: &LookupMap, + json_value: &JsonValue<'_>, + state: &mut ValidationState<'_, 'py>, + ) -> ValResult<()> { + // this is a possibly recursive lookup with some complicated alias logic, + // not much we can do except recurse + for &i in complex_lookup_fields { + field_results[i] = Some(fields[i].validator.validate(py, json_value, state)); + } + if !complex_lookup_map.map.is_empty() { + if let JsonValue::Object(nested_object) = json_value { + for (key, value) in &**nested_object { + if let Some(lookup_value) = 
complex_lookup_map.map.get(key.as_ref()) { + match lookup_value { + &LookupValue::Field(i) => { + field_results[i] = Some(fields[i].validator.validate(py, value, state)); + } + LookupValue::Complex { + fields: complex_lookup_fields, + lookup_map, + } => { + perform_complex_lookup( + py, + fields, + field_results, + complex_lookup_fields, + lookup_map, + value, + state, + )?; + } + } + } + } + } } + if !complex_lookup_map.list.is_empty() { + if let JsonValue::Array(nested_array) = json_value { + consume_json_array(py, fields, field_results, &complex_lookup_map.list, nested_array, state)?; + } + } + Ok(()) } - let new_data = { - let state = &mut state.rebind_extra(move |extra| extra.data = Some(data_dict)); + let model_extra_dict = PyDict::new(py); + for (key, value) in &**json_object { + let key = key.as_ref(); + if let Some(lookup_value) = self.lookup.map.get(key) { + match lookup_value { + &LookupValue::Field(i) => { + field_results[i] = Some(self.fields[i].validator.validate(py, value, state)); + } + LookupValue::Complex { fields, lookup_map } => { + perform_complex_lookup(py, &self.fields, &mut field_results, fields, lookup_map, value, state)?; + } + } + continue; + } - if let Some(field) = self.fields.iter().find(|f| f.name == field_name) { - if field.frozen { - return Err(ValError::new_with_loc( - ErrorTypeDefaults::FrozenField, - field_value, - field.name.to_string(), + // Unknown / extra field - we only care about these at the top level + match self.extra_behavior { + ExtraBehavior::Forbid => { + errors.push(ValLineError::new_with_loc( + ErrorTypeDefaults::ExtraForbidden, + value, + key, )); } + ExtraBehavior::Ignore => {} + ExtraBehavior::Allow => { + let py_key: Bound<'_, PyString> = new_py_string(py, key, state.cache_str()); + if let Some(validator) = &self.extras_validator { + match validator.validate(py, value, state) { + Ok(value) => { + model_extra_dict.set_item(&py_key, value)?; + fields_set.add(py_key)?; + } + Err(ValError::LineErrors(line_errors)) 
=> { + for err in line_errors { + errors.push(err.with_outer_location(key)); + } + } + Err(err) => return Err(err), + } + } else { + model_extra_dict.set_item(&py_key, value)?; + fields_set.add(py_key)?; + }; + } + } + } - prepare_result(field.validator.validate(py, field_value, state))? + // now that we've iterated over all the keys, we can set the values in the model + // dict, and try to set defaults for any missing fields + + for (field, field_result) in std::iter::zip(&self.fields, field_results) { + let field_value = if let Some(validation_result) = field_result { + match validation_result { + Ok(value) => { + fields_set.add(&field.name_py)?; + value + } + Err(ValError::Omit) => continue, + Err(ValError::LineErrors(line_errors)) => { + for err in line_errors { + // FIXME this should use the lookup path which the result was found at + errors.push(err.with_outer_location(&field.name)); + } + continue; + } + Err(err) => return Err(err), + } } else { - // Handle extra (unknown) field - // We partially use the extra_behavior for initialization / validation - // to determine how to handle assignment - // For models / typed dicts we forbid assigning extra attributes - // unless the user explicitly set extra_behavior to 'allow' - match self.extra_behavior { - ExtraBehavior::Allow => match self.extras_validator { - Some(ref validator) => prepare_result(validator.validate(py, field_value, state))?, - None => get_updated_dict(field_value)?, - }, - ExtraBehavior::Forbid | ExtraBehavior::Ignore => { - return Err(ValError::new_with_loc( - ErrorType::NoSuchAttribute { - attribute: field_name.to_string(), - context: None, - }, - field_value, - field_name.to_string(), - )) + match field.validator.default_value(py, Some(field.name.as_str()), state) { + Ok(Some(default_value)) => default_value, + Ok(None) => { + errors.push(field.lookup_key.error( + ErrorTypeDefaults::Missing, + json_input, + self.loc_by_alias, + &field.name, + )); + continue; + } + Err(ValError::Omit) => 
continue, + Err(ValError::LineErrors(line_errors)) => { + for err in line_errors { + // Note: this will always use the field name even if there is an alias + // However, we don't mind so much because this error can only happen if the + // default value fails validation, which is arguably a developer error. + // We could try to "fix" this in the future if desired. + errors.push(err); + } + continue; } + Err(err) => return Err(err), } - } - }; + }; - let new_extra = match &self.extra_behavior { - ExtraBehavior::Allow => { - let non_extra_data = PyDict::new(py); - self.fields.iter().try_for_each(|f| -> PyResult<()> { - let Some(popped_value) = new_data.get_item(&f.name)? else { - // field not present in __dict__ for some reason; let the rest of the - // validation pipeline handle it later - return Ok(()); - }; - new_data.del_item(&f.name)?; - non_extra_data.set_item(&f.name, popped_value)?; - Ok(()) - })?; - let new_extra = new_data.copy()?; - new_data.clear(); - new_data.update(non_extra_data.as_mapping())?; - new_extra.into() - } - _ => py.None(), - }; + model_dict.set_item(&field.name_py, field_value)?; + } - let fields_set = PySet::new(py, &[field_name.to_string()])?; - Ok((new_data, new_extra, fields_set).into_py_any(py)?) - } + if matches!(self.extra_behavior, ExtraBehavior::Allow) { + model_extra_dict_op = Some(model_extra_dict); + } - fn get_name(&self) -> &str { - Self::EXPECTED_TYPE + if !errors.is_empty() { + return Err(ValError::LineErrors(errors)); + } + + Ok((model_dict, model_extra_dict_op, fields_set)) } } + +#[derive(Debug)] +enum LookupValue { + /// This lookup hits an actual field + Field(usize), + /// This lookup might be applicable to multiple fields + Complex { + /// All fields which wanted _exactly_ this key + fields: Vec, + /// Fields which use this key as path prefix + lookup_map: LookupMap, + }, +} + +#[derive(Debug)] +struct LookupMap { + map: AHashMap, + list: AHashMap, +}