Native deserialization based on Rust and PyO3

Proof of concept
Only capable of deserializing (nested) Messages with primitive fields
No handling of lists, maps, enums, etc. implemented yet
See `example.py` for a working example
This commit is contained in:
Erik Friese 2023-08-25 19:41:22 +02:00
parent 4cdf1bb9e0
commit 421aa78014
12 changed files with 1203 additions and 486 deletions

72
betterproto-extras/.gitignore vendored Normal file
View File

@ -0,0 +1,72 @@
/target
# Byte-compiled / optimized / DLL files
__pycache__/
.pytest_cache/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
.venv/
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
include/
man/
venv/
*.egg-info/
.installed.cfg
*.egg
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
pip-selfcheck.json
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Rope
.ropeproject
# Django stuff:
*.log
*.pot
.DS_Store
# Sphinx documentation
docs/_build/
# PyCharm
.idea/
# VSCode
.vscode/
# Pyenv
.python-version

344
betterproto-extras/Cargo.lock generated Normal file
View File

@ -0,0 +1,344 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anyhow"
version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "betterproto-extras"
version = "0.1.0"
dependencies = [
"prost-reflect",
"pyo3",
]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bytes"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "indoc"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306"
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "libc"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "lock_api"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "memoffset"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "parking_lot"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
]
[[package]]
name = "proc-macro2"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
dependencies = [
"unicode-ident",
]
[[package]]
name = "prost"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd"
dependencies = [
"bytes",
"prost-derive",
]
[[package]]
name = "prost-derive"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4"
dependencies = [
"anyhow",
"itertools",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "prost-reflect"
version = "0.11.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "000e1e05ebf7b26e1eba298e66fe4eee6eb19c567d0ffb35e0dd34231cdac4c8"
dependencies = [
"once_cell",
"prost",
"prost-types",
]
[[package]]
name = "prost-types"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13"
dependencies = [
"prost",
]
[[package]]
name = "pyo3"
version = "0.19.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e681a6cfdc4adcc93b4d3cf993749a4552018ee0a9b65fc0ccfad74352c72a38"
dependencies = [
"cfg-if",
"indoc",
"libc",
"memoffset",
"parking_lot",
"pyo3-build-config",
"pyo3-ffi",
"pyo3-macros",
"unindent",
]
[[package]]
name = "pyo3-build-config"
version = "0.19.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "076c73d0bc438f7a4ef6fdd0c3bb4732149136abd952b110ac93e4edb13a6ba5"
dependencies = [
"once_cell",
"target-lexicon",
]
[[package]]
name = "pyo3-ffi"
version = "0.19.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e53cee42e77ebe256066ba8aa77eff722b3bb91f3419177cf4cd0f304d3284d9"
dependencies = [
"libc",
"pyo3-build-config",
]
[[package]]
name = "pyo3-macros"
version = "0.19.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfeb4c99597e136528c6dd7d5e3de5434d1ceaf487436a3f03b2d56b6fc9efd1"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
"quote",
"syn",
]
[[package]]
name = "pyo3-macros-backend"
version = "0.19.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "947dc12175c254889edc0c02e399476c2f652b4b9ebd123aa655c224de259536"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
dependencies = [
"bitflags",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "smallvec"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "target-lexicon"
version = "0.12.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d0e916b1148c8e263850e1ebcbd046f333e0683c724876bb0da63ea4373dc8a"
[[package]]
name = "unicode-ident"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
[[package]]
name = "unindent"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c"
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"

View File

@ -0,0 +1,12 @@
[package]
name = "betterproto-extras"
version = "0.1.0"
edition = "2021"
[lib]
name = "betterproto_extras"
crate-type = ["cdylib"]
[dependencies]
prost-reflect = "0.11.4"
pyo3 = { version = "0.19.0", features = ["abi3-py37", "extension-module"] }

View File

@ -0,0 +1,16 @@
[build-system]
requires = ["maturin>=1.2,<2.0"]
build-backend = "maturin"
[project]
name = "betterproto-extras"
requires-python = ">=3.7"
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
[tool.maturin]
features = ["pyo3/extension-module"]

View File

@ -0,0 +1,99 @@
use prost_reflect::{
prost_types::{
field_descriptor_proto::Type, DescriptorProto, FieldDescriptorProto, FileDescriptorProto,
},
DescriptorPool, MessageDescriptor,
};
use pyo3::{exceptions::PyRuntimeError, PyAny, PyResult};
use std::sync::{Mutex, OnceLock};
use crate::py_any_extras::PyAnyExtras;
pub fn create_cached_descriptor(obj: &PyAny) -> PyResult<MessageDescriptor> {
static DESCRIPTOR_POOL: OnceLock<Mutex<DescriptorPool>> = OnceLock::new();
let mut pool = DESCRIPTOR_POOL
.get_or_init(|| Mutex::new(DescriptorPool::new()))
.lock()
.unwrap();
create_cached_descriptor_in_pool(obj, &mut pool)
}
fn create_cached_descriptor_in_pool<'py>(
obj: &'py PyAny,
pool: &mut DescriptorPool,
) -> PyResult<MessageDescriptor> {
let name = obj.qualified_class_name()?;
if let Some(desc) = pool.get_message_by_name(&name) {
return Ok(desc);
}
let meta = obj.get_proto_meta()?;
let mut message = DescriptorProto {
name: Some(name.clone()),
..Default::default()
};
let mut file = FileDescriptorProto {
name: Some(name.clone()),
..Default::default()
};
for item in meta
.getattr("meta_by_field_name")?
.call_method0("items")?
.iter()?
{
let (field_name, field_meta) = item?.extract::<(&str, &'py PyAny)>()?;
message.field.push({
let mut field = FieldDescriptorProto {
name: Some(field_name.to_string()),
number: Some(field_meta.getattr("number")?.extract::<i32>()?),
..Default::default()
};
field.set_type(map_type(
field_meta.getattr("proto_type")?.extract::<&str>()?,
)?);
if field.r#type() == Type::Message {
let instance = meta.create_instance(field_name)?;
let cls_name = instance.qualified_class_name()?;
field.type_name = Some(cls_name.to_string());
create_cached_descriptor_in_pool(instance, pool)?;
}
field
});
}
file.message_type.push(message);
pool.add_file_descriptor_proto(file)
.map_err(|_| PyRuntimeError::new_err("Error on proto registration"))?;
Ok(pool.get_message_by_name(&name).expect("Just registered..."))
}
/// Translates a betterproto `proto_type` string into the matching protobuf
/// field [`Type`].
///
/// # Errors
///
/// Returns a `RuntimeError` ("Unsupported type") for any string that is not
/// one of the names handled below.
fn map_type(proto_type: &str) -> PyResult<Type> {
    let mapped = match proto_type {
        // Boolean and enum
        "bool" => Type::Bool,
        "enum" => Type::Enum,
        // Variable-length integers
        "int32" => Type::Int32,
        "int64" => Type::Int64,
        "uint32" => Type::Uint32,
        "uint64" => Type::Uint64,
        "sint32" => Type::Sint32,
        "sint64" => Type::Sint64,
        // Fixed-width integers
        "fixed32" => Type::Fixed32,
        "sfixed32" => Type::Sfixed32,
        "fixed64" => Type::Fixed64,
        "sfixed64" => Type::Sfixed64,
        // Floating point
        "float" => Type::Float,
        "double" => Type::Double,
        // Length-delimited
        "string" => Type::String,
        "bytes" => Type::Bytes,
        "message" => Type::Message,
        _ => return Err(PyRuntimeError::new_err("Unsupported type")),
    };
    Ok(mapped)
}

View File

@ -0,0 +1,23 @@
mod descriptor_pool;
mod merging;
mod py_any_extras;
use descriptor_pool::create_cached_descriptor;
use merging::merge_msg_into_pyobj;
use prost_reflect::DynamicMessage;
use pyo3::{exceptions::PyRuntimeError, prelude::*};
#[pyfunction]
fn deserialize(obj: &PyAny, buf: &[u8]) -> PyResult<()> {
let desc = create_cached_descriptor(obj)?;
let msg = DynamicMessage::decode(desc, buf)
.map_err(|_| PyRuntimeError::new_err("Error on deserializing."))?;
merge_msg_into_pyobj(obj, &msg)?;
Ok(())
}
// Python module entry point: exposes `deserialize` as the module's only
// function.
#[pymodule]
fn betterproto_extras(_py: Python, m: &PyModule) -> PyResult<()> {
    let deserialize_fn = wrap_pyfunction!(deserialize, m)?;
    m.add_function(deserialize_fn)
}

View File

@ -0,0 +1,43 @@
use prost_reflect::{DynamicMessage, ReflectMessage, Value};
use pyo3::{exceptions::PyRuntimeError, IntoPy, PyAny, PyResult};
use crate::py_any_extras::PyAnyExtras;
/// Copies the fields of the decoded `msg` onto the Python object `obj`.
///
/// For every field in the message's descriptor, the value obtained from `msg`
/// is converted to a Python object and assigned to the attribute of the same
/// name on `obj`. Message-typed fields are converted recursively.
///
/// # Errors
///
/// Forwards Python errors from attribute access/assignment and returns a
/// `RuntimeError` for field values of an unsupported kind.
pub fn merge_msg_into_pyobj(obj: &PyAny, msg: &DynamicMessage) -> PyResult<()> {
    // The metadata object is the same for every field; look it up once rather
    // than performing a Python attribute access per field.
    let proto_meta = obj.get_proto_meta()?;

    for field in msg.descriptor().fields() {
        let field_name = field.name();
        if let Some(field_value) = msg.get_field_by_name(field_name) {
            let py_value = map_field_value(field_name, &field_value, proto_meta)?;
            obj.setattr(field_name, py_value)?;
        }
    }
    Ok(())
}
fn map_field_value<'py>(
field_name: &str,
field_value: &Value,
proto_meta: &'py PyAny,
) -> PyResult<&'py PyAny> {
let py = proto_meta.py();
match field_value {
Value::Bool(x) => Ok(x.into_py(py).into_ref(py)),
Value::Bytes(x) => Ok(x.into_py(py).into_ref(py)),
Value::F32(x) => Ok(x.into_py(py).into_ref(py)),
Value::F64(x) => Ok(x.into_py(py).into_ref(py)),
Value::I32(x) => Ok(x.into_py(py).into_ref(py)),
Value::I64(x) => Ok(x.into_py(py).into_ref(py)),
Value::String(x) => Ok(x.into_py(py).into_ref(py)),
Value::U32(x) => Ok(x.into_py(py).into_ref(py)),
Value::U64(x) => Ok(x.into_py(py).into_ref(py)),
Value::Message(msg) => {
let obj = proto_meta.create_instance(field_name)?;
merge_msg_into_pyobj(obj, msg)?;
Ok(obj)
}
_ => Err(PyRuntimeError::new_err("Unsupported type")),
}
}

View File

@ -0,0 +1,27 @@
use pyo3::{PyAny, PyResult};
/// Convenience accessors for the Python-side betterproto objects used by the
/// native deserializer.
pub trait PyAnyExtras<'py> {
/// Returns the name of the receiver's class in the form `{module}.{name}`,
/// built from the class's `__module__` and `__name__` attributes.
fn qualified_class_name(&self) -> PyResult<String>;
/// Returns the receiver's `_betterproto` attribute.
fn get_proto_meta(&self) -> PyResult<&'py PyAny>;
/// Looks up `field_name` in the receiver's `cls_by_field` attribute and calls
/// the result without arguments, returning whatever that call produces.
/// Callers in this crate invoke it on the object returned by
/// `get_proto_meta`.
fn create_instance(&self, field_name: &str) -> PyResult<&'py PyAny>;
}
impl<'py> PyAnyExtras<'py> for &'py PyAny {
    /// Builds `{module}.{name}` from the `__module__` and `__name__`
    /// attributes of the object's `__class__`.
    fn qualified_class_name(&self) -> PyResult<String> {
        let cls = self.getattr("__class__")?;
        let (module, name) = (cls.getattr("__module__")?, cls.getattr("__name__")?);
        Ok(format!("{module}.{name}"))
    }

    /// Reads the `_betterproto` attribute of the object.
    fn get_proto_meta(&self) -> PyResult<&'py PyAny> {
        self.getattr("_betterproto")
    }

    /// Fetches the entry for `field_name` from the `cls_by_field` attribute
    /// and calls it with no arguments.
    fn create_instance(&self, field_name: &str) -> PyResult<&'py PyAny> {
        let factory = self.getattr("cls_by_field")?.get_item(field_name)?;
        factory.call0()
    }
}

22
example.py Normal file
View File

@ -0,0 +1,22 @@
# dev tests
# to be deleted later

from dataclasses import dataclass

import betterproto


# Leaf message carrying two scalar fields.
@dataclass
class Foo(betterproto.Message):
    x: int = betterproto.int32_field(1)
    y: float = betterproto.double_field(2)


# Wrapper message holding two nested Foo messages.
@dataclass
class Bar(betterproto.Message):
    foo1: Foo = betterproto.message_field(1)
    foo2: Foo = betterproto.message_field(2)


# Serialization has not been changed yet. So nothing unusual here
original = Bar(foo1=Foo(x=1, y=2.34), foo2=Foo(x=3, y=4.56))
buffer = bytes(original)

# Native deserialization happening here
bar = Bar().parse(buffer)
print(bar)

1021
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -19,6 +19,7 @@ importlib-metadata = { version = ">=1.6.0", python = "<3.8" }
jinja2 = { version = ">=3.0.3", optional = true }
python-dateutil = "^2.8"
isort = {version = "^5.11.5", optional = true}
betterproto-extras = { path = "betterproto-extras" }
[tool.poetry.dev-dependencies]
asv = "^0.4.2"

View File

@ -779,6 +779,7 @@ class Message(ABC):
"""
Get the binary encoded Protobuf representation of this message instance.
"""
output = bytearray()
for field_name, meta in self._betterproto.meta_by_field_name.items():
try:
@ -1002,6 +1003,14 @@ class Message(ABC):
:class:`Message`
The initialized message.
"""
if True:
# TODO: Make native deserialization optional
import betterproto_extras
betterproto_extras.deserialize(self, data)
return self
# Got some data over the wire
self._serialized_on_wire = True
proto_meta = self._betterproto