I have a relatively large DataFrame (10 columns, about a million rows) in Python. I want to pass it to Rust, process it there, and return another DataFrame to Python.
It seemed convenient to construct a Polars DataFrame and pass it to Rust using pyo3. My question: what are the alternatives for creating a `Vec` in Rust from the source DataFrame?
Here is my `lib.rs`:
use polars::frame::DataFrame;
use polars::prelude::*;
use pyo3::prelude::*;
use pyo3_polars::PyDataFrame;
/// One fully-populated row taken from the input frame.
#[derive(Debug)]
struct DummyData {
    /// Value read from the `foo` column.
    foo: i64,
    /// Owned copy of the value read from the `bar` column.
    bar: String,
}
#[pyfunction]
fn my_fun(pydf: PyDataFrame) -> PyResult<PyDataFrame> {
let df: DataFrame = pydf.into();
let vec_foo_i64: Vec<Option<i64>> = df.column("foo").unwrap().i64().unwrap().to_vec();
let vec_bar_str: Vec<Option<&str>> = df
.column("bar")
.unwrap()
.str()
.unwrap()
.into_iter()
.collect();
let mut dummy_vec: Vec<DummyData> = Vec::new();
for (foo, bar) in vec_foo_i64.iter().zip(vec_bar_str.iter()) {
// if any of the columns is null, skip the row
if foo.is_none() || bar.is_none() {
continue;
} else {
let dummy = DummyData {
foo: foo.unwrap(),
bar: bar.unwrap().to_string(),
};
dummy_vec.push(dummy);
}
}
println!("{:#?}", dummy_vec);
Ok(PyDataFrame(df))
}
// Module initialiser for the `pyo3_trial` extension: exposes `my_fun` to Python.
#[pymodule]
fn pyo3_trial(_py: Python, m: &PyModule) -> PyResult<()> {
    // Wrap the Rust function as a Python callable bound to this module,
    // then register it under its own name.
    let wrapped = wrap_pyfunction!(my_fun, m)?;
    m.add_function(wrapped)?;
    Ok(())
}
Here is my Python code:
import pyo3_trial
import polars as pl
# Three sample rows; each column holds one null, so only the first row
# has a value in both "foo" and "bar".
columns = {
    "foo": [1, 2, None],
    "bar": ["a", None, "c"],
}
df = pl.DataFrame(columns)

# Hand the frame to the Rust extension and keep the frame it returns.
out_df = pyo3_trial.my_fun(df)