Source code for pod5.tools.polars_utils
from typing import Optional
import polars as pl
# Reserved column names used in polars dataframes.
# The double-underscore prefix presumably keeps them from clashing with
# user-supplied column names -- TODO confirm against callers.
PL_DEST_FNAME = "__dest_fname"  # presumably the destination file name column
PL_SRC_FNAME = "__src_fname"  # presumably the source file name column
PL_READ_ID = "__read_id"  # presumably the read id column
# Anchored pattern for a lowercase, hyphenated hex string in 8-4-4-4-12 (UUID) layout.
PL_UUID_REGEX = "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
def pl_format_read_id(read_id_col: pl.Expr) -> pl.Expr:
    """Render binary read ids as hyphenated, UUID-style hex strings.

    The input expression is hex-encoded and the resulting string is cut into
    groups of 8, 4, 4, 4 and 12 characters which are joined with ``-``.

    Args:
        read_id_col: Expression yielding the binary read ids.

    Returns:
        A string expression in ``8-4-4-4-12`` layout.
    """
    hex_digits = read_id_col.bin.encode("hex")
    # (offset, length) of each hyphen-separated group within the hex string.
    groups = ((0, 8), (8, 4), (12, 4), (16, 4), (20, 12))
    pieces = [hex_digits.str.slice(offset, length) for offset, length in groups]
    return pl.format("{}-{}-{}-{}-{}", *pieces)
def pl_format_empty_string(expr: pl.Expr, subst: Optional[str]) -> pl.Expr:
    """Replace empty strings produced by ``expr`` with ``subst``.

    Original author's note: empty strings are read as a pair of double-quotes
    which need to be removed.

    Args:
        expr: String expression to inspect.
        subst: Value substituted for empty strings; ``None`` substitutes null.

    Returns:
        An expression yielding ``subst`` where ``expr`` is the empty string
        and the value of ``expr`` everywhere else.
    """
    # Compare against "" instead of calling ``str.lengths()``: that method was
    # renamed (``len_bytes`` / ``len_chars``) in newer polars releases, whereas
    # an equality test behaves the same on old and new versions.
    is_empty = expr == ""
    # Wrap the substitute in ``pl.lit`` so a string is always treated as a
    # literal value; newer polars parses a bare ``str`` passed to ``then`` as
    # a column name.
    return pl.when(is_empty).then(pl.lit(subst)).otherwise(expr)