Source code for pod5.tools.pod5_recover

"""
Tool for recovering truncated pod5 files
"""
import dataclasses
import typing
from pathlib import Path

import lib_pod5 as p5b
import pod5 as p5
from pod5.tools.parsers import prepare_pod5_recover_argparser, run_tool
from pod5.tools.utils import collect_inputs


@dataclasses.dataclass
class RecoveredData:
    """
    Running totals describing what a recovery pass managed to read back.

    Every counter starts at zero and is incremented in place by the
    recovery code as files are checked.
    """

    # Rows counted across the signal table batches of recovered files
    signal_rows: int = 0
    # Rows counted across the read table batches of recovered files
    reads: int = 0
    # Rows counted across the run info table batches of recovered files
    run_infos: int = 0
    # Number of recovered files that raised an error while being checked
    files_with_errors: int = 0
def _iter_batch_row_counts(table):
    """Yield `num_rows` for each record batch of `table`, in batch order."""
    for batch_index in range(table.num_record_batches):
        yield table.get_batch(batch_index).num_rows


def do_consistency_check(path: Path, recovered: RecoveredData) -> bool:
    """
    Count the number of recovered records in `path`, updating `recovered`.

    The signal, run info and read tables are walked batch by batch (in that
    order) and each batch's row count is added to the matching counter on
    `recovered` as soon as it is read.

    Returns True if the whole file could be read. If a `RuntimeError` is
    raised while opening or reading the file, `recovered.files_with_errors`
    is incremented and False is returned; rows counted before the error
    remain in `recovered`.
    """
    try:
        with p5.Reader(path) as reader:
            for row_count in _iter_batch_row_counts(reader.signal_table):
                recovered.signal_rows += row_count

            for row_count in _iter_batch_row_counts(reader.run_info_table):
                recovered.run_infos += row_count

            for row_count in _iter_batch_row_counts(reader.read_table):
                recovered.reads += row_count
    except RuntimeError:
        recovered.files_with_errors += 1
        return False

    return True
def is_file_ok(path: Path) -> bool:
    """
    Report whether `path` can be opened with `p5.Reader`.

    Returns False if opening (or closing) the reader raises `RuntimeError`,
    True otherwise. Any other exception type propagates to the caller.
    """
    try:
        # Opening the reader is the whole check; nothing is read from it.
        with p5.Reader(path):
            pass
    except RuntimeError:
        return False
    return True
def _recovered_dest(source: Path) -> Path:
    """Return the `<stem>_recovered.pod5` path next to `source`."""
    return source.parent / (source.stem + "_recovered.pod5")


def recover_pod5(inputs: typing.List[Path], force_overwrite: bool, recursive: bool):
    """
    Given a list of truncated pod5 files, recover their data.

    Inputs are gathered with `collect_inputs` using the `*.tmp` and `*.pod5`
    patterns. Only files that `is_file_ok` rejects are recovered. Each such
    file is recovered into `<stem>_recovered.pod5` in the same directory as
    the input, then checked with `do_consistency_check`; a recovered file
    that fails the check is deleted again. A summary of the recovered
    record counts is printed at the end.

    Parameters
    ----------
    inputs:
        Paths forwarded to `collect_inputs`.
    force_overwrite:
        If True, pre-existing destination files are deleted before
        recovery starts; if False, a pre-existing destination is an error.
    recursive:
        Forwarded to `collect_inputs` as its `recursive` argument.

    Raises
    ------
    FileExistsError
        If a destination file already exists and `force_overwrite` is False.
    """
    paths = collect_inputs(inputs, recursive=recursive, pattern=["*.tmp", "*.pod5"])
    paths_to_recover = [p for p in paths if not is_file_ok(p)]

    if not paths_to_recover:
        print(f"None of the {len(paths)} files given need recovery")
        return

    # Check every destination before recovering anything, so a clash is
    # reported before any recovery work has been done.
    for path in paths_to_recover:
        dest = _recovered_dest(path)
        if dest.exists():
            if force_overwrite:
                dest.unlink()
            else:
                raise FileExistsError(
                    "Output files already exists and --force-overwrite not set. "
                    f"Refusing to overwrite {dest}."
                )

    recovered_data = RecoveredData()
    for input_file in paths_to_recover:
        # The destination must be derived from *this* input. Reusing the
        # variable left over from the loop above would send every file to
        # the last input's destination.
        dest = _recovered_dest(input_file)

        # recover_file returns us an open writer:
        f = p5b.recover_file(str(input_file.resolve()), str(dest.resolve()))
        # We don't want to write any additional data:
        f.close()

        # Check how consistent the recovered file is:
        success = do_consistency_check(dest.resolve(), recovered_data)
        if not success:
            print(f"{dest} - Recovery failed")
            dest.unlink()
        else:
            print(f"{dest} - Recovered")

    print(
        f"Recovered {recovered_data.signal_rows} signal rows, "
        f"{recovered_data.reads} reads, "
        f"{recovered_data.run_infos} run infos, "
        f"{recovered_data.files_with_errors} files with errors"
    )
def main():
    """Build the pod5 recover argument parser and hand it to `run_tool`."""
    parser = prepare_pod5_recover_argparser()
    run_tool(parser)
# Run the tool when this module is executed directly as a script.
if __name__ == "__main__":
    main()