Hello @klammens!
Below you will find a cryosparc-tools script which should help you import CTF parameters from an external STAR file. Before you use this script, please read the caveats below:
- I have only tested this by comparing the imported CTF values against existing defocus estimates and finding that they are broadly similar. Because the CTF parameters in the STAR file were estimated by a different program, the two sets of values are not directly comparable.
- This script does not take into account any difference in orientation between how CryoSPARC and your external program represent the micrograph. If motion correction and CTF estimation happen in the same program this should not be a problem, but take caution if you use this script to add CTF estimates from micrographs motion-corrected elsewhere to micrographs motion-corrected in CryoSPARC.
To use this script, you will need a Python environment with `cryosparc-tools` and `starfile` installed.
#!/usr/bin/env python
import sys
import_problems = False
try:
from cryosparc.tools import CryoSPARC
import numpy as np
except ImportError:
print("Install cryosparc tools: python -m pip install cryosparc-tools")
import_problems = True
try:
import starfile
except ImportError:
print("Install starfile: python -m pip install starfile")
import_problems = True
if import_problems:
sys.exit(1)
import json
from pathlib import Path
import re
import argparse
def make_imname(mic_path, type, args):
    """Reduce a micrograph path to the bare name used to match datasets.

    The directory part of ``mic_path`` is dropped, a leading run of exactly
    21 digits followed by an underscore is removed (presumably the unique
    prefix CryoSPARC adds to imported files -- confirm), and the result is
    trimmed by the per-source character counts found in ``args``.

    Args:
        mic_path: Path (or path string) of the micrograph.
        type: ``"cs"`` for a CryoSPARC path or ``"star"`` for a STAR-file
            path; selects which element of the trim lists is applied.
        args: Object with ``trim_start`` and ``trim_end`` attributes, each a
            two-element sequence ordered (CryoSPARC, STAR). A ``trim_end``
            element of ``None`` or ``0`` trims nothing from the end.

    Returns:
        The trimmed file name as a string.

    Raises:
        ValueError: If ``type`` is neither ``"cs"`` nor ``"star"``.
    """
    # Position of each source's value within the two-element trim lists.
    trim_positions = {"cs": 0, "star": 1}
    try:
        trim_idx = trim_positions[type]
    except KeyError:
        raise ValueError(
            f"Unknown micrograph source {type!r}; expected 'cs' or 'star'"
        ) from None

    filename = Path(mic_path).name
    filename = re.sub(r"^\d{21}_", "", filename)

    trim_start = args.trim_start[trim_idx]
    trim_end = args.trim_end[trim_idx]
    # A falsy trim_end must become None: slicing with -0 would return "".
    return filename[trim_start:-trim_end if trim_end else None]
def _ruled_listing(label, names):
    """Format a label, a digit ruler, up to four names and a reversed ruler."""
    # The ruler is as wide as the first name; an empty list gets an empty ruler.
    first = names[0] if names else ""
    ruler = "".join(str(i % 10) for i in range(len(first)))
    rows = [label, ruler + " trim-start", *names[:4], ruler[::-1] + " trim-end"]
    return "\t" + "\n\t".join(rows)


def _report_imname_mismatch(cs_names, star_names):
    """Print the diagnostic shown when the two name lists do not match."""
    print("\nERROR: Could not match micrographs between datasets.")
    print(f"\t{len(cs_names)} mics in CryoSPARC dataset")
    print(f"\t{len(star_names)} mics in STAR file")
    print("First four micrographs from each, with rulers:")
    print(_ruled_listing("CryoSPARC:", cs_names), end="\n\n")
    print(_ruled_listing("STAR file:", star_names), end="\n\n")
    print("If these look like they should match, use the `--trim-start` and `--trim-end` parameters.")


def main(args):
    """Copy CTF estimates from a STAR file onto a CryoSPARC exposure output.

    Connects to CryoSPARC using the keyword arguments stored in
    ``~/instance-info.json``, loads the exposure output named
    ``args.exposure_name`` from job ``args.juid`` in project ``args.puid``,
    matches its micrographs to the rows of the ``micrographs`` block of
    ``args.starfile_path`` by trimmed file name, fills the ``ctf/*`` fields
    from the STAR columns, and saves the result as the ``micrographs``
    output of a newly created external job.

    Args:
        args: Parsed command-line arguments providing ``puid``, ``juid``,
            ``starfile_path``, ``exposure_name``, ``kv``, ``cs``, ``amp``,
            ``trim_start`` and ``trim_end``.

    Exits with status 1 (message on stderr) if the connection test fails,
    the STAR file has no ``micrographs`` block, or a required optics value
    is neither in the STAR file nor given on the command line. Exits with
    status 2 after printing a diagnostic if the micrograph names of the two
    datasets do not match one-to-one.
    """
    with open(Path('~/instance-info.json').expanduser(), 'r') as f:
        instance_info = json.load(f)

    cs = CryoSPARC(**instance_info)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not cs.test_connection():
        sys.exit("ERROR: Could not connect to CryoSPARC with the settings in ~/instance-info.json")

    # Accept UIDs given with or without their letter prefix.
    puid = args.puid if "P" in args.puid else f"P{args.puid}"
    project = cs.find_project(puid)
    juid = args.juid if "J" in args.juid else f"J{args.juid}"
    job = project.find_job(juid)
    cs_mics = job.load_output(args.exposure_name)

    new_fields = {
        # field_name: type
        "ctf/type": "O",
        "ctf/path": "O",
        "ctf/exp_group_id": "u4",
        "ctf/accel_kv": "f4",
        "ctf/cs_mm": "f4",
        "ctf/amp_contrast": "f4",
        "ctf/df1_A": "f4",
        "ctf/df2_A": "f4",
        "ctf/df_angle_rad": "f4",
        "ctf/phase_shift_rad": "f4",
        "ctf/cross_corr_ctffind4": "f4",
        "ctf/ctf_fit_to_A": "f4",
        "ctf/fig_of_merit_gctf": "f4",
        "imname": "O",
    }
    cs_mics.add_fields(list(new_fields), list(new_fields.values()))
    cs_mics["ctf/type"] = "imported"

    ext_star = starfile.read(args.starfile_path, always_dict=True)
    ext_optics = ext_star.get("optics")
    ext_mics = ext_star.get("micrographs")
    if ext_mics is None:
        sys.exit(
            f"ERROR: No 'micrographs' data block in {args.starfile_path} "
            f"(found blocks: {sorted(str(k) for k in ext_star)})"
        )

    if ext_optics is not None:
        # Attach the per-optics-group columns to every micrograph row.
        ext_mics = ext_mics.join(
            ext_optics.set_index("rlnOpticsGroup"),
            on="rlnOpticsGroup",
            how="left",
            lsuffix="",
            rsuffix="_DROP",
        )

    # Optics values missing from the STAR file must come from the command line.
    optics_fallbacks = (
        ("rlnVoltage", args.kv, "--kv"),
        ("rlnSphericalAberration", args.cs, "--cs"),
        ("rlnAmplitudeContrast", args.amp, "--amp"),
    )
    for column, value, flag in optics_fallbacks:
        if column in ext_mics:
            continue
        if value is None:
            sys.exit(f"ERROR: {column} is not in the STAR file; provide it with {flag}.")
        ext_mics[column] = float(value)

    ext_mics["imname"] = [make_imname(f, "star", args) for f in ext_mics["rlnMicrographName"]]
    cs_mics["imname"] = [make_imname(f, "cs", args) for f in cs_mics["micrograph_blob/path"]]

    # Put both tables in the same (name-sorted) order so rows line up.
    # NOTE(review): Dataset.take is assumed to return the reordered dataset
    # rather than reorder in place, so the result is kept -- confirm against
    # the cryosparc-tools documentation. Keeping it is correct either way.
    cs_mics = cs_mics.take(np.argsort(cs_mics["imname"]))
    ext_mics = ext_mics.sort_values(by="imname").reset_index(drop=True)

    # Compare as plain lists so that a length mismatch is reported through
    # the diagnostic below instead of failing inside an element-wise compare.
    cs_names = list(cs_mics["imname"])
    star_names = list(ext_mics["imname"])
    if cs_names != star_names:
        _report_imname_mismatch(cs_names, star_names)
        sys.exit(2)

    # How each CryoSPARC CTF field is derived from the STAR columns.
    cs_info_access = {
        "ctf/exp_group_id": lambda df: df["rlnOpticsGroup"],
        "ctf/accel_kv": lambda df: df["rlnVoltage"],
        "ctf/cs_mm": lambda df: df["rlnSphericalAberration"],
        "ctf/amp_contrast": lambda df: df["rlnAmplitudeContrast"],
        "ctf/df1_A": lambda df: df["rlnDefocusU"],
        "ctf/df2_A": lambda df: df["rlnDefocusV"],
        "ctf/df_angle_rad": lambda df: np.deg2rad(df["rlnDefocusAngle"]),
        # Phase shift is optional in the STAR file; treat a missing column as 0.
        "ctf/phase_shift_rad": lambda df: np.deg2rad(df.get("rlnPhaseShift", 0)),
        "ctf/ctf_fit_to_A": lambda df: df["rlnCtfMaxResolution"],
    }
    for field, access_function in cs_info_access.items():
        cs_mics[field] = access_function(ext_mics)

    # The matching key was only needed for alignment; do not save it.
    cs_mics.drop_fields("imname")

    ext_job = project.create_external_job(
        workspace_uid=max(job.doc["workspace_uids"]),
        title="External CTF import",
        desc=f"Import external CTF values from {args.starfile_path}",
    )
    ext_job.add_input(
        type="exposure",
        name="micrographs",
        title="Micrographs",
    )
    ext_job.connect(
        target_input="micrographs",
        source_job_uid=juid,
        source_output=args.exposure_name,
    )
    ext_job.add_output(
        type="exposure",
        name="micrographs",
        passthrough="micrographs",
        slots=["ctf"],
        title="Micrographs",
        alloc=cs_mics,
    )

    with ext_job.run():
        ext_job.save_output("micrographs", cs_mics)

    job.log(f"CTF values added from {args.starfile_path}")
if __name__ == "__main__":
    # Command-line interface: three required positionals followed by the
    # optional flags, registered in the order they appear in --help.
    cli = argparse.ArgumentParser()

    positionals = (
        ("puid", "Project UID for micrograph import job in CryoSPARC"),
        ("juid", "Job UID for micrograph import job in CryoSPARC"),
        (
            "starfile_path",
            "Path to starfile containing CTF estimates. This starfile must contain *all* of the input micrographs and *no others*.",
        ),
    )
    for arg_name, description in positionals:
        cli.add_argument(arg_name, help=description)

    cli.add_argument(
        "--exposure-name",
        default="micrographs",
        help="Name of the exposures output in the CryoSPARC job. Default 'micrographs'.",
    )

    # Float-valued fallbacks for optics values absent from the STAR file.
    optics_flags = (
        ("--kv", "If accelerating voltage is not in starfile optics table, use this value."),
        ("--cs", "If spherical aberration is not in starfile optics table, use this value."),
        ("--amp", "If amplitude contrast is not in starfile optics table, use this value."),
    )
    for flag, description in optics_flags:
        cli.add_argument(flag, help=description, type=float)

    # Each trim option takes two integers: CryoSPARC first, then STAR.
    trim_flags = (
        (
            "--trim-start",
            "Number of characters to trim from the start of the CryoSPARC and STAR micrograph filenames, in that order.",
            [0, 0],
        ),
        (
            "--trim-end",
            "Number of characters to trim from the end of the CryoSPARC and STAR micrograph filenames, in that order.",
            [None, None],
        ),
    )
    for flag, description, fallback in trim_flags:
        cli.add_argument(flag, help=description, nargs=2, type=int, default=fallback)

    main(cli.parse_args())