Hi,
Is there any way (maybe CS tools?) to assign custom half-sets for particles?
I can assign custom half maps using the low-level results interface, but for a custom workflow I would like to assign custom half-sets (after treating two halves of the data independently in ab initio reconstruction/local refinement, for example).
Using custom half maps works fine for just calculating a straight FSC, but it does not support orientation diagnostics, local refinement, etc.
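To make the intent concrete, what I'm after is essentially being able to set the per-particle half-set assignment field myself, along the lines of this rough cryosparc-tools sketch (connection values, project and job UIDs are placeholders, and using alignments3D/split for this is my assumption):
from cryosparc.tools import CryoSPARC
cs = CryoSPARC(license="xxxx", host="localhost", base_port=39000, email="me@example.com", password="...")  # placeholder credentials
project = cs.find_project("P1")  # placeholder project UID
dsA = project.find_job("J1").load_output("particles")  # placeholder job UIDs
dsB = project.find_job("J2").load_output("particles")
dsA["alignments3D/split"][:] = 0  # half-set A
dsB["alignments3D/split"][:] = 1  # half-set B
...and then have downstream jobs (Reconstruct Only, local refinement, orientation diagnostics) actually respect that assignment.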
Cheers
Oli
EDIT: Ignore these and use the one Rich has linked to below!
Here is a ChatGPT-assisted solution I hacked together for this. I have done some testing and it seems to give sensible results (after quite a few iterations that did not), but I still need to pull it apart and test it more extensively; it is a starting point:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Combine two particle sets and enforce half-sets by overwriting an EXISTING
column that CryoSPARC v4.x actually uses. Priority:
1) alignments3D/split (u4 on v4.x; used by Reconstruct Only)
2) alignments3D/halfset
3) split (top-level)
The script:
• loads two particle outputs
• finds the first existing column above (with its original dtype)
• concatenates A+B
• overwrites that column to A->0 and B->1
• if the other two names exist in either input, mirrors the same 0/1 values
using their own original dtypes
• declares only blob/ctf/alignments3D slots
• reloads saved output and reports counts for alignments3D/split if present,
else for the canonical column it used
"""
from pathlib import Path
import json
import argparse
import numpy as np
# Robust imports for v4/v5
try:
from cryosparc.tools import CryoSPARC, Dataset, APIError
except Exception:
from cryosparc.tools import CryoSPARC, Dataset
try:
from cryosparc.errors import APIError
except Exception:
class APIError(Exception): pass
# ---------------- CLI ----------------
p = argparse.ArgumentParser(
description="Combine two CryoSPARC particle sets and set half-sets (A->0, B->1) by overwriting an existing column."
)
p.add_argument("--project","-p",required=True)
p.add_argument("--workspace","-w",required=True)
p.add_argument("--jobA","-a",required=True)
p.add_argument("--jobB","-b",required=True)
p.add_argument("--outputA","-oa",default=None)
p.add_argument("--outputB","-ob",default=None)
p.add_argument("--title","-t",default=None)
p.add_argument("--instance-info","-i",default=str(Path.home()/ "instance_info.json"))
p.add_argument("--allow-mixed-psize",action="store_true")
args = p.parse_args()
print(">> combine_halfsets_align3d_split_fixed starting...")
# ---------------- Connect ----------------
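# NOTE (assumption): instance_info.json is unpacked straight into the CryoSPARC()
# constructor below, so it is expected to hold the usual cryosparc-tools connection
# kwargs. Placeholder example:
# {
#   "license": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
#   "host": "localhost",
#   "base_port": 39000,
#   "email": "user@example.com",
#   "password": "..."
# }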
data = json.loads(Path(args.instance_info).read_text())
cs = CryoSPARC(**data)
if not cs.test_connection():
raise RuntimeError("Could not connect to CryoSPARC")
project = cs.find_project(args.project)
if project is None:
raise RuntimeError(f"Project not found: {args.project}")
jobA = project.find_job(args.jobA)
jobB = project.find_job(args.jobB)
if jobA is None or jobB is None:
raise RuntimeError("Could not find one or both jobs in the project")
# ---------------- Load particle outputs ----------------
COMMON = ["particles","particles_selected","particles_keep","particles_out","particles_split"]
def try_load(j, nm):
try:
ds = j.load_output(nm)
if ds is None or len(ds)==0: return None
if "blob/path" not in ds.fields(): return None
return ds
except Exception:
return None
def autodetect(j, forced=None):
if forced:
ds = try_load(j, forced)
if ds is None: raise RuntimeError(f"{j.uid}: cannot load '{forced}'")
return forced, ds
for nm in COMMON:
ds = try_load(j, nm)
if ds is not None: return nm, ds
raise RuntimeError(f"{j.uid}: no usable particle output among {COMMON}")
outA_name, A = autodetect(jobA, args.outputA)
outB_name, B = autodetect(jobB, args.outputB)
nA, nB = len(A), len(B)
print(f"Loaded: {jobA.uid}:{outA_name} n={nA:,} | {jobB.uid}:{outB_name} n={nB:,}")
if nA==0 or nB==0:
raise RuntimeError("One of the inputs is empty.")
fA, fB = set(A.fields()), set(B.fields())
# Required fields
req = {"blob/path","blob/psize_A","alignments3D/pose","alignments3D/shift"}
missA = req - fA; missB = req - fB
if missA or missB:
raise RuntimeError(f"Missing required fields. A:{sorted(missA)} B:{sorted(missB)}")
# Pixel size consistency
if not args.allow_mixed_psize:
uA = np.unique(np.asarray(A["blob/psize_A"]).ravel())
uB = np.unique(np.asarray(B["blob/psize_A"]).ravel())
if len(uA)!=1 or len(uB)!=1 or uA[0]!=uB[0]:
raise RuntimeError(f"Pixel sizes differ (A:{uA}, B:{uB}). Re-extract/rebin or use --allow-mixed-psize.")
# ---------------- Choose canonical column ----------------
pref_order = ["alignments3D/split", "alignments3D/halfset", "split"] # v4 uses alignments3D/split
canon_name = None
canon_dtype = None
for nm in pref_order:
if nm in fA:
canon_name = nm; canon_dtype = A[nm].dtype; break
if nm in fB:
canon_name = nm; canon_dtype = B[nm].dtype; break
if canon_name is None:
raise RuntimeError(
"No existing half-set column found ('alignments3D/split', 'alignments3D/halfset', or 'split'). "
"On v4.x, Reconstruct expects 'alignments3D/split' in the alignments3D group."
)
print(f"Canonical column: '{canon_name}' dtype={canon_dtype}")
# ---------------- Build combined dataset ----------------
def tail(arr):
try: return tuple(arr.shape[1:])
except Exception: return ()
def empty_like(field, ref, n):
tsh = tail(ref)
try: dt = ref.dtype
except Exception: dt = np.float32
if np.issubdtype(getattr(dt,"type",dt), np.floating):
return np.full((n,)+tsh, 0.0, dtype=dt)
if np.issubdtype(getattr(dt,"type",dt), np.integer):
return np.zeros((n,)+tsh, dtype=dt)
arr = np.empty((n,)+tsh, dtype=dt)
try: arr[...] = None
except Exception:
arr = np.empty((n,)+tsh, dtype=object); arr[...] = None
return arr
def empty_for(field, refds, n):
if field in refds.fields():
return empty_like(field, refds[field], n)
return np.array([None]*n, dtype=object)
# ensure blob/idx
idxA = A["blob/idx"] if "blob/idx" in fA else np.zeros(nA, dtype=np.int64)
idxB = B["blob/idx"] if "blob/idx" in fB else np.zeros(nB, dtype=np.int64)
# union of fields; we'll overwrite split fields after concat
union = sorted(fA.union(fB))
cols = {}
for f in union:
colA = A[f] if f in fA else empty_for(f, B, nA)
colB = B[f] if f in fB else empty_for(f, A, nB)
if f=="blob/idx": colA,colB = idxA,idxB
if tail(colA)!=tail(colB):
continue
cols[f] = np.concatenate([np.asarray(colA), np.asarray(colB)], axis=0)
N = nA + nB
# ---------------- Overwrite canonical column to A->0, B->1 ----------------
new_values = np.empty(N, dtype=canon_dtype)
new_values[:nA] = 0
new_values[nA:] = 1
cols[canon_name] = new_values
# If the other names exist in either input, mirror values there too (with THEIR dtypes)
other_names = [nm for nm in pref_order if nm != canon_name]
for nm in other_names:
if nm in (fA | fB):
dt = (A[nm].dtype if nm in fA else B[nm].dtype)
tmp = np.empty(N, dtype=dt)
tmp[:nA]=0; tmp[nA:]=1
cols[nm] = tmp
combined = Dataset(sorted(cols.items(), key=lambda kv: kv[0]))
# ---------------- Sanity check ----------------
def counts(arr):
u,c = np.unique(arr, return_counts=True)
return {int(k): int(v) for k,v in zip(u,c)}
if "alignments3D/split" in combined.fields():
c_pre = counts(combined["alignments3D/split"])
print(f"alignments3D/split counts (pre-save): {c_pre} (expected 0:{nA}, 1:{nB})")
else:
c_pre = counts(combined[canon_name])
print(f"{canon_name} counts (pre-save): {c_pre} (expected 0:{nA}, 1:{nB})")
# ---------------- External job save ----------------
title = args.title or f"Combined half-sets from {jobA.uid} ({outA_name}) + {jobB.uid} ({outB_name})"
ej = project.create_external_job(workspace_uid=args.workspace, title=title)
# minimal slots
try:
ver = cs.version(); major = int(str(ver).lstrip('v').split('.')[0])
except Exception:
major = 4
IS_V5 = major >= 5
if IS_V5:
slots_in = [{"name":"blob","dtype":"particle_blob"},
{"name":"ctf","dtype":"ctf"},
{"name":"alignments3D","dtype":"alignments3D"}]
slots_out = [{"name":"blob","dtype":"particle_blob"},
{"name":"ctf","dtype":"ctf"},
{"name":"alignments3D","dtype":"alignments3D"}]
else:
slots_in = ["blob","ctf","alignments3D"]
slots_out = ["blob","ctf","alignments3D"]
ej.add_input(type="particle", name="input_particles_A", slots=slots_in)
ej.add_input(type="particle", name="input_particles_B", slots=slots_in)
ej.connect(target_input="input_particles_A", source_job_uid=jobA.uid, source_output=outA_name)
ej.connect(target_input="input_particles_B", source_job_uid=jobB.uid, source_output=outB_name)
ej.add_output(type="particle", name="particles", slots=slots_out, alloc=combined)
with ej.run():
ej.log(f"Canonical column overwritten: '{canon_name}' -> A:0, B:1 (dtype {canon_dtype})")
if "alignments3D/split" in combined.fields():
ej.log(f"alignments3D/split counts (pre-save): {c_pre}")
else:
ej.log(f"{canon_name} counts (pre-save): {c_pre}")
ej.save_output("particles", combined)
ej.stop()
# ---------------- Verify after reload ----------------
try:
saved = project.find_job(ej.uid).load_output("particles")
if "alignments3D/split" in saved.fields():
c_post = counts(saved["alignments3D/split"])
print(f"[VERIFY] alignments3D/split counts (post-save): {c_post}")
else:
c_post = counts(saved[canon_name])
print(f"[VERIFY] {canon_name} counts (post-save): {c_post}")
except Exception as e:
print(f"[VERIFY] Reload failed: {e}")
print(f"Created External Job: '{title}'")
print("Done.")
EDIT2:
Simpler version with unnecessary cruft removed:
#!/usr/bin/env python3
# Combine two particle sets and enforce half-sets via alignments3D/split (A->0, B->1).
# Works with CryoSPARC v4.7x and v5. Minimal, dtype-agnostic (reuses input dtype).
from pathlib import Path
import json
import argparse
import numpy as np
try:
from cryosparc.tools import CryoSPARC, Dataset, APIError
except Exception:
from cryosparc.tools import CryoSPARC, Dataset
try:
from cryosparc.errors import APIError
except Exception:
class APIError(Exception): pass
CANDIDATE_OUTPUTS = ("particles", "particles_selected", "particles_keep", "particles_out", "particles_split")
WANTED_PREFIXES = ("blob/", "ctf/", "alignments3D/")
def autodetect_particles(job):
for nm in CANDIDATE_OUTPUTS:
try:
ds = job.load_output(nm)
if ds and len(ds) and "blob/path" in ds.fields():
return nm, ds
except Exception:
pass
raise RuntimeError(f"{job.uid}: no usable particle output found.")
def trailing_shape(arr):
try: return arr.shape[1:]
except Exception: return ()
def empty_like(ref, n):
tsh = trailing_shape(ref)
dt = getattr(ref, "dtype", object)
if hasattr(dt, "kind") and dt.kind in "iu":
return np.zeros((n,)+tsh, dtype=dt)
if hasattr(dt, "kind") and dt.kind == "f":
return np.zeros((n,)+tsh, dtype=dt)
out = np.empty((n,)+tsh, dtype=object); out[...] = None
return out
def empty_for(field, refds, n):
return empty_like(refds[field], n) if field in refds.fields() else np.array([None]*n, dtype=object)
def counts(arr):
u, c = np.unique(arr, return_counts=True)
return {int(k): int(v) for k, v in zip(u, c)}
# ---------------- CLI ----------------
p = argparse.ArgumentParser(description="Combine two CryoSPARC particle sets and enforce half-sets via alignments3D/split.")
p.add_argument("--project","-p", required=True)
p.add_argument("--workspace","-w", required=True)
p.add_argument("--jobA","-a", required=True, help="First job (becomes half-set 0)")
p.add_argument("--jobB","-b", required=True, help="Second job (becomes half-set 1)")
p.add_argument("--title","-t", default=None)
p.add_argument("--instance-info","-i", default=str(Path.home()/ "instance_info.json"))
args = p.parse_args()
print(">> combine_halfsets_min starting...")
# Connect
cfg = json.loads(Path(args.instance_info).read_text())
cs = CryoSPARC(**cfg)
if not cs.test_connection(): raise RuntimeError("Could not connect to CryoSPARC")
project = cs.find_project(args.project)
if project is None: raise RuntimeError(f"Project not found: {args.project}")
jobA = project.find_job(args.jobA)
if jobA is None: raise RuntimeError(f"Job not found: {args.jobA}")
jobB = project.find_job(args.jobB)
if jobB is None: raise RuntimeError(f"Job not found: {args.jobB}")
# Load inputs
outA_name, A = autodetect_particles(jobA)
outB_name, B = autodetect_particles(jobB)
nA, nB = len(A), len(B)
print(f"Loaded: {jobA.uid}:{outA_name} n={nA:,} | {jobB.uid}:{outB_name} n={nB:,}")
if nA == 0 or nB == 0: raise RuntimeError("One input is empty.")
fA, fB = set(A.fields()), set(B.fields())
# Sanity fields
req = {"blob/path", "blob/psize_A", "alignments3D/pose", "alignments3D/shift"}
missA, missB = req - fA, req - fB
if missA or missB:
raise RuntimeError(f"Missing required fields. A:{sorted(missA)} B:{sorted(missB)}")
# Strict pixel-size equality
uA = np.unique(np.asarray(A["blob/psize_A"]).ravel())
uB = np.unique(np.asarray(B["blob/psize_A"]).ravel())
if len(uA) != 1: raise RuntimeError(f"Set A has mixed pixel sizes: {uA}")
if len(uB) != 1: raise RuntimeError(f"Set B has mixed pixel sizes: {uB}")
if uA[0] != uB[0]: raise RuntimeError(f"Pixel sizes differ (A:{uA[0]}, B:{uB[0]}). ")
# Require alignments3D/split in at least one input
if "alignments3D/split" not in (fA | fB):
raise RuntimeError("Neither input has 'alignments3D/split'.")
split_dtype = (A["alignments3D/split"].dtype if "alignments3D/split" in fA else B["alignments3D/split"].dtype)
# Build combined table with only wanted groups and compatible trailing shapes
keepA = [f for f in fA if f.startswith(WANTED_PREFIXES)]
keepB = [f for f in fB if f.startswith(WANTED_PREFIXES)]
union = sorted(set(keepA).union(keepB))
# Ensure blob/idx exists on both sides
idxA = A["blob/idx"] if "blob/idx" in fA else np.zeros(nA, dtype=np.int64)
idxB = B["blob/idx"] if "blob/idx" in fB else np.zeros(nB, dtype=np.int64)
cols = {}
for f in union:
colA = A[f] if f in fA else empty_for(f, B, nA)
colB = B[f] if f in fB else empty_for(f, A, nB)
if f == "blob/idx": colA, colB = idxA, idxB
if trailing_shape(colA) != trailing_shape(colB):
continue
cols[f] = np.concatenate([np.asarray(colA), np.asarray(colB)], axis=0)
# Overwrite alignments3D/split to A->0, B->1 using the existing dtype
N = nA + nB
new_split = np.empty(N, dtype=split_dtype)
new_split[:nA] = 0
new_split[nA:] = 1
cols["alignments3D/split"] = new_split
combined = Dataset(sorted(cols.items(), key=lambda kv: kv[0]))
report = counts(combined["alignments3D/split"])
print(f"alignments3D/split (pre-save): {report} (expected 0:{nA}, 1:{nB})")
# External job
title = args.title or f"Combined half-sets from {jobA.uid} ({outA_name}) + {jobB.uid} ({outB_name})"
ej = project.create_external_job(workspace_uid=args.workspace, title=title)
try:
major = int(str(cs.version()).lstrip("v").split(".")[0])
except Exception:
major = 4
if major >= 5:
in_slots = [{"name":"blob","dtype":"particle_blob"},
{"name":"ctf","dtype":"ctf"},
{"name":"alignments3D","dtype":"alignments3D"}]
out_slots = [{"name":"blob","dtype":"particle_blob"},
{"name":"ctf","dtype":"ctf"},
{"name":"alignments3D","dtype":"alignments3D"}]
else:
in_slots, out_slots = ["blob","ctf","alignments3D"], ["blob","ctf","alignments3D"]
ej.add_input(type="particle", name="input_particles_A", slots=in_slots)
ej.add_input(type="particle", name="input_particles_B", slots=in_slots)
ej.connect(target_input="input_particles_A", source_job_uid=jobA.uid, source_output=outA_name)
ej.connect(target_input="input_particles_B", source_job_uid=jobB.uid, source_output=outB_name)
ej.add_output(type="particle", name="particles", slots=out_slots, alloc=combined)
with ej.run():
ej.log(f"Overwrote alignments3D/split → A:0 (n={nA:,}), B:1 (n={nB:,})")
ej.log(f"alignments3D/split counts (pre-save): {report}")
ej.save_output("particles", combined)
ej.stop()
# Verify
try:
saved = project.find_job(ej.uid).load_output("particles")
post = counts(saved["alignments3D/split"])
print(f"[VERIFY] alignments3D/split (post-save): {post}")
except Exception as e:
print(f"[VERIFY] reload failed: {e}")
print(f"Created External Job: '{title}'")
print("Done.")