Error in Deep Picker Train

Hi All,

I am running into the following error in a Deep Picker Train job, right after the data splitting and augmentation steps. Any ideas what could be wrong?

Traceback (most recent call last):
  File "cryosparc_master/cryosparc_compute/run.py", line 95, in cryosparc_master.cryosparc_compute.run.main
  File "cryosparc_master/cryosparc_compute/jobs/deep_picker/run_deep_picker.py", line 275, in cryosparc_master.cryosparc_compute.jobs.deep_picker.run_deep_picker.run_deep_picker_train
  File "cryosparc_master/cryosparc_compute/jobs/deep_picker/train.py", line 56, in cryosparc_master.cryosparc_compute.jobs.deep_picker.train.train_picker
  File "cryosparc_master/cryosparc_compute/jobs/deep_picker/train.py", line 121, in cryosparc_master.cryosparc_compute.jobs.deep_picker.train.train_picker
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 793, in from_tensor_slices
    return TensorSliceDataset(tensors, name=name)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 4477, in __init__
    element = structure.normalize_element(element)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/data/util/structure.py", line 125, in normalize_element
    ops.convert_to_tensor(t, name="component_%d" % i, dtype=dtype))
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/profiler/trace.py", line 183, in wrapped
    return func(*args, **kwargs)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 1695, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/framework/tensor_conversion_registry.py", line 48, in _default_conversion_function
    return constant_op.constant(value, dtype, name=name)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 267, in constant
    return _constant_impl(value, dtype, shape, name, verify_shape=False,
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 279, in _constant_impl
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 304, in _constant_eager_impl
    t = convert_to_eager_tensor(value, ctx, dtype)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 102, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)
tensorflow.python.framework.errors_impl.InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

Thank you in advance for any assistance!

Here is the job log (part 1):


================= CRYOSPARCW =======  2024-04-16 22:12:35.640068  =========
Project P4 Job J411
Master laue Port 39002
===========================================================================
========= monitor process now starting main process at 2024-04-16 22:12:35.640205
MAINPROCESS PID 132655
MAIN PID 132655
deep_picker.run_deep_picker cryosparc_compute.jobs.jobregister
========= monitor process now waiting for main process
========= sending heartbeat at 2024-04-16 22:12:57.328795
***************************************************************
Running job on hostname %s laue
Allocated Resources :  {'fixed': {'SSD': False}, 'hostname': 'laue', 'lane': 'default', 'lane_type': 'node', 'license': True, 'licenses_acquired': 4, 'slots': {'CPU': [0, 1, 2, 3], 'GPU': [0, 1, 2, 3], 'RAM': [0, 1, 2, 3, 4]}, 'target': {'cache_path': '/data/cryosparc/cryosparc_scratch', 'cache_quota_mb': None, 'cache_reserve_mb': 10000, 'desc': None, 'gpus': [{'id': 0, 'mem': 51049857024, 'name': 'NVIDIA RTX A6000'}, {'id': 1, 'mem': 51050250240, 'name': 'NVIDIA RTX A6000'}, {'id': 2, 'mem': 51050250240, 'name': 'NVIDIA RTX A6000'}, {'id': 3, 'mem': 51050250240, 'name': 'NVIDIA RTX A6000'}], 'hostname': 'laue', 'lane': 'default', 'monitor_port': None, 'name': 'laue', 'resource_fixed': {'SSD': True}, 'resource_slots': {'CPU': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95], 'GPU': [0, 1, 2, 3], 'RAM': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]}, 'ssh_str': 'cryosparc@laue', 'title': 'Worker node laue', 'type': 'node', 'worker_bin_path': '/data/cryosparc/cryosparc_worker/bin/cryosparcw'}}
/data/cryosparc/cryosparc_worker/cryosparc_compute/jobs/motioncorrection/mic_utils.py:95: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @jit(nogil=True)
/data/cryosparc/cryosparc_worker/cryosparc_compute/micrographs.py:563: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  def contrast_normalization(arr_bin, tile_size = 128):
========= sending heartbeat at 2024-04-16 22:13:07.356045
========= sending heartbeat at 2024-04-16 22:13:17.378370
========= sending heartbeat at 2024-04-16 22:13:27.403867
========= sending heartbeat at 2024-04-16 22:13:37.431766
========= sending heartbeat at 2024-04-16 22:13:47.457951
========= sending heartbeat at 2024-04-16 22:13:57.483336
========= sending heartbeat at 2024-04-16 22:14:07.519228
========= sending heartbeat at 2024-04-16 22:14:17.545052
========= sending heartbeat at 2024-04-16 22:14:27.573283
========= sending heartbeat at 2024-04-16 22:14:37.599625
========= sending heartbeat at 2024-04-16 22:14:47.625984
========= sending heartbeat at 2024-04-16 22:14:57.653187
========= sending heartbeat at 2024-04-16 22:15:07.678762
========= sending heartbeat at 2024-04-16 22:15:17.705505
========= sending heartbeat at 2024-04-16 22:15:27.732402
========= sending heartbeat at 2024-04-16 22:15:37.761068
========= sending heartbeat at 2024-04-16 22:15:47.787511
========= sending heartbeat at 2024-04-16 22:15:57.814052
========= sending heartbeat at 2024-04-16 22:16:07.841398
========= sending heartbeat at 2024-04-16 22:16:17.868922
========= sending heartbeat at 2024-04-16 22:16:27.895398
========= sending heartbeat at 2024-04-16 22:16:37.922470
========= sending heartbeat at 2024-04-16 22:16:47.949428
========= sending heartbeat at 2024-04-16 22:16:57.975698
========= sending heartbeat at 2024-04-16 22:17:08.001908
========= sending heartbeat at 2024-04-16 22:17:18.029030
========= sending heartbeat at 2024-04-16 22:17:28.055439
========= sending heartbeat at 2024-04-16 22:17:38.082050
========= sending heartbeat at 2024-04-16 22:17:48.109162
========= sending heartbeat at 2024-04-16 22:17:58.137788
========= sending heartbeat at 2024-04-16 22:18:08.165572
========= sending heartbeat at 2024-04-16 22:18:18.186083
========= sending heartbeat at 2024-04-16 22:18:28.214099
========= sending heartbeat at 2024-04-16 22:18:38.240401
========= sending heartbeat at 2024-04-16 22:18:48.267578
========= sending heartbeat at 2024-04-16 22:18:58.294564
========= sending heartbeat at 2024-04-16 22:19:08.322539
========= sending heartbeat at 2024-04-16 22:19:18.348680
========= sending heartbeat at 2024-04-16 22:19:28.375634
========= sending heartbeat at 2024-04-16 22:19:38.402049
========= sending heartbeat at 2024-04-16 22:19:48.428231
========= sending heartbeat at 2024-04-16 22:19:58.446365
========= sending heartbeat at 2024-04-16 22:20:08.472411
========= sending heartbeat at 2024-04-16 22:20:18.499413
========= sending heartbeat at 2024-04-16 22:20:28.526485
========= sending heartbeat at 2024-04-16 22:20:38.550987
========= sending heartbeat at 2024-04-16 22:20:48.578760
========= sending heartbeat at 2024-04-16 22:20:58.604281
========= sending heartbeat at 2024-04-16 22:21:08.630862
========= sending heartbeat at 2024-04-16 22:21:18.657359
========= sending heartbeat at 2024-04-16 22:21:28.683326
========= sending heartbeat at 2024-04-16 22:21:38.709326
========= sending heartbeat at 2024-04-16 22:21:48.737763
========= sending heartbeat at 2024-04-16 22:21:58.771548
========= sending heartbeat at 2024-04-16 22:22:08.800838
========= sending heartbeat at 2024-04-16 22:22:18.826209
========= sending heartbeat at 2024-04-16 22:22:28.853207
========= sending heartbeat at 2024-04-16 22:22:38.879823
========= sending heartbeat at 2024-04-16 22:22:48.905865
========= sending heartbeat at 2024-04-16 22:22:58.932514
========= sending heartbeat at 2024-04-16 22:23:08.960851
========= sending heartbeat at 2024-04-16 22:23:18.987317
========= sending heartbeat at 2024-04-16 22:23:29.016335
========= sending heartbeat at 2024-04-16 22:23:39.042368
========= sending heartbeat at 2024-04-16 22:23:49.068754
========= sending heartbeat at 2024-04-16 22:23:59.089129
========= sending heartbeat at 2024-04-16 22:24:09.116510
========= sending heartbeat at 2024-04-16 22:24:19.133901
========= sending heartbeat at 2024-04-16 22:24:29.160925
========= sending heartbeat at 2024-04-16 22:24:39.188198
========= sending heartbeat at 2024-04-16 22:24:49.214391
========= sending heartbeat at 2024-04-16 22:24:59.241227
========= sending heartbeat at 2024-04-16 22:25:09.268996
========= sending heartbeat at 2024-04-16 22:25:19.295854
========= sending heartbeat at 2024-04-16 22:25:29.328792
========= sending heartbeat at 2024-04-16 22:25:39.363113
========= sending heartbeat at 2024-04-16 22:25:49.389040
========= sending heartbeat at 2024-04-16 22:25:59.415542
========= sending heartbeat at 2024-04-16 22:26:09.445592
========= sending heartbeat at 2024-04-16 22:26:19.471534
========= sending heartbeat at 2024-04-16 22:26:29.497127
========= sending heartbeat at 2024-04-16 22:26:39.522977
========= sending heartbeat at 2024-04-16 22:26:49.553318
========= sending heartbeat at 2024-04-16 22:26:59.588099
========= sending heartbeat at 2024-04-16 22:27:09.613993
========= sending heartbeat at 2024-04-16 22:27:19.640965
========= sending heartbeat at 2024-04-16 22:27:29.666302
========= sending heartbeat at 2024-04-16 22:27:39.691536
========= sending heartbeat at 2024-04-16 22:27:49.717460
========= sending heartbeat at 2024-04-16 22:27:59.743522
========= sending heartbeat at 2024-04-16 22:28:09.769418
========= sending heartbeat at 2024-04-16 22:28:19.792377
========= sending heartbeat at 2024-04-16 22:28:29.817336
========= sending heartbeat at 2024-04-16 22:28:39.842771
========= sending heartbeat at 2024-04-16 22:28:49.869409
========= sending heartbeat at 2024-04-16 22:28:59.904462
========= sending heartbeat at 2024-04-16 22:29:09.929439
========= sending heartbeat at 2024-04-16 22:29:19.954017
========= sending heartbeat at 2024-04-16 22:29:29.986709
========= sending heartbeat at 2024-04-16 22:29:40.012338
========= sending heartbeat at 2024-04-16 22:29:50.037764
========= sending heartbeat at 2024-04-16 22:30:00.063851
========= sending heartbeat at 2024-04-16 22:30:10.089383
========= sending heartbeat at 2024-04-16 22:30:20.116392
========= sending heartbeat at 2024-04-16 22:30:30.132766
========= sending heartbeat at 2024-04-16 22:30:40.159335
========= sending heartbeat at 2024-04-16 22:30:50.184900
========= sending heartbeat at 2024-04-16 22:31:00.210169
========= sending heartbeat at 2024-04-16 22:31:10.236035
========= sending heartbeat at 2024-04-16 22:31:20.262056
========= sending heartbeat at 2024-04-16 22:31:30.287653
========= sending heartbeat at 2024-04-16 22:31:40.313934
========= sending heartbeat at 2024-04-16 22:31:50.339289
========= sending heartbeat at 2024-04-16 22:32:00.364663
========= sending heartbeat at 2024-04-16 22:32:10.390023
========= sending heartbeat at 2024-04-16 22:32:20.416998
========= sending heartbeat at 2024-04-16 22:32:30.442121
========= sending heartbeat at 2024-04-16 22:32:40.468291
========= sending heartbeat at 2024-04-16 22:32:50.493866
========= sending heartbeat at 2024-04-16 22:33:00.518892
========= sending heartbeat at 2024-04-16 22:33:10.543317
========= sending heartbeat at 2024-04-16 22:33:20.568980
========= sending heartbeat at 2024-04-16 22:33:30.595273
========= sending heartbeat at 2024-04-16 22:33:40.623434
========= sending heartbeat at 2024-04-16 22:33:50.651313
========= sending heartbeat at 2024-04-16 22:34:00.677461
========= sending heartbeat at 2024-04-16 22:34:11.696868
========= sending heartbeat at 2024-04-16 22:34:21.762656
========= sending heartbeat at 2024-04-16 22:34:31.788051
========= sending heartbeat at 2024-04-16 22:34:41.815839
========= sending heartbeat at 2024-04-16 22:34:51.841874
========= sending heartbeat at 2024-04-16 22:35:01.862851
========= sending heartbeat at 2024-04-16 22:35:11.890041
========= sending heartbeat at 2024-04-16 22:35:21.916498
========= sending heartbeat at 2024-04-16 22:35:32.389256
========= sending heartbeat at 2024-04-16 22:35:42.414832
========= sending heartbeat at 2024-04-16 22:35:52.440893
========= sending heartbeat at 2024-04-16 22:36:02.467906
========= sending heartbeat at 2024-04-16 22:36:12.494599
========= sending heartbeat at 2024-04-16 22:36:22.520841
========= sending heartbeat at 2024-04-16 22:36:32.546699
========= sending heartbeat at 2024-04-16 22:36:42.572325
========= sending heartbeat at 2024-04-16 22:36:52.598004
========= sending heartbeat at 2024-04-16 22:37:02.623751
========= sending heartbeat at 2024-04-16 22:37:12.649364
========= sending heartbeat at 2024-04-16 22:37:22.675846
========= sending heartbeat at 2024-04-16 22:37:32.701429
========= sending heartbeat at 2024-04-16 22:37:42.729495
========= sending heartbeat at 2024-04-16 22:37:52.755109
========= sending heartbeat at 2024-04-16 22:38:02.780524
========= sending heartbeat at 2024-04-16 22:38:12.808828
========= sending heartbeat at 2024-04-16 22:38:22.834716
========= sending heartbeat at 2024-04-16 22:38:32.860699
========= sending heartbeat at 2024-04-16 22:38:42.886686
========= sending heartbeat at 2024-04-16 22:38:52.912448
========= sending heartbeat at 2024-04-16 22:39:02.937803
========= sending heartbeat at 2024-04-16 22:39:12.963116
========= sending heartbeat at 2024-04-16 22:39:22.988338
========= sending heartbeat at 2024-04-16 22:39:33.007729
========= sending heartbeat at 2024-04-16 22:39:43.033393
========= sending heartbeat at 2024-04-16 22:39:53.059025
========= sending heartbeat at 2024-04-16 22:40:03.084643
========= sending heartbeat at 2024-04-16 22:40:13.104400
========= sending heartbeat at 2024-04-16 22:40:23.130464
========= sending heartbeat at 2024-04-16 22:40:33.156350
========= sending heartbeat at 2024-04-16 22:40:43.181904
========= sending heartbeat at 2024-04-16 22:40:53.207799
========= sending heartbeat at 2024-04-16 22:41:03.233921
========= sending heartbeat at 2024-04-16 22:41:13.251864
========= sending heartbeat at 2024-04-16 22:41:23.856488
========= sending heartbeat at 2024-04-16 22:41:33.882363
========= sending heartbeat at 2024-04-16 22:41:44.137493
========= sending heartbeat at 2024-04-16 22:41:54.165431
========= sending heartbeat at 2024-04-16 22:42:04.193435
========= sending heartbeat at 2024-04-16 22:42:14.221119
========= sending heartbeat at 2024-04-16 22:42:24.247023
========= sending heartbeat at 2024-04-16 22:42:34.274489
========= sending heartbeat at 2024-04-16 22:42:44.300548
========= sending heartbeat at 2024-04-16 22:42:54.332483
========= sending heartbeat at 2024-04-16 22:43:04.359533
========= sending heartbeat at 2024-04-16 22:43:14.386448
========= sending heartbeat at 2024-04-16 22:43:24.413476
========= sending heartbeat at 2024-04-16 22:43:34.482557
========= sending heartbeat at 2024-04-16 22:43:44.500862
========= sending heartbeat at 2024-04-16 22:43:54.522311
========= sending heartbeat at 2024-04-16 22:44:04.548867
========= sending heartbeat at 2024-04-16 22:44:14.575546
========= sending heartbeat at 2024-04-16 22:44:24.601939
========= sending heartbeat at 2024-04-16 22:44:34.628834
========= sending heartbeat at 2024-04-16 22:44:44.654363
========= sending heartbeat at 2024-04-16 22:44:54.679987
========= sending heartbeat at 2024-04-16 22:45:04.705686
========= sending heartbeat at 2024-04-16 22:45:14.777178
========= sending heartbeat at 2024-04-16 22:45:24.803069
========= sending heartbeat at 2024-04-16 22:45:34.829249
========= sending heartbeat at 2024-04-16 22:45:44.973224
========= sending heartbeat at 2024-04-16 22:45:55.003752
========= sending heartbeat at 2024-04-16 22:46:05.029336
========= sending heartbeat at 2024-04-16 22:46:15.055021
========= sending heartbeat at 2024-04-16 22:46:25.082746
========= sending heartbeat at 2024-04-16 22:46:35.108914
========= sending heartbeat at 2024-04-16 22:46:45.131694
========= sending heartbeat at 2024-04-16 22:46:55.155644
========= sending heartbeat at 2024-04-16 22:47:05.178558
========= sending heartbeat at 2024-04-16 22:47:15.204289
========= sending heartbeat at 2024-04-16 22:47:25.230099
========= sending heartbeat at 2024-04-16 22:47:35.454866
========= sending heartbeat at 2024-04-16 22:47:45.604098
========= sending heartbeat at 2024-04-16 22:47:55.676084
========= sending heartbeat at 2024-04-16 22:48:05.806371
========= sending heartbeat at 2024-04-16 22:48:15.832226
========= sending heartbeat at 2024-04-16 22:48:25.848881
========= sending heartbeat at 2024-04-16 22:48:36.100809
========= sending heartbeat at 2024-04-16 22:48:46.196669
========= sending heartbeat at 2024-04-16 22:48:56.245506
========= sending heartbeat at 2024-04-16 22:49:06.397496
========= sending heartbeat at 2024-04-16 22:49:16.424022
========= sending heartbeat at 2024-04-16 22:49:26.450318
========= sending heartbeat at 2024-04-16 22:49:36.475713
========= sending heartbeat at 2024-04-16 22:49:46.520100
========= sending heartbeat at 2024-04-16 22:49:56.547452
========= sending heartbeat at 2024-04-16 22:50:06.597435
========= sending heartbeat at 2024-04-16 22:50:16.625021
========= sending heartbeat at 2024-04-16 22:50:26.655236
========= sending heartbeat at 2024-04-16 22:50:36.707959
========= sending heartbeat at 2024-04-16 22:50:46.753018
========= sending heartbeat at 2024-04-16 22:50:56.797530
========= sending heartbeat at 2024-04-16 22:51:06.843900
========= sending heartbeat at 2024-04-16 22:51:16.872848
========= sending heartbeat at 2024-04-16 22:51:26.950218
========= sending heartbeat at 2024-04-16 22:51:36.988969
========= sending heartbeat at 2024-04-16 22:51:47.010275
========= sending heartbeat at 2024-04-16 22:51:57.036047
========= sending heartbeat at 2024-04-16 22:52:07.063780
========= sending heartbeat at 2024-04-16 22:52:17.090903
========= sending heartbeat at 2024-04-16 22:52:27.117973
========= sending heartbeat at 2024-04-16 22:52:37.146193
========= sending heartbeat at 2024-04-16 22:52:47.172205
========= sending heartbeat at 2024-04-16 22:52:57.199535
========= sending heartbeat at 2024-04-16 22:53:07.225812
========= sending heartbeat at 2024-04-16 22:53:17.254269
========= sending heartbeat at 2024-04-16 22:53:27.281741
========= sending heartbeat at 2024-04-16 22:53:37.309613
========= sending heartbeat at 2024-04-16 22:53:47.337465
========= sending heartbeat at 2024-04-16 22:53:57.359256
========= sending heartbeat at 2024-04-16 22:54:07.386898
========= sending heartbeat at 2024-04-16 22:54:17.413099
========= sending heartbeat at 2024-04-16 22:54:27.439603
========= sending heartbeat at 2024-04-16 22:54:37.467004
========= sending heartbeat at 2024-04-16 22:54:47.492185
========= sending heartbeat at 2024-04-16 22:54:57.517215
========= sending heartbeat at 2024-04-16 22:55:07.544499
========= sending heartbeat at 2024-04-16 22:55:17.573823
========= sending heartbeat at 2024-04-16 22:55:27.599493
========= sending heartbeat at 2024-04-16 22:55:37.627318
========= sending heartbeat at 2024-04-16 22:55:47.652059
========= sending heartbeat at 2024-04-16 22:55:57.685709
========= sending heartbeat at 2024-04-16 22:56:07.714662
========= sending heartbeat at 2024-04-16 22:56:17.741874
========= sending heartbeat at 2024-04-16 22:56:27.767783
========= sending heartbeat at 2024-04-16 22:56:37.793201
========= sending heartbeat at 2024-04-16 22:56:47.818492
========= sending heartbeat at 2024-04-16 22:56:57.845105
========= sending heartbeat at 2024-04-16 22:57:07.870391
========= sending heartbeat at 2024-04-16 22:57:17.895945
========= sending heartbeat at 2024-04-16 22:57:27.921337
========= sending heartbeat at 2024-04-16 22:57:37.948953
========= sending heartbeat at 2024-04-16 22:57:47.969779
========= sending heartbeat at 2024-04-16 22:57:57.995056
========= sending heartbeat at 2024-04-16 22:58:08.010825
========= sending heartbeat at 2024-04-16 22:58:18.036213
========= sending heartbeat at 2024-04-16 22:58:28.062138
========= sending heartbeat at 2024-04-16 22:58:38.079043
========= sending heartbeat at 2024-04-16 22:58:48.094442
========= sending heartbeat at 2024-04-16 22:58:58.119126
========= sending heartbeat at 2024-04-16 22:59:08.148703
========= sending heartbeat at 2024-04-16 22:59:18.182037
========= sending heartbeat at 2024-04-16 22:59:28.208858
========= sending heartbeat at 2024-04-16 22:59:38.235856
========= sending heartbeat at 2024-04-16 22:59:48.260295
========= sending heartbeat at 2024-04-16 22:59:58.285748
========= sending heartbeat at 2024-04-16 23:00:08.311585
========= sending heartbeat at 2024-04-16 23:00:18.339909
========= sending heartbeat at 2024-04-16 23:00:28.365457
========= sending heartbeat at 2024-04-16 23:00:38.391600
========= sending heartbeat at 2024-04-16 23:00:48.418612
========= sending heartbeat at 2024-04-16 23:00:58.448261
========= sending heartbeat at 2024-04-16 23:01:08.474821
========= sending heartbeat at 2024-04-16 23:01:18.501685
========= sending heartbeat at 2024-04-16 23:01:28.526697
========= sending heartbeat at 2024-04-16 23:01:38.550842
========= sending heartbeat at 2024-04-16 23:01:48.574879
========= sending heartbeat at 2024-04-16 23:01:58.589830
========= sending heartbeat at 2024-04-16 23:02:08.615665
========= sending heartbeat at 2024-04-16 23:02:18.641740
========= sending heartbeat at 2024-04-16 23:02:28.667473
========= sending heartbeat at 2024-04-16 23:02:38.691421
========= sending heartbeat at 2024-04-16 23:02:48.717854
========= sending heartbeat at 2024-04-16 23:02:58.743216
========= sending heartbeat at 2024-04-16 23:03:08.767643
========= sending heartbeat at 2024-04-16 23:03:18.794960
========= sending heartbeat at 2024-04-16 23:03:28.820528
========= sending heartbeat at 2024-04-16 23:03:38.843769
========= sending heartbeat at 2024-04-16 23:03:48.866825
========= sending heartbeat at 2024-04-16 23:03:58.891759
========= sending heartbeat at 2024-04-16 23:04:08.917153
========= sending heartbeat at 2024-04-16 23:04:18.942100
========= sending heartbeat at 2024-04-16 23:04:28.966936
========= sending heartbeat at 2024-04-16 23:04:38.991059
========= sending heartbeat at 2024-04-16 23:04:49.014941
========= sending heartbeat at 2024-04-16 23:04:59.039209
========= sending heartbeat at 2024-04-16 23:05:09.064503
========= sending heartbeat at 2024-04-16 23:05:19.089495
========= sending heartbeat at 2024-04-16 23:05:29.114564
========= sending heartbeat at 2024-04-16 23:05:39.139491
========= sending heartbeat at 2024-04-16 23:05:49.159471
========= sending heartbeat at 2024-04-16 23:05:59.179499
========= sending heartbeat at 2024-04-16 23:06:09.199487
========= sending heartbeat at 2024-04-16 23:06:19.223472
========= sending heartbeat at 2024-04-16 23:06:29.248444
========= sending heartbeat at 2024-04-16 23:06:39.272910
========= sending heartbeat at 2024-04-16 23:06:49.296912
========= sending heartbeat at 2024-04-16 23:06:59.321271
========= sending heartbeat at 2024-04-16 23:07:09.345646
========= sending heartbeat at 2024-04-16 23:07:19.369796
========= sending heartbeat at 2024-04-16 23:07:29.394717
========= sending heartbeat at 2024-04-16 23:07:39.419275
========= sending heartbeat at 2024-04-16 23:07:49.443100
========= sending heartbeat at 2024-04-16 23:07:59.467345
========= sending heartbeat at 2024-04-16 23:08:09.492201
========= sending heartbeat at 2024-04-16 23:08:19.519537
========= sending heartbeat at 2024-04-16 23:08:29.545485
========= sending heartbeat at 2024-04-16 23:08:39.569957
========= sending heartbeat at 2024-04-16 23:08:49.594252
========= sending heartbeat at 2024-04-16 23:08:59.619675
========= sending heartbeat at 2024-04-16 23:09:09.651898
========= sending heartbeat at 2024-04-16 23:09:19.678229
========= sending heartbeat at 2024-04-16 23:09:29.703874
========= sending heartbeat at 2024-04-16 23:09:39.727825
========= sending heartbeat at 2024-04-16 23:09:49.753856
========= sending heartbeat at 2024-04-16 23:09:59.778708
========= sending heartbeat at 2024-04-16 23:10:09.804949
========= sending heartbeat at 2024-04-16 23:10:19.834444
========= sending heartbeat at 2024-04-16 23:10:29.860428
========= sending heartbeat at 2024-04-16 23:10:39.886272
========= sending heartbeat at 2024-04-16 23:10:49.911878
========= sending heartbeat at 2024-04-16 23:10:59.936546
========= sending heartbeat at 2024-04-16 23:11:09.961898
========= sending heartbeat at 2024-04-16 23:11:19.987411
========= sending heartbeat at 2024-04-16 23:11:30.012512
========= sending heartbeat at 2024-04-16 23:11:40.037402
========= sending heartbeat at 2024-04-16 23:11:50.062470
========= sending heartbeat at 2024-04-16 23:12:00.087878
========= sending heartbeat at 2024-04-16 23:12:10.112681
========= sending heartbeat at 2024-04-16 23:12:20.137864
========= sending heartbeat at 2024-04-16 23:12:30.163075
========= sending heartbeat at 2024-04-16 23:12:40.188185
========= sending heartbeat at 2024-04-16 23:12:50.213140
========= sending heartbeat at 2024-04-16 23:13:00.238278
========= sending heartbeat at 2024-04-16 23:13:10.263535
========= sending heartbeat at 2024-04-16 23:13:20.289100
========= sending heartbeat at 2024-04-16 23:13:30.314277
========= sending heartbeat at 2024-04-16 23:13:40.340557
========= sending heartbeat at 2024-04-16 23:13:50.365496
========= sending heartbeat at 2024-04-16 23:14:00.388923
========= sending heartbeat at 2024-04-16 23:14:10.409080
========= sending heartbeat at 2024-04-16 23:14:20.429456
========= sending heartbeat at 2024-04-16 23:14:30.449523
========= sending heartbeat at 2024-04-16 23:14:40.475827
========= sending heartbeat at 2024-04-16 23:14:50.501937
========= sending heartbeat at 2024-04-16 23:15:00.531371
========= sending heartbeat at 2024-04-16 23:15:10.559420
========= sending heartbeat at 2024-04-16 23:15:20.584849
========= sending heartbeat at 2024-04-16 23:15:30.612772
========= sending heartbeat at 2024-04-16 23:15:40.637962
========= sending heartbeat at 2024-04-16 23:15:50.662950
========= sending heartbeat at 2024-04-16 23:16:00.687474
========= sending heartbeat at 2024-04-16 23:16:10.712141
========= sending heartbeat at 2024-04-16 23:16:20.737015
========= sending heartbeat at 2024-04-16 23:16:30.754814
========= sending heartbeat at 2024-04-16 23:16:40.781287
========= sending heartbeat at 2024-04-16 23:16:50.814061
========= sending heartbeat at 2024-04-16 23:17:00.839333
========= sending heartbeat at 2024-04-16 23:17:10.863063
========= sending heartbeat at 2024-04-16 23:17:20.882973
========= sending heartbeat at 2024-04-16 23:17:30.902950
========= sending heartbeat at 2024-04-16 23:17:40.923509
========= sending heartbeat at 2024-04-16 23:17:50.948451
========= sending heartbeat at 2024-04-16 23:18:00.973071
========= sending heartbeat at 2024-04-16 23:18:10.999058
========= sending heartbeat at 2024-04-16 23:18:21.026188
========= sending heartbeat at 2024-04-16 23:18:31.051163
2024-04-16 23:18:39.047168: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
========= sending heartbeat at 2024-04-16 23:18:41.076349
2024-04-16 23:18:45.354177: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 46702 MB memory:  -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:3b:00.0, compute capability: 8.6
2024-04-16 23:18:45.361049: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 46721 MB memory:  -> device: 1, name: NVIDIA RTX A6000, pci bus id: 0000:5e:00.0, compute capability: 8.6
2024-04-16 23:18:45.362537: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 46721 MB memory:  -> device: 2, name: NVIDIA RTX A6000, pci bus id: 0000:af:00.0, compute capability: 8.6
2024-04-16 23:18:45.363935: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 46721 MB memory:  -> device: 3, name: NVIDIA RTX A6000, pci bus id: 0000:d8:00.0, compute capability: 8.6
2024-04-16 23:18:45.527142: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 31726764032 exceeds 10% of free system memory.
========= sending heartbeat at 2024-04-16 23:18:51.101740
========= sending heartbeat at 2024-04-16 23:19:01.125985
========= sending heartbeat at 2024-04-16 23:19:11.150709
========= sending heartbeat at 2024-04-16 23:19:21.175191
========= sending heartbeat at 2024-04-16 23:19:31.199995
========= sending heartbeat at 2024-04-16 23:19:41.224649
========= sending heartbeat at 2024-04-16 23:19:51.250264
========= sending heartbeat at 2024-04-16 23:20:01.275178
========= sending heartbeat at 2024-04-16 23:20:11.300196
========= sending heartbeat at 2024-04-16 23:20:21.326659
========= sending heartbeat at 2024-04-16 23:20:31.351281
========= sending heartbeat at 2024-04-16 23:20:41.376307
========= sending heartbeat at 2024-04-16 23:20:51.402551
========= sending heartbeat at 2024-04-16 23:21:01.428826
2024-04-16 23:21:02.032255: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 31726764032 exceeds 10% of free system memory.
========= sending heartbeat at 2024-04-16 23:21:11.454306
========= sending heartbeat at 2024-04-16 23:21:21.478788
========= sending heartbeat at 2024-04-16 23:21:31.504523
========= sending heartbeat at 2024-04-16 23:21:41.530051
========= sending heartbeat at 2024-04-16 23:21:51.555163
========= sending heartbeat at 2024-04-16 23:22:01.580180
========= sending heartbeat at 2024-04-16 23:22:11.606481
========= sending heartbeat at 2024-04-16 23:22:21.631909
========= sending heartbeat at 2024-04-16 23:22:31.683848


Log part 2 (GPU out-of-memory allocator dump, followed by the final traceback):

2024-04-16 23:22:39.829932: W tensorflow/core/common_runtime/bfc_allocator.cc:462] Allocator (GPU_0_bfc) ran out of memory trying to allocate 29.55GiB (rounded to 31726764032)requested by op _EagerConst
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2024-04-16 23:22:39.830113: I tensorflow/core/common_runtime/bfc_allocator.cc:1010] BFCAllocator dump for GPU_0_bfc
2024-04-16 23:22:39.830154: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (256): 	Total Chunks: 4, Chunks in use: 4. 1.0KiB allocated for chunks. 1.0KiB in use in bin. 16B client-requested in use in bin.
2024-04-16 23:22:39.830182: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (512): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830209: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (1024): 	Total Chunks: 1, Chunks in use: 1. 1.2KiB allocated for chunks. 1.2KiB in use in bin. 1.0KiB client-requested in use in bin.
2024-04-16 23:22:39.830234: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (2048): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830258: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (4096): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830281: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (8192): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830304: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (16384): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830327: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (32768): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830349: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (65536): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830373: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (131072): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830395: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (262144): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830418: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (524288): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830442: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (1048576): 	Total Chunks: 1, Chunks in use: 0. 2.00MiB allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830466: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (2097152): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830489: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (4194304): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830511: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (8388608): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830534: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (16777216): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830572: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (33554432): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830596: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (67108864): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830636: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (134217728): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2024-04-16 23:22:39.830666: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (268435456): 	Total Chunks: 2, Chunks in use: 1. 32.00GiB allocated for chunks. 29.55GiB in use in bin. 29.55GiB client-requested in use in bin.
2024-04-16 23:22:39.830700: I tensorflow/core/common_runtime/bfc_allocator.cc:1033] Bin for 29.55GiB was 256.00MiB, Chunk State: 
2024-04-16 23:22:39.830737: I tensorflow/core/common_runtime/bfc_allocator.cc:1039]   Size: 2.45GiB | Requested Size: 0B | in_use: 0 | bin_num: 20, prev:   Size: 29.55GiB | Requested Size: 29.55GiB | in_use: 1 | bin_num: -1
2024-04-16 23:22:39.830759: I tensorflow/core/common_runtime/bfc_allocator.cc:1046] Next region of size 34359738368
2024-04-16 23:22:39.830788: I tensorflow/core/common_runtime/bfc_allocator.cc:1066] InUse at 7fac70000000 of size 31726764032 next 7
2024-04-16 23:22:39.830808: I tensorflow/core/common_runtime/bfc_allocator.cc:1066] Free  at 7fb3d3100000 of size 2632974336 next 18446744073709551615
2024-04-16 23:22:39.830828: I tensorflow/core/common_runtime/bfc_allocator.cc:1046] Next region of size 2097152
2024-04-16 23:22:39.830848: I tensorflow/core/common_runtime/bfc_allocator.cc:1066] InUse at 7fcb7b800000 of size 256 next 1
2024-04-16 23:22:39.830868: I tensorflow/core/common_runtime/bfc_allocator.cc:1066] InUse at 7fcb7b800100 of size 1280 next 2
2024-04-16 23:22:39.830888: I tensorflow/core/common_runtime/bfc_allocator.cc:1066] InUse at 7fcb7b800600 of size 256 next 3
2024-04-16 23:22:39.830906: I tensorflow/core/common_runtime/bfc_allocator.cc:1066] InUse at 7fcb7b800700 of size 256 next 4
2024-04-16 23:22:39.830925: I tensorflow/core/common_runtime/bfc_allocator.cc:1066] InUse at 7fcb7b800800 of size 256 next 5
2024-04-16 23:22:39.830944: I tensorflow/core/common_runtime/bfc_allocator.cc:1066] Free  at 7fcb7b800900 of size 2094848 next 18446744073709551615
2024-04-16 23:22:39.830962: I tensorflow/core/common_runtime/bfc_allocator.cc:1071]      Summary of in-use Chunks by size: 
2024-04-16 23:22:39.830985: I tensorflow/core/common_runtime/bfc_allocator.cc:1074] 4 Chunks of size 256 totalling 1.0KiB
2024-04-16 23:22:39.831006: I tensorflow/core/common_runtime/bfc_allocator.cc:1074] 1 Chunks of size 1280 totalling 1.2KiB
2024-04-16 23:22:39.831028: I tensorflow/core/common_runtime/bfc_allocator.cc:1074] 1 Chunks of size 31726764032 totalling 29.55GiB
2024-04-16 23:22:39.831048: I tensorflow/core/common_runtime/bfc_allocator.cc:1078] Sum Total of in-use chunks: 29.55GiB
2024-04-16 23:22:39.831069: I tensorflow/core/common_runtime/bfc_allocator.cc:1080] total_region_allocated_bytes_: 34361835520 memory_limit_: 48970858496 available bytes: 14609022976 curr_region_allocation_bytes_: 34359738368
2024-04-16 23:22:39.831100: I tensorflow/core/common_runtime/bfc_allocator.cc:1086] Stats: 
Limit:                     48970858496
InUse:                     31726766336
MaxInUse:                  31726766336
NumAllocs:                           6
MaxAllocSize:              31726764032
Reserved:                            0
PeakReserved:                        0
LargestFreeBlock:                    0

2024-04-16 23:22:39.831126: W tensorflow/core/common_runtime/bfc_allocator.cc:474] *********************************************************************************************______*
========= sending heartbeat at 2024-04-16 23:22:41.712777
========= sending heartbeat at 2024-04-16 23:22:51.738802
**** handle exception rc
Traceback (most recent call last):
  File "cryosparc_master/cryosparc_compute/run.py", line 95, in cryosparc_master.cryosparc_compute.run.main
  File "cryosparc_master/cryosparc_compute/jobs/deep_picker/run_deep_picker.py", line 275, in cryosparc_master.cryosparc_compute.jobs.deep_picker.run_deep_picker.run_deep_picker_train
  File "cryosparc_master/cryosparc_compute/jobs/deep_picker/train.py", line 56, in cryosparc_master.cryosparc_compute.jobs.deep_picker.train.train_picker
  File "cryosparc_master/cryosparc_compute/jobs/deep_picker/train.py", line 121, in cryosparc_master.cryosparc_compute.jobs.deep_picker.train.train_picker
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 793, in from_tensor_slices
    return TensorSliceDataset(tensors, name=name)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 4477, in __init__
    element = structure.normalize_element(element)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/data/util/structure.py", line 125, in normalize_element
    ops.convert_to_tensor(t, name="component_%d" % i, dtype=dtype))
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/profiler/trace.py", line 183, in wrapped
    return func(*args, **kwargs)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 1695, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/framework/tensor_conversion_registry.py", line 48, in _default_conversion_function
    return constant_op.constant(value, dtype, name=name)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 267, in constant
    return _constant_impl(value, dtype, shape, name, verify_shape=False,
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 279, in _constant_impl
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 304, in _constant_eager_impl
    t = convert_to_eager_tensor(value, ctx, dtype)
  File "/data/cryosparc/cryosparc_worker/deps/anaconda/envs/cryosparc_worker_env/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 102, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)
tensorflow.python.framework.errors_impl.InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.
set status to failed
========= sending heartbeat at 2024-04-16 23:23:01.762838
========= sending heartbeat at 2024-04-16 23:23:11.789820
========= main process now complete at 2024-04-16 23:23:15.619254.
========= monitor process now complete at 2024-04-16 23:23:15.630131.