2D Classification: Unsupported MRC datatype: 16843009

ammariesm · October 23, 2021, 3:39pm

I am receiving this error and am not sure what the issue is.

File "/mnt/mawu-data/cryosparc2/cryosparc2_worker/cryosparc_compute/blobio/mrc.py", line 57, in mrc_datatype_to_dtype
    assert False,'Unsupported MRC datatype: {0}'.format(datatype)
AssertionError: Unsupported MRC datatype: 16843009

ZhijieLi · October 28, 2021, 1:42pm

I ran into this problem recently when reprocessing an old dataset. It turned out that some of the MRC files had somehow become corrupted.

Here is a Python script for testing MRC files:

#!/usr/bin/env python3

import numpy as np
import os

def list_file_with_ext(directory, ext):
    """Return the names of regular files in *directory* with extension *ext*.

    *ext* is given without the leading dot (e.g. ``'mrc'``) and is matched
    case-insensitively.  Only bare file names are returned (not joined with
    *directory*), and sub-directories are not searched.
    """
    matches = []
    for entry in os.listdir(os.path.abspath(directory)):
        full_path = os.path.join(directory, entry)
        # Skip directories and anything else that is not a regular file.
        if not os.path.isfile(full_path):
            continue
        suffix = os.path.splitext(entry)[1]
        if suffix.lower() == '.' + ext.lower():
            matches.append(entry)
    return matches


###############reading MRC################
# MRC mode number -> NumPy dtype, followed by the reverse mapping
# (dtype -> mode number) for the dtypes that have one.
NUMPY_MODE = {
    0: np.dtype(np.int8),
    1: np.dtype(np.int16),
    2: np.dtype(np.float32),
    6: np.dtype(np.uint16),
    12: np.dtype(np.float16),  # 16-bit float, defined by MRC2014
    101: np.dtype(np.uint8),   # 4-bit data (0.5 byte per pixel)

    np.dtype(np.int8): 0,
    np.dtype(np.int16): 1,
    np.dtype(np.float32): 2,
    np.dtype(np.uint16): 6,
    np.dtype(np.float16): 12,
}

# MRC mode number -> bytes occupied by one data point.
DSIZE_TABLE = {
    0: 1,
    1: 2,
    2: 4,
    3: 4,
    4: 8,
    6: 2,
    12: 2,
    101: 0.5,
}

# MRC mode number -> human-readable description.
MODE_TABLE_HUMAN = {
    0: "8-bit signed integer (range -128 to 127)",
    1: "16-bit Int (2 Bytes)",
    2: "32-bit Real (4 Bytes)",  # normally should be this
    3: "Complex 16-bit (4 Bytes)",
    4: "Complex 32-bit  (8 Bytes)",
    5: "mode 5: unkown",
    6: "16-bit unsigned Int (2 Bytes)",
    12: "16-bit Real (2 Bytes)",
    101: "4-bit unsigned Int, SerialEM",
}

# Layout of the 1024-byte main header.
# field name -> [start byte, end byte (exclusive), struct format, default]
HEADER_PACK_TABLE = {
    'MRC_NX': [0, 4, '<i', 0],
    'MRC_NY': [4, 8, '<i', 0],
    'MRC_NZ': [8, 12, '<i', 0],
    'MRC_MAPMODE': [12, 16, '<i', 2],
    # NXSTART/NYSTART/NZSTART are 32-bit integers in MRC2014, not floats.
    'MRC_NXSTART': [16, 20, '<i', 0],
    'MRC_NYSTART': [20, 24, '<i', 0],
    'MRC_NZSTART': [24, 28, '<i', 0],
    'MRC_MX': [28, 32, '<i', 0],
    'MRC_MY': [32, 36, '<i', 0],
    'MRC_MZ': [36, 40, '<i', 0],
    'MRC_CELL_A': [40, 44, '<f', 0],
    'MRC_CELL_B': [44, 48, '<f', 0],
    'MRC_CELL_C': [48, 52, '<f', 0],
    'MRC_CELL_ALPHA': [52, 56, '<f', 90],
    'MRC_CELL_BETA': [56, 60, '<f', 90],
    'MRC_CELL_GAMMA': [60, 64, '<f', 90],
    'MRC_MAPC': [64, 68, '<i', 1],
    'MRC_MAPR': [68, 72, '<i', 2],
    'MRC_MAPS': [72, 76, '<i', 3],
    'MRC_DMIN': [76, 80, '<f', 0],
    'MRC_DMAX': [80, 84, '<f', 0],
    'MRC_DMEAN': [84, 88, '<f', 0],
    'MRC_ISPG': [88, 92, '<i', 0],
    'MRC_NSYMBT': [92, 96, '<i', 0],
    'MRC_EXTTYP': [104, 108, '4s', b'MRCO'],
    'MRC_NVERSION': [108, 112, '<i', 20140],
    'MRC_ORIX': [196, 200, '<f', 0],
    'MRC_ORIY': [200, 204, '<f', 0],
    'MRC_ORIZ': [204, 208, '<f', 0],
    'MRC_MAP_STR': [208, 212, '4s', b'MAP '],
    'MRC_MACHST_STRING': [212, 214, '2s', b'DA'],
    'MRC_RMS': [216, 220, '<f', -1],
    'MRC_NLABL': [220, 224, '<i', 0],
    'MRC_LABEL': [224, 1024, '800s', b'\x00' * 800],
}



def calculate_expected_datasize(hdrinf):
    """Compute the data and file sizes implied by a parsed MRC header.

    Parameters
    ----------
    hdrinf : mapping
        Must provide 'MRC_MAPMODE', 'MRC_NSYMBT', 'MRC_NX', 'MRC_NY' and
        'MRC_NZ'.

    Returns
    -------
    tuple
        ``(expected_data_size, expected_file_size, slice_size,
        bytes_per_point)``.  All four are ``None`` when the map mode is not
        listed in ``DSIZE_TABLE``.  ``expected_file_size`` is the data size
        plus the 1024-byte main header plus the extended header (NSYMBT).
    """
    mapmode = hdrinf['MRC_MAPMODE']
    nsymbt = hdrinf['MRC_NSYMBT']

    bytes_per_point = DSIZE_TABLE.get(mapmode)
    if bytes_per_point is None:
        # Unknown data mode: nothing can be predicted about the file size.
        return None, None, None, None

    nx, ny, nz = hdrinf['MRC_NX'], hdrinf['MRC_NY'], hdrinf['MRC_NZ']

    if bytes_per_point < 1:
        # Sub-byte pixels (4-bit mode): several pixels share one byte and
        # each row is padded up to a whole number of bytes.
        pixels_per_byte = int(round(1 / bytes_per_point))
        row_size = (nx + pixels_per_byte - 1) // pixels_per_byte
    else:
        # Whole-byte pixels: rows are not padded, so odd nx must not be
        # rounded up to the next even value.
        row_size = nx * bytes_per_point

    slice_size = int(row_size * ny)
    expected_data_size = slice_size * nz
    expected_file_size = expected_data_size + 1024 + nsymbt
    return expected_data_size, expected_file_size, slice_size, bytes_per_point


def test_MRC2014_header(header_data, filesize=None, print_info=False):
    """Parse and sanity-check the main header of an MRC2014 (.mrc) file.

    Format reference: http://www.ccpem.ac.uk/mrc_format/mrc2014.php

    Despite its name this is a validator, not a unit test.

    Parameters
    ----------
    header_data : bytes
        At least the first 1024 bytes of the file.
    filesize : int or None
        Actual size of the file on disk.  When given, it is compared with
        the size implied by the header; when ``None`` the size check is
        skipped.
    print_info : bool
        Accepted for compatibility; currently unused.

    Returns
    -------
    dict
        ``{'OK': False}`` if fewer than 1024 bytes were supplied.  Otherwise
        a dict holding every field listed in ``HEADER_PACK_TABLE`` plus
        'ori_1024', 'size_OK', 'OK', 'file_type' and 'endianness'.  'OK' is
        False when the "MAP" identifier is missing, the data mode is
        unknown, the file size does not match, or the machine stamp is not
        a little-endian one.
    """
    # struct is part of the standard library; imported locally because the
    # file does not import it at module level.
    import struct

    if len(header_data) < 1024:
        return {'OK': False}

    header_inf = {'ori_1024': header_data[:1024], 'size_OK': True,
                  'OK': True, 'file_type': "MRC", 'endianness': 'LE'}

    for key, (start, end, fmt, _default) in HEADER_PACK_TABLE.items():
        header_inf[key], = struct.unpack(fmt, header_data[start:end])

    if header_inf['MRC_MAP_STR'][:3] != b"MAP":
        header_inf['OK'] = False

    _dsize, fsize, _slicesize, bytes_per_point = calculate_expected_datasize(header_inf)
    if bytes_per_point is None:
        header_inf['OK'] = False

    # Only compare sizes when the caller supplied one; previously the
    # default filesize=None made every header fail this check.
    if filesize is not None and fsize != filesize:
        header_inf['size_OK'] = False
        header_inf['OK'] = False

    # Accept the two little-endian machine stamps this script knows about.
    if header_inf['MRC_MACHST_STRING'][:2] not in (b'DD', b'DA'):
        header_inf['OK'] = False
    return header_inf


# Check every .mrc/.mrcs file in the current directory, print the running
# count and name of each file whose header fails validation, then print
# the totals.
mrclist = list_file_with_ext('.', 'Mrc') + list_file_with_ext('.', 'Mrcs')
goodlist = []
badlist = []

for fname in mrclist:
    # Only the 1024-byte main header is needed; the context manager closes
    # the file, so no explicit seek()/close() is required.
    with open(fname, 'rb') as f:
        header = f.read(1024)

    header_inf = test_MRC2014_header(header, filesize=os.path.getsize(fname), print_info=False)
    if header_inf['OK']:
        goodlist.append(fname)
    else:
        badlist.append(fname)
        print(len(badlist), fname)


print('good', len(goodlist))
print(len(badlist))

ammariesm · November 12, 2021, 3:25pm

If I can run Inspect Particle Picks with the extracted particles and see them on the individual images, wouldn’t that suggest the files aren’t corrupt?

ZhijieLi · November 12, 2021, 4:11pm

@ammariesm
That only requires that the particle locations (saved in the .cs files) from the job are readable. The .mrc files containing the particle image stacks play no part in the inspection process.

You can go to the extraction job directory, locate the subdirectory containing the .mrc files, and run the script above to see which files it complains about.

Each particle mrc file contains only the particles extracted from a single micrograph. Therefore, one quicker fix might be to figure out which micrographs are affected, select these micrographs, re-extract, then do some particleset boolean operations to replace the bad particles with the new extracts. Or, simply re-extract all.