Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- '''<b>ExportToCellH5</b> exports measurements, objects and object relationships,
- and images to the CellH5 data format.
- <hr>
- <h4>File structure</h4>
- In multiprocessing-mode, CellProfiler will create satellite .cellh5 files that
- are linked to the one that you specify using this module. The only thing
- to note is that you must keep all .cellh5 files that are generated together
- if you move them to a new folder.
- '''
- import os
- import tempfile
- import h5py
- import numpy as np
- import scipy.ndimage
- import cellprofiler.module as cpm
- import cellprofiler.measurement as cpmeas
- import cellprofiler.setting as cps
- from cellprofiler.gui.help import \
- USING_METADATA_TAGS_REF, USING_METADATA_HELP_REF
- from cellprofiler.modules.identify import R_PARENT
- from cellprofiler.preferences import \
- IO_FOLDER_CHOICE_HELP_TEXT, IO_WITH_METADATA_HELP_TEXT
- from cellprofiler.setting import YES, NO
- from cellprofiler.gui.help import \
- USING_METADATA_TAGS_REF, USING_METADATA_HELP_REF
- import cellh5
- import cellh5.cellh5write
- import numpy as np
- OFF_OBJECTS_COUNT = 0
- OFF_IMAGES_COUNT = 1
- COLORS = [("Red", "0xFF0000"),
- ("Green", "0x00FF00"),
- ("Blue", "0x0000FF")]
- class ExportToCellH5(cpm.Module):
- #
- # TODO: model z and t. Currently, CellProfiler would analyze each
- # stack plane independently. I think the easiest way to handle
- # z and t would be to add them to the site path if they are
- # used in the experiment (e.g. a time series would have a
- # path of "/plate/well/site/time")
- #
- # I can add two more optional metadata keys that would let
- # users capture this.
- #
- # The more-complicated choice would be to store the data in a
- # stack which would mean reworking the indices in every segmentation
- # after the first. There are some legacy measurements that are
- # really object numbers, so these would require a lot of work
- # to get right. Also, the resulting segmentations are a little
- # artificial since they seem to say that every object is one
- # pixel thick in the T or Z direction.
- #
- module_name = "ExportToCellH5"
- variable_revision_number = 1
- category = ["File Processing"]
- SUBFILE_KEY = "subfile"
- IGNORE_METADATA = "None"
- def create_settings(self):
- '''Create the settings for the ExportToCellH5 module'''
- self.directory = cps.DirectoryPath(
- "Output file location",
- doc="""
- This setting lets you choose the folder for the output files.
- %(IO_FOLDER_CHOICE_HELP_TEXT)s
- """ % globals())
- def get_directory_fn():
- '''Get the directory for the CellH5 file'''
- return self.directory.get_absolute_path()
- def set_directory_fn(path):
- dir_choice, custom_path = self.directory.get_parts_from_path(path)
- self.directory.join_parts(dir_choice, custom_path)
- self.file_name = cps.FilenameText(
- "Output file name", "DefaultOut.ch5",
- get_directory_fn=get_directory_fn,
- set_directory_fn=set_directory_fn,
- metadata=True,
- browse_msg="Choose CellH5 file",
- mode=cps.FilenameText.MODE_APPEND,
- exts=[("CellH5 file (*.cellh5)", "*.ch5"),
- ("HDF5 file (*.h5)", "*.h5"),
- ("All files (*.*", "*.*")],
- doc="""
- This setting lets you name your CellH5 file. If you choose an
- existing file, CellProfiler will add new data to the file
- or overwrite existing locations.
- <p>%(IO_WITH_METADATA_HELP_TEXT)s %(USING_METADATA_TAGS_REF)s.
- For instance, if you have a metadata tag named
- "Plate", you can create a per-plate folder by selecting one the subfolder options
- and then specifying the subfolder name as "\g<Plate>". The module will
- substitute the metadata values for the current image set for any metadata tags in the
- folder name.%(USING_METADATA_HELP_REF)s.</p>
- """ % globals())
- self.overwrite_ok = cps.Binary(
- "Overwrite existing data without warning?", False,
- doc="""
- Select <i>%(YES)s</i> to automatically overwrite any existing data
- for a site. Select <i>%(NO)s</i> to be prompted first.
- If you are running the pipeline on a computing cluster,
- select <i>%(YES)s</i> unless you want execution to stop because you
- will not be prompted to intervene. Also note that two instances
- of CellProfiler cannot write to the same file at the same time,
- so you must ensure that separate names are used on a cluster.
- """ % globals())
- self.repack = cps.Binary(
- "Repack after analysis", True,
- doc="""
- This setting determines whether CellProfiler in multiprocessing mode
- repacks the data at the end of analysis. If you select <i>%(YES)s</i>,
- CellProfiler will combine all of the satellite files into a single
- file upon completion. This option requires some extra temporary disk
- space and takes some time at the end of analysis, but results in
- a single file which may occupy less disk space. If you select
- <i>%(NO)s</i>, CellProfiler will create a master file using the
- name that you give and this file will have links to individual
- data files that contain the actual data. Using the data generated by
- this option requires that you keep the master file and the linked
- files together when copying them to a new folder.
- """ % globals())
- self.plate_metadata = cps.Choice(
- "Plate metadata", [], value="Plate",
- choices_fn=self.get_metadata_choices,
- doc="""
- This is the metadata tag that identifies the plate name of
- the images for the current cycle. Choose <i>None</i> if
- your assay does not have metadata for plate name. If your
- assay is slide-based, you can use a metadata item that identifies
- the slide as the choice for this setting and set the well
- and site metadata items to <i>None</i>.""")
- self.well_metadata = cps.Choice(
- "Well metadata", [], value="Well",
- choices_fn=self.get_metadata_choices,
- doc="""This is the metadata tag that identifies the well name
- for the images in the current cycle. Choose <i>None</i> if
- your assay does not have metadata for the well.""")
- self.site_metadata = cps.Choice(
- "Site metadata", [], value="Site",
- choices_fn=self.get_metadata_choices,
- doc=
- """This is the metadata tag that identifies the site name
- for the images in the current cycle. Choose <i>None</i> if
- your assay doesn't divide wells up into sites or if this
- tag is not required for other reasons.""")
- self.divider = cps.Divider()
- self.wants_to_choose_measurements = cps.Binary(
- "Choose measurements?", False,
- doc="""
- This setting lets you choose between exporting all measurements or
- just the ones that you choose. Select <i>%(YES)s</i> to pick the
- measurements to be exported. Select <i>%(NO)s</i> to automatically
- export all measurements available at this stage of the pipeline.
- """ % globals())
- self.measurements = cps.MeasurementMultiChoice(
- "Measurements to export",
- doc="""
- <i>(Used only if choosing measurements.)</i>
- <br>
- This setting lets you choose individual measurements to be exported.
- Check the measurements you want to export.
- """)
- self.objects_to_export = []
- self.add_objects_button = cps.DoSomething(
- "Add objects to export", "Add objects",
- self.add_objects)
- self.images_to_export = []
- self.add_image_button = cps.DoSomething(
- "Add an image to export", "Add image",
- self.add_image)
- self.objects_count = cps.HiddenCount(self.objects_to_export)
- self.images_count = cps.HiddenCount(self.images_to_export)
- def add_objects(self, can_delete=True):
- group = cps.SettingsGroup()
- self.objects_to_export.append(group)
- group.append(
- "objects_name",
- cps.ObjectNameSubscriber(
- "Objects name", value="Nuclei",
- doc="""
- This setting lets you choose the objects you want to export.
- <b>ExportToCellH5</b> will write the segmentation of the objects
- to your CellH5 file so that they can be saved and used by other
- applications that support the format.
- """))
- group.append(
- "Remover",
- cps.RemoveSettingButton(
- "Remove the objects above", "Remove",
- self.objects_to_export, group))
- def add_image(self, can_delete=True):
- group = cps.SettingsGroup()
- self.images_to_export.append(group)
- group.append("image_name",
- cps.ImageNameSubscriber(
- "Image name", value="DNA",
- doc="""
- This setting lets you choose the images you want to export.
- <b>ExportToCellH5</b> will write the image
- to your CellH5 file so that it can be used by other
- applications that support the format.
- """
- ))
- group.append("remover",
- cps.RemoveSettingButton(
- "Remove the image above", "Remove",
- self.objects_to_export, group))
- def get_metadata_choices(self, pipeline):
- columns = pipeline.get_measurement_columns(self)
- choices = [self.IGNORE_METADATA]
- for column in columns:
- object_name, feature_name, column_type = column[:3]
- if object_name == cpmeas.IMAGE and \
- column_type.startswith(cpmeas.COLTYPE_VARCHAR) and \
- feature_name.startswith(cpmeas.C_METADATA + "_"):
- choices.append(feature_name.split("_", 1)[1])
- return choices
- def settings(self):
- result = [
- self.objects_count, self.images_count,
- self.directory, self.file_name, self.overwrite_ok, self.repack,
- self.plate_metadata, self.well_metadata, self.site_metadata,
- self.wants_to_choose_measurements, self.measurements]
- for objects_group in self.objects_to_export:
- result += objects_group.pipeline_settings()
- for images_group in self.images_to_export:
- result += images_group.pipeline_settings()
- return result
- def visible_settings(self):
- result = [
- self.directory, self.file_name, self.overwrite_ok, self.repack,
- self.plate_metadata, self.well_metadata, self.site_metadata,
- self.divider, self.wants_to_choose_measurements]
- if self.wants_to_choose_measurements:
- result.append(self.measurements)
- for group in self.objects_to_export:
- result += group.visible_settings()
- result.append(self.add_objects_button)
- for group in self.images_to_export:
- result += group.visible_settings()
- result.append(self.add_image_button)
- return result
- def get_path_to_master_file(self, measurements):
- return os.path.join(self.directory.get_absolute_path(measurements),
- self.file_name.value)
- def get_site_path(self, workspace, image_number):
- '''Get the plate / well / site tuple that identifies a field of view
- workspace - workspace for the analysis containing the metadata
- measurements to be mined.
- image_number - the image number for the field of view
- returns a tuple which can be used for the hierarchical path
- to the group for a particular field of view
- '''
- m = workspace.measurements
- path = []
- for setting in self.plate_metadata, self.well_metadata, self.site_metadata:
- if setting.value == self.IGNORE_METADATA:
- path.append("NA")
- else:
- feature = "_".join((cpmeas.C_METADATA, setting.value))
- path.append(m[cpmeas.IMAGE, feature, image_number])
- return tuple(path)
- def get_subfile_name(self, workspace):
- '''Contact the UI to find the cellh5 file to use to store results
- Internally, this tells the UI to create a link from the master file
- to the plate / well / site group that will be used to store results.
- Then, the worker writes into that file.
- '''
- master_file_name = self.get_path_to_master_file(workspace.measurements)
- path = self.get_site_path(
- workspace,
- workspace.measurements.image_set_number)
- return workspace.interaction_request(
- self, master_file_name, os.getpid(), path, headless_ok=True)
- def handle_interaction(self, master_file, pid, path):
- '''Handle an analysis worker / UI interaction
- This function is used to coordinate linking a group in the master file
- with a group in a subfile that is reserved for a particular
- analysis worker. Upon entry, the worker should be sure to have
- flushed and closed its subfile.
- master_file - the master cellh5 file which has links to groups
- for each field of view
- pid - the process ID or other unique identifier of the worker
- talking to the master
- path - The combination of (Plate, Well, Site) that should be used
- as the folder path to the data.
- returns the name of the subfile to be used. After return, the
- subfile has been closed by the UI and a link has been established
- to the group named by the path.
- '''
- master_dict = self.get_dictionary().setdefault(master_file, {})
- if pid not in master_dict:
- md_head, md_tail = os.path.splitext(master_file)
- subfile = "%s_%s%s" % (md_head, str(pid), md_tail)
- master_dict[pid] = subfile
- else:
- subfile = master_dict[pid]
- ch5_master = cellh5.cellh5write.CH5MasterFile(master_file, "a")
- try:
- ch5_master.add_link_to_coord(self._to_ch5_coord(*path), subfile)
- finally:
- ch5_master.close()
- return subfile
- def _to_ch5_coord(self, plate, well, site):
- return cellh5.CH5PositionCoordinate(plate, well, site)
- def run(self, workspace):
- m = workspace.measurements
- object_set = workspace.object_set
- #
- # get plate / well / site as tuple
- #
- path = self.get_site_path(workspace, m.image_set_number)
- subfile_name = self.get_subfile_name(workspace)
- ### create CellH5 file
- with cellh5.cellh5write.CH5FileWriter(subfile_name, mode="a") as c5_file:
- ### add Postion (==plate, well, site) triple
- c5_pos = c5_file.add_position(self._to_ch5_coord(*path))
- for ch_idx, object_group in enumerate(self.objects_to_export):
- objects_name = object_group.objects_name.value
- objects = object_set.get_objects(objects_name)
- labels = objects.segmented
- if ch_idx == 0:
- ### get shape of 5D cube
- shape5D = (len(self.objects_to_export), 1, 1,
- labels.shape[0], labels.shape[1])
- dtype5D = np.uint16
- ### create lablel writer for incremental writing
- c5_label_writer = c5_pos.add_label_image(shape=shape5D, dtype=dtype5D)
- c5_label_def = cellh5.cellh5write.CH5ImageRegionDefinition()
- c5_label_writer.write(labels, c=ch_idx, t=0, z=0)
- c5_label_def.add_row(region_name=objects_name, channel_idx=ch_idx)
- if len(self.objects_to_export) > 0:
- ### finalize the writer
- c5_label_writer.write_definition(c5_label_def)
- c5_label_writer.finalize()
- n_channels = 0
- max_scale = 1
- max_i = 1
- max_j = 1
- for image_group in self.images_to_export:
- image = m.get_image(image_group.image_name.value)
- pixel_data = image.pixel_data
- if pixel_data.ndim == 3:
- n_channels += min(pixel_data.shape[2], 3)
- else:
- n_channels += 1
- max_scale = max(image.scale, max_scale)
- max_i = max(pixel_data.shape[0], max_i)
- max_j = max(pixel_data.shape[1], max_j)
- ### get shape of 5D cube
- shape5D = (n_channels, 1, 1, max_i, max_j)
- for dtype in (np.uint8, np.uint16, np.uint32, np.uint64):
- if max_scale <= np.iinfo(dtype).max:
- dtype5D = dtype
- break
- ### create image writer for incremental writing
- c5_image_writer = c5_pos.add_image(shape=shape5D, dtype=dtype5D)
- c5_image_def = cellh5.cellh5write.CH5ImageChannelDefinition()
- ch_idx = 0
- for image_group in self.images_to_export:
- image_name = image_group.image_name.value
- image = m.get_image(image_name).pixel_data
- scale = m.get_image(image_name).scale
- if not np.issubdtype(image.dtype, np.dtype(bool).type):
- if scale == 1:
- scale = max_scale
- image = image * scale
- if image.ndim == 3:
- for c in range(min(image.shape[2], 3)):
- color_name, html_color = COLORS[c]
- c5_image_writer.write(
- image[:, :, c].astype(dtype5D),
- c=ch_idx, t=0, z=0)
- c5_image_def.add_row(
- channel_name="_".join((image_name, color_name)),
- description="%s %s intensity" %
- (image_name, color_name),
- is_physical=True,
- voxel_size=(1, 1, 1),
- color=html_color)
- ch_idx += 1
- else:
- c5_image_writer.write(
- image.astype(dtype5D),
- c=ch_idx, t=0, z=0)
- c5_image_def.add_row(
- channel_name=image_name,
- description=image_name,
- is_physical=True,
- voxel_size=(1, 1, 1),
- color="0xFFFFFF")
- ch_idx += 1
- c5_image_writer.write_definition(c5_image_def)
- c5_image_writer.finalize()
- columns = workspace.pipeline.get_measurement_columns(self)
- if self.wants_to_choose_measurements:
- to_keep = set([
- (self.measurements.get_measurement_object(s),
- self.measurements.get_measurement_feature(s))
- for s in self.measurements.selections])
- def keep(column):
- return (column[0], column[1]) in to_keep
- columns = filter(keep, columns)
- #
- # I'm breaking the data up into the most granular form so that
- # it's clearer how it's organized. I'm expecting that you would
- # organize it differently when actually storing.
- #
- ### 0) extract object information (i.e. object_label_no)
- ### 1) extract all single cell features and write it as feature matrix (for e.g. classification)
- ### 2) extract Center
- ### 3) create artifical Bounding box... usefull for displaying it in fiji lateron
- ### 4) Don't see the point of features extracted on "Image" the only real and useful feature there is "Count" which can be deduced from single cell information
- ### 0) and 1) filter columns for cellular features
- feature_cols = filter(
- lambda xxx: (xxx[0] not in (cpmeas.EXPERIMENT, cpmeas.IMAGE)) and
- m.has_feature(xxx[0], xxx[1]), columns)
- ### iterate over objects to export
- for ch_idx, object_group in enumerate(self.objects_to_export):
- objects_name = object_group.objects_name.value
- objects = object_set.get_objects(objects_name)
- ### find features for that object
- feature_cols_per_object = filter(lambda xxx: xxx[0] == objects_name, feature_cols)
- c5_object_writer = c5_pos.add_region_object(objects_name)
- object_labels = objects.indices
- c5_object_writer.write(t=0, object_labels=np.array(object_labels))
- c5_object_writer.write_definition()
- c5_object_writer.finalize()
- ### iterate over all cellular feature to get feature matrix
- n_features = len(feature_cols_per_object)
- if n_features > 0:
- feature_names = []
- feature_matrix = []
- for column in feature_cols_per_object:
- object_name, feature_name = column[:2]
- values = m[object_name, feature_name]
- feature_names.append(feature_name)
- feature_matrix.append(values[:, np.newaxis])
- feature_matrix = np.concatenate(feature_matrix, axis=1)
- c5_feature_writer = c5_pos.add_object_feature_matrix(
- object_name=object_name,
- feature_name="object_features",
- n_features=n_features, dtype=np.float32)
- c5_feature_writer.write(feature_matrix)
- c5_feature_writer.write_definition(feature_names)
- c5_feature_writer.finalize()
- ### iterate over Location to create bounding_box and center
- c5_bbox = c5_pos.add_object_bounding_box(
- object_name=objects_name)
- if objects.count > 0:
- ijv = objects.ijv
- min_x = scipy.ndimage.minimum(
- ijv[:, 1], ijv[:, 2], objects.indices)
- max_x = scipy.ndimage.maximum(
- ijv[:, 1], ijv[:, 2], objects.indices)
- min_y = scipy.ndimage.minimum(
- ijv[:, 0], ijv[:, 2], objects.indices)
- max_y = scipy.ndimage.maximum(
- ijv[:, 0], ijv[:, 2], objects.indices)
- location_x = scipy.ndimage.mean(
- ijv[:, 1], ijv[:, 2], objects.indices)
- location_y = scipy.ndimage.mean(
- ijv[:, 0], ijv[:, 2], objects.indices)
- bb = np.c_[min_x, max_x, min_y, max_y]
- else:
- bb = np.zeros((0, 4))
- location_x = np.zeros(0)
- location_y = np.zeros(0)
- c5_bbox.write(bb.astype(np.int32))
- c5_bbox.write_definition()
- c5_bbox.finalize()
- c5_center = c5_pos.add_object_center(object_name=objects_name)
- locations = {'x': location_x, 'y': location_y}
- cent = np.column_stack(
- [locations[axis] for axis in c5_center.dtype.names])
- c5_center.write(cent.astype(np.int32))
- c5_center.write_definition()
- c5_center.finalize()
- #
- # The last part deals with relationships between segmentations.
- # The most typical relationship is "Parent" which is explained below,
- # but you can also have things like first nearest and second nearest
- # neighbor or in tracking, the relationship between the segmentation
- # of the previous and next frames.
- #
- for key in m.get_relationship_groups():
- relationships = m.get_relationships(
- key.module_number, key.relationship,
- key.object_name1, key.object_name2,
- [m.image_set_number])
- for image_number1, image_number2, \
- object_number1, object_number2 in relationships:
- if image_number1 == image_number2 and \
- key.relationship == R_PARENT:
- #
- # Object 1 is the parent to object 2 - this is the
- # most common relationship, so if you can only record
- # one, this is it. "Parent" usually means that
- # the child's segmentation was seeded by the parent
- # segmentation (e.g. Parent = nucleus, child = cell),
- # but can also be something like Parent = cell,
- # child = all organelles within the cell
- #
- # object_name1 is the name of the parent segmentation
- # object_name2 is the name of the child segmentation
- # object_number1 is the index used to label the
- # parent in the parent segmentation
- # object_number2 is the index used to label the
- # child in the child segmentation
- continue
- if image_number1 != m.image_set_number:
- path1 = self.get_site_path(workspace, image_number1)
- else:
- path1 = path
- if image_number2 != m.image_set_number:
- path2 = self.get_site_path(workspace, image_number2)
- else:
- path2 = path
- #
- # TODO: this is sort of extra credit, but the relationships
- # relate an object in one segmentation to another.
- # For tracking, these can be in different image
- # sets, (e.g. the cell at time T and at time T+1).
- # So, given object 1 and object 2, path1 and path2
- # tell you how the objects are related between planes.
- pass
- def post_run(self, workspace):
- if self.repack:
- ### to be implemented with
- ### ch5_master.repack()
- return
- measurements = workspace.measurements
- fd, temp_name = tempfile.mkstemp(
- suffix=".ch5",
- dir=self.directory.get_absolute_path())
- master_name = self.get_path_to_master_file(workspace.measurements)
- src = h5py.File(master_name, "r")
- dest = h5py.File(temp_name)
- os.close(fd)
- for key in src:
- dest.copy(src[key], dest, expand_external=True)
- src.close()
- dest.close()
- os.unlink(master_name)
- os.rename(temp_name, master_name)
- def prepare_settings(self, setting_values):
- objects_count, images_count = [int(x) for x in setting_values[:2]]
- del self.objects_to_export[:]
- while len(self.objects_to_export) < objects_count:
- self.add_objects()
- del self.images_to_export[:]
- while len(self.images_to_export) < images_count:
- self.add_image()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement