import tifffile as tf import numpy as np import time # NOTE - to upload this script to pastebin, I had to use pen15 to not get flagged as inappropriate # https://gist.github.com/rfezzani/b4b8852c5a48a901c1e94e09feb34743 - attribution # works for both Erect and Flaccid def get_crop(page, start_height, start_width, h, w): """Extract a crop from a TIFF image file directory (IFD). Only the tiles englobing the crop area are loaded and not the whole page. This is usefull for large Whole slide images that can't fit int RAM. Parameters ---------- page : TiffPage TIFF image file directory (IFD) from which the crop must be extracted. i0, j0: int Coordinates of the top left corner of the desired crop. h: int Desired crop height. w: int Desired crop width. Returns ------- out : ndarray of shape (imagedepth, h, w, sampleperpixel) Extracted crop. """ if not page.is_tiled: raise ValueError("Input page must be tiled.") im_width = page.imagewidth im_height = page.imagelength print('im_height: ', im_height) print('start_height: ', start_height + h) print('im_width: ', im_width) print('start_width: ', start_width + w) if h < 1 or w < 1: raise ValueError("h and w must be strictly positive.") if start_height < 0 or start_width < start_height or start_height + h > im_height or start_width + w > im_width: if start_height < 0: print(1) if start_width < start_height: print(2) if start_height + h >= im_height: print(3) if start_width + w >= im_width: print(4) raise ValueError("Requested crop area is out of image bounds.") tile_width, tile_height = page.tilewidth, page.tilelength i1, j1 = start_height + h, start_width + w tile_i0, tile_j0 = start_height // tile_height, start_width // tile_width tile_i1, tile_j1 = np.ceil([i1 / tile_height, j1 / tile_width]).astype(int) tile_per_line = int(np.ceil(im_width / tile_width)) out = np.empty((page.imagedepth, (tile_i1 - tile_i0) * tile_height, (tile_j1 - tile_j0) * tile_width, page.samplesperpixel), dtype=page.dtype) fh = page.parent.filehandle jpegtables = page.tags.get('JPEGTables', None) if jpegtables is not None: jpegtables = jpegtables.value for i in range(tile_i0, tile_i1): for j in range(tile_j0, tile_j1): index = int(i * tile_per_line + j) offset = page.dataoffsets[index] bytecount = page.databytecounts[index] fh.seek(offset) data = fh.read(bytecount) start_program = time.time() tile, indices, shape = page.decode(data, index, jpegtables) print('crop section time: ', time.time()-start_program) im_i = (i - tile_i0) * tile_height im_j = (j - tile_j0) * tile_width out[:, im_i: im_i + tile_height, im_j: im_j + tile_width, :] = tile im_i0 = start_height - tile_i0 * tile_height im_j0 = start_width - tile_j0 * tile_width return out[:, im_i0: im_i0 + h, im_j0: im_j0 + w, :] ## ## START HERE ## start_pos_x = 19200 start_pos_y = 0 crop_height = 19200 crop_width = 2000 # load The World's Largest pen15™ tif = tf.TiffFile('theworldsbiggestpen15.tif') page = tif.pages[0] print('Dimensions: ', page.shape) # crop section is returned as np array, do with it as you wish: crop_section = get_crop(page, start_pos_y, start_pos_x, crop_height, crop_width) # if you want to write to tif, use following: tf.imwrite('theworldsbiggestpen15_crop.tif', crop_section, compression="jpeg", photometric='rgb')