Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Populating the interactive namespace from numpy and matplotlib\n"
- ]
- }
- ],
- "source": [
- "%pylab inline"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [],
- "source": [
- "from dlinputs import tarrecords, utils\n",
- "import hashlib"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'5d41402abc4b2a76b9719d911017c592'"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "def md5hash(x, seed=\"\"):\n",
- "    \"\"\"Return the hexadecimal MD5 digest of str(seed) + str(x).\n",
- "\n",
- "    Both arguments are converted with str() and concatenated, seed first,\n",
- "    so the same x hashed with different seeds yields different digests.\n",
- "    \"\"\"\n",
- "    payload = str(seed) + str(x)\n",
- "    # hashlib needs bytes: a Python 2 str already is bytes and is passed\n",
- "    # through untouched; a Python 3 str has to be encoded first.\n",
- "    if not isinstance(payload, bytes):\n",
- "        payload = payload.encode(\"utf-8\")\n",
- "    return hashlib.md5(payload).hexdigest()\n",
- "\n",
- "md5hash(\"hello\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "-rw-r--r-- tmb/tmb 1 1969-12-31 16:00 10.cls\n",
- "-rw-r--r-- tmb/tmb 306 1969-12-31 16:00 10.png\n",
- "-rw-r--r-- tmb/tmb 1 1969-12-31 16:00 11.cls\n",
- "-rw-r--r-- tmb/tmb 345 1969-12-31 16:00 11.png\n",
- "-rw-r--r-- tmb/tmb 1 1969-12-31 16:00 12.cls\n",
- "tar: write error\n"
- ]
- }
- ],
- "source": [
- "# sed -n 1,5p prints the first five entries but keeps reading, so tar never writes to a closed pipe.\n",
- "!tar -ztvf testdata/sample.tgz | sed -n 1,5p"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "__key__ '10'\n",
- "__source__ <type 'NoneType'> None\n",
- "cls 0\n",
- "png float32 (28, 28)\n"
- ]
- }
- ],
- "source": [
- "# Open the gzip archive in binary mode and close it once every sample has been read.\n",
- "with open(\"testdata/sample.tgz\", \"rb\") as stream:\n",
- "    data = list(tarrecords.tariterator(stream))\n",
- "utils.print_sample(data[0])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Help on built-in function sorted in module __builtin__:\n",
- "\n",
- "sorted(...)\n",
- " sorted(iterable, cmp=None, key=None, reverse=False) --> new sorted list\n",
- "\n"
- ]
- }
- ],
- "source": [
- "help(sorted)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Order a copy of the samples by their \"__key__\" field.\n",
- "sorted_by_key = list(data)\n",
- "sorted_by_key.sort(key=lambda sample: sample[\"__key__\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Order a copy of the samples by the value stored under \"cls\".\n",
- "sorted_by_content = list(data)\n",
- "sorted_by_content.sort(key=lambda sample: sample[\"cls\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Order a copy of the samples by the MD5 digest of their key, hashed with seed 17.\n",
- "sorted_by_keyhash = list(data)\n",
- "sorted_by_keyhash.sort(key=lambda sample: md5hash(sample[\"__key__\"], seed=17))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Order a copy of the samples by the MD5 digest of str(sample[\"png\"]), hashed with seed 34.\n",
- "sorted_by_contenthash = list(data)\n",
- "sorted_by_contenthash.sort(key=lambda sample: md5hash(sample[\"png\"], seed=34))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Add Comment
Please, Sign In to add comment