Guest User

Untitled

a guest
Apr 26th, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up — it unlocks many cool features!
text 3.65 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [
  8. {
  9. "name": "stdout",
  10. "output_type": "stream",
  11. "text": [
  12. "Populating the interactive namespace from numpy and matplotlib\n"
  13. ]
  14. }
  15. ],
  16. "source": [
  17. "%pylab inline"
  18. ]
  19. },
  20. {
  21. "cell_type": "code",
  22. "execution_count": 18,
  23. "metadata": {},
  24. "outputs": [],
  25. "source": [
  26. "from dlinputs import tarrecords, utils\n",
  27. "import hashlib"
  28. ]
  29. },
  30. {
  31. "cell_type": "code",
  32. "execution_count": 23,
  33. "metadata": {},
  34. "outputs": [
  35. {
  36. "data": {
  37. "text/plain": [
  38. "'5d41402abc4b2a76b9719d911017c592'"
  39. ]
  40. },
  41. "execution_count": 23,
  42. "metadata": {},
  43. "output_type": "execute_result"
  44. }
  45. ],
  46. "source": [
  47. "def md5hash(x, seed=\"\"):\n",
  48. " return hashlib.md5(str(seed)+str(x)).hexdigest()\n",
  49. "md5hash(\"hello\")"
  50. ]
  51. },
  52. {
  53. "cell_type": "code",
  54. "execution_count": 14,
  55. "metadata": {},
  56. "outputs": [
  57. {
  58. "name": "stdout",
  59. "output_type": "stream",
  60. "text": [
  61. "-rw-r--r-- tmb/tmb 1 1969-12-31 16:00 10.cls\n",
  62. "-rw-r--r-- tmb/tmb 306 1969-12-31 16:00 10.png\n",
  63. "-rw-r--r-- tmb/tmb 1 1969-12-31 16:00 11.cls\n",
  64. "-rw-r--r-- tmb/tmb 345 1969-12-31 16:00 11.png\n",
  65. "-rw-r--r-- tmb/tmb 1 1969-12-31 16:00 12.cls\n",
  66. "tar: write error\n"
  67. ]
  68. }
  69. ],
  70. "source": [
  71. "!tar -ztvf testdata/sample.tgz | sed 5q"
  72. ]
  73. },
  74. {
  75. "cell_type": "code",
  76. "execution_count": 11,
  77. "metadata": {},
  78. "outputs": [
  79. {
  80. "name": "stdout",
  81. "output_type": "stream",
  82. "text": [
  83. "__key__ '10'\n",
  84. "__source__ <type 'NoneType'> None\n",
  85. "cls 0\n",
  86. "png float32 (28, 28)\n"
  87. ]
  88. }
  89. ],
  90. "source": [
  91. "data = tarrecords.tariterator(open(\"testdata/sample.tgz\"))\n",
  92. "data = list(data)\n",
  93. "utils.print_sample(data[0])"
  94. ]
  95. },
  96. {
  97. "cell_type": "code",
  98. "execution_count": 13,
  99. "metadata": {},
  100. "outputs": [
  101. {
  102. "name": "stdout",
  103. "output_type": "stream",
  104. "text": [
  105. "Help on built-in function sorted in module __builtin__:\n",
  106. "\n",
  107. "sorted(...)\n",
  108. " sorted(iterable, cmp=None, key=None, reverse=False) --> new sorted list\n",
  109. "\n"
  110. ]
  111. }
  112. ],
  113. "source": [
  114. "help(sorted)"
  115. ]
  116. },
  117. {
  118. "cell_type": "code",
  119. "execution_count": 24,
  120. "metadata": {},
  121. "outputs": [],
  122. "source": [
  123. "sorted_by_key = sorted(data, key=lambda x: x[\"__key__\"])"
  124. ]
  125. },
  126. {
  127. "cell_type": "code",
  128. "execution_count": 25,
  129. "metadata": {},
  130. "outputs": [],
  131. "source": [
  132. "sorted_by_content = sorted(data, key=lambda x: x[\"cls\"])"
  133. ]
  134. },
  135. {
  136. "cell_type": "code",
  137. "execution_count": 26,
  138. "metadata": {},
  139. "outputs": [],
  140. "source": [
  141. "sorted_by_keyhash = sorted(data, key=lambda x: md5hash(x[\"__key__\"], seed=17))"
  142. ]
  143. },
  144. {
  145. "cell_type": "code",
  146. "execution_count": 27,
  147. "metadata": {},
  148. "outputs": [],
  149. "source": [
  150. "sorted_by_contenthash = sorted(data, key=lambda x: md5hash(x[\"png\"], seed=34))"
  151. ]
  152. },
  153. {
  154. "cell_type": "code",
  155. "execution_count": null,
  156. "metadata": {},
  157. "outputs": [],
  158. "source": []
  159. }
  160. ],
  161. "metadata": {
  162. "kernelspec": {
  163. "display_name": "Python 2",
  164. "language": "python",
  165. "name": "python2"
  166. },
  167. "language_info": {
  168. "codemirror_mode": {
  169. "name": "ipython",
  170. "version": 2
  171. },
  172. "file_extension": ".py",
  173. "mimetype": "text/x-python",
  174. "name": "python",
  175. "nbconvert_exporter": "python",
  176. "pygments_lexer": "ipython2",
  177. "version": "2.7.12"
  178. }
  179. },
  180. "nbformat": 4,
  181. "nbformat_minor": 2
  182. }
Add Comment
Please Sign In to add a comment