Guest User

Untitled

a guest
Feb 24th, 2018
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 61.38 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "import boto3\n",
  10. "from botocore import UNSIGNED\n",
  11. "from botocore.client import Config\n",
  12. "from distutils.version import LooseVersion, StrictVersion\n",
  13. "import pandas as pd\n",
  14. "import io\n",
  15. "import numpy as np\n",
  16. "s3 = boto3.client(\"s3\", config=Config(signature_version=UNSIGNED))"
  17. ]
  18. },
  19. {
  20. "cell_type": "code",
  21. "execution_count": 19,
  22. "metadata": {
  23. "scrolled": true
  24. },
  25. "outputs": [
  26. {
  27. "name": "stdout",
  28. "output_type": "stream",
  29. "text": [
  30. "ds000001\n",
  31. "1.0.0/ 2.0.4/\n",
  32. "{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 10, 'male_count': 6}\n",
  33. "ds000002\n",
  34. "1.0.0/ 2.0.5/\n",
  35. "{'participant_count': 17, 'below_18': 0, 'above_18': 17, 'female_count': 10, 'male_count': 7}\n",
  36. "ds000003\n",
  37. "1.0.0/ 2.0.2/\n",
  38. "{'participant_count': 13, 'below_18': 0, 'above_18': 13, 'female_count': 5, 'male_count': 8}\n",
  39. "ds000005\n",
  40. "1.0.0/ 2.0.1/\n",
  41. "{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 8, 'male_count': 8}\n",
  42. "ds000006\n",
  43. "1.0.0/ 2.0.1/\n",
  44. "{'participant_count': 14, 'below_18': 0, 'above_18': 14, 'female_count': 9, 'male_count': 5}\n",
  45. "ds000007\n",
  46. "1.0.1/ 2.0.1/\n",
  47. "{'participant_count': 20, 'below_18': 0, 'above_18': 20, 'female_count': 9, 'male_count': 11}\n",
  48. "ds000008\n",
  49. "1.0.1/ 2.0.0/\n",
  50. "{'participant_count': 14, 'below_18': 0, 'above_18': 14, 'female_count': 4, 'male_count': 10}\n",
  51. "ds000009\n",
  52. "1.0.0/ 2.0.3/\n",
  53. "{'participant_count': 24, 'below_18': 0, 'above_18': 24, 'female_count': 10, 'male_count': 14}\n",
  54. "ds000011\n",
  55. "1.0.0/ 2.0.1/\n",
  56. "{'participant_count': 14, 'below_18': 0, 'above_18': 14, 'female_count': 9, 'male_count': 5}\n",
  57. "ds000017\n",
  58. "1.0.0/ 2.0.1/\n",
  59. "{'participant_count': 8, 'below_18': 0, 'above_18': 5, 'female_count': 1, 'male_count': 4}\n",
  60. "ds000030\n",
  61. "1.0.2/ 1.0.5/\n",
  62. "{'participant_count': 272, 'below_18': 0, 'above_18': 272, 'female_count': 117, 'male_count': 155}\n",
  63. "ds000031\n",
  64. "1.0.2/ 1.0.4/\n",
  65. "no participants.tsv file\n",
  66. "ds000051\n",
  67. "no participants.tsv file\n",
  68. "ds000052\n",
  69. "1.0.0/ 2.0.0/\n",
  70. "no participants.tsv file\n",
  71. "ds000053\n",
  72. "1.0.0/ 1.0.2/\n",
  73. "{'participant_count': 59, 'below_18': 0, 'above_18': 59, 'female_count': 31, 'male_count': 28}\n",
  74. "ds000101\n",
  75. "1.0.0/ 2.0.0/\n",
  76. "{'participant_count': 21, 'below_18': 0, 'above_18': 21, 'female_count': 9, 'male_count': 12}\n",
  77. "ds000102\n",
  78. "1.0.0/ 2.0.0/\n",
  79. "{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 1, 'male_count': 16}\n",
  80. "ds000105\n",
  81. "1.0.1/ 2.0.2/\n",
  82. "no participants.tsv file\n",
  83. "ds000107\n",
  84. "1.0.0/ 2.0.2/\n",
  85. "no participants.tsv file\n",
  86. "ds000108\n",
  87. "1.0.1/ 2.0.0/\n",
  88. "{'participant_count': 34, 'below_18': 0, 'above_18': 34, 'female_count': 17, 'male_count': 17}\n",
  89. "ds000110\n",
  90. "1.0.0/ 2.0.1/\n",
  91. "{'participant_count': 18, 'below_18': 0, 'above_18': 18, 'female_count': 9, 'male_count': 9}\n",
  92. "ds000113b\n",
  93. "2.0.0/ 2.0.1/\n",
  94. "no participants.tsv file\n",
  95. "ds000113c\n",
  96. "1.0.0/ 1.0.0/\n",
  97. "{'participant_count': 7, 'below_18': 0, 'above_18': 7, 'female_count': 2, 'male_count': 5}\n",
  98. "ds000113d\n",
  99. "1.0.0/ 2.0.0/\n",
  100. "{'participant_count': 30, 'below_18': 2, 'above_18': 28, 'female_count': 16, 'male_count': 14}\n",
  101. "ds000114\n",
  102. "2.0.0/ 2.0.1/\n",
  103. "no participants.tsv file\n",
  104. "ds000115\n",
  105. "1.0.2/ 2.0.0/\n",
  106. "{'participant_count': 99, 'below_18': 12, 'above_18': 87, 'female_count': 40, 'male_count': 59}\n",
  107. "ds000116\n",
  108. "1.0.0/ 2.0.0/\n",
  109. "{'participant_count': 17, 'below_18': 0, 'above_18': 17, 'female_count': 6, 'male_count': 11}\n",
  110. "ds000117\n",
  111. "0.1.0/ 1.0.0/\n",
  112. "{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 7, 'male_count': 9}\n",
  113. "ds000119\n",
  114. "1.0.0/ 2.0.1/\n",
  115. "{'participant_count': 73, 'below_18': 46, 'above_18': 27, 'female_count': 43, 'male_count': 30}\n",
  116. "ds000120\n",
  117. "1.0.0/ 1.0.1/\n",
  118. "{'participant_count': 26, 'below_18': 16, 'above_18': 10, 'female_count': 15, 'male_count': 11}\n",
  119. "ds000121\n",
  120. "1.0.0/ 2.0.2/\n",
  121. "{'participant_count': 28, 'below_18': 12, 'above_18': 16, 'female_count': 16, 'male_count': 12}\n",
  122. "ds000122\n",
  123. "1.0.0/ 1.0.0/\n",
  124. "{'participant_count': 17, 'below_18': 0, 'above_18': 17, 'female_count': 8, 'male_count': 9}\n",
  125. "ds000133\n",
  126. "1.0.0/ 1.0.0/\n",
  127. "Index(['participant_id', 'study_group', 'handedness', 'gender'], dtype='object')\n",
  128. "{'participant_count': 26}\n",
  129. "ds000138\n",
  130. "1.0.0/ 2.0.0/\n",
  131. "{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 7, 'male_count': 9}\n",
  132. "ds000140\n",
  133. "2.0.0/ 2.0.0/\n",
  134. "{'participant_count': 33, 'below_18': 0, 'above_18': 33, 'female_count': 22, 'male_count': 11}\n",
  135. "ds000144\n",
  136. "{'participant_count': 45, 'below_18': 45, 'above_18': 0, 'female_count': 29, 'male_count': 16}\n",
  137. "ds000148\n",
  138. "1.0.0/ 1.0.0/\n",
  139. "{'participant_count': 49, 'below_18': 0, 'above_18': 49, 'female_count': 26, 'male_count': 23}\n",
  140. "ds000157\n",
  141. "1.0.0/ 1.0.3/\n",
  142. "{'participant_count': 30, 'below_18': 0, 'above_18': 30, 'female_count': 30, 'male_count': 0}\n",
  143. "ds000158\n",
  144. "1.0.0/ 1.0.1/\n",
  145. "no participants.tsv file\n",
  146. "ds000164\n",
  147. "1.0.0/ 1.0.1/\n",
  148. "no participants.tsv file\n",
  149. "ds000168\n",
  150. "1.0.0/ 1.0.1/\n",
  151. "{'participant_count': 35, 'below_18': 0, 'above_18': 25, 'female_count': 15, 'male_count': 20}\n",
  152. "ds000170\n",
  153. "1.0.0/ 1.0.1/\n",
  154. "{'participant_count': 15, 'below_18': 0, 'above_18': 15, 'female_count': 3, 'male_count': 12}\n",
  155. "ds000171\n",
  156. "1.0.0/ 1.0.0/\n",
  157. "{'participant_count': 39, 'below_18': 0, 'above_18': 39, 'female_count': 22, 'male_count': 17}\n",
  158. "ds000172\n",
  159. "1.0.0/ 1.0.1/\n",
  160. "{'participant_count': 13, 'below_18': 0, 'above_18': 13, 'female_count': 7, 'male_count': 6}\n",
  161. "ds000174\n",
  162. "1.0.0/ 1.0.0/\n",
  163. "{'participant_count': 42, 'below_18': 2, 'above_18': 40, 'female_count': 13, 'male_count': 29}\n",
  164. "ds000177\n",
  165. "1.0.0/ 1.0.1/\n",
  166. "{'participant_count': 11, 'below_18': 0, 'above_18': 11, 'female_count': 4, 'male_count': 7}\n",
  167. "ds000200\n",
  168. "1.0.0/ 1.0.0/\n",
  169. "{'participant_count': 14, 'below_18': 12, 'above_18': 2, 'female_count': 6, 'male_count': 8}\n",
  170. "ds000201\n",
  171. "1.0.0/ 1.0.5/\n",
  172. "{'participant_count': 86, 'below_18': 0, 'above_18': 86, 'female_count': 44, 'male_count': 42}\n",
  173. "ds000202\n",
  174. "{'participant_count': 95, 'below_18': 0, 'above_18': 95, 'female_count': 95, 'male_count': 0}\n",
  175. "ds000203\n",
  176. "1.0.0/ 1.0.2/\n",
  177. "{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 10, 'male_count': 16}\n",
  178. "ds000204\n",
  179. "1.0.0/ 1.0.2/\n",
  180. "no participants.tsv file\n",
  181. "ds000205\n",
  182. "1.0.0/ 1.0.0/\n",
  183. "Index(['participant_id', 'handedness'], dtype='object')\n",
  184. "{'participant_count': 11}\n",
  185. "ds000206\n",
  186. "1.0.0/ 1.0.0/\n",
  187. "no participants.tsv file\n",
  188. "ds000208\n",
  189. "1.0.0/ 1.0.0/\n",
  190. "{'participant_count': 76, 'below_18': 0, 'above_18': 76, 'female_count': 40, 'male_count': 36}\n",
  191. "ds000210\n",
  192. "1.0.0/ 1.0.1/\n",
  193. "{'participant_count': 31, 'below_18': 0, 'above_18': 31, 'female_count': 16, 'male_count': 15}\n",
  194. "ds000212\n",
  195. "1.0.0/ 1.0.1/\n",
  196. "{'participant_count': 39, 'below_18': 0, 'above_18': 39, 'female_count': 10, 'male_count': 29}\n",
  197. "ds000213\n",
  198. "1.0.0/ 1.0.2/\n",
  199. "Index(['participant_id', 'gender', 'group'], dtype='object')\n",
  200. "{'participant_count': 26}\n",
  201. "ds000214\n",
  202. "1.0.0/ 1.0.0/\n",
  203. "{'participant_count': 36, 'below_18': 0, 'above_18': 36, 'female_count': 30, 'male_count': 6}\n",
  204. "ds000216\n",
  205. "1.0.0/ 1.0.1/\n",
  206. "no participants.tsv file\n",
  207. "ds000217\n",
  208. "1.0.1/ 1.0.1/\n",
  209. "{'participant_count': 41, 'below_18': 0, 'above_18': 41, 'female_count': 23, 'male_count': 18}\n",
  210. "ds000218\n",
  211. "1.0.0/ 1.0.1/\n",
  212. "{'participant_count': 19, 'below_18': 0, 'above_18': 19, 'female_count': 0, 'male_count': 19}\n",
  213. "ds000219\n",
  214. "1.0.0/ 1.0.0/\n",
  215. "{'participant_count': 21, 'below_18': 0, 'above_18': 21, 'female_count': 0, 'male_count': 21}\n",
  216. "ds000220\n",
  217. "1.0.0/ 1.0.0/\n",
  218. "{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 13, 'male_count': 13}\n",
  219. "ds000221\n",
  220. "1.0.0/ 1.0.0/\n",
  221. "{'participant_count': 318, 'below_18': 0, 'above_18': 316, 'female_count': 129, 'male_count': 189}\n",
  222. "ds000222\n",
  223. "1.0.0/ 1.0.0/\n",
  224. "{'participant_count': 79, 'below_18': 0, 'above_18': 79, 'female_count': 41, 'male_count': 38}\n",
  225. "ds000223\n",
  226. "1.0.0/ 2.0.0/\n",
  227. "Index(['participant_id'], dtype='object')\n",
  228. "{'participant_count': 19}\n",
  229. "ds000224\n",
  230. "1.0.0/ 1.0.2/\n",
  231. "{'participant_count': 10, 'below_18': 0, 'above_18': 10, 'female_count': 5, 'male_count': 5}\n",
  232. "ds000228\n",
  233. "1.0.0/ 1.0.1/\n",
  234. "{'participant_count': 155, 'below_18': 122, 'above_18': 33, 'female_count': 84, 'male_count': 71}\n",
  235. "ds000229\n",
  236. "1.0.0/ 1.0.0/\n",
  237. "{'participant_count': 15, 'below_18': 0, 'above_18': 15, 'female_count': 7, 'male_count': 8}\n",
  238. "ds000231\n",
  239. "1.0.0/ 1.0.0/\n",
  240. "{'participant_count': 9, 'below_18': 0, 'above_18': 9, 'female_count': 7, 'male_count': 2}\n",
  241. "ds000232\n",
  242. "1.0.0/ 1.0.1/\n",
  243. "{'participant_count': 10, 'below_18': 0, 'above_18': 10, 'female_count': 7, 'male_count': 3}\n",
  244. "ds000233\n",
  245. "1.0.0/ 1.0.0/\n",
  246. "{'participant_count': 12, 'below_18': 0, 'above_18': 12, 'female_count': 7, 'male_count': 5}\n",
  247. "ds000234\n",
  248. "1.0.0/ 1.0.0/\n",
  249. "{'participant_count': 5, 'below_18': 0, 'above_18': 5, 'female_count': 1, 'male_count': 4}\n",
  250. "ds000235\n",
  251. "1.0.0/ 1.0.0/\n",
  252. "{'participant_count': 4, 'below_18': 0, 'above_18': 4, 'female_count': 2, 'male_count': 2}\n",
  253. "ds000236\n",
  254. "1.0.0/ 1.0.0/\n",
  255. "{'participant_count': 18, 'below_18': 0, 'above_18': 18, 'female_count': 12, 'male_count': 6}\n",
  256. "ds000237\n",
  257. "no participants.tsv file\n",
  258. "ds000238\n",
  259. "1.0.0/ 1.0.1/\n",
  260. "{'participant_count': 35, 'below_18': 0, 'above_18': 35, 'female_count': 17, 'male_count': 18}\n",
  261. "ds000239\n",
  262. "1.0.0/ 1.0.1/\n",
  263. "{'participant_count': 3, 'below_18': 0, 'above_18': 3, 'female_count': 1, 'male_count': 2}\n",
  264. "ds000240\n",
  265. "1.0.0/ 1.0.0/\n",
  266. "{'participant_count': 63, 'below_18': 0, 'above_18': 63, 'female_count': 35, 'male_count': 28}\n",
  267. "ds000241\n",
  268. "1.0.0/ 1.0.0/\n"
  269. ]
  270. },
  271. {
  272. "name": "stdout",
  273. "output_type": "stream",
  274. "text": [
  275. "no participants.tsv file\n",
  276. "ds000243\n",
  277. "1.0.0/ 1.0.0/\n",
  278. "{'participant_count': 120, 'below_18': 0, 'above_18': 120, 'female_count': 61, 'male_count': 59}\n",
  279. "ds000244\n",
  280. "{'participant_count': 12, 'below_18': 0, 'above_18': 12, 'female_count': 9, 'male_count': 3}\n",
  281. "ds000245\n",
  282. "1.0.0/ 1.0.0/\n",
  283. "{'participant_count': 45, 'below_18': 0, 'above_18': 45, 'female_count': 25, 'male_count': 20}\n",
  284. "ds000246\n",
  285. "1.0.0/ 1.0.0/\n",
  286. "{'participant_count': 1, 'below_18': 0, 'above_18': 1, 'female_count': 0, 'male_count': 1}\n",
  287. "ds000247\n",
  288. "1.0.0/ 1.0.0/\n",
  289. "{'participant_count': 6, 'below_18': 0, 'above_18': 5, 'female_count': 2, 'male_count': 3}\n",
  290. "ds000248\n",
  291. "no participants.tsv file\n",
  292. "ds000249\n",
  293. "1.0.0/ 1.0.0/\n",
  294. "{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 13, 'male_count': 13}\n",
  295. "ds000253\n",
  296. "1.0.0/ 1.0.0/\n",
  297. "{'participant_count': 20, 'below_18': 0, 'above_18': 20, 'female_count': 20, 'male_count': 0}\n",
  298. "ds000254\n",
  299. "1.0.0/ 1.0.0/\n",
  300. "{'participant_count': 13, 'below_18': 0, 'above_18': 13, 'female_count': 7, 'male_count': 6}\n",
  301. "ds000255\n",
  302. "1.0.0/ 1.0.0/\n",
  303. "no participants.tsv file\n",
  304. "ds000256\n",
  305. "1.0.0/ 1.0.0/\n",
  306. "{'participant_count': 24, 'below_18': 24, 'above_18': 0, 'female_count': 10, 'male_count': 14}\n",
  307. "ds000258\n",
  308. "1.0.0/ 1.0.0/\n",
  309. "no participants.tsv file\n"
  310. ]
  311. }
  312. ],
  313. "source": [
  314. "def sanitize(x):\n",
  315. "    \"\"\"Coerce a raw age cell to a number (None if missing): 'a-b' -> a, 'N+' -> N, 'Young' -> 20, 'Old' -> 65.\"\"\"\n",
  316. "    if pd.isnull(x):\n",
  317. "        return None\n",
  318. "    if not isinstance(x, str):\n",
  319. "        return float(x)  # numeric cell inside an object column; the substring tests below need a str\n",
  320. "    x = x.strip()\n",
  321. "    if '-' in x:\n",
  322. "        return float(x.split('-')[0])  # binned age such as '20-25': keep the lower bound\n",
  323. "    if '+' in x:\n",
  324. "        return float(x.replace('+', ''))  # open-ended value such as '65+'\n",
  325. "    labels = {'Young': 20, 'Old': 65}  # categorical age-group labels mapped to nominal ages\n",
  326. "    return labels[x] if x in labels else float(x)\n",
  327. "\n",
  328. "swap_age_sex = ['ds000119', 'ds000121']  # for these, age is read from df.sex and sex from df.age\n",
  329. "no_header = ['ds000109']  # parsed without a header row, whitespace-delimited: participant_id, sex, age\n",
  330. "skip = ['ds000006A', 'ds000113', 'ds000149', 'ds000109']  # accessions the loop ignores entirely\n",
  331. "# Raw sex/gender labels -> 'M'/'F'; a label missing from this table raises KeyError instead of being miscounted.\n",
  332. "sex_codes = {'M':'M', 'F':'F', 'M,':'M', 'm':'M', 'f':'F', 'MALE':'M', 'FEMALE':'F', 'male':'M', 'female':'F',\n",
  333. "             'Male':'M', 'Female':'F', 'M ':'M', 'F ':'F'}\n",
  334. "ds_meta = {}\n",
  335. "# Walk every top-level prefix (one per accession number) of the 'openneuro' bucket.\n",
  336. "for k in s3.list_objects(Bucket = 'openneuro', Prefix='', Delimiter='/')['CommonPrefixes']:\n",
  337. "    accession_number = k['Prefix'][0:-1]\n",
  338. "    if accession_number in skip:\n",
  339. "        continue\n",
  340. "    print(accession_number)\n",
  341. "    ds_meta[accession_number] = {}\n",
  342. "    valid_versions = [p for p in s3.list_objects(Bucket = 'openneuro', Prefix=k['Prefix'], Delimiter='/')['CommonPrefixes'] if '_R' in p['Prefix']]\n",
  343. "    if accession_number == 'ds000202':\n",
  344. "        Key_prefix = 'ds000202/ds000202_R1.0.2/uncompressed/ds202_R1.0.2/'\n",
  345. "    elif accession_number == 'ds000248':\n",
  346. "        Key_prefix = 'ds000248/ds000248_R1.0.0/ds000248_R1.0.0/uncompressed/'\n",
  347. "    elif accession_number == 'ds000051':\n",
  348. "        Key_prefix = 'ds000051/ds000051_R2.0.2/uncompressed/ds000051_R2.0.2/'\n",
  349. "    elif valid_versions:\n",
  350. "        lv = [LooseVersion(v['Prefix'].split('_R')[-1]) for v in valid_versions]\n",
  351. "        lv.sort()\n",
  352. "        print(str(lv[0]), str(lv[-1]))\n",
  353. "        # str(lv[-1]) keeps the trailing '/' of the S3 prefix, so 'uncompressed/' is appended without a separator.\n",
  354. "        Key_prefix=accession_number + '/' + accession_number +'_R' + str(lv[-1]) + 'uncompressed/'\n",
  355. "    else:\n",
  356. "        Key_prefix=accession_number + '/uncompressed/'\n",
  357. "\n",
  358. "    # Participants are counted as the 'sub-*' prefixes directly under the chosen release folder.\n",
  359. "    participant_count = len(s3.list_objects(Bucket='openneuro', Prefix=Key_prefix + 'sub-', Delimiter=\"/\")['CommonPrefixes'])\n",
  360. "    ds_meta[accession_number]['participant_count'] = participant_count\n",
  361. "    try:\n",
  362. "        obj = s3.get_object(Bucket='openneuro', Key=Key_prefix + 'participants.tsv')\n",
  363. "    except Exception:  # not a bare except: Ctrl-C (KeyboardInterrupt) must still stop the scan\n",
  364. "        print(\"no participants.tsv file\")\n",
  365. "    else:\n",
  366. "        no_age = False\n",
  367. "        no_sex = False\n",
  368. "        if accession_number in no_header:\n",
  369. "            args = {'names': ['participant_id', 'sex', 'age'], 'header':None, 'delimiter':r\"\\s+\"}\n",
  370. "        else:\n",
  371. "            args = {'delimiter':'\\t'}\n",
  372. "\n",
  373. "        df = pd.read_csv(io.BytesIO(obj['Body'].read()), na_values=['n/a', 'D'], **args)\n",
  374. "        # Locate the age column under whichever header this dataset uses.\n",
  375. "        if accession_number in swap_age_sex:\n",
  376. "            age = df.sex\n",
  377. "        elif 'age' in df.columns:\n",
  378. "            age = df.age\n",
  379. "        elif 'Age' in df.columns:\n",
  380. "            age = df.Age\n",
  381. "        elif 'ageAtFirstScanYears' in df.columns:\n",
  382. "            age = df.ageAtFirstScanYears\n",
  383. "        elif 'age at baseline ' in df.columns:\n",
  384. "            age = df['age at baseline ']\n",
  385. "        elif 'AgeGroup' in df.columns:\n",
  386. "            age = df.AgeGroup\n",
  387. "        elif 'age (5-year bins)' in df.columns:\n",
  388. "            age = df['age (5-year bins)']\n",
  389. "        elif 'ScanAge' in df.columns:\n",
  390. "            age = df.ScanAge\n",
  391. "        elif 'Age (years)' in df.columns:\n",
  392. "            age = df['Age (years)']\n",
  393. "        else:\n",
  394. "            no_age = True\n",
  395. "\n",
  396. "        if not no_age:\n",
  397. "            if age.dtype == 'O':\n",
  398. "                age = age.apply(sanitize)\n",
  399. "            ds_meta[accession_number]['below_18'] = (age<18).sum()\n",
  400. "            ds_meta[accession_number]['above_18'] = (age>=18).sum()\n",
  401. "\n",
  402. "        # Locate the sex column under whichever header this dataset uses.\n",
  403. "        if accession_number in swap_age_sex:\n",
  404. "            sex = df.age\n",
  405. "        elif 'sex' in df.columns:\n",
  406. "            sex = df.sex\n",
  407. "        elif 'Sex' in df.columns:\n",
  408. "            sex = df.Sex\n",
  409. "        elif 'gender' in df.columns:\n",
  410. "            sex = df.gender\n",
  411. "        elif 'Gender' in df.columns:\n",
  412. "            sex = df.Gender\n",
  413. "        elif 'jsex' in df.columns:\n",
  414. "            sex = df.jsex\n",
  415. "        elif 'gender_F' in df.columns:\n",
  416. "            sex = df.gender_F\n",
  417. "        else:\n",
  418. "            no_sex = True\n",
  419. "\n",
  420. "        # Sex is normalised and counted whenever a sex column exists, whether or not an age column was found; without one, `sex` would be unbound or left over from the previous dataset.\n",
  421. "        if not no_sex:\n",
  422. "            if accession_number == 'ds000203':\n",
  423. "                sex = sex.apply(lambda x: {1:'M', 2:'F'}[x])\n",
  424. "            elif accession_number == 'ds000249':\n",
  425. "                sex = sex.apply(lambda x: {1:'F', 0:'M'}[x])\n",
  426. "            else:\n",
  427. "                sex = sex.apply(lambda x: x if pd.isnull(x) else sex_codes[x])\n",
  428. "            # Defensive check: leave the loop if anything other than 'M', 'F' or a missing value survived the mapping.\n",
  429. "            if not sex.dropna().isin(['M', 'F']).all():\n",
  430. "                print(sex.unique())\n",
  431. "                break\n",
  432. "            ds_meta[accession_number]['female_count'] = (sex == 'F').sum()\n",
  433. "            ds_meta[accession_number]['male_count'] = (sex == 'M').sum()\n",
  434. "\n",
  435. "        if no_age or no_sex:\n",
  436. "            print(df.columns)\n",
  437. "\n",
  438. "        print(ds_meta[accession_number])"
  439. ]
  440. },
  441. {
  442. "cell_type": "code",
  443. "execution_count": 20,
  444. "metadata": {},
  445. "outputs": [
  446. {
  447. "data": {
  448. "text/html": [
  449. "<div>\n",
  450. "<style scoped>\n",
  451. " .dataframe tbody tr th:only-of-type {\n",
  452. " vertical-align: middle;\n",
  453. " }\n",
  454. "\n",
  455. " .dataframe tbody tr th {\n",
  456. " vertical-align: top;\n",
  457. " }\n",
  458. "\n",
  459. " .dataframe thead th {\n",
  460. " text-align: right;\n",
  461. " }\n",
  462. "</style>\n",
  463. "<table border=\"1\" class=\"dataframe\">\n",
  464. " <thead>\n",
  465. " <tr style=\"text-align: right;\">\n",
  466. " <th></th>\n",
  467. " <th>participant_count</th>\n",
  468. " <th>below_18</th>\n",
  469. " <th>above_18</th>\n",
  470. " <th>female_count</th>\n",
  471. " <th>male_count</th>\n",
  472. " <th>missing_age</th>\n",
  473. " <th>missing_sex</th>\n",
  474. " </tr>\n",
  475. " </thead>\n",
  476. " <tbody>\n",
  477. " <tr>\n",
  478. " <th>ds000001</th>\n",
  479. " <td>16</td>\n",
  480. " <td>0.0</td>\n",
  481. " <td>16.0</td>\n",
  482. " <td>10.0</td>\n",
  483. " <td>6.0</td>\n",
  484. " <td>0.0</td>\n",
  485. " <td>0.0</td>\n",
  486. " </tr>\n",
  487. " <tr>\n",
  488. " <th>ds000002</th>\n",
  489. " <td>17</td>\n",
  490. " <td>0.0</td>\n",
  491. " <td>17.0</td>\n",
  492. " <td>10.0</td>\n",
  493. " <td>7.0</td>\n",
  494. " <td>0.0</td>\n",
  495. " <td>0.0</td>\n",
  496. " </tr>\n",
  497. " <tr>\n",
  498. " <th>ds000003</th>\n",
  499. " <td>13</td>\n",
  500. " <td>0.0</td>\n",
  501. " <td>13.0</td>\n",
  502. " <td>5.0</td>\n",
  503. " <td>8.0</td>\n",
  504. " <td>0.0</td>\n",
  505. " <td>0.0</td>\n",
  506. " </tr>\n",
  507. " <tr>\n",
  508. " <th>ds000005</th>\n",
  509. " <td>16</td>\n",
  510. " <td>0.0</td>\n",
  511. " <td>16.0</td>\n",
  512. " <td>8.0</td>\n",
  513. " <td>8.0</td>\n",
  514. " <td>0.0</td>\n",
  515. " <td>0.0</td>\n",
  516. " </tr>\n",
  517. " <tr>\n",
  518. " <th>ds000006</th>\n",
  519. " <td>14</td>\n",
  520. " <td>0.0</td>\n",
  521. " <td>14.0</td>\n",
  522. " <td>9.0</td>\n",
  523. " <td>5.0</td>\n",
  524. " <td>0.0</td>\n",
  525. " <td>0.0</td>\n",
  526. " </tr>\n",
  527. " <tr>\n",
  528. " <th>ds000007</th>\n",
  529. " <td>20</td>\n",
  530. " <td>0.0</td>\n",
  531. " <td>20.0</td>\n",
  532. " <td>9.0</td>\n",
  533. " <td>11.0</td>\n",
  534. " <td>0.0</td>\n",
  535. " <td>0.0</td>\n",
  536. " </tr>\n",
  537. " <tr>\n",
  538. " <th>ds000008</th>\n",
  539. " <td>14</td>\n",
  540. " <td>0.0</td>\n",
  541. " <td>14.0</td>\n",
  542. " <td>4.0</td>\n",
  543. " <td>10.0</td>\n",
  544. " <td>0.0</td>\n",
  545. " <td>0.0</td>\n",
  546. " </tr>\n",
  547. " <tr>\n",
  548. " <th>ds000009</th>\n",
  549. " <td>24</td>\n",
  550. " <td>0.0</td>\n",
  551. " <td>24.0</td>\n",
  552. " <td>10.0</td>\n",
  553. " <td>14.0</td>\n",
  554. " <td>0.0</td>\n",
  555. " <td>0.0</td>\n",
  556. " </tr>\n",
  557. " <tr>\n",
  558. " <th>ds000011</th>\n",
  559. " <td>14</td>\n",
  560. " <td>0.0</td>\n",
  561. " <td>14.0</td>\n",
  562. " <td>9.0</td>\n",
  563. " <td>5.0</td>\n",
  564. " <td>0.0</td>\n",
  565. " <td>0.0</td>\n",
  566. " </tr>\n",
  567. " <tr>\n",
  568. " <th>ds000017</th>\n",
  569. " <td>8</td>\n",
  570. " <td>0.0</td>\n",
  571. " <td>5.0</td>\n",
  572. " <td>1.0</td>\n",
  573. " <td>4.0</td>\n",
  574. " <td>3.0</td>\n",
  575. " <td>3.0</td>\n",
  576. " </tr>\n",
  577. " <tr>\n",
  578. " <th>ds000030</th>\n",
  579. " <td>272</td>\n",
  580. " <td>0.0</td>\n",
  581. " <td>272.0</td>\n",
  582. " <td>117.0</td>\n",
  583. " <td>155.0</td>\n",
  584. " <td>0.0</td>\n",
  585. " <td>0.0</td>\n",
  586. " </tr>\n",
  587. " <tr>\n",
  588. " <th>ds000031</th>\n",
  589. " <td>1</td>\n",
  590. " <td>NaN</td>\n",
  591. " <td>NaN</td>\n",
  592. " <td>NaN</td>\n",
  593. " <td>NaN</td>\n",
  594. " <td>1.0</td>\n",
  595. " <td>1.0</td>\n",
  596. " </tr>\n",
  597. " <tr>\n",
  598. " <th>ds000051</th>\n",
  599. " <td>13</td>\n",
  600. " <td>NaN</td>\n",
  601. " <td>NaN</td>\n",
  602. " <td>NaN</td>\n",
  603. " <td>NaN</td>\n",
  604. " <td>13.0</td>\n",
  605. " <td>13.0</td>\n",
  606. " </tr>\n",
  607. " <tr>\n",
  608. " <th>ds000052</th>\n",
  609. " <td>13</td>\n",
  610. " <td>NaN</td>\n",
  611. " <td>NaN</td>\n",
  612. " <td>NaN</td>\n",
  613. " <td>NaN</td>\n",
  614. " <td>13.0</td>\n",
  615. " <td>13.0</td>\n",
  616. " </tr>\n",
  617. " <tr>\n",
  618. " <th>ds000053</th>\n",
  619. " <td>59</td>\n",
  620. " <td>0.0</td>\n",
  621. " <td>59.0</td>\n",
  622. " <td>31.0</td>\n",
  623. " <td>28.0</td>\n",
  624. " <td>0.0</td>\n",
  625. " <td>0.0</td>\n",
  626. " </tr>\n",
  627. " <tr>\n",
  628. " <th>ds000101</th>\n",
  629. " <td>21</td>\n",
  630. " <td>0.0</td>\n",
  631. " <td>21.0</td>\n",
  632. " <td>9.0</td>\n",
  633. " <td>12.0</td>\n",
  634. " <td>0.0</td>\n",
  635. " <td>0.0</td>\n",
  636. " </tr>\n",
  637. " <tr>\n",
  638. " <th>ds000102</th>\n",
  639. " <td>26</td>\n",
  640. " <td>0.0</td>\n",
  641. " <td>26.0</td>\n",
  642. " <td>1.0</td>\n",
  643. " <td>16.0</td>\n",
  644. " <td>0.0</td>\n",
  645. " <td>9.0</td>\n",
  646. " </tr>\n",
  647. " <tr>\n",
  648. " <th>ds000105</th>\n",
  649. " <td>6</td>\n",
  650. " <td>NaN</td>\n",
  651. " <td>NaN</td>\n",
  652. " <td>NaN</td>\n",
  653. " <td>NaN</td>\n",
  654. " <td>6.0</td>\n",
  655. " <td>6.0</td>\n",
  656. " </tr>\n",
  657. " <tr>\n",
  658. " <th>ds000107</th>\n",
  659. " <td>49</td>\n",
  660. " <td>NaN</td>\n",
  661. " <td>NaN</td>\n",
  662. " <td>NaN</td>\n",
  663. " <td>NaN</td>\n",
  664. " <td>49.0</td>\n",
  665. " <td>49.0</td>\n",
  666. " </tr>\n",
  667. " <tr>\n",
  668. " <th>ds000108</th>\n",
  669. " <td>34</td>\n",
  670. " <td>0.0</td>\n",
  671. " <td>34.0</td>\n",
  672. " <td>17.0</td>\n",
  673. " <td>17.0</td>\n",
  674. " <td>0.0</td>\n",
  675. " <td>0.0</td>\n",
  676. " </tr>\n",
  677. " <tr>\n",
  678. " <th>ds000110</th>\n",
  679. " <td>18</td>\n",
  680. " <td>0.0</td>\n",
  681. " <td>18.0</td>\n",
  682. " <td>9.0</td>\n",
  683. " <td>9.0</td>\n",
  684. " <td>0.0</td>\n",
  685. " <td>0.0</td>\n",
  686. " </tr>\n",
  687. " <tr>\n",
  688. " <th>ds000113b</th>\n",
  689. " <td>16</td>\n",
  690. " <td>NaN</td>\n",
  691. " <td>NaN</td>\n",
  692. " <td>NaN</td>\n",
  693. " <td>NaN</td>\n",
  694. " <td>16.0</td>\n",
  695. " <td>16.0</td>\n",
  696. " </tr>\n",
  697. " <tr>\n",
  698. " <th>ds000113c</th>\n",
  699. " <td>7</td>\n",
  700. " <td>0.0</td>\n",
  701. " <td>7.0</td>\n",
  702. " <td>2.0</td>\n",
  703. " <td>5.0</td>\n",
  704. " <td>0.0</td>\n",
  705. " <td>0.0</td>\n",
  706. " </tr>\n",
  707. " <tr>\n",
  708. " <th>ds000113d</th>\n",
  709. " <td>30</td>\n",
  710. " <td>2.0</td>\n",
  711. " <td>28.0</td>\n",
  712. " <td>16.0</td>\n",
  713. " <td>14.0</td>\n",
  714. " <td>0.0</td>\n",
  715. " <td>0.0</td>\n",
  716. " </tr>\n",
  717. " <tr>\n",
  718. " <th>ds000114</th>\n",
  719. " <td>10</td>\n",
  720. " <td>NaN</td>\n",
  721. " <td>NaN</td>\n",
  722. " <td>NaN</td>\n",
  723. " <td>NaN</td>\n",
  724. " <td>10.0</td>\n",
  725. " <td>10.0</td>\n",
  726. " </tr>\n",
  727. " <tr>\n",
  728. " <th>ds000115</th>\n",
  729. " <td>99</td>\n",
  730. " <td>12.0</td>\n",
  731. " <td>87.0</td>\n",
  732. " <td>40.0</td>\n",
  733. " <td>59.0</td>\n",
  734. " <td>0.0</td>\n",
  735. " <td>0.0</td>\n",
  736. " </tr>\n",
  737. " <tr>\n",
  738. " <th>ds000116</th>\n",
  739. " <td>17</td>\n",
  740. " <td>0.0</td>\n",
  741. " <td>17.0</td>\n",
  742. " <td>6.0</td>\n",
  743. " <td>11.0</td>\n",
  744. " <td>0.0</td>\n",
  745. " <td>0.0</td>\n",
  746. " </tr>\n",
  747. " <tr>\n",
  748. " <th>ds000117</th>\n",
  749. " <td>16</td>\n",
  750. " <td>0.0</td>\n",
  751. " <td>16.0</td>\n",
  752. " <td>7.0</td>\n",
  753. " <td>9.0</td>\n",
  754. " <td>0.0</td>\n",
  755. " <td>0.0</td>\n",
  756. " </tr>\n",
  757. " <tr>\n",
  758. " <th>ds000119</th>\n",
  759. " <td>73</td>\n",
  760. " <td>46.0</td>\n",
  761. " <td>27.0</td>\n",
  762. " <td>43.0</td>\n",
  763. " <td>30.0</td>\n",
  764. " <td>0.0</td>\n",
  765. " <td>0.0</td>\n",
  766. " </tr>\n",
  767. " <tr>\n",
  768. " <th>ds000120</th>\n",
  769. " <td>26</td>\n",
  770. " <td>16.0</td>\n",
  771. " <td>10.0</td>\n",
  772. " <td>15.0</td>\n",
  773. " <td>11.0</td>\n",
  774. " <td>0.0</td>\n",
  775. " <td>0.0</td>\n",
  776. " </tr>\n",
  777. " <tr>\n",
  778. " <th>...</th>\n",
  779. " <td>...</td>\n",
  780. " <td>...</td>\n",
  781. " <td>...</td>\n",
  782. " <td>...</td>\n",
  783. " <td>...</td>\n",
  784. " <td>...</td>\n",
  785. " <td>...</td>\n",
  786. " </tr>\n",
  787. " <tr>\n",
  788. " <th>ds000220</th>\n",
  789. " <td>26</td>\n",
  790. " <td>0.0</td>\n",
  791. " <td>26.0</td>\n",
  792. " <td>13.0</td>\n",
  793. " <td>13.0</td>\n",
  794. " <td>0.0</td>\n",
  795. " <td>0.0</td>\n",
  796. " </tr>\n",
  797. " <tr>\n",
  798. " <th>ds000221</th>\n",
  799. " <td>318</td>\n",
  800. " <td>0.0</td>\n",
  801. " <td>316.0</td>\n",
  802. " <td>129.0</td>\n",
  803. " <td>189.0</td>\n",
  804. " <td>2.0</td>\n",
  805. " <td>0.0</td>\n",
  806. " </tr>\n",
  807. " <tr>\n",
  808. " <th>ds000222</th>\n",
  809. " <td>79</td>\n",
  810. " <td>0.0</td>\n",
  811. " <td>79.0</td>\n",
  812. " <td>41.0</td>\n",
  813. " <td>38.0</td>\n",
  814. " <td>0.0</td>\n",
  815. " <td>0.0</td>\n",
  816. " </tr>\n",
  817. " <tr>\n",
  818. " <th>ds000223</th>\n",
  819. " <td>19</td>\n",
  820. " <td>NaN</td>\n",
  821. " <td>NaN</td>\n",
  822. " <td>NaN</td>\n",
  823. " <td>NaN</td>\n",
  824. " <td>19.0</td>\n",
  825. " <td>19.0</td>\n",
  826. " </tr>\n",
  827. " <tr>\n",
  828. " <th>ds000224</th>\n",
  829. " <td>10</td>\n",
  830. " <td>0.0</td>\n",
  831. " <td>10.0</td>\n",
  832. " <td>5.0</td>\n",
  833. " <td>5.0</td>\n",
  834. " <td>0.0</td>\n",
  835. " <td>0.0</td>\n",
  836. " </tr>\n",
  837. " <tr>\n",
  838. " <th>ds000228</th>\n",
  839. " <td>155</td>\n",
  840. " <td>122.0</td>\n",
  841. " <td>33.0</td>\n",
  842. " <td>84.0</td>\n",
  843. " <td>71.0</td>\n",
  844. " <td>0.0</td>\n",
  845. " <td>0.0</td>\n",
  846. " </tr>\n",
  847. " <tr>\n",
  848. " <th>ds000229</th>\n",
  849. " <td>15</td>\n",
  850. " <td>0.0</td>\n",
  851. " <td>15.0</td>\n",
  852. " <td>7.0</td>\n",
  853. " <td>8.0</td>\n",
  854. " <td>0.0</td>\n",
  855. " <td>0.0</td>\n",
  856. " </tr>\n",
  857. " <tr>\n",
  858. " <th>ds000231</th>\n",
  859. " <td>9</td>\n",
  860. " <td>0.0</td>\n",
  861. " <td>9.0</td>\n",
  862. " <td>7.0</td>\n",
  863. " <td>2.0</td>\n",
  864. " <td>0.0</td>\n",
  865. " <td>0.0</td>\n",
  866. " </tr>\n",
  867. " <tr>\n",
  868. " <th>ds000232</th>\n",
  869. " <td>10</td>\n",
  870. " <td>0.0</td>\n",
  871. " <td>10.0</td>\n",
  872. " <td>7.0</td>\n",
  873. " <td>3.0</td>\n",
  874. " <td>0.0</td>\n",
  875. " <td>0.0</td>\n",
  876. " </tr>\n",
  877. " <tr>\n",
  878. " <th>ds000233</th>\n",
  879. " <td>12</td>\n",
  880. " <td>0.0</td>\n",
  881. " <td>12.0</td>\n",
  882. " <td>7.0</td>\n",
  883. " <td>5.0</td>\n",
  884. " <td>0.0</td>\n",
  885. " <td>0.0</td>\n",
  886. " </tr>\n",
  887. " <tr>\n",
  888. " <th>ds000234</th>\n",
  889. " <td>5</td>\n",
  890. " <td>0.0</td>\n",
  891. " <td>5.0</td>\n",
  892. " <td>1.0</td>\n",
  893. " <td>4.0</td>\n",
  894. " <td>0.0</td>\n",
  895. " <td>0.0</td>\n",
  896. " </tr>\n",
  897. " <tr>\n",
  898. " <th>ds000235</th>\n",
  899. " <td>4</td>\n",
  900. " <td>0.0</td>\n",
  901. " <td>4.0</td>\n",
  902. " <td>2.0</td>\n",
  903. " <td>2.0</td>\n",
  904. " <td>0.0</td>\n",
  905. " <td>0.0</td>\n",
  906. " </tr>\n",
  907. " <tr>\n",
  908. " <th>ds000236</th>\n",
  909. " <td>18</td>\n",
  910. " <td>0.0</td>\n",
  911. " <td>18.0</td>\n",
  912. " <td>12.0</td>\n",
  913. " <td>6.0</td>\n",
  914. " <td>0.0</td>\n",
  915. " <td>0.0</td>\n",
  916. " </tr>\n",
  917. " <tr>\n",
  918. " <th>ds000237</th>\n",
  919. " <td>13</td>\n",
  920. " <td>NaN</td>\n",
  921. " <td>NaN</td>\n",
  922. " <td>NaN</td>\n",
  923. " <td>NaN</td>\n",
  924. " <td>13.0</td>\n",
  925. " <td>13.0</td>\n",
  926. " </tr>\n",
  927. " <tr>\n",
  928. " <th>ds000238</th>\n",
  929. " <td>35</td>\n",
  930. " <td>0.0</td>\n",
  931. " <td>35.0</td>\n",
  932. " <td>17.0</td>\n",
  933. " <td>18.0</td>\n",
  934. " <td>0.0</td>\n",
  935. " <td>0.0</td>\n",
  936. " </tr>\n",
  937. " <tr>\n",
  938. " <th>ds000239</th>\n",
  939. " <td>3</td>\n",
  940. " <td>0.0</td>\n",
  941. " <td>3.0</td>\n",
  942. " <td>1.0</td>\n",
  943. " <td>2.0</td>\n",
  944. " <td>0.0</td>\n",
  945. " <td>0.0</td>\n",
  946. " </tr>\n",
  947. " <tr>\n",
  948. " <th>ds000240</th>\n",
  949. " <td>63</td>\n",
  950. " <td>0.0</td>\n",
  951. " <td>63.0</td>\n",
  952. " <td>35.0</td>\n",
  953. " <td>28.0</td>\n",
  954. " <td>0.0</td>\n",
  955. " <td>0.0</td>\n",
  956. " </tr>\n",
  957. " <tr>\n",
  958. " <th>ds000241</th>\n",
  959. " <td>12</td>\n",
  960. " <td>NaN</td>\n",
  961. " <td>NaN</td>\n",
  962. " <td>NaN</td>\n",
  963. " <td>NaN</td>\n",
  964. " <td>12.0</td>\n",
  965. " <td>12.0</td>\n",
  966. " </tr>\n",
  967. " <tr>\n",
  968. " <th>ds000243</th>\n",
  969. " <td>120</td>\n",
  970. " <td>0.0</td>\n",
  971. " <td>120.0</td>\n",
  972. " <td>61.0</td>\n",
  973. " <td>59.0</td>\n",
  974. " <td>0.0</td>\n",
  975. " <td>0.0</td>\n",
  976. " </tr>\n",
  977. " <tr>\n",
  978. " <th>ds000244</th>\n",
  979. " <td>12</td>\n",
  980. " <td>0.0</td>\n",
  981. " <td>12.0</td>\n",
  982. " <td>9.0</td>\n",
  983. " <td>3.0</td>\n",
  984. " <td>0.0</td>\n",
  985. " <td>0.0</td>\n",
  986. " </tr>\n",
  987. " <tr>\n",
  988. " <th>ds000245</th>\n",
  989. " <td>45</td>\n",
  990. " <td>0.0</td>\n",
  991. " <td>45.0</td>\n",
  992. " <td>25.0</td>\n",
  993. " <td>20.0</td>\n",
  994. " <td>0.0</td>\n",
  995. " <td>0.0</td>\n",
  996. " </tr>\n",
  997. " <tr>\n",
  998. " <th>ds000246</th>\n",
  999. " <td>1</td>\n",
  1000. " <td>0.0</td>\n",
  1001. " <td>1.0</td>\n",
  1002. " <td>0.0</td>\n",
  1003. " <td>1.0</td>\n",
  1004. " <td>0.0</td>\n",
  1005. " <td>0.0</td>\n",
  1006. " </tr>\n",
  1007. " <tr>\n",
  1008. " <th>ds000247</th>\n",
  1009. " <td>6</td>\n",
  1010. " <td>0.0</td>\n",
  1011. " <td>5.0</td>\n",
  1012. " <td>2.0</td>\n",
  1013. " <td>3.0</td>\n",
  1014. " <td>1.0</td>\n",
  1015. " <td>1.0</td>\n",
  1016. " </tr>\n",
  1017. " <tr>\n",
  1018. " <th>ds000248</th>\n",
  1019. " <td>2</td>\n",
  1020. " <td>NaN</td>\n",
  1021. " <td>NaN</td>\n",
  1022. " <td>NaN</td>\n",
  1023. " <td>NaN</td>\n",
  1024. " <td>2.0</td>\n",
  1025. " <td>2.0</td>\n",
  1026. " </tr>\n",
  1027. " <tr>\n",
  1028. " <th>ds000249</th>\n",
  1029. " <td>26</td>\n",
  1030. " <td>0.0</td>\n",
  1031. " <td>26.0</td>\n",
  1032. " <td>13.0</td>\n",
  1033. " <td>13.0</td>\n",
  1034. " <td>0.0</td>\n",
  1035. " <td>0.0</td>\n",
  1036. " </tr>\n",
  1037. " <tr>\n",
  1038. " <th>ds000253</th>\n",
  1039. " <td>20</td>\n",
  1040. " <td>0.0</td>\n",
  1041. " <td>20.0</td>\n",
  1042. " <td>20.0</td>\n",
  1043. " <td>0.0</td>\n",
  1044. " <td>0.0</td>\n",
  1045. " <td>0.0</td>\n",
  1046. " </tr>\n",
  1047. " <tr>\n",
  1048. " <th>ds000254</th>\n",
  1049. " <td>13</td>\n",
  1050. " <td>0.0</td>\n",
  1051. " <td>13.0</td>\n",
  1052. " <td>7.0</td>\n",
  1053. " <td>6.0</td>\n",
  1054. " <td>0.0</td>\n",
  1055. " <td>0.0</td>\n",
  1056. " </tr>\n",
  1057. " <tr>\n",
  1058. " <th>ds000255</th>\n",
  1059. " <td>2</td>\n",
  1060. " <td>NaN</td>\n",
  1061. " <td>NaN</td>\n",
  1062. " <td>NaN</td>\n",
  1063. " <td>NaN</td>\n",
  1064. " <td>2.0</td>\n",
  1065. " <td>2.0</td>\n",
  1066. " </tr>\n",
  1067. " <tr>\n",
  1068. " <th>ds000256</th>\n",
  1069. " <td>24</td>\n",
  1070. " <td>24.0</td>\n",
  1071. " <td>0.0</td>\n",
  1072. " <td>10.0</td>\n",
  1073. " <td>14.0</td>\n",
  1074. " <td>0.0</td>\n",
  1075. " <td>0.0</td>\n",
  1076. " </tr>\n",
  1077. " <tr>\n",
  1078. " <th>ds000258</th>\n",
  1079. " <td>89</td>\n",
  1080. " <td>NaN</td>\n",
  1081. " <td>NaN</td>\n",
  1082. " <td>NaN</td>\n",
  1083. " <td>NaN</td>\n",
  1084. " <td>89.0</td>\n",
  1085. " <td>89.0</td>\n",
  1086. " </tr>\n",
  1087. " </tbody>\n",
  1088. "</table>\n",
  1089. "<p>92 rows × 7 columns</p>\n",
  1090. "</div>"
  1091. ],
  1092. "text/plain": [
  1093. " participant_count below_18 above_18 female_count male_count \\\n",
  1094. "ds000001 16 0.0 16.0 10.0 6.0 \n",
  1095. "ds000002 17 0.0 17.0 10.0 7.0 \n",
  1096. "ds000003 13 0.0 13.0 5.0 8.0 \n",
  1097. "ds000005 16 0.0 16.0 8.0 8.0 \n",
  1098. "ds000006 14 0.0 14.0 9.0 5.0 \n",
  1099. "ds000007 20 0.0 20.0 9.0 11.0 \n",
  1100. "ds000008 14 0.0 14.0 4.0 10.0 \n",
  1101. "ds000009 24 0.0 24.0 10.0 14.0 \n",
  1102. "ds000011 14 0.0 14.0 9.0 5.0 \n",
  1103. "ds000017 8 0.0 5.0 1.0 4.0 \n",
  1104. "ds000030 272 0.0 272.0 117.0 155.0 \n",
  1105. "ds000031 1 NaN NaN NaN NaN \n",
  1106. "ds000051 13 NaN NaN NaN NaN \n",
  1107. "ds000052 13 NaN NaN NaN NaN \n",
  1108. "ds000053 59 0.0 59.0 31.0 28.0 \n",
  1109. "ds000101 21 0.0 21.0 9.0 12.0 \n",
  1110. "ds000102 26 0.0 26.0 1.0 16.0 \n",
  1111. "ds000105 6 NaN NaN NaN NaN \n",
  1112. "ds000107 49 NaN NaN NaN NaN \n",
  1113. "ds000108 34 0.0 34.0 17.0 17.0 \n",
  1114. "ds000110 18 0.0 18.0 9.0 9.0 \n",
  1115. "ds000113b 16 NaN NaN NaN NaN \n",
  1116. "ds000113c 7 0.0 7.0 2.0 5.0 \n",
  1117. "ds000113d 30 2.0 28.0 16.0 14.0 \n",
  1118. "ds000114 10 NaN NaN NaN NaN \n",
  1119. "ds000115 99 12.0 87.0 40.0 59.0 \n",
  1120. "ds000116 17 0.0 17.0 6.0 11.0 \n",
  1121. "ds000117 16 0.0 16.0 7.0 9.0 \n",
  1122. "ds000119 73 46.0 27.0 43.0 30.0 \n",
  1123. "ds000120 26 16.0 10.0 15.0 11.0 \n",
  1124. "... ... ... ... ... ... \n",
  1125. "ds000220 26 0.0 26.0 13.0 13.0 \n",
  1126. "ds000221 318 0.0 316.0 129.0 189.0 \n",
  1127. "ds000222 79 0.0 79.0 41.0 38.0 \n",
  1128. "ds000223 19 NaN NaN NaN NaN \n",
  1129. "ds000224 10 0.0 10.0 5.0 5.0 \n",
  1130. "ds000228 155 122.0 33.0 84.0 71.0 \n",
  1131. "ds000229 15 0.0 15.0 7.0 8.0 \n",
  1132. "ds000231 9 0.0 9.0 7.0 2.0 \n",
  1133. "ds000232 10 0.0 10.0 7.0 3.0 \n",
  1134. "ds000233 12 0.0 12.0 7.0 5.0 \n",
  1135. "ds000234 5 0.0 5.0 1.0 4.0 \n",
  1136. "ds000235 4 0.0 4.0 2.0 2.0 \n",
  1137. "ds000236 18 0.0 18.0 12.0 6.0 \n",
  1138. "ds000237 13 NaN NaN NaN NaN \n",
  1139. "ds000238 35 0.0 35.0 17.0 18.0 \n",
  1140. "ds000239 3 0.0 3.0 1.0 2.0 \n",
  1141. "ds000240 63 0.0 63.0 35.0 28.0 \n",
  1142. "ds000241 12 NaN NaN NaN NaN \n",
  1143. "ds000243 120 0.0 120.0 61.0 59.0 \n",
  1144. "ds000244 12 0.0 12.0 9.0 3.0 \n",
  1145. "ds000245 45 0.0 45.0 25.0 20.0 \n",
  1146. "ds000246 1 0.0 1.0 0.0 1.0 \n",
  1147. "ds000247 6 0.0 5.0 2.0 3.0 \n",
  1148. "ds000248 2 NaN NaN NaN NaN \n",
  1149. "ds000249 26 0.0 26.0 13.0 13.0 \n",
  1150. "ds000253 20 0.0 20.0 20.0 0.0 \n",
  1151. "ds000254 13 0.0 13.0 7.0 6.0 \n",
  1152. "ds000255 2 NaN NaN NaN NaN \n",
  1153. "ds000256 24 24.0 0.0 10.0 14.0 \n",
  1154. "ds000258 89 NaN NaN NaN NaN \n",
  1155. "\n",
  1156. " missing_age missing_sex \n",
  1157. "ds000001 0.0 0.0 \n",
  1158. "ds000002 0.0 0.0 \n",
  1159. "ds000003 0.0 0.0 \n",
  1160. "ds000005 0.0 0.0 \n",
  1161. "ds000006 0.0 0.0 \n",
  1162. "ds000007 0.0 0.0 \n",
  1163. "ds000008 0.0 0.0 \n",
  1164. "ds000009 0.0 0.0 \n",
  1165. "ds000011 0.0 0.0 \n",
  1166. "ds000017 3.0 3.0 \n",
  1167. "ds000030 0.0 0.0 \n",
  1168. "ds000031 1.0 1.0 \n",
  1169. "ds000051 13.0 13.0 \n",
  1170. "ds000052 13.0 13.0 \n",
  1171. "ds000053 0.0 0.0 \n",
  1172. "ds000101 0.0 0.0 \n",
  1173. "ds000102 0.0 9.0 \n",
  1174. "ds000105 6.0 6.0 \n",
  1175. "ds000107 49.0 49.0 \n",
  1176. "ds000108 0.0 0.0 \n",
  1177. "ds000110 0.0 0.0 \n",
  1178. "ds000113b 16.0 16.0 \n",
  1179. "ds000113c 0.0 0.0 \n",
  1180. "ds000113d 0.0 0.0 \n",
  1181. "ds000114 10.0 10.0 \n",
  1182. "ds000115 0.0 0.0 \n",
  1183. "ds000116 0.0 0.0 \n",
  1184. "ds000117 0.0 0.0 \n",
  1185. "ds000119 0.0 0.0 \n",
  1186. "ds000120 0.0 0.0 \n",
  1187. "... ... ... \n",
  1188. "ds000220 0.0 0.0 \n",
  1189. "ds000221 2.0 0.0 \n",
  1190. "ds000222 0.0 0.0 \n",
  1191. "ds000223 19.0 19.0 \n",
  1192. "ds000224 0.0 0.0 \n",
  1193. "ds000228 0.0 0.0 \n",
  1194. "ds000229 0.0 0.0 \n",
  1195. "ds000231 0.0 0.0 \n",
  1196. "ds000232 0.0 0.0 \n",
  1197. "ds000233 0.0 0.0 \n",
  1198. "ds000234 0.0 0.0 \n",
  1199. "ds000235 0.0 0.0 \n",
  1200. "ds000236 0.0 0.0 \n",
  1201. "ds000237 13.0 13.0 \n",
  1202. "ds000238 0.0 0.0 \n",
  1203. "ds000239 0.0 0.0 \n",
  1204. "ds000240 0.0 0.0 \n",
  1205. "ds000241 12.0 12.0 \n",
  1206. "ds000243 0.0 0.0 \n",
  1207. "ds000244 0.0 0.0 \n",
  1208. "ds000245 0.0 0.0 \n",
  1209. "ds000246 0.0 0.0 \n",
  1210. "ds000247 1.0 1.0 \n",
  1211. "ds000248 2.0 2.0 \n",
  1212. "ds000249 0.0 0.0 \n",
  1213. "ds000253 0.0 0.0 \n",
  1214. "ds000254 0.0 0.0 \n",
  1215. "ds000255 2.0 2.0 \n",
  1216. "ds000256 0.0 0.0 \n",
  1217. "ds000258 89.0 89.0 \n",
  1218. "\n",
  1219. "[92 rows x 7 columns]"
  1220. ]
  1221. },
  1222. "execution_count": 20,
  1223. "metadata": {},
  1224. "output_type": "execute_result"
  1225. }
  1226. ],
  1227. "source": [
  1228. "openfmri_df = pd.DataFrame.from_dict(ds_meta, orient='index')\n",
  1229. "openfmri_df['missing_age'] = openfmri_df.participant_count - (openfmri_df.below_18.fillna(0) + openfmri_df.above_18.fillna(0))\n",
  1230. "openfmri_df['missing_sex'] = openfmri_df.participant_count - (openfmri_df.female_count.fillna(0) + openfmri_df.male_count.fillna(0))\n",
  1231. "openfmri_df"
  1232. ]
  1233. },
  1234. {
  1235. "cell_type": "code",
  1236. "execution_count": 22,
  1237. "metadata": {},
  1238. "outputs": [
  1239. {
  1240. "data": {
  1241. "text/html": [
  1242. "<div>\n",
  1243. "<style scoped>\n",
  1244. " .dataframe tbody tr th:only-of-type {\n",
  1245. " vertical-align: middle;\n",
  1246. " }\n",
  1247. "\n",
  1248. " .dataframe tbody tr th {\n",
  1249. " vertical-align: top;\n",
  1250. " }\n",
  1251. "\n",
  1252. " .dataframe thead th {\n",
  1253. " text-align: right;\n",
  1254. " }\n",
  1255. "</style>\n",
  1256. "<table border=\"1\" class=\"dataframe\">\n",
  1257. " <thead>\n",
  1258. " <tr style=\"text-align: right;\">\n",
  1259. " <th></th>\n",
  1260. " <th>participant_count</th>\n",
  1261. " <th>below_18</th>\n",
  1262. " <th>above_18</th>\n",
  1263. " <th>female_count</th>\n",
  1264. " <th>male_count</th>\n",
  1265. " <th>missing_age</th>\n",
  1266. " <th>missing_sex</th>\n",
  1267. " </tr>\n",
  1268. " </thead>\n",
  1269. " <tbody>\n",
  1270. " <tr>\n",
  1271. " <th>ds000017</th>\n",
  1272. " <td>8</td>\n",
  1273. " <td>0.0</td>\n",
  1274. " <td>5.0</td>\n",
  1275. " <td>1.0</td>\n",
  1276. " <td>4.0</td>\n",
  1277. " <td>3.0</td>\n",
  1278. " <td>3.0</td>\n",
  1279. " </tr>\n",
  1280. " <tr>\n",
  1281. " <th>ds000031</th>\n",
  1282. " <td>1</td>\n",
  1283. " <td>NaN</td>\n",
  1284. " <td>NaN</td>\n",
  1285. " <td>NaN</td>\n",
  1286. " <td>NaN</td>\n",
  1287. " <td>1.0</td>\n",
  1288. " <td>1.0</td>\n",
  1289. " </tr>\n",
  1290. " <tr>\n",
  1291. " <th>ds000051</th>\n",
  1292. " <td>13</td>\n",
  1293. " <td>NaN</td>\n",
  1294. " <td>NaN</td>\n",
  1295. " <td>NaN</td>\n",
  1296. " <td>NaN</td>\n",
  1297. " <td>13.0</td>\n",
  1298. " <td>13.0</td>\n",
  1299. " </tr>\n",
  1300. " <tr>\n",
  1301. " <th>ds000052</th>\n",
  1302. " <td>13</td>\n",
  1303. " <td>NaN</td>\n",
  1304. " <td>NaN</td>\n",
  1305. " <td>NaN</td>\n",
  1306. " <td>NaN</td>\n",
  1307. " <td>13.0</td>\n",
  1308. " <td>13.0</td>\n",
  1309. " </tr>\n",
  1310. " <tr>\n",
  1311. " <th>ds000105</th>\n",
  1312. " <td>6</td>\n",
  1313. " <td>NaN</td>\n",
  1314. " <td>NaN</td>\n",
  1315. " <td>NaN</td>\n",
  1316. " <td>NaN</td>\n",
  1317. " <td>6.0</td>\n",
  1318. " <td>6.0</td>\n",
  1319. " </tr>\n",
  1320. " <tr>\n",
  1321. " <th>ds000107</th>\n",
  1322. " <td>49</td>\n",
  1323. " <td>NaN</td>\n",
  1324. " <td>NaN</td>\n",
  1325. " <td>NaN</td>\n",
  1326. " <td>NaN</td>\n",
  1327. " <td>49.0</td>\n",
  1328. " <td>49.0</td>\n",
  1329. " </tr>\n",
  1330. " <tr>\n",
  1331. " <th>ds000113b</th>\n",
  1332. " <td>16</td>\n",
  1333. " <td>NaN</td>\n",
  1334. " <td>NaN</td>\n",
  1335. " <td>NaN</td>\n",
  1336. " <td>NaN</td>\n",
  1337. " <td>16.0</td>\n",
  1338. " <td>16.0</td>\n",
  1339. " </tr>\n",
  1340. " <tr>\n",
  1341. " <th>ds000114</th>\n",
  1342. " <td>10</td>\n",
  1343. " <td>NaN</td>\n",
  1344. " <td>NaN</td>\n",
  1345. " <td>NaN</td>\n",
  1346. " <td>NaN</td>\n",
  1347. " <td>10.0</td>\n",
  1348. " <td>10.0</td>\n",
  1349. " </tr>\n",
  1350. " <tr>\n",
  1351. " <th>ds000133</th>\n",
  1352. " <td>26</td>\n",
  1353. " <td>NaN</td>\n",
  1354. " <td>NaN</td>\n",
  1355. " <td>NaN</td>\n",
  1356. " <td>NaN</td>\n",
  1357. " <td>26.0</td>\n",
  1358. " <td>26.0</td>\n",
  1359. " </tr>\n",
  1360. " <tr>\n",
  1361. " <th>ds000158</th>\n",
  1362. " <td>217</td>\n",
  1363. " <td>NaN</td>\n",
  1364. " <td>NaN</td>\n",
  1365. " <td>NaN</td>\n",
  1366. " <td>NaN</td>\n",
  1367. " <td>217.0</td>\n",
  1368. " <td>217.0</td>\n",
  1369. " </tr>\n",
  1370. " <tr>\n",
  1371. " <th>ds000164</th>\n",
  1372. " <td>28</td>\n",
  1373. " <td>NaN</td>\n",
  1374. " <td>NaN</td>\n",
  1375. " <td>NaN</td>\n",
  1376. " <td>NaN</td>\n",
  1377. " <td>28.0</td>\n",
  1378. " <td>28.0</td>\n",
  1379. " </tr>\n",
  1380. " <tr>\n",
  1381. " <th>ds000168</th>\n",
  1382. " <td>35</td>\n",
  1383. " <td>0.0</td>\n",
  1384. " <td>25.0</td>\n",
  1385. " <td>15.0</td>\n",
  1386. " <td>20.0</td>\n",
  1387. " <td>10.0</td>\n",
  1388. " <td>0.0</td>\n",
  1389. " </tr>\n",
  1390. " <tr>\n",
  1391. " <th>ds000204</th>\n",
  1392. " <td>1</td>\n",
  1393. " <td>NaN</td>\n",
  1394. " <td>NaN</td>\n",
  1395. " <td>NaN</td>\n",
  1396. " <td>NaN</td>\n",
  1397. " <td>1.0</td>\n",
  1398. " <td>1.0</td>\n",
  1399. " </tr>\n",
  1400. " <tr>\n",
  1401. " <th>ds000205</th>\n",
  1402. " <td>11</td>\n",
  1403. " <td>NaN</td>\n",
  1404. " <td>NaN</td>\n",
  1405. " <td>NaN</td>\n",
  1406. " <td>NaN</td>\n",
  1407. " <td>11.0</td>\n",
  1408. " <td>11.0</td>\n",
  1409. " </tr>\n",
  1410. " <tr>\n",
  1411. " <th>ds000206</th>\n",
  1412. " <td>6</td>\n",
  1413. " <td>NaN</td>\n",
  1414. " <td>NaN</td>\n",
  1415. " <td>NaN</td>\n",
  1416. " <td>NaN</td>\n",
  1417. " <td>6.0</td>\n",
  1418. " <td>6.0</td>\n",
  1419. " </tr>\n",
  1420. " <tr>\n",
  1421. " <th>ds000213</th>\n",
  1422. " <td>26</td>\n",
  1423. " <td>NaN</td>\n",
  1424. " <td>NaN</td>\n",
  1425. " <td>NaN</td>\n",
  1426. " <td>NaN</td>\n",
  1427. " <td>26.0</td>\n",
  1428. " <td>26.0</td>\n",
  1429. " </tr>\n",
  1430. " <tr>\n",
  1431. " <th>ds000216</th>\n",
  1432. " <td>7</td>\n",
  1433. " <td>NaN</td>\n",
  1434. " <td>NaN</td>\n",
  1435. " <td>NaN</td>\n",
  1436. " <td>NaN</td>\n",
  1437. " <td>7.0</td>\n",
  1438. " <td>7.0</td>\n",
  1439. " </tr>\n",
  1440. " <tr>\n",
  1441. " <th>ds000221</th>\n",
  1442. " <td>318</td>\n",
  1443. " <td>0.0</td>\n",
  1444. " <td>316.0</td>\n",
  1445. " <td>129.0</td>\n",
  1446. " <td>189.0</td>\n",
  1447. " <td>2.0</td>\n",
  1448. " <td>0.0</td>\n",
  1449. " </tr>\n",
  1450. " <tr>\n",
  1451. " <th>ds000223</th>\n",
  1452. " <td>19</td>\n",
  1453. " <td>NaN</td>\n",
  1454. " <td>NaN</td>\n",
  1455. " <td>NaN</td>\n",
  1456. " <td>NaN</td>\n",
  1457. " <td>19.0</td>\n",
  1458. " <td>19.0</td>\n",
  1459. " </tr>\n",
  1460. " <tr>\n",
  1461. " <th>ds000237</th>\n",
  1462. " <td>13</td>\n",
  1463. " <td>NaN</td>\n",
  1464. " <td>NaN</td>\n",
  1465. " <td>NaN</td>\n",
  1466. " <td>NaN</td>\n",
  1467. " <td>13.0</td>\n",
  1468. " <td>13.0</td>\n",
  1469. " </tr>\n",
  1470. " <tr>\n",
  1471. " <th>ds000241</th>\n",
  1472. " <td>12</td>\n",
  1473. " <td>NaN</td>\n",
  1474. " <td>NaN</td>\n",
  1475. " <td>NaN</td>\n",
  1476. " <td>NaN</td>\n",
  1477. " <td>12.0</td>\n",
  1478. " <td>12.0</td>\n",
  1479. " </tr>\n",
  1480. " <tr>\n",
  1481. " <th>ds000247</th>\n",
  1482. " <td>6</td>\n",
  1483. " <td>0.0</td>\n",
  1484. " <td>5.0</td>\n",
  1485. " <td>2.0</td>\n",
  1486. " <td>3.0</td>\n",
  1487. " <td>1.0</td>\n",
  1488. " <td>1.0</td>\n",
  1489. " </tr>\n",
  1490. " <tr>\n",
  1491. " <th>ds000248</th>\n",
  1492. " <td>2</td>\n",
  1493. " <td>NaN</td>\n",
  1494. " <td>NaN</td>\n",
  1495. " <td>NaN</td>\n",
  1496. " <td>NaN</td>\n",
  1497. " <td>2.0</td>\n",
  1498. " <td>2.0</td>\n",
  1499. " </tr>\n",
  1500. " <tr>\n",
  1501. " <th>ds000255</th>\n",
  1502. " <td>2</td>\n",
  1503. " <td>NaN</td>\n",
  1504. " <td>NaN</td>\n",
  1505. " <td>NaN</td>\n",
  1506. " <td>NaN</td>\n",
  1507. " <td>2.0</td>\n",
  1508. " <td>2.0</td>\n",
  1509. " </tr>\n",
  1510. " <tr>\n",
  1511. " <th>ds000258</th>\n",
  1512. " <td>89</td>\n",
  1513. " <td>NaN</td>\n",
  1514. " <td>NaN</td>\n",
  1515. " <td>NaN</td>\n",
  1516. " <td>NaN</td>\n",
  1517. " <td>89.0</td>\n",
  1518. " <td>89.0</td>\n",
  1519. " </tr>\n",
  1520. " </tbody>\n",
  1521. "</table>\n",
  1522. "</div>"
  1523. ],
  1524. "text/plain": [
  1525. " participant_count below_18 above_18 female_count male_count \\\n",
  1526. "ds000017 8 0.0 5.0 1.0 4.0 \n",
  1527. "ds000031 1 NaN NaN NaN NaN \n",
  1528. "ds000051 13 NaN NaN NaN NaN \n",
  1529. "ds000052 13 NaN NaN NaN NaN \n",
  1530. "ds000105 6 NaN NaN NaN NaN \n",
  1531. "ds000107 49 NaN NaN NaN NaN \n",
  1532. "ds000113b 16 NaN NaN NaN NaN \n",
  1533. "ds000114 10 NaN NaN NaN NaN \n",
  1534. "ds000133 26 NaN NaN NaN NaN \n",
  1535. "ds000158 217 NaN NaN NaN NaN \n",
  1536. "ds000164 28 NaN NaN NaN NaN \n",
  1537. "ds000168 35 0.0 25.0 15.0 20.0 \n",
  1538. "ds000204 1 NaN NaN NaN NaN \n",
  1539. "ds000205 11 NaN NaN NaN NaN \n",
  1540. "ds000206 6 NaN NaN NaN NaN \n",
  1541. "ds000213 26 NaN NaN NaN NaN \n",
  1542. "ds000216 7 NaN NaN NaN NaN \n",
  1543. "ds000221 318 0.0 316.0 129.0 189.0 \n",
  1544. "ds000223 19 NaN NaN NaN NaN \n",
  1545. "ds000237 13 NaN NaN NaN NaN \n",
  1546. "ds000241 12 NaN NaN NaN NaN \n",
  1547. "ds000247 6 0.0 5.0 2.0 3.0 \n",
  1548. "ds000248 2 NaN NaN NaN NaN \n",
  1549. "ds000255 2 NaN NaN NaN NaN \n",
  1550. "ds000258 89 NaN NaN NaN NaN \n",
  1551. "\n",
  1552. " missing_age missing_sex \n",
  1553. "ds000017 3.0 3.0 \n",
  1554. "ds000031 1.0 1.0 \n",
  1555. "ds000051 13.0 13.0 \n",
  1556. "ds000052 13.0 13.0 \n",
  1557. "ds000105 6.0 6.0 \n",
  1558. "ds000107 49.0 49.0 \n",
  1559. "ds000113b 16.0 16.0 \n",
  1560. "ds000114 10.0 10.0 \n",
  1561. "ds000133 26.0 26.0 \n",
  1562. "ds000158 217.0 217.0 \n",
  1563. "ds000164 28.0 28.0 \n",
  1564. "ds000168 10.0 0.0 \n",
  1565. "ds000204 1.0 1.0 \n",
  1566. "ds000205 11.0 11.0 \n",
  1567. "ds000206 6.0 6.0 \n",
  1568. "ds000213 26.0 26.0 \n",
  1569. "ds000216 7.0 7.0 \n",
  1570. "ds000221 2.0 0.0 \n",
  1571. "ds000223 19.0 19.0 \n",
  1572. "ds000237 13.0 13.0 \n",
  1573. "ds000241 12.0 12.0 \n",
  1574. "ds000247 1.0 1.0 \n",
  1575. "ds000248 2.0 2.0 \n",
  1576. "ds000255 2.0 2.0 \n",
  1577. "ds000258 89.0 89.0 "
  1578. ]
  1579. },
  1580. "execution_count": 22,
  1581. "metadata": {},
  1582. "output_type": "execute_result"
  1583. }
  1584. ],
  1585. "source": [
  1586. "openfmri_df[openfmri_df.missing_age > 0]"
  1587. ]
  1588. },
  1589. {
  1590. "cell_type": "code",
  1591. "execution_count": 25,
  1592. "metadata": {},
  1593. "outputs": [
  1594. {
  1595. "data": {
  1596. "text/plain": [
  1597. "3307"
  1598. ]
  1599. },
  1600. "execution_count": 25,
  1601. "metadata": {},
  1602. "output_type": "execute_result"
  1603. }
  1604. ],
  1605. "source": [
  1606. "openfmri_df.participant_count.sum()"
  1607. ]
  1608. },
  1609. {
  1610. "cell_type": "code",
  1611. "execution_count": 23,
  1612. "metadata": {},
  1613. "outputs": [
  1614. {
  1615. "data": {
  1616. "text/plain": [
  1617. "583.0"
  1618. ]
  1619. },
  1620. "execution_count": 23,
  1621. "metadata": {},
  1622. "output_type": "execute_result"
  1623. }
  1624. ],
  1625. "source": [
  1626. "openfmri_df.missing_age.sum()"
  1627. ]
  1628. },
  1629. {
  1630. "cell_type": "code",
  1631. "execution_count": 24,
  1632. "metadata": {},
  1633. "outputs": [
  1634. {
  1635. "data": {
  1636. "text/plain": [
  1637. "580.0"
  1638. ]
  1639. },
  1640. "execution_count": 24,
  1641. "metadata": {},
  1642. "output_type": "execute_result"
  1643. }
  1644. ],
  1645. "source": [
  1646. "openfmri_df.missing_sex.sum()"
  1647. ]
  1648. },
  1649. {
  1650. "cell_type": "code",
  1651. "execution_count": 26,
  1652. "metadata": {},
  1653. "outputs": [
  1654. {
  1655. "data": {
  1656. "text/plain": [
  1657. "293.0"
  1658. ]
  1659. },
  1660. "execution_count": 26,
  1661. "metadata": {},
  1662. "output_type": "execute_result"
  1663. }
  1664. ],
  1665. "source": [
  1666. "openfmri_df.below_18.sum()"
  1667. ]
  1668. },
  1669. {
  1670. "cell_type": "code",
  1671. "execution_count": 27,
  1672. "metadata": {},
  1673. "outputs": [
  1674. {
  1675. "data": {
  1676. "text/plain": [
  1677. "2431.0"
  1678. ]
  1679. },
  1680. "execution_count": 27,
  1681. "metadata": {},
  1682. "output_type": "execute_result"
  1683. }
  1684. ],
  1685. "source": [
  1686. "openfmri_df.above_18.sum()"
  1687. ]
  1688. },
  1689. {
  1690. "cell_type": "code",
  1691. "execution_count": 29,
  1692. "metadata": {},
  1693. "outputs": [
  1694. {
  1695. "data": {
  1696. "text/plain": [
  1697. "1379.0"
  1698. ]
  1699. },
  1700. "execution_count": 29,
  1701. "metadata": {},
  1702. "output_type": "execute_result"
  1703. }
  1704. ],
  1705. "source": [
  1706. "openfmri_df.female_count.sum()"
  1707. ]
  1708. },
  1709. {
  1710. "cell_type": "code",
  1711. "execution_count": 30,
  1712. "metadata": {},
  1713. "outputs": [
  1714. {
  1715. "data": {
  1716. "text/plain": [
  1717. "1348.0"
  1718. ]
  1719. },
  1720. "execution_count": 30,
  1721. "metadata": {},
  1722. "output_type": "execute_result"
  1723. }
  1724. ],
  1725. "source": [
  1726. "openfmri_df.male_count.sum()"
  1727. ]
  1728. },
  1729. {
  1730. "cell_type": "code",
  1731. "execution_count": null,
  1732. "metadata": {
  1733. "collapsed": true
  1734. },
  1735. "outputs": [],
  1736. "source": []
  1737. }
  1738. ],
  1739. "metadata": {
  1740. "kernelspec": {
  1741. "display_name": "Python 3",
  1742. "language": "python",
  1743. "name": "python3"
  1744. },
  1745. "language_info": {
  1746. "codemirror_mode": {
  1747. "name": "ipython",
  1748. "version": 3
  1749. },
  1750. "file_extension": ".py",
  1751. "mimetype": "text/x-python",
  1752. "name": "python",
  1753. "nbconvert_exporter": "python",
  1754. "pygments_lexer": "ipython3",
  1755. "version": "3.6.3"
  1756. }
  1757. },
  1758. "nbformat": 4,
  1759. "nbformat_minor": 2
  1760. }
Add Comment
Please, Sign In to add comment