Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import boto3\n",
- "from botocore import UNSIGNED\n",
- "from botocore.client import Config\n",
- "from distutils.version import LooseVersion, StrictVersion\n",
- "import pandas as pd\n",
- "import io\n",
- "import numpy as np\n",
- "s3 = boto3.client(\"s3\", config=Config(signature_version=UNSIGNED))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "ds000001\n",
- "1.0.0/ 2.0.4/\n",
- "{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 10, 'male_count': 6}\n",
- "ds000002\n",
- "1.0.0/ 2.0.5/\n",
- "{'participant_count': 17, 'below_18': 0, 'above_18': 17, 'female_count': 10, 'male_count': 7}\n",
- "ds000003\n",
- "1.0.0/ 2.0.2/\n",
- "{'participant_count': 13, 'below_18': 0, 'above_18': 13, 'female_count': 5, 'male_count': 8}\n",
- "ds000005\n",
- "1.0.0/ 2.0.1/\n",
- "{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 8, 'male_count': 8}\n",
- "ds000006\n",
- "1.0.0/ 2.0.1/\n",
- "{'participant_count': 14, 'below_18': 0, 'above_18': 14, 'female_count': 9, 'male_count': 5}\n",
- "ds000007\n",
- "1.0.1/ 2.0.1/\n",
- "{'participant_count': 20, 'below_18': 0, 'above_18': 20, 'female_count': 9, 'male_count': 11}\n",
- "ds000008\n",
- "1.0.1/ 2.0.0/\n",
- "{'participant_count': 14, 'below_18': 0, 'above_18': 14, 'female_count': 4, 'male_count': 10}\n",
- "ds000009\n",
- "1.0.0/ 2.0.3/\n",
- "{'participant_count': 24, 'below_18': 0, 'above_18': 24, 'female_count': 10, 'male_count': 14}\n",
- "ds000011\n",
- "1.0.0/ 2.0.1/\n",
- "{'participant_count': 14, 'below_18': 0, 'above_18': 14, 'female_count': 9, 'male_count': 5}\n",
- "ds000017\n",
- "1.0.0/ 2.0.1/\n",
- "{'participant_count': 8, 'below_18': 0, 'above_18': 5, 'female_count': 1, 'male_count': 4}\n",
- "ds000030\n",
- "1.0.2/ 1.0.5/\n",
- "{'participant_count': 272, 'below_18': 0, 'above_18': 272, 'female_count': 117, 'male_count': 155}\n",
- "ds000031\n",
- "1.0.2/ 1.0.4/\n",
- "no participants.tsv file\n",
- "ds000051\n",
- "no participants.tsv file\n",
- "ds000052\n",
- "1.0.0/ 2.0.0/\n",
- "no participants.tsv file\n",
- "ds000053\n",
- "1.0.0/ 1.0.2/\n",
- "{'participant_count': 59, 'below_18': 0, 'above_18': 59, 'female_count': 31, 'male_count': 28}\n",
- "ds000101\n",
- "1.0.0/ 2.0.0/\n",
- "{'participant_count': 21, 'below_18': 0, 'above_18': 21, 'female_count': 9, 'male_count': 12}\n",
- "ds000102\n",
- "1.0.0/ 2.0.0/\n",
- "{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 1, 'male_count': 16}\n",
- "ds000105\n",
- "1.0.1/ 2.0.2/\n",
- "no participants.tsv file\n",
- "ds000107\n",
- "1.0.0/ 2.0.2/\n",
- "no participants.tsv file\n",
- "ds000108\n",
- "1.0.1/ 2.0.0/\n",
- "{'participant_count': 34, 'below_18': 0, 'above_18': 34, 'female_count': 17, 'male_count': 17}\n",
- "ds000110\n",
- "1.0.0/ 2.0.1/\n",
- "{'participant_count': 18, 'below_18': 0, 'above_18': 18, 'female_count': 9, 'male_count': 9}\n",
- "ds000113b\n",
- "2.0.0/ 2.0.1/\n",
- "no participants.tsv file\n",
- "ds000113c\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 7, 'below_18': 0, 'above_18': 7, 'female_count': 2, 'male_count': 5}\n",
- "ds000113d\n",
- "1.0.0/ 2.0.0/\n",
- "{'participant_count': 30, 'below_18': 2, 'above_18': 28, 'female_count': 16, 'male_count': 14}\n",
- "ds000114\n",
- "2.0.0/ 2.0.1/\n",
- "no participants.tsv file\n",
- "ds000115\n",
- "1.0.2/ 2.0.0/\n",
- "{'participant_count': 99, 'below_18': 12, 'above_18': 87, 'female_count': 40, 'male_count': 59}\n",
- "ds000116\n",
- "1.0.0/ 2.0.0/\n",
- "{'participant_count': 17, 'below_18': 0, 'above_18': 17, 'female_count': 6, 'male_count': 11}\n",
- "ds000117\n",
- "0.1.0/ 1.0.0/\n",
- "{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 7, 'male_count': 9}\n",
- "ds000119\n",
- "1.0.0/ 2.0.1/\n",
- "{'participant_count': 73, 'below_18': 46, 'above_18': 27, 'female_count': 43, 'male_count': 30}\n",
- "ds000120\n",
- "1.0.0/ 1.0.1/\n",
- "{'participant_count': 26, 'below_18': 16, 'above_18': 10, 'female_count': 15, 'male_count': 11}\n",
- "ds000121\n",
- "1.0.0/ 2.0.2/\n",
- "{'participant_count': 28, 'below_18': 12, 'above_18': 16, 'female_count': 16, 'male_count': 12}\n",
- "ds000122\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 17, 'below_18': 0, 'above_18': 17, 'female_count': 8, 'male_count': 9}\n",
- "ds000133\n",
- "1.0.0/ 1.0.0/\n",
- "Index(['participant_id', 'study_group', 'handedness', 'gender'], dtype='object')\n",
- "{'participant_count': 26}\n",
- "ds000138\n",
- "1.0.0/ 2.0.0/\n",
- "{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 7, 'male_count': 9}\n",
- "ds000140\n",
- "2.0.0/ 2.0.0/\n",
- "{'participant_count': 33, 'below_18': 0, 'above_18': 33, 'female_count': 22, 'male_count': 11}\n",
- "ds000144\n",
- "{'participant_count': 45, 'below_18': 45, 'above_18': 0, 'female_count': 29, 'male_count': 16}\n",
- "ds000148\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 49, 'below_18': 0, 'above_18': 49, 'female_count': 26, 'male_count': 23}\n",
- "ds000157\n",
- "1.0.0/ 1.0.3/\n",
- "{'participant_count': 30, 'below_18': 0, 'above_18': 30, 'female_count': 30, 'male_count': 0}\n",
- "ds000158\n",
- "1.0.0/ 1.0.1/\n",
- "no participants.tsv file\n",
- "ds000164\n",
- "1.0.0/ 1.0.1/\n",
- "no participants.tsv file\n",
- "ds000168\n",
- "1.0.0/ 1.0.1/\n",
- "{'participant_count': 35, 'below_18': 0, 'above_18': 25, 'female_count': 15, 'male_count': 20}\n",
- "ds000170\n",
- "1.0.0/ 1.0.1/\n",
- "{'participant_count': 15, 'below_18': 0, 'above_18': 15, 'female_count': 3, 'male_count': 12}\n",
- "ds000171\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 39, 'below_18': 0, 'above_18': 39, 'female_count': 22, 'male_count': 17}\n",
- "ds000172\n",
- "1.0.0/ 1.0.1/\n",
- "{'participant_count': 13, 'below_18': 0, 'above_18': 13, 'female_count': 7, 'male_count': 6}\n",
- "ds000174\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 42, 'below_18': 2, 'above_18': 40, 'female_count': 13, 'male_count': 29}\n",
- "ds000177\n",
- "1.0.0/ 1.0.1/\n",
- "{'participant_count': 11, 'below_18': 0, 'above_18': 11, 'female_count': 4, 'male_count': 7}\n",
- "ds000200\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 14, 'below_18': 12, 'above_18': 2, 'female_count': 6, 'male_count': 8}\n",
- "ds000201\n",
- "1.0.0/ 1.0.5/\n",
- "{'participant_count': 86, 'below_18': 0, 'above_18': 86, 'female_count': 44, 'male_count': 42}\n",
- "ds000202\n",
- "{'participant_count': 95, 'below_18': 0, 'above_18': 95, 'female_count': 95, 'male_count': 0}\n",
- "ds000203\n",
- "1.0.0/ 1.0.2/\n",
- "{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 10, 'male_count': 16}\n",
- "ds000204\n",
- "1.0.0/ 1.0.2/\n",
- "no participants.tsv file\n",
- "ds000205\n",
- "1.0.0/ 1.0.0/\n",
- "Index(['participant_id', 'handedness'], dtype='object')\n",
- "{'participant_count': 11}\n",
- "ds000206\n",
- "1.0.0/ 1.0.0/\n",
- "no participants.tsv file\n",
- "ds000208\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 76, 'below_18': 0, 'above_18': 76, 'female_count': 40, 'male_count': 36}\n",
- "ds000210\n",
- "1.0.0/ 1.0.1/\n",
- "{'participant_count': 31, 'below_18': 0, 'above_18': 31, 'female_count': 16, 'male_count': 15}\n",
- "ds000212\n",
- "1.0.0/ 1.0.1/\n",
- "{'participant_count': 39, 'below_18': 0, 'above_18': 39, 'female_count': 10, 'male_count': 29}\n",
- "ds000213\n",
- "1.0.0/ 1.0.2/\n",
- "Index(['participant_id', 'gender', 'group'], dtype='object')\n",
- "{'participant_count': 26}\n",
- "ds000214\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 36, 'below_18': 0, 'above_18': 36, 'female_count': 30, 'male_count': 6}\n",
- "ds000216\n",
- "1.0.0/ 1.0.1/\n",
- "no participants.tsv file\n",
- "ds000217\n",
- "1.0.1/ 1.0.1/\n",
- "{'participant_count': 41, 'below_18': 0, 'above_18': 41, 'female_count': 23, 'male_count': 18}\n",
- "ds000218\n",
- "1.0.0/ 1.0.1/\n",
- "{'participant_count': 19, 'below_18': 0, 'above_18': 19, 'female_count': 0, 'male_count': 19}\n",
- "ds000219\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 21, 'below_18': 0, 'above_18': 21, 'female_count': 0, 'male_count': 21}\n",
- "ds000220\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 13, 'male_count': 13}\n",
- "ds000221\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 318, 'below_18': 0, 'above_18': 316, 'female_count': 129, 'male_count': 189}\n",
- "ds000222\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 79, 'below_18': 0, 'above_18': 79, 'female_count': 41, 'male_count': 38}\n",
- "ds000223\n",
- "1.0.0/ 2.0.0/\n",
- "Index(['participant_id'], dtype='object')\n",
- "{'participant_count': 19}\n",
- "ds000224\n",
- "1.0.0/ 1.0.2/\n",
- "{'participant_count': 10, 'below_18': 0, 'above_18': 10, 'female_count': 5, 'male_count': 5}\n",
- "ds000228\n",
- "1.0.0/ 1.0.1/\n",
- "{'participant_count': 155, 'below_18': 122, 'above_18': 33, 'female_count': 84, 'male_count': 71}\n",
- "ds000229\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 15, 'below_18': 0, 'above_18': 15, 'female_count': 7, 'male_count': 8}\n",
- "ds000231\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 9, 'below_18': 0, 'above_18': 9, 'female_count': 7, 'male_count': 2}\n",
- "ds000232\n",
- "1.0.0/ 1.0.1/\n",
- "{'participant_count': 10, 'below_18': 0, 'above_18': 10, 'female_count': 7, 'male_count': 3}\n",
- "ds000233\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 12, 'below_18': 0, 'above_18': 12, 'female_count': 7, 'male_count': 5}\n",
- "ds000234\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 5, 'below_18': 0, 'above_18': 5, 'female_count': 1, 'male_count': 4}\n",
- "ds000235\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 4, 'below_18': 0, 'above_18': 4, 'female_count': 2, 'male_count': 2}\n",
- "ds000236\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 18, 'below_18': 0, 'above_18': 18, 'female_count': 12, 'male_count': 6}\n",
- "ds000237\n",
- "no participants.tsv file\n",
- "ds000238\n",
- "1.0.0/ 1.0.1/\n",
- "{'participant_count': 35, 'below_18': 0, 'above_18': 35, 'female_count': 17, 'male_count': 18}\n",
- "ds000239\n",
- "1.0.0/ 1.0.1/\n",
- "{'participant_count': 3, 'below_18': 0, 'above_18': 3, 'female_count': 1, 'male_count': 2}\n",
- "ds000240\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 63, 'below_18': 0, 'above_18': 63, 'female_count': 35, 'male_count': 28}\n",
- "ds000241\n",
- "1.0.0/ 1.0.0/\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "no participants.tsv file\n",
- "ds000243\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 120, 'below_18': 0, 'above_18': 120, 'female_count': 61, 'male_count': 59}\n",
- "ds000244\n",
- "{'participant_count': 12, 'below_18': 0, 'above_18': 12, 'female_count': 9, 'male_count': 3}\n",
- "ds000245\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 45, 'below_18': 0, 'above_18': 45, 'female_count': 25, 'male_count': 20}\n",
- "ds000246\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 1, 'below_18': 0, 'above_18': 1, 'female_count': 0, 'male_count': 1}\n",
- "ds000247\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 6, 'below_18': 0, 'above_18': 5, 'female_count': 2, 'male_count': 3}\n",
- "ds000248\n",
- "no participants.tsv file\n",
- "ds000249\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 13, 'male_count': 13}\n",
- "ds000253\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 20, 'below_18': 0, 'above_18': 20, 'female_count': 20, 'male_count': 0}\n",
- "ds000254\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 13, 'below_18': 0, 'above_18': 13, 'female_count': 7, 'male_count': 6}\n",
- "ds000255\n",
- "1.0.0/ 1.0.0/\n",
- "no participants.tsv file\n",
- "ds000256\n",
- "1.0.0/ 1.0.0/\n",
- "{'participant_count': 24, 'below_18': 24, 'above_18': 0, 'female_count': 10, 'male_count': 14}\n",
- "ds000258\n",
- "1.0.0/ 1.0.0/\n",
- "no participants.tsv file\n"
- ]
- }
- ],
- "source": [
- "def sanitize(x):\n",
- "    \"\"\"Convert one raw age entry from a participants.tsv column into a number.\n",
- "\n",
- "    Missing values give None. Ranges such as '20-25' give their lower bound,\n",
- "    open-ended values such as '90+' give the number before the '+', and the\n",
- "    labels 'Young' / 'Old' are mapped to 20 / 65. Anything else is parsed\n",
- "    with float(), which raises ValueError for unrecognised text.\n",
- "    \"\"\"\n",
- "    if pd.isnull(x):\n",
- "        return None\n",
- "    if not isinstance(x, str):\n",
- "        # An object column can mix real numbers with strings; the substring\n",
- "        # tests below only make sense for text.\n",
- "        return float(x)\n",
- "    x = x.strip()  # tolerate padded cells such as 'Old '\n",
- "    if '-' in x:\n",
- "        return float(x.split('-')[0])\n",
- "    if '+' in x:\n",
- "        return float(x.replace('+', ''))\n",
- "    if x == 'Young':\n",
- "        return 20\n",
- "    if x == 'Old':\n",
- "        return 65\n",
- "    return float(x)\n",
- "\n",
- "# Datasets where age is read from the `sex` column and sex from the `age` column.\n",
- "swap_age_sex = ['ds000119', 'ds000121']\n",
- "# Datasets whose participants.tsv is read without a header row (whitespace-delimited).\n",
- "no_header = ['ds000109']\n",
- "# Accession numbers left out of the summary. ds000109 is also listed in `no_header`,\n",
- "# so that branch only takes effect once it is removed from this list.\n",
- "skip = ['ds000006A', 'ds000113', 'ds000149', 'ds000109']\n",
- "\n",
- "# Hard-coded key prefixes used instead of the one derived from the latest version.\n",
- "key_prefix_overrides = {\n",
- "    'ds000202': 'ds000202/ds000202_R1.0.2/uncompressed/ds202_R1.0.2/',\n",
- "    'ds000248': 'ds000248/ds000248_R1.0.0/ds000248_R1.0.0/uncompressed/',\n",
- "    'ds000051': 'ds000051/ds000051_R2.0.2/uncompressed/ds000051_R2.0.2/',\n",
- "}\n",
- "\n",
- "# Column names that may hold age / sex; the first one present wins.\n",
- "age_columns = ['age', 'Age', 'ageAtFirstScanYears', 'age at baseline ', 'AgeGroup',\n",
- "               'age (5-year bins)', 'ScanAge', 'Age (years)']\n",
- "sex_columns = ['sex', 'Sex', 'gender', 'Gender', 'jsex', 'gender_F']\n",
- "\n",
- "# Dataset-specific numeric sex codes.\n",
- "sex_codes = {'ds000203': {1: 'M', 2: 'F'}, 'ds000249': {1: 'F', 0: 'M'}}\n",
- "# Accepted spellings of the sex labels, normalised to 'M' / 'F'; missing stays missing.\n",
- "# A value that is not listed raises KeyError, exactly as the inline mapping did.\n",
- "sex_labels = {'M': 'M', 'F': 'F', 'M,': 'M', 'm': 'M', 'f': 'F',\n",
- "              'MALE': 'M', 'FEMALE': 'F', 'male': 'M', 'female': 'F',\n",
- "              'Male': 'M', 'Female': 'F', 'M ': 'M', 'F ': 'F', np.nan: np.nan}\n",
- "\n",
- "\n",
- "def first_present(df, candidates):\n",
- "    \"\"\"Return the first column of `df` whose name appears in `candidates`, or None.\"\"\"\n",
- "    for name in candidates:\n",
- "        if name in df.columns:\n",
- "            return df[name]\n",
- "    return None\n",
- "\n",
- "\n",
- "ds_meta = {}\n",
- "for k in s3.list_objects(Bucket='openneuro', Prefix='', Delimiter='/')['CommonPrefixes']:\n",
- "    accession_number = k['Prefix'][0:-1]\n",
- "    if accession_number in skip:\n",
- "        continue\n",
- "    print(accession_number)\n",
- "    ds_meta[accession_number] = {}\n",
- "\n",
- "    # Version folders are the sub-prefixes containing '_R'. An empty listing has no\n",
- "    # 'CommonPrefixes' key, hence .get().\n",
- "    version_listing = s3.list_objects(Bucket='openneuro', Prefix=k['Prefix'], Delimiter='/')\n",
- "    valid_versions = [v for v in version_listing.get('CommonPrefixes', []) if '_R' in v['Prefix']]\n",
- "    if accession_number in key_prefix_overrides:\n",
- "        Key_prefix = key_prefix_overrides[accession_number]\n",
- "    elif valid_versions:\n",
- "        lv = [LooseVersion(v['Prefix'].split('_R')[-1]) for v in valid_versions]\n",
- "        lv.sort()\n",
- "        print(str(lv[0]), str(lv[-1]))\n",
- "        # Each version string keeps the trailing '/' of its S3 prefix, so no separator\n",
- "        # is added before 'uncompressed/'.\n",
- "        Key_prefix = accession_number + '/' + accession_number + '_R' + str(lv[-1]) + 'uncompressed/'\n",
- "    else:\n",
- "        Key_prefix = accession_number + '/uncompressed/'\n",
- "\n",
- "    # Participants are counted as the number of 'sub-*' prefixes under the dataset.\n",
- "    # NOTE(review): list_objects returns at most 1000 entries per call, so a larger\n",
- "    # dataset would be undercounted - confirm whether pagination is needed.\n",
- "    subject_listing = s3.list_objects(Bucket='openneuro', Prefix=Key_prefix + 'sub-', Delimiter=\"/\")\n",
- "    participant_count = len(subject_listing.get('CommonPrefixes', []))\n",
- "    ds_meta[accession_number]['participant_count'] = participant_count\n",
- "    try:\n",
- "        obj = s3.get_object(Bucket='openneuro', Key=Key_prefix + 'participants.tsv')\n",
- "    except Exception:\n",
- "        # Best effort: any failure to fetch the file is reported as a missing file.\n",
- "        # Unlike a bare `except:`, this lets KeyboardInterrupt stop the loop.\n",
- "        print(\"no participants.tsv file\")\n",
- "    else:\n",
- "        if accession_number in no_header:\n",
- "            args = {'names': ['participant_id', 'sex', 'age'], 'header': None, 'delimiter': r\"\\s+\"}\n",
- "        else:\n",
- "            args = {'delimiter': '\\t'}\n",
- "\n",
- "        df = pd.read_csv(io.BytesIO(obj['Body'].read()), na_values=['n/a', 'D'], **args)\n",
- "\n",
- "        if accession_number in swap_age_sex:\n",
- "            age, sex = df['sex'], df['age']\n",
- "        else:\n",
- "            age = first_present(df, age_columns)\n",
- "            sex = first_present(df, sex_columns)\n",
- "        no_age = age is None\n",
- "        no_sex = sex is None\n",
- "\n",
- "        if not no_age:\n",
- "            if age.dtype == 'O':\n",
- "                # Text ages (ranges, '90+', 'Young'/'Old') are converted to numbers first.\n",
- "                age = age.apply(sanitize)\n",
- "            ds_meta[accession_number]['below_18'] = (age < 18).sum()\n",
- "            ds_meta[accession_number]['above_18'] = (age >= 18).sum()\n",
- "\n",
- "        # Guarded by no_sex rather than no_age: a dataset with a sex column but no age\n",
- "        # column still gets its counts, and a dataset without a sex column never reuses\n",
- "        # the Series left over from the previous iteration.\n",
- "        if not no_sex:\n",
- "            if accession_number in sex_codes:\n",
- "                codes = sex_codes[accession_number]\n",
- "                sex = sex.apply(lambda value: codes[value])\n",
- "            else:\n",
- "                sex = sex.apply(lambda value: sex_labels[value])\n",
- "\n",
- "            # Stop the whole scan if anything other than 'M', 'F' or a missing value remains.\n",
- "            if set(sex.unique()).intersection(set(['M', 'F', np.nan])) != set(sex.unique()):\n",
- "                print(sex.unique())\n",
- "                break\n",
- "            ds_meta[accession_number]['female_count'] = (sex == 'F').sum()\n",
- "            ds_meta[accession_number]['male_count'] = (sex == 'M').sum()\n",
- "\n",
- "        if no_age or no_sex:\n",
- "            # Show the header so an unrecognised age/sex column name can be added above.\n",
- "            print(df.columns)\n",
- "\n",
- "        print(ds_meta[accession_number])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>participant_count</th>\n",
- " <th>below_18</th>\n",
- " <th>above_18</th>\n",
- " <th>female_count</th>\n",
- " <th>male_count</th>\n",
- " <th>missing_age</th>\n",
- " <th>missing_sex</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>ds000001</th>\n",
- " <td>16</td>\n",
- " <td>0.0</td>\n",
- " <td>16.0</td>\n",
- " <td>10.0</td>\n",
- " <td>6.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000002</th>\n",
- " <td>17</td>\n",
- " <td>0.0</td>\n",
- " <td>17.0</td>\n",
- " <td>10.0</td>\n",
- " <td>7.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000003</th>\n",
- " <td>13</td>\n",
- " <td>0.0</td>\n",
- " <td>13.0</td>\n",
- " <td>5.0</td>\n",
- " <td>8.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000005</th>\n",
- " <td>16</td>\n",
- " <td>0.0</td>\n",
- " <td>16.0</td>\n",
- " <td>8.0</td>\n",
- " <td>8.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000006</th>\n",
- " <td>14</td>\n",
- " <td>0.0</td>\n",
- " <td>14.0</td>\n",
- " <td>9.0</td>\n",
- " <td>5.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000007</th>\n",
- " <td>20</td>\n",
- " <td>0.0</td>\n",
- " <td>20.0</td>\n",
- " <td>9.0</td>\n",
- " <td>11.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000008</th>\n",
- " <td>14</td>\n",
- " <td>0.0</td>\n",
- " <td>14.0</td>\n",
- " <td>4.0</td>\n",
- " <td>10.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000009</th>\n",
- " <td>24</td>\n",
- " <td>0.0</td>\n",
- " <td>24.0</td>\n",
- " <td>10.0</td>\n",
- " <td>14.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000011</th>\n",
- " <td>14</td>\n",
- " <td>0.0</td>\n",
- " <td>14.0</td>\n",
- " <td>9.0</td>\n",
- " <td>5.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000017</th>\n",
- " <td>8</td>\n",
- " <td>0.0</td>\n",
- " <td>5.0</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " <td>3.0</td>\n",
- " <td>3.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000030</th>\n",
- " <td>272</td>\n",
- " <td>0.0</td>\n",
- " <td>272.0</td>\n",
- " <td>117.0</td>\n",
- " <td>155.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000031</th>\n",
- " <td>1</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>1.0</td>\n",
- " <td>1.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000051</th>\n",
- " <td>13</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>13.0</td>\n",
- " <td>13.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000052</th>\n",
- " <td>13</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>13.0</td>\n",
- " <td>13.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000053</th>\n",
- " <td>59</td>\n",
- " <td>0.0</td>\n",
- " <td>59.0</td>\n",
- " <td>31.0</td>\n",
- " <td>28.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000101</th>\n",
- " <td>21</td>\n",
- " <td>0.0</td>\n",
- " <td>21.0</td>\n",
- " <td>9.0</td>\n",
- " <td>12.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000102</th>\n",
- " <td>26</td>\n",
- " <td>0.0</td>\n",
- " <td>26.0</td>\n",
- " <td>1.0</td>\n",
- " <td>16.0</td>\n",
- " <td>0.0</td>\n",
- " <td>9.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000105</th>\n",
- " <td>6</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>6.0</td>\n",
- " <td>6.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000107</th>\n",
- " <td>49</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>49.0</td>\n",
- " <td>49.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000108</th>\n",
- " <td>34</td>\n",
- " <td>0.0</td>\n",
- " <td>34.0</td>\n",
- " <td>17.0</td>\n",
- " <td>17.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000110</th>\n",
- " <td>18</td>\n",
- " <td>0.0</td>\n",
- " <td>18.0</td>\n",
- " <td>9.0</td>\n",
- " <td>9.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000113b</th>\n",
- " <td>16</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>16.0</td>\n",
- " <td>16.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000113c</th>\n",
- " <td>7</td>\n",
- " <td>0.0</td>\n",
- " <td>7.0</td>\n",
- " <td>2.0</td>\n",
- " <td>5.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000113d</th>\n",
- " <td>30</td>\n",
- " <td>2.0</td>\n",
- " <td>28.0</td>\n",
- " <td>16.0</td>\n",
- " <td>14.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000114</th>\n",
- " <td>10</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>10.0</td>\n",
- " <td>10.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000115</th>\n",
- " <td>99</td>\n",
- " <td>12.0</td>\n",
- " <td>87.0</td>\n",
- " <td>40.0</td>\n",
- " <td>59.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000116</th>\n",
- " <td>17</td>\n",
- " <td>0.0</td>\n",
- " <td>17.0</td>\n",
- " <td>6.0</td>\n",
- " <td>11.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000117</th>\n",
- " <td>16</td>\n",
- " <td>0.0</td>\n",
- " <td>16.0</td>\n",
- " <td>7.0</td>\n",
- " <td>9.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000119</th>\n",
- " <td>73</td>\n",
- " <td>46.0</td>\n",
- " <td>27.0</td>\n",
- " <td>43.0</td>\n",
- " <td>30.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000120</th>\n",
- " <td>26</td>\n",
- " <td>16.0</td>\n",
- " <td>10.0</td>\n",
- " <td>15.0</td>\n",
- " <td>11.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000220</th>\n",
- " <td>26</td>\n",
- " <td>0.0</td>\n",
- " <td>26.0</td>\n",
- " <td>13.0</td>\n",
- " <td>13.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000221</th>\n",
- " <td>318</td>\n",
- " <td>0.0</td>\n",
- " <td>316.0</td>\n",
- " <td>129.0</td>\n",
- " <td>189.0</td>\n",
- " <td>2.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000222</th>\n",
- " <td>79</td>\n",
- " <td>0.0</td>\n",
- " <td>79.0</td>\n",
- " <td>41.0</td>\n",
- " <td>38.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000223</th>\n",
- " <td>19</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>19.0</td>\n",
- " <td>19.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000224</th>\n",
- " <td>10</td>\n",
- " <td>0.0</td>\n",
- " <td>10.0</td>\n",
- " <td>5.0</td>\n",
- " <td>5.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000228</th>\n",
- " <td>155</td>\n",
- " <td>122.0</td>\n",
- " <td>33.0</td>\n",
- " <td>84.0</td>\n",
- " <td>71.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000229</th>\n",
- " <td>15</td>\n",
- " <td>0.0</td>\n",
- " <td>15.0</td>\n",
- " <td>7.0</td>\n",
- " <td>8.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000231</th>\n",
- " <td>9</td>\n",
- " <td>0.0</td>\n",
- " <td>9.0</td>\n",
- " <td>7.0</td>\n",
- " <td>2.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000232</th>\n",
- " <td>10</td>\n",
- " <td>0.0</td>\n",
- " <td>10.0</td>\n",
- " <td>7.0</td>\n",
- " <td>3.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000233</th>\n",
- " <td>12</td>\n",
- " <td>0.0</td>\n",
- " <td>12.0</td>\n",
- " <td>7.0</td>\n",
- " <td>5.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000234</th>\n",
- " <td>5</td>\n",
- " <td>0.0</td>\n",
- " <td>5.0</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000235</th>\n",
- " <td>4</td>\n",
- " <td>0.0</td>\n",
- " <td>4.0</td>\n",
- " <td>2.0</td>\n",
- " <td>2.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000236</th>\n",
- " <td>18</td>\n",
- " <td>0.0</td>\n",
- " <td>18.0</td>\n",
- " <td>12.0</td>\n",
- " <td>6.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000237</th>\n",
- " <td>13</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>13.0</td>\n",
- " <td>13.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000238</th>\n",
- " <td>35</td>\n",
- " <td>0.0</td>\n",
- " <td>35.0</td>\n",
- " <td>17.0</td>\n",
- " <td>18.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000239</th>\n",
- " <td>3</td>\n",
- " <td>0.0</td>\n",
- " <td>3.0</td>\n",
- " <td>1.0</td>\n",
- " <td>2.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000240</th>\n",
- " <td>63</td>\n",
- " <td>0.0</td>\n",
- " <td>63.0</td>\n",
- " <td>35.0</td>\n",
- " <td>28.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000241</th>\n",
- " <td>12</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>12.0</td>\n",
- " <td>12.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000243</th>\n",
- " <td>120</td>\n",
- " <td>0.0</td>\n",
- " <td>120.0</td>\n",
- " <td>61.0</td>\n",
- " <td>59.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000244</th>\n",
- " <td>12</td>\n",
- " <td>0.0</td>\n",
- " <td>12.0</td>\n",
- " <td>9.0</td>\n",
- " <td>3.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000245</th>\n",
- " <td>45</td>\n",
- " <td>0.0</td>\n",
- " <td>45.0</td>\n",
- " <td>25.0</td>\n",
- " <td>20.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000246</th>\n",
- " <td>1</td>\n",
- " <td>0.0</td>\n",
- " <td>1.0</td>\n",
- " <td>0.0</td>\n",
- " <td>1.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000247</th>\n",
- " <td>6</td>\n",
- " <td>0.0</td>\n",
- " <td>5.0</td>\n",
- " <td>2.0</td>\n",
- " <td>3.0</td>\n",
- " <td>1.0</td>\n",
- " <td>1.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000248</th>\n",
- " <td>2</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>2.0</td>\n",
- " <td>2.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000249</th>\n",
- " <td>26</td>\n",
- " <td>0.0</td>\n",
- " <td>26.0</td>\n",
- " <td>13.0</td>\n",
- " <td>13.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000253</th>\n",
- " <td>20</td>\n",
- " <td>0.0</td>\n",
- " <td>20.0</td>\n",
- " <td>20.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000254</th>\n",
- " <td>13</td>\n",
- " <td>0.0</td>\n",
- " <td>13.0</td>\n",
- " <td>7.0</td>\n",
- " <td>6.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000255</th>\n",
- " <td>2</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>2.0</td>\n",
- " <td>2.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000256</th>\n",
- " <td>24</td>\n",
- " <td>24.0</td>\n",
- " <td>0.0</td>\n",
- " <td>10.0</td>\n",
- " <td>14.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000258</th>\n",
- " <td>89</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>89.0</td>\n",
- " <td>89.0</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>92 rows × 7 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " participant_count below_18 above_18 female_count male_count \\\n",
- "ds000001 16 0.0 16.0 10.0 6.0 \n",
- "ds000002 17 0.0 17.0 10.0 7.0 \n",
- "ds000003 13 0.0 13.0 5.0 8.0 \n",
- "ds000005 16 0.0 16.0 8.0 8.0 \n",
- "ds000006 14 0.0 14.0 9.0 5.0 \n",
- "ds000007 20 0.0 20.0 9.0 11.0 \n",
- "ds000008 14 0.0 14.0 4.0 10.0 \n",
- "ds000009 24 0.0 24.0 10.0 14.0 \n",
- "ds000011 14 0.0 14.0 9.0 5.0 \n",
- "ds000017 8 0.0 5.0 1.0 4.0 \n",
- "ds000030 272 0.0 272.0 117.0 155.0 \n",
- "ds000031 1 NaN NaN NaN NaN \n",
- "ds000051 13 NaN NaN NaN NaN \n",
- "ds000052 13 NaN NaN NaN NaN \n",
- "ds000053 59 0.0 59.0 31.0 28.0 \n",
- "ds000101 21 0.0 21.0 9.0 12.0 \n",
- "ds000102 26 0.0 26.0 1.0 16.0 \n",
- "ds000105 6 NaN NaN NaN NaN \n",
- "ds000107 49 NaN NaN NaN NaN \n",
- "ds000108 34 0.0 34.0 17.0 17.0 \n",
- "ds000110 18 0.0 18.0 9.0 9.0 \n",
- "ds000113b 16 NaN NaN NaN NaN \n",
- "ds000113c 7 0.0 7.0 2.0 5.0 \n",
- "ds000113d 30 2.0 28.0 16.0 14.0 \n",
- "ds000114 10 NaN NaN NaN NaN \n",
- "ds000115 99 12.0 87.0 40.0 59.0 \n",
- "ds000116 17 0.0 17.0 6.0 11.0 \n",
- "ds000117 16 0.0 16.0 7.0 9.0 \n",
- "ds000119 73 46.0 27.0 43.0 30.0 \n",
- "ds000120 26 16.0 10.0 15.0 11.0 \n",
- "... ... ... ... ... ... \n",
- "ds000220 26 0.0 26.0 13.0 13.0 \n",
- "ds000221 318 0.0 316.0 129.0 189.0 \n",
- "ds000222 79 0.0 79.0 41.0 38.0 \n",
- "ds000223 19 NaN NaN NaN NaN \n",
- "ds000224 10 0.0 10.0 5.0 5.0 \n",
- "ds000228 155 122.0 33.0 84.0 71.0 \n",
- "ds000229 15 0.0 15.0 7.0 8.0 \n",
- "ds000231 9 0.0 9.0 7.0 2.0 \n",
- "ds000232 10 0.0 10.0 7.0 3.0 \n",
- "ds000233 12 0.0 12.0 7.0 5.0 \n",
- "ds000234 5 0.0 5.0 1.0 4.0 \n",
- "ds000235 4 0.0 4.0 2.0 2.0 \n",
- "ds000236 18 0.0 18.0 12.0 6.0 \n",
- "ds000237 13 NaN NaN NaN NaN \n",
- "ds000238 35 0.0 35.0 17.0 18.0 \n",
- "ds000239 3 0.0 3.0 1.0 2.0 \n",
- "ds000240 63 0.0 63.0 35.0 28.0 \n",
- "ds000241 12 NaN NaN NaN NaN \n",
- "ds000243 120 0.0 120.0 61.0 59.0 \n",
- "ds000244 12 0.0 12.0 9.0 3.0 \n",
- "ds000245 45 0.0 45.0 25.0 20.0 \n",
- "ds000246 1 0.0 1.0 0.0 1.0 \n",
- "ds000247 6 0.0 5.0 2.0 3.0 \n",
- "ds000248 2 NaN NaN NaN NaN \n",
- "ds000249 26 0.0 26.0 13.0 13.0 \n",
- "ds000253 20 0.0 20.0 20.0 0.0 \n",
- "ds000254 13 0.0 13.0 7.0 6.0 \n",
- "ds000255 2 NaN NaN NaN NaN \n",
- "ds000256 24 24.0 0.0 10.0 14.0 \n",
- "ds000258 89 NaN NaN NaN NaN \n",
- "\n",
- " missing_age missing_sex \n",
- "ds000001 0.0 0.0 \n",
- "ds000002 0.0 0.0 \n",
- "ds000003 0.0 0.0 \n",
- "ds000005 0.0 0.0 \n",
- "ds000006 0.0 0.0 \n",
- "ds000007 0.0 0.0 \n",
- "ds000008 0.0 0.0 \n",
- "ds000009 0.0 0.0 \n",
- "ds000011 0.0 0.0 \n",
- "ds000017 3.0 3.0 \n",
- "ds000030 0.0 0.0 \n",
- "ds000031 1.0 1.0 \n",
- "ds000051 13.0 13.0 \n",
- "ds000052 13.0 13.0 \n",
- "ds000053 0.0 0.0 \n",
- "ds000101 0.0 0.0 \n",
- "ds000102 0.0 9.0 \n",
- "ds000105 6.0 6.0 \n",
- "ds000107 49.0 49.0 \n",
- "ds000108 0.0 0.0 \n",
- "ds000110 0.0 0.0 \n",
- "ds000113b 16.0 16.0 \n",
- "ds000113c 0.0 0.0 \n",
- "ds000113d 0.0 0.0 \n",
- "ds000114 10.0 10.0 \n",
- "ds000115 0.0 0.0 \n",
- "ds000116 0.0 0.0 \n",
- "ds000117 0.0 0.0 \n",
- "ds000119 0.0 0.0 \n",
- "ds000120 0.0 0.0 \n",
- "... ... ... \n",
- "ds000220 0.0 0.0 \n",
- "ds000221 2.0 0.0 \n",
- "ds000222 0.0 0.0 \n",
- "ds000223 19.0 19.0 \n",
- "ds000224 0.0 0.0 \n",
- "ds000228 0.0 0.0 \n",
- "ds000229 0.0 0.0 \n",
- "ds000231 0.0 0.0 \n",
- "ds000232 0.0 0.0 \n",
- "ds000233 0.0 0.0 \n",
- "ds000234 0.0 0.0 \n",
- "ds000235 0.0 0.0 \n",
- "ds000236 0.0 0.0 \n",
- "ds000237 13.0 13.0 \n",
- "ds000238 0.0 0.0 \n",
- "ds000239 0.0 0.0 \n",
- "ds000240 0.0 0.0 \n",
- "ds000241 12.0 12.0 \n",
- "ds000243 0.0 0.0 \n",
- "ds000244 0.0 0.0 \n",
- "ds000245 0.0 0.0 \n",
- "ds000246 0.0 0.0 \n",
- "ds000247 1.0 1.0 \n",
- "ds000248 2.0 2.0 \n",
- "ds000249 0.0 0.0 \n",
- "ds000253 0.0 0.0 \n",
- "ds000254 0.0 0.0 \n",
- "ds000255 2.0 2.0 \n",
- "ds000256 0.0 0.0 \n",
- "ds000258 89.0 89.0 \n",
- "\n",
- "[92 rows x 7 columns]"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Build a table with one row per key of ds_meta, then derive how many\n",
- "# participants fall outside the recorded age and sex counts.\n",
- "openfmri_df = pd.DataFrame.from_dict(ds_meta, orient='index').assign(\n",
- "    # NaN counts are treated as zero, so a dataset with no age/sex counts\n",
- "    # reports its whole participant_count as missing.\n",
- "    missing_age=lambda counts: counts['participant_count']\n",
- "    - (counts['below_18'].fillna(0) + counts['above_18'].fillna(0)),\n",
- "    missing_sex=lambda counts: counts['participant_count']\n",
- "    - (counts['female_count'].fillna(0) + counts['male_count'].fillna(0)),\n",
- ")\n",
- "openfmri_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>participant_count</th>\n",
- " <th>below_18</th>\n",
- " <th>above_18</th>\n",
- " <th>female_count</th>\n",
- " <th>male_count</th>\n",
- " <th>missing_age</th>\n",
- " <th>missing_sex</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>ds000017</th>\n",
- " <td>8</td>\n",
- " <td>0.0</td>\n",
- " <td>5.0</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " <td>3.0</td>\n",
- " <td>3.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000031</th>\n",
- " <td>1</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>1.0</td>\n",
- " <td>1.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000051</th>\n",
- " <td>13</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>13.0</td>\n",
- " <td>13.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000052</th>\n",
- " <td>13</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>13.0</td>\n",
- " <td>13.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000105</th>\n",
- " <td>6</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>6.0</td>\n",
- " <td>6.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000107</th>\n",
- " <td>49</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>49.0</td>\n",
- " <td>49.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000113b</th>\n",
- " <td>16</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>16.0</td>\n",
- " <td>16.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000114</th>\n",
- " <td>10</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>10.0</td>\n",
- " <td>10.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000133</th>\n",
- " <td>26</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>26.0</td>\n",
- " <td>26.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000158</th>\n",
- " <td>217</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>217.0</td>\n",
- " <td>217.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000164</th>\n",
- " <td>28</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>28.0</td>\n",
- " <td>28.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000168</th>\n",
- " <td>35</td>\n",
- " <td>0.0</td>\n",
- " <td>25.0</td>\n",
- " <td>15.0</td>\n",
- " <td>20.0</td>\n",
- " <td>10.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000204</th>\n",
- " <td>1</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>1.0</td>\n",
- " <td>1.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000205</th>\n",
- " <td>11</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>11.0</td>\n",
- " <td>11.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000206</th>\n",
- " <td>6</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>6.0</td>\n",
- " <td>6.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000213</th>\n",
- " <td>26</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>26.0</td>\n",
- " <td>26.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000216</th>\n",
- " <td>7</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>7.0</td>\n",
- " <td>7.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000221</th>\n",
- " <td>318</td>\n",
- " <td>0.0</td>\n",
- " <td>316.0</td>\n",
- " <td>129.0</td>\n",
- " <td>189.0</td>\n",
- " <td>2.0</td>\n",
- " <td>0.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000223</th>\n",
- " <td>19</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>19.0</td>\n",
- " <td>19.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000237</th>\n",
- " <td>13</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>13.0</td>\n",
- " <td>13.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000241</th>\n",
- " <td>12</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>12.0</td>\n",
- " <td>12.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000247</th>\n",
- " <td>6</td>\n",
- " <td>0.0</td>\n",
- " <td>5.0</td>\n",
- " <td>2.0</td>\n",
- " <td>3.0</td>\n",
- " <td>1.0</td>\n",
- " <td>1.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000248</th>\n",
- " <td>2</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>2.0</td>\n",
- " <td>2.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000255</th>\n",
- " <td>2</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>2.0</td>\n",
- " <td>2.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>ds000258</th>\n",
- " <td>89</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>89.0</td>\n",
- " <td>89.0</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " participant_count below_18 above_18 female_count male_count \\\n",
- "ds000017 8 0.0 5.0 1.0 4.0 \n",
- "ds000031 1 NaN NaN NaN NaN \n",
- "ds000051 13 NaN NaN NaN NaN \n",
- "ds000052 13 NaN NaN NaN NaN \n",
- "ds000105 6 NaN NaN NaN NaN \n",
- "ds000107 49 NaN NaN NaN NaN \n",
- "ds000113b 16 NaN NaN NaN NaN \n",
- "ds000114 10 NaN NaN NaN NaN \n",
- "ds000133 26 NaN NaN NaN NaN \n",
- "ds000158 217 NaN NaN NaN NaN \n",
- "ds000164 28 NaN NaN NaN NaN \n",
- "ds000168 35 0.0 25.0 15.0 20.0 \n",
- "ds000204 1 NaN NaN NaN NaN \n",
- "ds000205 11 NaN NaN NaN NaN \n",
- "ds000206 6 NaN NaN NaN NaN \n",
- "ds000213 26 NaN NaN NaN NaN \n",
- "ds000216 7 NaN NaN NaN NaN \n",
- "ds000221 318 0.0 316.0 129.0 189.0 \n",
- "ds000223 19 NaN NaN NaN NaN \n",
- "ds000237 13 NaN NaN NaN NaN \n",
- "ds000241 12 NaN NaN NaN NaN \n",
- "ds000247 6 0.0 5.0 2.0 3.0 \n",
- "ds000248 2 NaN NaN NaN NaN \n",
- "ds000255 2 NaN NaN NaN NaN \n",
- "ds000258 89 NaN NaN NaN NaN \n",
- "\n",
- " missing_age missing_sex \n",
- "ds000017 3.0 3.0 \n",
- "ds000031 1.0 1.0 \n",
- "ds000051 13.0 13.0 \n",
- "ds000052 13.0 13.0 \n",
- "ds000105 6.0 6.0 \n",
- "ds000107 49.0 49.0 \n",
- "ds000113b 16.0 16.0 \n",
- "ds000114 10.0 10.0 \n",
- "ds000133 26.0 26.0 \n",
- "ds000158 217.0 217.0 \n",
- "ds000164 28.0 28.0 \n",
- "ds000168 10.0 0.0 \n",
- "ds000204 1.0 1.0 \n",
- "ds000205 11.0 11.0 \n",
- "ds000206 6.0 6.0 \n",
- "ds000213 26.0 26.0 \n",
- "ds000216 7.0 7.0 \n",
- "ds000221 2.0 0.0 \n",
- "ds000223 19.0 19.0 \n",
- "ds000237 13.0 13.0 \n",
- "ds000241 12.0 12.0 \n",
- "ds000247 1.0 1.0 \n",
- "ds000248 2.0 2.0 \n",
- "ds000255 2.0 2.0 \n",
- "ds000258 89.0 89.0 "
- ]
- },
- "execution_count": 22,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Show the datasets whose missing_age count is positive.\n",
- "openfmri_df.loc[openfmri_df['missing_age'].gt(0)]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "3307"
- ]
- },
- "execution_count": 25,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Sum of participant_count over every dataset row.\n",
- "openfmri_df['participant_count'].sum()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "583.0"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Sum of missing_age over every dataset row.\n",
- "openfmri_df['missing_age'].sum()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "580.0"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Sum of missing_sex over every dataset row.\n",
- "openfmri_df['missing_sex'].sum()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "293.0"
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Sum of below_18 over the dataset rows (NaN entries are skipped by sum).\n",
- "openfmri_df['below_18'].sum()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "2431.0"
- ]
- },
- "execution_count": 27,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Sum of above_18 over the dataset rows (NaN entries are skipped by sum).\n",
- "openfmri_df['above_18'].sum()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "1379.0"
- ]
- },
- "execution_count": 29,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Sum of female_count over the dataset rows (NaN entries are skipped by sum).\n",
- "openfmri_df['female_count'].sum()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 30,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "1348.0"
- ]
- },
- "execution_count": 30,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Sum of male_count over the dataset rows (NaN entries are skipped by sum).\n",
- "openfmri_df['male_count'].sum()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Add Comment
Please, Sign In to add comment