Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "2f70d940-27fa-46ab-bfb1-8fb8194b4c15",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import configparser\n",
"# NOTE(review): both the module and the function are imported because later\n",
"# cells call `ast.literal_eval` and bare `literal_eval` inconsistently;\n",
"# standardizing on one form would let the other import be dropped.\n",
"from ast import literal_eval\n",
"import ast"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3f266616",
"metadata": {},
"outputs": [],
"source": [
"# Load the de-identified behavior records; the first CSV column is the row index.\n",
"behaviors = pd.read_csv('behaviors_covid_ids_deID.csv', index_col = 0)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2cc22f40-ec8e-4e98-8f1f-7579ee2f330d",
"metadata": {},
"outputs": [],
"source": [
"# Parse the Date strings into datetime64 once, up front.\n",
"# NOTE(review): a later cell calls pd.to_datetime on this column again with an\n",
"# explicit format; after this conversion that second call is a no-op.\n",
"behaviors['Date'] = pd.to_datetime(behaviors['Date'])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "cb6bfbd1-2250-4f8b-b0c8-9d1b3f22a317",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['23:00:00' '15:00:00' '07:00:00' '00:00:00']\n",
"['23:00:00' '15:00:00' '07:00:00']\n",
" Person_ID Date Time Target \\\n",
"Unnamed: 0 \n",
"838118 1 2019-09-15 23:00:00 sib \n",
"2140342 2 2019-09-15 15:00:00 disruptive \n",
"838121 1 2019-09-15 23:00:00 disruptive \n",
"838120 1 2019-09-15 23:00:00 mouthing pica \n",
"838119 1 2019-09-15 23:00:00 aggression \n",
"... ... ... ... ... \n",
"5097602 31 2020-09-08 15:00:00 NaN \n",
"3249998 39 2020-09-08 07:00:00 aggression \n",
"3249999 39 2020-09-08 07:00:00 sib \n",
"3250000 39 2020-09-08 07:00:00 disruptive \n",
"3250001 39 2020-09-08 07:00:00 sib \n",
"\n",
" Time_Sample_Percent Behavior_No_Data_Recorded Episode_Count \\\n",
"Unnamed: 0 \n",
"838118 0.0 False 0.0 \n",
"2140342 0.0 False 0.0 \n",
"838121 0.0 False 0.0 \n",
"838120 0.0 False 0.0 \n",
"838119 0.0 False 0.0 \n",
"... ... ... ... \n",
"5097602 0.0 False 0.0 \n",
"3249998 NaN NaN 1.0 \n",
"3249999 NaN NaN 1.0 \n",
"3250000 NaN NaN 0.0 \n",
"3250001 NaN NaN 0.0 \n",
"\n",
" Behavior_LOA Behavior_None Duration_01_Count ... \\\n",
"Unnamed: 0 ... \n",
"838118 False True NaN ... \n",
"2140342 False True NaN ... \n",
"838121 False True NaN ... \n",
"838120 False True NaN ... \n",
"838119 False True NaN ... \n",
"... ... ... ... ... \n",
"5097602 False True NaN ... \n",
"3249998 NaN NaN NaN ... \n",
"3249999 NaN NaN NaN ... \n",
"3250000 NaN NaN NaN ... \n",
"3250001 NaN NaN NaN ... \n",
"\n",
" Duration_04_Count Duration_05_Count Duration_06_Count \\\n",
"Unnamed: 0 \n",
"838118 NaN NaN NaN \n",
"2140342 NaN NaN NaN \n",
"838121 NaN NaN NaN \n",
"838120 NaN NaN NaN \n",
"838119 NaN NaN NaN \n",
"... ... ... ... \n",
"5097602 NaN NaN NaN \n",
"3249998 NaN NaN NaN \n",
"3249999 NaN NaN NaN \n",
"3250000 NaN NaN NaN \n",
"3250001 NaN NaN NaN \n",
"\n",
" Intensity_01_Count Intensity_02_Count Intensity_03_Count \\\n",
"Unnamed: 0 \n",
"838118 NaN NaN NaN \n",
"2140342 NaN NaN NaN \n",
"838121 NaN NaN NaN \n",
"838120 NaN NaN NaN \n",
"838119 NaN NaN NaN \n",
"... ... ... ... \n",
"5097602 NaN NaN NaN \n",
"3249998 NaN NaN 1.0 \n",
"3249999 NaN 1.0 NaN \n",
"3250000 NaN NaN NaN \n",
"3250001 NaN NaN NaN \n",
"\n",
" Intensity_04_Count Intensity_05_Count FirstDate Concat \n",
"Unnamed: 0 \n",
"838118 NaN NaN 2017-09-01 1 2019-09-15 \n",
"2140342 NaN NaN 2017-08-03 2 2019-09-15 \n",
"838121 NaN NaN 2017-09-01 1 2019-09-15 \n",
"838120 NaN NaN 2017-09-01 1 2019-09-15 \n",
"838119 NaN NaN 2017-09-01 1 2019-09-15 \n",
"... ... ... ... ... \n",
"5097602 NaN NaN 2019-03-01 31 2020-09-08 \n",
"3249998 NaN NaN 2015-12-24 39 2020-09-08 \n",
"3249999 NaN NaN 2015-12-24 39 2020-09-08 \n",
"3250000 NaN NaN 2015-12-24 39 2020-09-08 \n",
"3250001 NaN NaN 2015-12-24 39 2020-09-08 \n",
"\n",
"[244585 rows x 22 columns]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Concat</th>\n",
" <th>Concat_Date</th>\n",
" <th>Person_ID</th>\n",
" <th>Date</th>\n",
" <th>Time</th>\n",
" <th>Target</th>\n",
" <th>Time_Sample_Percent</th>\n",
" <th>Behavior_No_Data_Recorded</th>\n",
" <th>Episode_Count</th>\n",
" <th>Behavior_LOA</th>\n",
" <th>...</th>\n",
" <th>Duration_03_Count</th>\n",
" <th>Duration_04_Count</th>\n",
" <th>Duration_05_Count</th>\n",
" <th>Duration_06_Count</th>\n",
" <th>Intensity_01_Count</th>\n",
" <th>Intensity_02_Count</th>\n",
" <th>Intensity_03_Count</th>\n",
" <th>Intensity_04_Count</th>\n",
" <th>Intensity_05_Count</th>\n",
" <th>FirstDate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1 2019-09-15 07:00:00</td>\n",
" <td>1 2019-09-15</td>\n",
" <td>1.0</td>\n",
" <td>2019-09-15</td>\n",
" <td>07:00:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2017-09-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1 2019-09-15 15:00:00</td>\n",
" <td>1 2019-09-15</td>\n",
" <td>1.0</td>\n",
" <td>2019-09-15</td>\n",
" <td>15:00:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2017-09-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1 2019-09-15 23:00:00</td>\n",
" <td>1 2019-09-15</td>\n",
" <td>1.0</td>\n",
" <td>2019-09-15</td>\n",
" <td>23:00:00</td>\n",
" <td>sib</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2017-09-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1 2019-09-15 23:00:00</td>\n",
" <td>1 2019-09-15</td>\n",
" <td>1.0</td>\n",
" <td>2019-09-15</td>\n",
" <td>23:00:00</td>\n",
" <td>disruptive</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2017-09-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1 2019-09-15 23:00:00</td>\n",
" <td>1 2019-09-15</td>\n",
" <td>1.0</td>\n",
" <td>2019-09-15</td>\n",
" <td>23:00:00</td>\n",
" <td>mouthing pica</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2017-09-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>253565</th>\n",
" <td>78 2020-05-12 15:00:00</td>\n",
" <td>78 2020-05-12</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>253566</th>\n",
" <td>78 2020-05-12 23:00:00</td>\n",
" <td>78 2020-05-12</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>253567</th>\n",
" <td>78 2020-05-13 07:00:00</td>\n",
" <td>78 2020-05-13</td>\n",
" <td>78.0</td>\n",
" <td>2020-05-13</td>\n",
" <td>07:00:00</td>\n",
" <td>aggression</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2015-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>253568</th>\n",
" <td>78 2020-05-13 15:00:00</td>\n",
" <td>78 2020-05-13</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>253569</th>\n",
" <td>78 2020-05-13 23:00:00</td>\n",
" <td>78 2020-05-13</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>253570 rows × 23 columns</p>\n",
"</div>"
],
"text/plain": [
" Concat Concat_Date Person_ID Date Time \\\n",
"0 1 2019-09-15 07:00:00 1 2019-09-15 1.0 2019-09-15 07:00:00 \n",
"1 1 2019-09-15 15:00:00 1 2019-09-15 1.0 2019-09-15 15:00:00 \n",
"2 1 2019-09-15 23:00:00 1 2019-09-15 1.0 2019-09-15 23:00:00 \n",
"3 1 2019-09-15 23:00:00 1 2019-09-15 1.0 2019-09-15 23:00:00 \n",
"4 1 2019-09-15 23:00:00 1 2019-09-15 1.0 2019-09-15 23:00:00 \n",
"... ... ... ... ... ... \n",
"253565 78 2020-05-12 15:00:00 78 2020-05-12 NaN NaT NaN \n",
"253566 78 2020-05-12 23:00:00 78 2020-05-12 NaN NaT NaN \n",
"253567 78 2020-05-13 07:00:00 78 2020-05-13 78.0 2020-05-13 07:00:00 \n",
"253568 78 2020-05-13 15:00:00 78 2020-05-13 NaN NaT NaN \n",
"253569 78 2020-05-13 23:00:00 78 2020-05-13 NaN NaT NaN \n",
"\n",
" Target Time_Sample_Percent Behavior_No_Data_Recorded \\\n",
"0 NaN NaN False \n",
"1 NaN NaN False \n",
"2 sib 0.0 False \n",
"3 disruptive 0.0 False \n",
"4 mouthing pica 0.0 False \n",
"... ... ... ... \n",
"253565 NaN NaN NaN \n",
"253566 NaN NaN NaN \n",
"253567 aggression NaN NaN \n",
"253568 NaN NaN NaN \n",
"253569 NaN NaN NaN \n",
"\n",
" Episode_Count Behavior_LOA ... Duration_03_Count Duration_04_Count \\\n",
"0 NaN True ... NaN NaN \n",
"1 NaN True ... NaN NaN \n",
"2 0.0 False ... NaN NaN \n",
"3 0.0 False ... NaN NaN \n",
"4 0.0 False ... NaN NaN \n",
"... ... ... ... ... ... \n",
"253565 NaN NaN ... NaN NaN \n",
"253566 NaN NaN ... NaN NaN \n",
"253567 1.0 NaN ... NaN NaN \n",
"253568 NaN NaN ... NaN NaN \n",
"253569 NaN NaN ... NaN NaN \n",
"\n",
" Duration_05_Count Duration_06_Count Intensity_01_Count \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"... ... ... ... \n",
"253565 NaN NaN NaN \n",
"253566 NaN NaN NaN \n",
"253567 NaN NaN NaN \n",
"253568 NaN NaN NaN \n",
"253569 NaN NaN NaN \n",
"\n",
" Intensity_02_Count Intensity_03_Count Intensity_04_Count \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"... ... ... ... \n",
"253565 NaN NaN NaN \n",
"253566 NaN NaN NaN \n",
"253567 NaN NaN NaN \n",
"253568 NaN NaN NaN \n",
"253569 NaN NaN NaN \n",
"\n",
" Intensity_05_Count FirstDate \n",
"0 NaN 2017-09-01 \n",
"1 NaN 2017-09-01 \n",
"2 NaN 2017-09-01 \n",
"3 NaN 2017-09-01 \n",
"4 NaN 2017-09-01 \n",
"... ... ... \n",
"253565 NaN NaN \n",
"253566 NaN NaN \n",
"253567 NaN 2015-01-01 \n",
"253568 NaN NaN \n",
"253569 NaN NaN \n",
"\n",
"[253570 rows x 23 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Filter out midnight cases\n",
"# (some of these are actually valid shifts; they can be reassigned manually later).\n",
"print(behaviors['Time'].unique())\n",
"behaviors = behaviors[behaviors['Time'] != '00:00:00']\n",
"print(behaviors['Time'].unique())\n",
"\n",
"# Goal: build a scaffold with every (person, date, shift) triple between each\n",
"# person's first and last recorded date, then LEFT-join the observed behavior\n",
"# rows onto it so never-recorded shifts become explicit NaN rows.\n",
"# 'Date' is already datetime64 (converted in an earlier cell); the old\n",
"# re-parse here with format \"%m/%d/%Y %I:%M:%S %p\" was a no-op and is removed.\n",
"\n",
"# Person + date key (used only for the diagnostic print below;\n",
"# it is rebuilt with the shift time before the merge).\n",
"behaviors['Concat'] = behaviors['Person_ID'].astype(str) + \" \" + behaviors['Date'].astype(str)\n",
"print(behaviors)\n",
"\n",
"# First and last observed date per person.\n",
"behaviors_grouped = behaviors[['Person_ID','Date']].sort_values('Date').groupby('Person_ID').agg(['first','last'])\n",
"behaviors_grouped.columns = behaviors_grouped.columns.get_level_values(1)\n",
"behaviors_grouped = behaviors_grouped.reset_index().copy()\n",
"\n",
"# One row per person per calendar day in their observation window ...\n",
"df = behaviors_grouped.copy()\n",
"df['Dates'] = [pd.date_range(x, y) for x, y in zip(df['first'], df['last'])]\n",
"df = df.explode('Dates')\n",
"df = df.drop(['first','last'], axis=1)\n",
"\n",
"# ... then three shifts per day.  Assign the list directly instead of the old\n",
"# per-row ast.literal_eval of a constant string.\n",
"shift_times = ['07:00:00', '15:00:00', '23:00:00']\n",
"df['Times'] = [shift_times] * len(df)\n",
"df = df.explode('Times').copy()\n",
"\n",
"# Person + date + shift key on both sides of the join.\n",
"behaviors['Concat'] = behaviors['Person_ID'].astype(str) + \" \" + behaviors['Date'].astype(str) + \" \" + behaviors['Time'].astype(str)\n",
"df['Concat'] = df['Person_ID'].astype(str) + \" \" + df['Dates'].astype(str) + \" \" + df['Times'].astype(str)\n",
"df['Concat_Date'] = df['Person_ID'].astype(str) + \" \" + df['Dates'].astype(str)\n",
"df = df[['Concat','Concat_Date']].copy()\n",
"\n",
"# Left join: keep every scaffold shift even when no behavior row exists for it.\n",
"behaviors = df.merge(behaviors, on = 'Concat', how = 'left')\n",
"behaviors\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2a7ea15e",
"metadata": {},
"outputs": [],
"source": [
"# Flag scaffold shifts that matched no behavior row: after the left join,\n",
"# Person_ID is NaN exactly where the shift was never recorded.\n",
"behaviors['Missing_Shift'] = behaviors['Person_ID'].isna().astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "63ec36a2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Behavior_No_Data_Recorded float64\n",
"Behavior_LOA float64\n",
"Behavior_None float64\n",
"dtype: object"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Cast every 'Behavior*' column to float64 so NaN can represent unobserved\n",
"# shifts, then echo the resulting dtypes as a sanity check.\n",
"behavior_cols = behaviors.columns[behaviors.columns.str.contains('Behavior')]\n",
"behaviors[behavior_cols] = behaviors[behavior_cols].astype('float64')\n",
"behaviors[behavior_cols].dtypes"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "7cf5965d-5f7a-4b2b-baf9-c347d76f6771",
"metadata": {},
"outputs": [],
"source": [
"# Sum the per-level duration and intensity counts into one column each.\n",
"# min_count=1 keeps the result NaN when every input is NaN (a plain sum gives 0).\n",
"behaviors['Duration_Count'] = behaviors.loc[:, behaviors.columns.str.match('^Duration.*Count$')].sum(axis=1,min_count=1)\n",
"behaviors['Intensity_Count'] = behaviors.loc[:, behaviors.columns.str.match('^Intensity.*Count$')].sum(axis=1,min_count=1)\n",
"\n",
"# Drop the per-level duration and intensity columns now that they are summed.\n",
"behaviors = behaviors.drop(behaviors.columns[behaviors.columns.str.match('^Duration_.*_Count$').tolist()],axis=1)\n",
"behaviors = behaviors.drop(behaviors.columns[behaviors.columns.str.match('^Intensity_.*_Count$').tolist()],axis=1)\n",
"\n",
"# Time sampling is a different way of recording behavior than counting\n",
"# episodes, so shifts recorded that way are marked 'no data recorded'.\n",
"behaviors.loc[behaviors['Time_Sample_Percent'] > 0,'Behavior_No_Data_Recorded'] = 1.0\n",
"\n",
"# Non-integer episode counts are probably some sort of time sample too.\n",
"behaviors.loc[pd.notna(behaviors['Episode_Count']) & (behaviors['Episode_Count'] % 1 != 0),'Behavior_No_Data_Recorded'] = 1.0\n",
"\n",
"# Where duration and intensity counts are both present but disagree,\n",
"# reconcile them to the row-wise max of the two.\n",
"disagree = (behaviors['Duration_Count'].notna() & behaviors['Intensity_Count'].notna()\n",
"            & (behaviors['Duration_Count'] != behaviors['Intensity_Count']))\n",
"row_max = behaviors.loc[disagree, ['Duration_Count','Intensity_Count']].max(axis=1)\n",
"behaviors.loc[disagree, ['Duration_Count','Intensity_Count']] = row_max\n",
"\n",
"# Collapse duration/intensity into a single count (duration wins when present).\n",
"behaviors['Count'] = behaviors['Duration_Count'].fillna(behaviors['Intensity_Count'])\n",
"\n",
"# Where the episode count is missing or 0 but a duration/intensity count\n",
"# exists, let that count override it.\n",
"# BUG FIX: the old condition read `... & behaviors['Count'] > 0`, which Python\n",
"# parses as `(... & behaviors['Count']) > 0` because & binds tighter than >;\n",
"# the comparison is now explicitly parenthesized.\n",
"needs_count = ((behaviors['Episode_Count'].isna() | (behaviors['Episode_Count'] == 0))\n",
"               & (behaviors['Count'] > 0))\n",
"behaviors.loc[needs_count, 'Episode_Count'] = behaviors.loc[needs_count, 'Count']\n",
"\n",
"# Drop the intermediate count columns.\n",
"behaviors = behaviors.drop(['Duration_Count','Intensity_Count','Count'], axis = 1)\n",
"\n",
"# Where every behavior flag is null but the episode count is 0, the shift was\n",
"# recorded and simply had no behavior.\n",
"behaviors.loc[(behaviors['Behavior_None'].isna()\n",
"               & behaviors['Behavior_LOA'].isna()\n",
"               & behaviors['Behavior_No_Data_Recorded'].isna()\n",
"               & (behaviors['Episode_Count'] == 0)), 'Behavior_None'] = 1.0\n",
"\n",
"# We can't confirm for now if there was an LOA or no data recorded, so we leave that.\n",
"\n",
"# Load the behavior targets of interest (raises KeyError if the .ini is\n",
"# missing or lacks [section1]targets_of_interest, same as before).\n",
"config = configparser.ConfigParser()\n",
"config.read('targets_of_interest.ini')\n",
"covid = True\n",
"if covid:\n",
"    targets = literal_eval(config['section1']['targets_of_interest'])\n",
"else:\n",
"    targets = ['aggression','sib']\n",
"\n",
"# One indicator column per target, holding the target name where it matches\n",
"# (the old final_targets dict mapped each target to itself, so it is dropped).\n",
"for target in targets:\n",
"    behaviors[target] = ''\n",
"    behaviors.loc[behaviors['Target'] == target, target] = target\n",
"\n",
"# Anything recorded that is not a target of interest is bucketed as 'other'.\n",
"behaviors['other'] = ''\n",
"behaviors.loc[behaviors['Target'].notna() & ~behaviors['Target'].isin(targets), 'other'] = 'other'"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "be5bca62",
"metadata": {},
"outputs": [],
"source": [
"# Add the catch-all 'other' bucket to the target list.\n",
"# Guarded so re-running this cell does not append 'other' twice\n",
"# (the old version also had a dead `len(targets)` expression, removed).\n",
"if 'other' not in targets:\n",
"    targets = targets + ['other']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "61938b93",
"metadata": {},
"outputs": [],
"source": [
"# Build per-target episode-count and indicator columns.\n",
"\n",
"cols = []\n",
"if len(targets) > 1:\n",
"    # Total episodes across any target of interest for the shift.\n",
"    behaviors['Behavior_Episodes'] = np.where(behaviors[targets].ne('').any(axis=1) & (behaviors['Episode_Count'] > 0), behaviors['Episode_Count'], 0)\n",
"    # multiple targets\n",
"    \n",
"    # Per-target episode counts, plus a per-target no-data flag where the\n",
"    # target matched but Episode_Count is missing.\n",
"    for target in targets:\n",
"        col = 'Behavior_Episodes_' + target\n",
"        behaviors[col] = np.where((behaviors[target] != '') & (behaviors['Episode_Count'] > 0), behaviors['Episode_Count'], 0)\n",
"        behaviors['Behavior_No_Data_Recorded_'+target] = np.where((behaviors[target] != '') & (behaviors['Episode_Count'].isna()), 1.0, behaviors['Behavior_No_Data_Recorded']) \n",
"        cols.append(col)\n",
"\n",
"\n",
"    # Per-target indicator of any episode.\n",
"    # NOTE(review): this loop also re-assigns 'Behavior_No_Data_Recorded_<target>'\n",
"    # with different logic than the loop above, silently overwriting those\n",
"    # columns -- confirm which of the two definitions is intended.\n",
"    for target in targets:\n",
"        col = 'Behavior_' + target\n",
"        behaviors[col] = np.where((behaviors[target] != '') & (behaviors['Episode_Count'] > 0), behaviors['Episode_Count'] > 0, 0)\n",
"        cols.append(col)\n",
"        col = 'Behavior_No_Data_Recorded_' + target\n",
"        behaviors[col] = np.where((behaviors[target] != '') & (behaviors['Behavior_No_Data_Recorded'] > 0), behaviors['Behavior_No_Data_Recorded'], 0)\n",
"\n",
"    # Any-target indicator for the shift.\n",
"    behaviors['Behavior'] = np.where(behaviors[targets].ne('').any(axis=1) & (behaviors['Episode_Count'] > 0), behaviors['Episode_Count'] > 0, 0)\n",
"    "
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "7009b00d",
"metadata": {},
"outputs": [],
"source": [
"# Each row is a single observation prior to the per-shift groupby,\n",
"# so seed a constant that the later sum() turns into a row count.\n",
"behaviors = behaviors.assign(n_observations=1)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "0e9de5ff",
"metadata": {},
"outputs": [],
"source": [
"# Aggregate to one row per (person, date, shift) Concat key, summing every\n",
"# behavior/flag column.  Passing a LIST of agg funcs keeps the result's\n",
"# columns as a MultiIndex, which the next cell flattens.\n",
"agg_columns = ['n_observations', 'Missing_Shift']\n",
"agg_columns += ['Behavior_' + target for target in targets]\n",
"agg_columns += ['Behavior_Episodes_' + target for target in targets]\n",
"agg_columns += ['Behavior_LOA', 'Behavior_None', 'Behavior_No_Data_Recorded']\n",
"agg_columns += ['Behavior_No_Data_Recorded_' + target for target in targets]\n",
"\n",
"agg_spec = dict.fromkeys(agg_columns, ['sum'])\n",
"behaviors_wide = behaviors.copy().groupby('Concat').agg(agg_spec)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "24e2b14d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['n_observations', 'Missing_Shift', 'Behavior_aggression',\n",
" 'Behavior_sib', 'Behavior_elopement', 'Behavior_disruptive',\n",
" 'Behavior_other', 'Behavior_Episodes_aggression',\n",
" 'Behavior_Episodes_sib', 'Behavior_Episodes_elopement',\n",
" 'Behavior_Episodes_disruptive', 'Behavior_Episodes_other',\n",
" 'Behavior_LOA', 'Behavior_None', 'Behavior_No_Data_Recorded',\n",
" 'Behavior_No_Data_Recorded_aggression', 'Behavior_No_Data_Recorded_sib',\n",
" 'Behavior_No_Data_Recorded_elopement',\n",
" 'Behavior_No_Data_Recorded_disruptive',\n",
" 'Behavior_No_Data_Recorded_other'],\n",
" dtype='object')"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Flatten the (column, 'sum') MultiIndex from the groupby down to plain\n",
"# column names, then display them as a sanity check.\n",
"behaviors_wide.columns = [name for name, _ in behaviors_wide.columns]\n",
"behaviors_wide.columns"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f5233197",
"metadata": {},
"outputs": [],
"source": [
"# Mark a shift as 'no data recorded' when either:\n",
"#  - its only observation is an 'other' (not-of-interest) behavior, or\n",
"#  - the shift is missing from the source data altogether.\n",
"only_other = (behaviors_wide['n_observations'] == 1) & (behaviors_wide['Behavior_No_Data_Recorded_other'] > 0)\n",
"shift_missing = behaviors_wide['Missing_Shift'] == 1\n",
"behaviors_wide.loc[only_other | shift_missing, 'Behavior_No_Data_Recorded'] = 1.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56868208",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 14,
"id": "920a4d03",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Behavior_aggression',\n",
" 'Behavior_sib',\n",
" 'Behavior_elopement',\n",
" 'Behavior_disruptive',\n",
" 'Behavior_other',\n",
" 'Behavior_LOA',\n",
" 'Behavior_None',\n",
" 'Behavior_No_Data_Recorded',\n",
" 'Behavior_No_Data_Recorded_aggression',\n",
" 'Behavior_No_Data_Recorded_sib',\n",
" 'Behavior_No_Data_Recorded_elopement',\n",
" 'Behavior_No_Data_Recorded_disruptive',\n",
" 'Behavior_No_Data_Recorded_other']"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Behavior flag columns: everything named 'Behavior*' except the raw\n",
"# episode-count columns; display the list as a check.\n",
"behavior_cols = [\n",
"    col for col in behaviors_wide.columns\n",
"    if 'Behavior' in col and 'Episode' not in col\n",
"]\n",
"behavior_cols"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "ca4b86fe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 77793 entries, 1 2019-09-15 07:00:00 to 9 2020-08-31 23:00:00\n",
"Data columns (total 20 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 n_observations 77793 non-null int64 \n",
" 1 Missing_Shift 77793 non-null int64 \n",
" 2 Behavior_aggression 77793 non-null int32 \n",
" 3 Behavior_sib 77793 non-null int32 \n",
" 4 Behavior_elopement 77793 non-null int32 \n",
" 5 Behavior_disruptive 77793 non-null int32 \n",
" 6 Behavior_other 77793 non-null int32 \n",
" 7 Behavior_Episodes_aggression 77793 non-null float64\n",
" 8 Behavior_Episodes_sib 77793 non-null float64\n",
" 9 Behavior_Episodes_elopement 77793 non-null float64\n",
" 10 Behavior_Episodes_disruptive 77793 non-null float64\n",
" 11 Behavior_Episodes_other 77793 non-null float64\n",
" 12 Behavior_LOA 77793 non-null float64\n",
" 13 Behavior_None 77793 non-null float64\n",
" 14 Behavior_No_Data_Recorded 77793 non-null float64\n",
" 15 Behavior_No_Data_Recorded_aggression 77793 non-null float64\n",
" 16 Behavior_No_Data_Recorded_sib 77793 non-null float64\n",
" 17 Behavior_No_Data_Recorded_elopement 77793 non-null float64\n",
" 18 Behavior_No_Data_Recorded_disruptive 77793 non-null float64\n",
" 19 Behavior_No_Data_Recorded_other 77793 non-null float64\n",
"dtypes: float64(13), int32(5), int64(2)\n",
"memory usage: 13.0+ MB\n"
]
}
],
"source": [
"# Quick structural check: row count, columns, dtypes, memory footprint.\n",
"behaviors_wide.info()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "3869378a",
"metadata": {},
"outputs": [],
"source": [
"# Persist the per-shift wide table (Concat key becomes a column) for downstream use.\n",
"behaviors_wide.reset_index().to_csv('behaviors_wide_deID.csv')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "myenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}