Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "2f70d940-27fa-46ab-bfb1-8fb8194b4c15",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import configparser\n",
"# NOTE(review): both the module and the function are imported because later\n",
"# cells call `ast.literal_eval` and bare `literal_eval` inconsistently;\n",
"# standardizing on one form would let the other import be dropped.\n",
"from ast import literal_eval\n",
"import ast"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3f266616",
"metadata": {},
"outputs": [],
"source": [
"# Load the de-identified behavior records; the first CSV column is the row index.\n",
"behaviors = pd.read_csv('behaviors_covid_ids_deID.csv', index_col = 0)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2cc22f40-ec8e-4e98-8f1f-7579ee2f330d",
"metadata": {},
"outputs": [],
"source": [
"# Parse the Date strings into datetime64 once, up front.\n",
"# NOTE(review): a later cell calls pd.to_datetime on this column again with an\n",
"# explicit format; after this conversion that second call is a no-op.\n",
"behaviors['Date'] = pd.to_datetime(behaviors['Date'])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "cb6bfbd1-2250-4f8b-b0c8-9d1b3f22a317",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['23:00:00' '15:00:00' '07:00:00' '00:00:00']\n",
"['23:00:00' '15:00:00' '07:00:00']\n",
" Person_ID Date Time Target \\\n",
"Unnamed: 0 \n",
"838118 1 2019-09-15 23:00:00 sib \n",
"2140342 2 2019-09-15 15:00:00 disruptive \n",
"838121 1 2019-09-15 23:00:00 disruptive \n",
"838120 1 2019-09-15 23:00:00 mouthing pica \n",
"838119 1 2019-09-15 23:00:00 aggression \n",
"... ... ... ... ... \n",
"5097602 31 2020-09-08 15:00:00 NaN \n",
"3249998 39 2020-09-08 07:00:00 aggression \n",
"3249999 39 2020-09-08 07:00:00 sib \n",
"3250000 39 2020-09-08 07:00:00 disruptive \n",
"3250001 39 2020-09-08 07:00:00 sib \n",
"\n",
" Time_Sample_Percent Behavior_No_Data_Recorded Episode_Count \\\n",
"Unnamed: 0 \n",
"838118 0.0 False 0.0 \n",
"2140342 0.0 False 0.0 \n",
"838121 0.0 False 0.0 \n",
"838120 0.0 False 0.0 \n",
"838119 0.0 False 0.0 \n",
"... ... ... ... \n",
"5097602 0.0 False 0.0 \n",
"3249998 NaN NaN 1.0 \n",
"3249999 NaN NaN 1.0 \n",
"3250000 NaN NaN 0.0 \n",
"3250001 NaN NaN 0.0 \n",
"\n",
" Behavior_LOA Behavior_None Duration_01_Count ... \\\n",
"Unnamed: 0 ... \n",
"838118 False True NaN ... \n",
"2140342 False True NaN ... \n",
"838121 False True NaN ... \n",
"838120 False True NaN ... \n",
"838119 False True NaN ... \n",
"... ... ... ... ... \n",
"5097602 False True NaN ... \n",
"3249998 NaN NaN NaN ... \n",
"3249999 NaN NaN NaN ... \n",
"3250000 NaN NaN NaN ... \n",
"3250001 NaN NaN NaN ... \n",
"\n",
" Duration_04_Count Duration_05_Count Duration_06_Count \\\n",
"Unnamed: 0 \n",
"838118 NaN NaN NaN \n",
"2140342 NaN NaN NaN \n",
"838121 NaN NaN NaN \n",
"838120 NaN NaN NaN \n",
"838119 NaN NaN NaN \n",
"... ... ... ... \n",
"5097602 NaN NaN NaN \n",
"3249998 NaN NaN NaN \n",
"3249999 NaN NaN NaN \n",
"3250000 NaN NaN NaN \n",
"3250001 NaN NaN NaN \n",
"\n",
" Intensity_01_Count Intensity_02_Count Intensity_03_Count \\\n",
"Unnamed: 0 \n",
"838118 NaN NaN NaN \n",
"2140342 NaN NaN NaN \n",
"838121 NaN NaN NaN \n",
"838120 NaN NaN NaN \n",
"838119 NaN NaN NaN \n",
"... ... ... ... \n",
"5097602 NaN NaN NaN \n",
"3249998 NaN NaN 1.0 \n",
"3249999 NaN 1.0 NaN \n",
"3250000 NaN NaN NaN \n",
"3250001 NaN NaN NaN \n",
"\n",
" Intensity_04_Count Intensity_05_Count FirstDate Concat \n",
"Unnamed: 0 \n",
"838118 NaN NaN 2017-09-01 1 2019-09-15 \n",
"2140342 NaN NaN 2017-08-03 2 2019-09-15 \n",
"838121 NaN NaN 2017-09-01 1 2019-09-15 \n",
"838120 NaN NaN 2017-09-01 1 2019-09-15 \n",
"838119 NaN NaN 2017-09-01 1 2019-09-15 \n",
"... ... ... ... ... \n",
"5097602 NaN NaN 2019-03-01 31 2020-09-08 \n",
"3249998 NaN NaN 2015-12-24 39 2020-09-08 \n",
"3249999 NaN NaN 2015-12-24 39 2020-09-08 \n",
"3250000 NaN NaN 2015-12-24 39 2020-09-08 \n",
"3250001 NaN NaN 2015-12-24 39 2020-09-08 \n",
"\n",
"[244585 rows x 22 columns]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Concat</th>\n",
" <th>Concat_Date</th>\n",
" <th>Person_ID</th>\n",
" <th>Date</th>\n",
" <th>Time</th>\n",
" <th>Target</th>\n",
" <th>Time_Sample_Percent</th>\n",
" <th>Behavior_No_Data_Recorded</th>\n",
" <th>Episode_Count</th>\n",
" <th>Behavior_LOA</th>\n",
" <th>...</th>\n",
" <th>Duration_03_Count</th>\n",
" <th>Duration_04_Count</th>\n",
" <th>Duration_05_Count</th>\n",
" <th>Duration_06_Count</th>\n",
" <th>Intensity_01_Count</th>\n",
" <th>Intensity_02_Count</th>\n",
" <th>Intensity_03_Count</th>\n",
" <th>Intensity_04_Count</th>\n",
" <th>Intensity_05_Count</th>\n",
" <th>FirstDate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1 2019-09-15 07:00:00</td>\n",
" <td>1 2019-09-15</td>\n",
" <td>1.0</td>\n",
" <td>2019-09-15</td>\n",
" <td>07:00:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2017-09-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1 2019-09-15 15:00:00</td>\n",
" <td>1 2019-09-15</td>\n",
" <td>1.0</td>\n",
" <td>2019-09-15</td>\n",
" <td>15:00:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2017-09-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1 2019-09-15 23:00:00</td>\n",
" <td>1 2019-09-15</td>\n",
" <td>1.0</td>\n",
" <td>2019-09-15</td>\n",
" <td>23:00:00</td>\n",
" <td>sib</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2017-09-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1 2019-09-15 23:00:00</td>\n",
" <td>1 2019-09-15</td>\n",
" <td>1.0</td>\n",
" <td>2019-09-15</td>\n",
" <td>23:00:00</td>\n",
" <td>disruptive</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2017-09-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1 2019-09-15 23:00:00</td>\n",
" <td>1 2019-09-15</td>\n",
" <td>1.0</td>\n",
" <td>2019-09-15</td>\n",
" <td>23:00:00</td>\n",
" <td>mouthing pica</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2017-09-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>253565</th>\n",
" <td>78 2020-05-12 15:00:00</td>\n",
" <td>78 2020-05-12</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>253566</th>\n",
" <td>78 2020-05-12 23:00:00</td>\n",
" <td>78 2020-05-12</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>253567</th>\n",
" <td>78 2020-05-13 07:00:00</td>\n",
" <td>78 2020-05-13</td>\n",
" <td>78.0</td>\n",
" <td>2020-05-13</td>\n",
" <td>07:00:00</td>\n",
" <td>aggression</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2015-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>253568</th>\n",
" <td>78 2020-05-13 15:00:00</td>\n",
" <td>78 2020-05-13</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>253569</th>\n",
" <td>78 2020-05-13 23:00:00</td>\n",
" <td>78 2020-05-13</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>253570 rows × 23 columns</p>\n",
"</div>"
],
"text/plain": [
" Concat Concat_Date Person_ID Date Time \\\n",
"0 1 2019-09-15 07:00:00 1 2019-09-15 1.0 2019-09-15 07:00:00 \n",
"1 1 2019-09-15 15:00:00 1 2019-09-15 1.0 2019-09-15 15:00:00 \n",
"2 1 2019-09-15 23:00:00 1 2019-09-15 1.0 2019-09-15 23:00:00 \n",
"3 1 2019-09-15 23:00:00 1 2019-09-15 1.0 2019-09-15 23:00:00 \n",
"4 1 2019-09-15 23:00:00 1 2019-09-15 1.0 2019-09-15 23:00:00 \n",
"... ... ... ... ... ... \n",
"253565 78 2020-05-12 15:00:00 78 2020-05-12 NaN NaT NaN \n",
"253566 78 2020-05-12 23:00:00 78 2020-05-12 NaN NaT NaN \n",
"253567 78 2020-05-13 07:00:00 78 2020-05-13 78.0 2020-05-13 07:00:00 \n",
"253568 78 2020-05-13 15:00:00 78 2020-05-13 NaN NaT NaN \n",
"253569 78 2020-05-13 23:00:00 78 2020-05-13 NaN NaT NaN \n",
"\n",
" Target Time_Sample_Percent Behavior_No_Data_Recorded \\\n",
"0 NaN NaN False \n",
"1 NaN NaN False \n",
"2 sib 0.0 False \n",
"3 disruptive 0.0 False \n",
"4 mouthing pica 0.0 False \n",
"... ... ... ... \n",
"253565 NaN NaN NaN \n",
"253566 NaN NaN NaN \n",
"253567 aggression NaN NaN \n",
"253568 NaN NaN NaN \n",
"253569 NaN NaN NaN \n",
"\n",
" Episode_Count Behavior_LOA ... Duration_03_Count Duration_04_Count \\\n",
"0 NaN True ... NaN NaN \n",
"1 NaN True ... NaN NaN \n",
"2 0.0 False ... NaN NaN \n",
"3 0.0 False ... NaN NaN \n",
"4 0.0 False ... NaN NaN \n",
"... ... ... ... ... ... \n",
"253565 NaN NaN ... NaN NaN \n",
"253566 NaN NaN ... NaN NaN \n",
"253567 1.0 NaN ... NaN NaN \n",
"253568 NaN NaN ... NaN NaN \n",
"253569 NaN NaN ... NaN NaN \n",
"\n",
" Duration_05_Count Duration_06_Count Intensity_01_Count \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"... ... ... ... \n",
"253565 NaN NaN NaN \n",
"253566 NaN NaN NaN \n",
"253567 NaN NaN NaN \n",
"253568 NaN NaN NaN \n",
"253569 NaN NaN NaN \n",
"\n",
" Intensity_02_Count Intensity_03_Count Intensity_04_Count \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"... ... ... ... \n",
"253565 NaN NaN NaN \n",
"253566 NaN NaN NaN \n",
"253567 NaN NaN NaN \n",
"253568 NaN NaN NaN \n",
"253569 NaN NaN NaN \n",
"\n",
" Intensity_05_Count FirstDate \n",
"0 NaN 2017-09-01 \n",
"1 NaN 2017-09-01 \n",
"2 NaN 2017-09-01 \n",
"3 NaN 2017-09-01 \n",
"4 NaN 2017-09-01 \n",
"... ... ... \n",
"253565 NaN NaN \n",
"253566 NaN NaN \n",
"253567 NaN 2015-01-01 \n",
"253568 NaN NaN \n",
"253569 NaN NaN \n",
"\n",
"[253570 rows x 23 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Filter out midnight cases\n",
"# (some of these are actually valid shifts; they can be reassigned manually later).\n",
"print(behaviors['Time'].unique())\n",
"behaviors = behaviors[behaviors['Time'] != '00:00:00']\n",
"print(behaviors['Time'].unique())\n",
"\n",
"# Goal: build a scaffold with every (person, date, shift) triple between each\n",
"# person's first and last recorded date, then LEFT-join the observed behavior\n",
"# rows onto it so never-recorded shifts become explicit NaN rows.\n",
"# 'Date' is already datetime64 (converted in an earlier cell); the old\n",
"# re-parse here with format \"%m/%d/%Y %I:%M:%S %p\" was a no-op and is removed.\n",
"\n",
"# Person + date key (used only for the diagnostic print below;\n",
"# it is rebuilt with the shift time before the merge).\n",
"behaviors['Concat'] = behaviors['Person_ID'].astype(str) + \" \" + behaviors['Date'].astype(str)\n",
"print(behaviors)\n",
"\n",
"# First and last observed date per person.\n",
"behaviors_grouped = behaviors[['Person_ID','Date']].sort_values('Date').groupby('Person_ID').agg(['first','last'])\n",
"behaviors_grouped.columns = behaviors_grouped.columns.get_level_values(1)\n",
"behaviors_grouped = behaviors_grouped.reset_index().copy()\n",
"\n",
"# One row per person per calendar day in their observation window ...\n",
"df = behaviors_grouped.copy()\n",
"df['Dates'] = [pd.date_range(x, y) for x, y in zip(df['first'], df['last'])]\n",
"df = df.explode('Dates')\n",
"df = df.drop(['first','last'], axis=1)\n",
"\n",
"# ... then three shifts per day.  Assign the list directly instead of the old\n",
"# per-row ast.literal_eval of a constant string.\n",
"shift_times = ['07:00:00', '15:00:00', '23:00:00']\n",
"df['Times'] = [shift_times] * len(df)\n",
"df = df.explode('Times').copy()\n",
"\n",
"# Person + date + shift key on both sides of the join.\n",
"behaviors['Concat'] = behaviors['Person_ID'].astype(str) + \" \" + behaviors['Date'].astype(str) + \" \" + behaviors['Time'].astype(str)\n",
"df['Concat'] = df['Person_ID'].astype(str) + \" \" + df['Dates'].astype(str) + \" \" + df['Times'].astype(str)\n",
"df['Concat_Date'] = df['Person_ID'].astype(str) + \" \" + df['Dates'].astype(str)\n",
"df = df[['Concat','Concat_Date']].copy()\n",
"\n",
"# Left join: keep every scaffold shift even when no behavior row exists for it.\n",
"behaviors = df.merge(behaviors, on = 'Concat', how = 'left')\n",
"behaviors\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2a7ea15e",
"metadata": {},
"outputs": [],
"source": [
"# Flag scaffold shifts that matched no behavior row: after the left join,\n",
"# Person_ID is NaN exactly where the shift was never recorded.\n",
"behaviors['Missing_Shift'] = behaviors['Person_ID'].isna().astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "63ec36a2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Behavior_No_Data_Recorded float64\n",
"Behavior_LOA float64\n",
"Behavior_None float64\n",
"dtype: object"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Cast every 'Behavior*' column to float64 so NaN can represent unobserved\n",
"# shifts, then echo the resulting dtypes as a sanity check.\n",
"behavior_cols = behaviors.columns[behaviors.columns.str.contains('Behavior')]\n",
"behaviors[behavior_cols] = behaviors[behavior_cols].astype('float64')\n",
"behaviors[behavior_cols].dtypes"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "7cf5965d-5f7a-4b2b-baf9-c347d76f6771",
"metadata": {},
"outputs": [],
"source": [
"# Sum the per-level duration and intensity counts into one column each.\n",
"# min_count=1 keeps the result NaN when every input is NaN (a plain sum gives 0).\n",
"behaviors['Duration_Count'] = behaviors.loc[:, behaviors.columns.str.match('^Duration.*Count$')].sum(axis=1,min_count=1)\n",
"behaviors['Intensity_Count'] = behaviors.loc[:, behaviors.columns.str.match('^Intensity.*Count$')].sum(axis=1,min_count=1)\n",
"\n",
"# Drop the per-level duration and intensity columns now that they are summed.\n",
"behaviors = behaviors.drop(behaviors.columns[behaviors.columns.str.match('^Duration_.*_Count$').tolist()],axis=1)\n",
"behaviors = behaviors.drop(behaviors.columns[behaviors.columns.str.match('^Intensity_.*_Count$').tolist()],axis=1)\n",
"\n",
"# Time sampling is a different way of recording behavior than counting\n",
"# episodes, so shifts recorded that way are marked 'no data recorded'.\n",
"behaviors.loc[behaviors['Time_Sample_Percent'] > 0,'Behavior_No_Data_Recorded'] = 1.0\n",
"\n",
"# Non-integer episode counts are probably some sort of time sample too.\n",
"behaviors.loc[pd.notna(behaviors['Episode_Count']) & (behaviors['Episode_Count'] % 1 != 0),'Behavior_No_Data_Recorded'] = 1.0\n",
"\n",
"# Where duration and intensity counts are both present but disagree,\n",
"# reconcile them to the row-wise max of the two.\n",
"disagree = (behaviors['Duration_Count'].notna() & behaviors['Intensity_Count'].notna()\n",
"            & (behaviors['Duration_Count'] != behaviors['Intensity_Count']))\n",
"row_max = behaviors.loc[disagree, ['Duration_Count','Intensity_Count']].max(axis=1)\n",
"behaviors.loc[disagree, ['Duration_Count','Intensity_Count']] = row_max\n",
"\n",
"# Collapse duration/intensity into a single count (duration wins when present).\n",
"behaviors['Count'] = behaviors['Duration_Count'].fillna(behaviors['Intensity_Count'])\n",
"\n",
"# Where the episode count is missing or 0 but a duration/intensity count\n",
"# exists, let that count override it.\n",
"# BUG FIX: the old condition read `... & behaviors['Count'] > 0`, which Python\n",
"# parses as `(... & behaviors['Count']) > 0` because & binds tighter than >;\n",
"# the comparison is now explicitly parenthesized.\n",
"needs_count = ((behaviors['Episode_Count'].isna() | (behaviors['Episode_Count'] == 0))\n",
"               & (behaviors['Count'] > 0))\n",
"behaviors.loc[needs_count, 'Episode_Count'] = behaviors.loc[needs_count, 'Count']\n",
"\n",
"# Drop the intermediate count columns.\n",
"behaviors = behaviors.drop(['Duration_Count','Intensity_Count','Count'], axis = 1)\n",
"\n",
"# Where every behavior flag is null but the episode count is 0, the shift was\n",
"# recorded and simply had no behavior.\n",
"behaviors.loc[(behaviors['Behavior_None'].isna()\n",
"               & behaviors['Behavior_LOA'].isna()\n",
"               & behaviors['Behavior_No_Data_Recorded'].isna()\n",
"               & (behaviors['Episode_Count'] == 0)), 'Behavior_None'] = 1.0\n",
"\n",
"# We can't confirm for now if there was an LOA or no data recorded, so we leave that.\n",
"\n",
"# Load the behavior targets of interest (raises KeyError if the .ini is\n",
"# missing or lacks [section1]targets_of_interest, same as before).\n",
"config = configparser.ConfigParser()\n",
"config.read('targets_of_interest.ini')\n",
"covid = True\n",
"if covid:\n",
"    targets = literal_eval(config['section1']['targets_of_interest'])\n",
"else:\n",
"    targets = ['aggression','sib']\n",
"\n",
"# One indicator column per target, holding the target name where it matches\n",
"# (the old final_targets dict mapped each target to itself, so it is dropped).\n",
"for target in targets:\n",
"    behaviors[target] = ''\n",
"    behaviors.loc[behaviors['Target'] == target, target] = target\n",
"\n",
"# Anything recorded that is not a target of interest is bucketed as 'other'.\n",
"behaviors['other'] = ''\n",
"behaviors.loc[behaviors['Target'].notna() & ~behaviors['Target'].isin(targets), 'other'] = 'other'"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "be5bca62",
"metadata": {},
"outputs": [],
"source": [
"# Add the catch-all 'other' bucket to the target list.\n",
"# Guarded so re-running this cell does not append 'other' twice\n",
"# (the old version also had a dead `len(targets)` expression, removed).\n",
"if 'other' not in targets:\n",
"    targets = targets + ['other']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "61938b93",
"metadata": {},
"outputs": [],
"source": [
"# Build per-target episode-count and indicator columns.\n",
"\n",
"cols = []\n",
"if len(targets) > 1:\n",
"    # Total episodes across any target of interest for the shift.\n",
"    behaviors['Behavior_Episodes'] = np.where(behaviors[targets].ne('').any(axis=1) & (behaviors['Episode_Count'] > 0), behaviors['Episode_Count'], 0)\n",
"    # multiple targets\n",
"    \n",
"    # Per-target episode counts, plus a per-target no-data flag where the\n",
"    # target matched but Episode_Count is missing.\n",
"    for target in targets:\n",
"        col = 'Behavior_Episodes_' + target\n",
"        behaviors[col] = np.where((behaviors[target] != '') & (behaviors['Episode_Count'] > 0), behaviors['Episode_Count'], 0)\n",
"        behaviors['Behavior_No_Data_Recorded_'+target] = np.where((behaviors[target] != '') & (behaviors['Episode_Count'].isna()), 1.0, behaviors['Behavior_No_Data_Recorded']) \n",
"        cols.append(col)\n",
"\n",
"\n",
"    # Per-target indicator of any episode.\n",
"    # NOTE(review): this loop also re-assigns 'Behavior_No_Data_Recorded_<target>'\n",
"    # with different logic than the loop above, silently overwriting those\n",
"    # columns -- confirm which of the two definitions is intended.\n",
"    for target in targets:\n",
"        col = 'Behavior_' + target\n",
"        behaviors[col] = np.where((behaviors[target] != '') & (behaviors['Episode_Count'] > 0), behaviors['Episode_Count'] > 0, 0)\n",
"        cols.append(col)\n",
"        col = 'Behavior_No_Data_Recorded_' + target\n",
"        behaviors[col] = np.where((behaviors[target] != '') & (behaviors['Behavior_No_Data_Recorded'] > 0), behaviors['Behavior_No_Data_Recorded'], 0)\n",
"\n",
"    # Any-target indicator for the shift.\n",
"    behaviors['Behavior'] = np.where(behaviors[targets].ne('').any(axis=1) & (behaviors['Episode_Count'] > 0), behaviors['Episode_Count'] > 0, 0)\n",
"    "
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "7009b00d",
"metadata": {},
"outputs": [],
"source": [
"# Each row is a single observation prior to the per-shift groupby,\n",
"# so seed a constant that the later sum() turns into a row count.\n",
"behaviors = behaviors.assign(n_observations=1)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "0e9de5ff",
"metadata": {},
"outputs": [],
"source": [
"# Aggregate to one row per (person, date, shift) Concat key, summing every\n",
"# behavior/flag column.  Passing a LIST of agg funcs keeps the result's\n",
"# columns as a MultiIndex, which the next cell flattens.\n",
"agg_columns = ['n_observations', 'Missing_Shift']\n",
"agg_columns += ['Behavior_' + target for target in targets]\n",
"agg_columns += ['Behavior_Episodes_' + target for target in targets]\n",
"agg_columns += ['Behavior_LOA', 'Behavior_None', 'Behavior_No_Data_Recorded']\n",
"agg_columns += ['Behavior_No_Data_Recorded_' + target for target in targets]\n",
"\n",
"agg_spec = dict.fromkeys(agg_columns, ['sum'])\n",
"behaviors_wide = behaviors.copy().groupby('Concat').agg(agg_spec)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "24e2b14d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['n_observations', 'Missing_Shift', 'Behavior_aggression',\n",
" 'Behavior_sib', 'Behavior_elopement', 'Behavior_disruptive',\n",
" 'Behavior_other', 'Behavior_Episodes_aggression',\n",
" 'Behavior_Episodes_sib', 'Behavior_Episodes_elopement',\n",
" 'Behavior_Episodes_disruptive', 'Behavior_Episodes_other',\n",
" 'Behavior_LOA', 'Behavior_None', 'Behavior_No_Data_Recorded',\n",
" 'Behavior_No_Data_Recorded_aggression', 'Behavior_No_Data_Recorded_sib',\n",
" 'Behavior_No_Data_Recorded_elopement',\n",
" 'Behavior_No_Data_Recorded_disruptive',\n",
" 'Behavior_No_Data_Recorded_other'],\n",
" dtype='object')"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Flatten the (column, 'sum') MultiIndex from the groupby down to plain\n",
"# column names, then display them as a sanity check.\n",
"behaviors_wide.columns = [name for name, _ in behaviors_wide.columns]\n",
"behaviors_wide.columns"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f5233197",
"metadata": {},
"outputs": [],
"source": [
"# Mark a shift as 'no data recorded' when either:\n",
"#  - its only observation is an 'other' (not-of-interest) behavior, or\n",
"#  - the shift is missing from the source data altogether.\n",
"only_other = (behaviors_wide['n_observations'] == 1) & (behaviors_wide['Behavior_No_Data_Recorded_other'] > 0)\n",
"shift_missing = behaviors_wide['Missing_Shift'] == 1\n",
"behaviors_wide.loc[only_other | shift_missing, 'Behavior_No_Data_Recorded'] = 1.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56868208",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 14,
"id": "920a4d03",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Behavior_aggression',\n",
" 'Behavior_sib',\n",
" 'Behavior_elopement',\n",
" 'Behavior_disruptive',\n",
" 'Behavior_other',\n",
" 'Behavior_LOA',\n",
" 'Behavior_None',\n",
" 'Behavior_No_Data_Recorded',\n",
" 'Behavior_No_Data_Recorded_aggression',\n",
" 'Behavior_No_Data_Recorded_sib',\n",
" 'Behavior_No_Data_Recorded_elopement',\n",
" 'Behavior_No_Data_Recorded_disruptive',\n",
" 'Behavior_No_Data_Recorded_other']"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Behavior flag columns: everything named 'Behavior*' except the raw\n",
"# episode-count columns; display the list as a check.\n",
"behavior_cols = [\n",
"    col for col in behaviors_wide.columns\n",
"    if 'Behavior' in col and 'Episode' not in col\n",
"]\n",
"behavior_cols"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "ca4b86fe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 77793 entries, 1 2019-09-15 07:00:00 to 9 2020-08-31 23:00:00\n",
"Data columns (total 20 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 n_observations 77793 non-null int64 \n",
" 1 Missing_Shift 77793 non-null int64 \n",
" 2 Behavior_aggression 77793 non-null int32 \n",
" 3 Behavior_sib 77793 non-null int32 \n",
" 4 Behavior_elopement 77793 non-null int32 \n",
" 5 Behavior_disruptive 77793 non-null int32 \n",
" 6 Behavior_other 77793 non-null int32 \n",
" 7 Behavior_Episodes_aggression 77793 non-null float64\n",
" 8 Behavior_Episodes_sib 77793 non-null float64\n",
" 9 Behavior_Episodes_elopement 77793 non-null float64\n",
" 10 Behavior_Episodes_disruptive 77793 non-null float64\n",
" 11 Behavior_Episodes_other 77793 non-null float64\n",
" 12 Behavior_LOA 77793 non-null float64\n",
" 13 Behavior_None 77793 non-null float64\n",
" 14 Behavior_No_Data_Recorded 77793 non-null float64\n",
" 15 Behavior_No_Data_Recorded_aggression 77793 non-null float64\n",
" 16 Behavior_No_Data_Recorded_sib 77793 non-null float64\n",
" 17 Behavior_No_Data_Recorded_elopement 77793 non-null float64\n",
" 18 Behavior_No_Data_Recorded_disruptive 77793 non-null float64\n",
" 19 Behavior_No_Data_Recorded_other 77793 non-null float64\n",
"dtypes: float64(13), int32(5), int64(2)\n",
"memory usage: 13.0+ MB\n"
]
}
],
"source": [
"# Quick structural check: row count, columns, dtypes, memory footprint.\n",
"behaviors_wide.info()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "3869378a",
"metadata": {},
"outputs": [],
"source": [
"# Persist the per-shift wide table (Concat key becomes a column) for downstream use.\n",
"behaviors_wide.reset_index().to_csv('behaviors_wide_deID.csv')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "myenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}