{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import random\n",
"\n",
"import pandas as pd\n",
"import scipy.stats"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Read the A/B-test log, one JSON event per line.\n",
"# Blank lines are skipped instead of unconditionally dropping the last\n",
"# element, so a file without a trailing newline keeps its final event and\n",
"# stray empty lines cannot reach json.loads later on.\n",
"with open('./model_abtest.log', 'r') as f:\n",
"    raw = [line.rstrip('\\n') for line in f if line.strip()]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Decode every raw log line into a dict, then make sure each record exposes\n",
"# an 'extra_data' key (None when the event did not carry one).\n",
"data_list = [json.loads(line) for line in raw]\n",
"for record in data_list:\n",
"    record.setdefault('extra_data', None)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>name</th>\n",
"      <th>time</th>\n",
"      <th>salt</th>\n",
"      <th>inputs</th>\n",
"      <th>params</th>\n",
"      <th>event</th>\n",
"      <th>checksum</th>\n",
"      <th>extra_data</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>ModelExperiment</td>\n",
"      <td>1602739669</td>\n",
"      <td>ModelExperiment</td>\n",
"      <td>{'userid': '431'}</td>\n",
"      <td>{'use_pytorch': 1, 'model_type': 'pytorch'}</td>\n",
"      <td>exposure</td>\n",
"      <td>796b9a12</td>\n",
"      <td>None</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>ModelExperiment</td>\n",
"      <td>1602739720</td>\n",
"      <td>ModelExperiment</td>\n",
"      <td>{'userid': '431'}</td>\n",
"      <td>{'use_pytorch': 1, 'model_type': 'pytorch'}</td>\n",
"      <td>exposure</td>\n",
"      <td>796b9a12</td>\n",
"      <td>None</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>ModelExperiment</td>\n",
"      <td>1602739722</td>\n",
"      <td>ModelExperiment</td>\n",
"      <td>{'userid': '431'}</td>\n",
"      <td>{'use_pytorch': 1, 'model_type': 'pytorch'}</td>\n",
"      <td>exposure</td>\n",
"      <td>796b9a12</td>\n",
"      <td>None</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>ModelExperiment</td>\n",
"      <td>1602739722</td>\n",
"      <td>ModelExperiment</td>\n",
"      <td>{'userid': '431'}</td>\n",
"      <td>{'use_pytorch': 1, 'model_type': 'pytorch'}</td>\n",
"      <td>exposure</td>\n",
"      <td>796b9a12</td>\n",
"      <td>None</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>ModelExperiment</td>\n",
"      <td>1602739724</td>\n",
"      <td>ModelExperiment</td>\n",
"      <td>{'userid': '637'}</td>\n",
"      <td>{'use_pytorch': 0, 'model_type': 'surprise'}</td>\n",
"      <td>exposure</td>\n",
"      <td>796b9a12</td>\n",
"      <td>None</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name time salt inputs \\\n",
"0 ModelExperiment 1602739669 ModelExperiment {'userid': '431'} \n",
"1 ModelExperiment 1602739720 ModelExperiment {'userid': '431'} \n",
"2 ModelExperiment 1602739722 ModelExperiment {'userid': '431'} \n",
"3 ModelExperiment 1602739722 ModelExperiment {'userid': '431'} \n",
"4 ModelExperiment 1602739724 ModelExperiment {'userid': '637'} \n",
"\n",
" params event checksum extra_data \n",
"0 {'use_pytorch': 1, 'model_type': 'pytorch'} exposure 796b9a12 None \n",
"1 {'use_pytorch': 1, 'model_type': 'pytorch'} exposure 796b9a12 None \n",
"2 {'use_pytorch': 1, 'model_type': 'pytorch'} exposure 796b9a12 None \n",
"3 {'use_pytorch': 1, 'model_type': 'pytorch'} exposure 796b9a12 None \n",
"4 {'use_pytorch': 0, 'model_type': 'surprise'} exposure 796b9a12 None "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One row per logged event; columns come from the record keys.\n",
"df = pd.DataFrame(data_list)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the 'rate' events and flatten the two nested dict columns:\n",
"#   variant <- params['use_pytorch']\n",
"#   rating  <- extra_data['rate_val']\n",
"is_rate_event = df['event'] == 'rate'\n",
"exp_df = df.loc[is_rate_event, ['params', 'extra_data']].assign(\n",
"    variant=lambda frame: frame['params'].apply(lambda p: p['use_pytorch']),\n",
"    rating=lambda frame: frame['extra_data'].apply(lambda e: e['rate_val']),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>params</th>\n",
"      <th>extra_data</th>\n",
"      <th>variant</th>\n",
"      <th>rating</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>32</th>\n",
"      <td>{'use_pytorch': 0, 'model_type': 'surprise'}</td>\n",
"      <td>{'rate_val': 3}</td>\n",
"      <td>0</td>\n",
"      <td>3</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>36</th>\n",
"      <td>{'use_pytorch': 1, 'model_type': 'pytorch'}</td>\n",
"      <td>{'rate_val': 7}</td>\n",
"      <td>1</td>\n",
"      <td>7</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>41</th>\n",
"      <td>{'use_pytorch': 1, 'model_type': 'pytorch'}</td>\n",
"      <td>{'rate_val': 0}</td>\n",
"      <td>1</td>\n",
"      <td>0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>44</th>\n",
"      <td>{'use_pytorch': 0, 'model_type': 'surprise'}</td>\n",
"      <td>{'rate_val': 7}</td>\n",
"      <td>0</td>\n",
"      <td>7</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>48</th>\n",
"      <td>{'use_pytorch': 0, 'model_type': 'surprise'}</td>\n",
"      <td>{'rate_val': 3}</td>\n",
"      <td>0</td>\n",
"      <td>3</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" params extra_data variant \\\n",
"32 {'use_pytorch': 0, 'model_type': 'surprise'} {'rate_val': 3} 0 \n",
"36 {'use_pytorch': 1, 'model_type': 'pytorch'} {'rate_val': 7} 1 \n",
"41 {'use_pytorch': 1, 'model_type': 'pytorch'} {'rate_val': 0} 1 \n",
"44 {'use_pytorch': 0, 'model_type': 'surprise'} {'rate_val': 7} 0 \n",
"48 {'use_pytorch': 0, 'model_type': 'surprise'} {'rate_val': 3} 0 \n",
"\n",
" rating \n",
"32 3 \n",
"36 7 \n",
"41 0 \n",
"44 7 \n",
"48 3 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of the rating events with their extracted\n",
"# 'variant' and 'rating' columns.\n",
"exp_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Ratings per experiment arm as NumPy arrays:\n",
"# d1 holds variant 0 (use_pytorch == 0), d2 holds variant 1 (use_pytorch == 1).\n",
"d1, d2 = (\n",
"    exp_df.loc[exp_df['variant'] == arm, 'rating'].values\n",
"    for arm in (0, 1)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.9650813700331365 0.37882815062317066\n"
]
}
],
"source": [
"# Two-sample t-test comparing the ratings of the two arms.\n",
"# scipy.stats is imported in the notebook's top import cell.\n",
"# ttest_ind defaults to equal_var=True, i.e. the pooled-variance Student's t-test.\n",
"statistic, pval = scipy.stats.ttest_ind(d1, d2)\n",
"print(statistic, pval)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Synthetic sanity check: draw two samples of 50 integers from different\n",
"# ranges (1-5 vs. 3-9) to feed the t-test in the next cell.\n",
"# NOTE: this overwrites d1/d2 computed from the real experiment above.\n",
"# A dedicated seeded generator makes the draw reproducible across kernel\n",
"# restarts without touching the global random state.\n",
"rng = random.Random(42)\n",
"d1 = rng.choices(range(1, 6), k=50)\n",
"d2 = rng.choices(range(3, 10), k=50)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Ttest_indResult(statistic=-10.876400571212649, pvalue=1.5325813144977069e-18)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same t-test on the current d1/d2, which are the synthetic samples from the\n",
"# previous cell when the notebook is run top to bottom.\n",
"scipy.stats.ttest_ind(d1,d2)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}