import boto3
import pandas as pd
from io import StringIO
def load_all_csvs_from_s3(bucket_name):
= boto3.resource('s3')
s3 = s3.Bucket(bucket_name)
bucket
= []
dfs
for obj in bucket.objects.all():
if obj.key.endswith('.csv') and obj.key.startswith('data'):
= obj.get()['Body'].read().decode('utf-8')
data = pd.read_csv(StringIO(data))
df
dfs.append(df)
= pd.concat(dfs, ignore_index=True)
final_df return final_df
= load_all_csvs_from_s3('dipy-parallel-tests')
df
='cpu_count', inplace=True) df.sort_values(by
In [2]:
In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
= df['data_shape'].unique()
shapes
for shape in shapes:
= df[df['data_shape'] == shape]
shape_df = shape_df['model'].unique()
models
for model in models:
= shape_df[shape_df['model'] == model]
model_df
= model_df['cpu_count'].unique()
cpu_counts
= plt.subplots(figsize=(10, 10))
fig, ax
for cpu_count in cpu_counts:
= model_df[model_df['cpu_count'] == cpu_count]
cpu_df
# Calculate the average time of the serial runs
= cpu_df[cpu_df['engine'] == 'serial']['time'].mean()
serial_avg_time
# Group by 'num_chunks' and calculate the mean and std of 'time'
= cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
grouped_df
# Express the time of the ray runs as a percentage of the average serial time
'mean'] = grouped_df['mean'].apply(lambda x: (x / serial_avg_time) * 100 if serial_avg_time else 0)
grouped_df[
'num_chunks'], grouped_df['mean'], yerr=grouped_df['std'], label=f'{cpu_count} CPUs', fmt='-o')
ax.errorbar(grouped_df[
'log')
ax.set_xscale(=0)
ax.set_ylim(bottom
f'Model Type {model}, Shape {shape} (Average Time as Percentage of Serial Time with Error Bars)')
ax.set_title('num_chunks (log scale)')
ax.set_xlabel('Average Time as Percentage of Serial Time')
ax.set_ylabel(# Add a legend
ax.legend()
plt.show()
for shape in shapes:
= df[df['data_shape'] == shape]
shape_df = shape_df['model'].unique()
models
for model in models:
= shape_df[shape_df['model'] == model]
model_df = model_df['cpu_count'].unique()
cpu_counts
= plt.subplots(figsize=(10, 10))
fig, ax
for cpu_count in cpu_counts:
= model_df[model_df['cpu_count'] == cpu_count]
cpu_df
# Calculate the average time of the serial runs
= cpu_df[cpu_df['engine'] == 'serial']['time'].mean()
serial_avg_time
# Group by 'num_chunks' and calculate the mean and std of 'time'
= cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
grouped_df
# Calculate the speedup as the ratio of the serial time to the ray time
'mean'] = grouped_df['mean'].apply(lambda x: serial_avg_time / x if x else 0)
grouped_df[
'num_chunks'], grouped_df['mean'], yerr=grouped_df['std'], label=f'{cpu_count} CPUs', fmt='-o')
ax.errorbar(grouped_df[
'log')
ax.set_xscale(=0)
ax.set_ylim(bottom
f'Model Type {model}, Shape {shape} (Speedup over Serial Time with Error Bars)')
ax.set_title('num_chunks (log scale)')
ax.set_xlabel('Speedup over Serial Time')
ax.set_ylabel(# Add a legend
ax.legend() plt.show()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
= df['data_shape'].unique()
shapes
for shape in shapes:
= df[df['data_shape'] == shape]
shape_df = shape_df['model'].unique()
models
for model in models:
= shape_df[shape_df['model'] == model]
model_df = model_df['cpu_count'].unique()
cpu_counts
= plt.subplots(figsize=(10, 10))
fig, ax
for cpu_count in cpu_counts:
= model_df[model_df['cpu_count'] == cpu_count]
cpu_df
= cpu_df[cpu_df['engine'] == 'serial']['time'].mean()
serial_avg_time
'efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
cpu_df[
= cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
grouped_df
'num_chunks'], grouped_df['mean'], yerr=grouped_df['std'], label=f'{cpu_count} CPUs', fmt='-o')
ax.errorbar(grouped_df[
'log')
ax.set_xscale(=0)
ax.set_ylim(bottom
f'Model Type {model}, Shape {shape} (Efficency per Core with Error Bars)')
ax.set_title('num_chunks (log scale)')
ax.set_xlabel('Efficency per Core')
ax.set_ylabel(# Add a legend
ax.legend() plt.show()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
In [5]:
= plt.subplots(figsize=(10, 10))
fig, ax
for cpu_count in cpu_counts:
= model_df[model_df['cpu_count'] == cpu_count]
cpu_df
# Calculate the average time of the serial runs for each number of chunks
= cpu_df[cpu_df['engine'] == 'serial'].groupby('num_chunks')['time'].mean()
serial_avg_time
# Calculate the average time for the current cpu_count for each number of chunks
= cpu_df.groupby('num_chunks')['time'].mean()
avg_time
# Calculate efficiency per core for each number of chunks
= (serial_avg_time / avg_time) / cpu_count
efficiency
# Plot efficiency per core
=f'{cpu_count} CPUs', fmt='-o')
ax.errorbar(efficiency.index, efficiency.values, label
'Number of chunks')
ax.set_xlabel('Efficiency per core')
ax.set_ylabel(
ax.legend() plt.show()
In [6]:
# Calculate the efficiency per core as the ratio of the serial time to the ray time divided by the number of CPUs
'mean'] = grouped_df.apply(lambda row: (serial_avg_time / row['mean']) / row['cpu_count'] if row['mean'] else 0, axis=1) grouped_df[
KeyError: 'cpu_count'
In [9]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
= df['data_shape'].unique()
shapes
for shape in shapes:
= df[df['data_shape'] == shape]
shape_df = shape_df['model'].unique()
models
for model in models:
= shape_df[shape_df['model'] == model]
model_df = model_df['cpu_count'].unique()
cpu_counts
= plt.subplots(figsize=(10, 10))
fig, ax
for cpu_count in cpu_counts:
= model_df[model_df['cpu_count'] == cpu_count]
cpu_df
# Group by num_chunks and calculate the mean time
= cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
grouped_df
# Find the num_chunks with the minimum mean time
= grouped_df.loc[grouped_df['mean'].idxmin(), 'num_chunks']
fastest_num_chunks
# Filter the dataframe to include only the rows with the fastest num_chunks
= cpu_df[cpu_df['num_chunks'] == fastest_num_chunks]
fastest_df
# Calculate the mean time for the fastest num_chunks
= fastest_df['time'].mean()
mean_time
'o')
ax.plot(cpu_count, mean_time,
f'Model Type {model}, Shape {shape} (Average Time for Fastest num_chunks)')
ax.set_title('cpu_count')
ax.set_xlabel('Average Time')
ax.set_ylabel( plt.show()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()