import boto3
import pandas as pd
from io import StringIO
def load_all_csvs_from_s3(bucket_name):
= boto3.resource('s3')
s3 = s3.Bucket(bucket_name)
= []
for obj in bucket.objects.all():
if obj.key.endswith('.csv') and obj.key.startswith('data'):
= obj.get()['Body'].read().decode('utf-8')
data = pd.read_csv(StringIO(data))
= pd.concat(dfs, ignore_index=True)
final_df return final_df
= load_all_csvs_from_s3('dipy-parallel-tests')
='cpu_count', inplace=True) df.sort_values(by
In [2]:
In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
= df['data_shape'].unique()
for shape in shapes:
= df[df['data_shape'] == shape]
shape_df = shape_df['model'].unique()
for model in models:
= shape_df[shape_df['model'] == model]
= model_df['cpu_count'].unique()
= plt.subplots(figsize=(10, 10))
fig, ax
for cpu_count in cpu_counts:
= model_df[model_df['cpu_count'] == cpu_count]
# Calculate the average time of the serial runs
= cpu_df[cpu_df['engine'] == 'serial']['time'].mean()
# Group by 'num_chunks' and calculate the mean and std of 'time'
= cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
# Express the time of the ray runs as a percentage of the average serial time
'mean'] = grouped_df['mean'].apply(lambda x: (x / serial_avg_time) * 100 if serial_avg_time else 0)
'num_chunks'], grouped_df['mean'], yerr=grouped_df['std'], label=f'{cpu_count} CPUs', fmt='-o')
f'Model Type {model}, Shape {shape} (Average Time as Percentage of Serial Time with Error Bars)')
ax.set_title('num_chunks (log scale)')
ax.set_xlabel('Average Time as Percentage of Serial Time')
ax.set_ylabel(# Add a legend
for shape in shapes:
= df[df['data_shape'] == shape]
shape_df = shape_df['model'].unique()
for model in models:
= shape_df[shape_df['model'] == model]
model_df = model_df['cpu_count'].unique()
= plt.subplots(figsize=(10, 10))
fig, ax
for cpu_count in cpu_counts:
= model_df[model_df['cpu_count'] == cpu_count]
# Calculate the average time of the serial runs
= cpu_df[cpu_df['engine'] == 'serial']['time'].mean()
# Group by 'num_chunks' and calculate the mean and std of 'time'
= cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
# Calculate the speedup as the ratio of the serial time to the ray time
'mean'] = grouped_df['mean'].apply(lambda x: serial_avg_time / x if x else 0)
'num_chunks'], grouped_df['mean'], yerr=grouped_df['std'], label=f'{cpu_count} CPUs', fmt='-o')
f'Model Type {model}, Shape {shape} (Speedup over Serial Time with Error Bars)')
ax.set_title('num_chunks (log scale)')
ax.set_xlabel('Speedup over Serial Time')
ax.set_ylabel(# Add a legend
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
= df['data_shape'].unique()
for shape in shapes:
= df[df['data_shape'] == shape]
shape_df = shape_df['model'].unique()
for model in models:
= shape_df[shape_df['model'] == model]
model_df = model_df['cpu_count'].unique()
= plt.subplots(figsize=(10, 10))
fig, ax
for cpu_count in cpu_counts:
= model_df[model_df['cpu_count'] == cpu_count]
= cpu_df[cpu_df['engine'] == 'serial']['time'].mean()
'efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
= cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
'num_chunks'], grouped_df['mean'], yerr=grouped_df['std'], label=f'{cpu_count} CPUs', fmt='-o')
f'Model Type {model}, Shape {shape} (Efficency per Core with Error Bars)')
ax.set_title('num_chunks (log scale)')
ax.set_xlabel('Efficency per Core')
ax.set_ylabel(# Add a legend
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation:
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation:
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation:
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation:
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation:
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation:
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation:
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation:
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation:
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation:
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/ FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
In [5]:
= plt.subplots(figsize=(10, 10))
fig, ax
for cpu_count in cpu_counts:
= model_df[model_df['cpu_count'] == cpu_count]
# Calculate the average time of the serial runs for each number of chunks
= cpu_df[cpu_df['engine'] == 'serial'].groupby('num_chunks')['time'].mean()
# Calculate the average time for the current cpu_count for each number of chunks
= cpu_df.groupby('num_chunks')['time'].mean()
# Calculate efficiency per core for each number of chunks
= (serial_avg_time / avg_time) / cpu_count
# Plot efficiency per core
=f'{cpu_count} CPUs', fmt='-o')
ax.errorbar(efficiency.index, efficiency.values, label
'Number of chunks')
ax.set_xlabel('Efficiency per core')
In [6]:
# Calculate the efficiency per core as the ratio of the serial time to the ray time divided by the number of CPUs
'mean'] = grouped_df.apply(lambda row: (serial_avg_time / row['mean']) / row['cpu_count'] if row['mean'] else 0, axis=1) grouped_df[
KeyError: 'cpu_count'
In [9]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
= df['data_shape'].unique()
for shape in shapes:
= df[df['data_shape'] == shape]
shape_df = shape_df['model'].unique()
for model in models:
= shape_df[shape_df['model'] == model]
model_df = model_df['cpu_count'].unique()
= plt.subplots(figsize=(10, 10))
fig, ax
for cpu_count in cpu_counts:
= model_df[model_df['cpu_count'] == cpu_count]
# Group by num_chunks and calculate the mean time
= cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
# Find the num_chunks with the minimum mean time
= grouped_df.loc[grouped_df['mean'].idxmin(), 'num_chunks']
# Filter the dataframe to include only the rows with the fastest num_chunks
= cpu_df[cpu_df['num_chunks'] == fastest_num_chunks]
# Calculate the mean time for the fastest num_chunks
= fastest_df['time'].mean()
ax.plot(cpu_count, mean_time,
f'Model Type {model}, Shape {shape} (Average Time for Fastest num_chunks)')
ax.set_xlabel('Average Time')
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/ FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/ FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/ FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/ FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/ FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/ FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/ FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/ FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/ FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/ FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()