import boto3
import pandas as pd
from io import StringIO
def load_all_csvs_from_s3(bucket_name):
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)
dfs = []
for obj in bucket.objects.all():
if obj.key.endswith('.csv') and obj.key.startswith('data'):
data = obj.get()['Body'].read().decode('utf-8')
df = pd.read_csv(StringIO(data))
dfs.append(df)
final_df = pd.concat(dfs, ignore_index=True)
return final_df
df = load_all_csvs_from_s3('dipy-parallel-tests')
df.sort_values(by='cpu_count', inplace=True)In [2]:
In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
shapes = df['data_shape'].unique()
for shape in shapes:
shape_df = df[df['data_shape'] == shape]
models = shape_df['model'].unique()
for model in models:
model_df = shape_df[shape_df['model'] == model]
cpu_counts = model_df['cpu_count'].unique()
fig, ax = plt.subplots(figsize=(10, 10))
for cpu_count in cpu_counts:
cpu_df = model_df[model_df['cpu_count'] == cpu_count]
# Calculate the average time of the serial runs
serial_avg_time = cpu_df[cpu_df['engine'] == 'serial']['time'].mean()
# Group by 'num_chunks' and calculate the mean and std of 'time'
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
# Express the time of the ray runs as a percentage of the average serial time
grouped_df['mean'] = grouped_df['mean'].apply(lambda x: (x / serial_avg_time) * 100 if serial_avg_time else 0)
ax.errorbar(grouped_df['num_chunks'], grouped_df['mean'], yerr=grouped_df['std'], label=f'{cpu_count} CPUs', fmt='-o')
ax.set_xscale('log')
ax.set_ylim(bottom=0)
ax.set_title(f'Model Type {model}, Shape {shape} (Average Time as Percentage of Serial Time with Error Bars)')
ax.set_xlabel('num_chunks (log scale)')
ax.set_ylabel('Average Time as Percentage of Serial Time')
ax.legend() # Add a legend
plt.show()
for shape in shapes:
shape_df = df[df['data_shape'] == shape]
models = shape_df['model'].unique()
for model in models:
model_df = shape_df[shape_df['model'] == model]
cpu_counts = model_df['cpu_count'].unique()
fig, ax = plt.subplots(figsize=(10, 10))
for cpu_count in cpu_counts:
cpu_df = model_df[model_df['cpu_count'] == cpu_count]
# Calculate the average time of the serial runs
serial_avg_time = cpu_df[cpu_df['engine'] == 'serial']['time'].mean()
# Group by 'num_chunks' and calculate the mean and std of 'time'
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
# Calculate the speedup as the ratio of the serial time to the ray time
grouped_df['mean'] = grouped_df['mean'].apply(lambda x: serial_avg_time / x if x else 0)
ax.errorbar(grouped_df['num_chunks'], grouped_df['mean'], yerr=grouped_df['std'], label=f'{cpu_count} CPUs', fmt='-o')
ax.set_xscale('log')
ax.set_ylim(bottom=0)
ax.set_title(f'Model Type {model}, Shape {shape} (Speedup over Serial Time with Error Bars)')
ax.set_xlabel('num_chunks (log scale)')
ax.set_ylabel('Speedup over Serial Time')
ax.legend() # Add a legend
plt.show()/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/392492796.py:58: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean, np.std]).reset_index()
In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
shapes = df['data_shape'].unique()
for shape in shapes:
shape_df = df[df['data_shape'] == shape]
models = shape_df['model'].unique()
for model in models:
model_df = shape_df[shape_df['model'] == model]
cpu_counts = model_df['cpu_count'].unique()
fig, ax = plt.subplots(figsize=(10, 10))
for cpu_count in cpu_counts:
cpu_df = model_df[model_df['cpu_count'] == cpu_count]
serial_avg_time = cpu_df[cpu_df['engine'] == 'serial']['time'].mean()
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
ax.errorbar(grouped_df['num_chunks'], grouped_df['mean'], yerr=grouped_df['std'], label=f'{cpu_count} CPUs', fmt='-o')
ax.set_xscale('log')
ax.set_ylim(bottom=0)
ax.set_title(f'Model Type {model}, Shape {shape} (Efficency per Core with Error Bars)')
ax.set_xlabel('num_chunks (log scale)')
ax.set_ylabel('Efficency per Core')
ax.legend() # Add a legend
plt.show()/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:23: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cpu_df['efficency'] = (serial_avg_time / cpu_df['time']) / cpu_count
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function mean at 0x10636d260> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_99130/2647551254.py:25: FutureWarning: The provided callable <function std at 0x10636d3a0> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "std" instead.
grouped_df = cpu_df.groupby('num_chunks')['efficency'].agg([np.mean, np.std]).reset_index()
In [5]:
fig, ax = plt.subplots(figsize=(10, 10))
for cpu_count in cpu_counts:
cpu_df = model_df[model_df['cpu_count'] == cpu_count]
# Calculate the average time of the serial runs for each number of chunks
serial_avg_time = cpu_df[cpu_df['engine'] == 'serial'].groupby('num_chunks')['time'].mean()
# Calculate the average time for the current cpu_count for each number of chunks
avg_time = cpu_df.groupby('num_chunks')['time'].mean()
# Calculate efficiency per core for each number of chunks
efficiency = (serial_avg_time / avg_time) / cpu_count
# Plot efficiency per core
ax.errorbar(efficiency.index, efficiency.values, label=f'{cpu_count} CPUs', fmt='-o')
ax.set_xlabel('Number of chunks')
ax.set_ylabel('Efficiency per core')
ax.legend()
plt.show()In [6]:
# Calculate the efficiency per core as the ratio of the serial time to the ray time divided by the number of CPUs
grouped_df['mean'] = grouped_df.apply(lambda row: (serial_avg_time / row['mean']) / row['cpu_count'] if row['mean'] else 0, axis=1)KeyError: 'cpu_count'
In [9]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
shapes = df['data_shape'].unique()
for shape in shapes:
shape_df = df[df['data_shape'] == shape]
models = shape_df['model'].unique()
for model in models:
model_df = shape_df[shape_df['model'] == model]
cpu_counts = model_df['cpu_count'].unique()
fig, ax = plt.subplots(figsize=(10, 10))
for cpu_count in cpu_counts:
cpu_df = model_df[model_df['cpu_count'] == cpu_count]
# Group by num_chunks and calculate the mean time
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
# Find the num_chunks with the minimum mean time
fastest_num_chunks = grouped_df.loc[grouped_df['mean'].idxmin(), 'num_chunks']
# Filter the dataframe to include only the rows with the fastest num_chunks
fastest_df = cpu_df[cpu_df['num_chunks'] == fastest_num_chunks]
# Calculate the mean time for the fastest num_chunks
mean_time = fastest_df['time'].mean()
ax.plot(cpu_count, mean_time, 'o')
ax.set_title(f'Model Type {model}, Shape {shape} (Average Time for Fastest num_chunks)')
ax.set_xlabel('cpu_count')
ax.set_ylabel('Average Time')
plt.show()/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()
/var/folders/gb/604kkgfn2cj2q1bp1zw3t56m0000gn/T/ipykernel_20072/744342013.py:21: FutureWarning: The provided callable <function mean at 0x121c2cd60> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.
grouped_df = cpu_df.groupby('num_chunks')['time'].agg([np.mean]).reset_index()








