# 1
# 0
# Fork 0
# cmpt815perf/results/1 - Scale Only App/Client - Frankfurt/server_processing.py
# 2024-12-11 15:08:53 -07:00
#
# 181 lines
# No EOL
# 5.8 KiB
# Python

import pandas as pd
import matplotlib.pyplot as plt
def read_csv_with_fallback(file_path):
    """
    Read a server-metrics CSV, with or without a header row.

    The file is first read assuming a header row. The header-less layout
    (fixed column names) is used instead when either the header read fails
    to parse, or none of the required columns appear in the parsed header,
    which means the first data row was consumed as the header.

    Repeated header rows inside the data are dropped, and the timestamp and
    latency columns are coerced to numeric (unparseable values become NaN).

    Args:
        file_path (str): Path to the CSV file

    Returns:
        pandas.DataFrame: Parsed DataFrame with numeric 'timestamp',
        'service_time', 'db_time' and 'cache_time' columns

    Raises:
        ValueError: If a required column is missing.
        Exception: Any error raised by pandas while reading is printed
            and re-raised unchanged (e.g. FileNotFoundError).
    """
    required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
    fallback_names = ['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
                      'db_rows_read', 'db_rows_written', 'db_total_rows',
                      'cache_hits', 'cache_misses']

    def _read_headerless():
        # Header-less layout: assign the known column names by position.
        return pd.read_csv(file_path,
                           names=fallback_names,
                           header=None,
                           dtype={'timestamp': str},
                           skipinitialspace=True,
                           skip_blank_lines=True)

    try:
        try:
            df = pd.read_csv(file_path,
                             dtype={'timestamp': str},  # Ensure timestamp is read as string
                             skipinitialspace=True,
                             skip_blank_lines=True)
        except pd.errors.ParserError:
            # Malformed relative to the inferred header; retry with fixed names.
            df = _read_headerless()
        else:
            # A header-less file parses "successfully" with its first data row
            # as the header, so detect that case explicitly.
            if not any(col in df.columns for col in required_columns):
                df = _read_headerless()

        # Validate before touching any column so a missing one is reported
        # as a ValueError rather than a KeyError.
        for col in required_columns:
            if col not in df.columns:
                raise ValueError(f"Missing required column: {col}")

        # Remove any rows where timestamp is 'timestamp' (repeated header rows);
        # copy so the assignments below do not write into a slice.
        df = df[df['timestamp'] != 'timestamp'].copy()

        # A stray header row makes pandas read whole columns as strings, so
        # coerce every required column, not only the timestamp.
        for col in required_columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        return df
    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
def convert_timestamps(df):
    """
    Replace the 'timestamp' column with datetime values.

    The existing values are interpreted as milliseconds since the Unix
    epoch. The column is overwritten on the DataFrame that was passed in,
    and that same object is returned.

    Args:
        df (pandas.DataFrame): Input DataFrame with a 'timestamp' column

    Returns:
        pandas.DataFrame: The same DataFrame, with 'timestamp' as datetimes
    """
    epoch_millis = df['timestamp']
    as_datetimes = pd.to_datetime(epoch_millis, unit='ms')
    df['timestamp'] = as_datetimes
    return df
def analyze_latency_data(df):
    """
    Summarize the service, DB and cache latency columns.

    Args:
        df (pandas.DataFrame): DataFrame with 'service_time', 'db_time'
            and 'cache_time' columns

    Returns:
        dict: Maps 'overall', 'db' and 'cache' to a dict holding the
        'avg', 'p50', 'p95', 'p99' and 'max' of the matching column
    """
    def _summarize(series):
        # Mean, three quantiles and the maximum of one latency column.
        return {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }

    # Report label -> source column, in the order the report is printed.
    sections = (
        ('overall', 'service_time'),
        ('db', 'db_time'),
        ('cache', 'cache_time'),
    )
    return {label: _summarize(df[column]) for label, column in sections}
def plot_latency_graph(df):
    """
    Show service, DB and cache latency over time on three y-axes.

    All three series share the x-axis. Service time uses the left axis,
    DB time a right-hand twin axis, and cache time a second right-hand
    twin axis whose spine is moved outward. The figure is displayed with
    plt.show().

    Args:
        df (pandas.DataFrame): DataFrame with 'timestamp', 'service_time',
            'db_time' and 'cache_time' columns
    """
    fig = plt.figure(figsize=(15, 7))
    when = df['timestamp']

    # Left axis: overall service time.
    service_ax = plt.gca()
    service_ax.plot(when, df['service_time'],
                    label='Overall Service Time', color='blue')

    # First twin axis: DB time.
    db_ax = service_ax.twinx()
    db_ax.plot(when, df['db_time'],
               label='DB Time', color='red', linestyle='--')

    # Second twin axis: cache time, with its spine shifted right of the DB axis.
    cache_ax = service_ax.twinx()
    cache_ax.spines['right'].set_position(('axes', 1.2))
    cache_ax.plot(when, df['cache_time'],
                  label='Cache Time', color='green', linestyle=':')

    # Axis labels, each colored like its series.
    service_ax.set_xlabel('Timestamp')
    axis_labels = (
        (service_ax, 'Overall Service Time (ms)', 'blue'),
        (db_ax, 'DB Time (ms)', 'red'),
        (cache_ax, 'Cache Time (ms)', 'green'),
    )
    for axis, text, colour in axis_labels:
        axis.set_ylabel(text, color=colour)

    # Rotate/align the date tick labels.
    fig.autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # Gather the handles from all three axes into one legend.
    handles, captions = [], []
    for axis in (service_ax, db_ax, cache_ax):
        axis_handles, axis_captions = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        captions = captions + axis_captions
    service_ax.legend(handles, captions, loc='best')

    plt.tight_layout()
    plt.show()
def main(file_path='server_metrics.csv'):
    """
    Load a metrics CSV, print latency statistics and show the latency plot.

    Errors are reported on stdout rather than propagated: a missing file
    gets a dedicated message, anything else a generic one.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        # Load, convert the timestamps, then order the rows chronologically.
        frame = convert_timestamps(read_csv_with_fallback(file_path))
        frame = frame.sort_values('timestamp')

        summary = analyze_latency_data(frame)

        print("Latency Statistics:")
        for section in summary:
            print(f"\n{section.capitalize()} Latency:")
            for label, number in summary[section].items():
                print(f"{label}: {number:.2f} ms")

        plot_latency_graph(frame)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")
# Script entry point: run the analysis with the default CSV path.
if __name__ == "__main__":
    main()