181 lines
No EOL
5.8 KiB
Python
181 lines
No EOL
5.8 KiB
Python
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
|
|
def read_csv_with_fallback(file_path):
    """
    Attempt to read a CSV file with multiple parsing strategies.

    First tries to parse the file assuming it has a header row; if that
    fails, retries headerless with an explicit column list.

    Args:
        file_path (str): Path to the CSV file

    Returns:
        pandas.DataFrame: Parsed DataFrame with a numeric 'timestamp' column

    Raises:
        ValueError: If a required column is missing after parsing.
    """
    # Column names used when the file has no header row.
    fallback_columns = ['session_id', 'timestamp', 'service_time', 'db_time',
                        'cache_time', 'db_rows_read', 'db_rows_written',
                        'db_total_rows', 'cache_hits', 'cache_misses']
    try:
        # First, try reading with a header row.
        try:
            df = pd.read_csv(file_path,
                             dtype={'timestamp': str},  # read as string so cleanup below works
                             skipinitialspace=True,
                             skip_blank_lines=True)
        except Exception:
            # If that fails, try reading without a header and specify column names.
            df = pd.read_csv(file_path,
                             names=fallback_columns,
                             header=None,
                             dtype={'timestamp': str},
                             skipinitialspace=True,
                             skip_blank_lines=True)

        # Validate required columns BEFORE touching df['timestamp'] —
        # doing it afterwards (as before) surfaced a missing column as an
        # opaque KeyError instead of this intended, clearer ValueError.
        required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
        for col in required_columns:
            if col not in df.columns:
                raise ValueError(f"Missing required column: {col}")

        # Remove stray repeated header rows (cells literally equal to 'timestamp').
        df = df[df['timestamp'] != 'timestamp']

        # Convert timestamp to numeric; unparseable values become NaN.
        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')

        return df

    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
|
|
|
|
def convert_timestamps(df):
    """
    Convert the 'timestamp' column from epoch milliseconds to datetime.

    The conversion is done in place on the given frame, which is also
    returned for call-chaining convenience.

    Args:
        df (pandas.DataFrame): Input DataFrame

    Returns:
        pandas.DataFrame: DataFrame with converted timestamps
    """
    # Millisecond epoch values -> pandas datetime64 series.
    as_datetime = pd.to_datetime(df['timestamp'], unit='ms')
    df['timestamp'] = as_datetime
    return df
|
|
|
|
def _column_stats(series):
    """Return avg/p50/p95/p99/max summary for one latency column."""
    return {
        'avg': series.mean(),
        'p50': series.quantile(0.5),
        'p95': series.quantile(0.95),
        'p99': series.quantile(0.99),
        'max': series.max(),
    }


def analyze_latency_data(df):
    """
    Calculate latency statistics.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns.

    Returns:
        dict: Mapping of category ('overall', 'db', 'cache') to a dict
        with 'avg', 'p50', 'p95', 'p99' and 'max' values.
    """
    # The same five metrics apply to every latency column, so the
    # per-column summary lives in one helper instead of three copies.
    return {
        'overall': _column_stats(df['service_time']),
        'db': _column_stats(df['db_time']),
        'cache': _column_stats(df['cache_time']),
    }
|
|
|
|
def plot_latency_graph(df):
    """
    Create a multi-axis time series plot for latencies.

    Draws overall service time on the primary y-axis and DB/cache
    times on two twin axes so each series keeps its own scale.

    Args:
        df (pandas.DataFrame): DataFrame with timestamp and time columns
    """
    plt.figure(figsize=(15, 7))

    # Primary axis: overall service time.
    service_ax = plt.gca()
    service_ax.plot(df['timestamp'], df['service_time'],
                    label='Overall Service Time', color='blue')

    # Twin axis for DB time.
    db_ax = service_ax.twinx()
    db_ax.plot(df['timestamp'], df['db_time'],
               label='DB Time', color='red', linestyle='--')

    # Third axis for cache time, pushed right so its scale stays readable.
    cache_ax = service_ax.twinx()
    cache_ax.spines['right'].set_position(('axes', 1.2))
    cache_ax.plot(df['timestamp'], df['cache_time'],
                  label='Cache Time', color='green', linestyle=':')

    # Axis labels, colored to match their series.
    service_ax.set_xlabel('Timestamp')
    service_ax.set_ylabel('Overall Service Time (ms)', color='blue')
    db_ax.set_ylabel('DB Time (ms)', color='red')
    cache_ax.set_ylabel('Cache Time (ms)', color='green')

    # Tilt timestamp tick labels for readability.
    plt.gcf().autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # Merge the three per-axis legends into one box on the primary axis.
    handles, labels = [], []
    for ax in (service_ax, db_ax, cache_ax):
        ax_handles, ax_labels = ax.get_legend_handles_labels()
        handles += ax_handles
        labels += ax_labels
    service_ax.legend(handles, labels, loc='best')

    plt.tight_layout()
    plt.show()
|
|
|
|
def main(file_path='server_metrics.csv'):
    """
    Main function to process and visualize server metrics.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        # Load, clean and chronologically order the metrics.
        df = read_csv_with_fallback(file_path)
        df = convert_timestamps(df)
        df = df.sort_values('timestamp')

        # Compute and report per-category latency summaries.
        stats = analyze_latency_data(df)
        print("Latency Statistics:")
        for section, section_metrics in stats.items():
            print(f"\n{section.capitalize()} Latency:")
            for name, value in section_metrics.items():
                print(f"{name}: {value:.2f} ms")

        # Finally, visualize the latency breakdown over time.
        plot_latency_graph(df)

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")
|
|
|
|
# Script entry point: analyze the default CSV path when run directly.
if __name__ == "__main__":
    main()