Updated metrics
After Width: | Height: | Size: 101 KiB |
|
@ -0,0 +1,102 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
|
||||
# Read the CSV file
|
||||
def read_csv_data(file_path):
    """
    Read CSV file with request data.

    Parameters:
    -----------
    file_path : str
        Path to the CSV file

    Returns:
    --------
    pandas.DataFrame
        DataFrame with parsed request data; 'Timestamp' is datetime64,
        'Duration (ms)' and 'Status Code' are int. Rows whose numeric
        columns cannot be parsed are dropped.
    """
    # Parse 'Timestamp' into datetime64 while reading.
    df = pd.read_csv(file_path, parse_dates=['Timestamp'])

    # A plain astype(int) raises on NaN or malformed values, so coerce
    # first (garbage -> NaN), drop the bad rows, then cast to int.
    numeric_cols = ['Duration (ms)', 'Status Code']
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    df = df.dropna(subset=numeric_cols)
    df['Duration (ms)'] = df['Duration (ms)'].astype(int)
    df['Status Code'] = df['Status Code'].astype(int)

    return df
|
||||
|
||||
# Visualize the data
|
||||
def visualize_request_data(df):
    """
    Plot request durations over time on a single axis.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data with 'Timestamp' and
        'Duration (ms)' columns
    """
    # Create the figure and the first axis
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot Duration on the left axis
    color1 = 'blue'
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Duration (ms)', color=color1)
    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
    ax1.tick_params(axis='y', labelcolor=color1)

    # Format x-axis to show timestamps nicely
    plt.gcf().autofmt_xdate()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

    # Title
    plt.title('Request Data over Time')

    # BUG FIX: the handles/labels were fetched but ax1.legend() was never
    # called, so no legend was ever drawn on the figure.
    lines1, labels1 = ax1.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='best')

    # Add grid
    ax1.grid(True, linestyle='--', alpha=0.7)

    # Tight layout to prevent cutting off labels
    plt.tight_layout()

    # Show the plot
    plt.show()
|
||||
|
||||
# Main execution
|
||||
def main():
    """Read the client metrics CSV, print a summary, and plot it."""
    # Path to your CSV file
    file_path = 'client_metrics.csv'

    try:
        # Read the data
        df = read_csv_data(file_path)

        # Print the text summary (previously defined but never invoked).
        print_data_summary(df)

        # Visualize the data
        visualize_request_data(df)

    except Exception as e:
        # Top-level boundary of a CLI script: report and exit cleanly.
        print(f"An error occurred: {e}")
|
||||
|
||||
# Demonstrate data summary
|
||||
def print_data_summary(df):
    """
    Print a plain-text summary of the request data to stdout.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data with 'Request Type' and
        'Duration (ms)' columns
    """
    total_requests = len(df)
    request_types = df['Request Type'].unique()

    print("\nData Summary:")
    print("-" * 20)
    print(f"Total Requests: {total_requests}")
    print(f"Unique Request Types: {request_types}")

    print("\nDuration Statistics:")
    print(df['Duration (ms)'].describe())
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
After Width: | Height: | Size: 218 KiB |
|
@ -0,0 +1,181 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def read_csv_with_fallback(file_path):
    """
    Attempt to read CSV file with multiple parsing strategies.

    First tries to parse assuming a header row; if that raises, re-reads
    with an explicit column list and no header. Repeated-header rows and
    rows whose timestamp is not numeric are dropped.

    Args:
        file_path (str): Path to the CSV file

    Returns:
        pandas.DataFrame: Parsed DataFrame with a numeric 'timestamp'
        column

    Raises:
        ValueError: If a required column is missing.
        Exception: Re-raises any underlying pandas parsing error.
    """
    # Options shared by both parsing strategies (previously duplicated).
    common_kwargs = dict(
        dtype={'timestamp': str},  # read timestamp as string; converted below
        skipinitialspace=True,
        skip_blank_lines=True,
    )
    fallback_columns = ['session_id', 'timestamp', 'service_time', 'db_time',
                        'cache_time', 'db_rows_read', 'db_rows_written',
                        'db_total_rows', 'cache_hits', 'cache_misses']
    try:
        try:
            # First, try reading with a header row.
            df = pd.read_csv(file_path, **common_kwargs)
        except Exception:
            # Fallback: no header; supply the expected column names.
            df = pd.read_csv(file_path, names=fallback_columns, header=None,
                             **common_kwargs)

        # Remove rows where a repeated header leaked into the data.
        df = df[df['timestamp'] != 'timestamp']

        # Convert timestamp to numeric; garbage becomes NaN and is dropped
        # (a NaN timestamp cannot be placed on the time axis anyway).
        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
        df = df.dropna(subset=['timestamp'])

        # Validate required columns
        required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
        for col in required_columns:
            if col not in df.columns:
                raise ValueError(f"Missing required column: {col}")

        return df

    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
|
||||
|
||||
def convert_timestamps(df):
    """
    Convert millisecond epoch timestamps to pandas datetimes.

    Args:
        df (pandas.DataFrame): Input DataFrame with a numeric
            'timestamp' column (milliseconds since the epoch)

    Returns:
        pandas.DataFrame: DataFrame whose 'timestamp' column is
            datetime64
    """
    # unit='ms' tells pandas the numbers are epoch milliseconds.
    converted = pd.to_datetime(df['timestamp'], unit='ms')
    df['timestamp'] = converted
    return df
|
||||
|
||||
def analyze_latency_data(df):
    """
    Calculate latency statistics.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns (milliseconds)

    Returns:
        dict: Mapping of category ('overall', 'db', 'cache') to a dict
            with 'avg', 'p50', 'p95', 'p99' and 'max' values.
    """
    def _column_stats(series):
        # One summary dict per latency column. Factored out so the three
        # categories stay guaranteed-identical instead of copy-pasted.
        return {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }

    return {
        'overall': _column_stats(df['service_time']),
        'db': _column_stats(df['db_time']),
        'cache': _column_stats(df['cache_time']),
    }
|
||||
|
||||
def plot_latency_graph(df):
    """
    Create a multi-axis time series plot for latencies.

    Args:
        df (pandas.DataFrame): DataFrame with 'timestamp',
            'service_time', 'db_time' and 'cache_time' columns
    """
    fig, ax_service = plt.subplots(figsize=(15, 7))

    # Overall service time on the primary (left) axis.
    ax_service.plot(df['timestamp'], df['service_time'],
                    label='Overall Service Time', color='blue')

    # DB time on a secondary y-axis sharing the same x-axis.
    ax_db = ax_service.twinx()
    ax_db.plot(df['timestamp'], df['db_time'],
               label='DB Time', color='red', linestyle='--')

    # Cache time on a third y-axis, pushed outward so its spine does not
    # overlap the DB axis.
    ax_cache = ax_service.twinx()
    ax_cache.spines['right'].set_position(('axes', 1.2))
    ax_cache.plot(df['timestamp'], df['cache_time'],
                  label='Cache Time', color='green', linestyle=':')

    # Axis labels, colour-matched to their series.
    ax_service.set_xlabel('Timestamp')
    ax_service.set_ylabel('Overall Service Time (ms)', color='blue')
    ax_db.set_ylabel('DB Time (ms)', color='red')
    ax_cache.set_ylabel('Cache Time (ms)', color='green')

    # Tilt the timestamp tick labels and set the title.
    fig.autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # Merge the three per-axis legends into a single legend box.
    handles, labels = [], []
    for axis in (ax_service, ax_db, ax_cache):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles += axis_handles
        labels += axis_labels
    ax_service.legend(handles, labels, loc='best')

    plt.tight_layout()
    plt.show()
|
||||
|
||||
def main(file_path='server_metrics.csv'):
    """
    Main function to process and visualize server metrics.

    Args:
        file_path (str, optional): Path to the CSV file.
            Defaults to 'server_metrics.csv'.
    """
    try:
        # Load, normalize, and chronologically order the raw metrics.
        df = read_csv_with_fallback(file_path)
        df = convert_timestamps(df)
        df = df.sort_values('timestamp')

        # Summarize latencies and report them to stdout.
        stats = analyze_latency_data(df)
        print("Latency Statistics:")
        for category, metrics in stats.items():
            print(f"\n{category.capitalize()} Latency:")
            for metric, value in metrics.items():
                print(f"{metric}: {value:.2f} ms")

        # Render the time-series breakdown.
        plot_latency_graph(df)

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,20 @@
|
|||
Overall Latency:
|
||||
avg: 19.67 ms
|
||||
p50: 15.00 ms
|
||||
p95: 72.60 ms
|
||||
p99: 100.00 ms
|
||||
max: 274.00 ms
|
||||
|
||||
Db Latency:
|
||||
avg: 7.15 ms
|
||||
p50: 0.00 ms
|
||||
p95: 39.00 ms
|
||||
p99: 78.00 ms
|
||||
max: 269.00 ms
|
||||
|
||||
Cache Latency:
|
||||
avg: 9.21 ms
|
||||
p50: 2.00 ms
|
||||
p95: 33.00 ms
|
||||
p99: 42.92 ms
|
||||
max: 197.00 ms
|
After Width: | Height: | Size: 97 KiB |
|
@ -0,0 +1,102 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
|
||||
# Read the CSV file
|
||||
def read_csv_data(file_path):
|
||||
"""
|
||||
Read CSV file with request data
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
file_path : str
|
||||
Path to the CSV file
|
||||
|
||||
Returns:
|
||||
--------
|
||||
pandas.DataFrame
|
||||
DataFrame with parsed request data
|
||||
"""
|
||||
# Read the CSV file
|
||||
df = pd.read_csv(file_path, parse_dates=['Timestamp'])
|
||||
|
||||
# Ensure data types are correct
|
||||
df['Duration (ms)'] = df['Duration (ms)'].astype(int)
|
||||
df['Status Code'] = df['Status Code'].astype(int)
|
||||
|
||||
return df
|
||||
|
||||
# Visualize the data
|
||||
def visualize_request_data(df):
|
||||
"""
|
||||
Create a dual-axis plot of request durations and status codes
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame containing request data
|
||||
"""
|
||||
# Create the figure and the first axis
|
||||
fig, ax1 = plt.subplots(figsize=(12, 6))
|
||||
|
||||
# Plot Duration on the left axis
|
||||
color1 = 'blue'
|
||||
ax1.set_xlabel('Timestamp')
|
||||
ax1.set_ylabel('Duration (ms)', color=color1)
|
||||
ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
|
||||
ax1.tick_params(axis='y', labelcolor=color1)
|
||||
|
||||
# Format x-axis to show timestamps nicely
|
||||
plt.gcf().autofmt_xdate()
|
||||
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
|
||||
|
||||
# Title and legend
|
||||
plt.title('Request Data over Time')
|
||||
|
||||
# Create a legend
|
||||
lines1, labels1 = ax1.get_legend_handles_labels()
|
||||
|
||||
# Add grid
|
||||
ax1.grid(True, linestyle='--', alpha=0.7)
|
||||
|
||||
# Tight layout to prevent cutting off labels
|
||||
plt.tight_layout()
|
||||
|
||||
# Show the plot
|
||||
plt.show()
|
||||
|
||||
# Main execution
|
||||
def main():
|
||||
# Path to your CSV file
|
||||
file_path = 'client_metrics.csv'
|
||||
|
||||
try:
|
||||
# Read the data
|
||||
df = read_csv_data(file_path)
|
||||
|
||||
# Visualize the data
|
||||
visualize_request_data(df)
|
||||
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
# Demonstrate data summary
|
||||
def print_data_summary(df):
|
||||
"""
|
||||
Print a summary of the request data
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame containing request data
|
||||
"""
|
||||
print("\nData Summary:")
|
||||
print("-" * 20)
|
||||
print(f"Total Requests: {len(df)}")
|
||||
print(f"Unique Request Types: {df['Request Type'].unique()}")
|
||||
|
||||
print("\nDuration Statistics:")
|
||||
print(df['Duration (ms)'].describe())
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
After Width: | Height: | Size: 255 KiB |
|
@ -0,0 +1,181 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def read_csv_with_fallback(file_path):
|
||||
"""
|
||||
Attempt to read CSV file with multiple parsing strategies.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the CSV file
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: Parsed DataFrame
|
||||
"""
|
||||
try:
|
||||
# First, try reading with header
|
||||
try:
|
||||
df = pd.read_csv(file_path,
|
||||
dtype={'timestamp': str}, # Ensure timestamp is read as string
|
||||
skipinitialspace=True,
|
||||
skip_blank_lines=True)
|
||||
except Exception:
|
||||
# If that fails, try reading without header and specify column names
|
||||
df = pd.read_csv(file_path,
|
||||
names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
|
||||
'db_rows_read', 'db_rows_written', 'db_total_rows',
|
||||
'cache_hits', 'cache_misses'],
|
||||
header=None,
|
||||
dtype={'timestamp': str},
|
||||
skipinitialspace=True,
|
||||
skip_blank_lines=True)
|
||||
|
||||
# Remove any rows where timestamp is 'timestamp'
|
||||
df = df[df['timestamp'] != 'timestamp']
|
||||
|
||||
# Convert timestamp to numeric
|
||||
df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
|
||||
|
||||
# Validate required columns
|
||||
required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
|
||||
for col in required_columns:
|
||||
if col not in df.columns:
|
||||
raise ValueError(f"Missing required column: {col}")
|
||||
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error reading CSV: {e}")
|
||||
print("Please check the file format and ensure it matches the expected structure.")
|
||||
raise
|
||||
|
||||
def convert_timestamps(df):
|
||||
"""
|
||||
Convert timestamps to datetime.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): Input DataFrame
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: DataFrame with converted timestamps
|
||||
"""
|
||||
# Convert millisecond timestamps to datetime
|
||||
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
|
||||
|
||||
return df
|
||||
|
||||
def analyze_latency_data(df):
|
||||
"""
|
||||
Calculate latency statistics.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): Input DataFrame
|
||||
|
||||
Returns:
|
||||
dict: Latency statistics
|
||||
"""
|
||||
# Calculate statistics
|
||||
stats = {
|
||||
'overall': {
|
||||
'avg': df['service_time'].mean(),
|
||||
'p50': df['service_time'].quantile(0.5),
|
||||
'p95': df['service_time'].quantile(0.95),
|
||||
'p99': df['service_time'].quantile(0.99),
|
||||
'max': df['service_time'].max()
|
||||
},
|
||||
'db': {
|
||||
'avg': df['db_time'].mean(),
|
||||
'p50': df['db_time'].quantile(0.5),
|
||||
'p95': df['db_time'].quantile(0.95),
|
||||
'p99': df['db_time'].quantile(0.99),
|
||||
'max': df['db_time'].max()
|
||||
},
|
||||
'cache': {
|
||||
'avg': df['cache_time'].mean(),
|
||||
'p50': df['cache_time'].quantile(0.5),
|
||||
'p95': df['cache_time'].quantile(0.95),
|
||||
'p99': df['cache_time'].quantile(0.99),
|
||||
'max': df['cache_time'].max()
|
||||
}
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
def plot_latency_graph(df):
|
||||
"""
|
||||
Create a multi-axis time series plot for latencies.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): DataFrame with timestamp and time columns
|
||||
"""
|
||||
plt.figure(figsize=(15, 7))
|
||||
|
||||
# Plot overall service time
|
||||
plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
|
||||
|
||||
# Create a twin axis for DB time
|
||||
ax1 = plt.gca()
|
||||
ax2 = ax1.twinx()
|
||||
ax2.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')
|
||||
|
||||
# Create a third axis for cache time
|
||||
ax3 = ax1.twinx()
|
||||
# Offset the third axis slightly to the right
|
||||
ax3.spines['right'].set_position(('axes', 1.2))
|
||||
ax3.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')
|
||||
|
||||
# Set labels and title
|
||||
ax1.set_xlabel('Timestamp')
|
||||
ax1.set_ylabel('Overall Service Time (ms)', color='blue')
|
||||
ax2.set_ylabel('DB Time (ms)', color='red')
|
||||
ax3.set_ylabel('Cache Time (ms)', color='green')
|
||||
|
||||
# Format x-axis to show timestamps nicely
|
||||
plt.gcf().autofmt_xdate()
|
||||
plt.title('Latency Breakdown Over Time')
|
||||
|
||||
# Combine legends
|
||||
lines1, labels1 = ax1.get_legend_handles_labels()
|
||||
lines2, labels2 = ax2.get_legend_handles_labels()
|
||||
lines3, labels3 = ax3.get_legend_handles_labels()
|
||||
ax1.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc='best')
|
||||
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
def main(file_path='server_metrics.csv'):
|
||||
"""
|
||||
Main function to process and visualize server metrics.
|
||||
|
||||
Args:
|
||||
file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
|
||||
"""
|
||||
try:
|
||||
# Read CSV file
|
||||
df = read_csv_with_fallback(file_path)
|
||||
|
||||
# Convert timestamps
|
||||
df = convert_timestamps(df)
|
||||
|
||||
# Sort by timestamp to ensure chronological order
|
||||
df = df.sort_values('timestamp')
|
||||
|
||||
# Analyze latency data
|
||||
stats = analyze_latency_data(df)
|
||||
|
||||
# Print statistics
|
||||
print("Latency Statistics:")
|
||||
for category, metrics in stats.items():
|
||||
print(f"\n{category.capitalize()} Latency:")
|
||||
for metric, value in metrics.items():
|
||||
print(f"{metric}: {value:.2f} ms")
|
||||
|
||||
# Plot the graph
|
||||
plot_latency_graph(df)
|
||||
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,20 @@
|
|||
Overall Latency:
|
||||
avg: 14.89 ms
|
||||
p50: 9.00 ms
|
||||
p95: 64.00 ms
|
||||
p99: 100.00 ms
|
||||
max: 295.00 ms
|
||||
|
||||
Db Latency:
|
||||
avg: 6.92 ms
|
||||
p50: 0.00 ms
|
||||
p95: 38.00 ms
|
||||
p99: 78.00 ms
|
||||
max: 283.00 ms
|
||||
|
||||
Cache Latency:
|
||||
avg: 5.32 ms
|
||||
p50: 2.00 ms
|
||||
p95: 27.00 ms
|
||||
p99: 35.00 ms
|
||||
max: 147.00 ms
|
After Width: | Height: | Size: 89 KiB |
|
@ -0,0 +1,102 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
|
||||
# Read the CSV file
|
||||
def read_csv_data(file_path):
|
||||
"""
|
||||
Read CSV file with request data
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
file_path : str
|
||||
Path to the CSV file
|
||||
|
||||
Returns:
|
||||
--------
|
||||
pandas.DataFrame
|
||||
DataFrame with parsed request data
|
||||
"""
|
||||
# Read the CSV file
|
||||
df = pd.read_csv(file_path, parse_dates=['Timestamp'])
|
||||
|
||||
# Ensure data types are correct
|
||||
df['Duration (ms)'] = df['Duration (ms)'].astype(int)
|
||||
df['Status Code'] = df['Status Code'].astype(int)
|
||||
|
||||
return df
|
||||
|
||||
# Visualize the data
|
||||
def visualize_request_data(df):
|
||||
"""
|
||||
Create a dual-axis plot of request durations and status codes
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame containing request data
|
||||
"""
|
||||
# Create the figure and the first axis
|
||||
fig, ax1 = plt.subplots(figsize=(12, 6))
|
||||
|
||||
# Plot Duration on the left axis
|
||||
color1 = 'blue'
|
||||
ax1.set_xlabel('Timestamp')
|
||||
ax1.set_ylabel('Duration (ms)', color=color1)
|
||||
ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
|
||||
ax1.tick_params(axis='y', labelcolor=color1)
|
||||
|
||||
# Format x-axis to show timestamps nicely
|
||||
plt.gcf().autofmt_xdate()
|
||||
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
|
||||
|
||||
# Title and legend
|
||||
plt.title('Request Data over Time')
|
||||
|
||||
# Create a legend
|
||||
lines1, labels1 = ax1.get_legend_handles_labels()
|
||||
|
||||
# Add grid
|
||||
ax1.grid(True, linestyle='--', alpha=0.7)
|
||||
|
||||
# Tight layout to prevent cutting off labels
|
||||
plt.tight_layout()
|
||||
|
||||
# Show the plot
|
||||
plt.show()
|
||||
|
||||
# Main execution
|
||||
def main():
|
||||
# Path to your CSV file
|
||||
file_path = 'client_metrics.csv'
|
||||
|
||||
try:
|
||||
# Read the data
|
||||
df = read_csv_data(file_path)
|
||||
|
||||
# Visualize the data
|
||||
visualize_request_data(df)
|
||||
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
# Demonstrate data summary
|
||||
def print_data_summary(df):
|
||||
"""
|
||||
Print a summary of the request data
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame containing request data
|
||||
"""
|
||||
print("\nData Summary:")
|
||||
print("-" * 20)
|
||||
print(f"Total Requests: {len(df)}")
|
||||
print(f"Unique Request Types: {df['Request Type'].unique()}")
|
||||
|
||||
print("\nDuration Statistics:")
|
||||
print(df['Duration (ms)'].describe())
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
After Width: | Height: | Size: 272 KiB |
|
@ -0,0 +1,181 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def read_csv_with_fallback(file_path):
|
||||
"""
|
||||
Attempt to read CSV file with multiple parsing strategies.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the CSV file
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: Parsed DataFrame
|
||||
"""
|
||||
try:
|
||||
# First, try reading with header
|
||||
try:
|
||||
df = pd.read_csv(file_path,
|
||||
dtype={'timestamp': str}, # Ensure timestamp is read as string
|
||||
skipinitialspace=True,
|
||||
skip_blank_lines=True)
|
||||
except Exception:
|
||||
# If that fails, try reading without header and specify column names
|
||||
df = pd.read_csv(file_path,
|
||||
names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
|
||||
'db_rows_read', 'db_rows_written', 'db_total_rows',
|
||||
'cache_hits', 'cache_misses'],
|
||||
header=None,
|
||||
dtype={'timestamp': str},
|
||||
skipinitialspace=True,
|
||||
skip_blank_lines=True)
|
||||
|
||||
# Remove any rows where timestamp is 'timestamp'
|
||||
df = df[df['timestamp'] != 'timestamp']
|
||||
|
||||
# Convert timestamp to numeric
|
||||
df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
|
||||
|
||||
# Validate required columns
|
||||
required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
|
||||
for col in required_columns:
|
||||
if col not in df.columns:
|
||||
raise ValueError(f"Missing required column: {col}")
|
||||
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error reading CSV: {e}")
|
||||
print("Please check the file format and ensure it matches the expected structure.")
|
||||
raise
|
||||
|
||||
def convert_timestamps(df):
|
||||
"""
|
||||
Convert timestamps to datetime.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): Input DataFrame
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: DataFrame with converted timestamps
|
||||
"""
|
||||
# Convert millisecond timestamps to datetime
|
||||
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
|
||||
|
||||
return df
|
||||
|
||||
def analyze_latency_data(df):
|
||||
"""
|
||||
Calculate latency statistics.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): Input DataFrame
|
||||
|
||||
Returns:
|
||||
dict: Latency statistics
|
||||
"""
|
||||
# Calculate statistics
|
||||
stats = {
|
||||
'overall': {
|
||||
'avg': df['service_time'].mean(),
|
||||
'p50': df['service_time'].quantile(0.5),
|
||||
'p95': df['service_time'].quantile(0.95),
|
||||
'p99': df['service_time'].quantile(0.99),
|
||||
'max': df['service_time'].max()
|
||||
},
|
||||
'db': {
|
||||
'avg': df['db_time'].mean(),
|
||||
'p50': df['db_time'].quantile(0.5),
|
||||
'p95': df['db_time'].quantile(0.95),
|
||||
'p99': df['db_time'].quantile(0.99),
|
||||
'max': df['db_time'].max()
|
||||
},
|
||||
'cache': {
|
||||
'avg': df['cache_time'].mean(),
|
||||
'p50': df['cache_time'].quantile(0.5),
|
||||
'p95': df['cache_time'].quantile(0.95),
|
||||
'p99': df['cache_time'].quantile(0.99),
|
||||
'max': df['cache_time'].max()
|
||||
}
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
def plot_latency_graph(df):
|
||||
"""
|
||||
Create a multi-axis time series plot for latencies.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): DataFrame with timestamp and time columns
|
||||
"""
|
||||
plt.figure(figsize=(15, 7))
|
||||
|
||||
# Plot overall service time
|
||||
plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
|
||||
|
||||
# Create a twin axis for DB time
|
||||
ax1 = plt.gca()
|
||||
ax2 = ax1.twinx()
|
||||
ax2.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')
|
||||
|
||||
# Create a third axis for cache time
|
||||
ax3 = ax1.twinx()
|
||||
# Offset the third axis slightly to the right
|
||||
ax3.spines['right'].set_position(('axes', 1.2))
|
||||
ax3.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')
|
||||
|
||||
# Set labels and title
|
||||
ax1.set_xlabel('Timestamp')
|
||||
ax1.set_ylabel('Overall Service Time (ms)', color='blue')
|
||||
ax2.set_ylabel('DB Time (ms)', color='red')
|
||||
ax3.set_ylabel('Cache Time (ms)', color='green')
|
||||
|
||||
# Format x-axis to show timestamps nicely
|
||||
plt.gcf().autofmt_xdate()
|
||||
plt.title('Latency Breakdown Over Time')
|
||||
|
||||
# Combine legends
|
||||
lines1, labels1 = ax1.get_legend_handles_labels()
|
||||
lines2, labels2 = ax2.get_legend_handles_labels()
|
||||
lines3, labels3 = ax3.get_legend_handles_labels()
|
||||
ax1.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc='best')
|
||||
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
def main(file_path='server_metrics.csv'):
|
||||
"""
|
||||
Main function to process and visualize server metrics.
|
||||
|
||||
Args:
|
||||
file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
|
||||
"""
|
||||
try:
|
||||
# Read CSV file
|
||||
df = read_csv_with_fallback(file_path)
|
||||
|
||||
# Convert timestamps
|
||||
df = convert_timestamps(df)
|
||||
|
||||
# Sort by timestamp to ensure chronological order
|
||||
df = df.sort_values('timestamp')
|
||||
|
||||
# Analyze latency data
|
||||
stats = analyze_latency_data(df)
|
||||
|
||||
# Print statistics
|
||||
print("Latency Statistics:")
|
||||
for category, metrics in stats.items():
|
||||
print(f"\n{category.capitalize()} Latency:")
|
||||
for metric, value in metrics.items():
|
||||
print(f"{metric}: {value:.2f} ms")
|
||||
|
||||
# Plot the graph
|
||||
plot_latency_graph(df)
|
||||
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,22 @@
|
|||
Latency Statistics:
|
||||
|
||||
Overall Latency:
|
||||
avg: 15.03 ms
|
||||
p50: 7.00 ms
|
||||
p95: 68.00 ms
|
||||
p99: 101.00 ms
|
||||
max: 277.00 ms
|
||||
|
||||
Db Latency:
|
||||
avg: 7.35 ms
|
||||
p50: 0.00 ms
|
||||
p95: 38.00 ms
|
||||
p99: 80.00 ms
|
||||
max: 273.00 ms
|
||||
|
||||
Cache Latency:
|
||||
avg: 5.01 ms
|
||||
p50: 2.00 ms
|
||||
p95: 24.00 ms
|
||||
p99: 34.00 ms
|
||||
max: 146.00 ms
|
BIN
results/1 - Scale Only App/Client - Frankfurt/client_latency.png
Normal file
After Width: | Height: | Size: 110 KiB |
|
@ -0,0 +1,102 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
|
||||
# Read the CSV file
|
||||
def read_csv_data(file_path):
|
||||
"""
|
||||
Read CSV file with request data
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
file_path : str
|
||||
Path to the CSV file
|
||||
|
||||
Returns:
|
||||
--------
|
||||
pandas.DataFrame
|
||||
DataFrame with parsed request data
|
||||
"""
|
||||
# Read the CSV file
|
||||
df = pd.read_csv(file_path, parse_dates=['Timestamp'])
|
||||
|
||||
# Ensure data types are correct
|
||||
df['Duration (ms)'] = df['Duration (ms)'].astype(int)
|
||||
df['Status Code'] = df['Status Code'].astype(int)
|
||||
|
||||
return df
|
||||
|
||||
# Visualize the data
|
||||
def visualize_request_data(df):
|
||||
"""
|
||||
Create a dual-axis plot of request durations and status codes
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame containing request data
|
||||
"""
|
||||
# Create the figure and the first axis
|
||||
fig, ax1 = plt.subplots(figsize=(12, 6))
|
||||
|
||||
# Plot Duration on the left axis
|
||||
color1 = 'blue'
|
||||
ax1.set_xlabel('Timestamp')
|
||||
ax1.set_ylabel('Duration (ms)', color=color1)
|
||||
ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
|
||||
ax1.tick_params(axis='y', labelcolor=color1)
|
||||
|
||||
# Format x-axis to show timestamps nicely
|
||||
plt.gcf().autofmt_xdate()
|
||||
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
|
||||
|
||||
# Title and legend
|
||||
plt.title('Request Data over Time')
|
||||
|
||||
# Create a legend
|
||||
lines1, labels1 = ax1.get_legend_handles_labels()
|
||||
|
||||
# Add grid
|
||||
ax1.grid(True, linestyle='--', alpha=0.7)
|
||||
|
||||
# Tight layout to prevent cutting off labels
|
||||
plt.tight_layout()
|
||||
|
||||
# Show the plot
|
||||
plt.show()
|
||||
|
||||
# Main execution
|
||||
def main():
|
||||
# Path to your CSV file
|
||||
file_path = 'client_metrics.csv'
|
||||
|
||||
try:
|
||||
# Read the data
|
||||
df = read_csv_data(file_path)
|
||||
|
||||
# Visualize the data
|
||||
visualize_request_data(df)
|
||||
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
# Demonstrate data summary
|
||||
def print_data_summary(df):
|
||||
"""
|
||||
Print a summary of the request data
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame containing request data
|
||||
"""
|
||||
print("\nData Summary:")
|
||||
print("-" * 20)
|
||||
print(f"Total Requests: {len(df)}")
|
||||
print(f"Unique Request Types: {df['Request Type'].unique()}")
|
||||
|
||||
print("\nDuration Statistics:")
|
||||
print(df['Duration (ms)'].describe())
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
BIN
results/1 - Scale Only App/Client - Frankfurt/server_latency.png
Normal file
After Width: | Height: | Size: 293 KiB |
|
@ -0,0 +1,181 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def read_csv_with_fallback(file_path):
|
||||
"""
|
||||
Attempt to read CSV file with multiple parsing strategies.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the CSV file
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: Parsed DataFrame
|
||||
"""
|
||||
try:
|
||||
# First, try reading with header
|
||||
try:
|
||||
df = pd.read_csv(file_path,
|
||||
dtype={'timestamp': str}, # Ensure timestamp is read as string
|
||||
skipinitialspace=True,
|
||||
skip_blank_lines=True)
|
||||
except Exception:
|
||||
# If that fails, try reading without header and specify column names
|
||||
df = pd.read_csv(file_path,
|
||||
names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
|
||||
'db_rows_read', 'db_rows_written', 'db_total_rows',
|
||||
'cache_hits', 'cache_misses'],
|
||||
header=None,
|
||||
dtype={'timestamp': str},
|
||||
skipinitialspace=True,
|
||||
skip_blank_lines=True)
|
||||
|
||||
# Remove any rows where timestamp is 'timestamp'
|
||||
df = df[df['timestamp'] != 'timestamp']
|
||||
|
||||
# Convert timestamp to numeric
|
||||
df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
|
||||
|
||||
# Validate required columns
|
||||
required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
|
||||
for col in required_columns:
|
||||
if col not in df.columns:
|
||||
raise ValueError(f"Missing required column: {col}")
|
||||
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error reading CSV: {e}")
|
||||
print("Please check the file format and ensure it matches the expected structure.")
|
||||
raise
|
||||
|
||||
def convert_timestamps(df):
|
||||
"""
|
||||
Convert timestamps to datetime.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): Input DataFrame
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: DataFrame with converted timestamps
|
||||
"""
|
||||
# Convert millisecond timestamps to datetime
|
||||
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
|
||||
|
||||
return df
|
||||
|
||||
def analyze_latency_data(df):
|
||||
"""
|
||||
Calculate latency statistics.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): Input DataFrame
|
||||
|
||||
Returns:
|
||||
dict: Latency statistics
|
||||
"""
|
||||
# Calculate statistics
|
||||
stats = {
|
||||
'overall': {
|
||||
'avg': df['service_time'].mean(),
|
||||
'p50': df['service_time'].quantile(0.5),
|
||||
'p95': df['service_time'].quantile(0.95),
|
||||
'p99': df['service_time'].quantile(0.99),
|
||||
'max': df['service_time'].max()
|
||||
},
|
||||
'db': {
|
||||
'avg': df['db_time'].mean(),
|
||||
'p50': df['db_time'].quantile(0.5),
|
||||
'p95': df['db_time'].quantile(0.95),
|
||||
'p99': df['db_time'].quantile(0.99),
|
||||
'max': df['db_time'].max()
|
||||
},
|
||||
'cache': {
|
||||
'avg': df['cache_time'].mean(),
|
||||
'p50': df['cache_time'].quantile(0.5),
|
||||
'p95': df['cache_time'].quantile(0.95),
|
||||
'p99': df['cache_time'].quantile(0.99),
|
||||
'max': df['cache_time'].max()
|
||||
}
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
def plot_latency_graph(df):
|
||||
"""
|
||||
Create a multi-axis time series plot for latencies.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): DataFrame with timestamp and time columns
|
||||
"""
|
||||
plt.figure(figsize=(15, 7))
|
||||
|
||||
# Plot overall service time
|
||||
plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
|
||||
|
||||
# Create a twin axis for DB time
|
||||
ax1 = plt.gca()
|
||||
ax2 = ax1.twinx()
|
||||
ax2.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')
|
||||
|
||||
# Create a third axis for cache time
|
||||
ax3 = ax1.twinx()
|
||||
# Offset the third axis slightly to the right
|
||||
ax3.spines['right'].set_position(('axes', 1.2))
|
||||
ax3.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')
|
||||
|
||||
# Set labels and title
|
||||
ax1.set_xlabel('Timestamp')
|
||||
ax1.set_ylabel('Overall Service Time (ms)', color='blue')
|
||||
ax2.set_ylabel('DB Time (ms)', color='red')
|
||||
ax3.set_ylabel('Cache Time (ms)', color='green')
|
||||
|
||||
# Format x-axis to show timestamps nicely
|
||||
plt.gcf().autofmt_xdate()
|
||||
plt.title('Latency Breakdown Over Time')
|
||||
|
||||
# Combine legends
|
||||
lines1, labels1 = ax1.get_legend_handles_labels()
|
||||
lines2, labels2 = ax2.get_legend_handles_labels()
|
||||
lines3, labels3 = ax3.get_legend_handles_labels()
|
||||
ax1.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc='best')
|
||||
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
def main(file_path='server_metrics.csv'):
|
||||
"""
|
||||
Main function to process and visualize server metrics.
|
||||
|
||||
Args:
|
||||
file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
|
||||
"""
|
||||
try:
|
||||
# Read CSV file
|
||||
df = read_csv_with_fallback(file_path)
|
||||
|
||||
# Convert timestamps
|
||||
df = convert_timestamps(df)
|
||||
|
||||
# Sort by timestamp to ensure chronological order
|
||||
df = df.sort_values('timestamp')
|
||||
|
||||
# Analyze latency data
|
||||
stats = analyze_latency_data(df)
|
||||
|
||||
# Print statistics
|
||||
print("Latency Statistics:")
|
||||
for category, metrics in stats.items():
|
||||
print(f"\n{category.capitalize()} Latency:")
|
||||
for metric, value in metrics.items():
|
||||
print(f"{metric}: {value:.2f} ms")
|
||||
|
||||
# Plot the graph
|
||||
plot_latency_graph(df)
|
||||
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,22 @@
|
|||
Latency Statistics:
|
||||
|
||||
Overall Latency:
|
||||
avg: 298.54 ms
|
||||
p50: 312.00 ms
|
||||
p95: 323.45 ms
|
||||
p99: 382.00 ms
|
||||
max: 442.00 ms
|
||||
|
||||
Db Latency:
|
||||
avg: 182.40 ms
|
||||
p50: 208.00 ms
|
||||
p95: 216.00 ms
|
||||
p99: 268.76 ms
|
||||
max: 339.00 ms
|
||||
|
||||
Cache Latency:
|
||||
avg: 21.11 ms
|
||||
p50: 0.00 ms
|
||||
p95: 126.00 ms
|
||||
p99: 170.00 ms
|
||||
max: 201.00 ms
|
BIN
results/1 - Scale Only App/Client - Singapore/client_latency.png
Normal file
After Width: | Height: | Size: 73 KiB |
|
@ -0,0 +1,102 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
|
||||
# Read the CSV file
|
||||
def read_csv_data(file_path):
|
||||
"""
|
||||
Read CSV file with request data
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
file_path : str
|
||||
Path to the CSV file
|
||||
|
||||
Returns:
|
||||
--------
|
||||
pandas.DataFrame
|
||||
DataFrame with parsed request data
|
||||
"""
|
||||
# Read the CSV file
|
||||
df = pd.read_csv(file_path, parse_dates=['Timestamp'])
|
||||
|
||||
# Ensure data types are correct
|
||||
df['Duration (ms)'] = df['Duration (ms)'].astype(int)
|
||||
df['Status Code'] = df['Status Code'].astype(int)
|
||||
|
||||
return df
|
||||
|
||||
# Visualize the data
|
||||
def visualize_request_data(df):
|
||||
"""
|
||||
Create a dual-axis plot of request durations and status codes
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame containing request data
|
||||
"""
|
||||
# Create the figure and the first axis
|
||||
fig, ax1 = plt.subplots(figsize=(12, 6))
|
||||
|
||||
# Plot Duration on the left axis
|
||||
color1 = 'blue'
|
||||
ax1.set_xlabel('Timestamp')
|
||||
ax1.set_ylabel('Duration (ms)', color=color1)
|
||||
ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
|
||||
ax1.tick_params(axis='y', labelcolor=color1)
|
||||
|
||||
# Format x-axis to show timestamps nicely
|
||||
plt.gcf().autofmt_xdate()
|
||||
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
|
||||
|
||||
# Title and legend
|
||||
plt.title('Request Data over Time')
|
||||
|
||||
# Create a legend
|
||||
lines1, labels1 = ax1.get_legend_handles_labels()
|
||||
|
||||
# Add grid
|
||||
ax1.grid(True, linestyle='--', alpha=0.7)
|
||||
|
||||
# Tight layout to prevent cutting off labels
|
||||
plt.tight_layout()
|
||||
|
||||
# Show the plot
|
||||
plt.show()
|
||||
|
||||
# Main execution
|
||||
def main():
|
||||
# Path to your CSV file
|
||||
file_path = 'client_metrics.csv'
|
||||
|
||||
try:
|
||||
# Read the data
|
||||
df = read_csv_data(file_path)
|
||||
|
||||
# Visualize the data
|
||||
visualize_request_data(df)
|
||||
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
# Demonstrate data summary
|
||||
def print_data_summary(df):
|
||||
"""
|
||||
Print a summary of the request data
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame containing request data
|
||||
"""
|
||||
print("\nData Summary:")
|
||||
print("-" * 20)
|
||||
print(f"Total Requests: {len(df)}")
|
||||
print(f"Unique Request Types: {df['Request Type'].unique()}")
|
||||
|
||||
print("\nDuration Statistics:")
|
||||
print(df['Duration (ms)'].describe())
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
BIN
results/1 - Scale Only App/Client - Singapore/server_latency.png
Normal file
After Width: | Height: | Size: 450 KiB |
|
@ -0,0 +1,181 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def read_csv_with_fallback(file_path):
|
||||
"""
|
||||
Attempt to read CSV file with multiple parsing strategies.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the CSV file
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: Parsed DataFrame
|
||||
"""
|
||||
try:
|
||||
# First, try reading with header
|
||||
try:
|
||||
df = pd.read_csv(file_path,
|
||||
dtype={'timestamp': str}, # Ensure timestamp is read as string
|
||||
skipinitialspace=True,
|
||||
skip_blank_lines=True)
|
||||
except Exception:
|
||||
# If that fails, try reading without header and specify column names
|
||||
df = pd.read_csv(file_path,
|
||||
names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
|
||||
'db_rows_read', 'db_rows_written', 'db_total_rows',
|
||||
'cache_hits', 'cache_misses'],
|
||||
header=None,
|
||||
dtype={'timestamp': str},
|
||||
skipinitialspace=True,
|
||||
skip_blank_lines=True)
|
||||
|
||||
# Remove any rows where timestamp is 'timestamp'
|
||||
df = df[df['timestamp'] != 'timestamp']
|
||||
|
||||
# Convert timestamp to numeric
|
||||
df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
|
||||
|
||||
# Validate required columns
|
||||
required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
|
||||
for col in required_columns:
|
||||
if col not in df.columns:
|
||||
raise ValueError(f"Missing required column: {col}")
|
||||
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error reading CSV: {e}")
|
||||
print("Please check the file format and ensure it matches the expected structure.")
|
||||
raise
|
||||
|
||||
def convert_timestamps(df):
|
||||
"""
|
||||
Convert timestamps to datetime.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): Input DataFrame
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: DataFrame with converted timestamps
|
||||
"""
|
||||
# Convert millisecond timestamps to datetime
|
||||
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
|
||||
|
||||
return df
|
||||
|
||||
def analyze_latency_data(df):
|
||||
"""
|
||||
Calculate latency statistics.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): Input DataFrame
|
||||
|
||||
Returns:
|
||||
dict: Latency statistics
|
||||
"""
|
||||
# Calculate statistics
|
||||
stats = {
|
||||
'overall': {
|
||||
'avg': df['service_time'].mean(),
|
||||
'p50': df['service_time'].quantile(0.5),
|
||||
'p95': df['service_time'].quantile(0.95),
|
||||
'p99': df['service_time'].quantile(0.99),
|
||||
'max': df['service_time'].max()
|
||||
},
|
||||
'db': {
|
||||
'avg': df['db_time'].mean(),
|
||||
'p50': df['db_time'].quantile(0.5),
|
||||
'p95': df['db_time'].quantile(0.95),
|
||||
'p99': df['db_time'].quantile(0.99),
|
||||
'max': df['db_time'].max()
|
||||
},
|
||||
'cache': {
|
||||
'avg': df['cache_time'].mean(),
|
||||
'p50': df['cache_time'].quantile(0.5),
|
||||
'p95': df['cache_time'].quantile(0.95),
|
||||
'p99': df['cache_time'].quantile(0.99),
|
||||
'max': df['cache_time'].max()
|
||||
}
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
def plot_latency_graph(df):
|
||||
"""
|
||||
Create a multi-axis time series plot for latencies.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): DataFrame with timestamp and time columns
|
||||
"""
|
||||
plt.figure(figsize=(15, 7))
|
||||
|
||||
# Plot overall service time
|
||||
plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
|
||||
|
||||
# Create a twin axis for DB time
|
||||
ax1 = plt.gca()
|
||||
ax2 = ax1.twinx()
|
||||
ax2.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')
|
||||
|
||||
# Create a third axis for cache time
|
||||
ax3 = ax1.twinx()
|
||||
# Offset the third axis slightly to the right
|
||||
ax3.spines['right'].set_position(('axes', 1.2))
|
||||
ax3.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')
|
||||
|
||||
# Set labels and title
|
||||
ax1.set_xlabel('Timestamp')
|
||||
ax1.set_ylabel('Overall Service Time (ms)', color='blue')
|
||||
ax2.set_ylabel('DB Time (ms)', color='red')
|
||||
ax3.set_ylabel('Cache Time (ms)', color='green')
|
||||
|
||||
# Format x-axis to show timestamps nicely
|
||||
plt.gcf().autofmt_xdate()
|
||||
plt.title('Latency Breakdown Over Time')
|
||||
|
||||
# Combine legends
|
||||
lines1, labels1 = ax1.get_legend_handles_labels()
|
||||
lines2, labels2 = ax2.get_legend_handles_labels()
|
||||
lines3, labels3 = ax3.get_legend_handles_labels()
|
||||
ax1.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc='best')
|
||||
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
def main(file_path='server_metrics.csv'):
|
||||
"""
|
||||
Main function to process and visualize server metrics.
|
||||
|
||||
Args:
|
||||
file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
|
||||
"""
|
||||
try:
|
||||
# Read CSV file
|
||||
df = read_csv_with_fallback(file_path)
|
||||
|
||||
# Convert timestamps
|
||||
df = convert_timestamps(df)
|
||||
|
||||
# Sort by timestamp to ensure chronological order
|
||||
df = df.sort_values('timestamp')
|
||||
|
||||
# Analyze latency data
|
||||
stats = analyze_latency_data(df)
|
||||
|
||||
# Print statistics
|
||||
print("Latency Statistics:")
|
||||
for category, metrics in stats.items():
|
||||
print(f"\n{category.capitalize()} Latency:")
|
||||
for metric, value in metrics.items():
|
||||
print(f"{metric}: {value:.2f} ms")
|
||||
|
||||
# Plot the graph
|
||||
plot_latency_graph(df)
|
||||
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,22 @@
|
|||
Latency Statistics:
|
||||
|
||||
Overall Latency:
|
||||
avg: 6068.11 ms
|
||||
p50: 4711.00 ms
|
||||
p95: 8681.00 ms
|
||||
p99: 8707.00 ms
|
||||
max: 8975.00 ms
|
||||
|
||||
Db Latency:
|
||||
avg: 230.91 ms
|
||||
p50: 238.00 ms
|
||||
p95: 504.00 ms
|
||||
p99: 668.74 ms
|
||||
max: 985.00 ms
|
||||
|
||||
Cache Latency:
|
||||
avg: 3104.81 ms
|
||||
p50: 4191.00 ms
|
||||
p95: 4233.00 ms
|
||||
p99: 4258.87 ms
|
||||
max: 4664.00 ms
|
After Width: | Height: | Size: 78 KiB |
|
@ -0,0 +1,102 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
|
||||
# Read the CSV file
|
||||
def read_csv_data(file_path):
|
||||
"""
|
||||
Read CSV file with request data
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
file_path : str
|
||||
Path to the CSV file
|
||||
|
||||
Returns:
|
||||
--------
|
||||
pandas.DataFrame
|
||||
DataFrame with parsed request data
|
||||
"""
|
||||
# Read the CSV file
|
||||
df = pd.read_csv(file_path, parse_dates=['Timestamp'])
|
||||
|
||||
# Ensure data types are correct
|
||||
df['Duration (ms)'] = df['Duration (ms)'].astype(int)
|
||||
df['Status Code'] = df['Status Code'].astype(int)
|
||||
|
||||
return df
|
||||
|
||||
# Visualize the data
|
||||
def visualize_request_data(df):
|
||||
"""
|
||||
Create a dual-axis plot of request durations and status codes
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame containing request data
|
||||
"""
|
||||
# Create the figure and the first axis
|
||||
fig, ax1 = plt.subplots(figsize=(12, 6))
|
||||
|
||||
# Plot Duration on the left axis
|
||||
color1 = 'blue'
|
||||
ax1.set_xlabel('Timestamp')
|
||||
ax1.set_ylabel('Duration (ms)', color=color1)
|
||||
ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
|
||||
ax1.tick_params(axis='y', labelcolor=color1)
|
||||
|
||||
# Format x-axis to show timestamps nicely
|
||||
plt.gcf().autofmt_xdate()
|
||||
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
|
||||
|
||||
# Title and legend
|
||||
plt.title('Request Data over Time')
|
||||
|
||||
# Create a legend
|
||||
lines1, labels1 = ax1.get_legend_handles_labels()
|
||||
|
||||
# Add grid
|
||||
ax1.grid(True, linestyle='--', alpha=0.7)
|
||||
|
||||
# Tight layout to prevent cutting off labels
|
||||
plt.tight_layout()
|
||||
|
||||
# Show the plot
|
||||
plt.show()
|
||||
|
||||
# Main execution
|
||||
def main():
|
||||
# Path to your CSV file
|
||||
file_path = 'client_metrics.csv'
|
||||
|
||||
try:
|
||||
# Read the data
|
||||
df = read_csv_data(file_path)
|
||||
|
||||
# Visualize the data
|
||||
visualize_request_data(df)
|
||||
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
# Demonstrate data summary
|
||||
def print_data_summary(df):
|
||||
"""
|
||||
Print a summary of the request data
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame containing request data
|
||||
"""
|
||||
print("\nData Summary:")
|
||||
print("-" * 20)
|
||||
print(f"Total Requests: {len(df)}")
|
||||
print(f"Unique Request Types: {df['Request Type'].unique()}")
|
||||
|
||||
print("\nDuration Statistics:")
|
||||
print(df['Duration (ms)'].describe())
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
After Width: | Height: | Size: 502 KiB |
|
@ -0,0 +1,181 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def read_csv_with_fallback(file_path):
|
||||
"""
|
||||
Attempt to read CSV file with multiple parsing strategies.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the CSV file
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: Parsed DataFrame
|
||||
"""
|
||||
try:
|
||||
# First, try reading with header
|
||||
try:
|
||||
df = pd.read_csv(file_path,
|
||||
dtype={'timestamp': str}, # Ensure timestamp is read as string
|
||||
skipinitialspace=True,
|
||||
skip_blank_lines=True)
|
||||
except Exception:
|
||||
# If that fails, try reading without header and specify column names
|
||||
df = pd.read_csv(file_path,
|
||||
names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
|
||||
'db_rows_read', 'db_rows_written', 'db_total_rows',
|
||||
'cache_hits', 'cache_misses'],
|
||||
header=None,
|
||||
dtype={'timestamp': str},
|
||||
skipinitialspace=True,
|
||||
skip_blank_lines=True)
|
||||
|
||||
# Remove any rows where timestamp is 'timestamp'
|
||||
df = df[df['timestamp'] != 'timestamp']
|
||||
|
||||
# Convert timestamp to numeric
|
||||
df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
|
||||
|
||||
# Validate required columns
|
||||
required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
|
||||
for col in required_columns:
|
||||
if col not in df.columns:
|
||||
raise ValueError(f"Missing required column: {col}")
|
||||
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error reading CSV: {e}")
|
||||
print("Please check the file format and ensure it matches the expected structure.")
|
||||
raise
|
||||
|
||||
def convert_timestamps(df):
|
||||
"""
|
||||
Convert timestamps to datetime.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): Input DataFrame
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: DataFrame with converted timestamps
|
||||
"""
|
||||
# Convert millisecond timestamps to datetime
|
||||
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
|
||||
|
||||
return df
|
||||
|
||||
def analyze_latency_data(df):
|
||||
"""
|
||||
Calculate latency statistics.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): Input DataFrame
|
||||
|
||||
Returns:
|
||||
dict: Latency statistics
|
||||
"""
|
||||
# Calculate statistics
|
||||
stats = {
|
||||
'overall': {
|
||||
'avg': df['service_time'].mean(),
|
||||
'p50': df['service_time'].quantile(0.5),
|
||||
'p95': df['service_time'].quantile(0.95),
|
||||
'p99': df['service_time'].quantile(0.99),
|
||||
'max': df['service_time'].max()
|
||||
},
|
||||
'db': {
|
||||
'avg': df['db_time'].mean(),
|
||||
'p50': df['db_time'].quantile(0.5),
|
||||
'p95': df['db_time'].quantile(0.95),
|
||||
'p99': df['db_time'].quantile(0.99),
|
||||
'max': df['db_time'].max()
|
||||
},
|
||||
'cache': {
|
||||
'avg': df['cache_time'].mean(),
|
||||
'p50': df['cache_time'].quantile(0.5),
|
||||
'p95': df['cache_time'].quantile(0.95),
|
||||
'p99': df['cache_time'].quantile(0.99),
|
||||
'max': df['cache_time'].max()
|
||||
}
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
def plot_latency_graph(df):
|
||||
"""
|
||||
Create a multi-axis time series plot for latencies.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): DataFrame with timestamp and time columns
|
||||
"""
|
||||
plt.figure(figsize=(15, 7))
|
||||
|
||||
# Plot overall service time
|
||||
plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
|
||||
|
||||
# Create a twin axis for DB time
|
||||
ax1 = plt.gca()
|
||||
ax2 = ax1.twinx()
|
||||
ax2.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')
|
||||
|
||||
# Create a third axis for cache time
|
||||
ax3 = ax1.twinx()
|
||||
# Offset the third axis slightly to the right
|
||||
ax3.spines['right'].set_position(('axes', 1.2))
|
||||
ax3.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')
|
||||
|
||||
# Set labels and title
|
||||
ax1.set_xlabel('Timestamp')
|
||||
ax1.set_ylabel('Overall Service Time (ms)', color='blue')
|
||||
ax2.set_ylabel('DB Time (ms)', color='red')
|
||||
ax3.set_ylabel('Cache Time (ms)', color='green')
|
||||
|
||||
# Format x-axis to show timestamps nicely
|
||||
plt.gcf().autofmt_xdate()
|
||||
plt.title('Latency Breakdown Over Time')
|
||||
|
||||
# Combine legends
|
||||
lines1, labels1 = ax1.get_legend_handles_labels()
|
||||
lines2, labels2 = ax2.get_legend_handles_labels()
|
||||
lines3, labels3 = ax3.get_legend_handles_labels()
|
||||
ax1.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc='best')
|
||||
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
def main(file_path='server_metrics.csv'):
|
||||
"""
|
||||
Main function to process and visualize server metrics.
|
||||
|
||||
Args:
|
||||
file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
|
||||
"""
|
||||
try:
|
||||
# Read CSV file
|
||||
df = read_csv_with_fallback(file_path)
|
||||
|
||||
# Convert timestamps
|
||||
df = convert_timestamps(df)
|
||||
|
||||
# Sort by timestamp to ensure chronological order
|
||||
df = df.sort_values('timestamp')
|
||||
|
||||
# Analyze latency data
|
||||
stats = analyze_latency_data(df)
|
||||
|
||||
# Print statistics
|
||||
print("Latency Statistics:")
|
||||
for category, metrics in stats.items():
|
||||
print(f"\n{category.capitalize()} Latency:")
|
||||
for metric, value in metrics.items():
|
||||
print(f"{metric}: {value:.2f} ms")
|
||||
|
||||
# Plot the graph
|
||||
plot_latency_graph(df)
|
||||
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,22 @@
|
|||
Latency Statistics:
|
||||
|
||||
Overall Latency:
|
||||
avg: 235.42 ms
|
||||
p50: 236.00 ms
|
||||
p95: 319.00 ms
|
||||
p99: 352.94 ms
|
||||
max: 445.00 ms
|
||||
|
||||
Db Latency:
|
||||
avg: 142.14 ms
|
||||
p50: 112.00 ms
|
||||
p95: 215.00 ms
|
||||
p99: 249.82 ms
|
||||
max: 309.00 ms
|
||||
|
||||
Cache Latency:
|
||||
avg: 4.52 ms
|
||||
p50: 0.00 ms
|
||||
p95: 24.00 ms
|
||||
p99: 45.00 ms
|
||||
max: 84.00 ms
|
After Width: | Height: | Size: 87 KiB |
|
@ -0,0 +1,102 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
|
||||
# Read the CSV file
|
||||
def read_csv_data(file_path):
|
||||
"""
|
||||
Read CSV file with request data
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
file_path : str
|
||||
Path to the CSV file
|
||||
|
||||
Returns:
|
||||
--------
|
||||
pandas.DataFrame
|
||||
DataFrame with parsed request data
|
||||
"""
|
||||
# Read the CSV file
|
||||
df = pd.read_csv(file_path, parse_dates=['Timestamp'])
|
||||
|
||||
# Ensure data types are correct
|
||||
df['Duration (ms)'] = df['Duration (ms)'].astype(int)
|
||||
df['Status Code'] = df['Status Code'].astype(int)
|
||||
|
||||
return df
|
||||
|
||||
# Visualize the data
|
||||
def visualize_request_data(df):
|
||||
"""
|
||||
Create a dual-axis plot of request durations and status codes
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame containing request data
|
||||
"""
|
||||
# Create the figure and the first axis
|
||||
fig, ax1 = plt.subplots(figsize=(12, 6))
|
||||
|
||||
# Plot Duration on the left axis
|
||||
color1 = 'blue'
|
||||
ax1.set_xlabel('Timestamp')
|
||||
ax1.set_ylabel('Duration (ms)', color=color1)
|
||||
ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
|
||||
ax1.tick_params(axis='y', labelcolor=color1)
|
||||
|
||||
# Format x-axis to show timestamps nicely
|
||||
plt.gcf().autofmt_xdate()
|
||||
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
|
||||
|
||||
# Title and legend
|
||||
plt.title('Request Data over Time')
|
||||
|
||||
# Create a legend
|
||||
lines1, labels1 = ax1.get_legend_handles_labels()
|
||||
|
||||
# Add grid
|
||||
ax1.grid(True, linestyle='--', alpha=0.7)
|
||||
|
||||
# Tight layout to prevent cutting off labels
|
||||
plt.tight_layout()
|
||||
|
||||
# Show the plot
|
||||
plt.show()
|
||||
|
||||
# Main execution
|
||||
def main():
|
||||
# Path to your CSV file
|
||||
file_path = 'client_metrics.csv'
|
||||
|
||||
try:
|
||||
# Read the data
|
||||
df = read_csv_data(file_path)
|
||||
|
||||
# Visualize the data
|
||||
visualize_request_data(df)
|
||||
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
# Demonstrate data summary
|
||||
def print_data_summary(df):
|
||||
"""
|
||||
Print a summary of the request data
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
df : pandas.DataFrame
|
||||
DataFrame containing request data
|
||||
"""
|
||||
print("\nData Summary:")
|
||||
print("-" * 20)
|
||||
print(f"Total Requests: {len(df)}")
|
||||
print(f"Unique Request Types: {df['Request Type'].unique()}")
|
||||
|
||||
print("\nDuration Statistics:")
|
||||
print(df['Duration (ms)'].describe())
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
After Width: | Height: | Size: 460 KiB |
|
@ -0,0 +1,181 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def read_csv_with_fallback(file_path):
|
||||
"""
|
||||
Attempt to read CSV file with multiple parsing strategies.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the CSV file
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: Parsed DataFrame
|
||||
"""
|
||||
try:
|
||||
# First, try reading with header
|
||||
try:
|
||||
df = pd.read_csv(file_path,
|
||||
dtype={'timestamp': str}, # Ensure timestamp is read as string
|
||||
skipinitialspace=True,
|
||||
skip_blank_lines=True)
|
||||
except Exception:
|
||||
# If that fails, try reading without header and specify column names
|
||||
df = pd.read_csv(file_path,
|
||||
names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
|
||||
'db_rows_read', 'db_rows_written', 'db_total_rows',
|
||||
'cache_hits', 'cache_misses'],
|
||||
header=None,
|
||||
dtype={'timestamp': str},
|
||||
skipinitialspace=True,
|
||||
skip_blank_lines=True)
|
||||
|
||||
# Remove any rows where timestamp is 'timestamp'
|
||||
df = df[df['timestamp'] != 'timestamp']
|
||||
|
||||
# Convert timestamp to numeric
|
||||
df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
|
||||
|
||||
# Validate required columns
|
||||
required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
|
||||
for col in required_columns:
|
||||
if col not in df.columns:
|
||||
raise ValueError(f"Missing required column: {col}")
|
||||
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error reading CSV: {e}")
|
||||
print("Please check the file format and ensure it matches the expected structure.")
|
||||
raise
|
||||
|
||||
def convert_timestamps(df):
|
||||
"""
|
||||
Convert timestamps to datetime.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): Input DataFrame
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: DataFrame with converted timestamps
|
||||
"""
|
||||
# Convert millisecond timestamps to datetime
|
||||
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
|
||||
|
||||
return df
|
||||
|
||||
def analyze_latency_data(df):
|
||||
"""
|
||||
Calculate latency statistics.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): Input DataFrame
|
||||
|
||||
Returns:
|
||||
dict: Latency statistics
|
||||
"""
|
||||
# Calculate statistics
|
||||
stats = {
|
||||
'overall': {
|
||||
'avg': df['service_time'].mean(),
|
||||
'p50': df['service_time'].quantile(0.5),
|
||||
'p95': df['service_time'].quantile(0.95),
|
||||
'p99': df['service_time'].quantile(0.99),
|
||||
'max': df['service_time'].max()
|
||||
},
|
||||
'db': {
|
||||
'avg': df['db_time'].mean(),
|
||||
'p50': df['db_time'].quantile(0.5),
|
||||
'p95': df['db_time'].quantile(0.95),
|
||||
'p99': df['db_time'].quantile(0.99),
|
||||
'max': df['db_time'].max()
|
||||
},
|
||||
'cache': {
|
||||
'avg': df['cache_time'].mean(),
|
||||
'p50': df['cache_time'].quantile(0.5),
|
||||
'p95': df['cache_time'].quantile(0.95),
|
||||
'p99': df['cache_time'].quantile(0.99),
|
||||
'max': df['cache_time'].max()
|
||||
}
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
def plot_latency_graph(df):
|
||||
"""
|
||||
Create a multi-axis time series plot for latencies.
|
||||
|
||||
Args:
|
||||
df (pandas.DataFrame): DataFrame with timestamp and time columns
|
||||
"""
|
||||
plt.figure(figsize=(15, 7))
|
||||
|
||||
# Plot overall service time
|
||||
plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
|
||||
|
||||
# Create a twin axis for DB time
|
||||
ax1 = plt.gca()
|
||||
ax2 = ax1.twinx()
|
||||
ax2.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')
|
||||
|
||||
# Create a third axis for cache time
|
||||
ax3 = ax1.twinx()
|
||||
# Offset the third axis slightly to the right
|
||||
ax3.spines['right'].set_position(('axes', 1.2))
|
||||
ax3.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')
|
||||
|
||||
# Set labels and title
|
||||
ax1.set_xlabel('Timestamp')
|
||||
ax1.set_ylabel('Overall Service Time (ms)', color='blue')
|
||||
ax2.set_ylabel('DB Time (ms)', color='red')
|
||||
ax3.set_ylabel('Cache Time (ms)', color='green')
|
||||
|
||||
# Format x-axis to show timestamps nicely
|
||||
plt.gcf().autofmt_xdate()
|
||||
plt.title('Latency Breakdown Over Time')
|
||||
|
||||
# Combine legends
|
||||
lines1, labels1 = ax1.get_legend_handles_labels()
|
||||
lines2, labels2 = ax2.get_legend_handles_labels()
|
||||
lines3, labels3 = ax3.get_legend_handles_labels()
|
||||
ax1.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc='best')
|
||||
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
def main(file_path='server_metrics.csv'):
|
||||
"""
|
||||
Main function to process and visualize server metrics.
|
||||
|
||||
Args:
|
||||
file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
|
||||
"""
|
||||
try:
|
||||
# Read CSV file
|
||||
df = read_csv_with_fallback(file_path)
|
||||
|
||||
# Convert timestamps
|
||||
df = convert_timestamps(df)
|
||||
|
||||
# Sort by timestamp to ensure chronological order
|
||||
df = df.sort_values('timestamp')
|
||||
|
||||
# Analyze latency data
|
||||
stats = analyze_latency_data(df)
|
||||
|
||||
# Print statistics
|
||||
print("Latency Statistics:")
|
||||
for category, metrics in stats.items():
|
||||
print(f"\n{category.capitalize()} Latency:")
|
||||
for metric, value in metrics.items():
|
||||
print(f"{metric}: {value:.2f} ms")
|
||||
|
||||
# Plot the graph
|
||||
plot_latency_graph(df)
|
||||
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,22 @@
|
|||
Latency Statistics:
|
||||
|
||||
Overall Latency:
|
||||
avg: 387.27 ms
|
||||
p50: 481.00 ms
|
||||
p95: 758.00 ms
|
||||
p99: 915.60 ms
|
||||
max: 1259.00 ms
|
||||
|
||||
Db Latency:
|
||||
avg: 216.24 ms
|
||||
p50: 241.00 ms
|
||||
p95: 499.00 ms
|
||||
p99: 675.00 ms
|
||||
max: 1023.00 ms
|
||||
|
||||
Cache Latency:
|
||||
avg: 13.85 ms
|
||||
p50: 1.00 ms
|
||||
p95: 77.00 ms
|
||||
p99: 132.80 ms
|
||||
max: 249.00 ms
|
After Width: | Height: | Size: 97 KiB |
|
@ -0,0 +1,102 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
|
||||
# Read the CSV file
|
||||
def read_csv_data(file_path):
    """
    Read CSV file with request data.

    Parameters:
    -----------
    file_path : str
        Path to the CSV file

    Returns:
    --------
    pandas.DataFrame
        DataFrame with parsed request data
    """
    # Parse the file, interpreting the Timestamp column as datetimes.
    frame = pd.read_csv(file_path, parse_dates=['Timestamp'])

    # Normalize the numeric columns to integers.
    for column in ('Duration (ms)', 'Status Code'):
        frame[column] = frame[column].astype(int)

    return frame
|
||||
|
||||
# Visualize the data
|
||||
def visualize_request_data(df):
    """
    Plot request durations over time on a single time axis.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data with 'Timestamp' and
        'Duration (ms)' columns
    """
    # Create the figure and the axis
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot Duration, colour-matching the axis label and ticks to the line
    color1 = 'blue'
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Duration (ms)', color=color1)
    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
    ax1.tick_params(axis='y', labelcolor=color1)

    # Format x-axis to show timestamps nicely
    plt.gcf().autofmt_xdate()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

    # Title
    plt.title('Request Data over Time')

    # Fix: handles were previously collected but no legend was drawn
    lines1, labels1 = ax1.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='best')

    # Add grid
    ax1.grid(True, linestyle='--', alpha=0.7)

    # Tight layout to prevent cutting off labels
    plt.tight_layout()

    # Show the plot
    plt.show()
|
||||
|
||||
# Main execution
|
||||
def main():
    """Read the client metrics CSV and visualize it."""
    csv_path = 'client_metrics.csv'
    try:
        frame = read_csv_data(csv_path)
        visualize_request_data(frame)
    except Exception as e:
        print(f"An error occurred: {e}")
|
||||
|
||||
# Demonstrate data summary
|
||||
def print_data_summary(df):
    """
    Print a short textual summary of the request data to stdout.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data
    """
    total_requests = len(df)
    request_types = df['Request Type'].unique()

    print("\nData Summary:")
    print("-" * 20)
    print(f"Total Requests: {total_requests}")
    print(f"Unique Request Types: {request_types}")

    print("\nDuration Statistics:")
    print(df['Duration (ms)'].describe())
|
||||
|
||||
# Script entry point: invoke main() only when this file is run directly.
if __name__ == '__main__':
    main()
|
After Width: | Height: | Size: 250 KiB |
|
@ -0,0 +1,181 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def read_csv_with_fallback(file_path):
    """
    Attempt to read CSV file with multiple parsing strategies.

    First tries to parse the file assuming it carries its own header row;
    if that fails, re-reads it headerless with a fixed column layout.
    Stray embedded header rows are dropped and timestamps coerced to
    numbers afterwards.

    Args:
        file_path (str): Path to the CSV file

    Returns:
        pandas.DataFrame: Parsed DataFrame

    Raises:
        ValueError: If a required column is missing after parsing.
    """
    column_layout = ['session_id', 'timestamp', 'service_time', 'db_time',
                     'cache_time', 'db_rows_read', 'db_rows_written',
                     'db_total_rows', 'cache_hits', 'cache_misses']
    # Shared parser options: keep timestamps as text for the cleanup below.
    common_kwargs = dict(dtype={'timestamp': str},
                         skipinitialspace=True,
                         skip_blank_lines=True)
    try:
        try:
            # Strategy 1: the file has a header row.
            frame = pd.read_csv(file_path, **common_kwargs)
        except Exception:
            # Strategy 2: headerless file with the known column layout.
            frame = pd.read_csv(file_path, names=column_layout,
                                header=None, **common_kwargs)

        # Drop stray rows that are actually repeated header lines.
        frame = frame[frame['timestamp'] != 'timestamp']

        # Timestamps arrived as strings; coerce to numeric (bad -> NaN).
        frame['timestamp'] = pd.to_numeric(frame['timestamp'], errors='coerce')

        # Fail fast if anything we analyze/plot later is missing.
        for required in ('timestamp', 'service_time', 'db_time', 'cache_time'):
            if required not in frame.columns:
                raise ValueError(f"Missing required column: {required}")

        return frame

    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
|
||||
|
||||
def convert_timestamps(df):
    """
    Convert the 'timestamp' column from epoch milliseconds to datetime.

    Args:
        df (pandas.DataFrame): Input DataFrame with a numeric 'timestamp'

    Returns:
        pandas.DataFrame: DataFrame with 'timestamp' as datetime64
    """
    # Values are epoch milliseconds, hence unit='ms'.
    converted = pd.to_datetime(df['timestamp'], unit='ms')
    df['timestamp'] = converted
    return df
|
||||
|
||||
def analyze_latency_data(df):
    """
    Calculate latency statistics.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns (milliseconds).

    Returns:
        dict: Latency statistics keyed by 'overall', 'db' and 'cache';
            each value is a dict with 'avg', 'p50', 'p95', 'p99', 'max'.
    """
    def _series_stats(series):
        # Single definition of the stat set (was copy-pasted three times).
        return {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }

    # Map each reported category to its source column.
    columns = {
        'overall': 'service_time',
        'db': 'db_time',
        'cache': 'cache_time',
    }
    return {name: _series_stats(df[col]) for name, col in columns.items()}
|
||||
|
||||
def plot_latency_graph(df):
    """
    Create a multi-axis time series plot for latencies.

    Args:
        df (pandas.DataFrame): DataFrame with timestamp and time columns
    """
    plt.figure(figsize=(15, 7))

    # Overall service time on the primary (left) axis.
    plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
    base_ax = plt.gca()

    # DB time on a twin (right) axis.
    db_ax = base_ax.twinx()
    db_ax.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')

    # Cache time on a second twin axis, pushed further right so its
    # labels do not overlap the DB axis.
    cache_ax = base_ax.twinx()
    cache_ax.spines['right'].set_position(('axes', 1.2))
    cache_ax.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')

    # Axis labels, colour-matched to their series.
    base_ax.set_xlabel('Timestamp')
    base_ax.set_ylabel('Overall Service Time (ms)', color='blue')
    db_ax.set_ylabel('DB Time (ms)', color='red')
    cache_ax.set_ylabel('Cache Time (ms)', color='green')

    # Nicely angled timestamp ticks and a title.
    plt.gcf().autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # Each axis holds its own legend entries; merge them into one box.
    handles, labels = [], []
    for axis in (base_ax, db_ax, cache_ax):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles += axis_handles
        labels += axis_labels
    base_ax.legend(handles, labels, loc='best')

    plt.tight_layout()
    plt.show()
|
||||
|
||||
def main(file_path='server_metrics.csv'):
    """
    Main function to process and visualize server metrics.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        # Load, clean and order the metrics chronologically.
        metrics = read_csv_with_fallback(file_path)
        metrics = convert_timestamps(metrics)
        metrics = metrics.sort_values('timestamp')

        # Summarize latencies and report them per category.
        stats = analyze_latency_data(metrics)
        print("Latency Statistics:")
        for category, values in stats.items():
            print(f"\n{category.capitalize()} Latency:")
            for name, value in values.items():
                print(f"{name}: {value:.2f} ms")

        # Finally, render the time-series breakdown.
        plot_latency_graph(metrics)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")
|
||||
|
||||
# Script entry point: invoke main() only when this file is run directly.
if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,22 @@
|
|||
Latency Statistics:
|
||||
|
||||
Overall Latency:
|
||||
avg: 178.84 ms
|
||||
p50: 108.00 ms
|
||||
p95: 531.00 ms
|
||||
p99: 605.00 ms
|
||||
max: 800.00 ms
|
||||
|
||||
Db Latency:
|
||||
avg: 116.50 ms
|
||||
p50: 4.00 ms
|
||||
p95: 426.00 ms
|
||||
p99: 500.89 ms
|
||||
max: 696.00 ms
|
||||
|
||||
Cache Latency:
|
||||
avg: 9.48 ms
|
||||
p50: 1.00 ms
|
||||
p95: 49.00 ms
|
||||
p99: 56.00 ms
|
||||
max: 272.00 ms
|
BIN
results/3 - Scale All The Things/Client - Singapore/Figure_1.png
Normal file
After Width: | Height: | Size: 342 KiB |
After Width: | Height: | Size: 69 KiB |
|
@ -0,0 +1,102 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
|
||||
# Read the CSV file
|
||||
def read_csv_data(file_path):
    """
    Read CSV file with request data.

    Parameters:
    -----------
    file_path : str
        Path to the CSV file

    Returns:
    --------
    pandas.DataFrame
        DataFrame with parsed request data
    """
    # Parse the file, interpreting the Timestamp column as datetimes.
    frame = pd.read_csv(file_path, parse_dates=['Timestamp'])

    # Normalize the numeric columns to integers.
    for column in ('Duration (ms)', 'Status Code'):
        frame[column] = frame[column].astype(int)

    return frame
|
||||
|
||||
# Visualize the data
|
||||
def visualize_request_data(df):
    """
    Plot request durations over time on a single time axis.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data with 'Timestamp' and
        'Duration (ms)' columns
    """
    # Create the figure and the axis
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot Duration, colour-matching the axis label and ticks to the line
    color1 = 'blue'
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Duration (ms)', color=color1)
    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
    ax1.tick_params(axis='y', labelcolor=color1)

    # Format x-axis to show timestamps nicely
    plt.gcf().autofmt_xdate()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

    # Title
    plt.title('Request Data over Time')

    # Fix: handles were previously collected but no legend was drawn
    lines1, labels1 = ax1.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='best')

    # Add grid
    ax1.grid(True, linestyle='--', alpha=0.7)

    # Tight layout to prevent cutting off labels
    plt.tight_layout()

    # Show the plot
    plt.show()
|
||||
|
||||
# Main execution
|
||||
def main():
    """Read the client metrics CSV and visualize it."""
    csv_path = 'client_metrics.csv'
    try:
        frame = read_csv_data(csv_path)
        visualize_request_data(frame)
    except Exception as e:
        print(f"An error occurred: {e}")
|
||||
|
||||
# Demonstrate data summary
|
||||
def print_data_summary(df):
    """
    Print a short textual summary of the request data to stdout.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data
    """
    total_requests = len(df)
    request_types = df['Request Type'].unique()

    print("\nData Summary:")
    print("-" * 20)
    print(f"Total Requests: {total_requests}")
    print(f"Unique Request Types: {request_types}")

    print("\nDuration Statistics:")
    print(df['Duration (ms)'].describe())
|
||||
|
||||
# Script entry point: invoke main() only when this file is run directly.
if __name__ == '__main__':
    main()
|
|
@ -0,0 +1,181 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def read_csv_with_fallback(file_path):
    """
    Attempt to read CSV file with multiple parsing strategies.

    First tries to parse the file assuming it carries its own header row;
    if that fails, re-reads it headerless with a fixed column layout.
    Stray embedded header rows are dropped and timestamps coerced to
    numbers afterwards.

    Args:
        file_path (str): Path to the CSV file

    Returns:
        pandas.DataFrame: Parsed DataFrame

    Raises:
        ValueError: If a required column is missing after parsing.
    """
    column_layout = ['session_id', 'timestamp', 'service_time', 'db_time',
                     'cache_time', 'db_rows_read', 'db_rows_written',
                     'db_total_rows', 'cache_hits', 'cache_misses']
    # Shared parser options: keep timestamps as text for the cleanup below.
    common_kwargs = dict(dtype={'timestamp': str},
                         skipinitialspace=True,
                         skip_blank_lines=True)
    try:
        try:
            # Strategy 1: the file has a header row.
            frame = pd.read_csv(file_path, **common_kwargs)
        except Exception:
            # Strategy 2: headerless file with the known column layout.
            frame = pd.read_csv(file_path, names=column_layout,
                                header=None, **common_kwargs)

        # Drop stray rows that are actually repeated header lines.
        frame = frame[frame['timestamp'] != 'timestamp']

        # Timestamps arrived as strings; coerce to numeric (bad -> NaN).
        frame['timestamp'] = pd.to_numeric(frame['timestamp'], errors='coerce')

        # Fail fast if anything we analyze/plot later is missing.
        for required in ('timestamp', 'service_time', 'db_time', 'cache_time'):
            if required not in frame.columns:
                raise ValueError(f"Missing required column: {required}")

        return frame

    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
|
||||
|
||||
def convert_timestamps(df):
    """
    Convert the 'timestamp' column from epoch milliseconds to datetime.

    Args:
        df (pandas.DataFrame): Input DataFrame with a numeric 'timestamp'

    Returns:
        pandas.DataFrame: DataFrame with 'timestamp' as datetime64
    """
    # Values are epoch milliseconds, hence unit='ms'.
    converted = pd.to_datetime(df['timestamp'], unit='ms')
    df['timestamp'] = converted
    return df
|
||||
|
||||
def analyze_latency_data(df):
    """
    Calculate latency statistics.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns (milliseconds).

    Returns:
        dict: Latency statistics keyed by 'overall', 'db' and 'cache';
            each value is a dict with 'avg', 'p50', 'p95', 'p99', 'max'.
    """
    def _series_stats(series):
        # Single definition of the stat set (was copy-pasted three times).
        return {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }

    # Map each reported category to its source column.
    columns = {
        'overall': 'service_time',
        'db': 'db_time',
        'cache': 'cache_time',
    }
    return {name: _series_stats(df[col]) for name, col in columns.items()}
|
||||
|
||||
def plot_latency_graph(df):
    """
    Create a multi-axis time series plot for latencies.

    Args:
        df (pandas.DataFrame): DataFrame with timestamp and time columns
    """
    plt.figure(figsize=(15, 7))

    # Overall service time on the primary (left) axis.
    plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
    base_ax = plt.gca()

    # DB time on a twin (right) axis.
    db_ax = base_ax.twinx()
    db_ax.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')

    # Cache time on a second twin axis, pushed further right so its
    # labels do not overlap the DB axis.
    cache_ax = base_ax.twinx()
    cache_ax.spines['right'].set_position(('axes', 1.2))
    cache_ax.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')

    # Axis labels, colour-matched to their series.
    base_ax.set_xlabel('Timestamp')
    base_ax.set_ylabel('Overall Service Time (ms)', color='blue')
    db_ax.set_ylabel('DB Time (ms)', color='red')
    cache_ax.set_ylabel('Cache Time (ms)', color='green')

    # Nicely angled timestamp ticks and a title.
    plt.gcf().autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # Each axis holds its own legend entries; merge them into one box.
    handles, labels = [], []
    for axis in (base_ax, db_ax, cache_ax):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles += axis_handles
        labels += axis_labels
    base_ax.legend(handles, labels, loc='best')

    plt.tight_layout()
    plt.show()
|
||||
|
||||
def main(file_path='server_metrics.csv'):
    """
    Main function to process and visualize server metrics.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        # Load, clean and order the metrics chronologically.
        metrics = read_csv_with_fallback(file_path)
        metrics = convert_timestamps(metrics)
        metrics = metrics.sort_values('timestamp')

        # Summarize latencies and report them per category.
        stats = analyze_latency_data(metrics)
        print("Latency Statistics:")
        for category, values in stats.items():
            print(f"\n{category.capitalize()} Latency:")
            for name, value in values.items():
                print(f"{name}: {value:.2f} ms")

        # Finally, render the time-series breakdown.
        plot_latency_graph(metrics)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")
|
||||
|
||||
# Script entry point: invoke main() only when this file is run directly.
if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,22 @@
|
|||
Latency Statistics:
|
||||
|
||||
Overall Latency:
|
||||
avg: 506.30 ms
|
||||
p50: 245.00 ms
|
||||
p95: 1231.00 ms
|
||||
p99: 1479.34 ms
|
||||
max: 2964.00 ms
|
||||
|
||||
Db Latency:
|
||||
avg: 322.60 ms
|
||||
p50: 7.00 ms
|
||||
p95: 982.35 ms
|
||||
p99: 1239.68 ms
|
||||
max: 2711.00 ms
|
||||
|
||||
Cache Latency:
|
||||
avg: 10.99 ms
|
||||
p50: 1.00 ms
|
||||
p95: 65.00 ms
|
||||
p99: 141.00 ms
|
||||
max: 256.00 ms
|