1
0
Fork 0

Updated metrics

This commit is contained in:
Atridad Lahiji 2024-12-11 15:08:53 -07:00
parent 55eb32a8e3
commit 0c05c5e472
45 changed files with 2741 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 101 KiB

View file

@ -0,0 +1,102 @@
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Read the CSV file
def read_csv_data(file_path):
    """
    Load the client request log from a CSV file.

    Parameters:
    -----------
    file_path : str
        Path to the CSV file. It must provide 'Timestamp',
        'Duration (ms)' and 'Status Code' columns.

    Returns:
    --------
    pandas.DataFrame
        The loaded rows, with 'Timestamp' parsed as dates and the two
        numeric columns cast to int.
    """
    frame = pd.read_csv(file_path, parse_dates=['Timestamp'])
    # Cast both numeric columns in one place; astype(int) raises on values
    # that cannot be represented as integers (for example missing cells).
    for column in ('Duration (ms)', 'Status Code'):
        frame[column] = frame[column].astype(int)
    return frame
# Visualize the data
def visualize_request_data(df):
    """
    Plot request duration over time as a single line chart.

    Only 'Duration (ms)' is drawn; status codes are not plotted.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data. The 'Timestamp' column is used
        for the x-axis and 'Duration (ms)' for the y-axis.
    """
    # Create the figure and its single axis
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot duration on the left axis
    color1 = 'blue'
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Duration (ms)', color=color1)
    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
    ax1.tick_params(axis='y', labelcolor=color1)

    # Rotate the date labels and show full timestamps on the x-axis
    fig.autofmt_xdate()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

    ax1.set_title('Request Data over Time')

    # Actually draw the legend: the handles used to be collected and then
    # discarded, so the plotted line was never labelled on the figure.
    lines1, labels1 = ax1.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='best')

    ax1.grid(True, linestyle='--', alpha=0.7)

    # Tight layout to prevent cutting off the rotated labels
    fig.tight_layout()
    plt.show()
# Main execution
def main():
    """
    Load 'client_metrics.csv' and plot it.

    Any exception raised while reading or plotting is reported on stdout
    instead of propagating.
    """
    csv_path = 'client_metrics.csv'
    try:
        # Read first, then hand the frame straight to the plotting routine.
        visualize_request_data(read_csv_data(csv_path))
    except Exception as err:
        print(f"An error occurred: {err}")
# Demonstrate data summary
def print_data_summary(df):
    """
    Write a short textual overview of the request data to stdout.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data. The 'Request Type' and
        'Duration (ms)' columns are read.
    """
    separator = "-" * 20
    print("\nData Summary:")
    print(separator)

    total_requests = len(df)
    print(f"Total Requests: {total_requests}")

    request_types = df['Request Type'].unique()
    print(f"Unique Request Types: {request_types}")

    print("\nDuration Statistics:")
    duration_stats = df['Duration (ms)'].describe()
    print(duration_stats)
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 218 KiB

View file

@ -0,0 +1,181 @@
import pandas as pd
import matplotlib.pyplot as plt
def read_csv_with_fallback(file_path):
    """
    Read the server metrics CSV, with or without a header row.

    The file is first parsed assuming a header. If parsing raises, or if
    none of the expected column names appear in the parsed header (the
    first line was data), the file is re-read with the fixed column
    layout. Repeated header rows inside the data are dropped and the
    timestamp and latency columns are converted to numbers; cells that
    cannot be converted become NaN.

    Args:
        file_path (str): Path to the CSV file. It is opened again for the
            fallback read, so it has to be a path rather than a stream.

    Returns:
        pandas.DataFrame: Parsed DataFrame with numeric 'timestamp',
            'service_time', 'db_time' and 'cache_time' columns.

    Raises:
        ValueError: If a required column is missing.
        Exception: Any error raised by pandas while reading is printed
            and re-raised unchanged.
    """
    expected_columns = ['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
                        'db_rows_read', 'db_rows_written', 'db_total_rows',
                        'cache_hits', 'cache_misses']
    required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
    common_options = {
        'dtype': {'timestamp': str},  # Ensure timestamp is read as string
        'skipinitialspace': True,
        'skip_blank_lines': True,
    }

    def read_without_header():
        # Treat every line as data and impose the known column layout.
        return pd.read_csv(file_path, names=expected_columns, header=None,
                           **common_options)

    try:
        try:
            df = pd.read_csv(file_path, **common_options)
        except Exception:
            # If that fails, try reading without header
            df = read_without_header()
        else:
            # A headerless file parses without error, but its first data
            # row becomes the header; detect that and re-read.
            if set(expected_columns).isdisjoint(df.columns):
                df = read_without_header()

        # Validate before touching any column so a missing one is reported
        # with the intended message rather than a bare KeyError.
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            raise ValueError(f"Missing required column: {missing[0]}")

        # Remove any rows where timestamp is 'timestamp' (repeated headers);
        # copy so the assignments below do not write into a slice.
        df = df[df['timestamp'] != 'timestamp'].copy()

        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
        # Repeated header rows force the latency columns to be read as
        # strings, so make them numeric again for the statistics.
        for col in required_columns[1:]:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        return df
    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
def convert_timestamps(df):
    """
    Turn the 'timestamp' column into datetimes.

    The values are interpreted as milliseconds since the Unix epoch. The
    column is replaced on the DataFrame that was passed in.

    Args:
        df (pandas.DataFrame): Input DataFrame with a 'timestamp' column

    Returns:
        pandas.DataFrame: The same DataFrame object, with converted timestamps
    """
    epoch_millis = df['timestamp']
    df['timestamp'] = pd.to_datetime(epoch_millis, unit='ms')
    return df
def analyze_latency_data(df):
    """
    Summarise the three latency columns.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns

    Returns:
        dict: Maps 'overall', 'db' and 'cache' to a dict holding the
            'avg', 'p50', 'p95', 'p99' and 'max' of the matching column
    """
    # Report category -> column it is computed from.
    column_for_category = {
        'overall': 'service_time',
        'db': 'db_time',
        'cache': 'cache_time',
    }

    summary = {}
    for category, column in column_for_category.items():
        series = df[column]
        summary[category] = {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }
    return summary
def plot_latency_graph(df):
    """
    Draw service, DB and cache latency over time, each on its own y-axis.

    Args:
        df (pandas.DataFrame): DataFrame with 'timestamp', 'service_time',
            'db_time' and 'cache_time' columns
    """
    fig, service_axis = plt.subplots(figsize=(15, 7))
    service_axis.plot(df['timestamp'], df['service_time'],
                      label='Overall Service Time', color='blue')

    # Second y-axis sharing the same x-axis, for DB time
    db_axis = service_axis.twinx()
    db_axis.plot(df['timestamp'], df['db_time'],
                 label='DB Time', color='red', linestyle='--')

    # Third y-axis; its spine is pushed right so it does not sit on top of
    # the DB axis.
    cache_axis = service_axis.twinx()
    cache_axis.spines['right'].set_position(('axes', 1.2))
    cache_axis.plot(df['timestamp'], df['cache_time'],
                    label='Cache Time', color='green', linestyle=':')

    service_axis.set_xlabel('Timestamp')
    axis_labels = (
        (service_axis, 'Overall Service Time (ms)', 'blue'),
        (db_axis, 'DB Time (ms)', 'red'),
        (cache_axis, 'Cache Time (ms)', 'green'),
    )
    for axis, text, colour in axis_labels:
        axis.set_ylabel(text, color=colour)

    fig.autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # One legend covering the lines of all three axes
    handles, names = [], []
    for axis in (service_axis, db_axis, cache_axis):
        axis_handles, axis_names = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        names = names + axis_names
    service_axis.legend(handles, names, loc='best')

    plt.tight_layout()
    plt.show()
def main(file_path='server_metrics.csv'):
    """
    Load the server metrics, print latency statistics and plot them.

    Errors are reported on stdout rather than raised.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        frame = convert_timestamps(read_csv_with_fallback(file_path))
        # Keep the rows in chronological order for the time-series plot
        frame = frame.sort_values('timestamp')

        latency_stats = analyze_latency_data(frame)
        print("Latency Statistics:")
        for section, figures in latency_stats.items():
            print(f"\n{section.capitalize()} Latency:")
            for label, number in figures.items():
                print(f"{label}: {number:.2f} ms")

        plot_latency_graph(frame)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")
# Script entry point: run main() with its default CSV path only when this
# file is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

View file

@ -0,0 +1,20 @@
Overall Latency:
avg: 19.67 ms
p50: 15.00 ms
p95: 72.60 ms
p99: 100.00 ms
max: 274.00 ms
Db Latency:
avg: 7.15 ms
p50: 0.00 ms
p95: 39.00 ms
p99: 78.00 ms
max: 269.00 ms
Cache Latency:
avg: 9.21 ms
p50: 2.00 ms
p95: 33.00 ms
p99: 42.92 ms
max: 197.00 ms

Binary file not shown.

After

Width:  |  Height:  |  Size: 97 KiB

View file

@ -0,0 +1,102 @@
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Read the CSV file
def read_csv_data(file_path):
    """
    Load the client request log from a CSV file.

    Parameters:
    -----------
    file_path : str
        Path to the CSV file. It must provide 'Timestamp',
        'Duration (ms)' and 'Status Code' columns.

    Returns:
    --------
    pandas.DataFrame
        The loaded rows, with 'Timestamp' parsed as dates and the two
        numeric columns cast to int.
    """
    frame = pd.read_csv(file_path, parse_dates=['Timestamp'])
    # Cast both numeric columns in one place; astype(int) raises on values
    # that cannot be represented as integers (for example missing cells).
    for column in ('Duration (ms)', 'Status Code'):
        frame[column] = frame[column].astype(int)
    return frame
# Visualize the data
def visualize_request_data(df):
    """
    Plot request duration over time as a single line chart.

    Only 'Duration (ms)' is drawn; status codes are not plotted.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data. The 'Timestamp' column is used
        for the x-axis and 'Duration (ms)' for the y-axis.
    """
    # Create the figure and its single axis
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot duration on the left axis
    color1 = 'blue'
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Duration (ms)', color=color1)
    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
    ax1.tick_params(axis='y', labelcolor=color1)

    # Rotate the date labels and show full timestamps on the x-axis
    fig.autofmt_xdate()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

    ax1.set_title('Request Data over Time')

    # Actually draw the legend: the handles used to be collected and then
    # discarded, so the plotted line was never labelled on the figure.
    lines1, labels1 = ax1.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='best')

    ax1.grid(True, linestyle='--', alpha=0.7)

    # Tight layout to prevent cutting off the rotated labels
    fig.tight_layout()
    plt.show()
# Main execution
def main():
    """
    Load 'client_metrics.csv' and plot it.

    Any exception raised while reading or plotting is reported on stdout
    instead of propagating.
    """
    csv_path = 'client_metrics.csv'
    try:
        # Read first, then hand the frame straight to the plotting routine.
        visualize_request_data(read_csv_data(csv_path))
    except Exception as err:
        print(f"An error occurred: {err}")
# Demonstrate data summary
def print_data_summary(df):
    """
    Write a short textual overview of the request data to stdout.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data. The 'Request Type' and
        'Duration (ms)' columns are read.
    """
    separator = "-" * 20
    print("\nData Summary:")
    print(separator)

    total_requests = len(df)
    print(f"Total Requests: {total_requests}")

    request_types = df['Request Type'].unique()
    print(f"Unique Request Types: {request_types}")

    print("\nDuration Statistics:")
    duration_stats = df['Duration (ms)'].describe()
    print(duration_stats)
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 255 KiB

View file

@ -0,0 +1,181 @@
import pandas as pd
import matplotlib.pyplot as plt
def read_csv_with_fallback(file_path):
    """
    Read the server metrics CSV, with or without a header row.

    The file is first parsed assuming a header. If parsing raises, or if
    none of the expected column names appear in the parsed header (the
    first line was data), the file is re-read with the fixed column
    layout. Repeated header rows inside the data are dropped and the
    timestamp and latency columns are converted to numbers; cells that
    cannot be converted become NaN.

    Args:
        file_path (str): Path to the CSV file. It is opened again for the
            fallback read, so it has to be a path rather than a stream.

    Returns:
        pandas.DataFrame: Parsed DataFrame with numeric 'timestamp',
            'service_time', 'db_time' and 'cache_time' columns.

    Raises:
        ValueError: If a required column is missing.
        Exception: Any error raised by pandas while reading is printed
            and re-raised unchanged.
    """
    expected_columns = ['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
                        'db_rows_read', 'db_rows_written', 'db_total_rows',
                        'cache_hits', 'cache_misses']
    required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
    common_options = {
        'dtype': {'timestamp': str},  # Ensure timestamp is read as string
        'skipinitialspace': True,
        'skip_blank_lines': True,
    }

    def read_without_header():
        # Treat every line as data and impose the known column layout.
        return pd.read_csv(file_path, names=expected_columns, header=None,
                           **common_options)

    try:
        try:
            df = pd.read_csv(file_path, **common_options)
        except Exception:
            # If that fails, try reading without header
            df = read_without_header()
        else:
            # A headerless file parses without error, but its first data
            # row becomes the header; detect that and re-read.
            if set(expected_columns).isdisjoint(df.columns):
                df = read_without_header()

        # Validate before touching any column so a missing one is reported
        # with the intended message rather than a bare KeyError.
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            raise ValueError(f"Missing required column: {missing[0]}")

        # Remove any rows where timestamp is 'timestamp' (repeated headers);
        # copy so the assignments below do not write into a slice.
        df = df[df['timestamp'] != 'timestamp'].copy()

        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
        # Repeated header rows force the latency columns to be read as
        # strings, so make them numeric again for the statistics.
        for col in required_columns[1:]:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        return df
    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
def convert_timestamps(df):
    """
    Turn the 'timestamp' column into datetimes.

    The values are interpreted as milliseconds since the Unix epoch. The
    column is replaced on the DataFrame that was passed in.

    Args:
        df (pandas.DataFrame): Input DataFrame with a 'timestamp' column

    Returns:
        pandas.DataFrame: The same DataFrame object, with converted timestamps
    """
    epoch_millis = df['timestamp']
    df['timestamp'] = pd.to_datetime(epoch_millis, unit='ms')
    return df
def analyze_latency_data(df):
    """
    Summarise the three latency columns.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns

    Returns:
        dict: Maps 'overall', 'db' and 'cache' to a dict holding the
            'avg', 'p50', 'p95', 'p99' and 'max' of the matching column
    """
    # Report category -> column it is computed from.
    column_for_category = {
        'overall': 'service_time',
        'db': 'db_time',
        'cache': 'cache_time',
    }

    summary = {}
    for category, column in column_for_category.items():
        series = df[column]
        summary[category] = {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }
    return summary
def plot_latency_graph(df):
    """
    Draw service, DB and cache latency over time, each on its own y-axis.

    Args:
        df (pandas.DataFrame): DataFrame with 'timestamp', 'service_time',
            'db_time' and 'cache_time' columns
    """
    fig, service_axis = plt.subplots(figsize=(15, 7))
    service_axis.plot(df['timestamp'], df['service_time'],
                      label='Overall Service Time', color='blue')

    # Second y-axis sharing the same x-axis, for DB time
    db_axis = service_axis.twinx()
    db_axis.plot(df['timestamp'], df['db_time'],
                 label='DB Time', color='red', linestyle='--')

    # Third y-axis; its spine is pushed right so it does not sit on top of
    # the DB axis.
    cache_axis = service_axis.twinx()
    cache_axis.spines['right'].set_position(('axes', 1.2))
    cache_axis.plot(df['timestamp'], df['cache_time'],
                    label='Cache Time', color='green', linestyle=':')

    service_axis.set_xlabel('Timestamp')
    axis_labels = (
        (service_axis, 'Overall Service Time (ms)', 'blue'),
        (db_axis, 'DB Time (ms)', 'red'),
        (cache_axis, 'Cache Time (ms)', 'green'),
    )
    for axis, text, colour in axis_labels:
        axis.set_ylabel(text, color=colour)

    fig.autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # One legend covering the lines of all three axes
    handles, names = [], []
    for axis in (service_axis, db_axis, cache_axis):
        axis_handles, axis_names = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        names = names + axis_names
    service_axis.legend(handles, names, loc='best')

    plt.tight_layout()
    plt.show()
def main(file_path='server_metrics.csv'):
    """
    Load the server metrics, print latency statistics and plot them.

    Errors are reported on stdout rather than raised.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        frame = convert_timestamps(read_csv_with_fallback(file_path))
        # Keep the rows in chronological order for the time-series plot
        frame = frame.sort_values('timestamp')

        latency_stats = analyze_latency_data(frame)
        print("Latency Statistics:")
        for section, figures in latency_stats.items():
            print(f"\n{section.capitalize()} Latency:")
            for label, number in figures.items():
                print(f"{label}: {number:.2f} ms")

        plot_latency_graph(frame)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")
# Script entry point: run main() with its default CSV path only when this
# file is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

View file

@ -0,0 +1,20 @@
Overall Latency:
avg: 14.89 ms
p50: 9.00 ms
p95: 64.00 ms
p99: 100.00 ms
max: 295.00 ms
Db Latency:
avg: 6.92 ms
p50: 0.00 ms
p95: 38.00 ms
p99: 78.00 ms
max: 283.00 ms
Cache Latency:
avg: 5.32 ms
p50: 2.00 ms
p95: 27.00 ms
p99: 35.00 ms
max: 147.00 ms

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

View file

@ -0,0 +1,102 @@
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Read the CSV file
def read_csv_data(file_path):
    """
    Load the client request log from a CSV file.

    Parameters:
    -----------
    file_path : str
        Path to the CSV file. It must provide 'Timestamp',
        'Duration (ms)' and 'Status Code' columns.

    Returns:
    --------
    pandas.DataFrame
        The loaded rows, with 'Timestamp' parsed as dates and the two
        numeric columns cast to int.
    """
    frame = pd.read_csv(file_path, parse_dates=['Timestamp'])
    # Cast both numeric columns in one place; astype(int) raises on values
    # that cannot be represented as integers (for example missing cells).
    for column in ('Duration (ms)', 'Status Code'):
        frame[column] = frame[column].astype(int)
    return frame
# Visualize the data
def visualize_request_data(df):
    """
    Plot request duration over time as a single line chart.

    Only 'Duration (ms)' is drawn; status codes are not plotted.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data. The 'Timestamp' column is used
        for the x-axis and 'Duration (ms)' for the y-axis.
    """
    # Create the figure and its single axis
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot duration on the left axis
    color1 = 'blue'
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Duration (ms)', color=color1)
    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
    ax1.tick_params(axis='y', labelcolor=color1)

    # Rotate the date labels and show full timestamps on the x-axis
    fig.autofmt_xdate()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

    ax1.set_title('Request Data over Time')

    # Actually draw the legend: the handles used to be collected and then
    # discarded, so the plotted line was never labelled on the figure.
    lines1, labels1 = ax1.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='best')

    ax1.grid(True, linestyle='--', alpha=0.7)

    # Tight layout to prevent cutting off the rotated labels
    fig.tight_layout()
    plt.show()
# Main execution
def main():
    """
    Load 'client_metrics.csv' and plot it.

    Any exception raised while reading or plotting is reported on stdout
    instead of propagating.
    """
    csv_path = 'client_metrics.csv'
    try:
        # Read first, then hand the frame straight to the plotting routine.
        visualize_request_data(read_csv_data(csv_path))
    except Exception as err:
        print(f"An error occurred: {err}")
# Demonstrate data summary
def print_data_summary(df):
    """
    Write a short textual overview of the request data to stdout.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data. The 'Request Type' and
        'Duration (ms)' columns are read.
    """
    separator = "-" * 20
    print("\nData Summary:")
    print(separator)

    total_requests = len(df)
    print(f"Total Requests: {total_requests}")

    request_types = df['Request Type'].unique()
    print(f"Unique Request Types: {request_types}")

    print("\nDuration Statistics:")
    duration_stats = df['Duration (ms)'].describe()
    print(duration_stats)
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 272 KiB

View file

@ -0,0 +1,181 @@
import pandas as pd
import matplotlib.pyplot as plt
def read_csv_with_fallback(file_path):
    """
    Read the server metrics CSV, with or without a header row.

    The file is first parsed assuming a header. If parsing raises, or if
    none of the expected column names appear in the parsed header (the
    first line was data), the file is re-read with the fixed column
    layout. Repeated header rows inside the data are dropped and the
    timestamp and latency columns are converted to numbers; cells that
    cannot be converted become NaN.

    Args:
        file_path (str): Path to the CSV file. It is opened again for the
            fallback read, so it has to be a path rather than a stream.

    Returns:
        pandas.DataFrame: Parsed DataFrame with numeric 'timestamp',
            'service_time', 'db_time' and 'cache_time' columns.

    Raises:
        ValueError: If a required column is missing.
        Exception: Any error raised by pandas while reading is printed
            and re-raised unchanged.
    """
    expected_columns = ['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
                        'db_rows_read', 'db_rows_written', 'db_total_rows',
                        'cache_hits', 'cache_misses']
    required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
    common_options = {
        'dtype': {'timestamp': str},  # Ensure timestamp is read as string
        'skipinitialspace': True,
        'skip_blank_lines': True,
    }

    def read_without_header():
        # Treat every line as data and impose the known column layout.
        return pd.read_csv(file_path, names=expected_columns, header=None,
                           **common_options)

    try:
        try:
            df = pd.read_csv(file_path, **common_options)
        except Exception:
            # If that fails, try reading without header
            df = read_without_header()
        else:
            # A headerless file parses without error, but its first data
            # row becomes the header; detect that and re-read.
            if set(expected_columns).isdisjoint(df.columns):
                df = read_without_header()

        # Validate before touching any column so a missing one is reported
        # with the intended message rather than a bare KeyError.
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            raise ValueError(f"Missing required column: {missing[0]}")

        # Remove any rows where timestamp is 'timestamp' (repeated headers);
        # copy so the assignments below do not write into a slice.
        df = df[df['timestamp'] != 'timestamp'].copy()

        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
        # Repeated header rows force the latency columns to be read as
        # strings, so make them numeric again for the statistics.
        for col in required_columns[1:]:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        return df
    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
def convert_timestamps(df):
    """
    Turn the 'timestamp' column into datetimes.

    The values are interpreted as milliseconds since the Unix epoch. The
    column is replaced on the DataFrame that was passed in.

    Args:
        df (pandas.DataFrame): Input DataFrame with a 'timestamp' column

    Returns:
        pandas.DataFrame: The same DataFrame object, with converted timestamps
    """
    epoch_millis = df['timestamp']
    df['timestamp'] = pd.to_datetime(epoch_millis, unit='ms')
    return df
def analyze_latency_data(df):
    """
    Summarise the three latency columns.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns

    Returns:
        dict: Maps 'overall', 'db' and 'cache' to a dict holding the
            'avg', 'p50', 'p95', 'p99' and 'max' of the matching column
    """
    # Report category -> column it is computed from.
    column_for_category = {
        'overall': 'service_time',
        'db': 'db_time',
        'cache': 'cache_time',
    }

    summary = {}
    for category, column in column_for_category.items():
        series = df[column]
        summary[category] = {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }
    return summary
def plot_latency_graph(df):
    """
    Draw service, DB and cache latency over time, each on its own y-axis.

    Args:
        df (pandas.DataFrame): DataFrame with 'timestamp', 'service_time',
            'db_time' and 'cache_time' columns
    """
    fig, service_axis = plt.subplots(figsize=(15, 7))
    service_axis.plot(df['timestamp'], df['service_time'],
                      label='Overall Service Time', color='blue')

    # Second y-axis sharing the same x-axis, for DB time
    db_axis = service_axis.twinx()
    db_axis.plot(df['timestamp'], df['db_time'],
                 label='DB Time', color='red', linestyle='--')

    # Third y-axis; its spine is pushed right so it does not sit on top of
    # the DB axis.
    cache_axis = service_axis.twinx()
    cache_axis.spines['right'].set_position(('axes', 1.2))
    cache_axis.plot(df['timestamp'], df['cache_time'],
                    label='Cache Time', color='green', linestyle=':')

    service_axis.set_xlabel('Timestamp')
    axis_labels = (
        (service_axis, 'Overall Service Time (ms)', 'blue'),
        (db_axis, 'DB Time (ms)', 'red'),
        (cache_axis, 'Cache Time (ms)', 'green'),
    )
    for axis, text, colour in axis_labels:
        axis.set_ylabel(text, color=colour)

    fig.autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # One legend covering the lines of all three axes
    handles, names = [], []
    for axis in (service_axis, db_axis, cache_axis):
        axis_handles, axis_names = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        names = names + axis_names
    service_axis.legend(handles, names, loc='best')

    plt.tight_layout()
    plt.show()
def main(file_path='server_metrics.csv'):
    """
    Load the server metrics, print latency statistics and plot them.

    Errors are reported on stdout rather than raised.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        frame = convert_timestamps(read_csv_with_fallback(file_path))
        # Keep the rows in chronological order for the time-series plot
        frame = frame.sort_values('timestamp')

        latency_stats = analyze_latency_data(frame)
        print("Latency Statistics:")
        for section, figures in latency_stats.items():
            print(f"\n{section.capitalize()} Latency:")
            for label, number in figures.items():
                print(f"{label}: {number:.2f} ms")

        plot_latency_graph(frame)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")
# Script entry point: run main() with its default CSV path only when this
# file is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

View file

@ -0,0 +1,22 @@
Latency Statistics:
Overall Latency:
avg: 15.03 ms
p50: 7.00 ms
p95: 68.00 ms
p99: 101.00 ms
max: 277.00 ms
Db Latency:
avg: 7.35 ms
p50: 0.00 ms
p95: 38.00 ms
p99: 80.00 ms
max: 273.00 ms
Cache Latency:
avg: 5.01 ms
p50: 2.00 ms
p95: 24.00 ms
p99: 34.00 ms
max: 146.00 ms

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

View file

@ -0,0 +1,102 @@
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Read the CSV file
def read_csv_data(file_path):
    """
    Load the client request log from a CSV file.

    Parameters:
    -----------
    file_path : str
        Path to the CSV file. It must provide 'Timestamp',
        'Duration (ms)' and 'Status Code' columns.

    Returns:
    --------
    pandas.DataFrame
        The loaded rows, with 'Timestamp' parsed as dates and the two
        numeric columns cast to int.
    """
    frame = pd.read_csv(file_path, parse_dates=['Timestamp'])
    # Cast both numeric columns in one place; astype(int) raises on values
    # that cannot be represented as integers (for example missing cells).
    for column in ('Duration (ms)', 'Status Code'):
        frame[column] = frame[column].astype(int)
    return frame
# Visualize the data
def visualize_request_data(df):
    """
    Plot request duration over time as a single line chart.

    Only 'Duration (ms)' is drawn; status codes are not plotted.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data. The 'Timestamp' column is used
        for the x-axis and 'Duration (ms)' for the y-axis.
    """
    # Create the figure and its single axis
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot duration on the left axis
    color1 = 'blue'
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Duration (ms)', color=color1)
    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
    ax1.tick_params(axis='y', labelcolor=color1)

    # Rotate the date labels and show full timestamps on the x-axis
    fig.autofmt_xdate()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

    ax1.set_title('Request Data over Time')

    # Actually draw the legend: the handles used to be collected and then
    # discarded, so the plotted line was never labelled on the figure.
    lines1, labels1 = ax1.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='best')

    ax1.grid(True, linestyle='--', alpha=0.7)

    # Tight layout to prevent cutting off the rotated labels
    fig.tight_layout()
    plt.show()
# Main execution
def main():
    """
    Load 'client_metrics.csv' and plot it.

    Any exception raised while reading or plotting is reported on stdout
    instead of propagating.
    """
    csv_path = 'client_metrics.csv'
    try:
        # Read first, then hand the frame straight to the plotting routine.
        visualize_request_data(read_csv_data(csv_path))
    except Exception as err:
        print(f"An error occurred: {err}")
# Demonstrate data summary
def print_data_summary(df):
    """
    Write a short textual overview of the request data to stdout.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data. The 'Request Type' and
        'Duration (ms)' columns are read.
    """
    separator = "-" * 20
    print("\nData Summary:")
    print(separator)

    total_requests = len(df)
    print(f"Total Requests: {total_requests}")

    request_types = df['Request Type'].unique()
    print(f"Unique Request Types: {request_types}")

    print("\nDuration Statistics:")
    duration_stats = df['Duration (ms)'].describe()
    print(duration_stats)
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 293 KiB

View file

@ -0,0 +1,181 @@
import pandas as pd
import matplotlib.pyplot as plt
def read_csv_with_fallback(file_path):
    """
    Read the server metrics CSV, with or without a header row.

    The file is first parsed assuming a header. If parsing raises, or if
    none of the expected column names appear in the parsed header (the
    first line was data), the file is re-read with the fixed column
    layout. Repeated header rows inside the data are dropped and the
    timestamp and latency columns are converted to numbers; cells that
    cannot be converted become NaN.

    Args:
        file_path (str): Path to the CSV file. It is opened again for the
            fallback read, so it has to be a path rather than a stream.

    Returns:
        pandas.DataFrame: Parsed DataFrame with numeric 'timestamp',
            'service_time', 'db_time' and 'cache_time' columns.

    Raises:
        ValueError: If a required column is missing.
        Exception: Any error raised by pandas while reading is printed
            and re-raised unchanged.
    """
    expected_columns = ['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
                        'db_rows_read', 'db_rows_written', 'db_total_rows',
                        'cache_hits', 'cache_misses']
    required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
    common_options = {
        'dtype': {'timestamp': str},  # Ensure timestamp is read as string
        'skipinitialspace': True,
        'skip_blank_lines': True,
    }

    def read_without_header():
        # Treat every line as data and impose the known column layout.
        return pd.read_csv(file_path, names=expected_columns, header=None,
                           **common_options)

    try:
        try:
            df = pd.read_csv(file_path, **common_options)
        except Exception:
            # If that fails, try reading without header
            df = read_without_header()
        else:
            # A headerless file parses without error, but its first data
            # row becomes the header; detect that and re-read.
            if set(expected_columns).isdisjoint(df.columns):
                df = read_without_header()

        # Validate before touching any column so a missing one is reported
        # with the intended message rather than a bare KeyError.
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            raise ValueError(f"Missing required column: {missing[0]}")

        # Remove any rows where timestamp is 'timestamp' (repeated headers);
        # copy so the assignments below do not write into a slice.
        df = df[df['timestamp'] != 'timestamp'].copy()

        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
        # Repeated header rows force the latency columns to be read as
        # strings, so make them numeric again for the statistics.
        for col in required_columns[1:]:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        return df
    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
def convert_timestamps(df):
    """
    Turn the 'timestamp' column into datetimes.

    The values are interpreted as milliseconds since the Unix epoch. The
    column is replaced on the DataFrame that was passed in.

    Args:
        df (pandas.DataFrame): Input DataFrame with a 'timestamp' column

    Returns:
        pandas.DataFrame: The same DataFrame object, with converted timestamps
    """
    epoch_millis = df['timestamp']
    df['timestamp'] = pd.to_datetime(epoch_millis, unit='ms')
    return df
def analyze_latency_data(df):
    """
    Summarise the three latency columns.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns

    Returns:
        dict: Maps 'overall', 'db' and 'cache' to a dict holding the
            'avg', 'p50', 'p95', 'p99' and 'max' of the matching column
    """
    # Report category -> column it is computed from.
    column_for_category = {
        'overall': 'service_time',
        'db': 'db_time',
        'cache': 'cache_time',
    }

    summary = {}
    for category, column in column_for_category.items():
        series = df[column]
        summary[category] = {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }
    return summary
def plot_latency_graph(df):
    """
    Draw service, DB and cache latency over time, each on its own y-axis.

    Args:
        df (pandas.DataFrame): DataFrame with 'timestamp', 'service_time',
            'db_time' and 'cache_time' columns
    """
    fig, service_axis = plt.subplots(figsize=(15, 7))
    service_axis.plot(df['timestamp'], df['service_time'],
                      label='Overall Service Time', color='blue')

    # Second y-axis sharing the same x-axis, for DB time
    db_axis = service_axis.twinx()
    db_axis.plot(df['timestamp'], df['db_time'],
                 label='DB Time', color='red', linestyle='--')

    # Third y-axis; its spine is pushed right so it does not sit on top of
    # the DB axis.
    cache_axis = service_axis.twinx()
    cache_axis.spines['right'].set_position(('axes', 1.2))
    cache_axis.plot(df['timestamp'], df['cache_time'],
                    label='Cache Time', color='green', linestyle=':')

    service_axis.set_xlabel('Timestamp')
    axis_labels = (
        (service_axis, 'Overall Service Time (ms)', 'blue'),
        (db_axis, 'DB Time (ms)', 'red'),
        (cache_axis, 'Cache Time (ms)', 'green'),
    )
    for axis, text, colour in axis_labels:
        axis.set_ylabel(text, color=colour)

    fig.autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # One legend covering the lines of all three axes
    handles, names = [], []
    for axis in (service_axis, db_axis, cache_axis):
        axis_handles, axis_names = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        names = names + axis_names
    service_axis.legend(handles, names, loc='best')

    plt.tight_layout()
    plt.show()
def main(file_path='server_metrics.csv'):
    """
    Load the server metrics, print latency statistics and plot them.

    Errors are reported on stdout rather than raised.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        frame = convert_timestamps(read_csv_with_fallback(file_path))
        # Keep the rows in chronological order for the time-series plot
        frame = frame.sort_values('timestamp')

        latency_stats = analyze_latency_data(frame)
        print("Latency Statistics:")
        for section, figures in latency_stats.items():
            print(f"\n{section.capitalize()} Latency:")
            for label, number in figures.items():
                print(f"{label}: {number:.2f} ms")

        plot_latency_graph(frame)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")
# Script entry point: run main() with its default CSV path only when this
# file is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

View file

@ -0,0 +1,22 @@
Latency Statistics:
Overall Latency:
avg: 298.54 ms
p50: 312.00 ms
p95: 323.45 ms
p99: 382.00 ms
max: 442.00 ms
Db Latency:
avg: 182.40 ms
p50: 208.00 ms
p95: 216.00 ms
p99: 268.76 ms
max: 339.00 ms
Cache Latency:
avg: 21.11 ms
p50: 0.00 ms
p95: 126.00 ms
p99: 170.00 ms
max: 201.00 ms

Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

View file

@ -0,0 +1,102 @@
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Read the CSV file
def read_csv_data(file_path):
    """
    Load the client request log from a CSV file.

    Parameters:
    -----------
    file_path : str
        Path to the CSV file. It must provide 'Timestamp',
        'Duration (ms)' and 'Status Code' columns.

    Returns:
    --------
    pandas.DataFrame
        The loaded rows, with 'Timestamp' parsed as dates and the two
        numeric columns cast to int.
    """
    frame = pd.read_csv(file_path, parse_dates=['Timestamp'])
    # Cast both numeric columns in one place; astype(int) raises on values
    # that cannot be represented as integers (for example missing cells).
    for column in ('Duration (ms)', 'Status Code'):
        frame[column] = frame[column].astype(int)
    return frame
# Visualize the data
def visualize_request_data(df):
    """
    Plot request duration over time as a single line chart.

    Only 'Duration (ms)' is drawn; status codes are not plotted.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data. The 'Timestamp' column is used
        for the x-axis and 'Duration (ms)' for the y-axis.
    """
    # Create the figure and its single axis
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot duration on the left axis
    color1 = 'blue'
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Duration (ms)', color=color1)
    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
    ax1.tick_params(axis='y', labelcolor=color1)

    # Rotate the date labels and show full timestamps on the x-axis
    fig.autofmt_xdate()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

    ax1.set_title('Request Data over Time')

    # Actually draw the legend: the handles used to be collected and then
    # discarded, so the plotted line was never labelled on the figure.
    lines1, labels1 = ax1.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='best')

    ax1.grid(True, linestyle='--', alpha=0.7)

    # Tight layout to prevent cutting off the rotated labels
    fig.tight_layout()
    plt.show()
# Main execution
def main():
    """
    Load 'client_metrics.csv' and plot it.

    Any exception raised while reading or plotting is reported on stdout
    instead of propagating.
    """
    csv_path = 'client_metrics.csv'
    try:
        # Read first, then hand the frame straight to the plotting routine.
        visualize_request_data(read_csv_data(csv_path))
    except Exception as err:
        print(f"An error occurred: {err}")
# Demonstrate data summary
def print_data_summary(df):
    """
    Write a short textual overview of the request data to stdout.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data. The 'Request Type' and
        'Duration (ms)' columns are read.
    """
    separator = "-" * 20
    print("\nData Summary:")
    print(separator)

    total_requests = len(df)
    print(f"Total Requests: {total_requests}")

    request_types = df['Request Type'].unique()
    print(f"Unique Request Types: {request_types}")

    print("\nDuration Statistics:")
    duration_stats = df['Duration (ms)'].describe()
    print(duration_stats)
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 450 KiB

View file

@ -0,0 +1,181 @@
import pandas as pd
import matplotlib.pyplot as plt
def read_csv_with_fallback(file_path):
    """
    Attempt to read CSV file with multiple parsing strategies.

    The file is first read using its own header row; if that read raises,
    it is read again without a header using the expected column names.

    Args:
        file_path (str): Path to the CSV file

    Returns:
        pandas.DataFrame: Parsed DataFrame with a numeric 'timestamp' column

    Raises:
        ValueError: If a required column is missing from the parsed data.
        Exception: Any other read or parse error is reported on stdout and
            re-raised unchanged.
    """
    required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
    try:
        # First, try reading with header
        try:
            df = pd.read_csv(file_path,
                             dtype={'timestamp': str},  # Ensure timestamp is read as string
                             skipinitialspace=True,
                             skip_blank_lines=True)
        except Exception:
            # If that fails, try reading without header and specify column names
            df = pd.read_csv(file_path,
                             names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
                                    'db_rows_read', 'db_rows_written', 'db_total_rows',
                                    'cache_hits', 'cache_misses'],
                             header=None,
                             dtype={'timestamp': str},
                             skipinitialspace=True,
                             skip_blank_lines=True)

        # Validate required columns before any of them is used, so that a
        # missing 'timestamp' column is reported as ValueError, not KeyError
        for col in required_columns:
            if col not in df.columns:
                raise ValueError(f"Missing required column: {col}")

        # Remove any rows where timestamp is 'timestamp' (a header line that
        # was read as data); copy so the assignment below targets its own frame
        df = df[df['timestamp'] != 'timestamp'].copy()

        # Convert timestamp to numeric; unparseable values become NaN
        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
        return df
    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
def convert_timestamps(df):
    """
    Replace the 'timestamp' column with datetime values.

    The column is interpreted as epoch milliseconds and overwritten on the
    DataFrame that was passed in.

    Args:
        df (pandas.DataFrame): Input DataFrame with a 'timestamp' column

    Returns:
        pandas.DataFrame: The same DataFrame object, with converted timestamps
    """
    as_datetime = pd.to_datetime(df['timestamp'], unit='ms')
    df['timestamp'] = as_datetime
    return df
def analyze_latency_data(df):
    """
    Summarise the three latency columns.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns

    Returns:
        dict: For each of 'overall', 'db' and 'cache', a dict holding
            'avg', 'p50', 'p95', 'p99' and 'max'
    """
    # Report category -> DataFrame column it is computed from
    column_for = {
        'overall': 'service_time',
        'db': 'db_time',
        'cache': 'cache_time',
    }

    def summarize(series):
        # Mean, median, tail percentiles and maximum of one latency column
        return {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }

    return {category: summarize(df[column]) for category, column in column_for.items()}
def plot_latency_graph(df):
    """
    Draw service, DB and cache latency against time, each on its own y-axis.

    Args:
        df (pandas.DataFrame): DataFrame with 'timestamp', 'service_time',
            'db_time' and 'cache_time' columns
    """
    fig = plt.figure(figsize=(15, 7))
    ax1 = fig.gca()
    times = df['timestamp']

    # Primary axis: overall service time
    ax1.plot(times, df['service_time'], label='Overall Service Time', color='blue')

    # Second y-axis sharing the same x-axis: DB time
    ax2 = ax1.twinx()
    ax2.plot(times, df['db_time'], label='DB Time', color='red', linestyle='--')

    # Third y-axis; its right spine is moved outward to 1.2 in axes coordinates
    ax3 = ax1.twinx()
    ax3.spines['right'].set_position(('axes', 1.2))
    ax3.plot(times, df['cache_time'], label='Cache Time', color='green', linestyle=':')

    # Axis labels, coloured to match their lines
    ax1.set_xlabel('Timestamp')
    y_labels = (
        (ax1, 'Overall Service Time (ms)', 'blue'),
        (ax2, 'DB Time (ms)', 'red'),
        (ax3, 'Cache Time (ms)', 'green'),
    )
    for axis, text, colour in y_labels:
        axis.set_ylabel(text, color=colour)

    # Format x-axis to show timestamps nicely
    fig.autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # One legend collecting the lines of all three axes
    handles, labels = [], []
    for axis in (ax1, ax2, ax3):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        labels = labels + axis_labels
    ax1.legend(handles, labels, loc='best')

    plt.tight_layout()
    plt.show()
def main(file_path='server_metrics.csv'):
    """
    Load server metrics, print latency statistics and show the latency plot.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        # Load the CSV, convert its timestamps, then order rows chronologically
        frame = convert_timestamps(read_csv_with_fallback(file_path))
        frame = frame.sort_values('timestamp')

        latency_stats = analyze_latency_data(frame)

        # Print statistics, one section per category
        print("Latency Statistics:")
        for category in latency_stats:
            print(f"\n{category.capitalize()} Latency:")
            for metric, value in latency_stats[category].items():
                print(f"{metric}: {value:.2f} ms")

        plot_latency_graph(frame)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,22 @@
Latency Statistics:
Overall Latency:
avg: 6068.11 ms
p50: 4711.00 ms
p95: 8681.00 ms
p99: 8707.00 ms
max: 8975.00 ms
Db Latency:
avg: 230.91 ms
p50: 238.00 ms
p95: 504.00 ms
p99: 668.74 ms
max: 985.00 ms
Cache Latency:
avg: 3104.81 ms
p50: 4191.00 ms
p95: 4233.00 ms
p99: 4258.87 ms
max: 4664.00 ms

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

View file

@ -0,0 +1,102 @@
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Read the CSV file
def read_csv_data(file_path):
    """
    Load the request log CSV into a DataFrame

    Parameters:
    -----------
    file_path : str
        Path to the CSV file

    Returns:
    --------
    pandas.DataFrame
        Request data with 'Timestamp' parsed as dates and the
        'Duration (ms)' and 'Status Code' columns cast to int
    """
    frame = pd.read_csv(file_path, parse_dates=['Timestamp'])

    # Cast the numeric columns one after another, in the original order
    for column in ('Duration (ms)', 'Status Code'):
        frame[column] = frame[column].astype(int)

    return frame
# Visualize the data
def visualize_request_data(df):
    """
    Plot request durations over time on a single axis

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data; the 'Timestamp' and
        'Duration (ms)' columns are plotted
    """
    series_color = 'blue'

    # Create the figure and its only axis
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot Duration on the left axis
    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=series_color, label='Duration (ms)')
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Duration (ms)', color=series_color)
    ax1.tick_params(axis='y', labelcolor=series_color)

    # Rotate the date labels and print full timestamps on the x-axis
    fig.autofmt_xdate()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

    # Title and legend; the plotted line already carries its label, so the
    # legend only has to be drawn
    ax1.set_title('Request Data over Time')
    ax1.legend(loc='best')

    # Add grid
    ax1.grid(True, linestyle='--', alpha=0.7)

    # Tight layout to prevent cutting off labels
    fig.tight_layout()

    # Show the plot
    plt.show()
# Main execution
def main(file_path='client_metrics.csv'):
    """
    Load the client metrics CSV and plot it

    Parameters:
    -----------
    file_path : str, optional
        Path to the CSV file. Defaults to 'client_metrics.csv'
    """
    try:
        # Read the data
        df = read_csv_data(file_path)
        # Visualize the data
        visualize_request_data(df)
    except Exception as e:
        # Top-level boundary: report the failure instead of showing a traceback
        print(f"An error occurred: {e}")
# Demonstrate data summary
def print_data_summary(df):
    """
    Write a short textual overview of the request data to stdout

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data; the 'Request Type' and
        'Duration (ms)' columns are read
    """
    # Header line followed by a 20-character rule
    print("\nData Summary:", "-" * 20, sep="\n")
    print(f"Total Requests: {len(df)}")

    request_types = df['Request Type'].unique()
    print(f"Unique Request Types: {request_types}")

    print("\nDuration Statistics:")
    duration_stats = df['Duration (ms)'].describe()
    print(duration_stats)


if __name__ == '__main__':
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 502 KiB

View file

@ -0,0 +1,181 @@
import pandas as pd
import matplotlib.pyplot as plt
def read_csv_with_fallback(file_path):
    """
    Attempt to read CSV file with multiple parsing strategies.

    The file is first read using its own header row; if that read raises,
    it is read again without a header using the expected column names.

    Args:
        file_path (str): Path to the CSV file

    Returns:
        pandas.DataFrame: Parsed DataFrame with a numeric 'timestamp' column

    Raises:
        ValueError: If a required column is missing from the parsed data.
        Exception: Any other read or parse error is reported on stdout and
            re-raised unchanged.
    """
    required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
    try:
        # First, try reading with header
        try:
            df = pd.read_csv(file_path,
                             dtype={'timestamp': str},  # Ensure timestamp is read as string
                             skipinitialspace=True,
                             skip_blank_lines=True)
        except Exception:
            # If that fails, try reading without header and specify column names
            df = pd.read_csv(file_path,
                             names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
                                    'db_rows_read', 'db_rows_written', 'db_total_rows',
                                    'cache_hits', 'cache_misses'],
                             header=None,
                             dtype={'timestamp': str},
                             skipinitialspace=True,
                             skip_blank_lines=True)

        # Validate required columns before any of them is used, so that a
        # missing 'timestamp' column is reported as ValueError, not KeyError
        for col in required_columns:
            if col not in df.columns:
                raise ValueError(f"Missing required column: {col}")

        # Remove any rows where timestamp is 'timestamp' (a header line that
        # was read as data); copy so the assignment below targets its own frame
        df = df[df['timestamp'] != 'timestamp'].copy()

        # Convert timestamp to numeric; unparseable values become NaN
        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
        return df
    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
def convert_timestamps(df):
    """
    Replace the 'timestamp' column with datetime values.

    The column is interpreted as epoch milliseconds and overwritten on the
    DataFrame that was passed in.

    Args:
        df (pandas.DataFrame): Input DataFrame with a 'timestamp' column

    Returns:
        pandas.DataFrame: The same DataFrame object, with converted timestamps
    """
    as_datetime = pd.to_datetime(df['timestamp'], unit='ms')
    df['timestamp'] = as_datetime
    return df
def analyze_latency_data(df):
    """
    Summarise the three latency columns.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns

    Returns:
        dict: For each of 'overall', 'db' and 'cache', a dict holding
            'avg', 'p50', 'p95', 'p99' and 'max'
    """
    # Report category -> DataFrame column it is computed from
    column_for = {
        'overall': 'service_time',
        'db': 'db_time',
        'cache': 'cache_time',
    }

    def summarize(series):
        # Mean, median, tail percentiles and maximum of one latency column
        return {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }

    return {category: summarize(df[column]) for category, column in column_for.items()}
def plot_latency_graph(df):
    """
    Draw service, DB and cache latency against time, each on its own y-axis.

    Args:
        df (pandas.DataFrame): DataFrame with 'timestamp', 'service_time',
            'db_time' and 'cache_time' columns
    """
    fig = plt.figure(figsize=(15, 7))
    ax1 = fig.gca()
    times = df['timestamp']

    # Primary axis: overall service time
    ax1.plot(times, df['service_time'], label='Overall Service Time', color='blue')

    # Second y-axis sharing the same x-axis: DB time
    ax2 = ax1.twinx()
    ax2.plot(times, df['db_time'], label='DB Time', color='red', linestyle='--')

    # Third y-axis; its right spine is moved outward to 1.2 in axes coordinates
    ax3 = ax1.twinx()
    ax3.spines['right'].set_position(('axes', 1.2))
    ax3.plot(times, df['cache_time'], label='Cache Time', color='green', linestyle=':')

    # Axis labels, coloured to match their lines
    ax1.set_xlabel('Timestamp')
    y_labels = (
        (ax1, 'Overall Service Time (ms)', 'blue'),
        (ax2, 'DB Time (ms)', 'red'),
        (ax3, 'Cache Time (ms)', 'green'),
    )
    for axis, text, colour in y_labels:
        axis.set_ylabel(text, color=colour)

    # Format x-axis to show timestamps nicely
    fig.autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # One legend collecting the lines of all three axes
    handles, labels = [], []
    for axis in (ax1, ax2, ax3):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        labels = labels + axis_labels
    ax1.legend(handles, labels, loc='best')

    plt.tight_layout()
    plt.show()
def main(file_path='server_metrics.csv'):
    """
    Load server metrics, print latency statistics and show the latency plot.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        # Load the CSV, convert its timestamps, then order rows chronologically
        frame = convert_timestamps(read_csv_with_fallback(file_path))
        frame = frame.sort_values('timestamp')

        latency_stats = analyze_latency_data(frame)

        # Print statistics, one section per category
        print("Latency Statistics:")
        for category in latency_stats:
            print(f"\n{category.capitalize()} Latency:")
            for metric, value in latency_stats[category].items():
                print(f"{metric}: {value:.2f} ms")

        plot_latency_graph(frame)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,22 @@
Latency Statistics:
Overall Latency:
avg: 235.42 ms
p50: 236.00 ms
p95: 319.00 ms
p99: 352.94 ms
max: 445.00 ms
Db Latency:
avg: 142.14 ms
p50: 112.00 ms
p95: 215.00 ms
p99: 249.82 ms
max: 309.00 ms
Cache Latency:
avg: 4.52 ms
p50: 0.00 ms
p95: 24.00 ms
p99: 45.00 ms
max: 84.00 ms

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

View file

@ -0,0 +1,102 @@
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Read the CSV file
def read_csv_data(file_path):
    """
    Load the request log CSV into a DataFrame

    Parameters:
    -----------
    file_path : str
        Path to the CSV file

    Returns:
    --------
    pandas.DataFrame
        Request data with 'Timestamp' parsed as dates and the
        'Duration (ms)' and 'Status Code' columns cast to int
    """
    frame = pd.read_csv(file_path, parse_dates=['Timestamp'])

    # Cast the numeric columns one after another, in the original order
    for column in ('Duration (ms)', 'Status Code'):
        frame[column] = frame[column].astype(int)

    return frame
# Visualize the data
def visualize_request_data(df):
    """
    Plot request durations over time on a single axis

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data; the 'Timestamp' and
        'Duration (ms)' columns are plotted
    """
    series_color = 'blue'

    # Create the figure and its only axis
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot Duration on the left axis
    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=series_color, label='Duration (ms)')
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Duration (ms)', color=series_color)
    ax1.tick_params(axis='y', labelcolor=series_color)

    # Rotate the date labels and print full timestamps on the x-axis
    fig.autofmt_xdate()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

    # Title and legend; the plotted line already carries its label, so the
    # legend only has to be drawn
    ax1.set_title('Request Data over Time')
    ax1.legend(loc='best')

    # Add grid
    ax1.grid(True, linestyle='--', alpha=0.7)

    # Tight layout to prevent cutting off labels
    fig.tight_layout()

    # Show the plot
    plt.show()
# Main execution
def main(file_path='client_metrics.csv'):
    """
    Load the client metrics CSV and plot it

    Parameters:
    -----------
    file_path : str, optional
        Path to the CSV file. Defaults to 'client_metrics.csv'
    """
    try:
        # Read the data
        df = read_csv_data(file_path)
        # Visualize the data
        visualize_request_data(df)
    except Exception as e:
        # Top-level boundary: report the failure instead of showing a traceback
        print(f"An error occurred: {e}")
# Demonstrate data summary
def print_data_summary(df):
    """
    Write a short textual overview of the request data to stdout

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data; the 'Request Type' and
        'Duration (ms)' columns are read
    """
    # Header line followed by a 20-character rule
    print("\nData Summary:", "-" * 20, sep="\n")
    print(f"Total Requests: {len(df)}")

    request_types = df['Request Type'].unique()
    print(f"Unique Request Types: {request_types}")

    print("\nDuration Statistics:")
    duration_stats = df['Duration (ms)'].describe()
    print(duration_stats)


if __name__ == '__main__':
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 460 KiB

View file

@ -0,0 +1,181 @@
import pandas as pd
import matplotlib.pyplot as plt
def read_csv_with_fallback(file_path):
    """
    Attempt to read CSV file with multiple parsing strategies.

    The file is first read using its own header row; if that read raises,
    it is read again without a header using the expected column names.

    Args:
        file_path (str): Path to the CSV file

    Returns:
        pandas.DataFrame: Parsed DataFrame with a numeric 'timestamp' column

    Raises:
        ValueError: If a required column is missing from the parsed data.
        Exception: Any other read or parse error is reported on stdout and
            re-raised unchanged.
    """
    required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
    try:
        # First, try reading with header
        try:
            df = pd.read_csv(file_path,
                             dtype={'timestamp': str},  # Ensure timestamp is read as string
                             skipinitialspace=True,
                             skip_blank_lines=True)
        except Exception:
            # If that fails, try reading without header and specify column names
            df = pd.read_csv(file_path,
                             names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
                                    'db_rows_read', 'db_rows_written', 'db_total_rows',
                                    'cache_hits', 'cache_misses'],
                             header=None,
                             dtype={'timestamp': str},
                             skipinitialspace=True,
                             skip_blank_lines=True)

        # Validate required columns before any of them is used, so that a
        # missing 'timestamp' column is reported as ValueError, not KeyError
        for col in required_columns:
            if col not in df.columns:
                raise ValueError(f"Missing required column: {col}")

        # Remove any rows where timestamp is 'timestamp' (a header line that
        # was read as data); copy so the assignment below targets its own frame
        df = df[df['timestamp'] != 'timestamp'].copy()

        # Convert timestamp to numeric; unparseable values become NaN
        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
        return df
    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
def convert_timestamps(df):
    """
    Replace the 'timestamp' column with datetime values.

    The column is interpreted as epoch milliseconds and overwritten on the
    DataFrame that was passed in.

    Args:
        df (pandas.DataFrame): Input DataFrame with a 'timestamp' column

    Returns:
        pandas.DataFrame: The same DataFrame object, with converted timestamps
    """
    as_datetime = pd.to_datetime(df['timestamp'], unit='ms')
    df['timestamp'] = as_datetime
    return df
def analyze_latency_data(df):
    """
    Summarise the three latency columns.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns

    Returns:
        dict: For each of 'overall', 'db' and 'cache', a dict holding
            'avg', 'p50', 'p95', 'p99' and 'max'
    """
    # Report category -> DataFrame column it is computed from
    column_for = {
        'overall': 'service_time',
        'db': 'db_time',
        'cache': 'cache_time',
    }

    def summarize(series):
        # Mean, median, tail percentiles and maximum of one latency column
        return {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }

    return {category: summarize(df[column]) for category, column in column_for.items()}
def plot_latency_graph(df):
    """
    Draw service, DB and cache latency against time, each on its own y-axis.

    Args:
        df (pandas.DataFrame): DataFrame with 'timestamp', 'service_time',
            'db_time' and 'cache_time' columns
    """
    fig = plt.figure(figsize=(15, 7))
    ax1 = fig.gca()
    times = df['timestamp']

    # Primary axis: overall service time
    ax1.plot(times, df['service_time'], label='Overall Service Time', color='blue')

    # Second y-axis sharing the same x-axis: DB time
    ax2 = ax1.twinx()
    ax2.plot(times, df['db_time'], label='DB Time', color='red', linestyle='--')

    # Third y-axis; its right spine is moved outward to 1.2 in axes coordinates
    ax3 = ax1.twinx()
    ax3.spines['right'].set_position(('axes', 1.2))
    ax3.plot(times, df['cache_time'], label='Cache Time', color='green', linestyle=':')

    # Axis labels, coloured to match their lines
    ax1.set_xlabel('Timestamp')
    y_labels = (
        (ax1, 'Overall Service Time (ms)', 'blue'),
        (ax2, 'DB Time (ms)', 'red'),
        (ax3, 'Cache Time (ms)', 'green'),
    )
    for axis, text, colour in y_labels:
        axis.set_ylabel(text, color=colour)

    # Format x-axis to show timestamps nicely
    fig.autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # One legend collecting the lines of all three axes
    handles, labels = [], []
    for axis in (ax1, ax2, ax3):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        labels = labels + axis_labels
    ax1.legend(handles, labels, loc='best')

    plt.tight_layout()
    plt.show()
def main(file_path='server_metrics.csv'):
    """
    Load server metrics, print latency statistics and show the latency plot.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        # Load the CSV, convert its timestamps, then order rows chronologically
        frame = convert_timestamps(read_csv_with_fallback(file_path))
        frame = frame.sort_values('timestamp')

        latency_stats = analyze_latency_data(frame)

        # Print statistics, one section per category
        print("Latency Statistics:")
        for category in latency_stats:
            print(f"\n{category.capitalize()} Latency:")
            for metric, value in latency_stats[category].items():
                print(f"{metric}: {value:.2f} ms")

        plot_latency_graph(frame)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,22 @@
Latency Statistics:
Overall Latency:
avg: 387.27 ms
p50: 481.00 ms
p95: 758.00 ms
p99: 915.60 ms
max: 1259.00 ms
Db Latency:
avg: 216.24 ms
p50: 241.00 ms
p95: 499.00 ms
p99: 675.00 ms
max: 1023.00 ms
Cache Latency:
avg: 13.85 ms
p50: 1.00 ms
p95: 77.00 ms
p99: 132.80 ms
max: 249.00 ms

Binary file not shown.

After

Width:  |  Height:  |  Size: 97 KiB

View file

@ -0,0 +1,102 @@
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Read the CSV file
def read_csv_data(file_path):
    """
    Load the request log CSV into a DataFrame

    Parameters:
    -----------
    file_path : str
        Path to the CSV file

    Returns:
    --------
    pandas.DataFrame
        Request data with 'Timestamp' parsed as dates and the
        'Duration (ms)' and 'Status Code' columns cast to int
    """
    frame = pd.read_csv(file_path, parse_dates=['Timestamp'])

    # Cast the numeric columns one after another, in the original order
    for column in ('Duration (ms)', 'Status Code'):
        frame[column] = frame[column].astype(int)

    return frame
# Visualize the data
def visualize_request_data(df):
    """
    Plot request durations over time on a single axis

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data; the 'Timestamp' and
        'Duration (ms)' columns are plotted
    """
    series_color = 'blue'

    # Create the figure and its only axis
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot Duration on the left axis
    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=series_color, label='Duration (ms)')
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Duration (ms)', color=series_color)
    ax1.tick_params(axis='y', labelcolor=series_color)

    # Rotate the date labels and print full timestamps on the x-axis
    fig.autofmt_xdate()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

    # Title and legend; the plotted line already carries its label, so the
    # legend only has to be drawn
    ax1.set_title('Request Data over Time')
    ax1.legend(loc='best')

    # Add grid
    ax1.grid(True, linestyle='--', alpha=0.7)

    # Tight layout to prevent cutting off labels
    fig.tight_layout()

    # Show the plot
    plt.show()
# Main execution
def main(file_path='client_metrics.csv'):
    """
    Load the client metrics CSV and plot it

    Parameters:
    -----------
    file_path : str, optional
        Path to the CSV file. Defaults to 'client_metrics.csv'
    """
    try:
        # Read the data
        df = read_csv_data(file_path)
        # Visualize the data
        visualize_request_data(df)
    except Exception as e:
        # Top-level boundary: report the failure instead of showing a traceback
        print(f"An error occurred: {e}")
# Demonstrate data summary
def print_data_summary(df):
    """
    Write a short textual overview of the request data to stdout

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data; the 'Request Type' and
        'Duration (ms)' columns are read
    """
    # Header line followed by a 20-character rule
    print("\nData Summary:", "-" * 20, sep="\n")
    print(f"Total Requests: {len(df)}")

    request_types = df['Request Type'].unique()
    print(f"Unique Request Types: {request_types}")

    print("\nDuration Statistics:")
    duration_stats = df['Duration (ms)'].describe()
    print(duration_stats)


if __name__ == '__main__':
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 250 KiB

View file

@ -0,0 +1,181 @@
import pandas as pd
import matplotlib.pyplot as plt
def read_csv_with_fallback(file_path):
    """
    Attempt to read CSV file with multiple parsing strategies.

    The file is first read using its own header row; if that read raises,
    it is read again without a header using the expected column names.

    Args:
        file_path (str): Path to the CSV file

    Returns:
        pandas.DataFrame: Parsed DataFrame with a numeric 'timestamp' column

    Raises:
        ValueError: If a required column is missing from the parsed data.
        Exception: Any other read or parse error is reported on stdout and
            re-raised unchanged.
    """
    required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
    try:
        # First, try reading with header
        try:
            df = pd.read_csv(file_path,
                             dtype={'timestamp': str},  # Ensure timestamp is read as string
                             skipinitialspace=True,
                             skip_blank_lines=True)
        except Exception:
            # If that fails, try reading without header and specify column names
            df = pd.read_csv(file_path,
                             names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
                                    'db_rows_read', 'db_rows_written', 'db_total_rows',
                                    'cache_hits', 'cache_misses'],
                             header=None,
                             dtype={'timestamp': str},
                             skipinitialspace=True,
                             skip_blank_lines=True)

        # Validate required columns before any of them is used, so that a
        # missing 'timestamp' column is reported as ValueError, not KeyError
        for col in required_columns:
            if col not in df.columns:
                raise ValueError(f"Missing required column: {col}")

        # Remove any rows where timestamp is 'timestamp' (a header line that
        # was read as data); copy so the assignment below targets its own frame
        df = df[df['timestamp'] != 'timestamp'].copy()

        # Convert timestamp to numeric; unparseable values become NaN
        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
        return df
    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
def convert_timestamps(df):
    """
    Replace the 'timestamp' column with datetime values.

    The column is interpreted as epoch milliseconds and overwritten on the
    DataFrame that was passed in.

    Args:
        df (pandas.DataFrame): Input DataFrame with a 'timestamp' column

    Returns:
        pandas.DataFrame: The same DataFrame object, with converted timestamps
    """
    as_datetime = pd.to_datetime(df['timestamp'], unit='ms')
    df['timestamp'] = as_datetime
    return df
def analyze_latency_data(df):
    """
    Summarise the three latency columns.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns

    Returns:
        dict: For each of 'overall', 'db' and 'cache', a dict holding
            'avg', 'p50', 'p95', 'p99' and 'max'
    """
    # Report category -> DataFrame column it is computed from
    column_for = {
        'overall': 'service_time',
        'db': 'db_time',
        'cache': 'cache_time',
    }

    def summarize(series):
        # Mean, median, tail percentiles and maximum of one latency column
        return {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }

    return {category: summarize(df[column]) for category, column in column_for.items()}
def plot_latency_graph(df):
    """
    Draw service, DB and cache latency against time, each on its own y-axis.

    Args:
        df (pandas.DataFrame): DataFrame with 'timestamp', 'service_time',
            'db_time' and 'cache_time' columns
    """
    fig = plt.figure(figsize=(15, 7))
    ax1 = fig.gca()
    times = df['timestamp']

    # Primary axis: overall service time
    ax1.plot(times, df['service_time'], label='Overall Service Time', color='blue')

    # Second y-axis sharing the same x-axis: DB time
    ax2 = ax1.twinx()
    ax2.plot(times, df['db_time'], label='DB Time', color='red', linestyle='--')

    # Third y-axis; its right spine is moved outward to 1.2 in axes coordinates
    ax3 = ax1.twinx()
    ax3.spines['right'].set_position(('axes', 1.2))
    ax3.plot(times, df['cache_time'], label='Cache Time', color='green', linestyle=':')

    # Axis labels, coloured to match their lines
    ax1.set_xlabel('Timestamp')
    y_labels = (
        (ax1, 'Overall Service Time (ms)', 'blue'),
        (ax2, 'DB Time (ms)', 'red'),
        (ax3, 'Cache Time (ms)', 'green'),
    )
    for axis, text, colour in y_labels:
        axis.set_ylabel(text, color=colour)

    # Format x-axis to show timestamps nicely
    fig.autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # One legend collecting the lines of all three axes
    handles, labels = [], []
    for axis in (ax1, ax2, ax3):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        labels = labels + axis_labels
    ax1.legend(handles, labels, loc='best')

    plt.tight_layout()
    plt.show()
def main(file_path='server_metrics.csv'):
    """
    Load server metrics, print latency statistics and show the latency plot.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        # Load the CSV, convert its timestamps, then order rows chronologically
        frame = convert_timestamps(read_csv_with_fallback(file_path))
        frame = frame.sort_values('timestamp')

        latency_stats = analyze_latency_data(frame)

        # Print statistics, one section per category
        print("Latency Statistics:")
        for category in latency_stats:
            print(f"\n{category.capitalize()} Latency:")
            for metric, value in latency_stats[category].items():
                print(f"{metric}: {value:.2f} ms")

        plot_latency_graph(frame)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,22 @@
Latency Statistics:
Overall Latency:
avg: 178.84 ms
p50: 108.00 ms
p95: 531.00 ms
p99: 605.00 ms
max: 800.00 ms
Db Latency:
avg: 116.50 ms
p50: 4.00 ms
p95: 426.00 ms
p99: 500.89 ms
max: 696.00 ms
Cache Latency:
avg: 9.48 ms
p50: 1.00 ms
p95: 49.00 ms
p99: 56.00 ms
max: 272.00 ms

Binary file not shown.

After

Width:  |  Height:  |  Size: 342 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 69 KiB

View file

@ -0,0 +1,102 @@
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Read the CSV file
def read_csv_data(file_path):
    """
    Load the request log CSV into a DataFrame

    Parameters:
    -----------
    file_path : str
        Path to the CSV file

    Returns:
    --------
    pandas.DataFrame
        Request data with 'Timestamp' parsed as dates and the
        'Duration (ms)' and 'Status Code' columns cast to int
    """
    frame = pd.read_csv(file_path, parse_dates=['Timestamp'])

    # Cast the numeric columns one after another, in the original order
    for column in ('Duration (ms)', 'Status Code'):
        frame[column] = frame[column].astype(int)

    return frame
# Visualize the data
def visualize_request_data(df):
    """
    Plot request durations over time on a single axis

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data; the 'Timestamp' and
        'Duration (ms)' columns are plotted
    """
    series_color = 'blue'

    # Create the figure and its only axis
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot Duration on the left axis
    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=series_color, label='Duration (ms)')
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Duration (ms)', color=series_color)
    ax1.tick_params(axis='y', labelcolor=series_color)

    # Rotate the date labels and print full timestamps on the x-axis
    fig.autofmt_xdate()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

    # Title and legend; the plotted line already carries its label, so the
    # legend only has to be drawn
    ax1.set_title('Request Data over Time')
    ax1.legend(loc='best')

    # Add grid
    ax1.grid(True, linestyle='--', alpha=0.7)

    # Tight layout to prevent cutting off labels
    fig.tight_layout()

    # Show the plot
    plt.show()
# Main execution
def main(file_path='client_metrics.csv'):
    """
    Load the client metrics CSV and plot it

    Parameters:
    -----------
    file_path : str, optional
        Path to the CSV file. Defaults to 'client_metrics.csv'
    """
    try:
        # Read the data
        df = read_csv_data(file_path)
        # Visualize the data
        visualize_request_data(df)
    except Exception as e:
        # Top-level boundary: report the failure instead of showing a traceback
        print(f"An error occurred: {e}")
# Demonstrate data summary
def print_data_summary(df):
    """
    Write a short textual overview of the request data to stdout

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing request data; the 'Request Type' and
        'Duration (ms)' columns are read
    """
    # Header line followed by a 20-character rule
    print("\nData Summary:", "-" * 20, sep="\n")
    print(f"Total Requests: {len(df)}")

    request_types = df['Request Type'].unique()
    print(f"Unique Request Types: {request_types}")

    print("\nDuration Statistics:")
    duration_stats = df['Duration (ms)'].describe()
    print(duration_stats)


if __name__ == '__main__':
    main()

View file

@ -0,0 +1,181 @@
import pandas as pd
import matplotlib.pyplot as plt
def read_csv_with_fallback(file_path):
    """
    Attempt to read CSV file with multiple parsing strategies.

    The file is first read using its own header row; if that read raises,
    it is read again without a header using the expected column names.

    Args:
        file_path (str): Path to the CSV file

    Returns:
        pandas.DataFrame: Parsed DataFrame with a numeric 'timestamp' column

    Raises:
        ValueError: If a required column is missing from the parsed data.
        Exception: Any other read or parse error is reported on stdout and
            re-raised unchanged.
    """
    required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
    try:
        # First, try reading with header
        try:
            df = pd.read_csv(file_path,
                             dtype={'timestamp': str},  # Ensure timestamp is read as string
                             skipinitialspace=True,
                             skip_blank_lines=True)
        except Exception:
            # If that fails, try reading without header and specify column names
            df = pd.read_csv(file_path,
                             names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
                                    'db_rows_read', 'db_rows_written', 'db_total_rows',
                                    'cache_hits', 'cache_misses'],
                             header=None,
                             dtype={'timestamp': str},
                             skipinitialspace=True,
                             skip_blank_lines=True)

        # Validate required columns before any of them is used, so that a
        # missing 'timestamp' column is reported as ValueError, not KeyError
        for col in required_columns:
            if col not in df.columns:
                raise ValueError(f"Missing required column: {col}")

        # Remove any rows where timestamp is 'timestamp' (a header line that
        # was read as data); copy so the assignment below targets its own frame
        df = df[df['timestamp'] != 'timestamp'].copy()

        # Convert timestamp to numeric; unparseable values become NaN
        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
        return df
    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise
def convert_timestamps(df):
    """
    Replace the 'timestamp' column with datetime values.

    The column is interpreted as epoch milliseconds and overwritten on the
    DataFrame that was passed in.

    Args:
        df (pandas.DataFrame): Input DataFrame with a 'timestamp' column

    Returns:
        pandas.DataFrame: The same DataFrame object, with converted timestamps
    """
    as_datetime = pd.to_datetime(df['timestamp'], unit='ms')
    df['timestamp'] = as_datetime
    return df
def analyze_latency_data(df):
    """
    Summarise the three latency columns.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns

    Returns:
        dict: For each of 'overall', 'db' and 'cache', a dict holding
            'avg', 'p50', 'p95', 'p99' and 'max'
    """
    # Report category -> DataFrame column it is computed from
    column_for = {
        'overall': 'service_time',
        'db': 'db_time',
        'cache': 'cache_time',
    }

    def summarize(series):
        # Mean, median, tail percentiles and maximum of one latency column
        return {
            'avg': series.mean(),
            'p50': series.quantile(0.5),
            'p95': series.quantile(0.95),
            'p99': series.quantile(0.99),
            'max': series.max(),
        }

    return {category: summarize(df[column]) for category, column in column_for.items()}
def plot_latency_graph(df):
    """
    Draw service, DB and cache latency against time, each on its own y-axis.

    Args:
        df (pandas.DataFrame): DataFrame with 'timestamp', 'service_time',
            'db_time' and 'cache_time' columns
    """
    fig = plt.figure(figsize=(15, 7))
    ax1 = fig.gca()
    times = df['timestamp']

    # Primary axis: overall service time
    ax1.plot(times, df['service_time'], label='Overall Service Time', color='blue')

    # Second y-axis sharing the same x-axis: DB time
    ax2 = ax1.twinx()
    ax2.plot(times, df['db_time'], label='DB Time', color='red', linestyle='--')

    # Third y-axis; its right spine is moved outward to 1.2 in axes coordinates
    ax3 = ax1.twinx()
    ax3.spines['right'].set_position(('axes', 1.2))
    ax3.plot(times, df['cache_time'], label='Cache Time', color='green', linestyle=':')

    # Axis labels, coloured to match their lines
    ax1.set_xlabel('Timestamp')
    y_labels = (
        (ax1, 'Overall Service Time (ms)', 'blue'),
        (ax2, 'DB Time (ms)', 'red'),
        (ax3, 'Cache Time (ms)', 'green'),
    )
    for axis, text, colour in y_labels:
        axis.set_ylabel(text, color=colour)

    # Format x-axis to show timestamps nicely
    fig.autofmt_xdate()
    plt.title('Latency Breakdown Over Time')

    # One legend collecting the lines of all three axes
    handles, labels = [], []
    for axis in (ax1, ax2, ax3):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        labels = labels + axis_labels
    ax1.legend(handles, labels, loc='best')

    plt.tight_layout()
    plt.show()
def main(file_path='server_metrics.csv'):
    """
    Load server metrics, print latency statistics and show the latency plot.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
    """
    try:
        # Load the CSV, convert its timestamps, then order rows chronologically
        frame = convert_timestamps(read_csv_with_fallback(file_path))
        frame = frame.sort_values('timestamp')

        latency_stats = analyze_latency_data(frame)

        # Print statistics, one section per category
        print("Latency Statistics:")
        for category in latency_stats:
            print(f"\n{category.capitalize()} Latency:")
            for metric, value in latency_stats[category].items():
                print(f"{metric}: {value:.2f} ms")

        plot_latency_graph(frame)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,22 @@
Latency Statistics:
Overall Latency:
avg: 506.30 ms
p50: 245.00 ms
p95: 1231.00 ms
p99: 1479.34 ms
max: 2964.00 ms
Db Latency:
avg: 322.60 ms
p50: 7.00 ms
p95: 982.35 ms
p99: 1239.68 ms
max: 2711.00 ms
Cache Latency:
avg: 10.99 ms
p50: 1.00 ms
p95: 65.00 ms
p99: 141.00 ms
max: 256.00 ms