import pandas as pd
import matplotlib.pyplot as plt

# Column layout assumed for files that carry no header row.
_HEADERLESS_COLUMNS = [
    'session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
    'db_rows_read', 'db_rows_written', 'db_total_rows',
    'cache_hits', 'cache_misses',
]

# Columns that the analysis and plotting functions read.
_REQUIRED_COLUMNS = ['timestamp', 'service_time', 'db_time', 'cache_time']


def _looks_like_data_row(labels):
    """
    Tell whether parsed column labels are really the file's first data row.

    When a file has no header, pandas promotes its first row to column
    labels; the label sitting in the timestamp position then parses as a
    number, which a genuine header name would not.

    Args:
        labels: Column labels produced by a header-based read

    Returns:
        bool: True if the label in the timestamp position is numeric
    """
    position = _HEADERLESS_COLUMNS.index('timestamp')
    if len(labels) <= position:
        return False
    try:
        float(labels[position])
    except (TypeError, ValueError):
        return False
    return True


def read_csv_with_fallback(file_path):
    """
    Read the metrics CSV, with or without a header row.

    The file is first parsed assuming a header. If that parse fails, or if
    it yields no 'timestamp' column because the first data row was taken
    as the header, the file is re-read without a header using the fixed
    column layout in _HEADERLESS_COLUMNS. Header lines repeated inside the
    data are dropped, and the required columns are converted to numbers
    (unparseable values become NaN).

    Args:
        file_path (str): Path to the CSV file

    Returns:
        pandas.DataFrame: Parsed DataFrame with numeric 'timestamp',
        'service_time', 'db_time' and 'cache_time' columns

    Raises:
        ValueError: If a required column is missing after parsing
        Exception: Any other error from reading the file (for example
            FileNotFoundError) is reported on stdout and re-raised
    """
    read_options = {
        'dtype': {'timestamp': str},  # Keep timestamps as text until filtered
        'skipinitialspace': True,
        'skip_blank_lines': True,
    }

    try:
        try:
            df = pd.read_csv(file_path, **read_options)
        except OSError:
            # A missing or unreadable file cannot be fixed by re-parsing
            raise
        except Exception:
            df = None

        # A header-less file parses "successfully" with its first row as
        # labels, so the absence of 'timestamp' has to be checked as well.
        needs_fallback = df is None or (
            'timestamp' not in df.columns and _looks_like_data_row(df.columns)
        )
        if needs_fallback:
            df = pd.read_csv(file_path,
                             names=_HEADERLESS_COLUMNS,
                             header=None,
                             **read_options)

        # Validate before any column is used, so a bad layout gives a
        # clear message instead of a KeyError
        for col in _REQUIRED_COLUMNS:
            if col not in df.columns:
                raise ValueError(f"Missing required column: {col}")

        # Remove header lines repeated inside the data; copy so that the
        # assignments below act on an independent frame, not a slice
        df = df[df['timestamp'] != 'timestamp'].copy()

        # A stray header line leaves every column as text, so convert all
        # required columns, not only the timestamp
        for col in _REQUIRED_COLUMNS:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        return df

    except Exception as e:
        print(f"Error reading CSV: {e}")
        print("Please check the file format and ensure it matches the expected structure.")
        raise


def convert_timestamps(df):
    """
    Convert the 'timestamp' column from milliseconds to datetime.

    The input frame is left unmodified; a new frame is returned.

    Args:
        df (pandas.DataFrame): Input DataFrame with a numeric 'timestamp'
            column interpreted as milliseconds

    Returns:
        pandas.DataFrame: Copy of the input with 'timestamp' as datetime
    """
    return df.assign(timestamp=pd.to_datetime(df['timestamp'], unit='ms'))


def _summarize(series):
    """
    Compute the summary statistics reported for one latency column.

    Args:
        series (pandas.Series): Latency values

    Returns:
        dict: Mean ('avg'), 50th/95th/99th percentiles ('p50', 'p95',
        'p99') and maximum ('max')
    """
    return {
        'avg': series.mean(),
        'p50': series.quantile(0.5),
        'p95': series.quantile(0.95),
        'p99': series.quantile(0.99),
        'max': series.max(),
    }


def analyze_latency_data(df):
    """
    Calculate latency statistics.

    Args:
        df (pandas.DataFrame): Input DataFrame with 'service_time',
            'db_time' and 'cache_time' columns

    Returns:
        dict: Statistics keyed by 'overall', 'db' and 'cache'; each value
        is a dict with 'avg', 'p50', 'p95', 'p99' and 'max'
    """
    sources = {
        'overall': 'service_time',
        'db': 'db_time',
        'cache': 'cache_time',
    }
    return {label: _summarize(df[column]) for label, column in sources.items()}


def plot_latency_graph(df):
    """
    Create a multi-axis time series plot for latencies.

    Service, DB and cache times are drawn against 'timestamp', each on its
    own y-axis, and the figure is displayed with plt.show().

    Args:
        df (pandas.DataFrame): DataFrame with 'timestamp', 'service_time',
            'db_time' and 'cache_time' columns
    """
    fig, ax1 = plt.subplots(figsize=(15, 7))

    # Overall service time on the primary axis
    ax1.plot(df['timestamp'], df['service_time'],
             label='Overall Service Time', color='blue')

    # DB time on a twin axis sharing the same x-axis
    ax2 = ax1.twinx()
    ax2.plot(df['timestamp'], df['db_time'],
             label='DB Time', color='red', linestyle='--')

    # Cache time on a third axis, pushed right so its scale does not
    # overlap the DB axis
    ax3 = ax1.twinx()
    ax3.spines['right'].set_position(('axes', 1.2))
    ax3.plot(df['timestamp'], df['cache_time'],
             label='Cache Time', color='green', linestyle=':')

    # Labels and title
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Overall Service Time (ms)', color='blue')
    ax2.set_ylabel('DB Time (ms)', color='red')
    ax3.set_ylabel('Cache Time (ms)', color='green')
    ax1.set_title('Latency Breakdown Over Time')

    # Rotate and align the date labels on the x-axis
    fig.autofmt_xdate()

    # One legend covering the lines from all three axes
    handles, labels = [], []
    for axis in (ax1, ax2, ax3):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles.extend(axis_handles)
        labels.extend(axis_labels)
    ax1.legend(handles, labels, loc='best')

    fig.tight_layout()
    plt.show()

def main(file_path='server_metrics.csv'):
    """
    Load server metrics, print latency statistics and show the latency plot.

    Errors are reported on stdout instead of being propagated.

    Args:
        file_path (str, optional): Path to the CSV file. Defaults to
            'server_metrics.csv'.
    """
    try:
        # Load the file, convert its timestamps, then order the samples
        # chronologically
        frame = convert_timestamps(read_csv_with_fallback(file_path))
        frame = frame.sort_values('timestamp')

        summary = analyze_latency_data(frame)

        # Report every statistic, grouped by latency category
        print("Latency Statistics:")
        for section in summary:
            print(f"\n{section.capitalize()} Latency:")
            for name, number in summary[section].items():
                print(f"{name}: {number:.2f} ms")

        plot_latency_graph(frame)

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()