diff --git a/results/0 - Single Region - Chicago/Client - Chicago/client_latency.png b/results/0 - Single Region - Chicago/Client - Chicago/client_latency.png
new file mode 100644
index 0000000..099c02b
Binary files /dev/null and b/results/0 - Single Region - Chicago/Client - Chicago/client_latency.png differ
diff --git a/results/0 - Single Region - Chicago/Client - Chicago/client_processing.py b/results/0 - Single Region - Chicago/Client - Chicago/client_processing.py
new file mode 100644
index 0000000..a652ccd
--- /dev/null
+++ b/results/0 - Single Region - Chicago/Client - Chicago/client_processing.py
@@ -0,0 +1,102 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+
+# Read the CSV file
+def read_csv_data(file_path):
+    """
+    Read CSV file with request data
+
+    Parameters:
+    -----------
+    file_path : str
+        Path to the CSV file
+
+    Returns:
+    --------
+    pandas.DataFrame
+        DataFrame with parsed request data
+    """
+    # Read the CSV file
+    df = pd.read_csv(file_path, parse_dates=['Timestamp'])
+
+    # Ensure data types are correct
+    df['Duration (ms)'] = df['Duration (ms)'].astype(int)
+    df['Status Code'] = df['Status Code'].astype(int)
+
+    return df
+
+# Visualize the data
+def visualize_request_data(df):
+    """
+    Create a time-series plot of request durations
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    # Create the figure and the first axis
+    fig, ax1 = plt.subplots(figsize=(12, 6))
+
+    # Plot Duration on the left axis
+    color1 = 'blue'
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Duration (ms)', color=color1)
+    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
+    ax1.tick_params(axis='y', labelcolor=color1)
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
+
+    # Title and legend
+    plt.title('Request Data over Time')
+
+    # Create a legend
+    ax1.legend(loc='best')
+
+    # Add grid
+    ax1.grid(True, linestyle='--', alpha=0.7)
+
+    # Tight layout to prevent cutting off labels
+    plt.tight_layout()
+
+    # Show the plot
+    plt.show()
+
+# Main execution
+def main():
+    # Path to your CSV file
+    file_path = 'client_metrics.csv'
+
+    try:
+        # Read the data and print a summary
+        df = read_csv_data(file_path)
+        print_data_summary(df)
+        # Visualize the data
+        visualize_request_data(df)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+# Demonstrate data summary
+def print_data_summary(df):
+    """
+    Print a summary of the request data
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    print("\nData Summary:")
+    print("-" * 20)
+    print(f"Total Requests: {len(df)}")
+    print(f"Unique Request Types: {df['Request Type'].unique()}")
+
+    print("\nDuration Statistics:")
+    print(df['Duration (ms)'].describe())
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/results/0 - Single Region - Chicago/Client - Chicago/server_latency.png b/results/0 - Single Region - Chicago/Client - Chicago/server_latency.png
new file mode 100644
index 0000000..561d587
Binary files /dev/null and b/results/0 - Single Region - Chicago/Client - Chicago/server_latency.png differ
diff --git a/results/0 - Single Region - Chicago/Client - Chicago/server_processing.py b/results/0 - Single Region - Chicago/Client - Chicago/server_processing.py
new file mode 100644
index 0000000..b706311
--- /dev/null
+++ b/results/0 - Single Region - Chicago/Client - Chicago/server_processing.py
@@ -0,0 +1,181 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+
+def read_csv_with_fallback(file_path):
+    """
+    Attempt to read CSV file with multiple parsing strategies.
+
+    Args:
+        file_path (str): Path to the CSV file
+
+    Returns:
+        pandas.DataFrame: Parsed DataFrame
+    """
+    try:
+        # First, try reading with header
+        try:
+            df = pd.read_csv(file_path,
+                             dtype={'timestamp': str},  # Ensure timestamp is read as string
+                             skipinitialspace=True,
+                             skip_blank_lines=True)
+        except Exception:
+            # If that fails, try reading without header and specify column names
+            df = pd.read_csv(file_path,
+                             names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
+                                    'db_rows_read', 'db_rows_written', 'db_total_rows',
+                                    'cache_hits', 'cache_misses'],
+                             header=None,
+                             dtype={'timestamp': str},
+                             skipinitialspace=True,
+                             skip_blank_lines=True)
+
+        # Remove any rows where timestamp is 'timestamp'
+        df = df[df['timestamp'] != 'timestamp']
+
+        # Convert timestamp to numeric
+        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
+
+        # Validate required columns
+        required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
+        for col in required_columns:
+            if col not in df.columns:
+                raise ValueError(f"Missing required column: {col}")
+
+        return df
+
+    except Exception as e:
+        print(f"Error reading CSV: {e}")
+        print("Please check the file format and ensure it matches the expected structure.")
+        raise
+
+def convert_timestamps(df):
+    """
+    Convert timestamps to datetime.
+
+    Args:
+        df (pandas.DataFrame): Input DataFrame
+
+    Returns:
+        pandas.DataFrame: DataFrame with converted timestamps
+    """
+    # Convert millisecond timestamps to datetime
+    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
+
+    return df
+
+def analyze_latency_data(df):
+    """
+    Calculate latency statistics.
+
+    Args:
+        df (pandas.DataFrame): Input DataFrame
+
+    Returns:
+        dict: Latency statistics
+    """
+    # Calculate statistics
+    stats = {
+        'overall': {
+            'avg': df['service_time'].mean(),
+            'p50': df['service_time'].quantile(0.5),
+            'p95': df['service_time'].quantile(0.95),
+            'p99': df['service_time'].quantile(0.99),
+            'max': df['service_time'].max()
+        },
+        'db': {
+            'avg': df['db_time'].mean(),
+            'p50': df['db_time'].quantile(0.5),
+            'p95': df['db_time'].quantile(0.95),
+            'p99': df['db_time'].quantile(0.99),
+            'max': df['db_time'].max()
+        },
+        'cache': {
+            'avg': df['cache_time'].mean(),
+            'p50': df['cache_time'].quantile(0.5),
+            'p95': df['cache_time'].quantile(0.95),
+            'p99': df['cache_time'].quantile(0.99),
+            'max': df['cache_time'].max()
+        }
+    }
+
+    return stats
+
+def plot_latency_graph(df):
+    """
+    Create a multi-axis time series plot for latencies.
+
+    Args:
+        df (pandas.DataFrame): DataFrame with timestamp and time columns
+    """
+    plt.figure(figsize=(15, 7))
+
+    # Plot overall service time
+    plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
+
+    # Create a twin axis for DB time
+    ax1 = plt.gca()
+    ax2 = ax1.twinx()
+    ax2.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')
+
+    # Create a third axis for cache time
+    ax3 = ax1.twinx()
+    # Offset the third axis slightly to the right
+    ax3.spines['right'].set_position(('axes', 1.2))
+    ax3.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')
+
+    # Set labels and title
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Overall Service Time (ms)', color='blue')
+    ax2.set_ylabel('DB Time (ms)', color='red')
+    ax3.set_ylabel('Cache Time (ms)', color='green')
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    plt.title('Latency Breakdown Over Time')
+
+    # Combine legends
+    lines1, labels1 = ax1.get_legend_handles_labels()
+    lines2, labels2 = ax2.get_legend_handles_labels()
+    lines3, labels3 = ax3.get_legend_handles_labels()
+    ax1.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc='best')
+
+    plt.tight_layout()
+    plt.show()
+
+def main(file_path='server_metrics.csv'):
+    """
+    Main function to process and visualize server metrics.
+
+    Args:
+        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
+    """
+    try:
+        # Read CSV file
+        df = read_csv_with_fallback(file_path)
+
+        # Convert timestamps
+        df = convert_timestamps(df)
+
+        # Sort by timestamp to ensure chronological order
+        df = df.sort_values('timestamp')
+
+        # Analyze latency data
+        stats = analyze_latency_data(df)
+
+        # Print statistics
+        print("Latency Statistics:")
+        for category, metrics in stats.items():
+            print(f"\n{category.capitalize()} Latency:")
+            for metric, value in metrics.items():
+                print(f"{metric}: {value:.2f} ms")
+
+        # Plot the graph
+        plot_latency_graph(df)
+
+    except FileNotFoundError:
+        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/results/0 - Single Region - Chicago/Client - Chicago/server_summary.txt b/results/0 - Single Region - Chicago/Client - Chicago/server_summary.txt
new file mode 100644
index 0000000..b16e219
--- /dev/null
+++ b/results/0 - Single Region - Chicago/Client - Chicago/server_summary.txt
@@ -0,0 +1,20 @@
+Overall Latency:
+avg: 19.67 ms
+p50: 15.00 ms
+p95: 72.60 ms
+p99: 100.00 ms
+max: 274.00 ms
+
+Db Latency:
+avg: 7.15 ms
+p50: 0.00 ms
+p95: 39.00 ms
+p99: 78.00 ms
+max: 269.00 ms
+
+Cache Latency:
+avg: 9.21 ms
+p50: 2.00 ms
+p95: 33.00 ms
+p99: 42.92 ms
+max: 197.00 ms
diff --git a/results/0 - Single Region - Chicago/Client - Frankfurt/client_latency.png b/results/0 - Single Region - Chicago/Client - Frankfurt/client_latency.png
new file mode 100644
index 0000000..4f9df59
Binary files /dev/null and b/results/0 - Single Region - Chicago/Client - Frankfurt/client_latency.png differ
diff --git a/results/0 - Single Region - Chicago/Client - Frankfurt/client_processing.py b/results/0 - Single Region - Chicago/Client - Frankfurt/client_processing.py
new file mode 100644
index 0000000..a652ccd
--- /dev/null
+++ b/results/0 - Single Region - Chicago/Client - Frankfurt/client_processing.py
@@ -0,0 +1,102 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+
+# Read the CSV file
+def read_csv_data(file_path):
+    """
+    Read CSV file with request data
+
+    Parameters:
+    -----------
+    file_path : str
+        Path to the CSV file
+
+    Returns:
+    --------
+    pandas.DataFrame
+        DataFrame with parsed request data
+    """
+    # Read the CSV file
+    df = pd.read_csv(file_path, parse_dates=['Timestamp'])
+
+    # Ensure data types are correct
+    df['Duration (ms)'] = df['Duration (ms)'].astype(int)
+    df['Status Code'] = df['Status Code'].astype(int)
+
+    return df
+
+# Visualize the data
+def visualize_request_data(df):
+    """
+    Create a time-series plot of request durations
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    # Create the figure and the first axis
+    fig, ax1 = plt.subplots(figsize=(12, 6))
+
+    # Plot Duration on the left axis
+    color1 = 'blue'
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Duration (ms)', color=color1)
+    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
+    ax1.tick_params(axis='y', labelcolor=color1)
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
+
+    # Title and legend
+    plt.title('Request Data over Time')
+
+    # Create a legend
+    ax1.legend(loc='best')
+
+    # Add grid
+    ax1.grid(True, linestyle='--', alpha=0.7)
+
+    # Tight layout to prevent cutting off labels
+    plt.tight_layout()
+
+    # Show the plot
+    plt.show()
+
+# Main execution
+def main():
+    # Path to your CSV file
+    file_path = 'client_metrics.csv'
+
+    try:
+        # Read the data and print a summary
+        df = read_csv_data(file_path)
+        print_data_summary(df)
+        # Visualize the data
+        visualize_request_data(df)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+# Demonstrate data summary
+def print_data_summary(df):
+    """
+    Print a summary of the request data
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    print("\nData Summary:")
+    print("-" * 20)
+    print(f"Total Requests: {len(df)}")
{len(df)}") + print(f"Unique Request Types: {df['Request Type'].unique()}") + + print("\nDuration Statistics:") + print(df['Duration (ms)'].describe()) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/results/0 - Single Region - Chicago/Client - Frankfurt/server_latency.png b/results/0 - Single Region - Chicago/Client - Frankfurt/server_latency.png new file mode 100644 index 0000000..3e92ad2 Binary files /dev/null and b/results/0 - Single Region - Chicago/Client - Frankfurt/server_latency.png differ diff --git a/results/0 - Single Region - Chicago/Client - Frankfurt/server_processing.py b/results/0 - Single Region - Chicago/Client - Frankfurt/server_processing.py new file mode 100644 index 0000000..b706311 --- /dev/null +++ b/results/0 - Single Region - Chicago/Client - Frankfurt/server_processing.py @@ -0,0 +1,181 @@ +import pandas as pd +import matplotlib.pyplot as plt + +def read_csv_with_fallback(file_path): + """ + Attempt to read CSV file with multiple parsing strategies. + + Args: + file_path (str): Path to the CSV file + + Returns: + pandas.DataFrame: Parsed DataFrame + """ + try: + # First, try reading with header + try: + df = pd.read_csv(file_path, + dtype={'timestamp': str}, # Ensure timestamp is read as string + skipinitialspace=True, + skip_blank_lines=True) + except Exception: + # If that fails, try reading without header and specify column names + df = pd.read_csv(file_path, + names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time', + 'db_rows_read', 'db_rows_written', 'db_total_rows', + 'cache_hits', 'cache_misses'], + header=None, + dtype={'timestamp': str}, + skipinitialspace=True, + skip_blank_lines=True) + + # Remove any rows where timestamp is 'timestamp' + df = df[df['timestamp'] != 'timestamp'] + + # Convert timestamp to numeric + df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce') + + # Validate required columns + required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time'] + for col in required_columns: + if col not in df.columns: + raise ValueError(f"Missing required column: {col}") + + return df + + except Exception as e: + print(f"Error reading CSV: {e}") + print("Please check the file format and ensure it matches the expected structure.") + raise + +def convert_timestamps(df): + """ + Convert timestamps to datetime. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + pandas.DataFrame: DataFrame with converted timestamps + """ + # Convert millisecond timestamps to datetime + df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms') + + return df + +def analyze_latency_data(df): + """ + Calculate latency statistics. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + dict: Latency statistics + """ + # Calculate statistics + stats = { + 'overall': { + 'avg': df['service_time'].mean(), + 'p50': df['service_time'].quantile(0.5), + 'p95': df['service_time'].quantile(0.95), + 'p99': df['service_time'].quantile(0.99), + 'max': df['service_time'].max() + }, + 'db': { + 'avg': df['db_time'].mean(), + 'p50': df['db_time'].quantile(0.5), + 'p95': df['db_time'].quantile(0.95), + 'p99': df['db_time'].quantile(0.99), + 'max': df['db_time'].max() + }, + 'cache': { + 'avg': df['cache_time'].mean(), + 'p50': df['cache_time'].quantile(0.5), + 'p95': df['cache_time'].quantile(0.95), + 'p99': df['cache_time'].quantile(0.99), + 'max': df['cache_time'].max() + } + } + + return stats + +def plot_latency_graph(df): + """ + Create a multi-axis time series plot for latencies. 
+
+    Args:
+        df (pandas.DataFrame): DataFrame with timestamp and time columns
+    """
+    plt.figure(figsize=(15, 7))
+
+    # Plot overall service time
+    plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
+
+    # Create a twin axis for DB time
+    ax1 = plt.gca()
+    ax2 = ax1.twinx()
+    ax2.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')
+
+    # Create a third axis for cache time
+    ax3 = ax1.twinx()
+    # Offset the third axis slightly to the right
+    ax3.spines['right'].set_position(('axes', 1.2))
+    ax3.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')
+
+    # Set labels and title
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Overall Service Time (ms)', color='blue')
+    ax2.set_ylabel('DB Time (ms)', color='red')
+    ax3.set_ylabel('Cache Time (ms)', color='green')
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    plt.title('Latency Breakdown Over Time')
+
+    # Combine legends
+    lines1, labels1 = ax1.get_legend_handles_labels()
+    lines2, labels2 = ax2.get_legend_handles_labels()
+    lines3, labels3 = ax3.get_legend_handles_labels()
+    ax1.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc='best')
+
+    plt.tight_layout()
+    plt.show()
+
+def main(file_path='server_metrics.csv'):
+    """
+    Main function to process and visualize server metrics.
+
+    Args:
+        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
+    """
+    try:
+        # Read CSV file
+        df = read_csv_with_fallback(file_path)
+
+        # Convert timestamps
+        df = convert_timestamps(df)
+
+        # Sort by timestamp to ensure chronological order
+        df = df.sort_values('timestamp')
+
+        # Analyze latency data
+        stats = analyze_latency_data(df)
+
+        # Print statistics
+        print("Latency Statistics:")
+        for category, metrics in stats.items():
+            print(f"\n{category.capitalize()} Latency:")
+            for metric, value in metrics.items():
+                print(f"{metric}: {value:.2f} ms")
+
+        # Plot the graph
+        plot_latency_graph(df)
+
+    except FileNotFoundError:
+        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/results/0 - Single Region - Chicago/Client - Frankfurt/server_summary.txt b/results/0 - Single Region - Chicago/Client - Frankfurt/server_summary.txt
new file mode 100644
index 0000000..b67099b
--- /dev/null
+++ b/results/0 - Single Region - Chicago/Client - Frankfurt/server_summary.txt
@@ -0,0 +1,20 @@
+Overall Latency:
+avg: 14.89 ms
+p50: 9.00 ms
+p95: 64.00 ms
+p99: 100.00 ms
+max: 295.00 ms
+
+Db Latency:
+avg: 6.92 ms
+p50: 0.00 ms
+p95: 38.00 ms
+p99: 78.00 ms
+max: 283.00 ms
+
+Cache Latency:
+avg: 5.32 ms
+p50: 2.00 ms
+p95: 27.00 ms
+p99: 35.00 ms
+max: 147.00 ms
diff --git a/results/0 - Single Region - Chicago/Client - Singapore/client_latency.png b/results/0 - Single Region - Chicago/Client - Singapore/client_latency.png
new file mode 100644
index 0000000..de29224
Binary files /dev/null and b/results/0 - Single Region - Chicago/Client - Singapore/client_latency.png differ
diff --git a/results/0 - Single Region - Chicago/Client - Singapore/client_processing.py b/results/0 - Single Region - Chicago/Client - Singapore/client_processing.py
new file mode 100644
index 0000000..a652ccd
--- /dev/null
+++ b/results/0 - Single Region - Chicago/Client - Singapore/client_processing.py
@@ -0,0 +1,102 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+
+# Read the CSV file
+def read_csv_data(file_path):
+    """
+    Read CSV file with request data
+
+    Parameters:
+    -----------
+    file_path : str
+        Path to the CSV file
+
+    Returns:
+    --------
+    pandas.DataFrame
+        DataFrame with parsed request data
+    """
+    # Read the CSV file
+    df = pd.read_csv(file_path, parse_dates=['Timestamp'])
+
+    # Ensure data types are correct
+    df['Duration (ms)'] = df['Duration (ms)'].astype(int)
+    df['Status Code'] = df['Status Code'].astype(int)
+
+    return df
+
+# Visualize the data
+def visualize_request_data(df):
+    """
+    Create a time-series plot of request durations
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    # Create the figure and the first axis
+    fig, ax1 = plt.subplots(figsize=(12, 6))
+
+    # Plot Duration on the left axis
+    color1 = 'blue'
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Duration (ms)', color=color1)
+    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
+    ax1.tick_params(axis='y', labelcolor=color1)
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
+
+    # Title and legend
+    plt.title('Request Data over Time')
+
+    # Create a legend
+    ax1.legend(loc='best')
+
+    # Add grid
+    ax1.grid(True, linestyle='--', alpha=0.7)
+
+    # Tight layout to prevent cutting off labels
+    plt.tight_layout()
+
+    # Show the plot
+    plt.show()
+
+# Main execution
+def main():
+    # Path to your CSV file
+    file_path = 'client_metrics.csv'
+
+    try:
+        # Read the data and print a summary
+        df = read_csv_data(file_path)
+        print_data_summary(df)
+        # Visualize the data
+        visualize_request_data(df)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+# Demonstrate data summary
+def print_data_summary(df):
+    """
+    Print a summary of the request data
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    print("\nData Summary:")
+    print("-" * 20)
+    print(f"Total Requests: {len(df)}")
Requests: {len(df)}") + print(f"Unique Request Types: {df['Request Type'].unique()}") + + print("\nDuration Statistics:") + print(df['Duration (ms)'].describe()) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/results/0 - Single Region - Chicago/Client - Singapore/server_latency.png b/results/0 - Single Region - Chicago/Client - Singapore/server_latency.png new file mode 100644 index 0000000..949644a Binary files /dev/null and b/results/0 - Single Region - Chicago/Client - Singapore/server_latency.png differ diff --git a/results/0 - Single Region - Chicago/Client - Singapore/server_processing.py b/results/0 - Single Region - Chicago/Client - Singapore/server_processing.py new file mode 100644 index 0000000..b706311 --- /dev/null +++ b/results/0 - Single Region - Chicago/Client - Singapore/server_processing.py @@ -0,0 +1,181 @@ +import pandas as pd +import matplotlib.pyplot as plt + +def read_csv_with_fallback(file_path): + """ + Attempt to read CSV file with multiple parsing strategies. + + Args: + file_path (str): Path to the CSV file + + Returns: + pandas.DataFrame: Parsed DataFrame + """ + try: + # First, try reading with header + try: + df = pd.read_csv(file_path, + dtype={'timestamp': str}, # Ensure timestamp is read as string + skipinitialspace=True, + skip_blank_lines=True) + except Exception: + # If that fails, try reading without header and specify column names + df = pd.read_csv(file_path, + names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time', + 'db_rows_read', 'db_rows_written', 'db_total_rows', + 'cache_hits', 'cache_misses'], + header=None, + dtype={'timestamp': str}, + skipinitialspace=True, + skip_blank_lines=True) + + # Remove any rows where timestamp is 'timestamp' + df = df[df['timestamp'] != 'timestamp'] + + # Convert timestamp to numeric + df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce') + + # Validate required columns + required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time'] + for col in required_columns: + if col not in df.columns: + raise ValueError(f"Missing required column: {col}") + + return df + + except Exception as e: + print(f"Error reading CSV: {e}") + print("Please check the file format and ensure it matches the expected structure.") + raise + +def convert_timestamps(df): + """ + Convert timestamps to datetime. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + pandas.DataFrame: DataFrame with converted timestamps + """ + # Convert millisecond timestamps to datetime + df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms') + + return df + +def analyze_latency_data(df): + """ + Calculate latency statistics. 
+
+    Args:
+        df (pandas.DataFrame): Input DataFrame
+
+    Returns:
+        dict: Latency statistics
+    """
+    # Calculate statistics
+    stats = {
+        'overall': {
+            'avg': df['service_time'].mean(),
+            'p50': df['service_time'].quantile(0.5),
+            'p95': df['service_time'].quantile(0.95),
+            'p99': df['service_time'].quantile(0.99),
+            'max': df['service_time'].max()
+        },
+        'db': {
+            'avg': df['db_time'].mean(),
+            'p50': df['db_time'].quantile(0.5),
+            'p95': df['db_time'].quantile(0.95),
+            'p99': df['db_time'].quantile(0.99),
+            'max': df['db_time'].max()
+        },
+        'cache': {
+            'avg': df['cache_time'].mean(),
+            'p50': df['cache_time'].quantile(0.5),
+            'p95': df['cache_time'].quantile(0.95),
+            'p99': df['cache_time'].quantile(0.99),
+            'max': df['cache_time'].max()
+        }
+    }
+
+    return stats
+
+def plot_latency_graph(df):
+    """
+    Create a multi-axis time series plot for latencies.
+
+    Args:
+        df (pandas.DataFrame): DataFrame with timestamp and time columns
+    """
+    plt.figure(figsize=(15, 7))
+
+    # Plot overall service time
+    plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
+
+    # Create a twin axis for DB time
+    ax1 = plt.gca()
+    ax2 = ax1.twinx()
+    ax2.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')
+
+    # Create a third axis for cache time
+    ax3 = ax1.twinx()
+    # Offset the third axis slightly to the right
+    ax3.spines['right'].set_position(('axes', 1.2))
+    ax3.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')
+
+    # Set labels and title
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Overall Service Time (ms)', color='blue')
+    ax2.set_ylabel('DB Time (ms)', color='red')
+    ax3.set_ylabel('Cache Time (ms)', color='green')
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    plt.title('Latency Breakdown Over Time')
+
+    # Combine legends
+    lines1, labels1 = ax1.get_legend_handles_labels()
+    lines2, labels2 = ax2.get_legend_handles_labels()
+    lines3, labels3 = ax3.get_legend_handles_labels()
+    ax1.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc='best')
+
+    plt.tight_layout()
+    plt.show()
+
+def main(file_path='server_metrics.csv'):
+    """
+    Main function to process and visualize server metrics.
+
+    Args:
+        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
+    """
+    try:
+        # Read CSV file
+        df = read_csv_with_fallback(file_path)
+
+        # Convert timestamps
+        df = convert_timestamps(df)
+
+        # Sort by timestamp to ensure chronological order
+        df = df.sort_values('timestamp')
+
+        # Analyze latency data
+        stats = analyze_latency_data(df)
+
+        # Print statistics
+        print("Latency Statistics:")
+        for category, metrics in stats.items():
+            print(f"\n{category.capitalize()} Latency:")
+            for metric, value in metrics.items():
+                print(f"{metric}: {value:.2f} ms")
+
+        # Plot the graph
+        plot_latency_graph(df)
+
+    except FileNotFoundError:
+        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/results/0 - Single Region - Chicago/Client - Singapore/server_summary.txt b/results/0 - Single Region - Chicago/Client - Singapore/server_summary.txt
new file mode 100644
index 0000000..86f6df5
--- /dev/null
+++ b/results/0 - Single Region - Chicago/Client - Singapore/server_summary.txt
@@ -0,0 +1,22 @@
+Latency Statistics:
+
+Overall Latency:
+avg: 15.03 ms
+p50: 7.00 ms
+p95: 68.00 ms
+p99: 101.00 ms
+max: 277.00 ms
+
+Db Latency:
+avg: 7.35 ms
+p50: 0.00 ms
+p95: 38.00 ms
+p99: 80.00 ms
+max: 273.00 ms
+
+Cache Latency:
+avg: 5.01 ms
+p50: 2.00 ms
+p95: 24.00 ms
+p99: 34.00 ms
+max: 146.00 ms
diff --git a/results/1 - Scale Only App/Client - Frankfurt/client_latency.png b/results/1 - Scale Only App/Client - Frankfurt/client_latency.png
new file mode 100644
index 0000000..05ca3e4
Binary files /dev/null and b/results/1 - Scale Only App/Client - Frankfurt/client_latency.png differ
diff --git a/results/1 - Scale Only App/Client - Frankfurt/client_processing.py b/results/1 - Scale Only App/Client - Frankfurt/client_processing.py
new file mode 100644
index 0000000..a652ccd
--- /dev/null
+++ b/results/1 - Scale Only App/Client - Frankfurt/client_processing.py
@@ -0,0 +1,102 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+
+# Read the CSV file
+def read_csv_data(file_path):
+    """
+    Read CSV file with request data
+
+    Parameters:
+    -----------
+    file_path : str
+        Path to the CSV file
+
+    Returns:
+    --------
+    pandas.DataFrame
+        DataFrame with parsed request data
+    """
+    # Read the CSV file
+    df = pd.read_csv(file_path, parse_dates=['Timestamp'])
+
+    # Ensure data types are correct
+    df['Duration (ms)'] = df['Duration (ms)'].astype(int)
+    df['Status Code'] = df['Status Code'].astype(int)
+
+    return df
+
+# Visualize the data
+def visualize_request_data(df):
+    """
+    Create a time-series plot of request durations
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    # Create the figure and the first axis
+    fig, ax1 = plt.subplots(figsize=(12, 6))
+
+    # Plot Duration on the left axis
+    color1 = 'blue'
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Duration (ms)', color=color1)
+    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
+    ax1.tick_params(axis='y', labelcolor=color1)
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
+
+    # Title and legend
+    plt.title('Request Data over Time')
+
+    # Create a legend
+    ax1.legend(loc='best')
+
+    # Add grid
+    ax1.grid(True, linestyle='--', alpha=0.7)
+
+    # Tight layout to prevent cutting off labels
+    plt.tight_layout()
+
+    # Show the plot
+    plt.show()
+
+# Main execution
+def main():
+    # Path to your CSV file
+    file_path = 'client_metrics.csv'
+
+    try:
+        # Read the data and print a summary
+        df = read_csv_data(file_path)
+        print_data_summary(df)
+        # Visualize the data
+        visualize_request_data(df)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+# Demonstrate data summary
+def print_data_summary(df):
+    """
+    Print a summary of the request data
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    print("\nData Summary:")
+    print("-" * 20)
+    print(f"Total Requests: {len(df)}")
+    print(f"Unique Request Types: {df['Request Type'].unique()}")
+
+    print("\nDuration Statistics:")
+    print(df['Duration (ms)'].describe())
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/results/1 - Scale Only App/Client - Frankfurt/server_latency.png b/results/1 - Scale Only App/Client - Frankfurt/server_latency.png
new file mode 100644
index 0000000..58de289
Binary files /dev/null and b/results/1 - Scale Only App/Client - Frankfurt/server_latency.png differ
diff --git a/results/1 - Scale Only App/Client - Frankfurt/server_processing.py b/results/1 - Scale Only App/Client - Frankfurt/server_processing.py
new file mode 100644
index 0000000..b706311
--- /dev/null
+++ b/results/1 - Scale Only App/Client - Frankfurt/server_processing.py
@@ -0,0 +1,181 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+
+def read_csv_with_fallback(file_path):
+    """
+    Attempt to read CSV file with multiple parsing strategies.
+
+    Args:
+        file_path (str): Path to the CSV file
+
+    Returns:
+        pandas.DataFrame: Parsed DataFrame
+    """
+    try:
+        # First, try reading with header
+        try:
+            df = pd.read_csv(file_path,
+                             dtype={'timestamp': str},  # Ensure timestamp is read as string
+                             skipinitialspace=True,
+                             skip_blank_lines=True)
+        except Exception:
+            # If that fails, try reading without header and specify column names
+            df = pd.read_csv(file_path,
+                             names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time',
+                                    'db_rows_read', 'db_rows_written', 'db_total_rows',
+                                    'cache_hits', 'cache_misses'],
+                             header=None,
+                             dtype={'timestamp': str},
+                             skipinitialspace=True,
+                             skip_blank_lines=True)
+
+        # Remove any rows where timestamp is 'timestamp'
+        df = df[df['timestamp'] != 'timestamp']
+
+        # Convert timestamp to numeric
+        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
+
+        # Validate required columns
+        required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time']
+        for col in required_columns:
+            if col not in df.columns:
+                raise ValueError(f"Missing required column: {col}")
+
+        return df
+
+    except Exception as e:
+        print(f"Error reading CSV: {e}")
+        print("Please check the file format and ensure it matches the expected structure.")
+        raise
+
+def convert_timestamps(df):
+    """
+    Convert timestamps to datetime.
+
+    Args:
+        df (pandas.DataFrame): Input DataFrame
+
+    Returns:
+        pandas.DataFrame: DataFrame with converted timestamps
+    """
+    # Convert millisecond timestamps to datetime
+    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
+
+    return df
+
+def analyze_latency_data(df):
+    """
+    Calculate latency statistics.
+
+    Args:
+        df (pandas.DataFrame): Input DataFrame
+
+    Returns:
+        dict: Latency statistics
+    """
+    # Calculate statistics
+    stats = {
+        'overall': {
+            'avg': df['service_time'].mean(),
+            'p50': df['service_time'].quantile(0.5),
+            'p95': df['service_time'].quantile(0.95),
+            'p99': df['service_time'].quantile(0.99),
+            'max': df['service_time'].max()
+        },
+        'db': {
+            'avg': df['db_time'].mean(),
+            'p50': df['db_time'].quantile(0.5),
+            'p95': df['db_time'].quantile(0.95),
+            'p99': df['db_time'].quantile(0.99),
+            'max': df['db_time'].max()
+        },
+        'cache': {
+            'avg': df['cache_time'].mean(),
+            'p50': df['cache_time'].quantile(0.5),
+            'p95': df['cache_time'].quantile(0.95),
+            'p99': df['cache_time'].quantile(0.99),
+            'max': df['cache_time'].max()
+        }
+    }
+
+    return stats
+
+def plot_latency_graph(df):
+    """
+    Create a multi-axis time series plot for latencies.
+
+    Args:
+        df (pandas.DataFrame): DataFrame with timestamp and time columns
+    """
+    plt.figure(figsize=(15, 7))
+
+    # Plot overall service time
+    plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
+
+    # Create a twin axis for DB time
+    ax1 = plt.gca()
+    ax2 = ax1.twinx()
+    ax2.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')
+
+    # Create a third axis for cache time
+    ax3 = ax1.twinx()
+    # Offset the third axis slightly to the right
+    ax3.spines['right'].set_position(('axes', 1.2))
+    ax3.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')
+
+    # Set labels and title
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Overall Service Time (ms)', color='blue')
+    ax2.set_ylabel('DB Time (ms)', color='red')
+    ax3.set_ylabel('Cache Time (ms)', color='green')
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    plt.title('Latency Breakdown Over Time')
+
+    # Combine legends
+    lines1, labels1 = ax1.get_legend_handles_labels()
+    lines2, labels2 = ax2.get_legend_handles_labels()
+    lines3, labels3 = ax3.get_legend_handles_labels()
+    ax1.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc='best')
+
+    plt.tight_layout()
+    plt.show()
+
+def main(file_path='server_metrics.csv'):
+    """
+    Main function to process and visualize server metrics.
+
+    Args:
+        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
+    """
+    try:
+        # Read CSV file
+        df = read_csv_with_fallback(file_path)
+
+        # Convert timestamps
+        df = convert_timestamps(df)
+
+        # Sort by timestamp to ensure chronological order
+        df = df.sort_values('timestamp')
+
+        # Analyze latency data
+        stats = analyze_latency_data(df)
+
+        # Print statistics
+        print("Latency Statistics:")
+        for category, metrics in stats.items():
+            print(f"\n{category.capitalize()} Latency:")
+            for metric, value in metrics.items():
+                print(f"{metric}: {value:.2f} ms")
+
+        # Plot the graph
+        plot_latency_graph(df)
+
+    except FileNotFoundError:
+        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/results/1 - Scale Only App/Client - Frankfurt/server_summary.txt b/results/1 - Scale Only App/Client - Frankfurt/server_summary.txt
new file mode 100644
index 0000000..dddcb6f
--- /dev/null
+++ b/results/1 - Scale Only App/Client - Frankfurt/server_summary.txt
@@ -0,0 +1,22 @@
+Latency Statistics:
+
+Overall Latency:
+avg: 298.54 ms
+p50: 312.00 ms
+p95: 323.45 ms
+p99: 382.00 ms
+max: 442.00 ms
+
+Db Latency:
+avg: 182.40 ms
+p50: 208.00 ms
+p95: 216.00 ms
+p99: 268.76 ms
+max: 339.00 ms
+
+Cache Latency:
+avg: 21.11 ms
+p50: 0.00 ms
+p95: 126.00 ms
+p99: 170.00 ms
+max: 201.00 ms
diff --git a/results/1 - Scale Only App/Client - Singapore/client_latency.png b/results/1 - Scale Only App/Client - Singapore/client_latency.png
new file mode 100644
index 0000000..6737d6c
Binary files /dev/null and b/results/1 - Scale Only App/Client - Singapore/client_latency.png differ
diff --git a/results/1 - Scale Only App/Client - Singapore/client_processing.py b/results/1 - Scale Only App/Client - Singapore/client_processing.py
new file mode 100644
index 0000000..a652ccd
--- /dev/null
+++ b/results/1 - Scale Only App/Client - Singapore/client_processing.py
@@ -0,0 +1,102 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+
+# Read the CSV file
+def read_csv_data(file_path):
+    """
+    Read CSV file with request data
+
+    Parameters:
+    -----------
+    file_path : str
+        Path to the CSV file
+
+    Returns:
+    --------
+    pandas.DataFrame
+        DataFrame with parsed request data
+    """
+    # Read the CSV file
+    df = pd.read_csv(file_path, parse_dates=['Timestamp'])
+
+    # Ensure data types are correct
+    df['Duration (ms)'] = df['Duration (ms)'].astype(int)
+    df['Status Code'] = df['Status Code'].astype(int)
+
+    return df
+
+# Visualize the data
+def visualize_request_data(df):
+    """
+    Create a time-series plot of request durations
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    # Create the figure and the first axis
+    fig, ax1 = plt.subplots(figsize=(12, 6))
+
+    # Plot Duration on the left axis
+    color1 = 'blue'
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Duration (ms)', color=color1)
+    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
+    ax1.tick_params(axis='y', labelcolor=color1)
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
+
+    # Title and legend
+    plt.title('Request Data over Time')
+
+    # Create a legend
+    ax1.legend(loc='best')
+
+    # Add grid
+    ax1.grid(True, linestyle='--', alpha=0.7)
+
+    # Tight layout to prevent cutting off labels
+    plt.tight_layout()
+
+    # Show the plot
+    plt.show()
+
+# Main execution
+def main():
+    # Path to your CSV file
+    file_path = 'client_metrics.csv'
+
+    try:
+        # Read the data and print a summary
+        df = read_csv_data(file_path)
+        print_data_summary(df)
+        # Visualize the data
+        visualize_request_data(df)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+# Demonstrate data summary
+def print_data_summary(df):
+    """
+    Print a summary of the request data
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    print("\nData Summary:")
+    print("-" * 20)
+    print(f"Total Requests: {len(df)}")
+    print(f"Unique Request Types: {df['Request Type'].unique()}")
{df['Request Type'].unique()}") + + print("\nDuration Statistics:") + print(df['Duration (ms)'].describe()) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/results/1 - Scale Only App/Client - Singapore/server_latency.png b/results/1 - Scale Only App/Client - Singapore/server_latency.png new file mode 100644 index 0000000..c97b2bc Binary files /dev/null and b/results/1 - Scale Only App/Client - Singapore/server_latency.png differ diff --git a/results/1 - Scale Only App/Client - Singapore/server_processing.py b/results/1 - Scale Only App/Client - Singapore/server_processing.py new file mode 100644 index 0000000..b706311 --- /dev/null +++ b/results/1 - Scale Only App/Client - Singapore/server_processing.py @@ -0,0 +1,181 @@ +import pandas as pd +import matplotlib.pyplot as plt + +def read_csv_with_fallback(file_path): + """ + Attempt to read CSV file with multiple parsing strategies. + + Args: + file_path (str): Path to the CSV file + + Returns: + pandas.DataFrame: Parsed DataFrame + """ + try: + # First, try reading with header + try: + df = pd.read_csv(file_path, + dtype={'timestamp': str}, # Ensure timestamp is read as string + skipinitialspace=True, + skip_blank_lines=True) + except Exception: + # If that fails, try reading without header and specify column names + df = pd.read_csv(file_path, + names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time', + 'db_rows_read', 'db_rows_written', 'db_total_rows', + 'cache_hits', 'cache_misses'], + header=None, + dtype={'timestamp': str}, + skipinitialspace=True, + skip_blank_lines=True) + + # Remove any rows where timestamp is 'timestamp' + df = df[df['timestamp'] != 'timestamp'] + + # Convert timestamp to numeric + df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce') + + # Validate required columns + required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time'] + for col in required_columns: + if col not in df.columns: + raise ValueError(f"Missing required column: {col}") + + return df + + except Exception as e: + print(f"Error reading CSV: {e}") + print("Please check the file format and ensure it matches the expected structure.") + raise + +def convert_timestamps(df): + """ + Convert timestamps to datetime. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + pandas.DataFrame: DataFrame with converted timestamps + """ + # Convert millisecond timestamps to datetime + df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms') + + return df + +def analyze_latency_data(df): + """ + Calculate latency statistics. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + dict: Latency statistics + """ + # Calculate statistics + stats = { + 'overall': { + 'avg': df['service_time'].mean(), + 'p50': df['service_time'].quantile(0.5), + 'p95': df['service_time'].quantile(0.95), + 'p99': df['service_time'].quantile(0.99), + 'max': df['service_time'].max() + }, + 'db': { + 'avg': df['db_time'].mean(), + 'p50': df['db_time'].quantile(0.5), + 'p95': df['db_time'].quantile(0.95), + 'p99': df['db_time'].quantile(0.99), + 'max': df['db_time'].max() + }, + 'cache': { + 'avg': df['cache_time'].mean(), + 'p50': df['cache_time'].quantile(0.5), + 'p95': df['cache_time'].quantile(0.95), + 'p99': df['cache_time'].quantile(0.99), + 'max': df['cache_time'].max() + } + } + + return stats + +def plot_latency_graph(df): + """ + Create a multi-axis time series plot for latencies. 
+
+    Args:
+        df (pandas.DataFrame): DataFrame with timestamp and time columns
+    """
+    plt.figure(figsize=(15, 7))
+
+    # Plot overall service time
+    plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
+
+    # Create a twin axis for DB time
+    ax1 = plt.gca()
+    ax2 = ax1.twinx()
+    ax2.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')
+
+    # Create a third axis for cache time
+    ax3 = ax1.twinx()
+    # Offset the third axis slightly to the right
+    ax3.spines['right'].set_position(('axes', 1.2))
+    ax3.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')
+
+    # Set labels and title
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Overall Service Time (ms)', color='blue')
+    ax2.set_ylabel('DB Time (ms)', color='red')
+    ax3.set_ylabel('Cache Time (ms)', color='green')
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    plt.title('Latency Breakdown Over Time')
+
+    # Combine legends
+    lines1, labels1 = ax1.get_legend_handles_labels()
+    lines2, labels2 = ax2.get_legend_handles_labels()
+    lines3, labels3 = ax3.get_legend_handles_labels()
+    ax1.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc='best')
+
+    plt.tight_layout()
+    plt.show()
+
+def main(file_path='server_metrics.csv'):
+    """
+    Main function to process and visualize server metrics.
+
+    Args:
+        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
+    """
+    try:
+        # Read CSV file
+        df = read_csv_with_fallback(file_path)
+
+        # Convert timestamps
+        df = convert_timestamps(df)
+
+        # Sort by timestamp to ensure chronological order
+        df = df.sort_values('timestamp')
+
+        # Analyze latency data
+        stats = analyze_latency_data(df)
+
+        # Print statistics
+        print("Latency Statistics:")
+        for category, metrics in stats.items():
+            print(f"\n{category.capitalize()} Latency:")
+            for metric, value in metrics.items():
+                print(f"{metric}: {value:.2f} ms")
+
+        # Plot the graph
+        plot_latency_graph(df)
+
+    except FileNotFoundError:
+        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/results/1 - Scale Only App/Client - Singapore/server_summary.txt b/results/1 - Scale Only App/Client - Singapore/server_summary.txt
new file mode 100644
index 0000000..d4e7ed7
--- /dev/null
+++ b/results/1 - Scale Only App/Client - Singapore/server_summary.txt
@@ -0,0 +1,22 @@
+Latency Statistics:
+
+Overall Latency:
+avg: 6068.11 ms
+p50: 4711.00 ms
+p95: 8681.00 ms
+p99: 8707.00 ms
+max: 8975.00 ms
+
+Db Latency:
+avg: 230.91 ms
+p50: 238.00 ms
+p95: 504.00 ms
+p99: 668.74 ms
+max: 985.00 ms
+
+Cache Latency:
+avg: 3104.81 ms
+p50: 4191.00 ms
+p95: 4233.00 ms
+p99: 4258.87 ms
+max: 4664.00 ms
diff --git a/results/2 - Scale App and Cache/Client - Frankfurt/client_latency.png b/results/2 - Scale App and Cache/Client - Frankfurt/client_latency.png
new file mode 100644
index 0000000..532e121
Binary files /dev/null and b/results/2 - Scale App and Cache/Client - Frankfurt/client_latency.png differ
diff --git a/results/2 - Scale App and Cache/Client - Frankfurt/client_processing.py b/results/2 - Scale App and Cache/Client - Frankfurt/client_processing.py
new file mode 100644
index 0000000..a652ccd
--- /dev/null
+++ b/results/2 - Scale App and Cache/Client - Frankfurt/client_processing.py
@@ -0,0 +1,102 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+
+# Read the CSV file
+def read_csv_data(file_path):
+    """
+    Read CSV file with request data
+
+    Parameters:
+    -----------
+    file_path : str
+        Path to the CSV file
+
+    Returns:
+    --------
+    pandas.DataFrame
+        DataFrame with parsed request data
+    """
+    # Read the CSV file
+    df = pd.read_csv(file_path, parse_dates=['Timestamp'])
+
+    # Ensure data types are correct
+    df['Duration (ms)'] = df['Duration (ms)'].astype(int)
+    df['Status Code'] = df['Status Code'].astype(int)
+
+    return df
+
+# Visualize the data
+def visualize_request_data(df):
+    """
+    Create a time-series plot of request durations
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    # Create the figure and the first axis
+    fig, ax1 = plt.subplots(figsize=(12, 6))
+
+    # Plot Duration on the left axis
+    color1 = 'blue'
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Duration (ms)', color=color1)
+    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
+    ax1.tick_params(axis='y', labelcolor=color1)
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
+
+    # Title and legend
+    plt.title('Request Data over Time')
+
+    # Create a legend
+    ax1.legend(loc='best')
+
+    # Add grid
+    ax1.grid(True, linestyle='--', alpha=0.7)
+
+    # Tight layout to prevent cutting off labels
+    plt.tight_layout()
+
+    # Show the plot
+    plt.show()
+
+# Main execution
+def main():
+    # Path to your CSV file
+    file_path = 'client_metrics.csv'
+
+    try:
+        # Read the data and print a summary
+        df = read_csv_data(file_path)
+        print_data_summary(df)
+        # Visualize the data
+        visualize_request_data(df)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+# Demonstrate data summary
+def print_data_summary(df):
+    """
+    Print a summary of the request data
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    print("\nData Summary:")
+    print("-" * 20)
+    print(f"Total Requests: {len(df)}")
{len(df)}") + print(f"Unique Request Types: {df['Request Type'].unique()}") + + print("\nDuration Statistics:") + print(df['Duration (ms)'].describe()) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/results/2 - Scale App and Cache/Client - Frankfurt/server_latency.png b/results/2 - Scale App and Cache/Client - Frankfurt/server_latency.png new file mode 100644 index 0000000..0c6af15 Binary files /dev/null and b/results/2 - Scale App and Cache/Client - Frankfurt/server_latency.png differ diff --git a/results/2 - Scale App and Cache/Client - Frankfurt/server_processing.py b/results/2 - Scale App and Cache/Client - Frankfurt/server_processing.py new file mode 100644 index 0000000..b706311 --- /dev/null +++ b/results/2 - Scale App and Cache/Client - Frankfurt/server_processing.py @@ -0,0 +1,181 @@ +import pandas as pd +import matplotlib.pyplot as plt + +def read_csv_with_fallback(file_path): + """ + Attempt to read CSV file with multiple parsing strategies. + + Args: + file_path (str): Path to the CSV file + + Returns: + pandas.DataFrame: Parsed DataFrame + """ + try: + # First, try reading with header + try: + df = pd.read_csv(file_path, + dtype={'timestamp': str}, # Ensure timestamp is read as string + skipinitialspace=True, + skip_blank_lines=True) + except Exception: + # If that fails, try reading without header and specify column names + df = pd.read_csv(file_path, + names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time', + 'db_rows_read', 'db_rows_written', 'db_total_rows', + 'cache_hits', 'cache_misses'], + header=None, + dtype={'timestamp': str}, + skipinitialspace=True, + skip_blank_lines=True) + + # Remove any rows where timestamp is 'timestamp' + df = df[df['timestamp'] != 'timestamp'] + + # Convert timestamp to numeric + df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce') + + # Validate required columns + required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time'] + for col in required_columns: + if col not in df.columns: + raise ValueError(f"Missing required column: {col}") + + return df + + except Exception as e: + print(f"Error reading CSV: {e}") + print("Please check the file format and ensure it matches the expected structure.") + raise + +def convert_timestamps(df): + """ + Convert timestamps to datetime. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + pandas.DataFrame: DataFrame with converted timestamps + """ + # Convert millisecond timestamps to datetime + df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms') + + return df + +def analyze_latency_data(df): + """ + Calculate latency statistics. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + dict: Latency statistics + """ + # Calculate statistics + stats = { + 'overall': { + 'avg': df['service_time'].mean(), + 'p50': df['service_time'].quantile(0.5), + 'p95': df['service_time'].quantile(0.95), + 'p99': df['service_time'].quantile(0.99), + 'max': df['service_time'].max() + }, + 'db': { + 'avg': df['db_time'].mean(), + 'p50': df['db_time'].quantile(0.5), + 'p95': df['db_time'].quantile(0.95), + 'p99': df['db_time'].quantile(0.99), + 'max': df['db_time'].max() + }, + 'cache': { + 'avg': df['cache_time'].mean(), + 'p50': df['cache_time'].quantile(0.5), + 'p95': df['cache_time'].quantile(0.95), + 'p99': df['cache_time'].quantile(0.99), + 'max': df['cache_time'].max() + } + } + + return stats + +def plot_latency_graph(df): + """ + Create a multi-axis time series plot for latencies. 
+
+    Args:
+        df (pandas.DataFrame): DataFrame with timestamp and time columns
+    """
+    plt.figure(figsize=(15, 7))
+
+    # Plot overall service time
+    plt.plot(df['timestamp'], df['service_time'], label='Overall Service Time', color='blue')
+
+    # Create a twin axis for DB time
+    ax1 = plt.gca()
+    ax2 = ax1.twinx()
+    ax2.plot(df['timestamp'], df['db_time'], label='DB Time', color='red', linestyle='--')
+
+    # Create a third axis for cache time
+    ax3 = ax1.twinx()
+    # Offset the third axis slightly to the right
+    ax3.spines['right'].set_position(('axes', 1.2))
+    ax3.plot(df['timestamp'], df['cache_time'], label='Cache Time', color='green', linestyle=':')
+
+    # Set labels and title
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Overall Service Time (ms)', color='blue')
+    ax2.set_ylabel('DB Time (ms)', color='red')
+    ax3.set_ylabel('Cache Time (ms)', color='green')
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    plt.title('Latency Breakdown Over Time')
+
+    # Combine legends
+    lines1, labels1 = ax1.get_legend_handles_labels()
+    lines2, labels2 = ax2.get_legend_handles_labels()
+    lines3, labels3 = ax3.get_legend_handles_labels()
+    ax1.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc='best')
+
+    plt.tight_layout()
+    plt.show()
+
+def main(file_path='server_metrics.csv'):
+    """
+    Main function to process and visualize server metrics.
+
+    Args:
+        file_path (str, optional): Path to the CSV file. Defaults to 'server_metrics.csv'.
+    """
+    try:
+        # Read CSV file
+        df = read_csv_with_fallback(file_path)
+
+        # Convert timestamps
+        df = convert_timestamps(df)
+
+        # Sort by timestamp to ensure chronological order
+        df = df.sort_values('timestamp')
+
+        # Analyze latency data
+        stats = analyze_latency_data(df)
+
+        # Print statistics
+        print("Latency Statistics:")
+        for category, metrics in stats.items():
+            print(f"\n{category.capitalize()} Latency:")
+            for metric, value in metrics.items():
+                print(f"{metric}: {value:.2f} ms")
+
+        # Plot the graph
+        plot_latency_graph(df)
+
+    except FileNotFoundError:
+        print(f"Error: File '{file_path}' not found. Please ensure the CSV file exists in the same directory.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/results/2 - Scale App and Cache/Client - Frankfurt/server_summary.txt b/results/2 - Scale App and Cache/Client - Frankfurt/server_summary.txt
new file mode 100644
index 0000000..8d202a7
--- /dev/null
+++ b/results/2 - Scale App and Cache/Client - Frankfurt/server_summary.txt
@@ -0,0 +1,22 @@
+Latency Statistics:
+
+Overall Latency:
+avg: 235.42 ms
+p50: 236.00 ms
+p95: 319.00 ms
+p99: 352.94 ms
+max: 445.00 ms
+
+Db Latency:
+avg: 142.14 ms
+p50: 112.00 ms
+p95: 215.00 ms
+p99: 249.82 ms
+max: 309.00 ms
+
+Cache Latency:
+avg: 4.52 ms
+p50: 0.00 ms
+p95: 24.00 ms
+p99: 45.00 ms
+max: 84.00 ms
diff --git a/results/2 - Scale App and Cache/Client - Singapore/client_latency.png b/results/2 - Scale App and Cache/Client - Singapore/client_latency.png
new file mode 100644
index 0000000..f701372
Binary files /dev/null and b/results/2 - Scale App and Cache/Client - Singapore/client_latency.png differ
diff --git a/results/2 - Scale App and Cache/Client - Singapore/client_processing.py b/results/2 - Scale App and Cache/Client - Singapore/client_processing.py
new file mode 100644
index 0000000..a652ccd
--- /dev/null
+++ b/results/2 - Scale App and Cache/Client - Singapore/client_processing.py
@@ -0,0 +1,102 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+
+# Read the CSV file
+def read_csv_data(file_path):
+    """
+    Read CSV file with request data
+
+    Parameters:
+    -----------
+    file_path : str
+        Path to the CSV file
+
+    Returns:
+    --------
+    pandas.DataFrame
+        DataFrame with parsed request data
+    """
+    # Read the CSV file
+    df = pd.read_csv(file_path, parse_dates=['Timestamp'])
+
+    # Ensure data types are correct
+    df['Duration (ms)'] = df['Duration (ms)'].astype(int)
+    df['Status Code'] = df['Status Code'].astype(int)
+
+    return df
+
+# Visualize the data
+def visualize_request_data(df):
+    """
+    Create a time-series plot of request durations
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    # Create the figure and the first axis
+    fig, ax1 = plt.subplots(figsize=(12, 6))
+
+    # Plot Duration on the left axis
+    color1 = 'blue'
+    ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel('Duration (ms)', color=color1)
+    ax1.plot(df['Timestamp'], df['Duration (ms)'], color=color1, label='Duration (ms)')
+    ax1.tick_params(axis='y', labelcolor=color1)
+
+    # Format x-axis to show timestamps nicely
+    plt.gcf().autofmt_xdate()
+    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
+
+    # Title and legend
+    plt.title('Request Data over Time')
+
+    # Create a legend
+    ax1.legend(loc='best')
+
+    # Add grid
+    ax1.grid(True, linestyle='--', alpha=0.7)
+
+    # Tight layout to prevent cutting off labels
+    plt.tight_layout()
+
+    # Show the plot
+    plt.show()
+
+# Main execution
+def main():
+    # Path to your CSV file
+    file_path = 'client_metrics.csv'
+
+    try:
+        # Read the data and print a summary
+        df = read_csv_data(file_path)
+        print_data_summary(df)
+        # Visualize the data
+        visualize_request_data(df)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+# Demonstrate data summary
+def print_data_summary(df):
+    """
+    Print a summary of the request data
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        DataFrame containing request data
+    """
+    print("\nData Summary:")
+    print("-" * 20)
+    print(f"Total Requests: {len(df)}")
{len(df)}") + print(f"Unique Request Types: {df['Request Type'].unique()}") + + print("\nDuration Statistics:") + print(df['Duration (ms)'].describe()) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/results/2 - Scale App and Cache/Client - Singapore/server_latency.png b/results/2 - Scale App and Cache/Client - Singapore/server_latency.png new file mode 100644 index 0000000..5b9da35 Binary files /dev/null and b/results/2 - Scale App and Cache/Client - Singapore/server_latency.png differ diff --git a/results/2 - Scale App and Cache/Client - Singapore/server_processing.py b/results/2 - Scale App and Cache/Client - Singapore/server_processing.py new file mode 100644 index 0000000..b706311 --- /dev/null +++ b/results/2 - Scale App and Cache/Client - Singapore/server_processing.py @@ -0,0 +1,181 @@ +import pandas as pd +import matplotlib.pyplot as plt + +def read_csv_with_fallback(file_path): + """ + Attempt to read CSV file with multiple parsing strategies. + + Args: + file_path (str): Path to the CSV file + + Returns: + pandas.DataFrame: Parsed DataFrame + """ + try: + # First, try reading with header + try: + df = pd.read_csv(file_path, + dtype={'timestamp': str}, # Ensure timestamp is read as string + skipinitialspace=True, + skip_blank_lines=True) + except Exception: + # If that fails, try reading without header and specify column names + df = pd.read_csv(file_path, + names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time', + 'db_rows_read', 'db_rows_written', 'db_total_rows', + 'cache_hits', 'cache_misses'], + header=None, + dtype={'timestamp': str}, + skipinitialspace=True, + skip_blank_lines=True) + + # Remove any rows where timestamp is 'timestamp' + df = df[df['timestamp'] != 'timestamp'] + + # Convert timestamp to numeric + df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce') + + # Validate required columns + required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time'] + for col in required_columns: + if col not in df.columns: + raise ValueError(f"Missing required column: {col}") + + return df + + except Exception as e: + print(f"Error reading CSV: {e}") + print("Please check the file format and ensure it matches the expected structure.") + raise + +def convert_timestamps(df): + """ + Convert timestamps to datetime. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + pandas.DataFrame: DataFrame with converted timestamps + """ + # Convert millisecond timestamps to datetime + df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms') + + return df + +def analyze_latency_data(df): + """ + Calculate latency statistics. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + dict: Latency statistics + """ + # Calculate statistics + stats = { + 'overall': { + 'avg': df['service_time'].mean(), + 'p50': df['service_time'].quantile(0.5), + 'p95': df['service_time'].quantile(0.95), + 'p99': df['service_time'].quantile(0.99), + 'max': df['service_time'].max() + }, + 'db': { + 'avg': df['db_time'].mean(), + 'p50': df['db_time'].quantile(0.5), + 'p95': df['db_time'].quantile(0.95), + 'p99': df['db_time'].quantile(0.99), + 'max': df['db_time'].max() + }, + 'cache': { + 'avg': df['cache_time'].mean(), + 'p50': df['cache_time'].quantile(0.5), + 'p95': df['cache_time'].quantile(0.95), + 'p99': df['cache_time'].quantile(0.99), + 'max': df['cache_time'].max() + } + } + + return stats + +def plot_latency_graph(df): + """ + Create a multi-axis time series plot for latencies. 
diff --git a/results/2 - Scale App and Cache/Client - Singapore/server_latency.png b/results/2 - Scale App and Cache/Client - Singapore/server_latency.png
new file mode 100644
index 0000000..5b9da35
Binary files /dev/null and b/results/2 - Scale App and Cache/Client - Singapore/server_latency.png differ
diff --git a/results/2 - Scale App and Cache/Client - Singapore/server_processing.py b/results/2 - Scale App and Cache/Client - Singapore/server_processing.py
new file mode 100644
index 0000000..b706311
--- /dev/null
+++ b/results/2 - Scale App and Cache/Client - Singapore/server_processing.py
@@ -0,0 +1,181 @@
[181 added lines omitted — identical (same blob b706311) to the server_processing.py added earlier in this diff]
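[Note: the filter for literal 'timestamp' strings in read_csv_with_fallback suggests these metrics CSVs are sometimes concatenated with their header rows repeated; dropping the embedded headers before to_numeric is what keeps the coercion from producing NaN rows. The same cleanup in isolation, on synthetic input:

    import io
    import pandas as pd

    # Two log fragments glued together, so the header line appears twice.
    raw = 'timestamp,service_time\n1000,12\ntimestamp,service_time\n2000,15\n'
    df = pd.read_csv(io.StringIO(raw), dtype={'timestamp': str})
    df = df[df['timestamp'] != 'timestamp']  # drop the stray embedded header row
    df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
    print(df)
]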
diff --git a/results/2 - Scale App and Cache/Client - Singapore/server_summary.txt b/results/2 - Scale App and Cache/Client - Singapore/server_summary.txt
new file mode 100644
index 0000000..ceea51c
--- /dev/null
+++ b/results/2 - Scale App and Cache/Client - Singapore/server_summary.txt
@@ -0,0 +1,22 @@
+Latency Statistics:
+
+Overall Latency:
+avg: 387.27 ms
+p50: 481.00 ms
+p95: 758.00 ms
+p99: 915.60 ms
+max: 1259.00 ms
+
+Db Latency:
+avg: 216.24 ms
+p50: 241.00 ms
+p95: 499.00 ms
+p99: 675.00 ms
+max: 1023.00 ms
+
+Cache Latency:
+avg: 13.85 ms
+p50: 1.00 ms
+p95: 77.00 ms
+p99: 132.80 ms
+max: 249.00 ms
Requests: {len(df)}") + print(f"Unique Request Types: {df['Request Type'].unique()}") + + print("\nDuration Statistics:") + print(df['Duration (ms)'].describe()) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/results/3 - Scale All The Things/Client - Frankfurt/server_latency.png b/results/3 - Scale All The Things/Client - Frankfurt/server_latency.png new file mode 100644 index 0000000..62e2a01 Binary files /dev/null and b/results/3 - Scale All The Things/Client - Frankfurt/server_latency.png differ diff --git a/results/3 - Scale All The Things/Client - Frankfurt/server_processing.py b/results/3 - Scale All The Things/Client - Frankfurt/server_processing.py new file mode 100644 index 0000000..b706311 --- /dev/null +++ b/results/3 - Scale All The Things/Client - Frankfurt/server_processing.py @@ -0,0 +1,181 @@ +import pandas as pd +import matplotlib.pyplot as plt + +def read_csv_with_fallback(file_path): + """ + Attempt to read CSV file with multiple parsing strategies. + + Args: + file_path (str): Path to the CSV file + + Returns: + pandas.DataFrame: Parsed DataFrame + """ + try: + # First, try reading with header + try: + df = pd.read_csv(file_path, + dtype={'timestamp': str}, # Ensure timestamp is read as string + skipinitialspace=True, + skip_blank_lines=True) + except Exception: + # If that fails, try reading without header and specify column names + df = pd.read_csv(file_path, + names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time', + 'db_rows_read', 'db_rows_written', 'db_total_rows', + 'cache_hits', 'cache_misses'], + header=None, + dtype={'timestamp': str}, + skipinitialspace=True, + skip_blank_lines=True) + + # Remove any rows where timestamp is 'timestamp' + df = df[df['timestamp'] != 'timestamp'] + + # Convert timestamp to numeric + df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce') + + # Validate required columns + required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time'] + for col in required_columns: + if col not in df.columns: + raise ValueError(f"Missing required column: {col}") + + return df + + except Exception as e: + print(f"Error reading CSV: {e}") + print("Please check the file format and ensure it matches the expected structure.") + raise + +def convert_timestamps(df): + """ + Convert timestamps to datetime. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + pandas.DataFrame: DataFrame with converted timestamps + """ + # Convert millisecond timestamps to datetime + df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms') + + return df + +def analyze_latency_data(df): + """ + Calculate latency statistics. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + dict: Latency statistics + """ + # Calculate statistics + stats = { + 'overall': { + 'avg': df['service_time'].mean(), + 'p50': df['service_time'].quantile(0.5), + 'p95': df['service_time'].quantile(0.95), + 'p99': df['service_time'].quantile(0.99), + 'max': df['service_time'].max() + }, + 'db': { + 'avg': df['db_time'].mean(), + 'p50': df['db_time'].quantile(0.5), + 'p95': df['db_time'].quantile(0.95), + 'p99': df['db_time'].quantile(0.99), + 'max': df['db_time'].max() + }, + 'cache': { + 'avg': df['cache_time'].mean(), + 'p50': df['cache_time'].quantile(0.5), + 'p95': df['cache_time'].quantile(0.95), + 'p99': df['cache_time'].quantile(0.99), + 'max': df['cache_time'].max() + } + } + + return stats + +def plot_latency_graph(df): + """ + Create a multi-axis time series plot for latencies. 
diff --git a/results/3 - Scale All The Things/Client - Frankfurt/server_summary.txt b/results/3 - Scale All The Things/Client - Frankfurt/server_summary.txt
new file mode 100644
index 0000000..879cb9d
--- /dev/null
+++ b/results/3 - Scale All The Things/Client - Frankfurt/server_summary.txt
@@ -0,0 +1,22 @@
+Latency Statistics:
+
+Overall Latency:
+avg: 178.84 ms
+p50: 108.00 ms
+p95: 531.00 ms
+p99: 605.00 ms
+max: 800.00 ms
+
+Db Latency:
+avg: 116.50 ms
+p50: 4.00 ms
+p95: 426.00 ms
+p99: 500.89 ms
+max: 696.00 ms
+
+Cache Latency:
+avg: 9.48 ms
+p50: 1.00 ms
+p95: 49.00 ms
+p99: 56.00 ms
+max: 272.00 ms
diff --git a/results/3 - Scale All The Things/Client - Singapore/Figure_1.png b/results/3 - Scale All The Things/Client - Singapore/Figure_1.png
new file mode 100644
index 0000000..1411d57
Binary files /dev/null and b/results/3 - Scale All The Things/Client - Singapore/Figure_1.png differ
diff --git a/results/3 - Scale All The Things/Client - Singapore/client_latency.png b/results/3 - Scale All The Things/Client - Singapore/client_latency.png
new file mode 100644
index 0000000..2cab089
Binary files /dev/null and b/results/3 - Scale All The Things/Client - Singapore/client_latency.png differ
diff --git a/results/3 - Scale All The Things/Client - Singapore/client_processing.py b/results/3 - Scale All The Things/Client - Singapore/client_processing.py
new file mode 100644
index 0000000..a652ccd
--- /dev/null
+++ b/results/3 - Scale All The Things/Client - Singapore/client_processing.py
@@ -0,0 +1,102 @@
[102 added lines omitted — identical (same blob a652ccd) to the client_processing.py added earlier in this diff]
print(f"An error occurred: {e}") + +# Demonstrate data summary +def print_data_summary(df): + """ + Print a summary of the request data + + Parameters: + ----------- + df : pandas.DataFrame + DataFrame containing request data + """ + print("\nData Summary:") + print("-" * 20) + print(f"Total Requests: {len(df)}") + print(f"Unique Request Types: {df['Request Type'].unique()}") + + print("\nDuration Statistics:") + print(df['Duration (ms)'].describe()) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/results/3 - Scale All The Things/Client - Singapore/server_processing.py b/results/3 - Scale All The Things/Client - Singapore/server_processing.py new file mode 100644 index 0000000..b706311 --- /dev/null +++ b/results/3 - Scale All The Things/Client - Singapore/server_processing.py @@ -0,0 +1,181 @@ +import pandas as pd +import matplotlib.pyplot as plt + +def read_csv_with_fallback(file_path): + """ + Attempt to read CSV file with multiple parsing strategies. + + Args: + file_path (str): Path to the CSV file + + Returns: + pandas.DataFrame: Parsed DataFrame + """ + try: + # First, try reading with header + try: + df = pd.read_csv(file_path, + dtype={'timestamp': str}, # Ensure timestamp is read as string + skipinitialspace=True, + skip_blank_lines=True) + except Exception: + # If that fails, try reading without header and specify column names + df = pd.read_csv(file_path, + names=['session_id', 'timestamp', 'service_time', 'db_time', 'cache_time', + 'db_rows_read', 'db_rows_written', 'db_total_rows', + 'cache_hits', 'cache_misses'], + header=None, + dtype={'timestamp': str}, + skipinitialspace=True, + skip_blank_lines=True) + + # Remove any rows where timestamp is 'timestamp' + df = df[df['timestamp'] != 'timestamp'] + + # Convert timestamp to numeric + df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce') + + # Validate required columns + required_columns = ['timestamp', 'service_time', 'db_time', 'cache_time'] + for col in required_columns: + if col not in df.columns: + raise ValueError(f"Missing required column: {col}") + + return df + + except Exception as e: + print(f"Error reading CSV: {e}") + print("Please check the file format and ensure it matches the expected structure.") + raise + +def convert_timestamps(df): + """ + Convert timestamps to datetime. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + pandas.DataFrame: DataFrame with converted timestamps + """ + # Convert millisecond timestamps to datetime + df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms') + + return df + +def analyze_latency_data(df): + """ + Calculate latency statistics. + + Args: + df (pandas.DataFrame): Input DataFrame + + Returns: + dict: Latency statistics + """ + # Calculate statistics + stats = { + 'overall': { + 'avg': df['service_time'].mean(), + 'p50': df['service_time'].quantile(0.5), + 'p95': df['service_time'].quantile(0.95), + 'p99': df['service_time'].quantile(0.99), + 'max': df['service_time'].max() + }, + 'db': { + 'avg': df['db_time'].mean(), + 'p50': df['db_time'].quantile(0.5), + 'p95': df['db_time'].quantile(0.95), + 'p99': df['db_time'].quantile(0.99), + 'max': df['db_time'].max() + }, + 'cache': { + 'avg': df['cache_time'].mean(), + 'p50': df['cache_time'].quantile(0.5), + 'p95': df['cache_time'].quantile(0.95), + 'p99': df['cache_time'].quantile(0.99), + 'max': df['cache_time'].max() + } + } + + return stats + +def plot_latency_graph(df): + """ + Create a multi-axis time series plot for latencies. 
diff --git a/results/3 - Scale All The Things/Client - Singapore/server_summary.txt b/results/3 - Scale All The Things/Client - Singapore/server_summary.txt
new file mode 100644
index 0000000..4734a50
--- /dev/null
+++ b/results/3 - Scale All The Things/Client - Singapore/server_summary.txt
@@ -0,0 +1,22 @@
+Latency Statistics:
+
+Overall Latency:
+avg: 506.30 ms
+p50: 245.00 ms
+p95: 1231.00 ms
+p99: 1479.34 ms
+max: 2964.00 ms
+
+Db Latency:
+avg: 322.60 ms
+p50: 7.00 ms
+p95: 982.35 ms
+p99: 1239.68 ms
+max: 2711.00 ms
+
+Cache Latency:
+avg: 10.99 ms
+p50: 1.00 ms
+p95: 65.00 ms
+p99: 141.00 ms
+max: 256.00 ms
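[Note: taken together, the four server_summary.txt files above show a consistent pattern — overall medians improve when moving from "Scale App and Cache" to "Scale All The Things" (236 → 108 ms for Frankfurt, 481 → 245 ms for Singapore) while the p95 tails regress (319 → 531 ms and 758 → 1231 ms). A throwaway comparison sketch, with the numbers transcribed from those summaries:

    import pandas as pd

    # Overall service-time percentiles (ms) copied from the server_summary.txt files.
    summary = pd.DataFrame(
        {'p50': [236.0, 481.0, 108.0, 245.0],
         'p95': [319.0, 758.0, 531.0, 1231.0]},
        index=['Scale App and Cache / Frankfurt', 'Scale App and Cache / Singapore',
               'Scale All The Things / Frankfurt', 'Scale All The Things / Singapore'])
    print(summary)
    # Both clients' medians improve in the final scenario while their p95 tails widen.
]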