Source code for QhX.output

# pylint: disable=R0801
import math
import pandas as pd
import numpy as np






[docs]
def flatten_detected_periods(detected_periods):
    """
    Collapse a list of per-object detection lists into one flat list.

    Every non-empty sublist contributes its elements unchanged. Every empty
    sublist contributes a single placeholder dictionary whose values are all
    NaN, so that an object without detections still leaves a trace.

    Parameters:
    -----------
    detected_periods (iterable): Iterable of sublists holding detection records.

    Returns:
    --------
    list: The flattened records, with one NaN placeholder per empty sublist.
    """
    placeholder_fields = (
        'objectid', 'sampling_i', 'sampling_j', 'period',
        'upper_error', 'lower_error', 'significance', 'label',
    )

    flattened = []
    for group in detected_periods:
        if not group:
            # Nothing detected here: record an all-NaN entry instead.
            flattened.append(dict.fromkeys(placeholder_fields, np.nan))
            continue
        flattened.extend(group)
    return flattened




[docs]
def classify_periods(detected_periods):
    """
    Pair up the detected periods of each object and compute comparison metrics.

    The input is flattened with flatten_detected_periods and grouped by
    'objectid'. For every unordered pair of rows (i, j) with i < j belonging
    to the same object, one output row is produced. It holds the period,
    lower error, upper error and significance of the FIRST row of the pair
    (m3, m4, m5, m6), the labels of both rows (m7_1, m7_2), the relative
    period difference and the Intersection over Union (IoU) of the two error
    circles.

    'period_diff' stays NaN when either row lacks a period or an error value.
    'iou' stays NaN in that case too, and also when the relative period
    difference exceeds 0.1.

    Placeholder rows whose 'objectid' is NaN never compare equal to
    themselves, so they are not grouped and produce no output rows.

    Parameters:
    -----------
    detected_periods (list): Nested list of dictionaries containing detected period data.

    Returns:
    --------
    pd.DataFrame: One row per pair, with columns objectid, m3, m4, m5, m6,
        m7_1, m7_2, period_diff and iou. The columns are present even when
        there are no rows.
    """
    output_columns = ['objectid', 'm3', 'm4', 'm5', 'm6', 'm7_1', 'm7_2', 'period_diff', 'iou']
    required_fields = ('period', 'upper_error', 'lower_error')

    def calculate_iou(radius1, radius2, distance):
        """
        Calculates the Intersection over Union (IoU) for two circles given their radii and the distance between their centers.

        Parameters:
        -----------
        radius1 (float): Radius of the first circle.
        radius2 (float): Radius of the second circle.
        distance (float): Distance between the centers of the two circles.

        Returns:
        --------
        float: IoU value. Disjoint circles give 0; a circle lying entirely
            inside the other is reported as 1.
        """
        if distance > (radius1 + radius2):
            return 0
        if distance <= abs(radius1 - radius2):
            return 1

        area1 = math.pi * radius1**2
        area2 = math.pi * radius2**2
        d = distance

        # Mathematically both cosines lie in [-1, 1] here, but rounding can
        # push them marginally outside when the circles are (nearly) tangent,
        # which would make math.acos raise ValueError. Clamp to stay in domain.
        cos1 = (radius1**2 + d**2 - radius2**2) / (2 * radius1 * d)
        cos2 = (radius2**2 + d**2 - radius1**2) / (2 * radius2 * d)
        part1 = math.acos(max(-1.0, min(1.0, cos1)))
        part2 = math.acos(max(-1.0, min(1.0, cos2)))

        # Lens area: sum of the two circular segments.
        intersection = part1 * radius1**2 + part2 * radius2**2 - 0.5 * (radius1**2 * math.sin(2 * part1) + radius2**2 * math.sin(2 * part2))

        union = area1 + area2 - intersection
        return intersection / union

    # Flatten the nested input and load it into a DataFrame
    flat_list = flatten_detected_periods(detected_periods)
    df = pd.DataFrame(flat_list)

    # No records at all: there is no 'objectid' column to group on, so
    # return an empty result that still carries the output schema.
    if df.empty:
        return pd.DataFrame(columns=output_columns)

    rows_list = []

    # Process each unique object ID (a NaN ID selects no rows, see docstring)
    for name in df['objectid'].unique():
        quasar_data = df[df['objectid'] == name]
        n_rows = len(quasar_data)

        for i in range(n_rows):
            row_i = quasar_data.iloc[i]  # invariant for the inner loop

            for j in range(i + 1, n_rows):
                row_j = quasar_data.iloc[j]

                iou, period_diff = np.nan, np.nan

                # Metrics are only computed when both rows are complete
                complete = not any(
                    pd.isna(row[field])
                    for row in (row_i, row_j)
                    for field in required_fields
                )
                if complete:
                    # Difference relative to the first row's period
                    period_diff = abs(row_i['period'] - row_j['period']) / row_i['period']

                    if period_diff <= 0.1:
                        # Each error circle's radius is the mean of its
                        # upper and lower error; centers sit at the periods.
                        radius_i = (row_i['upper_error'] + row_i['lower_error']) / 2
                        radius_j = (row_j['upper_error'] + row_j['lower_error']) / 2
                        distance = abs(row_i['period'] - row_j['period'])
                        iou = calculate_iou(radius_i, radius_j, distance)

                rows_list.append({
                    'objectid': name,
                    'm3': row_i['period'],
                    'm4': row_i['lower_error'],
                    'm5': row_i['upper_error'],
                    'm6': row_i['significance'],
                    'm7_1': row_i['label'],
                    'm7_2': row_j['label'],
                    'period_diff': period_diff,
                    'iou': iou
                })

    # Passing the columns keeps the schema stable when no pairs were found
    output_df = pd.DataFrame(rows_list, columns=output_columns)

    return output_df



[docs]
def classify_period(row):
    """
    Assign a reliability class to one row produced by classify_periods.

    The decision uses the significance (m6), the lower and upper errors
    relative to the period (m4 / m3 and m5 / m3), and the IoU of the error
    circles.

    Parameters:
    -----------
    row (pd.Series): A row with the entries 'm3', 'm4', 'm5', 'm6' and 'iou'.

    Returns:
    --------
    str: 'NAN' when any of those entries is missing or the period is zero,
        otherwise 'reliable', 'medium reliable' or 'poor'.
    """
    # A single missing metric makes the row unclassifiable
    if any(pd.isna(row[field]) for field in ('m3', 'm4', 'm5', 'm6', 'iou')):
        return 'NAN'

    period = row['m3']
    # The relative errors below divide by the period
    if period == 0:
        return 'NAN'

    lower_ratio = row['m4'] / period
    upper_ratio = row['m5'] / period
    significance = row['m6']
    overlap = row['iou']

    # Top tier: high significance, small relative errors, near-total overlap
    if significance >= 0.99 and lower_ratio <= 0.1 and upper_ratio <= 0.1 and overlap >= 0.99:
        return 'reliable'

    # Middle tier: every metric must fall inside its intermediate band
    if 0.5 <= significance < 0.99 and 0.1 < lower_ratio <= 0.3 and 0.1 < upper_ratio <= 0.3 and 0.8 <= overlap < 0.99:
        return 'medium reliable'

    return 'poor'