Example of visualisation of large result files obtained from HPC

[1]:
import QhX

First, produce individually classified periods from the general output results collected across batches. The archive qhx-batch.zip is available in the accompanying data.

[2]:
from QhX.output_parallel import *
[12]:
file_path = '/Users/andjelka/Downloads/qhx-batch/result.csv'
[13]:
from QhX.parallel_classification import *

[14]:
process_csv_in_chunks(file_path, chunk_size=10000, output_file_path='classified_individual.csv')
Processed and classified data saved to classified_individual.csv
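
To sanity-check the chunked output before moving on, the first few classified rows can be previewed with pandas (an illustrative check; the file name is the one written above):

[ ]:
import pandas as pd

# Peek at the first rows of the chunk-classified output without loading the full file
pd.read_csv('classified_individual.csv', nrows=5)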

Now produce the combined classification, together with the aggregated mean values for consistent periods detected across pairs of bands.

[15]:
combined_data, aggregated_stats = process_large_dataset('classified_individual.csv')
/Users/andjelka/Documents/QhX3/QhX1/QhX/output_parallel.py:181: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

        >>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use

        >>> .groupby(..., group_keys=True)
  grouped = data.groupby('ID', as_index=False).apply(aggregate_classifications).reset_index(drop=True)
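
The cells below read combined.csv and aggregated.csv from disk. Assuming the returned combined_data and aggregated_stats are pandas DataFrames, they can be written out first (file names chosen to match the read calls below):

[ ]:
# Persist the combined classification and the aggregated per-object statistics
# so the plotting cells can reload them (assumes both objects are DataFrames)
combined_data.to_csv('combined.csv', index=False)
aggregated_stats.to_csv('aggregated.csv', index=False)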
[9]:
from QhX.interactive_plot_large_files import *
[16]:
import pandas as pd

df = pd.read_csv('classified_individual.csv')
[17]:
df = df.dropna()
[18]:
import holoviews as hv
from holoviews import opts, dim
import numpy as np

# Initialize HoloViews with the Bokeh backend for interactive plotting
hv.extension('bokeh')

# Convert the output DataFrame to a HoloViews Dataset
dsr = hv.Dataset(df, kdims=['ID'], vdims=['m3', 'm4', 'm5', 'm6', 'm7_1', 'm7_2', 'period_diff', 'iou', 'classification'])

# Define plot options for appearance and interactivity
popts = opts.Points(alpha=0.6,  # Transparency of the points
                    legend_position='right',  # Position of the legend
                    height=400,  # Height of the plot
                    width=600,  # Width of the plot
                    show_grid=True,  # Display a grid
                    color='classification',  # Color points by the 'classification' column
                    cmap='Set1',  # Color map for different classifications
                    line_color='black',  # Color of the outline of points
                    xlabel='objectid',  # Label for the x-axis
                    ylabel='m3 (period[days])',  # Label for the y-axis
                    size=100 * (np.abs(dim('m5') - dim('m4')) / (dim('m3') + 0.1)))  # Size of points based on relative error

# Convert to Points, grouped by classification, and attach a RangeXY stream
points = dsr.to(hv.Points, ['ID', 'm3'], groupby='classification')
decimated_data = points.clone(streams=[hv.streams.RangeXY()])

# Create an interactive plot, grouped by 'classification'
hvapp = decimated_data.opts(popts)

# Return the interactive plot
hvapp

WARNING:param.HoloMap00816: Setting non-parameter attribute streams=[RangeXY(x_range=None,y_range=None)] using a mechanism intended only for parameters
[18]:
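
The param warnings above come from attaching a RangeXY stream via clone(), which HoloMap objects do not accept as a parameter. For very large tables, an alternative sketch is to use the decimate operation from HoloViews, which dynamically limits the number of points rendered in the current viewport (max_samples here is chosen only for illustration):

[ ]:
from holoviews.operation import decimate

# Dynamically subsample the points drawn in the current view range
hvapp_decimated = decimate(points, max_samples=5000).opts(popts)
hvapp_decimated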
[19]:
df1 = pd.read_csv('combined.csv')
[20]:
df1.head()
[20]:
ID m3 m4 m5 m6 m7_1 m7_2 period_diff iou classification individual_classification final_classification
0 443663 29.120559 0.186411 4.132621 1.00 0 2 0.0 1.0 poor poor poor
1 443660 335.570470 -1.000000 -1.000000 0.94 0 2 0.0 0.0 poor poor poor
2 443660 335.570470 -1.000000 -1.000000 0.94 0 2 0.0 0.0 poor poor poor
3 443660 335.570470 -1.000000 -1.000000 0.98 0 2 0.0 0.0 poor poor poor
4 443640 252.525253 -1.000000 -1.000000 0.98 0 3 0.0 0.0 poor poor poor
[21]:
import holoviews as hv
from holoviews import opts, dim
import numpy as np

# Initialize HoloViews with the Bokeh backend for interactive plotting
hv.extension('bokeh')

# Convert the output DataFrame to a HoloViews Dataset
dsr = hv.Dataset(df1, kdims=['ID'], vdims=['m3', 'm4', 'm5', 'm6', 'm7_1', 'm7_2', 'period_diff', 'iou', 'classification', 'final_classification'])

# Define plot options for appearance and interactivity
popts = opts.Points(alpha=0.6,  # Transparency of the points
                    legend_position='right',  # Position of the legend
                    height=400,  # Height of the plot
                    width=600,  # Width of the plot
                    show_grid=True,  # Display a grid
                    color='classification',  # Color points by the 'classification' column
                    cmap='Set1',  # Color map for different classifications
                    line_color='black',  # Color of the outline of points
                    xlabel='ID',  # Label for the x-axis
                    ylabel='m3 (period[days])',  # Label for the y-axis
                    size=100 * (np.abs(dim('m5') - dim('m4')) / (dim('m3') + 0.1)))  # Size of points based on relative error

# Convert to Points, grouped by final classification, and attach a RangeXY stream
points = dsr.to(hv.Points, ['ID', 'm3'], groupby='final_classification')
decimated_data = points.clone(streams=[hv.streams.RangeXY()])

# Create an interactive plot, grouped by 'final_classification'
hvapp = decimated_data.opts(popts)

# Return the interactive plot
hvapp

WARNING:param.HoloMap01139: Setting non-parameter attribute streams=[RangeXY(x_range=None,y_range=None)] using a mechanism intended only for parameters
[21]:
[22]:
df2 = pd.read_csv('aggregated.csv')
[23]:
df2.head(100)
[23]:
ID final_classification mean_period mean_lower_error mean_upper_error mean_significance mean_iou
0 271400 reliable 20.391517 0.641235 0.670589 1.0 1.0
1 271465 reliable 20.391517 0.327674 0.728985 1.0 1.0
2 271503 reliable 19.238169 0.360260 1.356715 1.0 1.0
3 271766 reliable 11.723329 0.207330 0.300756 1.0 1.0
4 272127 reliable 92.421442 7.527273 5.195175 1.0 1.0
... ... ... ... ... ... ... ...
95 443513 reliable 24.271845 1.123348 0.102427 1.0 1.0
96 451760 reliable 13.245033 0.131743 0.329935 1.0 1.0
97 452764 reliable 15.688735 0.493220 0.499617 1.0 1.0
98 454074 reliable 26.824034 1.399604 1.103342 1.0 1.0
99 454224 reliable 35.137034 1.831320 3.296370 1.0 1.0

100 rows × 7 columns
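
Before plotting the aggregated values, it can be useful to see how many objects fall into each final class and, for instance, keep only the reliable periods (an illustrative filter based on the column values shown above):

[ ]:
# Count objects per final classification and keep only the reliable periods
print(df2['final_classification'].value_counts())
reliable = df2[df2['final_classification'] == 'reliable']
reliable.head()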

[24]:
import holoviews as hv
from holoviews import opts, dim
import numpy as np

# Initialize HoloViews with the Bokeh backend for interactive plotting
hv.extension('bokeh')

# Convert the output DataFrame to a HoloViews Dataset
dsr = hv.Dataset(df2, kdims=['ID'], vdims=['mean_period', 'mean_lower_error', 'mean_upper_error', 'mean_significance', 'mean_iou', 'final_classification'])

# Define plot options for appearance and interactivity
popts = opts.Points(alpha=0.6,  # Transparency of the points
                    legend_position='right',  # Position of the legend
                    height=400,  # Height of the plot
                    width=600,  # Width of the plot
                    show_grid=True,  # Display a grid
                    color='mean_iou',  # Color points by the 'mean_iou' column
                    cmap='Set1',  # Color map for different classifications
                    line_color='black',  # Color of the outline of points
                    xlabel='ID',  # Label for the x-axis
                    ylabel='mean period [days]',  # Label for the y-axis
                    size=100 * (np.abs(dim('mean_upper_error') - dim('mean_lower_error')) / (dim('mean_period') + 0.1)))  # Size of points based on relative error

# Convert to Points, grouped by final classification, and attach a RangeXY stream
points = dsr.to(hv.Points, ['ID', 'mean_period'], groupby='final_classification')
decimated_data = points.clone(streams=[hv.streams.RangeXY()])

# Create an interactive plot, grouped by 'final_classification'
hvapp = decimated_data.opts(popts)

# Return the interactive plot
hvapp

WARNING:param.HoloMap01701: Setting non-parameter attribute streams=[RangeXY(x_range=None,y_range=None)] using a mechanism intended only for parameters
[24]: