Example of visualisation of large files obtained from HPC
[1]:
import QhX
Classify the individual periods in the general output collected across batches. The example archive qhx-batch.zip is provided with the data.
[2]:
from QhX.output_parallel import *
[12]:
# Location of the batch result CSV to classify.
# NOTE(review): this is an absolute path on the author's machine — point it at your own copy of result.csv.
file_path = '/Users/andjelka/Downloads/qhx-batch/result.csv'
[13]:
from QhX.parallel_classification import *
[14]:
# Classify the result file chunk by chunk (chunk_size=10000); the recorded output reports
# the classified data being saved to 'classified_individual.csv'.
# NOTE(review): process_csv_in_chunks comes from a QhX star import and is not shown here — confirm its exact contract.
process_csv_in_chunks(file_path, chunk_size=10000, output_file_path='classified_individual.csv')
Processed and classified data saved to classified_individual.csv
Now compute the combined classification and the aggregated mean values for consistent periods detected across pairs of bands.
[15]:
# Run the combined-classification / aggregation step on 'classified_individual.csv'.
# NOTE(review): process_large_dataset is a QhX helper not shown here; the two return values are
# presumably the combined table and the per-ID aggregated statistics — confirm.
combined_data, aggregated_stats = process_large_dataset('classified_individual.csv')
/Users/andjelka/Documents/QhX3/QhX1/QhX/output_parallel.py:181: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
grouped = data.groupby('ID', as_index=False).apply(aggregate_classifications).reset_index(drop=True)
/Users/andjelka/Documents/QhX3/QhX1/QhX/output_parallel.py:181: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
grouped = data.groupby('ID', as_index=False).apply(aggregate_classifications).reset_index(drop=True)
/Users/andjelka/Documents/QhX3/QhX1/QhX/output_parallel.py:181: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
grouped = data.groupby('ID', as_index=False).apply(aggregate_classifications).reset_index(drop=True)
/Users/andjelka/Documents/QhX3/QhX1/QhX/output_parallel.py:181: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
grouped = data.groupby('ID', as_index=False).apply(aggregate_classifications).reset_index(drop=True)
/Users/andjelka/Documents/QhX3/QhX1/QhX/output_parallel.py:181: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
grouped = data.groupby('ID', as_index=False).apply(aggregate_classifications).reset_index(drop=True)
/Users/andjelka/Documents/QhX3/QhX1/QhX/output_parallel.py:181: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
grouped = data.groupby('ID', as_index=False).apply(aggregate_classifications).reset_index(drop=True)
/Users/andjelka/Documents/QhX3/QhX1/QhX/output_parallel.py:181: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
grouped = data.groupby('ID', as_index=False).apply(aggregate_classifications).reset_index(drop=True)
/Users/andjelka/Documents/QhX3/QhX1/QhX/output_parallel.py:181: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
grouped = data.groupby('ID', as_index=False).apply(aggregate_classifications).reset_index(drop=True)
/Users/andjelka/Documents/QhX3/QhX1/QhX/output_parallel.py:181: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
grouped = data.groupby('ID', as_index=False).apply(aggregate_classifications).reset_index(drop=True)
[9]:
from QhX.interactive_plot_large_files import *
[16]:
import pandas as pd
# Load the individually classified periods for plotting.
# NOTE(review): this reads 'classified_periods.csv', whereas the classification step earlier in this
# notebook wrote 'classified_individual.csv' — confirm this is the intended file.
df = pd.read_csv('classified_periods.csv')
[17]:
# Drop every row that has a missing value in any column (pandas default for dropna).
df = df.dropna()
[18]:
import holoviews as hv
from holoviews import dim, opts  # import `dim` explicitly instead of relying on a star import
import numpy as np

# Initialize HoloViews with the Bokeh backend for interactive plotting
hv.extension('bokeh')

# Wrap the DataFrame in a HoloViews Dataset: 'ID' is the key dimension and the
# remaining listed columns are value dimensions.
dsr = hv.Dataset(df, kdims=['ID'], vdims=['m3', 'm4', 'm5', 'm6', 'm7_1', 'm7_2', 'period_diff', 'iou', 'classification'])

# Size of points based on relative error: |m5 - m4| / (m3 + 0.1), scaled by 100.
# The 0.1 offset is presumably there to avoid dividing by zero — confirm.
point_size = 100 * (np.abs(dim('m5') - dim('m4')) / (dim('m3') + 0.1))

# Define plot options for appearance and interactivity
popts = opts.Points(
    alpha=0.6,                    # Transparency of the points
    legend_position='right',      # Position of the legend
    height=400,                   # Height of the plot
    width=600,                    # Width of the plot
    show_grid=True,               # Display a grid
    color='classification',       # Color points by the 'classification' column
    cmap='Set1',                  # Color map for the different classifications
    line_color='black',           # Color of the outline of points
    xlabel='objectid',            # Label for the x-axis
    ylabel='m3 (period[days])',   # Label for the y-axis
    size=point_size,
)

# Convert to Points (x='ID', y='m3'), one element per 'classification' value
points = dsr.to(hv.Points, ['ID', 'm3'], groupby='classification')

# The previous `points.clone(streams=[hv.streams.RangeXY()])` did not decimate
# anything: `streams` is not a HoloMap parameter, so it only produced the
# "Setting non-parameter attribute streams" warning. A plain clone keeps the
# `decimated_data` name available without that stray keyword.
# NOTE(review): if real decimation is wanted for very large tables,
# holoviews.operation.decimate is the operation intended for it — confirm
# before enabling, since it changes which points are drawn.
decimated_data = points.clone()

# Create the interactive plot, grouped by 'classification'
hvapp = decimated_data.opts(popts)

# Return the interactive plot
hvapp
WARNING:param.HoloMap00816: Setting non-parameter attribute streams=[RangeXY(x_range=None,y_range=None)] using a mechanism intended only for parameters
WARNING:param.HoloMap00816:Setting non-parameter attribute streams=[RangeXY(x_range=None,y_range=None)] using a mechanism intended only for parameters
[18]:
[19]:
# Load the combined classification table.
# NOTE(review): no cell visible in this notebook writes 'combined.csv'; presumably it is produced
# by process_large_dataset — confirm.
df1 = pd.read_csv('combined.csv')
[20]:
# Preview the first rows of the combined table (head() defaults to five rows).
df1.head()
[20]:
| | ID | m3 | m4 | m5 | m6 | m7_1 | m7_2 | period_diff | iou | classification | individual_classification | final_classification |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 443663 | 29.120559 | 0.186411 | 4.132621 | 1.00 | 0 | 2 | 0.0 | 1.0 | poor | poor | poor |
1 | 443660 | 335.570470 | -1.000000 | -1.000000 | 0.94 | 0 | 2 | 0.0 | 0.0 | poor | poor | poor |
2 | 443660 | 335.570470 | -1.000000 | -1.000000 | 0.94 | 0 | 2 | 0.0 | 0.0 | poor | poor | poor |
3 | 443660 | 335.570470 | -1.000000 | -1.000000 | 0.98 | 0 | 2 | 0.0 | 0.0 | poor | poor | poor |
4 | 443640 | 252.525253 | -1.000000 | -1.000000 | 0.98 | 0 | 3 | 0.0 | 0.0 | poor | poor | poor |
[21]:
import holoviews as hv
from holoviews import dim, opts  # import `dim` explicitly instead of relying on a star import
import numpy as np

# Initialize HoloViews with the Bokeh backend for interactive plotting
hv.extension('bokeh')

# Wrap the combined DataFrame in a HoloViews Dataset: 'ID' is the key dimension
# and the remaining listed columns are value dimensions.
dsr = hv.Dataset(df1, kdims=['ID'], vdims=['m3', 'm4', 'm5', 'm6', 'm7_1', 'm7_2', 'period_diff', 'iou', 'classification'])

# Size of points based on relative error: |m5 - m4| / (m3 + 0.1), scaled by 100.
# The 0.1 offset is presumably there to avoid dividing by zero — confirm.
point_size = 100 * (np.abs(dim('m5') - dim('m4')) / (dim('m3') + 0.1))

# Define plot options for appearance and interactivity
popts = opts.Points(
    alpha=0.6,                    # Transparency of the points
    legend_position='right',      # Position of the legend
    height=400,                   # Height of the plot
    width=600,                    # Width of the plot
    show_grid=True,               # Display a grid
    color='classification',       # Color points by the 'classification' column
    cmap='Set1',                  # Color map for the different classifications
    line_color='black',           # Color of the outline of points
    xlabel='ID',                  # Label for the x-axis
    ylabel='m3 (period[days])',   # Label for the y-axis
    size=point_size,
)

# Convert to Points (x='ID', y='m3'), one element per 'final_classification'
# value. Note the grouping column differs from the colouring column above.
points = dsr.to(hv.Points, ['ID', 'm3'], groupby='final_classification')

# The previous `points.clone(streams=[hv.streams.RangeXY()])` did not decimate
# anything: `streams` is not a HoloMap parameter, so it only produced the
# "Setting non-parameter attribute streams" warning. A plain clone keeps the
# `decimated_data` name available without that stray keyword.
# NOTE(review): if real decimation is wanted for very large tables,
# holoviews.operation.decimate is the operation intended for it — confirm
# before enabling, since it changes which points are drawn.
decimated_data = points.clone()

# Create the interactive plot, grouped by 'final_classification'
hvapp = decimated_data.opts(popts)

# Return the interactive plot
hvapp
WARNING:param.HoloMap01139: Setting non-parameter attribute streams=[RangeXY(x_range=None,y_range=None)] using a mechanism intended only for parameters
WARNING:param.HoloMap01139:Setting non-parameter attribute streams=[RangeXY(x_range=None,y_range=None)] using a mechanism intended only for parameters
[21]:
[22]:
# Load the aggregated per-ID statistics.
# NOTE(review): no cell visible in this notebook writes 'aggregated.csv'; presumably it is produced
# by process_large_dataset — confirm.
df2 = pd.read_csv('aggregated.csv')
[23]:
# Preview the first 100 rows of the aggregated table.
df2.head(100)
[23]:
| | ID | final_classification | mean_period | mean_lower_error | mean_upper_error | mean_significance | mean_iou |
---|---|---|---|---|---|---|---|
0 | 271400 | reliable | 20.391517 | 0.641235 | 0.670589 | 1.0 | 1.0 |
1 | 271465 | reliable | 20.391517 | 0.327674 | 0.728985 | 1.0 | 1.0 |
2 | 271503 | reliable | 19.238169 | 0.360260 | 1.356715 | 1.0 | 1.0 |
3 | 271766 | reliable | 11.723329 | 0.207330 | 0.300756 | 1.0 | 1.0 |
4 | 272127 | reliable | 92.421442 | 7.527273 | 5.195175 | 1.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... |
95 | 443513 | reliable | 24.271845 | 1.123348 | 0.102427 | 1.0 | 1.0 |
96 | 451760 | reliable | 13.245033 | 0.131743 | 0.329935 | 1.0 | 1.0 |
97 | 452764 | reliable | 15.688735 | 0.493220 | 0.499617 | 1.0 | 1.0 |
98 | 454074 | reliable | 26.824034 | 1.399604 | 1.103342 | 1.0 | 1.0 |
99 | 454224 | reliable | 35.137034 | 1.831320 | 3.296370 | 1.0 | 1.0 |
100 rows × 7 columns
[24]:
import holoviews as hv
from holoviews import dim, opts  # import `dim` explicitly instead of relying on a star import
import numpy as np

# Initialize HoloViews with the Bokeh backend for interactive plotting
hv.extension('bokeh')

# Wrap the aggregated DataFrame in a HoloViews Dataset: 'ID' is the key
# dimension and the listed mean_* columns are value dimensions.
dsr = hv.Dataset(df2, kdims=['ID'], vdims=['mean_period', 'mean_lower_error', 'mean_upper_error', 'mean_significance', 'mean_iou'])

# Size of points based on relative error:
# |mean_upper_error - mean_lower_error| / (mean_period + 0.1), scaled by 100.
# The 0.1 offset is presumably there to avoid dividing by zero — confirm.
point_size = 100 * (np.abs(dim('mean_upper_error') - dim('mean_lower_error')) / (dim('mean_period') + 0.1))

# Define plot options for appearance and interactivity
popts = opts.Points(
    alpha=0.6,                  # Transparency of the points
    legend_position='right',    # Position of the legend
    height=400,                 # Height of the plot
    width=600,                  # Width of the plot
    show_grid=True,             # Display a grid
    color='mean_iou',           # Color points by the 'mean_iou' column
    cmap='Set1',                # Color map applied to the 'mean_iou' values
    line_color='black',         # Color of the outline of points
    xlabel='ID',                # Label for the x-axis
    ylabel='(period[days])',    # Label for the y-axis
    size=point_size,
)

# Convert to Points (x='ID', y='mean_period'), one element per
# 'final_classification' value
points = dsr.to(hv.Points, ['ID', 'mean_period'], groupby='final_classification')

# The previous `points.clone(streams=[hv.streams.RangeXY()])` did not decimate
# anything: `streams` is not a HoloMap parameter, so it only produced the
# "Setting non-parameter attribute streams" warning. A plain clone keeps the
# `decimated_data` name available without that stray keyword.
# NOTE(review): if real decimation is wanted for very large tables,
# holoviews.operation.decimate is the operation intended for it — confirm
# before enabling, since it changes which points are drawn.
decimated_data = points.clone()

# Create the interactive plot, grouped by 'final_classification'
hvapp = decimated_data.opts(popts)

# Return the interactive plot
hvapp
WARNING:param.HoloMap01701: Setting non-parameter attribute streams=[RangeXY(x_range=None,y_range=None)] using a mechanism intended only for parameters
WARNING:param.HoloMap01701:Setting non-parameter attribute streams=[RangeXY(x_range=None,y_range=None)] using a mechanism intended only for parameters
[24]:
[ ]: