titanicprediction.data package

Submodules

titanicprediction.data.analysis module

class titanicprediction.data.analysis.BackendType(*values)[source]

Bases: Enum

MATPLOTLIB = 'matplotlib'
PLOTLY = 'plotly'
SEABORN = 'seaborn'
class titanicprediction.data.analysis.CorrelationVisualizer[source]

Bases: object

_apply_common_styling(ax: Axes, config: PlotConfig) → None[source]
_create_correlation_heatmap(ax: Axes, corr_matrix: DataFrame, config: PlotConfig) → None[source]
create_plot(data: DataFrame, plot_config: PlotConfig) → PlotResult[source]
class titanicprediction.data.analysis.DistributionVisualizer[source]

Bases: object

_apply_common_styling(ax: Axes, config: PlotConfig) → None[source]
_create_histogram(ax: Axes, data: Series, config: PlotConfig) → None[source]
_create_kde(ax: Axes, data: Series, config: PlotConfig) → None[source]
_create_violin(ax: Axes, data: Series, config: PlotConfig) → None[source]
create_plot(data: Any, plot_config: PlotConfig) → PlotResult[source]
class titanicprediction.data.analysis.EDAVisualizer(output_dir: str = 'data/plots')[source]

Bases: object

__init__(output_dir: str = 'data/plots')[source]
_save_plot(plot_result: PlotResult, filename: str) → Path[source]
create_correlation_analysis(dataset: Dataset) → dict[str, Path][source]
create_demographic_plots(dataset: Dataset) → dict[str, Path][source]
create_feature_importance_plot(model: TrainedModel) → dict[str, Path][source]
create_survival_analysis_plots(dataset: Dataset) → dict[str, Path][source]
create_training_plots(loss_history: list[float]) → dict[str, Path][source]
get_supported_formats() → list[str][source]
class titanicprediction.data.analysis.FeatureAnalysisVisualizer[source]

Bases: object

_apply_common_styling(ax: Axes, config: PlotConfig) → None[source]
_create_box_plot(ax: Axes, dataset: Dataset, config: PlotConfig) → None[source]
_create_count_plot(ax: Axes, dataset: Dataset, config: PlotConfig) → None[source]
_create_feature_distribution(ax: Axes, dataset: Dataset, config: PlotConfig) → None[source]
_create_pie_chart(ax: Axes, dataset: Dataset, config: PlotConfig) → None[source]
_create_scatter_plot(ax: Axes, dataset: Dataset, config: PlotConfig) → None[source]
_create_survival_by_feature(ax: Axes, dataset: Dataset, config: PlotConfig) → None[source]
create_plot(dataset: Dataset, plot_config: PlotConfig) → PlotResult[source]
class titanicprediction.data.analysis.IVisualizer(*args, **kwargs)[source]

Bases: Protocol

__init__(*args, **kwargs)
_abc_impl = <_abc._abc_data object>
_is_protocol = True
create_plot(data: Any, plot_config: PlotConfig) → PlotResult[source]
get_supported_formats() → list[str][source]
save_plot(plot: PlotResult, filename: str) → Path[source]
class titanicprediction.data.analysis.PlotConfig(plot_type: titanicprediction.data.analysis.PlotType, title: str, x_label: str, y_label: str, figsize: tuple[int, int] = (10, 6), style: str = 'seaborn-v0_8', colors: list[str] = None, save_format: str = 'png', dpi: int = 300, font_size: int = 12, legend: bool = True, grid: bool = True)[source]

Bases: object

__init__(plot_type: PlotType, title: str, x_label: str, y_label: str, figsize: tuple[int, int] = (10, 6), style: str = 'seaborn-v0_8', colors: list[str] = None, save_format: str = 'png', dpi: int = 300, font_size: int = 12, legend: bool = True, grid: bool = True) → None
colors: list[str] = None
dpi: int = 300
figsize: tuple[int, int] = (10, 6)
font_size: int = 12
grid: bool = True
legend: bool = True
plot_type: PlotType
save_format: str = 'png'
style: str = 'seaborn-v0_8'
title: str
x_label: str
y_label: str
class titanicprediction.data.analysis.PlotResult(figure: matplotlib.figure.Figure, axes: matplotlib.axes._axes.Axes, config: titanicprediction.data.analysis.PlotConfig, metadata: dict[str, Any])[source]

Bases: object

__init__(figure: Figure, axes: Axes, config: PlotConfig, metadata: dict[str, Any]) → None
axes: Axes
config: PlotConfig
figure: Figure
metadata: dict[str, Any]
class titanicprediction.data.analysis.PlotType(*values)[source]

Bases: Enum

BAR = 'bar'
BOX = 'box'
COUNT = 'count'
HEATMAP = 'heatmap'
HISTOGRAM = 'histogram'
KDE = 'kde'
LINE = 'line'
PIE = 'pie'
SCATTER = 'scatter'
VIOLIN = 'violin'
class titanicprediction.data.analysis.TrainingVisualizer[source]

Bases: object

_create_training_curve(ax: Axes, loss_history: list[float], config: PlotConfig) → None[source]
create_plot(loss_history: list[float], plot_config: PlotConfig) → PlotResult[source]

titanicprediction.data.preprocessing module

class titanicprediction.data.preprocessing.AgeImputer(strategy: Literal['mean', 'median', 'mode', 'constant'] = 'median', fill_value: float | None = None, _imputer: Any = None)[source]

Bases: object

__init__(strategy: Literal['mean', 'median', 'mode', 'constant'] = 'median', fill_value: float | None = None, _imputer: Any = None) → None
_imputer: Any = None
fill_value: float | None = None
fit(dataset: Dataset) → None[source]
fit_transform(dataset: Dataset) → Dataset[source]
get_params() → dict[str, Any][source]
strategy: Literal['mean', 'median', 'mode', 'constant'] = 'median'
transform(dataset: Dataset) → Dataset[source]
class titanicprediction.data.preprocessing.CategoricalEncoder(encoding_type: Literal['onehot', 'label'] = 'onehot', columns: list[str] = None, handle_unknown: Literal['error', 'ignore', 'use_encoded_value'] = 'error', _encoders: dict[str, Any] = None, _feature_names: list[str] = None)[source]

Bases: object

__init__(encoding_type: Literal['onehot', 'label'] = 'onehot', columns: list[str] = None, handle_unknown: Literal['error', 'ignore', 'use_encoded_value'] = 'error', _encoders: dict[str, Any] = None, _feature_names: list[str] = None) → None
_encoders: dict[str, Any] = None
_feature_names: list[str] = None
_update_feature_names(original_col: str, new_cols: list[str]) → None[source]
columns: list[str] = None
encoding_type: Literal['onehot', 'label'] = 'onehot'
fit(dataset: Dataset) → None[source]
fit_transform(dataset: Dataset) → Dataset[source]
get_params() → dict[str, Any][source]
handle_unknown: Literal['error', 'ignore', 'use_encoded_value'] = 'error'
transform(dataset: Dataset) → Dataset[source]
class titanicprediction.data.preprocessing.ColumnDropper(columns: list[str] = None)[source]

Bases: object

__init__(columns: list[str] = None) → None
columns: list[str] = None
fit(dataset: Dataset) → None[source]
fit_transform(dataset: Dataset) → Dataset[source]
get_params() → dict[str, Any][source]
transform(dataset: Dataset) → Dataset[source]
class titanicprediction.data.preprocessing.DataPreprocessor[source]

Bases: object

__init__()[source]
add_step(name: str, transformer: IDataTransformer) → None[source]
fit(dataset: Dataset) → None[source]
fit_transform(dataset: Dataset) → Dataset[source]
fitted: bool
get_params() → dict[str, Any][source]
preprocessing_steps: list[tuple[str, IDataTransformer]]
transform(dataset: Dataset) → Dataset[source]
transform_features(features: DataFrame) → DataFrame[source]
class titanicprediction.data.preprocessing.FeatureScaler(method: Literal['standard', 'minmax', 'robust'] = 'standard', columns: list[str] = None, with_mean: bool = True, with_std: bool = True, _scalers: dict[str, Any] = None)[source]

Bases: object

__init__(method: Literal['standard', 'minmax', 'robust'] = 'standard', columns: list[str] = None, with_mean: bool = True, with_std: bool = True, _scalers: dict[str, Any] = None) → None
_scalers: dict[str, Any] = None
columns: list[str] = None
fit(dataset: Dataset) → None[source]
fit_transform(dataset: Dataset) → Dataset[source]
get_params() → dict[str, Any][source]
method: Literal['standard', 'minmax', 'robust'] = 'standard'
transform(dataset: Dataset) → Dataset[source]
with_mean: bool = True
with_std: bool = True
class titanicprediction.data.preprocessing.IDataTransformer(*args, **kwargs)[source]

Bases: Protocol

__init__(*args, **kwargs)
_abc_impl = <_abc._abc_data object>
_is_protocol = True
fit(dataset: Dataset) → None[source]
fit_transform(dataset: Dataset) → Dataset[source]
get_params() → dict[str, Any][source]
transform(dataset: Dataset) → Dataset[source]
class titanicprediction.data.preprocessing.PreprocessorFactory[source]

Bases: object

static create_titanic_preprocessor() → DataPreprocessor[source]
class titanicprediction.data.preprocessing.TitleExtractor(name_column: str = 'Name', title_column: str = 'Title', custom_mappings: dict[str, str] = None, _title_patterns: dict[str, str] = None)[source]

Bases: object

__init__(name_column: str = 'Name', title_column: str = 'Title', custom_mappings: dict[str, str] = None, _title_patterns: dict[str, str] = None) → None
_title_patterns: dict[str, str] = None
custom_mappings: dict[str, str] = None
fit(dataset: Dataset) → None[source]
fit_transform(dataset: Dataset) → Dataset[source]
get_params() → dict[str, Any][source]
name_column: str = 'Name'
title_column: str = 'Title'
transform(dataset: Dataset) → Dataset[source]

titanicprediction.data.repositories module

class titanicprediction.data.repositories.CSVDataRepository(file_path: str, target_column: str = 'Survived')[source]

Bases: object

__init__(file_path: str, target_column: str = 'Survived')[source]
get_metadata() → dict[str, Any][source]
load_data() → Dataset[source]
save_data(dataset: Dataset) → bool[source]
class titanicprediction.data.repositories.FileModelRepository(models_dir: str = 'models')[source]

Bases: object

__init__(models_dir: str = 'models')[source]
delete_model(name: str) → bool[source]
get_model_info(name: str) → dict[str, Any] | None[source]
list_models() → list[str][source]
load_model(name: str) → TrainedModel | None[source]
save_model(model: TrainedModel, name: str) → bool[source]
class titanicprediction.data.repositories.IDataRepository(*args, **kwargs)[source]

Bases: Protocol

__init__(*args, **kwargs)
_abc_impl = <_abc._abc_data object>
_is_protocol = True
get_metadata() → dict[str, Any][source]
load_data() → Dataset[source]
save_data(dataset: Dataset) → bool[source]
class titanicprediction.data.repositories.IModelRepository(*args, **kwargs)[source]

Bases: Protocol

__init__(*args, **kwargs)
_abc_impl = <_abc._abc_data object>
_is_protocol = True
list_models() → list[str][source]
load_model(name: str) → TrainedModel | None[source]
save_model(model: TrainedModel, name: str) → bool[source]

Module contents