{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "tobac example: Compute bulk statistics during segmentation\n", "=== \n", "\n", "This example shows how to derive some basic statistics for the segmented precipitation features associated with isolated deep convective clouds using the same data as in [our example for precipitation tracking](https://github.com/tobac-project/tobac/blob/main/examples/Example_Precip_Tracking/Example_Precip_Tracking.ipynb). As usual, we perform the segmentation step using the output from the feature detection, but we require some statistics to be calculated for the segmented features. " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:29.610362Z", "iopub.status.busy": "2025-12-18T15:02:29.610202Z", "iopub.status.idle": "2025-12-18T15:02:30.307180Z", "shell.execute_reply": "2025-12-18T15:02:30.306822Z" } }, "outputs": [], "source": [ "# Import libraries\n", "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", "import matplotlib.pyplot as plt\n", "import datetime\n", "import shutil\n", "from six.moves import urllib\n", "from pathlib import Path\n", "\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:30.309098Z", "iopub.status.busy": "2025-12-18T15:02:30.308904Z", "iopub.status.idle": "2025-12-18T15:02:30.311408Z", "shell.execute_reply": "2025-12-18T15:02:30.311147Z" } }, "outputs": [], "source": [ "# Disable a few warnings:\n", "import warnings\n", "\n", "warnings.filterwarnings(\"ignore\", category=UserWarning, append=True)\n", "warnings.filterwarnings(\"ignore\", category=RuntimeWarning, append=True)\n", "warnings.filterwarnings(\"ignore\", category=FutureWarning, append=True)\n", "warnings.filterwarnings(\"ignore\", category=pd.io.pytables.PerformanceWarning)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:30.312709Z", "iopub.status.busy": "2025-12-18T15:02:30.312601Z", "iopub.status.idle": "2025-12-18T15:02:31.719437Z", "shell.execute_reply": "2025-12-18T15:02:31.719145Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "using tobac version 1.6.2\n" ] } ], "source": [ "# Import tobac itself\n", "import tobac\n", "\n", "print(\"using tobac version\", str(tobac.__version__))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:31.739049Z", "iopub.status.busy": "2025-12-18T15:02:31.738781Z", "iopub.status.idle": "2025-12-18T15:02:31.746583Z", "shell.execute_reply": "2025-12-18T15:02:31.746341Z" } }, "outputs": [], "source": [ "data_out=Path('../../../examples')\n", "# Download the data: This only has to be done once for all tobac examples and can take a while\n", "data_file = list(data_out.rglob(\"data/Example_input_Precip.nc\"))\n", "if len(data_file) == 0:\n", " file_path = \"https://zenodo.org/records/3195910/files/climate-processes/tobac_example_data-v1.0.1.zip\"\n", " # file_path='http://zenodo..'\n", " tempfile = Path(\"temp.zip\")\n", " print(\"start downloading data\")\n", " request = urllib.request.urlretrieve(file_path, tempfile)\n", " print(\"start extracting data\")\n", " shutil.unpack_archive(tempfile, data_out)\n", " tempfile.unlink()\n", " print(\"data extracted\")\n", " data_file = list(data_out.rglob(\"data/Example_input_Precip.nc\"))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:31.747713Z", "iopub.status.busy": "2025-12-18T15:02:31.747624Z", "iopub.status.idle": "2025-12-18T15:02:31.749495Z", "shell.execute_reply": "2025-12-18T15:02:31.749281Z" } }, "outputs": [], "source": [ "# Set up directory to save output:\n", "savedir = Path(\"Save\")\n", "if not savedir.is_dir():\n", " savedir.mkdir()\n", "plot_dir = Path(\"Plot\")\n", "if not plot_dir.is_dir():\n", " plot_dir.mkdir()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:31.750543Z", "iopub.status.busy": "2025-12-18T15:02:31.750443Z", "iopub.status.idle": "2025-12-18T15:02:32.264805Z", "shell.execute_reply": "2025-12-18T15:02:32.264504Z" } }, "outputs": [], "source": [ "Precip = xr.open_dataset(data_file[0])[\"surface_precipitation_average\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Feature detection**" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:32.266557Z", "iopub.status.busy": "2025-12-18T15:02:32.266365Z", "iopub.status.idle": "2025-12-18T15:02:32.369818Z", "shell.execute_reply": "2025-12-18T15:02:32.369505Z" } }, "outputs": [], "source": [ "parameters_features = {}\n", "parameters_features[\"position_threshold\"] = \"weighted_diff\"\n", "parameters_features[\"sigma_threshold\"] = 0.5\n", "parameters_features[\"min_distance\"] = 0\n", "parameters_features[\"sigma_threshold\"] = 1\n", "parameters_features[\"threshold\"] = [1, 2, 3, 4, 5, 10, 15] # mm/h\n", "parameters_features[\"n_erosion_threshold\"] = 0\n", "parameters_features[\"n_min_threshold\"] = 3\n", "\n", "# get temporal and spation resolution of the data\n", "dxy, dt = tobac.get_spacings(Precip)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:32.371446Z", "iopub.status.busy": "2025-12-18T15:02:32.371342Z", "iopub.status.idle": "2025-12-18T15:02:33.311782Z", "shell.execute_reply": "2025-12-18T15:02:33.311516Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "starting feature detection based on multiple thresholds\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "feature detection done\n", "features saved\n" ] } ], "source": [ "# Feature detection based on based on surface precipitation field and a range of thresholds\n", "print(\"starting feature detection based on multiple thresholds\")\n", "Features = tobac.feature_detection_multithreshold(Precip, dxy, **parameters_features)\n", "print(\"feature detection done\")\n", "Features.to_hdf(savedir / \"Features.h5\", \"table\")\n", "print(\"features saved\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Segmentation with bulk statistics**\n", "\n", "Segmentation is performed based on a watershedding and a threshold value. The statistics are calculated for each individual feature region and added to the feature output dataframe from the segmentation process. You can decide which statistics to calculate by providing a dictionary with the name of the metric as keys (this will be the name of the column added to the dataframe) and functions as values. Note that it is also possible to provide input parameter to these functions. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Setting parameters for segmentation:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:33.313321Z", "iopub.status.busy": "2025-12-18T15:02:33.313122Z", "iopub.status.idle": "2025-12-18T15:02:33.318491Z", "shell.execute_reply": "2025-12-18T15:02:33.318242Z" } }, "outputs": [], "source": [ "# Dictionary containing keyword arguments for segmentation step:\n", "parameters_segmentation = {}\n", "parameters_segmentation[\"method\"] = \"watershed\"\n", "parameters_segmentation[\"threshold\"] = 1 # mm/h mixing ratio\n", "\n", "# get temporal and spation resolution of the data\n", "dxy, dt = tobac.get_spacings(Precip)" ] }, { "cell_type": "markdown", "metadata": { "execution": { "iopub.execute_input": "2023-07-10T23:49:21.717102Z", "iopub.status.busy": "2023-07-10T23:49:21.716272Z", "iopub.status.idle": "2023-07-10T23:49:21.985540Z", "shell.execute_reply": "2023-07-10T23:49:21.984704Z" } }, "source": [ "#### Defining the dictionary for the statistics to be calculated " ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:33.319696Z", "iopub.status.busy": "2025-12-18T15:02:33.319608Z", "iopub.status.idle": "2025-12-18T15:02:33.321274Z", "shell.execute_reply": "2025-12-18T15:02:33.321055Z" } }, "outputs": [], "source": [ "statistics = {}\n", "statistics[\"mean_precip\"] = np.mean\n", "statistics[\"total_precip\"] = np.sum\n", "statistics[\"max_precip\"] = np.max" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For some functions, we need to provide additional input parameters, e.g. [np.percentile()](https://numpy.org/doc/stable/reference/generated/numpy.percentile.html). These can be provided as key word arguments in form of a dictionary. So instead of the function, you can provide a tuple with both the function and its respective input parameters: \n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:33.322451Z", "iopub.status.busy": "2025-12-18T15:02:33.322367Z", "iopub.status.idle": "2025-12-18T15:02:33.323959Z", "shell.execute_reply": "2025-12-18T15:02:33.323714Z" } }, "outputs": [], "source": [ "statistics[\"percentiles\"] = (np.percentile, {\"q\": [95, 99]})" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:33.325194Z", "iopub.status.busy": "2025-12-18T15:02:33.325091Z", "iopub.status.idle": "2025-12-18T15:02:34.512725Z", "shell.execute_reply": "2025-12-18T15:02:34.512371Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Starting segmentation based on surface precipitation\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "segmentation based on surface precipitation performed, start saving results to files\n", "segmentation surface precipitation performed and saved\n" ] } ], "source": [ "# Perform Segmentation and save resulting mask to NetCDF file:\n", "print(\"Starting segmentation based on surface precipitation\")\n", "Mask, Features_Precip = tobac.segmentation_2D(\n", " Features, Precip, dxy, **parameters_segmentation, statistic=statistics\n", ")\n", "print(\n", " \"segmentation based on surface precipitation performed, start saving results to files\"\n", ")\n", "Mask.to_netcdf(savedir / \"Mask_Segmentation_precip.nc\", encoding={\"segmentation_mask\":{\"zlib\":True, \"complevel\":4}})\n", "Features_Precip.to_hdf(savedir / \"Features_Precip.h5\", \"table\")\n", "print(\"segmentation surface precipitation performed and saved\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Look at the output: " ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:34.514683Z", "iopub.status.busy": "2025-12-18T15:02:34.514554Z", "iopub.status.idle": "2025-12-18T15:02:34.524673Z", "shell.execute_reply": "2025-12-18T15:02:34.524381Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
frameidxhdim_1hdim_2numthreshold_valuefeaturetimetimestrsouth_north...longitudexyx_0y_0ncellsmean_preciptotal_precipmax_precippercentiles
00150.065727139.8574779112013-06-19 20:05:002013-06-19 20:05:00331.065727...-94.172015210678.738492165782.863285420.857477331.065727101.62969516.2969512.289786([2.221776068210602, 2.276183712482452],)
1015120.527119172.5003254122013-06-19 20:05:002013-06-19 20:05:00401.527119...-93.996892227000.162468201013.559414453.500325401.527119101.40954714.0954681.819811([1.8030404090881347, 1.8164567756652832],)
2018126.779273145.36840115132013-06-19 20:05:002013-06-19 20:05:00407.779273...-94.139960213434.200454204139.636582426.368401407.779273112.44152626.8567833.771701([3.710712432861328, 3.759503173828125],)
3034111.611369155.4520304242013-06-19 20:05:002013-06-19 20:05:00392.611369...-94.087317218476.015240196555.684682436.452030392.611369191.93850136.8315124.067666([3.940941762924194, 4.042321195602417],)
4035111.765231164.9388668252013-06-19 20:05:002013-06-19 20:05:00392.765231...-94.037226223219.433218196632.615461445.938866392.765231202.48688649.7377094.380943([4.087516045570374, 4.3222578477859495],)
\n", "

5 rows × 22 columns

\n", "
" ], "text/plain": [ " frame idx hdim_1 hdim_2 num threshold_value feature \\\n", "0 0 1 50.065727 139.857477 9 1 1 \n", "1 0 15 120.527119 172.500325 4 1 2 \n", "2 0 18 126.779273 145.368401 15 1 3 \n", "3 0 34 111.611369 155.452030 4 2 4 \n", "4 0 35 111.765231 164.938866 8 2 5 \n", "\n", " time timestr south_north ... longitude \\\n", "0 2013-06-19 20:05:00 2013-06-19 20:05:00 331.065727 ... -94.172015 \n", "1 2013-06-19 20:05:00 2013-06-19 20:05:00 401.527119 ... -93.996892 \n", "2 2013-06-19 20:05:00 2013-06-19 20:05:00 407.779273 ... -94.139960 \n", "3 2013-06-19 20:05:00 2013-06-19 20:05:00 392.611369 ... -94.087317 \n", "4 2013-06-19 20:05:00 2013-06-19 20:05:00 392.765231 ... -94.037226 \n", "\n", " x y x_0 y_0 ncells mean_precip \\\n", "0 210678.738492 165782.863285 420.857477 331.065727 10 1.629695 \n", "1 227000.162468 201013.559414 453.500325 401.527119 10 1.409547 \n", "2 213434.200454 204139.636582 426.368401 407.779273 11 2.441526 \n", "3 218476.015240 196555.684682 436.452030 392.611369 19 1.938501 \n", "4 223219.433218 196632.615461 445.938866 392.765231 20 2.486886 \n", "\n", " total_precip max_precip percentiles \n", "0 16.296951 2.289786 ([2.221776068210602, 2.276183712482452],) \n", "1 14.095468 1.819811 ([1.8030404090881347, 1.8164567756652832],) \n", "2 26.856783 3.771701 ([3.710712432861328, 3.759503173828125],) \n", "3 36.831512 4.067666 ([3.940941762924194, 4.042321195602417],) \n", "4 49.737709 4.380943 ([4.087516045570374, 4.3222578477859495],) \n", "\n", "[5 rows x 22 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Features_Precip.head()" ] } ], "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.5" } }, "nbformat": 4, "nbformat_minor": 4 }