{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "tobac example: Compute bulk statistics during feature detection\n", "=== \n", "\n", "You can compute bulk statistics for features from the feature detection or the masked features from the segmentation mask. \n", "\n", "This example shows how to derive some basic statistics for precipitation features associated with isolated deep convective clouds using the same data as in [our example for precipitation tracking](https://github.com/tobac-project/tobac/blob/main/examples/Example_Precip_Tracking/Example_Precip_Tracking.ipynb). The data used in this example is downloaded from automatically as part of the notebook. " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:37.722182Z", "iopub.status.busy": "2025-12-18T15:02:37.722089Z", "iopub.status.idle": "2025-12-18T15:02:38.451987Z", "shell.execute_reply": "2025-12-18T15:02:38.451721Z" } }, "outputs": [], "source": [ "# Import libraries\n", "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", "import matplotlib.pyplot as plt\n", "import datetime\n", "import shutil\n", "from six.moves import urllib\n", "from pathlib import Path\n", "\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:38.453600Z", "iopub.status.busy": "2025-12-18T15:02:38.453414Z", "iopub.status.idle": "2025-12-18T15:02:39.873069Z", "shell.execute_reply": "2025-12-18T15:02:39.872776Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "using tobac version 1.6.2\n" ] } ], "source": [ "# Import tobac itself\n", "import tobac\n", "\n", "print(\"using tobac version\", str(tobac.__version__))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:39.892268Z", "iopub.status.busy": "2025-12-18T15:02:39.892030Z", "iopub.status.idle": "2025-12-18T15:02:39.894286Z", "shell.execute_reply": "2025-12-18T15:02:39.893981Z" } }, "outputs": [], "source": [ "# Disable a few warnings:\n", "import warnings\n", "\n", "warnings.filterwarnings(\"ignore\", category=UserWarning, append=True)\n", "warnings.filterwarnings(\"ignore\", category=RuntimeWarning, append=True)\n", "warnings.filterwarnings(\"ignore\", category=FutureWarning, append=True)\n", "warnings.filterwarnings(\"ignore\", category=pd.io.pytables.PerformanceWarning)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Download example data:**\n", "\n", "Actual download has to be performed only once for all example notebooks!" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:39.895598Z", "iopub.status.busy": "2025-12-18T15:02:39.895494Z", "iopub.status.idle": "2025-12-18T15:02:39.897146Z", "shell.execute_reply": "2025-12-18T15:02:39.896880Z" } }, "outputs": [], "source": [ "data_out=Path('../../../examples')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:39.898527Z", "iopub.status.busy": "2025-12-18T15:02:39.898442Z", "iopub.status.idle": "2025-12-18T15:02:39.905454Z", "shell.execute_reply": "2025-12-18T15:02:39.905236Z" } }, "outputs": [], "source": [ "# Download the data: This only has to be done once for all tobac examples and can take a while\n", "data_file = list(data_out.rglob(\"data/Example_input_Precip.nc\"))\n", "if len(data_file) == 0:\n", " file_path = \"https://zenodo.org/records/3195910/files/climate-processes/tobac_example_data-v1.0.1.zip\"\n", " # file_path='http://zenodo..'\n", " tempfile = Path(\"temp.zip\")\n", " print(\"start downloading data\")\n", " request = urllib.request.urlretrieve(file_path, tempfile)\n", " print(\"start extracting data\")\n", " shutil.unpack_archive(tempfile, data_out)\n", " tempfile.unlink()\n", " print(\"data extracted\")\n", " data_file = list(data_out.rglob(\"data/Example_input_Precip.nc\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Load Data from downloaded file:**" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:39.906666Z", "iopub.status.busy": "2025-12-18T15:02:39.906581Z", "iopub.status.idle": "2025-12-18T15:02:40.404307Z", "shell.execute_reply": "2025-12-18T15:02:40.404023Z" } }, "outputs": [], "source": [ "Precip = xr.open_dataset(data_file[0])[\"surface_precipitation_average\"]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:40.405849Z", "iopub.status.busy": "2025-12-18T15:02:40.405699Z", "iopub.status.idle": "2025-12-18T15:02:40.413703Z", "shell.execute_reply": "2025-12-18T15:02:40.413465Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray 'surface_precipitation_average' (time: 47, south_north: 198,\n",
       "                                                   west_east: 198)> Size: 7MB\n",
       "[1842588 values with dtype=float32]\n",
       "Coordinates:\n",
       "  * time         (time) datetime64[ns] 376B 2013-06-19T20:05:00 ... 2013-06-1...\n",
       "  * south_north  (south_north) int64 2kB 281 282 283 284 285 ... 475 476 477 478\n",
       "  * west_east    (west_east) int64 2kB 281 282 283 284 285 ... 475 476 477 478\n",
       "    latitude     (south_north, west_east) float32 157kB ...\n",
       "    longitude    (south_north, west_east) float32 157kB ...\n",
       "    x            (west_east) float64 2kB ...\n",
       "    y            (south_north) float64 2kB ...\n",
       "    x_0          (west_east) int64 2kB ...\n",
       "    y_0          (south_north) int64 2kB ...\n",
       "Attributes:\n",
       "    long_name:  surface_precipitation_average\n",
       "    units:      mm h-1
" ], "text/plain": [ " Size: 7MB\n", "[1842588 values with dtype=float32]\n", "Coordinates:\n", " * time (time) datetime64[ns] 376B 2013-06-19T20:05:00 ... 2013-06-1...\n", " * south_north (south_north) int64 2kB 281 282 283 284 285 ... 475 476 477 478\n", " * west_east (west_east) int64 2kB 281 282 283 284 285 ... 475 476 477 478\n", " latitude (south_north, west_east) float32 157kB ...\n", " longitude (south_north, west_east) float32 157kB ...\n", " x (west_east) float64 2kB ...\n", " y (south_north) float64 2kB ...\n", " x_0 (west_east) int64 2kB ...\n", " y_0 (south_north) int64 2kB ...\n", "Attributes:\n", " long_name: surface_precipitation_average\n", " units: mm h-1" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# display information about the iris cube containing the surface precipitation data:\n", "display(Precip)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:40.414828Z", "iopub.status.busy": "2025-12-18T15:02:40.414738Z", "iopub.status.idle": "2025-12-18T15:02:40.416531Z", "shell.execute_reply": "2025-12-18T15:02:40.416333Z" } }, "outputs": [], "source": [ "# Set up directory to save output:\n", "savedir = Path(\"Save\")\n", "if not savedir.is_dir():\n", " savedir.mkdir()\n", "plot_dir = Path(\"Plot\")\n", "if not plot_dir.is_dir():\n", " plot_dir.mkdir()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Feature detection with bulk statistics**\n", "\n", "Feature detection is perfomed based on surface precipitation field and a range of thresholds. \n", "The statistics are calculated for each individual feature region and added to the feature output dataframe. You can decide which statistics to calculate by providing a dictionary with the name of the metric as keys (this will be the name of the column added to the dataframe) and functions as values. Note that it is also possible to provide input parameter to these functions. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Setting parameters for feature detection:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:40.417771Z", "iopub.status.busy": "2025-12-18T15:02:40.417676Z", "iopub.status.idle": "2025-12-18T15:02:40.512040Z", "shell.execute_reply": "2025-12-18T15:02:40.511721Z" } }, "outputs": [], "source": [ "parameters_features = {}\n", "parameters_features[\"position_threshold\"] = \"weighted_diff\"\n", "parameters_features[\"sigma_threshold\"] = 0.5\n", "parameters_features[\"min_distance\"] = 0\n", "parameters_features[\"sigma_threshold\"] = 1\n", "parameters_features[\"threshold\"] = [1, 2, 3, 4, 5, 10, 15] # mm/h\n", "parameters_features[\"n_erosion_threshold\"] = 0\n", "parameters_features[\"n_min_threshold\"] = 3\n", "\n", "# get temporal and spation resolution of the data\n", "dxy, dt = tobac.get_spacings(Precip)" ] }, { "cell_type": "markdown", "metadata": { "execution": { "iopub.execute_input": "2023-07-10T23:49:21.717102Z", "iopub.status.busy": "2023-07-10T23:49:21.716272Z", "iopub.status.idle": "2023-07-10T23:49:21.985540Z", "shell.execute_reply": "2023-07-10T23:49:21.984704Z" } }, "source": [ "#### Defining the dictionary for the statistics to be calculated " ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:40.513583Z", "iopub.status.busy": "2025-12-18T15:02:40.513489Z", "iopub.status.idle": "2025-12-18T15:02:40.515407Z", "shell.execute_reply": "2025-12-18T15:02:40.515176Z" } }, "outputs": [], "source": [ "statistics = {}\n", "statistics[\"mean_precip\"] = np.mean\n", "statistics[\"total_precip\"] = np.sum\n", "statistics[\"max_precip\"] = np.max" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "for some functions, we need to provide additional input parameters, e.g. [np.percentile()](https://numpy.org/doc/stable/reference/generated/numpy.percentile.html). These can be provided as key word arguments in form of a dictionary. So instead of the function, you can provide a tuple with both the function and its respective input parameters: \n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:40.516643Z", "iopub.status.busy": "2025-12-18T15:02:40.516567Z", "iopub.status.idle": "2025-12-18T15:02:40.518255Z", "shell.execute_reply": "2025-12-18T15:02:40.518025Z" } }, "outputs": [], "source": [ "statistics[\"percentiles\"] = (np.percentile, {\"q\": [95, 99]})" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:40.519447Z", "iopub.status.busy": "2025-12-18T15:02:40.519369Z", "iopub.status.idle": "2025-12-18T15:02:42.067297Z", "shell.execute_reply": "2025-12-18T15:02:42.066900Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "starting feature detection based on multiple thresholds\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "feature detection done\n", "features saved\n" ] } ], "source": [ "# Feature detection based on based on surface precipitation field and a range of thresholds\n", "print(\"starting feature detection based on multiple thresholds\")\n", "Features = tobac.feature_detection_multithreshold(\n", " Precip, dxy, **parameters_features, statistic=statistics\n", ")\n", "print(\"feature detection done\")\n", "Features.to_hdf(savedir / \"Features.h5\", \"table\")\n", "print(\"features saved\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Look at the output: " ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "execution": { "iopub.execute_input": "2025-12-18T15:02:42.068952Z", "iopub.status.busy": "2025-12-18T15:02:42.068764Z", "iopub.status.idle": "2025-12-18T15:02:42.077887Z", "shell.execute_reply": "2025-12-18T15:02:42.077626Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
frameidxhdim_1hdim_2numthreshold_valuemean_preciptotal_precipmax_precippercentiles...timetimestrsouth_northwest_eastlatitudelongitudexyx_0y_0
00150.065727139.857477911.24101211.1691061.528488([1.4821563005447387, 1.5192213106155394],)...2013-06-19 20:05:002013-06-19 20:05:00331.065727420.85747729.846362-94.172015210678.738492165782.863285420.857477331.065727
1015120.527119172.500325411.250165.0006381.267255([1.266197031736374, 1.267043651342392],)...2013-06-19 20:05:002013-06-19 20:05:00401.527119453.50032530.166929-93.996892227000.162468201013.559414453.500325401.527119
2018126.779273145.3684011511.56411323.4616912.321664([2.268769121170044, 2.311084909439087],)...2013-06-19 20:05:002013-06-19 20:05:00407.779273426.36840130.196499-94.139960213434.200454204139.636582426.368401407.779273
3034111.611369155.452030422.3136589.254632.409467([2.4016830801963804, 2.4079100108146667],)...2013-06-19 20:05:002013-06-19 20:05:00392.611369436.45203030.126871-94.087317218476.015240196555.684682436.452030392.611369
4035111.765231164.938866822.61088620.8870893.081343([2.995926022529602, 3.064259362220764],)...2013-06-19 20:05:002013-06-19 20:05:00392.765231445.93886630.127221-94.037226223219.433218196632.615461445.938866392.765231
\n", "

5 rows × 21 columns

\n", "
" ], "text/plain": [ " frame idx hdim_1 hdim_2 num threshold_value mean_precip \\\n", "0 0 1 50.065727 139.857477 9 1 1.241012 \n", "1 0 15 120.527119 172.500325 4 1 1.25016 \n", "2 0 18 126.779273 145.368401 15 1 1.564113 \n", "3 0 34 111.611369 155.452030 4 2 2.313658 \n", "4 0 35 111.765231 164.938866 8 2 2.610886 \n", "\n", " total_precip max_precip percentiles ... \\\n", "0 11.169106 1.528488 ([1.4821563005447387, 1.5192213106155394],) ... \n", "1 5.000638 1.267255 ([1.266197031736374, 1.267043651342392],) ... \n", "2 23.461691 2.321664 ([2.268769121170044, 2.311084909439087],) ... \n", "3 9.25463 2.409467 ([2.4016830801963804, 2.4079100108146667],) ... \n", "4 20.887089 3.081343 ([2.995926022529602, 3.064259362220764],) ... \n", "\n", " time timestr south_north west_east latitude \\\n", "0 2013-06-19 20:05:00 2013-06-19 20:05:00 331.065727 420.857477 29.846362 \n", "1 2013-06-19 20:05:00 2013-06-19 20:05:00 401.527119 453.500325 30.166929 \n", "2 2013-06-19 20:05:00 2013-06-19 20:05:00 407.779273 426.368401 30.196499 \n", "3 2013-06-19 20:05:00 2013-06-19 20:05:00 392.611369 436.452030 30.126871 \n", "4 2013-06-19 20:05:00 2013-06-19 20:05:00 392.765231 445.938866 30.127221 \n", "\n", " longitude x y x_0 y_0 \n", "0 -94.172015 210678.738492 165782.863285 420.857477 331.065727 \n", "1 -93.996892 227000.162468 201013.559414 453.500325 401.527119 \n", "2 -94.139960 213434.200454 204139.636582 426.368401 407.779273 \n", "3 -94.087317 218476.015240 196555.684682 436.452030 392.611369 \n", "4 -94.037226 223219.433218 196632.615461 445.938866 392.765231 \n", "\n", "[5 rows x 21 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Features.head()" ] } ], "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.5" } }, "nbformat": 4, "nbformat_minor": 4 }