Check DB1 data out of range

Author

Matthias Cuntz

Published

December 5, 2025

Code
import datetime as dt
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pyfrhes import read_config
from pyfrhes import get_config_loggers, get_config_logger_files
from pyfrhes import get_config_calib_files
from pyfrhes import read_data


def _get_icos_value(df, var, col, default=np.nan):
    '''
    Get value of column for variable in ICOSVarName

    Parameters
    ----------
    df : pandas.DataFrame
        pandas DataFrame of calib file
    var : str
        Variable name
    col : str
        Column to read
    default : float, optional
        Default value if value conversion failed or NaN (default: NaN)

    Returns
    -------
    float

    '''
    maxi = df.loc[df['ICOSVarName'] == var, col].values[0]
    try:
        maxi = float(maxi)
    except ValueError:
        maxi = np.nan
    if np.isnan(maxi):
        maxi = default

    return maxi


# Set to True to print the loggers and file names read from the config file
debug = False

# Colour map for the heatmaps: Seaborn's 'Oranges' palette sampled at
# 256 integer indices, with the very first colour replaced by opaque white
oranges = mpl.colors.LinearSegmentedColormap.from_list(
    name='oranges', colors=sns.color_palette('Oranges'))
ocols = oranges(np.arange(256))
ocols[0, :] = 1.  # RGBA = (1, 1, 1, 1), i.e. white at start
cmap = mpl.colors.ListedColormap(ocols)
Code
# Number of days before today that are checked
ndays = 7
today = dt.date.today()
# To check a fixed date instead of the current one, set today by hand:
# today = dt.date(2025, 9, 25)
fromday = today - dt.timedelta(days=ndays)
year = today.year

# The config file name carries the year of today
configfile = 'FR-Hes_' + str(year) + '.cfg'
print(f"Read config file: {configfile}")
config = read_config(configfile)

# loggers
loggers = get_config_loggers(config)
if debug:
    print(f"Loggers: {loggers}")

# DB1 file per logger
dfiles = get_config_logger_files(config, loggers, ftype='DB1')
if debug:
    dfilenames = {}
    for ll in dfiles:
        dfilenames[ll] = os.path.basename(dfiles[ll])
    print(f"DB1 filenames: {dfilenames}")

# Last calib file returned for each logger
cfiles = {}
for ll in loggers:
    cfiles[ll] = get_config_calib_files(config, ll)[-1]
if debug:
    print(f"Config files: {cfiles}")
Read config file: FR-Hes_2025.cfg

Number of out-of-range DB1 values per logger

Code
# Dates of the ndays days before today, oldest first; they label the
# columns of the heatmaps
firstday = today - dt.timedelta(days=ndays)
prevdays = [firstday + dt.timedelta(days=dd) for dd in range(ndays)]

# One heatmap per logger: rows are variables, columns are days, and each
# cell is the number of records outside the [Min, Max] range of the
# calib file
for ll in loggers:
    print(f'{ll}')
    df = read_data(dfiles[ll], ftype='DB1', standard=False)
    df.drop(columns=['RECORD'], inplace=True)
    cf = read_data(cfiles[ll], ftype='calib')

    # Calendar date of every record and one boolean mask per day,
    # computed once per logger instead of once per variable and day
    dates = df.index.date
    isdays = [dates == day for day in prevdays]

    # ndata[ii, jj]: out-of-range count of variable ii on prevdays[jj]
    ndata = np.full((df.shape[1], ndays), 0, dtype=int)
    for ii, cc in enumerate(df.columns):
        # The limits do not depend on the day, so look them up once per
        # variable; NaN or non-numeric limits are replaced by -/+10000
        maxi = _get_icos_value(cf, cc, 'Max', default=10000.)
        mini = _get_icos_value(cf, cc, 'Min', default=-10000.)
        outside = (df[cc] < mini) | (df[cc] > maxi)
        for jj, isday in enumerate(isdays):
            ndata[ii, jj] = int((outside & isday).sum())

    sf = pd.DataFrame(ndata, index=df.columns, columns=prevdays)

    # Upper end of the colour scale; presumably the expected number of
    # records per day for this kind of logger - TODO confirm
    if 'Profile' in ll:
        vmax = 422
    else:
        vmax = 48
    # A quarter inch of figure height per variable
    fig, ax = plt.subplots(figsize=(6.4, sf.shape[0]/4.))
    # Pass the target axes with seaborn's documented `ax` keyword
    sns.heatmap(data=sf, ax=ax, vmax=vmax, cmap=cmap, linewidths=0.5,
                xticklabels=prevdays, yticklabels=sf.index,
                annot=True, fmt='d', cbar=True)
    # ax.set_xlabel('Days before today')
    ax.set_ylabel('Variable name')
    plt.show()
CR3000_H1

CR1000_H1

CR310_H1

CR3000_NT

CR3000_Ray

CR1000_Ray

CR1000_Circonf

CR1000_Profile_NT

CR3000_SoilAB

CR1000_SoilCD

CR1000_SoilE

CR1000_SoilF

CR1000_SoilG

CR1000X_CP01

CR1000X_CP02