---
title: "Check DB1 data out of range"
title-block-banner: true
date: today
format:
  html:
    code-fold: true
    code-tools: true
    self-contained: true
    embed-resources: true
author:
  - Matthias Cuntz
execute:
  freeze: false
jupyter: python3
---
```{python}
import datetime as dt
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pyfrhes import read_config
from pyfrhes import get_config_loggers, get_config_logger_files
from pyfrhes import get_config_calib_files
from pyfrhes import read_data
def _get_icos_value(df, var, col, default=np.nan):
'''
Get value of column for variable in ICOSVarName
Parameters
----------
df : pandas.DataFrame
pandas DataFrame of calib file
var : str
Variable name
col : str
Column to read
default : float, optional
Default value if value conversion failed or NaN (default: NaN)
Returns
-------
float
'''
maxi = df.loc[df['ICOSVarName'] == var, col].values[0]
try:
maxi = float(maxi)
except ValueError:
maxi = np.nan
if np.isnan(maxi):
maxi = default
return maxi
# Set to True to print the loggers and file names found in the config file
debug = False

# Colormap for the heatmaps: Seaborn's Oranges color palette, but with
# pure white as the very first color.
oranges = mpl.colors.LinearSegmentedColormap.from_list(
    'oranges', sns.color_palette('Oranges'))
# Evaluate the colormap at the integer indices 0..255 to get RGBA rows
ocols = oranges(np.arange(256))
ocols[0, :] = 1.  # RGBA = (1, 1, 1, 1), i.e. opaque white at start
cmap = mpl.colors.ListedColormap(ocols)
```
```{python}
# Length of the look-back window in days, counted back from today
ndays = 7
today = dt.date.today()
# To build the report for a fixed date, override `today` here, e.g.:
# today = dt.date(2025, 9, 25)
fromday = today - dt.timedelta(days=ndays)
year = today.year

# The name of the config file is derived from the current year
configfile = f'FR-Hes_{year}.cfg'
print(f"Read config file: {configfile}")
config = read_config(configfile)

# loggers
loggers = get_config_loggers(config)
if debug:
    print(f"Loggers: {loggers}")

# DB1 data file of each logger
dfiles = get_config_logger_files(config, loggers, ftype='DB1')
if debug:
    dfilenames = {name: os.path.basename(dfiles[name]) for name in dfiles}
    print(f"DB1 filenames: {dfilenames}")

# Calib file of each logger: the last entry of the returned list
# (presumably the most recent one - TODO confirm)
cfiles = {
    name: get_config_calib_files(config, name)[-1]
    for name in loggers
}
if debug:
    print(f"Config files: {cfiles}")
```
## Number of DB1 data out of range per logger
```{python}
# loggers
firstday = today - dt.timedelta(days=ndays)
# Days covered by the report, oldest first: today-ndays, ..., yesterday
prevdays = [today - dt.timedelta(days=ndays - dd) for dd in range(ndays)]

for ll in loggers:
    print(f'{ll}')
    df = read_data(dfiles[ll], ftype='DB1', standard=False)
    df.drop(columns=['RECORD'], inplace=True)
    cf = read_data(cfiles[ll], ftype='calib')

    # One boolean mask per day selecting the records of that day.
    # The dates of the index are computed only once per logger.
    dates = df.index.date
    daymasks = [dates == day for day in prevdays]

    # ndata[ii, jj]: number of values of variable ii on day prevdays[jj]
    # that are below Min or above Max of the calib file
    ndata = np.full((df.shape[1], ndays), 0, dtype=int)
    for ii, cc in enumerate(df.columns):
        # Valid range does not depend on the day: look it up once per
        # variable; +-10000 is used if the calib file gives no limit
        maxi = _get_icos_value(cf, cc, 'Max', default=10000.)
        mini = _get_icos_value(cf, cc, 'Min', default=-10000.)
        outside = (df[cc] < mini) | (df[cc] > maxi)
        for jj, daymask in enumerate(daymasks):
            ndata[ii, jj] = int((outside & daymask).sum())

    sf = pd.DataFrame(ndata, index=df.columns, columns=prevdays)

    # Upper end of the color scale; presumably the expected number of
    # records per day of the logger type - TODO confirm
    vmax = 422 if 'Profile' in ll else 48

    # A quarter inch of figure height per variable
    fig, ax = plt.subplots(figsize=(6.4, sf.shape[0]/4.))
    # The keyword of seaborn.heatmap is `ax`; `axes` would be forwarded
    # to pcolormesh and the target axes would silently be plt.gca()
    sns.heatmap(data=sf, ax=ax, vmax=vmax, cmap=cmap, linewidths=0.5,
                xticklabels=prevdays, yticklabels=sf.index,
                annot=True, fmt='d', cbar=True)
    # ax.set_xlabel('Days before today')
    ax.set_ylabel('Variable name')
    plt.show()
```