---
title: "Check number of NaN in 30 min raw data"
title-block-banner: true
date: today
format:
  html:
    code-fold: true
    code-tools: true
    self-contained: true
    embed-resources: true
author:
  - Matthias Cuntz
execute:
  freeze: false
jupyter: python3
---
```{python}
import datetime as dt
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pyfrhes import read_config
from pyfrhes import get_config_loggers, get_config_logger_files
from pyfrhes import read_data, get_config_smartflux_files
from pyfrhes import read_eddypro
debug = False

# Colormap for the heatmaps: Seaborn's Oranges palette interpolated by
# matplotlib, sampled at the integer indices 0..255, with the very first
# entry replaced by opaque white.
oranges = mpl.colors.LinearSegmentedColormap.from_list(
    name='oranges', colors=sns.color_palette('Oranges'))
ocols = oranges(np.arange(256, dtype=int))
ocols[0, :] = 1.  # RGBA = (1, 1, 1, 1), i.e. white at the start
cmap = mpl.colors.ListedColormap(ocols)
```
```{python}
# Length of the look-back window in days
ndays = 7

# Reference date; the window ends the day before.
# For testing, a fixed date can be set instead, e.g.
#     today = dt.date(2025, 9, 25)
today = dt.date.today()
fromday = today - dt.timedelta(days=ndays)

# One configuration file per year.
# NOTE(review): the year is taken from `today`, so during the first days
# of January the look-back window reaches into the previous year while
# the current year's configuration is read - confirm this is intended.
year = today.year
configfile = f'FR-Hes_{year}.cfg'
print(f"Read config file: {configfile}")
config = read_config(configfile)

# Loggers named in the configuration
loggers = get_config_loggers(config)
if debug:
    print(f"Loggers: {loggers}")

# Raw data file of each logger, indexed by logger
rfiles = get_config_logger_files(config, loggers, ftype='raw')
if debug:
    rfilenames = {logger: os.path.basename(path)
                  for logger, path in rfiles.items()}
    print(f"Raw filenames: {rfilenames}")
```
### Number of NaN in raw data per logger
```{python}
def _plot_nan_heatmap(data, days, vmax):
    """
    Plot a heatmap of the number of NaN per variable and day

    Parameters
    ----------
    data : pandas.DataFrame
        One column per variable. The index must provide `.date`
        (e.g. a `pandas.DatetimeIndex`).
    days : list of datetime.date
        Non-empty list of days in ascending order. Only records dated
        from `days[0]` to `days[-1]` (both inclusive) are counted.
    vmax : int
        Upper limit of the colour scale.

    Notes
    -----
    Only days that have at least one record get a column in the heatmap.
    The column labels are taken from the counted data itself so that they
    always match the plotted columns; days of `days` without any record
    are reported with a printed message.

    """
    dates = data.index.date
    isnan = data[(dates >= days[0]) & (dates <= days[-1])].isna()
    if isnan.empty:
        # nothing to count, and a heatmap cannot be drawn without cells
        print(f'No data from {days[0]} to {days[-1]}')
        return

    # rows: variables, columns: days with records
    counts = isnan.groupby(isnan.index.date).sum().T

    present = set(counts.columns)
    nodata = [str(dd) for dd in days if dd not in present]
    if nodata:
        print('No data on: ' + ', '.join(nodata))

    # a quarter inch of height per variable
    fig, ax = plt.subplots(figsize=(6.4, counts.shape[0] / 4.))
    sns.heatmap(data=counts, ax=ax, vmax=vmax, cmap=cmap, linewidths=0.5,
                xticklabels=list(counts.columns), yticklabels=counts.index,
                annot=True, fmt='d')
    ax.set_ylabel('Variable name')
    plt.show()


# The ndays days before today, oldest first
firstday = today - dt.timedelta(days=ndays)
prevdays = [firstday + dt.timedelta(days=dd) for dd in range(ndays)]

# loggers
for ll in loggers:
    print(f'{ll}')
    df = read_data(rfiles[ll], ftype='raw')
    # 48 is the number of 30-min records per day.
    # NOTE(review): 422 for the Profile loggers is presumably their
    # number of records per day - confirm.
    vmax = 422 if 'Profile' in ll else 48
    _plot_nan_heatmap(df, prevdays, vmax)

# smartflux: only the variables in ivars are checked
ivars = ['DOY', 'daytime']
sfile = get_config_smartflux_files(config, year=year, ftype='db1')
print(f'{os.path.basename(sfile)}')
df = read_eddypro(sfile)
_plot_nan_heatmap(df[ivars], prevdays, 48)
```