Check number of NaN in 30 min raw data

Author

Matthias Cuntz

Published

December 5, 2025

Code
import datetime as dt
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pyfrhes import read_config
from pyfrhes import get_config_loggers, get_config_logger_files
from pyfrhes import read_data, get_config_smartflux_files
from pyfrhes import read_eddypro

# Set to True to print extra diagnostics (logger names, raw file names)
debug = False

# Colormap for the heatmaps: Seaborn's Oranges palette sampled at 256
# levels, with the very first level replaced by opaque white
base_palette = sns.color_palette('Oranges')
oranges = mpl.colors.LinearSegmentedColormap.from_list('oranges',
                                                       base_palette)
ocols = oranges(np.arange(256, dtype=int))
ocols[0] = np.ones(4)  # RGBA white at the start of the map
cmap = mpl.colors.ListedColormap(ocols)
Code
# Look-back window: the ndays days before today
ndays = 7
today = dt.date.today()
# For testing, the reference day can be pinned instead, e.g.
#     today = dt.date(2025, 9, 25)
fromday = today - dt.timedelta(days=ndays)
year = today.year

# The configuration file is selected by the year of today
configfile = f'FR-Hes_{year}.cfg'
print(f"Read config file: {configfile}")
config = read_config(configfile)

# Loggers given by the configuration
loggers = get_config_loggers(config)
if debug:
    print(f"Loggers: {loggers}")

# Raw data file per logger
rfiles = get_config_logger_files(config, loggers, ftype='raw')
if debug:
    # show file names without their directories
    rfilenames = dict((name, os.path.basename(rfiles[name]))
                      for name in rfiles)
    print(f"Raw filenames: {rfilenames}")
Read config file: FR-Hes_2025.cfg

Number of NaN in raw data per logger

Code
# loggers
firstday = today - dt.timedelta(days=ndays)
# The ndays calendar days before today, oldest first.
# No longer used for the tick labels below (see _plot_nan_heatmap).
prevdays = [today - dt.timedelta(days=ndays - dd) for dd in range(ndays)]


def _plot_nan_heatmap(nan_counts, vmax):
    """
    Draw a heatmap of the number of NaN per variable and day

    Parameters
    ----------
    nan_counts : pandas.DataFrame
        Integer counts with one row per variable and one column per day
    vmax : int
        Upper limit of the colour scale

    Returns
    -------
    fig, ax
        The Matplotlib figure and axes of the heatmap

    """
    # a quarter inch of figure height per variable
    fig, ax = plt.subplots(figsize=(6.4, nan_counts.shape[0]/4.))
    # The keyword of seaborn.heatmap is `ax`; `axes` would only be passed
    # on to pcolormesh and the plot would end up on the current axes.
    # The tick labels come from the data itself so that they always match
    # the number of columns, even if a day has no records at all.
    sns.heatmap(ax=ax, data=nan_counts, vmax=vmax, cmap=cmap,
                linewidths=0.5,
                xticklabels=list(nan_counts.columns),
                yticklabels=nan_counts.index,
                annot=True, fmt='d')
    ax.set_ylabel('Variable name')
    plt.show()
    return fig, ax


for ll in loggers:
    print(f'{ll}')
    df = read_data(rfiles[ll], ftype='raw')
    # True where a value is missing, restricted to firstday <= day < today
    df = df[(df.index.date >= firstday) & (df.index.date < today)].isna()
    # number of NaN per day and variable; variables as rows for plotting
    sf = df.groupby(df.index.date).sum()
    sf = sf.T

    if 'Profile' in ll:
        # NOTE(review): presumably the number of profile records per day
        # - confirm
        vmax = 422
    else:
        vmax = 48  # half-hourly data: 48 records per day
    fig, ax = _plot_nan_heatmap(sf, vmax)

# smartflux
ivars = ['DOY', 'daytime']  # the only columns checked in the EddyPro output
sfile = get_config_smartflux_files(config, year=year, ftype='db1')
print(f'{os.path.basename(sfile)}')
df = read_eddypro(sfile)
df = df[(df.index.date >= firstday) & (df.index.date < today)].isna()
df = df[ivars]
sf = df.groupby(df.index.date).sum()
sf = sf.T

vmax = 48  # half-hourly data: 48 records per day
fig, ax = _plot_nan_heatmap(sf, vmax)
CR3000_H1

CR1000_H1

CR310_H1

CR3000_NT

CR3000_Ray

CR1000_Ray

CR1000_Circonf

CR1000_Profile_NT

CR3000_SoilAB

CR1000_SoilCD

CR1000_SoilE

CR1000_SoilF

CR1000_SoilG

CR1000X_CP01

CR1000X_CP02

eddypro_full_output_2025.csv