# Define help functions
debug = False

import configparser
import datetime as dt
import glob
import os
import platform
import re
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
def _get_datadir(config):
'''
Get datadir from config file and return Windows and macOS full path
Parameters
----------
config : ConfigParser
A ConfigParser having read a config file
Returns
-------
directory of BD_Hesse on current computer
'''
datadir = config['GENERAL'].get('datadir', '')
if datadir and (not datadir.startswith('/')):
ios = platform.system() # Windows, Darwin, Linux
if ios == 'Windows':
datadir = '//pnas1.stockage.inra.fr/nancy-eef-prnas/' + datadir
elif ios == 'Darwin':
datadir = '/Volumes/nancy-eef-prnas/' + datadir
else:
print('Operating system not known:', ios)
# # MC local
# datadir = '/Users/cuntz/data/inrae/hesse/BD_Hesse'
# # MC local
return datadir
def _get_logger_files(config, loggers, datadir, level='raw'):
'''
Get file names of loggers from config file
Parameters
----------
config : ConfigParser
A ConfigParser having read a config file
logger : list
List of logger names in config file
datadir : str
Directory of BD_Hesse on current computer
level : str, optional
Data level raw, DB1, calib (default: raw)
Returns
-------
dictionary with logger names as keys and file names as values.
Non-existing entries in config file have empty string as file name.
'''
if level.lower().startswith('raw'):
section = 'RAWFILES'
elif level.lower().startswith('db1'):
section = 'DB1FILES'
elif level.lower().startswith('cal'):
section = 'CALIBFILES'
else:
raise ValueError(f'level not known: {level}')
ifiles = {}
for mm in loggers:
ff = config[section].get(mm, '')
if ff:
ifiles.update({mm: datadir + '/' + ff})
else:
ifiles.update({mm: ''})
return ifiles
def _get_smartflux_files(datadir, year=None):
'''
Get filenames of Smartflux ghg files for year
Parameters
----------
datadir : str
Directory of BD_Hesse on current computer
year : int, optional
Smartflux ghg files for year. Must be >= 2019
(default: current year)
Returns
-------
list with filenames
'''
if year is None:
today = dt.datetime.today()
year = today.year
if year >= 2024:
idir = f'{datadir}/Donnees_brutes/{year}/Auto/SmartfluxNT/raw'
elif year == 2023:
idir = f'{datadir}/Donnees_brutes/{year}/Auto/SmartfluxNT_{year}/raw'
elif year >= 2019:
idir = f'{datadir}/Donnees_brutes/{year}/Auto/SmartfluxNT_{year}'
ifiles = glob.glob(f'{idir}/*.ghg')
return ifiles
def _read_csv(ifile, remove_cols=[], rename_cols=[], level='raw'):
'''
Read values from standard logger file from crbasic2db1.py,
optionally remove remove columns and rename columns, and then
select only columns with standard names such as TA_1_1_1.
Parameters
----------
ifile : str
Input logger file
remove_cols : list, optional
List of column names to delete
rename_cols : list, optional
List of column names to rename to `logger + '_' + column_name`
level : str, optional
Data level raw, DB1 (default: raw)
Returns
-------
pandas.DataFrame
'''
if level.lower().startswith('raw'):
skiprows = [0, 2, 3]
na_values = 'NAN'
elif level.lower().startswith('db1'):
skiprows = None
na_values = '-9999'
else:
raise ValueError(f'level not known: {level}')
logger = '_'.join(os.path.basename(ifile).split('_')[:-2])
df = pd.read_csv(ifile, sep=',', header='infer', skiprows=skiprows,
index_col=0, parse_dates=True, na_values=na_values)
rm_cols = [ nn for nn in remove_cols if nn in df.columns ]
if len(rm_cols) > 0:
df.drop(columns=rm_cols, inplace=True)
ren_cols = { nn: logger + '_' + nn for nn in rename_cols
if nn in df.columns }
if len(ren_cols) > 0:
df.rename(columns=ren_cols, inplace=True)
# # take only variables with standard names
# drops = [ cc for cc in df.columns
# if re.fullmatch('.+_[0-9]+_[0-9]+_[0-9]+', cc) is None ]
# if len(drops) > 0:
# df.drop(columns=drops, inplace=True)
return df
# Seaborn's Oranges color palette starting with white
oranges = mpl.colors.LinearSegmentedColormap.from_list(
    'oranges', sns.color_palette('Oranges'))
ocols = oranges(np.arange(255, dtype=int))  # sample colormap at int indices
ocols[0] = [1., 1., 1., 1.]  # add white at start
cmap = mpl.colors.ListedColormap(ocols)

# BUGFIX: `ndays = 7` was fused onto the previous line by the
# notebook-to-script extraction; number of past days shown in the heatmaps
ndays = 7
today = dt.datetime.today()
fromday = today - dt.timedelta(days=ndays)
year = today.year

# read site configuration for the current year
configfile = f'FR-Hes_{year}.cfg'
print(f"Read config file: {configfile}")
config = configparser.ConfigParser(interpolation=None)
config.read(configfile)

# options
datadir = _get_datadir(config)

# loggers: all keys of RAWFILES except the bookkeeping entries
loggers = list(config['RAWFILES'].keys())
loggers.remove('year')
loggers.remove('units')
if debug:
    print(f"Loggers: {loggers}")
rfiles = _get_logger_files(config, loggers, datadir, level='raw')
rfilenames = { ll: os.path.basename(rfiles[ll]) for ll in rfiles }
rnames = { ll: '_'.join(rfilenames[ll].split('_')[0:2]) for ll in rfilenames }
if debug:
    print(f"Raw filenames: {rfilenames}")
dfiles = _get_logger_files(config, loggers, datadir, level='DB1')
dfilenames = [ os.path.basename(dfiles[ll]) for ll in dfiles ]
if debug:
    # BUGFIX: notebook output ("Read config file: ...") was fused onto
    # this line by the extraction; removed
    print(f"DB1 filenames: {dfilenames}")
# Count missing half-hourly records per logger and day
# loggers + smartflux
ndata = np.full((len(loggers) + 1, ndays), 48, dtype=int)
# loggers
for ii, ll in enumerate(loggers):
    if debug:
        print(f"Read file: {rfiles[ll]}")
    df = _read_csv(rfiles[ll], level='raw')
    for dd in range(ndays):
        isday = today - dt.timedelta(days=dd + 1)
        ndata[ii, -dd-1] -= len(df[df.index.date == isday.date()])
        if ll == 'profile':
            # profile logger writes 422 records per day instead of 48
            ndata[ii, -dd-1] += 422 - 48
# smartflux: one ghg file per half hour, filenames start with YYYY-MM-DD
sfiles = [ os.path.basename(ff)[0:10]
           for ff in _get_smartflux_files(datadir, year=year) ]
for dd in range(ndays):
    isday = today - dt.timedelta(days=dd + 1)
    sisday = isday.strftime('%Y-%m-%d')
    ndata[-1, -dd-1] = 48 - sfiles.count(sisday)
loggersmart = loggers.copy()
loggersmart.append('smartflux')
prevdays = np.linspace(-ndays, -1, ndays, dtype=int)
df = pd.DataFrame(ndata, index=loggersmart,
                  columns=prevdays)
vmax = 48
fig, ax = plt.subplots(figsize=(6.4, df.shape[0]/4.))
# BUGFIX: seaborn.heatmap takes the target axes as `ax=`, not `axes=`
sns.heatmap(ax=ax, data=df, vmax=vmax, annot=True, linewidths=0.5,
            cmap=cmap, xticklabels=prevdays,
            yticklabels=df.index)
ax.set_xlabel('Days before today')
ax.set_ylabel('Logger')
plt.show()
# Per-variable heatmaps of missing values for each logger
firstday = today - dt.timedelta(days=ndays)
prevdays = np.linspace(-ndays, -1, ndays, dtype=int)
for ll in loggers:
    print(f'{ll}')
    df = _read_csv(rfiles[ll], level='raw')
    # boolean mask of missing values within the last ndays full days
    df = df[(df.index.date >= firstday.date()) & (df.index.date < today.date())].isna()
    sf = df.groupby(df.index.date).sum()
    sf = sf.T
    sf.columns = prevdays[:len(sf.columns)]
    # profile logger has 422 records per day, all others 48
    if ll == 'profile':
        vmax = 422
    else:
        vmax = 48
    fig, ax = plt.subplots(figsize=(6.4, sf.shape[0]/4.))
    # BUGFIX: seaborn.heatmap takes the target axes as `ax=`, not `axes=`
    sns.heatmap(ax=ax, data=sf, vmax=vmax, cmap=cmap, linewidths=0.5,
                xticklabels=prevdays, yticklabels=sf.index,
                annot=True)  # , fmt=':d')
    ax.set_xlabel('Days before today')
    ax.set_ylabel('Variable')
    # BUGFIX: notebook output ("h1") was fused onto this line; removed
    plt.show()
# Notebook cell output (logger names), preserved as a comment:
# h1
# h1_meteobackup
# h1_cr310
# meteo
# radiation
# radiation2
# circonf
# profile
# soil_ab
# soil_cd
# soil_e
# soil_f
# soil_g
# cp_01
# cp_02