Source code for fluxer.flux_config

"""Utility for configuring user variables for flux analyses"""

from collections import OrderedDict
from os import getcwd
import logging
import os.path as osp
import glob
import re
import configparser as cfg

# Names exported by ``from <module> import *``
__all__ = ["parse_config"]

# Module-level logger, named after this module
logger = logging.getLogger(__name__)
# Attach a do-nothing handler for the case where this module is imported as
# a library; the application using it is expected to configure logging
# itself (e.g. with logging.basicConfig()) as needed
logger.addHandler(logging.NullHandler())
# This logger lets records of level INFO and above through
logger.setLevel(logging.INFO)

# Every input column name we recognize, mapped to the Python type of its
# values (float for measurements, bool for quality flags).  parse_config
# looks configured column names up here: names found are copied, with their
# type, into the "dtypes" entry of the corresponding inputs section, and
# names not found are reported as unknown.  The date time columns (always
# the first two in input) are deliberately not listed; we leave them up to
# pandas to determine, as they're always safe.
_INCOLS_ALL = dict(longitude=float, latitude=float, speed_over_ground=float,
                   course_over_ground=float, heading=float, pitch=float,
                   roll=float, heave=float, atmospheric_pressure=float,
                   air_temperature=float, relative_humidity=float,
                   surface_temperature=float, wind_speed=float,
                   wind_direction=float, true_wind_speed=float,
                   true_wind_direction=float, PAR=float, K_down=float,
                   temperature_thermopile=float, temperature_case=float,
                   temperature_dome=float, LW_down=float,
                   UV_sensor_temperature=float, UV_a=float, UV_b=float,
                   UV_broad=float, acceleration_x=float, acceleration_y=float,
                   acceleration_z=float, rate_x=float, rate_y=float,
                   rate_z=float, wind_speed_u=float, wind_speed_v=float,
                   wind_speed_w=float, air_temperature_sonic=float,
                   sound_speed=float, anemometer_status=float,
                   op_CO2_fraction=float, op_CO2_density=float,
                   op_CO2_absorptance=float, op_H2O_fraction=float,
                   op_H2O_density=float, op_H2O_absorptance=float,
                   op_pressure=float, op_temperature=float,
                   op_temperature_base=float, op_temperature_spar=float,
                   op_temperature_bulb=float, op_cooler_voltage=float,
                   op_bandwidth=float, op_delay_interval=float,
                   op_bad_chopper_wheel_temperature_flag=bool,
                   op_bad_detector_temperature_flag=bool,
                   op_bad_optical_wheel_rate_flag=bool,
                   op_bad_sync_flag=bool, op_CO2_signal_strength=float,
                   op_AGC=float, op_analyzer_status=float,
                   cp_analyzer_status=float, cp_CO2_fraction=float,
                   cp_CO2_density=float, cp_CO2_dry_fraction=float,
                   cp_CO2_absorptance=float, cp_H2O_fraction=float,
                   cp_H2O_density=float, cp_H2O_dry_fraction=float,
                   cp_H2O_absorptance=float, cp_pressure=float,
                   cp_temperature=float, cp_temperature_in=float,
                   cp_temperature_out=float, cp_temperature_block=float,
                   cp_temperature_cell=float, cp_CO2_signal_strength=float,
                   cp_H2O_signal_strength=float, uw_CO2_cellA=float,
                   uw_CO2_cellB=float, uw_CO2_fraction=float,
                   uw_H2O_cellA=float, uw_H2O_cellB=float,
                   uw_H2O_fraction=float, uw_temperature_analyzer=float,
                   uw_pressure_analyzer=float, equ_temperature=float,
                   equ_pressure=float, H2O_flow=float,
                   air_flow_analyzer=float, equ_speed_pump=float,
                   ventilation_flow=float, condensation_atm=float,
                   condensation_equ=float, drip_1=float, drip_2=float,
                   condenser_temperature=float, ctd_pressure=float,
                   ctd_temperature=float, ctd_conductivity=float,
                   ctd_O2_saturation=float, ctd_O2_concentration=float,
                   uw_pH=float, uw_redox_potential=float,
                   temperature_external=float, temperature_in=float,
                   bad_wind_direction_flag=bool,
                   very_bad_wind_direction_flag=bool, bad_ice_flag=bool,
                   bad_atmospheric_pressure_flag=bool, bad_ctd_flag=bool,
                   bad_CO2_flag=bool, bad_H2O_flag=bool,
                   bad_H2O_flow_flag=bool, bad_pressure_analyzer_flag=bool,
                   bad_temperature_analyzer_flag=bool,
                   bad_equ_temperature_flag=bool,
                   bad_temperature_external_flag=bool)

# Simple dictionary to list our defaults, keyed by a flat option name;
# parse_config regroups them into configuration sections.  Values are
# written as strings, the way they would appear in a configuration file,
# with comma-separated text standing for lists.  The directory defaults are
# the working directory at the time this module is imported, since getcwd()
# is evaluated here.
# NOTE: 'uw_regress_temperature_external' is the one non-string default (a
# bool).
_DFLTS = {
    'ec_input_directory': getcwd(),
    'ec_file_pattern': "*[0-9].csv",
    'ec_colnames': ("time_20min,time_study,longitude,latitude," +
                    "speed_over_ground,course_over_ground,heading," +
                    "pitch,roll,heave,atmospheric_pressure," +
                    "air_temperature,relative_humidity," +
                    "surface_temperature,wind_speed,wind_direction," +
                    "true_wind_speed,true_wind_direction,PAR,K_down," +
                    "temperature_thermopile,temperature_case," +
                    "temperature_dome,LW_down,UV_sensor_temperature," +
                    "UV_b,UV_broad,acceleration_x,acceleration_y," +
                    "acceleration_z,rate_x,rate_y,rate_z,wind_speed_u," +
                    "wind_speed_v,wind_speed_w,air_temperature_sonic," +
                    "sound_speed,anemometer_status,op_CO2_fraction," +
                    "op_CO2_density,op_CO2_absorptance,op_H2O_fraction," +
                    "op_H2O_density,op_H2O_absorptance,op_pressure," +
                    "op_temperature,op_temperature_base," +
                    "op_temperature_spar,op_temperature_bulb," +
                    "op_cooler_voltage,op_bandwidth,op_delay_interval," +
                    "op_bad_chopper_wheel_temperature_flag," +
                    "op_bad_detector_temperature_flag," +
                    "op_bad_optical_wheel_rate_flag,op_bad_sync_flag," +
                    "op_CO2_signal_strength,cp_analyzer_status," +
                    "cp_CO2_fraction,cp_CO2_density,cp_CO2_dry_fraction," +
                    "cp_CO2_absorptance,cp_H2O_fraction,cp_H2O_density," +
                    "cp_H2O_dry_fraction,cp_H2O_absorptance,cp_pressure," +
                    "cp_temperature,cp_temperature_in,cp_temperature_out," +
                    "cp_temperature_block,cp_temperature_cell," +
                    "cp_CO2_signal_strength,cp_H2O_signal_strength"),
    'ec_sample_frequency': "10.0",
    'ec_summary_file': "fluxes.csv",
    'ec_sonic_xoffset': "0.0",
    'ec_imu_xoffset': "0.0",
    'ec_tilt_window_width': "120",
    'ec_despike_win_width': "3000",
    'ec_despike_step': "1500",
    'ec_despike_nreps': "20",
    'imu2anemometer_pos': "0.0, 0.0, 0.0",
    'imu_xyz_idx': "0, 1, 2",
    'imu2rhs_linaccel_mult': "1.0, 1.0, 1.0",
    'imu2rhs_angaccel_mult': "1.0, 1.0, 1.0",
    'ec_complementary_filter_period': "10.0",
    'ec_accel_highpass_cutoff': "20.0",
    'uw_input_directory': getcwd(),
    'uw_file_pattern': "*[0-9].csv",
    'uw_pco2_directory': getcwd(),
    'uw_colnames': ("time_30min,time_study,longitude,latitude," +
                    "speed_over_ground,course_over_ground,heading," +
                    "atmospheric_pressure,air_temperature," +
                    "relative_humidity,surface_temperature,wind_speed," +
                    "wind_direction,true_wind_speed,true_wind_direction," +
                    "PAR,K_down,LW_down,UV_b,UV_a,UV_broad," +
                    "air_temperature_sonic,cp_CO2_fraction," +
                    "cp_H2O_fraction,cp_pressure,cp_temperature," +
                    "op_CO2_density,op_H2O_density,op_pressure," +
                    "op_temperature,equ_temperature,equ_pressure," +
                    "equ_speed_pump,uw_CO2_fraction,uw_H2O_fraction," +
                    "uw_temperature_analyzer,uw_pressure_analyzer," +
                    "H2O_flow,air_flow_analyzer,ctd_pressure," +
                    "ctd_temperature,ctd_conductivity,ctd_O2_saturation," +
                    "ctd_O2_concentration,uw_pH,uw_redox_potential," +
                    "temperature_external,temperature_in," +
                    "bad_wind_direction_flag," +
                    "very_bad_wind_direction_flag,bad_ice_flag," +
                    "bad_atmospheric_pressure_flag,bad_ctd_flag," +
                    "bad_CO2_flag,bad_H2O_flag,bad_H2O_flow_flag," +
                    "bad_pressure_analyzer_flag," +
                    "bad_temperature_analyzer_flag," +
                    "bad_equ_temperature_flag," +
                    "bad_temperature_external_flag"),
    'uw_intake_depth': "5.0",
    'uw_regress_temperature_external': False,
    'uw_temperature_external_coefs': "0.0, 1.0",
    'anemometer2d_height': "16.0"}

# Scalar option names: for each configuration section, the options that
# parse_config converts to a single float
_SCALAR_OPTS = {
    'EC Inputs': ["sample_frequency"],
    'EC Despiking': ["despike_win_width",
                     "despike_step", "despike_nreps"],
    'EC Motion Correction': ["sonic_xoffset", "imu_xoffset",
                             "tilt_window_width",
                             "complementary_filter_period",
                             "accel_highpass_cutoff"],
    'UW Inputs': ["anemometer2d_height", "uw_intake_depth"]}
# Vector option names: for each configuration section, the options that
# parse_config converts to a list of floats.
# NOTE(review): this includes "imu_xyz_idx", whose name suggests indices,
# yet its elements are converted to float like the rest -- confirm that
# consumers expect that.
_VECTOR_OPTS = {
    'EC Motion Correction': ["imu2anemometer_pos", "imu_xyz_idx",
                             "imu2rhs_linaccel_mult",
                             "imu2rhs_angaccel_mult"],
    'UW Inputs': ["uw_temperature_external_coefs"]}


def _default_sections():
    """Return the built-in defaults grouped by configuration section."""
    return OrderedDict((
        ("EC Inputs", OrderedDict((
            ("input_directory", _DFLTS["ec_input_directory"]),
            ("file_pattern", _DFLTS["ec_file_pattern"]),
            ("colnames", _DFLTS["ec_colnames"]),
            ("sample_frequency", _DFLTS["ec_sample_frequency"]),
        ))),
        ("EC Outputs", OrderedDict((
            ("summary_file", _DFLTS["ec_summary_file"]),
        ))),
        ("EC Despiking", OrderedDict((
            ("despike_win_width", _DFLTS["ec_despike_win_width"]),
            ("despike_step", _DFLTS["ec_despike_step"]),
            ("despike_nreps", _DFLTS["ec_despike_nreps"]),
        ))),
        ("EC Motion Correction", OrderedDict((
            ("sonic_xoffset", _DFLTS["ec_sonic_xoffset"]),
            ("imu_xoffset", _DFLTS["ec_imu_xoffset"]),
            ("tilt_window_width", _DFLTS["ec_tilt_window_width"]),
            ("imu2anemometer_pos", _DFLTS["imu2anemometer_pos"]),
            ("imu_xyz_idx", _DFLTS["imu_xyz_idx"]),
            ("imu2rhs_linaccel_mult", _DFLTS["imu2rhs_linaccel_mult"]),
            ("imu2rhs_angaccel_mult", _DFLTS["imu2rhs_angaccel_mult"]),
            ("complementary_filter_period",
             _DFLTS["ec_complementary_filter_period"]),
            ("accel_highpass_cutoff", _DFLTS["ec_accel_highpass_cutoff"]),
        ))),
        ("UW Inputs", OrderedDict((
            ("input_directory", _DFLTS["uw_input_directory"]),
            ("file_pattern", _DFLTS["uw_file_pattern"]),
            ("colnames", _DFLTS["uw_colnames"]),
            ("uw_intake_depth", _DFLTS["uw_intake_depth"]),
            ("uw_regress_temperature_external",
             _DFLTS["uw_regress_temperature_external"]),
            ("uw_temperature_external_coefs",
             _DFLTS["uw_temperature_external_coefs"]),
            ("anemometer2d_height", _DFLTS["anemometer2d_height"]),
        ))),
        ("UW Outputs", OrderedDict((
            ("pco2_directory", _DFLTS["uw_pco2_directory"]),
        ))),
    ))


def _convert_option(sec, opt, items):
    """Convert the comma-split strings of one option to its final type."""
    if opt in _VECTOR_OPTS.get(sec, ()):
        # Convert before any single-element collapsing, so that a vector
        # with a single element is still a list of floats
        return [float(x) for x in items]
    if opt in _SCALAR_OPTS.get(sec, ()):
        if len(items) != 1:
            raise ValueError(("Option '{}' in section '{}' takes a single "
                              "value").format(opt, sec))
        return float(items[0])
    # Column names always stay a list, so that slicing them drops columns
    # rather than characters
    if opt == "colnames" or len(items) > 1:
        return items
    # NOTE(review): everything else remains text, which includes
    # "uw_regress_temperature_external": it is returned as the string
    # "True"/"False" (both truthy), not as a bool -- confirm what callers
    # expect before changing this
    return items[0]


def parse_config(cfg_file):
    """Parse configuration file for essential variables during flux analysis.

    The built-in defaults are loaded first and then overridden with
    whatever `cfg_file` provides.  Values can refer to options in other
    sections (extended interpolation).  Sections and options that have no
    built-in default are kept and returned as text.

    Parameters
    ----------
    cfg_file : str
        Path to configuration file.

    Returns
    -------
    collections.OrderedDict
        Ordered dictionary with variables for each section.  Scalar options
        are floats, vector options are lists of floats, column names are
        lists of strings, and any other option is a string (or a list of
        strings if it contains commas).  The sorted list of input files
        found, given the input directory and file pattern, is also added
        to the 'EC Inputs' and 'UW Inputs' sections under 'input_files',
        along with a 'dtypes' dictionary mapping each recognized column
        name to its type.

    Raises
    ------
    ValueError
        If an option cannot be converted to float, or a scalar option is
        given more than one value.
    Exception
        If an input directory does not exist, or the sampling frequency,
        tilt window width, or despiking parameters are out of range.

    """
    # Ordered dictionary based on dflts to give to the parser
    dflt_dict = _default_sections()

    # Set up the parser to interpolate across sections
    config = cfg.ConfigParser(interpolation=cfg.ExtendedInterpolation())
    config.read_dict(dflt_dict)     # set up our specific defaults
    # Replace defaults with what we're given, closing the file when done
    with open(cfg_file) as cfg_fh:
        config.read_file(cfg_fh)

    # Copy where we'll replace strings with other types.  Sections are
    # copied too, so the defaults we compare against remain untouched.
    py_dict = OrderedDict((sec, OrderedDict(opts))
                          for sec, opts in dflt_dict.items())

    # Loop through all items and clean them up to generate our variables
    for sec in config.sections():
        sec_dflts = dflt_dict.get(sec, {})
        # The file may have sections we have no defaults for
        sec_vars = py_dict.setdefault(sec, OrderedDict())
        for opt in config.options(sec):
            opt_value = config.get(sec, opt)
            # Just split and move on if we have the same as defaults
            # (options without a default never match)
            if sec_dflts.get(opt) == opt_value:
                items = opt_value.split(",")
            else:
                # Otherwise remove double quotes, newlines, and spaces,
                # and keep the parser in sync with the clean value, as it
                # is used below to find the input files
                clean_opt = re.sub('["\n ]+', "", opt_value)
                config.set(sec, opt, clean_opt)
                items = clean_opt.split(",")
            sec_vars[opt] = _convert_option(sec, opt, items)

    # Check input directories exist
    for sec in ("EC Inputs", "UW Inputs"):
        if not osp.exists(py_dict[sec]["input_directory"]):
            raise Exception("Input directory doesn't exist")

    # Check if we have a valid sampling frequency
    if py_dict["EC Inputs"]["sample_frequency"] <= 0:
        raise Exception("Sampling frequency must be greater than zero")

    # Check if we have a valid tilt window width
    if py_dict["EC Motion Correction"]["tilt_window_width"] <= 0:
        raise Exception("Tilt window width must be greater than zero")

    # Check if we have valid despiking parameters
    despike = py_dict["EC Despiking"]
    if despike["despike_win_width"] <= 0:
        raise Exception("Despiking window width must be greater than zero")
    if despike["despike_step"] <= 0:
        raise Exception("Despiking step size must be greater than zero")
    if despike["despike_nreps"] < 0:
        raise Exception(("The number of despiking iterations "
                         "cannot be negative"))

    # Sorted input file lists
    for sec in ("EC Inputs", "UW Inputs"):
        file_glob = osp.join(config[sec]["input_directory"],
                             config[sec]["file_pattern"])
        py_dict[sec]["input_files"] = sorted(glob.glob(file_glob))

    # Check if we have all legal names for header of input files (don't
    # care about first 2 time columns).
    ec_names = py_dict["EC Inputs"]["colnames"][2:]
    uw_names = py_dict["UW Inputs"]["colnames"][2:]
    illegal_names = (set(ec_names) | set(uw_names)) - set(_INCOLS_ALL)
    if illegal_names:
        logger.info("There are unknown column names in config file: %s",
                    sorted(illegal_names))

    # Map the recognized names to their types, following column order
    for sec, label, names in (("EC Inputs", "EC", ec_names),
                              ("UW Inputs", "UW", uw_names)):
        dtypes = {key: _INCOLS_ALL[key]
                  for key in names if key in _INCOLS_ALL}
        if dtypes:
            py_dict[sec]["dtypes"] = dtypes
        else:
            logger.error("There are no legal %s column names in config file",
                         label)

    return py_dict