import datetime
import logging
import os
import shutil
from collections import defaultdict
from configparser import ConfigParser
from itertools import chain
from pathlib import Path
from typing import List, Any

# Human-readable documentation for every configuration option, keyed by option name.
# Each value is a list of text lines; Configuration.write() emits every line as a
# "# ..." comment directly above the corresponding "name=value" entry.
# ConfigItem looks its description up with CONFIG_DESCRIPTION[name], so every option
# extracted by Configuration needs an entry here (a missing key raises KeyError).
# NOTE: the key is the bare option name, not "<section>.<name>", so options sharing a
# name across sections would also share a description.
# The "# [section]" markers below mirror the sections used in Configuration.__init__.
CONFIG_DESCRIPTION = {
    # [server]
    'host': ['Host or IP address to which EDPS server binds, e.g. localhost or 0.0.0.0'],
    'port': ['EDPS port number, e.g. 5000'],
    # [application]
    'workflow_dir': [
        'Comma-separated list of directories where workflows are installed.',
        'If not specified, EDPS will search for workflows in the pipeline installation tree.',
        'The naming convention for workflows is: <instrument>/<instrument>_wkf.py, e.g. espresso/espresso_wkf.py'
    ],
    # [executor]
    'esorex_path': [
        'esorex is the command to execute pipeline recipes and it is installed with the pipeline.',
        'Please make sure that the path provided here can be located using the "which" command.'
    ],
    'pipeline_path': [
        'Path where pipeline plugins are installed.',
        'This configuration is used for ESO internal operations and can be left empty.'
    ],
    'genreport_path': [
        'genreport is the command to execute quality control plots and it is installed with the Adari package.'
    ],
    'base_dir': [
        'EDPS data directory where recipe products, logs and quality control plots are saved.',
        'The files are organised in a directory structure under the base directory, defined as:',
        '<instrument>/<data reduction task>/<unique identifier>/<files>',
        'Example: ESPRESSO/bias/fbf31155-a731-47f5-abf2-6445adce6c4b/master_bias.fits',
        'Please make sure that this directory has enough disk space available for storing the pipeline products,',
        'and consider enabling automatic data cleaning in the [cleanup] section.'
    ],
    'dummy': ['If true, a dummy command is executed instead of esorex'],
    'continue_on_error': [
        'If true, EDPS will attempt to execute a data reduction step even if the previous step has failed.'
    ],
    'processes': [
        "Number of concurrent data reductions processes.",
        "Running concurrent data reductions will increase performance if sufficient resources are available,",
        "but can also lead to pipeline crashes if not enough memory is available to execute parallel reductions."
    ],
    'cores': [
        'Number of CPUs (cores) available for data reduction. EDPS will not exceed the number of cores when scheduling',
        'data reduction tasks.'
    ],
    'default_omp_threads': [
        'Pipeline recipes are parallelized using OpenMP. EDPS uses this parameter to set the number of threads when',
        'running a recipe, up to the available cores: OMP_NUM_THREADS=min(default_omp_threads, cores)'
    ],
    'ordering': [
        'Execution ordering. All orderings follow topological order so parent tasks are always placed before their children.',
        'Options: dfs, bfs, type, dynamic',
        'dfs - depth-first, give preference to reaching final reduction target quicker',
        'bfs - breadth-first, give preference to following reduction cascade level by level',
        'type - same as bfs, but make sure to process same type of data together (eg. first all BIASes)',
        'dynamic - immediately run whichever job is ready (has all needed inputs), no stalling but order is unpredictable',
    ],
    'output_prefix': [
        'If provided, the recipe products will be renamed according to the following scheme:',
        '<prefix>.<instrument>.YYYY-MM-DDThh:mm.ss.mss.fits (Example: QC1.ESPRESSO.2023-02-09T17:30:14.326.fits),',
        'where timestamp is taken from the moment of renaming the file.',
        'Note that the renaming occurs in the base directory, not in the package (output) directory.'
    ],
    'resume_on_startup': [
        'In case EDPS was stopped while some jobs were waiting to be executed, should we execute them after restart.'
    ],
    'reexecution_window_minutes': [
        "EDPS will automatically re-execute a job if it's failed but needed as association, but only within this time window."
    ],
    'save_report_inputs': [
        'If true, EDPS will extract the input files from the reporting script output, and store them in the execution result.',
        'Please note that selecting this option will significantly increase the size of the EDPS database.',
        'default: False'
    ],
    # [generator]
    'calibrations_config_file': [
        'Path to yaml file defining locations of static calibrations for each of the workflows.',
        'This configuration is used for ESO internal operations and can be left empty.',
        'EDPS will automatically load static calibrations delivered with the pipeline.'
    ],
    'parameters_config_file': [
        'Path to yaml file defining locations of recipe and workflow parameters for each of the workflows.',
        'This configuration is used for ESO internal operations and can be left empty.',
        'EDPS will automatically load recipe and workflow parameters delivered with the pipeline.'
    ],
    'association_preference': [
        'In case multiple matching associated inputs (e.g. calibrations) are available, which ones should be used.',
        'Options: raw, master, raw_per_quality_level, master_per_quality_level',
        'raw - use reduced raw data results even if master calibrations closer in time are available',
        'master - use master calibrations even if results of reduced raw data closer in time are available',
        'raw_per_quality_level - use calibrations closest in time but prefer reduced raw data results',
        'master_per_quality_level - use calibrations closest in time but prefer master calibrations'
    ],
    'breakpoints_url': ['URL to ESO-provided list of calibration breakpoints.'],
    'associate_incomplete_jobs': ['Should EDPS associate incomplete jobs if no complete job is available.'],
    'associate_existing_jobs': ['Should EDPS associate jobs created in previous reductions.'],
    'keep_request_files': [
        'Should EDPS keep in memory files submitted as part of data reduction or data organisation requests',
        'so they can be used as associations in subsequent requests.'],
    'meta_workflow': [
        'Comma-separated list of workflows which should be combined together into one.',
        'This allows to submit data from different instruments to a single workflow "edps.workflow.meta_wkf"'
    ],
    # [repository]
    'truncate': [
        'Clear the EDPS bookkeeping database on startup.',
        'This will cause all tasks to be re-executed even if they have been executed before on the same data.'
    ],
    'local': ['Should we use local database for bookkeeping (currently always True).'],
    'path': ['Path where the bookkeeping database should be stored.'],
    'type': [
        'Type of bookkeeping database to use.',
        'Options: tiny, memory, caching',
        'tiny - directly use TinyDB json-file-based database',
        'memory - use fast in-memory non-persistent database',
        'caching - use in-memory cache on top of persistent TinyDB database for higher performance'
    ],
    'flush_size': ['How many changes are needed to trigger TinyDB flushing data to disk.'],
    'flush_timeout': ['How often automatically data should be flushed, regardless of changes.'],
    'min_disk_space_mb': [
        'Minimum amount of available disk space (in MB) required to flush data to disk.'
    ],
    # [cleanup]
    'enabled': ['Should automatic cleanup of reduced data be enabled.'],
    'cleanup_older_than_seconds': ['How much time needs to pass since data got reduced to consider them for removal.'],
    'cleanup_check_period_seconds': ['How often should we check if there are data to be removed.'],
    # [packager]
    'package_base_dir': ['Location where selected products should be placed.'],
    'skip_conflicting': [
        'How to handle filename conflicts in the package output directory.',
        'True - skip packaging the file if target filename already exists',
        'False - apply conflict resolution logic and add _{i} suffix with increasing number i'
    ],
    'mode': [
        'Method to place files in the package directory. Options: link, symlink, copy.',
        'link - create hardlinks',
        'symlink - create symbolic links',
        'copy - copy the files'
    ],
    'pattern': [
        'Directory and filename pattern to use when placing files in the package directory.',
        'The pattern can contain any string, header keywords enclosed in $ (e.g. $pro.catg$),',
        'and the following predefined special variables:',
        '$NIGHT - year-month-day of when the data was taken',
        '$FILENAME - original name of the file',
        '$EXT - original extension of the file name',
        '$TASK - name of EDPS task which produced the file',
        '$TIMESTAMP - timestamp when data were submitted for reduction',
        '$DATASET - dataset name, derived from the first raw input file',
        'Example: $DATASET/$TIMESTAMP/$object$_$pro.catg$.$EXT'
    ],
    'categories': [
        'Comma-separated list of product categories to place in the package directory.',
        'Empty means all products matching reduction target.'
    ],
    # [test] and [metrics]: descriptions are intentionally blank; Configuration.write()
    # skips both sections, so these entries only satisfy the ConfigItem lookup.
    'tmpdir': [''],
    'calibdir': [''],
    'metrics_enabled': [''],
    'url': [''],
    'interval': [''],
    'index': [''],
    'user': [''],
    'password': ['']
}

# Default value of the [generator] "breakpoints_url" option.
BREAKPOINTS_URL = 'https://archive.eso.org/calselector/v1/breakpoints'
# Names of the configuration file sections read by Configuration.
# (The 'server', 'application' and 'test' sections are referenced there by string literal.)
GENERATOR = 'generator'
EXECUTOR = 'executor'
REPOSITORY = 'repository'
PACKAGER = 'packager'
METRICS = 'metrics'
CLEANUP = 'cleanup'


class ConfigItem:
    """A single resolved configuration option plus its documentation.

    Attributes:
        section: name of the configuration section the option belongs to.
        name: option name inside the section.
        value: the resolved (already converted) value.
        default: flag supplied by the caller; ``Configuration`` passes
            ``value == default``, i.e. whether the effective value equals
            the built-in default.
        description: list of description lines taken from
            ``CONFIG_DESCRIPTION``, looked up by option name only.

    Raises:
        KeyError: if ``name`` has no entry in ``CONFIG_DESCRIPTION``.
    """

    def __init__(self, section: str, name: str, value: Any, default: bool):
        self.section, self.name = section, name
        self.value, self.default = value, default
        # The lookup is by bare option name; the section plays no role here.
        self.description = CONFIG_DESCRIPTION[name]


class Configuration:
    """Typed view of the EDPS settings held in a ``ConfigParser``.

    Every option is looked up in ``args``; when either the section or the
    option is absent, the hard-coded default is used instead.  Each lookup is
    also recorded as a ``ConfigItem`` in ``config_items`` (grouped by section,
    in lookup order) so the effective configuration can be written to a file
    (``write``), rendered as text (``__repr__``) or logged (``log``).

    Raises:
        ValueError: from the constructor, when an option is present but cannot
            be converted to the expected type; the message names the section
            and the option.
        KeyError: from the constructor, if an option has no entry in
            ``CONFIG_DESCRIPTION`` (raised by ``ConfigItem``).
    """

    # Option names whose values must never appear in ``__repr__`` or ``log`` output.
    _SECRET_OPTIONS = frozenset({'password'})
    _MASK = '********'

    def __init__(self, args: ConfigParser):
        self.config_items: defaultdict[str, List[ConfigItem]] = defaultdict(list)
        self.logger = logging.getLogger('Configuration')
        self.args = args
        # server
        self.host: str = self._extract_config_param('server', 'host', str, '0.0.0.0')
        self.port: int = self._extract_config_param('server', 'port', int, 5000)
        # application
        self.workflow_dir: str = self._extract_config_param('application', 'workflow_dir', str, None)
        # executor
        self.esorex_path: str = self._extract_config_param(EXECUTOR, 'esorex_path', str, 'esorex')
        self.pipeline_path: str = self._extract_config_param(EXECUTOR, 'pipeline_path', str, None)
        self.genreport_path: str = self._extract_config_param(EXECUTOR, 'genreport_path', str, 'genreport')
        self.base_dir: str = self._extract_config_param(EXECUTOR, 'base_dir', str, '.')
        self.dummy: bool = self._extract_boolean_config_param(EXECUTOR, 'dummy', False)
        self.continue_on_error: bool = self._extract_boolean_config_param(EXECUTOR, 'continue_on_error', False)
        self.processes: int = self._extract_config_param(EXECUTOR, 'processes', int, 1)
        self.cores: int = self._extract_config_param(EXECUTOR, 'cores', int, 1)
        self.default_omp_threads: int = self._extract_config_param(EXECUTOR, 'default_omp_threads', int, 1)
        self.ordering: str = self._extract_config_param(EXECUTOR, 'ordering', str, 'bfs')
        self.output_prefix: str = self._extract_config_param(EXECUTOR, 'output_prefix', str, '')
        self.resume_on_startup: bool = self._extract_boolean_config_param(EXECUTOR, 'resume_on_startup', False)
        # The option is expressed in minutes but exposed as a timedelta.
        self.reexecution_window: datetime.timedelta = datetime.timedelta(
            minutes=self._extract_config_param(EXECUTOR, 'reexecution_window_minutes', int, 60))
        self.save_report_inputs: bool = self._extract_boolean_config_param(EXECUTOR, 'save_report_inputs', False)
        # generator
        self.calib_config_file: str = self._extract_config_param(GENERATOR, 'calibrations_config_file', str, None)
        self.param_config_file: str = self._extract_config_param(GENERATOR, 'parameters_config_file', str, None)
        self.assoc_preference: str = self._extract_config_param(GENERATOR, 'association_preference', str,
                                                                'raw_per_quality_level')
        self.assoc_breakpoints_url: str = self._extract_config_param(GENERATOR, 'breakpoints_url', str, BREAKPOINTS_URL)
        self.associate_incomplete_jobs: bool = self._extract_boolean_config_param(GENERATOR, 'associate_incomplete_jobs', False)
        self.associate_existing_jobs: bool = self._extract_boolean_config_param(GENERATOR, 'associate_existing_jobs', True)
        self.keep_request_files: bool = self._extract_boolean_config_param(GENERATOR, 'keep_request_files', True)
        meta_workflow: str = self._extract_config_param(GENERATOR, 'meta_workflow', str, '')
        self.meta_workflow: List[str] = self._split_csv(meta_workflow)
        # repository
        self.truncate_repository: bool = self._extract_boolean_config_param(REPOSITORY, 'truncate', False)
        self.local_repository: bool = self._extract_boolean_config_param(REPOSITORY, 'local', True)
        self.db_path: str = self._extract_config_param(REPOSITORY, 'path', str, 'db.json')
        self.db_type: str = self._extract_config_param(REPOSITORY, 'type', str, 'caching')
        self.db_flush_size: int = self._extract_config_param(REPOSITORY, 'flush_size', int, 10)
        self.db_flush_timeout: int = self._extract_config_param(REPOSITORY, 'flush_timeout', int, 60)
        self.min_disk_space_mb: int = self._extract_config_param(REPOSITORY, 'min_disk_space_mb', int, 100)
        # cleanup
        self.cleanup_enabled: bool = self._extract_boolean_config_param(CLEANUP, 'enabled', False)
        self.cleanup_older_than_seconds: int = self._extract_config_param(CLEANUP, 'cleanup_older_than_seconds', int,
                                                                          14 * 24 * 3600)
        self.cleanup_check_period_seconds: int = self._extract_config_param(CLEANUP, 'cleanup_check_period_seconds',
                                                                            int, 3600)
        # packager
        self.package_base_dir: str = self._extract_config_param(PACKAGER, 'package_base_dir', str, None)
        self.package_mode: str = self._extract_config_param(PACKAGER, 'mode', str, 'copy')
        self.package_default_pattern: str = self._extract_config_param(PACKAGER, 'pattern', str,
                                                                       '$DATASET/$TIMESTAMP/$object$_$pro.catg$.$EXT').strip()
        package_categories = self._extract_config_param(PACKAGER, 'categories', str, '')
        self.package_default_categories: List[str] = self._split_csv(package_categories)
        self.package_skip_conflicting: bool = self._extract_boolean_config_param(PACKAGER, "skip_conflicting", True)
        # test
        self.test_tmpdir: str = self._extract_config_param('test', 'tmpdir', str, '.')
        self.test_calibdir: str = self._extract_config_param('test', 'calibdir', str, '.')

        # metrics
        self.metrics_enabled: bool = self._extract_boolean_config_param(METRICS, 'metrics_enabled', False)
        self.metrics_url: str = self._extract_config_param(METRICS, 'url', str, 'http://localhost:9200')
        self.metrics_interval: int = self._extract_config_param(METRICS, 'interval', int, 60)
        self.metrics_index: str = self._extract_config_param(METRICS, 'index', str, 'edps-metrics')
        self.metrics_user: str = self._extract_config_param(METRICS, 'user', str, None)
        self.metrics_password: str = self._extract_config_param(METRICS, 'password', str, None)

    def write(self, filename: str):
        """Write the recorded options to ``filename`` in INI format.

        Each option is preceded by its description lines as ``#`` comments.
        The 'test' and metrics sections are not written.  ``None`` values are
        written as an empty string.
        """
        with open(filename, "w") as f:
            for section in [s for s in self.config_items if s not in ('test', METRICS)]:
                f.write(f"[{section}]\n")
                for item in self.config_items[section]:
                    for desc in item.description:
                        f.write(f"# {desc}\n")
                    f.write(f"{item.name}={item.value if item.value is not None else ''}\n\n")

    @staticmethod
    def _split_csv(text: str) -> List[str]:
        """Split a comma-separated option value into stripped, non-empty entries.

        Blank entries (from an empty value, doubled commas or a trailing
        comma) are dropped so callers never receive an empty name.
        """
        stripped = (entry.strip() for entry in text.split(','))
        return [entry for entry in stripped if entry]

    def _record(self, section: str, name: str, value, default) -> None:
        """Remember the resolved option so it can be written, printed or logged."""
        self.config_items[section].append(
            ConfigItem(section=section, name=name, value=value, default=value == default))

    def _extract_config_param(self, section: str, name: str, converter: type, default):
        """Return option ``name`` of ``section`` converted with ``converter``.

        Falls back to ``default`` (returned as-is, not converted) when the
        section or option is missing.

        Raises:
            ValueError: if the configured text cannot be converted; the
                message identifies the offending section and option.
        """
        if section in self.args and name in self.args[section]:
            raw = self.args[section][name]
            try:
                value = converter(raw)
            except ValueError as err:
                # Re-raise with context: the bare int() message does not say which option is wrong.
                raise ValueError(f"Invalid value {raw!r} for option '{name}' in section [{section}]: "
                                 f"expected {converter.__name__}") from err
        else:
            value = default
        self._record(section, name, value, default)
        return value

    def _extract_boolean_config_param(self, section: str, name: str, default: bool) -> bool:
        """Return option ``name`` of ``section`` as a boolean.

        The value is True only when the configured text equals 'true'
        (case-insensitive, surrounding whitespace ignored); any other text is
        False.  Falls back to ``default`` when the section or option is missing.
        """
        if section in self.args and name in self.args[section]:
            value = self.args[section][name].strip().lower() == 'true'
        else:
            value = default
        self._record(section, name, value, default)
        return value

    @classmethod
    def _display_value(cls, item: 'ConfigItem'):
        """Return the value to show for ``item``, masking non-empty secrets."""
        if item.name in cls._SECRET_OPTIONS and item.value:
            return cls._MASK
        return item.value

    def __repr__(self):
        """Render all recorded options as ``section.name=value`` lines (secrets masked)."""
        config_items = chain.from_iterable(self.config_items.values())
        return '\n'.join([f"{item.section}.{item.name}={self._display_value(item)}" for item in config_items])

    def log(self):
        """Log every recorded option at INFO level, flagging values equal to the default.

        Secret values are masked so credentials do not end up in log files.
        """
        config_items = chain.from_iterable(self.config_items.values())
        for item in config_items:
            self.logger.info("{}.{}={} {}".format(item.section, item.name, self._display_value(item),
                                                  "DEFAULT" if item.default else ""))


class AppConfig:
    """Locate the EDPS configuration files and create them on first use.

    If both ``application_config`` and ``logging_config`` exist as files they
    are used as given; otherwise both fall back to the copies under
    ``~/.edps`` (which may not exist yet, see ``exists`` and ``create``).
    """
    APPLICATION_CONFIG = "application.properties"
    LOGGING_CONFIG = "logging.yaml"
    BREAKPOINTS_CONFIG = "breakpoints.json"

    def __init__(self, application_config: str = APPLICATION_CONFIG, logging_config: str = LOGGING_CONFIG):
        self.home = os.path.expanduser("~")
        self.edps_home = f"{self.home}/.edps"
        # Explicit paths are honoured only as a pair; a single missing file selects the ~/.edps defaults for both.
        if os.path.isfile(application_config) and os.path.isfile(logging_config):
            self.application_config = application_config
            self.logging_config = logging_config
        else:
            self.application_config = f"{self.edps_home}/{self.APPLICATION_CONFIG}"
            self.logging_config = f"{self.edps_home}/{self.LOGGING_CONFIG}"

    def exists(self) -> bool:
        """Return True when both selected configuration files exist."""
        return os.path.isfile(self.application_config) and os.path.isfile(self.logging_config)

    @staticmethod
    def _resolve_base_dir(answer: str, default: str) -> str:
        """Turn the user's answer into the absolute data directory path.

        A blank answer selects ``default``.  ``~`` is expanded and the result
        made absolute, so the path stored in the configuration is the same
        directory that gets created, regardless of the current working
        directory of later runs.
        """
        chosen = answer.strip() or default
        return os.path.abspath(os.path.expanduser(chosen))

    def create(self):
        """Interactively create the initial EDPS configuration.

        Asks for the data directory on stdin, writes the application
        configuration, copies the bundled logging and breakpoints files next
        to this module into the EDPS home directory, and creates the data
        directory.
        """
        edps_data = f"{self.home}/EDPS_data"
        print("### EDPS has not been initialised on this system. Creating initial configuration\n")
        answer = input(f"Enter EDPS bookkeeping directory where intermediate products are stored [{edps_data}]: ")
        # Without normalisation an answer like "~/EDPS_data" would create a literal "~" directory.
        base_dir = self._resolve_base_dir(answer, edps_data)
        args = ConfigParser()
        args[EXECUTOR] = {"base_dir": base_dir}
        args[REPOSITORY] = {"path": os.path.join(base_dir, "db.json")}
        args[GENERATOR] = {"associate_existing_jobs": "False", "keep_request_files": "False",
                           "associate_incomplete_jobs": "True"}
        config = Configuration(args)
        print(f"> Creating EDPS home directory: {self.edps_home}")
        os.makedirs(self.edps_home, exist_ok=True)
        print(f"> Creating EDPS configuration file: {self.application_config}")
        config.write(self.application_config)
        shutil.copy(Path(__file__).parent / self.LOGGING_CONFIG, Path(self.edps_home))
        shutil.copy(Path(__file__).parent / self.BREAKPOINTS_CONFIG, Path(self.edps_home))
        print(f"> Creating EDPS data directory: {base_dir}")
        os.makedirs(base_dir, exist_ok=True)
        print("\n### EDPS has now been configured, please execute the edps command again to start reducing your data\n")
        print(f"> To change the EDPS configuration please edit '{self.application_config}'")
        print(f"> To initialise EDPS from scratch please remove '{self.edps_home}' and rerun the edps command")


if __name__ == "__main__":
    # Manual smoke run: build a configuration from an empty parser (all defaults),
    # print it, and dump it to a file in the current directory.
    defaults = Configuration(ConfigParser())
    print(defaults)
    defaults.write("app.prop")
