#!/usr/bin/env python
# coding: utf-8

# In[172]:

from urllib.parse import urlencode
from urllib.request import urlopen

import cdflib
from astroquery.utils.tap.core import TapPlus


# In[182]:

# Not used if platform = 'Datalabs'
def download(url, params, file_name):
    '''
    Given the URL, a dictionary of parameters (optional) and an output
    filename, download the request and write the payload to file_name.
    Params may = 0/None in the call if all info is in the URL.
    Returns the HTTP status code of the response.
    '''
    # BUG FIX: the original called an undefined name `get` (requests was
    # never imported), which raised NameError.  Use the standard library
    # so the module is self-contained.
    if params:
        url = f"{url}?{urlencode(params)}"
    # open in binary mode; both handles are closed even on error
    with urlopen(url) as response, open(file_name, "wb") as fh:
        fh.write(response.read())
        return response.status


def is_empty_or_whitespace(input_string):
    '''Return True only when input_string is a str that is empty or
    entirely whitespace; non-str inputs return False.'''
    if isinstance(input_string, str):
        return input_string.strip() == ''
    else:
        return False


def TAP_PlusReq(ARCHIVE, ADQL):
    '''Takes an ADQL query string and launches a TapPlus request to the SOAR.

    Returns the results as an astropy Table, or None if the request failed
    (best-effort: the error is printed, not raised).
    '''
    try:
        print(ADQL)
        TAP = TapPlus(url=ARCHIVE)
        results = TAP.launch_job(ADQL)
        astropy_table = results.get_results()
        #pandas_df = astropy_table.to_pandas()
        return astropy_table
    except Exception as e:
        print("Error occurred:", str(e))
        return None


def XNOR(a, b):
    '''Logical XNOR: 1 when a == b, else 0.'''
    if a != b:
        return 0
    else:
        return 1


def printANDwrite(output, filename):
    '''Echo `output` to stdout and append it (with a newline) to `filename`.'''
    print(output)
    with open(filename, "a") as myfile:
        myfile.write(output + '\n')


def FindTheFile(ARCHIVE, platform, onefile):
    '''Return a local filepath for the first file described by `onefile`.

    On 'Datalabs' the file is assumed to already exist in the shared data
    tree; on any other platform it is downloaded into the working directory.
    '''
    cdf_fn = onefile['filename'][0]
    cdf_fn_parts = cdf_fn.split('_')
    # SOLO filename convention expects a lower-case descriptor field
    if any(char.isupper() for char in cdf_fn_parts[2]):
        print('upper_case_descriptor!')
    if platform == 'Datalabs':
        # Year = the four digits starting one past the first '_202' marker
        yyyy = cdf_fn[cdf_fn.find('_202')+1: cdf_fn.find('_202')+5]
        instru = onefile['instrument'][0].lower()
        filepath = f"data/user/SolO/{instru}/{onefile['level'][0]}/{yyyy}/{cdf_fn}"
    else:
        # Download the file (local dir)
        ReqURL = ARCHIVE[:-4] + (f"/data?product_type=SCIENCE&"
                                 f"RETRIEVAL_TYPE=PRODUCT&data_item_id={onefile['data_item_id'][0]}")
        #printANDwrite(f'With request: {ReqURL} \n', opf)
        # BUG FIX: pass None, not the string 'None', when there are no
        # extra query parameters.
        print(f"HTTP Response Code {download(ReqURL, None, onefile['filename'][0])}")
        filepath = cdf_fn
    return filepath


def IdealMandGlobAtts(filename):
    '''
    Take the CDF filename and break it down to provide the correct global
    variables, where possible.

    Expected pattern:
        solo_Lx_<instr>-<sensor>-<product>_datetime_version_free-field.cdf
    Returns a dict of recommended mandatory global-attribute values.
    '''
    instr_dict = {
        'EPD': 'Energetic Particle Detector',
        'MAG': 'Magnetometer',
        'SWA': 'Solar Wind Analyser',
        'RPW': 'Radio and Plasma Waves'
    }
    sensor_dict = {
        'PAS': 'Proton Alpha Sensor',                 # SWA
        'HIS': 'Heavy Ion Sensor',                    # SWA
        'EAS1': 'Electron Analyser System 1',         # SWA
        'EAS2': 'Electron Analyser System 2',         # SWA
        'IBS': 'Inboard Sensor',                      # MAG
        'OBS': 'Outboard Sensor',                     # MAG
        'STEP': 'SupraThermal Electrons and Protons', # EPD
        'EPT': 'Electron Proton Telescope',           # EPD
        'SIS': 'Suprathermal Ion Spectrograph',       # EPD
        'HET': 'High Energy Telescope',               # EPD
        'HFR': 'High Frequency Receiver',             # RPW
        'LFR': 'Low Frequency Receiver',              # RPW
        'TDS': 'Time Domain Sampler',                 # RPW
        'TNR': 'Thermal Noise Receiver'               # RPW
    }
    # Take the suffix off
    fn_no_suffix = filename.split('.')[0]
    # Split into sections
    fn_parts = fn_no_suffix.split('_')
    # Split descriptor up into instrument, sensor and data product
    descr_parts = fn_parts[2].split('-')
    # Time will be the fourth section
    time = fn_parts[3]
    # If the end time is there too
    if '-' in time:
        t_parts = time.split('-')
        st_time = t_parts[0]
        end_time = t_parts[1]
    else:
        st_time = time
        end_time = ''
    # descriptor is the whole third section
    descr = fn_parts[2]
    # instrument is first three of descriptor
    i = descr[:3].upper()
    instr = instr_dict[i]
    # sensor is second part of descriptor
    # BUG FIX: guard against descriptors with no '-' (IndexError before)
    s = descr_parts[1].upper() if len(descr_parts) > 1 else ''
    if s in sensor_dict:
        sensor = sensor_dict[s]
    else:
        sensor = ''
    # level, e.g. '2' from 'L2'
    l = fn_parts[1][1:]
    # version number, e.g. '02' from 'V02'
    v = fn_parts[4][1:3]
    # data product: everything after instrument and sensor
    dp = '-'.join(descr_parts[2:])
    fn_global_dict = {'Project': 'SOLO>Solar Orbiter',
                      'Source_name': 'SOLO>Solar Orbiter',
                      'Discipline': 'Space Physics>Interplanetary Studies',
                      'Data_type': f'L{l}>Level {l} Data',
                      'Descriptor': f'{descr.upper()}>{instr}, {sensor}, etc',
                      'Instrument': f'{i}-{s}>{instr} {sensor}',
                      'Data_version': v,
                      #'Instrument_type',
                      'Mission_group': 'Solar Orbiter',
                      'Logical_source': '_'.join(fn_parts[:3]),
                      'Logical_file_id': '_'.join(fn_parts[:5]),
                      'Logical_source_description': f'Solar Orbiter, Level {l}, {instr}, {sensor}, etc ',
                      'Data_product': f'{dp}>[description of dataset]',
                      'SOOP_NAME': "none",
                      'SOOP_TYPE': "none",
                      'OBS_ID': "if not applicable, then none",
                      'LEVEL': f'L{l}>Level {l} Data',
                      'filestarttime': st_time,
                      'fileendtime': end_time}
    return fn_global_dict


def time_info(attr, f_t, data_start_t, data_end_t, fn_global_dict):
    '''
    attr is TIME_MIN or TIME_MAX
    f_t is the value of attr in the file - which could be
        'no time_min or time_max', TT2000 or ISO
    data_start_t is element 0 of the epoch variable
    data_end_t is element -1 of the epoch variable
    fn_global_dict is the dict of the recommended values, where the
        datetime(s) from the filename have been stored

    If it's not there, give start and end data in ISO
    If it's in ISO, give the start and end data in ISO, and print what's there
    If it's in TT2000, convert that into ISO and give start and end data in ISO

    NOTE(review): this function reports through the module-level output
    filename `opf`, so it must only be called from the script loop below.
    '''
    # No matter what, I want the start and end time, from the data, in ISO
    data_start_iso = cdflib.epochs.CDFepoch.encode_tt2000(data_start_t, iso_8601=True)
    data_end_iso = cdflib.epochs.CDFepoch.encode_tt2000(data_end_t, iso_8601=True)
    # From attribute:
    # If TIME_MIN/MAX not provided - ALERT in main program
    if f_t == 'no time_min or time_max':
        pass
    # If it's given as ISO
    elif ':' in str(f_t):
        printANDwrite(f'{attr} from metadata is given as {f_t}', opf)
    # if it is provided (as TT2000)
    else:
        md_t = cdflib.epochs.CDFepoch.encode_tt2000(float(f_t), iso_8601=True)  # needs float?
        printANDwrite(f'{attr} from metadata equates to {md_t}', opf)
    # From filename
    # Times from filename (stored in global attributes dictionary)
    if attr == 'TIME_MIN':
        printANDwrite(f"Start time from filename: "
                      f"{fn_global_dict['filestarttime']}", opf)
        time_text = 'Start'
        cdf_t = data_start_iso
    else:
        printANDwrite(f"End time from filename: "
                      f"{fn_global_dict['fileendtime']}", opf)
        time_text = 'End'
        cdf_t = data_end_iso
    # From EPOCH
    printANDwrite(f"{time_text} time from epoch variable: "
                  f"{cdf_t}", opf)


def unit_check(va):
    '''Check the UNITS / UNIT_PTR_1 and SI_CONVERSION attributes of one
    variable's attribute dict `va` and report problems.

    NOTE(review): reports through the module-level output filename `opf`,
    so it must only be called from the script loop below.
    '''
    if ('UNITS' in va) | ('UNIT_PTR_1' in va):
        if 'UNITS' in va:
            unit = va['UNITS']
            printANDwrite(f'UNITS = {unit}', opf)
        elif 'UNIT_PTR_1' in va:
            unit = va['UNIT_PTR_1']
            printANDwrite(f'UNIT_PTR_1 = {unit}', opf)
        # Handle if it's there but there are no units
        no_units = ['None', 'NONE', ' ']
        if unit in no_units:
            printANDwrite('If no units, then "unitless"', opf)
        units_dict = {'ElectronVolts': 'eV', 'Volts': 'V',
                      'nanoseconds': 'ns', 'microsecond': 'us',
                      'milliseconds': 'ms', 'nsec': 'ns', 'seconds': 's',
                      'Count': 'counts', 'Counts': 'counts', 'count': 'counts'}
        if unit in units_dict:
            printANDwrite(f"Units are currently given as {unit} but "
                          f"'{units_dict[unit]}' would be preferred.", opf)
        # assumes unit is a str here — a UNIT_PTR_1 value may not be;
        # TODO(review): confirm against real files
        if isinstance(unit, str):
            if (unit.upper() == 'DEGREES') | (unit.upper() == 'DEG'):
                printANDwrite('Remember that the SI unit of angle is the radian.', opf)
    else:
        printANDwrite('UNIT attributes (UNITS or UNIT_PTR) must be present. '
                      'If no units, then "unitless"', opf)
    if 'SI_CONVERSION' in va:
        # all good
        pass
    else:
        printANDwrite(f'SI_CONVERSION should be present even if there '
                      'are no units (1>unitless), '
                      'e.g., 1.0E-9>s for Epoch where UNITS=ns, '
                      'and if already in SI: e.g., "1.0>s")', opf)


# # Functions Above

# In[185]:

# FILE INFORMATION ************************************************************

# Fetch list of descriptors
ARCHIVE = ('https://soar.esac.esa.int/soar-sl-tap/tap')
level = 'L2'
# BUG FIX: the original adjacent f-strings concatenated to
# "...level='L2'AND instrument=..." (no space between ADQL clauses).
descrs = TAP_PlusReq(ARCHIVE,
                     f"SELECT DISTINCT descriptor FROM v_sc_data_item "
                     f"WHERE file_format='CDF' "
                     f"AND level='{level}' "
                     f"AND instrument='SWA'")
# TAP_PlusReq returns None on failure; nothing useful can happen then.
if descrs is None:
    raise SystemExit('Could not retrieve descriptor list from SOAR')

# For each descriptor
for d in descrs['descriptor']:
    # Reset the file or new file based on descriptor
    # opf = output file
    opf = f'solo_{level}_{d}.txt'
    print(opf)
    with open(opf, 'w') as myfile:
        myfile.write('*** File Information *** \n\n')
    printANDwrite(f'For descriptor: {d} \n', opf)

    # Get one filename (not ordered so semi-random)
    onefile = TAP_PlusReq(ARCHIVE,
                          f"SELECT TOP 1 filename, level, data_item_id, "
                          f"instrument FROM v_sc_data_item WHERE descriptor='{d}' "
                          f"AND level='{level}'")
    # Best-effort query may fail or return nothing: skip this descriptor
    if onefile is None or len(onefile) == 0:
        printANDwrite(f'No file found for descriptor {d}', opf)
        continue
    # onefile is an astropy table so awkward to write to file
    print(onefile)

    # Location of the file
    #platform = 'Mac'
    platform = 'Datalabs'
    filename = onefile['filename'][0]
    filepath = FindTheFile(ARCHIVE, platform, onefile)  # See above
    printANDwrite(f'Filename: {filepath} \n', opf)

    # Open it
    cdf_file_obj = cdflib.CDF(filepath)
    # Read in all the metadata:
    info_dict = cdf_file_obj.cdf_info()
    # All the variables:
    zVars = info_dict.zVariables
    printANDwrite(f'Variables: {str(zVars)} \n', opf)

    # MANDATORY VARIABLES *****************************************************
    printANDwrite('*** Mandatory Variables *** \n', opf)
    # These variables must be present
    m_variables = ['QUALITY_FLAG', 'QUALITY_BITMASK']
    if level == 'L1':
        m_variables = m_variables + ['SCET']
    for mv in m_variables:
        printANDwrite(f'{mv}', opf)
        if mv in zVars:
            printANDwrite(f'OK', opf)
        else:
            printANDwrite(f"Mandatory variable {mv} is not present! \n", opf)

    # Epoch or EPOCH must also be there.
    # BUG FIX: initialise the name so it is always bound — previously a file
    # with neither spelling raised NameError instead of the alert below.
    f_time_var_name = None
    if 'EPOCH' in zVars:
        f_time_var_name = 'EPOCH'
    if 'Epoch' in zVars:
        f_time_var_name = 'Epoch'
    if not f_time_var_name:
        printANDwrite('Where is Epoch/EPOCH?', opf)

    # MANDATORY GLOBAL ATTRIBUTES *********************************************
    printANDwrite('\n *** Mandatory Global Attributes *** ', opf)
    mand_global = ['Project', 'Source_name', 'Discipline', 'Data_type',
                   'Descriptor', 'Instrument', 'Data_version',
                   'Software_version', 'PI_name', 'PI_affiliation', 'TEXT',
                   'Instrument_type', 'Mission_group', 'Logical_source',
                   'Logical_file_id', 'Logical_source_description',
                   'Rules_of_use', 'Generated_by', 'Generation_date',
                   'Acknowledgement', 'MODS', 'Parents', 'TARGET_NAME',
                   'TARGET_CLASS', 'TARGET_REGION', 'TIME_MIN', 'TIME_MAX',
                   'Data_product', 'SOOP_NAME', 'SOOP_TYPE', 'OBS_ID',
                   'LEVEL']

    # What is there - file global attributes
    f_glob_attrs = cdf_file_obj.globalattsget()
    # What should be there given the filename - filename global
    print(filename)
    fn_global_dict = IdealMandGlobAtts(filename)

    # For each one that should be there
    for mga in mand_global:
        printANDwrite('\n', opf)  # For formatting report
        # If the mandatory global attribute is in the file:
        if mga in f_glob_attrs:
            # print and write the attribute and value in file for info
            printANDwrite(f'{mga} is {f_glob_attrs[mga]}', opf)
            # Alert if the contents are whitespace
            if is_empty_or_whitespace(f_glob_attrs[mga]):
                printANDwrite("which is whitespace", opf)
            # if TIME_MIN or TIME_MAX is there (see above):
            f_t = f_glob_attrs[mga][0]
        # If the mandatory global attribute is NOT in the file:
        else:
            printANDwrite(f"ALERT: Keyword '{mga}' NOT FOUND", opf)
            f_t = 'no time_min or time_max'
        if mga[:4] == 'TIME':
            # Send the attribute name, attribute value from file,
            # first and last values of EPOCH/Epoch (f_time_var_name)
            # and the global attributes from filename dictionary to
            # time function.  Skipped when no epoch variable exists.
            if f_time_var_name:
                time_info(mga, f_t,
                          cdf_file_obj.varget(f_time_var_name)[0],   # TT2000, first
                          cdf_file_obj.varget(f_time_var_name)[-1],  # last
                          fn_global_dict)
        else:
            # If I can construct it, print what it should be
            if mga in fn_global_dict:
                printANDwrite(f"From dict, it should be: '{fn_global_dict[mga]}'", opf)

    # OBS_TYPE and OBS_ID should be “none” if not connected to an OBS_ID
    # 'SEUI_060A_LS4_111_mACv_111'
    printANDwrite('\n', opf)

    # MANDATORY VARIABLE ATTRIBUTES *******************************************
    printANDwrite('\n *** Mandatory Variable Attributes *** \n', opf)
    # These attributes must be present for all variables (not just mandatory ones)
    mand_varatts = ['FIELDNAM', 'CATDESC', 'VAR_TYPE', 'VAR_NOTES', 'UNITS']

    for v in zVars:  # For each variable
        printANDwrite(f"Variable: {v}", opf)
        # Two different sets of metadata
        va = cdf_file_obj.varattsget(variable=v)
        printANDwrite(str(va), opf)
        VDRInfo = cdf_file_obj.varinq(v)
        printANDwrite(str(VDRInfo)+'\n', opf)

        # Ignore anything that's a label or representation -
        # CHECK IT'S VAR_TYPE = METADATA.
        # Use .get(): CATDESC/VAR_TYPE may be the very attributes missing.
        keys = ['label', 'representation']
        if any(key in va.get('CATDESC', '') for key in keys):
            if va.get('VAR_TYPE') != 'metadata':
                printANDwrite(f"VAR_TYPE for v should be 'metadata' not {va.get('VAR_TYPE')}", opf)
            continue

        # Just for ['FIELDNAM', 'CATDESC', 'VAR_TYPE', 'VAR_NOTES', 'UNITS']
        for mva in mand_varatts:
            if mva[:4] == 'UNIT':
                unit_check(va)
            elif mva in va:
                if is_empty_or_whitespace(va[mva]):
                    printANDwrite(f"{mva} is whitespace", opf)
            else:
                printANDwrite(f"{mva} not found in variable attributes for {v}", opf)

        # Epoch-specific checks.
        # BUG FIX: the original referenced an undefined name `f_time_var`.
        if v == f_time_var_name:
            if va.get('VAR_TYPE') != 'support_data':
                printANDwrite(f"{f_time_var_name} is {va.get('VAR_TYPE')} not support_data", opf)
            if 'SI_CONVERSION' in va:
                ns_si = ['1.0E-9>s', '1E-9>s', '1.0e-9>s', '1e-9>s',
                         '1.0E-09>s', '1E-09>s', '1.0e-09>s', '1e-09>s']
                if va['SI_CONVERSION'] not in ns_si:
                    printANDwrite(f"Epoch SI_CONVERSION is {va['SI_CONVERSION']} not 1.0E-9>s", opf)
            if VDRInfo.Num != 0:
                printANDwrite(f'{f_time_var_name} is variable {VDRInfo.Num} in the CDF. '
                              'For (user-friendly) usability we '
                              'would rather this was the first variable, i.e., '
                              'that Num=0, but will not insist. ', opf)

        # BUG FIX: cdflib exposes Rec_Vary as a bool, so the original
        # comparison with the string 'True' was always False and the
        # DEPEND_0 check never fired; str() handles both representations.
        if (str(VDRInfo.Rec_Vary) == 'True') and (v != f_time_var_name):
            if 'DEPEND_0' not in va:
                printANDwrite('DEPEND_0 is missing since Rec_Vary is True', opf)

        # "Only mandatory for time varying data and support_data" but I guess
        # this MUST be time-varying so I won't put in the check for support_data
        if (('VALIDMIN' in va) | ('VALIDMAX' in va) |
                ('SCALEMIN' in va) | ('SCALEMAX' in va) | ('FILLVAL' in va)):
            pass
        else:
            printANDwrite('MINs and/or MAXs and/or FILLVAL missing', opf)

        print(f'VDRInfo.Dim_Sizes {VDRInfo.Dim_Sizes}')
        # Exactly one dimension of size 3 or 6 suggests a vector/tensor
        if len(VDRInfo.Dim_Sizes) == 1:
            if (VDRInfo.Dim_Sizes[0] == 3) | (VDRInfo.Dim_Sizes[0] == 6):
                printANDwrite('Is this a vector or tensor?', opf)
                if 'COORDINATE_SYSTEM' not in va:
                    printANDwrite('no COORDINATE_SYSTEM', opf)
                if 'FRAME_ORIGIN' not in va:
                    printANDwrite('no FRAME_ORIGIN', opf)
                if 'REPRESENTATION_1' not in va:  # REPRESENTATION_i
                    printANDwrite('no REPRESENTATION_1', opf)
                if 'TENSOR_ORDER' not in va:
                    printANDwrite('no TENSOR_ORDER', opf)

        if va.get('VAR_TYPE') == 'data':
            if 'DISPLAY_TYPE' not in va:
                printANDwrite('VAR_TYPE is data, but DISPLAY_TYPE missing', opf)

        if (va.get('VAR_TYPE') == 'data') | (va.get('VAR_TYPE') == 'support_data'):
            if not (('SCALETYP' in va) | ('SCAL_PTR' in va)):
                printANDwrite('VAR_TYPE is data or support_data, but SCALE keywords missing', opf)
            if not (('FORMAT' in va) | ('FORM_PTR' in va)):
                printANDwrite('FORMAT keywords missing', opf)
            if not (('LABLAXIS' in va) | ('LABL_PTR_1' in va)):
                printANDwrite('LABEL keywords missing', opf)

        printANDwrite('\n', opf)