#!/usr/bin/env python
# coding: utf-8

# In[172]:

from urllib.parse import urlencode
from urllib.request import urlopen

import cdflib
from astroquery.utils.tap.core import TapPlus


# In[182]:

# Not used if platform = 'Datalabs'
def download(url, params, file_name):
    '''
    Given the URL, a dictionary of parameters (optional) and an output
    filename, download the request and write the payload to file_name.
    Params may = 0/None in the call if all info is in the URL.
    Returns the HTTP status code of the response.
    '''
    # BUG FIX: the original called an undefined name `get` (requests was
    # never imported), which raised NameError.  Use the standard library
    # so the module is self-contained.
    if params:
        url = f"{url}?{urlencode(params)}"
    # open in binary mode; both handles are closed even on error
    with urlopen(url) as response, open(file_name, "wb") as fh:
        fh.write(response.read())
        return response.status


def is_empty_or_whitespace(input_string):
    '''Return True only when input_string is a str that is empty or
    entirely whitespace; non-str inputs return False.'''
    if isinstance(input_string, str):
        return input_string.strip() == ''
    else:
        return False


def TAP_PlusReq(ARCHIVE, ADQL):
    '''Takes an ADQL query string and launches a TapPlus request to the SOAR.

    Returns the results as an astropy Table, or None if the request failed
    (best-effort: the error is printed, not raised).
    '''
    try:
        print(ADQL)
        TAP = TapPlus(url=ARCHIVE)
        results = TAP.launch_job(ADQL)
        astropy_table = results.get_results()
        #pandas_df = astropy_table.to_pandas()
        return astropy_table
    except Exception as e:
        print("Error occurred:", str(e))
        return None


def XNOR(a, b):
    '''Logical XNOR: 1 when a == b, else 0.'''
    if a != b:
        return 0
    else:
        return 1


def printANDwrite(output, filename):
    '''Echo `output` to stdout and append it (with a newline) to `filename`.'''
    print(output)
    with open(filename, "a") as myfile:
        myfile.write(output + '\n')


def FindTheFile(ARCHIVE, platform, onefile):
    '''Return a local filepath for the first file described by `onefile`.

    On 'Datalabs' the file is assumed to already exist in the shared data
    tree; on any other platform it is downloaded into the working directory.
    '''
    cdf_fn = onefile['filename'][0]
    cdf_fn_parts = cdf_fn.split('_')
    # SOLO filename convention expects a lower-case descriptor field
    if any(char.isupper() for char in cdf_fn_parts[2]):
        print('upper_case_descriptor!')
    if platform == 'Datalabs':
        # Year = the four digits starting one past the first '_202' marker
        yyyy = cdf_fn[cdf_fn.find('_202')+1: cdf_fn.find('_202')+5]
        instru = onefile['instrument'][0].lower()
        filepath = f"data/user/SolO/{instru}/{onefile['level'][0]}/{yyyy}/{cdf_fn}"
    else:
        # Download the file (local dir)
        ReqURL = ARCHIVE[:-4] + (f"/data?product_type=SCIENCE&"
                                 f"RETRIEVAL_TYPE=PRODUCT&data_item_id={onefile['data_item_id'][0]}")
        #printANDwrite(f'With request: {ReqURL} \n', opf)
        # BUG FIX: pass None, not the string 'None', when there are no
        # extra query parameters.
        print(f"HTTP Response Code {download(ReqURL, None, onefile['filename'][0])}")
        filepath = cdf_fn
    return filepath


def IdealMandGlobAtts(filename):
    '''
    Take the CDF filename and break it down to provide the correct global
    variables, where possible.

    Expected pattern:
        solo_Lx_<instr>-<sensor>-<product>_datetime_version_free-field.cdf
    Returns a dict of recommended mandatory global-attribute values.
    '''
    instr_dict = {
        'EPD': 'Energetic Particle Detector',
        'MAG': 'Magnetometer',
        'SWA': 'Solar Wind Analyser',
        'RPW': 'Radio and Plasma Waves'
    }
    sensor_dict = {
        'PAS': 'Proton Alpha Sensor',                 # SWA
        'HIS': 'Heavy Ion Sensor',                    # SWA
        'EAS1': 'Electron Analyser System 1',         # SWA
        'EAS2': 'Electron Analyser System 2',         # SWA
        'IBS': 'Inboard Sensor',                      # MAG
        'OBS': 'Outboard Sensor',                     # MAG
        'STEP': 'SupraThermal Electrons and Protons', # EPD
        'EPT': 'Electron Proton Telescope',           # EPD
        'SIS': 'Suprathermal Ion Spectrograph',       # EPD
        'HET': 'High Energy Telescope',               # EPD
        'HFR': 'High Frequency Receiver',             # RPW
        'LFR': 'Low Frequency Receiver',              # RPW
        'TDS': 'Time Domain Sampler',                 # RPW
        'TNR': 'Thermal Noise Receiver'               # RPW
    }
    # Take the suffix off
    fn_no_suffix = filename.split('.')[0]
    # Split into sections
    fn_parts = fn_no_suffix.split('_')
    # Split descriptor up into instrument, sensor and data product
    descr_parts = fn_parts[2].split('-')
    # Time will be the fourth section
    time = fn_parts[3]
    # If the end time is there too
    if '-' in time:
        t_parts = time.split('-')
        st_time = t_parts[0]
        end_time = t_parts[1]
    else:
        st_time = time
        end_time = ''
    # descriptor is the whole third section
    descr = fn_parts[2]
    # instrument is first three of descriptor
    i = descr[:3].upper()
    instr = instr_dict[i]
    # sensor is second part of descriptor
    # BUG FIX: guard against descriptors with no '-' (IndexError before)
    s = descr_parts[1].upper() if len(descr_parts) > 1 else ''
    if s in sensor_dict:
        sensor = sensor_dict[s]
    else:
        sensor = ''
    # level, e.g. '2' from 'L2'
    l = fn_parts[1][1:]
    # version number, e.g. '02' from 'V02'
    v = fn_parts[4][1:3]
    # data product: everything after instrument and sensor
    dp = '-'.join(descr_parts[2:])
    fn_global_dict = {'Project': 'SOLO>Solar Orbiter',
                      'Source_name': 'SOLO>Solar Orbiter',
                      'Discipline': 'Space Physics>Interplanetary Studies',
                      'Data_type': f'L{l}>Level {l} Data',
                      'Descriptor': f'{descr.upper()}>{instr}, {sensor}, etc',
                      'Instrument': f'{i}-{s}>{instr} {sensor}',
                      'Data_version': v,
                      #'Instrument_type',
                      'Mission_group': 'Solar Orbiter',
                      'Logical_source': '_'.join(fn_parts[:3]),
                      'Logical_file_id': '_'.join(fn_parts[:5]),
                      'Logical_source_description': f'Solar Orbiter, Level {l}, {instr}, {sensor}, etc ',
                      'Data_product': f'{dp}>[description of dataset]',
                      'SOOP_NAME': "none",
                      'SOOP_TYPE': "none",
                      'OBS_ID': "if not applicable, then none",
                      'LEVEL': f'L{l}>Level {l} Data',
                      'filestarttime': st_time,
                      'fileendtime': end_time}
    return fn_global_dict


def time_info(attr, f_t, data_start_t, data_end_t, fn_global_dict):
    '''
    attr is TIME_MIN or TIME_MAX
    f_t is the value of attr in the file - which could be
        'no time_min or time_max', TT2000 or ISO
    data_start_t is element 0 of the epoch variable
    data_end_t is element -1 of the epoch variable
    fn_global_dict is the dict of the recommended values, where the
        datetime(s) from the filename have been stored

    If it's not there, give start and end data in ISO
    If it's in ISO, give the start and end data in ISO, and print what's there
    If it's in TT2000, convert that into ISO and give start and end data in ISO

    NOTE(review): this function reports through the module-level output
    filename `opf`, so it must only be called from the script loop below.
    '''
    # No matter what, I want the start and end time, from the data, in ISO
    data_start_iso = cdflib.epochs.CDFepoch.encode_tt2000(data_start_t, iso_8601=True)
    data_end_iso = cdflib.epochs.CDFepoch.encode_tt2000(data_end_t, iso_8601=True)
    # From attribute:
    # If TIME_MIN/MAX not provided - ALERT in main program
    if f_t == 'no time_min or time_max':
        pass
    # If it's given as ISO
    elif ':' in str(f_t):
        printANDwrite(f'{attr} from metadata is given as {f_t}', opf)
    # if it is provided (as TT2000)
    else:
        md_t = cdflib.epochs.CDFepoch.encode_tt2000(float(f_t), iso_8601=True)  # needs float?
        printANDwrite(f'{attr} from metadata equates to {md_t}', opf)
    # From filename
    # Times from filename (stored in global attributes dictionary)
    if attr == 'TIME_MIN':
        printANDwrite(f"Start time from filename: "
                      f"{fn_global_dict['filestarttime']}", opf)
        time_text = 'Start'
        cdf_t = data_start_iso
    else:
        printANDwrite(f"End time from filename: "
                      f"{fn_global_dict['fileendtime']}", opf)
        time_text = 'End'
        cdf_t = data_end_iso
    # From EPOCH
    printANDwrite(f"{time_text} time from epoch variable: "
                  f"{cdf_t}", opf)


def unit_check(va):
    '''Check the UNITS / UNIT_PTR_1 and SI_CONVERSION attributes of one
    variable's attribute dict `va` and report problems.

    NOTE(review): reports through the module-level output filename `opf`,
    so it must only be called from the script loop below.
    '''
    if ('UNITS' in va) | ('UNIT_PTR_1' in va):
        if 'UNITS' in va:
            unit = va['UNITS']
            printANDwrite(f'UNITS = {unit}', opf)
        elif 'UNIT_PTR_1' in va:
            unit = va['UNIT_PTR_1']
            printANDwrite(f'UNIT_PTR_1 = {unit}', opf)
        # Handle if it's there but there are no units
        no_units = ['None', 'NONE', ' ']
        if unit in no_units:
            printANDwrite('If no units, then "unitless"', opf)
        units_dict = {'ElectronVolts': 'eV', 'Volts': 'V',
                      'nanoseconds': 'ns', 'microsecond': 'us',
                      'milliseconds': 'ms', 'nsec': 'ns', 'seconds': 's',
                      'Count': 'counts', 'Counts': 'counts', 'count': 'counts'}
        if unit in units_dict:
            printANDwrite(f"Units are currently given as {unit} but "
                          f"'{units_dict[unit]}' would be preferred.", opf)
        # assumes unit is a str here — a UNIT_PTR_1 value may not be;
        # TODO(review): confirm against real files
        if isinstance(unit, str):
            if (unit.upper() == 'DEGREES') | (unit.upper() == 'DEG'):
                printANDwrite('Remember that the SI unit of angle is the radian.', opf)
    else:
        printANDwrite('UNIT attributes (UNITS or UNIT_PTR) must be present. '
                      'If no units, then "unitless"', opf)
    if 'SI_CONVERSION' in va:
        # all good
        pass
    else:
        printANDwrite(f'SI_CONVERSION should be present even if there '
                      'are no units (1>unitless), '
                      'e.g., 1.0E-9>s for Epoch where UNITS=ns, '
                      'and if already in SI: e.g., "1.0>s")', opf)


# # Functions Above

# In[185]:

# FILE INFORMATION ************************************************************

# Fetch list of descriptors
ARCHIVE = ('https://soar.esac.esa.int/soar-sl-tap/tap')
level = 'L2'
# BUG FIX: the original adjacent f-strings concatenated to
# "...level='L2'AND instrument=..." (no space between ADQL clauses).
descrs = TAP_PlusReq(ARCHIVE,
                     f"SELECT DISTINCT descriptor FROM v_sc_data_item "
                     f"WHERE file_format='CDF' "
                     f"AND level='{level}' "
                     f"AND instrument='SWA'")
# TAP_PlusReq returns None on failure; nothing useful can happen then.
if descrs is None:
    raise SystemExit('Could not retrieve descriptor list from SOAR')

# For each descriptor
for d in descrs['descriptor']:
    # Reset the file or new file based on descriptor
    # opf = output file
    opf = f'solo_{level}_{d}.txt'
    print(opf)
    with open(opf, 'w') as myfile:
        myfile.write('*** File Information *** \n\n')
    printANDwrite(f'For descriptor: {d} \n', opf)

    # Get one filename (not ordered so semi-random)
    onefile = TAP_PlusReq(ARCHIVE,
                          f"SELECT TOP 1 filename, level, data_item_id, "
                          f"instrument FROM v_sc_data_item WHERE descriptor='{d}' "
                          f"AND level='{level}'")
    # Best-effort query may fail or return nothing: skip this descriptor
    if onefile is None or len(onefile) == 0:
        printANDwrite(f'No file found for descriptor {d}', opf)
        continue
    # onefile is an astropy table so awkward to write to file
    print(onefile)

    # Location of the file
    #platform = 'Mac'
    platform = 'Datalabs'
    filename = onefile['filename'][0]
    filepath = FindTheFile(ARCHIVE, platform, onefile)  # See above
    printANDwrite(f'Filename: {filepath} \n', opf)

    # Open it
    cdf_file_obj = cdflib.CDF(filepath)
    # Read in all the metadata:
    info_dict = cdf_file_obj.cdf_info()
    # All the variables:
    zVars = info_dict.zVariables
    printANDwrite(f'Variables: {str(zVars)} \n', opf)

    # MANDATORY VARIABLES *****************************************************
    printANDwrite('*** Mandatory Variables *** \n', opf)
    # These variables must be present
    m_variables = ['QUALITY_FLAG', 'QUALITY_BITMASK']
    if level == 'L1':
        m_variables = m_variables + ['SCET']
    for mv in m_variables:
        printANDwrite(f'{mv}', opf)
        if mv in zVars:
            printANDwrite(f'OK', opf)
        else:
            printANDwrite(f"Mandatory variable {mv} is not present! \n", opf)

    # Epoch or EPOCH must also be there.
    # BUG FIX: initialise the name so it is always bound — previously a file
    # with neither spelling raised NameError instead of the alert below.
    f_time_var_name = None
    if 'EPOCH' in zVars:
        f_time_var_name = 'EPOCH'
    if 'Epoch' in zVars:
        f_time_var_name = 'Epoch'
    if not f_time_var_name:
        printANDwrite('Where is Epoch/EPOCH?', opf)

    # MANDATORY GLOBAL ATTRIBUTES *********************************************
    printANDwrite('\n *** Mandatory Global Attributes *** ', opf)
    mand_global = ['Project', 'Source_name', 'Discipline', 'Data_type',
                   'Descriptor', 'Instrument', 'Data_version',
                   'Software_version', 'PI_name', 'PI_affiliation', 'TEXT',
                   'Instrument_type', 'Mission_group', 'Logical_source',
                   'Logical_file_id', 'Logical_source_description',
                   'Rules_of_use', 'Generated_by', 'Generation_date',
                   'Acknowledgement', 'MODS', 'Parents', 'TARGET_NAME',
                   'TARGET_CLASS', 'TARGET_REGION', 'TIME_MIN', 'TIME_MAX',
                   'Data_product', 'SOOP_NAME', 'SOOP_TYPE', 'OBS_ID',
                   'LEVEL']

    # What is there - file global attributes
    f_glob_attrs = cdf_file_obj.globalattsget()
    # What should be there given the filename - filename global
    print(filename)
    fn_global_dict = IdealMandGlobAtts(filename)

    # For each one that should be there
    for mga in mand_global:
        printANDwrite('\n', opf)  # For formatting report
        # If the mandatory global attribute is in the file:
        if mga in f_glob_attrs:
            # print and write the attribute and value in file for info
            printANDwrite(f'{mga} is {f_glob_attrs[mga]}', opf)
            # Alert if the contents are whitespace
            if is_empty_or_whitespace(f_glob_attrs[mga]):
                printANDwrite("which is whitespace", opf)
            # if TIME_MIN or TIME_MAX is there (see above):
            f_t = f_glob_attrs[mga][0]
        # If the mandatory global attribute is NOT in the file:
        else:
            printANDwrite(f"ALERT: Keyword '{mga}' NOT FOUND", opf)
            f_t = 'no time_min or time_max'
        if mga[:4] == 'TIME':
            # Send the attribute name, attribute value from file,
            # first and last values of EPOCH/Epoch (f_time_var_name)
            # and the global attributes from filename dictionary to
            # time function.  Skipped when no epoch variable exists.
            if f_time_var_name:
                time_info(mga, f_t,
                          cdf_file_obj.varget(f_time_var_name)[0],   # TT2000, first
                          cdf_file_obj.varget(f_time_var_name)[-1],  # last
                          fn_global_dict)
        else:
            # If I can construct it, print what it should be
            if mga in fn_global_dict:
                printANDwrite(f"From dict, it should be: '{fn_global_dict[mga]}'", opf)

    # OBS_TYPE and OBS_ID should be “none” if not connected to an OBS_ID
    # 'SEUI_060A_LS4_111_mACv_111'
    printANDwrite('\n', opf)

    # MANDATORY VARIABLE ATTRIBUTES *******************************************
    printANDwrite('\n *** Mandatory Variable Attributes *** \n', opf)
    # These attributes must be present for all variables (not just mandatory ones)
    mand_varatts = ['FIELDNAM', 'CATDESC', 'VAR_TYPE', 'VAR_NOTES', 'UNITS']

    for v in zVars:  # For each variable
        printANDwrite(f"Variable: {v}", opf)
        # Two different sets of metadata
        va = cdf_file_obj.varattsget(variable=v)
        printANDwrite(str(va), opf)
        VDRInfo = cdf_file_obj.varinq(v)
        printANDwrite(str(VDRInfo)+'\n', opf)

        # Ignore anything that's a label or representation -
        # CHECK IT'S VAR_TYPE = METADATA.
        # Use .get(): CATDESC/VAR_TYPE may be the very attributes missing.
        keys = ['label', 'representation']
        if any(key in va.get('CATDESC', '') for key in keys):
            if va.get('VAR_TYPE') != 'metadata':
                printANDwrite(f"VAR_TYPE for v should be 'metadata' not {va.get('VAR_TYPE')}", opf)
            continue

        # Just for ['FIELDNAM', 'CATDESC', 'VAR_TYPE', 'VAR_NOTES', 'UNITS']
        for mva in mand_varatts:
            if mva[:4] == 'UNIT':
                unit_check(va)
            elif mva in va:
                if is_empty_or_whitespace(va[mva]):
                    printANDwrite(f"{mva} is whitespace", opf)
            else:
                printANDwrite(f"{mva} not found in variable attributes for {v}", opf)

        # Epoch-specific checks.
        # BUG FIX: the original referenced an undefined name `f_time_var`.
        if v == f_time_var_name:
            if va.get('VAR_TYPE') != 'support_data':
                printANDwrite(f"{f_time_var_name} is {va.get('VAR_TYPE')} not support_data", opf)
            if 'SI_CONVERSION' in va:
                ns_si = ['1.0E-9>s', '1E-9>s', '1.0e-9>s', '1e-9>s',
                         '1.0E-09>s', '1E-09>s', '1.0e-09>s', '1e-09>s']
                if va['SI_CONVERSION'] not in ns_si:
                    printANDwrite(f"Epoch SI_CONVERSION is {va['SI_CONVERSION']} not 1.0E-9>s", opf)
            if VDRInfo.Num != 0:
                printANDwrite(f'{f_time_var_name} is variable {VDRInfo.Num} in the CDF. '
                              'For (user-friendly) usability we '
                              'would rather this was the first variable, i.e., '
                              'that Num=0, but will not insist. ', opf)

        # BUG FIX: cdflib exposes Rec_Vary as a bool, so the original
        # comparison with the string 'True' was always False and the
        # DEPEND_0 check never fired; str() handles both representations.
        if (str(VDRInfo.Rec_Vary) == 'True') and (v != f_time_var_name):
            if 'DEPEND_0' not in va:
                printANDwrite('DEPEND_0 is missing since Rec_Vary is True', opf)

        # "Only mandatory for time varying data and support_data" but I guess
        # this MUST be time-varying so I won't put in the check for support_data
        if (('VALIDMIN' in va) | ('VALIDMAX' in va) |
                ('SCALEMIN' in va) | ('SCALEMAX' in va) | ('FILLVAL' in va)):
            pass
        else:
            printANDwrite('MINs and/or MAXs and/or FILLVAL missing', opf)

        print(f'VDRInfo.Dim_Sizes {VDRInfo.Dim_Sizes}')
        # Exactly one dimension of size 3 or 6 suggests a vector/tensor
        if len(VDRInfo.Dim_Sizes) == 1:
            if (VDRInfo.Dim_Sizes[0] == 3) | (VDRInfo.Dim_Sizes[0] == 6):
                printANDwrite('Is this a vector or tensor?', opf)
                if 'COORDINATE_SYSTEM' not in va:
                    printANDwrite('no COORDINATE_SYSTEM', opf)
                if 'FRAME_ORIGIN' not in va:
                    printANDwrite('no FRAME_ORIGIN', opf)
                if 'REPRESENTATION_1' not in va:  # REPRESENTATION_i
                    printANDwrite('no REPRESENTATION_1', opf)
                if 'TENSOR_ORDER' not in va:
                    printANDwrite('no TENSOR_ORDER', opf)

        if va.get('VAR_TYPE') == 'data':
            if 'DISPLAY_TYPE' not in va:
                printANDwrite('VAR_TYPE is data, but DISPLAY_TYPE missing', opf)

        if (va.get('VAR_TYPE') == 'data') | (va.get('VAR_TYPE') == 'support_data'):
            if not (('SCALETYP' in va) | ('SCAL_PTR' in va)):
                printANDwrite('VAR_TYPE is data or support_data, but SCALE keywords missing', opf)
            if not (('FORMAT' in va) | ('FORM_PTR' in va)):
                printANDwrite('FORMAT keywords missing', opf)
            if not (('LABLAXIS' in va) | ('LABL_PTR_1' in va)):
                printANDwrite('LABEL keywords missing', opf)

        printANDwrite('\n', opf)