; $Id: read_ascii.pro,v 1.38 2001/08/27 16:17:56 scottm Exp $ ; ; Copyright (c) 1996-2001, Research Systems, Inc. All rights reserved. ; Unauthorized reproduction prohibited. ;+ ; NAME: ; READ_ASCII ; ; PURPOSE: ; Read data from an ASCII file into IDL. ; ; CATEGORY: ; Input/Output. ; ; CALLING SEQUENCE: ; data = READ_ASCII(file) ; ; INPUTS: ; file - Name of file to read. ; ; INPUT KEYWORD PARAMETERS: ; record_start - 1st sequential "record" (see DESCRIPTION) to read. ; Default = 0 (the first record of the file). ; num_records - Number of records to read. ; Default = 0 = Read up to and including the last record. ; ; template - ASCII file template (e.g., generated by function ; ASCII_TEMPLATE) describing attributes of the file ; to read. Specific attributes contained in the ; template may be overridden by keywords below. ; Default = (see the keywords below). ; ; data_start - Number of lines of header to skip. ; Default (if no template) = 0L. ; delimiter - Character that delimits fields. ; Default (if no template) = '' = use fields(*).loc. ; missing_value - Value to replace any missing/invalid data. ; Default (if no template) = !VALUES.F_NAN. ; comment_symbol - String identifying comments ; (from comment_symbol to the next end-of-line). ; Default (if no template) = '' = no comments. ; ; [Note: The 'fields' keyword has not been implemented yet.] ; fields - Descriptions of the data fields, formatted as ; an array of structures containing the tags: ; name = name of the field (string) ; type = type of field as returned by SIZE (long) ; loc = offset from the beginning of line to ; the start of the field (long) ; group = sequential group the field is in (int) ; Default (if no template) = ; {name:'field', type:4L, loc:0L, group:0}. ; ; verbose - If set, print runtime messages. ; Default = Do not print them. ; ; OUTPUT KEYWORD PARAMETERS: ; header - The header read (string array of length ; data_start). If no header, empty string returned. ; ; count - The number of records read. ; ; OUTPUTS: ; The function returns an anonymous structure, where each field in ; the structure is a "field" of the data read (see DESCRIPTION). ; If no records are read, 0 is returned. ; ; COMMON BLOCKS: ; None. ; ; SIDE EFFECTS: ; None. ; ; RESTRICTIONS: ; See DESCRIPTION. ; ; DESCRIPTION: ; ASCII files handled by this routine consist of an optional header ; of a fixed number of lines, followed by columnar data. Files may ; also contain comments, which exist between a user-specified comment ; string and the corresponding end-of-line. ; ; One or more rows of data constitute a "record." Each data element ; within a record is considered to be in a different column, or "field." ; Adjacent fields may be "grouped" into multi-column fields. ; The data in one field must be of, or promotable to, a single ; type (e.g., FLOAT). ; ; EXAMPLES: ; ; Using default file attributes. ; data = READ_ASCII(file) ; ; ; Setting specific file attributes. ; data = READ_ASCII(file, DATA_START=10) ; ; ; Using a template to define file attributes. ; data = READ_ASCII(file, TEMPLATE=template) ; ; ; Using a template to define file attributes, ; ; and overriding some of those attributes. ; data = READ_ASCII(file, TEMPLATE=template, DATA_START=10) ; ; ; Using the ASCII_TEMPLATE GUI to generate a template in place. ; data = READ_ASCII(file, TEMPLATE=ASCII_TEMPLATE(file)) ; ; [Note: The 'fields' keyword has not been implemented yet.] ; ; An example defining fields by hand. ; fields = REPLICATE({name:'', type:0L, loc:0L, group:0}, 2, 3) ; num = N_ELEMENTS(fields) ; fields(*).name = 'field' + STRTRIM(STRING(INDGEN(num) + 1), 2) ; fields(*).type = REPLICATE(4L, num) ; fields(*).loc = [0L,10L, 0L,15L, 0L,12L] ; fields(*).group = INDGEN(num) ; data = READ_ASCII(file, FIELDS=fields) ; ; [Note: The 'fields' keyword has not been implemented yet.] ; ; Another example defining fields by hand. ; void = {sMyStructName, name:'', type:0L, loc:0L, group:0} ; fields = [ [ {sMyStructName, 'frog', (SIZE(''))(1), 0L, 0}, $ ; {sMyStructName, 'bird', (SIZE(0 ))(1), 15L, 1} ], $ ; [ {sMyStructName, 'fish', (SIZE(0.))(1), 0L, 2}, $ ; {sMyStructName, 'bear', (SIZE(0D))(1), 15L, 3} ], $ ; [ {sMyStructName, 'boar', (SIZE(0B))(1), 0L, 4}, $ ; {sMyStructName, 'nerd', (SIZE(OL))(1), 15L, 5} ] ] ; data = READ_ASCII(file, FIELDS=fields) ; ; DEVELOPMENT NOTES: ; ; - See ???,xxx in the code. ; ; - Error check input 'delimiter' to be a string (not a byte). ; ; - Implement the 'fields' keyword. ; ; MODIFICATION HISTORY: ; AL & RPM, 8/96 - Written. ; PCS, 3/99 - Deploy STRTOK and other new commands. Gain some speed. ;- ; ----------------------------------------------------------------------------- ; ; Purpose: Parse out values from a line of text which are in columns. ; pro ra_parse_column_values, line, types, p_vals, rec_count, $ locs, lengths, missingValue, num_fields compile_opt HIDDEN, STRICTARR on_ioerror, column_cast_failed nf1 = num_fields - 1 for i=0, nf1 do begin if (types[i] ne 0) then begin ; (0 == skip field.) token = (i eq nf1) ? STRTRIM(STRMID(line, locs[i]),2) : $ STRTRIM(STRMID(line, locs[i], lengths[i]),2) ; Assign substring to the variable. This will automatically do ; any necessary type conversions. (*p_vals[i])[rec_count] = (STRLEN(token) ne 0) ? token : $ (types[i] eq 7) ? token : missingValue continue column_cast_failed: message, /reset (*p_vals[i])[rec_count] = missingValue endif endfor ; i end ; ----------------------------------------------------------------------------- ; ; Purpose: Parse out values from a line of text which are separated by ; a given delimiter. ; pro ra_parse_delim_values, line, types, p_vals, rec_count, $ delimit, missing_value, whitespace_delimited compile_opt HIDDEN, STRICTARR ; Remove whitespace from beginning and end. toks = whitespace_delimited ? STRTOK(line, /EXTRACT) : $ STRTRIM(STRTOK(line, delimit, /EXTRACT, /PRESERVE_NULL), 2) length = STRLEN(toks) on_ioerror, delim_cast_failed n_types = N_ELEMENTS(types) n_toks = N_ELEMENTS(toks) nMin1 = (n_types < n_toks) - 1 ; Loop up to the end of the tokens or the number of fields, whichever ; is smaller. Empty fields will be filled in after this loop. for i=0,nMin1 do begin if (types[i] ne 0) then begin ; (0 == skip field.) ; Assign the substring to the variable. This will automatically do ; any necessary type conversions. (*p_vals[i])[rec_count] = (length[i] ne 0) ? toks[i] : $ ((types[i] eq 7) ? toks[i] : missing_value) ; If successful conversion, then continue the loop. continue delim_cast_failed: ; If failed conversion, suppress the error and fill with missing. message, /reset (*p_vals[i])[rec_count] = missing_value endif endfor ; Need to fill in extra fields with missing. if (n_toks lt n_types) then begin for i=n_toks, n_types-1 do begin if (types[i] gt 0) then $ (*p_vals[i])[rec_count] = missing_value endfor endif end ; ra_parse_delim_values ; ----------------------------------------------------------------------------- ; ; Purpose: Read in the next n lines of text (skipping blank lines and ; commented lines signified by template.commentSymbol at start; ; also throw away comment portions of regular lines). ; function ra_get_next_record, template, unit, lines ; COMPILE_OPT hidden, strictarr on_ioerror, end_of_file line = '' count = 0 ; Checking for comments... ; if (template.commentSymbol ne '') then begin while (count lt n_elements(lines)) do begin readf, unit, line pos = strpos(line, template.commentSymbol, 0) if (strtrim(line,2) ne '' and pos[0] ne 0) then begin lines[count] = (pos[0] eq -1) ? line : strmid(line,0,pos[0]) count = count + 1 endif endwhile ; NOT checking for comments... ; endif else begin while (count lt n_elements(lines)) do begin readf, unit, line if (strlen(strtrim(line,2)) ne 0) then begin lines[count] = line count = count + 1 endif endwhile endelse return, 0 ; success end_of_file: ; If read failed, suppress message and return EOF. message, /reset return, 1 ; failure, EOF end ; ra_get_next_record ; ----------------------------------------------------------------------------- ; ; Purpose: Given a template structure, open an ASCII file and parse out the ; numerical and string values based upon the parameters of the ; given template. ; ; (a) white space separates fields lined up in columns ; (b) a delimiter character separates fields ; ; Note: When skipping to the start of the data, blank lines ARE included ; as lines to skip, but once you get to the data, subsequent blank ; lines (as well as comment lines) are ignored. ; ; Function returns an array of pointers to the data read; ; if no data read, 0 is returned. ; function ra_read_from_templ, $ name, $ ; IN: name of ASCII file to read template, $ ; IN: ASCII file template start_record, $ ; IN: first record to read records_to_read, $ ; IN: number of records to read doVerbose, $ ; IN: 1B = print runtime messages num_fields_read, $ ; OUT: number of fields successfully read fieldNames, $ ; OUT: associated name of each field read rec_count, $ ; OUT: number of records successfully read num_blocks, $ ; OUT: number of blocks of data header=header ; OUT: (opt) header read COMPILE_OPT hidden, strictarr ; Set default numbers. ; num_fields_read = 0 num_blocks = 0L ; Catch errors. catch, error_status if (error_status ne 0) then begin print,'Unexpected Error: ' + !ERROR_STATE.msg rec_count = 0l return, 0 endif ; Open the file. ; openr, unit, name, /get_lun ; Set various parameters. ; blk_size = 1000 ; each block holds this many records blk_count = 500 ; number of blocks we can have blk_grow = 500 current_block = 0L lines_per_record = n_elements(template.fieldCount) num_fields = template.fieldCount tot_num_fields = total(template.fieldCount) types = template.fieldTypes locs = template.fieldLocations ; The length of the last field depends upon the line length, ; so here just make it some arbitrary number. fieldLengths = (n_elements(locs) gt 1) ? [locs[1:*],0] - locs : 0 ; Define an array of variables for each field. ; p_vals = ptrarr(tot_num_fields, blk_count) for i=0, tot_num_fields-1 do $ if (types[i] gt 0) then $ p_vals[i, current_block] = ptr_new(make_array(blk_size, type=types[i])) ; Read the header and skip to the start of the data. ; dataStart = template.dataStart if (dataStart gt 0) then begin if (doVerbose) then $ print, 'Reading header of ' + strtrim(string(dataStart), 2) + $ ' lines ...', format='(A/)' header = strarr(dataStart) readf, unit, header endif else $ header = '' ; Skip to the start of requested data. ; lines = strarr(lines_per_record) if ((doVerbose) and (start_record gt 0)) then $ print, 'Skipping ' + strtrim(string(start_record), 2) + ' records ...', $ format='(A/)' for i = 0L, start_record-1 do $ end_reached = RA_GET_NEXT_RECORD(template, unit, lines) if template.delimiter eq 32b then begin delim_str = string([32b, 9b]) whitespace_delimited = 1b end else begin delim_str = string(template.delimiter) whitespace_delimited = 0b endelse nRecord1 = (records_to_read gt 0) ? records_to_read-1L : 2147483647L for rec_count = 0L, nRecord1 do begin ; Read a record. ; end_reached = RA_GET_NEXT_RECORD(template, unit, lines) if (end_reached) then break ; out of the for loop ;xxx if (doVerbose) then $ print, 'Processing sequential record ' + $ strtrim(string(rec_count+1), 2) + ' ...' anchor = 0 rc = rec_count-current_block*blk_size ; For each line in the record... ; for i=0,lines_per_record-1 do begin if (template.delimiter eq 0B) then begin ; nice columned data... ra_parse_column_values, lines[i], $ types[ anchor:anchor+num_fields[i]-1], $ p_vals[anchor:anchor+num_fields[i]-1, current_block], $ rc, $ locs[ anchor:anchor+num_fields[i]-1], $ fieldLengths[anchor:anchor+num_fields[i]-1], $ template.missingValue, $ num_fields[i] endif else begin ; data separated by a delimiter... ra_parse_delim_values, lines[i], $ types[ anchor:anchor+num_fields[i]-1], $ p_vals[anchor:anchor+num_fields[i]-1, current_block], $ rc, $ delim_str, $ template.missingValue, $ whitespace_delimited endelse anchor = anchor + num_fields[i] endfor ; i ; If block is now full, ; Allocate and point to a new block ; if ((rec_count+1) mod blk_size eq 0) then begin current_block = current_block + 1 if (current_block eq blk_count) then begin p_vals = [[p_vals], [ptrarr(tot_num_fields, blk_grow)]] blk_count = blk_count + blk_grow endif for i=0, tot_num_fields-1 do if (types[i] gt 0) then $ p_vals[i, current_block] = $ ptr_new(make_array(blk_size, type=types[i])) endif ; new block endfor ; read ; ------------------------------------ free_lun, unit if (doVerbose) then $ print, 'Total records read: ' + strtrim(string(rec_count), 2), $ format='(A/)' ; If records were read ... ; if (rec_count gt 0) then begin ; Set the output arrays to exactly the correct size. ; for i=0, tot_num_fields-1 do begin if (p_vals[i,current_block] ne ptr_new()) then begin if (rec_count gt current_block*blk_size) then begin *p_vals[i,current_block] = $ (*p_vals[i,current_block])[0:rec_count-current_block*blk_size-1] endif else begin ; block is allocated, but empty ptr_free, p_vals[i,current_block] endelse endif endfor if (rec_count gt current_block*blk_size) then begin num_blocks = current_block + 1 endif else begin num_blocks = current_block endelse ; Check the groups array and arrange the output pointers into ; (potentially) groups of 2-D arrays. ; groups = template.fieldGroups ; Don't include any groups which are skipped fields. ; ptr = where(types eq 0, numSkip) for i=0, numSkip-1 do groups[ptr[i]] = max(groups) + 1 ; Concatenate 1-D arrays into multi arrays based upon groupings. ; uptr = uniq(groups, sort(groups)) if (n_elements(uptr) lt n_elements(groups)) then begin for i=0, n_elements(uptr)-1 do begin for b=0, num_blocks-1 do begin lptr = where(groups eq groups[uptr[i]], lcount) if (lcount gt 1) then begin p_new = p_vals[lptr[0],b] for j=1,lcount-1 do begin *p_new = [[temporary(*p_new)],[temporary(*p_vals[lptr[j],b])]] ptr_free, p_vals[lptr[j],b] p_vals[lptr[j],b] = ptr_new() endfor *p_new = transpose(temporary(*p_new)) endif endfor endfor endif ; Return the pointers that contain data, if any. ; and the associated fieldNames for these pointers ; ptr = where(p_vals[*,0] ne ptr_new(), num_fields_read) if (num_fields_read gt 0) then begin ; data successfully read fieldNames = template.fieldNames[ptr] return, p_vals[ptr,*] endif else begin ; no data read rec_count = 0l return, 0 endelse endif else $ ; no data read return, 0 end ; ra_read_from_templ ; ----------------------------------------------------------------------------- ; ; Purpose: Return 1B if the template is valid, else 0B. ; function ra_valid_template, $ template, $ ; IN: template to check message ; OUT: error message if the template is not valid COMPILE_OPT hidden, strictarr message = '' ; Make sure it's a structure. ; sz = size(template) if (sz[sz[0]+1] ne 8) then begin message = 'Template is not a structure.' RETURN, 0B endif ; Get tag names and make sure version field is present. ; tagNamesFound = TAG_NAMES(template) void = WHERE(tagNamesFound eq 'VERSION', count) if (count ne 1) then begin message = 'Version field missing from template.' RETURN, 0B endif ; Do checking based on version. ; case (template.version) of 1.0: begin ; Set the names of the required tags (version alread checked). ; tagNamesRequired = STRUPCASE([ $ 'dataStart', 'delimiter', 'missingValue', 'commentSymbol', $ 'fieldCount', 'fieldTypes', 'fieldNames', 'fieldLocations', $ 'fieldGroups']) ; Check that all of the required tags are present. ; for seqTag = 0, N_ELEMENTS(tagNamesRequired)-1 do begin tag = tagNamesRequired[seqTag] void = WHERE(tagNamesFound eq tag, count) if (count ne 1) then begin message = tag + ' field missing from template.' RETURN, 0B endif endfor end else: begin message = 'The only recognized template version is 1.0 (float).' RETURN, 0B end endcase ; Return that the template is valid. ; RETURN, 1B end ; ra_valid_template ; ----------------------------------------------------------------------------- ; ; Purpose: Convert to string and remove extra white space. ; function ra_stringit, value COMPILE_OPT hidden, strictarr result = STRTRIM( STRCOMPRESS( STRING(value) ), 2 ) num = N_ELEMENTS(result) if (num le 1) then RETURN, result ; If two or more values, concatenate them. ; delim = ' ' ret = result[0] for i = 1, num-1 do $ ret = ret + delim + result[i] RETURN, ret end ; ra_stringit ; ----------------------------------------------------------------------------- ; ; Purpose: Guess at the number of columns in an ASCII file. ; function ra_guess_columns, fname, dataStart, commentSymbol, delimiter COMPILE_OPT hidden, strictarr on_error, 2 ; Return to caller on error. catch, err_stat if err_stat ne 0 then begin catch, /cancel if n_elements(lun) gt 0 then begin close, lun free_lun, lun end message, !error_state.msg endif get_lun, lun openr, lun, fname if dataStart gt 0 then begin header = strarr(dataStart) readf, lun, header end line = '' end_reached = RA_GET_NEXT_RECORD({commentSymbol: commentSymbol}, $ lun, line) if end_reached then $ message, 'No columns found.' if delimiter eq ' ' then $ positions = strtok(line) $ else $ positions = strtok(line, delimiter, /preserve_null) close, lun free_lun, lun return, n_elements(positions) end ; ----------------------------------------------------------------------------- ; ; Purpose: Check that the input filename is a string, exists, and appears ; to be ASCII. ; function ra_check_file, fname COMPILE_OPT hidden, strictarr catch, error_status if (error_status ne 0) then begin if (n_elements(unit) gt 0) then free_lun, unit return, -3 ; unexpected error reading from file endif ; info = size(fname) if (info[info[0]+1] ne 7) then return, -1 ; filename isn't a string ; openr, unit, fname, error=error, /get_lun if (error eq 0) then begin finfo = fstat(unit) ; set non-ascii values in lookup table ; lut = bytarr(256) + 1b lut[7:13] = 0b lut[32:127] = 0b data = bytarr(32767 n_elements(fieldNames) $ > n_elements(fieldLocations) $ > n_elements(fieldGroups) if fieldCountUse le 0 then $ fieldCountUse = ra_guess_columns( $ file, $ dataStartUse, $ commentSymbolUse, $ delimiterUse $ ) fieldTypesUse = REPLICATE(4L, fieldCountUse) digits_str = strtrim(string(strlen(strtrim(string(fieldCountUse),2))),2) fstr = '(i' + digits_str + '.' + digits_str + ')' fieldNamesUse = 'field' + STRING(INDGEN(fieldCountUse)+1, format=fstr) fieldLocationsUse = LONARR(fieldCountUse) fieldGroupsUse = INTARR(fieldCountUse) endelse if n_elements(fieldTypes) ne 0 then $ fieldTypesUse = fieldTypes if n_elements(fieldNames) ne 0 then $ fieldNamesUse = fieldNames if n_elements(fieldLocations) ne 0 then $ fieldLocationsUse = fieldLocations if n_elements(fieldGroups) ne 0 then $ fieldGroupsUse = fieldGroups ; Error check the field data. ; lengths = [ $ N_ELEMENTS(fieldTypesUse), $ N_ELEMENTS(fieldNamesUse), $ N_ELEMENTS(fieldLocationsUse), $ N_ELEMENTS(fieldGroupsUse) $ ] if (TOTAL(ABS(lengths - SHIFT(lengths, 1))) ne 0) then $ MESSAGE, 'Field data (types/names/locs/groups) not the same length.' ; Set the template to use. ; templateUse = { $ version: versionUse, $ dataStart: dataStartUse, $ delimiter: BYTE(delimiterUse), $ missingValue: missingValueUse, $ commentSymbol: commentSymbolUse, $ fieldCount: fieldCountUse, $ fieldTypes: fieldTypesUse, $ fieldNames: fieldNamesUse, $ fieldLocations: fieldLocationsUse, $ fieldGroups: fieldGroupsUse $ } ; Print verbose information. ; if (doVerbose) then begin PRINT, 'Using the following file attributes ...', FORMAT='(/A)' PRINT, ' Data Start: ' + STRTRIM(STRING(dataStartUse), 2) PRINT, ' Delimiter: ' + $ STRTRIM(STRING(FIX(BYTE(delimiterUse))), 2) + 'B' PRINT, ' Missing Value: ' + STRTRIM(STRING(missingValueUse), 2) PRINT, ' Comment Symbol: ' + commentSymbolUse PRINT, ' Field Counts: ' + ra_stringit(fieldCountUse) PRINT, ' Field Types : ' + ra_stringit(fieldTypesUse) PRINT, ' Field Names : ' + ra_stringit(fieldNamesUse) PRINT, ' Field Locs : ' + ra_stringit(fieldLocationsUse) PRINT, ' Field Groups: ' + ra_stringit(fieldGroupsUse) PRINT, ' Template Version: ' + STRTRIM(STRING(versionUse), 2) PRINT endif ; Try to read the file. ; pData = ra_read_from_templ(file, templateUse, recordStartUse, $ numRecordsUse, doVerbose, numFieldsRead, FieldNames, count, num_blocks, header=header) ; Return zero if no records read. ; if (count eq 0) then RETURN, 0 ; Concatenate the blocks into fields. ; xData = ptrarr(numFieldsRead) for f=0L, numFieldsRead-1 do begin type = SIZE(*pData[f,0], /TYPE) dims = SIZE(*pData[f,0], /DIMENSIONS) n_dims = SIZE(*pData[f,0], /N_DIMENSIONS) if (count eq 1) then begin ; if the file contains a single record, it is really ; two-dimensional: n fields x 1 record n_dims = 2 dims = lonarr(2) dims[0] = SIZE(*pData[f,0],/N_ELEMENTS) endif dims[n_dims-1] = count xData[f] = ptr_new(make_array(DIMENSION=dims, TYPE=type)) start=0L for b=0L, num_blocks-1 do begin sz = SIZE(*pData[f,b],/N_ELEMENTS) stop = start + sz - 1 (*xData[f])[start:stop] = *pData[f,b] ptr_free, pData[f,b] start = stop + 1 endfor endfor ; Put the fields into a structure. ; For a small number of fields, ; use EXECUTE to build the struct all at once, instead of looping ; one time for each field. This saves a lot of copying when there ; are a lot of records. ; ; If the number of fields is larger, we cannot use EXECUTE since we run ; the risk of running out of code space in the compiler, so then use ; create_struct recursively. ; data = create_struct(strcompress(FieldNames[0],/rem), temporary(*xData[0])) if (numFieldsRead LE 10) then begin if (numFieldsRead GT 1) then begin callString = 'data = create_struct(temporary(data)' for i=1, numFieldsRead-1 do $ callString = callString + $ ', strcompress(FieldNames['+STRING(i)+'],/rem)' + $ ', temporary(*xData['+STRING(i)+'])' callString = callString + ')' r = EXECUTE(callString) endif endif else begin for i=1, numFieldsRead-1 do $ data = create_struct(temporary(data), $ strcompress(FieldNames[i],/rem), temporary(*xData[i])) endelse ; Clean up the heap data. ; for f = 0L, numFieldsRead-1 do $ PTR_FREE, xData[f] ; Print verbose information. ; if (doVerbose) then begin PRINT, 'Output data ...' HELP, data, /STRUCTURES PRINT endif ; Return the structure. ; RETURN, data end ; read_ascii ; -----------------------------------------------------------------------------