/* read_data.c */ /* This software may only be used by you under license from AT&T Corp. ("AT&T"). A copy of AT&T's Source Code Agreement is available at AT&T's Internet website having the URL: If you received this software without first entering into a license with AT&T, you have an infringing copy of this software and cannot use it without violating AT&T's intellectual property rights. */ #include #include #include #include #include #include #include "vars.h" #include "externs.h" #define INITSTRSIZE 512 /* Make certain this matches GlyphTypes. */ const gchar * const GlyphNames[] = { /* "+", "x", "or", "fr", "oc", "fc", ".", ""*/ "plus", "x", "or", "fr", "oc", "fc", ".", "" }; /*------------------------------------------------------------------------*/ /* row labels */ /*------------------------------------------------------------------------*/ void rowlabels_free (datad *d, ggobid *gg) { g_array_free (d->rowlab, true); /* unsure about the 2nd arg */ } void rowlabels_alloc (datad *d, ggobid *gg) { if (d->rowlab != NULL) rowlabels_free (d, gg); d->rowlab = g_array_new (false, false, sizeof (gchar *)); } gboolean rowlabels_read (InputDescription *desc, gboolean init, datad *d, ggobid *gg) { gint i; static const gchar *const suffixes[] = { "row", "rowlab", "case" }; gchar initstr[INITSTRSIZE]; gchar *lbl; gint ncase; gboolean found = true; FILE *fp; gint whichSuffix; gchar *fileName; if (init) rowlabels_alloc (d, gg); fileName = findAssociatedFile (desc, suffixes, sizeof(suffixes)/sizeof(suffixes[0]), &whichSuffix, false); if (fileName == NULL) found = false; if( ( fp = fopen(fileName, "r") ) == NULL ) { g_free(fileName); found = false; } /* * Read in case labels or initiate them to generic if no label * file exists */ if (found) { gint k, len; ncase = 0; k = 0; /* k is the file row */ while (fgets (initstr, INITSTRSIZE-1, fp) != NULL) { len = MIN ((int) strlen (initstr), ROWLABLEN-1) ; /* trim trailing blanks, and eliminate the carriage return */ while 
(initstr[len-1] == ' ' || initstr[len-1] == '\n') len-- ; initstr[len] = '\0'; lbl = g_strdup (initstr); g_array_append_val (d->rowlab, lbl); if (ncase++ >= d->nrows) break; k++; /* read the next row ... */ } /* * If there aren't enough labels, use blank labels for * the remainder. */ if (init && ncase != d->nrows) { g_printerr ("number of labels = %d, number of rows = %d\n", ncase, d->nrows); for (i=ncase; inrows; i++) { lbl = g_strdup (" "); g_array_append_val (d->rowlab, lbl); } } } else { if (init) { /* apply defaults if initializing; else, do nothing */ for (i=0; inrows; i++) { lbl = g_strdup_printf ("%d", i+1); g_array_append_val (d->rowlab, lbl); } } } if(found) { addInputSuffix(desc, suffixes[whichSuffix]); } g_free(fileName); return (found); } /*------------------------------------------------------------------------*/ /* column labels */ /*------------------------------------------------------------------------*/ static void collabels_process_word (gchar *word, gint field, gint nvar, datad *d) { gfloat var; /*-- remove leading and trailing whitespace --*/ g_strstrip (word); switch (field) { case 0: d->vartable[nvar].lim_specified_p = false; d->vartable[nvar].collab = g_strdup (word) ; break; case 1: var = atof (word); /*-- don't set lim_specified_p to true unless both are present --*/ d->vartable[nvar].lim_specified.min = d->vartable[nvar].lim_specified_tform.min = var; break; case 2: var = atof (word); d->vartable[nvar].lim_specified_p = true; d->vartable[nvar].lim_specified.max = d->vartable[nvar].lim_specified_tform.max = var; break; default: /*-- bail out: too many fields --*/ g_printerr ("Too many fields in row %d of collab file\n", nvar+1); exit (1); } } /* * Change: we'll no longer support blanks in column names, * because we want the option of adding meaningful 2nd and 3rd * fields, which will contain min and max range values. 
*/ gboolean collabels_read (InputDescription *desc, gboolean init, datad *d, ggobid *gg) { static const gchar * const suffixes[] = { "col", "column", "collab", "var" }; gint j, nvar = 0; gboolean found = true; FILE *fp; gchar *fileName; int whichSuffix; gchar str[INITSTRSIZE]; fileName = findAssociatedFile(desc, suffixes, sizeof(suffixes)/sizeof(suffixes[0]), &whichSuffix, false); if(fileName == NULL) found = false; if( found && ( fp = fopen(fileName, "r") ) == NULL ) { g_free (fileName); found = false; } /* * Read in variable labels or initiate them to generic if no label * file exists */ if (found) { gint ch, len = 0, field = 0; gboolean fieldsep = false; nvar = 0; while ((ch = fgetc (fp)) != EOF) { /*-- blank or tab --*/ /*if (ch == ' ' || ch == ' ') {*/ if (ch == '|') { fieldsep = true; } else if (ch == '\n') { /*-- process preceding string --*/ str[len] = '\0'; collabels_process_word (str, field, nvar, d); field = len = 0; nvar++; if (nvar >= d->ncols) break; fieldsep = false; } else { /*-- process the next character --*/ /*-- if following a field separator, process string --*/ if (fieldsep && len > 0) { /*-- process string --*/ str[len] = '\0'; collabels_process_word (str, field, nvar, d); field++; len = 0; } if (field == 0 && len == COLLABLEN-1) { ; /*-- make sure the column label isn't too long */ } else { /*-- append character to str --*/ str[len] = ch; len++; if (len > INITSTRSIZE) break; fieldsep = false; } } } if (init && nvar != d->ncols) { g_printerr ("number of labels = %d, number of cols = %d\n", nvar, d->ncols); if (d->single_column) { /*-- will this be triggered? 
--*/ g_free (d->vartable[1].collab); d->vartable[1].collab = g_strdup_printf ("%s", d->vartable[0].collab); g_free (d->vartable[0].collab); d->vartable[0].collab = g_strdup ("Index"); } else { for (j=nvar; jncols; j++) d->vartable[j].collab = g_strdup_printf ("Var %d", j+1); } } } else { if (init) { for (j=0; jncols; j++) { d->vartable[j].lim_specified_p = false; d->vartable[j].collab = g_strdup_printf ("Var %d", j+1); } } } for (j=0; jncols; j++) { d->vartable[j].collab_tform = g_strdup (d->vartable[j].collab); } if(found) { addInputSuffix(desc, suffixes[whichSuffix]); } g_free(fileName); return (found); } /*------------------------------------------------------------------------*/ /* row groups */ /*------------------------------------------------------------------------*/ void rgroups_free (datad *d, ggobid *gg) { gint i, j; for (i=0; inrgroups; i++) for (j=0; jrgroups[i].nels; j++) g_free ((gpointer) d->rgroups[i].els); g_free ((gpointer) d->rgroups); g_free ((gpointer) d->rgroup_ids); } gboolean rgroups_read (gchar *ldata_in, gboolean init, datad *d, ggobid *gg) /* * Read in the grouping numbers for joint scaling of variables */ { gchar *suffixes[] = {"rgroups"}; gint itmp, i, k; gboolean found = false; gboolean found_rg; FILE *fp; gint *nels; gint nr; if (d->nrgroups > 0) rgroups_free (d, gg); if (ldata_in != NULL && ldata_in != "" && strcmp (ldata_in, "stdin") != 0) if ((fp = open_ggobi_file_r (ldata_in, 1, suffixes, true)) != NULL) found = true; if (!found) { d->nrgroups = 0; } else { /* * If this isn't the first time we've read files, then * see if the rgroups structures should be freed. 
*/ if (!init) if (d->nrgroups > 0) rgroups_free (d, gg); /* rgroup_ids starts by containing the values in the file */ d->rgroup_ids = (gint *) g_malloc (d->nrows * sizeof (gint)); nels = (gint *) g_malloc (d->nrows * sizeof (gint)); i = 0; while ((fscanf (fp, "%d", &itmp) != EOF) && (i < d->nrows)) d->rgroup_ids[i++] = itmp; /* check the number of group ids read -- should be nrows */ if (init && i < d->nrows) { g_printerr ( "Number of rows and number of row group types do not match.\n"); g_printerr ("Creating extra generic groups.\n"); for (k=i; knrows; k++) d->rgroup_ids[k] = k; } /* * Initialize the global variables: nrows row groups, * nrows/10 elements in each group */ d->rgroups = (rgroupd *) g_malloc (d->nrows * sizeof (rgroupd)); for (i=0; inrows; i++) { nels[i] = d->nrows/10; d->rgroups[i].els = (gint *) g_malloc (nels[i] * sizeof (gint)); d->rgroups[i].nels = 0; d->rgroups[i].included = true; } d->nrgroups = 0; /* * On this sweep, find out how many groups there are and how * many elements are in each group */ nr = d->nrows; for (i=0; inrgroups; k++) { /* if we've found this id before ... 
*/ if (d->rgroup_ids[i] == d->rgroups[k].id) { /* Reallocate els[k] if necessary */ if (d->rgroups[k].nels == nels[k]) { nels[k] *= 2; d->rgroups[k].els = (gint *) g_realloc ((gpointer) d->rgroups[k].els, (nels[k] * sizeof (gint))); } /* Add the element, increment the element counter */ d->rgroups[k].els[ d->rgroups[k].nels ] = i; d->rgroups[k].nels++; /* * Now the value in rgroup_ids has to change so that * it can point to the correct member in the array of * rgroups structures */ d->rgroup_ids[i] = k; found_rg = true; break; } } /* If it's a new group id, add it */ if (!found_rg) { d->rgroups[d->nrgroups].id = d->rgroup_ids[i]; /* from file */ d->rgroups[d->nrgroups].nels = 1; d->rgroups[d->nrgroups].els[0] = i; d->rgroup_ids[i] = d->nrgroups; /* rgroup_ids reset to index */ d->nrgroups++; } } d->nrgroups_in_plot = d->nrgroups; /* Reallocate everything now that we know how many there are */ d->rgroups = (rgroupd *) g_realloc ((gpointer) d->rgroups, (gulong) (d->nrgroups * sizeof (rgroupd))); /* Now reallocate the arrays within each rgroups structure */ for (k=0; knrgroups; k++) { d->rgroups[k].els = (gint *) g_realloc ((gpointer) d->rgroups[k].els, d->rgroups[k].nels * sizeof (gint)); } g_free ((gpointer) nels); } if (d->nrgroups != 0) g_printerr ("d.nrgroups=%d\n", d->nrgroups); return (found); } void readGlyphErr (void) { g_printerr ("The .glyphs file must contain either one number per line,\n"); g_printerr ("with the number between 1 and %d; using defaults,\n", NGLYPHS); g_printerr ("or a string and a number, with the string being one of\n"); g_printerr ("plus, x, or, fr, oc, fc, . and the number between 1 and %d.\n", /* g_printerr ("+, x, or, fr, oc, fc, . 
and the number between 1 and %d.\n",*/ NGLYPHSIZES); } /*------------------------------------------------------------------------*/ /* point glyphs and colors */ /*------------------------------------------------------------------------*/ gboolean point_glyphs_read (InputDescription *desc, gboolean reinit, datad *d, ggobid *gg) { gboolean ok = true; static const gchar * const suffixes[] = {"glyphs"}; gint i, k; gboolean found = true; FILE *fp; gint gid; glyphv glyph; gboolean use_defaults = false; gchar *fileName; gint whichSuffix; if (reinit) br_glyph_ids_alloc (d); fileName = findAssociatedFile (desc, suffixes, sizeof(suffixes)/sizeof(suffixes[0]), &whichSuffix, false); if (fileName == NULL) found = false; if (found && ( fp = fopen(fileName, "r") ) == NULL ) { found = false; } if (!found && reinit) br_glyph_ids_init (d, gg); else { enum { typeAndSize, glyphNumber } glyph_format; gint c, retval, gsize; gchar *gtype = (gchar *) g_malloc (16 * sizeof (gchar)); /* * For the first row, find out if we're going to be reading * %s %d (typeAndSize) or %d (glyphNumber) */ c = getc (fp); glyph_format = isdigit (c) ? glyphNumber : typeAndSize; ungetc (c, fp); i = 0; k = 0; while (i < d->nrows) { /* should there be a test on k as well? 
*/ if (glyph_format == glyphNumber) { retval = fscanf (fp, "%d", &gid); } else { fscanf (fp, "%s", gtype); /*-- we're writing out size=1 for point glyphs, so this 'if' isn't right --*/ /* gsize = 1; if (strcmp (gtype, ".") != 0) */ retval = fscanf (fp, "%d", &gsize); } if (retval <= 0) { /* not using show_message () here; reading before ggobi startup */ g_printerr ("!Error in reading glyphs file; using defaults.\n"); use_defaults = true; break; } /* * If the input is a single number on a line */ if (glyph_format == glyphNumber) { if (gid < 1 || gid > NGLYPHS) { use_defaults = true; break; } find_glyph_type_and_size (gid, &glyph); /* * Else if the input is a string and a number */ } else { glyph.type = mapGlyphName (gtype); if (glyph.type == UNKNOWN_GLYPH) { readGlyphErr (); use_defaults = true; break; } glyph.size = gsize; if (gsize < 1 || gsize > 5) { use_defaults = true; readGlyphErr (); } } if (use_defaults) { break; } d->glyph_ids[i].type = d->glyph_now[i].type = d->glyph_prev[i].type = glyph.type; d->glyph_ids[i].size = d->glyph_now[i].size = d->glyph_prev[i].size = glyph.size; i++; /* increment the array index */ k++; /* increment the file's row counter */ } g_free (gtype); fclose (fp); } if (!found || use_defaults) br_glyph_ids_init (d, gg); if(found) { addInputSuffix(desc, suffixes[whichSuffix]); } g_free(fileName); return (ok); } GlyphType mapGlyphName (const gchar *gtype) { GlyphType type; int i; type = UNKNOWN_GLYPH; for (i = 0; i < sizeof (GlyphNames)/sizeof (GlyphNames[0]) - 1; i++) { if (strcmp(gtype, GlyphNames[i]) == 0) { type = (GlyphType) (i+1); break; } } return(type); } gboolean point_colors_read (InputDescription *desc, gboolean reinit, datad *d, ggobid *gg) { gboolean ok = false; gboolean found = true; const gchar * const suffixes[] = {"colors"}; gint i, k, retval; FILE *fp; gint id; gchar *fileName; int whichSuffix; if (reinit) br_color_ids_alloc (d, gg); fileName = findAssociatedFile(desc, suffixes, sizeof(suffixes)/sizeof(suffixes[0]), 
&whichSuffix, false); if(fileName) { found = true; } else found = false; if(found && ( fp = fopen(fileName, "r") ) == NULL ) { g_free(fileName); return(false); } if (!found && reinit == true) ; /* no need to init the ids */ else { ok = true; i = 0; k = 0; while (i < d->nrows) { /* should there be a test on k as well? */ retval = fscanf (fp, "%d", &id); if (retval <= 0 || id < 0 || id >= NCOLORS) { ok = false; g_printerr ("!!Error in reading colors file; using defaults.\n"); break; } d->color_ids.els[i] = d->color_now.els[i] = d->color_prev.els[i] = id; i++; /* increment the array index */ k++; /* increment the file's row counter */ } fclose (fp); } if (!ok) br_color_ids_init (d, gg); if(found) { addInputSuffix(desc, suffixes[whichSuffix]); } g_free(fileName); return (ok); } /*------------------------------------------------------------------------*/ /* lines and line colors */ /*------------------------------------------------------------------------*/ gboolean line_colors_read (InputDescription *desc, gboolean reinit, datad *d, ggobid *gg) { gint i, id, retval; gboolean ok = true; FILE *fp; const gchar * const suffixes[] = {"linecolors"}; gchar *fileName; int whichSuffix; if (reinit) { br_line_vectors_check_size (d->nedges, d, gg); } if (!gg->mono_p) { /* * Check if line colors file exists. 
*/ fileName = findAssociatedFile (desc, suffixes, sizeof(suffixes)/sizeof(suffixes[0]), &whichSuffix, false); if(fileName == NULL) ok = false; if (ok && ( fp = fopen(fileName, "r") ) == NULL ) { ok = false; } if (!ok && reinit == true) ; /* no need to init the ids */ else { /* * read integers between 0 and 9, indices of the colors */ i = 0; while (i < d->nedges) { retval = fscanf (fp, "%d", &id); if (retval <= 0 || id < 0 || id >= NCOLORS) { ok = false; g_printerr ("!!Error in reading line colors; using defaults.\n"); break; } d->line.color.els[i] = d->line.color_now.els[i] = d->line.color_prev.els[i] = id; i++; } fclose(fp); if(ok) { addInputSuffix(desc, suffixes[whichSuffix]); } } if(fileName) g_free(fileName); } if (!ok) br_line_color_init (d, gg); return (ok); } gboolean edges_read (InputDescription *desc, gboolean startup, datad *d, ggobid *gg) /* startup - Initializing ggobi? */ { gint fs, nblocks, bsize = 500; gboolean ok = true; gint jlinks = 0; FILE *fp; /*-- if there's no edges file ... --*/ if (desc->fileName == NULL || desc->fileName[0] == '\0' || strcmp (desc->fileName, "stdin") == 0) { return (true); } else { gchar *fileName; int whichSuffix; static const gchar * const suffixes[] = {"lines"}; fileName = findAssociatedFile (desc, suffixes, sizeof(suffixes)/sizeof(suffixes[0]), &whichSuffix, false); if (fileName == NULL) { ok = false; return (false); } if ( ( fp = fopen(fileName, "r") ) == NULL ) { g_free (fileName); return (false); } if ((fp = fopen (fileName, "r")) != NULL) { gint a, b; d->nedges = 0; /* * Allocate space for connecting lines. 
*/ edges_alloc (bsize, d, gg); nblocks = 1; while (1) { fs = fscanf (fp, "%d %d", &a, &b); if (fs == EOF) break; else if (fs < 0) { g_printerr ("Error in reading .lines file\n"); return (false); break; } if (a < 1 || b > d->nrows) { g_printerr ("Entry in .lines file > number of rows or < 1\n"); return (false); } else { /* * Sort lines data such that a <= b */ if (a <= b) { d->edge_endpoints[d->nedges].a = a; d->edge_endpoints[d->nedges].b = b; } else { d->edge_endpoints[d->nedges].a = b; d->edge_endpoints[d->nedges].b = a; } (d->nedges)++; jlinks++; if (jlinks == bsize) { /* * Allocate space for more connecting links. */ nblocks++; edges_alloc (nblocks*bsize, d, gg); jlinks = 0; } } } /* end while */ /* * Close the data file */ if (fclose (fp) == EOF) g_printerr ("Error in closing .lines file"); addInputSuffix(desc, suffixes[whichSuffix]); } g_free(fileName); } return (ok); } /*------------------------------------------------------------------------*/ /* erasing */ /*------------------------------------------------------------------------*/ gboolean hidden_read (InputDescription *desc, gboolean reinit, datad *d, ggobid *gg) /* * Read in the hidden vector */ { static const gchar *const suffixes[] = {"hide"}; gint itmp, i; gboolean found = true; FILE *fp; gchar *fileName; int whichSuffix; if (reinit) hidden_alloc (d); fileName = findAssociatedFile(desc, suffixes, sizeof(suffixes)/sizeof(suffixes[0]), &whichSuffix, false); if(fileName == NULL) found = false; if( ( fp = fopen(fileName, "r") ) == NULL ) { found = false; } if (found) { i = 0; while ((fscanf (fp, "%d", &itmp) != EOF) && (i < d->nrows)) { d->hidden.els[i] = d->hidden_now.els[i] = d->hidden_prev.els[i] = (gboolean) itmp; i++; } if (i < d->nrows) { g_printerr ("Problem in reading hide file; not enough rows\n"); } else addInputSuffix(desc, suffixes[whichSuffix]); } else { if (reinit) hidden_init (d, gg); } if(fileName) g_free(fileName); return (found); } 
/*------------------------------------------------------------------------*/
/*                          missing values                                */
/*------------------------------------------------------------------------*/

/*
 * Read the missing-value indicators from the "missing" file associated
 * with desc.  The file is a sequence of integers in row-major order, one
 * per cell of the nrows x ncols data matrix; a non-zero value marks the
 * cell as missing.
 *
 *   init - when true (or when no missing values are known so far) the
 *          d->missing array is allocated first.
 *
 * On success d->missing.vals, the per-variable nmissing counts and
 * d->nmissing are updated.  If d->nmissing was already non-zero and
 * differs from the count found here, a note is printed and the file's
 * count is used.
 *
 * Returns true if the whole file was read; false if there is no such
 * file, it cannot be opened, it contains something that is not an
 * integer, or it holds more values than the data matrix has cells.
 */
gboolean
missing_values_read (InputDescription *desc, gboolean init,
  datad *d, ggobid *gg)
{
  static const gchar *const suffixes[] = {"missing"};
  gint j, ok, itmp;
  gint row = 0, col = 0;
  gint nmissing = 0;
  FILE *fp;
  gint whichSuffix;
  gchar *fileName;

  /* Without any cells there is nothing to index; treat as not read. */
  if (d->nrows <= 0 || d->ncols <= 0)
    return (false);

  fileName = findAssociatedFile (desc, suffixes,
    sizeof (suffixes) / sizeof (suffixes[0]), &whichSuffix, false);
  if (fileName == NULL)
    return (false);

  fp = fopen (fileName, "r");
  if (fp == NULL) {
    g_free (fileName);
    return (false);
  }

  if (init || d->nmissing == 0)
    arrays_alloc (&d->missing, d->nrows, d->ncols);

  for (j=0; j<d->ncols; j++)
    d->vartable[j].nmissing = 0;

  while ((ok = fscanf (fp, "%d", &itmp)) != EOF) {
    /*
     * ok is 0 when the next token is not an integer.  A value whose row
     * is already past the last one means the file is larger than the
     * data; testing the row itself also covers single-column data.
     */
    if (row >= d->nrows)
      ok = false;

    if (!ok) {
      g_print ("Problem reading %s", fileName);
      /* Report the offending cell, counting from 1. */
      g_print (" at row %d, column %d.\n", row+1, col+1);
      g_print ("Make sure dimensions of %s and %s match\n",
        desc->fileName, fileName);
      fclose (fp);
      g_free (fileName);
      return (false);
    }

    d->missing.vals[row][col] = itmp;
    if (itmp != 0) {
      nmissing++;
      d->vartable[col].nmissing++;
    }

    /* Advance to the next cell in row-major order. */
    col++;
    if (col == d->ncols) {
      col = 0;
      row++;
    }
  }

  if (d->nmissing != 0 && d->nmissing != nmissing) {
    g_print ("I found %d missing values in your data file\n", d->nmissing);
    g_print (" but %d missing values in your .missing file.", nmissing);
    g_print ("I'll use the .missing results.\n");
  }

  d->nmissing = nmissing;

  fclose (fp);
  addInputSuffix (desc, suffixes[whichSuffix]);
  g_free (fileName);

  return (true);
}