GRASS GIS 8 Programmer's Manual  8.4.0dev(2024)-8cbe8fef7c
class.c
Go to the documentation of this file.
1 /* functions to classify sorted arrays of doubles and fill a vector of
2  * classbreaks */
3 
4 #include <grass/glocale.h>
5 #include <grass/arraystats.h>
6 
7 int AS_option_to_algorithm(const struct Option *option)
8 {
9  if (G_strcasecmp(option->answer, "int") == 0)
10  return CLASS_INTERVAL;
11  if (G_strcasecmp(option->answer, "std") == 0)
12  return CLASS_STDEV;
13  if (G_strcasecmp(option->answer, "qua") == 0)
14  return CLASS_QUANT;
15  if (G_strcasecmp(option->answer, "equ") == 0)
16  return CLASS_EQUIPROB;
17  if (G_strcasecmp(option->answer, "dis") == 0)
18  return CLASS_DISCONT;
19 
20  G_fatal_error(_("Unknown algorithm '%s'"), option->answer);
21 }
22 
23 double AS_class_apply_algorithm(int algo, double *data, int nrec, int *nbreaks,
24  double *classbreaks)
25 {
26  double finfo = 0.0;
27 
28  switch (algo) {
29  case CLASS_INTERVAL:
30  finfo = AS_class_interval(data, nrec, *nbreaks, classbreaks);
31  break;
32  case CLASS_STDEV:
33  finfo = AS_class_stdev(data, nrec, *nbreaks, classbreaks);
34  break;
35  case CLASS_QUANT:
36  finfo = AS_class_quant(data, nrec, *nbreaks, classbreaks);
37  break;
38  case CLASS_EQUIPROB:
39  finfo = AS_class_equiprob(data, nrec, nbreaks, classbreaks);
40  break;
41  case CLASS_DISCONT:
42  /* finfo = class_discont(data, nrec, *nbreaks, classbreaks);
43  * disabled because of bugs */
45  _("Discont algorithm currently not available because of bugs"));
46  break;
47  default:
48  break;
49  }
50 
51  if (finfo == 0)
52  G_fatal_error(_("Classification algorithm failed"));
53 
54  return finfo;
55 }
56 
57 int AS_class_interval(double *data, int count, int nbreaks, double *classbreaks)
58 {
59  double min, max;
60  double step;
61  int i = 0;
62 
63  min = data[0];
64  max = data[count - 1];
65 
66  step = (max - min) / (nbreaks + 1);
67 
68  for (i = 0; i < nbreaks; i++)
69  classbreaks[i] = min + (step * (i + 1));
70 
71  return (1);
72 }
73 
74 double AS_class_stdev(double *data, int count, int nbreaks, double *classbreaks)
75 {
76  struct GASTATS stats;
77  int i;
78  int nbclass;
79  double scale = 1.0;
80 
81  AS_basic_stats(data, count, &stats);
82 
83  nbclass = nbreaks + 1;
84 
85  if (nbclass % 2 ==
86  1) { /* number of classes is uneven so we center middle class on mean */
87 
88  /* find appropriate fraction of stdev for step */
89  i = 1;
90  while (i) {
91  if (((stats.mean + stats.stdev * scale / 2) +
92  (stats.stdev * scale * (nbclass / 2 - 1)) >
93  stats.max) ||
94  ((stats.mean - stats.stdev * scale / 2) -
95  (stats.stdev * scale * (nbclass / 2 - 1)) <
96  stats.min))
97  scale = scale / 2;
98  else
99  i = 0;
100  }
101 
102  /* classbreaks below the mean */
103  for (i = 0; i < nbreaks / 2; i++)
104  classbreaks[i] = (stats.mean - stats.stdev * scale / 2) -
105  stats.stdev * scale * (nbreaks / 2 - (i + 1));
106  /* classbreaks above the mean */
107  for (; i < nbreaks; i++)
108  classbreaks[i] = (stats.mean + stats.stdev * scale / 2) +
109  stats.stdev * scale * (i - nbreaks / 2);
110  }
111  else { /* number of classes is even so mean is a classbreak */
112 
113  /* decide whether to use 1*stdev or 0.5*stdev as step */
114  i = 1;
115  while (i) {
116  if (((stats.mean) + (stats.stdev * scale * (nbclass / 2 - 1)) >
117  stats.max) ||
118  ((stats.mean) - (stats.stdev * scale * (nbclass / 2 - 1)) <
119  stats.min))
120  scale = scale / 2;
121  else
122  i = 0;
123  }
124 
125  /* classbreaks below the mean and on the mean */
126  for (i = 0; i <= nbreaks / 2; i++)
127  classbreaks[i] =
128  stats.mean - stats.stdev * scale * (nbreaks / 2 - i);
129  /* classbreaks above the mean */
130  for (; i < nbreaks; i++)
131  classbreaks[i] =
132  stats.mean + stats.stdev * scale * (i - nbreaks / 2);
133  }
134 
135  return (scale);
136 }
137 
138 int AS_class_quant(double *data, int count, int nbreaks, double *classbreaks)
139 {
140  int i, step;
141 
142  step = count / (nbreaks + 1);
143 
144  for (i = 0; i < nbreaks; i++)
145  classbreaks[i] = data[step * (i + 1)];
146 
147  return (1);
148 }
149 
150 int AS_class_equiprob(double *data, int count, int *nbreaks,
151  double *classbreaks)
152 {
153  int i, j;
154  double *lequi; /*Vector of scale factors for probabilities of the normal
155  distribution */
156  struct GASTATS stats;
157  int nbclass;
158 
159  nbclass = *nbreaks + 1;
160 
161  lequi = G_malloc(*nbreaks * sizeof(double));
162 
163  /* The following values come from the normal distribution and will be used
164  * as: classbreak[i] = (lequi[i] * stdev) + mean;
165  */
166 
167  if (nbclass < 3) {
168  lequi[0] = 0;
169  }
170  else if (nbclass == 3) {
171  lequi[0] = -0.43076;
172  lequi[1] = 0.43076;
173  }
174  else if (nbclass == 4) {
175  lequi[0] = -0.6745;
176  lequi[1] = 0;
177  lequi[2] = 0.6745;
178  }
179  else if (nbclass == 5) {
180  lequi[0] = -0.8416;
181  lequi[1] = -0.2533;
182  lequi[2] = 0.2533;
183  lequi[3] = 0.8416;
184  }
185  else if (nbclass == 6) {
186  lequi[0] = -0.9676;
187  lequi[1] = -0.43076;
188  lequi[2] = 0;
189  lequi[3] = 0.43076;
190  lequi[4] = 0.9676;
191  }
192  else if (nbclass == 7) {
193  lequi[0] = -1.068;
194  lequi[1] = -0.566;
195  lequi[2] = -0.18;
196  lequi[3] = 0.18;
197  lequi[4] = 0.566;
198  lequi[5] = 1.068;
199  }
200  else if (nbclass == 8) {
201  lequi[0] = -1.1507;
202  lequi[1] = -0.6745;
203  lequi[2] = -0.3187;
204  lequi[3] = 0;
205  lequi[4] = 0.3187;
206  lequi[5] = 0.6745;
207  lequi[6] = 1.1507;
208  }
209  else if (nbclass == 9) {
210  lequi[0] = -1.2208;
211  lequi[1] = -0.7648;
212  lequi[2] = -0.4385;
213  lequi[3] = -0.1397;
214  lequi[4] = 0.1397;
215  lequi[5] = 0.4385;
216  lequi[6] = 0.7648;
217  lequi[7] = 1.2208;
218  }
219  else if (nbclass == 10) {
220  lequi[0] = -1.28155;
221  lequi[1] = -0.84162;
222  lequi[2] = -0.5244;
223  lequi[3] = -0.25335;
224  lequi[4] = 0;
225  lequi[5] = 0.25335;
226  lequi[6] = 0.5244;
227  lequi[7] = 0.84162;
228  lequi[8] = 1.28155;
229  }
230  else {
232  _("Equiprobable classbreaks currently limited to 10 classes"));
233  }
234 
235  AS_basic_stats(data, count, &stats);
236 
237  /* Check if any of the classbreaks would fall outside of the range min-max
238  */
239  j = 0;
240  for (i = 0; i < *nbreaks; i++) {
241  if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
242  (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
243  j++;
244  }
245  }
246 
247  if (j < (*nbreaks)) {
248  G_warning(
249  _("There are classbreaks outside the range min-max. Number of "
250  "classes reduced to %i, but using probabilities for %i classes."),
251  j + 1, *nbreaks + 1);
252  G_realloc(classbreaks, j * sizeof(double));
253  for (i = 0; i < j; i++)
254  classbreaks[i] = 0;
255  }
256 
257  j = 0;
258  for (i = 0; i < *nbreaks; i++) {
259  if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
260  (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
261  classbreaks[j] = lequi[i] * stats.stdev + stats.mean;
262  j++;
263  }
264  }
265 
266  *nbreaks = j;
267 
268  G_free(lequi);
269  return (1);
270 }
271 
272 /* FIXME: there seems to a problem with array overflow, probably due to
273  the fact that the code was ported from fortran which has 1-based arrays */
274 double AS_class_discont(double *data, int count, int nbreaks,
275  double *classbreaks)
276 {
277  int *num, nbclass;
278  double *no, *zz, /* *nz, */ *xn, *co;
279  double *x; /* Vector standardized observations */
280  int i, j, k;
281  double min = 0, max = 0, rangemax = 0;
282  int n = 0;
283  double rangemin = 0, xlim = 0;
284  double dmax = 0.0 /*, d2 = 0.0, dd = 0.0, p = 0.0 */;
285  int nf = 0, nmax = 0;
286  double *abc;
287  int nd = 0;
288  double den = 0, d = 0;
289  int im = 0, ji = 0;
290  int tmp = 0;
291  int nff = 0, jj = 0, no1 = 0, no2 = 0;
292  double f = 0, xt1 = 0, xt2 = 0, chi2 = 1000.0, xnj_1 = 0, xj_1 = 0;
293 
294  /*get the number of values */
295  n = count;
296 
297  nbclass = nbreaks + 1;
298 
299  num = G_malloc((nbclass + 1) * sizeof(int));
300  no = G_malloc((nbclass + 1) * sizeof(double));
301  zz = G_malloc((nbclass + 1) * sizeof(double));
302  /* nz = G_malloc(3 * sizeof(double)); */
303  xn = G_malloc((n + 1) * sizeof(double));
304  co = G_malloc((nbclass + 1) * sizeof(double));
305 
306  /* We copy the array of values to x, in order to be able to standardize it
307  */
308  x = G_malloc((n + 1) * sizeof(double));
309  x[0] = n;
310  xn[0] = 0;
311 
312  min = data[0];
313  max = data[count - 1];
314  for (i = 1; i <= n; i++)
315  x[i] = data[i - 1];
316 
317  rangemax = max - min;
318  rangemin = rangemax;
319 
320  for (i = 2; i <= n; i++) {
321  if (x[i] != x[i - 1] && x[i] - x[i - 1] < rangemin)
322  rangemin = x[i] - x[i - 1]; /* rangemin = minimal distance */
323  }
324 
325  /* STANDARDIZATION
326  * and creation of the number vector (xn) */
327 
328  for (i = 1; i <= n; i++) {
329  x[i] = (x[i] - min) / rangemax;
330  xn[i] = i / (double)n;
331  }
332  xlim = rangemin / rangemax;
333  rangemin = rangemin / 2.0;
334 
335  /* Searching for the limits */
336  num[1] = n;
337  abc = G_malloc(3 * sizeof(double));
338 
339  /* Loop through possible solutions */
340  for (i = 1; i <= nbclass; i++) {
341  nmax = 0;
342  dmax = 0.0;
343  /* d2 = 0.0; */
344  nf = 0; /*End number */
345 
346  /* Loop through classes */
347  for (j = 1; j <= i; j++) {
348  nd = nf; /*Start number */
349  nf = num[j];
350  co[j] = 10e37;
351  AS_eqdrt(x, xn, nd, nf, abc);
352  den = sqrt(pow(abc[1], 2) + 1.0);
353  nd++;
354  /* Loop through observations */
355  for (k = nd; k <= nf; k++) {
356  if (abc[2] == 0.0)
357  d = fabs((-1.0 * abc[1] * x[k]) + xn[k] - abc[0]) / den;
358  else
359  d = fabs(x[k] - abc[2]);
360  /* d2 += pow(d, 2); */
361  if (x[k] - x[nd] < xlim)
362  continue;
363  if (x[nf] - x[k] < xlim)
364  continue;
365  if (d <= dmax)
366  continue;
367  dmax = d;
368  nmax = k;
369  }
370  nd--; /* A VERIFIER! */
371  if (x[nf] != x[nd]) {
372  if (nd != 0)
373  co[j] = (xn[nf] - xn[nd]) / (x[nf] - x[nd]);
374  else
375  co[j] = (xn[nf]) / (x[nf]); /* A VERIFIER! */
376  }
377  }
378  /* if (i == 1)
379  dd = d2;
380  p = d2 / dd; */
381  for (j = 1; j <= i; j++) {
382  no[j] = num[j];
383  zz[j] = x[num[j]] * rangemax + min;
384  if (j == i)
385  continue;
386  if (co[j] > co[j + 1]) {
387  zz[j] = zz[j] + rangemin;
388  continue;
389  }
390  zz[j] = zz[j] - rangemin;
391  no[j] = no[j] - 1;
392  }
393  im = i - 1;
394  if (im != 0.0) {
395  for (j = 1; j <= im; j++) {
396  ji = i + 1 - j;
397  no[ji] -= no[ji - 1];
398  }
399  }
400  if (nmax == 0) {
401  break;
402  }
403  nff = i + 2;
404  tmp = 0;
405  for (j = 1; j <= i; j++) {
406  jj = nff - j;
407  if (num[jj - 1] < nmax) {
408  num[jj] = nmax;
409  tmp = 1;
410  break;
411  }
412  num[jj] = num[jj - 1];
413  }
414  if (tmp == 0) {
415  num[1] = nmax;
416  jj = 1;
417  }
418  if (jj == 1) {
419  xnj_1 = 0;
420  xj_1 = 0;
421  }
422  else {
423  xnj_1 = xn[num[jj - 1]];
424  xj_1 = x[num[jj - 1]];
425  }
426  no1 = (xn[num[jj]] - xnj_1) * n;
427  no2 = (xn[num[jj + 1]] - xn[num[jj]]) * n;
428  f = (xn[num[jj + 1]] - xnj_1) / (x[num[jj + 1]] - xj_1);
429  f *= n;
430  xt1 = (x[num[jj]] - xj_1) * f;
431  xt2 = (x[num[jj + 1]] - x[num[jj]]) * f;
432  if (xt2 == 0) {
433  xt2 = rangemin / 2.0 / rangemax * f;
434  xt1 -= xt2;
435  }
436  else if (xt1 * xt2 == 0) {
437  xt1 = rangemin / 2.0 / rangemax * f;
438  xt2 -= xt1;
439  }
440 
441  /* calculate chi-square to indicate statistical significance of new
442  * class, i.e. how probable would it be that the new class could be the
443  * result of purely random choice */
444  if (chi2 > pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2))
445  chi2 = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2);
446  }
447 
448  /* Fill up classbreaks of i <=nbclass classes */
449  for (j = 0; j <= (i - 1); j++)
450  classbreaks[j] = zz[j + 1];
451 
452  return (chi2);
453 }
454 
455 int AS_class_frequencies(double *data, int count, int nbreaks,
456  double *classbreaks, int *frequencies)
457 {
458  int i, j;
459 
460  /* min = data[0];
461  max = data[count - 1]; */
462  /* count cases in all classes, except for last class */
463  i = 0;
464  for (j = 0; j < nbreaks; j++) {
465  while (data[i] <= classbreaks[j]) {
466  frequencies[j]++;
467  i++;
468  }
469  }
470 
471  /*Now count cases in last class */
472  for (; i < count; i++) {
473  frequencies[nbreaks]++;
474  }
475 
476  return (1);
477 }
#define CLASS_EQUIPROB
Definition: arraystats.h:27
#define CLASS_QUANT
Definition: arraystats.h:26
#define CLASS_INTERVAL
Definition: arraystats.h:24
#define CLASS_DISCONT
Definition: arraystats.h:28
#define CLASS_STDEV
Definition: arraystats.h:25
double AS_class_discont(double *data, int count, int nbreaks, double *classbreaks)
Definition: class.c:274
int AS_class_equiprob(double *data, int count, int *nbreaks, double *classbreaks)
Definition: class.c:150
int AS_class_quant(double *data, int count, int nbreaks, double *classbreaks)
Definition: class.c:138
double AS_class_apply_algorithm(int algo, double *data, int nrec, int *nbreaks, double *classbreaks)
Definition: class.c:23
int AS_class_frequencies(double *data, int count, int nbreaks, double *classbreaks, int *frequencies)
Definition: class.c:455
int AS_class_interval(double *data, int count, int nbreaks, double *classbreaks)
Definition: class.c:57
int AS_option_to_algorithm(const struct Option *option)
Definition: class.c:7
double AS_class_stdev(double *data, int count, int nbreaks, double *classbreaks)
Definition: class.c:74
void AS_eqdrt(double[], double[], int, int, double *)
Definition: basic.c:37
void AS_basic_stats(double *, int, struct GASTATS *)
Definition: basic.c:5
void G_free(void *)
Free allocated memory.
Definition: gis/alloc.c:150
#define G_realloc(p, n)
Definition: defs/gis.h:96
void void void void G_fatal_error(const char *,...) __attribute__((format(printf
void G_warning(const char *,...) __attribute__((format(printf
#define G_malloc(n)
Definition: defs/gis.h:94
int int G_strcasecmp(const char *, const char *)
String compare ignoring case (upper or lower)
Definition: strings.c:47
#define min(x, y)
Definition: draw2.c:29
#define max(x, y)
Definition: draw2.c:30
#define _(str)
Definition: glocale.h:10
int count
double mean
Definition: arraystats.h:18
double max
Definition: arraystats.h:14
double stdev
Definition: arraystats.h:21
double min
Definition: arraystats.h:13
Structure that stores option information.
Definition: gis.h:554
char * answer
Definition: gis.h:568
#define x