GRASS GIS 7 Programmer's Manual  7.7.svn(2018)-r73373
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
class.c
Go to the documentation of this file.
1 /* functions to classify sorted arrays of doubles and fill a vector of classbreaks */
2 
3 #include <grass/glocale.h>
4 #include <grass/arraystats.h>
5 
6 int AS_option_to_algorithm(const struct Option * option)
7 {
8  if (G_strcasecmp(option->answer, "int") == 0)
9  return CLASS_INTERVAL;
10  if (G_strcasecmp(option->answer, "std") == 0)
11  return CLASS_STDEV;
12  if (G_strcasecmp(option->answer, "qua") == 0)
13  return CLASS_QUANT;
14  if (G_strcasecmp(option->answer, "equ") == 0)
15  return CLASS_EQUIPROB;
16  if (G_strcasecmp(option->answer, "dis") == 0)
17  return CLASS_DISCONT;
18 
19  G_fatal_error(_("Unknown algorithm '%s'"), option->answer);
20 }
21 
22 double AS_class_apply_algorithm(int algo, double *data, int nrec, int *nbreaks,
23  double *classbreaks)
24 {
25  double finfo = 0.0;
26 
27  switch (algo) {
28  case CLASS_INTERVAL:
29  finfo = AS_class_interval(data, nrec, *nbreaks, classbreaks);
30  break;
31  case CLASS_STDEV:
32  finfo = AS_class_stdev(data, nrec, *nbreaks, classbreaks);
33  break;
34  case CLASS_QUANT:
35  finfo = AS_class_quant(data, nrec, *nbreaks, classbreaks);
36  break;
37  case CLASS_EQUIPROB:
38  finfo = AS_class_equiprob(data, nrec, nbreaks, classbreaks);
39  break;
40  case CLASS_DISCONT:
41  /* finfo = class_discont(data, nrec, *nbreaks, classbreaks); disabled because of bugs */
42  G_fatal_error(_("Discont algorithm currently not available because of bugs"));
43  break;
44  default:
45  break;
46  }
47 
48  if (finfo == 0)
49  G_fatal_error(_("Classification algorithm failed"));
50 
51  return finfo;
52 }
53 
54 int AS_class_interval(double *data, int count, int nbreaks, double *classbreaks)
55 {
56  double min, max;
57  double step;
58  int i = 0;
59 
60  min = data[0];
61  max = data[count - 1];
62 
63  step = (max - min) / (nbreaks + 1);
64 
65  for (i = 0; i < nbreaks; i++)
66  classbreaks[i] = min + (step * (i + 1));
67 
68  return (1);
69 }
70 
71 double AS_class_stdev(double *data, int count, int nbreaks, double *classbreaks)
72 {
73  struct GASTATS stats;
74  int i;
75  int nbclass;
76  double scale = 1.0;
77 
78  AS_basic_stats(data, count, &stats);
79 
80  nbclass = nbreaks + 1;
81 
82  if (nbclass % 2 == 1) { /* number of classes is uneven so we center middle class on mean */
83 
84  /* find appropriate fraction of stdev for step */
85  i = 1;
86  while (i) {
87  if (((stats.mean + stats.stdev * scale / 2) +
88  (stats.stdev * scale * (nbclass / 2 - 1)) > stats.max) ||
89  ((stats.mean - stats.stdev * scale / 2) -
90  (stats.stdev * scale * (nbclass / 2 - 1)) < stats.min))
91  scale = scale / 2;
92  else
93  i = 0;
94  }
95 
96  /* classbreaks below the mean */
97  for (i = 0; i < nbreaks / 2; i++)
98  classbreaks[i] =
99  (stats.mean - stats.stdev * scale / 2) -
100  stats.stdev * scale * (nbreaks / 2 - (i + 1));
101  /* classbreaks above the mean */
102  for (i = i; i < nbreaks; i++)
103  classbreaks[i] =
104  (stats.mean + stats.stdev * scale / 2) +
105  stats.stdev * scale * (i - nbreaks / 2);
106 
107  }
108  else { /* number of classes is even so mean is a classbreak */
109 
110  /* decide whether to use 1*stdev or 0.5*stdev as step */
111  i = 1;
112  while (i) {
113  if (((stats.mean) + (stats.stdev * scale * (nbclass / 2 - 1)) >
114  stats.max) ||
115  ((stats.mean) - (stats.stdev * scale * (nbclass / 2 - 1)) <
116  stats.min))
117  scale = scale / 2;
118  else
119  i = 0;
120  }
121 
122  /* classbreaks below the mean and on the mean */
123  for (i = 0; i <= nbreaks / 2; i++)
124  classbreaks[i] =
125  stats.mean - stats.stdev * scale * (nbreaks / 2 - i);
126  /* classbreaks above the mean */
127  for (i = i; i < nbreaks; i++)
128  classbreaks[i] =
129  stats.mean + stats.stdev * scale * (i - nbreaks / 2);
130  }
131 
132  return (scale);
133 }
134 
135 int AS_class_quant(double *data, int count, int nbreaks, double *classbreaks)
136 {
137  int i, step;
138 
139  step = count / (nbreaks + 1);
140 
141  for (i = 0; i < nbreaks; i++)
142  classbreaks[i] = data[step * (i + 1)];
143 
144  return (1);
145 }
146 
147 
148 int AS_class_equiprob(double *data, int count, int *nbreaks, double *classbreaks)
149 {
150  int i, j;
151  double *lequi; /*Vector of scale factors for probabilities of the normal distribution */
152  struct GASTATS stats;
153  int nbclass;
154 
155  nbclass = *nbreaks + 1;
156 
157  lequi = G_malloc(*nbreaks * sizeof(double));
158 
159  /* The following values come from the normal distribution and will be used as:
160  * classbreak[i] = (lequi[i] * stdev) + mean;
161  */
162 
163  if (nbclass < 3) {
164  lequi[0] = 0;
165  }
166  else if (nbclass == 3) {
167  lequi[0] = -0.43076;
168  lequi[1] = 0.43076;
169  }
170  else if (nbclass == 4) {
171  lequi[0] = -0.6745;
172  lequi[1] = 0;
173  lequi[2] = 0.6745;
174  }
175  else if (nbclass == 5) {
176  lequi[0] = -0.8416;
177  lequi[1] = -0.2533;
178  lequi[2] = 0.2533;
179  lequi[3] = 0.8416;
180  }
181  else if (nbclass == 6) {
182  lequi[0] = -0.9676;
183  lequi[1] = -0.43076;
184  lequi[2] = 0;
185  lequi[3] = 0.43076;
186  lequi[4] = 0.9676;
187  }
188  else if (nbclass == 7) {
189  lequi[0] = -1.068;
190  lequi[1] = -0.566;
191  lequi[2] = -0.18;
192  lequi[3] = 0.18;
193  lequi[4] = 0.566;
194  lequi[5] = 1.068;
195  }
196  else if (nbclass == 8) {
197  lequi[0] = -1.1507;
198  lequi[1] = -0.6745;
199  lequi[2] = -0.3187;
200  lequi[3] = 0;
201  lequi[4] = 0.3187;
202  lequi[5] = 0.6745;
203  lequi[6] = 1.1507;
204  }
205  else if (nbclass == 9) {
206  lequi[0] = -1.2208;
207  lequi[1] = -0.7648;
208  lequi[2] = -0.4385;
209  lequi[3] = -0.1397;
210  lequi[4] = 0.1397;
211  lequi[5] = 0.4385;
212  lequi[6] = 0.7648;
213  lequi[7] = 1.2208;
214  }
215  else if (nbclass == 10) {
216  lequi[0] = -1.28155;
217  lequi[1] = -0.84162;
218  lequi[2] = -0.5244;
219  lequi[3] = -0.25335;
220  lequi[4] = 0;
221  lequi[5] = 0.25335;
222  lequi[6] = 0.5244;
223  lequi[7] = 0.84162;
224  lequi[8] = 1.28155;
225  }
226  else {
227  G_fatal_error(_("Equiprobable classbreaks currently limited to 10 classes"));
228  }
229 
230  AS_basic_stats(data, count, &stats);
231 
232  /* Check if any of the classbreaks would fall outside of the range min-max */
233  j = 0;
234  for (i = 0; i < *nbreaks; i++) {
235  if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
236  (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
237  j++;
238  }
239  }
240 
241  if (j < (*nbreaks)) {
242  G_warning(_("There are classbreaks outside the range min-max. Number of "
243  "classes reduced to %i, but using probabilities for %i classes."),
244  j + 1, *nbreaks + 1);
245  G_realloc(classbreaks, j * sizeof(double));
246  for (i = 0; i < j; i++)
247  classbreaks[i] = 0;
248  }
249 
250  j = 0;
251  for (i = 0; i < *nbreaks; i++) {
252  if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
253  (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
254  classbreaks[j] = lequi[i] * stats.stdev + stats.mean;
255  j++;
256  }
257  }
258 
259  *nbreaks = j;
260 
261  G_free(lequi);
262  return (1);
263 }
264 
265 /* FIXME: there seems to a problem with array overflow, probably due to
266  the fact that the code was ported from fortran which has 1-based arrays*/
267 double AS_class_discont(double *data, int count, int nbreaks,
268  double *classbreaks)
269 {
270  int *num, nbclass;
271  double *no, *zz, *nz, *xn, *co;
272  double *x; /* Vector standardized observations */
273  int i, j, k;
274  double min = 0, max = 0, rangemax = 0;
275  int n = 0;
276  double rangemin = 0, xlim = 0;
277  double dmax = 0.0, d2 = 0.0, dd = 0.0, p = 0.0;
278  int nf = 0, nmax = 0;
279  double *abc;
280  int nd = 0;
281  double den = 0, d = 0;
282  int im = 0, ji = 0;
283  int tmp = 0;
284  int nff = 0, jj = 0, no1 = 0, no2 = 0;
285  double f = 0, xt1 = 0, xt2 = 0, chi2 = 1000.0, xnj_1 = 0, xj_1 = 0;
286 
287 
288  /*get the number of values */
289  n = count;
290 
291  nbclass = nbreaks + 1;
292 
293  num = G_malloc((nbclass + 1) * sizeof(int));
294  no = G_malloc((nbclass + 1) * sizeof(double));
295  zz = G_malloc((nbclass + 1) * sizeof(double));
296  nz = G_malloc(3 * sizeof(double));
297  xn = G_malloc((n + 1) * sizeof(double));
298  co = G_malloc((nbclass + 1) * sizeof(double));
299 
300  /* We copy the array of values to x, in order to be able to standardize it */
301  x = G_malloc((n + 1) * sizeof(double));
302  x[0] = n;
303  xn[0] = 0;
304 
305  min = data[0];
306  max = data[count - 1];
307  for (i = 1; i <= n; i++)
308  x[i] = data[i - 1];
309 
310  rangemax = max - min;
311  rangemin = rangemax;
312 
313  for (i = 2; i <= n; i++) {
314  if (x[i] != x[i - 1] && x[i] - x[i - 1] < rangemin)
315  rangemin = x[i] - x[i - 1]; /* rangemin = minimal distance */
316  }
317 
318  /* STANDARDIZATION
319  * and creation of the number vector (xn) */
320 
321  for (i = 1; i <= n; i++) {
322  x[i] = (x[i] - min) / rangemax;
323  xn[i] = i / (double)n;
324  }
325  xlim = rangemin / rangemax;
326  rangemin = rangemin / 2.0;
327 
328  /* Searching for the limits */
329  num[1] = n;
330  abc = G_malloc(3 * sizeof(double));
331 
332  /* Loop through possible solutions */
333  for (i = 1; i <= nbclass; i++) {
334  nmax = 0;
335  dmax = 0.0;
336  d2 = 0.0;
337  nf = 0; /*End number */
338 
339  /* Loop through classes */
340  for (j = 1; j <= i; j++) {
341  nd = nf; /*Start number */
342  nf = num[j];
343  co[j] = 10e37;
344  AS_eqdrt(x, xn, nd, nf, abc);
345  den = sqrt(pow(abc[1], 2) + 1.0);
346  nd++;
347  /* Loop through observations */
348  for (k = nd; k <= nf; k++) {
349  if (abc[2] == 0.0)
350  d = fabs((-1.0 * abc[1] * x[k]) + xn[k] - abc[0]) / den;
351  else
352  d = fabs(x[k] - abc[2]);
353  d2 += pow(d, 2);
354  if (x[k] - x[nd] < xlim)
355  continue;
356  if (x[nf] - x[k] < xlim)
357  continue;
358  if (d <= dmax)
359  continue;
360  dmax = d;
361  nmax = k;
362  }
363  nd--; /* A VERIFIER! */
364  if (x[nf] != x[nd]) {
365  if (nd != 0)
366  co[j] = (xn[nf] - xn[nd]) / (x[nf] - x[nd]);
367  else
368  co[j] = (xn[nf]) / (x[nf]); /* A VERIFIER! */
369  }
370  }
371  if (i == 1)
372  dd = d2;
373  p = d2 / dd;
374  for (j = 1; j <= i; j++) {
375  no[j] = num[j];
376  zz[j] = x[num[j]] * rangemax + min;
377  if (j == i)
378  continue;
379  if (co[j] > co[j + 1]) {
380  zz[j] = zz[j] + rangemin;
381  continue;
382  }
383  zz[j] = zz[j] - rangemin;
384  no[j] = no[j] - 1;
385  }
386  im = i - 1;
387  if (im != 0.0) {
388  for (j = 1; j <= im; j++) {
389  ji = i + 1 - j;
390  no[ji] -= no[ji - 1];
391  }
392  }
393  if (nmax == 0) {
394  break;
395  }
396  nff = i + 2;
397  tmp = 0;
398  for (j = 1; j <= i; j++) {
399  jj = nff - j;
400  if (num[jj - 1] < nmax) {
401  num[jj] = nmax;
402  tmp = 1;
403  break;
404  }
405  num[jj] = num[jj - 1];
406  }
407  if (tmp == 0) {
408  num[1] = nmax;
409  jj = 1;
410  }
411  if (jj == 1) {
412  xnj_1 = 0;
413  xj_1 = 0;
414  }
415  else {
416  xnj_1 = xn[num[jj - 1]];
417  xj_1 = x[num[jj - 1]];
418  }
419  no1 = (xn[num[jj]] - xnj_1) * n;
420  no2 = (xn[num[jj + 1]] - xn[num[jj]]) * n;
421  f = (xn[num[jj + 1]] - xnj_1) / (x[num[jj + 1]] - xj_1);
422  f *= n;
423  xt1 = (x[num[jj]] - xj_1) * f;
424  xt2 = (x[num[jj + 1]] - x[num[jj]]) * f;
425  if (xt2 == 0) {
426  xt2 = rangemin / 2.0 / rangemax * f;
427  xt1 -= xt2;
428  }
429  else if (xt1 * xt2 == 0) {
430  xt1 = rangemin / 2.0 / rangemax * f;
431  xt2 -= xt1;
432  }
433 
434  /* calculate chi-square to indicate statistical significance of new class, i.e. how probable would it be that the new class could be the result of purely random choice */
435  if (chi2 > pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2))
436  chi2 = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2);
437 
438  }
439 
440  /* Fill up classbreaks of i <=nbclass classes */
441  for (j = 0; j <= (i - 1); j++)
442  classbreaks[j] = zz[j + 1];
443 
444  return (chi2);
445 }
446 
447 int AS_class_frequencies(double *data, int count, int nbreaks,
448  double *classbreaks, int *frequencies)
449 {
450  int i, j;
451  double min, max;
452 
453  min = data[0];
454  max = data[count - 1];
455  /* count cases in all classes, except for last class */
456  i = 0;
457  for (j = 0; j < nbreaks; j++) {
458  while (data[i] <= classbreaks[j]) {
459  frequencies[j]++;
460  i++;
461  }
462  }
463 
464  /*Now count cases in last class */
465  for (i = i; i < count; i++) {
466  frequencies[nbreaks]++;
467  }
468 
469  return (1);
470 }
double AS_class_apply_algorithm(int algo, double *data, int nrec, int *nbreaks, double *classbreaks)
Definition: class.c:22
int G_strcasecmp(const char *x, const char *y)
String compare ignoring case (upper or lower)
Definition: strings.c:46
int AS_class_equiprob(double *data, int count, int *nbreaks, double *classbreaks)
Definition: class.c:148
void G_free(void *buf)
Free allocated memory.
Definition: gis/alloc.c:149
#define CLASS_DISCONT
Definition: arraystats.h:29
int AS_class_interval(double *data, int count, int nbreaks, double *classbreaks)
Definition: class.c:54
int AS_option_to_algorithm(const struct Option *option)
Definition: class.c:6
#define min(x, y)
Definition: draw2.c:31
double mean
Definition: arraystats.h:19
double stdev
Definition: arraystats.h:22
int count
#define CLASS_EQUIPROB
Definition: arraystats.h:28
#define x
#define max(x, y)
Definition: draw2.c:32
void G_fatal_error(const char *msg,...)
Print a fatal error message to stderr.
Definition: gis/error.c:160
char * answer
Definition: gis.h:522
double AS_class_discont(double *data, int count, int nbreaks, double *classbreaks)
Definition: class.c:267
double max
Definition: arraystats.h:15
void AS_basic_stats(double *data, int count, struct GASTATS *stats)
Definition: basic.c:6
int AS_class_frequencies(double *data, int count, int nbreaks, double *classbreaks, int *frequencies)
Definition: class.c:447
double AS_class_stdev(double *data, int count, int nbreaks, double *classbreaks)
Definition: class.c:71
#define CLASS_QUANT
Definition: arraystats.h:27
double min
Definition: arraystats.h:14
Structure that stores option information.
Definition: gis.h:509
#define _(str)
Definition: glocale.h:13
long num
Definition: raster3d/cats.c:85
void AS_eqdrt(double vectx[], double vecty[], int i1, int i2, double *vabc)
Definition: basic.c:40
#define CLASS_INTERVAL
Definition: arraystats.h:25
#define CLASS_STDEV
Definition: arraystats.h:26
void G_warning(const char *msg,...)
Print a warning message to stderr.
Definition: gis/error.c:204
int AS_class_quant(double *data, int count, int nbreaks, double *classbreaks)
Definition: class.c:135