Engauge Digitizer  2
 All Classes Functions Variables Typedefs Enumerations Friends Pages
GridClassifier.cpp
1 /******************************************************************************************************
2  * (C) 2014 markummitchell@github.com. This file is part of Engauge Digitizer, which is released *
3  * under GNU General Public License version 2 (GPLv2) or (at your option) any later version. See file *
4  * LICENSE or go to gnu.org/licenses for details. Distribution requires prior written permission. *
5  ******************************************************************************************************/
6 
7 #include "ColorFilter.h"
8 #include "Correlation.h"
9 #include "DocumentModelCoords.h"
10 #include "EngaugeAssert.h"
11 #include "GridClassifier.h"
12 #include <iostream>
13 #include "Logger.h"
14 #include <QDebug>
15 #include <QFile>
16 #include <QImage>
17 #include "QtToString.h"
18 #include "Transformation.h"
19 
20 int GridClassifier::NUM_PIXELS_PER_HISTOGRAM_BINS = 1;
21 double GridClassifier::PEAK_HALF_WIDTH = 4;
22 int GridClassifier::MIN_STEP_PIXELS = 4 * GridClassifier::PEAK_HALF_WIDTH; // Step includes down ramp, flat part, up ramp
23 const QString GNUPLOT_DELIMITER ("\t");
24 
25 // We set up the picket fence with binStart arbitrarily set close to zero. Peak is
26 // not exactly at zero since we want to include the left side of the first peak.
27 int GridClassifier::BIN_START_UNSHIFTED = GridClassifier::PEAK_HALF_WIDTH;
28 
29 using namespace std;
30 
32 {
33 }
34 
35 int GridClassifier::binFromCoordinate (double coord,
36  double coordMin,
37  double coordMax) const
38 {
39  ENGAUGE_ASSERT (coordMin < coordMax);
40  ENGAUGE_ASSERT (coordMin <= coord);
41  ENGAUGE_ASSERT (coord <= coordMax);
42 
43  int bin = 0.5 + (m_numHistogramBins - 1.0) * (coord - coordMin) / (coordMax - coordMin);
44 
45  return bin;
46 }
47 
48 void GridClassifier::classify (bool isGnuplot,
49  const QPixmap &originalPixmap,
50  const Transformation &transformation,
51  int &countX,
52  double &startX,
53  double &stepX,
54  int &countY,
55  double &startY,
56  double &stepY)
57 {
58  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::classify";
59 
60  QImage image = originalPixmap.toImage ();
61 
62  m_numHistogramBins = image.width() / NUM_PIXELS_PER_HISTOGRAM_BINS;
63  ENGAUGE_ASSERT (m_numHistogramBins > 1);
64 
65  double xMin, xMax, yMin, yMax;
66  double binStartX, binStepX, binStartY, binStepY;
67 
68  m_binsX = new double [m_numHistogramBins];
69  m_binsY = new double [m_numHistogramBins];
70 
71  computeGraphCoordinateLimits (image,
72  transformation,
73  xMin,
74  xMax,
75  yMin,
76  yMax);
77  initializeHistogramBins ();
78  populateHistogramBins (image,
79  transformation,
80  xMin,
81  xMax,
82  yMin,
83  yMax);
84  searchStartStepSpace (isGnuplot,
85  m_binsX,
86  "x",
87  xMin,
88  xMax,
89  startX,
90  stepX,
91  binStartX,
92  binStepX);
93  searchStartStepSpace (isGnuplot,
94  m_binsY,
95  "y",
96  yMin,
97  yMax,
98  startY,
99  stepY,
100  binStartY,
101  binStepY);
102  searchCountSpace (m_binsX,
103  binStartX,
104  binStepX,
105  countX);
106  searchCountSpace (m_binsY,
107  binStartY,
108  binStepY,
109  countY);
110 
111  delete [] m_binsX;
112  delete [] m_binsY;
113 }
114 
115 void GridClassifier::computeGraphCoordinateLimits (const QImage &image,
116  const Transformation &transformation,
117  double &xMin,
118  double &xMax,
119  double &yMin,
120  double &yMax)
121 {
122  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::computeGraphCoordinateLimits";
123 
124  // Project every pixel onto the x axis, and onto the y axis. One set of histogram bins will be
125  // set up along each of the axes. The range of bins will encompass every pixel in the image, and no more.
126 
127  QPointF posGraphTL, posGraphTR, posGraphBL, posGraphBR;
128  transformation.transformScreenToRawGraph (QPointF (0, 0) , posGraphTL);
129  transformation.transformScreenToRawGraph (QPointF (image.width(), 0) , posGraphTR);
130  transformation.transformScreenToRawGraph (QPointF (0, image.height()) , posGraphBL);
131  transformation.transformScreenToRawGraph (QPointF (image.width(), image.height()), posGraphBR);
132 
133  // Compute x and y ranges for setting up the histogram bins
134  if (transformation.modelCoords().coordsType() == COORDS_TYPE_CARTESIAN) {
135 
136  // For affine cartesian coordinates, we only need to look at the screen corners
137  xMin = qMin (qMin (qMin (posGraphTL.x(), posGraphTR.x()), posGraphBL.x()), posGraphBR.x());
138  xMax = qMax (qMax (qMax (posGraphTL.x(), posGraphTR.x()), posGraphBL.x()), posGraphBR.x());
139  yMin = qMin (qMin (qMin (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
140  yMax = qMax (qMax (qMax (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
141 
142  } else {
143 
144  // For affine polar coordinates, easiest approach is to assume the full circle
145  xMin = 0.0;
146  xMax = transformation.modelCoords().thetaPeriod();
147  yMin = transformation.modelCoords().originRadius();
148  yMax = qMax (qMax (qMax (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
149 
150  }
151 
152  ENGAUGE_ASSERT (xMin < xMax);
153  ENGAUGE_ASSERT (yMin < yMax);
154 }
155 
156 double GridClassifier::coordinateFromBin (int bin,
157  double coordMin,
158  double coordMax) const
159 {
160  ENGAUGE_ASSERT (1 < m_numHistogramBins);
161  ENGAUGE_ASSERT (coordMin < coordMax);
162 
163  return coordMin + (coordMax - coordMin) * (double) bin / ((double) m_numHistogramBins - 1.0);
164 }
165 
166 void GridClassifier::copyVectorToVector (const double from [],
167  double to []) const
168 {
169  for (int bin = 0; bin < m_numHistogramBins; bin++) {
170  to [bin] = from [bin];
171  }
172 }
173 
174 void GridClassifier::dumpGnuplotCoordinate (const QString &coordinateLabel,
175  double corr,
176  const double *bins,
177  double coordinateMin,
178  double coordinateMax,
179  int binStart,
180  int binStep) const
181 {
182  QString filename = QString ("gridclassifier_%1_corr%2_startMax%3_stepMax%4.gnuplot")
183  .arg (coordinateLabel)
184  .arg (corr, 8, 'f', 3, '0')
185  .arg (binStart)
186  .arg (binStep);
187 
188  cout << "Writing gnuplot file: " << filename.toLatin1().data() << "\n";
189 
190  QFile fileDump (filename);
191  fileDump.open (QIODevice::WriteOnly | QIODevice::Text);
192  QTextStream strDump (&fileDump);
193 
194  int bin;
195 
196  // For consistent scaling, get the max bin count
197  int binCountMax = 0;
198  for (bin = 0; bin < m_numHistogramBins; bin++) {
199  if (bins [bin] > binCountMax) {
200  binCountMax = qMax ((double) binCountMax,
201  bins [bin]);
202  }
203  }
204 
205  // Get picket fence
206  double *picketFence = new double [m_numHistogramBins];
207  loadPicketFence (picketFence,
208  binStart,
209  binStep,
210  0,
211  false);
212 
213  // Header
214  strDump << "bin"
215  << GNUPLOT_DELIMITER << "coordinate"
216  << GNUPLOT_DELIMITER << "binCount"
217  << GNUPLOT_DELIMITER << "startStep"
218  << GNUPLOT_DELIMITER << "picketFence" << "\n";
219 
220  // Data, with one row per coordinate value
221  for (bin = 0; bin < m_numHistogramBins; bin++) {
222 
223  double coordinate = coordinateFromBin (bin,
224  coordinateMin,
225  coordinateMax);
226  double startStepValue (((bin - binStart) % binStep == 0) ? 1 : 0);
227  strDump << bin
228  << GNUPLOT_DELIMITER << coordinate
229  << GNUPLOT_DELIMITER << bins [bin]
230  << GNUPLOT_DELIMITER << binCountMax * startStepValue
231  << GNUPLOT_DELIMITER << binCountMax * picketFence [bin] << "\n";
232  }
233 
234  delete [] picketFence;
235 }
236 
237 void GridClassifier::dumpGnuplotCorrelations (const QString &coordinateLabel,
238  double valueMin,
239  double valueMax,
240  const double signalA [],
241  const double signalB [],
242  const double correlations [])
243 {
244  QString filename = QString ("gridclassifier_%1_correlations.gnuplot")
245  .arg (coordinateLabel);
246 
247  cout << "Writing gnuplot file: " << filename.toLatin1().data() << "\n";
248 
249  QFile fileDump (filename);
250  fileDump.open (QIODevice::WriteOnly | QIODevice::Text);
251  QTextStream strDump (&fileDump);
252 
253  int bin;
254 
255  // Compute max values so curves can be normalized to the same heights
256  double signalAMax = 1, signalBMax = 1, correlationsMax = 1;
257  for (bin = 0; bin < m_numHistogramBins; bin++) {
258  if (bin == 0 || signalA [bin] > signalAMax) {
259  signalAMax = signalA [bin];
260  }
261  if (bin == 0 || signalB [bin] > signalBMax) {
262  signalBMax = signalB [bin];
263  }
264  if (bin == 0 || correlations [bin] > correlationsMax) {
265  correlationsMax = correlations [bin];
266  }
267  }
268 
269  // Prevent divide by zero error
270  if (signalAMax == 0) {
271  signalAMax = 1.0;
272  }
273  if (signalBMax == 0) {
274  signalBMax = 1.0;
275  }
276 
277  // Output normalized curves
278  for (int bin = 0; bin < m_numHistogramBins; bin++) {
279 
280  strDump << coordinateFromBin (bin,
281  valueMin,
282  valueMax)
283  << GNUPLOT_DELIMITER << signalA [bin] / signalAMax
284  << GNUPLOT_DELIMITER << signalB [bin] / signalBMax
285  << GNUPLOT_DELIMITER << correlations [bin] / correlationsMax << "\n";
286  }
287 }
288 
289 void GridClassifier::initializeHistogramBins ()
290 {
291  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::initializeHistogramBins";
292 
293  for (int bin = 0; bin < m_numHistogramBins; bin++) {
294  m_binsX [bin] = 0;
295  m_binsY [bin] = 0;
296  }
297 }
298 
299 void GridClassifier::loadPicketFence (double picketFence [],
300  int binStart,
301  int binStep,
302  int count,
303  bool isCount) const
304 {
305  const double PEAK_HEIGHT = 1.0; // Arbitrary height, since normalization will counteract any change to this value
306 
307  // Count argument is optional. Note that binStart already excludes PEAK_HALF_WIDTH bins at start,
308  // and we also exclude PEAK_HALF_WIDTH bins at the end
309  ENGAUGE_ASSERT (binStart >= PEAK_HALF_WIDTH);
310  ENGAUGE_ASSERT (binStep != 0);
311  if (!isCount) {
312  count = 1 + (m_numHistogramBins - binStart - PEAK_HALF_WIDTH) / binStep;
313  }
314 
315  // Bins that we need to worry about
316  int binStartMinusHalfWidth = binStart - PEAK_HALF_WIDTH;
317  int binStopPlusHalfWidth = (binStart + (count - 1) * binStep) + PEAK_HALF_WIDTH;
318 
319  // To normalize, we compute the area under the picket fence. Constraint is
320  // areaUnnormalized + NUM_HISTOGRAM_BINS * normalizationOffset = 0
321  double areaUnnormalized = count * PEAK_HEIGHT * PEAK_HALF_WIDTH;
322  double normalizationOffset = -1.0 * areaUnnormalized / m_numHistogramBins;
323 
324  for (int bin = 0; bin < m_numHistogramBins; bin++) {
325 
326  // Outside of the peaks, bin is small negative so correlation with unit function is zero. In other
327  // words, the function is normalized
328  picketFence [bin] = normalizationOffset;
329 
330  if ((binStartMinusHalfWidth <= bin) &&
331  (bin <= binStopPlusHalfWidth)) {
332 
333  // Closest peak
334  int ordinalClosestPeak = (int) ((bin - binStart + binStep / 2) / binStep);
335  int binClosestPeak = binStart + ordinalClosestPeak * binStep;
336 
337  // Distance from closest peak is used to define an isosceles triangle
338  int distanceToClosestPeak = qAbs (bin - binClosestPeak);
339 
340  if (distanceToClosestPeak < PEAK_HALF_WIDTH) {
341 
342  // Map 0 to PEAK_HALF_WIDTH to 1 to 0
343  picketFence [bin] = 1.0 - (double) distanceToClosestPeak / PEAK_HALF_WIDTH + normalizationOffset;
344 
345  }
346  }
347  }
348 }
349 
350 void GridClassifier::populateHistogramBins (const QImage &image,
351  const Transformation &transformation,
352  double xMin,
353  double xMax,
354  double yMin,
355  double yMax)
356 {
357  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::populateHistogramBins";
358 
359  ColorFilter filter;
360  QRgb rgbBackground = filter.marginColor (&image);
361 
362  for (int x = 0; x < image.width(); x++) {
363  for (int y = 0; y < image.height(); y++) {
364 
365  QColor pixel = image.pixel (x, y);
366 
367  // Skip pixels with background color
368  if (!filter.colorCompare (rgbBackground,
369  pixel.rgb ())) {
370 
371  // Add this pixel to histograms
372  QPointF posGraph;
373  transformation.transformScreenToRawGraph (QPointF (x, y), posGraph);
374 
375  if (transformation.modelCoords().coordsType() == COORDS_TYPE_POLAR) {
376 
377  // If out of the 0 to period range, the theta value must shifted by the period to get into that range
378  while (posGraph.x() < xMin) {
379  posGraph.setX (posGraph.x() + transformation.modelCoords().thetaPeriod());
380  }
381  while (posGraph.x() > xMax) {
382  posGraph.setX (posGraph.x() - transformation.modelCoords().thetaPeriod());
383  }
384  }
385 
386  int binX = binFromCoordinate (posGraph.x(), xMin, xMax);
387  int binY = binFromCoordinate (posGraph.y(), yMin, yMax);
388 
389  ENGAUGE_ASSERT (0 <= binX);
390  ENGAUGE_ASSERT (0 <= binY);
391  ENGAUGE_ASSERT (binX < m_numHistogramBins);
392  ENGAUGE_ASSERT (binY < m_numHistogramBins);
393 
394  // Roundoff error in log scaling may let bin go just outside legal range
395  binX = qMin (binX, m_numHistogramBins - 1);
396  binY = qMin (binY, m_numHistogramBins - 1);
397 
398  ++m_binsX [binX];
399  ++m_binsY [binY];
400  }
401  }
402  }
403 }
404 
405 void GridClassifier::searchCountSpace (double bins [],
406  double binStart,
407  double binStep,
408  int &countMax)
409 {
410  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::searchCountSpace"
411  << " start=" << binStart
412  << " step=" << binStep;
413 
414  // Loop though the space of possible counts
415  Correlation correlation (m_numHistogramBins);
416  double *picketFence = new double [m_numHistogramBins];
417  double corr, corrMax;
418  bool isFirst = true;
419  int countStop = 1 + (m_numHistogramBins - binStart) / binStep;
420  for (int count = 2; count <= countStop; count++) {
421 
422  loadPicketFence (picketFence,
423  binStart,
424  binStep,
425  count,
426  true);
427 
428  correlation.correlateWithoutShift (m_numHistogramBins,
429  bins,
430  picketFence,
431  corr);
432  if (isFirst || (corr > corrMax)) {
433  countMax = count;
434  corrMax = corr;
435  }
436 
437  isFirst = false;
438  }
439 
440  delete [] picketFence;
441 }
442 
443 void GridClassifier::searchStartStepSpace (bool isGnuplot,
444  double bins [],
445  const QString &coordinateLabel,
446  double valueMin,
447  double valueMax,
448  double &start,
449  double &step,
450  double &binStartMax,
451  double &binStepMax)
452 {
453  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::searchStartStepSpace";
454 
455  // Correlations are tracked for logging
456  double *signalA = new double [m_numHistogramBins];
457  double *signalB = new double [m_numHistogramBins];
458  double *correlations = new double [m_numHistogramBins];
459  double *correlationsMax = new double [m_numHistogramBins];
460 
461  // Loop though the space of possible gridlines using the independent variables (start,step).
462  Correlation correlation (m_numHistogramBins);
463  double *picketFence = new double [m_numHistogramBins];
464  int binStart;
465  double corr = 0, corrMax = 0;
466  bool isFirst = true;
467 
468  // We do not explicitly search(=loop) through binStart here, since Correlation::correlateWithShift will take
469  // care of that for us
470 
471  // Step search starts out small, and stops at value that gives count substantially greater than 2. Freakishly small
472  // images need to have MIN_STEP_PIXELS overridden so the loop iterates at least once
473  binStartMax = BIN_START_UNSHIFTED + 1; // In case search below ever fails
474  binStepMax = qMin (MIN_STEP_PIXELS, m_numHistogramBins / 8); // In case search below ever fails
475  for (int binStep = qMin (MIN_STEP_PIXELS, m_numHistogramBins / 8); binStep < m_numHistogramBins / 4; binStep++) {
476 
477  loadPicketFence (picketFence,
478  BIN_START_UNSHIFTED,
479  binStep,
480  PEAK_HALF_WIDTH,
481  false);
482 
483  correlation.correlateWithShift (m_numHistogramBins,
484  bins,
485  picketFence,
486  binStart,
487  corr,
488  correlations);
489  if (isFirst || (corr > corrMax)) {
490 
491  int binStartMaxNext = binStart + BIN_START_UNSHIFTED + 1; // Compensate for the shift performed inside loadPicketFence
492 
493  // Make sure binStartMax never goes out of bounds
494  if (binStartMaxNext < m_numHistogramBins) {
495 
496  binStartMax = binStartMaxNext;
497  binStepMax = binStep;
498  corrMax = corr;
499  copyVectorToVector (bins, signalA);
500  copyVectorToVector (picketFence, signalB);
501  copyVectorToVector (correlations, correlationsMax);
502 
503  // Output a gnuplot file. We should see the correlation values consistently increasing
504  if (isGnuplot) {
505 
506  dumpGnuplotCoordinate(coordinateLabel,
507  corr,
508  bins,
509  valueMin,
510  valueMax,
511  binStart,
512  binStep);
513  }
514  }
515  }
516 
517  isFirst = false;
518  }
519 
520  // Convert from bins back to graph coordinates
521  start = coordinateFromBin (binStartMax,
522  valueMin,
523  valueMax);
524  if (binStartMax + binStepMax < m_numHistogramBins) {
525 
526  // Normal case where a reasonable step value is being calculated
527  double next = coordinateFromBin (binStartMax + binStepMax,
528  valueMin,
529  valueMax);
530  step = next - start;
531  } else {
532 
533  // Pathological case where step jumps to outside the legal range. We bring the step back into range
534  double next = coordinateFromBin (m_numHistogramBins - 1,
535  valueMin,
536  valueMax);
537  step = next - start;
538  }
539 
540  if (isGnuplot) {
541  dumpGnuplotCorrelations (coordinateLabel,
542  valueMin,
543  valueMax,
544  signalA,
545  signalB,
546  correlationsMax);
547  }
548 
549  delete [] signalA;
550  delete [] signalB;
551  delete [] correlations;
552  delete [] correlationsMax;
553  delete [] picketFence;
554 }
void transformScreenToRawGraph(const QPointF &coordScreen, QPointF &coordGraph) const
Transform from cartesian pixel screen coordinates to cartesian/polar graph coordinates.
double originRadius() const
Get method for origin radius in polar mode.
Fast cross correlation between two functions.
Definition: Correlation.h:14
Class for filtering image to remove unimportant information.
Definition: ColorFilter.h:20
DocumentModelCoords modelCoords() const
Get method for DocumentModelCoords.
double thetaPeriod() const
Return the period of the theta value for polar coordinates, consistent with CoordThetaUnits.
Affine transformation between screen and graph coordinates, based on digitized axis points...
QRgb marginColor(const QImage *image) const
Identify the margin color of the image, which is defined as the most common color in the four margins...
Definition: ColorFilter.cpp:73
GridClassifier()
Single constructor.
CoordsType coordsType() const
Get method for coordinates type.
bool colorCompare(QRgb rgb1, QRgb rgb2) const
See if the two color values are close enough to be considered to be the same.
Definition: ColorFilter.cpp:25
Classify the grid pattern in an original image.
void classify(bool isGnuplot, const QPixmap &originalPixmap, const Transformation &transformation, int &countX, double &startX, double &stepX, int &countY, double &startY, double &stepY)
Classify the specified image, and return the most probably x and y grid settings. ...