Alexandria  2.19
Please provide a description of the project.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
PdfModeExtraction.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2012-2021 Euclid Science Ground Segment
3  *
4  * This library is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU Lesser General Public License as published by the Free
6  * Software Foundation; either version 3.0 of the License, or (at your option)
7  * any later version.
8  *
9  * This library is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
27 #include <cstddef>
28 #include <iterator>
29 #include <tuple>
30 #include <utility>
31 #include <vector>
32 
33 namespace Euclid {
34 namespace MathUtils {
35 
39 
40  auto iter = pdf.begin();
41  while (iter != pdf.end()) {
42  xs.push_back((*iter).first);
43  ys.push_back((*iter).second);
44  ++iter;
45  }
46 
47  return std::make_pair(xs, ys);
48 }
49 
51  double max = -1;
52  size_t index = 0;
53  size_t max_index = 0;
54  for (auto iter = pdf.begin(); iter != pdf.end(); ++iter) {
55  if (*iter > max) {
56  max = *iter;
57  max_index = index;
58  }
59  ++index;
60  }
61 
62  return max_index;
63 }
64 
65 std::pair<size_t, size_t> catchPeak(const std::vector<double>& pdf, size_t center_index, double merge_ratio) {
66 
67  double peak_value = pdf[center_index];
68  double threshold = (1.0 - merge_ratio) * peak_value;
69  size_t min_x = 0;
70  size_t max_x = pdf.size() - 1;
71 
72  for (size_t index = 0; index <= center_index; ++index) {
73  size_t position = center_index - index;
74  if (position == 0 || pdf[position] == 0) {
75  break;
76  }
77  if ((pdf[position] < threshold && pdf[position - 1] >= pdf[position])) {
78  min_x = position;
79  break;
80  }
81  min_x = position;
82  }
83 
84  for (size_t position = center_index; position <= pdf.size(); ++position) {
85  if (position == pdf.size() - 1 || pdf[position] == 0) {
86  break;
87  }
88  if ((pdf[position] < threshold && pdf[position + 1] >= pdf[position])) {
89  max_x = position;
90  break;
91  }
92  max_x = position;
93  }
94 
95  return std::make_pair(min_x, max_x);
96 }
97 
98 // basic integration may be refined
100 
101  double num = 0.;
102  double area = 0.;
103  for (size_t index = min_x; index <= max_x; ++index) {
104  double dx = 0.;
105  if (index == 0) {
106  dx = pdf.first[index + 1] - pdf.first[index];
107  } else if (index == pdf.first.size() - 1) {
108  dx = pdf.first[index] - pdf.first[index - 1];
109  } else {
110  dx = (pdf.first[index + 1] - pdf.first[index - 1]) / 2.0;
111  }
112 
113  num += pdf.first[index] * pdf.second[index] * dx;
114  area += pdf.second[index] * dx;
115  }
116 
117  return std::make_pair(num / area, area);
118 }
119 
121  /*
122  without assuming equi-distant sampling
123 
124  x_max= ((x1^2-x0^2)*y2 - (x2^2-x0^2)*y1+(x2^2-x1^2)*y0)/(2*((x1-x0)*y2-(x2-x0)*y1+(x2-x1)*y0))
125 
126  */
127  if (x_index >= pdf.first.size()) {
128  throw Elements::Exception("getInterpolationAround: x_index out of range.");
129  }
130 
131  // ensure we are not on the border
132  if (x_index == 0) {
133  x_index += 1;
134  } else if (x_index + 1 == pdf.first.size()) {
135  x_index -= 1;
136  }
137 
138  double x0 = pdf.first[x_index - 1];
139  double y0 = pdf.second[x_index - 1];
140 
141  double x1 = pdf.first[x_index];
142  double y1 = pdf.second[x_index];
143 
144  double x2 = pdf.first[x_index + 1];
145  double y2 = pdf.second[x_index + 1];
146 
147  double denom = 2 * ((x1 - x0) * y2 - (x2 - x0) * y1 + (x2 - x1) * y0);
148  double num = (x1 * x1 - x0 * x0) * y2 - (x2 * x2 - x0 * x0) * y1 + (x2 * x2 - x1 * x1) * y0;
149 
150  double x_max = x1;
151  // protect against division by 0 (flat interval)
152  if (denom != 0) {
153  x_max = num / denom;
154  }
155 
156  // check in interval
157  if (x_max < pdf.first.front()) {
158  x_max = pdf.first.front();
159  } else if (x_max > pdf.first.back()) {
160  x_max = pdf.first.back();
161  }
162 
163  return x_max;
164 }
165 
167  size_t min_x, size_t max_x, double value) {
168  std::vector<double> flatterned{pdf.second};
169  for (size_t index = min_x; index <= max_x; ++index) {
170  flatterned[index] = value;
171  }
172  return std::make_pair(pdf.first, flatterned);
173 }
174 
176  size_t n) {
177  std::vector<ModeInfo> result{};
178  auto pdf_xy = std::make_pair(x_sampling, pdf_sampling);
179 
180  for (size_t idx = 0; idx < n; ++idx) {
181  auto peak_index = findMaximumIndex(pdf_xy.second);
182  auto min_max = catchPeak(pdf_xy.second, peak_index, merge_ratio);
183  auto mean_area = avgArea(pdf_xy, min_max.first, min_max.second);
184  auto max_inter = getInterpolationAround(pdf_xy, peak_index);
185 
186  result.push_back(ModeInfo(pdf_xy.first[peak_index], mean_area.first, max_inter, mean_area.second));
187  pdf_xy = flatternPeak(pdf_xy, min_max.first, min_max.second, 0.0);
188  }
189  return result;
190 }
191 
192 std::vector<ModeInfo> extractNHighestModes(const XYDataset::XYDataset& pdf, double merge_ratio, size_t n) {
193  auto pdf_xy = getXYs(pdf);
194  return extractNHighestModes(pdf_xy.first, pdf_xy.second, merge_ratio, n);
195 }
196 
198  size_t n) {
199  auto pdf_xy = std::make_pair(x_sampling, pdf_sampling);
200  double total_area = avgArea(pdf_xy, 0, x_sampling.size() - 1).second;
201 
202  std::vector<ModeInfo> result{};
203 
204  bool out = false;
205  size_t loop = 0;
206  while (!out) {
207 
208  auto peak_index = findMaximumIndex(pdf_xy.second);
209  auto min_max = catchPeak(pdf_xy.second, peak_index, merge_ratio);
210  auto mean_area = avgArea(pdf_xy, min_max.first, min_max.second);
211  auto max_sample = pdf_xy.first[peak_index];
212  double interp_max_value = getInterpolationAround(pdf_xy, peak_index);
213  auto new_mode = ModeInfo(max_sample, mean_area.first, interp_max_value, mean_area.second);
214 
215  auto iter_mode = result.begin();
216  while (iter_mode != result.end()) {
217  if ((*iter_mode).getModeArea() < mean_area.second) {
218  break;
219  }
220  ++iter_mode;
221  }
222 
223  if (iter_mode != result.end()) {
224  result.insert(iter_mode, new_mode);
225  } else if (result.size() < n) {
226  result.push_back(new_mode);
227  }
228  total_area -= mean_area.second;
229  out = result.size() >= n && (result.back().getModeArea() > total_area || loop > 3 * n);
230 
231  pdf_xy = flatternPeak(pdf_xy, min_max.first, min_max.second, 0.0);
232  ++loop;
233  }
234 
235  return result;
236 }
237 
238 std::vector<ModeInfo> extractNBigestModes(const XYDataset::XYDataset& pdf, double merge_ratio, size_t n) {
239  auto pdf_xy = getXYs(pdf);
240  return extractNBigestModes(pdf_xy.first, pdf_xy.second, merge_ratio, n);
241 }
242 
243 } // namespace MathUtils
244 } // namespace Euclid
Class for storing the information of a PDF mode.
constexpr double second
std::pair< std::vector< double >, std::vector< double > > getXYs(const XYDataset::XYDataset &pdf)
std::pair< std::vector< double >, std::vector< double > > flatternPeak(const std::pair< std::vector< double >, std::vector< double >> &pdf, size_t min_x, size_t max_x, double value)
T end(T...args)
size_t findMaximumIndex(const std::vector< double > &pdf)
std::vector< ModeInfo > extractNHighestModes(const XYDataset::XYDataset &pdf, double merge_ratio, size_t n)
const_iterator begin() const
Returns a const iterator to the first pair of the dataset.
Definition: XYDataset.cpp:36
std::pair< size_t, size_t > catchPeak(const std::vector< double > &pdf, size_t center_index, double merge_ratio)
T make_pair(T...args)
T size(T...args)
std::pair< double, double > avgArea(std::pair< std::vector< double >, std::vector< double >> &pdf, size_t min_x, size_t max_x)
T begin(T...args)
double getInterpolationAround(const std::pair< std::vector< double >, std::vector< double >> &pdf, size_t x_index)
This module provides an interface for accessing two dimensional datasets (pairs of (X...
Definition: XYDataset.h:59
std::vector< ModeInfo > extractNBigestModes(const XYDataset::XYDataset &pdf, double merge_ratio, size_t n)
const_iterator end() const
Returns a const iterator to the one after last pair dataset.
Definition: XYDataset.cpp:40