OpenTREP Logo  0.07.4
C++ Open Travel Request Parsing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
opentrep-searcher.cpp
Go to the documentation of this file.
1 // STL
2 #include <cassert>
3 #include <iostream>
4 #include <sstream>
5 #include <fstream>
6 #include <vector>
7 #include <string>
8 // Boost (Extended STL)
9 #include <boost/date_time/posix_time/posix_time.hpp>
10 #include <boost/date_time/gregorian/gregorian.hpp>
11 #include <boost/tokenizer.hpp>
12 #include <boost/program_options.hpp>
13 // OpenTREP
15 #include <opentrep/DBType.hpp>
18 #include <opentrep/Location.hpp>
19 #include <opentrep/CityDetails.hpp>
20 #include <opentrep/config/opentrep-paths.hpp>
21 
22 
// //////// Type definitions ///////
/**
 * List of words (STL strings), e.g. the words making up a travel query.
 */
typedef std::vector<std::string> WordList_T;


// //////// Constants //////
/**
 * Default file name for the log output (default value of the "log" option).
 */
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME = "opentrep-searcher.log";

/**
 * Default travel query, used when the caller hands over an empty query
 * string. The misspellings are part of the value.
 * NOTE(review): presumably misspelled on purpose to exercise the spelling
 * error distance; confirm before "fixing" the text.
 */
const std::string K_OPENTREP_DEFAULT_QUERY_STRING =
  "sna francicso rio de janero lso angles reykyavki";

/**
 * Default type of search request. The "type" option documents the values
 * as 0 = full text, 1 = coordinates.
 */
const unsigned short K_OPENTREP_DEFAULT_SEARCH_TYPE = 0;
47 
52 
53 
54 // //////////////////////////////////////////////////////////////////////
55 void tokeniseStringIntoWordList (const std::string& iPhrase,
56  WordList_T& ioWordList) {
57  // Empty the word list
58  ioWordList.clear();
59 
60  // Boost Tokeniser
61  typedef boost::tokenizer<boost::char_separator<char> > Tokeniser_T;
62 
63  // Define the separators
64  const boost::char_separator<char> lSepatorList(" .,;:|+-*/_=!@#$%`~^&(){}[]?'<>\"");
65 
66  // Initialise the phrase to be tokenised
67  Tokeniser_T lTokens (iPhrase, lSepatorList);
68  for (Tokeniser_T::const_iterator tok_iter = lTokens.begin();
69  tok_iter != lTokens.end(); ++tok_iter) {
70  const std::string& lTerm = *tok_iter;
71  ioWordList.push_back (lTerm);
72  }
73 }
74 
75 // //////////////////////////////////////////////////////////////////////
76 std::string createStringFromWordList (const WordList_T& iWordList) {
77  std::ostringstream oStr;
78 
79  unsigned short idx = iWordList.size();
80  for (WordList_T::const_iterator itWord = iWordList.begin();
81  itWord != iWordList.end(); ++itWord, --idx) {
82  const std::string& lWord = *itWord;
83  oStr << lWord;
84  if (idx > 1) {
85  oStr << " ";
86  }
87  }
88 
89  return oStr.str();
90 }
91 
92 
93 // ///////// Parsing of Options & Configuration /////////
94 // A helper function to simplify the main part.
/**
 * Write the elements of a vector to an output stream, each element being
 * followed by a single space.
 *
 * @param os  Stream receiving the elements.
 * @param v   Vector to be dumped.
 * @return The stream given as os, so that calls can be chained.
 */
template<class T> std::ostream& operator<< (std::ostream& os,
                                            const std::vector<T>& v) {
  // Write to the stream handed over by the caller (not to std::cout), so
  // that the output lands where the caller asked, e.g. in a string stream.
  for (typename std::vector<T>::const_iterator itElem = v.begin();
       itElem != v.end(); ++itElem) {
    os << *itElem << " ";
  }
  return os;
}
100 
103 
105 int readConfiguration (int argc, char* argv[],
106  unsigned short& ioSpellingErrorDistance,
107  std::string& ioQueryString,
108  std::string& ioXapianDBFilepath,
109  std::string& ioSQLDBTypeString,
110  std::string& ioSQLDBConnectionString,
111  unsigned short& ioDeploymentNumber,
112  std::string& ioLogFilename,
113  unsigned short& ioSearchType) {
114 
115  // Initialise the travel query string, if that one is empty
116  if (ioQueryString.empty() == true) {
117  ioQueryString = K_OPENTREP_DEFAULT_QUERY_STRING;
118  }
119 
120  // Transform the query string into a list of words (STL strings)
121  WordList_T lWordList;
122  tokeniseStringIntoWordList (ioQueryString, lWordList);
123 
124  // Declare a group of options that will be allowed only on command line
125  boost::program_options::options_description generic ("Generic options");
126  generic.add_options()
127  ("prefix", "print installation prefix")
128  ("version,v", "print version string")
129  ("help,h", "produce help message");
130 
131  // Declare a group of options that will be allowed both on command
132  // line and in config file
133  boost::program_options::options_description config ("Configuration");
134  config.add_options()
135  ("error,e",
136  boost::program_options::value< unsigned short >(&ioSpellingErrorDistance)->default_value(K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE),
137  "Spelling error distance (e.g., 3)")
138  ("xapiandb,d",
139  boost::program_options::value< std::string >(&ioXapianDBFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH),
140  "Xapian database filepath (e.g., /tmp/opentrep/xapian_traveldb)")
141  ("sqldbtype,t",
142  boost::program_options::value< std::string >(&ioSQLDBTypeString)->default_value(OPENTREP::DEFAULT_OPENTREP_SQL_DB_TYPE),
143  "SQL database type (e.g., nodb for no SQL database, sqlite for SQLite, mysql for MariaDB/MySQL)")
144  ("sqldbconx,s",
145  boost::program_options::value< std::string >(&ioSQLDBConnectionString),
146  "SQL database connection string (e.g., ~/tmp/opentrep/sqlite_travel.db for SQLite, \"db=trep_trep user=trep password=trep\" for MariaDB/MySQL)")
147  ("deploymentnb,m",
148  boost::program_options::value<unsigned short>(&ioDeploymentNumber)->default_value(OPENTREP::DEFAULT_OPENTREP_DEPLOYMENT_NUMBER),
149  "Deployment number (from to N, where N=1 normally)")
150  ("log,l",
151  boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME),
152  "Filepath for the logs")
153  ("type,y",
154  boost::program_options::value<unsigned short>(&ioSearchType)->default_value(K_OPENTREP_DEFAULT_SEARCH_TYPE),
155  "Type of search request (0 = full text, 1 = coordinates)")
156  ("query,q",
157  boost::program_options::value< WordList_T >(&lWordList)->multitoken(),
158  "Travel query word list (e.g. sna francicso rio de janero lso anglese reykyavki), which sould be located at the end of the command line (otherwise, the other options would be interpreted as part of that travel query word list)")
159  ;
160 
161  // Hidden options, will be allowed both on command line and
162  // in config file, but will not be shown to the user.
163  boost::program_options::options_description hidden ("Hidden options");
164  hidden.add_options()
165  ("copyright",
166  boost::program_options::value< std::vector<std::string> >(),
167  "Show the copyright (license)");
168 
169  boost::program_options::options_description cmdline_options;
170  cmdline_options.add(generic).add(config).add(hidden);
171 
172  boost::program_options::options_description config_file_options;
173  config_file_options.add(config).add(hidden);
174 
175  boost::program_options::options_description visible ("Allowed options");
176  visible.add(generic).add(config);
177 
178  boost::program_options::positional_options_description p;
179  p.add ("copyright", -1);
180 
181  boost::program_options::variables_map vm;
182  boost::program_options::
183  store (boost::program_options::command_line_parser (argc, argv).
184  options (cmdline_options).positional(p).run(), vm);
185 
186  std::ifstream ifs ("opentrep-searcher.cfg");
187  boost::program_options::store (parse_config_file (ifs, config_file_options),
188  vm);
189  boost::program_options::notify (vm);
190 
191  if (vm.count ("help")) {
192  std::cout << visible << std::endl;
194  }
195 
196  if (vm.count ("version")) {
197  std::cout << PACKAGE_NAME << ", version " << PACKAGE_VERSION << std::endl;
199  }
200 
201  if (vm.count ("prefix")) {
202  std::cout << "Installation prefix: " << PREFIXDIR << std::endl;
204  }
205 
206  if (vm.count ("deploymentnb")) {
207  ioDeploymentNumber = vm["deploymentnb"].as< unsigned short >();
208  std::cout << "Deployment number " << ioDeploymentNumber << std::endl;
209  }
210 
211  if (vm.count ("xapiandb")) {
212  ioXapianDBFilepath = vm["xapiandb"].as< std::string >();
213  std::cout << "Xapian database filepath is: " << ioXapianDBFilepath
214  << ioDeploymentNumber << std::endl;
215  }
216 
217  if (vm.count ("sqldbtype")) {
218  ioSQLDBTypeString = vm["sqldbtype"].as< std::string >();
219  std::cout << "SQL database type is: " << ioSQLDBTypeString
220  << std::endl;
221  }
222 
223  // Derive the detault connection string depending on the SQL database type
224  const OPENTREP::DBType lDBType (ioSQLDBTypeString);
225  if (lDBType == OPENTREP::DBType::NODB) {
226  ioSQLDBConnectionString = "";
227 
228  } else if (lDBType == OPENTREP::DBType::SQLITE3) {
229  ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_SQLITE_DB_FILEPATH;
230 
231  } else if (lDBType == OPENTREP::DBType::MYSQL) {
232  ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_MYSQL_CONN_STRING;
233  }
234 
235  // Set the SQL database connection string, if any is given
236  if (vm.count ("sqldbconx")) {
237  ioSQLDBConnectionString = vm["sqldbconx"].as< std::string >();
238  }
239 
240  // Reporting of the SQL database connection string
241  if (lDBType == OPENTREP::DBType::SQLITE3
242  || lDBType == OPENTREP::DBType::MYSQL) {
243  const std::string& lSQLDBConnString =
245  ioSQLDBConnectionString,
246  ioDeploymentNumber);
247  //
248  std::cout << "SQL database connection string is: " << lSQLDBConnString
249  << std::endl;
250  }
251 
252  if (vm.count ("log")) {
253  ioLogFilename = vm["log"].as< std::string >();
254  std::cout << "Log filename is: " << ioLogFilename << std::endl;
255  }
256 
257  std::cout << "The type of search is: " << ioSearchType << std::endl;
258 
259  std::cout << "The spelling error distance is: " << ioSpellingErrorDistance
260  << std::endl;
261 
262  ioQueryString = createStringFromWordList (lWordList);
263  std::cout << "The travel query string is: " << ioQueryString << std::endl;
264 
265  return 0;
266 }
267 
271 std::string parseQuery (OPENTREP::OPENTREP_Service& ioOpentrepService,
272  const OPENTREP::TravelQuery_T& iTravelQuery) {
273  std::ostringstream oStr;
274 
275  // Query the Xapian database (index)
276  OPENTREP::WordList_T lNonMatchedWordList;
277  OPENTREP::LocationList_T lLocationList;
278  const OPENTREP::NbOfMatches_T nbOfMatches =
279  ioOpentrepService.interpretTravelRequest (iTravelQuery, lLocationList,
280  lNonMatchedWordList);
281 
282  oStr << nbOfMatches << " (geographical) location(s) have been found "
283  << "matching your query (`" << iTravelQuery << "'). "
284  << lNonMatchedWordList.size() << " words were left unmatched."
285  << std::endl;
286 
287  if (nbOfMatches != 0) {
288  OPENTREP::NbOfMatches_T idx = 1;
289  for (OPENTREP::LocationList_T::const_iterator itLocation =
290  lLocationList.begin();
291  itLocation != lLocationList.end(); ++itLocation, ++idx) {
292  const OPENTREP::Location& lLocation = *itLocation;
293  oStr << " [" << idx << "]: " << lLocation << std::endl;
294  }
295  }
296 
297  if (lNonMatchedWordList.empty() == false) {
298  oStr << "List of unmatched words:" << std::endl;
299 
300  OPENTREP::NbOfMatches_T idx = 1;
301  for (OPENTREP::WordList_T::const_iterator itWord =
302  lNonMatchedWordList.begin();
303  itWord != lNonMatchedWordList.end(); ++itWord, ++idx) {
304  const OPENTREP::Word_T& lWord = *itWord;
305  oStr << " [" << idx << "]: " << lWord << std::endl;
306  }
307  }
308 
309  return oStr.str();
310 }
311 
312 // /////////////// M A I N /////////////////
313 int main (int argc, char* argv[]) {
314 
315  /*
316  const OPENTREP::NbOfLetters_T lScaleArray[5] = {3, 6, 9, 14, 19};
317 
318  const OPENTREP::DistanceErrorScaleArray_T lScaleBoostArray =
319  { {3, 6, 9, 14, 19} };
320 
321  OPENTREP::DistanceErrorRule lScale (5, lScaleArray);
322  OPENTREP::DistanceErrorRule lScaleBoost (lScaleBoostArray);
323 
324  std::cout << "Standard array: " << lScale << std::endl;
325  std::cout << "Boost array: " << lScaleBoost << std::endl;
326 
327  for (int idx = 0; idx != 20; ++idx) {
328  std::cout << "For " << idx << " letters => "
329  << lScale.getAllowedDistanceError(idx) << std::endl;
330  }
331 
332  return 0;
333  */
334 
335  // Travel query
336  OPENTREP::TravelQuery_T lTravelQuery;
337 
338  // Output log File
339  std::string lLogFilename;
340 
341  // Xapian database name (directory of the index)
342  std::string lXapianDBNameStr;
343 
344  // Type of search
345  unsigned short lSearchType;
346 
347  // Xapian spelling error distance
348  unsigned short lSpellingErrorDistance;
349 
350  // SQL database type
351  std::string lSQLDBTypeStr;
352 
353  // SQL database connection string
354  std::string lSQLDBConnectionStr;
355 
356  // Deployment number/version
357  OPENTREP::DeploymentNumber_T lDeploymentNumber;
358 
359  // Call the command-line option parser
360  const int lOptionParserStatus =
361  readConfiguration (argc, argv, lSpellingErrorDistance, lTravelQuery,
362  lXapianDBNameStr, lSQLDBTypeStr, lSQLDBConnectionStr,
363  lDeploymentNumber, lLogFilename, lSearchType);
364 
365  if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) {
366  return 0;
367  }
368 
369  // Set the log parameters
370  std::ofstream logOutputFile;
371  // open and clean the log outputfile
372  logOutputFile.open (lLogFilename.c_str());
373  logOutputFile.clear();
374 
375  if (lSearchType == 0) {
376  // Initialise the context
377  const OPENTREP::TravelDBFilePath_T lXapianDBName (lXapianDBNameStr);
378  const OPENTREP::DBType lDBType (lSQLDBTypeStr);
379  const OPENTREP::SQLDBConnectionString_T lSQLDBConnStr (lSQLDBConnectionStr);
380  OPENTREP::OPENTREP_Service opentrepService (logOutputFile, lXapianDBName,
381  lDBType, lSQLDBConnStr,
382  lDeploymentNumber);
383 
384  // Parse the query and retrieve the places from Xapian only
385  const std::string& lOutput = parseQuery (opentrepService, lTravelQuery);
386  std::cout << lOutput;
387 
388  } else {
389  std::cout << "Finding the airports closest to: " << lTravelQuery
390  << std::endl;
391  }
392 
393  // Close the Log outputFile
394  logOutputFile.close();
395 
396  return 0;
397 }
int main(int argc, char *argv[])
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME("opentrep-indexer.log")
const unsigned short DEFAULT_OPENTREP_DEPLOYMENT_NUMBER
std::string createStringFromWordList(const WordList_T &iWordList)
unsigned short NbOfMatches_T
std::string parseAndDisplayConnectionString(const DBType &iDBType, const std::string &iSQLDBConnStr, const DeploymentNumber_T &iDeploymentNumber)
Definition: Utilities.cpp:255
int readConfiguration(int argc, char *argv[], std::string &ioPORFilepath, std::string &ioXapianDBFilepath, std::string &ioSQLDBTypeString, std::string &ioSQLDBConnectionString, unsigned short &ioDeploymentNumber, bool &ioIncludeNonIATAPOR, bool &ioIndexPORInXapian, bool &ioAddPORInDB, std::string &ioLogFilename)
Structure modelling a (geographical) location.
Definition: Location.hpp:25
const std::string DEFAULT_OPENTREP_MYSQL_CONN_STRING
std::basic_ostream< charT, traits > & operator<<(std::basic_ostream< charT, traits > &ioOut, const OPENTREP::StructAbstract &iStruct)
std::string Word_T
const std::string DEFAULT_OPENTREP_SQLITE_DB_FILEPATH
Interface for the OPENTREP Services.
void tokeniseStringIntoWordList(const std::string &iPhrase, WordList_T &ioWordList)
std::vector< std::string > WordList_T
std::list< Word_T > WordList_T
const unsigned short K_OPENTREP_DEFAULT_SEARCH_TYPE
Enumeration of database types.
Definition: DBType.hpp:17
const std::string DEFAULT_OPENTREP_SQL_DB_TYPE
std::string parseQuery(OPENTREP::OPENTREP_Service &ioOpentrepService, const OPENTREP::TravelQuery_T &iTravelQuery)
std::list< Location > LocationList_T
const std::string K_OPENTREP_DEFAULT_QUERY_STRING("sna francicso rio de janero lso angles reykyavki")
NbOfMatches_T interpretTravelRequest(const std::string &iTravelQuery, LocationList_T &, WordList_T &)
const std::string DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH
std::string TravelQuery_T
const int K_OPENTREP_EARLY_RETURN_STATUS
unsigned short DeploymentNumber_T
const unsigned short K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE