OpenTREP Logo  0.07.11
C++ Open Travel Request Parsing Library
opentrep-indexer.cpp
Go to the documentation of this file.
1 // STL
2 #include <cassert>
3 #include <iostream>
4 #include <sstream>
5 #include <fstream>
6 #include <vector>
7 #include <string>
8 // Boost (Extended STL)
9 #include <boost/date_time/posix_time/posix_time.hpp>
10 #include <boost/date_time/gregorian/gregorian.hpp>
11 #include <boost/program_options.hpp>
12 // OpenTREP
14 #include <opentrep/Location.hpp>
15 #include <opentrep/CityDetails.hpp>
16 #include <opentrep/DBType.hpp>
19 #include <opentrep/config/opentrep-paths.hpp>
20 
21 
// //////// Type definitions ///////
/**
 * List of words, stored as standard strings.
 */
using WordList_T = std::vector<std::string>;


// //////// Constants //////
/**
 * Default name of the log file. It is used as the default value
 * of the "log" command-line/configuration option.
 */
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME = "opentrep-indexer.log";
31 
45 
46 
47 // ///////// Parsing of Options & Configuration /////////
50 
52 int readConfiguration (int argc, char* argv[],
53  std::string& ioPORFilepath,
54  std::string& ioXapianDBFilepath,
55  std::string& ioSQLDBTypeString,
56  std::string& ioSQLDBConnectionString,
57  unsigned short& ioDeploymentNumber,
58  bool& ioIncludeNonIATAPOR,
59  bool& ioIndexPORInXapian,
60  bool& ioAddPORInDB,
61  std::string& ioLogFilename,
62  std::ostringstream& oStr) {
63 
64  // Declare a group of options that will be allowed only on command line
65  boost::program_options::options_description generic ("Generic options");
66  generic.add_options()
67  ("prefix", "print installation prefix")
68  ("version,v", "print version string")
69  ("help,h", "produce help message");
70 
71  // Declare a group of options that will be allowed both on command
72  // line and in config file
73  boost::program_options::options_description config ("Configuration");
74  config.add_options()
75  ("porfile,p",
76  boost::program_options::value< std::string >(&ioPORFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_POR_FILEPATH),
77  "POR file-path (e.g., optd_por_public.csv)")
78  ("xapiandb,d",
79  boost::program_options::value< std::string >(&ioXapianDBFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH),
80  "Xapian database filepath (e.g., /tmp/opentrep/xapian_traveldb)")
81  ("sqldbtype,t",
82  boost::program_options::value< std::string >(&ioSQLDBTypeString)->default_value(OPENTREP::DEFAULT_OPENTREP_SQL_DB_TYPE),
83  "SQL database type (e.g., nodb for no SQL database, sqlite for SQLite, mysql for MariaDB/MySQL)")
84  ("sqldbconx,s",
85  boost::program_options::value< std::string >(&ioSQLDBConnectionString),
86  "SQL database connection string (e.g., ~/tmp/opentrep/sqlite_travel.db for SQLite, \"db=trep_trep user=trep password=trep\" for MariaDB/MySQL)")
87  ("deploymentnb,m",
88  boost::program_options::value<unsigned short>(&ioDeploymentNumber)->default_value(OPENTREP::DEFAULT_OPENTREP_DEPLOYMENT_NUMBER),
89  "Deployment number (from to N, where N=1 normally)")
90  ("noniata,n",
91  boost::program_options::value<bool>(&ioIncludeNonIATAPOR)->default_value(K_OPENTREP_DEFAULT_POR_INCLUDING),
92  "Whether or not to include POR not referenced by IATA (0 = only IATA-referenced POR, 1 = all POR are included)")
93  ("xapianindex,x",
94  boost::program_options::value<bool>(&ioIndexPORInXapian)->default_value(OPENTREP::DEFAULT_OPENTREP_INDEX_IN_XAPIAN),
95  "Whether or not to index the POR in Xapian (0 = do not touch the Xapian index, 1 = re-index all the POR in Xapian)")
96  ("dbadd,a",
97  boost::program_options::value<bool>(&ioAddPORInDB)->default_value(OPENTREP::DEFAULT_OPENTREP_ADD_IN_DB),
98  "Whether or not to add and index the POR in the SQL-based database (0 = do not touch the SQL-based database, 1 = add and re-index all the POR in the SQL-based database)")
99  ("log,l",
100  boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME),
101  "Filepath for the logs")
102  ;
103 
104  // Hidden options, will be allowed both on command line and
105  // in config file, but will not be shown to the user.
106  boost::program_options::options_description hidden ("Hidden options");
107  hidden.add_options()
108  ("copyright",
109  boost::program_options::value< std::vector<std::string> >(),
110  "Show the copyright (license)");
111 
112  boost::program_options::options_description cmdline_options;
113  cmdline_options.add(generic).add(config).add(hidden);
114 
115  boost::program_options::options_description config_file_options;
116  config_file_options.add(config).add(hidden);
117 
118  boost::program_options::options_description visible ("Allowed options");
119  visible.add(generic).add(config);
120 
121  boost::program_options::positional_options_description p;
122  p.add ("copyright", -1);
123 
124  boost::program_options::variables_map vm;
125  boost::program_options::
126  store (boost::program_options::command_line_parser (argc, argv).
127  options (cmdline_options).positional(p).run(), vm);
128 
129  std::ifstream ifs ("opentrep-indexer.cfg");
130  boost::program_options::store (parse_config_file (ifs, config_file_options),
131  vm);
132  boost::program_options::notify (vm);
133 
134  if (vm.count ("help")) {
135  std::cout << visible << std::endl;
137  }
138 
139  if (vm.count ("version")) {
140  std::cout << PACKAGE_NAME << ", version " << PACKAGE_VERSION << std::endl;
142  }
143 
144  if (vm.count ("prefix")) {
145  std::cout << "Installation prefix: " << PREFIXDIR << std::endl;
147  }
148 
149  if (vm.count ("porfile")) {
150  ioPORFilepath = vm["porfile"].as< std::string >();
151  oStr << "POR file-path is: " << ioPORFilepath << std::endl;
152  }
153 
154  if (vm.count ("deploymentnb")) {
155  ioDeploymentNumber = vm["deploymentnb"].as< unsigned short >();
156  oStr << "Deployment number: " << ioDeploymentNumber << std::endl;
157  }
158 
159  if (vm.count ("xapiandb")) {
160  ioXapianDBFilepath = vm["xapiandb"].as< std::string >();
161  oStr << "Xapian index/database filepath is: " << ioXapianDBFilepath
162  << ioDeploymentNumber << std::endl;
163  }
164 
165  // Parse the SQL database type, if any is given
166  if (vm.count ("sqldbtype")) {
167  ioSQLDBTypeString = vm["sqldbtype"].as< std::string >();
168  oStr << "SQL database type is: " << ioSQLDBTypeString << std::endl;
169  }
170 
182  const OPENTREP::DBType lDBType (ioSQLDBTypeString);
183  if (lDBType == OPENTREP::DBType::NODB) {
184  ioAddPORInDB = false;
185  ioSQLDBConnectionString = "";
186 
187  } else if (lDBType == OPENTREP::DBType::SQLITE3) {
188  ioAddPORInDB = true;
189  ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_SQLITE_DB_FILEPATH;
190 
191  } else if (lDBType == OPENTREP::DBType::MYSQL) {
192  ioAddPORInDB = true;
193  ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_MYSQL_CONN_STRING;
194  }
195 
196  // Set the SQL database connection string, if any is given
197  if (vm.count ("sqldbconx")) {
198  ioSQLDBConnectionString = vm["sqldbconx"].as< std::string >();
199  }
200 
201  // Reporting of the SQL database connection string
202  if (lDBType == OPENTREP::DBType::SQLITE3
203  || lDBType == OPENTREP::DBType::MYSQL) {
204  const std::string& lSQLDBConnString =
206  ioSQLDBConnectionString,
207  ioDeploymentNumber);
208  //
209  oStr << "SQL database connection string is: " << lSQLDBConnString
210  << std::endl;
211  }
212 
213  oStr << "Are non-IATA-referenced POR included? " << ioIncludeNonIATAPOR
214  << std::endl;
215 
216  oStr << "Index the POR in Xapian? " << ioIndexPORInXapian << std::endl;
217 
218  oStr << "Add and re-index the POR in the SQL-based database? " << ioAddPORInDB
219  << std::endl;
220 
221  if (vm.count ("log")) {
222  ioLogFilename = vm["log"].as< std::string >();
223  oStr << "Log filename is: " << ioLogFilename << std::endl;
224  }
225 
226  return 0;
227 }
228 
229 
230 // /////////////// M A I N /////////////////
231 int main (int argc, char* argv[]) {
232 
233  // Output log File
234  std::string lLogFilename;
235 
236  // File-path of POR (points of reference)
237  std::string lPORFilepathStr;
238 
239  // Xapian database name (directory of the index)
240  std::string lXapianDBNameStr;
241 
242  // SQL database type
243  std::string lSQLDBTypeStr;
244 
245  // SQL database connection string
246  std::string lSQLDBConnectionStr;
247 
248  // Deployment number/version
249  OPENTREP::DeploymentNumber_T lDeploymentNumber;
250 
251  // Whether or not to include non-IATA-referenced POR
252  OPENTREP::shouldIndexNonIATAPOR_T lIncludeNonIATAPOR;
253 
254  // Whether or not to index the POR in Xapian
255  OPENTREP::shouldIndexPORInXapian_T lShouldIndexPORInXapian;
256 
257  // Whether or not to insert the POR in the SQL database
258  OPENTREP::shouldAddPORInSQLDB_T lShouldAddPORInSQLDB;
259 
260  // Log stream for the introduction part
261  std::ostringstream oIntroStr;
262 
263  // Call the command-line option parser
264  const int lOptionParserStatus =
265  readConfiguration (argc, argv, lPORFilepathStr, lXapianDBNameStr,
266  lSQLDBTypeStr, lSQLDBConnectionStr, lDeploymentNumber,
267  lIncludeNonIATAPOR, lShouldIndexPORInXapian,
268  lShouldAddPORInSQLDB, lLogFilename, oIntroStr);
269 
270  if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) {
271  return 0;
272  }
273 
274  // Set the log parameters
275  std::ofstream logOutputFile;
276  // open and clean the log outputfile
277  logOutputFile.open (lLogFilename.c_str());
278  logOutputFile.clear();
279 
280  //
281  oIntroStr << "Parsing and indexing the OpenTravelData POR data file (into "
282  << "Xapian and/or SQL databases) may take a few tens of minutes "
283  << "on some architectures (and a few minutes on fastest ones)..."
284  << std::endl;
285  std::cout << oIntroStr.str();
286 
287  // DEBUG
288  // Get the current time in UTC Timezone
289  boost::posix_time::ptime lTimeUTC =
290  boost::posix_time::second_clock::universal_time();
291  logOutputFile << "[" << lTimeUTC << "][" << __FILE__ << "#"
292  << __LINE__ << "]:Parameters:" << std::endl
293  << oIntroStr.str() << std::endl;
294 
295  // Initialise the context
296  const OPENTREP::PORFilePath_T lPORFilepath (lPORFilepathStr);
297  const OPENTREP::TravelDBFilePath_T lXapianDBName (lXapianDBNameStr);
298  const OPENTREP::DBType lDBType (lSQLDBTypeStr);
299  const OPENTREP::SQLDBConnectionString_T lSQLDBConnStr (lSQLDBConnectionStr);
300  OPENTREP::OPENTREP_Service opentrepService (logOutputFile, lPORFilepath,
301  lXapianDBName, lDBType,
302  lSQLDBConnStr, lDeploymentNumber,
303  lIncludeNonIATAPOR,
304  lShouldIndexPORInXapian,
305  lShouldAddPORInSQLDB);
306 
307  // Launch the indexation
308  const OPENTREP::NbOfDBEntries_T lNbOfEntries =
309  opentrepService.insertIntoDBAndXapian();
310 
311  //
312  std::ostringstream oStr;
313  oStr << lNbOfEntries << " entries have been processed" << std::endl;
314  std::cout << oStr.str();
315 
316  // Get the current time in UTC Timezone
317  lTimeUTC = boost::posix_time::second_clock::universal_time();
318  logOutputFile << "[" << lTimeUTC << "][" << __FILE__ << "#"
319  << __LINE__ << "]:" << oStr.str() << std::endl;
320 
321  // Close the Log outputFile
322  logOutputFile.close();
323 
324  return 0;
325 }
Interface for the OPENTREP Services.
NbOfDBEntries_T insertIntoDBAndXapian()
const std::string DEFAULT_OPENTREP_SQLITE_DB_FILEPATH
bool shouldAddPORInSQLDB_T
unsigned int NbOfDBEntries_T
const std::string DEFAULT_OPENTREP_SQL_DB_TYPE
std::string parseAndDisplayConnectionString(const DBType &iDBType, const std::string &iSQLDBConnStr, const DeploymentNumber_T &iDeploymentNumber)
Definition: Utilities.cpp:273
const bool DEFAULT_OPENTREP_INDEX_IN_XAPIAN
const unsigned short DEFAULT_OPENTREP_DEPLOYMENT_NUMBER
bool shouldIndexPORInXapian_T
const std::string DEFAULT_OPENTREP_MYSQL_CONN_STRING
unsigned short DeploymentNumber_T
const std::string DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH
const bool DEFAULT_OPENTREP_ADD_IN_DB
const std::string DEFAULT_OPENTREP_POR_FILEPATH
bool shouldIndexNonIATAPOR_T
const int K_OPENTREP_EARLY_RETURN_STATUS
const bool K_OPENTREP_DEFAULT_POR_INCLUDING
int main(int argc, char *argv[])
int readConfiguration(int argc, char *argv[], std::string &ioPORFilepath, std::string &ioXapianDBFilepath, std::string &ioSQLDBTypeString, std::string &ioSQLDBConnectionString, unsigned short &ioDeploymentNumber, bool &ioIncludeNonIATAPOR, bool &ioIndexPORInXapian, bool &ioAddPORInDB, std::string &ioLogFilename, std::ostringstream &oStr)
std::vector< std::string > WordList_T
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME("opentrep-indexer.log")
Enumeration of database types.
Definition: DBType.hpp:17