Commit dde31901 authored by Cocophotos's avatar Cocophotos

Merge branch 'release/1.0'

parents 802bf0d6 291de40c
......@@ -2,3 +2,4 @@
*.user
*.swp
*.o
test/special_tests/*
# Build script for the OGRE executable: project declaration, user options,
# default build type and the list of sources to compile.
cmake_minimum_required(VERSION 2.8)
project(OGRE)

# option() expects <variable> "<help text>" [initial value]. The previous
# two-argument form passed "OFF" as the help text; the default stays OFF.
option(WITH_DEPXML "Build with DepXML input support (defines USE_DEPXML)" OFF)
option(WITH_TESTING "Enable the test section of this script" OFF)

# Fall back to a Release build when the user did not choose a build type.
if(NOT CMAKE_BUILD_TYPE)
  message(STATUS "No build type selected, default to Release")
  set(CMAKE_BUILD_TYPE "Release")
endif()

# Boost components handed to find_package(Boost ...) further down.
set(BOOST_COMPONENTS_NEEDED program_options filesystem system)

# Every source and header file found under src/ is compiled into the executable.
file(
  GLOB_RECURSE
  SRC_FILES
  src/*.cpp
  src/*.h
  src/*.hpp
)
# The following verifies that BOOST_ROOT is set properly.
# The environment is only consulted when BOOST_ROOT was not already given to CMake.
# $ENV{BOOST_ROOT} is quoted so that an empty or unset variable still leaves a
# well-formed comparison instead of an argument that silently disappears.
if(NOT BOOST_ROOT AND NOT "$ENV{BOOST_ROOT}" STREQUAL "")
  file(TO_CMAKE_PATH "$ENV{BOOST_ROOT}" BOOST_ROOT)
  if(NOT EXISTS "${BOOST_ROOT}")
    message(STATUS ${BOOST_ROOT} " does not exist. Checking if BOOST_ROOT was a quoted string..")
    # Strip literal double quotes that may have been stored in the environment variable.
    string(REPLACE "\"" "" BOOST_ROOT "${BOOST_ROOT}")
    if(EXISTS "${BOOST_ROOT}")
      message(STATUS "After removing the quotes " ${BOOST_ROOT} " was now found by CMake")
    endif()
  endif()
  # Save the BOOST_ROOT in the cache
  if(NOT EXISTS "${BOOST_ROOT}")
    message(WARNING ${BOOST_ROOT} " does not exist.")
  else()
    set(BOOST_ROOT ${BOOST_ROOT} CACHE STRING "Set the value of BOOST_ROOT to point to the root folder of your boost install." FORCE)
  endif()
endif()

# On Windows, remind the user to point BOOST_ROOT at the Boost installation.
if(WIN32 AND NOT BOOST_ROOT)
  message(WARNING "Please set the BOOST_ROOT environment variable.")
endif()
# Boost lookup settings. Boost_DEBUG is switched on for Debug builds only.
# NOTE(review): in the FindBoost module Boost_DEBUG controls diagnostic output of
# the lookup itself - confirm that this is what "Debug support" is meant to be.
set(Boost_DEBUG OFF)
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
  message(STATUS "Build with BOOST Debug support")
  set(Boost_DEBUG ON)
endif()

# Shared, multithreaded Boost libraries linked against the shared runtime.
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_STATIC_RUNTIME OFF)

# Not REQUIRED: the result is checked through Boost_FOUND afterwards.
find_package(Boost 1.46.0 COMPONENTS ${BOOST_COMPONENTS_NEEDED})
# Define the executable; this is only possible when Boost was located.
if(Boost_FOUND)
  message(STATUS "Setting up boost.")

  # Any true value of WITH_DEPXML (ON, 1, TRUE, YES, ...) enables DepXML support.
  # The former additional `STREQUAL "ON"` test ignored e.g. -DWITH_DEPXML=1.
  if(WITH_DEPXML)
    add_definitions(-DUSE_DEPXML=1)
  endif()

  include_directories(${Boost_INCLUDE_DIRS})
  # LinguisticGraph_INCLUDE_DIRS and OGREComponents_INCLUDE_DIRS are not set in
  # this script; presumably an enclosing project provides them - TODO confirm.
  include_directories(${LinguisticGraph_INCLUDE_DIRS})
  include_directories(${OGREComponents_INCLUDE_DIRS})

  add_executable(${PROJECT_NAME} ${SRC_FILES})
  target_link_libraries(${PROJECT_NAME} ${Boost_LIBRARIES} OGREComponents LinguisticGraph)
else()
  # Make the missing target visible instead of configuring successfully with nothing to build.
  message(WARNING "Boost was not found: the ${PROJECT_NAME} executable will not be built.")
endif()
##########################
# Test section. Only entered when WITH_TESTING is true and literally "ON".
# Its whole content is commented out, so it currently has no effect.
if(WITH_TESTING AND WITH_TESTING STREQUAL "ON")
  #add_subdirectory (test)
  #include (CTest)
  #set(BOOST_TESTS
  #
  #)
  #foreach(_test ${BOOST_TESTS})
  # add_test(
  # NAME
  # ${_test}
  # COMMAND
  # Test --run_test=${_test}
  # )
  #endforeach()
endif()
#include <exception>
#include "ogre.h"
/**
 * Program entry point.
 *
 * Initialises an OGRE instance from the command line and runs it.
 *
 * @param argc number of command-line arguments
 * @param argv command-line arguments
 * @return 0 when init() succeeded and run() returned, -1 when init() reported
 *         failure (init() also returns false after printing the help message).
 */
int main(int argc, char *argv[])
{
    // Automatic storage: the destructor runs on every return path, so no
    // manual delete has to be repeated for each exit.
    OGRE ogre;

    if( !ogre.init(argc, argv) )
    {
        std::cerr << "Initialisation failed, abort." << std::endl;
        return -1;
    }

    ogre.run();
    return 0;
}
#include "ogre.h"
#include "utils/ogre_utility.h"
#include <linguisticgraph/format/depxmlparser.h>
#include <fstream>
#include <ctime>
/**
 * Builds an idle OGRE instance.
 *
 * The owning pointers start out null because the destructor deletes them
 * unconditionally: without this, destroying an object whose init() failed or
 * whose run() never allocated the compiler/rewriter would delete
 * indeterminate pointers.
 */
OGRE::OGRE()
    : ogreOptions(NULL),
      m_compiler(NULL),
      m_rewriter(NULL),
      verbose(),            // value-initialised; presumably the quietest level - confirm against Verbosity
      no_rewriting(false)
{
}
/**
 * Releases everything the instance owns: the rule compiler, the rewriter,
 * the command-line options and every loaded sentence graph.
 */
OGRE::~OGRE()
{
    delete m_compiler;
    delete m_rewriter;
    delete ogreOptions;

    // The graphs are allocated with new in load_sentences(); clear() alone
    // would only drop the pointers and leak the graphs.
    for(vector<LinguisticGraph*>::iterator it = m_sentences.begin(); it != m_sentences.end(); ++it)
        delete *it;
    m_sentences.clear();
}
/**
 * Parses the command line and configures verbosity and the no-rewriting flag.
 *
 * @param argc number of command-line arguments
 * @param argv command-line arguments
 * @return true when the options were read; false when --help was requested
 *         (the help is printed) or when a std::exception was raised (its
 *         message is printed on std::cerr).
 */
bool OGRE::init( int argc, char* argv[] )
{
    try
    {
        ogreOptions = new CLIOptions(argc, argv);

        if( ogreOptions->exists("help") )
        {
            ogreOptions->help();
            return false;
        }

        // --verbose takes precedence over the --log level.
        if( !ogreOptions->exists("verbose") )
            verbose = define_verbosity(ogreOptions->value<string>("log"));
        else
            verbose = INFO;

        no_rewriting = ogreOptions->exists("no-rewriting");
    }
    catch(std::exception &failure)
    {
        std::cerr << failure.what() << std::endl;
        return false;
    }

    return true;
}
void OGRE::run()
{
try{
load_and_parse( ogreOptions->value<string>("rules") );
if( ogreOptions->value<string>("output") == "" && verbose == INFO)
std::cerr << "Outputting on STDOUT" << std::endl;
if( ogreOptions->value<string>("treebank") == "" ){ //Read on STDIN
if( verbose >= INFO )
std::cerr << "Reading on STDIN" << std::endl;
load_sentences();
}else{
load_sentences( find_sentences( ogreOptions->value<string>("treebank") ) );
}
rewrite( ogreOptions->value<string>("output") );
}catch(IOError &e){
std::cerr << e.what() << std::endl;
}catch(runtime_error &e){
std::cerr << e.what() << std::endl;
}
}
/**
 * Asks OGREStreamDispatcher for the files designated by a path.
 *
 * @param path path handed to OGREStreamDispatcher::dispatch
 * @return the file names filled in by the dispatcher
 *
 * The process is terminated with exit(-1) when the dispatcher reports failure.
 */
vector<string> OGRE::find_sentences(string path)
{
    std::vector<std::string> found;
    if( !OGREStreamDispatcher::dispatch(path, found) )
        exit(-1);
    return found;
}
/**
 * Reads a rule file entirely and compiles it with a new ogrecomponents::Compiler
 * stored in m_compiler.
 *
 * @param path path of the rule file
 * @throws IOError when the file cannot be opened
 * @throws std::runtime_error when the compilation fails
 */
void OGRE::load_and_parse(string path)
{
    ifstream rulesFile(path.c_str(), ios_base::in);
    if( !rulesFile )
        throw IOError("unable to open file");

    // Whitespace must be kept, so skipping is disabled before the
    // character-by-character read.
    rulesFile.unsetf(ios::skipws);
    string rulesText;
    rulesText.assign(istream_iterator<char>(rulesFile), istream_iterator<char>());

    if( verbose == INFO )
    {
        std::cerr << "-------------------------------" << std::endl;
        std::cerr << "Rules compilation in progress... please wait!" << std::endl;
    }

    m_compiler = new ogrecomponents::Compiler(rulesText, ogrecomponents::Compiler::Text, verbose);
    if( !m_compiler->compile() )
        throw std::runtime_error("Rules compilation failed");

    if( verbose == DEBUG )
    {
        std::cerr << "Compilation successful!" << std::endl;
        std::cerr << "-------------------------------" << std::endl;
    }
}
/**
 * Fills a graph from textual content using the parser matching the input type.
 *
 * @param type    input format: "conll", or "depxml" when built with USE_DEPXML
 * @param content text handed to the parser
 * @param graph   graph passed to the parser
 * @return the parser's result, or false (with a message on std::cerr) when
 *         the type is not recognised
 */
bool OGRE::makeFileParser(const string& type, const string& content, LinguisticGraph *graph)
{
    if( type == "conll" )
        return CoNLL::open(graph, content);

#ifdef USE_DEPXML
    if( type == "depxml" )
    {
        boost::shared_ptr<DepXMLParser> parser(new DepXMLParser(graph, false));
        return parser->read(content);
    }
#endif

    // No parser matched the requested type.
    std::cerr << "Type " + type + " cannot be recognised as valid." << std::endl;
    return false;
}
/**
 * Loads one graph per file and appends it to m_sentences.
 *
 * A file that cannot be parsed is skipped; a file that cannot be opened
 * stops the loading of the remaining files. Both cases are reported on
 * std::cerr.
 *
 * @param paths files to load, each parsed with the format given by the
 *              "type" option
 */
void OGRE::load_sentences(vector<string> paths)
{
    for(vector<string>::size_type i = 0; i < paths.size(); ++i)
    {
        const string &path = paths[i];
        fs::path p(path);
        ifstream file( p.c_str() );
        if( !file.is_open() )
        {
            std::cerr << "File " + path + " cannot be opened." << std::endl;
            std::cerr << "Abort..." << std::endl;
            break;
        }

        // Read loop kept as it was: the last getline(), which hits end-of-file,
        // still contributes one "\n". Whether the parsers rely on that trailing
        // newline is not visible here.
        string content;
        while( file.good() )
        {
            string line;
            getline( file, line );
            content.append(line+"\n");
        }
        file.close();

        LinguisticGraph *g = new LinguisticGraph;
        if( !makeFileParser(ogreOptions->value<string>("type"), content, g) )
        {
            std::cerr << "Unable to create graph for file '" << p.c_str() << "'" << std::endl;
            std::cerr << "Skip file..." << std::endl;
            delete g; // the graph never reaches m_sentences, so it must be freed here
            continue;
        }
        m_sentences.push_back(g);
    }
}
void OGRE::load_sentences()
{
string content;
string line;
while (getline(cin, line))
{
if( (line == "" || line == "\n") && !content.empty() )
{
LinguisticGraph *g = new LinguisticGraph;
if( !makeFileParser(ogreOptions->value<string>("type"), content, g) )
{
std::cerr << "Unable to create graph" << std::endl;
std::cerr << "Abort..." << std::endl;
return;
}
m_sentences.push_back(g);
content = "";
}
else
content.append(line+"\n");
}
if( !content.empty() )
{
LinguisticGraph *g = new LinguisticGraph;
if( !makeFileParser(ogreOptions->value<string>("type"), content, g) )
{
std::cerr << "Unable to create graph" << std::endl;
std::cerr << "Abort..." << std::endl;
return;
}
m_sentences.push_back(g);
}
}
/**
 * Applies the compiled rules to every loaded sentence and outputs the results.
 *
 * Each sentence is copied; unless no_rewriting is set, the rewriter works on
 * that copy. The copy is handed to saveNewGraph() when no_rewriting is set,
 * when the rewriter reported a rewriting, or when "force-output" was given.
 *
 * @param outputDir forwarded to saveNewGraph()
 */
void OGRE::rewrite(string outputDir)
{
    if( verbose == INFO )
    {
        ogrecomponents::Compiler::RulesConstIter rulesBegin, rulesEnd;
        boost::tie(rulesBegin, rulesEnd) = m_compiler->rules();
        std::cerr << std::distance(rulesBegin, rulesEnd) << " rule(s) to apply" << std::endl;
        std::cerr << m_sentences.size() << " sentence(s) to rewrite" << std::endl;
    }

    m_rewriter = new ogrecomponents::Rewriter(verbose);

    typedef vector<LinguisticGraph*>::const_iterator SentenceIter;
    for(SentenceIter current = m_sentences.begin(); current != m_sentences.end(); ++current)
    {
        LinguisticGraph *sentence = *current;
        LinguisticGraph *result = new LinguisticGraph(*sentence);
        bool rewritten = false; // did the rewriter report that rules were applied?

        if( !no_rewriting )
        {
            if( verbose >= INFO )
            {
                // The sentence id is read from the second node in order.
                LinguisticGraph::const_ordered_node_iter node = sentence->constOrderedNodes().first;
                ++node;
                std::string sentid = sentence->nodeProperties(node->second).feature("sentid");
                std::cerr << "Rewriting sentence " << sentid << std::endl;
            }
            rewritten = m_rewriter->rewrite(sentence, result, m_compiler);
        }

        /*
         * Output part of the algorithm
         */
        if( no_rewriting || rewritten || ogreOptions->exists("force-output") )
        {
            if( verbose >= DEBUG )
                std::cerr << "Producing the graph" << std::endl;

            // Save the new file or output on stdout
            if( !saveNewGraph(result, outputDir) )
                std::cerr << "Unable to produce the graph...";
        }

        delete result;
    } // End for on sentences
}
/**
 * Outputs a graph in CoNLL format.
 *
 * @param result    graph to serialise
 * @param outputDir output directory; only the empty value (output on
 *                  std::cout) is supported
 * @return true when the graph was written on std::cout, false when an output
 *         directory was requested (not implemented)
 */
bool OGRE::saveNewGraph(LinguisticGraph *result, string outputDir)
{
    if( !outputDir.empty() )
    {
        std::cerr << "output to file is not implemented yet" << std::endl;
        /*string filepath = "";
        if( !result->getFilepath().empty() )
        {
        fs::path path( result->getFilepath() );
        filepath = outputDir+"/"+path.stem().c_str()+".mod"+path.extension().c_str();
        }
        else
        {
        string input_type = ogreOptions->value<string>("type");
        string filename = lexical_cast<string>( time(NULL) );
        filepath = outputDir+"/"+filename+"."+input_type;
        }
        fs::path path(filepath);
        ofstream file( path.c_str() );
        if( file.is_open() )
        {
        file << CoNLL::save(result, out);
        file.close();
        return true;
        }else
        return false; */
        return false;
    }

    // No directory given: serialise and print on standard output.
    std::string serialised = "";
    CoNLL::save(result, serialised);
    std::cout << serialised << std::endl;
    return true;
}
#ifndef OGRE_H
#define OGRE_H
#include <vector>
#include <algorithm>
#include <functional>
#include <linguisticgraph/format/conll.h>
#include <ogrecomponents/compiler.h>
#include <ogrecomponents/rewriter.h>
#include <ogrecomponents/rule.h>
#include <ogrecomponents/utils.h>
#include "utils/ogrestream.h"
#include "utils/clioptions.h"
using namespace std;
/**
 * Command-line driver: reads the options, compiles the rewriting rules, loads
 * the sentences and rewrites them.
 *
 * The instance owns the objects behind its raw pointers and deletes them in
 * its destructor, so it is made non-copyable.
 */
class OGRE
{
public:
    OGRE();
    ~OGRE();

    /// Parses the command line; returns false on --help or on error.
    bool init( int argc, char* argv[] );
    /// Compiles the rules, loads the sentences and rewrites them.
    void run();

    /// Reads and compiles the rule file at `path`.
    void load_and_parse(string path);
    /// Returns the files designated by `path`; exits the process on failure.
    vector<string> find_sentences(string path);
    /// Loads one graph per file of `paths`.
    void load_sentences(vector<string> paths);
    /// Loads graphs from STDIN, separated by empty lines.
    void load_sentences();
    /// Rewrites every loaded sentence and outputs the results.
    void rewrite(string outputDir);

private:
    /// Fills `graph` from `content` with the parser matching `type`.
    bool makeFileParser(const string &type, const string &content, LinguisticGraph *graph);
    /// Outputs `result`; only output on STDOUT (empty `outputDir`) is supported.
    bool saveNewGraph(LinguisticGraph *result, string outputDir);

    // Copying would make two instances delete the same pointers; both
    // operations are declared private and intentionally left undefined.
    OGRE(const OGRE&);
    OGRE& operator=(const OGRE&);

private:
    vector<LinguisticGraph*> m_sentences;   // loaded sentence graphs
    CLIOptions *ogreOptions;                // parsed command line, created by init()
    ogrecomponents::Compiler *m_compiler;   // created by load_and_parse()
    ogrecomponents::Rewriter *m_rewriter;   // created by rewrite()
    Verbosity verbose;                      // verbosity level chosen in init()
    bool no_rewriting;                      // true when --no-rewriting was given
};
#endif // OGRE_H
#include "clioptions.h"
/**
 * Declares the supported options, then parses the command line into the
 * variables map.
 *
 * @param argc number of command-line arguments
 * @param argv command-line arguments
 */
CLIOptions::CLIOptions(int argc, char *argv[])
{
    addGeneralOptions();

    const po::parsed_options parsedArgs = po::parse_command_line(argc, argv, options);
    po::store(parsedArgs, vm);
    po::notify(vm);
}
void CLIOptions::addGeneralOptions()
{
#ifdef USE_DEPXML
const char* type_of_input = "Type of input (currently conll or depxml)";
#else
const char* type_of_input = "Type of input (currently conll only)";
#endif
po::options_description desc("General");
desc.add_options()
("help,h", "Help message")
("rules,r", po::value<string>(), "File with rewriting rules")
("treebank,t", po::value<string>()->default_value(""), "File or directory with the treebank to rewrite (read on STDIN if not provided)")
("type,f", po::value<string>()->default_value("conll", "conll"), type_of_input )
("log", po::value<string>()->default_value("none", "none"), "Set verbosity (currently 'none' for no output, 'info' for light information or 'debug' for large information)")
("verbose,v", "Set verbose mode")
("output,o", po::value<string>()->default_value(""), "Output directory (output on STDOUT if not provided)")
("force-output,p", "Force an output, even though nothing has been rewritten")
("no-rewriting,n", "Print the sentences where rules can be applied only.")
;
options.add( desc );
}
#ifndef CLIOPTIONS_H
#define CLIOPTIONS_H
#include <boost/program_options.hpp>
#include <iostream>
namespace po = boost::program_options;
using namespace std;
/**
 * Command-line options of the program, backed by boost::program_options.
 */
class CLIOptions
{
public:
// Declares the options and parses the given command line.
CLIOptions(int argc, char* argv[]);
// Returns the value of option `opt` as a T; throws runtime_error when the
// option is absent from the variables map.
template<class T> const T& value( const string& opt ) const;
// Tells whether option `opt` is present in the variables map.
bool exists( const string& opt) const;
// Prints the options description on std::cerr.
void help() const;
private:
// Registers the "General" option group.
void addGeneralOptions();
private:
// Description of every supported option.
po::options_description options;
// Values obtained from the command line.
po::variables_map vm;
};
/**
 * Tells whether an option is present in the variables map.
 *
 * @param opt long name of the option
 * @return true when the map holds an entry for `opt`
 */
inline bool CLIOptions::exists(const string &opt) const
{
    return vm.count( opt ) != 0;
}
/**
 * Returns the value of an option.
 *
 * @tparam T type the stored value is read as
 * @param opt long name of the option
 * @return reference to the stored value
 * @throws runtime_error when the option is absent; a hint is printed on
 *         std::cerr first
 */
template<class T> inline const T& CLIOptions::value( const string& opt ) const
{
    if( exists(opt) )
        return vm[opt].as<T>();

    std::cerr << opt << " is not specified" << std::endl;
    std::cerr << "Please see --help for more details." << std::endl;
    throw runtime_error("command line error");
}
/**
 * Prints the description of every option on std::cerr.
 */
inline void CLIOptions::help()const
{
    std::ostream &err = std::cerr;
    err << options;
    err << std::endl;
}
#endif // CLIOPTIONS_H
#ifndef OGRE_UTILITY_H
#define OGRE_UTILITY_H
#include <string>
#include <iterator>
#include <exception>
#include <boost/tokenizer.hpp>
using namespace std;
//////////////////////
// Functions
//////////////////////
/**
 * Splits a string with boost::tokenizer and a boost::char_separator built
 * from the given separator characters.
 *
 * @param Msg        text to split
 * @param Separators characters handed to boost::char_separator
 * @return the tokens, in order of appearance
 */
static vector<string> split( const std::string & Msg, const std::string & Separators )
{
    typedef boost::char_separator<char> Separator;
    typedef boost::tokenizer<Separator> Tokenizer;

    Separator delimiters( Separators.c_str() );
    Tokenizer tokens( Msg, delimiters );
    return vector<string>( tokens.begin(), tokens.end() );
}
/**
 * Copies the elements of an unordered set into a vector.
 *
 * The set is only read, so it is taken by const reference; this also lets
 * callers pass a const set.
 *
 * @tparam T element type
 * @param uset set to copy
 * @return the elements of `uset`, in the set's iteration order
 */
template<typename T>
static vector<T> toVector( const unordered_set<T>& uset)
{
    return vector<T>( uset.begin(), uset.end() );
}
#endif // OGRE_UTILITY_H
#include "ogrestream.h"
bool OGREStreamDispatcher::dispatch(std::string &path, std::vector<std::string> &files)
{
fs::path p(path);
OGREStream *stream = NULL;
if( fs::exists(p) )
{
if( fs::is_directory(p) )
{
stream = new OGREStreamPath;
bool ok = stream->find(path);
if(ok){
std::vector<std::string> stream_files = stream->files();
files.swap(stream_files);
}
return ok;
}
else if( fs::is_regular_file(p) )
{
stream = new OGREStreamFile;
bool ok = stream->find(path);
if(ok)
{
std::vector<std::string> stream_files = stream->files();
files.swap(stream_files);
}
return ok;
}
else
{