blob: fdd0d50027e794f3aa8b58f4e4a9d9fa93a99b6e [file] [log] [blame]
/*
* Copyright (C) 2004, 2005, 2006, 2008, 2009, 2010, 2011 Savoir-Faire Linux Inc.
* Author: Pierre-Luc Bacon <pierre-luc.bacon@savoirfairelinux.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Additional permission under GNU GPL version 3 section 7:
*
* If you modify this program, or any covered work, by linking or
* combining it with the OpenSSL project's OpenSSL library (or a
* modified version of that library), containing parts covered by the
* terms of the OpenSSL or SSLeay licenses, Savoir-Faire Linux Inc.
* grants you additional permission to convey the resulting work.
* Corresponding Source for a non-source form of such a combination
* shall include the source code for the parts of OpenSSL used as well
* as that of the covered work.
*/
#include "pattern.h"
#include <sstream>
#include <cstdio>
namespace sfl {
Pattern::Pattern(const std::string& pattern, const std::string& options) :
pattern_(pattern),
subject_(),
re_(NULL),
ovector_(),
count_(0),
options_(0),
optionsDescription_(options)
{
// Set offsets
offset_[0] = offset_[1] = 0;
for (unsigned int i = 0; i < options.length(); i++) {
switch (options.at(i)) {
case 'i':
options_ |= PCRE_CASELESS;
break;
case 'm':
options_ |= PCRE_MULTILINE;
break;
case 's':
options_ |= PCRE_DOTALL;
break;
case 'x':
options_ |= PCRE_EXTENDED;
break;
}
}
// Compile the pattern.
compile();
}
Pattern::~Pattern()
{
if (re_ != NULL)
pcre_free(re_);
}
void Pattern::compile()
{
// Compile the pattern
int offset;
const char * error;
re_ = pcre_compile(pattern_.c_str(), 0, &error, &offset, NULL);
if (re_ == NULL) {
std::string offsetStr;
std::stringstream ss;
ss << offset;
offsetStr = ss.str();
std::string msg("PCRE compiling failed at offset " + offsetStr);
throw CompileError(msg);
}
// Allocate an appropriate amount
// of memory for the output vector.
int captureCount;
pcre_fullinfo(re_, NULL, PCRE_INFO_CAPTURECOUNT, &captureCount);
ovector_.clear();
ovector_.resize((captureCount + 1) * 3);
}
unsigned int Pattern::getCaptureGroupCount()
{
int captureCount = 0;
pcre_fullinfo(re_, NULL, PCRE_INFO_CAPTURECOUNT, &captureCount);
return captureCount;
}
std::vector<std::string> Pattern::groups()
{
const char ** stringList;
pcre_get_substring_list(subject_.c_str(), &ovector_[0], count_, &stringList);
std::vector<std::string> matchedSubstrings;
for (int i = 1; stringList[i] != NULL; i++)
matchedSubstrings.push_back(stringList[i]);
pcre_free_substring_list(stringList);
return matchedSubstrings;
}
std::string Pattern::group(int groupNumber)
{
const char * stringPtr;
int rc = pcre_get_substring(subject_.substr(offset_[0]).c_str(), &ovector_[0],
count_, groupNumber, &stringPtr);
if (rc < 0) {
switch (rc) {
case PCRE_ERROR_NOSUBSTRING:
throw std::out_of_range("Invalid group reference.");
case PCRE_ERROR_NOMEMORY:
throw MatchError("Memory exhausted.");
default:
throw MatchError("Failed to get named substring.");
}
}
std::string matchedStr(stringPtr);
pcre_free_substring(stringPtr);
return matchedStr;
}
std::string Pattern::group(const std::string& groupName)
{
const char * stringPtr = NULL;
int rc = pcre_get_named_substring(re_, subject_.substr(offset_[0]).c_str(),
&ovector_[0], count_, groupName.c_str(),
&stringPtr);
if (rc < 0) {
switch (rc) {
case PCRE_ERROR_NOSUBSTRING:
break;
case PCRE_ERROR_NOMEMORY:
throw MatchError("Memory exhausted.");
default:
throw MatchError("Failed to get named substring.");
}
}
std::string matchedStr;
if (stringPtr) {
matchedStr = stringPtr;
pcre_free_substring(stringPtr);
}
return matchedStr;
}
void Pattern::start(const std::string& groupName) const
{
int index = pcre_get_stringnumber(re_, groupName.c_str());
start(index);
}
size_t Pattern::start(unsigned int groupNumber) const
{
if (groupNumber <= (unsigned int) count_)
return ovector_[(groupNumber + 1) * 2];
else
throw std::out_of_range("Invalid group reference.");
return 0;
}
size_t Pattern::start() const
{
return ovector_[0] + offset_[0];
}
void Pattern::end(const std::string& groupName) const
{
int index = pcre_get_stringnumber(re_, groupName.c_str());
end(index);
}
size_t Pattern::end(unsigned int groupNumber) const
{
if (groupNumber <= (unsigned int) count_)
return ovector_[((groupNumber + 1) * 2) + 1 ] - 1;
else
throw std::out_of_range("Invalid group reference.");
return 0;
}
size_t Pattern::end() const
{
return (ovector_[1] - 1) + offset_[0];
}
bool Pattern::matches()
{
return matches(subject_);
return true;
}
bool Pattern::matches(const std::string& subject)
{
// Try to find a match for this pattern
int rc = pcre_exec(re_, NULL, subject.substr(offset_[1]).c_str(),
subject.length() - offset_[1], 0, options_, &ovector_[0],
ovector_.size());
// Matching failed.
if (rc < 0) {
offset_[0] = offset_[1] = 0;
return false;
}
// Handle the case if matching should be done globally
if (optionsDescription_.find("g") != std::string::npos) {
offset_[0] = offset_[1];
// New offset is old offset + end of relative offset
offset_[1] = ovector_[1] + offset_[0];
}
// Matching succeded but not enough space.
// @TODO figure out something more clever to do in this case.
if (rc == 0)
throw MatchError("No space to store all substrings.");
// Matching succeeded. Keep the number of substrings for
// subsequent calls to group().
count_ = rc;
return true;
}
std::vector<std::string> Pattern::split()
{
size_t tokenEnd = -1;
size_t tokenStart = 0;
std::vector<std::string> substringSplitted;
while (matches()) {
tokenStart = start();
substringSplitted.push_back(subject_.substr(tokenEnd + 1,
tokenStart - tokenEnd - 1));
tokenEnd = end();
}
substringSplitted.push_back(subject_.substr(tokenEnd + 1,
tokenStart - tokenEnd - 1));
return substringSplitted;
}
}