| // Copyright (c) 2005, Google Inc. |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| // |
| // Author: Sanjay Ghemawat |
| |
| #ifdef HAVE_CONFIG_H |
| #include "config.h" |
| #endif |
| |
| #include <vector> |
| #include <assert.h> |
| |
| #include "pcrecpp_internal.h" |
| #include "pcre_scanner.h" |
| |
| using std::vector; |
| |
| namespace pcrecpp { |
| |
| Scanner::Scanner() |
| : data_(), |
| input_(data_), |
| skip_(NULL), |
| should_skip_(false), |
| skip_repeat_(false), |
| save_comments_(false), |
| comments_(NULL), |
| comments_offset_(0) { |
| } |
| |
| Scanner::Scanner(const string& in) |
| : data_(in), |
| input_(data_), |
| skip_(NULL), |
| should_skip_(false), |
| skip_repeat_(false), |
| save_comments_(false), |
| comments_(NULL), |
| comments_offset_(0) { |
| } |
| |
| Scanner::~Scanner() { |
| delete skip_; |
| delete comments_; |
| } |
| |
| void Scanner::SetSkipExpression(const char* re) { |
| delete skip_; |
| if (re != NULL) { |
| skip_ = new RE(re); |
| should_skip_ = true; |
| skip_repeat_ = true; |
| ConsumeSkip(); |
| } else { |
| skip_ = NULL; |
| should_skip_ = false; |
| skip_repeat_ = false; |
| } |
| } |
| |
| void Scanner::Skip(const char* re) { |
| delete skip_; |
| if (re != NULL) { |
| skip_ = new RE(re); |
| should_skip_ = true; |
| skip_repeat_ = false; |
| ConsumeSkip(); |
| } else { |
| skip_ = NULL; |
| should_skip_ = false; |
| skip_repeat_ = false; |
| } |
| } |
| |
| void Scanner::DisableSkip() { |
| assert(skip_ != NULL); |
| should_skip_ = false; |
| } |
| |
| void Scanner::EnableSkip() { |
| assert(skip_ != NULL); |
| should_skip_ = true; |
| ConsumeSkip(); |
| } |
| |
| int Scanner::LineNumber() const { |
| // TODO: Make it more efficient by keeping track of the last point |
| // where we computed line numbers and counting newlines since then. |
| // We could use std:count, but not all systems have it. :-( |
| int count = 1; |
| for (const char* p = data_.data(); p < input_.data(); ++p) |
| if (*p == '\n') |
| ++count; |
| return count; |
| } |
| |
| int Scanner::Offset() const { |
| return (int)(input_.data() - data_.c_str()); |
| } |
| |
| bool Scanner::LookingAt(const RE& re) const { |
| int consumed; |
| return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0); |
| } |
| |
| |
| bool Scanner::Consume(const RE& re, |
| const Arg& arg0, |
| const Arg& arg1, |
| const Arg& arg2) { |
| const bool result = re.Consume(&input_, arg0, arg1, arg2); |
| if (result && should_skip_) ConsumeSkip(); |
| return result; |
| } |
| |
| // helper function to consume *skip_ and honour save_comments_ |
| void Scanner::ConsumeSkip() { |
| const char* start_data = input_.data(); |
| while (skip_->Consume(&input_)) { |
| if (!skip_repeat_) { |
| // Only one skip allowed. |
| break; |
| } |
| } |
| if (save_comments_) { |
| if (comments_ == NULL) { |
| comments_ = new vector<StringPiece>; |
| } |
| // already pointing one past end, so no need to +1 |
| int length = (int)(input_.data() - start_data); |
| if (length > 0) { |
| comments_->push_back(StringPiece(start_data, length)); |
| } |
| } |
| } |
| |
| |
| void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) { |
| // short circuit out if we've not yet initialized comments_ |
| // (e.g., when save_comments is false) |
| if (!comments_) { |
| return; |
| } |
| // TODO: if we guarantee that comments_ will contain StringPieces |
| // that are ordered by their start, then we can do a binary search |
| // for the first StringPiece at or past start and then scan for the |
| // ones contained in the range, quit early (use equal_range or |
| // lower_bound) |
| for (vector<StringPiece>::const_iterator it = comments_->begin(); |
| it != comments_->end(); ++it) { |
| if ((it->data() >= data_.c_str() + start && |
| it->data() + it->size() <= data_.c_str() + end)) { |
| ranges->push_back(*it); |
| } |
| } |
| } |
| |
| |
| void Scanner::GetNextComments(vector<StringPiece> *ranges) { |
| // short circuit out if we've not yet initialized comments_ |
| // (e.g., when save_comments is false) |
| if (!comments_) { |
| return; |
| } |
| for (vector<StringPiece>::const_iterator it = |
| comments_->begin() + comments_offset_; |
| it != comments_->end(); ++it) { |
| ranges->push_back(*it); |
| ++comments_offset_; |
| } |
| } |
| |
| } // namespace pcrecpp |