Tristan Matthews | 0461646 | 2013-11-14 16:09:34 -0500 | [diff] [blame] | 1 | // Copyright (c) 2005, Google Inc. |
| 2 | // All rights reserved. |
| 3 | // |
| 4 | // Redistribution and use in source and binary forms, with or without |
| 5 | // modification, are permitted provided that the following conditions are |
| 6 | // met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright |
| 9 | // notice, this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above |
| 11 | // copyright notice, this list of conditions and the following disclaimer |
| 12 | // in the documentation and/or other materials provided with the |
| 13 | // distribution. |
| 14 | // * Neither the name of Google Inc. nor the names of its |
| 15 | // contributors may be used to endorse or promote products derived from |
| 16 | // this software without specific prior written permission. |
| 17 | // |
| 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | // |
| 30 | // Author: Sanjay Ghemawat |
| 31 | |
| 32 | #ifdef HAVE_CONFIG_H |
| 33 | #include "config.h" |
| 34 | #endif |
| 35 | |
| 36 | #include <vector> |
| 37 | #include <assert.h> |
| 38 | |
| 39 | #include "pcrecpp_internal.h" |
| 40 | #include "pcre_scanner.h" |
| 41 | |
| 42 | using std::vector; |
| 43 | |
| 44 | namespace pcrecpp { |
| 45 | |
| 46 | Scanner::Scanner() |
| 47 | : data_(), |
| 48 | input_(data_), |
| 49 | skip_(NULL), |
| 50 | should_skip_(false), |
| 51 | skip_repeat_(false), |
| 52 | save_comments_(false), |
| 53 | comments_(NULL), |
| 54 | comments_offset_(0) { |
| 55 | } |
| 56 | |
| 57 | Scanner::Scanner(const string& in) |
| 58 | : data_(in), |
| 59 | input_(data_), |
| 60 | skip_(NULL), |
| 61 | should_skip_(false), |
| 62 | skip_repeat_(false), |
| 63 | save_comments_(false), |
| 64 | comments_(NULL), |
| 65 | comments_offset_(0) { |
| 66 | } |
| 67 | |
| 68 | Scanner::~Scanner() { |
| 69 | delete skip_; |
| 70 | delete comments_; |
| 71 | } |
| 72 | |
| 73 | void Scanner::SetSkipExpression(const char* re) { |
| 74 | delete skip_; |
| 75 | if (re != NULL) { |
| 76 | skip_ = new RE(re); |
| 77 | should_skip_ = true; |
| 78 | skip_repeat_ = true; |
| 79 | ConsumeSkip(); |
| 80 | } else { |
| 81 | skip_ = NULL; |
| 82 | should_skip_ = false; |
| 83 | skip_repeat_ = false; |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | void Scanner::Skip(const char* re) { |
| 88 | delete skip_; |
| 89 | if (re != NULL) { |
| 90 | skip_ = new RE(re); |
| 91 | should_skip_ = true; |
| 92 | skip_repeat_ = false; |
| 93 | ConsumeSkip(); |
| 94 | } else { |
| 95 | skip_ = NULL; |
| 96 | should_skip_ = false; |
| 97 | skip_repeat_ = false; |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | void Scanner::DisableSkip() { |
| 102 | assert(skip_ != NULL); |
| 103 | should_skip_ = false; |
| 104 | } |
| 105 | |
| 106 | void Scanner::EnableSkip() { |
| 107 | assert(skip_ != NULL); |
| 108 | should_skip_ = true; |
| 109 | ConsumeSkip(); |
| 110 | } |
| 111 | |
| 112 | int Scanner::LineNumber() const { |
| 113 | // TODO: Make it more efficient by keeping track of the last point |
| 114 | // where we computed line numbers and counting newlines since then. |
| 115 | // We could use std:count, but not all systems have it. :-( |
| 116 | int count = 1; |
| 117 | for (const char* p = data_.data(); p < input_.data(); ++p) |
| 118 | if (*p == '\n') |
| 119 | ++count; |
| 120 | return count; |
| 121 | } |
| 122 | |
| 123 | int Scanner::Offset() const { |
| 124 | return (int)(input_.data() - data_.c_str()); |
| 125 | } |
| 126 | |
| 127 | bool Scanner::LookingAt(const RE& re) const { |
| 128 | int consumed; |
| 129 | return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0); |
| 130 | } |
| 131 | |
| 132 | |
| 133 | bool Scanner::Consume(const RE& re, |
| 134 | const Arg& arg0, |
| 135 | const Arg& arg1, |
| 136 | const Arg& arg2) { |
| 137 | const bool result = re.Consume(&input_, arg0, arg1, arg2); |
| 138 | if (result && should_skip_) ConsumeSkip(); |
| 139 | return result; |
| 140 | } |
| 141 | |
| 142 | // helper function to consume *skip_ and honour save_comments_ |
| 143 | void Scanner::ConsumeSkip() { |
| 144 | const char* start_data = input_.data(); |
| 145 | while (skip_->Consume(&input_)) { |
| 146 | if (!skip_repeat_) { |
| 147 | // Only one skip allowed. |
| 148 | break; |
| 149 | } |
| 150 | } |
| 151 | if (save_comments_) { |
| 152 | if (comments_ == NULL) { |
| 153 | comments_ = new vector<StringPiece>; |
| 154 | } |
| 155 | // already pointing one past end, so no need to +1 |
| 156 | int length = (int)(input_.data() - start_data); |
| 157 | if (length > 0) { |
| 158 | comments_->push_back(StringPiece(start_data, length)); |
| 159 | } |
| 160 | } |
| 161 | } |
| 162 | |
| 163 | |
| 164 | void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) { |
| 165 | // short circuit out if we've not yet initialized comments_ |
| 166 | // (e.g., when save_comments is false) |
| 167 | if (!comments_) { |
| 168 | return; |
| 169 | } |
| 170 | // TODO: if we guarantee that comments_ will contain StringPieces |
| 171 | // that are ordered by their start, then we can do a binary search |
| 172 | // for the first StringPiece at or past start and then scan for the |
| 173 | // ones contained in the range, quit early (use equal_range or |
| 174 | // lower_bound) |
| 175 | for (vector<StringPiece>::const_iterator it = comments_->begin(); |
| 176 | it != comments_->end(); ++it) { |
| 177 | if ((it->data() >= data_.c_str() + start && |
| 178 | it->data() + it->size() <= data_.c_str() + end)) { |
| 179 | ranges->push_back(*it); |
| 180 | } |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | |
| 185 | void Scanner::GetNextComments(vector<StringPiece> *ranges) { |
| 186 | // short circuit out if we've not yet initialized comments_ |
| 187 | // (e.g., when save_comments is false) |
| 188 | if (!comments_) { |
| 189 | return; |
| 190 | } |
| 191 | for (vector<StringPiece>::const_iterator it = |
| 192 | comments_->begin() + comments_offset_; |
| 193 | it != comments_->end(); ++it) { |
| 194 | ranges->push_back(*it); |
| 195 | ++comments_offset_; |
| 196 | } |
| 197 | } |
| 198 | |
| 199 | } // namespace pcrecpp |