Blame - jni/libpcre/sources/pcrecpp.cc - jami-client-android

blob: eed425dce9a2b570035a6463a804f2cc2e7b8a47 [file] [log] [blame]

Tristan Matthews	0461646	2013-11-14 16:09:34 -0500	[diff] [blame]	1	// Copyright (c) 2010, Google Inc.
				2	// All rights reserved.
				3	//
				4	// Redistribution and use in source and binary forms, with or without
				5	// modification, are permitted provided that the following conditions are
				6	// met:
				7	//
				8	// * Redistributions of source code must retain the above copyright
				9	// notice, this list of conditions and the following disclaimer.
				10	// * Redistributions in binary form must reproduce the above
				11	// copyright notice, this list of conditions and the following disclaimer
				12	// in the documentation and/or other materials provided with the
				13	// distribution.
				14	// * Neither the name of Google Inc. nor the names of its
				15	// contributors may be used to endorse or promote products derived from
				16	// this software without specific prior written permission.
				17	//
				18	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				19	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				20	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				21	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				22	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				23	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				24	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				25	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				26	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				27	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				28	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				29	//
				30	// Author: Sanjay Ghemawat
				31
				32	#ifdef HAVE_CONFIG_H
				33	#include "config.h"
				34	#endif
				35
				36	#include <stdlib.h>
				37	#include <stdio.h>
				38	#include <ctype.h>
				39	#include <limits.h> /* for SHRT_MIN, USHRT_MAX, etc */
				40	#include <string.h> /* for memcpy */
				41	#include <assert.h>
				42	#include <errno.h>
				43	#include <string>
				44	#include <algorithm>
				45
				46	#include "pcrecpp_internal.h"
				47	#include "pcre.h"
				48	#include "pcrecpp.h"
				49	#include "pcre_stringpiece.h"
				50
				51
				52	namespace pcrecpp {
				53
				54	// Maximum number of args we can set
				55	static const int kMaxArgs = 16;
				56	static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
				57
				58	// Special object that stands-in for no argument
				59	Arg RE::no_arg((void*)NULL);
				60
				61	// This is for ABI compatibility with old versions of pcre (pre-7.6),
				62	// which defined a global no_arg variable instead of putting it in the
				63	// RE class. This works on GCC >= 3, at least. It definitely works
				64	// for ELF, but may not for other object formats (Mach-O, for
				65	// instance, does not support aliases.) We could probably have a more
				66	// inclusive test if we ever needed it. (Note that not only the
				67	// __attribute__ syntax, but also __USER_LABEL_PREFIX__, are
				68	// gnu-specific.)
				69	#if defined(__GNUC__) && __GNUC__ >= 3 && defined(__ELF__)
				70	# define ULP_AS_STRING(x) ULP_AS_STRING_INTERNAL(x)
				71	# define ULP_AS_STRING_INTERNAL(x) #x
				72	# define USER_LABEL_PREFIX_STR ULP_AS_STRING(__USER_LABEL_PREFIX__)
				73	extern Arg no_arg
				74	__attribute__((alias(USER_LABEL_PREFIX_STR "_ZN7pcrecpp2RE6no_argE")));
				75	#endif
				76
				77	// If a regular expression has no error, its error_ field points here
				78	static const string empty_string;
				79
				80	// If the user doesn't ask for any options, we just use this one
				81	static RE_Options default_options;
				82
				83	void RE::Init(const string& pat, const RE_Options* options) {
				84	pattern_ = pat;
				85	if (options == NULL) {
				86	options_ = default_options;
				87	} else {
				88	options_ = *options;
				89	}
				90	error_ = &empty_string;
				91	re_full_ = NULL;
				92	re_partial_ = NULL;
				93
				94	re_partial_ = Compile(UNANCHORED);
				95	if (re_partial_ != NULL) {
				96	re_full_ = Compile(ANCHOR_BOTH);
				97	}
				98	}
				99
				100	void RE::Cleanup() {
				101	if (re_full_ != NULL) (*pcre_free)(re_full_);
				102	if (re_partial_ != NULL) (*pcre_free)(re_partial_);
				103	if (error_ != &empty_string) delete error_;
				104	}
				105
				106
				107	RE::~RE() {
				108	Cleanup();
				109	}
				110
				111
				112	pcre* RE::Compile(Anchor anchor) {
				113	// First, convert RE_Options into pcre options
				114	int pcre_options = 0;
				115	pcre_options = options_.all_options();
				116
				117	// Special treatment for anchoring. This is needed because at
				118	// runtime pcre only provides an option for anchoring at the
				119	// beginning of a string (unless you use offset).
				120	//
				121	// There are three types of anchoring we want:
				122	// UNANCHORED Compile the original pattern, and use
				123	// a pcre unanchored match.
				124	// ANCHOR_START Compile the original pattern, and use
				125	// a pcre anchored match.
				126	// ANCHOR_BOTH Tack a "\z" to the end of the original pattern
				127	// and use a pcre anchored match.
				128
				129	const char* compile_error;
				130	int eoffset;
				131	pcre* re;
				132	if (anchor != ANCHOR_BOTH) {
				133	re = pcre_compile(pattern_.c_str(), pcre_options,
				134	&compile_error, &eoffset, NULL);
				135	} else {
				136	// Tack a '\z' at the end of RE. Parenthesize it first so that
				137	// the '\z' applies to all top-level alternatives in the regexp.
				138	string wrapped = "(?:"; // A non-counting grouping operator
				139	wrapped += pattern_;
				140	wrapped += ")\\z";
				141	re = pcre_compile(wrapped.c_str(), pcre_options,
				142	&compile_error, &eoffset, NULL);
				143	}
				144	if (re == NULL) {
				145	if (error_ == &empty_string) error_ = new string(compile_error);
				146	}
				147	return re;
				148	}
				149
				150	/*** Matching interfaces ***/
				151
				152	bool RE::FullMatch(const StringPiece& text,
				153	const Arg& ptr1,
				154	const Arg& ptr2,
				155	const Arg& ptr3,
				156	const Arg& ptr4,
				157	const Arg& ptr5,
				158	const Arg& ptr6,
				159	const Arg& ptr7,
				160	const Arg& ptr8,
				161	const Arg& ptr9,
				162	const Arg& ptr10,
				163	const Arg& ptr11,
				164	const Arg& ptr12,
				165	const Arg& ptr13,
				166	const Arg& ptr14,
				167	const Arg& ptr15,
				168	const Arg& ptr16) const {
				169	const Arg* args[kMaxArgs];
				170	int n = 0;
				171	if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
				172	if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
				173	if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
				174	if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
				175	if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
				176	if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
				177	if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
				178	if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
				179	if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
				180	if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
				181	if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
				182	if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
				183	if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
				184	if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
				185	if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
				186	if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
				187	done:
				188
				189	int consumed;
				190	int vec[kVecSize];
				191	return DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
				192	}
				193
				194	bool RE::PartialMatch(const StringPiece& text,
				195	const Arg& ptr1,
				196	const Arg& ptr2,
				197	const Arg& ptr3,
				198	const Arg& ptr4,
				199	const Arg& ptr5,
				200	const Arg& ptr6,
				201	const Arg& ptr7,
				202	const Arg& ptr8,
				203	const Arg& ptr9,
				204	const Arg& ptr10,
				205	const Arg& ptr11,
				206	const Arg& ptr12,
				207	const Arg& ptr13,
				208	const Arg& ptr14,
				209	const Arg& ptr15,
				210	const Arg& ptr16) const {
				211	const Arg* args[kMaxArgs];
				212	int n = 0;
				213	if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
				214	if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
				215	if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
				216	if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
				217	if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
				218	if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
				219	if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
				220	if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
				221	if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
				222	if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
				223	if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
				224	if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
				225	if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
				226	if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
				227	if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
				228	if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
				229	done:
				230
				231	int consumed;
				232	int vec[kVecSize];
				233	return DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
				234	}
				235
				236	bool RE::Consume(StringPiece* input,
				237	const Arg& ptr1,
				238	const Arg& ptr2,
				239	const Arg& ptr3,
				240	const Arg& ptr4,
				241	const Arg& ptr5,
				242	const Arg& ptr6,
				243	const Arg& ptr7,
				244	const Arg& ptr8,
				245	const Arg& ptr9,
				246	const Arg& ptr10,
				247	const Arg& ptr11,
				248	const Arg& ptr12,
				249	const Arg& ptr13,
				250	const Arg& ptr14,
				251	const Arg& ptr15,
				252	const Arg& ptr16) const {
				253	const Arg* args[kMaxArgs];
				254	int n = 0;
				255	if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
				256	if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
				257	if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
				258	if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
				259	if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
				260	if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
				261	if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
				262	if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
				263	if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
				264	if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
				265	if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
				266	if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
				267	if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
				268	if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
				269	if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
				270	if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
				271	done:
				272
				273	int consumed;
				274	int vec[kVecSize];
				275	if (DoMatchImpl(*input, ANCHOR_START, &consumed,
				276	args, n, vec, kVecSize)) {
				277	input->remove_prefix(consumed);
				278	return true;
				279	} else {
				280	return false;
				281	}
				282	}
				283
				284	bool RE::FindAndConsume(StringPiece* input,
				285	const Arg& ptr1,
				286	const Arg& ptr2,
				287	const Arg& ptr3,
				288	const Arg& ptr4,
				289	const Arg& ptr5,
				290	const Arg& ptr6,
				291	const Arg& ptr7,
				292	const Arg& ptr8,
				293	const Arg& ptr9,
				294	const Arg& ptr10,
				295	const Arg& ptr11,
				296	const Arg& ptr12,
				297	const Arg& ptr13,
				298	const Arg& ptr14,
				299	const Arg& ptr15,
				300	const Arg& ptr16) const {
				301	const Arg* args[kMaxArgs];
				302	int n = 0;
				303	if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
				304	if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
				305	if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
				306	if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
				307	if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
				308	if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
				309	if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
				310	if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
				311	if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
				312	if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
				313	if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
				314	if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
				315	if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
				316	if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
				317	if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
				318	if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
				319	done:
				320
				321	int consumed;
				322	int vec[kVecSize];
				323	if (DoMatchImpl(*input, UNANCHORED, &consumed,
				324	args, n, vec, kVecSize)) {
				325	input->remove_prefix(consumed);
				326	return true;
				327	} else {
				328	return false;
				329	}
				330	}
				331
				332	bool RE::Replace(const StringPiece& rewrite,
				333	string *str) const {
				334	int vec[kVecSize];
				335	int matches = TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize);
				336	if (matches == 0)
				337	return false;
				338
				339	string s;
				340	if (!Rewrite(&s, rewrite, *str, vec, matches))
				341	return false;
				342
				343	assert(vec[0] >= 0);
				344	assert(vec[1] >= 0);
				345	str->replace(vec[0], vec[1] - vec[0], s);
				346	return true;
				347	}
				348
				349	// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
				350	// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR \| P_N_LF.
				351	// Modified by PH to add PCRE_NEWLINE_ANY and PCRE_NEWLINE_ANYCRLF.
				352
				353	static int NewlineMode(int pcre_options) {
				354	// TODO: if we can make it threadsafe, cache this var
				355	int newline_mode = 0;
				356	/* if (newline_mode) return newline_mode; */ // do this once it's cached
				357	if (pcre_options & (PCRE_NEWLINE_CRLF\|PCRE_NEWLINE_CR\|PCRE_NEWLINE_LF\|
				358	PCRE_NEWLINE_ANY\|PCRE_NEWLINE_ANYCRLF)) {
				359	newline_mode = (pcre_options &
				360	(PCRE_NEWLINE_CRLF\|PCRE_NEWLINE_CR\|PCRE_NEWLINE_LF\|
				361	PCRE_NEWLINE_ANY\|PCRE_NEWLINE_ANYCRLF));
				362	} else {
				363	int newline;
				364	pcre_config(PCRE_CONFIG_NEWLINE, &newline);
				365	if (newline == 10)
				366	newline_mode = PCRE_NEWLINE_LF;
				367	else if (newline == 13)
				368	newline_mode = PCRE_NEWLINE_CR;
				369	else if (newline == 3338)
				370	newline_mode = PCRE_NEWLINE_CRLF;
				371	else if (newline == -1)
				372	newline_mode = PCRE_NEWLINE_ANY;
				373	else if (newline == -2)
				374	newline_mode = PCRE_NEWLINE_ANYCRLF;
				375	else
				376	assert(NULL == "Unexpected return value from pcre_config(NEWLINE)");
				377	}
				378	return newline_mode;
				379	}
				380
				381	int RE::GlobalReplace(const StringPiece& rewrite,
				382	string *str) const {
				383	int count = 0;
				384	int vec[kVecSize];
				385	string out;
				386	int start = 0;
				387	int lastend = -1;
				388	bool last_match_was_empty_string = false;
				389
				390	while (start <= static_cast<int>(str->length())) {
				391	// If the previous match was for the empty string, we shouldn't
				392	// just match again: we'll match in the same way and get an
				393	// infinite loop. Instead, we do the match in a special way:
				394	// anchored -- to force another try at the same position --
				395	// and with a flag saying that this time, ignore empty matches.
				396	// If this special match returns, that means there's a non-empty
				397	// match at this position as well, and we can continue. If not,
				398	// we do what perl does, and just advance by one.
				399	// Notice that perl prints '@@@' for this;
				400	// perl -le '$_ = "aa"; s/b*\|aa/@/g; print'
				401	int matches;
				402	if (last_match_was_empty_string) {
				403	matches = TryMatch(*str, start, ANCHOR_START, false, vec, kVecSize);
				404	if (matches <= 0) {
				405	int matchend = start + 1; // advance one character.
				406	// If the current char is CR and we're in CRLF mode, skip LF too.
				407	// Note it's better to call pcre_fullinfo() than to examine
				408	// all_options(), since options_ could have changed bewteen
				409	// compile-time and now, but this is simpler and safe enough.
				410	// Modified by PH to add ANY and ANYCRLF.
				411	if (matchend < static_cast<int>(str->length()) &&
				412	(str)[start] == '\r' && (str)[matchend] == '\n' &&
				413	(NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF \|\|
				414	NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY \|\|
				415	NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)) {
				416	matchend++;
				417	}
				418	// We also need to advance more than one char if we're in utf8 mode.
				419	#ifdef SUPPORT_UTF8
				420	if (options_.utf8()) {
				421	while (matchend < static_cast<int>(str->length()) &&
				422	((*str)[matchend] & 0xc0) == 0x80)
				423	matchend++;
				424	}
				425	#endif
				426	if (start < static_cast<int>(str->length()))
				427	out.append(*str, start, matchend - start);
				428	start = matchend;
				429	last_match_was_empty_string = false;
				430	continue;
				431	}
				432	} else {
				433	matches = TryMatch(*str, start, UNANCHORED, true, vec, kVecSize);
				434	if (matches <= 0)
				435	break;
				436	}
				437	int matchstart = vec[0], matchend = vec[1];
				438	assert(matchstart >= start);
				439	assert(matchend >= matchstart);
				440	out.append(*str, start, matchstart - start);
				441	Rewrite(&out, rewrite, *str, vec, matches);
				442	start = matchend;
				443	lastend = matchend;
				444	count++;
				445	last_match_was_empty_string = (matchstart == matchend);
				446	}
				447
				448	if (count == 0)
				449	return 0;
				450
				451	if (start < static_cast<int>(str->length()))
				452	out.append(*str, start, str->length() - start);
				453	swap(out, *str);
				454	return count;
				455	}
				456
				457	bool RE::Extract(const StringPiece& rewrite,
				458	const StringPiece& text,
				459	string *out) const {
				460	int vec[kVecSize];
				461	int matches = TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
				462	if (matches == 0)
				463	return false;
				464	out->erase();
				465	return Rewrite(out, rewrite, text, vec, matches);
				466	}
				467
				468	/static/ string RE::QuoteMeta(const StringPiece& unquoted) {
				469	string result;
				470
				471	// Escape any ascii character not in [A-Za-z_0-9].
				472	//
				473	// Note that it's legal to escape a character even if it has no
				474	// special meaning in a regular expression -- so this function does
				475	// that. (This also makes it identical to the perl function of the
				476	// same name; see `perldoc -f quotemeta`.) The one exception is
				477	// escaping NUL: rather than doing backslash + NUL, like perl does,
				478	// we do '\0', because pcre itself doesn't take embedded NUL chars.
				479	for (int ii = 0; ii < unquoted.size(); ++ii) {
				480	// Note that using 'isalnum' here raises the benchmark time from
				481	// 32ns to 58ns:
				482	if (unquoted[ii] == '\0') {
				483	result += "\\0";
				484	} else if ((unquoted[ii] < 'a' \|\| unquoted[ii] > 'z') &&
				485	(unquoted[ii] < 'A' \|\| unquoted[ii] > 'Z') &&
				486	(unquoted[ii] < '0' \|\| unquoted[ii] > '9') &&
				487	unquoted[ii] != '_' &&
				488	// If this is the part of a UTF8 or Latin1 character, we need
				489	// to copy this byte without escaping. Experimentally this is
				490	// what works correctly with the regexp library.
				491	!(unquoted[ii] & 128)) {
				492	result += '\\';
				493	result += unquoted[ii];
				494	} else {
				495	result += unquoted[ii];
				496	}
				497	}
				498
				499	return result;
				500	}
				501
				502	/*** Actual matching and rewriting code ***/
				503
				504	int RE::TryMatch(const StringPiece& text,
				505	int startpos,
				506	Anchor anchor,
				507	bool empty_ok,
				508	int *vec,
				509	int vecsize) const {
				510	pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
				511	if (re == NULL) {
				512	//fprintf(stderr, "Matching against invalid re: %s\n", error_->c_str());
				513	return 0;
				514	}
				515
				516	pcre_extra extra = { 0, 0, 0, 0, 0, 0 };
				517	if (options_.match_limit() > 0) {
				518	extra.flags \|= PCRE_EXTRA_MATCH_LIMIT;
				519	extra.match_limit = options_.match_limit();
				520	}
				521	if (options_.match_limit_recursion() > 0) {
				522	extra.flags \|= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
				523	extra.match_limit_recursion = options_.match_limit_recursion();
				524	}
				525
				526	int options = 0;
				527	if (anchor != UNANCHORED)
				528	options \|= PCRE_ANCHORED;
				529	if (!empty_ok)
				530	options \|= PCRE_NOTEMPTY;
				531
				532	int rc = pcre_exec(re, // The regular expression object
				533	&extra,
				534	(text.data() == NULL) ? "" : text.data(),
				535	text.size(),
				536	startpos,
				537	options,
				538	vec,
				539	vecsize);
				540
				541	// Handle errors
				542	if (rc == PCRE_ERROR_NOMATCH) {
				543	return 0;
				544	} else if (rc < 0) {
				545	//fprintf(stderr, "Unexpected return code: %d when matching '%s'\n",
				546	// re, pattern_.c_str());
				547	return 0;
				548	} else if (rc == 0) {
				549	// pcre_exec() returns 0 as a special case when the number of
				550	// capturing subpatterns exceeds the size of the vector.
				551	// When this happens, there is a match and the output vector
				552	// is filled, but we miss out on the positions of the extra subpatterns.
				553	rc = vecsize / 2;
				554	}
				555
				556	return rc;
				557	}
				558
				559	bool RE::DoMatchImpl(const StringPiece& text,
				560	Anchor anchor,
				561	int* consumed,
				562	const Arg* const* args,
				563	int n,
				564	int* vec,
				565	int vecsize) const {
				566	assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
				567	int matches = TryMatch(text, 0, anchor, true, vec, vecsize);
				568	assert(matches >= 0); // TryMatch never returns negatives
				569	if (matches == 0)
				570	return false;
				571
				572	*consumed = vec[1];
				573
				574	if (n == 0 \|\| args == NULL) {
				575	// We are not interested in results
				576	return true;
				577	}
				578
				579	if (NumberOfCapturingGroups() < n) {
				580	// RE has fewer capturing groups than number of arg pointers passed in
				581	return false;
				582	}
				583
				584	// If we got here, we must have matched the whole pattern.
				585	// We do not need (can not do) any more checks on the value of 'matches' here
				586	// -- see the comment for TryMatch.
				587	for (int i = 0; i < n; i++) {
				588	const int start = vec[2*(i+1)];
				589	const int limit = vec[2*(i+1)+1];
				590	if (!args[i]->Parse(text.data() + start, limit-start)) {
				591	// TODO: Should we indicate what the error was?
				592	return false;
				593	}
				594	}
				595
				596	return true;
				597	}
				598
				599	bool RE::DoMatch(const StringPiece& text,
				600	Anchor anchor,
				601	int* consumed,
				602	const Arg* const args[],
				603	int n) const {
				604	assert(n >= 0);
				605	size_t const vecsize = (1 + n) * 3; // results + PCRE workspace
				606	// (as for kVecSize)
				607	int space[21]; // use stack allocation for small vecsize (common case)
				608	int* vec = vecsize <= 21 ? space : new int[vecsize];
				609	bool retval = DoMatchImpl(text, anchor, consumed, args, n, vec, (int)vecsize);
				610	if (vec != space) delete [] vec;
				611	return retval;
				612	}
				613
				614	bool RE::Rewrite(string *out, const StringPiece &rewrite,
				615	const StringPiece &text, int *vec, int veclen) const {
				616	for (const char s = rewrite.data(), end = s + rewrite.size();
				617	s < end; s++) {
				618	int c = *s;
				619	if (c == '\\') {
				620	c = *++s;
				621	if (isdigit(c)) {
				622	int n = (c - '0');
				623	if (n >= veclen) {
				624	//fprintf(stderr, requested group %d in regexp %.*s\n",
				625	// n, rewrite.size(), rewrite.data());
				626	return false;
				627	}
				628	int start = vec[2 * n];
				629	if (start >= 0)
				630	out->append(text.data() + start, vec[2 * n + 1] - start);
				631	} else if (c == '\\') {
				632	*out += '\\';
				633	} else {
				634	//fprintf(stderr, "invalid rewrite pattern: %.*s\n",
				635	// rewrite.size(), rewrite.data());
				636	return false;
				637	}
				638	} else {
				639	*out += c;
				640	}
				641	}
				642	return true;
				643	}
				644
				645	// Return the number of capturing subpatterns, or -1 if the
				646	// regexp wasn't valid on construction.
				647	int RE::NumberOfCapturingGroups() const {
				648	if (re_partial_ == NULL) return -1;
				649
				650	int result;
				651	int pcre_retval = pcre_fullinfo(re_partial_, // The regular expression object
				652	NULL, // We did not study the pattern
				653	PCRE_INFO_CAPTURECOUNT,
				654	&result);
				655	assert(pcre_retval == 0);
				656	return result;
				657	}
				658
				659	/*** Parsers for various types ***/
				660
				661	bool Arg::parse_null(const char* str, int n, void* dest) {
				662	// We fail if somebody asked us to store into a non-NULL void* pointer
				663	return (dest == NULL);
				664	}
				665
				666	bool Arg::parse_string(const char* str, int n, void* dest) {
				667	if (dest == NULL) return true;
				668	reinterpret_cast<string*>(dest)->assign(str, n);
				669	return true;
				670	}
				671
				672	bool Arg::parse_stringpiece(const char* str, int n, void* dest) {
				673	if (dest == NULL) return true;
				674	reinterpret_cast<StringPiece*>(dest)->set(str, n);
				675	return true;
				676	}
				677
				678	bool Arg::parse_char(const char* str, int n, void* dest) {
				679	if (n != 1) return false;
				680	if (dest == NULL) return true;
				681	(reinterpret_cast<char>(dest)) = str[0];
				682	return true;
				683	}
				684
				685	bool Arg::parse_uchar(const char* str, int n, void* dest) {
				686	if (n != 1) return false;
				687	if (dest == NULL) return true;
				688	(reinterpret_cast<unsigned char>(dest)) = str[0];
				689	return true;
				690	}
				691
				692	// Largest number spec that we are willing to parse
				static const int kMaxNumberLength = 32;

				// REQUIRES "buf" must have length at least kMaxNumberLength+1
				// REQUIRES "n > 0"
				// Copies "str" into "buf" and null-terminates if necessary.
				// Returns one of:
				//      a. "str" if no termination is needed
				//      b. "buf" if the string was copied and null-terminated
				//      c. "" if the input was invalid and has no hope of being parsed
				//
				// NOTE: this reads str[n], the character right after the input text, so
				// the caller must make sure that byte is readable.
				static const char* TerminateNumber(char* buf, const char* str, int n) {
				  // The <ctype.h> classifiers need a value representable as unsigned char,
				  // so plain char is converted before it is passed in.
				  if ((n > 0) && isspace(static_cast<unsigned char>(*str))) {
				    // We are less forgiving than the strtoxxx() routines and do not
				    // allow leading spaces.
				    return "";
				  }

				  // See if the character right after the input text may potentially
				  // look like a digit.
				  const char next = str[n];
				  const bool next_looks_numeric =
				      isdigit(static_cast<unsigned char>(next)) ||
				      ((next >= 'a') && (next <= 'f')) ||
				      ((next >= 'A') && (next <= 'F'));
				  if (!next_looks_numeric) {
				    // We can parse right out of the supplied string, so return it.
				    return str;
				  }

				  if (n > kMaxNumberLength) return "";  // Input too big to be a valid number
				  memcpy(buf, str, n);
				  buf[n] = '\0';
				  return buf;
				}
				723
				724	bool Arg::parse_long_radix(const char* str,
				725	int n,
				726	void* dest,
				727	int radix) {
				728	if (n == 0) return false;
				729	char buf[kMaxNumberLength+1];
				730	str = TerminateNumber(buf, str, n);
				731	char* end;
				732	errno = 0;
				733	long r = strtol(str, &end, radix);
				734	if (end != str + n) return false; // Leftover junk
				735	if (errno) return false;
				736	if (dest == NULL) return true;
				737	(reinterpret_cast<long>(dest)) = r;
				738	return true;
				739	}
				740
				741	bool Arg::parse_ulong_radix(const char* str,
				742	int n,
				743	void* dest,
				744	int radix) {
				745	if (n == 0) return false;
				746	char buf[kMaxNumberLength+1];
				747	str = TerminateNumber(buf, str, n);
				748	if (str[0] == '-') return false; // strtoul() on a negative number?!
				749	char* end;
				750	errno = 0;
				751	unsigned long r = strtoul(str, &end, radix);
				752	if (end != str + n) return false; // Leftover junk
				753	if (errno) return false;
				754	if (dest == NULL) return true;
				755	(reinterpret_cast<unsigned long>(dest)) = r;
				756	return true;
				757	}
				758
				759	bool Arg::parse_short_radix(const char* str,
				760	int n,
				761	void* dest,
				762	int radix) {
				763	long r;
				764	if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
				765	if (r < SHRT_MIN \|\| r > SHRT_MAX) return false; // Out of range
				766	if (dest == NULL) return true;
				767	(reinterpret_cast<short>(dest)) = static_cast<short>(r);
				768	return true;
				769	}
				770
				771	bool Arg::parse_ushort_radix(const char* str,
				772	int n,
				773	void* dest,
				774	int radix) {
				775	unsigned long r;
				776	if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
				777	if (r > USHRT_MAX) return false; // Out of range
				778	if (dest == NULL) return true;
				779	(reinterpret_cast<unsigned short>(dest)) = static_cast<unsigned short>(r);
				780	return true;
				781	}
				782
				783	bool Arg::parse_int_radix(const char* str,
				784	int n,
				785	void* dest,
				786	int radix) {
				787	long r;
				788	if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
				789	if (r < INT_MIN \|\| r > INT_MAX) return false; // Out of range
				790	if (dest == NULL) return true;
				791	(reinterpret_cast<int>(dest)) = r;
				792	return true;
				793	}
				794
				795	bool Arg::parse_uint_radix(const char* str,
				796	int n,
				797	void* dest,
				798	int radix) {
				799	unsigned long r;
				800	if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
				801	if (r > UINT_MAX) return false; // Out of range
				802	if (dest == NULL) return true;
				803	(reinterpret_cast<unsigned int>(dest)) = r;
				804	return true;
				805	}
				806
				807	bool Arg::parse_longlong_radix(const char* str,
				808	int n,
				809	void* dest,
				810	int radix) {
				811	#ifndef HAVE_LONG_LONG
				812	return false;
				813	#else
				814	if (n == 0) return false;
				815	char buf[kMaxNumberLength+1];
				816	str = TerminateNumber(buf, str, n);
				817	char* end;
				818	errno = 0;
				819	#if defined HAVE_STRTOQ
				820	long long r = strtoq(str, &end, radix);
				821	#elif defined HAVE_STRTOLL
				822	long long r = strtoll(str, &end, radix);
				823	#elif defined HAVE__STRTOI64
				824	long long r = _strtoi64(str, &end, radix);
				825	#elif defined HAVE_STRTOIMAX
				826	long long r = strtoimax(str, &end, radix);
				827	#else
				828	#error parse_longlong_radix: cannot convert input to a long-long
				829	#endif
				830	if (end != str + n) return false; // Leftover junk
				831	if (errno) return false;
				832	if (dest == NULL) return true;
				833	(reinterpret_cast<long long>(dest)) = r;
				834	return true;
				835	#endif /* HAVE_LONG_LONG */
				836	}
				837
				838	bool Arg::parse_ulonglong_radix(const char* str,
				839	int n,
				840	void* dest,
				841	int radix) {
				842	#ifndef HAVE_UNSIGNED_LONG_LONG
				843	return false;
				844	#else
				845	if (n == 0) return false;
				846	char buf[kMaxNumberLength+1];
				847	str = TerminateNumber(buf, str, n);
				848	if (str[0] == '-') return false; // strtoull() on a negative number?!
				849	char* end;
				850	errno = 0;
				851	#if defined HAVE_STRTOQ
				852	unsigned long long r = strtouq(str, &end, radix);
				853	#elif defined HAVE_STRTOLL
				854	unsigned long long r = strtoull(str, &end, radix);
				855	#elif defined HAVE__STRTOI64
				856	unsigned long long r = _strtoui64(str, &end, radix);
				857	#elif defined HAVE_STRTOIMAX
				858	unsigned long long r = strtoumax(str, &end, radix);
				859	#else
				860	#error parse_ulonglong_radix: cannot convert input to a long-long
				861	#endif
				862	if (end != str + n) return false; // Leftover junk
				863	if (errno) return false;
				864	if (dest == NULL) return true;
				865	(reinterpret_cast<unsigned long long>(dest)) = r;
				866	return true;
				867	#endif /* HAVE_UNSIGNED_LONG_LONG */
				868	}
				869
				870	bool Arg::parse_double(const char* str, int n, void* dest) {
				871	if (n == 0) return false;
				872	static const int kMaxLength = 200;
				873	char buf[kMaxLength];
				874	if (n >= kMaxLength) return false;
				875	memcpy(buf, str, n);
				876	buf[n] = '\0';
				877	errno = 0;
				878	char* end;
				879	double r = strtod(buf, &end);
				880	if (end != buf + n) return false; // Leftover junk
				881	if (errno) return false;
				882	if (dest == NULL) return true;
				883	(reinterpret_cast<double>(dest)) = r;
				884	return true;
				885	}
				886
				887	bool Arg::parse_float(const char* str, int n, void* dest) {
				888	double r;
				889	if (!parse_double(str, n, &r)) return false;
				890	if (dest == NULL) return true;
				891	(reinterpret_cast<float>(dest)) = static_cast<float>(r);
				892	return true;
				893	}
				894
				895
				896	#define DEFINE_INTEGER_PARSERS(name) \
				897	bool Arg::parse_##name(const char* str, int n, void* dest) { \
				898	return parse_##name##_radix(str, n, dest, 10); \
				899	} \
				900	bool Arg::parse_##name##_hex(const char* str, int n, void* dest) { \
				901	return parse_##name##_radix(str, n, dest, 16); \
				902	} \
				903	bool Arg::parse_##name##_octal(const char* str, int n, void* dest) { \
				904	return parse_##name##_radix(str, n, dest, 8); \
				905	} \
				906	bool Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
				907	return parse_##name##_radix(str, n, dest, 0); \
				908	}
				909
				910	DEFINE_INTEGER_PARSERS(short) /* */
				911	DEFINE_INTEGER_PARSERS(ushort) /* */
				912	DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */
				913	DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */
				914	DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */
				915	DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */
				916	DEFINE_INTEGER_PARSERS(longlong) /* */
				917	DEFINE_INTEGER_PARSERS(ulonglong) /* */
				918
				919	#undef DEFINE_INTEGER_PARSERS
				920
				921	} // namespace pcrecpp