Blame - jni/commoncpp2-android/inc/cc++/tokenizer.h - jami-client-android

blob: 5435f3d60efc569d29a8663a739963fab4d85dea [file] [log] [blame]

Emeric Vigier	2f62582	2012-08-06 11:09:52 -0400	[diff] [blame]	1	// Copyright (C) 1999-2005 Open Source Telecom Corporation.
				2	// Copyright (C) 2006-2010 David Sugar, Tycho Softworks.
				3	//
				4	// This program is free software; you can redistribute it and/or modify
				5	// it under the terms of the GNU General Public License as published by
				6	// the Free Software Foundation; either version 2 of the License, or
				7	// (at your option) any later version.
				8	//
				9	// This program is distributed in the hope that it will be useful,
				10	// but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	// GNU General Public License for more details.
				13	//
				14	// You should have received a copy of the GNU General Public License
				15	// along with this program; if not, write to the Free Software
				16	// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
				17	//
				18	// As a special exception, you may use this file as part of a free software
				19	// library without restriction. Specifically, if other files instantiate
				20	// templates or use macros or inline functions from this file, or you compile
				21	// this file and link it with other files to produce an executable, this
				22	// file does not by itself cause the resulting executable to be covered by
				23	// the GNU General Public License. This exception does not however
				24	// invalidate any other reasons why the executable file might be covered by
				25	// the GNU General Public License.
				26	//
				27	// This exception applies only to the code released under the name GNU
				28	// Common C++. If you copy code from other releases into a copy of GNU
				29	// Common C++, as the General Public License permits, the exception does
				30	// not apply to the code that you add in this way. To avoid misleading
				31	// anyone as to the status of such modified files, you must delete
				32	// this exception notice from them.
				33	//
				34	// If you write modifications of your own for GNU Common C++, it is your choice
				35	// whether to permit this exception to apply to your modifications.
				36	// If you do not wish that, delete this exception notice.
				37	//
				38
				39	/**
				40	* @file tokenizer.h
				41	* @short string tokenizer.
				42	**/
				43
				44	#ifndef CCXX_TOKENIZER_H_
				45	#define CCXX_TOKENIZER_H_
				46
				47	#ifndef CCXX_MISSING_H_
				48	#include <cc++/missing.h>
				49	#endif
				50
				51	#ifndef CCXX_THREAD_H_
				52	#include <cc++/thread.h>
				53	#endif
				54
				55	#ifdef CCXX_NAMESPACES
				56	namespace ost {
				57	#endif
				58
				59	/**
				60	* Splits delimited string into tokens.
				61	*
				62	* The StringTokenizer takes a pointer to a string and a pointer
				63	* to a string containing a number of possible delimiters.
				64	* The StringTokenizer provides an input forward iterator which allows
				65	* to iterate through all tokens. An iterator behaves like a logical
				66	* pointer to the tokens, i.e. to shift to the next token, you've
				67	* to increment the iterator, you get the token by dereferencing the
				68	* iterator.
				69	*
				70	* Memory consumption:
				71	* This class operates on the original string and only allocates memory
				72	* for the individual tokens actually requested, so this class
				73	* allocates at maximum the space required for the longest token in the
				74	* given string.
				75	* Since for each iteration, memory is reclaimed for the last token,
				76	* you MAY NOT store pointers to them; if you need them afterwards,
				77	* copy them. You may not modify the original string while you operate
				78	* on it with the StringTokenizer; the behaviour is undefined in that
				79	* case.
				80	*
				81	* The iterator has one special method 'nextDelimiter()' which returns
				82	* a character containing the next delimiter following this
				83	* tokenization process or '\\0', if there are no following delimiters. In
				84	* case of skipAllDelim, it returns the FIRST delimiter.
				85	*
				86	* With the method 'setDelimiters(const char*)' you may change the
				87	* set of delimiters. It affects all running iterators.
				88	*
				89	* Example:
				90	* <code><pre>
				91	* StringTokenizer st("mary had a little lamb;its fleece was..", " ;");
				92	* StringTokenizer::iterator i;
				93	* for (i = st.begin() ; i != st.end() ; ++i) {
				94	* cout << "Token: '" << *i << "'\t";
				95	* cout << " next Delim: '" << i.nextDelimiter() << "'" << endl;
				96	* }
				97	* </pre></code>
				98	*
				99	* @author Henner Zeller <H.Zeller@acm.org>
				100	* @license LGPL
				101	*/
				102	class __EXPORT StringTokenizer {
				103	public:
				104	/**
				105	* a delimiter string containing all usual whitespace delimiters.
				106	* These are space, tab, newline, carriage return,
				107	* formfeed and vertical tab. (see isspace() manpage).
				108	*/
				109	static const char * const SPACE;
				110
				111	/**
				112	* Exception thrown, if someone tried to read beyond the
				113	* end of the tokens.
				114	* Will not happen if you use it the 'clean' way with comparison
				115	* against end(), but if you skip some tokens, because you 'know'
				116	* they are there. Simplifies error handling a lot, since you can
				117	* just read your tokens the way you expect it, and if there is some
				118	* error in the input this Exception will be thrown.
				119	*/
				120	// maybe move more global ?
				121	class NoSuchElementException { };
				122
				123	/**
				124	* The input forward iterator for tokens.
				125	* @author Henner Zeller
				126	*/
				127	class __EXPORT iterator {
				128	friend class StringTokenizer; // access our private constructors
				129	private:
				130	const StringTokenizer *myTok; // my StringTokenizer
				131	const char *start; // start of current token
				132	const char *tokEnd; // end of current token (->nxDelimiter)
				133	const char *endp; // one before next token
				134	char *token; // allocated token, if requested
				135
				136	// for initialization of the itEnd iterator
				137	iterator(const StringTokenizer &tok, const char *end)
				138	: myTok(&tok),tokEnd(0),endp(end),token(0) {}
				139
				140	iterator(const StringTokenizer &tok)
				141	: myTok(&tok),tokEnd(0),endp(myTok->str-1),token(0) {
				142	++(*this); // init first token.
				143	}
				144
				145	public:
				146	iterator() : myTok(0),start(0),tokEnd(0),endp(0),token(0) {}
				147
				148	// see also: comment in implementation of operator++
				149	virtual ~iterator()
				150	{ if (token) *token='\0'; delete [] token; }
				151
				152	/**
				153	* copy constructor.
				154	*/
				155	// everything, but not responsible for the allocated token.
				156	iterator(const iterator& i) :
				157	myTok(i.myTok),start(i.start),tokEnd(i.tokEnd),
				158	endp(i.endp),token(0) {}
				159
				160	/**
				161	* assignment operator.
				162	*/
				163	// everything, but not responsible for the allocated token.
				164	iterator &operator = (const iterator &i)
				165	{
				166	myTok = i.myTok;
				167	start = i.start; endp = i.endp; tokEnd = i.tokEnd;
				168	if ( token )
				169	delete [] token;
				170	token = 0;
				171	return *this;
				172	}
				173
				174	/**
				175	* shifts this iterator to the next token in the string.
				176	*/
				177	iterator &operator ++ () THROWS (NoSuchElementException);
				178
				179	/**
				180	* returns the immutable string this iterator
				181	* points to or '0' if no token is available (i.e.
				182	* i == end()).
				183	* Do not store pointers to this token, since it is
				184	* invalidated for each iteration. If you need the token,
				185	* copy it (e.g. with strdup());
				186	*/
				187	const char* operator * () THROWS (NoSuchElementException);
				188
				189	/**
				190	* returns the next delimiter after the current token or
				191	* '\\0', if there are no following delimiters.
				192	* It returns the very next delimiter (even if
				193	* skipAllDelim=true).
				194	*/
				195	inline char nextDelimiter() const
				196	{return (tokEnd) ? *tokEnd : '\0';}
				197
				198	/**
				199	* compares to other iterator. Usually used to
				200	* compare against the end() iterator.
				201	*/
				202	// only compare the end-position. speed.
				203	inline bool operator == (const iterator &other) const
				204	{return (endp == other.endp);}
				205
				206	/**
				207	* compares to other iterator. Usually used to
				208	* compare against the end() iterator.
				209	*/
				210	// only compare the end position. speed.
				211	inline bool operator != (const iterator &other) const
				212	{return (endp != other.endp);}
				213	};
				214	private:
				215	friend class StringTokenizer::iterator;
				216	const char *str;
				217	const char *delim;
				218	bool skipAll, trim;
				219	iterator itEnd;
				220
				221	public:
				222	/**
				223	* creates a new StringTokenizer for a string
				224	* and a given set of delimiters.
				225	*
				226	* @param str String to be split up. This string will
				227	* not be modified by this StringTokenizer,
				228	* but you may as well not modfiy this string
				229	* while tokenizing is in process, which may
				230	* lead to undefined behaviour.
				231	*
				232	* @param delim String containing the characters
				233	* which should be regarded as delimiters.
				234	*
				235	* @param skipAllDelim OPTIONAL.
				236	* true, if subsequent
				237	* delimiters should be skipped at once
				238	* or false, if empty tokens should
				239	* be returned for two delimiters with
				240	* no other text inbetween. The first
				241	* behaviour may be desirable for whitespace
				242	* skipping, the second for input with
				243	* delimited entry e.g. /etc/passwd like files
				244	* or CSV input.
				245	* NOTE, that 'true' here resembles the
				246	* ANSI-C strtok(char s,char d) behaviour.
				247	* DEFAULT = false
				248	*
				249	* @param trim OPTIONAL.
				250	* true, if the tokens returned
				251	* should be trimmed, so that they don't have
				252	* any whitespaces at the beginning or end.
				253	* Whitespaces are any of the characters
				254	* defined in StringTokenizer::SPACE.
				255	* If delim itself is StringTokenizer::SPACE,
				256	* this will result in a behaviour with
				257	* skipAllDelim = true.
				258	* DEFAULT = false
				259	*/
				260	StringTokenizer (const char *str,
				261	const char *delim,
				262	bool skipAllDelim = false,
				263	bool trim = false);
				264
				265	/**
				266	* create a new StringTokenizer which splits the input
				267	* string at whitespaces. The tokens are stripped from
				268	* whitespaces. This means, if you change the set of
				269	* delimiters in either the 'begin(const char *delim)' method
				270	* or in 'setDelimiters()', you then get whitespace
				271	* trimmed tokens, delimited by the new set.
				272	* Behaves like StringTokenizer(s, StringTokenizer::SPACE,false,true);
				273	*/
				274	StringTokenizer (const char *s);
				275
				276	/**
				277	* returns the begin iterator
				278	*/
				279	iterator begin() const
				280	{return iterator(*this);}
				281
				282	/**
				283	* changes the set of delimiters used in subsequent
				284	* iterations.
				285	*/
				286	void setDelimiters (const char *d)
				287	{delim = d;}
				288
				289	/**
				290	* returns a begin iterator with an alternate set of
				291	* delimiters.
				292	*/
				293	iterator begin(const char *d)
				294	{
				295	delim = d;
				296	return iterator(*this);
				297	}
				298
				299	/**
				300	* the iterator marking the end.
				301	*/
				302	const iterator& end() const
				303	{return itEnd;}
				304	};
				305
				306	#ifdef CCXX_NAMESPACES
				307	}
				308	#endif
				309
				310	#endif
				311
				312	/** EMACS **
				313	* Local variables:
				314	* mode: c++
				315	* c-basic-offset: 4
				316	* End:
				317	*/