// jni/libucommon/sources/inc/commoncpp/tokenizer.h - jami-client-android - Gitiles

 // Copyright (C) 1999-2005 Open Source Telecom Corporation.
 // Copyright (C) 2006-2010 David Sugar, Tycho Softworks.
 //
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
 // the Free Software Foundation; either version 2 of the License, or
 // (at your option) any later version.
 //
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 // GNU General Public License for more details.
 //
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 //
 // As a special exception, you may use this file as part of a free software
 // library without restriction.  Specifically, if other files instantiate
 // templates or use macros or inline functions from this file, or you compile
 // this file and link it with other files to produce an executable, this
 // file does not by itself cause the resulting executable to be covered by
 // the GNU General Public License.  This exception does not however
 // invalidate any other reasons why the executable file might be covered by
 // the GNU General Public License.
 //
 // This exception applies only to the code released under the name GNU
 // Common C++.  If you copy code from other releases into a copy of GNU
 // Common C++, as the General Public License permits, the exception does
 // not apply to the code that you add in this way.  To avoid misleading
 // anyone as to the status of such modified files, you must delete
 // this exception notice from them.
 //
 // If you write modifications of your own for GNU Common C++, it is your choice
 // whether to permit this exception to apply to your modifications.
 // If you do not wish that, delete this exception notice.
 //

 /**
  * @file tokenizer.h
  * @short string tokenizer.
  **/

 #ifndef COMMONCPP_TOKENIZER_H_
 #define COMMONCPP_TOKENIZER_H_

 #ifndef COMMONCPP_CONFIG_H_
 #include <commoncpp/config.h>
 #endif

 #ifndef COMMONCPP_THREAD_H_
 #include <commoncpp/thread.h>
 #endif

 #ifndef COMMMONCPP_EXCEPTION_H_
 #include <commoncpp/exception.h>
 #endif

 NAMESPACE_COMMONCPP

 /**
  * Splits delimited string into tokens.
  *
  * The StringTokenizer takes a pointer to a string and a pointer
  * to a string containing a number of possible delimiters.
  * The StringTokenizer provides an input forward iterator which allows
  * to iterate through all tokens. An iterator behaves like a logical
  * pointer to the tokens, i.e. to shift to the next token, you've
  * to increment the iterator, you get the token by dereferencing the
  * iterator.
  *
  * Memory consumption:
  * This class operates on the original string and only allocates memory
  * for the individual tokens actually requested, so this class
  * allocates at maximum the space required for the longest token in the
  * given string.
  * Since for each iteration, memory is reclaimed for the last token,
  * you MAY NOT store pointers to them; if you need them afterwards,
  * copy them. You may not modify the original string while you operate
  * on it with the StringTokenizer; the behaviour is undefined in that
  * case.
  *
  * The iterator has one special method 'nextDelimiter()' which returns
  * a character containing the next delimiter following this
  * tokenization process or '\\0', if there are no following delimiters. In
  * case of skipAllDelim, it returns the FIRST delimiter.
  *
  * With the method 'setDelimiters(const char*)' you may change the
  * set of delimiters. It affects all running iterators.
  *
  * Example:
  * <code><pre>
  *  StringTokenizer st("mary had a little lamb;its fleece was..", " ;");
  *  StringTokenizer::iterator i;
  *  for (i = st.begin() ; i != st.end() ; ++i) {
  *        cout << "Token: '" << *i << "'\t";
  *        cout << " next Delim: '" << i.nextDelimiter() << "'" << endl;
  *  }
  *  </pre></code>
  *
  * @author Henner Zeller <H.Zeller@acm.org>
  * @license LGPL
  */
 class __EXPORT StringTokenizer {
 public:
     /**
      * a delimiter string containing all usual whitespace delimiters.
      * These are space, tab, newline, carriage return,
      * formfeed and vertical tab. (see isspace() manpage).
      */
     static const char * const SPACE;

     /**
      * Exception thrown, if someone tried to read beyond the
      * end of the tokens.
      * Will not happen if you use it the 'clean' way with comparison
      * against end(), but if you skip some tokens, because you 'know'
      * they are there. Simplifies error handling a lot, since you can
      * just read your tokens the way you expect it, and if there is some
      * error in the input this Exception will be thrown.
      */
     // maybe move more global ?
     class NoSuchElementException { };

     /**
      * The input forward iterator for tokens.
      * @author Henner Zeller
      */
     class __EXPORT iterator {
         friend class StringTokenizer;  // access our private constructors
     private:
         const StringTokenizer *myTok; // my StringTokenizer
         const char *start;      // start of current token
         const char *tokEnd;     // end of current token (->nxDelimiter)
         const char *endp;       // one before next token
         char *token;            // allocated token, if requested

         // for initialization of the itEnd iterator
         iterator(const StringTokenizer &tok, const char *end)
             : myTok(&tok),tokEnd(0),endp(end),token(0) {}

         iterator(const StringTokenizer &tok)
             : myTok(&tok),tokEnd(0),endp(myTok->str-1),token(0) {
             ++(*this); // init first token.
         }

     public:
         iterator() : myTok(0),start(0),tokEnd(0),endp(0),token(0) {}

         // see also: comment in implementation of operator++
         virtual ~iterator()
             { if (token) *token='\0'; delete [] token; }

         /**
          * copy constructor.
          */
         // everything, but not responsible for the allocated token.
         iterator(const iterator& i) :
             myTok(i.myTok),start(i.start),tokEnd(i.tokEnd),
             endp(i.endp),token(0) {}

         /**
          * assignment operator.
          */
         // everything, but not responsible for the allocated token.
         iterator &operator = (const iterator &i)
         {
             myTok = i.myTok;
             start = i.start; endp = i.endp; tokEnd = i.tokEnd;
             if ( token )
                 delete [] token;
             token = 0;
             return *this;
         }

         /**
          * shifts this iterator to the next token in the string.
          */
         iterator &operator ++ () THROWS (NoSuchElementException);

         /**
          * returns the immutable string this iterator
          * points to or '0' if no token is available (i.e.
          * i == end()).
          * Do not store pointers to this token, since it is
          * invalidated for each iteration. If you need the token,
          * copy it (e.g. with strdup());
          */
         const char*  operator *  () THROWS (NoSuchElementException);

         /**
          * returns the next delimiter after the current token or
          * '\\0', if there are no following delimiters.
          * It returns the very next delimiter (even if
          * skipAllDelim=true).
          */
         inline char nextDelimiter() const
             {return (tokEnd) ? *tokEnd : '\0';}

         /**
          * compares to other iterator. Usually used to
          * compare against the end() iterator.
          */
         // only compare the end-position. speed.
         inline bool operator == (const iterator &other) const
             {return (endp == other.endp);}

         /**
          * compares to other iterator. Usually used to
          * compare against the end() iterator.
          */
         // only compare the end position. speed.
         inline bool operator != (const iterator &other) const
             {return (endp != other.endp);}
     };
 private:
     friend class StringTokenizer::iterator;
     const char *str;
     const char *delim;
     bool skipAll, trim;
     iterator itEnd;

 public:
     /**
      * creates a new StringTokenizer for a string
      * and a given set of delimiters.
      *
      * @param  str          String to be split up. This string will
      *                      not be modified by this StringTokenizer,
      *                      but you may as well not modfiy this string
      *                      while tokenizing is in process, which may
      *                      lead to undefined behaviour.
      *
      * @param  delim        String containing the characters
      *                      which should be regarded as delimiters.
      *
      * @param  skipAllDelim OPTIONAL.
      *                      true, if subsequent
      *                      delimiters should be skipped at once
      *                      or false, if empty tokens should
      *                      be returned for two delimiters with
      *                      no other text inbetween. The first
      *                      behaviour may be desirable for whitespace
      *                      skipping, the second for input with
      *                      delimited entry e.g. /etc/passwd like files
      *                      or CSV input.
      *                      NOTE, that 'true' here resembles the
      *                      ANSI-C strtok(char *s,char *d) behaviour.
      *                      DEFAULT = false
      *
      * @param trim          OPTIONAL.
      *                      true, if the tokens returned
      *                      should be trimmed, so that they don't have
      *                      any whitespaces at the beginning or end.
      *                      Whitespaces are any of the characters
      *                      defined in StringTokenizer::SPACE.
      *                      If delim itself is StringTokenizer::SPACE,
      *                      this will result in a behaviour with
      *                      skipAllDelim = true.
      *                      DEFAULT = false
      */
     StringTokenizer (const char *str,
              const char *delim,
              bool skipAllDelim = false,
              bool trim = false);

     /**
      * create a new StringTokenizer which splits the input
      * string at whitespaces. The tokens are stripped from
      * whitespaces. This means, if you change the set of
      * delimiters in either the 'begin(const char *delim)' method
      * or in 'setDelimiters()', you then get whitespace
      * trimmed tokens, delimited by the new set.
      * Behaves like StringTokenizer(s, StringTokenizer::SPACE,false,true);
      */
     StringTokenizer (const char *s);

     /**
      * returns the begin iterator
      */
     iterator begin() const
         {return iterator(*this);}

     /**
      * changes the set of delimiters used in subsequent
      * iterations.
      */
     void setDelimiters (const char *d)
         {delim = d;}

     /**
      * returns a begin iterator with an alternate set of
      * delimiters.
      */
     iterator begin(const char *d)
     {
         delim = d;
         return iterator(*this);
     }

     /**
      * the iterator marking the end.
      */
     const iterator& end() const
         {return itEnd;}
 };

 END_NAMESPACE

 #endif

 /** EMACS **
  * Local variables:
  * mode: c++
  * c-basic-offset: 4
  * End:
  */
	// Copyright (C) 1999-2005 Open Source Telecom Corporation.
	// Copyright (C) 2006-2010 David Sugar, Tycho Softworks.
	//
	// This program is free software; you can redistribute it and/or modify
	// it under the terms of the GNU General Public License as published by
	// the Free Software Foundation; either version 2 of the License, or
	// (at your option) any later version.
	//
	// This program is distributed in the hope that it will be useful,
	// but WITHOUT ANY WARRANTY; without even the implied warranty of
	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	// GNU General Public License for more details.
	//
	// You should have received a copy of the GNU General Public License
	// along with this program; if not, write to the Free Software
	// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	//
	// As a special exception, you may use this file as part of a free software
	// library without restriction. Specifically, if other files instantiate
	// templates or use macros or inline functions from this file, or you compile
	// this file and link it with other files to produce an executable, this
	// file does not by itself cause the resulting executable to be covered by
	// the GNU General Public License. This exception does not however
	// invalidate any other reasons why the executable file might be covered by
	// the GNU General Public License.
	//
	// This exception applies only to the code released under the name GNU
	// Common C++. If you copy code from other releases into a copy of GNU
	// Common C++, as the General Public License permits, the exception does
	// not apply to the code that you add in this way. To avoid misleading
	// anyone as to the status of such modified files, you must delete
	// this exception notice from them.
	//
	// If you write modifications of your own for GNU Common C++, it is your choice
	// whether to permit this exception to apply to your modifications.
	// If you do not wish that, delete this exception notice.
	//

	/**
	* @file tokenizer.h
	* @short string tokenizer.
	**/

	#ifndef COMMONCPP_TOKENIZER_H_
	#define COMMONCPP_TOKENIZER_H_

	#ifndef COMMONCPP_CONFIG_H_
	#include <commoncpp/config.h>
	#endif

	#ifndef COMMONCPP_THREAD_H_
	#include <commoncpp/thread.h>
	#endif

	#ifndef COMMMONCPP_EXCEPTION_H_
	#include <commoncpp/exception.h>
	#endif

	NAMESPACE_COMMONCPP

	/**
	* Splits delimited string into tokens.
	*
	* The StringTokenizer takes a pointer to a string and a pointer
	* to a string containing a number of possible delimiters.
	* The StringTokenizer provides an input forward iterator which allows
	* to iterate through all tokens. An iterator behaves like a logical
	* pointer to the tokens, i.e. to shift to the next token, you've
	* to increment the iterator, you get the token by dereferencing the
	* iterator.
	*
	* Memory consumption:
	* This class operates on the original string and only allocates memory
	* for the individual tokens actually requested, so this class
	* allocates at maximum the space required for the longest token in the
	* given string.
	* Since for each iteration, memory is reclaimed for the last token,
	* you MAY NOT store pointers to them; if you need them afterwards,
	* copy them. You may not modify the original string while you operate
	* on it with the StringTokenizer; the behaviour is undefined in that
	* case.
	*
	* The iterator has one special method 'nextDelimiter()' which returns
	* a character containing the next delimiter following this
	* tokenization process or '\\0', if there are no following delimiters. In
	* case of skipAllDelim, it returns the FIRST delimiter.
	*
	* With the method 'setDelimiters(const char*)' you may change the
	* set of delimiters. It affects all running iterators.
	*
	* Example:
	* <code><pre>
	* StringTokenizer st("mary had a little lamb;its fleece was..", " ;");
	* StringTokenizer::iterator i;
	* for (i = st.begin() ; i != st.end() ; ++i) {
	* cout << "Token: '" << *i << "'\t";
	* cout << " next Delim: '" << i.nextDelimiter() << "'" << endl;
	* }
	* </pre></code>
	*
	* @author Henner Zeller <H.Zeller@acm.org>
	* @license LGPL
	*/
	class __EXPORT StringTokenizer {
	public:
	/**
	* a delimiter string containing all usual whitespace delimiters.
	* These are space, tab, newline, carriage return,
	* formfeed and vertical tab. (see isspace() manpage).
	*/
	static const char * const SPACE;

	/**
	* Exception thrown, if someone tried to read beyond the
	* end of the tokens.
	* Will not happen if you use it the 'clean' way with comparison
	* against end(), but if you skip some tokens, because you 'know'
	* they are there. Simplifies error handling a lot, since you can
	* just read your tokens the way you expect it, and if there is some
	* error in the input this Exception will be thrown.
	*/
	// maybe move more global ?
	class NoSuchElementException { };

	/**
	* The input forward iterator for tokens.
	* @author Henner Zeller
	*/
	class __EXPORT iterator {
	friend class StringTokenizer; // access our private constructors
	private:
	const StringTokenizer *myTok; // my StringTokenizer
	const char *start; // start of current token
	const char *tokEnd; // end of current token (->nxDelimiter)
	const char *endp; // one before next token
	char *token; // allocated token, if requested

	// for initialization of the itEnd iterator
	iterator(const StringTokenizer &tok, const char *end)
	: myTok(&tok),tokEnd(0),endp(end),token(0) {}

	iterator(const StringTokenizer &tok)
	: myTok(&tok),tokEnd(0),endp(myTok->str-1),token(0) {
	++(*this); // init first token.
	}

	public:
	iterator() : myTok(0),start(0),tokEnd(0),endp(0),token(0) {}

	// see also: comment in implementation of operator++
	virtual ~iterator()
	{ if (token) *token='\0'; delete [] token; }

	/**
	* copy constructor.
	*/
	// everything, but not responsible for the allocated token.
	iterator(const iterator& i) :
	myTok(i.myTok),start(i.start),tokEnd(i.tokEnd),
	endp(i.endp),token(0) {}

	/**
	* assignment operator.
	*/
	// everything, but not responsible for the allocated token.
	iterator &operator = (const iterator &i)
	{
	myTok = i.myTok;
	start = i.start; endp = i.endp; tokEnd = i.tokEnd;
	if ( token )
	delete [] token;
	token = 0;
	return *this;
	}

	/**
	* shifts this iterator to the next token in the string.
	*/
	iterator &operator ++ () THROWS (NoSuchElementException);

	/**
	* returns the immutable string this iterator
	* points to or '0' if no token is available (i.e.
	* i == end()).
	* Do not store pointers to this token, since it is
	* invalidated for each iteration. If you need the token,
	* copy it (e.g. with strdup());
	*/
	const char* operator * () THROWS (NoSuchElementException);

	/**
	* returns the next delimiter after the current token or
	* '\\0', if there are no following delimiters.
	* It returns the very next delimiter (even if
	* skipAllDelim=true).
	*/
	inline char nextDelimiter() const
	{return (tokEnd) ? *tokEnd : '\0';}

	/**
	* compares to other iterator. Usually used to
	* compare against the end() iterator.
	*/
	// only compare the end-position. speed.
	inline bool operator == (const iterator &other) const
	{return (endp == other.endp);}

	/**
	* compares to other iterator. Usually used to
	* compare against the end() iterator.
	*/
	// only compare the end position. speed.
	inline bool operator != (const iterator &other) const
	{return (endp != other.endp);}
	};
	private:
	friend class StringTokenizer::iterator;
	const char *str;
	const char *delim;
	bool skipAll, trim;
	iterator itEnd;

	public:
	/**
	* creates a new StringTokenizer for a string
	* and a given set of delimiters.
	*
	* @param str String to be split up. This string will
	* not be modified by this StringTokenizer,
	* but you may as well not modfiy this string
	* while tokenizing is in process, which may
	* lead to undefined behaviour.
	*
	* @param delim String containing the characters
	* which should be regarded as delimiters.
	*
	* @param skipAllDelim OPTIONAL.
	* true, if subsequent
	* delimiters should be skipped at once
	* or false, if empty tokens should
	* be returned for two delimiters with
	* no other text inbetween. The first
	* behaviour may be desirable for whitespace
	* skipping, the second for input with
	* delimited entry e.g. /etc/passwd like files
	* or CSV input.
	* NOTE, that 'true' here resembles the
	* ANSI-C strtok(char s,char d) behaviour.
	* DEFAULT = false
	*
	* @param trim OPTIONAL.
	* true, if the tokens returned
	* should be trimmed, so that they don't have
	* any whitespaces at the beginning or end.
	* Whitespaces are any of the characters
	* defined in StringTokenizer::SPACE.
	* If delim itself is StringTokenizer::SPACE,
	* this will result in a behaviour with
	* skipAllDelim = true.
	* DEFAULT = false
	*/
	StringTokenizer (const char *str,
	const char *delim,
	bool skipAllDelim = false,
	bool trim = false);

	/**
	* create a new StringTokenizer which splits the input
	* string at whitespaces. The tokens are stripped from
	* whitespaces. This means, if you change the set of
	* delimiters in either the 'begin(const char *delim)' method
	* or in 'setDelimiters()', you then get whitespace
	* trimmed tokens, delimited by the new set.
	* Behaves like StringTokenizer(s, StringTokenizer::SPACE,false,true);
	*/
	StringTokenizer (const char *s);

	/**
	* returns the begin iterator
	*/
	iterator begin() const
	{return iterator(*this);}

	/**
	* changes the set of delimiters used in subsequent
	* iterations.
	*/
	void setDelimiters (const char *d)
	{delim = d;}

	/**
	* returns a begin iterator with an alternate set of
	* delimiters.
	*/
	iterator begin(const char *d)
	{
	delim = d;
	return iterator(*this);
	}

	/**
	* the iterator marking the end.
	*/
	const iterator& end() const
	{return itEnd;}
	};

	END_NAMESPACE

	#endif

	/** EMACS **
	* Local variables:
	* mode: c++
	* c-basic-offset: 4
	* End:
	*/