Tristan Matthews | 0461646 | 2013-11-14 16:09:34 -0500 | [diff] [blame] | 1 | {
|
| 2 | pcRegExp - Perl compatible regular expressions for Virtual Pascal
|
| 3 | (c) 2001 Peter S. Voronov aka Chem O'Dun <petervrn@yahoo.com>
|
| 4 |
|
| 5 | Based on PCRE library interface unit for Virtual Pascal.
|
| 6 | (c) 2001 Alexander Tokarev <dwalin@dwalin.ru>
|
| 7 |
|
| 8 | The current PCRE version is: 3.7
|
| 9 |
|
| 10 | This software may be distributed under the terms of the modified BSD license
|
| 11 | Copyright (c) 2001, Alexander Tokarev
|
| 12 | All rights reserved.
|
| 13 |
|
| 14 | Redistribution and use in source and binary forms, with or without
|
| 15 | modification, are permitted provided that the following conditions are met:
|
| 16 |
|
| 17 | * Redistributions of source code must retain the above copyright notice,
|
| 18 | this list of conditions and the following disclaimer.
|
| 19 | * Redistributions in binary form must reproduce the above copyright notice,
|
| 20 | this list of conditions and the following disclaimer in the documentation
|
| 21 | and/or other materials provided with the distribution.
|
| 22 | * Neither the name of the <ORGANIZATION> nor the names of its contributors
|
| 23 | may be used to endorse or promote products derived from this software without
|
| 24 | specific prior written permission.
|
| 25 |
|
| 26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
| 27 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
| 28 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
| 29 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
| 30 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
| 31 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
| 32 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
| 33 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
| 34 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 35 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 36 |
|
| 37 | The PCRE library is written by: Philip Hazel <ph10@cam.ac.uk>
|
| 38 | Copyright (c) 1997-2004 University of Cambridge
|
| 39 |
|
| 40 | AngelsHolocaust 4-11-04 updated to use version v5.0
|
| 41 | (INFO: this is regex-directed, NFA)
|
| 42 | AH: 9-11-04 - pcre_free: removed var, pcre already gives the ptr, now
|
| 43 | everything works as it should (no more crashes)
|
| 44 | -> removed CheckRegExp because pcre handles errors perfectly
|
| 45 | 10-11-04 - added pcError (errorhandling), pcInit
|
| 46 | 13-11-04 - removed the ErrorPos = 0 check -> always print erroroffset
|
| 47 | 17-10-05 - support for \1-\9 backreferences in TpcRegExp.GetReplStr
|
| 48 | 17-02-06 - added RunTimeOptions: caller can set options while searching
|
| 49 | 19-02-06 - added SearchOfs(): let PCRE use the complete string and offset
|
| 50 | into the string itself
|
| 51 | 20-12-06 - support for version 7.0
|
| 52 | 27.08.08 - support for v7.7
|
| 53 | }
|
| 54 |
|
| 55 | {$H+} {$DEFINE PCRE_3_7} {$DEFINE PCRE_5_0} {$DEFINE PCRE_7_0} {$DEFINE PCRE_7_7}
|
| 56 |
|
| 57 | Unit pcregexp;
|
| 58 |
|
| 59 | Interface
|
| 60 |
|
| 61 | uses objects;
|
| 62 |
|
| 63 | Type
|
| 64 | PpcRegExp = ^TpcRegExp;
|
| 65 | // TpcRegExp = object
|
| 66 | TpcRegExp = object(TObject)
|
| 67 | MatchesCount: integer;
|
| 68 | RegExpC, RegExpExt : Pointer;
|
| 69 | Matches:Pointer;
|
| 70 | RegExp: shortstring;
|
| 71 | SourceLen: integer;
|
| 72 | PartialMatch : boolean;
|
| 73 | Error : boolean;
|
| 74 | ErrorMsg : Pchar;
|
| 75 | ErrorPos : integer;
|
| 76 | RunTimeOptions: Integer; // options which can be set by the caller
|
| 77 | constructor Init(const ARegExp : shortstring; AOptions : integer; ALocale : Pointer);
|
| 78 | function Search(AStr: Pchar; ALen : longint) : boolean; virtual;
|
| 79 | function SearchNext( AStr: Pchar; ALen : longint) : boolean; virtual;
|
| 80 | function SearchOfs ( AStr: Pchar; ALen, AOfs : longint) : boolean; virtual;
|
| 81 | function MatchSub(ANom: integer; var Pos, Len : longint) : boolean; virtual;
|
| 82 | function MatchFull(var Pos, Len : longint) : boolean; virtual;
|
| 83 | function GetSubStr(ANom: integer; AStr: Pchar) : string; virtual;
|
| 84 | function GetFullStr(AStr: Pchar) : string; virtual;
|
| 85 | function GetReplStr(AStr: Pchar; const ARepl: string) : string; virtual;
|
| 86 | function GetPreSubStr(AStr: Pchar) : string; virtual;
|
| 87 | function GetPostSubStr(AStr: Pchar) : string; virtual;
|
| 88 | function ErrorStr : string; virtual;
|
| 89 | destructor Done; virtual;
|
| 90 | end;
|
| 91 |
|
| 92 | function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean;
|
| 93 | function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string;
|
| 94 |
|
| 95 | function pcFastGrepMatch(WildCard, aStr: string): Boolean;
|
| 96 | function pcFastGrepSub(WildCard, aStr, aRepl: string): string;
|
| 97 |
|
| 98 | {$IFDEF PCRE_5_0}
|
| 99 | function pcGetVersion : pchar;
|
| 100 | {$ENDIF}
|
| 101 |
|
| 102 | function pcError (var pRegExp : Pointer) : Boolean;
|
| 103 | function pcInit (const Pattern: Shortstring; CaseSens: Boolean) : Pointer;
|
| 104 |
|
| 105 | Const { Options }
|
| 106 | PCRE_CASELESS = $0001;
|
| 107 | PCRE_MULTILINE = $0002;
|
| 108 | PCRE_DOTALL = $0004;
|
| 109 | PCRE_EXTENDED = $0008;
|
| 110 | PCRE_ANCHORED = $0010;
|
| 111 | PCRE_DOLLAR_ENDONLY = $0020;
|
| 112 | PCRE_EXTRA = $0040;
|
| 113 | PCRE_NOTBOL = $0080;
|
| 114 | PCRE_NOTEOL = $0100;
|
| 115 | PCRE_UNGREEDY = $0200;
|
| 116 | PCRE_NOTEMPTY = $0400;
|
| 117 | {$IFDEF PCRE_5_0}
|
| 118 | PCRE_UTF8 = $0800;
|
| 119 | PCRE_NO_AUTO_CAPTURE = $1000;
|
| 120 | PCRE_NO_UTF8_CHECK = $2000;
|
| 121 | PCRE_AUTO_CALLOUT = $4000;
|
| 122 | PCRE_PARTIAL = $8000;
|
| 123 | {$ENDIF}
|
| 124 | {$IFDEF PCRE_7_0}
|
| 125 | PCRE_DFA_SHORTEST = $00010000;
|
| 126 | PCRE_DFA_RESTART = $00020000;
|
| 127 | PCRE_FIRSTLINE = $00040000;
|
| 128 | PCRE_DUPNAMES = $00080000;
|
| 129 | PCRE_NEWLINE_CR = $00100000;
|
| 130 | PCRE_NEWLINE_LF = $00200000;
|
| 131 | PCRE_NEWLINE_CRLF = $00300000;
|
| 132 | PCRE_NEWLINE_ANY = $00400000;
|
| 133 | PCRE_NEWLINE_ANYCRLF = $00500000;
|
| 134 |
|
| 135 | PCRE_NEWLINE_BITS = PCRE_NEWLINE_CR or PCRE_NEWLINE_LF or PCRE_NEWLINE_ANY;
|
| 136 |
|
| 137 | {$ENDIF}
|
| 138 | {$IFDEF PCRE_7_7}
|
| 139 | PCRE_BSR_ANYCRLF = $00800000;
|
| 140 | PCRE_BSR_UNICODE = $01000000;
|
| 141 | PCRE_JAVASCRIPT_COMPAT= $02000000;
|
| 142 | {$ENDIF}
|
| 143 |
|
| 144 | PCRE_COMPILE_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_AUTO_CALLOUT + PCRE_CASELESS +
|
| 145 | PCRE_DOLLAR_ENDONLY + PCRE_DOTALL + PCRE_EXTENDED +
|
| 146 | PCRE_EXTRA + PCRE_MULTILINE + PCRE_NO_AUTO_CAPTURE +
|
| 147 | PCRE_UNGREEDY + PCRE_UTF8 + PCRE_NO_UTF8_CHECK
|
| 148 | {$IFDEF PCRE_7_0}
|
| 149 | + PCRE_DUPNAMES + PCRE_FIRSTLINE + PCRE_NEWLINE_BITS
|
| 150 | {$ENDIF}
|
| 151 | {$IFDEF PCRE_7_7}
|
| 152 | + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE + PCRE_JAVASCRIPT_COMPAT
|
| 153 | {$ENDIF}
|
| 154 | ;
|
| 155 |
|
| 156 | PCRE_EXEC_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_NOTBOL + PCRE_NOTEOL +
|
| 157 | PCRE_NOTEMPTY + PCRE_NO_UTF8_CHECK + PCRE_PARTIAL
|
| 158 | {$IFDEF PCRE_7_0}
|
| 159 | + PCRE_NEWLINE_BITS
|
| 160 | {$ENDIF}
|
| 161 | {$IFDEF PCRE_7_7}
|
| 162 | + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE
|
| 163 | {$ENDIF}
|
| 164 | ;
|
| 165 |
|
| 166 | {$IFDEF PCRE_7_0}
|
| 167 | PCRE_DFA_EXEC_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_NOTBOL + PCRE_NOTEOL +
|
| 168 | PCRE_NOTEMPTY + PCRE_NO_UTF8_CHECK + PCRE_PARTIAL +
|
| 169 | PCRE_DFA_SHORTEST + PCRE_DFA_RESTART +
|
| 170 | PCRE_NEWLINE_BITS
|
| 171 | {$IFDEF PCRE_7_7}
|
| 172 | + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE
|
| 173 | {$ENDIF}
|
| 174 | ;
|
| 175 | {$ENDIF}
|
| 176 |
|
| 177 | { Exec-time and get/set-time error codes }
|
| 178 | PCRE_ERROR_NOMATCH = -1;
|
| 179 | PCRE_ERROR_NULL = -2;
|
| 180 | PCRE_ERROR_BADOPTION = -3;
|
| 181 | PCRE_ERROR_BADMAGIC = -4;
|
| 182 | PCRE_ERROR_UNKNOWN_MODE = -5;
|
| 183 | PCRE_ERROR_NOMEMORY = -6;
|
| 184 | PCRE_ERROR_NOSUBSTRING = -7;
|
| 185 | {$IFDEF PCRE_5_0}
|
| 186 | PCRE_ERROR_MATCHLIMIT = -8;
|
| 187 | PCRE_ERROR_CALLOUT = -9; { Never used by PCRE itself }
|
| 188 | PCRE_ERROR_BADUTF8 = -10;
|
| 189 | PCRE_ERROR_BADUTF8_OFFSET = -11;
|
| 190 | PCRE_ERROR_PARTIAL = -12;
|
| 191 | PCRE_ERROR_BADPARTIAL = -13;
|
| 192 | PCRE_ERROR_INTERNAL = -14;
|
| 193 | PCRE_ERROR_BADCOUNT = -15;
|
| 194 | {$ENDIF}
|
| 195 | {$IFDEF PCRE_7_0}
|
| 196 | PCRE_ERROR_DFA_UITEM = -16;
|
| 197 | PCRE_ERROR_DFA_UCOND = -17;
|
| 198 | PCRE_ERROR_DFA_UMLIMIT = -18;
|
| 199 | PCRE_ERROR_DFA_WSSIZE = -19;
|
| 200 | PCRE_ERROR_DFA_RECURSE = -20;
|
| 201 | PCRE_ERROR_RECURSIONLIMIT = -21;
|
| 202 | PCRE_ERROR_NULLWSLIMIT = -22;
|
| 203 | PCRE_ERROR_BADNEWLINE = -23;
|
| 204 | {$ENDIF}
|
| 205 |
|
| 206 | { Request types for pcre_fullinfo() }
|
| 207 |
|
| 208 | PCRE_INFO_OPTIONS = 0;
|
| 209 | PCRE_INFO_SIZE = 1;
|
| 210 | PCRE_INFO_CAPTURECOUNT = 2;
|
| 211 | PCRE_INFO_BACKREFMAX = 3;
|
| 212 | PCRE_INFO_FIRSTBYTE = 4;
|
| 213 | PCRE_INFO_FIRSTCHAR = 4; { For backwards compatibility }
|
| 214 | PCRE_INFO_FIRSTTABLE = 5;
|
| 215 | {$IFDEF PCRE_5_0}
|
| 216 | PCRE_INFO_LASTLITERAL = 6;
|
| 217 | PCRE_INFO_NAMEENTRYSIZE = 7;
|
| 218 | PCRE_INFO_NAMECOUNT = 8;
|
| 219 | PCRE_INFO_NAMETABLE = 9;
|
| 220 | PCRE_INFO_STUDYSIZE = 10;
|
| 221 | PCRE_INFO_DEFAULT_TABLES = 11;
|
| 222 | {$ENDIF PCRE_5_0}
|
| 223 | {$IFDEF PCRE_7_7}
|
| 224 | PCRE_INFO_OKPARTIAL = 12;
|
| 225 | PCRE_INFO_JCHANGED = 13;
|
| 226 | PCRE_INFO_HASCRORLF = 14;
|
| 227 | {$ENDIF}
|
| 228 |
|
| 229 | { Request types for pcre_config() }
|
| 230 | {$IFDEF PCRE_5_0}
|
| 231 | PCRE_CONFIG_UTF8 = 0;
|
| 232 | PCRE_CONFIG_NEWLINE = 1;
|
| 233 | PCRE_CONFIG_LINK_SIZE = 2;
|
| 234 | PCRE_CONFIG_POSIX_MALLOC_THRESHOLD = 3;
|
| 235 | PCRE_CONFIG_MATCH_LIMIT = 4;
|
| 236 | PCRE_CONFIG_STACKRECURSE = 5;
|
| 237 | PCRE_CONFIG_UNICODE_PROPERTIES = 6;
|
| 238 | {$ENDIF PCRE_5_0}
|
| 239 | {$IFDEF PCRE_7_0}
|
| 240 | PCRE_CONFIG_MATCH_LIMIT_RECURSION = 7;
|
| 241 | {$ENDIF}
|
| 242 | {$IFDEF PCRE_7_7}
|
| 243 | PCRE_CONFIG_BSR = 8;
|
| 244 | {$ENDIF}
|
| 245 |
|
| 246 | { Bit flags for the pcre_extra structure }
|
| 247 | {$IFDEF PCRE_5_0}
|
| 248 | PCRE_EXTRA_STUDY_DATA = $0001;
|
| 249 | PCRE_EXTRA_MATCH_LIMIT = $0002;
|
| 250 | PCRE_EXTRA_CALLOUT_DATA = $0004;
|
| 251 | PCRE_EXTRA_TABLES = $0008;
|
| 252 | {$ENDIF PCRE_5_0}
|
| 253 | {$IFDEF PCRE_7_0}
|
| 254 | PCRE_EXTRA_MATCH_LIMIT_RECURSION = $0010;
|
| 255 | {$ENDIF}
|
| 256 |
|
| 257 | Const
|
| 258 | // DefaultOptions : integer = 0;
|
| 259 | DefaultLocaleTable : pointer = nil;
|
| 260 |
|
| 261 | {$IFDEF PCRE_5_0}
|
| 262 | { The structure for passing additional data to pcre_exec(). This is defined in
|
| 263 | such as way as to be extensible. Always add new fields at the end, in order to
|
| 264 | remain compatible. }
|
| 265 |
|
| 266 | type ppcre_extra = ^tpcre_extra;
|
| 267 | tpcre_extra = record
|
| 268 | flags : longint; { Bits for which fields are set }
|
| 269 | study_data : pointer; { Opaque data from pcre_study() }
|
| 270 | match_limit : longint; { Maximum number of calls to match() }
|
| 271 | callout_data : pointer; { Data passed back in callouts }
|
| 272 | tables : pointer; { Pointer to character tables }
|
| 273 | match_limit_recursion: longint; { Max recursive calls to match() }
|
| 274 | end;
|
| 275 |
|
| 276 | type ppcre_callout_block = ^pcre_callout_block;
|
| 277 | pcre_callout_block = record
|
| 278 | version,
|
| 279 | (* ------------------------ Version 0 ------------------------------- *)
|
| 280 | callout_number : integer;
|
| 281 | offset_vector : pointer;
|
| 282 | subject : pchar;
|
| 283 | subject_length, start_match, current_position, capture_top,
|
| 284 | capture_last : integer;
|
| 285 | callout_data : pointer;
|
| 286 | (* ------------------- Added for Version 1 -------------------------- *)
|
| 287 | pattern_position, next_item_length : integer;
|
| 288 | end;
|
| 289 | {$ENDIF PCRE_5_0}
|
| 290 |
|
| 291 | {$OrgName+}
|
| 292 | {$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL}
|
| 293 |
|
| 294 | { local replacement of external pcre memory management functions }
|
| 295 | function pcre_malloc( size : integer ) : pointer;
|
| 296 | procedure pcre_free( {var} p : pointer );
|
| 297 | {$IFDEF PCRE_5_0}
|
| 298 | const pcre_stack_malloc: function ( size : integer ): pointer = pcre_malloc;
|
| 299 | pcre_stack_free: procedure ( {var} p : pointer ) = pcre_free;
|
| 300 | function pcre_callout(var p : ppcre_callout_block) : integer;
|
| 301 | {$ENDIF PCRE_5_0}
|
| 302 | {$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL}
|
| 303 |
|
| 304 | Implementation
|
| 305 |
|
| 306 | Uses strings, collect, messages, dnapp, commands, advance0, stringsx
|
| 307 | {$IFDEF VIRTUALPASCAL} ,vpsyslow {$ENDIF VIRTUALPASCAL};
|
| 308 |
|
| 309 | Const
|
| 310 | MAGIC_NUMBER = $50435245; { 'PCRE' }
|
| 311 | MAX_MATCHES = 90; { changed in 3.5 version; should be divisible by 3, was 64}
|
| 312 |
|
| 313 | Type
|
| 314 | PMatchArray = ^TMatchArray;
|
| 315 | TMatchArray = array[0..( MAX_MATCHES * 3 )] of integer;
|
| 316 |
|
| 317 | PRegExpCollection = ^TRegExpCollection;
|
| 318 | TRegExpCollection = object(TSortedCollection)
|
| 319 | MaxRegExp : integer;
|
| 320 | SearchRegExp : shortstring;
|
| 321 | CompareModeInsert : boolean;
|
| 322 | constructor Init(AMaxRegExp:integer);
|
| 323 | procedure FreeItem(P: Pointer); virtual;
|
| 324 | function Compare(P1, P2: Pointer): Integer; virtual;
|
| 325 | function Find(ARegExp:shortstring;var P: PpcRegExp):boolean; virtual;
|
| 326 | function CheckNew(ARegExp:shortstring):PpcRegExp;virtual;
|
| 327 | end;
|
| 328 |
|
| 329 | Var
|
| 330 | PRegExpCache : PRegExpCollection;
|
| 331 |
|
| 332 |
|
| 333 | {$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL}
|
| 334 |
|
| 335 | { imported original pcre functions }
|
| 336 |
|
| 337 | function pcre_compile( const pattern : PChar; options : integer;
|
| 338 | var errorptr : PChar; var erroroffset : integer;
|
| 339 | const tables : PChar ) : pointer {pcre}; external;
|
| 340 | {$IFDEF PCRE_7_0}
|
| 341 | function pcre_compile2( const pattern : PChar; options : integer;
|
| 342 | var errorcodeptr : Integer;
|
| 343 | var errorptr : PChar; var erroroffset : integer;
|
| 344 | const tables : PChar ) : pointer {pcre}; external;
|
| 345 | {$ENDIF}
|
| 346 | {$IFDEF PCRE_5_0}
|
| 347 | function pcre_config( what : integer; where : pointer) : integer; external;
|
| 348 | function pcre_copy_named_substring( const code : pointer {pcre};
|
| 349 | const subject : pchar;
|
| 350 | var ovector : integer;
|
| 351 | stringcount : integer;
|
| 352 | const stringname : pchar;
|
| 353 | var buffer : pchar;
|
| 354 | size : integer) : integer; external;
|
| 355 | function pcre_copy_substring( const subject : pchar; var ovector : integer;
|
| 356 | stringcount, stringnumber : integer;
|
| 357 | var buffer : pchar; size : integer )
|
| 358 | : integer; external;
|
| 359 | function pcre_exec( const argument_re : pointer {pcre};
|
| 360 | const extra_data : pointer {pcre_extra};
|
| 361 | {$ELSE}
|
| 362 | function pcre_exec( const external_re : pointer;
|
| 363 | const external_extra : pointer;
|
| 364 | {$ENDIF}
|
| 365 | const subject : PChar;
|
| 366 | length, start_offset, options : integer;
|
| 367 | offsets : pointer;
|
| 368 | offsetcount : integer ) : integer; external;
|
| 369 | {$IFDEF PCRE_7_0}
|
| 370 | function pcre_dfa_exec( const argument_re : pointer {pcre};
|
| 371 | const extra_data : pointer {pcre_extra};
|
| 372 | const subject : pchar;
|
| 373 | length, start_offset, options : integer;
|
| 374 | offsets : pointer;
|
| 375 | offsetcount : integer;
|
| 376 | workspace : pointer;
|
| 377 | wscount : integer ) : integer; external;
|
| 378 | {$ENDIF}
|
| 379 | {$IFDEF PCRE_5_0}
|
| 380 | procedure pcre_free_substring( const p : pchar ); external;
|
| 381 | procedure pcre_free_substring_list( var p : pchar ); external;
|
| 382 | function pcre_fullinfo( const argument_re : pointer {pcre};
|
| 383 | const extra_data : pointer {pcre_extra};
|
| 384 | what : integer;
|
| 385 | where : pointer ) : integer; external;
|
| 386 | function pcre_get_named_substring( const code : pointer {pcre};
|
| 387 | const subject : pchar;
|
| 388 | var ovector : integer;
|
| 389 | stringcount : integer;
|
| 390 | const stringname : pchar;
|
| 391 | var stringptr : pchar ) : integer; external;
|
| 392 | function pcre_get_stringnumber( const code : pointer {pcre};
|
| 393 | const stringname : pchar ) : integer; external;
|
| 394 | function pcre_get_stringtable_entries( const code : pointer {pcre};
|
| 395 | const stringname : pchar;
|
| 396 | var firstptr,
|
| 397 | lastptr : pchar ) : integer; external;
|
| 398 | function pcre_get_substring( const subject : pchar; var ovector : integer;
|
| 399 | stringcount, stringnumber : integer;
|
| 400 | var stringptr : pchar ) : integer; external;
|
| 401 | function pcre_get_substring_list( const subject : pchar; var ovector : integer;
|
| 402 | stringcount : integer;
|
| 403 | listptr : pointer {const char ***listptr}) : integer; external;
|
| 404 | function pcre_info( const argument_re : pointer {pcre};
|
| 405 | var optptr : integer;
|
| 406 | var first_byte : integer ) : integer; external;
|
| 407 | function pcre_maketables : pchar; external;
|
| 408 | {$ENDIF}
|
| 409 | {$IFDEF PCRE_7_0}
|
| 410 | function pcre_refcount( const argument_re : pointer {pcre};
|
| 411 | adjust : integer ) : pchar; external;
|
| 412 | {$ENDIF}
|
| 413 | function pcre_study( const external_re : pointer {pcre};
|
| 414 | options : integer;
|
| 415 | var errorptr : PChar ) : pointer {pcre_extra}; external;
|
| 416 | {$IFDEF PCRE_5_0}
|
| 417 | function pcre_version : pchar; external;
|
| 418 | {$ENDIF}
|
| 419 |
|
| 420 | function pcre_malloc( size : integer ) : pointer;
|
| 421 | begin
|
| 422 | GetMem( result, size );
|
| 423 | end;
|
| 424 |
|
| 425 | procedure pcre_free( {var} p : pointer );
|
| 426 | begin
|
| 427 | if (p <> nil) then
|
| 428 | FreeMem( p, 0 );
|
| 429 | {@p := nil;}
|
| 430 | end;
|
| 431 |
|
| 432 | {$IFDEF PCRE_5_0}
|
| 433 | (* Called from PCRE as a result of the (?C) item. We print out where we are in
|
| 434 | the match. Yield zero unless more callouts than the fail count, or the callout
|
| 435 | data is not zero. *)
|
| 436 |
|
| 437 | function pcre_callout;
|
| 438 | begin
|
| 439 | end;
|
| 440 | {$ENDIF}
|
| 441 |
|
| 442 | {$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL}
|
| 443 |
|
| 444 | // Always include the newest version of the library
|
| 445 | {$IFDEF PCRE_7_7}
|
| 446 | {$L pcre77.lib}
|
| 447 | {$ELSE}
|
| 448 | {$IFDEF PCRE_7_0}
|
| 449 | {$L pcre70.lib}
|
| 450 | {$ELSE}
|
| 451 | {$IFDEF PCRE_5_0}
|
| 452 | {$L pcre50.lib}
|
| 453 | {$ELSE}
|
| 454 | {$IFDEF PCRE_3_7}
|
| 455 | {$L pcre37.lib}
|
| 456 | {$ENDIF PCRE_3_7}
|
| 457 | {$ENDIF PCRE_5_0}
|
| 458 | {$ENDIF PCRE_7_0}
|
| 459 | {$ENDIF PCRE_7_7}
|
| 460 |
|
| 461 | {TpcRegExp}
|
| 462 |
|
| 463 | constructor TpcRegExp.Init(const ARegExp:shortstring; AOptions:integer; ALocale : Pointer);
|
| 464 | var
|
| 465 | pRegExp : PChar;
|
| 466 | begin
|
| 467 | RegExp:=ARegExp;
|
| 468 | RegExpC:=nil;
|
| 469 | RegExpExt:=nil;
|
| 470 | Matches:=nil;
|
| 471 | MatchesCount:=0;
|
| 472 | Error:=true;
|
| 473 | ErrorMsg:=nil;
|
| 474 | ErrorPos:=0;
|
| 475 | RunTimeOptions := 0;
|
| 476 | if length(RegExp) < 255 then
|
| 477 | begin
|
| 478 | RegExp[length(RegExp)+1]:=#0;
|
| 479 | pRegExp:=@RegExp[1];
|
| 480 | end
|
| 481 | else
|
| 482 | begin
|
| 483 | GetMem(pRegExp,length(RegExp)+1);
|
| 484 | pRegExp:=strpcopy(pRegExp,RegExp);
|
| 485 | end;
|
| 486 | RegExpC := pcre_compile( pRegExp,
|
| 487 | AOptions and PCRE_COMPILE_ALLOWED_OPTIONS,
|
| 488 | ErrorMsg, ErrorPos, ALocale);
|
| 489 | if length(RegExp) = 255 then
|
| 490 | StrDispose(pRegExp);
|
| 491 | if RegExpC = nil then
|
| 492 | exit;
|
| 493 | ErrorMsg:=nil;
|
| 494 | RegExpExt := pcre_study( RegExpC, 0, ErrorMsg );
|
| 495 | if (RegExpExt = nil) and (ErrorMsg <> nil) then
|
| 496 | begin
|
| 497 | pcre_free(RegExpC);
|
| 498 | exit;
|
| 499 | end;
|
| 500 | GetMem(Matches,SizeOf(TMatchArray));
|
| 501 | Error:=false;
|
| 502 | end;
|
| 503 |
|
| 504 | destructor TpcRegExp.Done;
|
| 505 | begin
|
| 506 | if RegExpC <> nil then
|
| 507 | pcre_free(RegExpC);
|
| 508 | if RegExpExt <> nil then
|
| 509 | pcre_free(RegExpExt);
|
| 510 | if Matches <> nil then
|
| 511 | FreeMem(Matches,SizeOf(TMatchArray));
|
| 512 | end;
|
| 513 |
|
| 514 | function TpcRegExp.SearchNext( AStr: Pchar; ALen : longint ) : boolean;
|
| 515 | var Options: Integer;
|
| 516 | begin // must handle PCRE_ERROR_PARTIAL here
|
| 517 | Options := (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions) and
|
| 518 | PCRE_EXEC_ALLOWED_OPTIONS;
|
| 519 | if MatchesCount > 0 then
|
| 520 | MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, PMatchArray(Matches)^[1],
|
| 521 | Options, Matches, MAX_MATCHES ) else
|
| 522 | MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, 0,
|
| 523 | Options, Matches, MAX_MATCHES );
|
| 524 | { if MatchesCount = 0 then
|
| 525 | MatchesCount := MatchesCount div 3;}
|
| 526 | PartialMatch := MatchesCount = PCRE_ERROR_PARTIAL;
|
| 527 | SearchNext := MatchesCount > 0;
|
| 528 | end;
|
| 529 |
|
| 530 | function TpcRegExp.Search( AStr: Pchar; ALen : longint):boolean;
|
| 531 | begin
|
| 532 | MatchesCount:=0;
|
| 533 | Search:=SearchNext(AStr,ALen);
|
| 534 | SourceLen:=ALen;
|
| 535 | end;
|
| 536 |
|
| 537 | function TpcRegExp.SearchOfs( AStr: Pchar; ALen, AOfs: longint ) : boolean;
|
| 538 | var Options: Integer;
|
| 539 | begin
|
| 540 | MatchesCount:=0;
|
| 541 | Options := (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions) and
|
| 542 | PCRE_EXEC_ALLOWED_OPTIONS;
|
| 543 | MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, AOfs,
|
| 544 | Options, Matches, MAX_MATCHES );
|
| 545 | PartialMatch := MatchesCount = PCRE_ERROR_PARTIAL;
|
| 546 | SearchOfs := MatchesCount > 0;
|
| 547 | SourceLen := ALen-AOfs;
|
| 548 | end;
|
| 549 |
|
| 550 | function TpcRegExp.MatchSub(ANom:integer; var Pos,Len:longint):boolean;
|
| 551 | begin
|
| 552 | if (MatchesCount > 0) and (ANom <= (MatchesCount-1)) then
|
| 553 | begin
|
| 554 | ANom:=ANom*2;
|
| 555 | Pos:=PMatchArray(Matches)^[ANom];
|
| 556 | Len:=PMatchArray(Matches)^[ANom+1]-Pos;
|
| 557 | MatchSub:=true;
|
| 558 | end
|
| 559 | else
|
| 560 | MatchSub:=false;
|
| 561 | end;
|
| 562 |
|
| 563 | function TpcRegExp.MatchFull(var Pos,Len:longint):boolean;
|
| 564 | begin
|
| 565 | MatchFull:=MatchSub(0,Pos,Len);
|
| 566 | end;
|
| 567 |
|
| 568 | function TpcRegExp.GetSubStr(ANom: integer; AStr: Pchar):string;
|
| 569 | var
|
| 570 | s: ansistring;
|
| 571 | pos,len: longint;
|
| 572 | begin
|
| 573 | s:='';
|
| 574 | if MatchSub(ANom, pos, len) then
|
| 575 | begin
|
| 576 | setlength(s, len);
|
| 577 | Move(AStr[pos], s[1], len);
|
| 578 | end;
|
| 579 | GetSubStr:=s;
|
| 580 | end;
|
| 581 |
|
| 582 | function TpcRegExp.GetPreSubStr(AStr: Pchar):string;
|
| 583 | var
|
| 584 | s: ansistring;
|
| 585 | l: longint;
|
| 586 | begin
|
| 587 | s:='';
|
| 588 | if (MatchesCount > 0) then
|
| 589 | begin
|
| 590 | l:=PMatchArray(Matches)^[0]-1;
|
| 591 | if l > 0 then
|
| 592 | begin
|
| 593 | setlength(s,l);
|
| 594 | Move(AStr[1],s[1],l);
|
| 595 | end;
|
| 596 | end;
|
| 597 | GetPreSubStr:=s;
|
| 598 | end;
|
| 599 |
|
| 600 | function TpcRegExp.GetPostSubStr(AStr: Pchar):string;
|
| 601 | var
|
| 602 | s: ansistring;
|
| 603 | l: longint;
|
| 604 | ANom: integer;
|
| 605 | begin
|
| 606 | s:='';
|
| 607 | if (MatchesCount > 0) then
|
| 608 | begin
|
| 609 | ANom:=(MatchesCount-1){*2} shl 1;
|
| 610 | l:=SourceLen-PMatchArray(Matches)^[ANom+1]+1;
|
| 611 | if l > 0 then
|
| 612 | begin
|
| 613 | setlength(s,l);
|
| 614 | Move(AStr[PMatchArray(Matches)^[ANom+1]],s[1],l);
|
| 615 | end;
|
| 616 | end;
|
| 617 | GetPostSubStr:=s;
|
| 618 | end;
|
| 619 |
|
| 620 |
|
| 621 | function TpcRegExp.GetFullStr(AStr: Pchar):string;
|
| 622 | var
|
| 623 | s: ansistring;
|
| 624 | l: longint;
|
| 625 | begin
|
| 626 | GetFullStr:=GetSubStr(0,AStr);
|
| 627 | end;
|
| 628 |
|
| 629 | function TpcRegExp.GetReplStr(AStr: Pchar; const ARepl: string):string;
|
| 630 | var
|
| 631 | s: ansistring;
|
| 632 | l,i,lasti: longint;
|
| 633 | begin
|
| 634 | l:=length(ARepl);
|
| 635 | i:=1;
|
| 636 | lasti:=1;
|
| 637 | s:='';
|
| 638 | while i <= l do
|
| 639 | begin
|
| 640 | case ARepl[i] of
|
| 641 | '\' :
|
| 642 | begin
|
| 643 | if i < l then
|
| 644 | begin
|
| 645 | s:=s+copy(ARepl,lasti,i-lasti){+ARepl[i+1]};
|
| 646 | {AH 17-10-05 support for POSIX \1-\9 backreferences}
|
| 647 | case ARepl[i+1] of
|
| 648 | '0' : s:=s+GetFullStr(AStr);
|
| 649 | '1'..'9' : s:=s+GetSubStr(ord(ARepl[i+1])-ord('0'),AStr);
|
| 650 | else s:=s+ARepl[i+1]; // copy the escaped character
|
| 651 | end;
|
| 652 | end;
|
| 653 | inc(i);
|
| 654 | lasti:=i+1;
|
| 655 | end;
|
| 656 | '$' :
|
| 657 | begin
|
| 658 | if i < l then
|
| 659 | begin
|
| 660 | s:=s+copy(ARepl,lasti,i-lasti);
|
| 661 | case ARepl[i+1] of
|
| 662 | '&' : s:=s+GetFullStr(AStr);
|
| 663 | '1'..'9' : s:=s+GetSubStr(ord(ARepl[i+1])-ord('0'),AStr);
|
| 664 | '`' : s:=s+GetPreSubStr(AStr);
|
| 665 | #39 : s:=s+GetPostSubStr(AStr);
|
| 666 | end;
|
| 667 | end;
|
| 668 | inc(i);
|
| 669 | lasti:=i+1;
|
| 670 | end;
|
| 671 | end;
|
| 672 | inc(i);
|
| 673 | end;
|
| 674 | if lasti <= {AH 25-10-2004 added =, else l==1 won't work} l then
|
| 675 | s:=s+copy(ARepl,lasti,l-lasti+1);
|
| 676 | GetReplStr:=s;
|
| 677 | end;
|
| 678 |
|
| 679 | function TpcRegExp.ErrorStr:string;
|
| 680 | begin
|
| 681 | ErrorStr:=StrPas(ErrorMsg);
|
| 682 | end;
|
| 683 |
|
| 684 | {TRegExpCollection}
|
| 685 |
|
| 686 | constructor TRegExpCollection.Init(AMaxRegExp: integer);
|
| 687 | begin
|
| 688 | Inherited Init(1,1);
|
| 689 | MaxRegExp:=AMaxRegExp;
|
| 690 | CompareModeInsert:=true;
|
| 691 | end;
|
| 692 |
|
| 693 | procedure TRegExpCollection.FreeItem(P: Pointer);
|
| 694 | begin
|
| 695 | if P <> nil then
|
| 696 | begin
|
| 697 | Dispose(PpcRegExp(P),Done);
|
| 698 | end;
|
| 699 | end;
|
| 700 |
|
| 701 | function TRegExpCollection.Compare(P1, P2: Pointer): Integer;
|
| 702 | //var
|
| 703 | // l,l1,l2,i : byte;
|
| 704 | //// wPos: pchar;
|
| 705 | begin
|
| 706 | if CompareModeInsert then
|
| 707 | begin
|
| 708 | // l1:=length(PpcRegExp(P1)^.RegExp);
|
| 709 | // l2:=length(PpcRegExp(P2)^.RegExp);
|
| 710 | // if l1 > l2 then l:=l2 else
|
| 711 | // l:=l1;
|
| 712 | // for i:=1 to l do
|
| 713 | // if PpcRegExp(P1).RegExp[i] <> PpcRegExp(P2).RegExp[i] then break;
|
| 714 | // if i <=l then
|
| 715 | // Compare:=ord(PpcRegExp(P1).RegExp[i])-ord(PpcRegExp(P2).RegExp[i]) else
|
| 716 | // Compare:=l1-l2;
|
| 717 | Compare := stringsx.PasStrCmp(PpcRegExp(P1).RegExp, PpcRegExp(P2).RegExp, False);
|
| 718 | end
|
| 719 | else
|
| 720 | begin
|
| 721 | // l1:=length(PpcRegExp(P1)^.RegExp);
|
| 722 | // l2:=length(SearchRegExp);
|
| 723 | // if l1 > l2 then l:=l2 else
|
| 724 | // l:=l1;
|
| 725 | // for i:=1 to l do
|
| 726 | // if PpcRegExp(P1).RegExp[i] <> SearchRegExp[i] then
|
| 727 | // begin
|
| 728 | // Compare:=ord(PpcRegExp(P1).RegExp[i])-ord(SearchRegExp[i]);
|
| 729 | // break;
|
| 730 | // end;
|
| 731 | // if i > l then Compare:=l1-l2;
|
| 732 | Compare := stringsx.PasStrCmp(PpcRegExp(P1).RegExp, SearchRegExp, False);
|
| 733 | end;
|
| 734 | end;
|
| 735 |
|
| 736 | function TRegExpCollection.Find(ARegExp:shortstring;var P: PpcRegExp):boolean;
|
| 737 | var I : integer;
|
| 738 | begin
|
| 739 | CompareModeInsert:=false;
|
| 740 | SearchRegExp:=ARegExp;
|
| 741 | if Search(nil,I) then
|
| 742 | begin
|
| 743 | P:=PpcRegExp(At(I));
|
| 744 | Find:=true;
|
| 745 | end
|
| 746 | else
|
| 747 | begin
|
| 748 | P:=nil;
|
| 749 | Find:=false;
|
| 750 | end;
|
| 751 | CompareModeInsert:=true;
|
| 752 | end;
|
| 753 |
|
| 754 | function TRegExpCollection.CheckNew(ARegExp:shortstring):PpcRegExp;
|
| 755 | var
|
| 756 | P : PpcRegExp;
|
| 757 | begin
|
| 758 | if not Find(ARegExp,P) then
|
| 759 | begin
|
| 760 | if Count = MaxRegExp then
|
| 761 | AtFree(0);
|
| 762 | P:=New(ppcRegExp,Init(ARegExp,PCRE_CASELESS,nil));
|
| 763 | Insert(P);
|
| 764 | end;
|
| 765 | CheckNew:=P;
|
| 766 | end;
|
| 767 |
|
| 768 | function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean;
|
| 769 | var
|
| 770 | PpcRE:PpcRegExp;
|
| 771 | begin
|
| 772 | PpcRE:=New(ppcRegExp,Init(WildCard,AOptions,Alocale));
|
| 773 | pcGrepMatch:=PpcRE^.Search(pchar(AStr),Length(AStr));
|
| 774 | Dispose(PpcRE,Done);
|
| 775 | end;
|
| 776 |
|
| 777 | function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string;
|
| 778 | var
|
| 779 | PpcRE:PpcRegExp;
|
| 780 | begin
|
| 781 | PpcRE:=New(ppcRegExp,Init(WildCard,AOptions,Alocale));
|
| 782 | if PpcRE^.Search(pchar(AStr),Length(AStr)) then
|
| 783 | pcGrepSub:=PpcRE^.GetReplStr(pchar(AStr),ARepl)
|
| 784 | else
|
| 785 | pcGrepSub:='';
|
| 786 | Dispose(PpcRE,Done);
|
| 787 | end;
|
| 788 |
|
| 789 | function pcFastGrepMatch(WildCard, aStr: string): Boolean;
|
| 790 | var
|
| 791 | PpcRE:PpcRegExp;
|
| 792 | begin
|
| 793 | PpcRE:=PRegExpCache^.CheckNew(WildCard);
|
| 794 | pcFastGrepMatch:=PpcRE^.Search(pchar(AStr),Length(AStr));
|
| 795 | end;
|
| 796 |
|
| 797 | function pcFastGrepSub(WildCard, aStr, aRepl: string): string;
|
| 798 | var
|
| 799 | PpcRE:PpcRegExp;
|
| 800 | begin
|
| 801 | PpcRE:=PRegExpCache^.CheckNew(WildCard);
|
| 802 | if PpcRE^.Search(pchar(AStr),Length(AStr)) then
|
| 803 | pcFastGrepSub:=PpcRE^.GetReplStr(pchar(AStr),ARepl)
|
| 804 | else
|
| 805 | pcFastGrepSub:='';
|
| 806 | end;
|
| 807 |
|
| 808 | {$IFDEF PCRE_5_0}
|
| 809 | function pcGetVersion : pchar; assembler; {$FRAME-}{$USES none}
|
| 810 | asm
|
| 811 | call pcre_version
|
| 812 | end;
|
| 813 | {$ENDIF PCRE_5_0}
|
| 814 |
|
| 815 | function pcError;
|
| 816 | var P: ppcRegExp absolute pRegExp;
|
| 817 | begin
|
| 818 | Result := (P = nil) or P^.Error;
|
| 819 | If Result and (P <> nil) then
|
| 820 | begin
|
| 821 | { if P^.ErrorPos = 0 then
|
| 822 | MessageBox(GetString(erRegExpCompile)+'"'+P^.ErrorStr+'"', nil,mfConfirmation+mfOkButton)
|
| 823 | else}
|
| 824 | MessageBox(GetString(erRegExpCompile)+'"'+P^.ErrorStr+'"'+GetString(erRegExpCompPos),
|
| 825 | @P^.ErrorPos,mfConfirmation+mfOkButton);
|
| 826 | Dispose(P, Done);
|
| 827 | P:=nil;
|
| 828 | end;
|
| 829 | end;
|
| 830 |
|
| 831 | function pcInit;
|
| 832 | var Options : Integer;
|
| 833 | begin
|
| 834 | If CaseSens then Options := 0 else Options := PCRE_CASELESS;
|
| 835 | Result := New( PpcRegExp, Init( Pattern,
|
| 836 | {DefaultOptions}
|
| 837 | startup.MiscMultiData.cfgRegEx.DefaultOptions or Options,
|
| 838 | DefaultLocaleTable) );
|
| 839 | end;
|
| 840 |
|
| 841 | Initialization
|
| 842 | PRegExpCache:=New(PRegExpCollection,Init(64));
|
| 843 | Finalization
|
| 844 | Dispose(PRegExpCache,Done);
|
| 845 | End.
|