/*************************************************
*               pcregrep program                 *
*************************************************/
4
5/* This is a grep program that uses the PCRE regular expression library to do
6its pattern matching. On a Unix or Win32 system it can recurse into
7directories.
8
9 Copyright (c) 1997-2011 University of Cambridge
10
11-----------------------------------------------------------------------------
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36POSSIBILITY OF SUCH DAMAGE.
37-----------------------------------------------------------------------------
38*/
39
40#ifdef HAVE_CONFIG_H
41#include "config.h"
42#endif
43
44#include <ctype.h>
45#include <locale.h>
46#include <stdio.h>
47#include <string.h>
48#include <stdlib.h>
49#include <errno.h>
50
51#include <sys/types.h>
52#include <sys/stat.h>
53
54#ifdef HAVE_UNISTD_H
55#include <unistd.h>
56#endif
57
58#ifdef SUPPORT_LIBZ
59#include <zlib.h>
60#endif
61
62#ifdef SUPPORT_LIBBZ2
63#include <bzlib.h>
64#endif
65
66#include "pcre.h"
67
/* Boolean values and the type that holds them. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* Fixed limits. OFFSET_SIZE is the number of ints in the offsets vector that
is handed to pcre_exec(). */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* PATBUFSIZE is the larger of BUFSIZ and 8192. */

#if BUFSIZ > 8192
#define PATBUFSIZE BUFSIZ
#else
#define PATBUFSIZE 8192
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* File reading styles */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d and -D options */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { } while (0) so that "FWRITE(...);" is exactly one
statement. A bare "if (...) {}" expansion cannot be used as the body of an
unbraced if that has an else: the trailing semicolon ends the outer if and the
else is left without a partner. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117/*************************************************
118* Global variables *
119*************************************************/
120
121/* Jeffrey Friedl has some debugging requirements that are not part of the
122regular code. */
123
124#ifdef JFRIEDL_DEBUG
125static int S_arg = -1;
126static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128static const char *jfriedl_prefix = "";
129static const char *jfriedl_postfix = "";
130#endif
131
132static int endlinetype;
133
134static char *colour_string = (char *)"1;31";
135static char *colour_option = NULL;
136static char *dee_option = NULL;
137static char *DEE_option = NULL;
138static char *main_buffer = NULL;
139static char *newline = NULL;
140static char *pattern_filename = NULL;
141static char *stdin_name = (char *)"(standard input)";
142static char *locale = NULL;
143
144static const unsigned char *pcretables = NULL;
145
146static int pattern_count = 0;
147static pcre **pattern_list = NULL;
148static pcre_extra **hints_list = NULL;
149
150static char *include_pattern = NULL;
151static char *exclude_pattern = NULL;
152static char *include_dir_pattern = NULL;
153static char *exclude_dir_pattern = NULL;
154
155static pcre *include_compiled = NULL;
156static pcre *exclude_compiled = NULL;
157static pcre *include_dir_compiled = NULL;
158static pcre *exclude_dir_compiled = NULL;
159
160static int after_context = 0;
161static int before_context = 0;
162static int both_context = 0;
163static int bufthird = PCREGREP_BUFSIZE;
164static int bufsize = 3*PCREGREP_BUFSIZE;
165static int dee_action = dee_READ;
166static int DEE_action = DEE_READ;
167static int error_count = 0;
168static int filenames = FN_DEFAULT;
169static int only_matching = -1;
170static int process_options = 0;
171
172#ifdef SUPPORT_PCREGREP_JIT
173static int study_options = PCRE_STUDY_JIT_COMPILE;
174#else
175static int study_options = 0;
176#endif
177
178static unsigned long int match_limit = 0;
179static unsigned long int match_limit_recursion = 0;
180
181static BOOL count_only = FALSE;
182static BOOL do_colour = FALSE;
183static BOOL file_offsets = FALSE;
184static BOOL hyphenpending = FALSE;
185static BOOL invert = FALSE;
186static BOOL line_buffered = FALSE;
187static BOOL line_offsets = FALSE;
188static BOOL multiline = FALSE;
189static BOOL number = FALSE;
190static BOOL omit_zero_count = FALSE;
191static BOOL resource_error = FALSE;
192static BOOL quiet = FALSE;
193static BOOL silent = FALSE;
194static BOOL utf8 = FALSE;
195
/* The kinds of option that can appear in the option table, followed by the
structure that describes one table entry. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_LONGNUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

typedef struct option_item {
  int type;                /* one of the OP_xxx values above */
  int one_char;            /* single-letter form, or a negative N_xxx code */
  void *dataptr;           /* variable associated with the option, or NULL */
  const char *long_name;   /* long option name */
  const char *help_text;   /* description of the option */
} option_item;
208
/* An option that has no single-letter form is given a negative code in place
of the letter, which also serves to identify it. */

#define N_COLOUR       (-1)
#define N_EXCLUDE      (-2)
#define N_EXCLUDE_DIR  (-3)
#define N_HELP         (-4)
#define N_INCLUDE      (-5)
#define N_INCLUDE_DIR  (-6)
#define N_LABEL        (-7)
#define N_LOCALE       (-8)
#define N_NULL         (-9)
#define N_LOFFSETS     (-10)
#define N_FOFFSETS     (-11)
#define N_LBUFFER      (-12)
#define N_M_LIMIT      (-13)
#define N_M_LIMIT_REC  (-14)
#define N_BUFSIZE      (-15)
#define N_NOJIT        (-16)
228
229static option_item optionlist[] = {
230 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
231 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
232 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
233 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
234 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
235 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
236 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
237 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
238 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
239 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
240 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
241 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
242 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
243 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
244 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
245 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
246 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
247 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
248#ifdef SUPPORT_PCREGREP_JIT
249 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
250#else
251 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
252#endif
253 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
254 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
255 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
256 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
257 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
258 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
259 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
260 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
261 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
262 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
263 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
264 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
265 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
266 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
267 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
268 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
269 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
270 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
271
272 /* These two were accidentally implemented with underscores instead of
273 hyphens in the option names. As this was not discovered for several releases,
274 the incorrect versions are left in the table for compatibility. However, the
275 --help function misses out any option that has an underscore in its name. */
276
277 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
278 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
279
280#ifdef JFRIEDL_DEBUG
281 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
282#endif
283 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
284 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
285 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
286 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
287 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
288 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
289 { OP_NODATA, 0, NULL, NULL, NULL }
290};
291
292/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
293options. These set the 1, 2, and 4 bits in process_options, respectively. Note
294that the combination of -w and -x has the same effect as -x on its own, so we
295can treat them as the same. */
296
/* Text placed before and after each pattern. Both tables have eight entries,
one for each combination of the -w (1), -x (2) and -F (4) bits. */

static const char *prefix[] = {
  "",           /* 0: none */
  "\\b",        /* 1: -w */
  "^(?:",       /* 2: -x */
  "^(?:",       /* 3: -w -x */
  "\\Q",        /* 4: -F */
  "\\b\\Q",     /* 5: -F -w */
  "^(?:\\Q",    /* 6: -F -x */
  "^(?:\\Q"     /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",           /* 0: none */
  "\\b",        /* 1: -w */
  ")$",         /* 2: -x */
  ")$",         /* 3: -w -x */
  "\\E",        /* 4: -F */
  "\\E\\b",     /* 5: -F -w */
  "\\E)$",      /* 6: -F -x */
  "\\E)$"       /* 7: -F -w -x */
};

/* UTF-8 tables - used by the line-scanning code when the newline setting is
"any" or "anycrlf". utf8_table4 is indexed by the low six bits of a lead byte
and gives the number of additional bytes; utf8_table3 is indexed by that number
and gives the mask for the data bits of the lead byte. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
312
313
314
315/*************************************************
316* Exit from the program *
317*************************************************/
318
319/* If there has been a resource error, give a suitable message.
320
321Argument: the return code
322Returns: does not return
323*/
324
325static void
326pcregrep_exit(int rc)
327{
328if (resource_error)
329 {
330 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
331 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
332 PCRE_ERROR_JIT_STACKLIMIT);
333 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
334 }
335
336exit(rc);
337}
338
339
340/*************************************************
341* OS-specific functions *
342*************************************************/
343
344/* These functions are defined so that they can be made system specific,
345although at present the only ones are for Unix, Win32, and for "no support". */
346
347
348/************* Directory scanning in Unix ***********/
349
350#if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
351#include <sys/types.h>
352#include <sys/stat.h>
353#include <dirent.h>
354
355typedef DIR directory_type;
356
/* Test whether a path names a directory. Yields '/' for a directory and 0
otherwise. A path that cannot be stat()ed also yields 0, in the expectation
that opening it as a file will then fail. */

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0) return 0;
  if (S_ISDIR(info.st_mode)) return '/';
  return 0;
}
365
366static directory_type *
367opendirectory(char *filename)
368{
369return opendir(filename);
370}
371
372static char *
373readdirectory(directory_type *dir)
374{
375for (;;)
376 {
377 struct dirent *dent = readdir(dir);
378 if (dent == NULL) return NULL;
379 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
380 return dent->d_name;
381 }
382/* Control never reaches here */
383}
384
385static void
386closedirectory(directory_type *dir)
387{
388closedir(dir);
389}
390
391
392/************* Test for regular file in Unix **********/
393
/* Test whether a path names a regular file. Yields 1 if it does and 0 if it
does not. A path that cannot be stat()ed yields 1, in the expectation that
opening it as a file will then fail. */

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0) return 1;
  return S_ISREG(info.st_mode) ? 1 : 0;
}
402
403
404/************* Test for a terminal in Unix **********/
405
406static BOOL
407is_stdout_tty(void)
408{
409return isatty(fileno(stdout));
410}
411
412static BOOL
413is_file_tty(FILE *f)
414{
415return isatty(fileno(f));
416}
417
418
419/************* Directory scanning in Win32 ***********/
420
421/* I (Philip Hazel) have no means of testing this code. It was contributed by
422Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
423when it did not exist. David Byron added a patch that moved the #include of
424<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
425The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
426undefined when it is indeed undefined. */
427
428#elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
429
430#ifndef STRICT
431# define STRICT
432#endif
433#ifndef WIN32_LEAN_AND_MEAN
434# define WIN32_LEAN_AND_MEAN
435#endif
436
437#include <windows.h>
438
439#ifndef INVALID_FILE_ATTRIBUTES
440#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
441#endif
442
443typedef struct directory_type
444{
445HANDLE handle;
446BOOL first;
447WIN32_FIND_DATA data;
448} directory_type;
449
450int
451isdirectory(char *filename)
452{
453DWORD attr = GetFileAttributes(filename);
454if (attr == INVALID_FILE_ATTRIBUTES)
455 return 0;
456return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
457}
458
459directory_type *
460opendirectory(char *filename)
461{
462size_t len;
463char *pattern;
464directory_type *dir;
465DWORD err;
466len = strlen(filename);
467pattern = (char *) malloc(len + 3);
468dir = (directory_type *) malloc(sizeof(*dir));
469if ((pattern == NULL) || (dir == NULL))
470 {
471 fprintf(stderr, "pcregrep: malloc failed\n");
472 pcregrep_exit(2);
473 }
474memcpy(pattern, filename, len);
475memcpy(&(pattern[len]), "\\*", 3);
476dir->handle = FindFirstFile(pattern, &(dir->data));
477if (dir->handle != INVALID_HANDLE_VALUE)
478 {
479 free(pattern);
480 dir->first = TRUE;
481 return dir;
482 }
483err = GetLastError();
484free(pattern);
485free(dir);
486errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
487return NULL;
488}
489
490char *
491readdirectory(directory_type *dir)
492{
493for (;;)
494 {
495 if (!dir->first)
496 {
497 if (!FindNextFile(dir->handle, &(dir->data)))
498 return NULL;
499 }
500 else
501 {
502 dir->first = FALSE;
503 }
504 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
505 return dir->data.cFileName;
506 }
507#ifndef _MSC_VER
508return NULL; /* Keep compiler happy; never executed */
509#endif
510}
511
512void
513closedirectory(directory_type *dir)
514{
515FindClose(dir->handle);
516free(dir);
517}
518
519
520/************* Test for regular file in Win32 **********/
521
522/* I don't know how to do this, or if it can be done; assume all paths are
523regular if they are not directories. */
524
int
isregfile(char *filename)
{
  /* Anything that is not a directory is taken to be a regular file. */
  if (isdirectory(filename)) return 0;
  return 1;
}
529
530
531/************* Test for a terminal in Win32 **********/
532
533/* I don't know how to do this; assume never */
534
535static BOOL
536is_stdout_tty(void)
537{
538return FALSE;
539}
540
541static BOOL
542is_file_tty(FILE *f)
543{
544return FALSE;
545}
546
547
548/************* Directory scanning when we can't do it ***********/
549
550/* The type is void, and apart from isdirectory(), the functions do nothing. */
551
552#else
553
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Opening a directory always yields a null pointer. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* Nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
560
561
562/************* Test for regular when we can't do it **********/
563
564/* Assume all files are regular. */
565
int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
567
568
569/************* Test for a terminal when we can't do it **********/
570
571static BOOL
572is_stdout_tty(void)
573{
574return FALSE;
575}
576
577static BOOL
578is_file_tty(FILE *f)
579{
580return FALSE;
581}
582
583#endif
584
585
586
587#ifndef HAVE_STRERROR
588/*************************************************
589* Provide strerror() for non-ANSI libraries *
590*************************************************/
591
592/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
593in their libraries, but can provide the same facility by this simple
594alternative function. */
595
extern int sys_nerr;
extern char *sys_errlist[];

/* Look the error number up in sys_errlist[]; numbers outside the range
0 .. sys_nerr-1 get a fixed message instead. */

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
605#endif /* HAVE_STRERROR */
606
607
608
609/*************************************************
610* Read one line of input *
611*************************************************/
612
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when length characters have been
stored, or at end of file, whichever comes first. The space that is left is
checked before each character is fetched, so if length is zero or negative
nothing is read from the file and nothing is written to the buffer.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int yield = 0;
while (yield < length)
  {
  int c = fgetc(f);
  if (c == EOF) break;
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
640
641
642
643/*************************************************
644* Find end of line *
645*************************************************/
646
647/* The length of the endline sequence that is found is set via lenptr. This may
648be zero at the very end of the file if there is no line-ending sequence there.
649
650Arguments:
651 p current position in line
652 endptr end of available data
653 lenptr where to put the length of the eol sequence
654
655Returns: pointer after the last byte of the line,
656 including the newline byte(s)
657*/
658
659static char *
660end_of_line(char *p, char *endptr, int *lenptr)
661{
662switch(endlinetype)
663 {
664 default: /* Just in case */
665 case EL_LF:
666 while (p < endptr && *p != '\n') p++;
667 if (p < endptr)
668 {
669 *lenptr = 1;
670 return p + 1;
671 }
672 *lenptr = 0;
673 return endptr;
674
675 case EL_CR:
676 while (p < endptr && *p != '\r') p++;
677 if (p < endptr)
678 {
679 *lenptr = 1;
680 return p + 1;
681 }
682 *lenptr = 0;
683 return endptr;
684
685 case EL_CRLF:
686 for (;;)
687 {
688 while (p < endptr && *p != '\r') p++;
689 if (++p >= endptr)
690 {
691 *lenptr = 0;
692 return endptr;
693 }
694 if (*p == '\n')
695 {
696 *lenptr = 2;
697 return p + 1;
698 }
699 }
700 break;
701
702 case EL_ANYCRLF:
703 while (p < endptr)
704 {
705 int extra = 0;
706 register int c = *((unsigned char *)p);
707
708 if (utf8 && c >= 0xc0)
709 {
710 int gcii, gcss;
711 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
712 gcss = 6*extra;
713 c = (c & utf8_table3[extra]) << gcss;
714 for (gcii = 1; gcii <= extra; gcii++)
715 {
716 gcss -= 6;
717 c |= (p[gcii] & 0x3f) << gcss;
718 }
719 }
720
721 p += 1 + extra;
722
723 switch (c)
724 {
725 case 0x0a: /* LF */
726 *lenptr = 1;
727 return p;
728
729 case 0x0d: /* CR */
730 if (p < endptr && *p == 0x0a)
731 {
732 *lenptr = 2;
733 p++;
734 }
735 else *lenptr = 1;
736 return p;
737
738 default:
739 break;
740 }
741 } /* End of loop for ANYCRLF case */
742
743 *lenptr = 0; /* Must have hit the end */
744 return endptr;
745
746 case EL_ANY:
747 while (p < endptr)
748 {
749 int extra = 0;
750 register int c = *((unsigned char *)p);
751
752 if (utf8 && c >= 0xc0)
753 {
754 int gcii, gcss;
755 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
756 gcss = 6*extra;
757 c = (c & utf8_table3[extra]) << gcss;
758 for (gcii = 1; gcii <= extra; gcii++)
759 {
760 gcss -= 6;
761 c |= (p[gcii] & 0x3f) << gcss;
762 }
763 }
764
765 p += 1 + extra;
766
767 switch (c)
768 {
769 case 0x0a: /* LF */
770 case 0x0b: /* VT */
771 case 0x0c: /* FF */
772 *lenptr = 1;
773 return p;
774
775 case 0x0d: /* CR */
776 if (p < endptr && *p == 0x0a)
777 {
778 *lenptr = 2;
779 p++;
780 }
781 else *lenptr = 1;
782 return p;
783
784 case 0x85: /* NEL */
785 *lenptr = utf8? 2 : 1;
786 return p;
787
788 case 0x2028: /* LS */
789 case 0x2029: /* PS */
790 *lenptr = 3;
791 return p;
792
793 default:
794 break;
795 }
796 } /* End of loop for ANY case */
797
798 *lenptr = 0; /* Must have hit the end */
799 return endptr;
800 } /* End of overall switch */
801}
802
803
804
805/*************************************************
806* Find start of previous line *
807*************************************************/
808
809/* This is called when looking back for before lines to print.
810
811Arguments:
812 p start of the subsequent line
813 startptr start of available data
814
815Returns: pointer to the start of the previous line
816*/
817
818static char *
819previous_line(char *p, char *startptr)
820{
821switch(endlinetype)
822 {
823 default: /* Just in case */
824 case EL_LF:
825 p--;
826 while (p > startptr && p[-1] != '\n') p--;
827 return p;
828
829 case EL_CR:
830 p--;
831 while (p > startptr && p[-1] != '\n') p--;
832 return p;
833
834 case EL_CRLF:
835 for (;;)
836 {
837 p -= 2;
838 while (p > startptr && p[-1] != '\n') p--;
839 if (p <= startptr + 1 || p[-2] == '\r') return p;
840 }
841 return p; /* But control should never get here */
842
843 case EL_ANY:
844 case EL_ANYCRLF:
845 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
846 if (utf8) while ((*p & 0xc0) == 0x80) p--;
847
848 while (p > startptr)
849 {
850 register int c;
851 char *pp = p - 1;
852
853 if (utf8)
854 {
855 int extra = 0;
856 while ((*pp & 0xc0) == 0x80) pp--;
857 c = *((unsigned char *)pp);
858 if (c >= 0xc0)
859 {
860 int gcii, gcss;
861 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
862 gcss = 6*extra;
863 c = (c & utf8_table3[extra]) << gcss;
864 for (gcii = 1; gcii <= extra; gcii++)
865 {
866 gcss -= 6;
867 c |= (pp[gcii] & 0x3f) << gcss;
868 }
869 }
870 }
871 else c = *((unsigned char *)pp);
872
873 if (endlinetype == EL_ANYCRLF) switch (c)
874 {
875 case 0x0a: /* LF */
876 case 0x0d: /* CR */
877 return p;
878
879 default:
880 break;
881 }
882
883 else switch (c)
884 {
885 case 0x0a: /* LF */
886 case 0x0b: /* VT */
887 case 0x0c: /* FF */
888 case 0x0d: /* CR */
889 case 0x85: /* NEL */
890 case 0x2028: /* LS */
891 case 0x2029: /* PS */
892 return p;
893
894 default:
895 break;
896 }
897
898 p = pp; /* Back one character */
899 } /* End of loop for ANY case */
900
901 return startptr; /* Hit start of data */
902 } /* End of overall switch */
903}
904
905
906
907
908
909/*************************************************
910* Print the previous "after" lines *
911*************************************************/
912
913/* This is called if we are about to lose said lines because of buffer filling,
914and at the end of the file. The data in the line is written using fwrite() so
915that a binary zero does not terminate it.
916
917Arguments:
918 lastmatchnumber the number of the last matching line, plus one
919 lastmatchrestart where we restarted after the last match
920 endptr end of available data
921 printname filename for printing
922
923Returns: nothing
924*/
925
926static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
927 char *endptr, char *printname)
928{
929if (after_context > 0 && lastmatchnumber > 0)
930 {
931 int count = 0;
932 while (lastmatchrestart < endptr && count++ < after_context)
933 {
934 int ellength;
935 char *pp = lastmatchrestart;
936 if (printname != NULL) fprintf(stdout, "%s-", printname);
937 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
938 pp = end_of_line(pp, endptr, &ellength);
939 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
940 lastmatchrestart = pp;
941 }
942 hyphenpending = TRUE;
943 }
944}
945
946
947
948/*************************************************
949* Apply patterns to subject till one matches *
950*************************************************/
951
952/* This function is called to run through all patterns, looking for a match. It
953is used multiple times for the same subject when colouring is enabled, in order
954to find all possible matches.
955
956Arguments:
957 matchptr the start of the subject
958 length the length of the subject to match
959 startoffset where to start matching
960 offsets the offets vector to fill in
961 mrc address of where to put the result of pcre_exec()
962
963Returns: TRUE if there was a match
964 FALSE if there was no match
965 invert if there was a non-fatal error
966*/
967
968static BOOL
969match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
970 int *mrc)
971{
972int i;
973size_t slen = length;
974const char *msg = "this text:\n\n";
975if (slen > 200)
976 {
977 slen = 200;
978 msg = "text that starts:\n\n";
979 }
980for (i = 0; i < pattern_count; i++)
981 {
982 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
983 startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
984 if (*mrc >= 0) return TRUE;
985 if (*mrc == PCRE_ERROR_NOMATCH) continue;
986 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
987 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
988 fprintf(stderr, "%s", msg);
989 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
990 fprintf(stderr, "\n\n");
991 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
992 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
993 resource_error = TRUE;
994 if (error_count++ > 20)
995 {
996 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
997 pcregrep_exit(2);
998 }
999 return invert; /* No more matching; don't show the line again */
1000 }
1001
1002return FALSE; /* No match, no errors */
1003}
1004
1005
1006
1007/*************************************************
1008* Grep an individual file *
1009*************************************************/
1010
1011/* This is called from grep_or_recurse() below. It uses a buffer that is three
1012times the value of bufthird. The matching point is never allowed to stray into
1013the top third of the buffer, thus keeping more of the file available for
1014context printing or for multiline scanning. For large files, the pointer will
1015be in the middle third most of the time, so the bottom third is available for
1016"before" context printing.
1017
1018Arguments:
1019 handle the fopened FILE stream for a normal file
1020 the gzFile pointer when reading is via libz
1021 the BZFILE pointer when reading is via libbz2
1022 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1023 filename the file name or NULL (for errors)
1024 printname the file name if it is to be printed for each match
1025 or NULL if the file name is not to be printed
1026 it cannot be NULL if filenames[_nomatch]_only is set
1027
1028Returns: 0 if there was at least one match
1029 1 otherwise (no matches)
1030 2 if an overlong line is encountered
1031 3 if there is a read error on a .bz2 file
1032*/
1033
1034static int
1035pcregrep(void *handle, int frtype, char *filename, char *printname)
1036{
1037int rc = 1;
1038int linenumber = 1;
1039int lastmatchnumber = 0;
1040int count = 0;
1041int filepos = 0;
1042int offsets[OFFSET_SIZE];
1043char *lastmatchrestart = NULL;
1044char *ptr = main_buffer;
1045char *endptr;
1046size_t bufflength;
1047BOOL endhyphenpending = FALSE;
1048BOOL input_line_buffered = line_buffered;
1049FILE *in = NULL; /* Ensure initialized */
1050
1051#ifdef SUPPORT_LIBZ
1052gzFile ingz = NULL;
1053#endif
1054
1055#ifdef SUPPORT_LIBBZ2
1056BZFILE *inbz2 = NULL;
1057#endif
1058
1059
1060/* Do the first read into the start of the buffer and set up the pointer to end
1061of what we have. In the case of libz, a non-zipped .gz file will be read as a
1062plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1063fail. */
1064
1065#ifdef SUPPORT_LIBZ
1066if (frtype == FR_LIBZ)
1067 {
1068 ingz = (gzFile)handle;
1069 bufflength = gzread (ingz, main_buffer, bufsize);
1070 }
1071else
1072#endif
1073
1074#ifdef SUPPORT_LIBBZ2
1075if (frtype == FR_LIBBZ2)
1076 {
1077 inbz2 = (BZFILE *)handle;
1078 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1079 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1080 } /* without the cast it is unsigned. */
1081else
1082#endif
1083
1084 {
1085 in = (FILE *)handle;
1086 if (is_file_tty(in)) input_line_buffered = TRUE;
1087 bufflength = input_line_buffered?
1088 read_one_line(main_buffer, bufsize, in) :
1089 fread(main_buffer, 1, bufsize, in);
1090 }
1091
1092endptr = main_buffer + bufflength;
1093
1094/* Loop while the current pointer is not at the end of the file. For large
1095files, endptr will be at the end of the buffer when we are in the middle of the
1096file, but ptr will never get there, because as soon as it gets over 2/3 of the
1097way, the buffer is shifted left and re-filled. */
1098
1099while (ptr < endptr)
1100 {
1101 int endlinelength;
1102 int mrc = 0;
1103 int startoffset = 0;
1104 BOOL match;
1105 char *matchptr = ptr;
1106 char *t = ptr;
1107 size_t length, linelength;
1108
1109 /* At this point, ptr is at the start of a line. We need to find the length
1110 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1111 length remainder of the data in the buffer. Otherwise, it is the length of
1112 the next line, excluding the terminating newline. After matching, we always
1113 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1114 option is used for compiling, so that any match is constrained to be in the
1115 first line. */
1116
1117 t = end_of_line(t, endptr, &endlinelength);
1118 linelength = t - ptr - endlinelength;
1119 length = multiline? (size_t)(endptr - ptr) : linelength;
1120
1121 /* Check to see if the line we are looking at extends right to the very end
1122 of the buffer without a line terminator. This means the line is too long to
1123 handle. */
1124
1125 if (endlinelength == 0 && t == main_buffer + bufsize)
1126 {
1127 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1128 "pcregrep: check the --buffer-size option\n",
1129 linenumber,
1130 (filename == NULL)? "" : " of file ",
1131 (filename == NULL)? "" : filename);
1132 return 2;
1133 }
1134
1135 /* Extra processing for Jeffrey Friedl's debugging. */
1136
1137#ifdef JFRIEDL_DEBUG
1138 if (jfriedl_XT || jfriedl_XR)
1139 {
1140 #include <sys/time.h>
1141 #include <time.h>
1142 struct timeval start_time, end_time;
1143 struct timezone dummy;
1144 int i;
1145
1146 if (jfriedl_XT)
1147 {
1148 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1149 const char *orig = ptr;
1150 ptr = malloc(newlen + 1);
1151 if (!ptr) {
1152 printf("out of memory");
1153 pcregrep_exit(2);
1154 }
1155 endptr = ptr;
1156 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1157 for (i = 0; i < jfriedl_XT; i++) {
1158 strncpy(endptr, orig, length);
1159 endptr += length;
1160 }
1161 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1162 length = newlen;
1163 }
1164
1165 if (gettimeofday(&start_time, &dummy) != 0)
1166 perror("bad gettimeofday");
1167
1168
1169 for (i = 0; i < jfriedl_XR; i++)
1170 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1171 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1172
1173 if (gettimeofday(&end_time, &dummy) != 0)
1174 perror("bad gettimeofday");
1175
1176 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1177 -
1178 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1179
1180 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1181 return 0;
1182 }
1183#endif
1184
1185 /* We come back here after a match when the -o option (only_matching) is set,
1186 in order to find any further matches in the same line. */
1187
1188 ONLY_MATCHING_RESTART:
1189
1190 /* Run through all the patterns until one matches or there is an error other
1191 than NOMATCH. This code is in a subroutine so that it can be re-used for
1192 finding subsequent matches when colouring matched lines. */
1193
1194 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1195
1196 /* If it's a match or a not-match (as required), do what's wanted. */
1197
1198 if (match != invert)
1199 {
1200 BOOL hyphenprinted = FALSE;
1201
1202 /* We've failed if we want a file that doesn't have any matches. */
1203
1204 if (filenames == FN_NOMATCH_ONLY) return 1;
1205
1206 /* Just count if just counting is wanted. */
1207
1208 if (count_only) count++;
1209
1210 /* If all we want is a file name, there is no need to scan any more lines
1211 in the file. */
1212
1213 else if (filenames == FN_MATCH_ONLY)
1214 {
1215 fprintf(stdout, "%s\n", printname);
1216 return 0;
1217 }
1218
1219 /* Likewise, if all we want is a yes/no answer. */
1220
1221 else if (quiet) return 0;
1222
1223 /* The --only-matching option prints just the substring that matched, or a
1224 captured portion of it, as long as this string is not empty, and the
1225 --file-offsets and --line-offsets options output offsets for the matching
1226 substring (they both force --only-matching = 0). None of these options
1227 prints any context. Afterwards, adjust the start and then jump back to look
1228 for further matches in the same line. If we are in invert mode, however,
1229 nothing is printed and we do not restart - this could still be useful
1230 because the return code is set. */
1231
1232 else if (only_matching >= 0)
1233 {
1234 if (!invert)
1235 {
1236 if (printname != NULL) fprintf(stdout, "%s:", printname);
1237 if (number) fprintf(stdout, "%d:", linenumber);
1238 if (line_offsets)
1239 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1240 offsets[1] - offsets[0]);
1241 else if (file_offsets)
1242 fprintf(stdout, "%d,%d\n",
1243 (int)(filepos + matchptr + offsets[0] - ptr),
1244 offsets[1] - offsets[0]);
1245 else if (only_matching < mrc)
1246 {
1247 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1248 if (plen > 0)
1249 {
1250 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1251 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1252 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1253 fprintf(stdout, "\n");
1254 }
1255 }
1256 else if (printname != NULL || number) fprintf(stdout, "\n");
1257 match = FALSE;
1258 if (line_buffered) fflush(stdout);
1259 rc = 0; /* Had some success */
1260 startoffset = offsets[1]; /* Restart after the match */
1261 goto ONLY_MATCHING_RESTART;
1262 }
1263 }
1264
1265 /* This is the default case when none of the above options is set. We print
1266 the matching lines(s), possibly preceded and/or followed by other lines of
1267 context. */
1268
1269 else
1270 {
1271 /* See if there is a requirement to print some "after" lines from a
1272 previous match. We never print any overlaps. */
1273
1274 if (after_context > 0 && lastmatchnumber > 0)
1275 {
1276 int ellength;
1277 int linecount = 0;
1278 char *p = lastmatchrestart;
1279
1280 while (p < ptr && linecount < after_context)
1281 {
1282 p = end_of_line(p, ptr, &ellength);
1283 linecount++;
1284 }
1285
1286 /* It is important to advance lastmatchrestart during this printing so
1287 that it interacts correctly with any "before" printing below. Print
1288 each line's data using fwrite() in case there are binary zeroes. */
1289
1290 while (lastmatchrestart < p)
1291 {
1292 char *pp = lastmatchrestart;
1293 if (printname != NULL) fprintf(stdout, "%s-", printname);
1294 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1295 pp = end_of_line(pp, endptr, &ellength);
1296 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1297 lastmatchrestart = pp;
1298 }
1299 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1300 }
1301
1302 /* If there were non-contiguous lines printed above, insert hyphens. */
1303
1304 if (hyphenpending)
1305 {
1306 fprintf(stdout, "--\n");
1307 hyphenpending = FALSE;
1308 hyphenprinted = TRUE;
1309 }
1310
1311 /* See if there is a requirement to print some "before" lines for this
1312 match. Again, don't print overlaps. */
1313
1314 if (before_context > 0)
1315 {
1316 int linecount = 0;
1317 char *p = ptr;
1318
1319 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1320 linecount < before_context)
1321 {
1322 linecount++;
1323 p = previous_line(p, main_buffer);
1324 }
1325
1326 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1327 fprintf(stdout, "--\n");
1328
1329 while (p < ptr)
1330 {
1331 int ellength;
1332 char *pp = p;
1333 if (printname != NULL) fprintf(stdout, "%s-", printname);
1334 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1335 pp = end_of_line(pp, endptr, &ellength);
1336 FWRITE(p, 1, pp - p, stdout);
1337 p = pp;
1338 }
1339 }
1340
1341 /* Now print the matching line(s); ensure we set hyphenpending at the end
1342 of the file if any context lines are being output. */
1343
1344 if (after_context > 0 || before_context > 0)
1345 endhyphenpending = TRUE;
1346
1347 if (printname != NULL) fprintf(stdout, "%s:", printname);
1348 if (number) fprintf(stdout, "%d:", linenumber);
1349
1350 /* In multiline mode, we want to print to the end of the line in which
1351 the end of the matched string is found, so we adjust linelength and the
1352 line number appropriately, but only when there actually was a match
1353 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1354 the match will always be before the first newline sequence. */
1355
1356 if (multiline & !invert)
1357 {
1358 char *endmatch = ptr + offsets[1];
1359 t = ptr;
1360 while (t < endmatch)
1361 {
1362 t = end_of_line(t, endptr, &endlinelength);
1363 if (t < endmatch) linenumber++; else break;
1364 }
1365 linelength = t - ptr - endlinelength;
1366 }
1367
1368 /*** NOTE: Use only fwrite() to output the data line, so that binary
1369 zeroes are treated as just another data character. */
1370
1371 /* This extra option, for Jeffrey Friedl's debugging requirements,
1372 replaces the matched string, or a specific captured string if it exists,
1373 with X. When this happens, colouring is ignored. */
1374
1375#ifdef JFRIEDL_DEBUG
1376 if (S_arg >= 0 && S_arg < mrc)
1377 {
1378 int first = S_arg * 2;
1379 int last = first + 1;
1380 FWRITE(ptr, 1, offsets[first], stdout);
1381 fprintf(stdout, "X");
1382 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1383 }
1384 else
1385#endif
1386
1387 /* We have to split the line(s) up if colouring, and search for further
1388 matches, but not of course if the line is a non-match. */
1389
1390 if (do_colour && !invert)
1391 {
1392 int plength;
1393 FWRITE(ptr, 1, offsets[0], stdout);
1394 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1395 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1396 fprintf(stdout, "%c[00m", 0x1b);
1397 for (;;)
1398 {
1399 startoffset = offsets[1];
1400 if (startoffset >= (int)linelength + endlinelength ||
1401 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1402 break;
1403 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1404 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1405 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1406 fprintf(stdout, "%c[00m", 0x1b);
1407 }
1408
1409 /* In multiline mode, we may have already printed the complete line
1410 and its line-ending characters (if they matched the pattern), so there
1411 may be no more to print. */
1412
1413 plength = (int)((linelength + endlinelength) - startoffset);
1414 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1415 }
1416
1417 /* Not colouring; no need to search for further matches */
1418
1419 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1420 }
1421
1422 /* End of doing what has to be done for a match. If --line-buffered was
1423 given, flush the output. */
1424
1425 if (line_buffered) fflush(stdout);
1426 rc = 0; /* Had some success */
1427
1428 /* Remember where the last match happened for after_context. We remember
1429 where we are about to restart, and that line's number. */
1430
1431 lastmatchrestart = ptr + linelength + endlinelength;
1432 lastmatchnumber = linenumber + 1;
1433 }
1434
1435 /* For a match in multiline inverted mode (which of course did not cause
1436 anything to be printed), we have to move on to the end of the match before
1437 proceeding. */
1438
1439 if (multiline && invert && match)
1440 {
1441 int ellength;
1442 char *endmatch = ptr + offsets[1];
1443 t = ptr;
1444 while (t < endmatch)
1445 {
1446 t = end_of_line(t, endptr, &ellength);
1447 if (t <= endmatch) linenumber++; else break;
1448 }
1449 endmatch = end_of_line(endmatch, endptr, &ellength);
1450 linelength = endmatch - ptr - ellength;
1451 }
1452
1453 /* Advance to after the newline and increment the line number. The file
1454 offset to the current line is maintained in filepos. */
1455
1456 ptr += linelength + endlinelength;
1457 filepos += (int)(linelength + endlinelength);
1458 linenumber++;
1459
1460 /* If input is line buffered, and the buffer is not yet full, read another
1461 line and add it into the buffer. */
1462
1463 if (input_line_buffered && bufflength < (size_t)bufsize)
1464 {
1465 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1466 bufflength += add;
1467 endptr += add;
1468 }
1469
1470 /* If we haven't yet reached the end of the file (the buffer is full), and
1471 the current point is in the top 1/3 of the buffer, slide the buffer down by
1472 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1473 about to be lost, print them. */
1474
1475 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1476 {
1477 if (after_context > 0 &&
1478 lastmatchnumber > 0 &&
1479 lastmatchrestart < main_buffer + bufthird)
1480 {
1481 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1482 lastmatchnumber = 0;
1483 }
1484
1485 /* Now do the shuffle */
1486
1487 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1488 ptr -= bufthird;
1489
1490#ifdef SUPPORT_LIBZ
1491 if (frtype == FR_LIBZ)
1492 bufflength = 2*bufthird +
1493 gzread (ingz, main_buffer + 2*bufthird, bufthird);
1494 else
1495#endif
1496
1497#ifdef SUPPORT_LIBBZ2
1498 if (frtype == FR_LIBBZ2)
1499 bufflength = 2*bufthird +
1500 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1501 else
1502#endif
1503
1504 bufflength = 2*bufthird +
1505 (input_line_buffered?
1506 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1507 fread(main_buffer + 2*bufthird, 1, bufthird, in));
1508 endptr = main_buffer + bufflength;
1509
1510 /* Adjust any last match point */
1511
1512 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1513 }
1514 } /* Loop through the whole file */
1515
1516/* End of file; print final "after" lines if wanted; do_after_lines sets
1517hyphenpending if it prints something. */
1518
1519if (only_matching < 0 && !count_only)
1520 {
1521 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1522 hyphenpending |= endhyphenpending;
1523 }
1524
1525/* Print the file name if we are looking for those without matches and there
1526were none. If we found a match, we won't have got this far. */
1527
1528if (filenames == FN_NOMATCH_ONLY)
1529 {
1530 fprintf(stdout, "%s\n", printname);
1531 return 0;
1532 }
1533
1534/* Print the match count if wanted */
1535
1536if (count_only)
1537 {
1538 if (count > 0 || !omit_zero_count)
1539 {
1540 if (printname != NULL && filenames != FN_NONE)
1541 fprintf(stdout, "%s:", printname);
1542 fprintf(stdout, "%d\n", count);
1543 }
1544 }
1545
1546return rc;
1547}
1548
1549
1550
1551/*************************************************
1552* Grep a file or recurse into a directory *
1553*************************************************/
1554
1555/* Given a path name, if it's a directory, scan all the files if we are
1556recursing; if it's a file, grep it.
1557
1558Arguments:
1559 pathname the path to investigate
1560 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1561 only_one_at_top TRUE if the path is the only one at toplevel
1562
1563Returns: 0 if there was at least one match
1564 1 if there were no matches
1565 2 there was some kind of error
1566
1567However, file opening failures are suppressed if "silent" is set.
1568*/
1569
1570static int
1571grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1572{
1573int rc = 1;
1574int sep;
1575int frtype;
1576int pathlen;
1577void *handle;
1578FILE *in = NULL; /* Ensure initialized */
1579
1580#ifdef SUPPORT_LIBZ
1581gzFile ingz = NULL;
1582#endif
1583
1584#ifdef SUPPORT_LIBBZ2
1585BZFILE *inbz2 = NULL;
1586#endif
1587
1588/* If the file name is "-" we scan stdin */
1589
1590if (strcmp(pathname, "-") == 0)
1591 {
1592 return pcregrep(stdin, FR_PLAIN, stdin_name,
1593 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1594 stdin_name : NULL);
1595 }
1596
1597/* If the file is a directory, skip if skipping or if we are recursing, scan
1598each file and directory within it, subject to any include or exclude patterns
1599that were set. The scanning code is localized so it can be made
1600system-specific. */
1601
1602if ((sep = isdirectory(pathname)) != 0)
1603 {
1604 if (dee_action == dee_SKIP) return 1;
1605 if (dee_action == dee_RECURSE)
1606 {
1607 char buffer[1024];
1608 char *nextfile;
1609 directory_type *dir = opendirectory(pathname);
1610
1611 if (dir == NULL)
1612 {
1613 if (!silent)
1614 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1615 strerror(errno));
1616 return 2;
1617 }
1618
1619 while ((nextfile = readdirectory(dir)) != NULL)
1620 {
1621 int frc, nflen;
1622 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1623 nflen = (int)(strlen(nextfile));
1624
1625 if (isdirectory(buffer))
1626 {
1627 if (exclude_dir_compiled != NULL &&
1628 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1629 continue;
1630
1631 if (include_dir_compiled != NULL &&
1632 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1633 continue;
1634 }
1635 else
1636 {
1637 if (exclude_compiled != NULL &&
1638 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1639 continue;
1640
1641 if (include_compiled != NULL &&
1642 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1643 continue;
1644 }
1645
1646 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1647 if (frc > 1) rc = frc;
1648 else if (frc == 0 && rc == 1) rc = 0;
1649 }
1650
1651 closedirectory(dir);
1652 return rc;
1653 }
1654 }
1655
1656/* If the file is not a directory and not a regular file, skip it if that's
1657been requested. */
1658
1659else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1660
1661/* Control reaches here if we have a regular file, or if we have a directory
1662and recursion or skipping was not requested, or if we have anything else and
1663skipping was not requested. The scan proceeds. If this is the first and only
1664argument at top level, we don't show the file name, unless we are only showing
1665the file name, or the filename was forced (-H). */
1666
1667pathlen = (int)(strlen(pathname));
1668
1669/* Open using zlib if it is supported and the file name ends with .gz. */
1670
1671#ifdef SUPPORT_LIBZ
1672if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1673 {
1674 ingz = gzopen(pathname, "rb");
1675 if (ingz == NULL)
1676 {
1677 if (!silent)
1678 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1679 strerror(errno));
1680 return 2;
1681 }
1682 handle = (void *)ingz;
1683 frtype = FR_LIBZ;
1684 }
1685else
1686#endif
1687
1688/* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1689
1690#ifdef SUPPORT_LIBBZ2
1691if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1692 {
1693 inbz2 = BZ2_bzopen(pathname, "rb");
1694 handle = (void *)inbz2;
1695 frtype = FR_LIBBZ2;
1696 }
1697else
1698#endif
1699
1700/* Otherwise use plain fopen(). The label is so that we can come back here if
1701an attempt to read a .bz2 file indicates that it really is a plain file. */
1702
1703#ifdef SUPPORT_LIBBZ2
1704PLAIN_FILE:
1705#endif
1706 {
1707 in = fopen(pathname, "rb");
1708 handle = (void *)in;
1709 frtype = FR_PLAIN;
1710 }
1711
1712/* All the opening methods return errno when they fail. */
1713
1714if (handle == NULL)
1715 {
1716 if (!silent)
1717 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1718 strerror(errno));
1719 return 2;
1720 }
1721
1722/* Now grep the file */
1723
1724rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1725 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1726
1727/* Close in an appropriate manner. */
1728
1729#ifdef SUPPORT_LIBZ
1730if (frtype == FR_LIBZ)
1731 gzclose(ingz);
1732else
1733#endif
1734
1735/* If it is a .bz2 file and the result is 3, it means that the first attempt to
1736read failed. If the error indicates that the file isn't in fact bzipped, try
1737again as a normal file. */
1738
1739#ifdef SUPPORT_LIBBZ2
1740if (frtype == FR_LIBBZ2)
1741 {
1742 if (rc == 3)
1743 {
1744 int errnum;
1745 const char *err = BZ2_bzerror(inbz2, &errnum);
1746 if (errnum == BZ_DATA_ERROR_MAGIC)
1747 {
1748 BZ2_bzclose(inbz2);
1749 goto PLAIN_FILE;
1750 }
1751 else if (!silent)
1752 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1753 pathname, err);
1754 rc = 2; /* The normal "something went wrong" code */
1755 }
1756 BZ2_bzclose(inbz2);
1757 }
1758else
1759#endif
1760
1761/* Normal file close */
1762
1763fclose(in);
1764
1765/* Pass back the yield from pcregrep(). */
1766
1767return rc;
1768}
1769
1770
1771
1772
1773/*************************************************
1774* Usage function *
1775*************************************************/
1776
1777static int
1778usage(int rc)
1779{
1780option_item *op;
1781fprintf(stderr, "Usage: pcregrep [-");
1782for (op = optionlist; op->one_char != 0; op++)
1783 {
1784 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1785 }
1786fprintf(stderr, "] [long options] [pattern] [files]\n");
1787fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1788 "options.\n");
1789return rc;
1790}
1791
1792
1793
1794
1795/*************************************************
1796* Help function *
1797*************************************************/
1798
1799static void
1800help(void)
1801{
1802option_item *op;
1803
1804printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1805printf("Search for PATTERN in each FILE or standard input.\n");
1806printf("PATTERN must be present if neither -e nor -f is used.\n");
1807printf("\"-\" can be used as a file name to mean STDIN.\n");
1808
1809#ifdef SUPPORT_LIBZ
1810printf("Files whose names end in .gz are read using zlib.\n");
1811#endif
1812
1813#ifdef SUPPORT_LIBBZ2
1814printf("Files whose names end in .bz2 are read using bzlib2.\n");
1815#endif
1816
1817#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1818printf("Other files and the standard input are read as plain files.\n\n");
1819#else
1820printf("All files are read as plain files, without any interpretation.\n\n");
1821#endif
1822
1823printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1824printf("Options:\n");
1825
1826for (op = optionlist; op->one_char != 0; op++)
1827 {
1828 int n;
1829 char s[4];
1830
1831 /* Two options were accidentally implemented and documented with underscores
1832 instead of hyphens in their names, something that was not noticed for quite a
1833 few releases. When fixing this, I left the underscored versions in the list
1834 in case people were using them. However, we don't want to display them in the
1835 help data. There are no other options that contain underscores, and we do not
1836 expect ever to implement such options. Therefore, just omit any option that
1837 contains an underscore. */
1838
1839 if (strchr(op->long_name, '_') != NULL) continue;
1840
1841 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1842 n = 31 - printf(" %s --%s", s, op->long_name);
1843 if (n < 1) n = 1;
1844 printf("%.*s%s\n", n, " ", op->help_text);
1845 }
1846
1847printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1848printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1849printf("When reading patterns from a file instead of using a command line option,\n");
1850printf("trailing white space is removed and blank lines are ignored.\n");
1851printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1852 MAX_PATTERN_COUNT, PATBUFSIZE);
1853
1854printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1855printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1856}
1857
1858
1859
1860
1861/*************************************************
1862* Handle a single-letter, no data option *
1863*************************************************/
1864
1865static int
1866handle_option(int letter, int options)
1867{
1868switch(letter)
1869 {
1870 case N_FOFFSETS: file_offsets = TRUE; break;
1871 case N_HELP: help(); pcregrep_exit(0);
1872 case N_LBUFFER: line_buffered = TRUE; break;
1873 case N_LOFFSETS: line_offsets = number = TRUE; break;
1874 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1875 case 'c': count_only = TRUE; break;
1876 case 'F': process_options |= PO_FIXED_STRINGS; break;
1877 case 'H': filenames = FN_FORCE; break;
1878 case 'h': filenames = FN_NONE; break;
1879 case 'i': options |= PCRE_CASELESS; break;
1880 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1881 case 'L': filenames = FN_NOMATCH_ONLY; break;
1882 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1883 case 'n': number = TRUE; break;
1884 case 'o': only_matching = 0; break;
1885 case 'q': quiet = TRUE; break;
1886 case 'r': dee_action = dee_RECURSE; break;
1887 case 's': silent = TRUE; break;
1888 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1889 case 'v': invert = TRUE; break;
1890 case 'w': process_options |= PO_WORD_MATCH; break;
1891 case 'x': process_options |= PO_LINE_MATCH; break;
1892
1893 case 'V':
1894 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1895 pcregrep_exit(0);
1896 break;
1897
1898 default:
1899 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1900 pcregrep_exit(usage(2));
1901 }
1902
1903return options;
1904}
1905
1906
1907
1908
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc. Numbers whose last two
digits are 11, 12, or 13 take "th" even though their last digit is 1, 2, or 3.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three decimal digits per byte is an upper bound on the digits of an int;
the extra four bytes are for a sign, the two-letter suffix, and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *suffix = "th";
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13) switch (lasttwo % 10)
  {
  case 1: suffix = "st"; break;
  case 2: suffix = "nd"; break;
  case 3: suffix = "rd"; break;
  default: break;
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
1931
1932
1933
1934/*************************************************
1935* Compile a single pattern *
1936*************************************************/
1937
1938/* When the -F option has been used, this is called for each substring.
1939Otherwise it's called for each supplied pattern.
1940
1941Arguments:
1942 pattern the pattern string
1943 options the PCRE options
1944 filename the file name, or NULL for a command-line pattern
1945 count 0 if this is the only command line pattern, or
1946 number of the command line pattern, or
1947 linenumber for a pattern from a file
1948
1949Returns: TRUE on success, FALSE after an error
1950*/
1951
1952static BOOL
1953compile_single_pattern(char *pattern, int options, char *filename, int count)
1954{
1955char buffer[PATBUFSIZE];
1956const char *error;
1957int errptr;
1958
1959if (pattern_count >= MAX_PATTERN_COUNT)
1960 {
1961 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1962 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1963 return FALSE;
1964 }
1965
1966sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1967 suffix[process_options]);
1968pattern_list[pattern_count] =
1969 pcre_compile(buffer, options, &error, &errptr, pcretables);
1970if (pattern_list[pattern_count] != NULL)
1971 {
1972 pattern_count++;
1973 return TRUE;
1974 }
1975
1976/* Handle compile errors */
1977
1978errptr -= (int)strlen(prefix[process_options]);
1979if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1980
1981if (filename == NULL)
1982 {
1983 if (count == 0)
1984 fprintf(stderr, "pcregrep: Error in command-line regex "
1985 "at offset %d: %s\n", errptr, error);
1986 else
1987 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1988 "at offset %d: %s\n", ordin(count), errptr, error);
1989 }
1990else
1991 {
1992 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1993 "at offset %d: %s\n", count, filename, errptr, error);
1994 }
1995
1996return FALSE;
1997}
1998
1999
2000
2001/*************************************************
2002* Compile one supplied pattern *
2003*************************************************/
2004
2005/* When the -F option has been used, each string may be a list of strings,
2006separated by line breaks. They will be matched literally.
2007
2008Arguments:
2009 pattern the pattern string
2010 options the PCRE options
2011 filename the file name, or NULL for a command-line pattern
2012 count 0 if this is the only command line pattern, or
2013 number of the command line pattern, or
2014 linenumber for a pattern from a file
2015
2016Returns: TRUE on success, FALSE after an error
2017*/
2018
2019static BOOL
2020compile_pattern(char *pattern, int options, char *filename, int count)
2021{
2022if ((process_options & PO_FIXED_STRINGS) != 0)
2023 {
2024 char *eop = pattern + strlen(pattern);
2025 char buffer[PATBUFSIZE];
2026 for(;;)
2027 {
2028 int ellength;
2029 char *p = end_of_line(pattern, eop, &ellength);
2030 if (ellength == 0)
2031 return compile_single_pattern(pattern, options, filename, count);
2032 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2033 pattern = p;
2034 if (!compile_single_pattern(buffer, options, filename, count))
2035 return FALSE;
2036 }
2037 }
2038else return compile_single_pattern(pattern, options, filename, count);
2039}
2040
2041
2042
2043/*************************************************
2044* Main program *
2045*************************************************/
2046
2047/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2048
2049int
2050main(int argc, char **argv)
2051{
2052int i, j;
2053int rc = 1;
2054int pcre_options = 0;
2055int cmd_pattern_count = 0;
2056int hint_count = 0;
2057int errptr;
2058BOOL only_one_at_top;
2059char *patterns[MAX_PATTERN_COUNT];
2060const char *locale_from = "--locale";
2061const char *error;
2062
2063#ifdef SUPPORT_PCREGREP_JIT
2064pcre_jit_stack *jit_stack = NULL;
2065#endif
2066
2067/* Set the default line ending value from the default in the PCRE library;
2068"lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2069Note that the return values from pcre_config(), though derived from the ASCII
2070codes, are the same in EBCDIC environments, so we must use the actual values
rather than escapes such as '\r'. */
2072
2073(void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2074switch(i)
2075 {
2076 default: newline = (char *)"lf"; break;
2077 case 13: newline = (char *)"cr"; break;
2078 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2079 case -1: newline = (char *)"any"; break;
2080 case -2: newline = (char *)"anycrlf"; break;
2081 }
2082
2083/* Process the options */
2084
2085for (i = 1; i < argc; i++)
2086 {
2087 option_item *op = NULL;
2088 char *option_data = (char *)""; /* default to keep compiler happy */
2089 BOOL longop;
2090 BOOL longopwasequals = FALSE;
2091
2092 if (argv[i][0] != '-') break;
2093
2094 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2095 but only if we have previously had -e or -f to define the patterns. */
2096
2097 if (argv[i][1] == 0)
2098 {
2099 if (pattern_filename != NULL || pattern_count > 0) break;
2100 else pcregrep_exit(usage(2));
2101 }
2102
2103 /* Handle a long name option, or -- to terminate the options */
2104
2105 if (argv[i][1] == '-')
2106 {
2107 char *arg = argv[i] + 2;
2108 char *argequals = strchr(arg, '=');
2109
2110 if (*arg == 0) /* -- terminates options */
2111 {
2112 i++;
2113 break; /* out of the options-handling loop */
2114 }
2115
2116 longop = TRUE;
2117
2118 /* Some long options have data that follows after =, for example file=name.
2119 Some options have variations in the long name spelling: specifically, we
2120 allow "regexp" because GNU grep allows it, though I personally go along
2121 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2122 These options are entered in the table as "regex(p)". Options can be in
2123 both these categories. */
2124
2125 for (op = optionlist; op->one_char != 0; op++)
2126 {
2127 char *opbra = strchr(op->long_name, '(');
2128 char *equals = strchr(op->long_name, '=');
2129
2130 /* Handle options with only one spelling of the name */
2131
2132 if (opbra == NULL) /* Does not contain '(' */
2133 {
2134 if (equals == NULL) /* Not thing=data case */
2135 {
2136 if (strcmp(arg, op->long_name) == 0) break;
2137 }
2138 else /* Special case xxx=data */
2139 {
2140 int oplen = (int)(equals - op->long_name);
2141 int arglen = (argequals == NULL)?
2142 (int)strlen(arg) : (int)(argequals - arg);
2143 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2144 {
2145 option_data = arg + arglen;
2146 if (*option_data == '=')
2147 {
2148 option_data++;
2149 longopwasequals = TRUE;
2150 }
2151 break;
2152 }
2153 }
2154 }
2155
2156 /* Handle options with an alternate spelling of the name */
2157
2158 else
2159 {
2160 char buff1[24];
2161 char buff2[24];
2162
2163 int baselen = (int)(opbra - op->long_name);
2164 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2165 int arglen = (argequals == NULL || equals == NULL)?
2166 (int)strlen(arg) : (int)(argequals - arg);
2167
2168 sprintf(buff1, "%.*s", baselen, op->long_name);
2169 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2170
2171 if (strncmp(arg, buff1, arglen) == 0 ||
2172 strncmp(arg, buff2, arglen) == 0)
2173 {
2174 if (equals != NULL && argequals != NULL)
2175 {
2176 option_data = argequals;
2177 if (*option_data == '=')
2178 {
2179 option_data++;
2180 longopwasequals = TRUE;
2181 }
2182 }
2183 break;
2184 }
2185 }
2186 }
2187
2188 if (op->one_char == 0)
2189 {
2190 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2191 pcregrep_exit(usage(2));
2192 }
2193 }
2194
2195 /* Jeffrey Friedl's debugging harness uses these additional options which
2196 are not in the right form for putting in the option table because they use
2197 only one hyphen, yet are more than one character long. By putting them
2198 separately here, they will not get displayed as part of the help() output,
2199 but I don't think Jeffrey will care about that. */
2200
2201#ifdef JFRIEDL_DEBUG
2202 else if (strcmp(argv[i], "-pre") == 0) {
2203 jfriedl_prefix = argv[++i];
2204 continue;
2205 } else if (strcmp(argv[i], "-post") == 0) {
2206 jfriedl_postfix = argv[++i];
2207 continue;
2208 } else if (strcmp(argv[i], "-XT") == 0) {
2209 sscanf(argv[++i], "%d", &jfriedl_XT);
2210 continue;
2211 } else if (strcmp(argv[i], "-XR") == 0) {
2212 sscanf(argv[++i], "%d", &jfriedl_XR);
2213 continue;
2214 }
2215#endif
2216
2217
2218 /* One-char options; many that have no data may be in a single argument; we
2219 continue till we hit the last one or one that needs data. */
2220
2221 else
2222 {
2223 char *s = argv[i] + 1;
2224 longop = FALSE;
2225 while (*s != 0)
2226 {
2227 for (op = optionlist; op->one_char != 0; op++)
2228 {
2229 if (*s == op->one_char) break;
2230 }
2231 if (op->one_char == 0)
2232 {
2233 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2234 *s, argv[i]);
2235 pcregrep_exit(usage(2));
2236 }
2237
2238 /* Check for a single-character option that has data: OP_OP_NUMBER
2239 is used for one that either has a numerical number or defaults, i.e. the
2240 data is optional. If a digit follows, there is data; if not, carry on
2241 with other single-character options in the same string. */
2242
2243 option_data = s+1;
2244 if (op->type == OP_OP_NUMBER)
2245 {
2246 if (isdigit((unsigned char)s[1])) break;
2247 }
2248 else /* Check for end or a dataless option */
2249 {
2250 if (op->type != OP_NODATA || s[1] == 0) break;
2251 }
2252
2253 /* Handle a single-character option with no data, then loop for the
2254 next character in the string. */
2255
2256 pcre_options = handle_option(*s++, pcre_options);
2257 }
2258 }
2259
2260 /* At this point we should have op pointing to a matched option. If the type
2261 is NO_DATA, it means that there is no data, and the option might set
2262 something in the PCRE options. */
2263
2264 if (op->type == OP_NODATA)
2265 {
2266 pcre_options = handle_option(op->one_char, pcre_options);
2267 continue;
2268 }
2269
2270 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2271 either has a value or defaults to something. It cannot have data in a
2272 separate item. At the moment, the only such options are "colo(u)r",
2273 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2274
2275 if (*option_data == 0 &&
2276 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2277 {
2278 switch (op->one_char)
2279 {
2280 case N_COLOUR:
2281 colour_option = (char *)"auto";
2282 break;
2283
2284 case 'o':
2285 only_matching = 0;
2286 break;
2287
2288#ifdef JFRIEDL_DEBUG
2289 case 'S':
2290 S_arg = 0;
2291 break;
2292#endif
2293 }
2294 continue;
2295 }
2296
2297 /* Otherwise, find the data string for the option. */
2298
2299 if (*option_data == 0)
2300 {
2301 if (i >= argc - 1 || longopwasequals)
2302 {
2303 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2304 pcregrep_exit(usage(2));
2305 }
2306 option_data = argv[++i];
2307 }
2308
2309 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2310 multiple times to create a list of patterns. */
2311
2312 if (op->type == OP_PATLIST)
2313 {
2314 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2315 {
2316 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2317 MAX_PATTERN_COUNT);
2318 return 2;
2319 }
2320 patterns[cmd_pattern_count++] = option_data;
2321 }
2322
2323 /* Otherwise, deal with single string or numeric data values. */
2324
2325 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2326 op->type != OP_OP_NUMBER)
2327 {
2328 *((char **)op->dataptr) = option_data;
2329 }
2330
2331 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2332 only for unpicking arguments, so just keep it simple. */
2333
2334 else
2335 {
2336 unsigned long int n = 0;
2337 char *endptr = option_data;
2338 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2339 while (isdigit((unsigned char)(*endptr)))
2340 n = n * 10 + (int)(*endptr++ - '0');
2341 if (toupper(*endptr) == 'K')
2342 {
2343 n *= 1024;
2344 endptr++;
2345 }
2346 else if (toupper(*endptr) == 'M')
2347 {
2348 n *= 1024*1024;
2349 endptr++;
2350 }
2351 if (*endptr != 0)
2352 {
2353 if (longop)
2354 {
2355 char *equals = strchr(op->long_name, '=');
2356 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2357 (int)(equals - op->long_name);
2358 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2359 option_data, nlen, op->long_name);
2360 }
2361 else
2362 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2363 option_data, op->one_char);
2364 pcregrep_exit(usage(2));
2365 }
2366 if (op->type == OP_LONGNUMBER)
2367 *((unsigned long int *)op->dataptr) = n;
2368 else
2369 *((int *)op->dataptr) = n;
2370 }
2371 }
2372
2373/* Options have been decoded. If -C was used, its value is used as a default
2374for -A and -B. */
2375
2376if (both_context > 0)
2377 {
2378 if (after_context == 0) after_context = both_context;
2379 if (before_context == 0) before_context = both_context;
2380 }
2381
2382/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2383However, the latter two set only_matching. */
2384
2385if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2386 (file_offsets && line_offsets))
2387 {
2388 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2389 "and/or --line-offsets\n");
2390 pcregrep_exit(usage(2));
2391 }
2392
2393if (file_offsets || line_offsets) only_matching = 0;
2394
2395/* If a locale has not been provided as an option, see if the LC_CTYPE or
2396LC_ALL environment variable is set, and if so, use it. */
2397
2398if (locale == NULL)
2399 {
2400 locale = getenv("LC_ALL");
2401 locale_from = "LCC_ALL";
2402 }
2403
2404if (locale == NULL)
2405 {
2406 locale = getenv("LC_CTYPE");
2407 locale_from = "LC_CTYPE";
2408 }
2409
2410/* If a locale has been provided, set it, and generate the tables the PCRE
2411needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2412
2413if (locale != NULL)
2414 {
2415 if (setlocale(LC_CTYPE, locale) == NULL)
2416 {
2417 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2418 locale, locale_from);
2419 return 2;
2420 }
2421 pcretables = pcre_maketables();
2422 }
2423
2424/* Sort out colouring */
2425
2426if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2427 {
2428 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2429 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2430 else
2431 {
2432 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2433 colour_option);
2434 return 2;
2435 }
2436 if (do_colour)
2437 {
2438 char *cs = getenv("PCREGREP_COLOUR");
2439 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2440 if (cs != NULL) colour_string = cs;
2441 }
2442 }
2443
2444/* Interpret the newline type; the default settings are Unix-like. */
2445
2446if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2447 {
2448 pcre_options |= PCRE_NEWLINE_CR;
2449 endlinetype = EL_CR;
2450 }
2451else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2452 {
2453 pcre_options |= PCRE_NEWLINE_LF;
2454 endlinetype = EL_LF;
2455 }
2456else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2457 {
2458 pcre_options |= PCRE_NEWLINE_CRLF;
2459 endlinetype = EL_CRLF;
2460 }
2461else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2462 {
2463 pcre_options |= PCRE_NEWLINE_ANY;
2464 endlinetype = EL_ANY;
2465 }
2466else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2467 {
2468 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2469 endlinetype = EL_ANYCRLF;
2470 }
2471else
2472 {
2473 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2474 return 2;
2475 }
2476
2477/* Interpret the text values for -d and -D */
2478
2479if (dee_option != NULL)
2480 {
2481 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2482 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2483 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2484 else
2485 {
2486 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2487 return 2;
2488 }
2489 }
2490
2491if (DEE_option != NULL)
2492 {
2493 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2494 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2495 else
2496 {
2497 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2498 return 2;
2499 }
2500 }
2501
2502/* Check the values for Jeffrey Friedl's debugging options. */
2503
2504#ifdef JFRIEDL_DEBUG
2505if (S_arg > 9)
2506 {
2507 fprintf(stderr, "pcregrep: bad value for -S option\n");
2508 return 2;
2509 }
2510if (jfriedl_XT != 0 || jfriedl_XR != 0)
2511 {
2512 if (jfriedl_XT == 0) jfriedl_XT = 1;
2513 if (jfriedl_XR == 0) jfriedl_XR = 1;
2514 }
2515#endif
2516
2517/* Get memory for the main buffer, and to store the pattern and hints lists. */
2518
2519bufsize = 3*bufthird;
2520main_buffer = (char *)malloc(bufsize);
2521pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2522hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2523
2524if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2525 {
2526 fprintf(stderr, "pcregrep: malloc failed\n");
2527 goto EXIT2;
2528 }
2529
2530/* If no patterns were provided by -e, and there is no file provided by -f,
2531the first argument is the one and only pattern, and it must exist. */
2532
2533if (cmd_pattern_count == 0 && pattern_filename == NULL)
2534 {
2535 if (i >= argc) return usage(2);
2536 patterns[cmd_pattern_count++] = argv[i++];
2537 }
2538
2539/* Compile the patterns that were provided on the command line, either by
2540multiple uses of -e or as a single unkeyed pattern. */
2541
2542for (j = 0; j < cmd_pattern_count; j++)
2543 {
2544 if (!compile_pattern(patterns[j], pcre_options, NULL,
2545 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2546 goto EXIT2;
2547 }
2548
2549/* Compile the regular expressions that are provided in a file. */
2550
2551if (pattern_filename != NULL)
2552 {
2553 int linenumber = 0;
2554 FILE *f;
2555 char *filename;
2556 char buffer[PATBUFSIZE];
2557
2558 if (strcmp(pattern_filename, "-") == 0)
2559 {
2560 f = stdin;
2561 filename = stdin_name;
2562 }
2563 else
2564 {
2565 f = fopen(pattern_filename, "r");
2566 if (f == NULL)
2567 {
2568 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2569 strerror(errno));
2570 goto EXIT2;
2571 }
2572 filename = pattern_filename;
2573 }
2574
2575 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2576 {
2577 char *s = buffer + (int)strlen(buffer);
2578 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2579 *s = 0;
2580 linenumber++;
2581 if (buffer[0] == 0) continue; /* Skip blank lines */
2582 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2583 goto EXIT2;
2584 }
2585
2586 if (f != stdin) fclose(f);
2587 }
2588
2589/* Study the regular expressions, as we will be running them many times. Unless
2590JIT has been explicitly disabled, arrange a stack for it to use. */
2591
2592#ifdef SUPPORT_PCREGREP_JIT
2593if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2594 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2595#endif
2596
2597for (j = 0; j < pattern_count; j++)
2598 {
2599 hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2600 if (error != NULL)
2601 {
2602 char s[16];
2603 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2604 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2605 goto EXIT2;
2606 }
2607 hint_count++;
2608#ifdef SUPPORT_PCREGREP_JIT
2609 if (jit_stack != NULL && hints_list[j] != NULL)
2610 pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2611#endif
2612 }
2613
2614/* If --match-limit or --recursion-limit was set, put the value(s) into the
2615pcre_extra block for each pattern. */
2616
2617if (match_limit > 0 || match_limit_recursion > 0)
2618 {
2619 for (j = 0; j < pattern_count; j++)
2620 {
2621 if (hints_list[j] == NULL)
2622 {
2623 hints_list[j] = malloc(sizeof(pcre_extra));
2624 if (hints_list[j] == NULL)
2625 {
2626 fprintf(stderr, "pcregrep: malloc failed\n");
2627 pcregrep_exit(2);
2628 }
2629 }
2630 if (match_limit > 0)
2631 {
2632 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2633 hints_list[j]->match_limit = match_limit;
2634 }
2635 if (match_limit_recursion > 0)
2636 {
2637 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2638 hints_list[j]->match_limit_recursion = match_limit_recursion;
2639 }
2640 }
2641 }
2642
2643/* If there are include or exclude patterns, compile them. */
2644
2645if (exclude_pattern != NULL)
2646 {
2647 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2648 pcretables);
2649 if (exclude_compiled == NULL)
2650 {
2651 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2652 errptr, error);
2653 goto EXIT2;
2654 }
2655 }
2656
2657if (include_pattern != NULL)
2658 {
2659 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2660 pcretables);
2661 if (include_compiled == NULL)
2662 {
2663 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2664 errptr, error);
2665 goto EXIT2;
2666 }
2667 }
2668
2669if (exclude_dir_pattern != NULL)
2670 {
2671 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2672 pcretables);
2673 if (exclude_dir_compiled == NULL)
2674 {
2675 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2676 errptr, error);
2677 goto EXIT2;
2678 }
2679 }
2680
2681if (include_dir_pattern != NULL)
2682 {
2683 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2684 pcretables);
2685 if (include_dir_compiled == NULL)
2686 {
2687 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2688 errptr, error);
2689 goto EXIT2;
2690 }
2691 }
2692
2693/* If there are no further arguments, do the business on stdin and exit. */
2694
2695if (i >= argc)
2696 {
2697 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2698 (filenames > FN_DEFAULT)? stdin_name : NULL);
2699 goto EXIT;
2700 }
2701
2702/* Otherwise, work through the remaining arguments as files or directories.
2703Pass in the fact that there is only one argument at top level - this suppresses
2704the file name if the argument is not a directory and filenames are not
2705otherwise forced. */
2706
2707only_one_at_top = i == argc - 1; /* Catch initial value of i */
2708
2709for (; i < argc; i++)
2710 {
2711 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2712 only_one_at_top);
2713 if (frc > 1) rc = frc;
2714 else if (frc == 0 && rc == 1) rc = 0;
2715 }
2716
2717EXIT:
2718#ifdef SUPPORT_PCREGREP_JIT
2719if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2720#endif
2721if (main_buffer != NULL) free(main_buffer);
2722if (pattern_list != NULL)
2723 {
2724 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2725 free(pattern_list);
2726 }
2727if (hints_list != NULL)
2728 {
2729 for (i = 0; i < hint_count; i++)
2730 {
2731 if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2732 }
2733 free(hints_list);
2734 }
2735pcregrep_exit(rc);
2736
2737EXIT2:
2738rc = 2;
2739goto EXIT;
2740}
2741
2742/* End of pcregrep */