root/alternc-awstats/tags/0.5.1/logresolvemerge.pl

Revision 1867, 28.9 kB (checked in by benjamin, 1 year ago)

adding logresolvemerge.pl since awstats one is buggy ...

  • Property svn:executable set to *
Line 
1 #!/usr/bin/perl
2 #-----------------------------------------------------------------------------
3 # Allows you to get one unique output log file, sorted on date,
4 # built from particular sources.
5 # This tool is part of the AWStats log analyzer but can be used
6 # alone with any other log analyzer.
7 # See COPYING.TXT file about AWStats GNU General Public License.
8 #-----------------------------------------------------------------------------
9 # $Revision: 1.34 $ - $Author: eldy $ - $Date: 2005/12/04 21:10:46 $
10
11 use strict; no strict "refs";
12 #use diagnostics;
13
14 #-----------------------------------------------------------------------------
15 # Defines
16 #-----------------------------------------------------------------------------
17
18 # ENABLETHREAD --> COMMENT THIS BLOCK TO USE A THREADED VERSION
19 my $UseThread=0;
20 &Check_Thread_Use();
21 my $NbOfDNSLookupAsked = 0;
22 my %threadarray = ();
23 my %MyDNSTable = ();
24 my %TmpDNSLookup = ();
25
26 # ENABLETHREAD --> UNCOMMENT THIS BLOCK TO USE A THREADED VERSION
27 #my $UseThread=1;
28 #&Check_Thread_Use();
29 #my $NbOfDNSLookupAsked : shared = 0;
30 #my %threadarray : shared = ();
31 #my %MyDNSTable : shared = ();
32 #my %TmpDNSLookup : shared = ();
33
34
# ---------- Version information ----------
# $REVISION starts as the raw revision keyword and is reduced to the text
# between its first and last whitespace; $VERSION embeds that value.
use vars qw/ $REVISION $VERSION /;
$REVISION = '$Revision: 1.34 $';
$REVISION =~ /\s(.*)\s/;
$REVISION = $1;
$VERSION  = "1.2 (build $REVISION)";

# Number of lines between two benchmark reports (see -showsteps).
use vars qw/ $NBOFLINESFORBENCHMARK /;
$NBOFLINESFORBENCHMARK = 8192;

# ---------- Scalar settings and state ----------
use vars qw/
    $DIR $PROG $Extension
    $Debug $ShowSteps $AddFileNum $AddFileName
    $MaxNbOfThread $DNSLookup $DNSCache $DirCgi $DirData $DNSLookupAlreadyDone
    $NbOfLinesShowsteps $AFINET $QueueCursor
/;
# String-valued settings start as the empty string ...
($DIR, $PROG, $Extension, $DNSCache, $DirCgi, $DirData, $AFINET) = ('') x 7;
# ... flags and counters start at zero. $QueueCursor is deliberately left
# undefined here, exactly as before.
($Debug, $ShowSteps, $AddFileNum, $AddFileName,
 $MaxNbOfThread, $DNSLookup, $DNSLookupAlreadyDone, $NbOfLinesShowsteps) = (0) x 8;

# ---------- Arrays ----------
use vars qw/ @SkipDNSLookupFor @ParamFile /;

# ---------- Hashes ----------
use vars qw/
    %LogFileToDo %linerecord %timerecord %corrupted
    %QueueHostsToResolve %QueueRecords
/;
%LogFileToDo         = ();
%linerecord          = ();
%timerecord          = ();
%corrupted           = ();
%QueueHostsToResolve = ();
%QueueRecords        = ();

# DRA2: the order of the time records is kept here; each entry of the array is
# DRA2: a file record number, which is the key used by the other hashes.
use vars qw/ @timerecordorder /;
@timerecordorder = ();

# ---------- External program variables ----------
# Command used to read a compressed file, and the regex (as a string) that
# recognises such a file by its name.
my ($zcat,  $zcat_file)  = ('zcat',  '\.gz$');      # gzip compression
my ($bzcat, $bzcat_file) = ('bzcat', '\.bz2$');     # bz2 compression
92
93
94
95 #-----------------------------------------------------------------------------
96 # Functions
97 #-----------------------------------------------------------------------------
98
#------------------------------------------------------------------------------
# Function:     Report a fatal error on STDERR and terminate the program
# Parameters:   $message (printed as "Error: <message>.")
# Input:        None
# Output:       None
# Return:       Never returns; the process exits with status 1
#------------------------------------------------------------------------------
sub error {
    my $message = $_[0];
    print STDERR "Error: $message.\n";
    exit 1;
}
110
#------------------------------------------------------------------------------
# Function:     Print a timestamped debug message on standard output
# Parameters:   $message, $level (optional; a missing or false level means 1)
# Input:        $Debug (current verbosity; the message is shown only when
#               $Debug is greater than or equal to $level)
# Output:       None
# Return:       None
#------------------------------------------------------------------------------
sub debug {
    my ($debugstring, $level) = @_;
    $level ||= 1;
    return unless $Debug >= $level;     # Message is more verbose than requested
    print "DEBUG $level - ".localtime(time())." : $debugstring\n";
}
125
#------------------------------------------------------------------------------
# Function:     Print a warning message on STDERR
# Parameters:   $message
# Input:        $Debug (when set, the message is also sent to debug() at
#               level 1)
# Output:       None
# Return:       None
#------------------------------------------------------------------------------
sub warning {
    my ($messagestring) = @_;
    debug("$messagestring", 1) if $Debug;
    print STDERR "$messagestring\n";
}
138
#-----------------------------------------------------------------------------
# Function:     Tell whether a string is made only of "safe" characters:
#               word characters (\w), whitespace, or one of
#               + - / \ . % , ; : = " ' & ? !
# Input:        String
# Return:       1 if every character is in that set (and the string is not
#               empty), 0 otherwise
#-----------------------------------------------------------------------------
sub IsAscii {
    my ($string) = @_;
    debug("IsAscii($string)", 5) if $Debug;
    my $onlyallowed = ($string =~ /^[\w\+\-\/\\\.%,;:=\"\'&?!\s]+$/) ? 1 : 0;
    if ($Debug) {
        if ($onlyallowed) { debug(" Yes", 5); }
        else              { debug(" No", 5); }
    }
    return $onlyallowed;
}
154
#-----------------------------------------------------------------------------
# DRA Function: Tell whether the reverse DNS lookup must be skipped for a host
# Input:        String (host or IP address), matched case-insensitively
#               against every regex listed in @SkipDNSLookupFor
# Return:       1 if one of the patterns matches, 0 otherwise
#-----------------------------------------------------------------------------
sub SkipDNSLookup {
    my $host = $_[0];
    foreach my $pattern (@SkipDNSLookupFor) {
        return 1 if $host =~ /$pattern/i;
    }
    return 0;   # Not in @SkipDNSLookupFor
}
164
#-----------------------------------------------------------------------------
# Function:     Resolve one IPv4 address into a host name (can be run in a
#               thread). The result is stored in $TmpDNSLookup{$ipaddress}:
#               the host name, or '*' when the lookup failed, returned
#               something that looks like an IP address, or returned a name
#               rejected by IsAscii(). The entry for the address is removed
#               from %threadarray when the lookup is finished.
# Input:        String (dotted IPv4 address)
# Return:       Nothing
#-----------------------------------------------------------------------------
sub MakeDNSLookup {
    my ($ipaddress) = @_;
    $NbOfDNSLookupAsked++;
    use Socket;
    $AFINET = AF_INET;

    # The thread id is only asked for when threaded lookups were requested.
    my $tid = 0;
    if ($MaxNbOfThread) { $tid = eval("threads->self->tid()"); }
    else                { $tid = 0; }
    debug("  ***** Thread id $tid: MakeDNSlookup started (for $ipaddress)", 4) if $Debug;

    my @octets       = split(/\./, $ipaddress);
    my $lookupresult = gethostbyaddr(pack("C4", @octets), $AFINET);    # This is very slow, may take 20 seconds

    my $unusable = ! $lookupresult
        || $lookupresult =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
        || ! IsAscii($lookupresult);
    $TmpDNSLookup{$ipaddress} = $unusable ? '*' : $lookupresult;

    debug("  ***** Thread id $tid: MakeDNSlookup done ($ipaddress resolved into $TmpDNSLookup{$ipaddress})", 4) if $Debug;
    delete $threadarray{$ipaddress};
    return;
}
188
#-----------------------------------------------------------------------------
# Function:     WriteRecordsReadyInQueue
#               Print every record at the head of the queue (starting at
#               $QueueCursor) whose host no longer needs resolving: host is
#               '*', or it is known in %MyDNSTable or %TmpDNSLookup. When a
#               real name is known (value other than '*'), the first
#               occurrence of the host in the record is replaced by that
#               name. Printed records are removed from %QueueRecords and
#               %QueueHostsToResolve and $QueueCursor is advanced. The loop
#               stops at the first record that is not ready.
# Input:        $logfilechosen  file number, printed as a prefix when
#               $AddFileNum is set and used to find the file name when
#               $AddFileName is set
# Return:       0
#-----------------------------------------------------------------------------
sub WriteRecordsReadyInQueue {
    my $logfilechosen = shift;
    if ($Debug) { debug("Check head of queue to write records ready to flush (QueueCursor=$QueueCursor, QueueSize=".(scalar keys %QueueRecords).")",4); }
    while (1) {
        my $host = $QueueHostsToResolve{$QueueCursor};
        last unless $host;                                                          # Nothing queued at the cursor
        last unless $host eq '*' || $MyDNSTable{$host} || $TmpDNSLookup{$host};     # Head of queue is still waiting

        # $QueueCursor points to a ready record
        if ($host eq '*') {
            if ($Debug) { debug(" First elem in queue is ready. No change on it. We pull it.",4); }
        }
        else {
            # The static cache has priority over the session lookups
            my ($resolved, $source);
            if    ($MyDNSTable{$host})   { $resolved = $MyDNSTable{$host};   $source = 'MyDNSTable'; }
            elsif ($TmpDNSLookup{$host}) { $resolved = $TmpDNSLookup{$host}; $source = 'TmpDNSLookup'; }
            if (defined $resolved && $resolved ne '*') {
                # \Q..\E: the host must be matched literally. Unquoted, each "."
                # of an IP address would match any character and an unrelated
                # part of the record could be rewritten.
                $QueueRecords{$QueueCursor} =~ s/\Q$host\E/$resolved/;
                if ($Debug) { debug(" First elem in queue has been resolved (found in $source $resolved). We pull it.",4); }
            }
        }

        # Record is ready, we output it.
        if ($AddFileNum)  { print "$logfilechosen "; }
        if ($AddFileName) { print "$LogFileToDo{$logfilechosen} "; }
        print "$QueueRecords{$QueueCursor}\n";
        delete $QueueRecords{$QueueCursor};
        delete $QueueHostsToResolve{$QueueCursor};
        $QueueCursor++;
    }
    return 0;
}
226
#-----------------------------------------------------------------------------
# Function:     Check whether threaded DNS lookup can be used.
#               Does nothing with Perl older than 5.8. Otherwise, for every
#               command line argument of the form -dnslookup=N (or
#               -dnslookup:N): stop with an error if $UseThread is off, else
#               load the 'threads' and 'threads::shared' modules and stop
#               with an error if one of them cannot be loaded.
# Input:        @ARGV, $UseThread
# Return:       -
#-----------------------------------------------------------------------------
sub Check_Thread_Use {
    return if $] < 5.008;
    foreach my $argument (@ARGV) {
        next unless $argument =~ /^-dnslookup[:=](\d{1,2})/i;
        if (! $UseThread) {
            &error("Multi-thread is disabled in default version of this script.\nYou must manually edit the file '$0' to comment/uncomment all\nlines marked with 'ENABLETHREAD' string to enable multi-threading");
        }
        if (!eval ('require "threads.pm";')) {
            &error("Failed to load perl module 'threads' required for multi-threaded DNS lookup".($@?": $@":""));
        }
        if (!eval ('require "threads/shared.pm";')) {
            &error("Failed to load perl module 'threads::shared' required for multi-threaded DNS lookup".($@?": $@":""));
        }
    }
}
242
243
244 #-----------------------------------------------------------------------------
245 # MAIN
246 #-----------------------------------------------------------------------------
# Split the invocation path $0 into directory ($DIR), base name ($PROG) and
# extension ($Extension).
($DIR=$0) =~ s/([^\/\\]*)$//; ($PROG=$1) =~ s/\.([^\.]*)$//; $Extension=$1;

# Get parameters: every argument starting with '-' is an option, anything else
# is an input log file (or wildcard pattern) stored in @ParamFile.
# Options are recognised by the first test that matches, in the order below.
my $cpt=1;      # File counter. It is reset to 1 before the LogFileToDo list is built, so it is not advanced here.
foreach my $argument (@ARGV) {
    if ($argument !~ /^-/) {
        push @ParamFile, $argument;
        next;
    }
    if ($argument =~ /debug=(\d)/i) { $Debug=$1; }
    elsif ($argument =~ /dnscache=/i) {
        $DNSLookup||=2;
        $DNSCache=$argument;
        # The option name is matched case-insensitively just above, so it must
        # be stripped the same way: otherwise "-DNSCache=file" would leave the
        # option prefix inside the cache file name.
        $DNSCache =~ s/-dnscache=//i;
    }
    elsif ($argument =~ /dnslookup[:=](\d{1,2})/i) { $DNSLookup||=1; $MaxNbOfThread=$1; }
    elsif ($argument =~ /dnslookup/i) { $DNSLookup||=1; }
    elsif ($argument =~ /showsteps/i) { $ShowSteps=1; }
    elsif ($argument =~ /addfilenum/i) { $AddFileNum=1; }
    elsif ($argument =~ /addfilename/i) { $AddFileName=1; }
    else { print "Unknown argument $argument ignored\n"; }
}

if ($Debug) {
    $|=1;       # Unbuffered output
    debug(ucfirst($PROG)." - $VERSION - Perl $^X $]",1);
    debug("DNSLookup=$DNSLookup");
    debug("DNSCache=$DNSCache");
    debug("MaxNbOfThread=$MaxNbOfThread");
}

# Disallow MaxNbOfThread and Perl < 5.8
if ($] < 5.008 && $MaxNbOfThread) {
    error("Multi-threaded DNS lookup is only supported with Perl 5.8 or higher (not $]). Use -dnslookup option instead");
}

# Warning, there is a memory hole in ActiveState perl version (in delete functions)
if ($^X =~ /activestate/i || $^X =~ /activeperl/i) {
    # TODO Add a warning

}
286
# No input file given: print the usage text on standard output and exit with
# status 0.
unless (@ParamFile) {
    print <<"END_OF_USAGE";
----- $PROG $VERSION (c) Laurent Destailleur -----
$PROG allows you to get one unique output log file, sorted on date,
built from particular sources:
 - It can read several input log files,
 - It can read .gz/.bz2 log files,
 - It can also makes a fast reverse DNS lookup to replace
   all IP addresses into host names in resulting log file.
$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software
distributed with a GNU General Public License (See COPYING.txt file).
$PROG is part of AWStats but can be used alone as a log merger
or resolver before using any other log analyzer.

Usage:
  $PROG.$Extension [options] file
  $PROG.$Extension [options] file1 ... filen
  $PROG.$Extension [options] *.*
  perl $PROG.$Extension [options] *.* > newfile
Options:
  -dnslookup     make a reverse DNS lookup on IP adresses
  -dnslookup=n   same with a n parallel threads instead of serial requests
  -dnscache=file make DNS lookup from cache file first before network lookup
  -showsteps     print on stderr benchmark information every $NBOFLINESFORBENCHMARK lines
  -addfilenum    if used with several files, file number can be added in first
  -addfilename   if used with several files, file name can be added in first
                 field of output file. This can be used to add a cluster id
                 when log files come from several load balanced computers.

This runs $PROG in command line to open one or several
server log files to merge them (sorted on date) and/or to make a reverse
DNS lookup (if asked). The result log file is sent on standard output.
Note: $PROG is not a 'sort' tool to sort one file. It's a
software able to output sorted log records (with a reverse DNS lookup
included or not) even if log records are dispatched in several files.
Each of thoose files must be already independently sorted itself
(but that is the case in all web server log files). So you can use it
for load balanced log files or to group several old log files.

Don't forget that the main goal of logresolvemerge is to send log records to
a log analyzer in a sorted order without merging files on disk (NO NEED
OF DISK SPACE AT ALL) and without loading files into memory (NO NEED
OF MORE MEMORY). Choose of output records is done on the fly.

So logresolvemerge is particularly usefull when you want to output several
and/or large log files in a fast process, with no use of disk or
more memory, and in a chronological order through a pipe (to be used by a log
analyzer).

Note: If input records are not 'exactly' sorted but 'nearly' sorted (this
occurs with heavy servers), this is not a problem, the output will also
be 'nearly' sorted but a few log analyzers (like AWStats) knowns how to deal
with such logs.

WARNING: If log files are old MAC text files (lines ended with CR char), you
can't run this tool on Win or Unix platforms.

WARNING: Because of important memory holes in ActiveState Perl version, use
another Perl interpreter if you need to process large lof files.

Now supports/detects:
  Automatic detection of log format
  Files can be .gz/.bz2 files if zcat/bzcat tools are available in PATH.
  Multithreaded reverse DNS lookup (several parallel requests) with Perl 5.8+.
New versions and FAQ at http://awstats.sourceforge.net
END_OF_USAGE
    exit 0;
}
354
# Get current time.
# localtime() returns the year as "years since 1900" and the month as 0..11,
# so 1900 is always added to the year (the former "+2000 when year < 100"
# branch produced a wrong year for any date before 2000) and 1 to the month.
# All other fields are zero-padded to two digits.
my $nowtime=time;
my ($nowsec,$nowmin,$nowhour,$nowday,$nowmonth,$nowyear) = localtime($nowtime);
$nowyear+=1900;
my $nowsmallyear=$nowyear; $nowsmallyear =~ s/^..//;       # Two-digit year
($nowmonth,$nowday,$nowhour,$nowmin,$nowsec) =
    map { sprintf("%02d",$_) } ($nowmonth+1,$nowday,$nowhour,$nowmin,$nowsec);

# Get tomorrow time (will be used to discard some record with corrupted date (future date))
my ($tomorrowsec,$tomorrowmin,$tomorrowhour,$tomorrowday,$tomorrowmonth,$tomorrowyear) = localtime($nowtime+86400);
$tomorrowyear+=1900;
my $tomorrowsmallyear=$tomorrowyear; $tomorrowsmallyear =~ s/^..//;
($tomorrowmonth,$tomorrowday,$tomorrowhour,$tomorrowmin,$tomorrowsec) =
    map { sprintf("%02d",$_) } ($tomorrowmonth+1,$tomorrowday,$tomorrowhour,$tomorrowmin,$tomorrowsec);

# Tomorrow as a YYYYMMDDHHMMSS string
my $timetomorrow=$tomorrowyear.$tomorrowmonth.$tomorrowday.$tomorrowhour.$tomorrowmin.$tomorrowsec;
375
# Init other parameters
# The benchmark interval is decremented once here; the main loop later tests
# the line counter against it with a bitwise AND.
$NBOFLINESFORBENCHMARK--;
$DirCgi = '' if $ENV{"GATEWAY_INTERFACE"};
$DirCgi .= '/' if $DirCgi && $DirCgi !~ /\/$/ && $DirCgi !~ /\\$/;     # Make sure DirCgi ends with a separator
$DirData = $DIR unless $DirData && $DirData ne '.';     # If not defined or set to "." then DirData is the program directory
$DirData = '.'  unless $DirData;                        # If that directory is not defined either, fall back to "."
$DirData =~ s/\/$//;

#my %monthlib =  ( "01","$Message[60]","02","$Message[61]","03","$Message[62]","04","$Message[63]","05","$Message[64]","06","$Message[65]","07","$Message[66]","08","$Message[67]","09","$Message[68]","10","$Message[69]","11","$Message[70]","12","$Message[71]" );
# monthnum must be in english because it's used to translate log date in apache log files which are always in english
# Each month is registered twice, capitalised ("Jan") and lower case ("jan"),
# and maps to its two-digit number ("01".."12").
my %monthnum;
{
    my $number = 0;
    foreach my $monthname (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
        my $twodigits = sprintf("%02d", ++$number);
        $monthnum{$monthname}    = $twodigits;
        $monthnum{lc $monthname} = $twodigits;
    }
}
387
# Load the static DNS cache file into %MyDNSTable.
# Each line is split on whitespace into (time, ip, name); lines without both
# an ip and a name are ignored. A name of '*' is stored as the IP address
# itself.
if ($DNSCache) {
    if ($Debug) { debug("Load DNS Cache file $DNSCache",2); }
    # Three-argument open: the file name comes from the command line and must
    # never be interpreted as an open mode or a piped command.
    open(my $cachehandle, '<', $DNSCache) or error("Can't open cache file $DNSCache: $!");
    while (my $cacheline = <$cachehandle>) {
        my (undef, $ip, $name) = split(' ', $cacheline);
        next unless $ip && $name;
        $MyDNSTable{$ip} = ($name eq '*') ? "$ip" : $name;
    }
    close $cachehandle;
}
400
401 #-----------------------------------------------------------------------------
402 # PROCESSING CURRENT LOG(s)
403 #-----------------------------------------------------------------------------
404 my $NbOfLinesRead=0;
405 my $NbOfLinesParsed=0;
406 my $logfilechosen=0;
407 my $starttime=time();
408
# Define the LogFileToDo list.
# Every entry of @ParamFile is either a plain file name or a wildcard pattern
# (contains * or ?). Each selected file gets the next number $cpt as key in
# %LogFileToDo; the value is the file path, or "<zcat|bzcat> <path> |" for a
# .gz/.bz2 file so that it is read through the decompression command.
# NOTE(review): that command string is later passed to a two-argument open, so
# file names containing shell metacharacters are interpreted by the shell.
$cpt=1;
foreach my $key (0..(@ParamFile-1)) {
    if ($ParamFile[$key] !~ /\*/ && $ParamFile[$key] !~ /\?/) {
        # ---- Plain file name ----
        if ($Debug) { debug("DBG1 Log file $ParamFile[$key] is added to LogFileToDo with number $cpt."); }
        # Check for supported compression
        if ($ParamFile[$key] =~ /$zcat_file/) {
            if ($Debug) { debug("GZIP compression detected for Log file $ParamFile[$key]."); }
            # Modify the name to include the zcat command
            $ParamFile[$key] = $zcat . ' ' . $ParamFile[$key] . ' |';
        }
        elsif ($ParamFile[$key] =~ /$bzcat_file/) {
            if ($Debug) { debug("BZ2 compression detected for Log file $ParamFile[$key]."); }
            # Modify the name to include the bzcat command
            $ParamFile[$key] = $bzcat . ' ' . $ParamFile[$key] . ' |';
        }

        $LogFileToDo{$cpt}=$ParamFile[$key];        # Scalar element (was a one-element slice)
        $cpt++;
    }
    else {
        # ---- Wildcard: split into directory and file pattern ----
        my $DirFile=$ParamFile[$key]; $DirFile =~ s/([^\/\\]*)$//;
        $ParamFile[$key] = $1;
        if ($DirFile eq '') { $DirFile = '.'; }
        # Turn the shell-like pattern into a regex: "." is literal, "*" is
        # any string, "?" is any single character.
        $ParamFile[$key] =~ s/\./\\\./g;
        $ParamFile[$key] =~ s/\*/\.\*/g;
        $ParamFile[$key] =~ s/\?/\./g;
        if ($Debug) { debug("Search for file \"$ParamFile[$key]\" into \"$DirFile\""); }

        # A directory that cannot be read contributes no file; say so instead
        # of silently reading from an unopened handle.
        my $dirhandle;
        if (! opendir($dirhandle,"$DirFile")) {
            warning("Can't read directory \"$DirFile\": $!");
            next;
        }
        my @filearray = sort readdir $dirhandle;
        closedir $dirhandle;        # closedir, not close: this is a directory handle

        my $filter = $ParamFile[$key];
        foreach my $filename (@filearray) {
            next if $filename eq "." || $filename eq "..";
            next unless $filename =~ /^$filter$/;

            if ($Debug) { debug("DBG2 Log file $filename is added to LogFileToDo with number $cpt."); }
            # Check for supported compression
            if ($filename =~ /$zcat_file/) {
                if ($Debug) { debug("GZIP compression detected for Log file $filename."); }
                # Modify the name to include the zcat command
                $LogFileToDo{$cpt}=$zcat . ' ' . "$DirFile/$filename" . ' |';
            }
            elsif ($filename =~ /$bzcat_file/) {
                if ($Debug) { debug("BZ2 compression detected for Log file $filename."); }
                # Modify the name to include the bzcat command
                $LogFileToDo{$cpt}=$bzcat . ' ' . "$DirFile/$filename" . ' |';
            }
            else {
                $LogFileToDo{$cpt}="$DirFile/$filename";
            }
            $cpt++;
        }
    }
}
466
# If no files to process
error("No input log file found") unless keys %LogFileToDo;

# Open all log files.
# Each file is opened on a handle named after its number ("LOG1", "LOG2", ...);
# the handle is addressed by that name (symbolic reference), which is what the
# read loop uses to find it again.
if ($Debug) { debug("Start of processing ".(scalar keys %LogFileToDo)." log file(s), $MaxNbOfThread threads max"); }
foreach my $filenumber (keys %LogFileToDo) {
    my $handlename = "LOG$filenumber";
    my $source     = $LogFileToDo{$filenumber};
    if ($Debug) { debug("Open log file number $filenumber: \"$source\""); }
    open($handlename,"$source") || error("Couldn't open log file \"$source\" : $!");
    binmode $handlename;    # To avoid pb of corrupted text log files with binary chars.
}
479
480 $QueueCursor=1;
481 while (1 == 1)
482 {
483         # BEGIN Read new record
484         # For each log file if logfilechosen is 0
485         # If not, we go directly to log file instead of iterating over all keys for a match
486         #----------------------------------------------------------------------------------
487     my @readlist;
488         if($logfilechosen == 0) {
489             @readlist = keys %LogFileToDo;
490         } else {
491             @readlist = ($logfilechosen);
492         }
493         foreach my $logfilenb (@readlist)
494         {
495                 if ($Debug) { debug("Search next record in file number $logfilenb",3); }
496                 # Read chosen log file until we found a record with good date or reaching end of file
497                 while (1 == 1) {
498                         my $LOG="LOG$logfilenb";
499                         $_=<$LOG>;      # Read new line
500                         if (! $_) {                                                     # No more records in log file number $logfilenb
501                                 if ($Debug) { debug(" No more records in file number $logfilenb",2); }
502                                 delete $LogFileToDo{$logfilenb};
503                                 last;
504                         }
505
506                         $NbOfLinesRead++;
507                         chomp $_; s/\r$//;
508
509                         if (/^#/) { next; }                                                                     # Ignore comment lines (ISS writes such comments)
510                         if (/^!!/) { next; }                                                            # Ignore comment lines (Webstar writes such comments)
511                         if (/^$/) { next; }                                                                     # Ignore blank lines (With ISS: happens sometimes, with Apache: possible when editing log file)
512
513                         $linerecord{$logfilenb}=$_;
514
515                         # Check filters
516                         #----------------------------------------------------------------------
517
518                         # Split DD/Month/YYYY:HH:MM:SS or YYYY-MM-DD HH:MM:SS or MM/DD/YY\tHH:MM:SS
519                         my $year=0; my $month=0; my $day=0; my $hour=0; my $minute=0; my $second=0;
520                         if ($_ =~ /(\d\d\d\d)-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)/) { $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6; }
521                         elsif ($_ =~ /\[(\d\d)[\/:\s](\w+)[\/:\s](\d\d\d\d)[\/:\s](\d\d)[\/:\s](\d\d)[\/:\s](\d\d) /) { $year=$3; $month=$2; $day=$1; $hour=$4; $minute=$5; $second=$6; }
522                         elsif ($_ =~ /\[\w+ (\w+) (\d\d) (\d\d)[\/:\s](\d\d)[\/:\s](\d\d) (\d\d\d\d)\]/) { $year=$6; $month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5; }
523
524                         if ($monthnum{$month}) { $month=$monthnum{$month}; }    # Change lib month in num month if necessary
525
526                         # Create $timerecord like YYYYMMDDHHMMSS
527                         $timerecord{$logfilenb}=int("$year$month$day$hour$minute$second");
528                         if ($timerecord{$logfilenb}<10000000000000) {
529                                 if ($Debug) { debug(" This record is corrupted (no date found)",3); }
530                                 $corrupted{$logfilenb}++;
531                                 next;
532                         }
533                         if ($Debug) { debug(" This is next record for file $logfilenb : timerecord=$timerecord{$logfilenb}",3); }
534                        
535                         # Sort and insert into timerecordorder, oldest at end/back of array
536                         # At the beginning, timerecordorder is empty. Then beceause the first pass is
537                         # a loop on each file to read each first line, the timerecordorder size is
538                         # number of input files.
539                         # After, each new loop, read only one new line, so timerecordorder size increase
540                         # by one but decrease just after by the pop command later.
541                         my $inserted=0;
542                         for(my $c=$#timerecordorder; $c>=0 ; $c--) {
543                             if($timerecord{$logfilenb} <= $timerecord{$timerecordorder[$c]})
544                             {
545                                 # Is older or equal than index at $c, add after
546                                     $timerecordorder[$c + 1]=$logfilenb;
547                                     $inserted = 1;
548                                     last;
549                             } else {
550                                     $timerecordorder[$c + 1]=$timerecordorder[$c];
551                             }
552                         }
553                         if(! $inserted) {
554                             $timerecordorder[0] = $logfilenb;
555                         }
556
557                         last;
558                 }
559         }
560         # END Read new lines for each log file. After this, following var are filled
561         # $timerecord{$logfilenb}
562         # @timerecordorder array
563
564         # We choose which record of which log file to process
565         if ($Debug) { debug("Choose which record of which log file to process",3); }
566         $logfilechosen=pop(@timerecordorder);
567         if(!defined($logfilechosen)) { last; }              # No more record to process
568         
569         # Record is chosen
570         if ($Debug) { debug(" We choosed to qualify record of file number $logfilechosen",3); }
571         if ($Debug) { debug("  Record is $linerecord{$logfilechosen}",3); }
572                        
573         # Record is approved. We found a new line to parse in file number $logfilechosen
574         #-------------------------------------------------------------------------------
575         $NbOfLinesParsed++;
576         if ($ShowSteps) {
577                 if ((++$NbOfLinesShowsteps & $NBOFLINESFORBENCHMARK) == 0) {
578                         my $delay=(time()-$starttime)||1;
579                         print STDERR "$NbOfLinesParsed lines processed (".(1000*$delay)." ms, ".int($NbOfLinesShowsteps/$delay)." lines/seconds)\n";
580                 }
581         }
582
583         # Do DNS lookup
584         #--------------------
585         my $Host='';
586         my $ip=0;
587         if ($DNSLookup) {                       # DNS lookup is 1 or 2
588                 if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; } # IPv4
589                 elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; }                                             # IPv6
590                 if ($ip) {
591                         # Check in static DNS cache file
592                         if ($MyDNSTable{$Host}) {
593                                 if ($Debug) { debug("  DNS lookup asked for $Host and found in static DNS cache file: $MyDNSTable{$Host}",4); }
594                         }
595                         elsif ($DNSLookup==1) {
596                                 # Check in session cache (dynamic DNS cache file + session DNS cache)
597                                 if (! $threadarray{$Host} && ! $TmpDNSLookup{$Host}) {
598                                         if (@SkipDNSLookupFor && &SkipDNSLookup($Host)) {
599                                                 $TmpDNSLookup{$Host}='*';
600                                                 if ($Debug) { debug("  No need of reverse DNS lookup for $Host, skipped at user request.",4); }
601                                         }
602                                         else {
603                                                 if ($ip == 4) {
604                                                         # Create or not a new thread
605                                                         if ($MaxNbOfThread) {
606                                                                 if (! $threadarray{$Host}) {    # No thread already launched for $Host
607                                                                         while ((scalar keys %threadarray) >= $MaxNbOfThread) {
608                                                                                 if ($Debug) { debug(" $MaxNbOfThread thread running reached, so we wait",4); }
609                                                                                 sleep 1;
610                                                                         }
611                                                                         $threadarray{$Host}=1;          # Semaphore to tell thread for $Host is active
612 #                                                                       my $t = new Thread \&MakeDNSLookup, $Host;
613                                                                         my $t = threads->create(sub { MakeDNSLookup($Host) });
614                                                                         if (! $t) { error("Failed to create new thread"); }
615                                                                         if ($Debug) { debug(" Reverse DNS lookup for $Host queued in thread ".$t->tid,4); }
616                                                                         $t->detach();   # We don't need to keep return code
617                                                                 }
618                                                                 else {
619