#!/usr/bin/perl


use strict;
my $prog = "$0";
my $version = "1.0.2";
use Getopt::Long;


# File-level state.  The subs below and the generated code that
# process_file() string-evals all read and write these lexicals, so they
# have to stay declared at file scope under exactly these names.
my $opt_group_by;		# --groupby text; "$N" placeholders are rewritten further down
my $opt_aggre;			# --aggre text: comma separated aggregator calls
my $opt_help = '';
my $opt_stdin = '';		# set by a lone "-" on the command line (the "" option below)
my $opt_version = '';
my $opt_verbose = '';
my $header;			# text of the record currently being parsed
my $code;			# generated code template, before prepare_code()
my @headerargs;			# fields of the record currently being parsed
my %hashtable;			# group key => array ref of aggregate values
my $opt_preview;		# --preview: print the generated code and exit


my $result = GetOptions (
		"groupby=s"	=> \$opt_group_by,
		"preview"	=>  \$opt_preview,
		"aggre=s"    	=> \$opt_aggre,
		"version"  	=> \$opt_version,
		"verbose"  	=> \$opt_verbose,
		"help"	   	=> \$opt_help,
		""		=> \$opt_stdin);

# GetOptions returns false when the command line could not be parsed (it
# has already printed the details on STDERR).  Stop here instead of
# aggregating with options the user did not intend.
if (!$result)
{
	print STDERR "$prog: invalid command line, try --help for usage\n";
	exit 1;
}


# --version: print the version number and stop.
if ($opt_version)
{
	print $version, "\n";
	exit;
}

# --help: print the usage text and stop.  Every entry of @usage is one
# output line; single quotes keep the "$1"-style placeholders literal.
if ($opt_help)
{
	my @usage = (
		'in general this tool does the equivalent to the following SQL statement:',
		'<aggre option sum count max min minstr maxstr >',
		'<group by select option >',
		'options:',
		'  --groupby "[field, ...]"   - list of fields to group by.',
		'  --aggre "[ sum($1),count(),min($3),max($3),minstr($4),maxstr($4)]" ',
		'  --help                     - print this usage information',
		'  --version                     - print version number',
		'',
		'examples:',
		$prog . ' --groupby "$1,$2"  --aggre "sum($1),count($1)"',
	);
	print map { $_ . "\n" } @usage;

	exit;
}


# Index of the aggregate slot the generated code is currently updating.
# It stays -1 until a generated "$idx=N;" statement has run.
my $idx = -1;

# Turn every "$N" of the --groupby text into "$headerargs[N-1]" so that it
# addresses the N-th field of the parsed record.
$opt_group_by =~ s/\$(\d+)/\$headerargs\[$1\-1\]/mg;

my @fields = split_aggre_fields ($opt_aggre);
my ($str, $temparraystr, $arraystr);
my $counter = 0;
my $arrayref;

# Each non-empty aggregator call becomes two generated statements: one that
# selects its slot ($idx) and one that performs the call itself.
foreach my $sub_call (grep { $_ ne "" } @fields)
{
	$str .= "\$idx=$counter;\n";
	$str .= "$sub_call;\n";
	$counter++;
}

# Source text of the initial per-group array: one -1 ("unset") per aggregator.
$temparraystr = join(",", ("-1") x $counter);
$arraystr = "[$temparraystr]";


# Template of the per-file processing loop.  It is turned into runnable
# source by prepare_code() and string-evaluated in process_file().
#
# Every template line starts with a ":0 ... :" marker that prepare_code()
# strips.  The escaped variables (\$hashtable, \$arrayref) end up in the
# generated code as-is, while $opt_group_by, $arraystr and $str are
# interpolated right here, i.e. the user supplied --groupby / --aggre text
# becomes part of the generated program.
#
# The generated loop reads records from IN, skips records for which
# parse_record() returns false, fetches (or creates from $arraystr) the
# array of aggregate values stored under the group key, then runs the
# aggregator statements collected in $str.  It returns 1 after the last
# record.
$code = <<EOC;
:0		:while (<IN>) 
:0		:{
:0			:next if (!parse_record());
:0			:if (!exists \$hashtable{"$opt_group_by"})	
:0			:{
:0			:	\$arrayref = $arraystr;
:0			:	\$hashtable{"$opt_group_by"} = \$arrayref;
:0			:}
:0			:else 
:0			:{
:0			:	\$arrayref = \$hashtable{"$opt_group_by"};
:0			:}
:0			:$str
:0		:}
:0			:return 1;
EOC


# Final source text that process_file() evaluates for every input.
my $gen_code = prepare_code ($code);

# --preview: show the generated code between two rulers and stop without
# reading any input.
if ($opt_preview)
{
	print "\n",
	      "-----------------------------------------------------\n",
	      $gen_code, "\n",
	      "-----------------------------------------------------\n";

	exit;
}


# prepare_code($template)
#
# Turns the marked-up code template into plain source text:
#   - a leading ":0", optional whitespace and ":" is removed from a line,
#   - any line that still starts with ":" afterwards is dropped entirely.
# Returns the resulting text; the argument itself is not modified.
sub prepare_code
{
	my ($template) = @_;

	# Alias $_ to the local copy so both substitutions edit it in place.
	for ($template)
	{
		s/^:0\s*://mg;
		s/^:.*\n//mg;
	}

	return $template;
}




# Read STDIN when it was requested explicitly or when no file arguments
# were given, then process whatever is listed on the command line.
if (!$opt_stdin && !@ARGV)
{
	$opt_stdin = 1;
}
if ($opt_stdin)
{
	process_file ("&=STDIN");
}
process_files (\@ARGV, 1, 1);


# Print one line per group: the group key followed by its aggregate values,
# comma separated.  Groups come out in the hash's internal order.
my ($k,$v);
while ( ($k,$v) = each %hashtable )
{
	my @columns = ($k);

	# $idx is still -1 when no generated aggregator statement ever ran;
	# in that case only the key is printed.
	push (@columns, @$v) if ($idx != -1);

	print join(",", @columns), "\n";
}
	
# process_files(\@names, $force, $wc)
#
# Dispatches every entry of @names:
#   - directories go to process_directory(),
#   - plain files go to process_file() when $force is true or the name
#     matches /.*\.log/; other files are skipped (reported with --verbose),
#   - anything else is expanded with process_wildcard() when $wc is true,
#     otherwise the script dies with a "not found" message.
sub process_files
{
	my ($files, $force, $wc) = @_;

	ENTRY:
	foreach my $arg (@$files)
	{
		if (-d $arg)
		{
			process_directory($arg);
			next ENTRY;
		}

		if (-f $arg)
		{
			my $wanted = $force || $arg =~ /.*\.log/;
			if ($wanted)
			{
				process_file ($arg);
			}
			elsif ($opt_verbose)
			{
				print "skipping file: $arg\n";
			}
			next ENTRY;
		}

		# Neither a directory nor a plain file.
		die "file: $arg not found \n" unless ($wc);
		process_wildcard ($arg);
	}
}


	


# sum($value)
#
# Adds $value to the aggregate slot selected by $idx in the current group's
# array ($arrayref).  A slot holding -1 counts as "unset" and starts at 0.
# NOTE: because -1 doubles as the "unset" marker, a running total that is
# exactly -1 is restarted from 0 by the next call.
sub sum
{
	my ($addend) = @_;

	if ($arrayref->[$idx] == -1)
	{
		$arrayref->[$idx] = 0;
	}
	$arrayref->[$idx] += $addend;
}
# count(...)
#
# Increments the aggregate slot selected by $idx in the current group's
# array ($arrayref).  A slot holding -1 counts as "unset" and starts at 0.
# Any argument is accepted and ignored.
sub count
{
	if ($arrayref->[$idx] == -1)
	{
		$arrayref->[$idx] = 0;
	}
	$arrayref->[$idx] += 1;

}

# min($value)
#
# Keeps the numerically smallest $value seen so far in the aggregate slot
# selected by $idx in the current group's array ($arrayref).
#
# A slot holding -1 counts as "unset" and is seeded with the first value
# itself.  (It used to be seeded with 999999999999999, which was reported
# as the minimum whenever all values of a group were larger than that.)
#
# NOTE: -1 doubles as the "unset" marker, so a stored minimum of exactly -1
# is replaced by the next value passed in.
sub min
{
	my $value_to_update = shift;

	if ($$arrayref[$idx] == -1)
	{
		$$arrayref[$idx] = $value_to_update;
		return;
	}

	if ($$arrayref[$idx] > $value_to_update)
	{
		$$arrayref[$idx] = $value_to_update;
	}
}


# max($value)
#
# Keeps the numerically largest $value seen so far in the aggregate slot
# selected by $idx in the current group's array ($arrayref).
#
# A slot holding -1 counts as "unset".  It is seeded with the first value
# when that value is negative and with 0 otherwise.  Seeding always with 0
# (the previous behaviour) made every group that contains only negative
# values report 0; keeping the 0 seed for everything else leaves the output
# for non-negative, empty and non-numeric fields exactly as before.
#
# NOTE: -1 doubles as the "unset" marker, so a stored maximum of exactly -1
# is re-seeded by the next call.
sub max
{
	my $value_to_update = shift;

	if ($$arrayref[$idx] == -1)
	{
		$$arrayref[$idx] = ($value_to_update < 0) ? $value_to_update : 0;
	}

	if ($$arrayref[$idx] < $value_to_update)
	{
		$$arrayref[$idx] = $value_to_update;
	}
}

# minstr($value)
#
# Keeps the smallest non-empty $value (string comparison) seen so far in
# the aggregate slot selected by $idx in the current group's array
# ($arrayref).  Empty values are ignored.
#
# Two fixes compared with the previous version:
#   - The "unset" marker is tested as a string.  The numeric test also
#     fired for stored strings such as "-1abc" or "-1.0", which were then
#     thrown away on the next call.
#   - The "zzz..." placeholder is treated as "no value yet" instead of
#     being compared against.  Strings that sort after it (for example
#     "~" or non-ASCII text) could never be recorded before.
#
# A group that only ever saw empty values still ends up with the
# placeholder, as before.
# NOTE: a stored value of exactly "-1" is still indistinguishable from the
# "unset" marker.
sub minstr
{
	my $value_to_update = shift;
	my $placeholder = "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz";

	$$arrayref[$idx] = $placeholder if ($$arrayref[$idx] eq "-1");

	return if ($value_to_update eq "");

	if ($$arrayref[$idx] eq $placeholder || $$arrayref[$idx] ge $value_to_update)
	{
		$$arrayref[$idx]  = $value_to_update;
	}
}


# maxstr($value)
#
# Keeps the largest $value (string comparison) seen so far in the aggregate
# slot selected by $idx in the current group's array ($arrayref).  An
# "unset" slot starts out as the empty string.
#
# The "unset" marker is tested as a string: the previous numeric test also
# fired for stored strings such as "-1abc" or "-1.0", which reset the
# maximum collected so far.
# NOTE: a stored value of exactly "-1" is still indistinguishable from the
# "unset" marker.
sub maxstr
{
	my $value_to_update = shift;

	$$arrayref[$idx] = "" if ($$arrayref[$idx] eq "-1");

	if ($$arrayref[$idx] le $value_to_update )
	{
		$$arrayref[$idx] = $value_to_update;
	}
}

# process_directory($dir)
#
# Walks $dir recursively and hands every plain file whose name matches
# /.*\.log/ to process_file(); other plain files are skipped (reported with
# --verbose).  Entries that are neither a directory nor a plain file abort
# the script.
#
# The directory is listed with opendir/readdir instead of glob("$dir/*"):
# glob splits its pattern on whitespace and expands metacharacters, so a
# directory name containing a space or a character like "[" was listed
# wrongly.  As with the "*" pattern, entries starting with "." are left
# out.  Entries are visited in plain sort order.  A directory that cannot
# be read is reported with a warning and skipped.
sub process_directory
{

	my $dir = shift;
	print "processing directory: $dir\n" if ($opt_verbose);

	my $dh;
	if (!opendir ($dh, $dir))
	{
		warn "could not read directory $dir - $! \n";
		return;
	}
	my @files = map { $dir."/".$_ } sort grep { !/^\./ } readdir ($dh);
	closedir ($dh);

	foreach my $arg (@files)
	{
		if (-d $arg)
		{
			process_directory($arg);
		}
		elsif (-f $arg)
		{
			if ($arg =~ /.*\.log/)
			{
				 process_file ($arg);
			}
			else
			{
				print "skipping file: $arg\n" if ($opt_verbose);
			}
		}
		else
		{
			die "file: $arg not found \n";
		}
	}

}

# process_wildcard($pattern)
#
# Expands $pattern with glob() and passes the resulting names to
# process_files() (forced, without further wildcard expansion).  Dies when
# the expansion yields nothing.
sub process_wildcard
{
	my ($pattern) = @_;

	my @matches = glob ($pattern);
	unless (@matches)
	{
		die "no files matching $pattern \n";
	}

	process_files(\@matches, 1, 0);
}

# process_file($file)
#
# Opens $file on the bareword handle IN (the generated code reads from
# <IN>) and runs the generated aggregation code over it.
#   - "&=STDIN" (passed by the main program) attaches IN to standard input,
#   - names ending in ".gz" are read through "gzip -dc",
#   - everything else is opened as a regular file.
# Dies when the input cannot be opened or the generated code fails.
#
# Changes compared with the previous version:
#   - "eval $gen_code || die ..." parsed as "eval ($gen_code || die ...)",
#     so a compile or runtime error of the generated code was never
#     reported.  The result of the eval is checked now.
#   - Files are opened with three-argument open and gzip is started in list
#     form without a shell, so file names containing spaces or shell
#     metacharacters are handled literally.
sub process_file

{
	my $file = shift;

	print "processing file: $file\n" if ($opt_verbose);

	if ($file eq "&=STDIN")
	{
		open (IN, "<&=", fileno(STDIN)) || die "could not open $file - $! \n";
	}
	elsif ($file =~ /.*\.gz$/)
	{
		# "--" keeps a file name starting with "-" from being read as an option.
		open (IN, "-|", "gzip", "-dc", "--", $file) || die "could not open compressed file $file - $! \n";
	}
	else
	{
		open (IN, "<", $file) || die "could not open $file - $! \n";
	}

	# NOTE(review): $gen_code is built from the --groupby / --aggre command
	# line text, i.e. this evaluates user supplied Perl.  That is how the
	# tool works, but it must never be fed untrusted option values.
	# The generated code ends with "return 1", so a false result means it
	# died or did not compile; $@ then holds the reason.
	my $ok = eval $gen_code;
	die "failed to evaluate code: $@ \n" if (!$ok);

	close (IN);

}



# parse_record()
#
# Splits the current input line ($_) into @headerargs and keeps the chomped
# line in $header.  Fields are separated by commas; a comma preceded by an
# odd number of backslashes is escaped and stays part of its field (the
# backslashes are kept).  As usual for split, trailing empty fields are
# dropped.
#
# Returns the number of fields in scalar context, so the generated loop
# skips lines that yield no fields.  The return is explicit now; it used to
# depend on the value of the last statement inside the if/else.
#
# In the escaped-comma branch a separator is now found with a look-behind
# in a single pass.  The previous pattern required a character in front of
# the comma, so a separator at the very start of the line was not split
# there, unlike in the plain branch.
sub parse_record
{
	chomp;
	$header = $_;

	my @parsed;
	if ($header =~ /\\,/)
	{
		# Mark every unescaped comma with a NUL byte, then split on it.
		my $tmpheader = $header;
		$tmpheader =~ s/(?<!\\)((?:\\\\)*),/$1\000/g;
		@parsed = split /\000/,$tmpheader;
	}
	else
	{
		@parsed = split /,/,$header;
	}

	return @headerargs = @parsed;
}
  


# split_aggre_fields($line)
#
# Splits the --aggre text into its individual aggregator calls.  Only commas
# outside of parentheses separate calls, so "f($1,$2),g($3)" yields two
# entries.  In every entry "$N" is rewritten to "$headerargs[N-1]".
# Returns the list of entries; it always has at least one element, which is
# the empty string for empty or undefined input.
sub split_aggre_fields
{
	my ($line) = @_;

	my @fields;
	my $pending = '';
	my $depth = 0;

	# Rewrites the placeholders of one finished entry and stores it.
	my $emit = sub {
		my ($text) = @_;
		$text =~ s/\$(\d+)/\$headerargs\[$1-1\]/mg;
		push (@fields, $text);
	};

	foreach my $char (split //, (defined $line ? $line : ''))
	{
		$depth++ if ($char eq "(");
		$depth-- if ($char eq ")");

		if ($char eq ',' && $depth == 0)
		{
			$emit->($pending);
			$pending = '';
			next;
		}
		$pending .= $char;
	}
	$emit->($pending);

	return @fields;
}



