#! /usr/bin/perl -w

# Ce script extrait d'un log mysql des statistiques de connexion et de requêtes
# par utilisateur et les insère dans une base.
# Si le script est appelé plusieurs fois pour le même log, les
# données ne seont pas insérées dans la base (le tuple day/user est une primary key)
# Donc pour tester faire un delete d'abord dans la table stats pour le jour
# correspondant au log rejoué.
# Attention, les fichiers de log sont effacés à la fin du script.

use strict;
use DBI;

my $hostname = `hostname -s`; chomp $hostname;
my $debug = 0;

my (undef, undef, undef, $yesterday_day, $yesterday_month, $yesterday_year) =
            localtime(time - 86400);
$yesterday_month = ++$yesterday_month;
$yesterday_year = $yesterday_year + 1900;
my $date = "$yesterday_year-$yesterday_month-$yesterday_day";

my $mysql_logfile = sprintf("/var/log/mysql/cronolog/%s/%02s/%02s/mysql.log",
                    $yesterday_year, $yesterday_month, $yesterday_day);
my $mysql_slow_queries_logfile = sprintf("/var/log/mysql/cronolog/%s/%02s/%02s/mysql-slow.log",
                    $yesterday_year, $yesterday_month, $yesterday_day);
my $error_logfile = "/var/log/mysql/mysql-stats.log";
open(ERROR_LOG, ">> $error_logfile") || die "Impossible d'ouvrir $error_logfile";
my $mysql_read_default_file="/usr/local/etc/my.cnf";
    
# Tests
# $mysql_logfile = "/var/log/mysql/cronolog/2005/09/02/mysql.log";
# $mysql_slow_queries_logfile = "/var/log/mysql/cronolog/2005/09/02/mysql-slow.log";

log_error(sprintf("Début de %s", $0));
my ($stats_dbh, $easy_dbh) = db_handles();
# On va insérer dans la table de stats le mysql_id, qu'on doit
# récupérer au préalable dans la base Easy
my $user_info = user_info($easy_dbh);
# On copie une partie de Easy dans la base des stats mysql, pour
# faciliter les requêtes (jointure sur le domain_id)
# mysql2 seulement fait la copie, pour éviter des problèmes de synchro.
easy_copy($stats_dbh, $user_info) if $hostname eq "mysql2";
# On récupère les stats dans le log
my ($connections, $queries) = parse_log();
my ($slow_queries, $slow_queries_time) = parse_slow_log();
# On les insère dans la base de stats.
do_inserts($stats_dbh, $user_info, $connections, $queries, $slow_queries, $slow_queries_time);
# On supprime les fichiers de log
unlink $mysql_logfile;
unlink $mysql_slow_queries_logfile;
log_error(sprintf("Fin de %s", $0));


sub parse_log {

    # Format du log :
    # Connect 9:31:46 1258260 motormchat bizmachine.co.fr.clara.net
    # Query 9:31:46 1258260
    # Quit 9:31:46 1258260
    # Plus généralement :
    # Connect time connect_id user host
    # Query time connect_id
    # Quit time connect_id
	my %connections;
	my %queries;
    my %log_state;
    # Nombre de lignes de log ignorées (parce que le log d'une connexion 
    # est antérieur au fichier de log examiné, donc on n'a pas les infos
    # de connexion quand le log d'une requête est examiné)
    my $ignored_lines = 0;
    my $total_lines = 0;

   my $time_format = qr/\d+:\d+:\d+/;

    if (! open(LOG, $mysql_logfile)) {
        log_error("Impossible d'ouvrir $mysql_logfile");
        die("Impossible d'ouvrir $mysql_logfile");
    }

	while (<LOG>) {
	    ++$total_lines;
	    chomp;
	    log_debug("Ligne examinée : \"$_\"") if $debug;
	
	    if (/Connect ($time_format) (\d+) (\S+) (\S+)/) {
	        
	        my $timestamp = $1;
	        my $connect_id = $2;
	        my $user = $3;
	        my $host = $4;
	        log_debug("Connect timestamp = $timestamp, connect_id = $connect_id, user = $user, host = $host") if $debug;
	
	        $log_state{$connect_id}{'user'} = $user;
	        $log_state{$connect_id}{'host'} = $host;
	
	        $connections{$user}{$host}++;
	        $connections{$user}{'all_hosts'}++;
	
	        $queries{$user}{$host} = 0 unless $queries{$user}{$host};
	        $queries{$user}{'all_hosts'} = 0 unless $queries{$user}{'all_hosts'};

	    }
	
	    if (/Query ($time_format) (\d+)/) {
	
	        my $timestamp = $1;
	        my $connect_id = $2;
	        log_debug("Query timestamp = $timestamp, connect_id = $connect_id") if $debug;
	
	        if (! $log_state{$connect_id}) {
	            ++$ignored_lines;
	            log_debug("Pas d'état pour connect_id $connect_id (\"$_\"), ignoré...") if $debug;
	            next;
	        }
	
	        my $user = $log_state{$connect_id}{'user'};
	        my $host = $log_state{$connect_id}{'host'};
	
	        $queries{$user}{$host}++;
	        $queries{$user}{'all_hosts'}++;
	    }
	
	    if (/Quit ($time_format) (\d+)/) {
	
	        my $timestamp = $1;
	        my $connect_id = $2;
	        log_debug("Quit timestamp = $timestamp, connect_id = $connect_id") if $debug;
	
	        if (! $log_state{$connect_id}) {
	            ++$ignored_lines;
	            log_debug("Pas d'état pour connect_id $connect_id (\"$_\"), ignoré...") if $debug;
	            next;
	        }
	        delete $log_state{$connect_id};
	    }
    }

    log_error(sprintf("Fin du parsing des logs par %s, $total_lines lignes de log, $ignored_lines ignorées", $0));

    return \%connections, \%queries;

}


sub parse_slow_log {

    # Format du log :
    # 7:50:00 motopassion bizmachine6.co.fr.clara.net 13
    # 7:57:52 crgpg_data2 bizmachine3.co.fr.clara.net 46
    # Plus généralement :
    # time user host slow_query_time

	my %slow_queries;
	my %slow_queries_time;
    my $total_lines = 0;
    my $ignored_lines = 0;

    my $time_format = qr/\d+:\d+:\d+/;

    if (! open(LOG, $mysql_slow_queries_logfile)) {
        log_error("Impossible d'ouvrir $mysql_slow_queries_logfile, pas de slow queries ?");
        return {}, {};
    }

	while (<LOG>) {
	    ++$total_lines;
	    chomp;
	    log_debug("Ligne examinée : \"$_\"") if $debug;
	
	    if (/($time_format) (\S+) (\S+) (\d+)/) {
	        
	        my $timestamp = $1;
	        my $user = $2;
	        my $host = $3;
	        my $time = $4;
	        log_debug("timestamp = $timestamp, user = $user, host = $host, time = $time") if $debug;
	
	        $slow_queries{$user}{$host}++;
	        $slow_queries{$user}{'all_hosts'}++;

            $slow_queries_time{$user}{$host} += $time;
            $slow_queries_time{$user}{'all_hosts'} += $time;

	    }

        else {
            ++$ignored_lines;
            log_error("Format inattendu : \"$_\", ligne ignorée");
            next;
        }
    }
	
    log_error(sprintf("Fin du parsing du slow query log par %s, $total_lines lignes de log, $ignored_lines ignorées", $0));
    return \%slow_queries, \%slow_queries_time;

}


sub do_inserts {
    my $stats_dbh = shift;
    my $user_info = shift;
    my $connections = shift;
    my $queries = shift;
    my $slow_queries = shift;
    my $slow_queries_time = shift;

	for my $user (sort keys %{$connections}) {
	    my $total_connections = $connections->{$user}{'all_hosts'};
	    my $total_queries = $queries->{$user}{'all_hosts'};
        my $total_slow_queries = $slow_queries->{$user}{'all_hosts'} || 0;
        my $total_slow_queries_time = $slow_queries_time->{$user}{'all_hosts'} || 0;
        if (! $user_info->{$user}{'mysql_id'}) {
            log_error("Pas d'info dans base Easy pour $user, pas d'insertion...");
            next;
        }
        my $mysql_id = $user_info->{$user}{'mysql_id'};
	    my $insert = " INSERT INTO stats SET day = '$date', mysql_id = '$mysql_id' "
                    . ", connections = '$total_connections', queries = '$total_queries' "
                    . ", slow_queries = '$total_slow_queries' "
                    . ", slow_queries_time = '$total_slow_queries_time' ";
        my $ok = $stats_dbh->do($insert);
        if (! $ok) {
            log_error("Insertion impossible pour $user : " . $DBI::errstr);
        }
	}
}


sub db_handles {

    my $stats_dbh = DBI->connect(
                        "dbi:mysql:easy_mysql_stats;"
                        . "mysql_read_default_file=$mysql_read_default_file;"
                        . "mysql_read_default_group=easy_mysql_stats"
                    );

    if (!$stats_dbh) {
        log_error("Impossible de se connecter sur la base easy_mysql_stats : " . $DBI::errstr);
        die "Impossible de se connecter sur la base easy_mysql_stats : " . $DBI::errstr;
    }
        
    my $easy_dbh = DBI->connect("dbi:mysql:easy;mysql_read_default_file=$mysql_read_default_file;mysql_read_default_group=easy");

    if (!$easy_dbh) {
        log_error("Impossible de se connecter sur la base easy : " . $DBI::errstr);
        die "Impossible de se connecter sur la base easy : " . $DBI::errstr;
    }

    return $stats_dbh, $easy_dbh;
} 


sub user_info {
    my $easy_dbh = shift;
    my %user_info;

    my $statement = $easy_dbh->prepare(
            "SELECT mysql.mysql_id, mysql.domain_id, mysql.user, mysql.server, domain.domain"
            . " FROM mysql, domain WHERE mysql.domain_id = domain.domain_id;"
        );
    my $ok = $statement->execute();
    if (! $ok) {
        log_error("Erreur SELECT Easy : " . $DBI::errstr);
        die "Erreur SELECT Easy : " . $DBI::errstr;
    }


    while (my $hash = $statement->fetchrow_hashref) {
        my $mysql_id = $hash->{'mysql_id'};
        my $domain_id = $hash->{'domain_id'};
        my $user = $hash->{'user'};
        my $server = $hash->{'server'};
        my $domain = $hash->{'domain'};
        $user_info{$user}{'mysql_id'} = $mysql_id;
        $user_info{$user}{'domain_id'} = $domain_id;
        $user_info{$user}{'server'} = $server;
        $user_info{$user}{'domain'} = $domain;
    }
    return \%user_info;
}
    

sub easy_copy {

    my $stats_dbh = shift;
    my $easy_info = shift;

    my $ok = $stats_dbh->do("DELETE FROM easy_copy");
    if (! $ok) {
        log_error("DELETE impossible easy_copy" . $DBI::errstr);
        die "DELETE impossible easy_copy : " . $DBI::errstr;
    }

    foreach my $user (keys %{$easy_info}) {
        my $domain_id = $easy_info->{$user}{'domain_id'};
        my $mysql_id = $easy_info->{$user}{'mysql_id'};
        my $domain = $easy_info->{$user}{'domain'};
        my $server = $easy_info->{$user}{'server'};
        
        $ok = $stats_dbh->do(
                "INSERT INTO easy_copy SET mysql_id = '$mysql_id',"
                . "domain_id = '$domain_id', user='$user', server='$server', domain='$domain'"
                   );
        if (! $ok) {
            log_error("Erreur insertion dans easy_copy user $user" . $DBI::errstr );
            die "Erreur insertion dans easy_copy user $user :" . $DBI::errstr; 
        }

    }
        
}


sub log_error {
    my $message = shift;
    print ERROR_LOG scalar localtime, " : $message\n";
}


sub log_debug {
    my $message = shift;
    print "$message\n";
}


